[v2,1/7] exec: Fix for qemu_ram_resize() callback
diff mbox series

Message ID 20200117174522.22044-2-shameerali.kolothum.thodi@huawei.com
State New
Headers show
Series
  • ARM virt: Add NVDIMM support
Related show

Commit Message

Shameerali Kolothum Thodi Jan. 17, 2020, 5:45 p.m. UTC
If ACPI blob length modifications happens after the initial
virt_acpi_build() call, and the changed blob length is within
the PAGE size boundary, then the revised size is not seen by
the firmware on Guest reboot. The is because in the
virt_acpi_build_update() -> acpi_ram_update() -> qemu_ram_resize()
path, qemu_ram_resize() uses used_length (ram_block size which is
aligned to PAGE size) and the "resize callback" to update the size
seen by firmware is not getting invoked.

Hence make sure callback is called if the new size is different
from original requested size.

Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
---
Please find the previous discussions on this issue here,
https://patchwork.kernel.org/patch/11174947/

But this one attempts a different solution to fix it by introducing
req_length var to RAMBlock struct. 

---
 exec.c                  | 36 +++++++++++++++++++++++-------------
 include/exec/ram_addr.h |  5 +++--
 2 files changed, 26 insertions(+), 15 deletions(-)

Comments

Igor Mammedov Feb. 4, 2020, 3:23 p.m. UTC | #1
On Fri, 17 Jan 2020 17:45:16 +0000
Shameer Kolothum <shameerali.kolothum.thodi@huawei.com> wrote:

> If ACPI blob length modifications happens after the initial
> virt_acpi_build() call, and the changed blob length is within
> the PAGE size boundary, then the revised size is not seen by
> the firmware on Guest reboot. The is because in the
> virt_acpi_build_update() -> acpi_ram_update() -> qemu_ram_resize()
> path, qemu_ram_resize() uses used_length (ram_block size which is
> aligned to PAGE size) and the "resize callback" to update the size
> seen by firmware is not getting invoked.
> 
> Hence make sure callback is called if the new size is different
> from original requested size.
> 
> Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
> ---
> Please find the previous discussions on this issue here,
> https://patchwork.kernel.org/patch/11174947/
> 
> But this one attempts a different solution to fix it by introducing
> req_length var to RAMBlock struct. 
> 

looks fine to me, so
Acked-by: Igor Mammedov <imammedo@redhat.com>

CCing David who touches this area in his latest series for and
might have an opinion on how it should be handled.

> ---
>  exec.c                  | 36 +++++++++++++++++++++++-------------
>  include/exec/ram_addr.h |  5 +++--
>  2 files changed, 26 insertions(+), 15 deletions(-)
> 
> diff --git a/exec.c b/exec.c
> index d4b769d0d4..9ce33992f8 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -2123,16 +2123,18 @@ static int memory_try_enable_merging(void *addr, size_t len)
>   * resize callback to update device state and/or add assertions to detect
>   * misuse, if necessary.
>   */
> -int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
> +int qemu_ram_resize(RAMBlock *block, ram_addr_t size, Error **errp)
>  {
> -    assert(block);
> +    ram_addr_t newsize;
>  
> -    newsize = HOST_PAGE_ALIGN(newsize);
> +    assert(block);
>  
> -    if (block->used_length == newsize) {
> +    if (block->req_length == size) {
>          return 0;
>      }
>  
> +    newsize = HOST_PAGE_ALIGN(size);
> +
>      if (!(block->flags & RAM_RESIZEABLE)) {
>          error_setg_errno(errp, EINVAL,
>                           "Length mismatch: %s: 0x" RAM_ADDR_FMT
> @@ -2149,13 +2151,19 @@ int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
>          return -EINVAL;
>      }
>  
> -    cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
> -    block->used_length = newsize;
> -    cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
> -                                        DIRTY_CLIENTS_ALL);
> -    memory_region_set_size(block->mr, newsize);
> +    block->req_length = size;
> +
> +    if (newsize != block->used_length) {
> +        cpu_physical_memory_clear_dirty_range(block->offset,
> +                                              block->used_length);
> +        block->used_length = newsize;
> +        cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
> +                                            DIRTY_CLIENTS_ALL);
> +        memory_region_set_size(block->mr, newsize);
> +    }
> +
>      if (block->resized) {
> -        block->resized(block->idstr, newsize, block->host);
> +        block->resized(block->idstr, block->req_length, block->host);
>      }
>      return 0;
>  }
> @@ -2412,16 +2420,18 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
>                                    MemoryRegion *mr, Error **errp)
>  {
>      RAMBlock *new_block;
> +    ram_addr_t newsize;
>      Error *local_err = NULL;
>  
> -    size = HOST_PAGE_ALIGN(size);
> +    newsize = HOST_PAGE_ALIGN(size);
>      max_size = HOST_PAGE_ALIGN(max_size);
>      new_block = g_malloc0(sizeof(*new_block));
>      new_block->mr = mr;
>      new_block->resized = resized;
> -    new_block->used_length = size;
> +    new_block->req_length = size;
> +    new_block->used_length = newsize;
>      new_block->max_length = max_size;
> -    assert(max_size >= size);
> +    assert(max_size >= newsize);
>      new_block->fd = -1;
>      new_block->page_size = qemu_real_host_page_size;
>      new_block->host = host;
> diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
> index 5adebb0bc7..fd13082224 100644
> --- a/include/exec/ram_addr.h
> +++ b/include/exec/ram_addr.h
> @@ -31,8 +31,9 @@ struct RAMBlock {
>      uint8_t *host;
>      uint8_t *colo_cache; /* For colo, VM's ram cache */
>      ram_addr_t offset;
> -    ram_addr_t used_length;
> -    ram_addr_t max_length;
> +    ram_addr_t req_length; /* Original requested size, used if RAM_RESIZEABLE */
> +    ram_addr_t used_length; /* aligned to qemu_host_page_size */
> +    ram_addr_t max_length; /*  aligned to qemu_host_page_size */
>      void (*resized)(const char*, uint64_t length, void *host);
>      uint32_t flags;
>      /* Protected by iothread lock.  */
David Hildenbrand Feb. 4, 2020, 4:44 p.m. UTC | #2
On 04.02.20 16:23, Igor Mammedov wrote:
> On Fri, 17 Jan 2020 17:45:16 +0000
> Shameer Kolothum <shameerali.kolothum.thodi@huawei.com> wrote:
> 
>> If ACPI blob length modifications happens after the initial
>> virt_acpi_build() call, and the changed blob length is within
>> the PAGE size boundary, then the revised size is not seen by
>> the firmware on Guest reboot. The is because in the
>> virt_acpi_build_update() -> acpi_ram_update() -> qemu_ram_resize()
>> path, qemu_ram_resize() uses used_length (ram_block size which is
>> aligned to PAGE size) and the "resize callback" to update the size
>> seen by firmware is not getting invoked.
>>
>> Hence make sure callback is called if the new size is different
>> from original requested size.
>>
>> Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
>> ---
>> Please find the previous discussions on this issue here,
>> https://patchwork.kernel.org/patch/11174947/
>>
>> But this one attempts a different solution to fix it by introducing
>> req_length var to RAMBlock struct. 
>>
> 
> looks fine to me, so
> Acked-by: Igor Mammedov <imammedo@redhat.com>

Thanks for CCing.

This in fact collides with my changes ... but not severely :)

> 
> CCing David who touches this area in his latest series for and
> might have an opinion on how it should be handled.
> 

So we are talking about sub-page size changes? I somewhat dislike
storing "req_length" in ram blocks. Looks like sub-pages stuff does not
belong there.

Ram blocks only operate on page granularity. Ram block notifiers only
operate on page granularity. Memory regions only operate on page
granularity. Dirty bitmaps operate on page granularity. Especially,
memory_region_size(mr) will always return aligned values.

I think users/owner should deal with anything smaller manually if
they really need it.

What about always calling the resized() callback and letting the
actual owner figure out if the size changed on sub-page granularity
or not? (by looking up the size manually using some mechanism not glued to
memory regions/ram blocks/whatever)

diff --git a/exec.c b/exec.c
index 67e520d18e..59d46cc388 100644
--- a/exec.c
+++ b/exec.c
@@ -2130,6 +2130,13 @@ int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
     newsize = HOST_PAGE_ALIGN(newsize);
 
     if (block->used_length == newsize) {
+        /*
+         * The owner might want to handle sub-page resizes. We only provide
+         * the aligned size - because ram blocks are always page aligned.
+         */
+        if (block->resized) {
+            block->resized(block->idstr, newsize, block->host);
+        }
         return 0;
     }
David Hildenbrand Feb. 4, 2020, 7:05 p.m. UTC | #3
On 04.02.20 17:44, David Hildenbrand wrote:
> On 04.02.20 16:23, Igor Mammedov wrote:
>> On Fri, 17 Jan 2020 17:45:16 +0000
>> Shameer Kolothum <shameerali.kolothum.thodi@huawei.com> wrote:
>>
>>> If ACPI blob length modifications happens after the initial
>>> virt_acpi_build() call, and the changed blob length is within
>>> the PAGE size boundary, then the revised size is not seen by
>>> the firmware on Guest reboot. The is because in the
>>> virt_acpi_build_update() -> acpi_ram_update() -> qemu_ram_resize()
>>> path, qemu_ram_resize() uses used_length (ram_block size which is
>>> aligned to PAGE size) and the "resize callback" to update the size
>>> seen by firmware is not getting invoked.
>>>
>>> Hence make sure callback is called if the new size is different
>>> from original requested size.
>>>
>>> Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
>>> ---
>>> Please find the previous discussions on this issue here,
>>> https://patchwork.kernel.org/patch/11174947/
>>>
>>> But this one attempts a different solution to fix it by introducing
>>> req_length var to RAMBlock struct. 
>>>
>>
>> looks fine to me, so
>> Acked-by: Igor Mammedov <imammedo@redhat.com>
> 
> Thanks for CCing.
> 
> This in fact collides with my changes ... but not severely :)
> 
>>
>> CCing David who touches this area in his latest series for and
>> might have an opinion on how it should be handled.
>>
> 
> So we are talking about sub-page size changes? I somewhat dislike
> storing "req_length" in ram blocks. Looks like sub-pages stuff does not
> belong there.
> 
> Ram blocks only operate on page granularity. Ram block notifiers only
> operate on page granularity. Memory regions only operate on page
> granularity. Dirty bitmaps operate on page granularity. Especially,
> memory_region_size(mr) will always return aligned values.
> 
> I think users/owner should deal with anything smaller manually if
> they really need it.
> 
> What about always calling the resized() callback and letting the
> actual owner figure out if the size changed on sub-page granularity
> or not? (by looking up the size manually using some mechanism not glued to
> memory regions/ram blocks/whatever)
> 
> diff --git a/exec.c b/exec.c
> index 67e520d18e..59d46cc388 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -2130,6 +2130,13 @@ int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
>      newsize = HOST_PAGE_ALIGN(newsize);
>  
>      if (block->used_length == newsize) {
> +        /*
> +         * The owner might want to handle sub-page resizes. We only provide
> +         * the aligned size - because ram blocks are always page aligned.
> +         */
> +        if (block->resized) {
> +            block->resized(block->idstr, newsize, block->host);
> +        }
>          return 0;
>      }
>

Oh, and one more reason why the proposal in this patch is inconsistent:

When migrating resizable memory regions (RAM_SAVE_FLAG_MEM_SIZE) we
store the block->used_length (ram_save_setup()) and use that value to
resize the region on the target (ram_load_precopy() -> qemu_ram_resize()).

This will be the value the callback will be called with. Page aligned.
Shameerali Kolothum Thodi Feb. 5, 2020, 4:29 p.m. UTC | #4
Hi David,

> -----Original Message-----
> From: Qemu-devel
> [mailto:qemu-devel-bounces+shameerali.kolothum.thodi=huawei.com@nongn
> u.org] On Behalf Of David Hildenbrand
> Sent: 04 February 2020 19:05
> To: Igor Mammedov <imammedo@redhat.com>; Shameerali Kolothum Thodi
> <shameerali.kolothum.thodi@huawei.com>
> Cc: peter.maydell@linaro.org; xiaoguangrong.eric@gmail.com;
> mst@redhat.com; shannon.zhaosl@gmail.com; qemu-devel@nongnu.org;
> xuwei (O) <xuwei5@huawei.com>; Linuxarm <linuxarm@huawei.com>;
> eric.auger@redhat.com; qemu-arm@nongnu.org; lersek@redhat.com
> Subject: Re: [PATCH v2 1/7] exec: Fix for qemu_ram_resize() callback
> 
> On 04.02.20 17:44, David Hildenbrand wrote:
> > On 04.02.20 16:23, Igor Mammedov wrote:
> >> On Fri, 17 Jan 2020 17:45:16 +0000
> >> Shameer Kolothum <shameerali.kolothum.thodi@huawei.com> wrote:
> >>
> >>> If ACPI blob length modifications happens after the initial
> >>> virt_acpi_build() call, and the changed blob length is within
> >>> the PAGE size boundary, then the revised size is not seen by
> >>> the firmware on Guest reboot. The is because in the
> >>> virt_acpi_build_update() -> acpi_ram_update() -> qemu_ram_resize()
> >>> path, qemu_ram_resize() uses used_length (ram_block size which is
> >>> aligned to PAGE size) and the "resize callback" to update the size
> >>> seen by firmware is not getting invoked.
> >>>
> >>> Hence make sure callback is called if the new size is different
> >>> from original requested size.
> >>>
> >>> Signed-off-by: Shameer Kolothum
> <shameerali.kolothum.thodi@huawei.com>
> >>> ---
> >>> Please find the previous discussions on this issue here,
> >>> https://patchwork.kernel.org/patch/11174947/
> >>>
> >>> But this one attempts a different solution to fix it by introducing
> >>> req_length var to RAMBlock struct.
> >>>
> >>
> >> looks fine to me, so
> >> Acked-by: Igor Mammedov <imammedo@redhat.com>
> >
> > Thanks for CCing.
> >
> > This in fact collides with my changes ... but not severely :)
> >
> >>
> >> CCing David who touches this area in his latest series for and
> >> might have an opinion on how it should be handled.
> >>
> >
> > So we are talking about sub-page size changes? I somewhat dislike
> > storing "req_length" in ram blocks. Looks like sub-pages stuff does not
> > belong there.

Thanks for taking a look at this. Agree, I didn’t like that "req_length" either.

> > Ram blocks only operate on page granularity. Ram block notifiers only
> > operate on page granularity. Memory regions only operate on page
> > granularity. Dirty bitmaps operate on page granularity. Especially,
> > memory_region_size(mr) will always return aligned values.
> >
> > I think users/owner should deal with anything smaller manually if
> > they really need it.
> >
> > What about always calling the resized() callback and letting the
> > actual owner figure out if the size changed on sub-page granularity
> > or not? (by looking up the size manually using some mechanism not glued to
> > memory regions/ram blocks/whatever)
> >
> > diff --git a/exec.c b/exec.c
> > index 67e520d18e..59d46cc388 100644
> > --- a/exec.c
> > +++ b/exec.c
> > @@ -2130,6 +2130,13 @@ int qemu_ram_resize(RAMBlock *block,
> ram_addr_t newsize, Error **errp)
> >      newsize = HOST_PAGE_ALIGN(newsize);
> >
> >      if (block->used_length == newsize) {
> > +        /*
> > +         * The owner might want to handle sub-page resizes. We only
> provide
> > +         * the aligned size - because ram blocks are always page aligned.
> > +         */
> > +        if (block->resized) {
> > +            block->resized(block->idstr, newsize, block->host);

Does it make sense to pass the requested size in the callback than the aligned size
as the owner might be interested more in the org_req_size vs new_req _size case?

> > +        }
> >          return 0;
> >      }
> >

 
> Oh, and one more reason why the proposal in this patch is inconsistent:
> 
> When migrating resizable memory regions (RAM_SAVE_FLAG_MEM_SIZE) we
> store the block->used_length (ram_save_setup()) and use that value to
> resize the region on the target (ram_load_precopy() -> qemu_ram_resize()).
> 
> This will be the value the callback will be called with. Page aligned.
> 

Sorry, I didn’t quite get that point and not sure how "req_length" approach 
will affect the migration.

Anyway, I have reworked the patch(below) with the above suggestion, that is
always calling the resized() callback, but modified it to pass the requested size
instead of the page aligned size. Please let me know your feedback.

Thanks,
Shameer

diff --git a/exec.c b/exec.c
index 67e520d18e..c9cb9a54fa 100644
--- a/exec.c
+++ b/exec.c
@@ -2123,13 +2123,22 @@ static int memory_try_enable_merging(void *addr, size_t len)
  * resize callback to update device state and/or add assertions to detect
  * misuse, if necessary.
  */
-int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
+int qemu_ram_resize(RAMBlock *block, ram_addr_t size, Error **errp)
 {
+    ram_addr_t newsize;
     assert(block);
 
-    newsize = HOST_PAGE_ALIGN(newsize);
+    newsize = HOST_PAGE_ALIGN(size);
 
     if (block->used_length == newsize) {
+        /*
+         * RAM blocks are always page aligned, but the owner might want
+         * to handle sub-page resizes. Let the owner know about any
+         * sub-page changes.
+         */
+        if (block->resized) {
+            block->resized(block->idstr, size, block->host);
+        }
         return 0;
     }
 
@@ -2155,7 +2164,7 @@ int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
                                         DIRTY_CLIENTS_ALL);
     memory_region_set_size(block->mr, newsize);
     if (block->resized) {
-        block->resized(block->idstr, newsize, block->host);
+        block->resized(block->idstr, size, block->host);
     }
     return 0;
 }
diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
index 179b302f01..ec9cfa4d10 100644
--- a/hw/nvram/fw_cfg.c
+++ b/hw/nvram/fw_cfg.c
@@ -934,9 +934,11 @@ void *fw_cfg_modify_file(FWCfgState *s, const char *filename,
 
     for (i = 0; i < index; i++) {
         if (strcmp(filename, s->files->f[i].name) == 0) {
-            ptr = fw_cfg_modify_bytes_read(s, FW_CFG_FILE_FIRST + i,
-                                           data, len);
-            s->files->f[i].size   = cpu_to_be32(len);
+            if (s->files->f[i].size != cpu_to_be32(len)) {
+                ptr = fw_cfg_modify_bytes_read(s, FW_CFG_FILE_FIRST + i,
+                                               data, len);
+                s->files->f[i].size   = cpu_to_be32(len);
+            }
             return ptr;
         }
     }
---
David Hildenbrand Feb. 5, 2020, 4:40 p.m. UTC | #5
>> Oh, and one more reason why the proposal in this patch is inconsistent:
>>
>> When migrating resizable memory regions (RAM_SAVE_FLAG_MEM_SIZE) we
>> store the block->used_length (ram_save_setup()) and use that value to
>> resize the region on the target (ram_load_precopy() -> qemu_ram_resize()).
>>
>> This will be the value the callback will be called with. Page aligned.
>>
> 
> Sorry, I didn’t quite get that point and not sure how "req_length" approach 
> will affect the migration.

The issue is that on migration, you will lose the sub-page size either
way. So your callback will be called
- on the migration source with a sub-page size (via
  memory_region_ram_resize() from e.g., hw/i386/acpi-build.c)
- on the migration target with a page-aligned size (via
  qemu_ram_resize() from migration/ram.c)

So this is inconsistent, especially when migrating.

Is there a way to get access to the sub-page size without passing it
through the callback?

Like in fw_cfg_modify_file() do some fancy lookup and use the sub-page
size instead of the passed size? (might have to be stored somewhere and
refetched - and migrated)
Shameerali Kolothum Thodi Feb. 6, 2020, 10:20 a.m. UTC | #6
> -----Original Message-----
> From: David Hildenbrand [mailto:david@redhat.com]
> Sent: 05 February 2020 16:41
> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>;
> Igor Mammedov <imammedo@redhat.com>
> Cc: peter.maydell@linaro.org; xiaoguangrong.eric@gmail.com;
> mst@redhat.com; shannon.zhaosl@gmail.com; qemu-devel@nongnu.org;
> xuwei (O) <xuwei5@huawei.com>; Linuxarm <linuxarm@huawei.com>;
> eric.auger@redhat.com; qemu-arm@nongnu.org; lersek@redhat.com
> Subject: Re: [PATCH v2 1/7] exec: Fix for qemu_ram_resize() callback
> 
> >> Oh, and one more reason why the proposal in this patch is inconsistent:
> >>
> >> When migrating resizable memory regions (RAM_SAVE_FLAG_MEM_SIZE)
> we
> >> store the block->used_length (ram_save_setup()) and use that value to
> >> resize the region on the target (ram_load_precopy() -> qemu_ram_resize()).
> >>
> >> This will be the value the callback will be called with. Page aligned.
> >>
> >
> > Sorry, I didn’t quite get that point and not sure how "req_length" approach
> > will affect the migration.
> 
> The issue is that on migration, you will lose the sub-page size either
> way. So your callback will be called
> - on the migration source with a sub-page size (via
>   memory_region_ram_resize() from e.g., hw/i386/acpi-build.c)
> - on the migration target with a page-aligned size (via
>   qemu_ram_resize() from migration/ram.c)
> 
> So this is inconsistent, especially when migrating.

Thanks for explaining. I tried to add some debug prints to further understand
what actually happens during migration case.

Guest-source with initial one nvdimm
----------------------------------------------------

-object memory-backend-ram,id=mem1,size=1G \
-device nvdimm,id=dimm1,memdev=mem1 \

fw_cfg_add_file_callback: filename etc/boot-fail-wait size 0x4
fw_cfg_add_file_callback: filename etc/acpi/nvdimm-mem size 0x1000
fw_cfg_add_file_callback: filename etc/acpi/tables size 0x55f4
fw_cfg_add_file_callback: filename etc/table-loader size 0xd00
fw_cfg_add_file_callback: filename etc/tpm/log size 0x0
fw_cfg_add_file_callback: filename etc/acpi/rsdp size 0x24
fw_cfg_add_file_callback: filename etc/smbios/smbios-tables size 0x104
fw_cfg_add_file_callback: filename etc/smbios/smbios-anchor size 0x18
fw_cfg_modify_file: filename bootorder len 0x0
fw_cfg_add_file_callback: filename bootorder size 0x0
fw_cfg_modify_file: filename bios-geometry len 0x0
fw_cfg_add_file_callback: filename bios-geometry size 0x0
fw_cfg_modify_file: filename etc/acpi/tables len 0x55f4
fw_cfg_modify_file: filename etc/acpi/rsdp len 0x24
fw_cfg_modify_file: filename etc/table-loader len 0xd00
....

hot add another nvdimm device,

(qemu) object_add memory-backend-ram,id=mem2,size=1G
(qemu) device_add nvdimm,id=dimm2,memdev=mem2


root@ubuntu:/# cat /dev/pmem
pmem0  pmem1

Guest-target with two nvdimms
-----------------------------------------------

-object memory-backend-ram,id=mem1,size=1G \
-device nvdimm,id=dimm1,memdev=mem1 \
-object memory-backend-ram,id=mem2,size=1G \
-device nvdimm,id=dimm2,memdev=mem2 \

fw_cfg_add_file_callback: filename etc/boot-fail-wait size 0x4
fw_cfg_add_file_callback: filename etc/acpi/nvdimm-mem size 0x1000
fw_cfg_add_file_callback: filename etc/acpi/tables size 0x56ac
fw_cfg_add_file_callback: filename etc/table-loader size 0xd00
fw_cfg_add_file_callback: filename etc/tpm/log size 0x0
fw_cfg_add_file_callback: filename etc/acpi/rsdp size 0x24
fw_cfg_add_file_callback: filename etc/smbios/smbios-tables size 0x104
fw_cfg_add_file_callback: filename etc/smbios/smbios-anchor size 0x18
fw_cfg_modify_file: filename bootorder len 0x0
fw_cfg_add_file_callback: filename bootorder size 0x0
fw_cfg_modify_file: filename bios-geometry len 0x0
fw_cfg_add_file_callback: filename bios-geometry size 0x0


Initiate migration Source --> Target,

ram_load_precopy: Ram blk mach-virt.ram length 0x100000000 used_length 0x100000000
ram_load_precopy: Ram blk mem1 length 0x40000000 used_length 0x40000000
ram_load_precopy: Ram blk mem2 length 0x40000000 used_length 0x40000000
ram_load_precopy: Ram blk virt.flash0 length 0x4000000 used_length 0x4000000
ram_load_precopy: Ram blk virt.flash1 length 0x4000000 used_length 0x4000000
ram_load_precopy: Ram blk /rom@etc/acpi/tables length 0x6000 used_length 0x6000
ram_load_precopy: Ram blk 0000:00:01.0/virtio-net-pci.rom length 0x40000 used_length 0x40000
ram_load_precopy: Ram blk /rom@etc/table-loader length 0x1000 used_length 0x1000
ram_load_precopy: Ram blk /rom@etc/acpi/rsdp length 0x1000 used_length 0x1000


root@ubuntu:/# cat /dev/pmem
pmem0  pmem1  

From the logs, it looks like the ram_load_precopy() --> qemu_ram_resize() is not
called as length == used_length and both seems to be page aligned values.
And from https://github.com/qemu/qemu/blob/master/migration/ram.c#L3421
qemu_ram_resize() is called with length if length != used_length.

Of course my knowledge on Qemu migration is very limited and maybe I am
missing something here. But I am trying to see under what circumstances 
qemu_ram_resize() will get invoked with a page aligned size that will be different
to the size used in the FWCfgEntry . Please let me know,

Much appreciated,
Shameer

> Is there a way to get access to the sub-page size without passing it
> through the callback?
> 
> Like in fw_cfg_modify_file() do some fancy lookup and use the sub-page
> size instead of the passed size? (might have to be stored somewhere and
> refetched - and migrated)
> 
> --
> Thanks,
> 
> David / dhildenb
David Hildenbrand Feb. 6, 2020, 10:55 a.m. UTC | #7
On 06.02.20 11:20, Shameerali Kolothum Thodi wrote:
> 
> 
>> -----Original Message-----
>> From: David Hildenbrand [mailto:david@redhat.com]
>> Sent: 05 February 2020 16:41
>> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>;
>> Igor Mammedov <imammedo@redhat.com>
>> Cc: peter.maydell@linaro.org; xiaoguangrong.eric@gmail.com;
>> mst@redhat.com; shannon.zhaosl@gmail.com; qemu-devel@nongnu.org;
>> xuwei (O) <xuwei5@huawei.com>; Linuxarm <linuxarm@huawei.com>;
>> eric.auger@redhat.com; qemu-arm@nongnu.org; lersek@redhat.com
>> Subject: Re: [PATCH v2 1/7] exec: Fix for qemu_ram_resize() callback
>>
>>>> Oh, and one more reason why the proposal in this patch is inconsistent:
>>>>
>>>> When migrating resizable memory regions (RAM_SAVE_FLAG_MEM_SIZE)
>> we
>>>> store the block->used_length (ram_save_setup()) and use that value to
>>>> resize the region on the target (ram_load_precopy() -> qemu_ram_resize()).
>>>>
>>>> This will be the value the callback will be called with. Page aligned.
>>>>
>>>
>>> Sorry, I didn’t quite get that point and not sure how "req_length" approach
>>> will affect the migration.
>>
>> The issue is that on migration, you will lose the sub-page size either
>> way. So your callback will be called
>> - on the migration source with a sub-page size (via
>>   memory_region_ram_resize() from e.g., hw/i386/acpi-build.c)
>> - on the migration target with a page-aligned size (via
>>   qemu_ram_resize() from migration/ram.c)
>>
>> So this is inconsistent, especially when migrating.
> 
> Thanks for explaining. I tried to add some debug prints to further understand
> what actually happens during migration case.
> 
> Guest-source with initial one nvdimm
> ----------------------------------------------------
> 
> -object memory-backend-ram,id=mem1,size=1G \
> -device nvdimm,id=dimm1,memdev=mem1 \
> 
> fw_cfg_add_file_callback: filename etc/boot-fail-wait size 0x4
> fw_cfg_add_file_callback: filename etc/acpi/nvdimm-mem size 0x1000
> fw_cfg_add_file_callback: filename etc/acpi/tables size 0x55f4
> fw_cfg_add_file_callback: filename etc/table-loader size 0xd00
> fw_cfg_add_file_callback: filename etc/tpm/log size 0x0
> fw_cfg_add_file_callback: filename etc/acpi/rsdp size 0x24
> fw_cfg_add_file_callback: filename etc/smbios/smbios-tables size 0x104
> fw_cfg_add_file_callback: filename etc/smbios/smbios-anchor size 0x18
> fw_cfg_modify_file: filename bootorder len 0x0
> fw_cfg_add_file_callback: filename bootorder size 0x0
> fw_cfg_modify_file: filename bios-geometry len 0x0
> fw_cfg_add_file_callback: filename bios-geometry size 0x0
> fw_cfg_modify_file: filename etc/acpi/tables len 0x55f4
> fw_cfg_modify_file: filename etc/acpi/rsdp len 0x24
> fw_cfg_modify_file: filename etc/table-loader len 0xd00
> ....
> 
> hot add another nvdimm device,
> 
> (qemu) object_add memory-backend-ram,id=mem2,size=1G
> (qemu) device_add nvdimm,id=dimm2,memdev=mem2
> 
> 
> root@ubuntu:/# cat /dev/pmem
> pmem0  pmem1
> 
> Guest-target with two nvdimms
> -----------------------------------------------
> 
> -object memory-backend-ram,id=mem1,size=1G \
> -device nvdimm,id=dimm1,memdev=mem1 \
> -object memory-backend-ram,id=mem2,size=1G \
> -device nvdimm,id=dimm2,memdev=mem2 \
> 
> fw_cfg_add_file_callback: filename etc/boot-fail-wait size 0x4
> fw_cfg_add_file_callback: filename etc/acpi/nvdimm-mem size 0x1000
> fw_cfg_add_file_callback: filename etc/acpi/tables size 0x56ac
> fw_cfg_add_file_callback: filename etc/table-loader size 0xd00
> fw_cfg_add_file_callback: filename etc/tpm/log size 0x0
> fw_cfg_add_file_callback: filename etc/acpi/rsdp size 0x24
> fw_cfg_add_file_callback: filename etc/smbios/smbios-tables size 0x104
> fw_cfg_add_file_callback: filename etc/smbios/smbios-anchor size 0x18
> fw_cfg_modify_file: filename bootorder len 0x0
> fw_cfg_add_file_callback: filename bootorder size 0x0
> fw_cfg_modify_file: filename bios-geometry len 0x0
> fw_cfg_add_file_callback: filename bios-geometry size 0x0
> 
> 
> Initiate migration Source --> Target,
> 
> ram_load_precopy: Ram blk mach-virt.ram length 0x100000000 used_length 0x100000000
> ram_load_precopy: Ram blk mem1 length 0x40000000 used_length 0x40000000
> ram_load_precopy: Ram blk mem2 length 0x40000000 used_length 0x40000000
> ram_load_precopy: Ram blk virt.flash0 length 0x4000000 used_length 0x4000000
> ram_load_precopy: Ram blk virt.flash1 length 0x4000000 used_length 0x4000000
> ram_load_precopy: Ram blk /rom@etc/acpi/tables length 0x6000 used_length 0x6000
> ram_load_precopy: Ram blk 0000:00:01.0/virtio-net-pci.rom length 0x40000 used_length 0x40000
> ram_load_precopy: Ram blk /rom@etc/table-loader length 0x1000 used_length 0x1000
> ram_load_precopy: Ram blk /rom@etc/acpi/rsdp length 0x1000 used_length 0x1000
> 
> 
> root@ubuntu:/# cat /dev/pmem
> pmem0  pmem1  
> 
> From the logs, it looks like the ram_load_precopy() --> qemu_ram_resize() is not
> called as length == used_length and both seems to be page aligned values.
> And from https://github.com/qemu/qemu/blob/master/migration/ram.c#L3421
> qemu_ram_resize() is called with length if length != used_length.

Assume on your source, the old size is 12345 bytes. So 16384 aligned up
(4 pages).

Assume on your target, the new size is 123456 bytes, so 126976 aligned
up (31 pages).

If you migrate from source to destination, the migration code would
resize to 16384, although the "actual size" is 12345. The callback will
be called with the aligned size, not the actual size. Same the other way
around. That's what's inconsistent IMHO.
Shameerali Kolothum Thodi Feb. 6, 2020, 11:28 a.m. UTC | #8
> -----Original Message-----
> From: David Hildenbrand [mailto:david@redhat.com]
> Sent: 06 February 2020 10:56
> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>;
> Igor Mammedov <imammedo@redhat.com>
> Cc: peter.maydell@linaro.org; xiaoguangrong.eric@gmail.com;
> mst@redhat.com; shannon.zhaosl@gmail.com; qemu-devel@nongnu.org;
> xuwei (O) <xuwei5@huawei.com>; Linuxarm <linuxarm@huawei.com>;
> eric.auger@redhat.com; qemu-arm@nongnu.org; lersek@redhat.com
> Subject: Re: [PATCH v2 1/7] exec: Fix for qemu_ram_resize() callback

[...]
 
> > root@ubuntu:/# cat /dev/pmem
> > pmem0  pmem1
> >
> > From the logs, it looks like the ram_load_precopy() --> qemu_ram_resize() is
> not
> > called as length == used_length and both seems to be page aligned values.
> > And from
> https://github.com/qemu/qemu/blob/master/migration/ram.c#L3421
> > qemu_ram_resize() is called with length if length != used_length.
> 
> Assume on your source, the old size is 12345 bytes. So 16384 aligned up
> (4 pages).
> 
> Assume on your target, the new size is 123456 bytes, so 126976 aligned
> up (31 pages).
> 
> If you migrate from source to destination, the migration code would
> resize to 16384, although the "actual size" is 12345. The callback will
> be called with the aligned size, not the actual size. Same the other way
> around. That's what's inconsistent IMHO.

Thanks. You are right. I didn’t consider the case where the target can be
configured with a larger number of devices than the source. I can replicate
the scenario now,

Source:

fw_cfg_add_file_callback: filename etc/boot-fail-wait size 0x4
fw_cfg_add_file_callback: filename etc/acpi/nvdimm-mem size 0x1000
fw_cfg_add_file_callback: filename etc/acpi/tables size 0x6210

Target:
ram_load_precopy: Ram blk mem1 length 0x40000000 used_length 0x40000000
ram_load_precopy: Ram blk virt.flash0 length 0x4000000 used_length 0x4000000
ram_load_precopy: Ram blk virt.flash1 length 0x4000000 used_length 0x4000000
ram_load_precopy: Ram blk /rom@etc/acpi/tables length 0x7000 used_length 0x8000
fw_cfg_modify_file: filename etc/acpi/tables len 0x7000

Target updates FWCfgEntry with a page aligned size :(. I will look into this and see how
we can solve this. Any pointers welcome.

Cheers,
Shameer
David Hildenbrand Feb. 6, 2020, 2:54 p.m. UTC | #9
On 06.02.20 12:28, Shameerali Kolothum Thodi wrote:
> 
> 
>> -----Original Message-----
>> From: David Hildenbrand [mailto:david@redhat.com]
>> Sent: 06 February 2020 10:56
>> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>;
>> Igor Mammedov <imammedo@redhat.com>
>> Cc: peter.maydell@linaro.org; xiaoguangrong.eric@gmail.com;
>> mst@redhat.com; shannon.zhaosl@gmail.com; qemu-devel@nongnu.org;
>> xuwei (O) <xuwei5@huawei.com>; Linuxarm <linuxarm@huawei.com>;
>> eric.auger@redhat.com; qemu-arm@nongnu.org; lersek@redhat.com
>> Subject: Re: [PATCH v2 1/7] exec: Fix for qemu_ram_resize() callback
> 
> [...]
>  
>>> root@ubuntu:/# cat /dev/pmem
>>> pmem0  pmem1
>>>
>>> From the logs, it looks like the ram_load_precopy() --> qemu_ram_resize() is
>> not
>>> called as length == used_length and both seems to be page aligned values.
>>> And from
>> https://github.com/qemu/qemu/blob/master/migration/ram.c#L3421
>>> qemu_ram_resize() is called with length if length != used_length.
>>
>> Assume on your source, the old size is 12345 bytes. So 16384 aligned up
>> (4 pages).
>>
>> Assume on your target, the new size is 123456 bytes, so 126976 aligned
>> up (31 pages).
>>
>> If you migrate from source to destination, the migration code would
>> resize to 16384, although the "actual size" is 12345. The callback will
>> be called with the aligned size, not the actual size. Same the other way
>> around. That's what's inconsistent IMHO.
> 
> Thanks. You are right. I didn’t consider the case where the target can be
> configured with a larger number of devices than the source. I can replicate
> the scenario now,
> 
> Source:
> 
> fw_cfg_add_file_callback: filename etc/boot-fail-wait size 0x4
> fw_cfg_add_file_callback: filename etc/acpi/nvdimm-mem size 0x1000
> fw_cfg_add_file_callback: filename etc/acpi/tables size 0x6210
> 
> Target:
> ram_load_precopy: Ram blk mem1 length 0x40000000 used_length 0x40000000
> ram_load_precopy: Ram blk virt.flash0 length 0x4000000 used_length 0x4000000
> ram_load_precopy: Ram blk virt.flash1 length 0x4000000 used_length 0x4000000
> ram_load_precopy: Ram blk /rom@etc/acpi/tables length 0x7000 used_length 0x8000
> fw_cfg_modify_file: filename etc/acpi/tables len 0x7000
> 
> Target updates FWCfgEntry with a page aligned size :(. I will look into this and see how
> we can solve this. Any pointers welcome.

Can you look the original value up somehow and us the resize callback
only as a notification that something changed? (that value would have to
be stored somewhere and migrated I assume - maybe that's already being done)
Shameerali Kolothum Thodi Feb. 7, 2020, 4:05 p.m. UTC | #10
> -----Original Message-----
> From: David Hildenbrand [mailto:david@redhat.com]
> Sent: 06 February 2020 14:55
> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>;
> Igor Mammedov <imammedo@redhat.com>
> Cc: peter.maydell@linaro.org; xiaoguangrong.eric@gmail.com;
> mst@redhat.com; shannon.zhaosl@gmail.com; qemu-devel@nongnu.org;
> xuwei (O) <xuwei5@huawei.com>; Linuxarm <linuxarm@huawei.com>;
> eric.auger@redhat.com; qemu-arm@nongnu.org; lersek@redhat.com
> Subject: Re: [PATCH v2 1/7] exec: Fix for qemu_ram_resize() callback
> 
> On 06.02.20 12:28, Shameerali Kolothum Thodi wrote:
> >
> >
> >> -----Original Message-----
> >> From: David Hildenbrand [mailto:david@redhat.com]
> >> Sent: 06 February 2020 10:56
> >> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>;
> >> Igor Mammedov <imammedo@redhat.com>
> >> Cc: peter.maydell@linaro.org; xiaoguangrong.eric@gmail.com;
> >> mst@redhat.com; shannon.zhaosl@gmail.com; qemu-devel@nongnu.org;
> >> xuwei (O) <xuwei5@huawei.com>; Linuxarm <linuxarm@huawei.com>;
> >> eric.auger@redhat.com; qemu-arm@nongnu.org; lersek@redhat.com
> >> Subject: Re: [PATCH v2 1/7] exec: Fix for qemu_ram_resize() callback
> >
> > [...]
> >
> >>> root@ubuntu:/# cat /dev/pmem
> >>> pmem0  pmem1
> >>>
> >>> From the logs, it looks like the ram_load_precopy() --> qemu_ram_resize()
> is
> >> not
> >>> called as length == used_length and both seems to be page aligned values.
> >>> And from
> >> https://github.com/qemu/qemu/blob/master/migration/ram.c#L3421
> >>> qemu_ram_resize() is called with length if length != used_length.
> >>
> >> Assume on your source, the old size is 12345 bytes. So 16384 aligned up
> >> (4 pages).
> >>
> >> Assume on your target, the new size is 123456 bytes, so 126976 aligned
> >> up (31 pages).
> >>
> >> If you migrate from source to destination, the migration code would
> >> resize to 16384, although the "actual size" is 12345. The callback will
> >> be called with the aligned size, not the actual size. Same the other way
> >> around. That's what's inconsistent IMHO.
> >
> > Thanks. You are right. I didn’t consider the case where the target can be
> > configured with a larger number of devices than the source. I can replicate
> > the scenario now,
> >
> > Source:
> >
> > fw_cfg_add_file_callback: filename etc/boot-fail-wait size 0x4
> > fw_cfg_add_file_callback: filename etc/acpi/nvdimm-mem size 0x1000
> > fw_cfg_add_file_callback: filename etc/acpi/tables size 0x6210
> >
> > Target:
> > ram_load_precopy: Ram blk mem1 length 0x40000000 used_length
> 0x40000000
> > ram_load_precopy: Ram blk virt.flash0 length 0x4000000 used_length
> 0x4000000
> > ram_load_precopy: Ram blk virt.flash1 length 0x4000000 used_length
> 0x4000000
> > ram_load_precopy: Ram blk /rom@etc/acpi/tables length 0x7000
> used_length 0x8000
> > fw_cfg_modify_file: filename etc/acpi/tables len 0x7000
> >
> > Target updates FWCfgEntry with a page aligned size :(. I will look into this and
> see how
> > we can solve this. Any pointers welcome.
> 
> Can you look the original value up somehow and us the resize callback
> only as a notification that something changed? (that value would have to
> be stored somewhere and migrated I assume - maybe that's already being
> done)

Ok. I will take a look at that. But can we instead pass the block->used_length to
fw_cfg_add_file_callback(). That way we don’t have to change the qemu_ram_resize()
as well. I think Igor has suggested this before[1] and I had a go at it before coming up
with the "req_length" proposal here.

Thanks,
Shameer

[1] https://lore.kernel.org/qemu-devel/323aa74a92934b6a989e6e4dbe0dfe21@huawei.com/
David Hildenbrand Feb. 10, 2020, 9:29 a.m. UTC | #11
>> Can you look the original value up somehow and us the resize callback
>> only as a notification that something changed? (that value would have to
>> be stored somewhere and migrated I assume - maybe that's already being
>> done)
> 
> Ok. I will take a look at that. But can we instead pass the block->used_length to
> fw_cfg_add_file_callback(). That way we don’t have to change the qemu_ram_resize()
> as well. I think Igor has suggested this before[1] and I had a go at it before coming up
> with the "req_length" proposal here.

You mean, passing the old size as well? I don't see how that will solve
the issue, but yeah, nothing speaks against simply sending the old and
the new size.
Shameerali Kolothum Thodi Feb. 10, 2020, 9:50 a.m. UTC | #12
> -----Original Message-----
> From: David Hildenbrand [mailto:david@redhat.com]
> Sent: 10 February 2020 09:29
> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>;
> Igor Mammedov <imammedo@redhat.com>
> Cc: peter.maydell@linaro.org; xiaoguangrong.eric@gmail.com;
> mst@redhat.com; shannon.zhaosl@gmail.com; qemu-devel@nongnu.org;
> xuwei (O) <xuwei5@huawei.com>; Linuxarm <linuxarm@huawei.com>;
> eric.auger@redhat.com; qemu-arm@nongnu.org; lersek@redhat.com
> Subject: Re: [PATCH v2 1/7] exec: Fix for qemu_ram_resize() callback
> 
> >> Can you look the original value up somehow and us the resize callback
> >> only as a notification that something changed? (that value would have to
> >> be stored somewhere and migrated I assume - maybe that's already being
> >> done)
> >
> > Ok. I will take a look at that. But can we instead pass the block->used_length
> to
> > fw_cfg_add_file_callback(). That way we don’t have to change the
> qemu_ram_resize()
> > as well. I think Igor has suggested this before[1] and I had a go at it before
> coming up
> > with the "req_length" proposal here.
> 
> You mean, passing the old size as well? I don't see how that will solve
> the issue, but yeah, nothing speaks against simply sending the old and
> the new size.

Nope. I actually meant using the block->used_length to store in the 
s->files->f[index].size. 

virt_acpi_setup()
  acpi_add_rom_blob()
    rom_add_blob()
      rom_set_mr()  --> used_length  = page aligned blob size
        fw_cfg_add_file_callback()  --> uses actual blob size.


Right now what we do is use the actual blob size to store in FWCfgEntry.
Instead pass the RAMBlock used_length to fw_cfg_add_file_callback().
Of course by this, the firmware will see an aligned size, but that is fine I think.
But at the same time this means the qemu_ram_resize() can stay as it is 
because it will invoke the callback when the size changes beyond the aligned
page size. And also during migration, there won't be any inconsistency as everyone
works on aligned page size.

Does that make sense? Or I am again missing something here?

Thanks,
Shameer

> --
> Thanks,
> 
> David / dhildenb
David Hildenbrand Feb. 10, 2020, 9:53 a.m. UTC | #13
On 10.02.20 10:50, Shameerali Kolothum Thodi wrote:
> 
> 
>> -----Original Message-----
>> From: David Hildenbrand [mailto:david@redhat.com]
>> Sent: 10 February 2020 09:29
>> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>;
>> Igor Mammedov <imammedo@redhat.com>
>> Cc: peter.maydell@linaro.org; xiaoguangrong.eric@gmail.com;
>> mst@redhat.com; shannon.zhaosl@gmail.com; qemu-devel@nongnu.org;
>> xuwei (O) <xuwei5@huawei.com>; Linuxarm <linuxarm@huawei.com>;
>> eric.auger@redhat.com; qemu-arm@nongnu.org; lersek@redhat.com
>> Subject: Re: [PATCH v2 1/7] exec: Fix for qemu_ram_resize() callback
>>
>>>> Can you look the original value up somehow and us the resize callback
>>>> only as a notification that something changed? (that value would have to
>>>> be stored somewhere and migrated I assume - maybe that's already being
>>>> done)
>>>
>>> Ok. I will take a look at that. But can we instead pass the block->used_length
>> to
>>> fw_cfg_add_file_callback(). That way we don’t have to change the
>> qemu_ram_resize()
>>> as well. I think Igor has suggested this before[1] and I had a go at it before
>> coming up
>>> with the "req_length" proposal here.
>>
>> You mean, passing the old size as well? I don't see how that will solve
>> the issue, but yeah, nothing speaks against simply sending the old and
>> the new size.
> 
> Nope. I actually meant using the block->used_length to store in the 
> s->files->f[index].size. 
> 
> virt_acpi_setup()
>   acpi_add_rom_blob()
>     rom_add_blob()
>       rom_set_mr()  --> used_length  = page aligned blob size
>         fw_cfg_add_file_callback()  --> uses actual blob size.
> 
> 
> Right now what we do is use the actual blob size to store in FWCfgEntry.
> Instead pass the RAMBlock used_length to fw_cfg_add_file_callback().
> Of course by this, the firmware will see an aligned size, but that is fine I think.
> But at the same time this means the qemu_ram_resize() can stay as it is 
> because it will invoke the callback when the size changes beyond the aligned
> page size. And also during migration, there won't be any inconsistency as everyone
> works on aligned page size.
> 
> Does that make sense? Or I am again missing something here?

Oh, you mean simply rounding up to full pages in the fw entries? If we
can drop the "sub-page" restriction, that would be awesome!

Need to double check if that could be an issue for fw/migration/whatever.
Shameerali Kolothum Thodi Feb. 12, 2020, 5:07 p.m. UTC | #14
> -----Original Message-----
> From: David Hildenbrand [mailto:david@redhat.com]
> Sent: 10 February 2020 09:54
> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>;
> Igor Mammedov <imammedo@redhat.com>
> Cc: peter.maydell@linaro.org; xiaoguangrong.eric@gmail.com;
> mst@redhat.com; shannon.zhaosl@gmail.com; qemu-devel@nongnu.org;
> xuwei (O) <xuwei5@huawei.com>; Linuxarm <linuxarm@huawei.com>;
> eric.auger@redhat.com; qemu-arm@nongnu.org; lersek@redhat.com
> Subject: Re: [PATCH v2 1/7] exec: Fix for qemu_ram_resize() callback
> 
> On 10.02.20 10:50, Shameerali Kolothum Thodi wrote:
> >
> >
> >> -----Original Message-----
> >> From: David Hildenbrand [mailto:david@redhat.com]
> >> Sent: 10 February 2020 09:29
> >> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>;
> >> Igor Mammedov <imammedo@redhat.com>
> >> Cc: peter.maydell@linaro.org; xiaoguangrong.eric@gmail.com;
> >> mst@redhat.com; shannon.zhaosl@gmail.com; qemu-devel@nongnu.org;
> >> xuwei (O) <xuwei5@huawei.com>; Linuxarm <linuxarm@huawei.com>;
> >> eric.auger@redhat.com; qemu-arm@nongnu.org; lersek@redhat.com
> >> Subject: Re: [PATCH v2 1/7] exec: Fix for qemu_ram_resize() callback
> >>
> >>>> Can you look the original value up somehow and us the resize callback
> >>>> only as a notification that something changed? (that value would have to
> >>>> be stored somewhere and migrated I assume - maybe that's already
> being
> >>>> done)
> >>>
> >>> Ok. I will take a look at that. But can we instead pass the
> block->used_length
> >> to
> >>> fw_cfg_add_file_callback(). That way we don’t have to change the
> >> qemu_ram_resize()
> >>> as well. I think Igor has suggested this before[1] and I had a go at it before
> >> coming up
> >>> with the "req_length" proposal here.
> >>
> >> You mean, passing the old size as well? I don't see how that will solve
> >> the issue, but yeah, nothing speaks against simply sending the old and
> >> the new size.
> >
> > Nope. I actually meant using the block->used_length to store in the
> > s->files->f[index].size.
> >
> > virt_acpi_setup()
> >   acpi_add_rom_blob()
> >     rom_add_blob()
> >       rom_set_mr()  --> used_length  = page aligned blob size
> >         fw_cfg_add_file_callback()  --> uses actual blob size.
> >
> >
> > Right now what we do is use the actual blob size to store in FWCfgEntry.
> > Instead pass the RAMBlock used_length to fw_cfg_add_file_callback().
> > Of course by this, the firmware will see an aligned size, but that is fine I think.
> > But at the same time this means the qemu_ram_resize() can stay as it is
> > because it will invoke the callback when the size changes beyond the aligned
> > page size. And also during migration, there won't be any inconsistency as
> everyone
> > works on aligned page size.
> >
> > Does that make sense? Or I am again missing something here?
> 
> Oh, you mean simply rounding up to full pages in the fw entries? If we
> can drop the "sub-page" restriction, that would be awesome!
> 
> Need to double check if that could be an issue for fw/migration/whatever.

Hmm..it breaks x86 + seabios boot. The issue is seabios expects RSDP in FSEG
memory. With the above proposed change, RSDP will be aligned to PAGE_SIZE and
seabios mem allocation for RSDP fails at,

https://github.com/coreboot/seabios/blob/master/src/fw/romfile_loader.c#L85

To get pass the above, I changed "alloc_fseg" flag to false in build_rsdp(), but
seabios seems to make the assumption that RSDP has to be placed in FSEG memory
here,
https://github.com/coreboot/seabios/blob/master/src/fw/biostables.c#L126

So doesn’t look like there is an easy fix for this without changing the seabios code.

Between, OVMF works fine with the aligned size on x86.

One thing we can do is treat the RSDP case separately or only use the aligned
page size for "etc/acpi/tables" as below,

diff --git a/hw/core/loader.c b/hw/core/loader.c
index d1b78f60cd..f07f6a7a35 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -60,6 +60,7 @@
 #include "hw/boards.h"
 #include "qemu/cutils.h"
 #include "sysemu/runstate.h"
+#include "hw/acpi/aml-build.h"
 
 #include <zlib.h>
 
@@ -1056,6 +1057,7 @@ MemoryRegion *rom_add_blob(const char *name, const void *blob, size_t len,
     if (fw_file_name && fw_cfg) {
         char devpath[100];
         void *data;
+        size_t size = rom->datasize;
 
         if (read_only) {
             snprintf(devpath, sizeof(devpath), "/rom@%s", fw_file_name);
@@ -1066,13 +1068,21 @@ MemoryRegion *rom_add_blob(const char *name, const void *blob, size_t len,
         if (mc->rom_file_has_mr) {
             data = rom_set_mr(rom, OBJECT(fw_cfg), devpath, read_only);
             mr = rom->mr;
+           /*
+            * Use the RAMblk used_length for acpi tables as this avoids any
+            * inconsistency with table size changes during hot add and during
+            * migration.
+            */
+           if (strcmp(fw_file_name, ACPI_BUILD_TABLE_FILE) == 0) {
+                size = memory_region_get_used_length(mr);
+           }
         } else {
             data = rom->data;
         }
 
         fw_cfg_add_file_callback(fw_cfg, fw_file_name,
                                  fw_callback, NULL, callback_opaque,
-                                 data, rom->datasize, read_only);
+                                 data, size, read_only);
     }
     return mr;
 }


Thoughts?

Thanks,
Shameer
David Hildenbrand Feb. 12, 2020, 6:20 p.m. UTC | #15
On 12.02.20 18:07, Shameerali Kolothum Thodi wrote:
> 
> 
>> -----Original Message-----
>> From: David Hildenbrand [mailto:david@redhat.com]
>> Sent: 10 February 2020 09:54
>> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>;
>> Igor Mammedov <imammedo@redhat.com>
>> Cc: peter.maydell@linaro.org; xiaoguangrong.eric@gmail.com;
>> mst@redhat.com; shannon.zhaosl@gmail.com; qemu-devel@nongnu.org;
>> xuwei (O) <xuwei5@huawei.com>; Linuxarm <linuxarm@huawei.com>;
>> eric.auger@redhat.com; qemu-arm@nongnu.org; lersek@redhat.com
>> Subject: Re: [PATCH v2 1/7] exec: Fix for qemu_ram_resize() callback
>>
>> On 10.02.20 10:50, Shameerali Kolothum Thodi wrote:
>>>
>>>
>>>> -----Original Message-----
>>>> From: David Hildenbrand [mailto:david@redhat.com]
>>>> Sent: 10 February 2020 09:29
>>>> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>;
>>>> Igor Mammedov <imammedo@redhat.com>
>>>> Cc: peter.maydell@linaro.org; xiaoguangrong.eric@gmail.com;
>>>> mst@redhat.com; shannon.zhaosl@gmail.com; qemu-devel@nongnu.org;
>>>> xuwei (O) <xuwei5@huawei.com>; Linuxarm <linuxarm@huawei.com>;
>>>> eric.auger@redhat.com; qemu-arm@nongnu.org; lersek@redhat.com
>>>> Subject: Re: [PATCH v2 1/7] exec: Fix for qemu_ram_resize() callback
>>>>
>>>>>> Can you look the original value up somehow and us the resize callback
>>>>>> only as a notification that something changed? (that value would have to
>>>>>> be stored somewhere and migrated I assume - maybe that's already
>> being
>>>>>> done)
>>>>>
>>>>> Ok. I will take a look at that. But can we instead pass the
>> block->used_length
>>>> to
>>>>> fw_cfg_add_file_callback(). That way we don’t have to change the
>>>> qemu_ram_resize()
>>>>> as well. I think Igor has suggested this before[1] and I had a go at it before
>>>> coming up
>>>>> with the "req_length" proposal here.
>>>>
>>>> You mean, passing the old size as well? I don't see how that will solve
>>>> the issue, but yeah, nothing speaks against simply sending the old and
>>>> the new size.
>>>
>>> Nope. I actually meant using the block->used_length to store in the
>>> s->files->f[index].size.
>>>
>>> virt_acpi_setup()
>>>   acpi_add_rom_blob()
>>>     rom_add_blob()
>>>       rom_set_mr()  --> used_length  = page aligned blob size
>>>         fw_cfg_add_file_callback()  --> uses actual blob size.
>>>
>>>
>>> Right now what we do is use the actual blob size to store in FWCfgEntry.
>>> Instead pass the RAMBlock used_length to fw_cfg_add_file_callback().
>>> Of course by this, the firmware will see an aligned size, but that is fine I think.
>>> But at the same time this means the qemu_ram_resize() can stay as it is
>>> because it will invoke the callback when the size changes beyond the aligned
>>> page size. And also during migration, there won't be any inconsistency as
>> everyone
>>> works on aligned page size.
>>>
>>> Does that make sense? Or I am again missing something here?
>>
>> Oh, you mean simply rounding up to full pages in the fw entries? If we
>> can drop the "sub-page" restriction, that would be awesome!
>>
>> Need to double check if that could be an issue for fw/migration/whatever.
> 
> Hmm..it breaks x86 + seabios boot. The issue is seabios expects RSDP in FSEG
> memory. With the above proposed change, RSDP will be aligned to PAGE_SIZE and
> seabios mem allocation for RSDP fails at,
> 
> https://github.com/coreboot/seabios/blob/master/src/fw/romfile_loader.c#L85
> 
> To get pass the above, I changed "alloc_fseg" flag to false in build_rsdp(), but
> seabios seems to make the assumption that RSDP has to be placed in FSEG memory
> here,
> https://github.com/coreboot/seabios/blob/master/src/fw/biostables.c#L126
> 
> So doesn’t look like there is an easy fix for this without changing the seabios code.
> 
> Between, OVMF works fine with the aligned size on x86.
> 
> One thing we can do is treat the RSDP case separately or only use the aligned
> page size for "etc/acpi/tables" as below,
> 
> diff --git a/hw/core/loader.c b/hw/core/loader.c
> index d1b78f60cd..f07f6a7a35 100644
> --- a/hw/core/loader.c
> +++ b/hw/core/loader.c
> @@ -60,6 +60,7 @@
>  #include "hw/boards.h"
>  #include "qemu/cutils.h"
>  #include "sysemu/runstate.h"
> +#include "hw/acpi/aml-build.h"
>  
>  #include <zlib.h>
>  
> @@ -1056,6 +1057,7 @@ MemoryRegion *rom_add_blob(const char *name, const void *blob, size_t len,
>      if (fw_file_name && fw_cfg) {
>          char devpath[100];
>          void *data;
> +        size_t size = rom->datasize;
>  
>          if (read_only) {
>              snprintf(devpath, sizeof(devpath), "/rom@%s", fw_file_name);
> @@ -1066,13 +1068,21 @@ MemoryRegion *rom_add_blob(const char *name, const void *blob, size_t len,
>          if (mc->rom_file_has_mr) {
>              data = rom_set_mr(rom, OBJECT(fw_cfg), devpath, read_only);
>              mr = rom->mr;
> +           /*
> +            * Use the RAMblk used_length for acpi tables as this avoids any
> +            * inconsistency with table size changes during hot add and during
> +            * migration.
> +            */
> +           if (strcmp(fw_file_name, ACPI_BUILD_TABLE_FILE) == 0) {
> +                size = memory_region_get_used_length(mr);
> +           }
>          } else {
>              data = rom->data;
>          }
>  
>          fw_cfg_add_file_callback(fw_cfg, fw_file_name,
>                                   fw_callback, NULL, callback_opaque,
> -                                 data, rom->datasize, read_only);
> +                                 data, size, read_only);
>      }
>      return mr;
>  }
> 
> 
> Thoughts?

I don't think introducing memory_region_get_used_length() is a
good idea. I also dislike, that the ram block size can differ
to the memory region size. I wasn't aware of that condition, sorry!

Making the memory region always store an aligned size might break other use cases.

Summarizing the issue:
1. Memory regions contain ram blocks with a different size, if the size is
   not properly aligned. While memory regions can have an unaligned size,
   ram blocks can't. This is true when creating resizable memory region with
   an unaligned size.
2. When resizing a ram block/memory region, the size of the memory region
   is set to the aligned size. The callback is called with the aligned size.
   The unaligned piece is lost.
3. When migrating, we migrate the aligned size.


What about something like this: (untested)


From d84c21bc67e15acdac2f6265cd1576d8dd920211 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 12 Feb 2020 19:16:34 +0100
Subject: [PATCH v1] tmp

Signed-off-by: David Hildenbrand <david@redhat.com>
---
 exec.c          | 14 ++++++++++++--
 migration/ram.c | 44 ++++++++++++++++++++++++++++++++------------
 2 files changed, 44 insertions(+), 14 deletions(-)

diff --git a/exec.c b/exec.c
index 05cfe868ab..d41a1e11b5 100644
--- a/exec.c
+++ b/exec.c
@@ -2130,11 +2130,21 @@ static int memory_try_enable_merging(void *addr, size_t len)
  */
 int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
 {
+    const ram_addr_t unaligned_size = newsize;
+
     assert(block);
 
     newsize = HOST_PAGE_ALIGN(newsize);
 
     if (block->used_length == newsize) {
+        /*
+         * We don't have to resize the ram block (which only knows aligned
+         * sizes), however, we have to notify if the unaligned size changed.
+         */
+        if (block->resized && unaligned_size != memory_region_size(block->mr)) {
+            block->resized(block->idstr, unaligned_size, block->host);
+            memory_region_set_size(block->mr, unaligned_size);
+        }
         return 0;
     }
 
@@ -2158,9 +2168,9 @@ int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
     block->used_length = newsize;
     cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
                                         DIRTY_CLIENTS_ALL);
-    memory_region_set_size(block->mr, newsize);
+    memory_region_set_size(block->mr, unaligned_size);
     if (block->resized) {
-        block->resized(block->idstr, newsize, block->host);
+        block->resized(block->idstr, unaligned_size, block->host);
     }
     return 0;
 }
diff --git a/migration/ram.c b/migration/ram.c
index ed23ed1c7c..2acc4b85ca 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1761,28 +1761,43 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero)
     }
 }
 
-static uint64_t ram_bytes_total_common(bool count_ignored)
+static uint64_t ramblock_ram_bytes_migration(RAMBlock *block)
+{
+    /*
+     * When resizing on the target, we need the unaligned size,
+     * otherwise, we lose the unaligned size during migration.
+     */
+    if (block->mr && (block->flags & RAM_RESIZEABLE)) {
+        return memory_region_size(block->mr);
+    } else {
+        return block->used_length;
+    }
+}
+
+static uint64_t ram_bytes_total_migration(void)
 {
     RAMBlock *block;
     uint64_t total = 0;
 
     RCU_READ_LOCK_GUARD();
 
-    if (count_ignored) {
-        RAMBLOCK_FOREACH_MIGRATABLE(block) {
-            total += block->used_length;
-        }
-    } else {
-        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
-            total += block->used_length;
-        }
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
+        total += ramblock_ram_bytes_migration(block);
     }
     return total;
 }
 
 uint64_t ram_bytes_total(void)
 {
-    return ram_bytes_total_common(false);
+    RAMBlock *block;
+    uint64_t total = 0;
+
+    RCU_READ_LOCK_GUARD();
+
+    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
+        total += block->used_length;
+    }
+    return total;
 }
 
 static void xbzrle_load_setup(void)
@@ -2432,12 +2447,17 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
     (*rsp)->f = f;
 
     WITH_RCU_READ_LOCK_GUARD() {
-        qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE);
+        qemu_put_be64(f, ram_bytes_total_migration() | RAM_SAVE_FLAG_MEM_SIZE);
 
         RAMBLOCK_FOREACH_MIGRATABLE(block) {
             qemu_put_byte(f, strlen(block->idstr));
             qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
-            qemu_put_be64(f, block->used_length);
+            /*
+             * When resizing on the target, we need the unaligned size,
+             * otherwise we lose the unaligned sise during migration.
+             */
+            qemu_put_be64(f, ramblock_ram_bytes_migration(block));
+
             if (migrate_postcopy_ram() && block->page_size !=
                                           qemu_host_page_size) {
                 qemu_put_be64(f, block->page_size);
Shameerali Kolothum Thodi Feb. 13, 2020, 4:38 p.m. UTC | #16
> -----Original Message-----
> From: David Hildenbrand [mailto:david@redhat.com]
> Sent: 12 February 2020 18:21
> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>;
> Igor Mammedov <imammedo@redhat.com>
> Cc: peter.maydell@linaro.org; xiaoguangrong.eric@gmail.com;
> mst@redhat.com; shannon.zhaosl@gmail.com; qemu-devel@nongnu.org;
> xuwei (O) <xuwei5@huawei.com>; Linuxarm <linuxarm@huawei.com>;
> eric.auger@redhat.com; qemu-arm@nongnu.org; lersek@redhat.com;
> dgilbert@redhat.com; Juan Jose Quintela Carreira <quintela@redhat.com>
> Subject: Re: [PATCH v2 1/7] exec: Fix for qemu_ram_resize() callback

[...]

> > Hmm..it breaks x86 + seabios boot. The issue is seabios expects RSDP in FSEG
> > memory. With the above proposed change, RSDP will be aligned to PAGE_SIZE
> and
> > seabios mem allocation for RSDP fails at,
> >
> >
> https://github.com/coreboot/seabios/blob/master/src/fw/romfile_loader.c#L8
> 5
> >
> > To get pass the above, I changed "alloc_fseg" flag to false in build_rsdp(), but
> > seabios seems to make the assumption that RSDP has to be placed in FSEG
> memory
> > here,
> > https://github.com/coreboot/seabios/blob/master/src/fw/biostables.c#L126
> >
> > So doesn’t look like there is an easy fix for this without changing the seabios
> code.
> >
> > Between, OVMF works fine with the aligned size on x86.
> >
> > One thing we can do is treat the RSDP case separately or only use the aligned
> > page size for "etc/acpi/tables" as below,

[...]

> >
> > Thoughts?
> 
> I don't think introducing memory_region_get_used_length() is a
> good idea. I also dislike, that the ram block size can differ
> to the memory region size. I wasn't aware of that condition, sorry!

Right. They can differ in size and is the case here.

> Making the memory region always store an aligned size might break other use
> cases.
> 
> Summarizing the issue:
> 1. Memory regions contain ram blocks with a different size, if the size is
>    not properly aligned. While memory regions can have an unaligned size,
>    ram blocks can't. This is true when creating resizable memory region with
>    an unaligned size.
> 2. When resizing a ram block/memory region, the size of the memory region
>    is set to the aligned size. The callback is called with the aligned size.
>    The unaligned piece is lost.
> 3. When migrating, we migrate the aligned size.
> 
>
> What about something like this: (untested)

Thanks for that. I had a go with the below patch and it indeed fixes the issue
of callback not being called on resize. But the migration fails with the below
error,

For x86
---------
qemu-system-x86_64: Unknown combination of migration flags: 0x14
qemu-system-x86_64: error while loading state for instance 0x0 of device 'ram'
qemu-system-x86_64: load of migration failed: Invalid argument 

For arm64
--------------
qemu-system-aarch64: Received an unexpected compressed page
qemu-system-aarch64: error while loading state for instance 0x0 of device 'ram'
qemu-system-aarch64: load of migration failed: Invalid argument
 
I haven’t debugged this further but looks like there is a corruption happening.
Please let me know if you have any clue.

Thanks,
Shameer

> 
> From d84c21bc67e15acdac2f6265cd1576d8dd920211 Mon Sep 17 00:00:00
> 2001
> From: David Hildenbrand <david@redhat.com>
> Date: Wed, 12 Feb 2020 19:16:34 +0100
> Subject: [PATCH v1] tmp
> 
> Signed-off-by: David Hildenbrand <david@redhat.com>
> ---
>  exec.c          | 14 ++++++++++++--
>  migration/ram.c | 44 ++++++++++++++++++++++++++++++++------------
>  2 files changed, 44 insertions(+), 14 deletions(-)
> 
> diff --git a/exec.c b/exec.c
> index 05cfe868ab..d41a1e11b5 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -2130,11 +2130,21 @@ static int memory_try_enable_merging(void
> *addr, size_t len)
>   */
>  int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
>  {
> +    const ram_addr_t unaligned_size = newsize;
> +
>      assert(block);
> 
>      newsize = HOST_PAGE_ALIGN(newsize);
> 
>      if (block->used_length == newsize) {
> +        /*
> +         * We don't have to resize the ram block (which only knows aligned
> +         * sizes), however, we have to notify if the unaligned size changed.
> +         */
> +        if (block->resized && unaligned_size !=
> memory_region_size(block->mr)) {
> +            block->resized(block->idstr, unaligned_size, block->host);
> +            memory_region_set_size(block->mr, unaligned_size);
> +        }
>          return 0;
>      }
> 
> @@ -2158,9 +2168,9 @@ int qemu_ram_resize(RAMBlock *block,
> ram_addr_t newsize, Error **errp)
>      block->used_length = newsize;
>      cpu_physical_memory_set_dirty_range(block->offset,
> block->used_length,
>                                          DIRTY_CLIENTS_ALL);
> -    memory_region_set_size(block->mr, newsize);
> +    memory_region_set_size(block->mr, unaligned_size);
>      if (block->resized) {
> -        block->resized(block->idstr, newsize, block->host);
> +        block->resized(block->idstr, unaligned_size, block->host);
>      }
>      return 0;
>  }
> diff --git a/migration/ram.c b/migration/ram.c
> index ed23ed1c7c..2acc4b85ca 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -1761,28 +1761,43 @@ void acct_update_position(QEMUFile *f, size_t
> size, bool zero)
>      }
>  }
> 
> -static uint64_t ram_bytes_total_common(bool count_ignored)
> +static uint64_t ramblock_ram_bytes_migration(RAMBlock *block)
> +{
> +    /*
> +     * When resizing on the target, we need the unaligned size,
> +     * otherwise, we lose the unaligned size during migration.
> +     */
> +    if (block->mr && (block->flags & RAM_RESIZEABLE)) {
> +        return memory_region_size(block->mr);
> +    } else {
> +        return block->used_length;
> +    }
> +}
> +
> +static uint64_t ram_bytes_total_migration(void)
>  {
>      RAMBlock *block;
>      uint64_t total = 0;
> 
>      RCU_READ_LOCK_GUARD();
> 
> -    if (count_ignored) {
> -        RAMBLOCK_FOREACH_MIGRATABLE(block) {
> -            total += block->used_length;
> -        }
> -    } else {
> -        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
> -            total += block->used_length;
> -        }
> +    RAMBLOCK_FOREACH_MIGRATABLE(block) {
> +        total += ramblock_ram_bytes_migration(block);
>      }
>      return total;
>  }
> 
>  uint64_t ram_bytes_total(void)
>  {
> -    return ram_bytes_total_common(false);
> +    RAMBlock *block;
> +    uint64_t total = 0;
> +
> +    RCU_READ_LOCK_GUARD();
> +
> +    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
> +        total += block->used_length;
> +    }
> +    return total;
>  }
> 
>  static void xbzrle_load_setup(void)
> @@ -2432,12 +2447,17 @@ static int ram_save_setup(QEMUFile *f, void
> *opaque)
>      (*rsp)->f = f;
> 
>      WITH_RCU_READ_LOCK_GUARD() {
> -        qemu_put_be64(f, ram_bytes_total_common(true) |
> RAM_SAVE_FLAG_MEM_SIZE);
> +        qemu_put_be64(f, ram_bytes_total_migration() |
> RAM_SAVE_FLAG_MEM_SIZE);
> 
>          RAMBLOCK_FOREACH_MIGRATABLE(block) {
>              qemu_put_byte(f, strlen(block->idstr));
>              qemu_put_buffer(f, (uint8_t *)block->idstr,
> strlen(block->idstr));
> -            qemu_put_be64(f, block->used_length);
> +            /*
> +             * When resizing on the target, we need the unaligned size,
> +             * otherwise we lose the unaligned sise during migration.
> +             */
> +            qemu_put_be64(f, ramblock_ram_bytes_migration(block));
> +
>              if (migrate_postcopy_ram() && block->page_size !=
>                                            qemu_host_page_size) {
>                  qemu_put_be64(f, block->page_size);
> --
> 2.24.1
> 
> 
> --
> Thanks,
> 
> David / dhildenb
David Hildenbrand Feb. 13, 2020, 4:59 p.m. UTC | #17
On 13.02.20 17:38, Shameerali Kolothum Thodi wrote:
> 
> 
>> -----Original Message-----
>> From: David Hildenbrand [mailto:david@redhat.com]
>> Sent: 12 February 2020 18:21
>> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>;
>> Igor Mammedov <imammedo@redhat.com>
>> Cc: peter.maydell@linaro.org; xiaoguangrong.eric@gmail.com;
>> mst@redhat.com; shannon.zhaosl@gmail.com; qemu-devel@nongnu.org;
>> xuwei (O) <xuwei5@huawei.com>; Linuxarm <linuxarm@huawei.com>;
>> eric.auger@redhat.com; qemu-arm@nongnu.org; lersek@redhat.com;
>> dgilbert@redhat.com; Juan Jose Quintela Carreira <quintela@redhat.com>
>> Subject: Re: [PATCH v2 1/7] exec: Fix for qemu_ram_resize() callback
> 
> [...]
> 
>>> Hmm..it breaks x86 + seabios boot. The issue is seabios expects RSDP in FSEG
>>> memory. With the above proposed change, RSDP will be aligned to PAGE_SIZE
>> and
>>> seabios mem allocation for RSDP fails at,
>>>
>>>
>> https://github.com/coreboot/seabios/blob/master/src/fw/romfile_loader.c#L8
>> 5
>>>
>>> To get pass the above, I changed "alloc_fseg" flag to false in build_rsdp(), but
>>> seabios seems to make the assumption that RSDP has to be placed in FSEG
>> memory
>>> here,
>>> https://github.com/coreboot/seabios/blob/master/src/fw/biostables.c#L126
>>>
>>> So doesn’t look like there is an easy fix for this without changing the seabios
>> code.
>>>
>>> Between, OVMF works fine with the aligned size on x86.
>>>
>>> One thing we can do is treat the RSDP case separately or only use the aligned
>>> page size for "etc/acpi/tables" as below,
> 
> [...]
> 
>>>
>>> Thoughts?
>>
>> I don't think introducing memory_region_get_used_length() is a
>> good idea. I also dislike, that the ram block size can differ
>> to the memory region size. I wasn't aware of that condition, sorry!
> 
> Right. They can differ in size and is the case here.
> 
>> Making the memory region always store an aligned size might break other use
>> cases.
>>
>> Summarizing the issue:
>> 1. Memory regions contain ram blocks with a different size, if the size is
>>    not properly aligned. While memory regions can have an unaligned size,
>>    ram blocks can't. This is true when creating resizable memory region with
>>    an unaligned size.
>> 2. When resizing a ram block/memory region, the size of the memory region
>>    is set to the aligned size. The callback is called with the aligned size.
>>    The unaligned piece is lost.
>> 3. When migrating, we migrate the aligned size.
>>
>>
>> What about something like this: (untested)
> 
> Thanks for that. I had a go with the below patch and it indeed fixes the issue
> of callback not being called on resize. But the migration fails with the below
> error,
> 
> For x86
> ---------
> qemu-system-x86_64: Unknown combination of migration flags: 0x14
> qemu-system-x86_64: error while loading state for instance 0x0 of device 'ram'
> qemu-system-x86_64: load of migration failed: Invalid argument 
> 
> For arm64
> --------------
> qemu-system-aarch64: Received an unexpected compressed page
> qemu-system-aarch64: error while loading state for instance 0x0 of device 'ram'
> qemu-system-aarch64: load of migration failed: Invalid argument
>  
> I haven’t debugged this further but looks like there is a corruption happening.
> Please let me know if you have any clue.

The issue is

qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE)

The total ram size we store must be page aligned, otherwise it will be
detected as flags. Hm ... maybe we can round it up ...
David Hildenbrand Feb. 13, 2020, 5:09 p.m. UTC | #18
On 13.02.20 17:59, David Hildenbrand wrote:
> On 13.02.20 17:38, Shameerali Kolothum Thodi wrote:
>>
>>
>>> -----Original Message-----
>>> From: David Hildenbrand [mailto:david@redhat.com]
>>> Sent: 12 February 2020 18:21
>>> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>;
>>> Igor Mammedov <imammedo@redhat.com>
>>> Cc: peter.maydell@linaro.org; xiaoguangrong.eric@gmail.com;
>>> mst@redhat.com; shannon.zhaosl@gmail.com; qemu-devel@nongnu.org;
>>> xuwei (O) <xuwei5@huawei.com>; Linuxarm <linuxarm@huawei.com>;
>>> eric.auger@redhat.com; qemu-arm@nongnu.org; lersek@redhat.com;
>>> dgilbert@redhat.com; Juan Jose Quintela Carreira <quintela@redhat.com>
>>> Subject: Re: [PATCH v2 1/7] exec: Fix for qemu_ram_resize() callback
>>
>> [...]
>>
>>>> Hmm..it breaks x86 + seabios boot. The issue is seabios expects RSDP in FSEG
>>>> memory. With the above proposed change, RSDP will be aligned to PAGE_SIZE
>>> and
>>>> seabios mem allocation for RSDP fails at,
>>>>
>>>>
>>> https://github.com/coreboot/seabios/blob/master/src/fw/romfile_loader.c#L8
>>> 5
>>>>
>>>> To get pass the above, I changed "alloc_fseg" flag to false in build_rsdp(), but
>>>> seabios seems to make the assumption that RSDP has to be placed in FSEG
>>> memory
>>>> here,
>>>> https://github.com/coreboot/seabios/blob/master/src/fw/biostables.c#L126
>>>>
>>>> So doesn’t look like there is an easy fix for this without changing the seabios
>>> code.
>>>>
>>>> Between, OVMF works fine with the aligned size on x86.
>>>>
>>>> One thing we can do is treat the RSDP case separately or only use the aligned
>>>> page size for "etc/acpi/tables" as below,
>>
>> [...]
>>
>>>>
>>>> Thoughts?
>>>
>>> I don't think introducing memory_region_get_used_length() is a
>>> good idea. I also dislike, that the ram block size can differ
>>> to the memory region size. I wasn't aware of that condition, sorry!
>>
>> Right. They can differ in size and is the case here.
>>
>>> Making the memory region always store an aligned size might break other use
>>> cases.
>>>
>>> Summarizing the issue:
>>> 1. Memory regions contain ram blocks with a different size, if the size is
>>>    not properly aligned. While memory regions can have an unaligned size,
>>>    ram blocks can't. This is true when creating resizable memory region with
>>>    an unaligned size.
>>> 2. When resizing a ram block/memory region, the size of the memory region
>>>    is set to the aligned size. The callback is called with the aligned size.
>>>    The unaligned piece is lost.
>>> 3. When migrating, we migrate the aligned size.
>>>
>>>
>>> What about something like this: (untested)
>>
>> Thanks for that. I had a go with the below patch and it indeed fixes the issue
>> of callback not being called on resize. But the migration fails with the below
>> error,
>>
>> For x86
>> ---------
>> qemu-system-x86_64: Unknown combination of migration flags: 0x14
>> qemu-system-x86_64: error while loading state for instance 0x0 of device 'ram'
>> qemu-system-x86_64: load of migration failed: Invalid argument 
>>
>> For arm64
>> --------------
>> qemu-system-aarch64: Received an unexpected compressed page
>> qemu-system-aarch64: error while loading state for instance 0x0 of device 'ram'
>> qemu-system-aarch64: load of migration failed: Invalid argument
>>  
>> I haven’t debugged this further but looks like there is a corruption happening.
>> Please let me know if you have any clue.
> 
> The issue is
> 
> qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE)
> 
> The total ram size we store must be page aligned, otherwise it will be
> detected as flags. Hm ... maybe we can round it up ...
> 

I'm afraid we can't otherwise we will run into issues in
ram_load_precopy(). Hm ...

Patch
diff mbox series

diff --git a/exec.c b/exec.c
index d4b769d0d4..9ce33992f8 100644
--- a/exec.c
+++ b/exec.c
@@ -2123,16 +2123,18 @@  static int memory_try_enable_merging(void *addr, size_t len)
  * resize callback to update device state and/or add assertions to detect
  * misuse, if necessary.
  */
-int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
+int qemu_ram_resize(RAMBlock *block, ram_addr_t size, Error **errp)
 {
-    assert(block);
+    ram_addr_t newsize;
 
-    newsize = HOST_PAGE_ALIGN(newsize);
+    assert(block);
 
-    if (block->used_length == newsize) {
+    if (block->req_length == size) {
         return 0;
     }
 
+    newsize = HOST_PAGE_ALIGN(size);
+
     if (!(block->flags & RAM_RESIZEABLE)) {
         error_setg_errno(errp, EINVAL,
                          "Length mismatch: %s: 0x" RAM_ADDR_FMT
@@ -2149,13 +2151,19 @@  int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
         return -EINVAL;
     }
 
-    cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
-    block->used_length = newsize;
-    cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
-                                        DIRTY_CLIENTS_ALL);
-    memory_region_set_size(block->mr, newsize);
+    block->req_length = size;
+
+    if (newsize != block->used_length) {
+        cpu_physical_memory_clear_dirty_range(block->offset,
+                                              block->used_length);
+        block->used_length = newsize;
+        cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
+                                            DIRTY_CLIENTS_ALL);
+        memory_region_set_size(block->mr, newsize);
+    }
+
     if (block->resized) {
-        block->resized(block->idstr, newsize, block->host);
+        block->resized(block->idstr, block->req_length, block->host);
     }
     return 0;
 }
@@ -2412,16 +2420,18 @@  RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
                                   MemoryRegion *mr, Error **errp)
 {
     RAMBlock *new_block;
+    ram_addr_t newsize;
     Error *local_err = NULL;
 
-    size = HOST_PAGE_ALIGN(size);
+    newsize = HOST_PAGE_ALIGN(size);
     max_size = HOST_PAGE_ALIGN(max_size);
     new_block = g_malloc0(sizeof(*new_block));
     new_block->mr = mr;
     new_block->resized = resized;
-    new_block->used_length = size;
+    new_block->req_length = size;
+    new_block->used_length = newsize;
     new_block->max_length = max_size;
-    assert(max_size >= size);
+    assert(max_size >= newsize);
     new_block->fd = -1;
     new_block->page_size = qemu_real_host_page_size;
     new_block->host = host;
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 5adebb0bc7..fd13082224 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -31,8 +31,9 @@  struct RAMBlock {
     uint8_t *host;
     uint8_t *colo_cache; /* For colo, VM's ram cache */
     ram_addr_t offset;
-    ram_addr_t used_length;
-    ram_addr_t max_length;
+    ram_addr_t req_length; /* Original requested size, used if RAM_RESIZEABLE */
+    ram_addr_t used_length; /* aligned to qemu_host_page_size */
+    ram_addr_t max_length; /*  aligned to qemu_host_page_size */
     void (*resized)(const char*, uint64_t length, void *host);
     uint32_t flags;
     /* Protected by iothread lock.  */