acpi: SRAT: do not create reserved gap entries

Message ID 1533910017-55158-1-git-send-email-imammedo@redhat.com
State New
Headers show
Series
  • acpi: SRAT: do not create reserved gap entries
Related show

Commit Message

Igor Mammedov Aug. 10, 2018, 2:06 p.m.
Commit 848a1cc1e8b04 while introducing SRAT entries for DIMM and NVDIMM
also introduced fake entries for gaps between memory devices, assuming
that we need all possible range covered with SRAT entries.
And it did it wrong since gap would overlap with preceeding entry.
Reproduced with following CLI:

 -m 1G,slots=4,maxmem=8 \
 -object memory-backend-ram,size=1G,id=m0 \
 -device pc-dimm,memdev=m0,addr=0x101000000 \
 -object memory-backend-ram,size=1G,id=m1 \
 -device pc-dimm,memdev=m1

However recent development (10efd7e108) showed that gap entries might
be not need. And indeed testing with WS2008DC-WS2016DC guests range
shows that memory hotplug works just fine without gap entries.

So rather than fixing gap entry borders, just drop them altogether
and simplify code around it.

Spotted-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
---
There is no need to update reference blobs since gaps beween dimms
aren't generated by any exsting test case.

Considering issue is not visible by default lets just merge it into 3.1
and stable 3.0.1

---
 hw/i386/acpi-build.c | 62 +++++++++++++++++++---------------------------------
 1 file changed, 22 insertions(+), 40 deletions(-)

Comments

Igor Mammedov Aug. 10, 2018, 4:01 p.m. | #1
On Fri, 10 Aug 2018 16:06:57 +0200
Igor Mammedov <imammedo@redhat.com> wrote:

> Commit 848a1cc1e8b04 while introducing SRAT entries for DIMM and NVDIMM
> also introduced fake entries for gaps between memory devices, assuming
> that we need all possible range covered with SRAT entries.
> And it did it wrong since gap would overlap with preceeding entry.
> Reproduced with following CLI:
> 
>  -m 1G,slots=4,maxmem=8 \
>  -object memory-backend-ram,size=1G,id=m0 \
>  -device pc-dimm,memdev=m0,addr=0x101000000 \
>  -object memory-backend-ram,size=1G,id=m1 \
>  -device pc-dimm,memdev=m1
> 
> However recent development (10efd7e108) showed that gap entries might
> be not need. And indeed testing with WS2008DC-WS2016DC guests range
> shows that memory hotplug works just fine without gap entries.
> 
> So rather than fixing gap entry borders, just drop them altogether
> and simplify code around it.
> 
> Spotted-by: Laszlo Ersek <lersek@redhat.com>
> Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> ---
> There is no need to update reference blobs since gaps beween dimms
> aren't generated by any exsting test case.
> 
> Considering issue is not visible by default lets just merge it into 3.1
> and stable 3.0.1
Pls ignore it for now I'll need to do more extensive testing with old kernels,
we might need these holes for old kernels or even new ones.
/me goes to read kernel code

/per spec possible to hotplug range could be in SRAT,
 even though I don't like it bu we might end up with static
 partitioning of hotplug area between nodes like on bare metal
 to avoid chasing after unknown requirements from windows /


> ---
>  hw/i386/acpi-build.c | 62 +++++++++++++++++++---------------------------------
>  1 file changed, 22 insertions(+), 40 deletions(-)
> 
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index e1ee8ae..d3d690f 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -2254,48 +2254,16 @@ build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog)
>  static void build_srat_hotpluggable_memory(GArray *table_data, uint64_t base,
>                                             uint64_t len, int default_node)
>  {
> -    MemoryDeviceInfoList *info_list = qmp_memory_device_list();
>      MemoryDeviceInfoList *info;
> -    MemoryDeviceInfo *mi;
> -    PCDIMMDeviceInfo *di;
> -    uint64_t end = base + len, cur, size;
> -    bool is_nvdimm;
>      AcpiSratMemoryAffinity *numamem;
> -    MemoryAffinityFlags flags;
> +    MemoryDeviceInfoList *info_list = qmp_memory_device_list();;
>  
> -    for (cur = base, info = info_list;
> -         cur < end;
> -         cur += size, info = info->next) {
> -        numamem = acpi_data_push(table_data, sizeof *numamem);
> -
> -        if (!info) {
> -            /*
> -             * Entry is required for Windows to enable memory hotplug in OS
> -             * and for Linux to enable SWIOTLB when booted with less than
> -             * 4G of RAM. Windows works better if the entry sets proximity
> -             * to the highest NUMA node in the machine at the end of the
> -             * reserved space.
> -             * Memory devices may override proximity set by this entry,
> -             * providing _PXM method if necessary.
> -             */
> -            build_srat_memory(numamem, end - 1, 1, default_node,
> -                              MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED);
> -            break;
> -        }
> -
> -        mi = info->value;
> -        is_nvdimm = (mi->type == MEMORY_DEVICE_INFO_KIND_NVDIMM);
> -        di = !is_nvdimm ? mi->u.dimm.data : mi->u.nvdimm.data;
> -
> -        if (cur < di->addr) {
> -            build_srat_memory(numamem, cur, di->addr - cur, default_node,
> -                              MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED);
> -            numamem = acpi_data_push(table_data, sizeof *numamem);
> -        }
> -
> -        size = di->size;
> +    for (info = info_list; info != NULL; info = info->next) {
> +        MemoryAffinityFlags flags = MEM_AFFINITY_ENABLED;
> +        MemoryDeviceInfo *mi = info->value;
> +        bool is_nvdimm = (mi->type == MEMORY_DEVICE_INFO_KIND_NVDIMM);
> +        PCDIMMDeviceInfo *di = !is_nvdimm ? mi->u.dimm.data : mi->u.nvdimm.data;
>  
> -        flags = MEM_AFFINITY_ENABLED;
>          if (di->hotpluggable) {
>              flags |= MEM_AFFINITY_HOTPLUGGABLE;
>          }
> @@ -2303,10 +2271,24 @@ static void build_srat_hotpluggable_memory(GArray *table_data, uint64_t base,
>              flags |= MEM_AFFINITY_NON_VOLATILE;
>          }
>  
> -        build_srat_memory(numamem, di->addr, size, di->node, flags);
> +        numamem = acpi_data_push(table_data, sizeof *numamem);
> +        build_srat_memory(numamem, di->addr, di->size, di->node, flags);
>      }
> -
>      qapi_free_MemoryDeviceInfoList(info_list);
> +
> +    /*
> +     * Entry is required for Windows to enable memory hotplug in OS
> +     * and for Linux to enable SWIOTLB when booted with less than
> +     * 4G of RAM. Windows works better if the entry sets proximity
> +     * to the highest NUMA node in the machine at the end of the
> +     * reserved space.
> +     * Memory devices may override proximity set by this entry,
> +     * providing _PXM method if necessary.
> +     */
> +    numamem = acpi_data_push(table_data, sizeof *numamem);
> +    build_srat_memory(numamem, base + len - 1, 1, default_node,
> +                      MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED);
> +
>  }
>  
>  static void
Eduardo Habkost Aug. 10, 2018, 6:28 p.m. | #2
On Fri, Aug 10, 2018 at 06:01:06PM +0200, Igor Mammedov wrote:
> On Fri, 10 Aug 2018 16:06:57 +0200
> Igor Mammedov <imammedo@redhat.com> wrote:
> 
> > Commit 848a1cc1e8b04 while introducing SRAT entries for DIMM and NVDIMM
> > also introduced fake entries for gaps between memory devices, assuming
> > that we need all possible range covered with SRAT entries.
> > And it did it wrong since gap would overlap with preceeding entry.
> > Reproduced with following CLI:
> > 
> >  -m 1G,slots=4,maxmem=8 \
> >  -object memory-backend-ram,size=1G,id=m0 \
> >  -device pc-dimm,memdev=m0,addr=0x101000000 \
> >  -object memory-backend-ram,size=1G,id=m1 \
> >  -device pc-dimm,memdev=m1
> > 
> > However recent development (10efd7e108) showed that gap entries might
> > be not need. And indeed testing with WS2008DC-WS2016DC guests range
> > shows that memory hotplug works just fine without gap entries.
> > 
> > So rather than fixing gap entry borders, just drop them altogether
> > and simplify code around it.
> > 
> > Spotted-by: Laszlo Ersek <lersek@redhat.com>
> > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > ---
> > There is no need to update reference blobs since gaps beween dimms
> > aren't generated by any exsting test case.
> > 
> > Considering issue is not visible by default lets just merge it into 3.1
> > and stable 3.0.1
> Pls ignore it for now I'll need to do more extensive testing with old kernels,
> we might need these holes for old kernels or even new ones.
> /me goes to read kernel code
> 
> /per spec possible to hotplug range could be in SRAT,
>  even though I don't like it bu we might end up with static
>  partitioning of hotplug area between nodes like on bare metal
>  to avoid chasing after unknown requirements from windows /

Does that mean we might want to pair DIMM slots with NUMA nodes
in advance?  Do you have a suggestion on how the command-line
would look like, in this case?

Maybe "-numa mem-slot,slot=X,node=Y"?
Igor Mammedov Aug. 13, 2018, 7:07 a.m. | #3
On Fri, 10 Aug 2018 15:28:35 -0300
Eduardo Habkost <ehabkost@redhat.com> wrote:

> On Fri, Aug 10, 2018 at 06:01:06PM +0200, Igor Mammedov wrote:
> > On Fri, 10 Aug 2018 16:06:57 +0200
> > Igor Mammedov <imammedo@redhat.com> wrote:
> >   
> > > Commit 848a1cc1e8b04 while introducing SRAT entries for DIMM and NVDIMM
> > > also introduced fake entries for gaps between memory devices, assuming
> > > that we need all possible range covered with SRAT entries.
> > > And it did it wrong since gap would overlap with preceeding entry.
> > > Reproduced with following CLI:
> > > 
> > >  -m 1G,slots=4,maxmem=8 \
> > >  -object memory-backend-ram,size=1G,id=m0 \
> > >  -device pc-dimm,memdev=m0,addr=0x101000000 \
> > >  -object memory-backend-ram,size=1G,id=m1 \
> > >  -device pc-dimm,memdev=m1
> > > 
> > > However recent development (10efd7e108) showed that gap entries might
> > > be not need. And indeed testing with WS2008DC-WS2016DC guests range
> > > shows that memory hotplug works just fine without gap entries.
> > > 
> > > So rather than fixing gap entry borders, just drop them altogether
> > > and simplify code around it.
> > > 
> > > Spotted-by: Laszlo Ersek <lersek@redhat.com>
> > > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > > ---
> > > There is no need to update reference blobs since gaps beween dimms
> > > aren't generated by any exsting test case.
> > > 
> > > Considering issue is not visible by default lets just merge it into 3.1
> > > and stable 3.0.1  
> > Pls ignore it for now I'll need to do more extensive testing with old kernels,
> > we might need these holes for old kernels or even new ones.
> > /me goes to read kernel code
> > 
> > /per spec possible to hotplug range could be in SRAT,
> >  even though I don't like it bu we might end up with static
> >  partitioning of hotplug area between nodes like on bare metal
> >  to avoid chasing after unknown requirements from windows /  
> 
> Does that mean we might want to pair DIMM slots with NUMA nodes
> in advance?  Do you have a suggestion on how the command-line
> would look like, in this case?
> 
> Maybe "-numa mem-slot,slot=X,node=Y"?
it's either, to make it per slot (it implies fixed maximum size per slot)
or a bit more flexible maxmem per node, might be something like:
  -numa memory,node=X,maxmem=Y
Eduardo Habkost Aug. 13, 2018, 12:06 p.m. | #4
On Mon, Aug 13, 2018 at 09:07:13AM +0200, Igor Mammedov wrote:
> On Fri, 10 Aug 2018 15:28:35 -0300
> Eduardo Habkost <ehabkost@redhat.com> wrote:
> 
> > On Fri, Aug 10, 2018 at 06:01:06PM +0200, Igor Mammedov wrote:
> > > On Fri, 10 Aug 2018 16:06:57 +0200
> > > Igor Mammedov <imammedo@redhat.com> wrote:
> > >   
> > > > Commit 848a1cc1e8b04 while introducing SRAT entries for DIMM and NVDIMM
> > > > also introduced fake entries for gaps between memory devices, assuming
> > > > that we need all possible range covered with SRAT entries.
> > > > And it did it wrong since gap would overlap with preceeding entry.
> > > > Reproduced with following CLI:
> > > > 
> > > >  -m 1G,slots=4,maxmem=8 \
> > > >  -object memory-backend-ram,size=1G,id=m0 \
> > > >  -device pc-dimm,memdev=m0,addr=0x101000000 \
> > > >  -object memory-backend-ram,size=1G,id=m1 \
> > > >  -device pc-dimm,memdev=m1
> > > > 
> > > > However recent development (10efd7e108) showed that gap entries might
> > > > be not need. And indeed testing with WS2008DC-WS2016DC guests range
> > > > shows that memory hotplug works just fine without gap entries.
> > > > 
> > > > So rather than fixing gap entry borders, just drop them altogether
> > > > and simplify code around it.
> > > > 
> > > > Spotted-by: Laszlo Ersek <lersek@redhat.com>
> > > > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > > > ---
> > > > There is no need to update reference blobs since gaps beween dimms
> > > > aren't generated by any exsting test case.
> > > > 
> > > > Considering issue is not visible by default lets just merge it into 3.1
> > > > and stable 3.0.1  
> > > Pls ignore it for now I'll need to do more extensive testing with old kernels,
> > > we might need these holes for old kernels or even new ones.
> > > /me goes to read kernel code
> > > 
> > > /per spec possible to hotplug range could be in SRAT,
> > >  even though I don't like it bu we might end up with static
> > >  partitioning of hotplug area between nodes like on bare metal
> > >  to avoid chasing after unknown requirements from windows /  
> > 
> > Does that mean we might want to pair DIMM slots with NUMA nodes
> > in advance?  Do you have a suggestion on how the command-line
> > would look like, in this case?
> > 
> > Maybe "-numa mem-slot,slot=X,node=Y"?
> it's either, to make it per slot (it implies fixed maximum size per slot)
> or a bit more flexible maxmem per node, might be something like:
>   -numa memory,node=X,maxmem=Y

That could work, but maybe it's too much flexibility?  It would
be tricky to define what is supposed happen when a slot cross
multiple NUMA nodes.

Patch

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index e1ee8ae..d3d690f 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2254,48 +2254,16 @@  build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog)
 static void build_srat_hotpluggable_memory(GArray *table_data, uint64_t base,
                                            uint64_t len, int default_node)
 {
-    MemoryDeviceInfoList *info_list = qmp_memory_device_list();
     MemoryDeviceInfoList *info;
-    MemoryDeviceInfo *mi;
-    PCDIMMDeviceInfo *di;
-    uint64_t end = base + len, cur, size;
-    bool is_nvdimm;
     AcpiSratMemoryAffinity *numamem;
-    MemoryAffinityFlags flags;
+    MemoryDeviceInfoList *info_list = qmp_memory_device_list();;
 
-    for (cur = base, info = info_list;
-         cur < end;
-         cur += size, info = info->next) {
-        numamem = acpi_data_push(table_data, sizeof *numamem);
-
-        if (!info) {
-            /*
-             * Entry is required for Windows to enable memory hotplug in OS
-             * and for Linux to enable SWIOTLB when booted with less than
-             * 4G of RAM. Windows works better if the entry sets proximity
-             * to the highest NUMA node in the machine at the end of the
-             * reserved space.
-             * Memory devices may override proximity set by this entry,
-             * providing _PXM method if necessary.
-             */
-            build_srat_memory(numamem, end - 1, 1, default_node,
-                              MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED);
-            break;
-        }
-
-        mi = info->value;
-        is_nvdimm = (mi->type == MEMORY_DEVICE_INFO_KIND_NVDIMM);
-        di = !is_nvdimm ? mi->u.dimm.data : mi->u.nvdimm.data;
-
-        if (cur < di->addr) {
-            build_srat_memory(numamem, cur, di->addr - cur, default_node,
-                              MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED);
-            numamem = acpi_data_push(table_data, sizeof *numamem);
-        }
-
-        size = di->size;
+    for (info = info_list; info != NULL; info = info->next) {
+        MemoryAffinityFlags flags = MEM_AFFINITY_ENABLED;
+        MemoryDeviceInfo *mi = info->value;
+        bool is_nvdimm = (mi->type == MEMORY_DEVICE_INFO_KIND_NVDIMM);
+        PCDIMMDeviceInfo *di = !is_nvdimm ? mi->u.dimm.data : mi->u.nvdimm.data;
 
-        flags = MEM_AFFINITY_ENABLED;
         if (di->hotpluggable) {
             flags |= MEM_AFFINITY_HOTPLUGGABLE;
         }
@@ -2303,10 +2271,24 @@  static void build_srat_hotpluggable_memory(GArray *table_data, uint64_t base,
             flags |= MEM_AFFINITY_NON_VOLATILE;
         }
 
-        build_srat_memory(numamem, di->addr, size, di->node, flags);
+        numamem = acpi_data_push(table_data, sizeof *numamem);
+        build_srat_memory(numamem, di->addr, di->size, di->node, flags);
     }
-
     qapi_free_MemoryDeviceInfoList(info_list);
+
+    /*
+     * Entry is required for Windows to enable memory hotplug in OS
+     * and for Linux to enable SWIOTLB when booted with less than
+     * 4G of RAM. Windows works better if the entry sets proximity
+     * to the highest NUMA node in the machine at the end of the
+     * reserved space.
+     * Memory devices may override proximity set by this entry,
+     * providing _PXM method if necessary.
+     */
+    numamem = acpi_data_push(table_data, sizeof *numamem);
+    build_srat_memory(numamem, base + len - 1, 1, default_node,
+                      MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED);
+
 }
 
 static void