diff mbox

[RFC,v1,2/4] numa, pc-dimm: Store pc-dimm memory information in numa_info

Message ID 1434099628-18102-3-git-send-email-bharata@linux.vnet.ibm.com
State New
Headers show

Commit Message

Bharata B Rao June 12, 2015, 9 a.m. UTC
Start storing the (start_addr, size, nodeid) of the pc-dimm memory
in numa_info so that this information can be used to look up
the node by address.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
---
 hw/mem/pc-dimm.c      |  4 ++++
 include/sysemu/numa.h | 10 ++++++++++
 numa.c                | 26 ++++++++++++++++++++++++++
 3 files changed, 40 insertions(+)

Comments

David Gibson June 15, 2015, 6:34 a.m. UTC | #1
On Fri, Jun 12, 2015 at 02:30:26PM +0530, Bharata B Rao wrote:
> Start storing the (start_addr, size, nodeid) of the pc-dimm memory
> in numa_info so that this information can be used to lookup
> node by address.
> 
> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Igor Mammedov June 15, 2015, 9:17 a.m. UTC | #2
On Fri, 12 Jun 2015 14:30:26 +0530
Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:

> Start storing the (start_addr, size, nodeid) of the pc-dimm memory
> in numa_info so that this information can be used to lookup
> node by address.
> 
> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> ---
>  hw/mem/pc-dimm.c      |  4 ++++
>  include/sysemu/numa.h | 10 ++++++++++
>  numa.c                | 26 ++++++++++++++++++++++++++
>  3 files changed, 40 insertions(+)
> 
> diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
> index 98971b7..bb04862 100644
> --- a/hw/mem/pc-dimm.c
> +++ b/hw/mem/pc-dimm.c
> @@ -97,6 +97,7 @@ void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
>  
>      memory_region_add_subregion(&hpms->mr, addr - hpms->base, mr);
>      vmstate_register_ram(mr, dev);
> +    numa_set_mem_node_id(addr, memory_region_size(mr), dimm->node);
>  
>  out:
>      error_propagate(errp, local_err);
> @@ -105,6 +106,9 @@ out:
>  void pc_dimm_memory_unplug(DeviceState *dev, MemoryHotplugState *hpms,
>                             MemoryRegion *mr)
>  {
> +    PCDIMMDevice *dimm = PC_DIMM(dev);
> +
> +    numa_unset_mem_node_id(dimm->addr, memory_region_size(mr), dimm->node);
Wouldn't that cause the pc-dimm range to appear in the SRAT table?
Before this, pc-dimms were only added as ACPI devices and were not
advertised in the SRAT ACPI table.

Perhaps leave it up to the target to decide whether it wants to report
dimms with numa_unset_mem_node_id(), rather than doing so in generic code.


>      memory_region_del_subregion(&hpms->mr, mr);
>      vmstate_unregister_ram(mr, dev);
>  }
> diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
> index 6523b4d..7176364 100644
> --- a/include/sysemu/numa.h
> +++ b/include/sysemu/numa.h
> @@ -10,16 +10,26 @@
>  
>  extern int nb_numa_nodes;   /* Number of NUMA nodes */
>  
> +struct numa_addr_range {
> +    ram_addr_t mem_start;
> +    ram_addr_t mem_end;
> +    QLIST_ENTRY(numa_addr_range) entry;
> +};
> +
>  typedef struct node_info {
>      uint64_t node_mem;
>      DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS);
>      struct HostMemoryBackend *node_memdev;
>      bool present;
> +    QLIST_HEAD(, numa_addr_range) addr; /* List to store address ranges */
>  } NodeInfo;
> +
>  extern NodeInfo numa_info[MAX_NODES];
>  void parse_numa_opts(MachineClass *mc);
>  void numa_post_machine_init(void);
>  void query_numa_node_mem(uint64_t node_mem[]);
>  extern QemuOptsList qemu_numa_opts;
> +void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node);
> +void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node);
>  
>  #endif
> diff --git a/numa.c b/numa.c
> index d227ccc..27ca743 100644
> --- a/numa.c
> +++ b/numa.c
> @@ -53,6 +53,28 @@ static int max_numa_nodeid; /* Highest specified NUMA node ID, plus one.
>  int nb_numa_nodes;
>  NodeInfo numa_info[MAX_NODES];
>  
> +void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node)
> +{
> +    struct numa_addr_range *range = g_malloc0(sizeof(*range));
> +
> +    range->mem_start = addr;
> +    range->mem_end = addr + size;
> +    QLIST_INSERT_HEAD(&numa_info[node].addr, range, entry);
> +}
> +
> +void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node)
> +{
> +    struct numa_addr_range *range, *next;
> +
> +    QLIST_FOREACH_SAFE(range, &numa_info[node].addr, entry, next) {
> +        if (addr == range->mem_start && (addr + size) == range->mem_end) {
> +            QLIST_REMOVE(range, entry);
> +            g_free(range);
> +            return;
> +        }
> +    }
> +}
> +
>  static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp)
>  {
>      uint16_t nodenr;
> @@ -275,6 +297,10 @@ void parse_numa_opts(MachineClass *mc)
>          }
>  
>          for (i = 0; i < nb_numa_nodes; i++) {
> +            QLIST_INIT(&numa_info[i].addr);
> +        }
> +
> +        for (i = 0; i < nb_numa_nodes; i++) {
>              if (!bitmap_empty(numa_info[i].node_cpu, MAX_CPUMASK_BITS)) {
>                  break;
>              }
Bharata B Rao June 15, 2015, 1:04 p.m. UTC | #3
On Mon, Jun 15, 2015 at 11:17:59AM +0200, Igor Mammedov wrote:
> On Fri, 12 Jun 2015 14:30:26 +0530
> Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:
> 
> > Start storing the (start_addr, size, nodeid) of the pc-dimm memory
> > in numa_info so that this information can be used to lookup
> > node by address.
> > 
> > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> > ---
> >  hw/mem/pc-dimm.c      |  4 ++++
> >  include/sysemu/numa.h | 10 ++++++++++
> >  numa.c                | 26 ++++++++++++++++++++++++++
> >  3 files changed, 40 insertions(+)
> > 
> > diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
> > index 98971b7..bb04862 100644
> > --- a/hw/mem/pc-dimm.c
> > +++ b/hw/mem/pc-dimm.c
> > @@ -97,6 +97,7 @@ void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
> >  
> >      memory_region_add_subregion(&hpms->mr, addr - hpms->base, mr);
> >      vmstate_register_ram(mr, dev);
> > +    numa_set_mem_node_id(addr, memory_region_size(mr), dimm->node);
> >  
> >  out:
> >      error_propagate(errp, local_err);
> > @@ -105,6 +106,9 @@ out:
> >  void pc_dimm_memory_unplug(DeviceState *dev, MemoryHotplugState *hpms,
> >                             MemoryRegion *mr)
> >  {
> > +    PCDIMMDevice *dimm = PC_DIMM(dev);
> > +
> > +    numa_unset_mem_node_id(dimm->addr, memory_region_size(mr), dimm->node);
> > Wouldn't that cause the pc-dimm range to appear in the SRAT table?

I don't think so.

The numa_set_mem_node_id() and numa_unset_mem_node_id() APIs store/remove
the address range and node id information of a realized pc-dimm device
in/from a linked list in the numa_info structure, so that we can look up
the node id for a given address from numa.c in a self-contained manner.

So unless I am missing something, I don't see this affecting the ACPI/SRAT
table in any way.

> Before this, pc-dimms were only added as ACPI devices and were not
> advertised in the SRAT ACPI table.
> 
> Perhaps leave it up to the target to decide whether it wants to report
> dimms with numa_unset_mem_node_id(), rather than doing so in generic code.

Regards,
Bharata.
diff mbox

Patch

diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
index 98971b7..bb04862 100644
--- a/hw/mem/pc-dimm.c
+++ b/hw/mem/pc-dimm.c
@@ -97,6 +97,7 @@  void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
 
     memory_region_add_subregion(&hpms->mr, addr - hpms->base, mr);
     vmstate_register_ram(mr, dev);
+    numa_set_mem_node_id(addr, memory_region_size(mr), dimm->node);
 
 out:
     error_propagate(errp, local_err);
@@ -105,6 +106,9 @@  out:
 void pc_dimm_memory_unplug(DeviceState *dev, MemoryHotplugState *hpms,
                            MemoryRegion *mr)
 {
+    PCDIMMDevice *dimm = PC_DIMM(dev);
+
+    numa_unset_mem_node_id(dimm->addr, memory_region_size(mr), dimm->node);
     memory_region_del_subregion(&hpms->mr, mr);
     vmstate_unregister_ram(mr, dev);
 }
diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
index 6523b4d..7176364 100644
--- a/include/sysemu/numa.h
+++ b/include/sysemu/numa.h
@@ -10,16 +10,26 @@ 
 
 extern int nb_numa_nodes;   /* Number of NUMA nodes */
 
+struct numa_addr_range {
+    ram_addr_t mem_start;
+    ram_addr_t mem_end;
+    QLIST_ENTRY(numa_addr_range) entry;
+};
+
 typedef struct node_info {
     uint64_t node_mem;
     DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS);
     struct HostMemoryBackend *node_memdev;
     bool present;
+    QLIST_HEAD(, numa_addr_range) addr; /* List to store address ranges */
 } NodeInfo;
+
 extern NodeInfo numa_info[MAX_NODES];
 void parse_numa_opts(MachineClass *mc);
 void numa_post_machine_init(void);
 void query_numa_node_mem(uint64_t node_mem[]);
 extern QemuOptsList qemu_numa_opts;
+void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node);
+void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node);
 
 #endif
diff --git a/numa.c b/numa.c
index d227ccc..27ca743 100644
--- a/numa.c
+++ b/numa.c
@@ -53,6 +53,28 @@  static int max_numa_nodeid; /* Highest specified NUMA node ID, plus one.
 int nb_numa_nodes;
 NodeInfo numa_info[MAX_NODES];
 
+void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node)
+{
+    struct numa_addr_range *range = g_malloc0(sizeof(*range));
+
+    range->mem_start = addr;
+    range->mem_end = addr + size;
+    QLIST_INSERT_HEAD(&numa_info[node].addr, range, entry);
+}
+
+void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node)
+{
+    struct numa_addr_range *range, *next;
+
+    QLIST_FOREACH_SAFE(range, &numa_info[node].addr, entry, next) {
+        if (addr == range->mem_start && (addr + size) == range->mem_end) {
+            QLIST_REMOVE(range, entry);
+            g_free(range);
+            return;
+        }
+    }
+}
+
 static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp)
 {
     uint16_t nodenr;
@@ -275,6 +297,10 @@  void parse_numa_opts(MachineClass *mc)
         }
 
         for (i = 0; i < nb_numa_nodes; i++) {
+            QLIST_INIT(&numa_info[i].addr);
+        }
+
+        for (i = 0; i < nb_numa_nodes; i++) {
             if (!bitmap_empty(numa_info[i].node_cpu, MAX_CPUMASK_BITS)) {
                 break;
             }