diff mbox

[RFC,v1,12/13] spapr: Support ibm, dynamic-reconfiguration-memory

Message ID 1420697420-16053-13-git-send-email-bharata@linux.vnet.ibm.com
State New
Headers show

Commit Message

Bharata B Rao Jan. 8, 2015, 6:10 a.m. UTC
Parse ibm,architecture.vec table obtained from the guest and enable
memory node configuration via ibm,dynamic-reconfiguration-memory if guest
supports it. This is in preparation to support memory hotplug for
sPAPR guests.

This changes the way memory node configuration is done. Currently all
memory nodes are built upfront. But after this patch, only memory@0 node
for RMA is built upfront. Guest kernel boots with just that and rest of
the memory nodes (via memory@XXX or ibm,dynamic-reconfiguration-memory)
are built when guest does ibm,client-architecture-support call.

Note: This patch was tested with an enhancement to SLOF that supports
addition of device tree nodes from ibm,client-architecture-support call.

TODO: Enforce lmb-size alignment for node memory.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
---
 hw/ppc/spapr.c         | 232 ++++++++++++++++++++++++++++++++++++++++---------
 hw/ppc/spapr_hcall.c   |  51 +++++++++--
 include/hw/ppc/spapr.h |  12 ++-
 3 files changed, 246 insertions(+), 49 deletions(-)

Comments

David Gibson Feb. 12, 2015, 6:02 a.m. UTC | #1
On Thu, Jan 08, 2015 at 11:40:19AM +0530, Bharata B Rao wrote:
> Parse ibm,architecture.vec table obtained from the guest and enable
> memory node configuration via ibm,dynamic-reconfiguration-memory if guest
> supports it. This is in preparation to support memory hotplug for
> sPAPR guests.
> 
> This changes the way memory node configuration is done. Currently all
> memory nodes are built upfront. But after this patch, only memory@0 node
> for RMA is built upfront. Guest kernel boots with just that and rest of
> the memory nodes (via memory@XXX or ibm,dynamic-reconfiguration-memory)
> are built when guest does ibm,client-architecture-support call.
> 
> Note: This patch was tested with an enhancement to SLOF that supports
> addition of device tree nodes from ibm,client-architecture-support call.
> 
> TODO: Enforce lmb-size alignment for node memory.

I think this todo needs to be done before you're ready to merge.

> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> ---
>  hw/ppc/spapr.c         | 232 ++++++++++++++++++++++++++++++++++++++++---------
>  hw/ppc/spapr_hcall.c   |  51 +++++++++--
>  include/hw/ppc/spapr.h |  12 ++-
>  3 files changed, 246 insertions(+), 49 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 9ff08ff..6964b06 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -631,42 +631,6 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
>      return fdt;
>  }
>  
> -int spapr_h_cas_compose_response(target_ulong addr, target_ulong size)
> -{
> -    void *fdt, *fdt_skel;
> -    sPAPRDeviceTreeUpdateHeader hdr = { .version_id = 1 };
> -
> -    size -= sizeof(hdr);
> -
> -    /* Create sceleton */
> -    fdt_skel = g_malloc0(size);
> -    _FDT((fdt_create(fdt_skel, size)));
> -    _FDT((fdt_begin_node(fdt_skel, "")));
> -    _FDT((fdt_end_node(fdt_skel)));
> -    _FDT((fdt_finish(fdt_skel)));
> -    fdt = g_malloc0(size);
> -    _FDT((fdt_open_into(fdt_skel, fdt, size)));
> -    g_free(fdt_skel);
> -
> -    /* Fix skeleton up */
> -    _FDT((spapr_fixup_cpu_dt(fdt, spapr)));
> -
> -    /* Pack resulting tree */
> -    _FDT((fdt_pack(fdt)));
> -
> -    if (fdt_totalsize(fdt) + sizeof(hdr) > size) {
> -        trace_spapr_cas_failed(size);
> -        return -1;
> -    }
> -
> -    cpu_physical_memory_write(addr, &hdr, sizeof(hdr));
> -    cpu_physical_memory_write(addr + sizeof(hdr), fdt, fdt_totalsize(fdt));
> -    trace_spapr_cas_continue(fdt_totalsize(fdt) + sizeof(hdr));
> -    g_free(fdt);
> -
> -    return 0;
> -}
> -
>  static void spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start,
>                                         hwaddr size)
>  {
> @@ -720,7 +684,6 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
>          }
>          if (!mem_start) {
>              /* ppc_spapr_init() checks for rma_size <= node0_size already */
> -            spapr_populate_memory_node(fdt, i, 0, spapr->rma_size);
>              mem_start += spapr->rma_size;
>              node_size -= spapr->rma_size;
>          }
> @@ -741,6 +704,190 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
>      return 0;
>  }
>  
> +/*
> + * TODO: Take care of sparsemem configuration ?
> + */
> +static uint64_t numa_node_end(uint32_t nodeid)
> +{
> +    uint32_t i = 0;
> +    uint64_t addr = 0;
> +
> +    do {
> +        addr += numa_info[i].node_mem;
> +    } while (++i <= nodeid);
> +
> +    return addr;
> +}
> +
> +static uint64_t numa_node_start(uint32_t nodeid)
> +{
> +    if (!nodeid) {
> +        return 0;
> +    } else {
> +        return numa_node_end(nodeid - 1);
> +    }
> +}

There's really no better generic way of finding the addresses of numa nodes?

> +/*
> + * Given the addr, return the NUMA node to which the address belongs to.
> + */
> +static uint32_t get_numa_node(uint64_t addr)
> +{
> +    uint32_t i;
> +
> +    for (i = 0; i < nb_numa_nodes; i++) {
> +        if ((addr >= numa_node_start(i)) && (addr < numa_node_end(i))) {

This is O(nb_numa_nodes^2) which is kind of nasty, althoguh that's
unlikely to be large enough to be a real problem.

> +            return i;
> +        }
> +    }
> +
> +    /* Unassigned memory goes to node 0 by default */
> +    return 0;
> +}
> +
> +/* Adds ibm,dynamic-reconfiguration-memory node */
> +static int spapr_populate_drconf_memory(sPAPREnvironment *spapr, void *fdt)
> +{
> +    int root_offset, ret, i, offset;
> +    uint32_t lmb_size = SPAPR_MIN_MEMORY_BLOCK_SIZE;
> +    uint32_t prop_lmb_size[] = {0, cpu_to_be32(lmb_size)};
> +    uint32_t dynamic_memory[DR_LMB_LIST_ENTRY_SIZE];
> +    uint32_t nr_rma_lmbs = spapr->rma_size/lmb_size;
> +    uint32_t nr_lmbs = spapr->maxram_limit/lmb_size - nr_rma_lmbs;
> +    uint32_t nr_assigned_lmbs = spapr->ram_limit/lmb_size - nr_rma_lmbs;
> +    uint32_t *int_buf, *cur_index, buf_len;
> +
> +    /* Allocate enough buffer size to fit in ibm,dynamic-memory */
> +    buf_len = nr_lmbs * DR_LMB_LIST_ENTRY_SIZE * sizeof(uint32_t) +
> +                sizeof(uint32_t);
> +    cur_index = int_buf = g_malloc0(buf_len);
> +    root_offset = fdt_path_offset(fdt, "/");

You don't need this, the fdt offset of / is always 0 and you're
allowed to count on that.

> +
> +
> +    offset = fdt_add_subnode(fdt, root_offset,
> +                   "ibm,dynamic-reconfiguration-memory");
> +
> +    ret = fdt_setprop(fdt, offset, "ibm,lmb-size", prop_lmb_size,
> +            sizeof(prop_lmb_size));
> +    if (ret < 0) {
> +        goto out;
> +    }

Current versions of libfdt have fdt_setprop_u64 which you could use
for this.

> +    ret = fdt_setprop_cell(fdt, offset, "ibm,memory-flags-mask",
> +                            cpu_to_be32(0xff));

fdt_setprop_cell has the byteswap built-in, so adding your own as well
will make it incorrect for an LE host.

> +    if (ret < 0) {
> +        goto out;
> +    }
> +
> +    ret = fdt_setprop_cell(fdt, offset, "ibm,memory-preservation-time",
> +                            cpu_to_be32(0x0));
> +    if (ret < 0) {
> +        goto out;
> +    }
> +
> +    /* ibm,dynamic-memory */
> +    int_buf[0] = cpu_to_be32(nr_lmbs);
> +    cur_index++;
> +    for (i = 0; i < nr_lmbs; i++) {
> +        sPAPRDRConnector *drc;
> +        sPAPRDRConnectorClass *drck;
> +        uint64_t addr;
> +
> +        if (i < nr_assigned_lmbs) {
> +            addr = (i + nr_rma_lmbs) * lmb_size;
> +        } else {
> +            addr = (i - nr_assigned_lmbs) * lmb_size +
> +                SPAPR_MACHINE(qdev_get_machine())->hotplug_memory_base;
> +        }
> +        drc = spapr_dr_connector_new(qdev_get_machine(),
> +                SPAPR_DR_CONNECTOR_TYPE_LMB, addr/lmb_size);
> +        drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> +
> +        dynamic_memory[0] = cpu_to_be32(addr >> 32);
> +        dynamic_memory[1] = cpu_to_be32(addr & 0xffffffff);
> +        dynamic_memory[2] = cpu_to_be32(drck->get_index(drc));
> +        dynamic_memory[3] = cpu_to_be32(0); /* reserved */
> +        dynamic_memory[4] = cpu_to_be32(get_numa_node(addr));

As noted above your current get_numa_node() implementation is O(N^2)
making this routine O(N^3).

> +        dynamic_memory[5] = (addr < spapr->ram_limit) ?
> +                            cpu_to_be32(LMB_FLAGS_ASSIGNED) :
> +                            cpu_to_be32(0);
> +
> +        memcpy(cur_index, dynamic_memory, sizeof(dynamic_memory));

You could just use uint32_t *dynamic_memory = cur_index at the
beginning of the loop block to avoid this memcpy().

> +        cur_index += DR_LMB_LIST_ENTRY_SIZE;
> +    }
> +    ret = fdt_setprop(fdt, offset, "ibm,dynamic-memory", int_buf, buf_len);
> +    if (ret < 0) {
> +        goto out;
> +    }
> +
> +    /* ibm,associativity-lookup-arrays */
> +    cur_index = int_buf;
> +    int_buf[0] = cpu_to_be32(nb_numa_nodes);
> +    int_buf[1] = cpu_to_be32(4);

Something explaining the significance of this 4 for those of us that
don't have access to PAPR+ would be nice.

> +    cur_index += 2;
> +    for (i = 0; i < nb_numa_nodes; i++) {
> +        uint32_t associativity[] = {
> +            cpu_to_be32(0x0),
> +            cpu_to_be32(0x0),
> +            cpu_to_be32(0x0),
> +            cpu_to_be32(i)
> +        };
> +        memcpy(cur_index, associativity, sizeof(associativity));
> +        cur_index += 4;
> +    }
> +    ret = fdt_setprop(fdt, offset, "ibm,associativity-lookup-arrays", int_buf,
> +            (cur_index - int_buf) * sizeof(uint32_t));
> +out:
> +    g_free(int_buf);
> +    return ret;
> +}
> +
> +int spapr_h_cas_compose_response(target_ulong addr, target_ulong size,
> +                                bool cpu_update, bool memory_update)
> +{
> +    void *fdt, *fdt_skel;
> +    sPAPRDeviceTreeUpdateHeader hdr = { .version_id = 1 };
> +
> +    size -= sizeof(hdr);
> +
> +    /* Create sceleton */
> +    fdt_skel = g_malloc0(size);
> +    _FDT((fdt_create(fdt_skel, size)));
> +    _FDT((fdt_begin_node(fdt_skel, "")));
> +    _FDT((fdt_end_node(fdt_skel)));
> +    _FDT((fdt_finish(fdt_skel)));
> +    fdt = g_malloc0(size);
> +    _FDT((fdt_open_into(fdt_skel, fdt, size)));
> +    g_free(fdt_skel);
> +
> +    /* Fixup cpu nodes */
> +    if (cpu_update) {
> +        _FDT((spapr_fixup_cpu_dt(fdt, spapr)));
> +    }
> +
> +    /* Generate memory nodes or ibm,dynamic-reconfiguration-memory node */
> +    if (memory_update) {
> +        _FDT((spapr_populate_drconf_memory(spapr, fdt)));
> +    } else {
> +        _FDT((spapr_populate_memory(spapr, fdt)));
> +    }
> +
> +    /* Pack resulting tree */
> +    _FDT((fdt_pack(fdt)));
> +
> +    if (fdt_totalsize(fdt) + sizeof(hdr) > size) {

You could just set the correct size (size - sizeof(hdr)) to
fdt_create() and fdt_open_into() and avoid this failure case.

> +        trace_spapr_cas_failed(size);
> +        return -1;
> +    }
> +
> +    cpu_physical_memory_write(addr, &hdr, sizeof(hdr));
> +    cpu_physical_memory_write(addr + sizeof(hdr), fdt, fdt_totalsize(fdt));
> +    trace_spapr_cas_continue(fdt_totalsize(fdt) + sizeof(hdr));
> +    g_free(fdt);
> +
> +    return 0;
> +}
> +
>  static void spapr_finalize_fdt(sPAPREnvironment *spapr,
>                                 hwaddr fdt_addr,
>                                 hwaddr rtas_addr,
> @@ -757,11 +904,12 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr,
>      /* open out the base tree into a temp buffer for the final tweaks */
>      _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE)));
>  
> -    ret = spapr_populate_memory(spapr, fdt);
> -    if (ret < 0) {
> -        fprintf(stderr, "couldn't setup memory nodes in fdt\n");
> -        exit(1);
> -    }
> +    /*
> +     * Add memory@0 node to represent RMA. Rest of the memory is either
> +     * represented by memory nodes or ibm,dynamic-reconfiguration-memory
> +     * node later during ibm,client-architecture-support call.
> +     */
> +    spapr_populate_memory_node(fdt, 0, 0, spapr->rma_size);
>  
>      ret = spapr_populate_vdevice(spapr->vio_bus, fdt);
>      if (ret < 0) {
> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> index 8651447..10f05f4 100644
> --- a/hw/ppc/spapr_hcall.c
> +++ b/hw/ppc/spapr_hcall.c
> @@ -805,6 +805,32 @@ static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPREnvironment *spapr,
>      return ret;
>  }
>  
> +/*
> + * Return the offset to the requested option vector @vector in the
> + * option vector table @table.
> + */
> +static target_ulong cas_get_option_vector(int vector, target_ulong table)
> +{
> +    int i;
> +    char nr_vectors, nr_entries;
> +
> +    if (!table) {
> +        return 0;
> +    }
> +
> +    nr_vectors = (rtas_ld(table, 0) >> 24) + 1;

This is kind of abusing the rtas_ld() function.  It's really only
intended for accessing rtas arguments, not an arbitrary array of u32s.

> +    if (!vector || vector > nr_vectors) {
> +        return 0;
> +    }
> +    table++; /* skip nr option vectors */
> +
> +    for (i = 0; i < vector - 1; i++) {
> +        nr_entries = rtas_ld(table, 0) >> 24;
> +        table += nr_entries + 2;
> +    }
> +    return table;
> +}
> +
>  typedef struct {
>      PowerPCCPU *cpu;
>      uint32_t cpu_version;
> @@ -825,19 +851,22 @@ static void do_set_compat(void *arg)
>      ((cpuver) == CPU_POWERPC_LOGICAL_2_06_PLUS) ? 2061 : \
>      ((cpuver) == CPU_POWERPC_LOGICAL_2_07) ? 2070 : 0)
>  
> +#define OV5_DRCONF_MEMORY 0x20
> +
>  static target_ulong h_client_architecture_support(PowerPCCPU *cpu_,
>                                                    sPAPREnvironment *spapr,
>                                                    target_ulong opcode,
>                                                    target_ulong *args)
>  {
> -    target_ulong list = args[0];
> +    target_ulong list = args[0], ov_table;
>      PowerPCCPUClass *pcc_ = POWERPC_CPU_GET_CLASS(cpu_);
>      CPUState *cs;
> -    bool cpu_match = false;
> +    bool cpu_match = false, cpu_update = true, memory_update = false;
>      unsigned old_cpu_version = cpu_->cpu_version;
>      unsigned compat_lvl = 0, cpu_version = 0;
>      unsigned max_lvl = get_compat_level(cpu_->max_compat);
>      int counter;
> +    char ov5_byte2;
>  
>      /* Parse PVR list */
>      for (counter = 0; counter < 512; ++counter) {
> @@ -887,8 +916,6 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu_,
>          }
>      }
>  
> -    /* For the future use: here @list points to the first capability */
> -
>      /* Parsing finished */
>      trace_spapr_cas_pvr(cpu_->cpu_version, cpu_match,
>                          cpu_version, pcc_->pcr_mask);
> @@ -912,14 +939,26 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu_,
>      }
>  
>      if (!cpu_version) {
> -        return H_SUCCESS;
> +        cpu_update = false;
>      }
>  
> +    /* For the future use: here @ov_table points to the first option vector */
> +    ov_table = list;
> +
> +    list = cas_get_option_vector(5, ov_table);

What's the literal 5 mean?

>      if (!list) {
>          return H_SUCCESS;
>      }
>  
> -    if (spapr_h_cas_compose_response(args[1], args[2])) {
> +    /* @list now points to OV 5 */
> +    list += 2;
> +    ov5_byte2 = rtas_ld(list, 0) >> 24;
> +    if (ov5_byte2 & OV5_DRCONF_MEMORY) {
> +        memory_update = true;
> +    }
> +
> +    if (spapr_h_cas_compose_response(args[1], args[2], cpu_update,
> +                                    memory_update)) {
>          qemu_system_reset_request();
>      }
>  
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 64681c4..10283f9 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -485,9 +485,19 @@ struct sPAPRTCETable {
>  /* Support a min of 1TB hotplug memory assuming 256MB per slot */
>  #define SPAPR_MAX_RAM_SLOTS     (1ULL << 12)
>  
> +/*
> + * Number of 32 bit words in each LMB list entry in ibm,dynamic-memory
> + * property under ibm,dynamic-reconfiguration-memory node.
> + */
> +#define DR_LMB_LIST_ENTRY_SIZE 6
> +
> +/* Flag values in Option Vector 5 ibm architecture vector table. */
> +#define LMB_FLAGS_ASSIGNED 0x00000008

Things declared in a public header should have some sort of
namespacing, so, SPAPR_DR_LBM_LIST_ENTRY_SIZE for example.

>  void spapr_events_init(sPAPREnvironment *spapr);
>  void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq);
> -int spapr_h_cas_compose_response(target_ulong addr, target_ulong size);
> +int spapr_h_cas_compose_response(target_ulong addr, target_ulong size,
> +                                bool cpu_update, bool memory_update);
>  sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
>                                     uint64_t bus_offset,
>                                     uint32_t page_shift,
diff mbox

Patch

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 9ff08ff..6964b06 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -631,42 +631,6 @@  static void *spapr_create_fdt_skel(hwaddr initrd_base,
     return fdt;
 }
 
-int spapr_h_cas_compose_response(target_ulong addr, target_ulong size)
-{
-    void *fdt, *fdt_skel;
-    sPAPRDeviceTreeUpdateHeader hdr = { .version_id = 1 };
-
-    size -= sizeof(hdr);
-
-    /* Create sceleton */
-    fdt_skel = g_malloc0(size);
-    _FDT((fdt_create(fdt_skel, size)));
-    _FDT((fdt_begin_node(fdt_skel, "")));
-    _FDT((fdt_end_node(fdt_skel)));
-    _FDT((fdt_finish(fdt_skel)));
-    fdt = g_malloc0(size);
-    _FDT((fdt_open_into(fdt_skel, fdt, size)));
-    g_free(fdt_skel);
-
-    /* Fix skeleton up */
-    _FDT((spapr_fixup_cpu_dt(fdt, spapr)));
-
-    /* Pack resulting tree */
-    _FDT((fdt_pack(fdt)));
-
-    if (fdt_totalsize(fdt) + sizeof(hdr) > size) {
-        trace_spapr_cas_failed(size);
-        return -1;
-    }
-
-    cpu_physical_memory_write(addr, &hdr, sizeof(hdr));
-    cpu_physical_memory_write(addr + sizeof(hdr), fdt, fdt_totalsize(fdt));
-    trace_spapr_cas_continue(fdt_totalsize(fdt) + sizeof(hdr));
-    g_free(fdt);
-
-    return 0;
-}
-
 static void spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start,
                                        hwaddr size)
 {
@@ -720,7 +684,6 @@  static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
         }
         if (!mem_start) {
             /* ppc_spapr_init() checks for rma_size <= node0_size already */
-            spapr_populate_memory_node(fdt, i, 0, spapr->rma_size);
             mem_start += spapr->rma_size;
             node_size -= spapr->rma_size;
         }
@@ -741,6 +704,190 @@  static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
     return 0;
 }
 
+/*
+ * TODO: Take care of sparsemem configuration ?
+ */
+static uint64_t numa_node_end(uint32_t nodeid)
+{
+    uint32_t i = 0;
+    uint64_t addr = 0;
+
+    do {
+        addr += numa_info[i].node_mem;
+    } while (++i <= nodeid);
+
+    return addr;
+}
+
+static uint64_t numa_node_start(uint32_t nodeid)
+{
+    if (!nodeid) {
+        return 0;
+    } else {
+        return numa_node_end(nodeid - 1);
+    }
+}
+
+/*
+ * Given the addr, return the NUMA node to which the address belongs to.
+ */
+static uint32_t get_numa_node(uint64_t addr)
+{
+    uint32_t i;
+
+    for (i = 0; i < nb_numa_nodes; i++) {
+        if ((addr >= numa_node_start(i)) && (addr < numa_node_end(i))) {
+            return i;
+        }
+    }
+
+    /* Unassigned memory goes to node 0 by default */
+    return 0;
+}
+
+/* Adds ibm,dynamic-reconfiguration-memory node */
+static int spapr_populate_drconf_memory(sPAPREnvironment *spapr, void *fdt)
+{
+    int root_offset, ret, i, offset;
+    uint32_t lmb_size = SPAPR_MIN_MEMORY_BLOCK_SIZE;
+    uint32_t prop_lmb_size[] = {0, cpu_to_be32(lmb_size)};
+    uint32_t dynamic_memory[DR_LMB_LIST_ENTRY_SIZE];
+    uint32_t nr_rma_lmbs = spapr->rma_size/lmb_size;
+    uint32_t nr_lmbs = spapr->maxram_limit/lmb_size - nr_rma_lmbs;
+    uint32_t nr_assigned_lmbs = spapr->ram_limit/lmb_size - nr_rma_lmbs;
+    uint32_t *int_buf, *cur_index, buf_len;
+
+    /* Allocate enough buffer size to fit in ibm,dynamic-memory */
+    buf_len = nr_lmbs * DR_LMB_LIST_ENTRY_SIZE * sizeof(uint32_t) +
+                sizeof(uint32_t);
+    cur_index = int_buf = g_malloc0(buf_len);
+    root_offset = fdt_path_offset(fdt, "/");
+
+
+    offset = fdt_add_subnode(fdt, root_offset,
+                   "ibm,dynamic-reconfiguration-memory");
+
+    ret = fdt_setprop(fdt, offset, "ibm,lmb-size", prop_lmb_size,
+            sizeof(prop_lmb_size));
+    if (ret < 0) {
+        goto out;
+    }
+
+    ret = fdt_setprop_cell(fdt, offset, "ibm,memory-flags-mask",
+                            cpu_to_be32(0xff));
+    if (ret < 0) {
+        goto out;
+    }
+
+    ret = fdt_setprop_cell(fdt, offset, "ibm,memory-preservation-time",
+                            cpu_to_be32(0x0));
+    if (ret < 0) {
+        goto out;
+    }
+
+    /* ibm,dynamic-memory */
+    int_buf[0] = cpu_to_be32(nr_lmbs);
+    cur_index++;
+    for (i = 0; i < nr_lmbs; i++) {
+        sPAPRDRConnector *drc;
+        sPAPRDRConnectorClass *drck;
+        uint64_t addr;
+
+        if (i < nr_assigned_lmbs) {
+            addr = (i + nr_rma_lmbs) * lmb_size;
+        } else {
+            addr = (i - nr_assigned_lmbs) * lmb_size +
+                SPAPR_MACHINE(qdev_get_machine())->hotplug_memory_base;
+        }
+        drc = spapr_dr_connector_new(qdev_get_machine(),
+                SPAPR_DR_CONNECTOR_TYPE_LMB, addr/lmb_size);
+        drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+
+        dynamic_memory[0] = cpu_to_be32(addr >> 32);
+        dynamic_memory[1] = cpu_to_be32(addr & 0xffffffff);
+        dynamic_memory[2] = cpu_to_be32(drck->get_index(drc));
+        dynamic_memory[3] = cpu_to_be32(0); /* reserved */
+        dynamic_memory[4] = cpu_to_be32(get_numa_node(addr));
+        dynamic_memory[5] = (addr < spapr->ram_limit) ?
+                            cpu_to_be32(LMB_FLAGS_ASSIGNED) :
+                            cpu_to_be32(0);
+
+        memcpy(cur_index, dynamic_memory, sizeof(dynamic_memory));
+        cur_index += DR_LMB_LIST_ENTRY_SIZE;
+    }
+    ret = fdt_setprop(fdt, offset, "ibm,dynamic-memory", int_buf, buf_len);
+    if (ret < 0) {
+        goto out;
+    }
+
+    /* ibm,associativity-lookup-arrays */
+    cur_index = int_buf;
+    int_buf[0] = cpu_to_be32(nb_numa_nodes);
+    int_buf[1] = cpu_to_be32(4);
+    cur_index += 2;
+    for (i = 0; i < nb_numa_nodes; i++) {
+        uint32_t associativity[] = {
+            cpu_to_be32(0x0),
+            cpu_to_be32(0x0),
+            cpu_to_be32(0x0),
+            cpu_to_be32(i)
+        };
+        memcpy(cur_index, associativity, sizeof(associativity));
+        cur_index += 4;
+    }
+    ret = fdt_setprop(fdt, offset, "ibm,associativity-lookup-arrays", int_buf,
+            (cur_index - int_buf) * sizeof(uint32_t));
+out:
+    g_free(int_buf);
+    return ret;
+}
+
+int spapr_h_cas_compose_response(target_ulong addr, target_ulong size,
+                                bool cpu_update, bool memory_update)
+{
+    void *fdt, *fdt_skel;
+    sPAPRDeviceTreeUpdateHeader hdr = { .version_id = 1 };
+
+    size -= sizeof(hdr);
+
+    /* Create sceleton */
+    fdt_skel = g_malloc0(size);
+    _FDT((fdt_create(fdt_skel, size)));
+    _FDT((fdt_begin_node(fdt_skel, "")));
+    _FDT((fdt_end_node(fdt_skel)));
+    _FDT((fdt_finish(fdt_skel)));
+    fdt = g_malloc0(size);
+    _FDT((fdt_open_into(fdt_skel, fdt, size)));
+    g_free(fdt_skel);
+
+    /* Fixup cpu nodes */
+    if (cpu_update) {
+        _FDT((spapr_fixup_cpu_dt(fdt, spapr)));
+    }
+
+    /* Generate memory nodes or ibm,dynamic-reconfiguration-memory node */
+    if (memory_update) {
+        _FDT((spapr_populate_drconf_memory(spapr, fdt)));
+    } else {
+        _FDT((spapr_populate_memory(spapr, fdt)));
+    }
+
+    /* Pack resulting tree */
+    _FDT((fdt_pack(fdt)));
+
+    if (fdt_totalsize(fdt) + sizeof(hdr) > size) {
+        trace_spapr_cas_failed(size);
+        return -1;
+    }
+
+    cpu_physical_memory_write(addr, &hdr, sizeof(hdr));
+    cpu_physical_memory_write(addr + sizeof(hdr), fdt, fdt_totalsize(fdt));
+    trace_spapr_cas_continue(fdt_totalsize(fdt) + sizeof(hdr));
+    g_free(fdt);
+
+    return 0;
+}
+
 static void spapr_finalize_fdt(sPAPREnvironment *spapr,
                                hwaddr fdt_addr,
                                hwaddr rtas_addr,
@@ -757,11 +904,12 @@  static void spapr_finalize_fdt(sPAPREnvironment *spapr,
     /* open out the base tree into a temp buffer for the final tweaks */
     _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE)));
 
-    ret = spapr_populate_memory(spapr, fdt);
-    if (ret < 0) {
-        fprintf(stderr, "couldn't setup memory nodes in fdt\n");
-        exit(1);
-    }
+    /*
+     * Add memory@0 node to represent RMA. Rest of the memory is either
+     * represented by memory nodes or ibm,dynamic-reconfiguration-memory
+     * node later during ibm,client-architecture-support call.
+     */
+    spapr_populate_memory_node(fdt, 0, 0, spapr->rma_size);
 
     ret = spapr_populate_vdevice(spapr->vio_bus, fdt);
     if (ret < 0) {
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 8651447..10f05f4 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -805,6 +805,32 @@  static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPREnvironment *spapr,
     return ret;
 }
 
+/*
+ * Return the offset to the requested option vector @vector in the
+ * option vector table @table.
+ */
+static target_ulong cas_get_option_vector(int vector, target_ulong table)
+{
+    int i;
+    char nr_vectors, nr_entries;
+
+    if (!table) {
+        return 0;
+    }
+
+    nr_vectors = (rtas_ld(table, 0) >> 24) + 1;
+    if (!vector || vector > nr_vectors) {
+        return 0;
+    }
+    table++; /* skip nr option vectors */
+
+    for (i = 0; i < vector - 1; i++) {
+        nr_entries = rtas_ld(table, 0) >> 24;
+        table += nr_entries + 2;
+    }
+    return table;
+}
+
 typedef struct {
     PowerPCCPU *cpu;
     uint32_t cpu_version;
@@ -825,19 +851,22 @@  static void do_set_compat(void *arg)
     ((cpuver) == CPU_POWERPC_LOGICAL_2_06_PLUS) ? 2061 : \
     ((cpuver) == CPU_POWERPC_LOGICAL_2_07) ? 2070 : 0)
 
+#define OV5_DRCONF_MEMORY 0x20
+
 static target_ulong h_client_architecture_support(PowerPCCPU *cpu_,
                                                   sPAPREnvironment *spapr,
                                                   target_ulong opcode,
                                                   target_ulong *args)
 {
-    target_ulong list = args[0];
+    target_ulong list = args[0], ov_table;
     PowerPCCPUClass *pcc_ = POWERPC_CPU_GET_CLASS(cpu_);
     CPUState *cs;
-    bool cpu_match = false;
+    bool cpu_match = false, cpu_update = true, memory_update = false;
     unsigned old_cpu_version = cpu_->cpu_version;
     unsigned compat_lvl = 0, cpu_version = 0;
     unsigned max_lvl = get_compat_level(cpu_->max_compat);
     int counter;
+    char ov5_byte2;
 
     /* Parse PVR list */
     for (counter = 0; counter < 512; ++counter) {
@@ -887,8 +916,6 @@  static target_ulong h_client_architecture_support(PowerPCCPU *cpu_,
         }
     }
 
-    /* For the future use: here @list points to the first capability */
-
     /* Parsing finished */
     trace_spapr_cas_pvr(cpu_->cpu_version, cpu_match,
                         cpu_version, pcc_->pcr_mask);
@@ -912,14 +939,26 @@  static target_ulong h_client_architecture_support(PowerPCCPU *cpu_,
     }
 
     if (!cpu_version) {
-        return H_SUCCESS;
+        cpu_update = false;
     }
 
+    /* For the future use: here @ov_table points to the first option vector */
+    ov_table = list;
+
+    list = cas_get_option_vector(5, ov_table);
     if (!list) {
         return H_SUCCESS;
     }
 
-    if (spapr_h_cas_compose_response(args[1], args[2])) {
+    /* @list now points to OV 5 */
+    list += 2;
+    ov5_byte2 = rtas_ld(list, 0) >> 24;
+    if (ov5_byte2 & OV5_DRCONF_MEMORY) {
+        memory_update = true;
+    }
+
+    if (spapr_h_cas_compose_response(args[1], args[2], cpu_update,
+                                    memory_update)) {
         qemu_system_reset_request();
     }
 
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 64681c4..10283f9 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -485,9 +485,19 @@  struct sPAPRTCETable {
 /* Support a min of 1TB hotplug memory assuming 256MB per slot */
 #define SPAPR_MAX_RAM_SLOTS     (1ULL << 12)
 
+/*
+ * Number of 32 bit words in each LMB list entry in ibm,dynamic-memory
+ * property under ibm,dynamic-reconfiguration-memory node.
+ */
+#define DR_LMB_LIST_ENTRY_SIZE 6
+
+/* Flag values in Option Vector 5 ibm architecture vector table. */
+#define LMB_FLAGS_ASSIGNED 0x00000008
+
 void spapr_events_init(sPAPREnvironment *spapr);
 void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq);
-int spapr_h_cas_compose_response(target_ulong addr, target_ulong size);
+int spapr_h_cas_compose_response(target_ulong addr, target_ulong size,
+                                bool cpu_update, bool memory_update);
 sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
                                    uint64_t bus_offset,
                                    uint32_t page_shift,