diff mbox series

[for-5.0,4/4] spapr: Correct clamping of RMA to Node 0 size

Message ID 20191129013504.145455-5-david@gibson.dropbear.id.au
State New
Headers show
Series Fixes for RMA size calculation | expand

Commit Message

David Gibson Nov. 29, 2019, 1:35 a.m. UTC
The Real Mode Area (RMA) needs to fit within Node 0 in NUMA configurations.
We use a helper function spapr_node0_size() to calculate this.

But that function doesn't actually get the size of Node 0, it gets the
minimum size of all nodes, ever since b082d65a300 "spapr: Add a helper for
node0_size calculation".  That was added, apparently, because Node 0 in
qemu's terms might not have corresponded to Node 0 in PAPR terms (i.e. the
node with memory at address 0).

That might not have been the case at the time, but it *is* the case now
that qemu node 0 must have the lowest address, which is the node we need.
So, we can simplify this logic, folding it into spapr_rma_size(), the only
remaining caller.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/ppc/spapr.c | 26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)

Comments

Alexey Kardashevskiy Dec. 3, 2019, 4:18 a.m. UTC | #1
On 29/11/2019 12:35, David Gibson wrote:
> The Real Mode Area (RMA) needs to fit within Node 0 in NUMA configurations.
> We use a helper function spapr_node0_size() to calculate this.
> 
> But that function doesn't actually get the size of Node 0, it gets the
> minimum size of all nodes, ever since b082d65a300 "spapr: Add a helper for
> node0_size calculation".  That was added, apparently, because Node 0 in
> qemu's terms might not have corresponded to Node 0 in PAPR terms (i.e. the
> node with memory at address 0).


After looking at this closely, I think the idea was that the first
node(s) may have only CPUs but not memory, in this case
node#0.node_mem==0 and things crash:


/home/aik/pbuild/qemu-garrison2-ppc64/ppc64-softmmu/qemu-system-ppc64 \
-nodefaults \
-chardev stdio,id=STDIO0,signal=off,mux=on \
-device spapr-vty,id=svty0,reg=0x71000110,chardev=STDIO0 \
-mon id=MON0,chardev=STDIO0,mode=readline \
-nographic \
-vga none \
-enable-kvm \
-m 2G \
-kernel /home/aik/t/vml4120le \
-initrd /home/aik/t/le.cpio \
-object memory-backend-ram,id=memdev1,size=2G \
-numa node,nodeid=0 \
-numa node,nodeid=1,memdev=memdev1 \
-numa cpu,node-id=0 \
-smp 8,threads=8 \
-machine pseries \
-L /home/aik/t/qemu-ppc64-bios/ \
-trace events=qemu_trace_events \
-d guest_errors \
-chardev socket,id=SOCKET0,server,nowait,path=qemu.mon.ssh37742 \
-mon chardev=SOCKET0,mode=control
QEMU PID = 12377
qemu-system-ppc64:qemu_trace_events:80: warning: trace event
'vfio_ram_register' does not exist
qemu-system-ppc64: pSeries SLOF firmware requires >= 128MiB guest RMA
(Real Mode Area)





> That might not have been the case at the time, but it *is* the case now
> that qemu node 0 must have the lowest address, which is the node we need.
> So, we can simplify this logic, folding it into spapr_rma_size(), the only
> remaining caller.
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
>  hw/ppc/spapr.c | 26 ++++++++++----------------
>  1 file changed, 10 insertions(+), 16 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 7efd4f2b85..6611f75bdf 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -295,20 +295,6 @@ static void spapr_populate_pa_features(SpaprMachineState *spapr,
>      _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", pa_features, pa_size)));
>  }
>  
> -static hwaddr spapr_node0_size(MachineState *machine)
> -{
> -    if (machine->numa_state->num_nodes) {
> -        int i;
> -        for (i = 0; i < machine->numa_state->num_nodes; ++i) {
> -            if (machine->numa_state->nodes[i].node_mem) {
> -                return MIN(pow2floor(machine->numa_state->nodes[i].node_mem),
> -                           machine->ram_size);
> -            }
> -        }
> -    }
> -    return machine->ram_size;
> -}
> -
>  static void add_str(GString *s, const gchar *s1)
>  {
>      g_string_append_len(s, s1, strlen(s1) + 1);
> @@ -2668,10 +2654,13 @@ static hwaddr spapr_rma_size(SpaprMachineState *spapr, Error **errp)
>  {
>      MachineState *machine = MACHINE(spapr);
>      hwaddr rma_size = machine->ram_size;
> -    hwaddr node0_size = spapr_node0_size(machine);
>  
>      /* RMA has to fit in the first NUMA node */
> -    rma_size = MIN(rma_size, node0_size);
> +    if (machine->numa_state->num_nodes) {
> +        hwaddr node0_size = machine->numa_state->nodes[0].node_mem;
> +
> +        rma_size = MIN(rma_size, node0_size);
> +    }
>  
>      /*
>       * VRMA access is via a special 1TiB SLB mapping, so the RMA can
> @@ -2688,6 +2677,11 @@ static hwaddr spapr_rma_size(SpaprMachineState *spapr, Error **errp)
>          rma_size = MIN(rma_size, 16 * GiB);
>      }
>  
> +    /*
> +     * RMA size must be a power of 2
> +     */
> +    rma_size = pow2floor(rma_size);
> +
>      if (rma_size < (MIN_RMA_SLOF * MiB)) {
>          error_setg(errp,
>  "pSeries SLOF firmware requires >= %ldMiB guest RMA (Real Mode Area)",
>
David Gibson Dec. 10, 2019, 5:21 a.m. UTC | #2
On Tue, Dec 03, 2019 at 03:18:37PM +1100, Alexey Kardashevskiy wrote:
> 
> 
> On 29/11/2019 12:35, David Gibson wrote:
> > The Real Mode Area (RMA) needs to fit within Node 0 in NUMA configurations.
> > We use a helper function spapr_node0_size() to calculate this.
> > 
> > But that function doesn't actually get the size of Node 0, it gets the
> > minimum size of all nodes, ever since b082d65a300 "spapr: Add a helper for
> > node0_size calculation".  That was added, apparently, because Node 0 in
> > qemu's terms might not have corresponded to Node 0 in PAPR terms (i.e. the
> > node with memory at address 0).
> 
> 
> After looking at this closely, I think the idea was that the first
> node(s) may have only CPUs but not memory, in this case
> node#0.node_mem==0 and things crash:

Ah!  Excellent point - I misread what the existing node0_size()
function was doing.  Corrected for the next spin.
diff mbox series

Patch

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 7efd4f2b85..6611f75bdf 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -295,20 +295,6 @@  static void spapr_populate_pa_features(SpaprMachineState *spapr,
     _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", pa_features, pa_size)));
 }
 
-static hwaddr spapr_node0_size(MachineState *machine)
-{
-    if (machine->numa_state->num_nodes) {
-        int i;
-        for (i = 0; i < machine->numa_state->num_nodes; ++i) {
-            if (machine->numa_state->nodes[i].node_mem) {
-                return MIN(pow2floor(machine->numa_state->nodes[i].node_mem),
-                           machine->ram_size);
-            }
-        }
-    }
-    return machine->ram_size;
-}
-
 static void add_str(GString *s, const gchar *s1)
 {
     g_string_append_len(s, s1, strlen(s1) + 1);
@@ -2668,10 +2654,13 @@  static hwaddr spapr_rma_size(SpaprMachineState *spapr, Error **errp)
 {
     MachineState *machine = MACHINE(spapr);
     hwaddr rma_size = machine->ram_size;
-    hwaddr node0_size = spapr_node0_size(machine);
 
     /* RMA has to fit in the first NUMA node */
-    rma_size = MIN(rma_size, node0_size);
+    if (machine->numa_state->num_nodes) {
+        hwaddr node0_size = machine->numa_state->nodes[0].node_mem;
+
+        rma_size = MIN(rma_size, node0_size);
+    }
 
     /*
      * VRMA access is via a special 1TiB SLB mapping, so the RMA can
@@ -2688,6 +2677,11 @@  static hwaddr spapr_rma_size(SpaprMachineState *spapr, Error **errp)
         rma_size = MIN(rma_size, 16 * GiB);
     }
 
+    /*
+     * RMA size must be a power of 2
+     */
+    rma_size = pow2floor(rma_size);
+
     if (rma_size < (MIN_RMA_SLOF * MiB)) {
         error_setg(errp,
 "pSeries SLOF firmware requires >= %ldMiB guest RMA (Real Mode Area)",