diff mbox series

[qemu,RFC,4/4] spapr: Implement SLOF-less client_architecture_support

Message ID 20190720012850.14369-5-aik@ozlabs.ru
State New
Headers show
Series spapr: Kexec style boot | expand

Commit Message

Alexey Kardashevskiy July 20, 2019, 1:28 a.m. UTC
QEMU already implements H_CAS called by SLOF. The existing handler
prepares a diff FDT and SLOF applies it on top of its current tree.
In SLOF-less setup when the user explicitly selected "bios=no",
this updates the FDT from the OS, updates it and writes back to the OS.
The new behavior is advertised to the OS via "/chosen/qemu,h_cas".

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 include/hw/ppc/spapr.h |  5 +++++
 hw/ppc/spapr.c         | 24 ++++++++++++++++-----
 hw/ppc/spapr_hcall.c   | 49 +++++++++++++++++++++++++++++++++++++++---
 3 files changed, 70 insertions(+), 8 deletions(-)

Comments

David Gibson July 28, 2019, 6:09 a.m. UTC | #1
On Sat, Jul 20, 2019 at 11:28:50AM +1000, Alexey Kardashevskiy wrote:
> QEMU already implements H_CAS called by SLOF. The existing handler
> prepares a diff FDT and SLOF applies it on top of its current tree.
> In SLOF-less setup when the user explicitly selected "bios=no",
> this updates the FDT from the OS, updates it and writes back to the OS.
> The new behavior is advertised to the OS via "/chosen/qemu,h_cas".

I don't love having two different paths here, I'm wondering if we can
unify things at all.

I have wondered at some points if there's anything preventing us just
giving a whole new device tree at CAS, rather than selected updates -
that could simplify several things.

> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> ---
>  include/hw/ppc/spapr.h |  5 +++++
>  hw/ppc/spapr.c         | 24 ++++++++++++++++-----
>  hw/ppc/spapr_hcall.c   | 49 +++++++++++++++++++++++++++++++++++++++---
>  3 files changed, 70 insertions(+), 8 deletions(-)
> 
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 7f5d7a70d27e..73cd9cf25b83 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -766,9 +766,14 @@ struct SpaprEventLogEntry {
>  
>  void spapr_events_init(SpaprMachineState *sm);
>  void spapr_dt_events(SpaprMachineState *sm, void *fdt);
> +int spapr_h_cas_do_compose_response(SpaprMachineState *spapr, void *fdt,
> +                                    SpaprOptionVector *ov5_updates);
>  int spapr_h_cas_compose_response(SpaprMachineState *sm,
>                                   target_ulong addr, target_ulong size,
>                                   SpaprOptionVector *ov5_updates);
> +#define FDT_MAX_SIZE 0x100000
> +void *spapr_build_fdt(SpaprMachineState *spapr);
> +
>  void close_htab_fd(SpaprMachineState *spapr);
>  void spapr_setup_hpt_and_vrma(SpaprMachineState *spapr);
>  void spapr_free_hpt(SpaprMachineState *spapr);
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index b097a99951f1..f84895f4a8b4 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -978,6 +978,19 @@ static bool spapr_hotplugged_dev_before_cas(void)
>      return false;
>  }
>  
> +int spapr_h_cas_do_compose_response(SpaprMachineState *spapr, void *fdt,
> +                                    SpaprOptionVector *ov5_updates)

Not a great function name.

> +{
> +    /* Fixup cpu nodes */
> +    _FDT((spapr_fixup_cpu_dt(fdt, spapr)));
> +
> +    if (spapr_dt_cas_updates(spapr, fdt, ov5_updates)) {
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
>  int spapr_h_cas_compose_response(SpaprMachineState *spapr,
>                                   target_ulong addr, target_ulong size,
>                                   SpaprOptionVector *ov5_updates)
> @@ -1009,10 +1022,7 @@ int spapr_h_cas_compose_response(SpaprMachineState *spapr,
>      _FDT((fdt_open_into(fdt_skel, fdt, size)));
>      g_free(fdt_skel);
>  
> -    /* Fixup cpu nodes */
> -    _FDT((spapr_fixup_cpu_dt(fdt, spapr)));
> -
> -    if (spapr_dt_cas_updates(spapr, fdt, ov5_updates)) {
> +    if (spapr_h_cas_do_compose_response(spapr, fdt, ov5_updates)) {
>          return -1;
>      }
>  
> @@ -1232,6 +1242,10 @@ static void spapr_dt_chosen(SpaprMachineState *spapr, void *fdt)
>  
>      /* We always implemented RTAS as hcall, tell guests to call it directly */
>      _FDT(fdt_setprop_cell(fdt, chosen, "qemu,h_rtas", 1));
> +    /* Tell the guest that H_CAS will return the entire FDT now, not the diff */
> +    if (!spapr->bios_enabled) {
> +        _FDT(fdt_setprop_cell(fdt, chosen, "qemu,h_cas", 1));

As with H_RTAS< using qemu,hypertas-functions would be more
appropriate for this.

> +    }
>  
>      spapr_dt_ov5_platform_support(spapr, fdt, chosen);
>  
> @@ -1262,7 +1276,7 @@ static void spapr_dt_hypervisor(SpaprMachineState *spapr, void *fdt)
>      }
>  }
>  
> -static void *spapr_build_fdt(SpaprMachineState *spapr)
> +void *spapr_build_fdt(SpaprMachineState *spapr)
>  {
>      MachineState *machine = MACHINE(spapr);
>      MachineClass *mc = MACHINE_GET_CLASS(machine);
> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> index b964d94f330b..c5cb06c9d507 100644
> --- a/hw/ppc/spapr_hcall.c
> +++ b/hw/ppc/spapr_hcall.c
> @@ -17,6 +17,7 @@
>  #include "hw/ppc/spapr_ovec.h"
>  #include "mmu-book3s-v3.h"
>  #include "hw/mem/memory-device.h"
> +#include "sysemu/device_tree.h"
>  
>  static bool has_spr(PowerPCCPU *cpu, int spr)
>  {
> @@ -1774,9 +1775,51 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
>              /* legacy hash or new hash: */
>              spapr_setup_hpt_and_vrma(spapr);
>          }
> -        spapr->cas_reboot =
> -            (spapr_h_cas_compose_response(spapr, args[1], args[2],
> -                                          ov5_updates) != 0);
> +
> +        if (spapr->bios_enabled) {
> +            spapr->cas_reboot =
> +                (spapr_h_cas_compose_response(spapr, args[1], args[2],
> +                                              ov5_updates) != 0);
> +        } else {
> +            int size;
> +            void *fdt, *fdt_skel;
> +            struct fdt_header hdr = { 0 };
> +
> +            cpu_physical_memory_read(args[1], &hdr, sizeof(hdr));
> +            size = fdt32_to_cpu(hdr.totalsize);
> +            if (size > FDT_MAX_SIZE) {
> +                return H_NOT_AVAILABLE;
> +            }
> +
> +            fdt_skel = g_malloc0(size);
> +            cpu_physical_memory_read(args[1], fdt_skel, size);
> +
> +            fdt = g_malloc0(FDT_MAX_SIZE);
> +            fdt_open_into(fdt_skel, fdt, FDT_MAX_SIZE);
> +            g_free(fdt_skel);

fdt_open_into() explicitly permits using the same buffer for both
arguments, so you don't need two allocations here - you can just
allocate FDT_MAX_SIZE, read it in, then fdt_open_into() in place.

You probably should check for errors from fdt_open_into(), though.

> +
> +            if (spapr_h_cas_do_compose_response(spapr, fdt, ov5_updates)) {
> +                g_free(fdt);
> +                return H_NOT_AVAILABLE;
> +            }
> +            fdt_pack(fdt);
> +            if (fdt_totalsize(fdt) > FDT_MAX_SIZE) {

This can't actually happen - you only ever allocated a buffer of size
FDT_MAX_SIZE, so if the DT was going to exceed that you would have hit
FDT_ERR_NOSPACE in an earlier step.

> +                error_report("FDT too big ! 0x%x bytes (max is 0x%x)",
> +                             fdt_totalsize(fdt), FDT_MAX_SIZE);
> +                g_free(fdt);
> +                return H_NOT_AVAILABLE;
> +            }
> +
> +            /* Load the fdt */
> +            cpu_physical_memory_write(args[1], fdt, fdt_totalsize(fdt));
> +
> +            g_free(spapr->fdt_blob);
> +            spapr->fdt_size = fdt_totalsize(fdt);
> +            spapr->fdt_initial_size = spapr->fdt_size;
> +            spapr->fdt_blob = fdt;
> +
> +            qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt));
> +        }
>      }
>  
>      /*
Alexey Kardashevskiy July 29, 2019, 5:03 a.m. UTC | #2
On 28/07/2019 16:09, David Gibson wrote:
> On Sat, Jul 20, 2019 at 11:28:50AM +1000, Alexey Kardashevskiy wrote:
>> QEMU already implements H_CAS called by SLOF. The existing handler
>> prepares a diff FDT and SLOF applies it on top of its current tree.
>> In SLOF-less setup when the user explicitly selected "bios=no",
>> this updates the FDT from the OS, updates it and writes back to the OS.
>> The new behavior is advertised to the OS via "/chosen/qemu,h_cas".
> 
> I don't love having two different paths here, I'm wondering if we can
> unify things at all.
> 
> I have wondered at some points if there's anything preventing us just
> giving a whole new device tree at CAS, rather than selected updates -
> that could simplify several things.


An update has a header, and this patch just copies the fdt over, if I
add a header to this new path, this will require a few more changes in
the guest which I would rather avoid. Thanks,


> 
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>> ---
>>  include/hw/ppc/spapr.h |  5 +++++
>>  hw/ppc/spapr.c         | 24 ++++++++++++++++-----
>>  hw/ppc/spapr_hcall.c   | 49 +++++++++++++++++++++++++++++++++++++++---
>>  3 files changed, 70 insertions(+), 8 deletions(-)
>>
>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index 7f5d7a70d27e..73cd9cf25b83 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -766,9 +766,14 @@ struct SpaprEventLogEntry {
>>  
>>  void spapr_events_init(SpaprMachineState *sm);
>>  void spapr_dt_events(SpaprMachineState *sm, void *fdt);
>> +int spapr_h_cas_do_compose_response(SpaprMachineState *spapr, void *fdt,
>> +                                    SpaprOptionVector *ov5_updates);
>>  int spapr_h_cas_compose_response(SpaprMachineState *sm,
>>                                   target_ulong addr, target_ulong size,
>>                                   SpaprOptionVector *ov5_updates);
>> +#define FDT_MAX_SIZE 0x100000
>> +void *spapr_build_fdt(SpaprMachineState *spapr);
>> +
>>  void close_htab_fd(SpaprMachineState *spapr);
>>  void spapr_setup_hpt_and_vrma(SpaprMachineState *spapr);
>>  void spapr_free_hpt(SpaprMachineState *spapr);
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index b097a99951f1..f84895f4a8b4 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -978,6 +978,19 @@ static bool spapr_hotplugged_dev_before_cas(void)
>>      return false;
>>  }
>>  
>> +int spapr_h_cas_do_compose_response(SpaprMachineState *spapr, void *fdt,
>> +                                    SpaprOptionVector *ov5_updates)
> 
> Not a great function name.
> 
>> +{
>> +    /* Fixup cpu nodes */
>> +    _FDT((spapr_fixup_cpu_dt(fdt, spapr)));
>> +
>> +    if (spapr_dt_cas_updates(spapr, fdt, ov5_updates)) {
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>>  int spapr_h_cas_compose_response(SpaprMachineState *spapr,
>>                                   target_ulong addr, target_ulong size,
>>                                   SpaprOptionVector *ov5_updates)
>> @@ -1009,10 +1022,7 @@ int spapr_h_cas_compose_response(SpaprMachineState *spapr,
>>      _FDT((fdt_open_into(fdt_skel, fdt, size)));
>>      g_free(fdt_skel);
>>  
>> -    /* Fixup cpu nodes */
>> -    _FDT((spapr_fixup_cpu_dt(fdt, spapr)));
>> -
>> -    if (spapr_dt_cas_updates(spapr, fdt, ov5_updates)) {
>> +    if (spapr_h_cas_do_compose_response(spapr, fdt, ov5_updates)) {
>>          return -1;
>>      }
>>  
>> @@ -1232,6 +1242,10 @@ static void spapr_dt_chosen(SpaprMachineState *spapr, void *fdt)
>>  
>>      /* We always implemented RTAS as hcall, tell guests to call it directly */
>>      _FDT(fdt_setprop_cell(fdt, chosen, "qemu,h_rtas", 1));
>> +    /* Tell the guest that H_CAS will return the entire FDT now, not the diff */
>> +    if (!spapr->bios_enabled) {
>> +        _FDT(fdt_setprop_cell(fdt, chosen, "qemu,h_cas", 1));
> 
> As with H_RTAS< using qemu,hypertas-functions would be more
> appropriate for this.
> 
>> +    }
>>  
>>      spapr_dt_ov5_platform_support(spapr, fdt, chosen);
>>  
>> @@ -1262,7 +1276,7 @@ static void spapr_dt_hypervisor(SpaprMachineState *spapr, void *fdt)
>>      }
>>  }
>>  
>> -static void *spapr_build_fdt(SpaprMachineState *spapr)
>> +void *spapr_build_fdt(SpaprMachineState *spapr)
>>  {
>>      MachineState *machine = MACHINE(spapr);
>>      MachineClass *mc = MACHINE_GET_CLASS(machine);
>> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
>> index b964d94f330b..c5cb06c9d507 100644
>> --- a/hw/ppc/spapr_hcall.c
>> +++ b/hw/ppc/spapr_hcall.c
>> @@ -17,6 +17,7 @@
>>  #include "hw/ppc/spapr_ovec.h"
>>  #include "mmu-book3s-v3.h"
>>  #include "hw/mem/memory-device.h"
>> +#include "sysemu/device_tree.h"
>>  
>>  static bool has_spr(PowerPCCPU *cpu, int spr)
>>  {
>> @@ -1774,9 +1775,51 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
>>              /* legacy hash or new hash: */
>>              spapr_setup_hpt_and_vrma(spapr);
>>          }
>> -        spapr->cas_reboot =
>> -            (spapr_h_cas_compose_response(spapr, args[1], args[2],
>> -                                          ov5_updates) != 0);
>> +
>> +        if (spapr->bios_enabled) {
>> +            spapr->cas_reboot =
>> +                (spapr_h_cas_compose_response(spapr, args[1], args[2],
>> +                                              ov5_updates) != 0);
>> +        } else {
>> +            int size;
>> +            void *fdt, *fdt_skel;
>> +            struct fdt_header hdr = { 0 };
>> +
>> +            cpu_physical_memory_read(args[1], &hdr, sizeof(hdr));
>> +            size = fdt32_to_cpu(hdr.totalsize);
>> +            if (size > FDT_MAX_SIZE) {
>> +                return H_NOT_AVAILABLE;
>> +            }
>> +
>> +            fdt_skel = g_malloc0(size);
>> +            cpu_physical_memory_read(args[1], fdt_skel, size);
>> +
>> +            fdt = g_malloc0(FDT_MAX_SIZE);
>> +            fdt_open_into(fdt_skel, fdt, FDT_MAX_SIZE);
>> +            g_free(fdt_skel);
> 
> fdt_open_into() explicitly permits using the same buffer for both
> arguments, so you don't need two allocations here - you can just
> allocate FDT_MAX_SIZE, read it in, then fdt_open_into() in place.
> 
> You probably should check for errors from fdt_open_into(), though.
> 
>> +
>> +            if (spapr_h_cas_do_compose_response(spapr, fdt, ov5_updates)) {
>> +                g_free(fdt);
>> +                return H_NOT_AVAILABLE;
>> +            }
>> +            fdt_pack(fdt);
>> +            if (fdt_totalsize(fdt) > FDT_MAX_SIZE) {
> 
> This can't actually happen - you only ever allocated a buffer of size
> FDT_MAX_SIZE, so if the DT was going to exceed that you would have hit
> FDT_ERR_NOSPACE in an earlier step.
> 
>> +                error_report("FDT too big ! 0x%x bytes (max is 0x%x)",
>> +                             fdt_totalsize(fdt), FDT_MAX_SIZE);
>> +                g_free(fdt);
>> +                return H_NOT_AVAILABLE;
>> +            }
>> +
>> +            /* Load the fdt */
>> +            cpu_physical_memory_write(args[1], fdt, fdt_totalsize(fdt));
>> +
>> +            g_free(spapr->fdt_blob);
>> +            spapr->fdt_size = fdt_totalsize(fdt);
>> +            spapr->fdt_initial_size = spapr->fdt_size;
>> +            spapr->fdt_blob = fdt;
>> +
>> +            qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt));
>> +        }
>>      }
>>  
>>      /*
>
diff mbox series

Patch

diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 7f5d7a70d27e..73cd9cf25b83 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -766,9 +766,14 @@  struct SpaprEventLogEntry {
 
 void spapr_events_init(SpaprMachineState *sm);
 void spapr_dt_events(SpaprMachineState *sm, void *fdt);
+int spapr_h_cas_do_compose_response(SpaprMachineState *spapr, void *fdt,
+                                    SpaprOptionVector *ov5_updates);
 int spapr_h_cas_compose_response(SpaprMachineState *sm,
                                  target_ulong addr, target_ulong size,
                                  SpaprOptionVector *ov5_updates);
+#define FDT_MAX_SIZE 0x100000
+void *spapr_build_fdt(SpaprMachineState *spapr);
+
 void close_htab_fd(SpaprMachineState *spapr);
 void spapr_setup_hpt_and_vrma(SpaprMachineState *spapr);
 void spapr_free_hpt(SpaprMachineState *spapr);
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index b097a99951f1..f84895f4a8b4 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -978,6 +978,19 @@  static bool spapr_hotplugged_dev_before_cas(void)
     return false;
 }
 
+int spapr_h_cas_do_compose_response(SpaprMachineState *spapr, void *fdt,
+                                    SpaprOptionVector *ov5_updates)
+{
+    /* Fixup cpu nodes */
+    _FDT((spapr_fixup_cpu_dt(fdt, spapr)));
+
+    if (spapr_dt_cas_updates(spapr, fdt, ov5_updates)) {
+        return -1;
+    }
+
+    return 0;
+}
+
 int spapr_h_cas_compose_response(SpaprMachineState *spapr,
                                  target_ulong addr, target_ulong size,
                                  SpaprOptionVector *ov5_updates)
@@ -1009,10 +1022,7 @@  int spapr_h_cas_compose_response(SpaprMachineState *spapr,
     _FDT((fdt_open_into(fdt_skel, fdt, size)));
     g_free(fdt_skel);
 
-    /* Fixup cpu nodes */
-    _FDT((spapr_fixup_cpu_dt(fdt, spapr)));
-
-    if (spapr_dt_cas_updates(spapr, fdt, ov5_updates)) {
+    if (spapr_h_cas_do_compose_response(spapr, fdt, ov5_updates)) {
         return -1;
     }
 
@@ -1232,6 +1242,10 @@  static void spapr_dt_chosen(SpaprMachineState *spapr, void *fdt)
 
     /* We always implemented RTAS as hcall, tell guests to call it directly */
     _FDT(fdt_setprop_cell(fdt, chosen, "qemu,h_rtas", 1));
+    /* Tell the guest that H_CAS will return the entire FDT now, not the diff */
+    if (!spapr->bios_enabled) {
+        _FDT(fdt_setprop_cell(fdt, chosen, "qemu,h_cas", 1));
+    }
 
     spapr_dt_ov5_platform_support(spapr, fdt, chosen);
 
@@ -1262,7 +1276,7 @@  static void spapr_dt_hypervisor(SpaprMachineState *spapr, void *fdt)
     }
 }
 
-static void *spapr_build_fdt(SpaprMachineState *spapr)
+void *spapr_build_fdt(SpaprMachineState *spapr)
 {
     MachineState *machine = MACHINE(spapr);
     MachineClass *mc = MACHINE_GET_CLASS(machine);
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index b964d94f330b..c5cb06c9d507 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -17,6 +17,7 @@ 
 #include "hw/ppc/spapr_ovec.h"
 #include "mmu-book3s-v3.h"
 #include "hw/mem/memory-device.h"
+#include "sysemu/device_tree.h"
 
 static bool has_spr(PowerPCCPU *cpu, int spr)
 {
@@ -1774,9 +1775,51 @@  static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
             /* legacy hash or new hash: */
             spapr_setup_hpt_and_vrma(spapr);
         }
-        spapr->cas_reboot =
-            (spapr_h_cas_compose_response(spapr, args[1], args[2],
-                                          ov5_updates) != 0);
+
+        if (spapr->bios_enabled) {
+            spapr->cas_reboot =
+                (spapr_h_cas_compose_response(spapr, args[1], args[2],
+                                              ov5_updates) != 0);
+        } else {
+            int size;
+            void *fdt, *fdt_skel;
+            struct fdt_header hdr = { 0 };
+
+            cpu_physical_memory_read(args[1], &hdr, sizeof(hdr));
+            size = fdt32_to_cpu(hdr.totalsize);
+            if (size > FDT_MAX_SIZE) {
+                return H_NOT_AVAILABLE;
+            }
+
+            fdt_skel = g_malloc0(size);
+            cpu_physical_memory_read(args[1], fdt_skel, size);
+
+            fdt = g_malloc0(FDT_MAX_SIZE);
+            fdt_open_into(fdt_skel, fdt, FDT_MAX_SIZE);
+            g_free(fdt_skel);
+
+            if (spapr_h_cas_do_compose_response(spapr, fdt, ov5_updates)) {
+                g_free(fdt);
+                return H_NOT_AVAILABLE;
+            }
+            fdt_pack(fdt);
+            if (fdt_totalsize(fdt) > FDT_MAX_SIZE) {
+                error_report("FDT too big ! 0x%x bytes (max is 0x%x)",
+                             fdt_totalsize(fdt), FDT_MAX_SIZE);
+                g_free(fdt);
+                return H_NOT_AVAILABLE;
+            }
+
+            /* Load the fdt */
+            cpu_physical_memory_write(args[1], fdt, fdt_totalsize(fdt));
+
+            g_free(spapr->fdt_blob);
+            spapr->fdt_size = fdt_totalsize(fdt);
+            spapr->fdt_initial_size = spapr->fdt_size;
+            spapr->fdt_blob = fdt;
+
+            qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt));
+        }
     }
 
     /*