Message ID | 20190720012850.14369-5-aik@ozlabs.ru |
---|---|
State | New |
Headers | show |
Series | spapr: Kexec style boot | expand |
On Sat, Jul 20, 2019 at 11:28:50AM +1000, Alexey Kardashevskiy wrote: > QEMU already implements H_CAS called by SLOF. The existing handler > prepares a diff FDT and SLOF applies it on top of its current tree. > In SLOF-less setup when the user explicitly selected "bios=no", > this updates the FDT from the OS, updates it and writes back to the OS. > The new behavior is advertised to the OS via "/chosen/qemu,h_cas". I don't love having two different paths here, I'm wondering if we can unify things at all. I have wondered at some points if there's anything preventing us just giving a whole new device tree at CAS, rather than selected updates - that could simplify several things. > Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> > --- > include/hw/ppc/spapr.h | 5 +++++ > hw/ppc/spapr.c | 24 ++++++++++++++++----- > hw/ppc/spapr_hcall.c | 49 +++++++++++++++++++++++++++++++++++++++--- > 3 files changed, 70 insertions(+), 8 deletions(-) > > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h > index 7f5d7a70d27e..73cd9cf25b83 100644 > --- a/include/hw/ppc/spapr.h > +++ b/include/hw/ppc/spapr.h > @@ -766,9 +766,14 @@ struct SpaprEventLogEntry { > > void spapr_events_init(SpaprMachineState *sm); > void spapr_dt_events(SpaprMachineState *sm, void *fdt); > +int spapr_h_cas_do_compose_response(SpaprMachineState *spapr, void *fdt, > + SpaprOptionVector *ov5_updates); > int spapr_h_cas_compose_response(SpaprMachineState *sm, > target_ulong addr, target_ulong size, > SpaprOptionVector *ov5_updates); > +#define FDT_MAX_SIZE 0x100000 > +void *spapr_build_fdt(SpaprMachineState *spapr); > + > void close_htab_fd(SpaprMachineState *spapr); > void spapr_setup_hpt_and_vrma(SpaprMachineState *spapr); > void spapr_free_hpt(SpaprMachineState *spapr); > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > index b097a99951f1..f84895f4a8b4 100644 > --- a/hw/ppc/spapr.c > +++ b/hw/ppc/spapr.c > @@ -978,6 +978,19 @@ static bool spapr_hotplugged_dev_before_cas(void) > return false; > } > > +int spapr_h_cas_do_compose_response(SpaprMachineState *spapr, void *fdt, > + SpaprOptionVector *ov5_updates) Not a great function name. > +{ > + /* Fixup cpu nodes */ > + _FDT((spapr_fixup_cpu_dt(fdt, spapr))); > + > + if (spapr_dt_cas_updates(spapr, fdt, ov5_updates)) { > + return -1; > + } > + > + return 0; > +} > + > int spapr_h_cas_compose_response(SpaprMachineState *spapr, > target_ulong addr, target_ulong size, > SpaprOptionVector *ov5_updates) > @@ -1009,10 +1022,7 @@ int spapr_h_cas_compose_response(SpaprMachineState *spapr, > _FDT((fdt_open_into(fdt_skel, fdt, size))); > g_free(fdt_skel); > > - /* Fixup cpu nodes */ > - _FDT((spapr_fixup_cpu_dt(fdt, spapr))); > - > - if (spapr_dt_cas_updates(spapr, fdt, ov5_updates)) { > + if (spapr_h_cas_do_compose_response(spapr, fdt, ov5_updates)) { > return -1; > } > > @@ -1232,6 +1242,10 @@ static void spapr_dt_chosen(SpaprMachineState *spapr, void *fdt) > > /* We always implemented RTAS as hcall, tell guests to call it directly */ > _FDT(fdt_setprop_cell(fdt, chosen, "qemu,h_rtas", 1)); > + /* Tell the guest that H_CAS will return the entire FDT now, not the diff */ > + if (!spapr->bios_enabled) { > + _FDT(fdt_setprop_cell(fdt, chosen, "qemu,h_cas", 1)); As with H_RTAS< using qemu,hypertas-functions would be more appropriate for this. > + } > > spapr_dt_ov5_platform_support(spapr, fdt, chosen); > > @@ -1262,7 +1276,7 @@ static void spapr_dt_hypervisor(SpaprMachineState *spapr, void *fdt) > } > } > > -static void *spapr_build_fdt(SpaprMachineState *spapr) > +void *spapr_build_fdt(SpaprMachineState *spapr) > { > MachineState *machine = MACHINE(spapr); > MachineClass *mc = MACHINE_GET_CLASS(machine); > diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c > index b964d94f330b..c5cb06c9d507 100644 > --- a/hw/ppc/spapr_hcall.c > +++ b/hw/ppc/spapr_hcall.c > @@ -17,6 +17,7 @@ > #include "hw/ppc/spapr_ovec.h" > #include "mmu-book3s-v3.h" > #include "hw/mem/memory-device.h" > +#include "sysemu/device_tree.h" > > static bool has_spr(PowerPCCPU *cpu, int spr) > { > @@ -1774,9 +1775,51 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, > /* legacy hash or new hash: */ > spapr_setup_hpt_and_vrma(spapr); > } > - spapr->cas_reboot = > - (spapr_h_cas_compose_response(spapr, args[1], args[2], > - ov5_updates) != 0); > + > + if (spapr->bios_enabled) { > + spapr->cas_reboot = > + (spapr_h_cas_compose_response(spapr, args[1], args[2], > + ov5_updates) != 0); > + } else { > + int size; > + void *fdt, *fdt_skel; > + struct fdt_header hdr = { 0 }; > + > + cpu_physical_memory_read(args[1], &hdr, sizeof(hdr)); > + size = fdt32_to_cpu(hdr.totalsize); > + if (size > FDT_MAX_SIZE) { > + return H_NOT_AVAILABLE; > + } > + > + fdt_skel = g_malloc0(size); > + cpu_physical_memory_read(args[1], fdt_skel, size); > + > + fdt = g_malloc0(FDT_MAX_SIZE); > + fdt_open_into(fdt_skel, fdt, FDT_MAX_SIZE); > + g_free(fdt_skel); fdt_open_into() explicitly permits using the same buffer for both arguments, so you don't need two allocations here - you can just allocate FDT_MAX_SIZE, read it in, then fdt_open_into() in place. You probably should check for errors from fdt_open_into(), though. > + > + if (spapr_h_cas_do_compose_response(spapr, fdt, ov5_updates)) { > + g_free(fdt); > + return H_NOT_AVAILABLE; > + } > + fdt_pack(fdt); > + if (fdt_totalsize(fdt) > FDT_MAX_SIZE) { This can't actually happen - you only ever allocated a buffer of size FDT_MAX_SIZE, so if the DT was going to exceed that you would have hit FDT_ERR_NOSPACE in an earlier step. > + error_report("FDT too big ! 0x%x bytes (max is 0x%x)", > + fdt_totalsize(fdt), FDT_MAX_SIZE); > + g_free(fdt); > + return H_NOT_AVAILABLE; > + } > + > + /* Load the fdt */ > + cpu_physical_memory_write(args[1], fdt, fdt_totalsize(fdt)); > + > + g_free(spapr->fdt_blob); > + spapr->fdt_size = fdt_totalsize(fdt); > + spapr->fdt_initial_size = spapr->fdt_size; > + spapr->fdt_blob = fdt; > + > + qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt)); > + } > } > > /*
On 28/07/2019 16:09, David Gibson wrote: > On Sat, Jul 20, 2019 at 11:28:50AM +1000, Alexey Kardashevskiy wrote: >> QEMU already implements H_CAS called by SLOF. The existing handler >> prepares a diff FDT and SLOF applies it on top of its current tree. >> In SLOF-less setup when the user explicitly selected "bios=no", >> this updates the FDT from the OS, updates it and writes back to the OS. >> The new behavior is advertised to the OS via "/chosen/qemu,h_cas". > > I don't love having two different paths here, I'm wondering if we can > unify things at all. > > I have wondered at some points if there's anything preventing us just > giving a whole new device tree at CAS, rather than selected updates - > that could simplify several things. An update has a header, and this patch just copies the fdt over, if I add a header to this new path, this will require a few more changes in the guest which I would rather avoid. Thanks, > >> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> >> --- >> include/hw/ppc/spapr.h | 5 +++++ >> hw/ppc/spapr.c | 24 ++++++++++++++++----- >> hw/ppc/spapr_hcall.c | 49 +++++++++++++++++++++++++++++++++++++++--- >> 3 files changed, 70 insertions(+), 8 deletions(-) >> >> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h >> index 7f5d7a70d27e..73cd9cf25b83 100644 >> --- a/include/hw/ppc/spapr.h >> +++ b/include/hw/ppc/spapr.h >> @@ -766,9 +766,14 @@ struct SpaprEventLogEntry { >> >> void spapr_events_init(SpaprMachineState *sm); >> void spapr_dt_events(SpaprMachineState *sm, void *fdt); >> +int spapr_h_cas_do_compose_response(SpaprMachineState *spapr, void *fdt, >> + SpaprOptionVector *ov5_updates); >> int spapr_h_cas_compose_response(SpaprMachineState *sm, >> target_ulong addr, target_ulong size, >> SpaprOptionVector *ov5_updates); >> +#define FDT_MAX_SIZE 0x100000 >> +void *spapr_build_fdt(SpaprMachineState *spapr); >> + >> void close_htab_fd(SpaprMachineState *spapr); >> void spapr_setup_hpt_and_vrma(SpaprMachineState *spapr); >> void spapr_free_hpt(SpaprMachineState *spapr); >> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c >> index b097a99951f1..f84895f4a8b4 100644 >> --- a/hw/ppc/spapr.c >> +++ b/hw/ppc/spapr.c >> @@ -978,6 +978,19 @@ static bool spapr_hotplugged_dev_before_cas(void) >> return false; >> } >> >> +int spapr_h_cas_do_compose_response(SpaprMachineState *spapr, void *fdt, >> + SpaprOptionVector *ov5_updates) > > Not a great function name. > >> +{ >> + /* Fixup cpu nodes */ >> + _FDT((spapr_fixup_cpu_dt(fdt, spapr))); >> + >> + if (spapr_dt_cas_updates(spapr, fdt, ov5_updates)) { >> + return -1; >> + } >> + >> + return 0; >> +} >> + >> int spapr_h_cas_compose_response(SpaprMachineState *spapr, >> target_ulong addr, target_ulong size, >> SpaprOptionVector *ov5_updates) >> @@ -1009,10 +1022,7 @@ int spapr_h_cas_compose_response(SpaprMachineState *spapr, >> _FDT((fdt_open_into(fdt_skel, fdt, size))); >> g_free(fdt_skel); >> >> - /* Fixup cpu nodes */ >> - _FDT((spapr_fixup_cpu_dt(fdt, spapr))); >> - >> - if (spapr_dt_cas_updates(spapr, fdt, ov5_updates)) { >> + if (spapr_h_cas_do_compose_response(spapr, fdt, ov5_updates)) { >> return -1; >> } >> >> @@ -1232,6 +1242,10 @@ static void spapr_dt_chosen(SpaprMachineState *spapr, void *fdt) >> >> /* We always implemented RTAS as hcall, tell guests to call it directly */ >> _FDT(fdt_setprop_cell(fdt, chosen, "qemu,h_rtas", 1)); >> + /* Tell the guest that H_CAS will return the entire FDT now, not the diff */ >> + if (!spapr->bios_enabled) { >> + _FDT(fdt_setprop_cell(fdt, chosen, "qemu,h_cas", 1)); > > As with H_RTAS< using qemu,hypertas-functions would be more > appropriate for this. > >> + } >> >> spapr_dt_ov5_platform_support(spapr, fdt, chosen); >> >> @@ -1262,7 +1276,7 @@ static void spapr_dt_hypervisor(SpaprMachineState *spapr, void *fdt) >> } >> } >> >> -static void *spapr_build_fdt(SpaprMachineState *spapr) >> +void *spapr_build_fdt(SpaprMachineState *spapr) >> { >> MachineState *machine = MACHINE(spapr); >> MachineClass *mc = MACHINE_GET_CLASS(machine); >> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c >> index b964d94f330b..c5cb06c9d507 100644 >> --- a/hw/ppc/spapr_hcall.c >> +++ b/hw/ppc/spapr_hcall.c >> @@ -17,6 +17,7 @@ >> #include "hw/ppc/spapr_ovec.h" >> #include "mmu-book3s-v3.h" >> #include "hw/mem/memory-device.h" >> +#include "sysemu/device_tree.h" >> >> static bool has_spr(PowerPCCPU *cpu, int spr) >> { >> @@ -1774,9 +1775,51 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, >> /* legacy hash or new hash: */ >> spapr_setup_hpt_and_vrma(spapr); >> } >> - spapr->cas_reboot = >> - (spapr_h_cas_compose_response(spapr, args[1], args[2], >> - ov5_updates) != 0); >> + >> + if (spapr->bios_enabled) { >> + spapr->cas_reboot = >> + (spapr_h_cas_compose_response(spapr, args[1], args[2], >> + ov5_updates) != 0); >> + } else { >> + int size; >> + void *fdt, *fdt_skel; >> + struct fdt_header hdr = { 0 }; >> + >> + cpu_physical_memory_read(args[1], &hdr, sizeof(hdr)); >> + size = fdt32_to_cpu(hdr.totalsize); >> + if (size > FDT_MAX_SIZE) { >> + return H_NOT_AVAILABLE; >> + } >> + >> + fdt_skel = g_malloc0(size); >> + cpu_physical_memory_read(args[1], fdt_skel, size); >> + >> + fdt = g_malloc0(FDT_MAX_SIZE); >> + fdt_open_into(fdt_skel, fdt, FDT_MAX_SIZE); >> + g_free(fdt_skel); > > fdt_open_into() explicitly permits using the same buffer for both > arguments, so you don't need two allocations here - you can just > allocate FDT_MAX_SIZE, read it in, then fdt_open_into() in place. > > You probably should check for errors from fdt_open_into(), though. > >> + >> + if (spapr_h_cas_do_compose_response(spapr, fdt, ov5_updates)) { >> + g_free(fdt); >> + return H_NOT_AVAILABLE; >> + } >> + fdt_pack(fdt); >> + if (fdt_totalsize(fdt) > FDT_MAX_SIZE) { > > This can't actually happen - you only ever allocated a buffer of size > FDT_MAX_SIZE, so if the DT was going to exceed that you would have hit > FDT_ERR_NOSPACE in an earlier step. > >> + error_report("FDT too big ! 0x%x bytes (max is 0x%x)", >> + fdt_totalsize(fdt), FDT_MAX_SIZE); >> + g_free(fdt); >> + return H_NOT_AVAILABLE; >> + } >> + >> + /* Load the fdt */ >> + cpu_physical_memory_write(args[1], fdt, fdt_totalsize(fdt)); >> + >> + g_free(spapr->fdt_blob); >> + spapr->fdt_size = fdt_totalsize(fdt); >> + spapr->fdt_initial_size = spapr->fdt_size; >> + spapr->fdt_blob = fdt; >> + >> + qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt)); >> + } >> } >> >> /* >
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 7f5d7a70d27e..73cd9cf25b83 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -766,9 +766,14 @@ struct SpaprEventLogEntry { void spapr_events_init(SpaprMachineState *sm); void spapr_dt_events(SpaprMachineState *sm, void *fdt); +int spapr_h_cas_do_compose_response(SpaprMachineState *spapr, void *fdt, + SpaprOptionVector *ov5_updates); int spapr_h_cas_compose_response(SpaprMachineState *sm, target_ulong addr, target_ulong size, SpaprOptionVector *ov5_updates); +#define FDT_MAX_SIZE 0x100000 +void *spapr_build_fdt(SpaprMachineState *spapr); + void close_htab_fd(SpaprMachineState *spapr); void spapr_setup_hpt_and_vrma(SpaprMachineState *spapr); void spapr_free_hpt(SpaprMachineState *spapr); diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index b097a99951f1..f84895f4a8b4 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -978,6 +978,19 @@ static bool spapr_hotplugged_dev_before_cas(void) return false; } +int spapr_h_cas_do_compose_response(SpaprMachineState *spapr, void *fdt, + SpaprOptionVector *ov5_updates) +{ + /* Fixup cpu nodes */ + _FDT((spapr_fixup_cpu_dt(fdt, spapr))); + + if (spapr_dt_cas_updates(spapr, fdt, ov5_updates)) { + return -1; + } + + return 0; +} + int spapr_h_cas_compose_response(SpaprMachineState *spapr, target_ulong addr, target_ulong size, SpaprOptionVector *ov5_updates) @@ -1009,10 +1022,7 @@ int spapr_h_cas_compose_response(SpaprMachineState *spapr, _FDT((fdt_open_into(fdt_skel, fdt, size))); g_free(fdt_skel); - /* Fixup cpu nodes */ - _FDT((spapr_fixup_cpu_dt(fdt, spapr))); - - if (spapr_dt_cas_updates(spapr, fdt, ov5_updates)) { + if (spapr_h_cas_do_compose_response(spapr, fdt, ov5_updates)) { return -1; } @@ -1232,6 +1242,10 @@ static void spapr_dt_chosen(SpaprMachineState *spapr, void *fdt) /* We always implemented RTAS as hcall, tell guests to call it directly */ _FDT(fdt_setprop_cell(fdt, chosen, "qemu,h_rtas", 1)); + /* Tell the guest that H_CAS will return the entire FDT now, not the diff */ + if (!spapr->bios_enabled) { + _FDT(fdt_setprop_cell(fdt, chosen, "qemu,h_cas", 1)); + } spapr_dt_ov5_platform_support(spapr, fdt, chosen); @@ -1262,7 +1276,7 @@ static void spapr_dt_hypervisor(SpaprMachineState *spapr, void *fdt) } } -static void *spapr_build_fdt(SpaprMachineState *spapr) +void *spapr_build_fdt(SpaprMachineState *spapr) { MachineState *machine = MACHINE(spapr); MachineClass *mc = MACHINE_GET_CLASS(machine); diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index b964d94f330b..c5cb06c9d507 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -17,6 +17,7 @@ #include "hw/ppc/spapr_ovec.h" #include "mmu-book3s-v3.h" #include "hw/mem/memory-device.h" +#include "sysemu/device_tree.h" static bool has_spr(PowerPCCPU *cpu, int spr) { @@ -1774,9 +1775,51 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, /* legacy hash or new hash: */ spapr_setup_hpt_and_vrma(spapr); } - spapr->cas_reboot = - (spapr_h_cas_compose_response(spapr, args[1], args[2], - ov5_updates) != 0); + + if (spapr->bios_enabled) { + spapr->cas_reboot = + (spapr_h_cas_compose_response(spapr, args[1], args[2], + ov5_updates) != 0); + } else { + int size; + void *fdt, *fdt_skel; + struct fdt_header hdr = { 0 }; + + cpu_physical_memory_read(args[1], &hdr, sizeof(hdr)); + size = fdt32_to_cpu(hdr.totalsize); + if (size > FDT_MAX_SIZE) { + return H_NOT_AVAILABLE; + } + + fdt_skel = g_malloc0(size); + cpu_physical_memory_read(args[1], fdt_skel, size); + + fdt = g_malloc0(FDT_MAX_SIZE); + fdt_open_into(fdt_skel, fdt, FDT_MAX_SIZE); + g_free(fdt_skel); + + if (spapr_h_cas_do_compose_response(spapr, fdt, ov5_updates)) { + g_free(fdt); + return H_NOT_AVAILABLE; + } + fdt_pack(fdt); + if (fdt_totalsize(fdt) > FDT_MAX_SIZE) { + error_report("FDT too big ! 0x%x bytes (max is 0x%x)", + fdt_totalsize(fdt), FDT_MAX_SIZE); + g_free(fdt); + return H_NOT_AVAILABLE; + } + + /* Load the fdt */ + cpu_physical_memory_write(args[1], fdt, fdt_totalsize(fdt)); + + g_free(spapr->fdt_blob); + spapr->fdt_size = fdt_totalsize(fdt); + spapr->fdt_initial_size = spapr->fdt_size; + spapr->fdt_blob = fdt; + + qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt)); + } } /*
QEMU already implements H_CAS called by SLOF. The existing handler prepares a diff FDT and SLOF applies it on top of its current tree. In SLOF-less setup when the user explicitly selected "bios=no", this updates the FDT from the OS, updates it and writes back to the OS. The new behavior is advertised to the OS via "/chosen/qemu,h_cas". Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> --- include/hw/ppc/spapr.h | 5 +++++ hw/ppc/spapr.c | 24 ++++++++++++++++----- hw/ppc/spapr_hcall.c | 49 +++++++++++++++++++++++++++++++++++++++--- 3 files changed, 70 insertions(+), 8 deletions(-)