Message ID | 20191024074307.22821-5-ganeshgr@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | target-ppc/spapr: Add FWNMI support in QEMU for PowerKVM guests | expand |
On Thu, Oct 24, 2019 at 01:13:04PM +0530, Ganesh Goudar wrote: > From: Aravinda Prasad <arawinda.p@gmail.com> > > Upon a machine check exception (MCE) in a guest address space, > KVM causes a guest exit to enable QEMU to build and pass the > error to the guest in the PAPR defined rtas error log format. > > This patch builds the rtas error log, copies it to the rtas_addr > and then invokes the guest registered machine check handler. The > handler in the guest takes suitable action(s) depending on the type > and criticality of the error. For example, if an error is > unrecoverable memory corruption in an application inside the > guest, then the guest kernel sends a SIGBUS to the application. > For recoverable errors, the guest performs recovery actions and > logs the error. > > [Assume SLOF has allocated enough room for rtas error log] Is that correct with the SLOF image currently included in qemu? Apart from that detail, Reviewed-by: David Gibson <david@gibson.dropbear.id.au> > Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com> > Signed-off-by: Aravinda Prasad <arawinda.p@gmail.com> > --- > hw/ppc/spapr_events.c | 220 ++++++++++++++++++++++++++++++++++++++++- > hw/ppc/spapr_rtas.c | 26 +++++ > include/hw/ppc/spapr.h | 6 +- > target/ppc/kvm.c | 4 +- > 4 files changed, 253 insertions(+), 3 deletions(-) > > diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c > index 0ce96b86be..db44e09154 100644 > --- a/hw/ppc/spapr_events.c > +++ b/hw/ppc/spapr_events.c > @@ -214,6 +214,104 @@ struct hp_extended_log { > struct rtas_event_log_v6_hp hp; > } QEMU_PACKED; > > +struct rtas_event_log_v6_mc { > +#define RTAS_LOG_V6_SECTION_ID_MC 0x4D43 /* MC */ > + struct rtas_event_log_v6_section_header hdr; > + uint32_t fru_id; > + uint32_t proc_id; > + uint8_t error_type; > +#define RTAS_LOG_V6_MC_TYPE_UE 0 > +#define RTAS_LOG_V6_MC_TYPE_SLB 1 > +#define RTAS_LOG_V6_MC_TYPE_ERAT 2 > +#define RTAS_LOG_V6_MC_TYPE_TLB 4 > +#define RTAS_LOG_V6_MC_TYPE_D_CACHE 5 > +#define 
RTAS_LOG_V6_MC_TYPE_I_CACHE 7 > + uint8_t sub_err_type; > +#define RTAS_LOG_V6_MC_UE_INDETERMINATE 0 > +#define RTAS_LOG_V6_MC_UE_IFETCH 1 > +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH 2 > +#define RTAS_LOG_V6_MC_UE_LOAD_STORE 3 > +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE 4 > +#define RTAS_LOG_V6_MC_SLB_PARITY 0 > +#define RTAS_LOG_V6_MC_SLB_MULTIHIT 1 > +#define RTAS_LOG_V6_MC_SLB_INDETERMINATE 2 > +#define RTAS_LOG_V6_MC_ERAT_PARITY 1 > +#define RTAS_LOG_V6_MC_ERAT_MULTIHIT 2 > +#define RTAS_LOG_V6_MC_ERAT_INDETERMINATE 3 > +#define RTAS_LOG_V6_MC_TLB_PARITY 1 > +#define RTAS_LOG_V6_MC_TLB_MULTIHIT 2 > +#define RTAS_LOG_V6_MC_TLB_INDETERMINATE 3 > + uint8_t reserved_1[6]; > + uint64_t effective_address; > + uint64_t logical_address; > +} QEMU_PACKED; > + > +struct mc_extended_log { > + struct rtas_event_log_v6 v6hdr; > + struct rtas_event_log_v6_mc mc; > +} QEMU_PACKED; > + > +struct MC_ierror_table { > + unsigned long srr1_mask; > + unsigned long srr1_value; > + bool nip_valid; /* nip is a valid indicator of faulting address */ > + uint8_t error_type; > + uint8_t error_subtype; > + unsigned int initiator; > + unsigned int severity; > +}; > + > +static const struct MC_ierror_table mc_ierror_table[] = { > +{ 0x00000000081c0000, 0x0000000000040000, true, > + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_IFETCH, > + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > +{ 0x00000000081c0000, 0x0000000000080000, true, > + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY, > + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > +{ 0x00000000081c0000, 0x00000000000c0000, true, > + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT, > + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > +{ 0x00000000081c0000, 0x0000000000100000, true, > + RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT, > + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > +{ 0x00000000081c0000, 0x0000000000140000, true, > + RTAS_LOG_V6_MC_TYPE_TLB, 
RTAS_LOG_V6_MC_TLB_MULTIHIT, > + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > +{ 0x00000000081c0000, 0x0000000000180000, true, > + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH, > + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } }; > + > +struct MC_derror_table { > + unsigned long dsisr_value; > + bool dar_valid; /* dar is a valid indicator of faulting address */ > + uint8_t error_type; > + uint8_t error_subtype; > + unsigned int initiator; > + unsigned int severity; > +}; > + > +static const struct MC_derror_table mc_derror_table[] = { > +{ 0x00008000, false, > + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_LOAD_STORE, > + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > +{ 0x00004000, true, > + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE, > + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > +{ 0x00000800, true, > + RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT, > + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > +{ 0x00000400, true, > + RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT, > + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > +{ 0x00000080, true, > + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT, /* Before PARITY */ > + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > +{ 0x00000100, true, > + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY, > + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } }; > + > +#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42)) > + > typedef enum EventClass { > EVENT_CLASS_INTERNAL_ERRORS = 0, > EVENT_CLASS_EPOW = 1, > @@ -622,7 +720,125 @@ void spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type, > RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id); > } > > -void spapr_mce_req_event(PowerPCCPU *cpu) > +static uint32_t spapr_mce_get_elog_type(PowerPCCPU *cpu, bool recovered, > + struct mc_extended_log *ext_elog) > +{ > + int i; > + CPUPPCState *env = &cpu->env; > + 
uint32_t summary; > + uint64_t dsisr = env->spr[SPR_DSISR]; > + > + summary = RTAS_LOG_VERSION_6 | RTAS_LOG_OPTIONAL_PART_PRESENT; > + if (recovered) { > + summary |= RTAS_LOG_DISPOSITION_FULLY_RECOVERED; > + } else { > + summary |= RTAS_LOG_DISPOSITION_NOT_RECOVERED; > + } > + > + if (SRR1_MC_LOADSTORE(env->spr[SPR_SRR1])) { > + for (i = 0; i < ARRAY_SIZE(mc_derror_table); i++) { > + if (!(dsisr & mc_derror_table[i].dsisr_value)) { > + continue; > + } > + > + ext_elog->mc.error_type = mc_derror_table[i].error_type; > + ext_elog->mc.sub_err_type = mc_derror_table[i].error_subtype; > + if (mc_derror_table[i].dar_valid) { > + ext_elog->mc.effective_address = cpu_to_be64(env->spr[SPR_DAR]); > + } > + > + summary |= mc_derror_table[i].initiator > + | mc_derror_table[i].severity; > + > + return summary; > + } > + } else { > + for (i = 0; i < ARRAY_SIZE(mc_ierror_table); i++) { > + if ((env->spr[SPR_SRR1] & mc_ierror_table[i].srr1_mask) != > + mc_ierror_table[i].srr1_value) { > + continue; > + } > + > + ext_elog->mc.error_type = mc_ierror_table[i].error_type; > + ext_elog->mc.sub_err_type = mc_ierror_table[i].error_subtype; > + if (mc_ierror_table[i].nip_valid) { > + ext_elog->mc.effective_address = cpu_to_be64(env->nip); > + } > + > + summary |= mc_ierror_table[i].initiator > + | mc_ierror_table[i].severity; > + > + return summary; > + } > + } > + > + summary |= RTAS_LOG_INITIATOR_CPU; > + return summary; > +} > + > +static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered) > +{ > + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); > + CPUState *cs = CPU(cpu); > + uint64_t rtas_addr; > + CPUPPCState *env = &cpu->env; > + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); > + target_ulong msr = 0; > + struct rtas_error_log log; > + struct mc_extended_log *ext_elog; > + uint32_t summary; > + > + /* > + * Properly set bits in MSR before we invoke the handler. 
> + * SRR0/1, DAR and DSISR are properly set by KVM > + */ > + if (!(*pcc->interrupts_big_endian)(cpu)) { > + msr |= (1ULL << MSR_LE); > + } > + > + if (env->msr & (1ULL << MSR_SF)) { > + msr |= (1ULL << MSR_SF); > + } > + > + msr |= (1ULL << MSR_ME); > + > + ext_elog = g_malloc0(sizeof(*ext_elog)); > + summary = spapr_mce_get_elog_type(cpu, recovered, ext_elog); > + > + log.summary = cpu_to_be32(summary); > + log.extended_length = cpu_to_be32(sizeof(*ext_elog)); > + > + spapr_init_v6hdr(&ext_elog->v6hdr); > + ext_elog->mc.hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MC); > + ext_elog->mc.hdr.section_length = > + cpu_to_be16(sizeof(struct rtas_event_log_v6_mc)); > + ext_elog->mc.hdr.section_version = 1; > + > + /* get rtas addr from fdt */ > + rtas_addr = spapr_get_rtas_addr(); > + if (!rtas_addr) { > + /* Unable to fetch rtas_addr. Hence reset the guest */ > + ppc_cpu_do_system_reset(cs); > + g_free(ext_elog); > + return; > + } > + > + stq_be_phys(&address_space_memory, rtas_addr + RTAS_ERROR_LOG_OFFSET, > + env->gpr[3]); > + cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + > + sizeof(env->gpr[3]), &log, sizeof(log)); > + cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + > + sizeof(env->gpr[3]) + sizeof(log), ext_elog, > + sizeof(*ext_elog)); > + > + env->gpr[3] = rtas_addr + RTAS_ERROR_LOG_OFFSET; > + env->msr = msr; > + env->nip = spapr->guest_machine_check_addr; > + > + g_free(ext_elog); > +} > + > +void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered) > { > SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); > CPUState *cs = CPU(cpu); > @@ -656,6 +872,8 @@ void spapr_mce_req_event(PowerPCCPU *cpu) > } > } > spapr->mc_status = cpu->vcpu_id; > + > + spapr_mce_dispatch_elog(cpu, recovered); > } > > static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr, > diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c > index 8d8d8cdfcb..2c066a372d 100644 > --- a/hw/ppc/spapr_rtas.c > +++ b/hw/ppc/spapr_rtas.c > 
@@ -477,6 +477,32 @@ void spapr_dt_rtas_tokens(void *fdt, int rtas) > } > } > > +hwaddr spapr_get_rtas_addr(void) > +{ > + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); > + int rtas_node; > + const fdt32_t *rtas_data; > + void *fdt = spapr->fdt_blob; > + > + /* fetch rtas addr from fdt */ > + rtas_node = fdt_path_offset(fdt, "/rtas"); > + if (rtas_node < 0) { > + return 0; > + } > + > + rtas_data = fdt_getprop(fdt, rtas_node, "linux,rtas-base", NULL); > + if (!rtas_data) { > + return 0; > + } > + > + /* > + * We assume that the OS called RTAS instantiate-rtas, but some other > + * OS might call RTAS instantiate-rtas-64 instead. This fine as of now > + * as SLOF only supports 32-bit variant. > + */ > + return (hwaddr)fdt32_to_cpu(*rtas_data); > +} > + > static void core_rtas_register_types(void) > { > spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character", > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h > index 1f5eb8c856..4afa8d4d09 100644 > --- a/include/hw/ppc/spapr.h > +++ b/include/hw/ppc/spapr.h > @@ -724,6 +724,9 @@ void spapr_load_rtas(SpaprMachineState *spapr, void *fdt, hwaddr addr); > > #define RTAS_ERROR_LOG_MAX 2048 > > +/* Offset from rtas-base where error log is placed */ > +#define RTAS_ERROR_LOG_OFFSET 0x30 > + > #define RTAS_EVENT_SCAN_RATE 1 > > /* This helper should be used to encode interrupt specifiers when the related > @@ -812,7 +815,7 @@ void spapr_clear_pending_events(SpaprMachineState *spapr); > int spapr_max_server_number(SpaprMachineState *spapr); > void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex, > uint64_t pte0, uint64_t pte1); > -void spapr_mce_req_event(PowerPCCPU *cpu); > +void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered); > > /* DRC callbacks. 
*/ > void spapr_core_release(DeviceState *dev); > @@ -903,4 +906,5 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, > #define SPAPR_OV5_XIVE_BOTH 0x80 /* Only to advertise on the platform */ > > void spapr_set_all_lpcrs(target_ulong value, target_ulong mask); > +hwaddr spapr_get_rtas_addr(void); > #endif /* HW_SPAPR_H */ > diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c > index 2d8db6d832..9a902c1064 100644 > --- a/target/ppc/kvm.c > +++ b/target/ppc/kvm.c > @@ -2805,9 +2805,11 @@ int kvm_arch_msi_data_to_gsi(uint32_t data) > > int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run) > { > + bool recovered = run->flags & KVM_RUN_PPC_NMI_DISP_FULLY_RECOV; > + > cpu_synchronize_state(CPU(cpu)); > > - spapr_mce_req_event(cpu); > + spapr_mce_req_event(cpu, recovered); > > return 0; > }
On 11/4/19 9:40 PM, David Gibson wrote: > On Thu, Oct 24, 2019 at 01:13:04PM +0530, Ganesh Goudar wrote: >> From: Aravinda Prasad <arawinda.p@gmail.com> >> >> Upon a machine check exception (MCE) in a guest address space, >> KVM causes a guest exit to enable QEMU to build and pass the >> error to the guest in the PAPR defined rtas error log format. >> >> This patch builds the rtas error log, copies it to the rtas_addr >> and then invokes the guest registered machine check handler. The >> handler in the guest takes suitable action(s) depending on the type >> and criticality of the error. For example, if an error is >> unrecoverable memory corruption in an application inside the >> guest, then the guest kernel sends a SIGBUS to the application. >> For recoverable errors, the guest performs recovery actions and >> logs the error. >> >> [Assume SLOF has allocated enough room for rtas error log] > Is that correct with the SLOF image currently included in qemu? Yes > > Apart from that detail, > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au> > >> Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com> >> Signed-off-by: Aravinda Prasad <arawinda.p@gmail.com> > > >> --- >> hw/ppc/spapr_events.c | 220 ++++++++++++++++++++++++++++++++++++++++- >> hw/ppc/spapr_rtas.c | 26 +++++ >> include/hw/ppc/spapr.h | 6 +- >> target/ppc/kvm.c | 4 +- >> 4 files changed, 253 insertions(+), 3 deletions(-) >> >> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c >> index 0ce96b86be..db44e09154 100644 >> --- a/hw/ppc/spapr_events.c >> +++ b/hw/ppc/spapr_events.c >> @@ -214,6 +214,104 @@ struct hp_extended_log { >> struct rtas_event_log_v6_hp hp; >> } QEMU_PACKED; >> >> +struct rtas_event_log_v6_mc { >> +#define RTAS_LOG_V6_SECTION_ID_MC 0x4D43 /* MC */ >> + struct rtas_event_log_v6_section_header hdr; >> + uint32_t fru_id; >> + uint32_t proc_id; >> + uint8_t error_type; >> +#define RTAS_LOG_V6_MC_TYPE_UE 0 >> +#define RTAS_LOG_V6_MC_TYPE_SLB 1 >> +#define 
RTAS_LOG_V6_MC_TYPE_ERAT 2 >> +#define RTAS_LOG_V6_MC_TYPE_TLB 4 >> +#define RTAS_LOG_V6_MC_TYPE_D_CACHE 5 >> +#define RTAS_LOG_V6_MC_TYPE_I_CACHE 7 >> + uint8_t sub_err_type; >> +#define RTAS_LOG_V6_MC_UE_INDETERMINATE 0 >> +#define RTAS_LOG_V6_MC_UE_IFETCH 1 >> +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH 2 >> +#define RTAS_LOG_V6_MC_UE_LOAD_STORE 3 >> +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE 4 >> +#define RTAS_LOG_V6_MC_SLB_PARITY 0 >> +#define RTAS_LOG_V6_MC_SLB_MULTIHIT 1 >> +#define RTAS_LOG_V6_MC_SLB_INDETERMINATE 2 >> +#define RTAS_LOG_V6_MC_ERAT_PARITY 1 >> +#define RTAS_LOG_V6_MC_ERAT_MULTIHIT 2 >> +#define RTAS_LOG_V6_MC_ERAT_INDETERMINATE 3 >> +#define RTAS_LOG_V6_MC_TLB_PARITY 1 >> +#define RTAS_LOG_V6_MC_TLB_MULTIHIT 2 >> +#define RTAS_LOG_V6_MC_TLB_INDETERMINATE 3 >> + uint8_t reserved_1[6]; >> + uint64_t effective_address; >> + uint64_t logical_address; >> +} QEMU_PACKED; >> + >> +struct mc_extended_log { >> + struct rtas_event_log_v6 v6hdr; >> + struct rtas_event_log_v6_mc mc; >> +} QEMU_PACKED; >> + >> +struct MC_ierror_table { >> + unsigned long srr1_mask; >> + unsigned long srr1_value; >> + bool nip_valid; /* nip is a valid indicator of faulting address */ >> + uint8_t error_type; >> + uint8_t error_subtype; >> + unsigned int initiator; >> + unsigned int severity; >> +}; >> + >> +static const struct MC_ierror_table mc_ierror_table[] = { >> +{ 0x00000000081c0000, 0x0000000000040000, true, >> + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_IFETCH, >> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, >> +{ 0x00000000081c0000, 0x0000000000080000, true, >> + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY, >> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, >> +{ 0x00000000081c0000, 0x00000000000c0000, true, >> + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT, >> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, >> +{ 0x00000000081c0000, 0x0000000000100000, true, >> + RTAS_LOG_V6_MC_TYPE_ERAT, 
RTAS_LOG_V6_MC_ERAT_MULTIHIT, >> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, >> +{ 0x00000000081c0000, 0x0000000000140000, true, >> + RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT, >> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, >> +{ 0x00000000081c0000, 0x0000000000180000, true, >> + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH, >> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } }; >> + >> +struct MC_derror_table { >> + unsigned long dsisr_value; >> + bool dar_valid; /* dar is a valid indicator of faulting address */ >> + uint8_t error_type; >> + uint8_t error_subtype; >> + unsigned int initiator; >> + unsigned int severity; >> +}; >> + >> +static const struct MC_derror_table mc_derror_table[] = { >> +{ 0x00008000, false, >> + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_LOAD_STORE, >> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, >> +{ 0x00004000, true, >> + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE, >> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, >> +{ 0x00000800, true, >> + RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT, >> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, >> +{ 0x00000400, true, >> + RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT, >> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, >> +{ 0x00000080, true, >> + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT, /* Before PARITY */ >> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, >> +{ 0x00000100, true, >> + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY, >> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } }; >> + >> +#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42)) >> + >> typedef enum EventClass { >> EVENT_CLASS_INTERNAL_ERRORS = 0, >> EVENT_CLASS_EPOW = 1, >> @@ -622,7 +720,125 @@ void spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type, >> RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id); >> } >> 
>> -void spapr_mce_req_event(PowerPCCPU *cpu) >> +static uint32_t spapr_mce_get_elog_type(PowerPCCPU *cpu, bool recovered, >> + struct mc_extended_log *ext_elog) >> +{ >> + int i; >> + CPUPPCState *env = &cpu->env; >> + uint32_t summary; >> + uint64_t dsisr = env->spr[SPR_DSISR]; >> + >> + summary = RTAS_LOG_VERSION_6 | RTAS_LOG_OPTIONAL_PART_PRESENT; >> + if (recovered) { >> + summary |= RTAS_LOG_DISPOSITION_FULLY_RECOVERED; >> + } else { >> + summary |= RTAS_LOG_DISPOSITION_NOT_RECOVERED; >> + } >> + >> + if (SRR1_MC_LOADSTORE(env->spr[SPR_SRR1])) { >> + for (i = 0; i < ARRAY_SIZE(mc_derror_table); i++) { >> + if (!(dsisr & mc_derror_table[i].dsisr_value)) { >> + continue; >> + } >> + >> + ext_elog->mc.error_type = mc_derror_table[i].error_type; >> + ext_elog->mc.sub_err_type = mc_derror_table[i].error_subtype; >> + if (mc_derror_table[i].dar_valid) { >> + ext_elog->mc.effective_address = cpu_to_be64(env->spr[SPR_DAR]); >> + } >> + >> + summary |= mc_derror_table[i].initiator >> + | mc_derror_table[i].severity; >> + >> + return summary; >> + } >> + } else { >> + for (i = 0; i < ARRAY_SIZE(mc_ierror_table); i++) { >> + if ((env->spr[SPR_SRR1] & mc_ierror_table[i].srr1_mask) != >> + mc_ierror_table[i].srr1_value) { >> + continue; >> + } >> + >> + ext_elog->mc.error_type = mc_ierror_table[i].error_type; >> + ext_elog->mc.sub_err_type = mc_ierror_table[i].error_subtype; >> + if (mc_ierror_table[i].nip_valid) { >> + ext_elog->mc.effective_address = cpu_to_be64(env->nip); >> + } >> + >> + summary |= mc_ierror_table[i].initiator >> + | mc_ierror_table[i].severity; >> + >> + return summary; >> + } >> + } >> + >> + summary |= RTAS_LOG_INITIATOR_CPU; >> + return summary; >> +} >> + >> +static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered) >> +{ >> + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); >> + CPUState *cs = CPU(cpu); >> + uint64_t rtas_addr; >> + CPUPPCState *env = &cpu->env; >> + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); >> + 
target_ulong msr = 0; >> + struct rtas_error_log log; >> + struct mc_extended_log *ext_elog; >> + uint32_t summary; >> + >> + /* >> + * Properly set bits in MSR before we invoke the handler. >> + * SRR0/1, DAR and DSISR are properly set by KVM >> + */ >> + if (!(*pcc->interrupts_big_endian)(cpu)) { >> + msr |= (1ULL << MSR_LE); >> + } >> + >> + if (env->msr & (1ULL << MSR_SF)) { >> + msr |= (1ULL << MSR_SF); >> + } >> + >> + msr |= (1ULL << MSR_ME); >> + >> + ext_elog = g_malloc0(sizeof(*ext_elog)); >> + summary = spapr_mce_get_elog_type(cpu, recovered, ext_elog); >> + >> + log.summary = cpu_to_be32(summary); >> + log.extended_length = cpu_to_be32(sizeof(*ext_elog)); >> + >> + spapr_init_v6hdr(&ext_elog->v6hdr); >> + ext_elog->mc.hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MC); >> + ext_elog->mc.hdr.section_length = >> + cpu_to_be16(sizeof(struct rtas_event_log_v6_mc)); >> + ext_elog->mc.hdr.section_version = 1; >> + >> + /* get rtas addr from fdt */ >> + rtas_addr = spapr_get_rtas_addr(); >> + if (!rtas_addr) { >> + /* Unable to fetch rtas_addr. 
Hence reset the guest */ >> + ppc_cpu_do_system_reset(cs); >> + g_free(ext_elog); >> + return; >> + } >> + >> + stq_be_phys(&address_space_memory, rtas_addr + RTAS_ERROR_LOG_OFFSET, >> + env->gpr[3]); >> + cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + >> + sizeof(env->gpr[3]), &log, sizeof(log)); >> + cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + >> + sizeof(env->gpr[3]) + sizeof(log), ext_elog, >> + sizeof(*ext_elog)); >> + >> + env->gpr[3] = rtas_addr + RTAS_ERROR_LOG_OFFSET; >> + env->msr = msr; >> + env->nip = spapr->guest_machine_check_addr; >> + >> + g_free(ext_elog); >> +} >> + >> +void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered) >> { >> SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); >> CPUState *cs = CPU(cpu); >> @@ -656,6 +872,8 @@ void spapr_mce_req_event(PowerPCCPU *cpu) >> } >> } >> spapr->mc_status = cpu->vcpu_id; >> + >> + spapr_mce_dispatch_elog(cpu, recovered); >> } >> >> static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr, >> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c >> index 8d8d8cdfcb..2c066a372d 100644 >> --- a/hw/ppc/spapr_rtas.c >> +++ b/hw/ppc/spapr_rtas.c >> @@ -477,6 +477,32 @@ void spapr_dt_rtas_tokens(void *fdt, int rtas) >> } >> } >> >> +hwaddr spapr_get_rtas_addr(void) >> +{ >> + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); >> + int rtas_node; >> + const fdt32_t *rtas_data; >> + void *fdt = spapr->fdt_blob; >> + >> + /* fetch rtas addr from fdt */ >> + rtas_node = fdt_path_offset(fdt, "/rtas"); >> + if (rtas_node < 0) { >> + return 0; >> + } >> + >> + rtas_data = fdt_getprop(fdt, rtas_node, "linux,rtas-base", NULL); >> + if (!rtas_data) { >> + return 0; >> + } >> + >> + /* >> + * We assume that the OS called RTAS instantiate-rtas, but some other >> + * OS might call RTAS instantiate-rtas-64 instead. This fine as of now >> + * as SLOF only supports 32-bit variant. 
>> + */ >> + return (hwaddr)fdt32_to_cpu(*rtas_data); >> +} >> + >> static void core_rtas_register_types(void) >> { >> spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character", >> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h >> index 1f5eb8c856..4afa8d4d09 100644 >> --- a/include/hw/ppc/spapr.h >> +++ b/include/hw/ppc/spapr.h >> @@ -724,6 +724,9 @@ void spapr_load_rtas(SpaprMachineState *spapr, void *fdt, hwaddr addr); >> >> #define RTAS_ERROR_LOG_MAX 2048 >> >> +/* Offset from rtas-base where error log is placed */ >> +#define RTAS_ERROR_LOG_OFFSET 0x30 >> + >> #define RTAS_EVENT_SCAN_RATE 1 >> >> /* This helper should be used to encode interrupt specifiers when the related >> @@ -812,7 +815,7 @@ void spapr_clear_pending_events(SpaprMachineState *spapr); >> int spapr_max_server_number(SpaprMachineState *spapr); >> void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex, >> uint64_t pte0, uint64_t pte1); >> -void spapr_mce_req_event(PowerPCCPU *cpu); >> +void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered); >> >> /* DRC callbacks. */ >> void spapr_core_release(DeviceState *dev); >> @@ -903,4 +906,5 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, >> #define SPAPR_OV5_XIVE_BOTH 0x80 /* Only to advertise on the platform */ >> >> void spapr_set_all_lpcrs(target_ulong value, target_ulong mask); >> +hwaddr spapr_get_rtas_addr(void); >> #endif /* HW_SPAPR_H */ >> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c >> index 2d8db6d832..9a902c1064 100644 >> --- a/target/ppc/kvm.c >> +++ b/target/ppc/kvm.c >> @@ -2805,9 +2805,11 @@ int kvm_arch_msi_data_to_gsi(uint32_t data) >> >> int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run) >> { >> + bool recovered = run->flags & KVM_RUN_PPC_NMI_DISP_FULLY_RECOV; >> + >> cpu_synchronize_state(CPU(cpu)); >> >> - spapr_mce_req_event(cpu); >> + spapr_mce_req_event(cpu, recovered); >> >> return 0; >> }
On 11/6/19 4:37 PM, Ganesh wrote: > > On 11/4/19 9:40 PM, David Gibson wrote: >> On Thu, Oct 24, 2019 at 01:13:04PM +0530, Ganesh Goudar wrote: >>> From: Aravinda Prasad <arawinda.p@gmail.com> >>> >>> Upon a machine check exception (MCE) in a guest address space, >>> KVM causes a guest exit to enable QEMU to build and pass the >>> error to the guest in the PAPR defined rtas error log format. >>> >>> This patch builds the rtas error log, copies it to the rtas_addr >>> and then invokes the guest registered machine check handler. The >>> handler in the guest takes suitable action(s) depending on the type >>> and criticality of the error. For example, if an error is >>> unrecoverable memory corruption in an application inside the >>> guest, then the guest kernel sends a SIGBUS to the application. >>> For recoverable errors, the guest performs recovery actions and >>> logs the error. >>> >>> [Assume SLOF has allocated enough room for rtas error log] >> Is that correct with the SLOF image currently included in qemu? > Yes >> >> Apart from that detail, >> >> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Hi David, Please see if this patch series can be merged to 4.2. 
>> >>> Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com> >>> Signed-off-by: Aravinda Prasad <arawinda.p@gmail.com> >> >> >>> --- >>> hw/ppc/spapr_events.c | 220 >>> ++++++++++++++++++++++++++++++++++++++++- >>> hw/ppc/spapr_rtas.c | 26 +++++ >>> include/hw/ppc/spapr.h | 6 +- >>> target/ppc/kvm.c | 4 +- >>> 4 files changed, 253 insertions(+), 3 deletions(-) >>> >>> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c >>> index 0ce96b86be..db44e09154 100644 >>> --- a/hw/ppc/spapr_events.c >>> +++ b/hw/ppc/spapr_events.c >>> @@ -214,6 +214,104 @@ struct hp_extended_log { >>> struct rtas_event_log_v6_hp hp; >>> } QEMU_PACKED; >>> +struct rtas_event_log_v6_mc { >>> +#define RTAS_LOG_V6_SECTION_ID_MC 0x4D43 /* MC */ >>> + struct rtas_event_log_v6_section_header hdr; >>> + uint32_t fru_id; >>> + uint32_t proc_id; >>> + uint8_t error_type; >>> +#define RTAS_LOG_V6_MC_TYPE_UE 0 >>> +#define RTAS_LOG_V6_MC_TYPE_SLB 1 >>> +#define RTAS_LOG_V6_MC_TYPE_ERAT 2 >>> +#define RTAS_LOG_V6_MC_TYPE_TLB 4 >>> +#define RTAS_LOG_V6_MC_TYPE_D_CACHE 5 >>> +#define RTAS_LOG_V6_MC_TYPE_I_CACHE 7 >>> + uint8_t sub_err_type; >>> +#define RTAS_LOG_V6_MC_UE_INDETERMINATE 0 >>> +#define RTAS_LOG_V6_MC_UE_IFETCH 1 >>> +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH 2 >>> +#define RTAS_LOG_V6_MC_UE_LOAD_STORE 3 >>> +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE 4 >>> +#define RTAS_LOG_V6_MC_SLB_PARITY 0 >>> +#define RTAS_LOG_V6_MC_SLB_MULTIHIT 1 >>> +#define RTAS_LOG_V6_MC_SLB_INDETERMINATE 2 >>> +#define RTAS_LOG_V6_MC_ERAT_PARITY 1 >>> +#define RTAS_LOG_V6_MC_ERAT_MULTIHIT 2 >>> +#define RTAS_LOG_V6_MC_ERAT_INDETERMINATE 3 >>> +#define RTAS_LOG_V6_MC_TLB_PARITY 1 >>> +#define RTAS_LOG_V6_MC_TLB_MULTIHIT 2 >>> +#define RTAS_LOG_V6_MC_TLB_INDETERMINATE 3 >>> + uint8_t reserved_1[6]; >>> + uint64_t effective_address; >>> + uint64_t logical_address; >>> +} QEMU_PACKED; >>> + >>> +struct mc_extended_log { >>> + struct rtas_event_log_v6 v6hdr; >>> + struct rtas_event_log_v6_mc mc; 
>>> +} QEMU_PACKED; >>> + >>> +struct MC_ierror_table { >>> + unsigned long srr1_mask; >>> + unsigned long srr1_value; >>> + bool nip_valid; /* nip is a valid indicator of faulting address */ >>> + uint8_t error_type; >>> + uint8_t error_subtype; >>> + unsigned int initiator; >>> + unsigned int severity; >>> +}; >>> + >>> +static const struct MC_ierror_table mc_ierror_table[] = { >>> +{ 0x00000000081c0000, 0x0000000000040000, true, >>> + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_IFETCH, >>> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, >>> +{ 0x00000000081c0000, 0x0000000000080000, true, >>> + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY, >>> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, >>> +{ 0x00000000081c0000, 0x00000000000c0000, true, >>> + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT, >>> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, >>> +{ 0x00000000081c0000, 0x0000000000100000, true, >>> + RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT, >>> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, >>> +{ 0x00000000081c0000, 0x0000000000140000, true, >>> + RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT, >>> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, >>> +{ 0x00000000081c0000, 0x0000000000180000, true, >>> + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH, >>> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } }; >>> + >>> +struct MC_derror_table { >>> + unsigned long dsisr_value; >>> + bool dar_valid; /* dar is a valid indicator of faulting address */ >>> + uint8_t error_type; >>> + uint8_t error_subtype; >>> + unsigned int initiator; >>> + unsigned int severity; >>> +}; >>> + >>> +static const struct MC_derror_table mc_derror_table[] = { >>> +{ 0x00008000, false, >>> + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_LOAD_STORE, >>> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, >>> +{ 0x00004000, true, >>> + RTAS_LOG_V6_MC_TYPE_UE, >>> 
RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE, >>> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, >>> +{ 0x00000800, true, >>> + RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT, >>> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, >>> +{ 0x00000400, true, >>> + RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT, >>> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, >>> +{ 0x00000080, true, >>> + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT, /* Before >>> PARITY */ >>> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, >>> +{ 0x00000100, true, >>> + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY, >>> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } }; >>> + >>> +#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42)) >>> + >>> typedef enum EventClass { >>> EVENT_CLASS_INTERNAL_ERRORS = 0, >>> EVENT_CLASS_EPOW = 1, >>> @@ -622,7 +720,125 @@ void >>> spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type, >>> RTAS_LOG_V6_HP_ACTION_REMOVE, >>> drc_type, &drc_id); >>> } >>> -void spapr_mce_req_event(PowerPCCPU *cpu) >>> +static uint32_t spapr_mce_get_elog_type(PowerPCCPU *cpu, bool >>> recovered, >>> + struct mc_extended_log >>> *ext_elog) >>> +{ >>> + int i; >>> + CPUPPCState *env = &cpu->env; >>> + uint32_t summary; >>> + uint64_t dsisr = env->spr[SPR_DSISR]; >>> + >>> + summary = RTAS_LOG_VERSION_6 | RTAS_LOG_OPTIONAL_PART_PRESENT; >>> + if (recovered) { >>> + summary |= RTAS_LOG_DISPOSITION_FULLY_RECOVERED; >>> + } else { >>> + summary |= RTAS_LOG_DISPOSITION_NOT_RECOVERED; >>> + } >>> + >>> + if (SRR1_MC_LOADSTORE(env->spr[SPR_SRR1])) { >>> + for (i = 0; i < ARRAY_SIZE(mc_derror_table); i++) { >>> + if (!(dsisr & mc_derror_table[i].dsisr_value)) { >>> + continue; >>> + } >>> + >>> + ext_elog->mc.error_type = mc_derror_table[i].error_type; >>> + ext_elog->mc.sub_err_type = >>> mc_derror_table[i].error_subtype; >>> + if (mc_derror_table[i].dar_valid) { >>> + ext_elog->mc.effective_address = 
>>> cpu_to_be64(env->spr[SPR_DAR]); >>> + } >>> + >>> + summary |= mc_derror_table[i].initiator >>> + | mc_derror_table[i].severity; >>> + >>> + return summary; >>> + } >>> + } else { >>> + for (i = 0; i < ARRAY_SIZE(mc_ierror_table); i++) { >>> + if ((env->spr[SPR_SRR1] & mc_ierror_table[i].srr1_mask) != >>> + mc_ierror_table[i].srr1_value) { >>> + continue; >>> + } >>> + >>> + ext_elog->mc.error_type = mc_ierror_table[i].error_type; >>> + ext_elog->mc.sub_err_type = >>> mc_ierror_table[i].error_subtype; >>> + if (mc_ierror_table[i].nip_valid) { >>> + ext_elog->mc.effective_address = >>> cpu_to_be64(env->nip); >>> + } >>> + >>> + summary |= mc_ierror_table[i].initiator >>> + | mc_ierror_table[i].severity; >>> + >>> + return summary; >>> + } >>> + } >>> + >>> + summary |= RTAS_LOG_INITIATOR_CPU; >>> + return summary; >>> +} >>> + >>> +static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered) >>> +{ >>> + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); >>> + CPUState *cs = CPU(cpu); >>> + uint64_t rtas_addr; >>> + CPUPPCState *env = &cpu->env; >>> + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); >>> + target_ulong msr = 0; >>> + struct rtas_error_log log; >>> + struct mc_extended_log *ext_elog; >>> + uint32_t summary; >>> + >>> + /* >>> + * Properly set bits in MSR before we invoke the handler. 
>>> + * SRR0/1, DAR and DSISR are properly set by KVM >>> + */ >>> + if (!(*pcc->interrupts_big_endian)(cpu)) { >>> + msr |= (1ULL << MSR_LE); >>> + } >>> + >>> + if (env->msr & (1ULL << MSR_SF)) { >>> + msr |= (1ULL << MSR_SF); >>> + } >>> + >>> + msr |= (1ULL << MSR_ME); >>> + >>> + ext_elog = g_malloc0(sizeof(*ext_elog)); >>> + summary = spapr_mce_get_elog_type(cpu, recovered, ext_elog); >>> + >>> + log.summary = cpu_to_be32(summary); >>> + log.extended_length = cpu_to_be32(sizeof(*ext_elog)); >>> + >>> + spapr_init_v6hdr(&ext_elog->v6hdr); >>> + ext_elog->mc.hdr.section_id = >>> cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MC); >>> + ext_elog->mc.hdr.section_length = >>> + cpu_to_be16(sizeof(struct rtas_event_log_v6_mc)); >>> + ext_elog->mc.hdr.section_version = 1; >>> + >>> + /* get rtas addr from fdt */ >>> + rtas_addr = spapr_get_rtas_addr(); >>> + if (!rtas_addr) { >>> + /* Unable to fetch rtas_addr. Hence reset the guest */ >>> + ppc_cpu_do_system_reset(cs); >>> + g_free(ext_elog); >>> + return; >>> + } >>> + >>> + stq_be_phys(&address_space_memory, rtas_addr + >>> RTAS_ERROR_LOG_OFFSET, >>> + env->gpr[3]); >>> + cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + >>> + sizeof(env->gpr[3]), &log, sizeof(log)); >>> + cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + >>> + sizeof(env->gpr[3]) + sizeof(log), >>> ext_elog, >>> + sizeof(*ext_elog)); >>> + >>> + env->gpr[3] = rtas_addr + RTAS_ERROR_LOG_OFFSET; >>> + env->msr = msr; >>> + env->nip = spapr->guest_machine_check_addr; >>> + >>> + g_free(ext_elog); >>> +} >>> + >>> +void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered) >>> { >>> SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); >>> CPUState *cs = CPU(cpu); >>> @@ -656,6 +872,8 @@ void spapr_mce_req_event(PowerPCCPU *cpu) >>> } >>> } >>> spapr->mc_status = cpu->vcpu_id; >>> + >>> + spapr_mce_dispatch_elog(cpu, recovered); >>> } >>> static void check_exception(PowerPCCPU *cpu, SpaprMachineState >>> *spapr, >>> diff --git 
a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c >>> index 8d8d8cdfcb..2c066a372d 100644 >>> --- a/hw/ppc/spapr_rtas.c >>> +++ b/hw/ppc/spapr_rtas.c >>> @@ -477,6 +477,32 @@ void spapr_dt_rtas_tokens(void *fdt, int rtas) >>> } >>> } >>> +hwaddr spapr_get_rtas_addr(void) >>> +{ >>> + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); >>> + int rtas_node; >>> + const fdt32_t *rtas_data; >>> + void *fdt = spapr->fdt_blob; >>> + >>> + /* fetch rtas addr from fdt */ >>> + rtas_node = fdt_path_offset(fdt, "/rtas"); >>> + if (rtas_node < 0) { >>> + return 0; >>> + } >>> + >>> + rtas_data = fdt_getprop(fdt, rtas_node, "linux,rtas-base", NULL); >>> + if (!rtas_data) { >>> + return 0; >>> + } >>> + >>> + /* >>> + * We assume that the OS called RTAS instantiate-rtas, but some >>> other >>> + * OS might call RTAS instantiate-rtas-64 instead. This fine as >>> of now >>> + * as SLOF only supports 32-bit variant. >>> + */ >>> + return (hwaddr)fdt32_to_cpu(*rtas_data); >>> +} >>> + >>> static void core_rtas_register_types(void) >>> { >>> spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character", >>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h >>> index 1f5eb8c856..4afa8d4d09 100644 >>> --- a/include/hw/ppc/spapr.h >>> +++ b/include/hw/ppc/spapr.h >>> @@ -724,6 +724,9 @@ void spapr_load_rtas(SpaprMachineState *spapr, >>> void *fdt, hwaddr addr); >>> #define RTAS_ERROR_LOG_MAX 2048 >>> +/* Offset from rtas-base where error log is placed */ >>> +#define RTAS_ERROR_LOG_OFFSET 0x30 >>> + >>> #define RTAS_EVENT_SCAN_RATE 1 >>> /* This helper should be used to encode interrupt specifiers >>> when the related >>> @@ -812,7 +815,7 @@ void >>> spapr_clear_pending_events(SpaprMachineState *spapr); >>> int spapr_max_server_number(SpaprMachineState *spapr); >>> void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex, >>> uint64_t pte0, uint64_t pte1); >>> -void spapr_mce_req_event(PowerPCCPU *cpu); >>> +void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered); >>> /* 
DRC callbacks. */ >>> void spapr_core_release(DeviceState *dev); >>> @@ -903,4 +906,5 @@ void spapr_check_pagesize(SpaprMachineState >>> *spapr, hwaddr pagesize, >>> #define SPAPR_OV5_XIVE_BOTH 0x80 /* Only to advertise on the >>> platform */ >>> void spapr_set_all_lpcrs(target_ulong value, target_ulong mask); >>> +hwaddr spapr_get_rtas_addr(void); >>> #endif /* HW_SPAPR_H */ >>> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c >>> index 2d8db6d832..9a902c1064 100644 >>> --- a/target/ppc/kvm.c >>> +++ b/target/ppc/kvm.c >>> @@ -2805,9 +2805,11 @@ int kvm_arch_msi_data_to_gsi(uint32_t data) >>> int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run) >>> { >>> + bool recovered = run->flags & KVM_RUN_PPC_NMI_DISP_FULLY_RECOV; >>> + >>> cpu_synchronize_state(CPU(cpu)); >>> - spapr_mce_req_event(cpu); >>> + spapr_mce_req_event(cpu, recovered); >>> return 0; >>> }
On Mon, Nov 18, 2019 at 04:39:16PM +0530, Ganesh wrote: > > On 11/6/19 4:37 PM, Ganesh wrote: > > > > On 11/4/19 9:40 PM, David Gibson wrote: > > > On Thu, Oct 24, 2019 at 01:13:04PM +0530, Ganesh Goudar wrote: > > > > From: Aravinda Prasad <arawinda.p@gmail.com> > > > > > > > > Upon a machine check exception (MCE) in a guest address space, > > > > KVM causes a guest exit to enable QEMU to build and pass the > > > > error to the guest in the PAPR defined rtas error log format. > > > > > > > > This patch builds the rtas error log, copies it to the rtas_addr > > > > and then invokes the guest registered machine check handler. The > > > > handler in the guest takes suitable action(s) depending on the type > > > > and criticality of the error. For example, if an error is > > > > unrecoverable memory corruption in an application inside the > > > > guest, then the guest kernel sends a SIGBUS to the application. > > > > For recoverable errors, the guest performs recovery actions and > > > > logs the error. > > > > > > > > [Assume SLOF has allocated enough room for rtas error log] > > > Is that correct with the SLOF image currently included in qemu? > > Yes > > > > > > Apart from that detail, > > > > > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au> > Hi David, Please see if this patch series can be merged to 4.2. Sorry, we're much too late for 4.2 now. I am hoping to merge it for 5.0, the next release. 
> > > > > > > Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com> > > > > Signed-off-by: Aravinda Prasad <arawinda.p@gmail.com> > > > > > > > > > > --- > > > > hw/ppc/spapr_events.c | 220 > > > > ++++++++++++++++++++++++++++++++++++++++- > > > > hw/ppc/spapr_rtas.c | 26 +++++ > > > > include/hw/ppc/spapr.h | 6 +- > > > > target/ppc/kvm.c | 4 +- > > > > 4 files changed, 253 insertions(+), 3 deletions(-) > > > > > > > > diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c > > > > index 0ce96b86be..db44e09154 100644 > > > > --- a/hw/ppc/spapr_events.c > > > > +++ b/hw/ppc/spapr_events.c > > > > @@ -214,6 +214,104 @@ struct hp_extended_log { > > > > struct rtas_event_log_v6_hp hp; > > > > } QEMU_PACKED; > > > > +struct rtas_event_log_v6_mc { > > > > +#define RTAS_LOG_V6_SECTION_ID_MC 0x4D43 /* MC */ > > > > + struct rtas_event_log_v6_section_header hdr; > > > > + uint32_t fru_id; > > > > + uint32_t proc_id; > > > > + uint8_t error_type; > > > > +#define RTAS_LOG_V6_MC_TYPE_UE 0 > > > > +#define RTAS_LOG_V6_MC_TYPE_SLB 1 > > > > +#define RTAS_LOG_V6_MC_TYPE_ERAT 2 > > > > +#define RTAS_LOG_V6_MC_TYPE_TLB 4 > > > > +#define RTAS_LOG_V6_MC_TYPE_D_CACHE 5 > > > > +#define RTAS_LOG_V6_MC_TYPE_I_CACHE 7 > > > > + uint8_t sub_err_type; > > > > +#define RTAS_LOG_V6_MC_UE_INDETERMINATE 0 > > > > +#define RTAS_LOG_V6_MC_UE_IFETCH 1 > > > > +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH 2 > > > > +#define RTAS_LOG_V6_MC_UE_LOAD_STORE 3 > > > > +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE 4 > > > > +#define RTAS_LOG_V6_MC_SLB_PARITY 0 > > > > +#define RTAS_LOG_V6_MC_SLB_MULTIHIT 1 > > > > +#define RTAS_LOG_V6_MC_SLB_INDETERMINATE 2 > > > > +#define RTAS_LOG_V6_MC_ERAT_PARITY 1 > > > > +#define RTAS_LOG_V6_MC_ERAT_MULTIHIT 2 > > > > +#define RTAS_LOG_V6_MC_ERAT_INDETERMINATE 3 > > > > +#define RTAS_LOG_V6_MC_TLB_PARITY 1 > > > > +#define RTAS_LOG_V6_MC_TLB_MULTIHIT 2 > > > > +#define RTAS_LOG_V6_MC_TLB_INDETERMINATE 3 > > > > + uint8_t reserved_1[6]; > > > > + 
uint64_t effective_address; > > > > + uint64_t logical_address; > > > > +} QEMU_PACKED; > > > > + > > > > +struct mc_extended_log { > > > > + struct rtas_event_log_v6 v6hdr; > > > > + struct rtas_event_log_v6_mc mc; > > > > +} QEMU_PACKED; > > > > + > > > > +struct MC_ierror_table { > > > > + unsigned long srr1_mask; > > > > + unsigned long srr1_value; > > > > + bool nip_valid; /* nip is a valid indicator of faulting address */ > > > > + uint8_t error_type; > > > > + uint8_t error_subtype; > > > > + unsigned int initiator; > > > > + unsigned int severity; > > > > +}; > > > > + > > > > +static const struct MC_ierror_table mc_ierror_table[] = { > > > > +{ 0x00000000081c0000, 0x0000000000040000, true, > > > > + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_IFETCH, > > > > + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > > > > +{ 0x00000000081c0000, 0x0000000000080000, true, > > > > + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY, > > > > + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > > > > +{ 0x00000000081c0000, 0x00000000000c0000, true, > > > > + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT, > > > > + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > > > > +{ 0x00000000081c0000, 0x0000000000100000, true, > > > > + RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT, > > > > + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > > > > +{ 0x00000000081c0000, 0x0000000000140000, true, > > > > + RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT, > > > > + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > > > > +{ 0x00000000081c0000, 0x0000000000180000, true, > > > > + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH, > > > > + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } }; > > > > + > > > > +struct MC_derror_table { > > > > + unsigned long dsisr_value; > > > > + bool dar_valid; /* dar is a valid indicator of faulting address */ > > > > + uint8_t error_type; > > > > + uint8_t 
error_subtype; > > > > + unsigned int initiator; > > > > + unsigned int severity; > > > > +}; > > > > + > > > > +static const struct MC_derror_table mc_derror_table[] = { > > > > +{ 0x00008000, false, > > > > + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_LOAD_STORE, > > > > + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > > > > +{ 0x00004000, true, > > > > + RTAS_LOG_V6_MC_TYPE_UE, > > > > RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE, > > > > + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > > > > +{ 0x00000800, true, > > > > + RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT, > > > > + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > > > > +{ 0x00000400, true, > > > > + RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT, > > > > + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > > > > +{ 0x00000080, true, > > > > + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT, /* > > > > Before PARITY */ > > > > + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > > > > +{ 0x00000100, true, > > > > + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY, > > > > + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } }; > > > > + > > > > +#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42)) > > > > + > > > > typedef enum EventClass { > > > > EVENT_CLASS_INTERNAL_ERRORS = 0, > > > > EVENT_CLASS_EPOW = 1, > > > > @@ -622,7 +720,125 @@ void > > > > spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type, > > > > RTAS_LOG_V6_HP_ACTION_REMOVE, > > > > drc_type, &drc_id); > > > > } > > > > -void spapr_mce_req_event(PowerPCCPU *cpu) > > > > +static uint32_t spapr_mce_get_elog_type(PowerPCCPU *cpu, bool > > > > recovered, > > > > + struct mc_extended_log > > > > *ext_elog) > > > > +{ > > > > + int i; > > > > + CPUPPCState *env = &cpu->env; > > > > + uint32_t summary; > > > > + uint64_t dsisr = env->spr[SPR_DSISR]; > > > > + > > > > + summary = RTAS_LOG_VERSION_6 | RTAS_LOG_OPTIONAL_PART_PRESENT; > > > > + if 
(recovered) { > > > > + summary |= RTAS_LOG_DISPOSITION_FULLY_RECOVERED; > > > > + } else { > > > > + summary |= RTAS_LOG_DISPOSITION_NOT_RECOVERED; > > > > + } > > > > + > > > > + if (SRR1_MC_LOADSTORE(env->spr[SPR_SRR1])) { > > > > + for (i = 0; i < ARRAY_SIZE(mc_derror_table); i++) { > > > > + if (!(dsisr & mc_derror_table[i].dsisr_value)) { > > > > + continue; > > > > + } > > > > + > > > > + ext_elog->mc.error_type = mc_derror_table[i].error_type; > > > > + ext_elog->mc.sub_err_type = > > > > mc_derror_table[i].error_subtype; > > > > + if (mc_derror_table[i].dar_valid) { > > > > + ext_elog->mc.effective_address = > > > > cpu_to_be64(env->spr[SPR_DAR]); > > > > + } > > > > + > > > > + summary |= mc_derror_table[i].initiator > > > > + | mc_derror_table[i].severity; > > > > + > > > > + return summary; > > > > + } > > > > + } else { > > > > + for (i = 0; i < ARRAY_SIZE(mc_ierror_table); i++) { > > > > + if ((env->spr[SPR_SRR1] & mc_ierror_table[i].srr1_mask) != > > > > + mc_ierror_table[i].srr1_value) { > > > > + continue; > > > > + } > > > > + > > > > + ext_elog->mc.error_type = mc_ierror_table[i].error_type; > > > > + ext_elog->mc.sub_err_type = > > > > mc_ierror_table[i].error_subtype; > > > > + if (mc_ierror_table[i].nip_valid) { > > > > + ext_elog->mc.effective_address = > > > > cpu_to_be64(env->nip); > > > > + } > > > > + > > > > + summary |= mc_ierror_table[i].initiator > > > > + | mc_ierror_table[i].severity; > > > > + > > > > + return summary; > > > > + } > > > > + } > > > > + > > > > + summary |= RTAS_LOG_INITIATOR_CPU; > > > > + return summary; > > > > +} > > > > + > > > > +static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered) > > > > +{ > > > > + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); > > > > + CPUState *cs = CPU(cpu); > > > > + uint64_t rtas_addr; > > > > + CPUPPCState *env = &cpu->env; > > > > + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); > > > > + target_ulong msr = 0; > > > > + struct rtas_error_log log; 
> > > > + struct mc_extended_log *ext_elog; > > > > + uint32_t summary; > > > > + > > > > + /* > > > > + * Properly set bits in MSR before we invoke the handler. > > > > + * SRR0/1, DAR and DSISR are properly set by KVM > > > > + */ > > > > + if (!(*pcc->interrupts_big_endian)(cpu)) { > > > > + msr |= (1ULL << MSR_LE); > > > > + } > > > > + > > > > + if (env->msr & (1ULL << MSR_SF)) { > > > > + msr |= (1ULL << MSR_SF); > > > > + } > > > > + > > > > + msr |= (1ULL << MSR_ME); > > > > + > > > > + ext_elog = g_malloc0(sizeof(*ext_elog)); > > > > + summary = spapr_mce_get_elog_type(cpu, recovered, ext_elog); > > > > + > > > > + log.summary = cpu_to_be32(summary); > > > > + log.extended_length = cpu_to_be32(sizeof(*ext_elog)); > > > > + > > > > + spapr_init_v6hdr(&ext_elog->v6hdr); > > > > + ext_elog->mc.hdr.section_id = > > > > cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MC); > > > > + ext_elog->mc.hdr.section_length = > > > > + cpu_to_be16(sizeof(struct rtas_event_log_v6_mc)); > > > > + ext_elog->mc.hdr.section_version = 1; > > > > + > > > > + /* get rtas addr from fdt */ > > > > + rtas_addr = spapr_get_rtas_addr(); > > > > + if (!rtas_addr) { > > > > + /* Unable to fetch rtas_addr. 
Hence reset the guest */ > > > > + ppc_cpu_do_system_reset(cs); > > > > + g_free(ext_elog); > > > > + return; > > > > + } > > > > + > > > > + stq_be_phys(&address_space_memory, rtas_addr + > > > > RTAS_ERROR_LOG_OFFSET, > > > > + env->gpr[3]); > > > > + cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + > > > > + sizeof(env->gpr[3]), &log, sizeof(log)); > > > > + cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + > > > > + sizeof(env->gpr[3]) + > > > > sizeof(log), ext_elog, > > > > + sizeof(*ext_elog)); > > > > + > > > > + env->gpr[3] = rtas_addr + RTAS_ERROR_LOG_OFFSET; > > > > + env->msr = msr; > > > > + env->nip = spapr->guest_machine_check_addr; > > > > + > > > > + g_free(ext_elog); > > > > +} > > > > + > > > > +void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered) > > > > { > > > > SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); > > > > CPUState *cs = CPU(cpu); > > > > @@ -656,6 +872,8 @@ void spapr_mce_req_event(PowerPCCPU *cpu) > > > > } > > > > } > > > > spapr->mc_status = cpu->vcpu_id; > > > > + > > > > + spapr_mce_dispatch_elog(cpu, recovered); > > > > } > > > > static void check_exception(PowerPCCPU *cpu, > > > > SpaprMachineState *spapr, > > > > diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c > > > > index 8d8d8cdfcb..2c066a372d 100644 > > > > --- a/hw/ppc/spapr_rtas.c > > > > +++ b/hw/ppc/spapr_rtas.c > > > > @@ -477,6 +477,32 @@ void spapr_dt_rtas_tokens(void *fdt, int rtas) > > > > } > > > > } > > > > +hwaddr spapr_get_rtas_addr(void) > > > > +{ > > > > + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); > > > > + int rtas_node; > > > > + const fdt32_t *rtas_data; > > > > + void *fdt = spapr->fdt_blob; > > > > + > > > > + /* fetch rtas addr from fdt */ > > > > + rtas_node = fdt_path_offset(fdt, "/rtas"); > > > > + if (rtas_node < 0) { > > > > + return 0; > > > > + } > > > > + > > > > + rtas_data = fdt_getprop(fdt, rtas_node, "linux,rtas-base", NULL); > > > > + if (!rtas_data) { > > > > + 
return 0; > > > > + } > > > > + > > > > + /* > > > > + * We assume that the OS called RTAS instantiate-rtas, but > > > > some other > > > > + * OS might call RTAS instantiate-rtas-64 instead. This > > > > fine as of now > > > > + * as SLOF only supports 32-bit variant. > > > > + */ > > > > + return (hwaddr)fdt32_to_cpu(*rtas_data); > > > > +} > > > > + > > > > static void core_rtas_register_types(void) > > > > { > > > > spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character", > > > > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h > > > > index 1f5eb8c856..4afa8d4d09 100644 > > > > --- a/include/hw/ppc/spapr.h > > > > +++ b/include/hw/ppc/spapr.h > > > > @@ -724,6 +724,9 @@ void spapr_load_rtas(SpaprMachineState > > > > *spapr, void *fdt, hwaddr addr); > > > > #define RTAS_ERROR_LOG_MAX 2048 > > > > +/* Offset from rtas-base where error log is placed */ > > > > +#define RTAS_ERROR_LOG_OFFSET 0x30 > > > > + > > > > #define RTAS_EVENT_SCAN_RATE 1 > > > > /* This helper should be used to encode interrupt specifiers > > > > when the related > > > > @@ -812,7 +815,7 @@ void > > > > spapr_clear_pending_events(SpaprMachineState *spapr); > > > > int spapr_max_server_number(SpaprMachineState *spapr); > > > > void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex, > > > > uint64_t pte0, uint64_t pte1); > > > > -void spapr_mce_req_event(PowerPCCPU *cpu); > > > > +void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered); > > > > /* DRC callbacks. 
*/ > > > > void spapr_core_release(DeviceState *dev); > > > > @@ -903,4 +906,5 @@ void spapr_check_pagesize(SpaprMachineState > > > > *spapr, hwaddr pagesize, > > > > #define SPAPR_OV5_XIVE_BOTH 0x80 /* Only to advertise on > > > > the platform */ > > > > void spapr_set_all_lpcrs(target_ulong value, target_ulong mask); > > > > +hwaddr spapr_get_rtas_addr(void); > > > > #endif /* HW_SPAPR_H */ > > > > diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c > > > > index 2d8db6d832..9a902c1064 100644 > > > > --- a/target/ppc/kvm.c > > > > +++ b/target/ppc/kvm.c > > > > @@ -2805,9 +2805,11 @@ int kvm_arch_msi_data_to_gsi(uint32_t data) > > > > int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run) > > > > { > > > > + bool recovered = run->flags & KVM_RUN_PPC_NMI_DISP_FULLY_RECOV; > > > > + > > > > cpu_synchronize_state(CPU(cpu)); > > > > - spapr_mce_req_event(cpu); > > > > + spapr_mce_req_event(cpu, recovered); > > > > return 0; > > > > } >
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index 0ce96b86be..db44e09154 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -214,6 +214,104 @@ struct hp_extended_log { struct rtas_event_log_v6_hp hp; } QEMU_PACKED; +struct rtas_event_log_v6_mc { +#define RTAS_LOG_V6_SECTION_ID_MC 0x4D43 /* MC */ + struct rtas_event_log_v6_section_header hdr; + uint32_t fru_id; + uint32_t proc_id; + uint8_t error_type; +#define RTAS_LOG_V6_MC_TYPE_UE 0 +#define RTAS_LOG_V6_MC_TYPE_SLB 1 +#define RTAS_LOG_V6_MC_TYPE_ERAT 2 +#define RTAS_LOG_V6_MC_TYPE_TLB 4 +#define RTAS_LOG_V6_MC_TYPE_D_CACHE 5 +#define RTAS_LOG_V6_MC_TYPE_I_CACHE 7 + uint8_t sub_err_type; +#define RTAS_LOG_V6_MC_UE_INDETERMINATE 0 +#define RTAS_LOG_V6_MC_UE_IFETCH 1 +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH 2 +#define RTAS_LOG_V6_MC_UE_LOAD_STORE 3 +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE 4 +#define RTAS_LOG_V6_MC_SLB_PARITY 0 +#define RTAS_LOG_V6_MC_SLB_MULTIHIT 1 +#define RTAS_LOG_V6_MC_SLB_INDETERMINATE 2 +#define RTAS_LOG_V6_MC_ERAT_PARITY 1 +#define RTAS_LOG_V6_MC_ERAT_MULTIHIT 2 +#define RTAS_LOG_V6_MC_ERAT_INDETERMINATE 3 +#define RTAS_LOG_V6_MC_TLB_PARITY 1 +#define RTAS_LOG_V6_MC_TLB_MULTIHIT 2 +#define RTAS_LOG_V6_MC_TLB_INDETERMINATE 3 + uint8_t reserved_1[6]; + uint64_t effective_address; + uint64_t logical_address; +} QEMU_PACKED; + +struct mc_extended_log { + struct rtas_event_log_v6 v6hdr; + struct rtas_event_log_v6_mc mc; +} QEMU_PACKED; + +struct MC_ierror_table { + unsigned long srr1_mask; + unsigned long srr1_value; + bool nip_valid; /* nip is a valid indicator of faulting address */ + uint8_t error_type; + uint8_t error_subtype; + unsigned int initiator; + unsigned int severity; +}; + +static const struct MC_ierror_table mc_ierror_table[] = { +{ 0x00000000081c0000, 0x0000000000040000, true, + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_IFETCH, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000080000, 
true, + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x00000000000c0000, true, + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000100000, true, + RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000140000, true, + RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000180000, true, + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } }; + +struct MC_derror_table { + unsigned long dsisr_value; + bool dar_valid; /* dar is a valid indicator of faulting address */ + uint8_t error_type; + uint8_t error_subtype; + unsigned int initiator; + unsigned int severity; +}; + +static const struct MC_derror_table mc_derror_table[] = { +{ 0x00008000, false, + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_LOAD_STORE, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00004000, true, + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000800, true, + RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000400, true, + RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000080, true, + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT, /* Before PARITY */ + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000100, true, + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } }; + +#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42)) + 
typedef enum EventClass { EVENT_CLASS_INTERNAL_ERRORS = 0, EVENT_CLASS_EPOW = 1, @@ -622,7 +720,125 @@ void spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type, RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id); } -void spapr_mce_req_event(PowerPCCPU *cpu) +static uint32_t spapr_mce_get_elog_type(PowerPCCPU *cpu, bool recovered, + struct mc_extended_log *ext_elog) +{ + int i; + CPUPPCState *env = &cpu->env; + uint32_t summary; + uint64_t dsisr = env->spr[SPR_DSISR]; + + summary = RTAS_LOG_VERSION_6 | RTAS_LOG_OPTIONAL_PART_PRESENT; + if (recovered) { + summary |= RTAS_LOG_DISPOSITION_FULLY_RECOVERED; + } else { + summary |= RTAS_LOG_DISPOSITION_NOT_RECOVERED; + } + + if (SRR1_MC_LOADSTORE(env->spr[SPR_SRR1])) { + for (i = 0; i < ARRAY_SIZE(mc_derror_table); i++) { + if (!(dsisr & mc_derror_table[i].dsisr_value)) { + continue; + } + + ext_elog->mc.error_type = mc_derror_table[i].error_type; + ext_elog->mc.sub_err_type = mc_derror_table[i].error_subtype; + if (mc_derror_table[i].dar_valid) { + ext_elog->mc.effective_address = cpu_to_be64(env->spr[SPR_DAR]); + } + + summary |= mc_derror_table[i].initiator + | mc_derror_table[i].severity; + + return summary; + } + } else { + for (i = 0; i < ARRAY_SIZE(mc_ierror_table); i++) { + if ((env->spr[SPR_SRR1] & mc_ierror_table[i].srr1_mask) != + mc_ierror_table[i].srr1_value) { + continue; + } + + ext_elog->mc.error_type = mc_ierror_table[i].error_type; + ext_elog->mc.sub_err_type = mc_ierror_table[i].error_subtype; + if (mc_ierror_table[i].nip_valid) { + ext_elog->mc.effective_address = cpu_to_be64(env->nip); + } + + summary |= mc_ierror_table[i].initiator + | mc_ierror_table[i].severity; + + return summary; + } + } + + summary |= RTAS_LOG_INITIATOR_CPU; + return summary; +} + +static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + CPUState *cs = CPU(cpu); + uint64_t rtas_addr; + CPUPPCState *env = &cpu->env; + PowerPCCPUClass *pcc = 
POWERPC_CPU_GET_CLASS(cpu); + target_ulong msr = 0; + struct rtas_error_log log; + struct mc_extended_log *ext_elog; + uint32_t summary; + + /* + * Properly set bits in MSR before we invoke the handler. + * SRR0/1, DAR and DSISR are properly set by KVM + */ + if (!(*pcc->interrupts_big_endian)(cpu)) { + msr |= (1ULL << MSR_LE); + } + + if (env->msr & (1ULL << MSR_SF)) { + msr |= (1ULL << MSR_SF); + } + + msr |= (1ULL << MSR_ME); + + ext_elog = g_malloc0(sizeof(*ext_elog)); + summary = spapr_mce_get_elog_type(cpu, recovered, ext_elog); + + log.summary = cpu_to_be32(summary); + log.extended_length = cpu_to_be32(sizeof(*ext_elog)); + + spapr_init_v6hdr(&ext_elog->v6hdr); + ext_elog->mc.hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MC); + ext_elog->mc.hdr.section_length = + cpu_to_be16(sizeof(struct rtas_event_log_v6_mc)); + ext_elog->mc.hdr.section_version = 1; + + /* get rtas addr from fdt */ + rtas_addr = spapr_get_rtas_addr(); + if (!rtas_addr) { + /* Unable to fetch rtas_addr. Hence reset the guest */ + ppc_cpu_do_system_reset(cs); + g_free(ext_elog); + return; + } + + stq_be_phys(&address_space_memory, rtas_addr + RTAS_ERROR_LOG_OFFSET, + env->gpr[3]); + cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + + sizeof(env->gpr[3]), &log, sizeof(log)); + cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + + sizeof(env->gpr[3]) + sizeof(log), ext_elog, + sizeof(*ext_elog)); + + env->gpr[3] = rtas_addr + RTAS_ERROR_LOG_OFFSET; + env->msr = msr; + env->nip = spapr->guest_machine_check_addr; + + g_free(ext_elog); +} + +void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered) { SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); CPUState *cs = CPU(cpu); @@ -656,6 +872,8 @@ void spapr_mce_req_event(PowerPCCPU *cpu) } } spapr->mc_status = cpu->vcpu_id; + + spapr_mce_dispatch_elog(cpu, recovered); } static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr, diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index 
8d8d8cdfcb..2c066a372d 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -477,6 +477,32 @@ void spapr_dt_rtas_tokens(void *fdt, int rtas) } } +hwaddr spapr_get_rtas_addr(void) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + int rtas_node; + const fdt32_t *rtas_data; + void *fdt = spapr->fdt_blob; + + /* fetch rtas addr from fdt */ + rtas_node = fdt_path_offset(fdt, "/rtas"); + if (rtas_node < 0) { + return 0; + } + + rtas_data = fdt_getprop(fdt, rtas_node, "linux,rtas-base", NULL); + if (!rtas_data) { + return 0; + } + + /* + * We assume that the OS called RTAS instantiate-rtas, but some other + * OS might call RTAS instantiate-rtas-64 instead. This is fine as of now + * as SLOF only supports the 32-bit variant. + */ + return (hwaddr)fdt32_to_cpu(*rtas_data); +} + static void core_rtas_register_types(void) { spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character", diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 1f5eb8c856..4afa8d4d09 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -724,6 +724,9 @@ void spapr_load_rtas(SpaprMachineState *spapr, void *fdt, hwaddr addr); #define RTAS_ERROR_LOG_MAX 2048 +/* Offset from rtas-base where error log is placed */ +#define RTAS_ERROR_LOG_OFFSET 0x30 + #define RTAS_EVENT_SCAN_RATE 1 /* This helper should be used to encode interrupt specifiers when the related @@ -812,7 +815,7 @@ void spapr_clear_pending_events(SpaprMachineState *spapr); int spapr_max_server_number(SpaprMachineState *spapr); void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex, uint64_t pte0, uint64_t pte1); -void spapr_mce_req_event(PowerPCCPU *cpu); +void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered); /* DRC callbacks.
*/ void spapr_core_release(DeviceState *dev); @@ -903,4 +906,5 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, #define SPAPR_OV5_XIVE_BOTH 0x80 /* Only to advertise on the platform */ void spapr_set_all_lpcrs(target_ulong value, target_ulong mask); +hwaddr spapr_get_rtas_addr(void); #endif /* HW_SPAPR_H */ diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 2d8db6d832..9a902c1064 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -2805,9 +2805,11 @@ int kvm_arch_msi_data_to_gsi(uint32_t data) int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run) { + bool recovered = run->flags & KVM_RUN_PPC_NMI_DISP_FULLY_RECOV; + cpu_synchronize_state(CPU(cpu)); - spapr_mce_req_event(cpu); + spapr_mce_req_event(cpu, recovered); return 0; }