Message ID | 153365141396.14256.7147876868875454254.stgit@jupiter.in.ibm.com (mailing list archive) |
---|---|
State | Changes Requested |
Headers | show |
Series | powerpc/pseries: Machine check handler improvements. | expand |
Context | Check | Description |
---|---|---|
snowpatch_ozlabs/apply_patch | warning | next/apply_patch Patch failed to apply |
snowpatch_ozlabs/apply_patch | fail | Failed to apply to any branch |
Hi Mahesh, A few nitpicks. Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> writes: > From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> > > On pseries, the machine check error details are part of RTAS extended > event log passed under Machine check exception section. This patch adds > the definition of rtas MCE event section and related helper > functions. > > Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> > --- > arch/powerpc/include/asm/rtas.h | 111 +++++++++++++++++++++++++++++++++++++++ > 1 file changed, 111 insertions(+) AFIACS none of this ever gets used outside of ras.c, should it should just go in there. > diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h > index 71e393c46a49..adc677c5e3a4 100644 > --- a/arch/powerpc/include/asm/rtas.h > +++ b/arch/powerpc/include/asm/rtas.h > @@ -326,6 +334,109 @@ struct pseries_hp_errorlog { > #define PSERIES_HP_ELOG_ID_DRC_COUNT 3 > #define PSERIES_HP_ELOG_ID_DRC_IC 4 > > +/* RTAS pseries MCE errorlog section */ > +#pragma pack(push, 1) > +struct pseries_mc_errorlog { > + __be32 fru_id; > + __be32 proc_id; > + uint8_t error_type; Please use kernel types, so u8. > + union { > + struct { > + uint8_t ue_err_type; > + /* XXXXXXXX > + * X 1: Permanent or Transient UE. > + * X 1: Effective address provided. > + * X 1: Logical address provided. > + * XX 2: Reserved. > + * XXX 3: Type of UE error. > + */ But which bit is bit 0? And is that the LSB or MSB? > + uint8_t reserved_1[6]; > + __be64 effective_address; > + __be64 logical_address; > + } ue_error; > + struct { > + uint8_t soft_err_type; > + /* XXXXXXXX > + * X 1: Effective address provided. > + * XXXXX 5: Reserved. > + * XX 2: Type of SLB/ERAT/TLB error. > + */ > + uint8_t reserved_1[6]; > + __be64 effective_address; > + uint8_t reserved_2[8]; > + } soft_error; > + } u; > +}; > +#pragma pack(pop) Why not __packed ? > +/* RTAS pseries MCE error types */ > +#define PSERIES_MC_ERROR_TYPE_UE 0x00 > +#define PSERIES_MC_ERROR_TYPE_SLB 0x01 > +#define PSERIES_MC_ERROR_TYPE_ERAT 0x02 > +#define PSERIES_MC_ERROR_TYPE_TLB 0x04 > +#define PSERIES_MC_ERROR_TYPE_D_CACHE 0x05 > +#define PSERIES_MC_ERROR_TYPE_I_CACHE 0x07 Once these are in ras.c they can have less unwieldy names, ie. the PSERIES at least can be dropped. > +/* RTAS pseries MCE error sub types */ > +#define PSERIES_MC_ERROR_UE_INDETERMINATE 0 > +#define PSERIES_MC_ERROR_UE_IFETCH 1 > +#define PSERIES_MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH 2 > +#define PSERIES_MC_ERROR_UE_LOAD_STORE 3 > +#define PSERIES_MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE 4 > + > +#define PSERIES_MC_ERROR_SLB_PARITY 0 > +#define PSERIES_MC_ERROR_SLB_MULTIHIT 1 > +#define PSERIES_MC_ERROR_SLB_INDETERMINATE 2 > + > +#define PSERIES_MC_ERROR_ERAT_PARITY 1 > +#define PSERIES_MC_ERROR_ERAT_MULTIHIT 2 > +#define PSERIES_MC_ERROR_ERAT_INDETERMINATE 3 > + > +#define PSERIES_MC_ERROR_TLB_PARITY 1 > +#define PSERIES_MC_ERROR_TLB_MULTIHIT 2 > +#define PSERIES_MC_ERROR_TLB_INDETERMINATE 3 > + > +static inline uint8_t rtas_mc_error_type(const struct pseries_mc_errorlog *mlog) > +{ > + return mlog->error_type; > +} Why not just access it directly? > +static inline uint8_t rtas_mc_error_sub_type( > + const struct pseries_mc_errorlog *mlog) > +{ > + switch (mlog->error_type) { > + case PSERIES_MC_ERROR_TYPE_UE: > + return (mlog->u.ue_error.ue_err_type & 0x07); > + case PSERIES_MC_ERROR_TYPE_SLB: > + case PSERIES_MC_ERROR_TYPE_ERAT: > + case PSERIES_MC_ERROR_TYPE_TLB: > + return (mlog->u.soft_error.soft_err_type & 0x03); > + default: > + return 0; > + } > +} > + > +static inline uint64_t rtas_mc_get_effective_addr( > + const struct pseries_mc_errorlog *mlog) > +{ > + uint64_t addr = 0; That should be __be64. > + > + switch (mlog->error_type) { > + case PSERIES_MC_ERROR_TYPE_UE: > + if (mlog->u.ue_error.ue_err_type & 0x40) > + addr = mlog->u.ue_error.effective_address; > + break; > + case PSERIES_MC_ERROR_TYPE_SLB: > + case PSERIES_MC_ERROR_TYPE_ERAT: > + case PSERIES_MC_ERROR_TYPE_TLB: > + if (mlog->u.soft_error.soft_err_type & 0x80) > + addr = mlog->u.soft_error.effective_address; > + default: > + break; > + } > + return be64_to_cpu(addr); > +} > + > struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log, > uint16_t section_id); > cheers
On 08/08/2018 08:12 PM, Michael Ellerman wrote: > Hi Mahesh, > > A few nitpicks. > > Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> writes: >> From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> >> >> On pseries, the machine check error details are part of RTAS extended >> event log passed under Machine check exception section. This patch adds >> the definition of rtas MCE event section and related helper >> functions. >> >> Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> >> --- >> arch/powerpc/include/asm/rtas.h | 111 +++++++++++++++++++++++++++++++++++++++ >> 1 file changed, 111 insertions(+) > > AFIACS none of this ever gets used outside of ras.c, should it should > just go in there. Since it was all rtas specific I thought rtas.h is better place. But yes, I can move this into ras.c > >> diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h >> index 71e393c46a49..adc677c5e3a4 100644 >> --- a/arch/powerpc/include/asm/rtas.h >> +++ b/arch/powerpc/include/asm/rtas.h >> @@ -326,6 +334,109 @@ struct pseries_hp_errorlog { >> #define PSERIES_HP_ELOG_ID_DRC_COUNT 3 >> #define PSERIES_HP_ELOG_ID_DRC_IC 4 >> >> +/* RTAS pseries MCE errorlog section */ >> +#pragma pack(push, 1) >> +struct pseries_mc_errorlog { >> + __be32 fru_id; >> + __be32 proc_id; >> + uint8_t error_type; > > Please use kernel types, so u8. Will do so. > >> + union { >> + struct { >> + uint8_t ue_err_type; >> + /* XXXXXXXX >> + * X 1: Permanent or Transient UE. >> + * X 1: Effective address provided. >> + * X 1: Logical address provided. >> + * XX 2: Reserved. >> + * XXX 3: Type of UE error. >> + */ > > But which bit is bit 0? And is that the LSB or MSB? RTAS errorlog data in BE format, the leftmost bit is MSB 0 (1: Permanent or Transient UE.). I Will update the comment above that properly points out which one is MSB 0. > > >> + uint8_t reserved_1[6]; >> + __be64 effective_address; >> + __be64 logical_address; >> + } ue_error; >> + struct { >> + uint8_t soft_err_type; >> + /* XXXXXXXX >> + * X 1: Effective address provided. >> + * XXXXX 5: Reserved. >> + * XX 2: Type of SLB/ERAT/TLB error. >> + */ >> + uint8_t reserved_1[6]; >> + __be64 effective_address; >> + uint8_t reserved_2[8]; >> + } soft_error; >> + } u; >> +}; >> +#pragma pack(pop) > > Why not __packed ? Because when used __packed it added 1 byte extra padding between reserved_1[6] and effective_address. That caused wrong effective address to be printed on the console. Hence I switched to #pragma pack to force 1 byte alignment for this structure alone. > >> +/* RTAS pseries MCE error types */ >> +#define PSERIES_MC_ERROR_TYPE_UE 0x00 >> +#define PSERIES_MC_ERROR_TYPE_SLB 0x01 >> +#define PSERIES_MC_ERROR_TYPE_ERAT 0x02 >> +#define PSERIES_MC_ERROR_TYPE_TLB 0x04 >> +#define PSERIES_MC_ERROR_TYPE_D_CACHE 0x05 >> +#define PSERIES_MC_ERROR_TYPE_I_CACHE 0x07 > > Once these are in ras.c they can have less unwieldy names, ie. the > PSERIES at least can be dropped. ok. > >> +/* RTAS pseries MCE error sub types */ >> +#define PSERIES_MC_ERROR_UE_INDETERMINATE 0 >> +#define PSERIES_MC_ERROR_UE_IFETCH 1 >> +#define PSERIES_MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH 2 >> +#define PSERIES_MC_ERROR_UE_LOAD_STORE 3 >> +#define PSERIES_MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE 4 >> + >> +#define PSERIES_MC_ERROR_SLB_PARITY 0 >> +#define PSERIES_MC_ERROR_SLB_MULTIHIT 1 >> +#define PSERIES_MC_ERROR_SLB_INDETERMINATE 2 >> + >> +#define PSERIES_MC_ERROR_ERAT_PARITY 1 >> +#define PSERIES_MC_ERROR_ERAT_MULTIHIT 2 >> +#define PSERIES_MC_ERROR_ERAT_INDETERMINATE 3 >> + >> +#define PSERIES_MC_ERROR_TLB_PARITY 1 >> +#define PSERIES_MC_ERROR_TLB_MULTIHIT 2 >> +#define PSERIES_MC_ERROR_TLB_INDETERMINATE 3 >> + >> +static inline uint8_t rtas_mc_error_type(const struct pseries_mc_errorlog *mlog) >> +{ >> + return mlog->error_type; >> +} > > Why not just access it directly? sure. > >> +static inline uint8_t rtas_mc_error_sub_type( >> + const struct pseries_mc_errorlog *mlog) >> +{ >> + switch (mlog->error_type) { >> + case PSERIES_MC_ERROR_TYPE_UE: >> + return (mlog->u.ue_error.ue_err_type & 0x07); >> + case PSERIES_MC_ERROR_TYPE_SLB: >> + case PSERIES_MC_ERROR_TYPE_ERAT: >> + case PSERIES_MC_ERROR_TYPE_TLB: >> + return (mlog->u.soft_error.soft_err_type & 0x03); >> + default: >> + return 0; >> + } >> +} >> + >> +static inline uint64_t rtas_mc_get_effective_addr( >> + const struct pseries_mc_errorlog *mlog) >> +{ >> + uint64_t addr = 0; > > That should be __be64. Sure will do. Thanks, -Mahesh.
Mahesh Jagannath Salgaonkar <mahesh@linux.vnet.ibm.com> writes: > On 08/08/2018 08:12 PM, Michael Ellerman wrote: ... >> >>> + union { >>> + struct { >>> + uint8_t ue_err_type; >>> + /* XXXXXXXX >>> + * X 1: Permanent or Transient UE. >>> + * X 1: Effective address provided. >>> + * X 1: Logical address provided. >>> + * XX 2: Reserved. >>> + * XXX 3: Type of UE error. >>> + */ >> >> But which bit is bit 0? And is that the LSB or MSB? > > RTAS errorlog data in BE format, the leftmost bit is MSB 0 (1: Permanent > or Transient UE.). I Will update the comment above that properly points > out which one is MSB 0. > >> >> >>> + uint8_t reserved_1[6]; >>> + __be64 effective_address; >>> + __be64 logical_address; >>> + } ue_error; >>> + struct { >>> + uint8_t soft_err_type; >>> + /* XXXXXXXX >>> + * X 1: Effective address provided. >>> + * XXXXX 5: Reserved. >>> + * XX 2: Type of SLB/ERAT/TLB error. >>> + */ >>> + uint8_t reserved_1[6]; >>> + __be64 effective_address; >>> + uint8_t reserved_2[8]; >>> + } soft_error; >>> + } u; >>> +}; >>> +#pragma pack(pop) >> >> Why not __packed ? > > Because when used __packed it added 1 byte extra padding between > reserved_1[6] and effective_address. That caused wrong effective address > to be printed on the console. Hence I switched to #pragma pack to force > 1 byte alignment for this structure alone. OK, that's weird. Do we really need to bother with all the union stuff? The only difference is the field names, and whether logical address has a value or not. What about: struct pseries_mc_errorlog { __be32 fru_id; __be32 proc_id; u8 error_type; u8 sub_error_type; u8 reserved_1[6]; __be64 effective_address; __be64 logical_address; } __packed; cheers
Hi! On Thu, Aug 16, 2018 at 02:14:39PM +1000, Michael Ellerman wrote: > Mahesh Jagannath Salgaonkar <mahesh@linux.vnet.ibm.com> writes: > > On 08/08/2018 08:12 PM, Michael Ellerman wrote: > >>> + uint8_t reserved_1[6]; > >>> + __be64 effective_address; > >>> + __be64 logical_address; > >>> + } ue_error; > >>> + struct { > >>> + uint8_t soft_err_type; > >>> + /* XXXXXXXX > >>> + * X 1: Effective address provided. > >>> + * XXXXX 5: Reserved. > >>> + * XX 2: Type of SLB/ERAT/TLB error. > >>> + */ > >>> + uint8_t reserved_1[6]; > >>> + __be64 effective_address; > >>> + uint8_t reserved_2[8]; > >>> + } soft_error; > >>> + } u; > >>> +}; > >>> +#pragma pack(pop) > >> > >> Why not __packed ? > > > > Because when used __packed it added 1 byte extra padding between > > reserved_1[6] and effective_address. That caused wrong effective address > > to be printed on the console. Hence I switched to #pragma pack to force > > 1 byte alignment for this structure alone. > > OK, that's weird. Yes, if that is true, then please open a GCC bugzilla report. Segher
On 08/16/2018 09:44 AM, Michael Ellerman wrote: > Mahesh Jagannath Salgaonkar <mahesh@linux.vnet.ibm.com> writes: >> On 08/08/2018 08:12 PM, Michael Ellerman wrote: > ... >>> >>>> + union { >>>> + struct { >>>> + uint8_t ue_err_type; >>>> + /* XXXXXXXX >>>> + * X 1: Permanent or Transient UE. >>>> + * X 1: Effective address provided. >>>> + * X 1: Logical address provided. >>>> + * XX 2: Reserved. >>>> + * XXX 3: Type of UE error. >>>> + */ >>> >>> But which bit is bit 0? And is that the LSB or MSB? >> >> RTAS errorlog data in BE format, the leftmost bit is MSB 0 (1: Permanent >> or Transient UE.). I Will update the comment above that properly points >> out which one is MSB 0. >> >>> >>> >>>> + uint8_t reserved_1[6]; >>>> + __be64 effective_address; >>>> + __be64 logical_address; >>>> + } ue_error; >>>> + struct { >>>> + uint8_t soft_err_type; >>>> + /* XXXXXXXX >>>> + * X 1: Effective address provided. >>>> + * XXXXX 5: Reserved. >>>> + * XX 2: Type of SLB/ERAT/TLB error. >>>> + */ >>>> + uint8_t reserved_1[6]; >>>> + __be64 effective_address; >>>> + uint8_t reserved_2[8]; >>>> + } soft_error; >>>> + } u; >>>> +}; >>>> +#pragma pack(pop) >>> >>> Why not __packed ? >> >> Because when used __packed it added 1 byte extra padding between >> reserved_1[6] and effective_address. That caused wrong effective address >> to be printed on the console. Hence I switched to #pragma pack to force >> 1 byte alignment for this structure alone. > > OK, that's weird. > > Do we really need to bother with all the union stuff? The only > difference is the field names, and whether logical address has a value Also the bit fields for UE and other sub errors differ. Yeah but we can do away with union stuff. > or not. What about: > > struct pseries_mc_errorlog { > __be32 fru_id; > __be32 proc_id; > u8 error_type; > u8 sub_error_type; > u8 reserved_1[6]; > __be64 effective_address; > __be64 logical_address; > } __packed; Sure will do. Thanks -Mahesh. > > cheers >
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 71e393c46a49..adc677c5e3a4 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -185,6 +185,13 @@ static inline uint8_t rtas_error_disposition(const struct rtas_error_log *elog) return (elog->byte1 & 0x18) >> 3; } +static inline +void rtas_set_disposition_recovered(struct rtas_error_log *elog) +{ + elog->byte1 &= ~0x18; + elog->byte1 |= (RTAS_DISP_FULLY_RECOVERED << 3); +} + static inline uint8_t rtas_error_extended(const struct rtas_error_log *elog) { return (elog->byte1 & 0x04) >> 2; @@ -275,6 +282,7 @@ inline uint32_t rtas_ext_event_company_id(struct rtas_ext_event_log_v6 *ext_log) #define PSERIES_ELOG_SECT_ID_CALL_HOME (('C' << 8) | 'H') #define PSERIES_ELOG_SECT_ID_USER_DEF (('U' << 8) | 'D') #define PSERIES_ELOG_SECT_ID_HOTPLUG (('H' << 8) | 'P') +#define PSERIES_ELOG_SECT_ID_MCE (('M' << 8) | 'C') /* Vendor specific Platform Event Log Format, Version 6, section header */ struct pseries_errorlog { @@ -326,6 +334,109 @@ struct pseries_hp_errorlog { #define PSERIES_HP_ELOG_ID_DRC_COUNT 3 #define PSERIES_HP_ELOG_ID_DRC_IC 4 +/* RTAS pseries MCE errorlog section */ +#pragma pack(push, 1) +struct pseries_mc_errorlog { + __be32 fru_id; + __be32 proc_id; + uint8_t error_type; + union { + struct { + uint8_t ue_err_type; + /* XXXXXXXX + * X 1: Permanent or Transient UE. + * X 1: Effective address provided. + * X 1: Logical address provided. + * XX 2: Reserved. + * XXX 3: Type of UE error. + */ + uint8_t reserved_1[6]; + __be64 effective_address; + __be64 logical_address; + } ue_error; + struct { + uint8_t soft_err_type; + /* XXXXXXXX + * X 1: Effective address provided. + * XXXXX 5: Reserved. + * XX 2: Type of SLB/ERAT/TLB error. + */ + uint8_t reserved_1[6]; + __be64 effective_address; + uint8_t reserved_2[8]; + } soft_error; + } u; +}; +#pragma pack(pop) + +/* RTAS pseries MCE error types */ +#define PSERIES_MC_ERROR_TYPE_UE 0x00 +#define PSERIES_MC_ERROR_TYPE_SLB 0x01 +#define PSERIES_MC_ERROR_TYPE_ERAT 0x02 +#define PSERIES_MC_ERROR_TYPE_TLB 0x04 +#define PSERIES_MC_ERROR_TYPE_D_CACHE 0x05 +#define PSERIES_MC_ERROR_TYPE_I_CACHE 0x07 + +/* RTAS pseries MCE error sub types */ +#define PSERIES_MC_ERROR_UE_INDETERMINATE 0 +#define PSERIES_MC_ERROR_UE_IFETCH 1 +#define PSERIES_MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH 2 +#define PSERIES_MC_ERROR_UE_LOAD_STORE 3 +#define PSERIES_MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE 4 + +#define PSERIES_MC_ERROR_SLB_PARITY 0 +#define PSERIES_MC_ERROR_SLB_MULTIHIT 1 +#define PSERIES_MC_ERROR_SLB_INDETERMINATE 2 + +#define PSERIES_MC_ERROR_ERAT_PARITY 1 +#define PSERIES_MC_ERROR_ERAT_MULTIHIT 2 +#define PSERIES_MC_ERROR_ERAT_INDETERMINATE 3 + +#define PSERIES_MC_ERROR_TLB_PARITY 1 +#define PSERIES_MC_ERROR_TLB_MULTIHIT 2 +#define PSERIES_MC_ERROR_TLB_INDETERMINATE 3 + +static inline uint8_t rtas_mc_error_type(const struct pseries_mc_errorlog *mlog) +{ + return mlog->error_type; +} + +static inline uint8_t rtas_mc_error_sub_type( + const struct pseries_mc_errorlog *mlog) +{ + switch (mlog->error_type) { + case PSERIES_MC_ERROR_TYPE_UE: + return (mlog->u.ue_error.ue_err_type & 0x07); + case PSERIES_MC_ERROR_TYPE_SLB: + case PSERIES_MC_ERROR_TYPE_ERAT: + case PSERIES_MC_ERROR_TYPE_TLB: + return (mlog->u.soft_error.soft_err_type & 0x03); + default: + return 0; + } +} + +static inline uint64_t rtas_mc_get_effective_addr( + const struct pseries_mc_errorlog *mlog) +{ + uint64_t addr = 0; + + switch (mlog->error_type) { + case PSERIES_MC_ERROR_TYPE_UE: + if (mlog->u.ue_error.ue_err_type & 0x40) + addr = mlog->u.ue_error.effective_address; + break; + case PSERIES_MC_ERROR_TYPE_SLB: + case PSERIES_MC_ERROR_TYPE_ERAT: + case PSERIES_MC_ERROR_TYPE_TLB: + if (mlog->u.soft_error.soft_err_type & 0x80) + addr = mlog->u.soft_error.effective_address; + default: + break; + } + return be64_to_cpu(addr); +} + struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log, uint16_t section_id);