Message ID | 20190812092236.16648-7-santosh@fossix.org (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | powerpc: implement machine check safe memcpy | expand |
Context | Check | Description |
---|---|---|
snowpatch_ozlabs/apply_patch | warning | Failed to apply on branch next (da206bd46848568e1aaf35f00e2d78bf9bc94f95) |
snowpatch_ozlabs/apply_patch | fail | Failed to apply to any branch |
On 8/12/19 2:52 PM, Santosh Sivaraj wrote: > If we take a UE on one of the instructions with a fixup entry, set nip > to continue execution at the fixup entry. Stop processing the event > further or print it. > > Co-developed-by: Reza Arbab <arbab@linux.ibm.com> > Signed-off-by: Reza Arbab <arbab@linux.ibm.com> > Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com> > Signed-off-by: Santosh Sivaraj <santosh@fossix.org> Looks good to me. Reviewed-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> Thanks, -Mahesh. > --- > arch/powerpc/include/asm/mce.h | 4 +++- > arch/powerpc/kernel/mce.c | 16 ++++++++++++++++ > arch/powerpc/kernel/mce_power.c | 15 +++++++++++++-- > 3 files changed, 32 insertions(+), 3 deletions(-) > > diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h > index f3a6036b6bc0..e1931c8c2743 100644 > --- a/arch/powerpc/include/asm/mce.h > +++ b/arch/powerpc/include/asm/mce.h > @@ -122,7 +122,8 @@ struct machine_check_event { > enum MCE_UeErrorType ue_error_type:8; > u8 effective_address_provided; > u8 physical_address_provided; > - u8 reserved_1[5]; > + u8 ignore_event; > + u8 reserved_1[4]; > u64 effective_address; > u64 physical_address; > u8 reserved_2[8]; > @@ -193,6 +194,7 @@ struct mce_error_info { > enum MCE_Initiator initiator:8; > enum MCE_ErrorClass error_class:8; > bool sync_error; > + bool ignore_event; > }; > > #define MAX_MC_EVT 100 > diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c > index a3b122a685a5..ec4b3e1087be 100644 > --- a/arch/powerpc/kernel/mce.c > +++ b/arch/powerpc/kernel/mce.c > @@ -149,6 +149,7 @@ void save_mce_event(struct pt_regs *regs, long handled, > if (phys_addr != ULONG_MAX) { > mce->u.ue_error.physical_address_provided = true; > mce->u.ue_error.physical_address = phys_addr; > + mce->u.ue_error.ignore_event = mce_err->ignore_event; > machine_check_ue_event(mce); > } > } > @@ -266,8 +267,17 @@ static void machine_process_ue_event(struct work_struct *work) > /* > * This should probably 
queued elsewhere, but > * oh! well > + * > + * Don't report this machine check because the caller has a > + * asked us to ignore the event, it has a fixup handler which > + * will do the appropriate error handling and reporting. > */ > if (evt->error_type == MCE_ERROR_TYPE_UE) { > + if (evt->u.ue_error.ignore_event) { > + __this_cpu_dec(mce_ue_count); > + continue; > + } > + > if (evt->u.ue_error.physical_address_provided) { > unsigned long pfn; > > @@ -301,6 +311,12 @@ static void machine_check_process_queued_event(struct irq_work *work) > while (__this_cpu_read(mce_queue_count) > 0) { > index = __this_cpu_read(mce_queue_count) - 1; > evt = this_cpu_ptr(&mce_event_queue[index]); > + > + if (evt->error_type == MCE_ERROR_TYPE_UE && > + evt->u.ue_error.ignore_event) { > + __this_cpu_dec(mce_queue_count); > + continue; > + } > machine_check_print_event_info(evt, false, false); > __this_cpu_dec(mce_queue_count); > } > diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c > index e74816f045f8..1dd87f6f5186 100644 > --- a/arch/powerpc/kernel/mce_power.c > +++ b/arch/powerpc/kernel/mce_power.c > @@ -11,6 +11,7 @@ > > #include <linux/types.h> > #include <linux/ptrace.h> > +#include <linux/extable.h> > #include <asm/mmu.h> > #include <asm/mce.h> > #include <asm/machdep.h> > @@ -18,6 +19,7 @@ > #include <asm/pte-walk.h> > #include <asm/sstep.h> > #include <asm/exception-64s.h> > +#include <asm/extable.h> > > /* > * Convert an address related to an mm to a physical address. 
> @@ -559,9 +561,18 @@ static int mce_handle_derror(struct pt_regs *regs, > return 0; > } > > -static long mce_handle_ue_error(struct pt_regs *regs) > +static long mce_handle_ue_error(struct pt_regs *regs, > + struct mce_error_info *mce_err) > { > long handled = 0; > + const struct exception_table_entry *entry; > + > + entry = search_kernel_exception_table(regs->nip); > + if (entry) { > + mce_err->ignore_event = true; > + regs->nip = extable_fixup(entry); > + return 1; > + } > > /* > * On specific SCOM read via MMIO we may get a machine check > @@ -594,7 +605,7 @@ static long mce_handle_error(struct pt_regs *regs, > &phys_addr); > > if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE) > - handled = mce_handle_ue_error(regs); > + handled = mce_handle_ue_error(regs, &mce_err); > > save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr); > >
On 12/8/19 7:22 pm, Santosh Sivaraj wrote: > If we take a UE on one of the instructions with a fixup entry, set nip > to continue execution at the fixup entry. Stop processing the event > further or print it. > > Co-developed-by: Reza Arbab <arbab@linux.ibm.com> > Signed-off-by: Reza Arbab <arbab@linux.ibm.com> > Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com> > Signed-off-by: Santosh Sivaraj <santosh@fossix.org> > --- Isn't this based on https://patchwork.ozlabs.org/patch/895294/? If so it should still have my author tag and signed-off-by Balbir Singh > arch/powerpc/include/asm/mce.h | 4 +++- > arch/powerpc/kernel/mce.c | 16 ++++++++++++++++ > arch/powerpc/kernel/mce_power.c | 15 +++++++++++++-- > 3 files changed, 32 insertions(+), 3 deletions(-) > > diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h > index f3a6036b6bc0..e1931c8c2743 100644 > --- a/arch/powerpc/include/asm/mce.h > +++ b/arch/powerpc/include/asm/mce.h > @@ -122,7 +122,8 @@ struct machine_check_event { > enum MCE_UeErrorType ue_error_type:8; > u8 effective_address_provided; > u8 physical_address_provided; > - u8 reserved_1[5]; > + u8 ignore_event; > + u8 reserved_1[4]; > u64 effective_address; > u64 physical_address; > u8 reserved_2[8]; > @@ -193,6 +194,7 @@ struct mce_error_info { > enum MCE_Initiator initiator:8; > enum MCE_ErrorClass error_class:8; > bool sync_error; > + bool ignore_event; > }; > > #define MAX_MC_EVT 100 > diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c > index a3b122a685a5..ec4b3e1087be 100644 > --- a/arch/powerpc/kernel/mce.c > +++ b/arch/powerpc/kernel/mce.c > @@ -149,6 +149,7 @@ void save_mce_event(struct pt_regs *regs, long handled, > if (phys_addr != ULONG_MAX) { > mce->u.ue_error.physical_address_provided = true; > mce->u.ue_error.physical_address = phys_addr; > + mce->u.ue_error.ignore_event = mce_err->ignore_event; > machine_check_ue_event(mce); > } > } > @@ -266,8 +267,17 @@ static void machine_process_ue_event(struct 
work_struct *work) > /* > * This should probably queued elsewhere, but > * oh! well > + * > + * Don't report this machine check because the caller has a > + * asked us to ignore the event, it has a fixup handler which > + * will do the appropriate error handling and reporting. > */ > if (evt->error_type == MCE_ERROR_TYPE_UE) { > + if (evt->u.ue_error.ignore_event) { > + __this_cpu_dec(mce_ue_count); > + continue; > + } > + > if (evt->u.ue_error.physical_address_provided) { > unsigned long pfn; > > @@ -301,6 +311,12 @@ static void machine_check_process_queued_event(struct irq_work *work) > while (__this_cpu_read(mce_queue_count) > 0) { > index = __this_cpu_read(mce_queue_count) - 1; > evt = this_cpu_ptr(&mce_event_queue[index]); > + > + if (evt->error_type == MCE_ERROR_TYPE_UE && > + evt->u.ue_error.ignore_event) { > + __this_cpu_dec(mce_queue_count); > + continue; > + } > machine_check_print_event_info(evt, false, false); > __this_cpu_dec(mce_queue_count); > } > diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c > index e74816f045f8..1dd87f6f5186 100644 > --- a/arch/powerpc/kernel/mce_power.c > +++ b/arch/powerpc/kernel/mce_power.c > @@ -11,6 +11,7 @@ > > #include <linux/types.h> > #include <linux/ptrace.h> > +#include <linux/extable.h> > #include <asm/mmu.h> > #include <asm/mce.h> > #include <asm/machdep.h> > @@ -18,6 +19,7 @@ > #include <asm/pte-walk.h> > #include <asm/sstep.h> > #include <asm/exception-64s.h> > +#include <asm/extable.h> > > /* > * Convert an address related to an mm to a physical address. 
> @@ -559,9 +561,18 @@ static int mce_handle_derror(struct pt_regs *regs, > return 0; > } > > -static long mce_handle_ue_error(struct pt_regs *regs) > +static long mce_handle_ue_error(struct pt_regs *regs, > + struct mce_error_info *mce_err) > { > long handled = 0; > + const struct exception_table_entry *entry; > + > + entry = search_kernel_exception_table(regs->nip); > + if (entry) { > + mce_err->ignore_event = true; > + regs->nip = extable_fixup(entry); > + return 1; > + } > > /* > * On specific SCOM read via MMIO we may get a machine check > @@ -594,7 +605,7 @@ static long mce_handle_error(struct pt_regs *regs, > &phys_addr); > > if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE) > - handled = mce_handle_ue_error(regs); > + handled = mce_handle_ue_error(regs, &mce_err); > > save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr); > >
Hi Balbir, Balbir Singh <bsingharora@gmail.com> writes: > On 12/8/19 7:22 pm, Santosh Sivaraj wrote: >> If we take a UE on one of the instructions with a fixup entry, set nip >> to continue execution at the fixup entry. Stop processing the event >> further or print it. >> >> Co-developed-by: Reza Arbab <arbab@linux.ibm.com> >> Signed-off-by: Reza Arbab <arbab@linux.ibm.com> >> Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com> >> Signed-off-by: Santosh Sivaraj <santosh@fossix.org> >> --- > > Isn't this based on https://patchwork.ozlabs.org/patch/895294/? If so it > should still have my author tag and signed-off-by Originally when I received the series for posting, I had Reza's authorship and signed-off-by, since the patch changed significantly I added co-developed-by as Reza. I will update in the next spin. https://lore.kernel.org/linuxppc-dev/20190702051932.511-1-santosh@fossix.org/ Santosh > > Balbir Singh > >> arch/powerpc/include/asm/mce.h | 4 +++- >> arch/powerpc/kernel/mce.c | 16 ++++++++++++++++ >> arch/powerpc/kernel/mce_power.c | 15 +++++++++++++-- >> 3 files changed, 32 insertions(+), 3 deletions(-) >> >> diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h >> index f3a6036b6bc0..e1931c8c2743 100644 >> --- a/arch/powerpc/include/asm/mce.h >> +++ b/arch/powerpc/include/asm/mce.h >> @@ -122,7 +122,8 @@ struct machine_check_event { >> enum MCE_UeErrorType ue_error_type:8; >> u8 effective_address_provided; >> u8 physical_address_provided; >> - u8 reserved_1[5]; >> + u8 ignore_event; >> + u8 reserved_1[4]; >> u64 effective_address; >> u64 physical_address; >> u8 reserved_2[8]; >> @@ -193,6 +194,7 @@ struct mce_error_info { >> enum MCE_Initiator initiator:8; >> enum MCE_ErrorClass error_class:8; >> bool sync_error; >> + bool ignore_event; >> }; >> >> #define MAX_MC_EVT 100 >> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c >> index a3b122a685a5..ec4b3e1087be 100644 >> --- a/arch/powerpc/kernel/mce.c >> +++ 
b/arch/powerpc/kernel/mce.c >> @@ -149,6 +149,7 @@ void save_mce_event(struct pt_regs *regs, long handled, >> if (phys_addr != ULONG_MAX) { >> mce->u.ue_error.physical_address_provided = true; >> mce->u.ue_error.physical_address = phys_addr; >> + mce->u.ue_error.ignore_event = mce_err->ignore_event; >> machine_check_ue_event(mce); >> } >> } >> @@ -266,8 +267,17 @@ static void machine_process_ue_event(struct work_struct *work) >> /* >> * This should probably queued elsewhere, but >> * oh! well >> + * >> + * Don't report this machine check because the caller has a >> + * asked us to ignore the event, it has a fixup handler which >> + * will do the appropriate error handling and reporting. >> */ >> if (evt->error_type == MCE_ERROR_TYPE_UE) { >> + if (evt->u.ue_error.ignore_event) { >> + __this_cpu_dec(mce_ue_count); >> + continue; >> + } >> + >> if (evt->u.ue_error.physical_address_provided) { >> unsigned long pfn; >> >> @@ -301,6 +311,12 @@ static void machine_check_process_queued_event(struct irq_work *work) >> while (__this_cpu_read(mce_queue_count) > 0) { >> index = __this_cpu_read(mce_queue_count) - 1; >> evt = this_cpu_ptr(&mce_event_queue[index]); >> + >> + if (evt->error_type == MCE_ERROR_TYPE_UE && >> + evt->u.ue_error.ignore_event) { >> + __this_cpu_dec(mce_queue_count); >> + continue; >> + } >> machine_check_print_event_info(evt, false, false); >> __this_cpu_dec(mce_queue_count); >> } >> diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c >> index e74816f045f8..1dd87f6f5186 100644 >> --- a/arch/powerpc/kernel/mce_power.c >> +++ b/arch/powerpc/kernel/mce_power.c >> @@ -11,6 +11,7 @@ >> >> #include <linux/types.h> >> #include <linux/ptrace.h> >> +#include <linux/extable.h> >> #include <asm/mmu.h> >> #include <asm/mce.h> >> #include <asm/machdep.h> >> @@ -18,6 +19,7 @@ >> #include <asm/pte-walk.h> >> #include <asm/sstep.h> >> #include <asm/exception-64s.h> >> +#include <asm/extable.h> >> >> /* >> * Convert an address related to an mm 
to a physical address. >> @@ -559,9 +561,18 @@ static int mce_handle_derror(struct pt_regs *regs, >> return 0; >> } >> >> -static long mce_handle_ue_error(struct pt_regs *regs) >> +static long mce_handle_ue_error(struct pt_regs *regs, >> + struct mce_error_info *mce_err) >> { >> long handled = 0; >> + const struct exception_table_entry *entry; >> + >> + entry = search_kernel_exception_table(regs->nip); >> + if (entry) { >> + mce_err->ignore_event = true; >> + regs->nip = extable_fixup(entry); >> + return 1; >> + } >> >> /* >> * On specific SCOM read via MMIO we may get a machine check >> @@ -594,7 +605,7 @@ static long mce_handle_error(struct pt_regs *regs, >> &phys_addr); >> >> if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE) >> - handled = mce_handle_ue_error(regs); >> + handled = mce_handle_ue_error(regs, &mce_err); >> >> save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr); >> >>
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index f3a6036b6bc0..e1931c8c2743 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -122,7 +122,8 @@ struct machine_check_event { enum MCE_UeErrorType ue_error_type:8; u8 effective_address_provided; u8 physical_address_provided; - u8 reserved_1[5]; + u8 ignore_event; + u8 reserved_1[4]; u64 effective_address; u64 physical_address; u8 reserved_2[8]; @@ -193,6 +194,7 @@ struct mce_error_info { enum MCE_Initiator initiator:8; enum MCE_ErrorClass error_class:8; bool sync_error; + bool ignore_event; }; #define MAX_MC_EVT 100 diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index a3b122a685a5..ec4b3e1087be 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -149,6 +149,7 @@ void save_mce_event(struct pt_regs *regs, long handled, if (phys_addr != ULONG_MAX) { mce->u.ue_error.physical_address_provided = true; mce->u.ue_error.physical_address = phys_addr; + mce->u.ue_error.ignore_event = mce_err->ignore_event; machine_check_ue_event(mce); } } @@ -266,8 +267,17 @@ static void machine_process_ue_event(struct work_struct *work) /* * This should probably queued elsewhere, but * oh! well + * + * Don't report this machine check because the caller has + * asked us to ignore the event, it has a fixup handler which + * will do the appropriate error handling and reporting. 
*/ if (evt->error_type == MCE_ERROR_TYPE_UE) { + if (evt->u.ue_error.ignore_event) { + __this_cpu_dec(mce_ue_count); + continue; + } + if (evt->u.ue_error.physical_address_provided) { unsigned long pfn; @@ -301,6 +311,12 @@ static void machine_check_process_queued_event(struct irq_work *work) while (__this_cpu_read(mce_queue_count) > 0) { index = __this_cpu_read(mce_queue_count) - 1; evt = this_cpu_ptr(&mce_event_queue[index]); + + if (evt->error_type == MCE_ERROR_TYPE_UE && + evt->u.ue_error.ignore_event) { + __this_cpu_dec(mce_queue_count); + continue; + } machine_check_print_event_info(evt, false, false); __this_cpu_dec(mce_queue_count); } diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index e74816f045f8..1dd87f6f5186 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -11,6 +11,7 @@ #include <linux/types.h> #include <linux/ptrace.h> +#include <linux/extable.h> #include <asm/mmu.h> #include <asm/mce.h> #include <asm/machdep.h> @@ -18,6 +19,7 @@ #include <asm/pte-walk.h> #include <asm/sstep.h> #include <asm/exception-64s.h> +#include <asm/extable.h> /* * Convert an address related to an mm to a physical address. 
@@ -559,9 +561,18 @@ static int mce_handle_derror(struct pt_regs *regs, return 0; } -static long mce_handle_ue_error(struct pt_regs *regs) +static long mce_handle_ue_error(struct pt_regs *regs, + struct mce_error_info *mce_err) { long handled = 0; + const struct exception_table_entry *entry; + + entry = search_kernel_exception_table(regs->nip); + if (entry) { + mce_err->ignore_event = true; + regs->nip = extable_fixup(entry); + return 1; + } /* * On specific SCOM read via MMIO we may get a machine check @@ -594,7 +605,7 @@ static long mce_handle_error(struct pt_regs *regs, &phys_addr); if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE) - handled = mce_handle_ue_error(regs); + handled = mce_handle_ue_error(regs, &mce_err); save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr);