diff mbox series

[v9,6/7] powerpc/mce: Handle UE event for memcpy_mcsafe

Message ID 20190812092236.16648-7-santosh@fossix.org (mailing list archive)
State Superseded
Headers show
Series powerpc: implement machine check safe memcpy | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch warning Failed to apply on branch next (da206bd46848568e1aaf35f00e2d78bf9bc94f95)
snowpatch_ozlabs/apply_patch fail Failed to apply to any branch

Commit Message

Santosh Sivaraj Aug. 12, 2019, 9:22 a.m. UTC
If we take a UE on one of the instructions with a fixup entry, set nip
to continue execution at the fixup entry. Do not process the event
further or print it.

Co-developed-by: Reza Arbab <arbab@linux.ibm.com>
Signed-off-by: Reza Arbab <arbab@linux.ibm.com>
Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
---
 arch/powerpc/include/asm/mce.h  |  4 +++-
 arch/powerpc/kernel/mce.c       | 16 ++++++++++++++++
 arch/powerpc/kernel/mce_power.c | 15 +++++++++++++--
 3 files changed, 32 insertions(+), 3 deletions(-)

Comments

Mahesh J Salgaonkar Aug. 14, 2019, 8:11 a.m. UTC | #1
On 8/12/19 2:52 PM, Santosh Sivaraj wrote:
> If we take a UE on one of the instructions with a fixup entry, set nip
> to continue execution at the fixup entry. Stop processing the event
> further or print it.
> 
> Co-developed-by: Reza Arbab <arbab@linux.ibm.com>
> Signed-off-by: Reza Arbab <arbab@linux.ibm.com>
> Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
> Signed-off-by: Santosh Sivaraj <santosh@fossix.org>

Looks good to me.

Reviewed-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

Thanks,
-Mahesh.

> ---
>  arch/powerpc/include/asm/mce.h  |  4 +++-
>  arch/powerpc/kernel/mce.c       | 16 ++++++++++++++++
>  arch/powerpc/kernel/mce_power.c | 15 +++++++++++++--
>  3 files changed, 32 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
> index f3a6036b6bc0..e1931c8c2743 100644
> --- a/arch/powerpc/include/asm/mce.h
> +++ b/arch/powerpc/include/asm/mce.h
> @@ -122,7 +122,8 @@ struct machine_check_event {
>  			enum MCE_UeErrorType ue_error_type:8;
>  			u8		effective_address_provided;
>  			u8		physical_address_provided;
> -			u8		reserved_1[5];
> +			u8		ignore_event;
> +			u8		reserved_1[4];
>  			u64		effective_address;
>  			u64		physical_address;
>  			u8		reserved_2[8];
> @@ -193,6 +194,7 @@ struct mce_error_info {
>  	enum MCE_Initiator	initiator:8;
>  	enum MCE_ErrorClass	error_class:8;
>  	bool			sync_error;
> +	bool			ignore_event;
>  };
>  
>  #define MAX_MC_EVT	100
> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
> index a3b122a685a5..ec4b3e1087be 100644
> --- a/arch/powerpc/kernel/mce.c
> +++ b/arch/powerpc/kernel/mce.c
> @@ -149,6 +149,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
>  		if (phys_addr != ULONG_MAX) {
>  			mce->u.ue_error.physical_address_provided = true;
>  			mce->u.ue_error.physical_address = phys_addr;
> +			mce->u.ue_error.ignore_event = mce_err->ignore_event;
>  			machine_check_ue_event(mce);
>  		}
>  	}
> @@ -266,8 +267,17 @@ static void machine_process_ue_event(struct work_struct *work)
>  		/*
>  		 * This should probably queued elsewhere, but
>  		 * oh! well
> +		 *
> +		 * Don't report this machine check because the caller has a
> +		 * asked us to ignore the event, it has a fixup handler which
> +		 * will do the appropriate error handling and reporting.
>  		 */
>  		if (evt->error_type == MCE_ERROR_TYPE_UE) {
> +			if (evt->u.ue_error.ignore_event) {
> +				__this_cpu_dec(mce_ue_count);
> +				continue;
> +			}
> +
>  			if (evt->u.ue_error.physical_address_provided) {
>  				unsigned long pfn;
>  
> @@ -301,6 +311,12 @@ static void machine_check_process_queued_event(struct irq_work *work)
>  	while (__this_cpu_read(mce_queue_count) > 0) {
>  		index = __this_cpu_read(mce_queue_count) - 1;
>  		evt = this_cpu_ptr(&mce_event_queue[index]);
> +
> +		if (evt->error_type == MCE_ERROR_TYPE_UE &&
> +		    evt->u.ue_error.ignore_event) {
> +			__this_cpu_dec(mce_queue_count);
> +			continue;
> +		}
>  		machine_check_print_event_info(evt, false, false);
>  		__this_cpu_dec(mce_queue_count);
>  	}
> diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
> index e74816f045f8..1dd87f6f5186 100644
> --- a/arch/powerpc/kernel/mce_power.c
> +++ b/arch/powerpc/kernel/mce_power.c
> @@ -11,6 +11,7 @@
>  
>  #include <linux/types.h>
>  #include <linux/ptrace.h>
> +#include <linux/extable.h>
>  #include <asm/mmu.h>
>  #include <asm/mce.h>
>  #include <asm/machdep.h>
> @@ -18,6 +19,7 @@
>  #include <asm/pte-walk.h>
>  #include <asm/sstep.h>
>  #include <asm/exception-64s.h>
> +#include <asm/extable.h>
>  
>  /*
>   * Convert an address related to an mm to a physical address.
> @@ -559,9 +561,18 @@ static int mce_handle_derror(struct pt_regs *regs,
>  	return 0;
>  }
>  
> -static long mce_handle_ue_error(struct pt_regs *regs)
> +static long mce_handle_ue_error(struct pt_regs *regs,
> +				struct mce_error_info *mce_err)
>  {
>  	long handled = 0;
> +	const struct exception_table_entry *entry;
> +
> +	entry = search_kernel_exception_table(regs->nip);
> +	if (entry) {
> +		mce_err->ignore_event = true;
> +		regs->nip = extable_fixup(entry);
> +		return 1;
> +	}
>  
>  	/*
>  	 * On specific SCOM read via MMIO we may get a machine check
> @@ -594,7 +605,7 @@ static long mce_handle_error(struct pt_regs *regs,
>  				&phys_addr);
>  
>  	if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE)
> -		handled = mce_handle_ue_error(regs);
> +		handled = mce_handle_ue_error(regs, &mce_err);
>  
>  	save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr);
>  
>
Balbir Singh Aug. 14, 2019, 9:38 a.m. UTC | #2
On 12/8/19 7:22 pm, Santosh Sivaraj wrote:
> If we take a UE on one of the instructions with a fixup entry, set nip
> to continue execution at the fixup entry. Stop processing the event
> further or print it.
> 
> Co-developed-by: Reza Arbab <arbab@linux.ibm.com>
> Signed-off-by: Reza Arbab <arbab@linux.ibm.com>
> Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
> Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
> ---

Isn't this based on https://patchwork.ozlabs.org/patch/895294/? If so, it should still have my author tag and Signed-off-by.

Balbir Singh

>  arch/powerpc/include/asm/mce.h  |  4 +++-
>  arch/powerpc/kernel/mce.c       | 16 ++++++++++++++++
>  arch/powerpc/kernel/mce_power.c | 15 +++++++++++++--
>  3 files changed, 32 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
> index f3a6036b6bc0..e1931c8c2743 100644
> --- a/arch/powerpc/include/asm/mce.h
> +++ b/arch/powerpc/include/asm/mce.h
> @@ -122,7 +122,8 @@ struct machine_check_event {
>  			enum MCE_UeErrorType ue_error_type:8;
>  			u8		effective_address_provided;
>  			u8		physical_address_provided;
> -			u8		reserved_1[5];
> +			u8		ignore_event;
> +			u8		reserved_1[4];
>  			u64		effective_address;
>  			u64		physical_address;
>  			u8		reserved_2[8];
> @@ -193,6 +194,7 @@ struct mce_error_info {
>  	enum MCE_Initiator	initiator:8;
>  	enum MCE_ErrorClass	error_class:8;
>  	bool			sync_error;
> +	bool			ignore_event;
>  };
>  
>  #define MAX_MC_EVT	100
> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
> index a3b122a685a5..ec4b3e1087be 100644
> --- a/arch/powerpc/kernel/mce.c
> +++ b/arch/powerpc/kernel/mce.c
> @@ -149,6 +149,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
>  		if (phys_addr != ULONG_MAX) {
>  			mce->u.ue_error.physical_address_provided = true;
>  			mce->u.ue_error.physical_address = phys_addr;
> +			mce->u.ue_error.ignore_event = mce_err->ignore_event;
>  			machine_check_ue_event(mce);
>  		}
>  	}
> @@ -266,8 +267,17 @@ static void machine_process_ue_event(struct work_struct *work)
>  		/*
>  		 * This should probably queued elsewhere, but
>  		 * oh! well
> +		 *
> +		 * Don't report this machine check because the caller has a
> +		 * asked us to ignore the event, it has a fixup handler which
> +		 * will do the appropriate error handling and reporting.
>  		 */
>  		if (evt->error_type == MCE_ERROR_TYPE_UE) {
> +			if (evt->u.ue_error.ignore_event) {
> +				__this_cpu_dec(mce_ue_count);
> +				continue;
> +			}
> +
>  			if (evt->u.ue_error.physical_address_provided) {
>  				unsigned long pfn;
>  
> @@ -301,6 +311,12 @@ static void machine_check_process_queued_event(struct irq_work *work)
>  	while (__this_cpu_read(mce_queue_count) > 0) {
>  		index = __this_cpu_read(mce_queue_count) - 1;
>  		evt = this_cpu_ptr(&mce_event_queue[index]);
> +
> +		if (evt->error_type == MCE_ERROR_TYPE_UE &&
> +		    evt->u.ue_error.ignore_event) {
> +			__this_cpu_dec(mce_queue_count);
> +			continue;
> +		}
>  		machine_check_print_event_info(evt, false, false);
>  		__this_cpu_dec(mce_queue_count);
>  	}
> diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
> index e74816f045f8..1dd87f6f5186 100644
> --- a/arch/powerpc/kernel/mce_power.c
> +++ b/arch/powerpc/kernel/mce_power.c
> @@ -11,6 +11,7 @@
>  
>  #include <linux/types.h>
>  #include <linux/ptrace.h>
> +#include <linux/extable.h>
>  #include <asm/mmu.h>
>  #include <asm/mce.h>
>  #include <asm/machdep.h>
> @@ -18,6 +19,7 @@
>  #include <asm/pte-walk.h>
>  #include <asm/sstep.h>
>  #include <asm/exception-64s.h>
> +#include <asm/extable.h>
>  
>  /*
>   * Convert an address related to an mm to a physical address.
> @@ -559,9 +561,18 @@ static int mce_handle_derror(struct pt_regs *regs,
>  	return 0;
>  }
>  
> -static long mce_handle_ue_error(struct pt_regs *regs)
> +static long mce_handle_ue_error(struct pt_regs *regs,
> +				struct mce_error_info *mce_err)
>  {
>  	long handled = 0;
> +	const struct exception_table_entry *entry;
> +
> +	entry = search_kernel_exception_table(regs->nip);
> +	if (entry) {
> +		mce_err->ignore_event = true;
> +		regs->nip = extable_fixup(entry);
> +		return 1;
> +	}
>  
>  	/*
>  	 * On specific SCOM read via MMIO we may get a machine check
> @@ -594,7 +605,7 @@ static long mce_handle_error(struct pt_regs *regs,
>  				&phys_addr);
>  
>  	if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE)
> -		handled = mce_handle_ue_error(regs);
> +		handled = mce_handle_ue_error(regs, &mce_err);
>  
>  	save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr);
>  
>
Santosh Sivaraj Aug. 15, 2019, 12:15 a.m. UTC | #3
Hi Balbir,

Balbir Singh <bsingharora@gmail.com> writes:

> On 12/8/19 7:22 pm, Santosh Sivaraj wrote:
>> If we take a UE on one of the instructions with a fixup entry, set nip
>> to continue execution at the fixup entry. Stop processing the event
>> further or print it.
>> 
>> Co-developed-by: Reza Arbab <arbab@linux.ibm.com>
>> Signed-off-by: Reza Arbab <arbab@linux.ibm.com>
>> Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
>> Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
>> ---
>
> Isn't this based on https://patchwork.ozlabs.org/patch/895294/? If so it
> should still have my author tag and signed-off-by

Originally, when I received the series for posting, I had Reza's authorship and
Signed-off-by. Since the patch changed significantly, I added Reza as
Co-developed-by. I will update this in the next spin.

https://lore.kernel.org/linuxppc-dev/20190702051932.511-1-santosh@fossix.org/

Santosh
>
> Balbir Singh
>
>>  arch/powerpc/include/asm/mce.h  |  4 +++-
>>  arch/powerpc/kernel/mce.c       | 16 ++++++++++++++++
>>  arch/powerpc/kernel/mce_power.c | 15 +++++++++++++--
>>  3 files changed, 32 insertions(+), 3 deletions(-)
>> 
>> diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
>> index f3a6036b6bc0..e1931c8c2743 100644
>> --- a/arch/powerpc/include/asm/mce.h
>> +++ b/arch/powerpc/include/asm/mce.h
>> @@ -122,7 +122,8 @@ struct machine_check_event {
>>  			enum MCE_UeErrorType ue_error_type:8;
>>  			u8		effective_address_provided;
>>  			u8		physical_address_provided;
>> -			u8		reserved_1[5];
>> +			u8		ignore_event;
>> +			u8		reserved_1[4];
>>  			u64		effective_address;
>>  			u64		physical_address;
>>  			u8		reserved_2[8];
>> @@ -193,6 +194,7 @@ struct mce_error_info {
>>  	enum MCE_Initiator	initiator:8;
>>  	enum MCE_ErrorClass	error_class:8;
>>  	bool			sync_error;
>> +	bool			ignore_event;
>>  };
>>  
>>  #define MAX_MC_EVT	100
>> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
>> index a3b122a685a5..ec4b3e1087be 100644
>> --- a/arch/powerpc/kernel/mce.c
>> +++ b/arch/powerpc/kernel/mce.c
>> @@ -149,6 +149,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
>>  		if (phys_addr != ULONG_MAX) {
>>  			mce->u.ue_error.physical_address_provided = true;
>>  			mce->u.ue_error.physical_address = phys_addr;
>> +			mce->u.ue_error.ignore_event = mce_err->ignore_event;
>>  			machine_check_ue_event(mce);
>>  		}
>>  	}
>> @@ -266,8 +267,17 @@ static void machine_process_ue_event(struct work_struct *work)
>>  		/*
>>  		 * This should probably queued elsewhere, but
>>  		 * oh! well
>> +		 *
>> +		 * Don't report this machine check because the caller has a
>> +		 * asked us to ignore the event, it has a fixup handler which
>> +		 * will do the appropriate error handling and reporting.
>>  		 */
>>  		if (evt->error_type == MCE_ERROR_TYPE_UE) {
>> +			if (evt->u.ue_error.ignore_event) {
>> +				__this_cpu_dec(mce_ue_count);
>> +				continue;
>> +			}
>> +
>>  			if (evt->u.ue_error.physical_address_provided) {
>>  				unsigned long pfn;
>>  
>> @@ -301,6 +311,12 @@ static void machine_check_process_queued_event(struct irq_work *work)
>>  	while (__this_cpu_read(mce_queue_count) > 0) {
>>  		index = __this_cpu_read(mce_queue_count) - 1;
>>  		evt = this_cpu_ptr(&mce_event_queue[index]);
>> +
>> +		if (evt->error_type == MCE_ERROR_TYPE_UE &&
>> +		    evt->u.ue_error.ignore_event) {
>> +			__this_cpu_dec(mce_queue_count);
>> +			continue;
>> +		}
>>  		machine_check_print_event_info(evt, false, false);
>>  		__this_cpu_dec(mce_queue_count);
>>  	}
>> diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
>> index e74816f045f8..1dd87f6f5186 100644
>> --- a/arch/powerpc/kernel/mce_power.c
>> +++ b/arch/powerpc/kernel/mce_power.c
>> @@ -11,6 +11,7 @@
>>  
>>  #include <linux/types.h>
>>  #include <linux/ptrace.h>
>> +#include <linux/extable.h>
>>  #include <asm/mmu.h>
>>  #include <asm/mce.h>
>>  #include <asm/machdep.h>
>> @@ -18,6 +19,7 @@
>>  #include <asm/pte-walk.h>
>>  #include <asm/sstep.h>
>>  #include <asm/exception-64s.h>
>> +#include <asm/extable.h>
>>  
>>  /*
>>   * Convert an address related to an mm to a physical address.
>> @@ -559,9 +561,18 @@ static int mce_handle_derror(struct pt_regs *regs,
>>  	return 0;
>>  }
>>  
>> -static long mce_handle_ue_error(struct pt_regs *regs)
>> +static long mce_handle_ue_error(struct pt_regs *regs,
>> +				struct mce_error_info *mce_err)
>>  {
>>  	long handled = 0;
>> +	const struct exception_table_entry *entry;
>> +
>> +	entry = search_kernel_exception_table(regs->nip);
>> +	if (entry) {
>> +		mce_err->ignore_event = true;
>> +		regs->nip = extable_fixup(entry);
>> +		return 1;
>> +	}
>>  
>>  	/*
>>  	 * On specific SCOM read via MMIO we may get a machine check
>> @@ -594,7 +605,7 @@ static long mce_handle_error(struct pt_regs *regs,
>>  				&phys_addr);
>>  
>>  	if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE)
>> -		handled = mce_handle_ue_error(regs);
>> +		handled = mce_handle_ue_error(regs, &mce_err);
>>  
>>  	save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr);
>>  
>>
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index f3a6036b6bc0..e1931c8c2743 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -122,7 +122,8 @@  struct machine_check_event {
 			enum MCE_UeErrorType ue_error_type:8;
 			u8		effective_address_provided;
 			u8		physical_address_provided;
-			u8		reserved_1[5];
+			u8		ignore_event;
+			u8		reserved_1[4];
 			u64		effective_address;
 			u64		physical_address;
 			u8		reserved_2[8];
@@ -193,6 +194,7 @@  struct mce_error_info {
 	enum MCE_Initiator	initiator:8;
 	enum MCE_ErrorClass	error_class:8;
 	bool			sync_error;
+	bool			ignore_event;
 };
 
 #define MAX_MC_EVT	100
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index a3b122a685a5..ec4b3e1087be 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -149,6 +149,7 @@  void save_mce_event(struct pt_regs *regs, long handled,
 		if (phys_addr != ULONG_MAX) {
 			mce->u.ue_error.physical_address_provided = true;
 			mce->u.ue_error.physical_address = phys_addr;
+			mce->u.ue_error.ignore_event = mce_err->ignore_event;
 			machine_check_ue_event(mce);
 		}
 	}
@@ -266,8 +267,17 @@  static void machine_process_ue_event(struct work_struct *work)
 		/*
 		 * This should probably queued elsewhere, but
 		 * oh! well
+		 *
+		 * Don't report this machine check because the caller has
+		 * asked us to ignore the event; it has a fixup handler which
+		 * will do the appropriate error handling and reporting.
 		 */
 		if (evt->error_type == MCE_ERROR_TYPE_UE) {
+			if (evt->u.ue_error.ignore_event) {
+				__this_cpu_dec(mce_ue_count);
+				continue;
+			}
+
 			if (evt->u.ue_error.physical_address_provided) {
 				unsigned long pfn;
 
@@ -301,6 +311,12 @@  static void machine_check_process_queued_event(struct irq_work *work)
 	while (__this_cpu_read(mce_queue_count) > 0) {
 		index = __this_cpu_read(mce_queue_count) - 1;
 		evt = this_cpu_ptr(&mce_event_queue[index]);
+
+		if (evt->error_type == MCE_ERROR_TYPE_UE &&
+		    evt->u.ue_error.ignore_event) {
+			__this_cpu_dec(mce_queue_count);
+			continue;
+		}
 		machine_check_print_event_info(evt, false, false);
 		__this_cpu_dec(mce_queue_count);
 	}
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index e74816f045f8..1dd87f6f5186 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -11,6 +11,7 @@ 
 
 #include <linux/types.h>
 #include <linux/ptrace.h>
+#include <linux/extable.h>
 #include <asm/mmu.h>
 #include <asm/mce.h>
 #include <asm/machdep.h>
@@ -18,6 +19,7 @@ 
 #include <asm/pte-walk.h>
 #include <asm/sstep.h>
 #include <asm/exception-64s.h>
+#include <asm/extable.h>
 
 /*
  * Convert an address related to an mm to a physical address.
@@ -559,9 +561,18 @@  static int mce_handle_derror(struct pt_regs *regs,
 	return 0;
 }
 
-static long mce_handle_ue_error(struct pt_regs *regs)
+static long mce_handle_ue_error(struct pt_regs *regs,
+				struct mce_error_info *mce_err)
 {
 	long handled = 0;
+	const struct exception_table_entry *entry;
+
+	entry = search_kernel_exception_table(regs->nip);
+	if (entry) {
+		mce_err->ignore_event = true;
+		regs->nip = extable_fixup(entry);
+		return 1;
+	}
 
 	/*
 	 * On specific SCOM read via MMIO we may get a machine check
@@ -594,7 +605,7 @@  static long mce_handle_error(struct pt_regs *regs,
 				&phys_addr);
 
 	if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE)
-		handled = mce_handle_ue_error(regs);
+		handled = mce_handle_ue_error(regs, &mce_err);
 
 	save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr);