diff mbox series

[v4,5/5] powerpc/mce: hookup memory_failure for UE errors

Message ID 20170929042655.14570-6-bsingharora@gmail.com (mailing list archive)
State Accepted
Commit 733e4a4c4467033918f4e64f797b03efe209d82c
Headers show
Series Revisit MCE handling for UE Errors | expand

Commit Message

Balbir Singh Sept. 29, 2017, 4:26 a.m. UTC
If we are in user space and hit a UE error, we now have the
basic infrastructure to walk the page tables and find out
the effective address that was accessed, since the DAR
is not valid.

We use a work_queue content to hookup the bad pfn, any
other context causes problems, since memory_failure itself
can call into schedule() via lru_drain_ bits.

We could probably poison the struct page to avoid a race
between detection and taking corrective action.

Signed-off-by: Balbir Singh <bsingharora@gmail.com>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/mce.c | 70 +++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 67 insertions(+), 3 deletions(-)

Comments

Michael Ellerman Oct. 16, 2017, 5:38 a.m. UTC | #1
Balbir Singh <bsingharora@gmail.com> writes:

> If we are in user space and hit a UE error, we now have the
> basic infrastructure to walk the page tables and find out
> the effective address that was accessed, since the DAR
> is not valid.
>
> We use a work_queue content to hookup the bad pfn, any
> other context causes problems, since memory_failure itself
> can call into schedule() via lru_drain_ bits.
>
> We could probably poison the struct page to avoid a race
> between detection and taking corrective action.
>
> Signed-off-by: Balbir Singh <bsingharora@gmail.com>
> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>  arch/powerpc/kernel/mce.c | 70 +++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 67 insertions(+), 3 deletions(-)

I'm not sure why this is in mce.c but the rest was in mce_power.c ?

cheers
Balbir Singh Oct. 16, 2017, 5:40 a.m. UTC | #2
On Mon, Oct 16, 2017 at 4:38 PM, Michael Ellerman <mpe@ellerman.id.au> wrote:
> Balbir Singh <bsingharora@gmail.com> writes:
>
>> If we are in user space and hit a UE error, we now have the
>> basic infrastructure to walk the page tables and find out
>> the effective address that was accessed, since the DAR
>> is not valid.
>>
>> We use a work_queue content to hookup the bad pfn, any
>> other context causes problems, since memory_failure itself
>> can call into schedule() via lru_drain_ bits.
>>
>> We could probably poison the struct page to avoid a race
>> between detection and taking corrective action.
>>
>> Signed-off-by: Balbir Singh <bsingharora@gmail.com>
>> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>>  arch/powerpc/kernel/mce.c | 70 +++++++++++++++++++++++++++++++++++++++++++++--
>>  1 file changed, 67 insertions(+), 3 deletions(-)
>
> I'm not sure why this is in mce.c but the rest was in mce_power.c ?

The way the code is organized is that save_mce_event is implemented here
and called from mce_power.c. save_mce_event() does the processing
of the event.

Balbir Singh.
Michael Ellerman Oct. 16, 2017, 12:07 p.m. UTC | #3
Balbir Singh <bsingharora@gmail.com> writes:

> On Mon, Oct 16, 2017 at 4:38 PM, Michael Ellerman <mpe@ellerman.id.au> wrote:
>> Balbir Singh <bsingharora@gmail.com> writes:
>>
>>> If we are in user space and hit a UE error, we now have the
>>> basic infrastructure to walk the page tables and find out
>>> the effective address that was accessed, since the DAR
>>> is not valid.
>>>
>>> We use a work_queue content to hookup the bad pfn, any
>>> other context causes problems, since memory_failure itself
>>> can call into schedule() via lru_drain_ bits.
>>>
>>> We could probably poison the struct page to avoid a race
>>> between detection and taking corrective action.
>>>
>>> Signed-off-by: Balbir Singh <bsingharora@gmail.com>
>>> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
>>> ---
>>>  arch/powerpc/kernel/mce.c | 70 +++++++++++++++++++++++++++++++++++++++++++++--
>>>  1 file changed, 67 insertions(+), 3 deletions(-)
>>
>> I'm not sure why this is in mce.c but the rest was in mce_power.c ?
>
> The way the code is organized is that save_mce_event is implemented here
> and called from mce_power.c. save_mce_event() does the processing
> of the event.

save_mce_event() is only called from mce_power.c :/

I'd be happy if it was mce_real_mode.c and mce.c, that would be a good
distinction to make.

Though even then, most of the code is Book3s64 specific I think so maybe
it shouldn't be in just mce.c.

Anyway I'll merge this and we can improve things later.

cheers
diff mbox series

Patch

diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index ee148f4..299553e 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -39,11 +39,21 @@  static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
 static DEFINE_PER_CPU(int, mce_queue_count);
 static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
 
+/* Queue for delayed MCE UE events. */
+static DEFINE_PER_CPU(int, mce_ue_count);
+static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
+					mce_ue_event_queue);
+
 static void machine_check_process_queued_event(struct irq_work *work);
+void machine_check_ue_event(struct machine_check_event *evt);
+static void machine_process_ue_event(struct work_struct *work);
+
 static struct irq_work mce_event_process_work = {
         .func = machine_check_process_queued_event,
 };
 
+DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
+
 static void mce_set_error_info(struct machine_check_event *mce,
 			       struct mce_error_info *mce_err)
 {
@@ -143,6 +153,7 @@  void save_mce_event(struct pt_regs *regs, long handled,
 		if (phys_addr != ULONG_MAX) {
 			mce->u.ue_error.physical_address_provided = true;
 			mce->u.ue_error.physical_address = phys_addr;
+			machine_check_ue_event(mce);
 		}
 	}
 	return;
@@ -197,6 +208,26 @@  void release_mce_event(void)
 	get_mce_event(NULL, true);
 }
 
+
+/*
+ * Queue up the MCE event which then can be handled later.
+ */
+void machine_check_ue_event(struct machine_check_event *evt)
+{
+	int index;
+
+	index = __this_cpu_inc_return(mce_ue_count) - 1;
+	/* If queue is full, just return for now. */
+	if (index >= MAX_MC_EVT) {
+		__this_cpu_dec(mce_ue_count);
+		return;
+	}
+	memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));
+
+	/* Queue work to process this event later. */
+	schedule_work(&mce_ue_event_work);
+}
+
 /*
  * Queue up the MCE event which then can be handled later.
  */
@@ -219,7 +250,39 @@  void machine_check_queue_event(void)
 	/* Queue irq work to process this event later. */
 	irq_work_queue(&mce_event_process_work);
 }
-
+/*
+ * process pending MCE event from the mce event queue. This function will be
+ * called during syscall exit.
+ */
+static void machine_process_ue_event(struct work_struct *work)
+{
+	int index;
+	struct machine_check_event *evt;
+
+	while (__this_cpu_read(mce_ue_count) > 0) {
+		index = __this_cpu_read(mce_ue_count) - 1;
+		evt = this_cpu_ptr(&mce_ue_event_queue[index]);
+#ifdef CONFIG_MEMORY_FAILURE
+		/*
+		 * This should probably queued elsewhere, but
+		 * oh! well
+		 */
+		if (evt->error_type == MCE_ERROR_TYPE_UE) {
+			if (evt->u.ue_error.physical_address_provided) {
+				unsigned long pfn;
+
+				pfn = evt->u.ue_error.physical_address >>
+					PAGE_SHIFT;
+				memory_failure(pfn, SIGBUS, 0);
+			} else
+				pr_warn("Failed to identify bad address from "
+					"where the uncorrectable error (UE) "
+					"was generated\n");
+		}
+#endif
+		__this_cpu_dec(mce_ue_count);
+	}
+}
 /*
  * process pending MCE event from the mce event queue. This function will be
  * called during syscall exit.
@@ -227,6 +290,7 @@  void machine_check_queue_event(void)
 static void machine_check_process_queued_event(struct irq_work *work)
 {
 	int index;
+	struct machine_check_event *evt;
 
 	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
 
@@ -236,8 +300,8 @@  static void machine_check_process_queued_event(struct irq_work *work)
 	 */
 	while (__this_cpu_read(mce_queue_count) > 0) {
 		index = __this_cpu_read(mce_queue_count) - 1;
-		machine_check_print_event_info(
-				this_cpu_ptr(&mce_event_queue[index]), false);
+		evt = this_cpu_ptr(&mce_event_queue[index]);
+		machine_check_print_event_info(evt, false);
 		__this_cpu_dec(mce_queue_count);
 	}
 }