diff mbox series

[RESEND,1/2] powerpc/mce: Add MCE notification chain

Message ID 20200709075635.643740-1-santosh@fossix.org (mailing list archive)
State Superseded
Headers show
Series [RESEND,1/2] powerpc/mce: Add MCE notification chain | expand

Commit Message

Santosh Sivaraj July 9, 2020, 7:56 a.m. UTC
Introduce a notification chain which lets subscribers know about uncorrected
memory errors (UE). This would help prospective users in the pmem or nvdimm
subsystems to track bad blocks for better handling of persistent memory allocations.

Signed-off-by: Santosh S <santosh@fossix.org>
Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com>
---
 arch/powerpc/include/asm/mce.h |  2 ++
 arch/powerpc/kernel/mce.c      | 15 +++++++++++++++
 2 files changed, 17 insertions(+)

Send the two patches together, so the dependencies are clear. The earlier patch reviews are
here: https://lore.kernel.org/linuxppc-dev/20200330071219.12284-1-ganeshgr@linux.ibm.com/

Rebase the patches on top of 5.8-rc4

Comments

Christophe Leroy July 9, 2020, 8:07 a.m. UTC | #1
Le 09/07/2020 à 09:56, Santosh Sivaraj a écrit :
> Introduce notification chain which lets know about uncorrected memory
> errors(UE). This would help prospective users in pmem or nvdimm subsystem
> to track bad blocks for better handling of persistent memory allocations.
> 
> Signed-off-by: Santosh S <santosh@fossix.org>
> Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com>
> ---
>   arch/powerpc/include/asm/mce.h |  2 ++
>   arch/powerpc/kernel/mce.c      | 15 +++++++++++++++
>   2 files changed, 17 insertions(+)
> 
> Send the two patches together, so the dependencies are clear. The earlier patch reviews are
> here: https://lore.kernel.org/linuxppc-dev/20200330071219.12284-1-ganeshgr@linux.ibm.com/
> 
> Rebase the patches on top on 5.8-rc4
> 
> diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
> index 376a395daf329..a57b0772702a9 100644
> --- a/arch/powerpc/include/asm/mce.h
> +++ b/arch/powerpc/include/asm/mce.h
> @@ -220,6 +220,8 @@ extern void machine_check_print_event_info(struct machine_check_event *evt,
>   unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr);
>   extern void mce_common_process_ue(struct pt_regs *regs,
>   				  struct mce_error_info *mce_err);
> +extern int mce_register_notifier(struct notifier_block *nb);
> +extern int mce_unregister_notifier(struct notifier_block *nb);

Using the 'extern' keyword on function declarations is pointless and
should be avoided in new patches (checkpatch.pl --strict usually
complains about it).

>   #ifdef CONFIG_PPC_BOOK3S_64
>   void flush_and_reload_slb(void);
>   #endif /* CONFIG_PPC_BOOK3S_64 */
> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
> index fd90c0eda2290..b7b3ed4e61937 100644
> --- a/arch/powerpc/kernel/mce.c
> +++ b/arch/powerpc/kernel/mce.c
> @@ -49,6 +49,20 @@ static struct irq_work mce_ue_event_irq_work = {
>   
>   DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
>   
> +static BLOCKING_NOTIFIER_HEAD(mce_notifier_list);
> +
> +int mce_register_notifier(struct notifier_block *nb)
> +{
> +	return blocking_notifier_chain_register(&mce_notifier_list, nb);
> +}
> +EXPORT_SYMBOL_GPL(mce_register_notifier);
> +
> +int mce_unregister_notifier(struct notifier_block *nb)
> +{
> +	return blocking_notifier_chain_unregister(&mce_notifier_list, nb);
> +}
> +EXPORT_SYMBOL_GPL(mce_unregister_notifier);
> +
>   static void mce_set_error_info(struct machine_check_event *mce,
>   			       struct mce_error_info *mce_err)
>   {
> @@ -278,6 +292,7 @@ static void machine_process_ue_event(struct work_struct *work)
>   	while (__this_cpu_read(mce_ue_count) > 0) {
>   		index = __this_cpu_read(mce_ue_count) - 1;
>   		evt = this_cpu_ptr(&mce_ue_event_queue[index]);
> +		blocking_notifier_call_chain(&mce_notifier_list, 0, evt);
>   #ifdef CONFIG_MEMORY_FAILURE
>   		/*
>   		 * This should probably queued elsewhere, but
> 

Christophe
Santosh Sivaraj July 9, 2020, 9:24 a.m. UTC | #2
Christophe Leroy <christophe.leroy@csgroup.eu> writes:

> Le 09/07/2020 à 09:56, Santosh Sivaraj a écrit :
>> Introduce notification chain which lets know about uncorrected memory
>> errors(UE). This would help prospective users in pmem or nvdimm subsystem
>> to track bad blocks for better handling of persistent memory allocations.
>> 
>> Signed-off-by: Santosh S <santosh@fossix.org>
>> Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com>
>> ---
>>   arch/powerpc/include/asm/mce.h |  2 ++
>>   arch/powerpc/kernel/mce.c      | 15 +++++++++++++++
>>   2 files changed, 17 insertions(+)
>> 
>> Send the two patches together, so the dependencies are clear. The earlier patch reviews are
>> here: https://lore.kernel.org/linuxppc-dev/20200330071219.12284-1-ganeshgr@linux.ibm.com/
>> 
>> Rebase the patches on top on 5.8-rc4
>> 
>> diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
>> index 376a395daf329..a57b0772702a9 100644
>> --- a/arch/powerpc/include/asm/mce.h
>> +++ b/arch/powerpc/include/asm/mce.h
>> @@ -220,6 +220,8 @@ extern void machine_check_print_event_info(struct machine_check_event *evt,
>>   unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr);
>>   extern void mce_common_process_ue(struct pt_regs *regs,
>>   				  struct mce_error_info *mce_err);
>> +extern int mce_register_notifier(struct notifier_block *nb);
>> +extern int mce_unregister_notifier(struct notifier_block *nb);
>
> Using the 'extern' keyword on function declaration is pointless and 
> should be avoided in new patches. (checkpatch.pl --strict usually 
> complains about it).

I will remove that in v2, which I will be sending to address your comments on
the other patch.

Thanks,
Santosh

>
>>   #ifdef CONFIG_PPC_BOOK3S_64
>>   void flush_and_reload_slb(void);
>>   #endif /* CONFIG_PPC_BOOK3S_64 */
>> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
>> index fd90c0eda2290..b7b3ed4e61937 100644
>> --- a/arch/powerpc/kernel/mce.c
>> +++ b/arch/powerpc/kernel/mce.c
>> @@ -49,6 +49,20 @@ static struct irq_work mce_ue_event_irq_work = {
>>   
>>   DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
>>   
>> +static BLOCKING_NOTIFIER_HEAD(mce_notifier_list);
>> +
>> +int mce_register_notifier(struct notifier_block *nb)
>> +{
>> +	return blocking_notifier_chain_register(&mce_notifier_list, nb);
>> +}
>> +EXPORT_SYMBOL_GPL(mce_register_notifier);
>> +
>> +int mce_unregister_notifier(struct notifier_block *nb)
>> +{
>> +	return blocking_notifier_chain_unregister(&mce_notifier_list, nb);
>> +}
>> +EXPORT_SYMBOL_GPL(mce_unregister_notifier);
>> +
>>   static void mce_set_error_info(struct machine_check_event *mce,
>>   			       struct mce_error_info *mce_err)
>>   {
>> @@ -278,6 +292,7 @@ static void machine_process_ue_event(struct work_struct *work)
>>   	while (__this_cpu_read(mce_ue_count) > 0) {
>>   		index = __this_cpu_read(mce_ue_count) - 1;
>>   		evt = this_cpu_ptr(&mce_ue_event_queue[index]);
>> +		blocking_notifier_call_chain(&mce_notifier_list, 0, evt);
>>   #ifdef CONFIG_MEMORY_FAILURE
>>   		/*
>>   		 * This should probably queued elsewhere, but
>> 
>
> Christophe
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index 376a395daf329..a57b0772702a9 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -220,6 +220,8 @@  extern void machine_check_print_event_info(struct machine_check_event *evt,
 unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr);
 extern void mce_common_process_ue(struct pt_regs *regs,
 				  struct mce_error_info *mce_err);
+extern int mce_register_notifier(struct notifier_block *nb);
+extern int mce_unregister_notifier(struct notifier_block *nb);
 #ifdef CONFIG_PPC_BOOK3S_64
 void flush_and_reload_slb(void);
 #endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index fd90c0eda2290..b7b3ed4e61937 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -49,6 +49,20 @@  static struct irq_work mce_ue_event_irq_work = {
 
 DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
 
+static BLOCKING_NOTIFIER_HEAD(mce_notifier_list);
+
+int mce_register_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&mce_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(mce_register_notifier);
+
+int mce_unregister_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&mce_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(mce_unregister_notifier);
+
 static void mce_set_error_info(struct machine_check_event *mce,
 			       struct mce_error_info *mce_err)
 {
@@ -278,6 +292,7 @@  static void machine_process_ue_event(struct work_struct *work)
 	while (__this_cpu_read(mce_ue_count) > 0) {
 		index = __this_cpu_read(mce_ue_count) - 1;
 		evt = this_cpu_ptr(&mce_ue_event_queue[index]);
+		blocking_notifier_call_chain(&mce_notifier_list, 0, evt);
 #ifdef CONFIG_MEMORY_FAILURE
 		/*
 		 * This should probably queued elsewhere, but