diff mbox series

[v1,2/7] powerpc/kernel: Add uevents in EEH error/resume

Message ID 20171213153242.98015-3-bryantly@linux.vnet.ibm.com (mailing list archive)
State Superseded
Headers show
Series SR-IOV Enablement on PowerVM | expand

Commit Message

Bryant G. Ly Dec. 13, 2017, 3:32 p.m. UTC
Devices can go offline when an EEH error is reported. This patch emits
a change uevent on the device's kernel object to let udev know of the
error. When the device resumes, a change uevent is emitted again,
reporting the device as online. As a result, EEH events are better
propagated to user space for devices on the powerpc arch.

Signed-off-by: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
Signed-off-by: Juan J. Alvarez <jjalvare@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/eeh_driver.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

Comments

Alexey Kardashevskiy Dec. 18, 2017, 3:54 a.m. UTC | #1
On 14/12/17 02:32, Bryant G. Ly wrote:
> Devices can go offline when EEH is reported. This patch adds
> a change to the kernel object and lets udev know of error.
> When device resumes a change is also set reporting device as
> online. Therefore, EEH events are better propagated to user
> space for devices in powerpc arch.
> 
> Signed-off-by: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
> Signed-off-by: Juan J. Alvarez <jjalvare@linux.vnet.ibm.com>
> ---
>  arch/powerpc/kernel/eeh_driver.c | 5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
> index 3c0fa99c5533..c61bf770282b 100644
> --- a/arch/powerpc/kernel/eeh_driver.c
> +++ b/arch/powerpc/kernel/eeh_driver.c
> @@ -204,6 +204,7 @@ static void *eeh_report_error(void *data, void *userdata)
>  	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
>  	enum pci_ers_result rc, *res = userdata;
>  	struct pci_driver *driver;
> +	char *envp[] = {"EVENT=EEH_ERROR", "ONLINE=0", NULL};

scripts/checkpatch.pl:

WARNING: char * array declaration might be better as static const
#27: FILE: arch/powerpc/kernel/eeh_driver.c:207:
+       char *envp[] = {"EVENT=EEH_ERROR", "ONLINE=0", NULL};



>  
>  	if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
>  		return NULL;
> @@ -228,6 +229,7 @@ static void *eeh_report_error(void *data, void *userdata)
>  
>  	edev->in_error = true;
>  	eeh_pcid_put(dev);
> +	kobject_uevent_env(&dev->dev.kobj, KOBJ_CHANGE, envp);
>  	return NULL;
>  }
>  
> @@ -358,6 +360,7 @@ static void *eeh_report_resume(void *data, void *userdata)
>  	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
>  	bool was_in_error;
>  	struct pci_driver *driver;
> +	char *envp[] = {"EVENT=EEH_RESUME", "ONLINE=1", NULL};


WARNING: char * array declaration might be better as static const
#43: FILE: arch/powerpc/kernel/eeh_driver.c:363:
+       char *envp[] = {"EVENT=EEH_RESUME", "ONLINE=1", NULL};


>  
>  	if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
>  		return NULL;
> @@ -379,8 +382,8 @@ static void *eeh_report_resume(void *data, void *userdata)
>  	}
>  
>  	driver->err_handler->resume(dev);
> -

Unnecessary change.


>  	eeh_pcid_put(dev);
> +	kobject_uevent_env(&dev->dev.kobj, KOBJ_CHANGE, envp);
>  	return NULL;
>  }
>  
>
Russell Currey Dec. 18, 2017, 4:15 a.m. UTC | #2
On Wed, 2017-12-13 at 09:32 -0600, Bryant G. Ly wrote:
> Devices can go offline when EEH is reported. This patch adds
> a change to the kernel object and lets udev know of error.
> When device resumes a change is also set reporting device as
> online. Therefore, EEH events are better propagated to user
> space for devices in powerpc arch.
> 
> Signed-off-by: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
> Signed-off-by: Juan J. Alvarez <jjalvare@linux.vnet.ibm.com>
> 

It would probably also be useful to communicate when recovery fails and
a device is no longer usable, so userspace knows not to keep waiting
for recovery to complete.
Bryant G. Ly Dec. 18, 2017, 6:45 p.m. UTC | #3
On 12/17/17 9:54 PM, Alexey Kardashevskiy wrote:

> On 14/12/17 02:32, Bryant G. Ly wrote:
>> Devices can go offline when EEH is reported. This patch adds
>> a change to the kernel object and lets udev know of error.
>> When device resumes a change is also set reporting device as
>> online. Therefore, EEH events are better propagated to user
>> space for devices in powerpc arch.
>>
>> Signed-off-by: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
>> Signed-off-by: Juan J. Alvarez <jjalvare@linux.vnet.ibm.com>
>> ---
>>  arch/powerpc/kernel/eeh_driver.c | 5 ++++-
>>  1 file changed, 4 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
>> index 3c0fa99c5533..c61bf770282b 100644
>> --- a/arch/powerpc/kernel/eeh_driver.c
>> +++ b/arch/powerpc/kernel/eeh_driver.c
>> @@ -204,6 +204,7 @@ static void *eeh_report_error(void *data, void *userdata)
>>  	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
>>  	enum pci_ers_result rc, *res = userdata;
>>  	struct pci_driver *driver;
>> +	char *envp[] = {"EVENT=EEH_ERROR", "ONLINE=0", NULL};
> scripts/checkpatch.pl:
>
> WARNING: char * array declaration might be better as static const
> #27: FILE: arch/powerpc/kernel/eeh_driver.c:207:
> +       char *envp[] = {"EVENT=EEH_ERROR", "ONLINE=0", NULL};
>
>
>
>>  
>>  	if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
>>  		return NULL;
>> @@ -228,6 +229,7 @@ static void *eeh_report_error(void *data, void *userdata)
>>  
>>  	edev->in_error = true;
>>  	eeh_pcid_put(dev);
>> +	kobject_uevent_env(&dev->dev.kobj, KOBJ_CHANGE, envp);
>>  	return NULL;
>>  }
>>  
>> @@ -358,6 +360,7 @@ static void *eeh_report_resume(void *data, void *userdata)
>>  	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
>>  	bool was_in_error;
>>  	struct pci_driver *driver;
>> +	char *envp[] = {"EVENT=EEH_RESUME", "ONLINE=1", NULL};
>
> WARNING: char * array declaration might be better as static const
> #43: FILE: arch/powerpc/kernel/eeh_driver.c:363:
> +       char *envp[] = {"EVENT=EEH_RESUME", "ONLINE=1", NULL};
>
>
>
Checkpatch is wrong here: it doesn't check the function that uses it, which only takes a char *.

-Bryant
diff mbox series

Patch

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 3c0fa99c5533..c61bf770282b 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -204,6 +204,7 @@  static void *eeh_report_error(void *data, void *userdata)
 	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 	enum pci_ers_result rc, *res = userdata;
 	struct pci_driver *driver;
+	char *envp[] = {"EVENT=EEH_ERROR", "ONLINE=0", NULL};
 
 	if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
 		return NULL;
@@ -228,6 +229,7 @@  static void *eeh_report_error(void *data, void *userdata)
 
 	edev->in_error = true;
 	eeh_pcid_put(dev);
+	kobject_uevent_env(&dev->dev.kobj, KOBJ_CHANGE, envp);
 	return NULL;
 }
 
@@ -358,6 +360,7 @@  static void *eeh_report_resume(void *data, void *userdata)
 	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 	bool was_in_error;
 	struct pci_driver *driver;
+	char *envp[] = {"EVENT=EEH_RESUME", "ONLINE=1", NULL};
 
 	if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
 		return NULL;
@@ -379,8 +382,8 @@  static void *eeh_report_resume(void *data, void *userdata)
 	}
 
 	driver->err_handler->resume(dev);
-
 	eeh_pcid_put(dev);
+	kobject_uevent_env(&dev->dev.kobj, KOBJ_CHANGE, envp);
 	return NULL;
 }