diff mbox

[RESEND-PATCH] cxl: Avoid double free_irq() for psl,slice interrupts

Message ID 20170602093555.9205-1-vaibhav@linux.vnet.ibm.com (mailing list archive)
State Superseded
Headers show

Commit Message

Vaibhav Jain June 2, 2017, 9:35 a.m. UTC
During EEH recovery, a call to cxl_remove() can result in a double
free_irq() of the psl,slice interrupts. This can happen if
perst_reloads_same_image == 1 and the call to cxl_configure_adapter()
fails during the slot_reset callback. In such a case we see a kernel
oops with the following back-trace:

Oops: Kernel access of bad area, sig: 11 [#1]
Call Trace:
  free_irq+0x88/0xd0 (unreliable)
  cxl_unmap_irq+0x20/0x40 [cxl]
  cxl_native_release_psl_irq+0x78/0xd8 [cxl]
  pci_deconfigure_afu+0xac/0x110 [cxl]
  cxl_remove+0x104/0x210 [cxl]
  pci_device_remove+0x6c/0x110
  device_release_driver_internal+0x204/0x2e0
  pci_stop_bus_device+0xa0/0xd0
  pci_stop_and_remove_bus_device+0x28/0x40
  pci_hp_remove_devices+0xb0/0x150
  pci_hp_remove_devices+0x68/0x150
  eeh_handle_normal_event+0x140/0x580
  eeh_handle_event+0x174/0x360
  eeh_event_handler+0x1e8/0x1f0

This patch fixes the double free_irq() by checking that the variables
that hold the virqs (err_virq, serr_virq, psl_virq) are not '0' before
un-mapping them, and by resetting these variables to '0' once they
have been un-mapped.

Cc: stable@vger.kernel.org
Signed-off-by: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>

---
Re-send:
- Added stable to recipients
---
 drivers/misc/cxl/native.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

Comments

Frederic Barrat June 2, 2017, 3:30 p.m. UTC | #1
Le 02/06/2017 à 11:35, Vaibhav Jain a écrit :
> During an eeh call to cxl_remove can result in double free_irq of
> psl,slice interrupts. This can happen if perst_reloads_same_image == 1
> and call to cxl_configure_adapter() fails during slot_reset
> callback. In such a case we see a kernel oops with following back-trace:
> 
> Oops: Kernel access of bad area, sig: 11 [#1]
> Call Trace:
>    free_irq+0x88/0xd0 (unreliable)
>    cxl_unmap_irq+0x20/0x40 [cxl]
>    cxl_native_release_psl_irq+0x78/0xd8 [cxl]
>    pci_deconfigure_afu+0xac/0x110 [cxl]
>    cxl_remove+0x104/0x210 [cxl]
>    pci_device_remove+0x6c/0x110
>    device_release_driver_internal+0x204/0x2e0
>    pci_stop_bus_device+0xa0/0xd0
>    pci_stop_and_remove_bus_device+0x28/0x40
>    pci_hp_remove_devices+0xb0/0x150
>    pci_hp_remove_devices+0x68/0x150
>    eeh_handle_normal_event+0x140/0x580
>    eeh_handle_event+0x174/0x360
>    eeh_event_handler+0x1e8/0x1f0
> 
> This patch fixes the issue of double free_irq by checking that
> variables that hold the virqs (err_hwirq, serr_hwirq, psl_virq) are
> not '0' before un-mapping and resetting these variables to '0' when
> they are un-mapped.
> 
> Cc: stable@vger.kernel.org
> Signed-off-by: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
> Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
> 
> ---
> Re-send:
> - Added stable to recipients
> ---
>   drivers/misc/cxl/native.c | 14 +++++++++++---
>   1 file changed, 11 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
> index 871a2f0..3ed2254 100644
> --- a/drivers/misc/cxl/native.c
> +++ b/drivers/misc/cxl/native.c
> @@ -1302,13 +1302,16 @@ int cxl_native_register_psl_err_irq(struct cxl *adapter)
> 
>   void cxl_native_release_psl_err_irq(struct cxl *adapter)
>   {
> -	if (adapter->native->err_virq != irq_find_mapping(NULL, adapter->native->err_hwirq))
> +	if (adapter->native->err_virq == 0 ||
> +	    adapter->native->err_virq !=
> +	    irq_find_mapping(NULL, adapter->native->err_hwirq))
>   		return;
> 
>   	cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x0000000000000000);
>   	cxl_unmap_irq(adapter->native->err_virq, adapter);
>   	cxl_ops->release_one_irq(adapter, adapter->native->err_hwirq);
>   	kfree(adapter->irq_name);
> +	adapter->native->err_virq = 0;
>   }
> 
>   int cxl_native_register_serr_irq(struct cxl_afu *afu)
> @@ -1346,13 +1349,15 @@ int cxl_native_register_serr_irq(struct cxl_afu *afu)
> 
>   void cxl_native_release_serr_irq(struct cxl_afu *afu)
>   {
> -	if (afu->serr_virq != irq_find_mapping(NULL, afu->serr_hwirq))
> +	if (afu->serr_virq == 0 ||
> +	    afu->serr_virq != irq_find_mapping(NULL, afu->serr_hwirq))
>   		return;
> 
>   	cxl_p1n_write(afu, CXL_PSL_SERR_An, 0x0000000000000000);
>   	cxl_unmap_irq(afu->serr_virq, afu);
>   	cxl_ops->release_one_irq(afu->adapter, afu->serr_hwirq);
>   	kfree(afu->err_irq_name);
> +	afu->serr_virq = 0;
>   }
> 
>   int cxl_native_register_psl_irq(struct cxl_afu *afu)
> @@ -1375,12 +1380,15 @@ int cxl_native_register_psl_irq(struct cxl_afu *afu)
> 
>   void cxl_native_release_psl_irq(struct cxl_afu *afu)
>   {
> -	if (afu->native->psl_virq != irq_find_mapping(NULL, afu->native->psl_hwirq))
> +	if (afu->native->psl_virq == 0 ||
> +	    afu->native->psl_virq !=
> +	    irq_find_mapping(NULL, afu->native->psl_virq))

                                                     ^^^
Shouldn't it be psl_hwirq?

   Fred

>   		return;
> 
>   	cxl_unmap_irq(afu->native->psl_virq, afu);
>   	cxl_ops->release_one_irq(afu->adapter, afu->native->psl_hwirq);
>   	kfree(afu->psl_irq_name);
> +	afu->native->psl_virq = 0;
>   }
> 
>   static void recover_psl_err(struct cxl_afu *afu, u64 errstat)
>
Vaibhav Jain June 2, 2017, 4:58 p.m. UTC | #2
Frederic Barrat <fbarrat@linux.vnet.ibm.com> writes:


>> +	    irq_find_mapping(NULL, afu->native->psl_virq))
>
>                                                      ^^^
> Shouldn't it be psl_hwirq?
>
Thanks for catching this Fred. I have sent a v2 patch fixing this.
diff mbox

Patch

diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index 871a2f0..3ed2254 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -1302,13 +1302,16 @@  int cxl_native_register_psl_err_irq(struct cxl *adapter)
 
 void cxl_native_release_psl_err_irq(struct cxl *adapter)
 {
-	if (adapter->native->err_virq != irq_find_mapping(NULL, adapter->native->err_hwirq))
+	if (adapter->native->err_virq == 0 ||
+	    adapter->native->err_virq !=
+	    irq_find_mapping(NULL, adapter->native->err_hwirq))
 		return;
 
 	cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x0000000000000000);
 	cxl_unmap_irq(adapter->native->err_virq, adapter);
 	cxl_ops->release_one_irq(adapter, adapter->native->err_hwirq);
 	kfree(adapter->irq_name);
+	adapter->native->err_virq = 0;
 }
 
 int cxl_native_register_serr_irq(struct cxl_afu *afu)
@@ -1346,13 +1349,15 @@  int cxl_native_register_serr_irq(struct cxl_afu *afu)
 
 void cxl_native_release_serr_irq(struct cxl_afu *afu)
 {
-	if (afu->serr_virq != irq_find_mapping(NULL, afu->serr_hwirq))
+	if (afu->serr_virq == 0 ||
+	    afu->serr_virq != irq_find_mapping(NULL, afu->serr_hwirq))
 		return;
 
 	cxl_p1n_write(afu, CXL_PSL_SERR_An, 0x0000000000000000);
 	cxl_unmap_irq(afu->serr_virq, afu);
 	cxl_ops->release_one_irq(afu->adapter, afu->serr_hwirq);
 	kfree(afu->err_irq_name);
+	afu->serr_virq = 0;
 }
 
 int cxl_native_register_psl_irq(struct cxl_afu *afu)
@@ -1375,12 +1380,15 @@  int cxl_native_register_psl_irq(struct cxl_afu *afu)
 
 void cxl_native_release_psl_irq(struct cxl_afu *afu)
 {
-	if (afu->native->psl_virq != irq_find_mapping(NULL, afu->native->psl_hwirq))
+	if (afu->native->psl_virq == 0 ||
+	    afu->native->psl_virq !=
+	    irq_find_mapping(NULL, afu->native->psl_hwirq))
 		return;
 
 	cxl_unmap_irq(afu->native->psl_virq, afu);
 	cxl_ops->release_one_irq(afu->adapter, afu->native->psl_hwirq);
 	kfree(afu->psl_irq_name);
+	afu->native->psl_virq = 0;
 }
 
 static void recover_psl_err(struct cxl_afu *afu, u64 errstat)