diff mbox series

[SRU,bionic/master-next,1/1,V2] UBUNTU: SAUCE: base/dd: limit release function changes to vfio driver only

Message ID 20181206123205.10050-1-apw@canonical.com
State New
Headers show
Series [SRU,bionic/master-next,1/1,V2] UBUNTU: SAUCE: base/dd: limit release function changes to vfio driver only | expand

Commit Message

Andy Whitcroft Dec. 6, 2018, 12:32 p.m. UTC
It seems that we can trigger the new race detection after remove()
with some drivers which clear the driver as they unreference
the module.  This leads us to fail to clear down those devices which
triggers suspend/resume issues.

Limit the core changes to only apply to the vfio driver while we
work with upstream on a more generic fix.

Fixes: 876dcb5f4576 ("UBUNTU: SAUCE: vfio -- release device lock before userspace requests")
BugLink: http://bugs.launchpad.net/bugs/1803942
Signed-off-by: Andy Whitcroft <apw@canonical.com>
---
 drivers/base/dd.c           | 10 +++++++++-
 drivers/vfio/pci/vfio_pci.c |  6 ++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

Comments

Colin Ian King Dec. 6, 2018, 12:36 p.m. UTC | #1
On 06/12/2018 12:32, Andy Whitcroft wrote:
> It seems that we can trigger the new race detection after remove()
> with some drivers which clear the driver as they unreference
> the module.  This leads us to fail to clear down those devices which
> triggers suspend/resume issues.
> 
> Limit the core changes to only apply to the vfio driver while we
> work with upstream on a more generic fix.
> 
> Fixes: 876dcb5f4576 ("UBUNTU: SAUCE: vfio -- release device lock before userspace requests")
> BugLink: http://bugs.launchpad.net/bugs/1803942
> Signed-off-by: Andy Whitcroft <apw@canonical.com>
> ---
>  drivers/base/dd.c           | 10 +++++++++-
>  drivers/vfio/pci/vfio_pci.c |  6 ++++++
>  2 files changed, 15 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/base/dd.c b/drivers/base/dd.c
> index 37c01054521b..045d4e4485b8 100644
> --- a/drivers/base/dd.c
> +++ b/drivers/base/dd.c
> @@ -820,6 +820,9 @@ int driver_attach(struct device_driver *drv)
>  }
>  EXPORT_SYMBOL_GPL(driver_attach);
>  
> +void *vfio_pci_driver_ptr = (void *)0xdeadfeed;
> +EXPORT_SYMBOL(vfio_pci_driver_ptr);
> +
>  /*
>   * __device_release_driver() must be called with @dev lock held.
>   * When called for a USB interface, @dev->parent lock must be held as well.
> @@ -872,8 +875,13 @@ static void __device_release_driver(struct device *dev, struct device *parent)
>  		 * A concurrent invocation of the same function might
>  		 * have released the driver successfully while this one
>  		 * was waiting, so check for that.
> +		 * LP: #1792099
> +		 *
> +		 * Limit this to the vfio_pci_driver as some drivers NULL
> +		 * out this pointer in their remove() function.
> +		 * LP: #1803942
>  		 */
> -		if (dev->driver != drv)
> +		if (drv == vfio_pci_driver_ptr && dev->driver != drv)
>  			return;
>  
>  		device_links_driver_cleanup(dev);
> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> index f041b1a6cf66..6e70281715c4 100644
> --- a/drivers/vfio/pci/vfio_pci.c
> +++ b/drivers/vfio/pci/vfio_pci.c
> @@ -1310,6 +1310,7 @@ static struct pci_driver vfio_pci_driver = {
>  	.remove		= vfio_pci_remove,
>  	.err_handler	= &vfio_err_handlers,
>  };
> +extern void *vfio_pci_driver_ptr;
>  
>  struct vfio_devices {
>  	struct vfio_device **devices;
> @@ -1404,6 +1405,8 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
>  
>  static void __exit vfio_pci_cleanup(void)
>  {
> +	vfio_pci_driver_ptr = (void *)0xdeadfeed;

Out of interest, is there any reason why NULL wasn't used apart from we
can identify these faults easier because of this specific address?

> +
>  	pci_unregister_driver(&vfio_pci_driver);
>  	vfio_pci_uninit_perm_bits();
>  }
> @@ -1465,6 +1468,9 @@ static int __init vfio_pci_init(void)
>  
>  	vfio_pci_fill_ids();
>  
> +	/* Advertise my address. */
> +	vfio_pci_driver_ptr = &vfio_pci_driver;
> +
>  	return 0;
>  
>  out_driver:
> 

As a temporary workaround this seems OK to me, but only as a workaround.
I'm assuming that this will be replaced by the official upstream fix at
a later date.

Acked-by: Colin Ian King <colin.king@canonical.com>
Stefan Bader Dec. 6, 2018, 12:52 p.m. UTC | #2
On 06.12.18 13:32, Andy Whitcroft wrote:
> It seems that we can trigger the new race detection after remove()
> with some drivers which clear the driver as they unreference
> the module.  This leads us to fail to clear down those devices which
> triggers suspend/resume issues.
> 
> Limit the core changes to only apply to the vfio driver while we
> work with upstream on a more generic fix.
> 
> Fixes: 876dcb5f4576 ("UBUNTU: SAUCE: vfio -- release device lock before userspace requests")
> BugLink: http://bugs.launchpad.net/bugs/1803942
> Signed-off-by: Andy Whitcroft <apw@canonical.com>
Acked-by: Stefan Bader <stefan.bader@canonical.com>
> ---

I guess this is better as the variable is now located in the base driver and
thus exists regardless of the vfio driver being loaded. Not sure whether the
pci_unregister_driver call in vfio would still want the pointer set. But if that
sounds ok to you I am good with that.

-Stefan

>  drivers/base/dd.c           | 10 +++++++++-
>  drivers/vfio/pci/vfio_pci.c |  6 ++++++
>  2 files changed, 15 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/base/dd.c b/drivers/base/dd.c
> index 37c01054521b..045d4e4485b8 100644
> --- a/drivers/base/dd.c
> +++ b/drivers/base/dd.c
> @@ -820,6 +820,9 @@ int driver_attach(struct device_driver *drv)
>  }
>  EXPORT_SYMBOL_GPL(driver_attach);
>  
> +void *vfio_pci_driver_ptr = (void *)0xdeadfeed;
> +EXPORT_SYMBOL(vfio_pci_driver_ptr);
> +
>  /*
>   * __device_release_driver() must be called with @dev lock held.
>   * When called for a USB interface, @dev->parent lock must be held as well.
> @@ -872,8 +875,13 @@ static void __device_release_driver(struct device *dev, struct device *parent)
>  		 * A concurrent invocation of the same function might
>  		 * have released the driver successfully while this one
>  		 * was waiting, so check for that.
> +		 * LP: #1792099
> +		 *
> +		 * Limit this to the vfio_pci_driver as some drivers NULL
> +		 * out this pointer in their remove() function.
> +		 * LP: #1803942
>  		 */
> -		if (dev->driver != drv)
> +		if (drv == vfio_pci_driver_ptr && dev->driver != drv)
>  			return;
>  
>  		device_links_driver_cleanup(dev);
> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> index f041b1a6cf66..6e70281715c4 100644
> --- a/drivers/vfio/pci/vfio_pci.c
> +++ b/drivers/vfio/pci/vfio_pci.c
> @@ -1310,6 +1310,7 @@ static struct pci_driver vfio_pci_driver = {
>  	.remove		= vfio_pci_remove,
>  	.err_handler	= &vfio_err_handlers,
>  };
> +extern void *vfio_pci_driver_ptr;
>  
>  struct vfio_devices {
>  	struct vfio_device **devices;
> @@ -1404,6 +1405,8 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
>  
>  static void __exit vfio_pci_cleanup(void)
>  {
> +	vfio_pci_driver_ptr = (void *)0xdeadfeed;
> +
>  	pci_unregister_driver(&vfio_pci_driver);
>  	vfio_pci_uninit_perm_bits();
>  }
> @@ -1465,6 +1468,9 @@ static int __init vfio_pci_init(void)
>  
>  	vfio_pci_fill_ids();
>  
> +	/* Advertise my address. */
> +	vfio_pci_driver_ptr = &vfio_pci_driver;
> +
>  	return 0;
>  
>  out_driver:
>
Andy Whitcroft Dec. 6, 2018, 12:57 p.m. UTC | #3
On Thu, Dec 06, 2018 at 01:52:55PM +0100, Stefan Bader wrote:
> On 06.12.18 13:32, Andy Whitcroft wrote:
> > It seems that we can trigger the new race detection after remove()
> > with some drivers which clear the driver as they unreference
> > the module.  This leads us to fail to clear down those devices which
> > triggers suspend/resume issues.
> > 
> > Limit the core changes to only apply to the vfio driver while we
> > work with upstream on a more generic fix.
> > 
> > Fixes: 876dcb5f4576 ("UBUNTU: SAUCE: vfio -- release device lock before userspace requests")
> > BugLink: http://bugs.launchpad.net/bugs/1803942
> > Signed-off-by: Andy Whitcroft <apw@canonical.com>
> Acked-by: Stefan Bader <stefan.bader@canonical.com>
> > ---
> 
> I guess this is better as the variable is now located in the base driver and
> thus exists regardless of the vfio driver being loaded. Not sure whether the
> pci_unregister_driver call in vfio would still want the pointer set. But if that
> sounds ok to you I am good with that.

I do not believe it needs it.  This is not something many configurations
can even trigger and removing the driver is not something most people
would do in the normal run of things.

-apw
Kleber Sacilotto de Souza Dec. 6, 2018, 1:25 p.m. UTC | #4
On 12/6/18 1:32 PM, Andy Whitcroft wrote:
> It seems that we can trigger the new race detection after remove()
> with some drivers which clear the driver as they unreference
> the module.  This leads us to fail to clear down those devices which
> triggers suspend/resume issues.
>
> Limit the core changes to only apply to the vfio driver while we
> work with upstream on a more generic fix.
>
> Fixes: 876dcb5f4576 ("UBUNTU: SAUCE: vfio -- release device lock before userspace requests")
> BugLink: http://bugs.launchpad.net/bugs/1803942
> Signed-off-by: Andy Whitcroft <apw@canonical.com>
> ---
>  drivers/base/dd.c           | 10 +++++++++-
>  drivers/vfio/pci/vfio_pci.c |  6 ++++++
>  2 files changed, 15 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/base/dd.c b/drivers/base/dd.c
> index 37c01054521b..045d4e4485b8 100644
> --- a/drivers/base/dd.c
> +++ b/drivers/base/dd.c
> @@ -820,6 +820,9 @@ int driver_attach(struct device_driver *drv)
>  }
>  EXPORT_SYMBOL_GPL(driver_attach);
>  
> +void *vfio_pci_driver_ptr = (void *)0xdeadfeed;
> +EXPORT_SYMBOL(vfio_pci_driver_ptr);
> +
>  /*
>   * __device_release_driver() must be called with @dev lock held.
>   * When called for a USB interface, @dev->parent lock must be held as well.
> @@ -872,8 +875,13 @@ static void __device_release_driver(struct device *dev, struct device *parent)
>  		 * A concurrent invocation of the same function might
>  		 * have released the driver successfully while this one
>  		 * was waiting, so check for that.
> +		 * LP: #1792099
> +		 *
> +		 * Limit this to the vfio_pci_driver as some drivers NULL
> +		 * out this pointer in their remove() function.
> +		 * LP: #1803942
>  		 */
> -		if (dev->driver != drv)
> +		if (drv == vfio_pci_driver_ptr && dev->driver != drv)
>  			return;
>  
>  		device_links_driver_cleanup(dev);
> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> index f041b1a6cf66..6e70281715c4 100644
> --- a/drivers/vfio/pci/vfio_pci.c
> +++ b/drivers/vfio/pci/vfio_pci.c
> @@ -1310,6 +1310,7 @@ static struct pci_driver vfio_pci_driver = {
>  	.remove		= vfio_pci_remove,
>  	.err_handler	= &vfio_err_handlers,
>  };
> +extern void *vfio_pci_driver_ptr;
>  
>  struct vfio_devices {
>  	struct vfio_device **devices;
> @@ -1404,6 +1405,8 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
>  
>  static void __exit vfio_pci_cleanup(void)
>  {
> +	vfio_pci_driver_ptr = (void *)0xdeadfeed;
> +
>  	pci_unregister_driver(&vfio_pci_driver);
>  	vfio_pci_uninit_perm_bits();
>  }
> @@ -1465,6 +1468,9 @@ static int __init vfio_pci_init(void)
>  
>  	vfio_pci_fill_ids();
>  
> +	/* Advertise my address. */
> +	vfio_pci_driver_ptr = &vfio_pci_driver;
> +
>  	return 0;
>  
>  out_driver:

Applied to bionic/master-next branch.

Thanks,
Kleber
diff mbox series

Patch

diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 37c01054521b..045d4e4485b8 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -820,6 +820,9 @@  int driver_attach(struct device_driver *drv)
 }
 EXPORT_SYMBOL_GPL(driver_attach);
 
+void *vfio_pci_driver_ptr = (void *)0xdeadfeed;
+EXPORT_SYMBOL(vfio_pci_driver_ptr);
+
 /*
  * __device_release_driver() must be called with @dev lock held.
  * When called for a USB interface, @dev->parent lock must be held as well.
@@ -872,8 +875,13 @@  static void __device_release_driver(struct device *dev, struct device *parent)
 		 * A concurrent invocation of the same function might
 		 * have released the driver successfully while this one
 		 * was waiting, so check for that.
+		 * LP: #1792099
+		 *
+		 * Limit this to the vfio_pci_driver as some drivers NULL
+		 * out this pointer in their remove() function.
+		 * LP: #1803942
 		 */
-		if (dev->driver != drv)
+		if (drv == vfio_pci_driver_ptr && dev->driver != drv)
 			return;
 
 		device_links_driver_cleanup(dev);
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index f041b1a6cf66..6e70281715c4 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -1310,6 +1310,7 @@  static struct pci_driver vfio_pci_driver = {
 	.remove		= vfio_pci_remove,
 	.err_handler	= &vfio_err_handlers,
 };
+extern void *vfio_pci_driver_ptr;
 
 struct vfio_devices {
 	struct vfio_device **devices;
@@ -1404,6 +1405,8 @@  static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
 
 static void __exit vfio_pci_cleanup(void)
 {
+	vfio_pci_driver_ptr = (void *)0xdeadfeed;
+
 	pci_unregister_driver(&vfio_pci_driver);
 	vfio_pci_uninit_perm_bits();
 }
@@ -1465,6 +1468,9 @@  static int __init vfio_pci_init(void)
 
 	vfio_pci_fill_ids();
 
+	/* Advertise my address. */
+	vfio_pci_driver_ptr = &vfio_pci_driver;
+
 	return 0;
 
 out_driver: