diff mbox series

[RFC,8/8] pci: use finer grain locking for bus protection

Message ID 20240722151936.1452299-9-kbusch@meta.com
State New
Headers show
Series pci: rescan/remove locking rework | expand

Commit Message

Keith Busch July 22, 2024, 3:19 p.m. UTC
From: Keith Busch <kbusch@kernel.org>

The global rescan_remove lock has deadlocks during concurrent removals
because it is used within interrupt handlers. Use a bus specific lock
instead.

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/pci/bus.c                | 11 ++++++++---
 drivers/pci/hotplug/pciehp_pci.c | 11 ++++++-----
 drivers/pci/pci-sysfs.c          |  2 ++
 drivers/pci/pci.c                |  5 ++++-
 drivers/pci/probe.c              |  9 +++++++++
 drivers/pci/remove.c             | 10 ++++++++++
 include/linux/pci.h              | 11 +++++++++++
 7 files changed, 50 insertions(+), 9 deletions(-)

Comments

Jonathan Cameron Aug. 15, 2024, 3:21 p.m. UTC | #1
On Mon, 22 Jul 2024 08:19:36 -0700
Keith Busch <kbusch@meta.com> wrote:

> From: Keith Busch <kbusch@kernel.org>
> 
> The global rescan_remove lock has deadlocks during concurrent removals
> because it is used within interrupt handlers. Use a bus specific lock
> instead.
> 
> Signed-off-by: Keith Busch <kbusch@kernel.org>

Looks reasonable to me. I'm not particularly confident on this one
so more eyes (plus testing) would be good.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

Jonathan
Keith Busch Aug. 15, 2024, 5:05 p.m. UTC | #2
On Thu, Aug 15, 2024 at 04:21:26PM +0100, Jonathan Cameron wrote:
> > The global rescan_remove lock has deadlocks during concurrent removals
> > because it is used within interrupt handlers. Use a bus specific lock
> > instead.
> > 
> > Signed-off-by: Keith Busch <kbusch@kernel.org>
> 
> Looks reasonable to me. I'm not particularly confident on this one
> so more eyes (plus testing) would be good.

Thanks for the reviews!

For the hardware I have, it is limited to x86 and I tested concurrent
pcie native hotplugs and errors with PCIe switches. I could get that to
reliably deadlock or crash before, and those are fixed with this patch
set.

But indeed, there are many more paths using this common pci code than
what I could test. I am confident about the earlier patches from this
series, but I also feel this last patch ought to be tried on a more
diversified set of platforms.
diff mbox series

Patch

diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 3085ecaa2ba40..d80a9e4f39d38 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -384,8 +384,11 @@  void pci_bus_add_devices(const struct pci_bus *bus)
 			continue;
 
 		child = pci_dev_get_subordinate(dev);
-		if (child)
+		if (child) {
+			pci_lock_bus(child);
 			pci_bus_add_devices(child);
+			pci_unlock_bus(child);
+		}
 		pci_bus_put(child);
 	}
 }
@@ -406,7 +409,9 @@  static int __pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void
 
 		bus = pci_dev_get_subordinate(dev);
 		if (bus) {
+			pci_lock_bus(bus);
 			ret = __pci_walk_bus(bus, cb, userdata);
+			pci_unlock_bus(bus);
 			pci_bus_put(bus);
 			if (ret)
 				break;
@@ -430,9 +435,9 @@  static int __pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void
  */
 void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata)
 {
-	down_read(&pci_bus_sem);
+	pci_lock_bus(top);
 	__pci_walk_bus(top, cb, userdata);
-	up_read(&pci_bus_sem);
+	pci_unlock_bus(top);
 }
 EXPORT_SYMBOL_GPL(pci_walk_bus);
 
diff --git a/drivers/pci/hotplug/pciehp_pci.c b/drivers/pci/hotplug/pciehp_pci.c
index b15dcfd86c60a..f6260f1085d81 100644
--- a/drivers/pci/hotplug/pciehp_pci.c
+++ b/drivers/pci/hotplug/pciehp_pci.c
@@ -39,8 +39,7 @@  int pciehp_configure_device(struct controller *ctrl)
 	if (!parent)
 		return 0;
 
-	pci_lock_rescan_remove();
-
+	pci_lock_bus(parent);
 	dev = pci_get_slot(parent, PCI_DEVFN(0, 0));
 	if (dev) {
 		/*
@@ -63,6 +62,7 @@  int pciehp_configure_device(struct controller *ctrl)
 
 	for_each_pci_bridge(dev, parent)
 		pci_hp_add_bridge(dev);
+	pci_unlock_bus(parent);
 
 	pci_assign_unassigned_bridge_resources(bridge);
 	pcie_bus_configure_settings(parent);
@@ -72,6 +72,7 @@  int pciehp_configure_device(struct controller *ctrl)
 	 * to avoid AB-BA deadlock with device_lock.
 	 */
 	up_read(&ctrl->reset_lock);
+	pci_lock_bus(parent);
 	pci_bus_add_devices(parent);
 	down_read_nested(&ctrl->reset_lock, ctrl->depth);
 
@@ -80,7 +81,7 @@  int pciehp_configure_device(struct controller *ctrl)
 	pci_dev_put(dev);
 
  out:
-	pci_unlock_rescan_remove();
+	pci_unlock_bus(parent);
 	pci_bus_put(parent);
 	return ret;
 }
@@ -111,7 +112,7 @@  void pciehp_unconfigure_device(struct controller *ctrl, bool presence)
 	if (!presence)
 		pci_walk_bus(parent, pci_dev_set_disconnected, NULL);
 
-	pci_lock_rescan_remove();
+	pci_lock_bus(parent);
 
 	/*
 	 * Stopping an SR-IOV PF device removes all the associated VFs,
@@ -144,6 +145,6 @@  void pciehp_unconfigure_device(struct controller *ctrl, bool presence)
 		pci_dev_put(dev);
 	}
 
-	pci_unlock_rescan_remove();
+	pci_unlock_bus(parent);
 	pci_bus_put(parent);
 }
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 40cfa716392fb..0853c931b3c7d 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -487,8 +487,10 @@  static ssize_t remove_store(struct device *dev, struct device_attribute *attr,
 	if (kstrtoul(buf, 0, &val) < 0)
 		return -EINVAL;
 
+	get_device(dev);
 	if (val && device_remove_file_self(dev, attr))
 		pci_stop_and_remove_bus_device_locked(to_pci_dev(dev));
+	put_device(dev);
 	return count;
 }
 static DEVICE_ATTR_IGNORE_LOCKDEP(remove, 0220, NULL,
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 0cd36b7772c8b..7e5f05b155658 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -3117,7 +3117,10 @@  void pci_bridge_d3_update(struct pci_dev *dev)
 		struct pci_bus *bus = pci_dev_get_subordinate(bridge);
 
 		if (bus) {
-			pci_walk_bus(bus, pci_dev_check_d3cold, &d3cold_ok);
+			down_read(&pci_bus_sem);
+			pci_walk_bus_locked(bus, pci_dev_check_d3cold,
+					    &d3cold_ok);
+			up_read(&pci_bus_sem);
 			pci_bus_put(bus);
 		}
 	}
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 53522685193da..9c1589be9c390 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -563,6 +563,7 @@  static struct pci_bus *pci_alloc_bus(struct pci_bus *parent)
 	if (!b)
 		return NULL;
 
+	mutex_init(&b->lock);
 	INIT_LIST_HEAD(&b->node);
 	INIT_LIST_HEAD(&b->children);
 	INIT_LIST_HEAD(&b->devices);
@@ -1359,7 +1360,9 @@  static int pci_scan_bridge_extend(struct pci_bus *bus, struct pci_dev *dev,
 		}
 
 		buses = subordinate - secondary;
+		pci_lock_bus(child);
 		cmax = pci_scan_child_bus_extend(child, buses);
+		pci_unlock_bus(child);
 		if (cmax > subordinate)
 			pci_warn(dev, "bridge has subordinate %02x but max busn %02x\n",
 				 subordinate, cmax);
@@ -3109,7 +3112,9 @@  int pci_host_probe(struct pci_host_bridge *bridge)
 	list_for_each_entry(child, &bus->children, node)
 		pcie_bus_configure_settings(child);
 
+	pci_lock_bus(bus);
 	pci_bus_add_devices(bus);
+	pci_unlock_bus(bus);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(pci_host_probe);
@@ -3284,11 +3289,13 @@  unsigned int pci_rescan_bus_bridge_resize(struct pci_dev *bridge)
 	unsigned int max;
 	struct pci_bus *bus = bridge->subordinate;
 
+	pci_lock_bus(bus);
 	max = pci_scan_child_bus(bus);
 
 	pci_assign_unassigned_bridge_resources(bridge);
 
 	pci_bus_add_devices(bus);
+	pci_unlock_bus(bus);
 
 	return max;
 }
@@ -3306,9 +3313,11 @@  unsigned int pci_rescan_bus(struct pci_bus *bus)
 {
 	unsigned int max;
 
+	pci_lock_bus(bus);
 	max = pci_scan_child_bus(bus);
 	pci_assign_unassigned_bus_resources(bus);
 	pci_bus_add_devices(bus);
+	pci_unlock_bus(bus);
 
 	return max;
 }
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index 646c97e41a323..cf641a80a7f21 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -52,8 +52,10 @@  static void pci_clear_bus(struct pci_bus *bus)
 {
 	struct pci_dev *dev, *next;
 
+	pci_lock_bus(bus);
 	list_for_each_entry_safe(dev, next, &bus->devices, bus_list)
 		pci_remove_bus_device(dev);
+	pci_unlock_bus(bus);
 }
 
 void pci_remove_bus(struct pci_bus *bus)
@@ -96,8 +98,10 @@  static void pci_stop_bus(struct pci_bus *bus)
 	 * iterator.  Therefore, iterate in reverse so we remove the VFs
 	 * first, then the PF.
 	 */
+	pci_lock_bus(bus);
 	list_for_each_entry_safe_reverse(dev, next, &bus->devices, bus_list)
 		pci_stop_bus_device(dev);
+	pci_unlock_bus(bus);
 }
 
 static void pci_remove_bus_device(struct pci_dev *dev)
@@ -138,9 +142,15 @@  EXPORT_SYMBOL(pci_stop_and_remove_bus_device);
 
 void pci_stop_and_remove_bus_device_locked(struct pci_dev *dev)
 {
+	struct pci_bus *bus = pci_bus_get(dev->bus);
+
 	pci_lock_rescan_remove();
+	pci_lock_bus(bus);
 	pci_stop_and_remove_bus_device(dev);
+	pci_unlock_bus(bus);
 	pci_unlock_rescan_remove();
+
+	pci_bus_put(bus);
 }
 EXPORT_SYMBOL_GPL(pci_stop_and_remove_bus_device_locked);
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9e36b6c1810ea..6b37373b831ec 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -649,6 +649,7 @@  struct pci_bus {
 	struct list_head node;		/* Node in list of buses */
 	struct pci_bus	*parent;	/* Parent bus this bridge is on */
 	struct list_head children;	/* List of child buses */
+	struct mutex	lock;		/* Lock for devices */
 	struct list_head devices;	/* List of devices on this bus */
 	struct pci_dev	*self;		/* Bridge device as seen by parent */
 	struct list_head slots;		/* List of slots on this bus;
@@ -681,6 +682,16 @@  struct pci_bus {
 	unsigned int		unsafe_warn:1;	/* warned about RW1C config write */
 };
 
+static inline void pci_lock_bus(struct pci_bus *bus)
+{
+	mutex_lock_nested(&bus->lock, bus->number);
+}
+
+static inline void pci_unlock_bus(struct pci_bus *bus)
+{
+	mutex_unlock(&bus->lock);
+}
+
 #define to_pci_bus(n)	container_of(n, struct pci_bus, dev)
 
 static inline u16 pci_dev_id(struct pci_dev *dev)