diff mbox

[v2,19/19] PCI: hide sys interface 'remove' and 'rescan' for SR-IOV virtual devices

Message ID 1335539820-11232-20-git-send-email-jiang.liu@huawei.com
State Superseded
Headers show

Commit Message

Jiang Liu April 27, 2012, 3:17 p.m. UTC
From: Jiang Liu <liuj97@gmail.com>

From: Jiang Liu <liuj97@gmail.com>

All SR-IOV virtual PCI devices should be managed by the corresponding physical
device drivers, and the PCI core shouldn't create or destroy virtual PCI
devices directly without coordination with those physical device drivers.
Otherwise it may cause system crashes like the one below.  So hide the remove
and rescan sysfs interfaces for SR-IOV virtual PCI devices.

Running the following two scripts may trigger a system dump on a system with
an Intel 82576 NIC.

[root@localhost tests]# cat mod.sh
#!/bin/bash
while true; do
        modprobe igb max_vfs=2
        sleep 0.01
        rmmod igb
done
[root@localhost tests]# cat remove_virt.sh
#!/bin/bash
while true; do
        echo 1 > /sys/devices/pci0000:40/0000:40:03.0/0000:41:00.0/0000:42:02.0/0000:44:10.0/remove
        echo 1 > /sys/devices/pci0000:40/0000:40:03.0/0000:41:00.0/0000:42:02.0/0000:44:10.1/remove
        echo 1 > /sys/devices/pci0000:40/0000:40:03.0/0000:41:00.0/0000:42:02.0/0000:44:10.2/remove
        echo 1 > /sys/devices/pci0000:40/0000:40:03.0/0000:41:00.0/0000:42:02.0/0000:43:10.3/rescan
        sleep 0.01
done

------------[ cut here ]------------
WARNING: at fs/sysfs/dir.c:481 sysfs_add_one+0xb8/0xd0()
Hardware name: FBSA
sysfs: cannot create duplicate filename '/devices/pci0000:40/0000:40:03.0/0000:41:00.0/0000:42:02.0/0000:43:00.0/virtfn0'
Modules linked in: igb(+) igbvf fuse ebtable_nat ebtables xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat nf_nat bridge autofs4 sunrpc 8021q fcoe libfcoe garp stp llc libfc scsi_transport_fc scsi_tgt cpufreq_ondemand acpi_cpufreq freq_table mperf xt_physdev ipt_REJECT nf_conntrack_ipv4 nf_defrag_ipv4 iptable_filter ip_tables ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables ipv6 dm_mirror dm_region_hash dm_log dm_mod kvm_intel kvm uinput wmi pcspkr sg iTCO_wdt iTCO_vendor_support e1000e i2c_i801 i2c_core ioatdma ixgbe dca mdio ext4 mbcache jbd2 sd_mod crc_t10dif ahci libahci [last unloaded: igb]
Pid: 6297, comm: work_for_cpu Not tainted 3.2.0IOAT+ #1
Call Trace:
 [<ffffffff81060c1f>] warn_slowpath_common+0x7f/0xc0
 [<ffffffff81060d16>] warn_slowpath_fmt+0x46/0x50
 [<ffffffff811d0428>] sysfs_add_one+0xb8/0xd0
 [<ffffffff811d15ab>] sysfs_do_create_link+0x13b/0x210
 [<ffffffff81242330>] ? sprintf+0x40/0x50
 [<ffffffff811d16b3>] sysfs_create_link+0x13/0x20
 [<ffffffff81272d33>] virtfn_add+0x283/0x430
 [<ffffffff81273252>] pci_enable_sriov+0x232/0x4c0
 [<ffffffffa049ae1b>] igb_probe+0x6b4/0x1212 [igb]
 [<ffffffff81321aa2>] ? __pm_runtime_set_status+0x172/0x210
 [<ffffffff8125dc0f>] local_pci_probe+0x5f/0xd0
 [<ffffffff8107ab60>] ? move_linked_works+0x90/0x90
 [<ffffffff8107ab78>] do_work_for_cpu+0x18/0x30
 [<ffffffff810829e6>] kthread+0x96/0xa0
 [<ffffffff814e4ab4>] kernel_thread_helper+0x4/0x10
 [<ffffffff81082950>] ? kthread_worker_fn+0x1a0/0x1a0
 [<ffffffff814e4ab0>] ? gs_change+0x13/0x13
---[ end trace 7c33eee57d617c55 ]---
libfcoe_device_notification: NETDEV_UNREGISTER eth3
Trying to free nonexistent resource <00000000e1660000-00000000e1663fff>
Trying to free nonexistent resource <00000000e1640000-00000000e1643fff>
BUG: unable to handle kernel NULL pointer dereference at           (null)
IP: [<ffffffff8124a529>] __list_del_entry+0x29/0xd0
PGD 3fdf7e3067 PUD 3fdf45d067 PMD 0
Oops: 0000 [#1] SMP
CPU 8
Modules linked in: igb(+) igbvf fuse ebtable_nat ebtables xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat nf_nat bridge autofs4 sunrpc 8021q fcoe libfcoe garp stp llc libfc scsi_transport_fc scsi_tgt cpufreq_ondemand acpi_cpufreq freq_table mperf xt_physdev ipt_REJECT nf_conntrack_ipv4 nf_defrag_ipv4 iptable_filter ip_tables ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables ipv6 dm_mirror dm_region_hash dm_log dm_mod kvm_intel kvm uinput wmi pcspkr sg iTCO_wdt iTCO_vendor_support e1000e i2c_i801 i2c_core ioatdma ixgbe dca mdio ext4 mbcache jbd2 sd_mod crc_t10dif ahci libahci [last unloaded: igb]

Pid: 6297, comm: work_for_cpu Tainted: G        W    3.2.0IOAT+ #1 INSYDE FBSA/Type2 - Board Product Name1
RIP: 0010:[<ffffffff8124a529>]  [<ffffffff8124a529>] __list_del_entry+0x29/0xd0
RSP: 0018:ffff883fdb499cb0  EFLAGS: 00010207
RAX: 0000000000000000 RBX: ffff881fdde1e000 RCX: dead000000200200
RDX: 0000000000000000 RSI: ffffffff81238d10 RDI: ffff881fdde1e000
RBP: ffff883fdb499cb0 R08: ffff881fdde1e0a8 R09: 0000000000000000
R10: 00000000000009c5 R11: 0000000000000000 R12: 0000000000000011
R13: ffff881fdde1e000 R14: ffff883fdb499d30 R15: ffff883fe07ae0a0
FS:  0000000000000000(0000) GS:ffff88203fc00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 0000000000000000 CR3: 0000003fdf88c000 CR4: 00000000000406e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process work_for_cpu (pid: 6297, threadinfo ffff883fdb498000, task ffff883fda832a70)
Stack:
 ffff883fdb499cd0 ffffffff8124a5e1 ffff883fe07ae000 0000000000000000
 ffff883fdb499d00 ffffffff81259467 ffff883fdb499d00 ffff881fdde1e000
 ffff883fe07ae000 ffff882fe0f87b40 ffff883fdb499d80 ffffffff81272d64
Call Trace:
 [<ffffffff8124a5e1>] list_del+0x11/0x40
 [<ffffffff81259467>] pci_remove_bus_device+0x57/0xd0
 [<ffffffff81272d64>] virtfn_add+0x2b4/0x430
 [<ffffffff81273252>] pci_enable_sriov+0x232/0x4c0
 [<ffffffffa049ae1b>] igb_probe+0x6b4/0x1212 [igb]
 [<ffffffff81321aa2>] ? __pm_runtime_set_status+0x172/0x210
 [<ffffffff8125dc0f>] local_pci_probe+0x5f/0xd0
 [<ffffffff8107ab60>] ? move_linked_works+0x90/0x90
 [<ffffffff8107ab78>] do_work_for_cpu+0x18/0x30
 [<ffffffff810829e6>] kthread+0x96/0xa0
 [<ffffffff814e4ab4>] kernel_thread_helper+0x4/0x10
 [<ffffffff81082950>] ? kthread_worker_fn+0x1a0/0x1a0
 [<ffffffff814e4ab0>] ? gs_change+0x13/0x13
Code: 90 90 55 48 8b 17 48 b9 00 01 10 00 00 00 ad de 48 8b 47 08 48 89 e5 48 39 ca 74 29 48 b9 00 02 20 00 00 00 ad de 48 39 c8 74 7a <4c> 8b 00 4c 39 c7 75 53 4c 8b 42 08 4c 39 c7 75 2b 48 89 42 08
RIP  [<ffffffff8124a529>] __list_del_entry+0x29/0xd0
 RSP <ffff883fdb499cb0>
CR2: 0000000000000000
---[ end trace 7c33eee57d617c56 ]---

Signed-off-by: Jiang Liu <liuj97@gmail.com>
---
 drivers/pci/pci-sysfs.c |   34 ++++++++++++++++++++++++++++++----
 include/linux/pci.h     |    4 +++-
 2 files changed, 33 insertions(+), 5 deletions(-)
diff mbox

Patch

diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index bc3c422..348995d 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -367,6 +367,9 @@  remove_store(struct device *dev, struct device_attribute *dummy,
 	return schedule_hp_callback(dev, buf, count, remove_callback);
 }
 
+static struct device_attribute pci_dev_remove_attr =
+	__ATTR(remove, (S_IWUSR|S_IWGRP), NULL, remove_store);
+
 static void dev_bus_rescan_callback(struct device *dev)
 {
 	struct pci_bus *bus = to_pci_bus(dev);
@@ -389,6 +392,8 @@  dev_bus_rescan_store(struct device *dev, struct device_attribute *attr,
 	return schedule_hp_callback(dev, buf, count, dev_bus_rescan_callback);
 }
 
+static struct device_attribute pci_dev_rescan_attr =
+	__ATTR(rescan, (S_IWUSR|S_IWGRP), NULL, dev_rescan_store);
 #endif
 
 struct device_attribute pci_dev_attrs[] = {
@@ -411,10 +416,6 @@  struct device_attribute pci_dev_attrs[] = {
 	__ATTR(broken_parity_status,(S_IRUGO|S_IWUSR),
 		broken_parity_status_show,broken_parity_status_store),
 	__ATTR(msi_bus, 0644, msi_bus_show, msi_bus_store),
-#ifdef CONFIG_HOTPLUG
-	__ATTR(remove, (S_IWUSR|S_IWGRP), NULL, remove_store),
-	__ATTR(rescan, (S_IWUSR|S_IWGRP), NULL, dev_rescan_store),
-#endif
 	__ATTR_NULL,
 };
 
@@ -1350,6 +1351,30 @@  static int __init pci_sysfs_init(void)
 
 late_initcall(pci_sysfs_init);
 
+static struct attribute *pci_dev_phys_attrs[] = {
+#ifdef CONFIG_HOTPLUG
+	&pci_dev_remove_attr.attr,
+	&pci_dev_rescan_attr.attr,
+#endif
+	NULL
+};
+
+static umode_t pci_dev_phys_attrs_are_visible(struct kobject *kobj,
+					      struct attribute *a, int n)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+
+	if (dev_is_pf(dev))
+		return a->mode;
+
+	return 0;
+}
+
+static struct attribute_group pci_dev_phys_attr_group = {
+	.attrs = pci_dev_phys_attrs,
+	.is_visible = pci_dev_phys_attrs_are_visible,
+};
+
 static struct attribute *pci_dev_bridge_attrs[] = {
 	NULL,
 };
@@ -1373,6 +1398,7 @@  static struct attribute_group pci_dev_bridge_attr_group = {
 
 static const struct attribute_group *pci_dev_attr_groups[] = {
 	&pci_dev_bridge_attr_group,
+	&pci_dev_phys_attr_group,
 	NULL,
 };
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 1c5f153..6c2c5c9 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -692,7 +692,8 @@  extern void pci_stop_bus_device(struct pci_dev *dev);
 void pci_setup_cardbus(struct pci_bus *bus);
 extern void pci_sort_breadthfirst(void);
 #define dev_is_pci(d) ((d)->bus == &pci_bus_type)
-#define dev_is_pf(d) ((dev_is_pci(d) ? to_pci_dev(d)->is_physfn : false))
+#define dev_is_vf(d) ((dev_is_pci(d) ? to_pci_dev(d)->is_virtfn : false))
+#define dev_is_pf(d) (!dev_is_vf(d))
 #define dev_num_vf(d) ((dev_is_pci(d) ? pci_num_vf(to_pci_dev(d)) : 0))
 
 /* Generic PCI functions exported to card drivers */
@@ -1343,6 +1344,7 @@  static inline int pci_domain_nr(struct pci_bus *bus)
 
 #define dev_is_pci(d) (false)
 #define dev_is_pf(d) (false)
+#define dev_is_vf(d) (false)
 #define dev_num_vf(d) (0)
 #endif /* CONFIG_PCI */