Patchwork [3/4] PCI,sys: SRIOV control and status via sysfs

login
register
mail settings
Submitter Don Dutile
Date Nov. 5, 2012, 8:20 p.m.
Message ID <1352146841-64458-4-git-send-email-ddutile@redhat.com>
Download mbox | patch
Permalink /patch/197297/
State Accepted
Headers show

Comments

Don Dutile - Nov. 5, 2012, 8:20 p.m.
Provide files under sysfs to determine the max number of vfs
an SRIOV-capable PCIe device supports, and methods to enable and
disable the vfs on a per device basis.

Currently, VF enablement by SRIOV-capable PCIe devices is done
in driver-specific module parameters.  If not setup in modprobe files,
it requires admin to unload & reload PF drivers with number of desired
VFs to enable.  Additionally, the enablement is system wide: all
devices controlled by the same driver have the same number of VFs
enabled.  Although the latter is probably desired, there are PCI
configurations setup by system BIOS that may not enable that to occur.

Three files are created if a PCIe device has SRIOV support:
sriov_totalvfs -- cat-ing this file returns the maximum number
                  of VFs a PCIe device supports as reported by
                  the TotalVFs in the SRIOV ext cap structure.
sriov_numvfs -- echo'ing a positive number to this file enables this
                number of VFs for this given PCIe device.
             -- echo'ing a 0 to this file disables
                any previously enabled VFs for this PCIe device.
             -- cat-ing this file will return the number of VFs
                currently enabled on this PCIe device, i.e.,
                the NumVFs field in SRIOV ext. cap structure.

VF enable and disablement is invoked much like other PCIe
configuration functions -- via registered callbacks in the driver,
i.e., probe, release, etc.

Signed-off-by: Donald Dutile <ddutile@redhat.com>
---
 drivers/pci/pci-sysfs.c | 134 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/pci.h     |   1 +
 2 files changed, 135 insertions(+)
Yinghai Lu - Nov. 10, 2012, 6:47 a.m.
On Mon, Nov 5, 2012 at 12:20 PM, Donald Dutile <ddutile@redhat.com> wrote:
> Provide files under sysfs to determine the max number of vfs
> an SRIOV-capable PCIe device supports, and methods to enable and
> disable the vfs on a per device basis.
>
> Currently, VF enablement by SRIOV-capable PCIe devices is done
> in driver-specific module parameters.  If not setup in modprobe files,
> it requires admin to unload & reload PF drivers with number of desired
> VFs to enable.  Additionally, the enablement is system wide: all
> devices controlled by the same driver have the same number of VFs
> enabled.  Although the latter is probably desired, there are PCI
> configurations setup by system BIOS that may not enable that to occur.
>
> Three files are created if a PCIe device has SRIOV support:
> sriov_totalvfs -- cat-ing this file returns the maximum number
>                   of VFs a PCIe device supports as reported by
>                   the TotalVFs in the SRIOV ext cap structure.
> sriov_numvfs -- echo'ing a positive number to this file enables this
>                 number of VFs for this given PCIe device.
>              -- echo'ing a 0 to this file disables
>                 any previously enabled VFs for this PCIe device.
>              -- cat-ing this file will return the number of VFs
>                 currently enabled on this PCIe device, i.e.,
>                 the NumVFs field in SRIOV ext. cap structure.
>
> VF enable and disablement is invoked much like other PCIe
> configuration functions -- via registered callbacks in the driver,
> i.e., probe, release, etc.
>
> Signed-off-by: Donald Dutile <ddutile@redhat.com>
> ---
>  drivers/pci/pci-sysfs.c | 134 ++++++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/pci.h     |   1 +
>  2 files changed, 135 insertions(+)
>
> diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
> index fbbb97f..cbcdd8d 100644
> --- a/drivers/pci/pci-sysfs.c
> +++ b/drivers/pci/pci-sysfs.c
> @@ -404,6 +404,113 @@ static ssize_t d3cold_allowed_show(struct device *dev,
>  }
>  #endif
>
> +#ifdef CONFIG_PCI_IOV
> +static ssize_t sriov_totalvfs_show(struct device *dev,
> +                                  struct device_attribute *attr,
> +                                  char *buf)
> +{
> +       struct pci_dev *pdev;
> +       u16 total;
> +
> +       pdev = to_pci_dev(dev);
> +       total = pdev->sriov->total;
> +       return sprintf(buf, "%u\n", total);
> +}
> +
> +
> +static ssize_t sriov_numvfs_show(struct device *dev,
> +                                struct device_attribute *attr,
> +                                char *buf)
> +{
> +       struct pci_dev *pdev;
> +
> +       pdev = to_pci_dev(dev);
> +
> +       return sprintf(buf, "%u\n", pdev->sriov->nr_virtfn);
> +}
> +
> +/*
> + * num_vfs > 0; number of vfs to enable
> + * num_vfs = 0; disable all vfs
> + *
> + * Note: SRIOV spec doesn't allow partial VF
> + *       disable, so its all or none.
> + */
> +static ssize_t sriov_numvfs_store(struct device *dev,
> +                                 struct device_attribute *attr,
> +                                 const char *buf, size_t count)
> +{
> +       struct pci_dev *pdev;
> +       int num_vfs_enabled = 0;
> +       int num_vfs;
> +       int ret = 0;
> +       u16 total;
> +
> +       pdev = to_pci_dev(dev);
> +
> +       if (kstrtoint(buf, 0, &num_vfs) < 0)
> +               return -EINVAL;
> +
> +       /* is PF driver loaded w/callback */
> +       if (!pdev->driver || !pdev->driver->sriov_configure) {
> +               dev_info(&pdev->dev,
> +                        "Driver doesn't support SRIOV configuration via sysfs\n");
> +               return -ENOSYS;
> +       }
> +
> +       /* if enabling vf's ... */
> +       total = pdev->sriov->total;
> +       /* Requested VFs to enable < totalvfs and none enabled already */
> +       if ((num_vfs > 0) && (num_vfs <= total)) {
> +               if (pdev->sriov->nr_virtfn == 0) {
> +                       num_vfs_enabled =
> +                               pdev->driver->sriov_configure(pdev, num_vfs);
> +                       if ((num_vfs_enabled >= 0) &&
> +                           (num_vfs_enabled != num_vfs)) {
> +                               dev_warn(&pdev->dev,
> +                                        "Only %d VFs enabled\n",
> +                                        num_vfs_enabled);
> +                               return count;
> +                       } else if (num_vfs_enabled < 0)
> +                               /* error code from driver callback */
> +                               return num_vfs_enabled;
> +               } else if (num_vfs == pdev->sriov->nr_virtfn) {
> +                       dev_warn(&pdev->dev,
> +                                "%d VFs already enabled; no enable action taken\n",
> +                                num_vfs);
> +                       return count;
> +               } else {
> +                       dev_warn(&pdev->dev,
> +                                "%d VFs already enabled. Disable before enabling %d VFs\n",
> +                                pdev->sriov->nr_virtfn, num_vfs);
> +                       return -EINVAL;
> +               }
> +       }
> +
> +       /* disable vfs */
> +       if (num_vfs == 0) {
> +               if (pdev->sriov->nr_virtfn != 0) {
> +                       ret = pdev->driver->sriov_configure(pdev, 0);
> +                       return ret ? ret : count;
> +               } else {
> +                       dev_warn(&pdev->dev,
> +                                "All VFs disabled; no disable action taken\n");
> +                       return count;
> +               }
> +       }
> +
> +       dev_err(&pdev->dev,
> +               "Invalid value for number of VFs to enable: %d\n", num_vfs);
> +
> +       return -EINVAL;
> +}
> +
> +static struct device_attribute sriov_totalvfs_attr = __ATTR_RO(sriov_totalvfs);
> +static struct device_attribute sriov_numvfs_attr =
> +               __ATTR(sriov_numvfs, (S_IRUGO|S_IWUSR|S_IWGRP),
> +                      sriov_numvfs_show, sriov_numvfs_store);
> +#endif /* CONFIG_PCI_IOV */
> +
>  struct device_attribute pci_dev_attrs[] = {
>         __ATTR_RO(resource),
>         __ATTR_RO(vendor),
> @@ -1421,6 +1528,30 @@ static umode_t pci_dev_attrs_are_visible(struct kobject *kobj,
>         return a->mode;
>  }
>
> +#ifdef CONFIG_PCI_IOV
> +static struct attribute *sriov_dev_attrs[] = {
> +       &sriov_totalvfs_attr.attr,
> +       &sriov_numvfs_attr.attr,
> +       NULL,
> +};
> +
> +static umode_t sriov_attrs_are_visible(struct kobject *kobj,
> +                                        struct attribute *a, int n)
> +{
> +       struct device *dev = container_of(kobj, struct device, kobj);
> +
> +       if (!dev_is_pf(dev))
> +               return 0;
> +
> +       return a->mode;
> +}
> +
> +static struct attribute_group sriov_dev_attr_group = {
> +       .attrs = sriov_dev_attrs,
> +       .is_visible = sriov_attrs_are_visible,
> +};
> +#endif /* CONFIG_PCI_IOV */
> +
>  static struct attribute_group pci_dev_attr_group = {
>         .attrs = pci_dev_dev_attrs,
>         .is_visible = pci_dev_attrs_are_visible,
> @@ -1428,6 +1559,9 @@ static struct attribute_group pci_dev_attr_group = {
>
>  static const struct attribute_group *pci_dev_attr_groups[] = {
>         &pci_dev_attr_group,
> +#ifdef CONFIG_PCI_IOV
> +       &sriov_dev_attr_group,
> +#endif

should move sriov_dev_attr_group related code to
drivers/pci/iov.c
or driver/pci/iov_sysfs.c

and kill those CONFIG_IOV.
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yinghai Lu - Nov. 10, 2012, 7:31 a.m.
On Fri, Nov 9, 2012 at 10:47 PM, Yinghai Lu <yinghai@kernel.org> wrote:
> On Mon, Nov 5, 2012 at 12:20 PM, Donald Dutile <ddutile@redhat.com> wrote:
>>
>>  static const struct attribute_group *pci_dev_attr_groups[] = {
>>         &pci_dev_attr_group,
>> +#ifdef CONFIG_PCI_IOV
>> +       &sriov_dev_attr_group,
>> +#endif
>
> should move sriov_dev_attr_group related code to
> drivers/pci/iov.c
> or driver/pci/iov_sysfs.c
>
> and kill those CONFIG_IOV.

Bjorn,

please check attached patch that separate sriov_dev_attr_group out.
it is against to your pci/don-sriov branch.

Thanks

Yinghai
Bjorn Helgaas - Nov. 10, 2012, 9:16 p.m.
On Sat, Nov 10, 2012 at 12:31 AM, Yinghai Lu <yinghai@kernel.org> wrote:
> On Fri, Nov 9, 2012 at 10:47 PM, Yinghai Lu <yinghai@kernel.org> wrote:
>> On Mon, Nov 5, 2012 at 12:20 PM, Donald Dutile <ddutile@redhat.com> wrote:
>>>
>>>  static const struct attribute_group *pci_dev_attr_groups[] = {
>>>         &pci_dev_attr_group,
>>> +#ifdef CONFIG_PCI_IOV
>>> +       &sriov_dev_attr_group,
>>> +#endif
>>
>> should move sriov_dev_attr_group related code to
>> drivers/pci/iov.c
>> or driver/pci/iov_sysfs.c
>>
>> and kill those CONFIG_IOV.
>
> Bjorn,
>
> please check attached patch that separate sriov_dev_attr_group out.
> it is against to your pci/don-sriov branch.

The convention is to include patches inline, which makes them easier
to review.  I know gmail makes that hard.  I use mutt or stacked git
to do it.

I'm not opposed to moving the SR-IOV sysfs stuff out of pci-sysfs.c.
Since these patches haven't been merged yet, you should work it out
with Don first, so we can merge it in the right place to begin with,
rather than merging them and then immediately moving them.

Personally, I would put it all in pci/iov.c rather than creating a new
file just for this.  That would also avoid the question of whether it
should be "iov_sysfs.c" or "iov-sysfs.c".  All the other files in
drivers/pci use a hyphen.  All the files in subdirectories of
drivers/pci use an underscore.  A minor but annoying difference.

Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yinghai Lu - Nov. 10, 2012, 11:14 p.m.
On Sat, Nov 10, 2012 at 1:16 PM, Bjorn Helgaas <bhelgaas@google.com> wrote:
> On Sat, Nov 10, 2012 at 12:31 AM, Yinghai Lu <yinghai@kernel.org> wrote:
>> On Fri, Nov 9, 2012 at 10:47 PM, Yinghai Lu <yinghai@kernel.org> wrote:
>>> On Mon, Nov 5, 2012 at 12:20 PM, Donald Dutile <ddutile@redhat.com> wrote:
>>>>
>>>>  static const struct attribute_group *pci_dev_attr_groups[] = {
>>>>         &pci_dev_attr_group,
>>>> +#ifdef CONFIG_PCI_IOV
>>>> +       &sriov_dev_attr_group,
>>>> +#endif
>>>
>>> should move sriov_dev_attr_group related code to
>>> drivers/pci/iov.c
>>> or driver/pci/iov_sysfs.c
>>>
>>> and kill those CONFIG_IOV.
>>
>> Bjorn,
>>
>> please check attached patch that separate sriov_dev_attr_group out.
>> it is against to your pci/don-sriov branch.
>
> The convention is to include patches inline, which makes them easier
> to review.  I know gmail makes that hard.  I use mutt or stacked git
> to do it.

yes, gmail web client should support plain attachment.

so you are using mutt with smtp enabled ?

>
> I'm not opposed to moving the SR-IOV sysfs stuff out of pci-sysfs.c.
> Since these patches haven't been merged yet, you should work it out
> with Don first, so we can merge it in the right place to begin with,
> rather than merging them and then immediately moving them.

sure. it should be out of pci-sysfs.c at first place.

I thought that you are going to merge your pci/don-sriov.

BTW, when you redo pci/don-sriov, please add acked-by from GregKH for
first two patches.

>
> Personally, I would put it all in pci/iov.c rather than creating a new
> file just for this.  That would also avoid the question of whether it
> should be "iov_sysfs.c" or "iov-sysfs.c".  All the other files in
> drivers/pci use a hyphen.  All the files in subdirectories of
> drivers/pci use an underscore.  A minor but annoying difference.

yeah, looks like should be iov-sysfs.c to keep them consistent.

iov.c is already about 800 lines. better not to stuff more code into it anymore.

Yinghai
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Don Dutile - Nov. 12, 2012, 7:24 p.m.
On 11/10/2012 02:31 AM, Yinghai Lu wrote:
> On Fri, Nov 9, 2012 at 10:47 PM, Yinghai Lu<yinghai@kernel.org>  wrote:
>> On Mon, Nov 5, 2012 at 12:20 PM, Donald Dutile<ddutile@redhat.com>  wrote:
>>>
>>>   static const struct attribute_group *pci_dev_attr_groups[] = {
>>>          &pci_dev_attr_group,
>>> +#ifdef CONFIG_PCI_IOV
>>> +&sriov_dev_attr_group,
>>> +#endif
>>
>> should move sriov_dev_attr_group related code to
>> drivers/pci/iov.c
>> or driver/pci/iov_sysfs.c
>>
>> and kill those CONFIG_IOV.
>
> Bjorn,
>
> please check attached patch that separate sriov_dev_attr_group out.
> it is against to your pci/don-sriov branch.
>
> Thanks
>
> Yinghai

Since the sriov-related files are created at the same time as
the other <DOMAIN:B:D.F> files, I would prefer to keep their
generation in the pci-sysfs.c file.
If it it moved, then put it into iov.c; the 'virtfn[X]' sysfs
files are created/destroyed in that file already, so if the
majority want it out of pci-sysfs, then I would favor a move to iov.c.
iov-sysfs.c creates yet-another file to search for SRIOV-related code,
and it's fairly centric to iov.c now.

If we're going to toss stuff out of pci-sysfs, then toss out the vga-attributes
crap, as well as other non-pci-specific files like bus-affinity(numa -- which
should go into PCI-root-bus related core code, e.g., acpi for x86; idk in ppc, arm, s390, etc.).
I'm not saying that following bad decisions of the past
is a justification for keeping the sriov related sysfs in pci-sysfs;
I am saying sriov *is* PCI-specific/architected, so it seems like
it should reside in pci-sysfs.c .

Other opinions/votes/preferences ???

- Don

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index fbbb97f..cbcdd8d 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -404,6 +404,113 @@  static ssize_t d3cold_allowed_show(struct device *dev,
 }
 #endif
 
+#ifdef CONFIG_PCI_IOV
+static ssize_t sriov_totalvfs_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buf)
+{
+	struct pci_dev *pdev;
+	u16 total;
+
+	pdev = to_pci_dev(dev);
+	total = pdev->sriov->total;
+	return sprintf(buf, "%u\n", total);
+}
+
+
+static ssize_t sriov_numvfs_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	struct pci_dev *pdev;
+
+	pdev = to_pci_dev(dev);
+
+	return sprintf(buf, "%u\n", pdev->sriov->nr_virtfn);
+}
+
+/*
+ * num_vfs > 0; number of vfs to enable
+ * num_vfs = 0; disable all vfs
+ *
+ * Note: SRIOV spec doesn't allow partial VF
+ *       disable, so its all or none.
+ */
+static ssize_t sriov_numvfs_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct pci_dev *pdev;
+	int num_vfs_enabled = 0;
+	int num_vfs;
+	int ret = 0;
+	u16 total;
+
+	pdev = to_pci_dev(dev);
+
+	if (kstrtoint(buf, 0, &num_vfs) < 0)
+		return -EINVAL;
+
+	/* is PF driver loaded w/callback */
+	if (!pdev->driver || !pdev->driver->sriov_configure) {
+		dev_info(&pdev->dev,
+			 "Driver doesn't support SRIOV configuration via sysfs\n");
+		return -ENOSYS;
+	}
+
+	/* if enabling vf's ... */
+	total = pdev->sriov->total;
+	/* Requested VFs to enable < totalvfs and none enabled already */
+	if ((num_vfs > 0) && (num_vfs <= total)) {
+		if (pdev->sriov->nr_virtfn == 0) {
+			num_vfs_enabled =
+				pdev->driver->sriov_configure(pdev, num_vfs);
+			if ((num_vfs_enabled >= 0) &&
+			    (num_vfs_enabled != num_vfs)) {
+				dev_warn(&pdev->dev,
+					 "Only %d VFs enabled\n",
+					 num_vfs_enabled);
+				return count;
+			} else if (num_vfs_enabled < 0)
+				/* error code from driver callback */
+				return num_vfs_enabled;
+		} else if (num_vfs == pdev->sriov->nr_virtfn) {
+			dev_warn(&pdev->dev,
+				 "%d VFs already enabled; no enable action taken\n",
+				 num_vfs);
+			return count;
+		} else {
+			dev_warn(&pdev->dev,
+				 "%d VFs already enabled. Disable before enabling %d VFs\n",
+				 pdev->sriov->nr_virtfn, num_vfs);
+			return -EINVAL;
+		}
+	}
+
+	/* disable vfs */
+	if (num_vfs == 0) {
+		if (pdev->sriov->nr_virtfn != 0) {
+			ret = pdev->driver->sriov_configure(pdev, 0);
+			return ret ? ret : count;
+		} else {
+			dev_warn(&pdev->dev,
+				 "All VFs disabled; no disable action taken\n");
+			return count;
+		}
+	}
+
+	dev_err(&pdev->dev,
+		"Invalid value for number of VFs to enable: %d\n", num_vfs);
+
+	return -EINVAL;
+}
+
+static struct device_attribute sriov_totalvfs_attr = __ATTR_RO(sriov_totalvfs);
+static struct device_attribute sriov_numvfs_attr =
+		__ATTR(sriov_numvfs, (S_IRUGO|S_IWUSR|S_IWGRP),
+		       sriov_numvfs_show, sriov_numvfs_store);
+#endif /* CONFIG_PCI_IOV */
+
 struct device_attribute pci_dev_attrs[] = {
 	__ATTR_RO(resource),
 	__ATTR_RO(vendor),
@@ -1421,6 +1528,30 @@  static umode_t pci_dev_attrs_are_visible(struct kobject *kobj,
 	return a->mode;
 }
 
+#ifdef CONFIG_PCI_IOV
+static struct attribute *sriov_dev_attrs[] = {
+	&sriov_totalvfs_attr.attr,
+	&sriov_numvfs_attr.attr,
+	NULL,
+};
+
+static umode_t sriov_attrs_are_visible(struct kobject *kobj,
+					 struct attribute *a, int n)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+
+	if (!dev_is_pf(dev))
+		return 0;
+
+	return a->mode;
+}
+
+static struct attribute_group sriov_dev_attr_group = {
+	.attrs = sriov_dev_attrs,
+	.is_visible = sriov_attrs_are_visible,
+};
+#endif /* CONFIG_PCI_IOV */
+
 static struct attribute_group pci_dev_attr_group = {
 	.attrs = pci_dev_dev_attrs,
 	.is_visible = pci_dev_attrs_are_visible,
@@ -1428,6 +1559,9 @@  static struct attribute_group pci_dev_attr_group = {
 
 static const struct attribute_group *pci_dev_attr_groups[] = {
 	&pci_dev_attr_group,
+#ifdef CONFIG_PCI_IOV
+	&sriov_dev_attr_group,
+#endif
 	NULL,
 };
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index ee21795..7ef8fba 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -573,6 +573,7 @@  struct pci_driver {
 	int  (*resume_early) (struct pci_dev *dev);
 	int  (*resume) (struct pci_dev *dev);	                /* Device woken up */
 	void (*shutdown) (struct pci_dev *dev);
+	int (*sriov_configure) (struct pci_dev *dev, int num_vfs); /* PF pdev */
 	const struct pci_error_handlers *err_handler;
 	struct device_driver	driver;
 	struct pci_dynids dynids;