diff mbox

[v2] PCI: Add an option to control probing of VFs before enabling SR-IOV

Message ID 1490198038-20465-1-git-send-email-bodong@mellanox.com
State Changes Requested
Headers show

Commit Message

Bodong Wang March 22, 2017, 3:53 p.m. UTC
From: Bodong Wang <bodong@mellanox.com>

Sometimes it is not desirable to probe the virtual functions after
SRIOV is enabled. This can save host side resource usage by VF
instances which would be eventually probed to VMs.

Add a new PCI sysfs interface "sriov_probe_vfs" to control that
from the PF, all current callers still retain the same functionality.
To modify it, echo 0/n/N (disable probe) or 1/y/Y (enable probe) to

/sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_probe_vfs

Note that, the choice must be made before enabling VFs. The change
will not take effect if VFs are already enabled. Simply, one can set
sriov_numvfs to 0, choose whether to probe or not, and then resume
sriov_numvfs.

Signed-off-by: Bodong Wang <bodong@mellanox.com>
Signed-off-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 Documentation/PCI/pci-iov-howto.txt | 10 ++++++++++
 drivers/pci/iov.c                   |  1 +
 drivers/pci/pci-driver.c            | 22 ++++++++++++++++++----
 drivers/pci/pci-sysfs.c             | 28 ++++++++++++++++++++++++++++
 drivers/pci/pci.h                   |  1 +
 5 files changed, 58 insertions(+), 4 deletions(-)

Comments

Bodong Wang March 27, 2017, 3:11 p.m. UTC | #1
On 3/22/2017 10:53 AM, bodong@mellanox.com wrote:
> From: Bodong Wang <bodong@mellanox.com>
>
> Sometimes it is not desirable to probe the virtual functions after
> SRIOV is enabled. This can save host side resource usage by VF
> instances which would be eventually probed to VMs.
>
> Add a new PCI sysfs interface "sriov_probe_vfs" to control that
> from the PF, all current callers still retain the same functionality.
> To modify it, echo 0/n/N (disable probe) or 1/y/Y (enable probe) to
>
> /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_probe_vfs
>
> Note that, the choice must be made before enabling VFs. The change
> will not take effect if VFs are already enabled. Simply, one can set
> sriov_numvfs to 0, choose whether to probe or not, and then resume
> sriov_numvfs.
>
> Signed-off-by: Bodong Wang <bodong@mellanox.com>
> Signed-off-by: Eli Cohen <eli@mellanox.com>
> Reviewed-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
> ---
>   Documentation/PCI/pci-iov-howto.txt | 10 ++++++++++
>   drivers/pci/iov.c                   |  1 +
>   drivers/pci/pci-driver.c            | 22 ++++++++++++++++++----
>   drivers/pci/pci-sysfs.c             | 28 ++++++++++++++++++++++++++++
>   drivers/pci/pci.h                   |  1 +
>   5 files changed, 58 insertions(+), 4 deletions(-)
>
> diff --git a/Documentation/PCI/pci-iov-howto.txt b/Documentation/PCI/pci-iov-howto.txt
> index 2d91ae2..902a528 100644
> --- a/Documentation/PCI/pci-iov-howto.txt
> +++ b/Documentation/PCI/pci-iov-howto.txt
> @@ -68,6 +68,16 @@ To disable SR-IOV capability:
>   	echo  0 > \
>           /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_numvfs
>   
> +To enable probing VFs by a compatible driver on the host:
> +Before enabling SR-IOV capabilities, do:
> +	echo 1 > \
> +        /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_probe_vfs
> +
> +To disable probing VFs by a compatible driver on the host:
> +Before enabling SR-IOV capabilities, do:
> +	echo  0 > \
> +        /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_probe_vfs
> +
>   3.2 Usage example
>   
>   Following piece of code illustrates the usage of the SR-IOV API.
> diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
> index 2479ae8..70691de 100644
> --- a/drivers/pci/iov.c
> +++ b/drivers/pci/iov.c
> @@ -450,6 +450,7 @@ static int sriov_init(struct pci_dev *dev, int pos)
>   	iov->total_VFs = total;
>   	iov->pgsz = pgsz;
>   	iov->self = dev;
> +	iov->probe_vfs = true;
>   	pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap);
>   	pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link);
>   	if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END)
> diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
> index afa7271..2a1cf84 100644
> --- a/drivers/pci/pci-driver.c
> +++ b/drivers/pci/pci-driver.c
> @@ -394,6 +394,18 @@ void __weak pcibios_free_irq(struct pci_dev *dev)
>   {
>   }
>   
> +#ifdef CONFIG_PCI_IOV
> +static inline bool pci_device_can_probe(struct pci_dev *pdev)
> +{
> +	return (!pdev->is_virtfn || pdev->physfn->sriov->probe_vfs);
> +}
> +#else
> +static inline bool pci_device_can_probe(struct pci_dev *pdev)
> +{
> +	return true;
> +}
> +#endif
> +
>   static int pci_device_probe(struct device *dev)
>   {
>   	int error;
> @@ -405,10 +417,12 @@ static int pci_device_probe(struct device *dev)
>   		return error;
>   
>   	pci_dev_get(pci_dev);
> -	error = __pci_device_probe(drv, pci_dev);
> -	if (error) {
> -		pcibios_free_irq(pci_dev);
> -		pci_dev_put(pci_dev);
> +	if (pci_device_can_probe(pci_dev)) {
> +		error = __pci_device_probe(drv, pci_dev);
> +		if (error) {
> +			pcibios_free_irq(pci_dev);
> +			pci_dev_put(pci_dev);
> +		}
>   	}
>   
>   	return error;
> diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
> index 25d010d..1d5b89d 100644
> --- a/drivers/pci/pci-sysfs.c
> +++ b/drivers/pci/pci-sysfs.c
> @@ -526,10 +526,37 @@ static ssize_t sriov_numvfs_store(struct device *dev,
>   	return count;
>   }
>   
> +static ssize_t sriov_probe_vfs_show(struct device *dev,
> +				    struct device_attribute *attr,
> +				    char *buf)
> +{
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +
> +	return sprintf(buf, "%u\n", pdev->sriov->probe_vfs);
> +}
> +
> +static ssize_t sriov_probe_vfs_store(struct device *dev,
> +				     struct device_attribute *attr,
> +				     const char *buf, size_t count)
> +{
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +	bool probe_vfs;
> +
> +	if (kstrtobool(buf, &probe_vfs) < 0)
> +		return -EINVAL;
> +
> +	pdev->sriov->probe_vfs = probe_vfs;
> +
> +	return count;
> +}
> +
>   static struct device_attribute sriov_totalvfs_attr = __ATTR_RO(sriov_totalvfs);
>   static struct device_attribute sriov_numvfs_attr =
>   		__ATTR(sriov_numvfs, (S_IRUGO|S_IWUSR|S_IWGRP),
>   		       sriov_numvfs_show, sriov_numvfs_store);
> +static struct device_attribute sriov_probe_vfs_attr =
> +		__ATTR(sriov_probe_vfs, (S_IRUGO|S_IWUSR|S_IWGRP),
> +		       sriov_probe_vfs_show, sriov_probe_vfs_store);
>   #endif /* CONFIG_PCI_IOV */
>   
>   static ssize_t driver_override_store(struct device *dev,
> @@ -1549,6 +1576,7 @@ static umode_t pci_dev_hp_attrs_are_visible(struct kobject *kobj,
>   static struct attribute *sriov_dev_attrs[] = {
>   	&sriov_totalvfs_attr.attr,
>   	&sriov_numvfs_attr.attr,
> +	&sriov_probe_vfs_attr.attr,
>   	NULL,
>   };
>   
> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> index 8dd38e6..b7d8127d 100644
> --- a/drivers/pci/pci.h
> +++ b/drivers/pci/pci.h
> @@ -272,6 +272,7 @@ struct pci_sriov {
>   	struct pci_dev *self;	/* this PF */
>   	struct mutex lock;	/* lock for setting sriov_numvfs in sysfs */
>   	resource_size_t barsz[PCI_SRIOV_NUM_BARS];	/* VF BAR size */
> +	bool probe_vfs;		/* control probing of VFs */
>   };
>   
>   #ifdef CONFIG_PCI_ATS

Hi Bjorn, could you please comment?

Thanks.
Bodong Wang March 31, 2017, 3:07 p.m. UTC | #2
On 3/27/2017 10:11 AM, Bodong Wang wrote:
> On 3/22/2017 10:53 AM, bodong@mellanox.com wrote:
>> From: Bodong Wang <bodong@mellanox.com>
>>
>> Sometimes it is not desirable to probe the virtual functions after
>> SRIOV is enabled. This can save host side resource usage by VF
>> instances which would be eventually probed to VMs.
>>
>> Add a new PCI sysfs interface "sriov_probe_vfs" to control that
>> from the PF, all current callers still retain the same functionality.
>> To modify it, echo 0/n/N (disable probe) or 1/y/Y (enable probe) to
>>
>> /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_probe_vfs
>>
>> Note that, the choice must be made before enabling VFs. The change
>> will not take effect if VFs are already enabled. Simply, one can set
>> sriov_numvfs to 0, choose whether to probe or not, and then resume
>> sriov_numvfs.
>>
>> Signed-off-by: Bodong Wang <bodong@mellanox.com>
>> Signed-off-by: Eli Cohen <eli@mellanox.com>
>> Reviewed-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>> ---
>>   Documentation/PCI/pci-iov-howto.txt | 10 ++++++++++
>>   drivers/pci/iov.c                   |  1 +
>>   drivers/pci/pci-driver.c            | 22 ++++++++++++++++++----
>>   drivers/pci/pci-sysfs.c             | 28 ++++++++++++++++++++++++++++
>>   drivers/pci/pci.h                   |  1 +
>>   5 files changed, 58 insertions(+), 4 deletions(-)
>>
>> diff --git a/Documentation/PCI/pci-iov-howto.txt 
>> b/Documentation/PCI/pci-iov-howto.txt
>> index 2d91ae2..902a528 100644
>> --- a/Documentation/PCI/pci-iov-howto.txt
>> +++ b/Documentation/PCI/pci-iov-howto.txt
>> @@ -68,6 +68,16 @@ To disable SR-IOV capability:
>>       echo  0 > \
>> /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_numvfs
>>   +To enable probing VFs by a compatible driver on the host:
>> +Before enabling SR-IOV capabilities, do:
>> +    echo 1 > \
>> + /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_probe_vfs
>> +
>> +To disable probing VFs by a compatible driver on the host:
>> +Before enabling SR-IOV capabilities, do:
>> +    echo  0 > \
>> + /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_probe_vfs
>> +
>>   3.2 Usage example
>>     Following piece of code illustrates the usage of the SR-IOV API.
>> diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
>> index 2479ae8..70691de 100644
>> --- a/drivers/pci/iov.c
>> +++ b/drivers/pci/iov.c
>> @@ -450,6 +450,7 @@ static int sriov_init(struct pci_dev *dev, int pos)
>>       iov->total_VFs = total;
>>       iov->pgsz = pgsz;
>>       iov->self = dev;
>> +    iov->probe_vfs = true;
>>       pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap);
>>       pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link);
>>       if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END)
>> diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
>> index afa7271..2a1cf84 100644
>> --- a/drivers/pci/pci-driver.c
>> +++ b/drivers/pci/pci-driver.c
>> @@ -394,6 +394,18 @@ void __weak pcibios_free_irq(struct pci_dev *dev)
>>   {
>>   }
>>   +#ifdef CONFIG_PCI_IOV
>> +static inline bool pci_device_can_probe(struct pci_dev *pdev)
>> +{
>> +    return (!pdev->is_virtfn || pdev->physfn->sriov->probe_vfs);
>> +}
>> +#else
>> +static inline bool pci_device_can_probe(struct pci_dev *pdev)
>> +{
>> +    return true;
>> +}
>> +#endif
>> +
>>   static int pci_device_probe(struct device *dev)
>>   {
>>       int error;
>> @@ -405,10 +417,12 @@ static int pci_device_probe(struct device *dev)
>>           return error;
>>         pci_dev_get(pci_dev);
>> -    error = __pci_device_probe(drv, pci_dev);
>> -    if (error) {
>> -        pcibios_free_irq(pci_dev);
>> -        pci_dev_put(pci_dev);
>> +    if (pci_device_can_probe(pci_dev)) {
>> +        error = __pci_device_probe(drv, pci_dev);
>> +        if (error) {
>> +            pcibios_free_irq(pci_dev);
>> +            pci_dev_put(pci_dev);
>> +        }
>>       }
>>         return error;
>> diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
>> index 25d010d..1d5b89d 100644
>> --- a/drivers/pci/pci-sysfs.c
>> +++ b/drivers/pci/pci-sysfs.c
>> @@ -526,10 +526,37 @@ static ssize_t sriov_numvfs_store(struct device 
>> *dev,
>>       return count;
>>   }
>>   +static ssize_t sriov_probe_vfs_show(struct device *dev,
>> +                    struct device_attribute *attr,
>> +                    char *buf)
>> +{
>> +    struct pci_dev *pdev = to_pci_dev(dev);
>> +
>> +    return sprintf(buf, "%u\n", pdev->sriov->probe_vfs);
>> +}
>> +
>> +static ssize_t sriov_probe_vfs_store(struct device *dev,
>> +                     struct device_attribute *attr,
>> +                     const char *buf, size_t count)
>> +{
>> +    struct pci_dev *pdev = to_pci_dev(dev);
>> +    bool probe_vfs;
>> +
>> +    if (kstrtobool(buf, &probe_vfs) < 0)
>> +        return -EINVAL;
>> +
>> +    pdev->sriov->probe_vfs = probe_vfs;
>> +
>> +    return count;
>> +}
>> +
>>   static struct device_attribute sriov_totalvfs_attr = 
>> __ATTR_RO(sriov_totalvfs);
>>   static struct device_attribute sriov_numvfs_attr =
>>           __ATTR(sriov_numvfs, (S_IRUGO|S_IWUSR|S_IWGRP),
>>                  sriov_numvfs_show, sriov_numvfs_store);
>> +static struct device_attribute sriov_probe_vfs_attr =
>> +        __ATTR(sriov_probe_vfs, (S_IRUGO|S_IWUSR|S_IWGRP),
>> +               sriov_probe_vfs_show, sriov_probe_vfs_store);
>>   #endif /* CONFIG_PCI_IOV */
>>     static ssize_t driver_override_store(struct device *dev,
>> @@ -1549,6 +1576,7 @@ static umode_t 
>> pci_dev_hp_attrs_are_visible(struct kobject *kobj,
>>   static struct attribute *sriov_dev_attrs[] = {
>>       &sriov_totalvfs_attr.attr,
>>       &sriov_numvfs_attr.attr,
>> +    &sriov_probe_vfs_attr.attr,
>>       NULL,
>>   };
>>   diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
>> index 8dd38e6..b7d8127d 100644
>> --- a/drivers/pci/pci.h
>> +++ b/drivers/pci/pci.h
>> @@ -272,6 +272,7 @@ struct pci_sriov {
>>       struct pci_dev *self;    /* this PF */
>>       struct mutex lock;    /* lock for setting sriov_numvfs in sysfs */
>>       resource_size_t barsz[PCI_SRIOV_NUM_BARS];    /* VF BAR size */
>> +    bool probe_vfs;        /* control probing of VFs */
>>   };
>>     #ifdef CONFIG_PCI_ATS
>
> Hi Bjorn, could you please comment?
>
> Thanks.
>
Didn't see any comment on this patch. Is there any plan to pull this 
patch in?

Thanks.
Bjorn Helgaas April 11, 2017, 9:12 p.m. UTC | #3
Hi Bodong,

On Wed, Mar 22, 2017 at 05:53:58PM +0200, bodong@mellanox.com wrote:
> From: Bodong Wang <bodong@mellanox.com>
> 
> Sometimes it is not desirable to probe the virtual functions after
> SRIOV is enabled. This can save host side resource usage by VF
> instances which would be eventually probed to VMs.
> 
> Add a new PCI sysfs interface "sriov_probe_vfs" to control that
> from the PF, all current callers still retain the same functionality.
> To modify it, echo 0/n/N (disable probe) or 1/y/Y (enable probe) to
> 
> /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_probe_vfs

Is this basically the same functionality as /sys/bus/pci/drivers_autoprobe, 
but limited to a specific PF?  I.e., could we accomplish the same thing
with the following?

  # echo 0 > /sys/bus/pci/devices/DDDD:BB:dd.f/sriov_numvfs
  # echo 0 > /sys/bus/pci/drivers_autoprobe
  # echo 2 > /sys/bus/pci/devices/DDDD:BB:dd.f/sriov_numvfs
  # echo 1 > /sys/bus/pci/drivers_autoprobe

If not, can you contrast the above with drivers_autoprobe?  If we need
both, should they be named more similarly?

> Note that, the choice must be made before enabling VFs. The change
> will not take effect if VFs are already enabled. Simply, one can set
> sriov_numvfs to 0, choose whether to probe or not, and then resume
> sriov_numvfs.
> 
> Signed-off-by: Bodong Wang <bodong@mellanox.com>
> Signed-off-by: Eli Cohen <eli@mellanox.com>
> Reviewed-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Alex Williamson April 11, 2017, 9:51 p.m. UTC | #4
On Tue, 11 Apr 2017 16:12:11 -0500
Bjorn Helgaas <helgaas@kernel.org> wrote:

> Hi Bodong,
> 
> On Wed, Mar 22, 2017 at 05:53:58PM +0200, bodong@mellanox.com wrote:
> > From: Bodong Wang <bodong@mellanox.com>
> > 
> > Sometimes it is not desirable to probe the virtual functions after
> > SRIOV is enabled. This can save host side resource usage by VF
> > instances which would be eventually probed to VMs.
> > 
> > Add a new PCI sysfs interface "sriov_probe_vfs" to control that
> > from the PF, all current callers still retain the same functionality.
> > To modify it, echo 0/n/N (disable probe) or 1/y/Y (enable probe) to
> > 
> > /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_probe_vfs  
> 
> Is this basically the same functionality as /sys/bus/pci/drivers_autoprobe, 
> but limited to a specific PF?  I.e., could we accomplish the same thing
> with the following?
> 
>   # echo 0 > /sys/bus/pci/devices/DDDD:BB:dd.f/sriov_numvfs
>   # echo 0 > /sys/bus/pci/drivers_autoprobe
>   # echo 2 > /sys/bus/pci/devices/DDDD:BB:dd.f/sriov_numvfs
>   # echo 1 > /sys/bus/pci/drivers_autoprobe
> 
> If not, can you contrast the above with drivers_autoprobe?  If we need
> both, should they be named more similarly?

Logically it's similar, but the above is racy, not only would userspace
need to serialize such operations but a device hot added during that
blackout period wouldn't be probed.  So this is like a per PF
drivers_autoprobe for the VFs hosted by that device.  Thanks,

Alex

> > Note that, the choice must be made before enabling VFs. The change
> > will not take effect if VFs are already enabled. Simply, one can set
> > sriov_numvfs to 0, choose whether to probe or not, and then resume
> > sriov_numvfs.
> > 
> > Signed-off-by: Bodong Wang <bodong@mellanox.com>
> > Signed-off-by: Eli Cohen <eli@mellanox.com>
> > Reviewed-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Alex Williamson April 11, 2017, 9:51 p.m. UTC | #5
On Wed, 22 Mar 2017 17:53:58 +0200
bodong@mellanox.com wrote:

> From: Bodong Wang <bodong@mellanox.com>
> 
> Sometimes it is not desirable to probe the virtual functions after
> SRIOV is enabled. This can save host side resource usage by VF
> instances which would be eventually probed to VMs.
> 
> Add a new PCI sysfs interface "sriov_probe_vfs" to control that
> from the PF, all current callers still retain the same functionality.
> To modify it, echo 0/n/N (disable probe) or 1/y/Y (enable probe) to
> 
> /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_probe_vfs
> 
> Note that, the choice must be made before enabling VFs. The change
> will not take effect if VFs are already enabled. Simply, one can set
> sriov_numvfs to 0, choose whether to probe or not, and then resume
> sriov_numvfs.
> 
> Signed-off-by: Bodong Wang <bodong@mellanox.com>
> Signed-off-by: Eli Cohen <eli@mellanox.com>
> Reviewed-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
> ---
>  Documentation/PCI/pci-iov-howto.txt | 10 ++++++++++
>  drivers/pci/iov.c                   |  1 +
>  drivers/pci/pci-driver.c            | 22 ++++++++++++++++++----
>  drivers/pci/pci-sysfs.c             | 28 ++++++++++++++++++++++++++++
>  drivers/pci/pci.h                   |  1 +
>  5 files changed, 58 insertions(+), 4 deletions(-)

There should be an update to Documentation/ABI/testing/sysfs-bus-pci in
here too.

> 
> diff --git a/Documentation/PCI/pci-iov-howto.txt b/Documentation/PCI/pci-iov-howto.txt
> index 2d91ae2..902a528 100644
> --- a/Documentation/PCI/pci-iov-howto.txt
> +++ b/Documentation/PCI/pci-iov-howto.txt
> @@ -68,6 +68,16 @@ To disable SR-IOV capability:
>  	echo  0 > \
>          /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_numvfs
>  
> +To enable probing VFs by a compatible driver on the host:

nit, probably a good idea to note that probing is enabled by default.

> +Before enabling SR-IOV capabilities, do:
> +	echo 1 > \
> +        /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_probe_vfs
> +
> +To disable probing VFs by a compatible driver on the host:
> +Before enabling SR-IOV capabilities, do:
> +	echo  0 > \
> +        /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_probe_vfs
> +
>  3.2 Usage example
>  
>  Following piece of code illustrates the usage of the SR-IOV API.
> diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
> index 2479ae8..70691de 100644
> --- a/drivers/pci/iov.c
> +++ b/drivers/pci/iov.c
> @@ -450,6 +450,7 @@ static int sriov_init(struct pci_dev *dev, int pos)
>  	iov->total_VFs = total;
>  	iov->pgsz = pgsz;
>  	iov->self = dev;
> +	iov->probe_vfs = true;
>  	pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap);
>  	pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link);
>  	if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END)
> diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
> index afa7271..2a1cf84 100644
> --- a/drivers/pci/pci-driver.c
> +++ b/drivers/pci/pci-driver.c
> @@ -394,6 +394,18 @@ void __weak pcibios_free_irq(struct pci_dev *dev)
>  {
>  }
>  
> +#ifdef CONFIG_PCI_IOV
> +static inline bool pci_device_can_probe(struct pci_dev *pdev)
> +{
> +	return (!pdev->is_virtfn || pdev->physfn->sriov->probe_vfs);
> +}
> +#else
> +static inline bool pci_device_can_probe(struct pci_dev *pdev)
> +{
> +	return true;
> +}
> +#endif
> +
>  static int pci_device_probe(struct device *dev)
>  {
>  	int error;
> @@ -405,10 +417,12 @@ static int pci_device_probe(struct device *dev)
>  		return error;
>  
>  	pci_dev_get(pci_dev);
> -	error = __pci_device_probe(drv, pci_dev);
> -	if (error) {
> -		pcibios_free_irq(pci_dev);
> -		pci_dev_put(pci_dev);
> +	if (pci_device_can_probe(pci_dev)) {
> +		error = __pci_device_probe(drv, pci_dev);
> +		if (error) {
> +			pcibios_free_irq(pci_dev);
> +			pci_dev_put(pci_dev);
> +		}
>  	}
>  
>  	return error;
> diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
> index 25d010d..1d5b89d 100644
> --- a/drivers/pci/pci-sysfs.c
> +++ b/drivers/pci/pci-sysfs.c
> @@ -526,10 +526,37 @@ static ssize_t sriov_numvfs_store(struct device *dev,
>  	return count;
>  }
>  
> +static ssize_t sriov_probe_vfs_show(struct device *dev,
> +				    struct device_attribute *attr,
> +				    char *buf)
> +{
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +
> +	return sprintf(buf, "%u\n", pdev->sriov->probe_vfs);
> +}
> +
> +static ssize_t sriov_probe_vfs_store(struct device *dev,
> +				     struct device_attribute *attr,
> +				     const char *buf, size_t count)
> +{
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +	bool probe_vfs;
> +
> +	if (kstrtobool(buf, &probe_vfs) < 0)
> +		return -EINVAL;
> +
> +	pdev->sriov->probe_vfs = probe_vfs;
> +
> +	return count;
> +}
> +
>  static struct device_attribute sriov_totalvfs_attr = __ATTR_RO(sriov_totalvfs);
>  static struct device_attribute sriov_numvfs_attr =
>  		__ATTR(sriov_numvfs, (S_IRUGO|S_IWUSR|S_IWGRP),
>  		       sriov_numvfs_show, sriov_numvfs_store);
> +static struct device_attribute sriov_probe_vfs_attr =
> +		__ATTR(sriov_probe_vfs, (S_IRUGO|S_IWUSR|S_IWGRP),
> +		       sriov_probe_vfs_show, sriov_probe_vfs_store);
>  #endif /* CONFIG_PCI_IOV */
>  
>  static ssize_t driver_override_store(struct device *dev,
> @@ -1549,6 +1576,7 @@ static umode_t pci_dev_hp_attrs_are_visible(struct kobject *kobj,
>  static struct attribute *sriov_dev_attrs[] = {
>  	&sriov_totalvfs_attr.attr,
>  	&sriov_numvfs_attr.attr,
> +	&sriov_probe_vfs_attr.attr,
>  	NULL,
>  };
>  
> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> index 8dd38e6..b7d8127d 100644
> --- a/drivers/pci/pci.h
> +++ b/drivers/pci/pci.h
> @@ -272,6 +272,7 @@ struct pci_sriov {
>  	struct pci_dev *self;	/* this PF */
>  	struct mutex lock;	/* lock for setting sriov_numvfs in sysfs */
>  	resource_size_t barsz[PCI_SRIOV_NUM_BARS];	/* VF BAR size */
> +	bool probe_vfs;		/* control probing of VFs */
>  };
>  
>  #ifdef CONFIG_PCI_ATS

Aside from the missing ABI update and howto nit, this seems ok to me,
though I wonder if this isn't a more general problem that should maybe
be solved in the driver core for any device that may create/expose
child devices.  The drivers_autoprobe interface we currently have is
pretty limited with its bus-subsystem level scope.  Thanks,

Alex
Bodong Wang April 12, 2017, 2:37 p.m. UTC | #6
On 4/11/2017 4:12 PM, Bjorn Helgaas wrote:
> Hi Bodong,
>
> On Wed, Mar 22, 2017 at 05:53:58PM +0200, bodong@mellanox.com wrote:
>> From: Bodong Wang <bodong@mellanox.com>
>>
>> Sometimes it is not desirable to probe the virtual functions after
>> SRIOV is enabled. This can save host side resource usage by VF
>> instances which would be eventually probed to VMs.
>>
>> Add a new PCI sysfs interface "sriov_probe_vfs" to control that
>> from the PF, all current callers still retain the same functionality.
>> To modify it, echo 0/n/N (disable probe) or 1/y/Y (enable probe) to
>>
>> /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_probe_vfs
> Is this basically the same functionality as /sys/bus/pci/drivers_autoprobe,
> but limited to a specific PF?  I.e., could we accomplish the same thing
> with the following?
>
>    # echo 0 > /sys/bus/pci/devices/DDDD:BB:dd.f/sriov_numvfs
>    # echo 0 > /sys/bus/pci/drivers_autoprobe
>    # echo 2 > /sys/bus/pci/devices/DDDD:BB:dd.f/sriov_numvfs
>    # echo 1 > /sys/bus/pci/drivers_autoprobe
>
> If not, can you contrast the above with drivers_autoprobe?  If we need
> both, should they be named more similarly?
Hi Bjorn,

I agree with Alex about not using driver_autoprobe to achieve this. It 
will affect all pci related device once it's disabled(probably in a bad 
way). On the other hand, current pci driver doesn't work for this use 
case at all. VFs drivers(if any) will be probed once sriov is enabled no 
matter what. The code path is pci_enable_sriov-> 
pci_bus_add_device->device_attach, and drivers will be probed at the 
end. Drivers_autoprobe only prevents probing pci_device_add is called. 
This patch bounded the change to the device itself and requires no 
device driver update.

For the name convention, it's aligned with other sriov related sysfs 
entries. Let me know if you have better options.

Thanks,

Bodong
Bjorn Helgaas April 12, 2017, 3:22 p.m. UTC | #7
On Wed, Apr 12, 2017 at 09:37:22AM -0500, Bodong Wang wrote:
> On 4/11/2017 4:12 PM, Bjorn Helgaas wrote:
> >Hi Bodong,
> >
> >On Wed, Mar 22, 2017 at 05:53:58PM +0200, bodong@mellanox.com wrote:
> >>From: Bodong Wang <bodong@mellanox.com>
> >>
> >>Sometimes it is not desirable to probe the virtual functions after
> >>SRIOV is enabled. This can save host side resource usage by VF
> >>instances which would be eventually probed to VMs.
> >>
> >>Add a new PCI sysfs interface "sriov_probe_vfs" to control that
> >>from the PF, all current callers still retain the same functionality.
> >>To modify it, echo 0/n/N (disable probe) or 1/y/Y (enable probe) to
> >>
> >>/sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_probe_vfs
> >Is this basically the same functionality as /sys/bus/pci/drivers_autoprobe,
> >but limited to a specific PF?  I.e., could we accomplish the same thing
> >with the following?
> >
> >   # echo 0 > /sys/bus/pci/devices/DDDD:BB:dd.f/sriov_numvfs
> >   # echo 0 > /sys/bus/pci/drivers_autoprobe
> >   # echo 2 > /sys/bus/pci/devices/DDDD:BB:dd.f/sriov_numvfs
> >   # echo 1 > /sys/bus/pci/drivers_autoprobe
> >
> >If not, can you contrast the above with drivers_autoprobe?  If we need
> >both, should they be named more similarly?
> Hi Bjorn,
> 
> I agree with Alex about not using driver_autoprobe to achieve this.
> It will affect all pci related device once it's disabled(probably in
> a bad way). 

I agree drivers_autoprobe is racy with respect to other PCI devices, so
it's not a great solution to your issue.  But I do want to make it
obvious that these are related concepts.

Alex mooted the idea of a generic driver core knob for devices that
create child devices.  That's intriguing.  What other situations
besides SR-IOV do this?  Maybe we could add a "drivers_autoprobe"
inside the directory of each device that's capable of having child
devices?

If we continue with the current approach (instead of Alex's idea),
I propose that you:

  - Name your new knob "sriov_drivers_autoprobe" to be consistent with
    other sriov sysfs files while also being parallel to
    "drivers_autoprobe"

  - Update Documentation/ABI/testing/sysfs-bus-pci to document your
    new knob

  - Update Documentation/ABI/... to document drivers_autoprobe

Bjorn
Bodong Wang April 12, 2017, 4 p.m. UTC | #8
On 4/12/2017 10:22 AM, Bjorn Helgaas wrote:
> On Wed, Apr 12, 2017 at 09:37:22AM -0500, Bodong Wang wrote:
>> On 4/11/2017 4:12 PM, Bjorn Helgaas wrote:
>>> Hi Bodong,
>>>
>>> On Wed, Mar 22, 2017 at 05:53:58PM +0200, bodong@mellanox.com wrote:
>>>> From: Bodong Wang <bodong@mellanox.com>
>>>>
>>>> Sometimes it is not desirable to probe the virtual functions after
>>>> SRIOV is enabled. This can save host side resource usage by VF
>>>> instances which would be eventually probed to VMs.
>>>>
>>>> Add a new PCI sysfs interface "sriov_probe_vfs" to control that
>>> >from the PF, all current callers still retain the same functionality.
>>>> To modify it, echo 0/n/N (disable probe) or 1/y/Y (enable probe) to
>>>>
>>>> /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_probe_vfs
>>> Is this basically the same functionality as /sys/bus/pci/drivers_autoprobe,
>>> but limited to a specific PF?  I.e., could we accomplish the same thing
>>> with the following?
>>>
>>>    # echo 0 > /sys/bus/pci/devices/DDDD:BB:dd.f/sriov_numvfs
>>>    # echo 0 > /sys/bus/pci/drivers_autoprobe
>>>    # echo 2 > /sys/bus/pci/devices/DDDD:BB:dd.f/sriov_numvfs
>>>    # echo 1 > /sys/bus/pci/drivers_autoprobe
>>>
>>> If not, can you contrast the above with drivers_autoprobe?  If we need
>>> both, should they be named more similarly?
>> Hi Bjorn,
>>
>> I agree with Alex about not using driver_autoprobe to achieve this.
>> It will affect all pci related device once it's disabled(probably in
>> a bad way).
> Alex mooted the idea of a generic driver core knob for devices that
> create child devices.  That's intriguing.  What other situations
> besides SR-IOV do this?  Maybe we could add a "drivers_autoprobe"
> inside the directory of each device that's capable of having child
> devices?
This seems interesting to me as well. Not sure if USB driver is 
applicable or not. And, drivers_autoprobe might work for USB if it 
doesn't probe the sub-device as default like sriov. Worth to take a look.
> If we continue with the current approach (instead of Alex's idea),
> I propose that you:
>
>    - Name your new knob "sriov_drivers_autoprobe" to be consistent with
>      other sriov sysfs files while also being parallel to
>      "drivers_autoprobe"
>
>    - Update Documentation/ABI/testing/sysfs-bus-pci to document your
>      new knob
>
>    - Update Documentation/ABI/... to document drivers_autoprobe
Ack, will send v3.

Thanks,

Bodong
diff mbox

Patch

diff --git a/Documentation/PCI/pci-iov-howto.txt b/Documentation/PCI/pci-iov-howto.txt
index 2d91ae2..902a528 100644
--- a/Documentation/PCI/pci-iov-howto.txt
+++ b/Documentation/PCI/pci-iov-howto.txt
@@ -68,6 +68,16 @@  To disable SR-IOV capability:
 	echo  0 > \
         /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_numvfs
 
+To enable probing VFs by a compatible driver on the host:
+Before enabling SR-IOV capabilities, do:
+	echo 1 > \
+        /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_probe_vfs
+
+To disable probing VFs by a compatible driver on the host:
+Before enabling SR-IOV capabilities, do:
+	echo  0 > \
+        /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_probe_vfs
+
 3.2 Usage example
 
 Following piece of code illustrates the usage of the SR-IOV API.
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index 2479ae8..70691de 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -450,6 +450,7 @@  static int sriov_init(struct pci_dev *dev, int pos)
 	iov->total_VFs = total;
 	iov->pgsz = pgsz;
 	iov->self = dev;
+	iov->probe_vfs = true;
 	pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap);
 	pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link);
 	if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END)
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index afa7271..2a1cf84 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -394,6 +394,18 @@  void __weak pcibios_free_irq(struct pci_dev *dev)
 {
 }
 
+#ifdef CONFIG_PCI_IOV
+static inline bool pci_device_can_probe(struct pci_dev *pdev)
+{
+	return (!pdev->is_virtfn || pdev->physfn->sriov->probe_vfs);
+}
+#else
+static inline bool pci_device_can_probe(struct pci_dev *pdev)
+{
+	return true;
+}
+#endif
+
 static int pci_device_probe(struct device *dev)
 {
 	int error;
@@ -405,10 +417,12 @@  static int pci_device_probe(struct device *dev)
 		return error;
 
 	pci_dev_get(pci_dev);
-	error = __pci_device_probe(drv, pci_dev);
-	if (error) {
-		pcibios_free_irq(pci_dev);
-		pci_dev_put(pci_dev);
+	if (pci_device_can_probe(pci_dev)) {
+		error = __pci_device_probe(drv, pci_dev);
+		if (error) {
+			pcibios_free_irq(pci_dev);
+			pci_dev_put(pci_dev);
+		}
 	}
 
 	return error;
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 25d010d..1d5b89d 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -526,10 +526,37 @@  static ssize_t sriov_numvfs_store(struct device *dev,
 	return count;
 }
 
+static ssize_t sriov_probe_vfs_show(struct device *dev,
+				    struct device_attribute *attr,
+				    char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	return sprintf(buf, "%u\n", pdev->sriov->probe_vfs);
+}
+
+static ssize_t sriov_probe_vfs_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t count)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	bool probe_vfs;
+
+	if (kstrtobool(buf, &probe_vfs) < 0)
+		return -EINVAL;
+
+	pdev->sriov->probe_vfs = probe_vfs;
+
+	return count;
+}
+
 static struct device_attribute sriov_totalvfs_attr = __ATTR_RO(sriov_totalvfs);
 static struct device_attribute sriov_numvfs_attr =
 		__ATTR(sriov_numvfs, (S_IRUGO|S_IWUSR|S_IWGRP),
 		       sriov_numvfs_show, sriov_numvfs_store);
+static struct device_attribute sriov_probe_vfs_attr =
+		__ATTR(sriov_probe_vfs, (S_IRUGO|S_IWUSR|S_IWGRP),
+		       sriov_probe_vfs_show, sriov_probe_vfs_store);
 #endif /* CONFIG_PCI_IOV */
 
 static ssize_t driver_override_store(struct device *dev,
@@ -1549,6 +1576,7 @@  static umode_t pci_dev_hp_attrs_are_visible(struct kobject *kobj,
 static struct attribute *sriov_dev_attrs[] = {
 	&sriov_totalvfs_attr.attr,
 	&sriov_numvfs_attr.attr,
+	&sriov_probe_vfs_attr.attr,
 	NULL,
 };
 
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 8dd38e6..b7d8127d 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -272,6 +272,7 @@  struct pci_sriov {
 	struct pci_dev *self;	/* this PF */
 	struct mutex lock;	/* lock for setting sriov_numvfs in sysfs */
 	resource_size_t barsz[PCI_SRIOV_NUM_BARS];	/* VF BAR size */
+	bool probe_vfs;		/* control probing of VFs */
 };
 
 #ifdef CONFIG_PCI_ATS