diff mbox series

[v2,1/5] PCI/ATS: Add PRI support for PCIe VF devices

Message ID f773440c0eee2a8d4e5d6e2856717404ac836458.1557162861.git.sathyanarayanan.kuppuswamy@linux.intel.com
State Changes Requested
Delegated to: Bjorn Helgaas
Headers show
Series Fix PF/VF dependency issues | expand

Commit Message

Kuppuswamy Sathyanarayanan May 6, 2019, 5:20 p.m. UTC
From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>

When the IOMMU tries to enable PRI for a VF device in
iommu_enable_dev_iotlb(), it always fails because PRI support for PCIe
VF devices is currently broken in the PCIe driver. The current
implementation expects the given PCIe device (whether PF or VF) to
implement the PRI capability before enabling PRI support, but this
assumption is incorrect. Per PCIe spec r4.0, sec 9.3.7.11, all VFs
associated with a PF can only use the Page Request Interface (PRI) of
the PF and must not implement it themselves. Hence we need to create an
exception for handling PRI support for PCIe VF devices.

Since PRI is shared between the PF and its VFs, the following rules
should apply.

1. Enable PRI in a VF only if it is already enabled in the PF.
2. When enabling/disabling PRI for a VF, instead of configuring the
registers, just increase/decrease the usage count (pri_ref_cnt) of the PF.
3. Disable PRI in PF only if pr_ref_cnt is zero.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Keith Busch <keith.busch@intel.com>
Suggested-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
---
 drivers/pci/ats.c   | 53 +++++++++++++++++++++++++++++++++++++++++++--
 include/linux/pci.h |  1 +
 2 files changed, 52 insertions(+), 2 deletions(-)

Comments

Bjorn Helgaas May 29, 2019, 10:57 p.m. UTC | #1
On Mon, May 06, 2019 at 10:20:03AM -0700, sathyanarayanan.kuppuswamy@linux.intel.com wrote:
> From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
> 
> When IOMMU tries to enable PRI for VF device in
> iommu_enable_dev_iotlb(), it always fails because PRI support for PCIe
> VF device is currently broken in PCIE driver. Current implementation
> expects the given PCIe device (PF & VF) to implement PRI capability
> before enabling the PRI support. But this assumption is incorrect. As
> per PCIe spec r4.0, sec 9.3.7.11, all VFs associated with PF can only
> use the Page Request Interface (PRI) of the PF and not implement it.
> Hence we need to create exception for handling the PRI support for PCIe
> VF device.
> 
> Since PRI is shared between PF/VF devices, following rules should apply.
> 
> 1. Enable PRI in VF only if its already enabled in PF.
> 2. When enabling/disabling PRI for VF, instead of configuring the
> registers just increase/decrease the usage count (pri_ref_cnt) of PF.
> 3. Disable PRI in PF only if pr_ref_cnt is zero.

s/pr_ref_cnt/pri_ref_cnt/

> Cc: Ashok Raj <ashok.raj@intel.com>
> Cc: Keith Busch <keith.busch@intel.com>
> Suggested-by: Ashok Raj <ashok.raj@intel.com>
> Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
> ---
>  drivers/pci/ats.c   | 53 +++++++++++++++++++++++++++++++++++++++++++--
>  include/linux/pci.h |  1 +
>  2 files changed, 52 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
> index 97c08146534a..5582e5d83a3f 100644
> --- a/drivers/pci/ats.c
> +++ b/drivers/pci/ats.c
> @@ -181,12 +181,39 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs)
>  	u16 control, status;
>  	u32 max_requests;
>  	int pos;
> +	struct pci_dev *pf;
>  
>  	if (WARN_ON(pdev->pri_enabled))
>  		return -EBUSY;
>  
>  	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
> -	if (!pos)
> +
> +	if (pdev->is_virtfn) {
> +		/*
> +		 * Per PCIe r4.0, sec 9.3.7.11, VF must not implement PRI
> +		 * Capability.
> +		 */
> +		if (pos) {
> +			dev_err(&pdev->dev, "VF must not implement PRI");
> +			return -EINVAL;
> +		}

This seems gratuitous.  It finds implementation errors, but since we
correctly use the PF here anyway, it doesn't *need* to prevent PRI on
the VF from working.

I think you should just have:

  if (pdev->is_virtfn) {
    pf = pci_physfn(pdev);
    if (!pf->pri_enabled)
      return -EINVAL;

    pdev->pri_enabled = 1;
    atomic_inc(&pf->pri_ref_cnt);
  }

  pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
  if (!pos)
    return -EINVAL;

> +		pf = pci_physfn(pdev);
> +
> +		/* If VF config does not match with PF, return error */
> +		if (!pf->pri_enabled)
> +			return -EINVAL;
> +
> +		pdev->pri_reqs_alloc = pf->pri_reqs_alloc;

Is there any point in setting vf->pri_reqs_alloc?  I don't think it's
used for anything since pri_reqs_alloc is only used to write the PF
capability, and we only do that for the PF.

> +		pdev->pri_enabled = 1;
> +
> +		/* Increment PF PRI refcount */

Superfluous comment, since that's exactly what the code says.  It's
always good when the code is so clear that it doesn't require comments :)

> +		atomic_inc(&pf->pri_ref_cnt);
> +
> +		return 0;
> +	}
> +
> +	if (pdev->is_physfn && !pos)
>  		return -EINVAL;
>  
>  	pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status);
> @@ -202,7 +229,6 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs)
>  	pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
>  
>  	pdev->pri_enabled = 1;
> -
>  	return 0;
>  }
>  EXPORT_SYMBOL_GPL(pci_enable_pri);
> @@ -217,10 +243,27 @@ void pci_disable_pri(struct pci_dev *pdev)
>  {
>  	u16 control;
>  	int pos;
> +	struct pci_dev *pf;
>  
>  	if (WARN_ON(!pdev->pri_enabled))
>  		return;
>  
> +	/* All VFs should be disabled before disabling PF */
> +	if (atomic_read(&pdev->pri_ref_cnt))
> +		return;
> +
> +	if (pdev->is_virtfn) {
> +		/* Since VF shares PRI with PF, use PF config. */
> +		pf = pci_physfn(pdev);
> +
> +		/* Decrement PF PRI refcount */
> +		atomic_dec(&pf->pri_ref_cnt);
> +
> +		pdev->pri_enabled = 0;
> +
> +		return;
> +	}
> +
>  	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
>  	if (!pos)
>  		return;
> @@ -246,6 +289,9 @@ void pci_restore_pri_state(struct pci_dev *pdev)
>  	if (!pdev->pri_enabled)
>  		return;
>  
> +	if (pdev->is_virtfn)
> +		return;
> +
>  	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
>  	if (!pos)
>  		return;
> @@ -270,6 +316,9 @@ int pci_reset_pri(struct pci_dev *pdev)
>  	if (WARN_ON(pdev->pri_enabled))
>  		return -EBUSY;
>  
> +	if (pdev->is_virtfn)
> +		return 0;
> +
>  	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
>  	if (!pos)
>  		return -EINVAL;
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 77448215ef5b..699c79c99a39 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -450,6 +450,7 @@ struct pci_dev {
>  #endif
>  #ifdef CONFIG_PCI_PRI
>  	u32		pri_reqs_alloc; /* Number of PRI requests allocated */
> +	atomic_t	pri_ref_cnt;	/* Number of VFs with PRI enabled */
>  #endif
>  #ifdef CONFIG_PCI_PASID
>  	u16		pasid_features;
> -- 
> 2.20.1
>
Ashok Raj May 29, 2019, 11:04 p.m. UTC | #2
On Wed, May 29, 2019 at 05:57:14PM -0500, Bjorn Helgaas wrote:
> On Mon, May 06, 2019 at 10:20:03AM -0700, sathyanarayanan.kuppuswamy@linux.intel.com wrote:
> > From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
> > 
> > When IOMMU tries to enable PRI for VF device in
> > iommu_enable_dev_iotlb(), it always fails because PRI support for PCIe
> > VF device is currently broken in PCIE driver. Current implementation
> > expects the given PCIe device (PF & VF) to implement PRI capability
> > before enabling the PRI support. But this assumption is incorrect. As
> > per PCIe spec r4.0, sec 9.3.7.11, all VFs associated with PF can only
> > use the Page Request Interface (PRI) of the PF and not implement it.
> > Hence we need to create exception for handling the PRI support for PCIe
> > VF device.
> > 
> > Since PRI is shared between PF/VF devices, following rules should apply.
> > 
> > 1. Enable PRI in VF only if its already enabled in PF.
> > 2. When enabling/disabling PRI for VF, instead of configuring the
> > registers just increase/decrease the usage count (pri_ref_cnt) of PF.
> > 3. Disable PRI in PF only if pr_ref_cnt is zero.
> 
> s/pr_ref_cnt/pri_ref_cnt/
> 
> > Cc: Ashok Raj <ashok.raj@intel.com>
> > Cc: Keith Busch <keith.busch@intel.com>
> > Suggested-by: Ashok Raj <ashok.raj@intel.com>
> > Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
> > ---
> >  drivers/pci/ats.c   | 53 +++++++++++++++++++++++++++++++++++++++++++--
> >  include/linux/pci.h |  1 +
> >  2 files changed, 52 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
> > index 97c08146534a..5582e5d83a3f 100644
> > --- a/drivers/pci/ats.c
> > +++ b/drivers/pci/ats.c
> > @@ -181,12 +181,39 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs)
> >  	u16 control, status;
> >  	u32 max_requests;
> >  	int pos;
> > +	struct pci_dev *pf;
> >  
> >  	if (WARN_ON(pdev->pri_enabled))
> >  		return -EBUSY;
> >  
> >  	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
> > -	if (!pos)
> > +
> > +	if (pdev->is_virtfn) {
> > +		/*
> > +		 * Per PCIe r4.0, sec 9.3.7.11, VF must not implement PRI
> > +		 * Capability.
> > +		 */
> > +		if (pos) {
> > +			dev_err(&pdev->dev, "VF must not implement PRI");
> > +			return -EINVAL;
> > +		}
> 
> This seems gratuitous.  It finds implementation errors, but since we
> correctly use the PF here anyway, it doesn't *need* to prevent PRI on
> the VF from working.
> 
> I think you should just have:
> 
>   if (pdev->is_virtfn) {
>     pf = pci_physfn(pdev);
>     if (!pf->pri_enabled)
>       return -EINVAL;

This would be incorrect, since if we never did any bind_mm to the PF,
PRI would not have been enabled. Currently this is done in the IOMMU
driver, and not in the device driver.

I suppose we should enable the PF capability if it's not enabled. The
same comment would be applicable for PASID as well.


> 
>     pdev->pri_enabled = 1;
>     atomic_inc(&pf->pri_ref_cnt);
>   }
> 
>   pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
>   if (!pos)
>     return -EINVAL;
> 
> > +		pf = pci_physfn(pdev);
> > +
> > +		/* If VF config does not match with PF, return error */
> > +		if (!pf->pri_enabled)
> > +			return -EINVAL;
> > +
> > +		pdev->pri_reqs_alloc = pf->pri_reqs_alloc;
> 
> Is there any point in setting vf->pri_reqs_alloc?  I don't think it's
> used for anything since pri_reqs_alloc is only used to write the PF
> capability, and we only do that for the PF.
> 
> > +		pdev->pri_enabled = 1;
> > +
> > +		/* Increment PF PRI refcount */
> 
> Superfluous comment, since that's exactly what the code says.  It's
> always good when the code is so clear that it doesn't require comments :)
> 
> > +		atomic_inc(&pf->pri_ref_cnt);
> > +
> > +		return 0;
> > +	}
> > +
> > +	if (pdev->is_physfn && !pos)
> >  		return -EINVAL;
> >  
> >  	pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status);
> > @@ -202,7 +229,6 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs)
> >  	pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
> >  
> >  	pdev->pri_enabled = 1;
> > -
> >  	return 0;
> >  }
> >  EXPORT_SYMBOL_GPL(pci_enable_pri);
> > @@ -217,10 +243,27 @@ void pci_disable_pri(struct pci_dev *pdev)
> >  {
> >  	u16 control;
> >  	int pos;
> > +	struct pci_dev *pf;
> >  
> >  	if (WARN_ON(!pdev->pri_enabled))
> >  		return;
> >  
> > +	/* All VFs should be disabled before disabling PF */
> > +	if (atomic_read(&pdev->pri_ref_cnt))
> > +		return;
> > +
> > +	if (pdev->is_virtfn) {
> > +		/* Since VF shares PRI with PF, use PF config. */
> > +		pf = pci_physfn(pdev);
> > +
> > +		/* Decrement PF PRI refcount */
> > +		atomic_dec(&pf->pri_ref_cnt);
> > +
> > +		pdev->pri_enabled = 0;
> > +
> > +		return;
> > +	}
> > +
> >  	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
> >  	if (!pos)
> >  		return;
> > @@ -246,6 +289,9 @@ void pci_restore_pri_state(struct pci_dev *pdev)
> >  	if (!pdev->pri_enabled)
> >  		return;
> >  
> > +	if (pdev->is_virtfn)
> > +		return;
> > +
> >  	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
> >  	if (!pos)
> >  		return;
> > @@ -270,6 +316,9 @@ int pci_reset_pri(struct pci_dev *pdev)
> >  	if (WARN_ON(pdev->pri_enabled))
> >  		return -EBUSY;
> >  
> > +	if (pdev->is_virtfn)
> > +		return 0;
> > +
> >  	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
> >  	if (!pos)
> >  		return -EINVAL;
> > diff --git a/include/linux/pci.h b/include/linux/pci.h
> > index 77448215ef5b..699c79c99a39 100644
> > --- a/include/linux/pci.h
> > +++ b/include/linux/pci.h
> > @@ -450,6 +450,7 @@ struct pci_dev {
> >  #endif
> >  #ifdef CONFIG_PCI_PRI
> >  	u32		pri_reqs_alloc; /* Number of PRI requests allocated */
> > +	atomic_t	pri_ref_cnt;	/* Number of VFs with PRI enabled */
> >  #endif
> >  #ifdef CONFIG_PCI_PASID
> >  	u16		pasid_features;
> > -- 
> > 2.20.1
> >
Kuppuswamy Sathyanarayanan May 29, 2019, 11:24 p.m. UTC | #3
On 5/29/19 4:04 PM, Raj, Ashok wrote:
> On Wed, May 29, 2019 at 05:57:14PM -0500, Bjorn Helgaas wrote:
>> On Mon, May 06, 2019 at 10:20:03AM -0700, sathyanarayanan.kuppuswamy@linux.intel.com wrote:
>>> From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
>>>
>>> When IOMMU tries to enable PRI for VF device in
>>> iommu_enable_dev_iotlb(), it always fails because PRI support for PCIe
>>> VF device is currently broken in PCIE driver. Current implementation
>>> expects the given PCIe device (PF & VF) to implement PRI capability
>>> before enabling the PRI support. But this assumption is incorrect. As
>>> per PCIe spec r4.0, sec 9.3.7.11, all VFs associated with PF can only
>>> use the Page Request Interface (PRI) of the PF and not implement it.
>>> Hence we need to create exception for handling the PRI support for PCIe
>>> VF device.
>>>
>>> Since PRI is shared between PF/VF devices, following rules should apply.
>>>
>>> 1. Enable PRI in VF only if its already enabled in PF.
>>> 2. When enabling/disabling PRI for VF, instead of configuring the
>>> registers just increase/decrease the usage count (pri_ref_cnt) of PF.
>>> 3. Disable PRI in PF only if pr_ref_cnt is zero.
>> s/pr_ref_cnt/pri_ref_cnt/
>>
>>> Cc: Ashok Raj <ashok.raj@intel.com>
>>> Cc: Keith Busch <keith.busch@intel.com>
>>> Suggested-by: Ashok Raj <ashok.raj@intel.com>
>>> Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
>>> ---
>>>   drivers/pci/ats.c   | 53 +++++++++++++++++++++++++++++++++++++++++++--
>>>   include/linux/pci.h |  1 +
>>>   2 files changed, 52 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
>>> index 97c08146534a..5582e5d83a3f 100644
>>> --- a/drivers/pci/ats.c
>>> +++ b/drivers/pci/ats.c
>>> @@ -181,12 +181,39 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs)
>>>   	u16 control, status;
>>>   	u32 max_requests;
>>>   	int pos;
>>> +	struct pci_dev *pf;
>>>   
>>>   	if (WARN_ON(pdev->pri_enabled))
>>>   		return -EBUSY;
>>>   
>>>   	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
>>> -	if (!pos)
>>> +
>>> +	if (pdev->is_virtfn) {
>>> +		/*
>>> +		 * Per PCIe r4.0, sec 9.3.7.11, VF must not implement PRI
>>> +		 * Capability.
>>> +		 */
>>> +		if (pos) {
>>> +			dev_err(&pdev->dev, "VF must not implement PRI");
>>> +			return -EINVAL;
>>> +		}
>> This seems gratuitous.  It finds implementation errors, but since we
>> correctly use the PF here anyway, it doesn't *need* to prevent PRI on
>> the VF from working.
>>
>> I think you should just have:
>>
>>    if (pdev->is_virtfn) {
>>      pf = pci_physfn(pdev);
>>      if (!pf->pri_enabled)
>>        return -EINVAL;
> This would be incorrect. Since if we never did any bind_mm to the PF
> PRI would not have been enabled. Currently this is done in the IOMMU
> driver, and not in the device driver.
>
> I suppose we should enable PF capability if its not enabled. Same
> comment would be applicable for PASID as well.

I am currently working on a fix to handle the bind issue (VF binding
before PF). My next version will have this update.

But regarding the VF spec compliance checks, is there any issue with
having them in the enable code? Perhaps I can change dev_err to dev_warn
and not return an error if it finds implementation errors. I found it
useful to have them because they helped me find some faulty devices
during my testing. Let me know your comments.

>
>
>>      pdev->pri_enabled = 1;
>>      atomic_inc(&pf->pri_ref_cnt);
>>    }
>>
>>    pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
>>    if (!pos)
>>      return -EINVAL;
>>
>>> +		pf = pci_physfn(pdev);
>>> +
>>> +		/* If VF config does not match with PF, return error */
>>> +		if (!pf->pri_enabled)
>>> +			return -EINVAL;
>>> +
>>> +		pdev->pri_reqs_alloc = pf->pri_reqs_alloc;
>> Is there any point in setting vf->pri_reqs_alloc?  I don't think it's
>> used for anything since pri_reqs_alloc is only used to write the PF
>> capability, and we only do that for the PF.
>>
>>> +		pdev->pri_enabled = 1;
>>> +
>>> +		/* Increment PF PRI refcount */
>> Superfluous comment, since that's exactly what the code says.  It's
>> always good when the code is so clear that it doesn't require comments :)
>>
>>> +		atomic_inc(&pf->pri_ref_cnt);
>>> +
>>> +		return 0;
>>> +	}
>>> +
>>> +	if (pdev->is_physfn && !pos)
>>>   		return -EINVAL;
>>>   
>>>   	pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status);
>>> @@ -202,7 +229,6 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs)
>>>   	pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
>>>   
>>>   	pdev->pri_enabled = 1;
>>> -
>>>   	return 0;
>>>   }
>>>   EXPORT_SYMBOL_GPL(pci_enable_pri);
>>> @@ -217,10 +243,27 @@ void pci_disable_pri(struct pci_dev *pdev)
>>>   {
>>>   	u16 control;
>>>   	int pos;
>>> +	struct pci_dev *pf;
>>>   
>>>   	if (WARN_ON(!pdev->pri_enabled))
>>>   		return;
>>>   
>>> +	/* All VFs should be disabled before disabling PF */
>>> +	if (atomic_read(&pdev->pri_ref_cnt))
>>> +		return;
>>> +
>>> +	if (pdev->is_virtfn) {
>>> +		/* Since VF shares PRI with PF, use PF config. */
>>> +		pf = pci_physfn(pdev);
>>> +
>>> +		/* Decrement PF PRI refcount */
>>> +		atomic_dec(&pf->pri_ref_cnt);
>>> +
>>> +		pdev->pri_enabled = 0;
>>> +
>>> +		return;
>>> +	}
>>> +
>>>   	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
>>>   	if (!pos)
>>>   		return;
>>> @@ -246,6 +289,9 @@ void pci_restore_pri_state(struct pci_dev *pdev)
>>>   	if (!pdev->pri_enabled)
>>>   		return;
>>>   
>>> +	if (pdev->is_virtfn)
>>> +		return;
>>> +
>>>   	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
>>>   	if (!pos)
>>>   		return;
>>> @@ -270,6 +316,9 @@ int pci_reset_pri(struct pci_dev *pdev)
>>>   	if (WARN_ON(pdev->pri_enabled))
>>>   		return -EBUSY;
>>>   
>>> +	if (pdev->is_virtfn)
>>> +		return 0;
>>> +
>>>   	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
>>>   	if (!pos)
>>>   		return -EINVAL;
>>> diff --git a/include/linux/pci.h b/include/linux/pci.h
>>> index 77448215ef5b..699c79c99a39 100644
>>> --- a/include/linux/pci.h
>>> +++ b/include/linux/pci.h
>>> @@ -450,6 +450,7 @@ struct pci_dev {
>>>   #endif
>>>   #ifdef CONFIG_PCI_PRI
>>>   	u32		pri_reqs_alloc; /* Number of PRI requests allocated */
>>> +	atomic_t	pri_ref_cnt;	/* Number of VFs with PRI enabled */
>>>   #endif
>>>   #ifdef CONFIG_PCI_PASID
>>>   	u16		pasid_features;
>>> -- 
>>> 2.20.1
>>>
Bjorn Helgaas May 30, 2019, 1:17 p.m. UTC | #4
On Wed, May 29, 2019 at 04:04:27PM -0700, Raj, Ashok wrote:
> On Wed, May 29, 2019 at 05:57:14PM -0500, Bjorn Helgaas wrote:
> > On Mon, May 06, 2019 at 10:20:03AM -0700, sathyanarayanan.kuppuswamy@linux.intel.com wrote:
> > > From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
> > > 
> > > When IOMMU tries to enable PRI for VF device in
> > > iommu_enable_dev_iotlb(), it always fails because PRI support for PCIe
> > > VF device is currently broken in PCIE driver. Current implementation
> > > expects the given PCIe device (PF & VF) to implement PRI capability
> > > before enabling the PRI support. But this assumption is incorrect. As
> > > per PCIe spec r4.0, sec 9.3.7.11, all VFs associated with PF can only
> > > use the Page Request Interface (PRI) of the PF and not implement it.
> > > Hence we need to create exception for handling the PRI support for PCIe
> > > VF device.
> > > 
> > > Since PRI is shared between PF/VF devices, following rules should apply.
> > > 
> > > 1. Enable PRI in VF only if its already enabled in PF.
> > > 2. When enabling/disabling PRI for VF, instead of configuring the
> > > registers just increase/decrease the usage count (pri_ref_cnt) of PF.
> > > 3. Disable PRI in PF only if pr_ref_cnt is zero.
> > 
> > s/pr_ref_cnt/pri_ref_cnt/
> > 
> > > Cc: Ashok Raj <ashok.raj@intel.com>
> > > Cc: Keith Busch <keith.busch@intel.com>
> > > Suggested-by: Ashok Raj <ashok.raj@intel.com>
> > > Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
> > > ---
> > >  drivers/pci/ats.c   | 53 +++++++++++++++++++++++++++++++++++++++++++--
> > >  include/linux/pci.h |  1 +
> > >  2 files changed, 52 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
> > > index 97c08146534a..5582e5d83a3f 100644
> > > --- a/drivers/pci/ats.c
> > > +++ b/drivers/pci/ats.c
> > > @@ -181,12 +181,39 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs)
> > >  	u16 control, status;
> > >  	u32 max_requests;
> > >  	int pos;
> > > +	struct pci_dev *pf;
> > >  
> > >  	if (WARN_ON(pdev->pri_enabled))
> > >  		return -EBUSY;
> > >  
> > >  	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
> > > -	if (!pos)
> > > +
> > > +	if (pdev->is_virtfn) {
> > > +		/*
> > > +		 * Per PCIe r4.0, sec 9.3.7.11, VF must not implement PRI
> > > +		 * Capability.
> > > +		 */
> > > +		if (pos) {
> > > +			dev_err(&pdev->dev, "VF must not implement PRI");
> > > +			return -EINVAL;
> > > +		}
> > 
> > This seems gratuitous.  It finds implementation errors, but since we
> > correctly use the PF here anyway, it doesn't *need* to prevent PRI on
> > the VF from working.
> > 
> > I think you should just have:
> > 
> >   if (pdev->is_virtfn) {
> >     pf = pci_physfn(pdev);
> >     if (!pf->pri_enabled)
> >       return -EINVAL;
> 
> This would be incorrect. Since if we never did any bind_mm to the PF
> PRI would not have been enabled. Currently this is done in the IOMMU 
> driver, and not in the device driver. 

This is functionally the same as the original patch, only omitting the
"VF must not implement PRI" check.

> I suppose we should enable PF capability if its not enabled. Same
> comment would be applicable for PASID as well.

Operating on a device other than the one the driver owns opens the
issue of mutual exclusion and races, so would require careful
scrutiny.  Are PRI/PASID things that could be *always* enabled for the
PF at enumeration-time, or do we have to wait until a driver claims
the VF?  If the latter, are there coordination issues between drivers
of different VFs?

> >     pdev->pri_enabled = 1;
> >     atomic_inc(&pf->pri_ref_cnt);
> >   }
> > 
> >   pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
> >   if (!pos)
> >     return -EINVAL;
> > 
> > > +		pf = pci_physfn(pdev);
> > > +
> > > +		/* If VF config does not match with PF, return error */
> > > +		if (!pf->pri_enabled)
> > > +			return -EINVAL;
> > > +
> > > +		pdev->pri_reqs_alloc = pf->pri_reqs_alloc;
> > 
> > Is there any point in setting vf->pri_reqs_alloc?  I don't think it's
> > used for anything since pri_reqs_alloc is only used to write the PF
> > capability, and we only do that for the PF.
> > 
> > > +		pdev->pri_enabled = 1;
> > > +
> > > +		/* Increment PF PRI refcount */
> > 
> > Superfluous comment, since that's exactly what the code says.  It's
> > always good when the code is so clear that it doesn't require comments :)
> > 
> > > +		atomic_inc(&pf->pri_ref_cnt);
> > > +
> > > +		return 0;
> > > +	}
> > > +
> > > +	if (pdev->is_physfn && !pos)
> > >  		return -EINVAL;
> > >  
> > >  	pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status);
> > > @@ -202,7 +229,6 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs)
> > >  	pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
> > >  
> > >  	pdev->pri_enabled = 1;
> > > -
> > >  	return 0;
Bjorn Helgaas May 30, 2019, 1:20 p.m. UTC | #5
On Wed, May 29, 2019 at 04:24:05PM -0700, sathyanarayanan kuppuswamy wrote:
> But, regarding VF spec compliance checks, Is there any issue in having them
> in enable code ? Perhaps I can change dev_err to dev_warn and not return
> error if it found implementation errors. I found it useful to have them
> because it helped me in finding some faulty devices during my testing. Let
> me know your comments.

If you need quirks to make these non-compliant devices usable, we
should check for compliance.  If not, my personal opinion is that we
shouldn't touch things we don't need.

Bjorn
Ashok Raj May 30, 2019, 5:20 p.m. UTC | #6
On Thu, May 30, 2019 at 08:17:38AM -0500, Bjorn Helgaas wrote:
> On Wed, May 29, 2019 at 04:04:27PM -0700, Raj, Ashok wrote:
> > On Wed, May 29, 2019 at 05:57:14PM -0500, Bjorn Helgaas wrote:
> > > On Mon, May 06, 2019 at 10:20:03AM -0700, sathyanarayanan.kuppuswamy@linux.intel.com wrote:
> > > > From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
> > > > 
> > > > When IOMMU tries to enable PRI for VF device in
> > > > iommu_enable_dev_iotlb(), it always fails because PRI support for PCIe
> > > > VF device is currently broken in PCIE driver. Current implementation
> > > > expects the given PCIe device (PF & VF) to implement PRI capability
> > > > before enabling the PRI support. But this assumption is incorrect. As
> > > > per PCIe spec r4.0, sec 9.3.7.11, all VFs associated with PF can only
> > > > use the Page Request Interface (PRI) of the PF and not implement it.
> > > > Hence we need to create exception for handling the PRI support for PCIe
> > > > VF device.
> > > > 
> > > > Since PRI is shared between PF/VF devices, following rules should apply.
> > > > 
> > > > 1. Enable PRI in VF only if its already enabled in PF.
> > > > 2. When enabling/disabling PRI for VF, instead of configuring the
> > > > registers just increase/decrease the usage count (pri_ref_cnt) of PF.
> > > > 3. Disable PRI in PF only if pr_ref_cnt is zero.
> > > 
> > > s/pr_ref_cnt/pri_ref_cnt/
> > > 
> > > > Cc: Ashok Raj <ashok.raj@intel.com>
> > > > Cc: Keith Busch <keith.busch@intel.com>
> > > > Suggested-by: Ashok Raj <ashok.raj@intel.com>
> > > > Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
> > > > ---
> > > >  drivers/pci/ats.c   | 53 +++++++++++++++++++++++++++++++++++++++++++--
> > > >  include/linux/pci.h |  1 +
> > > >  2 files changed, 52 insertions(+), 2 deletions(-)
> > > > 
> > > > diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
> > > > index 97c08146534a..5582e5d83a3f 100644
> > > > --- a/drivers/pci/ats.c
> > > > +++ b/drivers/pci/ats.c
> > > > @@ -181,12 +181,39 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs)
> > > >  	u16 control, status;
> > > >  	u32 max_requests;
> > > >  	int pos;
> > > > +	struct pci_dev *pf;
> > > >  
> > > >  	if (WARN_ON(pdev->pri_enabled))
> > > >  		return -EBUSY;
> > > >  
> > > >  	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
> > > > -	if (!pos)
> > > > +
> > > > +	if (pdev->is_virtfn) {
> > > > +		/*
> > > > +		 * Per PCIe r4.0, sec 9.3.7.11, VF must not implement PRI
> > > > +		 * Capability.
> > > > +		 */
> > > > +		if (pos) {
> > > > +			dev_err(&pdev->dev, "VF must not implement PRI");
> > > > +			return -EINVAL;
> > > > +		}
> > > 
> > > This seems gratuitous.  It finds implementation errors, but since we
> > > correctly use the PF here anyway, it doesn't *need* to prevent PRI on
> > > the VF from working.
> > > 
> > > I think you should just have:
> > > 
> > >   if (pdev->is_virtfn) {
> > >     pf = pci_physfn(pdev);
> > >     if (!pf->pri_enabled)
> > >       return -EINVAL;
> > 
> > This would be incorrect. Since if we never did any bind_mm to the PF
> > PRI would not have been enabled. Currently this is done in the IOMMU 
> > driver, and not in the device driver. 
> 
> This is functionally the same as the original patch, only omitting the
> "VF must not implement PRI" check.
> 
> > I suppose we should enable PF capability if its not enabled. Same
> > comment would be applicable for PASID as well.
> 
> Operating on a device other than the one the driver owns opens the
> issue of mutual exclusion and races, so would require careful
> scrutiny.  Are PRI/PASID things that could be *always* enabled for the
> PF at enumeration-time, or do we have to wait until a driver claims
> the VF?  If the latter, are there coordination issues between drivers
> of different VFs?

I suppose that's a reasonably good alternative. You mean we could
do this when VFs are being created? Otherwise we can do this as it's
done today, on demand for all normal PFs.


Cheers,
Ashok
Kuppuswamy Sathyanarayanan May 30, 2019, 5:39 p.m. UTC | #7
On 5/30/19 10:20 AM, Raj, Ashok wrote:
> On Thu, May 30, 2019 at 08:17:38AM -0500, Bjorn Helgaas wrote:
>> On Wed, May 29, 2019 at 04:04:27PM -0700, Raj, Ashok wrote:
>>> On Wed, May 29, 2019 at 05:57:14PM -0500, Bjorn Helgaas wrote:
>>>> On Mon, May 06, 2019 at 10:20:03AM -0700, sathyanarayanan.kuppuswamy@linux.intel.com wrote:
>>>>> From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
>>>>>
>>>>> When IOMMU tries to enable PRI for VF device in
>>>>> iommu_enable_dev_iotlb(), it always fails because PRI support for PCIe
>>>>> VF device is currently broken in PCIE driver. Current implementation
>>>>> expects the given PCIe device (PF & VF) to implement PRI capability
>>>>> before enabling the PRI support. But this assumption is incorrect. As
>>>>> per PCIe spec r4.0, sec 9.3.7.11, all VFs associated with PF can only
>>>>> use the Page Request Interface (PRI) of the PF and not implement it.
>>>>> Hence we need to create exception for handling the PRI support for PCIe
>>>>> VF device.
>>>>>
>>>>> Since PRI is shared between PF/VF devices, following rules should apply.
>>>>>
>>>>> 1. Enable PRI in VF only if its already enabled in PF.
>>>>> 2. When enabling/disabling PRI for VF, instead of configuring the
>>>>> registers just increase/decrease the usage count (pri_ref_cnt) of PF.
>>>>> 3. Disable PRI in PF only if pr_ref_cnt is zero.
>>>> s/pr_ref_cnt/pri_ref_cnt/
>>>>
>>>>> Cc: Ashok Raj <ashok.raj@intel.com>
>>>>> Cc: Keith Busch <keith.busch@intel.com>
>>>>> Suggested-by: Ashok Raj <ashok.raj@intel.com>
>>>>> Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
>>>>> ---
>>>>>   drivers/pci/ats.c   | 53 +++++++++++++++++++++++++++++++++++++++++++--
>>>>>   include/linux/pci.h |  1 +
>>>>>   2 files changed, 52 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
>>>>> index 97c08146534a..5582e5d83a3f 100644
>>>>> --- a/drivers/pci/ats.c
>>>>> +++ b/drivers/pci/ats.c
>>>>> @@ -181,12 +181,39 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs)
>>>>>   	u16 control, status;
>>>>>   	u32 max_requests;
>>>>>   	int pos;
>>>>> +	struct pci_dev *pf;
>>>>>   
>>>>>   	if (WARN_ON(pdev->pri_enabled))
>>>>>   		return -EBUSY;
>>>>>   
>>>>>   	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
>>>>> -	if (!pos)
>>>>> +
>>>>> +	if (pdev->is_virtfn) {
>>>>> +		/*
>>>>> +		 * Per PCIe r4.0, sec 9.3.7.11, VF must not implement PRI
>>>>> +		 * Capability.
>>>>> +		 */
>>>>> +		if (pos) {
>>>>> +			dev_err(&pdev->dev, "VF must not implement PRI");
>>>>> +			return -EINVAL;
>>>>> +		}
>>>> This seems gratuitous.  It finds implementation errors, but since we
>>>> correctly use the PF here anyway, it doesn't *need* to prevent PRI on
>>>> the VF from working.
>>>>
>>>> I think you should just have:
>>>>
>>>>    if (pdev->is_virtfn) {
>>>>      pf = pci_physfn(pdev);
>>>>      if (!pf->pri_enabled)
>>>>        return -EINVAL;
>>> This would be incorrect. Since if we never did any bind_mm to the PF
>>> PRI would not have been enabled. Currently this is done in the IOMMU
>>> driver, and not in the device driver.
>> This is functionally the same as the original patch, only omitting the
>> "VF must not implement PRI" check.
>>
>>> I suppose we should enable PF capability if its not enabled. Same
>>> comment would be applicable for PASID as well.
>> Operating on a device other than the one the driver owns opens the
>> issue of mutual exclusion and races, so would require careful
>> scrutiny.  Are PRI/PASID things that could be *always* enabled for the
>> PF at enumeration-time, or do we have to wait until a driver claims
>> the VF?  If the latter, are there coordination issues between drivers
>> of different VFs?
> I suppose that's a reasonably good alternative. You mean we could
> do this when VF's are being created? Otherwise we can do this as its
> done today, on demand for all normal PF's.

If we are going to enable it with default features then it's doable. But
for cases with custom requirements, it will become complicated. For
example, in the following code, the IOMMU driver sets the PRI Outstanding
Page Allocation quota to 32 or 1 based on errata info. So if we just
enable it by default then we may not be able to take these requirements
into consideration.

2051 static int pdev_iommuv2_enable(struct pci_dev *pdev)
2052 {
2053         bool reset_enable;
2054         int reqs, ret;
2055
2056         /* FIXME: Hardcode number of outstanding requests for now */
2057         reqs = 32;
2058         if (pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE))
2059                 reqs = 1;
2060         reset_enable = pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_ENABLE_RESET);

2073         ret = pci_enable_pri(pdev, reqs);


>
>
> Cheers,
> Ashok
>
diff mbox series

Patch

diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index 97c08146534a..5582e5d83a3f 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -181,12 +181,39 @@  int pci_enable_pri(struct pci_dev *pdev, u32 reqs)
 	u16 control, status;
 	u32 max_requests;
 	int pos;
+	struct pci_dev *pf;
 
 	if (WARN_ON(pdev->pri_enabled))
 		return -EBUSY;
 
 	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
-	if (!pos)
+
+	if (pdev->is_virtfn) {
+		/*
+		 * Per PCIe r4.0, sec 9.3.7.11, VF must not implement PRI
+		 * Capability.
+		 */
+		if (pos) {
+			dev_err(&pdev->dev, "VF must not implement PRI");
+			return -EINVAL;
+		}
+
+		pf = pci_physfn(pdev);
+
+		/* If VF config does not match with PF, return error */
+		if (!pf->pri_enabled)
+			return -EINVAL;
+
+		pdev->pri_reqs_alloc = pf->pri_reqs_alloc;
+		pdev->pri_enabled = 1;
+
+		/* Increment PF PRI refcount */
+		atomic_inc(&pf->pri_ref_cnt);
+
+		return 0;
+	}
+
+	if (pdev->is_physfn && !pos)
 		return -EINVAL;
 
 	pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status);
@@ -202,7 +229,6 @@  int pci_enable_pri(struct pci_dev *pdev, u32 reqs)
 	pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
 
 	pdev->pri_enabled = 1;
-
 	return 0;
 }
 EXPORT_SYMBOL_GPL(pci_enable_pri);
@@ -217,10 +243,27 @@  void pci_disable_pri(struct pci_dev *pdev)
 {
 	u16 control;
 	int pos;
+	struct pci_dev *pf;
 
 	if (WARN_ON(!pdev->pri_enabled))
 		return;
 
+	/* All VFs should be disabled before disabling PF */
+	if (atomic_read(&pdev->pri_ref_cnt))
+		return;
+
+	if (pdev->is_virtfn) {
+		/* Since VF shares PRI with PF, use PF config. */
+		pf = pci_physfn(pdev);
+
+		/* Decrement PF PRI refcount */
+		atomic_dec(&pf->pri_ref_cnt);
+
+		pdev->pri_enabled = 0;
+
+		return;
+	}
+
 	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
 	if (!pos)
 		return;
@@ -246,6 +289,9 @@  void pci_restore_pri_state(struct pci_dev *pdev)
 	if (!pdev->pri_enabled)
 		return;
 
+	if (pdev->is_virtfn)
+		return;
+
 	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
 	if (!pos)
 		return;
@@ -270,6 +316,9 @@  int pci_reset_pri(struct pci_dev *pdev)
 	if (WARN_ON(pdev->pri_enabled))
 		return -EBUSY;
 
+	if (pdev->is_virtfn)
+		return 0;
+
 	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
 	if (!pos)
 		return -EINVAL;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 77448215ef5b..699c79c99a39 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -450,6 +450,7 @@  struct pci_dev {
 #endif
 #ifdef CONFIG_PCI_PRI
 	u32		pri_reqs_alloc; /* Number of PRI requests allocated */
+	atomic_t	pri_ref_cnt;	/* Number of VFs with PRI enabled */
 #endif
 #ifdef CONFIG_PCI_PASID
 	u16		pasid_features;