diff mbox series

[v2,net-next,2/2] ionic: support sr-iov operations

Message ID 20191212003344.5571-3-snelson@pensando.io
State Changes Requested
Delegated to: David Miller
Headers show
Series ionic: add sriov support | expand

Commit Message

Shannon Nelson Dec. 12, 2019, 12:33 a.m. UTC
Add the netdev ops for managing VFs.  Since most of the
management work happens in the NIC firmware, the driver becomes
mostly a pass-through for the network stack commands that want
to control and configure the VFs.

We also tweak ionic_station_set() a little to allow for
the VFs that start off with a zero'd mac address.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
---
 drivers/net/ethernet/pensando/ionic/ionic.h   |  15 +-
 .../ethernet/pensando/ionic/ionic_bus_pci.c   |  85 ++++++
 .../net/ethernet/pensando/ionic/ionic_dev.c   |  58 ++++
 .../net/ethernet/pensando/ionic/ionic_dev.h   |   7 +
 .../net/ethernet/pensando/ionic/ionic_lif.c   | 254 +++++++++++++++++-
 .../net/ethernet/pensando/ionic/ionic_lif.h   |   7 +
 .../net/ethernet/pensando/ionic/ionic_main.c  |   4 +
 7 files changed, 422 insertions(+), 8 deletions(-)

Comments

Parav Pandit Dec. 12, 2019, 6:53 a.m. UTC | #1
On 12/11/2019 6:33 PM, Shannon Nelson wrote:
> Add the netdev ops for managing VFs.  Since most of the
> management work happens in the NIC firmware, the driver becomes
> mostly a pass-through for the network stack commands that want
> to control and configure the VFs.
> 
> We also tweak ionic_station_set() a little to allow for
> the VFs that start off with a zero'd mac address.
> 
> Signed-off-by: Shannon Nelson <snelson@pensando.io>
> ---
>  drivers/net/ethernet/pensando/ionic/ionic.h   |  15 +-
>  .../ethernet/pensando/ionic/ionic_bus_pci.c   |  85 ++++++
>  .../net/ethernet/pensando/ionic/ionic_dev.c   |  58 ++++
>  .../net/ethernet/pensando/ionic/ionic_dev.h   |   7 +
>  .../net/ethernet/pensando/ionic/ionic_lif.c   | 254 +++++++++++++++++-
>  .../net/ethernet/pensando/ionic/ionic_lif.h   |   7 +
>  .../net/ethernet/pensando/ionic/ionic_main.c  |   4 +
>  7 files changed, 422 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/net/ethernet/pensando/ionic/ionic.h b/drivers/net/ethernet/pensando/ionic/ionic.h
> index 98e102af7756..e5c9e4b0450b 100644
> --- a/drivers/net/ethernet/pensando/ionic/ionic.h
> +++ b/drivers/net/ethernet/pensando/ionic/ionic.h
> @@ -12,7 +12,7 @@ struct ionic_lif;
>  
>  #define IONIC_DRV_NAME		"ionic"
>  #define IONIC_DRV_DESCRIPTION	"Pensando Ethernet NIC Driver"
> -#define IONIC_DRV_VERSION	"0.18.0-k"
> +#define IONIC_DRV_VERSION	"0.20.0-k"
>  
>  #define PCI_VENDOR_ID_PENSANDO			0x1dd8
>  
> @@ -25,6 +25,18 @@ struct ionic_lif;
>  
>  #define DEVCMD_TIMEOUT  10
>  
> +struct ionic_vf {
> +	u16	 index;
> +	u8	 macaddr[6];
> +	__le32	 maxrate;
> +	__le16	 vlanid;
> +	u8	 spoofchk;
> +	u8	 trusted;
> +	u8	 linkstate;
> +	dma_addr_t       stats_pa;
> +	struct ionic_lif_stats stats;
> +};
> +
>  struct ionic {
>  	struct pci_dev *pdev;
>  	struct device *dev;
> @@ -46,6 +58,7 @@ struct ionic {
>  	DECLARE_BITMAP(intrs, IONIC_INTR_CTRL_REGS_MAX);
>  	struct work_struct nb_work;
>  	struct notifier_block nb;
> +	struct ionic_vf **vf;
>  	struct timer_list watchdog_timer;
>  	int watchdog_period;
>  };
> diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
> index 9a9ab8cb2cb3..057eb453dd11 100644
> --- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
> +++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
> @@ -250,6 +250,87 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>  	return err;
>  }
>  
> +static int ionic_sriov_configure(struct pci_dev *pdev, int num_vfs)
> +{
> +	struct ionic *ionic = pci_get_drvdata(pdev);
> +	struct device *dev = ionic->dev;
> +	int i, ret = 0;
> +	int nvfs = 0;
> +
> +	if (test_and_set_bit(IONIC_LIF_VF_OP, ionic->master_lif->state)) {
Nop. This is not correct.
User space doesn't retry the command when you throw an error as EAGAIN.
These are not file system calls.
As I told in v1, you need rwsem here without below warning messages all
over the ops.

> +		dev_warn(&pdev->dev, "Unable to configure VFs, other operation is pending.\n");
> +		return -EAGAIN;
> +	}
> +
> +	if (num_vfs > 0) {
> +		ret = pci_enable_sriov(pdev, num_vfs);
> +		if (ret) {
> +			dev_err(&pdev->dev, "Cannot enable SRIOV: %d\n", ret);
> +			goto out;
> +		}
> +
> +		ionic->vf = kcalloc(num_vfs, sizeof(struct ionic_vf *),
> +				    GFP_KERNEL);
> +		if (!ionic->vf) {
> +			pci_disable_sriov(pdev);
> +			ret = -ENOMEM;
> +			goto out;
> +		}
> +
> +		for (i = 0; i < num_vfs; i++) {
> +			struct ionic_vf *v;
> +
> +			v = kzalloc(sizeof(*v), GFP_KERNEL);
> +			if (!v) {
> +				ret = -ENOMEM;
> +				num_vfs = 0;
> +				goto remove_vfs;
> +			}
> +
> +			v->stats_pa = dma_map_single(dev, &v->stats,
> +						     sizeof(v->stats),
> +						     DMA_FROM_DEVICE);
> +			if (dma_mapping_error(dev, v->stats_pa)) {
> +				ret = -ENODEV;
> +				kfree(v);
> +				ionic->vf[i] = NULL;
> +				num_vfs = 0;
> +				goto remove_vfs;
> +			}
> +
> +			ionic->vf[i] = v;
> +			nvfs++;
No need for this extra nvfs. Run the loop from i to 0 in reverse order
in error unwinding.

> +		}
> +
> +		ret = num_vfs;
> +		goto out;
> +	}
> +
> +remove_vfs:
> +	if (num_vfs == 0) {
> +		if (ret)
> +			dev_err(&pdev->dev, "SRIOV setup failed: %d\n", ret);
> +
> +		pci_disable_sriov(pdev);
> +
> +		if (!nvfs)
> +			nvfs = pci_num_vf(pdev);
> +		for (i = 0; i < nvfs; i++) {
error unwinding should be reverse of setup. It should run from i to 0.

> +			dma_unmap_single(dev, ionic->vf[i]->stats_pa,
> +					 sizeof(struct ionic_lif_stats),
Please be consistent with dma_map_single which uses sizeof(v->stats).

> +					 DMA_FROM_DEVICE);
> +			kfree(ionic->vf[i]);
> +		}
> +
> +		kfree(ionic->vf);
> +		ionic->vf = NULL;
> +	}
> +
> +out:
> +	clear_bit(IONIC_LIF_VF_OP, ionic->master_lif->state);
> +	return ret;
> +}
> +
>  static void ionic_remove(struct pci_dev *pdev)
>  {
>  	struct ionic *ionic = pci_get_drvdata(pdev);
> @@ -257,6 +338,9 @@ static void ionic_remove(struct pci_dev *pdev)
>  	if (!ionic)
>  		return;
>  
> +	if (pci_num_vf(pdev))
> +		ionic_sriov_configure(pdev, 0);
> +
Usually sriov is left enabled while removing PF.
It is not the role of the pci PF removal to disable it sriov.
Shannon Nelson Dec. 12, 2019, 7:52 p.m. UTC | #2
On 12/11/19 10:53 PM, Parav Pandit wrote:
> On 12/11/2019 6:33 PM, Shannon Nelson wrote:
>> Add the netdev ops for managing VFs.  Since most of the
>> management work happens in the NIC firmware, the driver becomes
>> mostly a pass-through for the network stack commands that want
>> to control and configure the VFs.
>>
>> We also tweak ionic_station_set() a little to allow for
>> the VFs that start off with a zero'd mac address.
>>
>> Signed-off-by: Shannon Nelson <snelson@pensando.io>
>> ---
>>   drivers/net/ethernet/pensando/ionic/ionic.h   |  15 +-
>>   .../ethernet/pensando/ionic/ionic_bus_pci.c   |  85 ++++++
>>   .../net/ethernet/pensando/ionic/ionic_dev.c   |  58 ++++
>>   .../net/ethernet/pensando/ionic/ionic_dev.h   |   7 +
>>   .../net/ethernet/pensando/ionic/ionic_lif.c   | 254 +++++++++++++++++-
>>   .../net/ethernet/pensando/ionic/ionic_lif.h   |   7 +
>>   .../net/ethernet/pensando/ionic/ionic_main.c  |   4 +
>>   7 files changed, 422 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/pensando/ionic/ionic.h b/drivers/net/ethernet/pensando/ionic/ionic.h
>> index 98e102af7756..e5c9e4b0450b 100644
>> --- a/drivers/net/ethernet/pensando/ionic/ionic.h
>> +++ b/drivers/net/ethernet/pensando/ionic/ionic.h
>> @@ -12,7 +12,7 @@ struct ionic_lif;
>>   
>>   #define IONIC_DRV_NAME		"ionic"
>>   #define IONIC_DRV_DESCRIPTION	"Pensando Ethernet NIC Driver"
>> -#define IONIC_DRV_VERSION	"0.18.0-k"
>> +#define IONIC_DRV_VERSION	"0.20.0-k"
>>   
>>   #define PCI_VENDOR_ID_PENSANDO			0x1dd8
>>   
>> @@ -25,6 +25,18 @@ struct ionic_lif;
>>   
>>   #define DEVCMD_TIMEOUT  10
>>   
>> +struct ionic_vf {
>> +	u16	 index;
>> +	u8	 macaddr[6];
>> +	__le32	 maxrate;
>> +	__le16	 vlanid;
>> +	u8	 spoofchk;
>> +	u8	 trusted;
>> +	u8	 linkstate;
>> +	dma_addr_t       stats_pa;
>> +	struct ionic_lif_stats stats;
>> +};
>> +
>>   struct ionic {
>>   	struct pci_dev *pdev;
>>   	struct device *dev;
>> @@ -46,6 +58,7 @@ struct ionic {
>>   	DECLARE_BITMAP(intrs, IONIC_INTR_CTRL_REGS_MAX);
>>   	struct work_struct nb_work;
>>   	struct notifier_block nb;
>> +	struct ionic_vf **vf;
>>   	struct timer_list watchdog_timer;
>>   	int watchdog_period;
>>   };
>> diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
>> index 9a9ab8cb2cb3..057eb453dd11 100644
>> --- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
>> +++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
>> @@ -250,6 +250,87 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>>   	return err;
>>   }
>>   
>> +static int ionic_sriov_configure(struct pci_dev *pdev, int num_vfs)
>> +{
>> +	struct ionic *ionic = pci_get_drvdata(pdev);
>> +	struct device *dev = ionic->dev;
>> +	int i, ret = 0;
>> +	int nvfs = 0;
>> +
>> +	if (test_and_set_bit(IONIC_LIF_VF_OP, ionic->master_lif->state)) {
> Nop. This is not correct.
> User space doesn't retry the command when you throw an error as EAGAIN.
> These are not file system calls.
> As I told in v1, you need rwsem here without below warning messages all
> over the ops.

Hmmm... I was following how i40e solved this a year ago, which seems 
valid enough, but I see how using an rwsem could be seen as more 
correct, and probably a lot kinder on reads with a device that can 
support a large number of VFs.  I'll take another run at this.

>
>> +		dev_warn(&pdev->dev, "Unable to configure VFs, other operation is pending.\n");
>> +		return -EAGAIN;
>> +	}
>> +
>> +	if (num_vfs > 0) {
>> +		ret = pci_enable_sriov(pdev, num_vfs);
>> +		if (ret) {
>> +			dev_err(&pdev->dev, "Cannot enable SRIOV: %d\n", ret);
>> +			goto out;
>> +		}
>> +
>> +		ionic->vf = kcalloc(num_vfs, sizeof(struct ionic_vf *),
>> +				    GFP_KERNEL);
>> +		if (!ionic->vf) {
>> +			pci_disable_sriov(pdev);
>> +			ret = -ENOMEM;
>> +			goto out;
>> +		}
>> +
>> +		for (i = 0; i < num_vfs; i++) {
>> +			struct ionic_vf *v;
>> +
>> +			v = kzalloc(sizeof(*v), GFP_KERNEL);
>> +			if (!v) {
>> +				ret = -ENOMEM;
>> +				num_vfs = 0;
>> +				goto remove_vfs;
>> +			}
>> +
>> +			v->stats_pa = dma_map_single(dev, &v->stats,
>> +						     sizeof(v->stats),
>> +						     DMA_FROM_DEVICE);
>> +			if (dma_mapping_error(dev, v->stats_pa)) {
>> +				ret = -ENODEV;
>> +				kfree(v);
>> +				ionic->vf[i] = NULL;
>> +				num_vfs = 0;
>> +				goto remove_vfs;
>> +			}
>> +
>> +			ionic->vf[i] = v;
>> +			nvfs++;
> No need for this extra nvfs. Run the loop from i to 0 in reverse order
> in error unwinding.

Sure

>
>> +		}
>> +
>> +		ret = num_vfs;
>> +		goto out;
>> +	}
>> +
>> +remove_vfs:
>> +	if (num_vfs == 0) {
>> +		if (ret)
>> +			dev_err(&pdev->dev, "SRIOV setup failed: %d\n", ret);
>> +
>> +		pci_disable_sriov(pdev);
>> +
>> +		if (!nvfs)
>> +			nvfs = pci_num_vf(pdev);
>> +		for (i = 0; i < nvfs; i++) {
> error unwinding should be reverse of setup. It should run from i to 0.

Yes, this follows if I ditch nvfs.

>
>> +			dma_unmap_single(dev, ionic->vf[i]->stats_pa,
>> +					 sizeof(struct ionic_lif_stats),
> Please be consistent with dma_map_single which uses sizeof(v->stats).

Yep

>
>> +					 DMA_FROM_DEVICE);
>> +			kfree(ionic->vf[i]);
>> +		}
>> +
>> +		kfree(ionic->vf);
>> +		ionic->vf = NULL;
>> +	}
>> +
>> +out:
>> +	clear_bit(IONIC_LIF_VF_OP, ionic->master_lif->state);
>> +	return ret;
>> +}
>> +
>>   static void ionic_remove(struct pci_dev *pdev)
>>   {
>>   	struct ionic *ionic = pci_get_drvdata(pdev);
>> @@ -257,6 +338,9 @@ static void ionic_remove(struct pci_dev *pdev)
>>   	if (!ionic)
>>   		return;
>>   
>> +	if (pci_num_vf(pdev))
>> +		ionic_sriov_configure(pdev, 0);
>> +
> Usually sriov is left enabled while removing PF.
> It is not the role of the pci PF removal to disable it sriov.

Looks like I'll need to split out the vf[] allocation into a separate 
routine (and perhaps the teardown too) so I can do it from
probe.

Version three coming soon to a mailing list near you...

sln
Jakub Kicinski Dec. 12, 2019, 7:52 p.m. UTC | #3
On Thu, 12 Dec 2019 06:53:42 +0000, Parav Pandit wrote:
> >  static void ionic_remove(struct pci_dev *pdev)
> >  {
> >  	struct ionic *ionic = pci_get_drvdata(pdev);
> > @@ -257,6 +338,9 @@ static void ionic_remove(struct pci_dev *pdev)
> >  	if (!ionic)
> >  		return;
> >  
> > +	if (pci_num_vf(pdev))
> > +		ionic_sriov_configure(pdev, 0);
> > +  
> Usually sriov is left enabled while removing PF.
> It is not the role of the pci PF removal to disable it sriov.

I don't think that's true. I consider igb and ixgbe to set the standard
for legacy SR-IOV handling since they were one of the first (the first?)
and Alex Duyck wrote them.

mlx4, bnxt and nfp all disable SR-IOV on remove.
Shannon Nelson Dec. 12, 2019, 7:59 p.m. UTC | #4
On 12/12/19 11:52 AM, Jakub Kicinski wrote:
> On Thu, 12 Dec 2019 06:53:42 +0000, Parav Pandit wrote:
>>>   static void ionic_remove(struct pci_dev *pdev)
>>>   {
>>>   	struct ionic *ionic = pci_get_drvdata(pdev);
>>> @@ -257,6 +338,9 @@ static void ionic_remove(struct pci_dev *pdev)
>>>   	if (!ionic)
>>>   		return;
>>>   
>>> +	if (pci_num_vf(pdev))
>>> +		ionic_sriov_configure(pdev, 0);
>>> +
>> Usually sriov is left enabled while removing PF.
>> It is not the role of the pci PF removal to disable it sriov.
> I don't think that's true. I consider igb and ixgbe to set the standard
> for legacy SR-IOV handling since they were one of the first (the first?)
> and Alex Duyck wrote them.
>
> mlx4, bnxt and nfp all disable SR-IOV on remove.

This was my understanding as well, but now I can see that ixgbe and i40e 
are both checking for existing VFs in probe and setting up to use them, 
as well as the newer ice driver.  I found this today by looking for 
where they use pci_num_vf().

sln
Jakub Kicinski Dec. 12, 2019, 9:35 p.m. UTC | #5
On Thu, 12 Dec 2019 11:59:50 -0800, Shannon Nelson wrote:
> On 12/12/19 11:52 AM, Jakub Kicinski wrote:
> > On Thu, 12 Dec 2019 06:53:42 +0000, Parav Pandit wrote:  
> >>>   static void ionic_remove(struct pci_dev *pdev)
> >>>   {
> >>>   	struct ionic *ionic = pci_get_drvdata(pdev);
> >>> @@ -257,6 +338,9 @@ static void ionic_remove(struct pci_dev *pdev)
> >>>   	if (!ionic)
> >>>   		return;
> >>>   
> >>> +	if (pci_num_vf(pdev))
> >>> +		ionic_sriov_configure(pdev, 0);
> >>> +  
> >> Usually sriov is left enabled while removing PF.
> >> It is not the role of the pci PF removal to disable it sriov.  
> > I don't think that's true. I consider igb and ixgbe to set the standard
> > for legacy SR-IOV handling since they were one of the first (the first?)
> > and Alex Duyck wrote them.
> >
> > mlx4, bnxt and nfp all disable SR-IOV on remove.  
> 
> This was my understanding as well, but now I can see that ixgbe and i40e 
> are both checking for existing VFs in probe and setting up to use them, 
> as well as the newer ice driver.  I found this today by looking for 
> where they use pci_num_vf().

Right, if the VFs very already enabled on probe they are set up.

It's a bit of a asymmetric design, in case some other driver left
SR-IOV on, I guess.
Parav Pandit Dec. 12, 2019, 10:24 p.m. UTC | #6
On 12/12/2019 3:35 PM, Jakub Kicinski wrote:
> On Thu, 12 Dec 2019 11:59:50 -0800, Shannon Nelson wrote:
>> On 12/12/19 11:52 AM, Jakub Kicinski wrote:
>>> On Thu, 12 Dec 2019 06:53:42 +0000, Parav Pandit wrote:  
>>>>>   static void ionic_remove(struct pci_dev *pdev)
>>>>>   {
>>>>>   	struct ionic *ionic = pci_get_drvdata(pdev);
>>>>> @@ -257,6 +338,9 @@ static void ionic_remove(struct pci_dev *pdev)
>>>>>   	if (!ionic)
>>>>>   		return;
>>>>>   
>>>>> +	if (pci_num_vf(pdev))
>>>>> +		ionic_sriov_configure(pdev, 0);
>>>>> +  
>>>> Usually sriov is left enabled while removing PF.
>>>> It is not the role of the pci PF removal to disable it sriov.  
>>> I don't think that's true. I consider igb and ixgbe to set the standard
>>> for legacy SR-IOV handling since they were one of the first (the first?)
>>> and Alex Duyck wrote them.
>>>
>>> mlx4, bnxt and nfp all disable SR-IOV on remove.  
>>
>> This was my understanding as well, but now I can see that ixgbe and i40e 
>> are both checking for existing VFs in probe and setting up to use them, 
>> as well as the newer ice driver.  I found this today by looking for 
>> where they use pci_num_vf().
> 
> Right, if the VFs very already enabled on probe they are set up.
> 
> It's a bit of a asymmetric design, in case some other driver left
> SR-IOV on, I guess.
> 

I remember on one email thread on netdev list from someone that in one
use case, they upgrade the PF driver while VFs are still bound and
SR-IOV kept enabled.
I am not sure how much it is used in practice/or practical.
Such use case may be the reason to keep SR-IOV enabled.
Shannon Nelson Dec. 12, 2019, 10:40 p.m. UTC | #7
On 12/12/19 2:24 PM, Parav Pandit wrote:
> On 12/12/2019 3:35 PM, Jakub Kicinski wrote:
>> On Thu, 12 Dec 2019 11:59:50 -0800, Shannon Nelson wrote:
>>> On 12/12/19 11:52 AM, Jakub Kicinski wrote:
>>>> On Thu, 12 Dec 2019 06:53:42 +0000, Parav Pandit wrote:
>>>>>>    static void ionic_remove(struct pci_dev *pdev)
>>>>>>    {
>>>>>>    	struct ionic *ionic = pci_get_drvdata(pdev);
>>>>>> @@ -257,6 +338,9 @@ static void ionic_remove(struct pci_dev *pdev)
>>>>>>    	if (!ionic)
>>>>>>    		return;
>>>>>>    
>>>>>> +	if (pci_num_vf(pdev))
>>>>>> +		ionic_sriov_configure(pdev, 0);
>>>>>> +
>>>>> Usually sriov is left enabled while removing PF.
>>>>> It is not the role of the pci PF removal to disable it sriov.
>>>> I don't think that's true. I consider igb and ixgbe to set the standard
>>>> for legacy SR-IOV handling since they were one of the first (the first?)
>>>> and Alex Duyck wrote them.
>>>>
>>>> mlx4, bnxt and nfp all disable SR-IOV on remove.
>>> This was my understanding as well, but now I can see that ixgbe and i40e
>>> are both checking for existing VFs in probe and setting up to use them,
>>> as well as the newer ice driver.  I found this today by looking for
>>> where they use pci_num_vf().
>> Right, if the VFs very already enabled on probe they are set up.
>>
>> It's a bit of a asymmetric design, in case some other driver left
>> SR-IOV on, I guess.
>>
> I remember on one email thread on netdev list from someone that in one
> use case, they upgrade the PF driver while VFs are still bound and
> SR-IOV kept enabled.
> I am not sure how much it is used in practice/or practical.
> Such use case may be the reason to keep SR-IOV enabled.

This brings up a potential corner case where it would be better for the 
driver to use its own num_vfs value rather than relying on the 
pci_num_vf() when answering the ndo_get_vf_*() callbacks, and at least 
the igb may be susceptible.  If the driver hasn't set up its vf[] data 
arrays because there was an error in setting them up in the probe(), and 
later someone tries to get VF statistics, the ndo_get_vf_stats callback 
could end up dereferencing bad pointers because vf is less than 
pci_num_vf() but more than the number of vf[] structs set up by the driver.

I suppose the argument could be made that PF's probe should if the VF 
config fails, but it might be nice to have the PF driver running to help 
fix up whatever when sideways in the VF configuration.

sln
Jakub Kicinski Dec. 12, 2019, 10:42 p.m. UTC | #8
On Thu, 12 Dec 2019 14:40:01 -0800, Shannon Nelson wrote:
> I suppose the argument could be made that PF's probe should if the VF 
> config fails, but it might be nice to have the PF driver running to help 
> fix up whatever when sideways in the VF configuration.

If probe fails driver should probe in a degraded mode, where only
subset of devlink functionality needed for debug is available.
Parav Pandit Dec. 16, 2019, 4:47 a.m. UTC | #9
On 12/13/2019 4:10 AM, Shannon Nelson wrote:
> On 12/12/19 2:24 PM, Parav Pandit wrote:
>> On 12/12/2019 3:35 PM, Jakub Kicinski wrote:
>>> On Thu, 12 Dec 2019 11:59:50 -0800, Shannon Nelson wrote:
>>>> On 12/12/19 11:52 AM, Jakub Kicinski wrote:
>>>>> On Thu, 12 Dec 2019 06:53:42 +0000, Parav Pandit wrote:
>>>>>>>    static void ionic_remove(struct pci_dev *pdev)
>>>>>>>    {
>>>>>>>        struct ionic *ionic = pci_get_drvdata(pdev);
>>>>>>> @@ -257,6 +338,9 @@ static void ionic_remove(struct pci_dev *pdev)
>>>>>>>        if (!ionic)
>>>>>>>            return;
>>>>>>>    +    if (pci_num_vf(pdev))
>>>>>>> +        ionic_sriov_configure(pdev, 0);
>>>>>>> +
>>>>>> Usually sriov is left enabled while removing PF.
>>>>>> It is not the role of the pci PF removal to disable it sriov.
>>>>> I don't think that's true. I consider igb and ixgbe to set the
>>>>> standard
>>>>> for legacy SR-IOV handling since they were one of the first (the
>>>>> first?)
>>>>> and Alex Duyck wrote them.
>>>>>
>>>>> mlx4, bnxt and nfp all disable SR-IOV on remove.
>>>> This was my understanding as well, but now I can see that ixgbe and
>>>> i40e
>>>> are both checking for existing VFs in probe and setting up to use them,
>>>> as well as the newer ice driver.  I found this today by looking for
>>>> where they use pci_num_vf().
>>> Right, if the VFs very already enabled on probe they are set up.
>>>
>>> It's a bit of a asymmetric design, in case some other driver left
>>> SR-IOV on, I guess.
>>>
>> I remember on one email thread on netdev list from someone that in one
>> use case, they upgrade the PF driver while VFs are still bound and
>> SR-IOV kept enabled.
>> I am not sure how much it is used in practice/or practical.
>> Such use case may be the reason to keep SR-IOV enabled.
> 
> This brings up a potential corner case where it would be better for the
> driver to use its own num_vfs value rather than relying on the
> pci_num_vf() when answering the ndo_get_vf_*() callbacks, and at least
> the igb may be susceptible.  
Please do not cache num_vfs in driver. Use the pci core's pci_num_vf()
in the new code that you are adding.
More below.
> If the driver hasn't set up its vf[] data
> arrays because there was an error in setting them up in the probe(), and
> later someone tries to get VF statistics, the ndo_get_vf_stats callback
> could end up dereferencing bad pointers because vf is less than
> pci_num_vf() but more than the number of vf[] structs set up by the driver.
> 
> I suppose the argument could be made that PF's probe should if the VF
> config fails, but it might be nice to have the PF driver running to help
> fix up whatever when sideways in the VF configuration.
> 
> sln
> 
I not have strong opinion on letting sriov enabled/disabled on PF device
removal.
But it should be symmetric on probe() and remove() for PF.
If you want to keep it enabled on PF removal, you need to check on probe
and allocate VF metadata you have by using helper function in
sriov_configure() and in probe().
This is followed by mlx5 driver.
Shannon Nelson Dec. 16, 2019, 6:02 a.m. UTC | #10
On 12/15/19 8:47 PM, Parav Pandit wrote:
> On 12/13/2019 4:10 AM, Shannon Nelson wrote:
>> On 12/12/19 2:24 PM, Parav Pandit wrote:
>>> On 12/12/2019 3:35 PM, Jakub Kicinski wrote:
>>>> On Thu, 12 Dec 2019 11:59:50 -0800, Shannon Nelson wrote:
>>>>> On 12/12/19 11:52 AM, Jakub Kicinski wrote:
>>>>>> On Thu, 12 Dec 2019 06:53:42 +0000, Parav Pandit wrote:
>>>>>>>>     static void ionic_remove(struct pci_dev *pdev)
>>>>>>>>     {
>>>>>>>>         struct ionic *ionic = pci_get_drvdata(pdev);
>>>>>>>> @@ -257,6 +338,9 @@ static void ionic_remove(struct pci_dev *pdev)
>>>>>>>>         if (!ionic)
>>>>>>>>             return;
>>>>>>>>     +    if (pci_num_vf(pdev))
>>>>>>>> +        ionic_sriov_configure(pdev, 0);
>>>>>>>> +
>>>>>>> Usually sriov is left enabled while removing PF.
>>>>>>> It is not the role of the pci PF removal to disable it sriov.
>>>>>> I don't think that's true. I consider igb and ixgbe to set the
>>>>>> standard
>>>>>> for legacy SR-IOV handling since they were one of the first (the
>>>>>> first?)
>>>>>> and Alex Duyck wrote them.
>>>>>>
>>>>>> mlx4, bnxt and nfp all disable SR-IOV on remove.
>>>>> This was my understanding as well, but now I can see that ixgbe and
>>>>> i40e
>>>>> are both checking for existing VFs in probe and setting up to use them,
>>>>> as well as the newer ice driver.  I found this today by looking for
>>>>> where they use pci_num_vf().
>>>> Right, if the VFs very already enabled on probe they are set up.
>>>>
>>>> It's a bit of a asymmetric design, in case some other driver left
>>>> SR-IOV on, I guess.
>>>>
>>> I remember on one email thread on netdev list from someone that in one
>>> use case, they upgrade the PF driver while VFs are still bound and
>>> SR-IOV kept enabled.
>>> I am not sure how much it is used in practice/or practical.
>>> Such use case may be the reason to keep SR-IOV enabled.
>> This brings up a potential corner case where it would be better for the
>> driver to use its own num_vfs value rather than relying on the
>> pci_num_vf() when answering the ndo_get_vf_*() callbacks, and at least
>> the igb may be susceptible.
> Please do not cache num_vfs in driver. Use the pci core's pci_num_vf()
> in the new code that you are adding.

I disagree.  The pci_num_vf() tells us what the kernel has set up for 
VFs running, while the driver's num_vfs tracks how many resources the 
driver has set up for handling VFs: these are two different numbers, and 
there are times in the life of the driver when these numbers are 
different.  Yes, these are small windows of time, but they are different 
and need to be treated differently.

> More below.
>> If the driver hasn't set up its vf[] data
>> arrays because there was an error in setting them up in the probe(), and
>> later someone tries to get VF statistics, the ndo_get_vf_stats callback
>> could end up dereferencing bad pointers because vf is less than
>> pci_num_vf() but more than the number of vf[] structs set up by the driver.
>>
>> I suppose the argument could be made that PF's probe should if the VF
>> config fails, but it might be nice to have the PF driver running to help
>> fix up whatever when sideways in the VF configuration.
>>
>> sln
>>
> I not have strong opinion on letting sriov enabled/disabled on PF device
> removal.
> But it should be symmetric on probe() and remove() for PF.
> If you want to keep it enabled on PF removal, you need to check on probe
> and allocate VF metadata you have by using helper function in
> sriov_configure() and in probe().
> This is followed by mlx5 driver.

Agreed, and this check at probe time is included in the v3 patch that I 
sent out on Friday.

sln
Parav Pandit Dec. 16, 2019, 8:46 a.m. UTC | #11
On 12/16/2019 11:32 AM, Shannon Nelson wrote:
> On 12/15/19 8:47 PM, Parav Pandit wrote:
>> On 12/13/2019 4:10 AM, Shannon Nelson wrote:
>>> On 12/12/19 2:24 PM, Parav Pandit wrote:
>>>> On 12/12/2019 3:35 PM, Jakub Kicinski wrote:
>>>>> On Thu, 12 Dec 2019 11:59:50 -0800, Shannon Nelson wrote:
>>>>>> On 12/12/19 11:52 AM, Jakub Kicinski wrote:
>>>>>>> On Thu, 12 Dec 2019 06:53:42 +0000, Parav Pandit wrote:
>>>>>>>>>     static void ionic_remove(struct pci_dev *pdev)
>>>>>>>>>     {
>>>>>>>>>         struct ionic *ionic = pci_get_drvdata(pdev);
>>>>>>>>> @@ -257,6 +338,9 @@ static void ionic_remove(struct pci_dev *pdev)
>>>>>>>>>         if (!ionic)
>>>>>>>>>             return;
>>>>>>>>>     +    if (pci_num_vf(pdev))
>>>>>>>>> +        ionic_sriov_configure(pdev, 0);
>>>>>>>>> +
>>>>>>>> Usually sriov is left enabled while removing PF.
>>>>>>>> It is not the role of the pci PF removal to disable it sriov.
>>>>>>> I don't think that's true. I consider igb and ixgbe to set the
>>>>>>> standard
>>>>>>> for legacy SR-IOV handling since they were one of the first (the
>>>>>>> first?)
>>>>>>> and Alex Duyck wrote them.
>>>>>>>
>>>>>>> mlx4, bnxt and nfp all disable SR-IOV on remove.
>>>>>> This was my understanding as well, but now I can see that ixgbe and
>>>>>> i40e
>>>>>> are both checking for existing VFs in probe and setting up to use
>>>>>> them,
>>>>>> as well as the newer ice driver.  I found this today by looking for
>>>>>> where they use pci_num_vf().
>>>>> Right, if the VFs very already enabled on probe they are set up.
>>>>>
>>>>> It's a bit of a asymmetric design, in case some other driver left
>>>>> SR-IOV on, I guess.
>>>>>
>>>> I remember on one email thread on netdev list from someone that in one
>>>> use case, they upgrade the PF driver while VFs are still bound and
>>>> SR-IOV kept enabled.
>>>> I am not sure how much it is used in practice/or practical.
>>>> Such use case may be the reason to keep SR-IOV enabled.
>>> This brings up a potential corner case where it would be better for the
>>> driver to use its own num_vfs value rather than relying on the
>>> pci_num_vf() when answering the ndo_get_vf_*() callbacks, and at least
>>> the igb may be susceptible.
>> Please do not cache num_vfs in driver. Use the pci core's pci_num_vf()
>> in the new code that you are adding.
> 
> I disagree.  The pci_num_vf() tells us what the kernel has set up for
> VFs running, while the driver's num_vfs tracks how many resources the
> driver has set up for handling VFs: these are two different numbers, and
> there are times in the life of the driver when these numbers are
> different.  Yes, these are small windows of time, but they are different
> and need to be treated differently.
> 
They shouldn't be different. Why are they different?

>> More below.
>>> If the driver hasn't set up its vf[] data
>>> arrays because there was an error in setting them up in the probe(), and
>>> later someone tries to get VF statistics, the ndo_get_vf_stats callback
>>> could end up dereferencing bad pointers because vf is less than
>>> pci_num_vf() but more than the number of vf[] structs set up by the
>>> driver.
>>>
>>> I suppose the argument could be made that PF's probe should if the VF
>>> config fails, but it might be nice to have the PF driver running to help
>>> fix up whatever when sideways in the VF configuration.
>>>
>>> sln
>>>
>> I not have strong opinion on letting sriov enabled/disabled on PF device
>> removal.
>> But it should be symmetric on probe() and remove() for PF.
>> If you want to keep it enabled on PF removal, you need to check on probe
>> and allocate VF metadata you have by using helper function in
>> sriov_configure() and in probe().
>> This is followed by mlx5 driver.
> 
> Agreed, and this check at probe time is included in the v3 patch that I
> sent out on Friday.
> 
ok. Will review. Thanks.

> sln
> 
>
Shannon Nelson Jan. 3, 2020, 12:05 a.m. UTC | #12
On 12/16/19 12:46 AM, Parav Pandit wrote:
> On 12/16/2019 11:32 AM, Shannon Nelson wrote:
>> On 12/15/19 8:47 PM, Parav Pandit wrote:
>>> On 12/13/2019 4:10 AM, Shannon Nelson wrote:
>>>> On 12/12/19 2:24 PM, Parav Pandit wrote:
>>>>> On 12/12/2019 3:35 PM, Jakub Kicinski wrote:
>>>>>> On Thu, 12 Dec 2019 11:59:50 -0800, Shannon Nelson wrote:
>>>>>>> On 12/12/19 11:52 AM, Jakub Kicinski wrote:
>>>>>>>> On Thu, 12 Dec 2019 06:53:42 +0000, Parav Pandit wrote:
>>>>>>>>>>      static void ionic_remove(struct pci_dev *pdev)
>>>>>>>>>>      {
>>>>>>>>>>          struct ionic *ionic = pci_get_drvdata(pdev);
>>>>>>>>>> @@ -257,6 +338,9 @@ static void ionic_remove(struct pci_dev *pdev)
>>>>>>>>>>          if (!ionic)
>>>>>>>>>>              return;
>>>>>>>>>>      +    if (pci_num_vf(pdev))
>>>>>>>>>> +        ionic_sriov_configure(pdev, 0);
>>>>>>>>>> +
>>>>>>>>> Usually sriov is left enabled while removing PF.
>>>>>>>>> It is not the role of the pci PF removal to disable it sriov.
>>>>>>>> I don't think that's true. I consider igb and ixgbe to set the
>>>>>>>> standard
>>>>>>>> for legacy SR-IOV handling since they were one of the first (the
>>>>>>>> first?)
>>>>>>>> and Alex Duyck wrote them.
>>>>>>>>
>>>>>>>> mlx4, bnxt and nfp all disable SR-IOV on remove.
>>>>>>> This was my understanding as well, but now I can see that ixgbe and
>>>>>>> i40e
>>>>>>> are both checking for existing VFs in probe and setting up to use
>>>>>>> them,
>>>>>>> as well as the newer ice driver.  I found this today by looking for
>>>>>>> where they use pci_num_vf().
>>>>>> Right, if the VFs very already enabled on probe they are set up.
>>>>>>
>>>>>> It's a bit of a asymmetric design, in case some other driver left
>>>>>> SR-IOV on, I guess.
>>>>>>
>>>>> I remember on one email thread on netdev list from someone that in one
>>>>> use case, they upgrade the PF driver while VFs are still bound and
>>>>> SR-IOV kept enabled.
>>>>> I am not sure how much it is used in practice/or practical.
>>>>> Such use case may be the reason to keep SR-IOV enabled.
>>>> This brings up a potential corner case where it would be better for the
>>>> driver to use its own num_vfs value rather than relying on the
>>>> pci_num_vf() when answering the ndo_get_vf_*() callbacks, and at least
>>>> the igb may be susceptible.
>>> Please do not cache num_vfs in driver. Use the pci core's pci_num_vf()
>>> in the new code that you are adding.
>> I disagree.  The pci_num_vf() tells us what the kernel has set up for
>> VFs running, while the driver's num_vfs tracks how many resources the
>> driver has set up for handling VFs: these are two different numbers, and
>> there are times in the life of the driver when these numbers are
>> different.  Yes, these are small windows of time, but they are different
>> and need to be treated differently.
>>
> They shouldn't be different. Why are they different?

One simple case where they are different is when .sriov_config is first 
called and the driver hasn't set anything up for handling the VFs.  Once 
the driver has set up whatever resources it needs, then they definitely 
should be the same.  This works in reverse when tearing down the VFs: 
pci_num_vf() says 0, but the driver thinks it has N VFs configured until 
it has torn down its resources and decremented its own counter.  I'm 
sure there are some drivers that don't need to allocate any resources, 
but if they do, these need to be tracked for tearing down later.

In this ionic driver, after working through this and fixing all the 
places where ionic->num_vfs can/should be replaced by pci_num_vf(), I'm 
still left with this issue: since pci_num_vf() returns 0 when 
.sriov_configure() is called when disabling sr-iov, the driver doesn't 
know how many ionic->vf[] had been set up earlier and can't properly 
unmap the DMA stats memory set up for each VF.

I think the driver needs to keep its own resource count.

sln
diff mbox series

Patch

diff --git a/drivers/net/ethernet/pensando/ionic/ionic.h b/drivers/net/ethernet/pensando/ionic/ionic.h
index 98e102af7756..e5c9e4b0450b 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic.h
@@ -12,7 +12,7 @@  struct ionic_lif;
 
 #define IONIC_DRV_NAME		"ionic"
 #define IONIC_DRV_DESCRIPTION	"Pensando Ethernet NIC Driver"
-#define IONIC_DRV_VERSION	"0.18.0-k"
+#define IONIC_DRV_VERSION	"0.20.0-k"
 
 #define PCI_VENDOR_ID_PENSANDO			0x1dd8
 
@@ -25,6 +25,18 @@  struct ionic_lif;
 
 #define DEVCMD_TIMEOUT  10
 
+struct ionic_vf {
+	u16	 index;
+	u8	 macaddr[6];
+	__le32	 maxrate;
+	__le16	 vlanid;
+	u8	 spoofchk;
+	u8	 trusted;
+	u8	 linkstate;
+	dma_addr_t       stats_pa;
+	struct ionic_lif_stats stats;
+};
+
 struct ionic {
 	struct pci_dev *pdev;
 	struct device *dev;
@@ -46,6 +58,7 @@  struct ionic {
 	DECLARE_BITMAP(intrs, IONIC_INTR_CTRL_REGS_MAX);
 	struct work_struct nb_work;
 	struct notifier_block nb;
+	struct ionic_vf **vf;
 	struct timer_list watchdog_timer;
 	int watchdog_period;
 };
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
index 9a9ab8cb2cb3..057eb453dd11 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
@@ -250,6 +250,87 @@  static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	return err;
 }
 
+static int ionic_sriov_configure(struct pci_dev *pdev, int num_vfs)
+{
+	struct ionic *ionic = pci_get_drvdata(pdev);
+	struct device *dev = ionic->dev;
+	int i, ret = 0;
+	int nvfs = 0;
+
+	if (test_and_set_bit(IONIC_LIF_VF_OP, ionic->master_lif->state)) {
+		dev_warn(&pdev->dev, "Unable to configure VFs, other operation is pending.\n");
+		return -EAGAIN;
+	}
+
+	if (num_vfs > 0) {
+		ret = pci_enable_sriov(pdev, num_vfs);
+		if (ret) {
+			dev_err(&pdev->dev, "Cannot enable SRIOV: %d\n", ret);
+			goto out;
+		}
+
+		ionic->vf = kcalloc(num_vfs, sizeof(struct ionic_vf *),
+				    GFP_KERNEL);
+		if (!ionic->vf) {
+			pci_disable_sriov(pdev);
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		for (i = 0; i < num_vfs; i++) {
+			struct ionic_vf *v;
+
+			v = kzalloc(sizeof(*v), GFP_KERNEL);
+			if (!v) {
+				ret = -ENOMEM;
+				num_vfs = 0;
+				goto remove_vfs;
+			}
+
+			v->stats_pa = dma_map_single(dev, &v->stats,
+						     sizeof(v->stats),
+						     DMA_FROM_DEVICE);
+			if (dma_mapping_error(dev, v->stats_pa)) {
+				ret = -ENODEV;
+				kfree(v);
+				ionic->vf[i] = NULL;
+				num_vfs = 0;
+				goto remove_vfs;
+			}
+
+			ionic->vf[i] = v;
+			nvfs++;
+		}
+
+		ret = num_vfs;
+		goto out;
+	}
+
+remove_vfs:
+	if (num_vfs == 0) {
+		if (ret)
+			dev_err(&pdev->dev, "SRIOV setup failed: %d\n", ret);
+
+		pci_disable_sriov(pdev);
+
+		if (!nvfs)
+			nvfs = pci_num_vf(pdev);
+		for (i = 0; i < nvfs; i++) {
+			dma_unmap_single(dev, ionic->vf[i]->stats_pa,
+					 sizeof(struct ionic_lif_stats),
+					 DMA_FROM_DEVICE);
+			kfree(ionic->vf[i]);
+		}
+
+		kfree(ionic->vf);
+		ionic->vf = NULL;
+	}
+
+out:
+	clear_bit(IONIC_LIF_VF_OP, ionic->master_lif->state);
+	return ret;
+}
+
 static void ionic_remove(struct pci_dev *pdev)
 {
 	struct ionic *ionic = pci_get_drvdata(pdev);
@@ -257,6 +338,9 @@  static void ionic_remove(struct pci_dev *pdev)
 	if (!ionic)
 		return;
 
+	if (pci_num_vf(pdev))
+		ionic_sriov_configure(pdev, 0);
+
 	ionic_devlink_unregister(ionic);
 	ionic_lifs_unregister(ionic);
 	ionic_lifs_deinit(ionic);
@@ -279,6 +363,7 @@  static struct pci_driver ionic_driver = {
 	.id_table = ionic_id_table,
 	.probe = ionic_probe,
 	.remove = ionic_remove,
+	.sriov_configure = ionic_sriov_configure,
 };
 
 int ionic_bus_register_driver(void)
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
index 5f9d2ec70446..87f82f36812f 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
@@ -286,6 +286,64 @@  void ionic_dev_cmd_port_pause(struct ionic_dev *idev, u8 pause_type)
 	ionic_dev_cmd_go(idev, &cmd);
 }
 
+/* VF commands */
+int ionic_set_vf_config(struct ionic *ionic, int vf, u8 attr, u8 *data)
+{
+	union ionic_dev_cmd cmd = {
+		.vf_setattr.opcode = IONIC_CMD_VF_SETATTR,
+		.vf_setattr.attr = attr,
+		.vf_setattr.vf_index = vf,
+	};
+	int err;
+
+	switch (attr) {
+	case IONIC_VF_ATTR_SPOOFCHK:
+		cmd.vf_setattr.spoofchk = *data;
+		dev_dbg(ionic->dev, "%s: vf %d spoof %d\n",
+			__func__, vf, *data);
+		break;
+	case IONIC_VF_ATTR_TRUST:
+		cmd.vf_setattr.trust = *data;
+		dev_dbg(ionic->dev, "%s: vf %d trust %d\n",
+			__func__, vf, *data);
+		break;
+	case IONIC_VF_ATTR_LINKSTATE:
+		cmd.vf_setattr.linkstate = *data;
+		dev_dbg(ionic->dev, "%s: vf %d linkstate %d\n",
+			__func__, vf, *data);
+		break;
+	case IONIC_VF_ATTR_MAC:
+		ether_addr_copy(cmd.vf_setattr.macaddr, data);
+		dev_dbg(ionic->dev, "%s: vf %d macaddr %pM\n",
+			__func__, vf, data);
+		break;
+	case IONIC_VF_ATTR_VLAN:
+		cmd.vf_setattr.vlanid = cpu_to_le16(*(u16 *)data);
+		dev_dbg(ionic->dev, "%s: vf %d vlan %d\n",
+			__func__, vf, *(u16 *)data);
+		break;
+	case IONIC_VF_ATTR_RATE:
+		cmd.vf_setattr.maxrate = cpu_to_le32(*(u32 *)data);
+		dev_dbg(ionic->dev, "%s: vf %d maxrate %d\n",
+			__func__, vf, *(u32 *)data);
+		break;
+	case IONIC_VF_ATTR_STATSADDR:
+		cmd.vf_setattr.stats_pa = cpu_to_le64(*(u64 *)data);
+		dev_dbg(ionic->dev, "%s: vf %d stats_pa 0x%08llx\n",
+			__func__, vf, *(u64 *)data);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	mutex_lock(&ionic->dev_cmd_lock);
+	ionic_dev_cmd_go(&ionic->idev, &cmd);
+	err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+	mutex_unlock(&ionic->dev_cmd_lock);
+
+	return err;
+}
+
 /* LIF commands */
 void ionic_dev_cmd_lif_identify(struct ionic_dev *idev, u8 type, u8 ver)
 {
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index 4665c5dc5324..7838e342c4fd 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -113,6 +113,12 @@  static_assert(sizeof(struct ionic_rxq_desc) == 16);
 static_assert(sizeof(struct ionic_rxq_sg_desc) == 128);
 static_assert(sizeof(struct ionic_rxq_comp) == 16);
 
+/* SR/IOV */
+static_assert(sizeof(struct ionic_vf_setattr_cmd) == 64);
+static_assert(sizeof(struct ionic_vf_setattr_comp) == 16);
+static_assert(sizeof(struct ionic_vf_getattr_cmd) == 64);
+static_assert(sizeof(struct ionic_vf_getattr_comp) == 16);
+
 struct ionic_devinfo {
 	u8 asic_type;
 	u8 asic_rev;
@@ -275,6 +281,7 @@  void ionic_dev_cmd_port_autoneg(struct ionic_dev *idev, u8 an_enable);
 void ionic_dev_cmd_port_fec(struct ionic_dev *idev, u8 fec_type);
 void ionic_dev_cmd_port_pause(struct ionic_dev *idev, u8 pause_type);
 
+int ionic_set_vf_config(struct ionic *ionic, int vf, u8 attr, u8 *data);
 void ionic_dev_cmd_lif_identify(struct ionic_dev *idev, u8 type, u8 ver);
 void ionic_dev_cmd_lif_init(struct ionic_dev *idev, u16 lif_index,
 			    dma_addr_t addr);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 60fd14df49d7..4d75d6c40c43 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -1616,6 +1616,234 @@  int ionic_stop(struct net_device *netdev)
 	return err;
 }
 
+static int ionic_get_vf_config(struct net_device *netdev,
+			       int vf, struct ifla_vf_info *ivf)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	struct ionic *ionic = lif->ionic;
+
+	if (vf >= pci_num_vf(lif->ionic->pdev))
+		return -EINVAL;
+
+	if (test_and_set_bit(IONIC_LIF_VF_OP, lif->state)) {
+		netdev_warn(netdev, "Unable to configure VFs, other operation is pending.\n");
+		return -EAGAIN;
+	}
+
+	ivf->vf           = vf;
+	ivf->vlan         = ionic->vf[vf]->vlanid;
+	ivf->qos	  = 0;
+	ivf->spoofchk     = ionic->vf[vf]->spoofchk;
+	ivf->linkstate    = ionic->vf[vf]->linkstate;
+	ivf->max_tx_rate  = ionic->vf[vf]->maxrate;
+	ivf->trusted      = ionic->vf[vf]->trusted;
+	ether_addr_copy(ivf->mac, ionic->vf[vf]->macaddr);
+
+	clear_bit(IONIC_LIF_VF_OP, lif->state);
+	return 0;
+}
+
+static int ionic_get_vf_stats(struct net_device *netdev, int vf,
+			      struct ifla_vf_stats *vf_stats)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	struct ionic *ionic = lif->ionic;
+	struct ionic_lif_stats *vs;
+
+	if (vf >= pci_num_vf(lif->ionic->pdev))
+		return -EINVAL;
+
+	if (test_and_set_bit(IONIC_LIF_VF_OP, lif->state)) {
+		netdev_warn(netdev, "Unable to configure VFs, other operation is pending.\n");
+		return -EAGAIN;
+	}
+
+	memset(vf_stats, 0, sizeof(*vf_stats));
+	vs = &ionic->vf[vf]->stats;
+
+	vf_stats->rx_packets = le64_to_cpu(vs->rx_ucast_packets);
+	vf_stats->tx_packets = le64_to_cpu(vs->tx_ucast_packets);
+	vf_stats->rx_bytes   = le64_to_cpu(vs->rx_ucast_bytes);
+	vf_stats->tx_bytes   = le64_to_cpu(vs->tx_ucast_bytes);
+	vf_stats->broadcast  = le64_to_cpu(vs->rx_bcast_packets);
+	vf_stats->multicast  = le64_to_cpu(vs->rx_mcast_packets);
+	vf_stats->rx_dropped = le64_to_cpu(vs->rx_ucast_drop_packets) +
+			       le64_to_cpu(vs->rx_mcast_drop_packets) +
+			       le64_to_cpu(vs->rx_bcast_drop_packets);
+	vf_stats->tx_dropped = le64_to_cpu(vs->tx_ucast_drop_packets) +
+			       le64_to_cpu(vs->tx_mcast_drop_packets) +
+			       le64_to_cpu(vs->tx_bcast_drop_packets);
+
+	clear_bit(IONIC_LIF_VF_OP, lif->state);
+	return 0;
+}
+
+static int ionic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	int ret;
+
+	if (vf >= pci_num_vf(lif->ionic->pdev))
+		return -EINVAL;
+
+	if (!(is_zero_ether_addr(mac) || is_valid_ether_addr(mac)))
+		return -EINVAL;
+
+	if (test_and_set_bit(IONIC_LIF_VF_OP, lif->state)) {
+		netdev_warn(netdev, "Unable to configure VFs, other operation is pending.\n");
+		return -EAGAIN;
+	}
+
+	ret = ionic_set_vf_config(lif->ionic, vf,
+				  IONIC_VF_ATTR_MAC, mac);
+	if (!ret)
+		ether_addr_copy(lif->ionic->vf[vf]->macaddr, mac);
+
+	clear_bit(IONIC_LIF_VF_OP, lif->state);
+	return ret;
+}
+
+static int ionic_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
+			     u8 qos, __be16 proto)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	int ret;
+
+	if (vf >= pci_num_vf(lif->ionic->pdev))
+		return -EINVAL;
+
+	/* until someday when we support qos */
+	if (qos)
+		return -EINVAL;
+
+	if (vlan > 4095)
+		return -EINVAL;
+
+	if (proto != htons(ETH_P_8021Q))
+		return -EPROTONOSUPPORT;
+
+	if (test_and_set_bit(IONIC_LIF_VF_OP, lif->state)) {
+		netdev_warn(netdev, "Unable to configure VFs, other operation is pending.\n");
+		return -EAGAIN;
+	}
+
+	ret = ionic_set_vf_config(lif->ionic, vf,
+				  IONIC_VF_ATTR_VLAN, (u8 *)&vlan);
+	if (!ret)
+		lif->ionic->vf[vf]->vlanid = vlan;
+
+	clear_bit(IONIC_LIF_VF_OP, lif->state);
+	return ret;
+}
+
+static int ionic_set_vf_rate(struct net_device *netdev, int vf,
+			     int tx_min, int tx_max)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	int ret;
+
+	if (vf >= pci_num_vf(lif->ionic->pdev))
+		return -EINVAL;
+
+	/* setting the min just seems silly */
+	if (tx_min)
+		return -EINVAL;
+
+	if (test_and_set_bit(IONIC_LIF_VF_OP, lif->state)) {
+		netdev_warn(netdev, "Unable to configure VFs, other operation is pending.\n");
+		return -EAGAIN;
+	}
+
+	ret = ionic_set_vf_config(lif->ionic, vf,
+				  IONIC_VF_ATTR_RATE, (u8 *)&tx_max);
+	if (!ret)
+		lif->ionic->vf[vf]->maxrate = tx_max;
+
+	clear_bit(IONIC_LIF_VF_OP, lif->state);
+	return ret;
+}
+
+static int ionic_set_vf_spoofchk(struct net_device *netdev, int vf, bool set)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	u8 data = set;  /* convert to u8 for config */
+	int ret;
+
+	if (vf >= pci_num_vf(lif->ionic->pdev))
+		return -EINVAL;
+
+	if (test_and_set_bit(IONIC_LIF_VF_OP, lif->state)) {
+		netdev_warn(netdev, "Unable to configure VFs, other operation is pending.\n");
+		return -EAGAIN;
+	}
+
+	ret = ionic_set_vf_config(lif->ionic, vf,
+				  IONIC_VF_ATTR_SPOOFCHK, &data);
+	if (!ret)
+		lif->ionic->vf[vf]->spoofchk = data;
+
+	clear_bit(IONIC_LIF_VF_OP, lif->state);
+	return ret;
+}
+
+static int ionic_set_vf_trust(struct net_device *netdev, int vf, bool set)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	u8 data = set;  /* convert to u8 for config */
+	int ret;
+
+	if (vf >= pci_num_vf(lif->ionic->pdev))
+		return -EINVAL;
+
+	if (test_and_set_bit(IONIC_LIF_VF_OP, lif->state)) {
+		netdev_warn(netdev, "Unable to configure VFs, other operation is pending.\n");
+		return -EAGAIN;
+	}
+
+	ret = ionic_set_vf_config(lif->ionic, vf,
+				  IONIC_VF_ATTR_TRUST, &data);
+	if (!ret)
+		lif->ionic->vf[vf]->trusted = data;
+
+	clear_bit(IONIC_LIF_VF_OP, lif->state);
+	return ret;
+}
+
+static int ionic_set_vf_link_state(struct net_device *netdev, int vf, int set)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	u8 data;
+	int ret;
+
+	if (vf >= pci_num_vf(lif->ionic->pdev))
+		return -EINVAL;
+
+	if (test_and_set_bit(IONIC_LIF_VF_OP, lif->state)) {
+		netdev_warn(netdev, "Unable to configure VFs, other operation is pending.\n");
+		return -EAGAIN;
+	}
+
+	switch (set) {
+	case IFLA_VF_LINK_STATE_AUTO:
+		data = IONIC_VF_LINK_STATUS_AUTO;
+		break;
+	case IFLA_VF_LINK_STATE_ENABLE:
+		data = IONIC_VF_LINK_STATUS_UP;
+		break;
+	case IFLA_VF_LINK_STATE_DISABLE:
+		data = IONIC_VF_LINK_STATUS_DOWN;
+		break;
+	}
+
+	ret = ionic_set_vf_config(lif->ionic, vf,
+				  IONIC_VF_ATTR_LINKSTATE, &data);
+	if (!ret)
+		lif->ionic->vf[vf]->linkstate = set;
+
+	clear_bit(IONIC_LIF_VF_OP, lif->state);
+	return ret;
+}
+
 static const struct net_device_ops ionic_netdev_ops = {
 	.ndo_open               = ionic_open,
 	.ndo_stop               = ionic_stop,
@@ -1629,6 +1857,14 @@  static const struct net_device_ops ionic_netdev_ops = {
 	.ndo_change_mtu         = ionic_change_mtu,
 	.ndo_vlan_rx_add_vid    = ionic_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid   = ionic_vlan_rx_kill_vid,
+	.ndo_set_vf_vlan	= ionic_set_vf_vlan,
+	.ndo_set_vf_trust	= ionic_set_vf_trust,
+	.ndo_set_vf_mac		= ionic_set_vf_mac,
+	.ndo_set_vf_rate	= ionic_set_vf_rate,
+	.ndo_set_vf_spoofchk	= ionic_set_vf_spoofchk,
+	.ndo_get_vf_config	= ionic_get_vf_config,
+	.ndo_set_vf_link_state	= ionic_set_vf_link_state,
+	.ndo_get_vf_stats       = ionic_get_vf_stats,
 };
 
 int ionic_reset_queues(struct ionic_lif *lif)
@@ -1961,18 +2197,22 @@  static int ionic_station_set(struct ionic_lif *lif)
 	if (err)
 		return err;
 
+	if (is_zero_ether_addr(ctx.comp.lif_getattr.mac))
+		return 0;
+
 	memcpy(addr.sa_data, ctx.comp.lif_getattr.mac, netdev->addr_len);
 	addr.sa_family = AF_INET;
 	err = eth_prepare_mac_addr_change(netdev, &addr);
-	if (err)
-		return err;
-
-	if (!is_zero_ether_addr(netdev->dev_addr)) {
-		netdev_dbg(lif->netdev, "deleting station MAC addr %pM\n",
-			   netdev->dev_addr);
-		ionic_lif_addr(lif, netdev->dev_addr, false);
+	if (err) {
+		netdev_warn(lif->netdev, "ignoring bad MAC addr from NIC %pM\n",
+			    addr.sa_data);
+		return 0;
 	}
 
+	netdev_dbg(lif->netdev, "deleting station MAC addr %pM\n",
+		   netdev->dev_addr);
+	ionic_lif_addr(lif, netdev->dev_addr, false);
+
 	eth_commit_mac_addr_change(netdev, &addr);
 	netdev_dbg(lif->netdev, "adding station MAC addr %pM\n",
 		   netdev->dev_addr);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
index a55fd1f8c31b..0657e00ab85f 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
@@ -125,6 +125,7 @@  enum ionic_lif_state_flags {
 	IONIC_LIF_UP,
 	IONIC_LIF_LINK_CHECK_REQUESTED,
 	IONIC_LIF_QUEUE_RESET,
+	IONIC_LIF_VF_OP,
 
 	/* leave this as last */
 	IONIC_LIF_STATE_SIZE
@@ -195,6 +196,12 @@  static inline int ionic_wait_for_bit(struct ionic_lif *lif, int bitname)
 	return wait_on_bit_lock(lif->state, bitname, TASK_INTERRUPTIBLE);
 }
 
+static inline bool ionic_is_pf(struct ionic *ionic)
+{
+	return ionic->pdev &&
+	       ionic->pdev->device == PCI_DEVICE_ID_PENSANDO_IONIC_ETH_PF;
+}
+
 static inline u32 ionic_coal_usec_to_hw(struct ionic *ionic, u32 usecs)
 {
 	u32 mult = le32_to_cpu(ionic->ident.dev.intr_coal_mult);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c
index 3590ea7fd88a..837b85f2fed9 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_main.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c
@@ -165,6 +165,10 @@  static const char *ionic_opcode_to_str(enum ionic_cmd_opcode opcode)
 		return "IONIC_CMD_FW_DOWNLOAD";
 	case IONIC_CMD_FW_CONTROL:
 		return "IONIC_CMD_FW_CONTROL";
+	case IONIC_CMD_VF_GETATTR:
+		return "IONIC_CMD_VF_GETATTR";
+	case IONIC_CMD_VF_SETATTR:
+		return "IONIC_CMD_VF_SETATTR";
 	default:
 		return "DEVCMD_UNKNOWN";
 	}