diff mbox

[next-queue,v6,7/7] i40e: Add support to get switch id and port number for port netdevs

Message ID 1490833375-2788-8-git-send-email-sridhar.samudrala@intel.com
State Awaiting Upstream, archived
Delegated to: David Miller
Headers show

Commit Message

Samudrala, Sridhar March 30, 2017, 12:22 a.m. UTC
Introduce switchdev_ops to PF and port netdevs to return the switch id via
SWITCHDEV_ATTR_ID_PORT_PARENT_ID attribute.
Also, ndo_get_phys_port_name() support is added to port netdevs to return
the port number.

PF: p4p1, VFs: p4p1_0,p4p1_1, VF port reps:p4p1-vf0, p4p1-vf1,
PF port rep: p4p1-pf
# rmmod i40e; modprobe i40e
# devlink dev eswitch set pci/0000:42:00.0 mode switchdev
# echo 2 > /sys/class/net/enp5s0f0/device/sriov_numvfs
# ip -d l show p4p1
27: p4p1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
    link/ether 3c:fd:fe:a3:18:f8 brd ff:ff:ff:ff:ff:ff promiscuity 0 numtxqueues 64 numrxqueues 64 gso_max_size 65536 gso_max_segs 65535 portid 3cfdfea318f8 switchid 3cfdfea318f8
    vf 0 MAC 00:00:00:00:00:00, spoof checking on, link-state disable, trust off
    vf 1 MAC 00:00:00:00:00:00, spoof checking on, link-state disable, trust off
# ip -d l show p4p1-pf
29: p4p1-pf: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
    link/ether 42:7a:b5:dc:85:11 brd ff:ff:ff:ff:ff:ff promiscuity 0 numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 portname 65535 switchid 3cfdfea318f8
# ip -d l show p4p1-vf0
30: p4p1-vf0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
    link/ether 6e:ff:0b:5a:63:6d brd ff:ff:ff:ff:ff:ff promiscuity 0 numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 portname 0 switchid 3cfdfea318f8
# ip -d l show p4p1-vf1
31: p4p1-vf1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
    link/ether 92:6e:ff:35:05:d5 brd ff:ff:ff:ff:ff:ff promiscuity 0 numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 portname 1 switchid 3cfdfea318f8

Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e.h      |  1 +
 drivers/net/ethernet/intel/i40e/i40e_main.c | 97 +++++++++++++++++++++++++++++
 2 files changed, 98 insertions(+)

Comments

Jakub Kicinski March 30, 2017, 9:45 p.m. UTC | #1
On Wed, 29 Mar 2017 17:22:55 -0700, Sridhar Samudrala wrote:
> Introduce switchdev_ops to PF and port netdevs to return the switch id via
> SWITCHDEV_ATTR_ID_PORT_PARENT_ID attribute.
> Also, ndo_get_phys_port_name() support is added to port netdevs to return
> the port number.
> 
...
> +static int
> +i40e_port_netdev_get_phys_port_name(struct net_device *dev, char *buf,
> +				    size_t len)
> +{
> +	struct i40e_port_netdev_priv *priv = netdev_priv(dev);
> +	struct i40e_vf *vf;
> +	int ret;
> +
> +	switch (priv->type) {
> +	case I40E_PORT_NETDEV_VF:
> +		vf = (struct i40e_vf *)priv->f;
> +		ret = snprintf(buf, len, "%d", vf->vf_id);
> +		break;
> +	case I40E_PORT_NETDEV_PF:
> +		ret = snprintf(buf, len, "%d", I40E_MAIN_VSI_PORT_ID);
> +		break;
> +	default:
> +		return -EOPNOTSUPP;
> +	}
> +
> +	if (ret >= len)
> +		return -EOPNOTSUPP;
> +
> +	return 0;
> +}

You are using only an integer here, which forces you to manually name
the netdev in patch 2, and that is what phys_port_name is supposed to
help avoid doing AFAIU.

We have naming rules in Documentation/networking/switchdev.txt for
switch ports suggested as pX for physical ports or pXsY for ports which
are broken out/split.  Could we establish similar suggestion for vf and
pf representors and document it? (note: we may need pf representors for
multi-host devices.)

IMHO naming representors pfr%d or vfr%d would make sense.  This way
actual VF and PF netdevs could be called pf%d and vf%d, and
udev/systemd will give all netdevs nice, meaningful names without any
custom rules.

Sorry for the bike shedding but I was hoping we could save some user
pain by establishing those rules (more or less) upfront.
Alexander H Duyck March 30, 2017, 10:31 p.m. UTC | #2
On Thu, Mar 30, 2017 at 2:45 PM, Jakub Kicinski
<jakub.kicinski@netronome.com> wrote:
> On Wed, 29 Mar 2017 17:22:55 -0700, Sridhar Samudrala wrote:
>> Introduce switchdev_ops to PF and port netdevs to return the switch id via
>> SWITCHDEV_ATTR_ID_PORT_PARENT_ID attribute.
>> Also, ndo_get_phys_port_name() support is added to port netdevs to return
>> the port number.
>>
> ...
>> +static int
>> +i40e_port_netdev_get_phys_port_name(struct net_device *dev, char *buf,
>> +                                 size_t len)
>> +{
>> +     struct i40e_port_netdev_priv *priv = netdev_priv(dev);
>> +     struct i40e_vf *vf;
>> +     int ret;
>> +
>> +     switch (priv->type) {
>> +     case I40E_PORT_NETDEV_VF:
>> +             vf = (struct i40e_vf *)priv->f;
>> +             ret = snprintf(buf, len, "%d", vf->vf_id);
>> +             break;
>> +     case I40E_PORT_NETDEV_PF:
>> +             ret = snprintf(buf, len, "%d", I40E_MAIN_VSI_PORT_ID);
>> +             break;
>> +     default:
>> +             return -EOPNOTSUPP;
>> +     }
>> +
>> +     if (ret >= len)
>> +             return -EOPNOTSUPP;
>> +
>> +     return 0;
>> +}
>
> You are using only an integer here, which forces you to manually name
> the netdev in patch 2, and that is what phys_port_name is supposed to
> help avoid doing AFAIU.
>
> We have naming rules in Documentation/networking/switchdev.txt for
> switch ports suggested as pX for physical ports or pXsY for ports which
> are broken out/split.  Could we establish similar suggestion for vf and
> pf representors and document it? (note: we may need pf representors for
> multi-host devices.)
>
> IMHO naming representors pfr%d or vfr%d would make sense.  This way
> actual VF and PF netdevs could be called pf%d and vf%d, and
> udev/systemd will give all netdevs nice, meaningful names without any
> custom rules.
>
> Sorry for the bike shedding but I was hoping we could save some user
> pain by establishing those rules (more or less) upfront.

This is something we should probably discuss at netdev/netconf next
week. It seems like the convention has been to just use an integer and
I think we might want to look at doing something like you are
suggesting where if nothing else we come up with a way of identifying
that a VF versus something like a segmented port which is the only
thing currently defined in the documentation.

- Alex
Jakub Kicinski March 31, 2017, 1:16 a.m. UTC | #3
On Thu, 30 Mar 2017 15:31:01 -0700, Alexander Duyck wrote:
> On Thu, Mar 30, 2017 at 2:45 PM, Jakub Kicinski
> <jakub.kicinski@netronome.com> wrote:
> > On Wed, 29 Mar 2017 17:22:55 -0700, Sridhar Samudrala wrote:  
> >> Introduce switchdev_ops to PF and port netdevs to return the switch id via
> >> SWITCHDEV_ATTR_ID_PORT_PARENT_ID attribute.
> >> Also, ndo_get_phys_port_name() support is added to port netdevs to return
> >> the port number.
> >>  
> > ...  
> >> +static int
> >> +i40e_port_netdev_get_phys_port_name(struct net_device *dev, char *buf,
> >> +                                 size_t len)
> >> +{
> >> +     struct i40e_port_netdev_priv *priv = netdev_priv(dev);
> >> +     struct i40e_vf *vf;
> >> +     int ret;
> >> +
> >> +     switch (priv->type) {
> >> +     case I40E_PORT_NETDEV_VF:
> >> +             vf = (struct i40e_vf *)priv->f;
> >> +             ret = snprintf(buf, len, "%d", vf->vf_id);
> >> +             break;
> >> +     case I40E_PORT_NETDEV_PF:
> >> +             ret = snprintf(buf, len, "%d", I40E_MAIN_VSI_PORT_ID);
> >> +             break;
> >> +     default:
> >> +             return -EOPNOTSUPP;
> >> +     }
> >> +
> >> +     if (ret >= len)
> >> +             return -EOPNOTSUPP;
> >> +
> >> +     return 0;
> >> +}  
> >
> > You are using only an integer here, which forces you to manually name
> > the netdev in patch 2, and that is what phys_port_name is supposed to
> > help avoid doing AFAIU.
> >
> > We have naming rules in Documentation/networking/switchdev.txt for
> > switch ports suggested as pX for physical ports or pXsY for ports which
> > are broken out/split.  Could we establish similar suggestion for vf and
> > pf representors and document it? (note: we may need pf representors for
> > multi-host devices.)
> >
> > IMHO naming representors pfr%d or vfr%d would make sense.  This way
> > actual VF and PF netdevs could be called pf%d and vf%d, and
> > udev/systemd will give all netdevs nice, meaningful names without any
> > custom rules.
> >
> > Sorry for the bike shedding but I was hoping we could save some user
> > pain by establishing those rules (more or less) upfront.  
> 
> This is something we should probably discuss at netdev/netconf next
> week. It seems like the convention has been to just use an integer and
> I think we might want to look at doing something like you are
> suggesting where if nothing else we come up with a way of identifying
> that a VF versus something like a segmented port which is the only
> thing currently defined in the documentation.

Sure.  If we want to talk about this at netdev there is another
more minor thing we were pondering.  How to represent the VF -- PCI DEV
-- representor netdev relation nicely e.g. for OpenStack integration?

AFAIU when PCI device is added to a VM user space should add the
representors to appropriate bridges and fire the legacy sriov ndos
to set mac/vlan.  VF PCI dev and PF PCI dev are nicely linked in sysfs
via virtfnX and physfn files.  But going from VF PCI dev to the
representor requires iteration over all representor netdevs to find the
right switchdev_id + phys_port_name combination.

One way to solve this would be to SET_NETDEV_DEV() the representor
netdev to the VF pci dev, but then representors may not share the base
enpXsYfZ name since they will be using different PCI devices as the
parent...
diff mbox

Patch

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 72e11b2..9eb2ba5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -56,6 +56,7 @@ 
 #include <linux/ptp_clock_kernel.h>
 #include <net/devlink.h>
 #include <net/dst_metadata.h>
+#include <net/switchdev.h>
 
 #include "i40e_type.h"
 #include "i40e_prototype.h"
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 4f0eebc..85f214d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -4862,6 +4862,32 @@  static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc,
 	return 0;
 }
 
+static int i40e_switchdev_pf_attr_get(struct net_device *dev,
+				      struct switchdev_attr *attr)
+{
+	struct i40e_netdev_priv *np = netdev_priv(dev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+
+	if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_LEGACY)
+		return -EOPNOTSUPP;
+
+	switch (attr->id) {
+	case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
+		attr->u.ppid.id_len = ETH_ALEN;
+		ether_addr_copy(attr->u.ppid.id, dev->dev_addr);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static const struct switchdev_ops i40e_switchdev_pf_ops = {
+	.switchdev_port_attr_get	= i40e_switchdev_pf_attr_get,
+};
+
 /**
  * i40e_vsi_config_netdev_tc - Setup the netdev TC configuration
  * @vsi: the VSI being configured
@@ -9364,6 +9390,9 @@  static int i40e_config_netdev(struct i40e_vsi *vsi)
 	netdev->netdev_ops = &i40e_netdev_ops;
 	netdev->watchdog_timeo = 5 * HZ;
 	i40e_set_ethtool_ops(netdev);
+#ifdef CONFIG_NET_SWITCHDEV
+	netdev->switchdev_ops = &i40e_switchdev_pf_ops;
+#endif
 
 	/* MTU range: 68 - 9706 */
 	netdev->min_mtu = ETH_MIN_MTU;
@@ -11111,6 +11140,32 @@  static int i40e_port_netdev_stop(struct net_device *dev)
 	return -EINVAL;
 }
 
+static int
+i40e_port_netdev_get_phys_port_name(struct net_device *dev, char *buf,
+				    size_t len)
+{
+	struct i40e_port_netdev_priv *priv = netdev_priv(dev);
+	struct i40e_vf *vf;
+	int ret;
+
+	switch (priv->type) {
+	case I40E_PORT_NETDEV_VF:
+		vf = (struct i40e_vf *)priv->f;
+		ret = snprintf(buf, len, "%d", vf->vf_id);
+		break;
+	case I40E_PORT_NETDEV_PF:
+		ret = snprintf(buf, len, "%d", I40E_MAIN_VSI_PORT_ID);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	if (ret >= len)
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
 static const struct net_device_ops i40e_port_netdev_ops = {
 	.ndo_open		= i40e_port_netdev_open,
 	.ndo_stop		= i40e_port_netdev_stop,
@@ -11118,6 +11173,44 @@  static int i40e_port_netdev_stop(struct net_device *dev)
 	.ndo_get_stats64	= i40e_port_netdev_get_stats64,
 	.ndo_has_offload_stats	= i40e_port_netdev_has_offload_stats,
 	.ndo_get_offload_stats	= i40e_port_netdev_get_offload_stats,
+	.ndo_get_phys_port_name	= i40e_port_netdev_get_phys_port_name,
+};
+
+static int i40e_switchdev_port_attr_get(struct net_device *dev,
+					struct switchdev_attr *attr)
+{
+	struct i40e_port_netdev_priv *priv = netdev_priv(dev);
+	struct i40e_vsi *vsi;
+	struct i40e_pf *pf;
+	struct i40e_vf *vf;
+
+	switch (priv->type) {
+	case I40E_PORT_NETDEV_VF:
+		vf = (struct i40e_vf *)priv->f;
+		pf = vf->pf;
+		break;
+	case I40E_PORT_NETDEV_PF:
+		pf = (struct i40e_pf *)priv->f;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	vsi = pf->vsi[pf->lan_vsi];
+	switch (attr->id) {
+	case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
+		attr->u.ppid.id_len = ETH_ALEN;
+		ether_addr_copy(attr->u.ppid.id, vsi->netdev->dev_addr);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static const struct switchdev_ops i40e_switchdev_port_ops = {
+	.switchdev_port_attr_get	= i40e_switchdev_port_attr_get,
 };
 
 /**
@@ -11203,6 +11296,10 @@  int i40e_alloc_port_netdev(void *f, enum i40e_port_netdev_type type)
 	port_netdev->netdev_ops = &i40e_port_netdev_ops;
 	eth_hw_addr_random(port_netdev);
 
+#ifdef CONFIG_NET_SWITCHDEV
+	port_netdev->switchdev_ops = &i40e_switchdev_port_ops;
+#endif
+
 	netif_carrier_off(port_netdev);
 	netif_tx_stop_all_queues(port_netdev);