Message ID | 20190226182436.23811-3-jakub.kicinski@netronome.com |
---|---|
State | Changes Requested |
Delegated to: | David Miller |
Headers | show |
Series | devlink: add PF and VF port flavours | expand |
Tue, Feb 26, 2019 at 07:24:30PM CET, jakub.kicinski@netronome.com wrote: >Current port flavours cover simple switches and DSA. Add PF >and VF flavours to cover "switchdev" SR-IOV NICs. > >Example devlink user space output: > >$ devlink port >pci/0000:82:00.0/0: type eth netdev p4p1 flavour physical >pci/0000:82:00.0/10000: type eth netdev eth0 flavour pcie_pf pf 0 >pci/0000:82:00.0/10001: type eth netdev eth1 flavour pcie_vf pf 0 vf 0 >pci/0000:82:00.0/10002: type eth netdev eth2 flavour pcie_vf pf 0 vf 1 I believe that that this output should be in sync with attr names. So the names should be: pci_vf pci_pf pf_number vf_number Like: pci/0000:82:00.0/10002: type eth netdev eth2 flavour pci_vf pf_number 0 vf_number 1 But that is comment to the userspace part. > >$ devlink -jp port >{ > "port": { > "pci/0000:82:00.0/0": { > "type": "eth", > "netdev": "p4p1", > "flavour": "physical" > }, > "pci/0000:82:00.0/10000": { > "type": "eth", > "netdev": "eth0", > "flavour": "pci_pf", > "pf": 0, > }, > "pci/0000:82:00.0/10001": { > "type": "eth", > "netdev": "eth1", > "flavour": "pci_vf", > "pf": 0, > "vf": 0 > }, > "pci/0000:82:00.0/10002": { > "type": "eth", > "netdev": "eth2", > "flavour": "pci_vf", > "pf": 0, > "vf": 1 > } > } >} > >Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com> >--- > include/net/devlink.h | 25 ++++++++++-- > include/uapi/linux/devlink.h | 5 +++ > net/core/devlink.c | 73 +++++++++++++++++++++++++++++++----- > 3 files changed, 91 insertions(+), 12 deletions(-) > >diff --git a/include/net/devlink.h b/include/net/devlink.h >index 7f5a0bdca228..b5376ef492f1 100644 >--- a/include/net/devlink.h >+++ b/include/net/devlink.h >@@ -42,9 +42,19 @@ struct devlink { > struct devlink_port_attrs { > bool set; > enum devlink_port_flavour flavour; >- u32 port_number; /* same value as "split group" */ >- bool split; >- u32 split_subport_number; >+ union { /* port identifiers differ per-flavour */ >+ /* PHYSICAL, CPU, DSA */ >+ struct { >+ bool split; >+ u32 split_subport_number; >+ u32 port_number; /* same value as "split group" */ >+ }; >+ /* PCI_PF, PCI_VF */ >+ struct { >+ u32 pf_number; >+ u32 vf_number; >+ } pci; >+ }; > }; > > struct devlink_port { >@@ -568,6 +578,9 @@ void devlink_port_attrs_set(struct devlink_port *devlink_port, > enum devlink_port_flavour flavour, > u32 port_number, bool split, > u32 split_subport_number); >+void devlink_port_attrs_pci_set(struct devlink_port *devlink_port, >+ enum devlink_port_flavour flavour, >+ u32 pf_number, u32 vf_number); > int devlink_port_get_phys_port_name(struct devlink_port *devlink_port, > char *name, size_t len); > int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, >@@ -782,6 +795,12 @@ static inline void devlink_port_attrs_set(struct devlink_port *devlink_port, > { > } > >+static inline void devlink_port_attrs_pci_set(struct devlink_port *devlink_port, >+ enum devlink_port_flavour flavour, >+ u32 pf_number, u32 vf_number) >+{ >+} >+ > static inline int > devlink_port_get_phys_port_name(struct devlink_port *devlink_port, > char *name, size_t len) >diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h >index 5bb4ea67d84f..9ce76d4f640d 100644 >--- a/include/uapi/linux/devlink.h >+++ b/include/uapi/linux/devlink.h >@@ -167,6 +167,8 @@ enum devlink_port_flavour { > DEVLINK_PORT_FLAVOUR_DSA, /* Distributed switch architecture > * interconnect port. > */ >+ DEVLINK_PORT_FLAVOUR_PCI_PF, /* PCI Physical function port */ >+ DEVLINK_PORT_FLAVOUR_PCI_VF, /* PCI Physical function port */ > }; > > enum devlink_param_cmode { >@@ -332,6 +334,9 @@ enum devlink_attr { > DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME, /* string */ > DEVLINK_ATTR_FLASH_UPDATE_COMPONENT, /* string */ > >+ DEVLINK_ATTR_PORT_PCI_PF_NUMBER, /* u32 */ >+ DEVLINK_ATTR_PORT_PCI_VF_NUMBER, /* u32 */ >+ > /* add new attributes above here, update the policy in devlink.c */ > > __DEVLINK_ATTR_MAX, >diff --git a/net/core/devlink.c b/net/core/devlink.c >index a49dee67e66f..af177284830b 100644 >--- a/net/core/devlink.c >+++ b/net/core/devlink.c >@@ -516,16 +516,35 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg, > return 0; > if (nla_put_u16(msg, DEVLINK_ATTR_PORT_FLAVOUR, attrs->flavour)) > return -EMSGSIZE; >- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER, attrs->port_number)) >- return -EMSGSIZE; >- if (!attrs->split) >+ >+ switch (attrs->flavour) { >+ case DEVLINK_PORT_FLAVOUR_PHYSICAL: >+ case DEVLINK_PORT_FLAVOUR_CPU: >+ case DEVLINK_PORT_FLAVOUR_DSA: >+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER, >+ attrs->port_number)) >+ return -EMSGSIZE; >+ >+ if (attrs->split && >+ (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP, >+ attrs->port_number) || >+ nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER, >+ attrs->split_subport_number))) >+ return -EMSGSIZE; > return 0; >- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP, attrs->port_number)) >- return -EMSGSIZE; >- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER, >- attrs->split_subport_number)) >- return -EMSGSIZE; >- return 0; >+ case DEVLINK_PORT_FLAVOUR_PCI_VF: >+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_PCI_VF_NUMBER, >+ attrs->pci.vf_number)) >+ return -EMSGSIZE; >+ /* fall through */ >+ case DEVLINK_PORT_FLAVOUR_PCI_PF: >+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER, >+ attrs->pci.pf_number)) >+ return -EMSGSIZE; >+ return 0; >+ default: >+ return -EINVAL; >+ } > } > > static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, >@@ -5410,6 +5429,9 @@ void devlink_port_attrs_set(struct devlink_port *devlink_port, > { > struct devlink_port_attrs *attrs = &devlink_port->attrs; > >+ WARN_ON(flavour == DEVLINK_PORT_FLAVOUR_PCI_PF || >+ flavour == DEVLINK_PORT_FLAVOUR_PCI_VF); >+ > attrs->set = true; > attrs->flavour = flavour; > attrs->port_number = port_number; >@@ -5419,6 +5441,32 @@ void devlink_port_attrs_set(struct devlink_port *devlink_port, > } > EXPORT_SYMBOL_GPL(devlink_port_attrs_set); > >+/** >+ * devlink_port_attrs_pci_set - Set port attributes for a PCI port >+ * >+ * @devlink_port: devlink port >+ * @flavour: flavour of the port (PF or VF only) >+ * @pf_number: PCI PF number, in multi-host mapping to hosts depends >+ * on the platform >+ * @vf_number: PCI VF number within given PF (ignored for PF itself) >+ */ >+void devlink_port_attrs_pci_set(struct devlink_port *devlink_port, >+ enum devlink_port_flavour flavour, >+ u32 pf_number, u32 vf_number) Maybe nicer would be to have this static and have 2 helpers: void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 pf_number) void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 pf_number, u32 vf_number) Then you need no warnon. Also you won't have dead arg in driver api case of pf Other than this the patch looks good to me. >+{ >+ struct devlink_port_attrs *attrs = &devlink_port->attrs; >+ >+ WARN_ON(flavour != DEVLINK_PORT_FLAVOUR_PCI_PF && >+ flavour != DEVLINK_PORT_FLAVOUR_PCI_VF); >+ >+ attrs->set = true; >+ attrs->flavour = flavour; >+ attrs->pci.pf_number = pf_number; >+ attrs->pci.vf_number = vf_number; >+ devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); >+} >+EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_set); >+ > int devlink_port_get_phys_port_name(struct devlink_port *devlink_port, > char *name, size_t len) > { >@@ -5443,6 +5491,13 @@ int devlink_port_get_phys_port_name(struct devlink_port *devlink_port, > */ > WARN_ON(1); > return -EINVAL; >+ case DEVLINK_PORT_FLAVOUR_PCI_PF: >+ n = snprintf(name, len, "pf%u", attrs->pci.pf_number); >+ break; >+ case DEVLINK_PORT_FLAVOUR_PCI_VF: >+ n = snprintf(name, len, "pf%uvf%u", >+ attrs->pf_number, attrs->pci.vf_number); >+ break; > } > > if (n >= len) >-- >2.19.2 >
Tue, Feb 26, 2019 at 07:24:30PM CET, jakub.kicinski@netronome.com wrote: >Current port flavours cover simple switches and DSA. Add PF >and VF flavours to cover "switchdev" SR-IOV NICs. > >Example devlink user space output: > >$ devlink port >pci/0000:82:00.0/0: type eth netdev p4p1 flavour physical >pci/0000:82:00.0/10000: type eth netdev eth0 flavour pcie_pf pf 0 >pci/0000:82:00.0/10001: type eth netdev eth1 flavour pcie_vf pf 0 vf 0 >pci/0000:82:00.0/10002: type eth netdev eth2 flavour pcie_vf pf 0 vf 1 Wait a second, howcome pf and vfs have the same PCI address?
Wed, Feb 27, 2019 at 01:23:27PM CET, jiri@resnulli.us wrote: >Tue, Feb 26, 2019 at 07:24:30PM CET, jakub.kicinski@netronome.com wrote: >>Current port flavours cover simple switches and DSA. Add PF >>and VF flavours to cover "switchdev" SR-IOV NICs. >> >>Example devlink user space output: >> >>$ devlink port >>pci/0000:82:00.0/0: type eth netdev p4p1 flavour physical >>pci/0000:82:00.0/10000: type eth netdev eth0 flavour pcie_pf pf 0 >>pci/0000:82:00.0/10001: type eth netdev eth1 flavour pcie_vf pf 0 vf 0 >>pci/0000:82:00.0/10002: type eth netdev eth2 flavour pcie_vf pf 0 vf 1 > >Wait a second, howcome pf and vfs have the same PCI address? Oh, I think you have these as eswitch port representors. Confusing...
On Wed, 27 Feb 2019 13:41:35 +0100, Jiri Pirko wrote: > Wed, Feb 27, 2019 at 01:23:27PM CET, jiri@resnulli.us wrote: > >Tue, Feb 26, 2019 at 07:24:30PM CET, jakub.kicinski@netronome.com wrote: > >>Current port flavours cover simple switches and DSA. Add PF > >>and VF flavours to cover "switchdev" SR-IOV NICs. > >> > >>Example devlink user space output: > >> > >>$ devlink port > >>pci/0000:82:00.0/0: type eth netdev p4p1 flavour physical > >>pci/0000:82:00.0/10000: type eth netdev eth0 flavour pcie_pf pf 0 > >>pci/0000:82:00.0/10001: type eth netdev eth1 flavour pcie_vf pf 0 vf 0 > >>pci/0000:82:00.0/10002: type eth netdev eth2 flavour pcie_vf pf 0 vf 1 > > > >Wait a second, howcome pf and vfs have the same PCI address? > > Oh, I think you have these as eswitch port representors. Confusing... FWIW I don't like the word representor, its a port. We don't call physical ports "representors" even though from ASIC's point of view they are exactly the same.
Wed, Feb 27, 2019 at 06:23:26PM CET, jakub.kicinski@netronome.com wrote: >On Wed, 27 Feb 2019 13:41:35 +0100, Jiri Pirko wrote: >> Wed, Feb 27, 2019 at 01:23:27PM CET, jiri@resnulli.us wrote: >> >Tue, Feb 26, 2019 at 07:24:30PM CET, jakub.kicinski@netronome.com wrote: >> >>Current port flavours cover simple switches and DSA. Add PF >> >>and VF flavours to cover "switchdev" SR-IOV NICs. >> >> >> >>Example devlink user space output: >> >> >> >>$ devlink port >> >>pci/0000:82:00.0/0: type eth netdev p4p1 flavour physical >> >>pci/0000:82:00.0/10000: type eth netdev eth0 flavour pcie_pf pf 0 >> >>pci/0000:82:00.0/10001: type eth netdev eth1 flavour pcie_vf pf 0 vf 0 >> >>pci/0000:82:00.0/10002: type eth netdev eth2 flavour pcie_vf pf 0 vf 1 >> > >> >Wait a second, howcome pf and vfs have the same PCI address? >> >> Oh, I think you have these as eswitch port representors. Confusing... > >FWIW I don't like the word representor, its a port. We don't call >physical ports "representors" even though from ASIC's point of view >they are exactly the same. My point is, they are not PFs and VFs. We have to find a way to clearly see what's what.
On Wed, 27 Feb 2019 21:17:27 +0100, Jiri Pirko wrote: > Wed, Feb 27, 2019 at 06:23:26PM CET, jakub.kicinski@netronome.com wrote: > >On Wed, 27 Feb 2019 13:41:35 +0100, Jiri Pirko wrote: > >> Wed, Feb 27, 2019 at 01:23:27PM CET, jiri@resnulli.us wrote: > >> >Tue, Feb 26, 2019 at 07:24:30PM CET, jakub.kicinski@netronome.com wrote: > >> >>Current port flavours cover simple switches and DSA. Add PF > >> >>and VF flavours to cover "switchdev" SR-IOV NICs. > >> >> > >> >>Example devlink user space output: > >> >> > >> >>$ devlink port > >> >>pci/0000:82:00.0/0: type eth netdev p4p1 flavour physical > >> >>pci/0000:82:00.0/10000: type eth netdev eth0 flavour pcie_pf pf 0 > >> >>pci/0000:82:00.0/10001: type eth netdev eth1 flavour pcie_vf pf 0 vf 0 > >> >>pci/0000:82:00.0/10002: type eth netdev eth2 flavour pcie_vf pf 0 vf 1 > >> > > >> >Wait a second, howcome pf and vfs have the same PCI address? > >> > >> Oh, I think you have these as eswitch port representors. Confusing... > > > >FWIW I don't like the word representor, its a port. We don't call > >physical ports "representors" even though from ASIC's point of view > >they are exactly the same. > > My point is, they are not PFs and VFs. We have to find a way to clearly > see what's what. Okay, so let me explain the way I see it, and you can explain your way or tell me where you disagree. Those devlink ports and netdevs are pf ports and vf ports, which most refer to as "representor". If one sends packets to the netdev indicated in DEVLINK_ATTR_PORT_NETDEV_* attributes they will _egress_ the switch from that port. For physical port that means going onto the Ethernet or IB wire. For PCIe it means getting DMAed over the PCIe link to host memory. There is a netdev construct on the host which is in charge of that host memory. Maybe we shall call that host netdev? (I said I don't like "representor" for the reason that people don't refer to the physical port as "representor" even though it has exactly the semantics we are following. This distinction between behaviour of physical and PCI ports is what leads to confusion, I think.) Let me bring out the moose :) HOST A || HOST B || PF A | V | V | V | V || PF B | V | V | V |*F |*F |*F |*F ... || |*F |*F |*F ... *port A0 |*port A1 | 0 | 1 | 2 | 3 ||*port B0 |*port B1 | 0 | 1 | 2 || PCI Express link || PCI Express link \ \ \ | | | | | / / / \ \ \ | | | | | / / / /\ \______\______\'___|___|__________|_______'____/___/___/__ /\ || |+PF0s0|+PF0s1 |+VF0|+VF1| ...| |+PF1s0|+PF1s1|+VF0|+VF1| || i || |------ ------ ----- ---- ----|--- ------ ------ ---- ----| || i d n H || | <<========== | || d n H e s O || | ==========>> | || e s O v t S || | SR-IOV e-switch | || v t S l a T || | <<========== | || l a T i n || | ==========>> | || i n n c A || | ________ _________ ________ | || n c B k e || | |+Phys 0 |+Phys 1 |+Phys 2 | | || k e || \---------------------------------------------------------/ || \/ | | | \/ | | | || || MAC 0 || MAC 1 || MAC 2 || || Things marked with + are devlink ports and have port (-repr-) netdevs (including physical ports). Things marked with * are host netdevs, don't have devlink ports.
Wed, Feb 27, 2019 at 11:42:39PM CET, jakub.kicinski@netronome.com wrote: >On Wed, 27 Feb 2019 21:17:27 +0100, Jiri Pirko wrote: >> Wed, Feb 27, 2019 at 06:23:26PM CET, jakub.kicinski@netronome.com wrote: >> >On Wed, 27 Feb 2019 13:41:35 +0100, Jiri Pirko wrote: >> >> Wed, Feb 27, 2019 at 01:23:27PM CET, jiri@resnulli.us wrote: >> >> >Tue, Feb 26, 2019 at 07:24:30PM CET, jakub.kicinski@netronome.com wrote: >> >> >>Current port flavours cover simple switches and DSA. Add PF >> >> >>and VF flavours to cover "switchdev" SR-IOV NICs. >> >> >> >> >> >>Example devlink user space output: >> >> >> >> >> >>$ devlink port >> >> >>pci/0000:82:00.0/0: type eth netdev p4p1 flavour physical >> >> >>pci/0000:82:00.0/10000: type eth netdev eth0 flavour pcie_pf pf 0 >> >> >>pci/0000:82:00.0/10001: type eth netdev eth1 flavour pcie_vf pf 0 vf 0 >> >> >>pci/0000:82:00.0/10002: type eth netdev eth2 flavour pcie_vf pf 0 vf 1 >> >> > >> >> >Wait a second, howcome pf and vfs have the same PCI address? >> >> >> >> Oh, I think you have these as eswitch port representors. Confusing... >> > >> >FWIW I don't like the word representor, its a port. We don't call >> >physical ports "representors" even though from ASIC's point of view >> >they are exactly the same. >> >> My point is, they are not PFs and VFs. We have to find a way to clearly >> see what's what. > >Okay, so let me explain the way I see it, and you can explain your way >or tell me where you disagree. Those devlink ports and netdevs are pf >ports and vf ports, which most refer to as "representor". If one sends >packets to the netdev indicated in DEVLINK_ATTR_PORT_NETDEV_* >attributes they will _egress_ the switch from that port. For physical >port that means going onto the Ethernet or IB wire. For PCIe it means >getting DMAed over the PCIe link to host memory. > >There is a netdev construct on the host which is in charge of that >host memory. Maybe we shall call that host netdev? > >(I said I don't like "representor" for the reason that people don't >refer to the physical port as "representor" even though it has exactly >the semantics we are following. This distinction between behaviour of >physical and PCI ports is what leads to confusion, I think.) > >Let me bring out the moose :) > > HOST A || HOST B > || > PF A | V | V | V | V || PF B | V | V | V > |*F |*F |*F |*F ... || |*F |*F |*F ... >*port A0 |*port A1 | 0 | 1 | 2 | 3 ||*port B0 |*port B1 | 0 | 1 | 2 > || > PCI Express link || PCI Express link > \ \ \ | | | | | / / / > \ \ \ | | | | | / / / > /\ \______\______\'___|___|__________|_______'____/___/___/__ /\ > || |+PF0s0|+PF0s1 |+VF0|+VF1| ...| |+PF1s0|+PF1s1|+VF0|+VF1| || > i || |------ ------ ----- ---- ----|--- ------ ------ ---- ----| || i >d n H || | <<========== | || d n H >e s O || | ==========>> | || e s O >v t S || | SR-IOV e-switch | || v t S >l a T || | <<========== | || l a T >i n || | ==========>> | || i n >n c A || | ________ _________ ________ | || n c B >k e || | |+Phys 0 |+Phys 1 |+Phys 2 | | || k e > || \---------------------------------------------------------/ || > \/ | | | \/ > | | | > || || > MAC 0 || MAC 1 || MAC 2 > || || > >Things marked with + are devlink ports and have port (-repr-) netdevs >(including physical ports). >Things marked with * are host netdevs, don't have devlink ports. Okay, I got it. So you say that devlink ports should always be only ports of eswitch. PF host netdev should have "devlink port" instance, correct? But it still "belongs" under the ASIC represented by the devlink instance...
On Thu, 28 Feb 2019 09:44:29 +0100, Jiri Pirko wrote: > >Okay, so let me explain the way I see it, and you can explain your way > >or tell me where you disagree. Those devlink ports and netdevs are pf > >ports and vf ports, which most refer to as "representor". If one sends > >packets to the netdev indicated in DEVLINK_ATTR_PORT_NETDEV_* > >attributes they will _egress_ the switch from that port. For physical > >port that means going onto the Ethernet or IB wire. For PCIe it means > >getting DMAed over the PCIe link to host memory. > > > >There is a netdev construct on the host which is in charge of that > >host memory. Maybe we shall call that host netdev? > > > >(I said I don't like "representor" for the reason that people don't > >refer to the physical port as "representor" even though it has exactly > >the semantics we are following. This distinction between behaviour of > >physical and PCI ports is what leads to confusion, I think.) > > > >Let me bring out the moose :) > > > > HOST A || HOST B > > || > > PF A | V | V | V | V || PF B | V | V | V > > |*F |*F |*F |*F ... || |*F |*F |*F ... > >*port A0 |*port A1 | 0 | 1 | 2 | 3 ||*port B0 |*port B1 | 0 | 1 | 2 > > || > > PCI Express link || PCI Express link > > \ \ \ | | | | | / / / > > \ \ \ | | | | | / / / > > /\ \______\______\'___|___|__________|_______'____/___/___/__ /\ > > || |+PF0s0|+PF0s1 |+VF0|+VF1| ...| |+PF1s0|+PF1s1|+VF0|+VF1| || > > i || |------ ------ ----- ---- ----|--- ------ ------ ---- ----| || i > >d n H || | <<========== | || d n H > >e s O || | ==========>> | || e s O > >v t S || | SR-IOV e-switch | || v t S > >l a T || | <<========== | || l a T > >i n || | ==========>> | || i n > >n c A || | ________ _________ ________ | || n c B > >k e || | |+Phys 0 |+Phys 1 |+Phys 2 | | || k e > > || \---------------------------------------------------------/ || > > \/ | | | \/ > > | | | > > || || > > MAC 0 || MAC 1 || MAC 2 > > || || > > > >Things marked with + are devlink ports and have port (-repr-) netdevs > >(including physical ports). > >Things marked with * are host netdevs, don't have devlink ports. > > Okay, I got it. So you say that devlink ports should always be only > ports of eswitch. > > PF host netdev should have "devlink port" instance, correct? > But it still "belongs" under the ASIC represented by the devlink > instance... Yes, I think so.
On 2/27/19 3:42 PM, Jakub Kicinski wrote: > On Wed, 27 Feb 2019 21:17:27 +0100, Jiri Pirko wrote: >> Wed, Feb 27, 2019 at 06:23:26PM CET, jakub.kicinski@netronome.com wrote: >>> On Wed, 27 Feb 2019 13:41:35 +0100, Jiri Pirko wrote: >>>> Wed, Feb 27, 2019 at 01:23:27PM CET, jiri@resnulli.us wrote: >>>>> Tue, Feb 26, 2019 at 07:24:30PM CET, jakub.kicinski@netronome.com wrote: >>>>>> Current port flavours cover simple switches and DSA. Add PF >>>>>> and VF flavours to cover "switchdev" SR-IOV NICs. >>>>>> >>>>>> Example devlink user space output: >>>>>> >>>>>> $ devlink port >>>>>> pci/0000:82:00.0/0: type eth netdev p4p1 flavour physical >>>>>> pci/0000:82:00.0/10000: type eth netdev eth0 flavour pcie_pf pf 0 >>>>>> pci/0000:82:00.0/10001: type eth netdev eth1 flavour pcie_vf pf 0 vf 0 >>>>>> pci/0000:82:00.0/10002: type eth netdev eth2 flavour pcie_vf pf 0 vf 1 >>>>> >>>>> Wait a second, howcome pf and vfs have the same PCI address? >>>> >>>> Oh, I think you have these as eswitch port representors. Confusing... >>> >>> FWIW I don't like the word representor, its a port. We don't call >>> physical ports "representors" even though from ASIC's point of view >>> they are exactly the same. >> >> My point is, they are not PFs and VFs. We have to find a way to clearly >> see what's what. > > Okay, so let me explain the way I see it, and you can explain your way > or tell me where you disagree. Those devlink ports and netdevs are pf > ports and vf ports, which most refer to as "representor". If one sends > packets to the netdev indicated in DEVLINK_ATTR_PORT_NETDEV_* > attributes they will _egress_ the switch from that port. For physical > port that means going onto the Ethernet or IB wire. For PCIe it means > getting DMAed over the PCIe link to host memory. > > There is a netdev construct on the host which is in charge of that > host memory. Maybe we shall call that host netdev? > > (I said I don't like "representor" for the reason that people don't > refer to the physical port as "representor" even though it has exactly > the semantics we are following. This distinction between behaviour of > physical and PCI ports is what leads to confusion, I think.) > > Let me bring out the moose :) > > HOST A || HOST B > || > PF A | V | V | V | V || PF B | V | V | V > |*F |*F |*F |*F ... || |*F |*F |*F ... > *port A0 |*port A1 | 0 | 1 | 2 | 3 ||*port B0 |*port B1 | 0 | 1 | 2 > || > PCI Express link || PCI Express link > \ \ \ | | | | | / / / > \ \ \ | | | | | / / / > /\ \______\______\'___|___|__________|_______'____/___/___/__ /\ > || |+PF0s0|+PF0s1 |+VF0|+VF1| ...| |+PF1s0|+PF1s1|+VF0|+VF1| || > i || |------ ------ ----- ---- ----|--- ------ ------ ---- ----| || i > d n H || | <<========== | || d n H > e s O || | ==========>> | || e s O > v t S || | SR-IOV e-switch | || v t S > l a T || | <<========== | || l a T > i n || | ==========>> | || i n > n c A || | ________ _________ ________ | || n c B > k e || | |+Phys 0 |+Phys 1 |+Phys 2 | | || k e > || \---------------------------------------------------------/ || > \/ | | | \/ > | | | > || || > MAC 0 || MAC 1 || MAC 2 > || || > > Things marked with + are devlink ports and have port (-repr-) netdevs > (including physical ports). > Things marked with * are host netdevs, don't have devlink ports. > That would a good update to the commit message or cover letter.
> -----Original Message----- > From: netdev-owner@vger.kernel.org <netdev-owner@vger.kernel.org> On > Behalf Of Jiri Pirko > Sent: Wednesday, February 27, 2019 6:17 AM > To: Jakub Kicinski <jakub.kicinski@netronome.com> > Cc: davem@davemloft.net; oss-drivers@netronome.com; > netdev@vger.kernel.org > Subject: Re: [PATCH net-next 2/8] devlink: add PF and VF port flavours > > Tue, Feb 26, 2019 at 07:24:30PM CET, jakub.kicinski@netronome.com wrote: > >Current port flavours cover simple switches and DSA. Add PF and VF > >flavours to cover "switchdev" SR-IOV NICs. > > > >Example devlink user space output: > > > >$ devlink port > >pci/0000:82:00.0/0: type eth netdev p4p1 flavour physical > >pci/0000:82:00.0/10000: type eth netdev eth0 flavour pcie_pf pf 0 > >pci/0000:82:00.0/10001: type eth netdev eth1 flavour pcie_vf pf 0 vf 0 > >pci/0000:82:00.0/10002: type eth netdev eth2 flavour pcie_vf pf 0 vf 1 > A given port is of its parent device. In current scenario, its PF or VF. Hence it should be device attribute and not a port attribute. So devlink dev show command have to show what device flavour is. Is it well known PCI VF or PF or something else. It will show subdev device attribute and its parent PCI (PF/VF) devlink device. So we should have device flovour as PCI_PF or PCI_VF or SUBDEV. Again VF number showcasing here is very restrictive model. Every PF/VF/Subdev represents its own 'port' and it is connected to eswitch 'port'. Instead of showing VF here, it must be this 'port' or 'link' number that gives right view. Which netdev represents which VF is already linked in the VF rep-netdev sysfs property. So flavour should be something like 'hostport' and when port is registered for the eswitch side it should be 'switchport'. With this there is very clear picture of which hostport is connected to which eswitch port. Just like how we see in the physical world.
Mon, Mar 04, 2019 at 05:59:04AM CET, parav@mellanox.com wrote: > > >> -----Original Message----- >> From: netdev-owner@vger.kernel.org <netdev-owner@vger.kernel.org> On >> Behalf Of Jiri Pirko >> Sent: Wednesday, February 27, 2019 6:17 AM >> To: Jakub Kicinski <jakub.kicinski@netronome.com> >> Cc: davem@davemloft.net; oss-drivers@netronome.com; >> netdev@vger.kernel.org >> Subject: Re: [PATCH net-next 2/8] devlink: add PF and VF port flavours >> >> Tue, Feb 26, 2019 at 07:24:30PM CET, jakub.kicinski@netronome.com wrote: >> >Current port flavours cover simple switches and DSA. Add PF and VF >> >flavours to cover "switchdev" SR-IOV NICs. >> > >> >Example devlink user space output: >> > >> >$ devlink port >> >pci/0000:82:00.0/0: type eth netdev p4p1 flavour physical >> >pci/0000:82:00.0/10000: type eth netdev eth0 flavour pcie_pf pf 0 >> >pci/0000:82:00.0/10001: type eth netdev eth1 flavour pcie_vf pf 0 vf 0 >> >pci/0000:82:00.0/10002: type eth netdev eth2 flavour pcie_vf pf 0 vf 1 >> >A given port is of its parent device. >In current scenario, its PF or VF. >Hence it should be device attribute and not a port attribute. I think that this works. You have VF_rep ports, PF_rep ports and PHYSICAL ports. In mlxsw for example, there are only PHYSICAL ports. In sr-iov world, there is also a PHYSICAL port on the eswitch. The others are either facing PF of VF. Looks accurate. I don't see any need for "devlink dev" flavour. >So devlink dev show command have to show what device flavour is. >Is it well known PCI VF or PF or something else. >It will show subdev device attribute and its parent PCI (PF/VF) devlink device. >So we should have device flovour as PCI_PF or PCI_VF or SUBDEV. > >Again VF number showcasing here is very restrictive model. >Every PF/VF/Subdev represents its own 'port' and it is connected to eswitch 'port'. >Instead of showing VF here, it must be this 'port' or 'link' number that gives right view. >Which netdev represents which VF is already linked in the VF rep-netdev sysfs property. I think you confuse the eswtich ports (in Jakub's output it's them) and the actual VF. > >So flavour should be something like 'hostport' and when port is registered for the eswitch side it should be 'switchport'. >With this there is very clear picture of which hostport is connected to which eswitch port. >Just like how we see in the physical world. >
> -----Original Message----- > From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org] > On Behalf Of Jiri Pirko > Sent: Sunday, March 03, 2019 11:30 PM > To: Parav Pandit <parav@mellanox.com> > Cc: Jakub Kicinski <jakub.kicinski@netronome.com>; davem@davemloft.net; > oss-drivers@netronome.com; netdev@vger.kernel.org > Subject: Re: [PATCH net-next 2/8] devlink: add PF and VF port flavours > > Mon, Mar 04, 2019 at 05:59:04AM CET, parav@mellanox.com wrote: > > > > > >> -----Original Message----- > >> From: netdev-owner@vger.kernel.org <netdev-owner@vger.kernel.org> On > >> Behalf Of Jiri Pirko > >> Sent: Wednesday, February 27, 2019 6:17 AM > >> To: Jakub Kicinski <jakub.kicinski@netronome.com> > >> Cc: davem@davemloft.net; oss-drivers@netronome.com; > >> netdev@vger.kernel.org > >> Subject: Re: [PATCH net-next 2/8] devlink: add PF and VF port > >> flavours > >> > >> Tue, Feb 26, 2019 at 07:24:30PM CET, jakub.kicinski@netronome.com wrote: > >> >Current port flavours cover simple switches and DSA. Add PF and VF > >> >flavours to cover "switchdev" SR-IOV NICs. > >> > > >> >Example devlink user space output: > >> > > >> >$ devlink port > >> >pci/0000:82:00.0/0: type eth netdev p4p1 flavour physical > >> >pci/0000:82:00.0/10000: type eth netdev eth0 flavour pcie_pf pf 0 > >> >pci/0000:82:00.0/10001: type eth netdev eth1 flavour pcie_vf pf 0 vf > >> >0 > >> >pci/0000:82:00.0/10002: type eth netdev eth2 flavour pcie_vf pf 0 vf > >> >1 > >> > >A given port is of its parent device. > >In current scenario, its PF or VF. > >Hence it should be device attribute and not a port attribute. > > I think that this works. You have VF_rep ports, PF_rep ports and PHYSICAL ports. > In mlxsw for example, there are only PHYSICAL ports. > In sr-iov world, there is also a PHYSICAL port on the eswitch. The others are > either facing PF of VF. Looks accurate. I don't see any need for "devlink dev" > flavour. I see what you're trying to do here, with VF_rep ports being independent of PF_rep ports and PHYSICAL ports - however, my question is how do you categorize VF_rep ports of the same parent PF physical ports (say you have multi-port device, with 2 or more PFs), at least for identification purposes per physical port? Do we need to have pci_vf_number appended to physical port number? Thanks, ~Akeem > > > >So devlink dev show command have to show what device flavour is. > >Is it well known PCI VF or PF or something else. > >It will show subdev device attribute and its parent PCI (PF/VF) devlink device. > >So we should have device flovour as PCI_PF or PCI_VF or SUBDEV. > > > >Again VF number showcasing here is very restrictive model. > >Every PF/VF/Subdev represents its own 'port' and it is connected to eswitch > 'port'. > >Instead of showing VF here, it must be this 'port' or 'link' number that gives > right view. > >Which netdev represents which VF is already linked in the VF rep-netdev sysfs > property. > > I think you confuse the eswtich ports (in Jakub's output it's them) and the actual > VF. > > > > > >So flavour should be something like 'hostport' and when port is registered for > the eswitch side it should be 'switchport'. > >With this there is very clear picture of which hostport is connected to which > eswitch port. > >Just like how we see in the physical world. > >
Wed, Mar 20, 2019 at 06:29:44PM CET, akeem.g.abodunrin@intel.com wrote: > > >> -----Original Message----- >> From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org] >> On Behalf Of Jiri Pirko >> Sent: Sunday, March 03, 2019 11:30 PM >> To: Parav Pandit <parav@mellanox.com> >> Cc: Jakub Kicinski <jakub.kicinski@netronome.com>; davem@davemloft.net; >> oss-drivers@netronome.com; netdev@vger.kernel.org >> Subject: Re: [PATCH net-next 2/8] devlink: add PF and VF port flavours >> >> Mon, Mar 04, 2019 at 05:59:04AM CET, parav@mellanox.com wrote: >> > >> > >> >> -----Original Message----- >> >> From: netdev-owner@vger.kernel.org <netdev-owner@vger.kernel.org> On >> >> Behalf Of Jiri Pirko >> >> Sent: Wednesday, February 27, 2019 6:17 AM >> >> To: Jakub Kicinski <jakub.kicinski@netronome.com> >> >> Cc: davem@davemloft.net; oss-drivers@netronome.com; >> >> netdev@vger.kernel.org >> >> Subject: Re: [PATCH net-next 2/8] devlink: add PF and VF port >> >> flavours >> >> >> >> Tue, Feb 26, 2019 at 07:24:30PM CET, jakub.kicinski@netronome.com wrote: >> >> >Current port flavours cover simple switches and DSA. Add PF and VF >> >> >flavours to cover "switchdev" SR-IOV NICs. >> >> > >> >> >Example devlink user space output: >> >> > >> >> >$ devlink port >> >> >pci/0000:82:00.0/0: type eth netdev p4p1 flavour physical >> >> >pci/0000:82:00.0/10000: type eth netdev eth0 flavour pcie_pf pf 0 >> >> >pci/0000:82:00.0/10001: type eth netdev eth1 flavour pcie_vf pf 0 vf >> >> >0 >> >> >pci/0000:82:00.0/10002: type eth netdev eth2 flavour pcie_vf pf 0 vf >> >> >1 >> >> >> >A given port is of its parent device. >> >In current scenario, its PF or VF. >> >Hence it should be device attribute and not a port attribute. >> >> I think that this works. You have VF_rep ports, PF_rep ports and PHYSICAL ports. >> In mlxsw for example, there are only PHYSICAL ports. >> In sr-iov world, there is also a PHYSICAL port on the eswitch. The others are >> either facing PF of VF. Looks accurate. I don't see any need for "devlink dev" >> flavour. > >I see what you're trying to do here, with VF_rep ports being independent of PF_rep ports and PHYSICAL ports - however, my question is how do you categorize VF_rep ports of the same parent PF physical ports (say you have multi-port device, with 2 or more PFs), at least for identification purposes per physical port? Do we need to have pci_vf_number appended to physical port number? Please wrap your messages at 80 cols. > >Thanks, >~Akeem >> >> >> >So devlink dev show command have to show what device flavour is. >> >Is it well known PCI VF or PF or something else. >> >It will show subdev device attribute and its parent PCI (PF/VF) devlink device. >> >So we should have device flovour as PCI_PF or PCI_VF or SUBDEV. >> > >> >Again VF number showcasing here is very restrictive model. >> >Every PF/VF/Subdev represents its own 'port' and it is connected to eswitch >> 'port'. >> >Instead of showing VF here, it must be this 'port' or 'link' number that gives >> right view. >> >Which netdev represents which VF is already linked in the VF rep-netdev sysfs >> property. >> >> I think you confuse the eswtich ports (in Jakub's output it's them) and the actual >> VF. >> >> >> > >> >So flavour should be something like 'hostport' and when port is registered for >> the eswitch side it should be 'switchport'. >> >With this there is very clear picture of which hostport is connected to which >> eswitch port. >> >Just like how we see in the physical world. >> >
diff --git a/include/net/devlink.h b/include/net/devlink.h index 7f5a0bdca228..b5376ef492f1 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -42,9 +42,19 @@ struct devlink { struct devlink_port_attrs { bool set; enum devlink_port_flavour flavour; - u32 port_number; /* same value as "split group" */ - bool split; - u32 split_subport_number; + union { /* port identifiers differ per-flavour */ + /* PHYSICAL, CPU, DSA */ + struct { + bool split; + u32 split_subport_number; + u32 port_number; /* same value as "split group" */ + }; + /* PCI_PF, PCI_VF */ + struct { + u32 pf_number; + u32 vf_number; + } pci; + }; }; struct devlink_port { @@ -568,6 +578,9 @@ void devlink_port_attrs_set(struct devlink_port *devlink_port, enum devlink_port_flavour flavour, u32 port_number, bool split, u32 split_subport_number); +void devlink_port_attrs_pci_set(struct devlink_port *devlink_port, + enum devlink_port_flavour flavour, + u32 pf_number, u32 vf_number); int devlink_port_get_phys_port_name(struct devlink_port *devlink_port, char *name, size_t len); int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, @@ -782,6 +795,12 @@ static inline void devlink_port_attrs_set(struct devlink_port *devlink_port, { } +static inline void devlink_port_attrs_pci_set(struct devlink_port *devlink_port, + enum devlink_port_flavour flavour, + u32 pf_number, u32 vf_number) +{ +} + static inline int devlink_port_get_phys_port_name(struct devlink_port *devlink_port, char *name, size_t len) diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 5bb4ea67d84f..9ce76d4f640d 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -167,6 +167,8 @@ enum devlink_port_flavour { DEVLINK_PORT_FLAVOUR_DSA, /* Distributed switch architecture * interconnect port. */ + DEVLINK_PORT_FLAVOUR_PCI_PF, /* PCI Physical function port */ + DEVLINK_PORT_FLAVOUR_PCI_VF, /* PCI Physical function port */ }; enum devlink_param_cmode { @@ -332,6 +334,9 @@ enum devlink_attr { DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME, /* string */ DEVLINK_ATTR_FLASH_UPDATE_COMPONENT, /* string */ + DEVLINK_ATTR_PORT_PCI_PF_NUMBER, /* u32 */ + DEVLINK_ATTR_PORT_PCI_VF_NUMBER, /* u32 */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/net/core/devlink.c b/net/core/devlink.c index a49dee67e66f..af177284830b 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -516,16 +516,35 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg, return 0; if (nla_put_u16(msg, DEVLINK_ATTR_PORT_FLAVOUR, attrs->flavour)) return -EMSGSIZE; - if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER, attrs->port_number)) - return -EMSGSIZE; - if (!attrs->split) + + switch (attrs->flavour) { + case DEVLINK_PORT_FLAVOUR_PHYSICAL: + case DEVLINK_PORT_FLAVOUR_CPU: + case DEVLINK_PORT_FLAVOUR_DSA: + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER, + attrs->port_number)) + return -EMSGSIZE; + + if (attrs->split && + (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP, + attrs->port_number) || + nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER, + attrs->split_subport_number))) + return -EMSGSIZE; return 0; - if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP, attrs->port_number)) - return -EMSGSIZE; - if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER, - attrs->split_subport_number)) - return -EMSGSIZE; - return 0; + case DEVLINK_PORT_FLAVOUR_PCI_VF: + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_PCI_VF_NUMBER, + attrs->pci.vf_number)) + return -EMSGSIZE; + /* fall through */ + case DEVLINK_PORT_FLAVOUR_PCI_PF: + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER, + attrs->pci.pf_number)) + return -EMSGSIZE; + return 0; + default: + return -EINVAL; + } } static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, @@ -5410,6 +5429,9 @@ void devlink_port_attrs_set(struct devlink_port *devlink_port, { struct devlink_port_attrs *attrs = &devlink_port->attrs; + WARN_ON(flavour == DEVLINK_PORT_FLAVOUR_PCI_PF || + flavour == DEVLINK_PORT_FLAVOUR_PCI_VF); + attrs->set = true; attrs->flavour = flavour; attrs->port_number = port_number; @@ -5419,6 +5441,32 @@ void devlink_port_attrs_set(struct devlink_port *devlink_port, } EXPORT_SYMBOL_GPL(devlink_port_attrs_set); +/** + * devlink_port_attrs_pci_set - Set port attributes for a PCI port + * + * @devlink_port: devlink port + * @flavour: flavour of the port (PF or VF only) + * @pf_number: PCI PF number, in multi-host mapping to hosts depends + * on the platform + * @vf_number: PCI VF number within given PF (ignored for PF itself) + */ +void devlink_port_attrs_pci_set(struct devlink_port *devlink_port, + enum devlink_port_flavour flavour, + u32 pf_number, u32 vf_number) +{ + struct devlink_port_attrs *attrs = &devlink_port->attrs; + + WARN_ON(flavour != DEVLINK_PORT_FLAVOUR_PCI_PF && + flavour != DEVLINK_PORT_FLAVOUR_PCI_VF); + + attrs->set = true; + attrs->flavour = flavour; + attrs->pci.pf_number = pf_number; + attrs->pci.vf_number = vf_number; + devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); +} +EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_set); + int devlink_port_get_phys_port_name(struct devlink_port *devlink_port, char *name, size_t len) { @@ -5443,6 +5491,13 @@ int devlink_port_get_phys_port_name(struct devlink_port *devlink_port, */ WARN_ON(1); return -EINVAL; + case DEVLINK_PORT_FLAVOUR_PCI_PF: + n = snprintf(name, len, "pf%u", attrs->pci.pf_number); + break; + case DEVLINK_PORT_FLAVOUR_PCI_VF: + n = snprintf(name, len, "pf%uvf%u", + attrs->pf_number, attrs->pci.vf_number); + break; } if (n >= len)
Current port flavours cover simple switches and DSA. Add PF and VF flavours to cover "switchdev" SR-IOV NICs. Example devlink user space output: $ devlink port pci/0000:82:00.0/0: type eth netdev p4p1 flavour physical pci/0000:82:00.0/10000: type eth netdev eth0 flavour pcie_pf pf 0 pci/0000:82:00.0/10001: type eth netdev eth1 flavour pcie_vf pf 0 vf 0 pci/0000:82:00.0/10002: type eth netdev eth2 flavour pcie_vf pf 0 vf 1 $ devlink -jp port { "port": { "pci/0000:82:00.0/0": { "type": "eth", "netdev": "p4p1", "flavour": "physical" }, "pci/0000:82:00.0/10000": { "type": "eth", "netdev": "eth0", "flavour": "pci_pf", "pf": 0, }, "pci/0000:82:00.0/10001": { "type": "eth", "netdev": "eth1", "flavour": "pci_vf", "pf": 0, "vf": 0 }, "pci/0000:82:00.0/10002": { "type": "eth", "netdev": "eth2", "flavour": "pci_vf", "pf": 0, "vf": 1 } } } Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com> --- include/net/devlink.h | 25 ++++++++++-- include/uapi/linux/devlink.h | 5 +++ net/core/devlink.c | 73 +++++++++++++++++++++++++++++++----- 3 files changed, 91 insertions(+), 12 deletions(-)