diff mbox

[ovs-dev,RFC,v3,1/1] netdev-dpdk: Arbitrary 'dpdk' port naming

Message ID 1468600491-6657-2-git-send-email-ciara.loftus@intel.com
State Changes Requested
Delegated to: Daniele Di Proietto
Headers show

Commit Message

Ciara Loftus July 15, 2016, 4:34 p.m. UTC
'dpdk' ports no longer have naming restrictions. Now, instead
of specifying the dpdk port ID as part of the name, the PCI
address of the device must be specified via the 'dpdk-pci'
option. eg.

ovs-vsctl add-port br0 my-port
ovs-vsctl set Interface my-port type=dpdk
ovs-vsctl set Interface my-port options:dpdk-pci=0000:06:00.3

Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>

v2:
- remove global pci list
- remove unnecessary parenthesis
- remove return from void fn
- print pci like dpdk
- fix port ranges
---
 INSTALL.DPDK-ADVANCED.md |   2 +-
 INSTALL.DPDK.md          |  10 ++--
 NEWS                     |   2 +
 lib/netdev-dpdk.c        | 132 ++++++++++++++++++++++++++++++++++++++---------
 4 files changed, 116 insertions(+), 30 deletions(-)

Comments

Daniele Di Proietto July 16, 2016, 12:20 a.m. UTC | #1
The idea looks very good to me, thanks for working on it.

Very high level comments:

Do we need to be limited to pci devices?  Perhaps we can accept the same
string as rte_eth_dev_attach().

Would it be possible to integrate this more with the hotplug patch?  It
would be nice to avoid introducing extra appctl commands and call
rte_eth_dev_attach() if needed in netdev_dpdk_construct().

Thoughts?

Thanks,

Daniele

2016-07-15 9:34 GMT-07:00 Ciara Loftus <ciara.loftus@intel.com>:

> 'dpdk' ports no longer have naming restrictions. Now, instead
> of specifying the dpdk port ID as part of the name, the PCI
> address of the device must be specified via the 'dpdk-pci'
> option. eg.
>
> ovs-vsctl add-port br0 my-port
> ovs-vsctl set Interface my-port type=dpdk
> ovs-vsctl set Interface my-port options:dpdk-pci=0000:06:00.3
>
> Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
>
> v2:
> - remove global pci list
> - remove unnecessary parenthesis
> - remove return from void fn
> - print pci like dpdk
> - fix port ranges
> ---
>  INSTALL.DPDK-ADVANCED.md |   2 +-
>  INSTALL.DPDK.md          |  10 ++--
>  NEWS                     |   2 +
>  lib/netdev-dpdk.c        | 132
> ++++++++++++++++++++++++++++++++++++++---------
>  4 files changed, 116 insertions(+), 30 deletions(-)
>
> diff --git a/INSTALL.DPDK-ADVANCED.md b/INSTALL.DPDK-ADVANCED.md
> index 61b4e82..7370d03 100644
> --- a/INSTALL.DPDK-ADVANCED.md
> +++ b/INSTALL.DPDK-ADVANCED.md
> @@ -854,7 +854,7 @@ At this point, the user can create a ovs port using
> the add-port command.
>  It is also possible to detach a port from ovs, the user has to remove the
>  port using the del-port command, then it can be detached using:
>
> -`ovs-appctl netdev-dpdk/port-detach dpdk0`
> +`ovs-appctl netdev-dpdk/port-detach 0000:01:00.0`
>
>  This feature is not supported with VFIO and could not work with some NICs,
>  please refer to the [DPDK Port Hotplug Framework] in order to get more
> diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md
> index 5407794..9a781ff 100644
> --- a/INSTALL.DPDK.md
> +++ b/INSTALL.DPDK.md
> @@ -258,13 +258,13 @@ advanced install guide [INSTALL.DPDK-ADVANCED.md]
>
>       `ovs-vsctl add-br br0 -- set bridge br0 datapath_type=netdev`
>
> -     Now you can add DPDK devices. OVS expects DPDK device names to start
> with
> -     "dpdk" and end with a portid. vswitchd should print (in the log
> file) the
> -     number of dpdk devices found.
> +     Now you can add dpdk devices. The PCI address of the device needs to
> be
> +     set using the 'dpdk-pci' option. vswitchd should print (in the log
> file)
> +     the PCI addresses of dpdk devices found during initialisation.
>
>       ```
> -     ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk
> -     ovs-vsctl add-port br0 dpdk1 -- set Interface dpdk1 type=dpdk
> +     ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk
> options:dpdk-pci=0000:06:00.0
> +     ovs-vsctl add-port br0 dpdk1 -- set Interface dpdk1 type=dpdk
> options:dpdk-pci=0000:06:00.1
>       ```
>
>       After the DPDK ports get added to switch, a polling thread
> continuously polls
> diff --git a/NEWS b/NEWS
> index 9064225..03b9ba8 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -59,6 +59,8 @@ Post-v2.5.0
>         node that device memory is located on if
> CONFIG_RTE_LIBRTE_VHOST_NUMA
>         is enabled in DPDK.
>       * Port Hotplug is now supported.
> +     * DPDK physical ports can now have arbitrary names. The PCI address
> of
> +       the device must be set using the 'dpdk-pci' option.
>     - Increase number of registers to 16.
>     - ovs-benchmark: This utility has been removed due to lack of use and
>       bitrot.
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 3fab52c..d2cceb2 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -58,6 +58,7 @@
>  #include "rte_config.h"
>  #include "rte_mbuf.h"
>  #include "rte_meter.h"
> +#include "rte_pci.h"
>  #include "rte_virtio_net.h"
>
>  VLOG_DEFINE_THIS_MODULE(dpdk);
> @@ -736,7 +737,7 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int
> port_no,
>      /* If the 'sid' is negative, it means that the kernel fails
>       * to obtain the pci numa info.  In that situation, always
>       * use 'SOCKET0'. */
> -    if (type == DPDK_DEV_ETH) {
> +    if (type == DPDK_DEV_ETH && dev->port_id != -1) {
>          sid = rte_eth_dev_socket_id(port_no);
>      } else {
>          sid = rte_lcore_to_socket_id(rte_get_master_lcore());
> @@ -772,9 +773,11 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int
> port_no,
>      dev->requested_n_txq = netdev->n_txq;
>
>      if (type == DPDK_DEV_ETH) {
> -        err = dpdk_eth_dev_init(dev);
> -        if (err) {
> -            goto unlock;
> +        if (dev->port_id != -1) {
> +            err = dpdk_eth_dev_init(dev);
> +            if (err) {
> +                goto unlock;
> +            }
>          }
>          netdev_dpdk_alloc_txq(dev, netdev->n_txq);
>          dev->txq_needs_locking = netdev->n_txq < dev->requested_n_txq;
> @@ -886,21 +889,14 @@ netdev_dpdk_vhost_user_construct(struct netdev
> *netdev)
>  static int
>  netdev_dpdk_construct(struct netdev *netdev)
>  {
> -    unsigned int port_no;
>      int err;
>
>      if (rte_eal_init_ret) {
>          return rte_eal_init_ret;
>      }
>
> -    /* Names always start with "dpdk" */
> -    err = dpdk_dev_parse_name(netdev->name, "dpdk", &port_no);
> -    if (err) {
> -        return err;
> -    }
> -
>      ovs_mutex_lock(&dpdk_mutex);
> -    err = netdev_dpdk_init(netdev, port_no, DPDK_DEV_ETH);
> +    err = netdev_dpdk_init(netdev, -1, DPDK_DEV_ETH);
>      ovs_mutex_unlock(&dpdk_mutex);
>      return err;
>  }
> @@ -979,11 +975,39 @@ netdev_dpdk_get_config(const struct netdev *netdev,
> struct smap *args)
>      return 0;
>  }
>
> +static void
> +netdev_dpdk_associate_pci(struct netdev_dpdk *dev, struct rte_pci_addr
> *addr)
> +{
> +    int i = 0;
> +    struct rte_eth_dev_info info;
> +
> +    for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
> +        if (!rte_eth_dev_is_valid_port(i)) {
> +            continue;
> +        }
> +        rte_eth_dev_info_get(i, &info);
> +        if (!rte_eal_compare_pci_addr(&info.pci_dev->addr, addr)) {
> +            dev->port_id = i;
> +            break;
> +        }
> +    }
> +
> +    if (dev->port_id != -1) {
> +        rte_eth_dev_stop(dev->port_id);
> +        dev->socket_id = rte_eth_dev_socket_id(dev->port_id);
> +        dpdk_eth_dev_init(dev);
> +    } else {
> +        VLOG_INFO("Invalid PCI address for device %s", dev->up.name);
> +    }
> +}
> +
>  static int
>  netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>      int new_n_rxq;
> +    struct rte_pci_addr addr;
> +    const char *pci_str;
>
>      ovs_mutex_lock(&dev->mutex);
>      new_n_rxq = MAX(smap_get_int(args, "n_rxq", dev->requested_n_rxq), 1);
> @@ -991,6 +1015,19 @@ netdev_dpdk_set_config(struct netdev *netdev, const
> struct smap *args)
>          dev->requested_n_rxq = new_n_rxq;
>          netdev_request_reconfigure(netdev);
>      }
> +
> +    if (dev->port_id == -1) {
> +        pci_str = smap_get(args, "dpdk-pci");
> +        if (pci_str != NULL) {
> +            if (!eal_parse_pci_DomBDF(pci_str, &addr)) {
> +                netdev_dpdk_associate_pci(dev, &addr);
> +            } else {
> +                VLOG_ERR("Error parsing PCI address %s, please check
> format",
> +                          pci_str);
> +            }
> +        }
> +    }
> +
>      ovs_mutex_unlock(&dev->mutex);
>
>      return 0;
> @@ -2179,6 +2216,7 @@ netdev_dpdk_port_attach(struct unixctl_conn *conn,
> int argc OVS_UNUSED,
>      int ret;
>      char response[128];
>      uint8_t port_id;
> +    struct rte_eth_dev_info info;
>
>      ovs_mutex_lock(&dpdk_mutex);
>
> @@ -2192,7 +2230,12 @@ netdev_dpdk_port_attach(struct unixctl_conn *conn,
> int argc OVS_UNUSED,
>      }
>
>      snprintf(response, sizeof(response),
> -             "Device '%s' has been attached as 'dpdk%d'", argv[1],
> port_id);
> +             "Device '%s' has been attached", argv[1]);
> +
> +    rte_eth_dev_info_get(port_id, &info);
> +    VLOG_INFO("DPDK PCI device "PCI_PRI_FMT" available",
> +              info.pci_dev->addr.domain, info.pci_dev->addr.bus,
> +              info.pci_dev->addr.devid, info.pci_dev->addr.function);
>
>      ovs_mutex_unlock(&dpdk_mutex);
>      unixctl_command_reply(conn, response);
> @@ -2204,41 +2247,71 @@ netdev_dpdk_port_detach(struct unixctl_conn *conn,
> int argc OVS_UNUSED,
>  {
>      int ret;
>      char response[128];
> -    unsigned int parsed_port;
>      uint8_t port_id;
>      char devname[RTE_ETH_NAME_MAX_LEN];
> +    bool found = false;
> +    int i;
> +    struct rte_pci_addr addr;
> +    struct netdev_dpdk *dev;
> +    struct rte_eth_dev_info info;
>
>      ovs_mutex_lock(&dpdk_mutex);
>
> -    ret = dpdk_dev_parse_name(argv[1], "dpdk", &parsed_port);
> -    if (ret) {
> +    /* Parse the PCI address to a usable format */
> +    if (eal_parse_pci_DomBDF(argv[1], &addr)) {
>          snprintf(response, sizeof(response),
> -                 "'%s' is not a valid port", argv[1]);
> +                 "'%s' is not a valid PCI address format - cannot detach",
> +                 argv[1]);
>          goto error;
>      }
>
> -    port_id = parsed_port;
> +    /* Search for the address in the initialised devices */
> +    for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
> +        if (!rte_eth_dev_is_valid_port(i)) {
> +            continue;
> +        }
> +        rte_eth_dev_info_get(i, &info);
> +        if (!rte_eal_compare_pci_addr(&info.pci_dev->addr, &addr)) {
> +            port_id = i;
> +            found = true;
> +            break;
> +        }
> +    }
>
> -    struct netdev *netdev = netdev_from_name(argv[1]);
> -    if (netdev) {
> -        netdev_close(netdev);
> +    /* Print an error if the device is not already initialised */
> +    if (!found) {
>          snprintf(response, sizeof(response),
> -                 "Port '%s' is being used. Remove it before detaching",
> +                 "'%s' is not an initialized DPDK device - cannot detach",
>                   argv[1]);
>          goto error;
>      }
>
> +    /* Check if the device is already in use by a port, and advise the
> user to
> +     * remove it if so */
> +    LIST_FOR_EACH(dev, list_node, &dpdk_list) {
> +        if (dev->port_id == port_id) {
> +            snprintf(response, sizeof(response),
> +                     "Port '%s' is using PCI device '%s'."
> +                     "Remove it before detaching",
> +                     dev->up.name, argv[1]);
> +            goto error;
> +        }
> +    }
> +
> +    /* It is safe to detach the device */
>      rte_eth_dev_close(port_id);
>
>      ret = rte_eth_dev_detach(port_id, devname);
>      if (ret < 0) {
>          snprintf(response, sizeof(response),
> -                 "Port '%s' can not be detached", argv[1]);
> +                 "Device '%s' can not be detached", argv[1]);
>          goto error;
>      }
>
>      snprintf(response, sizeof(response),
> -             "Port '%s' has been detached", argv[1]);
> +             "Device '%s' has been detached", argv[1]);
> +    VLOG_INFO("DPDK PCI device "PCI_PRI_FMT" no longer available",
> +              addr.domain, addr.bus, addr.devid, addr.function);
>
>      ovs_mutex_unlock(&dpdk_mutex);
>      unixctl_command_reply(conn, response);
> @@ -3295,6 +3368,9 @@ dpdk_init__(const struct smap *ovs_other_config)
>      int argc, argc_tmp;
>      bool auto_determine = true;
>      int err = 0;
> +    int i = 0;
> +    uint8_t nb_ports = 0;
> +    struct rte_eth_dev_info info;
>      cpu_set_t cpuset;
>  #ifndef VHOST_CUSE
>      char *sock_dir_subcomponent;
> @@ -3417,6 +3493,14 @@ dpdk_init__(const struct smap *ovs_other_config)
>
>      atexit(deferred_argv_release);
>
> +    nb_ports = rte_eth_dev_count();
> +    for (i = 0; i < nb_ports; i++) {
> +        rte_eth_dev_info_get(i, &info);
> +        VLOG_INFO("DPDK PCI device "PCI_PRI_FMT" available",
> +                  info.pci_dev->addr.domain, info.pci_dev->addr.bus,
> +                  info.pci_dev->addr.devid, info.pci_dev->addr.function);
> +    }
> +
>      rte_memzone_dump(stdout);
>      rte_eal_init_ret = 0;
>
> --
> 2.4.3
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
>
Ciara Loftus July 19, 2016, 9:53 a.m. UTC | #2
> 

> The idea looks very good to me, thanks for working on it.

> Very high level comments:

Hi Daniele thanks for looking at this.

> 

> Do we need to be limited to pci devices?  Perhaps we can accept the same

> string as rte_eth_dev_attach().

Can you elaborate? For physical devs the string is always the PCI address. Do you mean to include virtual devices as well? This could be an option once we can use the ethdev API with vHost ports if the PMD gets merged.

> Would it be possible to integrate this more with the hotplug patch?  It would

> be nice to avoid introducing extra appctl commands and call

> rte_eth_dev_attach() if needed in netdev_dpdk_construct().

Good idea. I'll look at this for the v4.

Thanks,
Ciara

> Thoughts?

> Thanks,

> Daniele

> 

> 2016-07-15 9:34 GMT-07:00 Ciara Loftus <ciara.loftus@intel.com>:

> 'dpdk' ports no longer have naming restrictions. Now, instead

> of specifying the dpdk port ID as part of the name, the PCI

> address of the device must be specified via the 'dpdk-pci'

> option. eg.

> 

> ovs-vsctl add-port br0 my-port

> ovs-vsctl set Interface my-port type=dpdk

> ovs-vsctl set Interface my-port options:dpdk-pci=0000:06:00.3

> 

> Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>

> 

> v2:

> - remove global pci list

> - remove unnecessary parenthesis

> - remove return from void fn

> - print pci like dpdk

> - fix port ranges

> ---

>  INSTALL.DPDK-ADVANCED.md |   2 +-

>  INSTALL.DPDK.md          |  10 ++--

>  NEWS                     |   2 +

>  lib/netdev-dpdk.c        | 132

> ++++++++++++++++++++++++++++++++++++++---------

>  4 files changed, 116 insertions(+), 30 deletions(-)

> 

> diff --git a/INSTALL.DPDK-ADVANCED.md b/INSTALL.DPDK-ADVANCED.md

> index 61b4e82..7370d03 100644

> --- a/INSTALL.DPDK-ADVANCED.md

> +++ b/INSTALL.DPDK-ADVANCED.md

> @@ -854,7 +854,7 @@ At this point, the user can create a ovs port using the

> add-port command.

>  It is also possible to detach a port from ovs, the user has to remove the

>  port using the del-port command, then it can be detached using:

> 

> -`ovs-appctl netdev-dpdk/port-detach dpdk0`

> +`ovs-appctl netdev-dpdk/port-detach 0000:01:00.0`

> 

>  This feature is not supported with VFIO and could not work with some NICs,

>  please refer to the [DPDK Port Hotplug Framework] in order to get more

> diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md

> index 5407794..9a781ff 100644

> --- a/INSTALL.DPDK.md

> +++ b/INSTALL.DPDK.md

> @@ -258,13 +258,13 @@ advanced install guide [INSTALL.DPDK-

> ADVANCED.md]

> 

>       `ovs-vsctl add-br br0 -- set bridge br0 datapath_type=netdev`

> 

> -     Now you can add DPDK devices. OVS expects DPDK device names to start

> with

> -     "dpdk" and end with a portid. vswitchd should print (in the log file) the

> -     number of dpdk devices found.

> +     Now you can add dpdk devices. The PCI address of the device needs to

> be

> +     set using the 'dpdk-pci' option. vswitchd should print (in the log file)

> +     the PCI addresses of dpdk devices found during initialisation.

> 

>       ```

> -     ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk

> -     ovs-vsctl add-port br0 dpdk1 -- set Interface dpdk1 type=dpdk

> +     ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk

> options:dpdk-pci=0000:06:00.0

> +     ovs-vsctl add-port br0 dpdk1 -- set Interface dpdk1 type=dpdk

> options:dpdk-pci=0000:06:00.1

>       ```

> 

>       After the DPDK ports get added to switch, a polling thread continuously

> polls

> diff --git a/NEWS b/NEWS

> index 9064225..03b9ba8 100644

> --- a/NEWS

> +++ b/NEWS

> @@ -59,6 +59,8 @@ Post-v2.5.0

>         node that device memory is located on if

> CONFIG_RTE_LIBRTE_VHOST_NUMA

>         is enabled in DPDK.

>       * Port Hotplug is now supported.

> +     * DPDK physical ports can now have arbitrary names. The PCI address of

> +       the device must be set using the 'dpdk-pci' option.

>     - Increase number of registers to 16.

>     - ovs-benchmark: This utility has been removed due to lack of use and

>       bitrot.

> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c

> index 3fab52c..d2cceb2 100644

> --- a/lib/netdev-dpdk.c

> +++ b/lib/netdev-dpdk.c

> @@ -58,6 +58,7 @@

>  #include "rte_config.h"

>  #include "rte_mbuf.h"

>  #include "rte_meter.h"

> +#include "rte_pci.h"

>  #include "rte_virtio_net.h"

> 

>  VLOG_DEFINE_THIS_MODULE(dpdk);

> @@ -736,7 +737,7 @@ netdev_dpdk_init(struct netdev *netdev, unsigned

> int port_no,

>      /* If the 'sid' is negative, it means that the kernel fails

>       * to obtain the pci numa info.  In that situation, always

>       * use 'SOCKET0'. */

> -    if (type == DPDK_DEV_ETH) {

> +    if (type == DPDK_DEV_ETH && dev->port_id != -1) {

>          sid = rte_eth_dev_socket_id(port_no);

>      } else {

>          sid = rte_lcore_to_socket_id(rte_get_master_lcore());

> @@ -772,9 +773,11 @@ netdev_dpdk_init(struct netdev *netdev, unsigned

> int port_no,

>      dev->requested_n_txq = netdev->n_txq;

> 

>      if (type == DPDK_DEV_ETH) {

> -        err = dpdk_eth_dev_init(dev);

> -        if (err) {

> -            goto unlock;

> +        if (dev->port_id != -1) {

> +            err = dpdk_eth_dev_init(dev);

> +            if (err) {

> +                goto unlock;

> +            }

>          }

>          netdev_dpdk_alloc_txq(dev, netdev->n_txq);

>          dev->txq_needs_locking = netdev->n_txq < dev->requested_n_txq;

> @@ -886,21 +889,14 @@ netdev_dpdk_vhost_user_construct(struct netdev

> *netdev)

>  static int

>  netdev_dpdk_construct(struct netdev *netdev)

>  {

> -    unsigned int port_no;

>      int err;

> 

>      if (rte_eal_init_ret) {

>          return rte_eal_init_ret;

>      }

> 

> -    /* Names always start with "dpdk" */

> -    err = dpdk_dev_parse_name(netdev->name, "dpdk", &port_no);

> -    if (err) {

> -        return err;

> -    }

> -

>      ovs_mutex_lock(&dpdk_mutex);

> -    err = netdev_dpdk_init(netdev, port_no, DPDK_DEV_ETH);

> +    err = netdev_dpdk_init(netdev, -1, DPDK_DEV_ETH);

>      ovs_mutex_unlock(&dpdk_mutex);

>      return err;

>  }

> @@ -979,11 +975,39 @@ netdev_dpdk_get_config(const struct netdev

> *netdev, struct smap *args)

>      return 0;

>  }

> 

> +static void

> +netdev_dpdk_associate_pci(struct netdev_dpdk *dev, struct rte_pci_addr

> *addr)

> +{

> +    int i = 0;

> +    struct rte_eth_dev_info info;

> +

> +    for (i = 0; i < RTE_MAX_ETHPORTS; i++) {

> +        if (!rte_eth_dev_is_valid_port(i)) {

> +            continue;

> +        }

> +        rte_eth_dev_info_get(i, &info);

> +        if (!rte_eal_compare_pci_addr(&info.pci_dev->addr, addr)) {

> +            dev->port_id = i;

> +            break;

> +        }

> +    }

> +

> +    if (dev->port_id != -1) {

> +        rte_eth_dev_stop(dev->port_id);

> +        dev->socket_id = rte_eth_dev_socket_id(dev->port_id);

> +        dpdk_eth_dev_init(dev);

> +    } else {

> +        VLOG_INFO("Invalid PCI address for device %s", dev->up.name);

> +    }

> +}

> +

>  static int

>  netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args)

>  {

>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

>      int new_n_rxq;

> +    struct rte_pci_addr addr;

> +    const char *pci_str;

> 

>      ovs_mutex_lock(&dev->mutex);

>      new_n_rxq = MAX(smap_get_int(args, "n_rxq", dev->requested_n_rxq),

> 1);

> @@ -991,6 +1015,19 @@ netdev_dpdk_set_config(struct netdev *netdev,

> const struct smap *args)

>          dev->requested_n_rxq = new_n_rxq;

>          netdev_request_reconfigure(netdev);

>      }

> +

> +    if (dev->port_id == -1) {

> +        pci_str = smap_get(args, "dpdk-pci");

> +        if (pci_str != NULL) {

> +            if (!eal_parse_pci_DomBDF(pci_str, &addr)) {

> +                netdev_dpdk_associate_pci(dev, &addr);

> +            } else {

> +                VLOG_ERR("Error parsing PCI address %s, please check format",

> +                          pci_str);

> +            }

> +        }

> +    }

> +

>      ovs_mutex_unlock(&dev->mutex);

> 

>      return 0;

> @@ -2179,6 +2216,7 @@ netdev_dpdk_port_attach(struct unixctl_conn

> *conn, int argc OVS_UNUSED,

>      int ret;

>      char response[128];

>      uint8_t port_id;

> +    struct rte_eth_dev_info info;

> 

>      ovs_mutex_lock(&dpdk_mutex);

> 

> @@ -2192,7 +2230,12 @@ netdev_dpdk_port_attach(struct unixctl_conn

> *conn, int argc OVS_UNUSED,

>      }

> 

>      snprintf(response, sizeof(response),

> -             "Device '%s' has been attached as 'dpdk%d'", argv[1], port_id);

> +             "Device '%s' has been attached", argv[1]);

> +

> +    rte_eth_dev_info_get(port_id, &info);

> +    VLOG_INFO("DPDK PCI device "PCI_PRI_FMT" available",

> +              info.pci_dev->addr.domain, info.pci_dev->addr.bus,

> +              info.pci_dev->addr.devid, info.pci_dev->addr.function);

> 

>      ovs_mutex_unlock(&dpdk_mutex);

>      unixctl_command_reply(conn, response);

> @@ -2204,41 +2247,71 @@ netdev_dpdk_port_detach(struct unixctl_conn

> *conn, int argc OVS_UNUSED,

>  {

>      int ret;

>      char response[128];

> -    unsigned int parsed_port;

>      uint8_t port_id;

>      char devname[RTE_ETH_NAME_MAX_LEN];

> +    bool found = false;

> +    int i;

> +    struct rte_pci_addr addr;

> +    struct netdev_dpdk *dev;

> +    struct rte_eth_dev_info info;

> 

>      ovs_mutex_lock(&dpdk_mutex);

> 

> -    ret = dpdk_dev_parse_name(argv[1], "dpdk", &parsed_port);

> -    if (ret) {

> +    /* Parse the PCI address to a usable format */

> +    if (eal_parse_pci_DomBDF(argv[1], &addr)) {

>          snprintf(response, sizeof(response),

> -                 "'%s' is not a valid port", argv[1]);

> +                 "'%s' is not a valid PCI address format - cannot detach",

> +                 argv[1]);

>          goto error;

>      }

> 

> -    port_id = parsed_port;

> +    /* Search for the address in the initialised devices */

> +    for (i = 0; i < RTE_MAX_ETHPORTS; i++) {

> +        if (!rte_eth_dev_is_valid_port(i)) {

> +            continue;

> +        }

> +        rte_eth_dev_info_get(i, &info);

> +        if (!rte_eal_compare_pci_addr(&info.pci_dev->addr, &addr)) {

> +            port_id = i;

> +            found = true;

> +            break;

> +        }

> +    }

> 

> -    struct netdev *netdev = netdev_from_name(argv[1]);

> -    if (netdev) {

> -        netdev_close(netdev);

> +    /* Print an error if the device is not already initialised */

> +    if (!found) {

>          snprintf(response, sizeof(response),

> -                 "Port '%s' is being used. Remove it before detaching",

> +                 "'%s' is not an initialized DPDK device - cannot detach",

>                   argv[1]);

>          goto error;

>      }

> 

> +    /* Check if the device is already in use by a port, and advise the user to

> +     * remove it if so */

> +    LIST_FOR_EACH(dev, list_node, &dpdk_list) {

> +        if (dev->port_id == port_id) {

> +            snprintf(response, sizeof(response),

> +                     "Port '%s' is using PCI device '%s'."

> +                     "Remove it before detaching",

> +                     dev->up.name, argv[1]);

> +            goto error;

> +        }

> +    }

> +

> +    /* It is safe to detach the device */

>      rte_eth_dev_close(port_id);

> 

>      ret = rte_eth_dev_detach(port_id, devname);

>      if (ret < 0) {

>          snprintf(response, sizeof(response),

> -                 "Port '%s' can not be detached", argv[1]);

> +                 "Device '%s' can not be detached", argv[1]);

>          goto error;

>      }

> 

>      snprintf(response, sizeof(response),

> -             "Port '%s' has been detached", argv[1]);

> +             "Device '%s' has been detached", argv[1]);

> +    VLOG_INFO("DPDK PCI device "PCI_PRI_FMT" no longer available",

> +              addr.domain, addr.bus, addr.devid, addr.function);

> 

>      ovs_mutex_unlock(&dpdk_mutex);

>      unixctl_command_reply(conn, response);

> @@ -3295,6 +3368,9 @@ dpdk_init__(const struct smap *ovs_other_config)

>      int argc, argc_tmp;

>      bool auto_determine = true;

>      int err = 0;

> +    int i = 0;

> +    uint8_t nb_ports = 0;

> +    struct rte_eth_dev_info info;

>      cpu_set_t cpuset;

>  #ifndef VHOST_CUSE

>      char *sock_dir_subcomponent;

> @@ -3417,6 +3493,14 @@ dpdk_init__(const struct smap

> *ovs_other_config)

> 

>      atexit(deferred_argv_release);

> 

> +    nb_ports = rte_eth_dev_count();

> +    for (i = 0; i < nb_ports; i++) {

> +        rte_eth_dev_info_get(i, &info);

> +        VLOG_INFO("DPDK PCI device "PCI_PRI_FMT" available",

> +                  info.pci_dev->addr.domain, info.pci_dev->addr.bus,

> +                  info.pci_dev->addr.devid, info.pci_dev->addr.function);

> +    }

> +

>      rte_memzone_dump(stdout);

>      rte_eal_init_ret = 0;

> 

> --

> 2.4.3

> 

> _______________________________________________

> dev mailing list

> dev@openvswitch.org

> http://openvswitch.org/mailman/listinfo/dev
Daniele Di Proietto July 22, 2016, 12:07 a.m. UTC | #3
2016-07-19 2:53 GMT-07:00 Loftus, Ciara <ciara.loftus@intel.com>:

> >
> > The idea looks very good to me, thanks for working on it.
> > Very high level comments:
> Hi Daniele thanks for looking at this.
>
> >
> > Do we need to be limited to pci devices?  Perhaps we can accept the same
> > string as rte_eth_dev_attach().
> Can you elaborate? For physical devs the string is always the PCI address.
> Do you mean to include virtual devices as well? This could be an option
> once we can use the ethdev API with vHost ports if the PMD gets merged.
>

I agree with you that for vhost devices we can wait for vHost PMD.  I was
thinking more about devices like DPDK "af_packet" and "pcap".  Can we use
this interface to create those as well?

Thanks,

Daniele


>
> > Would it be possible to integrate this more with the hotplug patch?  It
> would
> > be nice to avoid introducing extra appctl commands and call
> > rte_eth_dev_attach() if needed in netdev_dpdk_construct().
> Good idea. I'll look at this for the v4.
>
> Thanks,
> Ciara
>
> > Thoughts?
> > Thanks,
> > Daniele
> >
> > 2016-07-15 9:34 GMT-07:00 Ciara Loftus <ciara.loftus@intel.com>:
> > 'dpdk' ports no longer have naming restrictions. Now, instead
> > of specifying the dpdk port ID as part of the name, the PCI
> > address of the device must be specified via the 'dpdk-pci'
> > option. eg.
> >
> > ovs-vsctl add-port br0 my-port
> > ovs-vsctl set Interface my-port type=dpdk
> > ovs-vsctl set Interface my-port options:dpdk-pci=0000:06:00.3
> >
> > Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
> >
> > v2:
> > - remove global pci list
> > - remove unnecessary parenthesis
> > - remove return from void fn
> > - print pci like dpdk
> > - fix port ranges
> > ---
> >  INSTALL.DPDK-ADVANCED.md |   2 +-
> >  INSTALL.DPDK.md          |  10 ++--
> >  NEWS                     |   2 +
> >  lib/netdev-dpdk.c        | 132
> > ++++++++++++++++++++++++++++++++++++++---------
> >  4 files changed, 116 insertions(+), 30 deletions(-)
> >
> > diff --git a/INSTALL.DPDK-ADVANCED.md b/INSTALL.DPDK-ADVANCED.md
> > index 61b4e82..7370d03 100644
> > --- a/INSTALL.DPDK-ADVANCED.md
> > +++ b/INSTALL.DPDK-ADVANCED.md
> > @@ -854,7 +854,7 @@ At this point, the user can create a ovs port using
> the
> > add-port command.
> >  It is also possible to detach a port from ovs, the user has to remove
> the
> >  port using the del-port command, then it can be detached using:
> >
> > -`ovs-appctl netdev-dpdk/port-detach dpdk0`
> > +`ovs-appctl netdev-dpdk/port-detach 0000:01:00.0`
> >
> >  This feature is not supported with VFIO and could not work with some
> NICs,
> >  please refer to the [DPDK Port Hotplug Framework] in order to get more
> > diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md
> > index 5407794..9a781ff 100644
> > --- a/INSTALL.DPDK.md
> > +++ b/INSTALL.DPDK.md
> > @@ -258,13 +258,13 @@ advanced install guide [INSTALL.DPDK-
> > ADVANCED.md]
> >
> >       `ovs-vsctl add-br br0 -- set bridge br0 datapath_type=netdev`
> >
> > -     Now you can add DPDK devices. OVS expects DPDK device names to
> start
> > with
> > -     "dpdk" and end with a portid. vswitchd should print (in the log
> file) the
> > -     number of dpdk devices found.
> > +     Now you can add dpdk devices. The PCI address of the device needs
> to
> > be
> > +     set using the 'dpdk-pci' option. vswitchd should print (in the log
> file)
> > +     the PCI addresses of dpdk devices found during initialisation.
> >
> >       ```
> > -     ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk
> > -     ovs-vsctl add-port br0 dpdk1 -- set Interface dpdk1 type=dpdk
> > +     ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk
> > options:dpdk-pci=0000:06:00.0
> > +     ovs-vsctl add-port br0 dpdk1 -- set Interface dpdk1 type=dpdk
> > options:dpdk-pci=0000:06:00.1
> >       ```
> >
> >       After the DPDK ports get added to switch, a polling thread
> continuously
> > polls
> > diff --git a/NEWS b/NEWS
> > index 9064225..03b9ba8 100644
> > --- a/NEWS
> > +++ b/NEWS
> > @@ -59,6 +59,8 @@ Post-v2.5.0
> >         node that device memory is located on if
> > CONFIG_RTE_LIBRTE_VHOST_NUMA
> >         is enabled in DPDK.
> >       * Port Hotplug is now supported.
> > +     * DPDK physical ports can now have arbitrary names. The PCI
> address of
> > +       the device must be set using the 'dpdk-pci' option.
> >     - Increase number of registers to 16.
> >     - ovs-benchmark: This utility has been removed due to lack of use and
> >       bitrot.
> > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> > index 3fab52c..d2cceb2 100644
> > --- a/lib/netdev-dpdk.c
> > +++ b/lib/netdev-dpdk.c
> > @@ -58,6 +58,7 @@
> >  #include "rte_config.h"
> >  #include "rte_mbuf.h"
> >  #include "rte_meter.h"
> > +#include "rte_pci.h"
> >  #include "rte_virtio_net.h"
> >
> >  VLOG_DEFINE_THIS_MODULE(dpdk);
> > @@ -736,7 +737,7 @@ netdev_dpdk_init(struct netdev *netdev, unsigned
> > int port_no,
> >      /* If the 'sid' is negative, it means that the kernel fails
> >       * to obtain the pci numa info.  In that situation, always
> >       * use 'SOCKET0'. */
> > -    if (type == DPDK_DEV_ETH) {
> > +    if (type == DPDK_DEV_ETH && dev->port_id != -1) {
> >          sid = rte_eth_dev_socket_id(port_no);
> >      } else {
> >          sid = rte_lcore_to_socket_id(rte_get_master_lcore());
> > @@ -772,9 +773,11 @@ netdev_dpdk_init(struct netdev *netdev, unsigned
> > int port_no,
> >      dev->requested_n_txq = netdev->n_txq;
> >
> >      if (type == DPDK_DEV_ETH) {
> > -        err = dpdk_eth_dev_init(dev);
> > -        if (err) {
> > -            goto unlock;
> > +        if (dev->port_id != -1) {
> > +            err = dpdk_eth_dev_init(dev);
> > +            if (err) {
> > +                goto unlock;
> > +            }
> >          }
> >          netdev_dpdk_alloc_txq(dev, netdev->n_txq);
> >          dev->txq_needs_locking = netdev->n_txq < dev->requested_n_txq;
> > @@ -886,21 +889,14 @@ netdev_dpdk_vhost_user_construct(struct netdev
> > *netdev)
> >  static int
> >  netdev_dpdk_construct(struct netdev *netdev)
> >  {
> > -    unsigned int port_no;
> >      int err;
> >
> >      if (rte_eal_init_ret) {
> >          return rte_eal_init_ret;
> >      }
> >
> > -    /* Names always start with "dpdk" */
> > -    err = dpdk_dev_parse_name(netdev->name, "dpdk", &port_no);
> > -    if (err) {
> > -        return err;
> > -    }
> > -
> >      ovs_mutex_lock(&dpdk_mutex);
> > -    err = netdev_dpdk_init(netdev, port_no, DPDK_DEV_ETH);
> > +    err = netdev_dpdk_init(netdev, -1, DPDK_DEV_ETH);
> >      ovs_mutex_unlock(&dpdk_mutex);
> >      return err;
> >  }
> > @@ -979,11 +975,39 @@ netdev_dpdk_get_config(const struct netdev
> > *netdev, struct smap *args)
> >      return 0;
> >  }
> >
> > +static void
> > +netdev_dpdk_associate_pci(struct netdev_dpdk *dev, struct rte_pci_addr
> > *addr)
> > +{
> > +    int i = 0;
> > +    struct rte_eth_dev_info info;
> > +
> > +    for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
> > +        if (!rte_eth_dev_is_valid_port(i)) {
> > +            continue;
> > +        }
> > +        rte_eth_dev_info_get(i, &info);
> > +        if (!rte_eal_compare_pci_addr(&info.pci_dev->addr, addr)) {
> > +            dev->port_id = i;
> > +            break;
> > +        }
> > +    }
> > +
> > +    if (dev->port_id != -1) {
> > +        rte_eth_dev_stop(dev->port_id);
> > +        dev->socket_id = rte_eth_dev_socket_id(dev->port_id);
> > +        dpdk_eth_dev_init(dev);
> > +    } else {
> > +        VLOG_INFO("Invalid PCI address for device %s", dev->up.name);
> > +    }
> > +}
> > +
> >  static int
> >  netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args)
> >  {
> >      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> >      int new_n_rxq;
> > +    struct rte_pci_addr addr;
> > +    const char *pci_str;
> >
> >      ovs_mutex_lock(&dev->mutex);
> >      new_n_rxq = MAX(smap_get_int(args, "n_rxq", dev->requested_n_rxq),
> > 1);
> > @@ -991,6 +1015,19 @@ netdev_dpdk_set_config(struct netdev *netdev,
> > const struct smap *args)
> >          dev->requested_n_rxq = new_n_rxq;
> >          netdev_request_reconfigure(netdev);
> >      }
> > +
> > +    if (dev->port_id == -1) {
> > +        pci_str = smap_get(args, "dpdk-pci");
> > +        if (pci_str != NULL) {
> > +            if (!eal_parse_pci_DomBDF(pci_str, &addr)) {
> > +                netdev_dpdk_associate_pci(dev, &addr);
> > +            } else {
> > +                VLOG_ERR("Error parsing PCI address %s, please check
> format",
> > +                          pci_str);
> > +            }
> > +        }
> > +    }
> > +
> >      ovs_mutex_unlock(&dev->mutex);
> >
> >      return 0;
> > @@ -2179,6 +2216,7 @@ netdev_dpdk_port_attach(struct unixctl_conn
> > *conn, int argc OVS_UNUSED,
> >      int ret;
> >      char response[128];
> >      uint8_t port_id;
> > +    struct rte_eth_dev_info info;
> >
> >      ovs_mutex_lock(&dpdk_mutex);
> >
> > @@ -2192,7 +2230,12 @@ netdev_dpdk_port_attach(struct unixctl_conn
> > *conn, int argc OVS_UNUSED,
> >      }
> >
> >      snprintf(response, sizeof(response),
> > -             "Device '%s' has been attached as 'dpdk%d'", argv[1],
> port_id);
> > +             "Device '%s' has been attached", argv[1]);
> > +
> > +    rte_eth_dev_info_get(port_id, &info);
> > +    VLOG_INFO("DPDK PCI device "PCI_PRI_FMT" available",
> > +              info.pci_dev->addr.domain, info.pci_dev->addr.bus,
> > +              info.pci_dev->addr.devid, info.pci_dev->addr.function);
> >
> >      ovs_mutex_unlock(&dpdk_mutex);
> >      unixctl_command_reply(conn, response);
> > @@ -2204,41 +2247,71 @@ netdev_dpdk_port_detach(struct unixctl_conn
> > *conn, int argc OVS_UNUSED,
> >  {
> >      int ret;
> >      char response[128];
> > -    unsigned int parsed_port;
> >      uint8_t port_id;
> >      char devname[RTE_ETH_NAME_MAX_LEN];
> > +    bool found = false;
> > +    int i;
> > +    struct rte_pci_addr addr;
> > +    struct netdev_dpdk *dev;
> > +    struct rte_eth_dev_info info;
> >
> >      ovs_mutex_lock(&dpdk_mutex);
> >
> > -    ret = dpdk_dev_parse_name(argv[1], "dpdk", &parsed_port);
> > -    if (ret) {
> > +    /* Parse the PCI address to a usable format */
> > +    if (eal_parse_pci_DomBDF(argv[1], &addr)) {
> >          snprintf(response, sizeof(response),
> > -                 "'%s' is not a valid port", argv[1]);
> > +                 "'%s' is not a valid PCI address format - cannot
> detach",
> > +                 argv[1]);
> >          goto error;
> >      }
> >
> > -    port_id = parsed_port;
> > +    /* Search for the address in the initialised devices */
> > +    for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
> > +        if (!rte_eth_dev_is_valid_port(i)) {
> > +            continue;
> > +        }
> > +        rte_eth_dev_info_get(i, &info);
> > +        if (!rte_eal_compare_pci_addr(&info.pci_dev->addr, &addr)) {
> > +            port_id = i;
> > +            found = true;
> > +            break;
> > +        }
> > +    }
> >
> > -    struct netdev *netdev = netdev_from_name(argv[1]);
> > -    if (netdev) {
> > -        netdev_close(netdev);
> > +    /* Print an error if the device is not already initialised */
> > +    if (!found) {
> >          snprintf(response, sizeof(response),
> > -                 "Port '%s' is being used. Remove it before detaching",
> > +                 "'%s' is not an initialized DPDK device - cannot
> detach",
> >                   argv[1]);
> >          goto error;
> >      }
> >
> > +    /* Check if the device is already in use by a port, and advise the
> user to
> > +     * remove it if so */
> > +    LIST_FOR_EACH(dev, list_node, &dpdk_list) {
> > +        if (dev->port_id == port_id) {
> > +            snprintf(response, sizeof(response),
> > +                     "Port '%s' is using PCI device '%s'."
> > +                     "Remove it before detaching",
> > +                     dev->up.name, argv[1]);
> > +            goto error;
> > +        }
> > +    }
> > +
> > +    /* It is safe to detach the device */
> >      rte_eth_dev_close(port_id);
> >
> >      ret = rte_eth_dev_detach(port_id, devname);
> >      if (ret < 0) {
> >          snprintf(response, sizeof(response),
> > -                 "Port '%s' can not be detached", argv[1]);
> > +                 "Device '%s' can not be detached", argv[1]);
> >          goto error;
> >      }
> >
> >      snprintf(response, sizeof(response),
> > -             "Port '%s' has been detached", argv[1]);
> > +             "Device '%s' has been detached", argv[1]);
> > +    VLOG_INFO("DPDK PCI device "PCI_PRI_FMT" no longer available",
> > +              addr.domain, addr.bus, addr.devid, addr.function);
> >
> >      ovs_mutex_unlock(&dpdk_mutex);
> >      unixctl_command_reply(conn, response);
> > @@ -3295,6 +3368,9 @@ dpdk_init__(const struct smap *ovs_other_config)
> >      int argc, argc_tmp;
> >      bool auto_determine = true;
> >      int err = 0;
> > +    int i = 0;
> > +    uint8_t nb_ports = 0;
> > +    struct rte_eth_dev_info info;
> >      cpu_set_t cpuset;
> >  #ifndef VHOST_CUSE
> >      char *sock_dir_subcomponent;
> > @@ -3417,6 +3493,14 @@ dpdk_init__(const struct smap
> > *ovs_other_config)
> >
> >      atexit(deferred_argv_release);
> >
> > +    nb_ports = rte_eth_dev_count();
> > +    for (i = 0; i < nb_ports; i++) {
> > +        rte_eth_dev_info_get(i, &info);
> > +        VLOG_INFO("DPDK PCI device "PCI_PRI_FMT" available",
> > +                  info.pci_dev->addr.domain, info.pci_dev->addr.bus,
> > +                  info.pci_dev->addr.devid,
> info.pci_dev->addr.function);
> > +    }
> > +
> >      rte_memzone_dump(stdout);
> >      rte_eal_init_ret = 0;
> >
> > --
> > 2.4.3
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > http://openvswitch.org/mailman/listinfo/dev
>
>
Ciara Loftus July 26, 2016, 4:36 p.m. UTC | #4
> 2016-07-19 2:53 GMT-07:00 Loftus, Ciara <ciara.loftus@intel.com>:

> >

> > The idea looks very good to me, thanks for working on it.

> > Very high level comments:

> Hi Daniele thanks for looking at this.

> 

> >

> > Do we need to be limited to pci devices?  Perhaps we can accept the same

> > string as rte_eth_dev_attach().

> Can you elaborate? For physical devs the string is always the PCI address. Do

> you mean to include virtual devices as well? This could be an option once we

> can use the ethdev API with vHost ports if the PMD gets merged.

> 

> I agree with you that for vhost devices we can wait for vHost PMD.  I was

> thinking more about devices like DPDK "af_packet" and "pcap".  Can we use

> this interface to create those as well?


Understood. It’s possible. If the string provided isn't PCI format we can assume it's a vdev and provide the args to attach() without searching through the PCI devices and trying to find a match first.
I can include this in the v4. However I won't be able to thoroughly test all 20+ DPDK PMDs and ensure the attach() works for them all. I tested a few - some worked out of the box eg. eth_null, some didn’t eg af_packet.
I imagine that the netdev_class dpdk_class functions only happen to be compatible with some PMDs straight away. Those that aren't compatible will require new port types (and modifications to existing / new netdev functions) which I think is beyond the scope of this patch.

Thanks,
Ciara

> Thanks,

> Daniele

> 

> 

> > Would it be possible to integrate this more with the hotplug patch?  It

> would

> > be nice to avoid introducing extra appctl commands and call

> > rte_eth_dev_attach() if needed in netdev_dpdk_construct().

> Good idea. I'll look at this for the v4.

> 

> Thanks,

> Ciara

> 

> > Thoughts?

> > Thanks,

> > Daniele

> >

> > 2016-07-15 9:34 GMT-07:00 Ciara Loftus <ciara.loftus@intel.com>:

> > 'dpdk' ports no longer have naming restrictions. Now, instead

> > of specifying the dpdk port ID as part of the name, the PCI

> > address of the device must be specified via the 'dpdk-pci'

> > option. eg.

> >

> > ovs-vsctl add-port br0 my-port

> > ovs-vsctl set Interface my-port type=dpdk

> > ovs-vsctl set Interface my-port options:dpdk-pci=0000:06:00.3

> >

> > Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>

> >

> > v2:

> > - remove global pci list

> > - remove unnecessary parenthesis

> > - remove return from void fn

> > - print pci like dpdk

> > - fix port ranges

> > ---

> >  INSTALL.DPDK-ADVANCED.md |   2 +-

> >  INSTALL.DPDK.md          |  10 ++--

> >  NEWS                     |   2 +

> >  lib/netdev-dpdk.c        | 132

> > ++++++++++++++++++++++++++++++++++++++---------

> >  4 files changed, 116 insertions(+), 30 deletions(-)

> >

> > diff --git a/INSTALL.DPDK-ADVANCED.md b/INSTALL.DPDK-ADVANCED.md

> > index 61b4e82..7370d03 100644

> > --- a/INSTALL.DPDK-ADVANCED.md

> > +++ b/INSTALL.DPDK-ADVANCED.md

> > @@ -854,7 +854,7 @@ At this point, the user can create a ovs port using

> the

> > add-port command.

> >  It is also possible to detach a port from ovs, the user has to remove the

> >  port using the del-port command, then it can be detached using:

> >

> > -`ovs-appctl netdev-dpdk/port-detach dpdk0`

> > +`ovs-appctl netdev-dpdk/port-detach 0000:01:00.0`

> >

> >  This feature is not supported with VFIO and could not work with some

> NICs,

> >  please refer to the [DPDK Port Hotplug Framework] in order to get more

> > diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md

> > index 5407794..9a781ff 100644

> > --- a/INSTALL.DPDK.md

> > +++ b/INSTALL.DPDK.md

> > @@ -258,13 +258,13 @@ advanced install guide [INSTALL.DPDK-

> > ADVANCED.md]

> >

> >       `ovs-vsctl add-br br0 -- set bridge br0 datapath_type=netdev`

> >

> > -     Now you can add DPDK devices. OVS expects DPDK device names to

> start

> > with

> > -     "dpdk" and end with a portid. vswitchd should print (in the log file) the

> > -     number of dpdk devices found.

> > +     Now you can add dpdk devices. The PCI address of the device needs to

> > be

> > +     set using the 'dpdk-pci' option. vswitchd should print (in the log file)

> > +     the PCI addresses of dpdk devices found during initialisation.

> >

> >       ```

> > -     ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk

> > -     ovs-vsctl add-port br0 dpdk1 -- set Interface dpdk1 type=dpdk

> > +     ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk

> > options:dpdk-pci=0000:06:00.0

> > +     ovs-vsctl add-port br0 dpdk1 -- set Interface dpdk1 type=dpdk

> > options:dpdk-pci=0000:06:00.1

> >       ```

> >

> >       After the DPDK ports get added to switch, a polling thread continuously

> > polls

> > diff --git a/NEWS b/NEWS

> > index 9064225..03b9ba8 100644

> > --- a/NEWS

> > +++ b/NEWS

> > @@ -59,6 +59,8 @@ Post-v2.5.0

> >         node that device memory is located on if

> > CONFIG_RTE_LIBRTE_VHOST_NUMA

> >         is enabled in DPDK.

> >       * Port Hotplug is now supported.

> > +     * DPDK physical ports can now have arbitrary names. The PCI address

> of

> > +       the device must be set using the 'dpdk-pci' option.

> >     - Increase number of registers to 16.

> >     - ovs-benchmark: This utility has been removed due to lack of use and

> >       bitrot.

> > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c

> > index 3fab52c..d2cceb2 100644

> > --- a/lib/netdev-dpdk.c

> > +++ b/lib/netdev-dpdk.c

> > @@ -58,6 +58,7 @@

> >  #include "rte_config.h"

> >  #include "rte_mbuf.h"

> >  #include "rte_meter.h"

> > +#include "rte_pci.h"

> >  #include "rte_virtio_net.h"

> >

> >  VLOG_DEFINE_THIS_MODULE(dpdk);

> > @@ -736,7 +737,7 @@ netdev_dpdk_init(struct netdev *netdev, unsigned

> > int port_no,

> >      /* If the 'sid' is negative, it means that the kernel fails

> >       * to obtain the pci numa info.  In that situation, always

> >       * use 'SOCKET0'. */

> > -    if (type == DPDK_DEV_ETH) {

> > +    if (type == DPDK_DEV_ETH && dev->port_id != -1) {

> >          sid = rte_eth_dev_socket_id(port_no);

> >      } else {

> >          sid = rte_lcore_to_socket_id(rte_get_master_lcore());

> > @@ -772,9 +773,11 @@ netdev_dpdk_init(struct netdev *netdev,

> unsigned

> > int port_no,

> >      dev->requested_n_txq = netdev->n_txq;

> >

> >      if (type == DPDK_DEV_ETH) {

> > -        err = dpdk_eth_dev_init(dev);

> > -        if (err) {

> > -            goto unlock;

> > +        if (dev->port_id != -1) {

> > +            err = dpdk_eth_dev_init(dev);

> > +            if (err) {

> > +                goto unlock;

> > +            }

> >          }

> >          netdev_dpdk_alloc_txq(dev, netdev->n_txq);

> >          dev->txq_needs_locking = netdev->n_txq < dev->requested_n_txq;

> > @@ -886,21 +889,14 @@ netdev_dpdk_vhost_user_construct(struct

> netdev

> > *netdev)

> >  static int

> >  netdev_dpdk_construct(struct netdev *netdev)

> >  {

> > -    unsigned int port_no;

> >      int err;

> >

> >      if (rte_eal_init_ret) {

> >          return rte_eal_init_ret;

> >      }

> >

> > -    /* Names always start with "dpdk" */

> > -    err = dpdk_dev_parse_name(netdev->name, "dpdk", &port_no);

> > -    if (err) {

> > -        return err;

> > -    }

> > -

> >      ovs_mutex_lock(&dpdk_mutex);

> > -    err = netdev_dpdk_init(netdev, port_no, DPDK_DEV_ETH);

> > +    err = netdev_dpdk_init(netdev, -1, DPDK_DEV_ETH);

> >      ovs_mutex_unlock(&dpdk_mutex);

> >      return err;

> >  }

> > @@ -979,11 +975,39 @@ netdev_dpdk_get_config(const struct netdev

> > *netdev, struct smap *args)

> >      return 0;

> >  }

> >

> > +static void

> > +netdev_dpdk_associate_pci(struct netdev_dpdk *dev, struct

> rte_pci_addr

> > *addr)

> > +{

> > +    int i = 0;

> > +    struct rte_eth_dev_info info;

> > +

> > +    for (i = 0; i < RTE_MAX_ETHPORTS; i++) {

> > +        if (!rte_eth_dev_is_valid_port(i)) {

> > +            continue;

> > +        }

> > +        rte_eth_dev_info_get(i, &info);

> > +        if (!rte_eal_compare_pci_addr(&info.pci_dev->addr, addr)) {

> > +            dev->port_id = i;

> > +            break;

> > +        }

> > +    }

> > +

> > +    if (dev->port_id != -1) {

> > +        rte_eth_dev_stop(dev->port_id);

> > +        dev->socket_id = rte_eth_dev_socket_id(dev->port_id);

> > +        dpdk_eth_dev_init(dev);

> > +    } else {

> > +        VLOG_INFO("Invalid PCI address for device %s", dev->up.name);

> > +    }

> > +}

> > +

> >  static int

> >  netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args)

> >  {

> >      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> >      int new_n_rxq;

> > +    struct rte_pci_addr addr;

> > +    const char *pci_str;

> >

> >      ovs_mutex_lock(&dev->mutex);

> >      new_n_rxq = MAX(smap_get_int(args, "n_rxq", dev-

> >requested_n_rxq),

> > 1);

> > @@ -991,6 +1015,19 @@ netdev_dpdk_set_config(struct netdev *netdev,

> > const struct smap *args)

> >          dev->requested_n_rxq = new_n_rxq;

> >          netdev_request_reconfigure(netdev);

> >      }

> > +

> > +    if (dev->port_id == -1) {

> > +        pci_str = smap_get(args, "dpdk-pci");

> > +        if (pci_str != NULL) {

> > +            if (!eal_parse_pci_DomBDF(pci_str, &addr)) {

> > +                netdev_dpdk_associate_pci(dev, &addr);

> > +            } else {

> > +                VLOG_ERR("Error parsing PCI address %s, please check format",

> > +                          pci_str);

> > +            }

> > +        }

> > +    }

> > +

> >      ovs_mutex_unlock(&dev->mutex);

> >

> >      return 0;

> > @@ -2179,6 +2216,7 @@ netdev_dpdk_port_attach(struct unixctl_conn

> > *conn, int argc OVS_UNUSED,

> >      int ret;

> >      char response[128];

> >      uint8_t port_id;

> > +    struct rte_eth_dev_info info;

> >

> >      ovs_mutex_lock(&dpdk_mutex);

> >

> > @@ -2192,7 +2230,12 @@ netdev_dpdk_port_attach(struct unixctl_conn

> > *conn, int argc OVS_UNUSED,

> >      }

> >

> >      snprintf(response, sizeof(response),

> > -             "Device '%s' has been attached as 'dpdk%d'", argv[1], port_id);

> > +             "Device '%s' has been attached", argv[1]);

> > +

> > +    rte_eth_dev_info_get(port_id, &info);

> > +    VLOG_INFO("DPDK PCI device "PCI_PRI_FMT" available",

> > +              info.pci_dev->addr.domain, info.pci_dev->addr.bus,

> > +              info.pci_dev->addr.devid, info.pci_dev->addr.function);

> >

> >      ovs_mutex_unlock(&dpdk_mutex);

> >      unixctl_command_reply(conn, response);

> > @@ -2204,41 +2247,71 @@ netdev_dpdk_port_detach(struct unixctl_conn

> > *conn, int argc OVS_UNUSED,

> >  {

> >      int ret;

> >      char response[128];

> > -    unsigned int parsed_port;

> >      uint8_t port_id;

> >      char devname[RTE_ETH_NAME_MAX_LEN];

> > +    bool found = false;

> > +    int i;

> > +    struct rte_pci_addr addr;

> > +    struct netdev_dpdk *dev;

> > +    struct rte_eth_dev_info info;

> >

> >      ovs_mutex_lock(&dpdk_mutex);

> >

> > -    ret = dpdk_dev_parse_name(argv[1], "dpdk", &parsed_port);

> > -    if (ret) {

> > +    /* Parse the PCI address to a usable format */

> > +    if (eal_parse_pci_DomBDF(argv[1], &addr)) {

> >          snprintf(response, sizeof(response),

> > -                 "'%s' is not a valid port", argv[1]);

> > +                 "'%s' is not a valid PCI address format - cannot detach",

> > +                 argv[1]);

> >          goto error;

> >      }

> >

> > -    port_id = parsed_port;

> > +    /* Search for the address in the initialised devices */

> > +    for (i = 0; i < RTE_MAX_ETHPORTS; i++) {

> > +        if (!rte_eth_dev_is_valid_port(i)) {

> > +            continue;

> > +        }

> > +        rte_eth_dev_info_get(i, &info);

> > +        if (!rte_eal_compare_pci_addr(&info.pci_dev->addr, &addr)) {

> > +            port_id = i;

> > +            found = true;

> > +            break;

> > +        }

> > +    }

> >

> > -    struct netdev *netdev = netdev_from_name(argv[1]);

> > -    if (netdev) {

> > -        netdev_close(netdev);

> > +    /* Print an error if the device is not already initialised */

> > +    if (!found) {

> >          snprintf(response, sizeof(response),

> > -                 "Port '%s' is being used. Remove it before detaching",

> > +                 "'%s' is not an initialized DPDK device - cannot detach",

> >                   argv[1]);

> >          goto error;

> >      }

> >

> > +    /* Check if the device is already in use by a port, and advise the user to

> > +     * remove it if so */

> > +    LIST_FOR_EACH(dev, list_node, &dpdk_list) {

> > +        if (dev->port_id == port_id) {

> > +            snprintf(response, sizeof(response),

> > +                     "Port '%s' is using PCI device '%s'."

> > +                     "Remove it before detaching",

> > +                     dev->up.name, argv[1]);

> > +            goto error;

> > +        }

> > +    }

> > +

> > +    /* It is safe to detach the device */

> >      rte_eth_dev_close(port_id);

> >

> >      ret = rte_eth_dev_detach(port_id, devname);

> >      if (ret < 0) {

> >          snprintf(response, sizeof(response),

> > -                 "Port '%s' can not be detached", argv[1]);

> > +                 "Device '%s' can not be detached", argv[1]);

> >          goto error;

> >      }

> >

> >      snprintf(response, sizeof(response),

> > -             "Port '%s' has been detached", argv[1]);

> > +             "Device '%s' has been detached", argv[1]);

> > +    VLOG_INFO("DPDK PCI device "PCI_PRI_FMT" no longer available",

> > +              addr.domain, addr.bus, addr.devid, addr.function);

> >

> >      ovs_mutex_unlock(&dpdk_mutex);

> >      unixctl_command_reply(conn, response);

> > @@ -3295,6 +3368,9 @@ dpdk_init__(const struct smap

> *ovs_other_config)

> >      int argc, argc_tmp;

> >      bool auto_determine = true;

> >      int err = 0;

> > +    int i = 0;

> > +    uint8_t nb_ports = 0;

> > +    struct rte_eth_dev_info info;

> >      cpu_set_t cpuset;

> >  #ifndef VHOST_CUSE

> >      char *sock_dir_subcomponent;

> > @@ -3417,6 +3493,14 @@ dpdk_init__(const struct smap

> > *ovs_other_config)

> >

> >      atexit(deferred_argv_release);

> >

> > +    nb_ports = rte_eth_dev_count();

> > +    for (i = 0; i < nb_ports; i++) {

> > +        rte_eth_dev_info_get(i, &info);

> > +        VLOG_INFO("DPDK PCI device "PCI_PRI_FMT" available",

> > +                  info.pci_dev->addr.domain, info.pci_dev->addr.bus,

> > +                  info.pci_dev->addr.devid, info.pci_dev->addr.function);

> > +    }

> > +

> >      rte_memzone_dump(stdout);

> >      rte_eal_init_ret = 0;

> >

> > --

> > 2.4.3

> >

> > _______________________________________________

> > dev mailing list

> > dev@openvswitch.org

> > http://openvswitch.org/mailman/listinfo/dev
Ciara Loftus Aug. 16, 2016, 4:01 p.m. UTC | #5
> > 2016-07-19 2:53 GMT-07:00 Loftus, Ciara <ciara.loftus@intel.com>:

> > >

> > > The idea looks very good to me, thanks for working on it.

> > > Very high level comments:

> > Hi Daniele thanks for looking at this.

> >

> > >

> > > Do we need to be limited to pci devices?  Perhaps we can accept the

> same

> > > string as rte_eth_dev_attach().

> > Can you elaborate? For physical devs the string is always the PCI address.

> Do

> > you mean to include virtual devices as well? This could be an option once

> we

> > can use the ethdev API with vHost ports if the PMD gets merged.

> >

> > I agree with you that for vhost devices we can wait for vHost PMD.  I was

> > thinking more about devices like DPDK "af_packet" and "pcap".  Can we

> use

> > this interface to create those as well?

> 

> Understood. It’s possible. If the string provided isn't PCI format we can

> assume it's a vdev and provide the args to attach() without searching through

> the PCI devices and trying to find a match first.

> I can include this in the v4. However I won't be able to thoroughly test all 20+

> DPDK PMDs and ensure the attach() works for them all. I tested a few - some

> worked out of the box eg. eth_null, some didn’t eg af_packet.

> I imagine that the netdev_class dpdk_class functions only happen to be

> compatible with some PMDs straight away. Those that aren't compatible will

> require new port types (and modifications to existing / new netdev

> functions) which I think is beyond the scope of this patch.


Hi Daniele,

I plan to submit a new version of this soon. Would like your opinion if possible on the way to support for other DPDK devices as you suggested previously.
I think we should keep 'dpdk' ports limited to physical devices with associated PCI addresses. We could create a new port type ('dpdkvdev' maybe?) for the other devices like af_packet and eth_null. I would consider this port type as more 'experimental' (for a few reasons, mainly limited testing as mentioned above) and thus better to be kept separate.
These ports could be configured like so: ovs-vsctl set Interface vdevX options:dpdk-devargs=eth_null0
'dpdk-devargs' would be supplied to rte_eth_dev_attach().

Thanks,
Ciara

> 

> Thanks,

> Ciara

> 

> > Thanks,

> > Daniele

> >

> >

> > > Would it be possible to integrate this more with the hotplug patch?  It

> > would

> > > be nice to avoid introducing extra appctl commands and call

> > > rte_eth_dev_attach() if needed in netdev_dpdk_construct().

> > Good idea. I'll look at this for the v4.

> >

> > Thanks,

> > Ciara

> >

> > > Thoughts?

> > > Thanks,

> > > Daniele

> > >

> > > 2016-07-15 9:34 GMT-07:00 Ciara Loftus <ciara.loftus@intel.com>:

> > > 'dpdk' ports no longer have naming restrictions. Now, instead

> > > of specifying the dpdk port ID as part of the name, the PCI

> > > address of the device must be specified via the 'dpdk-pci'

> > > option. eg.

> > >

> > > ovs-vsctl add-port br0 my-port

> > > ovs-vsctl set Interface my-port type=dpdk

> > > ovs-vsctl set Interface my-port options:dpdk-pci=0000:06:00.3

> > >

> > > Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>

> > >

> > > v2:

> > > - remove global pci list

> > > - remove unnecessary parenthesis

> > > - remove return from void fn

> > > - print pci like dpdk

> > > - fix port ranges

> > > ---

> > >  INSTALL.DPDK-ADVANCED.md |   2 +-

> > >  INSTALL.DPDK.md          |  10 ++--

> > >  NEWS                     |   2 +

> > >  lib/netdev-dpdk.c        | 132

> > > ++++++++++++++++++++++++++++++++++++++---------

> > >  4 files changed, 116 insertions(+), 30 deletions(-)

> > >

> > > diff --git a/INSTALL.DPDK-ADVANCED.md b/INSTALL.DPDK-

> ADVANCED.md

> > > index 61b4e82..7370d03 100644

> > > --- a/INSTALL.DPDK-ADVANCED.md

> > > +++ b/INSTALL.DPDK-ADVANCED.md

> > > @@ -854,7 +854,7 @@ At this point, the user can create a ovs port using

> > the

> > > add-port command.

> > >  It is also possible to detach a port from ovs, the user has to remove the

> > >  port using the del-port command, then it can be detached using:

> > >

> > > -`ovs-appctl netdev-dpdk/port-detach dpdk0`

> > > +`ovs-appctl netdev-dpdk/port-detach 0000:01:00.0`

> > >

> > >  This feature is not supported with VFIO and could not work with some

> > NICs,

> > >  please refer to the [DPDK Port Hotplug Framework] in order to get more

> > > diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md

> > > index 5407794..9a781ff 100644

> > > --- a/INSTALL.DPDK.md

> > > +++ b/INSTALL.DPDK.md

> > > @@ -258,13 +258,13 @@ advanced install guide [INSTALL.DPDK-

> > > ADVANCED.md]

> > >

> > >       `ovs-vsctl add-br br0 -- set bridge br0 datapath_type=netdev`

> > >

> > > -     Now you can add DPDK devices. OVS expects DPDK device names to

> > start

> > > with

> > > -     "dpdk" and end with a portid. vswitchd should print (in the log file) the

> > > -     number of dpdk devices found.

> > > +     Now you can add dpdk devices. The PCI address of the device needs

> to

> > > be

> > > +     set using the 'dpdk-pci' option. vswitchd should print (in the log file)

> > > +     the PCI addresses of dpdk devices found during initialisation.

> > >

> > >       ```

> > > -     ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk

> > > -     ovs-vsctl add-port br0 dpdk1 -- set Interface dpdk1 type=dpdk

> > > +     ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk

> > > options:dpdk-pci=0000:06:00.0

> > > +     ovs-vsctl add-port br0 dpdk1 -- set Interface dpdk1 type=dpdk

> > > options:dpdk-pci=0000:06:00.1

> > >       ```

> > >

> > >       After the DPDK ports get added to switch, a polling thread

> continuously

> > > polls

> > > diff --git a/NEWS b/NEWS

> > > index 9064225..03b9ba8 100644

> > > --- a/NEWS

> > > +++ b/NEWS

> > > @@ -59,6 +59,8 @@ Post-v2.5.0

> > >         node that device memory is located on if

> > > CONFIG_RTE_LIBRTE_VHOST_NUMA

> > >         is enabled in DPDK.

> > >       * Port Hotplug is now supported.

> > > +     * DPDK physical ports can now have arbitrary names. The PCI address

> > of

> > > +       the device must be set using the 'dpdk-pci' option.

> > >     - Increase number of registers to 16.

> > >     - ovs-benchmark: This utility has been removed due to lack of use and

> > >       bitrot.

> > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c

> > > index 3fab52c..d2cceb2 100644

> > > --- a/lib/netdev-dpdk.c

> > > +++ b/lib/netdev-dpdk.c

> > > @@ -58,6 +58,7 @@

> > >  #include "rte_config.h"

> > >  #include "rte_mbuf.h"

> > >  #include "rte_meter.h"

> > > +#include "rte_pci.h"

> > >  #include "rte_virtio_net.h"

> > >

> > >  VLOG_DEFINE_THIS_MODULE(dpdk);

> > > @@ -736,7 +737,7 @@ netdev_dpdk_init(struct netdev *netdev,

> unsigned

> > > int port_no,

> > >      /* If the 'sid' is negative, it means that the kernel fails

> > >       * to obtain the pci numa info.  In that situation, always

> > >       * use 'SOCKET0'. */

> > > -    if (type == DPDK_DEV_ETH) {

> > > +    if (type == DPDK_DEV_ETH && dev->port_id != -1) {

> > >          sid = rte_eth_dev_socket_id(port_no);

> > >      } else {

> > >          sid = rte_lcore_to_socket_id(rte_get_master_lcore());

> > > @@ -772,9 +773,11 @@ netdev_dpdk_init(struct netdev *netdev,

> > unsigned

> > > int port_no,

> > >      dev->requested_n_txq = netdev->n_txq;

> > >

> > >      if (type == DPDK_DEV_ETH) {

> > > -        err = dpdk_eth_dev_init(dev);

> > > -        if (err) {

> > > -            goto unlock;

> > > +        if (dev->port_id != -1) {

> > > +            err = dpdk_eth_dev_init(dev);

> > > +            if (err) {

> > > +                goto unlock;

> > > +            }

> > >          }

> > >          netdev_dpdk_alloc_txq(dev, netdev->n_txq);

> > >          dev->txq_needs_locking = netdev->n_txq < dev->requested_n_txq;

> > > @@ -886,21 +889,14 @@ netdev_dpdk_vhost_user_construct(struct

> > netdev

> > > *netdev)

> > >  static int

> > >  netdev_dpdk_construct(struct netdev *netdev)

> > >  {

> > > -    unsigned int port_no;

> > >      int err;

> > >

> > >      if (rte_eal_init_ret) {

> > >          return rte_eal_init_ret;

> > >      }

> > >

> > > -    /* Names always start with "dpdk" */

> > > -    err = dpdk_dev_parse_name(netdev->name, "dpdk", &port_no);

> > > -    if (err) {

> > > -        return err;

> > > -    }

> > > -

> > >      ovs_mutex_lock(&dpdk_mutex);

> > > -    err = netdev_dpdk_init(netdev, port_no, DPDK_DEV_ETH);

> > > +    err = netdev_dpdk_init(netdev, -1, DPDK_DEV_ETH);

> > >      ovs_mutex_unlock(&dpdk_mutex);

> > >      return err;

> > >  }

> > > @@ -979,11 +975,39 @@ netdev_dpdk_get_config(const struct netdev

> > > *netdev, struct smap *args)

> > >      return 0;

> > >  }

> > >

> > > +static void

> > > +netdev_dpdk_associate_pci(struct netdev_dpdk *dev, struct

> > rte_pci_addr

> > > *addr)

> > > +{

> > > +    int i = 0;

> > > +    struct rte_eth_dev_info info;

> > > +

> > > +    for (i = 0; i < RTE_MAX_ETHPORTS; i++) {

> > > +        if (!rte_eth_dev_is_valid_port(i)) {

> > > +            continue;

> > > +        }

> > > +        rte_eth_dev_info_get(i, &info);

> > > +        if (!rte_eal_compare_pci_addr(&info.pci_dev->addr, addr)) {

> > > +            dev->port_id = i;

> > > +            break;

> > > +        }

> > > +    }

> > > +

> > > +    if (dev->port_id != -1) {

> > > +        rte_eth_dev_stop(dev->port_id);

> > > +        dev->socket_id = rte_eth_dev_socket_id(dev->port_id);

> > > +        dpdk_eth_dev_init(dev);

> > > +    } else {

> > > +        VLOG_INFO("Invalid PCI address for device %s", dev->up.name);

> > > +    }

> > > +}

> > > +

> > >  static int

> > >  netdev_dpdk_set_config(struct netdev *netdev, const struct smap

> *args)

> > >  {

> > >      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> > >      int new_n_rxq;

> > > +    struct rte_pci_addr addr;

> > > +    const char *pci_str;

> > >

> > >      ovs_mutex_lock(&dev->mutex);

> > >      new_n_rxq = MAX(smap_get_int(args, "n_rxq", dev-

> > >requested_n_rxq),

> > > 1);

> > > @@ -991,6 +1015,19 @@ netdev_dpdk_set_config(struct netdev

> *netdev,

> > > const struct smap *args)

> > >          dev->requested_n_rxq = new_n_rxq;

> > >          netdev_request_reconfigure(netdev);

> > >      }

> > > +

> > > +    if (dev->port_id == -1) {

> > > +        pci_str = smap_get(args, "dpdk-pci");

> > > +        if (pci_str != NULL) {

> > > +            if (!eal_parse_pci_DomBDF(pci_str, &addr)) {

> > > +                netdev_dpdk_associate_pci(dev, &addr);

> > > +            } else {

> > > +                VLOG_ERR("Error parsing PCI address %s, please check format",

> > > +                          pci_str);

> > > +            }

> > > +        }

> > > +    }

> > > +

> > >      ovs_mutex_unlock(&dev->mutex);

> > >

> > >      return 0;

> > > @@ -2179,6 +2216,7 @@ netdev_dpdk_port_attach(struct unixctl_conn

> > > *conn, int argc OVS_UNUSED,

> > >      int ret;

> > >      char response[128];

> > >      uint8_t port_id;

> > > +    struct rte_eth_dev_info info;

> > >

> > >      ovs_mutex_lock(&dpdk_mutex);

> > >

> > > @@ -2192,7 +2230,12 @@ netdev_dpdk_port_attach(struct unixctl_conn

> > > *conn, int argc OVS_UNUSED,

> > >      }

> > >

> > >      snprintf(response, sizeof(response),

> > > -             "Device '%s' has been attached as 'dpdk%d'", argv[1], port_id);

> > > +             "Device '%s' has been attached", argv[1]);

> > > +

> > > +    rte_eth_dev_info_get(port_id, &info);

> > > +    VLOG_INFO("DPDK PCI device "PCI_PRI_FMT" available",

> > > +              info.pci_dev->addr.domain, info.pci_dev->addr.bus,

> > > +              info.pci_dev->addr.devid, info.pci_dev->addr.function);

> > >

> > >      ovs_mutex_unlock(&dpdk_mutex);

> > >      unixctl_command_reply(conn, response);

> > > @@ -2204,41 +2247,71 @@ netdev_dpdk_port_detach(struct

> unixctl_conn

> > > *conn, int argc OVS_UNUSED,

> > >  {

> > >      int ret;

> > >      char response[128];

> > > -    unsigned int parsed_port;

> > >      uint8_t port_id;

> > >      char devname[RTE_ETH_NAME_MAX_LEN];

> > > +    bool found = false;

> > > +    int i;

> > > +    struct rte_pci_addr addr;

> > > +    struct netdev_dpdk *dev;

> > > +    struct rte_eth_dev_info info;

> > >

> > >      ovs_mutex_lock(&dpdk_mutex);

> > >

> > > -    ret = dpdk_dev_parse_name(argv[1], "dpdk", &parsed_port);

> > > -    if (ret) {

> > > +    /* Parse the PCI address to a usable format */

> > > +    if (eal_parse_pci_DomBDF(argv[1], &addr)) {

> > >          snprintf(response, sizeof(response),

> > > -                 "'%s' is not a valid port", argv[1]);

> > > +                 "'%s' is not a valid PCI address format - cannot detach",

> > > +                 argv[1]);

> > >          goto error;

> > >      }

> > >

> > > -    port_id = parsed_port;

> > > +    /* Search for the address in the initialised devices */

> > > +    for (i = 0; i < RTE_MAX_ETHPORTS; i++) {

> > > +        if (!rte_eth_dev_is_valid_port(i)) {

> > > +            continue;

> > > +        }

> > > +        rte_eth_dev_info_get(i, &info);

> > > +        if (!rte_eal_compare_pci_addr(&info.pci_dev->addr, &addr)) {

> > > +            port_id = i;

> > > +            found = true;

> > > +            break;

> > > +        }

> > > +    }

> > >

> > > -    struct netdev *netdev = netdev_from_name(argv[1]);

> > > -    if (netdev) {

> > > -        netdev_close(netdev);

> > > +    /* Print an error if the device is not already initialised */

> > > +    if (!found) {

> > >          snprintf(response, sizeof(response),

> > > -                 "Port '%s' is being used. Remove it before detaching",

> > > +                 "'%s' is not an initialized DPDK device - cannot detach",

> > >                   argv[1]);

> > >          goto error;

> > >      }

> > >

> > > +    /* Check if the device is already in use by a port, and advise the user

> to

> > > +     * remove it if so */

> > > +    LIST_FOR_EACH(dev, list_node, &dpdk_list) {

> > > +        if (dev->port_id == port_id) {

> > > +            snprintf(response, sizeof(response),

> > > +                     "Port '%s' is using PCI device '%s'."

> > > +                     "Remove it before detaching",

> > > +                     dev->up.name, argv[1]);

> > > +            goto error;

> > > +        }

> > > +    }

> > > +

> > > +    /* It is safe to detach the device */

> > >      rte_eth_dev_close(port_id);

> > >

> > >      ret = rte_eth_dev_detach(port_id, devname);

> > >      if (ret < 0) {

> > >          snprintf(response, sizeof(response),

> > > -                 "Port '%s' can not be detached", argv[1]);

> > > +                 "Device '%s' can not be detached", argv[1]);

> > >          goto error;

> > >      }

> > >

> > >      snprintf(response, sizeof(response),

> > > -             "Port '%s' has been detached", argv[1]);

> > > +             "Device '%s' has been detached", argv[1]);

> > > +    VLOG_INFO("DPDK PCI device "PCI_PRI_FMT" no longer available",

> > > +              addr.domain, addr.bus, addr.devid, addr.function);

> > >

> > >      ovs_mutex_unlock(&dpdk_mutex);

> > >      unixctl_command_reply(conn, response);

> > > @@ -3295,6 +3368,9 @@ dpdk_init__(const struct smap

> > *ovs_other_config)

> > >      int argc, argc_tmp;

> > >      bool auto_determine = true;

> > >      int err = 0;

> > > +    int i = 0;

> > > +    uint8_t nb_ports = 0;

> > > +    struct rte_eth_dev_info info;

> > >      cpu_set_t cpuset;

> > >  #ifndef VHOST_CUSE

> > >      char *sock_dir_subcomponent;

> > > @@ -3417,6 +3493,14 @@ dpdk_init__(const struct smap

> > > *ovs_other_config)

> > >

> > >      atexit(deferred_argv_release);

> > >

> > > +    nb_ports = rte_eth_dev_count();

> > > +    for (i = 0; i < nb_ports; i++) {

> > > +        rte_eth_dev_info_get(i, &info);

> > > +        VLOG_INFO("DPDK PCI device "PCI_PRI_FMT" available",

> > > +                  info.pci_dev->addr.domain, info.pci_dev->addr.bus,

> > > +                  info.pci_dev->addr.devid, info.pci_dev->addr.function);

> > > +    }

> > > +

> > >      rte_memzone_dump(stdout);

> > >      rte_eal_init_ret = 0;

> > >

> > > --

> > > 2.4.3

> > >

> > > _______________________________________________

> > > dev mailing list

> > > dev@openvswitch.org

> > > http://openvswitch.org/mailman/listinfo/dev

> 

> _______________________________________________

> dev mailing list

> dev@openvswitch.org

> http://openvswitch.org/mailman/listinfo/dev
diff mbox

Patch

diff --git a/INSTALL.DPDK-ADVANCED.md b/INSTALL.DPDK-ADVANCED.md
index 61b4e82..7370d03 100644
--- a/INSTALL.DPDK-ADVANCED.md
+++ b/INSTALL.DPDK-ADVANCED.md
@@ -854,7 +854,7 @@  At this point, the user can create a ovs port using the add-port command.
 It is also possible to detach a port from ovs, the user has to remove the
 port using the del-port command, then it can be detached using:
 
-`ovs-appctl netdev-dpdk/port-detach dpdk0`
+`ovs-appctl netdev-dpdk/port-detach 0000:01:00.0`
 
 This feature is not supported with VFIO and could not work with some NICs,
 please refer to the [DPDK Port Hotplug Framework] in order to get more
diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md
index 5407794..9a781ff 100644
--- a/INSTALL.DPDK.md
+++ b/INSTALL.DPDK.md
@@ -258,13 +258,13 @@  advanced install guide [INSTALL.DPDK-ADVANCED.md]
 
      `ovs-vsctl add-br br0 -- set bridge br0 datapath_type=netdev`
 
-     Now you can add DPDK devices. OVS expects DPDK device names to start with
-     "dpdk" and end with a portid. vswitchd should print (in the log file) the
-     number of dpdk devices found.
+     Now you can add dpdk devices. The PCI address of the device needs to be
+     set using the 'dpdk-pci' option. vswitchd should print (in the log file)
+     the PCI addresses of dpdk devices found during initialisation.
 
      ```
-     ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk
-     ovs-vsctl add-port br0 dpdk1 -- set Interface dpdk1 type=dpdk
+     ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk options:dpdk-pci=0000:06:00.0
+     ovs-vsctl add-port br0 dpdk1 -- set Interface dpdk1 type=dpdk options:dpdk-pci=0000:06:00.1
      ```
 
      After the DPDK ports get added to switch, a polling thread continuously polls
diff --git a/NEWS b/NEWS
index 9064225..03b9ba8 100644
--- a/NEWS
+++ b/NEWS
@@ -59,6 +59,8 @@  Post-v2.5.0
        node that device memory is located on if CONFIG_RTE_LIBRTE_VHOST_NUMA
        is enabled in DPDK.
      * Port Hotplug is now supported.
+     * DPDK physical ports can now have arbitrary names. The PCI address of
+       the device must be set using the 'dpdk-pci' option.
    - Increase number of registers to 16.
    - ovs-benchmark: This utility has been removed due to lack of use and
      bitrot.
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 3fab52c..d2cceb2 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -58,6 +58,7 @@ 
 #include "rte_config.h"
 #include "rte_mbuf.h"
 #include "rte_meter.h"
+#include "rte_pci.h"
 #include "rte_virtio_net.h"
 
 VLOG_DEFINE_THIS_MODULE(dpdk);
@@ -736,7 +737,7 @@  netdev_dpdk_init(struct netdev *netdev, unsigned int port_no,
     /* If the 'sid' is negative, it means that the kernel fails
      * to obtain the pci numa info.  In that situation, always
      * use 'SOCKET0'. */
-    if (type == DPDK_DEV_ETH) {
+    if (type == DPDK_DEV_ETH && dev->port_id != -1) {
         sid = rte_eth_dev_socket_id(port_no);
     } else {
         sid = rte_lcore_to_socket_id(rte_get_master_lcore());
@@ -772,9 +773,11 @@  netdev_dpdk_init(struct netdev *netdev, unsigned int port_no,
     dev->requested_n_txq = netdev->n_txq;
 
     if (type == DPDK_DEV_ETH) {
-        err = dpdk_eth_dev_init(dev);
-        if (err) {
-            goto unlock;
+        if (dev->port_id != -1) {
+            err = dpdk_eth_dev_init(dev);
+            if (err) {
+                goto unlock;
+            }
         }
         netdev_dpdk_alloc_txq(dev, netdev->n_txq);
         dev->txq_needs_locking = netdev->n_txq < dev->requested_n_txq;
@@ -886,21 +889,14 @@  netdev_dpdk_vhost_user_construct(struct netdev *netdev)
 static int
 netdev_dpdk_construct(struct netdev *netdev)
 {
-    unsigned int port_no;
     int err;
 
     if (rte_eal_init_ret) {
         return rte_eal_init_ret;
     }
 
-    /* Names always start with "dpdk" */
-    err = dpdk_dev_parse_name(netdev->name, "dpdk", &port_no);
-    if (err) {
-        return err;
-    }
-
     ovs_mutex_lock(&dpdk_mutex);
-    err = netdev_dpdk_init(netdev, port_no, DPDK_DEV_ETH);
+    err = netdev_dpdk_init(netdev, -1, DPDK_DEV_ETH);
     ovs_mutex_unlock(&dpdk_mutex);
     return err;
 }
@@ -979,11 +975,39 @@  netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args)
     return 0;
 }
 
+static void
+netdev_dpdk_associate_pci(struct netdev_dpdk *dev, struct rte_pci_addr *addr)
+{
+    int i = 0;
+    struct rte_eth_dev_info info;
+
+    for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
+        if (!rte_eth_dev_is_valid_port(i)) {
+            continue;
+        }
+        rte_eth_dev_info_get(i, &info);
+        if (!rte_eal_compare_pci_addr(&info.pci_dev->addr, addr)) {
+            dev->port_id = i;
+            break;
+        }
+    }
+
+    if (dev->port_id != -1) {
+        rte_eth_dev_stop(dev->port_id);
+        dev->socket_id = rte_eth_dev_socket_id(dev->port_id);
+        dpdk_eth_dev_init(dev);
+    } else {
+        VLOG_INFO("Invalid PCI address for device %s", dev->up.name);
+    }
+}
+
 static int
 netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
     int new_n_rxq;
+    struct rte_pci_addr addr;
+    const char *pci_str;
 
     ovs_mutex_lock(&dev->mutex);
     new_n_rxq = MAX(smap_get_int(args, "n_rxq", dev->requested_n_rxq), 1);
@@ -991,6 +1015,19 @@  netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args)
         dev->requested_n_rxq = new_n_rxq;
         netdev_request_reconfigure(netdev);
     }
+
+    if (dev->port_id == -1) {
+        pci_str = smap_get(args, "dpdk-pci");
+        if (pci_str != NULL) {
+            if (!eal_parse_pci_DomBDF(pci_str, &addr)) {
+                netdev_dpdk_associate_pci(dev, &addr);
+            } else {
+                VLOG_ERR("Error parsing PCI address %s, please check format",
+                          pci_str);
+            }
+        }
+    }
+
     ovs_mutex_unlock(&dev->mutex);
 
     return 0;
@@ -2179,6 +2216,7 @@  netdev_dpdk_port_attach(struct unixctl_conn *conn, int argc OVS_UNUSED,
     int ret;
     char response[128];
     uint8_t port_id;
+    struct rte_eth_dev_info info;
 
     ovs_mutex_lock(&dpdk_mutex);
 
@@ -2192,7 +2230,12 @@  netdev_dpdk_port_attach(struct unixctl_conn *conn, int argc OVS_UNUSED,
     }
 
     snprintf(response, sizeof(response),
-             "Device '%s' has been attached as 'dpdk%d'", argv[1], port_id);
+             "Device '%s' has been attached", argv[1]);
+
+    rte_eth_dev_info_get(port_id, &info);
+    VLOG_INFO("DPDK PCI device "PCI_PRI_FMT" available",
+              info.pci_dev->addr.domain, info.pci_dev->addr.bus,
+              info.pci_dev->addr.devid, info.pci_dev->addr.function);
 
     ovs_mutex_unlock(&dpdk_mutex);
     unixctl_command_reply(conn, response);
@@ -2204,41 +2247,71 @@  netdev_dpdk_port_detach(struct unixctl_conn *conn, int argc OVS_UNUSED,
 {
     int ret;
     char response[128];
-    unsigned int parsed_port;
     uint8_t port_id;
     char devname[RTE_ETH_NAME_MAX_LEN];
+    bool found = false;
+    int i;
+    struct rte_pci_addr addr;
+    struct netdev_dpdk *dev;
+    struct rte_eth_dev_info info;
 
     ovs_mutex_lock(&dpdk_mutex);
 
-    ret = dpdk_dev_parse_name(argv[1], "dpdk", &parsed_port);
-    if (ret) {
+    /* Parse the PCI address to a usable format */
+    if (eal_parse_pci_DomBDF(argv[1], &addr)) {
         snprintf(response, sizeof(response),
-                 "'%s' is not a valid port", argv[1]);
+                 "'%s' is not a valid PCI address format - cannot detach",
+                 argv[1]);
         goto error;
     }
 
-    port_id = parsed_port;
+    /* Search for the address in the initialised devices */
+    for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
+        if (!rte_eth_dev_is_valid_port(i)) {
+            continue;
+        }
+        rte_eth_dev_info_get(i, &info);
+        if (!rte_eal_compare_pci_addr(&info.pci_dev->addr, &addr)) {
+            port_id = i;
+            found = true;
+            break;
+        }
+    }
 
-    struct netdev *netdev = netdev_from_name(argv[1]);
-    if (netdev) {
-        netdev_close(netdev);
+    /* Print an error if the device is not already initialised */
+    if (!found) {
         snprintf(response, sizeof(response),
-                 "Port '%s' is being used. Remove it before detaching",
+                 "'%s' is not an initialized DPDK device - cannot detach",
                  argv[1]);
         goto error;
     }
 
+    /* Check if the device is already in use by a port, and advise the user to
+     * remove it if so */
+    LIST_FOR_EACH(dev, list_node, &dpdk_list) {
+        if (dev->port_id == port_id) {
+            snprintf(response, sizeof(response),
+                     "Port '%s' is using PCI device '%s'."
+                     "Remove it before detaching",
+                     dev->up.name, argv[1]);
+            goto error;
+        }
+    }
+
+    /* It is safe to detach the device */
     rte_eth_dev_close(port_id);
 
     ret = rte_eth_dev_detach(port_id, devname);
     if (ret < 0) {
         snprintf(response, sizeof(response),
-                 "Port '%s' can not be detached", argv[1]);
+                 "Device '%s' can not be detached", argv[1]);
         goto error;
     }
 
     snprintf(response, sizeof(response),
-             "Port '%s' has been detached", argv[1]);
+             "Device '%s' has been detached", argv[1]);
+    VLOG_INFO("DPDK PCI device "PCI_PRI_FMT" no longer available",
+              addr.domain, addr.bus, addr.devid, addr.function);
 
     ovs_mutex_unlock(&dpdk_mutex);
     unixctl_command_reply(conn, response);
@@ -3295,6 +3368,9 @@  dpdk_init__(const struct smap *ovs_other_config)
     int argc, argc_tmp;
     bool auto_determine = true;
     int err = 0;
+    int i = 0;
+    uint8_t nb_ports = 0;
+    struct rte_eth_dev_info info;
     cpu_set_t cpuset;
 #ifndef VHOST_CUSE
     char *sock_dir_subcomponent;
@@ -3417,6 +3493,14 @@  dpdk_init__(const struct smap *ovs_other_config)
 
     atexit(deferred_argv_release);
 
+    nb_ports = rte_eth_dev_count();
+    for (i = 0; i < nb_ports; i++) {
+        rte_eth_dev_info_get(i, &info);
+        VLOG_INFO("DPDK PCI device "PCI_PRI_FMT" available",
+                  info.pci_dev->addr.domain, info.pci_dev->addr.bus,
+                  info.pci_dev->addr.devid, info.pci_dev->addr.function);
+    }
+
     rte_memzone_dump(stdout);
     rte_eal_init_ret = 0;