diff mbox series

[ovs-dev,dpdk-howl,v2] netdev-dpdk: Upgrade to dpdk v18.08.0

Message ID 1536620677-22429-1-git-send-email-ophirmu@mellanox.com
State Superseded
Headers show
Series [ovs-dev,dpdk-howl,v2] netdev-dpdk: Upgrade to dpdk v18.08.0 | expand

Commit Message

Ophir Munk Sept. 10, 2018, 11:04 p.m. UTC
1. Enable compilation and linkage with dpdk 18.08.0
The following dpdk commits which were introduced after dpdk 17.11.x
require OVS updates to accommodate to the dpdk changes.
- ce17eddefc20 ("ethdev: introduce Rx queue offloads API")
- ab3ce1e0c193 ("ethdev: remove old offload API")
- c06ddf9698e0 ("meter: add configuration profile")
- e58638c32411 ("ethdev: fix TPID handling in flow API")
- cd8c7c7ce241 ("ethdev: replace bus specific struct with generic dev")
- ac8d22de2394 ("ethdev: flatten RSS configuration in flow API")

2. Limit configured rss hash functions to only those supported
by the eth device.

3. Update references to DPDK version in Documentation

4. Update DPDK version in travis' linux-build script

Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
---
v1:
First version

v2:
Avoid seg faults cases as described in
https://patchwork.ozlabs.org/patch/965451/
by using the patch in:
https://github.com/kevintraynor/ovs-dpdk-master/commit/88f46cc5ab338eb4f3ca5db1eacd0effefe4fa0c

 .travis/linux-build.sh         |   2 +-
 Documentation/faq/releases.rst |   2 +-
 lib/netdev-dpdk.c              | 142 +++++++++++++++++++++++++++++------------
 3 files changed, 104 insertions(+), 42 deletions(-)

Comments

Ilya Maximets Sept. 11, 2018, 9:35 a.m. UTC | #1
Comments inline.

Best regards, Ilya Maximets.

On 11.09.2018 02:04, Ophir Munk wrote:
> 1. Enable compilation and linkage with dpdk 18.08.0
> The following dpdk commits which were introduced after dpdk 17.11.x
> require OVS updates to accommodate to the dpdk changes.
> - ce17eddefc20 ("ethdev: introduce Rx queue offloads API")
> - ab3ce1e0c193 ("ethdev: remove old offload API")
> - c06ddf9698e0 ("meter: add configuration profile")
> - e58638c32411 ("ethdev: fix TPID handling in flow API")
> - cd8c7c7ce241 ("ethdev: replace bus specific struct with generic dev")
> - ac8d22de2394 ("ethdev: flatten RSS configuration in flow API")
> 
> 2. Limit configured rss hash functions to only those supported
> by the eth device.
> 
> 3. Update references to DPDK version in Documentation
> 
> 4. Update DPDK version in travis' linux-build script
> 
> Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
> ---
> v1:
> First version
> 
> v2:
> Avoid seg faults cases as described in
> https://patchwork.ozlabs.org/patch/965451/
> by using the patch in:
> https://github.com/kevintraynor/ovs-dpdk-master/commit/88f46cc5ab338eb4f3ca5db1eacd0effefe4fa0c
> 
>  .travis/linux-build.sh         |   2 +-
>  Documentation/faq/releases.rst |   2 +-
>  lib/netdev-dpdk.c              | 142 +++++++++++++++++++++++++++++------------
>  3 files changed, 104 insertions(+), 42 deletions(-)
> 
> diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh
> index 4b9fc4a..c60ac71 100755
> --- a/.travis/linux-build.sh
> +++ b/.travis/linux-build.sh
> @@ -83,7 +83,7 @@ fi
>  
>  if [ "$DPDK" ]; then
>      if [ -z "$DPDK_VER" ]; then
> -        DPDK_VER="17.11.3"
> +        DPDK_VER="18.08.0"
>      fi
>      install_dpdk $DPDK_VER
>      if [ "$CC" = "clang" ]; then
> diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst
> index 41d41e3..646ae09 100644
> --- a/Documentation/faq/releases.rst
> +++ b/Documentation/faq/releases.rst
> @@ -168,7 +168,7 @@ Q: What DPDK version does each Open vSwitch release work with?
>      2.7.x        16.11.7
>      2.8.x        17.05.2
>      2.9.x        17.11.3
> -    2.10.x       17.11.3
> +    2.10.x       18.08.0
>      ============ =======
>  
>  Q: Are all the DPDK releases that OVS versions work with maintained?
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index f91aa27..6879e9a 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -168,11 +168,7 @@ static const struct rte_eth_conf port_conf = {
>      .rxmode = {
>          .mq_mode = ETH_MQ_RX_RSS,
>          .split_hdr_size = 0,
> -        .header_split   = 0, /* Header Split disabled */
> -        .hw_ip_checksum = 0, /* IP checksum offload disabled */
> -        .hw_vlan_filter = 0, /* VLAN filtering disabled */
> -        .jumbo_frame    = 0, /* Jumbo Frame Support disabled */
> -        .hw_strip_crc   = 0,
> +        .offloads = 0,
>      },
>      .rx_adv_conf = {
>          .rss_conf = {
> @@ -364,6 +360,7 @@ struct dpdk_ring {
>  struct ingress_policer {
>      struct rte_meter_srtcm_params app_srtcm_params;
>      struct rte_meter_srtcm in_policer;
> +    struct rte_meter_srtcm_profile in_prof;
>      rte_spinlock_t policer_lock;
>  };
>  
> @@ -894,6 +891,8 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
>      struct rte_eth_dev_info info;
>      uint16_t conf_mtu;
>  
> +    rte_eth_dev_info_get(dev->port_id, &info);
> +
>      /* As of DPDK 17.11.1 a few PMDs require to explicitly enable
>       * scatter to support jumbo RX. Checking the offload capabilities
>       * is not an option as PMDs are not required yet to report
> @@ -901,20 +900,24 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
>       * (testing or code review). Listing all such PMDs feels harder
>       * than highlighting the one known not to need scatter */
>      if (dev->mtu > ETHER_MTU) {
> -        rte_eth_dev_info_get(dev->port_id, &info);
>          if (strncmp(info.driver_name, "net_nfp", 7)) {
> -            conf.rxmode.enable_scatter = 1;
> +            conf.rxmode.offloads |= DEV_RX_OFFLOAD_SCATTER;
>          }
>      }
>  
>      conf.intr_conf.lsc = dev->lsc_interrupt_mode;
> -    conf.rxmode.hw_ip_checksum = (dev->hw_ol_features &
> -                                  NETDEV_RX_CHECKSUM_OFFLOAD) != 0;
> +    conf.rxmode.offloads |= ((dev->hw_ol_features &
> +                             NETDEV_RX_CHECKSUM_OFFLOAD) != 0) ?
> +                             DEV_RX_OFFLOAD_CHECKSUM : 0;

IMHO, it's better to use 'if'. This thing became too complex.

>  
>      if (dev->hw_ol_features & NETDEV_RX_HW_CRC_STRIP) {
> -        conf.rxmode.hw_strip_crc = 1;
> +        conf.rxmode.offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
>      }
>  
> +    /* Limit configured rss hash functions to only those supported
> +     * by the eth device. */
> +    conf.rx_adv_conf.rss_conf.rss_hf &= info.flow_type_rss_offloads;
> +
>      /* A device may report more queues than it makes available (this has
>       * been observed for Intel xl710, which reserves some of them for
>       * SRIOV):  rte_eth_*_queue_setup will fail if a queue is not
> @@ -1932,16 +1935,18 @@ netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
>  
>  static inline bool
>  netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm *meter,
> +                               struct rte_meter_srtcm_profile *profile,
>                                 struct rte_mbuf *pkt, uint64_t time)
>  {
>      uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt) - sizeof(struct ether_hdr);
>  
> -    return rte_meter_srtcm_color_blind_check(meter, time, pkt_len) ==
> +    return rte_meter_srtcm_color_blind_check(meter, profile, time, pkt_len) ==
>                                                  e_RTE_METER_GREEN;
>  }
>  
>  static int
>  netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
> +                        struct rte_meter_srtcm_profile *profile,
>                          struct rte_mbuf **pkts, int pkt_cnt,
>                          bool should_steal)
>  {
> @@ -1953,7 +1958,8 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
>      for (i = 0; i < pkt_cnt; i++) {
>          pkt = pkts[i];
>          /* Handle current packet */
> -        if (netdev_dpdk_policer_pkt_handle(meter, pkt, current_time)) {
> +        if (netdev_dpdk_policer_pkt_handle(meter, profile, pkt,
> +                                                 current_time)) {
>              if (cnt != i) {
>                  pkts[cnt] = pkt;
>              }
> @@ -1975,8 +1981,8 @@ ingress_policer_run(struct ingress_policer *policer, struct rte_mbuf **pkts,
>      int cnt = 0;
>  
>      rte_spinlock_lock(&policer->policer_lock);
> -    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,
> -                                  pkt_cnt, should_steal);
> +    cnt = netdev_dpdk_policer_run(&policer->in_policer, &policer->in_prof,
> +                                  pkts, pkt_cnt, should_steal);
>      rte_spinlock_unlock(&policer->policer_lock);
>  
>      return cnt;
> @@ -2767,8 +2773,15 @@ netdev_dpdk_policer_construct(uint32_t rate, uint32_t burst)
>      policer->app_srtcm_params.cir = rate_bytes;
>      policer->app_srtcm_params.cbs = burst_bytes;
>      policer->app_srtcm_params.ebs = 0;
> +    err = rte_meter_srtcm_profile_config(&policer->in_prof,
> +                                         &policer->app_srtcm_params);
> +    if (err) {
> +        VLOG_ERR("Could not create rte meter profile for ingress policer");
> +        free(policer);
> +        return NULL;
> +    }
>      err = rte_meter_srtcm_config(&policer->in_policer,
> -                                    &policer->app_srtcm_params);
> +                                    &policer->in_prof);
>      if (err) {
>          VLOG_ERR("Could not create rte meter for ingress policer");
>          free(policer);
> @@ -3043,13 +3056,18 @@ netdev_dpdk_get_status(const struct netdev *netdev, struct smap *args)
>      smap_add_format(args, "if_descr", "%s %s", rte_version(),
>                                                 dev_info.driver_name);
>  
> -    if (dev_info.pci_dev) {
> -        smap_add_format(args, "pci-vendor_id", "0x%x",
> -                        dev_info.pci_dev->id.vendor_id);
> -        smap_add_format(args, "pci-device_id", "0x%x",
> -                        dev_info.pci_dev->id.device_id);
> +    const struct rte_bus *bus;
> +    const struct rte_pci_device *pci_dev;
> +    bus = rte_bus_find_by_device(dev_info.device);
> +    if (bus && !strcmp(bus->name, "pci")) {
> +        pci_dev = RTE_DEV_TO_PCI(dev_info.device);
> +        if (pci_dev) {
> +            smap_add_format(args, "pci-vendor_id", "0x%u",
> +                            pci_dev->id.vendor_id);
> +             smap_add_format(args, "pci-device_id", "0x%x",
> +                             pci_dev->id.device_id);

%u --> %x.
Also, something happened with indents.

> +        }
>      }
> -
>      return 0;
>  }
>  
> @@ -3727,6 +3745,7 @@ struct egress_policer {
>      struct qos_conf qos_conf;
>      struct rte_meter_srtcm_params app_srtcm_params;
>      struct rte_meter_srtcm egress_meter;
> +    struct rte_meter_srtcm_profile egress_prof;
>  };
>  
>  static void
> @@ -3749,8 +3768,16 @@ egress_policer_qos_construct(const struct smap *details,
>      policer = xmalloc(sizeof *policer);
>      qos_conf_init(&policer->qos_conf, &egress_policer_ops);
>      egress_policer_details_to_param(details, &policer->app_srtcm_params);
> +    err = rte_meter_srtcm_profile_config(&policer->egress_prof,
> +                                         &policer->app_srtcm_params);
> +    if (err) {
> +        free(policer);
> +        *conf = NULL;
> +        return -err;
> +    }
>      err = rte_meter_srtcm_config(&policer->egress_meter,
> -                                 &policer->app_srtcm_params);
> +                                 &policer->egress_prof);
> +
>      if (!err) {
>          *conf = &policer->qos_conf;
>      } else {
> @@ -3803,7 +3830,8 @@ egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int pkt_cnt,
>      struct egress_policer *policer =
>          CONTAINER_OF(conf, struct egress_policer, qos_conf);
>  
> -    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
> +    cnt = netdev_dpdk_policer_run(&policer->egress_meter,
> +                                  &policer->egress_prof, pkts,
>                                    pkt_cnt, should_steal);
>  
>      return cnt;
> @@ -4103,15 +4131,15 @@ dump_flow_pattern(struct rte_flow_item *item)
>  
>          VLOG_DBG("rte flow vlan pattern:\n");
>          if (vlan_spec) {
> -            VLOG_DBG("  Spec: tpid=0x%"PRIx16", tci=0x%"PRIx16"\n",
> -                     ntohs(vlan_spec->tpid), ntohs(vlan_spec->tci));
> +            VLOG_DBG("  Spec: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n",
> +                     ntohs(vlan_spec->inner_type), ntohs(vlan_spec->tci));
>          } else {
>              VLOG_DBG("  Spec = null\n");
>          }
>  
>          if (vlan_mask) {
> -            VLOG_DBG("  Mask: tpid=0x%"PRIx16", tci=0x%"PRIx16"\n",
> -                     vlan_mask->tpid, vlan_mask->tci);
> +            VLOG_DBG("  Mask: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n",
> +                     vlan_mask->inner_type, vlan_mask->tci);
>          } else {
>              VLOG_DBG("  Mask = null\n");
>          }
> @@ -4281,27 +4309,56 @@ add_flow_action(struct flow_actions *actions, enum rte_flow_action_type type,
>      actions->cnt++;
>  }
>  
> +/*
> + * Storage for struct rte_flow_action_rss
> + * including storage for key and queue array
> + */
> +#define MAX_RSS_HASH_KEY_LENGTH  128
> +#define MAX_ACTION_RSS_QUEUE_NUM 128
> +struct action_rss_data {
> +    struct rte_flow_action_rss conf;
> +    uint16_t queue[MAX_ACTION_RSS_QUEUE_NUM];
> +    uint8_t key[MAX_RSS_HASH_KEY_LENGTH];
> +};
> +
>  static struct rte_flow_action_rss *
>  add_flow_rss_action(struct flow_actions *actions,
>                      struct netdev *netdev) {
>      int i;
> -    struct rte_flow_action_rss *rss;
> +    struct action_rss_data *rss_data;
>  
> -    rss = xmalloc(sizeof(*rss) + sizeof(uint16_t) * netdev->n_rxq);
> -    /*
> -     * Setting it to NULL will let the driver use the default RSS
> -     * configuration we have set: &port_conf.rx_adv_conf.rss_conf.
> -     */
> -    rss->rss_conf = NULL;
> -    rss->num = netdev->n_rxq;
> +    if (netdev->n_rxq > MAX_ACTION_RSS_QUEUE_NUM) {
> +        VLOG_ERR("Num of rxq (%u) must not be greater " \
> +                 "than max rss num of queues (%u)",
> +                 netdev->n_rxq, MAX_ACTION_RSS_QUEUE_NUM);
> +        return NULL;
> +    }
>  
> -    for (i = 0; i < rss->num; i++) {
> -        rss->queue[i] = i;
> +    rss_data = xmalloc(sizeof(struct action_rss_data));
> +    *rss_data = (struct action_rss_data){
> +        .conf = (struct rte_flow_action_rss){

Some spaces needed between the type and '{'.

> +            .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
> +            .level = 0,
> +            .types = ETH_RSS_IP,
> +            .key_len = 0,
> +            .queue_num = netdev->n_rxq,
> +            .queue = rss_data->queue,
> +            .key = rss_data->key,
> +        },
> +        .key = { 0 },
> +        .queue = { 0 },

It's better to use 'xzalloc' instead of manual memory initialization.

> +    };
> +
> +    /* TODO: Override key with default */
> +
> +    /* Override queue array with default */
> +    for (i = 0; i < rss_data->conf.queue_num; i++) {
> +       rss_data->queue[i] = i;
>      }
>  
> -    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, rss);
> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, &rss_data->conf);
>  
> -    return rss;
> +    return &rss_data->conf;

Pointer to the field of the structure returned. 'free' will be invoked for it
but not for the pointer to the structure. This should not be like this even
if this field is the first one.

Maybe it's better to not have the additional structure and allocate arrays
explicitly? The caller will free rss->queue, rss->key and the rss itself
at the end. This will also remove the restriction for the number of queues.

>  }
>  
>  static int
> @@ -4365,7 +4422,7 @@ netdev_dpdk_add_rte_flow_offload(struct netdev *netdev,
>          vlan_mask.tci  = match->wc.masks.vlans[0].tci & ~htons(VLAN_CFI);
>  
>          /* match any protocols */
> -        vlan_mask.tpid = 0;
> +        vlan_mask.inner_type = 0;
>  
>          add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_VLAN,
>                           &vlan_spec, &vlan_mask);
> @@ -4516,6 +4573,11 @@ end_proto_check:
>  
>      struct rte_flow_action_rss *rss;
>      rss = add_flow_rss_action(&actions, netdev);
> +    if (!rss) {
> +        VLOG_ERR("add_flow_rss_action error.\n");
> +        ret = -1;
> +        goto out;
> +    }
>      add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL);
>  
>      flow = rte_flow_create(dev->port_id, &flow_attr, patterns.items,
>
Stokes, Ian Sept. 11, 2018, 9:18 p.m. UTC | #2
On 9/11/2018 10:35 AM, Ilya Maximets wrote:
> Comments inline.
> 
> Best regards, Ilya Maximets.
> 
> On 11.09.2018 02:04, Ophir Munk wrote:
>> 1. Enable compilation and linkage with dpdk 18.08.0
>> The following dpdk commits which were introduced after dpdk 17.11.x
>> require OVS updates to accommodate to the dpdk changes.
>> - ce17eddefc20 ("ethdev: introduce Rx queue offloads API")
>> - ab3ce1e0c193 ("ethdev: remove old offload API")
>> - c06ddf9698e0 ("meter: add configuration profile")
>> - e58638c32411 ("ethdev: fix TPID handling in flow API")
>> - cd8c7c7ce241 ("ethdev: replace bus specific struct with generic dev")
>> - ac8d22de2394 ("ethdev: flatten RSS configuration in flow API")
>>
>> 2. Limit configured rss hash functions to only those supported
>> by the eth device.
>>
>> 3. Update references to DPDK version in Documentation
>>
>> 4. Update DPDK version in travis' linux-build script

Thanks for working on this Ophir, I've been testing and spotted one 
issue relating to scatter offload enablement detailed below.

I'll keep testing in the mean time.

>>
>> Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
>> ---
>> v1:
>> First version
>>
>> v2:
>> Avoid seg faults cases as described in
>> https://patchwork.ozlabs.org/patch/965451/
>> by using the patch in:
>> https://github.com/kevintraynor/ovs-dpdk-master/commit/88f46cc5ab338eb4f3ca5db1eacd0effefe4fa0c
>>
>>   .travis/linux-build.sh         |   2 +-
>>   Documentation/faq/releases.rst |   2 +-
>>   lib/netdev-dpdk.c              | 142 +++++++++++++++++++++++++++++------------
>>   3 files changed, 104 insertions(+), 42 deletions(-)
>>
>> diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh
>> index 4b9fc4a..c60ac71 100755
>> --- a/.travis/linux-build.sh
>> +++ b/.travis/linux-build.sh
>> @@ -83,7 +83,7 @@ fi
>>   
>>   if [ "$DPDK" ]; then
>>       if [ -z "$DPDK_VER" ]; then
>> -        DPDK_VER="17.11.3"
>> +        DPDK_VER="18.08.0"
>>       fi
>>       install_dpdk $DPDK_VER
>>       if [ "$CC" = "clang" ]; then
>> diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst
>> index 41d41e3..646ae09 100644
>> --- a/Documentation/faq/releases.rst
>> +++ b/Documentation/faq/releases.rst
>> @@ -168,7 +168,7 @@ Q: What DPDK version does each Open vSwitch release work with?
>>       2.7.x        16.11.7
>>       2.8.x        17.05.2
>>       2.9.x        17.11.3
>> -    2.10.x       17.11.3
>> +    2.10.x       18.08.0
>>       ============ =======
>>   
>>   Q: Are all the DPDK releases that OVS versions work with maintained?
>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>> index f91aa27..6879e9a 100644
>> --- a/lib/netdev-dpdk.c
>> +++ b/lib/netdev-dpdk.c
>> @@ -168,11 +168,7 @@ static const struct rte_eth_conf port_conf = {
>>       .rxmode = {
>>           .mq_mode = ETH_MQ_RX_RSS,
>>           .split_hdr_size = 0,
>> -        .header_split   = 0, /* Header Split disabled */
>> -        .hw_ip_checksum = 0, /* IP checksum offload disabled */
>> -        .hw_vlan_filter = 0, /* VLAN filtering disabled */
>> -        .jumbo_frame    = 0, /* Jumbo Frame Support disabled */
>> -        .hw_strip_crc   = 0,
>> +        .offloads = 0,
>>       },
>>       .rx_adv_conf = {
>>           .rss_conf = {
>> @@ -364,6 +360,7 @@ struct dpdk_ring {
>>   struct ingress_policer {
>>       struct rte_meter_srtcm_params app_srtcm_params;
>>       struct rte_meter_srtcm in_policer;
>> +    struct rte_meter_srtcm_profile in_prof;
>>       rte_spinlock_t policer_lock;
>>   };
>>   
>> @@ -894,6 +891,8 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
>>       struct rte_eth_dev_info info;
>>       uint16_t conf_mtu;
>>   
>> +    rte_eth_dev_info_get(dev->port_id, &info);
>> +
>>       /* As of DPDK 17.11.1 a few PMDs require to explicitly enable
>>        * scatter to support jumbo RX. Checking the offload capabilities
>>        * is not an option as PMDs are not required yet to report
>> @@ -901,20 +900,24 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
>>        * (testing or code review). Listing all such PMDs feels harder
>>        * than highlighting the one known not to need scatter */
>>       if (dev->mtu > ETHER_MTU) {
>> -        rte_eth_dev_info_get(dev->port_id, &info);
>>           if (strncmp(info.driver_name, "net_nfp", 7)) {
>> -            conf.rxmode.enable_scatter = 1;
>> +            conf.rxmode.offloads |= DEV_RX_OFFLOAD_SCATTER;

Above will break MTU support for i40e devices (and any other devices 
that do not have scatter listed in their offload capabilities).

DPDK seems to have implemented offload capabilities for more PMD drivers 
since 17.11 so I'm thinking we can remove the specific check against 
net_nfp altogether and check for support for scatter in device 
capabilities before setting it for the device. Something like below:

     if (dev->mtu > ETHER_MTU) {
         if (info.rx_offload_capa & DEV_RX_OFFLOAD_SCATTER) {
             conf.rxmode.offloads |= DEV_RX_OFFLOAD_SCATTER;
         }
     }

It would require confirmation/testing for other PMD devices however that 
they list supported offloads correctly for scatter if it's required for 
RX jumbo frames.

Ian
Kevin Traynor Sept. 12, 2018, 10:29 a.m. UTC | #3
On 09/11/2018 10:35 AM, Ilya Maximets wrote:
> Comments inline.
> 
> Best regards, Ilya Maximets.
> 
> On 11.09.2018 02:04, Ophir Munk wrote:
>> 1. Enable compilation and linkage with dpdk 18.08.0
>> The following dpdk commits which were introduced after dpdk 17.11.x
>> require OVS updates to accommodate to the dpdk changes.
>> - ce17eddefc20 ("ethdev: introduce Rx queue offloads API")
>> - ab3ce1e0c193 ("ethdev: remove old offload API")
>> - c06ddf9698e0 ("meter: add configuration profile")
>> - e58638c32411 ("ethdev: fix TPID handling in flow API")
>> - cd8c7c7ce241 ("ethdev: replace bus specific struct with generic dev")
>> - ac8d22de2394 ("ethdev: flatten RSS configuration in flow API")
>>
>> 2. Limit configured rss hash functions to only those supported
>> by the eth device.
>>
>> 3. Update references to DPDK version in Documentation
>>
>> 4. Update DPDK version in travis' linux-build script
>>
>> Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
>> ---
>> v1:
>> First version
>>
>> v2:
>> Avoid seg faults cases as described in
>> https://patchwork.ozlabs.org/patch/965451/
>> by using the patch in:
>> https://github.com/kevintraynor/ovs-dpdk-master/commit/88f46cc5ab338eb4f3ca5db1eacd0effefe4fa0c
>>
>>  .travis/linux-build.sh         |   2 +-
>>  Documentation/faq/releases.rst |   2 +-
>>  lib/netdev-dpdk.c              | 142 +++++++++++++++++++++++++++++------------
>>  3 files changed, 104 insertions(+), 42 deletions(-)
>>
>> diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh
>> index 4b9fc4a..c60ac71 100755
>> --- a/.travis/linux-build.sh
>> +++ b/.travis/linux-build.sh
>> @@ -83,7 +83,7 @@ fi
>>  
>>  if [ "$DPDK" ]; then
>>      if [ -z "$DPDK_VER" ]; then
>> -        DPDK_VER="17.11.3"
>> +        DPDK_VER="18.08.0"
>>      fi
>>      install_dpdk $DPDK_VER
>>      if [ "$CC" = "clang" ]; then
>> diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst
>> index 41d41e3..646ae09 100644
>> --- a/Documentation/faq/releases.rst
>> +++ b/Documentation/faq/releases.rst
>> @@ -168,7 +168,7 @@ Q: What DPDK version does each Open vSwitch release work with?
>>      2.7.x        16.11.7
>>      2.8.x        17.05.2
>>      2.9.x        17.11.3
>> -    2.10.x       17.11.3
>> +    2.10.x       18.08.0
>>      ============ =======
>>  
>>  Q: Are all the DPDK releases that OVS versions work with maintained?
>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>> index f91aa27..6879e9a 100644
>> --- a/lib/netdev-dpdk.c
>> +++ b/lib/netdev-dpdk.c
>> @@ -168,11 +168,7 @@ static const struct rte_eth_conf port_conf = {
>>      .rxmode = {
>>          .mq_mode = ETH_MQ_RX_RSS,
>>          .split_hdr_size = 0,
>> -        .header_split   = 0, /* Header Split disabled */
>> -        .hw_ip_checksum = 0, /* IP checksum offload disabled */
>> -        .hw_vlan_filter = 0, /* VLAN filtering disabled */
>> -        .jumbo_frame    = 0, /* Jumbo Frame Support disabled */
>> -        .hw_strip_crc   = 0,
>> +        .offloads = 0,
>>      },
>>      .rx_adv_conf = {
>>          .rss_conf = {
>> @@ -364,6 +360,7 @@ struct dpdk_ring {
>>  struct ingress_policer {
>>      struct rte_meter_srtcm_params app_srtcm_params;
>>      struct rte_meter_srtcm in_policer;
>> +    struct rte_meter_srtcm_profile in_prof;
>>      rte_spinlock_t policer_lock;
>>  };
>>  
>> @@ -894,6 +891,8 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
>>      struct rte_eth_dev_info info;
>>      uint16_t conf_mtu;
>>  
>> +    rte_eth_dev_info_get(dev->port_id, &info);
>> +
>>      /* As of DPDK 17.11.1 a few PMDs require to explicitly enable
>>       * scatter to support jumbo RX. Checking the offload capabilities
>>       * is not an option as PMDs are not required yet to report
>> @@ -901,20 +900,24 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
>>       * (testing or code review). Listing all such PMDs feels harder
>>       * than highlighting the one known not to need scatter */
>>      if (dev->mtu > ETHER_MTU) {
>> -        rte_eth_dev_info_get(dev->port_id, &info);
>>          if (strncmp(info.driver_name, "net_nfp", 7)) {
>> -            conf.rxmode.enable_scatter = 1;
>> +            conf.rxmode.offloads |= DEV_RX_OFFLOAD_SCATTER;
>>          }
>>      }
>>  
>>      conf.intr_conf.lsc = dev->lsc_interrupt_mode;
>> -    conf.rxmode.hw_ip_checksum = (dev->hw_ol_features &
>> -                                  NETDEV_RX_CHECKSUM_OFFLOAD) != 0;
>> +    conf.rxmode.offloads |= ((dev->hw_ol_features &
>> +                             NETDEV_RX_CHECKSUM_OFFLOAD) != 0) ?
>> +                             DEV_RX_OFFLOAD_CHECKSUM : 0;
> 
> IMHO, it's better to use 'if'. This thing became too complex.
> 

+1, also to make it more consistent with others

    if (dev->hw_ol_features & NETDEV_RX_CHECKSUM_OFFLOAD) {
        conf.rxmode.offloads |= DEV_RX_OFFLOAD_CHECKSUM;
    }

>>  
>>      if (dev->hw_ol_features & NETDEV_RX_HW_CRC_STRIP) {
>> -        conf.rxmode.hw_strip_crc = 1;
>> +        conf.rxmode.offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
>>      }
>>  
>> +    /* Limit configured rss hash functions to only those supported
>> +     * by the eth device. */
>> +    conf.rx_adv_conf.rss_conf.rss_hf &= info.flow_type_rss_offloads;
>> +
>>      /* A device may report more queues than it makes available (this has
>>       * been observed for Intel xl710, which reserves some of them for
>>       * SRIOV):  rte_eth_*_queue_setup will fail if a queue is not
>> @@ -1932,16 +1935,18 @@ netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
>>  
>>  static inline bool
>>  netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm *meter,
>> +                               struct rte_meter_srtcm_profile *profile,
>>                                 struct rte_mbuf *pkt, uint64_t time)
>>  {
>>      uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt) - sizeof(struct ether_hdr);
>>  
>> -    return rte_meter_srtcm_color_blind_check(meter, time, pkt_len) ==
>> +    return rte_meter_srtcm_color_blind_check(meter, profile, time, pkt_len) ==
>>                                                  e_RTE_METER_GREEN;
>>  }
>>  
>>  static int
>>  netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
>> +                        struct rte_meter_srtcm_profile *profile,
>>                          struct rte_mbuf **pkts, int pkt_cnt,
>>                          bool should_steal)
>>  {
>> @@ -1953,7 +1958,8 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
>>      for (i = 0; i < pkt_cnt; i++) {
>>          pkt = pkts[i];
>>          /* Handle current packet */
>> -        if (netdev_dpdk_policer_pkt_handle(meter, pkt, current_time)) {
>> +        if (netdev_dpdk_policer_pkt_handle(meter, profile, pkt,
>> +                                                 current_time)) {
>>              if (cnt != i) {
>>                  pkts[cnt] = pkt;
>>              }
>> @@ -1975,8 +1981,8 @@ ingress_policer_run(struct ingress_policer *policer, struct rte_mbuf **pkts,
>>      int cnt = 0;
>>  
>>      rte_spinlock_lock(&policer->policer_lock);
>> -    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,
>> -                                  pkt_cnt, should_steal);
>> +    cnt = netdev_dpdk_policer_run(&policer->in_policer, &policer->in_prof,
>> +                                  pkts, pkt_cnt, should_steal);
>>      rte_spinlock_unlock(&policer->policer_lock);
>>  
>>      return cnt;
>> @@ -2767,8 +2773,15 @@ netdev_dpdk_policer_construct(uint32_t rate, uint32_t burst)
>>      policer->app_srtcm_params.cir = rate_bytes;
>>      policer->app_srtcm_params.cbs = burst_bytes;
>>      policer->app_srtcm_params.ebs = 0;
>> +    err = rte_meter_srtcm_profile_config(&policer->in_prof,
>> +                                         &policer->app_srtcm_params);
>> +    if (err) {
>> +        VLOG_ERR("Could not create rte meter profile for ingress policer");
>> +        free(policer);
>> +        return NULL;
>> +    }
>>      err = rte_meter_srtcm_config(&policer->in_policer,
>> -                                    &policer->app_srtcm_params);
>> +                                    &policer->in_prof);
>>      if (err) {
>>          VLOG_ERR("Could not create rte meter for ingress policer");
>>          free(policer);
>> @@ -3043,13 +3056,18 @@ netdev_dpdk_get_status(const struct netdev *netdev, struct smap *args)
>>      smap_add_format(args, "if_descr", "%s %s", rte_version(),
>>                                                 dev_info.driver_name);
>>  
>> -    if (dev_info.pci_dev) {
>> -        smap_add_format(args, "pci-vendor_id", "0x%x",
>> -                        dev_info.pci_dev->id.vendor_id);
>> -        smap_add_format(args, "pci-device_id", "0x%x",
>> -                        dev_info.pci_dev->id.device_id);
>> +    const struct rte_bus *bus;
>> +    const struct rte_pci_device *pci_dev;
>> +    bus = rte_bus_find_by_device(dev_info.device);
>> +    if (bus && !strcmp(bus->name, "pci")) {
>> +        pci_dev = RTE_DEV_TO_PCI(dev_info.device);
>> +        if (pci_dev) {
>> +            smap_add_format(args, "pci-vendor_id", "0x%u",
>> +                            pci_dev->id.vendor_id);
>> +             smap_add_format(args, "pci-device_id", "0x%x",
>> +                             pci_dev->id.device_id);
> 
> %u --> %x.
> Also, something happened with indents.
> 
>> +        }
>>      }
>> -
>>      return 0;
>>  }
>>  
>> @@ -3727,6 +3745,7 @@ struct egress_policer {
>>      struct qos_conf qos_conf;
>>      struct rte_meter_srtcm_params app_srtcm_params;
>>      struct rte_meter_srtcm egress_meter;
>> +    struct rte_meter_srtcm_profile egress_prof;
>>  };
>>  
>>  static void
>> @@ -3749,8 +3768,16 @@ egress_policer_qos_construct(const struct smap *details,
>>      policer = xmalloc(sizeof *policer);
>>      qos_conf_init(&policer->qos_conf, &egress_policer_ops);
>>      egress_policer_details_to_param(details, &policer->app_srtcm_params);
>> +    err = rte_meter_srtcm_profile_config(&policer->egress_prof,
>> +                                         &policer->app_srtcm_params);
>> +    if (err) {
>> +        free(policer);
>> +        *conf = NULL;
>> +        return -err;
>> +    }


you could replace above 'if' with

if (!err) {

>>      err = rte_meter_srtcm_config(&policer->egress_meter,
>> -                                 &policer->app_srtcm_params);
>> +                                 &policer->egress_prof);
>> +

}

You could do the same in ingress fn. too if you were happy to have just
one error log.

>>      if (!err) {
>>          *conf = &policer->qos_conf;
>>      } else {
>> @@ -3803,7 +3830,8 @@ egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int pkt_cnt,
>>      struct egress_policer *policer =
>>          CONTAINER_OF(conf, struct egress_policer, qos_conf);
>>  
>> -    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
>> +    cnt = netdev_dpdk_policer_run(&policer->egress_meter,
>> +                                  &policer->egress_prof, pkts,
>>                                    pkt_cnt, should_steal);
>>  
>>      return cnt;
>> @@ -4103,15 +4131,15 @@ dump_flow_pattern(struct rte_flow_item *item)
>>  
>>          VLOG_DBG("rte flow vlan pattern:\n");
>>          if (vlan_spec) {
>> -            VLOG_DBG("  Spec: tpid=0x%"PRIx16", tci=0x%"PRIx16"\n",
>> -                     ntohs(vlan_spec->tpid), ntohs(vlan_spec->tci));
>> +            VLOG_DBG("  Spec: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n",
>> +                     ntohs(vlan_spec->inner_type), ntohs(vlan_spec->tci));
>>          } else {
>>              VLOG_DBG("  Spec = null\n");
>>          }
>>  
>>          if (vlan_mask) {
>> -            VLOG_DBG("  Mask: tpid=0x%"PRIx16", tci=0x%"PRIx16"\n",
>> -                     vlan_mask->tpid, vlan_mask->tci);
>> +            VLOG_DBG("  Mask: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n",
>> +                     vlan_mask->inner_type, vlan_mask->tci);
>>          } else {
>>              VLOG_DBG("  Mask = null\n");
>>          }
>> @@ -4281,27 +4309,56 @@ add_flow_action(struct flow_actions *actions, enum rte_flow_action_type type,
>>      actions->cnt++;
>>  }
>>  
>> +/*
>> + * Storage for struct rte_flow_action_rss
>> + * including storage for key and queue array
>> + */
>> +#define MAX_RSS_HASH_KEY_LENGTH  128
>> +#define MAX_ACTION_RSS_QUEUE_NUM 128
>> +struct action_rss_data {
>> +    struct rte_flow_action_rss conf;
>> +    uint16_t queue[MAX_ACTION_RSS_QUEUE_NUM];
>> +    uint8_t key[MAX_RSS_HASH_KEY_LENGTH];
>> +};
>> +
>>  static struct rte_flow_action_rss *
>>  add_flow_rss_action(struct flow_actions *actions,
>>                      struct netdev *netdev) {
>>      int i;
>> -    struct rte_flow_action_rss *rss;
>> +    struct action_rss_data *rss_data;
>>  
>> -    rss = xmalloc(sizeof(*rss) + sizeof(uint16_t) * netdev->n_rxq);
>> -    /*
>> -     * Setting it to NULL will let the driver use the default RSS
>> -     * configuration we have set: &port_conf.rx_adv_conf.rss_conf.
>> -     */
>> -    rss->rss_conf = NULL;
>> -    rss->num = netdev->n_rxq;
>> +    if (netdev->n_rxq > MAX_ACTION_RSS_QUEUE_NUM) {
>> +        VLOG_ERR("Num of rxq (%u) must not be greater " \
>> +                 "than max rss num of queues (%u)",
>> +                 netdev->n_rxq, MAX_ACTION_RSS_QUEUE_NUM);
>> +        return NULL;
>> +    }
>>  
>> -    for (i = 0; i < rss->num; i++) {
>> -        rss->queue[i] = i;
>> +    rss_data = xmalloc(sizeof(struct action_rss_data));
>> +    *rss_data = (struct action_rss_data){
>> +        .conf = (struct rte_flow_action_rss){
> 
> Some spaces needed between the type and '{'.
> 
>> +            .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
>> +            .level = 0,
>> +            .types = ETH_RSS_IP,

Do you need to check that this RSS type is supported from dev info
before setting it, similar to dpdk_eth_dev_port_config()? or you can be
sure it's supported at this point?

>> +            .key_len = 0,
>> +            .queue_num = netdev->n_rxq,
>> +            .queue = rss_data->queue,
>> +            .key = rss_data->key,
>> +        },
>> +        .key = { 0 },
>> +        .queue = { 0 },
> 
> It's better to use 'xzalloc' instead of manual memory initialization.
> 
>> +    };
>> +
>> +    /* TODO: Override key with default */
>> +
>> +    /* Override queue array with default */
>> +    for (i = 0; i < rss_data->conf.queue_num; i++) {
>> +       rss_data->queue[i] = i;
>>      }
>>  
>> -    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, rss);
>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, &rss_data->conf);
>>  
>> -    return rss;
>> +    return &rss_data->conf;
> 
> Pointer to the field of the structure returned. 'free' will be invoked for it
> but not for the pointer to the structure. This should not be like this even
> if this field is the first one.
> 
> Maybe it's better to not have the additional structure and allocate arrays
> explicitly? The caller will free rss->queue, rss->key and the rss itself
> at the end. This will also remove the restriction for the number of queues.
> 
>>  }
>>  
>>  static int
>> @@ -4365,7 +4422,7 @@ netdev_dpdk_add_rte_flow_offload(struct netdev *netdev,
>>          vlan_mask.tci  = match->wc.masks.vlans[0].tci & ~htons(VLAN_CFI);
>>  
>>          /* match any protocols */
>> -        vlan_mask.tpid = 0;
>> +        vlan_mask.inner_type = 0;
>>  
>>          add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_VLAN,
>>                           &vlan_spec, &vlan_mask);
>> @@ -4516,6 +4573,11 @@ end_proto_check:
>>  
>>      struct rte_flow_action_rss *rss;
>>      rss = add_flow_rss_action(&actions, netdev);
>> +    if (!rss) {
>> +        VLOG_ERR("add_flow_rss_action error.\n");
>> +        ret = -1;
>> +        goto out;
>> +    }
>>      add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL);
>>  
>>      flow = rte_flow_create(dev->port_id, &flow_attr, patterns.items,
>>
Chandran, Sugesh Sept. 12, 2018, 4:05 p.m. UTC | #4
Hi Ophir,
I verified this patch in our test lab and looks fine to me.
I have couple of minor comments in the V1 series. Could you please take care of them as well?


Regards
_Sugesh

> -----Original Message-----
> From: Ophir Munk [mailto:ophirmu@mellanox.com]
> Sent: Tuesday, September 11, 2018 12:05 AM
> To: ovs-dev@openvswitch.org
> Cc: Asaf Penso <asafp@mellanox.com>; Chandran, Sugesh
> <sugesh.chandran@intel.com>; Stokes, Ian <ian.stokes@intel.com>; Ben Pfaff
> <blp@ovn.org>; Shahaf Shuler <shahafs@mellanox.com>; Thomas Monjalon
> <thomas@monjalon.net>; Olga Shern <olgas@mellanox.com>; Ophir Munk
> <ophirmu@mellanox.com>; Kevin Traynor <ktraynor@redhat.com>
> Subject: [dpdk-howl PATCH v2] netdev-dpdk: Upgrade to dpdk v18.08.0
> 
> 1. Enable compilation and linkage with dpdk 18.08.0 The following dpdk
> commits which were introduced after dpdk 17.11.x require OVS updates to
> accommodate to the dpdk changes.
> - ce17eddefc20 ("ethdev: introduce Rx queue offloads API")
> - ab3ce1e0c193 ("ethdev: remove old offload API")
> - c06ddf9698e0 ("meter: add configuration profile")
> - e58638c32411 ("ethdev: fix TPID handling in flow API")
> - cd8c7c7ce241 ("ethdev: replace bus specific struct with generic dev")
> - ac8d22de2394 ("ethdev: flatten RSS configuration in flow API")
> 
> 2. Limit configured rss hash functions to only those supported by the eth device.
> 
> 3. Update references to DPDK version in Documentation
> 
> 4. Update DPDK version in travis' linux-build script
> 
> Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
> ---
> v1:
> First version
> 
> v2:
> Avoid seg faults cases as described in
> https://patchwork.ozlabs.org/patch/965451/
> by using the patch in:
> https://github.com/kevintraynor/ovs-dpdk-
> master/commit/88f46cc5ab338eb4f3ca5db1eacd0effefe4fa0c
> 
>  .travis/linux-build.sh         |   2 +-
>  Documentation/faq/releases.rst |   2 +-
>  lib/netdev-dpdk.c              | 142 +++++++++++++++++++++++++++++------------
>  3 files changed, 104 insertions(+), 42 deletions(-)
> 
> diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh index 4b9fc4a..c60ac71
> 100755
> --- a/.travis/linux-build.sh
> +++ b/.travis/linux-build.sh
> @@ -83,7 +83,7 @@ fi
> 
>  if [ "$DPDK" ]; then
>      if [ -z "$DPDK_VER" ]; then
> -        DPDK_VER="17.11.3"
> +        DPDK_VER="18.08.0"
>      fi
>      install_dpdk $DPDK_VER
>      if [ "$CC" = "clang" ]; then
> diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst
> index 41d41e3..646ae09 100644
> --- a/Documentation/faq/releases.rst
> +++ b/Documentation/faq/releases.rst
> @@ -168,7 +168,7 @@ Q: What DPDK version does each Open vSwitch release
> work with?
>      2.7.x        16.11.7
>      2.8.x        17.05.2
>      2.9.x        17.11.3
> -    2.10.x       17.11.3
> +    2.10.x       18.08.0
>      ============ =======
> 
>  Q: Are all the DPDK releases that OVS versions work with maintained?
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index f91aa27..6879e9a
> 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -168,11 +168,7 @@ static const struct rte_eth_conf port_conf = {
>      .rxmode = {
>          .mq_mode = ETH_MQ_RX_RSS,
>          .split_hdr_size = 0,
> -        .header_split   = 0, /* Header Split disabled */
> -        .hw_ip_checksum = 0, /* IP checksum offload disabled */
> -        .hw_vlan_filter = 0, /* VLAN filtering disabled */
> -        .jumbo_frame    = 0, /* Jumbo Frame Support disabled */
> -        .hw_strip_crc   = 0,
> +        .offloads = 0,
>      },
>      .rx_adv_conf = {
>          .rss_conf = {
> @@ -364,6 +360,7 @@ struct dpdk_ring {
>  struct ingress_policer {
>      struct rte_meter_srtcm_params app_srtcm_params;
>      struct rte_meter_srtcm in_policer;
> +    struct rte_meter_srtcm_profile in_prof;
>      rte_spinlock_t policer_lock;
>  };
> 
> @@ -894,6 +891,8 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int
> n_rxq, int n_txq)
>      struct rte_eth_dev_info info;
>      uint16_t conf_mtu;
> 
> +    rte_eth_dev_info_get(dev->port_id, &info);
> +
>      /* As of DPDK 17.11.1 a few PMDs require to explicitly enable
>       * scatter to support jumbo RX. Checking the offload capabilities
>       * is not an option as PMDs are not required yet to report @@ -901,20
> +900,24 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int
> n_txq)
>       * (testing or code review). Listing all such PMDs feels harder
>       * than highlighting the one known not to need scatter */
>      if (dev->mtu > ETHER_MTU) {
> -        rte_eth_dev_info_get(dev->port_id, &info);
>          if (strncmp(info.driver_name, "net_nfp", 7)) {
> -            conf.rxmode.enable_scatter = 1;
> +            conf.rxmode.offloads |= DEV_RX_OFFLOAD_SCATTER;
>          }
>      }
> 
>      conf.intr_conf.lsc = dev->lsc_interrupt_mode;
> -    conf.rxmode.hw_ip_checksum = (dev->hw_ol_features &
> -                                  NETDEV_RX_CHECKSUM_OFFLOAD) != 0;
> +    conf.rxmode.offloads |= ((dev->hw_ol_features &
> +                             NETDEV_RX_CHECKSUM_OFFLOAD) != 0) ?
> +                             DEV_RX_OFFLOAD_CHECKSUM : 0;
> 
>      if (dev->hw_ol_features & NETDEV_RX_HW_CRC_STRIP) {
> -        conf.rxmode.hw_strip_crc = 1;
> +        conf.rxmode.offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
>      }
> 
> +    /* Limit configured rss hash functions to only those supported
> +     * by the eth device. */
> +    conf.rx_adv_conf.rss_conf.rss_hf &= info.flow_type_rss_offloads;
> +
>      /* A device may report more queues than it makes available (this has
>       * been observed for Intel xl710, which reserves some of them for
>       * SRIOV):  rte_eth_*_queue_setup will fail if a queue is not @@ -1932,16
> +1935,18 @@ netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
> 
>  static inline bool
>  netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm *meter,
> +                               struct rte_meter_srtcm_profile *profile,
>                                 struct rte_mbuf *pkt, uint64_t time)  {
>      uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt) - sizeof(struct ether_hdr);
> 
> -    return rte_meter_srtcm_color_blind_check(meter, time, pkt_len) ==
> +    return rte_meter_srtcm_color_blind_check(meter, profile, time,
> + pkt_len) ==
>                                                  e_RTE_METER_GREEN;  }
> 
>  static int
>  netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
> +                        struct rte_meter_srtcm_profile *profile,
>                          struct rte_mbuf **pkts, int pkt_cnt,
>                          bool should_steal)  { @@ -1953,7 +1958,8 @@
> netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
>      for (i = 0; i < pkt_cnt; i++) {
>          pkt = pkts[i];
>          /* Handle current packet */
> -        if (netdev_dpdk_policer_pkt_handle(meter, pkt, current_time)) {
> +        if (netdev_dpdk_policer_pkt_handle(meter, profile, pkt,
> +                                                 current_time)) {
>              if (cnt != i) {
>                  pkts[cnt] = pkt;
>              }
> @@ -1975,8 +1981,8 @@ ingress_policer_run(struct ingress_policer *policer,
> struct rte_mbuf **pkts,
>      int cnt = 0;
> 
>      rte_spinlock_lock(&policer->policer_lock);
> -    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,
> -                                  pkt_cnt, should_steal);
> +    cnt = netdev_dpdk_policer_run(&policer->in_policer, &policer->in_prof,
> +                                  pkts, pkt_cnt, should_steal);
>      rte_spinlock_unlock(&policer->policer_lock);
> 
>      return cnt;
> @@ -2767,8 +2773,15 @@ netdev_dpdk_policer_construct(uint32_t rate,
> uint32_t burst)
>      policer->app_srtcm_params.cir = rate_bytes;
>      policer->app_srtcm_params.cbs = burst_bytes;
>      policer->app_srtcm_params.ebs = 0;
> +    err = rte_meter_srtcm_profile_config(&policer->in_prof,
> +                                         &policer->app_srtcm_params);
> +    if (err) {
> +        VLOG_ERR("Could not create rte meter profile for ingress policer");
> +        free(policer);
> +        return NULL;
> +    }
>      err = rte_meter_srtcm_config(&policer->in_policer,
> -                                    &policer->app_srtcm_params);
> +                                    &policer->in_prof);
>      if (err) {
>          VLOG_ERR("Could not create rte meter for ingress policer");
>          free(policer);
> @@ -3043,13 +3056,18 @@ netdev_dpdk_get_status(const struct netdev
> *netdev, struct smap *args)
>      smap_add_format(args, "if_descr", "%s %s", rte_version(),
>                                                 dev_info.driver_name);
> 
> -    if (dev_info.pci_dev) {
> -        smap_add_format(args, "pci-vendor_id", "0x%x",
> -                        dev_info.pci_dev->id.vendor_id);
> -        smap_add_format(args, "pci-device_id", "0x%x",
> -                        dev_info.pci_dev->id.device_id);
> +    const struct rte_bus *bus;
> +    const struct rte_pci_device *pci_dev;
> +    bus = rte_bus_find_by_device(dev_info.device);
> +    if (bus && !strcmp(bus->name, "pci")) {
> +        pci_dev = RTE_DEV_TO_PCI(dev_info.device);
> +        if (pci_dev) {
> +            smap_add_format(args, "pci-vendor_id", "0x%u",
> +                            pci_dev->id.vendor_id);
> +             smap_add_format(args, "pci-device_id", "0x%x",
> +                             pci_dev->id.device_id);
> +        }
>      }
> -
>      return 0;
>  }
> 
> @@ -3727,6 +3745,7 @@ struct egress_policer {
>      struct qos_conf qos_conf;
>      struct rte_meter_srtcm_params app_srtcm_params;
>      struct rte_meter_srtcm egress_meter;
> +    struct rte_meter_srtcm_profile egress_prof;
>  };
> 
>  static void
> @@ -3749,8 +3768,16 @@ egress_policer_qos_construct(const struct smap
> *details,
>      policer = xmalloc(sizeof *policer);
>      qos_conf_init(&policer->qos_conf, &egress_policer_ops);
>      egress_policer_details_to_param(details, &policer->app_srtcm_params);
> +    err = rte_meter_srtcm_profile_config(&policer->egress_prof,
> +                                         &policer->app_srtcm_params);
> +    if (err) {
> +        free(policer);
> +        *conf = NULL;
> +        return -err;
> +    }
>      err = rte_meter_srtcm_config(&policer->egress_meter,
> -                                 &policer->app_srtcm_params);
> +                                 &policer->egress_prof);
> +
>      if (!err) {
>          *conf = &policer->qos_conf;
>      } else {
> @@ -3803,7 +3830,8 @@ egress_policer_run(struct qos_conf *conf, struct
> rte_mbuf **pkts, int pkt_cnt,
>      struct egress_policer *policer =
>          CONTAINER_OF(conf, struct egress_policer, qos_conf);
> 
> -    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
> +    cnt = netdev_dpdk_policer_run(&policer->egress_meter,
> +                                  &policer->egress_prof, pkts,
>                                    pkt_cnt, should_steal);
> 
>      return cnt;
> @@ -4103,15 +4131,15 @@ dump_flow_pattern(struct rte_flow_item *item)
> 
>          VLOG_DBG("rte flow vlan pattern:\n");
>          if (vlan_spec) {
> -            VLOG_DBG("  Spec: tpid=0x%"PRIx16", tci=0x%"PRIx16"\n",
> -                     ntohs(vlan_spec->tpid), ntohs(vlan_spec->tci));
> +            VLOG_DBG("  Spec: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n",
> +                     ntohs(vlan_spec->inner_type),
> + ntohs(vlan_spec->tci));
>          } else {
>              VLOG_DBG("  Spec = null\n");
>          }
> 
>          if (vlan_mask) {
> -            VLOG_DBG("  Mask: tpid=0x%"PRIx16", tci=0x%"PRIx16"\n",
> -                     vlan_mask->tpid, vlan_mask->tci);
> +            VLOG_DBG("  Mask: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n",
> +                     vlan_mask->inner_type, vlan_mask->tci);
>          } else {
>              VLOG_DBG("  Mask = null\n");
>          }
> @@ -4281,27 +4309,56 @@ add_flow_action(struct flow_actions *actions,
> enum rte_flow_action_type type,
>      actions->cnt++;
>  }
> 
> +/*
> + * Storage for struct rte_flow_action_rss
> + * including storage for key and queue array  */ #define
> +MAX_RSS_HASH_KEY_LENGTH  128 #define MAX_ACTION_RSS_QUEUE_NUM
> 128
> +struct action_rss_data {
> +    struct rte_flow_action_rss conf;
> +    uint16_t queue[MAX_ACTION_RSS_QUEUE_NUM];
> +    uint8_t key[MAX_RSS_HASH_KEY_LENGTH]; };
> +
>  static struct rte_flow_action_rss *
>  add_flow_rss_action(struct flow_actions *actions,
>                      struct netdev *netdev) {
>      int i;
> -    struct rte_flow_action_rss *rss;
> +    struct action_rss_data *rss_data;
> 
> -    rss = xmalloc(sizeof(*rss) + sizeof(uint16_t) * netdev->n_rxq);
> -    /*
> -     * Setting it to NULL will let the driver use the default RSS
> -     * configuration we have set: &port_conf.rx_adv_conf.rss_conf.
> -     */
> -    rss->rss_conf = NULL;
> -    rss->num = netdev->n_rxq;
> +    if (netdev->n_rxq > MAX_ACTION_RSS_QUEUE_NUM) {
> +        VLOG_ERR("Num of rxq (%u) must not be greater " \
> +                 "than max rss num of queues (%u)",
> +                 netdev->n_rxq, MAX_ACTION_RSS_QUEUE_NUM);
> +        return NULL;
> +    }
> 
> -    for (i = 0; i < rss->num; i++) {
> -        rss->queue[i] = i;
> +    rss_data = xmalloc(sizeof(struct action_rss_data));
> +    *rss_data = (struct action_rss_data){
> +        .conf = (struct rte_flow_action_rss){
> +            .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
> +            .level = 0,
> +            .types = ETH_RSS_IP,
> +            .key_len = 0,
> +            .queue_num = netdev->n_rxq,
> +            .queue = rss_data->queue,
> +            .key = rss_data->key,
> +        },
> +        .key = { 0 },
> +        .queue = { 0 },
> +    };
> +
> +    /* TODO: Override key with default */
> +
> +    /* Override queue array with default */
> +    for (i = 0; i < rss_data->conf.queue_num; i++) {
> +       rss_data->queue[i] = i;
>      }
> 
> -    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, rss);
> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS,
> + &rss_data->conf);
> 
> -    return rss;
> +    return &rss_data->conf;
>  }
> 
>  static int
> @@ -4365,7 +4422,7 @@ netdev_dpdk_add_rte_flow_offload(struct netdev
> *netdev,
>          vlan_mask.tci  = match->wc.masks.vlans[0].tci & ~htons(VLAN_CFI);
> 
>          /* match any protocols */
> -        vlan_mask.tpid = 0;
> +        vlan_mask.inner_type = 0;
> 
>          add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_VLAN,
>                           &vlan_spec, &vlan_mask); @@ -4516,6 +4573,11 @@
> end_proto_check:
> 
>      struct rte_flow_action_rss *rss;
>      rss = add_flow_rss_action(&actions, netdev);
> +    if (!rss) {
> +        VLOG_ERR("add_flow_rss_action error.\n");
> +        ret = -1;
> +        goto out;
> +    }
>      add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL);
> 
>      flow = rte_flow_create(dev->port_id, &flow_attr, patterns.items,
> --
> 1.8.3.1
Chandran, Sugesh Sept. 13, 2018, 11:58 a.m. UTC | #5
Hi Ophir,

Also a quick grep show there are number of places in documentation still pointing to DPDK17.11, I guess its worth while to change them as well.
This will avoid confusion for people who wanted to use this branch, What do you think?



Regards
_Sugesh


> -----Original Message-----
> From: Chandran, Sugesh
> Sent: Wednesday, September 12, 2018 5:05 PM
> To: Ophir Munk <ophirmu@mellanox.com>; ovs-dev@openvswitch.org
> Cc: Asaf Penso <asafp@mellanox.com>; Stokes, Ian <ian.stokes@intel.com>;
> Ben Pfaff <blp@ovn.org>; Shahaf Shuler <shahafs@mellanox.com>; Thomas
> Monjalon <thomas@monjalon.net>; Olga Shern <olgas@mellanox.com>; Kevin
> Traynor <ktraynor@redhat.com>
> Subject: RE: [dpdk-howl PATCH v2] netdev-dpdk: Upgrade to dpdk v18.08.0
> 
> Hi Ophir,
> I verified this patch in our test lab and looks fine to me.
> I have couple of minor comments in the V1 series. Could you please take care of
> them as well?
> 
> 
> Regards
> _Sugesh
> 
> > -----Original Message-----
> > From: Ophir Munk [mailto:ophirmu@mellanox.com]
> > Sent: Tuesday, September 11, 2018 12:05 AM
> > To: ovs-dev@openvswitch.org
> > Cc: Asaf Penso <asafp@mellanox.com>; Chandran, Sugesh
> > <sugesh.chandran@intel.com>; Stokes, Ian <ian.stokes@intel.com>; Ben
> > Pfaff <blp@ovn.org>; Shahaf Shuler <shahafs@mellanox.com>; Thomas
> > Monjalon <thomas@monjalon.net>; Olga Shern <olgas@mellanox.com>;
> Ophir
> > Munk <ophirmu@mellanox.com>; Kevin Traynor <ktraynor@redhat.com>
> > Subject: [dpdk-howl PATCH v2] netdev-dpdk: Upgrade to dpdk v18.08.0
> >
> > 1. Enable compilation and linkage with dpdk 18.08.0 The following dpdk
> > commits which were introduced after dpdk 17.11.x require OVS updates
> > to accommodate to the dpdk changes.
> > - ce17eddefc20 ("ethdev: introduce Rx queue offloads API")
> > - ab3ce1e0c193 ("ethdev: remove old offload API")
> > - c06ddf9698e0 ("meter: add configuration profile")
> > - e58638c32411 ("ethdev: fix TPID handling in flow API")
> > - cd8c7c7ce241 ("ethdev: replace bus specific struct with generic
> > dev")
> > - ac8d22de2394 ("ethdev: flatten RSS configuration in flow API")
> >
> > 2. Limit configured rss hash functions to only those supported by the eth
> device.
> >
> > 3. Update references to DPDK version in Documentation
> >
> > 4. Update DPDK version in travis' linux-build script
> >
> > Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
> > ---
> > v1:
> > First version
> >
> > v2:
> > Avoid seg faults cases as described in
> > https://patchwork.ozlabs.org/patch/965451/
> > by using the patch in:
> > https://github.com/kevintraynor/ovs-dpdk-
> > master/commit/88f46cc5ab338eb4f3ca5db1eacd0effefe4fa0c
> >
> >  .travis/linux-build.sh         |   2 +-
> >  Documentation/faq/releases.rst |   2 +-
> >  lib/netdev-dpdk.c              | 142 +++++++++++++++++++++++++++++------------
> >  3 files changed, 104 insertions(+), 42 deletions(-)
> >
> > diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh index
> > 4b9fc4a..c60ac71
> > 100755
> > --- a/.travis/linux-build.sh
> > +++ b/.travis/linux-build.sh
> > @@ -83,7 +83,7 @@ fi
> >
> >  if [ "$DPDK" ]; then
> >      if [ -z "$DPDK_VER" ]; then
> > -        DPDK_VER="17.11.3"
> > +        DPDK_VER="18.08.0"
> >      fi
> >      install_dpdk $DPDK_VER
> >      if [ "$CC" = "clang" ]; then
> > diff --git a/Documentation/faq/releases.rst
> > b/Documentation/faq/releases.rst index 41d41e3..646ae09 100644
> > --- a/Documentation/faq/releases.rst
> > +++ b/Documentation/faq/releases.rst
> > @@ -168,7 +168,7 @@ Q: What DPDK version does each Open vSwitch
> > release work with?
> >      2.7.x        16.11.7
> >      2.8.x        17.05.2
> >      2.9.x        17.11.3
> > -    2.10.x       17.11.3
> > +    2.10.x       18.08.0
> >      ============ =======
> >
> >  Q: Are all the DPDK releases that OVS versions work with maintained?
> > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index
> > f91aa27..6879e9a
> > 100644
> > --- a/lib/netdev-dpdk.c
> > +++ b/lib/netdev-dpdk.c
> > @@ -168,11 +168,7 @@ static const struct rte_eth_conf port_conf = {
> >      .rxmode = {
> >          .mq_mode = ETH_MQ_RX_RSS,
> >          .split_hdr_size = 0,
> > -        .header_split   = 0, /* Header Split disabled */
> > -        .hw_ip_checksum = 0, /* IP checksum offload disabled */
> > -        .hw_vlan_filter = 0, /* VLAN filtering disabled */
> > -        .jumbo_frame    = 0, /* Jumbo Frame Support disabled */
> > -        .hw_strip_crc   = 0,
> > +        .offloads = 0,
> >      },
> >      .rx_adv_conf = {
> >          .rss_conf = {
> > @@ -364,6 +360,7 @@ struct dpdk_ring {  struct ingress_policer {
> >      struct rte_meter_srtcm_params app_srtcm_params;
> >      struct rte_meter_srtcm in_policer;
> > +    struct rte_meter_srtcm_profile in_prof;
> >      rte_spinlock_t policer_lock;
> >  };
> >
> > @@ -894,6 +891,8 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev,
> > int n_rxq, int n_txq)
> >      struct rte_eth_dev_info info;
> >      uint16_t conf_mtu;
> >
> > +    rte_eth_dev_info_get(dev->port_id, &info);
> > +
> >      /* As of DPDK 17.11.1 a few PMDs require to explicitly enable
> >       * scatter to support jumbo RX. Checking the offload capabilities
> >       * is not an option as PMDs are not required yet to report @@
> > -901,20
> > +900,24 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int
> > +n_rxq, int
> > n_txq)
> >       * (testing or code review). Listing all such PMDs feels harder
> >       * than highlighting the one known not to need scatter */
> >      if (dev->mtu > ETHER_MTU) {
> > -        rte_eth_dev_info_get(dev->port_id, &info);
> >          if (strncmp(info.driver_name, "net_nfp", 7)) {
> > -            conf.rxmode.enable_scatter = 1;
> > +            conf.rxmode.offloads |= DEV_RX_OFFLOAD_SCATTER;
> >          }
> >      }
> >
> >      conf.intr_conf.lsc = dev->lsc_interrupt_mode;
> > -    conf.rxmode.hw_ip_checksum = (dev->hw_ol_features &
> > -                                  NETDEV_RX_CHECKSUM_OFFLOAD) != 0;
> > +    conf.rxmode.offloads |= ((dev->hw_ol_features &
> > +                             NETDEV_RX_CHECKSUM_OFFLOAD) != 0) ?
> > +                             DEV_RX_OFFLOAD_CHECKSUM : 0;
> >
> >      if (dev->hw_ol_features & NETDEV_RX_HW_CRC_STRIP) {
> > -        conf.rxmode.hw_strip_crc = 1;
> > +        conf.rxmode.offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
> >      }
> >
> > +    /* Limit configured rss hash functions to only those supported
> > +     * by the eth device. */
> > +    conf.rx_adv_conf.rss_conf.rss_hf &= info.flow_type_rss_offloads;
> > +
> >      /* A device may report more queues than it makes available (this has
> >       * been observed for Intel xl710, which reserves some of them for
> >       * SRIOV):  rte_eth_*_queue_setup will fail if a queue is not @@
> > -1932,16
> > +1935,18 @@ netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
> >
> >  static inline bool
> >  netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm *meter,
> > +                               struct rte_meter_srtcm_profile
> > + *profile,
> >                                 struct rte_mbuf *pkt, uint64_t time)  {
> >      uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt) - sizeof(struct
> > ether_hdr);
> >
> > -    return rte_meter_srtcm_color_blind_check(meter, time, pkt_len) ==
> > +    return rte_meter_srtcm_color_blind_check(meter, profile, time,
> > + pkt_len) ==
> >                                                  e_RTE_METER_GREEN;  }
> >
> >  static int
> >  netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
> > +                        struct rte_meter_srtcm_profile *profile,
> >                          struct rte_mbuf **pkts, int pkt_cnt,
> >                          bool should_steal)  { @@ -1953,7 +1958,8 @@
> > netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
> >      for (i = 0; i < pkt_cnt; i++) {
> >          pkt = pkts[i];
> >          /* Handle current packet */
> > -        if (netdev_dpdk_policer_pkt_handle(meter, pkt, current_time)) {
> > +        if (netdev_dpdk_policer_pkt_handle(meter, profile, pkt,
> > +                                                 current_time)) {
> >              if (cnt != i) {
> >                  pkts[cnt] = pkt;
> >              }
> > @@ -1975,8 +1981,8 @@ ingress_policer_run(struct ingress_policer
> > *policer, struct rte_mbuf **pkts,
> >      int cnt = 0;
> >
> >      rte_spinlock_lock(&policer->policer_lock);
> > -    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,
> > -                                  pkt_cnt, should_steal);
> > +    cnt = netdev_dpdk_policer_run(&policer->in_policer, &policer->in_prof,
> > +                                  pkts, pkt_cnt, should_steal);
> >      rte_spinlock_unlock(&policer->policer_lock);
> >
> >      return cnt;
> > @@ -2767,8 +2773,15 @@ netdev_dpdk_policer_construct(uint32_t rate,
> > uint32_t burst)
> >      policer->app_srtcm_params.cir = rate_bytes;
> >      policer->app_srtcm_params.cbs = burst_bytes;
> >      policer->app_srtcm_params.ebs = 0;
> > +    err = rte_meter_srtcm_profile_config(&policer->in_prof,
> > +                                         &policer->app_srtcm_params);
> > +    if (err) {
> > +        VLOG_ERR("Could not create rte meter profile for ingress policer");
> > +        free(policer);
> > +        return NULL;
> > +    }
> >      err = rte_meter_srtcm_config(&policer->in_policer,
> > -                                    &policer->app_srtcm_params);
> > +                                    &policer->in_prof);
> >      if (err) {
> >          VLOG_ERR("Could not create rte meter for ingress policer");
> >          free(policer);
> > @@ -3043,13 +3056,18 @@ netdev_dpdk_get_status(const struct netdev
> > *netdev, struct smap *args)
> >      smap_add_format(args, "if_descr", "%s %s", rte_version(),
> >                                                 dev_info.driver_name);
> >
> > -    if (dev_info.pci_dev) {
> > -        smap_add_format(args, "pci-vendor_id", "0x%x",
> > -                        dev_info.pci_dev->id.vendor_id);
> > -        smap_add_format(args, "pci-device_id", "0x%x",
> > -                        dev_info.pci_dev->id.device_id);
> > +    const struct rte_bus *bus;
> > +    const struct rte_pci_device *pci_dev;
> > +    bus = rte_bus_find_by_device(dev_info.device);
> > +    if (bus && !strcmp(bus->name, "pci")) {
> > +        pci_dev = RTE_DEV_TO_PCI(dev_info.device);
> > +        if (pci_dev) {
> > +            smap_add_format(args, "pci-vendor_id", "0x%u",
> > +                            pci_dev->id.vendor_id);
> > +             smap_add_format(args, "pci-device_id", "0x%x",
> > +                             pci_dev->id.device_id);
> > +        }
> >      }
> > -
> >      return 0;
> >  }
> >
> > @@ -3727,6 +3745,7 @@ struct egress_policer {
> >      struct qos_conf qos_conf;
> >      struct rte_meter_srtcm_params app_srtcm_params;
> >      struct rte_meter_srtcm egress_meter;
> > +    struct rte_meter_srtcm_profile egress_prof;
> >  };
> >
> >  static void
> > @@ -3749,8 +3768,16 @@ egress_policer_qos_construct(const struct smap
> > *details,
> >      policer = xmalloc(sizeof *policer);
> >      qos_conf_init(&policer->qos_conf, &egress_policer_ops);
> >      egress_policer_details_to_param(details,
> > &policer->app_srtcm_params);
> > +    err = rte_meter_srtcm_profile_config(&policer->egress_prof,
> > +                                         &policer->app_srtcm_params);
> > +    if (err) {
> > +        free(policer);
> > +        *conf = NULL;
> > +        return -err;
> > +    }
> >      err = rte_meter_srtcm_config(&policer->egress_meter,
> > -                                 &policer->app_srtcm_params);
> > +                                 &policer->egress_prof);
> > +
> >      if (!err) {
> >          *conf = &policer->qos_conf;
> >      } else {
> > @@ -3803,7 +3830,8 @@ egress_policer_run(struct qos_conf *conf, struct
> > rte_mbuf **pkts, int pkt_cnt,
> >      struct egress_policer *policer =
> >          CONTAINER_OF(conf, struct egress_policer, qos_conf);
> >
> > -    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
> > +    cnt = netdev_dpdk_policer_run(&policer->egress_meter,
> > +                                  &policer->egress_prof, pkts,
> >                                    pkt_cnt, should_steal);
> >
> >      return cnt;
> > @@ -4103,15 +4131,15 @@ dump_flow_pattern(struct rte_flow_item *item)
> >
> >          VLOG_DBG("rte flow vlan pattern:\n");
> >          if (vlan_spec) {
> > -            VLOG_DBG("  Spec: tpid=0x%"PRIx16", tci=0x%"PRIx16"\n",
> > -                     ntohs(vlan_spec->tpid), ntohs(vlan_spec->tci));
> > +            VLOG_DBG("  Spec: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n",
> > +                     ntohs(vlan_spec->inner_type),
> > + ntohs(vlan_spec->tci));
> >          } else {
> >              VLOG_DBG("  Spec = null\n");
> >          }
> >
> >          if (vlan_mask) {
> > -            VLOG_DBG("  Mask: tpid=0x%"PRIx16", tci=0x%"PRIx16"\n",
> > -                     vlan_mask->tpid, vlan_mask->tci);
> > +            VLOG_DBG("  Mask: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n",
> > +                     vlan_mask->inner_type, vlan_mask->tci);
> >          } else {
> >              VLOG_DBG("  Mask = null\n");
> >          }
> > @@ -4281,27 +4309,56 @@ add_flow_action(struct flow_actions *actions,
> > enum rte_flow_action_type type,
> >      actions->cnt++;
> >  }
> >
> > +/*
> > + * Storage for struct rte_flow_action_rss
> > + * including storage for key and queue array  */ #define
> > +MAX_RSS_HASH_KEY_LENGTH  128 #define
> MAX_ACTION_RSS_QUEUE_NUM
> > 128
> > +struct action_rss_data {
> > +    struct rte_flow_action_rss conf;
> > +    uint16_t queue[MAX_ACTION_RSS_QUEUE_NUM];
> > +    uint8_t key[MAX_RSS_HASH_KEY_LENGTH]; };
> > +
> >  static struct rte_flow_action_rss *
> >  add_flow_rss_action(struct flow_actions *actions,
> >                      struct netdev *netdev) {
> >      int i;
> > -    struct rte_flow_action_rss *rss;
> > +    struct action_rss_data *rss_data;
> >
> > -    rss = xmalloc(sizeof(*rss) + sizeof(uint16_t) * netdev->n_rxq);
> > -    /*
> > -     * Setting it to NULL will let the driver use the default RSS
> > -     * configuration we have set: &port_conf.rx_adv_conf.rss_conf.
> > -     */
> > -    rss->rss_conf = NULL;
> > -    rss->num = netdev->n_rxq;
> > +    if (netdev->n_rxq > MAX_ACTION_RSS_QUEUE_NUM) {
> > +        VLOG_ERR("Num of rxq (%u) must not be greater " \
> > +                 "than max rss num of queues (%u)",
> > +                 netdev->n_rxq, MAX_ACTION_RSS_QUEUE_NUM);
> > +        return NULL;
> > +    }
> >
> > -    for (i = 0; i < rss->num; i++) {
> > -        rss->queue[i] = i;
> > +    rss_data = xmalloc(sizeof(struct action_rss_data));
> > +    *rss_data = (struct action_rss_data){
> > +        .conf = (struct rte_flow_action_rss){
> > +            .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
> > +            .level = 0,
> > +            .types = ETH_RSS_IP,
> > +            .key_len = 0,
> > +            .queue_num = netdev->n_rxq,
> > +            .queue = rss_data->queue,
> > +            .key = rss_data->key,
> > +        },
> > +        .key = { 0 },
> > +        .queue = { 0 },
> > +    };
> > +
> > +    /* TODO: Override key with default */
> > +
> > +    /* Override queue array with default */
> > +    for (i = 0; i < rss_data->conf.queue_num; i++) {
> > +       rss_data->queue[i] = i;
> >      }
> >
> > -    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, rss);
> > +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS,
> > + &rss_data->conf);
> >
> > -    return rss;
> > +    return &rss_data->conf;
> >  }
> >
> >  static int
> > @@ -4365,7 +4422,7 @@ netdev_dpdk_add_rte_flow_offload(struct netdev
> > *netdev,
> >          vlan_mask.tci  = match->wc.masks.vlans[0].tci &
> > ~htons(VLAN_CFI);
> >
> >          /* match any protocols */
> > -        vlan_mask.tpid = 0;
> > +        vlan_mask.inner_type = 0;
> >
> >          add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_VLAN,
> >                           &vlan_spec, &vlan_mask); @@ -4516,6 +4573,11
> > @@
> > end_proto_check:
> >
> >      struct rte_flow_action_rss *rss;
> >      rss = add_flow_rss_action(&actions, netdev);
> > +    if (!rss) {
> > +        VLOG_ERR("add_flow_rss_action error.\n");
> > +        ret = -1;
> > +        goto out;
> > +    }
> >      add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL);
> >
> >      flow = rte_flow_create(dev->port_id, &flow_attr, patterns.items,
> > --
> > 1.8.3.1
Ophir Munk Sept. 13, 2018, 12:34 p.m. UTC | #6
Hi Sugesh,
I can update documentation. Is it enough to update dpdk.rst.txt only?
If not - can you please send a link to additional files with references to 17.11 to be updated?
I will wait 1-2 days to get more reviews then will send v3.

Regards,
Ophir

> -----Original Message-----
> From: Chandran, Sugesh [mailto:sugesh.chandran@intel.com]
> Sent: Thursday, September 13, 2018 2:58 PM
> To: Ophir Munk <ophirmu@mellanox.com>; 'ovs-dev@openvswitch.org'
> <ovs-dev@openvswitch.org>
> Cc: Asaf Penso <asafp@mellanox.com>; Stokes, Ian
> <ian.stokes@intel.com>; 'Ben Pfaff' <blp@ovn.org>; Shahaf Shuler
> <shahafs@mellanox.com>; Thomas Monjalon <thomas@monjalon.net>;
> Olga Shern <olgas@mellanox.com>; 'Kevin Traynor' <ktraynor@redhat.com>
> Subject: RE: [dpdk-howl PATCH v2] netdev-dpdk: Upgrade to dpdk v18.08.0
> 
> Hi Ophir,
> 
> Also a quick grep show there are number of places in documentation still
> pointing to DPDK17.11, I guess its worth while to change them as well.
> This will avoid confusion for people who wanted to use this branch, What do
> you think?
> 
> 
> 
> Regards
> _Sugesh
> 
> 
> > -----Original Message-----
> > From: Chandran, Sugesh
> > Sent: Wednesday, September 12, 2018 5:05 PM
> > To: Ophir Munk <ophirmu@mellanox.com>; ovs-dev@openvswitch.org
> > Cc: Asaf Penso <asafp@mellanox.com>; Stokes, Ian
> > <ian.stokes@intel.com>; Ben Pfaff <blp@ovn.org>; Shahaf Shuler
> > <shahafs@mellanox.com>; Thomas Monjalon <thomas@monjalon.net>;
> Olga
> > Shern <olgas@mellanox.com>; Kevin Traynor <ktraynor@redhat.com>
> > Subject: RE: [dpdk-howl PATCH v2] netdev-dpdk: Upgrade to dpdk
> > v18.08.0
> >
> > Hi Ophir,
> > I verified this patch in our test lab and looks fine to me.
> > I have couple of minor comments in the V1 series. Could you please
> > take care of them as well?
> >
> >
> > Regards
> > _Sugesh
> >
> > > -----Original Message-----
> > > From: Ophir Munk [mailto:ophirmu@mellanox.com]
> > > Sent: Tuesday, September 11, 2018 12:05 AM
> > > To: ovs-dev@openvswitch.org
> > > Cc: Asaf Penso <asafp@mellanox.com>; Chandran, Sugesh
> > > <sugesh.chandran@intel.com>; Stokes, Ian <ian.stokes@intel.com>; Ben
> > > Pfaff <blp@ovn.org>; Shahaf Shuler <shahafs@mellanox.com>; Thomas
> > > Monjalon <thomas@monjalon.net>; Olga Shern <olgas@mellanox.com>;
> > Ophir
> > > Munk <ophirmu@mellanox.com>; Kevin Traynor
> <ktraynor@redhat.com>
> > > Subject: [dpdk-howl PATCH v2] netdev-dpdk: Upgrade to dpdk v18.08.0
> > >
> > > 1. Enable compilation and linkage with dpdk 18.08.0 The following
> > > dpdk commits which were introduced after dpdk 17.11.x require OVS
> > > updates to accommodate to the dpdk changes.
> > > - ce17eddefc20 ("ethdev: introduce Rx queue offloads API")
> > > - ab3ce1e0c193 ("ethdev: remove old offload API")
> > > - c06ddf9698e0 ("meter: add configuration profile")
> > > - e58638c32411 ("ethdev: fix TPID handling in flow API")
> > > - cd8c7c7ce241 ("ethdev: replace bus specific struct with generic
> > > dev")
> > > - ac8d22de2394 ("ethdev: flatten RSS configuration in flow API")
> > >
> > > 2. Limit configured rss hash functions to only those supported by
> > > the eth
> > device.
> > >
> > > 3. Update references to DPDK version in Documentation
> > >
> > > 4. Update DPDK version in travis' linux-build script
> > >
> > > Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
> > > ---
> > > v1:
> > > First version
> > >
> > > v2:
> > > Avoid seg faults cases as described in
> > >
> https://emea01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fp
> > >
> atchwork.ozlabs.org%2Fpatch%2F965451%2F&amp;data=02%7C01%7Cophi
> rmu%4
> > >
> 0mellanox.com%7Ccc4dcac1d427416c8dc508d619703178%7Ca652971c7d2
> e4d9ba
> > >
> 6a4d149256f461b%7C0%7C0%7C636724366985653825&amp;sdata=CFZjVD
> Rd6w0TP
> > > x7lKWOkUVEh28aoyrpl%2Fop635Vrlx0%3D&amp;reserved=0
> > > by using the patch in:
> > >
> https://emea01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fg
> > > ithub.com%2Fkevintraynor%2Fovs-dpdk-
> &amp;data=02%7C01%7Cophirmu%40me
> > >
> llanox.com%7Ccc4dcac1d427416c8dc508d619703178%7Ca652971c7d2e4d9
> ba6a4
> > >
> d149256f461b%7C0%7C0%7C636724366985653825&amp;sdata=%2F2B4uH
> HAHGma%2
> > > F48VS2I3QsMm%2ByO3LMZdaHlxVKJ9JYg%3D&amp;reserved=0
> > > master/commit/88f46cc5ab338eb4f3ca5db1eacd0effefe4fa0c
> > >
> > >  .travis/linux-build.sh         |   2 +-
> > >  Documentation/faq/releases.rst |   2 +-
> > >  lib/netdev-dpdk.c              | 142 +++++++++++++++++++++++++++++-------
> -----
> > >  3 files changed, 104 insertions(+), 42 deletions(-)
> > >
> > > diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh index
> > > 4b9fc4a..c60ac71
> > > 100755
> > > --- a/.travis/linux-build.sh
> > > +++ b/.travis/linux-build.sh
> > > @@ -83,7 +83,7 @@ fi
> > >
> > >  if [ "$DPDK" ]; then
> > >      if [ -z "$DPDK_VER" ]; then
> > > -        DPDK_VER="17.11.3"
> > > +        DPDK_VER="18.08.0"
> > >      fi
> > >      install_dpdk $DPDK_VER
> > >      if [ "$CC" = "clang" ]; then
> > > diff --git a/Documentation/faq/releases.rst
> > > b/Documentation/faq/releases.rst index 41d41e3..646ae09 100644
> > > --- a/Documentation/faq/releases.rst
> > > +++ b/Documentation/faq/releases.rst
> > > @@ -168,7 +168,7 @@ Q: What DPDK version does each Open vSwitch
> > > release work with?
> > >      2.7.x        16.11.7
> > >      2.8.x        17.05.2
> > >      2.9.x        17.11.3
> > > -    2.10.x       17.11.3
> > > +    2.10.x       18.08.0
> > >      ============ =======
> > >
> > >  Q: Are all the DPDK releases that OVS versions work with maintained?
> > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index
> > > f91aa27..6879e9a
> > > 100644
> > > --- a/lib/netdev-dpdk.c
> > > +++ b/lib/netdev-dpdk.c
> > > @@ -168,11 +168,7 @@ static const struct rte_eth_conf port_conf = {
> > >      .rxmode = {
> > >          .mq_mode = ETH_MQ_RX_RSS,
> > >          .split_hdr_size = 0,
> > > -        .header_split   = 0, /* Header Split disabled */
> > > -        .hw_ip_checksum = 0, /* IP checksum offload disabled */
> > > -        .hw_vlan_filter = 0, /* VLAN filtering disabled */
> > > -        .jumbo_frame    = 0, /* Jumbo Frame Support disabled */
> > > -        .hw_strip_crc   = 0,
> > > +        .offloads = 0,
> > >      },
> > >      .rx_adv_conf = {
> > >          .rss_conf = {
> > > @@ -364,6 +360,7 @@ struct dpdk_ring {  struct ingress_policer {
> > >      struct rte_meter_srtcm_params app_srtcm_params;
> > >      struct rte_meter_srtcm in_policer;
> > > +    struct rte_meter_srtcm_profile in_prof;
> > >      rte_spinlock_t policer_lock;
> > >  };
> > >
> > > @@ -894,6 +891,8 @@ dpdk_eth_dev_port_config(struct netdev_dpdk
> > > *dev, int n_rxq, int n_txq)
> > >      struct rte_eth_dev_info info;
> > >      uint16_t conf_mtu;
> > >
> > > +    rte_eth_dev_info_get(dev->port_id, &info);
> > > +
> > >      /* As of DPDK 17.11.1 a few PMDs require to explicitly enable
> > >       * scatter to support jumbo RX. Checking the offload capabilities
> > >       * is not an option as PMDs are not required yet to report @@
> > > -901,20
> > > +900,24 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int
> > > +n_rxq, int
> > > n_txq)
> > >       * (testing or code review). Listing all such PMDs feels harder
> > >       * than highlighting the one known not to need scatter */
> > >      if (dev->mtu > ETHER_MTU) {
> > > -        rte_eth_dev_info_get(dev->port_id, &info);
> > >          if (strncmp(info.driver_name, "net_nfp", 7)) {
> > > -            conf.rxmode.enable_scatter = 1;
> > > +            conf.rxmode.offloads |= DEV_RX_OFFLOAD_SCATTER;
> > >          }
> > >      }
> > >
> > >      conf.intr_conf.lsc = dev->lsc_interrupt_mode;
> > > -    conf.rxmode.hw_ip_checksum = (dev->hw_ol_features &
> > > -                                  NETDEV_RX_CHECKSUM_OFFLOAD) != 0;
> > > +    conf.rxmode.offloads |= ((dev->hw_ol_features &
> > > +                             NETDEV_RX_CHECKSUM_OFFLOAD) != 0) ?
> > > +                             DEV_RX_OFFLOAD_CHECKSUM : 0;
> > >
> > >      if (dev->hw_ol_features & NETDEV_RX_HW_CRC_STRIP) {
> > > -        conf.rxmode.hw_strip_crc = 1;
> > > +        conf.rxmode.offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
> > >      }
> > >
> > > +    /* Limit configured rss hash functions to only those supported
> > > +     * by the eth device. */
> > > +    conf.rx_adv_conf.rss_conf.rss_hf &=
> > > + info.flow_type_rss_offloads;
> > > +
> > >      /* A device may report more queues than it makes available (this has
> > >       * been observed for Intel xl710, which reserves some of them for
> > >       * SRIOV):  rte_eth_*_queue_setup will fail if a queue is not
> > > @@
> > > -1932,16
> > > +1935,18 @@ netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int
> > > +qid,
> > >
> > >  static inline bool
> > >  netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm *meter,
> > > +                               struct rte_meter_srtcm_profile
> > > + *profile,
> > >                                 struct rte_mbuf *pkt, uint64_t time)  {
> > >      uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt) - sizeof(struct
> > > ether_hdr);
> > >
> > > -    return rte_meter_srtcm_color_blind_check(meter, time, pkt_len) ==
> > > +    return rte_meter_srtcm_color_blind_check(meter, profile, time,
> > > + pkt_len) ==
> > >                                                  e_RTE_METER_GREEN;
> > > }
> > >
> > >  static int
> > >  netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
> > > +                        struct rte_meter_srtcm_profile *profile,
> > >                          struct rte_mbuf **pkts, int pkt_cnt,
> > >                          bool should_steal)  { @@ -1953,7 +1958,8 @@
> > > netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
> > >      for (i = 0; i < pkt_cnt; i++) {
> > >          pkt = pkts[i];
> > >          /* Handle current packet */
> > > -        if (netdev_dpdk_policer_pkt_handle(meter, pkt, current_time)) {
> > > +        if (netdev_dpdk_policer_pkt_handle(meter, profile, pkt,
> > > +                                                 current_time)) {
> > >              if (cnt != i) {
> > >                  pkts[cnt] = pkt;
> > >              }
> > > @@ -1975,8 +1981,8 @@ ingress_policer_run(struct ingress_policer
> > > *policer, struct rte_mbuf **pkts,
> > >      int cnt = 0;
> > >
> > >      rte_spinlock_lock(&policer->policer_lock);
> > > -    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,
> > > -                                  pkt_cnt, should_steal);
> > > +    cnt = netdev_dpdk_policer_run(&policer->in_policer, &policer-
> >in_prof,
> > > +                                  pkts, pkt_cnt, should_steal);
> > >      rte_spinlock_unlock(&policer->policer_lock);
> > >
> > >      return cnt;
> > > @@ -2767,8 +2773,15 @@ netdev_dpdk_policer_construct(uint32_t
> rate,
> > > uint32_t burst)
> > >      policer->app_srtcm_params.cir = rate_bytes;
> > >      policer->app_srtcm_params.cbs = burst_bytes;
> > >      policer->app_srtcm_params.ebs = 0;
> > > +    err = rte_meter_srtcm_profile_config(&policer->in_prof,
> > > +                                         &policer->app_srtcm_params);
> > > +    if (err) {
> > > +        VLOG_ERR("Could not create rte meter profile for ingress policer");
> > > +        free(policer);
> > > +        return NULL;
> > > +    }
> > >      err = rte_meter_srtcm_config(&policer->in_policer,
> > > -                                    &policer->app_srtcm_params);
> > > +                                    &policer->in_prof);
> > >      if (err) {
> > >          VLOG_ERR("Could not create rte meter for ingress policer");
> > >          free(policer);
> > > @@ -3043,13 +3056,18 @@ netdev_dpdk_get_status(const struct
> netdev
> > > *netdev, struct smap *args)
> > >      smap_add_format(args, "if_descr", "%s %s", rte_version(),
> > >
> > > dev_info.driver_name);
> > >
> > > -    if (dev_info.pci_dev) {
> > > -        smap_add_format(args, "pci-vendor_id", "0x%x",
> > > -                        dev_info.pci_dev->id.vendor_id);
> > > -        smap_add_format(args, "pci-device_id", "0x%x",
> > > -                        dev_info.pci_dev->id.device_id);
> > > +    const struct rte_bus *bus;
> > > +    const struct rte_pci_device *pci_dev;
> > > +    bus = rte_bus_find_by_device(dev_info.device);
> > > +    if (bus && !strcmp(bus->name, "pci")) {
> > > +        pci_dev = RTE_DEV_TO_PCI(dev_info.device);
> > > +        if (pci_dev) {
> > > +            smap_add_format(args, "pci-vendor_id", "0x%u",
> > > +                            pci_dev->id.vendor_id);
> > > +             smap_add_format(args, "pci-device_id", "0x%x",
> > > +                             pci_dev->id.device_id);
> > > +        }
> > >      }
> > > -
> > >      return 0;
> > >  }
> > >
> > > @@ -3727,6 +3745,7 @@ struct egress_policer {
> > >      struct qos_conf qos_conf;
> > >      struct rte_meter_srtcm_params app_srtcm_params;
> > >      struct rte_meter_srtcm egress_meter;
> > > +    struct rte_meter_srtcm_profile egress_prof;
> > >  };
> > >
> > >  static void
> > > @@ -3749,8 +3768,16 @@ egress_policer_qos_construct(const struct
> > > smap *details,
> > >      policer = xmalloc(sizeof *policer);
> > >      qos_conf_init(&policer->qos_conf, &egress_policer_ops);
> > >      egress_policer_details_to_param(details,
> > > &policer->app_srtcm_params);
> > > +    err = rte_meter_srtcm_profile_config(&policer->egress_prof,
> > > +                                         &policer->app_srtcm_params);
> > > +    if (err) {
> > > +        free(policer);
> > > +        *conf = NULL;
> > > +        return -err;
> > > +    }
> > >      err = rte_meter_srtcm_config(&policer->egress_meter,
> > > -                                 &policer->app_srtcm_params);
> > > +                                 &policer->egress_prof);
> > > +
> > >      if (!err) {
> > >          *conf = &policer->qos_conf;
> > >      } else {
> > > @@ -3803,7 +3830,8 @@ egress_policer_run(struct qos_conf *conf,
> > > struct rte_mbuf **pkts, int pkt_cnt,
> > >      struct egress_policer *policer =
> > >          CONTAINER_OF(conf, struct egress_policer, qos_conf);
> > >
> > > -    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
> > > +    cnt = netdev_dpdk_policer_run(&policer->egress_meter,
> > > +                                  &policer->egress_prof, pkts,
> > >                                    pkt_cnt, should_steal);
> > >
> > >      return cnt;
> > > @@ -4103,15 +4131,15 @@ dump_flow_pattern(struct rte_flow_item
> > > *item)
> > >
> > >          VLOG_DBG("rte flow vlan pattern:\n");
> > >          if (vlan_spec) {
> > > -            VLOG_DBG("  Spec: tpid=0x%"PRIx16", tci=0x%"PRIx16"\n",
> > > -                     ntohs(vlan_spec->tpid), ntohs(vlan_spec->tci));
> > > +            VLOG_DBG("  Spec: inner_type=0x%"PRIx16",
> tci=0x%"PRIx16"\n",
> > > +                     ntohs(vlan_spec->inner_type),
> > > + ntohs(vlan_spec->tci));
> > >          } else {
> > >              VLOG_DBG("  Spec = null\n");
> > >          }
> > >
> > >          if (vlan_mask) {
> > > -            VLOG_DBG("  Mask: tpid=0x%"PRIx16", tci=0x%"PRIx16"\n",
> > > -                     vlan_mask->tpid, vlan_mask->tci);
> > > +            VLOG_DBG("  Mask: inner_type=0x%"PRIx16",
> tci=0x%"PRIx16"\n",
> > > +                     vlan_mask->inner_type, vlan_mask->tci);
> > >          } else {
> > >              VLOG_DBG("  Mask = null\n");
> > >          }
> > > @@ -4281,27 +4309,56 @@ add_flow_action(struct flow_actions
> > > *actions, enum rte_flow_action_type type,
> > >      actions->cnt++;
> > >  }
> > >
> > > +/*
> > > + * Storage for struct rte_flow_action_rss
> > > + * including storage for key and queue array  */ #define
> > > +MAX_RSS_HASH_KEY_LENGTH  128 #define
> > MAX_ACTION_RSS_QUEUE_NUM
> > > 128
> > > +struct action_rss_data {
> > > +    struct rte_flow_action_rss conf;
> > > +    uint16_t queue[MAX_ACTION_RSS_QUEUE_NUM];
> > > +    uint8_t key[MAX_RSS_HASH_KEY_LENGTH]; };
> > > +
> > >  static struct rte_flow_action_rss *  add_flow_rss_action(struct
> > > flow_actions *actions,
> > >                      struct netdev *netdev) {
> > >      int i;
> > > -    struct rte_flow_action_rss *rss;
> > > +    struct action_rss_data *rss_data;
> > >
> > > -    rss = xmalloc(sizeof(*rss) + sizeof(uint16_t) * netdev->n_rxq);
> > > -    /*
> > > -     * Setting it to NULL will let the driver use the default RSS
> > > -     * configuration we have set: &port_conf.rx_adv_conf.rss_conf.
> > > -     */
> > > -    rss->rss_conf = NULL;
> > > -    rss->num = netdev->n_rxq;
> > > +    if (netdev->n_rxq > MAX_ACTION_RSS_QUEUE_NUM) {
> > > +        VLOG_ERR("Num of rxq (%u) must not be greater " \
> > > +                 "than max rss num of queues (%u)",
> > > +                 netdev->n_rxq, MAX_ACTION_RSS_QUEUE_NUM);
> > > +        return NULL;
> > > +    }
> > >
> > > -    for (i = 0; i < rss->num; i++) {
> > > -        rss->queue[i] = i;
> > > +    rss_data = xmalloc(sizeof(struct action_rss_data));
> > > +    *rss_data = (struct action_rss_data){
> > > +        .conf = (struct rte_flow_action_rss){
> > > +            .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
> > > +            .level = 0,
> > > +            .types = ETH_RSS_IP,
> > > +            .key_len = 0,
> > > +            .queue_num = netdev->n_rxq,
> > > +            .queue = rss_data->queue,
> > > +            .key = rss_data->key,
> > > +        },
> > > +        .key = { 0 },
> > > +        .queue = { 0 },
> > > +    };
> > > +
> > > +    /* TODO: Override key with default */
> > > +
> > > +    /* Override queue array with default */
> > > +    for (i = 0; i < rss_data->conf.queue_num; i++) {
> > > +       rss_data->queue[i] = i;
> > >      }
> > >
> > > -    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, rss);
> > > +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS,
> > > + &rss_data->conf);
> > >
> > > -    return rss;
> > > +    return &rss_data->conf;
> > >  }
> > >
> > >  static int
> > > @@ -4365,7 +4422,7 @@ netdev_dpdk_add_rte_flow_offload(struct
> netdev
> > > *netdev,
> > >          vlan_mask.tci  = match->wc.masks.vlans[0].tci &
> > > ~htons(VLAN_CFI);
> > >
> > >          /* match any protocols */
> > > -        vlan_mask.tpid = 0;
> > > +        vlan_mask.inner_type = 0;
> > >
> > >          add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_VLAN,
> > >                           &vlan_spec, &vlan_mask); @@ -4516,6
> > > +4573,11 @@
> > > end_proto_check:
> > >
> > >      struct rte_flow_action_rss *rss;
> > >      rss = add_flow_rss_action(&actions, netdev);
> > > +    if (!rss) {
> > > +        VLOG_ERR("add_flow_rss_action error.\n");
> > > +        ret = -1;
> > > +        goto out;
> > > +    }
> > >      add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL);
> > >
> > >      flow = rte_flow_create(dev->port_id, &flow_attr,
> > > patterns.items,
> > > --
> > > 1.8.3.1
Stokes, Ian Sept. 13, 2018, 12:45 p.m. UTC | #7
On 9/13/2018 1:34 PM, Ophir Munk wrote:
> Hi Sugesh,
> I can update documentation. Is it enough to update dpdk.rst.txt only?
> If not - can you please send a link to additional files with references to 17.11 to be updated?
> I will wait 1-2 days to get more reviews then will send v3.

Hi Ophir, I've just sent a patch to update master branch to use DPDK 
17.11.4, you can follow the same docs that are modified in that patch 
for the v3 of this. Docs modified and link to the patch itself below.

  .travis/linux-build.sh                   | 2 +-
  Documentation/faq/releases.rst           | 6 +++---
  Documentation/intro/install/dpdk.rst     | 8 ++++----
  Documentation/topics/dpdk/vhost-user.rst | 6 +++---

https://mail.openvswitch.org/pipermail/ovs-dev/2018-September/352104.html

Thanks
Ian
> 
> Regards,
> Ophir
> 
>> -----Original Message-----
>> From: Chandran, Sugesh [mailto:sugesh.chandran@intel.com]
>> Sent: Thursday, September 13, 2018 2:58 PM
>> To: Ophir Munk <ophirmu@mellanox.com>; 'ovs-dev@openvswitch.org'
>> <ovs-dev@openvswitch.org>
>> Cc: Asaf Penso <asafp@mellanox.com>; Stokes, Ian
>> <ian.stokes@intel.com>; 'Ben Pfaff' <blp@ovn.org>; Shahaf Shuler
>> <shahafs@mellanox.com>; Thomas Monjalon <thomas@monjalon.net>;
>> Olga Shern <olgas@mellanox.com>; 'Kevin Traynor' <ktraynor@redhat.com>
>> Subject: RE: [dpdk-howl PATCH v2] netdev-dpdk: Upgrade to dpdk v18.08.0
>>
>> Hi Ophir,
>>
>> Also a quick grep show there are number of places in documentation still
>> pointing to DPDK17.11, I guess its worth while to change them as well.
>> This will avoid confusion for people who wanted to use this branch, What do
>> you think?
>>
>>
>>
>> Regards
>> _Sugesh
>>
>>
>>> -----Original Message-----
>>> From: Chandran, Sugesh
>>> Sent: Wednesday, September 12, 2018 5:05 PM
>>> To: Ophir Munk <ophirmu@mellanox.com>; ovs-dev@openvswitch.org
>>> Cc: Asaf Penso <asafp@mellanox.com>; Stokes, Ian
>>> <ian.stokes@intel.com>; Ben Pfaff <blp@ovn.org>; Shahaf Shuler
>>> <shahafs@mellanox.com>; Thomas Monjalon <thomas@monjalon.net>;
>> Olga
>>> Shern <olgas@mellanox.com>; Kevin Traynor <ktraynor@redhat.com>
>>> Subject: RE: [dpdk-howl PATCH v2] netdev-dpdk: Upgrade to dpdk
>>> v18.08.0
>>>
>>> Hi Ophir,
>>> I verified this patch in our test lab and looks fine to me.
>>> I have couple of minor comments in the V1 series. Could you please
>>> take care of them as well?
>>>
>>>
>>> Regards
>>> _Sugesh
>>>
>>>> -----Original Message-----
>>>> From: Ophir Munk [mailto:ophirmu@mellanox.com]
>>>> Sent: Tuesday, September 11, 2018 12:05 AM
>>>> To: ovs-dev@openvswitch.org
>>>> Cc: Asaf Penso <asafp@mellanox.com>; Chandran, Sugesh
>>>> <sugesh.chandran@intel.com>; Stokes, Ian <ian.stokes@intel.com>; Ben
>>>> Pfaff <blp@ovn.org>; Shahaf Shuler <shahafs@mellanox.com>; Thomas
>>>> Monjalon <thomas@monjalon.net>; Olga Shern <olgas@mellanox.com>;
>>> Ophir
>>>> Munk <ophirmu@mellanox.com>; Kevin Traynor
>> <ktraynor@redhat.com>
>>>> Subject: [dpdk-howl PATCH v2] netdev-dpdk: Upgrade to dpdk v18.08.0
>>>>
>>>> 1. Enable compilation and linkage with dpdk 18.08.0 The following
>>>> dpdk commits which were introduced after dpdk 17.11.x require OVS
>>>> updates to accommodate to the dpdk changes.
>>>> - ce17eddefc20 ("ethdev: introduce Rx queue offloads API")
>>>> - ab3ce1e0c193 ("ethdev: remove old offload API")
>>>> - c06ddf9698e0 ("meter: add configuration profile")
>>>> - e58638c32411 ("ethdev: fix TPID handling in flow API")
>>>> - cd8c7c7ce241 ("ethdev: replace bus specific struct with generic
>>>> dev")
>>>> - ac8d22de2394 ("ethdev: flatten RSS configuration in flow API")
>>>>
>>>> 2. Limit configured rss hash functions to only those supported by
>>>> the eth
>>> device.
>>>>
>>>> 3. Update references to DPDK version in Documentation
>>>>
>>>> 4. Update DPDK version in travis' linux-build script
>>>>
>>>> Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
>>>> ---
>>>> v1:
>>>> First version
>>>>
>>>> v2:
>>>> Avoid seg faults cases as described in
>>>>
>> https://emea01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fp
>>>>
>> atchwork.ozlabs.org%2Fpatch%2F965451%2F&amp;data=02%7C01%7Cophi
>> rmu%4
>>>>
>> 0mellanox.com%7Ccc4dcac1d427416c8dc508d619703178%7Ca652971c7d2
>> e4d9ba
>>>>
>> 6a4d149256f461b%7C0%7C0%7C636724366985653825&amp;sdata=CFZjVD
>> Rd6w0TP
>>>> x7lKWOkUVEh28aoyrpl%2Fop635Vrlx0%3D&amp;reserved=0
>>>> by using the patch in:
>>>>
>> https://emea01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fg
>>>> ithub.com%2Fkevintraynor%2Fovs-dpdk-
>> &amp;data=02%7C01%7Cophirmu%40me
>>>>
>> llanox.com%7Ccc4dcac1d427416c8dc508d619703178%7Ca652971c7d2e4d9
>> ba6a4
>>>>
>> d149256f461b%7C0%7C0%7C636724366985653825&amp;sdata=%2F2B4uH
>> HAHGma%2
>>>> F48VS2I3QsMm%2ByO3LMZdaHlxVKJ9JYg%3D&amp;reserved=0
>>>> master/commit/88f46cc5ab338eb4f3ca5db1eacd0effefe4fa0c
>>>>
>>>>   .travis/linux-build.sh         |   2 +-
>>>>   Documentation/faq/releases.rst |   2 +-
>>>>   lib/netdev-dpdk.c              | 142 +++++++++++++++++++++++++++++-------
>> -----
>>>>   3 files changed, 104 insertions(+), 42 deletions(-)
>>>>
>>>> diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh index
>>>> 4b9fc4a..c60ac71
>>>> 100755
>>>> --- a/.travis/linux-build.sh
>>>> +++ b/.travis/linux-build.sh
>>>> @@ -83,7 +83,7 @@ fi
>>>>
>>>>   if [ "$DPDK" ]; then
>>>>       if [ -z "$DPDK_VER" ]; then
>>>> -        DPDK_VER="17.11.3"
>>>> +        DPDK_VER="18.08.0"
>>>>       fi
>>>>       install_dpdk $DPDK_VER
>>>>       if [ "$CC" = "clang" ]; then
>>>> diff --git a/Documentation/faq/releases.rst
>>>> b/Documentation/faq/releases.rst index 41d41e3..646ae09 100644
>>>> --- a/Documentation/faq/releases.rst
>>>> +++ b/Documentation/faq/releases.rst
>>>> @@ -168,7 +168,7 @@ Q: What DPDK version does each Open vSwitch
>>>> release work with?
>>>>       2.7.x        16.11.7
>>>>       2.8.x        17.05.2
>>>>       2.9.x        17.11.3
>>>> -    2.10.x       17.11.3
>>>> +    2.10.x       18.08.0
>>>>       ============ =======
>>>>
>>>>   Q: Are all the DPDK releases that OVS versions work with maintained?
>>>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index
>>>> f91aa27..6879e9a
>>>> 100644
>>>> --- a/lib/netdev-dpdk.c
>>>> +++ b/lib/netdev-dpdk.c
>>>> @@ -168,11 +168,7 @@ static const struct rte_eth_conf port_conf = {
>>>>       .rxmode = {
>>>>           .mq_mode = ETH_MQ_RX_RSS,
>>>>           .split_hdr_size = 0,
>>>> -        .header_split   = 0, /* Header Split disabled */
>>>> -        .hw_ip_checksum = 0, /* IP checksum offload disabled */
>>>> -        .hw_vlan_filter = 0, /* VLAN filtering disabled */
>>>> -        .jumbo_frame    = 0, /* Jumbo Frame Support disabled */
>>>> -        .hw_strip_crc   = 0,
>>>> +        .offloads = 0,
>>>>       },
>>>>       .rx_adv_conf = {
>>>>           .rss_conf = {
>>>> @@ -364,6 +360,7 @@ struct dpdk_ring {  struct ingress_policer {
>>>>       struct rte_meter_srtcm_params app_srtcm_params;
>>>>       struct rte_meter_srtcm in_policer;
>>>> +    struct rte_meter_srtcm_profile in_prof;
>>>>       rte_spinlock_t policer_lock;
>>>>   };
>>>>
>>>> @@ -894,6 +891,8 @@ dpdk_eth_dev_port_config(struct netdev_dpdk
>>>> *dev, int n_rxq, int n_txq)
>>>>       struct rte_eth_dev_info info;
>>>>       uint16_t conf_mtu;
>>>>
>>>> +    rte_eth_dev_info_get(dev->port_id, &info);
>>>> +
>>>>       /* As of DPDK 17.11.1 a few PMDs require to explicitly enable
>>>>        * scatter to support jumbo RX. Checking the offload capabilities
>>>>        * is not an option as PMDs are not required yet to report @@
>>>> -901,20
>>>> +900,24 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int
>>>> +n_rxq, int
>>>> n_txq)
>>>>        * (testing or code review). Listing all such PMDs feels harder
>>>>        * than highlighting the one known not to need scatter */
>>>>       if (dev->mtu > ETHER_MTU) {
>>>> -        rte_eth_dev_info_get(dev->port_id, &info);
>>>>           if (strncmp(info.driver_name, "net_nfp", 7)) {
>>>> -            conf.rxmode.enable_scatter = 1;
>>>> +            conf.rxmode.offloads |= DEV_RX_OFFLOAD_SCATTER;
>>>>           }
>>>>       }
>>>>
>>>>       conf.intr_conf.lsc = dev->lsc_interrupt_mode;
>>>> -    conf.rxmode.hw_ip_checksum = (dev->hw_ol_features &
>>>> -                                  NETDEV_RX_CHECKSUM_OFFLOAD) != 0;
>>>> +    conf.rxmode.offloads |= ((dev->hw_ol_features &
>>>> +                             NETDEV_RX_CHECKSUM_OFFLOAD) != 0) ?
>>>> +                             DEV_RX_OFFLOAD_CHECKSUM : 0;
>>>>
>>>>       if (dev->hw_ol_features & NETDEV_RX_HW_CRC_STRIP) {
>>>> -        conf.rxmode.hw_strip_crc = 1;
>>>> +        conf.rxmode.offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
>>>>       }
>>>>
>>>> +    /* Limit configured rss hash functions to only those supported
>>>> +     * by the eth device. */
>>>> +    conf.rx_adv_conf.rss_conf.rss_hf &=
>>>> + info.flow_type_rss_offloads;
>>>> +
>>>>       /* A device may report more queues than it makes available (this has
>>>>        * been observed for Intel xl710, which reserves some of them for
>>>>        * SRIOV):  rte_eth_*_queue_setup will fail if a queue is not
>>>> @@
>>>> -1932,16
>>>> +1935,18 @@ netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int
>>>> +qid,
>>>>
>>>>   static inline bool
>>>>   netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm *meter,
>>>> +                               struct rte_meter_srtcm_profile
>>>> + *profile,
>>>>                                  struct rte_mbuf *pkt, uint64_t time)  {
>>>>       uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt) - sizeof(struct
>>>> ether_hdr);
>>>>
>>>> -    return rte_meter_srtcm_color_blind_check(meter, time, pkt_len) ==
>>>> +    return rte_meter_srtcm_color_blind_check(meter, profile, time,
>>>> + pkt_len) ==
>>>>                                                   e_RTE_METER_GREEN;
>>>> }
>>>>
>>>>   static int
>>>>   netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
>>>> +                        struct rte_meter_srtcm_profile *profile,
>>>>                           struct rte_mbuf **pkts, int pkt_cnt,
>>>>                           bool should_steal)  { @@ -1953,7 +1958,8 @@
>>>> netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
>>>>       for (i = 0; i < pkt_cnt; i++) {
>>>>           pkt = pkts[i];
>>>>           /* Handle current packet */
>>>> -        if (netdev_dpdk_policer_pkt_handle(meter, pkt, current_time)) {
>>>> +        if (netdev_dpdk_policer_pkt_handle(meter, profile, pkt,
>>>> +                                                 current_time)) {
>>>>               if (cnt != i) {
>>>>                   pkts[cnt] = pkt;
>>>>               }
>>>> @@ -1975,8 +1981,8 @@ ingress_policer_run(struct ingress_policer
>>>> *policer, struct rte_mbuf **pkts,
>>>>       int cnt = 0;
>>>>
>>>>       rte_spinlock_lock(&policer->policer_lock);
>>>> -    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,
>>>> -                                  pkt_cnt, should_steal);
>>>> +    cnt = netdev_dpdk_policer_run(&policer->in_policer, &policer-
>>> in_prof,
>>>> +                                  pkts, pkt_cnt, should_steal);
>>>>       rte_spinlock_unlock(&policer->policer_lock);
>>>>
>>>>       return cnt;
>>>> @@ -2767,8 +2773,15 @@ netdev_dpdk_policer_construct(uint32_t
>> rate,
>>>> uint32_t burst)
>>>>       policer->app_srtcm_params.cir = rate_bytes;
>>>>       policer->app_srtcm_params.cbs = burst_bytes;
>>>>       policer->app_srtcm_params.ebs = 0;
>>>> +    err = rte_meter_srtcm_profile_config(&policer->in_prof,
>>>> +                                         &policer->app_srtcm_params);
>>>> +    if (err) {
>>>> +        VLOG_ERR("Could not create rte meter profile for ingress policer");
>>>> +        free(policer);
>>>> +        return NULL;
>>>> +    }
>>>>       err = rte_meter_srtcm_config(&policer->in_policer,
>>>> -                                    &policer->app_srtcm_params);
>>>> +                                    &policer->in_prof);
>>>>       if (err) {
>>>>           VLOG_ERR("Could not create rte meter for ingress policer");
>>>>           free(policer);
>>>> @@ -3043,13 +3056,18 @@ netdev_dpdk_get_status(const struct
>> netdev
>>>> *netdev, struct smap *args)
>>>>       smap_add_format(args, "if_descr", "%s %s", rte_version(),
>>>>
>>>> dev_info.driver_name);
>>>>
>>>> -    if (dev_info.pci_dev) {
>>>> -        smap_add_format(args, "pci-vendor_id", "0x%x",
>>>> -                        dev_info.pci_dev->id.vendor_id);
>>>> -        smap_add_format(args, "pci-device_id", "0x%x",
>>>> -                        dev_info.pci_dev->id.device_id);
>>>> +    const struct rte_bus *bus;
>>>> +    const struct rte_pci_device *pci_dev;
>>>> +    bus = rte_bus_find_by_device(dev_info.device);
>>>> +    if (bus && !strcmp(bus->name, "pci")) {
>>>> +        pci_dev = RTE_DEV_TO_PCI(dev_info.device);
>>>> +        if (pci_dev) {
>>>> +            smap_add_format(args, "pci-vendor_id", "0x%u",
>>>> +                            pci_dev->id.vendor_id);
>>>> +             smap_add_format(args, "pci-device_id", "0x%x",
>>>> +                             pci_dev->id.device_id);
>>>> +        }
>>>>       }
>>>> -
>>>>       return 0;
>>>>   }
>>>>
>>>> @@ -3727,6 +3745,7 @@ struct egress_policer {
>>>>       struct qos_conf qos_conf;
>>>>       struct rte_meter_srtcm_params app_srtcm_params;
>>>>       struct rte_meter_srtcm egress_meter;
>>>> +    struct rte_meter_srtcm_profile egress_prof;
>>>>   };
>>>>
>>>>   static void
>>>> @@ -3749,8 +3768,16 @@ egress_policer_qos_construct(const struct
>>>> smap *details,
>>>>       policer = xmalloc(sizeof *policer);
>>>>       qos_conf_init(&policer->qos_conf, &egress_policer_ops);
>>>>       egress_policer_details_to_param(details,
>>>> &policer->app_srtcm_params);
>>>> +    err = rte_meter_srtcm_profile_config(&policer->egress_prof,
>>>> +                                         &policer->app_srtcm_params);
>>>> +    if (err) {
>>>> +        free(policer);
>>>> +        *conf = NULL;
>>>> +        return -err;
>>>> +    }
>>>>       err = rte_meter_srtcm_config(&policer->egress_meter,
>>>> -                                 &policer->app_srtcm_params);
>>>> +                                 &policer->egress_prof);
>>>> +
>>>>       if (!err) {
>>>>           *conf = &policer->qos_conf;
>>>>       } else {
>>>> @@ -3803,7 +3830,8 @@ egress_policer_run(struct qos_conf *conf,
>>>> struct rte_mbuf **pkts, int pkt_cnt,
>>>>       struct egress_policer *policer =
>>>>           CONTAINER_OF(conf, struct egress_policer, qos_conf);
>>>>
>>>> -    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
>>>> +    cnt = netdev_dpdk_policer_run(&policer->egress_meter,
>>>> +                                  &policer->egress_prof, pkts,
>>>>                                     pkt_cnt, should_steal);
>>>>
>>>>       return cnt;
>>>> @@ -4103,15 +4131,15 @@ dump_flow_pattern(struct rte_flow_item
>>>> *item)
>>>>
>>>>           VLOG_DBG("rte flow vlan pattern:\n");
>>>>           if (vlan_spec) {
>>>> -            VLOG_DBG("  Spec: tpid=0x%"PRIx16", tci=0x%"PRIx16"\n",
>>>> -                     ntohs(vlan_spec->tpid), ntohs(vlan_spec->tci));
>>>> +            VLOG_DBG("  Spec: inner_type=0x%"PRIx16",
>> tci=0x%"PRIx16"\n",
>>>> +                     ntohs(vlan_spec->inner_type),
>>>> + ntohs(vlan_spec->tci));
>>>>           } else {
>>>>               VLOG_DBG("  Spec = null\n");
>>>>           }
>>>>
>>>>           if (vlan_mask) {
>>>> -            VLOG_DBG("  Mask: tpid=0x%"PRIx16", tci=0x%"PRIx16"\n",
>>>> -                     vlan_mask->tpid, vlan_mask->tci);
>>>> +            VLOG_DBG("  Mask: inner_type=0x%"PRIx16",
>> tci=0x%"PRIx16"\n",
>>>> +                     vlan_mask->inner_type, vlan_mask->tci);
>>>>           } else {
>>>>               VLOG_DBG("  Mask = null\n");
>>>>           }
>>>> @@ -4281,27 +4309,56 @@ add_flow_action(struct flow_actions
>>>> *actions, enum rte_flow_action_type type,
>>>>       actions->cnt++;
>>>>   }
>>>>
>>>> +/*
>>>> + * Storage for struct rte_flow_action_rss
>>>> + * including storage for key and queue array  */ #define
>>>> +MAX_RSS_HASH_KEY_LENGTH  128 #define
>>> MAX_ACTION_RSS_QUEUE_NUM
>>>> 128
>>>> +struct action_rss_data {
>>>> +    struct rte_flow_action_rss conf;
>>>> +    uint16_t queue[MAX_ACTION_RSS_QUEUE_NUM];
>>>> +    uint8_t key[MAX_RSS_HASH_KEY_LENGTH]; };
>>>> +
>>>>   static struct rte_flow_action_rss *  add_flow_rss_action(struct
>>>> flow_actions *actions,
>>>>                       struct netdev *netdev) {
>>>>       int i;
>>>> -    struct rte_flow_action_rss *rss;
>>>> +    struct action_rss_data *rss_data;
>>>>
>>>> -    rss = xmalloc(sizeof(*rss) + sizeof(uint16_t) * netdev->n_rxq);
>>>> -    /*
>>>> -     * Setting it to NULL will let the driver use the default RSS
>>>> -     * configuration we have set: &port_conf.rx_adv_conf.rss_conf.
>>>> -     */
>>>> -    rss->rss_conf = NULL;
>>>> -    rss->num = netdev->n_rxq;
>>>> +    if (netdev->n_rxq > MAX_ACTION_RSS_QUEUE_NUM) {
>>>> +        VLOG_ERR("Num of rxq (%u) must not be greater " \
>>>> +                 "than max rss num of queues (%u)",
>>>> +                 netdev->n_rxq, MAX_ACTION_RSS_QUEUE_NUM);
>>>> +        return NULL;
>>>> +    }
>>>>
>>>> -    for (i = 0; i < rss->num; i++) {
>>>> -        rss->queue[i] = i;
>>>> +    rss_data = xmalloc(sizeof(struct action_rss_data));
>>>> +    *rss_data = (struct action_rss_data){
>>>> +        .conf = (struct rte_flow_action_rss){
>>>> +            .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
>>>> +            .level = 0,
>>>> +            .types = ETH_RSS_IP,
>>>> +            .key_len = 0,
>>>> +            .queue_num = netdev->n_rxq,
>>>> +            .queue = rss_data->queue,
>>>> +            .key = rss_data->key,
>>>> +        },
>>>> +        .key = { 0 },
>>>> +        .queue = { 0 },
>>>> +    };
>>>> +
>>>> +    /* TODO: Override key with default */
>>>> +
>>>> +    /* Override queue array with default */
>>>> +    for (i = 0; i < rss_data->conf.queue_num; i++) {
>>>> +       rss_data->queue[i] = i;
>>>>       }
>>>>
>>>> -    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, rss);
>>>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS,
>>>> + &rss_data->conf);
>>>>
>>>> -    return rss;
>>>> +    return &rss_data->conf;
>>>>   }
>>>>
>>>>   static int
>>>> @@ -4365,7 +4422,7 @@ netdev_dpdk_add_rte_flow_offload(struct
>> netdev
>>>> *netdev,
>>>>           vlan_mask.tci  = match->wc.masks.vlans[0].tci &
>>>> ~htons(VLAN_CFI);
>>>>
>>>>           /* match any protocols */
>>>> -        vlan_mask.tpid = 0;
>>>> +        vlan_mask.inner_type = 0;
>>>>
>>>>           add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_VLAN,
>>>>                            &vlan_spec, &vlan_mask); @@ -4516,6
>>>> +4573,11 @@
>>>> end_proto_check:
>>>>
>>>>       struct rte_flow_action_rss *rss;
>>>>       rss = add_flow_rss_action(&actions, netdev);
>>>> +    if (!rss) {
>>>> +        VLOG_ERR("add_flow_rss_action error.\n");
>>>> +        ret = -1;
>>>> +        goto out;
>>>> +    }
>>>>       add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL);
>>>>
>>>>       flow = rte_flow_create(dev->port_id, &flow_attr,
>>>> patterns.items,
>>>> --
>>>> 1.8.3.1
>
Kevin Traynor Sept. 13, 2018, 12:57 p.m. UTC | #8
On 09/13/2018 01:45 PM, Ian Stokes wrote:
> On 9/13/2018 1:34 PM, Ophir Munk wrote:
>> Hi Sugesh,
>> I can update documentation. Is it enough to update dpdk.rst.txt only?
>> If not - can you please send a link to additional files with
>> references to 17.11 to be updated?
>> I will wait 1-2 days to get more reviews then will send v3.
> 
> Hi Ophir, I've just sent a patch to update master branch to use DPDK
> 17.11.4, you can follow the same docs that are modified in that patch
> for the v3 of this. Docs modified and link to the patch itself below.
> 
>  .travis/linux-build.sh                   | 2 +-
>  Documentation/faq/releases.rst           | 6 +++---
>  Documentation/intro/install/dpdk.rst     | 8 ++++----
>  Documentation/topics/dpdk/vhost-user.rst | 6 +++---
> 
> https://mail.openvswitch.org/pipermail/ovs-dev/2018-September/352104.html
> 
> Thanks
> Ian
>>
>> Regards,
>> Ophir
>>
>>> -----Original Message-----
>>> From: Chandran, Sugesh [mailto:sugesh.chandran@intel.com]
>>> Sent: Thursday, September 13, 2018 2:58 PM
>>> To: Ophir Munk <ophirmu@mellanox.com>; 'ovs-dev@openvswitch.org'
>>> <ovs-dev@openvswitch.org>
>>> Cc: Asaf Penso <asafp@mellanox.com>; Stokes, Ian
>>> <ian.stokes@intel.com>; 'Ben Pfaff' <blp@ovn.org>; Shahaf Shuler
>>> <shahafs@mellanox.com>; Thomas Monjalon <thomas@monjalon.net>;
>>> Olga Shern <olgas@mellanox.com>; 'Kevin Traynor' <ktraynor@redhat.com>
>>> Subject: RE: [dpdk-howl PATCH v2] netdev-dpdk: Upgrade to dpdk v18.08.0
>>>
>>> Hi Ophir,
>>>
>>> Also a quick grep show there are number of places in documentation still
>>> pointing to DPDK17.11, I guess its worth while to change them as well.
>>> This will avoid confusion for people who wanted to use this branch,
>>> What do
>>> you think?
>>>
>>>
>>>
>>> Regards
>>> _Sugesh
>>>
>>>
>>>> -----Original Message-----
>>>> From: Chandran, Sugesh
>>>> Sent: Wednesday, September 12, 2018 5:05 PM
>>>> To: Ophir Munk <ophirmu@mellanox.com>; ovs-dev@openvswitch.org
>>>> Cc: Asaf Penso <asafp@mellanox.com>; Stokes, Ian
>>>> <ian.stokes@intel.com>; Ben Pfaff <blp@ovn.org>; Shahaf Shuler
>>>> <shahafs@mellanox.com>; Thomas Monjalon <thomas@monjalon.net>;
>>> Olga
>>>> Shern <olgas@mellanox.com>; Kevin Traynor <ktraynor@redhat.com>
>>>> Subject: RE: [dpdk-howl PATCH v2] netdev-dpdk: Upgrade to dpdk
>>>> v18.08.0
>>>>
>>>> Hi Ophir,
>>>> I verified this patch in our test lab and looks fine to me.
>>>> I have couple of minor comments in the V1 series. Could you please
>>>> take care of them as well?
>>>>
>>>>
>>>> Regards
>>>> _Sugesh
>>>>
>>>>> -----Original Message-----
>>>>> From: Ophir Munk [mailto:ophirmu@mellanox.com]
>>>>> Sent: Tuesday, September 11, 2018 12:05 AM
>>>>> To: ovs-dev@openvswitch.org
>>>>> Cc: Asaf Penso <asafp@mellanox.com>; Chandran, Sugesh
>>>>> <sugesh.chandran@intel.com>; Stokes, Ian <ian.stokes@intel.com>; Ben
>>>>> Pfaff <blp@ovn.org>; Shahaf Shuler <shahafs@mellanox.com>; Thomas
>>>>> Monjalon <thomas@monjalon.net>; Olga Shern <olgas@mellanox.com>;
>>>> Ophir
>>>>> Munk <ophirmu@mellanox.com>; Kevin Traynor
>>> <ktraynor@redhat.com>
>>>>> Subject: [dpdk-howl PATCH v2] netdev-dpdk: Upgrade to dpdk v18.08.0
>>>>>
>>>>> 1. Enable compilation and linkage with dpdk 18.08.0 The following
>>>>> dpdk commits which were introduced after dpdk 17.11.x require OVS
>>>>> updates to accommodate to the dpdk changes.
>>>>> - ce17eddefc20 ("ethdev: introduce Rx queue offloads API")
>>>>> - ab3ce1e0c193 ("ethdev: remove old offload API")
>>>>> - c06ddf9698e0 ("meter: add configuration profile")
>>>>> - e58638c32411 ("ethdev: fix TPID handling in flow API")
>>>>> - cd8c7c7ce241 ("ethdev: replace bus specific struct with generic
>>>>> dev")
>>>>> - ac8d22de2394 ("ethdev: flatten RSS configuration in flow API")
>>>>>
>>>>> 2. Limit configured rss hash functions to only those supported by
>>>>> the eth
>>>> device.
>>>>>
>>>>> 3. Update references to DPDK version in Documentation
>>>>>
>>>>> 4. Update DPDK version in travis' linux-build script
>>>>>
>>>>> Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
>>>>> ---
>>>>> v1:
>>>>> First version
>>>>>
>>>>> v2:
>>>>> Avoid seg faults cases as described in
>>>>>
>>> https://emea01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fp
>>>>>
>>> atchwork.ozlabs.org%2Fpatch%2F965451%2F&amp;data=02%7C01%7Cophi
>>> rmu%4
>>>>>
>>> 0mellanox.com%7Ccc4dcac1d427416c8dc508d619703178%7Ca652971c7d2
>>> e4d9ba
>>>>>
>>> 6a4d149256f461b%7C0%7C0%7C636724366985653825&amp;sdata=CFZjVD
>>> Rd6w0TP
>>>>> x7lKWOkUVEh28aoyrpl%2Fop635Vrlx0%3D&amp;reserved=0
>>>>> by using the patch in:
>>>>>
>>> https://emea01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fg
>>>>> ithub.com%2Fkevintraynor%2Fovs-dpdk-
>>> &amp;data=02%7C01%7Cophirmu%40me
>>>>>
>>> llanox.com%7Ccc4dcac1d427416c8dc508d619703178%7Ca652971c7d2e4d9
>>> ba6a4
>>>>>
>>> d149256f461b%7C0%7C0%7C636724366985653825&amp;sdata=%2F2B4uH
>>> HAHGma%2
>>>>> F48VS2I3QsMm%2ByO3LMZdaHlxVKJ9JYg%3D&amp;reserved=0
>>>>> master/commit/88f46cc5ab338eb4f3ca5db1eacd0effefe4fa0c
>>>>>
>>>>>   .travis/linux-build.sh         |   2 +-
>>>>>   Documentation/faq/releases.rst |   2 +-
>>>>>   lib/netdev-dpdk.c              | 142
>>>>> +++++++++++++++++++++++++++++-------
>>> -----
>>>>>   3 files changed, 104 insertions(+), 42 deletions(-)
>>>>>
>>>>> diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh index
>>>>> 4b9fc4a..c60ac71
>>>>> 100755
>>>>> --- a/.travis/linux-build.sh
>>>>> +++ b/.travis/linux-build.sh
>>>>> @@ -83,7 +83,7 @@ fi
>>>>>
>>>>>   if [ "$DPDK" ]; then
>>>>>       if [ -z "$DPDK_VER" ]; then
>>>>> -        DPDK_VER="17.11.3"
>>>>> +        DPDK_VER="18.08.0"
>>>>>       fi
>>>>>       install_dpdk $DPDK_VER
>>>>>       if [ "$CC" = "clang" ]; then
>>>>> diff --git a/Documentation/faq/releases.rst
>>>>> b/Documentation/faq/releases.rst index 41d41e3..646ae09 100644
>>>>> --- a/Documentation/faq/releases.rst
>>>>> +++ b/Documentation/faq/releases.rst
>>>>> @@ -168,7 +168,7 @@ Q: What DPDK version does each Open vSwitch
>>>>> release work with?
>>>>>       2.7.x        16.11.7
>>>>>       2.8.x        17.05.2
>>>>>       2.9.x        17.11.3
>>>>> -    2.10.x       17.11.3
>>>>> +    2.10.x       18.08.0

Hi - on updating versions in docs, this one should not change as the
released/to be released versions of OVS 2.10.x stick with 17.11.x. We
can just leave this unmodified for now and add an entry for 2.11.x
closer to the release time.

Kevin.

>>>>>       ============ =======
>>>>>
>>>>>   Q: Are all the DPDK releases that OVS versions work with maintained?
>>>>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index
>>>>> f91aa27..6879e9a
>>>>> 100644
>>>>> --- a/lib/netdev-dpdk.c
>>>>> +++ b/lib/netdev-dpdk.c
>>>>> @@ -168,11 +168,7 @@ static const struct rte_eth_conf port_conf = {
>>>>>       .rxmode = {
>>>>>           .mq_mode = ETH_MQ_RX_RSS,
>>>>>           .split_hdr_size = 0,
>>>>> -        .header_split   = 0, /* Header Split disabled */
>>>>> -        .hw_ip_checksum = 0, /* IP checksum offload disabled */
>>>>> -        .hw_vlan_filter = 0, /* VLAN filtering disabled */
>>>>> -        .jumbo_frame    = 0, /* Jumbo Frame Support disabled */
>>>>> -        .hw_strip_crc   = 0,
>>>>> +        .offloads = 0,
>>>>>       },
>>>>>       .rx_adv_conf = {
>>>>>           .rss_conf = {
>>>>> @@ -364,6 +360,7 @@ struct dpdk_ring {  struct ingress_policer {
>>>>>       struct rte_meter_srtcm_params app_srtcm_params;
>>>>>       struct rte_meter_srtcm in_policer;
>>>>> +    struct rte_meter_srtcm_profile in_prof;
>>>>>       rte_spinlock_t policer_lock;
>>>>>   };
>>>>>
>>>>> @@ -894,6 +891,8 @@ dpdk_eth_dev_port_config(struct netdev_dpdk
>>>>> *dev, int n_rxq, int n_txq)
>>>>>       struct rte_eth_dev_info info;
>>>>>       uint16_t conf_mtu;
>>>>>
>>>>> +    rte_eth_dev_info_get(dev->port_id, &info);
>>>>> +
>>>>>       /* As of DPDK 17.11.1 a few PMDs require to explicitly enable
>>>>>        * scatter to support jumbo RX. Checking the offload
>>>>> capabilities
>>>>>        * is not an option as PMDs are not required yet to report @@
>>>>> -901,20
>>>>> +900,24 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int
>>>>> +n_rxq, int
>>>>> n_txq)
>>>>>        * (testing or code review). Listing all such PMDs feels harder
>>>>>        * than highlighting the one known not to need scatter */
>>>>>       if (dev->mtu > ETHER_MTU) {
>>>>> -        rte_eth_dev_info_get(dev->port_id, &info);
>>>>>           if (strncmp(info.driver_name, "net_nfp", 7)) {
>>>>> -            conf.rxmode.enable_scatter = 1;
>>>>> +            conf.rxmode.offloads |= DEV_RX_OFFLOAD_SCATTER;
>>>>>           }
>>>>>       }
>>>>>
>>>>>       conf.intr_conf.lsc = dev->lsc_interrupt_mode;
>>>>> -    conf.rxmode.hw_ip_checksum = (dev->hw_ol_features &
>>>>> -                                  NETDEV_RX_CHECKSUM_OFFLOAD) != 0;
>>>>> +    conf.rxmode.offloads |= ((dev->hw_ol_features &
>>>>> +                             NETDEV_RX_CHECKSUM_OFFLOAD) != 0) ?
>>>>> +                             DEV_RX_OFFLOAD_CHECKSUM : 0;
>>>>>
>>>>>       if (dev->hw_ol_features & NETDEV_RX_HW_CRC_STRIP) {
>>>>> -        conf.rxmode.hw_strip_crc = 1;
>>>>> +        conf.rxmode.offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
>>>>>       }
>>>>>
>>>>> +    /* Limit configured rss hash functions to only those supported
>>>>> +     * by the eth device. */
>>>>> +    conf.rx_adv_conf.rss_conf.rss_hf &=
>>>>> + info.flow_type_rss_offloads;
>>>>> +
>>>>>       /* A device may report more queues than it makes available
>>>>> (this has
>>>>>        * been observed for Intel xl710, which reserves some of them
>>>>> for
>>>>>        * SRIOV):  rte_eth_*_queue_setup will fail if a queue is not
>>>>> @@
>>>>> -1932,16
>>>>> +1935,18 @@ netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int
>>>>> +qid,
>>>>>
>>>>>   static inline bool
>>>>>   netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm *meter,
>>>>> +                               struct rte_meter_srtcm_profile
>>>>> + *profile,
>>>>>                                  struct rte_mbuf *pkt, uint64_t
>>>>> time)  {
>>>>>       uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt) - sizeof(struct
>>>>> ether_hdr);
>>>>>
>>>>> -    return rte_meter_srtcm_color_blind_check(meter, time, pkt_len) ==
>>>>> +    return rte_meter_srtcm_color_blind_check(meter, profile, time,
>>>>> + pkt_len) ==
>>>>>                                                   e_RTE_METER_GREEN;
>>>>> }
>>>>>
>>>>>   static int
>>>>>   netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
>>>>> +                        struct rte_meter_srtcm_profile *profile,
>>>>>                           struct rte_mbuf **pkts, int pkt_cnt,
>>>>>                           bool should_steal)  { @@ -1953,7 +1958,8 @@
>>>>> netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
>>>>>       for (i = 0; i < pkt_cnt; i++) {
>>>>>           pkt = pkts[i];
>>>>>           /* Handle current packet */
>>>>> -        if (netdev_dpdk_policer_pkt_handle(meter, pkt,
>>>>> current_time)) {
>>>>> +        if (netdev_dpdk_policer_pkt_handle(meter, profile, pkt,
>>>>> +                                                 current_time)) {
>>>>>               if (cnt != i) {
>>>>>                   pkts[cnt] = pkt;
>>>>>               }
>>>>> @@ -1975,8 +1981,8 @@ ingress_policer_run(struct ingress_policer
>>>>> *policer, struct rte_mbuf **pkts,
>>>>>       int cnt = 0;
>>>>>
>>>>>       rte_spinlock_lock(&policer->policer_lock);
>>>>> -    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,
>>>>> -                                  pkt_cnt, should_steal);
>>>>> +    cnt = netdev_dpdk_policer_run(&policer->in_policer, &policer-
>>>> in_prof,
>>>>> +                                  pkts, pkt_cnt, should_steal);
>>>>>       rte_spinlock_unlock(&policer->policer_lock);
>>>>>
>>>>>       return cnt;
>>>>> @@ -2767,8 +2773,15 @@ netdev_dpdk_policer_construct(uint32_t
>>> rate,
>>>>> uint32_t burst)
>>>>>       policer->app_srtcm_params.cir = rate_bytes;
>>>>>       policer->app_srtcm_params.cbs = burst_bytes;
>>>>>       policer->app_srtcm_params.ebs = 0;
>>>>> +    err = rte_meter_srtcm_profile_config(&policer->in_prof,
>>>>> +                                         &policer->app_srtcm_params);
>>>>> +    if (err) {
>>>>> +        VLOG_ERR("Could not create rte meter profile for ingress
>>>>> policer");
>>>>> +        free(policer);
>>>>> +        return NULL;
>>>>> +    }
>>>>>       err = rte_meter_srtcm_config(&policer->in_policer,
>>>>> -                                    &policer->app_srtcm_params);
>>>>> +                                    &policer->in_prof);
>>>>>       if (err) {
>>>>>           VLOG_ERR("Could not create rte meter for ingress policer");
>>>>>           free(policer);
>>>>> @@ -3043,13 +3056,18 @@ netdev_dpdk_get_status(const struct
>>> netdev
>>>>> *netdev, struct smap *args)
>>>>>       smap_add_format(args, "if_descr", "%s %s", rte_version(),
>>>>>
>>>>> dev_info.driver_name);
>>>>>
>>>>> -    if (dev_info.pci_dev) {
>>>>> -        smap_add_format(args, "pci-vendor_id", "0x%x",
>>>>> -                        dev_info.pci_dev->id.vendor_id);
>>>>> -        smap_add_format(args, "pci-device_id", "0x%x",
>>>>> -                        dev_info.pci_dev->id.device_id);
>>>>> +    const struct rte_bus *bus;
>>>>> +    const struct rte_pci_device *pci_dev;
>>>>> +    bus = rte_bus_find_by_device(dev_info.device);
>>>>> +    if (bus && !strcmp(bus->name, "pci")) {
>>>>> +        pci_dev = RTE_DEV_TO_PCI(dev_info.device);
>>>>> +        if (pci_dev) {
>>>>> +            smap_add_format(args, "pci-vendor_id", "0x%u",
>>>>> +                            pci_dev->id.vendor_id);
>>>>> +             smap_add_format(args, "pci-device_id", "0x%x",
>>>>> +                             pci_dev->id.device_id);
>>>>> +        }
>>>>>       }
>>>>> -
>>>>>       return 0;
>>>>>   }
>>>>>
>>>>> @@ -3727,6 +3745,7 @@ struct egress_policer {
>>>>>       struct qos_conf qos_conf;
>>>>>       struct rte_meter_srtcm_params app_srtcm_params;
>>>>>       struct rte_meter_srtcm egress_meter;
>>>>> +    struct rte_meter_srtcm_profile egress_prof;
>>>>>   };
>>>>>
>>>>>   static void
>>>>> @@ -3749,8 +3768,16 @@ egress_policer_qos_construct(const struct
>>>>> smap *details,
>>>>>       policer = xmalloc(sizeof *policer);
>>>>>       qos_conf_init(&policer->qos_conf, &egress_policer_ops);
>>>>>       egress_policer_details_to_param(details,
>>>>> &policer->app_srtcm_params);
>>>>> +    err = rte_meter_srtcm_profile_config(&policer->egress_prof,
>>>>> +                                         &policer->app_srtcm_params);
>>>>> +    if (err) {
>>>>> +        free(policer);
>>>>> +        *conf = NULL;
>>>>> +        return -err;
>>>>> +    }
>>>>>       err = rte_meter_srtcm_config(&policer->egress_meter,
>>>>> -                                 &policer->app_srtcm_params);
>>>>> +                                 &policer->egress_prof);
>>>>> +
>>>>>       if (!err) {
>>>>>           *conf = &policer->qos_conf;
>>>>>       } else {
>>>>> @@ -3803,7 +3830,8 @@ egress_policer_run(struct qos_conf *conf,
>>>>> struct rte_mbuf **pkts, int pkt_cnt,
>>>>>       struct egress_policer *policer =
>>>>>           CONTAINER_OF(conf, struct egress_policer, qos_conf);
>>>>>
>>>>> -    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
>>>>> +    cnt = netdev_dpdk_policer_run(&policer->egress_meter,
>>>>> +                                  &policer->egress_prof, pkts,
>>>>>                                     pkt_cnt, should_steal);
>>>>>
>>>>>       return cnt;
>>>>> @@ -4103,15 +4131,15 @@ dump_flow_pattern(struct rte_flow_item
>>>>> *item)
>>>>>
>>>>>           VLOG_DBG("rte flow vlan pattern:\n");
>>>>>           if (vlan_spec) {
>>>>> -            VLOG_DBG("  Spec: tpid=0x%"PRIx16", tci=0x%"PRIx16"\n",
>>>>> -                     ntohs(vlan_spec->tpid), ntohs(vlan_spec->tci));
>>>>> +            VLOG_DBG("  Spec: inner_type=0x%"PRIx16",
>>> tci=0x%"PRIx16"\n",
>>>>> +                     ntohs(vlan_spec->inner_type),
>>>>> + ntohs(vlan_spec->tci));
>>>>>           } else {
>>>>>               VLOG_DBG("  Spec = null\n");
>>>>>           }
>>>>>
>>>>>           if (vlan_mask) {
>>>>> -            VLOG_DBG("  Mask: tpid=0x%"PRIx16", tci=0x%"PRIx16"\n",
>>>>> -                     vlan_mask->tpid, vlan_mask->tci);
>>>>> +            VLOG_DBG("  Mask: inner_type=0x%"PRIx16",
>>> tci=0x%"PRIx16"\n",
>>>>> +                     vlan_mask->inner_type, vlan_mask->tci);
>>>>>           } else {
>>>>>               VLOG_DBG("  Mask = null\n");
>>>>>           }
>>>>> @@ -4281,27 +4309,56 @@ add_flow_action(struct flow_actions
>>>>> *actions, enum rte_flow_action_type type,
>>>>>       actions->cnt++;
>>>>>   }
>>>>>
>>>>> +/*
>>>>> + * Storage for struct rte_flow_action_rss
>>>>> + * including storage for key and queue array  */ #define
>>>>> +MAX_RSS_HASH_KEY_LENGTH  128 #define
>>>> MAX_ACTION_RSS_QUEUE_NUM
>>>>> 128
>>>>> +struct action_rss_data {
>>>>> +    struct rte_flow_action_rss conf;
>>>>> +    uint16_t queue[MAX_ACTION_RSS_QUEUE_NUM];
>>>>> +    uint8_t key[MAX_RSS_HASH_KEY_LENGTH]; };
>>>>> +
>>>>>   static struct rte_flow_action_rss *  add_flow_rss_action(struct
>>>>> flow_actions *actions,
>>>>>                       struct netdev *netdev) {
>>>>>       int i;
>>>>> -    struct rte_flow_action_rss *rss;
>>>>> +    struct action_rss_data *rss_data;
>>>>>
>>>>> -    rss = xmalloc(sizeof(*rss) + sizeof(uint16_t) * netdev->n_rxq);
>>>>> -    /*
>>>>> -     * Setting it to NULL will let the driver use the default RSS
>>>>> -     * configuration we have set: &port_conf.rx_adv_conf.rss_conf.
>>>>> -     */
>>>>> -    rss->rss_conf = NULL;
>>>>> -    rss->num = netdev->n_rxq;
>>>>> +    if (netdev->n_rxq > MAX_ACTION_RSS_QUEUE_NUM) {
>>>>> +        VLOG_ERR("Num of rxq (%u) must not be greater " \
>>>>> +                 "than max rss num of queues (%u)",
>>>>> +                 netdev->n_rxq, MAX_ACTION_RSS_QUEUE_NUM);
>>>>> +        return NULL;
>>>>> +    }
>>>>>
>>>>> -    for (i = 0; i < rss->num; i++) {
>>>>> -        rss->queue[i] = i;
>>>>> +    rss_data = xmalloc(sizeof(struct action_rss_data));
>>>>> +    *rss_data = (struct action_rss_data){
>>>>> +        .conf = (struct rte_flow_action_rss){
>>>>> +            .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
>>>>> +            .level = 0,
>>>>> +            .types = ETH_RSS_IP,
>>>>> +            .key_len = 0,
>>>>> +            .queue_num = netdev->n_rxq,
>>>>> +            .queue = rss_data->queue,
>>>>> +            .key = rss_data->key,
>>>>> +        },
>>>>> +        .key = { 0 },
>>>>> +        .queue = { 0 },
>>>>> +    };
>>>>> +
>>>>> +    /* TODO: Override key with default */
>>>>> +
>>>>> +    /* Override queue array with default */
>>>>> +    for (i = 0; i < rss_data->conf.queue_num; i++) {
>>>>> +       rss_data->queue[i] = i;
>>>>>       }
>>>>>
>>>>> -    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, rss);
>>>>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS,
>>>>> + &rss_data->conf);
>>>>>
>>>>> -    return rss;
>>>>> +    return &rss_data->conf;
>>>>>   }
>>>>>
>>>>>   static int
>>>>> @@ -4365,7 +4422,7 @@ netdev_dpdk_add_rte_flow_offload(struct
>>> netdev
>>>>> *netdev,
>>>>>           vlan_mask.tci  = match->wc.masks.vlans[0].tci &
>>>>> ~htons(VLAN_CFI);
>>>>>
>>>>>           /* match any protocols */
>>>>> -        vlan_mask.tpid = 0;
>>>>> +        vlan_mask.inner_type = 0;
>>>>>
>>>>>           add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_VLAN,
>>>>>                            &vlan_spec, &vlan_mask); @@ -4516,6
>>>>> +4573,11 @@
>>>>> end_proto_check:
>>>>>
>>>>>       struct rte_flow_action_rss *rss;
>>>>>       rss = add_flow_rss_action(&actions, netdev);
>>>>> +    if (!rss) {
>>>>> +        VLOG_ERR("add_flow_rss_action error.\n");
>>>>> +        ret = -1;
>>>>> +        goto out;
>>>>> +    }
>>>>>       add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL);
>>>>>
>>>>>       flow = rte_flow_create(dev->port_id, &flow_attr,
>>>>> patterns.items,
>>>>> -- 
>>>>> 1.8.3.1
>>
>
Stokes, Ian Sept. 13, 2018, 1 p.m. UTC | #9
On 9/13/2018 1:57 PM, Kevin Traynor wrote:
> On 09/13/2018 01:45 PM, Ian Stokes wrote:
>> On 9/13/2018 1:34 PM, Ophir Munk wrote:
>>> Hi Sugesh,
>>> I can update documentation. Is it enough to update dpdk.rst.txt only?
>>> If not - can you please send a link to additional files with
>>> references to 17.11 to be updated?
>>> I will wait 1-2 days to get more reviews then will send v3.
>>
>> Hi Ophir, I've just sent a patch to update master branch to use DPDK
>> 17.11.4, you can follow the same docs that are modified in that patch
>> for the v3 of this. Docs modified and link to the patch itself below.
>>
>>   .travis/linux-build.sh                   | 2 +-
>>   Documentation/faq/releases.rst           | 6 +++---
>>   Documentation/intro/install/dpdk.rst     | 8 ++++----
>>   Documentation/topics/dpdk/vhost-user.rst | 6 +++---
>>
>> https://mail.openvswitch.org/pipermail/ovs-dev/2018-September/352104.html
>>
>> Thanks
>> Ian
>>>
>>> Regards,
>>> Ophir
>>>
>>>> -----Original Message-----
>>>> From: Chandran, Sugesh [mailto:sugesh.chandran@intel.com]
>>>> Sent: Thursday, September 13, 2018 2:58 PM
>>>> To: Ophir Munk <ophirmu@mellanox.com>; 'ovs-dev@openvswitch.org'
>>>> <ovs-dev@openvswitch.org>
>>>> Cc: Asaf Penso <asafp@mellanox.com>; Stokes, Ian
>>>> <ian.stokes@intel.com>; 'Ben Pfaff' <blp@ovn.org>; Shahaf Shuler
>>>> <shahafs@mellanox.com>; Thomas Monjalon <thomas@monjalon.net>;
>>>> Olga Shern <olgas@mellanox.com>; 'Kevin Traynor' <ktraynor@redhat.com>
>>>> Subject: RE: [dpdk-howl PATCH v2] netdev-dpdk: Upgrade to dpdk v18.08.0
>>>>
>>>> Hi Ophir,
>>>>
>>>> Also a quick grep show there are number of places in documentation still
>>>> pointing to DPDK17.11, I guess its worth while to change them as well.
>>>> This will avoid confusion for people who wanted to use this branch,
>>>> What do
>>>> you think?
>>>>
>>>>
>>>>
>>>> Regards
>>>> _Sugesh
>>>>
>>>>
>>>>> -----Original Message-----
>>>>> From: Chandran, Sugesh
>>>>> Sent: Wednesday, September 12, 2018 5:05 PM
>>>>> To: Ophir Munk <ophirmu@mellanox.com>; ovs-dev@openvswitch.org
>>>>> Cc: Asaf Penso <asafp@mellanox.com>; Stokes, Ian
>>>>> <ian.stokes@intel.com>; Ben Pfaff <blp@ovn.org>; Shahaf Shuler
>>>>> <shahafs@mellanox.com>; Thomas Monjalon <thomas@monjalon.net>;
>>>> Olga
>>>>> Shern <olgas@mellanox.com>; Kevin Traynor <ktraynor@redhat.com>
>>>>> Subject: RE: [dpdk-howl PATCH v2] netdev-dpdk: Upgrade to dpdk
>>>>> v18.08.0
>>>>>
>>>>> Hi Ophir,
>>>>> I verified this patch in our test lab and looks fine to me.
>>>>> I have couple of minor comments in the V1 series. Could you please
>>>>> take care of them as well?
>>>>>
>>>>>
>>>>> Regards
>>>>> _Sugesh
>>>>>
>>>>>> -----Original Message-----
>>>>>> From: Ophir Munk [mailto:ophirmu@mellanox.com]
>>>>>> Sent: Tuesday, September 11, 2018 12:05 AM
>>>>>> To: ovs-dev@openvswitch.org
>>>>>> Cc: Asaf Penso <asafp@mellanox.com>; Chandran, Sugesh
>>>>>> <sugesh.chandran@intel.com>; Stokes, Ian <ian.stokes@intel.com>; Ben
>>>>>> Pfaff <blp@ovn.org>; Shahaf Shuler <shahafs@mellanox.com>; Thomas
>>>>>> Monjalon <thomas@monjalon.net>; Olga Shern <olgas@mellanox.com>;
>>>>> Ophir
>>>>>> Munk <ophirmu@mellanox.com>; Kevin Traynor
>>>> <ktraynor@redhat.com>
>>>>>> Subject: [dpdk-howl PATCH v2] netdev-dpdk: Upgrade to dpdk v18.08.0
>>>>>>
>>>>>> 1. Enable compilation and linkage with dpdk 18.08.0 The following
>>>>>> dpdk commits which were introduced after dpdk 17.11.x require OVS
>>>>>> updates to accommodate to the dpdk changes.
>>>>>> - ce17eddefc20 ("ethdev: introduce Rx queue offloads API")
>>>>>> - ab3ce1e0c193 ("ethdev: remove old offload API")
>>>>>> - c06ddf9698e0 ("meter: add configuration profile")
>>>>>> - e58638c32411 ("ethdev: fix TPID handling in flow API")
>>>>>> - cd8c7c7ce241 ("ethdev: replace bus specific struct with generic
>>>>>> dev")
>>>>>> - ac8d22de2394 ("ethdev: flatten RSS configuration in flow API")
>>>>>>
>>>>>> 2. Limit configured rss hash functions to only those supported by
>>>>>> the eth
>>>>> device.
>>>>>>
>>>>>> 3. Update references to DPDK version in Documentation
>>>>>>
>>>>>> 4. Update DPDK version in travis' linux-build script
>>>>>>
>>>>>> Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
>>>>>> ---
>>>>>> v1:
>>>>>> First version
>>>>>>
>>>>>> v2:
>>>>>> Avoid seg faults cases as described in
>>>>>>
>>>> https://emea01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fp
>>>>>>
>>>> atchwork.ozlabs.org%2Fpatch%2F965451%2F&amp;data=02%7C01%7Cophi
>>>> rmu%4
>>>>>>
>>>> 0mellanox.com%7Ccc4dcac1d427416c8dc508d619703178%7Ca652971c7d2
>>>> e4d9ba
>>>>>>
>>>> 6a4d149256f461b%7C0%7C0%7C636724366985653825&amp;sdata=CFZjVD
>>>> Rd6w0TP
>>>>>> x7lKWOkUVEh28aoyrpl%2Fop635Vrlx0%3D&amp;reserved=0
>>>>>> by using the patch in:
>>>>>>
>>>> https://emea01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fg
>>>>>> ithub.com%2Fkevintraynor%2Fovs-dpdk-
>>>> &amp;data=02%7C01%7Cophirmu%40me
>>>>>>
>>>> llanox.com%7Ccc4dcac1d427416c8dc508d619703178%7Ca652971c7d2e4d9
>>>> ba6a4
>>>>>>
>>>> d149256f461b%7C0%7C0%7C636724366985653825&amp;sdata=%2F2B4uH
>>>> HAHGma%2
>>>>>> F48VS2I3QsMm%2ByO3LMZdaHlxVKJ9JYg%3D&amp;reserved=0
>>>>>> master/commit/88f46cc5ab338eb4f3ca5db1eacd0effefe4fa0c
>>>>>>
>>>>>>    .travis/linux-build.sh         |   2 +-
>>>>>>    Documentation/faq/releases.rst |   2 +-
>>>>>>    lib/netdev-dpdk.c              | 142
>>>>>> +++++++++++++++++++++++++++++-------
>>>> -----
>>>>>>    3 files changed, 104 insertions(+), 42 deletions(-)
>>>>>>
>>>>>> diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh index
>>>>>> 4b9fc4a..c60ac71
>>>>>> 100755
>>>>>> --- a/.travis/linux-build.sh
>>>>>> +++ b/.travis/linux-build.sh
>>>>>> @@ -83,7 +83,7 @@ fi
>>>>>>
>>>>>>    if [ "$DPDK" ]; then
>>>>>>        if [ -z "$DPDK_VER" ]; then
>>>>>> -        DPDK_VER="17.11.3"
>>>>>> +        DPDK_VER="18.08.0"
>>>>>>        fi
>>>>>>        install_dpdk $DPDK_VER
>>>>>>        if [ "$CC" = "clang" ]; then
>>>>>> diff --git a/Documentation/faq/releases.rst
>>>>>> b/Documentation/faq/releases.rst index 41d41e3..646ae09 100644
>>>>>> --- a/Documentation/faq/releases.rst
>>>>>> +++ b/Documentation/faq/releases.rst
>>>>>> @@ -168,7 +168,7 @@ Q: What DPDK version does each Open vSwitch
>>>>>> release work with?
>>>>>>        2.7.x        16.11.7
>>>>>>        2.8.x        17.05.2
>>>>>>        2.9.x        17.11.3
>>>>>> -    2.10.x       17.11.3
>>>>>> +    2.10.x       18.08.0
> 
> Hi - on updating versions in docs, this one should not change as the
> released/to be released versions of OVS 2.10.x stick with 17.11.x. We
> can just leave this unmodified for now and add an entry for 2.11.x
> closer to the release time.

That makes sense actually as there's no releases from this branch. +1

Ian
> 
> Kevin.
> 
>>>>>>        ============ =======
>>>>>>
>>>>>>    Q: Are all the DPDK releases that OVS versions work with maintained?
>>>>>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index
>>>>>> f91aa27..6879e9a
>>>>>> 100644
>>>>>> --- a/lib/netdev-dpdk.c
>>>>>> +++ b/lib/netdev-dpdk.c
>>>>>> @@ -168,11 +168,7 @@ static const struct rte_eth_conf port_conf = {
>>>>>>        .rxmode = {
>>>>>>            .mq_mode = ETH_MQ_RX_RSS,
>>>>>>            .split_hdr_size = 0,
>>>>>> -        .header_split   = 0, /* Header Split disabled */
>>>>>> -        .hw_ip_checksum = 0, /* IP checksum offload disabled */
>>>>>> -        .hw_vlan_filter = 0, /* VLAN filtering disabled */
>>>>>> -        .jumbo_frame    = 0, /* Jumbo Frame Support disabled */
>>>>>> -        .hw_strip_crc   = 0,
>>>>>> +        .offloads = 0,
>>>>>>        },
>>>>>>        .rx_adv_conf = {
>>>>>>            .rss_conf = {
>>>>>> @@ -364,6 +360,7 @@ struct dpdk_ring {  struct ingress_policer {
>>>>>>        struct rte_meter_srtcm_params app_srtcm_params;
>>>>>>        struct rte_meter_srtcm in_policer;
>>>>>> +    struct rte_meter_srtcm_profile in_prof;
>>>>>>        rte_spinlock_t policer_lock;
>>>>>>    };
>>>>>>
>>>>>> @@ -894,6 +891,8 @@ dpdk_eth_dev_port_config(struct netdev_dpdk
>>>>>> *dev, int n_rxq, int n_txq)
>>>>>>        struct rte_eth_dev_info info;
>>>>>>        uint16_t conf_mtu;
>>>>>>
>>>>>> +    rte_eth_dev_info_get(dev->port_id, &info);
>>>>>> +
>>>>>>        /* As of DPDK 17.11.1 a few PMDs require to explicitly enable
>>>>>>         * scatter to support jumbo RX. Checking the offload
>>>>>> capabilities
>>>>>>         * is not an option as PMDs are not required yet to report @@
>>>>>> -901,20
>>>>>> +900,24 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int
>>>>>> +n_rxq, int
>>>>>> n_txq)
>>>>>>         * (testing or code review). Listing all such PMDs feels harder
>>>>>>         * than highlighting the one known not to need scatter */
>>>>>>        if (dev->mtu > ETHER_MTU) {
>>>>>> -        rte_eth_dev_info_get(dev->port_id, &info);
>>>>>>            if (strncmp(info.driver_name, "net_nfp", 7)) {
>>>>>> -            conf.rxmode.enable_scatter = 1;
>>>>>> +            conf.rxmode.offloads |= DEV_RX_OFFLOAD_SCATTER;
>>>>>>            }
>>>>>>        }
>>>>>>
>>>>>>        conf.intr_conf.lsc = dev->lsc_interrupt_mode;
>>>>>> -    conf.rxmode.hw_ip_checksum = (dev->hw_ol_features &
>>>>>> -                                  NETDEV_RX_CHECKSUM_OFFLOAD) != 0;
>>>>>> +    conf.rxmode.offloads |= ((dev->hw_ol_features &
>>>>>> +                             NETDEV_RX_CHECKSUM_OFFLOAD) != 0) ?
>>>>>> +                             DEV_RX_OFFLOAD_CHECKSUM : 0;
>>>>>>
>>>>>>        if (dev->hw_ol_features & NETDEV_RX_HW_CRC_STRIP) {
>>>>>> -        conf.rxmode.hw_strip_crc = 1;
>>>>>> +        conf.rxmode.offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
>>>>>>        }
>>>>>>
>>>>>> +    /* Limit configured rss hash functions to only those supported
>>>>>> +     * by the eth device. */
>>>>>> +    conf.rx_adv_conf.rss_conf.rss_hf &=
>>>>>> + info.flow_type_rss_offloads;
>>>>>> +
>>>>>>        /* A device may report more queues than it makes available
>>>>>> (this has
>>>>>>         * been observed for Intel xl710, which reserves some of them
>>>>>> for
>>>>>>         * SRIOV):  rte_eth_*_queue_setup will fail if a queue is not
>>>>>> @@
>>>>>> -1932,16
>>>>>> +1935,18 @@ netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int
>>>>>> +qid,
>>>>>>
>>>>>>    static inline bool
>>>>>>    netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm *meter,
>>>>>> +                               struct rte_meter_srtcm_profile
>>>>>> + *profile,
>>>>>>                                   struct rte_mbuf *pkt, uint64_t
>>>>>> time)  {
>>>>>>        uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt) - sizeof(struct
>>>>>> ether_hdr);
>>>>>>
>>>>>> -    return rte_meter_srtcm_color_blind_check(meter, time, pkt_len) ==
>>>>>> +    return rte_meter_srtcm_color_blind_check(meter, profile, time,
>>>>>> + pkt_len) ==
>>>>>>                                                    e_RTE_METER_GREEN;
>>>>>> }
>>>>>>
>>>>>>    static int
>>>>>>    netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
>>>>>> +                        struct rte_meter_srtcm_profile *profile,
>>>>>>                            struct rte_mbuf **pkts, int pkt_cnt,
>>>>>>                            bool should_steal)  { @@ -1953,7 +1958,8 @@
>>>>>> netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
>>>>>>        for (i = 0; i < pkt_cnt; i++) {
>>>>>>            pkt = pkts[i];
>>>>>>            /* Handle current packet */
>>>>>> -        if (netdev_dpdk_policer_pkt_handle(meter, pkt,
>>>>>> current_time)) {
>>>>>> +        if (netdev_dpdk_policer_pkt_handle(meter, profile, pkt,
>>>>>> +                                                 current_time)) {
>>>>>>                if (cnt != i) {
>>>>>>                    pkts[cnt] = pkt;
>>>>>>                }
>>>>>> @@ -1975,8 +1981,8 @@ ingress_policer_run(struct ingress_policer
>>>>>> *policer, struct rte_mbuf **pkts,
>>>>>>        int cnt = 0;
>>>>>>
>>>>>>        rte_spinlock_lock(&policer->policer_lock);
>>>>>> -    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,
>>>>>> -                                  pkt_cnt, should_steal);
>>>>>> +    cnt = netdev_dpdk_policer_run(&policer->in_policer, &policer-
>>>>> in_prof,
>>>>>> +                                  pkts, pkt_cnt, should_steal);
>>>>>>        rte_spinlock_unlock(&policer->policer_lock);
>>>>>>
>>>>>>        return cnt;
>>>>>> @@ -2767,8 +2773,15 @@ netdev_dpdk_policer_construct(uint32_t
>>>> rate,
>>>>>> uint32_t burst)
>>>>>>        policer->app_srtcm_params.cir = rate_bytes;
>>>>>>        policer->app_srtcm_params.cbs = burst_bytes;
>>>>>>        policer->app_srtcm_params.ebs = 0;
>>>>>> +    err = rte_meter_srtcm_profile_config(&policer->in_prof,
>>>>>> +                                         &policer->app_srtcm_params);
>>>>>> +    if (err) {
>>>>>> +        VLOG_ERR("Could not create rte meter profile for ingress
>>>>>> policer");
>>>>>> +        free(policer);
>>>>>> +        return NULL;
>>>>>> +    }
>>>>>>        err = rte_meter_srtcm_config(&policer->in_policer,
>>>>>> -                                    &policer->app_srtcm_params);
>>>>>> +                                    &policer->in_prof);
>>>>>>        if (err) {
>>>>>>            VLOG_ERR("Could not create rte meter for ingress policer");
>>>>>>            free(policer);
>>>>>> @@ -3043,13 +3056,18 @@ netdev_dpdk_get_status(const struct
>>>> netdev
>>>>>> *netdev, struct smap *args)
>>>>>>        smap_add_format(args, "if_descr", "%s %s", rte_version(),
>>>>>>
>>>>>> dev_info.driver_name);
>>>>>>
>>>>>> -    if (dev_info.pci_dev) {
>>>>>> -        smap_add_format(args, "pci-vendor_id", "0x%x",
>>>>>> -                        dev_info.pci_dev->id.vendor_id);
>>>>>> -        smap_add_format(args, "pci-device_id", "0x%x",
>>>>>> -                        dev_info.pci_dev->id.device_id);
>>>>>> +    const struct rte_bus *bus;
>>>>>> +    const struct rte_pci_device *pci_dev;
>>>>>> +    bus = rte_bus_find_by_device(dev_info.device);
>>>>>> +    if (bus && !strcmp(bus->name, "pci")) {
>>>>>> +        pci_dev = RTE_DEV_TO_PCI(dev_info.device);
>>>>>> +        if (pci_dev) {
>>>>>> +            smap_add_format(args, "pci-vendor_id", "0x%u",
>>>>>> +                            pci_dev->id.vendor_id);
>>>>>> +             smap_add_format(args, "pci-device_id", "0x%x",
>>>>>> +                             pci_dev->id.device_id);
>>>>>> +        }
>>>>>>        }
>>>>>> -
>>>>>>        return 0;
>>>>>>    }
>>>>>>
>>>>>> @@ -3727,6 +3745,7 @@ struct egress_policer {
>>>>>>        struct qos_conf qos_conf;
>>>>>>        struct rte_meter_srtcm_params app_srtcm_params;
>>>>>>        struct rte_meter_srtcm egress_meter;
>>>>>> +    struct rte_meter_srtcm_profile egress_prof;
>>>>>>    };
>>>>>>
>>>>>>    static void
>>>>>> @@ -3749,8 +3768,16 @@ egress_policer_qos_construct(const struct
>>>>>> smap *details,
>>>>>>        policer = xmalloc(sizeof *policer);
>>>>>>        qos_conf_init(&policer->qos_conf, &egress_policer_ops);
>>>>>>        egress_policer_details_to_param(details,
>>>>>> &policer->app_srtcm_params);
>>>>>> +    err = rte_meter_srtcm_profile_config(&policer->egress_prof,
>>>>>> +                                         &policer->app_srtcm_params);
>>>>>> +    if (err) {
>>>>>> +        free(policer);
>>>>>> +        *conf = NULL;
>>>>>> +        return -err;
>>>>>> +    }
>>>>>>        err = rte_meter_srtcm_config(&policer->egress_meter,
>>>>>> -                                 &policer->app_srtcm_params);
>>>>>> +                                 &policer->egress_prof);
>>>>>> +
>>>>>>        if (!err) {
>>>>>>            *conf = &policer->qos_conf;
>>>>>>        } else {
>>>>>> @@ -3803,7 +3830,8 @@ egress_policer_run(struct qos_conf *conf,
>>>>>> struct rte_mbuf **pkts, int pkt_cnt,
>>>>>>        struct egress_policer *policer =
>>>>>>            CONTAINER_OF(conf, struct egress_policer, qos_conf);
>>>>>>
>>>>>> -    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
>>>>>> +    cnt = netdev_dpdk_policer_run(&policer->egress_meter,
>>>>>> +                                  &policer->egress_prof, pkts,
>>>>>>                                      pkt_cnt, should_steal);
>>>>>>
>>>>>>        return cnt;
>>>>>> @@ -4103,15 +4131,15 @@ dump_flow_pattern(struct rte_flow_item
>>>>>> *item)
>>>>>>
>>>>>>            VLOG_DBG("rte flow vlan pattern:\n");
>>>>>>            if (vlan_spec) {
>>>>>> -            VLOG_DBG("  Spec: tpid=0x%"PRIx16", tci=0x%"PRIx16"\n",
>>>>>> -                     ntohs(vlan_spec->tpid), ntohs(vlan_spec->tci));
>>>>>> +            VLOG_DBG("  Spec: inner_type=0x%"PRIx16",
>>>> tci=0x%"PRIx16"\n",
>>>>>> +                     ntohs(vlan_spec->inner_type),
>>>>>> + ntohs(vlan_spec->tci));
>>>>>>            } else {
>>>>>>                VLOG_DBG("  Spec = null\n");
>>>>>>            }
>>>>>>
>>>>>>            if (vlan_mask) {
>>>>>> -            VLOG_DBG("  Mask: tpid=0x%"PRIx16", tci=0x%"PRIx16"\n",
>>>>>> -                     vlan_mask->tpid, vlan_mask->tci);
>>>>>> +            VLOG_DBG("  Mask: inner_type=0x%"PRIx16",
>>>> tci=0x%"PRIx16"\n",
>>>>>> +                     vlan_mask->inner_type, vlan_mask->tci);
>>>>>>            } else {
>>>>>>                VLOG_DBG("  Mask = null\n");
>>>>>>            }
>>>>>> @@ -4281,27 +4309,56 @@ add_flow_action(struct flow_actions
>>>>>> *actions, enum rte_flow_action_type type,
>>>>>>        actions->cnt++;
>>>>>>    }
>>>>>>
>>>>>> +/*
>>>>>> + * Storage for struct rte_flow_action_rss
>>>>>> + * including storage for key and queue array  */ #define
>>>>>> +MAX_RSS_HASH_KEY_LENGTH  128 #define
>>>>> MAX_ACTION_RSS_QUEUE_NUM
>>>>>> 128
>>>>>> +struct action_rss_data {
>>>>>> +    struct rte_flow_action_rss conf;
>>>>>> +    uint16_t queue[MAX_ACTION_RSS_QUEUE_NUM];
>>>>>> +    uint8_t key[MAX_RSS_HASH_KEY_LENGTH]; };
>>>>>> +
>>>>>>    static struct rte_flow_action_rss *  add_flow_rss_action(struct
>>>>>> flow_actions *actions,
>>>>>>                        struct netdev *netdev) {
>>>>>>        int i;
>>>>>> -    struct rte_flow_action_rss *rss;
>>>>>> +    struct action_rss_data *rss_data;
>>>>>>
>>>>>> -    rss = xmalloc(sizeof(*rss) + sizeof(uint16_t) * netdev->n_rxq);
>>>>>> -    /*
>>>>>> -     * Setting it to NULL will let the driver use the default RSS
>>>>>> -     * configuration we have set: &port_conf.rx_adv_conf.rss_conf.
>>>>>> -     */
>>>>>> -    rss->rss_conf = NULL;
>>>>>> -    rss->num = netdev->n_rxq;
>>>>>> +    if (netdev->n_rxq > MAX_ACTION_RSS_QUEUE_NUM) {
>>>>>> +        VLOG_ERR("Num of rxq (%u) must not be greater " \
>>>>>> +                 "than max rss num of queues (%u)",
>>>>>> +                 netdev->n_rxq, MAX_ACTION_RSS_QUEUE_NUM);
>>>>>> +        return NULL;
>>>>>> +    }
>>>>>>
>>>>>> -    for (i = 0; i < rss->num; i++) {
>>>>>> -        rss->queue[i] = i;
>>>>>> +    rss_data = xmalloc(sizeof(struct action_rss_data));
>>>>>> +    *rss_data = (struct action_rss_data){
>>>>>> +        .conf = (struct rte_flow_action_rss){
>>>>>> +            .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
>>>>>> +            .level = 0,
>>>>>> +            .types = ETH_RSS_IP,
>>>>>> +            .key_len = 0,
>>>>>> +            .queue_num = netdev->n_rxq,
>>>>>> +            .queue = rss_data->queue,
>>>>>> +            .key = rss_data->key,
>>>>>> +        },
>>>>>> +        .key = { 0 },
>>>>>> +        .queue = { 0 },
>>>>>> +    };
>>>>>> +
>>>>>> +    /* TODO: Override key with default */
>>>>>> +
>>>>>> +    /* Override queue array with default */
>>>>>> +    for (i = 0; i < rss_data->conf.queue_num; i++) {
>>>>>> +       rss_data->queue[i] = i;
>>>>>>        }
>>>>>>
>>>>>> -    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, rss);
>>>>>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS,
>>>>>> + &rss_data->conf);
>>>>>>
>>>>>> -    return rss;
>>>>>> +    return &rss_data->conf;
>>>>>>    }
>>>>>>
>>>>>>    static int
>>>>>> @@ -4365,7 +4422,7 @@ netdev_dpdk_add_rte_flow_offload(struct
>>>> netdev
>>>>>> *netdev,
>>>>>>            vlan_mask.tci  = match->wc.masks.vlans[0].tci &
>>>>>> ~htons(VLAN_CFI);
>>>>>>
>>>>>>            /* match any protocols */
>>>>>> -        vlan_mask.tpid = 0;
>>>>>> +        vlan_mask.inner_type = 0;
>>>>>>
>>>>>>            add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_VLAN,
>>>>>>                             &vlan_spec, &vlan_mask); @@ -4516,6
>>>>>> +4573,11 @@
>>>>>> end_proto_check:
>>>>>>
>>>>>>        struct rte_flow_action_rss *rss;
>>>>>>        rss = add_flow_rss_action(&actions, netdev);
>>>>>> +    if (!rss) {
>>>>>> +        VLOG_ERR("add_flow_rss_action error.\n");
>>>>>> +        ret = -1;
>>>>>> +        goto out;
>>>>>> +    }
>>>>>>        add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL);
>>>>>>
>>>>>>        flow = rte_flow_create(dev->port_id, &flow_attr,
>>>>>> patterns.items,
>>>>>> -- 
>>>>>> 1.8.3.1
>>>
>>
>
Chandran, Sugesh Sept. 13, 2018, 1:20 p.m. UTC | #10
Regards
_Sugesh

> >>>>>> -    2.10.x       17.11.3
> >>>>>> +    2.10.x       18.08.0
> >
> > Hi - on updating versions in docs, this one should not change as the
> > released/to be released versions of OVS 2.10.x stick with 17.11.x. We
> > can just leave this unmodified for now and add an entry for 2.11.x
> > closer to the release time.
> 
> That makes sense actually as there's no releases from this branch. +1
> 
> Ian
> >
> > Kevin.
> >
[Sugesh] +1
Eelco Chaudron Sept. 17, 2018, 4:38 p.m. UTC | #11
Hi Ophir,

I did some basic PVP testing root vs non-root and it all seems to work 
as expected (did not do any performance comparison).

I do see compiler warnings about usage of deprecated DPDK functions, I 
guess these need to be fixed:


lib/netdev-dpdk.c: In function 'netdev_dpdk_destruct':
lib/netdev-dpdk.c:1336:9: error: 'rte_eth_dev_detach' is deprecated 
(declared at 
/home/root/OVS_dpdk-hwol_DPDK_v18.08/dpdk-stable/x86_64-native-linuxapp-gcc/include/rte_ethdev.h:1451) 
[-Werror=deprecated-declarations]
          if (rte_eth_dev_detach(dev->port_id, devname) < 0) {
          ^
lib/netdev-dpdk.c: In function 'netdev_dpdk_process_devargs':
lib/netdev-dpdk.c:1629:13: error: 'rte_eth_dev_attach' is deprecated 
(declared at 
/home/root/OVS_dpdk-hwol_DPDK_v18.08/dpdk-stable/x86_64-native-linuxapp-gcc/include/rte_ethdev.h:1435) 
[-Werror=deprecated-declarations]
              if (!rte_eth_dev_attach(devargs, &new_port_id)) {
              ^
lib/netdev-dpdk.c: In function 'netdev_dpdk_detach':
lib/netdev-dpdk.c:3159:5: error: 'rte_eth_dev_detach' is deprecated 
(declared at 
/home/root/OVS_dpdk-hwol_DPDK_v18.08/dpdk-stable/x86_64-native-linuxapp-gcc/include/rte_ethdev.h:1451) 
[-Werror=deprecated-declarations]
      ret = rte_eth_dev_detach(port_id, devname);
      ^

See more comments inline…

//Eelco


	
On 11 Sep 2018, at 1:04, Ophir Munk wrote:

> 1. Enable compilation and linkage with dpdk 18.08.0
> The following dpdk commits which were introduced after dpdk 17.11.x
> require OVS updates to accommodate to the dpdk changes.
> - ce17eddefc20 ("ethdev: introduce Rx queue offloads API")
> - ab3ce1e0c193 ("ethdev: remove old offload API")
> - c06ddf9698e0 ("meter: add configuration profile")
> - e58638c32411 ("ethdev: fix TPID handling in flow API")
> - cd8c7c7ce241 ("ethdev: replace bus specific struct with generic 
> dev")
> - ac8d22de2394 ("ethdev: flatten RSS configuration in flow API")
>
> 2. Limit configured rss hash functions to only those supported
> by the eth device.
>
> 3. Update references to DPDK version in Documentation
>
> 4. Update DPDK version in travis' linux-build script
>
> Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
> ---
> v1:
> First version
>
> v2:
> Avoid seg faults cases as described in
> https://patchwork.ozlabs.org/patch/965451/
> by using the patch in:
> https://github.com/kevintraynor/ovs-dpdk-master/commit/88f46cc5ab338eb4f3ca5db1eacd0effefe4fa0c
>
>  .travis/linux-build.sh         |   2 +-
>  Documentation/faq/releases.rst |   2 +-
>  lib/netdev-dpdk.c              | 142 
> +++++++++++++++++++++++++++++------------
>  3 files changed, 104 insertions(+), 42 deletions(-)
>
> diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh
> index 4b9fc4a..c60ac71 100755
> --- a/.travis/linux-build.sh
> +++ b/.travis/linux-build.sh
> @@ -83,7 +83,7 @@ fi
>
>  if [ "$DPDK" ]; then
>      if [ -z "$DPDK_VER" ]; then
> -        DPDK_VER="17.11.3"
> +        DPDK_VER="18.08.0"
>      fi
>      install_dpdk $DPDK_VER
>      if [ "$CC" = "clang" ]; then
> diff --git a/Documentation/faq/releases.rst 
> b/Documentation/faq/releases.rst
> index 41d41e3..646ae09 100644
> --- a/Documentation/faq/releases.rst
> +++ b/Documentation/faq/releases.rst
> @@ -168,7 +168,7 @@ Q: What DPDK version does each Open vSwitch 
> release work with?
>      2.7.x        16.11.7
>      2.8.x        17.05.2
>      2.9.x        17.11.3
> -    2.10.x       17.11.3
> +    2.10.x       18.08.0
>      ============ =======
>
>  Q: Are all the DPDK releases that OVS versions work with maintained?
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index f91aa27..6879e9a 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -168,11 +168,7 @@ static const struct rte_eth_conf port_conf = {
>      .rxmode = {
>          .mq_mode = ETH_MQ_RX_RSS,
>          .split_hdr_size = 0,
> -        .header_split   = 0, /* Header Split disabled */
> -        .hw_ip_checksum = 0, /* IP checksum offload disabled */
> -        .hw_vlan_filter = 0, /* VLAN filtering disabled */
> -        .jumbo_frame    = 0, /* Jumbo Frame Support disabled */
> -        .hw_strip_crc   = 0,
> +        .offloads = 0,
>      },
>      .rx_adv_conf = {
>          .rss_conf = {
> @@ -364,6 +360,7 @@ struct dpdk_ring {
>  struct ingress_policer {
>      struct rte_meter_srtcm_params app_srtcm_params;
>      struct rte_meter_srtcm in_policer;
> +    struct rte_meter_srtcm_profile in_prof;
>      rte_spinlock_t policer_lock;
>  };
> @@ -894,6 +891,8 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, 
> int n_rxq, int n_txq)
>      struct rte_eth_dev_info info;
>      uint16_t conf_mtu;
> +    rte_eth_dev_info_get(dev->port_id, &info);
> +
>      /* As of DPDK 17.11.1 a few PMDs require to explicitly enable
>       * scatter to support jumbo RX. Checking the offload capabilities
>       * is not an option as PMDs are not required yet to report
> @@ -901,20 +900,24 @@ dpdk_eth_dev_port_config(struct netdev_dpdk 
> *dev, int n_rxq, int n_txq)
>       * (testing or code review). Listing all such PMDs feels harder
>       * than highlighting the one known not to need scatter */
>      if (dev->mtu > ETHER_MTU) {
> -        rte_eth_dev_info_get(dev->port_id, &info);
>          if (strncmp(info.driver_name, "net_nfp", 7)) {
> -            conf.rxmode.enable_scatter = 1;
> +            conf.rxmode.offloads |= DEV_RX_OFFLOAD_SCATTER;
>          }
>      }

EC> The above was supposed to be fixed in the drivers for 18+, was this 
the case? If so we should be able to change this hard-coded nic name? 
I’ve copied in Pablo who added the code above.


>      conf.intr_conf.lsc = dev->lsc_interrupt_mode;
> -    conf.rxmode.hw_ip_checksum = (dev->hw_ol_features &
> -                                  NETDEV_RX_CHECKSUM_OFFLOAD) != 0;
> +    conf.rxmode.offloads |= ((dev->hw_ol_features &
> +                             NETDEV_RX_CHECKSUM_OFFLOAD) != 0) ?
> +                             DEV_RX_OFFLOAD_CHECKSUM : 0;

EC> Maybe convert the above also to an if, as below, to make it more 
clear?

>      if (dev->hw_ol_features & NETDEV_RX_HW_CRC_STRIP) {
> -        conf.rxmode.hw_strip_crc = 1;
> +        conf.rxmode.offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
>      }
> +    /* Limit configured rss hash functions to only those supported
> +     * by the eth device. */
> +    conf.rx_adv_conf.rss_conf.rss_hf &= info.flow_type_rss_offloads;
> +
>      /* A device may report more queues than it makes available (this 
> has
>       * been observed for Intel xl710, which reserves some of them for
>       * SRIOV):  rte_eth_*_queue_setup will fail if a queue is not
> @@ -1932,16 +1935,18 @@ netdev_dpdk_eth_tx_burst(struct netdev_dpdk 
> *dev, int qid,
>  static inline bool
>  netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm *meter,
> +                               struct rte_meter_srtcm_profile 
> *profile,
>                                 struct rte_mbuf *pkt, uint64_t time)
>  {
>      uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt) - sizeof(struct 
> ether_hdr);
> -    return rte_meter_srtcm_color_blind_check(meter, time, pkt_len) ==
> +    return rte_meter_srtcm_color_blind_check(meter, profile, time, 
> pkt_len) ==
>                                                  e_RTE_METER_GREEN;

EC> Indentation looks odd

>  }
>  static int
>  netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
> +                        struct rte_meter_srtcm_profile *profile,
>                          struct rte_mbuf **pkts, int pkt_cnt,
>                          bool should_steal)
>  {
> @@ -1953,7 +1958,8 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm 
> *meter,
>      for (i = 0; i < pkt_cnt; i++) {
>          pkt = pkts[i];
>          /* Handle current packet */
> -        if (netdev_dpdk_policer_pkt_handle(meter, pkt, current_time)) 
> {
> +        if (netdev_dpdk_policer_pkt_handle(meter, profile, pkt,
> +                                                 current_time)) {

EC> Indentation
>
>              if (cnt != i) {
>                  pkts[cnt] = pkt;
>              }
> @@ -1975,8 +1981,8 @@ ingress_policer_run(struct ingress_policer 
> *policer, struct rte_mbuf **pkts,
>      int cnt = 0;
>      rte_spinlock_lock(&policer->policer_lock);
> -    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,
> -                                  pkt_cnt, should_steal);
> +    cnt = netdev_dpdk_policer_run(&policer->in_policer, 
> &policer->in_prof,
> +                                  pkts, pkt_cnt, should_steal);
>      rte_spinlock_unlock(&policer->policer_lock);
>      return cnt;
> @@ -2767,8 +2773,15 @@ netdev_dpdk_policer_construct(uint32_t rate, 
> uint32_t burst)
>      policer->app_srtcm_params.cir = rate_bytes;
>      policer->app_srtcm_params.cbs = burst_bytes;
>      policer->app_srtcm_params.ebs = 0;
> +    err = rte_meter_srtcm_profile_config(&policer->in_prof,
> +                                         &policer->app_srtcm_params);
> +    if (err) {
> +        VLOG_ERR("Could not create rte meter profile for ingress 
> policer");
> +        free(policer);
> +        return NULL;
> +    }
>      err = rte_meter_srtcm_config(&policer->in_policer,
> -                                    &policer->app_srtcm_params);
> +                                    &policer->in_prof);

EC> indentation

>      if (err) {
>          VLOG_ERR("Could not create rte meter for ingress policer");
>          free(policer);
> @@ -3043,13 +3056,18 @@ netdev_dpdk_get_status(const struct netdev 
> *netdev, struct smap *args)
>      smap_add_format(args, "if_descr", "%s %s", rte_version(),
>                                                 dev_info.driver_name);
> -    if (dev_info.pci_dev) {
> -        smap_add_format(args, "pci-vendor_id", "0x%x",
> -                        dev_info.pci_dev->id.vendor_id);
> -        smap_add_format(args, "pci-device_id", "0x%x",
> -                        dev_info.pci_dev->id.device_id);
> +    const struct rte_bus *bus;
> +    const struct rte_pci_device *pci_dev;
> +    bus = rte_bus_find_by_device(dev_info.device);
> +    if (bus && !strcmp(bus->name, "pci")) {
> +        pci_dev = RTE_DEV_TO_PCI(dev_info.device);
> +        if (pci_dev) {
> +            smap_add_format(args, "pci-vendor_id", "0x%u",
> +                            pci_dev->id.vendor_id);
> +             smap_add_format(args, "pci-device_id", "0x%x",
> +                             pci_dev->id.device_id);

EC> Guess this should be %x, and odd indentation, but I see Ilya got 
this also.

> +        }
>      }
> -
>      return 0;
>  }
> @@ -3727,6 +3745,7 @@ struct egress_policer {
>      struct qos_conf qos_conf;
>      struct rte_meter_srtcm_params app_srtcm_params;
>      struct rte_meter_srtcm egress_meter;
> +    struct rte_meter_srtcm_profile egress_prof;
>  };
>  static void
> @@ -3749,8 +3768,16 @@ egress_policer_qos_construct(const struct smap 
> *details,
>      policer = xmalloc(sizeof *policer);
>      qos_conf_init(&policer->qos_conf, &egress_policer_ops);
>      egress_policer_details_to_param(details, 
> &policer->app_srtcm_params);
> +    err = rte_meter_srtcm_profile_config(&policer->egress_prof,
> +                                         &policer->app_srtcm_params);
> +    if (err) {
> +        free(policer);
> +        *conf = NULL;
> +        return -err;
> +    }
>      err = rte_meter_srtcm_config(&policer->egress_meter,
> -                                 &policer->app_srtcm_params);
> +                                 &policer->egress_prof);
> +
>      if (!err) {
>          *conf = &policer->qos_conf;
>      } else {
> @@ -3803,7 +3830,8 @@ egress_policer_run(struct qos_conf *conf, struct 
> rte_mbuf **pkts, int pkt_cnt,
>      struct egress_policer *policer =
>          CONTAINER_OF(conf, struct egress_policer, qos_conf);
> -    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
> +    cnt = netdev_dpdk_policer_run(&policer->egress_meter,
> +                                  &policer->egress_prof, pkts,
>                                    pkt_cnt, should_steal);
>      return cnt;
> @@ -4103,15 +4131,15 @@ dump_flow_pattern(struct rte_flow_item *item)
>          VLOG_DBG("rte flow vlan pattern:\n");
>          if (vlan_spec) {
> -            VLOG_DBG("  Spec: tpid=0x%"PRIx16", tci=0x%"PRIx16"\n",
> -                     ntohs(vlan_spec->tpid), ntohs(vlan_spec->tci));
> +            VLOG_DBG("  Spec: inner_type=0x%"PRIx16", 
> tci=0x%"PRIx16"\n",
> +                     ntohs(vlan_spec->inner_type), 
> ntohs(vlan_spec->tci));
>          } else {
>              VLOG_DBG("  Spec = null\n");
>          }
>          if (vlan_mask) {
> -            VLOG_DBG("  Mask: tpid=0x%"PRIx16", tci=0x%"PRIx16"\n",
> -                     vlan_mask->tpid, vlan_mask->tci);
> +            VLOG_DBG("  Mask: inner_type=0x%"PRIx16", 
> tci=0x%"PRIx16"\n",
> +                     vlan_mask->inner_type, vlan_mask->tci);

EC> is the mask always in host order vs the value being in network 
order? Or is this an existing bug?

>          } else {
>              VLOG_DBG("  Mask = null\n");
>          }
> @@ -4281,27 +4309,56 @@ add_flow_action(struct flow_actions *actions, 
> enum rte_flow_action_type type,
>      actions->cnt++;
>  }
> +/*
> + * Storage for struct rte_flow_action_rss
> + * including storage for key and queue array
> + */
> +#define MAX_RSS_HASH_KEY_LENGTH  128
> +#define MAX_ACTION_RSS_QUEUE_NUM 128
> +struct action_rss_data {
> +    struct rte_flow_action_rss conf;
> +    uint16_t queue[MAX_ACTION_RSS_QUEUE_NUM];
> +    uint8_t key[MAX_RSS_HASH_KEY_LENGTH];
> +};
> +
>  static struct rte_flow_action_rss *
>  add_flow_rss_action(struct flow_actions *actions,
>                      struct netdev *netdev) {
>      int i;
> -    struct rte_flow_action_rss *rss;
> +    struct action_rss_data *rss_data;
> -    rss = xmalloc(sizeof(*rss) + sizeof(uint16_t) * netdev->n_rxq);
> -    /*
> -     * Setting it to NULL will let the driver use the default RSS
> -     * configuration we have set: &port_conf.rx_adv_conf.rss_conf.
> -     */
> -    rss->rss_conf = NULL;
> -    rss->num = netdev->n_rxq;
> +    if (netdev->n_rxq > MAX_ACTION_RSS_QUEUE_NUM) {
> +        VLOG_ERR("Num of rxq (%u) must not be greater " \
> +                 "than max rss num of queues (%u)",
> +                 netdev->n_rxq, MAX_ACTION_RSS_QUEUE_NUM);
> +        return NULL;
> +    }
> -    for (i = 0; i < rss->num; i++) {
> -        rss->queue[i] = i;
> +    rss_data = xmalloc(sizeof(struct action_rss_data));
> +    *rss_data = (struct action_rss_data){
> +        .conf = (struct rte_flow_action_rss){
> +            .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
> +            .level = 0,
> +            .types = ETH_RSS_IP,
> +            .key_len = 0,
> +            .queue_num = netdev->n_rxq,
> +            .queue = rss_data->queue,
> +            .key = rss_data->key,
> +        },
> +        .key = { 0 },
> +        .queue = { 0 },
> +    };
> +

EC> See Ilya's comment on xzalloc() so only non-zero values are needed.

> +    /* TODO: Override key with default */

EC> Any reason why this todo is left in?

> +    /* Override queue array with default */
> +    for (i = 0; i < rss_data->conf.queue_num; i++) {
> +       rss_data->queue[i] = i;
>      }
> -    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, rss);
> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, 
> &rss_data->conf);
> -    return rss;
> +    return &rss_data->conf;
>  }
>  static int
> @@ -4365,7 +4422,7 @@ netdev_dpdk_add_rte_flow_offload(struct netdev 
> *netdev,
>          vlan_mask.tci  = match->wc.masks.vlans[0].tci & 
> ~htons(VLAN_CFI);
>          /* match any protocols */
> -        vlan_mask.tpid = 0;
> +        vlan_mask.inner_type = 0;
>          add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_VLAN,
>                           &vlan_spec, &vlan_mask);
> @@ -4516,6 +4573,11 @@ end_proto_check:
>      struct rte_flow_action_rss *rss;
>      rss = add_flow_rss_action(&actions, netdev);
> +    if (!rss) {
> +        VLOG_ERR("add_flow_rss_action error.\n");
> +        ret = -1;
> +        goto out;
> +    }
>      add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL);
>      flow = rte_flow_create(dev->port_id, &flow_attr, patterns.items,
Stokes, Ian Sept. 19, 2018, 9:55 a.m. UTC | #12
> Hi Sugesh,
> I can update documentation. Is it enough to update dpdk.rst.txt only?
> If not - can you please send a link to additional files with references to
> 17.11 to be updated?
> I will wait 1-2 days to get more reviews then will send v3.
> 

Hi Ophir,

In terms of functionality I've smoke tested the following and did not see any issues.

P2p (Single queue and multi queue).
Pvp (Single queue and multi queue using both Linux bridge and testpmd in guest).
Hotplug attach for phy devices.
Vhostuser client reconnect.
Vhost user socket dir and numa functionality.
Ingress rate limiting.
QoS egress policing.
Flow Control.
LSC enablement.
Extended and custom statistics.

Jumbo frames is the only feature that is broken currently for i40e devices, but the workaround suggested on the v2 should suffice for that.

Other than that, from a functional point of view it seems ok.

Are you planning to send a v3 this week?

Just an FYI I'm OOO on vacation for 2 weeks from Monday the 24th, if you have a v3 it would be good if we could get it merged this week if possible so as not to block further work.

Thanks
Ian 

> Regards,
> Ophir
> 
> > -----Original Message-----
> > From: Chandran, Sugesh [mailto:sugesh.chandran@intel.com]
> > Sent: Thursday, September 13, 2018 2:58 PM
> > To: Ophir Munk <ophirmu@mellanox.com>; 'ovs-dev@openvswitch.org'
> > <ovs-dev@openvswitch.org>
> > Cc: Asaf Penso <asafp@mellanox.com>; Stokes, Ian
> > <ian.stokes@intel.com>; 'Ben Pfaff' <blp@ovn.org>; Shahaf Shuler
> > <shahafs@mellanox.com>; Thomas Monjalon <thomas@monjalon.net>; Olga
> > Shern <olgas@mellanox.com>; 'Kevin Traynor' <ktraynor@redhat.com>
> > Subject: RE: [dpdk-howl PATCH v2] netdev-dpdk: Upgrade to dpdk
> > v18.08.0
> >
> > Hi Ophir,
> >
> > Also a quick grep show there are number of places in documentation
> > still pointing to DPDK17.11, I guess its worth while to change them as
> well.
> > This will avoid confusion for people who wanted to use this branch,
> > What do you think?
> >
> >
> >
> > Regards
> > _Sugesh
> >
> >
> > > -----Original Message-----
> > > From: Chandran, Sugesh
> > > Sent: Wednesday, September 12, 2018 5:05 PM
> > > To: Ophir Munk <ophirmu@mellanox.com>; ovs-dev@openvswitch.org
> > > Cc: Asaf Penso <asafp@mellanox.com>; Stokes, Ian
> > > <ian.stokes@intel.com>; Ben Pfaff <blp@ovn.org>; Shahaf Shuler
> > > <shahafs@mellanox.com>; Thomas Monjalon <thomas@monjalon.net>;
> > Olga
> > > Shern <olgas@mellanox.com>; Kevin Traynor <ktraynor@redhat.com>
> > > Subject: RE: [dpdk-howl PATCH v2] netdev-dpdk: Upgrade to dpdk
> > > v18.08.0
> > >
> > > Hi Ophir,
> > > I verified this patch in our test lab and looks fine to me.
> > > I have couple of minor comments in the V1 series. Could you please
> > > take care of them as well?
> > >
> > >
> > > Regards
> > > _Sugesh
> > >
> > > > -----Original Message-----
> > > > From: Ophir Munk [mailto:ophirmu@mellanox.com]
> > > > Sent: Tuesday, September 11, 2018 12:05 AM
> > > > To: ovs-dev@openvswitch.org
> > > > Cc: Asaf Penso <asafp@mellanox.com>; Chandran, Sugesh
> > > > <sugesh.chandran@intel.com>; Stokes, Ian <ian.stokes@intel.com>;
> > > > Ben Pfaff <blp@ovn.org>; Shahaf Shuler <shahafs@mellanox.com>;
> > > > Thomas Monjalon <thomas@monjalon.net>; Olga Shern
> > > > <olgas@mellanox.com>;
> > > Ophir
> > > > Munk <ophirmu@mellanox.com>; Kevin Traynor
> > <ktraynor@redhat.com>
> > > > Subject: [dpdk-howl PATCH v2] netdev-dpdk: Upgrade to dpdk
> > > > v18.08.0
> > > >
> > > > 1. Enable compilation and linkage with dpdk 18.08.0 The following
> > > > dpdk commits which were introduced after dpdk 17.11.x require OVS
> > > > updates to accommodate to the dpdk changes.
> > > > - ce17eddefc20 ("ethdev: introduce Rx queue offloads API")
> > > > - ab3ce1e0c193 ("ethdev: remove old offload API")
> > > > - c06ddf9698e0 ("meter: add configuration profile")
> > > > - e58638c32411 ("ethdev: fix TPID handling in flow API")
> > > > - cd8c7c7ce241 ("ethdev: replace bus specific struct with generic
> > > > dev")
> > > > - ac8d22de2394 ("ethdev: flatten RSS configuration in flow API")
> > > >
> > > > 2. Limit configured rss hash functions to only those supported by
> > > > the eth
> > > device.
> > > >
> > > > 3. Update references to DPDK version in Documentation
> > > >
> > > > 4. Update DPDK version in travis' linux-build script
> > > >
> > > > Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
> > > > ---
> > > > v1:
> > > > First version
> > > >
> > > > v2:
> > > > Avoid seg faults cases as described in
> > > >
> > https://emea01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fp
> > > >
> > atchwork.ozlabs.org%2Fpatch%2F965451%2F&amp;data=02%7C01%7Cophi
> > rmu%4
> > > >
> > 0mellanox.com%7Ccc4dcac1d427416c8dc508d619703178%7Ca652971c7d2
> > e4d9ba
> > > >
> > 6a4d149256f461b%7C0%7C0%7C636724366985653825&amp;sdata=CFZjVD
> > Rd6w0TP
> > > > x7lKWOkUVEh28aoyrpl%2Fop635Vrlx0%3D&amp;reserved=0
> > > > by using the patch in:
> > > >
> > https://emea01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fg
> > > > ithub.com%2Fkevintraynor%2Fovs-dpdk-
> > &amp;data=02%7C01%7Cophirmu%40me
> > > >
> > llanox.com%7Ccc4dcac1d427416c8dc508d619703178%7Ca652971c7d2e4d9
> > ba6a4
> > > >
> > d149256f461b%7C0%7C0%7C636724366985653825&amp;sdata=%2F2B4uH
> > HAHGma%2
> > > > F48VS2I3QsMm%2ByO3LMZdaHlxVKJ9JYg%3D&amp;reserved=0
> > > > master/commit/88f46cc5ab338eb4f3ca5db1eacd0effefe4fa0c
> > > >
> > > >  .travis/linux-build.sh         |   2 +-
> > > >  Documentation/faq/releases.rst |   2 +-
> > > >  lib/netdev-dpdk.c              | 142 +++++++++++++++++++++++++++++-
> ------
> > -----
> > > >  3 files changed, 104 insertions(+), 42 deletions(-)
> > > >
> > > > diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh index
> > > > 4b9fc4a..c60ac71
> > > > 100755
> > > > --- a/.travis/linux-build.sh
> > > > +++ b/.travis/linux-build.sh
> > > > @@ -83,7 +83,7 @@ fi
> > > >
> > > >  if [ "$DPDK" ]; then
> > > >      if [ -z "$DPDK_VER" ]; then
> > > > -        DPDK_VER="17.11.3"
> > > > +        DPDK_VER="18.08.0"
> > > >      fi
> > > >      install_dpdk $DPDK_VER
> > > >      if [ "$CC" = "clang" ]; then
> > > > diff --git a/Documentation/faq/releases.rst
> > > > b/Documentation/faq/releases.rst index 41d41e3..646ae09 100644
> > > > --- a/Documentation/faq/releases.rst
> > > > +++ b/Documentation/faq/releases.rst
> > > > @@ -168,7 +168,7 @@ Q: What DPDK version does each Open vSwitch
> > > > release work with?
> > > >      2.7.x        16.11.7
> > > >      2.8.x        17.05.2
> > > >      2.9.x        17.11.3
> > > > -    2.10.x       17.11.3
> > > > +    2.10.x       18.08.0
> > > >      ============ =======
> > > >
> > > >  Q: Are all the DPDK releases that OVS versions work with
> maintained?
> > > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index
> > > > f91aa27..6879e9a
> > > > 100644
> > > > --- a/lib/netdev-dpdk.c
> > > > +++ b/lib/netdev-dpdk.c
> > > > @@ -168,11 +168,7 @@ static const struct rte_eth_conf port_conf = {
> > > >      .rxmode = {
> > > >          .mq_mode = ETH_MQ_RX_RSS,
> > > >          .split_hdr_size = 0,
> > > > -        .header_split   = 0, /* Header Split disabled */
> > > > -        .hw_ip_checksum = 0, /* IP checksum offload disabled */
> > > > -        .hw_vlan_filter = 0, /* VLAN filtering disabled */
> > > > -        .jumbo_frame    = 0, /* Jumbo Frame Support disabled */
> > > > -        .hw_strip_crc   = 0,
> > > > +        .offloads = 0,
> > > >      },
> > > >      .rx_adv_conf = {
> > > >          .rss_conf = {
> > > > @@ -364,6 +360,7 @@ struct dpdk_ring {  struct ingress_policer {
> > > >      struct rte_meter_srtcm_params app_srtcm_params;
> > > >      struct rte_meter_srtcm in_policer;
> > > > +    struct rte_meter_srtcm_profile in_prof;
> > > >      rte_spinlock_t policer_lock;
> > > >  };
> > > >
> > > > @@ -894,6 +891,8 @@ dpdk_eth_dev_port_config(struct netdev_dpdk
> > > > *dev, int n_rxq, int n_txq)
> > > >      struct rte_eth_dev_info info;
> > > >      uint16_t conf_mtu;
> > > >
> > > > +    rte_eth_dev_info_get(dev->port_id, &info);
> > > > +
> > > >      /* As of DPDK 17.11.1 a few PMDs require to explicitly enable
> > > >       * scatter to support jumbo RX. Checking the offload
> capabilities
> > > >       * is not an option as PMDs are not required yet to report @@
> > > > -901,20
> > > > +900,24 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int
> > > > +n_rxq, int
> > > > n_txq)
> > > >       * (testing or code review). Listing all such PMDs feels harder
> > > >       * than highlighting the one known not to need scatter */
> > > >      if (dev->mtu > ETHER_MTU) {
> > > > -        rte_eth_dev_info_get(dev->port_id, &info);
> > > >          if (strncmp(info.driver_name, "net_nfp", 7)) {
> > > > -            conf.rxmode.enable_scatter = 1;
> > > > +            conf.rxmode.offloads |= DEV_RX_OFFLOAD_SCATTER;
> > > >          }
> > > >      }
> > > >
> > > >      conf.intr_conf.lsc = dev->lsc_interrupt_mode;
> > > > -    conf.rxmode.hw_ip_checksum = (dev->hw_ol_features &
> > > > -                                  NETDEV_RX_CHECKSUM_OFFLOAD) != 0;
> > > > +    conf.rxmode.offloads |= ((dev->hw_ol_features &
> > > > +                             NETDEV_RX_CHECKSUM_OFFLOAD) != 0) ?
> > > > +                             DEV_RX_OFFLOAD_CHECKSUM : 0;
> > > >
> > > >      if (dev->hw_ol_features & NETDEV_RX_HW_CRC_STRIP) {
> > > > -        conf.rxmode.hw_strip_crc = 1;
> > > > +        conf.rxmode.offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
> > > >      }
> > > >
> > > > +    /* Limit configured rss hash functions to only those supported
> > > > +     * by the eth device. */
> > > > +    conf.rx_adv_conf.rss_conf.rss_hf &=
> > > > + info.flow_type_rss_offloads;
> > > > +
> > > >      /* A device may report more queues than it makes available
> (this has
> > > >       * been observed for Intel xl710, which reserves some of them
> for
> > > >       * SRIOV):  rte_eth_*_queue_setup will fail if a queue is not
> > > > @@
> > > > -1932,16
> > > > +1935,18 @@ netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int
> > > > +qid,
> > > >
> > > >  static inline bool
> > > >  netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm *meter,
> > > > +                               struct rte_meter_srtcm_profile
> > > > + *profile,
> > > >                                 struct rte_mbuf *pkt, uint64_t time)
> {
> > > >      uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt) - sizeof(struct
> > > > ether_hdr);
> > > >
> > > > -    return rte_meter_srtcm_color_blind_check(meter, time, pkt_len)
> ==
> > > > +    return rte_meter_srtcm_color_blind_check(meter, profile,
> > > > + time,
> > > > + pkt_len) ==
> > > >
> > > > e_RTE_METER_GREEN; }
> > > >
> > > >  static int
> > > >  netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
> > > > +                        struct rte_meter_srtcm_profile *profile,
> > > >                          struct rte_mbuf **pkts, int pkt_cnt,
> > > >                          bool should_steal)  { @@ -1953,7 +1958,8
> > > > @@ netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
> > > >      for (i = 0; i < pkt_cnt; i++) {
> > > >          pkt = pkts[i];
> > > >          /* Handle current packet */
> > > > -        if (netdev_dpdk_policer_pkt_handle(meter, pkt,
> current_time)) {
> > > > +        if (netdev_dpdk_policer_pkt_handle(meter, profile, pkt,
> > > > +                                                 current_time)) {
> > > >              if (cnt != i) {
> > > >                  pkts[cnt] = pkt;
> > > >              }
> > > > @@ -1975,8 +1981,8 @@ ingress_policer_run(struct ingress_policer
> > > > *policer, struct rte_mbuf **pkts,
> > > >      int cnt = 0;
> > > >
> > > >      rte_spinlock_lock(&policer->policer_lock);
> > > > -    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,
> > > > -                                  pkt_cnt, should_steal);
> > > > +    cnt = netdev_dpdk_policer_run(&policer->in_policer, &policer-
> > >in_prof,
> > > > +                                  pkts, pkt_cnt, should_steal);
> > > >      rte_spinlock_unlock(&policer->policer_lock);
> > > >
> > > >      return cnt;
> > > > @@ -2767,8 +2773,15 @@ netdev_dpdk_policer_construct(uint32_t
> > rate,
> > > > uint32_t burst)
> > > >      policer->app_srtcm_params.cir = rate_bytes;
> > > >      policer->app_srtcm_params.cbs = burst_bytes;
> > > >      policer->app_srtcm_params.ebs = 0;
> > > > +    err = rte_meter_srtcm_profile_config(&policer->in_prof,
> > > > +                                         &policer-
> >app_srtcm_params);
> > > > +    if (err) {
> > > > +        VLOG_ERR("Could not create rte meter profile for ingress
> policer");
> > > > +        free(policer);
> > > > +        return NULL;
> > > > +    }
> > > >      err = rte_meter_srtcm_config(&policer->in_policer,
> > > > -                                    &policer->app_srtcm_params);
> > > > +                                    &policer->in_prof);
> > > >      if (err) {
> > > >          VLOG_ERR("Could not create rte meter for ingress policer");
> > > >          free(policer);
> > > > @@ -3043,13 +3056,18 @@ netdev_dpdk_get_status(const struct
> > netdev
> > > > *netdev, struct smap *args)
> > > >      smap_add_format(args, "if_descr", "%s %s", rte_version(),
> > > >
> > > > dev_info.driver_name);
> > > >
> > > > -    if (dev_info.pci_dev) {
> > > > -        smap_add_format(args, "pci-vendor_id", "0x%x",
> > > > -                        dev_info.pci_dev->id.vendor_id);
> > > > -        smap_add_format(args, "pci-device_id", "0x%x",
> > > > -                        dev_info.pci_dev->id.device_id);
> > > > +    const struct rte_bus *bus;
> > > > +    const struct rte_pci_device *pci_dev;
> > > > +    bus = rte_bus_find_by_device(dev_info.device);
> > > > +    if (bus && !strcmp(bus->name, "pci")) {
> > > > +        pci_dev = RTE_DEV_TO_PCI(dev_info.device);
> > > > +        if (pci_dev) {
> > > > +            smap_add_format(args, "pci-vendor_id", "0x%u",
> > > > +                            pci_dev->id.vendor_id);
> > > > +             smap_add_format(args, "pci-device_id", "0x%x",
> > > > +                             pci_dev->id.device_id);
> > > > +        }
> > > >      }
> > > > -
> > > >      return 0;
> > > >  }
> > > >
> > > > @@ -3727,6 +3745,7 @@ struct egress_policer {
> > > >      struct qos_conf qos_conf;
> > > >      struct rte_meter_srtcm_params app_srtcm_params;
> > > >      struct rte_meter_srtcm egress_meter;
> > > > +    struct rte_meter_srtcm_profile egress_prof;
> > > >  };
> > > >
> > > >  static void
> > > > @@ -3749,8 +3768,16 @@ egress_policer_qos_construct(const struct
> > > > smap *details,
> > > >      policer = xmalloc(sizeof *policer);
> > > >      qos_conf_init(&policer->qos_conf, &egress_policer_ops);
> > > >      egress_policer_details_to_param(details,
> > > > &policer->app_srtcm_params);
> > > > +    err = rte_meter_srtcm_profile_config(&policer->egress_prof,
> > > > +                                         &policer-
> >app_srtcm_params);
> > > > +    if (err) {
> > > > +        free(policer);
> > > > +        *conf = NULL;
> > > > +        return -err;
> > > > +    }
> > > >      err = rte_meter_srtcm_config(&policer->egress_meter,
> > > > -                                 &policer->app_srtcm_params);
> > > > +                                 &policer->egress_prof);
> > > > +
> > > >      if (!err) {
> > > >          *conf = &policer->qos_conf;
> > > >      } else {
> > > > @@ -3803,7 +3830,8 @@ egress_policer_run(struct qos_conf *conf,
> > > > struct rte_mbuf **pkts, int pkt_cnt,
> > > >      struct egress_policer *policer =
> > > >          CONTAINER_OF(conf, struct egress_policer, qos_conf);
> > > >
> > > > -    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
> > > > +    cnt = netdev_dpdk_policer_run(&policer->egress_meter,
> > > > +                                  &policer->egress_prof, pkts,
> > > >                                    pkt_cnt, should_steal);
> > > >
> > > >      return cnt;
> > > > @@ -4103,15 +4131,15 @@ dump_flow_pattern(struct rte_flow_item
> > > > *item)
> > > >
> > > >          VLOG_DBG("rte flow vlan pattern:\n");
> > > >          if (vlan_spec) {
> > > > -            VLOG_DBG("  Spec: tpid=0x%"PRIx16", tci=0x%"PRIx16"\n",
> > > > -                     ntohs(vlan_spec->tpid), ntohs(vlan_spec-
> >tci));
> > > > +            VLOG_DBG("  Spec: inner_type=0x%"PRIx16",
> > tci=0x%"PRIx16"\n",
> > > > +                     ntohs(vlan_spec->inner_type),
> > > > + ntohs(vlan_spec->tci));
> > > >          } else {
> > > >              VLOG_DBG("  Spec = null\n");
> > > >          }
> > > >
> > > >          if (vlan_mask) {
> > > > -            VLOG_DBG("  Mask: tpid=0x%"PRIx16", tci=0x%"PRIx16"\n",
> > > > -                     vlan_mask->tpid, vlan_mask->tci);
> > > > +            VLOG_DBG("  Mask: inner_type=0x%"PRIx16",
> > tci=0x%"PRIx16"\n",
> > > > +                     vlan_mask->inner_type, vlan_mask->tci);
> > > >          } else {
> > > >              VLOG_DBG("  Mask = null\n");
> > > >          }
> > > > @@ -4281,27 +4309,56 @@ add_flow_action(struct flow_actions
> > > > *actions, enum rte_flow_action_type type,
> > > >      actions->cnt++;
> > > >  }
> > > >
> > > > +/*
> > > > + * Storage for struct rte_flow_action_rss
> > > > + * including storage for key and queue array  */ #define
> > > > +MAX_RSS_HASH_KEY_LENGTH  128 #define
> > > MAX_ACTION_RSS_QUEUE_NUM
> > > > 128
> > > > +struct action_rss_data {
> > > > +    struct rte_flow_action_rss conf;
> > > > +    uint16_t queue[MAX_ACTION_RSS_QUEUE_NUM];
> > > > +    uint8_t key[MAX_RSS_HASH_KEY_LENGTH]; };
> > > > +
> > > >  static struct rte_flow_action_rss *  add_flow_rss_action(struct
> > > > flow_actions *actions,
> > > >                      struct netdev *netdev) {
> > > >      int i;
> > > > -    struct rte_flow_action_rss *rss;
> > > > +    struct action_rss_data *rss_data;
> > > >
> > > > -    rss = xmalloc(sizeof(*rss) + sizeof(uint16_t) * netdev->n_rxq);
> > > > -    /*
> > > > -     * Setting it to NULL will let the driver use the default RSS
> > > > -     * configuration we have set: &port_conf.rx_adv_conf.rss_conf.
> > > > -     */
> > > > -    rss->rss_conf = NULL;
> > > > -    rss->num = netdev->n_rxq;
> > > > +    if (netdev->n_rxq > MAX_ACTION_RSS_QUEUE_NUM) {
> > > > +        VLOG_ERR("Num of rxq (%u) must not be greater " \
> > > > +                 "than max rss num of queues (%u)",
> > > > +                 netdev->n_rxq, MAX_ACTION_RSS_QUEUE_NUM);
> > > > +        return NULL;
> > > > +    }
> > > >
> > > > -    for (i = 0; i < rss->num; i++) {
> > > > -        rss->queue[i] = i;
> > > > +    rss_data = xmalloc(sizeof(struct action_rss_data));
> > > > +    *rss_data = (struct action_rss_data){
> > > > +        .conf = (struct rte_flow_action_rss){
> > > > +            .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
> > > > +            .level = 0,
> > > > +            .types = ETH_RSS_IP,
> > > > +            .key_len = 0,
> > > > +            .queue_num = netdev->n_rxq,
> > > > +            .queue = rss_data->queue,
> > > > +            .key = rss_data->key,
> > > > +        },
> > > > +        .key = { 0 },
> > > > +        .queue = { 0 },
> > > > +    };
> > > > +
> > > > +    /* TODO: Override key with default */
> > > > +
> > > > +    /* Override queue array with default */
> > > > +    for (i = 0; i < rss_data->conf.queue_num; i++) {
> > > > +       rss_data->queue[i] = i;
> > > >      }
> > > >
> > > > -    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, rss);
> > > > +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS,
> > > > + &rss_data->conf);
> > > >
> > > > -    return rss;
> > > > +    return &rss_data->conf;
> > > >  }
> > > >
> > > >  static int
> > > > @@ -4365,7 +4422,7 @@ netdev_dpdk_add_rte_flow_offload(struct
> > netdev
> > > > *netdev,
> > > >          vlan_mask.tci  = match->wc.masks.vlans[0].tci &
> > > > ~htons(VLAN_CFI);
> > > >
> > > >          /* match any protocols */
> > > > -        vlan_mask.tpid = 0;
> > > > +        vlan_mask.inner_type = 0;
> > > >
> > > >          add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_VLAN,
> > > >                           &vlan_spec, &vlan_mask); @@ -4516,6
> > > > +4573,11 @@
> > > > end_proto_check:
> > > >
> > > >      struct rte_flow_action_rss *rss;
> > > >      rss = add_flow_rss_action(&actions, netdev);
> > > > +    if (!rss) {
> > > > +        VLOG_ERR("add_flow_rss_action error.\n");
> > > > +        ret = -1;
> > > > +        goto out;
> > > > +    }
> > > >      add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL);
> > > >
> > > >      flow = rte_flow_create(dev->port_id, &flow_attr,
> > > > patterns.items,
> > > > --
> > > > 1.8.3.1
Ophir Munk Sept. 19, 2018, 6:40 p.m. UTC | #13
Hi all,
I incorporated all your reviews into v3 patch which was sent to the mailing list: https://patchwork.ozlabs.org/patch/971860/

Regards,
Ophir

> -----Original Message-----
> From: Stokes, Ian [mailto:ian.stokes@intel.com]
> Sent: Wednesday, September 19, 2018 12:55 PM
> To: Ophir Munk <ophirmu@mellanox.com>; Chandran, Sugesh
> <sugesh.chandran@intel.com>; 'ovs-dev@openvswitch.org' <ovs-
> dev@openvswitch.org>
> Cc: Asaf Penso <asafp@mellanox.com>; 'Ben Pfaff' <blp@ovn.org>; Shahaf
> Shuler <shahafs@mellanox.com>; Thomas Monjalon
> <thomas@monjalon.net>; Olga Shern <olgas@mellanox.com>; 'Kevin
> Traynor' <ktraynor@redhat.com>
> Subject: RE: [dpdk-howl PATCH v2] netdev-dpdk: Upgrade to dpdk v18.08.0
> 
> > Hi Sugesh,
> > I can update documentation. Is it enough to update dpdk.rst.txt only?
> > If not - can you please send a link to additional files with
> > references to
> > 17.11 to be updated?
> > I will wait 1-2 days to get more reviews then will send v3.
> >
> 
> Hi Ophir,
> 
> In terms of functionality I've smoke tested the following and did not see any
> issues.
> 
> P2p (Single queue and multi queue).
> Pvp (Single queue and multi queue using both Linux bridge and testpmd in
> guest).
> Hotplug attach for phy devices.
> Vhostuser client reconnect.
> Vhost user socket dir and numa functionality.
> Ingress rate limiting.
> QoS egress policing.
> Flow Control.
> LSC enablement.
> Extended and custom statistics.
> 
> Jumbo frames is the only feature that is broken currently for i40e devices,
> but the workaround suggested on the v2 should suffice for that.
> 
> Other than that, from a functional point of view it seems ok.
> 
> Are you planning to send a v3 this week?
> 
> Just an FYI I'm OOO on vacation for 2 weeks from Monday the 24th, if you
> have a v3 it would be good if we could get it merged this week if possible so
> as not to block further work.
> 
> Thanks
> Ian
> 
> > Regards,
> > Ophir
> >
> > > -----Original Message-----
> > > From: Chandran, Sugesh [mailto:sugesh.chandran@intel.com]
> > > Sent: Thursday, September 13, 2018 2:58 PM
> > > To: Ophir Munk <ophirmu@mellanox.com>; 'ovs-dev@openvswitch.org'
> > > <ovs-dev@openvswitch.org>
> > > Cc: Asaf Penso <asafp@mellanox.com>; Stokes, Ian
> > > <ian.stokes@intel.com>; 'Ben Pfaff' <blp@ovn.org>; Shahaf Shuler
> > > <shahafs@mellanox.com>; Thomas Monjalon <thomas@monjalon.net>;
> Olga
> > > Shern <olgas@mellanox.com>; 'Kevin Traynor' <ktraynor@redhat.com>
> > > Subject: RE: [dpdk-howl PATCH v2] netdev-dpdk: Upgrade to dpdk
> > > v18.08.0
> > >
> > > Hi Ophir,
> > >
> > > Also a quick grep show there are number of places in documentation
> > > still pointing to DPDK17.11, I guess its worth while to change them
> > > as
> > well.
> > > This will avoid confusion for people who wanted to use this branch,
> > > What do you think?
> > >
> > >
> > >
> > > Regards
> > > _Sugesh
> > >
> > >
> > > > -----Original Message-----
> > > > From: Chandran, Sugesh
> > > > Sent: Wednesday, September 12, 2018 5:05 PM
> > > > To: Ophir Munk <ophirmu@mellanox.com>; ovs-dev@openvswitch.org
> > > > Cc: Asaf Penso <asafp@mellanox.com>; Stokes, Ian
> > > > <ian.stokes@intel.com>; Ben Pfaff <blp@ovn.org>; Shahaf Shuler
> > > > <shahafs@mellanox.com>; Thomas Monjalon
> <thomas@monjalon.net>;
> > > Olga
> > > > Shern <olgas@mellanox.com>; Kevin Traynor <ktraynor@redhat.com>
> > > > Subject: RE: [dpdk-howl PATCH v2] netdev-dpdk: Upgrade to dpdk
> > > > v18.08.0
> > > >
> > > > Hi Ophir,
> > > > I verified this patch in our test lab and looks fine to me.
> > > > I have couple of minor comments in the V1 series. Could you please
> > > > take care of them as well?
> > > >
> > > >
> > > > Regards
> > > > _Sugesh
> > > >
> > > > > -----Original Message-----
> > > > > From: Ophir Munk [mailto:ophirmu@mellanox.com]
> > > > > Sent: Tuesday, September 11, 2018 12:05 AM
> > > > > To: ovs-dev@openvswitch.org
> > > > > Cc: Asaf Penso <asafp@mellanox.com>; Chandran, Sugesh
> > > > > <sugesh.chandran@intel.com>; Stokes, Ian <ian.stokes@intel.com>;
> > > > > Ben Pfaff <blp@ovn.org>; Shahaf Shuler <shahafs@mellanox.com>;
> > > > > Thomas Monjalon <thomas@monjalon.net>; Olga Shern
> > > > > <olgas@mellanox.com>;
> > > > Ophir
> > > > > Munk <ophirmu@mellanox.com>; Kevin Traynor
> > > <ktraynor@redhat.com>
> > > > > Subject: [dpdk-howl PATCH v2] netdev-dpdk: Upgrade to dpdk
> > > > > v18.08.0
> > > > >
> > > > > 1. Enable compilation and linkage with dpdk 18.08.0 The
> > > > > following dpdk commits which were introduced after dpdk 17.11.x
> > > > > require OVS updates to accommodate to the dpdk changes.
> > > > > - ce17eddefc20 ("ethdev: introduce Rx queue offloads API")
> > > > > - ab3ce1e0c193 ("ethdev: remove old offload API")
> > > > > - c06ddf9698e0 ("meter: add configuration profile")
> > > > > - e58638c32411 ("ethdev: fix TPID handling in flow API")
> > > > > - cd8c7c7ce241 ("ethdev: replace bus specific struct with
> > > > > generic
> > > > > dev")
> > > > > - ac8d22de2394 ("ethdev: flatten RSS configuration in flow API")
> > > > >
> > > > > 2. Limit configured rss hash functions to only those supported
> > > > > by the eth
> > > > device.
> > > > >
> > > > > 3. Update references to DPDK version in Documentation
> > > > >
> > > > > 4. Update DPDK version in travis' linux-build script
> > > > >
> > > > > Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
> > > > > ---
> > > > > v1:
> > > > > First version
> > > > >
> > > > > v2:
> > > > > Avoid seg faults cases as described in
> > > > >
> > >
> https://emea01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fp
> > > > >
> > >
> atchwork.ozlabs.org%2Fpatch%2F965451%2F&amp;data=02%7C01%7Cophi
> > > rmu%4
> > > > >
> > >
> 0mellanox.com%7Ccc4dcac1d427416c8dc508d619703178%7Ca652971c7d2
> > > e4d9ba
> > > > >
> > >
> 6a4d149256f461b%7C0%7C0%7C636724366985653825&amp;sdata=CFZjVD
> > > Rd6w0TP
> > > > > x7lKWOkUVEh28aoyrpl%2Fop635Vrlx0%3D&amp;reserved=0
> > > > > by using the patch in:
> > > > >
> > >
> https://emea01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fg
> > > > > ithub.com%2Fkevintraynor%2Fovs-dpdk-
> > > &amp;data=02%7C01%7Cophirmu%40me
> > > > >
> > >
> llanox.com%7Ccc4dcac1d427416c8dc508d619703178%7Ca652971c7d2e4d9
> > > ba6a4
> > > > >
> > >
> d149256f461b%7C0%7C0%7C636724366985653825&amp;sdata=%2F2B4uH
> > > HAHGma%2
> > > > > F48VS2I3QsMm%2ByO3LMZdaHlxVKJ9JYg%3D&amp;reserved=0
> > > > > master/commit/88f46cc5ab338eb4f3ca5db1eacd0effefe4fa0c
> > > > >
> > > > >  .travis/linux-build.sh         |   2 +-
> > > > >  Documentation/faq/releases.rst |   2 +-
> > > > >  lib/netdev-dpdk.c              | 142 +++++++++++++++++++++++++++++-
> > ------
> > > -----
> > > > >  3 files changed, 104 insertions(+), 42 deletions(-)
> > > > >
> > > > > diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh
> > > > > index
> > > > > 4b9fc4a..c60ac71
> > > > > 100755
> > > > > --- a/.travis/linux-build.sh
> > > > > +++ b/.travis/linux-build.sh
> > > > > @@ -83,7 +83,7 @@ fi
> > > > >
> > > > >  if [ "$DPDK" ]; then
> > > > >      if [ -z "$DPDK_VER" ]; then
> > > > > -        DPDK_VER="17.11.3"
> > > > > +        DPDK_VER="18.08.0"
> > > > >      fi
> > > > >      install_dpdk $DPDK_VER
> > > > >      if [ "$CC" = "clang" ]; then diff --git
> > > > > a/Documentation/faq/releases.rst
> > > > > b/Documentation/faq/releases.rst index 41d41e3..646ae09 100644
> > > > > --- a/Documentation/faq/releases.rst
> > > > > +++ b/Documentation/faq/releases.rst
> > > > > @@ -168,7 +168,7 @@ Q: What DPDK version does each Open
> vSwitch
> > > > > release work with?
> > > > >      2.7.x        16.11.7
> > > > >      2.8.x        17.05.2
> > > > >      2.9.x        17.11.3
> > > > > -    2.10.x       17.11.3
> > > > > +    2.10.x       18.08.0
> > > > >      ============ =======
> > > > >
> > > > >  Q: Are all the DPDK releases that OVS versions work with
> > maintained?
> > > > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index
> > > > > f91aa27..6879e9a
> > > > > 100644
> > > > > --- a/lib/netdev-dpdk.c
> > > > > +++ b/lib/netdev-dpdk.c
> > > > > @@ -168,11 +168,7 @@ static const struct rte_eth_conf port_conf =
> {
> > > > >      .rxmode = {
> > > > >          .mq_mode = ETH_MQ_RX_RSS,
> > > > >          .split_hdr_size = 0,
> > > > > -        .header_split   = 0, /* Header Split disabled */
> > > > > -        .hw_ip_checksum = 0, /* IP checksum offload disabled */
> > > > > -        .hw_vlan_filter = 0, /* VLAN filtering disabled */
> > > > > -        .jumbo_frame    = 0, /* Jumbo Frame Support disabled */
> > > > > -        .hw_strip_crc   = 0,
> > > > > +        .offloads = 0,
> > > > >      },
> > > > >      .rx_adv_conf = {
> > > > >          .rss_conf = {
> > > > > @@ -364,6 +360,7 @@ struct dpdk_ring {  struct ingress_policer {
> > > > >      struct rte_meter_srtcm_params app_srtcm_params;
> > > > >      struct rte_meter_srtcm in_policer;
> > > > > +    struct rte_meter_srtcm_profile in_prof;
> > > > >      rte_spinlock_t policer_lock;  };
> > > > >
> > > > > @@ -894,6 +891,8 @@ dpdk_eth_dev_port_config(struct
> netdev_dpdk
> > > > > *dev, int n_rxq, int n_txq)
> > > > >      struct rte_eth_dev_info info;
> > > > >      uint16_t conf_mtu;
> > > > >
> > > > > +    rte_eth_dev_info_get(dev->port_id, &info);
> > > > > +
> > > > >      /* As of DPDK 17.11.1 a few PMDs require to explicitly enable
> > > > >       * scatter to support jumbo RX. Checking the offload
> > capabilities
> > > > >       * is not an option as PMDs are not required yet to report
> > > > > @@
> > > > > -901,20
> > > > > +900,24 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int
> > > > > +n_rxq, int
> > > > > n_txq)
> > > > >       * (testing or code review). Listing all such PMDs feels harder
> > > > >       * than highlighting the one known not to need scatter */
> > > > >      if (dev->mtu > ETHER_MTU) {
> > > > > -        rte_eth_dev_info_get(dev->port_id, &info);
> > > > >          if (strncmp(info.driver_name, "net_nfp", 7)) {
> > > > > -            conf.rxmode.enable_scatter = 1;
> > > > > +            conf.rxmode.offloads |= DEV_RX_OFFLOAD_SCATTER;
> > > > >          }
> > > > >      }
> > > > >
> > > > >      conf.intr_conf.lsc = dev->lsc_interrupt_mode;
> > > > > -    conf.rxmode.hw_ip_checksum = (dev->hw_ol_features &
> > > > > -                                  NETDEV_RX_CHECKSUM_OFFLOAD) != 0;
> > > > > +    conf.rxmode.offloads |= ((dev->hw_ol_features &
> > > > > +                             NETDEV_RX_CHECKSUM_OFFLOAD) != 0) ?
> > > > > +                             DEV_RX_OFFLOAD_CHECKSUM : 0;
> > > > >
> > > > >      if (dev->hw_ol_features & NETDEV_RX_HW_CRC_STRIP) {
> > > > > -        conf.rxmode.hw_strip_crc = 1;
> > > > > +        conf.rxmode.offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
> > > > >      }
> > > > >
> > > > > +    /* Limit configured rss hash functions to only those supported
> > > > > +     * by the eth device. */
> > > > > +    conf.rx_adv_conf.rss_conf.rss_hf &=
> > > > > + info.flow_type_rss_offloads;
> > > > > +
> > > > >      /* A device may report more queues than it makes available
> > (this has
> > > > >       * been observed for Intel xl710, which reserves some of
> > > > > them
> > for
> > > > >       * SRIOV):  rte_eth_*_queue_setup will fail if a queue is
> > > > > not @@
> > > > > -1932,16
> > > > > +1935,18 @@ netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev,
> > > > > +int qid,
> > > > >
> > > > >  static inline bool
> > > > >  netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm *meter,
> > > > > +                               struct rte_meter_srtcm_profile
> > > > > + *profile,
> > > > >                                 struct rte_mbuf *pkt, uint64_t
> > > > > time)
> > {
> > > > >      uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt) - sizeof(struct
> > > > > ether_hdr);
> > > > >
> > > > > -    return rte_meter_srtcm_color_blind_check(meter, time, pkt_len)
> > ==
> > > > > +    return rte_meter_srtcm_color_blind_check(meter, profile,
> > > > > + time,
> > > > > + pkt_len) ==
> > > > >
> > > > > e_RTE_METER_GREEN; }
> > > > >
> > > > >  static int
> > > > >  netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
> > > > > +                        struct rte_meter_srtcm_profile
> > > > > + *profile,
> > > > >                          struct rte_mbuf **pkts, int pkt_cnt,
> > > > >                          bool should_steal)  { @@ -1953,7
> > > > > +1958,8 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm
> *meter,
> > > > >      for (i = 0; i < pkt_cnt; i++) {
> > > > >          pkt = pkts[i];
> > > > >          /* Handle current packet */
> > > > > -        if (netdev_dpdk_policer_pkt_handle(meter, pkt,
> > current_time)) {
> > > > > +        if (netdev_dpdk_policer_pkt_handle(meter, profile, pkt,
> > > > > +                                                 current_time))
> > > > > + {
> > > > >              if (cnt != i) {
> > > > >                  pkts[cnt] = pkt;
> > > > >              }
> > > > > @@ -1975,8 +1981,8 @@ ingress_policer_run(struct ingress_policer
> > > > > *policer, struct rte_mbuf **pkts,
> > > > >      int cnt = 0;
> > > > >
> > > > >      rte_spinlock_lock(&policer->policer_lock);
> > > > > -    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,
> > > > > -                                  pkt_cnt, should_steal);
> > > > > +    cnt = netdev_dpdk_policer_run(&policer->in_policer,
> > > > > + &policer-
> > > >in_prof,
> > > > > +                                  pkts, pkt_cnt, should_steal);
> > > > >      rte_spinlock_unlock(&policer->policer_lock);
> > > > >
> > > > >      return cnt;
> > > > > @@ -2767,8 +2773,15 @@ netdev_dpdk_policer_construct(uint32_t
> > > rate,
> > > > > uint32_t burst)
> > > > >      policer->app_srtcm_params.cir = rate_bytes;
> > > > >      policer->app_srtcm_params.cbs = burst_bytes;
> > > > >      policer->app_srtcm_params.ebs = 0;
> > > > > +    err = rte_meter_srtcm_profile_config(&policer->in_prof,
> > > > > +                                         &policer-
> > >app_srtcm_params);
> > > > > +    if (err) {
> > > > > +        VLOG_ERR("Could not create rte meter profile for
> > > > > + ingress
> > policer");
> > > > > +        free(policer);
> > > > > +        return NULL;
> > > > > +    }
> > > > >      err = rte_meter_srtcm_config(&policer->in_policer,
> > > > > -                                    &policer->app_srtcm_params);
> > > > > +                                    &policer->in_prof);
> > > > >      if (err) {
> > > > >          VLOG_ERR("Could not create rte meter for ingress policer");
> > > > >          free(policer);
> > > > > @@ -3043,13 +3056,18 @@ netdev_dpdk_get_status(const struct
> > > netdev
> > > > > *netdev, struct smap *args)
> > > > >      smap_add_format(args, "if_descr", "%s %s", rte_version(),
> > > > >
> > > > > dev_info.driver_name);
> > > > >
> > > > > -    if (dev_info.pci_dev) {
> > > > > -        smap_add_format(args, "pci-vendor_id", "0x%x",
> > > > > -                        dev_info.pci_dev->id.vendor_id);
> > > > > -        smap_add_format(args, "pci-device_id", "0x%x",
> > > > > -                        dev_info.pci_dev->id.device_id);
> > > > > +    const struct rte_bus *bus;
> > > > > +    const struct rte_pci_device *pci_dev;
> > > > > +    bus = rte_bus_find_by_device(dev_info.device);
> > > > > +    if (bus && !strcmp(bus->name, "pci")) {
> > > > > +        pci_dev = RTE_DEV_TO_PCI(dev_info.device);
> > > > > +        if (pci_dev) {
> > > > > +            smap_add_format(args, "pci-vendor_id", "0x%u",
> > > > > +                            pci_dev->id.vendor_id);
> > > > > +             smap_add_format(args, "pci-device_id", "0x%x",
> > > > > +                             pci_dev->id.device_id);
> > > > > +        }
> > > > >      }
> > > > > -
> > > > >      return 0;
> > > > >  }
> > > > >
> > > > > @@ -3727,6 +3745,7 @@ struct egress_policer {
> > > > >      struct qos_conf qos_conf;
> > > > >      struct rte_meter_srtcm_params app_srtcm_params;
> > > > >      struct rte_meter_srtcm egress_meter;
> > > > > +    struct rte_meter_srtcm_profile egress_prof;
> > > > >  };
> > > > >
> > > > >  static void
> > > > > @@ -3749,8 +3768,16 @@ egress_policer_qos_construct(const
> struct
> > > > > smap *details,
> > > > >      policer = xmalloc(sizeof *policer);
> > > > >      qos_conf_init(&policer->qos_conf, &egress_policer_ops);
> > > > >      egress_policer_details_to_param(details,
> > > > > &policer->app_srtcm_params);
> > > > > +    err = rte_meter_srtcm_profile_config(&policer->egress_prof,
> > > > > +                                         &policer-
> > >app_srtcm_params);
> > > > > +    if (err) {
> > > > > +        free(policer);
> > > > > +        *conf = NULL;
> > > > > +        return -err;
> > > > > +    }
> > > > >      err = rte_meter_srtcm_config(&policer->egress_meter,
> > > > > -                                 &policer->app_srtcm_params);
> > > > > +                                 &policer->egress_prof);
> > > > > +
> > > > >      if (!err) {
> > > > >          *conf = &policer->qos_conf;
> > > > >      } else {
> > > > > @@ -3803,7 +3830,8 @@ egress_policer_run(struct qos_conf *conf,
> > > > > struct rte_mbuf **pkts, int pkt_cnt,
> > > > >      struct egress_policer *policer =
> > > > >          CONTAINER_OF(conf, struct egress_policer, qos_conf);
> > > > >
> > > > > -    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
> > > > > +    cnt = netdev_dpdk_policer_run(&policer->egress_meter,
> > > > > +                                  &policer->egress_prof, pkts,
> > > > >                                    pkt_cnt, should_steal);
> > > > >
> > > > >      return cnt;
> > > > > @@ -4103,15 +4131,15 @@ dump_flow_pattern(struct
> rte_flow_item
> > > > > *item)
> > > > >
> > > > >          VLOG_DBG("rte flow vlan pattern:\n");
> > > > >          if (vlan_spec) {
> > > > > -            VLOG_DBG("  Spec: tpid=0x%"PRIx16", tci=0x%"PRIx16"\n",
> > > > > -                     ntohs(vlan_spec->tpid), ntohs(vlan_spec-
> > >tci));
> > > > > +            VLOG_DBG("  Spec: inner_type=0x%"PRIx16",
> > > tci=0x%"PRIx16"\n",
> > > > > +                     ntohs(vlan_spec->inner_type),
> > > > > + ntohs(vlan_spec->tci));
> > > > >          } else {
> > > > >              VLOG_DBG("  Spec = null\n");
> > > > >          }
> > > > >
> > > > >          if (vlan_mask) {
> > > > > -            VLOG_DBG("  Mask: tpid=0x%"PRIx16", tci=0x%"PRIx16"\n",
> > > > > -                     vlan_mask->tpid, vlan_mask->tci);
> > > > > +            VLOG_DBG("  Mask: inner_type=0x%"PRIx16",
> > > tci=0x%"PRIx16"\n",
> > > > > +                     vlan_mask->inner_type, vlan_mask->tci);
> > > > >          } else {
> > > > >              VLOG_DBG("  Mask = null\n");
> > > > >          }
> > > > > @@ -4281,27 +4309,56 @@ add_flow_action(struct flow_actions
> > > > > *actions, enum rte_flow_action_type type,
> > > > >      actions->cnt++;
> > > > >  }
> > > > >
> > > > > +/*
> > > > > + * Storage for struct rte_flow_action_rss
> > > > > + * including storage for key and queue array  */ #define
> > > > > +MAX_RSS_HASH_KEY_LENGTH  128 #define
> > > > MAX_ACTION_RSS_QUEUE_NUM
> > > > > 128
> > > > > +struct action_rss_data {
> > > > > +    struct rte_flow_action_rss conf;
> > > > > +    uint16_t queue[MAX_ACTION_RSS_QUEUE_NUM];
> > > > > +    uint8_t key[MAX_RSS_HASH_KEY_LENGTH]; };
> > > > > +
> > > > >  static struct rte_flow_action_rss *  add_flow_rss_action(struct
> > > > > flow_actions *actions,
> > > > >                      struct netdev *netdev) {
> > > > >      int i;
> > > > > -    struct rte_flow_action_rss *rss;
> > > > > +    struct action_rss_data *rss_data;
> > > > >
> > > > > -    rss = xmalloc(sizeof(*rss) + sizeof(uint16_t) * netdev->n_rxq);
> > > > > -    /*
> > > > > -     * Setting it to NULL will let the driver use the default RSS
> > > > > -     * configuration we have set: &port_conf.rx_adv_conf.rss_conf.
> > > > > -     */
> > > > > -    rss->rss_conf = NULL;
> > > > > -    rss->num = netdev->n_rxq;
> > > > > +    if (netdev->n_rxq > MAX_ACTION_RSS_QUEUE_NUM) {
> > > > > +        VLOG_ERR("Num of rxq (%u) must not be greater " \
> > > > > +                 "than max rss num of queues (%u)",
> > > > > +                 netdev->n_rxq, MAX_ACTION_RSS_QUEUE_NUM);
> > > > > +        return NULL;
> > > > > +    }
> > > > >
> > > > > -    for (i = 0; i < rss->num; i++) {
> > > > > -        rss->queue[i] = i;
> > > > > +    rss_data = xmalloc(sizeof(struct action_rss_data));
> > > > > +    *rss_data = (struct action_rss_data){
> > > > > +        .conf = (struct rte_flow_action_rss){
> > > > > +            .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
> > > > > +            .level = 0,
> > > > > +            .types = ETH_RSS_IP,
> > > > > +            .key_len = 0,
> > > > > +            .queue_num = netdev->n_rxq,
> > > > > +            .queue = rss_data->queue,
> > > > > +            .key = rss_data->key,
> > > > > +        },
> > > > > +        .key = { 0 },
> > > > > +        .queue = { 0 },
> > > > > +    };
> > > > > +
> > > > > +    /* TODO: Override key with default */
> > > > > +
> > > > > +    /* Override queue array with default */
> > > > > +    for (i = 0; i < rss_data->conf.queue_num; i++) {
> > > > > +       rss_data->queue[i] = i;
> > > > >      }
> > > > >
> > > > > -    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, rss);
> > > > > +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS,
> > > > > + &rss_data->conf);
> > > > >
> > > > > -    return rss;
> > > > > +    return &rss_data->conf;
> > > > >  }
> > > > >
> > > > >  static int
> > > > > @@ -4365,7 +4422,7 @@ netdev_dpdk_add_rte_flow_offload(struct
> > > netdev
> > > > > *netdev,
> > > > >          vlan_mask.tci  = match->wc.masks.vlans[0].tci &
> > > > > ~htons(VLAN_CFI);
> > > > >
> > > > >          /* match any protocols */
> > > > > -        vlan_mask.tpid = 0;
> > > > > +        vlan_mask.inner_type = 0;
> > > > >
> > > > >          add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_VLAN,
> > > > >                           &vlan_spec, &vlan_mask); @@ -4516,6
> > > > > +4573,11 @@
> > > > > end_proto_check:
> > > > >
> > > > >      struct rte_flow_action_rss *rss;
> > > > >      rss = add_flow_rss_action(&actions, netdev);
> > > > > +    if (!rss) {
> > > > > +        VLOG_ERR("add_flow_rss_action error.\n");
> > > > > +        ret = -1;
> > > > > +        goto out;
> > > > > +    }
> > > > >      add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END,
> NULL);
> > > > >
> > > > >      flow = rte_flow_create(dev->port_id, &flow_attr,
> > > > > patterns.items,
> > > > > --
> > > > > 1.8.3.1
Ophir Munk Sept. 19, 2018, 6:58 p.m. UTC | #14
Hi,

Also, please note in file Documentation/topics/dpdk/vhost-user.rst:
I did not update "v17.11" in the following link (as there is currently no link to "v18.08").

> Further information can be found in the DPDK documentation
> <http://dpdk.readthedocs.io/en/v17.11/prog_guide/vhost_lib.html>`__

Regards,
Ophir

> -----Original Message-----
> From: Chandran, Sugesh [mailto:sugesh.chandran@intel.com]
> Sent: Thursday, September 13, 2018 4:20 PM
> To: Stokes, Ian <ian.stokes@intel.com>; Kevin Traynor
> <ktraynor@redhat.com>; Ophir Munk <ophirmu@mellanox.com>; 'ovs-
> dev@openvswitch.org' <ovs-dev@openvswitch.org>
> Cc: Asaf Penso <asafp@mellanox.com>; 'Ben Pfaff' <blp@ovn.org>; Shahaf
> Shuler <shahafs@mellanox.com>; Thomas Monjalon
> <thomas@monjalon.net>; Olga Shern <olgas@mellanox.com>
> Subject: RE: [dpdk-howl PATCH v2] netdev-dpdk: Upgrade to dpdk v18.08.0
> 
> 
> 
> Regards
> _Sugesh
> 
> > >>>>>> -    2.10.x       17.11.3
> > >>>>>> +    2.10.x       18.08.0
> > >
> > > Hi - on updating versions in docs, this one should not change as the
> > > released/to be released versions of OVS 2.10.x stick with 17.11.x.
> > > We can just leave this unmodified for now and add an entry for
> > > 2.11.x closer to the release time.
> >
> > That makes sense actually as there's no releases from this branch. +1
> >
> > Ian
> > >
> > > Kevin.
> > >
> [Sugesh] +1
Ophir Munk Oct. 10, 2018, 5:02 p.m. UTC | #15
Hi Ian,
Regarding your comment - I have sent patch https://patchwork.ozlabs.org/patch/981980/

Regards,
Ophir

> -----Original Message-----
> From: Ian Stokes [mailto:ian.stokes@intel.com]
> Sent: Wednesday, September 12, 2018 12:18 AM
> To: Ilya Maximets <i.maximets@samsung.com>; Ophir Munk
> <ophirmu@mellanox.com>; ovs-dev@openvswitch.org
> Cc: Shahaf Shuler <shahafs@mellanox.com>; Asaf Penso
> <asafp@mellanox.com>; Thomas Monjalon <thomas@monjalon.net>; Kevin
> Traynor <ktraynor@redhat.com>
> Subject: Re: [ovs-dev,dpdk-howl,v2] netdev-dpdk: Upgrade to dpdk v18.08.0
> 
> >>       if (dev->mtu > ETHER_MTU) {
> >> -        rte_eth_dev_info_get(dev->port_id, &info);
> >>           if (strncmp(info.driver_name, "net_nfp", 7)) {
> >> -            conf.rxmode.enable_scatter = 1;
> >> +            conf.rxmode.offloads |= DEV_RX_OFFLOAD_SCATTER;
> 
> Above will break MTU support for i40e devices (and any other devices that
> do not have scatter listed in their offload capabilities).
> 
> DPDK seems to have implemented offload capabilities for more PMD drivers
> since 17.11 so I'm thinking we can remove the specific check against net_nfp
> altogether and check for support for scatter in device capabilities before
> setting it for the device. Something like below:
> 
>      if (dev->mtu > ETHER_MTU) {
>          if (info.rx_offload_capa & DEV_RX_OFFLOAD_SCATTER) {
>              conf.rxmode.offloads |= DEV_RX_OFFLOAD_SCATTER;
>          }
>      }
> 
> It would require confirmation/testing for other PMD devices however that
> they list supported offloads correctly for scatter if it's required for RX jumbo
> frames.
> 
> Ian
diff mbox series

Patch

diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh
index 4b9fc4a..c60ac71 100755
--- a/.travis/linux-build.sh
+++ b/.travis/linux-build.sh
@@ -83,7 +83,7 @@  fi
 
 if [ "$DPDK" ]; then
     if [ -z "$DPDK_VER" ]; then
-        DPDK_VER="17.11.3"
+        DPDK_VER="18.08.0"
     fi
     install_dpdk $DPDK_VER
     if [ "$CC" = "clang" ]; then
diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst
index 41d41e3..646ae09 100644
--- a/Documentation/faq/releases.rst
+++ b/Documentation/faq/releases.rst
@@ -168,7 +168,7 @@  Q: What DPDK version does each Open vSwitch release work with?
     2.7.x        16.11.7
     2.8.x        17.05.2
     2.9.x        17.11.3
-    2.10.x       17.11.3
+    2.10.x       18.08.0
     ============ =======
 
 Q: Are all the DPDK releases that OVS versions work with maintained?
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index f91aa27..6879e9a 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -168,11 +168,7 @@  static const struct rte_eth_conf port_conf = {
     .rxmode = {
         .mq_mode = ETH_MQ_RX_RSS,
         .split_hdr_size = 0,
-        .header_split   = 0, /* Header Split disabled */
-        .hw_ip_checksum = 0, /* IP checksum offload disabled */
-        .hw_vlan_filter = 0, /* VLAN filtering disabled */
-        .jumbo_frame    = 0, /* Jumbo Frame Support disabled */
-        .hw_strip_crc   = 0,
+        .offloads = 0,
     },
     .rx_adv_conf = {
         .rss_conf = {
@@ -364,6 +360,7 @@  struct dpdk_ring {
 struct ingress_policer {
     struct rte_meter_srtcm_params app_srtcm_params;
     struct rte_meter_srtcm in_policer;
+    struct rte_meter_srtcm_profile in_prof;
     rte_spinlock_t policer_lock;
 };
 
@@ -894,6 +891,8 @@  dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
     struct rte_eth_dev_info info;
     uint16_t conf_mtu;
 
+    rte_eth_dev_info_get(dev->port_id, &info);
+
     /* As of DPDK 17.11.1 a few PMDs require to explicitly enable
      * scatter to support jumbo RX. Checking the offload capabilities
      * is not an option as PMDs are not required yet to report
@@ -901,20 +900,24 @@  dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
      * (testing or code review). Listing all such PMDs feels harder
      * than highlighting the one known not to need scatter */
     if (dev->mtu > ETHER_MTU) {
-        rte_eth_dev_info_get(dev->port_id, &info);
         if (strncmp(info.driver_name, "net_nfp", 7)) {
-            conf.rxmode.enable_scatter = 1;
+            conf.rxmode.offloads |= DEV_RX_OFFLOAD_SCATTER;
         }
     }
 
     conf.intr_conf.lsc = dev->lsc_interrupt_mode;
-    conf.rxmode.hw_ip_checksum = (dev->hw_ol_features &
-                                  NETDEV_RX_CHECKSUM_OFFLOAD) != 0;
+    conf.rxmode.offloads |= ((dev->hw_ol_features &
+                             NETDEV_RX_CHECKSUM_OFFLOAD) != 0) ?
+                             DEV_RX_OFFLOAD_CHECKSUM : 0;
 
     if (dev->hw_ol_features & NETDEV_RX_HW_CRC_STRIP) {
-        conf.rxmode.hw_strip_crc = 1;
+        conf.rxmode.offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
     }
 
+    /* Limit configured rss hash functions to only those supported
+     * by the eth device. */
+    conf.rx_adv_conf.rss_conf.rss_hf &= info.flow_type_rss_offloads;
+
     /* A device may report more queues than it makes available (this has
      * been observed for Intel xl710, which reserves some of them for
      * SRIOV):  rte_eth_*_queue_setup will fail if a queue is not
@@ -1932,16 +1935,18 @@  netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
 
 static inline bool
 netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm *meter,
+                               struct rte_meter_srtcm_profile *profile,
                                struct rte_mbuf *pkt, uint64_t time)
 {
     uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt) - sizeof(struct ether_hdr);
 
-    return rte_meter_srtcm_color_blind_check(meter, time, pkt_len) ==
+    return rte_meter_srtcm_color_blind_check(meter, profile, time, pkt_len) ==
                                                 e_RTE_METER_GREEN;
 }
 
 static int
 netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
+                        struct rte_meter_srtcm_profile *profile,
                         struct rte_mbuf **pkts, int pkt_cnt,
                         bool should_steal)
 {
@@ -1953,7 +1958,8 @@  netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
     for (i = 0; i < pkt_cnt; i++) {
         pkt = pkts[i];
         /* Handle current packet */
-        if (netdev_dpdk_policer_pkt_handle(meter, pkt, current_time)) {
+        if (netdev_dpdk_policer_pkt_handle(meter, profile, pkt,
+                                                 current_time)) {
             if (cnt != i) {
                 pkts[cnt] = pkt;
             }
@@ -1975,8 +1981,8 @@  ingress_policer_run(struct ingress_policer *policer, struct rte_mbuf **pkts,
     int cnt = 0;
 
     rte_spinlock_lock(&policer->policer_lock);
-    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,
-                                  pkt_cnt, should_steal);
+    cnt = netdev_dpdk_policer_run(&policer->in_policer, &policer->in_prof,
+                                  pkts, pkt_cnt, should_steal);
     rte_spinlock_unlock(&policer->policer_lock);
 
     return cnt;
@@ -2767,8 +2773,15 @@  netdev_dpdk_policer_construct(uint32_t rate, uint32_t burst)
     policer->app_srtcm_params.cir = rate_bytes;
     policer->app_srtcm_params.cbs = burst_bytes;
     policer->app_srtcm_params.ebs = 0;
+    err = rte_meter_srtcm_profile_config(&policer->in_prof,
+                                         &policer->app_srtcm_params);
+    if (err) {
+        VLOG_ERR("Could not create rte meter profile for ingress policer");
+        free(policer);
+        return NULL;
+    }
     err = rte_meter_srtcm_config(&policer->in_policer,
-                                    &policer->app_srtcm_params);
+                                    &policer->in_prof);
     if (err) {
         VLOG_ERR("Could not create rte meter for ingress policer");
         free(policer);
@@ -3043,13 +3056,18 @@  netdev_dpdk_get_status(const struct netdev *netdev, struct smap *args)
     smap_add_format(args, "if_descr", "%s %s", rte_version(),
                                                dev_info.driver_name);
 
-    if (dev_info.pci_dev) {
-        smap_add_format(args, "pci-vendor_id", "0x%x",
-                        dev_info.pci_dev->id.vendor_id);
-        smap_add_format(args, "pci-device_id", "0x%x",
-                        dev_info.pci_dev->id.device_id);
+    const struct rte_bus *bus;
+    const struct rte_pci_device *pci_dev;
+    bus = rte_bus_find_by_device(dev_info.device);
+    if (bus && !strcmp(bus->name, "pci")) {
+        pci_dev = RTE_DEV_TO_PCI(dev_info.device);
+        if (pci_dev) {
+            smap_add_format(args, "pci-vendor_id", "0x%u",
+                            pci_dev->id.vendor_id);
+             smap_add_format(args, "pci-device_id", "0x%x",
+                             pci_dev->id.device_id);
+        }
     }
-
     return 0;
 }
 
@@ -3727,6 +3745,7 @@  struct egress_policer {
     struct qos_conf qos_conf;
     struct rte_meter_srtcm_params app_srtcm_params;
     struct rte_meter_srtcm egress_meter;
+    struct rte_meter_srtcm_profile egress_prof;
 };
 
 static void
@@ -3749,8 +3768,16 @@  egress_policer_qos_construct(const struct smap *details,
     policer = xmalloc(sizeof *policer);
     qos_conf_init(&policer->qos_conf, &egress_policer_ops);
     egress_policer_details_to_param(details, &policer->app_srtcm_params);
+    err = rte_meter_srtcm_profile_config(&policer->egress_prof,
+                                         &policer->app_srtcm_params);
+    if (err) {
+        free(policer);
+        *conf = NULL;
+        return -err;
+    }
     err = rte_meter_srtcm_config(&policer->egress_meter,
-                                 &policer->app_srtcm_params);
+                                 &policer->egress_prof);
+
     if (!err) {
         *conf = &policer->qos_conf;
     } else {
@@ -3803,7 +3830,8 @@  egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int pkt_cnt,
     struct egress_policer *policer =
         CONTAINER_OF(conf, struct egress_policer, qos_conf);
 
-    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
+    cnt = netdev_dpdk_policer_run(&policer->egress_meter,
+                                  &policer->egress_prof, pkts,
                                   pkt_cnt, should_steal);
 
     return cnt;
@@ -4103,15 +4131,15 @@  dump_flow_pattern(struct rte_flow_item *item)
 
         VLOG_DBG("rte flow vlan pattern:\n");
         if (vlan_spec) {
-            VLOG_DBG("  Spec: tpid=0x%"PRIx16", tci=0x%"PRIx16"\n",
-                     ntohs(vlan_spec->tpid), ntohs(vlan_spec->tci));
+            VLOG_DBG("  Spec: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n",
+                     ntohs(vlan_spec->inner_type), ntohs(vlan_spec->tci));
         } else {
             VLOG_DBG("  Spec = null\n");
         }
 
         if (vlan_mask) {
-            VLOG_DBG("  Mask: tpid=0x%"PRIx16", tci=0x%"PRIx16"\n",
-                     vlan_mask->tpid, vlan_mask->tci);
+            VLOG_DBG("  Mask: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n",
+                     vlan_mask->inner_type, vlan_mask->tci);
         } else {
             VLOG_DBG("  Mask = null\n");
         }
@@ -4281,27 +4309,56 @@  add_flow_action(struct flow_actions *actions, enum rte_flow_action_type type,
     actions->cnt++;
 }
 
+/*
+ * Storage for struct rte_flow_action_rss
+ * including storage for key and queue array
+ */
+#define MAX_RSS_HASH_KEY_LENGTH  128
+#define MAX_ACTION_RSS_QUEUE_NUM 128
+struct action_rss_data {
+    struct rte_flow_action_rss conf;
+    uint16_t queue[MAX_ACTION_RSS_QUEUE_NUM];
+    uint8_t key[MAX_RSS_HASH_KEY_LENGTH];
+};
+
 static struct rte_flow_action_rss *
 add_flow_rss_action(struct flow_actions *actions,
                     struct netdev *netdev) {
     int i;
-    struct rte_flow_action_rss *rss;
+    struct action_rss_data *rss_data;
 
-    rss = xmalloc(sizeof(*rss) + sizeof(uint16_t) * netdev->n_rxq);
-    /*
-     * Setting it to NULL will let the driver use the default RSS
-     * configuration we have set: &port_conf.rx_adv_conf.rss_conf.
-     */
-    rss->rss_conf = NULL;
-    rss->num = netdev->n_rxq;
+    if (netdev->n_rxq > MAX_ACTION_RSS_QUEUE_NUM) {
+        VLOG_ERR("Num of rxq (%u) must not be greater " \
+                 "than max rss num of queues (%u)",
+                 netdev->n_rxq, MAX_ACTION_RSS_QUEUE_NUM);
+        return NULL;
+    }
 
-    for (i = 0; i < rss->num; i++) {
-        rss->queue[i] = i;
+    rss_data = xmalloc(sizeof(struct action_rss_data));
+    *rss_data = (struct action_rss_data){
+        .conf = (struct rte_flow_action_rss){
+            .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
+            .level = 0,
+            .types = ETH_RSS_IP,
+            .key_len = 0,
+            .queue_num = netdev->n_rxq,
+            .queue = rss_data->queue,
+            .key = rss_data->key,
+        },
+        .key = { 0 },
+        .queue = { 0 },
+    };
+
+    /* TODO: Override key with default */
+
+    /* Override queue array with default */
+    for (i = 0; i < rss_data->conf.queue_num; i++) {
+       rss_data->queue[i] = i;
     }
 
-    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, rss);
+    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, &rss_data->conf);
 
-    return rss;
+    return &rss_data->conf;
 }
 
 static int
@@ -4365,7 +4422,7 @@  netdev_dpdk_add_rte_flow_offload(struct netdev *netdev,
         vlan_mask.tci  = match->wc.masks.vlans[0].tci & ~htons(VLAN_CFI);
 
         /* match any protocols */
-        vlan_mask.tpid = 0;
+        vlan_mask.inner_type = 0;
 
         add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_VLAN,
                          &vlan_spec, &vlan_mask);
@@ -4516,6 +4573,11 @@  end_proto_check:
 
     struct rte_flow_action_rss *rss;
     rss = add_flow_rss_action(&actions, netdev);
+    if (!rss) {
+        VLOG_ERR("add_flow_rss_action error.\n");
+        ret = -1;
+        goto out;
+    }
     add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL);
 
     flow = rte_flow_create(dev->port_id, &flow_attr, patterns.items,