diff mbox series

[ovs-dev,V2,10/14] netdev-offload-dpdk: Support tunnel pop action

Message ID 20210210152702.4898-11-elibr@nvidia.com
State Changes Requested
Headers show
Series Netdev vxlan-decap offload | expand

Commit Message

Eli Britstein Feb. 10, 2021, 3:26 p.m. UTC
Support tunnel pop action.

Signed-off-by: Eli Britstein <elibr@nvidia.com>
Reviewed-by: Gaetan Rivet <gaetanr@nvidia.com>
---
 Documentation/howto/dpdk.rst |   1 +
 NEWS                         |   1 +
 lib/netdev-offload-dpdk.c    | 173 ++++++++++++++++++++++++++++++++---
 3 files changed, 160 insertions(+), 15 deletions(-)

Comments

Sriharsha Basavapatna Feb. 24, 2021, 7:52 a.m. UTC | #1
On Wed, Feb 10, 2021 at 8:57 PM Eli Britstein <elibr@nvidia.com> wrote:
>
> Support tunnel pop action.
>
> Signed-off-by: Eli Britstein <elibr@nvidia.com>
> Reviewed-by: Gaetan Rivet <gaetanr@nvidia.com>
> ---
>  Documentation/howto/dpdk.rst |   1 +
>  NEWS                         |   1 +
>  lib/netdev-offload-dpdk.c    | 173 ++++++++++++++++++++++++++++++++---
>  3 files changed, 160 insertions(+), 15 deletions(-)
>
> diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
> index f0d45e47b..4918d80f3 100644
> --- a/Documentation/howto/dpdk.rst
> +++ b/Documentation/howto/dpdk.rst
> @@ -398,6 +398,7 @@ Supported actions for hardware offload are:
>  - VLAN Push/Pop (push_vlan/pop_vlan).
>  - Modification of IPv6 (set_field:<ADDR>->ipv6_src/ipv6_dst/mod_nw_ttl).
>  - Clone/output (tnl_push and output) for encapsulating over a tunnel.
> +- Tunnel pop, for changing from a physical port to a vport.
>
>  Further Reading
>  ---------------
> diff --git a/NEWS b/NEWS
> index a7bffce97..6850d5621 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -26,6 +26,7 @@ v2.15.0 - xx xxx xxxx
>     - DPDK:
>       * Removed support for vhost-user dequeue zero-copy.
>       * Add support for DPDK 20.11.
> +     * Add hardware offload support for tunnel pop action (experimental).
>     - Userspace datapath:
>       * Add the 'pmd' option to "ovs-appctl dpctl/dump-flows", which
>         restricts a flow dump to a single PMD thread if set.
> diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
> index 78f866080..493cc9159 100644
> --- a/lib/netdev-offload-dpdk.c
> +++ b/lib/netdev-offload-dpdk.c
> @@ -140,15 +140,30 @@ struct flow_actions {
>      struct rte_flow_action *actions;
>      int cnt;
>      int current_max;
> +    struct netdev *tnl_netdev;
> +    /* tnl_actions is the opaque array of actions returned by the PMD. */
> +    struct rte_flow_action *tnl_actions;

Why is this an opaque array ? Since it is struct rte_flow_action, OVS
knows the type and members. Is it opaque because the value of
rte_flow_action_type member is unknown to OVS ? Is it a private action
type and if so how does the PMD ensure that it doesn't conflict with
standard action types ?

> +    uint32_t num_of_tnl_actions;
> +    /* tnl_actions_pos is where the tunnel actions starts within the 'actions'
> +     * field.
> +     */
> +    int tnl_actions_pos;

Names should indicate that they are private or pmd specific ?

tnl_actions --> tnl_private_actions or tnl_pmd_actions
num_of_tnl_actions --> num_private_tnl_actions or num_pmd_tnl_actions
tnl_actions_pos --> tnl_private_actions_pos or tnl_pmd_actions_pos

> +    struct ds s_tnl;
>  };
>
>  static void
> -dump_flow_attr(struct ds *s, const struct rte_flow_attr *attr)
> +dump_flow_attr(struct ds *s, struct ds *s_extra,
> +               const struct rte_flow_attr *attr,
> +               struct flow_actions *flow_actions)
>  {
> -    ds_put_format(s, "%s%spriority %"PRIu32" group %"PRIu32" %s",
> +    if (flow_actions->num_of_tnl_actions) {
> +        ds_clone(s_extra, &flow_actions->s_tnl);
> +    }
> +    ds_put_format(s, "%s%spriority %"PRIu32" group %"PRIu32" %s%s",
>                    attr->ingress  ? "ingress " : "",
>                    attr->egress   ? "egress " : "", attr->priority, attr->group,
> -                  attr->transfer ? "transfer " : "");
> +                  attr->transfer ? "transfer " : "",
> +                  flow_actions->num_of_tnl_actions ? "tunnel_set 1 " : "");
>  }
>
>  /* Adds one pattern item 'field' with the 'mask' to dynamic string 's' using
> @@ -395,9 +410,19 @@ dump_vxlan_encap(struct ds *s, const struct rte_flow_item *items)
>
>  static void
>  dump_flow_action(struct ds *s, struct ds *s_extra,
> -                 const struct rte_flow_action *actions)
> +                 struct flow_actions *flow_actions, int act_index)
>  {
> -    if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
> +    const struct rte_flow_action *actions = &flow_actions->actions[act_index];
> +
> +    if (actions->type == RTE_FLOW_ACTION_TYPE_END) {
> +        ds_put_cstr(s, "end");
> +    } else if (flow_actions->num_of_tnl_actions &&
> +               act_index >= flow_actions->tnl_actions_pos &&
> +               act_index < flow_actions->tnl_actions_pos +
> +                           flow_actions->num_of_tnl_actions) {
> +        /* Opaque PMD tunnel actions is skipped. */

Wouldn't it be useful to at least print the value of PMD action types ?

> +        return;
> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
>          const struct rte_flow_action_mark *mark = actions->conf;
>
>          ds_put_cstr(s, "mark ");
> @@ -528,6 +553,14 @@ dump_flow_action(struct ds *s, struct ds *s_extra,
>          ds_put_cstr(s, "vxlan_encap / ");
>          dump_vxlan_encap(s_extra, items);
>          ds_put_cstr(s_extra, ";");
> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_JUMP) {
> +        const struct rte_flow_action_jump *jump = actions->conf;
> +
> +        ds_put_cstr(s, "jump ");
> +        if (jump) {
> +            ds_put_format(s, "group %"PRIu32" ", jump->group);
> +        }
> +        ds_put_cstr(s, "/ ");
>      } else {
>          ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
>      }
> @@ -537,20 +570,21 @@ static struct ds *
>  dump_flow(struct ds *s, struct ds *s_extra,
>            const struct rte_flow_attr *attr,
>            const struct rte_flow_item *items,
> -          const struct rte_flow_action *actions)
> +          struct flow_actions *flow_actions)
>  {
> +    int i;
> +
>      if (attr) {
> -        dump_flow_attr(s, attr);
> +        dump_flow_attr(s, s_extra, attr, flow_actions);
>      }
>      ds_put_cstr(s, "pattern ");
>      while (items && items->type != RTE_FLOW_ITEM_TYPE_END) {
>          dump_flow_pattern(s, items++);
>      }
>      ds_put_cstr(s, "end actions ");
> -    while (actions && actions->type != RTE_FLOW_ACTION_TYPE_END) {
> -        dump_flow_action(s, s_extra, actions++);
> +    for (i = 0; i < flow_actions->cnt; i++) {
> +        dump_flow_action(s, s_extra, flow_actions, i);
>      }
> -    ds_put_cstr(s, "end");
>      return s;
>  }
>
> @@ -558,9 +592,10 @@ static struct rte_flow *
>  netdev_offload_dpdk_flow_create(struct netdev *netdev,
>                                  const struct rte_flow_attr *attr,
>                                  const struct rte_flow_item *items,
> -                                const struct rte_flow_action *actions,
> +                                struct flow_actions *flow_actions,
>                                  struct rte_flow_error *error)
>  {
> +    const struct rte_flow_action *actions = flow_actions->actions;
>      struct ds s_extra = DS_EMPTY_INITIALIZER;
>      struct ds s = DS_EMPTY_INITIALIZER;
>      struct rte_flow *flow;
> @@ -569,7 +604,7 @@ netdev_offload_dpdk_flow_create(struct netdev *netdev,
>      flow = netdev_dpdk_rte_flow_create(netdev, attr, items, actions, error);
>      if (flow) {
>          if (!VLOG_DROP_DBG(&rl)) {
> -            dump_flow(&s, &s_extra, attr, items, actions);
> +            dump_flow(&s, &s_extra, attr, items, flow_actions);
>              extra_str = ds_cstr(&s_extra);
>              VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR" %s  flow create %d %s",
>                          netdev_get_name(netdev), (intptr_t) flow, extra_str,
> @@ -584,7 +619,7 @@ netdev_offload_dpdk_flow_create(struct netdev *netdev,
>          VLOG_RL(&rl, level, "%s: rte_flow creation failed: %d (%s).",
>                  netdev_get_name(netdev), error->type, error->message);
>          if (!vlog_should_drop(&this_module, level, &rl)) {
> -            dump_flow(&s, &s_extra, attr, items, actions);
> +            dump_flow(&s, &s_extra, attr, items, flow_actions);
>              extra_str = ds_cstr(&s_extra);
>              VLOG_RL(&rl, level, "%s: Failed flow: %s  flow create %d %s",
>                      netdev_get_name(netdev), extra_str,
> @@ -640,6 +675,23 @@ add_flow_action(struct flow_actions *actions, enum rte_flow_action_type type,
>      actions->cnt++;
>  }
>
> +static void
> +add_flow_tnl_actions(struct flow_actions *actions,
> +                     struct netdev *tnl_netdev,
> +                     struct rte_flow_action *tnl_actions,
> +                     uint32_t num_of_tnl_actions)
> +{
> +    int i;
> +
> +    actions->tnl_netdev = tnl_netdev;
> +    actions->tnl_actions_pos = actions->cnt;
> +    actions->tnl_actions = tnl_actions;
> +    actions->num_of_tnl_actions = num_of_tnl_actions;
> +    for (i = 0; i < num_of_tnl_actions; i++) {
> +        add_flow_action(actions, tnl_actions[i].type, tnl_actions[i].conf);
> +    }
> +}
> +
>  static void
>  free_flow_patterns(struct flow_patterns *patterns)
>  {
> @@ -661,9 +713,23 @@ free_flow_patterns(struct flow_patterns *patterns)
>  static void
>  free_flow_actions(struct flow_actions *actions)
>  {
> +    struct rte_flow_error error;
>      int i;
>
>      for (i = 0; i < actions->cnt; i++) {
> +        if (actions->num_of_tnl_actions && i == actions->tnl_actions_pos) {
> +            if (netdev_dpdk_rte_flow_tunnel_action_decap_release
> +                    (actions->tnl_netdev, actions->tnl_actions,
> +                     actions->num_of_tnl_actions, &error)) {
> +                VLOG_DBG_RL(&rl, "%s: "
> +                            "netdev_dpdk_rte_flow_tunnel_action_decap_release "
> +                            "failed: %d (%s).",
> +                            netdev_get_name(actions->tnl_netdev),
> +                            error.type, error.message);
> +            }
> +            i += actions->num_of_tnl_actions - 1;
> +            continue;
> +        }
>          if (actions->actions[i].conf) {
>              free(CONST_CAST(void *, actions->actions[i].conf));
>          }
> @@ -671,6 +737,7 @@ free_flow_actions(struct flow_actions *actions)
>      free(actions->actions);
>      actions->actions = NULL;
>      actions->cnt = 0;
> +    ds_destroy(&actions->s_tnl);
>  }
>
>  static int
> @@ -960,7 +1027,7 @@ netdev_offload_dpdk_mark_rss(struct flow_patterns *patterns,
>      add_flow_mark_rss_actions(&actions, flow_mark, netdev);
>
>      flow = netdev_offload_dpdk_flow_create(netdev, &flow_attr, patterns->items,
> -                                           actions.actions, &error);
> +                                           &actions, &error);
>
>      free_flow_actions(&actions);
>      return flow;
> @@ -1307,6 +1374,78 @@ parse_clone_actions(struct netdev *netdev,
>      return 0;
>  }
>
> +static void
> +add_jump_action(struct flow_actions *actions, uint32_t group)
> +{
> +    struct rte_flow_action_jump *jump = xzalloc (sizeof *jump);
> +
> +    jump->group = group;
> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_JUMP, jump);
> +}
> +
> +static int
> +vport_to_rte_tunnel(struct netdev *vport,
> +                    struct rte_flow_tunnel *tunnel,
> +                    struct netdev *netdev,
> +                    struct ds *s_tnl)
> +{
> +    const struct netdev_tunnel_config *tnl_cfg;
> +
> +    memset(tunnel, 0, sizeof *tunnel);
> +    if (!strcmp(netdev_get_type(vport), "vxlan")) {
> +        tunnel->type = RTE_FLOW_ITEM_TYPE_VXLAN;
> +        tnl_cfg = netdev_get_tunnel_config(vport);
> +        if (!tnl_cfg) {
> +            return -1;
> +        }
> +        tunnel->tp_dst = tnl_cfg->dst_port;
> +        if (!VLOG_DROP_DBG(&rl)) {
> +            ds_put_format(s_tnl, "flow tunnel create %d type vxlan; ",
> +                          netdev_dpdk_get_port_id(netdev));
> +        }
> +    } else {
> +        OVS_NOT_REACHED();
> +    }
> +
> +    return 0;
> +}
> +
> +static int
> +add_tnl_pop_action(struct netdev *netdev,
> +                   struct flow_actions *actions,
> +                   const struct nlattr *nla)
> +{
> +    struct rte_flow_action *tnl_actions = NULL;
> +    uint32_t num_of_tnl_actions = 0;
> +    struct rte_flow_tunnel tunnel;
> +    struct rte_flow_error error;
> +    struct netdev *vport;
> +    odp_port_t port;
> +    int ret;
> +
> +    port = nl_attr_get_odp_port(nla);
> +    vport = netdev_ports_get(port, netdev->dpif_type);
> +    if (vport == NULL) {
> +        return -1;
> +    }
> +    ret = vport_to_rte_tunnel(vport, &tunnel, netdev, &actions->s_tnl);
> +    netdev_close(vport);
> +    if (ret) {
> +        return ret;
> +    }
> +    ret = netdev_dpdk_rte_flow_tunnel_decap_set(netdev, &tunnel, &tnl_actions,
> +                                                &num_of_tnl_actions, &error);
> +    if (ret) {
> +        VLOG_DBG_RL(&rl, "%s: netdev_dpdk_rte_flow_tunnel_decap_set failed: "
> +                    "%d (%s).", netdev_get_name(netdev), error.type,
> +                    error.message);
> +        return ret;
> +    }
> +    add_flow_tnl_actions(actions, netdev, tnl_actions, num_of_tnl_actions);
> +    add_jump_action(actions, 0);

It is not clear why jump action is added and with a group value of 0 ?

Thanks,
-Harsha
> +    return 0;
> +}
> +
>  static int
>  parse_flow_actions(struct netdev *netdev,
>                     struct flow_actions *actions,
> @@ -1351,6 +1490,10 @@ parse_flow_actions(struct netdev *netdev,
>                                      clone_actions_len)) {
>                  return -1;
>              }
> +        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_TUNNEL_POP) {
> +            if (add_tnl_pop_action(netdev, actions, nla)) {
> +                return -1;
> +            }
>          } else {
>              VLOG_DBG_RL(&rl, "Unsupported action type %d", nl_attr_type(nla));
>              return -1;
> @@ -1383,7 +1526,7 @@ netdev_offload_dpdk_actions(struct netdev *netdev,
>          goto out;
>      }
>      flow = netdev_offload_dpdk_flow_create(netdev, &flow_attr, patterns->items,
> -                                           actions.actions, &error);
> +                                           &actions, &error);
>  out:
>      free_flow_actions(&actions);
>      return flow;
> --
> 2.28.0.546.g385c171
>
Eli Britstein Feb. 24, 2021, 8:20 a.m. UTC | #2
On 2/24/2021 9:52 AM, Sriharsha Basavapatna wrote:
> On Wed, Feb 10, 2021 at 8:57 PM Eli Britstein <elibr@nvidia.com> wrote:
>> Support tunnel pop action.
>>
>> Signed-off-by: Eli Britstein <elibr@nvidia.com>
>> Reviewed-by: Gaetan Rivet <gaetanr@nvidia.com>
>> ---
>>   Documentation/howto/dpdk.rst |   1 +
>>   NEWS                         |   1 +
>>   lib/netdev-offload-dpdk.c    | 173 ++++++++++++++++++++++++++++++++---
>>   3 files changed, 160 insertions(+), 15 deletions(-)
>>
>> diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
>> index f0d45e47b..4918d80f3 100644
>> --- a/Documentation/howto/dpdk.rst
>> +++ b/Documentation/howto/dpdk.rst
>> @@ -398,6 +398,7 @@ Supported actions for hardware offload are:
>>   - VLAN Push/Pop (push_vlan/pop_vlan).
>>   - Modification of IPv6 (set_field:<ADDR>->ipv6_src/ipv6_dst/mod_nw_ttl).
>>   - Clone/output (tnl_push and output) for encapsulating over a tunnel.
>> +- Tunnel pop, for changing from a physical port to a vport.
>>
>>   Further Reading
>>   ---------------
>> diff --git a/NEWS b/NEWS
>> index a7bffce97..6850d5621 100644
>> --- a/NEWS
>> +++ b/NEWS
>> @@ -26,6 +26,7 @@ v2.15.0 - xx xxx xxxx
>>      - DPDK:
>>        * Removed support for vhost-user dequeue zero-copy.
>>        * Add support for DPDK 20.11.
>> +     * Add hardware offload support for tunnel pop action (experimental).
>>      - Userspace datapath:
>>        * Add the 'pmd' option to "ovs-appctl dpctl/dump-flows", which
>>          restricts a flow dump to a single PMD thread if set.
>> diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
>> index 78f866080..493cc9159 100644
>> --- a/lib/netdev-offload-dpdk.c
>> +++ b/lib/netdev-offload-dpdk.c
>> @@ -140,15 +140,30 @@ struct flow_actions {
>>       struct rte_flow_action *actions;
>>       int cnt;
>>       int current_max;
>> +    struct netdev *tnl_netdev;
>> +    /* tnl_actions is the opaque array of actions returned by the PMD. */
>> +    struct rte_flow_action *tnl_actions;
> Why is this an opaque array ? Since it is struct rte_flow_action, OVS
> knows the type and members. Is it opaque because the value of
> rte_flow_action_type member is unknown to OVS ? Is it a private action
> type and if so how does the PMD ensure that it doesn't conflict with
> standard action types ?

True it is not used by OVS, but that's not why it opaque. Although it is 
struct rte_flow_action array, the PMD may use its own private actions, 
not defined in rte_flow.h, thus not known to the application.

The details of this array is under the PMD's responsibility.

>
>> +    uint32_t num_of_tnl_actions;
>> +    /* tnl_actions_pos is where the tunnel actions starts within the 'actions'
>> +     * field.
>> +     */
>> +    int tnl_actions_pos;
> Names should indicate that they are private or pmd specific ?
>
> tnl_actions --> tnl_private_actions or tnl_pmd_actions
> num_of_tnl_actions --> num_private_tnl_actions or num_pmd_tnl_actions
> tnl_actions_pos --> tnl_private_actions_pos or tnl_pmd_actions_pos
OK. _pmd_
>
>> +    struct ds s_tnl;
>>   };
>>
>>   static void
>> -dump_flow_attr(struct ds *s, const struct rte_flow_attr *attr)
>> +dump_flow_attr(struct ds *s, struct ds *s_extra,
>> +               const struct rte_flow_attr *attr,
>> +               struct flow_actions *flow_actions)
>>   {
>> -    ds_put_format(s, "%s%spriority %"PRIu32" group %"PRIu32" %s",
>> +    if (flow_actions->num_of_tnl_actions) {
>> +        ds_clone(s_extra, &flow_actions->s_tnl);
>> +    }
>> +    ds_put_format(s, "%s%spriority %"PRIu32" group %"PRIu32" %s%s",
>>                     attr->ingress  ? "ingress " : "",
>>                     attr->egress   ? "egress " : "", attr->priority, attr->group,
>> -                  attr->transfer ? "transfer " : "");
>> +                  attr->transfer ? "transfer " : "",
>> +                  flow_actions->num_of_tnl_actions ? "tunnel_set 1 " : "");
>>   }
>>
>>   /* Adds one pattern item 'field' with the 'mask' to dynamic string 's' using
>> @@ -395,9 +410,19 @@ dump_vxlan_encap(struct ds *s, const struct rte_flow_item *items)
>>
>>   static void
>>   dump_flow_action(struct ds *s, struct ds *s_extra,
>> -                 const struct rte_flow_action *actions)
>> +                 struct flow_actions *flow_actions, int act_index)
>>   {
>> -    if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
>> +    const struct rte_flow_action *actions = &flow_actions->actions[act_index];
>> +
>> +    if (actions->type == RTE_FLOW_ACTION_TYPE_END) {
>> +        ds_put_cstr(s, "end");
>> +    } else if (flow_actions->num_of_tnl_actions &&
>> +               act_index >= flow_actions->tnl_actions_pos &&
>> +               act_index < flow_actions->tnl_actions_pos +
>> +                           flow_actions->num_of_tnl_actions) {
>> +        /* Opaque PMD tunnel actions is skipped. */
> Wouldn't it be useful to at least print the value of PMD action types ?
The only way we can print them as raw numbers. I see little added value 
for this on one hand, but on the other hand it will defect the testpmd 
format print, so I think better to avoid.
>
>> +        return;
>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
>>           const struct rte_flow_action_mark *mark = actions->conf;
>>
>>           ds_put_cstr(s, "mark ");
>> @@ -528,6 +553,14 @@ dump_flow_action(struct ds *s, struct ds *s_extra,
>>           ds_put_cstr(s, "vxlan_encap / ");
>>           dump_vxlan_encap(s_extra, items);
>>           ds_put_cstr(s_extra, ";");
>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_JUMP) {
>> +        const struct rte_flow_action_jump *jump = actions->conf;
>> +
>> +        ds_put_cstr(s, "jump ");
>> +        if (jump) {
>> +            ds_put_format(s, "group %"PRIu32" ", jump->group);
>> +        }
>> +        ds_put_cstr(s, "/ ");
>>       } else {
>>           ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
>>       }
>> @@ -537,20 +570,21 @@ static struct ds *
>>   dump_flow(struct ds *s, struct ds *s_extra,
>>             const struct rte_flow_attr *attr,
>>             const struct rte_flow_item *items,
>> -          const struct rte_flow_action *actions)
>> +          struct flow_actions *flow_actions)
>>   {
>> +    int i;
>> +
>>       if (attr) {
>> -        dump_flow_attr(s, attr);
>> +        dump_flow_attr(s, s_extra, attr, flow_actions);
>>       }
>>       ds_put_cstr(s, "pattern ");
>>       while (items && items->type != RTE_FLOW_ITEM_TYPE_END) {
>>           dump_flow_pattern(s, items++);
>>       }
>>       ds_put_cstr(s, "end actions ");
>> -    while (actions && actions->type != RTE_FLOW_ACTION_TYPE_END) {
>> -        dump_flow_action(s, s_extra, actions++);
>> +    for (i = 0; i < flow_actions->cnt; i++) {
>> +        dump_flow_action(s, s_extra, flow_actions, i);
>>       }
>> -    ds_put_cstr(s, "end");
>>       return s;
>>   }
>>
>> @@ -558,9 +592,10 @@ static struct rte_flow *
>>   netdev_offload_dpdk_flow_create(struct netdev *netdev,
>>                                   const struct rte_flow_attr *attr,
>>                                   const struct rte_flow_item *items,
>> -                                const struct rte_flow_action *actions,
>> +                                struct flow_actions *flow_actions,
>>                                   struct rte_flow_error *error)
>>   {
>> +    const struct rte_flow_action *actions = flow_actions->actions;
>>       struct ds s_extra = DS_EMPTY_INITIALIZER;
>>       struct ds s = DS_EMPTY_INITIALIZER;
>>       struct rte_flow *flow;
>> @@ -569,7 +604,7 @@ netdev_offload_dpdk_flow_create(struct netdev *netdev,
>>       flow = netdev_dpdk_rte_flow_create(netdev, attr, items, actions, error);
>>       if (flow) {
>>           if (!VLOG_DROP_DBG(&rl)) {
>> -            dump_flow(&s, &s_extra, attr, items, actions);
>> +            dump_flow(&s, &s_extra, attr, items, flow_actions);
>>               extra_str = ds_cstr(&s_extra);
>>               VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR" %s  flow create %d %s",
>>                           netdev_get_name(netdev), (intptr_t) flow, extra_str,
>> @@ -584,7 +619,7 @@ netdev_offload_dpdk_flow_create(struct netdev *netdev,
>>           VLOG_RL(&rl, level, "%s: rte_flow creation failed: %d (%s).",
>>                   netdev_get_name(netdev), error->type, error->message);
>>           if (!vlog_should_drop(&this_module, level, &rl)) {
>> -            dump_flow(&s, &s_extra, attr, items, actions);
>> +            dump_flow(&s, &s_extra, attr, items, flow_actions);
>>               extra_str = ds_cstr(&s_extra);
>>               VLOG_RL(&rl, level, "%s: Failed flow: %s  flow create %d %s",
>>                       netdev_get_name(netdev), extra_str,
>> @@ -640,6 +675,23 @@ add_flow_action(struct flow_actions *actions, enum rte_flow_action_type type,
>>       actions->cnt++;
>>   }
>>
>> +static void
>> +add_flow_tnl_actions(struct flow_actions *actions,
>> +                     struct netdev *tnl_netdev,
>> +                     struct rte_flow_action *tnl_actions,
>> +                     uint32_t num_of_tnl_actions)
>> +{
>> +    int i;
>> +
>> +    actions->tnl_netdev = tnl_netdev;
>> +    actions->tnl_actions_pos = actions->cnt;
>> +    actions->tnl_actions = tnl_actions;
>> +    actions->num_of_tnl_actions = num_of_tnl_actions;
>> +    for (i = 0; i < num_of_tnl_actions; i++) {
>> +        add_flow_action(actions, tnl_actions[i].type, tnl_actions[i].conf);
>> +    }
>> +}
>> +
>>   static void
>>   free_flow_patterns(struct flow_patterns *patterns)
>>   {
>> @@ -661,9 +713,23 @@ free_flow_patterns(struct flow_patterns *patterns)
>>   static void
>>   free_flow_actions(struct flow_actions *actions)
>>   {
>> +    struct rte_flow_error error;
>>       int i;
>>
>>       for (i = 0; i < actions->cnt; i++) {
>> +        if (actions->num_of_tnl_actions && i == actions->tnl_actions_pos) {
>> +            if (netdev_dpdk_rte_flow_tunnel_action_decap_release
>> +                    (actions->tnl_netdev, actions->tnl_actions,
>> +                     actions->num_of_tnl_actions, &error)) {
>> +                VLOG_DBG_RL(&rl, "%s: "
>> +                            "netdev_dpdk_rte_flow_tunnel_action_decap_release "
>> +                            "failed: %d (%s).",
>> +                            netdev_get_name(actions->tnl_netdev),
>> +                            error.type, error.message);
>> +            }
>> +            i += actions->num_of_tnl_actions - 1;
>> +            continue;
>> +        }
>>           if (actions->actions[i].conf) {
>>               free(CONST_CAST(void *, actions->actions[i].conf));
>>           }
>> @@ -671,6 +737,7 @@ free_flow_actions(struct flow_actions *actions)
>>       free(actions->actions);
>>       actions->actions = NULL;
>>       actions->cnt = 0;
>> +    ds_destroy(&actions->s_tnl);
>>   }
>>
>>   static int
>> @@ -960,7 +1027,7 @@ netdev_offload_dpdk_mark_rss(struct flow_patterns *patterns,
>>       add_flow_mark_rss_actions(&actions, flow_mark, netdev);
>>
>>       flow = netdev_offload_dpdk_flow_create(netdev, &flow_attr, patterns->items,
>> -                                           actions.actions, &error);
>> +                                           &actions, &error);
>>
>>       free_flow_actions(&actions);
>>       return flow;
>> @@ -1307,6 +1374,78 @@ parse_clone_actions(struct netdev *netdev,
>>       return 0;
>>   }
>>
>> +static void
>> +add_jump_action(struct flow_actions *actions, uint32_t group)
>> +{
>> +    struct rte_flow_action_jump *jump = xzalloc (sizeof *jump);
>> +
>> +    jump->group = group;
>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_JUMP, jump);
>> +}
>> +
>> +static int
>> +vport_to_rte_tunnel(struct netdev *vport,
>> +                    struct rte_flow_tunnel *tunnel,
>> +                    struct netdev *netdev,
>> +                    struct ds *s_tnl)
>> +{
>> +    const struct netdev_tunnel_config *tnl_cfg;
>> +
>> +    memset(tunnel, 0, sizeof *tunnel);
>> +    if (!strcmp(netdev_get_type(vport), "vxlan")) {
>> +        tunnel->type = RTE_FLOW_ITEM_TYPE_VXLAN;
>> +        tnl_cfg = netdev_get_tunnel_config(vport);
>> +        if (!tnl_cfg) {
>> +            return -1;
>> +        }
>> +        tunnel->tp_dst = tnl_cfg->dst_port;
>> +        if (!VLOG_DROP_DBG(&rl)) {
>> +            ds_put_format(s_tnl, "flow tunnel create %d type vxlan; ",
>> +                          netdev_dpdk_get_port_id(netdev));
>> +        }
>> +    } else {
>> +        OVS_NOT_REACHED();
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int
>> +add_tnl_pop_action(struct netdev *netdev,
>> +                   struct flow_actions *actions,
>> +                   const struct nlattr *nla)
>> +{
>> +    struct rte_flow_action *tnl_actions = NULL;
>> +    uint32_t num_of_tnl_actions = 0;
>> +    struct rte_flow_tunnel tunnel;
>> +    struct rte_flow_error error;
>> +    struct netdev *vport;
>> +    odp_port_t port;
>> +    int ret;
>> +
>> +    port = nl_attr_get_odp_port(nla);
>> +    vport = netdev_ports_get(port, netdev->dpif_type);
>> +    if (vport == NULL) {
>> +        return -1;
>> +    }
>> +    ret = vport_to_rte_tunnel(vport, &tunnel, netdev, &actions->s_tnl);
>> +    netdev_close(vport);
>> +    if (ret) {
>> +        return ret;
>> +    }
>> +    ret = netdev_dpdk_rte_flow_tunnel_decap_set(netdev, &tunnel, &tnl_actions,
>> +                                                &num_of_tnl_actions, &error);
>> +    if (ret) {
>> +        VLOG_DBG_RL(&rl, "%s: netdev_dpdk_rte_flow_tunnel_decap_set failed: "
>> +                    "%d (%s).", netdev_get_name(netdev), error.type,
>> +                    error.message);
>> +        return ret;
>> +    }
>> +    add_flow_tnl_actions(actions, netdev, tnl_actions, num_of_tnl_actions);
>> +    add_jump_action(actions, 0);
> It is not clear why jump action is added and with a group value of 0 ?

This is equivalent to dp_netdev_recirculate, when the group represents 
the recirc_id that is kept 0.

I will add a comment.

>
> Thanks,
> -Harsha
>> +    return 0;
>> +}
>> +
>>   static int
>>   parse_flow_actions(struct netdev *netdev,
>>                      struct flow_actions *actions,
>> @@ -1351,6 +1490,10 @@ parse_flow_actions(struct netdev *netdev,
>>                                       clone_actions_len)) {
>>                   return -1;
>>               }
>> +        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_TUNNEL_POP) {
>> +            if (add_tnl_pop_action(netdev, actions, nla)) {
>> +                return -1;
>> +            }
>>           } else {
>>               VLOG_DBG_RL(&rl, "Unsupported action type %d", nl_attr_type(nla));
>>               return -1;
>> @@ -1383,7 +1526,7 @@ netdev_offload_dpdk_actions(struct netdev *netdev,
>>           goto out;
>>       }
>>       flow = netdev_offload_dpdk_flow_create(netdev, &flow_attr, patterns->items,
>> -                                           actions.actions, &error);
>> +                                           &actions, &error);
>>   out:
>>       free_flow_actions(&actions);
>>       return flow;
>> --
>> 2.28.0.546.g385c171
>>
Sriharsha Basavapatna Feb. 25, 2021, 7:35 a.m. UTC | #3
On Wed, Feb 24, 2021 at 1:50 PM Eli Britstein <elibr@nvidia.com> wrote:
>
>
> On 2/24/2021 9:52 AM, Sriharsha Basavapatna wrote:
> > On Wed, Feb 10, 2021 at 8:57 PM Eli Britstein <elibr@nvidia.com> wrote:
> >> Support tunnel pop action.
> >>
> >> Signed-off-by: Eli Britstein <elibr@nvidia.com>
> >> Reviewed-by: Gaetan Rivet <gaetanr@nvidia.com>
> >> ---
> >>   Documentation/howto/dpdk.rst |   1 +
> >>   NEWS                         |   1 +
> >>   lib/netdev-offload-dpdk.c    | 173 ++++++++++++++++++++++++++++++++---
> >>   3 files changed, 160 insertions(+), 15 deletions(-)
> >>
> >> diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
> >> index f0d45e47b..4918d80f3 100644
> >> --- a/Documentation/howto/dpdk.rst
> >> +++ b/Documentation/howto/dpdk.rst
> >> @@ -398,6 +398,7 @@ Supported actions for hardware offload are:
> >>   - VLAN Push/Pop (push_vlan/pop_vlan).
> >>   - Modification of IPv6 (set_field:<ADDR>->ipv6_src/ipv6_dst/mod_nw_ttl).
> >>   - Clone/output (tnl_push and output) for encapsulating over a tunnel.
> >> +- Tunnel pop, for changing from a physical port to a vport.
> >>
> >>   Further Reading
> >>   ---------------
> >> diff --git a/NEWS b/NEWS
> >> index a7bffce97..6850d5621 100644
> >> --- a/NEWS
> >> +++ b/NEWS
> >> @@ -26,6 +26,7 @@ v2.15.0 - xx xxx xxxx
> >>      - DPDK:
> >>        * Removed support for vhost-user dequeue zero-copy.
> >>        * Add support for DPDK 20.11.
> >> +     * Add hardware offload support for tunnel pop action (experimental).
> >>      - Userspace datapath:
> >>        * Add the 'pmd' option to "ovs-appctl dpctl/dump-flows", which
> >>          restricts a flow dump to a single PMD thread if set.
> >> diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
> >> index 78f866080..493cc9159 100644
> >> --- a/lib/netdev-offload-dpdk.c
> >> +++ b/lib/netdev-offload-dpdk.c
> >> @@ -140,15 +140,30 @@ struct flow_actions {
> >>       struct rte_flow_action *actions;
> >>       int cnt;
> >>       int current_max;
> >> +    struct netdev *tnl_netdev;
> >> +    /* tnl_actions is the opaque array of actions returned by the PMD. */
> >> +    struct rte_flow_action *tnl_actions;
> > Why is this an opaque array ? Since it is struct rte_flow_action, OVS
> > knows the type and members. Is it opaque because the value of
> > rte_flow_action_type member is unknown to OVS ? Is it a private action
> > type and if so how does the PMD ensure that it doesn't conflict with
> > standard action types ?
>
> True it is not used by OVS, but that's not why it opaque. Although it is
> struct rte_flow_action array, the PMD may use its own private actions,
> not defined in rte_flow.h, thus not known to the application.
>
> The details of this array is under the PMD's responsibility.

So this means the PMD has to pick a range of private action values
that are not defined in rte_flow.h,  but what if later a new action
type with the same value is added to rte_flow.h ?
The other question is if the PMD could use one of the existing action
types in rte_flow.h [i.e, to avoid defining its own private action
types] and return it in the opaque action array ?

>
> >
> >> +    uint32_t num_of_tnl_actions;
> >> +    /* tnl_actions_pos is where the tunnel actions starts within the 'actions'
> >> +     * field.
> >> +     */
> >> +    int tnl_actions_pos;
> > Names should indicate that they are private or pmd specific ?
> >
> > tnl_actions --> tnl_private_actions or tnl_pmd_actions
> > num_of_tnl_actions --> num_private_tnl_actions or num_pmd_tnl_actions
> > tnl_actions_pos --> tnl_private_actions_pos or tnl_pmd_actions_pos
> OK. _pmd_
> >
> >> +    struct ds s_tnl;
> >>   };
> >>
> >>   static void
> >> -dump_flow_attr(struct ds *s, const struct rte_flow_attr *attr)
> >> +dump_flow_attr(struct ds *s, struct ds *s_extra,
> >> +               const struct rte_flow_attr *attr,
> >> +               struct flow_actions *flow_actions)
> >>   {
> >> -    ds_put_format(s, "%s%spriority %"PRIu32" group %"PRIu32" %s",
> >> +    if (flow_actions->num_of_tnl_actions) {
> >> +        ds_clone(s_extra, &flow_actions->s_tnl);
> >> +    }
> >> +    ds_put_format(s, "%s%spriority %"PRIu32" group %"PRIu32" %s%s",
> >>                     attr->ingress  ? "ingress " : "",
> >>                     attr->egress   ? "egress " : "", attr->priority, attr->group,
> >> -                  attr->transfer ? "transfer " : "");
> >> +                  attr->transfer ? "transfer " : "",
> >> +                  flow_actions->num_of_tnl_actions ? "tunnel_set 1 " : "");
> >>   }
> >>
> >>   /* Adds one pattern item 'field' with the 'mask' to dynamic string 's' using
> >> @@ -395,9 +410,19 @@ dump_vxlan_encap(struct ds *s, const struct rte_flow_item *items)
> >>
> >>   static void
> >>   dump_flow_action(struct ds *s, struct ds *s_extra,
> >> -                 const struct rte_flow_action *actions)
> >> +                 struct flow_actions *flow_actions, int act_index)
> >>   {
> >> -    if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
> >> +    const struct rte_flow_action *actions = &flow_actions->actions[act_index];
> >> +
> >> +    if (actions->type == RTE_FLOW_ACTION_TYPE_END) {
> >> +        ds_put_cstr(s, "end");
> >> +    } else if (flow_actions->num_of_tnl_actions &&
> >> +               act_index >= flow_actions->tnl_actions_pos &&
> >> +               act_index < flow_actions->tnl_actions_pos +
> >> +                           flow_actions->num_of_tnl_actions) {
> >> +        /* Opaque PMD tunnel actions is skipped. */
> > Wouldn't it be useful to at least print the value of PMD action types ?
> The only way we can print them as raw numbers. I see little added value
> for this on one hand, but on the other hand it will defect the testpmd
> format print, so I think better to avoid.

It might be useful for debugging ?
> >
> >> +        return;
> >> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
> >>           const struct rte_flow_action_mark *mark = actions->conf;
> >>
> >>           ds_put_cstr(s, "mark ");
> >> @@ -528,6 +553,14 @@ dump_flow_action(struct ds *s, struct ds *s_extra,
> >>           ds_put_cstr(s, "vxlan_encap / ");
> >>           dump_vxlan_encap(s_extra, items);
> >>           ds_put_cstr(s_extra, ";");
> >> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_JUMP) {
> >> +        const struct rte_flow_action_jump *jump = actions->conf;
> >> +
> >> +        ds_put_cstr(s, "jump ");
> >> +        if (jump) {
> >> +            ds_put_format(s, "group %"PRIu32" ", jump->group);
> >> +        }
> >> +        ds_put_cstr(s, "/ ");
> >>       } else {
> >>           ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
> >>       }
> >> @@ -537,20 +570,21 @@ static struct ds *
> >>   dump_flow(struct ds *s, struct ds *s_extra,
> >>             const struct rte_flow_attr *attr,
> >>             const struct rte_flow_item *items,
> >> -          const struct rte_flow_action *actions)
> >> +          struct flow_actions *flow_actions)
> >>   {
> >> +    int i;
> >> +
> >>       if (attr) {
> >> -        dump_flow_attr(s, attr);
> >> +        dump_flow_attr(s, s_extra, attr, flow_actions);
> >>       }
> >>       ds_put_cstr(s, "pattern ");
> >>       while (items && items->type != RTE_FLOW_ITEM_TYPE_END) {
> >>           dump_flow_pattern(s, items++);
> >>       }
> >>       ds_put_cstr(s, "end actions ");
> >> -    while (actions && actions->type != RTE_FLOW_ACTION_TYPE_END) {
> >> -        dump_flow_action(s, s_extra, actions++);
> >> +    for (i = 0; i < flow_actions->cnt; i++) {
> >> +        dump_flow_action(s, s_extra, flow_actions, i);
> >>       }
> >> -    ds_put_cstr(s, "end");
> >>       return s;
> >>   }
> >>
> >> @@ -558,9 +592,10 @@ static struct rte_flow *
> >>   netdev_offload_dpdk_flow_create(struct netdev *netdev,
> >>                                   const struct rte_flow_attr *attr,
> >>                                   const struct rte_flow_item *items,
> >> -                                const struct rte_flow_action *actions,
> >> +                                struct flow_actions *flow_actions,
> >>                                   struct rte_flow_error *error)
> >>   {
> >> +    const struct rte_flow_action *actions = flow_actions->actions;
> >>       struct ds s_extra = DS_EMPTY_INITIALIZER;
> >>       struct ds s = DS_EMPTY_INITIALIZER;
> >>       struct rte_flow *flow;
> >> @@ -569,7 +604,7 @@ netdev_offload_dpdk_flow_create(struct netdev *netdev,
> >>       flow = netdev_dpdk_rte_flow_create(netdev, attr, items, actions, error);
> >>       if (flow) {
> >>           if (!VLOG_DROP_DBG(&rl)) {
> >> -            dump_flow(&s, &s_extra, attr, items, actions);
> >> +            dump_flow(&s, &s_extra, attr, items, flow_actions);
> >>               extra_str = ds_cstr(&s_extra);
> >>               VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR" %s  flow create %d %s",
> >>                           netdev_get_name(netdev), (intptr_t) flow, extra_str,
> >> @@ -584,7 +619,7 @@ netdev_offload_dpdk_flow_create(struct netdev *netdev,
> >>           VLOG_RL(&rl, level, "%s: rte_flow creation failed: %d (%s).",
> >>                   netdev_get_name(netdev), error->type, error->message);
> >>           if (!vlog_should_drop(&this_module, level, &rl)) {
> >> -            dump_flow(&s, &s_extra, attr, items, actions);
> >> +            dump_flow(&s, &s_extra, attr, items, flow_actions);
> >>               extra_str = ds_cstr(&s_extra);
> >>               VLOG_RL(&rl, level, "%s: Failed flow: %s  flow create %d %s",
> >>                       netdev_get_name(netdev), extra_str,
> >> @@ -640,6 +675,23 @@ add_flow_action(struct flow_actions *actions, enum rte_flow_action_type type,
> >>       actions->cnt++;
> >>   }
> >>
> >> +static void
> >> +add_flow_tnl_actions(struct flow_actions *actions,
> >> +                     struct netdev *tnl_netdev,
> >> +                     struct rte_flow_action *tnl_actions,
> >> +                     uint32_t num_of_tnl_actions)
> >> +{
> >> +    int i;
> >> +
> >> +    actions->tnl_netdev = tnl_netdev;
> >> +    actions->tnl_actions_pos = actions->cnt;
> >> +    actions->tnl_actions = tnl_actions;
> >> +    actions->num_of_tnl_actions = num_of_tnl_actions;
> >> +    for (i = 0; i < num_of_tnl_actions; i++) {
> >> +        add_flow_action(actions, tnl_actions[i].type, tnl_actions[i].conf);
> >> +    }
> >> +}
> >> +
> >>   static void
> >>   free_flow_patterns(struct flow_patterns *patterns)
> >>   {
> >> @@ -661,9 +713,23 @@ free_flow_patterns(struct flow_patterns *patterns)
> >>   static void
> >>   free_flow_actions(struct flow_actions *actions)
> >>   {
> >> +    struct rte_flow_error error;
> >>       int i;
> >>
> >>       for (i = 0; i < actions->cnt; i++) {
> >> +        if (actions->num_of_tnl_actions && i == actions->tnl_actions_pos) {
> >> +            if (netdev_dpdk_rte_flow_tunnel_action_decap_release
> >> +                    (actions->tnl_netdev, actions->tnl_actions,
> >> +                     actions->num_of_tnl_actions, &error)) {
> >> +                VLOG_DBG_RL(&rl, "%s: "
> >> +                            "netdev_dpdk_rte_flow_tunnel_action_decap_release "
> >> +                            "failed: %d (%s).",
> >> +                            netdev_get_name(actions->tnl_netdev),
> >> +                            error.type, error.message);
> >> +            }
> >> +            i += actions->num_of_tnl_actions - 1;
> >> +            continue;
> >> +        }
> >>           if (actions->actions[i].conf) {
> >>               free(CONST_CAST(void *, actions->actions[i].conf));
> >>           }
> >> @@ -671,6 +737,7 @@ free_flow_actions(struct flow_actions *actions)
> >>       free(actions->actions);
> >>       actions->actions = NULL;
> >>       actions->cnt = 0;
> >> +    ds_destroy(&actions->s_tnl);
> >>   }
> >>
> >>   static int
> >> @@ -960,7 +1027,7 @@ netdev_offload_dpdk_mark_rss(struct flow_patterns *patterns,
> >>       add_flow_mark_rss_actions(&actions, flow_mark, netdev);
> >>
> >>       flow = netdev_offload_dpdk_flow_create(netdev, &flow_attr, patterns->items,
> >> -                                           actions.actions, &error);
> >> +                                           &actions, &error);
> >>
> >>       free_flow_actions(&actions);
> >>       return flow;
> >> @@ -1307,6 +1374,78 @@ parse_clone_actions(struct netdev *netdev,
> >>       return 0;
> >>   }
> >>
> >> +static void
> >> +add_jump_action(struct flow_actions *actions, uint32_t group)
> >> +{
> >> +    struct rte_flow_action_jump *jump = xzalloc (sizeof *jump);
> >> +
> >> +    jump->group = group;
> >> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_JUMP, jump);
> >> +}
> >> +
> >> +static int
> >> +vport_to_rte_tunnel(struct netdev *vport,
> >> +                    struct rte_flow_tunnel *tunnel,
> >> +                    struct netdev *netdev,
> >> +                    struct ds *s_tnl)
> >> +{
> >> +    const struct netdev_tunnel_config *tnl_cfg;
> >> +
> >> +    memset(tunnel, 0, sizeof *tunnel);
> >> +    if (!strcmp(netdev_get_type(vport), "vxlan")) {
> >> +        tunnel->type = RTE_FLOW_ITEM_TYPE_VXLAN;
> >> +        tnl_cfg = netdev_get_tunnel_config(vport);
> >> +        if (!tnl_cfg) {
> >> +            return -1;
> >> +        }
> >> +        tunnel->tp_dst = tnl_cfg->dst_port;
> >> +        if (!VLOG_DROP_DBG(&rl)) {
> >> +            ds_put_format(s_tnl, "flow tunnel create %d type vxlan; ",
> >> +                          netdev_dpdk_get_port_id(netdev));
> >> +        }
> >> +    } else {
> >> +        OVS_NOT_REACHED();
> >> +    }
> >> +
> >> +    return 0;
> >> +}
> >> +
> >> +static int
> >> +add_tnl_pop_action(struct netdev *netdev,
> >> +                   struct flow_actions *actions,
> >> +                   const struct nlattr *nla)
> >> +{
> >> +    struct rte_flow_action *tnl_actions = NULL;
> >> +    uint32_t num_of_tnl_actions = 0;
> >> +    struct rte_flow_tunnel tunnel;
> >> +    struct rte_flow_error error;
> >> +    struct netdev *vport;
> >> +    odp_port_t port;
> >> +    int ret;
> >> +
> >> +    port = nl_attr_get_odp_port(nla);
> >> +    vport = netdev_ports_get(port, netdev->dpif_type);
> >> +    if (vport == NULL) {
> >> +        return -1;
> >> +    }
> >> +    ret = vport_to_rte_tunnel(vport, &tunnel, netdev, &actions->s_tnl);
> >> +    netdev_close(vport);
> >> +    if (ret) {
> >> +        return ret;
> >> +    }
> >> +    ret = netdev_dpdk_rte_flow_tunnel_decap_set(netdev, &tunnel, &tnl_actions,
> >> +                                                &num_of_tnl_actions, &error);
> >> +    if (ret) {
> >> +        VLOG_DBG_RL(&rl, "%s: netdev_dpdk_rte_flow_tunnel_decap_set failed: "
> >> +                    "%d (%s).", netdev_get_name(netdev), error.type,
> >> +                    error.message);
> >> +        return ret;
> >> +    }
> >> +    add_flow_tnl_actions(actions, netdev, tnl_actions, num_of_tnl_actions);
> >> +    add_jump_action(actions, 0);
> > It is not clear why jump action is added and with a group value of 0 ?
>
> This is equivalent to dp_netdev_recirculate, when the group represents
> the recirc_id that is kept 0.
>
> I will add a comment.

But this is flow-F1 and there's no recirculation at this point. How
should the PMD interpret this ?


>
> >
> > Thanks,
> > -Harsha
> >> +    return 0;
> >> +}
> >> +
> >>   static int
> >>   parse_flow_actions(struct netdev *netdev,
> >>                      struct flow_actions *actions,
> >> @@ -1351,6 +1490,10 @@ parse_flow_actions(struct netdev *netdev,
> >>                                       clone_actions_len)) {
> >>                   return -1;
> >>               }
> >> +        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_TUNNEL_POP) {
> >> +            if (add_tnl_pop_action(netdev, actions, nla)) {
> >> +                return -1;
> >> +            }
> >>           } else {
> >>               VLOG_DBG_RL(&rl, "Unsupported action type %d", nl_attr_type(nla));
> >>               return -1;
> >> @@ -1383,7 +1526,7 @@ netdev_offload_dpdk_actions(struct netdev *netdev,
> >>           goto out;
> >>       }
> >>       flow = netdev_offload_dpdk_flow_create(netdev, &flow_attr, patterns->items,
> >> -                                           actions.actions, &error);
> >> +                                           &actions, &error);
> >>   out:
> >>       free_flow_actions(&actions);
> >>       return flow;
> >> --
> >> 2.28.0.546.g385c171
> >>
Eli Britstein Feb. 25, 2021, 2:20 p.m. UTC | #4
On 2/25/2021 9:35 AM, Sriharsha Basavapatna wrote:
> On Wed, Feb 24, 2021 at 1:50 PM Eli Britstein <elibr@nvidia.com> wrote:
>>
>> On 2/24/2021 9:52 AM, Sriharsha Basavapatna wrote:
>>> On Wed, Feb 10, 2021 at 8:57 PM Eli Britstein <elibr@nvidia.com> wrote:
>>>> Support tunnel pop action.
>>>>
>>>> Signed-off-by: Eli Britstein <elibr@nvidia.com>
>>>> Reviewed-by: Gaetan Rivet <gaetanr@nvidia.com>
>>>> ---
>>>>    Documentation/howto/dpdk.rst |   1 +
>>>>    NEWS                         |   1 +
>>>>    lib/netdev-offload-dpdk.c    | 173 ++++++++++++++++++++++++++++++++---
>>>>    3 files changed, 160 insertions(+), 15 deletions(-)
>>>>
>>>> diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
>>>> index f0d45e47b..4918d80f3 100644
>>>> --- a/Documentation/howto/dpdk.rst
>>>> +++ b/Documentation/howto/dpdk.rst
>>>> @@ -398,6 +398,7 @@ Supported actions for hardware offload are:
>>>>    - VLAN Push/Pop (push_vlan/pop_vlan).
>>>>    - Modification of IPv6 (set_field:<ADDR>->ipv6_src/ipv6_dst/mod_nw_ttl).
>>>>    - Clone/output (tnl_push and output) for encapsulating over a tunnel.
>>>> +- Tunnel pop, for changing from a physical port to a vport.
>>>>
>>>>    Further Reading
>>>>    ---------------
>>>> diff --git a/NEWS b/NEWS
>>>> index a7bffce97..6850d5621 100644
>>>> --- a/NEWS
>>>> +++ b/NEWS
>>>> @@ -26,6 +26,7 @@ v2.15.0 - xx xxx xxxx
>>>>       - DPDK:
>>>>         * Removed support for vhost-user dequeue zero-copy.
>>>>         * Add support for DPDK 20.11.
>>>> +     * Add hardware offload support for tunnel pop action (experimental).
>>>>       - Userspace datapath:
>>>>         * Add the 'pmd' option to "ovs-appctl dpctl/dump-flows", which
>>>>           restricts a flow dump to a single PMD thread if set.
>>>> diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
>>>> index 78f866080..493cc9159 100644
>>>> --- a/lib/netdev-offload-dpdk.c
>>>> +++ b/lib/netdev-offload-dpdk.c
>>>> @@ -140,15 +140,30 @@ struct flow_actions {
>>>>        struct rte_flow_action *actions;
>>>>        int cnt;
>>>>        int current_max;
>>>> +    struct netdev *tnl_netdev;
>>>> +    /* tnl_actions is the opaque array of actions returned by the PMD. */
>>>> +    struct rte_flow_action *tnl_actions;
>>> Why is this an opaque array ? Since it is struct rte_flow_action, OVS
>>> knows the type and members. Is it opaque because the value of
>>> rte_flow_action_type member is unknown to OVS ? Is it a private action
>>> type and if so how does the PMD ensure that it doesn't conflict with
>>> standard action types ?
>> True it is not used by OVS, but that's not why it opaque. Although it is
>> struct rte_flow_action array, the PMD may use its own private actions,
>> not defined in rte_flow.h, thus not known to the application.
>>
>> The details of this array is under the PMD's responsibility.
> So this means the PMD has to pick a range of private action values
> that are not defined in rte_flow.h,  but what if later a new action
> type with the same value is added to rte_flow.h ?
> The other question is if the PMD could use one of the existing action
> types in rte_flow.h [i.e, to avoid defining its own private action
> types] and return it in the opaque action array ?

The goal of the API is to be able for each PMD to have its own 
implementation, private actions or not.

For the application, this is opaque, as it doesn't know the details of it.

If there is a change in rte_flow.h, it means ABI change in DPDK. DPDK 
release policy protects us in a sense.

>
>>>> +    uint32_t num_of_tnl_actions;
>>>> +    /* tnl_actions_pos is where the tunnel actions starts within the 'actions'
>>>> +     * field.
>>>> +     */
>>>> +    int tnl_actions_pos;
>>> Names should indicate that they are private or pmd specific ?
>>>
>>> tnl_actions --> tnl_private_actions or tnl_pmd_actions
>>> num_of_tnl_actions --> num_private_tnl_actions or num_pmd_tnl_actions
>>> tnl_actions_pos --> tnl_private_actions_pos or tnl_pmd_actions_pos
>> OK. _pmd_
>>>> +    struct ds s_tnl;
>>>>    };
>>>>
>>>>    static void
>>>> -dump_flow_attr(struct ds *s, const struct rte_flow_attr *attr)
>>>> +dump_flow_attr(struct ds *s, struct ds *s_extra,
>>>> +               const struct rte_flow_attr *attr,
>>>> +               struct flow_actions *flow_actions)
>>>>    {
>>>> -    ds_put_format(s, "%s%spriority %"PRIu32" group %"PRIu32" %s",
>>>> +    if (flow_actions->num_of_tnl_actions) {
>>>> +        ds_clone(s_extra, &flow_actions->s_tnl);
>>>> +    }
>>>> +    ds_put_format(s, "%s%spriority %"PRIu32" group %"PRIu32" %s%s",
>>>>                      attr->ingress  ? "ingress " : "",
>>>>                      attr->egress   ? "egress " : "", attr->priority, attr->group,
>>>> -                  attr->transfer ? "transfer " : "");
>>>> +                  attr->transfer ? "transfer " : "",
>>>> +                  flow_actions->num_of_tnl_actions ? "tunnel_set 1 " : "");
>>>>    }
>>>>
>>>>    /* Adds one pattern item 'field' with the 'mask' to dynamic string 's' using
>>>> @@ -395,9 +410,19 @@ dump_vxlan_encap(struct ds *s, const struct rte_flow_item *items)
>>>>
>>>>    static void
>>>>    dump_flow_action(struct ds *s, struct ds *s_extra,
>>>> -                 const struct rte_flow_action *actions)
>>>> +                 struct flow_actions *flow_actions, int act_index)
>>>>    {
>>>> -    if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
>>>> +    const struct rte_flow_action *actions = &flow_actions->actions[act_index];
>>>> +
>>>> +    if (actions->type == RTE_FLOW_ACTION_TYPE_END) {
>>>> +        ds_put_cstr(s, "end");
>>>> +    } else if (flow_actions->num_of_tnl_actions &&
>>>> +               act_index >= flow_actions->tnl_actions_pos &&
>>>> +               act_index < flow_actions->tnl_actions_pos +
>>>> +                           flow_actions->num_of_tnl_actions) {
>>>> +        /* Opaque PMD tunnel actions is skipped. */
>>> Wouldn't it be useful to at least print the value of PMD action types ?
>> The only way we can print them as raw numbers. I see little added value
>> for this on one hand, but on the other hand it will defect the testpmd
>> format print, so I think better to avoid.
> It might be useful for debugging ?
I think maybe for DPDK debug, can be added in testpmd, but not useful 
for OVS.
>>>> +        return;
>>>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
>>>>            const struct rte_flow_action_mark *mark = actions->conf;
>>>>
>>>>            ds_put_cstr(s, "mark ");
>>>> @@ -528,6 +553,14 @@ dump_flow_action(struct ds *s, struct ds *s_extra,
>>>>            ds_put_cstr(s, "vxlan_encap / ");
>>>>            dump_vxlan_encap(s_extra, items);
>>>>            ds_put_cstr(s_extra, ";");
>>>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_JUMP) {
>>>> +        const struct rte_flow_action_jump *jump = actions->conf;
>>>> +
>>>> +        ds_put_cstr(s, "jump ");
>>>> +        if (jump) {
>>>> +            ds_put_format(s, "group %"PRIu32" ", jump->group);
>>>> +        }
>>>> +        ds_put_cstr(s, "/ ");
>>>>        } else {
>>>>            ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
>>>>        }
>>>> @@ -537,20 +570,21 @@ static struct ds *
>>>>    dump_flow(struct ds *s, struct ds *s_extra,
>>>>              const struct rte_flow_attr *attr,
>>>>              const struct rte_flow_item *items,
>>>> -          const struct rte_flow_action *actions)
>>>> +          struct flow_actions *flow_actions)
>>>>    {
>>>> +    int i;
>>>> +
>>>>        if (attr) {
>>>> -        dump_flow_attr(s, attr);
>>>> +        dump_flow_attr(s, s_extra, attr, flow_actions);
>>>>        }
>>>>        ds_put_cstr(s, "pattern ");
>>>>        while (items && items->type != RTE_FLOW_ITEM_TYPE_END) {
>>>>            dump_flow_pattern(s, items++);
>>>>        }
>>>>        ds_put_cstr(s, "end actions ");
>>>> -    while (actions && actions->type != RTE_FLOW_ACTION_TYPE_END) {
>>>> -        dump_flow_action(s, s_extra, actions++);
>>>> +    for (i = 0; i < flow_actions->cnt; i++) {
>>>> +        dump_flow_action(s, s_extra, flow_actions, i);
>>>>        }
>>>> -    ds_put_cstr(s, "end");
>>>>        return s;
>>>>    }
>>>>
>>>> @@ -558,9 +592,10 @@ static struct rte_flow *
>>>>    netdev_offload_dpdk_flow_create(struct netdev *netdev,
>>>>                                    const struct rte_flow_attr *attr,
>>>>                                    const struct rte_flow_item *items,
>>>> -                                const struct rte_flow_action *actions,
>>>> +                                struct flow_actions *flow_actions,
>>>>                                    struct rte_flow_error *error)
>>>>    {
>>>> +    const struct rte_flow_action *actions = flow_actions->actions;
>>>>        struct ds s_extra = DS_EMPTY_INITIALIZER;
>>>>        struct ds s = DS_EMPTY_INITIALIZER;
>>>>        struct rte_flow *flow;
>>>> @@ -569,7 +604,7 @@ netdev_offload_dpdk_flow_create(struct netdev *netdev,
>>>>        flow = netdev_dpdk_rte_flow_create(netdev, attr, items, actions, error);
>>>>        if (flow) {
>>>>            if (!VLOG_DROP_DBG(&rl)) {
>>>> -            dump_flow(&s, &s_extra, attr, items, actions);
>>>> +            dump_flow(&s, &s_extra, attr, items, flow_actions);
>>>>                extra_str = ds_cstr(&s_extra);
>>>>                VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR" %s  flow create %d %s",
>>>>                            netdev_get_name(netdev), (intptr_t) flow, extra_str,
>>>> @@ -584,7 +619,7 @@ netdev_offload_dpdk_flow_create(struct netdev *netdev,
>>>>            VLOG_RL(&rl, level, "%s: rte_flow creation failed: %d (%s).",
>>>>                    netdev_get_name(netdev), error->type, error->message);
>>>>            if (!vlog_should_drop(&this_module, level, &rl)) {
>>>> -            dump_flow(&s, &s_extra, attr, items, actions);
>>>> +            dump_flow(&s, &s_extra, attr, items, flow_actions);
>>>>                extra_str = ds_cstr(&s_extra);
>>>>                VLOG_RL(&rl, level, "%s: Failed flow: %s  flow create %d %s",
>>>>                        netdev_get_name(netdev), extra_str,
>>>> @@ -640,6 +675,23 @@ add_flow_action(struct flow_actions *actions, enum rte_flow_action_type type,
>>>>        actions->cnt++;
>>>>    }
>>>>
>>>> +static void
>>>> +add_flow_tnl_actions(struct flow_actions *actions,
>>>> +                     struct netdev *tnl_netdev,
>>>> +                     struct rte_flow_action *tnl_actions,
>>>> +                     uint32_t num_of_tnl_actions)
>>>> +{
>>>> +    int i;
>>>> +
>>>> +    actions->tnl_netdev = tnl_netdev;
>>>> +    actions->tnl_actions_pos = actions->cnt;
>>>> +    actions->tnl_actions = tnl_actions;
>>>> +    actions->num_of_tnl_actions = num_of_tnl_actions;
>>>> +    for (i = 0; i < num_of_tnl_actions; i++) {
>>>> +        add_flow_action(actions, tnl_actions[i].type, tnl_actions[i].conf);
>>>> +    }
>>>> +}
>>>> +
>>>>    static void
>>>>    free_flow_patterns(struct flow_patterns *patterns)
>>>>    {
>>>> @@ -661,9 +713,23 @@ free_flow_patterns(struct flow_patterns *patterns)
>>>>    static void
>>>>    free_flow_actions(struct flow_actions *actions)
>>>>    {
>>>> +    struct rte_flow_error error;
>>>>        int i;
>>>>
>>>>        for (i = 0; i < actions->cnt; i++) {
>>>> +        if (actions->num_of_tnl_actions && i == actions->tnl_actions_pos) {
>>>> +            if (netdev_dpdk_rte_flow_tunnel_action_decap_release
>>>> +                    (actions->tnl_netdev, actions->tnl_actions,
>>>> +                     actions->num_of_tnl_actions, &error)) {
>>>> +                VLOG_DBG_RL(&rl, "%s: "
>>>> +                            "netdev_dpdk_rte_flow_tunnel_action_decap_release "
>>>> +                            "failed: %d (%s).",
>>>> +                            netdev_get_name(actions->tnl_netdev),
>>>> +                            error.type, error.message);
>>>> +            }
>>>> +            i += actions->num_of_tnl_actions - 1;
>>>> +            continue;
>>>> +        }
>>>>            if (actions->actions[i].conf) {
>>>>                free(CONST_CAST(void *, actions->actions[i].conf));
>>>>            }
>>>> @@ -671,6 +737,7 @@ free_flow_actions(struct flow_actions *actions)
>>>>        free(actions->actions);
>>>>        actions->actions = NULL;
>>>>        actions->cnt = 0;
>>>> +    ds_destroy(&actions->s_tnl);
>>>>    }
>>>>
>>>>    static int
>>>> @@ -960,7 +1027,7 @@ netdev_offload_dpdk_mark_rss(struct flow_patterns *patterns,
>>>>        add_flow_mark_rss_actions(&actions, flow_mark, netdev);
>>>>
>>>>        flow = netdev_offload_dpdk_flow_create(netdev, &flow_attr, patterns->items,
>>>> -                                           actions.actions, &error);
>>>> +                                           &actions, &error);
>>>>
>>>>        free_flow_actions(&actions);
>>>>        return flow;
>>>> @@ -1307,6 +1374,78 @@ parse_clone_actions(struct netdev *netdev,
>>>>        return 0;
>>>>    }
>>>>
>>>> +static void
>>>> +add_jump_action(struct flow_actions *actions, uint32_t group)
>>>> +{
>>>> +    struct rte_flow_action_jump *jump = xzalloc (sizeof *jump);
>>>> +
>>>> +    jump->group = group;
>>>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_JUMP, jump);
>>>> +}
>>>> +
>>>> +static int
>>>> +vport_to_rte_tunnel(struct netdev *vport,
>>>> +                    struct rte_flow_tunnel *tunnel,
>>>> +                    struct netdev *netdev,
>>>> +                    struct ds *s_tnl)
>>>> +{
>>>> +    const struct netdev_tunnel_config *tnl_cfg;
>>>> +
>>>> +    memset(tunnel, 0, sizeof *tunnel);
>>>> +    if (!strcmp(netdev_get_type(vport), "vxlan")) {
>>>> +        tunnel->type = RTE_FLOW_ITEM_TYPE_VXLAN;
>>>> +        tnl_cfg = netdev_get_tunnel_config(vport);
>>>> +        if (!tnl_cfg) {
>>>> +            return -1;
>>>> +        }
>>>> +        tunnel->tp_dst = tnl_cfg->dst_port;
>>>> +        if (!VLOG_DROP_DBG(&rl)) {
>>>> +            ds_put_format(s_tnl, "flow tunnel create %d type vxlan; ",
>>>> +                          netdev_dpdk_get_port_id(netdev));
>>>> +        }
>>>> +    } else {
>>>> +        OVS_NOT_REACHED();
>>>> +    }
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static int
>>>> +add_tnl_pop_action(struct netdev *netdev,
>>>> +                   struct flow_actions *actions,
>>>> +                   const struct nlattr *nla)
>>>> +{
>>>> +    struct rte_flow_action *tnl_actions = NULL;
>>>> +    uint32_t num_of_tnl_actions = 0;
>>>> +    struct rte_flow_tunnel tunnel;
>>>> +    struct rte_flow_error error;
>>>> +    struct netdev *vport;
>>>> +    odp_port_t port;
>>>> +    int ret;
>>>> +
>>>> +    port = nl_attr_get_odp_port(nla);
>>>> +    vport = netdev_ports_get(port, netdev->dpif_type);
>>>> +    if (vport == NULL) {
>>>> +        return -1;
>>>> +    }
>>>> +    ret = vport_to_rte_tunnel(vport, &tunnel, netdev, &actions->s_tnl);
>>>> +    netdev_close(vport);
>>>> +    if (ret) {
>>>> +        return ret;
>>>> +    }
>>>> +    ret = netdev_dpdk_rte_flow_tunnel_decap_set(netdev, &tunnel, &tnl_actions,
>>>> +                                                &num_of_tnl_actions, &error);
>>>> +    if (ret) {
>>>> +        VLOG_DBG_RL(&rl, "%s: netdev_dpdk_rte_flow_tunnel_decap_set failed: "
>>>> +                    "%d (%s).", netdev_get_name(netdev), error.type,
>>>> +                    error.message);
>>>> +        return ret;
>>>> +    }
>>>> +    add_flow_tnl_actions(actions, netdev, tnl_actions, num_of_tnl_actions);
>>>> +    add_jump_action(actions, 0);
>>> It is not clear why jump action is added and with a group value of 0 ?
>> This is equivalent to dp_netdev_recirculate, when the group represents
>> the recirc_id that is kept 0.
>>
>> I will add a comment.
> But this is flow-F1 and there's no recirculation at this point. How
> should the PMD interpret this ?
The PMD interpretation is for the PMD to decide. For example:

- Proceed in another table.

- Keep it in some database to be ready for "F2"-like flow, so to merge 
them and apply as a single flow.

- Any other way. It is up to the PMD.

>
>
>>> Thanks,
>>> -Harsha
>>>> +    return 0;
>>>> +}
>>>> +
>>>>    static int
>>>>    parse_flow_actions(struct netdev *netdev,
>>>>                       struct flow_actions *actions,
>>>> @@ -1351,6 +1490,10 @@ parse_flow_actions(struct netdev *netdev,
>>>>                                        clone_actions_len)) {
>>>>                    return -1;
>>>>                }
>>>> +        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_TUNNEL_POP) {
>>>> +            if (add_tnl_pop_action(netdev, actions, nla)) {
>>>> +                return -1;
>>>> +            }
>>>>            } else {
>>>>                VLOG_DBG_RL(&rl, "Unsupported action type %d", nl_attr_type(nla));
>>>>                return -1;
>>>> @@ -1383,7 +1526,7 @@ netdev_offload_dpdk_actions(struct netdev *netdev,
>>>>            goto out;
>>>>        }
>>>>        flow = netdev_offload_dpdk_flow_create(netdev, &flow_attr, patterns->items,
>>>> -                                           actions.actions, &error);
>>>> +                                           &actions, &error);
>>>>    out:
>>>>        free_flow_actions(&actions);
>>>>        return flow;
>>>> --
>>>> 2.28.0.546.g385c171
>>>>
Sriharsha Basavapatna March 1, 2021, 11:30 a.m. UTC | #5
On Thu, Feb 25, 2021 at 7:50 PM Eli Britstein <elibr@nvidia.com> wrote:
>
>
> On 2/25/2021 9:35 AM, Sriharsha Basavapatna wrote:
> > On Wed, Feb 24, 2021 at 1:50 PM Eli Britstein <elibr@nvidia.com> wrote:
> >>
> >> On 2/24/2021 9:52 AM, Sriharsha Basavapatna wrote:
> >>> On Wed, Feb 10, 2021 at 8:57 PM Eli Britstein <elibr@nvidia.com> wrote:
> >>>> Support tunnel pop action.
> >>>>
> >>>> Signed-off-by: Eli Britstein <elibr@nvidia.com>
> >>>> Reviewed-by: Gaetan Rivet <gaetanr@nvidia.com>
> >>>> ---
> >>>>    Documentation/howto/dpdk.rst |   1 +
> >>>>    NEWS                         |   1 +
> >>>>    lib/netdev-offload-dpdk.c    | 173 ++++++++++++++++++++++++++++++++---
> >>>>    3 files changed, 160 insertions(+), 15 deletions(-)
> >>>>
> >>>> diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
> >>>> index f0d45e47b..4918d80f3 100644
> >>>> --- a/Documentation/howto/dpdk.rst
> >>>> +++ b/Documentation/howto/dpdk.rst
> >>>> @@ -398,6 +398,7 @@ Supported actions for hardware offload are:
> >>>>    - VLAN Push/Pop (push_vlan/pop_vlan).
> >>>>    - Modification of IPv6 (set_field:<ADDR>->ipv6_src/ipv6_dst/mod_nw_ttl).
> >>>>    - Clone/output (tnl_push and output) for encapsulating over a tunnel.
> >>>> +- Tunnel pop, for changing from a physical port to a vport.
> >>>>
> >>>>    Further Reading
> >>>>    ---------------
> >>>> diff --git a/NEWS b/NEWS
> >>>> index a7bffce97..6850d5621 100644
> >>>> --- a/NEWS
> >>>> +++ b/NEWS
> >>>> @@ -26,6 +26,7 @@ v2.15.0 - xx xxx xxxx
> >>>>       - DPDK:
> >>>>         * Removed support for vhost-user dequeue zero-copy.
> >>>>         * Add support for DPDK 20.11.
> >>>> +     * Add hardware offload support for tunnel pop action (experimental).
> >>>>       - Userspace datapath:
> >>>>         * Add the 'pmd' option to "ovs-appctl dpctl/dump-flows", which
> >>>>           restricts a flow dump to a single PMD thread if set.
> >>>> diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
> >>>> index 78f866080..493cc9159 100644
> >>>> --- a/lib/netdev-offload-dpdk.c
> >>>> +++ b/lib/netdev-offload-dpdk.c
> >>>> @@ -140,15 +140,30 @@ struct flow_actions {
> >>>>        struct rte_flow_action *actions;
> >>>>        int cnt;
> >>>>        int current_max;
> >>>> +    struct netdev *tnl_netdev;
> >>>> +    /* tnl_actions is the opaque array of actions returned by the PMD. */
> >>>> +    struct rte_flow_action *tnl_actions;
> >>> Why is this an opaque array ? Since it is struct rte_flow_action, OVS
> >>> knows the type and members. Is it opaque because the value of
> >>> rte_flow_action_type member is unknown to OVS ? Is it a private action
> >>> type and if so how does the PMD ensure that it doesn't conflict with
> >>> standard action types ?
> >> True it is not used by OVS, but that's not why it opaque. Although it is
> >> struct rte_flow_action array, the PMD may use its own private actions,
> >> not defined in rte_flow.h, thus not known to the application.
> >>
> >> The details of this array is under the PMD's responsibility.
> > So this means the PMD has to pick a range of private action values
> > that are not defined in rte_flow.h,  but what if later a new action
> > type with the same value is added to rte_flow.h ?
> > The other question is if the PMD could use one of the existing action
> > types in rte_flow.h [i.e, to avoid defining its own private action
> > types] and return it in the opaque action array ?
>
> The goal of the API is to be able for each PMD to have its own
> implementation, private actions or not.
>
> For the application, this is opaque, as it doesn't know the details of it.
>
> If there is a change in rte_flow.h, it means ABI change in DPDK. DPDK
> release policy protects us in a sense.

Take this example: assume action type 100 is not yet defined in
rte_flow.h and a PMD uses this value for a new private action that it
defines. Later, if a new standard action type is added to rte_flow.h
with the same value, then the PMD has no way to distinguish if the
action is a standard action or its private action. Also, this private
action type defined by some vendor's PMD could be 100 and it could be
200 in another vendor's PMD. So don't we need to ensure that the
standard action types and private action types don't overlap ? One way
to handle this might be to reserve a range of values in rte_flow.h as
a vendor specific range, for example 100 to 200. And each PMD could
define its own private action types within this range, since it is
ensured that no standard action types would be defined in that range.

>
> >
> >>>> +    uint32_t num_of_tnl_actions;
> >>>> +    /* tnl_actions_pos is where the tunnel actions starts within the 'actions'
> >>>> +     * field.
> >>>> +     */
> >>>> +    int tnl_actions_pos;
> >>> Names should indicate that they are private or pmd specific ?
> >>>
> >>> tnl_actions --> tnl_private_actions or tnl_pmd_actions
> >>> num_of_tnl_actions --> num_private_tnl_actions or num_pmd_tnl_actions
> >>> tnl_actions_pos --> tnl_private_actions_pos or tnl_pmd_actions_pos
> >> OK. _pmd_
> >>>> +    struct ds s_tnl;
> >>>>    };
> >>>>
> >>>>    static void
> >>>> -dump_flow_attr(struct ds *s, const struct rte_flow_attr *attr)
> >>>> +dump_flow_attr(struct ds *s, struct ds *s_extra,
> >>>> +               const struct rte_flow_attr *attr,
> >>>> +               struct flow_actions *flow_actions)
> >>>>    {
> >>>> -    ds_put_format(s, "%s%spriority %"PRIu32" group %"PRIu32" %s",
> >>>> +    if (flow_actions->num_of_tnl_actions) {
> >>>> +        ds_clone(s_extra, &flow_actions->s_tnl);
> >>>> +    }
> >>>> +    ds_put_format(s, "%s%spriority %"PRIu32" group %"PRIu32" %s%s",
> >>>>                      attr->ingress  ? "ingress " : "",
> >>>>                      attr->egress   ? "egress " : "", attr->priority, attr->group,
> >>>> -                  attr->transfer ? "transfer " : "");
> >>>> +                  attr->transfer ? "transfer " : "",
> >>>> +                  flow_actions->num_of_tnl_actions ? "tunnel_set 1 " : "");
> >>>>    }
> >>>>
> >>>>    /* Adds one pattern item 'field' with the 'mask' to dynamic string 's' using
> >>>> @@ -395,9 +410,19 @@ dump_vxlan_encap(struct ds *s, const struct rte_flow_item *items)
> >>>>
> >>>>    static void
> >>>>    dump_flow_action(struct ds *s, struct ds *s_extra,
> >>>> -                 const struct rte_flow_action *actions)
> >>>> +                 struct flow_actions *flow_actions, int act_index)
> >>>>    {
> >>>> -    if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
> >>>> +    const struct rte_flow_action *actions = &flow_actions->actions[act_index];
> >>>> +
> >>>> +    if (actions->type == RTE_FLOW_ACTION_TYPE_END) {
> >>>> +        ds_put_cstr(s, "end");
> >>>> +    } else if (flow_actions->num_of_tnl_actions &&
> >>>> +               act_index >= flow_actions->tnl_actions_pos &&
> >>>> +               act_index < flow_actions->tnl_actions_pos +
> >>>> +                           flow_actions->num_of_tnl_actions) {
> >>>> +        /* Opaque PMD tunnel actions is skipped. */
> >>> Wouldn't it be useful to at least print the value of PMD action types ?
> >> The only way we can print them as raw numbers. I see little added value
> >> for this on one hand, but on the other hand it will defect the testpmd
> >> format print, so I think better to avoid.
> > It might be useful for debugging ?
> I think maybe for DPDK debug, can be added in testpmd, but not useful
> for OVS.
> >>>> +        return;
> >>>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
> >>>>            const struct rte_flow_action_mark *mark = actions->conf;
> >>>>
> >>>>            ds_put_cstr(s, "mark ");
> >>>> @@ -528,6 +553,14 @@ dump_flow_action(struct ds *s, struct ds *s_extra,
> >>>>            ds_put_cstr(s, "vxlan_encap / ");
> >>>>            dump_vxlan_encap(s_extra, items);
> >>>>            ds_put_cstr(s_extra, ";");
> >>>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_JUMP) {
> >>>> +        const struct rte_flow_action_jump *jump = actions->conf;
> >>>> +
> >>>> +        ds_put_cstr(s, "jump ");
> >>>> +        if (jump) {
> >>>> +            ds_put_format(s, "group %"PRIu32" ", jump->group);
> >>>> +        }
> >>>> +        ds_put_cstr(s, "/ ");
> >>>>        } else {
> >>>>            ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
> >>>>        }
> >>>> @@ -537,20 +570,21 @@ static struct ds *
> >>>>    dump_flow(struct ds *s, struct ds *s_extra,
> >>>>              const struct rte_flow_attr *attr,
> >>>>              const struct rte_flow_item *items,
> >>>> -          const struct rte_flow_action *actions)
> >>>> +          struct flow_actions *flow_actions)
> >>>>    {
> >>>> +    int i;
> >>>> +
> >>>>        if (attr) {
> >>>> -        dump_flow_attr(s, attr);
> >>>> +        dump_flow_attr(s, s_extra, attr, flow_actions);
> >>>>        }
> >>>>        ds_put_cstr(s, "pattern ");
> >>>>        while (items && items->type != RTE_FLOW_ITEM_TYPE_END) {
> >>>>            dump_flow_pattern(s, items++);
> >>>>        }
> >>>>        ds_put_cstr(s, "end actions ");
> >>>> -    while (actions && actions->type != RTE_FLOW_ACTION_TYPE_END) {
> >>>> -        dump_flow_action(s, s_extra, actions++);
> >>>> +    for (i = 0; i < flow_actions->cnt; i++) {
> >>>> +        dump_flow_action(s, s_extra, flow_actions, i);
> >>>>        }
> >>>> -    ds_put_cstr(s, "end");
> >>>>        return s;
> >>>>    }
> >>>>
> >>>> @@ -558,9 +592,10 @@ static struct rte_flow *
> >>>>    netdev_offload_dpdk_flow_create(struct netdev *netdev,
> >>>>                                    const struct rte_flow_attr *attr,
> >>>>                                    const struct rte_flow_item *items,
> >>>> -                                const struct rte_flow_action *actions,
> >>>> +                                struct flow_actions *flow_actions,
> >>>>                                    struct rte_flow_error *error)
> >>>>    {
> >>>> +    const struct rte_flow_action *actions = flow_actions->actions;
> >>>>        struct ds s_extra = DS_EMPTY_INITIALIZER;
> >>>>        struct ds s = DS_EMPTY_INITIALIZER;
> >>>>        struct rte_flow *flow;
> >>>> @@ -569,7 +604,7 @@ netdev_offload_dpdk_flow_create(struct netdev *netdev,
> >>>>        flow = netdev_dpdk_rte_flow_create(netdev, attr, items, actions, error);
> >>>>        if (flow) {
> >>>>            if (!VLOG_DROP_DBG(&rl)) {
> >>>> -            dump_flow(&s, &s_extra, attr, items, actions);
> >>>> +            dump_flow(&s, &s_extra, attr, items, flow_actions);
> >>>>                extra_str = ds_cstr(&s_extra);
> >>>>                VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR" %s  flow create %d %s",
> >>>>                            netdev_get_name(netdev), (intptr_t) flow, extra_str,
> >>>> @@ -584,7 +619,7 @@ netdev_offload_dpdk_flow_create(struct netdev *netdev,
> >>>>            VLOG_RL(&rl, level, "%s: rte_flow creation failed: %d (%s).",
> >>>>                    netdev_get_name(netdev), error->type, error->message);
> >>>>            if (!vlog_should_drop(&this_module, level, &rl)) {
> >>>> -            dump_flow(&s, &s_extra, attr, items, actions);
> >>>> +            dump_flow(&s, &s_extra, attr, items, flow_actions);
> >>>>                extra_str = ds_cstr(&s_extra);
> >>>>                VLOG_RL(&rl, level, "%s: Failed flow: %s  flow create %d %s",
> >>>>                        netdev_get_name(netdev), extra_str,
> >>>> @@ -640,6 +675,23 @@ add_flow_action(struct flow_actions *actions, enum rte_flow_action_type type,
> >>>>        actions->cnt++;
> >>>>    }
> >>>>
> >>>> +static void
> >>>> +add_flow_tnl_actions(struct flow_actions *actions,
> >>>> +                     struct netdev *tnl_netdev,
> >>>> +                     struct rte_flow_action *tnl_actions,
> >>>> +                     uint32_t num_of_tnl_actions)
> >>>> +{
> >>>> +    int i;
> >>>> +
> >>>> +    actions->tnl_netdev = tnl_netdev;
> >>>> +    actions->tnl_actions_pos = actions->cnt;
> >>>> +    actions->tnl_actions = tnl_actions;
> >>>> +    actions->num_of_tnl_actions = num_of_tnl_actions;
> >>>> +    for (i = 0; i < num_of_tnl_actions; i++) {
> >>>> +        add_flow_action(actions, tnl_actions[i].type, tnl_actions[i].conf);
> >>>> +    }
> >>>> +}
> >>>> +
> >>>>    static void
> >>>>    free_flow_patterns(struct flow_patterns *patterns)
> >>>>    {
> >>>> @@ -661,9 +713,23 @@ free_flow_patterns(struct flow_patterns *patterns)
> >>>>    static void
> >>>>    free_flow_actions(struct flow_actions *actions)
> >>>>    {
> >>>> +    struct rte_flow_error error;
> >>>>        int i;
> >>>>
> >>>>        for (i = 0; i < actions->cnt; i++) {
> >>>> +        if (actions->num_of_tnl_actions && i == actions->tnl_actions_pos) {
> >>>> +            if (netdev_dpdk_rte_flow_tunnel_action_decap_release
> >>>> +                    (actions->tnl_netdev, actions->tnl_actions,
> >>>> +                     actions->num_of_tnl_actions, &error)) {
> >>>> +                VLOG_DBG_RL(&rl, "%s: "
> >>>> +                            "netdev_dpdk_rte_flow_tunnel_action_decap_release "
> >>>> +                            "failed: %d (%s).",
> >>>> +                            netdev_get_name(actions->tnl_netdev),
> >>>> +                            error.type, error.message);
> >>>> +            }
> >>>> +            i += actions->num_of_tnl_actions - 1;
> >>>> +            continue;
> >>>> +        }
> >>>>            if (actions->actions[i].conf) {
> >>>>                free(CONST_CAST(void *, actions->actions[i].conf));
> >>>>            }
> >>>> @@ -671,6 +737,7 @@ free_flow_actions(struct flow_actions *actions)
> >>>>        free(actions->actions);
> >>>>        actions->actions = NULL;
> >>>>        actions->cnt = 0;
> >>>> +    ds_destroy(&actions->s_tnl);
> >>>>    }
> >>>>
> >>>>    static int
> >>>> @@ -960,7 +1027,7 @@ netdev_offload_dpdk_mark_rss(struct flow_patterns *patterns,
> >>>>        add_flow_mark_rss_actions(&actions, flow_mark, netdev);
> >>>>
> >>>>        flow = netdev_offload_dpdk_flow_create(netdev, &flow_attr, patterns->items,
> >>>> -                                           actions.actions, &error);
> >>>> +                                           &actions, &error);
> >>>>
> >>>>        free_flow_actions(&actions);
> >>>>        return flow;
> >>>> @@ -1307,6 +1374,78 @@ parse_clone_actions(struct netdev *netdev,
> >>>>        return 0;
> >>>>    }
> >>>>
> >>>> +static void
> >>>> +add_jump_action(struct flow_actions *actions, uint32_t group)
> >>>> +{
> >>>> +    struct rte_flow_action_jump *jump = xzalloc (sizeof *jump);
> >>>> +
> >>>> +    jump->group = group;
> >>>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_JUMP, jump);
> >>>> +}
> >>>> +
> >>>> +static int
> >>>> +vport_to_rte_tunnel(struct netdev *vport,
> >>>> +                    struct rte_flow_tunnel *tunnel,
> >>>> +                    struct netdev *netdev,
> >>>> +                    struct ds *s_tnl)
> >>>> +{
> >>>> +    const struct netdev_tunnel_config *tnl_cfg;
> >>>> +
> >>>> +    memset(tunnel, 0, sizeof *tunnel);
> >>>> +    if (!strcmp(netdev_get_type(vport), "vxlan")) {
> >>>> +        tunnel->type = RTE_FLOW_ITEM_TYPE_VXLAN;
> >>>> +        tnl_cfg = netdev_get_tunnel_config(vport);
> >>>> +        if (!tnl_cfg) {
> >>>> +            return -1;
> >>>> +        }
> >>>> +        tunnel->tp_dst = tnl_cfg->dst_port;
> >>>> +        if (!VLOG_DROP_DBG(&rl)) {
> >>>> +            ds_put_format(s_tnl, "flow tunnel create %d type vxlan; ",
> >>>> +                          netdev_dpdk_get_port_id(netdev));
> >>>> +        }
> >>>> +    } else {
> >>>> +        OVS_NOT_REACHED();
> >>>> +    }
> >>>> +
> >>>> +    return 0;
> >>>> +}
> >>>> +
> >>>> +static int
> >>>> +add_tnl_pop_action(struct netdev *netdev,
> >>>> +                   struct flow_actions *actions,
> >>>> +                   const struct nlattr *nla)
> >>>> +{
> >>>> +    struct rte_flow_action *tnl_actions = NULL;
> >>>> +    uint32_t num_of_tnl_actions = 0;
> >>>> +    struct rte_flow_tunnel tunnel;
> >>>> +    struct rte_flow_error error;
> >>>> +    struct netdev *vport;
> >>>> +    odp_port_t port;
> >>>> +    int ret;
> >>>> +
> >>>> +    port = nl_attr_get_odp_port(nla);
> >>>> +    vport = netdev_ports_get(port, netdev->dpif_type);
> >>>> +    if (vport == NULL) {
> >>>> +        return -1;
> >>>> +    }
> >>>> +    ret = vport_to_rte_tunnel(vport, &tunnel, netdev, &actions->s_tnl);
> >>>> +    netdev_close(vport);
> >>>> +    if (ret) {
> >>>> +        return ret;
> >>>> +    }
> >>>> +    ret = netdev_dpdk_rte_flow_tunnel_decap_set(netdev, &tunnel, &tnl_actions,
> >>>> +                                                &num_of_tnl_actions, &error);
> >>>> +    if (ret) {
> >>>> +        VLOG_DBG_RL(&rl, "%s: netdev_dpdk_rte_flow_tunnel_decap_set failed: "
> >>>> +                    "%d (%s).", netdev_get_name(netdev), error.type,
> >>>> +                    error.message);
> >>>> +        return ret;
> >>>> +    }
> >>>> +    add_flow_tnl_actions(actions, netdev, tnl_actions, num_of_tnl_actions);
> >>>> +    add_jump_action(actions, 0);
> >>> It is not clear why jump action is added and with a group value of 0 ?
> >> This is equivalent to dp_netdev_recirculate, when the group represents
> >> the recirc_id that is kept 0.
> >>
> >> I will add a comment.
> > But this is flow-F1 and there's no recirculation at this point. How
> > should the PMD interpret this ?
> The PMD interpretation is for the PMD to decide. For example:
>
> - Proceed in another table.
>
> - Keep it in some database to be ready for "F2"-like flow, so to merge
> them and apply as a single flow.
>
> - Any other way. It is up to the PMD.
>
> >
> >
> >>> Thanks,
> >>> -Harsha
> >>>> +    return 0;
> >>>> +}
> >>>> +
> >>>>    static int
> >>>>    parse_flow_actions(struct netdev *netdev,
> >>>>                       struct flow_actions *actions,
> >>>> @@ -1351,6 +1490,10 @@ parse_flow_actions(struct netdev *netdev,
> >>>>                                        clone_actions_len)) {
> >>>>                    return -1;
> >>>>                }
> >>>> +        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_TUNNEL_POP) {
> >>>> +            if (add_tnl_pop_action(netdev, actions, nla)) {
> >>>> +                return -1;
> >>>> +            }
> >>>>            } else {
> >>>>                VLOG_DBG_RL(&rl, "Unsupported action type %d", nl_attr_type(nla));
> >>>>                return -1;
> >>>> @@ -1383,7 +1526,7 @@ netdev_offload_dpdk_actions(struct netdev *netdev,
> >>>>            goto out;
> >>>>        }
> >>>>        flow = netdev_offload_dpdk_flow_create(netdev, &flow_attr, patterns->items,
> >>>> -                                           actions.actions, &error);
> >>>> +                                           &actions, &error);
> >>>>    out:
> >>>>        free_flow_actions(&actions);
> >>>>        return flow;
> >>>> --
> >>>> 2.28.0.546.g385c171
> >>>>
Eli Britstein March 1, 2021, 11:50 a.m. UTC | #6
On 3/1/2021 1:30 PM, Sriharsha Basavapatna wrote:
>>>> The details of this array is under the PMD's responsibility.
>>> So this means the PMD has to pick a range of private action values
>>> that are not defined in rte_flow.h,  but what if later a new action
>>> type with the same value is added to rte_flow.h ?
>>> The other question is if the PMD could use one of the existing action
>>> types in rte_flow.h [i.e, to avoid defining its own private action
>>> types] and return it in the opaque action array ?
>> The goal of the API is to be able for each PMD to have its own
>> implementation, private actions or not.
>>
>> For the application, this is opaque, as it doesn't know the details of it.
>>
>> If there is a change in rte_flow.h, it means ABI change in DPDK. DPDK
>> release policy protects us in a sense.
> Take this example: assume action type 100 is not yet defined in
> rte_flow.h and a PMD uses this value for a new private action that it
> defines. Later, if a new standard action type is added to rte_flow.h
> with the same value, then the PMD has no way to distinguish if the
> action is a standard action or its private action. Also, this private
> action type defined by some vendor's PMD could be 100 and it could be
> 200 in another vendor's PMD. So don't we need to ensure that the
> standard action types and private action types don't overlap ? One way
> to handle this might be to reserve a range of values in rte_flow.h as
> a vendor specific range, for example 100 to 200. And each PMD could
> define its own private action types within this range, since it is
> ensured that no standard action types would be defined in that range.

I don't know regarding other PMDs, but at least for mlx5, the "private" 
are negative values, or for unsigned > 2^31, so I don't think they would 
be a conflict.

I suppose DPDK PMDs should follow changes in rte_flow.h. Anyway, it is 
not related to OVS.

>
Ivan Malov March 2, 2021, 9:22 a.m. UTC | #7
Hi Eli,

 From what has been discussed so far it follows that JUMP from group 0 
to the very same group 0 is intentional and corresponds to 
dp_netdev_recirculate. But could you please clarify, is OvS supposed to 
always use the group value of 0 with the tunnel offload model? In other 
words, can PMD expect the OvS to always insert "tunnel set" and "tunnel 
match" rules at group 0?

Can the PMD always expect OvS to use priority level of 0, too?

A brief comment in the code would be useful, too.
Eli Britstein March 2, 2021, 9:30 a.m. UTC | #8
On 3/2/2021 11:22 AM, Ivan Malov wrote:
> External email: Use caution opening links or attachments
>
>
> Hi Eli,
>
> From what has been discussed so far it follows that JUMP from group 0
> to the very same group 0 is intentional and corresponds to
> dp_netdev_recirculate. But could you please clarify, is OvS supposed to
> always use the group value of 0 with the tunnel offload model? In other
> words, can PMD expect the OvS to always insert "tunnel set" and "tunnel
> match" rules at group 0?
>
> Can the PMD always expect OvS to use priority level of 0, too?
Hi Ivan,

Thanks for your comments.

In the proposed series, yes. OVS will always insert tunnel-set and 
tunnel-match rules in group 0, and in priority 0.

However, the PMD and DPDK in general are not OVS specific, and other 
applications may choose to use in other ways, so the PMD must not assume 
such assumptions.

>
> A brief comment in the code would be useful, too.
Yes, I will add it in v3 to be sent soon.
>
> -- 
> Ivan M
diff mbox series

Patch

diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
index f0d45e47b..4918d80f3 100644
--- a/Documentation/howto/dpdk.rst
+++ b/Documentation/howto/dpdk.rst
@@ -398,6 +398,7 @@  Supported actions for hardware offload are:
 - VLAN Push/Pop (push_vlan/pop_vlan).
 - Modification of IPv6 (set_field:<ADDR>->ipv6_src/ipv6_dst/mod_nw_ttl).
 - Clone/output (tnl_push and output) for encapsulating over a tunnel.
+- Tunnel pop, for changing from a physical port to a vport.
 
 Further Reading
 ---------------
diff --git a/NEWS b/NEWS
index a7bffce97..6850d5621 100644
--- a/NEWS
+++ b/NEWS
@@ -26,6 +26,7 @@  v2.15.0 - xx xxx xxxx
    - DPDK:
      * Removed support for vhost-user dequeue zero-copy.
      * Add support for DPDK 20.11.
+     * Add hardware offload support for tunnel pop action (experimental).
    - Userspace datapath:
      * Add the 'pmd' option to "ovs-appctl dpctl/dump-flows", which
        restricts a flow dump to a single PMD thread if set.
diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 78f866080..493cc9159 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -140,15 +140,30 @@  struct flow_actions {
     struct rte_flow_action *actions;
     int cnt;
     int current_max;
+    struct netdev *tnl_netdev;
+    /* tnl_actions is the opaque array of actions returned by the PMD. */
+    struct rte_flow_action *tnl_actions;
+    uint32_t num_of_tnl_actions;
+    /* tnl_actions_pos is where the tunnel actions starts within the 'actions'
+     * field.
+     */
+    int tnl_actions_pos;
+    struct ds s_tnl;
 };
 
 static void
-dump_flow_attr(struct ds *s, const struct rte_flow_attr *attr)
+dump_flow_attr(struct ds *s, struct ds *s_extra,
+               const struct rte_flow_attr *attr,
+               struct flow_actions *flow_actions)
 {
-    ds_put_format(s, "%s%spriority %"PRIu32" group %"PRIu32" %s",
+    if (flow_actions->num_of_tnl_actions) {
+        ds_clone(s_extra, &flow_actions->s_tnl);
+    }
+    ds_put_format(s, "%s%spriority %"PRIu32" group %"PRIu32" %s%s",
                   attr->ingress  ? "ingress " : "",
                   attr->egress   ? "egress " : "", attr->priority, attr->group,
-                  attr->transfer ? "transfer " : "");
+                  attr->transfer ? "transfer " : "",
+                  flow_actions->num_of_tnl_actions ? "tunnel_set 1 " : "");
 }
 
 /* Adds one pattern item 'field' with the 'mask' to dynamic string 's' using
@@ -395,9 +410,19 @@  dump_vxlan_encap(struct ds *s, const struct rte_flow_item *items)
 
 static void
 dump_flow_action(struct ds *s, struct ds *s_extra,
-                 const struct rte_flow_action *actions)
+                 struct flow_actions *flow_actions, int act_index)
 {
-    if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
+    const struct rte_flow_action *actions = &flow_actions->actions[act_index];
+
+    if (actions->type == RTE_FLOW_ACTION_TYPE_END) {
+        ds_put_cstr(s, "end");
+    } else if (flow_actions->num_of_tnl_actions &&
+               act_index >= flow_actions->tnl_actions_pos &&
+               act_index < flow_actions->tnl_actions_pos +
+                           flow_actions->num_of_tnl_actions) {
+        /* Opaque PMD tunnel actions is skipped. */
+        return;
+    } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
         const struct rte_flow_action_mark *mark = actions->conf;
 
         ds_put_cstr(s, "mark ");
@@ -528,6 +553,14 @@  dump_flow_action(struct ds *s, struct ds *s_extra,
         ds_put_cstr(s, "vxlan_encap / ");
         dump_vxlan_encap(s_extra, items);
         ds_put_cstr(s_extra, ";");
+    } else if (actions->type == RTE_FLOW_ACTION_TYPE_JUMP) {
+        const struct rte_flow_action_jump *jump = actions->conf;
+
+        ds_put_cstr(s, "jump ");
+        if (jump) {
+            ds_put_format(s, "group %"PRIu32" ", jump->group);
+        }
+        ds_put_cstr(s, "/ ");
     } else {
         ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
     }
@@ -537,20 +570,21 @@  static struct ds *
 dump_flow(struct ds *s, struct ds *s_extra,
           const struct rte_flow_attr *attr,
           const struct rte_flow_item *items,
-          const struct rte_flow_action *actions)
+          struct flow_actions *flow_actions)
 {
+    int i;
+
     if (attr) {
-        dump_flow_attr(s, attr);
+        dump_flow_attr(s, s_extra, attr, flow_actions);
     }
     ds_put_cstr(s, "pattern ");
     while (items && items->type != RTE_FLOW_ITEM_TYPE_END) {
         dump_flow_pattern(s, items++);
     }
     ds_put_cstr(s, "end actions ");
-    while (actions && actions->type != RTE_FLOW_ACTION_TYPE_END) {
-        dump_flow_action(s, s_extra, actions++);
+    for (i = 0; i < flow_actions->cnt; i++) {
+        dump_flow_action(s, s_extra, flow_actions, i);
     }
-    ds_put_cstr(s, "end");
     return s;
 }
 
@@ -558,9 +592,10 @@  static struct rte_flow *
 netdev_offload_dpdk_flow_create(struct netdev *netdev,
                                 const struct rte_flow_attr *attr,
                                 const struct rte_flow_item *items,
-                                const struct rte_flow_action *actions,
+                                struct flow_actions *flow_actions,
                                 struct rte_flow_error *error)
 {
+    const struct rte_flow_action *actions = flow_actions->actions;
     struct ds s_extra = DS_EMPTY_INITIALIZER;
     struct ds s = DS_EMPTY_INITIALIZER;
     struct rte_flow *flow;
@@ -569,7 +604,7 @@  netdev_offload_dpdk_flow_create(struct netdev *netdev,
     flow = netdev_dpdk_rte_flow_create(netdev, attr, items, actions, error);
     if (flow) {
         if (!VLOG_DROP_DBG(&rl)) {
-            dump_flow(&s, &s_extra, attr, items, actions);
+            dump_flow(&s, &s_extra, attr, items, flow_actions);
             extra_str = ds_cstr(&s_extra);
             VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR" %s  flow create %d %s",
                         netdev_get_name(netdev), (intptr_t) flow, extra_str,
@@ -584,7 +619,7 @@  netdev_offload_dpdk_flow_create(struct netdev *netdev,
         VLOG_RL(&rl, level, "%s: rte_flow creation failed: %d (%s).",
                 netdev_get_name(netdev), error->type, error->message);
         if (!vlog_should_drop(&this_module, level, &rl)) {
-            dump_flow(&s, &s_extra, attr, items, actions);
+            dump_flow(&s, &s_extra, attr, items, flow_actions);
             extra_str = ds_cstr(&s_extra);
             VLOG_RL(&rl, level, "%s: Failed flow: %s  flow create %d %s",
                     netdev_get_name(netdev), extra_str,
@@ -640,6 +675,23 @@  add_flow_action(struct flow_actions *actions, enum rte_flow_action_type type,
     actions->cnt++;
 }
 
+static void
+add_flow_tnl_actions(struct flow_actions *actions,
+                     struct netdev *tnl_netdev,
+                     struct rte_flow_action *tnl_actions,
+                     uint32_t num_of_tnl_actions)
+{
+    int i;
+
+    actions->tnl_netdev = tnl_netdev;
+    actions->tnl_actions_pos = actions->cnt;
+    actions->tnl_actions = tnl_actions;
+    actions->num_of_tnl_actions = num_of_tnl_actions;
+    for (i = 0; i < num_of_tnl_actions; i++) {
+        add_flow_action(actions, tnl_actions[i].type, tnl_actions[i].conf);
+    }
+}
+
 static void
 free_flow_patterns(struct flow_patterns *patterns)
 {
@@ -661,9 +713,23 @@  free_flow_patterns(struct flow_patterns *patterns)
 static void
 free_flow_actions(struct flow_actions *actions)
 {
+    struct rte_flow_error error;
     int i;
 
     for (i = 0; i < actions->cnt; i++) {
+        if (actions->num_of_tnl_actions && i == actions->tnl_actions_pos) {
+            if (netdev_dpdk_rte_flow_tunnel_action_decap_release
+                    (actions->tnl_netdev, actions->tnl_actions,
+                     actions->num_of_tnl_actions, &error)) {
+                VLOG_DBG_RL(&rl, "%s: "
+                            "netdev_dpdk_rte_flow_tunnel_action_decap_release "
+                            "failed: %d (%s).",
+                            netdev_get_name(actions->tnl_netdev),
+                            error.type, error.message);
+            }
+            i += actions->num_of_tnl_actions - 1;
+            continue;
+        }
         if (actions->actions[i].conf) {
             free(CONST_CAST(void *, actions->actions[i].conf));
         }
@@ -671,6 +737,7 @@  free_flow_actions(struct flow_actions *actions)
     free(actions->actions);
     actions->actions = NULL;
     actions->cnt = 0;
+    ds_destroy(&actions->s_tnl);
 }
 
 static int
@@ -960,7 +1027,7 @@  netdev_offload_dpdk_mark_rss(struct flow_patterns *patterns,
     add_flow_mark_rss_actions(&actions, flow_mark, netdev);
 
     flow = netdev_offload_dpdk_flow_create(netdev, &flow_attr, patterns->items,
-                                           actions.actions, &error);
+                                           &actions, &error);
 
     free_flow_actions(&actions);
     return flow;
@@ -1307,6 +1374,78 @@  parse_clone_actions(struct netdev *netdev,
     return 0;
 }
 
+static void
+add_jump_action(struct flow_actions *actions, uint32_t group)
+{
+    struct rte_flow_action_jump *jump = xzalloc (sizeof *jump);
+
+    jump->group = group;
+    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_JUMP, jump);
+}
+
+static int
+vport_to_rte_tunnel(struct netdev *vport,
+                    struct rte_flow_tunnel *tunnel,
+                    struct netdev *netdev,
+                    struct ds *s_tnl)
+{
+    const struct netdev_tunnel_config *tnl_cfg;
+
+    memset(tunnel, 0, sizeof *tunnel);
+    if (!strcmp(netdev_get_type(vport), "vxlan")) {
+        tunnel->type = RTE_FLOW_ITEM_TYPE_VXLAN;
+        tnl_cfg = netdev_get_tunnel_config(vport);
+        if (!tnl_cfg) {
+            return -1;
+        }
+        tunnel->tp_dst = tnl_cfg->dst_port;
+        if (!VLOG_DROP_DBG(&rl)) {
+            ds_put_format(s_tnl, "flow tunnel create %d type vxlan; ",
+                          netdev_dpdk_get_port_id(netdev));
+        }
+    } else {
+        OVS_NOT_REACHED();
+    }
+
+    return 0;
+}
+
+static int
+add_tnl_pop_action(struct netdev *netdev,
+                   struct flow_actions *actions,
+                   const struct nlattr *nla)
+{
+    struct rte_flow_action *tnl_actions = NULL;
+    uint32_t num_of_tnl_actions = 0;
+    struct rte_flow_tunnel tunnel;
+    struct rte_flow_error error;
+    struct netdev *vport;
+    odp_port_t port;
+    int ret;
+
+    port = nl_attr_get_odp_port(nla);
+    vport = netdev_ports_get(port, netdev->dpif_type);
+    if (vport == NULL) {
+        return -1;
+    }
+    ret = vport_to_rte_tunnel(vport, &tunnel, netdev, &actions->s_tnl);
+    netdev_close(vport);
+    if (ret) {
+        return ret;
+    }
+    ret = netdev_dpdk_rte_flow_tunnel_decap_set(netdev, &tunnel, &tnl_actions,
+                                                &num_of_tnl_actions, &error);
+    if (ret) {
+        VLOG_DBG_RL(&rl, "%s: netdev_dpdk_rte_flow_tunnel_decap_set failed: "
+                    "%d (%s).", netdev_get_name(netdev), error.type,
+                    error.message);
+        return ret;
+    }
+    add_flow_tnl_actions(actions, netdev, tnl_actions, num_of_tnl_actions);
+    add_jump_action(actions, 0);
+    return 0;
+}
+
 static int
 parse_flow_actions(struct netdev *netdev,
                    struct flow_actions *actions,
@@ -1351,6 +1490,10 @@  parse_flow_actions(struct netdev *netdev,
                                     clone_actions_len)) {
                 return -1;
             }
+        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_TUNNEL_POP) {
+            if (add_tnl_pop_action(netdev, actions, nla)) {
+                return -1;
+            }
         } else {
             VLOG_DBG_RL(&rl, "Unsupported action type %d", nl_attr_type(nla));
             return -1;
@@ -1383,7 +1526,7 @@  netdev_offload_dpdk_actions(struct netdev *netdev,
         goto out;
     }
     flow = netdev_offload_dpdk_flow_create(netdev, &flow_attr, patterns->items,
-                                           actions.actions, &error);
+                                           &actions, &error);
 out:
     free_flow_actions(&actions);
     return flow;