diff mbox series

[ovs-dev,v14,7/7] netdev-offload-tc: Add offload support for sFlow

Message ID 20210715060140.3866-8-cmi@nvidia.com
State Changes Requested
Headers show
Series Add offload support for sFlow | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot success github build: passed

Commit Message

Chris Mi July 15, 2021, 6:01 a.m. UTC
Create a unique group ID to map the sFlow info when offloading sFlow
action to TC. When showing the offloaded datapath flows, translate the
group ID from TC sample action to sFlow info using the mapping.

Signed-off-by: Chris Mi <cmi@nvidia.com>
Reviewed-by: Eli Britstein <elibr@nvidia.com>
---
 NEWS                    |   1 +
 lib/netdev-offload-tc.c | 212 +++++++++++++++++++++++++++++++++++++---
 lib/tc.c                |  61 +++++++++++-
 lib/tc.h                |  15 ++-
 4 files changed, 272 insertions(+), 17 deletions(-)

Comments

Eelco Chaudron Sept. 9, 2021, 2:29 p.m. UTC | #1
On 15 Jul 2021, at 8:01, Chris Mi wrote:

> Create a unique group ID to map the sFlow info when offloading sFlow
> action to TC. When showing the offloaded datapath flows, translate the
> group ID from TC sample action to sFlow info using the mapping.

See comments below and this completes my review of the v14. See the 
other email on the update of the flows going wrong.

I’ll do a more thorough review on v15 assuming the issue gets fixed, 
especially the sgid allocation/re-use/free.

> Signed-off-by: Chris Mi <cmi@nvidia.com>
> Reviewed-by: Eli Britstein <elibr@nvidia.com>
> ---
>  NEWS                    |   1 +
>  lib/netdev-offload-tc.c | 212 
> +++++++++++++++++++++++++++++++++++++---
>  lib/tc.c                |  61 +++++++++++-
>  lib/tc.h                |  15 ++-
>  4 files changed, 272 insertions(+), 17 deletions(-)
>
> diff --git a/NEWS b/NEWS
> index 6cdccc715..7c0361e18 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -50,6 +50,7 @@ Post-v2.15.0
>     - OVS now reports the datapath capability 'ct_zero_snat', which 
> reflects
>       whether the SNAT with all-zero IP address is supported.
>       See ovs-vswitchd.conf.db(5) for details.
> +   - Add sFlow offload support for kernel (netlink) datapath.
>
>
>  v2.15.0 - 15 Feb 2021
> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
> index 2f16cf279..71e51394f 100644
> --- a/lib/netdev-offload-tc.c
> +++ b/lib/netdev-offload-tc.c
> @@ -20,6 +20,7 @@
>  #include <linux/if_ether.h>
>
>  #include "dpif.h"
> +#include "dpif-offload-provider.h"
>  #include "hash.h"
>  #include "openvswitch/hmap.h"
>  #include "openvswitch/match.h"
> @@ -1087,6 +1088,19 @@ parse_tc_flower_to_match(struct tc_flower 
> *flower,
>          action = flower->actions;
>          for (i = 0; i < flower->action_count; i++, action++) {
>              switch (action->type) {
> +            case TC_ACT_SAMPLE: {
> +                const struct sgid_node *node;
> +
> +                node = sgid_find(action->sample.group_id);
> +                if (!node) {
> +                    VLOG_ERR_RL(&error_rl, "%s: sgid node is NULL, 
> sgid: %d",
> +                                __func__, action->sample.group_id);

This should probably be a warning, as this could happen if the DP has 
not yet been synced? i.e. revalidator being in the process of cleanup 
and someone requesting dp content?

> +                    return ENOENT;
> +                }
> +                nl_msg_put(buf, node->sflow.action,
> +                           node->sflow.action->nla_len);
> +            }
> +            break;
>              case TC_ACT_VLAN_POP: {
>                  nl_msg_put_flag(buf, OVS_ACTION_ATTR_POP_VLAN);
>              }
> @@ -1825,6 +1839,156 @@ parse_match_ct_state_to_flower(struct 
> tc_flower *flower, struct match *match)
>      }
>  }
>
> +static int
> +parse_userspace_attributes(const struct nlattr *actions,
> +                           struct dpif_sflow_attr *sflow_attr)
> +{
> +    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
> +    const struct nlattr *nla;
> +    unsigned int left;
> +
> +    NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
> +        if (nl_attr_type(nla) == OVS_USERSPACE_ATTR_USERDATA) {
> +            struct user_action_cookie *cookie;
> +
> +            cookie = CONST_CAST(struct user_action_cookie *, 
> nl_attr_get(nla));
> +            if (cookie->type == USER_ACTION_COOKIE_SFLOW) {

Here I think the userdata type should be changed to "struct nlattr * " 
just like in the struct dpif_upcall.
This should probably be done in the patch introducing the 
dpif_sflow_attr, so it will also be aligned with the actions member.

> +                sflow_attr->userdata = CONST_CAST(void *, 
> nl_attr_get(nla));
> +                sflow_attr->userdata_len = nl_attr_get_size(nla);
> +                return 0;
> +            }
> +        }
> +    }
> +
> +    VLOG_DBG_RL(&rl, "%s: cannot offload userspace action other than 
> sFlow",
> +                __func__);
> +    return EOPNOTSUPP;
> +}
> +
> +static int
> +parse_sample_actions_attribute(const struct nlattr *actions,
> +                               struct dpif_sflow_attr *sflow_attr)
> +{
> +    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
> +    const struct nlattr *nla;
> +    unsigned int left;
> +    int err = EINVAL;
> +
> +    NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
> +        if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
> +            err = parse_userspace_attributes(nla, sflow_attr);
> +        } else {
> +            /* We can't offload other nested actions */
> +            VLOG_DBG_RL(&rl, "%s: can only offload "
> +                        "OVS_ACTION_ATTR_USERSPACE attribute", 
> __func__);
> +            return EINVAL;
> +        }
> +    }
> +
> +    if (err) {
> +        VLOG_ERR_RL(&error_rl, "%s: no OVS_ACTION_ATTR_USERSPACE 
> attribute",
> +                    __func__);
> +    }
> +    return err;
> +}
> +
> +static int
> +parse_sample_action(struct tc_flower *flower, struct tc_action 
> *tc_action,
> +                    const struct nlattr *sample_action,
> +                    const struct flow_tnl *tnl, uint32_t *group_id,
> +                    const ovs_u128 *ufid)
> +{
> +    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
> +    struct dpif_sflow_attr sflow_attr;
> +    const struct nlattr *nla;
> +    unsigned int left;
> +    int ret = EINVAL;
> +
> +    if (*group_id) {
> +        VLOG_ERR_RL(&error_rl, "%s: Only a single TC_SAMPLE action "
> +                    "per flow is supported", __func__);
> +        return EOPNOTSUPP;
> +    }
> +
> +    memset(&sflow_attr, 0, sizeof sflow_attr);
> +    sflow_attr.ufid = *ufid;
> +    sflow_attr.action = sample_action;
> +
> +    if (flower->tunnel) {
> +        sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, tnl);
> +    }
> +
> +    NL_NESTED_FOR_EACH_UNSAFE (nla, left, sample_action) {
> +        if (nl_attr_type(nla) == OVS_SAMPLE_ATTR_ACTIONS) {
> +            ret = parse_sample_actions_attribute(nla, &sflow_attr);
> +        } else if (nl_attr_type(nla) == OVS_SAMPLE_ATTR_PROBABILITY) 
> {
> +            tc_action->type = TC_ACT_SAMPLE;
> +            tc_action->sample.rate = UINT32_MAX / 
> nl_attr_get_u32(nla);
> +        } else {
> +            return EINVAL;
> +        }
> +    }
> +
> +    if (!tc_action->sample.rate || ret) {
> +        return EINVAL;
> +    }
> +
> +    *group_id = sgid_alloc_ctx(&sflow_attr);
> +    if (!*group_id) {
> +        VLOG_DBG_RL(&rl, "%s: Failed allocating group id for sample 
> action",
> +                    __func__);
> +        return ENOENT;
> +    }
> +    tc_action->sample.group_id = *group_id;
> +    flower->action_count++;
> +
> +    return 0;
> +}
> +
> +static int
> +parse_userspace_action(struct tc_flower *flower, struct tc_action 
> *tc_action,
> +                       const struct nlattr *userspace_action,
> +                       const struct flow_tnl *tnl, uint32_t 
> *group_id,
> +                       const ovs_u128 *ufid)
> +{
> +    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
> +    struct dpif_sflow_attr sflow_attr;
> +    int err;
> +
> +    if (*group_id) {
> +        VLOG_ERR_RL(&error_rl, "%s: Only a single TC_SAMPLE action "
> +                    "per flow is supported", __func__);
> +        return EOPNOTSUPP;
> +    }
> +
> +    /* If sampling rate is 1, there is only a sFlow cookie inside of 
> a
> +     * userspace action, but no sample attribute. That means we can
> +     * only offload userspace actions for sFlow.
> +     */
> +    memset(&sflow_attr, 0, sizeof sflow_attr);
> +    sflow_attr.ufid = *ufid;
> +    if (flower->tunnel) {
> +        sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, tnl);
> +    }
> +    err = parse_userspace_attributes(userspace_action, &sflow_attr);
> +    if (err) {
> +        return err;
> +    }
> +    sflow_attr.action = userspace_action;
> +    *group_id = sgid_alloc_ctx(&sflow_attr);
> +    if (!*group_id) {
> +        VLOG_DBG_RL(&rl, "%s: Failed allocating group id for sample 
> action",
> +                    __func__);
> +        return ENOENT;
> +    }
> +    tc_action->type = TC_ACT_SAMPLE;
> +    tc_action->sample.group_id = *group_id;
> +    tc_action->sample.rate = 1;
> +    flower->action_count++;
> +
> +    return 0;
> +}
> +
>  static int
>  netdev_tc_flow_put(struct netdev *netdev, struct match *match,
>                     struct nlattr *actions, size_t actions_len,
> @@ -1840,6 +2004,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct 
> match *match,
>      const struct flow_tnl *tnl_mask = &mask->tunnel;
>      struct tc_action *action;
>      bool recirc_act = false;
> +    uint32_t sample_gid = 0;
>      uint32_t block_id = 0;
>      struct nlattr *nla;
>      struct tcf_id id;
> @@ -2092,7 +2257,8 @@ netdev_tc_flow_put(struct netdev *netdev, struct 
> match *match,
>      NL_ATTR_FOR_EACH(nla, left, actions, actions_len) {
>          if (flower.action_count >= TCA_ACT_MAX_NUM) {
>              VLOG_DBG_RL(&rl, "Can only support %d actions", 
> TCA_ACT_MAX_NUM);
> -            return EOPNOTSUPP;
> +            err = EOPNOTSUPP;
> +            goto out;
>          }
>          action = &flower.actions[flower.action_count];
>          if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
> @@ -2102,7 +2268,8 @@ netdev_tc_flow_put(struct netdev *netdev, struct 
> match *match,
>
>              if (!outdev) {
>                  VLOG_DBG_RL(&rl, "Can't find netdev for output port 
> %d", port);
> -                return ENODEV;
> +                err = ENODEV;
> +                goto out;
>              }
>              action->out.ifindex_out = netdev_get_ifindex(outdev);
>              action->out.ingress = 
> is_internal_port(netdev_get_type(outdev));
> @@ -2140,7 +2307,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct 
> match *match,
>
>              err = parse_put_flow_set_action(&flower, action, set, 
> set_len);
>              if (err) {
> -                return err;
> +                goto out;
>              }
>              if (action->type == TC_ACT_ENCAP) {
>                  action->encap.tp_dst = info->tp_dst_port;
> @@ -2153,7 +2320,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct 
> match *match,
>              err = parse_put_flow_set_masked_action(&flower, action, 
> set,
>                                                     set_len, true);
>              if (err) {
> -                return err;
> +                goto out;
>              }
>          } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CT) {
>              const struct nlattr *ct = nl_attr_get(nla);
> @@ -2165,7 +2332,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct 
> match *match,
>
>              err = parse_put_flow_ct_action(&flower, action, ct, 
> ct_len);
>              if (err) {
> -                return err;
> +                goto out;
>              }
>          } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CT_CLEAR) {
>              action->type = TC_ACT_CT;
> @@ -2181,20 +2348,29 @@ netdev_tc_flow_put(struct netdev *netdev, 
> struct match *match,
>              action->chain = 0;  /* 0 is reserved and not used by 
> recirc. */
>              flower.action_count++;
>          } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SAMPLE) {
> -            struct dpif_sflow_attr sflow_attr;
> -
> -            memset(&sflow_attr, 0, sizeof sflow_attr);
> -            sgid_alloc_ctx(&sflow_attr);
> +            err = parse_sample_action(&flower, action, nla, tnl, 
> &sample_gid,
> +                                      ufid);
> +            if (err) {
> +                goto out;
> +            }
> +        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
> +            err = parse_userspace_action(&flower, action, nla, tnl,
> +                                         &sample_gid, ufid);
> +            if (err) {
> +                goto out;
> +            }
>          } else {
>              VLOG_DBG_RL(&rl, "unsupported put action type: %d",
>                          nl_attr_type(nla));
> -            return EOPNOTSUPP;
> +            err = EOPNOTSUPP;
> +            goto out;
>          }
>      }
>
>      if ((chain || recirc_act) && !info->recirc_id_shared_with_tc) {
>          VLOG_ERR_RL(&error_rl, "flow_put: recirc_id sharing not 
> supported");
> -        return EOPNOTSUPP;
> +        err = EOPNOTSUPP;
> +        goto out;
>      }
>
>      if (get_ufid_tc_mapping(ufid, &id) == 0) {
> @@ -2206,20 +2382,30 @@ netdev_tc_flow_put(struct netdev *netdev, 
> struct match *match,
>      prio = get_prio_for_tc_flower(&flower);
>      if (prio == 0) {
>          VLOG_ERR_RL(&rl, "couldn't get tc prio: %s", 
> ovs_strerror(ENOSPC));
> -        return ENOSPC;
> +        err = ENOSPC;
> +        goto out;
>      }
>
>      flower.act_cookie.data = ufid;
>      flower.act_cookie.len = sizeof *ufid;
>
>      block_id = get_block_id_from_netdev(netdev);
> -    id = tc_make_tcf_id_chain(ifindex, block_id, chain, prio, hook);
> +    id = tc_make_tcf_id_all(ifindex, block_id, chain, prio, hook, 
> sample_gid);
>      err = tc_replace_flower(&id, &flower);
>      if (!err) {
>          if (stats) {
>              memset(stats, 0, sizeof *stats);
>          }
>          add_ufid_tc_mapping(netdev, ufid, &id);
> +    } else {
> +        goto out;
> +    }

Guess this will look nicer:

     if (err) {
         goto out;
      }

      if (stats) {
          memset(stats, 0, sizeof *stats);
      }
      add_ufid_tc_mapping(netdev, ufid, &id);
      return 0;

> +
> +    return 0;
> +
> +out:
> +    if (sample_gid) {
> +        sgid_free(sample_gid);
>      }
>
>      return err;
> diff --git a/lib/tc.c b/lib/tc.c
> index 33287ea05..c5788220f 100644
> --- a/lib/tc.c
> +++ b/lib/tc.c
> @@ -23,14 +23,15 @@
>  #include <linux/if_packet.h>
>  #include <linux/rtnetlink.h>
>  #include <linux/tc_act/tc_csum.h>
> +#include <linux/tc_act/tc_ct.h>
>  #include <linux/tc_act/tc_gact.h>
>  #include <linux/tc_act/tc_mirred.h>
>  #include <linux/tc_act/tc_mpls.h>
>  #include <linux/tc_act/tc_pedit.h>
> +#include <linux/tc_act/tc_sample.h>
>  #include <linux/tc_act/tc_skbedit.h>
>  #include <linux/tc_act/tc_tunnel_key.h>
>  #include <linux/tc_act/tc_vlan.h>
> -#include <linux/tc_act/tc_ct.h>
>  #include <linux/gen_stats.h>
>  #include <net/if.h>
>  #include <unistd.h>
> @@ -1341,6 +1342,38 @@ nl_parse_act_gact(struct nlattr *options, 
> struct tc_flower *flower)
>      return 0;
>  }
>
> +static const struct nl_policy sample_policy[] = {
> +    [TCA_SAMPLE_PARMS] = { .type = NL_A_UNSPEC,
> +                           .min_len = sizeof(struct tc_sample),
> +                           .optional = false, },
> +    [TCA_SAMPLE_PSAMPLE_GROUP] = { .type = NL_A_U32,
> +                                   .optional = false, },
> +    [TCA_SAMPLE_RATE] = { .type = NL_A_U32,
> +                          .optional = false, },
> +};
> +
> +static int
> +nl_parse_act_sample(struct nlattr *options, struct tc_flower *flower)
> +{
> +    struct nlattr *sample_attrs[ARRAY_SIZE(sample_policy)];
> +    struct tc_action *action;
> +
> +    if (!nl_parse_nested(options, sample_policy, sample_attrs,
> +                         ARRAY_SIZE(sample_policy))) {
> +        VLOG_ERR_RL(&error_rl, "failed to parse sample action 
> options");
> +        return EPROTO;
> +    }
> +
> +    action = &flower->actions[flower->action_count++];
> +    action->type = TC_ACT_SAMPLE;
> +    action->sample.group_id =
> +        nl_attr_get_u32(sample_attrs[TCA_SAMPLE_PSAMPLE_GROUP]);
> +    action->sample.rate =
> +        nl_attr_get_u32(sample_attrs[TCA_SAMPLE_RATE]);
> +
> +    return 0;
> +}
> +
>  static const struct nl_policy mirred_policy[] = {
>      [TCA_MIRRED_PARMS] = { .type = NL_A_UNSPEC,
>                             .min_len = sizeof(struct tc_mirred),
> @@ -1749,6 +1782,8 @@ nl_parse_single_action(struct nlattr *action, 
> struct tc_flower *flower,
>          /* Added for TC rule only (not in OvS rule) so ignore. */
>      } else if (!strcmp(act_kind, "ct")) {
>          nl_parse_act_ct(act_options, flower);
> +    } else if (!strcmp(act_kind, "sample")) {
> +        nl_parse_act_sample(act_options, flower);
>      } else {
>          VLOG_ERR_RL(&error_rl, "unknown tc action kind: %s", 
> act_kind);
>          err = EINVAL;
> @@ -2375,6 +2410,23 @@ nl_msg_put_act_mirred(struct ofpbuf *request, 
> int ifindex, int action,
>      nl_msg_end_nested(request, offset);
>  }
>
> +static void
> +nl_msg_put_act_sample(struct ofpbuf *request, uint32_t rate, uint32_t 
> group_id)
> +{
> +    size_t offset;
> +
> +    nl_msg_put_string(request, TCA_ACT_KIND, "sample");
> +    offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS | 
> NLA_F_NESTED);
> +    {
> +        struct tc_sample parm = { .action = TC_ACT_PIPE };
> +
> +        nl_msg_put_unspec(request, TCA_SAMPLE_PARMS, &parm, sizeof 
> parm);
> +        nl_msg_put_u32(request, TCA_SAMPLE_RATE, rate);
> +        nl_msg_put_u32(request, TCA_SAMPLE_PSAMPLE_GROUP, group_id);
> +    }
> +    nl_msg_end_nested(request, offset);
> +}
> +
>  static inline void
>  nl_msg_put_act_cookie(struct ofpbuf *request, struct tc_cookie *ck) {
>      if (ck->len) {
> @@ -2634,6 +2686,13 @@ nl_msg_put_flower_acts(struct ofpbuf *request, 
> struct tc_flower *flower)
>                  nl_msg_end_nested(request, act_offset);
>              }
>              break;
> +            case TC_ACT_SAMPLE: {
> +                act_offset = nl_msg_start_nested(request, 
> act_index++);
> +                nl_msg_put_act_sample(request, action->sample.rate,
> +                                      action->sample.group_id);
> +                nl_msg_end_nested(request, act_offset);
> +            }
> +            break;
>              case TC_ACT_OUTPUT: {
>                  if (!released && flower->tunnel) {
>                      act_offset = nl_msg_start_nested(request, 
> act_index++);
> diff --git a/lib/tc.h b/lib/tc.h
> index 2e4084f48..f764d7d1e 100644
> --- a/lib/tc.h
> +++ b/lib/tc.h
> @@ -174,6 +174,7 @@ enum tc_action_type {
>      TC_ACT_MPLS_SET,
>      TC_ACT_GOTO,
>      TC_ACT_CT,
> +    TC_ACT_SAMPLE,
>  };
>
>  enum nat_type {
> @@ -256,6 +257,11 @@ struct tc_action {
>              bool force;
>              bool commit;
>          } ct;
> +
> +        struct {
> +            uint32_t rate;
> +            uint32_t group_id;
> +        } sample;
>       };
>
>       enum tc_action_type type;
> @@ -294,12 +300,14 @@ tc_make_tcf_id(int ifindex, uint32_t block_id, 
> uint16_t prio,
>  }
>
>  static inline struct tcf_id
> -tc_make_tcf_id_chain(int ifindex, uint32_t block_id, uint32_t chain,
> -                     uint16_t prio, enum tc_qdisc_hook hook)
> +tc_make_tcf_id_all(int ifindex, uint32_t block_id, uint32_t chain,
> +                     uint16_t prio, enum tc_qdisc_hook hook,
> +                     uint32_t sample_group_id)
>  {
>      struct tcf_id id = tc_make_tcf_id(ifindex, block_id, prio, hook);
>
>      id.chain = chain;
> +    id.sample_group_id = sample_group_id;
>
>      return id;
>  }
> @@ -313,7 +321,8 @@ is_tcf_id_eq(struct tcf_id *id1, struct tcf_id 
> *id2)
>             && id1->hook == id2->hook
>             && id1->block_id == id2->block_id
>             && id1->ifindex == id2->ifindex
> -           && id1->chain == id2->chain;
> +           && id1->chain == id2->chain
> +           && id1->sample_group_id == id2->sample_group_id;
>  }
>
>  enum tc_offload_policy {
> -- 
> 2.21.0
Chris Mi Sept. 10, 2021, 11:02 a.m. UTC | #2
On 9/9/2021 10:29 PM, Eelco Chaudron wrote:
>
> On 15 Jul 2021, at 8:01, Chris Mi wrote:
>
>     Create a unique group ID to map the sFlow info when offloading sFlow
>     action to TC. When showing the offloaded datapath flows, translate
>     the
>     group ID from TC sample action to sFlow info using the mapping.
>
> See comments below and this completes my review of the v14. See the 
> other email on the update of the flows going wrong.
>
I'm struggling with the test these days and make little progress. I 
usually use sflowtool to receive the sFlow packets.
After investigation, I found the test uses 'ovstest test-sflow'. If I 
use it manually, it works both for offload and none-offload.
But if I integrate it in the new test, it only works for non-offload. It 
can't receive sFlow packets for offload.
I don't know what is missing. So I plan to address v14 comments next 
week and ignore the test now. Maybe I can find something
unusual later on.  And thanks for the full review on v14.

-Chris
>
> I’ll do a more thorough review on v15 assuming the issue gets fixed, 
> especially the sgid allocation/re-use/free.
>
>     Signed-off-by: Chris Mi <cmi@nvidia.com>
>     Reviewed-by: Eli Britstein <elibr@nvidia.com>
>     ---
>     NEWS | 1 +
>     lib/netdev-offload-tc.c | 212
>     +++++++++++++++++++++++++++++++++++++---
>     lib/tc.c | 61 +++++++++++-
>     lib/tc.h | 15 ++-
>     4 files changed, 272 insertions(+), 17 deletions(-)
>
>     diff --git a/NEWS b/NEWS
>     index 6cdccc715..7c0361e18 100644
>     --- a/NEWS
>     +++ b/NEWS
>     @@ -50,6 +50,7 @@ Post-v2.15.0
>     - OVS now reports the datapath capability 'ct_zero_snat', which
>     reflects
>     whether the SNAT with all-zero IP address is supported.
>     See ovs-vswitchd.conf.db(5) for details.
>     + - Add sFlow offload support for kernel (netlink) datapath.
>
>     v2.15.0 - 15 Feb 2021
>     diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
>     index 2f16cf279..71e51394f 100644
>     --- a/lib/netdev-offload-tc.c
>     +++ b/lib/netdev-offload-tc.c
>     @@ -20,6 +20,7 @@
>     #include <linux/if_ether.h>
>
>     #include "dpif.h"
>     +#include "dpif-offload-provider.h"
>     #include "hash.h"
>     #include "openvswitch/hmap.h"
>     #include "openvswitch/match.h"
>     @@ -1087,6 +1088,19 @@ parse_tc_flower_to_match(struct tc_flower
>     *flower,
>     action = flower->actions;
>     for (i = 0; i < flower->action_count; i++, action++) {
>     switch (action->type) {
>     + case TC_ACT_SAMPLE: {
>     + const struct sgid_node *node;
>     +
>     + node = sgid_find(action->sample.group_id);
>     + if (!node) {
>     + VLOG_ERR_RL(&error_rl, "%s: sgid node is NULL, sgid: %d",
>     + __func__, action->sample.group_id);
>
> This should probably be a warning, as this could happen if the DP has 
> not yet been synced? i.e. revalidator being in the process of cleanup 
> and someone requesting dp content?
>
>     + return ENOENT;
>     + }
>     + nl_msg_put(buf, node->sflow.action,
>     + node->sflow.action->nla_len);
>     + }
>     + break;
>     case TC_ACT_VLAN_POP: {
>     nl_msg_put_flag(buf, OVS_ACTION_ATTR_POP_VLAN);
>     }
>     @@ -1825,6 +1839,156 @@ parse_match_ct_state_to_flower(struct
>     tc_flower *flower, struct match *match)
>     }
>     }
>
>     +static int
>     +parse_userspace_attributes(const struct nlattr *actions,
>     + struct dpif_sflow_attr *sflow_attr)
>     +{
>     + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>     + const struct nlattr *nla;
>     + unsigned int left;
>     +
>     + NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
>     + if (nl_attr_type(nla) == OVS_USERSPACE_ATTR_USERDATA) {
>     + struct user_action_cookie *cookie;
>     +
>     + cookie = CONST_CAST(struct user_action_cookie *, nl_attr_get(nla));
>     + if (cookie->type == USER_ACTION_COOKIE_SFLOW) {
>
> Here I think the userdata type should be changed to "struct nlattr * " 
> just like in the struct dpif_upcall.
> This should probably be done in the patch introducing the 
> dpif_sflow_attr, so it will also be aligned with the actions member.
>
>     + sflow_attr->userdata = CONST_CAST(void *, nl_attr_get(nla));
>     + sflow_attr->userdata_len = nl_attr_get_size(nla);
>     + return 0;
>     + }
>     + }
>     + }
>     +
>     + VLOG_DBG_RL(&rl, "%s: cannot offload userspace action other than
>     sFlow",
>     + __func__);
>     + return EOPNOTSUPP;
>     +}
>     +
>     +static int
>     +parse_sample_actions_attribute(const struct nlattr *actions,
>     + struct dpif_sflow_attr *sflow_attr)
>     +{
>     + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>     + const struct nlattr *nla;
>     + unsigned int left;
>     + int err = EINVAL;
>     +
>     + NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
>     + if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
>     + err = parse_userspace_attributes(nla, sflow_attr);
>     + } else {
>     + /* We can't offload other nested actions */
>     + VLOG_DBG_RL(&rl, "%s: can only offload "
>     + "OVS_ACTION_ATTR_USERSPACE attribute", __func__);
>     + return EINVAL;
>     + }
>     + }
>     +
>     + if (err) {
>     + VLOG_ERR_RL(&error_rl, "%s: no OVS_ACTION_ATTR_USERSPACE
>     attribute",
>     + __func__);
>     + }
>     + return err;
>     +}
>     +
>     +static int
>     +parse_sample_action(struct tc_flower *flower, struct tc_action
>     *tc_action,
>     + const struct nlattr *sample_action,
>     + const struct flow_tnl *tnl, uint32_t *group_id,
>     + const ovs_u128 *ufid)
>     +{
>     + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>     + struct dpif_sflow_attr sflow_attr;
>     + const struct nlattr *nla;
>     + unsigned int left;
>     + int ret = EINVAL;
>     +
>     + if (*group_id) {
>     + VLOG_ERR_RL(&error_rl, "%s: Only a single TC_SAMPLE action "
>     + "per flow is supported", __func__);
>     + return EOPNOTSUPP;
>     + }
>     +
>     + memset(&sflow_attr, 0, sizeof sflow_attr);
>     + sflow_attr.ufid = *ufid;
>     + sflow_attr.action = sample_action;
>     +
>     + if (flower->tunnel) {
>     + sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, tnl);
>     + }
>     +
>     + NL_NESTED_FOR_EACH_UNSAFE (nla, left, sample_action) {
>     + if (nl_attr_type(nla) == OVS_SAMPLE_ATTR_ACTIONS) {
>     + ret = parse_sample_actions_attribute(nla, &sflow_attr);
>     + } else if (nl_attr_type(nla) == OVS_SAMPLE_ATTR_PROBABILITY) {
>     + tc_action->type = TC_ACT_SAMPLE;
>     + tc_action->sample.rate = UINT32_MAX / nl_attr_get_u32(nla);
>     + } else {
>     + return EINVAL;
>     + }
>     + }
>     +
>     + if (!tc_action->sample.rate || ret) {
>     + return EINVAL;
>     + }
>     +
>     + *group_id = sgid_alloc_ctx(&sflow_attr);
>     + if (!*group_id) {
>     + VLOG_DBG_RL(&rl, "%s: Failed allocating group id for sample
>     action",
>     + __func__);
>     + return ENOENT;
>     + }
>     + tc_action->sample.group_id = *group_id;
>     + flower->action_count++;
>     +
>     + return 0;
>     +}
>     +
>     +static int
>     +parse_userspace_action(struct tc_flower *flower, struct tc_action
>     *tc_action,
>     + const struct nlattr *userspace_action,
>     + const struct flow_tnl *tnl, uint32_t *group_id,
>     + const ovs_u128 *ufid)
>     +{
>     + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>     + struct dpif_sflow_attr sflow_attr;
>     + int err;
>     +
>     + if (*group_id) {
>     + VLOG_ERR_RL(&error_rl, "%s: Only a single TC_SAMPLE action "
>     + "per flow is supported", __func__);
>     + return EOPNOTSUPP;
>     + }
>     +
>     + /* If sampling rate is 1, there is only a sFlow cookie inside of a
>     + * userspace action, but no sample attribute. That means we can
>     + * only offload userspace actions for sFlow.
>     + */
>     + memset(&sflow_attr, 0, sizeof sflow_attr);
>     + sflow_attr.ufid = *ufid;
>     + if (flower->tunnel) {
>     + sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, tnl);
>     + }
>     + err = parse_userspace_attributes(userspace_action, &sflow_attr);
>     + if (err) {
>     + return err;
>     + }
>     + sflow_attr.action = userspace_action;
>     + *group_id = sgid_alloc_ctx(&sflow_attr);
>     + if (!*group_id) {
>     + VLOG_DBG_RL(&rl, "%s: Failed allocating group id for sample
>     action",
>     + __func__);
>     + return ENOENT;
>     + }
>     + tc_action->type = TC_ACT_SAMPLE;
>     + tc_action->sample.group_id = *group_id;
>     + tc_action->sample.rate = 1;
>     + flower->action_count++;
>     +
>     + return 0;
>     +}
>     +
>     static int
>     netdev_tc_flow_put(struct netdev *netdev, struct match *match,
>     struct nlattr *actions, size_t actions_len,
>     @@ -1840,6 +2004,7 @@ netdev_tc_flow_put(struct netdev *netdev,
>     struct match *match,
>     const struct flow_tnl *tnl_mask = &mask->tunnel;
>     struct tc_action *action;
>     bool recirc_act = false;
>     + uint32_t sample_gid = 0;
>     uint32_t block_id = 0;
>     struct nlattr *nla;
>     struct tcf_id id;
>     @@ -2092,7 +2257,8 @@ netdev_tc_flow_put(struct netdev *netdev,
>     struct match *match,
>     NL_ATTR_FOR_EACH(nla, left, actions, actions_len) {
>     if (flower.action_count >= TCA_ACT_MAX_NUM) {
>     VLOG_DBG_RL(&rl, "Can only support %d actions", TCA_ACT_MAX_NUM);
>     - return EOPNOTSUPP;
>     + err = EOPNOTSUPP;
>     + goto out;
>     }
>     action = &flower.actions[flower.action_count];
>     if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
>     @@ -2102,7 +2268,8 @@ netdev_tc_flow_put(struct netdev *netdev,
>     struct match *match,
>
>     if (!outdev) {
>     VLOG_DBG_RL(&rl, "Can't find netdev for output port %d", port);
>     - return ENODEV;
>     + err = ENODEV;
>     + goto out;
>     }
>     action->out.ifindex_out = netdev_get_ifindex(outdev);
>     action->out.ingress = is_internal_port(netdev_get_type(outdev));
>     @@ -2140,7 +2307,7 @@ netdev_tc_flow_put(struct netdev *netdev,
>     struct match *match,
>
>     err = parse_put_flow_set_action(&flower, action, set, set_len);
>     if (err) {
>     - return err;
>     + goto out;
>     }
>     if (action->type == TC_ACT_ENCAP) {
>     action->encap.tp_dst = info->tp_dst_port;
>     @@ -2153,7 +2320,7 @@ netdev_tc_flow_put(struct netdev *netdev,
>     struct match *match,
>     err = parse_put_flow_set_masked_action(&flower, action, set,
>     set_len, true);
>     if (err) {
>     - return err;
>     + goto out;
>     }
>     } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CT) {
>     const struct nlattr *ct = nl_attr_get(nla);
>     @@ -2165,7 +2332,7 @@ netdev_tc_flow_put(struct netdev *netdev,
>     struct match *match,
>
>     err = parse_put_flow_ct_action(&flower, action, ct, ct_len);
>     if (err) {
>     - return err;
>     + goto out;
>     }
>     } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CT_CLEAR) {
>     action->type = TC_ACT_CT;
>     @@ -2181,20 +2348,29 @@ netdev_tc_flow_put(struct netdev *netdev,
>     struct match *match,
>     action->chain = 0; /* 0 is reserved and not used by recirc. */
>     flower.action_count++;
>     } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SAMPLE) {
>     - struct dpif_sflow_attr sflow_attr;
>     -
>     - memset(&sflow_attr, 0, sizeof sflow_attr);
>     - sgid_alloc_ctx(&sflow_attr);
>     + err = parse_sample_action(&flower, action, nla, tnl, &sample_gid,
>     + ufid);
>     + if (err) {
>     + goto out;
>     + }
>     + } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
>     + err = parse_userspace_action(&flower, action, nla, tnl,
>     + &sample_gid, ufid);
>     + if (err) {
>     + goto out;
>     + }
>     } else {
>     VLOG_DBG_RL(&rl, "unsupported put action type: %d",
>     nl_attr_type(nla));
>     - return EOPNOTSUPP;
>     + err = EOPNOTSUPP;
>     + goto out;
>     }
>     }
>
>     if ((chain || recirc_act) && !info->recirc_id_shared_with_tc) {
>     VLOG_ERR_RL(&error_rl, "flow_put: recirc_id sharing not supported");
>     - return EOPNOTSUPP;
>     + err = EOPNOTSUPP;
>     + goto out;
>     }
>
>     if (get_ufid_tc_mapping(ufid, &id) == 0) {
>     @@ -2206,20 +2382,30 @@ netdev_tc_flow_put(struct netdev *netdev,
>     struct match *match,
>     prio = get_prio_for_tc_flower(&flower);
>     if (prio == 0) {
>     VLOG_ERR_RL(&rl, "couldn't get tc prio: %s", ovs_strerror(ENOSPC));
>     - return ENOSPC;
>     + err = ENOSPC;
>     + goto out;
>     }
>
>     flower.act_cookie.data = ufid;
>     flower.act_cookie.len = sizeof *ufid;
>
>     block_id = get_block_id_from_netdev(netdev);
>     - id = tc_make_tcf_id_chain(ifindex, block_id, chain, prio, hook);
>     + id = tc_make_tcf_id_all(ifindex, block_id, chain, prio, hook,
>     sample_gid);
>     err = tc_replace_flower(&id, &flower);
>     if (!err) {
>     if (stats) {
>     memset(stats, 0, sizeof *stats);
>     }
>     add_ufid_tc_mapping(netdev, ufid, &id);
>     + } else {
>     + goto out;
>     + }
>
> Guess this will look nicer:
>
> |if (err) { goto out; } if (stats) { memset(stats, 0, sizeof *stats); 
> } add_ufid_tc_mapping(netdev, ufid, &id); return 0; |
>
>     +
>     + return 0;
>     +
>     +out:
>     + if (sample_gid) {
>     + sgid_free(sample_gid);
>     }
>
>     return err;
>     diff --git a/lib/tc.c b/lib/tc.c
>     index 33287ea05..c5788220f 100644
>     --- a/lib/tc.c
>     +++ b/lib/tc.c
>     @@ -23,14 +23,15 @@
>     #include <linux/if_packet.h>
>     #include <linux/rtnetlink.h>
>     #include <linux/tc_act/tc_csum.h>
>     +#include <linux/tc_act/tc_ct.h>
>     #include <linux/tc_act/tc_gact.h>
>     #include <linux/tc_act/tc_mirred.h>
>     #include <linux/tc_act/tc_mpls.h>
>     #include <linux/tc_act/tc_pedit.h>
>     +#include <linux/tc_act/tc_sample.h>
>     #include <linux/tc_act/tc_skbedit.h>
>     #include <linux/tc_act/tc_tunnel_key.h>
>     #include <linux/tc_act/tc_vlan.h>
>     -#include <linux/tc_act/tc_ct.h>
>     #include <linux/gen_stats.h>
>     #include <net/if.h>
>     #include <unistd.h>
>     @@ -1341,6 +1342,38 @@ nl_parse_act_gact(struct nlattr *options,
>     struct tc_flower *flower)
>     return 0;
>     }
>
>     +static const struct nl_policy sample_policy[] = {
>     + [TCA_SAMPLE_PARMS] = { .type = NL_A_UNSPEC,
>     + .min_len = sizeof(struct tc_sample),
>     + .optional = false, },
>     + [TCA_SAMPLE_PSAMPLE_GROUP] = { .type = NL_A_U32,
>     + .optional = false, },
>     + [TCA_SAMPLE_RATE] = { .type = NL_A_U32,
>     + .optional = false, },
>     +};
>     +
>     +static int
>     +nl_parse_act_sample(struct nlattr *options, struct tc_flower
>     *flower)
>     +{
>     + struct nlattr *sample_attrs[ARRAY_SIZE(sample_policy)];
>     + struct tc_action *action;
>     +
>     + if (!nl_parse_nested(options, sample_policy, sample_attrs,
>     + ARRAY_SIZE(sample_policy))) {
>     + VLOG_ERR_RL(&error_rl, "failed to parse sample action options");
>     + return EPROTO;
>     + }
>     +
>     + action = &flower->actions[flower->action_count++];
>     + action->type = TC_ACT_SAMPLE;
>     + action->sample.group_id =
>     + nl_attr_get_u32(sample_attrs[TCA_SAMPLE_PSAMPLE_GROUP]);
>     + action->sample.rate =
>     + nl_attr_get_u32(sample_attrs[TCA_SAMPLE_RATE]);
>     +
>     + return 0;
>     +}
>     +
>     static const struct nl_policy mirred_policy[] = {
>     [TCA_MIRRED_PARMS] = { .type = NL_A_UNSPEC,
>     .min_len = sizeof(struct tc_mirred),
>     @@ -1749,6 +1782,8 @@ nl_parse_single_action(struct nlattr
>     *action, struct tc_flower *flower,
>     /* Added for TC rule only (not in OvS rule) so ignore. */
>     } else if (!strcmp(act_kind, "ct")) {
>     nl_parse_act_ct(act_options, flower);
>     + } else if (!strcmp(act_kind, "sample")) {
>     + nl_parse_act_sample(act_options, flower);
>     } else {
>     VLOG_ERR_RL(&error_rl, "unknown tc action kind: %s", act_kind);
>     err = EINVAL;
>     @@ -2375,6 +2410,23 @@ nl_msg_put_act_mirred(struct ofpbuf
>     *request, int ifindex, int action,
>     nl_msg_end_nested(request, offset);
>     }
>
>     +static void
>     +nl_msg_put_act_sample(struct ofpbuf *request, uint32_t rate,
>     uint32_t group_id)
>     +{
>     + size_t offset;
>     +
>     + nl_msg_put_string(request, TCA_ACT_KIND, "sample");
>     + offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS |
>     NLA_F_NESTED);
>     + {
>     + struct tc_sample parm = { .action = TC_ACT_PIPE };
>     +
>     + nl_msg_put_unspec(request, TCA_SAMPLE_PARMS, &parm, sizeof parm);
>     + nl_msg_put_u32(request, TCA_SAMPLE_RATE, rate);
>     + nl_msg_put_u32(request, TCA_SAMPLE_PSAMPLE_GROUP, group_id);
>     + }
>     + nl_msg_end_nested(request, offset);
>     +}
>     +
>     static inline void
>     nl_msg_put_act_cookie(struct ofpbuf *request, struct tc_cookie *ck) {
>     if (ck->len) {
>     @@ -2634,6 +2686,13 @@ nl_msg_put_flower_acts(struct ofpbuf
>     *request, struct tc_flower *flower)
>     nl_msg_end_nested(request, act_offset);
>     }
>     break;
>     + case TC_ACT_SAMPLE: {
>     + act_offset = nl_msg_start_nested(request, act_index++);
>     + nl_msg_put_act_sample(request, action->sample.rate,
>     + action->sample.group_id);
>     + nl_msg_end_nested(request, act_offset);
>     + }
>     + break;
>     case TC_ACT_OUTPUT: {
>     if (!released && flower->tunnel) {
>     act_offset = nl_msg_start_nested(request, act_index++);
>     diff --git a/lib/tc.h b/lib/tc.h
>     index 2e4084f48..f764d7d1e 100644
>     --- a/lib/tc.h
>     +++ b/lib/tc.h
>     @@ -174,6 +174,7 @@ enum tc_action_type {
>     TC_ACT_MPLS_SET,
>     TC_ACT_GOTO,
>     TC_ACT_CT,
>     + TC_ACT_SAMPLE,
>     };
>
>     enum nat_type {
>     @@ -256,6 +257,11 @@ struct tc_action {
>     bool force;
>     bool commit;
>     } ct;
>     +
>     + struct {
>     + uint32_t rate;
>     + uint32_t group_id;
>     + } sample;
>     };
>
>     enum tc_action_type type;
>     @@ -294,12 +300,14 @@ tc_make_tcf_id(int ifindex, uint32_t
>     block_id, uint16_t prio,
>     }
>
>     static inline struct tcf_id
>     -tc_make_tcf_id_chain(int ifindex, uint32_t block_id, uint32_t chain,
>     - uint16_t prio, enum tc_qdisc_hook hook)
>     +tc_make_tcf_id_all(int ifindex, uint32_t block_id, uint32_t chain,
>     + uint16_t prio, enum tc_qdisc_hook hook,
>     + uint32_t sample_group_id)
>     {
>     struct tcf_id id = tc_make_tcf_id(ifindex, block_id, prio, hook);
>
>     id.chain = chain;
>     + id.sample_group_id = sample_group_id;
>
>     return id;
>     }
>     @@ -313,7 +321,8 @@ is_tcf_id_eq(struct tcf_id *id1, struct tcf_id
>     *id2)
>     && id1->hook == id2->hook
>     && id1->block_id == id2->block_id
>     && id1->ifindex == id2->ifindex
>     - && id1->chain == id2->chain;
>     + && id1->chain == id2->chain
>     + && id1->sample_group_id == id2->sample_group_id;
>     }
>
>     enum tc_offload_policy {
>     -- 
>     2.21.0
>
Eelco Chaudron Sept. 13, 2021, 9:18 a.m. UTC | #3
On 10 Sep 2021, at 13:02, Chris Mi wrote:

> On 9/9/2021 10:29 PM, Eelco Chaudron wrote:
>>
>> On 15 Jul 2021, at 8:01, Chris Mi wrote:
>>
>>     Create a unique group ID to map the sFlow info when offloading sFlow
>>     action to TC. When showing the offloaded datapath flows, translate
>>     the
>>     group ID from TC sample action to sFlow info using the mapping.
>>
>> See comments below and this completes my review of the v14. See the other email on the update of the flows going wrong.
>>
> I'm struggling with the test these days and make little progress. I usually use sflowtool to receive the sFlow packets.
> After investigation, I found the test uses 'ovstest test-sflow'. If I use it manually, it works both for offload and none-offload.
> But if I integrate it in the new test, it only works for non-offload. It can't receive sFlow packets for offload.
> I don't know what is missing.
The test framework behaves oddly sometimes. I always add a lot of debugging stuff to make sure I know what is running, and/or add a long sleep before the error, so I have time to debug the running setup.

> So I plan to address v14 comments next week and ignore the test now. Maybe I can find something
> unusual later on.  And thanks for the full review on v14.
>
> -Chris
>>
>> I’ll do a more thorough review on v15 assuming the issue gets fixed, especially the sgid allocation/re-use/free.
>>
>>     Signed-off-by: Chris Mi <cmi@nvidia.com>
>>     Reviewed-by: Eli Britstein <elibr@nvidia.com>
>>     ---
>>     NEWS | 1 +
>>     lib/netdev-offload-tc.c | 212
>>     +++++++++++++++++++++++++++++++++++++---
>>     lib/tc.c | 61 +++++++++++-
>>     lib/tc.h | 15 ++-
>>     4 files changed, 272 insertions(+), 17 deletions(-)
>>
>>     diff --git a/NEWS b/NEWS
>>     index 6cdccc715..7c0361e18 100644
>>     --- a/NEWS
>>     +++ b/NEWS
>>     @@ -50,6 +50,7 @@ Post-v2.15.0
>>     - OVS now reports the datapath capability 'ct_zero_snat', which
>>     reflects
>>     whether the SNAT with all-zero IP address is supported.
>>     See ovs-vswitchd.conf.db(5) for details.
>>     + - Add sFlow offload support for kernel (netlink) datapath.
>>
>>     v2.15.0 - 15 Feb 2021
>>     diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
>>     index 2f16cf279..71e51394f 100644
>>     --- a/lib/netdev-offload-tc.c
>>     +++ b/lib/netdev-offload-tc.c
>>     @@ -20,6 +20,7 @@
>>     #include <linux/if_ether.h>
>>
>>     #include "dpif.h"
>>     +#include "dpif-offload-provider.h"
>>     #include "hash.h"
>>     #include "openvswitch/hmap.h"
>>     #include "openvswitch/match.h"
>>     @@ -1087,6 +1088,19 @@ parse_tc_flower_to_match(struct tc_flower
>>     *flower,
>>     action = flower->actions;
>>     for (i = 0; i < flower->action_count; i++, action++) {
>>     switch (action->type) {
>>     + case TC_ACT_SAMPLE: {
>>     + const struct sgid_node *node;
>>     +
>>     + node = sgid_find(action->sample.group_id);
>>     + if (!node) {
>>     + VLOG_ERR_RL(&error_rl, "%s: sgid node is NULL, sgid: %d",
>>     + __func__, action->sample.group_id);
>>
>> This should probably be a warning, as this could happen if the DP has not yet been synced? i.e. revalidator being in the process of cleanup and someone requesting dp content?
>>
>>     + return ENOENT;
>>     + }
>>     + nl_msg_put(buf, node->sflow.action,
>>     + node->sflow.action->nla_len);
>>     + }
>>     + break;
>>     case TC_ACT_VLAN_POP: {
>>     nl_msg_put_flag(buf, OVS_ACTION_ATTR_POP_VLAN);
>>     }
>>     @@ -1825,6 +1839,156 @@ parse_match_ct_state_to_flower(struct
>>     tc_flower *flower, struct match *match)
>>     }
>>     }
>>
>>     +static int
>>     +parse_userspace_attributes(const struct nlattr *actions,
>>     + struct dpif_sflow_attr *sflow_attr)
>>     +{
>>     + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>>     + const struct nlattr *nla;
>>     + unsigned int left;
>>     +
>>     + NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
>>     + if (nl_attr_type(nla) == OVS_USERSPACE_ATTR_USERDATA) {
>>     + struct user_action_cookie *cookie;
>>     +
>>     + cookie = CONST_CAST(struct user_action_cookie *, nl_attr_get(nla));
>>     + if (cookie->type == USER_ACTION_COOKIE_SFLOW) {
>>
>> Here I think the userdata type should be changed to "struct nlattr * " just like in the struct dpif_upcall.
>> This should probably be done in the patch introducing the dpif_sflow_attr, so it will also be aligned with the actions member.
>>
>>     + sflow_attr->userdata = CONST_CAST(void *, nl_attr_get(nla));
>>     + sflow_attr->userdata_len = nl_attr_get_size(nla);
>>     + return 0;
>>     + }
>>     + }
>>     + }
>>     +
>>     + VLOG_DBG_RL(&rl, "%s: cannot offload userspace action other than
>>     sFlow",
>>     + __func__);
>>     + return EOPNOTSUPP;
>>     +}
>>     +
>>     +static int
>>     +parse_sample_actions_attribute(const struct nlattr *actions,
>>     + struct dpif_sflow_attr *sflow_attr)
>>     +{
>>     + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>>     + const struct nlattr *nla;
>>     + unsigned int left;
>>     + int err = EINVAL;
>>     +
>>     + NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
>>     + if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
>>     + err = parse_userspace_attributes(nla, sflow_attr);
>>     + } else {
>>     + /* We can't offload other nested actions */
>>     + VLOG_DBG_RL(&rl, "%s: can only offload "
>>     + "OVS_ACTION_ATTR_USERSPACE attribute", __func__);
>>     + return EINVAL;
>>     + }
>>     + }
>>     +
>>     + if (err) {
>>     + VLOG_ERR_RL(&error_rl, "%s: no OVS_ACTION_ATTR_USERSPACE
>>     attribute",
>>     + __func__);
>>     + }
>>     + return err;
>>     +}
>>     +
>>     +static int
>>     +parse_sample_action(struct tc_flower *flower, struct tc_action
>>     *tc_action,
>>     + const struct nlattr *sample_action,
>>     + const struct flow_tnl *tnl, uint32_t *group_id,
>>     + const ovs_u128 *ufid)
>>     +{
>>     + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>>     + struct dpif_sflow_attr sflow_attr;
>>     + const struct nlattr *nla;
>>     + unsigned int left;
>>     + int ret = EINVAL;
>>     +
>>     + if (*group_id) {
>>     + VLOG_ERR_RL(&error_rl, "%s: Only a single TC_SAMPLE action "
>>     + "per flow is supported", __func__);
>>     + return EOPNOTSUPP;
>>     + }
>>     +
>>     + memset(&sflow_attr, 0, sizeof sflow_attr);
>>     + sflow_attr.ufid = *ufid;
>>     + sflow_attr.action = sample_action;
>>     +
>>     + if (flower->tunnel) {
>>     + sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, tnl);
>>     + }
>>     +
>>     + NL_NESTED_FOR_EACH_UNSAFE (nla, left, sample_action) {
>>     + if (nl_attr_type(nla) == OVS_SAMPLE_ATTR_ACTIONS) {
>>     + ret = parse_sample_actions_attribute(nla, &sflow_attr);
>>     + } else if (nl_attr_type(nla) == OVS_SAMPLE_ATTR_PROBABILITY) {
>>     + tc_action->type = TC_ACT_SAMPLE;
>>     + tc_action->sample.rate = UINT32_MAX / nl_attr_get_u32(nla);
>>     + } else {
>>     + return EINVAL;
>>     + }
>>     + }
>>     +
>>     + if (!tc_action->sample.rate || ret) {
>>     + return EINVAL;
>>     + }
>>     +
>>     + *group_id = sgid_alloc_ctx(&sflow_attr);
>>     + if (!*group_id) {
>>     + VLOG_DBG_RL(&rl, "%s: Failed allocating group id for sample
>>     action",
>>     + __func__);
>>     + return ENOENT;
>>     + }
>>     + tc_action->sample.group_id = *group_id;
>>     + flower->action_count++;
>>     +
>>     + return 0;
>>     +}
>>     +
>>     +static int
>>     +parse_userspace_action(struct tc_flower *flower, struct tc_action
>>     *tc_action,
>>     + const struct nlattr *userspace_action,
>>     + const struct flow_tnl *tnl, uint32_t *group_id,
>>     + const ovs_u128 *ufid)
>>     +{
>>     + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>>     + struct dpif_sflow_attr sflow_attr;
>>     + int err;
>>     +
>>     + if (*group_id) {
>>     + VLOG_ERR_RL(&error_rl, "%s: Only a single TC_SAMPLE action "
>>     + "per flow is supported", __func__);
>>     + return EOPNOTSUPP;
>>     + }
>>     +
>>     + /* If sampling rate is 1, there is only a sFlow cookie inside of a
>>     + * userspace action, but no sample attribute. That means we can
>>     + * only offload userspace actions for sFlow.
>>     + */
>>     + memset(&sflow_attr, 0, sizeof sflow_attr);
>>     + sflow_attr.ufid = *ufid;
>>     + if (flower->tunnel) {
>>     + sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, tnl);
>>     + }
>>     + err = parse_userspace_attributes(userspace_action, &sflow_attr);
>>     + if (err) {
>>     + return err;
>>     + }
>>     + sflow_attr.action = userspace_action;
>>     + *group_id = sgid_alloc_ctx(&sflow_attr);
>>     + if (!*group_id) {
>>     + VLOG_DBG_RL(&rl, "%s: Failed allocating group id for sample
>>     action",
>>     + __func__);
>>     + return ENOENT;
>>     + }
>>     + tc_action->type = TC_ACT_SAMPLE;
>>     + tc_action->sample.group_id = *group_id;
>>     + tc_action->sample.rate = 1;
>>     + flower->action_count++;
>>     +
>>     + return 0;
>>     +}
>>     +
>>     static int
>>     netdev_tc_flow_put(struct netdev *netdev, struct match *match,
>>     struct nlattr *actions, size_t actions_len,
>>     @@ -1840,6 +2004,7 @@ netdev_tc_flow_put(struct netdev *netdev,
>>     struct match *match,
>>     const struct flow_tnl *tnl_mask = &mask->tunnel;
>>     struct tc_action *action;
>>     bool recirc_act = false;
>>     + uint32_t sample_gid = 0;
>>     uint32_t block_id = 0;
>>     struct nlattr *nla;
>>     struct tcf_id id;
>>     @@ -2092,7 +2257,8 @@ netdev_tc_flow_put(struct netdev *netdev,
>>     struct match *match,
>>     NL_ATTR_FOR_EACH(nla, left, actions, actions_len) {
>>     if (flower.action_count >= TCA_ACT_MAX_NUM) {
>>     VLOG_DBG_RL(&rl, "Can only support %d actions", TCA_ACT_MAX_NUM);
>>     - return EOPNOTSUPP;
>>     + err = EOPNOTSUPP;
>>     + goto out;
>>     }
>>     action = &flower.actions[flower.action_count];
>>     if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
>>     @@ -2102,7 +2268,8 @@ netdev_tc_flow_put(struct netdev *netdev,
>>     struct match *match,
>>
>>     if (!outdev) {
>>     VLOG_DBG_RL(&rl, "Can't find netdev for output port %d", port);
>>     - return ENODEV;
>>     + err = ENODEV;
>>     + goto out;
>>     }
>>     action->out.ifindex_out = netdev_get_ifindex(outdev);
>>     action->out.ingress = is_internal_port(netdev_get_type(outdev));
>>     @@ -2140,7 +2307,7 @@ netdev_tc_flow_put(struct netdev *netdev,
>>     struct match *match,
>>
>>     err = parse_put_flow_set_action(&flower, action, set, set_len);
>>     if (err) {
>>     - return err;
>>     + goto out;
>>     }
>>     if (action->type == TC_ACT_ENCAP) {
>>     action->encap.tp_dst = info->tp_dst_port;
>>     @@ -2153,7 +2320,7 @@ netdev_tc_flow_put(struct netdev *netdev,
>>     struct match *match,
>>     err = parse_put_flow_set_masked_action(&flower, action, set,
>>     set_len, true);
>>     if (err) {
>>     - return err;
>>     + goto out;
>>     }
>>     } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CT) {
>>     const struct nlattr *ct = nl_attr_get(nla);
>>     @@ -2165,7 +2332,7 @@ netdev_tc_flow_put(struct netdev *netdev,
>>     struct match *match,
>>
>>     err = parse_put_flow_ct_action(&flower, action, ct, ct_len);
>>     if (err) {
>>     - return err;
>>     + goto out;
>>     }
>>     } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CT_CLEAR) {
>>     action->type = TC_ACT_CT;
>>     @@ -2181,20 +2348,29 @@ netdev_tc_flow_put(struct netdev *netdev,
>>     struct match *match,
>>     action->chain = 0; /* 0 is reserved and not used by recirc. */
>>     flower.action_count++;
>>     } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SAMPLE) {
>>     - struct dpif_sflow_attr sflow_attr;
>>     -
>>     - memset(&sflow_attr, 0, sizeof sflow_attr);
>>     - sgid_alloc_ctx(&sflow_attr);
>>     + err = parse_sample_action(&flower, action, nla, tnl, &sample_gid,
>>     + ufid);
>>     + if (err) {
>>     + goto out;
>>     + }
>>     + } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
>>     + err = parse_userspace_action(&flower, action, nla, tnl,
>>     + &sample_gid, ufid);
>>     + if (err) {
>>     + goto out;
>>     + }
>>     } else {
>>     VLOG_DBG_RL(&rl, "unsupported put action type: %d",
>>     nl_attr_type(nla));
>>     - return EOPNOTSUPP;
>>     + err = EOPNOTSUPP;
>>     + goto out;
>>     }
>>     }
>>
>>     if ((chain || recirc_act) && !info->recirc_id_shared_with_tc) {
>>     VLOG_ERR_RL(&error_rl, "flow_put: recirc_id sharing not supported");
>>     - return EOPNOTSUPP;
>>     + err = EOPNOTSUPP;
>>     + goto out;
>>     }
>>
>>     if (get_ufid_tc_mapping(ufid, &id) == 0) {
>>     @@ -2206,20 +2382,30 @@ netdev_tc_flow_put(struct netdev *netdev,
>>     struct match *match,
>>     prio = get_prio_for_tc_flower(&flower);
>>     if (prio == 0) {
>>     VLOG_ERR_RL(&rl, "couldn't get tc prio: %s", ovs_strerror(ENOSPC));
>>     - return ENOSPC;
>>     + err = ENOSPC;
>>     + goto out;
>>     }
>>
>>     flower.act_cookie.data = ufid;
>>     flower.act_cookie.len = sizeof *ufid;
>>
>>     block_id = get_block_id_from_netdev(netdev);
>>     - id = tc_make_tcf_id_chain(ifindex, block_id, chain, prio, hook);
>>     + id = tc_make_tcf_id_all(ifindex, block_id, chain, prio, hook,
>>     sample_gid);
>>     err = tc_replace_flower(&id, &flower);
>>     if (!err) {
>>     if (stats) {
>>     memset(stats, 0, sizeof *stats);
>>     }
>>     add_ufid_tc_mapping(netdev, ufid, &id);
>>     + } else {
>>     + goto out;
>>     + }
>>
>> Guess this will look nicer:
>>
>> |if (err) { goto out; } if (stats) { memset(stats, 0, sizeof *stats); } add_ufid_tc_mapping(netdev, ufid, &id); return 0; |
>>
>>     +
>>     + return 0;
>>     +
>>     +out:
>>     + if (sample_gid) {
>>     + sgid_free(sample_gid);
>>     }
>>
>>     return err;
>>     diff --git a/lib/tc.c b/lib/tc.c
>>     index 33287ea05..c5788220f 100644
>>     --- a/lib/tc.c
>>     +++ b/lib/tc.c
>>     @@ -23,14 +23,15 @@
>>     #include <linux/if_packet.h>
>>     #include <linux/rtnetlink.h>
>>     #include <linux/tc_act/tc_csum.h>
>>     +#include <linux/tc_act/tc_ct.h>
>>     #include <linux/tc_act/tc_gact.h>
>>     #include <linux/tc_act/tc_mirred.h>
>>     #include <linux/tc_act/tc_mpls.h>
>>     #include <linux/tc_act/tc_pedit.h>
>>     +#include <linux/tc_act/tc_sample.h>
>>     #include <linux/tc_act/tc_skbedit.h>
>>     #include <linux/tc_act/tc_tunnel_key.h>
>>     #include <linux/tc_act/tc_vlan.h>
>>     -#include <linux/tc_act/tc_ct.h>
>>     #include <linux/gen_stats.h>
>>     #include <net/if.h>
>>     #include <unistd.h>
>>     @@ -1341,6 +1342,38 @@ nl_parse_act_gact(struct nlattr *options,
>>     struct tc_flower *flower)
>>     return 0;
>>     }
>>
>>     +static const struct nl_policy sample_policy[] = {
>>     + [TCA_SAMPLE_PARMS] = { .type = NL_A_UNSPEC,
>>     + .min_len = sizeof(struct tc_sample),
>>     + .optional = false, },
>>     + [TCA_SAMPLE_PSAMPLE_GROUP] = { .type = NL_A_U32,
>>     + .optional = false, },
>>     + [TCA_SAMPLE_RATE] = { .type = NL_A_U32,
>>     + .optional = false, },
>>     +};
>>     +
>>     +static int
>>     +nl_parse_act_sample(struct nlattr *options, struct tc_flower
>>     *flower)
>>     +{
>>     + struct nlattr *sample_attrs[ARRAY_SIZE(sample_policy)];
>>     + struct tc_action *action;
>>     +
>>     + if (!nl_parse_nested(options, sample_policy, sample_attrs,
>>     + ARRAY_SIZE(sample_policy))) {
>>     + VLOG_ERR_RL(&error_rl, "failed to parse sample action options");
>>     + return EPROTO;
>>     + }
>>     +
>>     + action = &flower->actions[flower->action_count++];
>>     + action->type = TC_ACT_SAMPLE;
>>     + action->sample.group_id =
>>     + nl_attr_get_u32(sample_attrs[TCA_SAMPLE_PSAMPLE_GROUP]);
>>     + action->sample.rate =
>>     + nl_attr_get_u32(sample_attrs[TCA_SAMPLE_RATE]);
>>     +
>>     + return 0;
>>     +}
>>     +
>>     static const struct nl_policy mirred_policy[] = {
>>     [TCA_MIRRED_PARMS] = { .type = NL_A_UNSPEC,
>>     .min_len = sizeof(struct tc_mirred),
>>     @@ -1749,6 +1782,8 @@ nl_parse_single_action(struct nlattr
>>     *action, struct tc_flower *flower,
>>     /* Added for TC rule only (not in OvS rule) so ignore. */
>>     } else if (!strcmp(act_kind, "ct")) {
>>     nl_parse_act_ct(act_options, flower);
>>     + } else if (!strcmp(act_kind, "sample")) {
>>     + nl_parse_act_sample(act_options, flower);
>>     } else {
>>     VLOG_ERR_RL(&error_rl, "unknown tc action kind: %s", act_kind);
>>     err = EINVAL;
>>     @@ -2375,6 +2410,23 @@ nl_msg_put_act_mirred(struct ofpbuf
>>     *request, int ifindex, int action,
>>     nl_msg_end_nested(request, offset);
>>     }
>>
>>     +static void
>>     +nl_msg_put_act_sample(struct ofpbuf *request, uint32_t rate,
>>     uint32_t group_id)
>>     +{
>>     + size_t offset;
>>     +
>>     + nl_msg_put_string(request, TCA_ACT_KIND, "sample");
>>     + offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS |
>>     NLA_F_NESTED);
>>     + {
>>     + struct tc_sample parm = { .action = TC_ACT_PIPE };
>>     +
>>     + nl_msg_put_unspec(request, TCA_SAMPLE_PARMS, &parm, sizeof parm);
>>     + nl_msg_put_u32(request, TCA_SAMPLE_RATE, rate);
>>     + nl_msg_put_u32(request, TCA_SAMPLE_PSAMPLE_GROUP, group_id);
>>     + }
>>     + nl_msg_end_nested(request, offset);
>>     +}
>>     +
>>     static inline void
>>     nl_msg_put_act_cookie(struct ofpbuf *request, struct tc_cookie *ck) {
>>     if (ck->len) {
>>     @@ -2634,6 +2686,13 @@ nl_msg_put_flower_acts(struct ofpbuf
>>     *request, struct tc_flower *flower)
>>     nl_msg_end_nested(request, act_offset);
>>     }
>>     break;
>>     + case TC_ACT_SAMPLE: {
>>     + act_offset = nl_msg_start_nested(request, act_index++);
>>     + nl_msg_put_act_sample(request, action->sample.rate,
>>     + action->sample.group_id);
>>     + nl_msg_end_nested(request, act_offset);
>>     + }
>>     + break;
>>     case TC_ACT_OUTPUT: {
>>     if (!released && flower->tunnel) {
>>     act_offset = nl_msg_start_nested(request, act_index++);
>>     diff --git a/lib/tc.h b/lib/tc.h
>>     index 2e4084f48..f764d7d1e 100644
>>     --- a/lib/tc.h
>>     +++ b/lib/tc.h
>>     @@ -174,6 +174,7 @@ enum tc_action_type {
>>     TC_ACT_MPLS_SET,
>>     TC_ACT_GOTO,
>>     TC_ACT_CT,
>>     + TC_ACT_SAMPLE,
>>     };
>>
>>     enum nat_type {
>>     @@ -256,6 +257,11 @@ struct tc_action {
>>     bool force;
>>     bool commit;
>>     } ct;
>>     +
>>     + struct {
>>     + uint32_t rate;
>>     + uint32_t group_id;
>>     + } sample;
>>     };
>>
>>     enum tc_action_type type;
>>     @@ -294,12 +300,14 @@ tc_make_tcf_id(int ifindex, uint32_t
>>     block_id, uint16_t prio,
>>     }
>>
>>     static inline struct tcf_id
>>     -tc_make_tcf_id_chain(int ifindex, uint32_t block_id, uint32_t chain,
>>     - uint16_t prio, enum tc_qdisc_hook hook)
>>     +tc_make_tcf_id_all(int ifindex, uint32_t block_id, uint32_t chain,
>>     + uint16_t prio, enum tc_qdisc_hook hook,
>>     + uint32_t sample_group_id)
>>     {
>>     struct tcf_id id = tc_make_tcf_id(ifindex, block_id, prio, hook);
>>
>>     id.chain = chain;
>>     + id.sample_group_id = sample_group_id;
>>
>>     return id;
>>     }
>>     @@ -313,7 +321,8 @@ is_tcf_id_eq(struct tcf_id *id1, struct tcf_id
>>     *id2)
>>     && id1->hook == id2->hook
>>     && id1->block_id == id2->block_id
>>     && id1->ifindex == id2->ifindex
>>     - && id1->chain == id2->chain;
>>     + && id1->chain == id2->chain
>>     + && id1->sample_group_id == id2->sample_group_id;
>>     }
>>
>>     enum tc_offload_policy {
>>     --
>>     2.21.0
>>
Chris Mi Sept. 13, 2021, 11:53 a.m. UTC | #4
On 9/13/2021 5:18 PM, Eelco Chaudron wrote:
> On 10 Sep 2021, at 13:02, Chris Mi wrote:
>
>> On 9/9/2021 10:29 PM, Eelco Chaudron wrote:
>>> On 15 Jul 2021, at 8:01, Chris Mi wrote:
>>>
>>>      Create a unique group ID to map the sFlow info when offloading sFlow
>>>      action to TC. When showing the offloaded datapath flows, translate
>>>      the
>>>      group ID from TC sample action to sFlow info using the mapping.
>>>
>>> See comments below and this completes my review of the v14. See the other email on the update of the flows going wrong.
>>>
>> I'm struggling with the test these days and make little progress. I usually use sflowtool to receive the sFlow packets.
>> After investigation, I found the test uses 'ovstest test-sflow'. If I use it manually, it works both for offload and none-offload.
>> But if I integrate it in the new test, it only works for non-offload. It can't receive sFlow packets for offload.
>> I don't know what is missing.
> The test framework behaves oddly sometimes. I always add a lot of debugging stuff to make sure I know what is running, and/or add a long sleep before the error, so I have time to debug the running setup.
Thanks, Eelco. This is helpful.

-Chris
>
>> So I plan to address v14 comments next week and ignore the test now. Maybe I can find something
>> unusual later on.  And thanks for the full review on v14.
>>
>> -Chris
>>> I’ll do a more thorough review on v15 assuming the issue gets fixed, especially the sgid allocation/re-use/free.
>>>
>>>      Signed-off-by: Chris Mi <cmi@nvidia.com>
>>>      Reviewed-by: Eli Britstein <elibr@nvidia.com>
>>>      ---
>>>      NEWS | 1 +
>>>      lib/netdev-offload-tc.c | 212
>>>      +++++++++++++++++++++++++++++++++++++---
>>>      lib/tc.c | 61 +++++++++++-
>>>      lib/tc.h | 15 ++-
>>>      4 files changed, 272 insertions(+), 17 deletions(-)
>>>
>>>      diff --git a/NEWS b/NEWS
>>>      index 6cdccc715..7c0361e18 100644
>>>      --- a/NEWS
>>>      +++ b/NEWS
>>>      @@ -50,6 +50,7 @@ Post-v2.15.0
>>>      - OVS now reports the datapath capability 'ct_zero_snat', which
>>>      reflects
>>>      whether the SNAT with all-zero IP address is supported.
>>>      See ovs-vswitchd.conf.db(5) for details.
>>>      + - Add sFlow offload support for kernel (netlink) datapath.
>>>
>>>      v2.15.0 - 15 Feb 2021
>>>      diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
>>>      index 2f16cf279..71e51394f 100644
>>>      --- a/lib/netdev-offload-tc.c
>>>      +++ b/lib/netdev-offload-tc.c
>>>      @@ -20,6 +20,7 @@
>>>      #include <linux/if_ether.h>
>>>
>>>      #include "dpif.h"
>>>      +#include "dpif-offload-provider.h"
>>>      #include "hash.h"
>>>      #include "openvswitch/hmap.h"
>>>      #include "openvswitch/match.h"
>>>      @@ -1087,6 +1088,19 @@ parse_tc_flower_to_match(struct tc_flower
>>>      *flower,
>>>      action = flower->actions;
>>>      for (i = 0; i < flower->action_count; i++, action++) {
>>>      switch (action->type) {
>>>      + case TC_ACT_SAMPLE: {
>>>      + const struct sgid_node *node;
>>>      +
>>>      + node = sgid_find(action->sample.group_id);
>>>      + if (!node) {
>>>      + VLOG_ERR_RL(&error_rl, "%s: sgid node is NULL, sgid: %d",
>>>      + __func__, action->sample.group_id);
>>>
>>> This should probably be a warning, as this could happen if the DP has not yet been synced? i.e. revalidator being in the process of cleanup and someone requesting dp content?
>>>
>>>      + return ENOENT;
>>>      + }
>>>      + nl_msg_put(buf, node->sflow.action,
>>>      + node->sflow.action->nla_len);
>>>      + }
>>>      + break;
>>>      case TC_ACT_VLAN_POP: {
>>>      nl_msg_put_flag(buf, OVS_ACTION_ATTR_POP_VLAN);
>>>      }
>>>      @@ -1825,6 +1839,156 @@ parse_match_ct_state_to_flower(struct
>>>      tc_flower *flower, struct match *match)
>>>      }
>>>      }
>>>
>>>      +static int
>>>      +parse_userspace_attributes(const struct nlattr *actions,
>>>      + struct dpif_sflow_attr *sflow_attr)
>>>      +{
>>>      + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>>>      + const struct nlattr *nla;
>>>      + unsigned int left;
>>>      +
>>>      + NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
>>>      + if (nl_attr_type(nla) == OVS_USERSPACE_ATTR_USERDATA) {
>>>      + struct user_action_cookie *cookie;
>>>      +
>>>      + cookie = CONST_CAST(struct user_action_cookie *, nl_attr_get(nla));
>>>      + if (cookie->type == USER_ACTION_COOKIE_SFLOW) {
>>>
>>> Here I think the userdata type should be changed to "struct nlattr * " just like in the struct dpif_upcall.
>>> This should probably be done in the patch introducing the dpif_sflow_attr, so it will also be aligned with the actions member.
>>>
>>>      + sflow_attr->userdata = CONST_CAST(void *, nl_attr_get(nla));
>>>      + sflow_attr->userdata_len = nl_attr_get_size(nla);
>>>      + return 0;
>>>      + }
>>>      + }
>>>      + }
>>>      +
>>>      + VLOG_DBG_RL(&rl, "%s: cannot offload userspace action other than
>>>      sFlow",
>>>      + __func__);
>>>      + return EOPNOTSUPP;
>>>      +}
>>>      +
>>>      +static int
>>>      +parse_sample_actions_attribute(const struct nlattr *actions,
>>>      + struct dpif_sflow_attr *sflow_attr)
>>>      +{
>>>      + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>>>      + const struct nlattr *nla;
>>>      + unsigned int left;
>>>      + int err = EINVAL;
>>>      +
>>>      + NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
>>>      + if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
>>>      + err = parse_userspace_attributes(nla, sflow_attr);
>>>      + } else {
>>>      + /* We can't offload other nested actions */
>>>      + VLOG_DBG_RL(&rl, "%s: can only offload "
>>>      + "OVS_ACTION_ATTR_USERSPACE attribute", __func__);
>>>      + return EINVAL;
>>>      + }
>>>      + }
>>>      +
>>>      + if (err) {
>>>      + VLOG_ERR_RL(&error_rl, "%s: no OVS_ACTION_ATTR_USERSPACE
>>>      attribute",
>>>      + __func__);
>>>      + }
>>>      + return err;
>>>      +}
>>>      +
>>>      +static int
>>>      +parse_sample_action(struct tc_flower *flower, struct tc_action
>>>      *tc_action,
>>>      + const struct nlattr *sample_action,
>>>      + const struct flow_tnl *tnl, uint32_t *group_id,
>>>      + const ovs_u128 *ufid)
>>>      +{
>>>      + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>>>      + struct dpif_sflow_attr sflow_attr;
>>>      + const struct nlattr *nla;
>>>      + unsigned int left;
>>>      + int ret = EINVAL;
>>>      +
>>>      + if (*group_id) {
>>>      + VLOG_ERR_RL(&error_rl, "%s: Only a single TC_SAMPLE action "
>>>      + "per flow is supported", __func__);
>>>      + return EOPNOTSUPP;
>>>      + }
>>>      +
>>>      + memset(&sflow_attr, 0, sizeof sflow_attr);
>>>      + sflow_attr.ufid = *ufid;
>>>      + sflow_attr.action = sample_action;
>>>      +
>>>      + if (flower->tunnel) {
>>>      + sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, tnl);
>>>      + }
>>>      +
>>>      + NL_NESTED_FOR_EACH_UNSAFE (nla, left, sample_action) {
>>>      + if (nl_attr_type(nla) == OVS_SAMPLE_ATTR_ACTIONS) {
>>>      + ret = parse_sample_actions_attribute(nla, &sflow_attr);
>>>      + } else if (nl_attr_type(nla) == OVS_SAMPLE_ATTR_PROBABILITY) {
>>>      + tc_action->type = TC_ACT_SAMPLE;
>>>      + tc_action->sample.rate = UINT32_MAX / nl_attr_get_u32(nla);
>>>      + } else {
>>>      + return EINVAL;
>>>      + }
>>>      + }
>>>      +
>>>      + if (!tc_action->sample.rate || ret) {
>>>      + return EINVAL;
>>>      + }
>>>      +
>>>      + *group_id = sgid_alloc_ctx(&sflow_attr);
>>>      + if (!*group_id) {
>>>      + VLOG_DBG_RL(&rl, "%s: Failed allocating group id for sample
>>>      action",
>>>      + __func__);
>>>      + return ENOENT;
>>>      + }
>>>      + tc_action->sample.group_id = *group_id;
>>>      + flower->action_count++;
>>>      +
>>>      + return 0;
>>>      +}
>>>      +
>>>      +static int
>>>      +parse_userspace_action(struct tc_flower *flower, struct tc_action
>>>      *tc_action,
>>>      + const struct nlattr *userspace_action,
>>>      + const struct flow_tnl *tnl, uint32_t *group_id,
>>>      + const ovs_u128 *ufid)
>>>      +{
>>>      + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>>>      + struct dpif_sflow_attr sflow_attr;
>>>      + int err;
>>>      +
>>>      + if (*group_id) {
>>>      + VLOG_ERR_RL(&error_rl, "%s: Only a single TC_SAMPLE action "
>>>      + "per flow is supported", __func__);
>>>      + return EOPNOTSUPP;
>>>      + }
>>>      +
>>>      + /* If sampling rate is 1, there is only a sFlow cookie inside of a
>>>      + * userspace action, but no sample attribute. That means we can
>>>      + * only offload userspace actions for sFlow.
>>>      + */
>>>      + memset(&sflow_attr, 0, sizeof sflow_attr);
>>>      + sflow_attr.ufid = *ufid;
>>>      + if (flower->tunnel) {
>>>      + sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, tnl);
>>>      + }
>>>      + err = parse_userspace_attributes(userspace_action, &sflow_attr);
>>>      + if (err) {
>>>      + return err;
>>>      + }
>>>      + sflow_attr.action = userspace_action;
>>>      + *group_id = sgid_alloc_ctx(&sflow_attr);
>>>      + if (!*group_id) {
>>>      + VLOG_DBG_RL(&rl, "%s: Failed allocating group id for sample
>>>      action",
>>>      + __func__);
>>>      + return ENOENT;
>>>      + }
>>>      + tc_action->type = TC_ACT_SAMPLE;
>>>      + tc_action->sample.group_id = *group_id;
>>>      + tc_action->sample.rate = 1;
>>>      + flower->action_count++;
>>>      +
>>>      + return 0;
>>>      +}
>>>      +
>>>      static int
>>>      netdev_tc_flow_put(struct netdev *netdev, struct match *match,
>>>      struct nlattr *actions, size_t actions_len,
>>>      @@ -1840,6 +2004,7 @@ netdev_tc_flow_put(struct netdev *netdev,
>>>      struct match *match,
>>>      const struct flow_tnl *tnl_mask = &mask->tunnel;
>>>      struct tc_action *action;
>>>      bool recirc_act = false;
>>>      + uint32_t sample_gid = 0;
>>>      uint32_t block_id = 0;
>>>      struct nlattr *nla;
>>>      struct tcf_id id;
>>>      @@ -2092,7 +2257,8 @@ netdev_tc_flow_put(struct netdev *netdev,
>>>      struct match *match,
>>>      NL_ATTR_FOR_EACH(nla, left, actions, actions_len) {
>>>      if (flower.action_count >= TCA_ACT_MAX_NUM) {
>>>      VLOG_DBG_RL(&rl, "Can only support %d actions", TCA_ACT_MAX_NUM);
>>>      - return EOPNOTSUPP;
>>>      + err = EOPNOTSUPP;
>>>      + goto out;
>>>      }
>>>      action = &flower.actions[flower.action_count];
>>>      if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
>>>      @@ -2102,7 +2268,8 @@ netdev_tc_flow_put(struct netdev *netdev,
>>>      struct match *match,
>>>
>>>      if (!outdev) {
>>>      VLOG_DBG_RL(&rl, "Can't find netdev for output port %d", port);
>>>      - return ENODEV;
>>>      + err = ENODEV;
>>>      + goto out;
>>>      }
>>>      action->out.ifindex_out = netdev_get_ifindex(outdev);
>>>      action->out.ingress = is_internal_port(netdev_get_type(outdev));
>>>      @@ -2140,7 +2307,7 @@ netdev_tc_flow_put(struct netdev *netdev,
>>>      struct match *match,
>>>
>>>      err = parse_put_flow_set_action(&flower, action, set, set_len);
>>>      if (err) {
>>>      - return err;
>>>      + goto out;
>>>      }
>>>      if (action->type == TC_ACT_ENCAP) {
>>>      action->encap.tp_dst = info->tp_dst_port;
>>>      @@ -2153,7 +2320,7 @@ netdev_tc_flow_put(struct netdev *netdev,
>>>      struct match *match,
>>>      err = parse_put_flow_set_masked_action(&flower, action, set,
>>>      set_len, true);
>>>      if (err) {
>>>      - return err;
>>>      + goto out;
>>>      }
>>>      } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CT) {
>>>      const struct nlattr *ct = nl_attr_get(nla);
>>>      @@ -2165,7 +2332,7 @@ netdev_tc_flow_put(struct netdev *netdev,
>>>      struct match *match,
>>>
>>>      err = parse_put_flow_ct_action(&flower, action, ct, ct_len);
>>>      if (err) {
>>>      - return err;
>>>      + goto out;
>>>      }
>>>      } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CT_CLEAR) {
>>>      action->type = TC_ACT_CT;
>>>      @@ -2181,20 +2348,29 @@ netdev_tc_flow_put(struct netdev *netdev,
>>>      struct match *match,
>>>      action->chain = 0; /* 0 is reserved and not used by recirc. */
>>>      flower.action_count++;
>>>      } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SAMPLE) {
>>>      - struct dpif_sflow_attr sflow_attr;
>>>      -
>>>      - memset(&sflow_attr, 0, sizeof sflow_attr);
>>>      - sgid_alloc_ctx(&sflow_attr);
>>>      + err = parse_sample_action(&flower, action, nla, tnl, &sample_gid,
>>>      + ufid);
>>>      + if (err) {
>>>      + goto out;
>>>      + }
>>>      + } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
>>>      + err = parse_userspace_action(&flower, action, nla, tnl,
>>>      + &sample_gid, ufid);
>>>      + if (err) {
>>>      + goto out;
>>>      + }
>>>      } else {
>>>      VLOG_DBG_RL(&rl, "unsupported put action type: %d",
>>>      nl_attr_type(nla));
>>>      - return EOPNOTSUPP;
>>>      + err = EOPNOTSUPP;
>>>      + goto out;
>>>      }
>>>      }
>>>
>>>      if ((chain || recirc_act) && !info->recirc_id_shared_with_tc) {
>>>      VLOG_ERR_RL(&error_rl, "flow_put: recirc_id sharing not supported");
>>>      - return EOPNOTSUPP;
>>>      + err = EOPNOTSUPP;
>>>      + goto out;
>>>      }
>>>
>>>      if (get_ufid_tc_mapping(ufid, &id) == 0) {
>>>      @@ -2206,20 +2382,30 @@ netdev_tc_flow_put(struct netdev *netdev,
>>>      struct match *match,
>>>      prio = get_prio_for_tc_flower(&flower);
>>>      if (prio == 0) {
>>>      VLOG_ERR_RL(&rl, "couldn't get tc prio: %s", ovs_strerror(ENOSPC));
>>>      - return ENOSPC;
>>>      + err = ENOSPC;
>>>      + goto out;
>>>      }
>>>
>>>      flower.act_cookie.data = ufid;
>>>      flower.act_cookie.len = sizeof *ufid;
>>>
>>>      block_id = get_block_id_from_netdev(netdev);
>>>      - id = tc_make_tcf_id_chain(ifindex, block_id, chain, prio, hook);
>>>      + id = tc_make_tcf_id_all(ifindex, block_id, chain, prio, hook,
>>>      sample_gid);
>>>      err = tc_replace_flower(&id, &flower);
>>>      if (!err) {
>>>      if (stats) {
>>>      memset(stats, 0, sizeof *stats);
>>>      }
>>>      add_ufid_tc_mapping(netdev, ufid, &id);
>>>      + } else {
>>>      + goto out;
>>>      + }
>>>
>>> Guess this will look nicer:
>>>
>>> |if (err) { goto out; } if (stats) { memset(stats, 0, sizeof *stats); } add_ufid_tc_mapping(netdev, ufid, &id); return 0; |
>>>
>>>      +
>>>      + return 0;
>>>      +
>>>      +out:
>>>      + if (sample_gid) {
>>>      + sgid_free(sample_gid);
>>>      }
>>>
>>>      return err;
>>>      diff --git a/lib/tc.c b/lib/tc.c
>>>      index 33287ea05..c5788220f 100644
>>>      --- a/lib/tc.c
>>>      +++ b/lib/tc.c
>>>      @@ -23,14 +23,15 @@
>>>      #include <linux/if_packet.h>
>>>      #include <linux/rtnetlink.h>
>>>      #include <linux/tc_act/tc_csum.h>
>>>      +#include <linux/tc_act/tc_ct.h>
>>>      #include <linux/tc_act/tc_gact.h>
>>>      #include <linux/tc_act/tc_mirred.h>
>>>      #include <linux/tc_act/tc_mpls.h>
>>>      #include <linux/tc_act/tc_pedit.h>
>>>      +#include <linux/tc_act/tc_sample.h>
>>>      #include <linux/tc_act/tc_skbedit.h>
>>>      #include <linux/tc_act/tc_tunnel_key.h>
>>>      #include <linux/tc_act/tc_vlan.h>
>>>      -#include <linux/tc_act/tc_ct.h>
>>>      #include <linux/gen_stats.h>
>>>      #include <net/if.h>
>>>      #include <unistd.h>
>>>      @@ -1341,6 +1342,38 @@ nl_parse_act_gact(struct nlattr *options,
>>>      struct tc_flower *flower)
>>>      return 0;
>>>      }
>>>
>>>      +static const struct nl_policy sample_policy[] = {
>>>      + [TCA_SAMPLE_PARMS] = { .type = NL_A_UNSPEC,
>>>      + .min_len = sizeof(struct tc_sample),
>>>      + .optional = false, },
>>>      + [TCA_SAMPLE_PSAMPLE_GROUP] = { .type = NL_A_U32,
>>>      + .optional = false, },
>>>      + [TCA_SAMPLE_RATE] = { .type = NL_A_U32,
>>>      + .optional = false, },
>>>      +};
>>>      +
>>>      +static int
>>>      +nl_parse_act_sample(struct nlattr *options, struct tc_flower
>>>      *flower)
>>>      +{
>>>      + struct nlattr *sample_attrs[ARRAY_SIZE(sample_policy)];
>>>      + struct tc_action *action;
>>>      +
>>>      + if (!nl_parse_nested(options, sample_policy, sample_attrs,
>>>      + ARRAY_SIZE(sample_policy))) {
>>>      + VLOG_ERR_RL(&error_rl, "failed to parse sample action options");
>>>      + return EPROTO;
>>>      + }
>>>      +
>>>      + action = &flower->actions[flower->action_count++];
>>>      + action->type = TC_ACT_SAMPLE;
>>>      + action->sample.group_id =
>>>      + nl_attr_get_u32(sample_attrs[TCA_SAMPLE_PSAMPLE_GROUP]);
>>>      + action->sample.rate =
>>>      + nl_attr_get_u32(sample_attrs[TCA_SAMPLE_RATE]);
>>>      +
>>>      + return 0;
>>>      +}
>>>      +
>>>      static const struct nl_policy mirred_policy[] = {
>>>      [TCA_MIRRED_PARMS] = { .type = NL_A_UNSPEC,
>>>      .min_len = sizeof(struct tc_mirred),
>>>      @@ -1749,6 +1782,8 @@ nl_parse_single_action(struct nlattr
>>>      *action, struct tc_flower *flower,
>>>      /* Added for TC rule only (not in OvS rule) so ignore. */
>>>      } else if (!strcmp(act_kind, "ct")) {
>>>      nl_parse_act_ct(act_options, flower);
>>>      + } else if (!strcmp(act_kind, "sample")) {
>>>      + nl_parse_act_sample(act_options, flower);
>>>      } else {
>>>      VLOG_ERR_RL(&error_rl, "unknown tc action kind: %s", act_kind);
>>>      err = EINVAL;
>>>      @@ -2375,6 +2410,23 @@ nl_msg_put_act_mirred(struct ofpbuf
>>>      *request, int ifindex, int action,
>>>      nl_msg_end_nested(request, offset);
>>>      }
>>>
>>>      +static void
>>>      +nl_msg_put_act_sample(struct ofpbuf *request, uint32_t rate,
>>>      uint32_t group_id)
>>>      +{
>>>      + size_t offset;
>>>      +
>>>      + nl_msg_put_string(request, TCA_ACT_KIND, "sample");
>>>      + offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS |
>>>      NLA_F_NESTED);
>>>      + {
>>>      + struct tc_sample parm = { .action = TC_ACT_PIPE };
>>>      +
>>>      + nl_msg_put_unspec(request, TCA_SAMPLE_PARMS, &parm, sizeof parm);
>>>      + nl_msg_put_u32(request, TCA_SAMPLE_RATE, rate);
>>>      + nl_msg_put_u32(request, TCA_SAMPLE_PSAMPLE_GROUP, group_id);
>>>      + }
>>>      + nl_msg_end_nested(request, offset);
>>>      +}
>>>      +
>>>      static inline void
>>>      nl_msg_put_act_cookie(struct ofpbuf *request, struct tc_cookie *ck) {
>>>      if (ck->len) {
>>>      @@ -2634,6 +2686,13 @@ nl_msg_put_flower_acts(struct ofpbuf
>>>      *request, struct tc_flower *flower)
>>>      nl_msg_end_nested(request, act_offset);
>>>      }
>>>      break;
>>>      + case TC_ACT_SAMPLE: {
>>>      + act_offset = nl_msg_start_nested(request, act_index++);
>>>      + nl_msg_put_act_sample(request, action->sample.rate,
>>>      + action->sample.group_id);
>>>      + nl_msg_end_nested(request, act_offset);
>>>      + }
>>>      + break;
>>>      case TC_ACT_OUTPUT: {
>>>      if (!released && flower->tunnel) {
>>>      act_offset = nl_msg_start_nested(request, act_index++);
>>>      diff --git a/lib/tc.h b/lib/tc.h
>>>      index 2e4084f48..f764d7d1e 100644
>>>      --- a/lib/tc.h
>>>      +++ b/lib/tc.h
>>>      @@ -174,6 +174,7 @@ enum tc_action_type {
>>>      TC_ACT_MPLS_SET,
>>>      TC_ACT_GOTO,
>>>      TC_ACT_CT,
>>>      + TC_ACT_SAMPLE,
>>>      };
>>>
>>>      enum nat_type {
>>>      @@ -256,6 +257,11 @@ struct tc_action {
>>>      bool force;
>>>      bool commit;
>>>      } ct;
>>>      +
>>>      + struct {
>>>      + uint32_t rate;
>>>      + uint32_t group_id;
>>>      + } sample;
>>>      };
>>>
>>>      enum tc_action_type type;
>>>      @@ -294,12 +300,14 @@ tc_make_tcf_id(int ifindex, uint32_t
>>>      block_id, uint16_t prio,
>>>      }
>>>
>>>      static inline struct tcf_id
>>>      -tc_make_tcf_id_chain(int ifindex, uint32_t block_id, uint32_t chain,
>>>      - uint16_t prio, enum tc_qdisc_hook hook)
>>>      +tc_make_tcf_id_all(int ifindex, uint32_t block_id, uint32_t chain,
>>>      + uint16_t prio, enum tc_qdisc_hook hook,
>>>      + uint32_t sample_group_id)
>>>      {
>>>      struct tcf_id id = tc_make_tcf_id(ifindex, block_id, prio, hook);
>>>
>>>      id.chain = chain;
>>>      + id.sample_group_id = sample_group_id;
>>>
>>>      return id;
>>>      }
>>>      @@ -313,7 +321,8 @@ is_tcf_id_eq(struct tcf_id *id1, struct tcf_id
>>>      *id2)
>>>      && id1->hook == id2->hook
>>>      && id1->block_id == id2->block_id
>>>      && id1->ifindex == id2->ifindex
>>>      - && id1->chain == id2->chain;
>>>      + && id1->chain == id2->chain
>>>      + && id1->sample_group_id == id2->sample_group_id;
>>>      }
>>>
>>>      enum tc_offload_policy {
>>>      --
>>>      2.21.0
>>>
Chris Mi Sept. 15, 2021, 10:26 a.m. UTC | #5
On 9/9/2021 10:29 PM, Eelco Chaudron wrote:
>
> On 15 Jul 2021, at 8:01, Chris Mi wrote:
>
>     Create a unique group ID to map the sFlow info when offloading sFlow
>     action to TC. When showing the offloaded datapath flows, translate
>     the
>     group ID from TC sample action to sFlow info using the mapping.
>
> See comments below and this completes my review of the v14. See the 
> other email on the update of the flows going wrong.
>
OK. A update for the test. The reason I can't receive sampled packets is 
because we need to restart the ovs
after setting hw-offload to true. There is no existing API to restart 
the ovs. I write some commands to implement it.
Now we can receive sampled packets. But when stopping ovs, I see a lot 
of errors. So the test still fails.
Need to debug it further. If possible, can I submit the test in another 
series?
>
> I’ll do a more thorough review on v15 assuming the issue gets fixed, 
> especially the sgid allocation/re-use/free.
>
>     Signed-off-by: Chris Mi <cmi@nvidia.com>
>     Reviewed-by: Eli Britstein <elibr@nvidia.com>
>     ---
>     NEWS | 1 +
>     lib/netdev-offload-tc.c | 212
>     +++++++++++++++++++++++++++++++++++++---
>     lib/tc.c | 61 +++++++++++-
>     lib/tc.h | 15 ++-
>     4 files changed, 272 insertions(+), 17 deletions(-)
>
>     diff --git a/NEWS b/NEWS
>     index 6cdccc715..7c0361e18 100644
>     --- a/NEWS
>     +++ b/NEWS
>     @@ -50,6 +50,7 @@ Post-v2.15.0
>     - OVS now reports the datapath capability 'ct_zero_snat', which
>     reflects
>     whether the SNAT with all-zero IP address is supported.
>     See ovs-vswitchd.conf.db(5) for details.
>     + - Add sFlow offload support for kernel (netlink) datapath.
>
>     v2.15.0 - 15 Feb 2021
>     diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
>     index 2f16cf279..71e51394f 100644
>     --- a/lib/netdev-offload-tc.c
>     +++ b/lib/netdev-offload-tc.c
>     @@ -20,6 +20,7 @@
>     #include <linux/if_ether.h>
>
>     #include "dpif.h"
>     +#include "dpif-offload-provider.h"
>     #include "hash.h"
>     #include "openvswitch/hmap.h"
>     #include "openvswitch/match.h"
>     @@ -1087,6 +1088,19 @@ parse_tc_flower_to_match(struct tc_flower
>     *flower,
>     action = flower->actions;
>     for (i = 0; i < flower->action_count; i++, action++) {
>     switch (action->type) {
>     + case TC_ACT_SAMPLE: {
>     + const struct sgid_node *node;
>     +
>     + node = sgid_find(action->sample.group_id);
>     + if (!node) {
>     + VLOG_ERR_RL(&error_rl, "%s: sgid node is NULL, sgid: %d",
>     + __func__, action->sample.group_id);
>
> This should probably be a warning, as this could happen if the DP has 
> not yet been synced? i.e. revalidator being in the process of cleanup 
> and someone requesting dp content?
>
Done.
>
>     + return ENOENT;
>     + }
>     + nl_msg_put(buf, node->sflow.action,
>     + node->sflow.action->nla_len);
>     + }
>     + break;
>     case TC_ACT_VLAN_POP: {
>     nl_msg_put_flag(buf, OVS_ACTION_ATTR_POP_VLAN);
>     }
>     @@ -1825,6 +1839,156 @@ parse_match_ct_state_to_flower(struct
>     tc_flower *flower, struct match *match)
>     }
>     }
>
>     +static int
>     +parse_userspace_attributes(const struct nlattr *actions,
>     + struct dpif_sflow_attr *sflow_attr)
>     +{
>     + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>     + const struct nlattr *nla;
>     + unsigned int left;
>     +
>     + NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
>     + if (nl_attr_type(nla) == OVS_USERSPACE_ATTR_USERDATA) {
>     + struct user_action_cookie *cookie;
>     +
>     + cookie = CONST_CAST(struct user_action_cookie *, nl_attr_get(nla));
>     + if (cookie->type == USER_ACTION_COOKIE_SFLOW) {
>
> Here I think the userdata type should be changed to "struct nlattr * " 
> just like in the struct dpif_upcall.
> This should probably be done in the patch introducing the 
> dpif_sflow_attr, so it will also be aligned with the actions member.
>
Done.
>
>     + sflow_attr->userdata = CONST_CAST(void *, nl_attr_get(nla));
>     + sflow_attr->userdata_len = nl_attr_get_size(nla);
>     + return 0;
>     + }
>     + }
>     + }
>     +
>     + VLOG_DBG_RL(&rl, "%s: cannot offload userspace action other than
>     sFlow",
>     + __func__);
>     + return EOPNOTSUPP;
>     +}
>     +
>     +static int
>     +parse_sample_actions_attribute(const struct nlattr *actions,
>     + struct dpif_sflow_attr *sflow_attr)
>     +{
>     + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>     + const struct nlattr *nla;
>     + unsigned int left;
>     + int err = EINVAL;
>     +
>     + NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
>     + if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
>     + err = parse_userspace_attributes(nla, sflow_attr);
>     + } else {
>     + /* We can't offload other nested actions */
>     + VLOG_DBG_RL(&rl, "%s: can only offload "
>     + "OVS_ACTION_ATTR_USERSPACE attribute", __func__);
>     + return EINVAL;
>     + }
>     + }
>     +
>     + if (err) {
>     + VLOG_ERR_RL(&error_rl, "%s: no OVS_ACTION_ATTR_USERSPACE
>     attribute",
>     + __func__);
>     + }
>     + return err;
>     +}
>     +
>     +static int
>     +parse_sample_action(struct tc_flower *flower, struct tc_action
>     *tc_action,
>     + const struct nlattr *sample_action,
>     + const struct flow_tnl *tnl, uint32_t *group_id,
>     + const ovs_u128 *ufid)
>     +{
>     + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>     + struct dpif_sflow_attr sflow_attr;
>     + const struct nlattr *nla;
>     + unsigned int left;
>     + int ret = EINVAL;
>     +
>     + if (*group_id) {
>     + VLOG_ERR_RL(&error_rl, "%s: Only a single TC_SAMPLE action "
>     + "per flow is supported", __func__);
>     + return EOPNOTSUPP;
>     + }
>     +
>     + memset(&sflow_attr, 0, sizeof sflow_attr);
>     + sflow_attr.ufid = *ufid;
>     + sflow_attr.action = sample_action;
>     +
>     + if (flower->tunnel) {
>     + sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, tnl);
>     + }
>     +
>     + NL_NESTED_FOR_EACH_UNSAFE (nla, left, sample_action) {
>     + if (nl_attr_type(nla) == OVS_SAMPLE_ATTR_ACTIONS) {
>     + ret = parse_sample_actions_attribute(nla, &sflow_attr);
>     + } else if (nl_attr_type(nla) == OVS_SAMPLE_ATTR_PROBABILITY) {
>     + tc_action->type = TC_ACT_SAMPLE;
>     + tc_action->sample.rate = UINT32_MAX / nl_attr_get_u32(nla);
>     + } else {
>     + return EINVAL;
>     + }
>     + }
>     +
>     + if (!tc_action->sample.rate || ret) {
>     + return EINVAL;
>     + }
>     +
>     + *group_id = sgid_alloc_ctx(&sflow_attr);
>     + if (!*group_id) {
>     + VLOG_DBG_RL(&rl, "%s: Failed allocating group id for sample
>     action",
>     + __func__);
>     + return ENOENT;
>     + }
>     + tc_action->sample.group_id = *group_id;
>     + flower->action_count++;
>     +
>     + return 0;
>     +}
>     +
>     +static int
>     +parse_userspace_action(struct tc_flower *flower, struct tc_action
>     *tc_action,
>     + const struct nlattr *userspace_action,
>     + const struct flow_tnl *tnl, uint32_t *group_id,
>     + const ovs_u128 *ufid)
>     +{
>     + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>     + struct dpif_sflow_attr sflow_attr;
>     + int err;
>     +
>     + if (*group_id) {
>     + VLOG_ERR_RL(&error_rl, "%s: Only a single TC_SAMPLE action "
>     + "per flow is supported", __func__);
>     + return EOPNOTSUPP;
>     + }
>     +
>     + /* If sampling rate is 1, there is only a sFlow cookie inside of a
>     + * userspace action, but no sample attribute. That means we can
>     + * only offload userspace actions for sFlow.
>     + */
>     + memset(&sflow_attr, 0, sizeof sflow_attr);
>     + sflow_attr.ufid = *ufid;
>     + if (flower->tunnel) {
>     + sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, tnl);
>     + }
>     + err = parse_userspace_attributes(userspace_action, &sflow_attr);
>     + if (err) {
>     + return err;
>     + }
>     + sflow_attr.action = userspace_action;
>     + *group_id = sgid_alloc_ctx(&sflow_attr);
>     + if (!*group_id) {
>     + VLOG_DBG_RL(&rl, "%s: Failed allocating group id for sample
>     action",
>     + __func__);
>     + return ENOENT;
>     + }
>     + tc_action->type = TC_ACT_SAMPLE;
>     + tc_action->sample.group_id = *group_id;
>     + tc_action->sample.rate = 1;
>     + flower->action_count++;
>     +
>     + return 0;
>     +}
>     +
>     static int
>     netdev_tc_flow_put(struct netdev *netdev, struct match *match,
>     struct nlattr *actions, size_t actions_len,
>     @@ -1840,6 +2004,7 @@ netdev_tc_flow_put(struct netdev *netdev,
>     struct match *match,
>     const struct flow_tnl *tnl_mask = &mask->tunnel;
>     struct tc_action *action;
>     bool recirc_act = false;
>     + uint32_t sample_gid = 0;
>     uint32_t block_id = 0;
>     struct nlattr *nla;
>     struct tcf_id id;
>     @@ -2092,7 +2257,8 @@ netdev_tc_flow_put(struct netdev *netdev,
>     struct match *match,
>     NL_ATTR_FOR_EACH(nla, left, actions, actions_len) {
>     if (flower.action_count >= TCA_ACT_MAX_NUM) {
>     VLOG_DBG_RL(&rl, "Can only support %d actions", TCA_ACT_MAX_NUM);
>     - return EOPNOTSUPP;
>     + err = EOPNOTSUPP;
>     + goto out;
>     }
>     action = &flower.actions[flower.action_count];
>     if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
>     @@ -2102,7 +2268,8 @@ netdev_tc_flow_put(struct netdev *netdev,
>     struct match *match,
>
>     if (!outdev) {
>     VLOG_DBG_RL(&rl, "Can't find netdev for output port %d", port);
>     - return ENODEV;
>     + err = ENODEV;
>     + goto out;
>     }
>     action->out.ifindex_out = netdev_get_ifindex(outdev);
>     action->out.ingress = is_internal_port(netdev_get_type(outdev));
>     @@ -2140,7 +2307,7 @@ netdev_tc_flow_put(struct netdev *netdev,
>     struct match *match,
>
>     err = parse_put_flow_set_action(&flower, action, set, set_len);
>     if (err) {
>     - return err;
>     + goto out;
>     }
>     if (action->type == TC_ACT_ENCAP) {
>     action->encap.tp_dst = info->tp_dst_port;
>     @@ -2153,7 +2320,7 @@ netdev_tc_flow_put(struct netdev *netdev,
>     struct match *match,
>     err = parse_put_flow_set_masked_action(&flower, action, set,
>     set_len, true);
>     if (err) {
>     - return err;
>     + goto out;
>     }
>     } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CT) {
>     const struct nlattr *ct = nl_attr_get(nla);
>     @@ -2165,7 +2332,7 @@ netdev_tc_flow_put(struct netdev *netdev,
>     struct match *match,
>
>     err = parse_put_flow_ct_action(&flower, action, ct, ct_len);
>     if (err) {
>     - return err;
>     + goto out;
>     }
>     } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CT_CLEAR) {
>     action->type = TC_ACT_CT;
>     @@ -2181,20 +2348,29 @@ netdev_tc_flow_put(struct netdev *netdev,
>     struct match *match,
>     action->chain = 0; /* 0 is reserved and not used by recirc. */
>     flower.action_count++;
>     } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SAMPLE) {
>     - struct dpif_sflow_attr sflow_attr;
>     -
>     - memset(&sflow_attr, 0, sizeof sflow_attr);
>     - sgid_alloc_ctx(&sflow_attr);
>     + err = parse_sample_action(&flower, action, nla, tnl, &sample_gid,
>     + ufid);
>     + if (err) {
>     + goto out;
>     + }
>     + } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
>     + err = parse_userspace_action(&flower, action, nla, tnl,
>     + &sample_gid, ufid);
>     + if (err) {
>     + goto out;
>     + }
>     } else {
>     VLOG_DBG_RL(&rl, "unsupported put action type: %d",
>     nl_attr_type(nla));
>     - return EOPNOTSUPP;
>     + err = EOPNOTSUPP;
>     + goto out;
>     }
>     }
>
>     if ((chain || recirc_act) && !info->recirc_id_shared_with_tc) {
>     VLOG_ERR_RL(&error_rl, "flow_put: recirc_id sharing not supported");
>     - return EOPNOTSUPP;
>     + err = EOPNOTSUPP;
>     + goto out;
>     }
>
>     if (get_ufid_tc_mapping(ufid, &id) == 0) {
>     @@ -2206,20 +2382,30 @@ netdev_tc_flow_put(struct netdev *netdev,
>     struct match *match,
>     prio = get_prio_for_tc_flower(&flower);
>     if (prio == 0) {
>     VLOG_ERR_RL(&rl, "couldn't get tc prio: %s", ovs_strerror(ENOSPC));
>     - return ENOSPC;
>     + err = ENOSPC;
>     + goto out;
>     }
>
>     flower.act_cookie.data = ufid;
>     flower.act_cookie.len = sizeof *ufid;
>
>     block_id = get_block_id_from_netdev(netdev);
>     - id = tc_make_tcf_id_chain(ifindex, block_id, chain, prio, hook);
>     + id = tc_make_tcf_id_all(ifindex, block_id, chain, prio, hook,
>     sample_gid);
>     err = tc_replace_flower(&id, &flower);
>     if (!err) {
>     if (stats) {
>     memset(stats, 0, sizeof *stats);
>     }
>     add_ufid_tc_mapping(netdev, ufid, &id);
>     + } else {
>     + goto out;
>     + }
>
> Guess this will look nicer:
>
> |if (err) { goto out; } if (stats) { memset(stats, 0, sizeof *stats); 
> } add_ufid_tc_mapping(netdev, ufid, &id); return 0;|
|Done.

Thanks,
Chris
|
> ||
>
>     +
>     + return 0;
>     +
>     +out:
>     + if (sample_gid) {
>     + sgid_free(sample_gid);
>     }
>
>     return err;
>     diff --git a/lib/tc.c b/lib/tc.c
>     index 33287ea05..c5788220f 100644
>     --- a/lib/tc.c
>     +++ b/lib/tc.c
>     @@ -23,14 +23,15 @@
>     #include <linux/if_packet.h>
>     #include <linux/rtnetlink.h>
>     #include <linux/tc_act/tc_csum.h>
>     +#include <linux/tc_act/tc_ct.h>
>     #include <linux/tc_act/tc_gact.h>
>     #include <linux/tc_act/tc_mirred.h>
>     #include <linux/tc_act/tc_mpls.h>
>     #include <linux/tc_act/tc_pedit.h>
>     +#include <linux/tc_act/tc_sample.h>
>     #include <linux/tc_act/tc_skbedit.h>
>     #include <linux/tc_act/tc_tunnel_key.h>
>     #include <linux/tc_act/tc_vlan.h>
>     -#include <linux/tc_act/tc_ct.h>
>     #include <linux/gen_stats.h>
>     #include <net/if.h>
>     #include <unistd.h>
>     @@ -1341,6 +1342,38 @@ nl_parse_act_gact(struct nlattr *options,
>     struct tc_flower *flower)
>     return 0;
>     }
>
>     +static const struct nl_policy sample_policy[] = {
>     + [TCA_SAMPLE_PARMS] = { .type = NL_A_UNSPEC,
>     + .min_len = sizeof(struct tc_sample),
>     + .optional = false, },
>     + [TCA_SAMPLE_PSAMPLE_GROUP] = { .type = NL_A_U32,
>     + .optional = false, },
>     + [TCA_SAMPLE_RATE] = { .type = NL_A_U32,
>     + .optional = false, },
>     +};
>     +
>     +static int
>     +nl_parse_act_sample(struct nlattr *options, struct tc_flower
>     *flower)
>     +{
>     + struct nlattr *sample_attrs[ARRAY_SIZE(sample_policy)];
>     + struct tc_action *action;
>     +
>     + if (!nl_parse_nested(options, sample_policy, sample_attrs,
>     + ARRAY_SIZE(sample_policy))) {
>     + VLOG_ERR_RL(&error_rl, "failed to parse sample action options");
>     + return EPROTO;
>     + }
>     +
>     + action = &flower->actions[flower->action_count++];
>     + action->type = TC_ACT_SAMPLE;
>     + action->sample.group_id =
>     + nl_attr_get_u32(sample_attrs[TCA_SAMPLE_PSAMPLE_GROUP]);
>     + action->sample.rate =
>     + nl_attr_get_u32(sample_attrs[TCA_SAMPLE_RATE]);
>     +
>     + return 0;
>     +}
>     +
>     static const struct nl_policy mirred_policy[] = {
>     [TCA_MIRRED_PARMS] = { .type = NL_A_UNSPEC,
>     .min_len = sizeof(struct tc_mirred),
>     @@ -1749,6 +1782,8 @@ nl_parse_single_action(struct nlattr
>     *action, struct tc_flower *flower,
>     /* Added for TC rule only (not in OvS rule) so ignore. */
>     } else if (!strcmp(act_kind, "ct")) {
>     nl_parse_act_ct(act_options, flower);
>     + } else if (!strcmp(act_kind, "sample")) {
>     + nl_parse_act_sample(act_options, flower);
>     } else {
>     VLOG_ERR_RL(&error_rl, "unknown tc action kind: %s", act_kind);
>     err = EINVAL;
>     @@ -2375,6 +2410,23 @@ nl_msg_put_act_mirred(struct ofpbuf
>     *request, int ifindex, int action,
>     nl_msg_end_nested(request, offset);
>     }
>
>     +static void
>     +nl_msg_put_act_sample(struct ofpbuf *request, uint32_t rate,
>     uint32_t group_id)
>     +{
>     + size_t offset;
>     +
>     + nl_msg_put_string(request, TCA_ACT_KIND, "sample");
>     + offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS |
>     NLA_F_NESTED);
>     + {
>     + struct tc_sample parm = { .action = TC_ACT_PIPE };
>     +
>     + nl_msg_put_unspec(request, TCA_SAMPLE_PARMS, &parm, sizeof parm);
>     + nl_msg_put_u32(request, TCA_SAMPLE_RATE, rate);
>     + nl_msg_put_u32(request, TCA_SAMPLE_PSAMPLE_GROUP, group_id);
>     + }
>     + nl_msg_end_nested(request, offset);
>     +}
>     +
>     static inline void
>     nl_msg_put_act_cookie(struct ofpbuf *request, struct tc_cookie *ck) {
>     if (ck->len) {
>     @@ -2634,6 +2686,13 @@ nl_msg_put_flower_acts(struct ofpbuf
>     *request, struct tc_flower *flower)
>     nl_msg_end_nested(request, act_offset);
>     }
>     break;
>     + case TC_ACT_SAMPLE: {
>     + act_offset = nl_msg_start_nested(request, act_index++);
>     + nl_msg_put_act_sample(request, action->sample.rate,
>     + action->sample.group_id);
>     + nl_msg_end_nested(request, act_offset);
>     + }
>     + break;
>     case TC_ACT_OUTPUT: {
>     if (!released && flower->tunnel) {
>     act_offset = nl_msg_start_nested(request, act_index++);
>     diff --git a/lib/tc.h b/lib/tc.h
>     index 2e4084f48..f764d7d1e 100644
>     --- a/lib/tc.h
>     +++ b/lib/tc.h
>     @@ -174,6 +174,7 @@ enum tc_action_type {
>     TC_ACT_MPLS_SET,
>     TC_ACT_GOTO,
>     TC_ACT_CT,
>     + TC_ACT_SAMPLE,
>     };
>
>     enum nat_type {
>     @@ -256,6 +257,11 @@ struct tc_action {
>     bool force;
>     bool commit;
>     } ct;
>     +
>     + struct {
>     + uint32_t rate;
>     + uint32_t group_id;
>     + } sample;
>     };
>
>     enum tc_action_type type;
>     @@ -294,12 +300,14 @@ tc_make_tcf_id(int ifindex, uint32_t
>     block_id, uint16_t prio,
>     }
>
>     static inline struct tcf_id
>     -tc_make_tcf_id_chain(int ifindex, uint32_t block_id, uint32_t chain,
>     - uint16_t prio, enum tc_qdisc_hook hook)
>     +tc_make_tcf_id_all(int ifindex, uint32_t block_id, uint32_t chain,
>     + uint16_t prio, enum tc_qdisc_hook hook,
>     + uint32_t sample_group_id)
>     {
>     struct tcf_id id = tc_make_tcf_id(ifindex, block_id, prio, hook);
>
>     id.chain = chain;
>     + id.sample_group_id = sample_group_id;
>
>     return id;
>     }
>     @@ -313,7 +321,8 @@ is_tcf_id_eq(struct tcf_id *id1, struct tcf_id
>     *id2)
>     && id1->hook == id2->hook
>     && id1->block_id == id2->block_id
>     && id1->ifindex == id2->ifindex
>     - && id1->chain == id2->chain;
>     + && id1->chain == id2->chain
>     + && id1->sample_group_id == id2->sample_group_id;
>     }
>
>     enum tc_offload_policy {
>     -- 
>     2.21.0
>
diff mbox series

Patch

diff --git a/NEWS b/NEWS
index 6cdccc715..7c0361e18 100644
--- a/NEWS
+++ b/NEWS
@@ -50,6 +50,7 @@  Post-v2.15.0
    - OVS now reports the datapath capability 'ct_zero_snat', which reflects
      whether the SNAT with all-zero IP address is supported.
      See ovs-vswitchd.conf.db(5) for details.
+   - Add sFlow offload support for kernel (netlink) datapath.
 
 
 v2.15.0 - 15 Feb 2021
diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
index 2f16cf279..71e51394f 100644
--- a/lib/netdev-offload-tc.c
+++ b/lib/netdev-offload-tc.c
@@ -20,6 +20,7 @@ 
 #include <linux/if_ether.h>
 
 #include "dpif.h"
+#include "dpif-offload-provider.h"
 #include "hash.h"
 #include "openvswitch/hmap.h"
 #include "openvswitch/match.h"
@@ -1087,6 +1088,19 @@  parse_tc_flower_to_match(struct tc_flower *flower,
         action = flower->actions;
         for (i = 0; i < flower->action_count; i++, action++) {
             switch (action->type) {
+            case TC_ACT_SAMPLE: {
+                const struct sgid_node *node;
+
+                node = sgid_find(action->sample.group_id);
+                if (!node) {
+                    VLOG_ERR_RL(&error_rl, "%s: sgid node is NULL, sgid: %d",
+                                __func__, action->sample.group_id);
+                    return ENOENT;
+                }
+                nl_msg_put(buf, node->sflow.action,
+                           node->sflow.action->nla_len);
+            }
+            break;
             case TC_ACT_VLAN_POP: {
                 nl_msg_put_flag(buf, OVS_ACTION_ATTR_POP_VLAN);
             }
@@ -1825,6 +1839,156 @@  parse_match_ct_state_to_flower(struct tc_flower *flower, struct match *match)
     }
 }
 
+static int
+parse_userspace_attributes(const struct nlattr *actions,
+                           struct dpif_sflow_attr *sflow_attr)
+{
+    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
+    const struct nlattr *nla;
+    unsigned int left;
+
+    NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
+        if (nl_attr_type(nla) == OVS_USERSPACE_ATTR_USERDATA) {
+            struct user_action_cookie *cookie;
+
+            cookie = CONST_CAST(struct user_action_cookie *, nl_attr_get(nla));
+            if (cookie->type == USER_ACTION_COOKIE_SFLOW) {
+                sflow_attr->userdata = CONST_CAST(void *, nl_attr_get(nla));
+                sflow_attr->userdata_len = nl_attr_get_size(nla);
+                return 0;
+            }
+        }
+    }
+
+    VLOG_DBG_RL(&rl, "%s: cannot offload userspace action other than sFlow",
+                __func__);
+    return EOPNOTSUPP;
+}
+
+static int
+parse_sample_actions_attribute(const struct nlattr *actions,
+                               struct dpif_sflow_attr *sflow_attr)
+{
+    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
+    const struct nlattr *nla;
+    unsigned int left;
+    int err = EINVAL;
+
+    NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
+        if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
+            err = parse_userspace_attributes(nla, sflow_attr);
+        } else {
+            /* We can't offload other nested actions */
+            VLOG_DBG_RL(&rl, "%s: can only offload "
+                        "OVS_ACTION_ATTR_USERSPACE attribute", __func__);
+            return EINVAL;
+        }
+    }
+
+    if (err) {
+        VLOG_ERR_RL(&error_rl, "%s: no OVS_ACTION_ATTR_USERSPACE attribute",
+                    __func__);
+    }
+    return err;
+}
+
+static int
+parse_sample_action(struct tc_flower *flower, struct tc_action *tc_action,
+                    const struct nlattr *sample_action,
+                    const struct flow_tnl *tnl, uint32_t *group_id,
+                    const ovs_u128 *ufid)
+{
+    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
+    struct dpif_sflow_attr sflow_attr;
+    const struct nlattr *nla;
+    unsigned int left;
+    int ret = EINVAL;
+
+    if (*group_id) {
+        VLOG_ERR_RL(&error_rl, "%s: Only a single TC_SAMPLE action "
+                    "per flow is supported", __func__);
+        return EOPNOTSUPP;
+    }
+
+    memset(&sflow_attr, 0, sizeof sflow_attr);
+    sflow_attr.ufid = *ufid;
+    sflow_attr.action = sample_action;
+
+    if (flower->tunnel) {
+        sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, tnl);
+    }
+
+    NL_NESTED_FOR_EACH_UNSAFE (nla, left, sample_action) {
+        if (nl_attr_type(nla) == OVS_SAMPLE_ATTR_ACTIONS) {
+            ret = parse_sample_actions_attribute(nla, &sflow_attr);
+        } else if (nl_attr_type(nla) == OVS_SAMPLE_ATTR_PROBABILITY) {
+            tc_action->type = TC_ACT_SAMPLE;
+            tc_action->sample.rate = UINT32_MAX / nl_attr_get_u32(nla);
+        } else {
+            return EINVAL;
+        }
+    }
+
+    if (!tc_action->sample.rate || ret) {
+        return EINVAL;
+    }
+
+    *group_id = sgid_alloc_ctx(&sflow_attr);
+    if (!*group_id) {
+        VLOG_DBG_RL(&rl, "%s: Failed allocating group id for sample action",
+                    __func__);
+        return ENOENT;
+    }
+    tc_action->sample.group_id = *group_id;
+    flower->action_count++;
+
+    return 0;
+}
+
+static int
+parse_userspace_action(struct tc_flower *flower, struct tc_action *tc_action,
+                       const struct nlattr *userspace_action,
+                       const struct flow_tnl *tnl, uint32_t *group_id,
+                       const ovs_u128 *ufid)
+{
+    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
+    struct dpif_sflow_attr sflow_attr;
+    int err;
+
+    if (*group_id) {
+        VLOG_ERR_RL(&error_rl, "%s: Only a single TC_SAMPLE action "
+                    "per flow is supported", __func__);
+        return EOPNOTSUPP;
+    }
+
+    /* If sampling rate is 1, there is only a sFlow cookie inside of a
+     * userspace action, but no sample attribute. That means we can
+     * only offload userspace actions for sFlow.
+     */
+    memset(&sflow_attr, 0, sizeof sflow_attr);
+    sflow_attr.ufid = *ufid;
+    if (flower->tunnel) {
+        sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, tnl);
+    }
+    err = parse_userspace_attributes(userspace_action, &sflow_attr);
+    if (err) {
+        return err;
+    }
+    sflow_attr.action = userspace_action;
+    *group_id = sgid_alloc_ctx(&sflow_attr);
+    if (!*group_id) {
+        VLOG_DBG_RL(&rl, "%s: Failed allocating group id for sample action",
+                    __func__);
+        return ENOENT;
+    }
+    tc_action->type = TC_ACT_SAMPLE;
+    tc_action->sample.group_id = *group_id;
+    tc_action->sample.rate = 1;
+    flower->action_count++;
+
+    return 0;
+}
+
 static int
 netdev_tc_flow_put(struct netdev *netdev, struct match *match,
                    struct nlattr *actions, size_t actions_len,
@@ -1840,6 +2004,7 @@  netdev_tc_flow_put(struct netdev *netdev, struct match *match,
     const struct flow_tnl *tnl_mask = &mask->tunnel;
     struct tc_action *action;
     bool recirc_act = false;
+    uint32_t sample_gid = 0;
     uint32_t block_id = 0;
     struct nlattr *nla;
     struct tcf_id id;
@@ -2092,7 +2257,8 @@  netdev_tc_flow_put(struct netdev *netdev, struct match *match,
     NL_ATTR_FOR_EACH(nla, left, actions, actions_len) {
         if (flower.action_count >= TCA_ACT_MAX_NUM) {
             VLOG_DBG_RL(&rl, "Can only support %d actions", TCA_ACT_MAX_NUM);
-            return EOPNOTSUPP;
+            err = EOPNOTSUPP;
+            goto out;
         }
         action = &flower.actions[flower.action_count];
         if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
@@ -2102,7 +2268,8 @@  netdev_tc_flow_put(struct netdev *netdev, struct match *match,
 
             if (!outdev) {
                 VLOG_DBG_RL(&rl, "Can't find netdev for output port %d", port);
-                return ENODEV;
+                err = ENODEV;
+                goto out;
             }
             action->out.ifindex_out = netdev_get_ifindex(outdev);
             action->out.ingress = is_internal_port(netdev_get_type(outdev));
@@ -2140,7 +2307,7 @@  netdev_tc_flow_put(struct netdev *netdev, struct match *match,
 
             err = parse_put_flow_set_action(&flower, action, set, set_len);
             if (err) {
-                return err;
+                goto out;
             }
             if (action->type == TC_ACT_ENCAP) {
                 action->encap.tp_dst = info->tp_dst_port;
@@ -2153,7 +2320,7 @@  netdev_tc_flow_put(struct netdev *netdev, struct match *match,
             err = parse_put_flow_set_masked_action(&flower, action, set,
                                                    set_len, true);
             if (err) {
-                return err;
+                goto out;
             }
         } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CT) {
             const struct nlattr *ct = nl_attr_get(nla);
@@ -2165,7 +2332,7 @@  netdev_tc_flow_put(struct netdev *netdev, struct match *match,
 
             err = parse_put_flow_ct_action(&flower, action, ct, ct_len);
             if (err) {
-                return err;
+                goto out;
             }
         } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CT_CLEAR) {
             action->type = TC_ACT_CT;
@@ -2181,20 +2348,29 @@  netdev_tc_flow_put(struct netdev *netdev, struct match *match,
             action->chain = 0;  /* 0 is reserved and not used by recirc. */
             flower.action_count++;
         } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SAMPLE) {
-            struct dpif_sflow_attr sflow_attr;
-
-            memset(&sflow_attr, 0, sizeof sflow_attr);
-            sgid_alloc_ctx(&sflow_attr);
+            err = parse_sample_action(&flower, action, nla, tnl, &sample_gid,
+                                      ufid);
+            if (err) {
+                goto out;
+            }
+        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
+            err = parse_userspace_action(&flower, action, nla, tnl,
+                                         &sample_gid, ufid);
+            if (err) {
+                goto out;
+            }
         } else {
             VLOG_DBG_RL(&rl, "unsupported put action type: %d",
                         nl_attr_type(nla));
-            return EOPNOTSUPP;
+            err = EOPNOTSUPP;
+            goto out;
         }
     }
 
     if ((chain || recirc_act) && !info->recirc_id_shared_with_tc) {
         VLOG_ERR_RL(&error_rl, "flow_put: recirc_id sharing not supported");
-        return EOPNOTSUPP;
+        err = EOPNOTSUPP;
+        goto out;
     }
 
     if (get_ufid_tc_mapping(ufid, &id) == 0) {
@@ -2206,20 +2382,30 @@  netdev_tc_flow_put(struct netdev *netdev, struct match *match,
     prio = get_prio_for_tc_flower(&flower);
     if (prio == 0) {
         VLOG_ERR_RL(&rl, "couldn't get tc prio: %s", ovs_strerror(ENOSPC));
-        return ENOSPC;
+        err = ENOSPC;
+        goto out;
     }
 
     flower.act_cookie.data = ufid;
     flower.act_cookie.len = sizeof *ufid;
 
     block_id = get_block_id_from_netdev(netdev);
-    id = tc_make_tcf_id_chain(ifindex, block_id, chain, prio, hook);
+    id = tc_make_tcf_id_all(ifindex, block_id, chain, prio, hook, sample_gid);
     err = tc_replace_flower(&id, &flower);
     if (!err) {
         if (stats) {
             memset(stats, 0, sizeof *stats);
         }
         add_ufid_tc_mapping(netdev, ufid, &id);
+    } else {
+        goto out;
+    }
+
+    return 0;
+
+out:
+    if (sample_gid) {
+        sgid_free(sample_gid);
     }
 
     return err;
diff --git a/lib/tc.c b/lib/tc.c
index 33287ea05..c5788220f 100644
--- a/lib/tc.c
+++ b/lib/tc.c
@@ -23,14 +23,15 @@ 
 #include <linux/if_packet.h>
 #include <linux/rtnetlink.h>
 #include <linux/tc_act/tc_csum.h>
+#include <linux/tc_act/tc_ct.h>
 #include <linux/tc_act/tc_gact.h>
 #include <linux/tc_act/tc_mirred.h>
 #include <linux/tc_act/tc_mpls.h>
 #include <linux/tc_act/tc_pedit.h>
+#include <linux/tc_act/tc_sample.h>
 #include <linux/tc_act/tc_skbedit.h>
 #include <linux/tc_act/tc_tunnel_key.h>
 #include <linux/tc_act/tc_vlan.h>
-#include <linux/tc_act/tc_ct.h>
 #include <linux/gen_stats.h>
 #include <net/if.h>
 #include <unistd.h>
@@ -1341,6 +1342,38 @@  nl_parse_act_gact(struct nlattr *options, struct tc_flower *flower)
     return 0;
 }
 
+static const struct nl_policy sample_policy[] = {
+    [TCA_SAMPLE_PARMS] = { .type = NL_A_UNSPEC,
+                           .min_len = sizeof(struct tc_sample),
+                           .optional = false, },
+    [TCA_SAMPLE_PSAMPLE_GROUP] = { .type = NL_A_U32,
+                                   .optional = false, },
+    [TCA_SAMPLE_RATE] = { .type = NL_A_U32,
+                          .optional = false, },
+};
+
+static int
+nl_parse_act_sample(struct nlattr *options, struct tc_flower *flower)
+{
+    struct nlattr *sample_attrs[ARRAY_SIZE(sample_policy)];
+    struct tc_action *action;
+
+    if (!nl_parse_nested(options, sample_policy, sample_attrs,
+                         ARRAY_SIZE(sample_policy))) {
+        VLOG_ERR_RL(&error_rl, "failed to parse sample action options");
+        return EPROTO;
+    }
+
+    action = &flower->actions[flower->action_count++];
+    action->type = TC_ACT_SAMPLE;
+    action->sample.group_id =
+        nl_attr_get_u32(sample_attrs[TCA_SAMPLE_PSAMPLE_GROUP]);
+    action->sample.rate =
+        nl_attr_get_u32(sample_attrs[TCA_SAMPLE_RATE]);
+
+    return 0;
+}
+
 static const struct nl_policy mirred_policy[] = {
     [TCA_MIRRED_PARMS] = { .type = NL_A_UNSPEC,
                            .min_len = sizeof(struct tc_mirred),
@@ -1749,6 +1782,8 @@  nl_parse_single_action(struct nlattr *action, struct tc_flower *flower,
         /* Added for TC rule only (not in OvS rule) so ignore. */
     } else if (!strcmp(act_kind, "ct")) {
         nl_parse_act_ct(act_options, flower);
+    } else if (!strcmp(act_kind, "sample")) {
+        nl_parse_act_sample(act_options, flower);
     } else {
         VLOG_ERR_RL(&error_rl, "unknown tc action kind: %s", act_kind);
         err = EINVAL;
@@ -2375,6 +2410,23 @@  nl_msg_put_act_mirred(struct ofpbuf *request, int ifindex, int action,
     nl_msg_end_nested(request, offset);
 }
 
+static void
+nl_msg_put_act_sample(struct ofpbuf *request, uint32_t rate, uint32_t group_id)
+{
+    size_t offset;
+
+    nl_msg_put_string(request, TCA_ACT_KIND, "sample");
+    offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS | NLA_F_NESTED);
+    {
+        struct tc_sample parm = { .action = TC_ACT_PIPE };
+
+        nl_msg_put_unspec(request, TCA_SAMPLE_PARMS, &parm, sizeof parm);
+        nl_msg_put_u32(request, TCA_SAMPLE_RATE, rate);
+        nl_msg_put_u32(request, TCA_SAMPLE_PSAMPLE_GROUP, group_id);
+    }
+    nl_msg_end_nested(request, offset);
+}
+
 static inline void
 nl_msg_put_act_cookie(struct ofpbuf *request, struct tc_cookie *ck) {
     if (ck->len) {
@@ -2634,6 +2686,13 @@  nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower)
                 nl_msg_end_nested(request, act_offset);
             }
             break;
+            case TC_ACT_SAMPLE: {
+                act_offset = nl_msg_start_nested(request, act_index++);
+                nl_msg_put_act_sample(request, action->sample.rate,
+                                      action->sample.group_id);
+                nl_msg_end_nested(request, act_offset);
+            }
+            break;
             case TC_ACT_OUTPUT: {
                 if (!released && flower->tunnel) {
                     act_offset = nl_msg_start_nested(request, act_index++);
diff --git a/lib/tc.h b/lib/tc.h
index 2e4084f48..f764d7d1e 100644
--- a/lib/tc.h
+++ b/lib/tc.h
@@ -174,6 +174,7 @@  enum tc_action_type {
     TC_ACT_MPLS_SET,
     TC_ACT_GOTO,
     TC_ACT_CT,
+    TC_ACT_SAMPLE,
 };
 
 enum nat_type {
@@ -256,6 +257,11 @@  struct tc_action {
             bool force;
             bool commit;
         } ct;
+
+        struct {
+            uint32_t rate;
+            uint32_t group_id;
+        } sample;
      };
 
      enum tc_action_type type;
@@ -294,12 +300,14 @@  tc_make_tcf_id(int ifindex, uint32_t block_id, uint16_t prio,
 }
 
 static inline struct tcf_id
-tc_make_tcf_id_chain(int ifindex, uint32_t block_id, uint32_t chain,
-                     uint16_t prio, enum tc_qdisc_hook hook)
+tc_make_tcf_id_all(int ifindex, uint32_t block_id, uint32_t chain,
+                     uint16_t prio, enum tc_qdisc_hook hook,
+                     uint32_t sample_group_id)
 {
     struct tcf_id id = tc_make_tcf_id(ifindex, block_id, prio, hook);
 
     id.chain = chain;
+    id.sample_group_id = sample_group_id;
 
     return id;
 }
@@ -313,7 +321,8 @@  is_tcf_id_eq(struct tcf_id *id1, struct tcf_id *id2)
            && id1->hook == id2->hook
            && id1->block_id == id2->block_id
            && id1->ifindex == id2->ifindex
-           && id1->chain == id2->chain;
+           && id1->chain == id2->chain
+           && id1->sample_group_id == id2->sample_group_id;
 }
 
 enum tc_offload_policy {