diff mbox series

[ovs-dev,v7,11/11] netdev-offload-tc: Add offload support for sFlow

Message ID 20201130044857.23873-12-cmi@nvidia.com
State Changes Requested
Headers show
Series Add offload support for sFlow | expand

Commit Message

Chris Mi Nov. 30, 2020, 4:48 a.m. UTC
Create a unique group ID to map the sFlow info when offloading sFlow
action to TC. When showing the offloaded datapath flows, translate the
group ID from TC sample action to sFlow info using the mapping.

Signed-off-by: Chris Mi <cmi@nvidia.com>
Reviewed-by: Eli Britstein <elibr@nvidia.com>
---
 lib/netdev-offload-tc.c | 148 +++++++++++++++++++++++++++++++++++++---
 lib/tc.c                |  61 ++++++++++++++++-
 lib/tc.h                |   9 ++-
 3 files changed, 206 insertions(+), 12 deletions(-)

Comments

Eelco Chaudron Dec. 2, 2020, 3:08 p.m. UTC | #1
On 30 Nov 2020, at 5:48, Chris Mi wrote:

> Create a unique group ID to map the sFlow info when offloading sFlow
> action to TC. When showing the offloaded datapath flows, translate the
> group ID from TC sample action to sFlow info using the mapping.
>
> Signed-off-by: Chris Mi <cmi@nvidia.com>
> Reviewed-by: Eli Britstein <elibr@nvidia.com>
>

<SNIP>

This is not a full review, I was just trying to understand how you 
implemented the conditional action set as I’m working on something 
similar for dec_ttl.

However, I noticed that you ignore this condition entirely, which I 
think should be solved one way or the other in this patchset. See my 
comments below.

> @@ -2061,17 +2147,49 @@ netdev_tc_flow_put(struct netdev *netdev, 
> struct match *match,
>              struct dpif_sflow_attr sflow_attr;
>
>              memset(&sflow_attr, 0, sizeof sflow_attr);
> -            gid_alloc_ctx(&sflow_attr);
> +            if (flower.tunnel) {
> +                sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, 
> tnl);
> +            }
> +            err = parse_sample_action(nla, &sflow_attr, action);

The main problem here (in the above call) you only look for 
OVS_ACTION_ATTR_USERSPACE, if it exists you are ok and handle ONLY that 
action.
If there are more actions like “userspace(),1,2”, they output to 
ports 1 and 2 are ignored. Guess for your implementation you should only 
allow TC offload if the action set consists of a single userspace() 
action.

> +            if (err) {
> +                goto out;
> +            }
> +            group_id = gid_alloc_ctx(&sflow_attr);
> +            action->sample.action_group_id = group_id;
> +            flower.action_count++;

I think these last three commands should be done as part of 
parse_sample_action() as all other "if" conditions do either all in the 
branch or in the called function.


> +        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {

The OVS_ACTION_ATTR_USERSPACE should not be handled here as a separate 
action, at least it should not end up as a TC_ACT_SAMPLE. What if 
someone else is having an OVS_ACTION_ATTR_USERPSACE in its chain, now it 
will be translated into a SAMPLE action.

The parse_sample_action() function called above should loop trough the 
OVS_SAMPLE_ATTR_ACTIONS and find the OVS_ACTION_ATTR_USERSPACE, which it 
looks like it does. So this “if (nl_attr_type(nla) == 
OVS_ACTION_ATTR_USERSPACE)” part should not be needed at all.

> +            struct dpif_sflow_attr sflow_attr;
> +
> +            /* If there is a sFlow cookie inside of a userspace 
> attribute,
> +             * but no sample attribute, that means sampling rate is 
> 1.
> +             */
> +            memset(&sflow_attr, 0, sizeof sflow_attr);
> +            if (flower.tunnel) {
> +                sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, 
> tnl);
> +            }
> +            err = parse_userspace_userdata(nla, &sflow_attr);
> +            if (err) {
> +                goto out;
> +            }
> +            sflow_attr.sflow = nla;
> +            sflow_attr.sflow_len = nla->nla_len;
> +            group_id = gid_alloc_ctx(&sflow_attr);
> +            action->type = TC_ACT_SAMPLE;
> +            action->sample.action_group_id = group_id;
> +            action->sample.action_rate = 1;
> +            flower.action_count++;
>          } else {
>              VLOG_DBG_RL(&rl, "unsupported put action type: %d",
>                          nl_attr_type(nla));
> -            return EOPNOTSUPP;
> +            err = EOPNOTSUPP;
> +            goto out;
>          }
>      }
>

<SNIP>

One other general command, how would we guarantee that the group ID 
value used by tc-sample is unique and not being used by any other 
application than OVS? Do we assume tc-sample is elusively assigned to 
OVS? This might not be the case in customer environments, so we might 
need to be able to disable offloading this action.

One final question, do you know if this will be supported in your NICs 
as hardware offload?

Cheers,

Eelco
Chris Mi Dec. 4, 2020, 5:34 a.m. UTC | #2
On 12/2/2020 11:08 PM, Eelco Chaudron wrote:
>
>
> On 30 Nov 2020, at 5:48, Chris Mi wrote:
>
>> Create a unique group ID to map the sFlow info when offloading sFlow
>> action to TC. When showing the offloaded datapath flows, translate the
>> group ID from TC sample action to sFlow info using the mapping.
>>
>> Signed-off-by: Chris Mi <cmi@nvidia.com>
>> Reviewed-by: Eli Britstein <elibr@nvidia.com>
>>
>
> <SNIP>
>
> This is not a full review, I was just trying to understand how you 
> implemented the conditional action set as I’m working on something 
> similar for dec_ttl.
>
> However, I noticed that you ignore this condition entirely, which I 
> think should be solved one way or the other in this patchset. See my 
> comments below.
>
>> @@ -2061,17 +2147,49 @@ netdev_tc_flow_put(struct netdev *netdev, 
>> struct match *match,
>>              struct dpif_sflow_attr sflow_attr;
>>
>>              memset(&sflow_attr, 0, sizeof sflow_attr);
>> -            gid_alloc_ctx(&sflow_attr);
>> +            if (flower.tunnel) {
>> +                sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, tnl);
>> +            }
>> +            err = parse_sample_action(nla, &sflow_attr, action);
>
> The main problem here (in the above call) you only look for 
> OVS_ACTION_ATTR_USERSPACE, if it exists you are ok and handle ONLY 
> that action.
> If there are more actions like “userspace(),1,2”, they output to ports 
> 1 and 2 are ignored. Guess for your implementation you should only 
> allow TC offload if the action set consists of a single userspace() 
> action.
If sample rate is not 1, the sample action looks like this:
actions:sample(sample=50.0%,actions(userspace(pid=2619320998,sFlow(vid=0,pcp=0,output=17),actions))),enp4s0f0_1

If sample rate is 1, the sample action looks like this:
actions:userspace(pid=2619320998,sFlow(vid=0,pcp=0,output=17),actions),enp4s0f0_1

In your case, userspace(),1,2, if there is a sFlow cookie inside 
userspace action, we can offload. Otherwise, we don't offload.

In the code, we deal with above 2 situations like this:
...
         } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SAMPLE) {
...
         } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
...
>
>> +            if (err) {
>> +                goto out;
>> +            }
>> +            group_id = gid_alloc_ctx(&sflow_attr);
>> +            action->sample.action_group_id = group_id;
>> +            flower.action_count++;
>
> I think these last three commands should be done as part of 
> parse_sample_action() as all other "if" conditions do either all in 
> the branch or in the called function.
Please see my above reply. There are two situations. I have comment in 
the code:

2160         } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
2161             struct dpif_sflow_attr sflow_attr;
2162
2163             /* If there is a sFlow cookie inside of a userspace 
attribute,
2164              * but no sample attribute, that means sampling rate is 1.
2165              */
>
>
>> +        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
>
> The OVS_ACTION_ATTR_USERSPACE should not be handled here as a separate 
> action, at least it should not end up as a TC_ACT_SAMPLE. What if 
> someone else is having an OVS_ACTION_ATTR_USERPSACE in its chain, now 
> it will be translated into a SAMPLE action.
>
> The parse_sample_action() function called above should loop trough the 
> OVS_SAMPLE_ATTR_ACTIONS and find the OVS_ACTION_ATTR_USERSPACE, which 
> it looks like it does. So this “if (nl_attr_type(nla) == 
> OVS_ACTION_ATTR_USERSPACE)” part should not be needed at all.
Please see above reply.
>
>> +            struct dpif_sflow_attr sflow_attr;
>> +
>> +            /* If there is a sFlow cookie inside of a userspace 
>> attribute,
>> +             * but no sample attribute, that means sampling rate is 1.
>> +             */
>> +            memset(&sflow_attr, 0, sizeof sflow_attr);
>> +            if (flower.tunnel) {
>> +                sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, tnl);
>> +            }
>> +            err = parse_userspace_userdata(nla, &sflow_attr);
>> +            if (err) {
>> +                goto out;
>> +            }
>> +            sflow_attr.sflow = nla;
>> +            sflow_attr.sflow_len = nla->nla_len;
>> +            group_id = gid_alloc_ctx(&sflow_attr);
>> +            action->type = TC_ACT_SAMPLE;
>> +            action->sample.action_group_id = group_id;
>> +            action->sample.action_rate = 1;
>> +            flower.action_count++;
>>          } else {
>>              VLOG_DBG_RL(&rl, "unsupported put action type: %d",
>>                          nl_attr_type(nla));
>> -            return EOPNOTSUPP;
>> +            err = EOPNOTSUPP;
>> +            goto out;
>>          }
>>      }
>>
>
> <SNIP>
>
> One other general command, how would we guarantee that the group ID 
> value used by tc-sample is unique and not being used by any other 
> application than OVS? 
Please see the comment:

  239 /* Allocate a unique group id for the given set of flow metadata.
  240  * The ID space is 2^^32, so there should never be a situation in 
which all
  241  * the IDs are used up.  We loop until we find a free one. */
  242 static struct gid_node *
  243 gid_alloc__(const struct dpif_sflow_attr *sflow, uint32_t hash)

This id is similar to recirc id.
> Do we assume tc-sample is elusively assigned to OVS?
Yes, I think so.
> This might not be the case in customer environments, so we might need 
> to be able to disable offloading this action.
If we can offload, why not disable it. If we can't offload, tc will 
return EOPNOTSUPP and fail back to ovs datapath.
So I don't think we need to be able to disable it.
>
> One final question, do you know if this will be supported in your NICs 
> as hardware offload?
Sure. We submitted the first version of the OVS patch set in September. 
We support offload before that.
I believe the driver change will be in linux net-next branch soon.

Thanks,
Chris
>
> Cheers,
>
> Eelco
>
Eelco Chaudron Dec. 4, 2020, 11:34 a.m. UTC | #3
On 4 Dec 2020, at 6:34, Chris Mi wrote:

> On 12/2/2020 11:08 PM, Eelco Chaudron wrote:
>>
>>
>> On 30 Nov 2020, at 5:48, Chris Mi wrote:
>>
>>> Create a unique group ID to map the sFlow info when offloading sFlow
>>> action to TC. When showing the offloaded datapath flows, translate 
>>> the
>>> group ID from TC sample action to sFlow info using the mapping.
>>>
>>> Signed-off-by: Chris Mi <cmi@nvidia.com>
>>> Reviewed-by: Eli Britstein <elibr@nvidia.com>
>>>
>>
>> <SNIP>
>>
>> This is not a full review, I was just trying to understand how you 
>> implemented the conditional action set as I’m working on something 
>> similar for dec_ttl.
>>
>> However, I noticed that you ignore this condition entirely, which I 
>> think should be solved one way or the other in this patchset. See my 
>> comments below.
>>
>>> @@ -2061,17 +2147,49 @@ netdev_tc_flow_put(struct netdev *netdev, 
>>> struct match *match,
>>>              struct dpif_sflow_attr sflow_attr;
>>>
>>>              memset(&sflow_attr, 0, sizeof sflow_attr);
>>> -            gid_alloc_ctx(&sflow_attr);
>>> +            if (flower.tunnel) {
>>> +                sflow_attr.tunnel = 
>>> CONST_CAST(struct flow_tnl *, tnl);
>>> +            }
>>> +            err = parse_sample_action(nla, &sflow_attr, 
>>> action);
>>
>> The main problem here (in the above call) you only look for 
>> OVS_ACTION_ATTR_USERSPACE, if it exists you are ok and handle ONLY 
>> that action.
>> If there are more actions like “userspace(),1,2”, they output to 
>> ports 1 and 2 are ignored. Guess for your implementation you should 
>> only allow TC offload if the action set consists of a single 
>> userspace() action.
> If sample rate is not 1, the sample action looks like this:
> actions:sample(sample=50.0%,actions(userspace(pid=2619320998,sFlow(vid=0,pcp=0,output=17),actions))),enp4s0f0_1

actions:sample(sample=50.0%,actions(userspace(pid=2619320998,sFlow(vid=0,pcp=0,output=17),actions)),output=12),enp4s0f0_1

You assume the only action in actions is userspace, there could be 
others like for example output=12 above.
Currently, this might not happen but could be in the future. So you have 
to be sure all actions in the actions() set are offloadable. See below 
for more details.

> If sample rate is 1, the sample action looks like this:
> actions:userspace(pid=2619320998,sFlow(vid=0,pcp=0,output=17),actions),enp4s0f0_1
>
> In your case, userspace(),1,2, if there is a sFlow cookie inside 
> userspace action, we can offload. Otherwise, we don't offload.

Thanks, now it makes sense to me :) Maybe the comment should be more 
explicit, saying something that we can only offload user spaces actions 
for sflow.

> In the code, we deal with above 2 situations like this:
> ...
>         } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SAMPLE) 
> {
> ...
>         } else if (nl_attr_type(nla) == 
> OVS_ACTION_ATTR_USERSPACE) {
> ...
>>
>>> +            if (err) {
>>> +                goto out;
>>> +            }
>>> +            group_id = gid_alloc_ctx(&sflow_attr);
>>> +            action->sample.action_group_id = group_id;
>>> +            flower.action_count++;
>>
>> I think these last three commands should be done as part of 
>> parse_sample_action() as all other "if" conditions do either all in 
>> the branch or in the called function.

I’m suggesting the following (including same ordering and name changes 
to reflect what they point to):

diff --git a/lib/dpif.h b/lib/dpif.h
index ed4257210..150162034 100644
--- a/lib/dpif.h
+++ b/lib/dpif.h
@@ -878,6 +878,7 @@ void dpif_register_upcall_cb(struct dpif *, 
upcall_callback *, void *aux);
  struct dpif_sflow_attr {
      const struct nlattr *sflow; /* sFlow action */
      size_t sflow_len;           /* Length of 'sflow' in bytes. */
+    // Why do we need to store sflow_len, it's part of the nlattr data?

      void *userdata;             /* struct user_action_cookie */
      size_t userdata_len;        /* struct user_action_cookie length */
diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
index 75744aa3c..84991ff6a 100644
--- a/lib/netdev-offload-tc.c
+++ b/lib/netdev-offload-tc.c
@@ -1722,20 +1722,25 @@ parse_action_userspace(const struct nlattr 
*actions,
  }

  static int
-parse_sample_action(const struct nlattr *actions,
-                    struct dpif_sflow_attr *sflow_attr,
-                    struct tc_action *tc_action)
+parse_sample_action(struct tc_flower *flower, struct tc_action 
*tc_action,
+                    const struct nlattr *sample_action,
+                    const struct flow_tnl *tnl)
  {
+    struct dpif_sflow_attr sflow_attr;
      const struct nlattr *nla;
      unsigned int left;
      int ret = EINVAL;

-    sflow_attr->sflow = actions;
-    sflow_attr->sflow_len = actions->nla_len;
+    memset(&sflow_attr, 0, sizeof sflow_attr);
+    sflow_attr.sflow = sample_action;
+    sflow_attr.sflow_len = sample_action->nla_len;
+    if (flower->tunnel) {
+        sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, tnl);
+    }

-    NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
+    NL_NESTED_FOR_EACH_UNSAFE (nla, left, sample_action) {
          if (nl_attr_type(nla) == OVS_SAMPLE_ATTR_ACTIONS) {
-            ret = parse_action_userspace(nla, sflow_attr);
+            ret = parse_action_userspace(nla, &sflow_attr);
          } else if (nl_attr_type(nla) == OVS_SAMPLE_ATTR_PROBABILITY) {
              tc_action->type = TC_ACT_SAMPLE;
              tc_action->sample.action_rate = UINT32_MAX / 
nl_attr_get_u32(nla);
@@ -1744,11 +1749,13 @@ parse_sample_action(const struct nlattr 
*actions,
          }
      }

-    if (tc_action->sample.action_rate) {
-        return ret;
-    }
+    if (!tc_action->sample.action_rate || !ret)
+        return EINVAL;

-    return EINVAL;
+    tc_action->sample.action_group_id = gid_alloc_ctx(&sflow_attr);
+    //In theory alloc can fail, so maybe we should check for it.
+    flower->action_count++;
+    return 0;
  }

  static int
@@ -2144,19 +2151,10 @@ netdev_tc_flow_put(struct netdev *netdev, struct 
match *match,
              action->chain = 0;  /* 0 is reserved and not used by 
recirc. */
              flower.action_count++;
          } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SAMPLE) {
-            struct dpif_sflow_attr sflow_attr;
-
-            memset(&sflow_attr, 0, sizeof sflow_attr);
-            if (flower.tunnel) {
-                sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, tnl);
-            }
-            err = parse_sample_action(nla, &sflow_attr, action);
+            err = parse_sample_action(&flower, action, nla, tnl);
              if (err) {
                  goto out;
              }
-            group_id = gid_alloc_ctx(&sflow_attr);
-            action->sample.action_group_id = group_id;
-            flower.action_count++;
          } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
              struct dpif_sflow_attr sflow_attr;

In addition the main objection is that you ignore the other actions, 
which I think can be solved with the following diff (your called your 
function odd, this is why you probably did not notice it):

diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
index 84991ff6a..7277b699f 100644
--- a/lib/netdev-offload-tc.c
+++ b/lib/netdev-offload-tc.c
@@ -1704,21 +1704,31 @@ parse_userspace_userdata(const struct nlattr 
*actions,
  }

  static int
-parse_action_userspace(const struct nlattr *actions,
-                       struct dpif_sflow_attr *sflow_attr)
+parse_sample_actions(const struct nlattr *actions,
+                     struct dpif_sflow_attr *sflow_attr)
  {
      const struct nlattr *nla;
      unsigned int left;
+    int err = EINVAL;

      NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
          if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
-            return parse_userspace_userdata(nla, sflow_attr);
+            err = parse_userspace_userdata(nla, sflow_attr);
+        } else {
+            /* We can't offload other nested actions */
+            VLOG_DBG_RL(&error_rl,
+                        "%s: other than OVS_ACTION_ATTR_USERSPACE 
attribute",
+                        __func__);
+            return EINVAL;
          }
      }

-    VLOG_ERR_RL(&error_rl, "%s: no OVS_ACTION_ATTR_USERSPACE 
attribute",
-                __func__);
-    return EINVAL;
+    if (err) {
+        //THIS IS NOT AN ERROR CASE, FROM A DATAPATH IT'S VALID TO DO 
ANY ACTION
+        VLOG_DBG_RL(&error_rl, "%s: no OVS_ACTION_ATTR_USERSPACE 
attribute",
+                    __func__);
+    }
+    return err;
  }

  static int
@@ -1740,7 +1750,7 @@ parse_sample_action(struct tc_flower *flower, 
struct tc_action *tc_action,

      NL_NESTED_FOR_EACH_UNSAFE (nla, left, sample_action) {
          if (nl_attr_type(nla) == OVS_SAMPLE_ATTR_ACTIONS) {
-            ret = parse_action_userspace(nla, &sflow_attr);
+            ret = parse_sample_actions(nla, &sflow_attr);
          } else if (nl_attr_type(nla) == OVS_SAMPLE_ATTR_PROBABILITY) {
              tc_action->type = TC_ACT_SAMPLE;
              tc_action->sample.action_rate = UINT32_MAX / 
nl_attr_get_u32(nla);


> Please see my above reply. There are two situations. I have comment in 
> the code:
>
> 2160         } else if (nl_attr_type(nla) == 
> OVS_ACTION_ATTR_USERSPACE) {
> 2161             struct dpif_sflow_attr flow_attr;
> 2162
> 2163             /* If there is a sFlow cookie inside of a 
> userspace attribute,
> 2164              * but no sample attribute, that means 
> sampling rate is 1.
> 2165              */

Ack, maybe also wrap this in a single function… Something like (not 
tested):

diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
index 7277b699f..bf3cd3ada 100644
--- a/lib/netdev-offload-tc.c
+++ b/lib/netdev-offload-tc.c
@@ -1680,14 +1680,15 @@ flower_match_to_tun_opt(struct tc_flower 
*flower, const struct flow_tnl *tnl,
      flower->mask.tunnel.metadata.present.len = 
tnl->metadata.present.len;
  }

+
  static int
-parse_userspace_userdata(const struct nlattr *actions,
-                         struct dpif_sflow_attr *sflow_attr)
+__parse_userspace_attributes(const struct nlattr *userspace_action,
+                             struct dpif_sflow_attr *sflow_attr)
  {
      const struct nlattr *nla;
      unsigned int left;

-    NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
+    NL_NESTED_FOR_EACH_UNSAFE (nla, left, userspace_action) {
          if (nl_attr_type(nla) == OVS_USERSPACE_ATTR_USERDATA) {
              struct user_action_cookie *cookie;

@@ -1695,14 +1696,50 @@ parse_userspace_userdata(const struct nlattr 
*actions,
              if (cookie->type == USER_ACTION_COOKIE_SFLOW) {
                  sflow_attr->userdata = CONST_CAST(void *, 
nl_attr_get(nla));
                  sflow_attr->userdata_len = nl_attr_get_size(nla);
+                // ^ Why do we store this user data? It's part of the 
data we
+                //   store in sflow_attr.sflow.
                  return 0;
              }
+            // What about the other attributes, don't we need them? Or 
do
+            // we process them in the slow path by sflow_attr.sflow?
+            // As I mentioned earlier, I did not review the full code.
          }
      }
-
      return EINVAL;
  }

+static int
+parse_userspace_attributes(struct tc_flower *flower, struct tc_action 
*action,
+                           const struct nlattr *userspace_action,
+                           const struct flow_tnl *tnl)
+{
+    struct dpif_sflow_attr sflow_attr;
+    int err;
+
+    /* We only support offloading the userspace action for sFlow.
+     * This is when the sFlow cookie exists inside of a the userspace
+     * attribute. This is true when the sample rate for sFlow is 1.
+     */
+    memset(&sflow_attr, 0, sizeof sflow_attr);
+    if (flower->tunnel) {
+        sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, tnl);
+    }
+
+    err = __parse_userspace_attributes(userspace_action, &sflow_attr);
+    if (err)
+        return err;
+
+    sflow_attr.sflow = userspace_action;
+    //Did you notice that you put the userspace_action here?
+    //In the other case you put the sample_action here...
+    sflow_attr.sflow_len = userspace_action->nla_len;
+    action->type = TC_ACT_SAMPLE;
+    action->sample.action_group_id = gid_alloc_ctx(&sflow_attr);
+    action->sample.action_rate = 1;
+    flower->action_count++;
+    return 0;
+}
+
  static int
  parse_sample_actions(const struct nlattr *actions,
                       struct dpif_sflow_attr *sflow_attr)
@@ -1713,7 +1750,7 @@ parse_sample_actions(const struct nlattr *actions,

      NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
          if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
-            err = parse_userspace_userdata(nla, sflow_attr);
+            err = __parse_userspace_attributes(nla, sflow_attr);
          } else {
              /* We can't offload other nested actions */
              VLOG_DBG_RL(&error_rl,
@@ -2166,27 +2203,11 @@ netdev_tc_flow_put(struct netdev *netdev, struct 
match *match,
                  goto out;
              }
          } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
-            struct dpif_sflow_attr sflow_attr;
-
-            /* We only support offloading the userspace action for 
sFlow.
-             * This is when the sFlow cookie exists inside of a the 
userspace
-             * attribute. This is true when the sample rate for sFlow 
is 1.
-             */
-            memset(&sflow_attr, 0, sizeof sflow_attr);
-            if (flower.tunnel) {
-                sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, tnl);
-            }
-            err = parse_userspace_userdata(nla, &sflow_attr);
+
+            err = parse_userspace_userdata(&flower, action, nla, tnl);
              if (err) {
                  goto out;
              }
-            sflow_attr.sflow = nla;
-            sflow_attr.sflow_len = nla->nla_len;
-            group_id = gid_alloc_ctx(&sflow_attr);
-            action->type = TC_ACT_SAMPLE;
-            action->sample.action_group_id = group_id;
-            action->sample.action_rate = 1;
-            flower.action_count++;
          } else {
              VLOG_DBG_RL(&rl, "unsupported put action type: %d",
                          nl_attr_type(nla));

>>
>>
>>> +        } else if (nl_attr_type(nla) == 
>>> OVS_ACTION_ATTR_USERSPACE) {
>>
>> The OVS_ACTION_ATTR_USERSPACE should not be handled here as a 
>> separate action, at least it should not end up as a TC_ACT_SAMPLE. 
>> What if someone else is having an OVS_ACTION_ATTR_USERPSACE in its 
>> chain, now it will be translated into a SAMPLE action.
>>
>> The parse_sample_action() function called above should loop trough 
>> the OVS_SAMPLE_ATTR_ACTIONS and find the OVS_ACTION_ATTR_USERSPACE, 
>> which it looks like it does. So this “if (nl_attr_type(nla) == 
>> OVS_ACTION_ATTR_USERSPACE)” part should not be needed at all.
> Please see above reply.

Ack see above
>>
>>> +            struct dpif_sflow_attr sflow_attr;
>>> +
>>> +            /* If there is a sFlow cookie inside of a 
>>> userspace attribute,
>>> +             * but no sample attribute, that means 
>>> sampling rate is 1.
>>> +             */
>>> +            memset(&sflow_attr, 0, sizeof sflow_attr);
>>> +            if (flower.tunnel) {
>>> +                sflow_attr.tunnel = 
>>> CONST_CAST(struct flow_tnl *, tnl);
>>> +            }
>>> +            err = parse_userspace_userdata(nla, 
>>> &sflow_attr);
>>> +            if (err) {
>>> +                goto out;
>>> +            }
>>> +            sflow_attr.sflow = nla;
>>> +            sflow_attr.sflow_len = nla->nla_len;
>>> +            group_id = gid_alloc_ctx(&sflow_attr);
>>> +            action->type = TC_ACT_SAMPLE;
>>> +            action->sample.action_group_id = group_id;
>>> +            action->sample.action_rate = 1;
>>> +            flower.action_count++;
>>>          } else {
>>>              VLOG_DBG_RL(&rl, "unsupported put action 
>>> type: %d",
>>>                          nl_attr_type(nla));
>>> -            return EOPNOTSUPP;
>>> +            err = EOPNOTSUPP;
>>> +            goto out;
>>>          }
>>>      }
>>>
>>
>> <SNIP>
>>
>> One other general command, how would we guarantee that the group ID 
>> value used by tc-sample is unique and not being used by any other 
>> application than OVS?
> Please see the comment:
>
>  239 /* Allocate a unique group id for the given set of flow 
> metadata.
>  240  * The ID space is 2^^32, so there should never be a situation 
> in which all
>  241  * the IDs are used up.  We loop until we find a free one. */
>  242 static struct gid_node *
>  243 gid_alloc__(const struct dpif_sflow_attr *sflow, uint32_t hash)
>
> This id is similar to recirc id.
>> Do we assume tc-sample is elusively assigned to OVS?
> Yes, I think so.
>> This might not be the case in customer environments, so we might need 
>> to be able to disable offloading this action.
> If we can offload, why not disable it. If we can't offload, tc will 
> return EOPNOTSUPP and fail back to ovs datapath.
> So I don't think we need to be able to disable it.

Ack all three above, but I was referring to someone that uses sflow kmod 
on his management interface already, and now all of a sudden OVS will 
use it too. Which could cause a problem as the u32 space is started 
between all user space application. So I do think we need an option to 
disable it in OVS.


>>
>> One final question, do you know if this will be supported in your 
>> NICs as hardware offload?
> Sure. We submitted the first version of the OVS patch set in 
> September. We support offload before that.
> I believe the driver change will be in linux net-next branch soon.

Cool, looking forward to seeing the change…


If you do send out a new revision of this patchset, I’ll try to make 
some time and do a full review and some testing.
Chris Mi Dec. 14, 2020, 9:33 a.m. UTC | #4
On 12/4/2020 7:34 PM, Eelco Chaudron wrote:
>
>
> On 4 Dec 2020, at 6:34, Chris Mi wrote:
>
>> On 12/2/2020 11:08 PM, Eelco Chaudron wrote:
>>>
>>>
>>> On 30 Nov 2020, at 5:48, Chris Mi wrote:
>>>
>>>> Create a unique group ID to map the sFlow info when offloading sFlow
>>>> action to TC. When showing the offloaded datapath flows, translate the
>>>> group ID from TC sample action to sFlow info using the mapping.
>>>>
>>>> Signed-off-by: Chris Mi <cmi@nvidia.com>
>>>> Reviewed-by: Eli Britstein <elibr@nvidia.com>
>>>>
>>>
>>> <SNIP>
>>>
>>> This is not a full review, I was just trying to understand how you 
>>> implemented the conditional action set as I’m working on something 
>>> similar for dec_ttl.
>>>
>>> However, I noticed that you ignore this condition entirely, which I 
>>> think should be solved one way or the other in this patchset. See my 
>>> comments below.
>>>
>>>> @@ -2061,17 +2147,49 @@ netdev_tc_flow_put(struct netdev *netdev, 
>>>> struct match *match,
>>>>              struct dpif_sflow_attr sflow_attr;
>>>>
>>>>              memset(&sflow_attr, 0, sizeof sflow_attr);
>>>> -            gid_alloc_ctx(&sflow_attr);
>>>> +            if (flower.tunnel) {
>>>> +                sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, 
>>>> tnl);
>>>> +            }
>>>> +            err = parse_sample_action(nla, &sflow_attr, action);
>>>
>>> The main problem here (in the above call) you only look for 
>>> OVS_ACTION_ATTR_USERSPACE, if it exists you are ok and handle ONLY 
>>> that action.
>>> If there are more actions like “userspace(),1,2”, they output to 
>>> ports 1 and 2 are ignored. Guess for your implementation you should 
>>> only allow TC offload if the action set consists of a single 
>>> userspace() action.
>> If sample rate is not 1, the sample action looks like this:
>> actions:sample(sample=50.0%,actions(userspace(pid=2619320998,sFlow(vid=0,pcp=0,output=17),actions))),enp4s0f0_1 
>>
>
> actions:sample(sample=50.0%,actions(userspace(pid=2619320998,sFlow(vid=0,pcp=0,output=17),actions)),output=12),enp4s0f0_1 
>
>
> You assume the only action in actions is userspace, there could be 
> others like for example output=12 above.
> Currently, this might not happen but could be in the future. So you 
> have to be sure all actions in the actions() set are offloadable. See 
> below for more details.
>
>> If sample rate is 1, the sample action looks like this:
>> actions:userspace(pid=2619320998,sFlow(vid=0,pcp=0,output=17),actions),enp4s0f0_1 
>>
>>
>> In your case, userspace(),1,2, if there is a sFlow cookie inside 
>> userspace action, we can offload. Otherwise, we don't offload.
>
> Thanks, now it makes sense to me :) Maybe the comment should be more 
> explicit, saying something that we can only offload user spaces 
> actions for sflow.
Done.
>
>> In the code, we deal with above 2 situations like this:
>> ...
>>         } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SAMPLE) {
>> ...
>>         } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
>> ...
>>>
>>>> +            if (err) {
>>>> +                goto out;
>>>> +            }
>>>> +            group_id = gid_alloc_ctx(&sflow_attr);
>>>> +            action->sample.action_group_id = group_id;
>>>> +            flower.action_count++;
>>>
>>> I think these last three commands should be done as part of 
>>> parse_sample_action() as all other "if" conditions do either all in 
>>> the branch or in the called function.
>
Thanks for your patch. I revised the code according to your suggestion.
> I’m suggesting the following (including same ordering and name changes 
> to reflect what they point to):
>
> diff --git a/lib/dpif.h b/lib/dpif.h
> index ed4257210..150162034 100644
> --- a/lib/dpif.h
> +++ b/lib/dpif.h
> @@ -878,6 +878,7 @@ void dpif_register_upcall_cb(struct dpif *, 
> upcall_callback *, void *aux);
>  struct dpif_sflow_attr {
>      const struct nlattr *sflow; /* sFlow action */
>      size_t sflow_len;           /* Length of 'sflow' in bytes. */
> +    // Why do we need to store sflow_len, it's part of the nlattr data?
We need to copy and compare the dpif_sflow_attr. So sflow_len is used 
for that.
>
>      void *userdata;             /* struct user_action_cookie */
>      size_t userdata_len;        /* struct user_action_cookie length */
> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
> index 75744aa3c..84991ff6a 100644
> --- a/lib/netdev-offload-tc.c
> +++ b/lib/netdev-offload-tc.c
> @@ -1722,20 +1722,25 @@ parse_action_userspace(const struct nlattr 
> *actions,
>  }
>
>  static int
> -parse_sample_action(const struct nlattr *actions,
> -                    struct dpif_sflow_attr *sflow_attr,
> -                    struct tc_action *tc_action)
> +parse_sample_action(struct tc_flower *flower, struct tc_action 
> *tc_action,
> +                    const struct nlattr *sample_action,
> +                    const struct flow_tnl *tnl)
>  {
> +    struct dpif_sflow_attr sflow_attr;
>      const struct nlattr *nla;
>      unsigned int left;
>      int ret = EINVAL;
>
> -    sflow_attr->sflow = actions;
> -    sflow_attr->sflow_len = actions->nla_len;
> +    memset(&sflow_attr, 0, sizeof sflow_attr);
> +    sflow_attr.sflow = sample_action;
> +    sflow_attr.sflow_len = sample_action->nla_len;
> +    if (flower->tunnel) {
> +        sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, tnl);
> +    }
>
> -    NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
> +    NL_NESTED_FOR_EACH_UNSAFE (nla, left, sample_action) {
>          if (nl_attr_type(nla) == OVS_SAMPLE_ATTR_ACTIONS) {
> -            ret = parse_action_userspace(nla, sflow_attr);
> +            ret = parse_action_userspace(nla, &sflow_attr);
>          } else if (nl_attr_type(nla) == OVS_SAMPLE_ATTR_PROBABILITY) {
>              tc_action->type = TC_ACT_SAMPLE;
>              tc_action->sample.action_rate = UINT32_MAX / 
> nl_attr_get_u32(nla);
> @@ -1744,11 +1749,13 @@ parse_sample_action(const struct nlattr *actions,
>          }
>      }
>
> -    if (tc_action->sample.action_rate) {
> -        return ret;
> -    }
> +    if (!tc_action->sample.action_rate || !ret)
> +        return EINVAL;
>
> -    return EINVAL;
> +    tc_action->sample.action_group_id = gid_alloc_ctx(&sflow_attr);
> +    //In theory alloc can fail, so maybe we should check for it.
> +    flower->action_count++;
> +    return 0;
>  }
>
>  static int
> @@ -2144,19 +2151,10 @@ netdev_tc_flow_put(struct netdev *netdev, 
> struct match *match,
>              action->chain = 0;  /* 0 is reserved and not used by 
> recirc. */
>              flower.action_count++;
>          } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SAMPLE) {
> -            struct dpif_sflow_attr sflow_attr;
> -
> -            memset(&sflow_attr, 0, sizeof sflow_attr);
> -            if (flower.tunnel) {
> -                sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, tnl);
> -            }
> -            err = parse_sample_action(nla, &sflow_attr, action);
> +            err = parse_sample_action(&flower, action, nla, tnl);
>              if (err) {
>                  goto out;
>              }
> -            group_id = gid_alloc_ctx(&sflow_attr);
> -            action->sample.action_group_id = group_id;
> -            flower.action_count++;
>          } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
>              struct dpif_sflow_attr sflow_attr;
>
> In addition the main objection is that you ignore the other actions, 
> which I think can be solved with the following diff (your called your 
> function odd, this is why you probably did not notice it):
>
> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
> index 84991ff6a..7277b699f 100644
> --- a/lib/netdev-offload-tc.c
> +++ b/lib/netdev-offload-tc.c
> @@ -1704,21 +1704,31 @@ parse_userspace_userdata(const struct nlattr 
> *actions,
>  }
>
>  static int
> -parse_action_userspace(const struct nlattr *actions,
> -                       struct dpif_sflow_attr *sflow_attr)
> +parse_sample_actions(const struct nlattr *actions,
> +                     struct dpif_sflow_attr *sflow_attr)
>  {
>      const struct nlattr *nla;
>      unsigned int left;
> +    int err = EINVAL;
>
>      NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
>          if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
> -            return parse_userspace_userdata(nla, sflow_attr);
> +            err = parse_userspace_userdata(nla, sflow_attr);
> +        } else {
> +            /* We can't offload other nested actions */
> +            VLOG_DBG_RL(&error_rl,
> +                        "%s: other than OVS_ACTION_ATTR_USERSPACE 
> attribute",
> +                        __func__);
> +            return EINVAL;
>          }
>      }
I think we should not log in this function. We may be overwhelmed for 
various usespace actions.
>
> -    VLOG_ERR_RL(&error_rl, "%s: no OVS_ACTION_ATTR_USERSPACE attribute",
> -                __func__);
> -    return EINVAL;
> +    if (err) {
> +        //THIS IS NOT AN ERROR CASE, FROM A DATAPATH IT'S VALID TO DO 
> ANY ACTION
But sample action expects a userspace attribute.
> + VLOG_DBG_RL(&error_rl, "%s: no OVS_ACTION_ATTR_USERSPACE attribute",
> +                    __func__);
> +    }
> +    return err;
>  }
>
>  static int
> @@ -1740,7 +1750,7 @@ parse_sample_action(struct tc_flower *flower, 
> struct tc_action *tc_action,
>
>      NL_NESTED_FOR_EACH_UNSAFE (nla, left, sample_action) {
>          if (nl_attr_type(nla) == OVS_SAMPLE_ATTR_ACTIONS) {
> -            ret = parse_action_userspace(nla, &sflow_attr);
> +            ret = parse_sample_actions(nla, &sflow_attr);
>          } else if (nl_attr_type(nla) == OVS_SAMPLE_ATTR_PROBABILITY) {
>              tc_action->type = TC_ACT_SAMPLE;
>              tc_action->sample.action_rate = UINT32_MAX / 
> nl_attr_get_u32(nla);
>
>
>> Please see my above reply. There are two situations. I have comment 
>> in the code:
>>
>> 2160         } else if (nl_attr_type(nla) == 
>> OVS_ACTION_ATTR_USERSPACE) {
>> 2161             struct dpif_sflow_attr flow_attr;
>> 2162
>> 2163             /* If there is a sFlow cookie inside of a userspace 
>> attribute,
>> 2164              * but no sample attribute, that means sampling rate 
>> is 1.
>> 2165              */
>
> Ack, maybe also wrap this in a single function… Something like (not 
> tested):
>
> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
> index 7277b699f..bf3cd3ada 100644
> --- a/lib/netdev-offload-tc.c
> +++ b/lib/netdev-offload-tc.c
> @@ -1680,14 +1680,15 @@ flower_match_to_tun_opt(struct tc_flower 
> *flower, const struct flow_tnl *tnl,
>      flower->mask.tunnel.metadata.present.len = 
> tnl->metadata.present.len;
>  }
>
> +
>  static int
> -parse_userspace_userdata(const struct nlattr *actions,
> -                         struct dpif_sflow_attr *sflow_attr)
> +__parse_userspace_attributes(const struct nlattr *userspace_action,
> +                             struct dpif_sflow_attr *sflow_attr)
>  {
>      const struct nlattr *nla;
>      unsigned int left;
>
> -    NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
> +    NL_NESTED_FOR_EACH_UNSAFE (nla, left, userspace_action) {
>          if (nl_attr_type(nla) == OVS_USERSPACE_ATTR_USERDATA) {
>              struct user_action_cookie *cookie;
>
> @@ -1695,14 +1696,50 @@ parse_userspace_userdata(const struct nlattr 
> *actions,
>              if (cookie->type == USER_ACTION_COOKIE_SFLOW) {
>                  sflow_attr->userdata = CONST_CAST(void *, 
> nl_attr_get(nla));
>                  sflow_attr->userdata_len = nl_attr_get_size(nla);
> +                // ^ Why do we store this user data? It's part of the 
> data we
> +                //   store in sflow_attr.sflow.
>                  return 0;
>              }
> +            // What about the other attributes, don't we need them? 
> Or do
> +            // we process them in the slow path by sflow_attr.sflow?
> +            // As I mentioned earlier, I did not review the full code.
>          }
>      }
> -
>      return EINVAL;
>  }
>
> +static int
> +parse_userspace_attributes(struct tc_flower *flower, struct tc_action 
> *action,
> +                           const struct nlattr *userspace_action,
> +                           const struct flow_tnl *tnl)
> +{
> +    struct dpif_sflow_attr sflow_attr;
> +    int err;
> +
> +    /* We only support offloading the userspace action for sFlow.
> +     * This is when the sFlow cookie exists inside of a the userspace
> +     * attribute. This is true when the sample rate for sFlow is 1.
> +     */
> +    memset(&sflow_attr, 0, sizeof sflow_attr);
> +    if (flower->tunnel) {
> +        sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, tnl);
> +    }
> +
> +    err = __parse_userspace_attributes(userspace_action, &sflow_attr);
> +    if (err)
> +        return err;
> +
> +    sflow_attr.sflow = userspace_action;
> +    //Did you notice that you put the userspace_action here?
> +    //In the other case you put the sample_action here...
> +    sflow_attr.sflow_len = userspace_action->nla_len;
> +    action->type = TC_ACT_SAMPLE;
> +    action->sample.action_group_id = gid_alloc_ctx(&sflow_attr);
> +    action->sample.action_rate = 1;
> +    flower->action_count++;
> +    return 0;
> +}
> +
>  static int
>  parse_sample_actions(const struct nlattr *actions,
>                       struct dpif_sflow_attr *sflow_attr)
> @@ -1713,7 +1750,7 @@ parse_sample_actions(const struct nlattr *actions,
>
>      NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
>          if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
> -            err = parse_userspace_userdata(nla, sflow_attr);
> +            err = __parse_userspace_attributes(nla, sflow_attr);
>          } else {
>              /* We can't offload other nested actions */
>              VLOG_DBG_RL(&error_rl,
> @@ -2166,27 +2203,11 @@ netdev_tc_flow_put(struct netdev *netdev, 
> struct match *match,
>                  goto out;
>              }
>          } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
> -            struct dpif_sflow_attr sflow_attr;
> -
> -            /* We only support offloading the userspace action for 
> sFlow.
> -             * This is when the sFlow cookie exists inside of a the 
> userspace
> -             * attribute. This is true when the sample rate for sFlow 
> is 1.
> -             */
> -            memset(&sflow_attr, 0, sizeof sflow_attr);
> -            if (flower.tunnel) {
> -                sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, tnl);
> -            }
> -            err = parse_userspace_userdata(nla, &sflow_attr);
> +
> +            err = parse_userspace_userdata(&flower, action, nla, tnl);
>              if (err) {
>                  goto out;
>              }
> -            sflow_attr.sflow = nla;
> -            sflow_attr.sflow_len = nla->nla_len;
> -            group_id = gid_alloc_ctx(&sflow_attr);
> -            action->type = TC_ACT_SAMPLE;
> -            action->sample.action_group_id = group_id;
> -            action->sample.action_rate = 1;
> -            flower.action_count++;
>          } else {
>              VLOG_DBG_RL(&rl, "unsupported put action type: %d",
>                          nl_attr_type(nla));
>
>>>
>>>
>>>> +        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
>>>
>>> The OVS_ACTION_ATTR_USERSPACE should not be handled here as a 
>>> separate action, at least it should not end up as a TC_ACT_SAMPLE. 
>>> What if someone else is having an OVS_ACTION_ATTR_USERPSACE in its 
>>> chain, now it will be translated into a SAMPLE action.
>>>
>>> The parse_sample_action() function called above should loop trough 
>>> the OVS_SAMPLE_ATTR_ACTIONS and find the OVS_ACTION_ATTR_USERSPACE, 
>>> which it looks like it does. So this “if (nl_attr_type(nla) == 
>>> OVS_ACTION_ATTR_USERSPACE)” part should not be needed at all.
>> Please see above reply.
>
> Ack see above
>>>
>>>> +            struct dpif_sflow_attr sflow_attr;
>>>> +
>>>> +            /* If there is a sFlow cookie inside of a userspace 
>>>> attribute,
>>>> +             * but no sample attribute, that means sampling rate 
>>>> is 1.
>>>> +             */
>>>> +            memset(&sflow_attr, 0, sizeof sflow_attr);
>>>> +            if (flower.tunnel) {
>>>> +                sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, 
>>>> tnl);
>>>> +            }
>>>> +            err = parse_userspace_userdata(nla, &sflow_attr);
>>>> +            if (err) {
>>>> +                goto out;
>>>> +            }
>>>> +            sflow_attr.sflow = nla;
>>>> +            sflow_attr.sflow_len = nla->nla_len;
>>>> +            group_id = gid_alloc_ctx(&sflow_attr);
>>>> +            action->type = TC_ACT_SAMPLE;
>>>> +            action->sample.action_group_id = group_id;
>>>> +            action->sample.action_rate = 1;
>>>> +            flower.action_count++;
>>>>          } else {
>>>>              VLOG_DBG_RL(&rl, "unsupported put action type: %d",
>>>>                          nl_attr_type(nla));
>>>> -            return EOPNOTSUPP;
>>>> +            err = EOPNOTSUPP;
>>>> +            goto out;
>>>>          }
>>>>      }
>>>>
>>>
>>> <SNIP>
>>>
>>> One other general command, how would we guarantee that the group ID 
>>> value used by tc-sample is unique and not being used by any other 
>>> application than OVS?
>> Please see the comment:
>>
>>  239 /* Allocate a unique group id for the given set of flow metadata.
>>  240  * The ID space is 2^^32, so there should never be a situation 
>> in which all
>>  241  * the IDs are used up.  We loop until we find a free one. */
>>  242 static struct gid_node *
>>  243 gid_alloc__(const struct dpif_sflow_attr *sflow, uint32_t hash)
>>
>> This id is similar to recirc id.
>>> Do we assume tc-sample is elusively assigned to OVS?
>> Yes, I think so.
>>> This might not be the case in customer environments, so we might 
>>> need to be able to disable offloading this action.
>> If we can offload, why not disable it. If we can't offload, tc will 
>> return EOPNOTSUPP and fail back to ovs datapath.
>> So I don't think we need to be able to disable it.
>
> Ack all three above, but I was referring to someone that uses sflow 
> kmod on his management interface already, and now all of a sudden OVS 
> will use it too. Which could cause a problem as the u32 space is 
> started between all user space application. So I do think we need an 
> option to disable it in OVS.
Sorry, what do you mean by sflow kmod?
>
>
>>>
>>> One final question, do you know if this will be supported in your 
>>> NICs as hardware offload?
>> Sure. We submitted the first version of the OVS patch set in 
>> September. We support offload before that.
>> I believe the driver change will be in linux net-next branch soon.
>
> Cool, looking forward to seeing the change…
>
>
> If you do send out a new revision of this patchset, I’ll try to make 
> some time and do a full review and some testing.
>
V8 will be sent for the full review.

Thanks,
Chris
Eelco Chaudron Dec. 14, 2020, 11:46 a.m. UTC | #5
Hi Chris,

Noticed your v8, will try to fully review it before the year is over ;)

//Eelco

On 14 Dec 2020, at 10:33, Chris Mi wrote:

<SNIP>

>> diff --git a/lib/dpif.h b/lib/dpif.h
>> index ed4257210..150162034 100644
>> --- a/lib/dpif.h
>> +++ b/lib/dpif.h
>> @@ -878,6 +878,7 @@ void dpif_register_upcall_cb(struct dpif *, 
>> upcall_callback *, void *aux);
>>  struct dpif_sflow_attr {
>>      const struct nlattr *sflow; /* sFlow action */
>>      size_t sflow_len;           /* Length of 'sflow' 
>> in bytes. */
>> +    // Why do we need to store sflow_len, it's part of the nlattr 
>> data?
> We need to copy and compare the dpif_sflow_attr. So sflow_len is used 
> for that.

But you could just use sflow->nla_len?

>>      void *userdata;             /* struct 
>> user_action_cookie */
>>      size_t userdata_len;        /* struct 
>> user_action_cookie length */
>> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
>> index 75744aa3c..84991ff6a 100644
>>
>>  static int
>> -parse_action_userspace(const struct nlattr *actions,
>> -                       struct dpif_sflow_attr 
>> *sflow_attr)
>> +parse_sample_actions(const struct nlattr *actions,
>> +                     struct dpif_sflow_attr 
>> *sflow_attr)
>>  {
>>      const struct nlattr *nla;
>>      unsigned int left;
>> +    int err = EINVAL;
>>
>>      NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
>>          if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) 
>> {
>> -            return parse_userspace_userdata(nla, 
>> sflow_attr);
>> +            err = parse_userspace_userdata(nla, 
>> sflow_attr);
>> +        } else {
>> +            /* We can't offload other nested actions */
>> +            VLOG_DBG_RL(&error_rl,
>> +                        "%s: other than 
>> OVS_ACTION_ATTR_USERSPACE attribute",
>> +                        __func__);
>> +            return EINVAL;
>>          }
>>      }
> I think we should not log in this function. We may be overwhelmed for 
> various usespace actions.

It’s a debug message, and it might be useful to figure out why offload 
is failing, especially as this is a corner case.

>> -    VLOG_ERR_RL(&error_rl, "%s: no OVS_ACTION_ATTR_USERSPACE 
>> attribute",
>> -                __func__);
>> -    return EINVAL;
>> +    if (err) {
>> +        //THIS IS NOT AN ERROR CASE, FROM A DATAPATH IT'S 
>> VALID TO DO ANY ACTION
> But sample action expects a userspace attribute.

This is the typical use case, but I think you can add one without it and 
just redirect the packet to a different port. I don’t have a setup 
right now, but I will try some examples when I’ll do the v8 review.

>> + VLOG_DBG_RL(&error_rl, "%s: no OVS_ACTION_ATTR_USERSPACE 
>> attribute",
>> +                    __func__);
>> +    }
>> +    return err;
>>  }
>>

<SNIP>

>>>>
>>>> One other general command, how would we guarantee that the group ID 
>>>> value used by tc-sample is unique and not being used by any other 
>>>> application than OVS?
>>> Please see the comment:
>>>
>>>  239 /* Allocate a unique group id for the given set of flow 
>>> metadata.
>>>  240  * The ID space is 2^^32, so there should never be a 
>>> situation in which all
>>>  241  * the IDs are used up.  We loop until we find a free one. 
>>> */
>>>  242 static struct gid_node *
>>>  243 gid_alloc__(const struct dpif_sflow_attr *sflow, uint32_t 
>>> hash)
>>>
>>> This id is similar to recirc id.
>>>> Do we assume tc-sample is elusively assigned to OVS?
>>> Yes, I think so.
>>>> This might not be the case in customer environments, so we might 
>>>> need to be able to disable offloading this action.
>>> If we can offload, why not disable it. If we can't offload, tc will 
>>> return EOPNOTSUPP and fail back to ovs datapath.
>>> So I don't think we need to be able to disable it.
>>
>> Ack all three above, but I was referring to someone that uses sflow 
>> kmod on his management interface already, and now all of a sudden OVS 
>> will use it too. Which could cause a problem as the u32 space is 
>> started between all user space application. So I do think we need an 
>> option to disable it in OVS.
> Sorry, what do you mean by sflow kmod?

I mean the psample kmod, tc-sample feature.

>>>>
>>>> One final question, do you know if this will be supported in your 
>>>> NICs as hardware offload?
>>> Sure. We submitted the first version of the OVS patch set in 
>>> September. We support offload before that.
>>> I believe the driver change will be in linux net-next branch soon.
>>
>> Cool, looking forward to seeing the change…
>>
>>
>> If you do send out a new revision of this patchset, I’ll try to 
>> make some time and do a full review and some testing.
>>
> V8 will be sent for the full review.
>
> Thanks,
> Chris
Chris Mi Dec. 15, 2020, 3:31 a.m. UTC | #6
Hi Eelco,

Thanks for your review.

On 12/14/2020 7:46 PM, Eelco Chaudron wrote:
> Hi Chris,
>
> Noticed your v8, will try to fully review it before the year is over ;)
>
> //Eelco
>
> On 14 Dec 2020, at 10:33, Chris Mi wrote:
>
> <SNIP>
>
>>> diff --git a/lib/dpif.h b/lib/dpif.h
>>> index ed4257210..150162034 100644
>>> --- a/lib/dpif.h
>>> +++ b/lib/dpif.h
>>> @@ -878,6 +878,7 @@ void dpif_register_upcall_cb(struct dpif *, 
>>> upcall_callback *, void *aux);
>>>  struct dpif_sflow_attr {
>>>      const struct nlattr *sflow; /* sFlow action */
>>>      size_t sflow_len;           /* Length of 'sflow' in bytes. */
>>> +    // Why do we need to store sflow_len, it's part of the nlattr 
>>> data?
>> We need to copy and compare the dpif_sflow_attr. So sflow_len is used 
>> for that.
>
> But you could just use sflow->nla_len?
Done.
>
>>>      void *userdata;             /* struct user_action_cookie */
>>>      size_t userdata_len;        /* struct user_action_cookie length */
>>> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
>>> index 75744aa3c..84991ff6a 100644
>>>
>>>  static int
>>> -parse_action_userspace(const struct nlattr *actions,
>>> -                       struct dpif_sflow_attr *sflow_attr)
>>> +parse_sample_actions(const struct nlattr *actions,
>>> +                     struct dpif_sflow_attr *sflow_attr)
>>>  {
>>>      const struct nlattr *nla;
>>>      unsigned int left;
>>> +    int err = EINVAL;
>>>
>>>      NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
>>>          if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
>>> -            return parse_userspace_userdata(nla, sflow_attr);
>>> +            err = parse_userspace_userdata(nla, sflow_attr);
>>> +        } else {
>>> +            /* We can't offload other nested actions */
>>> +            VLOG_DBG_RL(&error_rl,
>>> +                        "%s: other than OVS_ACTION_ATTR_USERSPACE 
>>> attribute",
>>> +                        __func__);
>>> +            return EINVAL;
>>>          }
>>>      }
>> I think we should not log in this function. We may be overwhelmed for 
>> various usespace actions.
>
> It’s a debug message, and it might be useful to figure out why offload 
> is failing, especially as this is a corner case.
Done.
>
>>> -    VLOG_ERR_RL(&error_rl, "%s: no OVS_ACTION_ATTR_USERSPACE 
>>> attribute",
>>> -                __func__);
>>> -    return EINVAL;
>>> +    if (err) {
>>> +        //THIS IS NOT AN ERROR CASE, FROM A DATAPATH IT'S VALID TO 
>>> DO ANY ACTION
>> But sample action expects a userspace attribute.
>
> This is the typical use case, but I think you can add one without it 
> and just redirect the packet to a different port. I don’t have a setup 
> right now, but I will try some examples when I’ll do the v8 review.
>
>>> + VLOG_DBG_RL(&error_rl, "%s: no OVS_ACTION_ATTR_USERSPACE attribute",
>>> +                    __func__);
>>> +    }
>>> +    return err;
>>>  }
>>>
>
> <SNIP>
>
>>>>>
>>>>> One other general command, how would we guarantee that the group 
>>>>> ID value used by tc-sample is unique and not being used by any 
>>>>> other application than OVS?
>>>> Please see the comment:
>>>>
>>>>  239 /* Allocate a unique group id for the given set of flow metadata.
>>>>  240  * The ID space is 2^^32, so there should never be a situation 
>>>> in which all
>>>>  241  * the IDs are used up.  We loop until we find a free one. */
>>>>  242 static struct gid_node *
>>>>  243 gid_alloc__(const struct dpif_sflow_attr *sflow, uint32_t hash)
>>>>
>>>> This id is similar to recirc id.
>>>>> Do we assume tc-sample is elusively assigned to OVS?
>>>> Yes, I think so.
>>>>> This might not be the case in customer environments, so we might 
>>>>> need to be able to disable offloading this action.
>>>> If we can offload, why not disable it. If we can't offload, tc will 
>>>> return EOPNOTSUPP and fail back to ovs datapath.
>>>> So I don't think we need to be able to disable it.
>>>
>>> Ack all three above, but I was referring to someone that uses sflow 
>>> kmod on his management interface already, and now all of a sudden 
>>> OVS will use it too. Which could cause a problem as the u32 space is 
>>> started between all user space application. So I do think we need an 
>>> option to disable it in OVS.
>> Sorry, what do you mean by sflow kmod?
>
> I mean the psample kmod, tc-sample feature.
OK, I see what you mean. But we can disable offload via:
# ovs-vsctl set Open_vSwitch . other_config:hw-offload="false";

sFlow is not per flow. If user creates OVS sflow, I think almost all 
flows will have an additional sFlow action.
I don't think we need to disable sFlow action only. That means we 
disable offload for all flows.
>
>>>>>
>>>>> One final question, do you know if this will be supported in your 
>>>>> NICs as hardware offload?
>>>> Sure. We submitted the first version of the OVS patch set in 
>>>> September. We support offload before that.
>>>> I believe the driver change will be in linux net-next branch soon.
>>>
>>> Cool, looking forward to seeing the change…
>>>
>>>
>>> If you do send out a new revision of this patchset, I’ll try to make 
>>> some time and do a full review and some testing.
>>>
>> V8 will be sent for the full review.
I'll send v9 for above two small changes.

Thanks,
Chris
diff mbox series

Patch

diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
index 10e704e6a..75744aa3c 100644
--- a/lib/netdev-offload-tc.c
+++ b/lib/netdev-offload-tc.c
@@ -989,6 +989,18 @@  parse_tc_flower_to_match(struct tc_flower *flower,
         action = flower->actions;
         for (i = 0; i < flower->action_count; i++, action++) {
             switch (action->type) {
+            case TC_ACT_SAMPLE: {
+                const struct gid_node *node;
+
+                node = gid_find(action->sample.action_group_id);
+                if (!node) {
+                    VLOG_ERR_RL(&error_rl, "gid node is NULL, gid: %d",
+                                action->sample.action_group_id);
+                    return ENOENT;
+                }
+                nl_msg_put(buf, node->sflow.sflow, node->sflow.sflow_len);
+            }
+            break;
             case TC_ACT_VLAN_POP: {
                 nl_msg_put_flag(buf, OVS_ACTION_ATTR_POP_VLAN);
             }
@@ -1668,6 +1680,77 @@  flower_match_to_tun_opt(struct tc_flower *flower, const struct flow_tnl *tnl,
     flower->mask.tunnel.metadata.present.len = tnl->metadata.present.len;
 }
 
+static int
+parse_userspace_userdata(const struct nlattr *actions,
+                         struct dpif_sflow_attr *sflow_attr)
+{
+    const struct nlattr *nla;
+    unsigned int left;
+
+    NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
+        if (nl_attr_type(nla) == OVS_USERSPACE_ATTR_USERDATA) {
+            struct user_action_cookie *cookie;
+
+            cookie = CONST_CAST(struct user_action_cookie *, nl_attr_get(nla));
+            if (cookie->type == USER_ACTION_COOKIE_SFLOW) {
+                sflow_attr->userdata = CONST_CAST(void *, nl_attr_get(nla));
+                sflow_attr->userdata_len = nl_attr_get_size(nla);
+                return 0;
+            }
+        }
+    }
+
+    return EINVAL;
+}
+
+static int
+parse_action_userspace(const struct nlattr *actions,
+                       struct dpif_sflow_attr *sflow_attr)
+{
+    const struct nlattr *nla;
+    unsigned int left;
+
+    NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
+        if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
+            return parse_userspace_userdata(nla, sflow_attr);
+        }
+    }
+
+    VLOG_ERR_RL(&error_rl, "%s: no OVS_ACTION_ATTR_USERSPACE attribute",
+                __func__);
+    return EINVAL;
+}
+
+static int
+parse_sample_action(const struct nlattr *actions,
+                    struct dpif_sflow_attr *sflow_attr,
+                    struct tc_action *tc_action)
+{
+    const struct nlattr *nla;
+    unsigned int left;
+    int ret = EINVAL;
+
+    sflow_attr->sflow = actions;
+    sflow_attr->sflow_len = actions->nla_len;
+
+    NL_NESTED_FOR_EACH_UNSAFE (nla, left, actions) {
+        if (nl_attr_type(nla) == OVS_SAMPLE_ATTR_ACTIONS) {
+            ret = parse_action_userspace(nla, sflow_attr);
+        } else if (nl_attr_type(nla) == OVS_SAMPLE_ATTR_PROBABILITY) {
+            tc_action->type = TC_ACT_SAMPLE;
+            tc_action->sample.action_rate = UINT32_MAX / nl_attr_get_u32(nla);
+        } else {
+            return EINVAL;
+        }
+    }
+
+    if (tc_action->sample.action_rate) {
+        return ret;
+    }
+
+    return EINVAL;
+}
+
 static int
 netdev_tc_flow_put(struct netdev *netdev, struct match *match,
                    struct nlattr *actions, size_t actions_len,
@@ -1684,6 +1767,7 @@  netdev_tc_flow_put(struct netdev *netdev, struct match *match,
     struct tc_action *action;
     bool recirc_act = false;
     uint32_t block_id = 0;
+    uint32_t group_id = 0;
     struct nlattr *nla;
     struct tcf_id id;
     uint32_t chain;
@@ -1973,7 +2057,8 @@  netdev_tc_flow_put(struct netdev *netdev, struct match *match,
     NL_ATTR_FOR_EACH(nla, left, actions, actions_len) {
         if (flower.action_count >= TCA_ACT_MAX_NUM) {
             VLOG_DBG_RL(&rl, "Can only support %d actions", TCA_ACT_MAX_NUM);
-            return EOPNOTSUPP;
+            err = EOPNOTSUPP;
+            goto out;
         }
         action = &flower.actions[flower.action_count];
         if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
@@ -1983,7 +2068,8 @@  netdev_tc_flow_put(struct netdev *netdev, struct match *match,
 
             if (!outdev) {
                 VLOG_DBG_RL(&rl, "Can't find netdev for output port %d", port);
-                return ENODEV;
+                err = ENODEV;
+                goto out;
             }
             action->out.ifindex_out = netdev_get_ifindex(outdev);
             action->out.ingress = is_internal_port(netdev_get_type(outdev));
@@ -2021,7 +2107,7 @@  netdev_tc_flow_put(struct netdev *netdev, struct match *match,
 
             err = parse_put_flow_set_action(&flower, action, set, set_len);
             if (err) {
-                return err;
+                goto out;
             }
             if (action->type == TC_ACT_ENCAP) {
                 action->encap.tp_dst = info->tp_dst_port;
@@ -2034,7 +2120,7 @@  netdev_tc_flow_put(struct netdev *netdev, struct match *match,
             err = parse_put_flow_set_masked_action(&flower, action, set,
                                                    set_len, true);
             if (err) {
-                return err;
+                goto out;
             }
         } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CT) {
             const struct nlattr *ct = nl_attr_get(nla);
@@ -2042,7 +2128,7 @@  netdev_tc_flow_put(struct netdev *netdev, struct match *match,
 
             err = parse_put_flow_ct_action(&flower, action, ct, ct_len);
             if (err) {
-                return err;
+                goto out;
             }
         } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CT_CLEAR) {
             action->type = TC_ACT_CT;
@@ -2061,17 +2147,49 @@  netdev_tc_flow_put(struct netdev *netdev, struct match *match,
             struct dpif_sflow_attr sflow_attr;
 
             memset(&sflow_attr, 0, sizeof sflow_attr);
-            gid_alloc_ctx(&sflow_attr);
+            if (flower.tunnel) {
+                sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, tnl);
+            }
+            err = parse_sample_action(nla, &sflow_attr, action);
+            if (err) {
+                goto out;
+            }
+            group_id = gid_alloc_ctx(&sflow_attr);
+            action->sample.action_group_id = group_id;
+            flower.action_count++;
+        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_USERSPACE) {
+            struct dpif_sflow_attr sflow_attr;
+
+            /* If there is a sFlow cookie inside of a userspace attribute,
+             * but no sample attribute, that means sampling rate is 1.
+             */
+            memset(&sflow_attr, 0, sizeof sflow_attr);
+            if (flower.tunnel) {
+                sflow_attr.tunnel = CONST_CAST(struct flow_tnl *, tnl);
+            }
+            err = parse_userspace_userdata(nla, &sflow_attr);
+            if (err) {
+                goto out;
+            }
+            sflow_attr.sflow = nla;
+            sflow_attr.sflow_len = nla->nla_len;
+            group_id = gid_alloc_ctx(&sflow_attr);
+            action->type = TC_ACT_SAMPLE;
+            action->sample.action_group_id = group_id;
+            action->sample.action_rate = 1;
+            flower.action_count++;
         } else {
             VLOG_DBG_RL(&rl, "unsupported put action type: %d",
                         nl_attr_type(nla));
-            return EOPNOTSUPP;
+            err = EOPNOTSUPP;
+            goto out;
         }
     }
 
     if ((chain || recirc_act) && !info->recirc_id_shared_with_tc) {
         VLOG_ERR_RL(&error_rl, "flow_put: recirc_id sharing not supported");
-        return EOPNOTSUPP;
+        err = EOPNOTSUPP;
+        goto out;
     }
 
     if (get_ufid_tc_mapping(ufid, &id) == 0) {
@@ -2083,20 +2201,30 @@  netdev_tc_flow_put(struct netdev *netdev, struct match *match,
     prio = get_prio_for_tc_flower(&flower);
     if (prio == 0) {
         VLOG_ERR_RL(&rl, "couldn't get tc prio: %s", ovs_strerror(ENOSPC));
-        return ENOSPC;
+        err = ENOSPC;
+        goto out;
     }
 
     flower.act_cookie.data = ufid;
     flower.act_cookie.len = sizeof *ufid;
 
     block_id = get_block_id_from_netdev(netdev);
-    id = tc_make_tcf_id_chain(ifindex, block_id, chain, prio, hook);
+    id = tc_make_tcf_id_chain(ifindex, block_id, chain, prio, hook, group_id);
     err = tc_replace_flower(&id, &flower);
     if (!err) {
         if (stats) {
             memset(stats, 0, sizeof *stats);
         }
         add_ufid_tc_mapping(netdev, ufid, &id);
+    } else {
+        goto out;
+    }
+
+    return 0;
+
+out:
+    if (group_id) {
+        gid_free(group_id);
     }
 
     return err;
diff --git a/lib/tc.c b/lib/tc.c
index 8761304c9..2e31648fc 100644
--- a/lib/tc.c
+++ b/lib/tc.c
@@ -23,14 +23,15 @@ 
 #include <linux/if_packet.h>
 #include <linux/rtnetlink.h>
 #include <linux/tc_act/tc_csum.h>
+#include <linux/tc_act/tc_ct.h>
 #include <linux/tc_act/tc_gact.h>
 #include <linux/tc_act/tc_mirred.h>
 #include <linux/tc_act/tc_mpls.h>
 #include <linux/tc_act/tc_pedit.h>
+#include <linux/tc_act/tc_sample.h>
 #include <linux/tc_act/tc_skbedit.h>
 #include <linux/tc_act/tc_tunnel_key.h>
 #include <linux/tc_act/tc_vlan.h>
-#include <linux/tc_act/tc_ct.h>
 #include <linux/gen_stats.h>
 #include <net/if.h>
 #include <unistd.h>
@@ -1289,6 +1290,38 @@  nl_parse_act_gact(struct nlattr *options, struct tc_flower *flower)
     return 0;
 }
 
+static const struct nl_policy sample_policy[] = {
+    [TCA_SAMPLE_PARMS] = { .type = NL_A_UNSPEC,
+                           .min_len = sizeof(struct tc_sample),
+                           .optional = false, },
+    [TCA_SAMPLE_PSAMPLE_GROUP] = { .type = NL_A_U32,
+                                   .optional = false, },
+    [TCA_SAMPLE_RATE] = { .type = NL_A_U32,
+                          .optional = false, },
+};
+
+static int
+nl_parse_act_sample(struct nlattr *options, struct tc_flower *flower)
+{
+    struct nlattr *sample_attrs[ARRAY_SIZE(sample_policy)];
+    struct tc_action *action;
+
+    if (!nl_parse_nested(options, sample_policy, sample_attrs,
+                         ARRAY_SIZE(sample_policy))) {
+        VLOG_ERR_RL(&error_rl, "failed to parse sample action options");
+        return EPROTO;
+    }
+
+    action = &flower->actions[flower->action_count++];
+    action->type = TC_ACT_SAMPLE;
+    action->sample.action_group_id =
+        nl_attr_get_u32(sample_attrs[TCA_SAMPLE_PSAMPLE_GROUP]);
+    action->sample.action_rate =
+        nl_attr_get_u32(sample_attrs[TCA_SAMPLE_RATE]);
+
+    return 0;
+}
+
 static const struct nl_policy mirred_policy[] = {
     [TCA_MIRRED_PARMS] = { .type = NL_A_UNSPEC,
                            .min_len = sizeof(struct tc_mirred),
@@ -1697,6 +1730,8 @@  nl_parse_single_action(struct nlattr *action, struct tc_flower *flower,
         /* Added for TC rule only (not in OvS rule) so ignore. */
     } else if (!strcmp(act_kind, "ct")) {
         nl_parse_act_ct(act_options, flower);
+    } else if (!strcmp(act_kind, "sample")) {
+        nl_parse_act_sample(act_options, flower);
     } else {
         VLOG_ERR_RL(&error_rl, "unknown tc action kind: %s", act_kind);
         err = EINVAL;
@@ -2292,6 +2327,23 @@  nl_msg_put_act_mirred(struct ofpbuf *request, int ifindex, int action,
     nl_msg_end_nested(request, offset);
 }
 
+static void
+nl_msg_put_act_sample(struct ofpbuf *request, uint32_t rate, uint32_t group_id)
+{
+    size_t offset;
+
+    nl_msg_put_string(request, TCA_ACT_KIND, "sample");
+    offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS | NLA_F_NESTED);
+    {
+        struct tc_sample parm = { .action = TC_ACT_PIPE };
+
+        nl_msg_put_unspec(request, TCA_SAMPLE_PARMS, &parm, sizeof parm);
+        nl_msg_put_u32(request, TCA_SAMPLE_RATE, rate);
+        nl_msg_put_u32(request, TCA_SAMPLE_PSAMPLE_GROUP, group_id);
+    }
+    nl_msg_end_nested(request, offset);
+}
+
 static inline void
 nl_msg_put_act_cookie(struct ofpbuf *request, struct tc_cookie *ck) {
     if (ck->len) {
@@ -2551,6 +2603,13 @@  nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower)
                 nl_msg_end_nested(request, act_offset);
             }
             break;
+            case TC_ACT_SAMPLE: {
+                act_offset = nl_msg_start_nested(request, act_index++);
+                nl_msg_put_act_sample(request, action->sample.action_rate,
+                                      action->sample.action_group_id);
+                nl_msg_end_nested(request, act_offset);
+            }
+            break;
             case TC_ACT_OUTPUT: {
                 if (!released && flower->tunnel) {
                     act_offset = nl_msg_start_nested(request, act_index++);
diff --git a/lib/tc.h b/lib/tc.h
index cc2ad025d..143e225d1 100644
--- a/lib/tc.h
+++ b/lib/tc.h
@@ -171,6 +171,7 @@  enum tc_action_type {
     TC_ACT_MPLS_SET,
     TC_ACT_GOTO,
     TC_ACT_CT,
+    TC_ACT_SAMPLE,
 };
 
 enum nat_type {
@@ -253,6 +254,11 @@  struct tc_action {
             bool force;
             bool commit;
         } ct;
+
+        struct {
+            uint32_t action_rate;
+            uint32_t action_group_id;
+        } sample;
      };
 
      enum tc_action_type type;
@@ -292,11 +298,12 @@  tc_make_tcf_id(int ifindex, uint32_t block_id, uint16_t prio,
 
 static inline struct tcf_id
 tc_make_tcf_id_chain(int ifindex, uint32_t block_id, uint32_t chain,
-                     uint16_t prio, enum tc_qdisc_hook hook)
+                     uint16_t prio, enum tc_qdisc_hook hook, uint32_t group_id)
 {
     struct tcf_id id = tc_make_tcf_id(ifindex, block_id, prio, hook);
 
     id.chain = chain;
+    id.sflow_group_id = group_id;
 
     return id;
 }