diff mbox

[ovs-dev,ovs,V8,01/26] tc: Add tc flower interface

Message ID 1493824097-47495-2-git-send-email-roid@mellanox.com
State Changes Requested
Headers show

Commit Message

Roi Dayan May 3, 2017, 3:07 p.m. UTC
From: Paul Blakey <paulb@mellanox.com>

Add tc flower interface that will be used to offload flows via tc
flower classifier. Depending on the flag used (skip_sw/hw) flower
will pass those to HW or handle them itself.
Move some tc related functions from netdev-linux.c to tc.c

Co-authored-by: Shahar Klein <shahark@mellanox.com>
Signed-off-by: Shahar Klein <shahark@mellanox.com>
Signed-off-by: Paul Blakey <paulb@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
---
 lib/automake.mk    |    2 +
 lib/netdev-linux.c |  164 ++------
 lib/tc.c           | 1109 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/tc.h           |  128 ++++++
 4 files changed, 1279 insertions(+), 124 deletions(-)
 create mode 100644 lib/tc.c
 create mode 100644 lib/tc.h

Comments

Simon Horman May 4, 2017, 4:35 p.m. UTC | #1
On Wed, May 03, 2017 at 06:07:52PM +0300, Roi Dayan wrote:
> From: Paul Blakey <paulb@mellanox.com>
> 
> Add tc flower interface that will be used to offload flows via tc
> flower classifier. Depending on the flag used (skip_sw/hw) flower
> will pass those to HW or handle them itself.
> Move some tc related functions from netdev-linux.c to tc.c
> 
> Co-authored-by: Shahar Klein <shahark@mellanox.com>
> Signed-off-by: Shahar Klein <shahark@mellanox.com>
> Signed-off-by: Paul Blakey <paulb@mellanox.com>
> Reviewed-by: Roi Dayan <roid@mellanox.com>
> Reviewed-by: Simon Horman <simon.horman@netronome.com>
> ---
>  lib/automake.mk    |    2 +
>  lib/netdev-linux.c |  164 ++------
>  lib/tc.c           | 1109 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  lib/tc.h           |  128 ++++++
>  4 files changed, 1279 insertions(+), 124 deletions(-)
>  create mode 100644 lib/tc.c
>  create mode 100644 lib/tc.h
> 
> diff --git a/lib/automake.mk b/lib/automake.mk
> index faace79..3d57610 100644
> --- a/lib/automake.mk
> +++ b/lib/automake.mk
> @@ -352,6 +352,8 @@ if LINUX
>  lib_libopenvswitch_la_SOURCES += \
>  	lib/dpif-netlink.c \
>  	lib/dpif-netlink.h \
> +	lib/tc.h \
> +	lib/tc.c \

tc.c seems to contain two types of functions:

1. Code which is used by both (old) netdev-linux.c paths and
   code which is used by (new) tc-flower specific paths.
   For example tc_transact().
2. Code which is specific to tc-flower

The latter does not compile against old kernel headers.

As per Flavio Leitner's review of v7 it seems that the compilation problem
may be addressed by patch 23.

I think it would also be worth considering splitting the TC code such that
tc-flower specific code is present in tc_flower.[ch] and shared code is
left in tc.[ch].

Moving code to tc.[ch] could be a separate patch to adding tc_flower.[ch].
In my opinion smaller patches are easier to review and possibly merge
incrementally.

Overall this patch-set seems very large and I think it would be worth
considering ways to merge it incrementally.

...

> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> index 79e8273..a6bb515 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c

...

> @@ -2094,7 +2095,7 @@ netdev_linux_set_policing(struct netdev *netdev_,
>  
>      COVERAGE_INC(netdev_set_policing);
>      /* Remove any existing ingress qdisc. */
> -    error = tc_add_del_ingress_qdisc(netdev_, false);
> +    error = tc_add_del_ingress_qdisc(ifindex, false);

This patch both changes the signature of tc_add_del_ingress_qdisc() and
moves it to tc.c. The signature change could be in a separate patch.

...

> @@ -2930,8 +2931,8 @@ codel_setup_qdisc__(struct netdev *netdev, uint32_t target, uint32_t limit,
>  
>      tc_del_qdisc(netdev);
>  
> -    tcmsg = tc_make_request(netdev, RTM_NEWQDISC,
> -                            NLM_F_EXCL | NLM_F_CREATE, &request);
> +    tcmsg = netdev_linux_tc_make_request(netdev, RTM_NEWQDISC,
> +                                         NLM_F_EXCL | NLM_F_CREATE, &request);

Likewise, I think reworking tc_make_request() could be a separate patch.

...

> @@ -4222,13 +4224,11 @@ hfsc_setup_qdisc__(struct netdev * netdev)
>  
>      tc_del_qdisc(netdev);
>  
> -    tcmsg = tc_make_request(netdev, RTM_NEWQDISC,
> -                            NLM_F_EXCL | NLM_F_CREATE, &request);
> -
> +    tcmsg = netdev_linux_tc_make_request(netdev, RTM_NEWQDISC,
> +                                         NLM_F_EXCL | NLM_F_CREATE, &request);
>      if (!tcmsg) {
>          return ENODEV;
>      }
> -

The change above seems spurious.

>      tcmsg->tcm_handle = tc_make_handle(1, 0);
>      tcmsg->tcm_parent = TC_H_ROOT;
>  
> @@ -4255,12 +4255,11 @@ hfsc_setup_class__(struct netdev *netdev, unsigned int handle,
>      struct ofpbuf request;
>      struct tc_service_curve min, max;
>  
> -    tcmsg = tc_make_request(netdev, RTM_NEWTCLASS, NLM_F_CREATE, &request);
> -
> +    tcmsg = netdev_linux_tc_make_request(netdev, RTM_NEWTCLASS,
> +                                         NLM_F_CREATE, &request);
>      if (!tcmsg) {
>          return ENODEV;
>      }
> -

Ditto.

>      tcmsg->tcm_handle = handle;
>      tcmsg->tcm_parent = parent;
>  

...

> diff --git a/lib/tc.c b/lib/tc.c
> new file mode 100644
> index 0000000..cd06025
> --- /dev/null
> +++ b/lib/tc.c
> @@ -0,0 +1,1109 @@

...

> +static const struct nl_policy tca_flower_policy[] = {
> +    [TCA_FLOWER_CLASSID] = { .type = NL_A_U32, .optional = true, },
> +    [TCA_FLOWER_INDEV] = { .type = NL_A_STRING, .max_len = IFNAMSIZ,
> +                           .optional = true, },
> +    [TCA_FLOWER_KEY_ETH_SRC] = { .type = NL_A_UNSPEC,
> +                                 .min_len = ETH_ALEN, .optional = true, },
> +    [TCA_FLOWER_KEY_ETH_DST] = { .type = NL_A_UNSPEC,
> +                                 .min_len = ETH_ALEN, .optional = true, },
> +    [TCA_FLOWER_KEY_ETH_SRC_MASK] = { .type = NL_A_UNSPEC,
> +                                      .min_len = ETH_ALEN,
> +                                      .optional = true, },
> +    [TCA_FLOWER_KEY_ETH_DST_MASK] = { .type = NL_A_UNSPEC,
> +                                      .min_len = ETH_ALEN,
> +                                      .optional = true, },
> +    [TCA_FLOWER_KEY_ETH_TYPE] = { .type = NL_A_U16, .optional = false, },
> +    [TCA_FLOWER_FLAGS] = { .type = NL_A_U32, .optional = false, },
> +    [TCA_FLOWER_ACT] = { .type = NL_A_NESTED, .optional = false, },
> +    [TCA_FLOWER_KEY_IP_PROTO] = { .type = NL_A_U8, .optional = true, },
> +    [TCA_FLOWER_KEY_IPV4_SRC] = { .type = NL_A_U32, .optional = true, },
> +    [TCA_FLOWER_KEY_IPV4_DST] = {.type = NL_A_U32, .optional = true, },
> +    [TCA_FLOWER_KEY_IPV4_SRC_MASK] = { .type = NL_A_U32, .optional = true, },
> +    [TCA_FLOWER_KEY_IPV4_DST_MASK] = { .type = NL_A_U32, .optional = true, },
> +    [TCA_FLOWER_KEY_IPV6_SRC] = { .type = NL_A_UNSPEC,
> +                                  .min_len = sizeof(struct in6_addr),
> +                                  .optional = true, },
> +    [TCA_FLOWER_KEY_IPV6_DST] = { .type = NL_A_UNSPEC,
> +                                  .min_len = sizeof(struct in6_addr),
> +                                  .optional = true, },
> +    [TCA_FLOWER_KEY_IPV6_SRC_MASK] = { .type = NL_A_UNSPEC,
> +                                       .min_len = sizeof(struct in6_addr),
> +                                       .optional = true, },
> +    [TCA_FLOWER_KEY_IPV6_DST_MASK] = { .type = NL_A_UNSPEC,
> +                                       .min_len = sizeof(struct in6_addr),
> +                                       .optional = true, },
> +    [TCA_FLOWER_KEY_TCP_SRC] = { .type = NL_A_U16, .optional = true, },
> +    [TCA_FLOWER_KEY_TCP_DST] = { .type = NL_A_U16, .optional = true, },
> +    [TCA_FLOWER_KEY_TCP_SRC_MASK] = { .type = NL_A_U16, .optional = true, },
> +    [TCA_FLOWER_KEY_TCP_DST_MASK] = { .type = NL_A_U16, .optional = true, },
> +    [TCA_FLOWER_KEY_UDP_SRC] = { .type = NL_A_U16, .optional = true, },
> +    [TCA_FLOWER_KEY_UDP_DST] = { .type = NL_A_U16, .optional = true, },
> +    [TCA_FLOWER_KEY_UDP_SRC_MASK] = { .type = NL_A_U16, .optional = true, },
> +    [TCA_FLOWER_KEY_UDP_DST_MASK] = { .type = NL_A_U16, .optional = true, },
> +    [TCA_FLOWER_KEY_VLAN_ID] = { .type = NL_A_U16, .optional = true, },
> +    [TCA_FLOWER_KEY_VLAN_PRIO] = { .type = NL_A_U8, .optional = true, },
> +    [TCA_FLOWER_KEY_VLAN_ETH_TYPE] = { .type = NL_A_U16, .optional = true, },
> +    [TCA_FLOWER_KEY_ENC_KEY_ID] = { .type = NL_A_BE32, .optional = true, },
> +    [TCA_FLOWER_KEY_ENC_IPV4_SRC] = { .type = NL_A_BE32, .optional = true, },
> +    [TCA_FLOWER_KEY_ENC_IPV4_DST] = { .type = NL_A_BE32, .optional = true, },
> +    [TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK] = { .type = NL_A_BE32, .optional = true, },
> +    [TCA_FLOWER_KEY_ENC_IPV4_DST_MASK] = { .type = NL_A_BE32, .optional = true, },

I am wondering why the types of the above IPV4 attributes are NL_A_BE32 while
those further up are NL_A_U32.

> +    [TCA_FLOWER_KEY_ENC_IPV6_SRC] = { .type = NL_A_UNSPEC,
> +                                      .min_len = sizeof(struct in6_addr),
> +                                      .optional = true, },
> +    [TCA_FLOWER_KEY_ENC_IPV6_DST] = { .type = NL_A_UNSPEC,
> +                                      .min_len = sizeof(struct in6_addr),
> +                                      .optional = true, },
> +    [TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK] = { .type = NL_A_UNSPEC,
> +                                           .min_len = sizeof(struct in6_addr),
> +                                           .optional = true, },
> +    [TCA_FLOWER_KEY_ENC_IPV6_DST_MASK] = { .type = NL_A_UNSPEC,
> +                                           .min_len = sizeof(struct in6_addr),
> +                                           .optional = true, },
> +    [TCA_FLOWER_KEY_ENC_UDP_DST_PORT] = { .type = NL_A_BE16,
> +                                          .optional = true, },
> +};

...

> +static void
> +nl_parse_flower_ip(struct nlattr **attrs, struct tc_flower *flower) {

...

> +    if (ip_proto == IPPROTO_TCP) {
> +        if (attrs[TCA_FLOWER_KEY_TCP_SRC_MASK]) {
> +            key->src_port =
> +                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_SRC]);
> +            mask->src_port =
> +                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_SRC_MASK]);
> +        }
> +        if (attrs[TCA_FLOWER_KEY_TCP_DST_MASK]) {
> +            key->dst_port =
> +                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_DST]);
> +            mask->dst_port =
> +                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_DST_MASK]);
> +        }
> +    } else if (ip_proto == IPPROTO_UDP) {
> +        if (attrs[TCA_FLOWER_KEY_UDP_SRC_MASK]) {
> +            key->src_port = nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_SRC]);
> +            mask->src_port =
> +                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_SRC_MASK]);
> +        }
> +        if (attrs[TCA_FLOWER_KEY_UDP_DST_MASK]) {
> +            key->dst_port = nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_DST]);
> +            mask->dst_port =
> +                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_DST_MASK]);
> +        }
> +    }

As noted by Flavio Leitner in his review of v7 it seems likely that
SCTP could trivially be supported.

...

> +static int
> +nl_parse_act_drop(struct nlattr *options, struct tc_flower *flower)
> +{
> +    struct nlattr *gact_attrs[ARRAY_SIZE(gact_policy)];
> +    const struct tc_gact *p;
> +    struct nlattr *gact_parms;
> +    const struct tcf_t *tm;
> +
> +    if (!nl_parse_nested(options, gact_policy, gact_attrs,
> +                         ARRAY_SIZE(gact_policy))) {
> +        VLOG_ERR_RL(&parse_err, "failed to parse gact action options");
> +        return EPROTO;
> +    }
> +
> +    gact_parms = gact_attrs[TCA_GACT_PARMS];
> +    p = nl_attr_get_unspec(gact_parms, sizeof *p);
> +
> +    if (p->action == TC_ACT_SHOT) {
> +    } else {

The following seems more logical to me:

       if (p->action != TC_ACT_SHOT) {

> +            VLOG_ERR_RL(&parse_err, "unknown gact action: %d", p->action);
> +            return EINVAL;
> +    }
> +
> +    tm = nl_attr_get_unspec(gact_attrs[TCA_GACT_TM], sizeof *tm);
> +    nl_parse_tcf(tm, flower);
> +
> +    return 0;
> +}

...
Roi Dayan May 7, 2017, 11:46 a.m. UTC | #2
On 04/05/2017 19:35, Simon Horman wrote:
> On Wed, May 03, 2017 at 06:07:52PM +0300, Roi Dayan wrote:
>> From: Paul Blakey <paulb@mellanox.com>
>>
>> Add tc flower interface that will be used to offload flows via tc
>> flower classifier. Depending on the flag used (skip_sw/hw) flower
>> will pass those to HW or handle them itself.
>> Move some tc related functions from netdev-linux.c to tc.c
>>
>> Co-authored-by: Shahar Klein <shahark@mellanox.com>
>> Signed-off-by: Shahar Klein <shahark@mellanox.com>
>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
>> Reviewed-by: Roi Dayan <roid@mellanox.com>
>> Reviewed-by: Simon Horman <simon.horman@netronome.com>
>> ---
>>  lib/automake.mk    |    2 +
>>  lib/netdev-linux.c |  164 ++------
>>  lib/tc.c           | 1109 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  lib/tc.h           |  128 ++++++
>>  4 files changed, 1279 insertions(+), 124 deletions(-)
>>  create mode 100644 lib/tc.c
>>  create mode 100644 lib/tc.h
>>
>> diff --git a/lib/automake.mk b/lib/automake.mk
>> index faace79..3d57610 100644
>> --- a/lib/automake.mk
>> +++ b/lib/automake.mk
>> @@ -352,6 +352,8 @@ if LINUX
>>  lib_libopenvswitch_la_SOURCES += \
>>  	lib/dpif-netlink.c \
>>  	lib/dpif-netlink.h \
>> +	lib/tc.h \
>> +	lib/tc.c \
>
> tc.c seems to contain two types of functions:
>
> 1. Code which is used by both (old) netdev-linux.c paths and
>    code which is used by (new) tc-flower specific paths.
>    For example tc_transact().
> 2. Code which is specific to tc-flower
>
> The latter does not compile against old kernel headers.
>
> As per Flavio Leitner's review or v7 it seems that the compilation problem
> may be addressed by patch 23.

this is correct. we first did all the work for hw offload and then added a
compat fix commit.
Isn't it ok, since there is no point in doing only half the work for hw offload?

>
> I think it would also be worth considering splitting the TC code such that
> tc-flower specific code to is present in tc_flower.[ch] and leave shared
> code is in tc.[ch].
>
> Moving code to tc.[ch] could be a separate patch to adding tc_flower.[ch].
> In my opinion smaller patches are easier to review and possibly merge
> incrementally.

I agree that the first commit should only do the moving and the second should
add the new code, but most of the functions are flower related. I'm not sure how
much will stay in tc.c after moving the flower related code to a new file.

>
> Overall this patch-set seems very large and I think it would be worth
> considering ways to merge it incrementally.
>
> ...
>
>> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
>> index 79e8273..a6bb515 100644
>> --- a/lib/netdev-linux.c
>> +++ b/lib/netdev-linux.c
>
> ...
>
>> @@ -2094,7 +2095,7 @@ netdev_linux_set_policing(struct netdev *netdev_,
>>
>>      COVERAGE_INC(netdev_set_policing);
>>      /* Remove any existing ingress qdisc. */
>> -    error = tc_add_del_ingress_qdisc(netdev_, false);
>> +    error = tc_add_del_ingress_qdisc(ifindex, false);
>
> This patch both changes the signature of tc_add_del_ingress_qdisc() and
> moves it to tc.c. The signature change could be in a separate patch.

ok

>
> ...
>
>> @@ -2930,8 +2931,8 @@ codel_setup_qdisc__(struct netdev *netdev, uint32_t target, uint32_t limit,
>>
>>      tc_del_qdisc(netdev);
>>
>> -    tcmsg = tc_make_request(netdev, RTM_NEWQDISC,
>> -                            NLM_F_EXCL | NLM_F_CREATE, &request);
>> +    tcmsg = netdev_linux_tc_make_request(netdev, RTM_NEWQDISC,
>> +                                         NLM_F_EXCL | NLM_F_CREATE, &request);
>
> Likewise, I think reworking tc_make_request() could be a separate patch.
>
> ...
>
>> @@ -4222,13 +4224,11 @@ hfsc_setup_qdisc__(struct netdev * netdev)
>>
>>      tc_del_qdisc(netdev);
>>
>> -    tcmsg = tc_make_request(netdev, RTM_NEWQDISC,
>> -                            NLM_F_EXCL | NLM_F_CREATE, &request);
>> -
>> +    tcmsg = netdev_linux_tc_make_request(netdev, RTM_NEWQDISC,
>> +                                         NLM_F_EXCL | NLM_F_CREATE, &request);
>>      if (!tcmsg) {
>>          return ENODEV;
>>      }
>> -
>
> The change above seems spurious.
>
>>      tcmsg->tcm_handle = tc_make_handle(1, 0);
>>      tcmsg->tcm_parent = TC_H_ROOT;
>>
>> @@ -4255,12 +4255,11 @@ hfsc_setup_class__(struct netdev *netdev, unsigned int handle,
>>      struct ofpbuf request;
>>      struct tc_service_curve min, max;
>>
>> -    tcmsg = tc_make_request(netdev, RTM_NEWTCLASS, NLM_F_CREATE, &request);
>> -
>> +    tcmsg = netdev_linux_tc_make_request(netdev, RTM_NEWTCLASS,
>> +                                         NLM_F_CREATE, &request);
>>      if (!tcmsg) {
>>          return ENODEV;
>>      }
>> -
>
> Ditto.
>
>>      tcmsg->tcm_handle = handle;
>>      tcmsg->tcm_parent = parent;
>>
>
> ...
>
>> diff --git a/lib/tc.c b/lib/tc.c
>> new file mode 100644
>> index 0000000..cd06025
>> --- /dev/null
>> +++ b/lib/tc.c
>> @@ -0,0 +1,1109 @@
>
> ...
>
>> +static const struct nl_policy tca_flower_policy[] = {
>> +    [TCA_FLOWER_CLASSID] = { .type = NL_A_U32, .optional = true, },
>> +    [TCA_FLOWER_INDEV] = { .type = NL_A_STRING, .max_len = IFNAMSIZ,
>> +                           .optional = true, },
>> +    [TCA_FLOWER_KEY_ETH_SRC] = { .type = NL_A_UNSPEC,
>> +                                 .min_len = ETH_ALEN, .optional = true, },
>> +    [TCA_FLOWER_KEY_ETH_DST] = { .type = NL_A_UNSPEC,
>> +                                 .min_len = ETH_ALEN, .optional = true, },
>> +    [TCA_FLOWER_KEY_ETH_SRC_MASK] = { .type = NL_A_UNSPEC,
>> +                                      .min_len = ETH_ALEN,
>> +                                      .optional = true, },
>> +    [TCA_FLOWER_KEY_ETH_DST_MASK] = { .type = NL_A_UNSPEC,
>> +                                      .min_len = ETH_ALEN,
>> +                                      .optional = true, },
>> +    [TCA_FLOWER_KEY_ETH_TYPE] = { .type = NL_A_U16, .optional = false, },
>> +    [TCA_FLOWER_FLAGS] = { .type = NL_A_U32, .optional = false, },
>> +    [TCA_FLOWER_ACT] = { .type = NL_A_NESTED, .optional = false, },
>> +    [TCA_FLOWER_KEY_IP_PROTO] = { .type = NL_A_U8, .optional = true, },
>> +    [TCA_FLOWER_KEY_IPV4_SRC] = { .type = NL_A_U32, .optional = true, },
>> +    [TCA_FLOWER_KEY_IPV4_DST] = {.type = NL_A_U32, .optional = true, },
>> +    [TCA_FLOWER_KEY_IPV4_SRC_MASK] = { .type = NL_A_U32, .optional = true, },
>> +    [TCA_FLOWER_KEY_IPV4_DST_MASK] = { .type = NL_A_U32, .optional = true, },
>> +    [TCA_FLOWER_KEY_IPV6_SRC] = { .type = NL_A_UNSPEC,
>> +                                  .min_len = sizeof(struct in6_addr),
>> +                                  .optional = true, },
>> +    [TCA_FLOWER_KEY_IPV6_DST] = { .type = NL_A_UNSPEC,
>> +                                  .min_len = sizeof(struct in6_addr),
>> +                                  .optional = true, },
>> +    [TCA_FLOWER_KEY_IPV6_SRC_MASK] = { .type = NL_A_UNSPEC,
>> +                                       .min_len = sizeof(struct in6_addr),
>> +                                       .optional = true, },
>> +    [TCA_FLOWER_KEY_IPV6_DST_MASK] = { .type = NL_A_UNSPEC,
>> +                                       .min_len = sizeof(struct in6_addr),
>> +                                       .optional = true, },
>> +    [TCA_FLOWER_KEY_TCP_SRC] = { .type = NL_A_U16, .optional = true, },
>> +    [TCA_FLOWER_KEY_TCP_DST] = { .type = NL_A_U16, .optional = true, },
>> +    [TCA_FLOWER_KEY_TCP_SRC_MASK] = { .type = NL_A_U16, .optional = true, },
>> +    [TCA_FLOWER_KEY_TCP_DST_MASK] = { .type = NL_A_U16, .optional = true, },
>> +    [TCA_FLOWER_KEY_UDP_SRC] = { .type = NL_A_U16, .optional = true, },
>> +    [TCA_FLOWER_KEY_UDP_DST] = { .type = NL_A_U16, .optional = true, },
>> +    [TCA_FLOWER_KEY_UDP_SRC_MASK] = { .type = NL_A_U16, .optional = true, },
>> +    [TCA_FLOWER_KEY_UDP_DST_MASK] = { .type = NL_A_U16, .optional = true, },
>> +    [TCA_FLOWER_KEY_VLAN_ID] = { .type = NL_A_U16, .optional = true, },
>> +    [TCA_FLOWER_KEY_VLAN_PRIO] = { .type = NL_A_U8, .optional = true, },
>> +    [TCA_FLOWER_KEY_VLAN_ETH_TYPE] = { .type = NL_A_U16, .optional = true, },
>> +    [TCA_FLOWER_KEY_ENC_KEY_ID] = { .type = NL_A_BE32, .optional = true, },
>> +    [TCA_FLOWER_KEY_ENC_IPV4_SRC] = { .type = NL_A_BE32, .optional = true, },
>> +    [TCA_FLOWER_KEY_ENC_IPV4_DST] = { .type = NL_A_BE32, .optional = true, },
>> +    [TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK] = { .type = NL_A_BE32, .optional = true, },
>> +    [TCA_FLOWER_KEY_ENC_IPV4_DST_MASK] = { .type = NL_A_BE32, .optional = true, },
>
> I am wondering why the type of the above IPV4 attributes are NL_A_BE32 while
> those further are are NL_A_U32.

no reason. probably a mistake. should be U32 as in the kernel.

>
>> +    [TCA_FLOWER_KEY_ENC_IPV6_SRC] = { .type = NL_A_UNSPEC,
>> +                                      .min_len = sizeof(struct in6_addr),
>> +                                      .optional = true, },
>> +    [TCA_FLOWER_KEY_ENC_IPV6_DST] = { .type = NL_A_UNSPEC,
>> +                                      .min_len = sizeof(struct in6_addr),
>> +                                      .optional = true, },
>> +    [TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK] = { .type = NL_A_UNSPEC,
>> +                                           .min_len = sizeof(struct in6_addr),
>> +                                           .optional = true, },
>> +    [TCA_FLOWER_KEY_ENC_IPV6_DST_MASK] = { .type = NL_A_UNSPEC,
>> +                                           .min_len = sizeof(struct in6_addr),
>> +                                           .optional = true, },
>> +    [TCA_FLOWER_KEY_ENC_UDP_DST_PORT] = { .type = NL_A_BE16,
>> +                                          .optional = true, },
>> +};
>
> ...
>
>> +static void
>> +nl_parse_flower_ip(struct nlattr **attrs, struct tc_flower *flower) {
>
> ...
>
>> +    if (ip_proto == IPPROTO_TCP) {
>> +        if (attrs[TCA_FLOWER_KEY_TCP_SRC_MASK]) {
>> +            key->src_port =
>> +                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_SRC]);
>> +            mask->src_port =
>> +                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_SRC_MASK]);
>> +        }
>> +        if (attrs[TCA_FLOWER_KEY_TCP_DST_MASK]) {
>> +            key->dst_port =
>> +                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_DST]);
>> +            mask->dst_port =
>> +                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_DST_MASK]);
>> +        }
>> +    } else if (ip_proto == IPPROTO_UDP) {
>> +        if (attrs[TCA_FLOWER_KEY_UDP_SRC_MASK]) {
>> +            key->src_port = nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_SRC]);
>> +            mask->src_port =
>> +                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_SRC_MASK]);
>> +        }
>> +        if (attrs[TCA_FLOWER_KEY_UDP_DST_MASK]) {
>> +            key->dst_port = nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_DST]);
>> +            mask->dst_port =
>> +                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_DST_MASK]);
>> +        }
>> +    }
>
> As noted by Flavio Leitner in his review of v7 it seems likely that
> SCTP could trivially be supported.

ok. probably missed that email.  I'll check SCTP.

>
> ...
>
>> +static int
>> +nl_parse_act_drop(struct nlattr *options, struct tc_flower *flower)
>> +{
>> +    struct nlattr *gact_attrs[ARRAY_SIZE(gact_policy)];
>> +    const struct tc_gact *p;
>> +    struct nlattr *gact_parms;
>> +    const struct tcf_t *tm;
>> +
>> +    if (!nl_parse_nested(options, gact_policy, gact_attrs,
>> +                         ARRAY_SIZE(gact_policy))) {
>> +        VLOG_ERR_RL(&parse_err, "failed to parse gact action options");
>> +        return EPROTO;
>> +    }
>> +
>> +    gact_parms = gact_attrs[TCA_GACT_PARMS];
>> +    p = nl_attr_get_unspec(gact_parms, sizeof *p);
>> +
>> +    if (p->action == TC_ACT_SHOT) {
>> +    } else {
>
> The following seems more logical to me:
>
>        if (p->action != TC_ACT_SHOT) {

right. missed that.

>
>> +            VLOG_ERR_RL(&parse_err, "unknown gact action: %d", p->action);
>> +            return EINVAL;
>> +    }
>> +
>> +    tm = nl_attr_get_unspec(gact_attrs[TCA_GACT_TM], sizeof *tm);
>> +    nl_parse_tcf(tm, flower);
>> +
>> +    return 0;
>> +}
>
> ...
>
Simon Horman May 8, 2017, 5:40 a.m. UTC | #3
On Sun, May 07, 2017 at 02:46:14PM +0300, Roi Dayan wrote:
> 
> 
> On 04/05/2017 19:35, Simon Horman wrote:
> >On Wed, May 03, 2017 at 06:07:52PM +0300, Roi Dayan wrote:
> >>From: Paul Blakey <paulb@mellanox.com>
> >>
> >>Add tc flower interface that will be used to offload flows via tc
> >>flower classifier. Depending on the flag used (skip_sw/hw) flower
> >>will pass those to HW or handle them itself.
> >>Move some tc related functions from netdev-linux.c to tc.c
> >>
> >>Co-authored-by: Shahar Klein <shahark@mellanox.com>
> >>Signed-off-by: Shahar Klein <shahark@mellanox.com>
> >>Signed-off-by: Paul Blakey <paulb@mellanox.com>
> >>Reviewed-by: Roi Dayan <roid@mellanox.com>
> >>Reviewed-by: Simon Horman <simon.horman@netronome.com>
> >>---
> >> lib/automake.mk    |    2 +
> >> lib/netdev-linux.c |  164 ++------
> >> lib/tc.c           | 1109 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> >> lib/tc.h           |  128 ++++++
> >> 4 files changed, 1279 insertions(+), 124 deletions(-)
> >> create mode 100644 lib/tc.c
> >> create mode 100644 lib/tc.h
> >>
> >>diff --git a/lib/automake.mk b/lib/automake.mk
> >>index faace79..3d57610 100644
> >>--- a/lib/automake.mk
> >>+++ b/lib/automake.mk
> >>@@ -352,6 +352,8 @@ if LINUX
> >> lib_libopenvswitch_la_SOURCES += \
> >> 	lib/dpif-netlink.c \
> >> 	lib/dpif-netlink.h \
> >>+	lib/tc.h \
> >>+	lib/tc.c \
> >
> >tc.c seems to contain two types of functions:
> >
> >1. Code which is used by both (old) netdev-linux.c paths and
> >   code which is used by (new) tc-flower specific paths.
> >   For example tc_transact().
> >2. Code which is specific to tc-flower
> >
> >The latter does not compile against old kernel headers.
> >
> >As per Flavio Leitner's review or v7 it seems that the compilation problem
> >may be addressed by patch 23.
> 
> this is correct. we did first all work for hw offload and then added a
> compat fix commit.
> Isn't it ok since there is no point for half work for hw offload?

It's not ok because this patch does not compile, which breaks bisection.

It may be that Flavio's suggestion is not the best way to resolve the
problem - another idea I have is to conditionally compile the tc_flower.c
that I suggest below and provide stub functions in tc_flower.h for the case
where tc_flower.c is not compiled.

> >I think it would also be worth considering splitting the TC code such that
> >tc-flower specific code to is present in tc_flower.[ch] and leave shared
> >code is in tc.[ch].
> >
> >Moving code to tc.[ch] could be a separate patch to adding tc_flower.[ch].
> >In my opinion smaller patches are easier to review and possibly merge
> >incrementally.
> 
> I agree that first commit should do only the moving and second to add new
> code but most of the functions are flower related. I'm not sure how much
> will stay in tc.c after removing flower related code to a new file.

Thanks, I think that would make the patches rather a lot easier on the
eyes.

...

Thanks for your responses to the other, more specific, review comments.
diff mbox

Patch

diff --git a/lib/automake.mk b/lib/automake.mk
index faace79..3d57610 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -352,6 +352,8 @@  if LINUX
 lib_libopenvswitch_la_SOURCES += \
 	lib/dpif-netlink.c \
 	lib/dpif-netlink.h \
+	lib/tc.h \
+	lib/tc.c \
 	lib/if-notifier.c \
 	lib/if-notifier.h \
 	lib/netdev-linux.c \
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 79e8273..a6bb515 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -29,8 +29,6 @@ 
 #include <linux/types.h>
 #include <linux/ethtool.h>
 #include <linux/mii.h>
-#include <linux/pkt_cls.h>
-#include <linux/pkt_sched.h>
 #include <linux/rtnetlink.h>
 #include <linux/sockios.h>
 #include <sys/types.h>
@@ -74,6 +72,7 @@ 
 #include "unaligned.h"
 #include "openvswitch/vlog.h"
 #include "util.h"
+#include "tc.h"
 
 VLOG_DEFINE_THIS_MODULE(netdev_linux);
 
@@ -434,18 +433,14 @@  static const struct tc_ops *const tcs[] = {
     NULL
 };
 
-static unsigned int tc_make_handle(unsigned int major, unsigned int minor);
-static unsigned int tc_get_major(unsigned int handle);
-static unsigned int tc_get_minor(unsigned int handle);
-
 static unsigned int tc_ticks_to_bytes(unsigned int rate, unsigned int ticks);
 static unsigned int tc_bytes_to_ticks(unsigned int rate, unsigned int size);
 static unsigned int tc_buffer_per_jiffy(unsigned int rate);
+static struct tcmsg *netdev_linux_tc_make_request(const struct netdev *,
+                                                  int type,
+                                                  unsigned int flags,
+                                                  struct ofpbuf *);
 
-static struct tcmsg *tc_make_request(const struct netdev *, int type,
-                                     unsigned int flags, struct ofpbuf *);
-static int tc_transact(struct ofpbuf *request, struct ofpbuf **replyp);
-static int tc_add_del_ingress_qdisc(struct netdev *netdev, bool add);
 static int tc_add_policer(struct netdev *,
                           uint32_t kbits_rate, uint32_t kbits_burst);
 
@@ -2076,12 +2071,18 @@  netdev_linux_set_policing(struct netdev *netdev_,
     struct netdev_linux *netdev = netdev_linux_cast(netdev_);
     const char *netdev_name = netdev_get_name(netdev_);
     int error;
+    int ifindex;
 
     kbits_burst = (!kbits_rate ? 0       /* Force to 0 if no rate specified. */
                    : !kbits_burst ? 8000 /* Default to 8000 kbits if 0. */
                    : kbits_burst);       /* Stick with user-specified value. */
 
     ovs_mutex_lock(&netdev->mutex);
+    error = get_ifindex(netdev_, &ifindex);
+    if (error) {
+        goto out;
+    }
+
     if (netdev->cache_valid & VALID_POLICING) {
         error = netdev->netdev_policing_error;
         if (error || (netdev->kbits_rate == kbits_rate &&
@@ -2094,7 +2095,7 @@  netdev_linux_set_policing(struct netdev *netdev_,
 
     COVERAGE_INC(netdev_set_policing);
     /* Remove any existing ingress qdisc. */
-    error = tc_add_del_ingress_qdisc(netdev_, false);
+    error = tc_add_del_ingress_qdisc(ifindex, false);
     if (error) {
         VLOG_WARN_RL(&rl, "%s: removing policing failed: %s",
                      netdev_name, ovs_strerror(error));
@@ -2102,7 +2103,7 @@  netdev_linux_set_policing(struct netdev *netdev_,
     }
 
     if (kbits_rate) {
-        error = tc_add_del_ingress_qdisc(netdev_, true);
+        error = tc_add_del_ingress_qdisc(ifindex, true);
         if (error) {
             VLOG_WARN_RL(&rl, "%s: adding policing qdisc failed: %s",
                          netdev_name, ovs_strerror(error));
@@ -2371,7 +2372,7 @@  start_queue_dump(const struct netdev *netdev, struct queue_dump_state *state)
     struct ofpbuf request;
     struct tcmsg *tcmsg;
 
-    tcmsg = tc_make_request(netdev, RTM_GETTCLASS, 0, &request);
+    tcmsg = netdev_linux_tc_make_request(netdev, RTM_GETTCLASS, 0, &request);
     if (!tcmsg) {
         return false;
     }
@@ -2930,8 +2931,8 @@  codel_setup_qdisc__(struct netdev *netdev, uint32_t target, uint32_t limit,
 
     tc_del_qdisc(netdev);
 
-    tcmsg = tc_make_request(netdev, RTM_NEWQDISC,
-                            NLM_F_EXCL | NLM_F_CREATE, &request);
+    tcmsg = netdev_linux_tc_make_request(netdev, RTM_NEWQDISC,
+                                         NLM_F_EXCL | NLM_F_CREATE, &request);
     if (!tcmsg) {
         return ENODEV;
     }
@@ -3148,8 +3149,8 @@  fqcodel_setup_qdisc__(struct netdev *netdev, uint32_t target, uint32_t limit,
 
     tc_del_qdisc(netdev);
 
-    tcmsg = tc_make_request(netdev, RTM_NEWQDISC,
-                            NLM_F_EXCL | NLM_F_CREATE, &request);
+    tcmsg = netdev_linux_tc_make_request(netdev, RTM_NEWQDISC,
+                                         NLM_F_EXCL | NLM_F_CREATE, &request);
     if (!tcmsg) {
         return ENODEV;
     }
@@ -3372,8 +3373,8 @@  sfq_setup_qdisc__(struct netdev *netdev, uint32_t quantum, uint32_t perturb)
 
     tc_del_qdisc(netdev);
 
-    tcmsg = tc_make_request(netdev, RTM_NEWQDISC,
-                            NLM_F_EXCL | NLM_F_CREATE, &request);
+    tcmsg = netdev_linux_tc_make_request(netdev, RTM_NEWQDISC,
+                                         NLM_F_EXCL | NLM_F_CREATE, &request);
     if (!tcmsg) {
         return ENODEV;
     }
@@ -3559,8 +3560,8 @@  htb_setup_qdisc__(struct netdev *netdev)
 
     tc_del_qdisc(netdev);
 
-    tcmsg = tc_make_request(netdev, RTM_NEWQDISC,
-                            NLM_F_EXCL | NLM_F_CREATE, &request);
+    tcmsg = netdev_linux_tc_make_request(netdev, RTM_NEWQDISC,
+                                         NLM_F_EXCL | NLM_F_CREATE, &request);
     if (!tcmsg) {
         return ENODEV;
     }
@@ -3613,7 +3614,8 @@  htb_setup_class__(struct netdev *netdev, unsigned int handle,
     opt.cbuffer = tc_calc_buffer(opt.ceil.rate, mtu, class->burst);
     opt.prio = class->priority;
 
-    tcmsg = tc_make_request(netdev, RTM_NEWTCLASS, NLM_F_CREATE, &request);
+    tcmsg = netdev_linux_tc_make_request(netdev, RTM_NEWTCLASS,
+                                         NLM_F_CREATE, &request);
     if (!tcmsg) {
         return ENODEV;
     }
@@ -4222,13 +4224,11 @@  hfsc_setup_qdisc__(struct netdev * netdev)
 
     tc_del_qdisc(netdev);
 
-    tcmsg = tc_make_request(netdev, RTM_NEWQDISC,
-                            NLM_F_EXCL | NLM_F_CREATE, &request);
-
+    tcmsg = netdev_linux_tc_make_request(netdev, RTM_NEWQDISC,
+                                         NLM_F_EXCL | NLM_F_CREATE, &request);
     if (!tcmsg) {
         return ENODEV;
     }
-
     tcmsg->tcm_handle = tc_make_handle(1, 0);
     tcmsg->tcm_parent = TC_H_ROOT;
 
@@ -4255,12 +4255,11 @@  hfsc_setup_class__(struct netdev *netdev, unsigned int handle,
     struct ofpbuf request;
     struct tc_service_curve min, max;
 
-    tcmsg = tc_make_request(netdev, RTM_NEWTCLASS, NLM_F_CREATE, &request);
-
+    tcmsg = netdev_linux_tc_make_request(netdev, RTM_NEWTCLASS,
+                                         NLM_F_CREATE, &request);
     if (!tcmsg) {
         return ENODEV;
     }
-
     tcmsg->tcm_handle = handle;
     tcmsg->tcm_parent = parent;
 
@@ -4631,102 +4630,17 @@  static double ticks_per_s;
  */
 static unsigned int buffer_hz;
 
-/* Returns tc handle 'major':'minor'. */
-static unsigned int
-tc_make_handle(unsigned int major, unsigned int minor)
-{
-    return TC_H_MAKE(major << 16, minor);
-}
-
-/* Returns the major number from 'handle'. */
-static unsigned int
-tc_get_major(unsigned int handle)
-{
-    return TC_H_MAJ(handle) >> 16;
-}
-
-/* Returns the minor number from 'handle'. */
-static unsigned int
-tc_get_minor(unsigned int handle)
-{
-    return TC_H_MIN(handle);
-}
-
 static struct tcmsg *
-tc_make_request(const struct netdev *netdev, int type, unsigned int flags,
-                struct ofpbuf *request)
+netdev_linux_tc_make_request(const struct netdev *netdev, int type,
+                             unsigned int flags, struct ofpbuf *request)
 {
-    struct tcmsg *tcmsg;
     int ifindex;
-    int error;
 
-    error = get_ifindex(netdev, &ifindex);
-    if (error) {
+    if (get_ifindex(netdev, &ifindex)) {
         return NULL;
     }
 
-    ofpbuf_init(request, 512);
-    nl_msg_put_nlmsghdr(request, sizeof *tcmsg, type, NLM_F_REQUEST | flags);
-    tcmsg = ofpbuf_put_zeros(request, sizeof *tcmsg);
-    tcmsg->tcm_family = AF_UNSPEC;
-    tcmsg->tcm_ifindex = ifindex;
-    /* Caller should fill in tcmsg->tcm_handle. */
-    /* Caller should fill in tcmsg->tcm_parent. */
-
-    return tcmsg;
-}
-
-static int
-tc_transact(struct ofpbuf *request, struct ofpbuf **replyp)
-{
-    int error = nl_transact(NETLINK_ROUTE, request, replyp);
-    ofpbuf_uninit(request);
-    return error;
-}
-
-/* Adds or deletes a root ingress qdisc on 'netdev'.  We use this for
- * policing configuration.
- *
- * This function is equivalent to running the following when 'add' is true:
- *     /sbin/tc qdisc add dev <devname> handle ffff: ingress
- *
- * This function is equivalent to running the following when 'add' is false:
- *     /sbin/tc qdisc del dev <devname> handle ffff: ingress
- *
- * The configuration and stats may be seen with the following command:
- *     /sbin/tc -s qdisc show dev <devname>
- *
- * Returns 0 if successful, otherwise a positive errno value.
- */
-static int
-tc_add_del_ingress_qdisc(struct netdev *netdev, bool add)
-{
-    struct ofpbuf request;
-    struct tcmsg *tcmsg;
-    int error;
-    int type = add ? RTM_NEWQDISC : RTM_DELQDISC;
-    int flags = add ? NLM_F_EXCL | NLM_F_CREATE : 0;
-
-    tcmsg = tc_make_request(netdev, type, flags, &request);
-    if (!tcmsg) {
-        return ENODEV;
-    }
-    tcmsg->tcm_handle = tc_make_handle(0xffff, 0);
-    tcmsg->tcm_parent = TC_H_INGRESS;
-    nl_msg_put_string(&request, TCA_KIND, "ingress");
-    nl_msg_put_unspec(&request, TCA_OPTIONS, NULL, 0);
-
-    error = tc_transact(&request, NULL);
-    if (error) {
-        /* If we're deleting the qdisc, don't worry about some of the
-         * error conditions. */
-        if (!add && (error == ENOENT || error == EINVAL)) {
-            return 0;
-        }
-        return error;
-    }
-
-    return 0;
+    return tc_make_request(ifindex, type, flags, request);
 }
 
 /* Adds a policer to 'netdev' with a rate of 'kbits_rate' and a burst size
@@ -4769,8 +4683,8 @@  tc_add_policer(struct netdev *netdev,
     tc_police.burst = tc_bytes_to_ticks(
         tc_police.rate.rate, MIN(UINT32_MAX / 1024, kbits_burst) * 1024 / 8);
 
-    tcmsg = tc_make_request(netdev, RTM_NEWTFILTER,
-                            NLM_F_EXCL | NLM_F_CREATE, &request);
+    tcmsg = netdev_linux_tc_make_request(netdev, RTM_NEWTFILTER,
+                                         NLM_F_EXCL | NLM_F_CREATE, &request);
     if (!tcmsg) {
         return ENODEV;
     }
@@ -5035,7 +4949,8 @@  tc_query_class(const struct netdev *netdev,
     struct tcmsg *tcmsg;
     int error;
 
-    tcmsg = tc_make_request(netdev, RTM_GETTCLASS, NLM_F_ECHO, &request);
+    tcmsg = netdev_linux_tc_make_request(netdev, RTM_GETTCLASS,
+                                         NLM_F_ECHO, &request);
     if (!tcmsg) {
         return ENODEV;
     }
@@ -5061,7 +4976,7 @@  tc_delete_class(const struct netdev *netdev, unsigned int handle)
     struct tcmsg *tcmsg;
     int error;
 
-    tcmsg = tc_make_request(netdev, RTM_DELTCLASS, 0, &request);
+    tcmsg = netdev_linux_tc_make_request(netdev, RTM_DELTCLASS, 0, &request);
     if (!tcmsg) {
         return ENODEV;
     }
@@ -5087,7 +5002,7 @@  tc_del_qdisc(struct netdev *netdev_)
     struct tcmsg *tcmsg;
     int error;
 
-    tcmsg = tc_make_request(netdev_, RTM_DELQDISC, 0, &request);
+    tcmsg = netdev_linux_tc_make_request(netdev_, RTM_DELQDISC, 0, &request);
     if (!tcmsg) {
         return ENODEV;
     }
@@ -5168,7 +5083,8 @@  tc_query_qdisc(const struct netdev *netdev_)
      * in such a case we get no response at all from the kernel (!) if a
      * builtin qdisc is in use (which is later caught by "!error &&
      * !qdisc->size"). */
-    tcmsg = tc_make_request(netdev_, RTM_GETQDISC, NLM_F_ECHO, &request);
+    tcmsg = netdev_linux_tc_make_request(netdev_, RTM_GETQDISC,
+                                         NLM_F_ECHO, &request);
     if (!tcmsg) {
         return ENODEV;
     }
diff --git a/lib/tc.c b/lib/tc.c
new file mode 100644
index 0000000..cd06025
--- /dev/null
+++ b/lib/tc.c
@@ -0,0 +1,1109 @@ 
+/*
+ * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Nicira, Inc.
+ * Copyright (c) 2016 Mellanox Technologies, Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include <errno.h>
+#include <linux/rtnetlink.h>
+#include <net/if.h>
+#include <linux/tc_act/tc_gact.h>
+#include <linux/tc_act/tc_mirred.h>
+#include <linux/tc_act/tc_vlan.h>
+#include <linux/tc_act/tc_tunnel_key.h>
+#include <linux/gen_stats.h>
+#include "timeval.h"
+#include "netlink-socket.h"
+#include "netlink.h"
+#include "rtnetlink.h"
+#include "openvswitch/vlog.h"
+#include "openvswitch/ofpbuf.h"
+#include "tc.h"
+#include "util.h"
+#include "byte-order.h"
+
+VLOG_DEFINE_THIS_MODULE(tc);
+
+static struct vlog_rate_limit parse_err = VLOG_RATE_LIMIT_INIT(5, 5);
+
+/* Combines 'major' and 'minor' into the single tc handle 'major':'minor'. */
+unsigned int
+tc_make_handle(unsigned int major, unsigned int minor)
+{
+    unsigned int shifted_major = major << 16;
+
+    return TC_H_MAKE(shifted_major, minor);
+}
+
+/* Extracts the major part of 'handle' and shifts it down by 16 bits. */
+unsigned int
+tc_get_major(unsigned int handle)
+{
+    unsigned int major_bits = TC_H_MAJ(handle);
+
+    return major_bits >> 16;
+}
+
+/* Extracts the minor part of 'handle'. */
+unsigned int
+tc_get_minor(unsigned int handle)
+{
+    unsigned int minor_bits = TC_H_MIN(handle);
+
+    return minor_bits;
+}
+
+/* Initializes 'request' and starts a netlink message of the given 'type' in
+ * it, with NLM_F_REQUEST added to 'flags', followed by a struct tcmsg
+ * appended with ofpbuf_put_zeros() whose family is AF_UNSPEC and whose
+ * interface index is 'ifindex'.
+ *
+ * Returns a pointer to that tcmsg.  The caller is expected to fill in its
+ * tcm_handle and tcm_parent members. */
+struct tcmsg *
+tc_make_request(int ifindex, int type, unsigned int flags,
+                struct ofpbuf *request)
+{
+    unsigned int nl_flags = NLM_F_REQUEST | flags;
+    struct tcmsg *hdr;
+
+    ofpbuf_init(request, 512);
+    nl_msg_put_nlmsghdr(request, sizeof *hdr, type, nl_flags);
+
+    hdr = ofpbuf_put_zeros(request, sizeof *hdr);
+    hdr->tcm_ifindex = ifindex;
+    hdr->tcm_family = AF_UNSPEC;
+
+    return hdr;
+}
+
+/* Hands 'request' to nl_transact() on NETLINK_ROUTE, passing 'replyp'
+ * through unchanged, and then uninitializes 'request' regardless of the
+ * outcome.  Returns whatever nl_transact() returned. */
+int
+tc_transact(struct ofpbuf *request, struct ofpbuf **replyp)
+{
+    int retval;
+
+    retval = nl_transact(NETLINK_ROUTE, request, replyp);
+    ofpbuf_uninit(request);
+
+    return retval;
+}
+
+/* Installs ('add' true) or removes ('add' false) the ingress qdisc with
+ * handle ffff: on the device whose interface index is 'ifindex'.
+ *
+ * With 'add' true this is equivalent to running:
+ *     /sbin/tc qdisc add dev <devname> handle ffff: ingress
+ *
+ * With 'add' false this is equivalent to running:
+ *     /sbin/tc qdisc del dev <devname> handle ffff: ingress
+ *
+ * where <devname> is the name of the device that has index 'ifindex'.
+ *
+ * The resulting configuration and statistics can be inspected with:
+ *     /sbin/tc -s qdisc show dev <devname>
+ *
+ * Returns 0 if successful, otherwise a positive errno value.  When
+ * deleting, ENOENT and EINVAL are reported as success. */
+int
+tc_add_del_ingress_qdisc(int ifindex, bool add)
+{
+    struct ofpbuf request;
+    struct tcmsg *tcmsg;
+    int error;
+    int flags;
+    int type;
+
+    if (add) {
+        type = RTM_NEWQDISC;
+        flags = NLM_F_EXCL | NLM_F_CREATE;
+    } else {
+        type = RTM_DELQDISC;
+        flags = 0;
+    }
+
+    tcmsg = tc_make_request(ifindex, type, flags, &request);
+    tcmsg->tcm_handle = tc_make_handle(0xffff, 0);
+    tcmsg->tcm_parent = TC_H_INGRESS;
+    nl_msg_put_string(&request, TCA_KIND, "ingress");
+    nl_msg_put_unspec(&request, TCA_OPTIONS, NULL, 0);
+
+    error = tc_transact(&request, NULL);
+    if (!add && (error == ENOENT || error == EINVAL)) {
+        /* These error conditions are deliberately ignored on deletion. */
+        return 0;
+    }
+
+    return error;
+}
+
+/* Attribute policy for the top level of a tc message: TCA_KIND and
+ * TCA_OPTIONS are mandatory, the two statistics attributes are optional. */
+static const struct nl_policy tca_policy[] = {
+    [TCA_KIND] = {
+        .type = NL_A_STRING,
+        .optional = false,
+    },
+    [TCA_OPTIONS] = {
+        .type = NL_A_NESTED,
+        .optional = false,
+    },
+    [TCA_STATS] = {
+        .type = NL_A_UNSPEC,
+        .min_len = sizeof(struct tc_stats),
+        .optional = true,
+    },
+    [TCA_STATS2] = {
+        .type = NL_A_NESTED,
+        .optional = true,
+    },
+};
+
+/* Attribute policy for the options of a flower classifier.  Only the
+ * Ethernet type, the flags and the action list are mandatory; every other
+ * attribute may be absent. */
+static const struct nl_policy tca_flower_policy[] = {
+    [TCA_FLOWER_CLASSID] = { .type = NL_A_U32, .optional = true },
+    [TCA_FLOWER_INDEV] = {
+        .type = NL_A_STRING,
+        .max_len = IFNAMSIZ,
+        .optional = true,
+    },
+
+    /* Ethernet addresses and their masks. */
+    [TCA_FLOWER_KEY_ETH_SRC] = {
+        .type = NL_A_UNSPEC,
+        .min_len = ETH_ALEN,
+        .optional = true,
+    },
+    [TCA_FLOWER_KEY_ETH_DST] = {
+        .type = NL_A_UNSPEC,
+        .min_len = ETH_ALEN,
+        .optional = true,
+    },
+    [TCA_FLOWER_KEY_ETH_SRC_MASK] = {
+        .type = NL_A_UNSPEC,
+        .min_len = ETH_ALEN,
+        .optional = true,
+    },
+    [TCA_FLOWER_KEY_ETH_DST_MASK] = {
+        .type = NL_A_UNSPEC,
+        .min_len = ETH_ALEN,
+        .optional = true,
+    },
+
+    /* Mandatory attributes. */
+    [TCA_FLOWER_KEY_ETH_TYPE] = { .type = NL_A_U16, .optional = false },
+    [TCA_FLOWER_FLAGS] = { .type = NL_A_U32, .optional = false },
+    [TCA_FLOWER_ACT] = { .type = NL_A_NESTED, .optional = false },
+
+    /* IP protocol and IPv4/IPv6 addresses with their masks. */
+    [TCA_FLOWER_KEY_IP_PROTO] = { .type = NL_A_U8, .optional = true },
+    [TCA_FLOWER_KEY_IPV4_SRC] = { .type = NL_A_U32, .optional = true },
+    [TCA_FLOWER_KEY_IPV4_DST] = { .type = NL_A_U32, .optional = true },
+    [TCA_FLOWER_KEY_IPV4_SRC_MASK] = { .type = NL_A_U32, .optional = true },
+    [TCA_FLOWER_KEY_IPV4_DST_MASK] = { .type = NL_A_U32, .optional = true },
+    [TCA_FLOWER_KEY_IPV6_SRC] = {
+        .type = NL_A_UNSPEC,
+        .min_len = sizeof(struct in6_addr),
+        .optional = true,
+    },
+    [TCA_FLOWER_KEY_IPV6_DST] = {
+        .type = NL_A_UNSPEC,
+        .min_len = sizeof(struct in6_addr),
+        .optional = true,
+    },
+    [TCA_FLOWER_KEY_IPV6_SRC_MASK] = {
+        .type = NL_A_UNSPEC,
+        .min_len = sizeof(struct in6_addr),
+        .optional = true,
+    },
+    [TCA_FLOWER_KEY_IPV6_DST_MASK] = {
+        .type = NL_A_UNSPEC,
+        .min_len = sizeof(struct in6_addr),
+        .optional = true,
+    },
+
+    /* TCP and UDP ports with their masks. */
+    [TCA_FLOWER_KEY_TCP_SRC] = { .type = NL_A_U16, .optional = true },
+    [TCA_FLOWER_KEY_TCP_DST] = { .type = NL_A_U16, .optional = true },
+    [TCA_FLOWER_KEY_TCP_SRC_MASK] = { .type = NL_A_U16, .optional = true },
+    [TCA_FLOWER_KEY_TCP_DST_MASK] = { .type = NL_A_U16, .optional = true },
+    [TCA_FLOWER_KEY_UDP_SRC] = { .type = NL_A_U16, .optional = true },
+    [TCA_FLOWER_KEY_UDP_DST] = { .type = NL_A_U16, .optional = true },
+    [TCA_FLOWER_KEY_UDP_SRC_MASK] = { .type = NL_A_U16, .optional = true },
+    [TCA_FLOWER_KEY_UDP_DST_MASK] = { .type = NL_A_U16, .optional = true },
+
+    /* VLAN. */
+    [TCA_FLOWER_KEY_VLAN_ID] = { .type = NL_A_U16, .optional = true },
+    [TCA_FLOWER_KEY_VLAN_PRIO] = { .type = NL_A_U8, .optional = true },
+    [TCA_FLOWER_KEY_VLAN_ETH_TYPE] = { .type = NL_A_U16, .optional = true },
+
+    /* Tunnel (encapsulation) key. */
+    [TCA_FLOWER_KEY_ENC_KEY_ID] = { .type = NL_A_BE32, .optional = true },
+    [TCA_FLOWER_KEY_ENC_IPV4_SRC] = { .type = NL_A_BE32, .optional = true },
+    [TCA_FLOWER_KEY_ENC_IPV4_DST] = { .type = NL_A_BE32, .optional = true },
+    [TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK] = {
+        .type = NL_A_BE32,
+        .optional = true,
+    },
+    [TCA_FLOWER_KEY_ENC_IPV4_DST_MASK] = {
+        .type = NL_A_BE32,
+        .optional = true,
+    },
+    [TCA_FLOWER_KEY_ENC_IPV6_SRC] = {
+        .type = NL_A_UNSPEC,
+        .min_len = sizeof(struct in6_addr),
+        .optional = true,
+    },
+    [TCA_FLOWER_KEY_ENC_IPV6_DST] = {
+        .type = NL_A_UNSPEC,
+        .min_len = sizeof(struct in6_addr),
+        .optional = true,
+    },
+    [TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK] = {
+        .type = NL_A_UNSPEC,
+        .min_len = sizeof(struct in6_addr),
+        .optional = true,
+    },
+    [TCA_FLOWER_KEY_ENC_IPV6_DST_MASK] = {
+        .type = NL_A_UNSPEC,
+        .min_len = sizeof(struct in6_addr),
+        .optional = true,
+    },
+    [TCA_FLOWER_KEY_ENC_UDP_DST_PORT] = {
+        .type = NL_A_BE16,
+        .optional = true,
+    },
+};
+
+/* Copies the Ethernet source and destination addresses, together with their
+ * masks, from 'attrs' into 'flower->key' and 'flower->mask'.
+ *
+ * Each address is read only when its mask attribute is present; the
+ * corresponding key attribute is then read without a separate presence
+ * check. */
+static void
+nl_parse_flower_eth(struct nlattr **attrs, struct tc_flower *flower)
+{
+    const struct nlattr *src_mask = attrs[TCA_FLOWER_KEY_ETH_SRC_MASK];
+    const struct nlattr *dst_mask = attrs[TCA_FLOWER_KEY_ETH_DST_MASK];
+    const struct eth_addr *mac;
+
+    if (src_mask) {
+        mac = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ETH_SRC], ETH_ALEN);
+        memcpy(&flower->key.src_mac, mac, sizeof flower->key.src_mac);
+
+        mac = nl_attr_get_unspec(src_mask, ETH_ALEN);
+        memcpy(&flower->mask.src_mac, mac, sizeof flower->mask.src_mac);
+    }
+
+    if (dst_mask) {
+        mac = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ETH_DST], ETH_ALEN);
+        memcpy(&flower->key.dst_mac, mac, sizeof flower->key.dst_mac);
+
+        mac = nl_attr_get_unspec(dst_mask, ETH_ALEN);
+        memcpy(&flower->mask.dst_mac, mac, sizeof flower->mask.dst_mac);
+    }
+}
+
+/* Fills in the VLAN members of 'flower->key' from 'attrs'.
+ *
+ * Nothing is done unless 'flower->key.eth_type' is already ETH_P_8021Q.
+ * Otherwise the encapsulated Ethernet type is taken from
+ * TCA_FLOWER_KEY_ETH_TYPE, and the VLAN id and priority are copied when
+ * their attributes are present. */
+static void
+nl_parse_flower_vlan(struct nlattr **attrs, struct tc_flower *flower)
+{
+    const struct nlattr *vid = attrs[TCA_FLOWER_KEY_VLAN_ID];
+    const struct nlattr *pcp = attrs[TCA_FLOWER_KEY_VLAN_PRIO];
+    struct tc_flower_key *key = &flower->key;
+
+    if (key->eth_type != htons(ETH_P_8021Q)) {
+        return;
+    }
+
+    key->encap_eth_type = nl_attr_get_be16(attrs[TCA_FLOWER_KEY_ETH_TYPE]);
+
+    if (vid) {
+        key->vlan_id = nl_attr_get_u16(vid);
+    }
+    if (pcp) {
+        key->vlan_prio = nl_attr_get_u8(pcp);
+    }
+}
+
+/* Copies the tunnel match fields found in 'attrs' into 'flower->tunnel'.
+ *
+ * The tunnel key id is widened with be32_to_be64().  Each IPv4/IPv6 tunnel
+ * address is read when its *mask* attribute is present; the mask values
+ * themselves are not stored by this function. */
+static void
+nl_parse_flower_tunnel(struct nlattr **attrs, struct tc_flower *flower)
+{
+    const struct nlattr *key_id = attrs[TCA_FLOWER_KEY_ENC_KEY_ID];
+    const struct nlattr *udp_dst = attrs[TCA_FLOWER_KEY_ENC_UDP_DST_PORT];
+
+    if (key_id) {
+        flower->tunnel.id = be32_to_be64(nl_attr_get_be32(key_id));
+    }
+
+    if (attrs[TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK]) {
+        const struct nlattr *src = attrs[TCA_FLOWER_KEY_ENC_IPV4_SRC];
+
+        flower->tunnel.ipv4.ipv4_src = nl_attr_get_be32(src);
+    }
+    if (attrs[TCA_FLOWER_KEY_ENC_IPV4_DST_MASK]) {
+        const struct nlattr *dst = attrs[TCA_FLOWER_KEY_ENC_IPV4_DST];
+
+        flower->tunnel.ipv4.ipv4_dst = nl_attr_get_be32(dst);
+    }
+
+    if (attrs[TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK]) {
+        const struct nlattr *src = attrs[TCA_FLOWER_KEY_ENC_IPV6_SRC];
+
+        flower->tunnel.ipv6.ipv6_src = nl_attr_get_in6_addr(src);
+    }
+    if (attrs[TCA_FLOWER_KEY_ENC_IPV6_DST_MASK]) {
+        const struct nlattr *dst = attrs[TCA_FLOWER_KEY_ENC_IPV6_DST];
+
+        flower->tunnel.ipv6.ipv6_dst = nl_attr_get_in6_addr(dst);
+    }
+
+    if (udp_dst) {
+        flower->tunnel.tp_dst = nl_attr_get_be16(udp_dst);
+    }
+}
+
+/* Copies the IP-level match fields found in 'attrs' into 'flower->key' and
+ * 'flower->mask'.
+ *
+ * If the IP protocol attribute is present it is stored with a mask of
+ * UINT8_MAX.  IPv4 and IPv6 addresses are read when their mask attribute is
+ * present.  Source and destination ports are read only when the protocol is
+ * TCP or UDP, again keyed on the presence of the port mask attribute. */
+static void
+nl_parse_flower_ip(struct nlattr **attrs, struct tc_flower *flower)
+{
+    struct tc_flower_key *match = &flower->key;
+    struct tc_flower_key *wild = &flower->mask;
+    uint8_t proto = 0;
+
+    if (attrs[TCA_FLOWER_KEY_IP_PROTO]) {
+        proto = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_IP_PROTO]);
+        match->ip_proto = proto;
+        wild->ip_proto = UINT8_MAX;
+    }
+
+    if (attrs[TCA_FLOWER_KEY_IPV4_SRC_MASK]) {
+        struct nlattr *value = attrs[TCA_FLOWER_KEY_IPV4_SRC];
+        struct nlattr *value_mask = attrs[TCA_FLOWER_KEY_IPV4_SRC_MASK];
+
+        match->ipv4.ipv4_src = nl_attr_get_be32(value);
+        wild->ipv4.ipv4_src = nl_attr_get_be32(value_mask);
+    }
+    if (attrs[TCA_FLOWER_KEY_IPV4_DST_MASK]) {
+        struct nlattr *value = attrs[TCA_FLOWER_KEY_IPV4_DST];
+        struct nlattr *value_mask = attrs[TCA_FLOWER_KEY_IPV4_DST_MASK];
+
+        match->ipv4.ipv4_dst = nl_attr_get_be32(value);
+        wild->ipv4.ipv4_dst = nl_attr_get_be32(value_mask);
+    }
+    if (attrs[TCA_FLOWER_KEY_IPV6_SRC_MASK]) {
+        struct nlattr *value = attrs[TCA_FLOWER_KEY_IPV6_SRC];
+        struct nlattr *value_mask = attrs[TCA_FLOWER_KEY_IPV6_SRC_MASK];
+
+        match->ipv6.ipv6_src = nl_attr_get_in6_addr(value);
+        wild->ipv6.ipv6_src = nl_attr_get_in6_addr(value_mask);
+    }
+    if (attrs[TCA_FLOWER_KEY_IPV6_DST_MASK]) {
+        struct nlattr *value = attrs[TCA_FLOWER_KEY_IPV6_DST];
+        struct nlattr *value_mask = attrs[TCA_FLOWER_KEY_IPV6_DST_MASK];
+
+        match->ipv6.ipv6_dst = nl_attr_get_in6_addr(value);
+        wild->ipv6.ipv6_dst = nl_attr_get_in6_addr(value_mask);
+    }
+
+    switch (proto) {
+    case IPPROTO_TCP:
+        if (attrs[TCA_FLOWER_KEY_TCP_SRC_MASK]) {
+            match->src_port =
+                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_SRC]);
+            wild->src_port =
+                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_SRC_MASK]);
+        }
+        if (attrs[TCA_FLOWER_KEY_TCP_DST_MASK]) {
+            match->dst_port =
+                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_DST]);
+            wild->dst_port =
+                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_DST_MASK]);
+        }
+        break;
+
+    case IPPROTO_UDP:
+        if (attrs[TCA_FLOWER_KEY_UDP_SRC_MASK]) {
+            match->src_port =
+                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_SRC]);
+            wild->src_port =
+                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_SRC_MASK]);
+        }
+        if (attrs[TCA_FLOWER_KEY_UDP_DST_MASK]) {
+            match->dst_port =
+                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_DST]);
+            wild->dst_port =
+                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_DST_MASK]);
+        }
+        break;
+
+    default:
+        /* No port attributes are read for other protocols. */
+        break;
+    }
+}
+
+/* Attribute policy for the options of a "tunnel_key" action.  Only the
+ * parameters structure is mandatory. */
+static const struct nl_policy tunnel_key_policy[] = {
+    [TCA_TUNNEL_KEY_PARMS] = {
+        .type = NL_A_UNSPEC,
+        .min_len = sizeof(struct tc_tunnel_key),
+        .optional = false,
+    },
+    [TCA_TUNNEL_KEY_ENC_KEY_ID] = { .type = NL_A_BE32, .optional = true },
+    [TCA_TUNNEL_KEY_ENC_DST_PORT] = { .type = NL_A_BE16, .optional = true },
+    [TCA_TUNNEL_KEY_ENC_IPV4_SRC] = { .type = NL_A_BE32, .optional = true },
+    [TCA_TUNNEL_KEY_ENC_IPV4_DST] = { .type = NL_A_BE32, .optional = true },
+    [TCA_TUNNEL_KEY_ENC_IPV6_SRC] = {
+        .type = NL_A_UNSPEC,
+        .min_len = sizeof(struct in6_addr),
+        .optional = true,
+    },
+    [TCA_TUNNEL_KEY_ENC_IPV6_DST] = {
+        .type = NL_A_UNSPEC,
+        .min_len = sizeof(struct in6_addr),
+        .optional = true,
+    },
+};
+
+/* Parses the options of a "tunnel_key" action, 'options', into 'flower'.
+ *
+ * A TCA_TUNNEL_KEY_ACT_SET action marks 'flower->set' as set and fills in
+ * its members: the IPv4 addresses, key id and destination port become 0
+ * when their attribute is absent, while the IPv6 addresses are only written
+ * when present.  A TCA_TUNNEL_KEY_ACT_RELEASE action sets
+ * 'flower->tunnel.tunnel'.
+ *
+ * Returns 0 on success, EPROTO if 'options' cannot be parsed, EINVAL for
+ * any other tunnel action. */
+static int
+nl_parse_act_tunnel_key(struct nlattr *options, struct tc_flower *flower)
+{
+    struct nlattr *tun_attrs[ARRAY_SIZE(tunnel_key_policy)];
+    const struct tc_tunnel_key *parms;
+    bool parsed;
+
+    parsed = nl_parse_nested(options, tunnel_key_policy, tun_attrs,
+                             ARRAY_SIZE(tunnel_key_policy));
+    if (!parsed) {
+        VLOG_ERR_RL(&parse_err, "failed to parse tunnel_key action options");
+        return EPROTO;
+    }
+
+    parms = nl_attr_get_unspec(tun_attrs[TCA_TUNNEL_KEY_PARMS],
+                               sizeof *parms);
+
+    switch (parms->t_action) {
+    case TCA_TUNNEL_KEY_ACT_SET: {
+        struct nlattr *key_id = tun_attrs[TCA_TUNNEL_KEY_ENC_KEY_ID];
+        struct nlattr *udp_dst = tun_attrs[TCA_TUNNEL_KEY_ENC_DST_PORT];
+        struct nlattr *src4 = tun_attrs[TCA_TUNNEL_KEY_ENC_IPV4_SRC];
+        struct nlattr *dst4 = tun_attrs[TCA_TUNNEL_KEY_ENC_IPV4_DST];
+        struct nlattr *src6 = tun_attrs[TCA_TUNNEL_KEY_ENC_IPV6_SRC];
+        struct nlattr *dst6 = tun_attrs[TCA_TUNNEL_KEY_ENC_IPV6_DST];
+
+        flower->set.set = true;
+        flower->set.ipv4.ipv4_src = src4 ? nl_attr_get_be32(src4) : 0;
+        flower->set.ipv4.ipv4_dst = dst4 ? nl_attr_get_be32(dst4) : 0;
+        if (src6) {
+            flower->set.ipv6.ipv6_src = nl_attr_get_in6_addr(src6);
+        }
+        if (dst6) {
+            flower->set.ipv6.ipv6_dst = nl_attr_get_in6_addr(dst6);
+        }
+        flower->set.id = key_id ? be32_to_be64(nl_attr_get_be32(key_id)) : 0;
+        flower->set.tp_dst = udp_dst ? nl_attr_get_be16(udp_dst) : 0;
+        break;
+    }
+
+    case TCA_TUNNEL_KEY_ACT_RELEASE:
+        flower->tunnel.tunnel = true;
+        break;
+
+    default:
+        VLOG_ERR_RL(&parse_err, "unknown tunnel actions: %d, %d",
+                    parms->action, parms->t_action);
+        return EINVAL;
+    }
+
+    return 0;
+}
+
+/* Attribute policy for the options of a "gact" action.  Both the parameters
+ * and the timing attribute are mandatory. */
+static const struct nl_policy gact_policy[] = {
+    [TCA_GACT_PARMS] = {
+        .type = NL_A_UNSPEC,
+        .min_len = sizeof(struct tc_gact),
+        .optional = false,
+    },
+    [TCA_GACT_TM] = {
+        .type = NL_A_UNSPEC,
+        .min_len = sizeof(struct tcf_t),
+        .optional = false,
+    },
+};
+
+/* Stores in 'flower->lastused' the current time_msec() value minus ten
+ * times 'tm->lastuse'.
+ *
+ * NOTE(review): the factor of 10 presumably converts kernel ticks to
+ * milliseconds on the assumption of 100 ticks per second -- confirm. */
+static void
+nl_parse_tcf(const struct tcf_t *tm, struct tc_flower *flower)
+{
+    unsigned long long int elapsed = tm->lastuse * 10;
+    unsigned long long int current = time_msec();
+
+    flower->lastused = current - elapsed;
+}
+
+/* Parses the options of a "gact" action, 'options', into 'flower'.
+ *
+ * TC_ACT_SHOT is the only accepted action; for it, the timing information
+ * is passed to nl_parse_tcf().
+ *
+ * Returns 0 on success, EPROTO if 'options' cannot be parsed, EINVAL for
+ * any other gact action. */
+static int
+nl_parse_act_drop(struct nlattr *options, struct tc_flower *flower)
+{
+    struct nlattr *gact_attrs[ARRAY_SIZE(gact_policy)];
+    const struct tc_gact *parms;
+    bool parsed;
+
+    parsed = nl_parse_nested(options, gact_policy, gact_attrs,
+                             ARRAY_SIZE(gact_policy));
+    if (!parsed) {
+        VLOG_ERR_RL(&parse_err, "failed to parse gact action options");
+        return EPROTO;
+    }
+
+    parms = nl_attr_get_unspec(gact_attrs[TCA_GACT_PARMS], sizeof *parms);
+    if (parms->action != TC_ACT_SHOT) {
+        VLOG_ERR_RL(&parse_err, "unknown gact action: %d", parms->action);
+        return EINVAL;
+    }
+
+    nl_parse_tcf(nl_attr_get_unspec(gact_attrs[TCA_GACT_TM],
+                                    sizeof(struct tcf_t)),
+                 flower);
+
+    return 0;
+}
+
+/* Attribute policy for the options of a "mirred" action.  Both the
+ * parameters and the timing attribute are mandatory. */
+static const struct nl_policy mirred_policy[] = {
+    [TCA_MIRRED_PARMS] = {
+        .type = NL_A_UNSPEC,
+        .min_len = sizeof(struct tc_mirred),
+        .optional = false,
+    },
+    [TCA_MIRRED_TM] = {
+        .type = NL_A_UNSPEC,
+        .min_len = sizeof(struct tcf_t),
+        .optional = false,
+    },
+};
+
+/* Parses the options of a "mirred" action, 'options', into 'flower'.
+ *
+ * Only the combination of action TC_ACT_STOLEN with eaction
+ * TCA_EGRESS_REDIR is accepted.  For it, the target interface index is
+ * stored in 'flower->ifindex_out' and the timing information is passed to
+ * nl_parse_tcf().
+ *
+ * Returns 0 on success, EPROTO if 'options' cannot be parsed, EINVAL for
+ * any other mirred action. */
+static int
+nl_parse_act_mirred(struct nlattr *options, struct tc_flower *flower)
+{
+    struct nlattr *mirred_attrs[ARRAY_SIZE(mirred_policy)];
+    const struct tc_mirred *parms;
+    const struct tcf_t *timing;
+    bool egress_redirect;
+
+    if (!nl_parse_nested(options, mirred_policy, mirred_attrs,
+                         ARRAY_SIZE(mirred_policy))) {
+        VLOG_ERR_RL(&parse_err, "failed to parse mirred action options");
+        return EPROTO;
+    }
+
+    parms = nl_attr_get_unspec(mirred_attrs[TCA_MIRRED_PARMS],
+                               sizeof *parms);
+
+    egress_redirect = (parms->action == TC_ACT_STOLEN
+                       && parms->eaction == TCA_EGRESS_REDIR);
+    if (!egress_redirect) {
+        VLOG_ERR_RL(&parse_err, "unknown mirred action: %d, %d, %d",
+                 parms->action, parms->eaction, parms->ifindex);
+        return EINVAL;
+    }
+
+    flower->ifindex_out = parms->ifindex;
+
+    timing = nl_attr_get_unspec(mirred_attrs[TCA_MIRRED_TM], sizeof *timing);
+    nl_parse_tcf(timing, flower);
+
+    return 0;
+}
+
+/* Attribute policy for the options of a "vlan" action.  Only the parameters
+ * structure is mandatory; the push attributes may be absent. */
+static const struct nl_policy vlan_policy[] = {
+    [TCA_VLAN_PARMS] = {
+        .type = NL_A_UNSPEC,
+        .min_len = sizeof(struct tc_vlan),
+        .optional = false,
+    },
+    [TCA_VLAN_PUSH_VLAN_ID] = { .type = NL_A_U16, .optional = true },
+    [TCA_VLAN_PUSH_VLAN_PROTOCOL] = { .type = NL_A_U16, .optional = true },
+    [TCA_VLAN_PUSH_VLAN_PRIORITY] = { .type = NL_A_U8, .optional = true },
+};
+
+/* Parses the options of a "vlan" action, 'options', into 'flower'.
+ *
+ * A push action stores the VLAN id and priority in 'flower->vlan_push_id'
+ * and 'flower->vlan_push_prio'; a pop action sets 'flower->vlan_pop'.
+ *
+ * Both push attributes are optional in 'vlan_policy', so their presence is
+ * checked before they are read: a push without a VLAN id is rejected, and a
+ * missing priority is taken to be 0.
+ *
+ * Returns 0 on success, EPROTO if 'options' cannot be parsed or a push
+ * action lacks its VLAN id, EINVAL for any other vlan action. */
+static int
+nl_parse_act_vlan(struct nlattr *options, struct tc_flower *flower)
+{
+    struct nlattr *vlan_attrs[ARRAY_SIZE(vlan_policy)];
+    const struct tc_vlan *v;
+    const struct nlattr *vlan_parms;
+
+    if (!nl_parse_nested(options, vlan_policy, vlan_attrs,
+                         ARRAY_SIZE(vlan_policy))) {
+        VLOG_ERR_RL(&parse_err, "failed to parse vlan action options");
+        return EPROTO;
+    }
+
+    vlan_parms = vlan_attrs[TCA_VLAN_PARMS];
+    v = nl_attr_get_unspec(vlan_parms, sizeof *v);
+    if (v->v_action == TCA_VLAN_ACT_PUSH) {
+        struct nlattr *vlan_id = vlan_attrs[TCA_VLAN_PUSH_VLAN_ID];
+        struct nlattr *vlan_prio = vlan_attrs[TCA_VLAN_PUSH_VLAN_PRIORITY];
+
+        /* The policy does not guarantee this attribute, so reading it
+         * unconditionally would dereference a null pointer. */
+        if (!vlan_id) {
+            VLOG_ERR_RL(&parse_err, "vlan push action without vlan id");
+            return EPROTO;
+        }
+
+        flower->vlan_push_id = nl_attr_get_u16(vlan_id);
+        flower->vlan_push_prio = vlan_prio ? nl_attr_get_u8(vlan_prio) : 0;
+    } else if (v->v_action == TCA_VLAN_ACT_POP) {
+        flower->vlan_pop = 1;
+    } else {
+        VLOG_ERR_RL(&parse_err, "unknown vlan action: %d, %d",
+                    v->action, v->v_action);
+        return EINVAL;
+    }
+    return 0;
+}
+
+/* Attribute policy for a single tc action: kind, options and statistics are
+ * mandatory, the cookie is optional. */
+static const struct nl_policy act_policy[] = {
+    [TCA_ACT_KIND] = {
+        .type = NL_A_STRING,
+        .optional = false,
+    },
+    [TCA_ACT_COOKIE] = {
+        .type = NL_A_UNSPEC,
+        .optional = true,
+    },
+    [TCA_ACT_OPTIONS] = {
+        .type = NL_A_NESTED,
+        .optional = false,
+    },
+    [TCA_ACT_STATS] = {
+        .type = NL_A_NESTED,
+        .optional = false,
+    },
+};
+
+/* Attribute policy for the statistics of a tc action: the basic counters
+ * are mandatory. */
+static const struct nl_policy stats_policy[] = {
+    [TCA_STATS_BASIC] = {
+        .type = NL_A_UNSPEC,
+        .min_len = sizeof(struct gnet_stats_basic),
+        .optional = false,
+    },
+};
+
+/* Parses one tc action, 'action', into 'flower'.
+ *
+ * The action kind selects the parser used for the action options: "gact",
+ * "mirred", "vlan" and "tunnel_key" are understood.  An error from that
+ * parser is returned to the caller rather than discarded, so an action
+ * whose options failed to parse is never reported as success.
+ *
+ * If a cookie attribute is present, 'flower->act_cookie' receives the
+ * pointer returned by nl_attr_get() and the attribute's size.  The basic
+ * action statistics are then stored in 'flower->stats'.
+ *
+ * Returns 0 on success, EPROTO if the action or its statistics cannot be
+ * parsed, EINVAL for an unknown action kind, or the error reported by the
+ * kind-specific parser. */
+static int
+nl_parse_single_action(struct nlattr *action, struct tc_flower *flower)
+{
+    struct nlattr *act_options;
+    struct nlattr *act_stats;
+    struct nlattr *act_cookie;
+    const struct nlattr *stats_basic;
+    const char *act_kind;
+    struct nlattr *action_attrs[ARRAY_SIZE(act_policy)];
+    struct nlattr *stats_attrs[ARRAY_SIZE(stats_policy)];
+    struct ovs_flow_stats *stats = &flower->stats;
+    const struct gnet_stats_basic *bs;
+    int err;
+
+    if (!nl_parse_nested(action, act_policy, action_attrs,
+                         ARRAY_SIZE(act_policy))) {
+        VLOG_ERR_RL(&parse_err, "failed to parse single action options");
+        return EPROTO;
+    }
+
+    act_kind = nl_attr_get_string(action_attrs[TCA_ACT_KIND]);
+    act_options = action_attrs[TCA_ACT_OPTIONS];
+    act_cookie = action_attrs[TCA_ACT_COOKIE];
+
+    if (!strcmp(act_kind, "gact")) {
+        err = nl_parse_act_drop(act_options, flower);
+    } else if (!strcmp(act_kind, "mirred")) {
+        err = nl_parse_act_mirred(act_options, flower);
+    } else if (!strcmp(act_kind, "vlan")) {
+        err = nl_parse_act_vlan(act_options, flower);
+    } else if (!strcmp(act_kind, "tunnel_key")) {
+        err = nl_parse_act_tunnel_key(act_options, flower);
+    } else {
+        VLOG_ERR_RL(&parse_err, "unknown tc action kind: %s", act_kind);
+        err = EINVAL;
+    }
+
+    /* Stop here on failure: 'flower' may describe the action only in part,
+     * and the kind-specific parser has already logged the reason. */
+    if (err) {
+        return err;
+    }
+
+    if (act_cookie) {
+        flower->act_cookie.data = nl_attr_get(act_cookie);
+        flower->act_cookie.len = nl_attr_get_size(act_cookie);
+    }
+
+    act_stats = action_attrs[TCA_ACT_STATS];
+
+    if (!nl_parse_nested(act_stats, stats_policy, stats_attrs,
+                         ARRAY_SIZE(stats_policy))) {
+        VLOG_ERR_RL(&parse_err, "failed to parse action stats policy");
+        return EPROTO;
+    }
+
+    stats_basic = stats_attrs[TCA_STATS_BASIC];
+    bs = nl_attr_get_unspec(stats_basic, sizeof *bs);
+
+    /* The packet count goes into the low half only; the byte count is
+     * split into its high and low 32-bit halves. */
+    stats->n_packets.lo = bs->packets;
+    stats->n_packets.hi = 0;
+    stats->n_bytes.hi = bs->bytes >> 32;
+    stats->n_bytes.lo = bs->bytes & 0x00000000FFFFFFFF;
+
+    return 0;
+}
+
+#define TCA_ACT_MIN_PRIO 1
+
+/* Parses the action list found in 'attrs[TCA_FLOWER_ACT]' into 'flower'.
+ *
+ * The list is parsed as nested attributes indexed from TCA_ACT_MIN_PRIO to
+ * TCA_ACT_MAX_PRIO, each optional and itself nested.  Every action that is
+ * present is handed to nl_parse_single_action() in increasing index order.
+ *
+ * Returns 0 on success, EPROTO if the list cannot be parsed, or the first
+ * error returned by nl_parse_single_action(). */
+static int
+nl_parse_flower_actions(struct nlattr **attrs, struct tc_flower *flower)
+{
+    static struct nl_policy actions_orders_policy[TCA_ACT_MAX_PRIO + 1] = {};
+    struct nlattr *actions_orders[ARRAY_SIZE(actions_orders_policy)];
+    const struct nlattr *actions = attrs[TCA_FLOWER_ACT];
+    const int n_orders = ARRAY_SIZE(actions_orders_policy);
+    int order;
+
+    /* The policy entries are filled in on every call. */
+    for (order = TCA_ACT_MIN_PRIO; order < n_orders; order++) {
+        struct nl_policy *policy = &actions_orders_policy[order];
+
+        policy->type = NL_A_NESTED;
+        policy->optional = true;
+    }
+
+    if (!nl_parse_nested(actions, actions_orders_policy, actions_orders,
+                         ARRAY_SIZE(actions_orders_policy))) {
+        VLOG_ERR_RL(&parse_err, "failed to parse flower order of actions");
+        return EPROTO;
+    }
+
+    for (order = TCA_ACT_MIN_PRIO; order < n_orders; order++) {
+        int err;
+
+        if (!actions_orders[order]) {
+            continue;
+        }
+
+        err = nl_parse_single_action(actions_orders[order], flower);
+        if (err) {
+            return err;
+        }
+    }
+
+    return 0;
+}
+
+/* Parses the flower classifier options nested in 'nl_options' into 'flower':
+ * the Ethernet, VLAN, IP and tunnel parsers run first, then the actions.
+ *
+ * Returns EPROTO if 'nl_options' does not satisfy tca_flower_policy,
+ * otherwise whatever nl_parse_flower_actions() returns. */
+static int
+nl_parse_flower_options(struct nlattr *nl_options, struct tc_flower *flower)
+{
+    struct nlattr *attrs[ARRAY_SIZE(tca_flower_policy)];
+    const size_t n_attrs = ARRAY_SIZE(attrs);
+
+    if (!nl_parse_nested(nl_options, tca_flower_policy, attrs, n_attrs)) {
+        VLOG_ERR_RL(&parse_err, "failed to parse flower classifier options");
+        return EPROTO;
+    }
+
+    /* Only the result of parsing the actions is propagated to the caller. */
+    nl_parse_flower_eth(attrs, flower);
+    nl_parse_flower_vlan(attrs, flower);
+    nl_parse_flower_ip(attrs, flower);
+    nl_parse_flower_tunnel(attrs, flower);
+
+    return nl_parse_flower_actions(attrs, flower);
+}
+
+/* Converts the tc filter netlink message in 'reply' into 'flower', which is
+ * zeroed first.
+ *
+ * The priority and Ethernet type are taken from the major and minor parts of
+ * tcm_info, and the Ethernet type is always treated as exactly matched.
+ *
+ * Returns 0 on success; EPROTO if 'reply' is too short to hold a tcmsg, does
+ * not satisfy tca_policy, or is not of kind "flower"; EAGAIN if the filter
+ * handle in the message is zero; otherwise the result of
+ * nl_parse_flower_options(). */
+int
+parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tc_flower *flower)
+{
+    const size_t attrs_ofs = NLMSG_HDRLEN + sizeof(struct tcmsg);
+    struct nlattr *ta[ARRAY_SIZE(tca_policy)];
+    const struct tcmsg *tc;
+
+    memset(flower, 0, sizeof *flower);
+    if (reply->size < attrs_ofs) {
+        return EPROTO;
+    }
+
+    tc = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
+    flower->handle = tc->tcm_handle;
+    flower->prio = tc_get_major(tc->tcm_info);
+    flower->key.eth_type = (OVS_FORCE ovs_be16) tc_get_minor(tc->tcm_info);
+    flower->mask.eth_type = OVS_BE16_MAX;
+    if (!flower->handle) {
+        return EAGAIN;
+    }
+
+    if (!nl_policy_parse(reply, attrs_ofs, tca_policy, ta, ARRAY_SIZE(ta))) {
+        VLOG_ERR_RL(&parse_err, "failed to parse tca policy");
+        return EPROTO;
+    }
+
+    if (strcmp(nl_attr_get_string(ta[TCA_KIND]), "flower")) {
+        VLOG_ERR_RL(&parse_err, "failed to parse filter: not flower");
+        return EPROTO;
+    }
+
+    return nl_parse_flower_options(ta[TCA_OPTIONS], flower);
+}
+
+/* Starts a NETLINK_ROUTE dump, tracked by 'dump', of the tc filters whose
+ * parent is tc_make_handle(0xffff, 0) on the device with index 'ifindex'.
+ * The request leaves both tcm_info and tcm_handle unspecified.
+ *
+ * Always returns 0. */
+int
+tc_dump_flower_start(int ifindex, struct nl_dump *dump)
+{
+    struct ofpbuf request;
+    struct tcmsg *tcmsg = tc_make_request(ifindex, RTM_GETTFILTER,
+                                          NLM_F_DUMP, &request);
+
+    tcmsg->tcm_handle = 0;
+    tcmsg->tcm_info = TC_H_UNSPEC;
+    tcmsg->tcm_parent = tc_make_handle(0xffff, 0);
+
+    nl_dump_start(dump, NETLINK_ROUTE, &request);
+    ofpbuf_uninit(&request);
+
+    return 0;
+}
+
+/* Sends an RTM_DELTFILTER request with unspecified tcm_info for parent
+ * tc_make_handle(0xffff, 0) on the device with index 'ifindex'.
+ *
+ * Returns the result of tc_transact(). */
+int
+tc_flush(int ifindex)
+{
+    struct ofpbuf request;
+    struct tcmsg *tcmsg = tc_make_request(ifindex, RTM_DELTFILTER,
+                                          NLM_F_ACK, &request);
+
+    tcmsg->tcm_info = TC_H_UNSPEC;
+    tcmsg->tcm_parent = tc_make_handle(0xffff, 0);
+
+    return tc_transact(&request, NULL);
+}
+
+/* Deletes the tc filter identified by priority 'prio' and 'handle' under
+ * parent tc_make_handle(0xffff, 0) on the device with index 'ifindex'.
+ *
+ * Returns 0 on success, otherwise the error reported by tc_transact(). */
+int
+tc_del_filter(int ifindex, int prio, int handle)
+{
+    struct ofpbuf *reply;
+    struct ofpbuf request;
+    struct tcmsg *tcmsg;
+    int error;
+
+    tcmsg = tc_make_request(ifindex, RTM_DELTFILTER, NLM_F_ECHO, &request);
+    tcmsg->tcm_parent = tc_make_handle(0xffff, 0);
+    tcmsg->tcm_info = tc_make_handle(prio, 0);
+    tcmsg->tcm_handle = handle;
+
+    error = tc_transact(&request, &reply);
+    if (error) {
+        return error;
+    }
+
+    /* Nothing in the reply is used; just release it. */
+    ofpbuf_delete(reply);
+    return error;
+}
+
+/* Queries the tc filter identified by priority 'prio' and 'handle' under
+ * parent tc_make_handle(0xffff, 0) on the device with index 'ifindex', and
+ * parses the reply into 'flower'.
+ *
+ * On return 'flower->act_cookie' is always empty: the parser records the
+ * cookie as a pointer obtained from the reply's attribute rather than as a
+ * copy, and the reply is freed here, so exposing it would hand the caller a
+ * dangling pointer.
+ *
+ * Returns 0 on success, the error from tc_transact() if the request fails
+ * (in which case 'flower' is left untouched), or the error from
+ * parse_netlink_to_tc_flower(). */
+int
+tc_get_flower(int ifindex, int prio, int handle, struct tc_flower *flower)
+{
+    struct ofpbuf request;
+    struct tcmsg *tcmsg;
+    struct ofpbuf *reply;
+    int error;
+
+    tcmsg = tc_make_request(ifindex, RTM_GETTFILTER, NLM_F_ECHO, &request);
+    tcmsg->tcm_parent = tc_make_handle(0xffff, 0);
+    tcmsg->tcm_info = tc_make_handle(prio, 0);
+    tcmsg->tcm_handle = handle;
+
+    error = tc_transact(&request, &reply);
+    if (error) {
+        return error;
+    }
+
+    error = parse_netlink_to_tc_flower(reply, flower);
+    ofpbuf_delete(reply);
+
+    /* The cookie data pointed into 'reply', which no longer exists. */
+    flower->act_cookie.data = NULL;
+    flower->act_cookie.len = 0;
+
+    return error;
+}
+
+/* Appends to 'request' the kind and options of a "vlan" action that pushes a
+ * VLAN header with VLAN ID 'vid' and priority 'prio'.  The action's control
+ * verdict is TC_ACT_PIPE. */
+static void
+nl_msg_put_act_push_vlan(struct ofpbuf *request, uint16_t vid, uint8_t prio)
+{
+    struct tc_vlan push = {
+        .action = TC_ACT_PIPE,
+        .v_action = TCA_VLAN_ACT_PUSH,
+    };
+    size_t opts_ofs;
+
+    nl_msg_put_string(request, TCA_ACT_KIND, "vlan");
+
+    opts_ofs = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
+    nl_msg_put_unspec(request, TCA_VLAN_PARMS, &push, sizeof push);
+    nl_msg_put_u16(request, TCA_VLAN_PUSH_VLAN_ID, vid);
+    nl_msg_put_u8(request, TCA_VLAN_PUSH_VLAN_PRIORITY, prio);
+    nl_msg_end_nested(request, opts_ofs);
+}
+
+/* Appends to 'request' the kind and options of a "vlan" action that pops the
+ * VLAN header.  The action's control verdict is TC_ACT_PIPE. */
+static void
+nl_msg_put_act_pop_vlan(struct ofpbuf *request)
+{
+    struct tc_vlan pop = {
+        .action = TC_ACT_PIPE,
+        .v_action = TCA_VLAN_ACT_POP,
+    };
+    size_t opts_ofs;
+
+    nl_msg_put_string(request, TCA_ACT_KIND, "vlan");
+
+    opts_ofs = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
+    nl_msg_put_unspec(request, TCA_VLAN_PARMS, &pop, sizeof pop);
+    nl_msg_end_nested(request, opts_ofs);
+}
+
+/* Appends to 'request' the kind and options of a "tunnel_key" action of type
+ * TCA_TUNNEL_KEY_ACT_RELEASE.  The action's control verdict is
+ * TC_ACT_PIPE. */
+static void
+nl_msg_put_act_tunnel_key_release(struct ofpbuf *request)
+{
+    struct tc_tunnel_key release = {
+        .action = TC_ACT_PIPE,
+        .t_action = TCA_TUNNEL_KEY_ACT_RELEASE,
+    };
+    size_t opts_ofs;
+
+    nl_msg_put_string(request, TCA_ACT_KIND, "tunnel_key");
+
+    opts_ofs = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
+    nl_msg_put_unspec(request, TCA_TUNNEL_KEY_PARMS, &release,
+                      sizeof release);
+    nl_msg_end_nested(request, opts_ofs);
+}
+
+/* Appends to 'request' the kind and options of a "tunnel_key" action of type
+ * TCA_TUNNEL_KEY_ACT_SET.
+ *
+ * The key ID is 'id' converted to 32 bits with be64_to_be32().  The IPv4
+ * source and destination are emitted when 'ipv4_dst' is nonzero; otherwise
+ * the IPv6 destination and source are emitted when '*ipv6_dst' is not all
+ * zeros; otherwise no addresses are emitted.  The key ID and 'tp_dst' are
+ * always emitted. */
+static void
+nl_msg_put_act_tunnel_key_set(struct ofpbuf *request, ovs_be64 id,
+                              ovs_be32 ipv4_src, ovs_be32 ipv4_dst,
+                              struct in6_addr *ipv6_src,
+                              struct in6_addr *ipv6_dst,
+                              ovs_be16 tp_dst)
+{
+    struct tc_tunnel_key set = {
+        .action = TC_ACT_PIPE,
+        .t_action = TCA_TUNNEL_KEY_ACT_SET,
+    };
+    ovs_be32 id32 = be64_to_be32(id);
+    size_t opts_ofs;
+
+    nl_msg_put_string(request, TCA_ACT_KIND, "tunnel_key");
+
+    opts_ofs = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
+    nl_msg_put_unspec(request, TCA_TUNNEL_KEY_PARMS, &set, sizeof set);
+    nl_msg_put_be32(request, TCA_TUNNEL_KEY_ENC_KEY_ID, id32);
+
+    if (ipv4_dst) {
+        nl_msg_put_be32(request, TCA_TUNNEL_KEY_ENC_IPV4_SRC, ipv4_src);
+        nl_msg_put_be32(request, TCA_TUNNEL_KEY_ENC_IPV4_DST, ipv4_dst);
+    } else if (!is_all_zeros(ipv6_dst, sizeof *ipv6_dst)) {
+        nl_msg_put_in6_addr(request, TCA_TUNNEL_KEY_ENC_IPV6_DST, ipv6_dst);
+        nl_msg_put_in6_addr(request, TCA_TUNNEL_KEY_ENC_IPV6_SRC, ipv6_src);
+    }
+
+    nl_msg_put_be16(request, TCA_TUNNEL_KEY_ENC_DST_PORT, tp_dst);
+    nl_msg_end_nested(request, opts_ofs);
+}
+
+/* Appends to 'request' the kind and options of a "gact" action whose control
+ * verdict is TC_ACT_SHOT. */
+static void
+nl_msg_put_act_drop(struct ofpbuf *request)
+{
+    struct tc_gact shot = { .action = TC_ACT_SHOT };
+    size_t opts_ofs;
+
+    nl_msg_put_string(request, TCA_ACT_KIND, "gact");
+
+    opts_ofs = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
+    nl_msg_put_unspec(request, TCA_GACT_PARMS, &shot, sizeof shot);
+    nl_msg_end_nested(request, opts_ofs);
+}
+
+/* Appends to 'request' the kind and options of a "mirred" action with
+ * eaction TCA_EGRESS_REDIR targeting interface index 'ifindex'.  The
+ * action's control verdict is TC_ACT_STOLEN. */
+static void
+nl_msg_put_act_redirect(struct ofpbuf *request, int ifindex)
+{
+    struct tc_mirred redir = {
+        .action = TC_ACT_STOLEN,
+        .eaction = TCA_EGRESS_REDIR,
+        .ifindex = ifindex,
+    };
+    size_t opts_ofs;
+
+    nl_msg_put_string(request, TCA_ACT_KIND, "mirred");
+
+    opts_ofs = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
+    nl_msg_put_unspec(request, TCA_MIRRED_PARMS, &redir, sizeof redir);
+    nl_msg_end_nested(request, opts_ofs);
+}
+
+/* Appends 'ck' to 'request' as a TCA_ACT_COOKIE attribute, unless the cookie
+ * has zero length, in which case nothing is appended. */
+static inline void
+nl_msg_put_act_cookie(struct ofpbuf *request, struct tc_cookie *ck)
+{
+    if (!ck->len) {
+        return;
+    }
+
+    nl_msg_put_unspec(request, TCA_ACT_COOKIE, ck->data, ck->len);
+}
+
+/* Appends to 'request' the nested TCA_FLOWER_ACT attribute describing the
+ * actions of 'flower'.  Actions are numbered consecutively from 1 in this
+ * order, each emitted only if requested:
+ *
+ *   - tunnel_key set      if flower->set.set
+ *   - tunnel_key release  if flower->tunnel.tunnel
+ *   - vlan pop            if flower->vlan_pop
+ *   - vlan push           if flower->vlan_push_id is nonzero
+ *
+ * The list always ends with a redirect to flower->ifindex_out if that is
+ * nonzero, or a drop otherwise; that last action carries the cookie. */
+static void
+nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower)
+{
+    uint16_t act_index = 1;
+    size_t acts_ofs;
+    size_t act_ofs;
+
+    acts_ofs = nl_msg_start_nested(request, TCA_FLOWER_ACT);
+
+    if (flower->set.set) {
+        act_ofs = nl_msg_start_nested(request, act_index++);
+        nl_msg_put_act_tunnel_key_set(request, flower->set.id,
+                                      flower->set.ipv4.ipv4_src,
+                                      flower->set.ipv4.ipv4_dst,
+                                      &flower->set.ipv6.ipv6_src,
+                                      &flower->set.ipv6.ipv6_dst,
+                                      flower->set.tp_dst);
+        nl_msg_end_nested(request, act_ofs);
+    }
+
+    if (flower->tunnel.tunnel) {
+        act_ofs = nl_msg_start_nested(request, act_index++);
+        nl_msg_put_act_tunnel_key_release(request);
+        nl_msg_end_nested(request, act_ofs);
+    }
+
+    if (flower->vlan_pop) {
+        act_ofs = nl_msg_start_nested(request, act_index++);
+        nl_msg_put_act_pop_vlan(request);
+        nl_msg_end_nested(request, act_ofs);
+    }
+
+    if (flower->vlan_push_id) {
+        act_ofs = nl_msg_start_nested(request, act_index++);
+        nl_msg_put_act_push_vlan(request, flower->vlan_push_id,
+                                 flower->vlan_push_prio);
+        nl_msg_end_nested(request, act_ofs);
+    }
+
+    /* Final action: output or drop, followed by the cookie. */
+    act_ofs = nl_msg_start_nested(request, act_index++);
+    if (flower->ifindex_out) {
+        nl_msg_put_act_redirect(request, flower->ifindex_out);
+    } else {
+        nl_msg_put_act_drop(request);
+    }
+    nl_msg_put_act_cookie(request, &flower->act_cookie);
+    nl_msg_end_nested(request, act_ofs);
+
+    nl_msg_end_nested(request, acts_ofs);
+}
+
+/* Appends a 'len'-byte key and, optionally, its mask to 'request'.
+ *
+ * If 'mask_type' is TCA_FLOWER_UNSPEC, only 'data' is appended, as attribute
+ * 'type'.  Otherwise, if the 'len' bytes at 'mask_data' are all zeros
+ * nothing is appended at all; if not, 'mask_data' is appended as attribute
+ * 'mask_type', followed by 'data' as attribute 'type'. */
+static void
+nl_msg_put_masked_value(struct ofpbuf *request, uint16_t type,
+                        uint16_t mask_type, const void *data,
+                        const void *mask_data, size_t len)
+{
+    const bool has_mask = mask_type != TCA_FLOWER_UNSPEC;
+
+    if (has_mask && is_all_zeros(mask_data, len)) {
+        return;
+    }
+
+    if (has_mask) {
+        nl_msg_put_unspec(request, mask_type, mask_data, len);
+    }
+    nl_msg_put_unspec(request, type, data, len);
+}
+
+/* Appends to 'request' the TCA_FLOWER_KEY_ENC_* match attributes taken from
+ * 'flower->tunnel'.
+ *
+ * The key ID is the tunnel ID converted to 32 bits with be64_to_be32().  The
+ * IPv4 source and destination are emitted when the IPv4 destination is
+ * nonzero; otherwise the IPv6 source and destination are emitted when the
+ * IPv6 destination is not all zeros.  The key ID and the UDP destination
+ * port are always emitted. */
+static void
+nl_msg_put_flower_tunnel(struct ofpbuf *request, struct tc_flower *flower)
+{
+    ovs_be32 key_id = be64_to_be32(flower->tunnel.id);
+    ovs_be32 v4_dst = flower->tunnel.ipv4.ipv4_dst;
+    struct in6_addr *v6_dst = &flower->tunnel.ipv6.ipv6_dst;
+
+    nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_KEY_ID, key_id);
+
+    if (v4_dst) {
+        nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_IPV4_SRC,
+                        flower->tunnel.ipv4.ipv4_src);
+        nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_IPV4_DST, v4_dst);
+    } else if (!is_all_zeros(v6_dst, sizeof *v6_dst)) {
+        nl_msg_put_in6_addr(request, TCA_FLOWER_KEY_ENC_IPV6_SRC,
+                            &flower->tunnel.ipv6.ipv6_src);
+        nl_msg_put_in6_addr(request, TCA_FLOWER_KEY_ENC_IPV6_DST, v6_dst);
+    }
+
+    nl_msg_put_be16(request, TCA_FLOWER_KEY_ENC_UDP_DST_PORT,
+                    flower->tunnel.tp_dst);
+}
+
+/* Appends to 'request' the flower classifier options for 'flower', in this
+ * order: Ethernet addresses, IP protocol and UDP/TCP ports (IPv4 and IPv6
+ * only), IPv4 or IPv6 addresses, Ethernet type, VLAN fields (802.1Q only),
+ * flags (always 0), tunnel match fields (if flower->tunnel.tunnel), and
+ * finally the actions.
+ *
+ * Masked fields are skipped when their mask is all zeros; see
+ * nl_msg_put_masked_value(). */
+static void
+nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower)
+{
+    const struct tc_flower_key *key = &flower->key;
+    const struct tc_flower_key *mask = &flower->mask;
+    const uint16_t host_eth_type = ntohs(key->eth_type);
+    const bool is_ipv4 = host_eth_type == ETH_P_IP;
+    const bool is_ipv6 = host_eth_type == ETH_P_IPV6;
+
+    nl_msg_put_masked_value(request, TCA_FLOWER_KEY_ETH_DST,
+                            TCA_FLOWER_KEY_ETH_DST_MASK,
+                            &key->dst_mac, &mask->dst_mac, ETH_ALEN);
+    nl_msg_put_masked_value(request, TCA_FLOWER_KEY_ETH_SRC,
+                            TCA_FLOWER_KEY_ETH_SRC_MASK,
+                            &key->src_mac, &mask->src_mac, ETH_ALEN);
+
+    if (is_ipv4 || is_ipv6) {
+        if (mask->ip_proto && key->ip_proto) {
+            nl_msg_put_u8(request, TCA_FLOWER_KEY_IP_PROTO, key->ip_proto);
+        }
+
+        /* The port attributes depend on the key's protocol alone; the
+         * protocol mask is not consulted here. */
+        switch (key->ip_proto) {
+        case IPPROTO_UDP:
+            nl_msg_put_masked_value(request, TCA_FLOWER_KEY_UDP_SRC,
+                                    TCA_FLOWER_KEY_UDP_SRC_MASK,
+                                    &key->src_port, &mask->src_port,
+                                    sizeof key->src_port);
+            nl_msg_put_masked_value(request, TCA_FLOWER_KEY_UDP_DST,
+                                    TCA_FLOWER_KEY_UDP_DST_MASK,
+                                    &key->dst_port, &mask->dst_port,
+                                    sizeof key->dst_port);
+            break;
+
+        case IPPROTO_TCP:
+            nl_msg_put_masked_value(request, TCA_FLOWER_KEY_TCP_SRC,
+                                    TCA_FLOWER_KEY_TCP_SRC_MASK,
+                                    &key->src_port, &mask->src_port,
+                                    sizeof key->src_port);
+            nl_msg_put_masked_value(request, TCA_FLOWER_KEY_TCP_DST,
+                                    TCA_FLOWER_KEY_TCP_DST_MASK,
+                                    &key->dst_port, &mask->dst_port,
+                                    sizeof key->dst_port);
+            break;
+
+        default:
+            break;
+        }
+    }
+
+    if (is_ipv4) {
+        nl_msg_put_masked_value(request, TCA_FLOWER_KEY_IPV4_SRC,
+                                TCA_FLOWER_KEY_IPV4_SRC_MASK,
+                                &key->ipv4.ipv4_src, &mask->ipv4.ipv4_src,
+                                sizeof key->ipv4.ipv4_src);
+        nl_msg_put_masked_value(request, TCA_FLOWER_KEY_IPV4_DST,
+                                TCA_FLOWER_KEY_IPV4_DST_MASK,
+                                &key->ipv4.ipv4_dst, &mask->ipv4.ipv4_dst,
+                                sizeof key->ipv4.ipv4_dst);
+    } else if (is_ipv6) {
+        nl_msg_put_masked_value(request, TCA_FLOWER_KEY_IPV6_SRC,
+                                TCA_FLOWER_KEY_IPV6_SRC_MASK,
+                                &key->ipv6.ipv6_src, &mask->ipv6.ipv6_src,
+                                sizeof key->ipv6.ipv6_src);
+        nl_msg_put_masked_value(request, TCA_FLOWER_KEY_IPV6_DST,
+                                TCA_FLOWER_KEY_IPV6_DST_MASK,
+                                &key->ipv6.ipv6_dst, &mask->ipv6.ipv6_dst,
+                                sizeof key->ipv6.ipv6_dst);
+    }
+
+    nl_msg_put_be16(request, TCA_FLOWER_KEY_ETH_TYPE, key->eth_type);
+
+    if (host_eth_type == ETH_P_8021Q) {
+        /* VLAN ID and priority go out together, if either is nonzero. */
+        if (key->vlan_id || key->vlan_prio) {
+            nl_msg_put_u16(request, TCA_FLOWER_KEY_VLAN_ID, key->vlan_id);
+            nl_msg_put_u8(request, TCA_FLOWER_KEY_VLAN_PRIO, key->vlan_prio);
+        }
+        if (key->encap_eth_type) {
+            nl_msg_put_be16(request, TCA_FLOWER_KEY_VLAN_ETH_TYPE,
+                            key->encap_eth_type);
+        }
+    }
+
+    nl_msg_put_u32(request, TCA_FLOWER_FLAGS, 0);
+
+    if (flower->tunnel.tunnel) {
+        nl_msg_put_flower_tunnel(request, flower);
+    }
+
+    nl_msg_put_flower_acts(request, flower);
+}
+
+/* Installs 'flower' as a tc flower filter with priority 'prio' and handle
+ * 'handle' under parent tc_make_handle(0xffff, 0) on the device with index
+ * 'ifindex'.  The request is sent as RTM_NEWTFILTER with
+ * NLM_F_CREATE | NLM_F_ECHO.
+ *
+ * On success, stores into 'flower->prio' and 'flower->handle' the priority
+ * and handle reported in the reply.
+ *
+ * Returns 0 on success, the error from tc_transact() if the request fails,
+ * or EPROTO if the reply is too short to contain a tcmsg (in which case
+ * 'flower' is left unchanged). */
+int
+tc_replace_flower(int ifindex, uint16_t prio, uint32_t handle,
+                  struct tc_flower *flower)
+{
+    struct ofpbuf request;
+    struct tcmsg *tcmsg;
+    struct ofpbuf *reply;
+    struct tcmsg *tc;
+    int error;
+    size_t basic_offset;
+    uint16_t eth_type = (OVS_FORCE uint16_t) flower->key.eth_type;
+
+    tcmsg = tc_make_request(ifindex, RTM_NEWTFILTER,
+                            NLM_F_CREATE | NLM_F_ECHO, &request);
+    tcmsg->tcm_parent = tc_make_handle(0xffff, 0);
+    tcmsg->tcm_info = tc_make_handle(prio, eth_type);
+    tcmsg->tcm_handle = handle;
+
+    nl_msg_put_string(&request, TCA_KIND, "flower");
+    basic_offset = nl_msg_start_nested(&request, TCA_OPTIONS);
+    {
+        nl_msg_put_flower_options(&request, flower);
+    }
+    nl_msg_end_nested(&request, basic_offset);
+
+    error = tc_transact(&request, &reply);
+    if (error) {
+        return error;
+    }
+
+    /* Validate the reply length before reading the tcmsg, as
+     * parse_netlink_to_tc_flower() does, so that a short reply is reported
+     * as an error instead of tripping the assertion in
+     * ofpbuf_at_assert(). */
+    if (reply->size < NLMSG_HDRLEN + sizeof *tc) {
+        ofpbuf_delete(reply);
+        return EPROTO;
+    }
+
+    tc = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
+    flower->prio = tc_get_major(tc->tcm_info);
+    flower->handle = tc->tcm_handle;
+    ofpbuf_delete(reply);
+
+    return 0;
+}
diff --git a/lib/tc.h b/lib/tc.h
new file mode 100644
index 0000000..ec2e05b
--- /dev/null
+++ b/lib/tc.h
@@ -0,0 +1,128 @@ 
+/*
+ * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Nicira, Inc.
+ * Copyright (c) 2016 Mellanox Technologies, Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TC_H
+#define TC_H 1
+
+#include <linux/pkt_cls.h>
+#include <linux/pkt_sched.h>
+#include "odp-netlink.h"
+#include "netlink-socket.h"
+
+#define TC_POLICY_DEFAULT "none"
+
+unsigned int tc_make_handle(unsigned int major, unsigned int minor);
+unsigned int tc_get_major(unsigned int handle);
+unsigned int tc_get_minor(unsigned int handle);
+struct tcmsg *tc_make_request(int ifindex, int type, unsigned int flags,
+                              struct ofpbuf *request);
+int tc_transact(struct ofpbuf *request, struct ofpbuf **replyp);
+int tc_add_del_ingress_qdisc(int ifindex, bool add);
+
+/* A blob of bytes attached to a tc action as its TCA_ACT_COOKIE attribute. */
+struct tc_cookie {
+    const void *data;   /* Cookie bytes.  NOTE(review): the parser stores the
+                         * pointer returned by nl_attr_get() here, so it
+                         * presumably refers into the parsed message rather
+                         * than a copy -- confirm lifetime with callers. */
+    size_t len;         /* Length of 'data' in bytes; 0 means no cookie. */
+};
+
+/* Packet fields a flower filter can match on.  struct tc_flower holds two of
+ * these: one with the values to match ('key') and one with the corresponding
+ * masks ('mask'). */
+struct tc_flower_key {
+    ovs_be16 eth_type;          /* Ethernet type; selects which of the fields
+                                 * below are sent to the kernel. */
+    uint8_t ip_proto;           /* IP protocol; ports are sent only for
+                                 * IPPROTO_UDP and IPPROTO_TCP. */
+
+    struct eth_addr dst_mac;
+    struct eth_addr src_mac;
+
+    ovs_be16 src_port;          /* UDP or TCP source port. */
+    ovs_be16 dst_port;          /* UDP or TCP destination port. */
+
+    uint16_t vlan_id;           /* Sent only when eth_type is 802.1Q. */
+    uint8_t vlan_prio;          /* Sent only when eth_type is 802.1Q. */
+
+    ovs_be16 encap_eth_type;    /* Sent as TCA_FLOWER_KEY_VLAN_ETH_TYPE when
+                                 * eth_type is 802.1Q and this is nonzero. */
+
+    /* IP addresses; 'ipv4' is used when eth_type is ETH_P_IP and 'ipv6' when
+     * it is ETH_P_IPV6. */
+    union {
+        struct {
+            ovs_be32 ipv4_src;
+            ovs_be32 ipv4_dst;
+        } ipv4;
+        struct {
+            struct in6_addr ipv6_src;
+            struct in6_addr ipv6_dst;
+        } ipv6;
+    };
+};
+
+/* A tc flower filter: what it matches and the actions it applies. */
+struct tc_flower {
+    uint32_t handle;            /* Filter handle (tcm_handle). */
+    uint32_t prio;              /* Filter priority (major part of tcm_info). */
+
+    struct tc_flower_key key;   /* Values to match. */
+    struct tc_flower_key mask;  /* Masks for 'key'. */
+
+    uint8_t vlan_pop;           /* Nonzero: add a "vlan" pop action. */
+    uint16_t vlan_push_id;      /* Nonzero: add a "vlan" push action with
+                                 * this VLAN ID... */
+    uint8_t vlan_push_prio;     /* ...and this priority. */
+
+    int ifindex;                /* NOTE(review): not used by the request
+                                 * builders in tc.c shown with this header;
+                                 * presumably filled in by the action
+                                 * parsers -- confirm. */
+    int ifindex_out;            /* Nonzero: final action redirects to this
+                                 * interface index; zero: final action is a
+                                 * drop. */
+
+    struct ovs_flow_stats stats;
+    uint64_t lastused;
+
+    /* Tunnel encapsulation to apply with a "tunnel_key" set action. */
+    struct {
+        bool set;               /* True: add the tunnel_key set action. */
+        ovs_be64 id;            /* Tunnel ID; sent converted to 32 bits. */
+        ovs_be16 tp_src;
+        ovs_be16 tp_dst;        /* Sent as the encapsulation dst port. */
+        struct {
+            ovs_be32 ipv4_src;
+            ovs_be32 ipv4_dst;  /* Nonzero selects the IPv4 addresses. */
+        } ipv4;
+        struct {
+            struct in6_addr ipv6_src;
+            struct in6_addr ipv6_dst;   /* Used if ipv4_dst is zero and this
+                                         * is not all zeros. */
+        } ipv6;
+    } set;
+
+    /* Tunnel fields to match on (TCA_FLOWER_KEY_ENC_*). */
+    struct {
+        bool tunnel;            /* True: send the fields below as match keys
+                                 * and add a tunnel_key release action. */
+        struct {
+            ovs_be32 ipv4_src;
+            ovs_be32 ipv4_dst;  /* Nonzero selects the IPv4 addresses. */
+        } ipv4;
+        struct {
+            struct in6_addr ipv6_src;
+            struct in6_addr ipv6_dst;   /* Used if ipv4_dst is zero and this
+                                         * is not all zeros. */
+        } ipv6;
+        ovs_be64 id;            /* Tunnel ID; sent converted to 32 bits. */
+        ovs_be16 tp_src;
+        ovs_be16 tp_dst;        /* Sent as the UDP destination port. */
+    } tunnel;
+
+    struct tc_cookie act_cookie;    /* Cookie attached to the final action. */
+};
+
+/* Flower filter operations.  All of them address filters under parent
+ * tc_make_handle(0xffff, 0) of the device with index 'ifindex' and return 0
+ * on success or a positive errno value on failure. */
+
+/* Creates or replaces a filter from 'flower'; on success updates
+ * 'flower->prio' and 'flower->handle' from the kernel's reply. */
+int tc_replace_flower(int ifindex, uint16_t prio, uint32_t handle,
+                      struct tc_flower *flower);
+/* Deletes the filter with the given 'prio' and 'handle'. */
+int tc_del_filter(int ifindex, int prio, int handle);
+/* Fetches the filter with the given 'prio' and 'handle' into 'flower'. */
+int tc_get_flower(int ifindex, int prio, int handle,
+                  struct tc_flower *flower);
+/* Sends a filter delete request with unspecified priority and handle. */
+int tc_flush(int ifindex);
+/* Begins a dump of the device's filters into 'dump'; always returns 0. */
+int tc_dump_flower_start(int ifindex, struct nl_dump *dump);
+/* Parses one filter message (e.g. from a dump) into 'flower'.  Returns
+ * EAGAIN if the message carries a zero handle and EPROTO if it is malformed
+ * or is not a flower filter. */
+int parse_netlink_to_tc_flower(struct ofpbuf *reply,
+                               struct tc_flower *flower);
+
+#endif /* tc.h */