diff mbox

[ovs-dev,V9,04/31] tc: Add tc flower functions

Message ID 1495972813-13475-5-git-send-email-roid@mellanox.com
State Superseded
Headers show

Commit Message

Roi Dayan May 28, 2017, 11:59 a.m. UTC
Add tc helper functions to query and manipulate the flower classifier.

Signed-off-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Roi Dayan <roid@mellanox.com>
---
 lib/tc.c | 992 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/tc.h |  92 ++++++
 2 files changed, 1084 insertions(+)

Comments

Simon Horman May 30, 2017, 8:09 a.m. UTC | #1
On Sun, May 28, 2017 at 02:59:46PM +0300, Roi Dayan wrote:
> Add tc helper functions to query and manipulate the flower classifier.
> 
> Signed-off-by: Paul Blakey <paulb@mellanox.com>
> Signed-off-by: Roi Dayan <roid@mellanox.com>

Thanks, this looks good to me and I would be happy to apply it if
someone provided a review.

Also, please see the note below.

...

> --- a/lib/tc.c
> +++ b/lib/tc.c
> @@ -1,5 +1,6 @@

...

> +    if (ip_proto == IPPROTO_TCP) {
> +        if (attrs[TCA_FLOWER_KEY_TCP_SRC_MASK]) {
> +            key->src_port =
> +                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_SRC]);
> +            mask->src_port =
> +                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_SRC_MASK]);
> +        }
> +        if (attrs[TCA_FLOWER_KEY_TCP_DST_MASK]) {
> +            key->dst_port =
> +                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_DST]);
> +            mask->dst_port =
> +                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_DST_MASK]);
> +        }
> +    } else if (ip_proto == IPPROTO_UDP) {
> +        if (attrs[TCA_FLOWER_KEY_UDP_SRC_MASK]) {
> +            key->src_port = nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_SRC]);
> +            mask->src_port =
> +                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_SRC_MASK]);
> +        }
> +        if (attrs[TCA_FLOWER_KEY_UDP_DST_MASK]) {
> +            key->dst_port = nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_DST]);
> +            mask->dst_port =
> +                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_DST_MASK]);
> +        }
> +    }

As mentioned a few times it seems that SCTP could be trivially supported
here. I think adding it would make for an excellent follow-up patch.

...
Joe Stringer May 31, 2017, 12:50 a.m. UTC | #2
On 28 May 2017 at 04:59, Roi Dayan <roid@mellanox.com> wrote:
> Add tc helper functions to query and manipulate the flower classifier.
>
> Signed-off-by: Paul Blakey <paulb@mellanox.com>
> Signed-off-by: Roi Dayan <roid@mellanox.com>
> ---

Again is this co-authored? utilities/checkpatch.py checks for stuff like this.

I didn't go through all of the enums and make sure they're all
covered, correct types, etc.

<snip>

> +    [TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK] = { .type = NL_A_U32, .optional = true, },
> +    [TCA_FLOWER_KEY_ENC_IPV4_DST_MASK] = { .type = NL_A_U32, .optional = true, },

Line lengths.

<snip>

> +static void
> +nl_parse_tcf(const struct tcf_t *tm, struct tc_flower *flower)
> +{
> +    unsigned long long int lastuse = tm->lastuse * 10;

Where does the 10 come from? What does it mean? Should we have a #define for it?

<snip>

> +    stats_basic = stats_attrs[TCA_STATS_BASIC];
> +    bs = nl_attr_get_unspec(stats_basic, sizeof *bs);
> +
> +    stats->n_packets.lo = bs->packets;
> +    stats->n_packets.hi = 0;
> +    stats->n_bytes.hi = bs->bytes >> 32;
> +    stats->n_bytes.lo = bs->bytes & 0x00000000FFFFFFFF;

Should this use put_32aligned_u64()?

> +
> +    return 0;
> +}
> +
> +#define TCA_ACT_MIN_PRIO 1

Stray define - Shouldn't this be in linux/pkt_cls.h?

<snip>

> +    if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof *tc,
> +                         tca_policy, ta, ARRAY_SIZE(ta))) {
> +        VLOG_ERR_RL(&error_rl, "failed to parse tca policy");
> +        return EPROTO;
> +    }
> +
> +    kind = nl_attr_get_string(ta[TCA_KIND]);
> +    if (strcmp(kind, "flower")) {
> +        VLOG_ERR_RL(&error_rl, "failed to parse filter: not flower");

Printing the filter kind in the log message might be useful for
debugging purposes.

> +
> +    tcmsg = tc_make_request(ifindex, RTM_GETTFILTER, NLM_F_DUMP, &request);
> +    tcmsg->tcm_parent = tc_make_handle(0xffff, 0);

Do TC people usually define handles in terms of these specific
integers? I wonder if a subsequent follow-up patch would improve
readability by #defining these tc_make_handle(0xffff, 0) numbers to
show what they actually mean - like ingress, root.

> +int
> +tc_replace_flower(int ifindex, uint16_t prio, uint32_t handle,
> +                  struct tc_flower *flower)
> +{
> +    struct ofpbuf request;
> +    struct tcmsg *tcmsg;
> +    struct ofpbuf *reply;
> +    int error = 0;
> +    size_t basic_offset;
> +    uint16_t eth_type = (OVS_FORCE uint16_t) flower->key.eth_type;

Why does this need forcing?
Flavio Leitner May 31, 2017, 8:15 p.m. UTC | #3
On Tue, May 30, 2017 at 05:50:53PM -0700, Joe Stringer wrote:
> On 28 May 2017 at 04:59, Roi Dayan <roid@mellanox.com> wrote:
> > +static void
> > +nl_parse_tcf(const struct tcf_t *tm, struct tc_flower *flower)
> > +{
> > +    unsigned long long int lastuse = tm->lastuse * 10;
> 
> Where does the 10 come from? What does it mean? Should we have a #define for it?

If it's a conversion then maybe a macro is better.
Roi Dayan June 1, 2017, 2:39 p.m. UTC | #4
On 31/05/2017 03:50, Joe Stringer wrote:
> On 28 May 2017 at 04:59, Roi Dayan <roid@mellanox.com> wrote:
>> Add tc helper functions to query and manipulate the flower classifier.
>>
>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
>> Signed-off-by: Roi Dayan <roid@mellanox.com>
>> ---
>
> Again is this co-authored? utilities/checkpatch.py checks for stuff like this.
>
> I didn't go through all of the enums and make sure they're all
> covered, correct types, etc.
>
> <snip>
>
>> +    [TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK] = { .type = NL_A_U32, .optional = true, },
>> +    [TCA_FLOWER_KEY_ENC_IPV4_DST_MASK] = { .type = NL_A_U32, .optional = true, },
>
> Line lengths.
>

ok

> <snip>
>
>> +static void
>> +nl_parse_tcf(const struct tcf_t *tm, struct tc_flower *flower)
>> +{
>> +    unsigned long long int lastuse = tm->lastuse * 10;
>
> Where does the 10 come from? What does it mean? Should we have a #define for it?
>

we want to convert here jiffies to ms and used some default value.
as Flavio suggested maybe do it in a macro?
later I saw in netdev-linux.c the functions read_psched() and
tc_ticks_to_bytes(). can do a followup commit to refactor those
somewhere else and use them in tc.c as well.

> <snip>
>
>> +    stats_basic = stats_attrs[TCA_STATS_BASIC];
>> +    bs = nl_attr_get_unspec(stats_basic, sizeof *bs);
>> +
>> +    stats->n_packets.lo = bs->packets;
>> +    stats->n_packets.hi = 0;
>> +    stats->n_bytes.hi = bs->bytes >> 32;
>> +    stats->n_bytes.lo = bs->bytes & 0x00000000FFFFFFFF;
>
> Should this use put_32aligned_u64()?

yes. wanted to do that as followup commit later to avoid too
many changes. I can merge it now though.

>
>> +
>> +    return 0;
>> +}
>> +
>> +#define TCA_ACT_MIN_PRIO 1
>
> Stray define - Shouldn't this be in linux/pkt_cls.h?
>

it's not in pkt_cls.h, we only have TCA_ACT_MAX_PRIO
We can also see in act_api.c in kernel that they loop from 1
and don't use a definition.
we added a local definition for readability.


> <snip>
>
>> +    if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof *tc,
>> +                         tca_policy, ta, ARRAY_SIZE(ta))) {
>> +        VLOG_ERR_RL(&error_rl, "failed to parse tca policy");
>> +        return EPROTO;
>> +    }
>> +
>> +    kind = nl_attr_get_string(ta[TCA_KIND]);
>> +    if (strcmp(kind, "flower")) {
>> +        VLOG_ERR_RL(&error_rl, "failed to parse filter: not flower");
>
> Printing the filter kind in the log message might be useful for
> debugging purposes.

ok

>
>> +
>> +    tcmsg = tc_make_request(ifindex, RTM_GETTFILTER, NLM_F_DUMP, &request);
>> +    tcmsg->tcm_parent = tc_make_handle(0xffff, 0);
>
> Do TC people usually define handles in terms of these specific
> integers? I wonder if a subsequent follow-up patch would improve
> readability by #defining these tc_make_handle(0xffff, 0) numbers to
> show what they actually mean - like ingress, root.
>

currently we copied it from existing functions that already set
tcm_parent like this.
In tc we can use the word 'ingress' which then in the code is
translating to:

req.t.tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS);

we can do a followup patch to do this as well or even on this add
another macro.

>> +int
>> +tc_replace_flower(int ifindex, uint16_t prio, uint32_t handle,
>> +                  struct tc_flower *flower)
>> +{
>> +    struct ofpbuf request;
>> +    struct tcmsg *tcmsg;
>> +    struct ofpbuf *reply;
>> +    int error = 0;
>> +    size_t basic_offset;
>> +    uint16_t eth_type = (OVS_FORCE uint16_t) flower->key.eth_type;
>
> Why does this need forcing?
>

as eth_type is ove_be16 but tc_make_handle wants unsigned int.
we get a compilation warning from sparse about incorrect type.
this is to avoid sparse from failing.
Joe Stringer June 1, 2017, 5:53 p.m. UTC | #5
On 1 June 2017 at 07:39, Roi Dayan <roid@mellanox.com> wrote:
>
>
> On 31/05/2017 03:50, Joe Stringer wrote:
>>
>> On 28 May 2017 at 04:59, Roi Dayan <roid@mellanox.com> wrote:
>>>
>>> Add tc helper functions to query and manipulate the flower classifier.
>>>
>>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
>>> Signed-off-by: Roi Dayan <roid@mellanox.com>
>>> ---
>>
>>
>> Again is this co-authored? utilities/checkpatch.py checks for stuff like
>> this.
>>
>> I didn't go through all of the enums and make sure they're all
>> covered, correct types, etc.
>>
>> <snip>
>>
>>> +    [TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK] = { .type = NL_A_U32, .optional =
>>> true, },
>>> +    [TCA_FLOWER_KEY_ENC_IPV4_DST_MASK] = { .type = NL_A_U32, .optional =
>>> true, },
>>
>>
>> Line lengths.
>>
>
> ok
>
>> <snip>
>>
>>> +static void
>>> +nl_parse_tcf(const struct tcf_t *tm, struct tc_flower *flower)
>>> +{
>>> +    unsigned long long int lastuse = tm->lastuse * 10;
>>
>>
>> Where does the 10 come from? What does it mean? Should we have a #define
>> for it?
>>
>
> we want to convert here jiffies to ms and used some default value.
> as Flavio suggested maybe do it in a macro?
> later I saw in netdev-linux.c the functions read_psched() and
> tc_ticks_to_bytes(). can do a followup commit to refactor those
> somewhere else and use them in tc.c as well.

Please do.

>> <snip>
>>
>>> +    stats_basic = stats_attrs[TCA_STATS_BASIC];
>>> +    bs = nl_attr_get_unspec(stats_basic, sizeof *bs);
>>> +
>>> +    stats->n_packets.lo = bs->packets;
>>> +    stats->n_packets.hi = 0;
>>> +    stats->n_bytes.hi = bs->bytes >> 32;
>>> +    stats->n_bytes.lo = bs->bytes & 0x00000000FFFFFFFF;
>>
>>
>> Should this use put_32aligned_u64()?
>
>
> yes. wanted to do that as followup commit later to avoid too
> many changes. I can merge it now though.

OK.

>>
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +#define TCA_ACT_MIN_PRIO 1
>>
>>
>> Stray define - Shouldn't this be in linux/pkt_cls.h?
>>
>
> it's not in pkt_cls.h, we only have TCA_ACT_MAX_PRIO
> We can also see in act_api.c in kernel that they loop from 1
> and don't use a definition.
> we added a local definition for readability.

Did you consider suggesting a patch upstream to add it to pkt_cls.h,
which may assist readability there too?

>>> +
>>> +    tcmsg = tc_make_request(ifindex, RTM_GETTFILTER, NLM_F_DUMP,
>>> &request);
>>> +    tcmsg->tcm_parent = tc_make_handle(0xffff, 0);
>>
>>
>> Do TC people usually define handles in terms of these specific
>> integers? I wonder if a subsequent follow-up patch would improve
>> readability by #defining these tc_make_handle(0xffff, 0) numbers to
>> show what they actually mean - like ingress, root.
>>
>
> currently we copied it from existing functions that already set
> tcm_parent like this.
> In tc we can use the word 'ingress' which then in the code is
> translating to:
>
> req.t.tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS);
>
> we can do a followup patch to do this as well or even on this add
> another macro.

Thanks, I think this would help.

>>> +int
>>> +tc_replace_flower(int ifindex, uint16_t prio, uint32_t handle,
>>> +                  struct tc_flower *flower)
>>> +{
>>> +    struct ofpbuf request;
>>> +    struct tcmsg *tcmsg;
>>> +    struct ofpbuf *reply;
>>> +    int error = 0;
>>> +    size_t basic_offset;
>>> +    uint16_t eth_type = (OVS_FORCE uint16_t) flower->key.eth_type;
>>
>>
>> Why does this need forcing?
>>
>
> as eth_type is ove_be16 but tc_make_handle wants unsigned int.
> we get a compilation warning from sparse about incorrect type.
> this is to avoid sparse from failing.

I see. Should it be unsigned int, then? Rather than casting a
big-endian value to store a host-endian variable of the same size?
Roi Dayan June 4, 2017, 5:22 a.m. UTC | #6
On 01/06/2017 20:53, Joe Stringer wrote:
> On 1 June 2017 at 07:39, Roi Dayan <roid@mellanox.com> wrote:
>>
>>
>> On 31/05/2017 03:50, Joe Stringer wrote:
>>>
>>> On 28 May 2017 at 04:59, Roi Dayan <roid@mellanox.com> wrote:
>>>>
>>>> Add tc helper functions to query and manipulate the flower classifier.
>>>>
>>>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
>>>> Signed-off-by: Roi Dayan <roid@mellanox.com>
>>>> ---
>>>
>>>
>>> Again is this co-authored? utilities/checkpatch.py checks for stuff like
>>> this.
>>>
>>> I didn't go through all of the enums and make sure they're all
>>> covered, correct types, etc.
>>>
>>> <snip>
>>>
>>>> +    [TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK] = { .type = NL_A_U32, .optional =
>>>> true, },
>>>> +    [TCA_FLOWER_KEY_ENC_IPV4_DST_MASK] = { .type = NL_A_U32, .optional =
>>>> true, },
>>>
>>>
>>> Line lengths.
>>>
>>
>> ok
>>
>>> <snip>
>>>
>>>> +static void
>>>> +nl_parse_tcf(const struct tcf_t *tm, struct tc_flower *flower)
>>>> +{
>>>> +    unsigned long long int lastuse = tm->lastuse * 10;
>>>
>>>
>>> Where does the 10 come from? What does it mean? Should we have a #define
>>> for it?
>>>
>>
>> we want to convert here jiffies to ms and used some default value.
>> as Flavio suggested maybe do it in a macro?
>> later I saw in netdev-linux.c the functions read_psched() and
>> tc_ticks_to_bytes(). can do a followup commit to refactor those
>> somewhere else and use them in tc.c as well.
>
> Please do.

just to be clear here, is it ok to use a macro for this series
and do the followup commit after this series?

>
>>> <snip>
>>>
>>>> +    stats_basic = stats_attrs[TCA_STATS_BASIC];
>>>> +    bs = nl_attr_get_unspec(stats_basic, sizeof *bs);
>>>> +
>>>> +    stats->n_packets.lo = bs->packets;
>>>> +    stats->n_packets.hi = 0;
>>>> +    stats->n_bytes.hi = bs->bytes >> 32;
>>>> +    stats->n_bytes.lo = bs->bytes & 0x00000000FFFFFFFF;
>>>
>>>
>>> Should this use put_32aligned_u64()?
>>
>>
>> yes. wanted to do that as followup commit later to avoid too
>> many changes. I can merge it now though.
>
> OK.
>
>>>
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +#define TCA_ACT_MIN_PRIO 1
>>>
>>>
>>> Stray define - Shouldn't this be in linux/pkt_cls.h?
>>>
>>
>> it's not in pkt_cls.h, we only have TCA_ACT_MAX_PRIO
>> We can also see in act_api.c in kernel that they loop from 1
>> and don't use a definition.
>> we added a local definition for readability.
>
> Did you consider suggesting a patch upstream to add it to pkt_cls.h,
> which may assist readability there too?

yes but looking in doing so seems it's not specific to TCA_ACT_MAX_PRIO
but a generic way for nla_parse() to return results and it's using
index 1 to maxtype+1.
In this case TCA_ACT_MAX_PRIO is 32 and the result array will use
indexes 1 to 32 included but including index 0.
I looked what tc user space is doing and it's starting from 0
but also check array item is not empty. we are doing it as well so
basically we can also start the loop from 0 knowingly index 0 is not
being used.

>
>>>> +
>>>> +    tcmsg = tc_make_request(ifindex, RTM_GETTFILTER, NLM_F_DUMP,
>>>> &request);
>>>> +    tcmsg->tcm_parent = tc_make_handle(0xffff, 0);
>>>
>>>
>>> Do TC people usually define handles in terms of these specific
>>> integers? I wonder if a subsequent follow-up patch would improve
>>> readability by #defining these tc_make_handle(0xffff, 0) numbers to
>>> show what they actually mean - like ingress, root.
>>>
>>
>> currently we copied it from existing functions that already set
>> tcm_parent like this.
>> In tc we can use the word 'ingress' which then in the code is
>> translating to:
>>
>> req.t.tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS);
>>
>> we can do a followup patch to do this as well or even on this add
>> another macro.
>
> Thanks, I think this would help.
>
>>>> +int
>>>> +tc_replace_flower(int ifindex, uint16_t prio, uint32_t handle,
>>>> +                  struct tc_flower *flower)
>>>> +{
>>>> +    struct ofpbuf request;
>>>> +    struct tcmsg *tcmsg;
>>>> +    struct ofpbuf *reply;
>>>> +    int error = 0;
>>>> +    size_t basic_offset;
>>>> +    uint16_t eth_type = (OVS_FORCE uint16_t) flower->key.eth_type;
>>>
>>>
>>> Why does this need forcing?
>>>
>>
>> as eth_type is ove_be16 but tc_make_handle wants unsigned int.
>> we get a compilation warning from sparse about incorrect type.
>> this is to avoid sparse from failing.
>
> I see. Should it be unsigned int, then? Rather than casting a
> big-endian value to store a host-endian variable of the same size?
>

it's not unsigned int to follow up with the rest of the user space code
that use ovs_be16 for eth_type.
also we are using match_set_dl_type() that expect ovs_be16.
Joe Stringer June 5, 2017, 5:36 p.m. UTC | #7
On 3 June 2017 at 22:22, Roi Dayan <roid@mellanox.com> wrote:
>
>
> On 01/06/2017 20:53, Joe Stringer wrote:
>>
>> On 1 June 2017 at 07:39, Roi Dayan <roid@mellanox.com> wrote:
>>>
>>>
>>>
>>> On 31/05/2017 03:50, Joe Stringer wrote:
>>>>
>>>>
>>>> On 28 May 2017 at 04:59, Roi Dayan <roid@mellanox.com> wrote:
>>>>>
>>>>>
>>>>> Add tc helper functions to query and manipulate the flower classifier.
>>>>>
>>>>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
>>>>> Signed-off-by: Roi Dayan <roid@mellanox.com>
>>>>> ---
>>>>
>>>>
>>>>
>>>> Again is this co-authored? utilities/checkpatch.py checks for stuff like
>>>> this.
>>>>
>>>> I didn't go through all of the enums and make sure they're all
>>>> covered, correct types, etc.
>>>>
>>>> <snip>
>>>>
>>>>> +    [TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK] = { .type = NL_A_U32, .optional
>>>>> =
>>>>> true, },
>>>>> +    [TCA_FLOWER_KEY_ENC_IPV4_DST_MASK] = { .type = NL_A_U32, .optional
>>>>> =
>>>>> true, },
>>>>
>>>>
>>>>
>>>> Line lengths.
>>>>
>>>
>>> ok
>>>
>>>> <snip>
>>>>
>>>>> +static void
>>>>> +nl_parse_tcf(const struct tcf_t *tm, struct tc_flower *flower)
>>>>> +{
>>>>> +    unsigned long long int lastuse = tm->lastuse * 10;
>>>>
>>>>
>>>>
>>>> Where does the 10 come from? What does it mean? Should we have a #define
>>>> for it?
>>>>
>>>
>>> we want to convert here jiffies to ms and used some default value.
>>> as Flavio suggested maybe do it in a macro?
>>> later I saw in netdev-linux.c the functions read_psched() and
>>> tc_ticks_to_bytes(). can do a followup commit to refactor those
>>> somewhere else and use them in tc.c as well.
>>
>>
>> Please do.
>
>
> just to be clear here, is it ok to use a macro for this series
> and do the followup commit after this series?

That sounds reasonable.

>>>>> +int
>>>>> +tc_replace_flower(int ifindex, uint16_t prio, uint32_t handle,
>>>>> +                  struct tc_flower *flower)
>>>>> +{
>>>>> +    struct ofpbuf request;
>>>>> +    struct tcmsg *tcmsg;
>>>>> +    struct ofpbuf *reply;
>>>>> +    int error = 0;
>>>>> +    size_t basic_offset;
>>>>> +    uint16_t eth_type = (OVS_FORCE uint16_t) flower->key.eth_type;
>>>>
>>>>
>>>>
>>>> Why does this need forcing?
>>>>
>>>
>>> as eth_type is ove_be16 but tc_make_handle wants unsigned int.
>>> we get a compilation warning from sparse about incorrect type.
>>> this is to avoid sparse from failing.
>>
>>
>> I see. Should it be unsigned int, then? Rather than casting a
>> big-endian value to store a host-endian variable of the same size?
>>
>
> it's not unsigned int to follow up with the rest of the user space code
> that use ovs_be16 for eth_type.
> also we are using match_set_dl_type() that expect ovs_be16.

The rest of the code stores a big-endian eth_type in an ovs_be16, or
host byte-order version in uint16_t. The thing I found surprising in
this code was that the big endian version of the ethertype was being
placed into a uint16_t without converting to host byte order. In the
end, I think what we're trying to achieve is that the second parameter
to tc_make_handle() should be a big-endian ethertype but sparse would
complain if we passed it directly (because tc_make_handle()'s second
argument type isn't ovs_be16). So OVS_FORCE is necessary, though I
would have expected it to be used inside the arguments of
tc_make_handle().
Roi Dayan June 6, 2017, 6:01 a.m. UTC | #8
On 05/06/2017 20:36, Joe Stringer wrote:
> On 3 June 2017 at 22:22, Roi Dayan <roid@mellanox.com> wrote:
>>
>>
>> On 01/06/2017 20:53, Joe Stringer wrote:
>>>
>>> On 1 June 2017 at 07:39, Roi Dayan <roid@mellanox.com> wrote:
>>>>
>>>>
>>>>
>>>> On 31/05/2017 03:50, Joe Stringer wrote:
>>>>>
>>>>>
>>>>> On 28 May 2017 at 04:59, Roi Dayan <roid@mellanox.com> wrote:
>>>>>>
>>>>>>
>>>>>> Add tc helper functions to query and manipulate the flower classifier.
>>>>>>
>>>>>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
>>>>>> Signed-off-by: Roi Dayan <roid@mellanox.com>
>>>>>> ---
>>>>>
>>>>>
>>>>>
>>>>> Again is this co-authored? utilities/checkpatch.py checks for stuff like
>>>>> this.
>>>>>
>>>>> I didn't go through all of the enums and make sure they're all
>>>>> covered, correct types, etc.
>>>>>
>>>>> <snip>
>>>>>
>>>>>> +    [TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK] = { .type = NL_A_U32, .optional
>>>>>> =
>>>>>> true, },
>>>>>> +    [TCA_FLOWER_KEY_ENC_IPV4_DST_MASK] = { .type = NL_A_U32, .optional
>>>>>> =
>>>>>> true, },
>>>>>
>>>>>
>>>>>
>>>>> Line lengths.
>>>>>
>>>>
>>>> ok
>>>>
>>>>> <snip>
>>>>>
>>>>>> +static void
>>>>>> +nl_parse_tcf(const struct tcf_t *tm, struct tc_flower *flower)
>>>>>> +{
>>>>>> +    unsigned long long int lastuse = tm->lastuse * 10;
>>>>>
>>>>>
>>>>>
>>>>> Where does the 10 come from? What does it mean? Should we have a #define
>>>>> for it?
>>>>>
>>>>
>>>> we want to convert here jiffies to ms and used some default value.
>>>> as Flavio suggested maybe do it in a macro?
>>>> later I saw in netdev-linux.c the functions read_psched() and
>>>> tc_ticks_to_bytes(). can do a followup commit to refactor those
>>>> somewhere else and use them in tc.c as well.
>>>
>>>
>>> Please do.
>>
>>
>> just to be clear here, is it ok to use a macro for this series
>> and do the followup commit after this series?
>
> That sounds reasonable.
>
>>>>>> +int
>>>>>> +tc_replace_flower(int ifindex, uint16_t prio, uint32_t handle,
>>>>>> +                  struct tc_flower *flower)
>>>>>> +{
>>>>>> +    struct ofpbuf request;
>>>>>> +    struct tcmsg *tcmsg;
>>>>>> +    struct ofpbuf *reply;
>>>>>> +    int error = 0;
>>>>>> +    size_t basic_offset;
>>>>>> +    uint16_t eth_type = (OVS_FORCE uint16_t) flower->key.eth_type;
>>>>>
>>>>>
>>>>>
>>>>> Why does this need forcing?
>>>>>
>>>>
>>>> as eth_type is ove_be16 but tc_make_handle wants unsigned int.
>>>> we get a compilation warning from sparse about incorrect type.
>>>> this is to avoid sparse from failing.
>>>
>>>
>>> I see. Should it be unsigned int, then? Rather than casting a
>>> big-endian value to store a host-endian variable of the same size?
>>>
>>
>> it's not unsigned int to follow up with the rest of the user space code
>> that use ovs_be16 for eth_type.
>> also we are using match_set_dl_type() that expect ovs_be16.
>
> The rest of the code stores a big-endian eth_type in an ovs_be16, or
> host byte-order version in uint16_t. The thing I found surprising in
> this code was that the big endian version of the ethertype was being
> placed into a uint16_t without converting to host byte order. In the
> end, I think what we're trying to achieve is that the second parameter
> to tc_make_handle() should be a big-endian ethertype but sparse would
> complain if we passed it directly (because tc_make_handle()'s second
> argument type isn't ovs_be16). So OVS_FORCE is necessary, though I
> would have expected it to be used inside the arguments of
> tc_make_handle().
>

maybe. that should also apply to tc_get_minor() to return
ovs_be16 instead of uint.
in parsing back from netlink to tc_flower we do:
flower->key.eth_type = (OVS_FORCE ovs_be16) tc_get_minor(tc->tcm_info);

but a lot of callers use it for vlog and use %u format.
so changing it will probably also cause the compiler to complain.

I'll skip this for V10 as I don't think this is critical
and we can do a commit later to update to what will be agreed on
and update all callers if necessary.
Joe Stringer June 6, 2017, 4:41 p.m. UTC | #9
On 5 June 2017 at 23:01, Roi Dayan <roid@mellanox.com> wrote:
>
>
> On 05/06/2017 20:36, Joe Stringer wrote:
>>
>> On 3 June 2017 at 22:22, Roi Dayan <roid@mellanox.com> wrote:
>>>
>>>
>>>
>>> On 01/06/2017 20:53, Joe Stringer wrote:
>>>>
>>>>
>>>> On 1 June 2017 at 07:39, Roi Dayan <roid@mellanox.com> wrote:
>>>>>
>>>>>
>>>>>
>>>>>
>>>>> On 31/05/2017 03:50, Joe Stringer wrote:
>>>>>>
>>>>>>
>>>>>>
>>>>>> On 28 May 2017 at 04:59, Roi Dayan <roid@mellanox.com> wrote:
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> Add tc helper functions to query and manipulate the flower
>>>>>>> classifier.
>>>>>>>
>>>>>>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
>>>>>>> Signed-off-by: Roi Dayan <roid@mellanox.com>
>>>>>>> ---
>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>> Again is this co-authored? utilities/checkpatch.py checks for stuff
>>>>>> like
>>>>>> this.
>>>>>>
>>>>>> I didn't go through all of the enums and make sure they're all
>>>>>> covered, correct types, etc.
>>>>>>
>>>>>> <snip>
>>>>>>
>>>>>>> +    [TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK] = { .type = NL_A_U32,
>>>>>>> .optional
>>>>>>> =
>>>>>>> true, },
>>>>>>> +    [TCA_FLOWER_KEY_ENC_IPV4_DST_MASK] = { .type = NL_A_U32,
>>>>>>> .optional
>>>>>>> =
>>>>>>> true, },
>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>> Line lengths.
>>>>>>
>>>>>
>>>>> ok
>>>>>
>>>>>> <snip>
>>>>>>
>>>>>>> +static void
>>>>>>> +nl_parse_tcf(const struct tcf_t *tm, struct tc_flower *flower)
>>>>>>> +{
>>>>>>> +    unsigned long long int lastuse = tm->lastuse * 10;
>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>> Where does the 10 come from? What does it mean? Should we have a
>>>>>> #define
>>>>>> for it?
>>>>>>
>>>>>
>>>>> we want to convert here jiffies to ms and used some default value.
>>>>> as Flavio suggested maybe do it in a macro?
>>>>> later I saw in netdev-linux.c the functions read_psched() and
>>>>> tc_ticks_to_bytes(). can do a followup commit to refactor those
>>>>> somewhere else and use them in tc.c as well.
>>>>
>>>>
>>>>
>>>> Please do.
>>>
>>>
>>>
>>> just to be clear here, is it ok to use a macro for this series
>>> and do the followup commit after this series?
>>
>>
>> That sounds reasonable.
>>
>>>>>>> +int
>>>>>>> +tc_replace_flower(int ifindex, uint16_t prio, uint32_t handle,
>>>>>>> +                  struct tc_flower *flower)
>>>>>>> +{
>>>>>>> +    struct ofpbuf request;
>>>>>>> +    struct tcmsg *tcmsg;
>>>>>>> +    struct ofpbuf *reply;
>>>>>>> +    int error = 0;
>>>>>>> +    size_t basic_offset;
>>>>>>> +    uint16_t eth_type = (OVS_FORCE uint16_t) flower->key.eth_type;
>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>> Why does this need forcing?
>>>>>>
>>>>>
>>>>> as eth_type is ove_be16 but tc_make_handle wants unsigned int.
>>>>> we get a compilation warning from sparse about incorrect type.
>>>>> this is to avoid sparse from failing.
>>>>
>>>>
>>>>
>>>> I see. Should it be unsigned int, then? Rather than casting a
>>>> big-endian value to store a host-endian variable of the same size?
>>>>
>>>
>>> it's not unsigned int to follow up with the rest of the user space code
>>> that use ovs_be16 for eth_type.
>>> also we are using match_set_dl_type() that expect ovs_be16.
>>
>>
>> The rest of the code stores a big-endian eth_type in an ovs_be16, or
>> host byte-order version in uint16_t. The thing I found surprising in
>> this code was that the big endian version of the ethertype was being
>> placed into a uint16_t without converting to host byte order. In the
>> end, I think what we're trying to achieve is that the second parameter
>> to tc_make_handle() should be a big-endian ethertype but sparse would
>> complain if we passed it directly (because tc_make_handle()'s second
>> argument type isn't ovs_be16). So OVS_FORCE is necessary, though I
>> would have expected it to be used inside the arguments of
>> tc_make_handle().
>>
>
> maybe. that should also apply to tc_get_minor() to return
> ovs_be16 instead of uint.
> in parsing back from netlink to tc_flower we do:
> flower->key.eth_type = (OVS_FORCE ovs_be16) tc_get_minor(tc->tcm_info);

That sounds reasonable to me, although I think that the function may
be used for not just the tcm_info but also tcm_parent.

> but a lot of callers use it for vlog and use %u format.
> so changing it will probably also cause the compiler to complain.

I'm guessing that not all callers assume it's supposed to denote an ethertype.

> I'll skip this for V10 as I don't think this is critical
> and we can do a commit later to update to what will be agreed on
> and update all callers if necessary.

That's fine.
diff mbox

Patch

diff --git a/lib/tc.c b/lib/tc.c
index 644f30c..33448d7 100644
--- a/lib/tc.c
+++ b/lib/tc.c
@@ -1,5 +1,6 @@ 
 /*
  * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017 Nicira, Inc.
+ * Copyright (c) 2016 Mellanox Technologies, Ltd.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,13 +18,26 @@ 
 #include <config.h>
 #include "tc.h"
 #include <errno.h>
+#include <linux/rtnetlink.h>
+#include <net/if.h>
+#include <linux/tc_act/tc_gact.h>
+#include <linux/tc_act/tc_mirred.h>
+#include <linux/tc_act/tc_vlan.h>
+#include <linux/tc_act/tc_tunnel_key.h>
+#include <linux/gen_stats.h>
+#include "timeval.h"
 #include "netlink-socket.h"
 #include "netlink.h"
+#include "rtnetlink.h"
 #include "openvswitch/vlog.h"
 #include "openvswitch/ofpbuf.h"
+#include "util.h"
+#include "byte-order.h"
 
 VLOG_DEFINE_THIS_MODULE(tc);
 
+static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5);
+
 /* Returns tc handle 'major':'minor'. */
 unsigned int
 tc_make_handle(unsigned int major, unsigned int minor)
@@ -112,3 +126,981 @@  tc_add_del_ingress_qdisc(int ifindex, bool add)
 
     return 0;
 }
+
+static const struct nl_policy tca_policy[] = {
+    [TCA_KIND] = { .type = NL_A_STRING, .optional = false, },
+    [TCA_OPTIONS] = { .type = NL_A_NESTED, .optional = false, },
+    [TCA_STATS] = { .type = NL_A_UNSPEC,
+                    .min_len = sizeof(struct tc_stats), .optional = true, },
+    [TCA_STATS2] = { .type = NL_A_NESTED, .optional = true, },
+};
+
+static const struct nl_policy tca_flower_policy[] = {
+    [TCA_FLOWER_CLASSID] = { .type = NL_A_U32, .optional = true, },
+    [TCA_FLOWER_INDEV] = { .type = NL_A_STRING, .max_len = IFNAMSIZ,
+                           .optional = true, },
+    [TCA_FLOWER_KEY_ETH_SRC] = { .type = NL_A_UNSPEC,
+                                 .min_len = ETH_ALEN, .optional = true, },
+    [TCA_FLOWER_KEY_ETH_DST] = { .type = NL_A_UNSPEC,
+                                 .min_len = ETH_ALEN, .optional = true, },
+    [TCA_FLOWER_KEY_ETH_SRC_MASK] = { .type = NL_A_UNSPEC,
+                                      .min_len = ETH_ALEN,
+                                      .optional = true, },
+    [TCA_FLOWER_KEY_ETH_DST_MASK] = { .type = NL_A_UNSPEC,
+                                      .min_len = ETH_ALEN,
+                                      .optional = true, },
+    [TCA_FLOWER_KEY_ETH_TYPE] = { .type = NL_A_U16, .optional = false, },
+    [TCA_FLOWER_FLAGS] = { .type = NL_A_U32, .optional = false, },
+    [TCA_FLOWER_ACT] = { .type = NL_A_NESTED, .optional = false, },
+    [TCA_FLOWER_KEY_IP_PROTO] = { .type = NL_A_U8, .optional = true, },
+    [TCA_FLOWER_KEY_IPV4_SRC] = { .type = NL_A_U32, .optional = true, },
+    [TCA_FLOWER_KEY_IPV4_DST] = {.type = NL_A_U32, .optional = true, },
+    [TCA_FLOWER_KEY_IPV4_SRC_MASK] = { .type = NL_A_U32, .optional = true, },
+    [TCA_FLOWER_KEY_IPV4_DST_MASK] = { .type = NL_A_U32, .optional = true, },
+    [TCA_FLOWER_KEY_IPV6_SRC] = { .type = NL_A_UNSPEC,
+                                  .min_len = sizeof(struct in6_addr),
+                                  .optional = true, },
+    [TCA_FLOWER_KEY_IPV6_DST] = { .type = NL_A_UNSPEC,
+                                  .min_len = sizeof(struct in6_addr),
+                                  .optional = true, },
+    [TCA_FLOWER_KEY_IPV6_SRC_MASK] = { .type = NL_A_UNSPEC,
+                                       .min_len = sizeof(struct in6_addr),
+                                       .optional = true, },
+    [TCA_FLOWER_KEY_IPV6_DST_MASK] = { .type = NL_A_UNSPEC,
+                                       .min_len = sizeof(struct in6_addr),
+                                       .optional = true, },
+    [TCA_FLOWER_KEY_TCP_SRC] = { .type = NL_A_U16, .optional = true, },
+    [TCA_FLOWER_KEY_TCP_DST] = { .type = NL_A_U16, .optional = true, },
+    [TCA_FLOWER_KEY_TCP_SRC_MASK] = { .type = NL_A_U16, .optional = true, },
+    [TCA_FLOWER_KEY_TCP_DST_MASK] = { .type = NL_A_U16, .optional = true, },
+    [TCA_FLOWER_KEY_UDP_SRC] = { .type = NL_A_U16, .optional = true, },
+    [TCA_FLOWER_KEY_UDP_DST] = { .type = NL_A_U16, .optional = true, },
+    [TCA_FLOWER_KEY_UDP_SRC_MASK] = { .type = NL_A_U16, .optional = true, },
+    [TCA_FLOWER_KEY_UDP_DST_MASK] = { .type = NL_A_U16, .optional = true, },
+    [TCA_FLOWER_KEY_VLAN_ID] = { .type = NL_A_U16, .optional = true, },
+    [TCA_FLOWER_KEY_VLAN_PRIO] = { .type = NL_A_U8, .optional = true, },
+    [TCA_FLOWER_KEY_VLAN_ETH_TYPE] = { .type = NL_A_U16, .optional = true, },
+    [TCA_FLOWER_KEY_ENC_KEY_ID] = { .type = NL_A_U32, .optional = true, },
+    [TCA_FLOWER_KEY_ENC_IPV4_SRC] = { .type = NL_A_U32, .optional = true, },
+    [TCA_FLOWER_KEY_ENC_IPV4_DST] = { .type = NL_A_U32, .optional = true, },
+    [TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK] = { .type = NL_A_U32, .optional = true, },
+    [TCA_FLOWER_KEY_ENC_IPV4_DST_MASK] = { .type = NL_A_U32, .optional = true, },
+    [TCA_FLOWER_KEY_ENC_IPV6_SRC] = { .type = NL_A_UNSPEC,
+                                      .min_len = sizeof(struct in6_addr),
+                                      .optional = true, },
+    [TCA_FLOWER_KEY_ENC_IPV6_DST] = { .type = NL_A_UNSPEC,
+                                      .min_len = sizeof(struct in6_addr),
+                                      .optional = true, },
+    [TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK] = { .type = NL_A_UNSPEC,
+                                           .min_len = sizeof(struct in6_addr),
+                                           .optional = true, },
+    [TCA_FLOWER_KEY_ENC_IPV6_DST_MASK] = { .type = NL_A_UNSPEC,
+                                           .min_len = sizeof(struct in6_addr),
+                                           .optional = true, },
+    [TCA_FLOWER_KEY_ENC_UDP_DST_PORT] = { .type = NL_A_U16,
+                                          .optional = true, },
+};
+
+static void
+nl_parse_flower_eth(struct nlattr **attrs, struct tc_flower *flower)
+{
+    const struct eth_addr *eth;
+
+    if (attrs[TCA_FLOWER_KEY_ETH_SRC_MASK]) {
+        eth = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ETH_SRC], ETH_ALEN);
+        memcpy(&flower->key.src_mac, eth, sizeof flower->key.src_mac);
+
+        eth = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ETH_SRC_MASK], ETH_ALEN);
+        memcpy(&flower->mask.src_mac, eth, sizeof flower->mask.src_mac);
+    }
+    if (attrs[TCA_FLOWER_KEY_ETH_DST_MASK]) {
+        eth = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ETH_DST], ETH_ALEN);
+        memcpy(&flower->key.dst_mac, eth, sizeof flower->key.dst_mac);
+
+        eth = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ETH_DST_MASK], ETH_ALEN);
+        memcpy(&flower->mask.dst_mac, eth, sizeof flower->mask.dst_mac);
+    }
+}
+
+static void
+nl_parse_flower_vlan(struct nlattr **attrs, struct tc_flower *flower)
+{
+    if (flower->key.eth_type != htons(ETH_P_8021Q)) {
+        return;
+    }
+
+    flower->key.encap_eth_type =
+        nl_attr_get_be16(attrs[TCA_FLOWER_KEY_ETH_TYPE]);
+
+    if (attrs[TCA_FLOWER_KEY_VLAN_ID]) {
+        flower->key.vlan_id =
+            nl_attr_get_u16(attrs[TCA_FLOWER_KEY_VLAN_ID]);
+    }
+    if (attrs[TCA_FLOWER_KEY_VLAN_PRIO]) {
+        flower->key.vlan_prio =
+            nl_attr_get_u8(attrs[TCA_FLOWER_KEY_VLAN_PRIO]);
+    }
+}
+
+static void
+nl_parse_flower_tunnel(struct nlattr **attrs, struct tc_flower *flower)
+{
+    if (attrs[TCA_FLOWER_KEY_ENC_KEY_ID]) {
+        ovs_be32 id = nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ENC_KEY_ID]);
+
+        flower->tunnel.id = be32_to_be64(id);
+    }
+    if (attrs[TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK]) {
+        flower->tunnel.ipv4.ipv4_src =
+            nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ENC_IPV4_SRC]);
+    }
+    if (attrs[TCA_FLOWER_KEY_ENC_IPV4_DST_MASK]) {
+        flower->tunnel.ipv4.ipv4_dst =
+            nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ENC_IPV4_DST]);
+    }
+    if (attrs[TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK]) {
+        flower->tunnel.ipv6.ipv6_src =
+            nl_attr_get_in6_addr(attrs[TCA_FLOWER_KEY_ENC_IPV6_SRC]);
+    }
+    if (attrs[TCA_FLOWER_KEY_ENC_IPV6_DST_MASK]) {
+        flower->tunnel.ipv6.ipv6_dst =
+            nl_attr_get_in6_addr(attrs[TCA_FLOWER_KEY_ENC_IPV6_DST]);
+    }
+    if (attrs[TCA_FLOWER_KEY_ENC_UDP_DST_PORT]) {
+        flower->tunnel.tp_dst =
+            nl_attr_get_be16(attrs[TCA_FLOWER_KEY_ENC_UDP_DST_PORT]);
+    }
+}
+
+static void
+nl_parse_flower_ip(struct nlattr **attrs, struct tc_flower *flower) {
+    uint8_t ip_proto = 0;
+    struct tc_flower_key *key = &flower->key;
+    struct tc_flower_key *mask = &flower->mask;
+
+    if (attrs[TCA_FLOWER_KEY_IP_PROTO]) {
+        ip_proto = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_IP_PROTO]);
+        key->ip_proto = ip_proto;
+        mask->ip_proto = UINT8_MAX;
+    }
+
+    if (attrs[TCA_FLOWER_KEY_IPV4_SRC_MASK]) {
+        key->ipv4.ipv4_src =
+            nl_attr_get_be32(attrs[TCA_FLOWER_KEY_IPV4_SRC]);
+        mask->ipv4.ipv4_src =
+            nl_attr_get_be32(attrs[TCA_FLOWER_KEY_IPV4_SRC_MASK]);
+    }
+    if (attrs[TCA_FLOWER_KEY_IPV4_DST_MASK]) {
+        key->ipv4.ipv4_dst =
+            nl_attr_get_be32(attrs[TCA_FLOWER_KEY_IPV4_DST]);
+        mask->ipv4.ipv4_dst =
+            nl_attr_get_be32(attrs[TCA_FLOWER_KEY_IPV4_DST_MASK]);
+    }
+    if (attrs[TCA_FLOWER_KEY_IPV6_SRC_MASK]) {
+        struct nlattr *attr = attrs[TCA_FLOWER_KEY_IPV6_SRC];
+        struct nlattr *attr_mask = attrs[TCA_FLOWER_KEY_IPV6_SRC_MASK];
+
+        key->ipv6.ipv6_src = nl_attr_get_in6_addr(attr);
+        mask->ipv6.ipv6_src = nl_attr_get_in6_addr(attr_mask);
+    }
+    if (attrs[TCA_FLOWER_KEY_IPV6_DST_MASK]) {
+        struct nlattr *attr = attrs[TCA_FLOWER_KEY_IPV6_DST];
+        struct nlattr *attr_mask = attrs[TCA_FLOWER_KEY_IPV6_DST_MASK];
+
+        key->ipv6.ipv6_dst = nl_attr_get_in6_addr(attr);
+        mask->ipv6.ipv6_dst = nl_attr_get_in6_addr(attr_mask);
+    }
+
+    if (ip_proto == IPPROTO_TCP) {
+        if (attrs[TCA_FLOWER_KEY_TCP_SRC_MASK]) {
+            key->src_port =
+                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_SRC]);
+            mask->src_port =
+                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_SRC_MASK]);
+        }
+        if (attrs[TCA_FLOWER_KEY_TCP_DST_MASK]) {
+            key->dst_port =
+                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_DST]);
+            mask->dst_port =
+                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_DST_MASK]);
+        }
+    } else if (ip_proto == IPPROTO_UDP) {
+        if (attrs[TCA_FLOWER_KEY_UDP_SRC_MASK]) {
+            key->src_port = nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_SRC]);
+            mask->src_port =
+                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_SRC_MASK]);
+        }
+        if (attrs[TCA_FLOWER_KEY_UDP_DST_MASK]) {
+            key->dst_port = nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_DST]);
+            mask->dst_port =
+                nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_DST_MASK]);
+        }
+    }
+}
+
+static const struct nl_policy tunnel_key_policy[] = {
+    [TCA_TUNNEL_KEY_PARMS] = { .type = NL_A_UNSPEC,
+                               .min_len = sizeof(struct tc_tunnel_key),
+                               .optional = false, },
+    [TCA_TUNNEL_KEY_ENC_IPV4_SRC] = { .type = NL_A_U32, .optional = true, },
+    [TCA_TUNNEL_KEY_ENC_IPV4_DST] = { .type = NL_A_U32, .optional = true, },
+    [TCA_TUNNEL_KEY_ENC_IPV6_SRC] = { .type = NL_A_UNSPEC,
+                                      .min_len = sizeof(struct in6_addr),
+                                      .optional = true, },
+    [TCA_TUNNEL_KEY_ENC_IPV6_DST] = { .type = NL_A_UNSPEC,
+                                      .min_len = sizeof(struct in6_addr),
+                                      .optional = true, },
+    [TCA_TUNNEL_KEY_ENC_KEY_ID] = { .type = NL_A_U32, .optional = true, },
+    [TCA_TUNNEL_KEY_ENC_DST_PORT] = { .type = NL_A_U16, .optional = true, },
+};
+
+static int
+nl_parse_act_tunnel_key(struct nlattr *options, struct tc_flower *flower)
+{
+    struct nlattr *tun_attrs[ARRAY_SIZE(tunnel_key_policy)];
+    const struct nlattr *tun_parms;
+    const struct tc_tunnel_key *tun;
+
+    if (!nl_parse_nested(options, tunnel_key_policy, tun_attrs,
+                ARRAY_SIZE(tunnel_key_policy))) {
+        VLOG_ERR_RL(&error_rl, "failed to parse tunnel_key action options");
+        return EPROTO;
+    }
+
+    tun_parms = tun_attrs[TCA_TUNNEL_KEY_PARMS];
+    tun = nl_attr_get_unspec(tun_parms, sizeof *tun);
+    if (tun->t_action == TCA_TUNNEL_KEY_ACT_SET) {
+        struct nlattr *id = tun_attrs[TCA_TUNNEL_KEY_ENC_KEY_ID];
+        struct nlattr *dst_port = tun_attrs[TCA_TUNNEL_KEY_ENC_DST_PORT];
+        struct nlattr *ipv4_src = tun_attrs[TCA_TUNNEL_KEY_ENC_IPV4_SRC];
+        struct nlattr *ipv4_dst = tun_attrs[TCA_TUNNEL_KEY_ENC_IPV4_DST];
+        struct nlattr *ipv6_src = tun_attrs[TCA_TUNNEL_KEY_ENC_IPV6_SRC];
+        struct nlattr *ipv6_dst = tun_attrs[TCA_TUNNEL_KEY_ENC_IPV6_DST];
+
+        flower->set.set = true;
+        flower->set.ipv4.ipv4_src = ipv4_src ? nl_attr_get_be32(ipv4_src) : 0;
+        flower->set.ipv4.ipv4_dst = ipv4_dst ? nl_attr_get_be32(ipv4_dst) : 0;
+        if (ipv6_src) {
+            flower->set.ipv6.ipv6_src = nl_attr_get_in6_addr(ipv6_src);
+        }
+        if (ipv6_dst) {
+            flower->set.ipv6.ipv6_dst = nl_attr_get_in6_addr(ipv6_dst);
+        }
+        flower->set.id = id ? be32_to_be64(nl_attr_get_be32(id)) : 0;
+        flower->set.tp_dst = dst_port ? nl_attr_get_be16(dst_port) : 0;
+    } else if (tun->t_action == TCA_TUNNEL_KEY_ACT_RELEASE) {
+        flower->tunnel.tunnel = true;
+    } else {
+        VLOG_ERR_RL(&error_rl, "unknown tunnel actions: %d, %d",
+                    tun->action, tun->t_action);
+        return EINVAL;
+    }
+    return 0;
+}
+
+static const struct nl_policy gact_policy[] = {
+    [TCA_GACT_PARMS] = { .type = NL_A_UNSPEC,
+                         .min_len = sizeof(struct tc_gact),
+                         .optional = false, },
+    [TCA_GACT_TM] = { .type = NL_A_UNSPEC,
+                      .min_len = sizeof(struct tcf_t),
+                      .optional = false, },
+};
+
+static void
+nl_parse_tcf(const struct tcf_t *tm, struct tc_flower *flower)
+{
+    unsigned long long int lastuse = tm->lastuse * 10;
+    unsigned long long int now = time_msec();
+
+    flower->lastused = now - lastuse;
+}
+
+static int
+nl_parse_act_drop(struct nlattr *options, struct tc_flower *flower)
+{
+    struct nlattr *gact_attrs[ARRAY_SIZE(gact_policy)];
+    const struct tc_gact *p;
+    struct nlattr *gact_parms;
+    const struct tcf_t *tm;
+
+    if (!nl_parse_nested(options, gact_policy, gact_attrs,
+                         ARRAY_SIZE(gact_policy))) {
+        VLOG_ERR_RL(&error_rl, "failed to parse gact action options");
+        return EPROTO;
+    }
+
+    gact_parms = gact_attrs[TCA_GACT_PARMS];
+    p = nl_attr_get_unspec(gact_parms, sizeof *p);
+
+    if (p->action != TC_ACT_SHOT) {
+        VLOG_ERR_RL(&error_rl, "unknown gact action: %d", p->action);
+        return EINVAL;
+    }
+
+    tm = nl_attr_get_unspec(gact_attrs[TCA_GACT_TM], sizeof *tm);
+    nl_parse_tcf(tm, flower);
+
+    return 0;
+}
+
+static const struct nl_policy mirred_policy[] = {
+    [TCA_MIRRED_PARMS] = { .type = NL_A_UNSPEC,
+                           .min_len = sizeof(struct tc_mirred),
+                           .optional = false, },
+    [TCA_MIRRED_TM] = { .type = NL_A_UNSPEC,
+                        .min_len = sizeof(struct tcf_t),
+                        .optional = false, },
+};
+
+static int
+nl_parse_act_mirred(struct nlattr *options, struct tc_flower *flower)
+{
+
+    struct nlattr *mirred_attrs[ARRAY_SIZE(mirred_policy)];
+    const struct tc_mirred *m;
+    const struct nlattr *mirred_parms;
+    const struct tcf_t *tm;
+    struct nlattr *mirred_tm;
+
+    if (!nl_parse_nested(options, mirred_policy, mirred_attrs,
+                         ARRAY_SIZE(mirred_policy))) {
+        VLOG_ERR_RL(&error_rl, "failed to parse mirred action options");
+        return EPROTO;
+    }
+
+    mirred_parms = mirred_attrs[TCA_MIRRED_PARMS];
+    m = nl_attr_get_unspec(mirred_parms, sizeof *m);
+
+    if (m->action != TC_ACT_STOLEN ||  m->eaction != TCA_EGRESS_REDIR) {
+        VLOG_ERR_RL(&error_rl, "unknown mirred action: %d, %d, %d",
+                 m->action, m->eaction, m->ifindex);
+        return EINVAL;
+    }
+
+    flower->ifindex_out = m->ifindex;
+
+    mirred_tm = mirred_attrs[TCA_MIRRED_TM];
+    tm = nl_attr_get_unspec(mirred_tm, sizeof *tm);
+    nl_parse_tcf(tm, flower);
+
+    return 0;
+}
+
+static const struct nl_policy vlan_policy[] = {
+    [TCA_VLAN_PARMS] = { .type = NL_A_UNSPEC,
+                         .min_len = sizeof(struct tc_vlan),
+                         .optional = false, },
+    [TCA_VLAN_PUSH_VLAN_ID] = { .type = NL_A_U16, .optional = true, },
+    [TCA_VLAN_PUSH_VLAN_PROTOCOL] = { .type = NL_A_U16, .optional = true, },
+    [TCA_VLAN_PUSH_VLAN_PRIORITY] = { .type = NL_A_U8, .optional = true, },
+};
+
+static int
+nl_parse_act_vlan(struct nlattr *options, struct tc_flower *flower)
+{
+    struct nlattr *vlan_attrs[ARRAY_SIZE(vlan_policy)];
+    const struct tc_vlan *v;
+    const struct nlattr *vlan_parms;
+
+    if (!nl_parse_nested(options, vlan_policy, vlan_attrs,
+                         ARRAY_SIZE(vlan_policy))) {
+        VLOG_ERR_RL(&error_rl, "failed to parse vlan action options");
+        return EPROTO;
+    }
+
+    vlan_parms = vlan_attrs[TCA_VLAN_PARMS];
+    v = nl_attr_get_unspec(vlan_parms, sizeof *v);
+    if (v->v_action == TCA_VLAN_ACT_PUSH) {
+        struct nlattr *vlan_id = vlan_attrs[TCA_VLAN_PUSH_VLAN_ID];
+        struct nlattr *vlan_prio = vlan_attrs[TCA_VLAN_PUSH_VLAN_PRIORITY];
+
+        flower->vlan_push_id = nl_attr_get_u16(vlan_id);
+        flower->vlan_push_prio = nl_attr_get_u8(vlan_prio);
+    } else if (v->v_action == TCA_VLAN_ACT_POP) {
+        flower->vlan_pop = 1;
+    } else {
+        VLOG_ERR_RL(&error_rl, "unknown vlan action: %d, %d",
+                    v->action, v->v_action);
+        return EINVAL;
+    }
+    return 0;
+}
+
+static const struct nl_policy act_policy[] = {
+    [TCA_ACT_KIND] = { .type = NL_A_STRING, .optional = false, },
+    [TCA_ACT_COOKIE] = { .type = NL_A_UNSPEC, .optional = true, },
+    [TCA_ACT_OPTIONS] = { .type = NL_A_NESTED, .optional = false, },
+    [TCA_ACT_STATS] = { .type = NL_A_NESTED, .optional = false, },
+};
+
+static const struct nl_policy stats_policy[] = {
+    [TCA_STATS_BASIC] = { .type = NL_A_UNSPEC,
+                          .min_len = sizeof(struct gnet_stats_basic),
+                          .optional = false, },
+};
+
+static int
+nl_parse_single_action(struct nlattr *action, struct tc_flower *flower)
+{
+    struct nlattr *act_options;
+    struct nlattr *act_stats;
+    struct nlattr *act_cookie;
+    const struct nlattr *stats_basic;
+    const char *act_kind;
+    struct nlattr *action_attrs[ARRAY_SIZE(act_policy)];
+    struct nlattr *stats_attrs[ARRAY_SIZE(stats_policy)];
+    struct ovs_flow_stats *stats = &flower->stats;
+    const struct gnet_stats_basic *bs;
+
+    if (!nl_parse_nested(action, act_policy, action_attrs,
+                         ARRAY_SIZE(act_policy))) {
+        VLOG_ERR_RL(&error_rl, "failed to parse single action options");
+        return EPROTO;
+    }
+
+    act_kind = nl_attr_get_string(action_attrs[TCA_ACT_KIND]);
+    act_options = action_attrs[TCA_ACT_OPTIONS];
+    act_cookie = action_attrs[TCA_ACT_COOKIE];
+
+    if (!strcmp(act_kind, "gact")) {
+        nl_parse_act_drop(act_options, flower);
+    } else if (!strcmp(act_kind, "mirred")) {
+        nl_parse_act_mirred(act_options, flower);
+    } else if (!strcmp(act_kind, "vlan")) {
+        nl_parse_act_vlan(act_options, flower);
+    } else if (!strcmp(act_kind, "tunnel_key")) {
+        nl_parse_act_tunnel_key(act_options, flower);
+    } else {
+        VLOG_ERR_RL(&error_rl, "unknown tc action kind: %s", act_kind);
+        return EINVAL;
+    }
+
+    if (act_cookie) {
+        flower->act_cookie.data = nl_attr_get(act_cookie);
+        flower->act_cookie.len = nl_attr_get_size(act_cookie);
+    }
+
+    act_stats = action_attrs[TCA_ACT_STATS];
+
+    if (!nl_parse_nested(act_stats, stats_policy, stats_attrs,
+                         ARRAY_SIZE(stats_policy))) {
+        VLOG_ERR_RL(&error_rl, "failed to parse action stats policy");
+        return EPROTO;
+    }
+
+    stats_basic = stats_attrs[TCA_STATS_BASIC];
+    bs = nl_attr_get_unspec(stats_basic, sizeof *bs);
+
+    stats->n_packets.lo = bs->packets;
+    stats->n_packets.hi = 0;
+    stats->n_bytes.hi = bs->bytes >> 32;
+    stats->n_bytes.lo = bs->bytes & 0x00000000FFFFFFFF;
+
+    return 0;
+}
+
+#define TCA_ACT_MIN_PRIO 1
+
+static int
+nl_parse_flower_actions(struct nlattr **attrs, struct tc_flower *flower)
+{
+    const struct nlattr *actions = attrs[TCA_FLOWER_ACT];
+    static struct nl_policy actions_orders_policy[TCA_ACT_MAX_PRIO + 1] = {};
+    struct nlattr *actions_orders[ARRAY_SIZE(actions_orders_policy)];
+    const int max_size = ARRAY_SIZE(actions_orders_policy);
+
+    for (int i = TCA_ACT_MIN_PRIO; i < max_size; i++) {
+        actions_orders_policy[i].type = NL_A_NESTED;
+        actions_orders_policy[i].optional = true;
+    }
+
+    if (!nl_parse_nested(actions, actions_orders_policy, actions_orders,
+                         ARRAY_SIZE(actions_orders_policy))) {
+        VLOG_ERR_RL(&error_rl, "failed to parse flower order of actions");
+        return EPROTO;
+    }
+
+    for (int i = TCA_ACT_MIN_PRIO; i < max_size; i++) {
+        if (actions_orders[i]) {
+            int err = nl_parse_single_action(actions_orders[i], flower);
+
+            if (err) {
+                return err;
+            }
+        }
+    }
+
+    return 0;
+}
+
+static int
+nl_parse_flower_options(struct nlattr *nl_options, struct tc_flower *flower)
+{
+    struct nlattr *attrs[ARRAY_SIZE(tca_flower_policy)];
+
+    if (!nl_parse_nested(nl_options, tca_flower_policy,
+                         attrs, ARRAY_SIZE(tca_flower_policy))) {
+        VLOG_ERR_RL(&error_rl, "failed to parse flower classifier options");
+        return EPROTO;
+    }
+
+    nl_parse_flower_eth(attrs, flower);
+    nl_parse_flower_vlan(attrs, flower);
+    nl_parse_flower_ip(attrs, flower);
+    nl_parse_flower_tunnel(attrs, flower);
+    return nl_parse_flower_actions(attrs, flower);
+}
+
+int
+parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tc_flower *flower)
+{
+    struct tcmsg *tc;
+    struct nlattr *ta[ARRAY_SIZE(tca_policy)];
+    const char *kind;
+
+    memset(flower, 0, sizeof *flower);
+    if (NLMSG_HDRLEN + (sizeof *tc) > reply->size) {
+        return EPROTO;
+    }
+
+    tc = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
+    flower->handle = tc->tcm_handle;
+    flower->key.eth_type = (OVS_FORCE ovs_be16) tc_get_minor(tc->tcm_info);
+    flower->mask.eth_type = OVS_BE16_MAX;
+    flower->prio = tc_get_major(tc->tcm_info);
+
+    if (!flower->handle) {
+        return EAGAIN;
+    }
+
+    if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof *tc,
+                         tca_policy, ta, ARRAY_SIZE(ta))) {
+        VLOG_ERR_RL(&error_rl, "failed to parse tca policy");
+        return EPROTO;
+    }
+
+    kind = nl_attr_get_string(ta[TCA_KIND]);
+    if (strcmp(kind, "flower")) {
+        VLOG_ERR_RL(&error_rl, "failed to parse filter: not flower");
+        return EPROTO;
+    }
+
+    return nl_parse_flower_options(ta[TCA_OPTIONS], flower);
+}
+
+int
+tc_dump_flower_start(int ifindex, struct nl_dump *dump)
+{
+    struct ofpbuf request;
+    struct tcmsg *tcmsg;
+
+    tcmsg = tc_make_request(ifindex, RTM_GETTFILTER, NLM_F_DUMP, &request);
+    tcmsg->tcm_parent = tc_make_handle(0xffff, 0);
+    tcmsg->tcm_info = TC_H_UNSPEC;
+    tcmsg->tcm_handle = 0;
+
+    nl_dump_start(dump, NETLINK_ROUTE, &request);
+    ofpbuf_uninit(&request);
+
+    return 0;
+}
+
+int
+tc_flush(int ifindex)
+{
+    struct ofpbuf request;
+    struct tcmsg *tcmsg;
+
+    tcmsg = tc_make_request(ifindex, RTM_DELTFILTER, NLM_F_ACK, &request);
+    tcmsg->tcm_parent = tc_make_handle(0xffff, 0);
+    tcmsg->tcm_info = TC_H_UNSPEC;
+
+    return tc_transact(&request, NULL);
+}
+
+int
+tc_del_filter(int ifindex, int prio, int handle)
+{
+    struct ofpbuf request;
+    struct tcmsg *tcmsg;
+    struct ofpbuf *reply;
+    int error;
+
+    tcmsg = tc_make_request(ifindex, RTM_DELTFILTER, NLM_F_ECHO, &request);
+    tcmsg->tcm_parent = tc_make_handle(0xffff, 0);
+    tcmsg->tcm_info = tc_make_handle(prio, 0);
+    tcmsg->tcm_handle = handle;
+
+    error = tc_transact(&request, &reply);
+    if (!error) {
+        ofpbuf_delete(reply);
+    }
+    return error;
+}
+
+int
+tc_get_flower(int ifindex, int prio, int handle, struct tc_flower *flower)
+{
+    struct ofpbuf request;
+    struct tcmsg *tcmsg;
+    struct ofpbuf *reply;
+    int error;
+
+    tcmsg = tc_make_request(ifindex, RTM_GETTFILTER, NLM_F_ECHO, &request);
+    tcmsg->tcm_parent = tc_make_handle(0xffff, 0);
+    tcmsg->tcm_info = tc_make_handle(prio, 0);
+    tcmsg->tcm_handle = handle;
+
+    error = tc_transact(&request, &reply);
+    if (error) {
+        return error;
+    }
+
+    error = parse_netlink_to_tc_flower(reply, flower);
+    ofpbuf_delete(reply);
+    return error;
+}
+
+static void
+nl_msg_put_act_push_vlan(struct ofpbuf *request, uint16_t vid, uint8_t prio)
+{
+    size_t offset;
+
+    nl_msg_put_string(request, TCA_ACT_KIND, "vlan");
+    offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
+    {
+        struct tc_vlan parm = { .action = TC_ACT_PIPE,
+                                .v_action = TCA_VLAN_ACT_PUSH };
+
+        nl_msg_put_unspec(request, TCA_VLAN_PARMS, &parm, sizeof parm);
+        nl_msg_put_u16(request, TCA_VLAN_PUSH_VLAN_ID, vid);
+        nl_msg_put_u8(request, TCA_VLAN_PUSH_VLAN_PRIORITY, prio);
+    }
+    nl_msg_end_nested(request, offset);
+}
+
+static void
+nl_msg_put_act_pop_vlan(struct ofpbuf *request)
+{
+    size_t offset;
+
+    nl_msg_put_string(request, TCA_ACT_KIND, "vlan");
+    offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
+    {
+        struct tc_vlan parm = { .action = TC_ACT_PIPE,
+                                .v_action = TCA_VLAN_ACT_POP };
+
+        nl_msg_put_unspec(request, TCA_VLAN_PARMS, &parm, sizeof parm);
+    }
+    nl_msg_end_nested(request, offset);
+}
+
+static void
+nl_msg_put_act_tunnel_key_release(struct ofpbuf *request)
+{
+    size_t offset;
+
+    nl_msg_put_string(request, TCA_ACT_KIND, "tunnel_key");
+    offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
+    {
+        struct tc_tunnel_key tun = { .action = TC_ACT_PIPE,
+                                     .t_action = TCA_TUNNEL_KEY_ACT_RELEASE };
+
+        nl_msg_put_unspec(request, TCA_TUNNEL_KEY_PARMS, &tun, sizeof tun);
+    }
+    nl_msg_end_nested(request, offset);
+}
+
+static void
+nl_msg_put_act_tunnel_key_set(struct ofpbuf *request, ovs_be64 id,
+                                ovs_be32 ipv4_src, ovs_be32 ipv4_dst,
+                                struct in6_addr *ipv6_src,
+                                struct in6_addr *ipv6_dst,
+                                ovs_be16 tp_dst)
+{
+    size_t offset;
+
+    nl_msg_put_string(request, TCA_ACT_KIND, "tunnel_key");
+    offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
+    {
+        struct tc_tunnel_key tun = { .action = TC_ACT_PIPE,
+                                     .t_action = TCA_TUNNEL_KEY_ACT_SET };
+
+        nl_msg_put_unspec(request, TCA_TUNNEL_KEY_PARMS, &tun, sizeof tun);
+
+        ovs_be32 id32 = be64_to_be32(id);
+        nl_msg_put_be32(request, TCA_TUNNEL_KEY_ENC_KEY_ID, id32);
+        if (ipv4_dst) {
+            nl_msg_put_be32(request, TCA_TUNNEL_KEY_ENC_IPV4_SRC, ipv4_src);
+            nl_msg_put_be32(request, TCA_TUNNEL_KEY_ENC_IPV4_DST, ipv4_dst);
+        } else if (!is_all_zeros(ipv6_dst, sizeof *ipv6_dst)) {
+            nl_msg_put_in6_addr(request, TCA_TUNNEL_KEY_ENC_IPV6_DST,
+                                ipv6_dst);
+            nl_msg_put_in6_addr(request, TCA_TUNNEL_KEY_ENC_IPV6_SRC,
+                                ipv6_src);
+        }
+        nl_msg_put_be16(request, TCA_TUNNEL_KEY_ENC_DST_PORT, tp_dst);
+    }
+    nl_msg_end_nested(request, offset);
+}
+
+static void
+nl_msg_put_act_drop(struct ofpbuf *request)
+{
+    size_t offset;
+
+    nl_msg_put_string(request, TCA_ACT_KIND, "gact");
+    offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
+    {
+        struct tc_gact p = { .action = TC_ACT_SHOT };
+
+        nl_msg_put_unspec(request, TCA_GACT_PARMS, &p, sizeof p);
+    }
+    nl_msg_end_nested(request, offset);
+}
+
+static void
+nl_msg_put_act_redirect(struct ofpbuf *request, int ifindex)
+{
+    size_t offset;
+
+    nl_msg_put_string(request, TCA_ACT_KIND, "mirred");
+    offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
+    {
+        struct tc_mirred m = { .action = TC_ACT_STOLEN,
+                               .eaction = TCA_EGRESS_REDIR,
+                               .ifindex = ifindex };
+
+        nl_msg_put_unspec(request, TCA_MIRRED_PARMS, &m, sizeof m);
+    }
+    nl_msg_end_nested(request, offset);
+}
+
+static inline void
+nl_msg_put_act_cookie(struct ofpbuf *request, struct tc_cookie *ck) {
+    if (ck->len) {
+        nl_msg_put_unspec(request, TCA_ACT_COOKIE, ck->data, ck->len);
+    }
+}
+
+static void
+nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower)
+{
+    size_t offset;
+    size_t act_offset;
+
+    offset = nl_msg_start_nested(request, TCA_FLOWER_ACT);
+    {
+        uint16_t act_index = 1;
+
+        if (flower->set.set) {
+            act_offset = nl_msg_start_nested(request, act_index++);
+            nl_msg_put_act_tunnel_key_set(request, flower->set.id,
+                                          flower->set.ipv4.ipv4_src,
+                                          flower->set.ipv4.ipv4_dst,
+                                          &flower->set.ipv6.ipv6_src,
+                                          &flower->set.ipv6.ipv6_dst,
+                                          flower->set.tp_dst);
+            nl_msg_end_nested(request, act_offset);
+        }
+        if (flower->tunnel.tunnel) {
+            act_offset = nl_msg_start_nested(request, act_index++);
+            nl_msg_put_act_tunnel_key_release(request);
+            nl_msg_end_nested(request, act_offset);
+        }
+        if (flower->vlan_pop) {
+            act_offset = nl_msg_start_nested(request, act_index++);
+            nl_msg_put_act_pop_vlan(request);
+            nl_msg_end_nested(request, act_offset);
+        }
+        if (flower->vlan_push_id) {
+            act_offset = nl_msg_start_nested(request, act_index++);
+            nl_msg_put_act_push_vlan(request,
+                                     flower->vlan_push_id,
+                                     flower->vlan_push_prio);
+            nl_msg_end_nested(request, act_offset);
+        }
+        if (flower->ifindex_out) {
+            act_offset = nl_msg_start_nested(request, act_index++);
+            nl_msg_put_act_redirect(request, flower->ifindex_out);
+            nl_msg_put_act_cookie(request, &flower->act_cookie);
+            nl_msg_end_nested(request, act_offset);
+        } else {
+            act_offset = nl_msg_start_nested(request, act_index++);
+            nl_msg_put_act_drop(request);
+            nl_msg_put_act_cookie(request, &flower->act_cookie);
+            nl_msg_end_nested(request, act_offset);
+        }
+    }
+    nl_msg_end_nested(request, offset);
+}
+
+static void
+nl_msg_put_masked_value(struct ofpbuf *request, uint16_t type,
+                        uint16_t mask_type, const void *data,
+                        const void *mask_data, size_t len)
+{
+    if (mask_type != TCA_FLOWER_UNSPEC) {
+        if (is_all_zeros(mask_data, len)) {
+            return;
+        }
+        nl_msg_put_unspec(request, mask_type, mask_data, len);
+    }
+    nl_msg_put_unspec(request, type, data, len);
+}
+
+static void
+nl_msg_put_flower_tunnel(struct ofpbuf *request, struct tc_flower *flower)
+{
+    ovs_be32 ipv4_src = flower->tunnel.ipv4.ipv4_src;
+    ovs_be32 ipv4_dst = flower->tunnel.ipv4.ipv4_dst;
+    struct in6_addr *ipv6_src = &flower->tunnel.ipv6.ipv6_src;
+    struct in6_addr *ipv6_dst = &flower->tunnel.ipv6.ipv6_dst;
+    ovs_be16 tp_dst = flower->tunnel.tp_dst;
+    ovs_be32 id = be64_to_be32(flower->tunnel.id);
+
+    nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_KEY_ID, id);
+    if (ipv4_dst) {
+        nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_IPV4_SRC, ipv4_src);
+        nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_IPV4_DST, ipv4_dst);
+    } else if (!is_all_zeros(ipv6_dst, sizeof *ipv6_dst)) {
+        nl_msg_put_in6_addr(request, TCA_FLOWER_KEY_ENC_IPV6_SRC, ipv6_src);
+        nl_msg_put_in6_addr(request, TCA_FLOWER_KEY_ENC_IPV6_DST, ipv6_dst);
+    }
+    nl_msg_put_be16(request, TCA_FLOWER_KEY_ENC_UDP_DST_PORT, tp_dst);
+}
+
+static void
+nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower)
+{
+    uint16_t host_eth_type = ntohs(flower->key.eth_type);
+
+    nl_msg_put_masked_value(request,
+                            TCA_FLOWER_KEY_ETH_DST,
+                            TCA_FLOWER_KEY_ETH_DST_MASK,
+                            &flower->key.dst_mac,
+                            &flower->mask.dst_mac, ETH_ALEN);
+    nl_msg_put_masked_value(request,
+                            TCA_FLOWER_KEY_ETH_SRC,
+                            TCA_FLOWER_KEY_ETH_SRC_MASK,
+                            &flower->key.src_mac,
+                            &flower->mask.src_mac, ETH_ALEN);
+
+    if (host_eth_type == ETH_P_IP || host_eth_type == ETH_P_IPV6) {
+        if (flower->mask.ip_proto && flower->key.ip_proto) {
+            nl_msg_put_u8(request, TCA_FLOWER_KEY_IP_PROTO,
+                          flower->key.ip_proto);
+        }
+        if (flower->key.ip_proto == IPPROTO_UDP) {
+            nl_msg_put_masked_value(request,
+                                    TCA_FLOWER_KEY_UDP_SRC,
+                                    TCA_FLOWER_KEY_UDP_SRC_MASK,
+                                    &flower->key.src_port,
+                                    &flower->mask.src_port, 2);
+            nl_msg_put_masked_value(request,
+                                    TCA_FLOWER_KEY_UDP_DST,
+                                    TCA_FLOWER_KEY_UDP_DST_MASK,
+                                    &flower->key.dst_port,
+                                    &flower->mask.dst_port, 2);
+        } else if (flower->key.ip_proto == IPPROTO_TCP) {
+            nl_msg_put_masked_value(request,
+                                    TCA_FLOWER_KEY_TCP_SRC,
+                                    TCA_FLOWER_KEY_TCP_SRC_MASK,
+                                    &flower->key.src_port,
+                                    &flower->mask.src_port, 2);
+            nl_msg_put_masked_value(request,
+                                    TCA_FLOWER_KEY_TCP_DST,
+                                    TCA_FLOWER_KEY_TCP_DST_MASK,
+                                    &flower->key.dst_port,
+                                    &flower->mask.dst_port, 2);
+        }
+    }
+    if (host_eth_type == ETH_P_IP) {
+            nl_msg_put_masked_value(request,
+                                    TCA_FLOWER_KEY_IPV4_SRC,
+                                    TCA_FLOWER_KEY_IPV4_SRC_MASK,
+                                    &flower->key.ipv4.ipv4_src,
+                                    &flower->mask.ipv4.ipv4_src,
+                                    sizeof flower->key.ipv4.ipv4_src);
+            nl_msg_put_masked_value(request,
+                                    TCA_FLOWER_KEY_IPV4_DST,
+                                    TCA_FLOWER_KEY_IPV4_DST_MASK,
+                                    &flower->key.ipv4.ipv4_dst,
+                                    &flower->mask.ipv4.ipv4_dst,
+                                    sizeof flower->key.ipv4.ipv4_dst);
+    } else if (host_eth_type == ETH_P_IPV6) {
+            nl_msg_put_masked_value(request,
+                                    TCA_FLOWER_KEY_IPV6_SRC,
+                                    TCA_FLOWER_KEY_IPV6_SRC_MASK,
+                                    &flower->key.ipv6.ipv6_src,
+                                    &flower->mask.ipv6.ipv6_src,
+                                    sizeof flower->key.ipv6.ipv6_src);
+            nl_msg_put_masked_value(request,
+                                    TCA_FLOWER_KEY_IPV6_DST,
+                                    TCA_FLOWER_KEY_IPV6_DST_MASK,
+                                    &flower->key.ipv6.ipv6_dst,
+                                    &flower->mask.ipv6.ipv6_dst,
+                                    sizeof flower->key.ipv6.ipv6_dst);
+    }
+
+    nl_msg_put_be16(request, TCA_FLOWER_KEY_ETH_TYPE, flower->key.eth_type);
+
+    if (host_eth_type == ETH_P_8021Q) {
+        if (flower->key.vlan_id || flower->key.vlan_prio) {
+            nl_msg_put_u16(request, TCA_FLOWER_KEY_VLAN_ID,
+                           flower->key.vlan_id);
+            nl_msg_put_u8(request, TCA_FLOWER_KEY_VLAN_PRIO,
+                          flower->key.vlan_prio);
+        }
+        if (flower->key.encap_eth_type) {
+            nl_msg_put_be16(request, TCA_FLOWER_KEY_VLAN_ETH_TYPE,
+                            flower->key.encap_eth_type);
+        }
+    }
+
+    nl_msg_put_u32(request, TCA_FLOWER_FLAGS, 0);
+
+    if (flower->tunnel.tunnel) {
+        nl_msg_put_flower_tunnel(request, flower);
+    }
+
+    nl_msg_put_flower_acts(request, flower);
+}
+
+int
+tc_replace_flower(int ifindex, uint16_t prio, uint32_t handle,
+                  struct tc_flower *flower)
+{
+    struct ofpbuf request;
+    struct tcmsg *tcmsg;
+    struct ofpbuf *reply;
+    int error = 0;
+    size_t basic_offset;
+    uint16_t eth_type = (OVS_FORCE uint16_t) flower->key.eth_type;
+
+    tcmsg = tc_make_request(ifindex, RTM_NEWTFILTER,
+                            NLM_F_CREATE | NLM_F_ECHO, &request);
+    tcmsg->tcm_parent = tc_make_handle(0xffff, 0);
+    tcmsg->tcm_info = tc_make_handle(prio, eth_type);
+    tcmsg->tcm_handle = handle;
+
+    nl_msg_put_string(&request, TCA_KIND, "flower");
+    basic_offset = nl_msg_start_nested(&request, TCA_OPTIONS);
+    {
+        nl_msg_put_flower_options(&request, flower);
+    }
+    nl_msg_end_nested(&request, basic_offset);
+
+    error = tc_transact(&request, &reply);
+    if (!error) {
+        struct tcmsg *tc =
+            ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
+
+        flower->prio = tc_get_major(tc->tcm_info);
+        flower->handle = tc->tcm_handle;
+        ofpbuf_delete(reply);
+    }
+
+    return error;
+}
diff --git a/lib/tc.h b/lib/tc.h
index dc79208..816dce9 100644
--- a/lib/tc.h
+++ b/lib/tc.h
@@ -1,5 +1,6 @@ 
 /*
  * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017 Nicira, Inc.
+ * Copyright (c) 2016 Mellanox Technologies, Ltd.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -31,4 +32,95 @@  struct tcmsg *tc_make_request(int ifindex, int type,
 int tc_transact(struct ofpbuf *request, struct ofpbuf **replyp);
 int tc_add_del_ingress_qdisc(int ifindex, bool add);
 
+struct tc_cookie {
+    const void *data;
+    size_t len;
+};
+
+struct tc_flower_key {
+    ovs_be16 eth_type;
+    uint8_t ip_proto;
+
+    struct eth_addr dst_mac;
+    struct eth_addr src_mac;
+
+    ovs_be16 src_port;
+    ovs_be16 dst_port;
+
+    uint16_t vlan_id;
+    uint8_t vlan_prio;
+
+    ovs_be16 encap_eth_type;
+
+    union {
+        struct {
+            ovs_be32 ipv4_src;
+            ovs_be32 ipv4_dst;
+        } ipv4;
+        struct {
+            struct in6_addr ipv6_src;
+            struct in6_addr ipv6_dst;
+        } ipv6;
+    };
+};
+
+struct tc_flower {
+    uint32_t handle;
+    uint32_t prio;
+
+    struct tc_flower_key key;
+    struct tc_flower_key mask;
+
+    uint8_t vlan_pop;
+    uint16_t vlan_push_id;
+    uint8_t vlan_push_prio;
+
+    int ifindex_out;
+
+    struct ovs_flow_stats stats;
+    uint64_t lastused;
+
+    struct {
+        bool set;
+        ovs_be64 id;
+        ovs_be16 tp_src;
+        ovs_be16 tp_dst;
+        struct {
+            ovs_be32 ipv4_src;
+            ovs_be32 ipv4_dst;
+        } ipv4;
+        struct {
+            struct in6_addr ipv6_src;
+            struct in6_addr ipv6_dst;
+        } ipv6;
+    } set;
+
+    struct {
+        bool tunnel;
+        struct {
+            ovs_be32 ipv4_src;
+            ovs_be32 ipv4_dst;
+        } ipv4;
+        struct {
+            struct in6_addr ipv6_src;
+            struct in6_addr ipv6_dst;
+        } ipv6;
+        ovs_be64 id;
+        ovs_be16 tp_src;
+        ovs_be16 tp_dst;
+    } tunnel;
+
+    struct tc_cookie act_cookie;
+};
+
+int tc_replace_flower(int ifindex, uint16_t prio, uint32_t handle,
+                      struct tc_flower *flower);
+int tc_del_filter(int ifindex, int prio, int handle);
+int tc_get_flower(int ifindex, int prio, int handle,
+                  struct tc_flower *flower);
+int tc_flush(int ifindex);
+int tc_dump_flower_start(int ifindex, struct nl_dump *dump);
+int parse_netlink_to_tc_flower(struct ofpbuf *reply,
+                               struct tc_flower *flower);
+
 #endif /* tc.h */