[ovs-dev,4/4] netdev-tc-offloads: Add support for action set

Message ID 1502202114-57266-5-git-send-email-roid@mellanox.com
State Changes Requested
Headers show

Commit Message

Roi Dayan Aug. 8, 2017, 2:21 p.m.
From: Paul Blakey <paulb@mellanox.com>

Implement support for offloading ovs action set using
tc header rewrite action.

Signed-off-by: Paul Blakey <paulb@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
---
 lib/netdev-tc-offloads.c | 180 +++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 176 insertions(+), 4 deletions(-)

Comments

Joe Stringer Aug. 15, 2017, 1:04 a.m. | #1
On 8 August 2017 at 07:21, Roi Dayan <roid@mellanox.com> wrote:
> From: Paul Blakey <paulb@mellanox.com>
>
> Implement support for offloading ovs action set using
> tc header rewrite action.
>
> Signed-off-by: Paul Blakey <paulb@mellanox.com>
> Reviewed-by: Roi Dayan <roid@mellanox.com>
> ---

<snip>

> @@ -274,6 +346,48 @@ netdev_tc_flow_dump_destroy(struct netdev_flow_dump *dump)
>      return 0;
>  }
>
> +static void
> +parse_flower_rewrite_to_netlink_action(struct ofpbuf *buf,
> +                                       struct tc_flower *flower)
> +{
> +    char *mask = (char *) &flower->rewrite.mask;
> +    char *data = (char *) &flower->rewrite.key;
> +
> +    for (int type = 0; type < ARRAY_SIZE(set_flower_map); type++) {
> +        char *put = NULL;
> +        size_t nested = 0;
> +        int len = ovs_flow_key_attr_lens[type].len;
> +
> +        if (len <= 0) {
> +            continue;
> +        }
> +
> +        for (int j = 0; j < ARRAY_SIZE(set_flower_map[type]); j++) {
> +            struct netlink_field *f = &set_flower_map[type][j];
> +
> +            if (!f->size) {
> +                break;
> +            }

Seems like maybe there's similar feedback here to the previous patch
wrt sparsely populated array set_flower_map[].

> +
> +            if (!is_all_zeros(mask + f->flower_offset, f->size)) {
> +                if (!put) {
> +                    nested = nl_msg_start_nested(buf,
> +                                                 OVS_ACTION_ATTR_SET_MASKED);
> +                    put = nl_msg_put_unspec_zero(buf, type, len * 2);

Do we ever have multiple of these attributes in a single nested
OVS_ACTION_ATTR_SET_MASKED? If so, won't the following memcpys avoid
putting the netlink header for the subsequent sets?

> +                }
> +
> +                memcpy(put + f->offset, data + f->flower_offset, f->size);
> +                memcpy(put + len + f->offset,
> +                       mask + f->flower_offset, f->size);

Could we these combine with the nl_msg_put_unspec() above?
Paul Blakey Aug. 15, 2017, 7:28 a.m. | #2
On 15/08/2017 04:04, Joe Stringer wrote:
> On 8 August 2017 at 07:21, Roi Dayan <roid@mellanox.com> wrote:
>> From: Paul Blakey <paulb@mellanox.com>
>>
>> Implement support for offloading ovs action set using
>> tc header rewrite action.
>>
>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
>> Reviewed-by: Roi Dayan <roid@mellanox.com>
>> ---
> 
> <snip>
> 
>> @@ -274,6 +346,48 @@ netdev_tc_flow_dump_destroy(struct netdev_flow_dump *dump)
>>       return 0;
>>   }
>>
>> +static void
>> +parse_flower_rewrite_to_netlink_action(struct ofpbuf *buf,
>> +                                       struct tc_flower *flower)
>> +{
>> +    char *mask = (char *) &flower->rewrite.mask;
>> +    char *data = (char *) &flower->rewrite.key;
>> +
>> +    for (int type = 0; type < ARRAY_SIZE(set_flower_map); type++) {
>> +        char *put = NULL;
>> +        size_t nested = 0;
>> +        int len = ovs_flow_key_attr_lens[type].len;
>> +
>> +        if (len <= 0) {
>> +            continue;
>> +        }
>> +
>> +        for (int j = 0; j < ARRAY_SIZE(set_flower_map[type]); j++) {
>> +            struct netlink_field *f = &set_flower_map[type][j];
>> +
>> +            if (!f->size) {
>> +                break;
>> +            }
> 
> Seems like maybe there's similar feedback here to the previous patch
> wrt sparsely populated array set_flower_map[].
> 

I'll answer there.

>> +
>> +            if (!is_all_zeros(mask + f->flower_offset, f->size)) {
>> +                if (!put) {
>> +                    nested = nl_msg_start_nested(buf,
>> +                                                 OVS_ACTION_ATTR_SET_MASKED);
>> +                    put = nl_msg_put_unspec_zero(buf, type, len * 2);
> 
> Do we ever have multiple of these attributes in a single nested
> OVS_ACTION_ATTR_SET_MASKED? If so, won't the following memcpys avoid
> putting the netlink header for the subsequent sets?
> 

  OVS_ACTION_ATTR_SET_MASKED always has a single nested OVS_KEY_ATTR_* 
attribute so we put it (all zeros) the first time we find a sub 
attribute (e.g ipv4_dst of ipv4) that isn't zero and use memcpy below to 
overwrite the non zero sub attributes.

>> +                }
>> +
>> +                memcpy(put + f->offset, data + f->flower_offset, f->size);
>> +                memcpy(put + len + f->offset,
>> +                       mask + f->flower_offset, f->size);
> 
> Could we these combine with the nl_msg_put_unspec() above?
> 

We'd have to accumulate the data as we go over the inner loop and then 
write it.
We could, for example, create a stack buffer like char 
buff[MAX_ATTR_LEN] (kinda ugly), write it there, and if we wrote 
something to buff, flush it to a nested attribute in a single write.
Would that be better? It might be less readable and it is less flexible 
(defining of MAX_ATTR_LEN).
Paul Blakey Aug. 15, 2017, 8:19 a.m. | #3
On 15/08/2017 10:28, Paul Blakey wrote:
> 
> 
> On 15/08/2017 04:04, Joe Stringer wrote:
>> On 8 August 2017 at 07:21, Roi Dayan <roid@mellanox.com> wrote:
>>> From: Paul Blakey <paulb@mellanox.com>
>>>
>>> Implement support for offloading ovs action set using
>>> tc header rewrite action.
>>>
>>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
>>> Reviewed-by: Roi Dayan <roid@mellanox.com>
>>> ---
>>
>> <snip>
>>
>>> @@ -274,6 +346,48 @@ netdev_tc_flow_dump_destroy(struct 
>>> netdev_flow_dump *dump)
>>>       return 0;
>>>   }
>>>
>>> +static void
>>> +parse_flower_rewrite_to_netlink_action(struct ofpbuf *buf,
>>> +                                       struct tc_flower *flower)
>>> +{
>>> +    char *mask = (char *) &flower->rewrite.mask;
>>> +    char *data = (char *) &flower->rewrite.key;
>>> +
>>> +    for (int type = 0; type < ARRAY_SIZE(set_flower_map); type++) {
>>> +        char *put = NULL;
>>> +        size_t nested = 0;
>>> +        int len = ovs_flow_key_attr_lens[type].len;
>>> +
>>> +        if (len <= 0) {
>>> +            continue;
>>> +        }
>>> +
>>> +        for (int j = 0; j < ARRAY_SIZE(set_flower_map[type]); j++) {
>>> +            struct netlink_field *f = &set_flower_map[type][j];
>>> +
>>> +            if (!f->size) {
>>> +                break;
>>> +            }
>>
>> Seems like maybe there's similar feedback here to the previous patch
>> wrt sparsely populated array set_flower_map[].
>>
> 
> I'll answer there.

Scratch that, I actually meant to write it here.

With this map we have fast (direct lookup) access on the put path 
(parse_put_flow_set_masked_action) , and slow access on the reverse dump 
path (parse_flower_rewrite_to_netlink_action) which iterates over all 
indices, although it should skips them  fast if they are empty.

Changing this to sparse map will slow both of the paths, but dumping 
would be faster. We didn't write this maps for performance but for 
convince of adding new supported attributes.

What we can do to both maps is:

1) Using ovs thread once, we can create a fast (direct lookup) access 
map for both paths - generating a reverse map at init time and using 
that instead for dumping.
2) Rewrite it in a non sparse way, use it for both.
3) Rewrite it using a switch case, the logic for now is pretty much the 
same for all attributes.

What do you think?



> 
>>> +
>>> +            if (!is_all_zeros(mask + f->flower_offset, f->size)) {
>>> +                if (!put) {
>>> +                    nested = nl_msg_start_nested(buf,
>>> +                                                 
>>> OVS_ACTION_ATTR_SET_MASKED);
>>> +                    put = nl_msg_put_unspec_zero(buf, type, len * 2);
>>
>> Do we ever have multiple of these attributes in a single nested
>> OVS_ACTION_ATTR_SET_MASKED? If so, won't the following memcpys avoid
>> putting the netlink header for the subsequent sets?
>>
> 
>   OVS_ACTION_ATTR_SET_MASKED always has a single nested OVS_KEY_ATTR_* 
> attribute so we put it (all zeros) the first time we find a sub 
> attribute (e.g ipv4_dst of ipv4) that isn't zero and use memcpy below to 
> overwrite the non zero sub attributes.
> 
>>> +                }
>>> +
>>> +                memcpy(put + f->offset, data + f->flower_offset, 
>>> f->size);
>>> +                memcpy(put + len + f->offset,
>>> +                       mask + f->flower_offset, f->size);
>>
>> Could we these combine with the nl_msg_put_unspec() above?
>>
> 
> We'd have to accumulate the data as we go over the inner loop and then 
> write it.
> We could, for example, create a stack buffer like char 
> buff[MAX_ATTR_LEN] (kinda ugly), write it there, and if we wrote 
> something to buff, flush it to a nested attribute in a single write.
> Would that be better? It might be less readable and it is less flexible 
> (defining of MAX_ATTR_LEN).
> 
> 
> 
> 
> 
> 
>
Joe Stringer Aug. 15, 2017, 5:24 p.m. | #4
On 15 August 2017 at 01:19, Paul Blakey <paulb@mellanox.com> wrote:
>
>
> On 15/08/2017 10:28, Paul Blakey wrote:
>>
>>
>>
>> On 15/08/2017 04:04, Joe Stringer wrote:
>>>
>>> On 8 August 2017 at 07:21, Roi Dayan <roid@mellanox.com> wrote:
>>>>
>>>> From: Paul Blakey <paulb@mellanox.com>
>>>>
>>>> Implement support for offloading ovs action set using
>>>> tc header rewrite action.
>>>>
>>>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
>>>> Reviewed-by: Roi Dayan <roid@mellanox.com>
>>>> ---
>>>
>>>
>>> <snip>
>>>
>>>> @@ -274,6 +346,48 @@ netdev_tc_flow_dump_destroy(struct netdev_flow_dump
>>>> *dump)
>>>>       return 0;
>>>>   }
>>>>
>>>> +static void
>>>> +parse_flower_rewrite_to_netlink_action(struct ofpbuf *buf,
>>>> +                                       struct tc_flower *flower)
>>>> +{
>>>> +    char *mask = (char *) &flower->rewrite.mask;
>>>> +    char *data = (char *) &flower->rewrite.key;
>>>> +
>>>> +    for (int type = 0; type < ARRAY_SIZE(set_flower_map); type++) {
>>>> +        char *put = NULL;
>>>> +        size_t nested = 0;
>>>> +        int len = ovs_flow_key_attr_lens[type].len;
>>>> +
>>>> +        if (len <= 0) {
>>>> +            continue;
>>>> +        }
>>>> +
>>>> +        for (int j = 0; j < ARRAY_SIZE(set_flower_map[type]); j++) {
>>>> +            struct netlink_field *f = &set_flower_map[type][j];
>>>> +
>>>> +            if (!f->size) {
>>>> +                break;
>>>> +            }
>>>
>>>
>>> Seems like maybe there's similar feedback here to the previous patch
>>> wrt sparsely populated array set_flower_map[].
>>>
>>
>> I'll answer there.
>
>
> Scratch that, I actually meant to write it here.
>
> With this map we have fast (direct lookup) access on the put path
> (parse_put_flow_set_masked_action) , and slow access on the reverse dump
> path (parse_flower_rewrite_to_netlink_action) which iterates over all
> indices, although it should skips them  fast if they are empty.
>
> Changing this to sparse map will slow both of the paths, but dumping would
> be faster. We didn't write this maps for performance but for convince of
> adding new supported attributes.

OK, thanks for the explanation. I figured there must be a reason it
was indexed that way, I just didn't see it in my first review pass
through the series. Obviously fast path should be fast, and typically
we don't worry quite as much about dumping fast (although I have in
the past tested this to determine how many flows we will attempt to
populate in adverse conditions).

> What we can do to both maps is:
>
> 1) Using ovs thread once, we can create a fast (direct lookup) access map
> for both paths - generating a reverse map at init time and using that
> instead for dumping.

Until there's a clear, known advantage I'm not sure if we should do this.

> 2) Rewrite it in a non sparse way, use it for both.

I think that we generally are more interested in 'put' performance so
if this sacrifices it for 'dump' performance, then it's probably not a
good idea (though it may still be too early to really optimize these).

> 3) Rewrite it using a switch case, the logic for now is pretty much the same
> for all attributes.
>
> What do you think?

Do you think that this third option would improve the readability,
allow more compile-time checking on the code, or more code reuse?
Joe Stringer Aug. 15, 2017, 5:30 p.m. | #5
On 8 August 2017 at 07:21, Roi Dayan <roid@mellanox.com> wrote:
> From: Paul Blakey <paulb@mellanox.com>
>
> Implement support for offloading ovs action set using
> tc header rewrite action.
>
> Signed-off-by: Paul Blakey <paulb@mellanox.com>
> Reviewed-by: Roi Dayan <roid@mellanox.com>
> ---

<snip>

Another couple of bits I noticed while responding..

> @@ -454,10 +572,56 @@ netdev_tc_flow_dump_next(struct netdev_flow_dump *dump,
>  }
>
>  static int
> +parse_put_flow_set_masked_action(struct tc_flower *flower,
> +                                 const struct nlattr *set,
> +                                 size_t set_len,
> +                                 bool hasmask)
> +{
> +    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
> +    const struct nlattr *set_attr;
> +    char *key = (char *) &flower->rewrite.key;
> +    char *mask = (char *) &flower->rewrite.mask;
> +    size_t set_left;
> +    int i, j;
> +
> +    NL_ATTR_FOR_EACH_UNSAFE(set_attr, set_left, set, set_len) {
> +        int type = nl_attr_type(set_attr);
> +        size_t size = nl_attr_get_size(set_attr) / 2;
> +        char *set_data = CONST_CAST(char *, nl_attr_get(set_attr));
> +        char *set_mask = set_data + size;
> +
> +        if (type >= ARRAY_SIZE(set_flower_map)) {
> +            VLOG_DBG_RL(&rl, "unsupported set action type: %d", type);
> +            return EOPNOTSUPP;

I think that this assumes that 'set_flower_map' has every entry
populated up until the maximum supported key field - but are some
missing - for example OVS_KEY_ATTR_VLAN. Could we also check by
indexing into set_flower_map and checking if it's a valid entry?

> +        }
> +
> +        for (i = 0; i < ARRAY_SIZE(set_flower_map[type]); i++) {
> +            struct netlink_field *f = &set_flower_map[type][i];

Separate thought - you're iterating nlattrs above then iterating all
the key attributes in the flower_map here. Couldn't you just directly
index into the packet field that this action will modify?

> +
> +            if (!f->size) {
> +                break;
> +            }

I think that headers that are not in the set_flower_map will hit this,
which would be unsupported (similar to above comment).

> +
> +            for (j = 0; j < f->size; j++) {
> +                char maskval = hasmask ? set_mask[f->offset + j] : 0xFF;
> +
> +                key[f->flower_offset + j] = maskval & set_data[f->offset + j];
> +                mask[f->flower_offset + j] = maskval;
> +            }
> +        }
> +    }
> +
> +    if (!is_all_zeros(&flower->rewrite, sizeof flower->rewrite)) {
> +        flower->rewrite.rewrite = true;
> +    }
> +
> +    return 0;
> +}
> +
> +static int
>  parse_put_flow_set_action(struct tc_flower *flower, const struct nlattr *set,
>                            size_t set_len)
>  {
> -    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>      const struct nlattr *set_attr;
>      size_t set_left;
>
Paul Blakey Aug. 17, 2017, 7:52 a.m. | #6
On 15/08/2017 20:24, Joe Stringer wrote:
> On 15 August 2017 at 01:19, Paul Blakey <paulb@mellanox.com> wrote:
>>
>>
>> On 15/08/2017 10:28, Paul Blakey wrote:
>>>
>>>
>>>
>>> On 15/08/2017 04:04, Joe Stringer wrote:
>>>>
>>>> On 8 August 2017 at 07:21, Roi Dayan <roid@mellanox.com> wrote:
>>>>>
>>>>> From: Paul Blakey <paulb@mellanox.com>
>>>>>
>>>>> Implement support for offloading ovs action set using
>>>>> tc header rewrite action.
>>>>>
>>>>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
>>>>> Reviewed-by: Roi Dayan <roid@mellanox.com>
>>>>> ---
>>>>
>>>>
>>>> <snip>
>>>>
>>>>> @@ -274,6 +346,48 @@ netdev_tc_flow_dump_destroy(struct netdev_flow_dump
>>>>> *dump)
>>>>>        return 0;
>>>>>    }
>>>>>
>>>>> +static void
>>>>> +parse_flower_rewrite_to_netlink_action(struct ofpbuf *buf,
>>>>> +                                       struct tc_flower *flower)
>>>>> +{
>>>>> +    char *mask = (char *) &flower->rewrite.mask;
>>>>> +    char *data = (char *) &flower->rewrite.key;
>>>>> +
>>>>> +    for (int type = 0; type < ARRAY_SIZE(set_flower_map); type++) {
>>>>> +        char *put = NULL;
>>>>> +        size_t nested = 0;
>>>>> +        int len = ovs_flow_key_attr_lens[type].len;
>>>>> +
>>>>> +        if (len <= 0) {
>>>>> +            continue;
>>>>> +        }
>>>>> +
>>>>> +        for (int j = 0; j < ARRAY_SIZE(set_flower_map[type]); j++) {
>>>>> +            struct netlink_field *f = &set_flower_map[type][j];
>>>>> +
>>>>> +            if (!f->size) {
>>>>> +                break;
>>>>> +            }
>>>>
>>>>
>>>> Seems like maybe there's similar feedback here to the previous patch
>>>> wrt sparsely populated array set_flower_map[].
>>>>
>>>
>>> I'll answer there.
>>
>>
>> Scratch that, I actually meant to write it here.
>>
>> With this map we have fast (direct lookup) access on the put path
>> (parse_put_flow_set_masked_action) , and slow access on the reverse dump
>> path (parse_flower_rewrite_to_netlink_action) which iterates over all
>> indices, although it should skips them  fast if they are empty.
>>
>> Changing this to sparse map will slow both of the paths, but dumping would
>> be faster. We didn't write this maps for performance but for convince of
>> adding new supported attributes.
> 
> OK, thanks for the explanation. I figured there must be a reason it
> was indexed that way, I just didn't see it in my first review pass
> through the series. Obviously fast path should be fast, and typically
> we don't worry quite as much about dumping fast (although I have in
> the past tested this to determine how many flows we will attempt to
> populate in adverse conditions).
> 
>> What we can do to both maps is:
>>
>> 1) Using ovs thread once, we can create a fast (direct lookup) access map;
>> for both paths - generating a reverse map at init time and using that
>> instead for dumping.
> 
> Until there's a clear, known advantage I'm not sure if we should do this.
> 
>> 2) Rewrite it in a non sparse way, use it for both.
> 
> I think that we generally are more interested in 'put' performance so
> if this sacrifices it for 'dump' performance, then it's probably not a
> good idea (though it may still be too early to really optimize these).
> 
>> 3) Rewrite it using a switch case, the logic for now is pretty much the same
>> for all attributes.
>>
>> What do you think?
> 
> Do you think that this third option would improve the readability,
> allow more compile-time checking on the code, or more code reuse?
> 

Hi Joe, Thanks for the review,

Since it can be implemented by a map, as with this patch set, all the 
switch cases will have the same logic of translating from OVS action set 
attributes to flower members, so I think its not much an increase in 
readability unless you're looking at a single case.
And regarding compile time checking, we still need to convert from 
flower key members to tc action pedit raw offsets/values so we won't 
have that there.

I'd like to remove the tc flower struct in the future
and work straight on struct match and actions
so then we will have only the set action to pedit conversion (and one 
conversion for all the rest).

I think I'd rather keep the map till it won't fit for the task or we do 
the above if that's OK with you.
Paul Blakey Aug. 17, 2017, 8:38 a.m. | #7
On 15/08/2017 20:30, Joe Stringer wrote:
> On 8 August 2017 at 07:21, Roi Dayan <roid@mellanox.com> wrote:
>> From: Paul Blakey <paulb@mellanox.com>
>>
>> Implement support for offloading ovs action set using
>> tc header rewrite action.
>>
>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
>> Reviewed-by: Roi Dayan <roid@mellanox.com>
>> ---
> 
> <snip>
> 
> Another couple of bits I noticed while responding..
> 
>> @@ -454,10 +572,56 @@ netdev_tc_flow_dump_next(struct netdev_flow_dump *dump,
>>   }
>>
>>   static int
>> +parse_put_flow_set_masked_action(struct tc_flower *flower,
>> +                                 const struct nlattr *set,
>> +                                 size_t set_len,
>> +                                 bool hasmask)
>> +{
>> +    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>> +    const struct nlattr *set_attr;
>> +    char *key = (char *) &flower->rewrite.key;
>> +    char *mask = (char *) &flower->rewrite.mask;
>> +    size_t set_left;
>> +    int i, j;
>> +
>> +    NL_ATTR_FOR_EACH_UNSAFE(set_attr, set_left, set, set_len) {
>> +        int type = nl_attr_type(set_attr);
>> +        size_t size = nl_attr_get_size(set_attr) / 2;
>> +        char *set_data = CONST_CAST(char *, nl_attr_get(set_attr));
>> +        char *set_mask = set_data + size;
>> +
>> +        if (type >= ARRAY_SIZE(set_flower_map)) {
>> +            VLOG_DBG_RL(&rl, "unsupported set action type: %d", type);
>> +            return EOPNOTSUPP;
> 
> I think that this assumes that 'set_flower_map' has every entry
> populated up until the maximum supported key field - but are some
> missing - for example OVS_KEY_ATTR_VLAN. Could we also check by
> indexing into set_flower_map and checking if it's a valid entry?

Good catch, thanks, will be fixed.

> 
>> +        }
>> +
>> +        for (i = 0; i < ARRAY_SIZE(set_flower_map[type]); i++) {
>> +            struct netlink_field *f = &set_flower_map[type][i];
> 
> Separate thought - you're iterating nlattrs above then iterating all
> the key attributes in the flower_map here. Couldn't you just directly
> index into the packet field that this action will modify?
> 
>> +
>> +            if (!f->size) {
>> +                break;
>> +            }
> 
> I think that headers that are not in the set_flower_map will hit this,
> which would be unsupported (similar to above comment).

This is for null entries in the map, like OVS_KEY_ATTR_IPV6 has only
two entries in the second dimension (src and dst) but it can have up to 
3 so the third size is zeroed to skip it.

> 
>> +
>> +            for (j = 0; j < f->size; j++) {
>> +                char maskval = hasmask ? set_mask[f->offset + j] : 0xFF;
>> +
>> +                key[f->flower_offset + j] = maskval & set_data[f->offset + j];
>> +                mask[f->flower_offset + j] = maskval;
>> +            }
>> +        }
>> +    }
>> +
>> +    if (!is_all_zeros(&flower->rewrite, sizeof flower->rewrite)) {
>> +        flower->rewrite.rewrite = true;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int
>>   parse_put_flow_set_action(struct tc_flower *flower, const struct nlattr *set,
>>                             size_t set_len)
>>   {
>> -    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>>       const struct nlattr *set_attr;
>>       size_t set_left;
>>


Thanks for the review guys, We'll send a new patch set.
Joe Stringer Aug. 17, 2017, 11:52 p.m. | #8
On 17 August 2017 at 00:52, Paul Blakey <paulb@mellanox.com> wrote:
>
>
> On 15/08/2017 20:24, Joe Stringer wrote:
>>
>> On 15 August 2017 at 01:19, Paul Blakey <paulb@mellanox.com> wrote:
>>>
>>>
>>>
>>> On 15/08/2017 10:28, Paul Blakey wrote:
>>>>
>>>>
>>>>
>>>>
>>>> On 15/08/2017 04:04, Joe Stringer wrote:
>>>>>
>>>>>
>>>>> On 8 August 2017 at 07:21, Roi Dayan <roid@mellanox.com> wrote:
>>>>>>
>>>>>>
>>>>>> From: Paul Blakey <paulb@mellanox.com>
>>>>>>
>>>>>> Implement support for offloading ovs action set using
>>>>>> tc header rewrite action.
>>>>>>
>>>>>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
>>>>>> Reviewed-by: Roi Dayan <roid@mellanox.com>
>>>>>> ---
>>>>>
>>>>>
>>>>>
>>>>> <snip>
>>>>>
>>>>>> @@ -274,6 +346,48 @@ netdev_tc_flow_dump_destroy(struct
>>>>>> netdev_flow_dump
>>>>>> *dump)
>>>>>>        return 0;
>>>>>>    }
>>>>>>
>>>>>> +static void
>>>>>> +parse_flower_rewrite_to_netlink_action(struct ofpbuf *buf,
>>>>>> +                                       struct tc_flower *flower)
>>>>>> +{
>>>>>> +    char *mask = (char *) &flower->rewrite.mask;
>>>>>> +    char *data = (char *) &flower->rewrite.key;
>>>>>> +
>>>>>> +    for (int type = 0; type < ARRAY_SIZE(set_flower_map); type++) {
>>>>>> +        char *put = NULL;
>>>>>> +        size_t nested = 0;
>>>>>> +        int len = ovs_flow_key_attr_lens[type].len;
>>>>>> +
>>>>>> +        if (len <= 0) {
>>>>>> +            continue;
>>>>>> +        }
>>>>>> +
>>>>>> +        for (int j = 0; j < ARRAY_SIZE(set_flower_map[type]); j++) {
>>>>>> +            struct netlink_field *f = &set_flower_map[type][j];
>>>>>> +
>>>>>> +            if (!f->size) {
>>>>>> +                break;
>>>>>> +            }
>>>>>
>>>>>
>>>>>
>>>>> Seems like maybe there's similar feedback here to the previous patch
>>>>> wrt sparsely populated array set_flower_map[].
>>>>>
>>>>
>>>> I'll answer there.
>>>
>>>
>>>
>>> Scratch that, I actually meant to write it here.
>>>
>>> With this map we have fast (direct lookup) access on the put path
>>> (parse_put_flow_set_masked_action) , and slow access on the reverse dump
>>> path (parse_flower_rewrite_to_netlink_action) which iterates over all
>>> indices, although it should skips them  fast if they are empty.
>>>
>>> Changing this to sparse map will slow both of the paths, but dumping
>>> would
>>> be faster. We didn't write this maps for performance but for convince of
>>> adding new supported attributes.
>>
>>
>> OK, thanks for the explanation. I figured there must be a reason it
>> was indexed that way, I just didn't see it in my first review pass
>> through the series. Obviously fast path should be fast, and typically
>> we don't worry quite as much about dumping fast (although I have in
>> the past tested this to determine how many flows we will attempt to
>> populate in adverse conditions).
>>
>>> What we can do to both maps is:
>>>
>>> 1) Using ovs thread once, we can create a fast (direct lookup) access
>>> map;
>>> for both paths - generating a reverse map at init time and using that
>>> instead for dumping.
>>
>>
>> Until there's a clear, known advantage I'm not sure if we should do this.
>>
>>> 2) Rewrite it in a non sparse way, use it for both.
>>
>>
>> I think that we generally are more interested in 'put' performance so
>> if this sacrifices it for 'dump' performance, then it's probably not a
>> good idea (though it may still be too early to really optimize these).
>>
>>> 3) Rewrite it using a switch case, the logic for now is pretty much the
>>> same
>>> for all attributes.
>>>
>>> What do you think?
>>
>>
>> Do you think that this third option would improve the readability,
>> allow more compile-time checking on the code, or more code reuse?
>>
>
> Hi Joe, Thanks for the review,
>
> Since it can be implemented by a map, as with this patch set, all the switch
> cases will have the same logic of translating from OVS action set attributes
> to flower members, so I think its not much an increase in readability unless
> you're looking at a single case.
> And regarding compile time checking, we still need to convert from flower
> key members to tc action pedit raw offsets/values so we won't have that
> there.
>
> I'd like to remove the tc flower struct in the future
> and work straight on struct match and actions
> so then we will have only the set action to pedit conversion (and one
> conversion for all the rest).
>
> I think I'd rather keep the map till it won't fit for the task or we do the
> above if that's OK with you.

That seems reasonable enough to me.

I'm not sure exactly what you have in mind for removing the tc flower
struct in future, but I recognise that there's a bunch of inefficient
translation between different formats in this path today so I hope
we'll be able to improve this in the future. One of the things I've
had in mind for some time is to update the dpif layer to use 'struct
flow' for matches rather than OVS netlink-formatted matches, so then
the conversion can be pushed closer down to the kernel and we might
also be able to get rid of some of those conversions in this TC code.
We'll see.
Paul Blakey Aug. 20, 2017, 8:53 a.m. | #9
On 18/08/2017 02:52, Joe Stringer wrote:
> On 17 August 2017 at 00:52, Paul Blakey <paulb@mellanox.com> wrote:
>>
>>
>> On 15/08/2017 20:24, Joe Stringer wrote:
>>>
>>> On 15 August 2017 at 01:19, Paul Blakey <paulb@mellanox.com> wrote:
>>>>
>>>>
>>>>
>>>> On 15/08/2017 10:28, Paul Blakey wrote:
>>>>>
>>>>>
>>>>>
>>>>>
>>>>> On 15/08/2017 04:04, Joe Stringer wrote:
>>>>>>
>>>>>>
>>>>>> On 8 August 2017 at 07:21, Roi Dayan <roid@mellanox.com> wrote:
>>>>>>>
>>>>>>>
>>>>>>> From: Paul Blakey <paulb@mellanox.com>
>>>>>>>
>>>>>>> Implement support for offloading ovs action set using
>>>>>>> tc header rewrite action.
>>>>>>>
>>>>>>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
>>>>>>> Reviewed-by: Roi Dayan <roid@mellanox.com>
>>>>>>> ---
>>>>>>
>>>>>>
>>>>>>
>>>>>> <snip>
>>>>>>
>>>>>>> @@ -274,6 +346,48 @@ netdev_tc_flow_dump_destroy(struct
>>>>>>> netdev_flow_dump
>>>>>>> *dump)
>>>>>>>         return 0;
>>>>>>>     }
>>>>>>>
>>>>>>> +static void
>>>>>>> +parse_flower_rewrite_to_netlink_action(struct ofpbuf *buf,
>>>>>>> +                                       struct tc_flower *flower)
>>>>>>> +{
>>>>>>> +    char *mask = (char *) &flower->rewrite.mask;
>>>>>>> +    char *data = (char *) &flower->rewrite.key;
>>>>>>> +
>>>>>>> +    for (int type = 0; type < ARRAY_SIZE(set_flower_map); type++) {
>>>>>>> +        char *put = NULL;
>>>>>>> +        size_t nested = 0;
>>>>>>> +        int len = ovs_flow_key_attr_lens[type].len;
>>>>>>> +
>>>>>>> +        if (len <= 0) {
>>>>>>> +            continue;
>>>>>>> +        }
>>>>>>> +
>>>>>>> +        for (int j = 0; j < ARRAY_SIZE(set_flower_map[type]); j++) {
>>>>>>> +            struct netlink_field *f = &set_flower_map[type][j];
>>>>>>> +
>>>>>>> +            if (!f->size) {
>>>>>>> +                break;
>>>>>>> +            }
>>>>>>
>>>>>>
>>>>>>
>>>>>> Seems like maybe there's similar feedback here to the previous patch
>>>>>> wrt sparsely populated array set_flower_map[].
>>>>>>
>>>>>
>>>>> I'll answer there.
>>>>
>>>>
>>>>
>>>> Scratch that, I actually meant to write it here.
>>>>
>>>> With this map we have fast (direct lookup) access on the put path
>>>> (parse_put_flow_set_masked_action) , and slow access on the reverse dump
>>>> path (parse_flower_rewrite_to_netlink_action) which iterates over all
>>>> indices, although it should skips them  fast if they are empty.
>>>>
>>>> Changing this to sparse map will slow both of the paths, but dumping
>>>> would
>>>> be faster. We didn't write this maps for performance but for convince of
>>>> adding new supported attributes.
>>>
>>>
>>> OK, thanks for the explanation. I figured there must be a reason it
>>> was indexed that way, I just didn't see it in my first review pass
>>> through the series. Obviously fast path should be fast, and typically
>>> we don't worry quite as much about dumping fast (although I have in
>>> the past tested this to determine how many flows we will attempt to
>>> populate in adverse conditions).
>>>
>>>> What we can do to both maps is:
>>>>
>>>> 1) Using ovs thread once, we can create a fast (direct lookup) access
>>>> map;
>>>> for both paths - generating a reverse map at init time and using that
>>>> instead for dumping.
>>>
>>>
>>> Until there's a clear, known advantage I'm not sure if we should do this.
>>>
>>>> 2) Rewrite it in a non sparse way, use it for both.
>>>
>>>
>>> I think that we generally are more interested in 'put' performance so
>>> if this sacrifices it for 'dump' performance, then it's probably not a
>>> good idea (though it may still be too early to really optimize these).
>>>
>>>> 3) Rewrite it using a switch case, the logic for now is pretty much the
>>>> same
>>>> for all attributes.
>>>>
>>>> What do you think?
>>>
>>>
>>> Do you think that this third option would improve the readability,
>>> allow more compile-time checking on the code, or more code reuse?
>>>
>>
>> Hi Joe, Thanks for the review,
>>
>> Since it can be implemented by a map, as with this patch set, all the switch
>> cases will have the same logic of translating from OVS action set attributes
>> to flower members, so I think its not much an increase in readability unless
>> you're looking at a single case.
>> And regarding compile time checking, we still need to convert from flower
>> key members to tc action pedit raw offsets/values so we won't have that
>> there.
>>
>> I'd like to remove the tc flower struct in the future
>> and work straight on struct match and actions
>> so then we will have only the set action to pedit conversion (and one
>> conversion for all the rest).
>>
>> I think I'd rather keep the map till it won't fit for the task or we do the
>> above if that's OK with you.
> 
> That seems reasonable enough to me.
> 
> I'm not sure exactly what you have in mind for removing the tc flower
> struct in future, but I recognise that there's a bunch of inefficient
> translation between different formats in this path today so I hope
> we'll be able to improve this in the future. One of the things I've
> had in mind for some time is to update the dpif layer to use 'struct
> flow' for matches rather than OVS netlink-formatted matches, so then
> the conversion can be pushed closer down to the kernel and we might
> also be able to get rid of some of those conversions in this TC code.
> We'll see.
> 

Sounds good, that will save another conversion from netlink buf to 
struct match that dpif netdev and offloading do.

Patch

diff --git a/lib/netdev-tc-offloads.c b/lib/netdev-tc-offloads.c
index 2af1546..505e25e 100644
--- a/lib/netdev-tc-offloads.c
+++ b/lib/netdev-tc-offloads.c
@@ -27,11 +27,13 @@ 
 #include "openvswitch/ofpbuf.h"
 #include "openvswitch/thread.h"
 #include "openvswitch/types.h"
+#include "openvswitch/util.h"
 #include "openvswitch/vlog.h"
 #include "netdev-linux.h"
 #include "netlink.h"
 #include "netlink-socket.h"
 #include "odp-netlink.h"
+#include "odp-util.h"
 #include "tc.h"
 #include "unaligned.h"
 #include "util.h"
@@ -41,6 +43,76 @@  VLOG_DEFINE_THIS_MODULE(netdev_tc_offloads);
 static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5);
 
 static struct hmap ufid_tc = HMAP_INITIALIZER(&ufid_tc);
+
+struct netlink_field {
+    int offset;
+    int flower_offset;
+    int size;
+};
+
+static struct netlink_field set_flower_map[][3] = {
+    [OVS_KEY_ATTR_IPV4] = {
+        { offsetof(struct ovs_key_ipv4, ipv4_src),
+          offsetof(struct tc_flower_key, ipv4.ipv4_src),
+          MEMBER_SIZEOF(struct tc_flower_key, ipv4.ipv4_src)
+        },
+        { offsetof(struct ovs_key_ipv4, ipv4_dst),
+          offsetof(struct tc_flower_key, ipv4.ipv4_dst),
+          MEMBER_SIZEOF(struct tc_flower_key, ipv4.ipv4_dst)
+        },
+        { offsetof(struct ovs_key_ipv4, ipv4_ttl),
+          offsetof(struct tc_flower_key, ipv4.rewrite_ttl),
+          MEMBER_SIZEOF(struct tc_flower_key, ipv4.rewrite_ttl)
+        },
+    },
+    [OVS_KEY_ATTR_IPV6] = {
+        { offsetof(struct ovs_key_ipv6, ipv6_src),
+          offsetof(struct tc_flower_key, ipv6.ipv6_src),
+          MEMBER_SIZEOF(struct tc_flower_key, ipv6.ipv6_src)
+        },
+        { offsetof(struct ovs_key_ipv6, ipv6_dst),
+          offsetof(struct tc_flower_key, ipv6.ipv6_dst),
+          MEMBER_SIZEOF(struct tc_flower_key, ipv6.ipv6_dst)
+        },
+    },
+    [OVS_KEY_ATTR_ETHERNET] = {
+        { offsetof(struct ovs_key_ethernet, eth_src),
+          offsetof(struct tc_flower_key, src_mac),
+          MEMBER_SIZEOF(struct tc_flower_key, src_mac)
+        },
+        { offsetof(struct ovs_key_ethernet, eth_dst),
+          offsetof(struct tc_flower_key, dst_mac),
+          MEMBER_SIZEOF(struct tc_flower_key, dst_mac)
+        },
+    },
+    [OVS_KEY_ATTR_ETHERTYPE] = {
+        { 0,
+          offsetof(struct tc_flower_key, eth_type),
+          MEMBER_SIZEOF(struct tc_flower_key, eth_type)
+        },
+    },
+    [OVS_KEY_ATTR_TCP] = {
+        { offsetof(struct ovs_key_tcp, tcp_src),
+          offsetof(struct tc_flower_key, tcp_src),
+          MEMBER_SIZEOF(struct tc_flower_key, tcp_src)
+        },
+        { offsetof(struct ovs_key_tcp, tcp_dst),
+          offsetof(struct tc_flower_key, tcp_dst),
+          MEMBER_SIZEOF(struct tc_flower_key, tcp_dst)
+        },
+    },
+    [OVS_KEY_ATTR_UDP] = {
+        { offsetof(struct ovs_key_udp, udp_src),
+          offsetof(struct tc_flower_key, udp_src),
+          MEMBER_SIZEOF(struct tc_flower_key, udp_src)
+        },
+        { offsetof(struct ovs_key_udp, udp_dst),
+          offsetof(struct tc_flower_key, udp_dst),
+          MEMBER_SIZEOF(struct tc_flower_key, udp_dst)
+        },
+    },
+};
+
 static struct ovs_mutex ufid_lock = OVS_MUTEX_INITIALIZER;
 
 /**
@@ -274,6 +346,48 @@  netdev_tc_flow_dump_destroy(struct netdev_flow_dump *dump)
     return 0;
 }
 
+static void
+parse_flower_rewrite_to_netlink_action(struct ofpbuf *buf,
+                                       struct tc_flower *flower)
+{
+    char *mask = (char *) &flower->rewrite.mask;
+    char *data = (char *) &flower->rewrite.key;
+
+    for (int type = 0; type < ARRAY_SIZE(set_flower_map); type++) {
+        char *put = NULL;
+        size_t nested = 0;
+        int len = ovs_flow_key_attr_lens[type].len;
+
+        if (len <= 0) {
+            continue;
+        }
+
+        for (int j = 0; j < ARRAY_SIZE(set_flower_map[type]); j++) {
+            struct netlink_field *f = &set_flower_map[type][j];
+
+            if (!f->size) {
+                break;
+            }
+
+            if (!is_all_zeros(mask + f->flower_offset, f->size)) {
+                if (!put) {
+                    nested = nl_msg_start_nested(buf,
+                                                 OVS_ACTION_ATTR_SET_MASKED);
+                    put = nl_msg_put_unspec_zero(buf, type, len * 2);
+                }
+
+                memcpy(put + f->offset, data + f->flower_offset, f->size);
+                memcpy(put + len + f->offset,
+                       mask + f->flower_offset, f->size);
+            }
+        }
+
+        if (put) {
+            nl_msg_end_nested(buf, nested);
+        }
+    }
+}
+
 static int
 parse_tc_flower_to_match(struct tc_flower *flower,
                          struct match *match,
@@ -364,6 +478,10 @@  parse_tc_flower_to_match(struct tc_flower *flower,
                                    | VLAN_CFI);
         }
 
+        if (flower->rewrite.rewrite) {
+            parse_flower_rewrite_to_netlink_action(buf, flower);
+        }
+
         if (flower->set.set) {
             size_t set_offset = nl_msg_start_nested(buf, OVS_ACTION_ATTR_SET);
             size_t tunnel_offset =
@@ -454,10 +572,56 @@  netdev_tc_flow_dump_next(struct netdev_flow_dump *dump,
 }
 
 static int
+parse_put_flow_set_masked_action(struct tc_flower *flower,
+                                 const struct nlattr *set,
+                                 size_t set_len,
+                                 bool hasmask)
+{
+    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
+    const struct nlattr *set_attr;
+    char *key = (char *) &flower->rewrite.key;
+    char *mask = (char *) &flower->rewrite.mask;
+    size_t set_left;
+    int i, j;
+
+    NL_ATTR_FOR_EACH_UNSAFE(set_attr, set_left, set, set_len) {
+        int type = nl_attr_type(set_attr);
+        size_t size = nl_attr_get_size(set_attr) / 2;
+        char *set_data = CONST_CAST(char *, nl_attr_get(set_attr));
+        char *set_mask = set_data + size;
+
+        if (type >= ARRAY_SIZE(set_flower_map)) {
+            VLOG_DBG_RL(&rl, "unsupported set action type: %d", type);
+            return EOPNOTSUPP;
+        }
+
+        for (i = 0; i < ARRAY_SIZE(set_flower_map[type]); i++) {
+            struct netlink_field *f = &set_flower_map[type][i];
+
+            if (!f->size) {
+                break;
+            }
+
+            for (j = 0; j < f->size; j++) {
+                char maskval = hasmask ? set_mask[f->offset + j] : 0xFF;
+
+                key[f->flower_offset + j] = maskval & set_data[f->offset + j];
+                mask[f->flower_offset + j] = maskval;
+            }
+        }
+    }
+
+    if (!is_all_zeros(&flower->rewrite, sizeof flower->rewrite)) {
+        flower->rewrite.rewrite = true;
+    }
+
+    return 0;
+}
+
+static int
 parse_put_flow_set_action(struct tc_flower *flower, const struct nlattr *set,
                           size_t set_len)
 {
-    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
     const struct nlattr *set_attr;
     size_t set_left;
 
@@ -504,9 +668,8 @@  parse_put_flow_set_action(struct tc_flower *flower, const struct nlattr *set,
                 }
             }
         } else {
-            VLOG_DBG_RL(&rl, "unsupported set action type: %d",
-                        nl_attr_type(set_attr));
-            return EOPNOTSUPP;
+            return parse_put_flow_set_masked_action(flower, set, set_len,
+                                                    false);
         }
     }
     return 0;
@@ -830,6 +993,15 @@  netdev_tc_flow_put(struct netdev *netdev, struct match *match,
             if (err) {
                 return err;
             }
+        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SET_MASKED) {
+            const struct nlattr *set = nl_attr_get(nla);
+            const size_t set_len = nl_attr_get_size(nla);
+
+            err = parse_put_flow_set_masked_action(&flower, set, set_len,
+                                                   true);
+            if (err) {
+                return err;
+            }
         } else {
             VLOG_DBG_RL(&rl, "unsupported put action type: %d",
                         nl_attr_type(nla));