diff mbox series

[ovs-dev] netdev-offload-tc: verify the flower rule installed

Message ID 162125761321.4661.6113737795293023919.stgit@wsfd-netdev64.ntdv.lab.eng.bos.redhat.com
State Accepted
Headers show
Series [ovs-dev] netdev-offload-tc: verify the flower rule installed | expand

Commit Message

Eelco Chaudron May 17, 2021, 1:20 p.m. UTC
When OVs installs the flower rule, it only checks for the OK from the
kernel. It does not check if the rule requested matches the one
actually programmed. This change will add this check and warns the
user if this is not the case.

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
---
 lib/tc.c |   59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

Comments

Marcelo Leitner May 21, 2021, 2:29 p.m. UTC | #1
On Mon, May 17, 2021 at 09:20:28AM -0400, Eelco Chaudron wrote:
> When OVs installs the flower rule, it only checks for the OK from the
> kernel. It does not check if the rule requested matches the one
> actually programmed. This change will add this check and warns the
> user if this is not the case.
> 
> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

Reviewed-by: Marcelo Ricardo Leitner <mleitner@redhat.com>
Ilya Maximets July 8, 2021, 8:18 p.m. UTC | #2
On 5/17/21 3:20 PM, Eelco Chaudron wrote:
> When OVs installs the flower rule, it only checks for the OK from the
> kernel. It does not check if the rule requested matches the one
> actually programmed. This change will add this check and warns the
> user if this is not the case.
> 
> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> ---
>  lib/tc.c |   59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 59 insertions(+)
> 
> diff --git a/lib/tc.c b/lib/tc.c
> index a27cca2cc..e134f6a06 100644
> --- a/lib/tc.c
> +++ b/lib/tc.c
> @@ -2979,6 +2979,50 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower)
>      return 0;
>  }
>  
> +static bool
> +cmp_tc_flower_match_action(const struct tc_flower *a,
> +                           const struct tc_flower *b)
> +{
> +    if (memcmp(&a->mask, &b->mask, sizeof a->mask)) {
> +        VLOG_DBG_RL(&error_rl, "tc flower compare failed mask compare");
> +        return false;
> +    }
> +
> +    /* We can not memcmp() the key as some keys might be set while the mask
> +     * is not.*/
> +
> +    for (int i = 0; i < sizeof a->key; i++) {
> +        uint8_t mask = ((uint8_t *)&a->mask)[i];
> +        uint8_t key_a = ((uint8_t *)&a->key)[i] & mask;
> +        uint8_t key_b = ((uint8_t *)&b->key)[i] & mask;
> +
> +        if (key_a != key_b) {
> +            VLOG_DBG_RL(&error_rl, "tc flower compare failed key compare at "
> +                        "%d", i);
> +            return false;
> +        }
> +    }
> +
> +    /* Compare the actions. */
> +    const struct tc_action *action_a = a->actions;
> +    const struct tc_action *action_b = b->actions;
> +
> +    if (a->action_count != b->action_count) {
> +        VLOG_DBG_RL(&error_rl, "tc flower compare failed action length check");
> +        return false;
> +    }
> +
> +    for (int i = 0; i < a->action_count; i++, action_a++, action_b++) {
> +        if (memcmp(action_a, action_b, sizeof *action_a)) {
> +            VLOG_DBG_RL(&error_rl, "tc flower compare failed action compare "
> +                        "for %d", i);
> +            return false;
> +        }
> +    }
> +
> +    return true;
> +}
> +
>  int
>  tc_replace_flower(struct tcf_id *id, struct tc_flower *flower)
>  {
> @@ -3010,6 +3054,21 @@ tc_replace_flower(struct tcf_id *id, struct tc_flower *flower)
>  
>          id->prio = tc_get_major(tc->tcm_info);
>          id->handle = tc->tcm_handle;
> +
> +        if (id->prio != TC_RESERVED_PRIORITY_POLICE) {
> +            struct tc_flower flower_out;
> +            struct tcf_id id_out;
> +            int ret;
> +
> +            ret = parse_netlink_to_tc_flower(reply, &id_out, &flower_out,
> +                                             false);
> +
> +            if (ret || !cmp_tc_flower_match_action(flower, &flower_out)) {
> +                VLOG_WARN_RL(&error_rl, "Kernel flower acknowledgment does "
> +                             "not match request!\n Set dpif_netlink to dbg to "
> +                             "see which rule caused this error.");

So we're only printing the warning and not reverting the change
and not returning an error, right?  So, OVS will continue to
work with the incorrect rule installed?

I think, we should revert the incorrect change and return the
error, so the flow could be installed to the OVS kernel datapath,
but maybe this is a task for a separate change.

What do you think?

Best regards, Ilya Maximets.
Marcelo Leitner July 8, 2021, 8:41 p.m. UTC | #3
On Thu, Jul 08, 2021 at 10:18:50PM +0200, Ilya Maximets wrote:
> On 5/17/21 3:20 PM, Eelco Chaudron wrote:
> > When OVs installs the flower rule, it only checks for the OK from the
> > kernel. It does not check if the rule requested matches the one
> > actually programmed. This change will add this check and warns the
> > user if this is not the case.
> >
> > Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> > ---
> >  lib/tc.c |   59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 59 insertions(+)
> >
> > diff --git a/lib/tc.c b/lib/tc.c
> > index a27cca2cc..e134f6a06 100644
> > --- a/lib/tc.c
> > +++ b/lib/tc.c
> > @@ -2979,6 +2979,50 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower)
> >      return 0;
> >  }
> >
> > +static bool
> > +cmp_tc_flower_match_action(const struct tc_flower *a,
> > +                           const struct tc_flower *b)
> > +{
> > +    if (memcmp(&a->mask, &b->mask, sizeof a->mask)) {
> > +        VLOG_DBG_RL(&error_rl, "tc flower compare failed mask compare");
> > +        return false;
> > +    }
> > +
> > +    /* We can not memcmp() the key as some keys might be set while the mask
> > +     * is not.*/
> > +
> > +    for (int i = 0; i < sizeof a->key; i++) {
> > +        uint8_t mask = ((uint8_t *)&a->mask)[i];
> > +        uint8_t key_a = ((uint8_t *)&a->key)[i] & mask;
> > +        uint8_t key_b = ((uint8_t *)&b->key)[i] & mask;
> > +
> > +        if (key_a != key_b) {
> > +            VLOG_DBG_RL(&error_rl, "tc flower compare failed key compare at "
> > +                        "%d", i);
> > +            return false;
> > +        }
> > +    }
> > +
> > +    /* Compare the actions. */
> > +    const struct tc_action *action_a = a->actions;
> > +    const struct tc_action *action_b = b->actions;
> > +
> > +    if (a->action_count != b->action_count) {
> > +        VLOG_DBG_RL(&error_rl, "tc flower compare failed action length check");
> > +        return false;
> > +    }
> > +
> > +    for (int i = 0; i < a->action_count; i++, action_a++, action_b++) {
> > +        if (memcmp(action_a, action_b, sizeof *action_a)) {
> > +            VLOG_DBG_RL(&error_rl, "tc flower compare failed action compare "
> > +                        "for %d", i);
> > +            return false;
> > +        }
> > +    }
> > +
> > +    return true;
> > +}
> > +
> >  int
> >  tc_replace_flower(struct tcf_id *id, struct tc_flower *flower)
> >  {
> > @@ -3010,6 +3054,21 @@ tc_replace_flower(struct tcf_id *id, struct tc_flower *flower)
> >
> >          id->prio = tc_get_major(tc->tcm_info);
> >          id->handle = tc->tcm_handle;
> > +
> > +        if (id->prio != TC_RESERVED_PRIORITY_POLICE) {
> > +            struct tc_flower flower_out;
> > +            struct tcf_id id_out;
> > +            int ret;
> > +
> > +            ret = parse_netlink_to_tc_flower(reply, &id_out, &flower_out,
> > +                                             false);
> > +
> > +            if (ret || !cmp_tc_flower_match_action(flower, &flower_out)) {
> > +                VLOG_WARN_RL(&error_rl, "Kernel flower acknowledgment does "
> > +                             "not match request!\n Set dpif_netlink to dbg to "
> > +                             "see which rule caused this error.");
>
> So we're only printing the warning and not reverting the change
> and not returning an error, right?  So, OVS will continue to
> work with the incorrect rule installed?
>
> I think, we should revert the incorrect change and return the
> error, so the flow could be installed to the OVS kernel datapath,
> but maybe this is a task for a separate change.
>
> What do you think?

While I agree that it would make it more robust, I don't expect that
this situation will be exercised often. It's only expected to happen
on a mismatch of versions or something like that. It's not something
that someone should rely on. As in, to aid development, and not
sysadmins.

With the big warning there, which is not intrusive and can be
backported without much risk, someone could already spot a reason for
erratic behavior.

My 2c ;-)

Best regards,
Marcelo

>
> Best regards, Ilya Maximets.
>
Eelco Chaudron July 9, 2021, 8:35 a.m. UTC | #4
On 8 Jul 2021, at 22:18, Ilya Maximets wrote:

> On 5/17/21 3:20 PM, Eelco Chaudron wrote:
>> When OVs installs the flower rule, it only checks for the OK from the
>> kernel. It does not check if the rule requested matches the one
>> actually programmed. This change will add this check and warns the
>> user if this is not the case.
>>
>> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
>> ---
>>  lib/tc.c |   59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 59 insertions(+)
>>
>> diff --git a/lib/tc.c b/lib/tc.c
>> index a27cca2cc..e134f6a06 100644
>> --- a/lib/tc.c
>> +++ b/lib/tc.c
>> @@ -2979,6 +2979,50 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower)
>>      return 0;
>>  }
>>
>> +static bool
>> +cmp_tc_flower_match_action(const struct tc_flower *a,
>> +                           const struct tc_flower *b)
>> +{
>> +    if (memcmp(&a->mask, &b->mask, sizeof a->mask)) {
>> +        VLOG_DBG_RL(&error_rl, "tc flower compare failed mask compare");
>> +        return false;
>> +    }
>> +
>> +    /* We can not memcmp() the key as some keys might be set while the mask
>> +     * is not.*/
>> +
>> +    for (int i = 0; i < sizeof a->key; i++) {
>> +        uint8_t mask = ((uint8_t *)&a->mask)[i];
>> +        uint8_t key_a = ((uint8_t *)&a->key)[i] & mask;
>> +        uint8_t key_b = ((uint8_t *)&b->key)[i] & mask;
>> +
>> +        if (key_a != key_b) {
>> +            VLOG_DBG_RL(&error_rl, "tc flower compare failed key compare at "
>> +                        "%d", i);
>> +            return false;
>> +        }
>> +    }
>> +
>> +    /* Compare the actions. */
>> +    const struct tc_action *action_a = a->actions;
>> +    const struct tc_action *action_b = b->actions;
>> +
>> +    if (a->action_count != b->action_count) {
>> +        VLOG_DBG_RL(&error_rl, "tc flower compare failed action length check");
>> +        return false;
>> +    }
>> +
>> +    for (int i = 0; i < a->action_count; i++, action_a++, action_b++) {
>> +        if (memcmp(action_a, action_b, sizeof *action_a)) {
>> +            VLOG_DBG_RL(&error_rl, "tc flower compare failed action compare "
>> +                        "for %d", i);
>> +            return false;
>> +        }
>> +    }
>> +
>> +    return true;
>> +}
>> +
>>  int
>>  tc_replace_flower(struct tcf_id *id, struct tc_flower *flower)
>>  {
>> @@ -3010,6 +3054,21 @@ tc_replace_flower(struct tcf_id *id, struct tc_flower *flower)
>>
>>          id->prio = tc_get_major(tc->tcm_info);
>>          id->handle = tc->tcm_handle;
>> +
>> +        if (id->prio != TC_RESERVED_PRIORITY_POLICE) {
>> +            struct tc_flower flower_out;
>> +            struct tcf_id id_out;
>> +            int ret;
>> +
>> +            ret = parse_netlink_to_tc_flower(reply, &id_out, &flower_out,
>> +                                             false);
>> +
>> +            if (ret || !cmp_tc_flower_match_action(flower, &flower_out)) {
>> +                VLOG_WARN_RL(&error_rl, "Kernel flower acknowledgment does "
>> +                             "not match request!\n Set dpif_netlink to dbg to "
>> +                             "see which rule caused this error.");
>
> So we're only printing the warning and not reverting the change
> and not returning an error, right?  So, OVS will continue to
> work with the incorrect rule installed?
> I think, we should revert the incorrect change and return the
> error, so the flow could be installed to the OVS kernel datapath,
> but maybe this is a task for a separate change.
>
> What do you think?

The goal was to make sure we do not break anything, in case there is an existing kernel bug. As unfortunately, we are missing a good set of TC unit tests.

With the "warning only" option, we can backport this. And if in the field we do not see any (false) reports, a follow-up patch can do as you suggested.

//Eelco
Ilya Maximets July 9, 2021, 6:23 p.m. UTC | #5
On 7/9/21 10:35 AM, Eelco Chaudron wrote:
> 
> 
> On 8 Jul 2021, at 22:18, Ilya Maximets wrote:
> 
>> On 5/17/21 3:20 PM, Eelco Chaudron wrote:
>>> When OVs installs the flower rule, it only checks for the OK from the
>>> kernel. It does not check if the rule requested matches the one
>>> actually programmed. This change will add this check and warns the
>>> user if this is not the case.
>>>
>>> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
>>> ---
>>>  lib/tc.c |   59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>  1 file changed, 59 insertions(+)
>>>
>>> diff --git a/lib/tc.c b/lib/tc.c
>>> index a27cca2cc..e134f6a06 100644
>>> --- a/lib/tc.c
>>> +++ b/lib/tc.c
>>> @@ -2979,6 +2979,50 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower)
>>>      return 0;
>>>  }
>>>
>>> +static bool
>>> +cmp_tc_flower_match_action(const struct tc_flower *a,
>>> +                           const struct tc_flower *b)
>>> +{
>>> +    if (memcmp(&a->mask, &b->mask, sizeof a->mask)) {
>>> +        VLOG_DBG_RL(&error_rl, "tc flower compare failed mask compare");
>>> +        return false;
>>> +    }
>>> +
>>> +    /* We can not memcmp() the key as some keys might be set while the mask
>>> +     * is not.*/
>>> +
>>> +    for (int i = 0; i < sizeof a->key; i++) {
>>> +        uint8_t mask = ((uint8_t *)&a->mask)[i];
>>> +        uint8_t key_a = ((uint8_t *)&a->key)[i] & mask;
>>> +        uint8_t key_b = ((uint8_t *)&b->key)[i] & mask;
>>> +
>>> +        if (key_a != key_b) {
>>> +            VLOG_DBG_RL(&error_rl, "tc flower compare failed key compare at "
>>> +                        "%d", i);
>>> +            return false;
>>> +        }
>>> +    }
>>> +
>>> +    /* Compare the actions. */
>>> +    const struct tc_action *action_a = a->actions;
>>> +    const struct tc_action *action_b = b->actions;
>>> +
>>> +    if (a->action_count != b->action_count) {
>>> +        VLOG_DBG_RL(&error_rl, "tc flower compare failed action length check");
>>> +        return false;
>>> +    }
>>> +
>>> +    for (int i = 0; i < a->action_count; i++, action_a++, action_b++) {
>>> +        if (memcmp(action_a, action_b, sizeof *action_a)) {
>>> +            VLOG_DBG_RL(&error_rl, "tc flower compare failed action compare "
>>> +                        "for %d", i);
>>> +            return false;
>>> +        }
>>> +    }
>>> +
>>> +    return true;
>>> +}
>>> +
>>>  int
>>>  tc_replace_flower(struct tcf_id *id, struct tc_flower *flower)
>>>  {
>>> @@ -3010,6 +3054,21 @@ tc_replace_flower(struct tcf_id *id, struct tc_flower *flower)
>>>
>>>          id->prio = tc_get_major(tc->tcm_info);
>>>          id->handle = tc->tcm_handle;
>>> +
>>> +        if (id->prio != TC_RESERVED_PRIORITY_POLICE) {
>>> +            struct tc_flower flower_out;
>>> +            struct tcf_id id_out;
>>> +            int ret;
>>> +
>>> +            ret = parse_netlink_to_tc_flower(reply, &id_out, &flower_out,
>>> +                                             false);
>>> +
>>> +            if (ret || !cmp_tc_flower_match_action(flower, &flower_out)) {
>>> +                VLOG_WARN_RL(&error_rl, "Kernel flower acknowledgment does "
>>> +                             "not match request!\n Set dpif_netlink to dbg to "
>>> +                             "see which rule caused this error.");
>>
>> So we're only printing the warning and not reverting the change
>> and not returning an error, right?  So, OVS will continue to
>> work with the incorrect rule installed?
>> I think, we should revert the incorrect change and return the
>> error, so the flow could be installed to the OVS kernel datapath,
>> but maybe this is a task for a separate change.
>>
>> What do you think?
> 
> The goal was to make sure we do not break anything, in case there is an existing kernel bug. As unfortunately, we are missing a good set of TC unit tests.
> 
> With the "warning only" option, we can backport this. And if in the field we do not see any (false) reports, a follow-up patch can do as you suggested.

Make sense.  I removed '\n' from a warning (these doesn't look good in the log)
and applied to master.

You and Marcelo are talking about backporting, do you think it make sense to
backport to stable branches?

Bets regards, Ilya Maximets.
Eelco Chaudron July 12, 2021, 8:28 a.m. UTC | #6
On 9 Jul 2021, at 20:23, Ilya Maximets wrote:

> On 7/9/21 10:35 AM, Eelco Chaudron wrote:
>>
>>
>> On 8 Jul 2021, at 22:18, Ilya Maximets wrote:
>>
>>> On 5/17/21 3:20 PM, Eelco Chaudron wrote:
>>>> When OVs installs the flower rule, it only checks for the OK from the
>>>> kernel. It does not check if the rule requested matches the one
>>>> actually programmed. This change will add this check and warns the
>>>> user if this is not the case.
>>>>
>>>> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
>>>> ---
>>>>  lib/tc.c |   59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>  1 file changed, 59 insertions(+)
>>>>
>>>> diff --git a/lib/tc.c b/lib/tc.c
>>>> index a27cca2cc..e134f6a06 100644
>>>> --- a/lib/tc.c
>>>> +++ b/lib/tc.c
>>>> @@ -2979,6 +2979,50 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower)
>>>>      return 0;
>>>>  }
>>>>
>>>> +static bool
>>>> +cmp_tc_flower_match_action(const struct tc_flower *a,
>>>> +                           const struct tc_flower *b)
>>>> +{
>>>> +    if (memcmp(&a->mask, &b->mask, sizeof a->mask)) {
>>>> +        VLOG_DBG_RL(&error_rl, "tc flower compare failed mask compare");
>>>> +        return false;
>>>> +    }
>>>> +
>>>> +    /* We can not memcmp() the key as some keys might be set while the mask
>>>> +     * is not.*/
>>>> +
>>>> +    for (int i = 0; i < sizeof a->key; i++) {
>>>> +        uint8_t mask = ((uint8_t *)&a->mask)[i];
>>>> +        uint8_t key_a = ((uint8_t *)&a->key)[i] & mask;
>>>> +        uint8_t key_b = ((uint8_t *)&b->key)[i] & mask;
>>>> +
>>>> +        if (key_a != key_b) {
>>>> +            VLOG_DBG_RL(&error_rl, "tc flower compare failed key compare at "
>>>> +                        "%d", i);
>>>> +            return false;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    /* Compare the actions. */
>>>> +    const struct tc_action *action_a = a->actions;
>>>> +    const struct tc_action *action_b = b->actions;
>>>> +
>>>> +    if (a->action_count != b->action_count) {
>>>> +        VLOG_DBG_RL(&error_rl, "tc flower compare failed action length check");
>>>> +        return false;
>>>> +    }
>>>> +
>>>> +    for (int i = 0; i < a->action_count; i++, action_a++, action_b++) {
>>>> +        if (memcmp(action_a, action_b, sizeof *action_a)) {
>>>> +            VLOG_DBG_RL(&error_rl, "tc flower compare failed action compare "
>>>> +                        "for %d", i);
>>>> +            return false;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    return true;
>>>> +}
>>>> +
>>>>  int
>>>>  tc_replace_flower(struct tcf_id *id, struct tc_flower *flower)
>>>>  {
>>>> @@ -3010,6 +3054,21 @@ tc_replace_flower(struct tcf_id *id, struct tc_flower *flower)
>>>>
>>>>          id->prio = tc_get_major(tc->tcm_info);
>>>>          id->handle = tc->tcm_handle;
>>>> +
>>>> +        if (id->prio != TC_RESERVED_PRIORITY_POLICE) {
>>>> +            struct tc_flower flower_out;
>>>> +            struct tcf_id id_out;
>>>> +            int ret;
>>>> +
>>>> +            ret = parse_netlink_to_tc_flower(reply, &id_out, &flower_out,
>>>> +                                             false);
>>>> +
>>>> +            if (ret || !cmp_tc_flower_match_action(flower, &flower_out)) {
>>>> +                VLOG_WARN_RL(&error_rl, "Kernel flower acknowledgment does "
>>>> +                             "not match request!\n Set dpif_netlink to dbg to "
>>>> +                             "see which rule caused this error.");
>>>
>>> So we're only printing the warning and not reverting the change
>>> and not returning an error, right?  So, OVS will continue to
>>> work with the incorrect rule installed?
>>> I think, we should revert the incorrect change and return the
>>> error, so the flow could be installed to the OVS kernel datapath,
>>> but maybe this is a task for a separate change.
>>>
>>> What do you think?
>>
>> The goal was to make sure we do not break anything, in case there is an existing kernel bug. As unfortunately, we are missing a good set of TC unit tests.
>>
>> With the "warning only" option, we can backport this. And if in the field we do not see any (false) reports, a follow-up patch can do as you suggested.
>
> Make sense.  I removed '\n' from a warning (these doesn't look good in the log)
> and applied to master.

Thanks!

> You and Marcelo are talking about backporting, do you think it make sense to
> backport to stable branches?

If it applies cleanly, I would suggest backporting it all the way to 2.13. Marcelo?

//Eelco
Marcelo Leitner July 12, 2021, 12:54 p.m. UTC | #7
On Mon, Jul 12, 2021 at 10:28:15AM +0200, Eelco Chaudron wrote:
>
>
> On 9 Jul 2021, at 20:23, Ilya Maximets wrote:
>
> > On 7/9/21 10:35 AM, Eelco Chaudron wrote:
> >>
> >>
> >> On 8 Jul 2021, at 22:18, Ilya Maximets wrote:
> >>
> >>> On 5/17/21 3:20 PM, Eelco Chaudron wrote:
> >>>> When OVs installs the flower rule, it only checks for the OK from the
> >>>> kernel. It does not check if the rule requested matches the one
> >>>> actually programmed. This change will add this check and warns the
> >>>> user if this is not the case.
> >>>>
> >>>> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> >>>> ---
> >>>>  lib/tc.c |   59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >>>>  1 file changed, 59 insertions(+)
> >>>>
> >>>> diff --git a/lib/tc.c b/lib/tc.c
> >>>> index a27cca2cc..e134f6a06 100644
> >>>> --- a/lib/tc.c
> >>>> +++ b/lib/tc.c
> >>>> @@ -2979,6 +2979,50 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower)
> >>>>      return 0;
> >>>>  }
> >>>>
> >>>> +static bool
> >>>> +cmp_tc_flower_match_action(const struct tc_flower *a,
> >>>> +                           const struct tc_flower *b)
> >>>> +{
> >>>> +    if (memcmp(&a->mask, &b->mask, sizeof a->mask)) {
> >>>> +        VLOG_DBG_RL(&error_rl, "tc flower compare failed mask compare");
> >>>> +        return false;
> >>>> +    }
> >>>> +
> >>>> +    /* We can not memcmp() the key as some keys might be set while the mask
> >>>> +     * is not.*/
> >>>> +
> >>>> +    for (int i = 0; i < sizeof a->key; i++) {
> >>>> +        uint8_t mask = ((uint8_t *)&a->mask)[i];
> >>>> +        uint8_t key_a = ((uint8_t *)&a->key)[i] & mask;
> >>>> +        uint8_t key_b = ((uint8_t *)&b->key)[i] & mask;
> >>>> +
> >>>> +        if (key_a != key_b) {
> >>>> +            VLOG_DBG_RL(&error_rl, "tc flower compare failed key compare at "
> >>>> +                        "%d", i);
> >>>> +            return false;
> >>>> +        }
> >>>> +    }
> >>>> +
> >>>> +    /* Compare the actions. */
> >>>> +    const struct tc_action *action_a = a->actions;
> >>>> +    const struct tc_action *action_b = b->actions;
> >>>> +
> >>>> +    if (a->action_count != b->action_count) {
> >>>> +        VLOG_DBG_RL(&error_rl, "tc flower compare failed action length check");
> >>>> +        return false;
> >>>> +    }
> >>>> +
> >>>> +    for (int i = 0; i < a->action_count; i++, action_a++, action_b++) {
> >>>> +        if (memcmp(action_a, action_b, sizeof *action_a)) {
> >>>> +            VLOG_DBG_RL(&error_rl, "tc flower compare failed action compare "
> >>>> +                        "for %d", i);
> >>>> +            return false;
> >>>> +        }
> >>>> +    }
> >>>> +
> >>>> +    return true;
> >>>> +}
> >>>> +
> >>>>  int
> >>>>  tc_replace_flower(struct tcf_id *id, struct tc_flower *flower)
> >>>>  {
> >>>> @@ -3010,6 +3054,21 @@ tc_replace_flower(struct tcf_id *id, struct tc_flower *flower)
> >>>>
> >>>>          id->prio = tc_get_major(tc->tcm_info);
> >>>>          id->handle = tc->tcm_handle;
> >>>> +
> >>>> +        if (id->prio != TC_RESERVED_PRIORITY_POLICE) {
> >>>> +            struct tc_flower flower_out;
> >>>> +            struct tcf_id id_out;
> >>>> +            int ret;
> >>>> +
> >>>> +            ret = parse_netlink_to_tc_flower(reply, &id_out, &flower_out,
> >>>> +                                             false);
> >>>> +
> >>>> +            if (ret || !cmp_tc_flower_match_action(flower, &flower_out)) {
> >>>> +                VLOG_WARN_RL(&error_rl, "Kernel flower acknowledgment does "
> >>>> +                             "not match request!\n Set dpif_netlink to dbg to "
> >>>> +                             "see which rule caused this error.");
> >>>
> >>> So we're only printing the warning and not reverting the change
> >>> and not returning an error, right?  So, OVS will continue to
> >>> work with the incorrect rule installed?
> >>> I think, we should revert the incorrect change and return the
> >>> error, so the flow could be installed to the OVS kernel datapath,
> >>> but maybe this is a task for a separate change.
> >>>
> >>> What do you think?
> >>
> >> The goal was to make sure we do not break anything, in case there is an existing kernel bug. As unfortunately, we are missing a good set of TC unit tests.
> >>
> >> With the "warning only" option, we can backport this. And if in the field we do not see any (false) reports, a follow-up patch can do as you suggested.
> >
> > Make sense.  I removed '\n' from a warning (these doesn't look good in the log)
> > and applied to master.
>
> Thanks!
>
> > You and Marcelo are talking about backporting, do you think it make sense to
> > backport to stable branches?
>
> If it applies cleanly, I would suggest backporting it all the way to 2.13. Marcelo?

I don't know how different is the support for 2.13 and 2.15. I mean,
if 2.13 is only for critical patches or so. Anyhow, I'd say 2.15 yes
and 2.13 on best effort. :)

>
> //Eelco
>
Eelco Chaudron July 14, 2021, 11:18 a.m. UTC | #8
On 12 Jul 2021, at 14:54, Marcelo Ricardo Leitner wrote:

> On Mon, Jul 12, 2021 at 10:28:15AM +0200, Eelco Chaudron wrote:
>>
>>
>> On 9 Jul 2021, at 20:23, Ilya Maximets wrote:
>>
>>> On 7/9/21 10:35 AM, Eelco Chaudron wrote:
>>>>
>>>>
>>>> On 8 Jul 2021, at 22:18, Ilya Maximets wrote:
>>>>
>>>>> On 5/17/21 3:20 PM, Eelco Chaudron wrote:
>>>>>> When OVs installs the flower rule, it only checks for the OK from the
>>>>>> kernel. It does not check if the rule requested matches the one
>>>>>> actually programmed. This change will add this check and warns the
>>>>>> user if this is not the case.
>>>>>>
>>>>>> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
>>>>>> ---
>>>>>>  lib/tc.c |   59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>  1 file changed, 59 insertions(+)
>>>>>>
>>>>>> diff --git a/lib/tc.c b/lib/tc.c
>>>>>> index a27cca2cc..e134f6a06 100644
>>>>>> --- a/lib/tc.c
>>>>>> +++ b/lib/tc.c
>>>>>> @@ -2979,6 +2979,50 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower)
>>>>>>      return 0;
>>>>>>  }
>>>>>>
>>>>>> +static bool
>>>>>> +cmp_tc_flower_match_action(const struct tc_flower *a,
>>>>>> +                           const struct tc_flower *b)
>>>>>> +{
>>>>>> +    if (memcmp(&a->mask, &b->mask, sizeof a->mask)) {
>>>>>> +        VLOG_DBG_RL(&error_rl, "tc flower compare failed mask compare");
>>>>>> +        return false;
>>>>>> +    }
>>>>>> +
>>>>>> +    /* We can not memcmp() the key as some keys might be set while the mask
>>>>>> +     * is not.*/
>>>>>> +
>>>>>> +    for (int i = 0; i < sizeof a->key; i++) {
>>>>>> +        uint8_t mask = ((uint8_t *)&a->mask)[i];
>>>>>> +        uint8_t key_a = ((uint8_t *)&a->key)[i] & mask;
>>>>>> +        uint8_t key_b = ((uint8_t *)&b->key)[i] & mask;
>>>>>> +
>>>>>> +        if (key_a != key_b) {
>>>>>> +            VLOG_DBG_RL(&error_rl, "tc flower compare failed key compare at "
>>>>>> +                        "%d", i);
>>>>>> +            return false;
>>>>>> +        }
>>>>>> +    }
>>>>>> +
>>>>>> +    /* Compare the actions. */
>>>>>> +    const struct tc_action *action_a = a->actions;
>>>>>> +    const struct tc_action *action_b = b->actions;
>>>>>> +
>>>>>> +    if (a->action_count != b->action_count) {
>>>>>> +        VLOG_DBG_RL(&error_rl, "tc flower compare failed action length check");
>>>>>> +        return false;
>>>>>> +    }
>>>>>> +
>>>>>> +    for (int i = 0; i < a->action_count; i++, action_a++, action_b++) {
>>>>>> +        if (memcmp(action_a, action_b, sizeof *action_a)) {
>>>>>> +            VLOG_DBG_RL(&error_rl, "tc flower compare failed action compare "
>>>>>> +                        "for %d", i);
>>>>>> +            return false;
>>>>>> +        }
>>>>>> +    }
>>>>>> +
>>>>>> +    return true;
>>>>>> +}
>>>>>> +
>>>>>>  int
>>>>>>  tc_replace_flower(struct tcf_id *id, struct tc_flower *flower)
>>>>>>  {
>>>>>> @@ -3010,6 +3054,21 @@ tc_replace_flower(struct tcf_id *id, struct tc_flower *flower)
>>>>>>
>>>>>>          id->prio = tc_get_major(tc->tcm_info);
>>>>>>          id->handle = tc->tcm_handle;
>>>>>> +
>>>>>> +        if (id->prio != TC_RESERVED_PRIORITY_POLICE) {
>>>>>> +            struct tc_flower flower_out;
>>>>>> +            struct tcf_id id_out;
>>>>>> +            int ret;
>>>>>> +
>>>>>> +            ret = parse_netlink_to_tc_flower(reply, &id_out, &flower_out,
>>>>>> +                                             false);
>>>>>> +
>>>>>> +            if (ret || !cmp_tc_flower_match_action(flower, &flower_out)) {
>>>>>> +                VLOG_WARN_RL(&error_rl, "Kernel flower acknowledgment does "
>>>>>> +                             "not match request!\n Set dpif_netlink to dbg to "
>>>>>> +                             "see which rule caused this error.");
>>>>>
>>>>> So we're only printing the warning and not reverting the change
>>>>> and not returning an error, right?  So, OVS will continue to
>>>>> work with the incorrect rule installed?
>>>>> I think, we should revert the incorrect change and return the
>>>>> error, so the flow could be installed to the OVS kernel datapath,
>>>>> but maybe this is a task for a separate change.
>>>>>
>>>>> What do you think?
>>>>
>>>> The goal was to make sure we do not break anything, in case there is an existing kernel bug. As unfortunately, we are missing a good set of TC unit tests.
>>>>
>>>> With the "warning only" option, we can backport this. And if in the field we do not see any (false) reports, a follow-up patch can do as you suggested.
>>>
>>> Make sense.  I removed '\n' from a warning (these doesn't look good in the log)
>>> and applied to master.
>>
>> Thanks!
>>
>>> You and Marcelo are talking about backporting, do you think it make sense to
>>> backport to stable branches?
>>
>> If it applies cleanly, I would suggest backporting it all the way to 2.13. Marcelo?
>
> I don't know how different is the support for 2.13 and 2.15. I mean,
> if 2.13 is only for critical patches or so. Anyhow, I'd say 2.15 yes
> and 2.13 on best effort. :)

Ilya is 2.13-5 possible? Do you need a specific patch?

//Eelco
Ilya Maximets Oct. 27, 2021, 9:51 p.m. UTC | #9
On 7/14/21 13:18, Eelco Chaudron wrote:
> 
> 
> On 12 Jul 2021, at 14:54, Marcelo Ricardo Leitner wrote:
> 
>> On Mon, Jul 12, 2021 at 10:28:15AM +0200, Eelco Chaudron wrote:
>>>
>>>
>>> On 9 Jul 2021, at 20:23, Ilya Maximets wrote:
>>>
>>>> On 7/9/21 10:35 AM, Eelco Chaudron wrote:
>>>>>
>>>>>
>>>>> On 8 Jul 2021, at 22:18, Ilya Maximets wrote:
>>>>>
>>>>>> On 5/17/21 3:20 PM, Eelco Chaudron wrote:
>>>>>>> When OVs installs the flower rule, it only checks for the OK from the
>>>>>>> kernel. It does not check if the rule requested matches the one
>>>>>>> actually programmed. This change will add this check and warns the
>>>>>>> user if this is not the case.
>>>>>>>
>>>>>>> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
>>>>>>> ---
>>>>>>>  lib/tc.c |   59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>>  1 file changed, 59 insertions(+)
>>>>>>>
>>>>>>> diff --git a/lib/tc.c b/lib/tc.c
>>>>>>> index a27cca2cc..e134f6a06 100644
>>>>>>> --- a/lib/tc.c
>>>>>>> +++ b/lib/tc.c
>>>>>>> @@ -2979,6 +2979,50 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower)
>>>>>>>      return 0;
>>>>>>>  }
>>>>>>>
>>>>>>> +static bool
>>>>>>> +cmp_tc_flower_match_action(const struct tc_flower *a,
>>>>>>> +                           const struct tc_flower *b)
>>>>>>> +{
>>>>>>> +    if (memcmp(&a->mask, &b->mask, sizeof a->mask)) {
>>>>>>> +        VLOG_DBG_RL(&error_rl, "tc flower compare failed mask compare");
>>>>>>> +        return false;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    /* We can not memcmp() the key as some keys might be set while the mask
>>>>>>> +     * is not.*/
>>>>>>> +
>>>>>>> +    for (int i = 0; i < sizeof a->key; i++) {
>>>>>>> +        uint8_t mask = ((uint8_t *)&a->mask)[i];
>>>>>>> +        uint8_t key_a = ((uint8_t *)&a->key)[i] & mask;
>>>>>>> +        uint8_t key_b = ((uint8_t *)&b->key)[i] & mask;
>>>>>>> +
>>>>>>> +        if (key_a != key_b) {
>>>>>>> +            VLOG_DBG_RL(&error_rl, "tc flower compare failed key compare at "
>>>>>>> +                        "%d", i);
>>>>>>> +            return false;
>>>>>>> +        }
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    /* Compare the actions. */
>>>>>>> +    const struct tc_action *action_a = a->actions;
>>>>>>> +    const struct tc_action *action_b = b->actions;
>>>>>>> +
>>>>>>> +    if (a->action_count != b->action_count) {
>>>>>>> +        VLOG_DBG_RL(&error_rl, "tc flower compare failed action length check");
>>>>>>> +        return false;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    for (int i = 0; i < a->action_count; i++, action_a++, action_b++) {
>>>>>>> +        if (memcmp(action_a, action_b, sizeof *action_a)) {
>>>>>>> +            VLOG_DBG_RL(&error_rl, "tc flower compare failed action compare "
>>>>>>> +                        "for %d", i);
>>>>>>> +            return false;
>>>>>>> +        }
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    return true;
>>>>>>> +}
>>>>>>> +
>>>>>>>  int
>>>>>>>  tc_replace_flower(struct tcf_id *id, struct tc_flower *flower)
>>>>>>>  {
>>>>>>> @@ -3010,6 +3054,21 @@ tc_replace_flower(struct tcf_id *id, struct tc_flower *flower)
>>>>>>>
>>>>>>>          id->prio = tc_get_major(tc->tcm_info);
>>>>>>>          id->handle = tc->tcm_handle;
>>>>>>> +
>>>>>>> +        if (id->prio != TC_RESERVED_PRIORITY_POLICE) {
>>>>>>> +            struct tc_flower flower_out;
>>>>>>> +            struct tcf_id id_out;
>>>>>>> +            int ret;
>>>>>>> +
>>>>>>> +            ret = parse_netlink_to_tc_flower(reply, &id_out, &flower_out,
>>>>>>> +                                             false);
>>>>>>> +
>>>>>>> +            if (ret || !cmp_tc_flower_match_action(flower, &flower_out)) {
>>>>>>> +                VLOG_WARN_RL(&error_rl, "Kernel flower acknowledgment does "
>>>>>>> +                             "not match request!\n Set dpif_netlink to dbg to "
>>>>>>> +                             "see which rule caused this error.");
>>>>>>
>>>>>> So we're only printing the warning and not reverting the change
>>>>>> and not returning an error, right?  So, OVS will continue to
>>>>>> work with the incorrect rule installed?
>>>>>> I think, we should revert the incorrect change and return the
>>>>>> error, so the flow could be installed to the OVS kernel datapath,
>>>>>> but maybe this is a task for a separate change.
>>>>>>
>>>>>> What do you think?
>>>>>
>>>>> The goal was to make sure we do not break anything, in case there is an existing kernel bug. As unfortunately, we are missing a good set of TC unit tests.
>>>>>
>>>>> With the "warning only" option, we can backport this. And if in the field we do not see any (false) reports, a follow-up patch can do as you suggested.
>>>>
>>>> Make sense.  I removed '\n' from a warning (these doesn't look good in the log)
>>>> and applied to master.
>>>
>>> Thanks!
>>>
>>>> You and Marcelo are talking about backporting, do you think it make sense to
>>>> backport to stable branches?
>>>
>>> If it applies cleanly, I would suggest backporting it all the way to 2.13. Marcelo?
>>
>> I don't know how different is the support for 2.13 and 2.15. I mean,
>> if 2.13 is only for critical patches or so. Anyhow, I'd say 2.15 yes
>> and 2.13 on best effort. :)
> 
> Ilya is 2.13-5 possible? Do you need a specific patch?

Sorry for delay.  This one fell through the cracks.
Backported now down to 2.13.

I also spotted a lot of cases where this code actually triggers
a valid warning while running check-kernel with hardcoded enabled
offloading.  And we need to fix them.  Both in OVS and in kernel.

Best regards, Ilya Maximets.
diff mbox series

Patch

diff --git a/lib/tc.c b/lib/tc.c
index a27cca2cc..e134f6a06 100644
--- a/lib/tc.c
+++ b/lib/tc.c
@@ -2979,6 +2979,50 @@  nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower)
     return 0;
 }
 
+static bool
+cmp_tc_flower_match_action(const struct tc_flower *a,
+                           const struct tc_flower *b)
+{
+    if (memcmp(&a->mask, &b->mask, sizeof a->mask)) {
+        VLOG_DBG_RL(&error_rl, "tc flower compare failed mask compare");
+        return false;
+    }
+
+    /* We can not memcmp() the key as some keys might be set while the mask
+     * is not.*/
+
+    for (int i = 0; i < sizeof a->key; i++) {
+        uint8_t mask = ((uint8_t *)&a->mask)[i];
+        uint8_t key_a = ((uint8_t *)&a->key)[i] & mask;
+        uint8_t key_b = ((uint8_t *)&b->key)[i] & mask;
+
+        if (key_a != key_b) {
+            VLOG_DBG_RL(&error_rl, "tc flower compare failed key compare at "
+                        "%d", i);
+            return false;
+        }
+    }
+
+    /* Compare the actions. */
+    const struct tc_action *action_a = a->actions;
+    const struct tc_action *action_b = b->actions;
+
+    if (a->action_count != b->action_count) {
+        VLOG_DBG_RL(&error_rl, "tc flower compare failed action length check");
+        return false;
+    }
+
+    for (int i = 0; i < a->action_count; i++, action_a++, action_b++) {
+        if (memcmp(action_a, action_b, sizeof *action_a)) {
+            VLOG_DBG_RL(&error_rl, "tc flower compare failed action compare "
+                        "for %d", i);
+            return false;
+        }
+    }
+
+    return true;
+}
+
 int
 tc_replace_flower(struct tcf_id *id, struct tc_flower *flower)
 {
@@ -3010,6 +3054,21 @@  tc_replace_flower(struct tcf_id *id, struct tc_flower *flower)
 
         id->prio = tc_get_major(tc->tcm_info);
         id->handle = tc->tcm_handle;
+
+        if (id->prio != TC_RESERVED_PRIORITY_POLICE) {
+            struct tc_flower flower_out;
+            struct tcf_id id_out;
+            int ret;
+
+            ret = parse_netlink_to_tc_flower(reply, &id_out, &flower_out,
+                                             false);
+
+            if (ret || !cmp_tc_flower_match_action(flower, &flower_out)) {
+                VLOG_WARN_RL(&error_rl, "Kernel flower acknowledgment does "
+                             "not match request!\n Set dpif_netlink to dbg to "
+                             "see which rule caused this error.");
+            }
+        }
         ofpbuf_delete(reply);
     }