diff mbox series

[ovs-dev,v5] tc: fix crash on malformed reply from kernel.

Message ID 20230531134915.1607013-1-frode.nordahl@canonical.com
State Changes Requested
Headers show
Series [ovs-dev,v5] tc: fix crash on malformed reply from kernel. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/intel-ovs-compilation success test: success

Commit Message

Frode Nordahl May 31, 2023, 1:49 p.m. UTC
The tc module combines the use of the `tc_transact` helper
function for communication with the in-kernel tc infrastructure
with assertions on the reply data by `ofpbuf_at_assert` on the
received data prior to further processing.

With the presence of bugs on the kernel side, we need to treat
the kernel as an unreliable service provider and replace assertions
on the reply from it with checks to avoid a fatal crash of OVS.

For the record, the symptom of the crash is this in the log:
EMER|../include/openvswitch/ofpbuf.h:194: assertion offset + size <= b->size failed in ofpbuf_at_assert()

And an excerpt of the backtrace looks like this:
0x0000561dac1396d1 in ofpbuf_at_assert (b=0x7fb650005d20, b=0x7fb650005d20, offset=16, size=20) at ../include/openvswitch/ofpbuf.h:194
tc_replace_flower (id=<optimized out>, flower=<optimized out>) at ../lib/tc.c:3223
0x0000561dac128155 in netdev_tc_flow_put (netdev=0x561dacf91840, match=<optimized out>, actions=<optimized out>, actions_len=<optimized out>,
ufid=<optimized out>, info=<optimized out>, stats=<optimized out>) at ../lib/netdev-offload-tc.c:2096
0x0000561dac117541 in netdev_flow_put (stats=<optimized out>, info=0x7fb65b7ba780, ufid=<optimized out>, act_len=<optimized out>, actions=<optimized out>,
match=0x7fb65b7ba980, netdev=0x561dacf91840) at ../lib/netdev-offload.c:257
parse_flow_put (put=0x7fb65b7bcc50, dpif=0x561dad0ad550) at ../lib/dpif-netlink.c:2297
try_send_to_netdev (op=0x7fb65b7bcc48, dpif=0x561dad0ad550) at ../lib/dpif-netlink.c:2384

Reported-At: https://launchpad.net/bugs/2018500
Fixes: f98e418fbdb6 ("tc: Add tc flower functions")
Signed-off-by: Frode Nordahl <frode.nordahl@canonical.com>
---
 lib/netdev-linux.c      |  7 ++---
 lib/netdev-offload-tc.c |  8 ++++-
 lib/tc.c                | 65 +++++++++++++++++++++++++++++++----------
 3 files changed, 60 insertions(+), 20 deletions(-)

Comments

Ilya Maximets June 5, 2023, 9:03 p.m. UTC | #1
On 5/31/23 15:49, Frode Nordahl wrote:
> The tc module combines the use of the `tc_transact` helper
> function for communication with the in-kernel tc infrastructure
> with assertions on the reply data by `ofpbuf_at_assert` on the
> received data prior to further processing.
> 
> With the presence of bugs on the kernel side, we need to treat
> the kernel as an unreliable service provider and replace assertions
> on the reply from it with checks to avoid a fatal crash of OVS.
> 
> For the record, the symptom of the crash is this in the log:
> EMER|../include/openvswitch/ofpbuf.h:194: assertion offset + size <= b->size failed in ofpbuf_at_assert()
> 
> And an excerpt of the backtrace looks like this:
> 0x0000561dac1396d1 in ofpbuf_at_assert (b=0x7fb650005d20, b=0x7fb650005d20, offset=16, size=20) at ../include/openvswitch/ofpbuf.h:194
> tc_replace_flower (id=<optimized out>, flower=<optimized out>) at ../lib/tc.c:3223
> 0x0000561dac128155 in netdev_tc_flow_put (netdev=0x561dacf91840, match=<optimized out>, actions=<optimized out>, actions_len=<optimized out>,
> ufid=<optimized out>, info=<optimized out>, stats=<optimized out>) at ../lib/netdev-offload-tc.c:2096
> 0x0000561dac117541 in netdev_flow_put (stats=<optimized out>, info=0x7fb65b7ba780, ufid=<optimized out>, act_len=<optimized out>, actions=<optimized out>,
> match=0x7fb65b7ba980, netdev=0x561dacf91840) at ../lib/netdev-offload.c:257
> parse_flow_put (put=0x7fb65b7bcc50, dpif=0x561dad0ad550) at ../lib/dpif-netlink.c:2297
> try_send_to_netdev (op=0x7fb65b7bcc48, dpif=0x561dad0ad550) at ../lib/dpif-netlink.c:2384
> 
> Reported-At: https://launchpad.net/bugs/2018500
> Fixes: f98e418fbdb6 ("tc: Add tc flower functions")
> Signed-off-by: Frode Nordahl <frode.nordahl@canonical.com>
> ---
>  lib/netdev-linux.c      |  7 ++---
>  lib/netdev-offload-tc.c |  8 ++++-
>  lib/tc.c                | 65 +++++++++++++++++++++++++++++++----------
>  3 files changed, 60 insertions(+), 20 deletions(-)

Thanks for the update!  This version looks mostly fine to me.
See some comments inline.

Best regards, Ilya Maximets.

> 
> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> index 36620199e..0a1a08527 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c
> @@ -6235,8 +6235,7 @@ tc_query_qdisc(const struct netdev *netdev_)
>       * On Linux 2.6.35+ we use the straightforward method because it allows us
>       * to handle non-builtin qdiscs without handle 1:0 (e.g. codel).  However,
>       * in such a case we get no response at all from the kernel (!) if a
> -     * builtin qdisc is in use (which is later caught by "!error &&
> -     * !qdisc->size"). */
> +     * builtin qdisc is in use (which is later caught by "error == EPROTO" */
>      tcmsg = netdev_linux_tc_make_request(netdev_, RTM_GETQDISC, NLM_F_ECHO,
>                                           &request);
>      if (!tcmsg) {
> @@ -6247,7 +6246,7 @@ tc_query_qdisc(const struct netdev *netdev_)
>  
>      /* Figure out what tc class to instantiate. */
>      error = tc_transact(&request, &qdisc);
> -    if (!error && qdisc->size) {
> +    if (!error) {
>          const char *kind;
>  
>          error = tc_parse_qdisc(qdisc, &kind, NULL);
> @@ -6262,7 +6261,7 @@ tc_query_qdisc(const struct netdev *netdev_)
>                  ops = &tc_ops_other;
>              }
>          }
> -    } else if ((!error && !qdisc->size) || error == ENOENT) {
> +    } else if (error == ENOENT || error == EPROTO) {
>          /* Either it's a built-in qdisc, or (on Linux pre-2.6.35) it's a qdisc
>           * set up by some other entity that doesn't have a handle 1:0.  We will
>           * assume that it's the system default qdisc. */

There are 3 more cases of ofpbuf_at_assert() in this file:
  - tc_update_policer_action_stats
  - tc_parse_class
  - tc_add_matchall_policer

First two already have a length check before asserting, so changes are not
strictly necessary.  The last one though asserts right after tc_transact, so
needs fixing.  It doesn't seem to do anything useful with the reply, so maybe
we need to just delete that access or not provide the 'reply' pointer in the
first place.

> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
> index 4f26dd8cc..f89aa3270 100644
> --- a/lib/netdev-offload-tc.c
> +++ b/lib/netdev-offload-tc.c
> @@ -1247,7 +1247,13 @@ parse_tc_flower_to_match(const struct netdev *netdev,
>      }
>      nl_msg_end_nested(buf, act_off);
>  
> -    *actions = ofpbuf_at_assert(buf, act_off, sizeof(struct nlattr));
> +    struct nlattr *act = ofpbuf_at(buf, act_off, sizeof *act);
> +    if (!act) {
> +        VLOG_ERR_RL(&error_rl, "failed to parse flower, unexpectedly found no "
> +                    "actions.");
> +        return -EPROTO;
> +    }

We can keep an assertion here.  It's not a buffer received from the kernel,
it's a buffer we're creating.  Also, nl_msg_end_nested() on a previous line
already called the same ofpbuf_at_assert(), so it should not be possible
to reach this check.

> +    *actions = act;
>  
>      parse_tc_flower_to_stats(flower, stats);
>      parse_tc_flower_to_attrs(flower, attrs);
> diff --git a/lib/tc.c b/lib/tc.c
> index 5c32c6f97..6a763d171 100644
> --- a/lib/tc.c
> +++ b/lib/tc.c
> @@ -237,11 +237,34 @@ static void request_from_tcf_id(struct tcf_id *id, uint16_t eth_type,
>      }
>  }
>  
> +/* The `tc_transact` function is a wrapper around `nl_transact` with the
> + * addition of:
> + *
> + * 1. the 'request' ofpbuf buffer is freed after `nl_transact` returns,
> + *    regardless of success or failure.
> + *
> + * 2. When a 'replyp' pointer is provided; in the event of the kernel
> + *    providing an empty response, a positive error return will be provided
> + *    with the value of EPROTO.
> + *
> + * Please acquaint yourself with the documentation of the `nl_transact`
> + * function in the netlink-socket module before making use of this function.
> + */
>  int
>  tc_transact(struct ofpbuf *request, struct ofpbuf **replyp)
>  {
>      int error = nl_transact(NETLINK_ROUTE, request, replyp);
>      ofpbuf_uninit(request);
> +
> +    if (!error && replyp && *replyp && !(*replyp)->size) {
> +        /* We replicate the behavior of `nl_transact` for error conditions and
> +         * free any allocations before setting the 'replyp' buffer to NULL. */
> +        ofpbuf_delete(*replyp);
> +        *replyp = NULL;
> +
> +        return EPROTO;

You didn't add a log message here and its probably fine.  But maybe we can
add a coverage counter to at least know that the error condition happened?
Somehting like 'tc_malformed_netlink_reply' ?  We could use the same counter
also in all the cases below.

What do you think?


> +    }
> +
>      return error;
>  }
>  
> @@ -2190,18 +2213,18 @@ int
>  parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tcf_id *id,
>                             struct tc_flower *flower, bool terse)
>  {
> -    struct tcmsg *tc;
> +    struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
> +    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
> +    struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
>      struct nlattr *ta[ARRAY_SIZE(tca_policy)];
>      const char *kind;
>  
> -    if (NLMSG_HDRLEN + sizeof *tc > reply->size) {
> +    if (!nlmsg || !tc) {
>          return EPROTO;
>      }
>  
>      memset(flower, 0, sizeof *flower);
>  
> -    tc = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
> -
>      flower->key.eth_type = (OVS_FORCE ovs_be16) tc_get_minor(tc->tcm_info);
>      flower->mask.eth_type = OVS_BE16_MAX;
>      id->prio = tc_get_major(tc->tcm_info);
> @@ -2215,8 +2238,7 @@ parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tcf_id *id,
>          return EAGAIN;
>      }
>  
> -    if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof *tc,
> -                         tca_policy, ta, ARRAY_SIZE(ta))) {
> +    if (!nl_policy_parse(&b, 0, tca_policy, ta, ARRAY_SIZE(ta))) {
>          VLOG_ERR_RL(&error_rl, "failed to parse tca policy");
>          return EPROTO;
>      }
> @@ -2237,13 +2259,16 @@ parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tcf_id *id,
>  int
>  parse_netlink_to_tc_chain(struct ofpbuf *reply, uint32_t *chain)
>  {
> +    struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
> +    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
> +    struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
>      struct nlattr *ta[ARRAY_SIZE(tca_chain_policy)];
> -    struct tcmsg *tc;
>  
> -    tc = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
> +    if (!nlmsg || !tc) {
> +        return EPROTO;
> +    }
>  
> -    if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof *tc,
> -                         tca_chain_policy, ta, ARRAY_SIZE(ta))) {
> +    if (!nl_policy_parse(&b, 0, tca_chain_policy, ta, ARRAY_SIZE(ta))) {
>          VLOG_ERR_RL(&error_rl, "failed to parse tca chain policy");
>          return EINVAL;
>      }
> @@ -2307,21 +2332,26 @@ int
>  parse_netlink_to_tc_policer(struct ofpbuf *reply, uint32_t police_idx[])
>  {
>      static struct nl_policy actions_orders_policy[TCA_ACT_MAX_PRIO] = {};
> +    struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
>      struct nlattr *actions_orders[ARRAY_SIZE(actions_orders_policy)];
> +    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
>      const int max_size = ARRAY_SIZE(actions_orders_policy);
> +    struct tcamsg *tca = ofpbuf_try_pull(&b, sizeof *tca);
>      const struct nlattr *actions;
>      struct tc_flower flower;
> -    struct tcamsg *tca;
>      int i, cnt = 0;
>      int err;
>  
> +    if (!nlmsg || !tca) {
> +        return EPROTO;
> +    }
> +
>      for (i = 0; i < max_size; i++) {
>          actions_orders_policy[i].type = NL_A_NESTED;
>          actions_orders_policy[i].optional = true;
>      }
>  
> -    tca = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tca);
> -    actions = nl_attr_find(reply, NLMSG_HDRLEN + sizeof *tca, TCA_ACT_TAB);
> +    actions = nl_attr_find(&b, 0, TCA_ACT_TAB);
>      if (!actions || !nl_parse_nested(actions, actions_orders_policy,
>                                       actions_orders, max_size)) {
>          VLOG_ERR_RL(&error_rl,
> @@ -3823,8 +3853,13 @@ tc_replace_flower(struct tcf_id *id, struct tc_flower *flower)
>  
>      error = tc_transact(&request, &reply);
>      if (!error) {
> -        struct tcmsg *tc =
> -            ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
> +        struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
> +        struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
> +        struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
> +
> +        if (!nlmsg || !tc) {
> +            return EPROTO;
> +        }
>  
>          id->prio = tc_get_major(tc->tcm_info);
>          id->handle = tc->tcm_handle;
Frode Nordahl June 5, 2023, 9:45 p.m. UTC | #2
man. 5. jun. 2023, 23:02 skrev Ilya Maximets <i.maximets@ovn.org>:

> On 5/31/23 15:49, Frode Nordahl wrote:
> > The tc module combines the use of the `tc_transact` helper
> > function for communication with the in-kernel tc infrastructure
> > with assertions on the reply data by `ofpbuf_at_assert` on the
> > received data prior to further processing.
> >
> > With the presence of bugs on the kernel side, we need to treat
> > the kernel as an unreliable service provider and replace assertions
> > on the reply from it with checks to avoid a fatal crash of OVS.
> >
> > For the record, the symptom of the crash is this in the log:
> > EMER|../include/openvswitch/ofpbuf.h:194: assertion offset + size <=
> b->size failed in ofpbuf_at_assert()
> >
> > And an excerpt of the backtrace looks like this:
> > 0x0000561dac1396d1 in ofpbuf_at_assert (b=0x7fb650005d20,
> b=0x7fb650005d20, offset=16, size=20) at ../include/openvswitch/ofpbuf.h:194
> > tc_replace_flower (id=<optimized out>, flower=<optimized out>) at
> ../lib/tc.c:3223
> > 0x0000561dac128155 in netdev_tc_flow_put (netdev=0x561dacf91840,
> match=<optimized out>, actions=<optimized out>, actions_len=<optimized out>,
> > ufid=<optimized out>, info=<optimized out>, stats=<optimized out>) at
> ../lib/netdev-offload-tc.c:2096
> > 0x0000561dac117541 in netdev_flow_put (stats=<optimized out>,
> info=0x7fb65b7ba780, ufid=<optimized out>, act_len=<optimized out>,
> actions=<optimized out>,
> > match=0x7fb65b7ba980, netdev=0x561dacf91840) at
> ../lib/netdev-offload.c:257
> > parse_flow_put (put=0x7fb65b7bcc50, dpif=0x561dad0ad550) at
> ../lib/dpif-netlink.c:2297
> > try_send_to_netdev (op=0x7fb65b7bcc48, dpif=0x561dad0ad550) at
> ../lib/dpif-netlink.c:2384
> >
> > Reported-At: https://launchpad.net/bugs/2018500
> > Fixes: f98e418fbdb6 ("tc: Add tc flower functions")
> > Signed-off-by: Frode Nordahl <frode.nordahl@canonical.com>
> > ---
> >  lib/netdev-linux.c      |  7 ++---
> >  lib/netdev-offload-tc.c |  8 ++++-
> >  lib/tc.c                | 65 +++++++++++++++++++++++++++++++----------
> >  3 files changed, 60 insertions(+), 20 deletions(-)
>
> Thanks for the update!  This version looks mostly fine to me.
> See some comments inline.
>

Thanks for taking the time to review, I have one question below and will
provide an update asap.

Best regards, Ilya Maximets.
>
> >
> > diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> > index 36620199e..0a1a08527 100644
> > --- a/lib/netdev-linux.c
> > +++ b/lib/netdev-linux.c
> > @@ -6235,8 +6235,7 @@ tc_query_qdisc(const struct netdev *netdev_)
> >       * On Linux 2.6.35+ we use the straightforward method because it
> allows us
> >       * to handle non-builtin qdiscs without handle 1:0 (e.g. codel).
> However,
> >       * in such a case we get no response at all from the kernel (!) if a
> > -     * builtin qdisc is in use (which is later caught by "!error &&
> > -     * !qdisc->size"). */
> > +     * builtin qdisc is in use (which is later caught by "error ==
> EPROTO" */
> >      tcmsg = netdev_linux_tc_make_request(netdev_, RTM_GETQDISC,
> NLM_F_ECHO,
> >                                           &request);
> >      if (!tcmsg) {
> > @@ -6247,7 +6246,7 @@ tc_query_qdisc(const struct netdev *netdev_)
> >
> >      /* Figure out what tc class to instantiate. */
> >      error = tc_transact(&request, &qdisc);
> > -    if (!error && qdisc->size) {
> > +    if (!error) {
> >          const char *kind;
> >
> >          error = tc_parse_qdisc(qdisc, &kind, NULL);
> > @@ -6262,7 +6261,7 @@ tc_query_qdisc(const struct netdev *netdev_)
> >                  ops = &tc_ops_other;
> >              }
> >          }
> > -    } else if ((!error && !qdisc->size) || error == ENOENT) {
> > +    } else if (error == ENOENT || error == EPROTO) {
> >          /* Either it's a built-in qdisc, or (on Linux pre-2.6.35) it's
> a qdisc
> >           * set up by some other entity that doesn't have a handle 1:0.
> We will
> >           * assume that it's the system default qdisc. */
>
> There are 3 more cases of ofpbuf_at_assert() in this file:
>   - tc_update_policer_action_stats
>   - tc_parse_class
>   - tc_add_matchall_policer
>
> First two already have a length check before asserting, so changes are not
> strictly necessary.  The last one though asserts right after tc_transact,
> so
> needs fixing.  It doesn't seem to do anything useful with the reply, so
> maybe
> we need to just delete that access or not provide the 'reply' pointer in
> the
> first place.
>

I must have missed those as we changed approach from v4 to v5, sorry about
that. Will check and update!

> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
> > index 4f26dd8cc..f89aa3270 100644
> > --- a/lib/netdev-offload-tc.c
> > +++ b/lib/netdev-offload-tc.c
> > @@ -1247,7 +1247,13 @@ parse_tc_flower_to_match(const struct netdev
> *netdev,
> >      }
> >      nl_msg_end_nested(buf, act_off);
> >
> > -    *actions = ofpbuf_at_assert(buf, act_off, sizeof(struct nlattr));
> > +    struct nlattr *act = ofpbuf_at(buf, act_off, sizeof *act);
> > +    if (!act) {
> > +        VLOG_ERR_RL(&error_rl, "failed to parse flower, unexpectedly
> found no "
> > +                    "actions.");
> > +        return -EPROTO;
> > +    }
>
> We can keep an assertion here.  It's not a buffer received from the kernel,
> it's a buffer we're creating.  Also, nl_msg_end_nested() on a previous line
> already called the same ofpbuf_at_assert(), so it should not be possible
> to reach this check.
>

Ack

> +    *actions = act;
> >
> >      parse_tc_flower_to_stats(flower, stats);
> >      parse_tc_flower_to_attrs(flower, attrs);
> > diff --git a/lib/tc.c b/lib/tc.c
> > index 5c32c6f97..6a763d171 100644
> > --- a/lib/tc.c
> > +++ b/lib/tc.c
> > @@ -237,11 +237,34 @@ static void request_from_tcf_id(struct tcf_id *id,
> uint16_t eth_type,
> >      }
> >  }
> >
> > +/* The `tc_transact` function is a wrapper around `nl_transact` with the
> > + * addition of:
> > + *
> > + * 1. the 'request' ofpbuf buffer is freed after `nl_transact` returns,
> > + *    regardless of success or failure.
> > + *
> > + * 2. When a 'replyp' pointer is provided; in the event of the kernel
> > + *    providing an empty response, a positive error return will be
> provided
> > + *    with the value of EPROTO.
> > + *
> > + * Please acquaint yourself with the documentation of the `nl_transact`
> > + * function in the netlink-socket module before making use of this
> function.
> > + */
> >  int
> >  tc_transact(struct ofpbuf *request, struct ofpbuf **replyp)
> >  {
> >      int error = nl_transact(NETLINK_ROUTE, request, replyp);
> >      ofpbuf_uninit(request);
> > +
> > +    if (!error && replyp && *replyp && !(*replyp)->size) {
> > +        /* We replicate the behavior of `nl_transact` for error
> conditions and
> > +         * free any allocations before setting the 'replyp' buffer to
> NULL. */
> > +        ofpbuf_delete(*replyp);
> > +        *replyp = NULL;
> > +
> > +        return EPROTO;
>
> You didn't add a log message here and its probably fine.  But maybe we can
> add a coverage counter to at least know that the error condition happened?
> Somehting like 'tc_malformed_netlink_reply' ?  We could use the same
> counter
> also in all the cases below.
>

Not adding log in this specific location was deliberate as some consumers
(tc_query_qdisc()) squelch errors from here and as such logging something
here would cause the ERR/WARN check in tests to trigger.

For the other places I concluded that the error would be logged somewhere
else.

What do you think?
>

Adding a coverage counter is fine and the proposed name is good. I did
almost carry that over from the previous approach, but I found that there
are lots of places where we return EPROTO, so the patch became "noisy" and
hard to backport.

Asking to avoid potential respin: Would it make sense to only add the
coverage counter those places the patch converts to using ofpbuf_try_pull()
+ return EPROTO, and not everywhere else EPROTO is returned on the back of
other checks in TC related modules?

--
Frode Nordahl


> > +    }
> > +
> >      return error;
> >  }
> >
> > @@ -2190,18 +2213,18 @@ int
> >  parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tcf_id *id,
> >                             struct tc_flower *flower, bool terse)
> >  {
> > -    struct tcmsg *tc;
> > +    struct ofpbuf b = ofpbuf_const_initializer(reply->data,
> reply->size);
> > +    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
> > +    struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
> >      struct nlattr *ta[ARRAY_SIZE(tca_policy)];
> >      const char *kind;
> >
> > -    if (NLMSG_HDRLEN + sizeof *tc > reply->size) {
> > +    if (!nlmsg || !tc) {
> >          return EPROTO;
> >      }
> >
> >      memset(flower, 0, sizeof *flower);
> >
> > -    tc = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
> > -
> >      flower->key.eth_type = (OVS_FORCE ovs_be16)
> tc_get_minor(tc->tcm_info);
> >      flower->mask.eth_type = OVS_BE16_MAX;
> >      id->prio = tc_get_major(tc->tcm_info);
> > @@ -2215,8 +2238,7 @@ parse_netlink_to_tc_flower(struct ofpbuf *reply,
> struct tcf_id *id,
> >          return EAGAIN;
> >      }
> >
> > -    if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof *tc,
> > -                         tca_policy, ta, ARRAY_SIZE(ta))) {
> > +    if (!nl_policy_parse(&b, 0, tca_policy, ta, ARRAY_SIZE(ta))) {
> >          VLOG_ERR_RL(&error_rl, "failed to parse tca policy");
> >          return EPROTO;
> >      }
> > @@ -2237,13 +2259,16 @@ parse_netlink_to_tc_flower(struct ofpbuf *reply,
> struct tcf_id *id,
> >  int
> >  parse_netlink_to_tc_chain(struct ofpbuf *reply, uint32_t *chain)
> >  {
> > +    struct ofpbuf b = ofpbuf_const_initializer(reply->data,
> reply->size);
> > +    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
> > +    struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
> >      struct nlattr *ta[ARRAY_SIZE(tca_chain_policy)];
> > -    struct tcmsg *tc;
> >
> > -    tc = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
> > +    if (!nlmsg || !tc) {
> > +        return EPROTO;
> > +    }
> >
> > -    if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof *tc,
> > -                         tca_chain_policy, ta, ARRAY_SIZE(ta))) {
> > +    if (!nl_policy_parse(&b, 0, tca_chain_policy, ta, ARRAY_SIZE(ta))) {
> >          VLOG_ERR_RL(&error_rl, "failed to parse tca chain policy");
> >          return EINVAL;
> >      }
> > @@ -2307,21 +2332,26 @@ int
> >  parse_netlink_to_tc_policer(struct ofpbuf *reply, uint32_t police_idx[])
> >  {
> >      static struct nl_policy actions_orders_policy[TCA_ACT_MAX_PRIO] =
> {};
> > +    struct ofpbuf b = ofpbuf_const_initializer(reply->data,
> reply->size);
> >      struct nlattr *actions_orders[ARRAY_SIZE(actions_orders_policy)];
> > +    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
> >      const int max_size = ARRAY_SIZE(actions_orders_policy);
> > +    struct tcamsg *tca = ofpbuf_try_pull(&b, sizeof *tca);
> >      const struct nlattr *actions;
> >      struct tc_flower flower;
> > -    struct tcamsg *tca;
> >      int i, cnt = 0;
> >      int err;
> >
> > +    if (!nlmsg || !tca) {
> > +        return EPROTO;
> > +    }
> > +
> >      for (i = 0; i < max_size; i++) {
> >          actions_orders_policy[i].type = NL_A_NESTED;
> >          actions_orders_policy[i].optional = true;
> >      }
> >
> > -    tca = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tca);
> > -    actions = nl_attr_find(reply, NLMSG_HDRLEN + sizeof *tca,
> TCA_ACT_TAB);
> > +    actions = nl_attr_find(&b, 0, TCA_ACT_TAB);
> >      if (!actions || !nl_parse_nested(actions, actions_orders_policy,
> >                                       actions_orders, max_size)) {
> >          VLOG_ERR_RL(&error_rl,
> > @@ -3823,8 +3853,13 @@ tc_replace_flower(struct tcf_id *id, struct
> tc_flower *flower)
> >
> >      error = tc_transact(&request, &reply);
> >      if (!error) {
> > -        struct tcmsg *tc =
> > -            ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
> > +        struct ofpbuf b = ofpbuf_const_initializer(reply->data,
> reply->size);
> > +        struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
> > +        struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
> > +
> > +        if (!nlmsg || !tc) {
> > +            return EPROTO;
> > +        }
> >
> >          id->prio = tc_get_major(tc->tcm_info);
> >          id->handle = tc->tcm_handle;
>
>
Ilya Maximets June 5, 2023, 10:37 p.m. UTC | #3
On 6/5/23 23:45, Frode Nordahl wrote:
> 
> 
> man. 5. jun. 2023, 23:02 skrev Ilya Maximets <i.maximets@ovn.org <mailto:i.maximets@ovn.org>>:
> 
>     On 5/31/23 15:49, Frode Nordahl wrote:
>     > The tc module combines the use of the `tc_transact` helper
>     > function for communication with the in-kernel tc infrastructure
>     > with assertions on the reply data by `ofpbuf_at_assert` on the
>     > received data prior to further processing.
>     >
>     > With the presence of bugs on the kernel side, we need to treat
>     > the kernel as an unreliable service provider and replace assertions
>     > on the reply from it with checks to avoid a fatal crash of OVS.
>     >
>     > For the record, the symptom of the crash is this in the log:
>     > EMER|../include/openvswitch/ofpbuf.h:194: assertion offset + size <= b->size failed in ofpbuf_at_assert()
>     >
>     > And an excerpt of the backtrace looks like this:
>     > 0x0000561dac1396d1 in ofpbuf_at_assert (b=0x7fb650005d20, b=0x7fb650005d20, offset=16, size=20) at ../include/openvswitch/ofpbuf.h:194
>     > tc_replace_flower (id=<optimized out>, flower=<optimized out>) at ../lib/tc.c:3223
>     > 0x0000561dac128155 in netdev_tc_flow_put (netdev=0x561dacf91840, match=<optimized out>, actions=<optimized out>, actions_len=<optimized out>,
>     > ufid=<optimized out>, info=<optimized out>, stats=<optimized out>) at ../lib/netdev-offload-tc.c:2096
>     > 0x0000561dac117541 in netdev_flow_put (stats=<optimized out>, info=0x7fb65b7ba780, ufid=<optimized out>, act_len=<optimized out>, actions=<optimized out>,
>     > match=0x7fb65b7ba980, netdev=0x561dacf91840) at ../lib/netdev-offload.c:257
>     > parse_flow_put (put=0x7fb65b7bcc50, dpif=0x561dad0ad550) at ../lib/dpif-netlink.c:2297
>     > try_send_to_netdev (op=0x7fb65b7bcc48, dpif=0x561dad0ad550) at ../lib/dpif-netlink.c:2384
>     >
>     > Reported-At: https://launchpad.net/bugs/2018500 <https://launchpad.net/bugs/2018500>
>     > Fixes: f98e418fbdb6 ("tc: Add tc flower functions")
>     > Signed-off-by: Frode Nordahl <frode.nordahl@canonical.com <mailto:frode.nordahl@canonical.com>>
>     > ---
>     >  lib/netdev-linux.c      |  7 ++---
>     >  lib/netdev-offload-tc.c |  8 ++++-
>     >  lib/tc.c                | 65 +++++++++++++++++++++++++++++++----------
>     >  3 files changed, 60 insertions(+), 20 deletions(-)
> 
>     Thanks for the update!  This version looks mostly fine to me.
>     See some comments inline.
> 
> 
> Thanks for taking the time to review, I have one question below and will provide an update asap.
> 
>     Best regards, Ilya Maximets.
> 
>     >
>     > diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
>     > index 36620199e..0a1a08527 100644
>     > --- a/lib/netdev-linux.c
>     > +++ b/lib/netdev-linux.c
>     > @@ -6235,8 +6235,7 @@ tc_query_qdisc(const struct netdev *netdev_)
>     >       * On Linux 2.6.35+ we use the straightforward method because it allows us
>     >       * to handle non-builtin qdiscs without handle 1:0 (e.g. codel).  However,
>     >       * in such a case we get no response at all from the kernel (!) if a
>     > -     * builtin qdisc is in use (which is later caught by "!error &&
>     > -     * !qdisc->size"). */
>     > +     * builtin qdisc is in use (which is later caught by "error == EPROTO" */
>     >      tcmsg = netdev_linux_tc_make_request(netdev_, RTM_GETQDISC, NLM_F_ECHO,
>     >                                           &request);
>     >      if (!tcmsg) {
>     > @@ -6247,7 +6246,7 @@ tc_query_qdisc(const struct netdev *netdev_)
>     > 
>     >      /* Figure out what tc class to instantiate. */
>     >      error = tc_transact(&request, &qdisc);
>     > -    if (!error && qdisc->size) {
>     > +    if (!error) {
>     >          const char *kind;
>     > 
>     >          error = tc_parse_qdisc(qdisc, &kind, NULL);
>     > @@ -6262,7 +6261,7 @@ tc_query_qdisc(const struct netdev *netdev_)
>     >                  ops = &tc_ops_other;
>     >              }
>     >          }
>     > -    } else if ((!error && !qdisc->size) || error == ENOENT) {
>     > +    } else if (error == ENOENT || error == EPROTO) {
>     >          /* Either it's a built-in qdisc, or (on Linux pre-2.6.35) it's a qdisc
>     >           * set up by some other entity that doesn't have a handle 1:0.  We will
>     >           * assume that it's the system default qdisc. */
> 
>     There are 3 more cases of ofpbuf_at_assert() in this file:
>       - tc_update_policer_action_stats
>       - tc_parse_class
>       - tc_add_matchall_policer
> 
>     First two already have a length check before asserting, so changes are not
>     strictly necessary.  The last one though asserts right after tc_transact, so
>     needs fixing.  It doesn't seem to do anything useful with the reply, so maybe
>     we need to just delete that access or not provide the 'reply' pointer in the
>     first place.
> 
> 
> I must have missed those as we changed approach from v4 to v5, sorry about that. Will check and update!
> 
>     > diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
>     > index 4f26dd8cc..f89aa3270 100644
>     > --- a/lib/netdev-offload-tc.c
>     > +++ b/lib/netdev-offload-tc.c
>     > @@ -1247,7 +1247,13 @@ parse_tc_flower_to_match(const struct netdev *netdev,
>     >      }
>     >      nl_msg_end_nested(buf, act_off);
>     > 
>     > -    *actions = ofpbuf_at_assert(buf, act_off, sizeof(struct nlattr));
>     > +    struct nlattr *act = ofpbuf_at(buf, act_off, sizeof *act);
>     > +    if (!act) {
>     > +        VLOG_ERR_RL(&error_rl, "failed to parse flower, unexpectedly found no "
>     > +                    "actions.");
>     > +        return -EPROTO;
>     > +    }
> 
>     We can keep an assertion here.  It's not a buffer received from the kernel,
>     it's a buffer we're creating.  Also, nl_msg_end_nested() on a previous line
>     already called the same ofpbuf_at_assert(), so it should not be possible
>     to reach this check.
> 
> 
> Ack
> 
>     > +    *actions = act;
>     > 
>     >      parse_tc_flower_to_stats(flower, stats);
>     >      parse_tc_flower_to_attrs(flower, attrs);
>     > diff --git a/lib/tc.c b/lib/tc.c
>     > index 5c32c6f97..6a763d171 100644
>     > --- a/lib/tc.c
>     > +++ b/lib/tc.c
>     > @@ -237,11 +237,34 @@ static void request_from_tcf_id(struct tcf_id *id, uint16_t eth_type,
>     >      }
>     >  }
>     > 
>     > +/* The `tc_transact` function is a wrapper around `nl_transact` with the
>     > + * addition of:
>     > + *
>     > + * 1. the 'request' ofpbuf buffer is freed after `nl_transact` returns,
>     > + *    regardless of success or failure.
>     > + *
>     > + * 2. When a 'replyp' pointer is provided; in the event of the kernel
>     > + *    providing an empty response, a positive error return will be provided
>     > + *    with the value of EPROTO.
>     > + *
>     > + * Please acquaint yourself with the documentation of the `nl_transact`
>     > + * function in the netlink-socket module before making use of this function.
>     > + */
>     >  int
>     >  tc_transact(struct ofpbuf *request, struct ofpbuf **replyp)
>     >  {
>     >      int error = nl_transact(NETLINK_ROUTE, request, replyp);
>     >      ofpbuf_uninit(request);
>     > +
>     > +    if (!error && replyp && *replyp && !(*replyp)->size) {
>     > +        /* We replicate the behavior of `nl_transact` for error conditions and
>     > +         * free any allocations before setting the 'replyp' buffer to NULL. */
>     > +        ofpbuf_delete(*replyp);
>     > +        *replyp = NULL;
>     > +
>     > +        return EPROTO;
> 
>     You didn't add a log message here and its probably fine.  But maybe we can
>     add a coverage counter to at least know that the error condition happened?
>     Somehting like 'tc_malformed_netlink_reply' ?  We could use the same counter
>     also in all the cases below.
> 
> 
> Not adding log in this specific location was deliberate as some consumers (tc_query_qdisc()) squelch errors from here and as such logging something here would cause the ERR/WARN check in tests to trigger.

Hrm.  RTM_GETQDISC indeed doesn't fail and doesn't return anything
if only built-in qdosc is present.  IMHO, that is a horrible design.

I see two ways out of this:

1. Remove error checking from tc_transact() and only check in callers,
   because EPROTO is really not the correct thing to return for
   RTM_GETQDISC, if that is how it "supposed to work".  This way
   tc_query_qdisc() can keep its current checks.

2. Keep the error checking in tc_transact() in this patch and convert
   this oddball tc_query_qdisc() to use nl_transact() directly, since
   it is the only weird interface we have (hopefully).

Both cases are OK from my point of view.

> 
> For the other places I concluded that the error would be logged somewhere else.
> 
>     What do you think?
> 
> 
> Adding a coverage counter is fine and the proposed name is good. I did almost carry that over from the previous approach, but I found that there are lots of places where we return EPROTO, so the patch became "noisy" and hard to backport.
> 
> Asking to avoid potential respin: Would it make sense to only add the coverage counter those places the patch converts to using ofpbuf_try_pull() + return EPROTO, and not everywhere else EPROTO is returned on the back of other checks in TC related modules?

I meant the counter to count only "hard failures", i.e. missing base
netlink header or base tc[a]msg header.  Most of deeper parsing code
already has some form of error messages in place.  So having a counter
only in places you're changing below in lib/tc.c is probably OK for now.

Thinking more about it we may want to have separate counter for the
no-reply case in tc_transact() and another counter for truncated or
broken base headers during the later parsing.  Something like:
 - tc_netlink_malformed_reply
 - tc_netlink_no_reply/missing_reply

Of course, this is only if we go with case #2 above.  Otherwise, the
no-reply counter is not needed as we shouldn't count the RTM_GETQDISC
as error.


> 
> --
> Frode Nordahl 
> 
> 
>     > +    }
>     > +
>     >      return error;
>     >  }
>     > 
>     > @@ -2190,18 +2213,18 @@ int
>     >  parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tcf_id *id,
>     >                             struct tc_flower *flower, bool terse)
>     >  {
>     > -    struct tcmsg *tc;
>     > +    struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
>     > +    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
>     > +    struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
>     >      struct nlattr *ta[ARRAY_SIZE(tca_policy)];
>     >      const char *kind;
>     > 
>     > -    if (NLMSG_HDRLEN + sizeof *tc > reply->size) {
>     > +    if (!nlmsg || !tc) {
>     >          return EPROTO;
>     >      }
>     > 
>     >      memset(flower, 0, sizeof *flower);
>     > 
>     > -    tc = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
>     > -
>     >      flower->key.eth_type = (OVS_FORCE ovs_be16) tc_get_minor(tc->tcm_info);
>     >      flower->mask.eth_type = OVS_BE16_MAX;
>     >      id->prio = tc_get_major(tc->tcm_info);
>     > @@ -2215,8 +2238,7 @@ parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tcf_id *id,
>     >          return EAGAIN;
>     >      }
>     > 
>     > -    if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof *tc,
>     > -                         tca_policy, ta, ARRAY_SIZE(ta))) {
>     > +    if (!nl_policy_parse(&b, 0, tca_policy, ta, ARRAY_SIZE(ta))) {
>     >          VLOG_ERR_RL(&error_rl, "failed to parse tca policy");
>     >          return EPROTO;
>     >      }
>     > @@ -2237,13 +2259,16 @@ parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tcf_id *id,
>     >  int
>     >  parse_netlink_to_tc_chain(struct ofpbuf *reply, uint32_t *chain)
>     >  {
>     > +    struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
>     > +    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
>     > +    struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
>     >      struct nlattr *ta[ARRAY_SIZE(tca_chain_policy)];
>     > -    struct tcmsg *tc;
>     > 
>     > -    tc = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
>     > +    if (!nlmsg || !tc) {
>     > +        return EPROTO;
>     > +    }
>     > 
>     > -    if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof *tc,
>     > -                         tca_chain_policy, ta, ARRAY_SIZE(ta))) {
>     > +    if (!nl_policy_parse(&b, 0, tca_chain_policy, ta, ARRAY_SIZE(ta))) {
>     >          VLOG_ERR_RL(&error_rl, "failed to parse tca chain policy");
>     >          return EINVAL;
>     >      }
>     > @@ -2307,21 +2332,26 @@ int
>     >  parse_netlink_to_tc_policer(struct ofpbuf *reply, uint32_t police_idx[])
>     >  {
>     >      static struct nl_policy actions_orders_policy[TCA_ACT_MAX_PRIO] = {};
>     > +    struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
>     >      struct nlattr *actions_orders[ARRAY_SIZE(actions_orders_policy)];
>     > +    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
>     >      const int max_size = ARRAY_SIZE(actions_orders_policy);
>     > +    struct tcamsg *tca = ofpbuf_try_pull(&b, sizeof *tca);
>     >      const struct nlattr *actions;
>     >      struct tc_flower flower;
>     > -    struct tcamsg *tca;
>     >      int i, cnt = 0;
>     >      int err;
>     > 
>     > +    if (!nlmsg || !tca) {
>     > +        return EPROTO;
>     > +    }
>     > +
>     >      for (i = 0; i < max_size; i++) {
>     >          actions_orders_policy[i].type = NL_A_NESTED;
>     >          actions_orders_policy[i].optional = true;
>     >      }
>     > 
>     > -    tca = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tca);
>     > -    actions = nl_attr_find(reply, NLMSG_HDRLEN + sizeof *tca, TCA_ACT_TAB);
>     > +    actions = nl_attr_find(&b, 0, TCA_ACT_TAB);
>     >      if (!actions || !nl_parse_nested(actions, actions_orders_policy,
>     >                                       actions_orders, max_size)) {
>     >          VLOG_ERR_RL(&error_rl,
>     > @@ -3823,8 +3853,13 @@ tc_replace_flower(struct tcf_id *id, struct tc_flower *flower)
>     > 
>     >      error = tc_transact(&request, &reply);
>     >      if (!error) {
>     > -        struct tcmsg *tc =
>     > -            ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
>     > +        struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
>     > +        struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
>     > +        struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
>     > +
>     > +        if (!nlmsg || !tc) {
>     > +            return EPROTO;
>     > +        }
>     > 
>     >          id->prio = tc_get_major(tc->tcm_info);
>     >          id->handle = tc->tcm_handle;
>
diff mbox series

Patch

diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 36620199e..0a1a08527 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -6235,8 +6235,7 @@  tc_query_qdisc(const struct netdev *netdev_)
      * On Linux 2.6.35+ we use the straightforward method because it allows us
      * to handle non-builtin qdiscs without handle 1:0 (e.g. codel).  However,
      * in such a case we get no response at all from the kernel (!) if a
-     * builtin qdisc is in use (which is later caught by "!error &&
-     * !qdisc->size"). */
+     * builtin qdisc is in use (which is later caught by "error == EPROTO" */
     tcmsg = netdev_linux_tc_make_request(netdev_, RTM_GETQDISC, NLM_F_ECHO,
                                          &request);
     if (!tcmsg) {
@@ -6247,7 +6246,7 @@  tc_query_qdisc(const struct netdev *netdev_)
 
     /* Figure out what tc class to instantiate. */
     error = tc_transact(&request, &qdisc);
-    if (!error && qdisc->size) {
+    if (!error) {
         const char *kind;
 
         error = tc_parse_qdisc(qdisc, &kind, NULL);
@@ -6262,7 +6261,7 @@  tc_query_qdisc(const struct netdev *netdev_)
                 ops = &tc_ops_other;
             }
         }
-    } else if ((!error && !qdisc->size) || error == ENOENT) {
+    } else if (error == ENOENT || error == EPROTO) {
         /* Either it's a built-in qdisc, or (on Linux pre-2.6.35) it's a qdisc
          * set up by some other entity that doesn't have a handle 1:0.  We will
          * assume that it's the system default qdisc. */
diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
index 4f26dd8cc..f89aa3270 100644
--- a/lib/netdev-offload-tc.c
+++ b/lib/netdev-offload-tc.c
@@ -1247,7 +1247,13 @@  parse_tc_flower_to_match(const struct netdev *netdev,
     }
     nl_msg_end_nested(buf, act_off);
 
-    *actions = ofpbuf_at_assert(buf, act_off, sizeof(struct nlattr));
+    struct nlattr *act = ofpbuf_at(buf, act_off, sizeof *act);
+    if (!act) {
+        VLOG_ERR_RL(&error_rl, "failed to parse flower, unexpectedly found no "
+                    "actions.");
+        return -EPROTO;
+    }
+    *actions = act;
 
     parse_tc_flower_to_stats(flower, stats);
     parse_tc_flower_to_attrs(flower, attrs);
diff --git a/lib/tc.c b/lib/tc.c
index 5c32c6f97..6a763d171 100644
--- a/lib/tc.c
+++ b/lib/tc.c
@@ -237,11 +237,34 @@  static void request_from_tcf_id(struct tcf_id *id, uint16_t eth_type,
     }
 }
 
+/* The `tc_transact` function is a wrapper around `nl_transact` with the
+ * addition of:
+ *
+ * 1. the 'request' ofpbuf buffer is freed after `nl_transact` returns,
+ *    regardless of success or failure.
+ *
+ * 2. When a 'replyp' pointer is provided; in the event of the kernel
+ *    providing an empty response, a positive error return will be provided
+ *    with the value of EPROTO.
+ *
+ * Please acquaint yourself with the documentation of the `nl_transact`
+ * function in the netlink-socket module before making use of this function.
+ */
 int
 tc_transact(struct ofpbuf *request, struct ofpbuf **replyp)
 {
     int error = nl_transact(NETLINK_ROUTE, request, replyp);
     ofpbuf_uninit(request);
+
+    if (!error && replyp && *replyp && !(*replyp)->size) {
+        /* We replicate the behavior of `nl_transact` for error conditions and
+         * free any allocations before setting the 'replyp' buffer to NULL. */
+        ofpbuf_delete(*replyp);
+        *replyp = NULL;
+
+        return EPROTO;
+    }
+
     return error;
 }
 
@@ -2190,18 +2213,18 @@  int
 parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tcf_id *id,
                            struct tc_flower *flower, bool terse)
 {
-    struct tcmsg *tc;
+    struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
+    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
+    struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
     struct nlattr *ta[ARRAY_SIZE(tca_policy)];
     const char *kind;
 
-    if (NLMSG_HDRLEN + sizeof *tc > reply->size) {
+    if (!nlmsg || !tc) {
         return EPROTO;
     }
 
     memset(flower, 0, sizeof *flower);
 
-    tc = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
-
     flower->key.eth_type = (OVS_FORCE ovs_be16) tc_get_minor(tc->tcm_info);
     flower->mask.eth_type = OVS_BE16_MAX;
     id->prio = tc_get_major(tc->tcm_info);
@@ -2215,8 +2238,7 @@  parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tcf_id *id,
         return EAGAIN;
     }
 
-    if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof *tc,
-                         tca_policy, ta, ARRAY_SIZE(ta))) {
+    if (!nl_policy_parse(&b, 0, tca_policy, ta, ARRAY_SIZE(ta))) {
         VLOG_ERR_RL(&error_rl, "failed to parse tca policy");
         return EPROTO;
     }
@@ -2237,13 +2259,16 @@  parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tcf_id *id,
 int
 parse_netlink_to_tc_chain(struct ofpbuf *reply, uint32_t *chain)
 {
+    struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
+    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
+    struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
     struct nlattr *ta[ARRAY_SIZE(tca_chain_policy)];
-    struct tcmsg *tc;
 
-    tc = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
+    if (!nlmsg || !tc) {
+        return EPROTO;
+    }
 
-    if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof *tc,
-                         tca_chain_policy, ta, ARRAY_SIZE(ta))) {
+    if (!nl_policy_parse(&b, 0, tca_chain_policy, ta, ARRAY_SIZE(ta))) {
         VLOG_ERR_RL(&error_rl, "failed to parse tca chain policy");
         return EINVAL;
     }
@@ -2307,21 +2332,26 @@  int
 parse_netlink_to_tc_policer(struct ofpbuf *reply, uint32_t police_idx[])
 {
     static struct nl_policy actions_orders_policy[TCA_ACT_MAX_PRIO] = {};
+    struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
     struct nlattr *actions_orders[ARRAY_SIZE(actions_orders_policy)];
+    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
     const int max_size = ARRAY_SIZE(actions_orders_policy);
+    struct tcamsg *tca = ofpbuf_try_pull(&b, sizeof *tca);
     const struct nlattr *actions;
     struct tc_flower flower;
-    struct tcamsg *tca;
     int i, cnt = 0;
     int err;
 
+    if (!nlmsg || !tca) {
+        return EPROTO;
+    }
+
     for (i = 0; i < max_size; i++) {
         actions_orders_policy[i].type = NL_A_NESTED;
         actions_orders_policy[i].optional = true;
     }
 
-    tca = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tca);
-    actions = nl_attr_find(reply, NLMSG_HDRLEN + sizeof *tca, TCA_ACT_TAB);
+    actions = nl_attr_find(&b, 0, TCA_ACT_TAB);
     if (!actions || !nl_parse_nested(actions, actions_orders_policy,
                                      actions_orders, max_size)) {
         VLOG_ERR_RL(&error_rl,
@@ -3823,8 +3853,13 @@  tc_replace_flower(struct tcf_id *id, struct tc_flower *flower)
 
     error = tc_transact(&request, &reply);
     if (!error) {
-        struct tcmsg *tc =
-            ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
+        struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
+        struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
+        struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
+
+        if (!nlmsg || !tc) {
+            return EPROTO;
+        }
 
         id->prio = tc_get_major(tc->tcm_info);
         id->handle = tc->tcm_handle;