diff mbox series

[ovs-dev,1/2] tun_metadata: Fix coredump caused by use-after-free bug

Message ID 1585252702-8649-1-git-send-email-pkusunyifeng@gmail.com
State Superseded
Headers show
Series [ovs-dev,1/2] tun_metadata: Fix coredump caused by use-after-free bug | expand

Commit Message

Yifeng Sun March 26, 2020, 7:58 p.m. UTC
Tun_metadata can be referened by flow and frozen_state at the same
time. When ovs-vswitchd handles TLV table mod message, the involved
tun_metadata gets freed. The call trace to free tun_metadata is
shown as below:

ofproto_run
- handle_openflow
  - handle_single_part_openflow
    - handle_tlv_table_mod
      - tun_metadata_table_mod
        - tun_metadata_postpone_free

Unfortunately, this tun_metadata can be still used by some frozen_state,
and later on when frozen_state tries to access its tun_metadata table,
ovs-vswitchd crashes. The call trace to access tun_metadata from
frozen_state is shown as below:

udpif_upcall_handler
- recv_upcalls
  - process_upcall
    - frozen_metadata_to_flow

This patch fixes it by introducing a reference count to tun_metadata.
Whenever a pointer of tun_metadata is passed between flow and
frozen_state, we increase its reference count. Reference count
is decreased at deallocation.

In present code, pointer of tun_metadata can be passed between flows.
It is safe because of RCU mechanism.

VMware-BZ: #2526222
Signed-off-by: Yifeng Sun <pkusunyifeng@gmail.com>
---
 lib/tun-metadata.c         | 29 ++++++++++++++++++++++++++++-
 lib/tun-metadata.h         |  2 ++
 ofproto/ofproto-dpif-rid.c |  8 ++++++++
 ofproto/ofproto-dpif-rid.h |  2 ++
 4 files changed, 40 insertions(+), 1 deletion(-)

Comments

Tonghao Zhang March 27, 2020, 9:44 a.m. UTC | #1
On Fri, Mar 27, 2020 at 3:58 AM Yifeng Sun <pkusunyifeng@gmail.com> wrote:
>
> Tun_metadata can be referened by flow and frozen_state at the same
> time. When ovs-vswitchd handles TLV table mod message, the involved
> tun_metadata gets freed. The call trace to free tun_metadata is
> shown as below:
>
> ofproto_run
> - handle_openflow
>   - handle_single_part_openflow
>     - handle_tlv_table_mod
>       - tun_metadata_table_mod
>         - tun_metadata_postpone_free
>
> Unfortunately, this tun_metadata can be still used by some frozen_state,
> and later on when frozen_state tries to access its tun_metadata table,
> ovs-vswitchd crashes. The call trace to access tun_metadata from
> frozen_state is shown as below:
>
> udpif_upcall_handler
> - recv_upcalls
>   - process_upcall
>     - frozen_metadata_to_flow
>
> This patch fixes it by introducing a reference count to tun_metadata.
> Whenever a pointer of tun_metadata is passed between flow and
> frozen_state, we increase its reference count. Reference count
> is decreased at deallocation.
>
> In present code, pointer of tun_metadata can be passed between flows.
> It is safe because of RCU mechanism.
>
> VMware-BZ: #2526222
> Signed-off-by: Yifeng Sun <pkusunyifeng@gmail.com>
> ---
>  lib/tun-metadata.c         | 29 ++++++++++++++++++++++++++++-
>  lib/tun-metadata.h         |  2 ++
>  ofproto/ofproto-dpif-rid.c |  8 ++++++++
>  ofproto/ofproto-dpif-rid.h |  2 ++
>  4 files changed, 40 insertions(+), 1 deletion(-)
>
> diff --git a/lib/tun-metadata.c b/lib/tun-metadata.c
> index f8a0e19524e9..c4218a034a92 100644
> --- a/lib/tun-metadata.c
> +++ b/lib/tun-metadata.c
> @@ -25,6 +25,7 @@
>  #include "nx-match.h"
>  #include "odp-netlink.h"
>  #include "openvswitch/ofp-match.h"
> +#include "ovs-atomic.h"
>  #include "ovs-rcu.h"
>  #include "packets.h"
>  #include "tun-metadata.h"
> @@ -40,6 +41,11 @@ struct tun_meta_entry {
>  /* Maps from TLV option class+type to positions in a struct tun_metadata's
>   * 'opts' array.  */
>  struct tun_table {
> +     /* Struct tun_table can be referenced by struct frozen_state for a long
> +      * time. This ref_cnt protects tun_table from being freed if it is still
> +      * being used somewhere. */
> +    struct ovs_refcount ref_cnt;
> +
>      /* TUN_METADATA<i> is stored in element <i>. */
>      struct tun_meta_entry entries[TUN_METADATA_NUM_OPTS];
>
> @@ -79,6 +85,24 @@ tun_key_type(uint32_t key)
>      return key & 0xff;
>  }
>
> +void
> +tun_metadata_ref(const struct tun_table *tab)
> +{
> +    if (tab) {
> +        ovs_refcount_ref(&CONST_CAST(struct tun_table *, tab)->ref_cnt);
> +    }
> +}
> +
> +unsigned int
> +tun_metadata_unref(const struct tun_table *tab)
> +{
> +    if (tab) {
> +        return ovs_refcount_unref_relaxed(
> +                &CONST_CAST(struct tun_table *, tab)->ref_cnt);
In general, xxx_unref will free the struct data, for example,
stp_unref/netdev_unref/lldp_unref.

> +    }
> +    return -1;
> +}
> +
>  /* Returns a newly allocated tun_table.  If 'old_map' is nonnull then the new
>   * tun_table is a deep copy of the old one. */
>  struct tun_table *
> @@ -111,6 +135,7 @@ tun_metadata_alloc(const struct tun_table *old_map)
>          hmap_init(&new_map->key_hmap);
>      }
>
> +    ovs_refcount_init(&new_map->ref_cnt);
>      return new_map;
>  }
>
> @@ -135,7 +160,9 @@ tun_metadata_free(struct tun_table *map)
>  void
>  tun_metadata_postpone_free(struct tun_table *tab)
>  {
> -    ovsrcu_postpone(tun_metadata_free, tab);
> +    if (tun_metadata_unref(tab) == 1) {
> +        ovsrcu_postpone(tun_metadata_free, tab);
> +    }
>  }
>
>  enum ofperr
> diff --git a/lib/tun-metadata.h b/lib/tun-metadata.h
> index 7dad9504b8da..933021a0f679 100644
> --- a/lib/tun-metadata.h
> +++ b/lib/tun-metadata.h
> @@ -33,6 +33,8 @@ struct ofputil_tlv_table_mod;
>  struct ofputil_tlv_table_reply;
>  struct tun_table;
>
> +void tun_metadata_ref(const struct tun_table *tab);
> +unsigned int tun_metadata_unref(const struct tun_table *tab);
>  struct tun_table *tun_metadata_alloc(const struct tun_table *old_map);
>  void tun_metadata_free(struct tun_table *);
>  void tun_metadata_postpone_free(struct tun_table *);
> diff --git a/ofproto/ofproto-dpif-rid.c b/ofproto/ofproto-dpif-rid.c
> index 29aafc2c0b40..d479e53d9b2d 100644
> --- a/ofproto/ofproto-dpif-rid.c
> +++ b/ofproto/ofproto-dpif-rid.c
> @@ -201,6 +201,7 @@ static void
>  frozen_state_clone(struct frozen_state *new, const struct frozen_state *old)
>  {
>      *new = *old;
> +    tun_metadata_ref(old->metadata.tunnel.metadata.tab);
>      new->stack = (new->stack_size
>                    ? xmemdup(new->stack, new->stack_size)
>                    : NULL);
> @@ -218,10 +219,17 @@ frozen_state_clone(struct frozen_state *new, const struct frozen_state *old)
>  static void
>  frozen_state_free(struct frozen_state *state)
>  {
> +    struct tun_table *tab;
> +
>      free(state->stack);
>      free(state->ofpacts);
>      free(state->action_set);
>      free(state->userdata);
> +
> +    tab = CONST_CAST(struct tun_table *, state->metadata.tunnel.metadata.tab);
> +    if (tun_metadata_unref(tab) == 1) {
> +        tun_metadata_free(tab);
> +    }
>  }
>
>  /* Allocate a unique recirculation id for the given set of flow metadata.
> diff --git a/ofproto/ofproto-dpif-rid.h b/ofproto/ofproto-dpif-rid.h
> index e5d02caf28a3..1fefbf53b94a 100644
> --- a/ofproto/ofproto-dpif-rid.h
> +++ b/ofproto/ofproto-dpif-rid.h
> @@ -115,6 +115,7 @@ frozen_metadata_from_flow(struct frozen_metadata *md,
>  {
>      memset(md, 0, sizeof *md);
>      md->tunnel = flow->tunnel;
> +    tun_metadata_ref(flow->tunnel.metadata.tab);
should we ref it fristly, and then assign it.

>      md->metadata = flow->metadata;
>      memcpy(md->regs, flow->regs, sizeof md->regs);
>      md->in_port = flow->in_port.ofp_port;
> @@ -125,6 +126,7 @@ frozen_metadata_to_flow(const struct frozen_metadata *md,
>                          struct flow *flow)
>  {
>      flow->tunnel = md->tunnel;
> +    tun_metadata_ref(md->tunnel.metadata.tab);
>      flow->metadata = md->metadata;
>      memcpy(flow->regs, md->regs, sizeof flow->regs);
>      flow->in_port.ofp_port = md->in_port;
> --
> 2.7.4
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Yifeng Sun March 30, 2020, 5:28 p.m. UTC | #2
Thanks Tonghao.

tun_metadata_ref/unref follows the practice of ovs_refcount_ref/unref.
In this patch, we need return value of tun_metadata_unref to decide
the way to free it.

Thanks,
Yifeng

On Fri, Mar 27, 2020 at 2:45 AM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote:
>
> On Fri, Mar 27, 2020 at 3:58 AM Yifeng Sun <pkusunyifeng@gmail.com> wrote:
> >
> > Tun_metadata can be referened by flow and frozen_state at the same
> > time. When ovs-vswitchd handles TLV table mod message, the involved
> > tun_metadata gets freed. The call trace to free tun_metadata is
> > shown as below:
> >
> > ofproto_run
> > - handle_openflow
> >   - handle_single_part_openflow
> >     - handle_tlv_table_mod
> >       - tun_metadata_table_mod
> >         - tun_metadata_postpone_free
> >
> > Unfortunately, this tun_metadata can be still used by some frozen_state,
> > and later on when frozen_state tries to access its tun_metadata table,
> > ovs-vswitchd crashes. The call trace to access tun_metadata from
> > frozen_state is shown as below:
> >
> > udpif_upcall_handler
> > - recv_upcalls
> >   - process_upcall
> >     - frozen_metadata_to_flow
> >
> > This patch fixes it by introducing a reference count to tun_metadata.
> > Whenever a pointer of tun_metadata is passed between flow and
> > frozen_state, we increase its reference count. Reference count
> > is decreased at deallocation.
> >
> > In present code, pointer of tun_metadata can be passed between flows.
> > It is safe because of RCU mechanism.
> >
> > VMware-BZ: #2526222
> > Signed-off-by: Yifeng Sun <pkusunyifeng@gmail.com>
> > ---
> >  lib/tun-metadata.c         | 29 ++++++++++++++++++++++++++++-
> >  lib/tun-metadata.h         |  2 ++
> >  ofproto/ofproto-dpif-rid.c |  8 ++++++++
> >  ofproto/ofproto-dpif-rid.h |  2 ++
> >  4 files changed, 40 insertions(+), 1 deletion(-)
> >
> > diff --git a/lib/tun-metadata.c b/lib/tun-metadata.c
> > index f8a0e19524e9..c4218a034a92 100644
> > --- a/lib/tun-metadata.c
> > +++ b/lib/tun-metadata.c
> > @@ -25,6 +25,7 @@
> >  #include "nx-match.h"
> >  #include "odp-netlink.h"
> >  #include "openvswitch/ofp-match.h"
> > +#include "ovs-atomic.h"
> >  #include "ovs-rcu.h"
> >  #include "packets.h"
> >  #include "tun-metadata.h"
> > @@ -40,6 +41,11 @@ struct tun_meta_entry {
> >  /* Maps from TLV option class+type to positions in a struct tun_metadata's
> >   * 'opts' array.  */
> >  struct tun_table {
> > +     /* Struct tun_table can be referenced by struct frozen_state for a long
> > +      * time. This ref_cnt protects tun_table from being freed if it is still
> > +      * being used somewhere. */
> > +    struct ovs_refcount ref_cnt;
> > +
> >      /* TUN_METADATA<i> is stored in element <i>. */
> >      struct tun_meta_entry entries[TUN_METADATA_NUM_OPTS];
> >
> > @@ -79,6 +85,24 @@ tun_key_type(uint32_t key)
> >      return key & 0xff;
> >  }
> >
> > +void
> > +tun_metadata_ref(const struct tun_table *tab)
> > +{
> > +    if (tab) {
> > +        ovs_refcount_ref(&CONST_CAST(struct tun_table *, tab)->ref_cnt);
> > +    }
> > +}
> > +
> > +unsigned int
> > +tun_metadata_unref(const struct tun_table *tab)
> > +{
> > +    if (tab) {
> > +        return ovs_refcount_unref_relaxed(
> > +                &CONST_CAST(struct tun_table *, tab)->ref_cnt);
> In general, xxx_unref will free the struct data, for example,
> stp_unref/netdev_unref/lldp_unref.
>
> > +    }
> > +    return -1;
> > +}
> > +
> >  /* Returns a newly allocated tun_table.  If 'old_map' is nonnull then the new
> >   * tun_table is a deep copy of the old one. */
> >  struct tun_table *
> > @@ -111,6 +135,7 @@ tun_metadata_alloc(const struct tun_table *old_map)
> >          hmap_init(&new_map->key_hmap);
> >      }
> >
> > +    ovs_refcount_init(&new_map->ref_cnt);
> >      return new_map;
> >  }
> >
> > @@ -135,7 +160,9 @@ tun_metadata_free(struct tun_table *map)
> >  void
> >  tun_metadata_postpone_free(struct tun_table *tab)
> >  {
> > -    ovsrcu_postpone(tun_metadata_free, tab);
> > +    if (tun_metadata_unref(tab) == 1) {
> > +        ovsrcu_postpone(tun_metadata_free, tab);
> > +    }
> >  }
> >
> >  enum ofperr
> > diff --git a/lib/tun-metadata.h b/lib/tun-metadata.h
> > index 7dad9504b8da..933021a0f679 100644
> > --- a/lib/tun-metadata.h
> > +++ b/lib/tun-metadata.h
> > @@ -33,6 +33,8 @@ struct ofputil_tlv_table_mod;
> >  struct ofputil_tlv_table_reply;
> >  struct tun_table;
> >
> > +void tun_metadata_ref(const struct tun_table *tab);
> > +unsigned int tun_metadata_unref(const struct tun_table *tab);
> >  struct tun_table *tun_metadata_alloc(const struct tun_table *old_map);
> >  void tun_metadata_free(struct tun_table *);
> >  void tun_metadata_postpone_free(struct tun_table *);
> > diff --git a/ofproto/ofproto-dpif-rid.c b/ofproto/ofproto-dpif-rid.c
> > index 29aafc2c0b40..d479e53d9b2d 100644
> > --- a/ofproto/ofproto-dpif-rid.c
> > +++ b/ofproto/ofproto-dpif-rid.c
> > @@ -201,6 +201,7 @@ static void
> >  frozen_state_clone(struct frozen_state *new, const struct frozen_state *old)
> >  {
> >      *new = *old;
> > +    tun_metadata_ref(old->metadata.tunnel.metadata.tab);
> >      new->stack = (new->stack_size
> >                    ? xmemdup(new->stack, new->stack_size)
> >                    : NULL);
> > @@ -218,10 +219,17 @@ frozen_state_clone(struct frozen_state *new, const struct frozen_state *old)
> >  static void
> >  frozen_state_free(struct frozen_state *state)
> >  {
> > +    struct tun_table *tab;
> > +
> >      free(state->stack);
> >      free(state->ofpacts);
> >      free(state->action_set);
> >      free(state->userdata);
> > +
> > +    tab = CONST_CAST(struct tun_table *, state->metadata.tunnel.metadata.tab);
> > +    if (tun_metadata_unref(tab) == 1) {
> > +        tun_metadata_free(tab);
> > +    }
> >  }
> >
> >  /* Allocate a unique recirculation id for the given set of flow metadata.
> > diff --git a/ofproto/ofproto-dpif-rid.h b/ofproto/ofproto-dpif-rid.h
> > index e5d02caf28a3..1fefbf53b94a 100644
> > --- a/ofproto/ofproto-dpif-rid.h
> > +++ b/ofproto/ofproto-dpif-rid.h
> > @@ -115,6 +115,7 @@ frozen_metadata_from_flow(struct frozen_metadata *md,
> >  {
> >      memset(md, 0, sizeof *md);
> >      md->tunnel = flow->tunnel;
> > +    tun_metadata_ref(flow->tunnel.metadata.tab);
> should we ref it fristly, and then assign it.
>
> >      md->metadata = flow->metadata;
> >      memcpy(md->regs, flow->regs, sizeof md->regs);
> >      md->in_port = flow->in_port.ofp_port;
> > @@ -125,6 +126,7 @@ frozen_metadata_to_flow(const struct frozen_metadata *md,
> >                          struct flow *flow)
> >  {
> >      flow->tunnel = md->tunnel;
> > +    tun_metadata_ref(md->tunnel.metadata.tab);
> >      flow->metadata = md->metadata;
> >      memcpy(flow->regs, md->regs, sizeof flow->regs);
> >      flow->in_port.ofp_port = md->in_port;
> > --
> > 2.7.4
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
>
>
> --
> Best regards, Tonghao
William Tu April 6, 2020, 3:47 p.m. UTC | #3
On Thu, Mar 26, 2020 at 12:58:21PM -0700, Yifeng Sun wrote:
> Tun_metadata can be referened by flow and frozen_state at the same
> time. When ovs-vswitchd handles TLV table mod message, the involved
> tun_metadata gets freed. The call trace to free tun_metadata is
> shown as below:
> 
> ofproto_run
> - handle_openflow
>   - handle_single_part_openflow
>     - handle_tlv_table_mod
>       - tun_metadata_table_mod
>         - tun_metadata_postpone_free
> 
> Unfortunately, this tun_metadata can be still used by some frozen_state,
> and later on when frozen_state tries to access its tun_metadata table,
> ovs-vswitchd crashes. The call trace to access tun_metadata from
> frozen_state is shown as below:
> 
> udpif_upcall_handler
> - recv_upcalls
>   - process_upcall
>     - frozen_metadata_to_flow
> 
> This patch fixes it by introducing a reference count to tun_metadata.
> Whenever a pointer of tun_metadata is passed between flow and
> frozen_state, we increase its reference count. Reference count
> is decreased at deallocation.
> 
> In present code, pointer of tun_metadata can be passed between flows.
> It is safe because of RCU mechanism.
> 
> VMware-BZ: #2526222
> Signed-off-by: Yifeng Sun <pkusunyifeng@gmail.com>
> ---
>  lib/tun-metadata.c         | 29 ++++++++++++++++++++++++++++-
>  lib/tun-metadata.h         |  2 ++
>  ofproto/ofproto-dpif-rid.c |  8 ++++++++
>  ofproto/ofproto-dpif-rid.h |  2 ++
>  4 files changed, 40 insertions(+), 1 deletion(-)
> 
> diff --git a/lib/tun-metadata.c b/lib/tun-metadata.c
> index f8a0e19524e9..c4218a034a92 100644
> --- a/lib/tun-metadata.c
> +++ b/lib/tun-metadata.c
> @@ -25,6 +25,7 @@
>  #include "nx-match.h"
>  #include "odp-netlink.h"
>  #include "openvswitch/ofp-match.h"
> +#include "ovs-atomic.h"
>  #include "ovs-rcu.h"
>  #include "packets.h"
>  #include "tun-metadata.h"
> @@ -40,6 +41,11 @@ struct tun_meta_entry {
>  /* Maps from TLV option class+type to positions in a struct tun_metadata's
>   * 'opts' array.  */
>  struct tun_table {
> +     /* Struct tun_table can be referenced by struct frozen_state for a long
> +      * time. This ref_cnt protects tun_table from being freed if it is still
> +      * being used somewhere. */
> +    struct ovs_refcount ref_cnt;
> +
>      /* TUN_METADATA<i> is stored in element <i>. */
>      struct tun_meta_entry entries[TUN_METADATA_NUM_OPTS];
>  
> @@ -79,6 +85,24 @@ tun_key_type(uint32_t key)
>      return key & 0xff;
>  }
>  
> +void
> +tun_metadata_ref(const struct tun_table *tab)
> +{
> +    if (tab) {
> +        ovs_refcount_ref(&CONST_CAST(struct tun_table *, tab)->ref_cnt);
> +    }
> +}
> +
> +unsigned int
> +tun_metadata_unref(const struct tun_table *tab)
> +{
> +    if (tab) {
> +        return ovs_refcount_unref_relaxed(
> +                &CONST_CAST(struct tun_table *, tab)->ref_cnt);
> +    }
> +    return -1;
return -1 looks weird since it's unsigned int.

> +}
> +
>  /* Returns a newly allocated tun_table.  If 'old_map' is nonnull then the new
>   * tun_table is a deep copy of the old one. */
>  struct tun_table *
> @@ -111,6 +135,7 @@ tun_metadata_alloc(const struct tun_table *old_map)
>          hmap_init(&new_map->key_hmap);
>      }
>  
> +    ovs_refcount_init(&new_map->ref_cnt);
>      return new_map;
>  }
>  
> @@ -135,7 +160,9 @@ tun_metadata_free(struct tun_table *map)
>  void
>  tun_metadata_postpone_free(struct tun_table *tab)
>  {
> -    ovsrcu_postpone(tun_metadata_free, tab);
> +    if (tun_metadata_unref(tab) == 1) {
> +        ovsrcu_postpone(tun_metadata_free, tab);

If this is the last ref and "tun_metadata_unref(tab)== 1"
why not just call tun_metadata_free(tab)?

Regards,
William

> +    }
>  }
>  
>  enum ofperr
> diff --git a/lib/tun-metadata.h b/lib/tun-metadata.h
> index 7dad9504b8da..933021a0f679 100644
> --- a/lib/tun-metadata.h
> +++ b/lib/tun-metadata.h
> @@ -33,6 +33,8 @@ struct ofputil_tlv_table_mod;
>  struct ofputil_tlv_table_reply;
>  struct tun_table;
>  
> +void tun_metadata_ref(const struct tun_table *tab);
> +unsigned int tun_metadata_unref(const struct tun_table *tab);
>  struct tun_table *tun_metadata_alloc(const struct tun_table *old_map);
>  void tun_metadata_free(struct tun_table *);
>  void tun_metadata_postpone_free(struct tun_table *);
> diff --git a/ofproto/ofproto-dpif-rid.c b/ofproto/ofproto-dpif-rid.c
> index 29aafc2c0b40..d479e53d9b2d 100644
> --- a/ofproto/ofproto-dpif-rid.c
> +++ b/ofproto/ofproto-dpif-rid.c
> @@ -201,6 +201,7 @@ static void
>  frozen_state_clone(struct frozen_state *new, const struct frozen_state *old)
>  {
>      *new = *old;
> +    tun_metadata_ref(old->metadata.tunnel.metadata.tab);
>      new->stack = (new->stack_size
>                    ? xmemdup(new->stack, new->stack_size)
>                    : NULL);
> @@ -218,10 +219,17 @@ frozen_state_clone(struct frozen_state *new, const struct frozen_state *old)
>  static void
>  frozen_state_free(struct frozen_state *state)
>  {
> +    struct tun_table *tab;
> +
>      free(state->stack);
>      free(state->ofpacts);
>      free(state->action_set);
>      free(state->userdata);
> +
> +    tab = CONST_CAST(struct tun_table *, state->metadata.tunnel.metadata.tab);
> +    if (tun_metadata_unref(tab) == 1) {
> +        tun_metadata_free(tab);
> +    }
>  }
>  
>  /* Allocate a unique recirculation id for the given set of flow metadata.
> diff --git a/ofproto/ofproto-dpif-rid.h b/ofproto/ofproto-dpif-rid.h
> index e5d02caf28a3..1fefbf53b94a 100644
> --- a/ofproto/ofproto-dpif-rid.h
> +++ b/ofproto/ofproto-dpif-rid.h
> @@ -115,6 +115,7 @@ frozen_metadata_from_flow(struct frozen_metadata *md,
>  {
>      memset(md, 0, sizeof *md);
>      md->tunnel = flow->tunnel;
> +    tun_metadata_ref(flow->tunnel.metadata.tab);
>      md->metadata = flow->metadata;
>      memcpy(md->regs, flow->regs, sizeof md->regs);
>      md->in_port = flow->in_port.ofp_port;
> @@ -125,6 +126,7 @@ frozen_metadata_to_flow(const struct frozen_metadata *md,
>                          struct flow *flow)
>  {
>      flow->tunnel = md->tunnel;
> +    tun_metadata_ref(md->tunnel.metadata.tab);
>      flow->metadata = md->metadata;
>      memcpy(flow->regs, md->regs, sizeof flow->regs);
>      flow->in_port.ofp_port = md->in_port;
> -- 
> 2.7.4
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
William Tu April 7, 2020, 12:22 a.m. UTC | #4
On Mon, Apr 6, 2020 at 8:47 AM William Tu <u9012063@gmail.com> wrote:
>
> On Thu, Mar 26, 2020 at 12:58:21PM -0700, Yifeng Sun wrote:
> > Tun_metadata can be referened by flow and frozen_state at the same
> > time. When ovs-vswitchd handles TLV table mod message, the involved
> > tun_metadata gets freed. The call trace to free tun_metadata is
> > shown as below:
> >
> > ofproto_run
> > - handle_openflow
> >   - handle_single_part_openflow
> >     - handle_tlv_table_mod
> >       - tun_metadata_table_mod
> >         - tun_metadata_postpone_free
> >
> > Unfortunately, this tun_metadata can be still used by some frozen_state,
> > and later on when frozen_state tries to access its tun_metadata table,
> > ovs-vswitchd crashes. The call trace to access tun_metadata from
> > frozen_state is shown as below:
> >
> > udpif_upcall_handler
> > - recv_upcalls
> >   - process_upcall
> >     - frozen_metadata_to_flow
> >
> > This patch fixes it by introducing a reference count to tun_metadata.
> > Whenever a pointer of tun_metadata is passed between flow and
> > frozen_state, we increase its reference count. Reference count
> > is decreased at deallocation.
> >
> > In present code, pointer of tun_metadata can be passed between flows.
> > It is safe because of RCU mechanism.
> >
> > VMware-BZ: #2526222
> > Signed-off-by: Yifeng Sun <pkusunyifeng@gmail.com>
> > ---

Hi Yifeng,

Instead of introducing the ref count, how about just using the latest tun_tab?
The patch below also fixes the use-after-free coredump.

diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
index 8dfa05b71df4..9046f7d79952 100644
--- a/ofproto/ofproto-dpif-upcall.c
+++ b/ofproto/ofproto-dpif-upcall.c
@@ -1535,6 +1535,7 @@ process_upcall(struct udpif *udpif, struct upcall *upcall,
             }

             frozen_metadata_to_flow(&state->metadata, &frozen_flow);
+            frozen_flow.tunnel.metadata.tab =
ofproto_get_tun_tab(&upcall->ofproto->up);
             flow_get_metadata(&frozen_flow, &am->pin.up.base.flow_metadata);

             ofproto_dpif_send_async_msg(upcall->ofproto, am);
Yifeng Sun April 7, 2020, 9:18 p.m. UTC | #5
Thanks William for the review.

We need to use ovsrcu_postpone(tun_metadata_free, tab) because
tun_table is protected by RCU and we can only free tun_table when
all threads quiesce.

It turns out that this fix based on reference count is not beautiful, it
doesn't fit well with current RCU mechanism.

A previous commit seems fixing the samiliar issue:
254878c18874f6 (ofproto-dpif-xlate: Fix segmentation fault caused by
tun_table)

It is unsafe for frozen_state to reference tun_table because tun_table is
protected by RCU while the lifecycle of frozen_state can span several RCU
quiesce states.

As we discussed offline, we can simply nullify tun_table in frozen_state.
If frozen_state needs tun_table, the latest valid tun_table can be found
by ofproto_get_tun_tab() efficiently.

I will submit a new version.
Yifeng




On Mon, Apr 6, 2020 at 8:47 AM William Tu <u9012063@gmail.com> wrote:
>
> On Thu, Mar 26, 2020 at 12:58:21PM -0700, Yifeng Sun wrote:
> > Tun_metadata can be referened by flow and frozen_state at the same
> > time. When ovs-vswitchd handles TLV table mod message, the involved
> > tun_metadata gets freed. The call trace to free tun_metadata is
> > shown as below:
> >
> > ofproto_run
> > - handle_openflow
> >   - handle_single_part_openflow
> >     - handle_tlv_table_mod
> >       - tun_metadata_table_mod
> >         - tun_metadata_postpone_free
> >
> > Unfortunately, this tun_metadata can be still used by some frozen_state,
> > and later on when frozen_state tries to access its tun_metadata table,
> > ovs-vswitchd crashes. The call trace to access tun_metadata from
> > frozen_state is shown as below:
> >
> > udpif_upcall_handler
> > - recv_upcalls
> >   - process_upcall
> >     - frozen_metadata_to_flow
> >
> > This patch fixes it by introducing a reference count to tun_metadata.
> > Whenever a pointer of tun_metadata is passed between flow and
> > frozen_state, we increase its reference count. Reference count
> > is decreased at deallocation.
> >
> > In present code, pointer of tun_metadata can be passed between flows.
> > It is safe because of RCU mechanism.
> >
> > VMware-BZ: #2526222
> > Signed-off-by: Yifeng Sun <pkusunyifeng@gmail.com>
> > ---
> >  lib/tun-metadata.c         | 29 ++++++++++++++++++++++++++++-
> >  lib/tun-metadata.h         |  2 ++
> >  ofproto/ofproto-dpif-rid.c |  8 ++++++++
> >  ofproto/ofproto-dpif-rid.h |  2 ++
> >  4 files changed, 40 insertions(+), 1 deletion(-)
> >
> > diff --git a/lib/tun-metadata.c b/lib/tun-metadata.c
> > index f8a0e19524e9..c4218a034a92 100644
> > --- a/lib/tun-metadata.c
> > +++ b/lib/tun-metadata.c
> > @@ -25,6 +25,7 @@
> >  #include "nx-match.h"
> >  #include "odp-netlink.h"
> >  #include "openvswitch/ofp-match.h"
> > +#include "ovs-atomic.h"
> >  #include "ovs-rcu.h"
> >  #include "packets.h"
> >  #include "tun-metadata.h"
> > @@ -40,6 +41,11 @@ struct tun_meta_entry {
> >  /* Maps from TLV option class+type to positions in a struct
tun_metadata's
> >   * 'opts' array.  */
> >  struct tun_table {
> > +     /* Struct tun_table can be referenced by struct frozen_state for
a long
> > +      * time. This ref_cnt protects tun_table from being freed if it
is still
> > +      * being used somewhere. */
> > +    struct ovs_refcount ref_cnt;
> > +
> >      /* TUN_METADATA<i> is stored in element <i>. */
> >      struct tun_meta_entry entries[TUN_METADATA_NUM_OPTS];
> >
> > @@ -79,6 +85,24 @@ tun_key_type(uint32_t key)
> >      return key & 0xff;
> >  }
> >
> > +void
> > +tun_metadata_ref(const struct tun_table *tab)
> > +{
> > +    if (tab) {
> > +        ovs_refcount_ref(&CONST_CAST(struct tun_table *,
tab)->ref_cnt);
> > +    }
> > +}
> > +
> > +unsigned int
> > +tun_metadata_unref(const struct tun_table *tab)
> > +{
> > +    if (tab) {
> > +        return ovs_refcount_unref_relaxed(
> > +                &CONST_CAST(struct tun_table *, tab)->ref_cnt);
> > +    }
> > +    return -1;
> return -1 looks weird since it's unsigned int.
>
> > +}
> > +
> >  /* Returns a newly allocated tun_table.  If 'old_map' is nonnull then
the new
> >   * tun_table is a deep copy of the old one. */
> >  struct tun_table *
> > @@ -111,6 +135,7 @@ tun_metadata_alloc(const struct tun_table *old_map)
> >          hmap_init(&new_map->key_hmap);
> >      }
> >
> > +    ovs_refcount_init(&new_map->ref_cnt);
> >      return new_map;
> >  }
> >
> > @@ -135,7 +160,9 @@ tun_metadata_free(struct tun_table *map)
> >  void
> >  tun_metadata_postpone_free(struct tun_table *tab)
> >  {
> > -    ovsrcu_postpone(tun_metadata_free, tab);
> > +    if (tun_metadata_unref(tab) == 1) {
> > +        ovsrcu_postpone(tun_metadata_free, tab);
>
> If this is the last ref and "tun_metadata_unref(tab)== 1"
> why not just call tun_metadata_free(tab)?
>
> Regards,
> William
>
> > +    }
> >  }
> >
> >  enum ofperr
> > diff --git a/lib/tun-metadata.h b/lib/tun-metadata.h
> > index 7dad9504b8da..933021a0f679 100644
> > --- a/lib/tun-metadata.h
> > +++ b/lib/tun-metadata.h
> > @@ -33,6 +33,8 @@ struct ofputil_tlv_table_mod;
> >  struct ofputil_tlv_table_reply;
> >  struct tun_table;
> >
> > +void tun_metadata_ref(const struct tun_table *tab);
> > +unsigned int tun_metadata_unref(const struct tun_table *tab);
> >  struct tun_table *tun_metadata_alloc(const struct tun_table *old_map);
> >  void tun_metadata_free(struct tun_table *);
> >  void tun_metadata_postpone_free(struct tun_table *);
> > diff --git a/ofproto/ofproto-dpif-rid.c b/ofproto/ofproto-dpif-rid.c
> > index 29aafc2c0b40..d479e53d9b2d 100644
> > --- a/ofproto/ofproto-dpif-rid.c
> > +++ b/ofproto/ofproto-dpif-rid.c
> > @@ -201,6 +201,7 @@ static void
> >  frozen_state_clone(struct frozen_state *new, const struct frozen_state
*old)
> >  {
> >      *new = *old;
> > +    tun_metadata_ref(old->metadata.tunnel.metadata.tab);
> >      new->stack = (new->stack_size
> >                    ? xmemdup(new->stack, new->stack_size)
> >                    : NULL);
> > @@ -218,10 +219,17 @@ frozen_state_clone(struct frozen_state *new,
const struct frozen_state *old)
> >  static void
> >  frozen_state_free(struct frozen_state *state)
> >  {
> > +    struct tun_table *tab;
> > +
> >      free(state->stack);
> >      free(state->ofpacts);
> >      free(state->action_set);
> >      free(state->userdata);
> > +
> > +    tab = CONST_CAST(struct tun_table *,
state->metadata.tunnel.metadata.tab);
> > +    if (tun_metadata_unref(tab) == 1) {
> > +        tun_metadata_free(tab);
> > +    }
> >  }
> >
> >  /* Allocate a unique recirculation id for the given set of flow
metadata.
> > diff --git a/ofproto/ofproto-dpif-rid.h b/ofproto/ofproto-dpif-rid.h
> > index e5d02caf28a3..1fefbf53b94a 100644
> > --- a/ofproto/ofproto-dpif-rid.h
> > +++ b/ofproto/ofproto-dpif-rid.h
> > @@ -115,6 +115,7 @@ frozen_metadata_from_flow(struct frozen_metadata
*md,
> >  {
> >      memset(md, 0, sizeof *md);
> >      md->tunnel = flow->tunnel;
> > +    tun_metadata_ref(flow->tunnel.metadata.tab);
> >      md->metadata = flow->metadata;
> >      memcpy(md->regs, flow->regs, sizeof md->regs);
> >      md->in_port = flow->in_port.ofp_port;
> > @@ -125,6 +126,7 @@ frozen_metadata_to_flow(const struct
frozen_metadata *md,
> >                          struct flow *flow)
> >  {
> >      flow->tunnel = md->tunnel;
> > +    tun_metadata_ref(md->tunnel.metadata.tab);
> >      flow->metadata = md->metadata;
> >      memcpy(flow->regs, md->regs, sizeof flow->regs);
> >      flow->in_port.ofp_port = md->in_port;
> > --
> > 2.7.4
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
diff mbox series

Patch

diff --git a/lib/tun-metadata.c b/lib/tun-metadata.c
index f8a0e19524e9..c4218a034a92 100644
--- a/lib/tun-metadata.c
+++ b/lib/tun-metadata.c
@@ -25,6 +25,7 @@ 
 #include "nx-match.h"
 #include "odp-netlink.h"
 #include "openvswitch/ofp-match.h"
+#include "ovs-atomic.h"
 #include "ovs-rcu.h"
 #include "packets.h"
 #include "tun-metadata.h"
@@ -40,6 +41,11 @@  struct tun_meta_entry {
 /* Maps from TLV option class+type to positions in a struct tun_metadata's
  * 'opts' array.  */
 struct tun_table {
+     /* Struct tun_table can be referenced by struct frozen_state for a long
+      * time. This ref_cnt protects tun_table from being freed if it is still
+      * being used somewhere. */
+    struct ovs_refcount ref_cnt;
+
     /* TUN_METADATA<i> is stored in element <i>. */
     struct tun_meta_entry entries[TUN_METADATA_NUM_OPTS];
 
@@ -79,6 +85,24 @@  tun_key_type(uint32_t key)
     return key & 0xff;
 }
 
+void
+tun_metadata_ref(const struct tun_table *tab)
+{
+    if (tab) {
+        ovs_refcount_ref(&CONST_CAST(struct tun_table *, tab)->ref_cnt);
+    }
+}
+
+unsigned int
+tun_metadata_unref(const struct tun_table *tab)
+{
+    if (tab) {
+        return ovs_refcount_unref_relaxed(
+                &CONST_CAST(struct tun_table *, tab)->ref_cnt);
+    }
+    return -1;
+}
+
 /* Returns a newly allocated tun_table.  If 'old_map' is nonnull then the new
  * tun_table is a deep copy of the old one. */
 struct tun_table *
@@ -111,6 +135,7 @@  tun_metadata_alloc(const struct tun_table *old_map)
         hmap_init(&new_map->key_hmap);
     }
 
+    ovs_refcount_init(&new_map->ref_cnt);
     return new_map;
 }
 
@@ -135,7 +160,9 @@  tun_metadata_free(struct tun_table *map)
 void
 tun_metadata_postpone_free(struct tun_table *tab)
 {
-    ovsrcu_postpone(tun_metadata_free, tab);
+    if (tun_metadata_unref(tab) == 1) {
+        ovsrcu_postpone(tun_metadata_free, tab);
+    }
 }
 
 enum ofperr
diff --git a/lib/tun-metadata.h b/lib/tun-metadata.h
index 7dad9504b8da..933021a0f679 100644
--- a/lib/tun-metadata.h
+++ b/lib/tun-metadata.h
@@ -33,6 +33,8 @@  struct ofputil_tlv_table_mod;
 struct ofputil_tlv_table_reply;
 struct tun_table;
 
+void tun_metadata_ref(const struct tun_table *tab);
+unsigned int tun_metadata_unref(const struct tun_table *tab);
 struct tun_table *tun_metadata_alloc(const struct tun_table *old_map);
 void tun_metadata_free(struct tun_table *);
 void tun_metadata_postpone_free(struct tun_table *);
diff --git a/ofproto/ofproto-dpif-rid.c b/ofproto/ofproto-dpif-rid.c
index 29aafc2c0b40..d479e53d9b2d 100644
--- a/ofproto/ofproto-dpif-rid.c
+++ b/ofproto/ofproto-dpif-rid.c
@@ -201,6 +201,7 @@  static void
 frozen_state_clone(struct frozen_state *new, const struct frozen_state *old)
 {
     *new = *old;
+    tun_metadata_ref(old->metadata.tunnel.metadata.tab);
     new->stack = (new->stack_size
                   ? xmemdup(new->stack, new->stack_size)
                   : NULL);
@@ -218,10 +219,17 @@  frozen_state_clone(struct frozen_state *new, const struct frozen_state *old)
 static void
 frozen_state_free(struct frozen_state *state)
 {
+    struct tun_table *tab;
+
     free(state->stack);
     free(state->ofpacts);
     free(state->action_set);
     free(state->userdata);
+
+    tab = CONST_CAST(struct tun_table *, state->metadata.tunnel.metadata.tab);
+    if (tun_metadata_unref(tab) == 1) {
+        tun_metadata_free(tab);
+    }
 }
 
 /* Allocate a unique recirculation id for the given set of flow metadata.
diff --git a/ofproto/ofproto-dpif-rid.h b/ofproto/ofproto-dpif-rid.h
index e5d02caf28a3..1fefbf53b94a 100644
--- a/ofproto/ofproto-dpif-rid.h
+++ b/ofproto/ofproto-dpif-rid.h
@@ -115,6 +115,7 @@  frozen_metadata_from_flow(struct frozen_metadata *md,
 {
     memset(md, 0, sizeof *md);
     md->tunnel = flow->tunnel;
+    tun_metadata_ref(flow->tunnel.metadata.tab);
     md->metadata = flow->metadata;
     memcpy(md->regs, flow->regs, sizeof md->regs);
     md->in_port = flow->in_port.ofp_port;
@@ -125,6 +126,7 @@  frozen_metadata_to_flow(const struct frozen_metadata *md,
                         struct flow *flow)
 {
     flow->tunnel = md->tunnel;
+    tun_metadata_ref(md->tunnel.metadata.tab);
     flow->metadata = md->metadata;
     memcpy(flow->regs, md->regs, sizeof flow->regs);
     flow->in_port.ofp_port = md->in_port;