diff mbox series

[ovs-dev,ovs-dev,v2] ofproto:fix use-after-free of ofproto

Message ID 20210308033411.32040-1-guohongzhi1@huawei.com
State New
Headers show
Series [ovs-dev,ovs-dev,v2] ofproto:fix use-after-free of ofproto | expand

Commit Message

Hongzhi Guo March 8, 2021, 3:34 a.m. UTC
ASAN report use-after-free of ofproto when destroy ofproto_rule.
The rule uses both RCU and refcount, while the ofproto uses only RCU,
and the rule retains the pointer of the proto.
More importantly, ofproto cannot guarantee a longer grace period than the rule.
So when the rule is deleted, it is possible that ofproto has been released,
resulting in use-after-free of ofproto.
This patch add ref_count for ofproto to avoid use-after-free.

===================
ASAN report as following:
==10399==ERROR: AddressSanitizer: heap-use-after-free on address 0xffff61e1e420 at pc 0xaaaadcc29d1c bp 0xffff6c5fde40 sp 0xffff6c5fde60
 READ of size 8 at 0xffff61e1e420 thread T12 (urcu2)
    #0 0xaaaadcc29d1b (/usr/src/debug/openvswitch-2.12.asan.aarch64/ofproto/ofproto.c:2916)
    #1 0xaaaadcf76f5f (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/ovs-rcu.c:348 (discriminator 3))
    #2 0xaaaadcf770c7 (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/ovs-rcu.c:363)
    #3 0xaaaadcf7fa9b (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/ovs-thread.c:708)
    #4 0xffff80fde8bb in start_thread (/lib64/libpthread.so.0+0x78bb)
    #5 0xffff808fa5cb in thread_start (/lib64/libc.so.6+0xd55cb)
0xffff61e1e420 is located 32 bytes inside of 34496-byte region [0xffff61e1e400,0xffff61e26ac0)
 freed by thread T12 (urcu2) here:
    #0 0xffff8214fe33 in free (/lib64/libasan.so.4+0xd2e33)
    #1 0xaaaadcc576df (/usr/src/debug/openvswitch-2.12.asan.aarch64/ofproto/ofproto-dpif.c:734)
    #2 0xaaaadcc21acb (/usr/src/debug/openvswitch-2.12.asan.aarch64/ofproto/ofproto.c:1687)
    #3 0xaaaadcf76f5f (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/ovs-rcu.c:348 (discriminator 3))
    #4 0xaaaadcf770c7 (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/ovs-rcu.c:363)
    #5 0xaaaadcf7fa9b (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/ovs-thread.c:708)
    #6 0xffff80fde8bb in start_thread (/lib64/libpthread.so.0+0x78bb)
    #7 0xffff808fa5cb in thread_start (/lib64/libc.so.6+0xd55cb)
previously allocated by thread T0 here:
    #0 0xffff821503c3 in __interceptor_calloc (/lib64/libasan.so.4+0xd33c3)
    #1 0xaaaadd034717 in xcalloc (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/util.c:99 (discriminator 3))
    #2 0xaaaadd034767 in xzalloc (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/util.c:110)
    #3 0xaaaadcc576ab (/usr/src/debug/openvswitch-2.12.asan.aarch64/ofproto/ofproto-dpif.c:726)
    #4 0xaaaadcc1be93 in ofproto_create (/usr/src/debug/openvswitch-2.12.asan.aarch64/ofproto/ofproto.c:505)
    #5 0xaaaadcbd793f (/usr/src/debug/openvswitch-2.12.asan.aarch64/vswitchd/bridge.c:1208)
    #6 0xaaaadcbeefb7 in bridge_run (/usr/src/debug/openvswitch-2.12.asan.aarch64/vswitchd/bridge.c:3944 (discriminator 4))
    #7 0xaaaadcbfdb83 in main (/usr/src/debug/openvswitch-2.12.asan.aarch64/vswitchd/ovs-vswitchd.c:240)
    #8 0xffff80845adf in __libc_start_main (/lib64/libc.so.6+0x20adf)
    #9 0xaaaadcbd19b3 (/usr/sbin/ovs-vswitchd-2.12.asan+0x26f9b3)
    SUMMARY: AddressSanitizer: heap-use-after-free (/usr/src/debug/openvswitch-2.12.asan.aarch64/ofproto/ofproto.c:2916)

===================
TEST:
1.ASAN test for three days

CC: Jarno Rajahalme <jrajahalme@nicira.com>
Fixes: 39c9459355b6 ("Use classifier versioning.")

Signed-off-by: Hongzhi Guo <guohongzhi1@huawei.com>
Signed-off-by: hepeng <xnhp0320@gmail.com>
---
 ofproto/ofproto-dpif-xlate-cache.c |  1 +
 ofproto/ofproto-dpif.c             | 20 ++++++++++-------
 ofproto/ofproto-provider.h         |  5 +++++
 ofproto/ofproto.c                  | 35 ++++++++++++++++++++++++++----
 4 files changed, 49 insertions(+), 12 deletions(-)

Comments

Peng He March 9, 2021, 12:50 p.m. UTC | #1
Is this a version that mixes using refcount and RCU?
do we have reached an agreement?

BTW, please use my company email address: hepeng.0320@bytedance.com

thanks.

Hongzhi Guo <guohongzhi1@huawei.com> 于2021年3月8日周一 上午11:34写道:
>
> ASAN report use-after-free of ofproto when destroy ofproto_rule.
> The rule uses both RCU and refcount, while the ofproto uses only RCU,
> and the rule retains the pointer of the proto.
> More importantly, ofproto cannot guarantee a longer grace period than the rule.
> So when the rule is deleted, it is possible that ofproto has been released,
> resulting in use-after-free of ofproto.
> This patch add ref_count for ofproto to avoid use-after-free.
>
> ===================
> ASAN report as following:
> ==10399==ERROR: AddressSanitizer: heap-use-after-free on address 0xffff61e1e420 at pc 0xaaaadcc29d1c bp 0xffff6c5fde40 sp 0xffff6c5fde60
>  READ of size 8 at 0xffff61e1e420 thread T12 (urcu2)
>     #0 0xaaaadcc29d1b (/usr/src/debug/openvswitch-2.12.asan.aarch64/ofproto/ofproto.c:2916)
>     #1 0xaaaadcf76f5f (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/ovs-rcu.c:348 (discriminator 3))
>     #2 0xaaaadcf770c7 (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/ovs-rcu.c:363)
>     #3 0xaaaadcf7fa9b (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/ovs-thread.c:708)
>     #4 0xffff80fde8bb in start_thread (/lib64/libpthread.so.0+0x78bb)
>     #5 0xffff808fa5cb in thread_start (/lib64/libc.so.6+0xd55cb)
> 0xffff61e1e420 is located 32 bytes inside of 34496-byte region [0xffff61e1e400,0xffff61e26ac0)
>  freed by thread T12 (urcu2) here:
>     #0 0xffff8214fe33 in free (/lib64/libasan.so.4+0xd2e33)
>     #1 0xaaaadcc576df (/usr/src/debug/openvswitch-2.12.asan.aarch64/ofproto/ofproto-dpif.c:734)
>     #2 0xaaaadcc21acb (/usr/src/debug/openvswitch-2.12.asan.aarch64/ofproto/ofproto.c:1687)
>     #3 0xaaaadcf76f5f (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/ovs-rcu.c:348 (discriminator 3))
>     #4 0xaaaadcf770c7 (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/ovs-rcu.c:363)
>     #5 0xaaaadcf7fa9b (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/ovs-thread.c:708)
>     #6 0xffff80fde8bb in start_thread (/lib64/libpthread.so.0+0x78bb)
>     #7 0xffff808fa5cb in thread_start (/lib64/libc.so.6+0xd55cb)
> previously allocated by thread T0 here:
>     #0 0xffff821503c3 in __interceptor_calloc (/lib64/libasan.so.4+0xd33c3)
>     #1 0xaaaadd034717 in xcalloc (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/util.c:99 (discriminator 3))
>     #2 0xaaaadd034767 in xzalloc (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/util.c:110)
>     #3 0xaaaadcc576ab (/usr/src/debug/openvswitch-2.12.asan.aarch64/ofproto/ofproto-dpif.c:726)
>     #4 0xaaaadcc1be93 in ofproto_create (/usr/src/debug/openvswitch-2.12.asan.aarch64/ofproto/ofproto.c:505)
>     #5 0xaaaadcbd793f (/usr/src/debug/openvswitch-2.12.asan.aarch64/vswitchd/bridge.c:1208)
>     #6 0xaaaadcbeefb7 in bridge_run (/usr/src/debug/openvswitch-2.12.asan.aarch64/vswitchd/bridge.c:3944 (discriminator 4))
>     #7 0xaaaadcbfdb83 in main (/usr/src/debug/openvswitch-2.12.asan.aarch64/vswitchd/ovs-vswitchd.c:240)
>     #8 0xffff80845adf in __libc_start_main (/lib64/libc.so.6+0x20adf)
>     #9 0xaaaadcbd19b3 (/usr/sbin/ovs-vswitchd-2.12.asan+0x26f9b3)
>     SUMMARY: AddressSanitizer: heap-use-after-free (/usr/src/debug/openvswitch-2.12.asan.aarch64/ofproto/ofproto.c:2916)
>
> ===================
> TEST:
> 1.ASAN test for three days
>
> CC: Jarno Rajahalme <jrajahalme@nicira.com>
> Fixes: 39c9459355b6 ("Use classifier versioning.")
>
> Signed-off-by: Hongzhi Guo <guohongzhi1@huawei.com>
> Signed-off-by: hepeng <xnhp0320@gmail.com>
> ---
>  ofproto/ofproto-dpif-xlate-cache.c |  1 +
>  ofproto/ofproto-dpif.c             | 20 ++++++++++-------
>  ofproto/ofproto-provider.h         |  5 +++++
>  ofproto/ofproto.c                  | 35 ++++++++++++++++++++++++++----
>  4 files changed, 49 insertions(+), 12 deletions(-)
>
> diff --git a/ofproto/ofproto-dpif-xlate-cache.c b/ofproto/ofproto-dpif-xlate-cache.c
> index dcc91cb38..0deee365d 100644
> --- a/ofproto/ofproto-dpif-xlate-cache.c
> +++ b/ofproto/ofproto-dpif-xlate-cache.c
> @@ -209,6 +209,7 @@ xlate_cache_clear_entry(struct xc_entry *entry)
>  {
>      switch (entry->type) {
>      case XC_TABLE:
> +        ofproto_unref(&(entry->table.ofproto->up));
>          break;
>      case XC_RULE:
>          ofproto_rule_unref(&entry->rule->up);
> diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
> index fd0b2fdea..8b22eda5a 100644
> --- a/ofproto/ofproto-dpif.c
> +++ b/ofproto/ofproto-dpif.c
> @@ -4416,10 +4416,12 @@ rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto,
>              if (xcache) {
>                  struct xc_entry *entry;
>
> -                entry = xlate_cache_add_entry(xcache, XC_TABLE);
> -                entry->table.ofproto = ofproto;
> -                entry->table.id = *table_id;
> -                entry->table.match = true;
> +                if (ofproto_try_ref(&ofproto->up)) {
> +                    entry = xlate_cache_add_entry(xcache, XC_TABLE);
> +                    entry->table.ofproto = ofproto;
> +                    entry->table.id = *table_id;
> +                    entry->table.match = true;
> +                }
>              }
>              return rule;
>          }
> @@ -4452,10 +4454,12 @@ rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto,
>          if (xcache) {
>              struct xc_entry *entry;
>
> -            entry = xlate_cache_add_entry(xcache, XC_TABLE);
> -            entry->table.ofproto = ofproto;
> -            entry->table.id = next_id;
> -            entry->table.match = (rule != NULL);
> +            if (ofproto_try_ref(&ofproto->up)) {
> +                entry = xlate_cache_add_entry(xcache, XC_TABLE);
> +                entry->table.ofproto = ofproto;
> +                entry->table.id = next_id;
> +                entry->table.match = (rule != NULL);
> +            }
>          }
>          if (rule) {
>              goto out;   /* Match. */
> diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
> index 9ad2b71d2..8f2f41e0e 100644
> --- a/ofproto/ofproto-provider.h
> +++ b/ofproto/ofproto-provider.h
> @@ -139,6 +139,8 @@ struct ofproto {
>      /* Variable length mf_field mapping. Stores all configured variable length
>       * meta-flow fields (struct mf_field) in a switch. */
>      struct vl_mff_map vl_mff_map;
> +
> +    struct ovs_refcount ref_count;
>  };
>
>  void ofproto_init_tables(struct ofproto *, int n_tables);
> @@ -442,6 +444,9 @@ struct rule {
>  void ofproto_rule_ref(struct rule *);
>  bool ofproto_rule_try_ref(struct rule *);
>  void ofproto_rule_unref(struct rule *);
> +void ofproto_ref(struct ofproto *);
> +bool ofproto_try_ref(struct ofproto *);
> +void ofproto_unref(struct ofproto *);
>
>  static inline const struct rule_actions * rule_get_actions(const struct rule *);
>  static inline bool rule_is_table_miss(const struct rule *);
> diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
> index b91517cd2..7037a9684 100644
> --- a/ofproto/ofproto.c
> +++ b/ofproto/ofproto.c
> @@ -545,6 +545,7 @@ ofproto_create(const char *datapath_name, const char *datapath_type,
>
>      ovs_mutex_init(&ofproto->vl_mff_map.mutex);
>      cmap_init(&ofproto->vl_mff_map.cmap);
> +    ovs_refcount_init(&ofproto->ref_count);
>
>      error = ofproto->ofproto_class->construct(ofproto);
>      if (error) {
> @@ -1738,8 +1739,7 @@ ofproto_destroy(struct ofproto *p, bool del)
>      p->connmgr = NULL;
>      ovs_mutex_unlock(&ofproto_mutex);
>
> -    /* Destroying rules is deferred, must have 'ofproto' around for them. */
> -    ovsrcu_postpone(ofproto_destroy_defer__, p);
> +    ofproto_unref(p);
>  }
>
>  /* Destroys the datapath with the respective 'name' and 'type'.  With the Linux
> @@ -2928,6 +2928,7 @@ ofproto_rule_destroy__(struct rule *rule)
>      cls_rule_destroy(CONST_CAST(struct cls_rule *, &rule->cr));
>      rule_actions_destroy(rule_get_actions(rule));
>      ovs_mutex_destroy(&rule->mutex);
> +    ofproto_unref(rule->ofproto);
>      rule->ofproto->ofproto_class->rule_dealloc(rule);
>  }
>
> @@ -2979,6 +2980,28 @@ ofproto_rule_unref(struct rule *rule)
>      }
>  }
>
> +void ofproto_ref(struct ofproto *ofproto)
> +{
> +    if (ofproto) {
> +        ovs_refcount_ref(&ofproto->ref_count);
> +    }
> +}
> +
> +bool ofproto_try_ref(struct ofproto *ofproto)
> +{
> +    if (ofproto) {
> +        return ovs_refcount_try_ref_rcu(&ofproto->ref_count);
> +    }
> +    return false;
> +}
> +
> +void ofproto_unref(struct ofproto *ofproto)
> +{
> +    if (ofproto && ovs_refcount_unref_relaxed(&ofproto->ref_count) == 1) {
> +        ovsrcu_postpone(ofproto_destroy_defer__, ofproto);
> +    }
> +}
> +
>  static void
>  remove_rule_rcu__(struct rule *rule)
>      OVS_REQUIRES(ofproto_mutex)
> @@ -3068,6 +3091,7 @@ group_destroy_cb(struct ofgroup *group)
>                                                  &group->props));
>      ofputil_bucket_list_destroy(CONST_CAST(struct ovs_list *,
>                                             &group->buckets));
> +    ofproto_unref(group->ofproto);
>      group->ofproto->ofproto_class->group_dealloc(group);
>  }
>
> @@ -5266,6 +5290,7 @@ ofproto_rule_create(struct ofproto *ofproto, struct cls_rule *cr,
>
>      /* Initialize base state. */
>      *CONST_CAST(struct ofproto **, &rule->ofproto) = ofproto;
> +    ofproto_ref(ofproto);
>      cls_rule_move(CONST_CAST(struct cls_rule *, &rule->cr), cr);
>      ovs_refcount_init(&rule->ref_count);
>
> @@ -6214,7 +6239,7 @@ handle_flow_mod__(struct ofproto *ofproto, const struct ofputil_flow_mod *fm,
>      error = ofproto_flow_mod_start(ofproto, &ofm);
>      if (!error) {
>          ofproto_bump_tables_version(ofproto);
> -        error = ofproto_flow_mod_finish(ofproto, &ofm, req);
> +        error = ofproto_flow_mod_finish(ofproto, &ofm, req);
>          ofmonitor_flush(ofproto->connmgr);
>      }
>      ovs_mutex_unlock(&ofproto_mutex);
> @@ -7331,6 +7356,7 @@ init_group(struct ofproto *ofproto, const struct ofputil_group_mod *gm,
>      }
>
>      *CONST_CAST(struct ofproto **, &(*ofgroup)->ofproto) = ofproto;
> +    ofproto_ref(ofproto);
>      *CONST_CAST(uint32_t *, &((*ofgroup)->group_id)) = gm->group_id;
>      *CONST_CAST(enum ofp11_group_type *, &(*ofgroup)->type) = gm->type;
>      *CONST_CAST(long long int *, &((*ofgroup)->created)) = now;
> @@ -7363,6 +7389,7 @@ init_group(struct ofproto *ofproto, const struct ofputil_group_mod *gm,
>          ofputil_bucket_list_destroy(CONST_CAST(struct ovs_list *,
>                                                 &(*ofgroup)->buckets));
>          ofproto->ofproto_class->group_dealloc(*ofgroup);
> +        ofproto_unref(ofproto);
>      }
>      return error;
>  }
> @@ -8199,7 +8226,7 @@ do_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags)
>              /* Send error referring to the original message. */
>              ofconn_send_error(ofconn, be->msg, error);
>              error = OFPERR_OFPBFC_MSG_FAILED;
> -
> +
>              /* 2. Revert.  Undo all the changes made above. */
>              LIST_FOR_EACH_REVERSE_CONTINUE(be, node, &bundle->msg_list) {
>                  if (be->type == OFPTYPE_FLOW_MOD) {
> --
> 2.21.0.windows.1
>
Gaëtan Rivet April 12, 2021, 12:07 a.m. UTC | #2
On Tue, Mar 9, 2021, at 13:50, 贺鹏 wrote:
> Is this a version that mixes using refcount and RCU?
> do we have reached an agreement?
> 
> BTW, please use my company email address: hepeng.0320@bytedance.com
> 
> thanks.
> 
> Hongzhi Guo <guohongzhi1@huawei.com> 于2021年3月8日周一 上午11:34写道:
> >
> > ASAN report use-after-free of ofproto when destroy ofproto_rule.
> > The rule uses both RCU and refcount, while the ofproto uses only RCU,
> > and the rule retains the pointer of the proto.
> > More importantly, ofproto cannot guarantee a longer grace period than the rule.
> > So when the rule is deleted, it is possible that ofproto has been released,
> > resulting in use-after-free of ofproto.
> > This patch add ref_count for ofproto to avoid use-after-free.
> >

Is this a v3 of the previous patch on the subject [1]?
I don't understand the status of this series, the previous (still) v2 has a 'requested changes' state on patchwork,
but there is no changelog with the commit that could help following.

[1]: https://patchwork.ozlabs.org/project/openvswitch/patch/20200717015041.82746-1-hepeng.0320@bytedance.com/

> > ===================
> > ASAN report as following:
> > ==10399==ERROR: AddressSanitizer: heap-use-after-free on address 0xffff61e1e420 at pc 0xaaaadcc29d1c bp 0xffff6c5fde40 sp 0xffff6c5fde60
> >  READ of size 8 at 0xffff61e1e420 thread T12 (urcu2)
> >     #0 0xaaaadcc29d1b (/usr/src/debug/openvswitch-2.12.asan.aarch64/ofproto/ofproto.c:2916)
> >     #1 0xaaaadcf76f5f (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/ovs-rcu.c:348 (discriminator 3))
> >     #2 0xaaaadcf770c7 (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/ovs-rcu.c:363)
> >     #3 0xaaaadcf7fa9b (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/ovs-thread.c:708)
> >     #4 0xffff80fde8bb in start_thread (/lib64/libpthread.so.0+0x78bb)
> >     #5 0xffff808fa5cb in thread_start (/lib64/libc.so.6+0xd55cb)
> > 0xffff61e1e420 is located 32 bytes inside of 34496-byte region [0xffff61e1e400,0xffff61e26ac0)
> >  freed by thread T12 (urcu2) here:
> >     #0 0xffff8214fe33 in free (/lib64/libasan.so.4+0xd2e33)
> >     #1 0xaaaadcc576df (/usr/src/debug/openvswitch-2.12.asan.aarch64/ofproto/ofproto-dpif.c:734)
> >     #2 0xaaaadcc21acb (/usr/src/debug/openvswitch-2.12.asan.aarch64/ofproto/ofproto.c:1687)
> >     #3 0xaaaadcf76f5f (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/ovs-rcu.c:348 (discriminator 3))
> >     #4 0xaaaadcf770c7 (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/ovs-rcu.c:363)
> >     #5 0xaaaadcf7fa9b (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/ovs-thread.c:708)
> >     #6 0xffff80fde8bb in start_thread (/lib64/libpthread.so.0+0x78bb)
> >     #7 0xffff808fa5cb in thread_start (/lib64/libc.so.6+0xd55cb)
> > previously allocated by thread T0 here:
> >     #0 0xffff821503c3 in __interceptor_calloc (/lib64/libasan.so.4+0xd33c3)
> >     #1 0xaaaadd034717 in xcalloc (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/util.c:99 (discriminator 3))
> >     #2 0xaaaadd034767 in xzalloc (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/util.c:110)
> >     #3 0xaaaadcc576ab (/usr/src/debug/openvswitch-2.12.asan.aarch64/ofproto/ofproto-dpif.c:726)
> >     #4 0xaaaadcc1be93 in ofproto_create (/usr/src/debug/openvswitch-2.12.asan.aarch64/ofproto/ofproto.c:505)
> >     #5 0xaaaadcbd793f (/usr/src/debug/openvswitch-2.12.asan.aarch64/vswitchd/bridge.c:1208)
> >     #6 0xaaaadcbeefb7 in bridge_run (/usr/src/debug/openvswitch-2.12.asan.aarch64/vswitchd/bridge.c:3944 (discriminator 4))
> >     #7 0xaaaadcbfdb83 in main (/usr/src/debug/openvswitch-2.12.asan.aarch64/vswitchd/ovs-vswitchd.c:240)
> >     #8 0xffff80845adf in __libc_start_main (/lib64/libc.so.6+0x20adf)
> >     #9 0xaaaadcbd19b3 (/usr/sbin/ovs-vswitchd-2.12.asan+0x26f9b3)
> >     SUMMARY: AddressSanitizer: heap-use-after-free (/usr/src/debug/openvswitch-2.12.asan.aarch64/ofproto/ofproto.c:2916)
> >
> > ===================
> > TEST:
> > 1.ASAN test for three days

Is there a way to reproduce the issue faster? Which ASAN test is it?

If you delete and recreate a datapath in a loop for example (by adding some bridges then
deleting them all for example), it could stress test ofproto deletion, maybe this would help accelerate
reproduction?

> >
> > CC: Jarno Rajahalme <jrajahalme@nicira.com>
> > Fixes: 39c9459355b6 ("Use classifier versioning.")
> >
> > Signed-off-by: Hongzhi Guo <guohongzhi1@huawei.com>
> > Signed-off-by: hepeng <xnhp0320@gmail.com>
> > ---
> >  ofproto/ofproto-dpif-xlate-cache.c |  1 +
> >  ofproto/ofproto-dpif.c             | 20 ++++++++++-------
> >  ofproto/ofproto-provider.h         |  5 +++++
> >  ofproto/ofproto.c                  | 35 ++++++++++++++++++++++++++----
> >  4 files changed, 49 insertions(+), 12 deletions(-)
> >
> > diff --git a/ofproto/ofproto-dpif-xlate-cache.c b/ofproto/ofproto-dpif-xlate-cache.c
> > index dcc91cb38..0deee365d 100644
> > --- a/ofproto/ofproto-dpif-xlate-cache.c
> > +++ b/ofproto/ofproto-dpif-xlate-cache.c
> > @@ -209,6 +209,7 @@ xlate_cache_clear_entry(struct xc_entry *entry)
> >  {
> >      switch (entry->type) {
> >      case XC_TABLE:
> > +        ofproto_unref(&(entry->table.ofproto->up));
> >          break;
> >      case XC_RULE:
> >          ofproto_rule_unref(&entry->rule->up);
> > diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
> > index fd0b2fdea..8b22eda5a 100644
> > --- a/ofproto/ofproto-dpif.c
> > +++ b/ofproto/ofproto-dpif.c
> > @@ -4416,10 +4416,12 @@ rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto,
> >              if (xcache) {
> >                  struct xc_entry *entry;
> >
> > -                entry = xlate_cache_add_entry(xcache, XC_TABLE);
> > -                entry->table.ofproto = ofproto;
> > -                entry->table.id = *table_id;
> > -                entry->table.match = true;
> > +                if (ofproto_try_ref(&ofproto->up)) {
> > +                    entry = xlate_cache_add_entry(xcache, XC_TABLE);
> > +                    entry->table.ofproto = ofproto;

I recall Ilya asking about it, but I haven't seen a change on the subject.
In ofproto/ofproto-dpif-xlate.c, line 3028 a reference to ofproto is taken
by an XC_NORMAL entry. It probably needs a reference taken as well.

> > +                    entry->table.id = *table_id;
> > +                    entry->table.match = true;
> > +                }
> >              }
> >              return rule;
> >          }
> > @@ -4452,10 +4454,12 @@ rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto,
> >          if (xcache) {
> >              struct xc_entry *entry;
> >
> > -            entry = xlate_cache_add_entry(xcache, XC_TABLE);
> > -            entry->table.ofproto = ofproto;
> > -            entry->table.id = next_id;
> > -            entry->table.match = (rule != NULL);
> > +            if (ofproto_try_ref(&ofproto->up)) {
> > +                entry = xlate_cache_add_entry(xcache, XC_TABLE);
> > +                entry->table.ofproto = ofproto;
> > +                entry->table.id = next_id;
> > +                entry->table.match = (rule != NULL);
> > +            }
> >          }
> >          if (rule) {
> >              goto out;   /* Match. */
> > diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
> > index 9ad2b71d2..8f2f41e0e 100644
> > --- a/ofproto/ofproto-provider.h
> > +++ b/ofproto/ofproto-provider.h
> > @@ -139,6 +139,8 @@ struct ofproto {
> >      /* Variable length mf_field mapping. Stores all configured variable length
> >       * meta-flow fields (struct mf_field) in a switch. */
> >      struct vl_mff_map vl_mff_map;
> > +
> > +    struct ovs_refcount ref_count;

A comment on the field is missing.

> >  };
> >
> >  void ofproto_init_tables(struct ofproto *, int n_tables);
> > @@ -442,6 +444,9 @@ struct rule {
> >  void ofproto_rule_ref(struct rule *);
> >  bool ofproto_rule_try_ref(struct rule *);
> >  void ofproto_rule_unref(struct rule *);
> > +void ofproto_ref(struct ofproto *);
> > +bool ofproto_try_ref(struct ofproto *);
> > +void ofproto_unref(struct ofproto *);
> >
> >  static inline const struct rule_actions * rule_get_actions(const struct rule *);
> >  static inline bool rule_is_table_miss(const struct rule *);
> > diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
> > index b91517cd2..7037a9684 100644
> > --- a/ofproto/ofproto.c
> > +++ b/ofproto/ofproto.c
> > @@ -545,6 +545,7 @@ ofproto_create(const char *datapath_name, const char *datapath_type,
> >
> >      ovs_mutex_init(&ofproto->vl_mff_map.mutex);
> >      cmap_init(&ofproto->vl_mff_map.cmap);
> > +    ovs_refcount_init(&ofproto->ref_count);
> >
> >      error = ofproto->ofproto_class->construct(ofproto);
> >      if (error) {
> > @@ -1738,8 +1739,7 @@ ofproto_destroy(struct ofproto *p, bool del)
> >      p->connmgr = NULL;
> >      ovs_mutex_unlock(&ofproto_mutex);
> >
> > -    /* Destroying rules is deferred, must have 'ofproto' around for them. */
> > -    ovsrcu_postpone(ofproto_destroy_defer__, p);
> > +    ofproto_unref(p);
> >  }
> >
> >  /* Destroys the datapath with the respective 'name' and 'type'.  With the Linux
> > @@ -2928,6 +2928,7 @@ ofproto_rule_destroy__(struct rule *rule)
> >      cls_rule_destroy(CONST_CAST(struct cls_rule *, &rule->cr));
> >      rule_actions_destroy(rule_get_actions(rule));
> >      ovs_mutex_destroy(&rule->mutex);
> > +    ofproto_unref(rule->ofproto);
> >      rule->ofproto->ofproto_class->rule_dealloc(rule);

It was mentioned before, but rule->ofproto is being used here, while is has been unref just before.
It is correct, as long as ofproto is being freed after a grace period.
As you have had a remark about it before, it's a sign this is a probable pitfall for a future reader.
It might be worth it to add a small comment describing why this is ok.

> >  }
> >
> > @@ -2979,6 +2980,28 @@ ofproto_rule_unref(struct rule *rule)
> >      }
> >  }
> >
> > +void ofproto_ref(struct ofproto *ofproto)
> > +{
> > +    if (ofproto) {

This NULL check is counter-productive.
If a bad code passes a NULL ofproto, it will ignore the issue and not take a ref to it.
It will break logic while avoiding a crash. In some unknown amount of time a crash
will happen due to a missing ref, which will be harder to debug due to this check.

This comment is valid for all three functions.

> > +        ovs_refcount_ref(&ofproto->ref_count);
> > +    }
> > +}
> > +
> > +bool ofproto_try_ref(struct ofproto *ofproto)
> > +{
> > +    if (ofproto) {
> > +        return ovs_refcount_try_ref_rcu(&ofproto->ref_count);
> > +    }
> > +    return false;
> > +}
> > +
> > +void ofproto_unref(struct ofproto *ofproto)
> > +{
> > +    if (ofproto && ovs_refcount_unref_relaxed(&ofproto->ref_count) == 1) {

Why not use ovs_refcount_unref()?

> > +        ovsrcu_postpone(ofproto_destroy_defer__, ofproto);

Was a consensus reached on RCU + refcount use?

They are not mutually exclusive, but RCU was a crutch hiding the need for a refcount before.
The RCU on ofproto can be removed, with some additional changes (taking & releasing an ofproto ref
around CMAP iterations). The question is whether this should be part of this patch.

RCU is used to allow concurrent readers, refcount is used to systematize memory reclamation.
That RCU offers concurrent reads through deferred memory reclamation should not lead us
to consider them a complete functional overlap.

RCU was misused before, but beyond the incorrect double-defer, it still simplifies using some
ofproto fields. We can do without, but is it worth the added complexity?

> > +    }
> > +}
> > +
> >  static void
> >  remove_rule_rcu__(struct rule *rule)
> >      OVS_REQUIRES(ofproto_mutex)
> > @@ -3068,6 +3091,7 @@ group_destroy_cb(struct ofgroup *group)
> >                                                  &group->props));
> >      ofputil_bucket_list_destroy(CONST_CAST(struct ovs_list *,
> >                                             &group->buckets));
> > +    ofproto_unref(group->ofproto);
> >      group->ofproto->ofproto_class->group_dealloc(group);
> >  }
> >
> > @@ -5266,6 +5290,7 @@ ofproto_rule_create(struct ofproto *ofproto, struct cls_rule *cr,
> >
> >      /* Initialize base state. */
> >      *CONST_CAST(struct ofproto **, &rule->ofproto) = ofproto;
> > +    ofproto_ref(ofproto);

You may have a race between a datapath being destroyed and a rule being created.
You could use _try_ref() instead, at the start of the function to minimize the amount of initialization done.

Maybe something like:

@@ ofproto_rule_create(struct ofproto *ofproto, struct cls_rule *cr,
     struct rule *rule;
     enum ofperr error;

+    if (!ofproto_try_ref(ofproto)) {
+        cls_rule_destroy(cr);
+        return OFPERR_OFPFMFC_UNKNOWN;
+    }

> >      cls_rule_move(CONST_CAST(struct cls_rule *, &rule->cr), cr);
> >      ovs_refcount_init(&rule->ref_count);
> >
> > @@ -6214,7 +6239,7 @@ handle_flow_mod__(struct ofproto *ofproto, const struct ofputil_flow_mod *fm,
> >      error = ofproto_flow_mod_start(ofproto, &ofm);
> >      if (!error) {
> >          ofproto_bump_tables_version(ofproto);
> > -        error = ofproto_flow_mod_finish(ofproto, &ofm, req);
> > +        error = ofproto_flow_mod_finish(ofproto, &ofm, req);
> >          ofmonitor_flush(ofproto->connmgr);
> >      }
> >      ovs_mutex_unlock(&ofproto_mutex);
> > @@ -7331,6 +7356,7 @@ init_group(struct ofproto *ofproto, const struct ofputil_group_mod *gm,
> >      }
> >
> >      *CONST_CAST(struct ofproto **, &(*ofgroup)->ofproto) = ofproto;
> > +    ofproto_ref(ofproto);

Is there a chance ofproto_try_ref() should be used there as well?

> >      *CONST_CAST(uint32_t *, &((*ofgroup)->group_id)) = gm->group_id;
> >      *CONST_CAST(enum ofp11_group_type *, &(*ofgroup)->type) = gm->type;
> >      *CONST_CAST(long long int *, &((*ofgroup)->created)) = now;
> > @@ -7363,6 +7389,7 @@ init_group(struct ofproto *ofproto, const struct ofputil_group_mod *gm,
> >          ofputil_bucket_list_destroy(CONST_CAST(struct ovs_list *,
> >                                                 &(*ofgroup)->buckets));
> >          ofproto->ofproto_class->group_dealloc(*ofgroup);
> > +        ofproto_unref(ofproto);

Here you unref after using the ofproto reference, inverse to what is done in rule_destroy().
It would be better to be consistent and avoid surprising the reader.

> >      }
> >      return error;
> >  }
> > @@ -8199,7 +8226,7 @@ do_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags)
> >              /* Send error referring to the original message. */
> >              ofconn_send_error(ofconn, be->msg, error);
> >              error = OFPERR_OFPBFC_MSG_FAILED;
> > -
> > +
> >              /* 2. Revert.  Undo all the changes made above. */
> >              LIST_FOR_EACH_REVERSE_CONTINUE(be, node, &bundle->msg_list) {
> >                  if (be->type == OFPTYPE_FLOW_MOD) {
> > --
> > 2.21.0.windows.1
> >
> 
> 
> -- 
> hepeng
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>

Best regards,
Guohongzhi (Russell Lab) June 5, 2021, 11:32 a.m. UTC | #3
Gaëtan Rivet, Thank you for your detailed reviews and useful comments.
The status of this issue can be viewed in the following patches:
http://patchwork.ozlabs.org/project/openvswitch/patch/20210308033411.32040-1-guohongzhi1@huawei.com/
http://patchwork.ozlabs.org/project/openvswitch/patch/20200717015041.82746-1-hepeng.0320@bytedance.com/

In addition,Ilya Maximets, Gregory, William Tu, Ben Pfaff and He Peng, whehter we have reached a consensus on the issue of using RCU + refcount?
If not, should we continue to discuss a better solution to this problem.

Best regards,  Hongzhi Guo.

-----Original Message-----
From: Gaëtan Rivet [mailto:grive@u256.net] 
Sent: 2021.4.12  8:07
To: 贺鹏 <xnhp0320@gmail.com>; Guohongzhi (Russell Lab) <guohongzhi1@huawei.com>
Cc: Darragh O'Reilly via dev <ovs-dev@openvswitch.org>; Lilijun (Jerry) <jerry.lilijun@huawei.com>; Ilya Maximets <i.maximets@ovn.org>; Jarno Rajahalme <jrajahalme@nicira.com>; Zhoujingbin <zhoujingbin@huawei.com>; chenchanghu <chenchanghu@huawei.com>
Subject: Re: [ovs-dev] [PATCH] [ovs-dev v2]ofproto:fix use-after-free of ofproto

On Tue, Mar 9, 2021, at 13:50, 贺鹏 wrote:
> Is this a version that mixes using refcount and RCU?
> do we have reached an agreement?
> 
> BTW, please use my company email address: hepeng.0320@bytedance.com
> 
> thanks.
> 
> Hongzhi Guo <guohongzhi1@huawei.com> 2021.3.8  am 11:34:
> >
> > ASAN report use-after-free of ofproto when destroy ofproto_rule.
> > The rule uses both RCU and refcount, while the ofproto uses only 
> > RCU, and the rule retains the pointer of the proto.
> > More importantly, ofproto cannot guarantee a longer grace period than the rule.
> > So when the rule is deleted, it is possible that ofproto has been 
> > released, resulting in use-after-free of ofproto.
> > This patch add ref_count for ofproto to avoid use-after-free.
> >

Is this a v3 of the previous patch on the subject [1]?
I don't understand the status of this series, the previous (still) v2 has a 'requested changes' state on patchwork, but there is no changelog with the commit that could help following.

[1]: https://patchwork.ozlabs.org/project/openvswitch/patch/20200717015041.82746-1-hepeng.0320@bytedance.com/

> > ===================
> > ASAN report as following:
> > ==10399==ERROR: AddressSanitizer: heap-use-after-free on address 
> > 0xffff61e1e420 at pc 0xaaaadcc29d1c bp 0xffff6c5fde40 sp 0xffff6c5fde60  READ of size 8 at 0xffff61e1e420 thread T12 (urcu2)
> >     #0 0xaaaadcc29d1b (/usr/src/debug/openvswitch-2.12.asan.aarch64/ofproto/ofproto.c:2916)
> >     #1 0xaaaadcf76f5f (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/ovs-rcu.c:348 (discriminator 3))
> >     #2 0xaaaadcf770c7 (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/ovs-rcu.c:363)
> >     #3 0xaaaadcf7fa9b (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/ovs-thread.c:708)
> >     #4 0xffff80fde8bb in start_thread (/lib64/libpthread.so.0+0x78bb)
> >     #5 0xffff808fa5cb in thread_start (/lib64/libc.so.6+0xd55cb)
> > 0xffff61e1e420 is located 32 bytes inside of 34496-byte region 
> > [0xffff61e1e400,0xffff61e26ac0)  freed by thread T12 (urcu2) here:
> >     #0 0xffff8214fe33 in free (/lib64/libasan.so.4+0xd2e33)
> >     #1 0xaaaadcc576df (/usr/src/debug/openvswitch-2.12.asan.aarch64/ofproto/ofproto-dpif.c:734)
> >     #2 0xaaaadcc21acb (/usr/src/debug/openvswitch-2.12.asan.aarch64/ofproto/ofproto.c:1687)
> >     #3 0xaaaadcf76f5f (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/ovs-rcu.c:348 (discriminator 3))
> >     #4 0xaaaadcf770c7 (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/ovs-rcu.c:363)
> >     #5 0xaaaadcf7fa9b (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/ovs-thread.c:708)
> >     #6 0xffff80fde8bb in start_thread (/lib64/libpthread.so.0+0x78bb)
> >     #7 0xffff808fa5cb in thread_start (/lib64/libc.so.6+0xd55cb) 
> > previously allocated by thread T0 here:
> >     #0 0xffff821503c3 in __interceptor_calloc (/lib64/libasan.so.4+0xd33c3)
> >     #1 0xaaaadd034717 in xcalloc (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/util.c:99 (discriminator 3))
> >     #2 0xaaaadd034767 in xzalloc (/usr/src/debug/openvswitch-2.12.asan.aarch64/lib/util.c:110)
> >     #3 0xaaaadcc576ab (/usr/src/debug/openvswitch-2.12.asan.aarch64/ofproto/ofproto-dpif.c:726)
> >     #4 0xaaaadcc1be93 in ofproto_create (/usr/src/debug/openvswitch-2.12.asan.aarch64/ofproto/ofproto.c:505)
> >     #5 0xaaaadcbd793f (/usr/src/debug/openvswitch-2.12.asan.aarch64/vswitchd/bridge.c:1208)
> >     #6 0xaaaadcbeefb7 in bridge_run (/usr/src/debug/openvswitch-2.12.asan.aarch64/vswitchd/bridge.c:3944 (discriminator 4))
> >     #7 0xaaaadcbfdb83 in main (/usr/src/debug/openvswitch-2.12.asan.aarch64/vswitchd/ovs-vswitchd.c:240)
> >     #8 0xffff80845adf in __libc_start_main (/lib64/libc.so.6+0x20adf)
> >     #9 0xaaaadcbd19b3 (/usr/sbin/ovs-vswitchd-2.12.asan+0x26f9b3)
> >     SUMMARY: AddressSanitizer: heap-use-after-free 
> > (/usr/src/debug/openvswitch-2.12.asan.aarch64/ofproto/ofproto.c:2916
> > )
> >
> > ===================
> > TEST:
> > 1.ASAN test for three days

Is there a way to reproduce the issue faster? Which ASAN test is it?

If you delete and recreate a datapath in a loop for example (by adding some bridges then deleting them all for example), it could stress test ofproto deletion, maybe this would help accelerate reproduction?

> >
> > CC: Jarno Rajahalme <jrajahalme@nicira.com>
> > Fixes: 39c9459355b6 ("Use classifier versioning.")
> >
> > Signed-off-by: Hongzhi Guo <guohongzhi1@huawei.com>
> > Signed-off-by: hepeng <xnhp0320@gmail.com>
> > ---
> >  ofproto/ofproto-dpif-xlate-cache.c |  1 +
> >  ofproto/ofproto-dpif.c             | 20 ++++++++++-------
> >  ofproto/ofproto-provider.h         |  5 +++++
> >  ofproto/ofproto.c                  | 35 ++++++++++++++++++++++++++----
> >  4 files changed, 49 insertions(+), 12 deletions(-)
> >
> > diff --git a/ofproto/ofproto-dpif-xlate-cache.c 
> > b/ofproto/ofproto-dpif-xlate-cache.c
> > index dcc91cb38..0deee365d 100644
> > --- a/ofproto/ofproto-dpif-xlate-cache.c
> > +++ b/ofproto/ofproto-dpif-xlate-cache.c
> > @@ -209,6 +209,7 @@ xlate_cache_clear_entry(struct xc_entry *entry)  
> > {
> >      switch (entry->type) {
> >      case XC_TABLE:
> > +        ofproto_unref(&(entry->table.ofproto->up));
> >          break;
> >      case XC_RULE:
> >          ofproto_rule_unref(&entry->rule->up);
> > diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 
> > fd0b2fdea..8b22eda5a 100644
> > --- a/ofproto/ofproto-dpif.c
> > +++ b/ofproto/ofproto-dpif.c
> > @@ -4416,10 +4416,12 @@ rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto,
> >              if (xcache) {
> >                  struct xc_entry *entry;
> >
> > -                entry = xlate_cache_add_entry(xcache, XC_TABLE);
> > -                entry->table.ofproto = ofproto;
> > -                entry->table.id = *table_id;
> > -                entry->table.match = true;
> > +                if (ofproto_try_ref(&ofproto->up)) {
> > +                    entry = xlate_cache_add_entry(xcache, XC_TABLE);
> > +                    entry->table.ofproto = ofproto;

I recall Ilya asking about it, but I haven't seen a change on the subject.
In ofproto/ofproto-dpif-xlate.c, line 3028 a reference to ofproto is taken by an XC_NORMAL entry. It probably needs a reference taken as well.

> > +                    entry->table.id = *table_id;
> > +                    entry->table.match = true;
> > +                }
> >              }
> >              return rule;
> >          }
> > @@ -4452,10 +4454,12 @@ rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto,
> >          if (xcache) {
> >              struct xc_entry *entry;
> >
> > -            entry = xlate_cache_add_entry(xcache, XC_TABLE);
> > -            entry->table.ofproto = ofproto;
> > -            entry->table.id = next_id;
> > -            entry->table.match = (rule != NULL);
> > +            if (ofproto_try_ref(&ofproto->up)) {
> > +                entry = xlate_cache_add_entry(xcache, XC_TABLE);
> > +                entry->table.ofproto = ofproto;
> > +                entry->table.id = next_id;
> > +                entry->table.match = (rule != NULL);
> > +            }
> >          }
> >          if (rule) {
> >              goto out;   /* Match. */
> > diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h 
> > index 9ad2b71d2..8f2f41e0e 100644
> > --- a/ofproto/ofproto-provider.h
> > +++ b/ofproto/ofproto-provider.h
> > @@ -139,6 +139,8 @@ struct ofproto {
> >      /* Variable length mf_field mapping. Stores all configured variable length
> >       * meta-flow fields (struct mf_field) in a switch. */
> >      struct vl_mff_map vl_mff_map;
> > +
> > +    struct ovs_refcount ref_count;

A comment on the field is missing.

> >  };
> >
> >  void ofproto_init_tables(struct ofproto *, int n_tables); @@ -442,6 
> > +444,9 @@ struct rule {  void ofproto_rule_ref(struct rule *);  bool 
> > ofproto_rule_try_ref(struct rule *);  void ofproto_rule_unref(struct 
> > rule *);
> > +void ofproto_ref(struct ofproto *); bool ofproto_try_ref(struct 
> > +ofproto *); void ofproto_unref(struct ofproto *);
> >
> >  static inline const struct rule_actions * rule_get_actions(const 
> > struct rule *);  static inline bool rule_is_table_miss(const struct 
> > rule *); diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 
> > b91517cd2..7037a9684 100644
> > --- a/ofproto/ofproto.c
> > +++ b/ofproto/ofproto.c
> > @@ -545,6 +545,7 @@ ofproto_create(const char *datapath_name, const 
> > char *datapath_type,
> >
> >      ovs_mutex_init(&ofproto->vl_mff_map.mutex);
> >      cmap_init(&ofproto->vl_mff_map.cmap);
> > +    ovs_refcount_init(&ofproto->ref_count);
> >
> >      error = ofproto->ofproto_class->construct(ofproto);
> >      if (error) {
> > @@ -1738,8 +1739,7 @@ ofproto_destroy(struct ofproto *p, bool del)
> >      p->connmgr = NULL;
> >      ovs_mutex_unlock(&ofproto_mutex);
> >
> > -    /* Destroying rules is deferred, must have 'ofproto' around for them. */
> > -    ovsrcu_postpone(ofproto_destroy_defer__, p);
> > +    ofproto_unref(p);
> >  }
> >
> >  /* Destroys the datapath with the respective 'name' and 'type'.  
> > With the Linux @@ -2928,6 +2928,7 @@ ofproto_rule_destroy__(struct rule *rule)
> >      cls_rule_destroy(CONST_CAST(struct cls_rule *, &rule->cr));
> >      rule_actions_destroy(rule_get_actions(rule));
> >      ovs_mutex_destroy(&rule->mutex);
> > +    ofproto_unref(rule->ofproto);
> >      rule->ofproto->ofproto_class->rule_dealloc(rule);

It was mentioned before, but rule->ofproto is being used here, while is has been unref just before.
It is correct, as long as ofproto is being freed after a grace period.
As you have had a remark about it before, it's a sign this is a probable pitfall for a future reader.
It might be worth it to add a small comment describing why this is ok.

> >  }
> >
> > @@ -2979,6 +2980,28 @@ ofproto_rule_unref(struct rule *rule)
> >      }
> >  }
> >
> > +void ofproto_ref(struct ofproto *ofproto) {
> > +    if (ofproto) {

This NULL check is counter-productive.
If a bad code passes a NULL ofproto, it will ignore the issue and not take a ref to it.
It will break logic while avoiding a crash. In some unknown amount of time a crash will happen due to a missing ref, which will be harder to debug due to this check.

This comment is valid for all three functions.

> > +        ovs_refcount_ref(&ofproto->ref_count);
> > +    }
> > +}
> > +
> > +bool ofproto_try_ref(struct ofproto *ofproto) {
> > +    if (ofproto) {
> > +        return ovs_refcount_try_ref_rcu(&ofproto->ref_count);
> > +    }
> > +    return false;
> > +}
> > +
> > +void ofproto_unref(struct ofproto *ofproto) {
> > +    if (ofproto && ovs_refcount_unref_relaxed(&ofproto->ref_count) 
> > +== 1) {

Why not use ovs_refcount_unref()?

> > +        ovsrcu_postpone(ofproto_destroy_defer__, ofproto);

Was a consensus reached on RCU + refcount use?

They are not mutually exclusive, but RCU was a crutch hiding the need for a refcount before.
The RCU on ofproto can be removed, with some additional changes (taking & releasing an ofproto ref around CMAP iterations). The question is whether this should be part of this patch.

RCU is used to allow concurrent readers, refcount is used to systematize memory reclamation.
That RCU offers concurrent reads through deferred memory reclamation should not lead us to consider them a complete functional overlap.

RCU was misused before, but beyond the incorrect double-defer, it still simplifies using some ofproto fields. We can do without, but is it worth the added complexity?

> > +    }
> > +}
> > +
> >  static void
> >  remove_rule_rcu__(struct rule *rule)
> >      OVS_REQUIRES(ofproto_mutex)
> > @@ -3068,6 +3091,7 @@ group_destroy_cb(struct ofgroup *group)
> >                                                  &group->props));
> >      ofputil_bucket_list_destroy(CONST_CAST(struct ovs_list *,
> >                                             &group->buckets));
> > +    ofproto_unref(group->ofproto);
> >      group->ofproto->ofproto_class->group_dealloc(group);
> >  }
> >
> > @@ -5266,6 +5290,7 @@ ofproto_rule_create(struct ofproto *ofproto, 
> > struct cls_rule *cr,
> >
> >      /* Initialize base state. */
> >      *CONST_CAST(struct ofproto **, &rule->ofproto) = ofproto;
> > +    ofproto_ref(ofproto);

You may have a race between a datapath being destroyed and a rule being created.
You could use _try_ref() instead, at the start of the function to minimize the amount of initialization done.

Maybe something like:

@@ ofproto_rule_create(struct ofproto *ofproto, struct cls_rule *cr,
     struct rule *rule;
     enum ofperr error;

+    if (!ofproto_try_ref(ofproto)) {
+        cls_rule_destroy(cr);
+        return OFPERR_OFPFMFC_UNKNOWN;
+    }

> >      cls_rule_move(CONST_CAST(struct cls_rule *, &rule->cr), cr);
> >      ovs_refcount_init(&rule->ref_count);
> >
> > @@ -6214,7 +6239,7 @@ handle_flow_mod__(struct ofproto *ofproto, const struct ofputil_flow_mod *fm,
> >      error = ofproto_flow_mod_start(ofproto, &ofm);
> >      if (!error) {
> >          ofproto_bump_tables_version(ofproto);
> > -        error = ofproto_flow_mod_finish(ofproto, &ofm, req);
> > +        error = ofproto_flow_mod_finish(ofproto, &ofm, req);
> >          ofmonitor_flush(ofproto->connmgr);
> >      }
> >      ovs_mutex_unlock(&ofproto_mutex); @@ -7331,6 +7356,7 @@ 
> > init_group(struct ofproto *ofproto, const struct ofputil_group_mod *gm,
> >      }
> >
> >      *CONST_CAST(struct ofproto **, &(*ofgroup)->ofproto) = ofproto;
> > +    ofproto_ref(ofproto);

Is there a chance ofproto_try_ref() should be used there as well?

> >      *CONST_CAST(uint32_t *, &((*ofgroup)->group_id)) = gm->group_id;
> >      *CONST_CAST(enum ofp11_group_type *, &(*ofgroup)->type) = gm->type;
> >      *CONST_CAST(long long int *, &((*ofgroup)->created)) = now; @@ 
> > -7363,6 +7389,7 @@ init_group(struct ofproto *ofproto, const struct ofputil_group_mod *gm,
> >          ofputil_bucket_list_destroy(CONST_CAST(struct ovs_list *,
> >                                                 &(*ofgroup)->buckets));
> >          ofproto->ofproto_class->group_dealloc(*ofgroup);
> > +        ofproto_unref(ofproto);

Here you unref after using the ofproto reference, inverse to what is done in rule_destroy().
It would be better to be consistent and avoid surprising the reader.

> >      }
> >      return error;
> >  }
> > @@ -8199,7 +8226,7 @@ do_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags)
> >              /* Send error referring to the original message. */
> >              ofconn_send_error(ofconn, be->msg, error);
> >              error = OFPERR_OFPBFC_MSG_FAILED;
> > -
> > +
> >              /* 2. Revert.  Undo all the changes made above. */
> >              LIST_FOR_EACH_REVERSE_CONTINUE(be, node, &bundle->msg_list) {
> >                  if (be->type == OFPTYPE_FLOW_MOD) {
> > --
> > 2.21.0.windows.1
> >
> 
> 
> --
> hepeng
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>

Best regards,
diff mbox series

Patch

diff --git a/ofproto/ofproto-dpif-xlate-cache.c b/ofproto/ofproto-dpif-xlate-cache.c
index dcc91cb38..0deee365d 100644
--- a/ofproto/ofproto-dpif-xlate-cache.c
+++ b/ofproto/ofproto-dpif-xlate-cache.c
@@ -209,6 +209,7 @@  xlate_cache_clear_entry(struct xc_entry *entry)
 {
     switch (entry->type) {
     case XC_TABLE:
+        ofproto_unref(&(entry->table.ofproto->up));
         break;
     case XC_RULE:
         ofproto_rule_unref(&entry->rule->up);
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index fd0b2fdea..8b22eda5a 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -4416,10 +4416,12 @@  rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto,
             if (xcache) {
                 struct xc_entry *entry;
 
-                entry = xlate_cache_add_entry(xcache, XC_TABLE);
-                entry->table.ofproto = ofproto;
-                entry->table.id = *table_id;
-                entry->table.match = true;
+                if (ofproto_try_ref(&ofproto->up)) {
+                    entry = xlate_cache_add_entry(xcache, XC_TABLE);
+                    entry->table.ofproto = ofproto;
+                    entry->table.id = *table_id;
+                    entry->table.match = true;
+                }
             }
             return rule;
         }
@@ -4452,10 +4454,12 @@  rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto,
         if (xcache) {
             struct xc_entry *entry;
 
-            entry = xlate_cache_add_entry(xcache, XC_TABLE);
-            entry->table.ofproto = ofproto;
-            entry->table.id = next_id;
-            entry->table.match = (rule != NULL);
+            if (ofproto_try_ref(&ofproto->up)) {
+                entry = xlate_cache_add_entry(xcache, XC_TABLE);
+                entry->table.ofproto = ofproto;
+                entry->table.id = next_id;
+                entry->table.match = (rule != NULL);
+            }
         }
         if (rule) {
             goto out;   /* Match. */
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index 9ad2b71d2..8f2f41e0e 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -139,6 +139,8 @@  struct ofproto {
     /* Variable length mf_field mapping. Stores all configured variable length
      * meta-flow fields (struct mf_field) in a switch. */
     struct vl_mff_map vl_mff_map;
+
+    struct ovs_refcount ref_count;
 };
 
 void ofproto_init_tables(struct ofproto *, int n_tables);
@@ -442,6 +444,9 @@  struct rule {
 void ofproto_rule_ref(struct rule *);
 bool ofproto_rule_try_ref(struct rule *);
 void ofproto_rule_unref(struct rule *);
+void ofproto_ref(struct ofproto *);
+bool ofproto_try_ref(struct ofproto *);
+void ofproto_unref(struct ofproto *);
 
 static inline const struct rule_actions * rule_get_actions(const struct rule *);
 static inline bool rule_is_table_miss(const struct rule *);
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index b91517cd2..7037a9684 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -545,6 +545,7 @@  ofproto_create(const char *datapath_name, const char *datapath_type,
 
     ovs_mutex_init(&ofproto->vl_mff_map.mutex);
     cmap_init(&ofproto->vl_mff_map.cmap);
+    ovs_refcount_init(&ofproto->ref_count);
 
     error = ofproto->ofproto_class->construct(ofproto);
     if (error) {
@@ -1738,8 +1739,7 @@  ofproto_destroy(struct ofproto *p, bool del)
     p->connmgr = NULL;
     ovs_mutex_unlock(&ofproto_mutex);
 
-    /* Destroying rules is deferred, must have 'ofproto' around for them. */
-    ovsrcu_postpone(ofproto_destroy_defer__, p);
+    ofproto_unref(p);
 }
 
 /* Destroys the datapath with the respective 'name' and 'type'.  With the Linux
@@ -2928,6 +2928,7 @@  ofproto_rule_destroy__(struct rule *rule)
     cls_rule_destroy(CONST_CAST(struct cls_rule *, &rule->cr));
     rule_actions_destroy(rule_get_actions(rule));
     ovs_mutex_destroy(&rule->mutex);
+    ofproto_unref(rule->ofproto);
     rule->ofproto->ofproto_class->rule_dealloc(rule);
 }
 
@@ -2979,6 +2980,28 @@  ofproto_rule_unref(struct rule *rule)
     }
 }
 
+void ofproto_ref(struct ofproto *ofproto)
+{
+    if (ofproto) {
+        ovs_refcount_ref(&ofproto->ref_count);
+    }
+}
+
+bool ofproto_try_ref(struct ofproto *ofproto)
+{
+    if (ofproto) {
+        return ovs_refcount_try_ref_rcu(&ofproto->ref_count);
+    }
+    return false;
+}
+
+void ofproto_unref(struct ofproto *ofproto)
+{
+    if (ofproto && ovs_refcount_unref_relaxed(&ofproto->ref_count) == 1) {
+        ovsrcu_postpone(ofproto_destroy_defer__, ofproto);
+    }
+}
+
 static void
 remove_rule_rcu__(struct rule *rule)
     OVS_REQUIRES(ofproto_mutex)
@@ -3068,6 +3091,7 @@  group_destroy_cb(struct ofgroup *group)
                                                 &group->props));
     ofputil_bucket_list_destroy(CONST_CAST(struct ovs_list *,
                                            &group->buckets));
+    ofproto_unref(group->ofproto);
     group->ofproto->ofproto_class->group_dealloc(group);
 }
 
@@ -5266,6 +5290,7 @@  ofproto_rule_create(struct ofproto *ofproto, struct cls_rule *cr,
 
     /* Initialize base state. */
     *CONST_CAST(struct ofproto **, &rule->ofproto) = ofproto;
+    ofproto_ref(ofproto);
     cls_rule_move(CONST_CAST(struct cls_rule *, &rule->cr), cr);
     ovs_refcount_init(&rule->ref_count);
 
@@ -6214,7 +6239,7 @@  handle_flow_mod__(struct ofproto *ofproto, const struct ofputil_flow_mod *fm,
     error = ofproto_flow_mod_start(ofproto, &ofm);
     if (!error) {
         ofproto_bump_tables_version(ofproto);
-        error = ofproto_flow_mod_finish(ofproto, &ofm, req);        
+        error = ofproto_flow_mod_finish(ofproto, &ofm, req);
         ofmonitor_flush(ofproto->connmgr);
     }
     ovs_mutex_unlock(&ofproto_mutex);
@@ -7331,6 +7356,7 @@  init_group(struct ofproto *ofproto, const struct ofputil_group_mod *gm,
     }
 
     *CONST_CAST(struct ofproto **, &(*ofgroup)->ofproto) = ofproto;
+    ofproto_ref(ofproto);
     *CONST_CAST(uint32_t *, &((*ofgroup)->group_id)) = gm->group_id;
     *CONST_CAST(enum ofp11_group_type *, &(*ofgroup)->type) = gm->type;
     *CONST_CAST(long long int *, &((*ofgroup)->created)) = now;
@@ -7363,6 +7389,7 @@  init_group(struct ofproto *ofproto, const struct ofputil_group_mod *gm,
         ofputil_bucket_list_destroy(CONST_CAST(struct ovs_list *,
                                                &(*ofgroup)->buckets));
         ofproto->ofproto_class->group_dealloc(*ofgroup);
+        ofproto_unref(ofproto);
     }
     return error;
 }
@@ -8199,7 +8226,7 @@  do_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags)
             /* Send error referring to the original message. */
             ofconn_send_error(ofconn, be->msg, error);
             error = OFPERR_OFPBFC_MSG_FAILED;
- 
+
             /* 2. Revert.  Undo all the changes made above. */
             LIST_FOR_EACH_REVERSE_CONTINUE(be, node, &bundle->msg_list) {
                 if (be->type == OFPTYPE_FLOW_MOD) {