diff mbox series

[ovs-dev,ovs-dev,v2] ofproto: add refcount to ofproto to fix crash at ofproto_destroy

Message ID 20200717015041.82746-1-hepeng.0320@bytedance.com
State Changes Requested
Headers show
Series [ovs-dev,ovs-dev,v2] ofproto: add refcount to ofproto to fix crash at ofproto_destroy | expand

Commit Message

.贺鹏 July 17, 2020, 1:50 a.m. UTC
From: Peng He <hepeng.0320@bytedance.com>

The call stack of rule_destroy_cb:

remove_rules_postponed (one grace period)
    -> remove_rule_rcu
        -> remove_rule_rcu__
            -> ofproto_rule_unref -> ref count != 1
                -> ... more grace periods.
                -> rule_destroy_cb (> 2 grace periods)
                    -> free

Currently ofproto_destory waits only two grace periods, this means when
rule_destroy_cb is called, the ofproto might have been already destroyed.

This patch adds a refcount for ofproto to ensure ofproto exists when it
is needed to call free functions.

This patch fixes the crashes found:
Program terminated with signal SIGSEGV, Segmentation fault.
0  0x0000562a55169e49 in rule_destroy_cb (rule=0x562a598be2c0) at ofproto/ofproto.c:2956
1  0x0000562a552623d6 in ovsrcu_call_postponed () at lib/ovs-rcu.c:348
2  0x0000562a55262504 in ovsrcu_postpone_thread (arg=<optimized out>) at lib/ovs-rcu.c:364
3  0x0000562a55264aef in ovsthread_wrapper (aux_=<optimized out>) at lib/ovs-thread.c:383
4  0x00007febe715a4a4 in start_thread (arg=0x7febe4c1c700) at pthread_create.c:456
5  0x00007febe6990d0f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
(gdb) p rule->ofproto->ofproto_class
$3 = (const struct ofproto_class *) 0x0

Also:
0  ofproto_dpif_credit_table_stats (ofproto=0x5583583332b0, table_id=0 '\000', n_matches=5, n_misses=0) at ofproto/ofproto-dpif.c:4310
1  0x0000558354c68514 in xlate_push_stats_entry (entry=entry@entry=0x7ff488031398, stats=stats@entry=0x7ff49af30b60) at ofproto/ofproto-dpif-xlate-cache.c:99
2  0x0000558354c686f3 in xlate_push_stats (xcache=<optimized out>, stats=stats@entry=0x7ff49af30b60) at ofproto/ofproto-dpif-xlate-cache.c:181
3  0x0000558354c5411a in revalidate_ukey (udpif=udpif@entry=0x5583583baba0, ukey=ukey@entry=0x7ff47809f770, stats=stats@entry=0x7ff49af32128, odp_actions=odp_actions@entry=0x7ff49af30c50, reval_seq=reval_seq@entry=275670456,
   recircs=recircs@entry=0x7ff49af30cd0) at ofproto/ofproto-dpif-upcall.c:2292
4  0x0000558354c57cbc in revalidate (revalidator=<optimized out>) at ofproto/ofproto-dpif-upcall.c:2681
5  0x0000558354c57f8e in udpif_revalidator (arg=0x5583583d2a90) at ofproto/ofproto-dpif-upcall.c:934
6  0x0000558354d24aef in ovsthread_wrapper (aux_=<optimized out>) at lib/ovs-thread.c:383
7  0x00007ff4a479d4a4 in start_thread (arg=0x7ff49af35700) at pthread_create.c:456
8  0x00007ff4a3fd3d0f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
(gdb) p ofproto->up.ofproto_class
$3 = (const struct ofproto_class *) 0x0

Signed-off-by: Peng He <hepeng.0320@bytedance.com>
---
 ofproto/ofproto-dpif-xlate-cache.c |  1 +
 ofproto/ofproto-dpif.c             | 22 ++++++++--------
 ofproto/ofproto-provider.h         |  2 ++
 ofproto/ofproto.c                  | 41 +++++++++++++++++++++++++++---
 ofproto/ofproto.h                  |  4 +++
 5 files changed, 56 insertions(+), 14 deletions(-)

Comments

0-day Robot July 17, 2020, 3:23 a.m. UTC | #1
Bleep bloop.  Greetings hepeng.0320, I am a robot and I have tried out your patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
ERROR: Inappropriate bracing around statement
#182 FILE: ofproto/ofproto.c:1734:
    if (ovs_refcount_read(&ofproto->refcount) != 1)

ERROR: Inappropriate bracing around statement
#184 FILE: ofproto/ofproto.c:1736:
    else

Lines checked: 222, Warnings: 0, Errors: 2


Please check this out.  If you feel there has been an error, please email aconole@redhat.com

Thanks,
0-day Robot
Gregory Rose July 20, 2020, 5:21 p.m. UTC | #2
On 7/16/2020 6:50 PM, hepeng.0320 wrote:
> From: Peng He <hepeng.0320@bytedance.com>
> 
> The call stack of rule_destroy_cb:
> 
> remove_rules_postponed (one grace period)
>      -> remove_rule_rcu
>          -> remove_rule_rcu__
>              -> ofproto_rule_unref -> ref count != 1
>                  -> ... more grace periods.
>                  -> rule_destroy_cb (> 2 grace periods)
>                      -> free
> 
> Currently ofproto_destory waits only two grace periods, this means when
> rule_destroy_cb is called, the ofproto might have been already destroyed.
> 
> This patch adds a refcount for ofproto to ensure ofproto exists when it
> is needed to call free functions.
> 
> This patch fixes the crashes found:
> Program terminated with signal SIGSEGV, Segmentation fault.
> 0  0x0000562a55169e49 in rule_destroy_cb (rule=0x562a598be2c0) at ofproto/ofproto.c:2956
> 1  0x0000562a552623d6 in ovsrcu_call_postponed () at lib/ovs-rcu.c:348
> 2  0x0000562a55262504 in ovsrcu_postpone_thread (arg=<optimized out>) at lib/ovs-rcu.c:364
> 3  0x0000562a55264aef in ovsthread_wrapper (aux_=<optimized out>) at lib/ovs-thread.c:383
> 4  0x00007febe715a4a4 in start_thread (arg=0x7febe4c1c700) at pthread_create.c:456
> 5  0x00007febe6990d0f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
> (gdb) p rule->ofproto->ofproto_class
> $3 = (const struct ofproto_class *) 0x0
> 
> Also:
> 0  ofproto_dpif_credit_table_stats (ofproto=0x5583583332b0, table_id=0 '\000', n_matches=5, n_misses=0) at ofproto/ofproto-dpif.c:4310
> 1  0x0000558354c68514 in xlate_push_stats_entry (entry=entry@entry=0x7ff488031398, stats=stats@entry=0x7ff49af30b60) at ofproto/ofproto-dpif-xlate-cache.c:99
> 2  0x0000558354c686f3 in xlate_push_stats (xcache=<optimized out>, stats=stats@entry=0x7ff49af30b60) at ofproto/ofproto-dpif-xlate-cache.c:181
> 3  0x0000558354c5411a in revalidate_ukey (udpif=udpif@entry=0x5583583baba0, ukey=ukey@entry=0x7ff47809f770, stats=stats@entry=0x7ff49af32128, odp_actions=odp_actions@entry=0x7ff49af30c50, reval_seq=reval_seq@entry=275670456,
>     recircs=recircs@entry=0x7ff49af30cd0) at ofproto/ofproto-dpif-upcall.c:2292
> 4  0x0000558354c57cbc in revalidate (revalidator=<optimized out>) at ofproto/ofproto-dpif-upcall.c:2681
> 5  0x0000558354c57f8e in udpif_revalidator (arg=0x5583583d2a90) at ofproto/ofproto-dpif-upcall.c:934
> 6  0x0000558354d24aef in ovsthread_wrapper (aux_=<optimized out>) at lib/ovs-thread.c:383
> 7  0x00007ff4a479d4a4 in start_thread (arg=0x7ff49af35700) at pthread_create.c:456
> 8  0x00007ff4a3fd3d0f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
> (gdb) p ofproto->up.ofproto_class
> $3 = (const struct ofproto_class *) 0x0
> 
> Signed-off-by: Peng He <hepeng.0320@bytedance.com>

Hi Peng,

thank you for working on this.  I was wondering if you have a reliable
way to reproduce this bug?

Besides the checkpatch errors, overall the code looks fine to me but
I'd like to be able to test it.
Have you considered adding a test for it in tests/ofproto.at?

Couple other comments below.

Thanks,

- Greg

> ---
>   ofproto/ofproto-dpif-xlate-cache.c |  1 +
>   ofproto/ofproto-dpif.c             | 22 ++++++++--------
>   ofproto/ofproto-provider.h         |  2 ++
>   ofproto/ofproto.c                  | 41 +++++++++++++++++++++++++++---
>   ofproto/ofproto.h                  |  4 +++
>   5 files changed, 56 insertions(+), 14 deletions(-)
> 
> diff --git a/ofproto/ofproto-dpif-xlate-cache.c b/ofproto/ofproto-dpif-xlate-cache.c
> index dcc91cb38..0deee365d 100644
> --- a/ofproto/ofproto-dpif-xlate-cache.c
> +++ b/ofproto/ofproto-dpif-xlate-cache.c
> @@ -209,6 +209,7 @@ xlate_cache_clear_entry(struct xc_entry *entry)
>   {
>       switch (entry->type) {
>       case XC_TABLE:
> +        ofproto_unref(&(entry->table.ofproto->up));
>           break;
>       case XC_RULE:
>           ofproto_rule_unref(&entry->rule->up);
> diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
> index 4f0638f23..6480b3c78 100644
> --- a/ofproto/ofproto-dpif.c
> +++ b/ofproto/ofproto-dpif.c
> @@ -4414,11 +4414,12 @@ rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto,
>               }
>               if (xcache) {
>                   struct xc_entry *entry;
> -
> -                entry = xlate_cache_add_entry(xcache, XC_TABLE);
> -                entry->table.ofproto = ofproto;
> -                entry->table.id = *table_id;
> -                entry->table.match = true;
> +                if (ofproto_try_ref(&ofproto->up)) {
> +                    entry = xlate_cache_add_entry(xcache, XC_TABLE);
> +                    entry->table.ofproto = ofproto;
> +                    entry->table.id = *table_id;
> +                    entry->table.match = true;
> +                }
>               }
>               return rule;
>           }
> @@ -4450,11 +4451,12 @@ rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto,
>           }
>           if (xcache) {
>               struct xc_entry *entry;
> -
> -            entry = xlate_cache_add_entry(xcache, XC_TABLE);
> -            entry->table.ofproto = ofproto;
> -            entry->table.id = next_id;
> -            entry->table.match = (rule != NULL);
> +            if (ofproto_try_ref(&ofproto->up)) {
> +                entry = xlate_cache_add_entry(xcache, XC_TABLE);
> +                entry->table.ofproto = ofproto;
> +                entry->table.id = next_id;
> +                entry->table.match = (rule != NULL);
> +            }
>           }
>           if (rule) {
>               goto out;   /* Match. */
> diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
> index afecb24cb..34074fd62 100644
> --- a/ofproto/ofproto-provider.h
> +++ b/ofproto/ofproto-provider.h
> @@ -139,6 +139,8 @@ struct ofproto {
>       /* Variable length mf_field mapping. Stores all configured variable length
>        * meta-flow fields (struct mf_field) in a switch. */
>       struct vl_mff_map vl_mff_map;
> +
> +    struct ovs_refcount refcount;
>   };
>   

The other fields in struct ofproto have comments for each.  I think this
field deserves a comment as well.  Maybe describe where used and why
added.


>   void ofproto_init_tables(struct ofproto *, int n_tables);
> diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
> index 59f06aa94..0842c76c1 100644
> --- a/ofproto/ofproto.c
> +++ b/ofproto/ofproto.c
> @@ -474,6 +474,24 @@ ofproto_bump_tables_version(struct ofproto *ofproto)
>                                                  ofproto->tables_version);
>   }
>   
> +void
> +ofproto_ref(struct ofproto *ofproto)
> +{
> +    ovs_refcount_ref(&ofproto->refcount);
> +}
> +
> +bool
> +ofproto_try_ref(struct ofproto *ofproto)
> +{
> +    return ovs_refcount_try_ref_rcu(&ofproto->refcount);
> +}
> +
> +void
> +ofproto_unref(struct ofproto *ofproto)
> +{
> +    ovs_refcount_unref(&ofproto->refcount);
> +}
> +
>   int
>   ofproto_create(const char *datapath_name, const char *datapath_type,
>                  struct ofproto **ofprotop)
> @@ -545,6 +563,7 @@ ofproto_create(const char *datapath_name, const char *datapath_type,
>   
>       ovs_mutex_init(&ofproto->vl_mff_map.mutex);
>       cmap_init(&ofproto->vl_mff_map.cmap);
> +    ovs_refcount_init(&ofproto->refcount);
>   
>       error = ofproto->ofproto_class->construct(ofproto);
>       if (error) {
> @@ -1696,14 +1715,26 @@ ofproto_destroy__(struct ofproto *ofproto)
>       ofproto->ofproto_class->dealloc(ofproto);
>   }
>   
> -/* Destroying rules is doubly deferred, must have 'ofproto' around for them.
> - * - 1st we defer the removal of the rules from the classifier
> - * - 2nd we defer the actual destruction of the rules. */
> +/* destroying a rule may have to wait multiple grace periods:
> + * remove_rules_postponed (one grace period)
> + *       -> remove_rule_rcu
> + *           -> remove_rule_rcu__
> + *               -> ofproto_rule_unref -> ref count != 1
> + *                   -> ... more grace periods.
> + *                   -> rule_destroy_cb (> 2 grace periods)
> + *                       -> free
> + *
> + * So we have to check the refcount for sure all the rules
> + * have been destoryed.

s/destoryed/destroyed

> + */
>   static void
>   ofproto_destroy_defer__(struct ofproto *ofproto)
>       OVS_EXCLUDED(ofproto_mutex)
>   {
> -    ovsrcu_postpone(ofproto_destroy__, ofproto);
> +    if (ovs_refcount_read(&ofproto->refcount) != 1)
> +        ovsrcu_postpone(ofproto_destroy_defer__, ofproto);
> +    else
> +        ovsrcu_postpone(ofproto_destroy__, ofproto);
>   }
>   
>   void
> @@ -2927,6 +2958,7 @@ ofproto_rule_destroy__(struct rule *rule)
>       cls_rule_destroy(CONST_CAST(struct cls_rule *, &rule->cr));
>       rule_actions_destroy(rule_get_actions(rule));
>       ovs_mutex_destroy(&rule->mutex);
> +    ofproto_unref(rule->ofproto);
>       rule->ofproto->ofproto_class->rule_dealloc(rule);
>   }
>   
> @@ -5265,6 +5297,7 @@ ofproto_rule_create(struct ofproto *ofproto, struct cls_rule *cr,
>   
>       /* Initialize base state. */
>       *CONST_CAST(struct ofproto **, &rule->ofproto) = ofproto;
> +    ofproto_ref(ofproto);
>       cls_rule_move(CONST_CAST(struct cls_rule *, &rule->cr), cr);
>       ovs_refcount_init(&rule->ref_count);
>   
> diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h
> index 2dd253167..3d3937b35 100644
> --- a/ofproto/ofproto.h
> +++ b/ofproto/ofproto.h
> @@ -562,6 +562,10 @@ int ofproto_port_get_cfm_status(const struct ofproto *,
>   enum ofputil_table_miss ofproto_table_get_miss_config(const struct ofproto *,
>                                                         uint8_t table_id);
>   
> +void ofproto_ref(struct ofproto *p);
> +void ofproto_unref(struct ofproto *p);
> +bool ofproto_try_ref(struct ofproto *p);
> +
>   #ifdef  __cplusplus
>   }
>   #endif
>
Ilya Maximets Feb. 24, 2021, 2:29 p.m. UTC | #3
On 7/17/20 3:50 AM, hepeng.0320 wrote:
> From: Peng He <hepeng.0320@bytedance.com>
> 
> The call stack of rule_destroy_cb:
> 
> remove_rules_postponed (one grace period)
>     -> remove_rule_rcu
>         -> remove_rule_rcu__
>             -> ofproto_rule_unref -> ref count != 1
>                 -> ... more grace periods.
>                 -> rule_destroy_cb (> 2 grace periods)
>                     -> free
> 
> Currently ofproto_destory waits only two grace periods, this means when
> rule_destroy_cb is called, the ofproto might have been already destroyed.
> 
> This patch adds a refcount for ofproto to ensure ofproto exists when it
> is needed to call free functions.
> 
> This patch fixes the crashes found:
> Program terminated with signal SIGSEGV, Segmentation fault.
> 0  0x0000562a55169e49 in rule_destroy_cb (rule=0x562a598be2c0) at ofproto/ofproto.c:2956
> 1  0x0000562a552623d6 in ovsrcu_call_postponed () at lib/ovs-rcu.c:348
> 2  0x0000562a55262504 in ovsrcu_postpone_thread (arg=<optimized out>) at lib/ovs-rcu.c:364
> 3  0x0000562a55264aef in ovsthread_wrapper (aux_=<optimized out>) at lib/ovs-thread.c:383
> 4  0x00007febe715a4a4 in start_thread (arg=0x7febe4c1c700) at pthread_create.c:456
> 5  0x00007febe6990d0f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
> (gdb) p rule->ofproto->ofproto_class
> $3 = (const struct ofproto_class *) 0x0
> 
> Also:
> 0  ofproto_dpif_credit_table_stats (ofproto=0x5583583332b0, table_id=0 '\000', n_matches=5, n_misses=0) at ofproto/ofproto-dpif.c:4310
> 1  0x0000558354c68514 in xlate_push_stats_entry (entry=entry@entry=0x7ff488031398, stats=stats@entry=0x7ff49af30b60) at ofproto/ofproto-dpif-xlate-cache.c:99
> 2  0x0000558354c686f3 in xlate_push_stats (xcache=<optimized out>, stats=stats@entry=0x7ff49af30b60) at ofproto/ofproto-dpif-xlate-cache.c:181
> 3  0x0000558354c5411a in revalidate_ukey (udpif=udpif@entry=0x5583583baba0, ukey=ukey@entry=0x7ff47809f770, stats=stats@entry=0x7ff49af32128, odp_actions=odp_actions@entry=0x7ff49af30c50, reval_seq=reval_seq@entry=275670456,
>    recircs=recircs@entry=0x7ff49af30cd0) at ofproto/ofproto-dpif-upcall.c:2292
> 4  0x0000558354c57cbc in revalidate (revalidator=<optimized out>) at ofproto/ofproto-dpif-upcall.c:2681
> 5  0x0000558354c57f8e in udpif_revalidator (arg=0x5583583d2a90) at ofproto/ofproto-dpif-upcall.c:934
> 6  0x0000558354d24aef in ovsthread_wrapper (aux_=<optimized out>) at lib/ovs-thread.c:383
> 7  0x00007ff4a479d4a4 in start_thread (arg=0x7ff49af35700) at pthread_create.c:456
> 8  0x00007ff4a3fd3d0f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
> (gdb) p ofproto->up.ofproto_class
> $3 = (const struct ofproto_class *) 0x0
> 
> Signed-off-by: Peng He <hepeng.0320@bytedance.com>
> ---
>  ofproto/ofproto-dpif-xlate-cache.c |  1 +
>  ofproto/ofproto-dpif.c             | 22 ++++++++--------
>  ofproto/ofproto-provider.h         |  2 ++
>  ofproto/ofproto.c                  | 41 +++++++++++++++++++++++++++---
>  ofproto/ofproto.h                  |  4 +++
>  5 files changed, 56 insertions(+), 14 deletions(-)
> 
> diff --git a/ofproto/ofproto-dpif-xlate-cache.c b/ofproto/ofproto-dpif-xlate-cache.c
> index dcc91cb38..0deee365d 100644
> --- a/ofproto/ofproto-dpif-xlate-cache.c
> +++ b/ofproto/ofproto-dpif-xlate-cache.c
> @@ -209,6 +209,7 @@ xlate_cache_clear_entry(struct xc_entry *entry)
>  {
>      switch (entry->type) {
>      case XC_TABLE:
> +        ofproto_unref(&(entry->table.ofproto->up));
>          break;
>      case XC_RULE:
>          ofproto_rule_unref(&entry->rule->up);
> diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
> index 4f0638f23..6480b3c78 100644
> --- a/ofproto/ofproto-dpif.c
> +++ b/ofproto/ofproto-dpif.c
> @@ -4414,11 +4414,12 @@ rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto,
>              }
>              if (xcache) {
>                  struct xc_entry *entry;
> -
> -                entry = xlate_cache_add_entry(xcache, XC_TABLE);
> -                entry->table.ofproto = ofproto;
> -                entry->table.id = *table_id;
> -                entry->table.match = true;
> +                if (ofproto_try_ref(&ofproto->up)) {
> +                    entry = xlate_cache_add_entry(xcache, XC_TABLE);

You're handling only XC_TABLE cache entries.  Do we need to ref ofproto
for other types of cache?  XC_NORMAL holds the pointer too.

> +                    entry->table.ofproto = ofproto;
> +                    entry->table.id = *table_id;
> +                    entry->table.match = true;
> +                }
>              }
>              return rule;
>          }
> @@ -4450,11 +4451,12 @@ rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto,
>          }
>          if (xcache) {
>              struct xc_entry *entry;
> -
> -            entry = xlate_cache_add_entry(xcache, XC_TABLE);
> -            entry->table.ofproto = ofproto;
> -            entry->table.id = next_id;
> -            entry->table.match = (rule != NULL);
> +            if (ofproto_try_ref(&ofproto->up)) {
> +                entry = xlate_cache_add_entry(xcache, XC_TABLE);
> +                entry->table.ofproto = ofproto;
> +                entry->table.id = next_id;
> +                entry->table.match = (rule != NULL);
> +            }
>          }
>          if (rule) {
>              goto out;   /* Match. */
> diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
> index afecb24cb..34074fd62 100644
> --- a/ofproto/ofproto-provider.h
> +++ b/ofproto/ofproto-provider.h
> @@ -139,6 +139,8 @@ struct ofproto {
>      /* Variable length mf_field mapping. Stores all configured variable length
>       * meta-flow fields (struct mf_field) in a switch. */
>      struct vl_mff_map vl_mff_map;
> +
> +    struct ovs_refcount refcount;

As Gregory pointed out, some little comment might be good here.

>  };
>  
>  void ofproto_init_tables(struct ofproto *, int n_tables);
> diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
> index 59f06aa94..0842c76c1 100644
> --- a/ofproto/ofproto.c
> +++ b/ofproto/ofproto.c
> @@ -474,6 +474,24 @@ ofproto_bump_tables_version(struct ofproto *ofproto)
>                                                 ofproto->tables_version);
>  }
>  
> +void
> +ofproto_ref(struct ofproto *ofproto)
> +{
> +    ovs_refcount_ref(&ofproto->refcount);
> +}
> +
> +bool
> +ofproto_try_ref(struct ofproto *ofproto)
> +{
> +    return ovs_refcount_try_ref_rcu(&ofproto->refcount);
> +}
> +
> +void
> +ofproto_unref(struct ofproto *ofproto)
> +{
> +    ovs_refcount_unref(&ofproto->refcount);
> +}
> +
>  int
>  ofproto_create(const char *datapath_name, const char *datapath_type,
>                 struct ofproto **ofprotop)
> @@ -545,6 +563,7 @@ ofproto_create(const char *datapath_name, const char *datapath_type,
>  
>      ovs_mutex_init(&ofproto->vl_mff_map.mutex);
>      cmap_init(&ofproto->vl_mff_map.cmap);
> +    ovs_refcount_init(&ofproto->refcount);
>  
>      error = ofproto->ofproto_class->construct(ofproto);
>      if (error) {
> @@ -1696,14 +1715,26 @@ ofproto_destroy__(struct ofproto *ofproto)
>      ofproto->ofproto_class->dealloc(ofproto);
>  }
>  
> -/* Destroying rules is doubly deferred, must have 'ofproto' around for them.
> - * - 1st we defer the removal of the rules from the classifier
> - * - 2nd we defer the actual destruction of the rules. */
> +/* destroying a rule may have to wait multiple grace periods:
> + * remove_rules_postponed (one grace period)
> + *       -> remove_rule_rcu
> + *           -> remove_rule_rcu__
> + *               -> ofproto_rule_unref -> ref count != 1
> + *                   -> ... more grace periods.
> + *                   -> rule_destroy_cb (> 2 grace periods)
> + *                       -> free
> + *
> + * So we have to check the refcount for sure all the rules
> + * have been destoryed.
> + */
>  static void
>  ofproto_destroy_defer__(struct ofproto *ofproto)
>      OVS_EXCLUDED(ofproto_mutex)
>  {
> -    ovsrcu_postpone(ofproto_destroy__, ofproto);
> +    if (ovs_refcount_read(&ofproto->refcount) != 1)
> +        ovsrcu_postpone(ofproto_destroy_defer__, ofproto);
> +    else
> +        ovsrcu_postpone(ofproto_destroy__, ofproto);
>  }

Mixing refcounts with rcu doesn't look right and it's prone to error.
The usual schema is to destroy the object once refcount reached
zero inside the 'unref' function.

>  
>  void
> @@ -2927,6 +2958,7 @@ ofproto_rule_destroy__(struct rule *rule)
>      cls_rule_destroy(CONST_CAST(struct cls_rule *, &rule->cr));
>      rule_actions_destroy(rule_get_actions(rule));
>      ovs_mutex_destroy(&rule->mutex);
> +    ofproto_unref(rule->ofproto);
>      rule->ofproto->ofproto_class->rule_dealloc(rule);

Unreferencing the object and using it on the very next line doesn't
look right.

>  }
>  
> @@ -5265,6 +5297,7 @@ ofproto_rule_create(struct ofproto *ofproto, struct cls_rule *cr,
>  
>      /* Initialize base state. */
>      *CONST_CAST(struct ofproto **, &rule->ofproto) = ofproto;
> +    ofproto_ref(ofproto);
>      cls_rule_move(CONST_CAST(struct cls_rule *, &rule->cr), cr);
>      ovs_refcount_init(&rule->ref_count);
>  
> diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h
> index 2dd253167..3d3937b35 100644
> --- a/ofproto/ofproto.h
> +++ b/ofproto/ofproto.h
> @@ -562,6 +562,10 @@ int ofproto_port_get_cfm_status(const struct ofproto *,
>  enum ofputil_table_miss ofproto_table_get_miss_config(const struct ofproto *,
>                                                        uint8_t table_id);
>  
> +void ofproto_ref(struct ofproto *p);
> +void ofproto_unref(struct ofproto *p);
> +bool ofproto_try_ref(struct ofproto *p);

No need to have a variable name in a prototype.

> +
>  #ifdef  __cplusplus
>  }
>  #endif
>
Ilya Maximets Feb. 24, 2021, 4:01 p.m. UTC | #4
On 2/24/21 3:29 PM, Ilya Maximets wrote:
> On 7/17/20 3:50 AM, hepeng.0320 wrote:
>> From: Peng He <hepeng.0320@bytedance.com>
>>
>> The call stack of rule_destroy_cb:
>>
>> remove_rules_postponed (one grace period)
>>     -> remove_rule_rcu
>>         -> remove_rule_rcu__
>>             -> ofproto_rule_unref -> ref count != 1
>>                 -> ... more grace periods.
>>                 -> rule_destroy_cb (> 2 grace periods)
>>                     -> free
>>
>> Currently ofproto_destory waits only two grace periods, this means when
>> rule_destroy_cb is called, the ofproto might have been already destroyed.
>>
>> This patch adds a refcount for ofproto to ensure ofproto exists when it
>> is needed to call free functions.
>>
>> This patch fixes the crashes found:
>> Program terminated with signal SIGSEGV, Segmentation fault.
>> 0  0x0000562a55169e49 in rule_destroy_cb (rule=0x562a598be2c0) at ofproto/ofproto.c:2956
>> 1  0x0000562a552623d6 in ovsrcu_call_postponed () at lib/ovs-rcu.c:348
>> 2  0x0000562a55262504 in ovsrcu_postpone_thread (arg=<optimized out>) at lib/ovs-rcu.c:364
>> 3  0x0000562a55264aef in ovsthread_wrapper (aux_=<optimized out>) at lib/ovs-thread.c:383
>> 4  0x00007febe715a4a4 in start_thread (arg=0x7febe4c1c700) at pthread_create.c:456
>> 5  0x00007febe6990d0f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
>> (gdb) p rule->ofproto->ofproto_class
>> $3 = (const struct ofproto_class *) 0x0
>>
>> Also:
>> 0  ofproto_dpif_credit_table_stats (ofproto=0x5583583332b0, table_id=0 '\000', n_matches=5, n_misses=0) at ofproto/ofproto-dpif.c:4310
>> 1  0x0000558354c68514 in xlate_push_stats_entry (entry=entry@entry=0x7ff488031398, stats=stats@entry=0x7ff49af30b60) at ofproto/ofproto-dpif-xlate-cache.c:99
>> 2  0x0000558354c686f3 in xlate_push_stats (xcache=<optimized out>, stats=stats@entry=0x7ff49af30b60) at ofproto/ofproto-dpif-xlate-cache.c:181
>> 3  0x0000558354c5411a in revalidate_ukey (udpif=udpif@entry=0x5583583baba0, ukey=ukey@entry=0x7ff47809f770, stats=stats@entry=0x7ff49af32128, odp_actions=odp_actions@entry=0x7ff49af30c50, reval_seq=reval_seq@entry=275670456,
>>    recircs=recircs@entry=0x7ff49af30cd0) at ofproto/ofproto-dpif-upcall.c:2292
>> 4  0x0000558354c57cbc in revalidate (revalidator=<optimized out>) at ofproto/ofproto-dpif-upcall.c:2681
>> 5  0x0000558354c57f8e in udpif_revalidator (arg=0x5583583d2a90) at ofproto/ofproto-dpif-upcall.c:934
>> 6  0x0000558354d24aef in ovsthread_wrapper (aux_=<optimized out>) at lib/ovs-thread.c:383
>> 7  0x00007ff4a479d4a4 in start_thread (arg=0x7ff49af35700) at pthread_create.c:456
>> 8  0x00007ff4a3fd3d0f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
>> (gdb) p ofproto->up.ofproto_class
>> $3 = (const struct ofproto_class *) 0x0
>>
>> Signed-off-by: Peng He <hepeng.0320@bytedance.com>
>> ---

CC: Hongzhi Guo

And I just remembered that there is a very similar patch that
was submitted to the mail-list several months betore this one.
Ben started review, but didn't finish:

http://patchwork.ozlabs.org/project/openvswitch/patch/20200306130555.19884-1-guohongzhi1@huawei.com/

Both patches has smilar issues, i.e. mixing RCU with refcounters
and not covering all the cases.

There are at least 2 major places where ofproto needs to be refcounted:
1. rules
   1.a xlate caches?
2. ofproto groups

IMO, mixing RCU with refcounts basically means that we do not have a
clear understanding on how it works and using RCU as a safety net.
We shuld stop using RCU for orproto and just have refcounts.  Once
ofproto refcount reaches zero it should be immediately destroyed.
All the places where we need to increase refcount needs to be tracked
down.

It'll be great if you can cooperate to prepare a complete solution
for this issue.

Best regards, Ilya Maximets.
Hongzhi Guo Feb. 25, 2021, 2:45 a.m. UTC | #5
It's true that the patch has been reviewed by ben and a question has been raised.I have replied the question, but I did not get a follow-up reply.

In short, I agree with you very much and would be happy to prepare a complete solution for this issue.

-----Original Message-----
From: Ilya Maximets [mailto:i.maximets@ovn.org] 
Sent: 2/25/2021 0:02 AM
To: Ilya Maximets <i.maximets@ovn.org>; hepeng.0320 <hepeng.0320@bytedance.com>; dev@openvswitch.org
Cc: William Tu <u9012063@gmail.com>; Yifeng Sun <pkusunyifeng@gmail.com>; Guohongzhi (Russell Lab) <guohongzhi1@huawei.com>
Subject: Re: [ovs-dev] [ovs-dev v2] ofproto: add refcount to ofproto to fix crash at ofproto_destroy

On 2/24/21 3:29 PM, Ilya Maximets wrote:
> On 7/17/20 3:50 AM, hepeng.0320 wrote:
>> From: Peng He <hepeng.0320@bytedance.com>
>>
>> The call stack of rule_destroy_cb:
>>
>> remove_rules_postponed (one grace period)
>>     -> remove_rule_rcu
>>         -> remove_rule_rcu__
>>             -> ofproto_rule_unref -> ref count != 1
>>                 -> ... more grace periods.
>>                 -> rule_destroy_cb (> 2 grace periods)
>>                     -> free
>>
>> Currently ofproto_destory waits only two grace periods, this means 
>> when rule_destroy_cb is called, the ofproto might have been already destroyed.
>>
>> This patch adds a refcount for ofproto to ensure ofproto exists when 
>> it is needed to call free functions.
>>
>> This patch fixes the crashes found:
>> Program terminated with signal SIGSEGV, Segmentation fault.
>> 0  0x0000562a55169e49 in rule_destroy_cb (rule=0x562a598be2c0) at 
>> ofproto/ofproto.c:2956
>> 1  0x0000562a552623d6 in ovsrcu_call_postponed () at 
>> lib/ovs-rcu.c:348
>> 2  0x0000562a55262504 in ovsrcu_postpone_thread (arg=<optimized out>) 
>> at lib/ovs-rcu.c:364
>> 3  0x0000562a55264aef in ovsthread_wrapper (aux_=<optimized out>) at 
>> lib/ovs-thread.c:383
>> 4  0x00007febe715a4a4 in start_thread (arg=0x7febe4c1c700) at 
>> pthread_create.c:456
>> 5  0x00007febe6990d0f in clone () at 
>> ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
>> (gdb) p rule->ofproto->ofproto_class
>> $3 = (const struct ofproto_class *) 0x0
>>
>> Also:
>> 0  ofproto_dpif_credit_table_stats (ofproto=0x5583583332b0, 
>> table_id=0 '\000', n_matches=5, n_misses=0) at 
>> ofproto/ofproto-dpif.c:4310
>> 1  0x0000558354c68514 in xlate_push_stats_entry 
>> (entry=entry@entry=0x7ff488031398, stats=stats@entry=0x7ff49af30b60) 
>> at ofproto/ofproto-dpif-xlate-cache.c:99
>> 2  0x0000558354c686f3 in xlate_push_stats (xcache=<optimized out>, 
>> stats=stats@entry=0x7ff49af30b60) at 
>> ofproto/ofproto-dpif-xlate-cache.c:181
>> 3  0x0000558354c5411a in revalidate_ukey (udpif=udpif@entry=0x5583583baba0, ukey=ukey@entry=0x7ff47809f770, stats=stats@entry=0x7ff49af32128, odp_actions=odp_actions@entry=0x7ff49af30c50, reval_seq=reval_seq@entry=275670456,
>>    recircs=recircs@entry=0x7ff49af30cd0) at 
>> ofproto/ofproto-dpif-upcall.c:2292
>> 4  0x0000558354c57cbc in revalidate (revalidator=<optimized out>) at 
>> ofproto/ofproto-dpif-upcall.c:2681
>> 5  0x0000558354c57f8e in udpif_revalidator (arg=0x5583583d2a90) at 
>> ofproto/ofproto-dpif-upcall.c:934
>> 6  0x0000558354d24aef in ovsthread_wrapper (aux_=<optimized out>) at 
>> lib/ovs-thread.c:383
>> 7  0x00007ff4a479d4a4 in start_thread (arg=0x7ff49af35700) at 
>> pthread_create.c:456
>> 8  0x00007ff4a3fd3d0f in clone () at 
>> ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
>> (gdb) p ofproto->up.ofproto_class
>> $3 = (const struct ofproto_class *) 0x0
>>
>> Signed-off-by: Peng He <hepeng.0320@bytedance.com>
>> ---

CC: Hongzhi Guo

And I just remembered that there is a very similar patch that was submitted to the mail-list several months betore this one.
Ben started review, but didn't finish:

http://patchwork.ozlabs.org/project/openvswitch/patch/20200306130555.19884-1-guohongzhi1@huawei.com/

Both patches has smilar issues, i.e. mixing RCU with refcounters and not covering all the cases.

There are at least 2 major places where ofproto needs to be refcounted:
1. rules
   1.a xlate caches?
2. ofproto groups

IMO, mixing RCU with refcounts basically means that we do not have a clear understanding on how it works and using RCU as a safety net.
We shuld stop using RCU for orproto and just have refcounts.  Once ofproto refcount reaches zero it should be immediately destroyed.
All the places where we need to increase refcount needs to be tracked down.

It'll be great if you can cooperate to prepare a complete solution for this issue.

Best regards, Ilya Maximets.
Peng He Feb. 25, 2021, 3:14 a.m. UTC | #6
Ilya Maximets <i.maximets@ovn.org> 于2021年2月25日周四 上午12:01写道:
>
> On 2/24/21 3:29 PM, Ilya Maximets wrote:
> > On 7/17/20 3:50 AM, hepeng.0320 wrote:
> >> From: Peng He <hepeng.0320@bytedance.com>
> >>
> >> The call stack of rule_destroy_cb:
> >>
> >> remove_rules_postponed (one grace period)
> >>     -> remove_rule_rcu
> >>         -> remove_rule_rcu__
> >>             -> ofproto_rule_unref -> ref count != 1
> >>                 -> ... more grace periods.
> >>                 -> rule_destroy_cb (> 2 grace periods)
> >>                     -> free
> >>
> >> Currently ofproto_destory waits only two grace periods, this means when
> >> rule_destroy_cb is called, the ofproto might have been already destroyed.
> >>
> >> This patch adds a refcount for ofproto to ensure ofproto exists when it
> >> is needed to call free functions.
> >>
> >> This patch fixes the crashes found:
> >> Program terminated with signal SIGSEGV, Segmentation fault.
> >> 0  0x0000562a55169e49 in rule_destroy_cb (rule=0x562a598be2c0) at ofproto/ofproto.c:2956
> >> 1  0x0000562a552623d6 in ovsrcu_call_postponed () at lib/ovs-rcu.c:348
> >> 2  0x0000562a55262504 in ovsrcu_postpone_thread (arg=<optimized out>) at lib/ovs-rcu.c:364
> >> 3  0x0000562a55264aef in ovsthread_wrapper (aux_=<optimized out>) at lib/ovs-thread.c:383
> >> 4  0x00007febe715a4a4 in start_thread (arg=0x7febe4c1c700) at pthread_create.c:456
> >> 5  0x00007febe6990d0f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
> >> (gdb) p rule->ofproto->ofproto_class
> >> $3 = (const struct ofproto_class *) 0x0
> >>
> >> Also:
> >> 0  ofproto_dpif_credit_table_stats (ofproto=0x5583583332b0, table_id=0 '\000', n_matches=5, n_misses=0) at ofproto/ofproto-dpif.c:4310
> >> 1  0x0000558354c68514 in xlate_push_stats_entry (entry=entry@entry=0x7ff488031398, stats=stats@entry=0x7ff49af30b60) at ofproto/ofproto-dpif-xlate-cache.c:99
> >> 2  0x0000558354c686f3 in xlate_push_stats (xcache=<optimized out>, stats=stats@entry=0x7ff49af30b60) at ofproto/ofproto-dpif-xlate-cache.c:181
> >> 3  0x0000558354c5411a in revalidate_ukey (udpif=udpif@entry=0x5583583baba0, ukey=ukey@entry=0x7ff47809f770, stats=stats@entry=0x7ff49af32128, odp_actions=odp_actions@entry=0x7ff49af30c50, reval_seq=reval_seq@entry=275670456,
> >>    recircs=recircs@entry=0x7ff49af30cd0) at ofproto/ofproto-dpif-upcall.c:2292
> >> 4  0x0000558354c57cbc in revalidate (revalidator=<optimized out>) at ofproto/ofproto-dpif-upcall.c:2681
> >> 5  0x0000558354c57f8e in udpif_revalidator (arg=0x5583583d2a90) at ofproto/ofproto-dpif-upcall.c:934
> >> 6  0x0000558354d24aef in ovsthread_wrapper (aux_=<optimized out>) at lib/ovs-thread.c:383
> >> 7  0x00007ff4a479d4a4 in start_thread (arg=0x7ff49af35700) at pthread_create.c:456
> >> 8  0x00007ff4a3fd3d0f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
> >> (gdb) p ofproto->up.ofproto_class
> >> $3 = (const struct ofproto_class *) 0x0
> >>
> >> Signed-off-by: Peng He <hepeng.0320@bytedance.com>
> >> ---
>
> CC: Hongzhi Guo
>
> And I just remembered that there is a very similar patch that
> was submitted to the mail-list several months betore this one.
> Ben started review, but didn't finish:
>
> http://patchwork.ozlabs.org/project/openvswitch/patch/20200306130555.19884-1-guohongzhi1@huawei.com/
>
> Both patches has smilar issues, i.e. mixing RCU with refcounters
> and not covering all the cases.
>
> There are at least 2 major places where ofproto needs to be refcounted:
> 1. rules
>    1.a xlate caches?
> 2. ofproto groups
>
> IMO, mixing RCU with refcounts basically means that we do not have a
> clear understanding on how it works and using RCU as a safety net.
> We shuld stop using RCU for orproto and just have refcounts.  Once
> ofproto refcount reaches zero it should be immediately destroyed.
> All the places where we need to increase refcount needs to be tracked
> down.

IMO, I think RCU and refcounts are not mutually exclusive. In kernel
connntack system such as IPVS, sessions are used with both RCU and
refcounts.

Essentially, the RCU implies a refcount if there are no places holding
a pointer to the object other than hash tables (or other RCU enabled
data structure), thus by removing it from hash tables, and wait a
grace period,
we can safely free the object because we are sure that no one holds a
pointer to the object. Removing it from hash tables is like reducing
the refcount by 1, and since hash tables are
the only place that holds the pointer and this is the only reference
that lives longer than a grace period, so by removing it, we are sure
that no other ref can live longer than a grace period, and after the
period, we are safe to free it.

If we use only refcounts, every time we access the object, we have to
add refcounts.
But if use RCU combined with refcount, we only need to add ref, if
there are other places than the hashtables that live longer than one
grace period.

So I think using RCU with refcounts actually makes code changes less,
as mostly, the access to object is ephemeral, you lookup the
hashtables and store the pointer using a stack variable.
As long as you don't store the pointer in a data structure that lives
longer than a grace period, then you do not have to add refcount.

>
> It'll be great if you can cooperate to prepare a complete solution
> for this issue.
>
> Best regards, Ilya Maximets.
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Hongzhi Guo Feb. 25, 2021, 3:32 a.m. UTC | #7
Refcount and RCU are not mutually exclusive.
IMO, the main reason for this problem is that the rule uses both refcount and rcu, while the ofproto uses only rcu, and the rule retains the pointer of the ofproto. More importantly, ofproto cannot guarantee a longer grace period than the rule.


-----Original Message-----
From: 贺鹏 [mailto:xnhp0320@gmail.com] 
Sent: 25/2/2021 11:14
To: Ilya Maximets <i.maximets@ovn.org>
Cc: hepeng.0320 <hepeng.0320@bytedance.com>; <dev@openvswitch.org> <dev@openvswitch.org>; Guohongzhi (Russell Lab) <guohongzhi1@huawei.com>
Subject: Re: [ovs-dev] [ovs-dev v2] ofproto: add refcount to ofproto to fix crash at ofproto_destroy

Ilya Maximets <i.maximets@ovn.org> 于2021年2月25日周四 上午12:01写道:
>
> On 2/24/21 3:29 PM, Ilya Maximets wrote:
> > On 7/17/20 3:50 AM, hepeng.0320 wrote:
> >> From: Peng He <hepeng.0320@bytedance.com>
> >>
> >> The call stack of rule_destroy_cb:
> >>
> >> remove_rules_postponed (one grace period)
> >>     -> remove_rule_rcu
> >>         -> remove_rule_rcu__
> >>             -> ofproto_rule_unref -> ref count != 1
> >>                 -> ... more grace periods.
> >>                 -> rule_destroy_cb (> 2 grace periods)
> >>                     -> free
> >>
> >> Currently ofproto_destory waits only two grace periods, this means 
> >> when rule_destroy_cb is called, the ofproto might have been already destroyed.
> >>
> >> This patch adds a refcount for ofproto to ensure ofproto exists 
> >> when it is needed to call free functions.
> >>
> >> This patch fixes the crashes found:
> >> Program terminated with signal SIGSEGV, Segmentation fault.
> >> 0  0x0000562a55169e49 in rule_destroy_cb (rule=0x562a598be2c0) at 
> >> ofproto/ofproto.c:2956
> >> 1  0x0000562a552623d6 in ovsrcu_call_postponed () at 
> >> lib/ovs-rcu.c:348
> >> 2  0x0000562a55262504 in ovsrcu_postpone_thread (arg=<optimized 
> >> out>) at lib/ovs-rcu.c:364
> >> 3  0x0000562a55264aef in ovsthread_wrapper (aux_=<optimized out>) 
> >> at lib/ovs-thread.c:383
> >> 4  0x00007febe715a4a4 in start_thread (arg=0x7febe4c1c700) at 
> >> pthread_create.c:456
> >> 5  0x00007febe6990d0f in clone () at 
> >> ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
> >> (gdb) p rule->ofproto->ofproto_class
> >> $3 = (const struct ofproto_class *) 0x0
> >>
> >> Also:
> >> 0  ofproto_dpif_credit_table_stats (ofproto=0x5583583332b0, 
> >> table_id=0 '\000', n_matches=5, n_misses=0) at 
> >> ofproto/ofproto-dpif.c:4310
> >> 1  0x0000558354c68514 in xlate_push_stats_entry 
> >> (entry=entry@entry=0x7ff488031398, 
> >> stats=stats@entry=0x7ff49af30b60) at 
> >> ofproto/ofproto-dpif-xlate-cache.c:99
> >> 2  0x0000558354c686f3 in xlate_push_stats (xcache=<optimized out>, 
> >> stats=stats@entry=0x7ff49af30b60) at 
> >> ofproto/ofproto-dpif-xlate-cache.c:181
> >> 3  0x0000558354c5411a in revalidate_ukey (udpif=udpif@entry=0x5583583baba0, ukey=ukey@entry=0x7ff47809f770, stats=stats@entry=0x7ff49af32128, odp_actions=odp_actions@entry=0x7ff49af30c50, reval_seq=reval_seq@entry=275670456,
> >>    recircs=recircs@entry=0x7ff49af30cd0) at 
> >> ofproto/ofproto-dpif-upcall.c:2292
> >> 4  0x0000558354c57cbc in revalidate (revalidator=<optimized out>) 
> >> at ofproto/ofproto-dpif-upcall.c:2681
> >> 5  0x0000558354c57f8e in udpif_revalidator (arg=0x5583583d2a90) at 
> >> ofproto/ofproto-dpif-upcall.c:934
> >> 6  0x0000558354d24aef in ovsthread_wrapper (aux_=<optimized out>) 
> >> at lib/ovs-thread.c:383
> >> 7  0x00007ff4a479d4a4 in start_thread (arg=0x7ff49af35700) at 
> >> pthread_create.c:456
> >> 8  0x00007ff4a3fd3d0f in clone () at 
> >> ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
> >> (gdb) p ofproto->up.ofproto_class
> >> $3 = (const struct ofproto_class *) 0x0
> >>
> >> Signed-off-by: Peng He <hepeng.0320@bytedance.com>
> >> ---
>
> CC: Hongzhi Guo
>
> And I just remembered that there is a very similar patch that was 
> submitted to the mail-list several months betore this one.
> Ben started review, but didn't finish:
>
> http://patchwork.ozlabs.org/project/openvswitch/patch/20200306130555.1
> 9884-1-guohongzhi1@huawei.com/
>
> Both patches has smilar issues, i.e. mixing RCU with refcounters and 
> not covering all the cases.
>
> There are at least 2 major places where ofproto needs to be refcounted:
> 1. rules
>    1.a xlate caches?
> 2. ofproto groups
>
> IMO, mixing RCU with refcounts basically means that we do not have a 
> clear understanding on how it works and using RCU as a safety net.
> We shuld stop using RCU for orproto and just have refcounts.  Once 
> ofproto refcount reaches zero it should be immediately destroyed.
> All the places where we need to increase refcount needs to be tracked 
> down.

IMO, I think RCU and refcounts are not mutually exclusive. In kernel connntack system such as IPVS, sessions are used with both RCU and refcounts.

Essentially, the RCU implies a refcount if there are no places holding a pointer to the object other than hash tables (or other RCU enabled data structure), thus by removing it from hash tables, and wait a grace period, we can safely free the object because we are sure that no one holds a pointer to the object. Removing it from hash tables is like reducing the refcount by 1, and since hash tables are the only place that holds the pointer and this is the only reference that lives longer than a grace period, so by removing it, we are sure that no other ref can live longer than a grace period, and after the period, we are safe to free it.

If we use only refcounts, every time we access the object, we have to add refcounts.
But if use RCU combined with refcount, we only need to add ref, if there are other places than the hashtables that live longer than one grace period.

So I think using RCU with refcounts actually makes code changes less, as mostly, the access to object is ephemeral, you lookup the hashtables and store the pointer using a stack variable.
As long as you don't store the pointer in a data structure that lives longer than a grace period, then you do not have to add refcount.

>
> It'll be great if you can cooperate to prepare a complete solution for 
> this issue.
>
> Best regards, Ilya Maximets.
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev



--
hepeng
Ilya Maximets Feb. 25, 2021, 12:54 p.m. UTC | #8
On 2/25/21 4:32 AM, Guohongzhi (Russell Lab) wrote:
> Refcount and RCU are not mutually exclusive.
> IMO, the main reason for this problem is that the rule uses both refcount and rcu, while the ofproto uses only rcu, and the rule retains the pointer of the ofproto. More importantly, ofproto cannot guarantee a longer grace period than the rule.
> 

I understand that refcount and RCU are not mutually exclusive.
However, in this particular case RCU for ofproto was introduced for one
and only one reason - to avoid freeing ofproto while rules are still
alive.  This was done in commit f416c8d61601 ("ofproto: RCU postpone
rule destruction.").  The goal was to allow using rules without
refcounting them within a single grace period.  And that forced us
to postpone destruction of the ofproto for a single grace period.
Later commit 39c9459355b6 ("Use classifier versioning.") made it
possible for rules to be alive for more than one grace period, so
the commit made ofproto wait for 2 grace periods by double postponing.
As we can see now, that wasn't enough and we have to wait for more
than 2 grace periods in certain cases.

Now we're introducing refcounts to wait for all rules to be destroyed
before destroying the ofproto.  But what is the point of having
RCU if we already know that if refcount is zero than all rules are
already destroyed and we don't need to wait anything and could just
destroy the ofproto?

RCU doesn't protect ofproto itself here, it actually protects rules,
i.e. keeps ofproto alive while rules alive, but we can fully replace
this with refcounts and I see no value in having RCU additionally.

To have a full picture: right now we also have groups protected by
RCU, so we need to refcount ofproto for them too.  But that is almost
exactly same situation as we have with rules.

> 
> -----Original Message-----
> From: 贺鹏 [mailto:xnhp0320@gmail.com] 
> Sent: 25/2/2021 11:14
> To: Ilya Maximets <i.maximets@ovn.org>
> Cc: hepeng.0320 <hepeng.0320@bytedance.com>; <dev@openvswitch.org> <dev@openvswitch.org>; Guohongzhi (Russell Lab) <guohongzhi1@huawei.com>
> Subject: Re: [ovs-dev] [ovs-dev v2] ofproto: add refcount to ofproto to fix crash at ofproto_destroy
> 
> Ilya Maximets <i.maximets@ovn.org> 于2021年2月25日周四 上午12:01写道:
>>
>> On 2/24/21 3:29 PM, Ilya Maximets wrote:
>>> On 7/17/20 3:50 AM, hepeng.0320 wrote:
>>>> From: Peng He <hepeng.0320@bytedance.com>
>>>>
>>>> The call stack of rule_destroy_cb:
>>>>
>>>> remove_rules_postponed (one grace period)
>>>>     -> remove_rule_rcu
>>>>         -> remove_rule_rcu__
>>>>             -> ofproto_rule_unref -> ref count != 1
>>>>                 -> ... more grace periods.
>>>>                 -> rule_destroy_cb (> 2 grace periods)
>>>>                     -> free
>>>>
>>>> Currently ofproto_destory waits only two grace periods, this means 
>>>> when rule_destroy_cb is called, the ofproto might have been already destroyed.
>>>>
>>>> This patch adds a refcount for ofproto to ensure ofproto exists 
>>>> when it is needed to call free functions.
>>>>
>>>> This patch fixes the crashes found:
>>>> Program terminated with signal SIGSEGV, Segmentation fault.
>>>> 0  0x0000562a55169e49 in rule_destroy_cb (rule=0x562a598be2c0) at 
>>>> ofproto/ofproto.c:2956
>>>> 1  0x0000562a552623d6 in ovsrcu_call_postponed () at 
>>>> lib/ovs-rcu.c:348
>>>> 2  0x0000562a55262504 in ovsrcu_postpone_thread (arg=<optimized 
>>>> out>) at lib/ovs-rcu.c:364
>>>> 3  0x0000562a55264aef in ovsthread_wrapper (aux_=<optimized out>) 
>>>> at lib/ovs-thread.c:383
>>>> 4  0x00007febe715a4a4 in start_thread (arg=0x7febe4c1c700) at 
>>>> pthread_create.c:456
>>>> 5  0x00007febe6990d0f in clone () at 
>>>> ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
>>>> (gdb) p rule->ofproto->ofproto_class
>>>> $3 = (const struct ofproto_class *) 0x0
>>>>
>>>> Also:
>>>> 0  ofproto_dpif_credit_table_stats (ofproto=0x5583583332b0, 
>>>> table_id=0 '\000', n_matches=5, n_misses=0) at 
>>>> ofproto/ofproto-dpif.c:4310
>>>> 1  0x0000558354c68514 in xlate_push_stats_entry 
>>>> (entry=entry@entry=0x7ff488031398, 
>>>> stats=stats@entry=0x7ff49af30b60) at 
>>>> ofproto/ofproto-dpif-xlate-cache.c:99
>>>> 2  0x0000558354c686f3 in xlate_push_stats (xcache=<optimized out>, 
>>>> stats=stats@entry=0x7ff49af30b60) at 
>>>> ofproto/ofproto-dpif-xlate-cache.c:181
>>>> 3  0x0000558354c5411a in revalidate_ukey (udpif=udpif@entry=0x5583583baba0, ukey=ukey@entry=0x7ff47809f770, stats=stats@entry=0x7ff49af32128, odp_actions=odp_actions@entry=0x7ff49af30c50, reval_seq=reval_seq@entry=275670456,
>>>>    recircs=recircs@entry=0x7ff49af30cd0) at 
>>>> ofproto/ofproto-dpif-upcall.c:2292
>>>> 4  0x0000558354c57cbc in revalidate (revalidator=<optimized out>) 
>>>> at ofproto/ofproto-dpif-upcall.c:2681
>>>> 5  0x0000558354c57f8e in udpif_revalidator (arg=0x5583583d2a90) at 
>>>> ofproto/ofproto-dpif-upcall.c:934
>>>> 6  0x0000558354d24aef in ovsthread_wrapper (aux_=<optimized out>) 
>>>> at lib/ovs-thread.c:383
>>>> 7  0x00007ff4a479d4a4 in start_thread (arg=0x7ff49af35700) at 
>>>> pthread_create.c:456
>>>> 8  0x00007ff4a3fd3d0f in clone () at 
>>>> ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
>>>> (gdb) p ofproto->up.ofproto_class
>>>> $3 = (const struct ofproto_class *) 0x0
>>>>
>>>> Signed-off-by: Peng He <hepeng.0320@bytedance.com>
>>>> ---
>>
>> CC: Hongzhi Guo
>>
>> And I just remembered that there is a very similar patch that was 
>> submitted to the mail-list several months betore this one.
>> Ben started review, but didn't finish:
>>
>> http://patchwork.ozlabs.org/project/openvswitch/patch/20200306130555.1
>> 9884-1-guohongzhi1@huawei.com/
>>
>> Both patches has smilar issues, i.e. mixing RCU with refcounters and 
>> not covering all the cases.
>>
>> There are at least 2 major places where ofproto needs to be refcounted:
>> 1. rules
>>    1.a xlate caches?
>> 2. ofproto groups
>>
>> IMO, mixing RCU with refcounts basically means that we do not have a 
>> clear understanding on how it works and using RCU as a safety net.
>> We shuld stop using RCU for orproto and just have refcounts.  Once 
>> ofproto refcount reaches zero it should be immediately destroyed.
>> All the places where we need to increase refcount needs to be tracked 
>> down.
> 
> IMO, I think RCU and refcounts are not mutually exclusive. In kernel connntack system such as IPVS, sessions are used with both RCU and refcounts.
> 
> Essentially, the RCU implies a refcount if there are no places holding a pointer to the object other than hash tables (or other RCU enabled data structure), thus by removing it from hash tables, and wait a grace period, we can safely free the object because we are sure that no one holds a pointer to the object. Removing it from hash tables is like reducing the refcount by 1, and since hash tables are the only place that holds the pointer and this is the only reference that lives longer than a grace period, so by removing it, we are sure that no other ref can live longer than a grace period, and after the period, we are safe to free it.
> 
> If we use only refcounts, every time we access the object, we have to add refcounts.
> But if use RCU combined with refcount, we only need to add ref, if there are other places than the hashtables that live longer than one grace period.
> 
> So I think using RCU with refcounts actually makes code changes less, as mostly, the access to object is ephemeral, you lookup the hashtables and store the pointer using a stack variable.
> As long as you don't store the pointer in a data structure that lives longer than a grace period, then you do not have to add refcount.
> 
>>
>> It'll be great if you can cooperate to prepare a complete solution for 
>> this issue.
>>
>> Best regards, Ilya Maximets.
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> 
> 
> 
> --
> hepeng
>
William Tu Feb. 25, 2021, 6:31 p.m. UTC | #9
On Thu, Feb 25, 2021 at 4:54 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>
> On 2/25/21 4:32 AM, Guohongzhi (Russell Lab) wrote:
> > Refcount and RCU are not mutually exclusive.
> > IMO, the main reason for this problem is that the rule uses both refcount and rcu, while the ofproto uses only rcu, and the rule retains the pointer of the ofproto. More importantly, ofproto cannot guarantee a longer grace period than the rule.
> >
>
> I understand that refcount and RCU are not mutually exclusive.
> However, in this particular case RCU for ofproto was introduced for one
> and only one reason - to avoid freeing ofproto while rules are still
> alive.  This was done in commit f416c8d61601 ("ofproto: RCU postpone
> rule destruction.").  The goal was to allow using rules without
> refcounting them within a single grace period.  And that forced us
> to postpone destruction of the ofproto for a single grace period.
> Later commit 39c9459355b6 ("Use classifier versioning.") made it
> possible for rules to be alive for more than one grace period, so
> the commit made ofproto wait for 2 grace periods by double postponing.
> As we can see now, that wasn't enough and we have to wait for more
> than 2 grace periods in certain cases.
>
> Now we're introducing refcounts to wait for all rules to be destroyed
> before destroying the ofproto.  But what is the point of having
> RCU if we already know that if refcount is zero than all rules are
> already destroyed and we don't need to wait anything and could just
> destroy the ofproto?
>
> RCU doesn't protect ofproto itself here, it actually protects rules,
> i.e. keeps ofproto alive while rules alive, but we can fully replace
> this with refcounts and I see no value in having RCU additionally.
>
> To have a full picture: right now we also have groups protected by
> RCU, so we need to refcount ofproto for them too.  But that is almost
> exactly same situation as we have with rules.
>
Thanks Guohongzhi, Hepeng, and Ilya, I learned a lot from your discussions.

I think refcount and RCU are mutually exclusive. They are two different ways of
doing synchronization and somehow we mix them together by using RCU to
optimize refcount.

Before the commit f416c8d61601 ("ofproto: RCU postpone rule destruction."),
we are using refcount to protect rules, and as a result every time OVS
references
a rule, we have to take refcount. It works ok but this probably has
high performance
overhead because it's doing atomic operations.

So the commit f416c8d61601 optimizes it by doing
1) not taking refcount of rule "within a grace period"
2) introduce RCU for rule
The assumption is that a grace period is like refcount == 0, so it's
valid to do so.
A side effect is that ofproto_destroy__()  needs to be postponed.
Note that if a rule is alive across grace period, it has to take refcount.

However, later commit 39c9459355b6 ("Use classifier versioning.")
makes rule alive across grace period. It breaks the first commit's assumption
and it has to introduce double postponing for ofproto, which we found
out is the problem now.

Since my point is RCU and refcount are mutually exclusive (A grace period
is like refcount ==0) , I agree with Ilya that we can just use refcount to fix
ofproto issue.

Regards,
William
Peng He Feb. 27, 2021, 8:53 a.m. UTC | #10
Hi,

Thanks William and Ilya for a detailed revisiting of the origin of the
problem. I learned a lot.

I now understand that the mix using of RCU and refcounts is not
intended in the first place.
But my point is that mix using RCU and refcounts now gives you more
choices, and actually eases the code changes.

For example, the code for * ofproto_dpif_lookup_by_name* or other
ofproto lookup function,
when only using refcounts, you need to change it to:

 struct ofproto_dpif *
 ofproto_dpif_lookup_by_uuid(const struct uuid *uuid)
 {
     struct ofproto_dpif *ofproto;

     HMAP_FOR_EACH_WITH_HASH (ofproto, all_ofproto_dpifs_by_uuid_node,
                              uuid_hash(uuid), &all_ofproto_dpifs_by_uuid) {
         if (uuid_equals(&ofproto->uuid, uuid)) {

             ---> if  ovs_refcount_try_ref(ofproto)

             return ofproto;
         }
     }
     return NULL;
 }

and after finish its usage, you have to do ofproto_unref the ofproto.

This is why I said, most accessing to ofproto is ephemeral. If you
change to use the pure refcounts solution, you have
to add refcount and release it every time you access the ofproto. We
should be more careful and remember to unref
the ofproto.

However, if using RCU and refcounts, in the above case, you do not
need to change the code, since the RCU ensures that
these ephemeral accesses are safe.

you only need to add refcount, when you find that the pointer to
ofproto lives longer than one grace period.


This is why in my patch, I do not add ref to ofproto after its
creation. I agree the patch is not complete and has issues,
and understand it could confuse people if changes into mix RCU and
refcounts version.


William Tu <u9012063@gmail.com> 于2021年2月26日周五 上午2:32写道:
>
> On Thu, Feb 25, 2021 at 4:54 AM Ilya Maximets <i.maximets@ovn.org> wrote:
> >
> > On 2/25/21 4:32 AM, Guohongzhi (Russell Lab) wrote:
> > > Refcount and RCU are not mutually exclusive.
> > > IMO, the main reason for this problem is that the rule uses both refcount and rcu, while the ofproto uses only rcu, and the rule retains the pointer of the ofproto. More importantly, ofproto cannot guarantee a longer grace period than the rule.
> > >
> >
> > I understand that refcount and RCU are not mutually exclusive.
> > However, in this particular case RCU for ofproto was introduced for one
> > and only one reason - to avoid freeing ofproto while rules are still
> > alive.  This was done in commit f416c8d61601 ("ofproto: RCU postpone
> > rule destruction.").  The goal was to allow using rules without
> > refcounting them within a single grace period.  And that forced us
> > to postpone destruction of the ofproto for a single grace period.
> > Later commit 39c9459355b6 ("Use classifier versioning.") made it
> > possible for rules to be alive for more than one grace period, so
> > the commit made ofproto wait for 2 grace periods by double postponing.
> > As we can see now, that wasn't enough and we have to wait for more
> > than 2 grace periods in certain cases.
> >
> > Now we're introducing refcounts to wait for all rules to be destroyed
> > before destroying the ofproto.  But what is the point of having
> > RCU if we already know that if refcount is zero than all rules are
> > already destroyed and we don't need to wait anything and could just
> > destroy the ofproto?
> >
> > RCU doesn't protect ofproto itself here, it actually protects rules,
> > i.e. keeps ofproto alive while rules alive, but we can fully replace
> > this with refcounts and I see no value in having RCU additionally.
> >
> > To have a full picture: right now we also have groups protected by
> > RCU, so we need to refcount ofproto for them too.  But that is almost
> > exactly same situation as we have with rules.
> >
> Thanks Guohongzhi, Hepeng, and Ilya, I learned a lot from your discussions.
>
> I think refcount and RCU are mutually exclusive. They are two different ways of
> doing synchronization and somehow we mix them together by using RCU to
> optimize refcount.
>
> Before the commit f416c8d61601 ("ofproto: RCU postpone rule destruction."),
> we are using refcount to protect rules, and as a result every time OVS
> references
> a rule, we have to take refcount. It works ok but this probably has
> high performance
> overhead because it's doing atomic operations.
>
> So the commit f416c8d61601 optimizes it by doing
> 1) not taking refcount of rule "within a grace period"
> 2) introduce RCU for rule
> The assumption is that a grace period is like refcount == 0, so it's
> valid to do so.
> A side effect is that ofproto_destroy__()  needs to be postponed.
> Note that if a rule is alive across grace period, it has to take refcount.
>
> However, later commit 39c9459355b6 ("Use classifier versioning.")
> makes rule alive across grace period. It breaks the first commit's assumption
> and it has to introduce double postponing for ofproto, which we found
> out is the problem now.
>
> Since my point is RCU and refcount are mutually exclusive (A grace period
> is like refcount ==0) , I agree with Ilya that we can just use refcount to fix
> ofproto issue.
>
> Regards,
> William
William Tu Feb. 27, 2021, 4:35 p.m. UTC | #11
Hi Hepeng,
Thanks, now I understand.

On Sat, Feb 27, 2021 at 12:54 AM 贺鹏 <xnhp0320@gmail.com> wrote:
>
> Hi,
>
> Thanks William and Ilya for a detailed revisiting of the origin of the
> problem. I learned a lot.
>
> I now understand that the mix using of RCU and refcounts is not
> intended in the first place.
> But my point is that mix using RCU and refcounts now gives you more
> choices, and actually eases the code changes.
>
> For example, the code for * ofproto_dpif_lookup_by_name* or other
> ofproto lookup function,
> when only using refcounts, you need to change it to:
>
>  struct ofproto_dpif *
>  ofproto_dpif_lookup_by_uuid(const struct uuid *uuid)
>  {
>      struct ofproto_dpif *ofproto;
>
>      HMAP_FOR_EACH_WITH_HASH (ofproto, all_ofproto_dpifs_by_uuid_node,
>                               uuid_hash(uuid), &all_ofproto_dpifs_by_uuid) {
>          if (uuid_equals(&ofproto->uuid, uuid)) {
>
>              ---> if  ovs_refcount_try_ref(ofproto)
>
>              return ofproto;
>          }
>      }
>      return NULL;
>  }
>
> and after finish its usage, you have to do ofproto_unref the ofproto.
>
> This is why I said, most accessing to ofproto is ephemeral. If you
> change to use the pure refcounts solution, you have
> to add refcount and release it every time you access the ofproto. We
> should be more careful and remember to unref
> the ofproto.
>
> However, if using RCU and refcounts, in the above case, you do not
> need to change the code, since the RCU ensures that
> these ephemeral accesses are safe.

How do we know which access is ephemeral, so no refcount is needed,
and which access is not, so we have to add refcount?

>
> you only need to add refcount, when you find that the pointer to
> ofproto lives longer than one grace period.
>
>
> This is why in my patch, I do not add ref to ofproto after its
> creation. I agree the patch is not complete and has issues,
> and understand it could confuse people if changes into mix RCU and
> refcounts version.
>
I see, thanks!
William
Ilya Maximets March 1, 2021, 8:15 p.m. UTC | #12
On 2/27/21 9:53 AM, 贺鹏 wrote:
> Hi,
> 
> Thanks William and Ilya for a detailed revisiting of the origin of the
> problem. I learned a lot.
> 
> I now understand that the mix using of RCU and refcounts is not
> intended in the first place.
> But my point is that mix using RCU and refcounts now gives you more
> choices, and actually eases the code changes.
> 
> For example, the code for * ofproto_dpif_lookup_by_name* or other
> ofproto lookup function,
> when only using refcounts, you need to change it to:
> 
>  struct ofproto_dpif *
>  ofproto_dpif_lookup_by_uuid(const struct uuid *uuid)
>  {
>      struct ofproto_dpif *ofproto;
> 
>      HMAP_FOR_EACH_WITH_HASH (ofproto, all_ofproto_dpifs_by_uuid_node,
>                               uuid_hash(uuid), &all_ofproto_dpifs_by_uuid) {
>          if (uuid_equals(&ofproto->uuid, uuid)) {
> 
>              ---> if  ovs_refcount_try_ref(ofproto)
> 
>              return ofproto;
>          }
>      }
>      return NULL;
>  }

That is an interesting example here...
I can't help but notice that this function is typically called
from different handler or pmd threads and modified by the main thread
while upcalls enabled.  And hmap is not a thread-safe structure.
I guess, we have another possible problem here.  We need to protect
at least this hmap and the other one with rw lock or something...
Am I right or am I missing something?  What else we need to protect?

> 
> and after finish its usage, you have to do ofproto_unref the ofproto.
> 
> This is why I said, most accessing to ofproto is ephemeral. If you
> change to use the pure refcounts solution, you have
> to add refcount and release it every time you access the ofproto. We
> should be more careful and remember to unref
> the ofproto.
> 
> However, if using RCU and refcounts, in the above case, you do not
> need to change the code, since the RCU ensures that
> these ephemeral accesses are safe.
> 
> you only need to add refcount, when you find that the pointer to
> ofproto lives longer than one grace period.
> 
> 
> This is why in my patch, I do not add ref to ofproto after its
> creation. I agree the patch is not complete and has issues,
> and understand it could confuse people if changes into mix RCU and
> refcounts version.
> 
> 
> William Tu <u9012063@gmail.com> 于2021年2月26日周五 上午2:32写道:
>>
>> On Thu, Feb 25, 2021 at 4:54 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>>>
>>> On 2/25/21 4:32 AM, Guohongzhi (Russell Lab) wrote:
>>>> Refcount and RCU are not mutually exclusive.
>>>> IMO, the main reason for this problem is that the rule uses both refcount and rcu, while the ofproto uses only rcu, and the rule retains the pointer of the ofproto. More importantly, ofproto cannot guarantee a longer grace period than the rule.
>>>>
>>>
>>> I understand that refcount and RCU are not mutually exclusive.
>>> However, in this particular case RCU for ofproto was introduced for one
>>> and only one reason - to avoid freeing ofproto while rules are still
>>> alive.  This was done in commit f416c8d61601 ("ofproto: RCU postpone
>>> rule destruction.").  The goal was to allow using rules without
>>> refcounting them within a single grace period.  And that forced us
>>> to postpone destruction of the ofproto for a single grace period.
>>> Later commit 39c9459355b6 ("Use classifier versioning.") made it
>>> possible for rules to be alive for more than one grace period, so
>>> the commit made ofproto wait for 2 grace periods by double postponing.
>>> As we can see now, that wasn't enough and we have to wait for more
>>> than 2 grace periods in certain cases.
>>>
>>> Now we're introducing refcounts to wait for all rules to be destroyed
>>> before destroying the ofproto.  But what is the point of having
>>> RCU if we already know that if refcount is zero than all rules are
>>> already destroyed and we don't need to wait anything and could just
>>> destroy the ofproto?
>>>
>>> RCU doesn't protect ofproto itself here, it actually protects rules,
>>> i.e. keeps ofproto alive while rules alive, but we can fully replace
>>> this with refcounts and I see no value in having RCU additionally.
>>>
>>> To have a full picture: right now we also have groups protected by
>>> RCU, so we need to refcount ofproto for them too.  But that is almost
>>> exactly same situation as we have with rules.
>>>
>> Thanks Guohongzhi, Hepeng, and Ilya, I learned a lot from your discussions.
>>
>> I think refcount and RCU are mutually exclusive. They are two different ways of
>> doing synchronization and somehow we mix them together by using RCU to
>> optimize refcount.
>>
>> Before the commit f416c8d61601 ("ofproto: RCU postpone rule destruction."),
>> we are using refcount to protect rules, and as a result every time OVS
>> references
>> a rule, we have to take refcount. It works ok but this probably has
>> high performance
>> overhead because it's doing atomic operations.
>>
>> So the commit f416c8d61601 optimizes it by doing
>> 1) not taking refcount of rule "within a grace period"
>> 2) introduce RCU for rule
>> The assumption is that a grace period is like refcount == 0, so it's
>> valid to do so.
>> A side effect is that ofproto_destroy__()  needs to be postponed.
>> Note that if a rule is alive across grace period, it has to take refcount.
>>
>> However, later commit 39c9459355b6 ("Use classifier versioning.")
>> makes rule alive across grace period. It breaks the first commit's assumption
>> and it has to introduce double postponing for ofproto, which we found
>> out is the problem now.
>>
>> Since my point is RCU and refcount are mutually exclusive (A grace period
>> is like refcount ==0) , I agree with Ilya that we can just use refcount to fix
>> ofproto issue.
>>
>> Regards,
>> William
> 
> 
>
Ben Pfaff March 2, 2021, 12:07 a.m. UTC | #13
On Thu, Feb 25, 2021 at 10:31:35AM -0800, William Tu wrote:
> I think refcount and RCU are mutually exclusive. They are two
> different ways of doing synchronization and somehow we mix them
> together by using RCU to optimize refcount.

I think that you are correct in your description of the possibilities.
A data structure can be protected by RCU, or by a refcount, or by a
refcount that is only needed if access to a data structure needs to span
a grace period.  But to me, "mutually exclusive" means two things that
cannot exist together, and I think that the third possibility is in fact
a case where RCU and refcount exist together, so I would not call this
"mutually exclusive".
.贺鹏 March 2, 2021, 2:58 a.m. UTC | #14
On Tue, Mar 2, 2021 at 4:15 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>
> On 2/27/21 9:53 AM, 贺鹏 wrote:
> > Hi,
> >
> > Thanks William and Ilya for a detailed revisiting of the origin of the
> > problem. I learned a lot.
> >
> > I now understand that the mix using of RCU and refcounts is not
> > intended in the first place.
> > But my point is that mix using RCU and refcounts now gives you more
> > choices, and actually eases the code changes.
> >
> > For example, the code for * ofproto_dpif_lookup_by_name* or other
> > ofproto lookup function,
> > when only using refcounts, you need to change it to:
> >
> >  struct ofproto_dpif *
> >  ofproto_dpif_lookup_by_uuid(const struct uuid *uuid)
> >  {
> >      struct ofproto_dpif *ofproto;
> >
> >      HMAP_FOR_EACH_WITH_HASH (ofproto, all_ofproto_dpifs_by_uuid_node,
> >                               uuid_hash(uuid), &all_ofproto_dpifs_by_uuid) {
> >          if (uuid_equals(&ofproto->uuid, uuid)) {
> >
> >              ---> if  ovs_refcount_try_ref(ofproto)
> >
> >              return ofproto;
> >          }
> >      }
> >      return NULL;
> >  }
>
> That is an interesting example here...
> I can't help but notice that this function is typically called
> from different handler or pmd threads and modified by the main thread
> while upcalls enabled.  And hmap is not a thread-safe structure.
> I guess, we have another possible problem here.  We need to protect
> at least this hmap and the other one with rw lock or something...
> Am I right or am I missing something?  What else we need to protect?

I guess the function needs to be changed into using cmap ....

But fortunately, ofproto_dpif_lookup_by_uuid is called by
upcall_receive, and for most upcalls
(with the type MISS_UPCALL), it will not be called.

...
     } else if (upcall->type == MISS_UPCALL) {
         error = xlate_lookup(backer, flow, &upcall->ofproto, &upcall->ipfix,
                              &upcall->sflow, NULL, &upcall->ofp_in_port);
         if (error) {
             return error;
         }
     } else {
         struct ofproto_dpif *ofproto
             = ofproto_dpif_lookup_by_uuid(&upcall->cookie.ofproto_uuid);
         if (!ofproto) {
             VLOG_INFO_RL(&rl, "upcall could not find ofproto");
             return ENODEV;
         }
         upcall->ofproto = ofproto;
         upcall->ipfix = ofproto->ipfix;
         upcall->sflow = ofproto->sflow;
         upcall->ofp_in_port = upcall->cookie.ofp_in_port;
     }
...

So for the people who use sflow, I guess there could be a risk, but
this is also rare,
as the bridges rarely change.


another case ofproto_dpif_lookup_by_name looks fine, it will only be called
in the main thread.


>
> >
> > and after finish its usage, you have to do ofproto_unref the ofproto.
> >
> > This is why I said, most accessing to ofproto is ephemeral. If you
> > change to use the pure refcounts solution, you have
> > to add refcount and release it every time you access the ofproto. We
> > should be more careful and remember to unref
> > the ofproto.
> >
> > However, if using RCU and refcounts, in the above case, you do not
> > need to change the code, since the RCU ensures that
> > these ephemeral accesses are safe.
> >
> > you only need to add refcount, when you find that the pointer to
> > ofproto lives longer than one grace period.
> >
> >
> > This is why in my patch, I do not add ref to ofproto after its
> > creation. I agree the patch is not complete and has issues,
> > and understand it could confuse people if changes into mix RCU and
> > refcounts version.
> >
> >
> > William Tu <u9012063@gmail.com> 于2021年2月26日周五 上午2:32写道:
> >>
> >> On Thu, Feb 25, 2021 at 4:54 AM Ilya Maximets <i.maximets@ovn.org> wrote:
> >>>
> >>> On 2/25/21 4:32 AM, Guohongzhi (Russell Lab) wrote:
> >>>> Refcount and RCU are not mutually exclusive.
> >>>> IMO, the main reason for this problem is that the rule uses both refcount and rcu, while the ofproto uses only rcu, and the rule retains the pointer of the ofproto. More importantly, ofproto cannot guarantee a longer grace period than the rule.
> >>>>
> >>>
> >>> I understand that refcount and RCU are not mutually exclusive.
> >>> However, in this particular case RCU for ofproto was introduced for one
> >>> and only one reason - to avoid freeing ofproto while rules are still
> >>> alive.  This was done in commit f416c8d61601 ("ofproto: RCU postpone
> >>> rule destruction.").  The goal was to allow using rules without
> >>> refcounting them within a single grace period.  And that forced us
> >>> to postpone destruction of the ofproto for a single grace period.
> >>> Later commit 39c9459355b6 ("Use classifier versioning.") made it
> >>> possible for rules to be alive for more than one grace period, so
> >>> the commit made ofproto wait for 2 grace periods by double postponing.
> >>> As we can see now, that wasn't enough and we have to wait for more
> >>> than 2 grace periods in certain cases.
> >>>
> >>> Now we're introducing refcounts to wait for all rules to be destroyed
> >>> before destroying the ofproto.  But what is the point of having
> >>> RCU if we already know that if refcount is zero than all rules are
> >>> already destroyed and we don't need to wait anything and could just
> >>> destroy the ofproto?
> >>>
> >>> RCU doesn't protect ofproto itself here, it actually protects rules,
> >>> i.e. keeps ofproto alive while rules alive, but we can fully replace
> >>> this with refcounts and I see no value in having RCU additionally.
> >>>
> >>> To have a full picture: right now we also have groups protected by
> >>> RCU, so we need to refcount ofproto for them too.  But that is almost
> >>> exactly same situation as we have with rules.
> >>>
> >> Thanks Guohongzhi, Hepeng, and Ilya, I learned a lot from your discussions.
> >>
> >> I think refcount and RCU are mutually exclusive. They are two different ways of
> >> doing synchronization and somehow we mix them together by using RCU to
> >> optimize refcount.
> >>
> >> Before the commit f416c8d61601 ("ofproto: RCU postpone rule destruction."),
> >> we are using refcount to protect rules, and as a result every time OVS
> >> references
> >> a rule, we have to take refcount. It works ok but this probably has
> >> high performance
> >> overhead because it's doing atomic operations.
> >>
> >> So the commit f416c8d61601 optimizes it by doing
> >> 1) not taking refcount of rule "within a grace period"
> >> 2) introduce RCU for rule
> >> The assumption is that a grace period is like refcount == 0, so it's
> >> valid to do so.
> >> A side effect is that ofproto_destroy__()  needs to be postponed.
> >> Note that if a rule is alive across grace period, it has to take refcount.
> >>
> >> However, later commit 39c9459355b6 ("Use classifier versioning.")
> >> makes rule alive across grace period. It breaks the first commit's assumption
> >> and it has to introduce double postponing for ofproto, which we found
> >> out is the problem now.
> >>
> >> Since my point is RCU and refcount are mutually exclusive (A grace period
> >> is like refcount ==0) , I agree with Ilya that we can just use refcount to fix
> >> ofproto issue.
> >>
> >> Regards,
> >> William
> >
> >
> >
>
diff mbox series

Patch

diff --git a/ofproto/ofproto-dpif-xlate-cache.c b/ofproto/ofproto-dpif-xlate-cache.c
index dcc91cb38..0deee365d 100644
--- a/ofproto/ofproto-dpif-xlate-cache.c
+++ b/ofproto/ofproto-dpif-xlate-cache.c
@@ -209,6 +209,7 @@  xlate_cache_clear_entry(struct xc_entry *entry)
 {
     switch (entry->type) {
     case XC_TABLE:
+        ofproto_unref(&(entry->table.ofproto->up));
         break;
     case XC_RULE:
         ofproto_rule_unref(&entry->rule->up);
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 4f0638f23..6480b3c78 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -4414,11 +4414,12 @@  rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto,
             }
             if (xcache) {
                 struct xc_entry *entry;
-
-                entry = xlate_cache_add_entry(xcache, XC_TABLE);
-                entry->table.ofproto = ofproto;
-                entry->table.id = *table_id;
-                entry->table.match = true;
+                if (ofproto_try_ref(&ofproto->up)) {
+                    entry = xlate_cache_add_entry(xcache, XC_TABLE);
+                    entry->table.ofproto = ofproto;
+                    entry->table.id = *table_id;
+                    entry->table.match = true;
+                }
             }
             return rule;
         }
@@ -4450,11 +4451,12 @@  rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto,
         }
         if (xcache) {
             struct xc_entry *entry;
-
-            entry = xlate_cache_add_entry(xcache, XC_TABLE);
-            entry->table.ofproto = ofproto;
-            entry->table.id = next_id;
-            entry->table.match = (rule != NULL);
+            if (ofproto_try_ref(&ofproto->up)) {
+                entry = xlate_cache_add_entry(xcache, XC_TABLE);
+                entry->table.ofproto = ofproto;
+                entry->table.id = next_id;
+                entry->table.match = (rule != NULL);
+            }
         }
         if (rule) {
             goto out;   /* Match. */
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index afecb24cb..34074fd62 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -139,6 +139,8 @@  struct ofproto {
     /* Variable length mf_field mapping. Stores all configured variable length
      * meta-flow fields (struct mf_field) in a switch. */
     struct vl_mff_map vl_mff_map;
+
+    struct ovs_refcount refcount;
 };
 
 void ofproto_init_tables(struct ofproto *, int n_tables);
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index 59f06aa94..0842c76c1 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -474,6 +474,24 @@  ofproto_bump_tables_version(struct ofproto *ofproto)
                                                ofproto->tables_version);
 }
 
+void
+ofproto_ref(struct ofproto *ofproto)
+{
+    ovs_refcount_ref(&ofproto->refcount);
+}
+
+bool
+ofproto_try_ref(struct ofproto *ofproto)
+{
+    return ovs_refcount_try_ref_rcu(&ofproto->refcount);
+}
+
+void
+ofproto_unref(struct ofproto *ofproto)
+{
+    ovs_refcount_unref(&ofproto->refcount);
+}
+
 int
 ofproto_create(const char *datapath_name, const char *datapath_type,
                struct ofproto **ofprotop)
@@ -545,6 +563,7 @@  ofproto_create(const char *datapath_name, const char *datapath_type,
 
     ovs_mutex_init(&ofproto->vl_mff_map.mutex);
     cmap_init(&ofproto->vl_mff_map.cmap);
+    ovs_refcount_init(&ofproto->refcount);
 
     error = ofproto->ofproto_class->construct(ofproto);
     if (error) {
@@ -1696,14 +1715,26 @@  ofproto_destroy__(struct ofproto *ofproto)
     ofproto->ofproto_class->dealloc(ofproto);
 }
 
-/* Destroying rules is doubly deferred, must have 'ofproto' around for them.
- * - 1st we defer the removal of the rules from the classifier
- * - 2nd we defer the actual destruction of the rules. */
+/* destroying a rule may have to wait multiple grace periods:
+ * remove_rules_postponed (one grace period)
+ *       -> remove_rule_rcu
+ *           -> remove_rule_rcu__
+ *               -> ofproto_rule_unref -> ref count != 1
+ *                   -> ... more grace periods.
+ *                   -> rule_destroy_cb (> 2 grace periods)
+ *                       -> free
+ *
+ * So we have to check the refcount for sure all the rules
+ * have been destoryed.
+ */
 static void
 ofproto_destroy_defer__(struct ofproto *ofproto)
     OVS_EXCLUDED(ofproto_mutex)
 {
-    ovsrcu_postpone(ofproto_destroy__, ofproto);
+    if (ovs_refcount_read(&ofproto->refcount) != 1)
+        ovsrcu_postpone(ofproto_destroy_defer__, ofproto);
+    else
+        ovsrcu_postpone(ofproto_destroy__, ofproto);
 }
 
 void
@@ -2927,6 +2958,7 @@  ofproto_rule_destroy__(struct rule *rule)
     cls_rule_destroy(CONST_CAST(struct cls_rule *, &rule->cr));
     rule_actions_destroy(rule_get_actions(rule));
     ovs_mutex_destroy(&rule->mutex);
+    ofproto_unref(rule->ofproto);
     rule->ofproto->ofproto_class->rule_dealloc(rule);
 }
 
@@ -5265,6 +5297,7 @@  ofproto_rule_create(struct ofproto *ofproto, struct cls_rule *cr,
 
     /* Initialize base state. */
     *CONST_CAST(struct ofproto **, &rule->ofproto) = ofproto;
+    ofproto_ref(ofproto);
     cls_rule_move(CONST_CAST(struct cls_rule *, &rule->cr), cr);
     ovs_refcount_init(&rule->ref_count);
 
diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h
index 2dd253167..3d3937b35 100644
--- a/ofproto/ofproto.h
+++ b/ofproto/ofproto.h
@@ -562,6 +562,10 @@  int ofproto_port_get_cfm_status(const struct ofproto *,
 enum ofputil_table_miss ofproto_table_get_miss_config(const struct ofproto *,
                                                       uint8_t table_id);
 
+void ofproto_ref(struct ofproto *p);
+void ofproto_unref(struct ofproto *p);
+bool ofproto_try_ref(struct ofproto *p);
+
 #ifdef  __cplusplus
 }
 #endif