[ovs-dev,v1] lib: use acquire-release semantics for pvector size
diff mbox series

Message ID 20200225015206.18986-1-Yanqin.Wei@arm.com
State Changes Requested
Headers show
Series
  • [ovs-dev,v1] lib: use acquire-release semantics for pvector size
Related show

Commit Message

Yanqin Wei Feb. 25, 2020, 1:52 a.m. UTC
Read/write concurrency of pvector library is implemented by a temp vector
and RCU protection. Considering performance reason, insertion does not
follow this scheme.
In insertion function, a thread fence ensures size incrementation is done
after new entry is stored. But there is no barrier in the iteration
fuction(pvector_cursor_init). Entry point access may be reorderd before
loading vector size, so the invalid entry point may be loaded when vector
iteration.
This patch fixes it by acquire-release pair. It can guarantee new size is
observed by reader after new entry stored by writer. And this is
implemented by one-way barrier instead of two-way memory fence.

Reviewed-by: Gavin Hu <Gavin.Hu@arm.com>
Reviewed-by: Lijian Zhang <Lijian.Zhang@arm.com>
Signed-off-by: Yanqin Wei <Yanqin.Wei@arm.com>
---
 lib/pvector.c | 14 +++++++-------
 lib/pvector.h | 12 +++++++-----
 2 files changed, 14 insertions(+), 12 deletions(-)

Comments

Ilya Maximets Feb. 25, 2020, 9:53 a.m. UTC | #1
On 2/25/20 2:52 AM, Yanqin Wei wrote:
> Read/write concurrency of pvector library is implemented by a temp vector
> and RCU protection. Considering performance reason, insertion does not
> follow this scheme.
> In insertion function, a thread fence ensures size incrementation is done
> after new entry is stored. But there is no barrier in the iteration
> fuction(pvector_cursor_init). Entry point access may be reorderd before
> loading vector size, so the invalid entry point may be loaded when vector
> iteration.
> This patch fixes it by acquire-release pair. It can guarantee new size is
> observed by reader after new entry stored by writer. And this is
> implemented by one-way barrier instead of two-way memory fence.
> 
> Reviewed-by: Gavin Hu <Gavin.Hu@arm.com>
> Reviewed-by: Lijian Zhang <Lijian.Zhang@arm.com>
> Signed-off-by: Yanqin Wei <Yanqin.Wei@arm.com>
> ---
>  lib/pvector.c | 14 +++++++-------
>  lib/pvector.h | 12 +++++++-----
>  2 files changed, 14 insertions(+), 12 deletions(-)
> 
> diff --git a/lib/pvector.c b/lib/pvector.c
> index aaeee9214..12c599c97 100644
> --- a/lib/pvector.c
> +++ b/lib/pvector.c
> @@ -33,7 +33,7 @@ pvector_impl_alloc(size_t size)
>      struct pvector_impl *impl;
>  
>      impl = xmalloc(sizeof *impl + size * sizeof impl->vector[0]);
> -    impl->size = 0;
> +    atomic_init(&impl->size, 0);
>      impl->allocated = size;
>  
>      return impl;
> @@ -117,18 +117,18 @@ pvector_insert(struct pvector *pvec, void *ptr, int priority)
>  {
>      struct pvector_impl *temp = pvec->temp;
>      struct pvector_impl *old = pvector_impl_get(pvec);
> +    size_t size = old->size;

Why this is not an atomic read?  I understand that insertions are not thread-safe
and must be protected by the mutex or be always executed from the same thread.
However, if we're choosing to read this variable non-atomically, we could avoid
introduction of additional variable here and at the same time avoid modification
of most of the code lines in this function.  A comment, why we're reading it
non-atomically might be good anyway since we should be consistent and use atomic
operations for variables marked as atomic as possible.

>  
>      ovs_assert(ptr != NULL);
>  
>      /* Check if can add to the end without reallocation. */
> -    if (!temp && old->allocated > old->size &&
> -        (!old->size || priority <= old->vector[old->size - 1].priority)) {
> -        old->vector[old->size].ptr = ptr;
> -        old->vector[old->size].priority = priority;
> +    if (!temp && old->allocated > size &&
> +        (!size || priority <= old->vector[size - 1].priority)) {
> +        old->vector[size].ptr = ptr;
> +        old->vector[size].priority = priority;
>          /* Size increment must not be visible to the readers before the new
>           * entry is stored. */
> -        atomic_thread_fence(memory_order_release);
> -        ++old->size;
> +        atomic_store_explicit(&old->size, size + 1, memory_order_release);
>      } else {
>          if (!temp) {
>              temp = pvector_impl_dup(old);
> diff --git a/lib/pvector.h b/lib/pvector.h
> index b990ed9d5..430bdf746 100644
> --- a/lib/pvector.h
> +++ b/lib/pvector.h
> @@ -69,8 +69,8 @@ struct pvector_entry {
>  };
>  
>  struct pvector_impl {
> -    size_t size;       /* Number of entries in the vector. */
> -    size_t allocated;  /* Number of allocated entries. */
> +    ATOMIC(size_t) size;  /* Number of entries in the vector. */

atomic_size_t

> +    size_t allocated;     /* Number of allocated entries. */
>      struct pvector_entry vector[];
>  };
>  
> @@ -172,7 +172,7 @@ static inline void pvector_cursor_lookahead(const struct pvector_cursor *,
>  #define PVECTOR_CURSOR_FOR_EACH_CONTINUE(PTR, CURSOR)                   \
>      for (; ((PTR) = pvector_cursor_next(CURSOR, INT_MIN, 0, 0)) != NULL; )
>  
> -

Don't remove the form feed character.

> +
>  /* Inline implementations. */
>  
>  static inline struct pvector_cursor
> @@ -181,12 +181,14 @@ pvector_cursor_init(const struct pvector *pvec,
>  {
>      const struct pvector_impl *impl;
>      struct pvector_cursor cursor;
> +    size_t size;
>  
>      impl = ovsrcu_get(struct pvector_impl *, &pvec->impl);
>  
> -    ovs_prefetch_range(impl->vector, impl->size * sizeof impl->vector[0]);
> +    atomic_read_explicit(&impl->size, &size, memory_order_acquire);
> +    ovs_prefetch_range(impl->vector, size * sizeof impl->vector[0]);
>  
> -    cursor.size = impl->size;
> +    cursor.size = size;
>      cursor.vector = impl->vector;
>      cursor.entry_idx = -1;
>  
>
Yanqin Wei Feb. 25, 2020, 10:04 a.m. UTC | #2
Hi Ilya,

> -----Original Message-----
> From: Ilya Maximets <i.maximets@ovn.org>
> Sent: Tuesday, February 25, 2020 5:54 PM
> To: Yanqin Wei <Yanqin.Wei@arm.com>; dev@openvswitch.org
> Cc: Lijian Zhang <Lijian.Zhang@arm.com>; Gavin Hu <Gavin.Hu@arm.com>; nd
> <nd@arm.com>; i.maximets@ovn.org; Ben Pfaff <blp@ovn.org>
> Subject: Re: [ovs-dev] [PATCH v1] lib: use acquire-release semantics for pvector
> size
> 
> On 2/25/20 2:52 AM, Yanqin Wei wrote:
> > Read/write concurrency of pvector library is implemented by a temp
> > vector and RCU protection. Considering performance reason, insertion
> > does not follow this scheme.
> > In insertion function, a thread fence ensures size incrementation is
> > done after new entry is stored. But there is no barrier in the
> > iteration fuction(pvector_cursor_init). Entry point access may be
> > reorderd before loading vector size, so the invalid entry point may be
> > loaded when vector iteration.
> > This patch fixes it by acquire-release pair. It can guarantee new size
> > is observed by reader after new entry stored by writer. And this is
> > implemented by one-way barrier instead of two-way memory fence.
> >
> > Reviewed-by: Gavin Hu <Gavin.Hu@arm.com>
> > Reviewed-by: Lijian Zhang <Lijian.Zhang@arm.com>
> > Signed-off-by: Yanqin Wei <Yanqin.Wei@arm.com>
> > ---
> >  lib/pvector.c | 14 +++++++-------
> >  lib/pvector.h | 12 +++++++-----
> >  2 files changed, 14 insertions(+), 12 deletions(-)
> >
> > diff --git a/lib/pvector.c b/lib/pvector.c index aaeee9214..12c599c97
> > 100644
> > --- a/lib/pvector.c
> > +++ b/lib/pvector.c
> > @@ -33,7 +33,7 @@ pvector_impl_alloc(size_t size)
> >      struct pvector_impl *impl;
> >
> >      impl = xmalloc(sizeof *impl + size * sizeof impl->vector[0]);
> > -    impl->size = 0;
> > +    atomic_init(&impl->size, 0);
> >      impl->allocated = size;
> >
> >      return impl;
> > @@ -117,18 +117,18 @@ pvector_insert(struct pvector *pvec, void *ptr,
> > int priority)  {
> >      struct pvector_impl *temp = pvec->temp;
> >      struct pvector_impl *old = pvector_impl_get(pvec);
> > +    size_t size = old->size;
> 
> Why this is not an atomic read?  I understand that insertions are not thread-
> safe and must be protected by the mutex or be always executed from the same
> thread.
> However, if we're choosing to read this variable non-atomically, we could
> avoid introduction of additional variable here and at the same time avoid
> modification of most of the code lines in this function.  A comment, why we're
> reading it non-atomically might be good anyway since we should be consistent
> and use atomic operations for variables marked as atomic as possible.
> 
[Yanqin]  It makes sense for me.  I will update v2 for all of your comments. Thanks.

> >
> >      ovs_assert(ptr != NULL);
> >
> >      /* Check if can add to the end without reallocation. */
> > -    if (!temp && old->allocated > old->size &&
> > -        (!old->size || priority <= old->vector[old->size - 1].priority)) {
> > -        old->vector[old->size].ptr = ptr;
> > -        old->vector[old->size].priority = priority;
> > +    if (!temp && old->allocated > size &&
> > +        (!size || priority <= old->vector[size - 1].priority)) {
> > +        old->vector[size].ptr = ptr;
> > +        old->vector[size].priority = priority;
> >          /* Size increment must not be visible to the readers before the new
> >           * entry is stored. */
> > -        atomic_thread_fence(memory_order_release);
> > -        ++old->size;
> > +        atomic_store_explicit(&old->size, size + 1,
> > + memory_order_release);
> >      } else {
> >          if (!temp) {
> >              temp = pvector_impl_dup(old); diff --git a/lib/pvector.h
> > b/lib/pvector.h index b990ed9d5..430bdf746 100644
> > --- a/lib/pvector.h
> > +++ b/lib/pvector.h
> > @@ -69,8 +69,8 @@ struct pvector_entry {  };
> >
> >  struct pvector_impl {
> > -    size_t size;       /* Number of entries in the vector. */
> > -    size_t allocated;  /* Number of allocated entries. */
> > +    ATOMIC(size_t) size;  /* Number of entries in the vector. */
> 
> atomic_size_t
> 
> > +    size_t allocated;     /* Number of allocated entries. */
> >      struct pvector_entry vector[];
> >  };
> >
> > @@ -172,7 +172,7 @@ static inline void pvector_cursor_lookahead(const
> struct pvector_cursor *,
> >  #define PVECTOR_CURSOR_FOR_EACH_CONTINUE(PTR, CURSOR)                   \
> >      for (; ((PTR) = pvector_cursor_next(CURSOR, INT_MIN, 0, 0)) !=
> > NULL; )
> >
> > -
> 
> 
> 
> Don't remove the form feed character.
> 
> > +
> >  /* Inline implementations. */
> >
> >  static inline struct pvector_cursor
> > @@ -181,12 +181,14 @@ pvector_cursor_init(const struct pvector *pvec,
> > {
> >      const struct pvector_impl *impl;
> >      struct pvector_cursor cursor;
> > +    size_t size;
> >
> >      impl = ovsrcu_get(struct pvector_impl *, &pvec->impl);
> >
> > -    ovs_prefetch_range(impl->vector, impl->size * sizeof impl->vector[0]);
> > +    atomic_read_explicit(&impl->size, &size, memory_order_acquire);
> > +    ovs_prefetch_range(impl->vector, size * sizeof impl->vector[0]);
> >
> > -    cursor.size = impl->size;
> > +    cursor.size = size;
> >      cursor.vector = impl->vector;
> >      cursor.entry_idx = -1;
> >
> >

Patch
diff mbox series

diff --git a/lib/pvector.c b/lib/pvector.c
index aaeee9214..12c599c97 100644
--- a/lib/pvector.c
+++ b/lib/pvector.c
@@ -33,7 +33,7 @@  pvector_impl_alloc(size_t size)
     struct pvector_impl *impl;
 
     impl = xmalloc(sizeof *impl + size * sizeof impl->vector[0]);
-    impl->size = 0;
+    atomic_init(&impl->size, 0);
     impl->allocated = size;
 
     return impl;
@@ -117,18 +117,18 @@  pvector_insert(struct pvector *pvec, void *ptr, int priority)
 {
     struct pvector_impl *temp = pvec->temp;
     struct pvector_impl *old = pvector_impl_get(pvec);
+    size_t size = old->size;
 
     ovs_assert(ptr != NULL);
 
     /* Check if can add to the end without reallocation. */
-    if (!temp && old->allocated > old->size &&
-        (!old->size || priority <= old->vector[old->size - 1].priority)) {
-        old->vector[old->size].ptr = ptr;
-        old->vector[old->size].priority = priority;
+    if (!temp && old->allocated > size &&
+        (!size || priority <= old->vector[size - 1].priority)) {
+        old->vector[size].ptr = ptr;
+        old->vector[size].priority = priority;
         /* Size increment must not be visible to the readers before the new
          * entry is stored. */
-        atomic_thread_fence(memory_order_release);
-        ++old->size;
+        atomic_store_explicit(&old->size, size + 1, memory_order_release);
     } else {
         if (!temp) {
             temp = pvector_impl_dup(old);
diff --git a/lib/pvector.h b/lib/pvector.h
index b990ed9d5..430bdf746 100644
--- a/lib/pvector.h
+++ b/lib/pvector.h
@@ -69,8 +69,8 @@  struct pvector_entry {
 };
 
 struct pvector_impl {
-    size_t size;       /* Number of entries in the vector. */
-    size_t allocated;  /* Number of allocated entries. */
+    ATOMIC(size_t) size;  /* Number of entries in the vector. */
+    size_t allocated;     /* Number of allocated entries. */
     struct pvector_entry vector[];
 };
 
@@ -172,7 +172,7 @@  static inline void pvector_cursor_lookahead(const struct pvector_cursor *,
 #define PVECTOR_CURSOR_FOR_EACH_CONTINUE(PTR, CURSOR)                   \
     for (; ((PTR) = pvector_cursor_next(CURSOR, INT_MIN, 0, 0)) != NULL; )
 
-
+
 /* Inline implementations. */
 
 static inline struct pvector_cursor
@@ -181,12 +181,14 @@  pvector_cursor_init(const struct pvector *pvec,
 {
     const struct pvector_impl *impl;
     struct pvector_cursor cursor;
+    size_t size;
 
     impl = ovsrcu_get(struct pvector_impl *, &pvec->impl);
 
-    ovs_prefetch_range(impl->vector, impl->size * sizeof impl->vector[0]);
+    atomic_read_explicit(&impl->size, &size, memory_order_acquire);
+    ovs_prefetch_range(impl->vector, size * sizeof impl->vector[0]);
 
-    cursor.size = impl->size;
+    cursor.size = size;
     cursor.vector = impl->vector;
     cursor.entry_idx = -1;