diff mbox series

[ovs-dev,1/4] Native tunnel: Read/write expires atomically.

Message ID 163587313623.3737810.4429039447241544262.stgit@fed.void
State Superseded
Headers show
Series Native tunnel: Update neigh entries in tnl termination. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed

Commit Message

Paolo Valerio Nov. 2, 2021, 5:12 p.m. UTC
Signed-off-by: Paolo Valerio <pvalerio@redhat.com>
---
 lib/tnl-neigh-cache.c |   32 +++++++++++++++++++++++---------
 1 file changed, 23 insertions(+), 9 deletions(-)

Comments

Gaetan Rivet Nov. 3, 2021, 6:49 p.m. UTC | #1
On Tue, Nov 2, 2021, at 18:12, Paolo Valerio wrote:

Hi Paolo,

I think this commit needs more details.
I'm guessing the threads involved are the PMDs and main, are there others?

Coherency is implicit on x86 cores, but those parallel read/writes are
undefined behavior & possibly incorrect on weaker memory model.

So this patch seems good to me, but it would be nice to have more context
in the log.

IIRC in the cover letter there is a short description of what this patch does,
it might be sufficient.

> Signed-off-by: Paolo Valerio <pvalerio@redhat.com>
> ---
>  lib/tnl-neigh-cache.c |   32 +++++++++++++++++++++++---------
>  1 file changed, 23 insertions(+), 9 deletions(-)
>
> diff --git a/lib/tnl-neigh-cache.c b/lib/tnl-neigh-cache.c
> index 5bda4af7e..2769e5c3d 100644
> --- a/lib/tnl-neigh-cache.c
> +++ b/lib/tnl-neigh-cache.c
> @@ -32,6 +32,7 @@
>  #include "errno.h"
>  #include "flow.h"
>  #include "netdev.h"
> +#include "ovs-atomic.h"
>  #include "ovs-thread.h"
>  #include "packets.h"
>  #include "openvswitch/poll-loop.h"
> @@ -44,14 +45,14 @@
>  #include "openvswitch/vlog.h"
> 
> 
> -/* In seconds */
> -#define NEIGH_ENTRY_DEFAULT_IDLE_TIME  (15 * 60)
> +/* In milliseconds */
> +#define NEIGH_ENTRY_DEFAULT_IDLE_TIME  (15 * 60 * 1000)

It is easier to check correctness of macro usage if the unit is part of
the name. I think here using 'xxx_MS' would be helpful.

> 
>  struct tnl_neigh_entry {
>      struct cmap_node cmap_node;
>      struct in6_addr ip;
>      struct eth_addr mac;
> -    time_t expires;             /* Expiration time. */
> +    atomic_llong expires;       /* Expiration time in ms. */
>      char br_name[IFNAMSIZ];
>  };
> 
> @@ -64,6 +65,16 @@ tnl_neigh_hash(const struct in6_addr *ip)
>      return hash_bytes(ip->s6_addr, 16, 0);
>  }
> 
> +static bool
> +tnl_neigh_expired(struct tnl_neigh_entry *neigh)
> +{
> +    long long expired;

This is a nit but I would use 'expires', to align with the field name.

> +
> +    atomic_read_relaxed(&neigh->expires, &expired);

I'm having doubts about unfenced read / writes on expires.
Technically on lookup there would be reads then writes without barriers.
I'm not convinced it makes a difference, WDYT?

Otherwise looks good to me.

> +
> +    return expired <= time_msec();
> +}
> +
>  static struct tnl_neigh_entry *
>  tnl_neigh_lookup__(const char br_name[IFNAMSIZ], const struct in6_addr 
> *dst)
>  {
> @@ -73,11 +84,12 @@ tnl_neigh_lookup__(const char br_name[IFNAMSIZ], 
> const struct in6_addr *dst)
>      hash = tnl_neigh_hash(dst);
>      CMAP_FOR_EACH_WITH_HASH (neigh, cmap_node, hash, &table) {
>          if (ipv6_addr_equals(&neigh->ip, dst) && 
> !strcmp(neigh->br_name, br_name)) {
> -            if (neigh->expires <= time_now()) {
> +            if (tnl_neigh_expired(neigh)) {
>                  return NULL;
>              }
> 
> -            neigh->expires = time_now() + 
> NEIGH_ENTRY_DEFAULT_IDLE_TIME;
> +            atomic_store_relaxed(&neigh->expires, time_msec() +
> +                                 NEIGH_ENTRY_DEFAULT_IDLE_TIME);

Here without lock there would be no fence.
(this write could be re-ordered before tnl_neigh_expired().)
Paolo Valerio Nov. 8, 2021, 7:30 p.m. UTC | #2
Gaëtan Rivet <grive@u256.net> writes:

> On Tue, Nov 2, 2021, at 18:12, Paolo Valerio wrote:
>
> Hi Paolo,
>
> I think this commit needs more details.
> I'm guessing the threads involved are the PMDs and main, are there others?
>
> Coherency is implicit on x86 cores, but those parallel read/writes are
> undefined behavior & possibly incorrect on weaker memory model.
>
> So this patch seems good to me, but it would be nice to have more context
> in the log.
>
> IIRC in the cover letter there is a short description of what this patch does,
> it might be sufficient.
>

Sure

>> Signed-off-by: Paolo Valerio <pvalerio@redhat.com>
>> ---
>>  lib/tnl-neigh-cache.c |   32 +++++++++++++++++++++++---------
>>  1 file changed, 23 insertions(+), 9 deletions(-)
>>
>> diff --git a/lib/tnl-neigh-cache.c b/lib/tnl-neigh-cache.c
>> index 5bda4af7e..2769e5c3d 100644
>> --- a/lib/tnl-neigh-cache.c
>> +++ b/lib/tnl-neigh-cache.c
>> @@ -32,6 +32,7 @@
>>  #include "errno.h"
>>  #include "flow.h"
>>  #include "netdev.h"
>> +#include "ovs-atomic.h"
>>  #include "ovs-thread.h"
>>  #include "packets.h"
>>  #include "openvswitch/poll-loop.h"
>> @@ -44,14 +45,14 @@
>>  #include "openvswitch/vlog.h"
>> 
>> 
>> -/* In seconds */
>> -#define NEIGH_ENTRY_DEFAULT_IDLE_TIME  (15 * 60)
>> +/* In milliseconds */
>> +#define NEIGH_ENTRY_DEFAULT_IDLE_TIME  (15 * 60 * 1000)
>
> It is easier to check correctness of macro usage if the unit is part of
> the name. I think here using 'xxx_MS' would be helpful.
>

ACK

>> 
>>  struct tnl_neigh_entry {
>>      struct cmap_node cmap_node;
>>      struct in6_addr ip;
>>      struct eth_addr mac;
>> -    time_t expires;             /* Expiration time. */
>> +    atomic_llong expires;       /* Expiration time in ms. */
>>      char br_name[IFNAMSIZ];
>>  };
>> 
>> @@ -64,6 +65,16 @@ tnl_neigh_hash(const struct in6_addr *ip)
>>      return hash_bytes(ip->s6_addr, 16, 0);
>>  }
>> 
>> +static bool
>> +tnl_neigh_expired(struct tnl_neigh_entry *neigh)
>> +{
>> +    long long expired;
>
> This is a nit but I would use 'expires', to align with the field name.
>

ACK, will do.

>> +
>> +    atomic_read_relaxed(&neigh->expires, &expired);
>
> I'm having doubts about unfenced read / writes on expires.
> Technically on lookup there would be reads then writes without barriers.
> I'm not convinced it makes a difference, WDYT?
>

Not sure if the store after the conditional here plays a role in
ordering.

> Otherwise looks good to me.
>
>> +
>> +    return expired <= time_msec();
>> +}
>> +
>>  static struct tnl_neigh_entry *
>>  tnl_neigh_lookup__(const char br_name[IFNAMSIZ], const struct in6_addr 
>> *dst)
>>  {
>> @@ -73,11 +84,12 @@ tnl_neigh_lookup__(const char br_name[IFNAMSIZ], 
>> const struct in6_addr *dst)
>>      hash = tnl_neigh_hash(dst);
>>      CMAP_FOR_EACH_WITH_HASH (neigh, cmap_node, hash, &table) {
>>          if (ipv6_addr_equals(&neigh->ip, dst) && 
>> !strcmp(neigh->br_name, br_name)) {
>> -            if (neigh->expires <= time_now()) {
>> +            if (tnl_neigh_expired(neigh)) {
>>                  return NULL;
>>              }
>> 
>> -            neigh->expires = time_now() + 
>> NEIGH_ENTRY_DEFAULT_IDLE_TIME;
>> +            atomic_store_relaxed(&neigh->expires, time_msec() +
>> +                                 NEIGH_ENTRY_DEFAULT_IDLE_TIME);
>
> Here without lock there would be no fence.
> (this write could be re-ordered before tnl_neigh_expired().)

Can the control dependency avoid reorder in this case?
Namely, can the write be speculated and committed before the branch
outcome?

In case it's not enough, I guess you're suggesting load/store (on
expires) with explicit acquire/release to enforce that, right?
Gaetan Rivet Nov. 9, 2021, 10:51 a.m. UTC | #3
On Mon, Nov 8, 2021, at 20:30, Paolo Valerio wrote:
[...]
>>> +
>>> +    atomic_read_relaxed(&neigh->expires, &expired);
>>
>> I'm having doubts about unfenced read / writes on expires.
>> Technically on lookup there would be reads then writes without barriers.
>> I'm not convinced it makes a difference, WDYT?
>>
>
> Not sure if the store after the conditional here plays a role in
> ordering.
>
>> Otherwise looks good to me.
>>
>>> +
>>> +    return expired <= time_msec();
>>> +}
>>> +
>>>  static struct tnl_neigh_entry *
>>>  tnl_neigh_lookup__(const char br_name[IFNAMSIZ], const struct in6_addr 
>>> *dst)
>>>  {
>>> @@ -73,11 +84,12 @@ tnl_neigh_lookup__(const char br_name[IFNAMSIZ], 
>>> const struct in6_addr *dst)
>>>      hash = tnl_neigh_hash(dst);
>>>      CMAP_FOR_EACH_WITH_HASH (neigh, cmap_node, hash, &table) {
>>>          if (ipv6_addr_equals(&neigh->ip, dst) && 
>>> !strcmp(neigh->br_name, br_name)) {
>>> -            if (neigh->expires <= time_now()) {
>>> +            if (tnl_neigh_expired(neigh)) {
>>>                  return NULL;
>>>              }
>>> 
>>> -            neigh->expires = time_now() + 
>>> NEIGH_ENTRY_DEFAULT_IDLE_TIME;
>>> +            atomic_store_relaxed(&neigh->expires, time_msec() +
>>> +                                 NEIGH_ENTRY_DEFAULT_IDLE_TIME);
>>
>> Here without lock there would be no fence.
>> (this write could be re-ordered before tnl_neigh_expired().)
>
> Can the control dependency avoid reorder in this case?
> Namely, can the write be speculated and committed before the branch
> outcome?

I searched a little more info on this.
ARM will use the control dependency for ordering, so here it will ensure
the store is not happening before the load.

Some weaker arch such as Alpha won't rely on the control dependency and
would require load_acquire and store_release.

I don't think we're targeting this arch. 'tnl_neigh_expired()' would not
make sense without a conditional so I guess it should be fine.

>
> In case it's not enough, I guess you're suggesting load/store (on
> expires) with explicit acquire/release to enforce that, right?
Paolo Valerio Nov. 9, 2021, 10:05 p.m. UTC | #4
Gaëtan Rivet <grive@u256.net> writes:

> On Mon, Nov 8, 2021, at 20:30, Paolo Valerio wrote:
> [...]
>>>> +
>>>> +    atomic_read_relaxed(&neigh->expires, &expired);
>>>
>>> I'm having doubts about unfenced read / writes on expires.
>>> Technically on lookup there would be reads then writes without barriers.
>>> I'm not convinced it makes a difference, WDYT?
>>>
>>
>> Not sure if the store after the conditional here plays a role in
>> ordering.
>>
>>> Otherwise looks good to me.
>>>
>>>> +
>>>> +    return expired <= time_msec();
>>>> +}
>>>> +
>>>>  static struct tnl_neigh_entry *
>>>>  tnl_neigh_lookup__(const char br_name[IFNAMSIZ], const struct in6_addr 
>>>> *dst)
>>>>  {
>>>> @@ -73,11 +84,12 @@ tnl_neigh_lookup__(const char br_name[IFNAMSIZ], 
>>>> const struct in6_addr *dst)
>>>>      hash = tnl_neigh_hash(dst);
>>>>      CMAP_FOR_EACH_WITH_HASH (neigh, cmap_node, hash, &table) {
>>>>          if (ipv6_addr_equals(&neigh->ip, dst) && 
>>>> !strcmp(neigh->br_name, br_name)) {
>>>> -            if (neigh->expires <= time_now()) {
>>>> +            if (tnl_neigh_expired(neigh)) {
>>>>                  return NULL;
>>>>              }
>>>> 
>>>> -            neigh->expires = time_now() + 
>>>> NEIGH_ENTRY_DEFAULT_IDLE_TIME;
>>>> +            atomic_store_relaxed(&neigh->expires, time_msec() +
>>>> +                                 NEIGH_ENTRY_DEFAULT_IDLE_TIME);
>>>
>>> Here without lock there would be no fence.
>>> (this write could be re-ordered before tnl_neigh_expired().)
>>
>> Can the control dependency avoid reorder in this case?
>> Namely, can the write be speculated and committed before the branch
>> outcome?
>
> I searched a little more info on this.
> ARM will use the control dependency for ordering, so here it will ensure
> the store is not happening before the load.
>
> Some weaker arch such as Alpha won't rely on the control dependency and
> would require load_acquire and store_release.
>
> I don't think we're targeting this arch. 'tnl_neigh_expired()' would not
> make sense without a conditional so I guess it should be fine.
>

Thank you Gaetan for double checking.
I may miss something here, but apparently, the standard does not
guarantee ordering based on control dependency (I suppose for some
architectures or potential aggressive compiler optimizations), so even
if it could currently work in our case, it could make sense in terms of
consistency to promote this explicitly to acquire/release as you
suggested. It is also something relatively low rate and adding an
acq/rel should not be a big deal.

I'd respin for consistency with your first suggestion, if later turns
out we really prefer to use relaxed, we can go back to that.

>>
>> In case it's not enough, I guess you're suggesting load/store (on
>> expires) with explicit acquire/release to enforce that, right?
>
> -- 
> Gaetan Rivet
diff mbox series

Patch

diff --git a/lib/tnl-neigh-cache.c b/lib/tnl-neigh-cache.c
index 5bda4af7e..2769e5c3d 100644
--- a/lib/tnl-neigh-cache.c
+++ b/lib/tnl-neigh-cache.c
@@ -32,6 +32,7 @@ 
 #include "errno.h"
 #include "flow.h"
 #include "netdev.h"
+#include "ovs-atomic.h"
 #include "ovs-thread.h"
 #include "packets.h"
 #include "openvswitch/poll-loop.h"
@@ -44,14 +45,14 @@ 
 #include "openvswitch/vlog.h"
 
 
-/* In seconds */
-#define NEIGH_ENTRY_DEFAULT_IDLE_TIME  (15 * 60)
+/* In milliseconds */
+#define NEIGH_ENTRY_DEFAULT_IDLE_TIME  (15 * 60 * 1000)
 
 struct tnl_neigh_entry {
     struct cmap_node cmap_node;
     struct in6_addr ip;
     struct eth_addr mac;
-    time_t expires;             /* Expiration time. */
+    atomic_llong expires;       /* Expiration time in ms. */
     char br_name[IFNAMSIZ];
 };
 
@@ -64,6 +65,16 @@  tnl_neigh_hash(const struct in6_addr *ip)
     return hash_bytes(ip->s6_addr, 16, 0);
 }
 
+static bool
+tnl_neigh_expired(struct tnl_neigh_entry *neigh)
+{
+    long long expired;
+
+    atomic_read_relaxed(&neigh->expires, &expired);
+
+    return expired <= time_msec();
+}
+
 static struct tnl_neigh_entry *
 tnl_neigh_lookup__(const char br_name[IFNAMSIZ], const struct in6_addr *dst)
 {
@@ -73,11 +84,12 @@  tnl_neigh_lookup__(const char br_name[IFNAMSIZ], const struct in6_addr *dst)
     hash = tnl_neigh_hash(dst);
     CMAP_FOR_EACH_WITH_HASH (neigh, cmap_node, hash, &table) {
         if (ipv6_addr_equals(&neigh->ip, dst) && !strcmp(neigh->br_name, br_name)) {
-            if (neigh->expires <= time_now()) {
+            if (tnl_neigh_expired(neigh)) {
                 return NULL;
             }
 
-            neigh->expires = time_now() + NEIGH_ENTRY_DEFAULT_IDLE_TIME;
+            atomic_store_relaxed(&neigh->expires, time_msec() +
+                                 NEIGH_ENTRY_DEFAULT_IDLE_TIME);
             return neigh;
         }
     }
@@ -121,7 +133,8 @@  tnl_neigh_set__(const char name[IFNAMSIZ], const struct in6_addr *dst,
     struct tnl_neigh_entry *neigh = tnl_neigh_lookup__(name, dst);
     if (neigh) {
         if (eth_addr_equals(neigh->mac, mac)) {
-            neigh->expires = time_now() + NEIGH_ENTRY_DEFAULT_IDLE_TIME;
+            atomic_store_relaxed(&neigh->expires, time_msec() +
+                                 NEIGH_ENTRY_DEFAULT_IDLE_TIME);
             ovs_mutex_unlock(&mutex);
             return;
         }
@@ -133,7 +146,8 @@  tnl_neigh_set__(const char name[IFNAMSIZ], const struct in6_addr *dst,
 
     neigh->ip = *dst;
     neigh->mac = mac;
-    neigh->expires = time_now() + NEIGH_ENTRY_DEFAULT_IDLE_TIME;
+    atomic_store_relaxed(&neigh->expires, time_msec() +
+                         NEIGH_ENTRY_DEFAULT_IDLE_TIME);
     ovs_strlcpy(neigh->br_name, name, sizeof neigh->br_name);
     cmap_insert(&table, &neigh->cmap_node, tnl_neigh_hash(&neigh->ip));
     ovs_mutex_unlock(&mutex);
@@ -208,7 +222,7 @@  tnl_neigh_cache_run(void)
 
     ovs_mutex_lock(&mutex);
     CMAP_FOR_EACH(neigh, cmap_node, &table) {
-        if (neigh->expires <= time_now()) {
+        if (tnl_neigh_expired(neigh)) {
             tnl_neigh_delete(neigh);
             changed = true;
         }
@@ -319,7 +333,7 @@  tnl_neigh_cache_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
 
         ds_put_format(&ds, ETH_ADDR_FMT"   %s",
                       ETH_ADDR_ARGS(neigh->mac), neigh->br_name);
-        if (neigh->expires <= time_now()) {
+        if (tnl_neigh_expired(neigh)) {
             ds_put_format(&ds, " STALE");
         }
         ds_put_char(&ds, '\n');