diff mbox series

[ovs-dev,v3,8/8] conntrack: Use an atomic conn expiration value

Message ID 7ba072a13032e3f6e843881205583a8e9a57f5d3.1623786081.git.grive@u256.net
State Changes Requested
Headers show
Series conntrack: improve multithread scalability | expand

Commit Message

Gaetan Rivet June 15, 2021, 11:22 p.m. UTC
A lock is taken during conn_lookup() to check whether a connection is
expired before returning it. This lock can have some contention.

Even though this lock ensures a consistent sequence of writes, it does
not imply a specific order. A ct_clean thread taking the lock first
could read a value that would be updated immediately after by a PMD
waiting on the same lock, just as well as the inverse order.

As such, the expiration time can be stale anytime it is read. In this
context, using an atomic will ensure the same guarantees for either
writes or reads, i.e. writes are consistent and reads are not undefined
behaviour. Reading an atomic is however less costly than taking and
releasing a lock.

Signed-off-by: Gaetan Rivet <grive@u256.net>
Reviewed-by: Eli Britstein <elibr@nvidia.com>
Acked-by: William Tu <u9012063@gmail.com>
---
 lib/conntrack-private.h |  2 +-
 lib/conntrack-tp.c      |  2 +-
 lib/conntrack.c         | 27 +++++++++++++++------------
 3 files changed, 17 insertions(+), 14 deletions(-)
diff mbox series

Patch

diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
index bb82252e8..d61ab4f36 100644
--- a/lib/conntrack-private.h
+++ b/lib/conntrack-private.h
@@ -144,7 +144,7 @@  struct conn {
     /* Mutable data. */
     struct ovs_mutex lock; /* Guards all mutable fields. */
     ovs_u128 label;
-    long long expiration;
+    atomic_llong expiration;
     uint32_t mark;
     int seq_skew;
 
diff --git a/lib/conntrack-tp.c b/lib/conntrack-tp.c
index 22363e7fe..5bf2816ca 100644
--- a/lib/conntrack-tp.c
+++ b/lib/conntrack-tp.c
@@ -240,7 +240,7 @@  static void
 conn_schedule_expiration(struct conn *conn, enum ct_timeout tm, long long now,
                          uint32_t tp_value)
 {
-    conn->expiration = now + tp_value * 1000;
+    atomic_store_relaxed(&conn->expiration, now + tp_value * 1000);
     conn->exp.tm = tm;
     ignore(atomic_flag_test_and_set(&conn->exp.reschedule));
 }
diff --git a/lib/conntrack.c b/lib/conntrack.c
index 045710e8d..03aa21e78 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -99,6 +99,7 @@  static enum ct_update_res conn_update(struct conntrack *ct, struct conn *conn,
                                       struct dp_packet *pkt,
                                       struct conn_lookup_ctx *ctx,
                                       long long now);
+static long long int conn_expiration(const struct conn *);
 static bool conn_expired(struct conn *, long long now);
 static void set_mark(struct dp_packet *, struct conn *,
                      uint32_t val, uint32_t mask);
@@ -1018,13 +1019,10 @@  un_nat_packet(struct dp_packet *pkt, const struct conn *conn,
 static void
 conn_seq_skew_set(struct conntrack *ct, const struct conn *conn_in,
                   long long now, int seq_skew, bool seq_skew_dir)
-    OVS_NO_THREAD_SAFETY_ANALYSIS
 {
     struct conn *conn;
-    ovs_mutex_unlock(&conn_in->lock);
-    conn_lookup(ct, &conn_in->key, now, &conn, NULL);
-    ovs_mutex_lock(&conn_in->lock);
 
+    conn_lookup(ct, &conn_in->key, now, &conn, NULL);
     if (conn && seq_skew) {
         conn->seq_skew = seq_skew;
         conn->seq_skew_dir = seq_skew_dir;
@@ -1596,9 +1594,7 @@  ct_sweep(struct conntrack *ct, long long now, size_t limit)
                 continue;
             }
 
-            ovs_mutex_lock(&conn->lock);
-            expiration = conn->expiration;
-            ovs_mutex_unlock(&conn->lock);
+            expiration = conn_expiration(conn);
 
             if (conn == end_of_queue) {
                 /* If we already re-enqueued this conn during this sweep,
@@ -2483,14 +2479,21 @@  conn_update(struct conntrack *ct, struct conn *conn, struct dp_packet *pkt,
     return update_res;
 }
 
+static long long int
+conn_expiration(const struct conn *conn)
+{
+    long long int expiration;
+
+    atomic_read_relaxed(&CONST_CAST(struct conn *, conn)->expiration,
+                        &expiration);
+    return expiration;
+}
+
 static bool
 conn_expired(struct conn *conn, long long now)
 {
     if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
-        ovs_mutex_lock(&conn->lock);
-        bool expired = now >= conn->expiration ? true : false;
-        ovs_mutex_unlock(&conn->lock);
-        return expired;
+        return now >= conn_expiration(conn);
     }
     return false;
 }
@@ -2633,7 +2636,7 @@  conn_to_ct_dpif_entry(const struct conn *conn, struct ct_dpif_entry *entry,
     entry->mark = conn->mark;
     memcpy(&entry->labels, &conn->label, sizeof entry->labels);
 
-    long long expiration = conn->expiration - now;
+    long long expiration = conn_expiration(conn) - now;
 
     struct ct_l4_proto *class = l4_protos[conn->key.nw_proto];
     if (class->conn_get_protoinfo) {