[ovs-dev,v1,9/9] conntrack: Use an atomic conn expiration value

Message ID: 97e250478cde86a1bb27cfc3131fd45b5d6de2ca.1613557616.git.grive@u256.net
State: Changes Requested
Series: conntrack: improve multithread scalability

Commit Message

Gaetan Rivet Feb. 17, 2021, 4:34 p.m. UTC
A lock is taken during conn_lookup() to check whether a connection has
expired before returning it. This lock can be a point of contention.

Even though this lock ensures a consistent sequence of writes, it does
not imply any specific ordering between threads: a ct_clean thread
taking the lock first can read a value that is updated immediately
afterwards by a PMD waiting on the same lock, or vice versa.

As such, the expiration time can be stale whenever it is read. In this
context, an atomic ensures the same write consistency while keeping the
same (lack of) guarantee for reads, and reading it is less costly than
taking and releasing the lock.

Signed-off-by: Gaetan Rivet <grive@u256.net>
Reviewed-by: Eli Britstein <elibr@nvidia.com>
---
 lib/conntrack-private.h |  2 +-
 lib/conntrack-tp.c      |  2 +-
 lib/conntrack.c         | 25 +++++++++++++++----------
 3 files changed, 17 insertions(+), 12 deletions(-)
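
For reference, the pattern this patch adopts can be illustrated outside of
OVS. The sketch below uses plain C11 <stdatomic.h> and pthreads instead of
the ovs-atomic.h and ovs_mutex wrappers used in the tree; the demo_conn
structure and function names are made up for illustration and are not part
of the patch.

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdbool.h>

    struct demo_conn {
        pthread_mutex_t lock;    /* Still guards the other mutable fields. */
        atomic_llong expiration; /* Now read without taking the lock. */
    };

    /* Writer side: the expiration is rearmed while holding the connection
     * lock, but the store itself only needs relaxed ordering, since readers
     * may observe a stale value either way. */
    static void
    demo_conn_rearm(struct demo_conn *conn, long long now, long long timeout_ms)
    {
        atomic_store_explicit(&conn->expiration, now + timeout_ms,
                              memory_order_relaxed);
    }

    /* Reader side: equivalent of conn_expired() after this patch.  No lock
     * is taken; the read is as (in)accurate as the locked read was. */
    static bool
    demo_conn_expired(struct demo_conn *conn, long long now)
    {
        long long expiration = atomic_load_explicit(&conn->expiration,
                                                    memory_order_relaxed);

        return now >= expiration;
    }

    int
    main(void)
    {
        struct demo_conn conn;

        pthread_mutex_init(&conn.lock, NULL);
        atomic_init(&conn.expiration, 0);

        demo_conn_rearm(&conn, 1000, 30 * 1000);
        return demo_conn_expired(&conn, 2000) ? 1 : 0;
    }

In the actual patch, the PMD and ct_clean threads play the roles of the
writer and reader above, and the connection lock remains in place for the
other mutable fields of struct conn.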

Comments

William Tu Feb. 23, 2021, 10:56 p.m. UTC | #1
On Wed, Feb 17, 2021 at 8:34 AM Gaetan Rivet <grive@u256.net> wrote:
>
> A lock is taken during conn_lookup() to check whether a connection has
> expired before returning it. This lock can be a point of contention.
>
> Even though this lock ensures a consistent sequence of writes, it does
> not imply any specific ordering between threads: a ct_clean thread
> taking the lock first can read a value that is updated immediately
> afterwards by a PMD waiting on the same lock, or vice versa.
>
> As such, the expiration time can be stale whenever it is read. In this
> context, an atomic ensures the same write consistency while keeping the
> same (lack of) guarantee for reads, and reading it is less costly than
> taking and releasing the lock.
>
> Signed-off-by: Gaetan Rivet <grive@u256.net>
> Reviewed-by: Eli Britstein <elibr@nvidia.com>
> ---
LGTM! thanks
Acked-by: William Tu <u9012063@gmail.com>

Patch

diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
index e8ffb5bf9..7f3f8714a 100644
--- a/lib/conntrack-private.h
+++ b/lib/conntrack-private.h
@@ -148,7 +148,7 @@  struct conn {
     struct ovs_mutex lock; /* Guards all mutable fields. */
     struct conn_expire exp;
     ovs_u128 label;
-    long long expiration;
+    atomic_llong expiration;
     uint32_t mark;
     int seq_skew;
 
diff --git a/lib/conntrack-tp.c b/lib/conntrack-tp.c
index 01ffa30df..a4ea19c1b 100644
--- a/lib/conntrack-tp.c
+++ b/lib/conntrack-tp.c
@@ -276,7 +276,7 @@  conn_schedule_expiration(struct conntrack *ct, struct conn *conn,
     OVS_REQUIRES(conn->lock)
 {
     conn_expire_init(conn, ct);
-    conn->expiration = now + tp_value * 1000;
+    atomic_store_relaxed(&conn->expiration, now + tp_value * 1000);
     conn->exp.tm = tm;
     if (!atomic_flag_test_and_set(&conn->exp.insert_once)) {
         ovsrcu_postpone(conn_expire_insert, conn);
diff --git a/lib/conntrack.c b/lib/conntrack.c
index e042683aa..193d550cb 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -99,6 +99,7 @@  static enum ct_update_res conn_update(struct conntrack *ct, struct conn *conn,
                                       struct dp_packet *pkt,
                                       struct conn_lookup_ctx *ctx,
                                       long long now);
+static long long int conn_expiration(const struct conn *);
 static bool conn_expired(struct conn *, long long now);
 static void set_mark(struct dp_packet *, struct conn *,
                      uint32_t val, uint32_t mask);
@@ -1555,10 +1556,8 @@  ct_sweep(struct conntrack *ct, long long now, size_t limit)
 
     for (unsigned i = 0; i < N_CT_TM; i++) {
         RCULIST_FOR_EACH (conn, exp.node, &ct->exp_lists[i]) {
-            ovs_mutex_lock(&conn->lock);
-            if (now < conn->expiration || count >= limit) {
-                min_expiration = MIN(min_expiration, conn->expiration);
-                ovs_mutex_unlock(&conn->lock);
+            if (!conn_expired(conn, now) || count >= limit) {
+                min_expiration = MIN(min_expiration, conn_expiration(conn));
                 if (count >= limit) {
                     /* Do not check other lists. */
                     COVERAGE_INC(conntrack_long_cleanup);
@@ -1566,7 +1565,6 @@  ct_sweep(struct conntrack *ct, long long now, size_t limit)
                 }
                 break;
             } else {
-                ovs_mutex_unlock(&conn->lock);
                 conn_clean(ct, conn);
             }
             count++;
@@ -2395,14 +2393,21 @@  conn_update(struct conntrack *ct, struct conn *conn, struct dp_packet *pkt,
     return update_res;
 }
 
+static long long int
+conn_expiration(const struct conn *conn)
+{
+    long long int expiration;
+
+    atomic_read_relaxed(&CONST_CAST(struct conn *, conn)->expiration,
+                        &expiration);
+    return expiration;
+}
+
 static bool
 conn_expired(struct conn *conn, long long now)
 {
     if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
-        ovs_mutex_lock(&conn->lock);
-        bool expired = now >= conn->expiration ? true : false;
-        ovs_mutex_unlock(&conn->lock);
-        return expired;
+        return now >= conn_expiration(conn);
     }
     return false;
 }
@@ -2545,7 +2550,7 @@  conn_to_ct_dpif_entry(const struct conn *conn, struct ct_dpif_entry *entry,
     entry->mark = conn->mark;
     memcpy(&entry->labels, &conn->label, sizeof entry->labels);
 
-    long long expiration = conn->expiration - now;
+    long long expiration = conn_expiration(conn) - now;
 
     struct ct_l4_proto *class = l4_protos[conn->key.nw_proto];
     if (class->conn_get_protoinfo) {