@@ -140,6 +140,9 @@ struct conn {
struct nat_action_info_t *nat_info;
char *alg;
struct conn *nat_conn; /* The NAT 'conn' context, if there is one. */
+ atomic_flag reclaimed; /* False during the lifetime of the connection,
+ * True as soon as a thread has started freeing
+ * its memory. */
/* Mutable data. */
struct ovs_mutex lock; /* Guards all mutable fields. */
@@ -200,8 +203,8 @@ struct conntrack {
};
/* Lock acquisition order:
- * 1. 'ct_lock'
- * 2. 'conn->lock'
+ * 1. 'conn->lock'
+ * 2. 'ct_lock'
* 3. 'resources_lock'
*/
@@ -222,11 +225,18 @@ struct ct_l4_proto {
};
static inline void
-conn_expire_remove(struct conn_expire *exp)
+conn_expire_remove(struct conn_expire *exp, bool need_ct_lock)
+ OVS_NO_THREAD_SAFETY_ANALYSIS
{
if (!atomic_flag_test_and_set(&exp->remove_once)
&& rculist_next(&exp->node)) {
+ if (need_ct_lock) {
+ ovs_mutex_lock(&exp->ct->ct_lock);
+ }
rculist_remove(&exp->node);
+ if (need_ct_lock) {
+ ovs_mutex_unlock(&exp->ct->ct_lock);
+ }
}
}
@@ -238,6 +248,7 @@ tm_to_ct_dpif_tp(enum ct_timeout tm)
static void
conn_expire_init(struct conn *conn, struct conntrack *ct)
+ OVS_REQUIRES(conn->lock)
{
struct conn_expire *exp = &conn->exp;
@@ -257,20 +268,22 @@ conn_expire_insert(struct conn *conn)
{
struct conn_expire *exp = &conn->exp;
- ovs_mutex_lock(&exp->ct->ct_lock);
ovs_mutex_lock(&conn->lock);
+ ovs_mutex_lock(&exp->ct->ct_lock);
rculist_push_back(&exp->ct->exp_lists[exp->tm], &exp->node);
+ ovs_mutex_unlock(&exp->ct->ct_lock);
+
atomic_flag_clear(&exp->insert_once);
atomic_flag_clear(&exp->remove_once);
ovs_mutex_unlock(&conn->lock);
- ovs_mutex_unlock(&exp->ct->ct_lock);
}
static void
conn_schedule_expiration(struct conntrack *ct, struct conn *conn,
enum ct_timeout tm, long long now, uint32_t tp_value)
+ OVS_REQUIRES(conn->lock)
{
conn_expire_init(conn, ct);
conn->expiration = now + tp_value * 1000;
@@ -285,19 +298,12 @@ conn_update_expiration__(struct conntrack *ct, struct conn *conn,
enum ct_timeout tm, long long now,
uint32_t tp_value)
OVS_REQUIRES(conn->lock)
+ OVS_EXCLUDED(ct->ct_lock)
{
- ovs_mutex_unlock(&conn->lock);
-
- ovs_mutex_lock(&ct->ct_lock);
- ovs_mutex_lock(&conn->lock);
if (!conn->cleaned) {
- conn_expire_remove(&conn->exp);
+ conn_expire_remove(&conn->exp, true);
conn_schedule_expiration(ct, conn, tm, now, tp_value);
}
- ovs_mutex_unlock(&conn->lock);
- ovs_mutex_unlock(&ct->ct_lock);
-
- ovs_mutex_lock(&conn->lock);
}
/* The conn entry lock must be held on entry and exit. */
@@ -309,20 +315,12 @@ conn_update_expiration(struct conntrack *ct, struct conn *conn,
struct timeout_policy *tp;
uint32_t val;
- ovs_mutex_unlock(&conn->lock);
-
- ovs_mutex_lock(&ct->ct_lock);
- ovs_mutex_lock(&conn->lock);
tp = timeout_policy_lookup(ct, conn->tp_id);
if (tp) {
val = tp->policy.attrs[tm_to_ct_dpif_tp(tm)];
} else {
val = ct_dpif_netdev_tp_def[tm_to_ct_dpif_tp(tm)];
}
- ovs_mutex_unlock(&conn->lock);
- ovs_mutex_unlock(&ct->ct_lock);
-
- ovs_mutex_lock(&conn->lock);
VLOG_DBG_RL(&rl, "Update timeout %s zone=%u with policy id=%d "
"val=%u sec.",
ct_timeout_str[tm], conn->key.zone, conn->tp_id, val);
@@ -330,11 +328,10 @@ conn_update_expiration(struct conntrack *ct, struct conn *conn,
conn_update_expiration__(ct, conn, tm, now, val);
}
-/* ct_lock must be held. */
void
conn_init_expiration(struct conntrack *ct, struct conn *conn,
enum ct_timeout tm, long long now)
- OVS_REQUIRES(ct->ct_lock)
+ OVS_REQUIRES(conn->lock)
{
struct timeout_policy *tp;
uint32_t val;
@@ -465,7 +462,7 @@ zone_limit_delete(struct conntrack *ct, uint16_t zone)
static void
conn_clean_cmn(struct conntrack *ct, struct conn *conn)
- OVS_REQUIRES(ct->ct_lock)
+ OVS_REQUIRES(conn->lock, ct->ct_lock)
{
if (conn->alg) {
expectation_clean(ct, &conn->key);
@@ -484,32 +481,52 @@ conn_clean_cmn(struct conntrack *ct, struct conn *conn)
* removes the associated nat 'conn' from the lookup datastructures. */
static void
conn_clean(struct conntrack *ct, struct conn *conn)
- OVS_REQUIRES(ct->ct_lock)
+ OVS_EXCLUDED(conn->lock, ct->ct_lock)
{
ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
+ if (atomic_flag_test_and_set(&conn->reclaimed)) {
+ return;
+ }
+
+ ovs_mutex_lock(&conn->lock);
+ ovs_mutex_lock(&ct->ct_lock);
+
conn_clean_cmn(ct, conn);
if (conn->nat_conn) {
uint32_t hash = conn_key_hash(&conn->nat_conn->key, ct->hash_basis);
cmap_remove(&ct->conns, &conn->nat_conn->cm_node, hash);
}
- conn_expire_remove(&conn->exp);
+ conn_expire_remove(&conn->exp, false);
conn->cleaned = true;
ovsrcu_postpone(delete_conn, conn);
atomic_count_dec(&ct->n_conn);
+
+ ovs_mutex_unlock(&ct->ct_lock);
+ ovs_mutex_unlock(&conn->lock);
}
static void
conn_clean_one(struct conntrack *ct, struct conn *conn)
- OVS_REQUIRES(ct->ct_lock)
+ OVS_EXCLUDED(conn->lock, ct->ct_lock)
{
+ if (atomic_flag_test_and_set(&conn->reclaimed)) {
+ return;
+ }
+
+ ovs_mutex_lock(&conn->lock);
+ ovs_mutex_lock(&ct->ct_lock);
+
conn_clean_cmn(ct, conn);
if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
- conn_expire_remove(&conn->exp);
+ conn_expire_remove(&conn->exp, false);
conn->cleaned = true;
atomic_count_dec(&ct->n_conn);
}
ovsrcu_postpone(delete_conn_one, conn);
+
+ ovs_mutex_unlock(&ct->ct_lock);
+ ovs_mutex_unlock(&conn->lock);
}
/* Destroys the connection tracker 'ct' and frees all the allocated memory.
@@ -523,10 +540,12 @@ conntrack_destroy(struct conntrack *ct)
pthread_join(ct->clean_thread, NULL);
latch_destroy(&ct->clean_thread_exit);
- ovs_mutex_lock(&ct->ct_lock);
CMAP_FOR_EACH (conn, cm_node, &ct->conns) {
conn_clean_one(ct, conn);
}
+
+ ovs_mutex_lock(&ct->ct_lock);
+
cmap_destroy(&ct->conns);
struct zone_limit *zl;
@@ -1002,7 +1021,6 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
const struct nat_action_info_t *nat_action_info,
const char *helper, const struct alg_exp_node *alg_exp,
enum ct_alg_ctl_type ct_alg_ctl, uint32_t tp_id)
- OVS_REQUIRES(ct->ct_lock)
{
struct conn *nc = NULL;
struct conn *nat_conn = NULL;
@@ -1076,18 +1094,24 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
nat_packet(pkt, nc, ctx->icmp_related);
memcpy(&nat_conn->key, &nc->rev_key, sizeof nat_conn->key);
memcpy(&nat_conn->rev_key, &nc->key, sizeof nat_conn->rev_key);
+ ovs_mutex_init_adaptive(&nat_conn->lock);
nat_conn->conn_type = CT_CONN_TYPE_UN_NAT;
nat_conn->nat_info = NULL;
nat_conn->alg = NULL;
nat_conn->nat_conn = NULL;
uint32_t nat_hash = conn_key_hash(&nat_conn->key, ct->hash_basis);
+ ovs_mutex_lock(&ct->ct_lock);
cmap_insert(&ct->conns, &nat_conn->cm_node, nat_hash);
+ ovs_mutex_unlock(&ct->ct_lock);
}
nc->nat_conn = nat_conn;
ovs_mutex_init_adaptive(&nc->lock);
nc->conn_type = CT_CONN_TYPE_DEFAULT;
+ atomic_flag_clear(&nc->reclaimed);
+ ovs_mutex_lock(&ct->ct_lock);
cmap_insert(&ct->conns, &nc->cm_node, ctx->hash);
+ ovs_mutex_unlock(&ct->ct_lock);
atomic_count_inc(&ct->n_conn);
ctx->conn = nc; /* For completeness. */
@@ -1110,7 +1134,7 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
* can limit DoS impact. */
nat_res_exhaustion:
free(nat_conn);
- conn_expire_remove(&nc->exp);
+ conn_expire_remove(&nc->exp, false);
ovsrcu_postpone(delete_conn_cmn, nc);
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
VLOG_WARN_RL(&rl, "Unable to NAT due to tuple space exhaustion - "
@@ -1150,11 +1174,9 @@ conn_update_state(struct conntrack *ct, struct dp_packet *pkt,
pkt->md.ct_state = CS_INVALID;
break;
case CT_UPDATE_NEW:
- ovs_mutex_lock(&ct->ct_lock);
if (conn_lookup(ct, &conn->key, now, NULL, NULL)) {
conn_clean(ct, conn);
}
- ovs_mutex_unlock(&ct->ct_lock);
create_new_conn = true;
break;
case CT_UPDATE_VALID_NEW:
@@ -1336,11 +1358,9 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
/* Delete found entry if in wrong direction. 'force' implies commit. */
if (OVS_UNLIKELY(force && ctx->reply && conn)) {
- ovs_mutex_lock(&ct->ct_lock);
if (conn_lookup(ct, &conn->key, now, NULL, NULL)) {
conn_clean(ct, conn);
}
- ovs_mutex_unlock(&ct->ct_lock);
conn = NULL;
}
@@ -1404,12 +1424,10 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
}
ovs_rwlock_unlock(&ct->resources_lock);
- ovs_mutex_lock(&ct->ct_lock);
if (!conn_lookup(ct, &ctx->key, now, NULL, NULL)) {
conn = conn_not_found(ct, pkt, ctx, commit, now, nat_action_info,
helper, alg_exp, ct_alg_ctl, tp_id);
}
- ovs_mutex_unlock(&ct->ct_lock);
}
write_ct_md(pkt, zone, conn, &ctx->key, alg_exp);
@@ -1532,8 +1550,6 @@ ct_sweep(struct conntrack *ct, long long now, size_t limit)
long long min_expiration = LLONG_MAX;
size_t count = 0;
- ovs_mutex_lock(&ct->ct_lock);
-
for (unsigned i = 0; i < N_CT_TM; i++) {
RCULIST_FOR_EACH (conn, exp.node, &ct->exp_lists[i]) {
ovs_mutex_lock(&conn->lock);
@@ -1557,7 +1573,6 @@ ct_sweep(struct conntrack *ct, long long now, size_t limit)
out:
VLOG_DBG("conntrack cleanup %"PRIuSIZE" entries in %lld msec", count,
time_msec() - now);
- ovs_mutex_unlock(&ct->ct_lock);
return min_expiration;
}
@@ -2606,13 +2621,11 @@ conntrack_flush(struct conntrack *ct, const uint16_t *zone)
{
struct conn *conn;
- ovs_mutex_lock(&ct->ct_lock);
CMAP_FOR_EACH (conn, cm_node, &ct->conns) {
if (!zone || *zone == conn->key.zone) {
conn_clean_one(ct, conn);
}
}
- ovs_mutex_unlock(&ct->ct_lock);
return 0;
}
@@ -2627,9 +2640,8 @@ conntrack_flush_tuple(struct conntrack *ct, const struct ct_dpif_tuple *tuple,
memset(&key, 0, sizeof(key));
tuple_to_conn_key(tuple, zone, &key);
- ovs_mutex_lock(&ct->ct_lock);
- conn_lookup(ct, &key, time_msec(), &conn, NULL);
+ conn_lookup(ct, &key, time_msec(), &conn, NULL);
if (conn && conn->conn_type == CT_CONN_TYPE_DEFAULT) {
conn_clean(ct, conn);
} else {
@@ -2637,7 +2649,6 @@ conntrack_flush_tuple(struct conntrack *ct, const struct ct_dpif_tuple *tuple,
error = ENOENT;
}
- ovs_mutex_unlock(&ct->ct_lock);
return error;
}