@@ -12,6 +12,7 @@ struct nf_conncount_list {
spinlock_t list_lock;
struct list_head head; /* connections with the same filtering key */
unsigned int count; /* length of list */
+ unsigned long last_gc; /* jiffies at most recent gc */
};
struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family,
@@ -134,6 +134,9 @@ static int __nf_conncount_add(struct net *net,
/* check the saved connections */
list_for_each_entry_safe(conn, conn_n, &list->head, node) {
+ if (time_after_eq(list->last_gc, jiffies))
+ break;
+
if (collect > CONNCOUNT_GC_MAX_NODES)
break;
@@ -190,6 +193,7 @@ static int __nf_conncount_add(struct net *net,
conn->jiffies32 = (u32)jiffies;
list_add_tail(&conn->node, &list->head);
list->count++;
+ list->last_gc = jiffies;
return 0;
}
@@ -214,6 +218,7 @@ void nf_conncount_list_init(struct nf_conncount_list *list)
spin_lock_init(&list->list_lock);
INIT_LIST_HEAD(&list->head);
list->count = 0;
+ list->last_gc = jiffies;
}
EXPORT_SYMBOL_GPL(nf_conncount_list_init);
@@ -231,6 +236,12 @@ bool nf_conncount_gc_list(struct net *net,
if (!spin_trylock(&list->list_lock))
return false;
+ /* don't bother if we just done GC */
+ if (time_after_eq(list->last_gc, jiffies)) {
+ spin_unlock(&list->list_lock);
+ return false;
+ }
+
list_for_each_entry_safe(conn, conn_n, &list->head, node) {
found = find_or_evict(net, list, conn);
if (IS_ERR(found)) {
@@ -258,6 +269,7 @@ bool nf_conncount_gc_list(struct net *net,
if (!list->count)
ret = true;
+ list->last_gc = jiffies;
spin_unlock(&list->list_lock);
return ret;
Currently nf_conncount can trigger garbage collection (GC) at multiple places. Each GC process takes a spin_lock_bh to traverse the nf_conncount_list. We found that when testing port scanning using two parallel nmap runs, because the number of connections increases fast, nf_conncount_count and its subsequent call to __nf_conncount_add take too much time, causing several CPU lockups. This happens when the user sets the conntrack limit to 20,000+, because the larger the limit, the longer the list that GC has to traverse. This patch mitigates the performance issue by avoiding unnecessary GC with a timestamp. Whenever nf_conncount has done a GC, a timestamp is updated, and before the next GC is triggered, we make sure more than one jiffy has passed. By doing this we can greatly reduce the CPU cycles and avoid the softirq lockup. To reproduce it in OVS, $ ovs-appctl dpctl/ct-set-limits zone=1,limit=20000 $ ovs-appctl dpctl/ct-get-limits At another machine, run two nmap instances $ nmap -p1- <IP> $ nmap -p1- <IP> Signed-off-by: William Tu <u9012063@gmail.com> Co-authored-by: Yifeng Sun <pkusunyifeng@gmail.com> Reported-by: Greg Rose <gvrose8192@gmail.com> Suggested-by: Florian Westphal <fw@strlen.de> --- include/net/netfilter/nf_conntrack_count.h | 1 + net/netfilter/nf_conncount.c | 12 ++++++++++++ 2 files changed, 13 insertions(+)