From patchwork Wed Mar 12 22:49:49 2014 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Florian Westphal X-Patchwork-Id: 329744 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 315B42C00B7 for ; Thu, 13 Mar 2014 09:54:20 +1100 (EST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752512AbaCLWyT (ORCPT ); Wed, 12 Mar 2014 18:54:19 -0400 Received: from Chamillionaire.breakpoint.cc ([80.244.247.6]:38335 "EHLO Chamillionaire.breakpoint.cc" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752270AbaCLWyT (ORCPT ); Wed, 12 Mar 2014 18:54:19 -0400 Received: from fw by Chamillionaire.breakpoint.cc with local (Exim 4.80) (envelope-from ) id 1WNs2U-0007pv-D6; Wed, 12 Mar 2014 23:54:18 +0100 From: Florian Westphal To: netfilter-devel@vger.kernel.org Cc: Florian Westphal Subject: [PATCH v2 1/3] netfilter: connlimit: use keyed locks Date: Wed, 12 Mar 2014 23:49:49 +0100 Message-Id: <1394664591-21843-2-git-send-email-fw@strlen.de> X-Mailer: git-send-email 1.8.1.5 In-Reply-To: <1394664591-21843-1-git-send-email-fw@strlen.de> References: <1394664591-21843-1-git-send-email-fw@strlen.de> Sender: netfilter-devel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netfilter-devel@vger.kernel.org connlimit currently suffers from spinlock contention, example for 4-core system with rps enabled: + 20.84% ksoftirqd/2 [kernel.kallsyms] [k] _raw_spin_lock_bh + 20.76% ksoftirqd/1 [kernel.kallsyms] [k] _raw_spin_lock_bh + 20.42% ksoftirqd/0 [kernel.kallsyms] [k] _raw_spin_lock_bh + 6.07% ksoftirqd/2 [nf_conntrack] [k] ____nf_conntrack_find + 6.07% ksoftirqd/1 [nf_conntrack] [k] ____nf_conntrack_find + 5.97% ksoftirqd/0 [nf_conntrack] [k] ____nf_conntrack_find + 2.47% ksoftirqd/2 [nf_conntrack] [k] hash_conntrack_raw + 2.45% ksoftirqd/0 [nf_conntrack] [k] hash_conntrack_raw + 2.44% ksoftirqd/1 [nf_conntrack] [k] hash_conntrack_raw May allow parallel lookup/insert/delete if the entry is hashed to another slot. With patch: + 20.95% ksoftirqd/0 [nf_conntrack] [k] ____nf_conntrack_find + 20.50% ksoftirqd/1 [nf_conntrack] [k] ____nf_conntrack_find + 20.27% ksoftirqd/2 [nf_conntrack] [k] ____nf_conntrack_find + 5.76% ksoftirqd/1 [nf_conntrack] [k] hash_conntrack_raw + 5.39% ksoftirqd/2 [nf_conntrack] [k] hash_conntrack_raw + 5.35% ksoftirqd/0 [nf_conntrack] [k] hash_conntrack_raw + 2.00% ksoftirqd/1 [kernel.kallsyms] [k] __rcu_read_unlock Improved rx processing rate from ~35kpps to ~50 kpps. Reviewed-by: Jesper Dangaard Brouer Signed-off-by: Florian Westphal --- Changes in v2: Address Jans comments wrt. CONNLIMIT_SLOTS 'power of 2' and add BUILD_BUG_ON for it. net/netfilter/xt_connlimit.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index a8eaabb..ad290cc 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -31,6 +31,9 @@ #include #include +#define CONNLIMIT_SLOTS 256 +#define CONNLIMIT_LOCK_SLOTS 32 + /* we will save the tuples of all connections we care about */ struct xt_connlimit_conn { struct hlist_node node; @@ -39,8 +42,8 @@ struct xt_connlimit_conn { }; struct xt_connlimit_data { - struct hlist_head iphash[256]; - spinlock_t lock; + struct hlist_head iphash[CONNLIMIT_SLOTS]; + spinlock_t locks[CONNLIMIT_LOCK_SLOTS]; }; static u_int32_t connlimit_rnd __read_mostly; @@ -48,7 +51,8 @@ static struct kmem_cache *connlimit_conn_cachep __read_mostly; static inline unsigned int connlimit_iphash(__be32 addr) { - return jhash_1word((__force __u32)addr, connlimit_rnd) & 0xFF; + return jhash_1word((__force __u32)addr, + connlimit_rnd) % CONNLIMIT_SLOTS; } static inline unsigned int @@ -61,7 +65,8 @@ connlimit_iphash6(const union nf_inet_addr *addr, for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i) res.ip6[i] = addr->ip6[i] & mask->ip6[i]; - return jhash2((u32 *)res.ip6, ARRAY_SIZE(res.ip6), connlimit_rnd) & 0xFF; + return jhash2((u32 *)res.ip6, ARRAY_SIZE(res.ip6), + connlimit_rnd) % CONNLIMIT_SLOTS; } static inline bool already_closed(const struct nf_conn *conn) @@ -183,7 +188,7 @@ static int count_them(struct net *net, hhead = &data->iphash[hash]; - spin_lock_bh(&data->lock); + spin_lock_bh(&data->locks[hash % CONNLIMIT_LOCK_SLOTS]); count = count_hlist(net, hhead, tuple, addr, mask, family, &addit); if (addit) { if (add_hlist(hhead, tuple, addr)) @@ -191,7 +196,7 @@ static int count_them(struct net *net, else count = -ENOMEM; } - spin_unlock_bh(&data->lock); + spin_unlock_bh(&data->locks[hash % CONNLIMIT_LOCK_SLOTS]); return count; } @@ -227,7 +232,6 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) connections = count_them(net, info->data, tuple_ptr, &addr, &info->mask, par->family); - if (connections < 0) /* kmalloc failed, drop it entirely */ goto hotdrop; @@ -268,7 +272,9 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par) return -ENOMEM; } - spin_lock_init(&info->data->lock); + for (i = 0; i < ARRAY_SIZE(info->data->locks); ++i) + spin_lock_init(&info->data->locks[i]); + for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) INIT_HLIST_HEAD(&info->data->iphash[i]); @@ -309,6 +315,10 @@ static struct xt_match connlimit_mt_reg __read_mostly = { static int __init connlimit_mt_init(void) { int ret; + + BUILD_BUG_ON(CONNLIMIT_LOCK_SLOTS > CONNLIMIT_SLOTS); + BUILD_BUG_ON((CONNLIMIT_SLOTS % CONNLIMIT_LOCK_SLOTS) != 0); + connlimit_conn_cachep = kmem_cache_create("xt_connlimit_conn", sizeof(struct xt_connlimit_conn), 0, 0, NULL);