From patchwork Tue Sep 18 13:45:08 2012 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Amerigo Wang X-Patchwork-Id: 184718 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 8DAFF2C0086 for ; Tue, 18 Sep 2012 23:45:55 +1000 (EST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1757609Ab2IRNpv (ORCPT ); Tue, 18 Sep 2012 09:45:51 -0400 Received: from mx1.redhat.com ([209.132.183.28]:62025 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1757560Ab2IRNpt (ORCPT ); Tue, 18 Sep 2012 09:45:49 -0400 Received: from int-mx02.intmail.prod.int.phx2.redhat.com (int-mx02.intmail.prod.int.phx2.redhat.com [10.5.11.12]) by mx1.redhat.com (8.14.4/8.14.4) with ESMTP id q8IDjVx7020040 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=OK); Tue, 18 Sep 2012 09:45:31 -0400 Received: from cr0.redhat.com (vpn-244-29.nrt.redhat.com [10.64.244.29]) by int-mx02.intmail.prod.int.phx2.redhat.com (8.13.8/8.13.8) with ESMTP id q8IDjMYp010232; Tue, 18 Sep 2012 09:45:28 -0400 From: Cong Wang To: netdev@vger.kernel.org Cc: netfilter-devel@vger.kernel.org, Cong Wang , Herbert Xu , =?UTF-8?q?Michal=20Kube=C4=8Dek?= , David Miller , Patrick McHardy , Pablo Neira Ayuso Subject: [PATCH 1/4] ipv6: add a new namespace for nf_conntrack_reasm Date: Tue, 18 Sep 2012 21:45:08 +0800 Message-Id: <1347975911-5655-2-git-send-email-amwang@redhat.com> In-Reply-To: <1347975911-5655-1-git-send-email-amwang@redhat.com> References: <1347975911-5655-1-git-send-email-amwang@redhat.com> MIME-Version: 1.0 X-Scanned-By: MIMEDefang 2.67 on 10.5.11.12 Sender: netfilter-devel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netfilter-devel@vger.kernel.org As pointed by Michal, it is necessary to add a new namespace for nf_conntrack_reasm code, this prepares for the second patch. Cc: Herbert Xu Cc: Michal Kubeček Cc: David Miller Cc: Patrick McHardy Cc: Pablo Neira Ayuso Cc: netfilter-devel@vger.kernel.org Signed-off-by: Cong Wang --- include/net/net_namespace.h | 3 + include/net/netns/ipv6.h | 8 ++ net/ipv6/netfilter/nf_conntrack_reasm.c | 135 +++++++++++++++++++++---------- 3 files changed, 104 insertions(+), 42 deletions(-) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 5ae57f1..d61e2b3 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -93,6 +93,9 @@ struct net { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct netns_ct ct; #endif +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) + struct netns_nf_frag nf_frag; +#endif struct sock *nfnl; struct sock *nfnl_stash; #endif diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 0318104..214cb0a 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -71,4 +71,12 @@ struct netns_ipv6 { #endif #endif }; + +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) +struct netns_nf_frag { + struct netns_sysctl_ipv6 sysctl; + struct netns_frags frags; +}; +#endif + #endif diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index f94fb3a..d28c067 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -71,27 +71,26 @@ struct nf_ct_frag6_queue }; static struct inet_frags nf_frags; -static struct netns_frags nf_init_frags; #ifdef CONFIG_SYSCTL static struct ctl_table nf_ct_frag6_sysctl_table[] = { { .procname = "nf_conntrack_frag6_timeout", - .data = &nf_init_frags.timeout, + .data = &init_net.nf_frag.frags.timeout, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_frag6_low_thresh", - .data = &nf_init_frags.low_thresh, + .data = &init_net.nf_frag.frags.low_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "nf_conntrack_frag6_high_thresh", - .data = &nf_init_frags.high_thresh, + .data = &init_net.nf_frag.frags.high_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, @@ -99,7 +98,54 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = { { } }; -static struct ctl_table_header *nf_ct_frag6_sysctl_header; +static int __net_init nf_ct_frag6_sysctl_register(struct net *net) +{ + struct ctl_table *table; + struct ctl_table_header *hdr; + + table = nf_ct_frag6_sysctl_table; + if (!net_eq(net, &init_net)) { + table = kmemdup(table, sizeof(nf_ct_frag6_sysctl_table), GFP_KERNEL); + if (table == NULL) + goto err_alloc; + + table[0].data = &net->ipv6.frags.high_thresh; + table[1].data = &net->ipv6.frags.low_thresh; + table[2].data = &net->ipv6.frags.timeout; + } + + hdr = register_net_sysctl(net, "net/netfilter", table); + if (hdr == NULL) + goto err_reg; + + net->ipv6.sysctl.frags_hdr = hdr; + return 0; + +err_reg: + if (!net_eq(net, &init_net)) + kfree(table); +err_alloc: + return -ENOMEM; +} + +static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) +{ + struct ctl_table *table; + + table = net->nf_frag.sysctl.frags_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->nf_frag.sysctl.frags_hdr); + if (!net_eq(net, &init_net)) + kfree(table); +} + +#else +static int __net_init nf_ct_frag6_sysctl_register(struct net *net) +{ + return 0; +} +static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) +{ +} #endif static unsigned int nf_hashfn(struct inet_frag_queue *q) @@ -131,13 +177,6 @@ static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq) inet_frag_kill(&fq->q, &nf_frags); } -static void nf_ct_frag6_evictor(void) -{ - local_bh_disable(); - inet_frag_evictor(&nf_init_frags, &nf_frags); - local_bh_enable(); -} - static void nf_ct_frag6_expire(unsigned long data) { struct nf_ct_frag6_queue *fq; @@ -159,8 +198,8 @@ out: /* Creation primitives. */ -static __inline__ struct nf_ct_frag6_queue * -fq_find(__be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst) +static __inline__ struct nf_ct_frag6_queue* +fq_find(struct net *net, __be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst) { struct inet_frag_queue *q; struct ip6_create_arg arg; @@ -174,7 +213,7 @@ fq_find(__be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst) read_lock_bh(&nf_frags.lock); hash = inet6_hash_frag(id, src, dst, nf_frags.rnd); - q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash); + q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash); local_bh_enable(); if (q == NULL) goto oom; @@ -186,7 +225,7 @@ oom: } -static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, +static int nf_ct_frag6_queue(struct nf_ct_frag6_queue*fq, struct sk_buff *skb, const struct frag_hdr *fhdr, int nhoff) { struct sk_buff *prev, *next; @@ -312,7 +351,7 @@ found: fq->q.meat += skb->len; if (payload_len > fq->q.max_size) fq->q.max_size = payload_len; - atomic_add(skb->truesize, &nf_init_frags.mem); + atomic_add(skb->truesize, &fq->q.net->mem); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -322,7 +361,7 @@ found: fq->q.last_in |= INET_FRAG_FIRST_IN; } write_lock(&nf_frags.lock); - list_move_tail(&fq->q.lru_list, &nf_init_frags.lru_list); + list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list); write_unlock(&nf_frags.lock); return 0; @@ -391,7 +430,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) clone->ip_summed = head->ip_summed; NFCT_FRAG6_CB(clone)->orig = NULL; - atomic_add(clone->truesize, &nf_init_frags.mem); + atomic_add(clone->truesize, &fq->q.net->mem); } /* We have to remove fragment header from datagram and to relocate @@ -415,7 +454,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; } - atomic_sub(head->truesize, &nf_init_frags.mem); + atomic_sub(head->truesize, &fq->q.net->mem); head->local_df = 1; head->next = NULL; @@ -527,6 +566,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) { struct sk_buff *clone; struct net_device *dev = skb->dev; + struct net *net = skb_dst(skb) ? dev_net(skb_dst(skb)->dev) : dev_net(skb->dev); struct frag_hdr *fhdr; struct nf_ct_frag6_queue *fq; struct ipv6hdr *hdr; @@ -560,10 +600,13 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) hdr = ipv6_hdr(clone); fhdr = (struct frag_hdr *)skb_transport_header(clone); - if (atomic_read(&nf_init_frags.mem) > nf_init_frags.high_thresh) - nf_ct_frag6_evictor(); + if (atomic_read(&net->nf_frag.frags.mem) > net->nf_frag.frags.high_thresh) { + local_bh_disable(); + inet_frag_evictor(&net->nf_frag.frags, &nf_frags); + local_bh_enable(); + } - fq = fq_find(fhdr->identification, user, &hdr->saddr, &hdr->daddr); + fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr); if (fq == NULL) { pr_debug("Can't find and can't create new queue\n"); goto ret_orig; @@ -621,8 +664,31 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, nf_conntrack_put_reasm(skb); } +static int nf_ct_net_init(struct net *net) +{ + net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; + net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH; + net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT; + inet_frags_init_net(&net->nf_frag.frags); + + return nf_ct_frag6_sysctl_register(net); +} + +static void nf_ct_net_exit(struct net *net) +{ + nf_ct_frags6_sysctl_unregister(net); + inet_frags_exit_net(&net->nf_frag.frags, &nf_frags); +} + +static struct pernet_operations nf_ct_net_ops = { + .init = nf_ct_net_init, + .exit = nf_ct_net_exit, +}; + int nf_ct_frag6_init(void) { + int ret = 0; + nf_frags.hashfn = nf_hashfn; nf_frags.constructor = ip6_frag_init; nf_frags.destructor = NULL; @@ -631,32 +697,17 @@ int nf_ct_frag6_init(void) nf_frags.match = ip6_frag_match; nf_frags.frag_expire = nf_ct_frag6_expire; nf_frags.secret_interval = 10 * 60 * HZ; - nf_init_frags.timeout = IPV6_FRAG_TIMEOUT; - nf_init_frags.high_thresh = IPV6_FRAG_HIGH_THRESH; - nf_init_frags.low_thresh = IPV6_FRAG_LOW_THRESH; - inet_frags_init_net(&nf_init_frags); inet_frags_init(&nf_frags); -#ifdef CONFIG_SYSCTL - nf_ct_frag6_sysctl_header = register_net_sysctl(&init_net, "net/netfilter", - nf_ct_frag6_sysctl_table); - if (!nf_ct_frag6_sysctl_header) { + ret = register_pernet_subsys(&nf_ct_net_ops); + if (ret) inet_frags_fini(&nf_frags); - return -ENOMEM; - } -#endif - return 0; + return ret; } void nf_ct_frag6_cleanup(void) { -#ifdef CONFIG_SYSCTL - unregister_net_sysctl_table(nf_ct_frag6_sysctl_header); - nf_ct_frag6_sysctl_header = NULL; -#endif + unregister_pernet_subsys(&nf_ct_net_ops); inet_frags_fini(&nf_frags); - - nf_init_frags.low_thresh = 0; - nf_ct_frag6_evictor(); }