@@ -15,6 +15,10 @@ enum ip_conntrack_info {
IP_CT_DIR_ORIGINAL); may be a retransmission. */
IP_CT_NEW,
+	/* Untracked. NOTE: inserted before IP_CT_IS_REPLY, which renumbers
+	 * the reply-direction states -- audit users of the old values. */
+	IP_CT_UNTRACKED,
+
/* >= this indicates reply direction */
IP_CT_IS_REPLY,
@@ -175,7 +175,7 @@ static inline struct nf_conn *
nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
{
*ctinfo = skb->nfctinfo;
- return (struct nf_conn *)skb->nfct;
+ return container_of(skb->nfct, struct nf_conn, ct_general);
}
/* decrement reference count on a conntrack */
@@ -261,7 +261,7 @@ extern s16 (*nf_ct_nat_offset)(const struct nf_conn *ct,
u32 seq);
/* Fake conntrack entry for untracked connections */
-extern struct nf_conn nf_conntrack_untracked;
+DECLARE_PER_CPU(struct nf_conn, pcpu_nf_conntrack_untracked);
/* Iterate over all conntracks: if iter returns true, it's deleted. */
extern void
@@ -291,7 +291,12 @@ static inline int nf_ct_is_dying(struct nf_conn *ct)
static inline int nf_ct_is_untracked(const struct sk_buff *skb)
{
- return (skb->nfct == &nf_conntrack_untracked.ct_general);
+ return (skb->nfctinfo == IP_CT_UNTRACKED);
+}
+
+static inline int nf_ct_is_tracked(const struct sk_buff *skb)
+{
+ return (skb->nfctinfo != IP_CT_UNTRACKED);
}
extern int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp);
@@ -60,7 +60,7 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb)
struct nf_conn *ct = (struct nf_conn *)skb->nfct;
int ret = NF_ACCEPT;
- if (ct && ct != &nf_conntrack_untracked) {
+ if (ct && nf_ct_is_tracked(skb)) {
if (!nf_ct_is_confirmed(ct))
ret = __nf_conntrack_confirm(skb);
if (likely(ret == NF_ACCEPT))
@@ -719,6 +719,7 @@ static int __init nf_nat_init(void)
{
size_t i;
int ret;
+ int cpu;
need_ipv4_conntrack();
@@ -742,7 +743,8 @@ static int __init nf_nat_init(void)
spin_unlock_bh(&nf_nat_lock);
/* Initialize fake conntrack so that NAT will skip it */
- nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
+ for_each_possible_cpu(cpu)
+		per_cpu(pcpu_nf_conntrack_untracked, cpu).status |= IPS_NAT_DONE_MASK;
l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET);
@@ -98,7 +98,7 @@ nf_nat_fn(unsigned int hooknum,
return NF_ACCEPT;
/* Don't try to NAT if this packet is not conntracked */
- if (ct == &nf_conntrack_untracked)
+ if (ctinfo == IP_CT_UNTRACKED)
return NF_ACCEPT;
nat = nfct_nat(ct);
@@ -208,8 +208,8 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
type = icmp6h->icmp6_type - 130;
if (type >= 0 && type < sizeof(noct_valid_new) &&
noct_valid_new[type]) {
- skb->nfct = &nf_conntrack_untracked.ct_general;
- skb->nfctinfo = IP_CT_NEW;
+ skb->nfct = &__get_cpu_var(pcpu_nf_conntrack_untracked).ct_general;
+ skb->nfctinfo = IP_CT_UNTRACKED;
nf_conntrack_get(skb->nfct);
return NF_ACCEPT;
}
@@ -62,8 +62,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
unsigned int nf_conntrack_max __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_max);
-struct nf_conn nf_conntrack_untracked __read_mostly;
-EXPORT_SYMBOL_GPL(nf_conntrack_untracked);
+DEFINE_PER_CPU(struct nf_conn, pcpu_nf_conntrack_untracked);
+EXPORT_PER_CPU_SYMBOL(pcpu_nf_conntrack_untracked);
static int nf_conntrack_hash_rnd_initted;
static unsigned int nf_conntrack_hash_rnd;
@@ -1185,10 +1185,16 @@ static void nf_ct_release_dying_list(struct net *net)
static void nf_conntrack_cleanup_init_net(void)
{
- /* wait until all references to nf_conntrack_untracked are dropped */
- while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
+ int cpu, use;
+ for (;;) {
+ use = 0;
+ for_each_possible_cpu(cpu)
+ use += atomic_read(&per_cpu(pcpu_nf_conntrack_untracked, cpu).ct_general.use) - 1;
+ /* wait until all references to nf_conntrack_untracked are dropped */
+ if (!use)
+ break;
schedule();
-
+ }
nf_conntrack_helper_fini();
nf_conntrack_proto_fini();
#ifdef CONFIG_NF_CONNTRACK_ZONES
@@ -1325,6 +1331,7 @@ static int nf_conntrack_init_init_net(void)
{
int max_factor = 8;
int ret;
+ int cpu;
/* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
* machine has 512 buckets. >= 1GB machines have 16384 buckets. */
@@ -1362,14 +1369,15 @@ static int nf_conntrack_init_init_net(void)
if (ret < 0)
goto err_extend;
#endif
- /* Set up fake conntrack: to never be deleted, not in any hashes */
-#ifdef CONFIG_NET_NS
- nf_conntrack_untracked.ct_net = &init_net;
-#endif
- atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
- /* - and look it like as a confirmed connection */
- set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);
+ /* Set up fake conntracks: to never be deleted, not in any hashes */
+ for_each_possible_cpu(cpu) {
+ struct nf_conn *ct = &per_cpu(pcpu_nf_conntrack_untracked, cpu);
+ write_pnet(&ct->ct_net, &init_net);
+ atomic_set(&ct->ct_general.use, 1);
+		/* - and make it look like a confirmed connection */
+ __set_bit(IPS_CONFIRMED_BIT, &ct->status);
+ }
return 0;
#ifdef CONFIG_NF_CONNTRACK_ZONES
@@ -479,9 +479,9 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
unsigned int flags = 0, group;
int err;
- /* ignore our fake conntrack entry */
- if (ct == &nf_conntrack_untracked)
- return 0;
+	/* FIXME: the untracked-entry filter was disabled (previously
+	 * "if (ct == &nf_conntrack_untracked) return 0;"), so untracked
+	 * conntracks now generate ctnetlink events. Restore the check with
+	 * a per-cpu-safe test (e.g. an IPS_UNTRACKED status bit). */
if (events & (1 << IPCT_DESTROY)) {
type = IPCTNL_MSG_CT_DELETE;
@@ -29,9 +29,13 @@ static unsigned int xt_ct_target(struct sk_buff *skb,
if (skb->nfct != NULL)
return XT_CONTINUE;
+ skb->nfctinfo = IP_CT_NEW;
+ if (info->flags & XT_CT_NOTRACK) {
+ ct = &__get_cpu_var(pcpu_nf_conntrack_untracked);
+ skb->nfctinfo = IP_CT_UNTRACKED;
+ }
atomic_inc(&ct->ct_general.use);
skb->nfct = &ct->ct_general;
- skb->nfctinfo = IP_CT_NEW;
return XT_CONTINUE;
}
@@ -67,8 +71,9 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par)
return -EINVAL;
if (info->flags & XT_CT_NOTRACK) {
-		ct = &nf_conntrack_untracked;
-		atomic_inc(&ct->ct_general.use);
+		/* FIXME: __get_cpu_var() runs here in preemptible (checkentry)
+		 * context and pins info->ct to whichever CPU evaluated this. */
+		ct = &__get_cpu_var(pcpu_nf_conntrack_untracked);
goto out;
}
@@ -132,14 +135,14 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par)
struct nf_conn *ct = info->ct;
struct nf_conn_help *help;
- if (ct != &nf_conntrack_untracked) {
+ if (!(info->flags & XT_CT_NOTRACK)) {
help = nfct_help(ct);
if (help)
module_put(help->helper->me);
nf_ct_l3proto_module_put(par->family);
+ nf_ct_put(info->ct);
}
- nf_ct_put(info->ct);
}
static struct xt_target xt_ct_tg __read_mostly = {
@@ -23,8 +23,8 @@ notrack_tg(struct sk_buff *skb, const struct xt_action_param *par)
If there is a real ct entry correspondig to this packet,
it'll hang aroun till timing out. We don't deal with it
for performance reasons. JK */
- skb->nfct = &nf_conntrack_untracked.ct_general;
- skb->nfctinfo = IP_CT_NEW;
+ skb->nfct = &__get_cpu_var(pcpu_nf_conntrack_untracked).ct_general;
+ skb->nfctinfo = IP_CT_UNTRACKED;
nf_conntrack_get(skb->nfct);
return XT_CONTINUE;
@@ -104,8 +104,8 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
#ifdef WITH_CONNTRACK
/* Avoid counting cloned packets towards the original connection. */
nf_conntrack_put(skb->nfct);
- skb->nfct = &nf_conntrack_untracked.ct_general;
- skb->nfctinfo = IP_CT_NEW;
+ skb->nfct = &__get_cpu_var(pcpu_nf_conntrack_untracked).ct_general;
+ skb->nfctinfo = IP_CT_UNTRACKED;
nf_conntrack_get(skb->nfct);
#endif
/*
@@ -177,8 +177,8 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
#ifdef WITH_CONNTRACK
nf_conntrack_put(skb->nfct);
- skb->nfct = &nf_conntrack_untracked.ct_general;
- skb->nfctinfo = IP_CT_NEW;
+ skb->nfct = &__get_cpu_var(pcpu_nf_conntrack_untracked).ct_general;
+ skb->nfctinfo = IP_CT_UNTRACKED;
nf_conntrack_get(skb->nfct);
#endif
if (par->hooknum == NF_INET_PRE_ROUTING ||
@@ -120,7 +120,7 @@ xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (ct == NULL)
return false;
- if (ct == &nf_conntrack_untracked)
+ if (nf_ct_is_untracked(skb))
return false;
if (ct->master)
@@ -123,7 +123,7 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
ct = nf_ct_get(skb, &ctinfo);
- if (ct == &nf_conntrack_untracked)
+ if (nf_ct_is_untracked(skb))
statebit = XT_CONNTRACK_STATE_UNTRACKED;
else if (ct != NULL)
statebit = XT_CONNTRACK_STATE_BIT(ctinfo);
@@ -127,7 +127,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
* reply packet of an established SNAT-ted connection. */
ct = nf_ct_get(skb, &ctinfo);
- if (ct && (ct != &nf_conntrack_untracked) &&
+ if (ct && nf_ct_is_tracked(skb) &&
((iph->protocol != IPPROTO_ICMP &&
ctinfo == IP_CT_IS_REPLY + IP_CT_ESTABLISHED) ||
(iph->protocol == IPPROTO_ICMP &&