Message ID | 20220805144745.6479-1-fw@strlen.de |
---|---|
State | Accepted, archived |
Delegated to: | Pablo Neira |
Headers | show |
Series | [nf] netfilter: nfnetlink: re-enable conntrack expectation events | expand |
Hi Florian, a few questions, comments. On Fri, Aug 05, 2022 at 04:47:45PM +0200, Florian Westphal wrote: > To avoid allocation of the conntrack extension area when possible, > the default behaviour was changed to only allocate the event extension > if a userspace program is subscribed to a notification group. > > Problem is that while 'conntrack -E' does enable the event allocation > behind the scenes, 'conntrack -E expect' does not: no expectation events > are delivered unless user sets > "net.netfilter.nf_conntrack_events" back to 1 (always on). > > Fix the autodetection to also consider EXP type group. > > We need to track the 6 event groups (3+3, new/update/destroy for events and > for expectations each) independently, else we'd disable events again > if an expectation group becomes empty while there is still an active > event group. > > Fixes: 2794cdb0b97b ("netfilter: nfnetlink: allow to detect if ctnetlink listeners exist") > Reported-by: Yi Chen <yiche@redhat.com> > Signed-off-by: Florian Westphal <fw@strlen.de> > --- > include/net/netns/conntrack.h | 2 +- > net/netfilter/nfnetlink.c | 83 ++++++++++++++++++++++++++++++----- > 2 files changed, 72 insertions(+), 13 deletions(-) > > diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h > index 0677cd3de034..c396a3862e80 100644 > --- a/include/net/netns/conntrack.h > +++ b/include/net/netns/conntrack.h > @@ -95,7 +95,7 @@ struct nf_ip_net { > > struct netns_ct { > #ifdef CONFIG_NF_CONNTRACK_EVENTS > - bool ctnetlink_has_listener; > + u8 ctnetlink_has_listener; so now ctnetlink_has_listener is a bitmask, right? > bool ecache_dwork_pending; > #endif > u8 sysctl_log_invalid; /* Log invalid packets */ > diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c > index c24b1240908f..6c268f1c201a 100644 > --- a/net/netfilter/nfnetlink.c > +++ b/net/netfilter/nfnetlink.c > @@ -44,6 +44,10 @@ MODULE_DESCRIPTION("Netfilter messages via netlink socket"); > > static unsigned int nfnetlink_pernet_id __read_mostly; > > +#ifdef CONFIG_NF_CONNTRACK_EVENTS > +static DEFINE_SPINLOCK(nfnl_grp_active_lock); > +#endif > + > struct nfnl_net { > struct sock *nfnl; > }; > @@ -654,6 +658,44 @@ static void nfnetlink_rcv(struct sk_buff *skb) > netlink_rcv_skb(skb, nfnetlink_rcv_msg); > } > > +static void nfnetlink_bind_event(struct net *net, unsigned int group) > +{ > +#ifdef CONFIG_NF_CONNTRACK_EVENTS > + int type, group_bit; > + u8 v; > + > + /* ctnetlink_has_listener is u8, all NFNLGRP_CONNTRACK_* groups > + * coming from userspace are < 8. Nit. is ctnetlink_has_listener relevant in the comment? It reads a bit confusing. > + */ > + if (group >= 8) Nit. enum nfnetlink_groups has __NFNLGRP_MAX, which is 9. This skips NFNLGRP_NFTRACE which is the last one in the array, not a big deal since the switch (type) results in return; but better fix this for the future? > + return; > + > + type = nfnl_group2type[group]; > + > + switch (type) { > + case NFNL_SUBSYS_CTNETLINK: > + break; > + case NFNL_SUBSYS_CTNETLINK_EXP: > + break; > + default: > + return; > + } > + > + group_bit = (1 << group); > + > + spin_lock(&nfnl_grp_active_lock); suggestion: maybe mutex instead? > + v = READ_ONCE(net->ct.ctnetlink_has_listener); > + if ((v & group_bit) == 0) { > + v |= group_bit; > + > + /* read concurrently without nfnl_grp_active_lock held. */ > + WRITE_ONCE(net->ct.ctnetlink_has_listener, v); > + } > + > + spin_unlock(&nfnl_grp_active_lock); > +#endif > +} > + > static int nfnetlink_bind(struct net *net, int group) > { > const struct nfnetlink_subsystem *ss; > @@ -670,28 +712,45 @@ static int nfnetlink_bind(struct net *net, int group) > if (!ss) > request_module_nowait("nfnetlink-subsys-%d", type); > > -#ifdef CONFIG_NF_CONNTRACK_EVENTS > - if (type == NFNL_SUBSYS_CTNETLINK) { > - nfnl_lock(NFNL_SUBSYS_CTNETLINK); > - WRITE_ONCE(net->ct.ctnetlink_has_listener, true); > - nfnl_unlock(NFNL_SUBSYS_CTNETLINK); > - } > -#endif > + nfnetlink_bind_event(net, group); > return 0; > } > > static void nfnetlink_unbind(struct net *net, int group) > { > #ifdef CONFIG_NF_CONNTRACK_EVENTS > + int type, group_bit; > + > if (group <= NFNLGRP_NONE || group > NFNLGRP_MAX) > return; > > - if (nfnl_group2type[group] == NFNL_SUBSYS_CTNETLINK) { > - nfnl_lock(NFNL_SUBSYS_CTNETLINK); > - if (!nfnetlink_has_listeners(net, group)) > - WRITE_ONCE(net->ct.ctnetlink_has_listener, false); > - nfnl_unlock(NFNL_SUBSYS_CTNETLINK); > + type = nfnl_group2type[group]; > + > + switch (type) { > + case NFNL_SUBSYS_CTNETLINK: > + break; > + case NFNL_SUBSYS_CTNETLINK_EXP: > + break; > + default: > + return; > + } > + > + /* ctnetlink_has_listener is u8 */ > + if (group >= 8) Same here? > + return; > + > + group_bit = (1 << group); > + > + spin_lock(&nfnl_grp_active_lock); > + if (!nfnetlink_has_listeners(net, group)) { > + u8 v = READ_ONCE(net->ct.ctnetlink_has_listener); > + > + v &= ~group_bit; > + > + /* read concurrently without nfnl_grp_active_lock held. */ > + WRITE_ONCE(net->ct.ctnetlink_has_listener, v); > } > + spin_unlock(&nfnl_grp_active_lock); > #endif > } > > -- > 2.35.1 >
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 0677cd3de034..c396a3862e80 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -95,7 +95,7 @@ struct nf_ip_net { struct netns_ct { #ifdef CONFIG_NF_CONNTRACK_EVENTS - bool ctnetlink_has_listener; + u8 ctnetlink_has_listener; bool ecache_dwork_pending; #endif u8 sysctl_log_invalid; /* Log invalid packets */ diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index c24b1240908f..6c268f1c201a 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -44,6 +44,10 @@ MODULE_DESCRIPTION("Netfilter messages via netlink socket"); static unsigned int nfnetlink_pernet_id __read_mostly; +#ifdef CONFIG_NF_CONNTRACK_EVENTS +static DEFINE_SPINLOCK(nfnl_grp_active_lock); +#endif + struct nfnl_net { struct sock *nfnl; }; @@ -654,6 +658,44 @@ static void nfnetlink_rcv(struct sk_buff *skb) netlink_rcv_skb(skb, nfnetlink_rcv_msg); } +static void nfnetlink_bind_event(struct net *net, unsigned int group) +{ +#ifdef CONFIG_NF_CONNTRACK_EVENTS + int type, group_bit; + u8 v; + + /* ctnetlink_has_listener is u8, all NFNLGRP_CONNTRACK_* groups + * coming from userspace are < 8. + */ + if (group >= 8) + return; + + type = nfnl_group2type[group]; + + switch (type) { + case NFNL_SUBSYS_CTNETLINK: + break; + case NFNL_SUBSYS_CTNETLINK_EXP: + break; + default: + return; + } + + group_bit = (1 << group); + + spin_lock(&nfnl_grp_active_lock); + v = READ_ONCE(net->ct.ctnetlink_has_listener); + if ((v & group_bit) == 0) { + v |= group_bit; + + /* read concurrently without nfnl_grp_active_lock held. */ + WRITE_ONCE(net->ct.ctnetlink_has_listener, v); + } + + spin_unlock(&nfnl_grp_active_lock); +#endif +} + static int nfnetlink_bind(struct net *net, int group) { const struct nfnetlink_subsystem *ss; @@ -670,28 +712,45 @@ static int nfnetlink_bind(struct net *net, int group) if (!ss) request_module_nowait("nfnetlink-subsys-%d", type); -#ifdef CONFIG_NF_CONNTRACK_EVENTS - if (type == NFNL_SUBSYS_CTNETLINK) { - nfnl_lock(NFNL_SUBSYS_CTNETLINK); - WRITE_ONCE(net->ct.ctnetlink_has_listener, true); - nfnl_unlock(NFNL_SUBSYS_CTNETLINK); - } -#endif + nfnetlink_bind_event(net, group); return 0; } static void nfnetlink_unbind(struct net *net, int group) { #ifdef CONFIG_NF_CONNTRACK_EVENTS + int type, group_bit; + if (group <= NFNLGRP_NONE || group > NFNLGRP_MAX) return; - if (nfnl_group2type[group] == NFNL_SUBSYS_CTNETLINK) { - nfnl_lock(NFNL_SUBSYS_CTNETLINK); - if (!nfnetlink_has_listeners(net, group)) - WRITE_ONCE(net->ct.ctnetlink_has_listener, false); - nfnl_unlock(NFNL_SUBSYS_CTNETLINK); + type = nfnl_group2type[group]; + + switch (type) { + case NFNL_SUBSYS_CTNETLINK: + break; + case NFNL_SUBSYS_CTNETLINK_EXP: + break; + default: + return; + } + + /* ctnetlink_has_listener is u8 */ + if (group >= 8) + return; + + group_bit = (1 << group); + + spin_lock(&nfnl_grp_active_lock); + if (!nfnetlink_has_listeners(net, group)) { + u8 v = READ_ONCE(net->ct.ctnetlink_has_listener); + + v &= ~group_bit; + + /* read concurrently without nfnl_grp_active_lock held. */ + WRITE_ONCE(net->ct.ctnetlink_has_listener, v); } + spin_unlock(&nfnl_grp_active_lock); #endif }
To avoid allocation of the conntrack extension area when possible, the default behaviour was changed to only allocate the event extension if a userspace program is subscribed to a notification group. Problem is that while 'conntrack -E' does enable the event allocation behind the scenes, 'conntrack -E expect' does not: no expectation events are delivered unless user sets "net.netfilter.nf_conntrack_events" back to 1 (always on). Fix the autodetection to also consider EXP type group. We need to track the 6 event groups (3+3, new/update/destroy for events and for expectations each) independently, else we'd disable events again if an expectation group becomes empty while there is still an active event group. Fixes: 2794cdb0b97b ("netfilter: nfnetlink: allow to detect if ctnetlink listeners exist") Reported-by: Yi Chen <yiche@redhat.com> Signed-off-by: Florian Westphal <fw@strlen.de> --- include/net/netns/conntrack.h | 2 +- net/netfilter/nfnetlink.c | 83 ++++++++++++++++++++++++++++++----- 2 files changed, 72 insertions(+), 13 deletions(-)