diff mbox series

[nf-next,v3,3/3] netfilter: nf_tables: Add locking for NFT_MSG_GETOBJ_RESET requests

Message ID 20231025200828.5482-4-phil@nwl.cc
State Changes Requested
Headers show
Series Add locking for NFT_MSG_GETOBJ_RESET requests | expand

Commit Message

Phil Sutter Oct. 25, 2023, 8:08 p.m. UTC
Objects' dump callbacks are not concurrency-safe per se when the reset
bit is set. If two CPUs perform a reset at the same time, at least
counter and quota objects suffer from value underrun.

Prevent this by introducing dedicated locking callbacks for nfnetlink
and the asynchronous dump handling to serialize access.

Signed-off-by: Phil Sutter <phil@nwl.cc>
---
 net/netfilter/nf_tables_api.c | 72 ++++++++++++++++++++++++++++-------
 1 file changed, 59 insertions(+), 13 deletions(-)

Comments

Pablo Neira Ayuso Oct. 25, 2023, 9 p.m. UTC | #1
On Wed, Oct 25, 2023 at 10:08:28PM +0200, Phil Sutter wrote:
> Objects' dump callbacks are not concurrency-safe per-se with reset bit
> set. If two CPUs perform a reset at the same time, at least counter and
> quota objects suffer from value underrun.
> 
> Prevent this by introducing dedicated locking callbacks for nfnetlink
> and the asynchronous dump handling to serialize access.
> 
> Signed-off-by: Phil Sutter <phil@nwl.cc>
> ---
>  net/netfilter/nf_tables_api.c | 72 ++++++++++++++++++++++++++++-------
>  1 file changed, 59 insertions(+), 13 deletions(-)
> 
> diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
> index 5f84bdd40c3f..245a2c5be082 100644
> --- a/net/netfilter/nf_tables_api.c
> +++ b/net/netfilter/nf_tables_api.c
[...]
> @@ -7832,16 +7876,18 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
>  		return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
>  	}
>  
> -	if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
> -		reset = true;
> +	if (!try_module_get(THIS_MODULE))
> +		return -EINVAL;

For the netlink dump path, __netlink_dump_start() already grabs a
module reference via c->module.

Why is this module reference needed for getting one object? This does
not follow the netlink dump path; it creates the skb and returns
immediately.

> +	rcu_read_unlock();
> +	mutex_lock(&nft_net->commit_mutex);
> +	skb2 = nf_tables_getobj_single(portid, info, nla, true);
> +	mutex_unlock(&nft_net->commit_mutex);
> +	rcu_read_lock();
> +	module_put(THIS_MODULE);
>  
> -	skb2 = nf_tables_getobj_single(portid, info, nla, reset);
>  	if (IS_ERR(skb2))
>  		return PTR_ERR(skb2);
>  
> -	if (!reset)
> -		return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);

This is what gets added in 1/3 that goes away, I see.

> -
>  	buf = kasprintf(GFP_ATOMIC, "%.*s:%u",
>  			nla_len(nla[NFTA_OBJ_TABLE]),
>  			(char *)nla_data(nla[NFTA_OBJ_TABLE]),
> @@ -9128,7 +9174,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
>  		.policy		= nft_obj_policy,
>  	},
>  	[NFT_MSG_GETOBJ_RESET] = {
> -		.call		= nf_tables_getobj,
> +		.call		= nf_tables_getobj_reset,
>  		.type		= NFNL_CB_RCU,
>  		.attr_count	= NFTA_OBJ_MAX,
>  		.policy		= nft_obj_policy,
> -- 
> 2.41.0
>
Pablo Neira Ayuso Oct. 26, 2023, 8:15 a.m. UTC | #2
Cc'ing Florian.

On Wed, Oct 25, 2023 at 11:00:14PM +0200, Pablo Neira Ayuso wrote:
> On Wed, Oct 25, 2023 at 10:08:28PM +0200, Phil Sutter wrote:
> > Objects' dump callbacks are not concurrency-safe per-se with reset bit
> > set. If two CPUs perform a reset at the same time, at least counter and
> > quota objects suffer from value underrun.
> > 
> > Prevent this by introducing dedicated locking callbacks for nfnetlink
> > and the asynchronous dump handling to serialize access.
> > 
> > Signed-off-by: Phil Sutter <phil@nwl.cc>
> > ---
> >  net/netfilter/nf_tables_api.c | 72 ++++++++++++++++++++++++++++-------
> >  1 file changed, 59 insertions(+), 13 deletions(-)
> > 
> > diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
> > index 5f84bdd40c3f..245a2c5be082 100644
> > --- a/net/netfilter/nf_tables_api.c
> > +++ b/net/netfilter/nf_tables_api.c
> [...]
> > @@ -7832,16 +7876,18 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
> >  		return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
> >  	}
> >  
> > -	if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
> > -		reset = true;
> > +	if (!try_module_get(THIS_MODULE))
> > +		return -EINVAL;
> 
> For netlink dump path, __netlink_dump_start() already grabs a
> reference module this via c->module.
> 
> Why is this module reference needed for getting one object? This does
> not follow netlink dump path, it creates the skb and it returns
> inmediately.

nfnetlink callbacks use nfnetlink_get_subsys(), which uses
rcu_dereference() to fetch the nfnetlink_subsystem callbacks. In
nfnetlink_rcv_batch() the ss pointer is fetched at the beginning of
the batch processing.

But then, if the RCU read-side lock is released via rcu_read_unlock(), then:

        const struct nfnetlink_subsystem *ss;

could become stale and a refetch is needed because the RCU read-side
lock was released, so the next iteration on the skb to process the next
nlmsghdr could be using stale pointers.

Could you please have a second look to confirm this?

Thanks!
Pablo Neira Ayuso Oct. 26, 2023, 8:26 a.m. UTC | #3
On Thu, Oct 26, 2023 at 10:15:33AM +0200, Pablo Neira Ayuso wrote:
> Cc'ing Florian.
> 
> On Wed, Oct 25, 2023 at 11:00:14PM +0200, Pablo Neira Ayuso wrote:
> > On Wed, Oct 25, 2023 at 10:08:28PM +0200, Phil Sutter wrote:
> > > Objects' dump callbacks are not concurrency-safe per-se with reset bit
> > > set. If two CPUs perform a reset at the same time, at least counter and
> > > quota objects suffer from value underrun.
> > > 
> > > Prevent this by introducing dedicated locking callbacks for nfnetlink
> > > and the asynchronous dump handling to serialize access.
> > > 
> > > Signed-off-by: Phil Sutter <phil@nwl.cc>
> > > ---
> > >  net/netfilter/nf_tables_api.c | 72 ++++++++++++++++++++++++++++-------
> > >  1 file changed, 59 insertions(+), 13 deletions(-)
> > > 
> > > diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
> > > index 5f84bdd40c3f..245a2c5be082 100644
> > > --- a/net/netfilter/nf_tables_api.c
> > > +++ b/net/netfilter/nf_tables_api.c
> > [...]
> > > @@ -7832,16 +7876,18 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
> > >  		return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
> > >  	}
> > >  
> > > -	if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
> > > -		reset = true;
> > > +	if (!try_module_get(THIS_MODULE))
> > > +		return -EINVAL;
> > 
> > For netlink dump path, __netlink_dump_start() already grabs a
> > reference module this via c->module.
> > 
> > Why is this module reference needed for getting one object? This does
> > not follow netlink dump path, it creates the skb and it returns
> > inmediately.
> 
> nfnetlink callbacks use nfnetlink_get_subsys() which use
> rcu_dereference() to fetch the nfnetlink_subsystem callbacks. In
> nfnetlink_rcv_batch() the ss pointer is fetched at the beginning of
> the batch processing.

Correction: This is the nfnetlink_rcv_msg() path, not the
nfnetlink_rcv_batch() path, because this is a _GET command, which
should never follow the nfnetlink_rcv_batch() path.

But the scenario below is still possible, considering an skb that
contains two _GET requests (which is possible because netlink supports
non-atomic batches, i.e. stacking several netlink messages in one
sendmsg() call).

> But then, if rcu_read_unlock() is released, then:
> 
>         const struct nfnetlink_subsystem *ss;
> 
> could become stale and refetch is needed because rcu read side lock
> was released, so next iteration on the skb to process the next
> nlmsghdr could be using stale pointers.
> 
> Could you please have a second look to confirm this?
> 
> Thanks!
Pablo Neira Ayuso Oct. 26, 2023, 8:55 a.m. UTC | #4
On Thu, Oct 26, 2023 at 10:26:35AM +0200, Pablo Neira Ayuso wrote:
> On Thu, Oct 26, 2023 at 10:15:33AM +0200, Pablo Neira Ayuso wrote:
> > Cc'ing Florian.
> > 
> > On Wed, Oct 25, 2023 at 11:00:14PM +0200, Pablo Neira Ayuso wrote:
> > > On Wed, Oct 25, 2023 at 10:08:28PM +0200, Phil Sutter wrote:
> > > > Objects' dump callbacks are not concurrency-safe per-se with reset bit
> > > > set. If two CPUs perform a reset at the same time, at least counter and
> > > > quota objects suffer from value underrun.
> > > > 
> > > > Prevent this by introducing dedicated locking callbacks for nfnetlink
> > > > and the asynchronous dump handling to serialize access.
> > > > 
> > > > Signed-off-by: Phil Sutter <phil@nwl.cc>
> > > > ---
> > > >  net/netfilter/nf_tables_api.c | 72 ++++++++++++++++++++++++++++-------
> > > >  1 file changed, 59 insertions(+), 13 deletions(-)
> > > > 
> > > > diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
> > > > index 5f84bdd40c3f..245a2c5be082 100644
> > > > --- a/net/netfilter/nf_tables_api.c
> > > > +++ b/net/netfilter/nf_tables_api.c
> > > [...]
> > > > @@ -7832,16 +7876,18 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
> > > >  		return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
> > > >  	}
> > > >  
> > > > -	if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
> > > > -		reset = true;
> > > > +	if (!try_module_get(THIS_MODULE))
> > > > +		return -EINVAL;
> > > 
> > > For netlink dump path, __netlink_dump_start() already grabs a
> > > reference module this via c->module.
> > > 
> > > Why is this module reference needed for getting one object? This does
> > > not follow netlink dump path, it creates the skb and it returns
> > > inmediately.
> > 
> > nfnetlink callbacks use nfnetlink_get_subsys() which use
> > rcu_dereference() to fetch the nfnetlink_subsystem callbacks. In
> > nfnetlink_rcv_batch() the ss pointer is fetched at the beginning of
> > the batch processing.
> 
> Correction: This is nfnetlink_rcv_msg() path, not nfnetlink_rcv_batch()
> path because this is a _GET command which should not ever follow
> nfnetlink_rcv_batch() path.
> 
> But still the reason below is possible, considering a skb that
> contains two _GET requests (which is possible because netlink supports
> for non-atomic batches, ie. stacking several netlink messages in one
> sendmsg() call).

Scratch this.

nfnetlink_rcv_msg() is called for each netlink message, and the
nfnetlink_subsystem pointer is then re-fetched.

In summary: the try_module_get() before rcu_read_unlock() in the netlink
get/dump path is safe.

Sorry for the noise.
diff mbox series

Patch

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 5f84bdd40c3f..245a2c5be082 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -7732,6 +7732,19 @@  static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
+static int nf_tables_dumpreset_obj(struct sk_buff *skb,
+				   struct netlink_callback *cb)
+{
+	struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk));
+	int ret;
+
+	mutex_lock(&nft_net->commit_mutex);
+	ret = nf_tables_dump_obj(skb, cb);
+	mutex_unlock(&nft_net->commit_mutex);
+
+	return ret;
+}
+
 static int nf_tables_dump_obj_start(struct netlink_callback *cb)
 {
 	struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
@@ -7748,12 +7761,18 @@  static int nf_tables_dump_obj_start(struct netlink_callback *cb)
 	if (nla[NFTA_OBJ_TYPE])
 		ctx->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
 
-	if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
-		ctx->reset = true;
-
 	return 0;
 }
 
+static int nf_tables_dumpreset_obj_start(struct netlink_callback *cb)
+{
+	struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
+
+	ctx->reset = true;
+
+	return nf_tables_dump_obj_start(cb);
+}
+
 static int nf_tables_dump_obj_done(struct netlink_callback *cb)
 {
 	struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
@@ -7812,18 +7831,43 @@  nf_tables_getobj_single(u32 portid, const struct nfnl_info *info,
 
 static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
 			    const struct nlattr * const nla[])
+{
+	u32 portid = NETLINK_CB(skb).portid;
+	struct sk_buff *skb2;
+
+	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+		struct netlink_dump_control c = {
+			.start = nf_tables_dump_obj_start,
+			.dump = nf_tables_dump_obj,
+			.done = nf_tables_dump_obj_done,
+			.module = THIS_MODULE,
+			.data = (void *)nla,
+		};
+
+		return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
+	}
+
+	skb2 = nf_tables_getobj_single(portid, info, nla, false);
+	if (IS_ERR(skb2))
+		return PTR_ERR(skb2);
+
+	return nfnetlink_unicast(skb2, info->net, portid);
+}
+
+static int nf_tables_getobj_reset(struct sk_buff *skb,
+				  const struct nfnl_info *info,
+				  const struct nlattr * const nla[])
 {
 	struct nftables_pernet *nft_net = nft_pernet(info->net);
 	u32 portid = NETLINK_CB(skb).portid;
 	struct net *net = info->net;
 	struct sk_buff *skb2;
-	bool reset = false;
 	char *buf;
 
 	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
-			.start = nf_tables_dump_obj_start,
-			.dump = nf_tables_dump_obj,
+			.start = nf_tables_dumpreset_obj_start,
+			.dump = nf_tables_dumpreset_obj,
 			.done = nf_tables_dump_obj_done,
 			.module = THIS_MODULE,
 			.data = (void *)nla,
@@ -7832,16 +7876,18 @@  static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
 		return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
 	}
 
-	if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
-		reset = true;
+	if (!try_module_get(THIS_MODULE))
+		return -EINVAL;
+	rcu_read_unlock();
+	mutex_lock(&nft_net->commit_mutex);
+	skb2 = nf_tables_getobj_single(portid, info, nla, true);
+	mutex_unlock(&nft_net->commit_mutex);
+	rcu_read_lock();
+	module_put(THIS_MODULE);
 
-	skb2 = nf_tables_getobj_single(portid, info, nla, reset);
 	if (IS_ERR(skb2))
 		return PTR_ERR(skb2);
 
-	if (!reset)
-		return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
-
 	buf = kasprintf(GFP_ATOMIC, "%.*s:%u",
 			nla_len(nla[NFTA_OBJ_TABLE]),
 			(char *)nla_data(nla[NFTA_OBJ_TABLE]),
@@ -9128,7 +9174,7 @@  static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
 		.policy		= nft_obj_policy,
 	},
 	[NFT_MSG_GETOBJ_RESET] = {
-		.call		= nf_tables_getobj,
+		.call		= nf_tables_getobj_reset,
 		.type		= NFNL_CB_RCU,
 		.attr_count	= NFTA_OBJ_MAX,
 		.policy		= nft_obj_policy,