diff mbox series

[net,v2] genetlink: fix memory leaks in genl_family_rcv_msg_dumpit()

Message ID 20200603044910.27259-1-xiyou.wangcong@gmail.com
State Accepted
Delegated to: David Miller
Headers show
Series [net,v2] genetlink: fix memory leaks in genl_family_rcv_msg_dumpit() | expand

Commit Message

Cong Wang June 3, 2020, 4:49 a.m. UTC
There are two kinds of memory leaks in genl_family_rcv_msg_dumpit():

1. Before we call ops->start(), whenever an error happens, we forget
   to free the memory allocated in genl_family_rcv_msg_dumpit().

2. When ops->start() fails, the 'info' has been already installed on
   the per socket control block, so we should not free it here. More
   importantly, nlk->cb_running is still false at this point, so
   netlink_sock_destruct() cannot free it either.

The first kind of memory leaks is easier to resolve, but the second
one requires some deeper thoughts.

After reviewing how netfilter handles this, the most elegant solution
I find is just to use a similar way to allocate the memory, that is,
moving memory allocations from caller into ops->start(). With this,
we can solve both kinds of memory leaks: for 1), no memory allocation
happens before ops->start(); for 2), ops->start() handles its own
failures and 'info' is installed to the socket control block only
when success. The only ugliness here is we have to pass all local
variables on stack via a struct, but this is not hard to understand.

Alternatively, we can introduce a ops->free() to solve this too,
but it is overkill as only genetlink has this problem so far.

Fixes: 1927f41a22a0 ("net: genetlink: introduce dump info struct to be available during dumpit op")
Reported-by: syzbot+21f04f481f449c8db840@syzkaller.appspotmail.com
Cc: "Jason A. Donenfeld" <Jason@zx2c4.com>
Cc: Florian Westphal <fw@strlen.de>
Cc: Pablo Neira Ayuso <pablo@netfilter.org>
Cc: Jiri Pirko <jiri@mellanox.com>
Cc: YueHaibing <yuehaibing@huawei.com>
Cc: Shaochun Chen <cscnull@gmail.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
---
 net/netlink/genetlink.c | 94 +++++++++++++++++++++++++----------------
 1 file changed, 58 insertions(+), 36 deletions(-)

Comments

David Miller June 4, 2020, 10:36 p.m. UTC | #1
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Tue,  2 Jun 2020 21:49:10 -0700

> There are two kinds of memory leaks in genl_family_rcv_msg_dumpit():
> 
> 1. Before we call ops->start(), whenever an error happens, we forget
>    to free the memory allocated in genl_family_rcv_msg_dumpit().
> 
> 2. When ops->start() fails, the 'info' has been already installed on
>    the per socket control block, so we should not free it here. More
>    importantly, nlk->cb_running is still false at this point, so
>    netlink_sock_destruct() cannot free it either.
> 
> The first kind of memory leaks is easier to resolve, but the second
> one requires some deeper thoughts.
> 
> After reviewing how netfilter handles this, the most elegant solution
> I find is just to use a similar way to allocate the memory, that is,
> moving memory allocations from caller into ops->start(). With this,
> we can solve both kinds of memory leaks: for 1), no memory allocation
> happens before ops->start(); for 2), ops->start() handles its own
> failures and 'info' is installed to the socket control block only
> when success. The only ugliness here is we have to pass all local
> variables on stack via a struct, but this is not hard to understand.
> 
> Alternatively, we can introduce a ops->free() to solve this too,
> but it is overkill as only genetlink has this problem so far.
> 
> Fixes: 1927f41a22a0 ("net: genetlink: introduce dump info struct to be available during dumpit op")
> Reported-by: syzbot+21f04f481f449c8db840@syzkaller.appspotmail.com
> Cc: "Jason A. Donenfeld" <Jason@zx2c4.com>
> Cc: Florian Westphal <fw@strlen.de>
> Cc: Pablo Neira Ayuso <pablo@netfilter.org>
> Cc: Jiri Pirko <jiri@mellanox.com>
> Cc: YueHaibing <yuehaibing@huawei.com>
> Cc: Shaochun Chen <cscnull@gmail.com>
> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>

Applied and queued up for -stable, thanks.
Ido Schimmel June 10, 2020, 2:27 p.m. UTC | #2
On Tue, Jun 02, 2020 at 09:49:10PM -0700, Cong Wang wrote:
> diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
> index 9f357aa22b94..bcbba0bef1c2 100644
> --- a/net/netlink/genetlink.c
> +++ b/net/netlink/genetlink.c
> @@ -513,15 +513,58 @@ static void genl_family_rcv_msg_attrs_free(const struct genl_family *family,
>  		kfree(attrbuf);
>  }
>  
> -static int genl_lock_start(struct netlink_callback *cb)
> +struct genl_start_context {
> +	const struct genl_family *family;
> +	struct nlmsghdr *nlh;
> +	struct netlink_ext_ack *extack;
> +	const struct genl_ops *ops;
> +	int hdrlen;
> +};
> +
> +static int genl_start(struct netlink_callback *cb)
>  {
> -	const struct genl_ops *ops = genl_dumpit_info(cb)->ops;
> +	struct genl_start_context *ctx = cb->data;
> +	const struct genl_ops *ops = ctx->ops;
> +	struct genl_dumpit_info *info;
> +	struct nlattr **attrs = NULL;
>  	int rc = 0;
>  
> +	if (ops->validate & GENL_DONT_VALIDATE_DUMP)
> +		goto no_attrs;
> +
> +	if (ctx->nlh->nlmsg_len < nlmsg_msg_size(ctx->hdrlen))
> +		return -EINVAL;
> +
> +	attrs = genl_family_rcv_msg_attrs_parse(ctx->family, ctx->nlh, ctx->extack,
> +						ops, ctx->hdrlen,
> +						GENL_DONT_VALIDATE_DUMP_STRICT,
> +						true);
> +	if (IS_ERR(attrs))
> +		return PTR_ERR(attrs);
> +
> +no_attrs:
> +	info = genl_dumpit_info_alloc();
> +	if (!info) {
> +		kfree(attrs);
> +		return -ENOMEM;
> +	}
> +	info->family = ctx->family;
> +	info->ops = ops;
> +	info->attrs = attrs;
> +
> +	cb->data = info;
>  	if (ops->start) {
> -		genl_lock();
> +		if (!ctx->family->parallel_ops)
> +			genl_lock();
>  		rc = ops->start(cb);
> -		genl_unlock();
> +		if (!ctx->family->parallel_ops)
> +			genl_unlock();
> +	}
> +
> +	if (rc) {
> +		kfree(attrs);
> +		genl_dumpit_info_free(info);
> +		cb->data = NULL;
>  	}
>  	return rc;
>  }
> @@ -548,7 +591,7 @@ static int genl_lock_done(struct netlink_callback *cb)
>  		rc = ops->done(cb);
>  		genl_unlock();
>  	}
> -	genl_family_rcv_msg_attrs_free(info->family, info->attrs, true);
> +	genl_family_rcv_msg_attrs_free(info->family, info->attrs, false);

Cong,

This seems to result in a memory leak because 'info->attrs' is never
freed in the non-parallel case.

Both the parallel and non-parallel code paths call genl_start() which
allocates the array, but the latter calls genl_lock_done() as its done()
callback which never frees it.

Can be reproduced as follows:

echo "10 1" > /sys/bus/netdevsim/new_device
devlink trap &> /dev/null
echo scan > /sys/kernel/debug/kmemleak
cat /sys/kernel/debug/kmemleak

unreferenced object 0xffff88810f1ed000 (size 2048):
  comm "devlink", pid 201, jiffies 4295606431 (age 35.858s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<00000000a7cb7530>] __kmalloc+0x1d6/0x3d0
    [<000000001cb013e1>] genl_family_rcv_msg_attrs_parse+0x1f3/0x320
    [<00000000b201bc93>] genl_start+0x1ab/0x5e0
    [<00000000786e531e>] __netlink_dump_start+0x5b5/0x940
    [<00000000a2332fcb>] genl_family_rcv_msg_dumpit+0x32e/0x3a0
    [<00000000112052dd>] genl_rcv_msg+0x6d7/0xb40
    [<000000005826e358>] netlink_rcv_skb+0x175/0x490
    [<000000002c5f41ae>] genl_rcv+0x2d/0x40
    [<00000000f0301e6d>] netlink_unicast+0x5d0/0x7f0
    [<00000000a76a3934>] netlink_sendmsg+0x981/0xe90
    [<000000001c478a6f>] __sys_sendto+0x2cd/0x450
    [<0000000079d420b0>] __x64_sys_sendto+0xe6/0x1a0
    [<000000004e535e4b>] do_syscall_64+0xc1/0x600
    [<000000006e5dd3c4>] entry_SYSCALL_64_after_hwframe+0x49/0xb3

>  	genl_dumpit_info_free(info);
>  	return rc;
>  }
> @@ -573,43 +616,23 @@ static int genl_family_rcv_msg_dumpit(const struct genl_family *family,
>  				      const struct genl_ops *ops,
>  				      int hdrlen, struct net *net)
>  {
> -	struct genl_dumpit_info *info;
> -	struct nlattr **attrs = NULL;
> +	struct genl_start_context ctx;
>  	int err;
>  
>  	if (!ops->dumpit)
>  		return -EOPNOTSUPP;
>  
> -	if (ops->validate & GENL_DONT_VALIDATE_DUMP)
> -		goto no_attrs;
> -
> -	if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
> -		return -EINVAL;
> -
> -	attrs = genl_family_rcv_msg_attrs_parse(family, nlh, extack,
> -						ops, hdrlen,
> -						GENL_DONT_VALIDATE_DUMP_STRICT,
> -						true);
> -	if (IS_ERR(attrs))
> -		return PTR_ERR(attrs);
> -
> -no_attrs:
> -	/* Allocate dumpit info. It is going to be freed by done() callback. */
> -	info = genl_dumpit_info_alloc();
> -	if (!info) {
> -		genl_family_rcv_msg_attrs_free(family, attrs, true);
> -		return -ENOMEM;
> -	}
> -
> -	info->family = family;
> -	info->ops = ops;
> -	info->attrs = attrs;
> +	ctx.family = family;
> +	ctx.nlh = nlh;
> +	ctx.extack = extack;
> +	ctx.ops = ops;
> +	ctx.hdrlen = hdrlen;
>  
>  	if (!family->parallel_ops) {
>  		struct netlink_dump_control c = {
>  			.module = family->module,
> -			.data = info,
> -			.start = genl_lock_start,
> +			.data = &ctx,
> +			.start = genl_start,
>  			.dump = genl_lock_dumpit,
>  			.done = genl_lock_done,
>  		};
> @@ -617,12 +640,11 @@ static int genl_family_rcv_msg_dumpit(const struct genl_family *family,
>  		genl_unlock();
>  		err = __netlink_dump_start(net->genl_sock, skb, nlh, &c);
>  		genl_lock();
> -
>  	} else {
>  		struct netlink_dump_control c = {
>  			.module = family->module,
> -			.data = info,
> -			.start = ops->start,
> +			.data = &ctx,
> +			.start = genl_start,
>  			.dump = ops->dumpit,
>  			.done = genl_parallel_done,
>  		};
> -- 
> 2.26.2
>
Cong Wang June 11, 2020, 4:14 a.m. UTC | #3
On Wed, Jun 10, 2020 at 7:27 AM Ido Schimmel <idosch@idosch.org> wrote:
> > @@ -548,7 +591,7 @@ static int genl_lock_done(struct netlink_callback *cb)
> >               rc = ops->done(cb);
> >               genl_unlock();
> >       }
> > -     genl_family_rcv_msg_attrs_free(info->family, info->attrs, true);
> > +     genl_family_rcv_msg_attrs_free(info->family, info->attrs, false);
>
> Cong,
>
> This seems to result in a memory leak because 'info->attrs' is never
> freed in the non-parallel case.
>
> Both the parallel and non-parallel code paths call genl_start() which
> allocates the array, but the latter calls genl_lock_done() as its done()
> callback which never frees it.

Good catch! Looks like I should just revert the above chunk. The
last parameter of genl_family_rcv_msg_attrs_free() is just confusing,
genl_lock_done() is clearly not parallel at all..

I will take a deeper look and send out a patch tomorrow.

Thanks!
diff mbox series

Patch

diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 9f357aa22b94..bcbba0bef1c2 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -513,15 +513,58 @@  static void genl_family_rcv_msg_attrs_free(const struct genl_family *family,
 		kfree(attrbuf);
 }
 
-static int genl_lock_start(struct netlink_callback *cb)
+struct genl_start_context {
+	const struct genl_family *family;
+	struct nlmsghdr *nlh;
+	struct netlink_ext_ack *extack;
+	const struct genl_ops *ops;
+	int hdrlen;
+};
+
+static int genl_start(struct netlink_callback *cb)
 {
-	const struct genl_ops *ops = genl_dumpit_info(cb)->ops;
+	struct genl_start_context *ctx = cb->data;
+	const struct genl_ops *ops = ctx->ops;
+	struct genl_dumpit_info *info;
+	struct nlattr **attrs = NULL;
 	int rc = 0;
 
+	if (ops->validate & GENL_DONT_VALIDATE_DUMP)
+		goto no_attrs;
+
+	if (ctx->nlh->nlmsg_len < nlmsg_msg_size(ctx->hdrlen))
+		return -EINVAL;
+
+	attrs = genl_family_rcv_msg_attrs_parse(ctx->family, ctx->nlh, ctx->extack,
+						ops, ctx->hdrlen,
+						GENL_DONT_VALIDATE_DUMP_STRICT,
+						true);
+	if (IS_ERR(attrs))
+		return PTR_ERR(attrs);
+
+no_attrs:
+	info = genl_dumpit_info_alloc();
+	if (!info) {
+		kfree(attrs);
+		return -ENOMEM;
+	}
+	info->family = ctx->family;
+	info->ops = ops;
+	info->attrs = attrs;
+
+	cb->data = info;
 	if (ops->start) {
-		genl_lock();
+		if (!ctx->family->parallel_ops)
+			genl_lock();
 		rc = ops->start(cb);
-		genl_unlock();
+		if (!ctx->family->parallel_ops)
+			genl_unlock();
+	}
+
+	if (rc) {
+		kfree(attrs);
+		genl_dumpit_info_free(info);
+		cb->data = NULL;
 	}
 	return rc;
 }
@@ -548,7 +591,7 @@  static int genl_lock_done(struct netlink_callback *cb)
 		rc = ops->done(cb);
 		genl_unlock();
 	}
-	genl_family_rcv_msg_attrs_free(info->family, info->attrs, true);
+	genl_family_rcv_msg_attrs_free(info->family, info->attrs, false);
 	genl_dumpit_info_free(info);
 	return rc;
 }
@@ -573,43 +616,23 @@  static int genl_family_rcv_msg_dumpit(const struct genl_family *family,
 				      const struct genl_ops *ops,
 				      int hdrlen, struct net *net)
 {
-	struct genl_dumpit_info *info;
-	struct nlattr **attrs = NULL;
+	struct genl_start_context ctx;
 	int err;
 
 	if (!ops->dumpit)
 		return -EOPNOTSUPP;
 
-	if (ops->validate & GENL_DONT_VALIDATE_DUMP)
-		goto no_attrs;
-
-	if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
-		return -EINVAL;
-
-	attrs = genl_family_rcv_msg_attrs_parse(family, nlh, extack,
-						ops, hdrlen,
-						GENL_DONT_VALIDATE_DUMP_STRICT,
-						true);
-	if (IS_ERR(attrs))
-		return PTR_ERR(attrs);
-
-no_attrs:
-	/* Allocate dumpit info. It is going to be freed by done() callback. */
-	info = genl_dumpit_info_alloc();
-	if (!info) {
-		genl_family_rcv_msg_attrs_free(family, attrs, true);
-		return -ENOMEM;
-	}
-
-	info->family = family;
-	info->ops = ops;
-	info->attrs = attrs;
+	ctx.family = family;
+	ctx.nlh = nlh;
+	ctx.extack = extack;
+	ctx.ops = ops;
+	ctx.hdrlen = hdrlen;
 
 	if (!family->parallel_ops) {
 		struct netlink_dump_control c = {
 			.module = family->module,
-			.data = info,
-			.start = genl_lock_start,
+			.data = &ctx,
+			.start = genl_start,
 			.dump = genl_lock_dumpit,
 			.done = genl_lock_done,
 		};
@@ -617,12 +640,11 @@  static int genl_family_rcv_msg_dumpit(const struct genl_family *family,
 		genl_unlock();
 		err = __netlink_dump_start(net->genl_sock, skb, nlh, &c);
 		genl_lock();
-
 	} else {
 		struct netlink_dump_control c = {
 			.module = family->module,
-			.data = info,
-			.start = ops->start,
+			.data = &ctx,
+			.start = genl_start,
 			.dump = ops->dumpit,
 			.done = genl_parallel_done,
 		};