[SRU,Trusty,1/1] block: fix module reference leak on put_disk() call for cgroups throttle

Message ID 10dfdd3b6400b28812d7e2aadfcaac1457e42c6d.1492613717.git.joseph.salisbury@canonical.com
State New
Headers show

Commit Message

Joseph Salisbury April 19, 2017, 6:36 p.m.
From: Roman Pen <roman.penyaev@profitbricks.com>

BugLink: http://bugs.launchpad.net/bugs/1683976

get_disk(),get_gendisk() calls have non explicit side effect: they
increase the reference on the disk owner module.

The following is the correct sequence how to get a disk reference and
to put it:

    disk = get_gendisk(...);

    /* use disk */

    owner = disk->fops->owner;
    put_disk(disk);
    module_put(owner);

fs/block_dev.c is aware of this required module_put() call, but f.e.
blkg_conf_finish(), which is located in block/blk-cgroup.c, does not put
a module reference.  To see a leakage in action cgroups throttle config
can be used.  In the following script I'm removing throttle for /dev/ram0
(actually this is NOP, because throttle was never set for this device):

    # lsmod | grep brd
    brd                     5175  0
    # i=100; while [ $i -gt 0 ]; do echo "1:0 0" > \
        /sys/fs/cgroup/blkio/blkio.throttle.read_bps_device; i=$(($i - 1)); \
    done
    # lsmod | grep brd
    brd                     5175  100

Now brd module has 100 references.

The issue is fixed by calling module_put() just right away put_disk().

Signed-off-by: Roman Pen <roman.penyaev@profitbricks.com>
Cc: Gi-Oh Kim <gi-oh.kim@profitbricks.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Jens Axboe <axboe@fb.com>
(backported from commit 39a169b62b415390398291080dafe63aec751e0a)
Signed-off-by: Joseph Salisbury <joseph.salisbury@canonical.com>
---
 block/blk-cgroup.c | 9 +++++++++
 1 file changed, 9 insertions(+)

Comments

Colin Ian King April 19, 2017, 10:57 p.m. | #1
On 19/04/17 19:36, Joseph Salisbury wrote:
> From: Roman Pen <roman.penyaev@profitbricks.com>
> 
> BugLink: http://bugs.launchpad.net/bugs/1683976
> 
> get_disk(),get_gendisk() calls have non explicit side effect: they
> increase the reference on the disk owner module.
> 
> The following is the correct sequence how to get a disk reference and
> to put it:
> 
>     disk = get_gendisk(...);
> 
>     /* use disk */
> 
>     owner = disk->fops->owner;
>     put_disk(disk);
>     module_put(owner);
> 
> fs/block_dev.c is aware of this required module_put() call, but f.e.
> blkg_conf_finish(), which is located in block/blk-cgroup.c, does not put
> a module reference.  To see a leakage in action cgroups throttle config
> can be used.  In the following script I'm removing throttle for /dev/ram0
> (actually this is NOP, because throttle was never set for this device):
> 
>     # lsmod | grep brd
>     brd                     5175  0
>     # i=100; while [ $i -gt 0 ]; do echo "1:0 0" > \
>         /sys/fs/cgroup/blkio/blkio.throttle.read_bps_device; i=$(($i - 1)); \
>     done
>     # lsmod | grep brd
>     brd                     5175  100
> 
> Now brd module has 100 references.
> 
> The issue is fixed by calling module_put() just right away put_disk().
> 
> Signed-off-by: Roman Pen <roman.penyaev@profitbricks.com>
> Cc: Gi-Oh Kim <gi-oh.kim@profitbricks.com>
> Cc: Tejun Heo <tj@kernel.org>
> Cc: Jens Axboe <axboe@kernel.dk>
> Cc: linux-block@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> Signed-off-by: Jens Axboe <axboe@fb.com>
> (backported from commit 39a169b62b415390398291080dafe63aec751e0a)
> Signed-off-by: Joseph Salisbury <joseph.salisbury@canonical.com>
> ---
>  block/blk-cgroup.c | 9 +++++++++
>  1 file changed, 9 insertions(+)
> 
> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
> index a717585..640ea87 100644
> --- a/block/blk-cgroup.c
> +++ b/block/blk-cgroup.c
> @@ -695,6 +695,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
>  {
>  	struct gendisk *disk;
>  	struct blkcg_gq *blkg;
> +	struct module *owner;
>  	unsigned int major, minor;
>  	unsigned long long v;
>  	int part, ret;
> @@ -706,7 +707,9 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
>  	if (!disk)
>  		return -EINVAL;
>  	if (part) {
> +		owner = disk->fops->owner;
>  		put_disk(disk);
> +		module_put(owner);
>  		return -EINVAL;
>  	}
>  
> @@ -722,7 +725,9 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
>  		ret = PTR_ERR(blkg);
>  		rcu_read_unlock();
>  		spin_unlock_irq(disk->queue->queue_lock);
> +		owner = disk->fops->owner;
>  		put_disk(disk);
> +		module_put(owner);
>  		/*
>  		 * If queue was bypassing, we should retry.  Do so after a
>  		 * short msleep().  It isn't strictly necessary but queue
> @@ -753,9 +758,13 @@ EXPORT_SYMBOL_GPL(blkg_conf_prep);
>  void blkg_conf_finish(struct blkg_conf_ctx *ctx)
>  	__releases(ctx->disk->queue->queue_lock) __releases(rcu)
>  {
> +	struct module *owner;
> +
>  	spin_unlock_irq(ctx->disk->queue->queue_lock);
>  	rcu_read_unlock();
> +	owner = ctx->disk->fops->owner;
>  	put_disk(ctx->disk);
> +	module_put(owner);
>  }
>  EXPORT_SYMBOL_GPL(blkg_conf_finish);
>  
> 
Sane looking upstream fix, backport looks good, good test case. This
does not seem to be tested for Trusty, but going on the Xenial results
I'm OK with that.

Thanks Joe.

Acked-by: Colin Ian King <colin.king@canonical.com>

Patch

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index a717585..640ea87 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -695,6 +695,7 @@  int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 {
 	struct gendisk *disk;
 	struct blkcg_gq *blkg;
+	struct module *owner;
 	unsigned int major, minor;
 	unsigned long long v;
 	int part, ret;
@@ -706,7 +707,9 @@  int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 	if (!disk)
 		return -EINVAL;
 	if (part) {
+		owner = disk->fops->owner;
 		put_disk(disk);
+		module_put(owner);
 		return -EINVAL;
 	}
 
@@ -722,7 +725,9 @@  int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 		ret = PTR_ERR(blkg);
 		rcu_read_unlock();
 		spin_unlock_irq(disk->queue->queue_lock);
+		owner = disk->fops->owner;
 		put_disk(disk);
+		module_put(owner);
 		/*
 		 * If queue was bypassing, we should retry.  Do so after a
 		 * short msleep().  It isn't strictly necessary but queue
@@ -753,9 +758,13 @@  EXPORT_SYMBOL_GPL(blkg_conf_prep);
 void blkg_conf_finish(struct blkg_conf_ctx *ctx)
 	__releases(ctx->disk->queue->queue_lock) __releases(rcu)
 {
+	struct module *owner;
+
 	spin_unlock_irq(ctx->disk->queue->queue_lock);
 	rcu_read_unlock();
+	owner = ctx->disk->fops->owner;
 	put_disk(ctx->disk);
+	module_put(owner);
 }
 EXPORT_SYMBOL_GPL(blkg_conf_finish);