@@ -1582,6 +1582,20 @@ static int blkdev_open(struct inode * inode, struct file * filp)
if (bdev == NULL)
return -ENOMEM;
+ /*
+ * A negative i_writecount for bdev->bd_inode means that the bdev
+ * or one of its paritions is mounted in a user namespace. Deny
+ * writing for non-root in this case, otherwise an unprivileged
+ * user can attack the kernel by modifying the backing store of a
+ * mounted filesystem.
+ */
+ if ((filp->f_mode & FMODE_WRITE) &&
+ !file_ns_capable(filp, &init_user_ns, CAP_SYS_ADMIN) &&
+ !atomic_inc_unless_negative(&bdev->bd_inode->i_writecount)) {
+ bdput(bdev);
+ return -EBUSY;
+ }
+
filp->f_mapping = bdev->bd_inode->i_mapping;
return blkdev_get(bdev, filp->f_mode, filp);
@@ -1683,6 +1697,9 @@ EXPORT_SYMBOL(blkdev_put);
static int blkdev_close(struct inode * inode, struct file * filp)
{
struct block_device *bdev = I_BDEV(bdev_file_inode(filp));
+ if (filp->f_mode & FMODE_WRITE &&
+ !file_ns_capable(filp, &init_user_ns, CAP_SYS_ADMIN))
+ atomic_dec(&bdev->bd_inode->i_writecount);
blkdev_put(bdev, filp->f_mode);
return 0;
}
@@ -1033,6 +1033,23 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
if (IS_ERR(bdev))
return ERR_CAST(bdev);
+ if (current_user_ns() != &init_user_ns) {
+ /*
+ * For userns mounts, disallow mounting if bdev is open for
+ * writing
+ */
+ if (!atomic_dec_unless_positive(&bdev->bd_inode->i_writecount)) {
+ error = -EBUSY;
+ goto error_bdev;
+ }
+ if (bdev->bd_contains != bdev &&
+ !atomic_dec_unless_positive(&bdev->bd_contains->bd_inode->i_writecount)) {
+ atomic_inc(&bdev->bd_inode->i_writecount);
+ error = -EBUSY;
+ goto error_bdev;
+ }
+ }
+
/*
* once the super is inserted into the list by sget, s_umount
* will protect the lockfs code from trying to start a snapshot
@@ -1042,7 +1059,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
if (bdev->bd_fsfreeze_count > 0) {
mutex_unlock(&bdev->bd_fsfreeze_mutex);
error = -EBUSY;
- goto error_bdev;
+ goto error_inc;
}
s = sget(fs_type, test_bdev_super, set_bdev_super, flags | MS_NOSEC,
bdev);
@@ -1054,7 +1071,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
if ((flags ^ s->s_flags) & MS_RDONLY) {
deactivate_locked_super(s);
error = -EBUSY;
- goto error_bdev;
+ goto error_inc;
}
/*
@@ -1085,6 +1102,12 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
error_s:
error = PTR_ERR(s);
+error_inc:
+ if (current_user_ns() != &init_user_ns) {
+ atomic_inc(&bdev->bd_inode->i_writecount);
+ if (bdev->bd_contains != bdev)
+ atomic_inc(&bdev->bd_contains->bd_inode->i_writecount);
+ }
error_bdev:
blkdev_put(bdev, mode);
error:
@@ -1101,6 +1124,11 @@ void kill_block_super(struct super_block *sb)
generic_shutdown_super(sb);
sync_blockdev(bdev);
WARN_ON_ONCE(!(mode & FMODE_EXCL));
+ if (sb->s_user_ns != &init_user_ns) {
+ atomic_inc(&bdev->bd_inode->i_writecount);
+ if (bdev->bd_contains != bdev)
+ atomic_inc(&bdev->bd_contains->bd_inode->i_writecount);
+ }
blkdev_put(bdev, mode | FMODE_EXCL);
}
For unprivileged mounts to be safe the user must not be able to make changes to the backing store while it is mounted. This patch takes a step towards preventing this by refusing to mount in a user namepspace if the block device is open for writing and refusing attempts to open the block device for writing by non- root while it is mounted in a user namespace. To prevent this from happening we use i_writecount in the inodes of the bdev filesystem similarly to how it is used for regular files. Whenever the device is opened for writing i_writecount is checked; if it is negative the open returns -EBUSY, otherwise i_writecount is incremented. On mount, a positive i_writecount results in mount_bdev returning -EBUSY, otherwise i_writecount is decremented. Opens by root and mounts from init_user_ns do not check nor modify i_writecount. Signed-off-by: Seth Forshee <seth.forshee@canonical.com> --- This patch was mistakenly dropped in the rebase from xenial to yakkety. fs/block_dev.c | 17 +++++++++++++++++ fs/super.c | 32 ++++++++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 2 deletions(-)