diff mbox

[yakkety] UBUNTU: SAUCE: (namespace) block_dev: Forbid unprivileged mounting when device is opened for writing

Message ID 1474317986-47617-1-git-send-email-seth.forshee@canonical.com
State New
Headers show

Commit Message

Seth Forshee Sept. 19, 2016, 8:46 p.m. UTC
For unprivileged mounts to be safe the user must not be able to
make changes to the backing store while it is mounted. This patch
takes a step towards preventing this by refusing to mount in a
user namepspace if the block device is open for writing and
refusing attempts to open the block device for writing by non-
root while it is mounted in a user namespace.

To prevent this from happening we use i_writecount in the inodes
of the bdev filesystem similarly to how it is used for regular
files. Whenever the device is opened for writing i_writecount
is checked; if it is negative the open returns -EBUSY, otherwise
i_writecount is incremented. On mount, a positive i_writecount
results in mount_bdev returning -EBUSY, otherwise i_writecount
is decremented. Opens by root and mounts from init_user_ns do not
check nor modify i_writecount.

Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
---

This patch was mistakenly dropped in the rebase from xenial to yakkety.

 fs/block_dev.c | 17 +++++++++++++++++
 fs/super.c     | 32 ++++++++++++++++++++++++++++++--
 2 files changed, 47 insertions(+), 2 deletions(-)

Comments

Tim Gardner Sept. 20, 2016, 1:21 p.m. UTC | #1
Does this patch address a specific test regression ? Just curious how
you noticed this.
Seth Forshee Sept. 20, 2016, 1:40 p.m. UTC | #2
On Tue, Sep 20, 2016 at 07:21:46AM -0600, Tim Gardner wrote:
> Does this patch address a specific test regression ? Just curious how
> you noticed this.

No specific regression. It was just chance - I was looking at some code
and noticed that it didn't have these changes.
diff mbox

Patch

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 0bfe3e2..1fd10d4 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1582,6 +1582,20 @@  static int blkdev_open(struct inode * inode, struct file * filp)
 	if (bdev == NULL)
 		return -ENOMEM;
 
+	/*
+	 * A negative i_writecount for bdev->bd_inode means that the bdev
+	 * or one of its paritions is mounted in a user namespace. Deny
+	 * writing for non-root in this case, otherwise an unprivileged
+	 * user can attack the kernel by modifying the backing store of a
+	 * mounted filesystem.
+	 */
+	if ((filp->f_mode & FMODE_WRITE) &&
+	    !file_ns_capable(filp, &init_user_ns, CAP_SYS_ADMIN) &&
+	    !atomic_inc_unless_negative(&bdev->bd_inode->i_writecount)) {
+		bdput(bdev);
+		return -EBUSY;
+	}
+
 	filp->f_mapping = bdev->bd_inode->i_mapping;
 
 	return blkdev_get(bdev, filp->f_mode, filp);
@@ -1683,6 +1697,9 @@  EXPORT_SYMBOL(blkdev_put);
 static int blkdev_close(struct inode * inode, struct file * filp)
 {
 	struct block_device *bdev = I_BDEV(bdev_file_inode(filp));
+	if (filp->f_mode & FMODE_WRITE &&
+	    !file_ns_capable(filp, &init_user_ns, CAP_SYS_ADMIN))
+		atomic_dec(&bdev->bd_inode->i_writecount);
 	blkdev_put(bdev, filp->f_mode);
 	return 0;
 }
diff --git a/fs/super.c b/fs/super.c
index c2ff475..6022546 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1033,6 +1033,23 @@  struct dentry *mount_bdev(struct file_system_type *fs_type,
 	if (IS_ERR(bdev))
 		return ERR_CAST(bdev);
 
+	if (current_user_ns() != &init_user_ns) {
+		/*
+		 * For userns mounts, disallow mounting if bdev is open for
+		 * writing
+		 */
+		if (!atomic_dec_unless_positive(&bdev->bd_inode->i_writecount)) {
+			error = -EBUSY;
+			goto error_bdev;
+		}
+		if (bdev->bd_contains != bdev &&
+		    !atomic_dec_unless_positive(&bdev->bd_contains->bd_inode->i_writecount)) {
+			atomic_inc(&bdev->bd_inode->i_writecount);
+			error = -EBUSY;
+			goto error_bdev;
+		}
+	}
+
 	/*
 	 * once the super is inserted into the list by sget, s_umount
 	 * will protect the lockfs code from trying to start a snapshot
@@ -1042,7 +1059,7 @@  struct dentry *mount_bdev(struct file_system_type *fs_type,
 	if (bdev->bd_fsfreeze_count > 0) {
 		mutex_unlock(&bdev->bd_fsfreeze_mutex);
 		error = -EBUSY;
-		goto error_bdev;
+		goto error_inc;
 	}
 	s = sget(fs_type, test_bdev_super, set_bdev_super, flags | MS_NOSEC,
 		 bdev);
@@ -1054,7 +1071,7 @@  struct dentry *mount_bdev(struct file_system_type *fs_type,
 		if ((flags ^ s->s_flags) & MS_RDONLY) {
 			deactivate_locked_super(s);
 			error = -EBUSY;
-			goto error_bdev;
+			goto error_inc;
 		}
 
 		/*
@@ -1085,6 +1102,12 @@  struct dentry *mount_bdev(struct file_system_type *fs_type,
 
 error_s:
 	error = PTR_ERR(s);
+error_inc:
+	if (current_user_ns() != &init_user_ns) {
+		atomic_inc(&bdev->bd_inode->i_writecount);
+		if (bdev->bd_contains != bdev)
+			atomic_inc(&bdev->bd_contains->bd_inode->i_writecount);
+	}
 error_bdev:
 	blkdev_put(bdev, mode);
 error:
@@ -1101,6 +1124,11 @@  void kill_block_super(struct super_block *sb)
 	generic_shutdown_super(sb);
 	sync_blockdev(bdev);
 	WARN_ON_ONCE(!(mode & FMODE_EXCL));
+	if (sb->s_user_ns != &init_user_ns) {
+		atomic_inc(&bdev->bd_inode->i_writecount);
+		if (bdev->bd_contains != bdev)
+			atomic_inc(&bdev->bd_contains->bd_inode->i_writecount);
+	}
 	blkdev_put(bdev, mode | FMODE_EXCL);
 }