diff mbox

[RFC] fs: xattr-based FS_IOC_[GS]ETFLAGS interface

Message ID 20140107025855.GA9229@birch.djwong.org
State New, archived
Headers show

Commit Message

Darrick Wong Jan. 7, 2014, 2:58 a.m. UTC
This is a proof of concept interface for replacing the contentious
FS_IOC_[GS]ETFLAGS interface with one that presents itself as the
xattr 'system.iflags'.  Instead of using integer inode flags, this
interface uses a comma-separated string of words, such as
"extents,immutable" to describe the inode flags.  This ought to enable
all filesystems to introduce arbitrary flags, with any sort of backend
they want, while also taking advantage of generic fs flags.

The initial conversion is for ext4, though given the similarities of
most filesystems' implementations, converting the rest shouldn't be
difficult.  How we get everyone to agree on common flag names is
anyone's guess.

Includes some helper methods to handle the string<->int conversion,
and a tweak to the generic xattr code to allow setting iflags on an
immutable file.

Usage:
# setfattr -n system.iflags -v 'extents,append' /path/file
# getfattr -n system.iflags /path/file
system.iflags="secrm,unrm,sync,immutable,nodump,noatime,journal_data,extents"

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/ext4/Makefile           |    2 
 fs/ext4/ext4.h             |    3 +
 fs/ext4/ioctl.c            |  198 ++++++++++++++++++++++++--------------------
 fs/ext4/xattr.c            |    1 
 fs/ext4/xattr.h            |    1 
 fs/ext4/xattr_iflags.c     |   90 ++++++++++++++++++++
 fs/xattr.c                 |    9 ++
 include/linux/strflags.h   |   27 ++++++
 include/uapi/linux/xattr.h |    2 
 lib/Makefile               |    2 
 lib/strflags.c             |  117 ++++++++++++++++++++++++++
 11 files changed, 358 insertions(+), 94 deletions(-)
 create mode 100644 fs/ext4/xattr_iflags.c
 create mode 100644 include/linux/strflags.h
 create mode 100644 lib/strflags.c

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Jan Kara Jan. 7, 2014, 12:48 p.m. UTC | #1
On Mon 06-01-14 18:58:55, Darrick J. Wong wrote:
> This is a proof of concept interface for replacing the contentious
> FS_IOC_[GS]ETFLAGS interface with one that presents itself as the
> xattr 'system.iflags'.  Instead of using integer inode flags, this
> interface uses a comma-separated string of words, such as
> "extents,immutable" to describe the inode flags.  This ought to enable
> all filesystems to introduce arbitrary flags, with any sort of backend
> they want, while also taking advantage of generic fs flags.
> 
> The initial conversion is for ext4, though given the similarities of
> most filesystems' implementations, converting the rest shouldn't be
> difficult.  How we get everyone to agree on common flag names is
> anyone's guess.
> 
> Includes some helper methods to handle the string<->int conversion,
> and a tweak to the generic xattr code to allow setting iflags on an
> immutable file.
  I have to say I'm not thrilled by the idea of juggling strings in
userspace and in kernel to set a flag for an inode...

								Honza

> Usage:
> # setfattr -n system.iflags -v 'extents,append' /path/file
> # getfattr -n system.iflags /path/file
> system.iflags="secrm,unrm,sync,immutable,nodump,noatime,journal_data,extents"
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/ext4/Makefile           |    2 
>  fs/ext4/ext4.h             |    3 +
>  fs/ext4/ioctl.c            |  198 ++++++++++++++++++++++++--------------------
>  fs/ext4/xattr.c            |    1 
>  fs/ext4/xattr.h            |    1 
>  fs/ext4/xattr_iflags.c     |   90 ++++++++++++++++++++
>  fs/xattr.c                 |    9 ++
>  include/linux/strflags.h   |   27 ++++++
>  include/uapi/linux/xattr.h |    2 
>  lib/Makefile               |    2 
>  lib/strflags.c             |  117 ++++++++++++++++++++++++++
>  11 files changed, 358 insertions(+), 94 deletions(-)
>  create mode 100644 fs/ext4/xattr_iflags.c
>  create mode 100644 include/linux/strflags.h
>  create mode 100644 lib/strflags.c
> 
> diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
> index 0310fec..7126958 100644
> --- a/fs/ext4/Makefile
> +++ b/fs/ext4/Makefile
> @@ -8,7 +8,7 @@ ext4-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \
>  		ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
>  		ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \
>  		mmp.o indirect.o extents_status.o xattr.o xattr_user.o \
> -		xattr_trusted.o inline.o
> +		xattr_trusted.o inline.o xattr_iflags.o
>  
>  ext4-$(CONFIG_EXT4_FS_POSIX_ACL)	+= acl.o
>  ext4-$(CONFIG_EXT4_FS_SECURITY)		+= xattr_security.o
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index bf6c5cd..24837ab 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -2660,6 +2660,9 @@ extern void ext4_inline_data_truncate(struct inode *inode, int *has_inline);
>  
>  extern int ext4_convert_inline_data(struct inode *inode);
>  
> +/* ioctl.c */
> +extern int ext4_set_iflags(struct inode *inode, int flags);
> +
>  /* namei.c */
>  extern const struct inode_operations ext4_dir_inode_operations;
>  extern const struct inode_operations ext4_special_inode_operations;
> diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
> index 60589b6..c6f21c9 100644
> --- a/fs/ext4/ioctl.c
> +++ b/fs/ext4/ioctl.c
> @@ -213,12 +213,115 @@ swap_boot_out:
>  	return err;
>  }
>  
> +/*
> + * Set inode flags.  Assume we already have mnt_want_write_file and i_mutex.
> + */
> +int ext4_set_iflags(struct inode *inode, int flags)
> +{
> +	handle_t *handle = NULL;
> +	int err, migrate = 0;
> +	struct ext4_iloc iloc;
> +	unsigned int oldflags, mask, i;
> +	unsigned int jflag;
> +	struct ext4_inode_info *ei = EXT4_I(inode);
> +
> +	if (!inode_owner_or_capable(inode))
> +		return -EACCES;
> +
> +	flags = ext4_mask_flags(inode->i_mode, flags);
> +
> +	err = -EPERM;
> +	/* Is it quota file? Do not allow user to mess with it */
> +	if (IS_NOQUOTA(inode))
> +		goto flags_out;
> +
> +	oldflags = ei->i_flags;
> +
> +	/* The JOURNAL_DATA flag is modifiable only by root */
> +	jflag = flags & EXT4_JOURNAL_DATA_FL;
> +
> +	/*
> +	 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
> +	 * the relevant capability.
> +	 *
> +	 * This test looks nicer. Thanks to Pauline Middelink
> +	 */
> +	if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
> +		if (!capable(CAP_LINUX_IMMUTABLE))
> +			goto flags_out;
> +	}
> +
> +	/*
> +	 * The JOURNAL_DATA flag can only be changed by
> +	 * the relevant capability.
> +	 */
> +	if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
> +		if (!capable(CAP_SYS_RESOURCE))
> +			goto flags_out;
> +	}
> +	if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
> +		migrate = 1;
> +
> +	if (flags & EXT4_EOFBLOCKS_FL) {
> +		/* we don't support adding EOFBLOCKS flag */
> +		if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
> +			err = -EOPNOTSUPP;
> +			goto flags_out;
> +		}
> +	} else if (oldflags & EXT4_EOFBLOCKS_FL)
> +		ext4_truncate(inode);
> +
> +	handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
> +	if (IS_ERR(handle)) {
> +		err = PTR_ERR(handle);
> +		goto flags_out;
> +	}
> +	if (IS_SYNC(inode))
> +		ext4_handle_sync(handle);
> +	err = ext4_reserve_inode_write(handle, inode, &iloc);
> +	if (err)
> +		goto flags_err;
> +
> +	for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
> +		if (!(mask & EXT4_FL_USER_MODIFIABLE))
> +			continue;
> +		if (mask & flags)
> +			ext4_set_inode_flag(inode, i);
> +		else
> +			ext4_clear_inode_flag(inode, i);
> +	}
> +
> +	ext4_set_inode_flags(inode);
> +	inode->i_ctime = ext4_current_time(inode);
> +
> +	err = ext4_mark_iloc_dirty(handle, inode, &iloc);
> +flags_err:
> +	ext4_journal_stop(handle);
> +	if (err)
> +		goto flags_out;
> +
> +	if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
> +		err = ext4_change_inode_journal_flag(inode, jflag);
> +	if (err)
> +		goto flags_out;
> +	if (migrate) {
> +		if (flags & EXT4_EXTENTS_FL)
> +			err = ext4_ext_migrate(inode);
> +		else
> +			err = ext4_ind_migrate(inode);
> +	}
> +
> +flags_out:
> +	return err;
> +}
> +
>  long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>  {
>  	struct inode *inode = file_inode(filp);
>  	struct super_block *sb = inode->i_sb;
>  	struct ext4_inode_info *ei = EXT4_I(inode);
>  	unsigned int flags;
> +	int err;
>  
>  	ext4_debug("cmd = %u, arg = %lu\n", cmd, arg);
>  
> @@ -227,13 +330,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>  		ext4_get_inode_flags(ei);
>  		flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
>  		return put_user(flags, (int __user *) arg);
> -	case EXT4_IOC_SETFLAGS: {
> -		handle_t *handle = NULL;
> -		int err, migrate = 0;
> -		struct ext4_iloc iloc;
> -		unsigned int oldflags, mask, i;
> -		unsigned int jflag;
> -
> +	case EXT4_IOC_SETFLAGS:
>  		if (!inode_owner_or_capable(inode))
>  			return -EACCES;
>  
> @@ -244,95 +341,12 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>  		if (err)
>  			return err;
>  
> -		flags = ext4_mask_flags(inode->i_mode, flags);
> -
> -		err = -EPERM;
>  		mutex_lock(&inode->i_mutex);
> -		/* Is it quota file? Do not allow user to mess with it */
> -		if (IS_NOQUOTA(inode))
> -			goto flags_out;
> -
> -		oldflags = ei->i_flags;
> -
> -		/* The JOURNAL_DATA flag is modifiable only by root */
> -		jflag = flags & EXT4_JOURNAL_DATA_FL;
> -
> -		/*
> -		 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
> -		 * the relevant capability.
> -		 *
> -		 * This test looks nicer. Thanks to Pauline Middelink
> -		 */
> -		if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
> -			if (!capable(CAP_LINUX_IMMUTABLE))
> -				goto flags_out;
> -		}
> -
> -		/*
> -		 * The JOURNAL_DATA flag can only be changed by
> -		 * the relevant capability.
> -		 */
> -		if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
> -			if (!capable(CAP_SYS_RESOURCE))
> -				goto flags_out;
> -		}
> -		if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
> -			migrate = 1;
> -
> -		if (flags & EXT4_EOFBLOCKS_FL) {
> -			/* we don't support adding EOFBLOCKS flag */
> -			if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
> -				err = -EOPNOTSUPP;
> -				goto flags_out;
> -			}
> -		} else if (oldflags & EXT4_EOFBLOCKS_FL)
> -			ext4_truncate(inode);
> -
> -		handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
> -		if (IS_ERR(handle)) {
> -			err = PTR_ERR(handle);
> -			goto flags_out;
> -		}
> -		if (IS_SYNC(inode))
> -			ext4_handle_sync(handle);
> -		err = ext4_reserve_inode_write(handle, inode, &iloc);
> -		if (err)
> -			goto flags_err;
> -
> -		for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
> -			if (!(mask & EXT4_FL_USER_MODIFIABLE))
> -				continue;
> -			if (mask & flags)
> -				ext4_set_inode_flag(inode, i);
> -			else
> -				ext4_clear_inode_flag(inode, i);
> -		}
> -
> -		ext4_set_inode_flags(inode);
> -		inode->i_ctime = ext4_current_time(inode);
> -
> -		err = ext4_mark_iloc_dirty(handle, inode, &iloc);
> -flags_err:
> -		ext4_journal_stop(handle);
> -		if (err)
> -			goto flags_out;
> -
> -		if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
> -			err = ext4_change_inode_journal_flag(inode, jflag);
> -		if (err)
> -			goto flags_out;
> -		if (migrate) {
> -			if (flags & EXT4_EXTENTS_FL)
> -				err = ext4_ext_migrate(inode);
> -			else
> -				err = ext4_ind_migrate(inode);
> -		}
> -
> -flags_out:
> +		err = ext4_set_iflags(inode, flags);
>  		mutex_unlock(&inode->i_mutex);
> +
>  		mnt_drop_write_file(filp);
>  		return err;
> -	}
>  	case EXT4_IOC_GETVERSION:
>  	case EXT4_IOC_GETVERSION_OLD:
>  		return put_user(inode->i_generation, (int __user *) arg);
> diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
> index 1423c48..64963e4 100644
> --- a/fs/ext4/xattr.c
> +++ b/fs/ext4/xattr.c
> @@ -105,6 +105,7 @@ static const struct xattr_handler *ext4_xattr_handler_map[] = {
>  };
>  
>  const struct xattr_handler *ext4_xattr_handlers[] = {
> +	&ext4_xattr_iflags_handler,
>  	&ext4_xattr_user_handler,
>  	&ext4_xattr_trusted_handler,
>  #ifdef CONFIG_EXT4_FS_POSIX_ACL
> diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
> index c767dbd..2559583 100644
> --- a/fs/ext4/xattr.h
> +++ b/fs/ext4/xattr.h
> @@ -99,6 +99,7 @@ extern const struct xattr_handler ext4_xattr_trusted_handler;
>  extern const struct xattr_handler ext4_xattr_acl_access_handler;
>  extern const struct xattr_handler ext4_xattr_acl_default_handler;
>  extern const struct xattr_handler ext4_xattr_security_handler;
> +extern const struct xattr_handler ext4_xattr_iflags_handler;
>  
>  extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);
>  
> diff --git a/fs/ext4/xattr_iflags.c b/fs/ext4/xattr_iflags.c
> new file mode 100644
> index 0000000..e3c5472
> --- /dev/null
> +++ b/fs/ext4/xattr_iflags.c
> @@ -0,0 +1,90 @@
> +/*
> + * linux/fs/ext4/xattr_iflags.c
> + * Handler for using the extended attribute interface to store inode flags.
> + *
> + * Copyright (C) 2014 Oracle, Darrick J. Wong <darrick.wong@oracle.com>
> + */
> +#include <linux/string.h>
> +#include <linux/fs.h>
> +#include <linux/security.h>
> +#include <linux/slab.h>
> +#include <linux/strflags.h>
> +#include "ext4_jbd2.h"
> +#include "ext4.h"
> +#include "xattr.h"
> +
> +static struct flag_db ext4_iflags = {
> +	.flag_sz = sizeof(int),
> +	.maps = {
> +		{"secrm", EXT4_SECRM_FL},
> +		{"unrm", EXT4_UNRM_FL},
> +		{"compr", EXT4_COMPR_FL},
> +		{"sync", EXT4_SYNC_FL},
> +		{"immutable", EXT4_IMMUTABLE_FL},
> +		{"append", EXT4_APPEND_FL},
> +		{"nodump", EXT4_NODUMP_FL},
> +		{"noatime", EXT4_NOATIME_FL},
> +		{"htree_dir", EXT4_INDEX_FL},
> +		{"journal_data", EXT4_JOURNAL_DATA_FL},
> +		{"notail", EXT4_NOTAIL_FL},
> +		{"dirsync", EXT4_DIRSYNC_FL},
> +		{"topdir", EXT4_TOPDIR_FL},
> +		{"extents", EXT4_EXTENTS_FL},
> +		{NULL, 0},
> +	}
> +};
> +
> +static size_t
> +ext4_xattr_iflags_list(struct dentry *dentry, char *list, size_t list_size,
> +		       const char *name, size_t name_len, int type)
> +{
> +	const size_t prefix_len = sizeof(XATTR_NAME_IFLAGS)-1;
> +	const size_t total_len = prefix_len + name_len + 1;
> +
> +	if (list && total_len <= list_size) {
> +		memcpy(list, XATTR_NAME_IFLAGS, prefix_len);
> +		memcpy(list+prefix_len, name, name_len);
> +		list[prefix_len + name_len] = '\0';
> +	}
> +	return total_len;
> +}
> +
> +static int
> +ext4_xattr_iflags_get(struct dentry *dentry, const char *name,
> +		       void *buffer, size_t size, int type)
> +{
> +	struct ext4_inode_info *ei;
> +
> +	ei = EXT4_I(dentry->d_inode);
> +	return flags_to_string(&ext4_iflags, &ei->i_flags, buffer, size);
> +}
> +
> +static int
> +ext4_xattr_iflags_set(struct dentry *dentry, const char *name,
> +		const void *value, size_t size, int flags, int type)
> +{
> +	char *string = (char *)value;
> +	int ret, moo;
> +
> +	string = kmalloc(size + 1, GFP_KERNEL);
> +	if (!string)
> +		return -ENOMEM;
> +	memcpy(string, value, size);
> +	string[size] = 0;
> +
> +	ret = string_to_flags(&ext4_iflags, string, size, &moo);
> +	if (ret)
> +		goto out;
> +
> +	ret = ext4_set_iflags(dentry->d_inode, moo);
> +out:
> +	kfree(string);
> +	return ret;
> +}
> +
> +const struct xattr_handler ext4_xattr_iflags_handler = {
> +	.prefix	= XATTR_NAME_IFLAGS,
> +	.list	= ext4_xattr_iflags_list,
> +	.get	= ext4_xattr_iflags_get,
> +	.set	= ext4_xattr_iflags_set,
> +};
> diff --git a/fs/xattr.c b/fs/xattr.c
> index 3377dff..bf0fe89 100644
> --- a/fs/xattr.c
> +++ b/fs/xattr.c
> @@ -32,6 +32,15 @@ static int
>  xattr_permission(struct inode *inode, const char *name, int mask)
>  {
>  	/*
> +	 * Always allow read/write of inode flag attribute.
> +	 */
> +	if (!strcmp(name, XATTR_NAME_IFLAGS)) {
> +		if (mask & MAY_WRITE)
> +			return inode_owner_or_capable(inode) ? 0 : -EACCES;
> +		return 0;
> +	}
> +
> +	/*
>  	 * We can never set or remove an extended attribute on a read-only
>  	 * filesystem  or on an immutable / append-only inode.
>  	 */
> diff --git a/include/linux/strflags.h b/include/linux/strflags.h
> new file mode 100644
> index 0000000..6c8067c
> --- /dev/null
> +++ b/include/linux/strflags.h
> @@ -0,0 +1,27 @@
> +/*
> + * File: linux/strflags.h
> + * Convert flags to pretty strings.
> + *
> + * Copyright (C) 2014 Oracle, Darrick J. Wong <darrick.wong@oracle.com>
> + */
> +#ifndef _LINUX_STRFLAGS_H_
> +#define _LINUX_STRFLAGS_H_
> +
> +#include <linux/types.h>
> +
> +struct flag_map {
> +	const char *string;
> +	unsigned long flag;
> +};
> +
> +struct flag_db {
> +	size_t flag_sz;
> +	struct flag_map maps[];
> +};
> +
> +int flags_to_string(struct flag_db *flagdb, void *flags, char *buf,
> +		    size_t buf_len);
> +int string_to_flags(struct flag_db *flagdb, char *buf, size_t buf_len,
> +		    void *flags);
> +
> +#endif /* _LINUX_STRFLAGS_H_ */
> diff --git a/include/uapi/linux/xattr.h b/include/uapi/linux/xattr.h
> index e4629b9..8cd78ae 100644
> --- a/include/uapi/linux/xattr.h
> +++ b/include/uapi/linux/xattr.h
> @@ -63,5 +63,7 @@
>  #define XATTR_POSIX_ACL_DEFAULT  "posix_acl_default"
>  #define XATTR_NAME_POSIX_ACL_DEFAULT XATTR_SYSTEM_PREFIX XATTR_POSIX_ACL_DEFAULT
>  
> +#define XATTR_IFLAGS "iflags"
> +#define XATTR_NAME_IFLAGS XATTR_SYSTEM_PREFIX XATTR_IFLAGS
>  
>  #endif /* _UAPI_LINUX_XATTR_H */
> diff --git a/lib/Makefile b/lib/Makefile
> index a459c31..f3a517d 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -26,7 +26,7 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
>  	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
>  	 gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \
>  	 bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \
> -	 percpu-refcount.o percpu_ida.o
> +	 percpu-refcount.o percpu_ida.o strflags.o
>  obj-y += string_helpers.o
>  obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
>  obj-y += kstrtox.o
> diff --git a/lib/strflags.c b/lib/strflags.c
> new file mode 100644
> index 0000000..d1a34e7
> --- /dev/null
> +++ b/lib/strflags.c
> @@ -0,0 +1,117 @@
> +/*
> + * File: lib/strflags.c
> + * Convert flags to pretty strings.
> + *
> + * Copyright (C) 2014 Oracle, Darrick J. Wong <darrick.wong@oracle.com>
> + */
> +#include <linux/kernel.h>
> +#include <linux/string.h>
> +#include <linux/strflags.h>
> +#include <linux/errno.h>
> +
> +int flags_to_string(struct flag_db *flagdb, void *flags, char *buf,
> +		    size_t buf_len)
> +{
> +	struct flag_map *mapping;
> +	char *b;
> +	unsigned long fl;
> +	int used, first;
> +	size_t sz;
> +
> +	switch (flagdb->flag_sz) {
> +	case 1:
> +		fl = *(u8 *)flags;
> +		break;
> +	case 2:
> +		fl = *(u16 *)flags;
> +		break;
> +	case 4:
> +		fl = *(u32 *)flags;
> +		break;
> +	case 8:
> +		fl = *(u64 *)flags;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	/* Calculating buffer size */
> +	sz = 0;
> +	for (mapping = flagdb->maps; mapping->string; mapping++) {
> +		if (mapping->flag & fl) {
> +			sz += strlen(mapping->string);
> +			sz++;
> +		}
> +	}
> +
> +	if (buf == NULL)
> +		return sz;
> +	if (buf_len < sz)
> +		return -ERANGE;
> +
> +	/* Stringify the flags */
> +	b = buf;
> +	first = 1;
> +	for (mapping = flagdb->maps; mapping->string; mapping++) {
> +		if (mapping->flag & fl) {
> +			used = snprintf(b, buf_len, "%s%s",
> +					(first ? "" : ","),
> +					mapping->string);
> +			first = 0;
> +			buf_len -= used;
> +			b += used;
> +		}
> +	}
> +
> +	return b - buf;
> +}
> +EXPORT_SYMBOL_GPL(flags_to_string);
> +
> +int string_to_flags(struct flag_db *flagdb, char *buf, size_t buflen,
> +		    void *flags)
> +{
> +	struct flag_map *mapping;
> +	unsigned long fl = 0;
> +	char *p, *q;
> +	char c;
> +	int found;
> +
> +	for (p = buf; p < buf + buflen; p = q + 1) {
> +		for (q = p; *q != 0 && *q != ','; q++)
> +			; /* empty */
> +		if (p == q)
> +			continue;
> +
> +		c = *q; *q = 0;
> +		found = 0;
> +		for (mapping = flagdb->maps; mapping->string; mapping++)
> +			if (strcmp(p, mapping->string) == 0) {
> +				fl |= mapping->flag;
> +				found = 1;
> +			}
> +		if (!found)
> +			return -EINVAL;
> +		*q = c;
> +		p = q + 1;
> +	}
> +
> +	switch (flagdb->flag_sz) {
> +	case 1:
> +		*(u8 *)flags = fl;
> +		break;
> +	case 2:
> +		*(u16 *)flags = fl;
> +		break;
> +	case 4:
> +		*(u32 *)flags = fl;
> +		break;
> +	case 8:
> +		*(u64 *)flags = fl;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(string_to_flags);
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Hellwig Jan. 7, 2014, 3:49 p.m. UTC | #2
On Tue, Jan 07, 2014 at 01:48:31PM +0100, Jan Kara wrote:
>   I have to say I'm not thrilled by the idea of juggling strings in
> userspace and in kernel to set a flag for an inode...

Nevermind the massive amounts of code that sit in the filesystem.

Although my recent ACL patches are the first step towards handling the
existing semantically overloaded xattrs in common code.  Unless Al has
a valid reason to disagree I'd like to put a big NAK on adding any new
xattrs that aren't stored on disk as-is but provide magic functionality,
as they are pain to implement, maintain, and audit.
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Theodore Ts'o Jan. 7, 2014, 5:04 p.m. UTC | #3
On Tue, Jan 07, 2014 at 07:49:35AM -0800, Christoph Hellwig wrote:
> On Tue, Jan 07, 2014 at 01:48:31PM +0100, Jan Kara wrote:
> >   I have to say I'm not thrilled by the idea of juggling strings in
> > userspace and in kernel to set a flag for an inode...
> 
> Nevermind the massive amounts of code that sit in the filesystem.

The reason for this patch was to address what Dave Chinner has called
"a shitty interface"[1].  Using bitfields that need to be coordinated
across file systems, when sometimes a bit assignment is validly a fs
specific thing, and then later becomes something that gets shared
across file systems.

[1] http://thread.gmane.org/gmane.linux.file-systems/80164/focus=80396

If we don't go about it this way, there are alternatives: we could
create new ioctls (or a new syscall) as we start running out of bits
used by FS_IOC_[GS]ETFLAGS.  We can create new ioctls for bits which
are intended for fs-specific flags, which then later get promoted to
the new syscall when some functionality starts to get shared accross
other file systems (probably with a different bit assignment).  This
is certainly less code, but it does mean more complexity outside of
the code when we try to coordinate new functionality across file
systems.

Personally, I don't mind dealing with codepoint assignments, but my
impression is that this is a minority viewpoint.  Al and Linus have
historically hated bitfields, and Al in the past has spoken favorably
of Plan 9's approach of using strings for the system interface.

So while I have a preference towards using bitfields, as opposed to
using the xattr approach, what I'd really like is that we make a
decision, one way or another, about what's the best way to move
forward.

   	    	     	      	      	  - Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Darrick Wong Jan. 7, 2014, 7:43 p.m. UTC | #4
On Tue, Jan 07, 2014 at 12:04:30PM -0500, Theodore Ts'o wrote:
> On Tue, Jan 07, 2014 at 07:49:35AM -0800, Christoph Hellwig wrote:
> > On Tue, Jan 07, 2014 at 01:48:31PM +0100, Jan Kara wrote:
> > >   I have to say I'm not thrilled by the idea of juggling strings in
> > > userspace and in kernel to set a flag for an inode...
> > 
> > Nevermind the massive amounts of code that sit in the filesystem.
> 
> The reason for this patch was to address what Dave Chinner has called
> "a shitty interface"[1].  Using bitfields that need to be coordinated
> across file systems, when sometimes a bit assignment is validly a fs
> specific thing, and then later becomes something that gets shared
> across file systems.
> 
> [1] http://thread.gmane.org/gmane.linux.file-systems/80164/focus=80396
> 
> If we don't go about it this way, there are alternatives: we could
> create new ioctls (or a new syscall) as we start running out of bits
> used by FS_IOC_[GS]ETFLAGS.  We can create new ioctls for bits which
> are intended for fs-specific flags, which then later get promoted to
> the new syscall when some functionality starts to get shared accross
> other file systems (probably with a different bit assignment).  This
> is certainly less code, but it does mean more complexity outside of
> the code when we try to coordinate new functionality across file
> systems.

I had thought of indexed inode flags as an alternative to the xattr/string
parsing thing.  Feature flags make their first appearance as part of a per-FS
flag-space and are migrated to the common flag-space when there is demand.
It would also avoid the need for each fs to create its own flag ioctl.

On the other hand, someone suggested I try remaking IOC_[GS]ETFLAG as an xattr,
so off I went. :)

#define FS_IOC_FLAGS_COMMON	0
#define FS_IOC_FLAGS_COMMON2	1
#define FS_IOC_FLAGS_EXT4	0xEF53

struct inode_flag_ioctl {
	u32 flag;
	u32 value;	/* or u64? */
};
#define FS_IOC_GETFLAGS2 _IOR('f', 12, struct inode_flag_ioctl);
#define FS_IOC_SETFLAGS2 _IOW('f', 13, struct inode_flag_ioctl);

foo() {
	struct inode_flag_ioctl if;

	if.flag = FS_IOC_FLAGS_COMMON;
	ioctl(fd, FS_IOC_GETFLAGS2, &if);
	printf("%d\n", if.value);

	if.flag = FS_IOC_FLAGS_EXT4;
	if.value = EXT5_BONGHITS_FL | EXT4_EA_INODE_FL;
	ioctl(fd, FS_IOC_SETFLAGS2, &if);
}

> Personally, I don't mind dealing with codepoint assignments, but my
> impression is that this is a minority viewpoint.  Al and Linus have
> historically hated bitfields, and Al in the past has spoken favorably
> of Plan 9's approach of using strings for the system interface.

I prefer strings too, but I suppose one pays for the complexity.  Given that
all the flags so far seem to have been booleans, this could be good enough.

> So while I have a preference towards using bitfields, as opposed to
> using the xattr approach, what I'd really like is that we make a
> decision, one way or another, about what's the best way to move
> forward.

Agreed.

--D
> 
>    	    	     	      	      	  - Ted
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Chris Mason Jan. 7, 2014, 7:59 p.m. UTC | #5
On Tue, 2014-01-07 at 11:43 -0800, Darrick J. Wong wrote:
> On Tue, Jan 07, 2014 at 12:04:30PM -0500, Theodore Ts'o wrote:
> > On Tue, Jan 07, 2014 at 07:49:35AM -0800, Christoph Hellwig wrote:
> > > On Tue, Jan 07, 2014 at 01:48:31PM +0100, Jan Kara wrote:
> > > >   I have to say I'm not thrilled by the idea of juggling strings in
> > > > userspace and in kernel to set a flag for an inode...
> > > 
> > > Nevermind the massive amounts of code that sit in the filesystem.
> > 
> > The reason for this patch was to address what Dave Chinner has called
> > "a shitty interface"[1].  Using bitfields that need to be coordinated
> > across file systems, when sometimes a bit assignment is validly a fs
> > specific thing, and then later becomes something that gets shared
> > across file systems.
> > 
> > [1] http://thread.gmane.org/gmane.linux.file-systems/80164/focus=80396
> > 
> > If we don't go about it this way, there are alternatives: we could
> > create new ioctls (or a new syscall) as we start running out of bits
> > used by FS_IOC_[GS]ETFLAGS.  We can create new ioctls for bits which
> > are intended for fs-specific flags, which then later get promoted to
> > the new syscall when some functionality starts to get shared accross
> > other file systems (probably with a different bit assignment).  This
> > is certainly less code, but it does mean more complexity outside of
> > the code when we try to coordinate new functionality across file
> > systems.
> 
> I had thought of indexed inode flags as an alternative to the xattr/string
> parsing thing.  Feature flags make their first appearance as part of a per-FS
> flag-space and are migrated to the common flag-space when there is demand.
> It would also avoid the need for each fs to create its own flag ioctl.
> 
> On the other hand, someone suggested I try remaking IOC_[GS]ETFLAG as an xattr,
> so off I went. :)
> 

At least in btrfs xattrs are more expensive than something right in the
inode.  We can cache it when we load the inode (it'll be right next to
the inode most of the time) but for performance critical things I do
like the good old fashioned flags.

It's also possible to turn xattrs off, so we have to deal with
filesystems that are mounted with them off and then back on again.  I
can't think of huge problems from that right now, just something to be
aware of.

-chris

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Darrick Wong Jan. 7, 2014, 10:02 p.m. UTC | #6
On Tue, Jan 07, 2014 at 07:59:15PM +0000, Chris Mason wrote:
> On Tue, 2014-01-07 at 11:43 -0800, Darrick J. Wong wrote:
> > On Tue, Jan 07, 2014 at 12:04:30PM -0500, Theodore Ts'o wrote:
> > > On Tue, Jan 07, 2014 at 07:49:35AM -0800, Christoph Hellwig wrote:
> > > > On Tue, Jan 07, 2014 at 01:48:31PM +0100, Jan Kara wrote:
> > > > >   I have to say I'm not thrilled by the idea of juggling strings in
> > > > > userspace and in kernel to set a flag for an inode...
> > > > 
> > > > Nevermind the massive amounts of code that sit in the filesystem.
> > > 
> > > The reason for this patch was to address what Dave Chinner has called
> > > "a shitty interface"[1].  Using bitfields that need to be coordinated
> > > across file systems, when sometimes a bit assignment is validly a fs
> > > specific thing, and then later becomes something that gets shared
> > > across file systems.
> > > 
> > > [1] http://thread.gmane.org/gmane.linux.file-systems/80164/focus=80396
> > > 
> > > If we don't go about it this way, there are alternatives: we could
> > > create new ioctls (or a new syscall) as we start running out of bits
> > > used by FS_IOC_[GS]ETFLAGS.  We can create new ioctls for bits which
> > > are intended for fs-specific flags, which then later get promoted to
> > > the new syscall when some functionality starts to get shared accross
> > > other file systems (probably with a different bit assignment).  This
> > > is certainly less code, but it does mean more complexity outside of
> > > the code when we try to coordinate new functionality across file
> > > systems.
> > 
> > I had thought of indexed inode flags as an alternative to the xattr/string
> > parsing thing.  Feature flags make their first appearance as part of a per-FS
> > flag-space and are migrated to the common flag-space when there is demand.
> > It would also avoid the need for each fs to create its own flag ioctl.
> > 
> > On the other hand, someone suggested I try remaking IOC_[GS]ETFLAG as an xattr,
> > so off I went. :)
> > 
> 
> At least in btrfs xattrs are more expensive than something right in the
> inode.  We can cache it when we load the inode (it'll be right next to
> the inode most of the time) but for performance critical things I do
> like the good old fashioned flags.

Just to clarify -- I wasn't proposing any on-disk changes for any filesystems,
merely creating virtual xattrs that wrap the inode flags.

> It's also possible to turn xattrs off, so we have to deal with
> filesystems that are mounted with them off and then back on again.  I
> can't think of huge problems from that right now, just something to be
> aware of.

Just to satisfy my curiosity: are xattrs always separate objects in btrfs?

--D
> 
> -chris
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Chris Mason Jan. 7, 2014, 10:08 p.m. UTC | #7
On Tue, 2014-01-07 at 14:02 -0800, Darrick J. Wong wrote:
> On Tue, Jan 07, 2014 at 07:59:15PM +0000, Chris Mason wrote:
> > On Tue, 2014-01-07 at 11:43 -0800, Darrick J. Wong wrote:
> > > On Tue, Jan 07, 2014 at 12:04:30PM -0500, Theodore Ts'o wrote:
> > > > On Tue, Jan 07, 2014 at 07:49:35AM -0800, Christoph Hellwig wrote:
> > > > > On Tue, Jan 07, 2014 at 01:48:31PM +0100, Jan Kara wrote:
> > > > > >   I have to say I'm not thrilled by the idea of juggling strings in
> > > > > > userspace and in kernel to set a flag for an inode...
> > > > > 
> > > > > Nevermind the massive amounts of code that sit in the filesystem.
> > > > 
> > > > The reason for this patch was to address what Dave Chinner has called
> > > > "a shitty interface"[1].  Using bitfields that need to be coordinated
> > > > across file systems, when sometimes a bit assignment is validly a fs
> > > > specific thing, and then later becomes something that gets shared
> > > > across file systems.
> > > > 
> > > > [1] http://thread.gmane.org/gmane.linux.file-systems/80164/focus=80396
> > > > 
> > > > If we don't go about it this way, there are alternatives: we could
> > > > create new ioctls (or a new syscall) as we start running out of bits
> > > > used by FS_IOC_[GS]ETFLAGS.  We can create new ioctls for bits which
> > > > are intended for fs-specific flags, which then later get promoted to
> > > > the new syscall when some functionality starts to get shared accross
> > > > other file systems (probably with a different bit assignment).  This
> > > > is certainly less code, but it does mean more complexity outside of
> > > > the code when we try to coordinate new functionality across file
> > > > systems.
> > > 
> > > I had thought of indexed inode flags as an alternative to the xattr/string
> > > parsing thing.  Feature flags make their first appearance as part of a per-FS
> > > flag-space and are migrated to the common flag-space when there is demand.
> > > It would also avoid the need for each fs to create its own flag ioctl.
> > > 
> > > On the other hand, someone suggested I try remaking IOC_[GS]ETFLAG as an xattr,
> > > so off I went. :)
> > > 
> > 
> > At least in btrfs xattrs are more expensive than something right in the
> > inode.  We can cache it when we load the inode (it'll be right next to
> > the inode most of the time) but for performance critical things I do
> > like the good old fashioned flags.
> 
> Just to clarify -- I wasn't proposing any on-disk changes for any filesystems,
> merely creating virtual xattrs that wrap the inode flags.

Ah, sorry I missed that part.  I was assuming that as we run out of
flags we'll want to use real xattrs instead.

> 
> > It's also possible to turn xattrs off, so we have to deal with
> > filesystems that are mounted with them off and then back on again.  I
> > can't think of huge problems from that right now, just something to be
> > aware of.
> 
> Just to satisfy my curiosity: are xattrs always separate objects in btrfs?

Strictly speaking, yes.  They aren't in the inode struct.  But most of
the time they will be in the same btree block.  For anything performance
critical we can order the keys to push them to the front.

-chris
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Theodore Ts'o Jan. 7, 2014, 10:27 p.m. UTC | #8
On Tue, Jan 07, 2014 at 11:43:56AM -0800, Darrick J. Wong wrote:
> I had thought of indexed inode flags as an alternative to the xattr/string
> parsing thing.  Feature flags make their first appearance as part of a per-FS
> flag-space and are migrated to the common flag-space when there is demand.
> It would also avoid the need for each fs to create its own flag ioctl.
> 
> On the other hand, someone suggested I try remaking IOC_[GS]ETFLAG as an xattr,
> so off I went. :)
> 
> #define FS_IOC_FLAGS_COMMON	0
> #define FS_IOC_FLAGS_COMMON2	1
> #define FS_IOC_FLAGS_EXT4	0xEF53
> 
> struct inode_flag_ioctl {
> 	u32 flag;
> 	u32 value;	/* or u64? */
> };
> #define FS_IOC_GETFLAGS2 _IOR('f', 12, struct inode_flag_ioctl);
> #define FS_IOC_SETFLAGS2 _IOW('f', 13, struct inode_flag_ioctl);

Is having this structure and demultiplexing based on
inode_flag_ioctl.flag really worth it?

I'd just simply introduce two new ioctl's for generic flags:
FS_IOC_COMMON_[GS]ETFLAGS, and then two new ioctl's for each file
system: FS_IOC_EXT4_[GS]ETFLAGS, FS_IOC_BTRFS_[GS]ETFLAGS, etc.

Is this uglier or pretier than using strings?  Eh.... six of one, half
dozen of the other.  I think it's mostly a matter of personal taste.

      	     	       	     	  	 - Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 0310fec..7126958 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -8,7 +8,7 @@  ext4-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \
 		ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
 		ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \
 		mmp.o indirect.o extents_status.o xattr.o xattr_user.o \
-		xattr_trusted.o inline.o
+		xattr_trusted.o inline.o xattr_iflags.o
 
 ext4-$(CONFIG_EXT4_FS_POSIX_ACL)	+= acl.o
 ext4-$(CONFIG_EXT4_FS_SECURITY)		+= xattr_security.o
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index bf6c5cd..24837ab 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2660,6 +2660,9 @@  extern void ext4_inline_data_truncate(struct inode *inode, int *has_inline);
 
 extern int ext4_convert_inline_data(struct inode *inode);
 
+/* ioctl.c */
+extern int ext4_set_iflags(struct inode *inode, int flags);
+
 /* namei.c */
 extern const struct inode_operations ext4_dir_inode_operations;
 extern const struct inode_operations ext4_special_inode_operations;
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 60589b6..c6f21c9 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -213,12 +213,115 @@  swap_boot_out:
 	return err;
 }
 
+/*
+ * Set inode flags.  Assume we already have mnt_want_write_file and i_mutex.
+ */
+int ext4_set_iflags(struct inode *inode, int flags)
+{
+	handle_t *handle = NULL;
+	int err, migrate = 0;
+	struct ext4_iloc iloc;
+	unsigned int oldflags, mask, i;
+	unsigned int jflag;
+	struct ext4_inode_info *ei = EXT4_I(inode);
+
+	if (!inode_owner_or_capable(inode))
+		return -EACCES;
+
+	flags = ext4_mask_flags(inode->i_mode, flags);
+
+	err = -EPERM;
+	/* Is it quota file? Do not allow user to mess with it */
+	if (IS_NOQUOTA(inode))
+		goto flags_out;
+
+	oldflags = ei->i_flags;
+
+	/* The JOURNAL_DATA flag is modifiable only by root */
+	jflag = flags & EXT4_JOURNAL_DATA_FL;
+
+	/*
+	 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
+	 * the relevant capability.
+	 *
+	 * This test looks nicer. Thanks to Pauline Middelink
+	 */
+	if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
+		if (!capable(CAP_LINUX_IMMUTABLE))
+			goto flags_out;
+	}
+
+	/*
+	 * The JOURNAL_DATA flag can only be changed by
+	 * the relevant capability.
+	 */
+	if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
+		if (!capable(CAP_SYS_RESOURCE))
+			goto flags_out;
+	}
+	if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
+		migrate = 1;
+
+	if (flags & EXT4_EOFBLOCKS_FL) {
+		/* we don't support adding EOFBLOCKS flag */
+		if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
+			err = -EOPNOTSUPP;
+			goto flags_out;
+		}
+	} else if (oldflags & EXT4_EOFBLOCKS_FL)
+		ext4_truncate(inode);
+
+	handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
+	if (IS_ERR(handle)) {
+		err = PTR_ERR(handle);
+		goto flags_out;
+	}
+	if (IS_SYNC(inode))
+		ext4_handle_sync(handle);
+	err = ext4_reserve_inode_write(handle, inode, &iloc);
+	if (err)
+		goto flags_err;
+
+	for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
+		if (!(mask & EXT4_FL_USER_MODIFIABLE))
+			continue;
+		if (mask & flags)
+			ext4_set_inode_flag(inode, i);
+		else
+			ext4_clear_inode_flag(inode, i);
+	}
+
+	ext4_set_inode_flags(inode);
+	inode->i_ctime = ext4_current_time(inode);
+
+	err = ext4_mark_iloc_dirty(handle, inode, &iloc);
+flags_err:
+	ext4_journal_stop(handle);
+	if (err)
+		goto flags_out;
+
+	if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
+		err = ext4_change_inode_journal_flag(inode, jflag);
+	if (err)
+		goto flags_out;
+	if (migrate) {
+		if (flags & EXT4_EXTENTS_FL)
+			err = ext4_ext_migrate(inode);
+		else
+			err = ext4_ind_migrate(inode);
+	}
+
+flags_out:
+	return err;
+}
+
 long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct inode *inode = file_inode(filp);
 	struct super_block *sb = inode->i_sb;
 	struct ext4_inode_info *ei = EXT4_I(inode);
 	unsigned int flags;
+	int err;
 
 	ext4_debug("cmd = %u, arg = %lu\n", cmd, arg);
 
@@ -227,13 +330,7 @@  long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		ext4_get_inode_flags(ei);
 		flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
 		return put_user(flags, (int __user *) arg);
-	case EXT4_IOC_SETFLAGS: {
-		handle_t *handle = NULL;
-		int err, migrate = 0;
-		struct ext4_iloc iloc;
-		unsigned int oldflags, mask, i;
-		unsigned int jflag;
-
+	case EXT4_IOC_SETFLAGS:
 		if (!inode_owner_or_capable(inode))
 			return -EACCES;
 
@@ -244,95 +341,12 @@  long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		if (err)
 			return err;
 
-		flags = ext4_mask_flags(inode->i_mode, flags);
-
-		err = -EPERM;
 		mutex_lock(&inode->i_mutex);
-		/* Is it quota file? Do not allow user to mess with it */
-		if (IS_NOQUOTA(inode))
-			goto flags_out;
-
-		oldflags = ei->i_flags;
-
-		/* The JOURNAL_DATA flag is modifiable only by root */
-		jflag = flags & EXT4_JOURNAL_DATA_FL;
-
-		/*
-		 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
-		 * the relevant capability.
-		 *
-		 * This test looks nicer. Thanks to Pauline Middelink
-		 */
-		if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
-			if (!capable(CAP_LINUX_IMMUTABLE))
-				goto flags_out;
-		}
-
-		/*
-		 * The JOURNAL_DATA flag can only be changed by
-		 * the relevant capability.
-		 */
-		if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
-			if (!capable(CAP_SYS_RESOURCE))
-				goto flags_out;
-		}
-		if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
-			migrate = 1;
-
-		if (flags & EXT4_EOFBLOCKS_FL) {
-			/* we don't support adding EOFBLOCKS flag */
-			if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
-				err = -EOPNOTSUPP;
-				goto flags_out;
-			}
-		} else if (oldflags & EXT4_EOFBLOCKS_FL)
-			ext4_truncate(inode);
-
-		handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
-		if (IS_ERR(handle)) {
-			err = PTR_ERR(handle);
-			goto flags_out;
-		}
-		if (IS_SYNC(inode))
-			ext4_handle_sync(handle);
-		err = ext4_reserve_inode_write(handle, inode, &iloc);
-		if (err)
-			goto flags_err;
-
-		for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
-			if (!(mask & EXT4_FL_USER_MODIFIABLE))
-				continue;
-			if (mask & flags)
-				ext4_set_inode_flag(inode, i);
-			else
-				ext4_clear_inode_flag(inode, i);
-		}
-
-		ext4_set_inode_flags(inode);
-		inode->i_ctime = ext4_current_time(inode);
-
-		err = ext4_mark_iloc_dirty(handle, inode, &iloc);
-flags_err:
-		ext4_journal_stop(handle);
-		if (err)
-			goto flags_out;
-
-		if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
-			err = ext4_change_inode_journal_flag(inode, jflag);
-		if (err)
-			goto flags_out;
-		if (migrate) {
-			if (flags & EXT4_EXTENTS_FL)
-				err = ext4_ext_migrate(inode);
-			else
-				err = ext4_ind_migrate(inode);
-		}
-
-flags_out:
+		err = ext4_set_iflags(inode, flags);
 		mutex_unlock(&inode->i_mutex);
+
 		mnt_drop_write_file(filp);
 		return err;
-	}
 	case EXT4_IOC_GETVERSION:
 	case EXT4_IOC_GETVERSION_OLD:
 		return put_user(inode->i_generation, (int __user *) arg);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 1423c48..64963e4 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -105,6 +105,7 @@  static const struct xattr_handler *ext4_xattr_handler_map[] = {
 };
 
 const struct xattr_handler *ext4_xattr_handlers[] = {
+	&ext4_xattr_iflags_handler,
 	&ext4_xattr_user_handler,
 	&ext4_xattr_trusted_handler,
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index c767dbd..2559583 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -99,6 +99,7 @@  extern const struct xattr_handler ext4_xattr_trusted_handler;
 extern const struct xattr_handler ext4_xattr_acl_access_handler;
 extern const struct xattr_handler ext4_xattr_acl_default_handler;
 extern const struct xattr_handler ext4_xattr_security_handler;
+extern const struct xattr_handler ext4_xattr_iflags_handler;
 
 extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);
 
diff --git a/fs/ext4/xattr_iflags.c b/fs/ext4/xattr_iflags.c
new file mode 100644
index 0000000..e3c5472
--- /dev/null
+++ b/fs/ext4/xattr_iflags.c
@@ -0,0 +1,90 @@ 
+/*
+ * linux/fs/ext4/xattr_iflags.c
+ * Handler for using the extended attribute interface to store inode flags.
+ *
+ * Copyright (C) 2014 Oracle, Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include <linux/string.h>
+#include <linux/fs.h>
+#include <linux/security.h>
+#include <linux/slab.h>
+#include <linux/strflags.h>
+#include "ext4_jbd2.h"
+#include "ext4.h"
+#include "xattr.h"
+
+static struct flag_db ext4_iflags = {
+	.flag_sz = sizeof(int),
+	.maps = {
+		{"secrm", EXT4_SECRM_FL},
+		{"unrm", EXT4_UNRM_FL},
+		{"compr", EXT4_COMPR_FL},
+		{"sync", EXT4_SYNC_FL},
+		{"immutable", EXT4_IMMUTABLE_FL},
+		{"append", EXT4_APPEND_FL},
+		{"nodump", EXT4_NODUMP_FL},
+		{"noatime", EXT4_NOATIME_FL},
+		{"htree_dir", EXT4_INDEX_FL},
+		{"journal_data", EXT4_JOURNAL_DATA_FL},
+		{"notail", EXT4_NOTAIL_FL},
+		{"dirsync", EXT4_DIRSYNC_FL},
+		{"topdir", EXT4_TOPDIR_FL},
+		{"extents", EXT4_EXTENTS_FL},
+		{NULL, 0},
+	}
+};
+
+static size_t
+ext4_xattr_iflags_list(struct dentry *dentry, char *list, size_t list_size,
+		       const char *name, size_t name_len, int type)
+{
+	const size_t prefix_len = sizeof(XATTR_NAME_IFLAGS)-1;
+	const size_t total_len = prefix_len + name_len + 1;
+
+	if (list && total_len <= list_size) {
+		memcpy(list, XATTR_NAME_IFLAGS, prefix_len);
+		memcpy(list+prefix_len, name, name_len);
+		list[prefix_len + name_len] = '\0';
+	}
+	return total_len;
+}
+
+static int
+ext4_xattr_iflags_get(struct dentry *dentry, const char *name,
+		       void *buffer, size_t size, int type)
+{
+	struct ext4_inode_info *ei;
+
+	ei = EXT4_I(dentry->d_inode);
+	return flags_to_string(&ext4_iflags, &ei->i_flags, buffer, size);
+}
+
+static int
+ext4_xattr_iflags_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
+{
+	char *string = (char *)value;
+	int ret, moo;
+
+	string = kmalloc(size + 1, GFP_KERNEL);
+	if (!string)
+		return -ENOMEM;
+	memcpy(string, value, size);
+	string[size] = 0;
+
+	ret = string_to_flags(&ext4_iflags, string, size, &moo);
+	if (ret)
+		goto out;
+
+	ret = ext4_set_iflags(dentry->d_inode, moo);
+out:
+	kfree(string);
+	return ret;
+}
+
+const struct xattr_handler ext4_xattr_iflags_handler = {
+	.prefix	= XATTR_NAME_IFLAGS,
+	.list	= ext4_xattr_iflags_list,
+	.get	= ext4_xattr_iflags_get,
+	.set	= ext4_xattr_iflags_set,
+};
diff --git a/fs/xattr.c b/fs/xattr.c
index 3377dff..bf0fe89 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -32,6 +32,15 @@  static int
 xattr_permission(struct inode *inode, const char *name, int mask)
 {
 	/*
+	 * Always allow read/write of inode flag attribute.
+	 */
+	if (!strcmp(name, XATTR_NAME_IFLAGS)) {
+		if (mask & MAY_WRITE)
+			return inode_owner_or_capable(inode) ? 0 : -EACCES;
+		return 0;
+	}
+
+	/*
 	 * We can never set or remove an extended attribute on a read-only
 	 * filesystem  or on an immutable / append-only inode.
 	 */
diff --git a/include/linux/strflags.h b/include/linux/strflags.h
new file mode 100644
index 0000000..6c8067c
--- /dev/null
+++ b/include/linux/strflags.h
@@ -0,0 +1,27 @@ 
+/*
+ * File: linux/strflags.h
+ * Convert flags to pretty strings.
+ *
+ * Copyright (C) 2014 Oracle, Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#ifndef _LINUX_STRFLAGS_H_
+#define _LINUX_STRFLAGS_H_
+
+#include <linux/types.h>
+
+struct flag_map {
+	const char *string;
+	unsigned long flag;
+};
+
+struct flag_db {
+	size_t flag_sz;
+	struct flag_map maps[];
+};
+
+int flags_to_string(struct flag_db *flagdb, void *flags, char *buf,
+		    size_t buf_len);
+int string_to_flags(struct flag_db *flagdb, char *buf, size_t buf_len,
+		    void *flags);
+
+#endif /* _LINUX_STRFLAGS_H_ */
diff --git a/include/uapi/linux/xattr.h b/include/uapi/linux/xattr.h
index e4629b9..8cd78ae 100644
--- a/include/uapi/linux/xattr.h
+++ b/include/uapi/linux/xattr.h
@@ -63,5 +63,7 @@ 
 #define XATTR_POSIX_ACL_DEFAULT  "posix_acl_default"
 #define XATTR_NAME_POSIX_ACL_DEFAULT XATTR_SYSTEM_PREFIX XATTR_POSIX_ACL_DEFAULT
 
+#define XATTR_IFLAGS "iflags"
+#define XATTR_NAME_IFLAGS XATTR_SYSTEM_PREFIX XATTR_IFLAGS
 
 #endif /* _UAPI_LINUX_XATTR_H */
diff --git a/lib/Makefile b/lib/Makefile
index a459c31..f3a517d 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -26,7 +26,7 @@  obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
 	 gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \
 	 bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \
-	 percpu-refcount.o percpu_ida.o
+	 percpu-refcount.o percpu_ida.o strflags.o
 obj-y += string_helpers.o
 obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
 obj-y += kstrtox.o
diff --git a/lib/strflags.c b/lib/strflags.c
new file mode 100644
index 0000000..d1a34e7
--- /dev/null
+++ b/lib/strflags.c
@@ -0,0 +1,117 @@ 
+/*
+ * File: lib/strflags.c
+ * Convert flags to pretty strings.
+ *
+ * Copyright (C) 2014 Oracle, Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/strflags.h>
+#include <linux/errno.h>
+
+int flags_to_string(struct flag_db *flagdb, void *flags, char *buf,
+		    size_t buf_len)
+{
+	struct flag_map *mapping;
+	char *b;
+	unsigned long fl;
+	int used, first;
+	size_t sz;
+
+	switch (flagdb->flag_sz) {
+	case 1:
+		fl = *(u8 *)flags;
+		break;
+	case 2:
+		fl = *(u16 *)flags;
+		break;
+	case 4:
+		fl = *(u32 *)flags;
+		break;
+	case 8:
+		fl = *(u64 *)flags;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Calculating buffer size */
+	sz = 0;
+	for (mapping = flagdb->maps; mapping->string; mapping++) {
+		if (mapping->flag & fl) {
+			sz += strlen(mapping->string);
+			sz++;
+		}
+	}
+
+	if (buf == NULL)
+		return sz;
+	if (buf_len < sz)
+		return -ERANGE;
+
+	/* Stringify the flags */
+	b = buf;
+	first = 1;
+	for (mapping = flagdb->maps; mapping->string; mapping++) {
+		if (mapping->flag & fl) {
+			used = snprintf(b, buf_len, "%s%s",
+					(first ? "" : ","),
+					mapping->string);
+			first = 0;
+			buf_len -= used;
+			b += used;
+		}
+	}
+
+	return b - buf;
+}
+EXPORT_SYMBOL_GPL(flags_to_string);
+
+int string_to_flags(struct flag_db *flagdb, char *buf, size_t buflen,
+		    void *flags)
+{
+	struct flag_map *mapping;
+	unsigned long fl = 0;
+	char *p, *q;
+	char c;
+	int found;
+
+	for (p = buf; p < buf + buflen; p = q + 1) {
+		for (q = p; *q != 0 && *q != ','; q++)
+			; /* empty */
+		if (p == q)
+			continue;
+
+		c = *q; *q = 0;
+		found = 0;
+		for (mapping = flagdb->maps; mapping->string; mapping++)
+			if (strcmp(p, mapping->string) == 0) {
+				fl |= mapping->flag;
+				found = 1;
+			}
+		if (!found)
+			return -EINVAL;
+		*q = c;
+		p = q + 1;
+	}
+
+	switch (flagdb->flag_sz) {
+	case 1:
+		*(u8 *)flags = fl;
+		break;
+	case 2:
+		*(u16 *)flags = fl;
+		break;
+	case 4:
+		*(u32 *)flags = fl;
+		break;
+	case 8:
+		*(u64 *)flags = fl;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(string_to_flags);