[v7,2/8] fs: Add standard casefolding support
diff mbox series

Message ID 20200208013552.241832-3-drosen@google.com
State New
Headers show
Series
  • Support fof Casefolding and Encryption
Related show

Commit Message

Daniel Rosenberg Feb. 8, 2020, 1:35 a.m. UTC
This adds general supporting functions for filesystems that use
utf8 casefolding. It provides standard dentry_operations and adds the
necessary structures in struct super_block to allow this standardization.

Ext4 and F2fs are switch to these implementations.

Signed-off-by: Daniel Rosenberg <drosen@google.com>
---
 fs/libfs.c         | 77 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h | 22 +++++++++++++
 2 files changed, 99 insertions(+)

Comments

Al Viro Feb. 8, 2020, 2:12 a.m. UTC | #1
On Fri, Feb 07, 2020 at 05:35:46PM -0800, Daniel Rosenberg wrote:

> +int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
> +			  const char *str, const struct qstr *name)
> +{
> +	const struct dentry *parent = READ_ONCE(dentry->d_parent);
> +	const struct inode *inode = READ_ONCE(parent->d_inode);
> +	const struct super_block *sb = dentry->d_sb;
> +	const struct unicode_map *um = sb->s_encoding;
> +	struct qstr entry = QSTR_INIT(str, len);
> +	int ret;
> +
> +	if (!inode || !needs_casefold(inode))
> +		goto fallback;
> +
> +	ret = utf8_strncasecmp(um, name, &entry);

Again, is that safe in case when the contents of the string str points to
keeps changing under you?
Daniel Rosenberg Feb. 10, 2020, 11:11 p.m. UTC | #2
On Fri, Feb 7, 2020 at 6:12 PM Al Viro <viro@zeniv.linux.org.uk> wrote:
>
> On Fri, Feb 07, 2020 at 05:35:46PM -0800, Daniel Rosenberg wrote:
>
>
> Again, is that safe in case when the contents of the string str points to
> keeps changing under you?

I'm not sure what you mean. I thought it was safe to use the str and
len passed into d_compare. Even if it gets changed under RCU
conditions I thought there was some code to ensure that the name/len
pair passed in is consistent, and any other inconsistencies would get
caught by d_seq later. Are there unsafe code paths that can follow?
Al Viro Feb. 10, 2020, 11:42 p.m. UTC | #3
On Mon, Feb 10, 2020 at 03:11:13PM -0800, Daniel Rosenberg wrote:
> On Fri, Feb 7, 2020 at 6:12 PM Al Viro <viro@zeniv.linux.org.uk> wrote:
> >
> > On Fri, Feb 07, 2020 at 05:35:46PM -0800, Daniel Rosenberg wrote:
> >
> >
> > Again, is that safe in case when the contents of the string str points to
> > keeps changing under you?
> 
> I'm not sure what you mean. I thought it was safe to use the str and
> len passed into d_compare. Even if it gets changed under RCU
> conditions I thought there was some code to ensure that the name/len
> pair passed in is consistent, and any other inconsistencies would get
> caught by d_seq later. Are there unsafe code paths that can follow?

If you ever fetch the same byte twice, you might see different values.
You need a fairly careful use of READ_ONCE() or equivalents to make
sure that you don't get screwed over by that.

Sure, ->d_seq mismatch will throw the result out, but you need to make
sure you won't oops/step on uninitialized memory/etc. in process.

It's not impossible to get right, but it's not trivial and you need all
code working with that much more careful than normal for string handling.
Eric Biggers Feb. 12, 2020, 3:55 a.m. UTC | #4
On Fri, Feb 07, 2020 at 05:35:46PM -0800, Daniel Rosenberg wrote:
> This adds general supporting functions for filesystems that use
> utf8 casefolding. It provides standard dentry_operations and adds the
> necessary structures in struct super_block to allow this standardization.
> 
> Ext4 and F2fs are switch to these implementations.

I think you mean that ext4 and f2fs *will be switched* to these implementations?
It's later in the series, not in this patch.

> +#ifdef CONFIG_UNICODE
> +bool needs_casefold(const struct inode *dir)
> +{
> +	return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding &&
> +			(!IS_ENCRYPTED(dir) || fscrypt_has_encryption_key(dir));
> +}
> +EXPORT_SYMBOL(needs_casefold);

Can you add kerneldoc comments to all the new functions that are exported to
modules?

> +struct hash_ctx {
> +	struct utf8_itr_context ctx;
> +	unsigned long hash;
> +};
> +
> +static int do_generic_ci_hash(struct utf8_itr_context *ctx, int byte, int pos)
> +{
> +	struct hash_ctx *hctx = container_of(ctx, struct hash_ctx, ctx);
> +
> +	hctx->hash = partial_name_hash((unsigned char)byte, hctx->hash);
> +	return 0;
> +}
> +
> +int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str)
> +{
> +	const struct inode *inode = READ_ONCE(dentry->d_inode);
> +	struct super_block *sb = dentry->d_sb;
> +	const struct unicode_map *um = sb->s_encoding;
> +	int ret = 0;
> +	struct hash_ctx hctx;
> +
> +	if (!inode || !needs_casefold(inode))
> +		return 0;
> +
> +	hctx.hash = init_name_hash(dentry);
> +	hctx.ctx.actor = do_generic_ci_hash;
> +	ret = utf8_casefold_iter(um, str, &hctx.ctx);
> +	if (ret < 0)
> +		goto err;
> +	str->hash = end_name_hash(hctx.hash);
> +
> +	return 0;
> +err:
> +	if (sb_has_enc_strict_mode(sb))
> +		ret = -EINVAL;
> +	return ret;
> +}
> +EXPORT_SYMBOL(generic_ci_d_hash);
> +#endif

This breaks the !strict_mode case by starting to fail lookups of names that
aren't valid Unicode, instead of falling back to the standard case-sensitive
behavior.

There is an xfstest for casefolding; is this bug not caught by it (in which case
the test needs to be improved)?  Or did you just not run it?

> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 6eae91c0668f9..a260afbc06d22 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1382,6 +1382,12 @@ extern int send_sigurg(struct fown_struct *fown);
>  #define SB_ACTIVE	(1<<30)
>  #define SB_NOUSER	(1<<31)
>  
> +/* These flags relate to encoding and casefolding */
> +#define SB_ENC_STRICT_MODE_FL	(1 << 0)

It would be helpful if the comment mentioned that these flags are stored on-disk
(and therefore can't be re-numbered, unlike the other flags defined nearby).

> +#ifdef CONFIG_UNICODE
> +	struct unicode_map *s_encoding;
> +	__u16 s_encoding_flags;
>  #endif

This isn't a UAPI header, so 's_encoding_flags' should use u16, not __u16.

And for that matter, 's_encoding_flags' will be pointer-sized due to padding
anyway, so maybe just make it 'unsigned int'?

> +static inline bool needs_casefold(const struct inode *dir)
> +{
> +	return 0;
> +}
> +#endif

Use false instead of 0 for 'bool'.

- Eric
Eric Biggers Feb. 12, 2020, 6:34 a.m. UTC | #5
On Mon, Feb 10, 2020 at 11:42:07PM +0000, Al Viro wrote:
> On Mon, Feb 10, 2020 at 03:11:13PM -0800, Daniel Rosenberg wrote:
> > On Fri, Feb 7, 2020 at 6:12 PM Al Viro <viro@zeniv.linux.org.uk> wrote:
> > >
> > > On Fri, Feb 07, 2020 at 05:35:46PM -0800, Daniel Rosenberg wrote:
> > >
> > >
> > > Again, is that safe in case when the contents of the string str points to
> > > keeps changing under you?
> > 
> > I'm not sure what you mean. I thought it was safe to use the str and
> > len passed into d_compare. Even if it gets changed under RCU
> > conditions I thought there was some code to ensure that the name/len
> > pair passed in is consistent, and any other inconsistencies would get
> > caught by d_seq later. Are there unsafe code paths that can follow?
> 
> If you ever fetch the same byte twice, you might see different values.
> You need a fairly careful use of READ_ONCE() or equivalents to make
> sure that you don't get screwed over by that.
> 
> Sure, ->d_seq mismatch will throw the result out, but you need to make
> sure you won't oops/step on uninitialized memory/etc. in process.
> 
> It's not impossible to get right, but it's not trivial and you need all
> code working with that much more careful than normal for string handling.

It looks like this is a real problem, not just a "theoretical" data race.
For example, see:

utf8ncursor():
        /* The first byte of s may not be an utf8 continuation. */
        if (len > 0 && (*s & 0xC0) == 0x80)
                return -1;

and then utf8byte():
                } else if ((*u8c->s & 0xC0) == 0x80) {
                        /* This is a continuation of the current character. */
                        if (!u8c->p)
                                u8c->len--;
                        return (unsigned char)*u8c->s++;

The first byte of the string is checked in two different functions, so it's very
likely to be loaded twice.  In between, it could change from a non-continuation
byte to a continuation byte.  That would cause the string length to be
decremented from 0 to UINT_MAX.  Then utf8_strncasecmp() would run beyond the
bounds of the string until something happened to mismatch.

That's just an example that I found right away; there are probably more.

IMO, this needs to be fixed before anyone can actually use the ext4 and f2fs
casefolding stuff.

I don't know the best solution.  One option is to fix fs/unicode/ to handle
concurrently modified strings.  Another could be to see what it would take to
serialize lookups and renames for casefolded directories...

- Eric
Eric Biggers Feb. 12, 2020, 6:57 a.m. UTC | #6
On Tue, Feb 11, 2020 at 10:34:40PM -0800, Eric Biggers wrote:
> On Mon, Feb 10, 2020 at 11:42:07PM +0000, Al Viro wrote:
> > On Mon, Feb 10, 2020 at 03:11:13PM -0800, Daniel Rosenberg wrote:
> > > On Fri, Feb 7, 2020 at 6:12 PM Al Viro <viro@zeniv.linux.org.uk> wrote:
> > > >
> > > > On Fri, Feb 07, 2020 at 05:35:46PM -0800, Daniel Rosenberg wrote:
> > > >
> > > >
> > > > Again, is that safe in case when the contents of the string str points to
> > > > keeps changing under you?
> > > 
> > > I'm not sure what you mean. I thought it was safe to use the str and
> > > len passed into d_compare. Even if it gets changed under RCU
> > > conditions I thought there was some code to ensure that the name/len
> > > pair passed in is consistent, and any other inconsistencies would get
> > > caught by d_seq later. Are there unsafe code paths that can follow?
> > 
> > If you ever fetch the same byte twice, you might see different values.
> > You need a fairly careful use of READ_ONCE() or equivalents to make
> > sure that you don't get screwed over by that.
> > 
> > Sure, ->d_seq mismatch will throw the result out, but you need to make
> > sure you won't oops/step on uninitialized memory/etc. in process.
> > 
> > It's not impossible to get right, but it's not trivial and you need all
> > code working with that much more careful than normal for string handling.
> 
> It looks like this is a real problem, not just a "theoretical" data race.
> For example, see:
> 
> utf8ncursor():
>         /* The first byte of s may not be an utf8 continuation. */
>         if (len > 0 && (*s & 0xC0) == 0x80)
>                 return -1;
> 
> and then utf8byte():
>                 } else if ((*u8c->s & 0xC0) == 0x80) {
>                         /* This is a continuation of the current character. */
>                         if (!u8c->p)
>                                 u8c->len--;
>                         return (unsigned char)*u8c->s++;
> 
> The first byte of the string is checked in two different functions, so it's very
> likely to be loaded twice.  In between, it could change from a non-continuation
> byte to a continuation byte.  That would cause the string length to be
> decremented from 0 to UINT_MAX.  Then utf8_strncasecmp() would run beyond the
> bounds of the string until something happened to mismatch.
> 
> That's just an example that I found right away; there are probably more.
> 
> IMO, this needs to be fixed before anyone can actually use the ext4 and f2fs
> casefolding stuff.
> 
> I don't know the best solution.  One option is to fix fs/unicode/ to handle
> concurrently modified strings.  Another could be to see what it would take to
> serialize lookups and renames for casefolded directories...
> 

Or (just throwing another idea out there) the dentry's name could be copied to a
temporary buffer in ->d_compare().  The simplest version would be:

	u8 _name[NAME_MAX];

	memcpy(_name, name, len);
	name = _name;

Though, 255 bytes is a bit large for a stack buffer (so for long names it may
need kmalloc with GFP_ATOMIC), and technically it would need a special version
of memcpy() to be guaranteed safe from compiler optimizations (though I expect
this would work in practice).

Alternatively, take_dentry_name_snapshot() kind of does this already, except
that it takes a dentry and not a (name, len) pair.

- Eric

Patch
diff mbox series

diff --git a/fs/libfs.c b/fs/libfs.c
index c686bd9caac67..433c283df3099 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -20,6 +20,9 @@ 
 #include <linux/fs_context.h>
 #include <linux/pseudo_fs.h>
 #include <linux/fsnotify.h>
+#include <linux/unicode.h>
+#include <linux/fscrypt.h>
+#include <linux/stringhash.h>
 
 #include <linux/uaccess.h>
 
@@ -1361,3 +1364,77 @@  bool is_empty_dir_inode(struct inode *inode)
 	return (inode->i_fop == &empty_dir_operations) &&
 		(inode->i_op == &empty_dir_inode_operations);
 }
+
+#ifdef CONFIG_UNICODE
+bool needs_casefold(const struct inode *dir)
+{
+	return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding &&
+			(!IS_ENCRYPTED(dir) || fscrypt_has_encryption_key(dir));
+}
+EXPORT_SYMBOL(needs_casefold);
+
+int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
+			  const char *str, const struct qstr *name)
+{
+	const struct dentry *parent = READ_ONCE(dentry->d_parent);
+	const struct inode *inode = READ_ONCE(parent->d_inode);
+	const struct super_block *sb = dentry->d_sb;
+	const struct unicode_map *um = sb->s_encoding;
+	struct qstr entry = QSTR_INIT(str, len);
+	int ret;
+
+	if (!inode || !needs_casefold(inode))
+		goto fallback;
+
+	ret = utf8_strncasecmp(um, name, &entry);
+	if (ret >= 0)
+		return ret;
+
+	if (sb_has_enc_strict_mode(sb))
+		return -EINVAL;
+fallback:
+	if (len != name->len)
+		return 1;
+	return !!memcmp(str, name->name, len);
+}
+EXPORT_SYMBOL(generic_ci_d_compare);
+
+struct hash_ctx {
+	struct utf8_itr_context ctx;
+	unsigned long hash;
+};
+
+static int do_generic_ci_hash(struct utf8_itr_context *ctx, int byte, int pos)
+{
+	struct hash_ctx *hctx = container_of(ctx, struct hash_ctx, ctx);
+
+	hctx->hash = partial_name_hash((unsigned char)byte, hctx->hash);
+	return 0;
+}
+
+int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str)
+{
+	const struct inode *inode = READ_ONCE(dentry->d_inode);
+	struct super_block *sb = dentry->d_sb;
+	const struct unicode_map *um = sb->s_encoding;
+	int ret = 0;
+	struct hash_ctx hctx;
+
+	if (!inode || !needs_casefold(inode))
+		return 0;
+
+	hctx.hash = init_name_hash(dentry);
+	hctx.ctx.actor = do_generic_ci_hash;
+	ret = utf8_casefold_iter(um, str, &hctx.ctx);
+	if (ret < 0)
+		goto err;
+	str->hash = end_name_hash(hctx.hash);
+
+	return 0;
+err:
+	if (sb_has_enc_strict_mode(sb))
+		ret = -EINVAL;
+	return ret;
+}
+EXPORT_SYMBOL(generic_ci_d_hash);
+#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6eae91c0668f9..a260afbc06d22 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1382,6 +1382,12 @@  extern int send_sigurg(struct fown_struct *fown);
 #define SB_ACTIVE	(1<<30)
 #define SB_NOUSER	(1<<31)
 
+/* These flags relate to encoding and casefolding */
+#define SB_ENC_STRICT_MODE_FL	(1 << 0)
+
+#define sb_has_enc_strict_mode(sb) \
+	(sb->s_encoding_flags & SB_ENC_STRICT_MODE_FL)
+
 /*
  *	Umount options
  */
@@ -1449,6 +1455,10 @@  struct super_block {
 #endif
 #ifdef CONFIG_FS_VERITY
 	const struct fsverity_operations *s_vop;
+#endif
+#ifdef CONFIG_UNICODE
+	struct unicode_map *s_encoding;
+	__u16 s_encoding_flags;
 #endif
 	struct hlist_bl_head	s_roots;	/* alternate root dentries for NFS */
 	struct list_head	s_mounts;	/* list of mounts; _not_ for fs use */
@@ -3361,6 +3371,18 @@  extern int generic_file_fsync(struct file *, loff_t, loff_t, int);
 
 extern int generic_check_addressable(unsigned, u64);
 
+#ifdef CONFIG_UNICODE
+extern int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str);
+extern int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
+				const char *str, const struct qstr *name);
+extern bool needs_casefold(const struct inode *dir);
+#else
+static inline bool needs_casefold(const struct inode *dir)
+{
+	return 0;
+}
+#endif
+
 #ifdef CONFIG_MIGRATION
 extern int buffer_migrate_page(struct address_space *,
 				struct page *, struct page *,