[v7,2/8] fs: Add standard casefolding support
diff mbox series

Message ID 20200208013552.241832-3-drosen@google.com
State New
Headers show
Series
  • Support fof Casefolding and Encryption
Related show

Commit Message

Daniel Rosenberg Feb. 8, 2020, 1:35 a.m. UTC
This adds general supporting functions for filesystems that use
utf8 casefolding. It provides standard dentry_operations and adds the
necessary structures in struct super_block to allow this standardization.

Ext4 and F2fs are switch to these implementations.

Signed-off-by: Daniel Rosenberg <drosen@google.com>
---
 fs/libfs.c         | 77 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h | 22 +++++++++++++
 2 files changed, 99 insertions(+)

Comments

Al Viro Feb. 8, 2020, 2:12 a.m. UTC | #1
On Fri, Feb 07, 2020 at 05:35:46PM -0800, Daniel Rosenberg wrote:

> +int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
> +			  const char *str, const struct qstr *name)
> +{
> +	const struct dentry *parent = READ_ONCE(dentry->d_parent);
> +	const struct inode *inode = READ_ONCE(parent->d_inode);
> +	const struct super_block *sb = dentry->d_sb;
> +	const struct unicode_map *um = sb->s_encoding;
> +	struct qstr entry = QSTR_INIT(str, len);
> +	int ret;
> +
> +	if (!inode || !needs_casefold(inode))
> +		goto fallback;
> +
> +	ret = utf8_strncasecmp(um, name, &entry);

Again, is that safe in case when the contents of the string str points to
keeps changing under you?
Daniel Rosenberg Feb. 10, 2020, 11:11 p.m. UTC | #2
On Fri, Feb 7, 2020 at 6:12 PM Al Viro <viro@zeniv.linux.org.uk> wrote:
>
> On Fri, Feb 07, 2020 at 05:35:46PM -0800, Daniel Rosenberg wrote:
>
>
> Again, is that safe in case when the contents of the string str points to
> keeps changing under you?

I'm not sure what you mean. I thought it was safe to use the str and
len passed into d_compare. Even if it gets changed under RCU
conditions I thought there was some code to ensure that the name/len
pair passed in is consistent, and any other inconsistencies would get
caught by d_seq later. Are there unsafe code paths that can follow?
Al Viro Feb. 10, 2020, 11:42 p.m. UTC | #3
On Mon, Feb 10, 2020 at 03:11:13PM -0800, Daniel Rosenberg wrote:
> On Fri, Feb 7, 2020 at 6:12 PM Al Viro <viro@zeniv.linux.org.uk> wrote:
> >
> > On Fri, Feb 07, 2020 at 05:35:46PM -0800, Daniel Rosenberg wrote:
> >
> >
> > Again, is that safe in case when the contents of the string str points to
> > keeps changing under you?
> 
> I'm not sure what you mean. I thought it was safe to use the str and
> len passed into d_compare. Even if it gets changed under RCU
> conditions I thought there was some code to ensure that the name/len
> pair passed in is consistent, and any other inconsistencies would get
> caught by d_seq later. Are there unsafe code paths that can follow?

If you ever fetch the same byte twice, you might see different values.
You need a fairly careful use of READ_ONCE() or equivalents to make
sure that you don't get screwed over by that.

Sure, ->d_seq mismatch will throw the result out, but you need to make
sure you won't oops/step on uninitialized memory/etc. in process.

It's not impossible to get right, but it's not trivial and you need all
code working with that much more careful than normal for string handling.
Eric Biggers Feb. 12, 2020, 3:55 a.m. UTC | #4
On Fri, Feb 07, 2020 at 05:35:46PM -0800, Daniel Rosenberg wrote:
> This adds general supporting functions for filesystems that use
> utf8 casefolding. It provides standard dentry_operations and adds the
> necessary structures in struct super_block to allow this standardization.
> 
> Ext4 and F2fs are switch to these implementations.

I think you mean that ext4 and f2fs *will be switched* to these implementations?
It's later in the series, not in this patch.

> +#ifdef CONFIG_UNICODE
> +bool needs_casefold(const struct inode *dir)
> +{
> +	return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding &&
> +			(!IS_ENCRYPTED(dir) || fscrypt_has_encryption_key(dir));
> +}
> +EXPORT_SYMBOL(needs_casefold);

Can you add kerneldoc comments to all the new functions that are exported to
modules?

> +struct hash_ctx {
> +	struct utf8_itr_context ctx;
> +	unsigned long hash;
> +};
> +
> +static int do_generic_ci_hash(struct utf8_itr_context *ctx, int byte, int pos)
> +{
> +	struct hash_ctx *hctx = container_of(ctx, struct hash_ctx, ctx);
> +
> +	hctx->hash = partial_name_hash((unsigned char)byte, hctx->hash);
> +	return 0;
> +}
> +
> +int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str)
> +{
> +	const struct inode *inode = READ_ONCE(dentry->d_inode);
> +	struct super_block *sb = dentry->d_sb;
> +	const struct unicode_map *um = sb->s_encoding;
> +	int ret = 0;
> +	struct hash_ctx hctx;
> +
> +	if (!inode || !needs_casefold(inode))
> +		return 0;
> +
> +	hctx.hash = init_name_hash(dentry);
> +	hctx.ctx.actor = do_generic_ci_hash;
> +	ret = utf8_casefold_iter(um, str, &hctx.ctx);
> +	if (ret < 0)
> +		goto err;
> +	str->hash = end_name_hash(hctx.hash);
> +
> +	return 0;
> +err:
> +	if (sb_has_enc_strict_mode(sb))
> +		ret = -EINVAL;
> +	return ret;
> +}
> +EXPORT_SYMBOL(generic_ci_d_hash);
> +#endif

This breaks the !strict_mode case by starting to fail lookups of names that
aren't valid Unicode, instead of falling back to the standard case-sensitive
behavior.

There is an xfstest for casefolding; is this bug not caught by it (in which case
the test needs to be improved)?  Or did you just not run it?

> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 6eae91c0668f9..a260afbc06d22 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1382,6 +1382,12 @@ extern int send_sigurg(struct fown_struct *fown);
>  #define SB_ACTIVE	(1<<30)
>  #define SB_NOUSER	(1<<31)
>  
> +/* These flags relate to encoding and casefolding */
> +#define SB_ENC_STRICT_MODE_FL	(1 << 0)

It would be helpful if the comment mentioned that these flags are stored on-disk
(and therefore can't be re-numbered, unlike the other flags defined nearby).

> +#ifdef CONFIG_UNICODE
> +	struct unicode_map *s_encoding;
> +	__u16 s_encoding_flags;
>  #endif

This isn't a UAPI header, so 's_encoding_flags' should use u16, not __u16.

And for that matter, 's_encoding_flags' will be pointer-sized due to padding
anyway, so maybe just make it 'unsigned int'?

> +static inline bool needs_casefold(const struct inode *dir)
> +{
> +	return 0;
> +}
> +#endif

Use false instead of 0 for 'bool'.

- Eric
Eric Biggers Feb. 12, 2020, 6:34 a.m. UTC | #5
On Mon, Feb 10, 2020 at 11:42:07PM +0000, Al Viro wrote:
> On Mon, Feb 10, 2020 at 03:11:13PM -0800, Daniel Rosenberg wrote:
> > On Fri, Feb 7, 2020 at 6:12 PM Al Viro <viro@zeniv.linux.org.uk> wrote:
> > >
> > > On Fri, Feb 07, 2020 at 05:35:46PM -0800, Daniel Rosenberg wrote:
> > >
> > >
> > > Again, is that safe in case when the contents of the string str points to
> > > keeps changing under you?
> > 
> > I'm not sure what you mean. I thought it was safe to use the str and
> > len passed into d_compare. Even if it gets changed under RCU
> > conditions I thought there was some code to ensure that the name/len
> > pair passed in is consistent, and any other inconsistencies would get
> > caught by d_seq later. Are there unsafe code paths that can follow?
> 
> If you ever fetch the same byte twice, you might see different values.
> You need a fairly careful use of READ_ONCE() or equivalents to make
> sure that you don't get screwed over by that.
> 
> Sure, ->d_seq mismatch will throw the result out, but you need to make
> sure you won't oops/step on uninitialized memory/etc. in process.
> 
> It's not impossible to get right, but it's not trivial and you need all
> code working with that much more careful than normal for string handling.

It looks like this is a real problem, not just a "theoretical" data race.
For example, see:

utf8ncursor():
        /* The first byte of s may not be an utf8 continuation. */
        if (len > 0 && (*s & 0xC0) == 0x80)
                return -1;

and then utf8byte():
                } else if ((*u8c->s & 0xC0) == 0x80) {
                        /* This is a continuation of the current character. */
                        if (!u8c->p)
                                u8c->len--;
                        return (unsigned char)*u8c->s++;

The first byte of the string is checked in two different functions, so it's very
likely to be loaded twice.  In between, it could change from a non-continuation
byte to a continuation byte.  That would cause the string length to be
decremented from 0 to UINT_MAX.  Then utf8_strncasecmp() would run beyond the
bounds of the string until something happened to mismatch.

That's just an example that I found right away; there are probably more.

IMO, this needs to be fixed before anyone can actually use the ext4 and f2fs
casefolding stuff.

I don't know the best solution.  One option is to fix fs/unicode/ to handle
concurrently modified strings.  Another could be to see what it would take to
serialize lookups and renames for casefolded directories...

- Eric
Eric Biggers Feb. 12, 2020, 6:57 a.m. UTC | #6
On Tue, Feb 11, 2020 at 10:34:40PM -0800, Eric Biggers wrote:
> On Mon, Feb 10, 2020 at 11:42:07PM +0000, Al Viro wrote:
> > On Mon, Feb 10, 2020 at 03:11:13PM -0800, Daniel Rosenberg wrote:
> > > On Fri, Feb 7, 2020 at 6:12 PM Al Viro <viro@zeniv.linux.org.uk> wrote:
> > > >
> > > > On Fri, Feb 07, 2020 at 05:35:46PM -0800, Daniel Rosenberg wrote:
> > > >
> > > >
> > > > Again, is that safe in case when the contents of the string str points to
> > > > keeps changing under you?
> > > 
> > > I'm not sure what you mean. I thought it was safe to use the str and
> > > len passed into d_compare. Even if it gets changed under RCU
> > > conditions I thought there was some code to ensure that the name/len
> > > pair passed in is consistent, and any other inconsistencies would get
> > > caught by d_seq later. Are there unsafe code paths that can follow?
> > 
> > If you ever fetch the same byte twice, you might see different values.
> > You need a fairly careful use of READ_ONCE() or equivalents to make
> > sure that you don't get screwed over by that.
> > 
> > Sure, ->d_seq mismatch will throw the result out, but you need to make
> > sure you won't oops/step on uninitialized memory/etc. in process.
> > 
> > It's not impossible to get right, but it's not trivial and you need all
> > code working with that much more careful than normal for string handling.
> 
> It looks like this is a real problem, not just a "theoretical" data race.
> For example, see:
> 
> utf8ncursor():
>         /* The first byte of s may not be an utf8 continuation. */
>         if (len > 0 && (*s & 0xC0) == 0x80)
>                 return -1;
> 
> and then utf8byte():
>                 } else if ((*u8c->s & 0xC0) == 0x80) {
>                         /* This is a continuation of the current character. */
>                         if (!u8c->p)
>                                 u8c->len--;
>                         return (unsigned char)*u8c->s++;
> 
> The first byte of the string is checked in two different functions, so it's very
> likely to be loaded twice.  In between, it could change from a non-continuation
> byte to a continuation byte.  That would cause the string length to be
> decremented from 0 to UINT_MAX.  Then utf8_strncasecmp() would run beyond the
> bounds of the string until something happened to mismatch.
> 
> That's just an example that I found right away; there are probably more.
> 
> IMO, this needs to be fixed before anyone can actually use the ext4 and f2fs
> casefolding stuff.
> 
> I don't know the best solution.  One option is to fix fs/unicode/ to handle
> concurrently modified strings.  Another could be to see what it would take to
> serialize lookups and renames for casefolded directories...
> 

Or (just throwing another idea out there) the dentry's name could be copied to a
temporary buffer in ->d_compare().  The simplest version would be:

	u8 _name[NAME_MAX];

	memcpy(_name, name, len);
	name = _name;

Though, 255 bytes is a bit large for a stack buffer (so for long names it may
need kmalloc with GFP_ATOMIC), and technically it would need a special version
of memcpy() to be guaranteed safe from compiler optimizations (though I expect
this would work in practice).

Alternatively, take_dentry_name_snapshot() kind of does this already, except
that it takes a dentry and not a (name, len) pair.

- Eric
Daniel Rosenberg Feb. 20, 2020, 2:27 a.m. UTC | #7
On Tue, Feb 11, 2020 at 10:57 PM Eric Biggers <ebiggers@kernel.org> wrote:
>
> Or (just throwing another idea out there) the dentry's name could be copied to a
> temporary buffer in ->d_compare().  The simplest version would be:
>
>         u8 _name[NAME_MAX];
>
>         memcpy(_name, name, len);
>         name = _name;
>
> Though, 255 bytes is a bit large for a stack buffer (so for long names it may
> need kmalloc with GFP_ATOMIC), and technically it would need a special version
> of memcpy() to be guaranteed safe from compiler optimizations (though I expect
> this would work in practice).
>
> Alternatively, take_dentry_name_snapshot() kind of does this already, except
> that it takes a dentry and not a (name, len) pair.
>
> - Eric

If we want to use take_dentry_name_snapshot, we'd need to do it before
calling the dentry op, since we get the dentry as a const. It would do
exactly what we want, in that it either takes a reference on the long
name, or copies the short name, although it does so under a spinlock.
I'm guessing we don't want to add that overhead for all
d_compare/d_hash's. I suppose it could just take a snapshot if it
falls under needs_casefold, but that feels a bit silly to me.

i don't think utf8cursor/utf8byte could be modified to be RCU safe
apart from a copy. As part of normalization there's some sorting that
goes on to ensure that different encodings of the same characters can
be matched, and I think those can technically be arbitrarily long, so
we'd possibly end up needing the copy anyways.

So, I see two possible fixes.
1. Use take_dentry_name_snapshot along the RCU paths to calling d_hash
and d_compare, at least when needs_casefold is true.
2. Within d_hash/d_compare, create a copy of the name if it is a short name.

For 1, it adds some overhead in general, which I'm sure we'd want to avoid.
For 2, I don't think we know we're in RCU mode, so we'd need to always
copy short filenames. I'm also unsure if it's valid to assume that
name given is stable if it is not the same as dentry->d_iname. If it
is, we only need to worry about copying DNAME_INLINE_LEN bytes at max
there. For memcpy, is there a different version that we'd want to use
for option 2?

-Daniel

Patch
diff mbox series

diff --git a/fs/libfs.c b/fs/libfs.c
index c686bd9caac67..433c283df3099 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -20,6 +20,9 @@ 
 #include <linux/fs_context.h>
 #include <linux/pseudo_fs.h>
 #include <linux/fsnotify.h>
+#include <linux/unicode.h>
+#include <linux/fscrypt.h>
+#include <linux/stringhash.h>
 
 #include <linux/uaccess.h>
 
@@ -1361,3 +1364,77 @@  bool is_empty_dir_inode(struct inode *inode)
 	return (inode->i_fop == &empty_dir_operations) &&
 		(inode->i_op == &empty_dir_inode_operations);
 }
+
+#ifdef CONFIG_UNICODE
+bool needs_casefold(const struct inode *dir)
+{
+	return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding &&
+			(!IS_ENCRYPTED(dir) || fscrypt_has_encryption_key(dir));
+}
+EXPORT_SYMBOL(needs_casefold);
+
+int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
+			  const char *str, const struct qstr *name)
+{
+	const struct dentry *parent = READ_ONCE(dentry->d_parent);
+	const struct inode *inode = READ_ONCE(parent->d_inode);
+	const struct super_block *sb = dentry->d_sb;
+	const struct unicode_map *um = sb->s_encoding;
+	struct qstr entry = QSTR_INIT(str, len);
+	int ret;
+
+	if (!inode || !needs_casefold(inode))
+		goto fallback;
+
+	ret = utf8_strncasecmp(um, name, &entry);
+	if (ret >= 0)
+		return ret;
+
+	if (sb_has_enc_strict_mode(sb))
+		return -EINVAL;
+fallback:
+	if (len != name->len)
+		return 1;
+	return !!memcmp(str, name->name, len);
+}
+EXPORT_SYMBOL(generic_ci_d_compare);
+
+struct hash_ctx {
+	struct utf8_itr_context ctx;
+	unsigned long hash;
+};
+
+static int do_generic_ci_hash(struct utf8_itr_context *ctx, int byte, int pos)
+{
+	struct hash_ctx *hctx = container_of(ctx, struct hash_ctx, ctx);
+
+	hctx->hash = partial_name_hash((unsigned char)byte, hctx->hash);
+	return 0;
+}
+
+int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str)
+{
+	const struct inode *inode = READ_ONCE(dentry->d_inode);
+	struct super_block *sb = dentry->d_sb;
+	const struct unicode_map *um = sb->s_encoding;
+	int ret = 0;
+	struct hash_ctx hctx;
+
+	if (!inode || !needs_casefold(inode))
+		return 0;
+
+	hctx.hash = init_name_hash(dentry);
+	hctx.ctx.actor = do_generic_ci_hash;
+	ret = utf8_casefold_iter(um, str, &hctx.ctx);
+	if (ret < 0)
+		goto err;
+	str->hash = end_name_hash(hctx.hash);
+
+	return 0;
+err:
+	if (sb_has_enc_strict_mode(sb))
+		ret = -EINVAL;
+	return ret;
+}
+EXPORT_SYMBOL(generic_ci_d_hash);
+#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6eae91c0668f9..a260afbc06d22 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1382,6 +1382,12 @@  extern int send_sigurg(struct fown_struct *fown);
 #define SB_ACTIVE	(1<<30)
 #define SB_NOUSER	(1<<31)
 
+/* These flags relate to encoding and casefolding */
+#define SB_ENC_STRICT_MODE_FL	(1 << 0)
+
+#define sb_has_enc_strict_mode(sb) \
+	(sb->s_encoding_flags & SB_ENC_STRICT_MODE_FL)
+
 /*
  *	Umount options
  */
@@ -1449,6 +1455,10 @@  struct super_block {
 #endif
 #ifdef CONFIG_FS_VERITY
 	const struct fsverity_operations *s_vop;
+#endif
+#ifdef CONFIG_UNICODE
+	struct unicode_map *s_encoding;
+	__u16 s_encoding_flags;
 #endif
 	struct hlist_bl_head	s_roots;	/* alternate root dentries for NFS */
 	struct list_head	s_mounts;	/* list of mounts; _not_ for fs use */
@@ -3361,6 +3371,18 @@  extern int generic_file_fsync(struct file *, loff_t, loff_t, int);
 
 extern int generic_check_addressable(unsigned, u64);
 
+#ifdef CONFIG_UNICODE
+extern int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str);
+extern int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
+				const char *str, const struct qstr *name);
+extern bool needs_casefold(const struct inode *dir);
+#else
+static inline bool needs_casefold(const struct inode *dir)
+{
+	return 0;
+}
+#endif
+
 #ifdef CONFIG_MIGRATION
 extern int buffer_migrate_page(struct address_space *,
 				struct page *, struct page *,