Patchwork [1/2] ext4: fix readdir error in case inline_data+dir_index.

login
register
mail settings
Submitter Tao Ma
Date April 10, 2013, 3:37 p.m.
Message ID <1365608228-3950-1-git-send-email-tm@tao.ma>
Download mbox | patch
Permalink /patch/235409/
State Accepted
Headers show

Comments

Tao Ma - April 10, 2013, 3:37 p.m.
From: Tao Ma <boyu.mt@taobao.com>

Zach reported a problem that if inline data is enabled, we don't
tell the difference between the offset of '.' and '..'. And a
getdents will fail if the user only want to get '.' and what's worse,
if there is a conversion happens when the user calls getdents
many times, he/she may get the same entry twice.

In theroy, a dir block would also fail if it is converted to a
hashed-index based dir since f_pos will become a hash value, not the
real one, but it doesn't happen. And a deep investigation
shows that we uses a hash based solution even for a normal dir if
the dir_index feature is enabled.

So this patch just adds a new htree_inlinedir_to_tree for inline dir,
and if we find that the hash index is supported, we will do like what
we do for a dir block.

Reported-by: Zach Brown <zab@redhat.com>
Signed-off-by: Tao Ma <boyu.mt@taobao.com>
---
 fs/ext4/dir.c    |   20 +++++----
 fs/ext4/ext4.h   |   23 +++++++++++
 fs/ext4/inline.c |  109 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/ext4/namei.c  |   29 +++++---------
 4 files changed, 153 insertions(+), 28 deletions(-)
Zach Brown - April 12, 2013, 9:55 p.m.
On Wed, Apr 10, 2013 at 11:37:07PM +0800, Tao Ma wrote:
> From: Tao Ma <boyu.mt@taobao.com>
> 
> Zach reported a problem that if inline data is enabled, we don't
> tell the difference between the offset of '.' and '..'. And a
> getdents will fail if the user only want to get '.' and what's worse,
> if there is a conversion happens when the user calls getdents
> many times, he/she may get the same entry twice.
> 
> In theroy, a dir block would also fail if it is converted to a
> hashed-index based dir since f_pos will become a hash value, not the
> real one, but it doesn't happen. And a deep investigation
> shows that we uses a hash based solution even for a normal dir if
> the dir_index feature is enabled.
> 
> So this patch just adds a new htree_inlinedir_to_tree for inline dir,
> and if we find that the hash index is supported, we will do like what
> we do for a dir block.

Mmm, I like the sound of that fix.  Nice and simple.

The little test app that was seeing duplicate entries now sees stable
f_pos values and no dupes after the patch.  So I think this works.

Thanks for fixing this.

- z
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Theodore Ts'o - April 19, 2013, 10 p.m.
On Wed, Apr 10, 2013 at 11:37:07PM +0800, Tao Ma wrote:
> From: Tao Ma <boyu.mt@taobao.com>
> 
> Zach reported a problem that if inline data is enabled, we don't
> tell the difference between the offset of '.' and '..'. And a
> getdents will fail if the user only want to get '.' and what's worse,
> if there is a conversion happens when the user calls getdents
> many times, he/she may get the same entry twice.
> 
> In theroy, a dir block would also fail if it is converted to a
> hashed-index based dir since f_pos will become a hash value, not the
> real one, but it doesn't happen. And a deep investigation
> shows that we uses a hash based solution even for a normal dir if
> the dir_index feature is enabled.
> 
> So this patch just adds a new htree_inlinedir_to_tree for inline dir,
> and if we find that the hash index is supported, we will do like what
> we do for a dir block.
> 
> Reported-by: Zach Brown <zab@redhat.com>
> Signed-off-by: Tao Ma <boyu.mt@taobao.com>

Thanks, applied.

					- Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index d8cd1f0..f8d56e4 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -46,7 +46,8 @@  static int is_dx_dir(struct inode *inode)
 	if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
 		     EXT4_FEATURE_COMPAT_DIR_INDEX) &&
 	    ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) ||
-	     ((inode->i_size >> sb->s_blocksize_bits) == 1)))
+	     ((inode->i_size >> sb->s_blocksize_bits) == 1) ||
+	     ext4_has_inline_data(inode)))
 		return 1;
 
 	return 0;
@@ -115,14 +116,6 @@  static int ext4_readdir(struct file *filp,
 	int ret = 0;
 	int dir_has_error = 0;
 
-	if (ext4_has_inline_data(inode)) {
-		int has_inline_data = 1;
-		ret = ext4_read_inline_dir(filp, dirent, filldir,
-					   &has_inline_data);
-		if (has_inline_data)
-			return ret;
-	}
-
 	if (is_dx_dir(inode)) {
 		err = ext4_dx_readdir(filp, dirent, filldir);
 		if (err != ERR_BAD_DX_DIR) {
@@ -136,6 +129,15 @@  static int ext4_readdir(struct file *filp,
 		ext4_clear_inode_flag(file_inode(filp),
 				      EXT4_INODE_INDEX);
 	}
+
+	if (ext4_has_inline_data(inode)) {
+		int has_inline_data = 1;
+		ret = ext4_read_inline_dir(filp, dirent, filldir,
+					   &has_inline_data);
+		if (has_inline_data)
+			return ret;
+	}
+
 	stored = 0;
 	offset = filp->f_pos & (sb->s_blocksize - 1);
 
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 3b83cd6..83ba3b4 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2511,6 +2511,11 @@  extern int ext4_try_create_inline_dir(handle_t *handle,
 extern int ext4_read_inline_dir(struct file *filp,
 				void *dirent, filldir_t filldir,
 				int *has_inline_data);
+extern int htree_inlinedir_to_tree(struct file *dir_file,
+				   struct inode *dir, ext4_lblk_t block,
+				   struct dx_hash_info *hinfo,
+				   __u32 start_hash, __u32 start_minor_hash,
+				   int *has_inline_data);
 extern struct buffer_head *ext4_find_inline_entry(struct inode *dir,
 					const struct qstr *d_name,
 					struct ext4_dir_entry_2 **res_dir,
@@ -2547,6 +2552,24 @@  extern void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
 extern int ext4_handle_dirty_dirent_node(handle_t *handle,
 					 struct inode *inode,
 					 struct buffer_head *bh);
+#define S_SHIFT 12
+static unsigned char ext4_type_by_mode[S_IFMT >> S_SHIFT] = {
+	[S_IFREG >> S_SHIFT]	= EXT4_FT_REG_FILE,
+	[S_IFDIR >> S_SHIFT]	= EXT4_FT_DIR,
+	[S_IFCHR >> S_SHIFT]	= EXT4_FT_CHRDEV,
+	[S_IFBLK >> S_SHIFT]	= EXT4_FT_BLKDEV,
+	[S_IFIFO >> S_SHIFT]	= EXT4_FT_FIFO,
+	[S_IFSOCK >> S_SHIFT]	= EXT4_FT_SOCK,
+	[S_IFLNK >> S_SHIFT]	= EXT4_FT_SYMLINK,
+};
+
+static inline void ext4_set_de_type(struct super_block *sb,
+				struct ext4_dir_entry_2 *de,
+				umode_t mode) {
+	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE))
+		de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
+}
+
 
 /* symlink.c */
 extern const struct inode_operations ext4_symlink_inode_operations;
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index c0fd1a1..abf8b62 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -19,7 +19,8 @@ 
 
 #define EXT4_XATTR_SYSTEM_DATA	"data"
 #define EXT4_MIN_INLINE_DATA_SIZE	((sizeof(__le32) * EXT4_N_BLOCKS))
-#define EXT4_INLINE_DOTDOT_SIZE	4
+#define EXT4_INLINE_DOTDOT_OFFSET	2
+#define EXT4_INLINE_DOTDOT_SIZE		4
 
 int ext4_get_inline_size(struct inode *inode)
 {
@@ -1289,6 +1290,112 @@  out:
 	return ret;
 }
 
+/*
+ * This function fills a red-black tree with information from an
+ * inlined dir.  It returns the number directory entries loaded
+ * into the tree.  If there is an error it is returned in err.
+ */
+int htree_inlinedir_to_tree(struct file *dir_file,
+			    struct inode *dir, ext4_lblk_t block,
+			    struct dx_hash_info *hinfo,
+			    __u32 start_hash, __u32 start_minor_hash,
+			    int *has_inline_data)
+{
+	int err = 0, count = 0;
+	unsigned int parent_ino;
+	int pos;
+	struct ext4_dir_entry_2 *de;
+	struct inode *inode = file_inode(dir_file);
+	int ret, inline_size = 0;
+	struct ext4_iloc iloc;
+	void *dir_buf = NULL;
+	struct ext4_dir_entry_2 fake;
+
+	ret = ext4_get_inode_loc(inode, &iloc);
+	if (ret)
+		return ret;
+
+	down_read(&EXT4_I(inode)->xattr_sem);
+	if (!ext4_has_inline_data(inode)) {
+		up_read(&EXT4_I(inode)->xattr_sem);
+		*has_inline_data = 0;
+		goto out;
+	}
+
+	inline_size = ext4_get_inline_size(inode);
+	dir_buf = kmalloc(inline_size, GFP_NOFS);
+	if (!dir_buf) {
+		ret = -ENOMEM;
+		up_read(&EXT4_I(inode)->xattr_sem);
+		goto out;
+	}
+
+	ret = ext4_read_inline_data(inode, dir_buf, inline_size, &iloc);
+	up_read(&EXT4_I(inode)->xattr_sem);
+	if (ret < 0)
+		goto out;
+
+	pos = 0;
+	parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode);
+	while (pos < inline_size) {
+		/*
+		 * As inlined dir doesn't store any information about '.' and
+		 * only the inode number of '..' is stored, we have to handle
+		 * them differently.
+		 */
+		if (pos == 0) {
+			fake.inode = cpu_to_le32(inode->i_ino);
+			fake.name_len = 1;
+			strcpy(fake.name, ".");
+			fake.rec_len = ext4_rec_len_to_disk(
+						EXT4_DIR_REC_LEN(fake.name_len),
+						inline_size);
+			ext4_set_de_type(inode->i_sb, &fake, S_IFDIR);
+			de = &fake;
+			pos = EXT4_INLINE_DOTDOT_OFFSET;
+		} else if (pos == EXT4_INLINE_DOTDOT_OFFSET) {
+			fake.inode = cpu_to_le32(parent_ino);
+			fake.name_len = 2;
+			strcpy(fake.name, "..");
+			fake.rec_len = ext4_rec_len_to_disk(
+						EXT4_DIR_REC_LEN(fake.name_len),
+						inline_size);
+			ext4_set_de_type(inode->i_sb, &fake, S_IFDIR);
+			de = &fake;
+			pos = EXT4_INLINE_DOTDOT_SIZE;
+		} else {
+			de = (struct ext4_dir_entry_2 *)(dir_buf + pos);
+			pos += ext4_rec_len_from_disk(de->rec_len, inline_size);
+			if (ext4_check_dir_entry(inode, dir_file, de,
+					 iloc.bh, dir_buf,
+					 inline_size, pos)) {
+				ret = count;
+				goto out;
+			}
+		}
+
+		ext4fs_dirhash(de->name, de->name_len, hinfo);
+		if ((hinfo->hash < start_hash) ||
+		    ((hinfo->hash == start_hash) &&
+		     (hinfo->minor_hash < start_minor_hash)))
+			continue;
+		if (de->inode == 0)
+			continue;
+		err = ext4_htree_store_dirent(dir_file,
+				   hinfo->hash, hinfo->minor_hash, de);
+		if (err) {
+			count = err;
+			goto out;
+		}
+		count++;
+	}
+	ret = count;
+out:
+	kfree(dir_buf);
+	brelse(iloc.bh);
+	return ret;
+}
+
 int ext4_read_inline_dir(struct file *filp,
 			 void *dirent, filldir_t filldir,
 			 int *has_inline_data)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 3825d6a..a35270a 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -971,6 +971,17 @@  int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
 			hinfo.hash_version +=
 				EXT4_SB(dir->i_sb)->s_hash_unsigned;
 		hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
+		if (ext4_has_inline_data(dir)) {
+			int has_inline_data = 1;
+			count = htree_inlinedir_to_tree(dir_file, dir, 0,
+							&hinfo, start_hash,
+							start_minor_hash,
+							&has_inline_data);
+			if (has_inline_data) {
+				*next_hash = ~0;
+				return count;
+			}
+		}
 		count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
 					       start_hash, start_minor_hash);
 		*next_hash = ~0;
@@ -1455,24 +1466,6 @@  struct dentry *ext4_get_parent(struct dentry *child)
 	return d_obtain_alias(ext4_iget(child->d_inode->i_sb, ino));
 }
 
-#define S_SHIFT 12
-static unsigned char ext4_type_by_mode[S_IFMT >> S_SHIFT] = {
-	[S_IFREG >> S_SHIFT]	= EXT4_FT_REG_FILE,
-	[S_IFDIR >> S_SHIFT]	= EXT4_FT_DIR,
-	[S_IFCHR >> S_SHIFT]	= EXT4_FT_CHRDEV,
-	[S_IFBLK >> S_SHIFT]	= EXT4_FT_BLKDEV,
-	[S_IFIFO >> S_SHIFT]	= EXT4_FT_FIFO,
-	[S_IFSOCK >> S_SHIFT]	= EXT4_FT_SOCK,
-	[S_IFLNK >> S_SHIFT]	= EXT4_FT_SYMLINK,
-};
-
-static inline void ext4_set_de_type(struct super_block *sb,
-				struct ext4_dir_entry_2 *de,
-				umode_t mode) {
-	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE))
-		de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
-}
-
 /*
  * Move count entries from end of map between two memory locations.
  * Returns pointer to last entry moved.