[v2] ext4: shrink directory when last block is empty

Message ID 20190201093813.122491-1-harshadshirwadkar@gmail.com
State Superseded
Headers show
Series
  • [v2] ext4: shrink directory when last block is empty
Related show

Commit Message

harshad shirwadkar Feb. 1, 2019, 9:38 a.m.
From: Harshad Shirwadkar <harshadshirwadkar@gmail.com>

This patch is the first step towards shrinking htree based directories
when files are deleted. We truncate directory inode when a directory
entry removal causes last directory block to be empty. In order to be
backwards compatible, we don't remove empty last directory block if it
results in one of the intermediate htree nodes to be empty.

Changes since v1:
- not freeing the last block if it makes intermediate htree block
  empty
- style fixes

Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
---
 fs/ext4/namei.c | 134 +++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 110 insertions(+), 24 deletions(-)

Comments

Andreas Dilger Feb. 4, 2019, 7:31 p.m. | #1
On Feb 1, 2019, at 2:38 AM, harshadshirwadkar@gmail.com wrote:
> 
> From: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
> 
> This patch is the first step towards shrinking htree based directories
> when files are deleted. We truncate directory inode when a directory
> entry removal causes last directory block to be empty. In order to be
> backwards compatible, we don't remove empty last directory block if it
> results in one of the intermediate htree nodes to be empty.
> 
> Changes since v1:
> - not freeing the last block if it makes intermediate htree block empty
> - style fixes
> 
> Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>

Some minor style nits below, but looks fine otherwise, you can add:

Reviewed-by: Andreas Dilger <adilger@dilger.ca>

Have you run any kind of test (e.g. create 100k entries, ls -ld to get the
directory size, delete all entries, ls -ld to get the final "empty" size)
to see how much the directory shrinks from the maximum size?  Some info
about how well this works without further optimizations would be useful to
include into the commit comment.

It would also be good to run a loop with the above workload creating files
with different names (to change hash ordering), then unmount the filesystem
and run "e2fsck -f" on it to verify there are no errors introduced.

As the next step, it makes sense to implement checking if the previous leaf
block in the directory is also empty and free that as well.  Even before we
get to recursively freeing htree index blocks, being able to free all but
one leaf block per htree would give us a 509x reduction in space used by the
directory.

Cheers, Andreas

> +static inline bool is_empty_dirent_block(struct inode *dir,
> +					 struct buffer_head *bh)
> +{
> +	struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)bh->b_data;
> +
> +	return 	ext4_rec_len_from_disk(de->rec_len, dir->i_sb->s_blocksize)

(style) remove extra space after "return"

> +			== dir->i_sb->s_blocksize && de->inode == 0;
> +}

(style) "==" should go at the end of the previous line


Cheers, Andreas

Patch

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 2a4c25c4681d..6b9c813f736c 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -273,7 +273,8 @@  static int ext4_htree_next_block(struct inode *dir, __u32 hash,
 				 __u32 *start_hash);
 static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
 		struct ext4_filename *fname,
-		struct ext4_dir_entry_2 **res_dir);
+		struct ext4_dir_entry_2 **res_dir,
+		struct dx_frame *dx_frame);
 static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
 			     struct inode *dir, struct inode *inode);
 
@@ -866,6 +867,47 @@  dx_probe(struct ext4_filename *fname, struct inode *dir,
 	return ret_err;
 }
 
+static bool
+ext4_dx_delete_entry(handle_t *handle, struct inode *dir,
+		     struct dx_frame *dx_frame, __le64 block)
+{
+	struct dx_entry *entries;
+	unsigned int count;
+	int err, i;
+
+	entries = dx_frame->entries;
+	count = dx_get_count(entries);
+
+	if (count == 1)
+		return false;
+
+	for (i = 0; i < count; i++)
+		if (entries[i].block == block)
+			break;
+
+	if (i >= count)
+		return false;
+
+	err = ext4_journal_get_write_access(handle, dx_frame->bh);
+	if (err) {
+		ext4_std_error(dir->i_sb, err);
+		return false;
+	}
+
+	for (; i < count - 1; i++)
+		entries[i] = entries[i + 1];
+
+	dx_set_count(entries, count - 1);
+
+	err = ext4_handle_dirty_dx_node(handle, dir, dx_frame->bh);
+	if (err) {
+		ext4_std_error(dir->i_sb, err);
+		return false;
+	}
+
+	return true;
+}
+
 static void dx_release(struct dx_frame *frames)
 {
 	struct dx_root_info *info;
@@ -1309,6 +1351,24 @@  int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
 	return 0;
 }
 
+static inline bool is_empty_dirent_block(struct inode *dir,
+					 struct buffer_head *bh)
+{
+	struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)bh->b_data;
+
+	return 	ext4_rec_len_from_disk(de->rec_len, dir->i_sb->s_blocksize)
+			== dir->i_sb->s_blocksize && de->inode == 0;
+}
+
+static inline bool should_try_dx_delete(struct dx_frame *dx_frame,
+					struct buffer_head *bh,
+					struct inode *dir)
+{
+	return dx_frame && dx_frame->bh && is_empty_dirent_block(dir, bh) &&
+			dx_get_block(dx_frame->at) ==
+			(dir->i_size - 1) >> dir->i_sb->s_blocksize_bits;
+}
+
 static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
 			       struct ext4_dir_entry *de)
 {
@@ -1336,10 +1396,11 @@  static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
  * The returned buffer_head has ->b_count elevated.  The caller is expected
  * to brelse() it when appropriate.
  */
-static struct buffer_head * ext4_find_entry (struct inode *dir,
-					const struct qstr *d_name,
-					struct ext4_dir_entry_2 **res_dir,
-					int *inlined)
+static struct buffer_head *ext4_find_entry(struct inode *dir,
+					   const struct qstr *d_name,
+					   struct ext4_dir_entry_2 **res_dir,
+					   int *inlined,
+					   struct dx_frame *dx_frame)
 {
 	struct super_block *sb;
 	struct buffer_head *bh_use[NAMEI_RA_SIZE];
@@ -1388,7 +1449,7 @@  static struct buffer_head * ext4_find_entry (struct inode *dir,
 		goto restart;
 	}
 	if (is_dx(dir)) {
-		ret = ext4_dx_find_entry(dir, &fname, res_dir);
+		ret = ext4_dx_find_entry(dir, &fname, res_dir, dx_frame);
 		/*
 		 * On success, or if the error was file not found,
 		 * return.  Otherwise, fall back to doing a search the
@@ -1486,9 +1547,10 @@  static struct buffer_head * ext4_find_entry (struct inode *dir,
 	return ret;
 }
 
-static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
-			struct ext4_filename *fname,
-			struct ext4_dir_entry_2 **res_dir)
+static struct buffer_head *ext4_dx_find_entry(struct inode *dir,
+					      struct ext4_filename *fname,
+					      struct ext4_dir_entry_2 **res_dir,
+					      struct dx_frame *dx_frame)
 {
 	struct super_block * sb = dir->i_sb;
 	struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
@@ -1511,8 +1573,13 @@  static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
 		retval = search_dirblock(bh, dir, fname,
 					 block << EXT4_BLOCK_SIZE_BITS(sb),
 					 res_dir);
-		if (retval == 1)
+		if (retval == 1) {
+			if (dx_frame) {
+				*dx_frame = *frame;
+				get_bh(dx_frame->bh);
+			}
 			goto success;
+		}
 		brelse(bh);
 		if (retval == -1) {
 			bh = ERR_PTR(ERR_BAD_DX_DIR);
@@ -1553,7 +1620,7 @@  static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
 	if (dentry->d_name.len > EXT4_NAME_LEN)
 		return ERR_PTR(-ENAMETOOLONG);
 
-	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
+	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL, NULL);
 	if (IS_ERR(bh))
 		return (struct dentry *) bh;
 	inode = NULL;
@@ -1597,7 +1664,7 @@  struct dentry *ext4_get_parent(struct dentry *child)
 	struct ext4_dir_entry_2 * de;
 	struct buffer_head *bh;
 
-	bh = ext4_find_entry(d_inode(child), &dotdot, &de, NULL);
+	bh = ext4_find_entry(d_inode(child), &dotdot, &de, NULL, NULL);
 	if (IS_ERR(bh))
 		return (struct dentry *) bh;
 	if (!bh)
@@ -2337,9 +2404,11 @@  int ext4_generic_delete_entry(handle_t *handle,
 static int ext4_delete_entry(handle_t *handle,
 			     struct inode *dir,
 			     struct ext4_dir_entry_2 *de_del,
-			     struct buffer_head *bh)
+			     struct buffer_head *bh,
+			     struct dx_frame *dx_frame)
 {
 	int err, csum_size = 0;
+	bool should_truncate = false;
 
 	if (ext4_has_inline_data(dir)) {
 		int has_inline_data = 1;
@@ -2363,11 +2432,21 @@  static int ext4_delete_entry(handle_t *handle,
 	if (err)
 		goto out;
 
+	if (should_try_dx_delete(dx_frame, bh, dir) &&
+	    ext4_dx_delete_entry(handle, dir, dx_frame,
+				 cpu_to_le64(dx_get_block(dx_frame->at))))
+		should_truncate = true;
+
 	BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
 	err = ext4_handle_dirty_dirent_node(handle, dir, bh);
 	if (unlikely(err))
 		goto out;
 
+	if (should_truncate) {
+		dir->i_size -= dir->i_sb->s_blocksize;
+		ext4_truncate(dir);
+	}
+
 	return 0;
 out:
 	if (err != -ENOENT)
@@ -2923,7 +3003,7 @@  static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
 		return retval;
 
 	retval = -ENOENT;
-	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
+	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL, NULL);
 	if (IS_ERR(bh))
 		return PTR_ERR(bh);
 	if (!bh)
@@ -2950,7 +3030,7 @@  static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
 	if (IS_DIRSYNC(dir))
 		ext4_handle_sync(handle);
 
-	retval = ext4_delete_entry(handle, dir, de, bh);
+	retval = ext4_delete_entry(handle, dir, de, bh, NULL);
 	if (retval)
 		goto end_rmdir;
 	if (!EXT4_DIR_LINK_EMPTY(inode))
@@ -2985,6 +3065,7 @@  static int ext4_unlink(struct inode *dir, struct dentry *dentry)
 	struct buffer_head *bh;
 	struct ext4_dir_entry_2 *de;
 	handle_t *handle = NULL;
+	struct dx_frame dx_frame = { NULL };
 
 	if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
 		return -EIO;
@@ -3000,7 +3081,7 @@  static int ext4_unlink(struct inode *dir, struct dentry *dentry)
 		return retval;
 
 	retval = -ENOENT;
-	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
+	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL, &dx_frame);
 	if (IS_ERR(bh))
 		return PTR_ERR(bh);
 	if (!bh)
@@ -3028,9 +3109,9 @@  static int ext4_unlink(struct inode *dir, struct dentry *dentry)
 				   dentry->d_name.len, dentry->d_name.name);
 		set_nlink(inode, 1);
 	}
-	retval = ext4_delete_entry(handle, dir, de, bh);
+	retval = ext4_delete_entry(handle, dir, de, bh, &dx_frame);
 	if (retval)
 		goto end_unlink;
 	dir->i_ctime = dir->i_mtime = current_time(dir);
 	ext4_update_dx_flag(dir);
 	ext4_mark_inode_dirty(handle, dir);
@@ -3042,6 +3124,8 @@  static int ext4_unlink(struct inode *dir, struct dentry *dentry)
 
 end_unlink:
 	brelse(bh);
+	if (dx_frame.bh)
+		brelse(dx_frame.bh);
 	if (handle)
 		ext4_journal_stop(handle);
 	trace_ext4_unlink_exit(dentry, retval);
@@ -3362,11 +3446,11 @@  static int ext4_find_delete_entry(handle_t *handle, struct inode *dir,
 	struct buffer_head *bh;
 	struct ext4_dir_entry_2 *de;
 
-	bh = ext4_find_entry(dir, d_name, &de, NULL);
+	bh = ext4_find_entry(dir, d_name, &de, NULL, NULL);
 	if (IS_ERR(bh))
 		return PTR_ERR(bh);
 	if (bh) {
-		retval = ext4_delete_entry(handle, dir, de, bh);
+		retval = ext4_delete_entry(handle, dir, de, bh, NULL);
 		brelse(bh);
 	}
 	return retval;
@@ -3390,7 +3474,8 @@  static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent,
 		retval = ext4_find_delete_entry(handle, ent->dir,
 						&ent->dentry->d_name);
 	} else {
-		retval = ext4_delete_entry(handle, ent->dir, ent->de, ent->bh);
+		retval = ext4_delete_entry(handle, ent->dir, ent->de, ent->bh,
+					   NULL);
 		if (retval == -ENOENT) {
 			retval = ext4_find_delete_entry(handle, ent->dir,
 							&ent->dentry->d_name);
@@ -3497,7 +3582,8 @@  static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
 			return retval;
 	}
 
-	old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
+	old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL,
+				 NULL);
 	if (IS_ERR(old.bh))
 		return PTR_ERR(old.bh);
 	/*
@@ -3511,7 +3597,7 @@  static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
 		goto end_rename;
 
 	new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
-				 &new.de, &new.inlined);
+				 &new.de, &new.inlined, NULL);
 	if (IS_ERR(new.bh)) {
 		retval = PTR_ERR(new.bh);
 		new.bh = NULL;
@@ -3691,7 +3777,7 @@  static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
 		return retval;
 
 	old.bh = ext4_find_entry(old.dir, &old.dentry->d_name,
-				 &old.de, &old.inlined);
+				 &old.de, &old.inlined, NULL);
 	if (IS_ERR(old.bh))
 		return PTR_ERR(old.bh);
 	/*
@@ -3705,7 +3791,7 @@  static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
 		goto end_rename;
 
 	new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
-				 &new.de, &new.inlined);
+				 &new.de, &new.inlined, NULL);
 	if (IS_ERR(new.bh)) {
 		retval = PTR_ERR(new.bh);
 		new.bh = NULL;