diff mbox

[RFC,4/12] ext4: exchange the blocks between two inodes

Message ID 48DDE037.2010006@rs.jp.nec.com
State Changes Requested
Headers show

Commit Message

Akira Fujita Sept. 27, 2008, 7:26 a.m. UTC
ext4: online defrag -- Exchange the blocks between two inodes.

From: Akira Fujita <a-fujita@rs.jp.nec.com>

Exchange the data blocks between the temporary inode and
the original inode.

Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
Signed-off-by: Takashi Sato <t-sato@yk.jp.nec.com>
---
 fs/ext4/defrag.c |  286 +++++++++++++++++++++++++++++++++++++++++++++++++++---
 fs/ext4/ext4.h   |    2 +-
 2 files changed, 274 insertions(+), 14 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/ext4/defrag.c b/fs/ext4/defrag.c
index d3ff958..4dd4318 100644
--- a/fs/ext4/defrag.c
+++ b/fs/ext4/defrag.c
@@ -103,6 +103,7 @@  int ext4_defrag_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,

 	if (cmd == EXT4_IOC_DEFRAG) {
 		struct ext4_ext_defrag_data defrag;
+		struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;

 		if (!capable(CAP_DAC_OVERRIDE)) {
 			if ((inode->i_mode & S_IRUSR) != S_IRUSR)
@@ -116,14 +117,205 @@  int ext4_defrag_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 						sizeof(defrag)))
 			return -EFAULT;

+		/* Check goal offset if goal offset was given from userspace */
+		if (defrag.goal != -1 &&
+				ext4_blocks_count(es) <= defrag.goal) {
+			printk(KERN_ERR "ext4 defrag: Invalid goal offset"
+				" %llu, you can set goal offset up to %llu\n",
+				defrag.goal, ext4_blocks_count(es) - 1);
+			return -EINVAL;
+		}
+
 		err = ext4_defrag(filp, defrag.start_offset,
-				defrag.defrag_size);
+				defrag.defrag_size, defrag.goal);
 	}

 	return err;
 }

 /**
+ * ext4_defrag_merge_across_blocks - Merge extents across leaf block
+ *
+ * @handle:		journal handle
+ * @org_inode:		original inode
+ * @o_start:		first original extent to be defraged
+ * @o_end:		last original extent to be defraged
+ * @start_ext:		first new extent to be merged
+ * @new_ext:		middle of new extent to be merged
+ * @end_ext:		last new extent to be merged
+ *
+ * This function returns 0 if succeed, otherwise returns error value.
+ */
+static int
+ext4_defrag_merge_across_blocks(handle_t *handle, struct inode *org_inode,
+		struct ext4_extent *o_start, struct ext4_extent *o_end,
+		struct ext4_extent *start_ext, struct ext4_extent *new_ext,
+		struct ext4_extent *end_ext)
+{
+	struct ext4_ext_path *org_path = NULL;
+	ext4_lblk_t eblock = 0;
+	int new_flag = 0;
+	int end_flag = 0;
+	int err;
+
+	if (le16_to_cpu(start_ext->ee_len) &&
+		le16_to_cpu(new_ext->ee_len) &&
+		le16_to_cpu(end_ext->ee_len)) {
+
+		if (o_start == o_end) {
+
+			/*       start_ext   new_ext    end_ext
+			 * dest |---------|-----------|--------|
+			 * org  |------------------------------|
+			 */
+
+			end_flag = 1;
+		} else {
+
+			/*       start_ext   new_ext   end_ext
+			 * dest |---------|----------|---------|
+			 * org  |---------------|--------------|
+			 */
+
+			o_end->ee_block = end_ext->ee_block;
+			o_end->ee_len = end_ext->ee_len;
+			ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
+		}
+
+		o_start->ee_len = start_ext->ee_len;
+		new_flag = 1;
+
+	} else if (le16_to_cpu(start_ext->ee_len) &&
+			le16_to_cpu(new_ext->ee_len) &&
+			!le16_to_cpu(end_ext->ee_len) &&
+			o_start == o_end) {
+
+		/*	 start_ext	new_ext
+		 * dest |--------------|---------------|
+		 * org  |------------------------------|
+		 */
+
+		o_start->ee_len = start_ext->ee_len;
+		new_flag = 1;
+
+	} else if (!le16_to_cpu(start_ext->ee_len) &&
+			le16_to_cpu(new_ext->ee_len) &&
+			le16_to_cpu(end_ext->ee_len) &&
+			o_start == o_end) {
+
+		/*	  new_ext	end_ext
+		 * dest |--------------|---------------|
+		 * org  |------------------------------|
+		 */
+
+		o_end->ee_block = end_ext->ee_block;
+		o_end->ee_len = end_ext->ee_len;
+		ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
+
+		/*
+		 * Set 0 to the extent block if new_ext was
+		 * the first block.
+		 */
+		if (!new_ext->ee_block)
+			eblock = 0;
+		else
+			eblock = le32_to_cpu(new_ext->ee_block);
+
+		new_flag = 1;
+	} else {
+		printk(KERN_ERR "ext4 defrag: Unexpected merge case\n");
+		return -EIO;
+	}
+
+	if (new_flag) {
+		org_path = ext4_ext_find_extent(org_inode, eblock, NULL);
+		if (IS_ERR(org_path)) {
+			err = PTR_ERR(org_path);
+			org_path = NULL;
+			goto out;
+		}
+		err = ext4_ext_insert_extent(handle, org_inode,
+					org_path, new_ext);
+		if (err)
+			goto out;
+	}
+
+	if (end_flag) {
+		org_path = ext4_ext_find_extent(org_inode,
+				le32_to_cpu(end_ext->ee_block) - 1, org_path);
+		if (IS_ERR(org_path)) {
+			err = PTR_ERR(org_path);
+			org_path = NULL;
+			goto out;
+		}
+		err = ext4_ext_insert_extent(handle, org_inode,
+					org_path, end_ext);
+		if (err)
+			goto out;
+	}
+out:
+	if (org_path) {
+		ext4_ext_drop_refs(org_path);
+		kfree(org_path);
+	}
+
+	return err;
+
+}
+
+/**
+ * ext4_defrag_merge_inside_block - Merge new extent to the extent block
+ *
+ * @o_start:		first original extent to be merged
+ * @o_end:		last original extent to be merged
+ * @start_ext:		first new extent to be merged
+ * @new_ext:		middle of new extent to be merged
+ * @end_ext:		last new extent to be merged
+ * @eh:			extent header of target leaf block
+ * @replaced:		the number of blocks which will be replaced with new_ext
+ * @range_to_move:	used to decide how to merge
+ *
+ * This function always returns 0.
+ */
+static int
+ext4_defrag_merge_inside_block(struct ext4_extent *o_start,
+		struct ext4_extent *o_end, struct ext4_extent *start_ext,
+		struct ext4_extent *new_ext, struct ext4_extent *end_ext,
+		struct ext4_extent_header *eh, ext4_fsblk_t replaced,
+		int range_to_move)
+{
+	int i = 0;
+	unsigned len;
+
+	/* Move the existing extents */
+	if (range_to_move && o_end < EXT_LAST_EXTENT(eh)) {
+		len = (unsigned long)(EXT_LAST_EXTENT(eh) + 1) -
+			(unsigned long)(o_end + 1);
+		memmove(o_end + 1 + range_to_move, o_end + 1, len);
+	}
+
+	/* Insert start entry */
+	if (le16_to_cpu(start_ext->ee_len))
+		o_start[i++].ee_len = start_ext->ee_len;
+
+	/* Insert new entry */
+	if (le16_to_cpu(new_ext->ee_len)) {
+		o_start[i].ee_block = new_ext->ee_block;
+		o_start[i].ee_len = cpu_to_le16(replaced);
+		ext4_ext_store_pblock(&o_start[i++], ext_pblock(new_ext));
+	}
+
+	/* Insert end entry */
+	if (end_ext->ee_len)
+		o_start[i] = *end_ext;
+
+	/* Increment the total entries counter on the extent block */
+	le16_add_cpu(&eh->eh_entries, range_to_move);
+
+	return 0;
+}
+
+/**
  * ext4_defrag_merge_extents - Merge new extent
  *
  * @handle:	journal handle
@@ -145,6 +337,63 @@  ext4_defrag_merge_extents(handle_t *handle, struct inode *org_inode,
 		struct ext4_extent *start_ext, struct ext4_extent *new_ext,
 		struct ext4_extent *end_ext, ext4_fsblk_t replaced)
 {
+	struct  ext4_extent_header *eh;
+	unsigned need_slots, slots_range;
+	int	range_to_move, depth, ret;
+
+	/*
+	 * The extents need to be inserted
+	 * start_extent + new_extent + end_extent.
+	 */
+	need_slots = (le16_to_cpu(start_ext->ee_len) ? 1 : 0) +
+			(le16_to_cpu(end_ext->ee_len) ? 1 : 0) +
+			(le16_to_cpu(new_ext->ee_len) ? 1 : 0);
+
+	/* The number of slots between start and end */
+	slots_range = ((unsigned long)(o_end + 1) - (unsigned long)o_start + 1)
+					/ sizeof(struct ext4_extent);
+
+	/* Range to move the end of extent */
+	range_to_move = need_slots - slots_range;
+	depth = org_path->p_depth;
+	org_path += depth;
+	eh = org_path->p_hdr;
+
+	if (depth) {
+		/* Register to journal */
+		ret = ext4_journal_get_write_access(handle, org_path->p_bh);
+		if (ret)
+			return ret;
+	}
+
+	/* Expansion */
+	if (range_to_move > 0 &&
+		(range_to_move > le16_to_cpu(eh->eh_max)
+			- le16_to_cpu(eh->eh_entries))) {
+
+		ret = ext4_defrag_merge_across_blocks(handle, org_inode,
+					o_start, o_end, start_ext, new_ext,
+					end_ext);
+		if (ret < 0)
+			return ret;
+	} else {
+		ret = ext4_defrag_merge_inside_block(o_start, o_end,
+					start_ext, new_ext, end_ext, eh,
+					replaced, range_to_move);
+		if (ret < 0)
+			return ret;
+	}
+
+	if (depth) {
+		ret = ext4_journal_dirty_metadata(handle, org_path->p_bh);
+		if (ret)
+			return ret;
+	} else {
+		ret = ext4_mark_inode_dirty(handle, org_inode);
+		if (ret < 0)
+			return ret;
+	}
+
 	return 0;

 }
@@ -489,6 +738,7 @@  out:
  * @dest_path:		indicating the temporary inode's extent
  * @req_blocks:		contiguous blocks count we need
  * @iblock:		target file offset
+ * @goal:		goal offset
  *
  */
 static void
@@ -496,7 +746,8 @@  ext4_defrag_fill_ar(struct inode *org_inode, struct inode *dest_inode,
 			struct ext4_allocation_request *ar,
 			struct ext4_ext_path *org_path,
 			struct ext4_ext_path *dest_path,
-			ext4_fsblk_t req_blocks, ext4_lblk_t iblock)
+			ext4_fsblk_t req_blocks, ext4_lblk_t iblock,
+			ext4_fsblk_t goal)
 {
 	ar->inode = dest_inode;
 	ar->len = req_blocks;
@@ -508,7 +759,10 @@  ext4_defrag_fill_ar(struct inode *org_inode, struct inode *dest_inode,
 	ar->lright = 0;
 	ar->pright = 0;

-	ar->goal = ext4_ext_find_goal(dest_inode, dest_path, iblock);
+	if (goal)
+		ar->goal = goal;
+	else
+		ar->goal = ext4_ext_find_goal(dest_inode, dest_path, iblock);
 }

 /**
@@ -705,6 +959,7 @@  out:
  *			original extent tree
  * @tar_end:		the last block number of the allocated blocks
  * @sum_tmp:		the extents count  in the allocated blocks
+ * @goal:		block offset for allocation
  *
  * This function returns the values as below.
  *	0 (improved)
@@ -714,7 +969,7 @@  out:
 static int
 ext4_defrag_comp_ext_count(struct inode *org_inode,
 			struct ext4_ext_path *org_path, ext4_lblk_t tar_end,
-			int sum_tmp)
+			int sum_tmp, ext4_fsblk_t goal)
 {
 	struct ext4_extent *ext = NULL;
 	int depth = ext_depth(org_inode);
@@ -738,7 +993,7 @@  ext4_defrag_comp_ext_count(struct inode *org_inode,
 			 * Fail if goal is not set and the fragmentation
 			 * is not improved.
 			 */
-			if (sum_org == sum_tmp) {
+			if (sum_org == sum_tmp && !goal) {
 				/* Not improved */
 				ret = 1;
 			} else if (sum_org < sum_tmp) {
@@ -769,6 +1024,7 @@  ext4_defrag_comp_ext_count(struct inode *org_inode,
  * @req_start:		starting offset to allocate in blocks
  * @req_blocks:		the number of blocks to allocate
  * @iblock:		file related offset
+ * @goal:		block offset for allocation
  *
  * This function returns the value as below:
  *	0 (succeed)
@@ -778,7 +1034,8 @@  ext4_defrag_comp_ext_count(struct inode *org_inode,
 static int
 ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode,
 			struct ext4_ext_path *org_path, ext4_lblk_t req_start,
-			ext4_lblk_t req_blocks, ext4_lblk_t iblock)
+			ext4_lblk_t req_blocks, ext4_lblk_t iblock,
+			ext4_fsblk_t goal)
 {
 	handle_t *handle;
 	struct ext4_sb_info *sbi = EXT4_SB(org_inode->i_sb);
@@ -806,7 +1063,7 @@  ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode,

 	/* Fill struct ext4_allocation_request with necessary info */
 	ext4_defrag_fill_ar(org_inode, tmp_inode, &ar, org_path,
-				dest_path, req_blocks, iblock);
+				dest_path, req_blocks, iblock, goal);

 	handle = ext4_journal_start(tmp_inode, 0);
 	if (IS_ERR(handle)) {
@@ -841,7 +1098,7 @@  ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode,
 	}

 	ret = ext4_defrag_comp_ext_count(org_inode, org_path, req_end,
-					sum_tmp);
+					sum_tmp, goal);

 out:
 	if (ret < 0 && ar.len)
@@ -869,11 +1126,13 @@  out2:
  *
  * @org_inode:		original inode
  * @defrag_size:	size of defrag in blocks
+ * @goal:		pointer to block offset for allocation
  *
  * This function returns 0 if succeed, otherwise returns error value.
  */
 static int
-ext4_defrag_check(struct inode *org_inode, ext4_lblk_t defrag_size)
+ext4_defrag_check(struct inode *org_inode, ext4_lblk_t defrag_size,
+		ext4_fsblk_t *goal)
 {
 	/* ext4 online defrag needs mballoc mount option. */
 	if (!test_opt(org_inode->i_sb, MBALLOC)) {
@@ -962,13 +1221,14 @@  out:
  * @filp:		pointer to file
  * @block_start:	starting offset to defrag in blocks
  * @defrag_size:	size of defrag in blocks
+ * @goal:		block offset for allocation
  *
  * This function returns the number of blocks if succeed, otherwise
  * returns error value.
  */
 int
 ext4_defrag(struct file *filp, ext4_lblk_t block_start,
-		ext4_lblk_t defrag_size)
+		ext4_lblk_t defrag_size, ext4_fsblk_t goal)
 {
 	struct inode *org_inode = filp->f_dentry->d_inode, *tmp_inode = NULL;
 	struct ext4_ext_path *org_path = NULL, *holecheck_path = NULL;
@@ -983,7 +1243,7 @@  ext4_defrag(struct file *filp, ext4_lblk_t block_start,
 	int block_len_in_page;

 	/* Check the filesystem environment whether defrag can be done */
-	ret = ext4_defrag_check(org_inode, defrag_size);
+	ret = ext4_defrag_check(org_inode, defrag_size, &goal);
 	if (ret < 0)
 		return ret;

@@ -1093,14 +1353,14 @@  ext4_defrag(struct file *filp, ext4_lblk_t block_start,
 		}

 		/* Found an isolated block */
-		if (seq_extents == 1) {
+		if (seq_extents == 1 && !goal) {
 			seq_start = le32_to_cpu(ext_cur->ee_block);
 			goto CLEANUP;
 		}

 		ret = ext4_defrag_new_extent_tree(org_inode, tmp_inode,
 					org_path, seq_start, seq_blocks,
-					block_start);
+					block_start, goal);

 		if (ret < 0) {
 			break;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 3c6a194..556ff5e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1142,7 +1142,7 @@  extern void ext4_inode_table_set(struct super_block *sb,
 extern int ext4_ext_journal_restart(handle_t *handle, int needed);
 /* defrag.c */
 extern int ext4_defrag(struct file *filp, ext4_lblk_t block_start,
-			ext4_lblk_t defrag_size);
+			ext4_lblk_t defrag_size, ext4_fsblk_t goal);
 extern int ext4_defrag_ioctl(struct inode *, struct file *, unsigned int,
 				unsigned long);