Patchwork [RFC,2/12] ext4: allocate new contiguous blocks with mballoc

login
register
mail settings
Submitter Akira Fujita
Date Sept. 27, 2008, 7:26 a.m.
Message ID <48DDE02F.2060500@rs.jp.nec.com>
Download mbox | patch
Permalink /patch/1758/
State Changes Requested
Headers show

Comments

Akira Fujita - Sept. 27, 2008, 7:26 a.m.
ext4: online defrag -- Allocate new contiguous blocks with mballoc.

From: Akira Fujita <a-fujita@rs.jp.nec.com>

Search contiguous free blocks with multi-block allocation
and allocate them for the temporary inode.

Signed-off-by: Akira Fujita <a-fujita@rs.jp.nec.com>
Signed-off-by: Takashi Sato <t-sato@yk.jp.nec.com>
---
 fs/ext4/defrag.c       |  279 +++++++++++++++++++++++++++++++++++++++++++++++-
 fs/ext4/ext4.h         |    3 +
 fs/ext4/ext4_extents.h |    3 +
 fs/ext4/extents.c      |    4 +-
 4 files changed, 285 insertions(+), 4 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/fs/ext4/defrag.c b/fs/ext4/defrag.c
index 1750714..2fc8d9f 100644
--- a/fs/ext4/defrag.c
+++ b/fs/ext4/defrag.c
@@ -34,7 +34,60 @@  static int
 ext4_defrag_next_extent(struct inode *inode, struct ext4_ext_path *path,
 			struct ext4_extent **extent)
 {
-	return 0;
+	int ppos, leaf_ppos = path->p_depth;
+
+	ppos = leaf_ppos;
+	if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
+		/* leaf block */
+		*extent = ++path[ppos].p_ext;
+		return 0;
+	}
+
+	while (--ppos >= 0) {
+		if (EXT_LAST_INDEX(path[ppos].p_hdr) >
+		    path[ppos].p_idx) {
+			int cur_ppos = ppos;
+
+			/* index block */
+			path[ppos].p_idx++;
+			path[ppos].p_block = idx_pblock(path[ppos].p_idx);
+			if (path[ppos+1].p_bh)
+				brelse(path[ppos+1].p_bh);
+			path[ppos+1].p_bh =
+				sb_bread(inode->i_sb, path[ppos].p_block);
+			if (!path[ppos+1].p_bh)
+				goto err;
+			path[ppos+1].p_hdr =
+				ext_block_hdr(path[ppos+1].p_bh);
+
+			/* Halfway index block */
+			while (++cur_ppos < leaf_ppos) {
+				path[cur_ppos].p_idx =
+					EXT_FIRST_INDEX(path[cur_ppos].p_hdr);
+				path[cur_ppos].p_block =
+					idx_pblock(path[cur_ppos].p_idx);
+				if (path[cur_ppos+1].p_bh)
+					brelse(path[cur_ppos+1].p_bh);
+				path[cur_ppos+1].p_bh = sb_bread(inode->i_sb,
+					path[cur_ppos].p_block);
+				if (!path[cur_ppos+1].p_bh)
+					goto err;
+				path[cur_ppos+1].p_hdr =
+					ext_block_hdr(path[cur_ppos+1].p_bh);
+			}
+
+			/* leaf block */
+			path[leaf_ppos].p_ext = *extent =
+				EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
+			return 0;
+		}
+	}
+	/* We found the last extent */
+	return 1;
+err:
+	if (path)
+		ext4_ext_drop_refs(path);
+	return -EIO;
 }

 int ext4_defrag_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
@@ -71,6 +124,84 @@  int ext4_defrag_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 }

 /**
+ * ext4_defrag_fill_ar - Prepare to multiple block allocate for tmp inode
+ *
+ * @org_inode:		original inode
+ * @dest_inode:		temporary inode
+ * @ar:			allocation request for multiple block allocation
+ * @org_path:		indicating the original inode's extent
+ * @dest_path:		indicating the temporary inode's extent
+ * @req_blocks:		contiguous blocks count we need
+ * @iblock:		target file offset
+ *
+ */
+static void
+ext4_defrag_fill_ar(struct inode *org_inode, struct inode *dest_inode,
+			struct ext4_allocation_request *ar,
+			struct ext4_ext_path *org_path,
+			struct ext4_ext_path *dest_path,
+			ext4_fsblk_t req_blocks, ext4_lblk_t iblock)
+{
+	ar->inode = dest_inode;
+	ar->len = req_blocks;
+	ar->logical = iblock;
+	ar->flags = EXT4_MB_HINT_DATA | EXT4_MB_HINT_RESERVED
+		| EXT4_MB_HINT_NOPREALLOC;
+	ar->lleft = 0;
+	ar->pleft = 0;
+	ar->lright = 0;
+	ar->pright = 0;
+
+	ar->goal = ext4_ext_find_goal(dest_inode, dest_path, iblock);
+}
+
+/**
+ * ext4_defrag_alloc_blocks - Allocate contiguous blocks to temporary inode
+ *
+ * @handle:		journal handle
+ * @org_inode:		original inode
+ * @dest_inode:		temporary inode for multiple block allocation
+ * @ar:			allocation request for multiple block allocation
+ * @dest_path:		indicating the temporary inode's extent
+ * @newblock:		start offset of contiguous blocks
+ *
+ * This function returns 0 if succeed, otherwise returns error value.
+ */
+static int
+ext4_defrag_alloc_blocks(handle_t *handle, struct inode *org_inode,
+		struct inode *dest_inode, struct ext4_allocation_request *ar,
+		struct ext4_ext_path *dest_path, ext4_fsblk_t *newblock)
+{
+	struct super_block *sb = org_inode->i_sb;
+	struct buffer_head *bh = NULL;
+	int err, i, credits = 0;
+
+	credits = ext4_ext_calc_credits_for_single_extent(dest_inode,
+							  ar->len, dest_path);
+	err = ext4_ext_journal_restart(handle, credits);
+	if (err)
+		return err;
+
+	*newblock = ext4_mb_new_blocks(handle, ar, &err);
+	if (err)
+		return err;
+
+	/*
+	 * Dirty buffer_head causes the overwriting
+	 * if ext4_mb_new_blocks() allocates the block
+	 * which used to be the metadata block.
+	 * We should call unmap_underlying_metadata()
+	 * to clear the dirty flag.
+	 */
+	for (i = 0; i < ar->len; i++) {
+		bh = sb_find_get_block(sb, *newblock + i);
+		unmap_underlying_metadata(sb->s_bdev, *newblock + i);
+	}
+
+	return err;
+}
+
+/**
  * ext4_defrag_partial - Defrag a file per page
  *
  * @tmp_inode:			temporary inode
@@ -91,6 +222,69 @@  ext4_defrag_partial(struct inode *tmp_inode, struct file *filp,
 }

 /**
+ * ext4_defrag_comp_ext_count- Check whether fragments are improved or not
+ *
+ * @org_inode:		original inode
+ * @path:		the structure holding some info about
+ *			original extent tree
+ * @tar_end:		the last block number of the allocated blocks
+ * @sum_tmp:		the extents count  in the allocated blocks
+ *
+ * This function returns the values as below.
+ *	0 (improved)
+ *	1 (not improved)
+ *	negative value (error case)
+ */
+static int
+ext4_defrag_comp_ext_count(struct inode *org_inode,
+			struct ext4_ext_path *org_path, ext4_lblk_t tar_end,
+			int sum_tmp)
+{
+	struct ext4_extent *ext = NULL;
+	int depth = ext_depth(org_inode);
+	int last_extent = 0;
+	int sum_org = 0;
+	int ret = 0;
+
+	ext = org_path[depth].p_ext;
+
+	/*
+	 * Compare the number of the newly allocated extents to
+	 * that of existing one.
+	 */
+	while (1) {
+		if (!last_extent)
+			++sum_org;
+		if (tar_end <= (le32_to_cpu(ext->ee_block) +
+			       le16_to_cpu(ext->ee_len) - 1) ||
+			       last_extent) {
+			/*
+			 * Fail if goal is not set and the fragmentation
+			 * is not improved.
+			 */
+			if (sum_org == sum_tmp) {
+				/* Not improved */
+				ret = 1;
+			} else if (sum_org < sum_tmp) {
+				/* Fragment increased */
+				ret = -ENOSPC;
+				printk(KERN_ERR "ext4 defrag: "
+					"Insufficient free blocks\n");
+			}
+			break;
+		}
+		last_extent =
+			ext4_defrag_next_extent(org_inode, org_path, &ext);
+		if (last_extent < 0) {
+			ret = last_extent;
+			break;
+		}
+	}
+
+	return ret;
+}
+
+/**
  * ext4_defrag_new_extent_tree - Get contiguous blocks and build an extent tree
  *
  * @org_inode:		original inode
@@ -110,7 +304,88 @@  ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode,
 			struct ext4_ext_path *org_path, ext4_lblk_t req_start,
 			ext4_lblk_t req_blocks, ext4_lblk_t iblock)
 {
-	return 0;
+	handle_t *handle;
+	struct ext4_sb_info *sbi = EXT4_SB(org_inode->i_sb);
+	struct ext4_extent_header *eh = NULL;
+	struct ext4_allocation_request ar;
+	struct ext4_ext_path *dest_path = NULL;
+	struct ext4_extent newex;
+	ext4_fsblk_t alloc_total = 0;
+	ext4_fsblk_t newblock = 0;
+	ext4_lblk_t req_end = req_start + req_blocks - 1;
+	ext4_lblk_t rest_blocks = 0;
+	int sum_tmp = 0;
+	int metadata = 1;
+	int ret;
+
+	eh = ext_inode_hdr(tmp_inode);
+	eh->eh_depth = 0;
+
+	dest_path = ext4_ext_find_extent(tmp_inode, iblock, NULL);
+	if (IS_ERR(dest_path)) {
+		ret = PTR_ERR(dest_path);
+		dest_path = NULL;
+		goto out2;
+	}
+
+	/* Fill struct ext4_allocation_request with necessary info */
+	ext4_defrag_fill_ar(org_inode, tmp_inode, &ar, org_path,
+				dest_path, req_blocks, iblock);
+
+	handle = ext4_journal_start(tmp_inode, 0);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		goto out2;
+	}
+
+	while (alloc_total != req_blocks) {
+		/* Allocate blocks */
+		ret = ext4_defrag_alloc_blocks(handle, org_inode, tmp_inode,
+						&ar, dest_path, &newblock);
+		if (ret < 0)
+			goto out;
+		/* Claimed blocks are already reserved */
+		EXT4_I(ar.inode)->i_delalloc_reserved_flag = 1;
+
+		alloc_total += ar.len;
+		rest_blocks = req_blocks - alloc_total;
+
+		newex.ee_block = cpu_to_le32(alloc_total - ar.len);
+		ext4_ext_store_pblock(&newex, newblock);
+		newex.ee_len = cpu_to_le16(ar.len);
+
+		ret = ext4_ext_insert_extent(handle, tmp_inode,
+						dest_path, &newex);
+		if (ret < 0)
+			goto out;
+
+		ar.goal = newblock + ar.len;
+		ar.len = req_blocks - alloc_total;
+		sum_tmp++;
+	}
+
+	ret = ext4_defrag_comp_ext_count(org_inode, org_path, req_end,
+					sum_tmp);
+
+out:
+	if (ret < 0 && ar.len)
+		ext4_free_blocks(handle, tmp_inode, newblock, ar.len, metadata);
+	/*
+	 * Update dirty-blocks counter if we cannot allocate the all of
+	 * requested blocks.
+	 */
+	if (rest_blocks)
+		percpu_counter_sub(&sbi->s_dirtyblocks_counter, rest_blocks);
+
+	ext4_journal_stop(handle);
+
+out2:
+	if (dest_path) {
+		ext4_ext_drop_refs(dest_path);
+		kfree(dest_path);
+	}
+
+	return ret;
 }

 /**
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 64c61ae..3c6a194 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -326,6 +326,7 @@  struct ext4_ext_defrag_data {
 	ext4_fsblk_t goal;		/* block offset for allocation */
 };

+#define EXT4_TRANS_META_BLOCKS 4 /* bitmap + group desc + sb + inode */

 /*
  *  Mount options
@@ -1137,6 +1138,8 @@  extern void ext4_inode_bitmap_set(struct super_block *sb,
 				  struct ext4_group_desc *bg, ext4_fsblk_t blk);
 extern void ext4_inode_table_set(struct super_block *sb,
 				 struct ext4_group_desc *bg, ext4_fsblk_t blk);
+/* extents.c */
+extern int ext4_ext_journal_restart(handle_t *handle, int needed);
 /* defrag.c */
 extern int ext4_defrag(struct file *filp, ext4_lblk_t block_start,
 			ext4_lblk_t defrag_size);
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 0325158..6407222 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -248,5 +248,8 @@  extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *,
 extern void ext4_ext_drop_refs(struct ext4_ext_path *);
 extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
 extern void ext4_ext_drop_refs(struct ext4_ext_path *path);
+extern ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
+					struct ext4_ext_path *path,
+					ext4_lblk_t block);
 #endif /* _EXT4_EXTENTS */

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index d05dfb7..6e2ccb8 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -93,7 +93,7 @@  static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
 	ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
 }

-static int ext4_ext_journal_restart(handle_t *handle, int needed)
+int ext4_ext_journal_restart(handle_t *handle, int needed)
 {
 	int err;

@@ -142,7 +142,7 @@  static int ext4_ext_dirty(handle_t *handle, struct inode *inode,
 	return err;
 }

-static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
+ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
 			      struct ext4_ext_path *path,
 			      ext4_lblk_t block)
 {