From patchwork Sat Sep 27 07:27:16 2008 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Akira Fujita X-Patchwork-Id: 1766 Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Received: from vger.kernel.org (vger.kernel.org [209.132.176.167]) by ozlabs.org (Postfix) with ESMTP id 4CED4DDD0C for ; Sat, 27 Sep 2008 17:27:23 +1000 (EST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752115AbYI0H1U (ORCPT ); Sat, 27 Sep 2008 03:27:20 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1752044AbYI0H1U (ORCPT ); Sat, 27 Sep 2008 03:27:20 -0400 Received: from TYO201.gate.nec.co.jp ([202.32.8.193]:42383 "EHLO tyo201.gate.nec.co.jp" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752037AbYI0H1S (ORCPT ); Sat, 27 Sep 2008 03:27:18 -0400 Received: from mailgate3.nec.co.jp ([10.7.69.161]) by tyo201.gate.nec.co.jp (8.13.8/8.13.4) with ESMTP id m8R7RHYX001786; Sat, 27 Sep 2008 16:27:17 +0900 (JST) Received: (from root@localhost) by mailgate3.nec.co.jp (8.11.7/3.7W-MAILGATE-NEC) id m8R7RHl22384; Sat, 27 Sep 2008 16:27:17 +0900 (JST) Received: from kuichi.jp.nec.com (kuichi.jp.nec.com [10.26.220.17]) by mailsv3.nec.co.jp (8.13.8/8.13.4) with ESMTP id m8R7RGmd014443; Sat, 27 Sep 2008 16:27:16 +0900 (JST) Received: from [10.64.168.93] ([10.64.168.93] [10.64.168.93]) by mail.jp.nec.com with ESMTP; Sat, 27 Sep 2008 16:27:16 +0900 Message-ID: <48DDE054.40708@rs.jp.nec.com> Date: Sat, 27 Sep 2008 16:27:16 +0900 From: Akira Fujita User-Agent: Thunderbird 2.0.0.14 (Windows/20080421) MIME-Version: 1.0 To: linux-ext4@vger.kernel.org CC: linux-fsdevel@vger.kernel.org Subject: [RFC][PATCH 10/12]ext4: Add the EXT4_IOC_MOVE_VICTIM ioctl Sender: linux-ext4-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-ext4@vger.kernel.org ext4: online defrag -- Add the EXT4_IOC_MOVE_VICTIM ioctl. 
From: Akira Fujita The EXT4_IOC_MOVE_VICTIM ioctl moves the victim extents into another block group, so that contiguous free space is created in the target block group. This ioctl is used only in force defrag mode (-f). Signed-off-by: Akira Fujita Signed-off-by: Takashi Sato --- fs/ext4/balloc.c | 1 + fs/ext4/defrag.c | 262 ++++++++++++++++++++++++++++++++++++++++++------ fs/ext4/ext4.h | 18 +++- fs/ext4/ext4_extents.h | 5 + fs/ext4/extents.c | 54 ++++++++-- fs/ext4/ioctl.c | 3 +- fs/ext4/mballoc.c | 5 + fs/ext4/mballoc.h | 1 + 8 files changed, 307 insertions(+), 42 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 2344a96..969e996 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -2026,6 +2026,7 @@ static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode, ar.goal = goal; ar.len = *count; ar.logical = iblock; + ar.excepted_group = -1; if (S_ISREG(inode->i_mode) && !(flags & EXT4_META_BLOCK)) /* enable in-core preallocation for data block allocation */ diff --git a/fs/ext4/defrag.c b/fs/ext4/defrag.c index 26fb4a6..a2b17c5 100644 --- a/fs/ext4/defrag.c +++ b/fs/ext4/defrag.c @@ -380,6 +380,80 @@ err: } /** + * ext4_defrag_move_victim - Create free space for defrag + * + * @target_filp: target file + * @ext_info: target extents array to move + * + * This function returns 0 if succeed, otherwise + * returns error value. 
+ */ +static int +ext4_defrag_move_victim(struct file *target_filp, + struct ext4_extents_info *ext_info) +{ + struct inode *org_inode = target_filp->f_dentry->d_inode; + struct super_block *sb = org_inode->i_sb; + struct file victim_file; + struct dentry victim_dent; + struct inode *victim_inode; + struct ext4_extent_data ext; + ext4_fsblk_t goal = ext_info->goal; + ext4_group_t group; + ext4_grpblk_t grp_off; + int ret, i; + + /* Setup dummy extent data */ + ext.len = 0; + + /* Get the inode of the victim file */ + victim_inode = ext4_iget(sb, ext_info->ino); + if (IS_ERR(victim_inode)) + return PTR_ERR(victim_inode); + + /* Setup file for the victim file */ + victim_dent.d_inode = victim_inode; + victim_file.f_dentry = &victim_dent; + victim_file.f_mapping = victim_inode->i_mapping; + + /* Set the goal appropriate offset */ + if (goal == -1) { + ext4_get_group_no_and_offset(victim_inode->i_sb, + ext_info->ext[0].start, &group, &grp_off); + goal = ext4_group_first_block_no(sb, group + 1); + } + + for (i = 0; i < ext_info->entries; i++) { + /* Move original blocks to another block group */ + ret = ext4_defrag(&victim_file, ext_info->ext[i].block, + ext_info->ext[i].len, goal, DEFRAG_FORCE_VICTIM, &ext); + if (ret < 0) { + printk(KERN_ERR "ext4 defrag: " + "Moving victim file failed. 
ino [%llu]\n", + ext_info->ino); + goto err; + } + + /* Sync journal blocks before reservation */ + ret = ext4_force_commit(sb); + if (ret) { + printk(KERN_ERR "ext4 defrag: " + "ext4_force_commit failed(%d)\n", ret); + goto err; + } + } + + iput(victim_inode); + return 0; +err: + down_write(&EXT4_I(org_inode)->i_data_sem); + ext4_discard_reservation(org_inode); + up_write(&EXT4_I(org_inode)->i_data_sem); + iput(victim_inode); + return ret; +} + +/** * ext4_defrag_fblocks_distribution - Search free blocks distribution * * @org_inode: original inode @@ -538,6 +612,16 @@ int ext4_defrag_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, return -EFAULT; err = ext4_defrag_reserve_fblocks(inode, &ext_info); + } else if (cmd == EXT4_IOC_MOVE_VICTIM) { + struct ext4_extents_info ext_info; + + if (copy_from_user(&ext_info, + (struct ext4_extents_info __user *)arg, + sizeof(ext_info))) + return -EFAULT; + + err = ext4_defrag_move_victim(filp, &ext_info); + } else if (cmd == EXT4_IOC_DEFRAG) { struct ext4_ext_defrag_data defrag; struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; @@ -564,7 +648,8 @@ int ext4_defrag_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, } err = ext4_defrag(filp, defrag.start_offset, - defrag.defrag_size, defrag.goal); + defrag.defrag_size, defrag.goal, defrag.flag, + &defrag.ext); } return err; @@ -580,6 +665,7 @@ int ext4_defrag_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, * @start_ext: first new extent to be merged * @new_ext: middle of new extent to be merged * @end_ext: last new extent to be merged + * @phase: phase of the force defrag mode * * This function returns 0 if succeed, otherwise returns error value. 
*/ @@ -587,14 +673,20 @@ static int ext4_defrag_merge_across_blocks(handle_t *handle, struct inode *org_inode, struct ext4_extent *o_start, struct ext4_extent *o_end, struct ext4_extent *start_ext, struct ext4_extent *new_ext, - struct ext4_extent *end_ext) + struct ext4_extent *end_ext, int phase) { struct ext4_ext_path *org_path = NULL; ext4_lblk_t eblock = 0; int new_flag = 0; int end_flag = 0; + int defrag_flag; int err; + if (phase == DEFRAG_FORCE_VICTIM) + defrag_flag = 1; + else + defrag_flag = 0; + if (le16_to_cpu(start_ext->ee_len) && le16_to_cpu(new_ext->ee_len) && le16_to_cpu(end_ext->ee_len)) { @@ -671,8 +763,8 @@ ext4_defrag_merge_across_blocks(handle_t *handle, struct inode *org_inode, org_path = NULL; goto out; } - err = ext4_ext_insert_extent(handle, org_inode, - org_path, new_ext); + err = ext4_ext_insert_extent_defrag(handle, org_inode, + org_path, new_ext, defrag_flag); if (err) goto out; } @@ -685,8 +777,8 @@ ext4_defrag_merge_across_blocks(handle_t *handle, struct inode *org_inode, org_path = NULL; goto out; } - err = ext4_ext_insert_extent(handle, org_inode, - org_path, end_ext); + err = ext4_ext_insert_extent_defrag(handle, org_inode, + org_path, end_ext, defrag_flag); if (err) goto out; } @@ -764,6 +856,7 @@ ext4_defrag_merge_inside_block(struct ext4_extent *o_start, * @new_ext: middle of new extent to be merged * @end_ext: last new extent to be merged * @replaced: the number of blocks which will be replaced with new_ext + * @phase: phase of the force defrag mode * * This function returns 0 if succeed, otherwise returns error value. 
*/ @@ -772,7 +865,7 @@ ext4_defrag_merge_extents(handle_t *handle, struct inode *org_inode, struct ext4_ext_path *org_path, struct ext4_extent *o_start, struct ext4_extent *o_end, struct ext4_extent *start_ext, struct ext4_extent *new_ext, - struct ext4_extent *end_ext, ext4_fsblk_t replaced) + struct ext4_extent *end_ext, ext4_fsblk_t replaced, int phase) { struct ext4_extent_header *eh; unsigned need_slots, slots_range; @@ -810,7 +903,7 @@ ext4_defrag_merge_extents(handle_t *handle, struct inode *org_inode, ret = ext4_defrag_merge_across_blocks(handle, org_inode, o_start, o_end, start_ext, new_ext, - end_ext); + end_ext, phase); if (ret < 0) return ret; } else { @@ -843,13 +936,14 @@ ext4_defrag_merge_extents(handle_t *handle, struct inode *org_inode, * @org_path: path indicates first extent to be defraged * @dext: destination extent * @from: start offset on the target file + * @phase: phase of the force defrag mode * * This function returns 0 if succeed, otherwise returns error value. */ static int ext4_defrag_leaf_block(handle_t *handle, struct inode *org_inode, struct ext4_ext_path *org_path, struct ext4_extent *dext, - ext4_lblk_t *from) + ext4_lblk_t *from, int phase) { struct ext4_extent *oext, *o_start = NULL, *o_end = NULL, *prev_ext; struct ext4_extent new_ext, start_ext, end_ext; @@ -950,7 +1044,7 @@ ext4_defrag_leaf_block(handle_t *handle, struct inode *org_inode, + le16_to_cpu(oext->ee_len) - 1) { ret = ext4_defrag_merge_extents(handle, org_inode, org_path, o_start, o_end, &start_ext, - &new_ext, &end_ext, replaced); + &new_ext, &end_ext, replaced, phase); if (ret < 0) return ret; @@ -1002,6 +1096,7 @@ ext4_defrag_leaf_block(handle_t *handle, struct inode *org_inode, * @from: block offset of org_inode * @dest_off: block offset of dest_inode * @count: block count to be replaced + * @phase: phase of the force defrag mode * * This function returns 0 if succeed, otherwise returns error value. * Replace extents for blocks from "from" to "from + count - 1". 
@@ -1009,7 +1104,7 @@ ext4_defrag_leaf_block(handle_t *handle, struct inode *org_inode, static int ext4_defrag_replace_branches(handle_t *handle, struct inode *org_inode, struct inode *dest_inode, ext4_lblk_t from, - ext4_lblk_t dest_off, ext4_lblk_t count) + ext4_lblk_t dest_off, ext4_lblk_t count, int phase) { struct ext4_ext_path *org_path = NULL; struct ext4_ext_path *dest_path = NULL; @@ -1070,7 +1165,7 @@ ext4_defrag_replace_branches(handle_t *handle, struct inode *org_inode, /* Loop for the original extent blocks */ err = ext4_defrag_leaf_block(handle, org_inode, - org_path, dext, &from); + org_path, dext, &from, phase); if (err < 0) goto out; @@ -1080,7 +1175,7 @@ ext4_defrag_replace_branches(handle_t *handle, struct inode *org_inode, * e.g. ext4_defrag_merge_extents() */ err = ext4_defrag_leaf_block(handle, dest_inode, - dest_path, swap_ext, &dest_off); + dest_path, swap_ext, &dest_off, -1); if (err < 0) goto out; @@ -1176,6 +1271,7 @@ out: * @req_blocks: contiguous blocks count we need * @iblock: target file offset * @goal: goal offset + * @phase: phase of the force defrag mode * */ static void @@ -1184,8 +1280,22 @@ ext4_defrag_fill_ar(struct inode *org_inode, struct inode *dest_inode, struct ext4_ext_path *org_path, struct ext4_ext_path *dest_path, ext4_fsblk_t req_blocks, ext4_lblk_t iblock, - ext4_fsblk_t goal) + ext4_fsblk_t goal, int phase) { + ext4_group_t org_grp_no; + ext4_grpblk_t org_blk_off; + int org_depth = ext_depth(org_inode); + + if (phase == DEFRAG_FORCE_VICTIM) { + ext4_get_group_no_and_offset(org_inode->i_sb, + ext_pblock(org_path[org_depth].p_ext), + &org_grp_no, &org_blk_off); + ar->excepted_group = org_grp_no; + } else { + /* Allocate contiguous blocks to any block group */ + ar->excepted_group = -1; + } + ar->inode = dest_inode; ar->len = req_blocks; ar->logical = iblock; @@ -1249,6 +1359,56 @@ ext4_defrag_alloc_blocks(handle_t *handle, struct inode *org_inode, } /** + * ext4_defrag_check_phase + * - Check condition of the 
allocated blocks (only force defrag mode) + * + * @ar: allocation request for multiple block allocation + * @dest_grp_no: block group num of the allocated blocks + * @goal_grp_no: block group num of the destination of block allocation + * @alloc_total: sum total of the allocated blocks + * @req_blocks: contiguous blocks count we need + * @phase: phase of the force defrag mode + * + * This function returns 0 if succeed, otherwise returns error value. + */ +static int +ext4_defrag_check_phase(struct ext4_allocation_request *ar, + ext4_group_t dest_grp_no, ext4_group_t goal_grp_no, + ext4_fsblk_t alloc_total, ext4_lblk_t req_blocks, + int phase) +{ + int err = 0; + + switch (phase) { + case DEFRAG_FORCE_TRY: + /* If there is not enough space, return -ENOSPC. */ + if (ar->len != req_blocks) + /* -ENOSPC triggers DEFRAG_FORCE_VICTIM phase. */ + err = -ENOSPC; + break; + case DEFRAG_FORCE_VICTIM: + /* We can't allocate new blocks in the same block group. */ + if (dest_grp_no == ar->excepted_group) { + printk(KERN_ERR "ext4 defrag: Failed to allocate" + " victim file to other block group\n"); + err = -ENOSPC; + } + break; + case DEFRAG_FORCE_GATHER: + /* Maybe reserved blocks are already used by other process. */ + if (dest_grp_no != goal_grp_no + || alloc_total != req_blocks) { + printk(KERN_ERR "ext4 defrag: Reserved blocks are" + " already used by other process\n"); + err = -EIO; + } + break; + } + + return err; +} + +/** * ext4_defrag_partial - Defrag a file per page * * @tmp_inode: temporary inode @@ -1257,13 +1417,15 @@ ext4_defrag_alloc_blocks(handle_t *handle, struct inode *org_inode, * @dest_blk_offset: block index on temporary file * @data_offset_in_page: block index where data swapping starts * @block_len_in_page: the number of blocks to be swapped + * @phase: phase of the force defrag mode * * This function returns 0 if succeed, otherwise returns error value. 
*/ static int ext4_defrag_partial(struct inode *tmp_inode, struct file *filp, pgoff_t org_page_offset, ext4_lblk_t dest_blk_offset, - int data_offset_in_page, int block_len_in_page) + int data_offset_in_page, int block_len_in_page, + int phase) { struct inode *org_inode = filp->f_dentry->d_inode; struct address_space *mapping = org_inode->i_mapping; @@ -1346,7 +1508,7 @@ ext4_defrag_partial(struct inode *tmp_inode, struct file *filp, try_to_release_page(page, 0); ret = ext4_defrag_replace_branches(handle, org_inode, tmp_inode, org_blk_offset, dest_blk_offset, - block_len_in_page); + block_len_in_page, phase); if (ret < 0) goto out; @@ -1397,6 +1559,7 @@ out: * @tar_end: the last block number of the allocated blocks * @sum_tmp: the extents count in the allocated blocks * @goal: block offset for allocation + * @phase: phase of the force defrag mode * * This function returns the values as below. * 0 (improved) @@ -1406,7 +1569,7 @@ out: static int ext4_defrag_comp_ext_count(struct inode *org_inode, struct ext4_ext_path *org_path, ext4_lblk_t tar_end, - int sum_tmp, ext4_fsblk_t goal) + int sum_tmp, ext4_fsblk_t goal, int phase) { struct ext4_extent *ext = NULL; int depth = ext_depth(org_inode); @@ -1433,7 +1596,8 @@ ext4_defrag_comp_ext_count(struct inode *org_inode, if (sum_org == sum_tmp && !goal) { /* Not improved */ ret = 1; - } else if (sum_org < sum_tmp) { + } else if (sum_org < sum_tmp && + phase != DEFRAG_FORCE_VICTIM) { /* Fragment increased */ ret = -ENOSPC; printk(KERN_ERR "ext4 defrag: " @@ -1462,6 +1626,7 @@ ext4_defrag_comp_ext_count(struct inode *org_inode, * @req_blocks: the number of blocks to allocate * @iblock: file related offset * @goal: block offset for allocation + * @phase: phase of the force defrag mode * * This function returns the value as below: * 0 (succeed) @@ -1472,7 +1637,7 @@ static int ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode, struct ext4_ext_path *org_path, ext4_lblk_t req_start, ext4_lblk_t 
req_blocks, ext4_lblk_t iblock, - ext4_fsblk_t goal) + ext4_fsblk_t goal, int phase) { handle_t *handle; struct ext4_sb_info *sbi = EXT4_SB(org_inode->i_sb); @@ -1484,6 +1649,8 @@ ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode, ext4_fsblk_t newblock = 0; ext4_lblk_t req_end = req_start + req_blocks - 1; ext4_lblk_t rest_blocks = 0; + ext4_group_t dest_group_no, goal_group_no; + ext4_grpblk_t dest_blk_off, goal_blk_off; int sum_tmp = 0; int metadata = 1; int ret; @@ -1500,7 +1667,7 @@ ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode, /* Fill struct ext4_allocation_request with necessary info */ ext4_defrag_fill_ar(org_inode, tmp_inode, &ar, org_path, - dest_path, req_blocks, iblock, goal); + dest_path, req_blocks, iblock, goal, phase); handle = ext4_journal_start(tmp_inode, 0); if (IS_ERR(handle)) { @@ -1508,6 +1675,9 @@ ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode, goto out2; } + ext4_get_group_no_and_offset(tmp_inode->i_sb, goal, + &goal_group_no, &goal_blk_off); + while (alloc_total != req_blocks) { /* Allocate blocks */ ret = ext4_defrag_alloc_blocks(handle, org_inode, tmp_inode, @@ -1517,9 +1687,21 @@ ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode, /* Claimed blocks are already reserved */ EXT4_I(ar.inode)->i_delalloc_reserved_flag = 1; + ext4_get_group_no_and_offset(tmp_inode->i_sb, newblock, + &dest_group_no, &dest_blk_off); + alloc_total += ar.len; rest_blocks = req_blocks - alloc_total; + /* the checks that done in force mode */ + if (phase) { + ret = ext4_defrag_check_phase(&ar, dest_group_no, + goal_group_no, alloc_total, + req_blocks, phase); + if (ret < 0) + goto out; + } + newex.ee_block = cpu_to_le32(alloc_total - ar.len); ext4_ext_store_pblock(&newex, newblock); newex.ee_len = cpu_to_le16(ar.len); @@ -1529,13 +1711,14 @@ ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode, if (ret < 0) goto out; - ar.goal = 
newblock + ar.len; + if (!phase) + ar.goal = newblock + ar.len; ar.len = req_blocks - alloc_total; sum_tmp++; } ret = ext4_defrag_comp_ext_count(org_inode, org_path, req_end, - sum_tmp, goal); + sum_tmp, goal, phase); out: if (ret < 0 && ar.len) @@ -1562,14 +1745,16 @@ out2: * ext4_defrag_check - Check the environment whether a defrag can be done * * @org_inode: original inode + * @ext: extent to be moved (only defrag force mode) * @defrag_size: size of defrag in blocks * @goal: pointer to block offset for allocation + * @phase: phase of the force defrag mode * * This function returns 0 if succeed, otherwise returns error value. */ static int -ext4_defrag_check(struct inode *org_inode, ext4_lblk_t defrag_size, - ext4_fsblk_t *goal) +ext4_defrag_check(struct inode *org_inode, struct ext4_extent_data *ext, + ext4_lblk_t defrag_size, ext4_fsblk_t *goal, int *phase) { /* ext4 online defrag needs mballoc mount option. */ if (!test_opt(org_inode->i_sb, MBALLOC)) { @@ -1578,6 +1763,17 @@ ext4_defrag_check(struct inode *org_inode, ext4_lblk_t defrag_size, return -EOPNOTSUPP; } + if (ext->len) { + /* Setup for the force defrag mode */ + if (ext->len < defrag_size) { + printk(KERN_ERR "ext4 defrag: " + "Invalid length of extent\n"); + return -EINVAL; + } + *phase = DEFRAG_FORCE_GATHER; + *goal = ext->start; + } + return 0; } @@ -1659,13 +1855,16 @@ out: * @block_start: starting offset to defrag in blocks * @defrag_size: size of defrag in blocks * @goal: block offset for allocation + * @phase: phase of the force defrag mode + * @ext: extent to be moved (only defrag force mode) * * This function returns the number of blocks if succeed, otherwise * returns error value. 
*/ int ext4_defrag(struct file *filp, ext4_lblk_t block_start, - ext4_lblk_t defrag_size, ext4_fsblk_t goal) + ext4_lblk_t defrag_size, ext4_fsblk_t goal, int phase, + struct ext4_extent_data *ext) { struct inode *org_inode = filp->f_dentry->d_inode, *tmp_inode = NULL; struct ext4_ext_path *org_path = NULL, *holecheck_path = NULL; @@ -1680,7 +1879,7 @@ ext4_defrag(struct file *filp, ext4_lblk_t block_start, int block_len_in_page; /* Check the filesystem environment whether defrag can be done */ - ret = ext4_defrag_check(org_inode, defrag_size, &goal); + ret = ext4_defrag_check(org_inode, ext, defrag_size, &goal, &phase); if (ret < 0) return ret; @@ -1797,11 +1996,11 @@ ext4_defrag(struct file *filp, ext4_lblk_t block_start, ret = ext4_defrag_new_extent_tree(org_inode, tmp_inode, org_path, seq_start, seq_blocks, - block_start, goal); + block_start, goal, phase); if (ret < 0) { break; - } else if (ret == 1) { + } else if (ret == 1 && (!goal || (goal && !phase))) { ret = 0; seq_start = le32_to_cpu(ext_cur->ee_block); goto CLEANUP; @@ -1846,7 +2045,8 @@ ext4_defrag(struct file *filp, ext4_lblk_t block_start, org_page_offset, dest_block_offset, data_offset_in_page, - block_len_in_page); + block_len_in_page, + phase); if (ret < 0) goto out; @@ -1905,6 +2105,10 @@ out: kfree(holecheck_path); } + if (phase == DEFRAG_FORCE_GATHER) + /* Release reserved block in force mode */ + ext4_discard_reservation(org_inode); + up_write(&EXT4_I(org_inode)->i_data_sem); mutex_unlock(&org_inode->i_mutex); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index eef7885..4e54eb4 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -97,6 +97,11 @@ struct ext4_allocation_request { unsigned long len; /* flags. 
see above EXT4_MB_HINT_* */ unsigned long flags; + /* + * for ext4 online defrag: + * the block group which is excepted from allocation target + */ + long long excepted_group; }; /* @@ -308,6 +313,7 @@ struct ext4_new_group_data { #define EXT4_IOC_FREE_BLOCKS_INFO _IOW('f', 18, struct ext4_extents_info) #define EXT4_IOC_FIEMAP_INO _IOW('f', 19, struct fiemap_ino) #define EXT4_IOC_RESERVE_BLOCK _IOW('f', 20, struct ext4_extents_info) +#define EXT4_IOC_MOVE_VICTIM _IOW('f', 21, struct ext4_extents_info) /* * ioctl commands in 32 bit emulation @@ -330,8 +336,15 @@ struct ext4_new_group_data { * * DEFRAG_MAX_ENT: the maximum number of extents for exchanging between * kernel-space and user-space per an ioctl + * DEFRAG_FORCE_TRY: check whether we have free space fragmentation or not + * DEFRAG_FORCE_VICTIM: move victim extents to make sufficient space + * DEFRAG_FORCE_GATHER: move the target file into the free space made in the + * DEFRAG_FORCE_VICTIM phase */ #define DEFRAG_MAX_ENT 32 +#define DEFRAG_FORCE_TRY 1 +#define DEFRAG_FORCE_VICTIM 2 +#define DEFRAG_FORCE_GATHER 3 struct ext4_extent_data { ext4_lblk_t block; /* start logical block number */ @@ -343,6 +356,8 @@ struct ext4_ext_defrag_data { ext4_lblk_t start_offset; /* start offset to defrag in blocks */ ext4_lblk_t defrag_size; /* size of defrag in blocks */ ext4_fsblk_t goal; /* block offset for allocation */ + int flag; /* free space mode flag */ + struct ext4_extent_data ext; }; struct ext4_group_data_info { @@ -1193,7 +1208,8 @@ extern void ext4_inode_table_set(struct super_block *sb, extern int ext4_ext_journal_restart(handle_t *handle, int needed); /* defrag.c */ extern int ext4_defrag(struct file *filp, ext4_lblk_t block_start, - ext4_lblk_t defrag_size, ext4_fsblk_t goal); + ext4_lblk_t defrag_size, ext4_fsblk_t goal, + int flag, struct ext4_extent_data *ext); extern int ext4_defrag_ioctl(struct inode *, struct file *, unsigned int, unsigned long); diff --git a/fs/ext4/ext4_extents.h 
b/fs/ext4/ext4_extents.h index 6407222..fbe34b4 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h @@ -251,5 +251,10 @@ extern void ext4_ext_drop_refs(struct ext4_ext_path *path); extern ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, struct ext4_ext_path *path, ext4_lblk_t block); +extern int ext4_ext_insert_extent_defrag(handle_t *handle, struct inode *inode, + struct ext4_ext_path *path, + struct ext4_extent *newext, int defrag); +extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path); + #endif /* _EXT4_EXTENTS */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 7fcf72d..32c1aa9 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -186,11 +186,17 @@ ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, static ext4_fsblk_t ext4_ext_new_meta_block(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, - struct ext4_extent *ex, int *err) + struct ext4_extent *ex, int *err, + ext4_fsblk_t defrag_goal) { ext4_fsblk_t goal, newblock; - goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block)); + if (defrag_goal) + goal = defrag_goal; + else + goal = ext4_ext_find_goal(inode, path, + le32_to_cpu(ex->ee_block)); + newblock = ext4_new_meta_block(handle, inode, goal, err); return newblock; } @@ -675,7 +681,8 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, */ static int ext4_ext_split(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, - struct ext4_extent *newext, int at) + struct ext4_extent *newext, int at, + ext4_fsblk_t defrag_goal) { struct buffer_head *bh = NULL; int depth = ext_depth(inode); @@ -726,7 +733,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, ext_debug("allocate %d blocks for indexes/leaf\n", depth - at); for (a = 0; a < depth - at; a++) { newblock = ext4_ext_new_meta_block(handle, inode, path, - newext, &err); + newext, &err, defrag_goal); if (newblock == 0) goto cleanup; ablocks[a] = newblock; @@ -913,7 +920,8 @@ 
cleanup: */ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, - struct ext4_extent *newext) + struct ext4_extent *newext, + ext4_fsblk_t defrag_goal) { struct ext4_ext_path *curp = path; struct ext4_extent_header *neh; @@ -922,7 +930,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, ext4_fsblk_t newblock; int err = 0; - newblock = ext4_ext_new_meta_block(handle, inode, path, newext, &err); + newblock = ext4_ext_new_meta_block(handle, inode, path, + newext, &err, defrag_goal); if (newblock == 0) return err; @@ -998,7 +1007,8 @@ out: */ static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, - struct ext4_extent *newext) + struct ext4_extent *newext, + ext4_fsblk_t defrag_goal) { struct ext4_ext_path *curp; int depth, i, err = 0; @@ -1018,7 +1028,8 @@ repeat: if (EXT_HAS_FREE_INDEX(curp)) { /* if we found index with free entry, then use that * entry: create all needed subtree and add new leaf */ - err = ext4_ext_split(handle, inode, path, newext, i); + err = ext4_ext_split(handle, inode, path, newext, i, + defrag_goal); if (err) goto out; @@ -1031,7 +1042,8 @@ repeat: err = PTR_ERR(path); } else { /* tree is full, time to grow in depth */ - err = ext4_ext_grow_indepth(handle, inode, path, newext); + err = ext4_ext_grow_indepth(handle, inode, path, + newext, defrag_goal); if (err) goto out; @@ -1211,7 +1223,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path, * allocated block. Thus, index entries have to be consistent * with leaves. 
*/ -static ext4_lblk_t +ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path) { int depth; @@ -1477,6 +1489,19 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, struct ext4_extent *newext) { + return ext4_ext_insert_extent_defrag(handle, inode, path, newext, 0); +} + +/* + * ext4_ext_insert_extent_defrag: + * The difference from ext4_ext_insert_extent is to use the first block + * in newext as the goal of the new index block. + */ +int +ext4_ext_insert_extent_defrag(handle_t *handle, struct inode *inode, + struct ext4_ext_path *path, + struct ext4_extent *newext, int defrag) +{ struct ext4_extent_header *eh; struct ext4_extent *ex, *fex; struct ext4_extent *nearex; /* nearest extent */ @@ -1484,6 +1509,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, int depth, len, err; ext4_lblk_t next; unsigned uninitialized = 0; + ext4_fsblk_t defrag_goal; BUG_ON(ext4_ext_get_actual_len(newext) == 0); depth = ext_depth(inode); @@ -1544,11 +1570,16 @@ repeat: le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); } + if (defrag) + defrag_goal = ext_pblock(newext); + else + defrag_goal = 0; /* * There is no free space in the found leaf. * We're gonna add a new leaf in the tree. 
*/ - err = ext4_ext_create_new_leaf(handle, inode, path, newext); + err = ext4_ext_create_new_leaf(handle, inode, path, + newext, defrag_goal); if (err) goto cleanup; depth = ext_depth(inode); @@ -2848,6 +2879,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ar.goal = ext4_ext_find_goal(inode, path, iblock); ar.logical = iblock; ar.len = allocated; + ar.excepted_group = -1; if (S_ISREG(inode->i_mode)) ar.flags = EXT4_MB_HINT_DATA; else diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 9c992d8..a596785 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -261,7 +261,8 @@ setversion_out: case EXT4_IOC_GROUP_INFO: case EXT4_IOC_FREE_BLOCKS_INFO: case EXT4_IOC_FIEMAP_INO: - case EXT4_IOC_RESERVE_BLOCK: { + case EXT4_IOC_RESERVE_BLOCK: + case EXT4_IOC_MOVE_VICTIM: { return ext4_defrag_ioctl(inode, filp, cmd, arg); } case EXT4_IOC_GROUP_ADD: { diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 01a7daa..78f76da 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1775,6 +1775,10 @@ repeat: if (group == EXT4_SB(sb)->s_groups_count) group = 0; + if (ac->ac_excepted_group != -1 && + group == ac->ac_excepted_group) + continue; + /* quick check to skip empty groups */ grp = ext4_get_group_info(ac->ac_sb, group); if (grp->bb_free == 0) @@ -4160,6 +4164,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac, ac->ac_bitmap_page = NULL; ac->ac_buddy_page = NULL; ac->ac_lg = NULL; + ac->ac_excepted_group = ar->excepted_group; /* we have to define context: we'll we work with a file or * locality group. this is a policy, actually */ diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index c7c9906..6b46c86 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -211,6 +211,7 @@ struct ext4_allocation_context { struct page *ac_buddy_page; struct ext4_prealloc_space *ac_pa; struct ext4_locality_group *ac_lg; + long long ac_excepted_group; }; #define AC_STATUS_CONTINUE 1