From patchwork Sun Oct 6 20:13:24 2013 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Namjae Jeon X-Patchwork-Id: 280909 Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 0E2F42C00B7 for ; Mon, 7 Oct 2013 07:15:09 +1100 (EST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754532Ab3JFUNg (ORCPT ); Sun, 6 Oct 2013 16:13:36 -0400 Received: from mail-pa0-f48.google.com ([209.85.220.48]:42846 "EHLO mail-pa0-f48.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754414Ab3JFUNd (ORCPT ); Sun, 6 Oct 2013 16:13:33 -0400 Received: by mail-pa0-f48.google.com with SMTP id bj1so6297408pad.7 for ; Sun, 06 Oct 2013 13:13:32 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=from:to:cc:subject:date:message-id; bh=KxLjUWGzQdb0J75Exlu1H8drkkFh+cfYxDIcWcdcHqc=; b=l1mX0tFDTQmZ3MsoP6b2vZkTdJKz86WIMbQUaCRIAxOHs5j8YhYCLwTREOBokhCekB UOHQxn2adyK7524aBwT7VjnbtEdivl3SnSMKRaHIRSbsSXf6Yz658vD84yDOlIdWABZF bcZVUxgrEoqOY0Obu9pTzZ9vdPndyLBOfjv4u0iBDRRagWpE0GiP28zDC3Bzwv2oj6vn 2vQ5n7jTRCzVNXo7Vda7dR1+6Vr1V2E1BFnSCLhgky2oxlfoLaUitU+i+fUtnr1CaYht f3yNXkIdZVQ9FuY3+G8HLBwaL+Seic6RVga8nH5lTSqWxT4IdYCOo4mKP+WB2ycu2fer owlw== X-Received: by 10.68.202.38 with SMTP id kf6mr26945598pbc.43.1381090412864; Sun, 06 Oct 2013 13:13:32 -0700 (PDT) Received: from linkinjeon-Aspire-One-522.kornet ([61.84.85.169]) by mx.google.com with ESMTPSA id py4sm28590089pbb.33.1969.12.31.16.00.00 (version=TLSv1 cipher=ECDHE-RSA-RC4-SHA bits=128/128); Sun, 06 Oct 2013 13:13:32 -0700 (PDT) From: Namjae Jeon To: viro@zeniv.linux.org.uk, mtk.manpages@gmail.com, tytso@mit.edu, adilger.kernel@dilger.ca, bpm@sgi.com, elder@kernel.org, hch@infradead.org, david@fromorbit.com Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, linux-ext4@vger.kernel.org, xfs@oss.sgi.com, a.sangwan@samsung.com, Namjae Jeon , Namjae Jeon Subject: [PATCH RESEND 3/7] ext4: add support FALLOC_FL_COLLAPSE_RANGE for fallocate Date: Mon, 7 Oct 2013 05:13:24 +0900 Message-Id: <1381090404-2795-1-git-send-email-linkinjeon@gmail.com> X-Mailer: git-send-email 1.7.9.5 Sender: linux-ext4-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-ext4@vger.kernel.org From: Namjae Jeon Add support FALLOC_FL_COLLAPSE_RANGE for fallocate. Signed-off-by: Namjae Jeon Signed-off-by: Ashish Sangwan --- fs/ext4/ext4.h | 3 + fs/ext4/extents.c | 286 ++++++++++++++++++++++++++++++++++++++++++- fs/ext4/move_extent.c | 2 +- include/trace/events/ext4.h | 25 ++++ 4 files changed, 314 insertions(+), 2 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index f3c43fc..a13e0f4 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2737,6 +2737,7 @@ extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk); extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len); extern int ext4_ext_precache(struct inode *inode); +extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len); /* move_extent.c */ extern void ext4_double_down_write_data_sem(struct inode *first, @@ -2748,6 +2749,8 @@ void ext4_inode_double_unlock(struct inode *inode1, struct inode *inode2); extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 start_orig, __u64 start_donor, __u64 len, __u64 *moved_len); +extern int mext_next_extent(struct inode *inode, struct ext4_ext_path *path, + struct ext4_extent **extent); /* page-io.c */ extern int __init ext4_init_pageio(void); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 54d52af..baf1faf 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4580,12 +4580,16 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) unsigned int credits, blkbits = inode->i_blkbits; /* Return error if mode is not supported */ - if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | + FALLOC_FL_COLLAPSE_RANGE)) return -EOPNOTSUPP; if (mode & FALLOC_FL_PUNCH_HOLE) return ext4_punch_hole(inode, offset, len); + if (mode & FALLOC_FL_COLLAPSE_RANGE) + return ext4_collapse_range(inode, offset, len); + ret = ext4_convert_inline_data(inode); if (ret) return ret; @@ -4884,3 +4888,283 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, ext4_es_lru_add(inode); return error; } + +/* + * ext4_access_path: + * Function to access the path buffer for marking it dirty. + * It also checks if there are sufficient credits left in the journal handle + * to update path. + */ +static int +ext4_access_path(handle_t *handle, struct inode *inode, + struct ext4_ext_path *path) +{ + int credits, err; + + /* + * Check if need to extend journal credits + * 3 for leaf, sb, and inode plus 2 (bmap and group + * descriptor) for each block group; assume two block + * groups + */ + if (handle->h_buffer_credits < 7) { + credits = ext4_writepage_trans_blocks(inode); + err = ext4_ext_truncate_extend_restart(handle, inode, credits); + /* EAGAIN is success */ + if (err && err != -EAGAIN) + return err; + } + + err = ext4_ext_get_access(handle, inode, path); + return err; +} + +/* + * ext4_ext_shift_path_extents: + * Shift the extents of a path structure lying between path[depth].p_ext + * and EXT_LAST_EXTENT(path[depth].p_hdr) downwards, by subtracting shift + * from starting block for each extent. + */ +static int +ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, + struct inode *inode, handle_t *handle, + ext4_lblk_t *start) +{ + int depth, err = 0; + struct ext4_extent *ex_start, *ex_last; + bool update = 0; + depth = path->p_depth; + + while (depth >= 0) { + if (depth == path->p_depth) { + ex_start = path[depth].p_ext; + if (!ex_start) + return -EIO; + + ex_last = EXT_LAST_EXTENT(path[depth].p_hdr); + if (!ex_last) + return -EIO; + + err = ext4_access_path(handle, inode, path + depth); + if (err) + goto out; + + if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) + update = 1; + + *start = ex_last->ee_block + + ext4_ext_get_actual_len(ex_last); + + while (ex_start <= ex_last) { + ex_start->ee_block -= shift; + ext4_ext_try_to_merge(handle, inode, + path, ex_start); + ex_start++; + } + err = ext4_ext_dirty(handle, inode, path + depth); + if (err) + goto out; + + if (--depth < 0 || !update) + break; + } + + /* Update index too */ + err = ext4_access_path(handle, inode, path + depth); + if (err) + goto out; + + path[depth].p_idx->ei_block -= shift; + err = ext4_ext_dirty(handle, inode, path + depth); + if (err) + goto out; + + /* we are done if current index is not a starting index */ + if (path[depth].p_idx != EXT_FIRST_INDEX(path[depth].p_hdr)) + break; + + depth--; + } + +out: + return err; +} + +/* + * ext4_ext_shift_extents: + * All the extents which lies in the range from start to the last allocated + * block for the file are shifted downwards by shift blocks. + * On success, 0 is returned, error otherwise. + */ +static int +ext4_ext_shift_extents(struct inode *inode, handle_t *handle, + ext4_lblk_t start, ext4_lblk_t shift) +{ + struct ext4_ext_path *path; + int ret = 0, depth; + struct ext4_extent *extent; + ext4_lblk_t stop_block, current_block; + ext4_lblk_t ex_start, ex_end; + + /* Let path point to the last extent */ + path = ext4_ext_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0); + if (IS_ERR(path)) + return PTR_ERR(path); + + depth = path->p_depth; + extent = path[depth].p_ext; + if (!extent) { + ext4_ext_drop_refs(path); + kfree(path); + return ret; + } + + stop_block = extent->ee_block + ext4_ext_get_actual_len(extent); + ext4_ext_drop_refs(path); + kfree(path); + + /* Nothing to shift, if hole is at the end of file */ + if (start >= stop_block) + return ret; + + /* + * Don't start shifting extents until we make sure the hole is big + * enough to accomodate the shift. + */ + path = ext4_ext_find_extent(inode, start - 1, NULL, 0); + depth = path->p_depth; + extent = path[depth].p_ext; + ex_start = extent->ee_block; + ex_end = extent->ee_block + ext4_ext_get_actual_len(extent); + ext4_ext_drop_refs(path); + kfree(path); + + if ((ex_start > start - 1 && shift > ex_start) || + (ex_end > start - shift)) + return -EIO; + + /* Its safe to start updating extents */ + while (start < stop_block) { + path = ext4_ext_find_extent(inode, start, NULL, 0); + if (IS_ERR(path)) + return PTR_ERR(path); + depth = path->p_depth; + extent = path[depth].p_ext; + current_block = extent->ee_block; + if (start > current_block) { + /* Hole, move to the next extent */ + ret = mext_next_extent(inode, path, &extent); + if (ret != 0) { + ext4_ext_drop_refs(path); + kfree(path); + if (ret == 1) + ret = 0; + break; + } + } + ret = ext4_ext_shift_path_extents(path, shift, inode, + handle, &start); + ext4_ext_drop_refs(path); + kfree(path); + if (ret) + break; + } + + return ret; +} + +/* + * ext4_collapse_range: + * This implements the fallocate's collapse range functionality for ext4 + * Returns: 0 and non-zero on error. + */ +int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) +{ + struct super_block *sb = inode->i_sb; + ext4_lblk_t punch_start, punch_stop; + handle_t *handle; + unsigned int credits; + unsigned int rounding; + loff_t ioffset, new_size; + int ret; + + if (!S_ISREG(inode->i_mode)) + return -EOPNOTSUPP; + + if (EXT4_SB(sb)->s_cluster_ratio > 1) + return -EOPNOTSUPP; + + /* Currently just for extent based files */ + if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) + return -EOPNOTSUPP; + + if (IS_SWAPFILE(inode)) + return -ETXTBSY; + + trace_ext4_collapse_range(inode, offset, len); + + punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb); + punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb); + + rounding = max_t(uint, 1 << EXT4_BLOCK_SIZE_BITS(sb), PAGE_CACHE_SIZE); + ioffset = offset & ~(rounding - 1); + + /* Write out all dirty pages */ + ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, -1); + if (ret) + return ret; + + /* Take mutex lock */ + mutex_lock(&inode->i_mutex); + + /* Wait for existing dio to complete */ + ext4_inode_block_unlocked_dio(inode); + inode_dio_wait(inode); + + truncate_pagecache_range(inode, ioffset, -1); + + credits = ext4_writepage_trans_blocks(inode); + handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out_dio; + } + + down_write(&EXT4_I(inode)->i_data_sem); + + ext4_discard_preallocations(inode); + + ret = ext4_es_remove_extent(inode, punch_start, + EXT_MAX_BLOCKS - punch_start - 1); + if (ret) + goto journal_stop; + + ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1); + if (ret) + goto journal_stop; + + ret = ext4_ext_shift_extents(inode, handle, punch_stop, + punch_stop - punch_start); + if (ret) + goto journal_stop; + + if ((offset + len) > i_size_read(inode)) + new_size = offset; + else + new_size = i_size_read(inode) - len; + + truncate_setsize(inode, new_size); + EXT4_I(inode)->i_disksize = new_size; + + inode->i_mtime = inode->i_ctime = ext4_current_time(inode); + ext4_mark_inode_dirty(handle, inode); + +journal_stop: + ext4_journal_stop(handle); + up_write(&EXT4_I(inode)->i_data_sem); + +out_dio: + ext4_inode_resume_unlocked_dio(inode); + mutex_unlock(&inode->i_mutex); + return ret; +} diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 7fa4d85..4cbebb0 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -76,7 +76,7 @@ copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest) * ext4_ext_path structure refers to the last extent, or a negative error * value on failure. */ -static int +int mext_next_extent(struct inode *inode, struct ext4_ext_path *path, struct ext4_extent **extent) { diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 197d312..90e2f71 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -2410,6 +2410,31 @@ TRACE_EVENT(ext4_es_shrink_exit, __entry->shrunk_nr, __entry->cache_cnt) ); +TRACE_EVENT(ext4_collapse_range, + TP_PROTO(struct inode *inode, loff_t offset, loff_t len), + + TP_ARGS(inode, offset, len), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(loff_t, offset) + __field(loff_t, len) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->offset = offset; + __entry->len = len; + ), + + TP_printk("dev %d,%d ino %lu offset %lld len %lld", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long) __entry->ino, + __entry->offset, __entry->len) +); + #endif /* _TRACE_EXT4_H */ /* This part must be outside protection */