Patchwork [RFC,3/3] ext4: cache all of an extent tree's leaf block upon reading

login
register
mail settings
Submitter Theodore Ts'o
Date July 11, 2013, 4:39 a.m.
Message ID <1373517542-21234-3-git-send-email-tytso@mit.edu>
Download mbox | patch
Permalink /patch/258300/
State Superseded
Headers show

Comments

Theodore Ts'o - July 11, 2013, 4:39 a.m.
When we read in an extent tree leaf block from disk, arrange to have
all of its entries cached.  In nearly all cases the in-memory
representation will be more compact than the on-disk representation in
the buffer cache, and it allows us to get the information without
having to traverse the extent tree for successive extents.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: Zheng Liu <wenqing.lz@taobao.com>
---
 fs/ext4/ext4.h              | 14 +++++++-
 fs/ext4/extents.c           | 84 +++++++++++++++++++++++++++++++--------------
 fs/ext4/extents_status.c    | 39 ++++++++++++++++++++-
 fs/ext4/extents_status.h    |  3 ++
 fs/ext4/migrate.c           |  2 +-
 fs/ext4/move_extent.c       |  2 +-
 include/trace/events/ext4.h | 14 +++++++-
 7 files changed, 127 insertions(+), 31 deletions(-)
Zheng Liu - July 11, 2013, 12:52 p.m.
On Thu, Jul 11, 2013 at 12:39:02AM -0400, Theodore Ts'o wrote:
> When we read in an extent tree leaf block from disk, arrange to have
> all of its entries cached.  In nearly all cases the in-memory
> representation will be more compact than the on-disk representation in
> the buffer cache, and it allows us to get the information without
> having to traverse the extent tree for successive extents.
> 
> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
> Cc: Zheng Liu <wenqing.lz@taobao.com>

One minor nit below.  Otherwise the patch looks good to me.
Reviewed-by: Zheng Liu <wenqing.lz@taobao.com>

> ---
>  fs/ext4/ext4.h              | 14 +++++++-
>  fs/ext4/extents.c           | 84 +++++++++++++++++++++++++++++++--------------
>  fs/ext4/extents_status.c    | 39 ++++++++++++++++++++-
>  fs/ext4/extents_status.h    |  3 ++
>  fs/ext4/migrate.c           |  2 +-
>  fs/ext4/move_extent.c       |  2 +-
>  include/trace/events/ext4.h | 14 +++++++-
>  7 files changed, 127 insertions(+), 31 deletions(-)
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 6ed348d..a8497b0 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -561,6 +561,17 @@ enum {
>  #define EXT4_GET_BLOCKS_NO_PUT_HOLE		0x0200
>  
>  /*
> + * The bit position of this flag must not overlap with any of the
> + * EXT4_GET_BLOCKS_*.  It is used by ext4_ext_find_extent(),
> + * read_extent_tree_block(), ext4_split_extent_at(),
> + * ext4_ext_insert_extent(), and ext4_ext_create_new_leaf() to
> + * indicate that the we shouldn't be caching the extents when reading
> + * from the extent tree while a truncate or punch hole operation
> + * is in progress.
> + */
> +#define EXT4_EX_NOCACHE				0x0400
> +
> +/*
>   * Flags used by ext4_free_blocks
>   */
>  #define EXT4_FREE_BLOCKS_METADATA	0x0001
> @@ -2683,7 +2694,8 @@ extern int ext4_ext_insert_extent(handle_t *, struct inode *,
>  				  struct ext4_ext_path *,
>  				  struct ext4_extent *, int);
>  extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
> -						  struct ext4_ext_path *);
> +						  struct ext4_ext_path *,
> +						  int flags);
>  extern void ext4_ext_drop_refs(struct ext4_ext_path *);
>  extern int ext4_ext_check_inode(struct inode *inode);
>  extern int ext4_find_delalloc_range(struct inode *inode,
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index e174814..506c3fe 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -466,7 +466,8 @@ int ext4_ext_check_inode(struct inode *inode)
>  
>  static struct buffer_head *
>  __read_extent_tree_block(const char *function, unsigned int line,
> -			 struct inode *inode, ext4_fsblk_t pblk, int depth)
> +			 struct inode *inode, ext4_fsblk_t pblk, int depth,
> +			 int flags)
>  {
>  	struct buffer_head		*bh;
>  	int				err;
> @@ -490,6 +491,31 @@ __read_extent_tree_block(const char *function, unsigned int line,
>  	if (err)
>  		goto errout;
>  	set_buffer_verified(bh);
> +	/*
> +	 * If this is a leaf block, cache all of its entries
> +	 */
> +	if (!(flags & EXT4_EX_NOCACHE) && depth == 0) {
> +		struct ext4_extent_header *eh = ext_block_hdr(bh);
> +		struct ext4_extent *ex = EXT_FIRST_EXTENT(eh);
> +		int i;
> +
> +		for (i = eh->eh_entries; i > 0; i--, ex++) {

Here I get a warning message when I use the following command to build
the ext4 kernel module.

  $ make M=fs/ext4 C=2 CF=-D__CHECK_ENDIAN__
  CHECK   fs/ext4/extents.c
fs/ext4/extents.c:502:24: warning: incorrect type in assignment (different base types)
fs/ext4/extents.c:502:24:    expected int [signed] i
fs/ext4/extents.c:502:24:    got restricted __le16 [usertype] eh_entries

                                                - Zheng

> +			unsigned int status = EXTENT_STATUS_WRITTEN;
> +			ext4_lblk_t prev = 0, lblk = le32_to_cpu(ex->ee_block);
> +			int len = ext4_ext_get_actual_len(ex);
> +
> +			if (prev && (prev != lblk))
> +				ext4_es_cache_extent(inode, prev,
> +						     lblk - prev, ~0,
> +						     EXTENT_STATUS_HOLE);
> +
> +			if (ext4_ext_is_uninitialized(ex))
> +				status = EXTENT_STATUS_UNWRITTEN;
> +			ext4_es_cache_extent(inode, lblk, len,
> +					     ext4_ext_pblock(ex), status);
> +			prev = lblk + len;
> +		}
> +	}
>  	return bh;
>  errout:
>  	put_bh(bh);
> @@ -497,8 +523,9 @@ errout:
>  
>  }
>  
> -#define read_extent_tree_block(inode, pblk, depth)		\
> -	__read_extent_tree_block(__func__, __LINE__, (inode), (pblk), (depth))
> +#define read_extent_tree_block(inode, pblk, depth, flags)		\
> +	__read_extent_tree_block(__func__, __LINE__, (inode), (pblk),   \
> +				 (depth), (flags))
>  
>  #ifdef EXT_DEBUG
>  static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
> @@ -732,7 +759,7 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
>  
>  struct ext4_ext_path *
>  ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
> -					struct ext4_ext_path *path)
> +		     struct ext4_ext_path *path, int flags)
>  {
>  	struct ext4_extent_header *eh;
>  	struct buffer_head *bh;
> @@ -764,7 +791,8 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
>  		path[ppos].p_depth = i;
>  		path[ppos].p_ext = NULL;
>  
> -		bh = read_extent_tree_block(inode, path[ppos].p_block, --i);
> +		bh = read_extent_tree_block(inode, path[ppos].p_block, --i,
> +					    flags);
>  		if (IS_ERR(bh)) {
>  			ret = PTR_ERR(bh);
>  			goto err;
> @@ -1201,7 +1229,8 @@ out:
>   * if no free index is found, then it requests in-depth growing.
>   */
>  static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
> -				    unsigned int flags,
> +				    unsigned int mb_flags,
> +				    unsigned int gb_flags,
>  				    struct ext4_ext_path *path,
>  				    struct ext4_extent *newext)
>  {
> @@ -1223,7 +1252,7 @@ repeat:
>  	if (EXT_HAS_FREE_INDEX(curp)) {
>  		/* if we found index with free entry, then use that
>  		 * entry: create all needed subtree and add new leaf */
> -		err = ext4_ext_split(handle, inode, flags, path, newext, i);
> +		err = ext4_ext_split(handle, inode, mb_flags, path, newext, i);
>  		if (err)
>  			goto out;
>  
> @@ -1231,12 +1260,12 @@ repeat:
>  		ext4_ext_drop_refs(path);
>  		path = ext4_ext_find_extent(inode,
>  				    (ext4_lblk_t)le32_to_cpu(newext->ee_block),
> -				    path);
> +				    path, gb_flags);
>  		if (IS_ERR(path))
>  			err = PTR_ERR(path);
>  	} else {
>  		/* tree is full, time to grow in depth */
> -		err = ext4_ext_grow_indepth(handle, inode, flags, newext);
> +		err = ext4_ext_grow_indepth(handle, inode, mb_flags, newext);
>  		if (err)
>  			goto out;
>  
> @@ -1244,7 +1273,7 @@ repeat:
>  		ext4_ext_drop_refs(path);
>  		path = ext4_ext_find_extent(inode,
>  				   (ext4_lblk_t)le32_to_cpu(newext->ee_block),
> -				    path);
> +				    path, gb_flags);
>  		if (IS_ERR(path)) {
>  			err = PTR_ERR(path);
>  			goto out;
> @@ -1417,7 +1446,7 @@ got_index:
>  	while (++depth < path->p_depth) {
>  		/* subtract from p_depth to get proper eh_depth */
>  		bh = read_extent_tree_block(inode, *logical,
> -					    path->p_depth - depth);
> +					    path->p_depth - depth, 0);
>  		if (IS_ERR(bh))
>  			return PTR_ERR(bh);
>  		eh = ext_block_hdr(bh);
> @@ -1426,7 +1455,7 @@ got_index:
>  		put_bh(bh);
>  	}
>  
> -	bh = read_extent_tree_block(inode, *logical, path->p_depth - depth);
> +	bh = read_extent_tree_block(inode, *logical, path->p_depth - depth, 0);
>  	if (IS_ERR(bh))
>  		return PTR_ERR(bh);
>  	eh = ext_block_hdr(bh);
> @@ -1788,7 +1817,7 @@ out:
>   */
>  int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
>  				struct ext4_ext_path *path,
> -				struct ext4_extent *newext, int flag)
> +				struct ext4_extent *newext, int gb_flags)
>  {
>  	struct ext4_extent_header *eh;
>  	struct ext4_extent *ex, *fex;
> @@ -1797,7 +1826,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
>  	int depth, len, err;
>  	ext4_lblk_t next;
>  	unsigned uninitialized = 0;
> -	int flags = 0;
> +	int mb_flags = 0;
>  
>  	if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
>  		EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
> @@ -1812,7 +1841,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
>  	}
>  
>  	/* try to insert block into found extent and return */
> -	if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)) {
> +	if (ex && !(gb_flags & EXT4_GET_BLOCKS_PRE_IO)) {
>  
>  		/*
>  		 * Try to see whether we should rather test the extent on
> @@ -1915,7 +1944,7 @@ prepend:
>  	if (next != EXT_MAX_BLOCKS) {
>  		ext_debug("next leaf block - %u\n", next);
>  		BUG_ON(npath != NULL);
> -		npath = ext4_ext_find_extent(inode, next, NULL);
> +		npath = ext4_ext_find_extent(inode, next, NULL, 0);
>  		if (IS_ERR(npath))
>  			return PTR_ERR(npath);
>  		BUG_ON(npath->p_depth != path->p_depth);
> @@ -1934,9 +1963,10 @@ prepend:
>  	 * There is no free space in the found leaf.
>  	 * We're gonna add a new leaf in the tree.
>  	 */
> -	if (flag & EXT4_GET_BLOCKS_METADATA_NOFAIL)
> -		flags = EXT4_MB_USE_RESERVED;
> -	err = ext4_ext_create_new_leaf(handle, inode, flags, path, newext);
> +	if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
> +		mb_flags = EXT4_MB_USE_RESERVED;
> +	err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags,
> +				       path, newext);
>  	if (err)
>  		goto cleanup;
>  	depth = ext_depth(inode);
> @@ -2002,7 +2032,7 @@ has_space:
>  
>  merge:
>  	/* try to merge extents */
> -	if (!(flag & EXT4_GET_BLOCKS_PRE_IO))
> +	if (!(gb_flags & EXT4_GET_BLOCKS_PRE_IO))
>  		ext4_ext_try_to_merge(handle, inode, path, nearex);
>  
>  
> @@ -2045,7 +2075,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
>  			path = NULL;
>  		}
>  
> -		path = ext4_ext_find_extent(inode, block, path);
> +		path = ext4_ext_find_extent(inode, block, path, 0);
>  		if (IS_ERR(path)) {
>  			up_read(&EXT4_I(inode)->i_data_sem);
>  			err = PTR_ERR(path);
> @@ -2707,7 +2737,7 @@ again:
>  		ext4_lblk_t ee_block;
>  
>  		/* find extent for this block */
> -		path = ext4_ext_find_extent(inode, end, NULL);
> +		path = ext4_ext_find_extent(inode, end, NULL, EXT4_EX_NOCACHE);
>  		if (IS_ERR(path)) {
>  			ext4_journal_stop(handle);
>  			return PTR_ERR(path);
> @@ -2749,6 +2779,7 @@ again:
>  			 */
>  			err = ext4_split_extent_at(handle, inode, path,
>  					end + 1, split_flag,
> +					EXT4_EX_NOCACHE |
>  					EXT4_GET_BLOCKS_PRE_IO |
>  					EXT4_GET_BLOCKS_METADATA_NOFAIL);
>  
> @@ -2825,7 +2856,8 @@ again:
>  				  i + 1, ext4_idx_pblock(path[i].p_idx));
>  			memset(path + i + 1, 0, sizeof(*path));
>  			bh = read_extent_tree_block(inode,
> -				ext4_idx_pblock(path[i].p_idx), depth - i - 1);
> +				ext4_idx_pblock(path[i].p_idx), depth - i - 1,
> +				EXT4_EX_NOCACHE);
>  			if (IS_ERR(bh)) {
>  				err = PTR_ERR(bh);
>  				break;
> @@ -3168,7 +3200,7 @@ static int ext4_split_extent(handle_t *handle,
>  	 * result in split of original leaf or extent zeroout.
>  	 */
>  	ext4_ext_drop_refs(path);
> -	path = ext4_ext_find_extent(inode, map->m_lblk, path);
> +	path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
>  	if (IS_ERR(path))
>  		return PTR_ERR(path);
>  	depth = ext_depth(inode);
> @@ -3552,7 +3584,7 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
>  		if (err < 0)
>  			goto out;
>  		ext4_ext_drop_refs(path);
> -		path = ext4_ext_find_extent(inode, map->m_lblk, path);
> +		path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
>  		if (IS_ERR(path)) {
>  			err = PTR_ERR(path);
>  			goto out;
> @@ -4039,7 +4071,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
>  	trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
>  
>  	/* find extent for this block */
> -	path = ext4_ext_find_extent(inode, map->m_lblk, NULL);
> +	path = ext4_ext_find_extent(inode, map->m_lblk, NULL, 0);
>  	if (IS_ERR(path)) {
>  		err = PTR_ERR(path);
>  		path = NULL;
> diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
> index 7358fb3..a84804c 100644
> --- a/fs/ext4/extents_status.c
> +++ b/fs/ext4/extents_status.c
> @@ -417,7 +417,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
>  	unsigned short ee_len;
>  	int depth, ee_status, es_status;
>  
> -	path = ext4_ext_find_extent(inode, es->es_lblk, NULL);
> +	path = ext4_ext_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE);
>  	if (IS_ERR(path))
>  		return;
>  
> @@ -678,6 +678,43 @@ error:
>  }
>  
>  /*
> + * ext4_es_cache_extent() inserts information into the extent status
> + * tree if and only if there isn't information about the range in
> + * question already.
> + */
> +void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
> +			  ext4_lblk_t len, ext4_fsblk_t pblk,
> +			  unsigned int status)
> +{
> +	struct extent_status *es;
> +	struct extent_status newes;
> +	ext4_lblk_t end = lblk + len - 1;
> +
> +	es_debug("cache [%u/%u) %llu %x to extent status tree of inode %lu\n",
> +		 lblk, len, pblk, status, inode->i_ino);
> +	if (!len)
> +		return;
> +
> +	BUG_ON(end < lblk);
> +
> +	write_lock(&EXT4_I(inode)->i_es_lock);
> +
> +	es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk);
> +	if (es && ((es->es_lblk <= lblk) || (es->es_lblk <= end)))
> +		goto out;
> +
> +	newes.es_lblk = lblk;
> +	newes.es_len = len;
> +	ext4_es_store_pblock(&newes, pblk);
> +	ext4_es_store_status(&newes, status);
> +	trace_ext4_es_cache_extent(inode, &newes);
> +
> +	__es_insert_extent(inode, &newes);
> +out:
> +	write_unlock(&EXT4_I(inode)->i_es_lock);
> +}
> +
> +/*
>   * ext4_es_lookup_extent() looks up an extent in extent status tree.
>   *
>   * ext4_es_lookup_extent is called by ext4_map_blocks/ext4_da_map_blocks.
> diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
> index ae1be58..23348d1 100644
> --- a/fs/ext4/extents_status.h
> +++ b/fs/ext4/extents_status.h
> @@ -71,6 +71,9 @@ extern void ext4_es_init_tree(struct ext4_es_tree *tree);
>  extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
>  				 ext4_lblk_t len, ext4_fsblk_t pblk,
>  				 unsigned int status);
> +extern void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
> +				 ext4_lblk_t len, ext4_fsblk_t pblk,
> +				 unsigned int status);
>  extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
>  				 ext4_lblk_t len);
>  extern void ext4_es_find_delayed_extent_range(struct inode *inode,
> diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
> index 49e8bdf..f99bdb8 100644
> --- a/fs/ext4/migrate.c
> +++ b/fs/ext4/migrate.c
> @@ -39,7 +39,7 @@ static int finish_range(handle_t *handle, struct inode *inode,
>  	newext.ee_block = cpu_to_le32(lb->first_block);
>  	newext.ee_len   = cpu_to_le16(lb->last_block - lb->first_block + 1);
>  	ext4_ext_store_pblock(&newext, lb->first_pblock);
> -	path = ext4_ext_find_extent(inode, lb->first_block, NULL);
> +	path = ext4_ext_find_extent(inode, lb->first_block, NULL, 0);
>  
>  	if (IS_ERR(path)) {
>  		retval = PTR_ERR(path);
> diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
> index e86dddb..7fa4d85 100644
> --- a/fs/ext4/move_extent.c
> +++ b/fs/ext4/move_extent.c
> @@ -37,7 +37,7 @@ get_ext_path(struct inode *inode, ext4_lblk_t lblock,
>  	int ret = 0;
>  	struct ext4_ext_path *path;
>  
> -	path = ext4_ext_find_extent(inode, lblock, *orig_path);
> +	path = ext4_ext_find_extent(inode, lblock, *orig_path, EXT4_EX_NOCACHE);
>  	if (IS_ERR(path))
>  		ret = PTR_ERR(path);
>  	else if (path[ext_depth(inode)].p_ext == NULL)
> diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
> index 47a355b..d892b55 100644
> --- a/include/trace/events/ext4.h
> +++ b/include/trace/events/ext4.h
> @@ -2192,7 +2192,7 @@ TRACE_EVENT(ext4_ext_remove_space_done,
>  		  (unsigned short) __entry->eh_entries)
>  );
>  
> -TRACE_EVENT(ext4_es_insert_extent,
> +DECLARE_EVENT_CLASS(ext4__es_extent,
>  	TP_PROTO(struct inode *inode, struct extent_status *es),
>  
>  	TP_ARGS(inode, es),
> @@ -2222,6 +2222,18 @@ TRACE_EVENT(ext4_es_insert_extent,
>  		  __entry->pblk, show_extent_status(__entry->status))
>  );
>  
> +DEFINE_EVENT(ext4__es_extent, ext4_es_insert_extent,
> +	TP_PROTO(struct inode *inode, struct extent_status *es),
> +
> +	TP_ARGS(inode, es)
> +);
> +
> +DEFINE_EVENT(ext4__es_extent, ext4_es_cache_extent,
> +	TP_PROTO(struct inode *inode, struct extent_status *es),
> +
> +	TP_ARGS(inode, es)
> +);
> +
>  TRACE_EVENT(ext4_es_remove_extent,
>  	TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len),
>  
> -- 
> 1.7.12.rc0.22.gcdd159b
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zach Brown - July 11, 2013, 6:54 p.m.
> @@ -490,6 +491,31 @@ __read_extent_tree_block(const char *function, unsigned int line,
>  	if (err)
>  		goto errout;
>  	set_buffer_verified(bh);
> +	/*
> +	 * If this is a leaf block, cache all of its entries
> +	 */
> +	if (!(flags & EXT4_EX_NOCACHE) && depth == 0) {
> +		struct ext4_extent_header *eh = ext_block_hdr(bh);
> +		struct ext4_extent *ex = EXT_FIRST_EXTENT(eh);
> +		int i;
> +
> +		for (i = eh->eh_entries; i > 0; i--, ex++) {
> +			unsigned int status = EXTENT_STATUS_WRITTEN;
> +			ext4_lblk_t prev = 0, lblk = le32_to_cpu(ex->ee_block);
> +			int len = ext4_ext_get_actual_len(ex);
> +
> +			if (prev && (prev != lblk))
> +				ext4_es_cache_extent(inode, prev,
> +						     lblk - prev, ~0,
> +						     EXTENT_STATUS_HOLE);
> +
> +			if (ext4_ext_is_uninitialized(ex))
> +				status = EXTENT_STATUS_UNWRITTEN;
> +			ext4_es_cache_extent(inode, lblk, len,
> +					     ext4_ext_pblock(ex), status);
> +			prev = lblk + len;
> +		}
> +	}

Isn't prev going to be reinitialized to 0 for every extent?

- z
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Theodore Ts'o - July 11, 2013, 7:33 p.m.
On Thu, Jul 11, 2013 at 11:54:23AM -0700, Zach Brown wrote:
> 
> Isn't prev going to be reinitialized to 0 for every extent?

Whoops, nice catch.  I'll fix this in my next spin of the patches.

	     	     	      	   - Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 6ed348d..a8497b0 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -561,6 +561,17 @@  enum {
 #define EXT4_GET_BLOCKS_NO_PUT_HOLE		0x0200
 
 /*
+ * The bit position of this flag must not overlap with any of the
+ * EXT4_GET_BLOCKS_*.  It is used by ext4_ext_find_extent(),
+ * read_extent_tree_block(), ext4_split_extent_at(),
+ * ext4_ext_insert_extent(), and ext4_ext_create_new_leaf() to
+ * indicate that the we shouldn't be caching the extents when reading
+ * from the extent tree while a truncate or punch hole operation
+ * is in progress.
+ */
+#define EXT4_EX_NOCACHE				0x0400
+
+/*
  * Flags used by ext4_free_blocks
  */
 #define EXT4_FREE_BLOCKS_METADATA	0x0001
@@ -2683,7 +2694,8 @@  extern int ext4_ext_insert_extent(handle_t *, struct inode *,
 				  struct ext4_ext_path *,
 				  struct ext4_extent *, int);
 extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
-						  struct ext4_ext_path *);
+						  struct ext4_ext_path *,
+						  int flags);
 extern void ext4_ext_drop_refs(struct ext4_ext_path *);
 extern int ext4_ext_check_inode(struct inode *inode);
 extern int ext4_find_delalloc_range(struct inode *inode,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e174814..506c3fe 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -466,7 +466,8 @@  int ext4_ext_check_inode(struct inode *inode)
 
 static struct buffer_head *
 __read_extent_tree_block(const char *function, unsigned int line,
-			 struct inode *inode, ext4_fsblk_t pblk, int depth)
+			 struct inode *inode, ext4_fsblk_t pblk, int depth,
+			 int flags)
 {
 	struct buffer_head		*bh;
 	int				err;
@@ -490,6 +491,31 @@  __read_extent_tree_block(const char *function, unsigned int line,
 	if (err)
 		goto errout;
 	set_buffer_verified(bh);
+	/*
+	 * If this is a leaf block, cache all of its entries
+	 */
+	if (!(flags & EXT4_EX_NOCACHE) && depth == 0) {
+		struct ext4_extent_header *eh = ext_block_hdr(bh);
+		struct ext4_extent *ex = EXT_FIRST_EXTENT(eh);
+		int i;
+
+		for (i = eh->eh_entries; i > 0; i--, ex++) {
+			unsigned int status = EXTENT_STATUS_WRITTEN;
+			ext4_lblk_t prev = 0, lblk = le32_to_cpu(ex->ee_block);
+			int len = ext4_ext_get_actual_len(ex);
+
+			if (prev && (prev != lblk))
+				ext4_es_cache_extent(inode, prev,
+						     lblk - prev, ~0,
+						     EXTENT_STATUS_HOLE);
+
+			if (ext4_ext_is_uninitialized(ex))
+				status = EXTENT_STATUS_UNWRITTEN;
+			ext4_es_cache_extent(inode, lblk, len,
+					     ext4_ext_pblock(ex), status);
+			prev = lblk + len;
+		}
+	}
 	return bh;
 errout:
 	put_bh(bh);
@@ -497,8 +523,9 @@  errout:
 
 }
 
-#define read_extent_tree_block(inode, pblk, depth)		\
-	__read_extent_tree_block(__func__, __LINE__, (inode), (pblk), (depth))
+#define read_extent_tree_block(inode, pblk, depth, flags)		\
+	__read_extent_tree_block(__func__, __LINE__, (inode), (pblk),   \
+				 (depth), (flags))
 
 #ifdef EXT_DEBUG
 static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
@@ -732,7 +759,7 @@  int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
 
 struct ext4_ext_path *
 ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
-					struct ext4_ext_path *path)
+		     struct ext4_ext_path *path, int flags)
 {
 	struct ext4_extent_header *eh;
 	struct buffer_head *bh;
@@ -764,7 +791,8 @@  ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
 		path[ppos].p_depth = i;
 		path[ppos].p_ext = NULL;
 
-		bh = read_extent_tree_block(inode, path[ppos].p_block, --i);
+		bh = read_extent_tree_block(inode, path[ppos].p_block, --i,
+					    flags);
 		if (IS_ERR(bh)) {
 			ret = PTR_ERR(bh);
 			goto err;
@@ -1201,7 +1229,8 @@  out:
  * if no free index is found, then it requests in-depth growing.
  */
 static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
-				    unsigned int flags,
+				    unsigned int mb_flags,
+				    unsigned int gb_flags,
 				    struct ext4_ext_path *path,
 				    struct ext4_extent *newext)
 {
@@ -1223,7 +1252,7 @@  repeat:
 	if (EXT_HAS_FREE_INDEX(curp)) {
 		/* if we found index with free entry, then use that
 		 * entry: create all needed subtree and add new leaf */
-		err = ext4_ext_split(handle, inode, flags, path, newext, i);
+		err = ext4_ext_split(handle, inode, mb_flags, path, newext, i);
 		if (err)
 			goto out;
 
@@ -1231,12 +1260,12 @@  repeat:
 		ext4_ext_drop_refs(path);
 		path = ext4_ext_find_extent(inode,
 				    (ext4_lblk_t)le32_to_cpu(newext->ee_block),
-				    path);
+				    path, gb_flags);
 		if (IS_ERR(path))
 			err = PTR_ERR(path);
 	} else {
 		/* tree is full, time to grow in depth */
-		err = ext4_ext_grow_indepth(handle, inode, flags, newext);
+		err = ext4_ext_grow_indepth(handle, inode, mb_flags, newext);
 		if (err)
 			goto out;
 
@@ -1244,7 +1273,7 @@  repeat:
 		ext4_ext_drop_refs(path);
 		path = ext4_ext_find_extent(inode,
 				   (ext4_lblk_t)le32_to_cpu(newext->ee_block),
-				    path);
+				    path, gb_flags);
 		if (IS_ERR(path)) {
 			err = PTR_ERR(path);
 			goto out;
@@ -1417,7 +1446,7 @@  got_index:
 	while (++depth < path->p_depth) {
 		/* subtract from p_depth to get proper eh_depth */
 		bh = read_extent_tree_block(inode, *logical,
-					    path->p_depth - depth);
+					    path->p_depth - depth, 0);
 		if (IS_ERR(bh))
 			return PTR_ERR(bh);
 		eh = ext_block_hdr(bh);
@@ -1426,7 +1455,7 @@  got_index:
 		put_bh(bh);
 	}
 
-	bh = read_extent_tree_block(inode, *logical, path->p_depth - depth);
+	bh = read_extent_tree_block(inode, *logical, path->p_depth - depth, 0);
 	if (IS_ERR(bh))
 		return PTR_ERR(bh);
 	eh = ext_block_hdr(bh);
@@ -1788,7 +1817,7 @@  out:
  */
 int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 				struct ext4_ext_path *path,
-				struct ext4_extent *newext, int flag)
+				struct ext4_extent *newext, int gb_flags)
 {
 	struct ext4_extent_header *eh;
 	struct ext4_extent *ex, *fex;
@@ -1797,7 +1826,7 @@  int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 	int depth, len, err;
 	ext4_lblk_t next;
 	unsigned uninitialized = 0;
-	int flags = 0;
+	int mb_flags = 0;
 
 	if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
 		EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
@@ -1812,7 +1841,7 @@  int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 	}
 
 	/* try to insert block into found extent and return */
-	if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)) {
+	if (ex && !(gb_flags & EXT4_GET_BLOCKS_PRE_IO)) {
 
 		/*
 		 * Try to see whether we should rather test the extent on
@@ -1915,7 +1944,7 @@  prepend:
 	if (next != EXT_MAX_BLOCKS) {
 		ext_debug("next leaf block - %u\n", next);
 		BUG_ON(npath != NULL);
-		npath = ext4_ext_find_extent(inode, next, NULL);
+		npath = ext4_ext_find_extent(inode, next, NULL, 0);
 		if (IS_ERR(npath))
 			return PTR_ERR(npath);
 		BUG_ON(npath->p_depth != path->p_depth);
@@ -1934,9 +1963,10 @@  prepend:
 	 * There is no free space in the found leaf.
 	 * We're gonna add a new leaf in the tree.
 	 */
-	if (flag & EXT4_GET_BLOCKS_METADATA_NOFAIL)
-		flags = EXT4_MB_USE_RESERVED;
-	err = ext4_ext_create_new_leaf(handle, inode, flags, path, newext);
+	if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
+		mb_flags = EXT4_MB_USE_RESERVED;
+	err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags,
+				       path, newext);
 	if (err)
 		goto cleanup;
 	depth = ext_depth(inode);
@@ -2002,7 +2032,7 @@  has_space:
 
 merge:
 	/* try to merge extents */
-	if (!(flag & EXT4_GET_BLOCKS_PRE_IO))
+	if (!(gb_flags & EXT4_GET_BLOCKS_PRE_IO))
 		ext4_ext_try_to_merge(handle, inode, path, nearex);
 
 
@@ -2045,7 +2075,7 @@  static int ext4_fill_fiemap_extents(struct inode *inode,
 			path = NULL;
 		}
 
-		path = ext4_ext_find_extent(inode, block, path);
+		path = ext4_ext_find_extent(inode, block, path, 0);
 		if (IS_ERR(path)) {
 			up_read(&EXT4_I(inode)->i_data_sem);
 			err = PTR_ERR(path);
@@ -2707,7 +2737,7 @@  again:
 		ext4_lblk_t ee_block;
 
 		/* find extent for this block */
-		path = ext4_ext_find_extent(inode, end, NULL);
+		path = ext4_ext_find_extent(inode, end, NULL, EXT4_EX_NOCACHE);
 		if (IS_ERR(path)) {
 			ext4_journal_stop(handle);
 			return PTR_ERR(path);
@@ -2749,6 +2779,7 @@  again:
 			 */
 			err = ext4_split_extent_at(handle, inode, path,
 					end + 1, split_flag,
+					EXT4_EX_NOCACHE |
 					EXT4_GET_BLOCKS_PRE_IO |
 					EXT4_GET_BLOCKS_METADATA_NOFAIL);
 
@@ -2825,7 +2856,8 @@  again:
 				  i + 1, ext4_idx_pblock(path[i].p_idx));
 			memset(path + i + 1, 0, sizeof(*path));
 			bh = read_extent_tree_block(inode,
-				ext4_idx_pblock(path[i].p_idx), depth - i - 1);
+				ext4_idx_pblock(path[i].p_idx), depth - i - 1,
+				EXT4_EX_NOCACHE);
 			if (IS_ERR(bh)) {
 				err = PTR_ERR(bh);
 				break;
@@ -3168,7 +3200,7 @@  static int ext4_split_extent(handle_t *handle,
 	 * result in split of original leaf or extent zeroout.
 	 */
 	ext4_ext_drop_refs(path);
-	path = ext4_ext_find_extent(inode, map->m_lblk, path);
+	path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
 	if (IS_ERR(path))
 		return PTR_ERR(path);
 	depth = ext_depth(inode);
@@ -3552,7 +3584,7 @@  static int ext4_convert_unwritten_extents_endio(handle_t *handle,
 		if (err < 0)
 			goto out;
 		ext4_ext_drop_refs(path);
-		path = ext4_ext_find_extent(inode, map->m_lblk, path);
+		path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
 		if (IS_ERR(path)) {
 			err = PTR_ERR(path);
 			goto out;
@@ -4039,7 +4071,7 @@  int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
 
 	/* find extent for this block */
-	path = ext4_ext_find_extent(inode, map->m_lblk, NULL);
+	path = ext4_ext_find_extent(inode, map->m_lblk, NULL, 0);
 	if (IS_ERR(path)) {
 		err = PTR_ERR(path);
 		path = NULL;
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 7358fb3..a84804c 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -417,7 +417,7 @@  static void ext4_es_insert_extent_ext_check(struct inode *inode,
 	unsigned short ee_len;
 	int depth, ee_status, es_status;
 
-	path = ext4_ext_find_extent(inode, es->es_lblk, NULL);
+	path = ext4_ext_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE);
 	if (IS_ERR(path))
 		return;
 
@@ -678,6 +678,43 @@  error:
 }
 
 /*
+ * ext4_es_cache_extent() inserts information into the extent status
+ * tree if and only if there isn't information about the range in
+ * question already.
+ */
+void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
+			  ext4_lblk_t len, ext4_fsblk_t pblk,
+			  unsigned int status)
+{
+	struct extent_status *es;
+	struct extent_status newes;
+	ext4_lblk_t end = lblk + len - 1;
+
+	es_debug("cache [%u/%u) %llu %x to extent status tree of inode %lu\n",
+		 lblk, len, pblk, status, inode->i_ino);
+	if (!len)
+		return;
+
+	BUG_ON(end < lblk);
+
+	write_lock(&EXT4_I(inode)->i_es_lock);
+
+	es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk);
+	if (es && ((es->es_lblk <= lblk) || (es->es_lblk <= end)))
+		goto out;
+
+	newes.es_lblk = lblk;
+	newes.es_len = len;
+	ext4_es_store_pblock(&newes, pblk);
+	ext4_es_store_status(&newes, status);
+	trace_ext4_es_cache_extent(inode, &newes);
+
+	__es_insert_extent(inode, &newes);
+out:
+	write_unlock(&EXT4_I(inode)->i_es_lock);
+}
+
+/*
  * ext4_es_lookup_extent() looks up an extent in extent status tree.
  *
  * ext4_es_lookup_extent is called by ext4_map_blocks/ext4_da_map_blocks.
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index ae1be58..23348d1 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -71,6 +71,9 @@  extern void ext4_es_init_tree(struct ext4_es_tree *tree);
 extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
 				 ext4_lblk_t len, ext4_fsblk_t pblk,
 				 unsigned int status);
+extern void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
+				 ext4_lblk_t len, ext4_fsblk_t pblk,
+				 unsigned int status);
 extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
 				 ext4_lblk_t len);
 extern void ext4_es_find_delayed_extent_range(struct inode *inode,
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 49e8bdf..f99bdb8 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -39,7 +39,7 @@  static int finish_range(handle_t *handle, struct inode *inode,
 	newext.ee_block = cpu_to_le32(lb->first_block);
 	newext.ee_len   = cpu_to_le16(lb->last_block - lb->first_block + 1);
 	ext4_ext_store_pblock(&newext, lb->first_pblock);
-	path = ext4_ext_find_extent(inode, lb->first_block, NULL);
+	path = ext4_ext_find_extent(inode, lb->first_block, NULL, 0);
 
 	if (IS_ERR(path)) {
 		retval = PTR_ERR(path);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index e86dddb..7fa4d85 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -37,7 +37,7 @@  get_ext_path(struct inode *inode, ext4_lblk_t lblock,
 	int ret = 0;
 	struct ext4_ext_path *path;
 
-	path = ext4_ext_find_extent(inode, lblock, *orig_path);
+	path = ext4_ext_find_extent(inode, lblock, *orig_path, EXT4_EX_NOCACHE);
 	if (IS_ERR(path))
 		ret = PTR_ERR(path);
 	else if (path[ext_depth(inode)].p_ext == NULL)
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 47a355b..d892b55 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -2192,7 +2192,7 @@  TRACE_EVENT(ext4_ext_remove_space_done,
 		  (unsigned short) __entry->eh_entries)
 );
 
-TRACE_EVENT(ext4_es_insert_extent,
+DECLARE_EVENT_CLASS(ext4__es_extent,
 	TP_PROTO(struct inode *inode, struct extent_status *es),
 
 	TP_ARGS(inode, es),
@@ -2222,6 +2222,18 @@  TRACE_EVENT(ext4_es_insert_extent,
 		  __entry->pblk, show_extent_status(__entry->status))
 );
 
+DEFINE_EVENT(ext4__es_extent, ext4_es_insert_extent,
+	TP_PROTO(struct inode *inode, struct extent_status *es),
+
+	TP_ARGS(inode, es)
+);
+
+DEFINE_EVENT(ext4__es_extent, ext4_es_cache_extent,
+	TP_PROTO(struct inode *inode, struct extent_status *es),
+
+	TP_ARGS(inode, es)
+);
+
 TRACE_EVENT(ext4_es_remove_extent,
 	TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len),