diff mbox

[v3,3/6] ext4: cache extent hole in extent status tree for ext4_da_map_blocks()

Message ID 1407382553-24256-4-git-send-email-wenqing.lz@taobao.com
State Superseded, archived
Headers show

Commit Message

Zheng Liu Aug. 7, 2014, 3:35 a.m. UTC
From: Zheng Liu <wenqing.lz@taobao.com>

Currently extent status tree doesn't cache extent hole when a write
looks up in extent tree to make sure whether a block has been allocated
or not.  In this case, we don't put extent hole in extent cache because
later this extent might be removed and a new delayed extent might be
added back.  But it will cause a defect when we do a lot of writes.
If we don't put extent hole in extent cache, the following writes also
need to access extent tree to look at whether or not a block has been
allocated.  It brings a cache miss.  This commit fixes this defect.
Meanwhile, if an inode has no any extent, this extent hole also will
be cached.

Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Andreas Dilger <adilger.kernel@dilger.ca>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
---
 fs/ext4/ext4.h              |    4 +---
 fs/ext4/extents.c           |   23 +++++++++--------------
 fs/ext4/inode.c             |    6 ++----
 include/trace/events/ext4.h |    3 +--
 4 files changed, 13 insertions(+), 23 deletions(-)

Comments

Jan Kara Aug. 27, 2014, 1:55 p.m. UTC | #1
On Thu 07-08-14 11:35:50, Zheng Liu wrote:
> From: Zheng Liu <wenqing.lz@taobao.com>
> 
> Currently extent status tree doesn't cache extent hole when a write
> looks up in extent tree to make sure whether a block has been allocated
> or not.  In this case, we don't put extent hole in extent cache because
> later this extent might be removed and a new delayed extent might be
> added back.  But it will cause a defect when we do a lot of writes.
> If we don't put extent hole in extent cache, the following writes also
> need to access extent tree to look at whether or not a block has been
> allocated.  It brings a cache miss.  This commit fixes this defect.
> Meanwhile, if an inode has no any extent, this extent hole also will
> be cached.
> 
> Cc: "Theodore Ts'o" <tytso@mit.edu>
> Cc: Andreas Dilger <adilger.kernel@dilger.ca>
> Cc: Jan Kara <jack@suse.cz>
> Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
  So I agree with this change, it certainly doesn't make things worse. But
when looking into ext4_ext_put_gap_in_cache() I have one question: That
function uses ext4_find_delalloc_range() to check that the intended range
doesn't have any delalloc blocks in it. However it doesn't make any effort
to put a smaller hole in cache - for example if we have blocks allocated
like:

5-6 = delalloc
7-10 = allocated

and ext4_ext_put_gap_in_cache() is called for block 0, the function won't
put anything into cache although it could put range 0-4 in the cache as a
hole. Why is that?

								Honza

> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 76c2df3..6463d34 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -2284,16 +2284,15 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
>  				ext4_lblk_t block)
>  {
>  	int depth = ext_depth(inode);
> -	unsigned long len = 0;
> -	ext4_lblk_t lblock = 0;
> +	unsigned long len;
> +	ext4_lblk_t lblock;
>  	struct ext4_extent *ex;
>  
>  	ex = path[depth].p_ext;
>  	if (ex == NULL) {
> -		/*
> -		 * there is no extent yet, so gap is [0;-] and we
> -		 * don't cache it
> -		 */
> +		/* there is no extent yet, so gap is [0;-] */
> +		lblock = 0;
> +		len = EXT_MAX_BLOCKS;
>  		ext_debug("cache gap(whole file):");
>  	} else if (block < le32_to_cpu(ex->ee_block)) {
>  		lblock = block;
> @@ -2302,9 +2301,6 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
>  				block,
>  				le32_to_cpu(ex->ee_block),
>  				 ext4_ext_get_actual_len(ex));
> -		if (!ext4_find_delalloc_range(inode, lblock, lblock + len - 1))
> -			ext4_es_insert_extent(inode, lblock, len, ~0,
> -					      EXTENT_STATUS_HOLE);
>  	} else if (block >= le32_to_cpu(ex->ee_block)
>  			+ ext4_ext_get_actual_len(ex)) {
>  		ext4_lblk_t next;
> @@ -2318,14 +2314,14 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
>  				block);
>  		BUG_ON(next == lblock);
>  		len = next - lblock;
> -		if (!ext4_find_delalloc_range(inode, lblock, lblock + len - 1))
> -			ext4_es_insert_extent(inode, lblock, len, ~0,
> -					      EXTENT_STATUS_HOLE);
>  	} else {
>  		BUG();
>  	}
>  
>  	ext_debug(" -> %u:%lu\n", lblock, len);
> +	if (!ext4_find_delalloc_range(inode, lblock, lblock + len - 1))
> +		ext4_es_insert_extent(inode, lblock, len, ~0,
> +				      EXTENT_STATUS_HOLE);
>  }
>  
>  /*
> @@ -4362,8 +4358,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
>  		 * put just found gap into cache to speed up
>  		 * subsequent requests
>  		 */
> -		if ((flags & EXT4_GET_BLOCKS_NO_PUT_HOLE) == 0)
> -			ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
> +		ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
>  		goto out2;
>  	}
Theodore Ts'o Sept. 2, 2014, 2:43 a.m. UTC | #2
On Thu, Aug 07, 2014 at 11:35:50AM +0800, Zheng Liu wrote:
> From: Zheng Liu <wenqing.lz@taobao.com>
> 
> Currently extent status tree doesn't cache extent hole when a write
> looks up in extent tree to make sure whether a block has been allocated
> or not.  In this case, we don't put extent hole in extent cache because
> later this extent might be removed and a new delayed extent might be
> added back.  But it will cause a defect when we do a lot of writes.
> If we don't put extent hole in extent cache, the following writes also
> need to access extent tree to look at whether or not a block has been
> allocated.  It brings a cache miss.  This commit fixes this defect.
> Meanwhile, if an inode has no any extent, this extent hole also will
> be cached.

Hi Zheng,

I thought the reason why we have the EXT4_GET_BLOCKS_NO_PUT_HOLE flag
is because in ext4_da_map_blocks(), if there is a hole, we will be
immediately following it up with a call to ext4_es_insert_extent() to
fill in the hole with the EXTENT_STATUS_DELAYED flag.  The only time
we don't is if we run into an ENOSPC error.

Am I missing something?

						- Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zheng Liu Sept. 4, 2014, 1:04 p.m. UTC | #3
On Mon, Sep 01, 2014 at 10:43:50PM -0400, Theodore Ts'o wrote:
> On Thu, Aug 07, 2014 at 11:35:50AM +0800, Zheng Liu wrote:
> > From: Zheng Liu <wenqing.lz@taobao.com>
> > 
> > Currently extent status tree doesn't cache extent hole when a write
> > looks up in extent tree to make sure whether a block has been allocated
> > or not.  In this case, we don't put extent hole in extent cache because
> > later this extent might be removed and a new delayed extent might be
> > added back.  But it will cause a defect when we do a lot of writes.
> > If we don't put extent hole in extent cache, the following writes also
> > need to access extent tree to look at whether or not a block has been
> > allocated.  It brings a cache miss.  This commit fixes this defect.
> > Meanwhile, if an inode has no any extent, this extent hole also will
> > be cached.
> 
> Hi Zheng,
> 
> I thought the reason why we have the EXT4_GET_BLOCKS_NO_PUT_HOLE flag
> is because in ext4_da_map_blocks(), if there is a hole, we will be
> immediately following it up with a call to ext4_es_insert_extent() to
> fill in the hole with the EXTENT_STATUS_DELAYED flag.  The only time
> we don't is if we run into an ENOSPC error.
> 
> Am I missing something?

Yes, you are right.  The purpose is used to do the work like you said
above.  As the commit log described this flag brings a huge number of
cache misses when an empty file is written.  So it might be worth
putting a hole in the cache when delalloc is enabled.

Regards,
                                                - Zheng
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zheng Liu Sept. 4, 2014, 1:05 p.m. UTC | #4
On Wed, Aug 27, 2014 at 03:55:16PM +0200, Jan Kara wrote:
> On Thu 07-08-14 11:35:50, Zheng Liu wrote:
> > From: Zheng Liu <wenqing.lz@taobao.com>
> > 
> > Currently extent status tree doesn't cache extent hole when a write
> > looks up in extent tree to make sure whether a block has been allocated
> > or not.  In this case, we don't put extent hole in extent cache because
> > later this extent might be removed and a new delayed extent might be
> > added back.  But it will cause a defect when we do a lot of writes.
> > If we don't put extent hole in extent cache, the following writes also
> > need to access extent tree to look at whether or not a block has been
> > allocated.  It brings a cache miss.  This commit fixes this defect.
> > Meanwhile, if an inode has no any extent, this extent hole also will
> > be cached.
> > 
> > Cc: "Theodore Ts'o" <tytso@mit.edu>
> > Cc: Andreas Dilger <adilger.kernel@dilger.ca>
> > Cc: Jan Kara <jack@suse.cz>
> > Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
>   So I agree with this change, it certainly doesn't make things worse. But
> when looking into ext4_ext_put_gap_in_cache() I have one question: That
> function uses ext4_find_delalloc_range() to check that the intended range
> doesn't have any delalloc blocks in it. However it doesn't make any effort
> to put a smaller hole in cache - for example if we have blocks allocated
> like:
> 
> 5-6 = delalloc
> 7-10 = allocated
> 
> and ext4_ext_put_gap_in_cache() is called for block 0, the function won't
> put anything into cache although it could put range 0-4 in the cache as a
> hole. Why is that?

Oops, it should put range 0-4 in the cache.  Thanks for pointing it out.
I will fix it.

Regards,
                                                - Zheng
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Theodore Ts'o Sept. 4, 2014, 3:54 p.m. UTC | #5
On Thu, Sep 04, 2014 at 09:04:48PM +0800, Zheng Liu wrote:
> > I thought the reason why we have the EXT4_GET_BLOCKS_NO_PUT_HOLE flag
> > is because in ext4_da_map_blocks(), if there is a hole, we will be
> > immediately following it up with a call to ext4_es_insert_extent() to
> > fill in the hole with the EXTENT_STATUS_DELAYED flag.  The only time
> > we don't is if we run into an ENOSPC error.
> > 
> > Am I missing something?
> 
> Yes, you are right.  The purpose is used to do the work like you said
> above.  As the commit log described this flag brings a huge number of
> cache misses when an empty file is written.  

Right, and I was trying to figure out why that was happening, because
it looked like in the normal case we would immediately fill in the
hole afterwards.  I was goign to try doing some tracing to understand
why this was resulting a huge number of cache misses, but I haven't
had time to do that investigation yet.  Can you sketch out the
scenario where this was leading to so many cache misses?

Thanks,

					- Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 6f294d3..7df9220 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -565,10 +565,8 @@  enum {
 #define EXT4_GET_BLOCKS_KEEP_SIZE		0x0080
 	/* Do not take i_data_sem locking in ext4_map_blocks */
 #define EXT4_GET_BLOCKS_NO_LOCK			0x0100
-	/* Do not put hole in extent cache */
-#define EXT4_GET_BLOCKS_NO_PUT_HOLE		0x0200
 	/* Convert written extents to unwritten */
-#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN	0x0400
+#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN	0x0200
 
 /*
  * The bit position of these flags must not overlap with any of the
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 76c2df3..6463d34 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2284,16 +2284,15 @@  ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
 				ext4_lblk_t block)
 {
 	int depth = ext_depth(inode);
-	unsigned long len = 0;
-	ext4_lblk_t lblock = 0;
+	unsigned long len;
+	ext4_lblk_t lblock;
 	struct ext4_extent *ex;
 
 	ex = path[depth].p_ext;
 	if (ex == NULL) {
-		/*
-		 * there is no extent yet, so gap is [0;-] and we
-		 * don't cache it
-		 */
+		/* there is no extent yet, so gap is [0;-] */
+		lblock = 0;
+		len = EXT_MAX_BLOCKS;
 		ext_debug("cache gap(whole file):");
 	} else if (block < le32_to_cpu(ex->ee_block)) {
 		lblock = block;
@@ -2302,9 +2301,6 @@  ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
 				block,
 				le32_to_cpu(ex->ee_block),
 				 ext4_ext_get_actual_len(ex));
-		if (!ext4_find_delalloc_range(inode, lblock, lblock + len - 1))
-			ext4_es_insert_extent(inode, lblock, len, ~0,
-					      EXTENT_STATUS_HOLE);
 	} else if (block >= le32_to_cpu(ex->ee_block)
 			+ ext4_ext_get_actual_len(ex)) {
 		ext4_lblk_t next;
@@ -2318,14 +2314,14 @@  ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
 				block);
 		BUG_ON(next == lblock);
 		len = next - lblock;
-		if (!ext4_find_delalloc_range(inode, lblock, lblock + len - 1))
-			ext4_es_insert_extent(inode, lblock, len, ~0,
-					      EXTENT_STATUS_HOLE);
 	} else {
 		BUG();
 	}
 
 	ext_debug(" -> %u:%lu\n", lblock, len);
+	if (!ext4_find_delalloc_range(inode, lblock, lblock + len - 1))
+		ext4_es_insert_extent(inode, lblock, len, ~0,
+				      EXTENT_STATUS_HOLE);
 }
 
 /*
@@ -4362,8 +4358,7 @@  int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 		 * put just found gap into cache to speed up
 		 * subsequent requests
 		 */
-		if ((flags & EXT4_GET_BLOCKS_NO_PUT_HOLE) == 0)
-			ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
+		ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
 		goto out2;
 	}
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 367a60c..d1ad9f9 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1485,11 +1485,9 @@  static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
 			map->m_flags |= EXT4_MAP_FROM_CLUSTER;
 		retval = 0;
 	} else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-		retval = ext4_ext_map_blocks(NULL, inode, map,
-					     EXT4_GET_BLOCKS_NO_PUT_HOLE);
+		retval = ext4_ext_map_blocks(NULL, inode, map, 0);
 	else
-		retval = ext4_ind_map_blocks(NULL, inode, map,
-					     EXT4_GET_BLOCKS_NO_PUT_HOLE);
+		retval = ext4_ind_map_blocks(NULL, inode, map, 0);
 
 add_delayed:
 	if (retval == 0) {
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index ff4bd1b..9337d36 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -43,8 +43,7 @@  struct extent_status;
 	{ EXT4_GET_BLOCKS_METADATA_NOFAIL,	"METADATA_NOFAIL" },	\
 	{ EXT4_GET_BLOCKS_NO_NORMALIZE,		"NO_NORMALIZE" },	\
 	{ EXT4_GET_BLOCKS_KEEP_SIZE,		"KEEP_SIZE" },		\
-	{ EXT4_GET_BLOCKS_NO_LOCK,		"NO_LOCK" },		\
-	{ EXT4_GET_BLOCKS_NO_PUT_HOLE,		"NO_PUT_HOLE" })
+	{ EXT4_GET_BLOCKS_NO_LOCK,		"NO_LOCK" })
 
 #define show_mflags(flags) __print_flags(flags, "",	\
 	{ EXT4_MAP_NEW,		"N" },			\