diff mbox series

[RFC,3/6] ext4: correct the hole length returned by ext4_map_blocks()

Message ID 20231121093429.1827390-4-yi.zhang@huaweicloud.com
State New
Headers show
Series ext4: make ext4_map_blocks() recognize delayed only extent | expand

Commit Message

Zhang Yi Nov. 21, 2023, 9:34 a.m. UTC
From: Zhang Yi <yi.zhang@huawei.com>

In ext4_map_blocks(), if we can't find a range of mapping in the
extents cache, we are calling ext4_ext_map_blocks() to search the real
path. But if the querying range was tail overlaped by a delayed extent,
we can't find it on the real extent path, so the returned hole length
could be larger than it really is.

      |          querying map          |
      v                                v
      |----------{-------------}{------|----------------}-----...
      ^          ^             ^^                       ^
      | uncached | hole extent ||     delayed extent    |

We have to adjust the mapping length to the next not hole extent's
lblk before searching the extent path.

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
 fs/ext4/inode.c | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

Comments

Jan Kara Dec. 13, 2023, 6:21 p.m. UTC | #1
On Tue 21-11-23 17:34:26, Zhang Yi wrote:
> From: Zhang Yi <yi.zhang@huawei.com>
> 
> In ext4_map_blocks(), if we can't find a range of mapping in the
> extents cache, we are calling ext4_ext_map_blocks() to search the real
> path. But if the querying range was tail overlaped by a delayed extent,
> we can't find it on the real extent path, so the returned hole length
> could be larger than it really is.
> 
>       |          querying map          |
>       v                                v
>       |----------{-------------}{------|----------------}-----...
>       ^          ^             ^^                       ^
>       | uncached | hole extent ||     delayed extent    |
> 
> We have to adjust the mapping length to the next not hole extent's
> lblk before searching the extent path.
> 
> Signed-off-by: Zhang Yi <yi.zhang@huawei.com>

So I agree the ext4_ext_determine_hole() does return a hole that does not
reflect possible delalloc extent (it doesn't even need to be straddling the
end of looked up range, does it?). But ext4_ext_put_gap_in_cache() does
actually properly trim the hole length in the status tree so I think the
problem rather is that the trimming should happen in
ext4_ext_determine_hole() instead of ext4_ext_put_gap_in_cache() and that
will also make ext4_map_blocks() return proper hole length? And then
there's no need for this special handling? Or am I missing something?

								Honza

> ---
>  fs/ext4/inode.c | 24 ++++++++++++++++++++++--
>  1 file changed, 22 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 4ce35f1c8b0a..94e7b8500878 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -479,6 +479,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
>  		    struct ext4_map_blocks *map, int flags)
>  {
>  	struct extent_status es;
> +	ext4_lblk_t next;
>  	int retval;
>  	int ret = 0;
>  #ifdef ES_AGGRESSIVE_TEST
> @@ -502,8 +503,10 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
>  		return -EFSCORRUPTED;
>  
>  	/* Lookup extent status tree firstly */
> -	if (!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) &&
> -	    ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
> +	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
> +		goto uncached;
> +
> +	if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
>  		if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
>  			map->m_pblk = ext4_es_pblock(&es) +
>  					map->m_lblk - es.es_lblk;
> @@ -532,6 +535,23 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
>  #endif
>  		goto found;
>  	}
> +	/*
> +	 * Not found, maybe a hole, need to adjust the map length before
> +	 * seraching the real extent path. It can prevent incorrect hole
> +	 * length returned if the following entries have delayed only
> +	 * ones.
> +	 */
> +	if (!(flags & EXT4_GET_BLOCKS_CREATE) && es.es_lblk > map->m_lblk) {
> +		next = es.es_lblk;
> +		if (ext4_es_is_hole(&es))
> +			next = ext4_es_skip_hole_extent(inode, map->m_lblk,
> +							map->m_len);
> +		retval = next - map->m_lblk;
> +		if (map->m_len > retval)
> +			map->m_len = retval;
> +	}
> +
> +uncached:
>  	/*
>  	 * In the query cache no-wait mode, nothing we can do more if we
>  	 * cannot find extent in the cache.
> -- 
> 2.39.2
>
Zhang Yi Dec. 14, 2023, 9:18 a.m. UTC | #2
On 2023/12/14 2:21, Jan Kara wrote:
> On Tue 21-11-23 17:34:26, Zhang Yi wrote:
>> From: Zhang Yi <yi.zhang@huawei.com>
>>
>> In ext4_map_blocks(), if we can't find a range of mapping in the
>> extents cache, we are calling ext4_ext_map_blocks() to search the real
>> path. But if the querying range was tail overlaped by a delayed extent,
>> we can't find it on the real extent path, so the returned hole length
>> could be larger than it really is.
>>
>>       |          querying map          |
>>       v                                v
>>       |----------{-------------}{------|----------------}-----...
>>       ^          ^             ^^                       ^
>>       | uncached | hole extent ||     delayed extent    |
>>
>> We have to adjust the mapping length to the next not hole extent's
>> lblk before searching the extent path.
>>
>> Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
> 
> So I agree the ext4_ext_determine_hole() does return a hole that does not
> reflect possible delalloc extent (it doesn't even need to be straddling the
> end of looked up range, does it?). But ext4_ext_put_gap_in_cache() does

Yeah.

> actually properly trim the hole length in the status tree so I think the
> problem rather is that the trimming should happen in
> ext4_ext_determine_hole() instead of ext4_ext_put_gap_in_cache() and that
> will also make ext4_map_blocks() return proper hole length? And then
> there's no need for this special handling? Or am I missing something?
> 

Thanks for your suggestions. Yeah, we can trim the hole length in
ext4_ext_determine_hole(), but I'm a little uneasy about the race condition.
ext4_da_map_blocks() only hold inode lock and i_data_sem read lock while
inserting delay extents, and not all query path of ext4_map_blocks() hold
inode lock. I guess the hole/delayed range could be raced by another new
delay allocation and changed after we first check in ext4_map_blocks(),
the querying range could be overlapped and became all or partial delayed,
so we also need to recheck the map type here if the start querying block
has became delayed, right?

Thanks,
Yi.

> 
>> ---
>>  fs/ext4/inode.c | 24 ++++++++++++++++++++++--
>>  1 file changed, 22 insertions(+), 2 deletions(-)
>>
>> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
>> index 4ce35f1c8b0a..94e7b8500878 100644
>> --- a/fs/ext4/inode.c
>> +++ b/fs/ext4/inode.c
>> @@ -479,6 +479,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
>>  		    struct ext4_map_blocks *map, int flags)
>>  {
>>  	struct extent_status es;
>> +	ext4_lblk_t next;
>>  	int retval;
>>  	int ret = 0;
>>  #ifdef ES_AGGRESSIVE_TEST
>> @@ -502,8 +503,10 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
>>  		return -EFSCORRUPTED;
>>  
>>  	/* Lookup extent status tree firstly */
>> -	if (!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) &&
>> -	    ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
>> +	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
>> +		goto uncached;
>> +
>> +	if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
>>  		if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
>>  			map->m_pblk = ext4_es_pblock(&es) +
>>  					map->m_lblk - es.es_lblk;
>> @@ -532,6 +535,23 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
>>  #endif
>>  		goto found;
>>  	}
>> +	/*
>> +	 * Not found, maybe a hole, need to adjust the map length before
>> +	 * seraching the real extent path. It can prevent incorrect hole
>> +	 * length returned if the following entries have delayed only
>> +	 * ones.
>> +	 */
>> +	if (!(flags & EXT4_GET_BLOCKS_CREATE) && es.es_lblk > map->m_lblk) {
>> +		next = es.es_lblk;
>> +		if (ext4_es_is_hole(&es))
>> +			next = ext4_es_skip_hole_extent(inode, map->m_lblk,
>> +							map->m_len);
>> +		retval = next - map->m_lblk;
>> +		if (map->m_len > retval)
>> +			map->m_len = retval;
>> +	}
>> +
>> +uncached:
>>  	/*
>>  	 * In the query cache no-wait mode, nothing we can do more if we
>>  	 * cannot find extent in the cache.
>> -- 
>> 2.39.2
>>
Jan Kara Dec. 14, 2023, 2:31 p.m. UTC | #3
On Thu 14-12-23 17:18:45, Zhang Yi wrote:
> On 2023/12/14 2:21, Jan Kara wrote:
> > On Tue 21-11-23 17:34:26, Zhang Yi wrote:
> >> From: Zhang Yi <yi.zhang@huawei.com>
> >>
> >> In ext4_map_blocks(), if we can't find a range of mapping in the
> >> extents cache, we are calling ext4_ext_map_blocks() to search the real
> >> path. But if the querying range was tail overlaped by a delayed extent,
> >> we can't find it on the real extent path, so the returned hole length
> >> could be larger than it really is.
> >>
> >>       |          querying map          |
> >>       v                                v
> >>       |----------{-------------}{------|----------------}-----...
> >>       ^          ^             ^^                       ^
> >>       | uncached | hole extent ||     delayed extent    |
> >>
> >> We have to adjust the mapping length to the next not hole extent's
> >> lblk before searching the extent path.
> >>
> >> Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
> > 
> > So I agree the ext4_ext_determine_hole() does return a hole that does not
> > reflect possible delalloc extent (it doesn't even need to be straddling the
> > end of looked up range, does it?). But ext4_ext_put_gap_in_cache() does
> 
> Yeah.
> 
> > actually properly trim the hole length in the status tree so I think the
> > problem rather is that the trimming should happen in
> > ext4_ext_determine_hole() instead of ext4_ext_put_gap_in_cache() and that
> > will also make ext4_map_blocks() return proper hole length? And then
> > there's no need for this special handling? Or am I missing something?
> > 
> 
> Thanks for your suggestions. Yeah, we can trim the hole length in
> ext4_ext_determine_hole(), but I'm a little uneasy about the race condition.
> ext4_da_map_blocks() only hold inode lock and i_data_sem read lock while
> inserting delay extents, and not all query path of ext4_map_blocks() hold
> inode lock.

That is a good point! I think something like following could happen already
now:

Suppose we have a file 8192 bytes large containing just a hole.

Task1					Task2
pread(f, buf, 4096, 0)			pwrite(f, buf, 4096, 4096)
  filemap_read()
    filemap_get_pages()
      filemap_create_folio()
        filemap_read_folio()
          ext4_mpage_readpages()
            ext4_map_blocks()
	      down_read(&EXT4_I(inode)->i_data_sem);
              ext4_ext_map_blocks()
		- finds hole 0..8192
	        ext4_ext_put_gap_in_cache()
		  ext4_es_find_extent_range()
		    - finds no delalloc extent
					  ext4_da_write_begin()
					    ext4_da_get_block_prep()
					      ext4_da_map_blocks()
					        down_read(&EXT4_I(inode)->i_data_sem);
					        ext4_ext_map_blocks()
						  - nothing found
						ext4_insert_delayed_block()
						  - inserts delalloc extent
						    to 4096-8192
		  ext4_es_insert_extent()
		    - inserts 0..8192 a hole overwriting delalloc extent

> I guess the hole/delayed range could be raced by another new
> delay allocation and changed after we first check in ext4_map_blocks(),
> the querying range could be overlapped and became all or partial delayed,
> so we also need to recheck the map type here if the start querying block
> has became delayed, right?

I don't think think you can fix this just by rechecking. I think we need to
hold i_data_sem in exclusive mode when inserting delalloc extents. Because
that operation is in fact changing state of allocation tree (although not
on disk yet). And that will fix this race because holding i_data_sem shared
is then enough so that delalloc state cannot change.

Please do this as a separate patch because this will need to be backported
to stable tree. Thanks!

								Honza

> >> ---
> >>  fs/ext4/inode.c | 24 ++++++++++++++++++++++--
> >>  1 file changed, 22 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> >> index 4ce35f1c8b0a..94e7b8500878 100644
> >> --- a/fs/ext4/inode.c
> >> +++ b/fs/ext4/inode.c
> >> @@ -479,6 +479,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
> >>  		    struct ext4_map_blocks *map, int flags)
> >>  {
> >>  	struct extent_status es;
> >> +	ext4_lblk_t next;
> >>  	int retval;
> >>  	int ret = 0;
> >>  #ifdef ES_AGGRESSIVE_TEST
> >> @@ -502,8 +503,10 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
> >>  		return -EFSCORRUPTED;
> >>  
> >>  	/* Lookup extent status tree firstly */
> >> -	if (!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) &&
> >> -	    ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
> >> +	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
> >> +		goto uncached;
> >> +
> >> +	if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
> >>  		if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
> >>  			map->m_pblk = ext4_es_pblock(&es) +
> >>  					map->m_lblk - es.es_lblk;
> >> @@ -532,6 +535,23 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
> >>  #endif
> >>  		goto found;
> >>  	}
> >> +	/*
> >> +	 * Not found, maybe a hole, need to adjust the map length before
> >> +	 * seraching the real extent path. It can prevent incorrect hole
> >> +	 * length returned if the following entries have delayed only
> >> +	 * ones.
> >> +	 */
> >> +	if (!(flags & EXT4_GET_BLOCKS_CREATE) && es.es_lblk > map->m_lblk) {
> >> +		next = es.es_lblk;
> >> +		if (ext4_es_is_hole(&es))
> >> +			next = ext4_es_skip_hole_extent(inode, map->m_lblk,
> >> +							map->m_len);
> >> +		retval = next - map->m_lblk;
> >> +		if (map->m_len > retval)
> >> +			map->m_len = retval;
> >> +	}
> >> +
> >> +uncached:
> >>  	/*
> >>  	 * In the query cache no-wait mode, nothing we can do more if we
> >>  	 * cannot find extent in the cache.
> >> -- 
> >> 2.39.2
> >>
>
Zhang Yi Dec. 15, 2023, 4:36 a.m. UTC | #4
On 2023/12/14 22:31, Jan Kara wrote:
> On Thu 14-12-23 17:18:45, Zhang Yi wrote:
>> On 2023/12/14 2:21, Jan Kara wrote:
>>> On Tue 21-11-23 17:34:26, Zhang Yi wrote:
>>>> From: Zhang Yi <yi.zhang@huawei.com>
>>>>
>>>> In ext4_map_blocks(), if we can't find a range of mapping in the
>>>> extents cache, we are calling ext4_ext_map_blocks() to search the real
>>>> path. But if the querying range was tail overlaped by a delayed extent,
>>>> we can't find it on the real extent path, so the returned hole length
>>>> could be larger than it really is.
>>>>
>>>>       |          querying map          |
>>>>       v                                v
>>>>       |----------{-------------}{------|----------------}-----...
>>>>       ^          ^             ^^                       ^
>>>>       | uncached | hole extent ||     delayed extent    |
>>>>
>>>> We have to adjust the mapping length to the next not hole extent's
>>>> lblk before searching the extent path.
>>>>
>>>> Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
>>>
>>> So I agree the ext4_ext_determine_hole() does return a hole that does not
>>> reflect possible delalloc extent (it doesn't even need to be straddling the
>>> end of looked up range, does it?). But ext4_ext_put_gap_in_cache() does
>>
>> Yeah.
>>
>>> actually properly trim the hole length in the status tree so I think the
>>> problem rather is that the trimming should happen in
>>> ext4_ext_determine_hole() instead of ext4_ext_put_gap_in_cache() and that
>>> will also make ext4_map_blocks() return proper hole length? And then
>>> there's no need for this special handling? Or am I missing something?
>>>
>>
>> Thanks for your suggestions. Yeah, we can trim the hole length in
>> ext4_ext_determine_hole(), but I'm a little uneasy about the race condition.
>> ext4_da_map_blocks() only hold inode lock and i_data_sem read lock while
>> inserting delay extents, and not all query path of ext4_map_blocks() hold
>> inode lock.
> 
> That is a good point! I think something like following could happen already
> now:
> 
> Suppose we have a file 8192 bytes large containing just a hole.
> 
> Task1					Task2
> pread(f, buf, 4096, 0)			pwrite(f, buf, 4096, 4096)
>   filemap_read()
>     filemap_get_pages()
>       filemap_create_folio()
>         filemap_read_folio()
>           ext4_mpage_readpages()
>             ext4_map_blocks()
> 	      down_read(&EXT4_I(inode)->i_data_sem);
>               ext4_ext_map_blocks()
> 		- finds hole 0..8192
> 	        ext4_ext_put_gap_in_cache()
> 		  ext4_es_find_extent_range()
> 		    - finds no delalloc extent
> 					  ext4_da_write_begin()
> 					    ext4_da_get_block_prep()
> 					      ext4_da_map_blocks()
> 					        down_read(&EXT4_I(inode)->i_data_sem);
> 					        ext4_ext_map_blocks()
> 						  - nothing found
> 						ext4_insert_delayed_block()
> 						  - inserts delalloc extent
> 						    to 4096-8192
> 		  ext4_es_insert_extent()
> 		    - inserts 0..8192 a hole overwriting delalloc extent
> 
>> I guess the hole/delayed range could be raced by another new
>> delay allocation and changed after we first check in ext4_map_blocks(),
>> the querying range could be overlapped and became all or partial delayed,
>> so we also need to recheck the map type here if the start querying block
>> has became delayed, right?
> 
> I don't think think you can fix this just by rechecking. I think we need to
> hold i_data_sem in exclusive mode when inserting delalloc extents. Because
> that operation is in fact changing state of allocation tree (although not
> on disk yet). And that will fix this race because holding i_data_sem shared
> is then enough so that delalloc state cannot change.
> 
> Please do this as a separate patch because this will need to be backported
> to stable tree. Thanks!
> 

Thanks for the insightful graph,I totally agree with you. For now the absent
delayed extents could lead to inaccurate space reservation and perhaps some
other potential problems. I will send a separate patch to fix this long
standing issue.

Thanks,
Yi.

> 
>>>> ---
>>>>  fs/ext4/inode.c | 24 ++++++++++++++++++++++--
>>>>  1 file changed, 22 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
>>>> index 4ce35f1c8b0a..94e7b8500878 100644
>>>> --- a/fs/ext4/inode.c
>>>> +++ b/fs/ext4/inode.c
>>>> @@ -479,6 +479,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
>>>>  		    struct ext4_map_blocks *map, int flags)
>>>>  {
>>>>  	struct extent_status es;
>>>> +	ext4_lblk_t next;
>>>>  	int retval;
>>>>  	int ret = 0;
>>>>  #ifdef ES_AGGRESSIVE_TEST
>>>> @@ -502,8 +503,10 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
>>>>  		return -EFSCORRUPTED;
>>>>  
>>>>  	/* Lookup extent status tree firstly */
>>>> -	if (!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) &&
>>>> -	    ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
>>>> +	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
>>>> +		goto uncached;
>>>> +
>>>> +	if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
>>>>  		if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
>>>>  			map->m_pblk = ext4_es_pblock(&es) +
>>>>  					map->m_lblk - es.es_lblk;
>>>> @@ -532,6 +535,23 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
>>>>  #endif
>>>>  		goto found;
>>>>  	}
>>>> +	/*
>>>> +	 * Not found, maybe a hole, need to adjust the map length before
>>>> +	 * seraching the real extent path. It can prevent incorrect hole
>>>> +	 * length returned if the following entries have delayed only
>>>> +	 * ones.
>>>> +	 */
>>>> +	if (!(flags & EXT4_GET_BLOCKS_CREATE) && es.es_lblk > map->m_lblk) {
>>>> +		next = es.es_lblk;
>>>> +		if (ext4_es_is_hole(&es))
>>>> +			next = ext4_es_skip_hole_extent(inode, map->m_lblk,
>>>> +							map->m_len);
>>>> +		retval = next - map->m_lblk;
>>>> +		if (map->m_len > retval)
>>>> +			map->m_len = retval;
>>>> +	}
>>>> +
>>>> +uncached:
>>>>  	/*
>>>>  	 * In the query cache no-wait mode, nothing we can do more if we
>>>>  	 * cannot find extent in the cache.
>>>> -- 
>>>> 2.39.2
>>>>
>>
diff mbox series

Patch

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4ce35f1c8b0a..94e7b8500878 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -479,6 +479,7 @@  int ext4_map_blocks(handle_t *handle, struct inode *inode,
 		    struct ext4_map_blocks *map, int flags)
 {
 	struct extent_status es;
+	ext4_lblk_t next;
 	int retval;
 	int ret = 0;
 #ifdef ES_AGGRESSIVE_TEST
@@ -502,8 +503,10 @@  int ext4_map_blocks(handle_t *handle, struct inode *inode,
 		return -EFSCORRUPTED;
 
 	/* Lookup extent status tree firstly */
-	if (!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) &&
-	    ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
+	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
+		goto uncached;
+
+	if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
 		if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
 			map->m_pblk = ext4_es_pblock(&es) +
 					map->m_lblk - es.es_lblk;
@@ -532,6 +535,23 @@  int ext4_map_blocks(handle_t *handle, struct inode *inode,
 #endif
 		goto found;
 	}
+	/*
+	 * Not found, maybe a hole, need to adjust the map length before
+	 * seraching the real extent path. It can prevent incorrect hole
+	 * length returned if the following entries have delayed only
+	 * ones.
+	 */
+	if (!(flags & EXT4_GET_BLOCKS_CREATE) && es.es_lblk > map->m_lblk) {
+		next = es.es_lblk;
+		if (ext4_es_is_hole(&es))
+			next = ext4_es_skip_hole_extent(inode, map->m_lblk,
+							map->m_len);
+		retval = next - map->m_lblk;
+		if (map->m_len > retval)
+			map->m_len = retval;
+	}
+
+uncached:
 	/*
 	 * In the query cache no-wait mode, nothing we can do more if we
 	 * cannot find extent in the cache.