diff mbox

ext4: fix extent tree corruption that incurred by hole punch

Message ID 1354623069-8124-1-git-send-email-forrestl@synology.com
State Superseded, archived
Headers show

Commit Message

Forrest Liu Dec. 4, 2012, 12:11 p.m. UTC
Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth
of extent tree is greater than 1.

Signed-off-by: Forrest Liu <forrestl@synology.com>
---
 fs/ext4/extents.c |   24 ++++++++++++++++++++----
 1 files changed, 20 insertions(+), 4 deletions(-)

Comments

Forrest Liu Dec. 4, 2012, 12:29 p.m. UTC | #1
This problem is easily to reproduce

Create a file with size larger than 1GB.

dd if=/dev/zero of=/test_file bs=1M count=1024

Punch every even block in test_file, and then use debugfs to dump
extents,  followings is dumped result

 2/ 2 339/340 231197 - 231197   3917597 -   3917597      1
 2/ 2 340/340 231199 - 231199   3917599 -   3917599      1
 0/ 2   2/  2 231201 - 262143   3901486              30943
 1/ 2   1/ 46 231201 - 231880   3901488                680
 2/ 2   1/340 231201 - 231201   3917601 -   3917601      1
 2/ 2   2/340 231203 - 231203   3917603 -   3917603      1

Punch blocks #231779 ~#231201 , to remove extent index, and then use
debugfs to dump extents,  followings is dumped result

2/ 2 340/340 231199 - 231199   3917599 -   3917599      1
 0/ 2   2/  2 231201 - 262143   3901486              30943
-------> logical block index didn't update when remove extent index
 1/ 2   1/ 45 231881 - 232560   3901490                680
 2/ 2   1/340 231881 - 231881   3918281 -   3918281      1
 2/ 2   2/340 231883 - 231883   3918283 -   3918283      1

Thanks,
Forrest

2012/12/4 Forrest Liu <forrestl@synology.com>:
> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth
> of extent tree is greater than 1.
>
> Signed-off-by: Forrest Liu <forrestl@synology.com>
> ---
>  fs/ext4/extents.c |   24 ++++++++++++++++++++----
>  1 files changed, 20 insertions(+), 4 deletions(-)
>
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index d3dd618..b10b8c0 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -2190,13 +2190,15 @@ errout:
>   * removes index from the index block.
>   */
>  static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
> -                       struct ext4_ext_path *path)
> +                       struct ext4_ext_path *path, int depth)
>  {
>         int err;
>         ext4_fsblk_t leaf;
> +       __le32 border;
>
>         /* free index block */
> -       path--;
> +       depth--;
> +       path = path + depth;
>         leaf = ext4_idx_pblock(path->p_idx);
>         if (unlikely(path->p_hdr->eh_entries == 0)) {
>                 EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
> @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>
>         ext4_free_blocks(handle, inode, NULL, leaf, 1,
>                          EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
> +
> +       border = path->p_idx->ei_block;
> +       while (--depth >= 0) {
> +               if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
> +                       break;
> +               path--;
> +               err = ext4_ext_get_access(handle, inode, path);
> +               if (err)
> +                       break;
> +               path->p_idx->ei_block = border;
> +               err = ext4_ext_dirty(handle, inode, path);
> +               if (err)
> +                       break;
> +       }
>         return err;
>  }
>
> @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>         /* if this leaf is free, then we should
>          * remove it from index block above */
>         if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
> -               err = ext4_ext_rm_idx(handle, inode, path + depth);
> +               err = ext4_ext_rm_idx(handle, inode, path, depth);
>
>  out:
>         return err;
> @@ -2760,7 +2776,7 @@ again:
>                                 /* index is empty, remove it;
>                                  * handle must be already prepared by the
>                                  * truncatei_leaf() */
> -                               err = ext4_ext_rm_idx(handle, inode, path + i);
> +                               err = ext4_ext_rm_idx(handle, inode, path, i);
>                         }
>                         /* root level has p_bh == NULL, brelse() eats this */
>                         brelse(path[i].p_bh);
> --
> 1.7.5.4
>
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Forrest Liu Dec. 5, 2012, 6:13 a.m. UTC | #2
2012/12/4 forrest <forrestl@synology.com>:
> This problem is easily to reproduce
>
> Create a file with size larger than 1GB.
>
> dd if=/dev/zero of=/test_file bs=1M count=1024
>
> Punch every even block in test_file, and then use debugfs to dump
> extents,  followings is dumped result
>
>  2/ 2 339/340 231197 - 231197   3917597 -   3917597      1
>  2/ 2 340/340 231199 - 231199   3917599 -   3917599      1
>  0/ 2   2/  2 231201 - 262143   3901486              30943
>  1/ 2   1/ 46 231201 - 231880   3901488                680
>  2/ 2   1/340 231201 - 231201   3917601 -   3917601      1
>  2/ 2   2/340 231203 - 231203   3917603 -   3917603      1
>
> Punch blocks #231779 ~#231201 , to remove extent index, and then use
> debugfs to dump extents,  followings is dumped result
>
> 2/ 2 340/340 231199 - 231199   3917599 -   3917599      1
>  0/ 2   2/  2 231201 - 262143   3901486              30943
> -------> logical block index didn't update when remove extent index
>  1/ 2   1/ 45 231881 - 232560   3901490                680
>  2/ 2   1/340 231881 - 231881   3918281 -   3918281      1
>  2/ 2   2/340 231883 - 231883   3918283 -   3918283      1

After blocks #231202~#231880 had been punch out, theses blocks can't map again.
If we do map a block in #231202~#231880, we will get error messages
like followings.

EXT4-fs error (device md2): ext4_ext_search_left:1239: inode #13: comm
flush-9:2: ix (231201) != EXT_FIRST_INDEX (1) (depth 0)!
EXT4-fs (md2): delayed block allocation failed for inode 13 at logical
offset 231203 with max blocks 1 with error -5
EXT4-fs (md2): This should not happen!! Data will be lost

Regards,
Forrest

>
> 2012/12/4 Forrest Liu <forrestl@synology.com>:
>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth
>> of extent tree is greater than 1.
>>
>> Signed-off-by: Forrest Liu <forrestl@synology.com>
>> ---
>>  fs/ext4/extents.c |   24 ++++++++++++++++++++----
>>  1 files changed, 20 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
>> index d3dd618..b10b8c0 100644
>> --- a/fs/ext4/extents.c
>> +++ b/fs/ext4/extents.c
>> @@ -2190,13 +2190,15 @@ errout:
>>   * removes index from the index block.
>>   */
>>  static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>> -                       struct ext4_ext_path *path)
>> +                       struct ext4_ext_path *path, int depth)
>>  {
>>         int err;
>>         ext4_fsblk_t leaf;
>> +       __le32 border;
>>
>>         /* free index block */
>> -       path--;
>> +       depth--;
>> +       path = path + depth;
>>         leaf = ext4_idx_pblock(path->p_idx);
>>         if (unlikely(path->p_hdr->eh_entries == 0)) {
>>                 EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
>> @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>
>>         ext4_free_blocks(handle, inode, NULL, leaf, 1,
>>                          EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
>> +
>> +       border = path->p_idx->ei_block;
>> +       while (--depth >= 0) {
>> +               if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
>> +                       break;
>> +               path--;
>> +               err = ext4_ext_get_access(handle, inode, path);
>> +               if (err)
>> +                       break;
>> +               path->p_idx->ei_block = border;
>> +               err = ext4_ext_dirty(handle, inode, path);
>> +               if (err)
>> +                       break;
>> +       }
>>         return err;
>>  }
>>
>> @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>>         /* if this leaf is free, then we should
>>          * remove it from index block above */
>>         if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
>> -               err = ext4_ext_rm_idx(handle, inode, path + depth);
>> +               err = ext4_ext_rm_idx(handle, inode, path, depth);
>>
>>  out:
>>         return err;
>> @@ -2760,7 +2776,7 @@ again:
>>                                 /* index is empty, remove it;
>>                                  * handle must be already prepared by the
>>                                  * truncatei_leaf() */
>> -                               err = ext4_ext_rm_idx(handle, inode, path + i);
>> +                               err = ext4_ext_rm_idx(handle, inode, path, i);
>>                         }
>>                         /* root level has p_bh == NULL, brelse() eats this */
>>                         brelse(path[i].p_bh);
>> --
>> 1.7.5.4
>>
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ashish Sangwan Dec. 5, 2012, 7:53 a.m. UTC | #3
I have tested it and the problem is real.
It happens when depth => 2 and punch hole completely removes any of
the first extent indexes at depth 1.
The change in ei_block is not propagated backwards to index at depth 0.

However, AFAICS, the problematic call to ext4_ext_rm_idx is from
ext4_ext_rm_leaf and there is no need to change in
ext4_ext_remove_space or in ext4_ext_rm_idx.

Forrest, what do you think about below change ?
basically its your code, but its been added to ext4_ext_rm_leaf after
removing index.

@@ -2448,8 +2464,28 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,

        /* if this leaf is free, then we should
         * remove it from index block above */
-       if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
+       if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) {
                err = ext4_ext_rm_idx(handle, inode, path + depth);
+               /* If this was the first extent index in node,
+                * propagates the ei_block updation info to top */
+               if(!err) {
+                       --depth;
+                       path = path + depth;
+                       while (--depth >= 0) {
+                               if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
+                                       break;
+                               path--;
+                               err = ext4_ext_get_access(handle, inode, path);
+                               if (err)
+                                       break;
+                               path->p_idx->ei_block = border;
+                               err = ext4_ext_dirty(handle, inode, path);
+                               if (err)
+                                       break;
+                       }
+               }
+       }
+

On Wed, Dec 5, 2012 at 11:43 AM, Forrest Liu <forrestl@synology.com> wrote:
> 2012/12/4 forrest <forrestl@synology.com>:
>> This problem is easily to reproduce
>>
>> Create a file with size larger than 1GB.
>>
>> dd if=/dev/zero of=/test_file bs=1M count=1024
>>
>> Punch every even block in test_file, and then use debugfs to dump
>> extents,  followings is dumped result
>>
>>  2/ 2 339/340 231197 - 231197   3917597 -   3917597      1
>>  2/ 2 340/340 231199 - 231199   3917599 -   3917599      1
>>  0/ 2   2/  2 231201 - 262143   3901486              30943
>>  1/ 2   1/ 46 231201 - 231880   3901488                680
>>  2/ 2   1/340 231201 - 231201   3917601 -   3917601      1
>>  2/ 2   2/340 231203 - 231203   3917603 -   3917603      1
>>
>> Punch blocks #231779 ~#231201 , to remove extent index, and then use
>> debugfs to dump extents,  followings is dumped result
>>
>> 2/ 2 340/340 231199 - 231199   3917599 -   3917599      1
>>  0/ 2   2/  2 231201 - 262143   3901486              30943
>> -------> logical block index didn't update when remove extent index
>>  1/ 2   1/ 45 231881 - 232560   3901490                680
>>  2/ 2   1/340 231881 - 231881   3918281 -   3918281      1
>>  2/ 2   2/340 231883 - 231883   3918283 -   3918283      1
>
> After blocks #231202~#231880 had been punch out, theses blocks can't map again.
> If we do map a block in #231202~#231880, we will get error messages
> like followings.
>
> EXT4-fs error (device md2): ext4_ext_search_left:1239: inode #13: comm
> flush-9:2: ix (231201) != EXT_FIRST_INDEX (1) (depth 0)!
> EXT4-fs (md2): delayed block allocation failed for inode 13 at logical
> offset 231203 with max blocks 1 with error -5
> EXT4-fs (md2): This should not happen!! Data will be lost
>
> Regards,
> Forrest
>
>>
>> 2012/12/4 Forrest Liu <forrestl@synology.com>:
>>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth
>>> of extent tree is greater than 1.
>>>
>>> Signed-off-by: Forrest Liu <forrestl@synology.com>
>>> ---
>>>  fs/ext4/extents.c |   24 ++++++++++++++++++++----
>>>  1 files changed, 20 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
>>> index d3dd618..b10b8c0 100644
>>> --- a/fs/ext4/extents.c
>>> +++ b/fs/ext4/extents.c
>>> @@ -2190,13 +2190,15 @@ errout:
>>>   * removes index from the index block.
>>>   */
>>>  static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>> -                       struct ext4_ext_path *path)
>>> +                       struct ext4_ext_path *path, int depth)
>>>  {
>>>         int err;
>>>         ext4_fsblk_t leaf;
>>> +       __le32 border;
>>>
>>>         /* free index block */
>>> -       path--;
>>> +       depth--;
>>> +       path = path + depth;
>>>         leaf = ext4_idx_pblock(path->p_idx);
>>>         if (unlikely(path->p_hdr->eh_entries == 0)) {
>>>                 EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
>>> @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>>
>>>         ext4_free_blocks(handle, inode, NULL, leaf, 1,
>>>                          EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
>>> +
>>> +       border = path->p_idx->ei_block;
>>> +       while (--depth >= 0) {
>>> +               if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
>>> +                       break;
>>> +               path--;
>>> +               err = ext4_ext_get_access(handle, inode, path);
>>> +               if (err)
>>> +                       break;
>>> +               path->p_idx->ei_block = border;
>>> +               err = ext4_ext_dirty(handle, inode, path);
>>> +               if (err)
>>> +                       break;
>>> +       }
>>>         return err;
>>>  }
>>>
>>> @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>>>         /* if this leaf is free, then we should
>>>          * remove it from index block above */
>>>         if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
>>> -               err = ext4_ext_rm_idx(handle, inode, path + depth);
>>> +               err = ext4_ext_rm_idx(handle, inode, path, depth);
>>>
>>>  out:
>>>         return err;
>>> @@ -2760,7 +2776,7 @@ again:
>>>                                 /* index is empty, remove it;
>>>                                  * handle must be already prepared by the
>>>                                  * truncatei_leaf() */
>>> -                               err = ext4_ext_rm_idx(handle, inode, path + i);
>>> +                               err = ext4_ext_rm_idx(handle, inode, path, i);
>>>                         }
>>>                         /* root level has p_bh == NULL, brelse() eats this */
>>>                         brelse(path[i].p_bh);
>>> --
>>> 1.7.5.4
>>>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ashish Sangwan Dec. 5, 2012, 8:09 a.m. UTC | #4
Sorry 1 typo.
It should be
+ path->p_idx->ei_block =(path + 1)->p_idx->ei_block ;
instead of
+ path->p_idx->ei_block = border;

On Wed, Dec 5, 2012 at 1:23 PM, Ashish Sangwan <ashishsangwan2@gmail.com> wrote:
> I have tested it and the problem is real.
> It happens when depth => 2 and punch hole completely removes any of
> the first extent indexes at depth 1.
> The change in ei_block is not propagated backwards to index at depth 0.
>
> However, AFAICS, the problematic call to ext4_ext_rm_idx is from
> ext4_ext_rm_leaf and there is no need to change in
> ext4_ext_remove_space or in ext4_ext_rm_idx.
>
> Forrest, what do you think about below change ?
> basically its your code, but its been added to ext4_ext_rm_leaf after
> removing index.
>
> @@ -2448,8 +2464,28 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>
>         /* if this leaf is free, then we should
>          * remove it from index block above */
> -       if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
> +       if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) {
>                 err = ext4_ext_rm_idx(handle, inode, path + depth);
> +               /* If this was the first extent index in node,
> +                * propagates the ei_block updation info to top */
> +               if(!err) {
> +                       --depth;
> +                       path = path + depth;
> +                       while (--depth >= 0) {
> +                               if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
> +                                       break;
> +                               path--;
> +                               err = ext4_ext_get_access(handle, inode, path);
> +                               if (err)
> +                                       break;
> +                               path->p_idx->ei_block = border;
> +                               err = ext4_ext_dirty(handle, inode, path);
> +                               if (err)
> +                                       break;
> +                       }
> +               }
> +       }
> +
>
> On Wed, Dec 5, 2012 at 11:43 AM, Forrest Liu <forrestl@synology.com> wrote:
>> 2012/12/4 forrest <forrestl@synology.com>:
>>> This problem is easily to reproduce
>>>
>>> Create a file with size larger than 1GB.
>>>
>>> dd if=/dev/zero of=/test_file bs=1M count=1024
>>>
>>> Punch every even block in test_file, and then use debugfs to dump
>>> extents,  followings is dumped result
>>>
>>>  2/ 2 339/340 231197 - 231197   3917597 -   3917597      1
>>>  2/ 2 340/340 231199 - 231199   3917599 -   3917599      1
>>>  0/ 2   2/  2 231201 - 262143   3901486              30943
>>>  1/ 2   1/ 46 231201 - 231880   3901488                680
>>>  2/ 2   1/340 231201 - 231201   3917601 -   3917601      1
>>>  2/ 2   2/340 231203 - 231203   3917603 -   3917603      1
>>>
>>> Punch blocks #231779 ~#231201 , to remove extent index, and then use
>>> debugfs to dump extents,  followings is dumped result
>>>
>>> 2/ 2 340/340 231199 - 231199   3917599 -   3917599      1
>>>  0/ 2   2/  2 231201 - 262143   3901486              30943
>>> -------> logical block index didn't update when remove extent index
>>>  1/ 2   1/ 45 231881 - 232560   3901490                680
>>>  2/ 2   1/340 231881 - 231881   3918281 -   3918281      1
>>>  2/ 2   2/340 231883 - 231883   3918283 -   3918283      1
>>
>> After blocks #231202~#231880 had been punch out, theses blocks can't map again.
>> If we do map a block in #231202~#231880, we will get error messages
>> like followings.
>>
>> EXT4-fs error (device md2): ext4_ext_search_left:1239: inode #13: comm
>> flush-9:2: ix (231201) != EXT_FIRST_INDEX (1) (depth 0)!
>> EXT4-fs (md2): delayed block allocation failed for inode 13 at logical
>> offset 231203 with max blocks 1 with error -5
>> EXT4-fs (md2): This should not happen!! Data will be lost
>>
>> Regards,
>> Forrest
>>
>>>
>>> 2012/12/4 Forrest Liu <forrestl@synology.com>:
>>>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth
>>>> of extent tree is greater than 1.
>>>>
>>>> Signed-off-by: Forrest Liu <forrestl@synology.com>
>>>> ---
>>>>  fs/ext4/extents.c |   24 ++++++++++++++++++++----
>>>>  1 files changed, 20 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
>>>> index d3dd618..b10b8c0 100644
>>>> --- a/fs/ext4/extents.c
>>>> +++ b/fs/ext4/extents.c
>>>> @@ -2190,13 +2190,15 @@ errout:
>>>>   * removes index from the index block.
>>>>   */
>>>>  static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>>> -                       struct ext4_ext_path *path)
>>>> +                       struct ext4_ext_path *path, int depth)
>>>>  {
>>>>         int err;
>>>>         ext4_fsblk_t leaf;
>>>> +       __le32 border;
>>>>
>>>>         /* free index block */
>>>> -       path--;
>>>> +       depth--;
>>>> +       path = path + depth;
>>>>         leaf = ext4_idx_pblock(path->p_idx);
>>>>         if (unlikely(path->p_hdr->eh_entries == 0)) {
>>>>                 EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
>>>> @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>>>
>>>>         ext4_free_blocks(handle, inode, NULL, leaf, 1,
>>>>                          EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
>>>> +
>>>> +       border = path->p_idx->ei_block;
>>>> +       while (--depth >= 0) {
>>>> +               if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
>>>> +                       break;
>>>> +               path--;
>>>> +               err = ext4_ext_get_access(handle, inode, path);
>>>> +               if (err)
>>>> +                       break;
>>>> +               path->p_idx->ei_block = border;
>>>> +               err = ext4_ext_dirty(handle, inode, path);
>>>> +               if (err)
>>>> +                       break;
>>>> +       }
>>>>         return err;
>>>>  }
>>>>
>>>> @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>>>>         /* if this leaf is free, then we should
>>>>          * remove it from index block above */
>>>>         if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
>>>> -               err = ext4_ext_rm_idx(handle, inode, path + depth);
>>>> +               err = ext4_ext_rm_idx(handle, inode, path, depth);
>>>>
>>>>  out:
>>>>         return err;
>>>> @@ -2760,7 +2776,7 @@ again:
>>>>                                 /* index is empty, remove it;
>>>>                                  * handle must be already prepared by the
>>>>                                  * truncatei_leaf() */
>>>> -                               err = ext4_ext_rm_idx(handle, inode, path + i);
>>>> +                               err = ext4_ext_rm_idx(handle, inode, path, i);
>>>>                         }
>>>>                         /* root level has p_bh == NULL, brelse() eats this */
>>>>                         brelse(path[i].p_bh);
>>>> --
>>>> 1.7.5.4
>>>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Forrest Liu Dec. 6, 2012, 11:35 a.m. UTC | #5
Hi Ashish,
   Thanks for your review and fixed hole punch bugs.

We can't propogate ei_block only in ext4_ext_rm_leave.

There have two cases to call ext4_ext_rm_idx
1) extent block has no entry, ext4_ext_rm_leave calls
ext4_ext_rm_idx to release extent block.

2) extent index block has no entry, ext4_ext_remove_space
calls ext4_ext_rm_idx to release extent index block.

Consider remove third level of extent index block that indexed by
second level of extent index, and the extent index in second level
is first entry in block.

In this case, ei_block will not propogate to top level correctly,
if we only do propogation in ext4_ext_rm_leave.

Regards,
Forrest

2012/12/5 Ashish Sangwan <ashishsangwan2@gmail.com>:
> Sorry 1 typo.
> It should be
> + path->p_idx->ei_block =(path + 1)->p_idx->ei_block ;
> instead of
> + path->p_idx->ei_block = border;
>
> On Wed, Dec 5, 2012 at 1:23 PM, Ashish Sangwan <ashishsangwan2@gmail.com> wrote:
>> I have tested it and the problem is real.
>> It happens when depth => 2 and punch hole completely removes any of
>> the first extent indexes at depth 1.
>> The change in ei_block is not propagated backwards to index at depth 0.
>>
>> However, AFAICS, the problematic call to ext4_ext_rm_idx is from
>> ext4_ext_rm_leaf and there is no need to change in
>> ext4_ext_remove_space or in ext4_ext_rm_idx.
>>
>> Forrest, what do you think about below change ?
>> basically its your code, but its been added to ext4_ext_rm_leaf after
>> removing index.
>>
>> @@ -2448,8 +2464,28 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>>
>>         /* if this leaf is free, then we should
>>          * remove it from index block above */
>> -       if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
>> +       if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) {
>>                 err = ext4_ext_rm_idx(handle, inode, path + depth);
>> +               /* If this was the first extent index in node,
>> +                * propagates the ei_block updation info to top */
>> +               if(!err) {
>> +                       --depth;
>> +                       path = path + depth;
>> +                       while (--depth >= 0) {
>> +                               if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
>> +                                       break;
>> +                               path--;
>> +                               err = ext4_ext_get_access(handle, inode, path);
>> +                               if (err)
>> +                                       break;
>> +                               path->p_idx->ei_block = border;
>> +                               err = ext4_ext_dirty(handle, inode, path);
>> +                               if (err)
>> +                                       break;
>> +                       }
>> +               }
>> +       }
>> +
>>
>> On Wed, Dec 5, 2012 at 11:43 AM, Forrest Liu <forrestl@synology.com> wrote:
>>> 2012/12/4 forrest <forrestl@synology.com>:
>>>> This problem is easily to reproduce
>>>>
>>>> Create a file with size larger than 1GB.
>>>>
>>>> dd if=/dev/zero of=/test_file bs=1M count=1024
>>>>
>>>> Punch every even block in test_file, and then use debugfs to dump
>>>> extents,  followings is dumped result
>>>>
>>>>  2/ 2 339/340 231197 - 231197   3917597 -   3917597      1
>>>>  2/ 2 340/340 231199 - 231199   3917599 -   3917599      1
>>>>  0/ 2   2/  2 231201 - 262143   3901486              30943
>>>>  1/ 2   1/ 46 231201 - 231880   3901488                680
>>>>  2/ 2   1/340 231201 - 231201   3917601 -   3917601      1
>>>>  2/ 2   2/340 231203 - 231203   3917603 -   3917603      1
>>>>
>>>> Punch blocks #231779 ~#231201 , to remove extent index, and then use
>>>> debugfs to dump extents,  followings is dumped result
>>>>
>>>> 2/ 2 340/340 231199 - 231199   3917599 -   3917599      1
>>>>  0/ 2   2/  2 231201 - 262143   3901486              30943
>>>> -------> logical block index didn't update when remove extent index
>>>>  1/ 2   1/ 45 231881 - 232560   3901490                680
>>>>  2/ 2   1/340 231881 - 231881   3918281 -   3918281      1
>>>>  2/ 2   2/340 231883 - 231883   3918283 -   3918283      1
>>>
>>> After blocks #231202~#231880 had been punch out, theses blocks can't map again.
>>> If we do map a block in #231202~#231880, we will get error messages
>>> like followings.
>>>
>>> EXT4-fs error (device md2): ext4_ext_search_left:1239: inode #13: comm
>>> flush-9:2: ix (231201) != EXT_FIRST_INDEX (1) (depth 0)!
>>> EXT4-fs (md2): delayed block allocation failed for inode 13 at logical
>>> offset 231203 with max blocks 1 with error -5
>>> EXT4-fs (md2): This should not happen!! Data will be lost
>>>
>>> Regards,
>>> Forrest
>>>
>>>>
>>>> 2012/12/4 Forrest Liu <forrestl@synology.com>:
>>>>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth
>>>>> of extent tree is greater than 1.
>>>>>
>>>>> Signed-off-by: Forrest Liu <forrestl@synology.com>
>>>>> ---
>>>>>  fs/ext4/extents.c |   24 ++++++++++++++++++++----
>>>>>  1 files changed, 20 insertions(+), 4 deletions(-)
>>>>>
>>>>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
>>>>> index d3dd618..b10b8c0 100644
>>>>> --- a/fs/ext4/extents.c
>>>>> +++ b/fs/ext4/extents.c
>>>>> @@ -2190,13 +2190,15 @@ errout:
>>>>>   * removes index from the index block.
>>>>>   */
>>>>>  static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>>>> -                       struct ext4_ext_path *path)
>>>>> +                       struct ext4_ext_path *path, int depth)
>>>>>  {
>>>>>         int err;
>>>>>         ext4_fsblk_t leaf;
>>>>> +       __le32 border;
>>>>>
>>>>>         /* free index block */
>>>>> -       path--;
>>>>> +       depth--;
>>>>> +       path = path + depth;
>>>>>         leaf = ext4_idx_pblock(path->p_idx);
>>>>>         if (unlikely(path->p_hdr->eh_entries == 0)) {
>>>>>                 EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
>>>>> @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>>>>
>>>>>         ext4_free_blocks(handle, inode, NULL, leaf, 1,
>>>>>                          EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
>>>>> +
>>>>> +       border = path->p_idx->ei_block;
>>>>> +       while (--depth >= 0) {
>>>>> +               if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
>>>>> +                       break;
>>>>> +               path--;
>>>>> +               err = ext4_ext_get_access(handle, inode, path);
>>>>> +               if (err)
>>>>> +                       break;
>>>>> +               path->p_idx->ei_block = border;
>>>>> +               err = ext4_ext_dirty(handle, inode, path);
>>>>> +               if (err)
>>>>> +                       break;
>>>>> +       }
>>>>>         return err;
>>>>>  }
>>>>>
>>>>> @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>>>>>         /* if this leaf is free, then we should
>>>>>          * remove it from index block above */
>>>>>         if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
>>>>> -               err = ext4_ext_rm_idx(handle, inode, path + depth);
>>>>> +               err = ext4_ext_rm_idx(handle, inode, path, depth);
>>>>>
>>>>>  out:
>>>>>         return err;
>>>>> @@ -2760,7 +2776,7 @@ again:
>>>>>                                 /* index is empty, remove it;
>>>>>                                  * handle must be already prepared by the
>>>>>                                  * truncatei_leaf() */
>>>>> -                               err = ext4_ext_rm_idx(handle, inode, path + i);
>>>>> +                               err = ext4_ext_rm_idx(handle, inode, path, i);
>>>>>                         }
>>>>>                         /* root level has p_bh == NULL, brelse() eats this */
>>>>>                         brelse(path[i].p_bh);
>>>>> --
>>>>> 1.7.5.4
>>>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ashish Sangwan Dec. 6, 2012, 1:16 p.m. UTC | #6
On Thu, Dec 6, 2012 at 5:05 PM, Forrest Liu <forrestl@synology.com> wrote:
> Hi Ashish,
>    Thanks for your review and fixed hole punch bugs.
>
> We can't propogate ei_block only in ext4_ext_rm_leave.
>
> There have two cases to call ext4_ext_rm_idx
> 1) extent block has no entry, ext4_ext_rm_leave calls
> ext4_ext_rm_idx to release extent block.
>

1.1)  And when the extent block is removed, the corresponding index
entry has also to be removed. And if this was the first index entry,
all the other index entries, are also updated. This updation will
continue till we reach the root of extent tree OR the currently
updated index entry is not the first entry in the node.

> 2) extent index block has no entry, ext4_ext_remove_space
> calls ext4_ext_rm_idx to release extent index block.
>
> Consider remove third level of extent index block that indexed by
> second level of extent index, and the extent index in second level
> is first entry in block.
>
So, I think the above case would be handled by point (1.1) i.e. the
earlier index updation done from within ext4_ext_rm_leaf.
The earlier ei_block updation will reach to the top of tree.

> In this case, ei_block will not propogate to top level correctly,
> if we only do propogation in ext4_ext_rm_leave.

And thanks for pointing the problem. Can you reproduce it with my code
suggesstion even with depth 3?
If no, than please resend the patch with suggested changes.
The intent here is to solve the problem without changing any function
signature and with minimal changes.

Regards,
Ashish
>
> Regards,
> Forrest
>
> 2012/12/5 Ashish Sangwan <ashishsangwan2@gmail.com>:
>> Sorry 1 typo.
>> It should be
>> + path->p_idx->ei_block =(path + 1)->p_idx->ei_block ;
>> instead of
>> + path->p_idx->ei_block = border;
>>
>> On Wed, Dec 5, 2012 at 1:23 PM, Ashish Sangwan <ashishsangwan2@gmail.com> wrote:
>>> I have tested it and the problem is real.
>>> It happens when depth => 2 and punch hole completely removes any of
>>> the first extent indexes at depth 1.
>>> The change in ei_block is not propagated backwards to index at depth 0.
>>>
>>> However, AFAICS, the problematic call to ext4_ext_rm_idx is from
>>> ext4_ext_rm_leaf and there is no need to change in
>>> ext4_ext_remove_space or in ext4_ext_rm_idx.
>>>
>>> Forrest, what do you think about below change ?
>>> basically its your code, but its been added to ext4_ext_rm_leaf after
>>> removing index.
>>>
>>> @@ -2448,8 +2464,28 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>>>
>>>         /* if this leaf is free, then we should
>>>          * remove it from index block above */
>>> -       if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
>>> +       if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) {
>>>                 err = ext4_ext_rm_idx(handle, inode, path + depth);
>>> +               /* If this was the first extent index in node,
>>> +                * propagates the ei_block updation info to top */
>>> +               if(!err) {
>>> +                       --depth;
>>> +                       path = path + depth;
>>> +                       while (--depth >= 0) {
>>> +                               if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
>>> +                                       break;
>>> +                               path--;
>>> +                               err = ext4_ext_get_access(handle, inode, path);
>>> +                               if (err)
>>> +                                       break;
>>> +                               path->p_idx->ei_block = border;
>>> +                               err = ext4_ext_dirty(handle, inode, path);
>>> +                               if (err)
>>> +                                       break;
>>> +                       }
>>> +               }
>>> +       }
>>> +
>>>
>>> On Wed, Dec 5, 2012 at 11:43 AM, Forrest Liu <forrestl@synology.com> wrote:
>>>> 2012/12/4 forrest <forrestl@synology.com>:
>>>>> This problem is easily to reproduce
>>>>>
>>>>> Create a file with size larger than 1GB.
>>>>>
>>>>> dd if=/dev/zero of=/test_file bs=1M count=1024
>>>>>
>>>>> Punch every even block in test_file, and then use debugfs to dump
>>>>> extents,  followings is dumped result
>>>>>
>>>>>  2/ 2 339/340 231197 - 231197   3917597 -   3917597      1
>>>>>  2/ 2 340/340 231199 - 231199   3917599 -   3917599      1
>>>>>  0/ 2   2/  2 231201 - 262143   3901486              30943
>>>>>  1/ 2   1/ 46 231201 - 231880   3901488                680
>>>>>  2/ 2   1/340 231201 - 231201   3917601 -   3917601      1
>>>>>  2/ 2   2/340 231203 - 231203   3917603 -   3917603      1
>>>>>
>>>>> Punch blocks #231779 ~#231201 , to remove extent index, and then use
>>>>> debugfs to dump extents,  followings is dumped result
>>>>>
>>>>> 2/ 2 340/340 231199 - 231199   3917599 -   3917599      1
>>>>>  0/ 2   2/  2 231201 - 262143   3901486              30943
>>>>> -------> logical block index didn't update when remove extent index
>>>>>  1/ 2   1/ 45 231881 - 232560   3901490                680
>>>>>  2/ 2   1/340 231881 - 231881   3918281 -   3918281      1
>>>>>  2/ 2   2/340 231883 - 231883   3918283 -   3918283      1
>>>>
>>>> After blocks #231202~#231880 had been punch out, theses blocks can't map again.
>>>> If we do map a block in #231202~#231880, we will get error messages
>>>> like followings.
>>>>
>>>> EXT4-fs error (device md2): ext4_ext_search_left:1239: inode #13: comm
>>>> flush-9:2: ix (231201) != EXT_FIRST_INDEX (1) (depth 0)!
>>>> EXT4-fs (md2): delayed block allocation failed for inode 13 at logical
>>>> offset 231203 with max blocks 1 with error -5
>>>> EXT4-fs (md2): This should not happen!! Data will be lost
>>>>
>>>> Regards,
>>>> Forrest
>>>>
>>>>>
>>>>> 2012/12/4 Forrest Liu <forrestl@synology.com>:
>>>>>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth
>>>>>> of extent tree is greater than 1.
>>>>>>
>>>>>> Signed-off-by: Forrest Liu <forrestl@synology.com>
>>>>>> ---
>>>>>>  fs/ext4/extents.c |   24 ++++++++++++++++++++----
>>>>>>  1 files changed, 20 insertions(+), 4 deletions(-)
>>>>>>
>>>>>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
>>>>>> index d3dd618..b10b8c0 100644
>>>>>> --- a/fs/ext4/extents.c
>>>>>> +++ b/fs/ext4/extents.c
>>>>>> @@ -2190,13 +2190,15 @@ errout:
>>>>>>   * removes index from the index block.
>>>>>>   */
>>>>>>  static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>>>>> -                       struct ext4_ext_path *path)
>>>>>> +                       struct ext4_ext_path *path, int depth)
>>>>>>  {
>>>>>>         int err;
>>>>>>         ext4_fsblk_t leaf;
>>>>>> +       __le32 border;
>>>>>>
>>>>>>         /* free index block */
>>>>>> -       path--;
>>>>>> +       depth--;
>>>>>> +       path = path + depth;
>>>>>>         leaf = ext4_idx_pblock(path->p_idx);
>>>>>>         if (unlikely(path->p_hdr->eh_entries == 0)) {
>>>>>>                 EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
>>>>>> @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>>>>>
>>>>>>         ext4_free_blocks(handle, inode, NULL, leaf, 1,
>>>>>>                          EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
>>>>>> +
>>>>>> +       border = path->p_idx->ei_block;
>>>>>> +       while (--depth >= 0) {
>>>>>> +               if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
>>>>>> +                       break;
>>>>>> +               path--;
>>>>>> +               err = ext4_ext_get_access(handle, inode, path);
>>>>>> +               if (err)
>>>>>> +                       break;
>>>>>> +               path->p_idx->ei_block = border;
>>>>>> +               err = ext4_ext_dirty(handle, inode, path);
>>>>>> +               if (err)
>>>>>> +                       break;
>>>>>> +       }
>>>>>>         return err;
>>>>>>  }
>>>>>>
>>>>>> @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>>>>>>         /* if this leaf is free, then we should
>>>>>>          * remove it from index block above */
>>>>>>         if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
>>>>>> -               err = ext4_ext_rm_idx(handle, inode, path + depth);
>>>>>> +               err = ext4_ext_rm_idx(handle, inode, path, depth);
>>>>>>
>>>>>>  out:
>>>>>>         return err;
>>>>>> @@ -2760,7 +2776,7 @@ again:
>>>>>>                                 /* index is empty, remove it;
>>>>>>                                  * handle must be already prepared by the
>>>>>>                                  * truncatei_leaf() */
>>>>>> -                               err = ext4_ext_rm_idx(handle, inode, path + i);
>>>>>> +                               err = ext4_ext_rm_idx(handle, inode, path, i);
>>>>>>                         }
>>>>>>                         /* root level has p_bh == NULL, brelse() eats this */
>>>>>>                         brelse(path[i].p_bh);
>>>>>> --
>>>>>> 1.7.5.4
>>>>>>
>>>> --
>>>> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
>>>> the body of a message to majordomo@vger.kernel.org
>>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Forrest Liu Dec. 6, 2012, 2:36 p.m. UTC | #7
Hi Ashish,

   There have a chance to do  a trivial update, and this case will be
covered in ext4_ext_remove_space.

+       if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) {
                 err = ext4_ext_rm_idx(handle, inode, path + depth);
+               /* If this was the first extent index in node,
+                * propagates the ei_block updation info to top */
+               if(!err) {
+                       --depth;
+                       path = path + depth;
+                       while (--depth >= 0) {
+                               if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
+                                       break;
+                               path--;
+                               err = ext4_ext_get_access(handle, inode, path);
+                               if (err)
+                                       break;

        trivial update, when (path + 1)->p_idx->eh_entries == 0

+                               path->p_idx->ei_block =(path +
1)->p_idx->ei_block ;
+                               err = ext4_ext_dirty(handle, inode, path);
+                               if (err)
+                                       break;
+                       }
+               }
+       }
+

   I will trying to reproduce the problem tomorrow.

Thanks,
Forrest

2012/12/6 Ashish Sangwan <ashishsangwan2@gmail.com>:
> On Thu, Dec 6, 2012 at 5:05 PM, Forrest Liu <forrestl@synology.com> wrote:
>> Hi Ashish,
>>    Thanks for your review and fixed hole punch bugs.
>>
>> We can't propogate ei_block only in ext4_ext_rm_leave.
>>
>> There have two cases to call ext4_ext_rm_idx
>> 1) extent block has no entry, ext4_ext_rm_leave calls
>> ext4_ext_rm_idx to release extent block.
>>
>
> 1.1)  And when the extent block is removed, the corresponding index
> entry has also to be removed. And if this was the first index entry,
> all the other index entries, are also updated. This updation will
> continue till we reach the root of extent tree OR the currently
> updated index entry is not the first entry in the node.
>
>> 2) extent index block has no entry, ext4_ext_remove_space
>> calls ext4_ext_rm_idx to release extent index block.
>>
>> Consider remove third level of extent index block that indexed by
>> second level of extent index, and the extent index in second level
>> is first entry in block.
>>
> So, I think the above case would be handled by point (1.1) i.e. the
> earlier index updation done from within ext4_ext_rm_leaf.
> The earlier ei_block updation will reach to the top of tree.
>
>> In this case, ei_block will not propogate to top level correctly,
>> if we only do propogation in ext4_ext_rm_leave.
>
> And thanks for pointing the problem. Can you reproduce it with my code
> suggesstion even with depth 3?
> If no, than please resend the patch with suggested changes.
> The intent here is to solve the problem without changing any function
> signature and with minimal changes.
>
> Regards,
> Ashish
>>
>> Regards,
>> Forrest
>>
>> 2012/12/5 Ashish Sangwan <ashishsangwan2@gmail.com>:
>>> Sorry 1 typo.
>>> It should be
>>> + path->p_idx->ei_block =(path + 1)->p_idx->ei_block ;
>>> instead of
>>> + path->p_idx->ei_block = border;
>>>
>>> On Wed, Dec 5, 2012 at 1:23 PM, Ashish Sangwan <ashishsangwan2@gmail.com> wrote:
>>>> I have tested it and the problem is real.
>>>> It happens when depth => 2 and punch hole completely removes any of
>>>> the first extent indexes at depth 1.
>>>> The change in ei_block is not propagated backwards to index at depth 0.
>>>>
>>>> However, AFAICS, the problematic call to ext4_ext_rm_idx is from
>>>> ext4_ext_rm_leaf and there is no need to change in
>>>> ext4_ext_remove_space or in ext4_ext_rm_idx.
>>>>
>>>> Forrest, what do you think about below change ?
>>>> basically its your code, but its been added to ext4_ext_rm_leaf after
>>>> removing index.
>>>>
>>>> @@ -2448,8 +2464,28 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>>>>
>>>>         /* if this leaf is free, then we should
>>>>          * remove it from index block above */
>>>> -       if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
>>>> +       if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) {
>>>>                 err = ext4_ext_rm_idx(handle, inode, path + depth);
>>>> +               /* If this was the first extent index in node,
>>>> +                * propagates the ei_block updation info to top */
>>>> +               if(!err) {
>>>> +                       --depth;
>>>> +                       path = path + depth;
>>>> +                       while (--depth >= 0) {
>>>> +                               if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
>>>> +                                       break;
>>>> +                               path--;
>>>> +                               err = ext4_ext_get_access(handle, inode, path);
>>>> +                               if (err)
>>>> +                                       break;
>>>> +                               path->p_idx->ei_block = border;
>>>> +                               err = ext4_ext_dirty(handle, inode, path);
>>>> +                               if (err)
>>>> +                                       break;
>>>> +                       }
>>>> +               }
>>>> +       }
>>>> +
>>>>
>>>> On Wed, Dec 5, 2012 at 11:43 AM, Forrest Liu <forrestl@synology.com> wrote:
>>>>> 2012/12/4 forrest <forrestl@synology.com>:
>>>>>> This problem is easily to reproduce
>>>>>>
>>>>>> Create a file with size larger than 1GB.
>>>>>>
>>>>>> dd if=/dev/zero of=/test_file bs=1M count=1024
>>>>>>
>>>>>> Punch every even block in test_file, and then use debugfs to dump
>>>>>> extents,  followings is dumped result
>>>>>>
>>>>>>  2/ 2 339/340 231197 - 231197   3917597 -   3917597      1
>>>>>>  2/ 2 340/340 231199 - 231199   3917599 -   3917599      1
>>>>>>  0/ 2   2/  2 231201 - 262143   3901486              30943
>>>>>>  1/ 2   1/ 46 231201 - 231880   3901488                680
>>>>>>  2/ 2   1/340 231201 - 231201   3917601 -   3917601      1
>>>>>>  2/ 2   2/340 231203 - 231203   3917603 -   3917603      1
>>>>>>
>>>>>> Punch blocks #231779 ~#231201 , to remove extent index, and then use
>>>>>> debugfs to dump extents,  followings is dumped result
>>>>>>
>>>>>> 2/ 2 340/340 231199 - 231199   3917599 -   3917599      1
>>>>>>  0/ 2   2/  2 231201 - 262143   3901486              30943
>>>>>> -------> logical block index didn't update when remove extent index
>>>>>>  1/ 2   1/ 45 231881 - 232560   3901490                680
>>>>>>  2/ 2   1/340 231881 - 231881   3918281 -   3918281      1
>>>>>>  2/ 2   2/340 231883 - 231883   3918283 -   3918283      1
>>>>>
>>>>> After blocks #231202~#231880 had been punch out, theses blocks can't map again.
>>>>> If we do map a block in #231202~#231880, we will get error messages
>>>>> like followings.
>>>>>
>>>>> EXT4-fs error (device md2): ext4_ext_search_left:1239: inode #13: comm
>>>>> flush-9:2: ix (231201) != EXT_FIRST_INDEX (1) (depth 0)!
>>>>> EXT4-fs (md2): delayed block allocation failed for inode 13 at logical
>>>>> offset 231203 with max blocks 1 with error -5
>>>>> EXT4-fs (md2): This should not happen!! Data will be lost
>>>>>
>>>>> Regards,
>>>>> Forrest
>>>>>
>>>>>>
>>>>>> 2012/12/4 Forrest Liu <forrestl@synology.com>:
>>>>>>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth
>>>>>>> of extent tree is greater than 1.
>>>>>>>
>>>>>>> Signed-off-by: Forrest Liu <forrestl@synology.com>
>>>>>>> ---
>>>>>>>  fs/ext4/extents.c |   24 ++++++++++++++++++++----
>>>>>>>  1 files changed, 20 insertions(+), 4 deletions(-)
>>>>>>>
>>>>>>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
>>>>>>> index d3dd618..b10b8c0 100644
>>>>>>> --- a/fs/ext4/extents.c
>>>>>>> +++ b/fs/ext4/extents.c
>>>>>>> @@ -2190,13 +2190,15 @@ errout:
>>>>>>>   * removes index from the index block.
>>>>>>>   */
>>>>>>>  static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>>>>>> -                       struct ext4_ext_path *path)
>>>>>>> +                       struct ext4_ext_path *path, int depth)
>>>>>>>  {
>>>>>>>         int err;
>>>>>>>         ext4_fsblk_t leaf;
>>>>>>> +       __le32 border;
>>>>>>>
>>>>>>>         /* free index block */
>>>>>>> -       path--;
>>>>>>> +       depth--;
>>>>>>> +       path = path + depth;
>>>>>>>         leaf = ext4_idx_pblock(path->p_idx);
>>>>>>>         if (unlikely(path->p_hdr->eh_entries == 0)) {
>>>>>>>                 EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
>>>>>>> @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>>>>>>
>>>>>>>         ext4_free_blocks(handle, inode, NULL, leaf, 1,
>>>>>>>                          EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
>>>>>>> +
>>>>>>> +       border = path->p_idx->ei_block;
>>>>>>> +       while (--depth >= 0) {
>>>>>>> +               if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
>>>>>>> +                       break;
>>>>>>> +               path--;
>>>>>>> +               err = ext4_ext_get_access(handle, inode, path);
>>>>>>> +               if (err)
>>>>>>> +                       break;
>>>>>>> +               path->p_idx->ei_block = border;
>>>>>>> +               err = ext4_ext_dirty(handle, inode, path);
>>>>>>> +               if (err)
>>>>>>> +                       break;
>>>>>>> +       }
>>>>>>>         return err;
>>>>>>>  }
>>>>>>>
>>>>>>> @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>>>>>>>         /* if this leaf is free, then we should
>>>>>>>          * remove it from index block above */
>>>>>>>         if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
>>>>>>> -               err = ext4_ext_rm_idx(handle, inode, path + depth);
>>>>>>> +               err = ext4_ext_rm_idx(handle, inode, path, depth);
>>>>>>>
>>>>>>>  out:
>>>>>>>         return err;
>>>>>>> @@ -2760,7 +2776,7 @@ again:
>>>>>>>                                 /* index is empty, remove it;
>>>>>>>                                  * handle must be already prepared by the
>>>>>>>                                  * truncatei_leaf() */
>>>>>>> -                               err = ext4_ext_rm_idx(handle, inode, path + i);
>>>>>>> +                               err = ext4_ext_rm_idx(handle, inode, path, i);
>>>>>>>                         }
>>>>>>>                         /* root level has p_bh == NULL, brelse() eats this */
>>>>>>>                         brelse(path[i].p_bh);
>>>>>>> --
>>>>>>> 1.7.5.4
>>>>>>>
>>>>> --
>>>>> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
>>>>> the body of a message to majordomo@vger.kernel.org
>>>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Sandeen Dec. 6, 2012, 3:45 p.m. UTC | #8
On 12/4/12 6:11 AM, Forrest Liu wrote:
> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth
> of extent tree is greater than 1.

This is interesting; we had 2 reports of similar corruption on the
list, I wonder if the application in question was doing hole punching.
I didn't expect that they were, so TBH I was pretty much ignoring
the hole-punch cases for parent index updates.  Hm.  I'll have
to look into that.

Could you turn your testcase into an xfstest regression test?

-Eric

> Signed-off-by: Forrest Liu <forrestl@synology.com>
> ---
>  fs/ext4/extents.c |   24 ++++++++++++++++++++----
>  1 files changed, 20 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index d3dd618..b10b8c0 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -2190,13 +2190,15 @@ errout:
>   * removes index from the index block.
>   */
>  static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
> -			struct ext4_ext_path *path)
> +			struct ext4_ext_path *path, int depth)
>  {
>  	int err;
>  	ext4_fsblk_t leaf;
> +	__le32 border;
>  
>  	/* free index block */
> -	path--;
> +	depth--;
> +	path = path + depth;
>  	leaf = ext4_idx_pblock(path->p_idx);
>  	if (unlikely(path->p_hdr->eh_entries == 0)) {
>  		EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
> @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>  
>  	ext4_free_blocks(handle, inode, NULL, leaf, 1,
>  			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
> +
> +	border = path->p_idx->ei_block;
> +	while (--depth >= 0) {
> +		if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
> +			break;
> +		path--;
> +		err = ext4_ext_get_access(handle, inode, path);
> +		if (err)
> +			break;
> +		path->p_idx->ei_block = border;
> +		err = ext4_ext_dirty(handle, inode, path);
> +		if (err)
> +			break;
> +	}
>  	return err;
>  }
>  
> @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>  	/* if this leaf is free, then we should
>  	 * remove it from index block above */
>  	if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
> -		err = ext4_ext_rm_idx(handle, inode, path + depth);
> +		err = ext4_ext_rm_idx(handle, inode, path, depth);
>  
>  out:
>  	return err;
> @@ -2760,7 +2776,7 @@ again:
>  				/* index is empty, remove it;
>  				 * handle must be already prepared by the
>  				 * truncatei_leaf() */
> -				err = ext4_ext_rm_idx(handle, inode, path + i);
> +				err = ext4_ext_rm_idx(handle, inode, path, i);
>  			}
>  			/* root level has p_bh == NULL, brelse() eats this */
>  			brelse(path[i].p_bh);
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Sandeen Dec. 6, 2012, 3:48 p.m. UTC | #9
On 12/6/12 9:45 AM, Eric Sandeen wrote:
> On 12/4/12 6:11 AM, Forrest Liu wrote:
>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth
>> of extent tree is greater than 1.
> 
> This is interesting; we had 2 reports of similar corruption on the
> list, I wonder if the application in question was doing hole punching.
> I didn't expect that they were, so TBH I was pretty much ignoring
> the hole-punch cases for parent index updates.  Hm.  I'll have
> to look into that.
> 
> Could you turn your testcase into an xfstest regression test?

Also, please note that I sent an e2fsck patch to try to fix this
problem after the fact; it'd be great if in your testing, you could
also confirm that e2fsck w/ my patch fixes it correctly.

Thanks,
-Eric

> -Eric
> 
>> Signed-off-by: Forrest Liu <forrestl@synology.com>
>> ---
>>  fs/ext4/extents.c |   24 ++++++++++++++++++++----
>>  1 files changed, 20 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
>> index d3dd618..b10b8c0 100644
>> --- a/fs/ext4/extents.c
>> +++ b/fs/ext4/extents.c
>> @@ -2190,13 +2190,15 @@ errout:
>>   * removes index from the index block.
>>   */
>>  static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>> -			struct ext4_ext_path *path)
>> +			struct ext4_ext_path *path, int depth)
>>  {
>>  	int err;
>>  	ext4_fsblk_t leaf;
>> +	__le32 border;
>>  
>>  	/* free index block */
>> -	path--;
>> +	depth--;
>> +	path = path + depth;
>>  	leaf = ext4_idx_pblock(path->p_idx);
>>  	if (unlikely(path->p_hdr->eh_entries == 0)) {
>>  		EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
>> @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>  
>>  	ext4_free_blocks(handle, inode, NULL, leaf, 1,
>>  			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
>> +
>> +	border = path->p_idx->ei_block;
>> +	while (--depth >= 0) {
>> +		if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
>> +			break;
>> +		path--;
>> +		err = ext4_ext_get_access(handle, inode, path);
>> +		if (err)
>> +			break;
>> +		path->p_idx->ei_block = border;
>> +		err = ext4_ext_dirty(handle, inode, path);
>> +		if (err)
>> +			break;
>> +	}
>>  	return err;
>>  }
>>  
>> @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>>  	/* if this leaf is free, then we should
>>  	 * remove it from index block above */
>>  	if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
>> -		err = ext4_ext_rm_idx(handle, inode, path + depth);
>> +		err = ext4_ext_rm_idx(handle, inode, path, depth);
>>  
>>  out:
>>  	return err;
>> @@ -2760,7 +2776,7 @@ again:
>>  				/* index is empty, remove it;
>>  				 * handle must be already prepared by the
>>  				 * truncatei_leaf() */
>> -				err = ext4_ext_rm_idx(handle, inode, path + i);
>> +				err = ext4_ext_rm_idx(handle, inode, path, i);
>>  			}
>>  			/* root level has p_bh == NULL, brelse() eats this */
>>  			brelse(path[i].p_bh);
>>
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ashish Sangwan Dec. 7, 2012, 5:11 a.m. UTC | #10
On Thu, Dec 6, 2012 at 8:06 PM, Forrest Liu <forrestl@synology.com> wrote:
> Hi Ashish,
>
>    There have a chance to do  a trivial update, and this case will be
> covered in ext4_ext_remove_space.
>
> +       if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) {
>                  err = ext4_ext_rm_idx(handle, inode, path + depth);
> +               /* If this was the first extent index in node,
> +                * propagates the ei_block updation info to top */
> +               if(!err) {
> +                       --depth;
> +                       path = path + depth;
> +                       while (--depth >= 0) {
> +                               if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
> +                                       break;
> +                               path--;
> +                               err = ext4_ext_get_access(handle, inode, path);
> +                               if (err)
> +                                       break;
>
>         trivial update, when (path + 1)->p_idx->eh_entries == 0
>
> +                               path->p_idx->ei_block =(path +
> 1)->p_idx->ei_block ;
> +                               err = ext4_ext_dirty(handle, inode, path);
> +                               if (err)
> +                                       break;
> +                       }
> +               }
> +       }
> +
>
>    I will trying to reproduce the problem tomorrow.
I think it will  work fine atleast till depth = 2.
It would be great if you could check with depth > 2.
If still having this problem, we can think more.

>
> Thanks,
> Forrest
>
> 2012/12/6 Ashish Sangwan <ashishsangwan2@gmail.com>:
>> On Thu, Dec 6, 2012 at 5:05 PM, Forrest Liu <forrestl@synology.com> wrote:
>>> Hi Ashish,
>>>    Thanks for your review and fixed hole punch bugs.
>>>
>>> We can't propogate ei_block only in ext4_ext_rm_leave.
>>>
>>> There have two cases to call ext4_ext_rm_idx
>>> 1) extent block has no entry, ext4_ext_rm_leave calls
>>> ext4_ext_rm_idx to release extent block.
>>>
>>
>> 1.1)  And when the extent block is removed, the corresponding index
>> entry has also to be removed. And if this was the first index entry,
>> all the other index entries, are also updated. This updation will
>> continue till we reach the root of extent tree OR the currently
>> updated index entry is not the first entry in the node.
>>
>>> 2) extent index block has no entry, ext4_ext_remove_space
>>> calls ext4_ext_rm_idx to release extent index block.
>>>
>>> Consider remove third level of extent index block that indexed by
>>> second level of extent index, and the extent index in second level
>>> is first entry in block.
>>>
>> So, I think the above case would be handled by point (1.1) i.e. the
>> earlier index updation done from within ext4_ext_rm_leaf.
>> The earlier ei_block updation will reach to the top of tree.
>>
>>> In this case, ei_block will not propogate to top level correctly,
>>> if we only do propogation in ext4_ext_rm_leave.
>>
>> And thanks for pointing the problem. Can you reproduce it with my code
>> suggesstion even with depth 3?
>> If no, than please resend the patch with suggested changes.
>> The intent here is to solve the problem without changing any function
>> signature and with minimal changes.
>>
>> Regards,
>> Ashish
>>>
>>> Regards,
>>> Forrest
>>>
>>> 2012/12/5 Ashish Sangwan <ashishsangwan2@gmail.com>:
>>>> Sorry 1 typo.
>>>> It should be
>>>> + path->p_idx->ei_block =(path + 1)->p_idx->ei_block ;
>>>> instead of
>>>> + path->p_idx->ei_block = border;
>>>>
>>>> On Wed, Dec 5, 2012 at 1:23 PM, Ashish Sangwan <ashishsangwan2@gmail.com> wrote:
>>>>> I have tested it and the problem is real.
>>>>> It happens when depth => 2 and punch hole completely removes any of
>>>>> the first extent indexes at depth 1.
>>>>> The change in ei_block is not propagated backwards to index at depth 0.
>>>>>
>>>>> However, AFAICS, the problematic call to ext4_ext_rm_idx is from
>>>>> ext4_ext_rm_leaf and there is no need to change in
>>>>> ext4_ext_remove_space or in ext4_ext_rm_idx.
>>>>>
>>>>> Forrest, what do you think about below change ?
>>>>> basically its your code, but its been added to ext4_ext_rm_leaf after
>>>>> removing index.
>>>>>
>>>>> @@ -2448,8 +2464,28 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>>>>>
>>>>>         /* if this leaf is free, then we should
>>>>>          * remove it from index block above */
>>>>> -       if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
>>>>> +       if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) {
>>>>>                 err = ext4_ext_rm_idx(handle, inode, path + depth);
>>>>> +               /* If this was the first extent index in node,
>>>>> +                * propagates the ei_block updation info to top */
>>>>> +               if(!err) {
>>>>> +                       --depth;
>>>>> +                       path = path + depth;
>>>>> +                       while (--depth >= 0) {
>>>>> +                               if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
>>>>> +                                       break;
>>>>> +                               path--;
>>>>> +                               err = ext4_ext_get_access(handle, inode, path);
>>>>> +                               if (err)
>>>>> +                                       break;
>>>>> +                               path->p_idx->ei_block = border;
>>>>> +                               err = ext4_ext_dirty(handle, inode, path);
>>>>> +                               if (err)
>>>>> +                                       break;
>>>>> +                       }
>>>>> +               }
>>>>> +       }
>>>>> +
>>>>>
>>>>> On Wed, Dec 5, 2012 at 11:43 AM, Forrest Liu <forrestl@synology.com> wrote:
>>>>>> 2012/12/4 forrest <forrestl@synology.com>:
>>>>>>> This problem is easily to reproduce
>>>>>>>
>>>>>>> Create a file with size larger than 1GB.
>>>>>>>
>>>>>>> dd if=/dev/zero of=/test_file bs=1M count=1024
>>>>>>>
>>>>>>> Punch every even block in test_file, and then use debugfs to dump
>>>>>>> extents,  followings is dumped result
>>>>>>>
>>>>>>>  2/ 2 339/340 231197 - 231197   3917597 -   3917597      1
>>>>>>>  2/ 2 340/340 231199 - 231199   3917599 -   3917599      1
>>>>>>>  0/ 2   2/  2 231201 - 262143   3901486              30943
>>>>>>>  1/ 2   1/ 46 231201 - 231880   3901488                680
>>>>>>>  2/ 2   1/340 231201 - 231201   3917601 -   3917601      1
>>>>>>>  2/ 2   2/340 231203 - 231203   3917603 -   3917603      1
>>>>>>>
>>>>>>> Punch blocks #231779 ~#231201 , to remove extent index, and then use
>>>>>>> debugfs to dump extents,  followings is dumped result
>>>>>>>
>>>>>>> 2/ 2 340/340 231199 - 231199   3917599 -   3917599      1
>>>>>>>  0/ 2   2/  2 231201 - 262143   3901486              30943
>>>>>>> -------> logical block index didn't update when remove extent index
>>>>>>>  1/ 2   1/ 45 231881 - 232560   3901490                680
>>>>>>>  2/ 2   1/340 231881 - 231881   3918281 -   3918281      1
>>>>>>>  2/ 2   2/340 231883 - 231883   3918283 -   3918283      1
>>>>>>
>>>>>> After blocks #231202~#231880 had been punch out, theses blocks can't map again.
>>>>>> If we do map a block in #231202~#231880, we will get error messages
>>>>>> like followings.
>>>>>>
>>>>>> EXT4-fs error (device md2): ext4_ext_search_left:1239: inode #13: comm
>>>>>> flush-9:2: ix (231201) != EXT_FIRST_INDEX (1) (depth 0)!
>>>>>> EXT4-fs (md2): delayed block allocation failed for inode 13 at logical
>>>>>> offset 231203 with max blocks 1 with error -5
>>>>>> EXT4-fs (md2): This should not happen!! Data will be lost
>>>>>>
>>>>>> Regards,
>>>>>> Forrest
>>>>>>
>>>>>>>
>>>>>>> 2012/12/4 Forrest Liu <forrestl@synology.com>:
>>>>>>>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth
>>>>>>>> of extent tree is greater than 1.
>>>>>>>>
>>>>>>>> Signed-off-by: Forrest Liu <forrestl@synology.com>
>>>>>>>> ---
>>>>>>>>  fs/ext4/extents.c |   24 ++++++++++++++++++++----
>>>>>>>>  1 files changed, 20 insertions(+), 4 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
>>>>>>>> index d3dd618..b10b8c0 100644
>>>>>>>> --- a/fs/ext4/extents.c
>>>>>>>> +++ b/fs/ext4/extents.c
>>>>>>>> @@ -2190,13 +2190,15 @@ errout:
>>>>>>>>   * removes index from the index block.
>>>>>>>>   */
>>>>>>>>  static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>>>>>>> -                       struct ext4_ext_path *path)
>>>>>>>> +                       struct ext4_ext_path *path, int depth)
>>>>>>>>  {
>>>>>>>>         int err;
>>>>>>>>         ext4_fsblk_t leaf;
>>>>>>>> +       __le32 border;
>>>>>>>>
>>>>>>>>         /* free index block */
>>>>>>>> -       path--;
>>>>>>>> +       depth--;
>>>>>>>> +       path = path + depth;
>>>>>>>>         leaf = ext4_idx_pblock(path->p_idx);
>>>>>>>>         if (unlikely(path->p_hdr->eh_entries == 0)) {
>>>>>>>>                 EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
>>>>>>>> @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>>>>>>>
>>>>>>>>         ext4_free_blocks(handle, inode, NULL, leaf, 1,
>>>>>>>>                          EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
>>>>>>>> +
>>>>>>>> +       border = path->p_idx->ei_block;
>>>>>>>> +       while (--depth >= 0) {
>>>>>>>> +               if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
>>>>>>>> +                       break;
>>>>>>>> +               path--;
>>>>>>>> +               err = ext4_ext_get_access(handle, inode, path);
>>>>>>>> +               if (err)
>>>>>>>> +                       break;
>>>>>>>> +               path->p_idx->ei_block = border;
>>>>>>>> +               err = ext4_ext_dirty(handle, inode, path);
>>>>>>>> +               if (err)
>>>>>>>> +                       break;
>>>>>>>> +       }
>>>>>>>>         return err;
>>>>>>>>  }
>>>>>>>>
>>>>>>>> @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>>>>>>>>         /* if this leaf is free, then we should
>>>>>>>>          * remove it from index block above */
>>>>>>>>         if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
>>>>>>>> -               err = ext4_ext_rm_idx(handle, inode, path + depth);
>>>>>>>> +               err = ext4_ext_rm_idx(handle, inode, path, depth);
>>>>>>>>
>>>>>>>>  out:
>>>>>>>>         return err;
>>>>>>>> @@ -2760,7 +2776,7 @@ again:
>>>>>>>>                                 /* index is empty, remove it;
>>>>>>>>                                  * handle must be already prepared by the
>>>>>>>>                                  * truncatei_leaf() */
>>>>>>>> -                               err = ext4_ext_rm_idx(handle, inode, path + i);
>>>>>>>> +                               err = ext4_ext_rm_idx(handle, inode, path, i);
>>>>>>>>                         }
>>>>>>>>                         /* root level has p_bh == NULL, brelse() eats this */
>>>>>>>>                         brelse(path[i].p_bh);
>>>>>>>> --
>>>>>>>> 1.7.5.4
>>>>>>>>
>>>>>> --
>>>>>> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
>>>>>> the body of a message to majordomo@vger.kernel.org
>>>>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ashish Sangwan Dec. 7, 2012, 5:53 a.m. UTC | #11
On Thu, Dec 6, 2012 at 9:18 PM, Eric Sandeen <sandeen@redhat.com> wrote:
> On 12/6/12 9:45 AM, Eric Sandeen wrote:
>> On 12/4/12 6:11 AM, Forrest Liu wrote:
>>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth
>>> of extent tree is greater than 1.
>>
>> This is interesting; we had 2 reports of similar corruption on the
>> list, I wonder if the application in question was doing hole punching.
>> I didn't expect that they were, so TBH I was pretty much ignoring
>> the hole-punch cases for parent index updates.  Hm.  I'll have
>> to look into that.
>>
>> Could you turn your testcase into an xfstest regression test?
>
> Also, please note that I sent an e2fsck patch to try to fix this
> problem after the fact; it'd be great if in your testing, you could
> also confirm that e2fsck w/ my patch fixes it correctly.
>
I checked you patch.
This was the extent tree situation after removing 1st extent index:
debugfs:  ex abc
Level Entries       Logical        Physical Length Flags
 0/ 2   1/  1     0 -  8399  32857            8400
 1/ 2   1/  4  2048 -  4081   4138            2034
 2/ 2   1/339  2048 -  2053  69632 -  69637      6
 2/ 2   2/339  2054 -  2059  69656 -  69661      6

E2fsck's output with your patch=>
Linux#> /dtv/usb/sdb1/e2fsck /dev/sda1 -f
e2fsck 1.42.6.1 (06-DEC-2012)
Pass 1: Checking inodes, blocks, and sizes
Interior extent node level 0 of inode 31:
Logical start 0 does not match logical start 2048 at next level.  Fix<y>? yes
Inode 31, i_blocks is 50856, should be 16280.  Fix<y>? yes
Pass 2: Checking directory structure
Pass 3: Checking directory connectivity
Pass 4: Checking reference counts
Pass 5: Checking group summary information
Block bitmap differences:  -4138 -(73712--73717) -73728
-(98342--98347) -(98354--98359) -(98366--98371) -(98378--98383)
-(98390--98395) -(98402--98407)
< Similarly this was huge list of around 50-60 lines, so I skip to last >
910--106915) -(106922--106927) -(106934--106939) -(106946--106951)
-(106958--106963) -(122872--122879)
Fix<y>? yes
Free blocks count wrong for group #0 (14308, counted=14309).
Fix<y>? yes
Free blocks count wrong for group #2 (12297, counted=12304).
Fix<y>? yes
Free blocks count wrong for group #3 (9770, counted=14084).
Fix<y>? yes
Free blocks count wrong (52407, counted=56729).
Fix<y>? yes
/dev/sda1: ***** FILE SYSTEM WAS MODIFIED *****
/dev/sda1: 9872/126592 files (0.1% non-contiguous), 69775/126504 blocks
Linux#>

> Thanks,
> -Eric
>
>> -Eric
>>
>>> Signed-off-by: Forrest Liu <forrestl@synology.com>
>>> ---
>>>  fs/ext4/extents.c |   24 ++++++++++++++++++++----
>>>  1 files changed, 20 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
>>> index d3dd618..b10b8c0 100644
>>> --- a/fs/ext4/extents.c
>>> +++ b/fs/ext4/extents.c
>>> @@ -2190,13 +2190,15 @@ errout:
>>>   * removes index from the index block.
>>>   */
>>>  static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>> -                    struct ext4_ext_path *path)
>>> +                    struct ext4_ext_path *path, int depth)
>>>  {
>>>      int err;
>>>      ext4_fsblk_t leaf;
>>> +    __le32 border;
>>>
>>>      /* free index block */
>>> -    path--;
>>> +    depth--;
>>> +    path = path + depth;
>>>      leaf = ext4_idx_pblock(path->p_idx);
>>>      if (unlikely(path->p_hdr->eh_entries == 0)) {
>>>              EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
>>> @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>>
>>>      ext4_free_blocks(handle, inode, NULL, leaf, 1,
>>>                       EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
>>> +
>>> +    border = path->p_idx->ei_block;
>>> +    while (--depth >= 0) {
>>> +            if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
>>> +                    break;
>>> +            path--;
>>> +            err = ext4_ext_get_access(handle, inode, path);
>>> +            if (err)
>>> +                    break;
>>> +            path->p_idx->ei_block = border;
>>> +            err = ext4_ext_dirty(handle, inode, path);
>>> +            if (err)
>>> +                    break;
>>> +    }
>>>      return err;
>>>  }
>>>
>>> @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>>>      /* if this leaf is free, then we should
>>>       * remove it from index block above */
>>>      if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
>>> -            err = ext4_ext_rm_idx(handle, inode, path + depth);
>>> +            err = ext4_ext_rm_idx(handle, inode, path, depth);
>>>
>>>  out:
>>>      return err;
>>> @@ -2760,7 +2776,7 @@ again:
>>>                              /* index is empty, remove it;
>>>                               * handle must be already prepared by the
>>>                               * truncatei_leaf() */
>>> -                            err = ext4_ext_rm_idx(handle, inode, path + i);
>>> +                            err = ext4_ext_rm_idx(handle, inode, path, i);
>>>                      }
>>>                      /* root level has p_bh == NULL, brelse() eats this */
>>>                      brelse(path[i].p_bh);
>>>
>>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Forrest Liu Dec. 7, 2012, 12:13 p.m. UTC | #12
2012/12/7 Ashish Sangwan <ashishsangwan2@gmail.com>:
> On Thu, Dec 6, 2012 at 8:06 PM, Forrest Liu <forrestl@synology.com> wrote:
>> Hi Ashish,
>>
>>    There have a chance to do  a trivial update, and this case will be
>> covered in ext4_ext_remove_space.
>>
>> +       if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) {
>>                  err = ext4_ext_rm_idx(handle, inode, path + depth);
>> +               /* If this was the first extent index in node,
>> +                * propagates the ei_block updation info to top */
>> +               if(!err) {
>> +                       --depth;
>> +                       path = path + depth;
>> +                       while (--depth >= 0) {
>> +                               if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
>> +                                       break;
>> +                               path--;
>> +                               err = ext4_ext_get_access(handle, inode, path);
>> +                               if (err)
>> +                                       break;
>>
>>         trivial update, when (path + 1)->p_idx->eh_entries == 0
>>
>> +                               path->p_idx->ei_block =(path +
>> 1)->p_idx->ei_block ;
>> +                               err = ext4_ext_dirty(handle, inode, path);
>> +                               if (err)
>> +                                       break;
>> +                       }
>> +               }
>> +       }
>> +
>>
>>    I will trying to reproduce the problem tomorrow.
> I think it will  work fine atleast till depth = 2.
> It would be great if you could check with depth > 2.
> If still having this problem, we can think more.

Hi Ashish,

First, create a file lager than 4GB, and then punch out every even blocks;
Use ex command in debugfs to dump extents, belowing is the result.

Level Entries           Logical              Physical Length Flags
 0/ 3   1/  1       1 - 1048575     40013             1048575
 1/ 3   1/  5       1 -  231160     40012             231160
 2/ 3   1/340       1 -     680     40011                680
 3/ 3   1/340       1 -       1   2228225 -   2228225      1
 3/ 3   2/340       3 -       3   2228227 -   2228227      1
 3/ 3   3/340       5 -       5   2228229 -   2228229      1


punch out 231159, 231157, ..., 1..


Level Entries           Logical              Physical Length Flags
 0/ 3   1/  1       1 - 1048575     40013             1048575
 1/ 3   1/  4  231161 -  462320   2457604             231160
 2/ 3   1/340  231161 -  231840   2457606                680
 3/ 3   1/340  231161 -  231161   2459385 -   2459385      1


Then, we get incorrect logical index, if only do correction in ext4_ext_rm_leaf.


Thanks,
Forrest
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Forrest Liu Dec. 7, 2012, 12:26 p.m. UTC | #13
2012/12/7 Ashish Sangwan <ashishsangwan2@gmail.com>:
> On Thu, Dec 6, 2012 at 9:18 PM, Eric Sandeen <sandeen@redhat.com> wrote:
>> On 12/6/12 9:45 AM, Eric Sandeen wrote:
>>> On 12/4/12 6:11 AM, Forrest Liu wrote:
>>>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth
>>>> of extent tree is greater than 1.
>>>
>>> This is interesting; we had 2 reports of similar corruption on the
>>> list, I wonder if the application in question was doing hole punching.
>>> I didn't expect that they were, so TBH I was pretty much ignoring
>>> the hole-punch cases for parent index updates.  Hm.  I'll have
>>> to look into that.
>>> Could you turn your testcase into an xfstest regression test?

Hi Eric,
   I will check how to do that.
>>
>> Also, please note that I sent an e2fsck patch to try to fix this
>> problem after the fact; it'd be great if in your testing, you could
>> also confirm that e2fsck w/ my patch fixes it correctly.
>>
> I checked you patch.
> This was the extent tree situation after removing 1st extent index:
> debugfs:  ex abc
> Level Entries       Logical        Physical Length Flags
>  0/ 2   1/  1     0 -  8399  32857            8400
>  1/ 2   1/  4  2048 -  4081   4138            2034
>  2/ 2   1/339  2048 -  2053  69632 -  69637      6
>  2/ 2   2/339  2054 -  2059  69656 -  69661      6
>
> E2fsck's output with your patch=>
> Linux#> /dtv/usb/sdb1/e2fsck /dev/sda1 -f
> e2fsck 1.42.6.1 (06-DEC-2012)
> Pass 1: Checking inodes, blocks, and sizes
> Interior extent node level 0 of inode 31:
> Logical start 0 does not match logical start 2048 at next level.  Fix<y>? yes
> Inode 31, i_blocks is 50856, should be 16280.  Fix<y>? yes

I got similar result, pb->num_blocks is incorrect if
ext2fs_extent_fix_parents called.

> Pass 2: Checking directory structure
> Pass 3: Checking directory connectivity
> Pass 4: Checking reference counts
> Pass 5: Checking group summary information
> Block bitmap differences:  -4138 -(73712--73717) -73728
> -(98342--98347) -(98354--98359) -(98366--98371) -(98378--98383)
> -(98390--98395) -(98402--98407)
> < Similarly this was huge list of around 50-60 lines, so I skip to last >
> 910--106915) -(106922--106927) -(106934--106939) -(106946--106951)
> -(106958--106963) -(122872--122879)
> Fix<y>? yes
> Free blocks count wrong for group #0 (14308, counted=14309).
> Fix<y>? yes
> Free blocks count wrong for group #2 (12297, counted=12304).
> Fix<y>? yes
> Free blocks count wrong for group #3 (9770, counted=14084).
> Fix<y>? yes
> Free blocks count wrong (52407, counted=56729).
> Fix<y>? yes
> /dev/sda1: ***** FILE SYSTEM WAS MODIFIED *****
> /dev/sda1: 9872/126592 files (0.1% non-contiguous), 69775/126504 blocks
> Linux#>
>
>> Thanks,
>> -Eric
>>
>>> -Eric
>>>
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Sandeen Dec. 7, 2012, 10:17 p.m. UTC | #14
On 12/7/12 6:26 AM, Forrest Liu wrote:
> 2012/12/7 Ashish Sangwan <ashishsangwan2@gmail.com>:
>> On Thu, Dec 6, 2012 at 9:18 PM, Eric Sandeen <sandeen@redhat.com> wrote:
>>> On 12/6/12 9:45 AM, Eric Sandeen wrote:
>>>> On 12/4/12 6:11 AM, Forrest Liu wrote:
>>>>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth
>>>>> of extent tree is greater than 1.
>>>>
>>>> This is interesting; we had 2 reports of similar corruption on the
>>>> list, I wonder if the application in question was doing hole punching.
>>>> I didn't expect that they were, so TBH I was pretty much ignoring
>>>> the hole-punch cases for parent index updates.  Hm.  I'll have
>>>> to look into that.
>>>> Could you turn your testcase into an xfstest regression test?
> 
> Hi Eric,
>    I will check how to do that.
>>>
>>> Also, please note that I sent an e2fsck patch to try to fix this
>>> problem after the fact; it'd be great if in your testing, you could
>>> also confirm that e2fsck w/ my patch fixes it correctly.
>>>
>> I checked you patch.
>> This was the extent tree situation after removing 1st extent index:
>> debugfs:  ex abc
>> Level Entries       Logical        Physical Length Flags
>>  0/ 2   1/  1     0 -  8399  32857            8400
>>  1/ 2   1/  4  2048 -  4081   4138            2034
>>  2/ 2   1/339  2048 -  2053  69632 -  69637      6
>>  2/ 2   2/339  2054 -  2059  69656 -  69661      6
>>
>> E2fsck's output with your patch=>
>> Linux#> /dtv/usb/sdb1/e2fsck /dev/sda1 -f
>> e2fsck 1.42.6.1 (06-DEC-2012)
>> Pass 1: Checking inodes, blocks, and sizes
>> Interior extent node level 0 of inode 31:
>> Logical start 0 does not match logical start 2048 at next level.  Fix<y>? yes
>> Inode 31, i_blocks is 50856, should be 16280.  Fix<y>? yes
> 
> I got similar result, pb->num_blocks is incorrect if
> ext2fs_extent_fix_parents called.

Maybe I should ask what it looks like without my patch?  I didn't
think my patch would change anything at all about block count
or use.

-Eric

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Sandeen Dec. 8, 2012, 5:31 p.m. UTC | #15
On 12/4/12 6:29 AM, forrest wrote:
> This problem is easily to reproduce
> 
> Create a file with size larger than 1GB.
> 
> dd if=/dev/zero of=/test_file bs=1M count=1024
> 
> Punch every even block in test_file, and then use debugfs to dump
> extents,  followings is dumped result
> 
>  2/ 2 339/340 231197 - 231197   3917597 -   3917597      1
>  2/ 2 340/340 231199 - 231199   3917599 -   3917599      1
>  0/ 2   2/  2 231201 - 262143   3901486              30943
>  1/ 2   1/ 46 231201 - 231880   3901488                680
>  2/ 2   1/340 231201 - 231201   3917601 -   3917601      1
>  2/ 2   2/340 231203 - 231203   3917603 -   3917603      1
> 
> Punch blocks #231779 ~#231201 , 

Can you explain what you mean by the above?  Which blocks get punched?

-Eric

> to remove extent index, and then use
> debugfs to dump extents,  followings is dumped result


--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Forrest Liu Dec. 8, 2012, 6:56 p.m. UTC | #16
2012/12/9 Eric Sandeen <sandeen@redhat.com>:
> On 12/4/12 6:29 AM, forrest wrote:
>> This problem is easily to reproduce
>>
>> Create a file with size larger than 1GB.
>>
>> dd if=/dev/zero of=/test_file bs=1M count=1024
>>
>> Punch every even block in test_file, and then use debugfs to dump
>> extents,  followings is dumped result
>>
>>  2/ 2 339/340 231197 - 231197   3917597 -   3917597      1
>>  2/ 2 340/340 231199 - 231199   3917599 -   3917599      1
>>  0/ 2   2/  2 231201 - 262143   3901486              30943
>>  1/ 2   1/ 46 231201 - 231880   3901488                680
>>  2/ 2   1/340 231201 - 231201   3917601 -   3917601      1
>>  2/ 2   2/340 231203 - 231203   3917603 -   3917603      1
>>
>> Punch blocks #231779 ~#231201 ,
>
> Can you explain what you mean by the above?  Which blocks get punched?

Hi Eric,

At first, i found blowing code may have some problem

ext4_ext_rm_leaf ()
{
        ...
       /////when eh->eh_entries equals to zero, logical start value
will not be update;
       //// If parent is first index in block, this cause problem.
        if (correct_index && eh->eh_entries)
                err = ext4_ext_correct_indexes(handle, inode, path);
}

To prove that, I tried to trigger this case (eh->eh_entries = 0)

1) create a file which size greater than 1GB

2) punch all even blocks 0, 2, 4, 6, 8, .......  (blocks depends on
file size) one by one to make extent tree with depth equals to 2

3) use debugfs to find an extent index entry at level1 (assume leaf at
level2) witch logical start value is equals to is't parent node.

In example, founded entry is
1/ 2   1/ 46 231201 - 231880   3901488                680

4 ) punch blocks covered by extent index found by step3 one by one.
In example,
231879, 231877, ..., 231201..

5 ) Then, we can use debugfs to  check if logical start value is correct or not

In example, 231201 is not equals to 231881.

0/ 2   2/  2 231201 - 262143   3901486              30943
1/ 2   1/ 45 231881 - 232560   3901490                680

Thanks,
-Forrest

>
> -Eric
>
>> to remove extent index, and then use
>> debugfs to dump extents,  followings is dumped result
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Forrest Liu Dec. 8, 2012, 7:01 p.m. UTC | #17
2012/12/8 Eric Sandeen <sandeen@redhat.com>:
> On 12/7/12 6:26 AM, Forrest Liu wrote:
>> 2012/12/7 Ashish Sangwan <ashishsangwan2@gmail.com>:
>>> On Thu, Dec 6, 2012 at 9:18 PM, Eric Sandeen <sandeen@redhat.com> wrote:
>>>> On 12/6/12 9:45 AM, Eric Sandeen wrote:
>>>>> On 12/4/12 6:11 AM, Forrest Liu wrote:
>>>>>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth
>>>>>> of extent tree is greater than 1.
>>>>>
>>>>> This is interesting; we had 2 reports of similar corruption on the
>>>>> list, I wonder if the application in question was doing hole punching.
>>>>> I didn't expect that they were, so TBH I was pretty much ignoring
>>>>> the hole-punch cases for parent index updates.  Hm.  I'll have
>>>>> to look into that.
>>>>> Could you turn your testcase into an xfstest regression test?
>>
>> Hi Eric,
>>    I will check how to do that.
>>>>
>>>> Also, please note that I sent an e2fsck patch to try to fix this
>>>> problem after the fact; it'd be great if in your testing, you could
>>>> also confirm that e2fsck w/ my patch fixes it correctly.
>>>>
>>> I checked you patch.
>>> This was the extent tree situation after removing 1st extent index:
>>> debugfs:  ex abc
>>> Level Entries       Logical        Physical Length Flags
>>>  0/ 2   1/  1     0 -  8399  32857            8400
>>>  1/ 2   1/  4  2048 -  4081   4138            2034
>>>  2/ 2   1/339  2048 -  2053  69632 -  69637      6
>>>  2/ 2   2/339  2054 -  2059  69656 -  69661      6
>>>
>>> E2fsck's output with your patch=>
>>> Linux#> /dtv/usb/sdb1/e2fsck /dev/sda1 -f
>>> e2fsck 1.42.6.1 (06-DEC-2012)
>>> Pass 1: Checking inodes, blocks, and sizes
>>> Interior extent node level 0 of inode 31:
>>> Logical start 0 does not match logical start 2048 at next level.  Fix<y>? yes
>>> Inode 31, i_blocks is 50856, should be 16280.  Fix<y>? yes
>>
>> I got similar result, pb->num_blocks is incorrect if
>> ext2fs_extent_fix_parents called.
>
> Maybe I should ask what it looks like without my patch?  I didn't
> think my patch would change anything at all about block count
> or use.
>
> -Eric
>
When i choose not to repair problem of logical start value, there have
no message like

Inode 31, i_blocks is 50856, should be 16280.  Fix<y>?

-Forrest
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ashish Sangwan Dec. 10, 2012, 6:22 a.m. UTC | #18
> Hi Ashish,
>
> First, create a file lager than 4GB, and then punch out every even blocks;
> Use ex command in debugfs to dump extents, belowing is the result.
>
> Level Entries           Logical              Physical Length Flags
>  0/ 3   1/  1       1 - 1048575     40013             1048575
>  1/ 3   1/  5       1 -  231160     40012             231160
>  2/ 3   1/340       1 -     680     40011                680
>  3/ 3   1/340       1 -       1   2228225 -   2228225      1
>  3/ 3   2/340       3 -       3   2228227 -   2228227      1
>  3/ 3   3/340       5 -       5   2228229 -   2228229      1
>
>
> punch out 231159, 231157, ..., 1..
>
>
> Level Entries           Logical              Physical Length Flags
>  0/ 3   1/  1       1 - 1048575     40013             1048575
>  1/ 3   1/  4  231161 -  462320   2457604             231160
>  2/ 3   1/340  231161 -  231840   2457606                680
>  3/ 3   1/340  231161 -  231161   2459385 -   2459385      1
>
>
> Then, we get incorrect logical index, if only do correction in ext4_ext_rm_leaf.

Ok, In your patch we can do without varibale border and instead use:
path->p_idx->ei_block = (path+1)->p_idx->ei_block;

Rest of the patch seems Ok.

Thanks,
Ashish
>
>
> Thanks,
> Forrest
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index d3dd618..b10b8c0 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2190,13 +2190,15 @@  errout:
  * removes index from the index block.
  */
 static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
-			struct ext4_ext_path *path)
+			struct ext4_ext_path *path, int depth)
 {
 	int err;
 	ext4_fsblk_t leaf;
+	__le32 border;
 
 	/* free index block */
-	path--;
+	depth--;
+	path = path + depth;
 	leaf = ext4_idx_pblock(path->p_idx);
 	if (unlikely(path->p_hdr->eh_entries == 0)) {
 		EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
@@ -2221,6 +2223,20 @@  static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
 
 	ext4_free_blocks(handle, inode, NULL, leaf, 1,
 			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
+
+	border = path->p_idx->ei_block;
+	while (--depth >= 0) {
+		if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
+			break;
+		path--;
+		err = ext4_ext_get_access(handle, inode, path);
+		if (err)
+			break;
+		path->p_idx->ei_block = border;
+		err = ext4_ext_dirty(handle, inode, path);
+		if (err)
+			break;
+	}
 	return err;
 }
 
@@ -2557,7 +2573,7 @@  ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 	/* if this leaf is free, then we should
 	 * remove it from index block above */
 	if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
-		err = ext4_ext_rm_idx(handle, inode, path + depth);
+		err = ext4_ext_rm_idx(handle, inode, path, depth);
 
 out:
 	return err;
@@ -2760,7 +2776,7 @@  again:
 				/* index is empty, remove it;
 				 * handle must be already prepared by the
 				 * truncatei_leaf() */
-				err = ext4_ext_rm_idx(handle, inode, path + i);
+				err = ext4_ext_rm_idx(handle, inode, path, i);
 			}
 			/* root level has p_bh == NULL, brelse() eats this */
 			brelse(path[i].p_bh);