Message ID | 1354623069-8124-1-git-send-email-forrestl@synology.com |
---|---|
State | Superseded, archived |
Headers | show |
This problem is easily to reproduce Create a file with size larger than 1GB. dd if=/dev/zero of=/test_file bs=1M count=1024 Punch every even block in test_file, and then use debugfs to dump extents, followings is dumped result 2/ 2 339/340 231197 - 231197 3917597 - 3917597 1 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1 0/ 2 2/ 2 231201 - 262143 3901486 30943 1/ 2 1/ 46 231201 - 231880 3901488 680 2/ 2 1/340 231201 - 231201 3917601 - 3917601 1 2/ 2 2/340 231203 - 231203 3917603 - 3917603 1 Punch blocks #231779 ~#231201 , to remove extent index, and then use debugfs to dump extents, followings is dumped result 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1 0/ 2 2/ 2 231201 - 262143 3901486 30943 -------> logical block index didn't update when remove extent index 1/ 2 1/ 45 231881 - 232560 3901490 680 2/ 2 1/340 231881 - 231881 3918281 - 3918281 1 2/ 2 2/340 231883 - 231883 3918283 - 3918283 1 Thanks, Forrest 2012/12/4 Forrest Liu <forrestl@synology.com>: > Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth > of extent tree is greater than 1. > > Signed-off-by: Forrest Liu <forrestl@synology.com> > --- > fs/ext4/extents.c | 24 ++++++++++++++++++++---- > 1 files changed, 20 insertions(+), 4 deletions(-) > > diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c > index d3dd618..b10b8c0 100644 > --- a/fs/ext4/extents.c > +++ b/fs/ext4/extents.c > @@ -2190,13 +2190,15 @@ errout: > * removes index from the index block. > */ > static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, > - struct ext4_ext_path *path) > + struct ext4_ext_path *path, int depth) > { > int err; > ext4_fsblk_t leaf; > + __le32 border; > > /* free index block */ > - path--; > + depth--; > + path = path + depth; > leaf = ext4_idx_pblock(path->p_idx); > if (unlikely(path->p_hdr->eh_entries == 0)) { > EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); > @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, > > ext4_free_blocks(handle, inode, NULL, leaf, 1, > EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); > + > + border = path->p_idx->ei_block; > + while (--depth >= 0) { > + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr)) > + break; > + path--; > + err = ext4_ext_get_access(handle, inode, path); > + if (err) > + break; > + path->p_idx->ei_block = border; > + err = ext4_ext_dirty(handle, inode, path); > + if (err) > + break; > + } > return err; > } > > @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, > /* if this leaf is free, then we should > * remove it from index block above */ > if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) > - err = ext4_ext_rm_idx(handle, inode, path + depth); > + err = ext4_ext_rm_idx(handle, inode, path, depth); > > out: > return err; > @@ -2760,7 +2776,7 @@ again: > /* index is empty, remove it; > * handle must be already prepared by the > * truncatei_leaf() */ > - err = ext4_ext_rm_idx(handle, inode, path + i); > + err = ext4_ext_rm_idx(handle, inode, path, i); > } > /* root level has p_bh == NULL, brelse() eats this */ > brelse(path[i].p_bh); > -- > 1.7.5.4 > -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2012/12/4 forrest <forrestl@synology.com>: > This problem is easily to reproduce > > Create a file with size larger than 1GB. > > dd if=/dev/zero of=/test_file bs=1M count=1024 > > Punch every even block in test_file, and then use debugfs to dump > extents, followings is dumped result > > 2/ 2 339/340 231197 - 231197 3917597 - 3917597 1 > 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1 > 0/ 2 2/ 2 231201 - 262143 3901486 30943 > 1/ 2 1/ 46 231201 - 231880 3901488 680 > 2/ 2 1/340 231201 - 231201 3917601 - 3917601 1 > 2/ 2 2/340 231203 - 231203 3917603 - 3917603 1 > > Punch blocks #231779 ~#231201 , to remove extent index, and then use > debugfs to dump extents, followings is dumped result > > 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1 > 0/ 2 2/ 2 231201 - 262143 3901486 30943 > -------> logical block index didn't update when remove extent index > 1/ 2 1/ 45 231881 - 232560 3901490 680 > 2/ 2 1/340 231881 - 231881 3918281 - 3918281 1 > 2/ 2 2/340 231883 - 231883 3918283 - 3918283 1 After blocks #231202~#231880 had been punch out, theses blocks can't map again. If we do map a block in #231202~#231880, we will get error messages like followings. EXT4-fs error (device md2): ext4_ext_search_left:1239: inode #13: comm flush-9:2: ix (231201) != EXT_FIRST_INDEX (1) (depth 0)! EXT4-fs (md2): delayed block allocation failed for inode 13 at logical offset 231203 with max blocks 1 with error -5 EXT4-fs (md2): This should not happen!! Data will be lost Regards, Forrest > > 2012/12/4 Forrest Liu <forrestl@synology.com>: >> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth >> of extent tree is greater than 1. >> >> Signed-off-by: Forrest Liu <forrestl@synology.com> >> --- >> fs/ext4/extents.c | 24 ++++++++++++++++++++---- >> 1 files changed, 20 insertions(+), 4 deletions(-) >> >> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c >> index d3dd618..b10b8c0 100644 >> --- a/fs/ext4/extents.c >> +++ b/fs/ext4/extents.c >> @@ -2190,13 +2190,15 @@ errout: >> * removes index from the index block. >> */ >> static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, >> - struct ext4_ext_path *path) >> + struct ext4_ext_path *path, int depth) >> { >> int err; >> ext4_fsblk_t leaf; >> + __le32 border; >> >> /* free index block */ >> - path--; >> + depth--; >> + path = path + depth; >> leaf = ext4_idx_pblock(path->p_idx); >> if (unlikely(path->p_hdr->eh_entries == 0)) { >> EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); >> @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, >> >> ext4_free_blocks(handle, inode, NULL, leaf, 1, >> EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); >> + >> + border = path->p_idx->ei_block; >> + while (--depth >= 0) { >> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr)) >> + break; >> + path--; >> + err = ext4_ext_get_access(handle, inode, path); >> + if (err) >> + break; >> + path->p_idx->ei_block = border; >> + err = ext4_ext_dirty(handle, inode, path); >> + if (err) >> + break; >> + } >> return err; >> } >> >> @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, >> /* if this leaf is free, then we should >> * remove it from index block above */ >> if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) >> - err = ext4_ext_rm_idx(handle, inode, path + depth); >> + err = ext4_ext_rm_idx(handle, inode, path, depth); >> >> out: >> return err; >> @@ -2760,7 +2776,7 @@ again: >> /* index is empty, remove it; >> * handle must be already prepared by the >> * truncatei_leaf() */ >> - err = ext4_ext_rm_idx(handle, inode, path + i); >> + err = ext4_ext_rm_idx(handle, inode, path, i); >> } >> /* root level has p_bh == NULL, brelse() eats this */ >> brelse(path[i].p_bh); >> -- >> 1.7.5.4 >> -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
I have tested it and the problem is real. It happens when depth => 2 and punch hole completely removes any of the first extent indexes at depth 1. The change in ei_block is not propagated backwards to index at depth 0. However, AFAICS, the problematic call to ext4_ext_rm_idx is from ext4_ext_rm_leaf and there is no need to change in ext4_ext_remove_space or in ext4_ext_rm_idx. Forrest, what do you think about below change ? basically its your code, but its been added to ext4_ext_rm_leaf after removing index. @@ -2448,8 +2464,28 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, /* if this leaf is free, then we should * remove it from index block above */ - if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) + if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) { err = ext4_ext_rm_idx(handle, inode, path + depth); + /* If this was the first extent index in node, + * propagates the ei_block updation info to top */ + if(!err) { + --depth; + path = path + depth; + while (--depth >= 0) { + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr)) + break; + path--; + err = ext4_ext_get_access(handle, inode, path); + if (err) + break; + path->p_idx->ei_block = border; + err = ext4_ext_dirty(handle, inode, path); + if (err) + break; + } + } + } + On Wed, Dec 5, 2012 at 11:43 AM, Forrest Liu <forrestl@synology.com> wrote: > 2012/12/4 forrest <forrestl@synology.com>: >> This problem is easily to reproduce >> >> Create a file with size larger than 1GB. >> >> dd if=/dev/zero of=/test_file bs=1M count=1024 >> >> Punch every even block in test_file, and then use debugfs to dump >> extents, followings is dumped result >> >> 2/ 2 339/340 231197 - 231197 3917597 - 3917597 1 >> 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1 >> 0/ 2 2/ 2 231201 - 262143 3901486 30943 >> 1/ 2 1/ 46 231201 - 231880 3901488 680 >> 2/ 2 1/340 231201 - 231201 3917601 - 3917601 1 >> 2/ 2 2/340 231203 - 231203 3917603 - 3917603 1 >> >> Punch blocks #231779 ~#231201 , to remove extent index, and then use >> debugfs to dump extents, followings is dumped result >> >> 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1 >> 0/ 2 2/ 2 231201 - 262143 3901486 30943 >> -------> logical block index didn't update when remove extent index >> 1/ 2 1/ 45 231881 - 232560 3901490 680 >> 2/ 2 1/340 231881 - 231881 3918281 - 3918281 1 >> 2/ 2 2/340 231883 - 231883 3918283 - 3918283 1 > > After blocks #231202~#231880 had been punch out, theses blocks can't map again. > If we do map a block in #231202~#231880, we will get error messages > like followings. > > EXT4-fs error (device md2): ext4_ext_search_left:1239: inode #13: comm > flush-9:2: ix (231201) != EXT_FIRST_INDEX (1) (depth 0)! > EXT4-fs (md2): delayed block allocation failed for inode 13 at logical > offset 231203 with max blocks 1 with error -5 > EXT4-fs (md2): This should not happen!! Data will be lost > > Regards, > Forrest > >> >> 2012/12/4 Forrest Liu <forrestl@synology.com>: >>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth >>> of extent tree is greater than 1. >>> >>> Signed-off-by: Forrest Liu <forrestl@synology.com> >>> --- >>> fs/ext4/extents.c | 24 ++++++++++++++++++++---- >>> 1 files changed, 20 insertions(+), 4 deletions(-) >>> >>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c >>> index d3dd618..b10b8c0 100644 >>> --- a/fs/ext4/extents.c >>> +++ b/fs/ext4/extents.c >>> @@ -2190,13 +2190,15 @@ errout: >>> * removes index from the index block. >>> */ >>> static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, >>> - struct ext4_ext_path *path) >>> + struct ext4_ext_path *path, int depth) >>> { >>> int err; >>> ext4_fsblk_t leaf; >>> + __le32 border; >>> >>> /* free index block */ >>> - path--; >>> + depth--; >>> + path = path + depth; >>> leaf = ext4_idx_pblock(path->p_idx); >>> if (unlikely(path->p_hdr->eh_entries == 0)) { >>> EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); >>> @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, >>> >>> ext4_free_blocks(handle, inode, NULL, leaf, 1, >>> EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); >>> + >>> + border = path->p_idx->ei_block; >>> + while (--depth >= 0) { >>> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr)) >>> + break; >>> + path--; >>> + err = ext4_ext_get_access(handle, inode, path); >>> + if (err) >>> + break; >>> + path->p_idx->ei_block = border; >>> + err = ext4_ext_dirty(handle, inode, path); >>> + if (err) >>> + break; >>> + } >>> return err; >>> } >>> >>> @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, >>> /* if this leaf is free, then we should >>> * remove it from index block above */ >>> if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) >>> - err = ext4_ext_rm_idx(handle, inode, path + depth); >>> + err = ext4_ext_rm_idx(handle, inode, path, depth); >>> >>> out: >>> return err; >>> @@ -2760,7 +2776,7 @@ again: >>> /* index is empty, remove it; >>> * handle must be already prepared by the >>> * truncatei_leaf() */ >>> - err = ext4_ext_rm_idx(handle, inode, path + i); >>> + err = ext4_ext_rm_idx(handle, inode, path, i); >>> } >>> /* root level has p_bh == NULL, brelse() eats this */ >>> brelse(path[i].p_bh); >>> -- >>> 1.7.5.4 >>> > -- > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Sorry 1 typo. It should be + path->p_idx->ei_block =(path + 1)->p_idx->ei_block ; instead of + path->p_idx->ei_block = border; On Wed, Dec 5, 2012 at 1:23 PM, Ashish Sangwan <ashishsangwan2@gmail.com> wrote: > I have tested it and the problem is real. > It happens when depth => 2 and punch hole completely removes any of > the first extent indexes at depth 1. > The change in ei_block is not propagated backwards to index at depth 0. > > However, AFAICS, the problematic call to ext4_ext_rm_idx is from > ext4_ext_rm_leaf and there is no need to change in > ext4_ext_remove_space or in ext4_ext_rm_idx. > > Forrest, what do you think about below change ? > basically its your code, but its been added to ext4_ext_rm_leaf after > removing index. > > @@ -2448,8 +2464,28 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, > > /* if this leaf is free, then we should > * remove it from index block above */ > - if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) > + if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) { > err = ext4_ext_rm_idx(handle, inode, path + depth); > + /* If this was the first extent index in node, > + * propagates the ei_block updation info to top */ > + if(!err) { > + --depth; > + path = path + depth; > + while (--depth >= 0) { > + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr)) > + break; > + path--; > + err = ext4_ext_get_access(handle, inode, path); > + if (err) > + break; > + path->p_idx->ei_block = border; > + err = ext4_ext_dirty(handle, inode, path); > + if (err) > + break; > + } > + } > + } > + > > On Wed, Dec 5, 2012 at 11:43 AM, Forrest Liu <forrestl@synology.com> wrote: >> 2012/12/4 forrest <forrestl@synology.com>: >>> This problem is easily to reproduce >>> >>> Create a file with size larger than 1GB. >>> >>> dd if=/dev/zero of=/test_file bs=1M count=1024 >>> >>> Punch every even block in test_file, and then use debugfs to dump >>> extents, followings is dumped result >>> >>> 2/ 2 339/340 231197 - 231197 3917597 - 3917597 1 >>> 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1 >>> 0/ 2 2/ 2 231201 - 262143 3901486 30943 >>> 1/ 2 1/ 46 231201 - 231880 3901488 680 >>> 2/ 2 1/340 231201 - 231201 3917601 - 3917601 1 >>> 2/ 2 2/340 231203 - 231203 3917603 - 3917603 1 >>> >>> Punch blocks #231779 ~#231201 , to remove extent index, and then use >>> debugfs to dump extents, followings is dumped result >>> >>> 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1 >>> 0/ 2 2/ 2 231201 - 262143 3901486 30943 >>> -------> logical block index didn't update when remove extent index >>> 1/ 2 1/ 45 231881 - 232560 3901490 680 >>> 2/ 2 1/340 231881 - 231881 3918281 - 3918281 1 >>> 2/ 2 2/340 231883 - 231883 3918283 - 3918283 1 >> >> After blocks #231202~#231880 had been punch out, theses blocks can't map again. >> If we do map a block in #231202~#231880, we will get error messages >> like followings. >> >> EXT4-fs error (device md2): ext4_ext_search_left:1239: inode #13: comm >> flush-9:2: ix (231201) != EXT_FIRST_INDEX (1) (depth 0)! >> EXT4-fs (md2): delayed block allocation failed for inode 13 at logical >> offset 231203 with max blocks 1 with error -5 >> EXT4-fs (md2): This should not happen!! Data will be lost >> >> Regards, >> Forrest >> >>> >>> 2012/12/4 Forrest Liu <forrestl@synology.com>: >>>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth >>>> of extent tree is greater than 1. >>>> >>>> Signed-off-by: Forrest Liu <forrestl@synology.com> >>>> --- >>>> fs/ext4/extents.c | 24 ++++++++++++++++++++---- >>>> 1 files changed, 20 insertions(+), 4 deletions(-) >>>> >>>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c >>>> index d3dd618..b10b8c0 100644 >>>> --- a/fs/ext4/extents.c >>>> +++ b/fs/ext4/extents.c >>>> @@ -2190,13 +2190,15 @@ errout: >>>> * removes index from the index block. >>>> */ >>>> static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, >>>> - struct ext4_ext_path *path) >>>> + struct ext4_ext_path *path, int depth) >>>> { >>>> int err; >>>> ext4_fsblk_t leaf; >>>> + __le32 border; >>>> >>>> /* free index block */ >>>> - path--; >>>> + depth--; >>>> + path = path + depth; >>>> leaf = ext4_idx_pblock(path->p_idx); >>>> if (unlikely(path->p_hdr->eh_entries == 0)) { >>>> EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); >>>> @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, >>>> >>>> ext4_free_blocks(handle, inode, NULL, leaf, 1, >>>> EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); >>>> + >>>> + border = path->p_idx->ei_block; >>>> + while (--depth >= 0) { >>>> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr)) >>>> + break; >>>> + path--; >>>> + err = ext4_ext_get_access(handle, inode, path); >>>> + if (err) >>>> + break; >>>> + path->p_idx->ei_block = border; >>>> + err = ext4_ext_dirty(handle, inode, path); >>>> + if (err) >>>> + break; >>>> + } >>>> return err; >>>> } >>>> >>>> @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, >>>> /* if this leaf is free, then we should >>>> * remove it from index block above */ >>>> if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) >>>> - err = ext4_ext_rm_idx(handle, inode, path + depth); >>>> + err = ext4_ext_rm_idx(handle, inode, path, depth); >>>> >>>> out: >>>> return err; >>>> @@ -2760,7 +2776,7 @@ again: >>>> /* index is empty, remove it; >>>> * handle must be already prepared by the >>>> * truncatei_leaf() */ >>>> - err = ext4_ext_rm_idx(handle, inode, path + i); >>>> + err = ext4_ext_rm_idx(handle, inode, path, i); >>>> } >>>> /* root level has p_bh == NULL, brelse() eats this */ >>>> brelse(path[i].p_bh); >>>> -- >>>> 1.7.5.4 >>>> >> -- >> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in >> the body of a message to majordomo@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Ashish, Thanks for your review and fixed hole punch bugs. We can't propogate ei_block only in ext4_ext_rm_leave. There have two cases to call ext4_ext_rm_idx 1) extent block has no entry, ext4_ext_rm_leave calls ext4_ext_rm_idx to release extent block. 2) extent index block has no entry, ext4_ext_remove_space calls ext4_ext_rm_idx to release extent index block. Consider remove third level of extent index block that indexed by second level of extent index, and the extent index in second level is first entry in block. In this case, ei_block will not propogate to top level correctly, if we only do propogation in ext4_ext_rm_leave. Regards, Forrest 2012/12/5 Ashish Sangwan <ashishsangwan2@gmail.com>: > Sorry 1 typo. > It should be > + path->p_idx->ei_block =(path + 1)->p_idx->ei_block ; > instead of > + path->p_idx->ei_block = border; > > On Wed, Dec 5, 2012 at 1:23 PM, Ashish Sangwan <ashishsangwan2@gmail.com> wrote: >> I have tested it and the problem is real. >> It happens when depth => 2 and punch hole completely removes any of >> the first extent indexes at depth 1. >> The change in ei_block is not propagated backwards to index at depth 0. >> >> However, AFAICS, the problematic call to ext4_ext_rm_idx is from >> ext4_ext_rm_leaf and there is no need to change in >> ext4_ext_remove_space or in ext4_ext_rm_idx. >> >> Forrest, what do you think about below change ? >> basically its your code, but its been added to ext4_ext_rm_leaf after >> removing index. >> >> @@ -2448,8 +2464,28 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, >> >> /* if this leaf is free, then we should >> * remove it from index block above */ >> - if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) >> + if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) { >> err = ext4_ext_rm_idx(handle, inode, path + depth); >> + /* If this was the first extent index in node, >> + * propagates the ei_block updation info to top */ >> + if(!err) { >> + --depth; >> + path = path + depth; >> + while (--depth >= 0) { >> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr)) >> + break; >> + path--; >> + err = ext4_ext_get_access(handle, inode, path); >> + if (err) >> + break; >> + path->p_idx->ei_block = border; >> + err = ext4_ext_dirty(handle, inode, path); >> + if (err) >> + break; >> + } >> + } >> + } >> + >> >> On Wed, Dec 5, 2012 at 11:43 AM, Forrest Liu <forrestl@synology.com> wrote: >>> 2012/12/4 forrest <forrestl@synology.com>: >>>> This problem is easily to reproduce >>>> >>>> Create a file with size larger than 1GB. >>>> >>>> dd if=/dev/zero of=/test_file bs=1M count=1024 >>>> >>>> Punch every even block in test_file, and then use debugfs to dump >>>> extents, followings is dumped result >>>> >>>> 2/ 2 339/340 231197 - 231197 3917597 - 3917597 1 >>>> 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1 >>>> 0/ 2 2/ 2 231201 - 262143 3901486 30943 >>>> 1/ 2 1/ 46 231201 - 231880 3901488 680 >>>> 2/ 2 1/340 231201 - 231201 3917601 - 3917601 1 >>>> 2/ 2 2/340 231203 - 231203 3917603 - 3917603 1 >>>> >>>> Punch blocks #231779 ~#231201 , to remove extent index, and then use >>>> debugfs to dump extents, followings is dumped result >>>> >>>> 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1 >>>> 0/ 2 2/ 2 231201 - 262143 3901486 30943 >>>> -------> logical block index didn't update when remove extent index >>>> 1/ 2 1/ 45 231881 - 232560 3901490 680 >>>> 2/ 2 1/340 231881 - 231881 3918281 - 3918281 1 >>>> 2/ 2 2/340 231883 - 231883 3918283 - 3918283 1 >>> >>> After blocks #231202~#231880 had been punch out, theses blocks can't map again. >>> If we do map a block in #231202~#231880, we will get error messages >>> like followings. >>> >>> EXT4-fs error (device md2): ext4_ext_search_left:1239: inode #13: comm >>> flush-9:2: ix (231201) != EXT_FIRST_INDEX (1) (depth 0)! >>> EXT4-fs (md2): delayed block allocation failed for inode 13 at logical >>> offset 231203 with max blocks 1 with error -5 >>> EXT4-fs (md2): This should not happen!! Data will be lost >>> >>> Regards, >>> Forrest >>> >>>> >>>> 2012/12/4 Forrest Liu <forrestl@synology.com>: >>>>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth >>>>> of extent tree is greater than 1. >>>>> >>>>> Signed-off-by: Forrest Liu <forrestl@synology.com> >>>>> --- >>>>> fs/ext4/extents.c | 24 ++++++++++++++++++++---- >>>>> 1 files changed, 20 insertions(+), 4 deletions(-) >>>>> >>>>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c >>>>> index d3dd618..b10b8c0 100644 >>>>> --- a/fs/ext4/extents.c >>>>> +++ b/fs/ext4/extents.c >>>>> @@ -2190,13 +2190,15 @@ errout: >>>>> * removes index from the index block. >>>>> */ >>>>> static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, >>>>> - struct ext4_ext_path *path) >>>>> + struct ext4_ext_path *path, int depth) >>>>> { >>>>> int err; >>>>> ext4_fsblk_t leaf; >>>>> + __le32 border; >>>>> >>>>> /* free index block */ >>>>> - path--; >>>>> + depth--; >>>>> + path = path + depth; >>>>> leaf = ext4_idx_pblock(path->p_idx); >>>>> if (unlikely(path->p_hdr->eh_entries == 0)) { >>>>> EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); >>>>> @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, >>>>> >>>>> ext4_free_blocks(handle, inode, NULL, leaf, 1, >>>>> EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); >>>>> + >>>>> + border = path->p_idx->ei_block; >>>>> + while (--depth >= 0) { >>>>> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr)) >>>>> + break; >>>>> + path--; >>>>> + err = ext4_ext_get_access(handle, inode, path); >>>>> + if (err) >>>>> + break; >>>>> + path->p_idx->ei_block = border; >>>>> + err = ext4_ext_dirty(handle, inode, path); >>>>> + if (err) >>>>> + break; >>>>> + } >>>>> return err; >>>>> } >>>>> >>>>> @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, >>>>> /* if this leaf is free, then we should >>>>> * remove it from index block above */ >>>>> if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) >>>>> - err = ext4_ext_rm_idx(handle, inode, path + depth); >>>>> + err = ext4_ext_rm_idx(handle, inode, path, depth); >>>>> >>>>> out: >>>>> return err; >>>>> @@ -2760,7 +2776,7 @@ again: >>>>> /* index is empty, remove it; >>>>> * handle must be already prepared by the >>>>> * truncatei_leaf() */ >>>>> - err = ext4_ext_rm_idx(handle, inode, path + i); >>>>> + err = ext4_ext_rm_idx(handle, inode, path, i); >>>>> } >>>>> /* root level has p_bh == NULL, brelse() eats this */ >>>>> brelse(path[i].p_bh); >>>>> -- >>>>> 1.7.5.4 >>>>> >>> -- >>> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in >>> the body of a message to majordomo@vger.kernel.org >>> More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Dec 6, 2012 at 5:05 PM, Forrest Liu <forrestl@synology.com> wrote: > Hi Ashish, > Thanks for your review and fixed hole punch bugs. > > We can't propogate ei_block only in ext4_ext_rm_leave. > > There have two cases to call ext4_ext_rm_idx > 1) extent block has no entry, ext4_ext_rm_leave calls > ext4_ext_rm_idx to release extent block. > 1.1) And when the extent block is removed, the corresponding index entry has also to be removed. And if this was the first index entry, all the other index entries, are also updated. This updation will continue till we reach the root of extent tree OR the currently updated index entry is not the first entry in the node. > 2) extent index block has no entry, ext4_ext_remove_space > calls ext4_ext_rm_idx to release extent index block. > > Consider remove third level of extent index block that indexed by > second level of extent index, and the extent index in second level > is first entry in block. > So, I think the above case would be handled by point (1.1) i.e. the earlier index updation done from within ext4_ext_rm_leaf. The earlier ei_block updation will reach to the top of tree. > In this case, ei_block will not propogate to top level correctly, > if we only do propogation in ext4_ext_rm_leave. And thanks for pointing the problem. Can you reproduce it with my code suggesstion even with depth 3? If no, than please resend the patch with suggested changes. The intent here is to solve the problem without changing any function signature and with minimal changes. Regards, Ashish > > Regards, > Forrest > > 2012/12/5 Ashish Sangwan <ashishsangwan2@gmail.com>: >> Sorry 1 typo. >> It should be >> + path->p_idx->ei_block =(path + 1)->p_idx->ei_block ; >> instead of >> + path->p_idx->ei_block = border; >> >> On Wed, Dec 5, 2012 at 1:23 PM, Ashish Sangwan <ashishsangwan2@gmail.com> wrote: >>> I have tested it and the problem is real. >>> It happens when depth => 2 and punch hole completely removes any of >>> the first extent indexes at depth 1. >>> The change in ei_block is not propagated backwards to index at depth 0. >>> >>> However, AFAICS, the problematic call to ext4_ext_rm_idx is from >>> ext4_ext_rm_leaf and there is no need to change in >>> ext4_ext_remove_space or in ext4_ext_rm_idx. >>> >>> Forrest, what do you think about below change ? >>> basically its your code, but its been added to ext4_ext_rm_leaf after >>> removing index. >>> >>> @@ -2448,8 +2464,28 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, >>> >>> /* if this leaf is free, then we should >>> * remove it from index block above */ >>> - if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) >>> + if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) { >>> err = ext4_ext_rm_idx(handle, inode, path + depth); >>> + /* If this was the first extent index in node, >>> + * propagates the ei_block updation info to top */ >>> + if(!err) { >>> + --depth; >>> + path = path + depth; >>> + while (--depth >= 0) { >>> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr)) >>> + break; >>> + path--; >>> + err = ext4_ext_get_access(handle, inode, path); >>> + if (err) >>> + break; >>> + path->p_idx->ei_block = border; >>> + err = ext4_ext_dirty(handle, inode, path); >>> + if (err) >>> + break; >>> + } >>> + } >>> + } >>> + >>> >>> On Wed, Dec 5, 2012 at 11:43 AM, Forrest Liu <forrestl@synology.com> wrote: >>>> 2012/12/4 forrest <forrestl@synology.com>: >>>>> This problem is easily to reproduce >>>>> >>>>> Create a file with size larger than 1GB. >>>>> >>>>> dd if=/dev/zero of=/test_file bs=1M count=1024 >>>>> >>>>> Punch every even block in test_file, and then use debugfs to dump >>>>> extents, followings is dumped result >>>>> >>>>> 2/ 2 339/340 231197 - 231197 3917597 - 3917597 1 >>>>> 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1 >>>>> 0/ 2 2/ 2 231201 - 262143 3901486 30943 >>>>> 1/ 2 1/ 46 231201 - 231880 3901488 680 >>>>> 2/ 2 1/340 231201 - 231201 3917601 - 3917601 1 >>>>> 2/ 2 2/340 231203 - 231203 3917603 - 3917603 1 >>>>> >>>>> Punch blocks #231779 ~#231201 , to remove extent index, and then use >>>>> debugfs to dump extents, followings is dumped result >>>>> >>>>> 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1 >>>>> 0/ 2 2/ 2 231201 - 262143 3901486 30943 >>>>> -------> logical block index didn't update when remove extent index >>>>> 1/ 2 1/ 45 231881 - 232560 3901490 680 >>>>> 2/ 2 1/340 231881 - 231881 3918281 - 3918281 1 >>>>> 2/ 2 2/340 231883 - 231883 3918283 - 3918283 1 >>>> >>>> After blocks #231202~#231880 had been punch out, theses blocks can't map again. >>>> If we do map a block in #231202~#231880, we will get error messages >>>> like followings. >>>> >>>> EXT4-fs error (device md2): ext4_ext_search_left:1239: inode #13: comm >>>> flush-9:2: ix (231201) != EXT_FIRST_INDEX (1) (depth 0)! >>>> EXT4-fs (md2): delayed block allocation failed for inode 13 at logical >>>> offset 231203 with max blocks 1 with error -5 >>>> EXT4-fs (md2): This should not happen!! Data will be lost >>>> >>>> Regards, >>>> Forrest >>>> >>>>> >>>>> 2012/12/4 Forrest Liu <forrestl@synology.com>: >>>>>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth >>>>>> of extent tree is greater than 1. >>>>>> >>>>>> Signed-off-by: Forrest Liu <forrestl@synology.com> >>>>>> --- >>>>>> fs/ext4/extents.c | 24 ++++++++++++++++++++---- >>>>>> 1 files changed, 20 insertions(+), 4 deletions(-) >>>>>> >>>>>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c >>>>>> index d3dd618..b10b8c0 100644 >>>>>> --- a/fs/ext4/extents.c >>>>>> +++ b/fs/ext4/extents.c >>>>>> @@ -2190,13 +2190,15 @@ errout: >>>>>> * removes index from the index block. >>>>>> */ >>>>>> static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, >>>>>> - struct ext4_ext_path *path) >>>>>> + struct ext4_ext_path *path, int depth) >>>>>> { >>>>>> int err; >>>>>> ext4_fsblk_t leaf; >>>>>> + __le32 border; >>>>>> >>>>>> /* free index block */ >>>>>> - path--; >>>>>> + depth--; >>>>>> + path = path + depth; >>>>>> leaf = ext4_idx_pblock(path->p_idx); >>>>>> if (unlikely(path->p_hdr->eh_entries == 0)) { >>>>>> EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); >>>>>> @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, >>>>>> >>>>>> ext4_free_blocks(handle, inode, NULL, leaf, 1, >>>>>> EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); >>>>>> + >>>>>> + border = path->p_idx->ei_block; >>>>>> + while (--depth >= 0) { >>>>>> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr)) >>>>>> + break; >>>>>> + path--; >>>>>> + err = ext4_ext_get_access(handle, inode, path); >>>>>> + if (err) >>>>>> + break; >>>>>> + path->p_idx->ei_block = border; >>>>>> + err = ext4_ext_dirty(handle, inode, path); >>>>>> + if (err) >>>>>> + break; >>>>>> + } >>>>>> return err; >>>>>> } >>>>>> >>>>>> @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, >>>>>> /* if this leaf is free, then we should >>>>>> * remove it from index block above */ >>>>>> if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) >>>>>> - err = ext4_ext_rm_idx(handle, inode, path + depth); >>>>>> + err = ext4_ext_rm_idx(handle, inode, path, depth); >>>>>> >>>>>> out: >>>>>> return err; >>>>>> @@ -2760,7 +2776,7 @@ again: >>>>>> /* index is empty, remove it; >>>>>> * handle must be already prepared by the >>>>>> * truncatei_leaf() */ >>>>>> - err = ext4_ext_rm_idx(handle, inode, path + i); >>>>>> + err = ext4_ext_rm_idx(handle, inode, path, i); >>>>>> } >>>>>> /* root level has p_bh == NULL, brelse() eats this */ >>>>>> brelse(path[i].p_bh); >>>>>> -- >>>>>> 1.7.5.4 >>>>>> >>>> -- >>>> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in >>>> the body of a message to majordomo@vger.kernel.org >>>> More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Ashish, There have a chance to do a trivial update, and this case will be covered in ext4_ext_remove_space. + if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) { err = ext4_ext_rm_idx(handle, inode, path + depth); + /* If this was the first extent index in node, + * propagates the ei_block updation info to top */ + if(!err) { + --depth; + path = path + depth; + while (--depth >= 0) { + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr)) + break; + path--; + err = ext4_ext_get_access(handle, inode, path); + if (err) + break; trivial update, when (path + 1)->p_idx->eh_entries == 0 + path->p_idx->ei_block =(path + 1)->p_idx->ei_block ; + err = ext4_ext_dirty(handle, inode, path); + if (err) + break; + } + } + } + I will trying to reproduce the problem tomorrow. Thanks, Forrest 2012/12/6 Ashish Sangwan <ashishsangwan2@gmail.com>: > On Thu, Dec 6, 2012 at 5:05 PM, Forrest Liu <forrestl@synology.com> wrote: >> Hi Ashish, >> Thanks for your review and fixed hole punch bugs. >> >> We can't propogate ei_block only in ext4_ext_rm_leave. >> >> There have two cases to call ext4_ext_rm_idx >> 1) extent block has no entry, ext4_ext_rm_leave calls >> ext4_ext_rm_idx to release extent block. >> > > 1.1) And when the extent block is removed, the corresponding index > entry has also to be removed. And if this was the first index entry, > all the other index entries, are also updated. This updation will > continue till we reach the root of extent tree OR the currently > updated index entry is not the first entry in the node. > >> 2) extent index block has no entry, ext4_ext_remove_space >> calls ext4_ext_rm_idx to release extent index block. >> >> Consider remove third level of extent index block that indexed by >> second level of extent index, and the extent index in second level >> is first entry in block. >> > So, I think the above case would be handled by point (1.1) i.e. the > earlier index updation done from within ext4_ext_rm_leaf. > The earlier ei_block updation will reach to the top of tree. > >> In this case, ei_block will not propogate to top level correctly, >> if we only do propogation in ext4_ext_rm_leave. > > And thanks for pointing the problem. Can you reproduce it with my code > suggesstion even with depth 3? > If no, than please resend the patch with suggested changes. > The intent here is to solve the problem without changing any function > signature and with minimal changes. > > Regards, > Ashish >> >> Regards, >> Forrest >> >> 2012/12/5 Ashish Sangwan <ashishsangwan2@gmail.com>: >>> Sorry 1 typo. >>> It should be >>> + path->p_idx->ei_block =(path + 1)->p_idx->ei_block ; >>> instead of >>> + path->p_idx->ei_block = border; >>> >>> On Wed, Dec 5, 2012 at 1:23 PM, Ashish Sangwan <ashishsangwan2@gmail.com> wrote: >>>> I have tested it and the problem is real. >>>> It happens when depth => 2 and punch hole completely removes any of >>>> the first extent indexes at depth 1. >>>> The change in ei_block is not propagated backwards to index at depth 0. >>>> >>>> However, AFAICS, the problematic call to ext4_ext_rm_idx is from >>>> ext4_ext_rm_leaf and there is no need to change in >>>> ext4_ext_remove_space or in ext4_ext_rm_idx. >>>> >>>> Forrest, what do you think about below change ? >>>> basically its your code, but its been added to ext4_ext_rm_leaf after >>>> removing index. >>>> >>>> @@ -2448,8 +2464,28 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, >>>> >>>> /* if this leaf is free, then we should >>>> * remove it from index block above */ >>>> - if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) >>>> + if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) { >>>> err = ext4_ext_rm_idx(handle, inode, path + depth); >>>> + /* If this was the first extent index in node, >>>> + * propagates the ei_block updation info to top */ >>>> + if(!err) { >>>> + --depth; >>>> + path = path + depth; >>>> + while (--depth >= 0) { >>>> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr)) >>>> + break; >>>> + path--; >>>> + err = ext4_ext_get_access(handle, inode, path); >>>> + if (err) >>>> + break; >>>> + path->p_idx->ei_block = border; >>>> + err = ext4_ext_dirty(handle, inode, path); >>>> + if (err) >>>> + break; >>>> + } >>>> + } >>>> + } >>>> + >>>> >>>> On Wed, Dec 5, 2012 at 11:43 AM, Forrest Liu <forrestl@synology.com> wrote: >>>>> 2012/12/4 forrest <forrestl@synology.com>: >>>>>> This problem is easily to reproduce >>>>>> >>>>>> Create a file with size larger than 1GB. >>>>>> >>>>>> dd if=/dev/zero of=/test_file bs=1M count=1024 >>>>>> >>>>>> Punch every even block in test_file, and then use debugfs to dump >>>>>> extents, followings is dumped result >>>>>> >>>>>> 2/ 2 339/340 231197 - 231197 3917597 - 3917597 1 >>>>>> 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1 >>>>>> 0/ 2 2/ 2 231201 - 262143 3901486 30943 >>>>>> 1/ 2 1/ 46 231201 - 231880 3901488 680 >>>>>> 2/ 2 1/340 231201 - 231201 3917601 - 3917601 1 >>>>>> 2/ 2 2/340 231203 - 231203 3917603 - 3917603 1 >>>>>> >>>>>> Punch blocks #231779 ~#231201 , to remove extent index, and then use >>>>>> debugfs to dump extents, followings is dumped result >>>>>> >>>>>> 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1 >>>>>> 0/ 2 2/ 2 231201 - 262143 3901486 30943 >>>>>> -------> logical block index didn't update when remove extent index >>>>>> 1/ 2 1/ 45 231881 - 232560 3901490 680 >>>>>> 2/ 2 1/340 231881 - 231881 3918281 - 3918281 1 >>>>>> 2/ 2 2/340 231883 - 231883 3918283 - 3918283 1 >>>>> >>>>> After blocks #231202~#231880 had been punch out, theses blocks can't map again. >>>>> If we do map a block in #231202~#231880, we will get error messages >>>>> like followings. >>>>> >>>>> EXT4-fs error (device md2): ext4_ext_search_left:1239: inode #13: comm >>>>> flush-9:2: ix (231201) != EXT_FIRST_INDEX (1) (depth 0)! >>>>> EXT4-fs (md2): delayed block allocation failed for inode 13 at logical >>>>> offset 231203 with max blocks 1 with error -5 >>>>> EXT4-fs (md2): This should not happen!! Data will be lost >>>>> >>>>> Regards, >>>>> Forrest >>>>> >>>>>> >>>>>> 2012/12/4 Forrest Liu <forrestl@synology.com>: >>>>>>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth >>>>>>> of extent tree is greater than 1. >>>>>>> >>>>>>> Signed-off-by: Forrest Liu <forrestl@synology.com> >>>>>>> --- >>>>>>> fs/ext4/extents.c | 24 ++++++++++++++++++++---- >>>>>>> 1 files changed, 20 insertions(+), 4 deletions(-) >>>>>>> >>>>>>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c >>>>>>> index d3dd618..b10b8c0 100644 >>>>>>> --- a/fs/ext4/extents.c >>>>>>> +++ b/fs/ext4/extents.c >>>>>>> @@ -2190,13 +2190,15 @@ errout: >>>>>>> * removes index from the index block. >>>>>>> */ >>>>>>> static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, >>>>>>> - struct ext4_ext_path *path) >>>>>>> + struct ext4_ext_path *path, int depth) >>>>>>> { >>>>>>> int err; >>>>>>> ext4_fsblk_t leaf; >>>>>>> + __le32 border; >>>>>>> >>>>>>> /* free index block */ >>>>>>> - path--; >>>>>>> + depth--; >>>>>>> + path = path + depth; >>>>>>> leaf = ext4_idx_pblock(path->p_idx); >>>>>>> if (unlikely(path->p_hdr->eh_entries == 0)) { >>>>>>> EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); >>>>>>> @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, >>>>>>> >>>>>>> ext4_free_blocks(handle, inode, NULL, leaf, 1, >>>>>>> EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); >>>>>>> + >>>>>>> + border = path->p_idx->ei_block; >>>>>>> + while (--depth >= 0) { >>>>>>> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr)) >>>>>>> + break; >>>>>>> + path--; >>>>>>> + err = ext4_ext_get_access(handle, inode, path); >>>>>>> + if (err) >>>>>>> + break; >>>>>>> + path->p_idx->ei_block = border; >>>>>>> + err = ext4_ext_dirty(handle, inode, path); >>>>>>> + if (err) >>>>>>> + break; >>>>>>> + } >>>>>>> return err; >>>>>>> } >>>>>>> >>>>>>> @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, >>>>>>> /* if this leaf is free, then we should >>>>>>> * remove it from index block above */ >>>>>>> if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) >>>>>>> - err = ext4_ext_rm_idx(handle, inode, path + depth); >>>>>>> + err = ext4_ext_rm_idx(handle, inode, path, depth); >>>>>>> >>>>>>> out: >>>>>>> return err; >>>>>>> @@ -2760,7 +2776,7 @@ again: >>>>>>> /* index is empty, remove it; >>>>>>> * handle must be already prepared by the >>>>>>> * truncatei_leaf() */ >>>>>>> - err = ext4_ext_rm_idx(handle, inode, path + i); >>>>>>> + err = ext4_ext_rm_idx(handle, inode, path, i); >>>>>>> } >>>>>>> /* root level has p_bh == NULL, brelse() eats this */ >>>>>>> brelse(path[i].p_bh); >>>>>>> -- >>>>>>> 1.7.5.4 >>>>>>> >>>>> -- >>>>> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in >>>>> the body of a message to majordomo@vger.kernel.org >>>>> More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 12/4/12 6:11 AM, Forrest Liu wrote: > Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth > of extent tree is greater than 1. This is interesting; we had 2 reports of similar corruption on the list, I wonder if the application in question was doing hole punching. I didn't expect that they were, so TBH I was pretty much ignoring the hole-punch cases for parent index updates. Hm. I'll have to look into that. Could you turn your testcase into an xfstest regression test? -Eric > Signed-off-by: Forrest Liu <forrestl@synology.com> > --- > fs/ext4/extents.c | 24 ++++++++++++++++++++---- > 1 files changed, 20 insertions(+), 4 deletions(-) > > diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c > index d3dd618..b10b8c0 100644 > --- a/fs/ext4/extents.c > +++ b/fs/ext4/extents.c > @@ -2190,13 +2190,15 @@ errout: > * removes index from the index block. > */ > static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, > - struct ext4_ext_path *path) > + struct ext4_ext_path *path, int depth) > { > int err; > ext4_fsblk_t leaf; > + __le32 border; > > /* free index block */ > - path--; > + depth--; > + path = path + depth; > leaf = ext4_idx_pblock(path->p_idx); > if (unlikely(path->p_hdr->eh_entries == 0)) { > EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); > @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, > > ext4_free_blocks(handle, inode, NULL, leaf, 1, > EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); > + > + border = path->p_idx->ei_block; > + while (--depth >= 0) { > + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr)) > + break; > + path--; > + err = ext4_ext_get_access(handle, inode, path); > + if (err) > + break; > + path->p_idx->ei_block = border; > + err = ext4_ext_dirty(handle, inode, path); > + if (err) > + break; > + } > return err; > } > > @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, > /* if this leaf is free, then we should > * remove it from index block above */ > if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) > - err = ext4_ext_rm_idx(handle, inode, path + depth); > + err = ext4_ext_rm_idx(handle, inode, path, depth); > > out: > return err; > @@ -2760,7 +2776,7 @@ again: > /* index is empty, remove it; > * handle must be already prepared by the > * truncatei_leaf() */ > - err = ext4_ext_rm_idx(handle, inode, path + i); > + err = ext4_ext_rm_idx(handle, inode, path, i); > } > /* root level has p_bh == NULL, brelse() eats this */ > brelse(path[i].p_bh); > -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 12/6/12 9:45 AM, Eric Sandeen wrote: > On 12/4/12 6:11 AM, Forrest Liu wrote: >> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth >> of extent tree is greater than 1. > > This is interesting; we had 2 reports of similar corruption on the > list, I wonder if the application in question was doing hole punching. > I didn't expect that they were, so TBH I was pretty much ignoring > the hole-punch cases for parent index updates. Hm. I'll have > to look into that. > > Could you turn your testcase into an xfstest regression test? Also, please note that I sent an e2fsck patch to try to fix this problem after the fact; it'd be great if in your testing, you could also confirm that e2fsck w/ my patch fixes it correctly. Thanks, -Eric > -Eric > >> Signed-off-by: Forrest Liu <forrestl@synology.com> >> --- >> fs/ext4/extents.c | 24 ++++++++++++++++++++---- >> 1 files changed, 20 insertions(+), 4 deletions(-) >> >> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c >> index d3dd618..b10b8c0 100644 >> --- a/fs/ext4/extents.c >> +++ b/fs/ext4/extents.c >> @@ -2190,13 +2190,15 @@ errout: >> * removes index from the index block. >> */ >> static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, >> - struct ext4_ext_path *path) >> + struct ext4_ext_path *path, int depth) >> { >> int err; >> ext4_fsblk_t leaf; >> + __le32 border; >> >> /* free index block */ >> - path--; >> + depth--; >> + path = path + depth; >> leaf = ext4_idx_pblock(path->p_idx); >> if (unlikely(path->p_hdr->eh_entries == 0)) { >> EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); >> @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, >> >> ext4_free_blocks(handle, inode, NULL, leaf, 1, >> EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); >> + >> + border = path->p_idx->ei_block; >> + while (--depth >= 0) { >> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr)) >> + break; >> + path--; >> + err = ext4_ext_get_access(handle, inode, path); >> + if (err) >> + break; >> + path->p_idx->ei_block = border; >> + err = ext4_ext_dirty(handle, inode, path); >> + if (err) >> + break; >> + } >> return err; >> } >> >> @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, >> /* if this leaf is free, then we should >> * remove it from index block above */ >> if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) >> - err = ext4_ext_rm_idx(handle, inode, path + depth); >> + err = ext4_ext_rm_idx(handle, inode, path, depth); >> >> out: >> return err; >> @@ -2760,7 +2776,7 @@ again: >> /* index is empty, remove it; >> * handle must be already prepared by the >> * truncatei_leaf() */ >> - err = ext4_ext_rm_idx(handle, inode, path + i); >> + err = ext4_ext_rm_idx(handle, inode, path, i); >> } >> /* root level has p_bh == NULL, brelse() eats this */ >> brelse(path[i].p_bh); >> > -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Dec 6, 2012 at 8:06 PM, Forrest Liu <forrestl@synology.com> wrote: > Hi Ashish, > > There have a chance to do a trivial update, and this case will be > covered in ext4_ext_remove_space. > > + if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) { > err = ext4_ext_rm_idx(handle, inode, path + depth); > + /* If this was the first extent index in node, > + * propagates the ei_block updation info to top */ > + if(!err) { > + --depth; > + path = path + depth; > + while (--depth >= 0) { > + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr)) > + break; > + path--; > + err = ext4_ext_get_access(handle, inode, path); > + if (err) > + break; > > trivial update, when (path + 1)->p_idx->eh_entries == 0 > > + path->p_idx->ei_block =(path + > 1)->p_idx->ei_block ; > + err = ext4_ext_dirty(handle, inode, path); > + if (err) > + break; > + } > + } > + } > + > > I will trying to reproduce the problem tomorrow. I think it will work fine atleast till depth = 2. It would be great if you could check with depth > 2. If still having this problem, we can think more. > > Thanks, > Forrest > > 2012/12/6 Ashish Sangwan <ashishsangwan2@gmail.com>: >> On Thu, Dec 6, 2012 at 5:05 PM, Forrest Liu <forrestl@synology.com> wrote: >>> Hi Ashish, >>> Thanks for your review and fixed hole punch bugs. >>> >>> We can't propogate ei_block only in ext4_ext_rm_leave. >>> >>> There have two cases to call ext4_ext_rm_idx >>> 1) extent block has no entry, ext4_ext_rm_leave calls >>> ext4_ext_rm_idx to release extent block. >>> >> >> 1.1) And when the extent block is removed, the corresponding index >> entry has also to be removed. And if this was the first index entry, >> all the other index entries, are also updated. This updation will >> continue till we reach the root of extent tree OR the currently >> updated index entry is not the first entry in the node. >> >>> 2) extent index block has no entry, ext4_ext_remove_space >>> calls ext4_ext_rm_idx to release extent index block. >>> >>> Consider remove third level of extent index block that indexed by >>> second level of extent index, and the extent index in second level >>> is first entry in block. >>> >> So, I think the above case would be handled by point (1.1) i.e. the >> earlier index updation done from within ext4_ext_rm_leaf. >> The earlier ei_block updation will reach to the top of tree. >> >>> In this case, ei_block will not propogate to top level correctly, >>> if we only do propogation in ext4_ext_rm_leave. >> >> And thanks for pointing the problem. Can you reproduce it with my code >> suggesstion even with depth 3? >> If no, than please resend the patch with suggested changes. >> The intent here is to solve the problem without changing any function >> signature and with minimal changes. >> >> Regards, >> Ashish >>> >>> Regards, >>> Forrest >>> >>> 2012/12/5 Ashish Sangwan <ashishsangwan2@gmail.com>: >>>> Sorry 1 typo. >>>> It should be >>>> + path->p_idx->ei_block =(path + 1)->p_idx->ei_block ; >>>> instead of >>>> + path->p_idx->ei_block = border; >>>> >>>> On Wed, Dec 5, 2012 at 1:23 PM, Ashish Sangwan <ashishsangwan2@gmail.com> wrote: >>>>> I have tested it and the problem is real. >>>>> It happens when depth => 2 and punch hole completely removes any of >>>>> the first extent indexes at depth 1. >>>>> The change in ei_block is not propagated backwards to index at depth 0. >>>>> >>>>> However, AFAICS, the problematic call to ext4_ext_rm_idx is from >>>>> ext4_ext_rm_leaf and there is no need to change in >>>>> ext4_ext_remove_space or in ext4_ext_rm_idx. >>>>> >>>>> Forrest, what do you think about below change ? >>>>> basically its your code, but its been added to ext4_ext_rm_leaf after >>>>> removing index. >>>>> >>>>> @@ -2448,8 +2464,28 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, >>>>> >>>>> /* if this leaf is free, then we should >>>>> * remove it from index block above */ >>>>> - if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) >>>>> + if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) { >>>>> err = ext4_ext_rm_idx(handle, inode, path + depth); >>>>> + /* If this was the first extent index in node, >>>>> + * propagates the ei_block updation info to top */ >>>>> + if(!err) { >>>>> + --depth; >>>>> + path = path + depth; >>>>> + while (--depth >= 0) { >>>>> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr)) >>>>> + break; >>>>> + path--; >>>>> + err = ext4_ext_get_access(handle, inode, path); >>>>> + if (err) >>>>> + break; >>>>> + path->p_idx->ei_block = border; >>>>> + err = ext4_ext_dirty(handle, inode, path); >>>>> + if (err) >>>>> + break; >>>>> + } >>>>> + } >>>>> + } >>>>> + >>>>> >>>>> On Wed, Dec 5, 2012 at 11:43 AM, Forrest Liu <forrestl@synology.com> wrote: >>>>>> 2012/12/4 forrest <forrestl@synology.com>: >>>>>>> This problem is easily to reproduce >>>>>>> >>>>>>> Create a file with size larger than 1GB. >>>>>>> >>>>>>> dd if=/dev/zero of=/test_file bs=1M count=1024 >>>>>>> >>>>>>> Punch every even block in test_file, and then use debugfs to dump >>>>>>> extents, followings is dumped result >>>>>>> >>>>>>> 2/ 2 339/340 231197 - 231197 3917597 - 3917597 1 >>>>>>> 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1 >>>>>>> 0/ 2 2/ 2 231201 - 262143 3901486 30943 >>>>>>> 1/ 2 1/ 46 231201 - 231880 3901488 680 >>>>>>> 2/ 2 1/340 231201 - 231201 3917601 - 3917601 1 >>>>>>> 2/ 2 2/340 231203 - 231203 3917603 - 3917603 1 >>>>>>> >>>>>>> Punch blocks #231779 ~#231201 , to remove extent index, and then use >>>>>>> debugfs to dump extents, followings is dumped result >>>>>>> >>>>>>> 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1 >>>>>>> 0/ 2 2/ 2 231201 - 262143 3901486 30943 >>>>>>> -------> logical block index didn't update when remove extent index >>>>>>> 1/ 2 1/ 45 231881 - 232560 3901490 680 >>>>>>> 2/ 2 1/340 231881 - 231881 3918281 - 3918281 1 >>>>>>> 2/ 2 2/340 231883 - 231883 3918283 - 3918283 1 >>>>>> >>>>>> After blocks #231202~#231880 had been punch out, theses blocks can't map again. >>>>>> If we do map a block in #231202~#231880, we will get error messages >>>>>> like followings. >>>>>> >>>>>> EXT4-fs error (device md2): ext4_ext_search_left:1239: inode #13: comm >>>>>> flush-9:2: ix (231201) != EXT_FIRST_INDEX (1) (depth 0)! >>>>>> EXT4-fs (md2): delayed block allocation failed for inode 13 at logical >>>>>> offset 231203 with max blocks 1 with error -5 >>>>>> EXT4-fs (md2): This should not happen!! Data will be lost >>>>>> >>>>>> Regards, >>>>>> Forrest >>>>>> >>>>>>> >>>>>>> 2012/12/4 Forrest Liu <forrestl@synology.com>: >>>>>>>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth >>>>>>>> of extent tree is greater than 1. >>>>>>>> >>>>>>>> Signed-off-by: Forrest Liu <forrestl@synology.com> >>>>>>>> --- >>>>>>>> fs/ext4/extents.c | 24 ++++++++++++++++++++---- >>>>>>>> 1 files changed, 20 insertions(+), 4 deletions(-) >>>>>>>> >>>>>>>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c >>>>>>>> index d3dd618..b10b8c0 100644 >>>>>>>> --- a/fs/ext4/extents.c >>>>>>>> +++ b/fs/ext4/extents.c >>>>>>>> @@ -2190,13 +2190,15 @@ errout: >>>>>>>> * removes index from the index block. >>>>>>>> */ >>>>>>>> static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, >>>>>>>> - struct ext4_ext_path *path) >>>>>>>> + struct ext4_ext_path *path, int depth) >>>>>>>> { >>>>>>>> int err; >>>>>>>> ext4_fsblk_t leaf; >>>>>>>> + __le32 border; >>>>>>>> >>>>>>>> /* free index block */ >>>>>>>> - path--; >>>>>>>> + depth--; >>>>>>>> + path = path + depth; >>>>>>>> leaf = ext4_idx_pblock(path->p_idx); >>>>>>>> if (unlikely(path->p_hdr->eh_entries == 0)) { >>>>>>>> EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); >>>>>>>> @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, >>>>>>>> >>>>>>>> ext4_free_blocks(handle, inode, NULL, leaf, 1, >>>>>>>> EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); >>>>>>>> + >>>>>>>> + border = path->p_idx->ei_block; >>>>>>>> + while (--depth >= 0) { >>>>>>>> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr)) >>>>>>>> + break; >>>>>>>> + path--; >>>>>>>> + err = ext4_ext_get_access(handle, inode, path); >>>>>>>> + if (err) >>>>>>>> + break; >>>>>>>> + path->p_idx->ei_block = border; >>>>>>>> + err = ext4_ext_dirty(handle, inode, path); >>>>>>>> + if (err) >>>>>>>> + break; >>>>>>>> + } >>>>>>>> return err; >>>>>>>> } >>>>>>>> >>>>>>>> @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, >>>>>>>> /* if this leaf is free, then we should >>>>>>>> * remove it from index block above */ >>>>>>>> if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) >>>>>>>> - err = ext4_ext_rm_idx(handle, inode, path + depth); >>>>>>>> + err = ext4_ext_rm_idx(handle, inode, path, depth); >>>>>>>> >>>>>>>> out: >>>>>>>> return err; >>>>>>>> @@ -2760,7 +2776,7 @@ again: >>>>>>>> /* index is empty, remove it; >>>>>>>> * handle must be already prepared by the >>>>>>>> * truncatei_leaf() */ >>>>>>>> - err = ext4_ext_rm_idx(handle, inode, path + i); >>>>>>>> + err = ext4_ext_rm_idx(handle, inode, path, i); >>>>>>>> } >>>>>>>> /* root level has p_bh == NULL, brelse() eats this */ >>>>>>>> brelse(path[i].p_bh); >>>>>>>> -- >>>>>>>> 1.7.5.4 >>>>>>>> >>>>>> -- >>>>>> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in >>>>>> the body of a message to majordomo@vger.kernel.org >>>>>> More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Dec 6, 2012 at 9:18 PM, Eric Sandeen <sandeen@redhat.com> wrote: > On 12/6/12 9:45 AM, Eric Sandeen wrote: >> On 12/4/12 6:11 AM, Forrest Liu wrote: >>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth >>> of extent tree is greater than 1. >> >> This is interesting; we had 2 reports of similar corruption on the >> list, I wonder if the application in question was doing hole punching. >> I didn't expect that they were, so TBH I was pretty much ignoring >> the hole-punch cases for parent index updates. Hm. I'll have >> to look into that. >> >> Could you turn your testcase into an xfstest regression test? > > Also, please note that I sent an e2fsck patch to try to fix this > problem after the fact; it'd be great if in your testing, you could > also confirm that e2fsck w/ my patch fixes it correctly. > I checked you patch. This was the extent tree situation after removing 1st extent index: debugfs: ex abc Level Entries Logical Physical Length Flags 0/ 2 1/ 1 0 - 8399 32857 8400 1/ 2 1/ 4 2048 - 4081 4138 2034 2/ 2 1/339 2048 - 2053 69632 - 69637 6 2/ 2 2/339 2054 - 2059 69656 - 69661 6 E2fsck's output with your patch=> Linux#> /dtv/usb/sdb1/e2fsck /dev/sda1 -f e2fsck 1.42.6.1 (06-DEC-2012) Pass 1: Checking inodes, blocks, and sizes Interior extent node level 0 of inode 31: Logical start 0 does not match logical start 2048 at next level. Fix<y>? yes Inode 31, i_blocks is 50856, should be 16280. Fix<y>? yes Pass 2: Checking directory structure Pass 3: Checking directory connectivity Pass 4: Checking reference counts Pass 5: Checking group summary information Block bitmap differences: -4138 -(73712--73717) -73728 -(98342--98347) -(98354--98359) -(98366--98371) -(98378--98383) -(98390--98395) -(98402--98407) < Similarly this was huge list of around 50-60 lines, so I skip to last > 910--106915) -(106922--106927) -(106934--106939) -(106946--106951) -(106958--106963) -(122872--122879) Fix<y>? yes Free blocks count wrong for group #0 (14308, counted=14309). Fix<y>? yes Free blocks count wrong for group #2 (12297, counted=12304). Fix<y>? yes Free blocks count wrong for group #3 (9770, counted=14084). Fix<y>? yes Free blocks count wrong (52407, counted=56729). Fix<y>? yes /dev/sda1: ***** FILE SYSTEM WAS MODIFIED ***** /dev/sda1: 9872/126592 files (0.1% non-contiguous), 69775/126504 blocks Linux#> > Thanks, > -Eric > >> -Eric >> >>> Signed-off-by: Forrest Liu <forrestl@synology.com> >>> --- >>> fs/ext4/extents.c | 24 ++++++++++++++++++++---- >>> 1 files changed, 20 insertions(+), 4 deletions(-) >>> >>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c >>> index d3dd618..b10b8c0 100644 >>> --- a/fs/ext4/extents.c >>> +++ b/fs/ext4/extents.c >>> @@ -2190,13 +2190,15 @@ errout: >>> * removes index from the index block. >>> */ >>> static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, >>> - struct ext4_ext_path *path) >>> + struct ext4_ext_path *path, int depth) >>> { >>> int err; >>> ext4_fsblk_t leaf; >>> + __le32 border; >>> >>> /* free index block */ >>> - path--; >>> + depth--; >>> + path = path + depth; >>> leaf = ext4_idx_pblock(path->p_idx); >>> if (unlikely(path->p_hdr->eh_entries == 0)) { >>> EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); >>> @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, >>> >>> ext4_free_blocks(handle, inode, NULL, leaf, 1, >>> EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); >>> + >>> + border = path->p_idx->ei_block; >>> + while (--depth >= 0) { >>> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr)) >>> + break; >>> + path--; >>> + err = ext4_ext_get_access(handle, inode, path); >>> + if (err) >>> + break; >>> + path->p_idx->ei_block = border; >>> + err = ext4_ext_dirty(handle, inode, path); >>> + if (err) >>> + break; >>> + } >>> return err; >>> } >>> >>> @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, >>> /* if this leaf is free, then we should >>> * remove it from index block above */ >>> if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) >>> - err = ext4_ext_rm_idx(handle, inode, path + depth); >>> + err = ext4_ext_rm_idx(handle, inode, path, depth); >>> >>> out: >>> return err; >>> @@ -2760,7 +2776,7 @@ again: >>> /* index is empty, remove it; >>> * handle must be already prepared by the >>> * truncatei_leaf() */ >>> - err = ext4_ext_rm_idx(handle, inode, path + i); >>> + err = ext4_ext_rm_idx(handle, inode, path, i); >>> } >>> /* root level has p_bh == NULL, brelse() eats this */ >>> brelse(path[i].p_bh); >>> >> > > -- > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2012/12/7 Ashish Sangwan <ashishsangwan2@gmail.com>: > On Thu, Dec 6, 2012 at 8:06 PM, Forrest Liu <forrestl@synology.com> wrote: >> Hi Ashish, >> >> There have a chance to do a trivial update, and this case will be >> covered in ext4_ext_remove_space. >> >> + if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) { >> err = ext4_ext_rm_idx(handle, inode, path + depth); >> + /* If this was the first extent index in node, >> + * propagates the ei_block updation info to top */ >> + if(!err) { >> + --depth; >> + path = path + depth; >> + while (--depth >= 0) { >> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr)) >> + break; >> + path--; >> + err = ext4_ext_get_access(handle, inode, path); >> + if (err) >> + break; >> >> trivial update, when (path + 1)->p_idx->eh_entries == 0 >> >> + path->p_idx->ei_block =(path + >> 1)->p_idx->ei_block ; >> + err = ext4_ext_dirty(handle, inode, path); >> + if (err) >> + break; >> + } >> + } >> + } >> + >> >> I will trying to reproduce the problem tomorrow. > I think it will work fine atleast till depth = 2. > It would be great if you could check with depth > 2. > If still having this problem, we can think more. Hi Ashish, First, create a file lager than 4GB, and then punch out every even blocks; Use ex command in debugfs to dump extents, belowing is the result. Level Entries Logical Physical Length Flags 0/ 3 1/ 1 1 - 1048575 40013 1048575 1/ 3 1/ 5 1 - 231160 40012 231160 2/ 3 1/340 1 - 680 40011 680 3/ 3 1/340 1 - 1 2228225 - 2228225 1 3/ 3 2/340 3 - 3 2228227 - 2228227 1 3/ 3 3/340 5 - 5 2228229 - 2228229 1 punch out 231159, 231157, ..., 1.. Level Entries Logical Physical Length Flags 0/ 3 1/ 1 1 - 1048575 40013 1048575 1/ 3 1/ 4 231161 - 462320 2457604 231160 2/ 3 1/340 231161 - 231840 2457606 680 3/ 3 1/340 231161 - 231161 2459385 - 2459385 1 Then, we get incorrect logical index, if only do correction in ext4_ext_rm_leaf. Thanks, Forrest -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2012/12/7 Ashish Sangwan <ashishsangwan2@gmail.com>: > On Thu, Dec 6, 2012 at 9:18 PM, Eric Sandeen <sandeen@redhat.com> wrote: >> On 12/6/12 9:45 AM, Eric Sandeen wrote: >>> On 12/4/12 6:11 AM, Forrest Liu wrote: >>>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth >>>> of extent tree is greater than 1. >>> >>> This is interesting; we had 2 reports of similar corruption on the >>> list, I wonder if the application in question was doing hole punching. >>> I didn't expect that they were, so TBH I was pretty much ignoring >>> the hole-punch cases for parent index updates. Hm. I'll have >>> to look into that. >>> Could you turn your testcase into an xfstest regression test? Hi Eric, I will check how to do that. >> >> Also, please note that I sent an e2fsck patch to try to fix this >> problem after the fact; it'd be great if in your testing, you could >> also confirm that e2fsck w/ my patch fixes it correctly. >> > I checked you patch. > This was the extent tree situation after removing 1st extent index: > debugfs: ex abc > Level Entries Logical Physical Length Flags > 0/ 2 1/ 1 0 - 8399 32857 8400 > 1/ 2 1/ 4 2048 - 4081 4138 2034 > 2/ 2 1/339 2048 - 2053 69632 - 69637 6 > 2/ 2 2/339 2054 - 2059 69656 - 69661 6 > > E2fsck's output with your patch=> > Linux#> /dtv/usb/sdb1/e2fsck /dev/sda1 -f > e2fsck 1.42.6.1 (06-DEC-2012) > Pass 1: Checking inodes, blocks, and sizes > Interior extent node level 0 of inode 31: > Logical start 0 does not match logical start 2048 at next level. Fix<y>? yes > Inode 31, i_blocks is 50856, should be 16280. Fix<y>? yes I got similar result, pb->num_blocks is incorrect if ext2fs_extent_fix_parents called. > Pass 2: Checking directory structure > Pass 3: Checking directory connectivity > Pass 4: Checking reference counts > Pass 5: Checking group summary information > Block bitmap differences: -4138 -(73712--73717) -73728 > -(98342--98347) -(98354--98359) -(98366--98371) -(98378--98383) > -(98390--98395) -(98402--98407) > < Similarly this was huge list of around 50-60 lines, so I skip to last > > 910--106915) -(106922--106927) -(106934--106939) -(106946--106951) > -(106958--106963) -(122872--122879) > Fix<y>? yes > Free blocks count wrong for group #0 (14308, counted=14309). > Fix<y>? yes > Free blocks count wrong for group #2 (12297, counted=12304). > Fix<y>? yes > Free blocks count wrong for group #3 (9770, counted=14084). > Fix<y>? yes > Free blocks count wrong (52407, counted=56729). > Fix<y>? yes > /dev/sda1: ***** FILE SYSTEM WAS MODIFIED ***** > /dev/sda1: 9872/126592 files (0.1% non-contiguous), 69775/126504 blocks > Linux#> > >> Thanks, >> -Eric >> >>> -Eric >>> -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 12/7/12 6:26 AM, Forrest Liu wrote: > 2012/12/7 Ashish Sangwan <ashishsangwan2@gmail.com>: >> On Thu, Dec 6, 2012 at 9:18 PM, Eric Sandeen <sandeen@redhat.com> wrote: >>> On 12/6/12 9:45 AM, Eric Sandeen wrote: >>>> On 12/4/12 6:11 AM, Forrest Liu wrote: >>>>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth >>>>> of extent tree is greater than 1. >>>> >>>> This is interesting; we had 2 reports of similar corruption on the >>>> list, I wonder if the application in question was doing hole punching. >>>> I didn't expect that they were, so TBH I was pretty much ignoring >>>> the hole-punch cases for parent index updates. Hm. I'll have >>>> to look into that. >>>> Could you turn your testcase into an xfstest regression test? > > Hi Eric, > I will check how to do that. >>> >>> Also, please note that I sent an e2fsck patch to try to fix this >>> problem after the fact; it'd be great if in your testing, you could >>> also confirm that e2fsck w/ my patch fixes it correctly. >>> >> I checked you patch. >> This was the extent tree situation after removing 1st extent index: >> debugfs: ex abc >> Level Entries Logical Physical Length Flags >> 0/ 2 1/ 1 0 - 8399 32857 8400 >> 1/ 2 1/ 4 2048 - 4081 4138 2034 >> 2/ 2 1/339 2048 - 2053 69632 - 69637 6 >> 2/ 2 2/339 2054 - 2059 69656 - 69661 6 >> >> E2fsck's output with your patch=> >> Linux#> /dtv/usb/sdb1/e2fsck /dev/sda1 -f >> e2fsck 1.42.6.1 (06-DEC-2012) >> Pass 1: Checking inodes, blocks, and sizes >> Interior extent node level 0 of inode 31: >> Logical start 0 does not match logical start 2048 at next level. Fix<y>? yes >> Inode 31, i_blocks is 50856, should be 16280. Fix<y>? yes > > I got similar result, pb->num_blocks is incorrect if > ext2fs_extent_fix_parents called. Maybe I should ask what it looks like without my patch? I didn't think my patch would change anything at all about block count or use. -Eric -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 12/4/12 6:29 AM, forrest wrote: > This problem is easily to reproduce > > Create a file with size larger than 1GB. > > dd if=/dev/zero of=/test_file bs=1M count=1024 > > Punch every even block in test_file, and then use debugfs to dump > extents, followings is dumped result > > 2/ 2 339/340 231197 - 231197 3917597 - 3917597 1 > 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1 > 0/ 2 2/ 2 231201 - 262143 3901486 30943 > 1/ 2 1/ 46 231201 - 231880 3901488 680 > 2/ 2 1/340 231201 - 231201 3917601 - 3917601 1 > 2/ 2 2/340 231203 - 231203 3917603 - 3917603 1 > > Punch blocks #231779 ~#231201 , Can you explain what you mean by the above? Which blocks get punched? -Eric > to remove extent index, and then use > debugfs to dump extents, followings is dumped result -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2012/12/9 Eric Sandeen <sandeen@redhat.com>: > On 12/4/12 6:29 AM, forrest wrote: >> This problem is easily to reproduce >> >> Create a file with size larger than 1GB. >> >> dd if=/dev/zero of=/test_file bs=1M count=1024 >> >> Punch every even block in test_file, and then use debugfs to dump >> extents, followings is dumped result >> >> 2/ 2 339/340 231197 - 231197 3917597 - 3917597 1 >> 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1 >> 0/ 2 2/ 2 231201 - 262143 3901486 30943 >> 1/ 2 1/ 46 231201 - 231880 3901488 680 >> 2/ 2 1/340 231201 - 231201 3917601 - 3917601 1 >> 2/ 2 2/340 231203 - 231203 3917603 - 3917603 1 >> >> Punch blocks #231779 ~#231201 , > > Can you explain what you mean by the above? Which blocks get punched? Hi Eric, At first, i found blowing code may have some problem ext4_ext_rm_leaf () { ... /////when eh->eh_entries equals to zero, logical start value will not be update; //// If parent is first index in block, this cause problem. if (correct_index && eh->eh_entries) err = ext4_ext_correct_indexes(handle, inode, path); } To prove that, I tried to trigger this case (eh->eh_entries = 0) 1) create a file which size greater than 1GB 2) punch all even blocks 0, 2, 4, 6, 8, ....... (blocks depends on file size) one by one to make extent tree with depth equals to 2 3) use debugfs to find an extent index entry at level1 (assume leaf at level2) witch logical start value is equals to is't parent node. In example, founded entry is 1/ 2 1/ 46 231201 - 231880 3901488 680 4 ) punch blocks covered by extent index found by step3 one by one. In example, 231879, 231877, ..., 231201.. 5 ) Then, we can use debugfs to check if logical start value is correct or not In example, 231201 is not equals to 231881. 0/ 2 2/ 2 231201 - 262143 3901486 30943 1/ 2 1/ 45 231881 - 232560 3901490 680 Thanks, -Forrest > > -Eric > >> to remove extent index, and then use >> debugfs to dump extents, followings is dumped result > > -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2012/12/8 Eric Sandeen <sandeen@redhat.com>: > On 12/7/12 6:26 AM, Forrest Liu wrote: >> 2012/12/7 Ashish Sangwan <ashishsangwan2@gmail.com>: >>> On Thu, Dec 6, 2012 at 9:18 PM, Eric Sandeen <sandeen@redhat.com> wrote: >>>> On 12/6/12 9:45 AM, Eric Sandeen wrote: >>>>> On 12/4/12 6:11 AM, Forrest Liu wrote: >>>>>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth >>>>>> of extent tree is greater than 1. >>>>> >>>>> This is interesting; we had 2 reports of similar corruption on the >>>>> list, I wonder if the application in question was doing hole punching. >>>>> I didn't expect that they were, so TBH I was pretty much ignoring >>>>> the hole-punch cases for parent index updates. Hm. I'll have >>>>> to look into that. >>>>> Could you turn your testcase into an xfstest regression test? >> >> Hi Eric, >> I will check how to do that. >>>> >>>> Also, please note that I sent an e2fsck patch to try to fix this >>>> problem after the fact; it'd be great if in your testing, you could >>>> also confirm that e2fsck w/ my patch fixes it correctly. >>>> >>> I checked you patch. >>> This was the extent tree situation after removing 1st extent index: >>> debugfs: ex abc >>> Level Entries Logical Physical Length Flags >>> 0/ 2 1/ 1 0 - 8399 32857 8400 >>> 1/ 2 1/ 4 2048 - 4081 4138 2034 >>> 2/ 2 1/339 2048 - 2053 69632 - 69637 6 >>> 2/ 2 2/339 2054 - 2059 69656 - 69661 6 >>> >>> E2fsck's output with your patch=> >>> Linux#> /dtv/usb/sdb1/e2fsck /dev/sda1 -f >>> e2fsck 1.42.6.1 (06-DEC-2012) >>> Pass 1: Checking inodes, blocks, and sizes >>> Interior extent node level 0 of inode 31: >>> Logical start 0 does not match logical start 2048 at next level. Fix<y>? yes >>> Inode 31, i_blocks is 50856, should be 16280. Fix<y>? yes >> >> I got similar result, pb->num_blocks is incorrect if >> ext2fs_extent_fix_parents called. > > Maybe I should ask what it looks like without my patch? I didn't > think my patch would change anything at all about block count > or use. > > -Eric > When i choose not to repair problem of logical start value, there have no message like Inode 31, i_blocks is 50856, should be 16280. Fix<y>? -Forrest -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> Hi Ashish, > > First, create a file lager than 4GB, and then punch out every even blocks; > Use ex command in debugfs to dump extents, belowing is the result. > > Level Entries Logical Physical Length Flags > 0/ 3 1/ 1 1 - 1048575 40013 1048575 > 1/ 3 1/ 5 1 - 231160 40012 231160 > 2/ 3 1/340 1 - 680 40011 680 > 3/ 3 1/340 1 - 1 2228225 - 2228225 1 > 3/ 3 2/340 3 - 3 2228227 - 2228227 1 > 3/ 3 3/340 5 - 5 2228229 - 2228229 1 > > > punch out 231159, 231157, ..., 1.. > > > Level Entries Logical Physical Length Flags > 0/ 3 1/ 1 1 - 1048575 40013 1048575 > 1/ 3 1/ 4 231161 - 462320 2457604 231160 > 2/ 3 1/340 231161 - 231840 2457606 680 > 3/ 3 1/340 231161 - 231161 2459385 - 2459385 1 > > > Then, we get incorrect logical index, if only do correction in ext4_ext_rm_leaf. Ok, In your patch we can do without varibale border and instead use: path->p_idx->ei_block = (path+1)->p_idx->ei_block; Rest of the patch seems Ok. Thanks, Ashish > > > Thanks, > Forrest -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index d3dd618..b10b8c0 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2190,13 +2190,15 @@ errout: * removes index from the index block. */ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path) + struct ext4_ext_path *path, int depth) { int err; ext4_fsblk_t leaf; + __le32 border; /* free index block */ - path--; + depth--; + path = path + depth; leaf = ext4_idx_pblock(path->p_idx); if (unlikely(path->p_hdr->eh_entries == 0)) { EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, ext4_free_blocks(handle, inode, NULL, leaf, 1, EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); + + border = path->p_idx->ei_block; + while (--depth >= 0) { + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr)) + break; + path--; + err = ext4_ext_get_access(handle, inode, path); + if (err) + break; + path->p_idx->ei_block = border; + err = ext4_ext_dirty(handle, inode, path); + if (err) + break; + } return err; } @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, /* if this leaf is free, then we should * remove it from index block above */ if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) - err = ext4_ext_rm_idx(handle, inode, path + depth); + err = ext4_ext_rm_idx(handle, inode, path, depth); out: return err; @@ -2760,7 +2776,7 @@ again: /* index is empty, remove it; * handle must be already prepared by the * truncatei_leaf() */ - err = ext4_ext_rm_idx(handle, inode, path + i); + err = ext4_ext_rm_idx(handle, inode, path, i); } /* root level has p_bh == NULL, brelse() eats this */ brelse(path[i].p_bh);
Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth of extent tree is greater than 1. Signed-off-by: Forrest Liu <forrestl@synology.com> --- fs/ext4/extents.c | 24 ++++++++++++++++++++---- 1 files changed, 20 insertions(+), 4 deletions(-)