Message ID | 1358510446-19174-5-git-send-email-jack@suse.cz |
---|---|
State | Accepted, archived |
Headers | show |
On Fri, 18 Jan 2013 13:00:38 +0100, Jan Kara <jack@suse.cz> wrote: > Merging of uninitialized extents creates all sorts of interesting race > possibilities when writeback / DIO races with fallocate. Thus > ext4_convert_unwritten_extents_endio() has to deal with a case where > extent to be converted needs to be split out first. That isn't nice > for two reasons: > > 1) It may need allocation of extent tree block so ENOSPC is possible. > 2) It complicates end_io handling code As we already discussed your idea is 100% correct, but even with what patch I still able to trigger situation where split it required. I've got following error with this patch applied on top of 7f5118629f7 EXT4-fs error (device dm-3): ext4_convert_unwritten_extents_endio:3411: inode #12: comm kworker/u:4: Written extent modified before IO finished: extent logical block 1379787, len 64; IO logical block 1379787, len 21 ------------[ cut here ]------------ WARNING: at fs/ext4/extents.c:4518 ext4_convert_unwritten_extents+0x149/0x210 [ext4]() Hardware name: Modules linked in: ext4 jbd2 cpufreq_ondemand acpi_cpufreq freq_table mperf coretemp kvm_intel kvm crc32c_intel microcode sg button ext3 jbd mbcache sd_mod crc_t10dif ahci libahci pata_acpi ata_generic dm_mirror dm_region_hash dm_log dm_mod Pid: 249, comm: kworker/u:4 Not tainted 3.8.0-rc3+ #16 Call Trace: [<ffffffff8106fc23>] warn_slowpath_common+0xc3/0xf0 [<ffffffff8106fc6a>] warn_slowpath_null+0x1a/0x20 [<ffffffffa03fb909>] ext4_convert_unwritten_extents+0x149/0x210 [ext4] [<ffffffff811064fa>] ? __lock_release+0x1da/0x1f0 [<ffffffffa03c368e>] ext4_end_io+0x3e/0x160 [ext4] [<ffffffff813aab40>] ? __list_del_entry+0x210/0x250 [<ffffffffa03c3a21>] ext4_do_flush_completed_IO+0x101/0x280 [ext4] [<ffffffffa03c3bb6>] ext4_end_io_work+0x16/0x20 [ext4] [<ffffffff8109f7dd>] process_one_work+0x4ad/0x780 [<ffffffff8109f6d2>] ? process_one_work+0x3a2/0x780 [<ffffffffa03c3ba0>] ? ext4_do_flush_completed_IO+0x280/0x280 [ext4] [<ffffffff810a3ed1>] worker_thread+0x3f1/0x590 [<ffffffff810a3ae0>] ? manage_workers+0x210/0x210 [<ffffffff810ac870>] kthread+0x100/0x110 [<ffffffff810ac770>] ? __init_kthread_worker+0x70/0x70 [<ffffffff81812e2c>] ret_from_fork+0x7c/0xb0 [<ffffffff810ac770>] ? __init_kthread_worker+0x70/0x70 ---[ end trace add5cefed72186f8 ]--- EXT4-fs (dm-3): ext4_convert_unwritten_extents:4522: inode #12: block 1379787: len 21: ext4_ext_map_blocks returned -5 EXT4-fs (dm-3): failed to convert unwritten extents to written extents -- potential data loss! (inode 12, offset 5651562496, size 131072, error -5) I've run 286'th xfstest (this is my own copy of xfstest so 286'th test is differ from mainstream one) you can find it here https://raw.github.com/dmonakhov/xfstests/devel/286 In two words it is stress test which run DIO/AIO,truncate,fallocate in parallel. Also you need recent FIO(http://git.kernel.dk/?p=fio.git;a=summary) Currently I try to understand what caused this issue. > > So we disable merging of uninitialized extents which allows us to simplify > the code. Extents will get merged after they are converted to initialized > ones. > > Reviewed-by: Zheng Liu <wenqing.lz@taobao.com> > Signed-off-by: Jan Kara <jack@suse.cz> > --- > fs/ext4/extents.c | 61 +++++++++++++++------------------------------------- > 1 files changed, 18 insertions(+), 43 deletions(-) > > diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c > index 26af228..f1ce33a 100644 > --- a/fs/ext4/extents.c > +++ b/fs/ext4/extents.c > @@ -54,9 +54,6 @@ > #define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ > #define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ > > -#define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */ > -#define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */ > - > static __le32 ext4_extent_block_csum(struct inode *inode, > struct ext4_extent_header *eh) > { > @@ -1579,20 +1576,17 @@ int > ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, > struct ext4_extent *ex2) > { > - unsigned short ext1_ee_len, ext2_ee_len, max_len; > + unsigned ext1_ee_len, ext2_ee_len; > > /* > - * Make sure that either both extents are uninitialized, or > - * both are _not_. > + * Make sure that both extents are initialized. We don't merge > + * uninitialized extents so that we can be sure that end_io code has > + * the extent that was written properly split out and conversion to > + * initialized is trivial. > */ > - if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2)) > + if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2)) > return 0; > > - if (ext4_ext_is_uninitialized(ex1)) > - max_len = EXT_UNINIT_MAX_LEN; > - else > - max_len = EXT_INIT_MAX_LEN; > - > ext1_ee_len = ext4_ext_get_actual_len(ex1); > ext2_ee_len = ext4_ext_get_actual_len(ex2); > > @@ -1605,7 +1599,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, > * as an RO_COMPAT feature, refuse to merge to extents if > * this can result in the top bit of ee_len being set. > */ > - if (ext1_ee_len + ext2_ee_len > max_len) > + if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN) > return 0; > #ifdef AGGRESSIVE_TEST > if (ext1_ee_len >= 4) > @@ -2959,9 +2953,6 @@ static int ext4_split_extent_at(handle_t *handle, > unsigned int ee_len, depth; > int err = 0; > > - BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) == > - (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)); > - > ext_debug("ext4_split_extents_at: inode %lu, logical" > "block %llu\n", inode->i_ino, (unsigned long long)split); > > @@ -3020,14 +3011,7 @@ static int ext4_split_extent_at(handle_t *handle, > > err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); > if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { > - if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { > - if (split_flag & EXT4_EXT_DATA_VALID1) > - err = ext4_ext_zeroout(inode, ex2); > - else > - err = ext4_ext_zeroout(inode, ex); > - } else > - err = ext4_ext_zeroout(inode, &orig_ex); > - > + err = ext4_ext_zeroout(inode, &orig_ex); > if (err) > goto fix_extent_len; > /* update the extent length and mark as initialized */ > @@ -3085,8 +3069,6 @@ static int ext4_split_extent(handle_t *handle, > if (uninitialized) > split_flag1 |= EXT4_EXT_MARK_UNINIT1 | > EXT4_EXT_MARK_UNINIT2; > - if (split_flag & EXT4_EXT_DATA_VALID2) > - split_flag1 |= EXT4_EXT_DATA_VALID1; > err = ext4_split_extent_at(handle, inode, path, > map->m_lblk + map->m_len, split_flag1, flags1); > if (err) > @@ -3099,8 +3081,7 @@ static int ext4_split_extent(handle_t *handle, > return PTR_ERR(path); > > if (map->m_lblk >= ee_block) { > - split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT | > - EXT4_EXT_DATA_VALID2); > + split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT; > if (uninitialized) > split_flag1 |= EXT4_EXT_MARK_UNINIT1; > if (split_flag & EXT4_EXT_MARK_UNINIT2) > @@ -3379,8 +3360,7 @@ static int ext4_split_unwritten_extents(handle_t *handle, > > split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; > split_flag |= EXT4_EXT_MARK_UNINIT2; > - if (flags & EXT4_GET_BLOCKS_CONVERT) > - split_flag |= EXT4_EXT_DATA_VALID2; > + > flags |= EXT4_GET_BLOCKS_PRE_IO; > return ext4_split_extent(handle, inode, path, map, split_flag, flags); > } > @@ -3405,20 +3385,15 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, > "block %llu, max_blocks %u\n", inode->i_ino, > (unsigned long long)ee_block, ee_len); > > - /* If extent is larger than requested then split is required */ > + /* Extent is larger than requested? */ > if (ee_block != map->m_lblk || ee_len > map->m_len) { > - err = ext4_split_unwritten_extents(handle, inode, map, path, > - EXT4_GET_BLOCKS_CONVERT); > - if (err < 0) > - goto out; > - ext4_ext_drop_refs(path); > - path = ext4_ext_find_extent(inode, map->m_lblk, path); > - if (IS_ERR(path)) { > - err = PTR_ERR(path); > - goto out; > - } > - depth = ext_depth(inode); > - ex = path[depth].p_ext; > + EXT4_ERROR_INODE(inode, "Written extent modified before IO" > + " finished: extent logical block %llu, len %u; IO" > + " logical block %llu, len %u\n", > + (unsigned long long)ee_block, ee_len, > + (unsigned long long)map->m_lblk, map->m_len); > + err = -EIO; > + goto out; > } > > err = ext4_ext_get_access(handle, inode, path + depth); > -- > 1.7.1 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu 24-01-13 13:49:45, Dmitry Monakhov wrote: > On Fri, 18 Jan 2013 13:00:38 +0100, Jan Kara <jack@suse.cz> wrote: > > Merging of uninitialized extents creates all sorts of interesting race > > possibilities when writeback / DIO races with fallocate. Thus > > ext4_convert_unwritten_extents_endio() has to deal with a case where > > extent to be converted needs to be split out first. That isn't nice > > for two reasons: > > > > 1) It may need allocation of extent tree block so ENOSPC is possible. > > 2) It complicates end_io handling code > As we already discussed your idea is 100% correct, but even with > what patch I still able to trigger situation where split it required. > I've got following error with this patch applied on top of 7f5118629f7 > EXT4-fs error (device dm-3): ext4_convert_unwritten_extents_endio:3411: > inode #12: comm kworker/u:4: Written extent modified before IO finished: > extent logical block 1379787, len 64; IO logical block 1379787, len 21 Drat, thanks for heads up. I did run xfstests on the patch set but apparently you are doing something more evil :) If I get your test & error right, you do AIO DIO to a file while doing truncate 0, fallocate SIZE, in a loop. And extent is found longer when we finish the IO. Am I right? Honza > ------------[ cut here ]------------ > WARNING: at fs/ext4/extents.c:4518 > ext4_convert_unwritten_extents+0x149/0x210 [ext4]() > Hardware name: > Modules linked in: ext4 jbd2 cpufreq_ondemand acpi_cpufreq freq_table > mperf coretemp kvm_intel kvm crc32c_intel microcode sg button ext3 jbd > mbcache sd_mod crc_t10dif ahci libahci pata_acpi ata_generic dm_mirror > dm_region_hash dm_log dm_mod > Pid: 249, comm: kworker/u:4 Not tainted 3.8.0-rc3+ #16 > Call Trace: > [<ffffffff8106fc23>] warn_slowpath_common+0xc3/0xf0 > [<ffffffff8106fc6a>] warn_slowpath_null+0x1a/0x20 > [<ffffffffa03fb909>] ext4_convert_unwritten_extents+0x149/0x210 [ext4] > [<ffffffff811064fa>] ? __lock_release+0x1da/0x1f0 > [<ffffffffa03c368e>] ext4_end_io+0x3e/0x160 [ext4] > [<ffffffff813aab40>] ? __list_del_entry+0x210/0x250 > [<ffffffffa03c3a21>] ext4_do_flush_completed_IO+0x101/0x280 [ext4] > [<ffffffffa03c3bb6>] ext4_end_io_work+0x16/0x20 [ext4] > [<ffffffff8109f7dd>] process_one_work+0x4ad/0x780 > [<ffffffff8109f6d2>] ? process_one_work+0x3a2/0x780 > [<ffffffffa03c3ba0>] ? ext4_do_flush_completed_IO+0x280/0x280 [ext4] > [<ffffffff810a3ed1>] worker_thread+0x3f1/0x590 > [<ffffffff810a3ae0>] ? manage_workers+0x210/0x210 > [<ffffffff810ac870>] kthread+0x100/0x110 > [<ffffffff810ac770>] ? __init_kthread_worker+0x70/0x70 > [<ffffffff81812e2c>] ret_from_fork+0x7c/0xb0 > [<ffffffff810ac770>] ? __init_kthread_worker+0x70/0x70 > ---[ end trace add5cefed72186f8 ]--- > EXT4-fs (dm-3): ext4_convert_unwritten_extents:4522: inode #12: block > 1379787: len 21: ext4_ext_map_blocks returned -5 > EXT4-fs (dm-3): failed to convert unwritten extents to written > extents -- potential data loss! (inode 12, offset 5651562496, size > 131072, error -5) > > I've run 286'th xfstest (this is my own copy of xfstest so 286'th test > is differ from mainstream one) you can find it here > https://raw.github.com/dmonakhov/xfstests/devel/286 > In two words it is stress test which run DIO/AIO,truncate,fallocate in parallel. > Also you need recent FIO(http://git.kernel.dk/?p=fio.git;a=summary) > > Currently I try to understand what caused this issue. > > > > So we disable merging of uninitialized extents which allows us to simplify > > the code. Extents will get merged after they are converted to initialized > > ones. > > > > Reviewed-by: Zheng Liu <wenqing.lz@taobao.com> > > Signed-off-by: Jan Kara <jack@suse.cz> > > --- > > fs/ext4/extents.c | 61 +++++++++++++++------------------------------------- > > 1 files changed, 18 insertions(+), 43 deletions(-) > > > > diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c > > index 26af228..f1ce33a 100644 > > --- a/fs/ext4/extents.c > > +++ b/fs/ext4/extents.c > > @@ -54,9 +54,6 @@ > > #define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ > > #define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ > > > > -#define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */ > > -#define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */ > > - > > static __le32 ext4_extent_block_csum(struct inode *inode, > > struct ext4_extent_header *eh) > > { > > @@ -1579,20 +1576,17 @@ int > > ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, > > struct ext4_extent *ex2) > > { > > - unsigned short ext1_ee_len, ext2_ee_len, max_len; > > + unsigned ext1_ee_len, ext2_ee_len; > > > > /* > > - * Make sure that either both extents are uninitialized, or > > - * both are _not_. > > + * Make sure that both extents are initialized. We don't merge > > + * uninitialized extents so that we can be sure that end_io code has > > + * the extent that was written properly split out and conversion to > > + * initialized is trivial. > > */ > > - if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2)) > > + if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2)) > > return 0; > > > > - if (ext4_ext_is_uninitialized(ex1)) > > - max_len = EXT_UNINIT_MAX_LEN; > > - else > > - max_len = EXT_INIT_MAX_LEN; > > - > > ext1_ee_len = ext4_ext_get_actual_len(ex1); > > ext2_ee_len = ext4_ext_get_actual_len(ex2); > > > > @@ -1605,7 +1599,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, > > * as an RO_COMPAT feature, refuse to merge to extents if > > * this can result in the top bit of ee_len being set. > > */ > > - if (ext1_ee_len + ext2_ee_len > max_len) > > + if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN) > > return 0; > > #ifdef AGGRESSIVE_TEST > > if (ext1_ee_len >= 4) > > @@ -2959,9 +2953,6 @@ static int ext4_split_extent_at(handle_t *handle, > > unsigned int ee_len, depth; > > int err = 0; > > > > - BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) == > > - (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)); > > - > > ext_debug("ext4_split_extents_at: inode %lu, logical" > > "block %llu\n", inode->i_ino, (unsigned long long)split); > > > > @@ -3020,14 +3011,7 @@ static int ext4_split_extent_at(handle_t *handle, > > > > err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); > > if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { > > - if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { > > - if (split_flag & EXT4_EXT_DATA_VALID1) > > - err = ext4_ext_zeroout(inode, ex2); > > - else > > - err = ext4_ext_zeroout(inode, ex); > > - } else > > - err = ext4_ext_zeroout(inode, &orig_ex); > > - > > + err = ext4_ext_zeroout(inode, &orig_ex); > > if (err) > > goto fix_extent_len; > > /* update the extent length and mark as initialized */ > > @@ -3085,8 +3069,6 @@ static int ext4_split_extent(handle_t *handle, > > if (uninitialized) > > split_flag1 |= EXT4_EXT_MARK_UNINIT1 | > > EXT4_EXT_MARK_UNINIT2; > > - if (split_flag & EXT4_EXT_DATA_VALID2) > > - split_flag1 |= EXT4_EXT_DATA_VALID1; > > err = ext4_split_extent_at(handle, inode, path, > > map->m_lblk + map->m_len, split_flag1, flags1); > > if (err) > > @@ -3099,8 +3081,7 @@ static int ext4_split_extent(handle_t *handle, > > return PTR_ERR(path); > > > > if (map->m_lblk >= ee_block) { > > - split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT | > > - EXT4_EXT_DATA_VALID2); > > + split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT; > > if (uninitialized) > > split_flag1 |= EXT4_EXT_MARK_UNINIT1; > > if (split_flag & EXT4_EXT_MARK_UNINIT2) > > @@ -3379,8 +3360,7 @@ static int ext4_split_unwritten_extents(handle_t *handle, > > > > split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; > > split_flag |= EXT4_EXT_MARK_UNINIT2; > > - if (flags & EXT4_GET_BLOCKS_CONVERT) > > - split_flag |= EXT4_EXT_DATA_VALID2; > > + > > flags |= EXT4_GET_BLOCKS_PRE_IO; > > return ext4_split_extent(handle, inode, path, map, split_flag, flags); > > } > > @@ -3405,20 +3385,15 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, > > "block %llu, max_blocks %u\n", inode->i_ino, > > (unsigned long long)ee_block, ee_len); > > > > - /* If extent is larger than requested then split is required */ > > + /* Extent is larger than requested? */ > > if (ee_block != map->m_lblk || ee_len > map->m_len) { > > - err = ext4_split_unwritten_extents(handle, inode, map, path, > > - EXT4_GET_BLOCKS_CONVERT); > > - if (err < 0) > > - goto out; > > - ext4_ext_drop_refs(path); > > - path = ext4_ext_find_extent(inode, map->m_lblk, path); > > - if (IS_ERR(path)) { > > - err = PTR_ERR(path); > > - goto out; > > - } > > - depth = ext_depth(inode); > > - ex = path[depth].p_ext; > > + EXT4_ERROR_INODE(inode, "Written extent modified before IO" > > + " finished: extent logical block %llu, len %u; IO" > > + " logical block %llu, len %u\n", > > + (unsigned long long)ee_block, ee_len, > > + (unsigned long long)map->m_lblk, map->m_len); > > + err = -EIO; > > + goto out; > > } > > > > err = ext4_ext_get_access(handle, inode, path + depth); > > -- > > 1.7.1 > > > > -- > > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in > > the body of a message to majordomo@vger.kernel.org > > More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, 24 Jan 2013 16:12:24 +0100, Jan Kara <jack@suse.cz> wrote: > On Thu 24-01-13 13:49:45, Dmitry Monakhov wrote: > > On Fri, 18 Jan 2013 13:00:38 +0100, Jan Kara <jack@suse.cz> wrote: > > > Merging of uninitialized extents creates all sorts of interesting race > > > possibilities when writeback / DIO races with fallocate. Thus > > > ext4_convert_unwritten_extents_endio() has to deal with a case where > > > extent to be converted needs to be split out first. That isn't nice > > > for two reasons: > > > > > > 1) It may need allocation of extent tree block so ENOSPC is possible. > > > 2) It complicates end_io handling code > > As we already discussed your idea is 100% correct, but even with > > what patch I still able to trigger situation where split it required. > > I've got following error with this patch applied on top of 7f5118629f7 > > EXT4-fs error (device dm-3): ext4_convert_unwritten_extents_endio:3411: > > inode #12: comm kworker/u:4: Written extent modified before IO finished: > > extent logical block 1379787, len 64; IO logical block 1379787, len 21 > Drat, thanks for heads up. I did run xfstests on the patch set but > apparently you are doing something more evil :) If I get your test & error > right, you do AIO DIO to a file while doing truncate 0, fallocate SIZE, in > a loop. And extent is found longer when we finish the IO. Am I right? Correct. AFAIU we have another bug which break things I've added prink for ext4_can_extents_be_merged if it was positive And have got following output: lblk len pblk uninit lblk len pblk uninit ext4_can_extents_be_merged:1618 ex1[2254944:32, 2052192]:0 ex2[2254976:32, 2052224]:0 ext4_can_extents_be_merged:1618 ex1[398176:32, 2198368]:0 ex2[398208:32,2198400]:0 ext4_can_extents_be_merged:1618 ex1[584704:32, 1379328]:0 ex2[584736:32,1379360]:0 ext4_can_extents_be_merged:1618 ex1[1407488:32, 762368]:0 ex2[1407520:32, 762400]:0 ext4_can_extents_be_merged:1618 ex1[443744:32, 2495456]:0 ex2[443776:32,2495488]:0 ext4_can_extents_be_merged:1618 ex1[1057230:1, 2190944]:0 ex2[1057231:17, 2190945]:0 ext4_can_extents_be_merged:1618 ex1[2108160:832, 2563328]:0 ex2[2108992:32, 2564160]:0 ##### Here Both extents was initialized ^^^ ^^^ EXT4-fs error (device dm-3): ext4_convert_unwritten_extents_endio:3426: inode #12: comm kworker/u:4: Written extent modif ied before IO finished: extent logical block 2108576, len 448; IO logical block 2108576, len 32 #####But right after that it is appeared uninitialized. > > Honza > > ------------[ cut here ]------------ > > WARNING: at fs/ext4/extents.c:4518 > > ext4_convert_unwritten_extents+0x149/0x210 [ext4]() > > Hardware name: > > Modules linked in: ext4 jbd2 cpufreq_ondemand acpi_cpufreq freq_table > > mperf coretemp kvm_intel kvm crc32c_intel microcode sg button ext3 jbd > > mbcache sd_mod crc_t10dif ahci libahci pata_acpi ata_generic dm_mirror > > dm_region_hash dm_log dm_mod > > Pid: 249, comm: kworker/u:4 Not tainted 3.8.0-rc3+ #16 > > Call Trace: > > [<ffffffff8106fc23>] warn_slowpath_common+0xc3/0xf0 > > [<ffffffff8106fc6a>] warn_slowpath_null+0x1a/0x20 > > [<ffffffffa03fb909>] ext4_convert_unwritten_extents+0x149/0x210 [ext4] > > [<ffffffff811064fa>] ? __lock_release+0x1da/0x1f0 > > [<ffffffffa03c368e>] ext4_end_io+0x3e/0x160 [ext4] > > [<ffffffff813aab40>] ? __list_del_entry+0x210/0x250 > > [<ffffffffa03c3a21>] ext4_do_flush_completed_IO+0x101/0x280 [ext4] > > [<ffffffffa03c3bb6>] ext4_end_io_work+0x16/0x20 [ext4] > > [<ffffffff8109f7dd>] process_one_work+0x4ad/0x780 > > [<ffffffff8109f6d2>] ? process_one_work+0x3a2/0x780 > > [<ffffffffa03c3ba0>] ? ext4_do_flush_completed_IO+0x280/0x280 [ext4] > > [<ffffffff810a3ed1>] worker_thread+0x3f1/0x590 > > [<ffffffff810a3ae0>] ? manage_workers+0x210/0x210 > > [<ffffffff810ac870>] kthread+0x100/0x110 > > [<ffffffff810ac770>] ? __init_kthread_worker+0x70/0x70 > > [<ffffffff81812e2c>] ret_from_fork+0x7c/0xb0 > > [<ffffffff810ac770>] ? __init_kthread_worker+0x70/0x70 > > ---[ end trace add5cefed72186f8 ]--- > > EXT4-fs (dm-3): ext4_convert_unwritten_extents:4522: inode #12: block > > 1379787: len 21: ext4_ext_map_blocks returned -5 > > EXT4-fs (dm-3): failed to convert unwritten extents to written > > extents -- potential data loss! (inode 12, offset 5651562496, size > > 131072, error -5) > > > > I've run 286'th xfstest (this is my own copy of xfstest so 286'th test > > is differ from mainstream one) you can find it here > > https://raw.github.com/dmonakhov/xfstests/devel/286 > > In two words it is stress test which run DIO/AIO,truncate,fallocate in parallel. > > Also you need recent FIO(http://git.kernel.dk/?p=fio.git;a=summary) > > > > Currently I try to understand what caused this issue. > > > > > > So we disable merging of uninitialized extents which allows us to simplify > > > the code. Extents will get merged after they are converted to initialized > > > ones. > > > > > > Reviewed-by: Zheng Liu <wenqing.lz@taobao.com> > > > Signed-off-by: Jan Kara <jack@suse.cz> > > > --- > > > fs/ext4/extents.c | 61 +++++++++++++++------------------------------------- > > > 1 files changed, 18 insertions(+), 43 deletions(-) > > > > > > diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c > > > index 26af228..f1ce33a 100644 > > > --- a/fs/ext4/extents.c > > > +++ b/fs/ext4/extents.c > > > @@ -54,9 +54,6 @@ > > > #define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ > > > #define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ > > > > > > -#define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */ > > > -#define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */ > > > - > > > static __le32 ext4_extent_block_csum(struct inode *inode, > > > struct ext4_extent_header *eh) > > > { > > > @@ -1579,20 +1576,17 @@ int > > > ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, > > > struct ext4_extent *ex2) > > > { > > > - unsigned short ext1_ee_len, ext2_ee_len, max_len; > > > + unsigned ext1_ee_len, ext2_ee_len; > > > > > > /* > > > - * Make sure that either both extents are uninitialized, or > > > - * both are _not_. > > > + * Make sure that both extents are initialized. We don't merge > > > + * uninitialized extents so that we can be sure that end_io code has > > > + * the extent that was written properly split out and conversion to > > > + * initialized is trivial. > > > */ > > > - if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2)) > > > + if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2)) > > > return 0; > > > > > > - if (ext4_ext_is_uninitialized(ex1)) > > > - max_len = EXT_UNINIT_MAX_LEN; > > > - else > > > - max_len = EXT_INIT_MAX_LEN; > > > - > > > ext1_ee_len = ext4_ext_get_actual_len(ex1); > > > ext2_ee_len = ext4_ext_get_actual_len(ex2); > > > > > > @@ -1605,7 +1599,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, > > > * as an RO_COMPAT feature, refuse to merge to extents if > > > * this can result in the top bit of ee_len being set. > > > */ > > > - if (ext1_ee_len + ext2_ee_len > max_len) > > > + if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN) > > > return 0; > > > #ifdef AGGRESSIVE_TEST > > > if (ext1_ee_len >= 4) > > > @@ -2959,9 +2953,6 @@ static int ext4_split_extent_at(handle_t *handle, > > > unsigned int ee_len, depth; > > > int err = 0; > > > > > > - BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) == > > > - (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)); > > > - > > > ext_debug("ext4_split_extents_at: inode %lu, logical" > > > "block %llu\n", inode->i_ino, (unsigned long long)split); > > > > > > @@ -3020,14 +3011,7 @@ static int ext4_split_extent_at(handle_t *handle, > > > > > > err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); > > > if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { > > > - if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { > > > - if (split_flag & EXT4_EXT_DATA_VALID1) > > > - err = ext4_ext_zeroout(inode, ex2); > > > - else > > > - err = ext4_ext_zeroout(inode, ex); > > > - } else > > > - err = ext4_ext_zeroout(inode, &orig_ex); > > > - > > > + err = ext4_ext_zeroout(inode, &orig_ex); > > > if (err) > > > goto fix_extent_len; > > > /* update the extent length and mark as initialized */ > > > @@ -3085,8 +3069,6 @@ static int ext4_split_extent(handle_t *handle, > > > if (uninitialized) > > > split_flag1 |= EXT4_EXT_MARK_UNINIT1 | > > > EXT4_EXT_MARK_UNINIT2; > > > - if (split_flag & EXT4_EXT_DATA_VALID2) > > > - split_flag1 |= EXT4_EXT_DATA_VALID1; > > > err = ext4_split_extent_at(handle, inode, path, > > > map->m_lblk + map->m_len, split_flag1, flags1); > > > if (err) > > > @@ -3099,8 +3081,7 @@ static int ext4_split_extent(handle_t *handle, > > > return PTR_ERR(path); > > > > > > if (map->m_lblk >= ee_block) { > > > - split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT | > > > - EXT4_EXT_DATA_VALID2); > > > + split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT; > > > if (uninitialized) > > > split_flag1 |= EXT4_EXT_MARK_UNINIT1; > > > if (split_flag & EXT4_EXT_MARK_UNINIT2) > > > @@ -3379,8 +3360,7 @@ static int ext4_split_unwritten_extents(handle_t *handle, > > > > > > split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; > > > split_flag |= EXT4_EXT_MARK_UNINIT2; > > > - if (flags & EXT4_GET_BLOCKS_CONVERT) > > > - split_flag |= EXT4_EXT_DATA_VALID2; > > > + > > > flags |= EXT4_GET_BLOCKS_PRE_IO; > > > return ext4_split_extent(handle, inode, path, map, split_flag, flags); > > > } > > > @@ -3405,20 +3385,15 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, > > > "block %llu, max_blocks %u\n", inode->i_ino, > > > (unsigned long long)ee_block, ee_len); > > > > > > - /* If extent is larger than requested then split is required */ > > > + /* Extent is larger than requested? */ > > > if (ee_block != map->m_lblk || ee_len > map->m_len) { > > > - err = ext4_split_unwritten_extents(handle, inode, map, path, > > > - EXT4_GET_BLOCKS_CONVERT); > > > - if (err < 0) > > > - goto out; > > > - ext4_ext_drop_refs(path); > > > - path = ext4_ext_find_extent(inode, map->m_lblk, path); > > > - if (IS_ERR(path)) { > > > - err = PTR_ERR(path); > > > - goto out; > > > - } > > > - depth = ext_depth(inode); > > > - ex = path[depth].p_ext; > > > + EXT4_ERROR_INODE(inode, "Written extent modified before IO" > > > + " finished: extent logical block %llu, len %u; IO" > > > + " logical block %llu, len %u\n", > > > + (unsigned long long)ee_block, ee_len, > > > + (unsigned long long)map->m_lblk, map->m_len); > > > + err = -EIO; > > > + goto out; > > > } > > > > > > err = ext4_ext_get_access(handle, inode, path + depth); > > > -- > > > 1.7.1 > > > > > > -- > > > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in > > > the body of a message to majordomo@vger.kernel.org > > > More majordomo info at http://vger.kernel.org/majordomo-info.html > -- > Jan Kara <jack@suse.cz> > SUSE Labs, CR > -- > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Jan 24, 2013 at 01:49:45PM +0400, Dmitry Monakhov wrote: > > 1) It may need allocation of extent tree block so ENOSPC is possible. > > 2) It complicates end_io handling code > > As we already discussed your idea is 100% correct, but even with > what patch I still able to trigger situation where split it required. > I've got following error with this patch applied on top of 7f5118629f7 > EXT4-fs error (device dm-3): ext4_convert_unwritten_extents_endio:3411: > inode #12: comm kworker/u:4: Written extent modified before IO finished: > extent logical block 1379787, len 64; IO logical block 1379787, len 21 So does this patch makes this better enough that it's worth applying now? Or should we hold off until we figure out what's going on with the race that you've foind? Thanks, - Ted -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, 28 Jan 2013 09:36:47 -0500, Theodore Ts'o <tytso@mit.edu> wrote: > On Thu, Jan 24, 2013 at 01:49:45PM +0400, Dmitry Monakhov wrote: > > > 1) It may need allocation of extent tree block so ENOSPC is possible. > > > 2) It complicates end_io handling code > > > > As we already discussed your idea is 100% correct, but even with > > what patch I still able to trigger situation where split it required. > > I've got following error with this patch applied on top of 7f5118629f7 > > EXT4-fs error (device dm-3): ext4_convert_unwritten_extents_endio:3411: > > inode #12: comm kworker/u:4: Written extent modified before IO finished: > > extent logical block 1379787, len 64; IO logical block 1379787, len 21 > > So does this patch makes this better enough that it's worth applying > now? Or should we hold off until we figure out what's going on with > the race that you've foind? Actually this patch consists of two peaces 1) disable merging of uninitialized extents. (1 line change) I'm absolutely agree with it. 2) Remove explicit extent split inside endio. By assumption explicit split is workaround against 1'st bug, but even w/ disabled merging we still have another race which result in situation where split is required. At this moment i try to localize this issue. Let's commit first part, but keep second one until I'll find the race. > > Thanks, > > - Ted > -- > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, 24 Jan 2013 13:49:45 +0400, Dmitry Monakhov <dmonakhov@openvz.org> wrote: > On Fri, 18 Jan 2013 13:00:38 +0100, Jan Kara <jack@suse.cz> wrote: > > Merging of uninitialized extents creates all sorts of interesting race > > possibilities when writeback / DIO races with fallocate. Thus > > ext4_convert_unwritten_extents_endio() has to deal with a case where > > extent to be converted needs to be split out first. That isn't nice > > for two reasons: > > > > 1) It may need allocation of extent tree block so ENOSPC is possible. > > 2) It complicates end_io handling code > As we already discussed your idea is 100% correct, but even with > what patch I still able to trigger situation where split it required. > I've got following error with this patch applied on top of 7f5118629f7 > EXT4-fs error (device dm-3): ext4_convert_unwritten_extents_endio:3411: > inode #12: comm kworker/u:4: Written extent modified before IO finished: > extent logical block 1379787, len 64; IO logical block 1379787, len 21 > > ------------[ cut here ]------------ > WARNING: at fs/ext4/extents.c:4518 > ext4_convert_unwritten_extents+0x149/0x210 [ext4]() OK I've found it. I'm a bit disappointed, it is even not a race condition, but simple corruption. Patch is available here: http://article.gmane.org/gmane.comp.file-systems.ext4/36762 link for sain mailer client: <1359617098-18451-1-git-send-email-dmonakhov@openvz.org> After this bug was fixed it is safe to apply both Jan's patches: [PATCH 04/12] ext4: Disable merging of uninitialized extents [PATCH 05/12] ext4: Remove unnecessary wait for extent conversion in ext4_fallocate() At least it survives after all my tests. BTW: It is appeared that ext4_debug() infrastructure is almost unusable because based on printk() instead of light-wait event tracing infrastructure. I'm now work on patch-set which fix that. > Hardware name: > Modules linked in: ext4 jbd2 cpufreq_ondemand acpi_cpufreq freq_table > mperf coretemp kvm_intel kvm crc32c_intel microcode sg button ext3 jbd > mbcache sd_mod crc_t10dif ahci libahci pata_acpi ata_generic dm_mirror > dm_region_hash dm_log dm_mod > Pid: 249, comm: kworker/u:4 Not tainted 3.8.0-rc3+ #16 > Call Trace: > [<ffffffff8106fc23>] warn_slowpath_common+0xc3/0xf0 > [<ffffffff8106fc6a>] warn_slowpath_null+0x1a/0x20 > [<ffffffffa03fb909>] ext4_convert_unwritten_extents+0x149/0x210 [ext4] > [<ffffffff811064fa>] ? __lock_release+0x1da/0x1f0 > [<ffffffffa03c368e>] ext4_end_io+0x3e/0x160 [ext4] > [<ffffffff813aab40>] ? __list_del_entry+0x210/0x250 > [<ffffffffa03c3a21>] ext4_do_flush_completed_IO+0x101/0x280 [ext4] > [<ffffffffa03c3bb6>] ext4_end_io_work+0x16/0x20 [ext4] > [<ffffffff8109f7dd>] process_one_work+0x4ad/0x780 > [<ffffffff8109f6d2>] ? process_one_work+0x3a2/0x780 > [<ffffffffa03c3ba0>] ? ext4_do_flush_completed_IO+0x280/0x280 [ext4] > [<ffffffff810a3ed1>] worker_thread+0x3f1/0x590 > [<ffffffff810a3ae0>] ? manage_workers+0x210/0x210 > [<ffffffff810ac870>] kthread+0x100/0x110 > [<ffffffff810ac770>] ? __init_kthread_worker+0x70/0x70 > [<ffffffff81812e2c>] ret_from_fork+0x7c/0xb0 > [<ffffffff810ac770>] ? __init_kthread_worker+0x70/0x70 > ---[ end trace add5cefed72186f8 ]--- > EXT4-fs (dm-3): ext4_convert_unwritten_extents:4522: inode #12: block > 1379787: len 21: ext4_ext_map_blocks returned -5 > EXT4-fs (dm-3): failed to convert unwritten extents to written > extents -- potential data loss! (inode 12, offset 5651562496, size > 131072, error -5) > > I've run 286'th xfstest (this is my own copy of xfstest so 286'th test > is differ from mainstream one) you can find it here > https://raw.github.com/dmonakhov/xfstests/devel/286 > In two words it is stress test which run DIO/AIO,truncate,fallocate in parallel. > Also you need recent FIO(http://git.kernel.dk/?p=fio.git;a=summary) > > Currently I try to understand what caused this issue. > > > > So we disable merging of uninitialized extents which allows us to simplify > > the code. Extents will get merged after they are converted to initialized > > ones. > > > > Reviewed-by: Zheng Liu <wenqing.lz@taobao.com> > > Signed-off-by: Jan Kara <jack@suse.cz> > > --- > > fs/ext4/extents.c | 61 +++++++++++++++------------------------------------- > > 1 files changed, 18 insertions(+), 43 deletions(-) > > > > diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c > > index 26af228..f1ce33a 100644 > > --- a/fs/ext4/extents.c > > +++ b/fs/ext4/extents.c > > @@ -54,9 +54,6 @@ > > #define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ > > #define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ > > > > -#define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */ > > -#define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */ > > - > > static __le32 ext4_extent_block_csum(struct inode *inode, > > struct ext4_extent_header *eh) > > { > > @@ -1579,20 +1576,17 @@ int > > ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, > > struct ext4_extent *ex2) > > { > > - unsigned short ext1_ee_len, ext2_ee_len, max_len; > > + unsigned ext1_ee_len, ext2_ee_len; > > > > /* > > - * Make sure that either both extents are uninitialized, or > > - * both are _not_. > > + * Make sure that both extents are initialized. We don't merge > > + * uninitialized extents so that we can be sure that end_io code has > > + * the extent that was written properly split out and conversion to > > + * initialized is trivial. > > */ > > - if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2)) > > + if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2)) > > return 0; > > > > - if (ext4_ext_is_uninitialized(ex1)) > > - max_len = EXT_UNINIT_MAX_LEN; > > - else > > - max_len = EXT_INIT_MAX_LEN; > > - > > ext1_ee_len = ext4_ext_get_actual_len(ex1); > > ext2_ee_len = ext4_ext_get_actual_len(ex2); > > > > @@ -1605,7 +1599,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, > > * as an RO_COMPAT feature, refuse to merge to extents if > > * this can result in the top bit of ee_len being set. > > */ > > - if (ext1_ee_len + ext2_ee_len > max_len) > > + if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN) > > return 0; > > #ifdef AGGRESSIVE_TEST > > if (ext1_ee_len >= 4) > > @@ -2959,9 +2953,6 @@ static int ext4_split_extent_at(handle_t *handle, > > unsigned int ee_len, depth; > > int err = 0; > > > > - BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) == > > - (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)); > > - > > ext_debug("ext4_split_extents_at: inode %lu, logical" > > "block %llu\n", inode->i_ino, (unsigned long long)split); > > > > @@ -3020,14 +3011,7 @@ static int ext4_split_extent_at(handle_t *handle, > > > > err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); > > if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { > > - if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { > > - if (split_flag & EXT4_EXT_DATA_VALID1) > > - err = ext4_ext_zeroout(inode, ex2); > > - else > > - err = ext4_ext_zeroout(inode, ex); > > - } else > > - err = ext4_ext_zeroout(inode, &orig_ex); > > - > > + err = ext4_ext_zeroout(inode, &orig_ex); > > if (err) > > goto fix_extent_len; > > /* update the extent length and mark as initialized */ > > @@ -3085,8 +3069,6 @@ static int ext4_split_extent(handle_t *handle, > > if (uninitialized) > > split_flag1 |= EXT4_EXT_MARK_UNINIT1 | > > EXT4_EXT_MARK_UNINIT2; > > - if (split_flag & EXT4_EXT_DATA_VALID2) > > - split_flag1 |= EXT4_EXT_DATA_VALID1; > > err = ext4_split_extent_at(handle, inode, path, > > map->m_lblk + map->m_len, split_flag1, flags1); > > if (err) > > @@ -3099,8 +3081,7 @@ static int ext4_split_extent(handle_t *handle, > > return PTR_ERR(path); > > > > if (map->m_lblk >= ee_block) { > > - split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT | > > - EXT4_EXT_DATA_VALID2); > > + split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT; > > if (uninitialized) > > split_flag1 |= EXT4_EXT_MARK_UNINIT1; > > if (split_flag & EXT4_EXT_MARK_UNINIT2) > > @@ -3379,8 +3360,7 @@ static int ext4_split_unwritten_extents(handle_t *handle, > > > > split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; > > split_flag |= EXT4_EXT_MARK_UNINIT2; > > - if (flags & EXT4_GET_BLOCKS_CONVERT) > > - split_flag |= EXT4_EXT_DATA_VALID2; > > + > > flags |= EXT4_GET_BLOCKS_PRE_IO; > > return ext4_split_extent(handle, inode, path, map, split_flag, flags); > > } > > @@ -3405,20 +3385,15 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, > > "block %llu, max_blocks %u\n", inode->i_ino, > > (unsigned long long)ee_block, ee_len); > > > > - /* If extent is larger than requested then split is required */ > > + /* Extent is larger than requested? */ > > if (ee_block != map->m_lblk || ee_len > map->m_len) { > > - err = ext4_split_unwritten_extents(handle, inode, map, path, > > - EXT4_GET_BLOCKS_CONVERT); > > - if (err < 0) > > - goto out; > > - ext4_ext_drop_refs(path); > > - path = ext4_ext_find_extent(inode, map->m_lblk, path); > > - if (IS_ERR(path)) { > > - err = PTR_ERR(path); > > - goto out; > > - } > > - depth = ext_depth(inode); > > - ex = path[depth].p_ext; > > + EXT4_ERROR_INODE(inode, "Written extent modified before IO" > > + " finished: extent logical block %llu, len %u; IO" > > + " logical block %llu, len %u\n", > > + (unsigned long long)ee_block, ee_len, > > + (unsigned long long)map->m_lblk, map->m_len); > > + err = -EIO; > > + goto out; > > } > > > > err = ext4_ext_get_access(handle, inode, path + depth); > > -- > > 1.7.1 > > > > -- > > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in > > the body of a message to majordomo@vger.kernel.org > > More majordomo info at http://vger.kernel.org/majordomo-info.html > -- > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu 31-01-13 11:47:23, Dmitry Monakhov wrote: > On Thu, 24 Jan 2013 13:49:45 +0400, Dmitry Monakhov <dmonakhov@openvz.org> wrote: > > On Fri, 18 Jan 2013 13:00:38 +0100, Jan Kara <jack@suse.cz> wrote: > > > Merging of uninitialized extents creates all sorts of interesting race > > > possibilities when writeback / DIO races with fallocate. Thus > > > ext4_convert_unwritten_extents_endio() has to deal with a case where > > > extent to be converted needs to be split out first. That isn't nice > > > for two reasons: > > > > > > 1) It may need allocation of extent tree block so ENOSPC is possible. > > > 2) It complicates end_io handling code > > As we already discussed your idea is 100% correct, but even with > > what patch I still able to trigger situation where split it required. > > I've got following error with this patch applied on top of 7f5118629f7 > > EXT4-fs error (device dm-3): ext4_convert_unwritten_extents_endio:3411: > > inode #12: comm kworker/u:4: Written extent modified before IO finished: > > extent logical block 1379787, len 64; IO logical block 1379787, len 21 > > > > ------------[ cut here ]------------ > > WARNING: at fs/ext4/extents.c:4518 > > ext4_convert_unwritten_extents+0x149/0x210 [ext4]() > OK I've found it. I'm a bit disappointed, it is even not a race > condition, but simple corruption. > Patch is available here: http://article.gmane.org/gmane.comp.file-systems.ext4/36762 > link for sain mailer client: <1359617098-18451-1-git-send-email-dmonakhov@openvz.org> > After this bug was fixed it is safe to apply both Jan's patches: > [PATCH 04/12] ext4: Disable merging of uninitialized extents > [PATCH 05/12] ext4: Remove unnecessary wait for extent conversion in ext4_fallocate() > At least it survives after all my tests. Thanks for debugging this. I was looking into the code but it didn't come to my mind what happens when ext4_ext_split() fails. > BTW: It is appeared that ext4_debug() infrastructure is almost unusable > because based on printk() instead of light-wait event tracing infrastructure. > I'm now work on patch-set which fix that. That would be certainly welcome. ext4_debug() infrastructure comes from times when tracing didn't exist so we used what we could. These days it's cumbersome... Honza
On Thu, 31 Jan 2013 13:39:30 +0100, Jan Kara <jack@suse.cz> wrote: > On Thu 31-01-13 11:47:23, Dmitry Monakhov wrote: > > On Thu, 24 Jan 2013 13:49:45 +0400, Dmitry Monakhov <dmonakhov@openvz.org> wrote: > > > On Fri, 18 Jan 2013 13:00:38 +0100, Jan Kara <jack@suse.cz> wrote: > > > > Merging of uninitialized extents creates all sorts of interesting race > > > > possibilities when writeback / DIO races with fallocate. Thus > > > > ext4_convert_unwritten_extents_endio() has to deal with a case where > > > > extent to be converted needs to be split out first. That isn't nice > > > > for two reasons: > > > > > > > > 1) It may need allocation of extent tree block so ENOSPC is possible. > > > > 2) It complicates end_io handling code > > > As we already discussed your idea is 100% correct, but even with > > > what patch I still able to trigger situation where split it required. > > > I've got following error with this patch applied on top of 7f5118629f7 > > > EXT4-fs error (device dm-3): ext4_convert_unwritten_extents_endio:3411: > > > inode #12: comm kworker/u:4: Written extent modified before IO finished: > > > extent logical block 1379787, len 64; IO logical block 1379787, len 21 > > > > > > ------------[ cut here ]------------ > > > WARNING: at fs/ext4/extents.c:4518 > > > ext4_convert_unwritten_extents+0x149/0x210 [ext4]() > > OK I've found it. I'm a bit disappointed, it is even not a race > > condition, but simple corruption. > > Patch is available here: http://article.gmane.org/gmane.comp.file-systems.ext4/36762 > > link for sain mailer client: <1359617098-18451-1-git-send-email-dmonakhov@openvz.org> > > After this bug was fixed it is safe to apply both Jan's patches: > > [PATCH 04/12] ext4: Disable merging of uninitialized extents > > [PATCH 05/12] ext4: Remove unnecessary wait for extent conversion in ext4_fallocate() > > At least it survives after all my tests. > Thanks for debugging this. I was looking into the code but it didn't come > to my mind what happens when ext4_ext_split() fails. ext4_ext_split does have not fail. EXTENT:a----------b-------------c------d It call ext4_ext_split_at twice: first ext4_ext_split_at() should split [a,d] to [a,c],[c,d] second ext4_ext_split_at() should split [a,c] to [a,b],[b,c] But if first ext4_ext_split_at() internally failed due to ENOSPC it will does ext_ext_zeroout() which is correct(if ). Second one expect to operate on [a,c] which should be uninitialized, but it operate on [a,d] which was initialized. At the end it mark extent as uninitialized here: if (split == ee_block) { /* * case b: block @split is the block that the extent begins with * then we just change the state of the extent, and splitting * is not needed. */ if (split_flag & EXT4_EXT_MARK_UNINIT2) ext4_ext_mark_uninitialized(ex); ####^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ else ext4_ext_mark_initialized(ex); ... In fact my first ext4_ext_split_at() result in zeroout we can skip second one because it is not necessary, but I just clear EXT4_EXT_MARK_UNINIT2 flag if extent becomes initialized. And fairly to say my patch is not quite right because it is not not correct to perform zeroout on initialized extent. I'll send updated version in a minute. > > > BTW: It is appeared that ext4_debug() infrastructure is almost unusable > > because based on printk() instead of light-wait event tracing infrastructure. > > I'm now work on patch-set which fix that. > That would be certainly welcome. ext4_debug() infrastructure comes from > times when tracing didn't exist so we used what we could. These days it's > cumbersome... > > Honza > -- > Jan Kara <jack@suse.cz> > SUSE Labs, CR > -- > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Jan 31, 2013 at 11:47:23AM +0400, Dmitry Monakhov wrote: > OK I've found it. I'm a bit disappointed, it is even not a race > condition, but simple corruption. > Patch is available here: http://article.gmane.org/gmane.comp.file-systems.ext4/36762 > link for sain mailer client: <1359617098-18451-1-git-send-email-dmonakhov@openvz.org> > After this bug was fixed it is safe to apply both Jan's patches: > [PATCH 04/12] ext4: Disable merging of uninitialized extents > [PATCH 05/12] ext4: Remove unnecessary wait for extent conversion in ext4_fallocate() > At least it survives after all my tests. Thanks for finding it! As folks have probably noticed the dev branch and linux-next already has all of the other patches (except for #3, which we've agreed isn't needed since the original code was correct as-is). I'll work on reviewing your patch and then applying the last three, and then over the weekend I'll push the master branch up, thus casting all of the patches currently in the tree into stone. > BTW: It is appeared that ext4_debug() infrastructure is almost unusable > because based on printk() instead of light-wait event tracing infrastructure. > I'm now work on patch-set which fix that. Yes, I haven't used the ext4_debug infrastructure in quite some time. As I've needed to do debugging I've added new tracepoints, and in some cases I hadn't gotten around to removing the old ext4_debug code, on the assumption that maybe someone else was still using it, but mostly because I was lazy. That would be a great thing to do. - Ted -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Jan 18, 2013 at 01:00:38PM +0100, Jan Kara wrote: > Merging of uninitialized extents creates all sorts of interesting race > possibilities when writeback / DIO races with fallocate. Thus > ext4_convert_unwritten_extents_endio() has to deal with a case where > extent to be converted needs to be split out first. That isn't nice > for two reasons: > > 1) It may need allocation of extent tree block so ENOSPC is possible. > 2) It complicates end_io handling code > > So we disable merging of uninitialized extents which allows us to simplify > the code. Extents will get merged after they are converted to initialized > ones. > > Reviewed-by: Zheng Liu <wenqing.lz@taobao.com> > Signed-off-by: Jan Kara <jack@suse.cz> Sorry for not noticing this earlier, but this patch is causing a regression. It is loading to test 113 failing when dioread_nolock is used: 113 [ 47.619363] EXT4-fs error (device vdb): ext4_convert_unwritten_extents_endio:3411: i node #10951: comm kworker/u:0: Written extent modified before IO finished: extent logical block 1024, len 1024; IO logical block 1024, len 127 [ 47.619363] [ 47.623239] EXT4-fs warning (device vdb): ext4_convert_unwritten_extents:4522: inode #10951: block 1024: len 127: ext4_ext_map_blocks returned -5 [ 47.628975] EXT4-fs (vdb): failed to convert unwritten extents to written extents -- potenti al data loss! (inode 10951, offset 4194304, size 520192, error -5) As a result, I am considering whether or not I should to drop the following patches from the ext4 tree: e63dd9c ext4: disable merging of uninitialized extents de39534 ext4: remove unnecessary wait for extent conversion in ext4_fallocate() 37bf0a8 ext4: ext4_split_extent should take care of extent zeroout I know that these patches fix other potential races which causes data loss, but they've been around for a while, and in practice seem to be relatively rarely hit. Jan, Dmitry, any chance you can take a look at this, and give me suggestions about how to proceed at this point? Thanks, - Ted -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sat 09-02-13 12:10:15, Ted Tso wrote: > On Fri, Jan 18, 2013 at 01:00:38PM +0100, Jan Kara wrote: > > Merging of uninitialized extents creates all sorts of interesting race > > possibilities when writeback / DIO races with fallocate. Thus > > ext4_convert_unwritten_extents_endio() has to deal with a case where > > extent to be converted needs to be split out first. That isn't nice > > for two reasons: > > > > 1) It may need allocation of extent tree block so ENOSPC is possible. > > 2) It complicates end_io handling code > > > > So we disable merging of uninitialized extents which allows us to simplify > > the code. Extents will get merged after they are converted to initialized > > ones. > > > > Reviewed-by: Zheng Liu <wenqing.lz@taobao.com> > > Signed-off-by: Jan Kara <jack@suse.cz> > > Sorry for not noticing this earlier, but this patch is causing a > regression. It is loading to test 113 failing when dioread_nolock is > used: > > 113 [ 47.619363] EXT4-fs error (device vdb): ext4_convert_unwritten_extents_endio:3411: i > node #10951: comm kworker/u:0: Written extent modified before IO finished: extent logical block > 1024, len 1024; IO logical block 1024, len 127 > [ 47.619363] > [ 47.623239] EXT4-fs warning (device vdb): ext4_convert_unwritten_extents:4522: inode #10951: > block 1024: len 127: ext4_ext_map_blocks returned -5 > [ 47.628975] EXT4-fs (vdb): failed to convert unwritten extents to written extents -- potenti > al data loss! (inode 10951, offset 4194304, size 520192, error -5) Hum, yes. Thanks for letting us know. I was able to reproduce this and I'm looking into it now. I guess it is a similar problem as Dmitry fixed in his fix... > As a result, I am considering whether or not I should to drop the > following patches from the ext4 tree: > > e63dd9c ext4: disable merging of uninitialized extents > de39534 ext4: remove unnecessary wait for extent conversion in ext4_fallocate() > 37bf0a8 ext4: ext4_split_extent should take care of extent zeroout > > I know that these patches fix other potential races which causes data > loss, but they've been around for a while, and in practice seem to be > relatively rarely hit. The third patch is a fix which shouldn't cause any issues. So you can take just that one and leave the other two aside until we are able to resolve the issue. Honza
On Tue, Feb 12, 2013 at 10:58:32PM +0100, Jan Kara wrote: > The third patch is a fix which shouldn't cause any issues. So you can > take just that one and leave the other two aside until we are able to > resolve the issue. I thought the third patch depending on the first two? Certainly it doesn't apply cleanly without the first two patches... - Ted -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, 12 Feb 2013 23:57:59 -0500, Theodore Ts'o <tytso@mit.edu> wrote: > On Tue, Feb 12, 2013 at 10:58:32PM +0100, Jan Kara wrote: > > The third patch is a fix which shouldn't cause any issues. So you can > > take just that one and leave the other two aside until we are able to > > resolve the issue. > > I thought the third patch depending on the first two? Certainly it > doesn't apply cleanly without the first two patches... My patch fix old issue, but i've prepared it on top of Jan's patches only for simplicity. I'll send new version which not depend on his patches today. Over-all Jan's statment that split should not happen inside end_io and it is clear sing of a bug is absolutely right decision. This helps us to spot several hidden issues (number is still unknown) so may be it is reasonable to split first patch in two parts: 1) disable uninitialized extents merging itself. 2) Print warning if split is required inside end_io(so only warning will be printed, but w/o data corruption) 3) Get rid of extent split machinery from end_io (because it is not longer valid situation) (1) and (2) should be accepted ASAP and will help us to spot and fix other hidden issues. And we fix all related issues it will be safe to apply (3)'rd one. I'll send patches soon. > > - Ted > -- > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Feb 13, 2013 at 11:26:40AM +0400, Dmitry Monakhov wrote: > This helps us to spot several hidden issues (number is still unknown) > so may be it is reasonable to split first patch in two parts: > 1) disable uninitialized extents merging itself. > 2) Print warning if split is required inside end_io(so only warning will > be printed, but w/o data corruption) > 3) Get rid of extent split machinery from end_io (because it is not > longer valid situation) > > (1) and (2) should be accepted ASAP and will help us to spot and fix > other hidden issues. And we fix all related issues it will be safe > to apply (3)'rd one. > I'll send patches soon. That seems like reasonable approach; I'm not sure we'll be able to get to (3) by the time the merge window opens, but getting (1) and (2) in would be great; if the warning is getting triggered a lot, we might need to only enable it under CONFIG_EXT4_DEBUG, though. If you could you base the patches off of tip of the dev branch (which I just updated), I'd really appreciate it. The last commit on the newly updated dev branch is: 7eedefe ext4: support simple conversion of extent-mapped inodes to use i_blocks In this updated dev branch, I've moved the extent status patches to the unstable portion of the tree due to the bigalloc regressions. So the plan for the merge window is that currently, the extent status patches and the uninitialized extent merging patches are currently on the unstable portion of the tree here: http://repo.or.cz/w/ext4-patch-queue.git git://repo.or.cz/ext4-patch-queue.git That way we can do intensive testing on the rest of the ext4 tree. Once the rest of the patches have been validated, I'll push the master branch on the ext4 git tree to point at the fully validated set of patches, and then we can try to push the rest of the patches that have been moved past the stable-boundary in the ext4 patch queue back onto the dev branch, and see if we can shake out the rest of the problems before the merge window opens. I will be leaving for Hawaii tomorrow, so the amount of time I will have to do a development/debugging work will be limited. On the other hand, it's easy to let regression tests run in the hotel room while I'm visiting volcano craters, so please do send me updated patches, and I'll review and test them as I have time. The joys of having the pre-merge window crunch coincide with your vacation... :-) - Ted -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed 13-02-13 11:26:40, Dmitry Monakhov wrote: > On Tue, 12 Feb 2013 23:57:59 -0500, Theodore Ts'o <tytso@mit.edu> wrote: > > On Tue, Feb 12, 2013 at 10:58:32PM +0100, Jan Kara wrote: > > > The third patch is a fix which shouldn't cause any issues. So you can > > > take just that one and leave the other two aside until we are able to > > > resolve the issue. > > > > I thought the third patch depending on the first two? Certainly it > > doesn't apply cleanly without the first two patches... > My patch fix old issue, but i've prepared it on top of Jan's patches > only for simplicity. I'll send new version which not depend on his > patches today. > Over-all Jan's statment that split should not happen inside end_io > and it is clear sing of a bug is absolutely right decision. > This helps us to spot several hidden issues (number is still unknown) > so may be it is reasonable to split first patch in two parts: > 1) disable uninitialized extents merging itself. > 2) Print warning if split is required inside end_io(so only warning will > be printed, but w/o data corruption) > 3) Get rid of extent split machinery from end_io (because it is not > longer valid situation) > > (1) and (2) should be accepted ASAP and will help us to spot and fix > other hidden issues. And we fix all related issues it will be safe > to apply (3)'rd one. > I'll send patches soon. I agree with this plan, I just think the warnings are currently too easy to trigger (just random AIO DIO writing seems to trigger them with dioread_nolock) so as Ted said we'll probably have to hide them under EXT4_DEBUG or something like that. I'll try to debug the current problem while you prepare the patches :). Honza
On Sat 09-02-13 12:10:15, Ted Tso wrote: > On Fri, Jan 18, 2013 at 01:00:38PM +0100, Jan Kara wrote: > > Merging of uninitialized extents creates all sorts of interesting race > > possibilities when writeback / DIO races with fallocate. Thus > > ext4_convert_unwritten_extents_endio() has to deal with a case where > > extent to be converted needs to be split out first. That isn't nice > > for two reasons: > > > > 1) It may need allocation of extent tree block so ENOSPC is possible. > > 2) It complicates end_io handling code > > > > So we disable merging of uninitialized extents which allows us to simplify > > the code. Extents will get merged after they are converted to initialized > > ones. > > > > Reviewed-by: Zheng Liu <wenqing.lz@taobao.com> > > Signed-off-by: Jan Kara <jack@suse.cz> > > Sorry for not noticing this earlier, but this patch is causing a > regression. It is loading to test 113 failing when dioread_nolock is > used: > > 113 [ 47.619363] EXT4-fs error (device vdb): ext4_convert_unwritten_extents_endio:3411: i > node #10951: comm kworker/u:0: Written extent modified before IO finished: extent logical block > 1024, len 1024; IO logical block 1024, len 127 > [ 47.619363] > [ 47.623239] EXT4-fs warning (device vdb): ext4_convert_unwritten_extents:4522: inode #10951: > block 1024: len 127: ext4_ext_map_blocks returned -5 > [ 47.628975] EXT4-fs (vdb): failed to convert unwritten extents to written extents -- potenti > al data loss! (inode 10951, offset 4194304, size 520192, error -5) > > > As a result, I am considering whether or not I should to drop the > following patches from the ext4 tree: > > e63dd9c ext4: disable merging of uninitialized extents > de39534 ext4: remove unnecessary wait for extent conversion in ext4_fallocate() > 37bf0a8 ext4: ext4_split_extent should take care of extent zeroout > > I know that these patches fix other potential races which causes data > loss, but they've been around for a while, and in practice seem to be > relatively rarely hit. OK, so I've debugged this. It wasn't actually that hard. The problem is that mpage_da_map_and_submit() prepares extent with e.g. 256 blocks but later we submit a shorter bio e.g. because it cannot carry that many pages. So ->end_io is called only for first 128 blocks or so. I spotted this problem already before, just it didn't come up when Ted sent me this bug report. The fix isn't trivial. What we need to do is to be able to attach multiple bios to one io_end structure and start the conversion only once they are all finished. I actually have patches for this in the second part of my patch set. So for this merge window I'd just do what Dmitry suggested (and the warning can be triggered really trivially by a sequential write with dioread_nolock so that definitely has to be hidden by default). And I'll go off to finish that second part of my patch set so that it can get to Ted's tree as soon as possible. Honza
On Thu, Feb 14, 2013 at 05:11:16PM +0100, Jan Kara wrote: > OK, so I've debugged this. It wasn't actually that hard. The problem is > that mpage_da_map_and_submit() prepares extent with e.g. 256 blocks but > later we submit a shorter bio e.g. because it cannot carry that many pages. > So ->end_io is called only for first 128 blocks or so. I spotted this > problem already before, just it didn't come up when Ted sent me this bug > report. So can we get a corruption without these patches, but it was a lot harder to hit that case before? - Ted -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu 14-02-13 14:05:55, Ted Tso wrote: > On Thu, Feb 14, 2013 at 05:11:16PM +0100, Jan Kara wrote: > > OK, so I've debugged this. It wasn't actually that hard. The problem is > > that mpage_da_map_and_submit() prepares extent with e.g. 256 blocks but > > later we submit a shorter bio e.g. because it cannot carry that many pages. > > So ->end_io is called only for first 128 blocks or so. I spotted this > > problem already before, just it didn't come up when Ted sent me this bug > > report. > > So can we get a corruption without these patches, but it was a lot > harder to hit that case before? When the bio is shorter we just need to split the unwritten extent when processing end_io callback. That is properly handled after Dmitry's fixes so corruption can happen only when we are close to ENOSPC and the split fails. Now we started seeing the split is happening because one of the patches added the warning that the split from end_io is happening... Honza
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 26af228..f1ce33a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -54,9 +54,6 @@ #define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ #define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ -#define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */ -#define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */ - static __le32 ext4_extent_block_csum(struct inode *inode, struct ext4_extent_header *eh) { @@ -1579,20 +1576,17 @@ int ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, struct ext4_extent *ex2) { - unsigned short ext1_ee_len, ext2_ee_len, max_len; + unsigned ext1_ee_len, ext2_ee_len; /* - * Make sure that either both extents are uninitialized, or - * both are _not_. + * Make sure that both extents are initialized. We don't merge + * uninitialized extents so that we can be sure that end_io code has + * the extent that was written properly split out and conversion to + * initialized is trivial. */ - if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2)) + if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2)) return 0; - if (ext4_ext_is_uninitialized(ex1)) - max_len = EXT_UNINIT_MAX_LEN; - else - max_len = EXT_INIT_MAX_LEN; - ext1_ee_len = ext4_ext_get_actual_len(ex1); ext2_ee_len = ext4_ext_get_actual_len(ex2); @@ -1605,7 +1599,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, * as an RO_COMPAT feature, refuse to merge to extents if * this can result in the top bit of ee_len being set. */ - if (ext1_ee_len + ext2_ee_len > max_len) + if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN) return 0; #ifdef AGGRESSIVE_TEST if (ext1_ee_len >= 4) @@ -2959,9 +2953,6 @@ static int ext4_split_extent_at(handle_t *handle, unsigned int ee_len, depth; int err = 0; - BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) == - (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)); - ext_debug("ext4_split_extents_at: inode %lu, logical" "block %llu\n", inode->i_ino, (unsigned long long)split); @@ -3020,14 +3011,7 @@ static int ext4_split_extent_at(handle_t *handle, err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { - if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { - if (split_flag & EXT4_EXT_DATA_VALID1) - err = ext4_ext_zeroout(inode, ex2); - else - err = ext4_ext_zeroout(inode, ex); - } else - err = ext4_ext_zeroout(inode, &orig_ex); - + err = ext4_ext_zeroout(inode, &orig_ex); if (err) goto fix_extent_len; /* update the extent length and mark as initialized */ @@ -3085,8 +3069,6 @@ static int ext4_split_extent(handle_t *handle, if (uninitialized) split_flag1 |= EXT4_EXT_MARK_UNINIT1 | EXT4_EXT_MARK_UNINIT2; - if (split_flag & EXT4_EXT_DATA_VALID2) - split_flag1 |= EXT4_EXT_DATA_VALID1; err = ext4_split_extent_at(handle, inode, path, map->m_lblk + map->m_len, split_flag1, flags1); if (err) @@ -3099,8 +3081,7 @@ static int ext4_split_extent(handle_t *handle, return PTR_ERR(path); if (map->m_lblk >= ee_block) { - split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT | - EXT4_EXT_DATA_VALID2); + split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT; if (uninitialized) split_flag1 |= EXT4_EXT_MARK_UNINIT1; if (split_flag & EXT4_EXT_MARK_UNINIT2) @@ -3379,8 +3360,7 @@ static int ext4_split_unwritten_extents(handle_t *handle, split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; split_flag |= EXT4_EXT_MARK_UNINIT2; - if (flags & EXT4_GET_BLOCKS_CONVERT) - split_flag |= EXT4_EXT_DATA_VALID2; + flags |= EXT4_GET_BLOCKS_PRE_IO; return ext4_split_extent(handle, inode, path, map, split_flag, flags); } @@ -3405,20 +3385,15 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, "block %llu, max_blocks %u\n", inode->i_ino, (unsigned long long)ee_block, ee_len); - /* If extent is larger than requested then split is required */ + /* Extent is larger than requested? */ if (ee_block != map->m_lblk || ee_len > map->m_len) { - err = ext4_split_unwritten_extents(handle, inode, map, path, - EXT4_GET_BLOCKS_CONVERT); - if (err < 0) - goto out; - ext4_ext_drop_refs(path); - path = ext4_ext_find_extent(inode, map->m_lblk, path); - if (IS_ERR(path)) { - err = PTR_ERR(path); - goto out; - } - depth = ext_depth(inode); - ex = path[depth].p_ext; + EXT4_ERROR_INODE(inode, "Written extent modified before IO" + " finished: extent logical block %llu, len %u; IO" + " logical block %llu, len %u\n", + (unsigned long long)ee_block, ee_len, + (unsigned long long)map->m_lblk, map->m_len); + err = -EIO; + goto out; } err = ext4_ext_get_access(handle, inode, path + depth);