Message ID | 1357148744-4895-5-git-send-email-jack@suse.cz |
---|---|
State | Superseded, archived |
Headers | show |
On Wed, Jan 02, 2013 at 06:45:43PM +0100, Jan Kara wrote: > Merging of uninitialized extents creates all sorts of interesting race > possibilities when writeback / DIO races with fallocate. Thus > ext4_convert_unwritten_extents_endio() has to deal with a case where > extent to be converted needs to be split out first. That isn't nice > for two reasons: > > 1) It may need allocation of extent tree block so ENOSPC is possible. > 2) It complicates end_io handling code > > So we disable merging of uninitialized extents which allows us to simplify > the code. Extents will get merged after they are converted to initialized > ones. > > Signed-off-by: Jan Kara <jack@suse.cz> Reviewed-by: Zheng Liu <wenqing.lz@taobao.com> Regards, - Zheng > --- > fs/ext4/extents.c | 61 +++++++++++++++------------------------------------- > 1 files changed, 18 insertions(+), 43 deletions(-) > > diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c > index 26af228..f1ce33a 100644 > --- a/fs/ext4/extents.c > +++ b/fs/ext4/extents.c > @@ -54,9 +54,6 @@ > #define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ > #define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ > > -#define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */ > -#define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */ > - > static __le32 ext4_extent_block_csum(struct inode *inode, > struct ext4_extent_header *eh) > { > @@ -1579,20 +1576,17 @@ int > ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, > struct ext4_extent *ex2) > { > - unsigned short ext1_ee_len, ext2_ee_len, max_len; > + unsigned ext1_ee_len, ext2_ee_len; > > /* > - * Make sure that either both extents are uninitialized, or > - * both are _not_. > + * Make sure that both extents are initialized. We don't merge > + * uninitialized extents so that we can be sure that end_io code has > + * the extent that was written properly split out and conversion to > + * initialized is trivial. > */ > - if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2)) > + if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2)) > return 0; > > - if (ext4_ext_is_uninitialized(ex1)) > - max_len = EXT_UNINIT_MAX_LEN; > - else > - max_len = EXT_INIT_MAX_LEN; > - > ext1_ee_len = ext4_ext_get_actual_len(ex1); > ext2_ee_len = ext4_ext_get_actual_len(ex2); > > @@ -1605,7 +1599,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, > * as an RO_COMPAT feature, refuse to merge to extents if > * this can result in the top bit of ee_len being set. > */ > - if (ext1_ee_len + ext2_ee_len > max_len) > + if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN) > return 0; > #ifdef AGGRESSIVE_TEST > if (ext1_ee_len >= 4) > @@ -2959,9 +2953,6 @@ static int ext4_split_extent_at(handle_t *handle, > unsigned int ee_len, depth; > int err = 0; > > - BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) == > - (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)); > - > ext_debug("ext4_split_extents_at: inode %lu, logical" > "block %llu\n", inode->i_ino, (unsigned long long)split); > > @@ -3020,14 +3011,7 @@ static int ext4_split_extent_at(handle_t *handle, > > err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); > if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { > - if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { > - if (split_flag & EXT4_EXT_DATA_VALID1) > - err = ext4_ext_zeroout(inode, ex2); > - else > - err = ext4_ext_zeroout(inode, ex); > - } else > - err = ext4_ext_zeroout(inode, &orig_ex); > - > + err = ext4_ext_zeroout(inode, &orig_ex); > if (err) > goto fix_extent_len; > /* update the extent length and mark as initialized */ > @@ -3085,8 +3069,6 @@ static int ext4_split_extent(handle_t *handle, > if (uninitialized) > split_flag1 |= EXT4_EXT_MARK_UNINIT1 | > EXT4_EXT_MARK_UNINIT2; > - if (split_flag & EXT4_EXT_DATA_VALID2) > - split_flag1 |= EXT4_EXT_DATA_VALID1; > err = ext4_split_extent_at(handle, inode, path, > map->m_lblk + map->m_len, split_flag1, flags1); > if (err) > @@ -3099,8 +3081,7 @@ static int ext4_split_extent(handle_t *handle, > return PTR_ERR(path); > > if (map->m_lblk >= ee_block) { > - split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT | > - EXT4_EXT_DATA_VALID2); > + split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT; > if (uninitialized) > split_flag1 |= EXT4_EXT_MARK_UNINIT1; > if (split_flag & EXT4_EXT_MARK_UNINIT2) > @@ -3379,8 +3360,7 @@ static int ext4_split_unwritten_extents(handle_t *handle, > > split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; > split_flag |= EXT4_EXT_MARK_UNINIT2; > - if (flags & EXT4_GET_BLOCKS_CONVERT) > - split_flag |= EXT4_EXT_DATA_VALID2; > + > flags |= EXT4_GET_BLOCKS_PRE_IO; > return ext4_split_extent(handle, inode, path, map, split_flag, flags); > } > @@ -3405,20 +3385,15 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, > "block %llu, max_blocks %u\n", inode->i_ino, > (unsigned long long)ee_block, ee_len); > > - /* If extent is larger than requested then split is required */ > + /* Extent is larger than requested? */ > if (ee_block != map->m_lblk || ee_len > map->m_len) { > - err = ext4_split_unwritten_extents(handle, inode, map, path, > - EXT4_GET_BLOCKS_CONVERT); > - if (err < 0) > - goto out; > - ext4_ext_drop_refs(path); > - path = ext4_ext_find_extent(inode, map->m_lblk, path); > - if (IS_ERR(path)) { > - err = PTR_ERR(path); > - goto out; > - } > - depth = ext_depth(inode); > - ex = path[depth].p_ext; > + EXT4_ERROR_INODE(inode, "Written extent modified before IO" > + " finished: extent logical block %llu, len %u; IO" > + " logical block %llu, len %u\n", > + (unsigned long long)ee_block, ee_len, > + (unsigned long long)map->m_lblk, map->m_len); > + err = -EIO; > + goto out; > } > > err = ext4_ext_get_access(handle, inode, path + depth); > -- > 1.7.1 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 26af228..f1ce33a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -54,9 +54,6 @@ #define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ #define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ -#define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */ -#define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */ - static __le32 ext4_extent_block_csum(struct inode *inode, struct ext4_extent_header *eh) { @@ -1579,20 +1576,17 @@ int ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, struct ext4_extent *ex2) { - unsigned short ext1_ee_len, ext2_ee_len, max_len; + unsigned ext1_ee_len, ext2_ee_len; /* - * Make sure that either both extents are uninitialized, or - * both are _not_. + * Make sure that both extents are initialized. We don't merge + * uninitialized extents so that we can be sure that end_io code has + * the extent that was written properly split out and conversion to + * initialized is trivial. */ - if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2)) + if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2)) return 0; - if (ext4_ext_is_uninitialized(ex1)) - max_len = EXT_UNINIT_MAX_LEN; - else - max_len = EXT_INIT_MAX_LEN; - ext1_ee_len = ext4_ext_get_actual_len(ex1); ext2_ee_len = ext4_ext_get_actual_len(ex2); @@ -1605,7 +1599,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, * as an RO_COMPAT feature, refuse to merge to extents if * this can result in the top bit of ee_len being set. */ - if (ext1_ee_len + ext2_ee_len > max_len) + if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN) return 0; #ifdef AGGRESSIVE_TEST if (ext1_ee_len >= 4) @@ -2959,9 +2953,6 @@ static int ext4_split_extent_at(handle_t *handle, unsigned int ee_len, depth; int err = 0; - BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) == - (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)); - ext_debug("ext4_split_extents_at: inode %lu, logical" "block %llu\n", inode->i_ino, (unsigned long long)split); @@ -3020,14 +3011,7 @@ static int ext4_split_extent_at(handle_t *handle, err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { - if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { - if (split_flag & EXT4_EXT_DATA_VALID1) - err = ext4_ext_zeroout(inode, ex2); - else - err = ext4_ext_zeroout(inode, ex); - } else - err = ext4_ext_zeroout(inode, &orig_ex); - + err = ext4_ext_zeroout(inode, &orig_ex); if (err) goto fix_extent_len; /* update the extent length and mark as initialized */ @@ -3085,8 +3069,6 @@ static int ext4_split_extent(handle_t *handle, if (uninitialized) split_flag1 |= EXT4_EXT_MARK_UNINIT1 | EXT4_EXT_MARK_UNINIT2; - if (split_flag & EXT4_EXT_DATA_VALID2) - split_flag1 |= EXT4_EXT_DATA_VALID1; err = ext4_split_extent_at(handle, inode, path, map->m_lblk + map->m_len, split_flag1, flags1); if (err) @@ -3099,8 +3081,7 @@ static int ext4_split_extent(handle_t *handle, return PTR_ERR(path); if (map->m_lblk >= ee_block) { - split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT | - EXT4_EXT_DATA_VALID2); + split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT; if (uninitialized) split_flag1 |= EXT4_EXT_MARK_UNINIT1; if (split_flag & EXT4_EXT_MARK_UNINIT2) @@ -3379,8 +3360,7 @@ static int ext4_split_unwritten_extents(handle_t *handle, split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; split_flag |= EXT4_EXT_MARK_UNINIT2; - if (flags & EXT4_GET_BLOCKS_CONVERT) - split_flag |= EXT4_EXT_DATA_VALID2; + flags |= EXT4_GET_BLOCKS_PRE_IO; return ext4_split_extent(handle, inode, path, map, split_flag, flags); } @@ -3405,20 +3385,15 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, "block %llu, max_blocks %u\n", inode->i_ino, (unsigned long long)ee_block, ee_len); - /* If extent is larger than requested then split is required */ + /* Extent is larger than requested? */ if (ee_block != map->m_lblk || ee_len > map->m_len) { - err = ext4_split_unwritten_extents(handle, inode, map, path, - EXT4_GET_BLOCKS_CONVERT); - if (err < 0) - goto out; - ext4_ext_drop_refs(path); - path = ext4_ext_find_extent(inode, map->m_lblk, path); - if (IS_ERR(path)) { - err = PTR_ERR(path); - goto out; - } - depth = ext_depth(inode); - ex = path[depth].p_ext; + EXT4_ERROR_INODE(inode, "Written extent modified before IO" + " finished: extent logical block %llu, len %u; IO" + " logical block %llu, len %u\n", + (unsigned long long)ee_block, ee_len, + (unsigned long long)map->m_lblk, map->m_len); + err = -EIO; + goto out; } err = ext4_ext_get_access(handle, inode, path + depth);
Merging of uninitialized extents creates all sorts of interesting race possibilities when writeback / DIO races with fallocate. Thus ext4_convert_unwritten_extents_endio() has to deal with a case where extent to be converted needs to be split out first. That isn't nice for two reasons: 1) It may need allocation of extent tree block so ENOSPC is possible. 2) It complicates end_io handling code So we disable merging of uninitialized extents which allows us to simplify the code. Extents will get merged after they are converted to initialized ones. Signed-off-by: Jan Kara <jack@suse.cz> --- fs/ext4/extents.c | 61 +++++++++++++++------------------------------------- 1 files changed, 18 insertions(+), 43 deletions(-)