Patchwork [4/5] ext4: Disable merging of uninitialized extents

login
register
mail settings
Submitter Jan Kara
Date Jan. 2, 2013, 5:45 p.m.
Message ID <1357148744-4895-5-git-send-email-jack@suse.cz>
Download mbox | patch
Permalink /patch/209121/
State Superseded
Headers show

Comments

Jan Kara - Jan. 2, 2013, 5:45 p.m.
Merging of uninitialized extents creates all sorts of interesting race
possibilities when writeback / DIO races with fallocate. Thus
ext4_convert_unwritten_extents_endio() has to deal with a case where
extent to be converted needs to be split out first. That isn't nice
for two reasons:

1) It may need allocation of extent tree block so ENOSPC is possible.
2) It complicates end_io handling code

So we disable merging of uninitialized extents which allows us to simplify
the code. Extents will get merged after they are converted to initialized
ones.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext4/extents.c |   61 +++++++++++++++-------------------------------------
 1 files changed, 18 insertions(+), 43 deletions(-)
Zheng Liu - Jan. 4, 2013, 7:25 a.m.
On Wed, Jan 02, 2013 at 06:45:43PM +0100, Jan Kara wrote:
> Merging of uninitialized extents creates all sorts of interesting race
> possibilities when writeback / DIO races with fallocate. Thus
> ext4_convert_unwritten_extents_endio() has to deal with a case where
> extent to be converted needs to be split out first. That isn't nice
> for two reasons:
> 
> 1) It may need allocation of extent tree block so ENOSPC is possible.
> 2) It complicates end_io handling code
> 
> So we disable merging of uninitialized extents which allows us to simplify
> the code. Extents will get merged after they are converted to initialized
> ones.
> 
> Signed-off-by: Jan Kara <jack@suse.cz>

Reviewed-by: Zheng Liu <wenqing.lz@taobao.com>
Regards,
                                                - Zheng

> ---
>  fs/ext4/extents.c |   61 +++++++++++++++-------------------------------------
>  1 files changed, 18 insertions(+), 43 deletions(-)
> 
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 26af228..f1ce33a 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -54,9 +54,6 @@
>  #define EXT4_EXT_MARK_UNINIT1	0x2  /* mark first half uninitialized */
>  #define EXT4_EXT_MARK_UNINIT2	0x4  /* mark second half uninitialized */
>  
> -#define EXT4_EXT_DATA_VALID1	0x8  /* first half contains valid data */
> -#define EXT4_EXT_DATA_VALID2	0x10 /* second half contains valid data */
> -
>  static __le32 ext4_extent_block_csum(struct inode *inode,
>  				     struct ext4_extent_header *eh)
>  {
> @@ -1579,20 +1576,17 @@ int
>  ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
>  				struct ext4_extent *ex2)
>  {
> -	unsigned short ext1_ee_len, ext2_ee_len, max_len;
> +	unsigned ext1_ee_len, ext2_ee_len;
>  
>  	/*
> -	 * Make sure that either both extents are uninitialized, or
> -	 * both are _not_.
> +	 * Make sure that both extents are initialized. We don't merge
> +	 * uninitialized extents so that we can be sure that end_io code has
> +	 * the extent that was written properly split out and conversion to
> +	 * initialized is trivial.
>  	 */
> -	if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2))
> +	if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2))
>  		return 0;
>  
> -	if (ext4_ext_is_uninitialized(ex1))
> -		max_len = EXT_UNINIT_MAX_LEN;
> -	else
> -		max_len = EXT_INIT_MAX_LEN;
> -
>  	ext1_ee_len = ext4_ext_get_actual_len(ex1);
>  	ext2_ee_len = ext4_ext_get_actual_len(ex2);
>  
> @@ -1605,7 +1599,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
>  	 * as an RO_COMPAT feature, refuse to merge to extents if
>  	 * this can result in the top bit of ee_len being set.
>  	 */
> -	if (ext1_ee_len + ext2_ee_len > max_len)
> +	if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
>  		return 0;
>  #ifdef AGGRESSIVE_TEST
>  	if (ext1_ee_len >= 4)
> @@ -2959,9 +2953,6 @@ static int ext4_split_extent_at(handle_t *handle,
>  	unsigned int ee_len, depth;
>  	int err = 0;
>  
> -	BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) ==
> -	       (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2));
> -
>  	ext_debug("ext4_split_extents_at: inode %lu, logical"
>  		"block %llu\n", inode->i_ino, (unsigned long long)split);
>  
> @@ -3020,14 +3011,7 @@ static int ext4_split_extent_at(handle_t *handle,
>  
>  	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
>  	if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
> -		if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
> -			if (split_flag & EXT4_EXT_DATA_VALID1)
> -				err = ext4_ext_zeroout(inode, ex2);
> -			else
> -				err = ext4_ext_zeroout(inode, ex);
> -		} else
> -			err = ext4_ext_zeroout(inode, &orig_ex);
> -
> +		err = ext4_ext_zeroout(inode, &orig_ex);
>  		if (err)
>  			goto fix_extent_len;
>  		/* update the extent length and mark as initialized */
> @@ -3085,8 +3069,6 @@ static int ext4_split_extent(handle_t *handle,
>  		if (uninitialized)
>  			split_flag1 |= EXT4_EXT_MARK_UNINIT1 |
>  				       EXT4_EXT_MARK_UNINIT2;
> -		if (split_flag & EXT4_EXT_DATA_VALID2)
> -			split_flag1 |= EXT4_EXT_DATA_VALID1;
>  		err = ext4_split_extent_at(handle, inode, path,
>  				map->m_lblk + map->m_len, split_flag1, flags1);
>  		if (err)
> @@ -3099,8 +3081,7 @@ static int ext4_split_extent(handle_t *handle,
>  		return PTR_ERR(path);
>  
>  	if (map->m_lblk >= ee_block) {
> -		split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT |
> -					    EXT4_EXT_DATA_VALID2);
> +		split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT;
>  		if (uninitialized)
>  			split_flag1 |= EXT4_EXT_MARK_UNINIT1;
>  		if (split_flag & EXT4_EXT_MARK_UNINIT2)
> @@ -3379,8 +3360,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
>  
>  	split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
>  	split_flag |= EXT4_EXT_MARK_UNINIT2;
> -	if (flags & EXT4_GET_BLOCKS_CONVERT)
> -		split_flag |= EXT4_EXT_DATA_VALID2;
> +
>  	flags |= EXT4_GET_BLOCKS_PRE_IO;
>  	return ext4_split_extent(handle, inode, path, map, split_flag, flags);
>  }
> @@ -3405,20 +3385,15 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
>  		"block %llu, max_blocks %u\n", inode->i_ino,
>  		  (unsigned long long)ee_block, ee_len);
>  
> -	/* If extent is larger than requested then split is required */
> +	/* Extent is larger than requested? */
>  	if (ee_block != map->m_lblk || ee_len > map->m_len) {
> -		err = ext4_split_unwritten_extents(handle, inode, map, path,
> -						   EXT4_GET_BLOCKS_CONVERT);
> -		if (err < 0)
> -			goto out;
> -		ext4_ext_drop_refs(path);
> -		path = ext4_ext_find_extent(inode, map->m_lblk, path);
> -		if (IS_ERR(path)) {
> -			err = PTR_ERR(path);
> -			goto out;
> -		}
> -		depth = ext_depth(inode);
> -		ex = path[depth].p_ext;
> +		EXT4_ERROR_INODE(inode, "Written extent modified before IO"
> +			" finished: extent logical block %llu, len %u; IO"
> +			" logical block %llu, len %u\n",
> +			(unsigned long long)ee_block, ee_len,
> +			(unsigned long long)map->m_lblk, map->m_len);
> +		err = -EIO;
> +		goto out;
>  	}
>  
>  	err = ext4_ext_get_access(handle, inode, path + depth);
> -- 
> 1.7.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 26af228..f1ce33a 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -54,9 +54,6 @@ 
 #define EXT4_EXT_MARK_UNINIT1	0x2  /* mark first half uninitialized */
 #define EXT4_EXT_MARK_UNINIT2	0x4  /* mark second half uninitialized */
 
-#define EXT4_EXT_DATA_VALID1	0x8  /* first half contains valid data */
-#define EXT4_EXT_DATA_VALID2	0x10 /* second half contains valid data */
-
 static __le32 ext4_extent_block_csum(struct inode *inode,
 				     struct ext4_extent_header *eh)
 {
@@ -1579,20 +1576,17 @@  int
 ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
 				struct ext4_extent *ex2)
 {
-	unsigned short ext1_ee_len, ext2_ee_len, max_len;
+	unsigned ext1_ee_len, ext2_ee_len;
 
 	/*
-	 * Make sure that either both extents are uninitialized, or
-	 * both are _not_.
+	 * Make sure that both extents are initialized. We don't merge
+	 * uninitialized extents so that we can be sure that end_io code has
+	 * the extent that was written properly split out and conversion to
+	 * initialized is trivial.
 	 */
-	if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2))
+	if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2))
 		return 0;
 
-	if (ext4_ext_is_uninitialized(ex1))
-		max_len = EXT_UNINIT_MAX_LEN;
-	else
-		max_len = EXT_INIT_MAX_LEN;
-
 	ext1_ee_len = ext4_ext_get_actual_len(ex1);
 	ext2_ee_len = ext4_ext_get_actual_len(ex2);
 
@@ -1605,7 +1599,7 @@  ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
 	 * as an RO_COMPAT feature, refuse to merge to extents if
 	 * this can result in the top bit of ee_len being set.
 	 */
-	if (ext1_ee_len + ext2_ee_len > max_len)
+	if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
 		return 0;
 #ifdef AGGRESSIVE_TEST
 	if (ext1_ee_len >= 4)
@@ -2959,9 +2953,6 @@  static int ext4_split_extent_at(handle_t *handle,
 	unsigned int ee_len, depth;
 	int err = 0;
 
-	BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) ==
-	       (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2));
-
 	ext_debug("ext4_split_extents_at: inode %lu, logical"
 		"block %llu\n", inode->i_ino, (unsigned long long)split);
 
@@ -3020,14 +3011,7 @@  static int ext4_split_extent_at(handle_t *handle,
 
 	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
 	if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
-		if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
-			if (split_flag & EXT4_EXT_DATA_VALID1)
-				err = ext4_ext_zeroout(inode, ex2);
-			else
-				err = ext4_ext_zeroout(inode, ex);
-		} else
-			err = ext4_ext_zeroout(inode, &orig_ex);
-
+		err = ext4_ext_zeroout(inode, &orig_ex);
 		if (err)
 			goto fix_extent_len;
 		/* update the extent length and mark as initialized */
@@ -3085,8 +3069,6 @@  static int ext4_split_extent(handle_t *handle,
 		if (uninitialized)
 			split_flag1 |= EXT4_EXT_MARK_UNINIT1 |
 				       EXT4_EXT_MARK_UNINIT2;
-		if (split_flag & EXT4_EXT_DATA_VALID2)
-			split_flag1 |= EXT4_EXT_DATA_VALID1;
 		err = ext4_split_extent_at(handle, inode, path,
 				map->m_lblk + map->m_len, split_flag1, flags1);
 		if (err)
@@ -3099,8 +3081,7 @@  static int ext4_split_extent(handle_t *handle,
 		return PTR_ERR(path);
 
 	if (map->m_lblk >= ee_block) {
-		split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT |
-					    EXT4_EXT_DATA_VALID2);
+		split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT;
 		if (uninitialized)
 			split_flag1 |= EXT4_EXT_MARK_UNINIT1;
 		if (split_flag & EXT4_EXT_MARK_UNINIT2)
@@ -3379,8 +3360,7 @@  static int ext4_split_unwritten_extents(handle_t *handle,
 
 	split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
 	split_flag |= EXT4_EXT_MARK_UNINIT2;
-	if (flags & EXT4_GET_BLOCKS_CONVERT)
-		split_flag |= EXT4_EXT_DATA_VALID2;
+
 	flags |= EXT4_GET_BLOCKS_PRE_IO;
 	return ext4_split_extent(handle, inode, path, map, split_flag, flags);
 }
@@ -3405,20 +3385,15 @@  static int ext4_convert_unwritten_extents_endio(handle_t *handle,
 		"block %llu, max_blocks %u\n", inode->i_ino,
 		  (unsigned long long)ee_block, ee_len);
 
-	/* If extent is larger than requested then split is required */
+	/* Extent is larger than requested? */
 	if (ee_block != map->m_lblk || ee_len > map->m_len) {
-		err = ext4_split_unwritten_extents(handle, inode, map, path,
-						   EXT4_GET_BLOCKS_CONVERT);
-		if (err < 0)
-			goto out;
-		ext4_ext_drop_refs(path);
-		path = ext4_ext_find_extent(inode, map->m_lblk, path);
-		if (IS_ERR(path)) {
-			err = PTR_ERR(path);
-			goto out;
-		}
-		depth = ext_depth(inode);
-		ex = path[depth].p_ext;
+		EXT4_ERROR_INODE(inode, "Written extent modified before IO"
+			" finished: extent logical block %llu, len %u; IO"
+			" logical block %llu, len %u\n",
+			(unsigned long long)ee_block, ee_len,
+			(unsigned long long)map->m_lblk, map->m_len);
+		err = -EIO;
+		goto out;
 	}
 
 	err = ext4_ext_get_access(handle, inode, path + depth);