diff mbox

ext4: Do not update quota for reserved blocks on error paths v3

Message ID 1274959030-8001-1-git-send-email-dmonakhov@openvz.org
State New, archived
Headers show

Commit Message

Dmitry Monakhov May 27, 2010, 11:17 a.m. UTC
If we fail somewhere inside the ext4_get_blocks() internals, we may
have allocated some new blocks which were not yet claimed to quota.
We have to free such blocks, but without touching quota. Quota will
be updated later on exit from ext4_get_blocks().
The bug happens on a heavily loaded node.

Changes from v2:
 - After Eric's quota patches, metadata is charged immediately to quota
   inside new_meta_blocks(), so we have to free quota credits regardless
   of the BLOCKS_RESERVED flag.
Changes from v1:
 - Decrement i_allocated_meta_blocks for metadata blocks.
 - Add some sanity checks.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
---
 fs/ext4/ext4.h    |    1 +
 fs/ext4/extents.c |   18 +++++++++++++-----
 fs/ext4/inode.c   |   40 ++++++++++++++++++++--------------------
 fs/ext4/mballoc.c |   41 +++++++++++++++++++++++++++++++++++++++--
 4 files changed, 73 insertions(+), 27 deletions(-)

Comments

Jan Kara June 15, 2010, 3:51 p.m. UTC | #1
Hi Dmitry,

> If we have failed some where inside ext4_get_blocks() internals we may
> have allocated some new blocks, which was not yet claimed to quota.
> We have to free such blocks, but without touching quota. Quota will
> be updated later on exit from ext4_get_blocks().
> The bug hapens on heavily loaded node.
> 
> Changes from v2:
>  - After Eric's quota-patches metadata charged immediately to quota
>    inside new_meta_blocks(), so we have to free quota credits regardless
>    to BLOCKS_RESERVED flag.
> Changes from v1:
>  - Dectement i_allocated_meta_blocks for metadata blocks.
>  - Add some sanity checks.
  I had a look at the patch and there are two things I don't understand:
Why do we need the EXT4_FREE_BLOCKS_RESERVED flag? Can't we just directly
use i_delalloc_reserved_flag?
  Also, adding EXT4_FREE_BLOCKS_METADATA to some calls will also prevent
these blocks from being reallocated within the same transaction. Why
do you do this?

  Besides that, a few style / language nitpicks:
Please use an empty line to separate variable declarations from code. It's
a good convention followed in most places in the kernel.

> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 377309c..e3cc230 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -3528,12 +3532,16 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
>  	}
>  	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
>  	if (err) {
> -		/* free data blocks we just allocated */
> -		/* not a good idea to call discard here directly,
> -		 * but otherwise we'd need to call it every free() */
> +		int fb_flags = 0;
> +		/* free data blocks we just allocated
> +		 * Not a good idea to call discard here directly,
> +		 * but otherwise we'd need to call it every free().
> +		 * On delalloc blocks are not yet accounted to quota */
  I have some trouble understanding the above comment, so if you actually
know what 'discard' and 'free()' mean in this context, please update it.
Otherwise, just leave it...
  Also, the suggested format for multi-line comments is:
/*
 *  Some text
 *  more text
 */

> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 502b07d..c3b4443 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
...
>  }
> -
  Is this removal of the empty line unintended?

>  }
> -
  And is this one unintended as well?
>  /*
>   * The ext4_ind_map_blocks() function handles non-extents inodes
>   * (i.e., using the traditional indirect/double-indirect i_blocks
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index 12b3bc0..c87243b 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -4682,11 +4684,46 @@ do_more:
>  	}
>  	sb->s_dirt = 1;
>  error_return:
> -	if (freed)
> -		dquot_free_block(inode, freed);
> +	/*  Update quotas */
          ^ Superfluous space

> +	if (freed) {
> +		if (!(res_fl & EXT4_FREE_BLOCKS_RESERVED)) {
> +			dquot_free_block(inode, freed);
> +			goto out;
> +		}
> +		/* Blocks reserved case */
                ^ Do we really need this comment? It seems obvious...

> +		if (res_fl & EXT4_FREE_BLOCKS_METADATA) {
> +			/*
> +			 * Meta data blocks was charged to quota and to
                                            ^^^ were
> +			 * inode's mblock alloc counter in
                                   ^^^^^^^^^^^^ mballoc?
> +			 * ext4_new_meta_blocks(). */
                                                   ^^ put on a separate line
> +			spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
> +			if (EXT4_I(inode)->i_allocated_meta_blocks <
> +				freed)
  Line wrap doesn't seem to be needed above...

> +				goto rsv_error;
> +			EXT4_I(inode)->i_allocated_meta_blocks -= freed;
> +			spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> +			dquot_free_block(inode, freed);
> +		} else {
> +			/* Data blocks allocated was reserved, but not yet
                                                 ^^^ were
> +			 * claimed to quota. Caller is responsibleo for
                                                                ^^^ remove 'o'
> +			 * quota reservation update. */
> +		}
> +	}
> +out:
>  	brelse(bitmap_bh);
>  	ext4_std_error(sb, err);
>  	if (ac)
>  		kmem_cache_free(ext4_ac_cachep, ac);
>  	return;
> +
> +rsv_error:
> +	ext4_msg(sb, KERN_ERR," inode %ld, reservation counters goes"
                                                                ^^^^ are?
> +		" inconsistent rsv_data=%u, rsv_mdata=%u, alloc_mblk=%u"
> +		" freed=%lu", inode->i_ino,
> +		EXT4_I(inode)->i_reserved_data_blocks,
> +		EXT4_I(inode)->i_reserved_meta_blocks,
> +		EXT4_I(inode)->i_allocated_meta_blocks, freed);
> +	EXT4_I(inode)->i_allocated_meta_blocks = 0;
> +	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> +	goto out;
>  }
> -- 
> 1.6.6.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 60bd310..231b132 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -490,6 +490,7 @@  struct ext4_new_group_data {
 #define EXT4_FREE_BLOCKS_METADATA	0x0001
 #define EXT4_FREE_BLOCKS_FORGET		0x0002
 #define EXT4_FREE_BLOCKS_VALIDATED	0x0004
+#define EXT4_FREE_BLOCKS_RESERVED	0x0008
 
 /*
  * ioctl commands
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 377309c..e3cc230 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1057,11 +1057,15 @@  cleanup:
 
 	if (err) {
 		/* free all allocated blocks in error case */
+		int fb_flags = EXT4_FREE_BLOCKS_METADATA;
+		if (EXT4_I(inode)->i_delalloc_reserved_flag)
+			fb_flags |= EXT4_FREE_BLOCKS_RESERVED;
+
 		for (i = 0; i < depth; i++) {
 			if (!ablocks[i])
 				continue;
 			ext4_free_blocks(handle, inode, 0, ablocks[i], 1,
-					 EXT4_FREE_BLOCKS_METADATA);
+					 fb_flags);
 		}
 	}
 	kfree(ablocks);
@@ -3528,12 +3532,16 @@  int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	}
 	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
 	if (err) {
-		/* free data blocks we just allocated */
-		/* not a good idea to call discard here directly,
-		 * but otherwise we'd need to call it every free() */
+		int fb_flags = 0;
+		/* free data blocks we just allocated
+		 * Not a good idea to call discard here directly,
+		 * but otherwise we'd need to call it every free().
+		 * On delalloc blocks are not yet accounted to quota */
+		if (EXT4_I(inode)->i_delalloc_reserved_flag)
+			fb_flags = EXT4_FREE_BLOCKS_RESERVED;
 		ext4_discard_preallocations(inode);
 		ext4_free_blocks(handle, inode, 0, ext_pblock(&newex),
-				 ext4_ext_get_actual_len(&newex), 0);
+				 ext4_ext_get_actual_len(&newex), fb_flags);
 		goto out2;
 	}
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 502b07d..c3b4443 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -591,7 +591,9 @@  static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
 	int index = 0;
 	ext4_fsblk_t current_block = 0;
 	int ret = 0;
-
+	int fb_flags = EXT4_FREE_BLOCKS_METADATA;
+	if (EXT4_I(inode)->i_delalloc_reserved_flag)
+		fb_flags |= EXT4_FREE_BLOCKS_RESERVED;
 	/*
 	 * Here we try to allocate the requested multiple blocks at once,
 	 * on a best-effort basis.
@@ -686,7 +688,7 @@  allocated:
 	return ret;
 failed_out:
 	for (i = 0; i < index; i++)
-		ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
+		ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, fb_flags);
 	return ret;
 }
 
@@ -727,6 +729,9 @@  static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
 	int num;
 	ext4_fsblk_t new_blocks[4];
 	ext4_fsblk_t current_block;
+	int fb_flags = 0;
+	if (EXT4_I(inode)->i_delalloc_reserved_flag)
+		fb_flags |= EXT4_FREE_BLOCKS_RESERVED;
 
 	num = ext4_alloc_blocks(handle, inode, iblock, goal, indirect_blks,
 				*blks, new_blocks, &err);
@@ -782,24 +787,20 @@  static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
 	return err;
 failed:
 	/* Allocation failed, free what we already allocated */
-	ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0);
+	ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, fb_flags);
 	for (i = 1; i <= n ; i++) {
-		/*
-		 * branch[i].bh is newly allocated, so there is no
-		 * need to revoke the block, which is why we don't
-		 * need to set EXT4_FREE_BLOCKS_METADATA.
-		 */
 		ext4_free_blocks(handle, inode, 0, new_blocks[i], 1,
-				 EXT4_FREE_BLOCKS_FORGET);
+				fb_flags | EXT4_FREE_BLOCKS_METADATA |
+				EXT4_FREE_BLOCKS_FORGET);
 	}
 	for (i = n+1; i < indirect_blks; i++)
-		ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
+		ext4_free_blocks(handle, inode, 0, new_blocks[i], 1,
+				fb_flags | EXT4_FREE_BLOCKS_METADATA);
 
-	ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0);
+	ext4_free_blocks(handle, inode, 0, new_blocks[i], num, fb_flags);
 
 	return err;
 }
-
 /**
  * ext4_splice_branch - splice the allocated branch onto inode.
  * @inode: owner
@@ -821,6 +822,9 @@  static int ext4_splice_branch(handle_t *handle, struct inode *inode,
 	int i;
 	int err = 0;
 	ext4_fsblk_t current_block;
+	int fb_flags = 0;
+	if (EXT4_I(inode)->i_delalloc_reserved_flag)
+		fb_flags |= EXT4_FREE_BLOCKS_RESERVED;
 
 	/*
 	 * If we're splicing into a [td]indirect block (as opposed to the
@@ -872,22 +876,18 @@  static int ext4_splice_branch(handle_t *handle, struct inode *inode,
 	}
 	return err;
 
+
 err_out:
 	for (i = 1; i <= num; i++) {
-		/*
-		 * branch[i].bh is newly allocated, so there is no
-		 * need to revoke the block, which is why we don't
-		 * need to set EXT4_FREE_BLOCKS_METADATA.
-		 */
 		ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
-				 EXT4_FREE_BLOCKS_FORGET);
+				fb_flags | EXT4_FREE_BLOCKS_METADATA |
+				EXT4_FREE_BLOCKS_FORGET);
 	}
 	ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key),
-			 blks, 0);
+			 blks, fb_flags);
 
 	return err;
 }
-
 /*
  * The ext4_ind_map_blocks() function handles non-extents inodes
  * (i.e., using the traditional indirect/double-indirect i_blocks
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 12b3bc0..c87243b 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4503,6 +4503,8 @@  void ext4_free_blocks(handle_t *handle, struct inode *inode,
 	struct ext4_sb_info *sbi;
 	struct ext4_buddy e4b;
 	int err = 0;
+	int res_fl = flags & (EXT4_FREE_BLOCKS_RESERVED |
+				EXT4_FREE_BLOCKS_METADATA);
 	int ret;
 
 	if (bh) {
@@ -4682,11 +4684,46 @@  do_more:
 	}
 	sb->s_dirt = 1;
 error_return:
-	if (freed)
-		dquot_free_block(inode, freed);
+	/*  Update quotas */
+	if (freed) {
+		if (!(res_fl & EXT4_FREE_BLOCKS_RESERVED)) {
+			dquot_free_block(inode, freed);
+			goto out;
+		}
+		/* Blocks reserved case */
+		if (res_fl & EXT4_FREE_BLOCKS_METADATA) {
+			/*
+			 * Meta data blocks was charged to quota and to
+			 * inode's mblock alloc counter in
+			 * ext4_new_meta_blocks(). */
+			spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+			if (EXT4_I(inode)->i_allocated_meta_blocks <
+				freed)
+				goto rsv_error;
+			EXT4_I(inode)->i_allocated_meta_blocks -= freed;
+			spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+			dquot_free_block(inode, freed);
+		} else {
+			/* Data blocks allocated was reserved, but not yet
+			 * claimed to quota. Caller is responsibleo for
+			 * quota reservation update. */
+		}
+	}
+out:
 	brelse(bitmap_bh);
 	ext4_std_error(sb, err);
 	if (ac)
 		kmem_cache_free(ext4_ac_cachep, ac);
 	return;
+
+rsv_error:
+	ext4_msg(sb, KERN_ERR," inode %ld, reservation counters goes"
+		" inconsistent rsv_data=%u, rsv_mdata=%u, alloc_mblk=%u"
+		" freed=%lu", inode->i_ino,
+		EXT4_I(inode)->i_reserved_data_blocks,
+		EXT4_I(inode)->i_reserved_meta_blocks,
+		EXT4_I(inode)->i_allocated_meta_blocks, freed);
+	EXT4_I(inode)->i_allocated_meta_blocks = 0;
+	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+	goto out;
 }