diff mbox

[1/5] ext4: unlock group before ext4_error

Message ID 1227205580-27596-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com
State Superseded, archived
Headers show

Commit Message

Aneesh Kumar K.V Nov. 20, 2008, 6:26 p.m. UTC
Otherwise ext4_error will cause BUG because of
scheduling in atomic context.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/ext4/mballoc.c |   12 ++++++++++++
 1 files changed, 12 insertions(+), 0 deletions(-)

Comments

Peter Staubach Nov. 20, 2008, 6:35 p.m. UTC | #1
Aneesh Kumar K.V wrote:
> Otherwise ext4_error will cause BUG because of
> scheduling in atomic context.
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ---
>  fs/ext4/mballoc.c |   12 ++++++++++++
>  1 files changed, 12 insertions(+), 0 deletions(-)
>
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index 772e05b..039a5a6 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -460,10 +460,12 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
>  			blocknr +=
>  			    le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
>  
> +			ext4_unlock_group(sb, e4b->bd_group);
>  			ext4_error(sb, __func__, "double-free of inode"
>  				   " %lu's block %llu(bit %u in group %u)\n",
>  				   inode ? inode->i_ino : 0, blocknr,
>  				   first + i, e4b->bd_group);
> +			ext4_unlock_group(sb, e4b->bd_group);
>   

This should be ext4_lock_group(sb, e4b->bd_group); shouldn't it?

    Thanx...

       ps

>  		}
>  		mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
>  	}
> @@ -704,6 +706,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
>  	grp->bb_fragments = fragments;
>  
>  	if (free != grp->bb_free) {
> +		ext4_unlock_group(sb, group);
>  		ext4_error(sb, __func__,
>  			"EXT4-fs: group %u: %u blocks in bitmap, %u in gd\n",
>  			group, free, grp->bb_free);
> @@ -711,6 +714,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
>  		 * If we intent to continue, we consider group descritor
>  		 * corrupt and update bb_free using bitmap value
>  		 */
> +		ext4_lock_group(sb, group);
>  		grp->bb_free = free;
>  	}
>  
> @@ -1625,15 +1629,18 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
>  			 * free blocks even though group info says we
>  			 * we have free blocks
>  			 */
> +			ext4_unlock_group(sb, e4b->bd_group);
>  			ext4_error(sb, __func__, "%d free blocks as per "
>  					"group info. But bitmap says 0\n",
>  					free);
> +			ext4_lock_group(sb, e4b->bd_group);
>  			break;
>  		}
>  
>  		mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
>  		BUG_ON(ex.fe_len <= 0);
>  		if (free < ex.fe_len) {
> +			ext4_unlock_group(sb, e4b->bd_group);
>  			ext4_error(sb, __func__, "%d free blocks as per "
>  					"group info. But got %d blocks\n",
>  					free, ex.fe_len);
> @@ -1642,6 +1649,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
>  			 * indicate that the bitmap is corrupt. So exit
>  			 * without claiming the space.
>  			 */
> +			ext4_lock_group(sb, e4b->bd_group);
>  			break;
>  		}
>  
> @@ -3789,6 +3797,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
>  		bit = next + 1;
>  	}
>  	if (free != pa->pa_free) {
> +		ext4_unlock_group(sb, group);
>  		printk(KERN_CRIT "pa %p: logic %lu, phys. %lu, len %lu\n",
>  			pa, (unsigned long) pa->pa_lstart,
>  			(unsigned long) pa->pa_pstart,
> @@ -3799,6 +3808,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
>  		 * pa is already deleted so we use the value obtained
>  		 * from the bitmap and continue.
>  		 */
> +		ext4_lock_group(sb, group);
>  	}
>  	atomic_add(free, &sbi->s_mb_discarded);
>  
> @@ -4607,9 +4617,11 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
>  		else if (block >= (entry->start_blk + entry->count))
>  			n = &(*n)->rb_right;
>  		else {
> +			ext4_unlock_group(sb, e4b->bd_group);
>  			ext4_error(sb, __func__,
>  			    "Double free of blocks %d (%d %d)\n",
>  			    block, entry->start_blk, entry->count);
> +			ext4_unlock_group(sb, e4b->bd_group);
>  			return 0;
>  		}
>  	}
>   

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Theodore Ts'o Nov. 20, 2008, 10:38 p.m. UTC | #2
On Thu, Nov 20, 2008 at 11:56:18PM +0530, Aneesh Kumar K.V wrote:
> Otherwise ext4_error will cause BUG because of
> scheduling in atomic context.

Granted that it isn't necessarily safe as it is when errors-behaviour
is set to continue, that is the default on a large number of
filesystems.  And unlocking the group, calling the ext4_error(), and
then relocking the group can't possibly be safe; after all, we're
holding the lock for a reason!  

In the errors=continue case, we don't actually need to unlock and
relock the group, since all we need to do is to printk a message to
the console.  In the errors=panic case, we'll never return; and in the
errors=remount-ro case, relocking is kind of pointless but harmless,
since once the journal is aborted, there's no way the allocation is
going to succeed anyway, and a subsequent call will return an error.

This gets ugly to get right.  Since the situation where we need to
call ext4_error() while holding a spinlock which should be preserved
in the errors=continue case seems to be unique to mballoc, maybe
something like this?

static int ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp,
       	    		          const char *function,
            		          const char *fmt, ...)
{

	va_start(args, fmt);
	printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
	vprintk(fmt, args);
	printk("\n");
	va_end(args);

	if (test_opt(sb, ERROR_CONT)) {
		EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
		es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
		ext4_commit_super(sb, es, 0);
		return 0;
	}
	ext4_unlock_group(sb, grp);
	ext4_handle_error(sb);
	/* 
	 * We only get here in the ERRORS_RO case; relocking the group
	 * may be dangerous, but nothing bad will happen since the
	 * filesystem will have already been marked read/only and the
	 * journal has been aborted.  We return 1 as a hint to callers
	 * who might what to use the return value from
	 * ext4_grp_locked_error() to distinguish beween the
	 * ERRORS_CONT and ERRORS_RO case, and perhaps return more
	 * aggressively from the ext4 function in question, with a
	 * more appropriate error code.
	 */
	ext4_lock_group(sb, grp);
}

This requires access to two static functions in fs/ext4/super.c, so
probably the best thing to do is to put this function in
fs/ext4/super.c, and move the definition of ext4_[un]lock_group from
mballoc.h to ext4.h.

Comments?

						- Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 772e05b..039a5a6 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -460,10 +460,12 @@  static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
 			blocknr +=
 			    le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
 
+			ext4_unlock_group(sb, e4b->bd_group);
 			ext4_error(sb, __func__, "double-free of inode"
 				   " %lu's block %llu(bit %u in group %u)\n",
 				   inode ? inode->i_ino : 0, blocknr,
 				   first + i, e4b->bd_group);
+			ext4_unlock_group(sb, e4b->bd_group);
 		}
 		mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
 	}
@@ -704,6 +706,7 @@  static void ext4_mb_generate_buddy(struct super_block *sb,
 	grp->bb_fragments = fragments;
 
 	if (free != grp->bb_free) {
+		ext4_unlock_group(sb, group);
 		ext4_error(sb, __func__,
 			"EXT4-fs: group %u: %u blocks in bitmap, %u in gd\n",
 			group, free, grp->bb_free);
@@ -711,6 +714,7 @@  static void ext4_mb_generate_buddy(struct super_block *sb,
 		 * If we intent to continue, we consider group descritor
 		 * corrupt and update bb_free using bitmap value
 		 */
+		ext4_lock_group(sb, group);
 		grp->bb_free = free;
 	}
 
@@ -1625,15 +1629,18 @@  static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
 			 * free blocks even though group info says we
 			 * we have free blocks
 			 */
+			ext4_unlock_group(sb, e4b->bd_group);
 			ext4_error(sb, __func__, "%d free blocks as per "
 					"group info. But bitmap says 0\n",
 					free);
+			ext4_lock_group(sb, e4b->bd_group);
 			break;
 		}
 
 		mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
 		BUG_ON(ex.fe_len <= 0);
 		if (free < ex.fe_len) {
+			ext4_unlock_group(sb, e4b->bd_group);
 			ext4_error(sb, __func__, "%d free blocks as per "
 					"group info. But got %d blocks\n",
 					free, ex.fe_len);
@@ -1642,6 +1649,7 @@  static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
 			 * indicate that the bitmap is corrupt. So exit
 			 * without claiming the space.
 			 */
+			ext4_lock_group(sb, e4b->bd_group);
 			break;
 		}
 
@@ -3789,6 +3797,7 @@  ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
 		bit = next + 1;
 	}
 	if (free != pa->pa_free) {
+		ext4_unlock_group(sb, group);
 		printk(KERN_CRIT "pa %p: logic %lu, phys. %lu, len %lu\n",
 			pa, (unsigned long) pa->pa_lstart,
 			(unsigned long) pa->pa_pstart,
@@ -3799,6 +3808,7 @@  ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
 		 * pa is already deleted so we use the value obtained
 		 * from the bitmap and continue.
 		 */
+		ext4_lock_group(sb, group);
 	}
 	atomic_add(free, &sbi->s_mb_discarded);
 
@@ -4607,9 +4617,11 @@  ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
 		else if (block >= (entry->start_blk + entry->count))
 			n = &(*n)->rb_right;
 		else {
+			ext4_unlock_group(sb, e4b->bd_group);
 			ext4_error(sb, __func__,
 			    "Double free of blocks %d (%d %d)\n",
 			    block, entry->start_blk, entry->count);
+			ext4_unlock_group(sb, e4b->bd_group);
 			return 0;
 		}
 	}