Patchwork libext2: clear BG_BLOCK_UNINIT in any group containing metadata

login
register
mail settings
Submitter Eric Sandeen
Date Feb. 23, 2012, 6:20 p.m.
Message ID <4F46835F.5080009@redhat.com>
Download mbox | patch
Permalink /patch/142689/
State Rejected
Headers show

Comments

Eric Sandeen - Feb. 23, 2012, 6:20 p.m.
Running a test like the m_uninit test runs:

# mke2fs -F -o Linux -O uninit_bg testfs 131072

results in groups which have blocks allocated for metadata, but
which still have the BLOCK_UNINIT flag set:

Group 2: (Blocks 16385-24576) [INODE_UNINIT, BLOCK_UNINIT, ITABLE_ZEROED]
  Checksum 0x17c2, unused inodes 2048
  Block bitmap at 16385 (+0), Inode bitmap at 16386 (+1)
  Inode table at 16387-16642 (+2)

however, e2fsck doesn't find this error - which seems like another bug.

Doing another test like this:

mkfs.ext4 -I 256 -b 4096 -N 256 -G 1 -g 256 /dev/loop0 1024
mount /dev/loop0 /mnt/test
mkdir /mnt/test/dir
umount /mnt/test
fsck.ext4 -fn /dev/loop0

does find the error on fsck, I think because BLOCK_UNINIT gets
cleared, and the group isn't ignored at fsck time:

Free blocks count wrong for group #2 (253, counted=249).

In the first case, flex_bg is not set; in the 2nd it is set,
but s_log_groups_per_flex == 0 (due to -G 1), so in neither
case is flexbg_size set in ext2fs_allocate_group_table().

This leads to skipping the code which clears BLOCK_UNINIT after
the metadata is allocated.

I think the fix is to unconditionally clear the BLOCK_UNINIT flag
in whatever block group gets metadata allocated to it, as follows.

the m_uninit test needs to be fixed up too, as does e2fsck it seems,
to catch this error in the first place?

Sorry for the long commit message, if the patch looks right I can
try to do better ;)

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
---


--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Sandeen - Feb. 23, 2012, 7:03 p.m.
On 2/23/12 12:20 PM, Eric Sandeen wrote:
> Running a test like the m_uninit test runs:
> 
> # mke2fs -F -o Linux -O uninit_bg testfs 131072
> 
> results in groups which have blocks allocated for metadata, but
> which still have the BLOCK_UNINIT flag set:
> 
> Group 2: (Blocks 16385-24576) [INODE_UNINIT, BLOCK_UNINIT, ITABLE_ZEROED]
>   Checksum 0x17c2, unused inodes 2048
>   Block bitmap at 16385 (+0), Inode bitmap at 16386 (+1)
>   Inode table at 16387-16642 (+2)
> 
> however, e2fsck doesn't find this error - which seems like another bug.
> 
> Doing another test like this:
> 
> mkfs.ext4 -I 256 -b 4096 -N 256 -G 1 -g 256 /dev/loop0 1024
> mount /dev/loop0 /mnt/test
> mkdir /mnt/test/dir
> umount /mnt/test
> fsck.ext4 -fn /dev/loop0
> 
> does find the error on fsck, I think because BLOCK_UNINIT gets
> cleared, and the group isn't ignored at fsck time:
> 
> Free blocks count wrong for group #2 (253, counted=249).
> 
> In the first case, flex_bg is not set; in the 2nd it is set,
> but s_log_groups_per_flex == 0 (due to -G 1), so in neither
> case is flexbg_size set in ext2fs_allocate_group_table().
> 
> This leads to skipping the code which clears BLOCK_UNINIT after
> the metadata is allocated.
> 
> I think the fix is to unconditionally clear the BLOCK_UNINIT flag
> in whatever block group gets metadata allocated to it, as follows.

I guess that's not right after talking to adilger.  I have to punt
on this for now, though, other deadlines approach.  Testcases above
if anyone else wants to look.

-Eric

> the m_uninit test needs to be fixed up too, as does e2fsck it seems,
> to catch this error in the first place?
> 
> Sorry for the long commit message, if the patch looks right I can
> try to do better ;)
> 
> Signed-off-by: Eric Sandeen <sandeen@redhat.com>
> ---
> 
> diff --git a/lib/ext2fs/alloc_tables.c b/lib/ext2fs/alloc_tables.c
> index 9f3d4e0..3462e85 100644
> --- a/lib/ext2fs/alloc_tables.c
> +++ b/lib/ext2fs/alloc_tables.c
> @@ -88,6 +88,7 @@ errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
>  	blk64_t		group_blk, start_blk, last_blk, new_blk, blk;
>  	dgrp_t		last_grp = 0;
>  	int		rem_grps = 0, flexbg_size = 0;
> +	dgrp_t		gr;
>  
>  	group_blk = ext2fs_group_first_block2(fs, group);
>  	last_blk = ext2fs_group_last_block2(fs, group);
> @@ -141,13 +142,13 @@ errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
>  			return retval;
>  		ext2fs_mark_block_bitmap2(bmap, new_blk);
>  		ext2fs_block_bitmap_loc_set(fs, group, new_blk);
> +		gr = ext2fs_group_of_blk2(fs, new_blk);
> +		ext2fs_bg_flags_clear(fs, gr, EXT2_BG_BLOCK_UNINIT);
>  		if (flexbg_size) {
> -			dgrp_t gr = ext2fs_group_of_blk2(fs, new_blk);
>  			ext2fs_bg_free_blocks_count_set(fs, gr, ext2fs_bg_free_blocks_count(fs, gr) - 1);
>  			ext2fs_free_blocks_count_add(fs->super, -1);
> -			ext2fs_bg_flags_clear(fs, gr, EXT2_BG_BLOCK_UNINIT);
> -			ext2fs_group_desc_csum_set(fs, gr);
>  		}
> +		ext2fs_group_desc_csum_set(fs, gr);
>  	}
>  
>  	if (flexbg_size) {
> @@ -172,13 +173,13 @@ errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
>  			return retval;
>  		ext2fs_mark_block_bitmap2(bmap, new_blk);
>  		ext2fs_inode_bitmap_loc_set(fs, group, new_blk);
> +		gr = ext2fs_group_of_blk2(fs, new_blk);
> +		ext2fs_bg_flags_clear(fs, gr, EXT2_BG_BLOCK_UNINIT);
>  		if (flexbg_size) {
> -			dgrp_t gr = ext2fs_group_of_blk2(fs, new_blk);
>  			ext2fs_bg_free_blocks_count_set(fs, gr, ext2fs_bg_free_blocks_count(fs, gr) - 1);
>  			ext2fs_free_blocks_count_add(fs->super, -1);
> -			ext2fs_bg_flags_clear(fs, gr, EXT2_BG_BLOCK_UNINIT);
> -			ext2fs_group_desc_csum_set(fs, gr);
>  		}
> +		ext2fs_group_desc_csum_set(fs, gr);
>  	}
>  
>  	/*
> @@ -209,14 +210,13 @@ errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
>  		     j < fs->inode_blocks_per_group;
>  		     j++, blk++) {
>  			ext2fs_mark_block_bitmap2(bmap, blk);
> +			gr = ext2fs_group_of_blk2(fs, blk);
> +			ext2fs_bg_flags_clear(fs, gr, EXT2_BG_BLOCK_UNINIT);
>  			if (flexbg_size) {
> -				dgrp_t gr = ext2fs_group_of_blk2(fs, blk);
>  				ext2fs_bg_free_blocks_count_set(fs, gr, ext2fs_bg_free_blocks_count(fs, gr) - 1);
>  				ext2fs_free_blocks_count_add(fs->super, -1);
> -				ext2fs_bg_flags_clear(fs, gr,
> -						     EXT2_BG_BLOCK_UNINIT);
> -				ext2fs_group_desc_csum_set(fs, gr);
>  			}
> +			ext2fs_group_desc_csum_set(fs, gr);
>  		}
>  		ext2fs_inode_table_loc_set(fs, group, new_blk);
>  	}
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Theodore Ts'o - March 27, 2012, 10:40 p.m.
On Thu, Feb 23, 2012 at 12:20:15PM -0600, Eric Sandeen wrote:
> Running a test like the m_uninit test runs:
> 
> # mke2fs -F -o Linux -O uninit_bg testfs 131072
> 
> results in groups which have blocks allocated for metadata, but
> which still have the BLOCK_UNINIT flag set:
> 
> Group 2: (Blocks 16385-24576) [INODE_UNINIT, BLOCK_UNINIT, ITABLE_ZEROED]
>   Checksum 0x17c2, unused inodes 2048
>   Block bitmap at 16385 (+0), Inode bitmap at 16386 (+1)
>   Inode table at 16387-16642 (+2)

Sorry for the delay in getting back to you.  The general rule here is
what triggers BLOCK_UNINIT getting cleared is when we allocate
metadata blocks for *another* block group in that block group.  When
we initialize an uninitialized block group, it's easy enough to set
the bits for the block group's allocation bitmaps and inode tables.

However, we *don't* want to have to scan all of the block groups to
see if they have metadata blocks appearing in a particular block
group, so that's the reason for this rule.

So in the above dumpe2fs output, since we aren't using flex_bg, the
only metadata blocks which are in a bg is the bg's own metadata
blocks, and so that's why it's OK for BLOCK_UNINIT to be set.  This
isn't a bug.

					- Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Sandeen - March 28, 2012, 3:28 p.m.
On 3/27/12 5:40 PM, Ted Ts'o wrote:
> On Thu, Feb 23, 2012 at 12:20:15PM -0600, Eric Sandeen wrote:
>> Running a test like the m_uninit test runs:
>>
>> # mke2fs -F -o Linux -O uninit_bg testfs 131072
>>
>> results in groups which have blocks allocated for metadata, but
>> which still have the BLOCK_UNINIT flag set:
>>
>> Group 2: (Blocks 16385-24576) [INODE_UNINIT, BLOCK_UNINIT, ITABLE_ZEROED]
>>   Checksum 0x17c2, unused inodes 2048
>>   Block bitmap at 16385 (+0), Inode bitmap at 16386 (+1)
>>   Inode table at 16387-16642 (+2)
> 
> Sorry for the delay in getting back to you.  The general rule here is
> what triggers BLOCK_UNINIT getting cleared is when we allocate
> metadata blocks for *another* block group in that block group.  When
> we initialize an uninitialized block group, it's easy enough to set
> the bits for the block group's allocation bitmaps and inode tables.
> 
> However, we *don't* want to have to scan all of the block groups to
> see if they have metadata blocks appearing in a particular block
> group, so that's the reason for this rule.
> 
> So in the above dumpe2fs output, since we aren't using flex_bg, the
> only metadata blocks which are in a bg is the bg's own metadata
> blocks, and so that's why it's OK for BLOCK_UNINIT to be set.  This
> isn't a bug.

Hrm, ok, thanks for looking at it.

FWIW, this still fails:

dd if=/dev/zero of=fsfile bs=1M count=128
losetup /dev/loop0 fsfile
mkfs.ext4 -I 256 -b 4096 -N 256 -G 1 -g 256 /dev/loop0 1024
mount /dev/loop0 /mnt/test
mkdir /mnt/test/dir
umount /mnt/test
fsck.ext4 -fn /dev/loop0

with:

Free blocks count wrong for group #2 (253, counted=249).
Fix? no

so will have to look further at what's going on I guess.

-Eric

 
> 					- Ted

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/lib/ext2fs/alloc_tables.c b/lib/ext2fs/alloc_tables.c
index 9f3d4e0..3462e85 100644
--- a/lib/ext2fs/alloc_tables.c
+++ b/lib/ext2fs/alloc_tables.c
@@ -88,6 +88,7 @@  errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
 	blk64_t		group_blk, start_blk, last_blk, new_blk, blk;
 	dgrp_t		last_grp = 0;
 	int		rem_grps = 0, flexbg_size = 0;
+	dgrp_t		gr;
 
 	group_blk = ext2fs_group_first_block2(fs, group);
 	last_blk = ext2fs_group_last_block2(fs, group);
@@ -141,13 +142,13 @@  errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
 			return retval;
 		ext2fs_mark_block_bitmap2(bmap, new_blk);
 		ext2fs_block_bitmap_loc_set(fs, group, new_blk);
+		gr = ext2fs_group_of_blk2(fs, new_blk);
+		ext2fs_bg_flags_clear(fs, gr, EXT2_BG_BLOCK_UNINIT);
 		if (flexbg_size) {
-			dgrp_t gr = ext2fs_group_of_blk2(fs, new_blk);
 			ext2fs_bg_free_blocks_count_set(fs, gr, ext2fs_bg_free_blocks_count(fs, gr) - 1);
 			ext2fs_free_blocks_count_add(fs->super, -1);
-			ext2fs_bg_flags_clear(fs, gr, EXT2_BG_BLOCK_UNINIT);
-			ext2fs_group_desc_csum_set(fs, gr);
 		}
+		ext2fs_group_desc_csum_set(fs, gr);
 	}
 
 	if (flexbg_size) {
@@ -172,13 +173,13 @@  errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
 			return retval;
 		ext2fs_mark_block_bitmap2(bmap, new_blk);
 		ext2fs_inode_bitmap_loc_set(fs, group, new_blk);
+		gr = ext2fs_group_of_blk2(fs, new_blk);
+		ext2fs_bg_flags_clear(fs, gr, EXT2_BG_BLOCK_UNINIT);
 		if (flexbg_size) {
-			dgrp_t gr = ext2fs_group_of_blk2(fs, new_blk);
 			ext2fs_bg_free_blocks_count_set(fs, gr, ext2fs_bg_free_blocks_count(fs, gr) - 1);
 			ext2fs_free_blocks_count_add(fs->super, -1);
-			ext2fs_bg_flags_clear(fs, gr, EXT2_BG_BLOCK_UNINIT);
-			ext2fs_group_desc_csum_set(fs, gr);
 		}
+		ext2fs_group_desc_csum_set(fs, gr);
 	}
 
 	/*
@@ -209,14 +210,13 @@  errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
 		     j < fs->inode_blocks_per_group;
 		     j++, blk++) {
 			ext2fs_mark_block_bitmap2(bmap, blk);
+			gr = ext2fs_group_of_blk2(fs, blk);
+			ext2fs_bg_flags_clear(fs, gr, EXT2_BG_BLOCK_UNINIT);
 			if (flexbg_size) {
-				dgrp_t gr = ext2fs_group_of_blk2(fs, blk);
 				ext2fs_bg_free_blocks_count_set(fs, gr, ext2fs_bg_free_blocks_count(fs, gr) - 1);
 				ext2fs_free_blocks_count_add(fs->super, -1);
-				ext2fs_bg_flags_clear(fs, gr,
-						     EXT2_BG_BLOCK_UNINIT);
-				ext2fs_group_desc_csum_set(fs, gr);
 			}
+			ext2fs_group_desc_csum_set(fs, gr);
 		}
 		ext2fs_inode_table_loc_set(fs, group, new_blk);
 	}