Message ID | 1346690758-21072-2-git-send-email-tytso@mit.edu |
---|---|
State | Superseded, archived |
Headers | show |
Hi Kevin, Ted has sent out the patches on online resizing for meta_bg and 64bits, so you can have a try again. It seems that the bug in e2fsprogs has been fixed. Yongqiang. On Tue, Sep 4, 2012 at 12:45 AM, Theodore Ts'o <tytso@mit.edu> wrote: > The file system overhead calculation in calculate_minimum_resize_size > was incorrect meta_bg file systems. This caused the minimum size to > underflow for very large file systems, which threw resize2fs into a > loop generally lasted longer than the user's patience. > > Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> > --- > resize/resize2fs.c | 25 ++++++++++++++++++++++++- > 1 file changed, 24 insertions(+), 1 deletion(-) > > diff --git a/resize/resize2fs.c b/resize/resize2fs.c > index dc2805d..1dce498 100644 > --- a/resize/resize2fs.c > +++ b/resize/resize2fs.c > @@ -1890,6 +1890,8 @@ blk64_t calculate_minimum_resize_size(ext2_filsys fs) > blk64_t grp, data_needed, last_start; > blk64_t overhead = 0; > int num_of_superblocks = 0; > + blk64_t super_overhead = 0; > + int old_desc_blocks; > int extra_groups = 0; > int flexbg_size = 1 << fs->super->s_log_groups_per_flex; > > @@ -1909,15 +1911,36 @@ blk64_t calculate_minimum_resize_size(ext2_filsys fs) > * we need to figure out how many backup superblocks we have so we can > * account for that in the metadata > */ > + if (fs->super->s_feature_incompat & EXT2_FEATURE_INCOMPAT_META_BG) > + old_desc_blocks = fs->super->s_first_meta_bg; > + else > + old_desc_blocks = fs->desc_blocks + > + fs->super->s_reserved_gdt_blocks; > + > for (grp = 0; grp < fs->group_desc_count; grp++) { > + blk64_t super_blk, old_desc_blk, new_desc_blk; > + int has_super; > + > + ext2fs_super_and_bgd_loc2(fs, grp, &super_blk, > + &old_desc_blk, &new_desc_blk, 0); > + has_super = ((grp == 0) || super_blk); > + if (has_super) > + super_overhead++; > + if (old_desc_blk) > + super_overhead += old_desc_blocks; > + else if (new_desc_blk) > + super_overhead++; > if (ext2fs_bg_has_super(fs, grp)) > 
num_of_superblocks++; > + > } > + printf("super overhead is %llu, old algorithm was %llu\n", > + super_overhead, SUPER_OVERHEAD(fs) * num_of_superblocks); > > /* calculate how many blocks are needed for data */ > data_needed = ext2fs_blocks_count(fs->super) - > ext2fs_free_blocks_count(fs->super); > - data_needed -= SUPER_OVERHEAD(fs) * num_of_superblocks; > + data_needed -= super_overhead; > data_needed -= META_OVERHEAD(fs) * fs->group_desc_count; > > if (fs->super->s_feature_incompat & EXT4_FEATURE_INCOMPAT_FLEX_BG) { > -- > 1.7.12.rc0.22.gcdd159b > > -- > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Anssi, Ted has sent out the patches on online-resizing on meta_bg and 64bits, please have a try! Yongqiang. On Tue, Sep 4, 2012 at 12:45 AM, Theodore Ts'o <tytso@mit.edu> wrote: > The file system overhead calculation in calculate_minimum_resize_size > was incorrect meta_bg file systems. This caused the minimum size to > underflow for very large file systems, which threw resize2fs into a > loop generally lasted longer than the user's patience. > > Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> > --- > resize/resize2fs.c | 25 ++++++++++++++++++++++++- > 1 file changed, 24 insertions(+), 1 deletion(-) > > diff --git a/resize/resize2fs.c b/resize/resize2fs.c > index dc2805d..1dce498 100644 > --- a/resize/resize2fs.c > +++ b/resize/resize2fs.c > @@ -1890,6 +1890,8 @@ blk64_t calculate_minimum_resize_size(ext2_filsys fs) > blk64_t grp, data_needed, last_start; > blk64_t overhead = 0; > int num_of_superblocks = 0; > + blk64_t super_overhead = 0; > + int old_desc_blocks; > int extra_groups = 0; > int flexbg_size = 1 << fs->super->s_log_groups_per_flex; > > @@ -1909,15 +1911,36 @@ blk64_t calculate_minimum_resize_size(ext2_filsys fs) > * we need to figure out how many backup superblocks we have so we can > * account for that in the metadata > */ > + if (fs->super->s_feature_incompat & EXT2_FEATURE_INCOMPAT_META_BG) > + old_desc_blocks = fs->super->s_first_meta_bg; > + else > + old_desc_blocks = fs->desc_blocks + > + fs->super->s_reserved_gdt_blocks; > + > for (grp = 0; grp < fs->group_desc_count; grp++) { > + blk64_t super_blk, old_desc_blk, new_desc_blk; > + int has_super; > + > + ext2fs_super_and_bgd_loc2(fs, grp, &super_blk, > + &old_desc_blk, &new_desc_blk, 0); > + has_super = ((grp == 0) || super_blk); > + if (has_super) > + super_overhead++; > + if (old_desc_blk) > + super_overhead += old_desc_blocks; > + else if (new_desc_blk) > + super_overhead++; > if (ext2fs_bg_has_super(fs, grp)) > num_of_superblocks++; > + > } > + printf("super overhead is %llu, old algorithm 
was %llu\n", > + super_overhead, SUPER_OVERHEAD(fs) * num_of_superblocks); > > /* calculate how many blocks are needed for data */ > data_needed = ext2fs_blocks_count(fs->super) - > ext2fs_free_blocks_count(fs->super); > - data_needed -= SUPER_OVERHEAD(fs) * num_of_superblocks; > + data_needed -= super_overhead; > data_needed -= META_OVERHEAD(fs) * fs->group_desc_count; > > if (fs->super->s_feature_incompat & EXT4_FEATURE_INCOMPAT_FLEX_BG) { > -- > 1.7.12.rc0.22.gcdd159b > > -- > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
2012/9/4 Yongqiang Yang <xiaoqiangnk@gmail.com> > > Hi Kevin, > > Ted has sent out the patches on online resizing for meta_bg and > 64bits, so you can have a try again. It seems that the bug in > e2fsprogs has been fixed. > > Yongqiang. > Hi Ted & Yongqiang, I will try to test the patch as soon as possible. Thanks a lot for your effort. Regards, Kevin Liao > On Tue, Sep 4, 2012 at 12:45 AM, Theodore Ts'o <tytso@mit.edu> wrote: > > The file system overhead calculation in calculate_minimum_resize_size > > was incorrect meta_bg file systems. This caused the minimum size to > > underflow for very large file systems, which threw resize2fs into a > > loop generally lasted longer than the user's patience. > > > > Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> > > --- > > resize/resize2fs.c | 25 ++++++++++++++++++++++++- > > 1 file changed, 24 insertions(+), 1 deletion(-) > > > > diff --git a/resize/resize2fs.c b/resize/resize2fs.c > > index dc2805d..1dce498 100644 > > --- a/resize/resize2fs.c > > +++ b/resize/resize2fs.c > > @@ -1890,6 +1890,8 @@ blk64_t calculate_minimum_resize_size(ext2_filsys > > fs) > > blk64_t grp, data_needed, last_start; > > blk64_t overhead = 0; > > int num_of_superblocks = 0; > > + blk64_t super_overhead = 0; > > + int old_desc_blocks; > > int extra_groups = 0; > > int flexbg_size = 1 << fs->super->s_log_groups_per_flex; > > > > @@ -1909,15 +1911,36 @@ blk64_t > > calculate_minimum_resize_size(ext2_filsys fs) > > * we need to figure out how many backup superblocks we have so > > we can > > * account for that in the metadata > > */ > > + if (fs->super->s_feature_incompat & > > EXT2_FEATURE_INCOMPAT_META_BG) > > + old_desc_blocks = fs->super->s_first_meta_bg; > > + else > > + old_desc_blocks = fs->desc_blocks + > > + fs->super->s_reserved_gdt_blocks; > > + > > for (grp = 0; grp < fs->group_desc_count; grp++) { > > + blk64_t super_blk, old_desc_blk, new_desc_blk; > > + int has_super; > > + > > + ext2fs_super_and_bgd_loc2(fs, grp, &super_blk, > > + 
&old_desc_blk, &new_desc_blk, > > 0); > > + has_super = ((grp == 0) || super_blk); > > + if (has_super) > > + super_overhead++; > > + if (old_desc_blk) > > + super_overhead += old_desc_blocks; > > + else if (new_desc_blk) > > + super_overhead++; > > if (ext2fs_bg_has_super(fs, grp)) > > num_of_superblocks++; > > + > > } > > + printf("super overhead is %llu, old algorithm was %llu\n", > > + super_overhead, SUPER_OVERHEAD(fs) * num_of_superblocks); > > > > /* calculate how many blocks are needed for data */ > > data_needed = ext2fs_blocks_count(fs->super) - > > ext2fs_free_blocks_count(fs->super); > > - data_needed -= SUPER_OVERHEAD(fs) * num_of_superblocks; > > + data_needed -= super_overhead; > > data_needed -= META_OVERHEAD(fs) * fs->group_desc_count; > > > > if (fs->super->s_feature_incompat & > > EXT4_FEATURE_INCOMPAT_FLEX_BG) { > > -- > > 1.7.12.rc0.22.gcdd159b > > > > -- > > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in > > the body of a message to majordomo@vger.kernel.org > > More majordomo info at http://vger.kernel.org/majordomo-info.html > > > > -- > Best Wishes > Yongqiang Yang -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Sep 04, 2012 at 09:59:55AM +0800, Yongqiang Yang wrote: > Hi Kevin, > > Ted has sent out the patches on online resizing for meta_bg and > 64bits, so you can have a try again. It seems that the bug in > e2fsprogs has been fixed. Make sure you use the latest version of the kernel patches that I just sent out. There were quite a number of bugs in Yongqiang's original patch set which I tripped over while I was testing 64-bit resize --- and please note that there are definitely still rough edges (especially in cases where the file system was created < 16TB, but with the 64-bit feature and resize_inode features enabled). There may also be bugs for the straightforward case of resizing very large file systems. So while I very much appreciate users giving the code a try and sending us feedback, please do think twice before using this code on file systems with data that hasn't been backed up recently. (Of course, being good System Administrators you are all keeping --- and verifying --- regular backups, right? :-) Regards, - Ted -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
04.09.2012 05:14, Theodore Ts'o kirjoitti: > On Tue, Sep 04, 2012 at 09:59:55AM +0800, Yongqiang Yang wrote: >> Hi Kevin, >> >> Ted has sent out the patches on online resizing for meta_bg and >> 64bits, so you can have a try again. It seems that the bug in >> e2fsprogs has been fixed. > > Make sure you use the latest version of the kernel patches that I just > sent out. There quite a number of bugs in the Yongqiang's original > patch set which I tripped over while I was testing 64-bit resize --- > and please note that there are definitely still rough edges > (especially for in cases where the file system was created < 16TB, but > with the 64-bit feature and resize_inode features enabled). There may > also be bugs for the straightforward case of resizing very large file > systems. Indeed, I hit a BUG_ON() on resize from 8589934590 blocks to 8589934640 blocks (4k): [ 676.140165] ------------[ cut here ]------------ [ 676.150026] kernel BUG at fs/ext4/resize.c:255! [ 676.150026] invalid opcode: 0000 [#1] SMP [ 676.150026] CPU 0 [ 676.150026] Modules linked in:[ 676.150026] dm_snapshot dm_zero af_packet dm_mod joydev hid_generic ppdev snd_intel8x0 snd_ac97_codec ac97_bus usbhid microcode e1000 snd_pcm snd_page_alloc snd_timer hid i2c_piix4 i2c_core button snd soundcore ac parport_pc parport processor evdev ipv6 autofs4 ext4 crc16 jbd2 ohci_hcd sd_mod crc_t10dif usbcore usb_common sr_mod ata_piix ahci libahci libata scsi_mod [last unloaded: nf_defrag_ipv4] [ 676.150026] Pid: 1793, comm: resize2fs Not tainted 3.5.3-server-2anssi.9.ext4.10.2 #1 innotek GmbH VirtualBox [ 676.150026] RIP: 0010:[<ffffffffa014e9bc>] [<ffffffffa014e9bc>] ext4_resize_fs+0x94c/0xa30 [ext4] [ 676.150026] RSP: 0018:ffff880046eedd18 EFLAGS: 00010246 [ 676.150026] RAX: 0000000000040001 RBX: ffff88005b799800 RCX: 0000000000000001 [ 676.150026] RDX: 0000000000081bf1 RSI: 0000000000040001 RDI: ffff88005b068000 [ 676.150026] RBP: ffff880046eeddd8 R08: 0000000200000003 R09: 0000000000000000 [ 676.150026] 
R10: 0000000000000000 R11: 0000000080042000 R12: 0000000000040001 [ 676.150026] R13: ffff880037fb5e20 R14: 0000000000000000 R15: ffff88005b068000 [ 676.150026] FS: 00007fb43e067740(0000) GS:ffff88005fc00000(0000) knlGS:0000000000000000 [ 676.150026] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 676.150026] CR2: 00007fd905261178 CR3: 0000000044993000 CR4: 00000000000006f0 [ 676.150026] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 676.150026] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 676.150026] Process resize2fs (pid: 1793, threadinfo ffff880046eec000, task ffff880053f824c0) [ 676.150026] Stack: [ 676.150026] ffff880046eedda8 ffffffff8117971e 0000000f53529c40 0000000000000000 [ 676.150026] 000000020000002f 0000000000000000 0000000200000030 ffff88005b20e990 [ 676.150026] 0000000100000001 ffff880000000001 0000000200000000 0000000200000000 [ 676.150026] Call Trace: [ 676.150026] [<ffffffff8117971e>] ? do_last+0x2ee/0x9f0 [ 676.150026] [<ffffffffa012e05f>] ext4_ioctl+0x9af/0xbc0 [ext4] [ 676.150026] [<ffffffff8117db6f>] do_vfs_ioctl+0x8f/0x4e0 [ 676.150026] [<ffffffff8117e051>] sys_ioctl+0x91/0xa0 [ 676.150026] [<ffffffff8147d0bd>] system_call_fastpath+0x1a/0x1f [ 676.150026] Code: c7 c1 60 2c 17 a0 ba 0c 07 00 00 48 c7 c6 9b e8 16 a0 4c 89 e7 31 c0 e8 b3 80 ff ff c7 85 6c ff ff ff ea ff ff ff e9 4c f8 ff ff <0f> 0b 8b 55 a4 8b 45 a0 f7 da 44 31 e0 85 c2 0f 84 6c fb ff ff [ 676.150026] RIP [<ffffffffa014e9bc>] ext4_resize_fs+0x94c/0xa30 [ext4] [ 676.150026] RSP <ffff880046eedd18> [ 676.788513] ---[ end trace fbf2bd5a59c2ab99 ]--- This is BUG_ON(src_group >= group_data[0].group + flex_gd->count); I was using the below basic test script which uses a virtual large volume in LVM (e2fsprogs is 1.42.5, except for resize2fs): #!/bin/bash -ex VG=delta LV=ext4test LVSIZE=40T MOUNTPOINT="/mnt/iso" RESIZE2FS=/root/resize2fs INITIAL_SIZE_K=4294967295 NEW_BLOCKS=8589934590 lvcreate -l 100%FREE -V "$LVSIZE" -n "$LV" "$VG" 
mkfs.ext4 -O meta_bg,64bit,^resize_inode "/dev/$VG/$LV" "$INITIAL_SIZE_K" mount "/dev/$VG/$LV" "$MOUNTPOINT" mkdir "$MOUNTPOINT/test" for file in 1 2; do dd if=/dev/urandom bs=1M count=50 of="$MOUNTPOINT/test/$file" done md5sum $MOUNTPOINT/test/* > $MOUNTPOINT/MD5SUM for N in $NEW_BLOCKS $((NEW_BLOCKS + 50)); do $RESIZE2FS "/dev/$VG/$LV" "$N" umount "$MOUNTPOINT" fsck.ext4 -nvf "/dev/$VG/$LV" mount "/dev/$VG/$LV" "$MOUNTPOINT" md5sum -c "$MOUNTPOINT/MD5SUM" done > So while I very much appreciate users giving the code a try and > sending us feedback, please do think twice before using this code on > file systems with data that hasn't been backed up recently. (Of > course, being good System Administrators you are all keeping --- and > verifying --- regular backups, right? :-)
Hi Anssi, The bug was fixed for a while, please check the patches: [PATCH 1/2] ext4: teach resize report old blocks count correctly [PATCH 2/2] ext4: ignore last group without enough space when resizing Please have a try!!! Thanks, Yongqiang. On Wed, Sep 5, 2012 at 1:05 AM, Anssi Hannula <anssi.hannula@iki.fi> wrote: > 04.09.2012 05:14, Theodore Ts'o kirjoitti: >> On Tue, Sep 04, 2012 at 09:59:55AM +0800, Yongqiang Yang wrote: >>> Hi Kevin, >>> >>> Ted has sent out the patches on online resizing for meta_bg and >>> 64bits, so you can have a try again. It seems that the bug in >>> e2fsprogs has been fixed. >> >> Make sure you use the latest version of the kernel patches that I just >> sent out. There quite a number of bugs in the Yongqiang's original >> patch set which I tripped over while I was testing 64-bit resize --- >> and please note that there are definitely still rough edges >> (especially for in cases where the file system was created < 16TB, but >> with the 64-bit feature and resize_inode features enabled). There may >> also be bugs for the straightforward case of resizing very large file >> systems. > > Indeed, I hit a BUG_ON() on resize from 8589934590 blocks to 8589934640 > blocks (4k): > [ 676.140165] ------------[ cut here ]------------ > [ 676.150026] kernel BUG at fs/ext4/resize.c:255! 
> [ 676.150026] invalid opcode: 0000 [#1] SMP > [ 676.150026] CPU 0 > [ 676.150026] Modules linked in:[ 676.150026] dm_snapshot dm_zero > af_packet dm_mod joydev hid_generic ppdev snd_intel8x0 snd_ac97_codec > ac97_bus usbhid microcode e1000 snd_pcm snd_page_alloc snd_timer hid > i2c_piix4 i2c_core button snd soundcore ac parport_pc parport processor > evdev ipv6 autofs4 ext4 crc16 jbd2 ohci_hcd sd_mod crc_t10dif usbcore > usb_common sr_mod ata_piix ahci libahci libata scsi_mod [last unloaded: > nf_defrag_ipv4] > > [ 676.150026] Pid: 1793, comm: resize2fs Not tainted > 3.5.3-server-2anssi.9.ext4.10.2 #1 innotek GmbH VirtualBox > [ 676.150026] RIP: 0010:[<ffffffffa014e9bc>] [<ffffffffa014e9bc>] > ext4_resize_fs+0x94c/0xa30 [ext4] > [ 676.150026] RSP: 0018:ffff880046eedd18 EFLAGS: 00010246 > [ 676.150026] RAX: 0000000000040001 RBX: ffff88005b799800 RCX: > 0000000000000001 > [ 676.150026] RDX: 0000000000081bf1 RSI: 0000000000040001 RDI: > ffff88005b068000 > [ 676.150026] RBP: ffff880046eeddd8 R08: 0000000200000003 R09: > 0000000000000000 > [ 676.150026] R10: 0000000000000000 R11: 0000000080042000 R12: > 0000000000040001 > [ 676.150026] R13: ffff880037fb5e20 R14: 0000000000000000 R15: > ffff88005b068000 > [ 676.150026] FS: 00007fb43e067740(0000) GS:ffff88005fc00000(0000) > knlGS:0000000000000000 > [ 676.150026] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b > [ 676.150026] CR2: 00007fd905261178 CR3: 0000000044993000 CR4: > 00000000000006f0 > [ 676.150026] DR0: 0000000000000000 DR1: 0000000000000000 DR2: > 0000000000000000 > [ 676.150026] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: > 0000000000000400 > [ 676.150026] Process resize2fs (pid: 1793, threadinfo > ffff880046eec000, task ffff880053f824c0) > [ 676.150026] Stack: > [ 676.150026] ffff880046eedda8 ffffffff8117971e 0000000f53529c40 > 0000000000000000 > [ 676.150026] 000000020000002f 0000000000000000 0000000200000030 > ffff88005b20e990 > [ 676.150026] 0000000100000001 ffff880000000001 0000000200000000 > 
0000000200000000 > [ 676.150026] Call Trace: > [ 676.150026] [<ffffffff8117971e>] ? do_last+0x2ee/0x9f0 > [ 676.150026] [<ffffffffa012e05f>] ext4_ioctl+0x9af/0xbc0 [ext4] > [ 676.150026] [<ffffffff8117db6f>] do_vfs_ioctl+0x8f/0x4e0 > [ 676.150026] [<ffffffff8117e051>] sys_ioctl+0x91/0xa0 > [ 676.150026] [<ffffffff8147d0bd>] system_call_fastpath+0x1a/0x1f > [ 676.150026] Code: c7 c1 60 2c 17 a0 ba 0c 07 00 00 48 c7 c6 9b e8 16 > a0 4c 89 e7 31 c0 e8 b3 80 ff ff c7 85 6c ff ff ff ea ff ff ff e9 4c f8 > ff ff <0f> 0b 8b 55 a4 8b 45 a0 f7 da 44 31 e0 85 c2 0f 84 6c fb ff ff > [ 676.150026] RIP [<ffffffffa014e9bc>] ext4_resize_fs+0x94c/0xa30 [ext4] > [ 676.150026] RSP <ffff880046eedd18> > [ 676.788513] ---[ end trace fbf2bd5a59c2ab99 ]--- > > This is BUG_ON(src_group >= group_data[0].group + flex_gd->count); > > I was using the below basic test script which uses a virtual large > volume in LVM (e2fsprogs is 1.42.5, except for resize2fs): > > #!/bin/bash -ex > > VG=delta > LV=ext4test > LVSIZE=40T > MOUNTPOINT="/mnt/iso" > RESIZE2FS=/root/resize2fs > > INITIAL_SIZE_K=4294967295 > NEW_BLOCKS=8589934590 > > lvcreate -l 100%FREE -V "$LVSIZE" -n "$LV" "$VG" > mkfs.ext4 -O meta_bg,64bit,^resize_inode "/dev/$VG/$LV" "$INITIAL_SIZE_K" > > mount "/dev/$VG/$LV" "$MOUNTPOINT" > > mkdir "$MOUNTPOINT/test" > for file in 1 2; do > dd if=/dev/urandom bs=1M count=50 of="$MOUNTPOINT/test/$file" > done > md5sum $MOUNTPOINT/test/* > $MOUNTPOINT/MD5SUM > > for N in $NEW_BLOCKS $((NEW_BLOCKS + 50)); do > $RESIZE2FS "/dev/$VG/$LV" "$N" > > umount "$MOUNTPOINT" > fsck.ext4 -nvf "/dev/$VG/$LV" > mount "/dev/$VG/$LV" "$MOUNTPOINT" > md5sum -c "$MOUNTPOINT/MD5SUM" > done > > > >> So while I very much appreciate users giving the code a try and >> sending us feedback, please do think twice before using this code on >> file systems with data that hasn't been backed up recently. (Of >> course, being good System Administrators you are all keeping --- and >> verifying --- regular backups, right? 
:-) > > -- > Anssi Hannula
On Wed, Sep 05, 2012 at 10:10:29AM +0800, Yongqiang Yang wrote: > Hi Anssi, > > The bug was fixed for a while, please check the patches: > [PATCH 1/2] ext4: teach resize report old blocks count correctly > [PATCH 2/2] ext4: ignore last group without enough space when resizing > > Please have a try!!! Yongqiang, In the future, if a patch is going to fix a BUG_ON or kernel crash, please state so explicitly in the commit description along with instructions about how to reproduce the problem. The urgency of a patch which (for example) fixes a debugging printk (such as your 1/2 patch above) is quite different from a patch which fixes a kernel BUG_ON. One of the reasons why I hadn't gotten around to processing your patches until now was partially because I knew there was a lot of testing and fixing before the patches were fully baked (as soon as I started doing testing I found all sorts of other problems, which I had to fix), but also because the commit descriptions were not clear enough. Patches where it's obvious what they fix, and where there is a clear explanation about what they fix and the priority of their fix makes life easier for me, and makes it more likely that I can process the patches quickly. Also, if you have a follow-on set of patches which is dependent on the initial set of patches, it's very helpful to resend a v2 version of the patches so that it's clear how the patches fit together. I'll take care of these two extra patches, and then you'll see me send out a -v2 set of the patches which contain all of the online resize patches rebased to the latest kernel and tested as much as possible. In general, though, in order for me to scale, I really need ext4 developers to do as much of this testing, rebasing, and reposting patches as possible, and for other ext4 developers to review the patches. If I have to do all of this myself, patches will flow into mainline more slowly, and we'll start accumulating a much longer backlog. 
Regards, - Ted -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Got it! Thanks, Yongqiang. On Wed, Sep 5, 2012 at 12:55 PM, Theodore Ts'o <tytso@mit.edu> wrote: > On Wed, Sep 05, 2012 at 10:10:29AM +0800, Yongqiang Yang wrote: >> Hi Anssi, >> >> The bug was fixed for a while, please check the patches: >> [PATCH 1/2] ext4: teach resize report old blocks count correctly >> [PATCH 2/2] ext4: ignore last group without enough space when resizing >> >> Please have a try!!! > > Yongqiang, > > In the future, if a patch is going to fix a BUG_ON or kernel crash, > please state so explicitly in the commit description along with > instructions about how to reproduce the problem. The urgency of a > patch which (for example) fixes a debugging printk (such as your 1/2 > patch above) is quite different from a patch which causes a kernel > BUG_ON. > > One of the reasons why I hadn't gotten around to processing your > patches until now was partially because I knew there was a lot of > testing and fixing before the patches were fully baked (as soon as I > started doing testing I found all sorts of other problems, which I had > to fix), but also because the commit descriptions were not clear > enough. > > Patches where it's obvious what they fix, and where there is a clear > explanation about what they fix and the priority of their fix makes > life easier for me, and makes it more likely that I can process the > patches quickly. > > Also, if you have a follow-on set of patches which is dependent on the > initila set of patches, it's very helpful to resend a v2 version of > the patches so that it's clear how the patches fit together. > > I'll take care of these two extra patches, and then you'll see me send > out a -v2 set of the patches which contain all of the online resize > patches rebased to the latest kernel and tested as much as possible. 
> In general, though, in order for me to scale, I really need ext4 > developers to do as much of this testing, rebasing, and reposting > patches as possible, and for other ext4 developers to review the > patches. If I have to do all of this myself, patches will flow into > mainline more slowly, and we'll start accumulating a much longer > backlog. > > Regards, > > - Ted
BTW, it looks like your 2/2 patch does not have a dependency on the rest of the resize patches, and fixes a problem which exists today with the flex_bg resizing. So you'll see I moved it to the beginning of the patch series, and added a "Cc: stable@vger.kernel.org", since it's a patch that should be backported to older stable kernels. This kind of applicability statement is helpful for me, since I don't have to try to figure it out (and because I might get it wrong as I try to figure out the reasoning behind a patch, and the priority of the patch). Things that are useful to include are whether it is fixing a recent regression, or, if it is fixing a bug that is in older kernels, how long the bug has been present, so we know which older kernels need the patch, and in particular, whether enterprise distributions need to worry about backporting the patch. And as I mentioned earlier, if it causes user data loss/corruption, or causes the kernel to crash, please make a note of this in the commit description, since that's also important information when trying to decide if a patch needs priority handling or needs to be backported to older stable kernels. Thanks, regards, - Ted -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2012/9/4 Theodore Ts'o <tytso@mit.edu>: > On Tue, Sep 04, 2012 at 09:59:55AM +0800, Yongqiang Yang wrote: >> Hi Kevin, >> >> Ted has sent out the patches on online resizing for meta_bg and >> 64bits, so you can have a try again. It seems that the bug in >> e2fsprogs has been fixed. > > Make sure you use the latest version of the kernel patches that I just > sent out. There quite a number of bugs in the Yongqiang's original > patch set which I tripped over while I was testing 64-bit resize --- > and please note that there are definitely still rough edges > (especially for in cases where the file system was created < 16TB, but > with the 64-bit feature and resize_inode features enabled). There may > also be bugs for the straightforward case of resizing very large file > systems. > > So while I very much appreciate users giving the code a try and > sending us feedback, please do think twice before using this code on > file systems with data that hasn't been backed up recently. (Of > course, being good System Administrators you are all keeping --- and > verifying --- regular backups, right? :-) > > Regards, > > - Ted I had done some simple and quick test. The following is the result. Kernel: 3.4.7 + 5 patches e2fsprogs: 1.42.5 + 2 patches The format command I used is: mke2fs -t ext4 -m0 -b 4096 -F -O 64bit,meta_bg,^resize_inode /dev/md0 nnnn Case 1: Simplly resize 1st step: resize from 14T to 18T => ok 2nd step: resize from 18T to 20T => ok (calculate_minimum_resize_size issue gone) 3rd step: resize from 20T to 21T => ok Case 2: case 1 + file read-write (just like Anssi did) 1st step: resize from 14T to 20T (5368709120 blocks) => ok 2nd step: resize from 20T to 5368709170 blocks => same kernel bug_on Case 3: case 2 + Yongqiang's 2 patches 1st step: resize from 14T to 20T (5368709120 blocks) => ok 2nd step: resize from 20T to 5368709170 blocks => ok Basically I think the resize funtionality should be ok. However I also observe some performance drop. 
That is, the time needed for mke2fs, mount and e2fsck is longer than before. Here is some detailed data: For 12TB with 64bit,meta_bg,^resize_inode mke2fs: 54.699s mount: 12.108s e2fsck: 1m52.027s For 12TB without 64bit,meta_bg,^resize_inode mke2fs: 39.763s mount: 0.897s e2fsck: 1m17.554s For 20TB with 64bit,meta_bg,^resize_inode mke2fs: 1m25.090s mount: 19.992s e2fsck: 2m55.048s For 20TB without 64bit,meta_bg,^resize_inode mke2fs: 1m3.660s mount: 1.458s e2fsck: 1m56.055s Yongqiang had told me previously that it may be caused by using meta_bg. I am still wondering whether there is anything we can do to improve the performance. Thanks a lot. Regards, Kevin Liao -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Sep 5, 2012 at 2:32 PM, Kevin Liao <kevinlia@gmail.com> wrote: > 2012/9/4 Theodore Ts'o <tytso@mit.edu>: >> On Tue, Sep 04, 2012 at 09:59:55AM +0800, Yongqiang Yang wrote: >>> Hi Kevin, >>> >>> Ted has sent out the patches on online resizing for meta_bg and >>> 64bits, so you can have a try again. It seems that the bug in >>> e2fsprogs has been fixed. >> >> Make sure you use the latest version of the kernel patches that I just >> sent out. There quite a number of bugs in the Yongqiang's original >> patch set which I tripped over while I was testing 64-bit resize --- >> and please note that there are definitely still rough edges >> (especially for in cases where the file system was created < 16TB, but >> with the 64-bit feature and resize_inode features enabled). There may >> also be bugs for the straightforward case of resizing very large file >> systems. >> >> So while I very much appreciate users giving the code a try and >> sending us feedback, please do think twice before using this code on >> file systems with data that hasn't been backed up recently. (Of >> course, being good System Administrators you are all keeping --- and >> verifying --- regular backups, right? :-) >> >> Regards, >> >> - Ted > > I had done some simple and quick test. The following is the result. 
> > Kernel: 3.4.7 + 5 patches > e2fsprogs: 1.42.5 + 2 patches > > The format command I used is: > mke2fs -t ext4 -m0 -b 4096 -F -O 64bit,meta_bg,^resize_inode /dev/md0 nnnn > > Case 1: Simplly resize > 1st step: resize from 14T to 18T => ok > 2nd step: resize from 18T to 20T => ok (calculate_minimum_resize_size > issue gone) > 3rd step: resize from 20T to 21T => ok > > Case 2: case 1 + file read-write (just like Anssi did) > 1st step: resize from 14T to 20T (5368709120 blocks) => ok > 2nd step: resize from 20T to 5368709170 blocks => same kernel bug_on > > Case 3: case 2 + Yongqiang's 2 patches > 1st step: resize from 14T to 20T (5368709120 blocks) => ok > 2nd step: resize from 20T to 5368709170 blocks => ok > > Basically I think the resize funtionality should be ok. However I also > observe some performance drop. That is, the time needed for mke2fs, > mount and e2fsck are longer than before. Here is some detailed data: > > For 12TB with 64bit,meta_bg,^resize_inode > mke2fs: 54.699s > mount: 12.108s > e2fsck: 1m52.027s > > For 12TB without 64bit,meta_bg,^resize_inode Did you mean without 64bit and without meta_bg OR with without 64bit and with meta_bg? I am guessing you meant without 64bit and without meta_bg, am I right? Yongqiang. > mke2fs: 39.763s > mount: 0.897s > e2fsck: 1m17.554s > > For 20TB with 64bit,meta_bg,^resize_inode > mke2fs: 1m25.090s > mount: 19.992s > e2fsck: 2m55.048s > > For 20TB without 64bit,meta_bg,^resize_inode > mke2fs: 1m3.660s > mount: 1.458s > e2fsck: 1m56.055s > > Yongqiang had told me previously that it may be caused by using > meta_bg. I am still wondering is there anything we can do to improve > the peroformance? Thanks a lot. > > Regards, > Kevin Liao
2012/9/5 Yongqiang Yang <xiaoqiangnk@gmail.com>: > On Wed, Sep 5, 2012 at 2:32 PM, Kevin Liao <kevinlia@gmail.com> wrote: >> 2012/9/4 Theodore Ts'o <tytso@mit.edu>: >>> On Tue, Sep 04, 2012 at 09:59:55AM +0800, Yongqiang Yang wrote: >>>> Hi Kevin, >>>> >>>> Ted has sent out the patches on online resizing for meta_bg and >>>> 64bits, so you can have a try again. It seems that the bug in >>>> e2fsprogs has been fixed. >>> >>> Make sure you use the latest version of the kernel patches that I just >>> sent out. There quite a number of bugs in the Yongqiang's original >>> patch set which I tripped over while I was testing 64-bit resize --- >>> and please note that there are definitely still rough edges >>> (especially for in cases where the file system was created < 16TB, but >>> with the 64-bit feature and resize_inode features enabled). There may >>> also be bugs for the straightforward case of resizing very large file >>> systems. >>> >>> So while I very much appreciate users giving the code a try and >>> sending us feedback, please do think twice before using this code on >>> file systems with data that hasn't been backed up recently. (Of >>> course, being good System Administrators you are all keeping --- and >>> verifying --- regular backups, right? :-) >>> >>> Regards, >>> >>> - Ted >> >> I had done some simple and quick test. The following is the result. 
>> >> Kernel: 3.4.7 + 5 patches >> e2fsprogs: 1.42.5 + 2 patches >> >> The format command I used is: >> mke2fs -t ext4 -m0 -b 4096 -F -O 64bit,meta_bg,^resize_inode /dev/md0 nnnn >> >> Case 1: Simplly resize >> 1st step: resize from 14T to 18T => ok >> 2nd step: resize from 18T to 20T => ok (calculate_minimum_resize_size >> issue gone) >> 3rd step: resize from 20T to 21T => ok >> >> Case 2: case 1 + file read-write (just like Anssi did) >> 1st step: resize from 14T to 20T (5368709120 blocks) => ok >> 2nd step: resize from 20T to 5368709170 blocks => same kernel bug_on >> >> Case 3: case 2 + Yongqiang's 2 patches >> 1st step: resize from 14T to 20T (5368709120 blocks) => ok >> 2nd step: resize from 20T to 5368709170 blocks => ok >> >> Basically I think the resize funtionality should be ok. However I also >> observe some performance drop. That is, the time needed for mke2fs, >> mount and e2fsck are longer than before. Here is some detailed data: >> >> For 12TB with 64bit,meta_bg,^resize_inode >> mke2fs: 54.699s >> mount: 12.108s >> e2fsck: 1m52.027s >> >> For 12TB without 64bit,meta_bg,^resize_inode > Did you mean without 64bit and without meta_bg OR with without 64bit > and with meta_bg? > > I am guessing you meant without 64bit and without meta_bg, am I right? > Yongqiang. What I mean with 64bit,meta_bg,^resize_inode is to use the following format command mke2fs -t ext4 -m0 -b 4096 -F -O 64bit,meta_bg,^resize_inode /dev/md0 3758096384 And without 64bit,meta_bg,^resize_inode means mke2fs -t ext4 -m0 -b 4096 -F /dev/md0 3758096384 Regards, Kevin Liao >> mke2fs: 39.763s >> mount: 0.897s >> e2fsck: 1m17.554s >> >> For 20TB with 64bit,meta_bg,^resize_inode >> mke2fs: 1m25.090s >> mount: 19.992s >> e2fsck: 2m55.048s >> >> For 20TB without 64bit,meta_bg,^resize_inode >> mke2fs: 1m3.660s >> mount: 1.458s >> e2fsck: 1m56.055s >> >> Yongqiang had told me previously that it may be caused by using >> meta_bg. 
I am still wondering whether there is anything we can do to improve >> the performance? Thanks a lot. >> >> Regards, >> Kevin Liao > > > > -- > Best Wishes > Yongqiang Yang -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
05.09.2012 05:10, Yongqiang Yang kirjoitti: > Hi Anssi, Hi, > The bug was fixed for a while, please check the patches: > [PATCH 1/2] ext4: teach resize report old blocks count correctly > [PATCH 2/2] ext4: ignore last group without enough space when resizing > > Please have a try!!! Confirmed that with these patches the simple test passes :) > Thanks, > Yongqiang. > > On Wed, Sep 5, 2012 at 1:05 AM, Anssi Hannula <anssi.hannula@iki.fi> wrote: >> 04.09.2012 05:14, Theodore Ts'o kirjoitti: >>> On Tue, Sep 04, 2012 at 09:59:55AM +0800, Yongqiang Yang wrote: >>>> Hi Kevin, >>>> >>>> Ted has sent out the patches on online resizing for meta_bg and >>>> 64bits, so you can have a try again. It seems that the bug in >>>> e2fsprogs has been fixed. >>> >>> Make sure you use the latest version of the kernel patches that I just >>> sent out. There quite a number of bugs in the Yongqiang's original >>> patch set which I tripped over while I was testing 64-bit resize --- >>> and please note that there are definitely still rough edges >>> (especially for in cases where the file system was created < 16TB, but >>> with the 64-bit feature and resize_inode features enabled). There may >>> also be bugs for the straightforward case of resizing very large file >>> systems. >> >> Indeed, I hit a BUG_ON() on resize from 8589934590 blocks to 8589934640 >> blocks (4k): >> [ 676.140165] ------------[ cut here ]------------ >> [ 676.150026] kernel BUG at fs/ext4/resize.c:255! 
>> [ 676.150026] invalid opcode: 0000 [#1] SMP >> [ 676.150026] CPU 0 >> [ 676.150026] Modules linked in:[ 676.150026] dm_snapshot dm_zero >> af_packet dm_mod joydev hid_generic ppdev snd_intel8x0 snd_ac97_codec >> ac97_bus usbhid microcode e1000 snd_pcm snd_page_alloc snd_timer hid >> i2c_piix4 i2c_core button snd soundcore ac parport_pc parport processor >> evdev ipv6 autofs4 ext4 crc16 jbd2 ohci_hcd sd_mod crc_t10dif usbcore >> usb_common sr_mod ata_piix ahci libahci libata scsi_mod [last unloaded: >> nf_defrag_ipv4] >> >> [ 676.150026] Pid: 1793, comm: resize2fs Not tainted >> 3.5.3-server-2anssi.9.ext4.10.2 #1 innotek GmbH VirtualBox >> [ 676.150026] RIP: 0010:[<ffffffffa014e9bc>] [<ffffffffa014e9bc>] >> ext4_resize_fs+0x94c/0xa30 [ext4] >> [ 676.150026] RSP: 0018:ffff880046eedd18 EFLAGS: 00010246 >> [ 676.150026] RAX: 0000000000040001 RBX: ffff88005b799800 RCX: >> 0000000000000001 >> [ 676.150026] RDX: 0000000000081bf1 RSI: 0000000000040001 RDI: >> ffff88005b068000 >> [ 676.150026] RBP: ffff880046eeddd8 R08: 0000000200000003 R09: >> 0000000000000000 >> [ 676.150026] R10: 0000000000000000 R11: 0000000080042000 R12: >> 0000000000040001 >> [ 676.150026] R13: ffff880037fb5e20 R14: 0000000000000000 R15: >> ffff88005b068000 >> [ 676.150026] FS: 00007fb43e067740(0000) GS:ffff88005fc00000(0000) >> knlGS:0000000000000000 >> [ 676.150026] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b >> [ 676.150026] CR2: 00007fd905261178 CR3: 0000000044993000 CR4: >> 00000000000006f0 >> [ 676.150026] DR0: 0000000000000000 DR1: 0000000000000000 DR2: >> 0000000000000000 >> [ 676.150026] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: >> 0000000000000400 >> [ 676.150026] Process resize2fs (pid: 1793, threadinfo >> ffff880046eec000, task ffff880053f824c0) >> [ 676.150026] Stack: >> [ 676.150026] ffff880046eedda8 ffffffff8117971e 0000000f53529c40 >> 0000000000000000 >> [ 676.150026] 000000020000002f 0000000000000000 0000000200000030 >> ffff88005b20e990 >> [ 676.150026] 0000000100000001 
ffff880000000001 0000000200000000 >> 0000000200000000 >> [ 676.150026] Call Trace: >> [ 676.150026] [<ffffffff8117971e>] ? do_last+0x2ee/0x9f0 >> [ 676.150026] [<ffffffffa012e05f>] ext4_ioctl+0x9af/0xbc0 [ext4] >> [ 676.150026] [<ffffffff8117db6f>] do_vfs_ioctl+0x8f/0x4e0 >> [ 676.150026] [<ffffffff8117e051>] sys_ioctl+0x91/0xa0 >> [ 676.150026] [<ffffffff8147d0bd>] system_call_fastpath+0x1a/0x1f >> [ 676.150026] Code: c7 c1 60 2c 17 a0 ba 0c 07 00 00 48 c7 c6 9b e8 16 >> a0 4c 89 e7 31 c0 e8 b3 80 ff ff c7 85 6c ff ff ff ea ff ff ff e9 4c f8 >> ff ff <0f> 0b 8b 55 a4 8b 45 a0 f7 da 44 31 e0 85 c2 0f 84 6c fb ff ff >> [ 676.150026] RIP [<ffffffffa014e9bc>] ext4_resize_fs+0x94c/0xa30 [ext4] >> [ 676.150026] RSP <ffff880046eedd18> >> [ 676.788513] ---[ end trace fbf2bd5a59c2ab99 ]--- >> >> This is BUG_ON(src_group >= group_data[0].group + flex_gd->count); >> >> I was using the below basic test script which uses a virtual large >> volume in LVM (e2fsprogs is 1.42.5, except for resize2fs): >> >> #!/bin/bash -ex >> >> VG=delta >> LV=ext4test >> LVSIZE=40T >> MOUNTPOINT="/mnt/iso" >> RESIZE2FS=/root/resize2fs >> >> INITIAL_SIZE_K=4294967295 >> NEW_BLOCKS=8589934590 >> >> lvcreate -l 100%FREE -V "$LVSIZE" -n "$LV" "$VG" >> mkfs.ext4 -O meta_bg,64bit,^resize_inode "/dev/$VG/$LV" "$INITIAL_SIZE_K" >> >> mount "/dev/$VG/$LV" "$MOUNTPOINT" >> >> mkdir "$MOUNTPOINT/test" >> for file in 1 2; do >> dd if=/dev/urandom bs=1M count=50 of="$MOUNTPOINT/test/$file" >> done >> md5sum $MOUNTPOINT/test/* > $MOUNTPOINT/MD5SUM >> >> for N in $NEW_BLOCKS $((NEW_BLOCKS + 50)); do >> $RESIZE2FS "/dev/$VG/$LV" "$N" >> >> umount "$MOUNTPOINT" >> fsck.ext4 -nvf "/dev/$VG/$LV" >> mount "/dev/$VG/$LV" "$MOUNTPOINT" >> md5sum -c "$MOUNTPOINT/MD5SUM" >> done >> >> >> >>> So while I very much appreciate users giving the code a try and >>> sending us feedback, please do think twice before using this code on >>> file systems with data that hasn't been backed up recently. 
(Of >>> course, being good System Administrators you are all keeping --- and >>> verifying --- regular backups, right? :-) >> >> -- >> Anssi Hannula > > >
Thanks for your testing. Yongqiang. On Thu, Sep 6, 2012 at 10:22 PM, Anssi Hannula <anssi.hannula@iki.fi> wrote: > 05.09.2012 05:10, Yongqiang Yang kirjoitti: >> Hi Anssi, > > Hi, > >> The bug was fixed for a while, please check the patches: >> [PATCH 1/2] ext4: teach resize report old blocks count correctly >> [PATCH 2/2] ext4: ignore last group without enough space when resizing >> >> Please have a try!!! > > Confirmed that with these patches the simple test passes :) > > >> Thanks, >> Yongqiang. >> >> On Wed, Sep 5, 2012 at 1:05 AM, Anssi Hannula <anssi.hannula@iki.fi> wrote: >>> 04.09.2012 05:14, Theodore Ts'o kirjoitti: >>>> On Tue, Sep 04, 2012 at 09:59:55AM +0800, Yongqiang Yang wrote: >>>>> Hi Kevin, >>>>> >>>>> Ted has sent out the patches on online resizing for meta_bg and >>>>> 64bits, so you can have a try again. It seems that the bug in >>>>> e2fsprogs has been fixed. >>>> >>>> Make sure you use the latest version of the kernel patches that I just >>>> sent out. There quite a number of bugs in the Yongqiang's original >>>> patch set which I tripped over while I was testing 64-bit resize --- >>>> and please note that there are definitely still rough edges >>>> (especially for in cases where the file system was created < 16TB, but >>>> with the 64-bit feature and resize_inode features enabled). There may >>>> also be bugs for the straightforward case of resizing very large file >>>> systems. >>> >>> Indeed, I hit a BUG_ON() on resize from 8589934590 blocks to 8589934640 >>> blocks (4k): >>> [ 676.140165] ------------[ cut here ]------------ >>> [ 676.150026] kernel BUG at fs/ext4/resize.c:255! 
>>> [ 676.150026] invalid opcode: 0000 [#1] SMP >>> [ 676.150026] CPU 0 >>> [ 676.150026] Modules linked in:[ 676.150026] dm_snapshot dm_zero >>> af_packet dm_mod joydev hid_generic ppdev snd_intel8x0 snd_ac97_codec >>> ac97_bus usbhid microcode e1000 snd_pcm snd_page_alloc snd_timer hid >>> i2c_piix4 i2c_core button snd soundcore ac parport_pc parport processor >>> evdev ipv6 autofs4 ext4 crc16 jbd2 ohci_hcd sd_mod crc_t10dif usbcore >>> usb_common sr_mod ata_piix ahci libahci libata scsi_mod [last unloaded: >>> nf_defrag_ipv4] >>> >>> [ 676.150026] Pid: 1793, comm: resize2fs Not tainted >>> 3.5.3-server-2anssi.9.ext4.10.2 #1 innotek GmbH VirtualBox >>> [ 676.150026] RIP: 0010:[<ffffffffa014e9bc>] [<ffffffffa014e9bc>] >>> ext4_resize_fs+0x94c/0xa30 [ext4] >>> [ 676.150026] RSP: 0018:ffff880046eedd18 EFLAGS: 00010246 >>> [ 676.150026] RAX: 0000000000040001 RBX: ffff88005b799800 RCX: >>> 0000000000000001 >>> [ 676.150026] RDX: 0000000000081bf1 RSI: 0000000000040001 RDI: >>> ffff88005b068000 >>> [ 676.150026] RBP: ffff880046eeddd8 R08: 0000000200000003 R09: >>> 0000000000000000 >>> [ 676.150026] R10: 0000000000000000 R11: 0000000080042000 R12: >>> 0000000000040001 >>> [ 676.150026] R13: ffff880037fb5e20 R14: 0000000000000000 R15: >>> ffff88005b068000 >>> [ 676.150026] FS: 00007fb43e067740(0000) GS:ffff88005fc00000(0000) >>> knlGS:0000000000000000 >>> [ 676.150026] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b >>> [ 676.150026] CR2: 00007fd905261178 CR3: 0000000044993000 CR4: >>> 00000000000006f0 >>> [ 676.150026] DR0: 0000000000000000 DR1: 0000000000000000 DR2: >>> 0000000000000000 >>> [ 676.150026] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: >>> 0000000000000400 >>> [ 676.150026] Process resize2fs (pid: 1793, threadinfo >>> ffff880046eec000, task ffff880053f824c0) >>> [ 676.150026] Stack: >>> [ 676.150026] ffff880046eedda8 ffffffff8117971e 0000000f53529c40 >>> 0000000000000000 >>> [ 676.150026] 000000020000002f 0000000000000000 0000000200000030 >>> 
ffff88005b20e990 >>> [ 676.150026] 0000000100000001 ffff880000000001 0000000200000000 >>> 0000000200000000 >>> [ 676.150026] Call Trace: >>> [ 676.150026] [<ffffffff8117971e>] ? do_last+0x2ee/0x9f0 >>> [ 676.150026] [<ffffffffa012e05f>] ext4_ioctl+0x9af/0xbc0 [ext4] >>> [ 676.150026] [<ffffffff8117db6f>] do_vfs_ioctl+0x8f/0x4e0 >>> [ 676.150026] [<ffffffff8117e051>] sys_ioctl+0x91/0xa0 >>> [ 676.150026] [<ffffffff8147d0bd>] system_call_fastpath+0x1a/0x1f >>> [ 676.150026] Code: c7 c1 60 2c 17 a0 ba 0c 07 00 00 48 c7 c6 9b e8 16 >>> a0 4c 89 e7 31 c0 e8 b3 80 ff ff c7 85 6c ff ff ff ea ff ff ff e9 4c f8 >>> ff ff <0f> 0b 8b 55 a4 8b 45 a0 f7 da 44 31 e0 85 c2 0f 84 6c fb ff ff >>> [ 676.150026] RIP [<ffffffffa014e9bc>] ext4_resize_fs+0x94c/0xa30 [ext4] >>> [ 676.150026] RSP <ffff880046eedd18> >>> [ 676.788513] ---[ end trace fbf2bd5a59c2ab99 ]--- >>> >>> This is BUG_ON(src_group >= group_data[0].group + flex_gd->count); >>> >>> I was using the below basic test script which uses a virtual large >>> volume in LVM (e2fsprogs is 1.42.5, except for resize2fs): >>> >>> #!/bin/bash -ex >>> >>> VG=delta >>> LV=ext4test >>> LVSIZE=40T >>> MOUNTPOINT="/mnt/iso" >>> RESIZE2FS=/root/resize2fs >>> >>> INITIAL_SIZE_K=4294967295 >>> NEW_BLOCKS=8589934590 >>> >>> lvcreate -l 100%FREE -V "$LVSIZE" -n "$LV" "$VG" >>> mkfs.ext4 -O meta_bg,64bit,^resize_inode "/dev/$VG/$LV" "$INITIAL_SIZE_K" >>> >>> mount "/dev/$VG/$LV" "$MOUNTPOINT" >>> >>> mkdir "$MOUNTPOINT/test" >>> for file in 1 2; do >>> dd if=/dev/urandom bs=1M count=50 of="$MOUNTPOINT/test/$file" >>> done >>> md5sum $MOUNTPOINT/test/* > $MOUNTPOINT/MD5SUM >>> >>> for N in $NEW_BLOCKS $((NEW_BLOCKS + 50)); do >>> $RESIZE2FS "/dev/$VG/$LV" "$N" >>> >>> umount "$MOUNTPOINT" >>> fsck.ext4 -nvf "/dev/$VG/$LV" >>> mount "/dev/$VG/$LV" "$MOUNTPOINT" >>> md5sum -c "$MOUNTPOINT/MD5SUM" >>> done >>> >>> >>> >>>> So while I very much appreciate users giving the code a try and >>>> sending us feedback, please do think twice before using 
this code on >>>> file systems with data that hasn't been backed up recently. (Of >>>> course, being good System Administrators you are all keeping --- and >>>> verifying --- regular backups, right? :-) >>> >>> -- >>> Anssi Hannula >> >> >> > > > -- > Anssi Hannula
On Wed, Sep 05, 2012 at 02:32:32PM +0800, Kevin Liao wrote: > > I had done some simple and quick test. The following is the result. > > For 20TB with 64bit,meta_bg,^resize_inode > mke2fs: 1m25.090s > mount: 19.992s > e2fsck: 2m55.048s > > For 20TB without 64bit,meta_bg,^resize_inode > mke2fs: 1m3.660s > mount: 1.458s > e2fsck: 1m56.055s The reason for this is how meta_bg changes how the block group descriptors are laid out. Originally, the block group descriptors were located contiguously. From a 12T filesystem without meta_bg, you'll see this from dumpe2fs: Group 0: (Blocks 0-32767) Primary superblock at 0, Group descriptors at 1-768 If the file system is created with meta_bg, then group descriptors that have to be read when the file system is opened by libext2fs or when the file system is mounted look like this: Group 0: (Blocks 0-32767) Primary superblock at 0, Group descriptor at 1 Group 128: (Blocks 4194304-4227071) [INODE_UNINIT] Group descriptor at 4194304 Group 256: (Blocks 8388608-8421375) [INODE_UNINIT] Group descriptor at 8388608 Group 384: (Blocks 12582912-12615679) [INODE_UNINIT] Group descriptor at 12582912 ... In the set of kernel and e2fsprogs patches that I just released, we can partially work around this problem by starting with the resize_inode, and only switch over to the meta_bg once we have exhausted the resize_inode scheme. 
So now we can do this: mke2fs -t ext4 -q -O 64bit /dev/vdc 12T mount /dev/vdc resize2fs /dev/vdc 18T After the resize2fs, the block group descriptors for the first 16TB will be contiguous: Group 0: (Blocks 0-32767) [ITABLE_ZEROED] Primary superblock at 0, Group descriptors at 1-2048 After that, there will be singleton block group descriptor blocks, i.e.: Group 131136: (Blocks 4297064448-4297097215) [INODE_UNINIT] Group descriptor at 4297064448 The other thing we can do to speed up the mount times is to change the kernel to lazily read the block group descriptors, instead of trying to read them all at mount time, at least once they are no longer contiguous. I'll look into seeing what we can do to improve things on that front. Regards, - Ted -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2012/9/14 Theodore Ts'o <tytso@mit.edu>: > > The reason for this is how meta_bg changes how the block group > descriptors are laid out. Originally, the block group descriptors > were located contiguously. From a 12T filesystem without meta_bg, > you'll see this from dumpe2fs: > > Group 0: (Blocks 0-32767) > Primary superblock at 0, Group descriptors at 1-768 > > If the file system is created with meta_bg, then group descriptors > that have to be read when the file system is opened by libext2fs or > when the file system is mounted look like this: > > Group 0: (Blocks 0-32767) > Primary superblock at 0, Group descriptor at 1 > Group 128: (Blocks 4194304-4227071) [INODE_UNINIT] > Group descriptor at 4194304 > Group 256: (Blocks 8388608-8421375) [INODE_UNINIT] > Group descriptor at 8388608 > Group 384: (Blocks 12582912-12615679) [INODE_UNINIT] > Group descriptor at 12582912 > ... > > In the set of kernel and e2fsprogs patches that I just released, we > can partially work around this problem by starting with the > resize_inode, and only switch over to the meta_bg once we have > exhausted the resize_inode scheme. So now we can do this: > > mke2fs -t ext4 -q -O 64bit /dev/vdc 12T > mount /dev/vdc > resize2fs /dev/vdc 18T > > After the resize2fs, the block group descriptors for the first 16TB > will be contiguous: > > Group 0: (Blocks 0-32767) [ITABLE_ZEROED] > Primary superblock at 0, Group descriptors at 1-2048 > > after that, there will be singleton block group descriptor blocks, i.e.: > > Group 131136: (Blocks 4297064448-4297097215) [INODE_UNINIT] > Group descriptor at 4297064448 > > The other thing we can do to speed up the mount times is change how > the kernel to lazily read the block group descriptors, instead of > trying to read them all at mount time, at least once they are no > longer contiguous. I'll look into seeing what we can do to improve > things on that front. > > Regards, > > - Ted Ted, thanks a lot for the detailed explanation. It is very clear. 
I will find time to test your new patches. Regards, Kevin Liao -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/resize/resize2fs.c b/resize/resize2fs.c index dc2805d..1dce498 100644 --- a/resize/resize2fs.c +++ b/resize/resize2fs.c @@ -1890,6 +1890,8 @@ blk64_t calculate_minimum_resize_size(ext2_filsys fs) blk64_t grp, data_needed, last_start; blk64_t overhead = 0; int num_of_superblocks = 0; + blk64_t super_overhead = 0; + int old_desc_blocks; int extra_groups = 0; int flexbg_size = 1 << fs->super->s_log_groups_per_flex; @@ -1909,15 +1911,36 @@ blk64_t calculate_minimum_resize_size(ext2_filsys fs) * we need to figure out how many backup superblocks we have so we can * account for that in the metadata */ + if (fs->super->s_feature_incompat & EXT2_FEATURE_INCOMPAT_META_BG) + old_desc_blocks = fs->super->s_first_meta_bg; + else + old_desc_blocks = fs->desc_blocks + + fs->super->s_reserved_gdt_blocks; + for (grp = 0; grp < fs->group_desc_count; grp++) { + blk64_t super_blk, old_desc_blk, new_desc_blk; + int has_super; + + ext2fs_super_and_bgd_loc2(fs, grp, &super_blk, + &old_desc_blk, &new_desc_blk, 0); + has_super = ((grp == 0) || super_blk); + if (has_super) + super_overhead++; + if (old_desc_blk) + super_overhead += old_desc_blocks; + else if (new_desc_blk) + super_overhead++; if (ext2fs_bg_has_super(fs, grp)) num_of_superblocks++; + } + printf("super overhead is %llu, old algorithm was %llu\n", + super_overhead, SUPER_OVERHEAD(fs) * num_of_superblocks); /* calculate how many blocks are needed for data */ data_needed = ext2fs_blocks_count(fs->super) - ext2fs_free_blocks_count(fs->super); - data_needed -= SUPER_OVERHEAD(fs) * num_of_superblocks; + data_needed -= super_overhead; data_needed -= META_OVERHEAD(fs) * fs->group_desc_count; if (fs->super->s_feature_incompat & EXT4_FEATURE_INCOMPAT_FLEX_BG) {
The file system overhead calculation in calculate_minimum_resize_size was incorrect for meta_bg file systems. This caused the minimum size to underflow for very large file systems, which threw resize2fs into a loop that generally lasted longer than the user's patience. Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> --- resize/resize2fs.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-)