Message ID | 20110901003615.1176.76957.stgit@elm3c44.beaverton.ibm.com |
---|---|
State | Superseded, archived |
Headers | show |
On 2011-08-31, at 6:36 PM, "Darrick J. Wong" <djwong@us.ibm.com> wrote: > Write out checksummed inodes even when writing out a zeroed table. > > Signed-off-by: Darrick J. Wong <djwong@us.ibm.com> > --- > misc/mke2fs.c | 37 ++++++++++++++++++++++++++++++------- > 1 files changed, 30 insertions(+), 7 deletions(-) > > > diff --git a/misc/mke2fs.c b/misc/mke2fs.c > index 2d57d09..bbc0533 100644 > --- a/misc/mke2fs.c > +++ b/misc/mke2fs.c > @@ -309,6 +309,8 @@ static void write_inode_tables(ext2_filsys fs, int lazy_flag, int itable_zeroed) > dgrp_t i; > int num; > struct ext2fs_numeric_progress_struct progress; > + ext2_ino_t ino; > + struct ext2_inode_large inode; > > ext2fs_numeric_progress_init(fs, &progress, > _("Writing inode tables: "), > @@ -330,12 +332,32 @@ static void write_inode_tables(ext2_filsys fs, int lazy_flag, int itable_zeroed) > ext2fs_bg_flags_set(fs, i, EXT2_BG_INODE_ZEROED); > ext2fs_group_desc_csum_set(fs, i); > } > - retval = ext2fs_zero_blocks2(fs, blk, num, &blk, &num); > - if (retval) { > - fprintf(stderr, _("\nCould not write %d " > - "blocks in inode table starting at %llu: %s\n"), > - num, blk, error_message(retval)); > - exit(1); > + if (fs->super->s_creator_os == EXT2_OS_LINUX && > + fs->super->s_feature_ro_compat & > + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) { Somehow it doesn't look like this is skipping the zeroing of the inode table blocks if lazy itable zeroing is set. Any measurements on how much this slows down inode table writing (which is already the slowest part of mke2fs)? > + bzero(&inode, sizeof(inode)); > + for (ino = fs->super->s_inodes_per_group * i; > + ino < fs->super->s_inodes_per_group * (i + 1); > + ino++) { Why recompute "ino" each time through this loop? It should be enough to simply initialize it at 1 and then increment it for each inode written. > + if (!ino) > + continue; > + retval = ext2fs_write_inode(fs, ino, &inode); > + if (retval) { > + com_err("inode_init", retval, > + "while writing inode %d\n", > + ino); > + exit(1); > + } > + } > + } else { > + retval = ext2fs_zero_blocks2(fs, blk, num, &blk, &num); > + if (retval) { > + fprintf(stderr, _("\nCould not write %d " > + "blocks in inode table starting " > + "at %llu: %s\n"), > + num, blk, error_message(retval)); > + exit(1); > + } > } > if (sync_kludge) { > if (sync_kludge == 1) > @@ -829,7 +851,8 @@ static __u32 ok_features[3] = { > EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE| > EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| > EXT4_FEATURE_RO_COMPAT_GDT_CSUM| > - EXT4_FEATURE_RO_COMPAT_BIGALLOC > + EXT4_FEATURE_RO_COMPAT_BIGALLOC| > + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM > }; > > > -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sun, Sep 04, 2011 at 12:28:24PM -0600, Andreas Dilger wrote: > On 2011-08-31, at 6:36 PM, "Darrick J. Wong" <djwong@us.ibm.com> wrote: > > Write out checksummed inodes even when writing out a zeroed table. > > > > Signed-off-by: Darrick J. Wong <djwong@us.ibm.com> > > --- > > misc/mke2fs.c | 37 ++++++++++++++++++++++++++++++------- > > 1 files changed, 30 insertions(+), 7 deletions(-) > > > > > > diff --git a/misc/mke2fs.c b/misc/mke2fs.c > > index 2d57d09..bbc0533 100644 > > --- a/misc/mke2fs.c > > +++ b/misc/mke2fs.c > > @@ -309,6 +309,8 @@ static void write_inode_tables(ext2_filsys fs, int lazy_flag, int itable_zeroed) > > dgrp_t i; > > int num; > > struct ext2fs_numeric_progress_struct progress; > > + ext2_ino_t ino; > > + struct ext2_inode_large inode; > > > > ext2fs_numeric_progress_init(fs, &progress, > > _("Writing inode tables: "), > > @@ -330,12 +332,32 @@ static void write_inode_tables(ext2_filsys fs, int lazy_flag, int itable_zeroed) > > ext2fs_bg_flags_set(fs, i, EXT2_BG_INODE_ZEROED); > > ext2fs_group_desc_csum_set(fs, i); > > } > > - retval = ext2fs_zero_blocks2(fs, blk, num, &blk, &num); > > - if (retval) { > > - fprintf(stderr, _("\nCould not write %d " > > - "blocks in inode table starting at %llu: %s\n"), > > - num, blk, error_message(retval)); > > - exit(1); > > + if (fs->super->s_creator_os == EXT2_OS_LINUX && > > + fs->super->s_feature_ro_compat & > > + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) { > > Somehow it doesn't look like this is skipping the zeroing of the inode table > blocks if lazy itable zeroing is set. > > Any measurements on how much this slows down inode table writing (which is > already the slowest part of mke2fs)? Quite a lot, actually. Trouble is, if you're going to write zeroes to the inode table (without using uninit) then I think you need the checksums to match. Maybe the solution is to modify the kernel/e2fsck to ignore the checksum if the inode bitmap says the inode isn't in use? A better solution is to zero the buffer, stuff in all the checksums in the correct places, and then write the block out. > > + bzero(&inode, sizeof(inode)); > > + for (ino = fs->super->s_inodes_per_group * i; > > + ino < fs->super->s_inodes_per_group * (i + 1); > > + ino++) { > > Why recompute "ino" each time through this loop? It should be enough to > simply initialize it at 1 and then increment it for each inode written. Agreed. --D > > + if (!ino) > > + continue; > > + retval = ext2fs_write_inode(fs, ino, &inode); > > + if (retval) { > > + com_err("inode_init", retval, > > + "while writing inode %d\n", > > + ino); > > + exit(1); > > + } > > + } > > + } else { > > + retval = ext2fs_zero_blocks2(fs, blk, num, &blk, &num); > > + if (retval) { > > + fprintf(stderr, _("\nCould not write %d " > > + "blocks in inode table starting " > > + "at %llu: %s\n"), > > + num, blk, error_message(retval)); > > + exit(1); > > + } > > } > > if (sync_kludge) { > > if (sync_kludge == 1) > > @@ -829,7 +851,8 @@ static __u32 ok_features[3] = { > > EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE| > > EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| > > EXT4_FEATURE_RO_COMPAT_GDT_CSUM| > > - EXT4_FEATURE_RO_COMPAT_BIGALLOC > > + EXT4_FEATURE_RO_COMPAT_BIGALLOC| > > + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM > > }; > > > > > > -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2011-09-05, at 1:20 PM, "Darrick J. Wong" <djwong@us.ibm.com> wrote: > On Sun, Sep 04, 2011 at 12:28:24PM -0600, Andreas Dilger wrote: >> On 2011-08-31, at 6:36 PM, "Darrick J. Wong" <djwong@us.ibm.com> wrote: >>> Write out checksummed inodes even when writing out a zeroed table. >>> >>> + if (fs->super->s_creator_os == EXT2_OS_LINUX && >>> + fs->super->s_feature_ro_compat & >>> + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) { >> >> Somehow it doesn't look like this is skipping the zeroing of the inode table >> blocks if lazy itable zeroing is set. >> >> Any measurements on how much this slows down inode table writing (which is >> already the slowest part of mke2fs)? > > Quite a lot, actually. Trouble is, if you're going to write zeroes to the > inode table (without using uninit) then I think you need the checksums to > match. Maybe the solution is to modify the kernel/e2fsck to ignore the > checksum if the inode bitmap says the inode isn't in use? The kernel is already aware of which inodes are not in use if the uninit_bg feature is enabled. Even without uninit_bg, the kernel will not read itable blocks from disk if none of the inodes in that block are used. Also, if the lazy_itable_init is passed to mke2fs it isn't supposed to initialize the inode table at all, and the kernel should do it instead. > A better solution is to zero the buffer, stuff in all the checksums in the > correct places, and then write the block out. Rather, the kernel should do it in the background. >>> + bzero(&inode, sizeof(inode)); >>> + for (ino = fs->super->s_inodes_per_group * i; >>> + ino < fs->super->s_inodes_per_group * (i + 1); >>> + ino++) { >> >> Why recompute "ino" each time through this loop? It should be enough to >> simply initialize it at 1 and then increment it for each inode written. > > Agreed. > > --D >>> + if (!ino) >>> + continue; >>> + retval = ext2fs_write_inode(fs, ino, &inode); >>> + if (retval) { >>> + com_err("inode_init", retval, >>> + "while writing inode %d\n", >>> + ino); >>> + exit(1); >>> + } >>> + } >>> + } else { >>> + retval = ext2fs_zero_blocks2(fs, blk, num, &blk, &num); >>> + if (retval) { >>> + fprintf(stderr, _("\nCould not write %d " >>> + "blocks in inode table starting " >>> + "at %llu: %s\n"), >>> + num, blk, error_message(retval)); >>> + exit(1); >>> + } >>> } >>> if (sync_kludge) { >>> if (sync_kludge == 1) >>> @@ -829,7 +851,8 @@ static __u32 ok_features[3] = { >>> EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE| >>> EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| >>> EXT4_FEATURE_RO_COMPAT_GDT_CSUM| >>> - EXT4_FEATURE_RO_COMPAT_BIGALLOC >>> + EXT4_FEATURE_RO_COMPAT_BIGALLOC| >>> + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM >>> }; >>> >>> >>> -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Sep 05, 2011 at 07:54:32PM -0600, Andreas Dilger wrote: > On 2011-09-05, at 1:20 PM, "Darrick J. Wong" <djwong@us.ibm.com> wrote: > > On Sun, Sep 04, 2011 at 12:28:24PM -0600, Andreas Dilger wrote: > >> On 2011-08-31, at 6:36 PM, "Darrick J. Wong" <djwong@us.ibm.com> wrote: > >>> Write out checksummed inodes even when writing out a zeroed table. > >>> > >>> + if (fs->super->s_creator_os == EXT2_OS_LINUX && > >>> + fs->super->s_feature_ro_compat & > >>> + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) { > >> > >> Somehow it doesn't look like this is skipping the zeroing of the inode table > >> blocks if lazy itable zeroing is set. > >> > >> Any measurements on how much this slows down inode table writing (which is > >> already the slowest part of mke2fs)? > > > > Quite a lot, actually. Trouble is, if you're going to write zeroes to the > > inode table (without using uninit) then I think you need the checksums to > > match. Maybe the solution is to modify the kernel/e2fsck to ignore the > > checksum if the inode bitmap says the inode isn't in use? > > The kernel is already aware of which inodes are not in use if the uninit_bg > feature is enabled. Even without uninit_bg, the kernel will not read itable > blocks from disk if none of the inodes in that block are used. > > Also, if the lazy_itable_init is passed to mke2fs it isn't supposed to > initialize the inode table at all, and the kernel should do it instead. Ok. > > A better solution is to zero the buffer, stuff in all the checksums in the > > correct places, and then write the block out. > > Rather, the kernel should do it in the background. Append "...If the kernel won't do it in the background." to my earlier statement. :) There seems to be some code that probes around in sysfs to make sure that the kernel can handle uninit_bg ... /sys/fs/ext4/features/lazy_itable_init I think? --D > >>> + bzero(&inode, sizeof(inode)); > >>> + for (ino = fs->super->s_inodes_per_group * i; > >>> + ino < fs->super->s_inodes_per_group * (i + 1); > >>> + ino++) { > >> > >> Why recompute "ino" each time through this loop? It should be enough to > >> simply initialize it at 1 and then increment it for each inode written. > > > > Agreed. > > > > --D > >>> + if (!ino) > >>> + continue; > >>> + retval = ext2fs_write_inode(fs, ino, &inode); > >>> + if (retval) { > >>> + com_err("inode_init", retval, > >>> + "while writing inode %d\n", > >>> + ino); > >>> + exit(1); > >>> + } > >>> + } > >>> + } else { > >>> + retval = ext2fs_zero_blocks2(fs, blk, num, &blk, &num); > >>> + if (retval) { > >>> + fprintf(stderr, _("\nCould not write %d " > >>> + "blocks in inode table starting " > >>> + "at %llu: %s\n"), > >>> + num, blk, error_message(retval)); > >>> + exit(1); > >>> + } > >>> } > >>> if (sync_kludge) { > >>> if (sync_kludge == 1) > >>> @@ -829,7 +851,8 @@ static __u32 ok_features[3] = { > >>> EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE| > >>> EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| > >>> EXT4_FEATURE_RO_COMPAT_GDT_CSUM| > >>> - EXT4_FEATURE_RO_COMPAT_BIGALLOC > >>> + EXT4_FEATURE_RO_COMPAT_BIGALLOC| > >>> + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM > >>> }; > >>> > >>> > >>> -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/misc/mke2fs.c b/misc/mke2fs.c index 2d57d09..bbc0533 100644 --- a/misc/mke2fs.c +++ b/misc/mke2fs.c @@ -309,6 +309,8 @@ static void write_inode_tables(ext2_filsys fs, int lazy_flag, int itable_zeroed) dgrp_t i; int num; struct ext2fs_numeric_progress_struct progress; + ext2_ino_t ino; + struct ext2_inode_large inode; ext2fs_numeric_progress_init(fs, &progress, _("Writing inode tables: "), @@ -330,12 +332,32 @@ static void write_inode_tables(ext2_filsys fs, int lazy_flag, int itable_zeroed) ext2fs_bg_flags_set(fs, i, EXT2_BG_INODE_ZEROED); ext2fs_group_desc_csum_set(fs, i); } - retval = ext2fs_zero_blocks2(fs, blk, num, &blk, &num); - if (retval) { - fprintf(stderr, _("\nCould not write %d " - "blocks in inode table starting at %llu: %s\n"), - num, blk, error_message(retval)); - exit(1); + if (fs->super->s_creator_os == EXT2_OS_LINUX && + fs->super->s_feature_ro_compat & + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) { + bzero(&inode, sizeof(inode)); + for (ino = fs->super->s_inodes_per_group * i; + ino < fs->super->s_inodes_per_group * (i + 1); + ino++) { + if (!ino) + continue; + retval = ext2fs_write_inode(fs, ino, &inode); + if (retval) { + com_err("inode_init", retval, + "while writing inode %d\n", + ino); + exit(1); + } + } + } else { + retval = ext2fs_zero_blocks2(fs, blk, num, &blk, &num); + if (retval) { + fprintf(stderr, _("\nCould not write %d " + "blocks in inode table starting " + "at %llu: %s\n"), + num, blk, error_message(retval)); + exit(1); + } } if (sync_kludge) { if (sync_kludge == 1) @@ -829,7 +851,8 @@ static __u32 ok_features[3] = { EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE| EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| EXT4_FEATURE_RO_COMPAT_GDT_CSUM| - EXT4_FEATURE_RO_COMPAT_BIGALLOC + EXT4_FEATURE_RO_COMPAT_BIGALLOC| + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM };
Write out checksummed inodes even when writing out a zeroed table. Signed-off-by: Darrick J. Wong <djwong@us.ibm.com> --- misc/mke2fs.c | 37 ++++++++++++++++++++++++++++++------- 1 files changed, 30 insertions(+), 7 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html