diff mbox series

[4/4] ext4: add prefetch_block_bitmaps mount options

Message ID 20200717155352.1053040-5-tytso@mit.edu
State Superseded
Headers show
Series ext4 block bitmap prefetching | expand

Commit Message

Theodore Ts'o July 17, 2020, 3:53 p.m. UTC
For file systems where we can afford to keep the buddy bitmaps cached,
we can speed up initial writes to large file systems by starting to
load the block allocation bitmaps as soon as the file system is
mounted.  This won't work well for _super_ large file systems, or
memory constrained systems, so we only enable this when it is
requested via a mount option.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/ext4.h    | 13 ++++++++++++
 fs/ext4/mballoc.c | 10 ++++------
 fs/ext4/super.c   | 51 +++++++++++++++++++++++++++++++++++++----------
 3 files changed, 57 insertions(+), 17 deletions(-)

Comments

Andreas Dilger July 21, 2020, 8:20 a.m. UTC | #1
On Jul 17, 2020, at 9:53 AM, Theodore Ts'o <tytso@mit.edu> wrote:
> 
> For file systems where we can afford to keep the buddy bitmaps cached,
> we can speed up initial writes to large file systems by starting to
> load the block allocation bitmaps as soon as the file system is
> mounted.  This won't work well for _super_ large file systems, or
> memory constrained systems, so we only enable this when it is
> requested via a mount option.

I was looking at this, and maybe I misunderstand the code, but it looks
like it does the itable zeroing first, then once that is completed it
will do the block bitmap prefetch:

	if (first_not_zeroed == ngroups || sb_rdonly(sb) ||
	    !test_opt(sb, INIT_INODE_TABLE)) {
		if (test_opt(sb, PREFETCH_BLOCK_BITMAPS)) {
			first_not_zeroed = 0;
			lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
	}

so it would only get into this case of first_not_zeroed == ngroups
after all of the inode tables are marked zeroed in the GDT.

However, it seems to me that the order of these two should be reversed,
since inode table zeroing can take ages (GB/TB to write), while block
bitmap loading could be done much more quickly.  Also, if the itable
zeroing is ever changed to verify that the itable blocks are properly
marked in the block bitmap (which seems reasonable), then "prefetch"
of the block bitmaps would be too late.

It isn't clear if there is any benefit to also prefetch the inode
bitmaps at the same time?  It doesn't look like they are used by
the itable zeroing code, only the GDT inode high watermark.

It would seem to me that if an application is writing to a newly-mounted
filesystem, the block bitmaps will *always* be needed, while itable
zeroing is a "nice to have when it can be done" since there is no real
expectation in the code that the itable is zeroed.  Only e2fsck cares
about this, and if your system fails before it finishes, you have
bigger problems to worry about.

I'm not adamant about changing this before landing, if you see a real
benefit from this today, just some thoughts for possible improvement.

Cheers, Andreas

> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
> ---
> fs/ext4/ext4.h    | 13 ++++++++++++
> fs/ext4/mballoc.c | 10 ++++------
> fs/ext4/super.c   | 51 +++++++++++++++++++++++++++++++++++++----------
> 3 files changed, 57 insertions(+), 17 deletions(-)
> 
> 
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index 172994349bf6..c072d06d678d 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -2224,9 +2224,8 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
>  * Start prefetching @nr block bitmaps starting at @group.
>  * Return the next group which needs to be prefetched.
>  */
> -static ext4_group_t
> -ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
> -		 unsigned int nr, int *cnt)
> +ext4_group_t ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
> +			      unsigned int nr, int *cnt)
> {
> 	ext4_group_t ngroups = ext4_get_groups_count(sb);
> 	struct buffer_head *bh;
> @@ -2276,9 +2275,8 @@ ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
>  * waiting for the block allocation bitmap read to finish when
>  * ext4_mb_prefetch_fini is called from ext4_mb_regular_allocator().
>  */
> -static void
> -ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
> -		      unsigned int nr)
> +void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
> +			   unsigned int nr)
> {
> 	while (nr-- > 0) {
> 		struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group,
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 330957ed1f05..9e19d5830745 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -1521,6 +1521,7 @@ enum {
> 	Opt_dioread_nolock, Opt_dioread_lock,
> 	Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
> 	Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
> +	Opt_prefetch_block_bitmaps,
> };
> 
> static const match_table_t tokens = {
> @@ -1612,6 +1613,7 @@ static const match_table_t tokens = {
> 	{Opt_test_dummy_encryption, "test_dummy_encryption"},
> 	{Opt_nombcache, "nombcache"},
> 	{Opt_nombcache, "no_mbcache"},	/* for backward compatibility */
> +	{Opt_prefetch_block_bitmaps, "prefetch_block_bitmaps"},
> 	{Opt_removed, "check=none"},	/* mount option from ext2/3 */
> 	{Opt_removed, "nocheck"},	/* mount option from ext2/3 */
> 	{Opt_removed, "reservation"},	/* mount option from ext2/3 */
> @@ -1829,6 +1831,8 @@ static const struct mount_opts {
> 	{Opt_max_dir_size_kb, 0, MOPT_GTE0},
> 	{Opt_test_dummy_encryption, 0, MOPT_STRING},
> 	{Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
> +	{Opt_prefetch_block_bitmaps, EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS,
> +	 MOPT_SET},
> 	{Opt_err, 0, 0}
> };
> 
> @@ -3197,19 +3201,33 @@ static void print_daily_error_info(struct timer_list *t)
> 	mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */
> }
> 
> +static int ext4_run_li_prefetch(struct ext4_li_request *elr,
> +				struct super_block *sb, ext4_group_t group)
> +{
> +	unsigned int prefetch_ios = 0;
> +
> +	elr->lr_next_group = ext4_mb_prefetch(sb, group,
> +					      EXT4_SB(sb)->s_mb_prefetch,
> +					      &prefetch_ios);
> +	if (prefetch_ios)
> +		ext4_mb_prefetch_fini(sb, elr->lr_next_group, prefetch_ios);
> +	return (group >= elr->lr_next_group);
> +}
> +
> /* Find next suitable group and run ext4_init_inode_table */
> static int ext4_run_li_request(struct ext4_li_request *elr)
> {
> 	struct ext4_group_desc *gdp = NULL;
> -	ext4_group_t group, ngroups;
> -	struct super_block *sb;
> +	ext4_group_t group = elr->lr_next_group;
> +	struct super_block *sb = elr->lr_super;
> +	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
> 	unsigned long timeout = 0;
> 	int ret = 0;
> 
> -	sb = elr->lr_super;
> -	ngroups = EXT4_SB(sb)->s_groups_count;
> +	if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP)
> +		return ext4_run_li_prefetch(elr, sb, group);
> 
> -	for (group = elr->lr_next_group; group < ngroups; group++) {
> +	for (; group < ngroups; group++) {
> 		gdp = ext4_get_group_desc(sb, group, NULL);
> 		if (!gdp) {
> 			ret = 1;
> @@ -3219,13 +3237,12 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
> 		if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
> 			break;
> 	}
> -
> 	if (group >= ngroups)
> 		ret = 1;
> 
> 	if (!ret) {
> 		timeout = jiffies;
> -		ret = ext4_init_inode_table(sb, group,
> +		ret = ext4_init_inode_table(elr->lr_super, group,
> 					    elr->lr_timeout ? 0 : 1);
> 		if (elr->lr_timeout == 0) {
> 			timeout = (jiffies - timeout) *
> @@ -3234,6 +3251,10 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
> 		}
> 		elr->lr_next_sched = jiffies + elr->lr_timeout;
> 		elr->lr_next_group = group + 1;
> +	} else if (test_opt(sb, PREFETCH_BLOCK_BITMAPS)) {
> +		elr->lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
> +		elr->lr_next_group = 0;
> +		ret = 0;
> 	}
> 	return ret;
> }
> @@ -3459,7 +3480,8 @@ static int ext4_li_info_new(void)
> }
> 
> static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
> -					    ext4_group_t start)
> +						   ext4_group_t start,
> +						   ext4_li_mode mode)
> {
> 	struct ext4_sb_info *sbi = EXT4_SB(sb);
> 	struct ext4_li_request *elr;
> @@ -3468,6 +3490,7 @@ static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
> 	if (!elr)
> 		return NULL;
> 
> +	elr->lr_mode = mode;
> 	elr->lr_super = sb;
> 	elr->lr_sbi = sbi;
> 	elr->lr_next_group = start;
> @@ -3488,6 +3511,7 @@ int ext4_register_li_request(struct super_block *sb,
> 	struct ext4_sb_info *sbi = EXT4_SB(sb);
> 	struct ext4_li_request *elr = NULL;
> 	ext4_group_t ngroups = sbi->s_groups_count;
> +	ext4_li_mode lr_mode = EXT4_LI_MODE_ITABLE;
> 	int ret = 0;
> 
> 	mutex_lock(&ext4_li_mtx);
> @@ -3501,10 +3525,15 @@ int ext4_register_li_request(struct super_block *sb,
> 	}
> 
> 	if (first_not_zeroed == ngroups || sb_rdonly(sb) ||
> -	    !test_opt(sb, INIT_INODE_TABLE))
> -		goto out;
> +	    !test_opt(sb, INIT_INODE_TABLE)) {
> +		if (test_opt(sb, PREFETCH_BLOCK_BITMAPS)) {
> +			first_not_zeroed = 0;
> +			lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
> +		} else
> +			goto out;
> +	}
> 
> -	elr = ext4_li_request_new(sb, first_not_zeroed);
> +	elr = ext4_li_request_new(sb, first_not_zeroed, lr_mode);
> 	if (!elr) {
> 		ret = -ENOMEM;
> 		goto out;
> --
> 2.24.1
> 


Cheers, Andreas
Artem Blagodarenko July 24, 2020, 1:58 p.m. UTC | #2
Hello,

Thanks for the patch. I believe it can be useful in some cases. I have tried this patch, and the option works fine.
One comment is placed below.

Best regards,
Artem Blagodarenko

> On 17 Jul 2020, at 18:53, Theodore Ts'o <tytso@mit.edu> wrote:
> 
> For file systems where we can afford to keep the buddy bitmaps cached,
> we can speed up initial writes to large file systems by starting to
> load the block allocation bitmaps as soon as the file system is
> mounted.  This won't work well for _super_ large file systems, or
> memory constrained systems, so we only enable this when it is
> requested via a mount option.
> 
> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
> ---
> fs/ext4/ext4.h    | 13 ++++++++++++
> fs/ext4/mballoc.c | 10 ++++------
> fs/ext4/super.c   | 51 +++++++++++++++++++++++++++++++++++++----------
> 3 files changed, 57 insertions(+), 17 deletions(-)
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 7451662e092a..c04d4ef0b77a 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -1172,6 +1172,7 @@ struct ext4_inode_info {
> #define EXT4_MOUNT_JOURNAL_CHECKSUM	0x800000 /* Journal checksums */
> #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT	0x1000000 /* Journal Async Commit */
> #define EXT4_MOUNT_WARN_ON_ERROR	0x2000000 /* Trigger WARN_ON on error */
> +#define EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS 0x4000000
> #define EXT4_MOUNT_DELALLOC		0x8000000 /* Delalloc support */
> #define EXT4_MOUNT_DATA_ERR_ABORT	0x10000000 /* Abort on file data write */
> #define EXT4_MOUNT_BLOCK_VALIDITY	0x20000000 /* Block validity checking */
> @@ -2315,9 +2316,15 @@ struct ext4_lazy_init {
> 	struct mutex		li_list_mtx;
> };
> 
> +typedef enum {
> +	EXT4_LI_MODE_ITABLE,
> +	EXT4_LI_MODE_PREFETCH_BBITMAP
> +} ext4_li_mode;
> +
> struct ext4_li_request {
> 	struct super_block	*lr_super;
> 	struct ext4_sb_info	*lr_sbi;
> +	ext4_li_mode		lr_mode;
> 	ext4_group_t		lr_next_group;
> 	struct list_head	lr_request;
> 	unsigned long		lr_next_sched;
> @@ -2657,6 +2664,12 @@ extern int ext4_mb_reserve_blocks(struct super_block *, int);
> extern void ext4_discard_preallocations(struct inode *);
> extern int __init ext4_init_mballoc(void);
> extern void ext4_exit_mballoc(void);
> +extern ext4_group_t ext4_mb_prefetch(struct super_block *sb,
> +				     ext4_group_t group,
> +				     unsigned int nr, int *cnt);
> +extern void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
> +				  unsigned int nr);
> +
> extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
> 			     struct buffer_head *bh, ext4_fsblk_t block,
> 			     unsigned long count, int flags);
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index 172994349bf6..c072d06d678d 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -2224,9 +2224,8 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
>  * Start prefetching @nr block bitmaps starting at @group.
>  * Return the next group which needs to be prefetched.
>  */
> -static ext4_group_t
> -ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
> -		 unsigned int nr, int *cnt)
> +ext4_group_t ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
> +			      unsigned int nr, int *cnt)
> {
> 	ext4_group_t ngroups = ext4_get_groups_count(sb);
> 	struct buffer_head *bh;
> @@ -2276,9 +2275,8 @@ ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
>  * waiting for the block allocation bitmap read to finish when
>  * ext4_mb_prefetch_fini is called from ext4_mb_regular_allocator().
>  */
> -static void
> -ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
> -		      unsigned int nr)
> +void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
> +			   unsigned int nr)
> {
> 	while (nr-- > 0) {
> 		struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group,
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 330957ed1f05..9e19d5830745 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -1521,6 +1521,7 @@ enum {
> 	Opt_dioread_nolock, Opt_dioread_lock,
> 	Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
> 	Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
> +	Opt_prefetch_block_bitmaps,
> };
> 
> static const match_table_t tokens = {
> @@ -1612,6 +1613,7 @@ static const match_table_t tokens = {
> 	{Opt_test_dummy_encryption, "test_dummy_encryption"},
> 	{Opt_nombcache, "nombcache"},
> 	{Opt_nombcache, "no_mbcache"},	/* for backward compatibility */
> +	{Opt_prefetch_block_bitmaps, "prefetch_block_bitmaps"},
> 	{Opt_removed, "check=none"},	/* mount option from ext2/3 */
> 	{Opt_removed, "nocheck"},	/* mount option from ext2/3 */
> 	{Opt_removed, "reservation"},	/* mount option from ext2/3 */
> @@ -1829,6 +1831,8 @@ static const struct mount_opts {
> 	{Opt_max_dir_size_kb, 0, MOPT_GTE0},
> 	{Opt_test_dummy_encryption, 0, MOPT_STRING},
> 	{Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
> +	{Opt_prefetch_block_bitmaps, EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS,
> +	 MOPT_SET},
> 	{Opt_err, 0, 0}
> };
> 
> @@ -3197,19 +3201,33 @@ static void print_daily_error_info(struct timer_list *t)
> 	mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */
> }
> 
> +static int ext4_run_li_prefetch(struct ext4_li_request *elr,
> +				struct super_block *sb, ext4_group_t group)
> +{
> +	unsigned int prefetch_ios = 0;
> +
> +	elr->lr_next_group = ext4_mb_prefetch(sb, group,
> +					      EXT4_SB(sb)->s_mb_prefetch,
> +					      &prefetch_ios);
> +	if (prefetch_ios)
> +		ext4_mb_prefetch_fini(sb, elr->lr_next_group, prefetch_ios);
> +	return (group >= elr->lr_next_group);
> +}
> +
> /* Find next suitable group and run ext4_init_inode_table */
> static int ext4_run_li_request(struct ext4_li_request *elr)
> {
> 	struct ext4_group_desc *gdp = NULL;
> -	ext4_group_t group, ngroups;
> -	struct super_block *sb;
> +	ext4_group_t group = elr->lr_next_group;
> +	struct super_block *sb = elr->lr_super;
> +	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
> 	unsigned long timeout = 0;
> 	int ret = 0;
> 
> -	sb = elr->lr_super;
> -	ngroups = EXT4_SB(sb)->s_groups_count;
> +	if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP)
> +		return ext4_run_li_prefetch(elr, sb, group);
> 
> -	for (group = elr->lr_next_group; group < ngroups; group++) {
> +	for (; group < ngroups; group++) {
> 		gdp = ext4_get_group_desc(sb, group, NULL);
> 		if (!gdp) {
> 			ret = 1;
> @@ -3219,13 +3237,12 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
> 		if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
> 			break;
> 	}
> -
> 	if (group >= ngroups)
> 		ret = 1;
> 
> 	if (!ret) {
> 		timeout = jiffies;
> -		ret = ext4_init_inode_table(sb, group,
> +		ret = ext4_init_inode_table(elr->lr_super, group,
> 					    elr->lr_timeout ? 0 : 1);
> 		if (elr->lr_timeout == 0) {
> 			timeout = (jiffies - timeout) *
> @@ -3234,6 +3251,10 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
> 		}
> 		elr->lr_next_sched = jiffies + elr->lr_timeout;
> 		elr->lr_next_group = group + 1;
> +	} else if (test_opt(sb, PREFETCH_BLOCK_BITMAPS)) {
> +		elr->lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
> +		elr->lr_next_group = 0;
> +		ret = 0;
> 	}
> 	return ret;
> }
> @@ -3459,7 +3480,8 @@ static int ext4_li_info_new(void)
> }
> 
> static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
> -					    ext4_group_t start)
> +						   ext4_group_t start,
> +						   ext4_li_mode mode)
> {
> 	struct ext4_sb_info *sbi = EXT4_SB(sb);
> 	struct ext4_li_request *elr;
> @@ -3468,6 +3490,7 @@ static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
> 	if (!elr)
> 		return NULL;
> 
> +	elr->lr_mode = mode;
> 	elr->lr_super = sb;
> 	elr->lr_sbi = sbi;
> 	elr->lr_next_group = start;
> @@ -3488,6 +3511,7 @@ int ext4_register_li_request(struct super_block *sb,
> 	struct ext4_sb_info *sbi = EXT4_SB(sb);
> 	struct ext4_li_request *elr = NULL;
> 	ext4_group_t ngroups = sbi->s_groups_count;
> +	ext4_li_mode lr_mode = EXT4_LI_MODE_ITABLE;
> 	int ret = 0;
> 
> 	mutex_lock(&ext4_li_mtx);
> @@ -3501,10 +3525,15 @@ int ext4_register_li_request(struct super_block *sb,
> 	}
> 
> 	if (first_not_zeroed == ngroups || sb_rdonly(sb) ||
> -	    !test_opt(sb, INIT_INODE_TABLE))
> -		goto out;
> +	    !test_opt(sb, INIT_INODE_TABLE)) {
> +		if (test_opt(sb, PREFETCH_BLOCK_BITMAPS)) {
> +			first_not_zeroed = 0;

ext4_register_li_request() can also be called from the EXT4_IOC_RESIZE_FS ioctl, as:
	err = ext4_register_li_request(sb, o_group);

In this case, inode table initialisation will start from the new block range, but the block bitmap loading loop will start from group 0. Yes, the already-loaded groups will eventually be skipped, but this still wastes CPU time.


> +			lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
> +		} else
> +			goto out;
> +	}
> 
> -	elr = ext4_li_request_new(sb, first_not_zeroed);
> +	elr = ext4_li_request_new(sb, first_not_zeroed, lr_mode);
> 	if (!elr) {
> 		ret = -ENOMEM;
> 		goto out;
> -- 
> 2.24.1
>
diff mbox series

Patch

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 7451662e092a..c04d4ef0b77a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1172,6 +1172,7 @@  struct ext4_inode_info {
 #define EXT4_MOUNT_JOURNAL_CHECKSUM	0x800000 /* Journal checksums */
 #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT	0x1000000 /* Journal Async Commit */
 #define EXT4_MOUNT_WARN_ON_ERROR	0x2000000 /* Trigger WARN_ON on error */
+#define EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS 0x4000000
 #define EXT4_MOUNT_DELALLOC		0x8000000 /* Delalloc support */
 #define EXT4_MOUNT_DATA_ERR_ABORT	0x10000000 /* Abort on file data write */
 #define EXT4_MOUNT_BLOCK_VALIDITY	0x20000000 /* Block validity checking */
@@ -2315,9 +2316,15 @@  struct ext4_lazy_init {
 	struct mutex		li_list_mtx;
 };
 
+typedef enum {
+	EXT4_LI_MODE_ITABLE,
+	EXT4_LI_MODE_PREFETCH_BBITMAP
+} ext4_li_mode;
+
 struct ext4_li_request {
 	struct super_block	*lr_super;
 	struct ext4_sb_info	*lr_sbi;
+	ext4_li_mode		lr_mode;
 	ext4_group_t		lr_next_group;
 	struct list_head	lr_request;
 	unsigned long		lr_next_sched;
@@ -2657,6 +2664,12 @@  extern int ext4_mb_reserve_blocks(struct super_block *, int);
 extern void ext4_discard_preallocations(struct inode *);
 extern int __init ext4_init_mballoc(void);
 extern void ext4_exit_mballoc(void);
+extern ext4_group_t ext4_mb_prefetch(struct super_block *sb,
+				     ext4_group_t group,
+				     unsigned int nr, int *cnt);
+extern void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
+				  unsigned int nr);
+
 extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
 			     struct buffer_head *bh, ext4_fsblk_t block,
 			     unsigned long count, int flags);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 172994349bf6..c072d06d678d 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2224,9 +2224,8 @@  static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
  * Start prefetching @nr block bitmaps starting at @group.
  * Return the next group which needs to be prefetched.
  */
-static ext4_group_t
-ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
-		 unsigned int nr, int *cnt)
+ext4_group_t ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
+			      unsigned int nr, int *cnt)
 {
 	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	struct buffer_head *bh;
@@ -2276,9 +2275,8 @@  ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
  * waiting for the block allocation bitmap read to finish when
  * ext4_mb_prefetch_fini is called from ext4_mb_regular_allocator().
  */
-static void
-ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
-		      unsigned int nr)
+void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
+			   unsigned int nr)
 {
 	while (nr-- > 0) {
 		struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 330957ed1f05..9e19d5830745 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1521,6 +1521,7 @@  enum {
 	Opt_dioread_nolock, Opt_dioread_lock,
 	Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
 	Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
+	Opt_prefetch_block_bitmaps,
 };
 
 static const match_table_t tokens = {
@@ -1612,6 +1613,7 @@  static const match_table_t tokens = {
 	{Opt_test_dummy_encryption, "test_dummy_encryption"},
 	{Opt_nombcache, "nombcache"},
 	{Opt_nombcache, "no_mbcache"},	/* for backward compatibility */
+	{Opt_prefetch_block_bitmaps, "prefetch_block_bitmaps"},
 	{Opt_removed, "check=none"},	/* mount option from ext2/3 */
 	{Opt_removed, "nocheck"},	/* mount option from ext2/3 */
 	{Opt_removed, "reservation"},	/* mount option from ext2/3 */
@@ -1829,6 +1831,8 @@  static const struct mount_opts {
 	{Opt_max_dir_size_kb, 0, MOPT_GTE0},
 	{Opt_test_dummy_encryption, 0, MOPT_STRING},
 	{Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
+	{Opt_prefetch_block_bitmaps, EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS,
+	 MOPT_SET},
 	{Opt_err, 0, 0}
 };
 
@@ -3197,19 +3201,33 @@  static void print_daily_error_info(struct timer_list *t)
 	mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */
 }
 
+static int ext4_run_li_prefetch(struct ext4_li_request *elr,
+				struct super_block *sb, ext4_group_t group)
+{
+	unsigned int prefetch_ios = 0;
+
+	elr->lr_next_group = ext4_mb_prefetch(sb, group,
+					      EXT4_SB(sb)->s_mb_prefetch,
+					      &prefetch_ios);
+	if (prefetch_ios)
+		ext4_mb_prefetch_fini(sb, elr->lr_next_group, prefetch_ios);
+	return (group >= elr->lr_next_group);
+}
+
 /* Find next suitable group and run ext4_init_inode_table */
 static int ext4_run_li_request(struct ext4_li_request *elr)
 {
 	struct ext4_group_desc *gdp = NULL;
-	ext4_group_t group, ngroups;
-	struct super_block *sb;
+	ext4_group_t group = elr->lr_next_group;
+	struct super_block *sb = elr->lr_super;
+	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
 	unsigned long timeout = 0;
 	int ret = 0;
 
-	sb = elr->lr_super;
-	ngroups = EXT4_SB(sb)->s_groups_count;
+	if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP)
+		return ext4_run_li_prefetch(elr, sb, group);
 
-	for (group = elr->lr_next_group; group < ngroups; group++) {
+	for (; group < ngroups; group++) {
 		gdp = ext4_get_group_desc(sb, group, NULL);
 		if (!gdp) {
 			ret = 1;
@@ -3219,13 +3237,12 @@  static int ext4_run_li_request(struct ext4_li_request *elr)
 		if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
 			break;
 	}
-
 	if (group >= ngroups)
 		ret = 1;
 
 	if (!ret) {
 		timeout = jiffies;
-		ret = ext4_init_inode_table(sb, group,
+		ret = ext4_init_inode_table(elr->lr_super, group,
 					    elr->lr_timeout ? 0 : 1);
 		if (elr->lr_timeout == 0) {
 			timeout = (jiffies - timeout) *
@@ -3234,6 +3251,10 @@  static int ext4_run_li_request(struct ext4_li_request *elr)
 		}
 		elr->lr_next_sched = jiffies + elr->lr_timeout;
 		elr->lr_next_group = group + 1;
+	} else if (test_opt(sb, PREFETCH_BLOCK_BITMAPS)) {
+		elr->lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
+		elr->lr_next_group = 0;
+		ret = 0;
 	}
 	return ret;
 }
@@ -3459,7 +3480,8 @@  static int ext4_li_info_new(void)
 }
 
 static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
-					    ext4_group_t start)
+						   ext4_group_t start,
+						   ext4_li_mode mode)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_li_request *elr;
@@ -3468,6 +3490,7 @@  static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
 	if (!elr)
 		return NULL;
 
+	elr->lr_mode = mode;
 	elr->lr_super = sb;
 	elr->lr_sbi = sbi;
 	elr->lr_next_group = start;
@@ -3488,6 +3511,7 @@  int ext4_register_li_request(struct super_block *sb,
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_li_request *elr = NULL;
 	ext4_group_t ngroups = sbi->s_groups_count;
+	ext4_li_mode lr_mode = EXT4_LI_MODE_ITABLE;
 	int ret = 0;
 
 	mutex_lock(&ext4_li_mtx);
@@ -3501,10 +3525,15 @@  int ext4_register_li_request(struct super_block *sb,
 	}
 
 	if (first_not_zeroed == ngroups || sb_rdonly(sb) ||
-	    !test_opt(sb, INIT_INODE_TABLE))
-		goto out;
+	    !test_opt(sb, INIT_INODE_TABLE)) {
+		if (test_opt(sb, PREFETCH_BLOCK_BITMAPS)) {
+			first_not_zeroed = 0;
+			lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
+		} else
+			goto out;
+	}
 
-	elr = ext4_li_request_new(sb, first_not_zeroed);
+	elr = ext4_li_request_new(sb, first_not_zeroed, lr_mode);
 	if (!elr) {
 		ret = -ENOMEM;
 		goto out;