diff mbox

[29/42] ext4: Use readahead when reading an inode from the inode table

Message ID 1223525160-9887-30-git-send-email-tytso@mit.edu
State Accepted, archived
Headers show

Commit Message

Theodore Ts'o Oct. 9, 2008, 4:05 a.m. UTC
With modern hard drives, reading 64k takes roughly the same time as
reading a 4k block.  So request readahead for adjacent inode table
blocks to reduce the time it takes when iterating over directories
(especially when doing this in htree sort order) in a cold cache case.
With this patch, the time it takes to run "git status" on a kernel
tree after flushing the caches via "echo 3 > /proc/sys/vm/drop_caches"
is reduced by 21%.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/ext4.h    |    2 +
 fs/ext4/ext4_sb.h |    1 +
 fs/ext4/inode.c   |  134 +++++++++++++++++++++++++---------------------------
 fs/ext4/super.c   |   27 ++++++++++-
 4 files changed, 92 insertions(+), 72 deletions(-)

Comments

Aneesh Kumar K.V Oct. 9, 2008, 8:18 a.m. UTC | #1
On Thu, Oct 09, 2008 at 12:05:47AM -0400, Theodore Ts'o wrote:
> With modern hard drives, reading 64k takes roughly the same time as
> reading a 4k block.  So request readahead for adjacent inode table
> blocks to reduce the time it takes when iterating over directories
> (especially when doing this in htree sort order) in a cold cache case.
> With this patch, the time it takes to run "git status" on a kernel
> tree after flushing the caches via "echo 3 > /proc/sys/vm/drop_caches"
> is reduced by 21%.
> 
> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
> ---
>  fs/ext4/ext4.h    |    2 +
>  fs/ext4/ext4_sb.h |    1 +
>  fs/ext4/inode.c   |  134 +++++++++++++++++++++++++---------------------------
>  fs/ext4/super.c   |   27 ++++++++++-
>  4 files changed, 92 insertions(+), 72 deletions(-)


This needs documentation for the new inode_readahead_blks mount option and the corresponding per-filesystem /proc tunable.


> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 163c445..922d187 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -790,6 +790,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
>  #define	EXT4_DEF_RESUID		0
>  #define	EXT4_DEF_RESGID		0
> 
> +#define EXT4_DEF_INODE_READAHEAD_BLKS	32
> +
>  /*
>   * Default mount options
>   */
> diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
> index f92af01..94e0757 100644
> --- a/fs/ext4/ext4_sb.h
> +++ b/fs/ext4/ext4_sb.h
> @@ -52,6 +52,7 @@ struct ext4_sb_info {
>  	int s_desc_per_block_bits;
>  	int s_inode_size;
>  	int s_first_ino;
> +	unsigned int s_inode_readahead_blks;
>  	spinlock_t s_next_gen_lock;
>  	u32 s_next_generation;
>  	u32 s_hash_seed[4];
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 22fcbb6..ef4ca3d 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -3833,41 +3833,6 @@ out_stop:
>  	ext4_journal_stop(handle);
>  }
> 
> -static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
> -		unsigned long ino, struct ext4_iloc *iloc)
> -{
> -	ext4_group_t block_group;
> -	unsigned long offset;
> -	ext4_fsblk_t block;
> -	struct ext4_group_desc *gdp;
> -
> -	if (!ext4_valid_inum(sb, ino)) {
> -		/*
> -		 * This error is already checked for in namei.c unless we are
> -		 * looking at an NFS filehandle, in which case no error
> -		 * report is needed
> -		 */
> -		return 0;
> -	}
> -
> -	block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
> -	gdp = ext4_get_group_desc(sb, block_group, NULL);
> -	if (!gdp)
> -		return 0;
> -
> -	/*
> -	 * Figure out the offset within the block group inode table
> -	 */
> -	offset = ((ino - 1) % EXT4_INODES_PER_GROUP(sb)) *
> -		EXT4_INODE_SIZE(sb);
> -	block = ext4_inode_table(sb, gdp) +
> -		(offset >> EXT4_BLOCK_SIZE_BITS(sb));
> -
> -	iloc->block_group = block_group;
> -	iloc->offset = offset & (EXT4_BLOCK_SIZE(sb) - 1);
> -	return block;
> -}
> -
>  /*
>   * ext4_get_inode_loc returns with an extra refcount against the inode's
>   * underlying buffer_head on success. If 'in_mem' is true, we have all
> @@ -3877,19 +3842,35 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
>  static int __ext4_get_inode_loc(struct inode *inode,
>  				struct ext4_iloc *iloc, int in_mem)
>  {
> -	ext4_fsblk_t block;
> -	struct buffer_head *bh;
> +	struct ext4_group_desc	*gdp;
> +	struct buffer_head	*bh;
> +	struct super_block	*sb = inode->i_sb;
> +	ext4_fsblk_t		block;
> +	int			inodes_per_block, inode_offset;
> +
> +	iloc->bh = 0;
> +	if (!ext4_valid_inum(sb, inode->i_ino))
> +		return -EIO;
> 
> -	block = ext4_get_inode_block(inode->i_sb, inode->i_ino, iloc);
> -	if (!block)
> +	iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb);
> +	gdp = ext4_get_group_desc(sb, iloc->block_group, NULL);
> +	if (!gdp)
>  		return -EIO;
> 
> -	bh = sb_getblk(inode->i_sb, block);
> +	/*
> +	 * Figure out the offset within the block group inode table
> +	 */
> +	inodes_per_block = (EXT4_BLOCK_SIZE(sb) / EXT4_INODE_SIZE(sb));
> +	inode_offset = ((inode->i_ino - 1) %
> +			EXT4_INODES_PER_GROUP(sb));
> +	block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block);
> +	iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);
> +
> +	bh = sb_getblk(sb, block);
>  	if (!bh) {
> -		ext4_error (inode->i_sb, "ext4_get_inode_loc",
> -				"unable to read inode block - "
> -				"inode=%lu, block=%llu",
> -				 inode->i_ino, block);
> +		ext4_error(sb, "ext4_get_inode_loc", "unable to read "
> +			   "inode block - inode=%lu, block=%llu",
> +			   inode->i_ino, block);
>  		return -EIO;
>  	}
>  	if (!buffer_uptodate(bh)) {
> @@ -3917,28 +3898,12 @@ static int __ext4_get_inode_loc(struct inode *inode,
>  		 */
>  		if (in_mem) {
>  			struct buffer_head *bitmap_bh;
> -			struct ext4_group_desc *desc;
> -			int inodes_per_buffer;
> -			int inode_offset, i;
> -			ext4_group_t block_group;
> -			int start;
> -
> -			block_group = (inode->i_ino - 1) /
> -					EXT4_INODES_PER_GROUP(inode->i_sb);
> -			inodes_per_buffer = bh->b_size /
> -				EXT4_INODE_SIZE(inode->i_sb);
> -			inode_offset = ((inode->i_ino - 1) %
> -					EXT4_INODES_PER_GROUP(inode->i_sb));
> -			start = inode_offset & ~(inodes_per_buffer - 1);
> +			int i, start;
> 
> -			/* Is the inode bitmap in cache? */
> -			desc = ext4_get_group_desc(inode->i_sb,
> -						block_group, NULL);
> -			if (!desc)
> -				goto make_io;
> +			start = inode_offset & ~(inodes_per_block - 1);
> 
> -			bitmap_bh = sb_getblk(inode->i_sb,
> -				ext4_inode_bitmap(inode->i_sb, desc));
> +			/* Is the inode bitmap in cache? */
> +			bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp));
>  			if (!bitmap_bh)
>  				goto make_io;
> 
> @@ -3951,14 +3916,14 @@ static int __ext4_get_inode_loc(struct inode *inode,
>  				brelse(bitmap_bh);
>  				goto make_io;
>  			}
> -			for (i = start; i < start + inodes_per_buffer; i++) {
> +			for (i = start; i < start + inodes_per_block; i++) {
>  				if (i == inode_offset)
>  					continue;
>  				if (ext4_test_bit(i, bitmap_bh->b_data))
>  					break;
>  			}
>  			brelse(bitmap_bh);
> -			if (i == start + inodes_per_buffer) {
> +			if (i == start + inodes_per_block) {
>  				/* all other inodes are free, so skip I/O */
>  				memset(bh->b_data, 0, bh->b_size);
>  				set_buffer_uptodate(bh);
> @@ -3969,6 +3934,36 @@ static int __ext4_get_inode_loc(struct inode *inode,
> 
>  make_io:
>  		/*
> +		 * If we need to do any I/O, try to pre-readahead extra
> +		 * blocks from the inode table.
> +		 */
> +		if (EXT4_SB(sb)->s_inode_readahead_blks) {
> +			ext4_fsblk_t b, end, table;
> +			unsigned num;
> +
> +			table = ext4_inode_table(sb, gdp);
> +			/* Make sure s_inode_readahead_blks is a power of 2 */
> +			while (EXT4_SB(sb)->s_inode_readahead_blks &
> +			       (EXT4_SB(sb)->s_inode_readahead_blks-1))
> +				EXT4_SB(sb)->s_inode_readahead_blks = 
> +				   (EXT4_SB(sb)->s_inode_readahead_blks &
> +				    (EXT4_SB(sb)->s_inode_readahead_blks-1));
> +			b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1);
> +			if (table > b)
> +				b = table;
> +			end = b + EXT4_SB(sb)->s_inode_readahead_blks;
> +			num = EXT4_INODES_PER_GROUP(sb);
> +			if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
> +				       EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
> +				num -= le16_to_cpu(gdp->bg_itable_unused);
> +			table += num / inodes_per_block;
> +			if (end > table)
> +				end = table;
> +			while (b <= end)
> +				sb_breadahead(sb, b++);
> +		}
> +
> +		/*
>  		 * There are other valid inodes in the buffer, this inode
>  		 * has in-inode xattrs, or we don't have this inode in memory.
>  		 * Read the block from disk.
> @@ -3978,10 +3973,9 @@ make_io:
>  		submit_bh(READ_META, bh);
>  		wait_on_buffer(bh);
>  		if (!buffer_uptodate(bh)) {
> -			ext4_error(inode->i_sb, "ext4_get_inode_loc",
> -					"unable to read inode block - "
> -					"inode=%lu, block=%llu",
> -					inode->i_ino, block);
> +			ext4_error(sb, __func__,
> +				   "unable to read inode block - inode=%lu, "
> +				   "block=%llu", inode->i_ino, block);
>  			brelse(bh);
>  			return -EIO;
>  		}
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 9f5468f..6583aee 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -515,8 +515,10 @@ static void ext4_put_super(struct super_block *sb)
>  		mark_buffer_dirty(sbi->s_sbh);
>  		ext4_commit_super(sb, es, 1);
>  	}
> -	if (sbi->s_proc)
> +	if (sbi->s_proc) {
> +		remove_proc_entry("inode_readahead_blks", sbi->s_proc);
>  		remove_proc_entry(sb->s_id, ext4_proc_root);
> +	}
> 
>  	for (i = 0; i < sbi->s_gdb_count; i++)
>  		brelse(sbi->s_group_desc[i]);
> @@ -779,6 +781,10 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
>  	else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
>  		seq_puts(seq, ",data=writeback");
> 
> +	if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
> +		seq_printf(seq, ",inode_readahead_blks=%u",
> +			   sbi->s_inode_readahead_blks);
> +
>  	ext4_show_quota_options(seq, sb);
>  	return 0;
>  }
> @@ -913,6 +919,7 @@ enum {
>  	Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
>  	Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version,
>  	Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc,
> +	Opt_inode_readahead_blks
>  };
> 
>  static match_table_t tokens = {
> @@ -973,6 +980,7 @@ static match_table_t tokens = {
>  	{Opt_resize, "resize"},
>  	{Opt_delalloc, "delalloc"},
>  	{Opt_nodelalloc, "nodelalloc"},
> +	{Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
>  	{Opt_err, NULL},
>  };
> 
> @@ -1381,6 +1389,13 @@ set_qf_format:
>  		case Opt_delalloc:
>  			set_opt(sbi->s_mount_opt, DELALLOC);
>  			break;
> +		case Opt_inode_readahead_blks:
> +			if (match_int(&args[0], &option))
> +				return 0;
> +			if (option < 0 || option > (1 << 30))
> +				return 0;
> +			sbi->s_inode_readahead_blks = option;
> +			break;
>  		default:
>  			printk(KERN_ERR
>  			       "EXT4-fs: Unrecognized mount option \"%s\" "
> @@ -1938,6 +1953,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
>  	sbi->s_mount_opt = 0;
>  	sbi->s_resuid = EXT4_DEF_RESUID;
>  	sbi->s_resgid = EXT4_DEF_RESGID;
> +	sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
>  	sbi->s_sb_block = sb_block;
> 
>  	unlock_kernel();
> @@ -2234,6 +2250,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
>  	if (ext4_proc_root)
>  		sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
> 
> +	if (sbi->s_proc)
> +		proc_create_data("inode_readahead_blks", 0644, sbi->s_proc,
> +				 &ext4_ui_proc_fops,
> +				 &sbi->s_inode_readahead_blks);
> +
>  	bgl_lock_init(&sbi->s_blockgroup_lock);
> 
>  	for (i = 0; i < db_count; i++) {
> @@ -2513,8 +2534,10 @@ failed_mount2:
>  		brelse(sbi->s_group_desc[i]);
>  	kfree(sbi->s_group_desc);
>  failed_mount:
> -	if (sbi->s_proc)
> +	if (sbi->s_proc) {
> +		remove_proc_entry("inode_readahead_blks", sbi->s_proc);
>  		remove_proc_entry(sb->s_id, ext4_proc_root);
> +	}
>  #ifdef CONFIG_QUOTA
>  	for (i = 0; i < MAXQUOTAS; i++)
>  		kfree(sbi->s_qf_names[i]);
> -- 
> 1.5.6.1.205.ge2c7.dirty
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 163c445..922d187 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -790,6 +790,8 @@  static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 #define	EXT4_DEF_RESUID		0
 #define	EXT4_DEF_RESGID		0
 
+#define EXT4_DEF_INODE_READAHEAD_BLKS	32
+
 /*
  * Default mount options
  */
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
index f92af01..94e0757 100644
--- a/fs/ext4/ext4_sb.h
+++ b/fs/ext4/ext4_sb.h
@@ -52,6 +52,7 @@  struct ext4_sb_info {
 	int s_desc_per_block_bits;
 	int s_inode_size;
 	int s_first_ino;
+	unsigned int s_inode_readahead_blks;
 	spinlock_t s_next_gen_lock;
 	u32 s_next_generation;
 	u32 s_hash_seed[4];
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 22fcbb6..ef4ca3d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3833,41 +3833,6 @@  out_stop:
 	ext4_journal_stop(handle);
 }
 
-static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
-		unsigned long ino, struct ext4_iloc *iloc)
-{
-	ext4_group_t block_group;
-	unsigned long offset;
-	ext4_fsblk_t block;
-	struct ext4_group_desc *gdp;
-
-	if (!ext4_valid_inum(sb, ino)) {
-		/*
-		 * This error is already checked for in namei.c unless we are
-		 * looking at an NFS filehandle, in which case no error
-		 * report is needed
-		 */
-		return 0;
-	}
-
-	block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
-	gdp = ext4_get_group_desc(sb, block_group, NULL);
-	if (!gdp)
-		return 0;
-
-	/*
-	 * Figure out the offset within the block group inode table
-	 */
-	offset = ((ino - 1) % EXT4_INODES_PER_GROUP(sb)) *
-		EXT4_INODE_SIZE(sb);
-	block = ext4_inode_table(sb, gdp) +
-		(offset >> EXT4_BLOCK_SIZE_BITS(sb));
-
-	iloc->block_group = block_group;
-	iloc->offset = offset & (EXT4_BLOCK_SIZE(sb) - 1);
-	return block;
-}
-
 /*
  * ext4_get_inode_loc returns with an extra refcount against the inode's
  * underlying buffer_head on success. If 'in_mem' is true, we have all
@@ -3877,19 +3842,35 @@  static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
 static int __ext4_get_inode_loc(struct inode *inode,
 				struct ext4_iloc *iloc, int in_mem)
 {
-	ext4_fsblk_t block;
-	struct buffer_head *bh;
+	struct ext4_group_desc	*gdp;
+	struct buffer_head	*bh;
+	struct super_block	*sb = inode->i_sb;
+	ext4_fsblk_t		block;
+	int			inodes_per_block, inode_offset;
+
+	iloc->bh = 0;
+	if (!ext4_valid_inum(sb, inode->i_ino))
+		return -EIO;
 
-	block = ext4_get_inode_block(inode->i_sb, inode->i_ino, iloc);
-	if (!block)
+	iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb);
+	gdp = ext4_get_group_desc(sb, iloc->block_group, NULL);
+	if (!gdp)
 		return -EIO;
 
-	bh = sb_getblk(inode->i_sb, block);
+	/*
+	 * Figure out the offset within the block group inode table
+	 */
+	inodes_per_block = (EXT4_BLOCK_SIZE(sb) / EXT4_INODE_SIZE(sb));
+	inode_offset = ((inode->i_ino - 1) %
+			EXT4_INODES_PER_GROUP(sb));
+	block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block);
+	iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);
+
+	bh = sb_getblk(sb, block);
 	if (!bh) {
-		ext4_error (inode->i_sb, "ext4_get_inode_loc",
-				"unable to read inode block - "
-				"inode=%lu, block=%llu",
-				 inode->i_ino, block);
+		ext4_error(sb, "ext4_get_inode_loc", "unable to read "
+			   "inode block - inode=%lu, block=%llu",
+			   inode->i_ino, block);
 		return -EIO;
 	}
 	if (!buffer_uptodate(bh)) {
@@ -3917,28 +3898,12 @@  static int __ext4_get_inode_loc(struct inode *inode,
 		 */
 		if (in_mem) {
 			struct buffer_head *bitmap_bh;
-			struct ext4_group_desc *desc;
-			int inodes_per_buffer;
-			int inode_offset, i;
-			ext4_group_t block_group;
-			int start;
-
-			block_group = (inode->i_ino - 1) /
-					EXT4_INODES_PER_GROUP(inode->i_sb);
-			inodes_per_buffer = bh->b_size /
-				EXT4_INODE_SIZE(inode->i_sb);
-			inode_offset = ((inode->i_ino - 1) %
-					EXT4_INODES_PER_GROUP(inode->i_sb));
-			start = inode_offset & ~(inodes_per_buffer - 1);
+			int i, start;
 
-			/* Is the inode bitmap in cache? */
-			desc = ext4_get_group_desc(inode->i_sb,
-						block_group, NULL);
-			if (!desc)
-				goto make_io;
+			start = inode_offset & ~(inodes_per_block - 1);
 
-			bitmap_bh = sb_getblk(inode->i_sb,
-				ext4_inode_bitmap(inode->i_sb, desc));
+			/* Is the inode bitmap in cache? */
+			bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp));
 			if (!bitmap_bh)
 				goto make_io;
 
@@ -3951,14 +3916,14 @@  static int __ext4_get_inode_loc(struct inode *inode,
 				brelse(bitmap_bh);
 				goto make_io;
 			}
-			for (i = start; i < start + inodes_per_buffer; i++) {
+			for (i = start; i < start + inodes_per_block; i++) {
 				if (i == inode_offset)
 					continue;
 				if (ext4_test_bit(i, bitmap_bh->b_data))
 					break;
 			}
 			brelse(bitmap_bh);
-			if (i == start + inodes_per_buffer) {
+			if (i == start + inodes_per_block) {
 				/* all other inodes are free, so skip I/O */
 				memset(bh->b_data, 0, bh->b_size);
 				set_buffer_uptodate(bh);
@@ -3969,6 +3934,36 @@  static int __ext4_get_inode_loc(struct inode *inode,
 
 make_io:
 		/*
+		 * If we need to do any I/O, try to pre-readahead extra
+		 * blocks from the inode table.
+		 */
+		if (EXT4_SB(sb)->s_inode_readahead_blks) {
+			ext4_fsblk_t b, end, table;
+			unsigned num;
+
+			table = ext4_inode_table(sb, gdp);
+			/* Make sure s_inode_readahead_blks is a power of 2 */
+			while (EXT4_SB(sb)->s_inode_readahead_blks &
+			       (EXT4_SB(sb)->s_inode_readahead_blks-1))
+				EXT4_SB(sb)->s_inode_readahead_blks = 
+				   (EXT4_SB(sb)->s_inode_readahead_blks &
+				    (EXT4_SB(sb)->s_inode_readahead_blks-1));
+			b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1);
+			if (table > b)
+				b = table;
+			end = b + EXT4_SB(sb)->s_inode_readahead_blks;
+			num = EXT4_INODES_PER_GROUP(sb);
+			if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+				       EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
+				num -= le16_to_cpu(gdp->bg_itable_unused);
+			table += num / inodes_per_block;
+			if (end > table)
+				end = table;
+			while (b <= end)
+				sb_breadahead(sb, b++);
+		}
+
+		/*
 		 * There are other valid inodes in the buffer, this inode
 		 * has in-inode xattrs, or we don't have this inode in memory.
 		 * Read the block from disk.
@@ -3978,10 +3973,9 @@  make_io:
 		submit_bh(READ_META, bh);
 		wait_on_buffer(bh);
 		if (!buffer_uptodate(bh)) {
-			ext4_error(inode->i_sb, "ext4_get_inode_loc",
-					"unable to read inode block - "
-					"inode=%lu, block=%llu",
-					inode->i_ino, block);
+			ext4_error(sb, __func__,
+				   "unable to read inode block - inode=%lu, "
+				   "block=%llu", inode->i_ino, block);
 			brelse(bh);
 			return -EIO;
 		}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 9f5468f..6583aee 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -515,8 +515,10 @@  static void ext4_put_super(struct super_block *sb)
 		mark_buffer_dirty(sbi->s_sbh);
 		ext4_commit_super(sb, es, 1);
 	}
-	if (sbi->s_proc)
+	if (sbi->s_proc) {
+		remove_proc_entry("inode_readahead_blks", sbi->s_proc);
 		remove_proc_entry(sb->s_id, ext4_proc_root);
+	}
 
 	for (i = 0; i < sbi->s_gdb_count; i++)
 		brelse(sbi->s_group_desc[i]);
@@ -779,6 +781,10 @@  static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
 	else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
 		seq_puts(seq, ",data=writeback");
 
+	if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
+		seq_printf(seq, ",inode_readahead_blks=%u",
+			   sbi->s_inode_readahead_blks);
+
 	ext4_show_quota_options(seq, sb);
 	return 0;
 }
@@ -913,6 +919,7 @@  enum {
 	Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
 	Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version,
 	Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc,
+	Opt_inode_readahead_blks
 };
 
 static match_table_t tokens = {
@@ -973,6 +980,7 @@  static match_table_t tokens = {
 	{Opt_resize, "resize"},
 	{Opt_delalloc, "delalloc"},
 	{Opt_nodelalloc, "nodelalloc"},
+	{Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
 	{Opt_err, NULL},
 };
 
@@ -1381,6 +1389,13 @@  set_qf_format:
 		case Opt_delalloc:
 			set_opt(sbi->s_mount_opt, DELALLOC);
 			break;
+		case Opt_inode_readahead_blks:
+			if (match_int(&args[0], &option))
+				return 0;
+			if (option < 0 || option > (1 << 30))
+				return 0;
+			sbi->s_inode_readahead_blks = option;
+			break;
 		default:
 			printk(KERN_ERR
 			       "EXT4-fs: Unrecognized mount option \"%s\" "
@@ -1938,6 +1953,7 @@  static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->s_mount_opt = 0;
 	sbi->s_resuid = EXT4_DEF_RESUID;
 	sbi->s_resgid = EXT4_DEF_RESGID;
+	sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
 	sbi->s_sb_block = sb_block;
 
 	unlock_kernel();
@@ -2234,6 +2250,11 @@  static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	if (ext4_proc_root)
 		sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
 
+	if (sbi->s_proc)
+		proc_create_data("inode_readahead_blks", 0644, sbi->s_proc,
+				 &ext4_ui_proc_fops,
+				 &sbi->s_inode_readahead_blks);
+
 	bgl_lock_init(&sbi->s_blockgroup_lock);
 
 	for (i = 0; i < db_count; i++) {
@@ -2513,8 +2534,10 @@  failed_mount2:
 		brelse(sbi->s_group_desc[i]);
 	kfree(sbi->s_group_desc);
 failed_mount:
-	if (sbi->s_proc)
+	if (sbi->s_proc) {
+		remove_proc_entry("inode_readahead_blks", sbi->s_proc);
 		remove_proc_entry(sb->s_id, ext4_proc_root);
+	}
 #ifdef CONFIG_QUOTA
 	for (i = 0; i < MAXQUOTAS; i++)
 		kfree(sbi->s_qf_names[i]);