ext4: add ability to control the pseudo-random seed used by ext4
diff mbox

Message ID 20160729160035.26626-1-tytso@mit.edu
State New
Headers show

Commit Message

Theodore Ts'o July 29, 2016, 4 p.m. UTC
Ext4 uses a pseudo-random generator in a few places: to spread out
directories when htree is not enabled; to randomize the wait times
for MMP backoff and lazy inode table initialization.  For benchmarking
purposes, it's useful to control the pseudo-random number seed, so expose
this via /sys/fs/ext4/<dev>/prandom_seed.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/ext4.h         |  4 ++++
 fs/ext4/ialloc.c       |  6 ++++--
 fs/ext4/mmp.c          |  6 +++---
 fs/ext4/super.c        |  3 ++-
 fs/ext4/sysfs.c        | 20 ++++++++++++++++++++
 include/linux/random.h |  1 +
 lib/random32.c         | 10 +++++-----
 7 files changed, 39 insertions(+), 11 deletions(-)

Comments

kernel test robot July 29, 2016, 6:07 p.m. UTC | #1
Hi,

[auto build test WARNING on ext4/dev]
[also build test WARNING on v4.7 next-20160729]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Theodore-Ts-o/ext4-add-ability-to-control-the-pseudo-random-seed-used-by-ext4/20160730-001338
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git dev
config: x86_64-randconfig-s2-07300120 (attached as .config)
compiler: gcc-4.4 (Debian 4.4.7-8) 4.4.7
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All warnings (new ones prefixed by >>):

   fs/ext4/ialloc.c: In function 'find_group_orlov':
>> fs/ext4/ialloc.c:490: warning: format '%lu' expects type 'long unsigned int', but argument 2 has type 'ext4_group_t'
   fs/ext4/ialloc.c: In function 'ext4_orphan_get':
   fs/ext4/ialloc.c:1155: warning: 'bit' may be used uninitialized in this function

vim +490 fs/ext4/ialloc.c

   474		do_div(avefreec, ngroups);
   475		ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
   476	
   477		if (S_ISDIR(mode) &&
   478		    ((parent == d_inode(sb->s_root)) ||
   479		     (ext4_test_inode_flag(parent, EXT4_INODE_TOPDIR)))) {
   480			int best_ndir = inodes_per_group;
   481			int ret = -1;
   482	
   483			if (qstr) {
   484				hinfo.hash_version = DX_HASH_HALF_MD4;
   485				hinfo.seed = sbi->s_hash_seed;
   486				ext4fs_dirhash(qstr->name, qstr->len, &hinfo);
   487				grp = hinfo.hash;
   488			} else {
   489				grp = prandom_u32_state(&sbi->s_rnd_state);
 > 490				pr_err("ext4 random: %lu\n", grp);
   491			}
   492			parent_group = (unsigned)grp % ngroups;
   493			for (i = 0; i < ngroups; i++) {
   494				g = (parent_group + i) % ngroups;
   495				get_orlov_stats(sb, g, flex_size, &stats);
   496				if (!stats.free_inodes)
   497					continue;
   498				if (stats.used_dirs >= best_ndir)

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
kernel test robot July 29, 2016, 7:33 p.m. UTC | #2
Hi,

[auto build test WARNING on ext4/dev]
[also build test WARNING on v4.7 next-20160729]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Theodore-Ts-o/ext4-add-ability-to-control-the-pseudo-random-seed-used-by-ext4/20160730-001338
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git dev
config: m68k-allyesconfig (attached as .config)
compiler: m68k-linux-gcc (GCC) 4.9.0
reproduce:
        wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=m68k 

All warnings (new ones prefixed by >>):

   fs/ext4/ialloc.c: In function 'find_group_orlov':
>> fs/ext4/ialloc.c:490:4: warning: format '%lu' expects argument of type 'long unsigned int', but argument 2 has type 'ext4_group_t' [-Wformat=]
       pr_err("ext4 random: %lu\n", grp);
       ^

vim +490 fs/ext4/ialloc.c

   474		do_div(avefreec, ngroups);
   475		ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
   476	
   477		if (S_ISDIR(mode) &&
   478		    ((parent == d_inode(sb->s_root)) ||
   479		     (ext4_test_inode_flag(parent, EXT4_INODE_TOPDIR)))) {
   480			int best_ndir = inodes_per_group;
   481			int ret = -1;
   482	
   483			if (qstr) {
   484				hinfo.hash_version = DX_HASH_HALF_MD4;
   485				hinfo.seed = sbi->s_hash_seed;
   486				ext4fs_dirhash(qstr->name, qstr->len, &hinfo);
   487				grp = hinfo.hash;
   488			} else {
   489				grp = prandom_u32_state(&sbi->s_rnd_state);
 > 490				pr_err("ext4 random: %lu\n", grp);
   491			}
   492			parent_group = (unsigned)grp % ngroups;
   493			for (i = 0; i < ngroups; i++) {
   494				g = (parent_group + i) % ngroups;
   495				get_orlov_stats(sb, g, flex_size, &stats);
   496				if (!stats.free_inodes)
   497					continue;
   498				if (stats.used_dirs >= best_ndir)

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
kernel test robot July 29, 2016, 7:39 p.m. UTC | #3
Hi,

[auto build test WARNING on ext4/dev]
[also build test WARNING on v4.7 next-20160729]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Theodore-Ts-o/ext4-add-ability-to-control-the-pseudo-random-seed-used-by-ext4/20160730-001338
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git dev
config: sparc64-allyesconfig (attached as .config)
compiler: sparc64-linux-gnu-gcc (Debian 5.4.0-6) 5.4.0 20160609
reproduce:
        wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=sparc64 

All warnings (new ones prefixed by >>):

   In file included from include/linux/printk.h:6:0,
                    from include/linux/kernel.h:13,
                    from include/linux/list.h:8,
                    from include/linux/preempt.h:10,
                    from include/linux/spinlock.h:50,
                    from include/linux/seqlock.h:35,
                    from include/linux/time.h:5,
                    from fs/ext4/ialloc.c:15:
   fs/ext4/ialloc.c: In function 'find_group_orlov':
>> include/linux/kern_levels.h:4:18: warning: format '%lu' expects argument of type 'long unsigned int', but argument 2 has type 'ext4_group_t {aka unsigned int}' [-Wformat=]
    #define KERN_SOH "\001"  /* ASCII Start Of Header */
                     ^
   include/linux/kern_levels.h:10:18: note: in expansion of macro 'KERN_SOH'
    #define KERN_ERR KERN_SOH "3" /* error conditions */
                     ^
   include/linux/printk.h:264:9: note: in expansion of macro 'KERN_ERR'
     printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
            ^
>> fs/ext4/ialloc.c:490:4: note: in expansion of macro 'pr_err'
       pr_err("ext4 random: %lu\n", grp);
       ^

vim +/pr_err +490 fs/ext4/ialloc.c

   474		do_div(avefreec, ngroups);
   475		ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
   476	
   477		if (S_ISDIR(mode) &&
   478		    ((parent == d_inode(sb->s_root)) ||
   479		     (ext4_test_inode_flag(parent, EXT4_INODE_TOPDIR)))) {
   480			int best_ndir = inodes_per_group;
   481			int ret = -1;
   482	
   483			if (qstr) {
   484				hinfo.hash_version = DX_HASH_HALF_MD4;
   485				hinfo.seed = sbi->s_hash_seed;
   486				ext4fs_dirhash(qstr->name, qstr->len, &hinfo);
   487				grp = hinfo.hash;
   488			} else {
   489				grp = prandom_u32_state(&sbi->s_rnd_state);
 > 490				pr_err("ext4 random: %lu\n", grp);
   491			}
   492			parent_group = (unsigned)grp % ngroups;
   493			for (i = 0; i < ngroups; i++) {
   494				g = (parent_group + i) % ngroups;
   495				get_orlov_stats(sb, g, flex_size, &stats);
   496				if (!stats.free_inodes)
   497					continue;
   498				if (stats.used_dirs >= best_ndir)

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

Patch
diff mbox

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index ea31931..3dbb03a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -35,6 +35,7 @@ 
 #include <linux/fscrypto.h>
 #include <linux/falloc.h>
 #include <linux/percpu-rwsem.h>
+#include <linux/random.h>
 #ifdef __KERNEL__
 #include <linux/compat.h>
 #endif
@@ -1491,6 +1492,9 @@  struct ext4_sb_info {
 	/* Precomputed FS UUID checksum for seeding other checksums */
 	__u32 s_csum_seed;
 
+	/* RND state for the file system */
+	struct rnd_state s_rnd_state;
+
 	/* Reclaim extents from extent status tree */
 	struct shrinker s_es_shrinker;
 	struct list_head s_es_list;	/* List of inodes with reclaimable extents */
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 35f3518..09a1458 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -485,8 +485,10 @@  static int find_group_orlov(struct super_block *sb, struct inode *parent,
 			hinfo.seed = sbi->s_hash_seed;
 			ext4fs_dirhash(qstr->name, qstr->len, &hinfo);
 			grp = hinfo.hash;
-		} else
-			grp = prandom_u32();
+		} else {
+			grp = prandom_u32_state(&sbi->s_rnd_state);
+			pr_err("ext4 random: %lu\n", grp);
+		}
 		parent_group = (unsigned)grp % ngroups;
 		for (i = 0; i < ngroups; i++) {
 			g = (parent_group + i) % ngroups;
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 23d436d..99f69dc 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -258,12 +258,12 @@  exit_thread:
  * Get a random new sequence number but make sure it is not greater than
  * EXT4_MMP_SEQ_MAX.
  */
-static unsigned int mmp_new_seq(void)
+static unsigned int mmp_new_seq(struct ext4_sb_info *sbi)
 {
 	u32 new_seq;
 
 	do {
-		new_seq = prandom_u32();
+		new_seq = prandom_u32_state(&sbi->s_rnd_state);
 	} while (new_seq > EXT4_MMP_SEQ_MAX);
 
 	return new_seq;
@@ -342,7 +342,7 @@  skip:
 	/*
 	 * write a new random sequence number.
 	 */
-	seq = mmp_new_seq();
+	seq = mmp_new_seq(EXT4_SB(sb));
 	mmp->mmp_seq = cpu_to_le32(seq);
 
 	retval = write_mmp_block(sb, bh);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c13a4e4..2c86b98 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2969,7 +2969,7 @@  static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
 	 * spread the inode table initialization requests
 	 * better.
 	 */
-	elr->lr_next_sched = jiffies + (prandom_u32() %
+	elr->lr_next_sched = jiffies + (prandom_u32_state(&sbi->s_rnd_state) %
 				(EXT4_DEF_LI_MAX_START_DELAY * HZ));
 	return elr;
 }
@@ -3274,6 +3274,7 @@  static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	if (sb->s_bdev->bd_part)
 		sbi->s_sectors_written_start =
 			part_stat_read(sb->s_bdev->bd_part, sectors[1]);
+	_prandom_seed(&sbi->s_rnd_state, 0, true);
 
 	/* Cleanup superblock name */
 	strreplace(sb->s_id, '/', '!');
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 1420a3c..f74d34b 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -26,6 +26,7 @@  typedef enum {
 	attr_feature,
 	attr_pointer_ui,
 	attr_pointer_atomic,
+	attr_prandom_seed,
 } attr_id_t;
 
 typedef enum {
@@ -90,6 +91,21 @@  static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
 	return count;
 }
 
+static ssize_t prandom_seed_store(struct ext4_attr *a,
+				  struct ext4_sb_info *sbi,
+				  const char *buf, size_t count)
+{
+	unsigned long t;
+	int ret;
+
+	ret = kstrtoul(skip_spaces(buf), 0, &t);
+	if (ret)
+		return ret;
+
+	_prandom_seed(&sbi->s_rnd_state, t, false);
+	return count;
+}
+
 static ssize_t reserved_clusters_store(struct ext4_attr *a,
 				   struct ext4_sb_info *sbi,
 				   const char *buf, size_t count)
@@ -178,6 +194,7 @@  EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
 EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
 EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
 EXT4_ATTR(trigger_fs_error, 0200, trigger_test_error);
+EXT4_ATTR(prandom_seed, 0200, prandom_seed);
 EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval);
 EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst);
 EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval);
@@ -216,6 +233,7 @@  static struct attribute *ext4_attrs[] = {
 	ATTR_LIST(errors_count),
 	ATTR_LIST(first_error_time),
 	ATTR_LIST(last_error_time),
+	ATTR_LIST(prandom_seed),
 	NULL,
 };
 
@@ -313,6 +331,8 @@  static ssize_t ext4_attr_store(struct kobject *kobj,
 		return inode_readahead_blks_store(a, sbi, buf, len);
 	case attr_trigger_test_error:
 		return trigger_test_error(a, sbi, buf, len);
+	case attr_prandom_seed:
+		return prandom_seed_store(a, sbi, buf, len);
 	}
 	return 0;
 }
diff --git a/include/linux/random.h b/include/linux/random.h
index e47e533..64b70a8 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -45,6 +45,7 @@  struct rnd_state {
 	__u32 s1, s2, s3, s4;
 };
 
+void _prandom_seed(struct rnd_state *state, u32 seed, bool mix_with_hwseed);
 u32 prandom_u32_state(struct rnd_state *state);
 void prandom_bytes_state(struct rnd_state *state, void *buf, size_t nbytes);
 void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state);
diff --git a/lib/random32.c b/lib/random32.c
index 510d1ce..5134111 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -157,8 +157,7 @@  static u32 __extract_hwseed(void)
 	return val;
 }
 
-static void prandom_seed_early(struct rnd_state *state, u32 seed,
-			       bool mix_with_hwseed)
+void _prandom_seed(struct rnd_state *state, u32 seed, bool mix_with_hwseed)
 {
 #define LCG(x)	 ((x) * 69069U)	/* super-duper LCG */
 #define HWSEED() (mix_with_hwseed ? __extract_hwseed() : 0)
@@ -167,6 +166,7 @@  static void prandom_seed_early(struct rnd_state *state, u32 seed,
 	state->s3 = __seed(HWSEED() ^ LCG(state->s2),  16U);
 	state->s4 = __seed(HWSEED() ^ LCG(state->s3), 128U);
 }
+EXPORT_SYMBOL(_prandom_seed);
 
 /**
  *	prandom_seed - add entropy to pseudo random number generator
@@ -204,7 +204,7 @@  static int __init prandom_init(void)
 		struct rnd_state *state = &per_cpu(net_rand_state, i);
 		u32 weak_seed = (i + jiffies) ^ random_get_entropy();
 
-		prandom_seed_early(state, weak_seed, true);
+		_prandom_seed(state, weak_seed, true);
 		prandom_warmup(state);
 	}
 
@@ -429,7 +429,7 @@  static void __init prandom_state_selftest(void)
 	for (i = 0; i < ARRAY_SIZE(test1); i++) {
 		struct rnd_state state;
 
-		prandom_seed_early(&state, test1[i].seed, false);
+		_prandom_seed(&state, test1[i].seed, false);
 		prandom_warmup(&state);
 
 		if (test1[i].result != prandom_u32_state(&state))
@@ -444,7 +444,7 @@  static void __init prandom_state_selftest(void)
 	for (i = 0; i < ARRAY_SIZE(test2); i++) {
 		struct rnd_state state;
 
-		prandom_seed_early(&state, test2[i].seed, false);
+		_prandom_seed(&state, test2[i].seed, false);
 		prandom_warmup(&state);
 
 		for (j = 0; j < test2[i].iteration - 1; j++)