diff mbox series

[RFC,v3,46/61] ext2fs: parallel bitmap loading

Message ID 20201118153947.3394530-47-saranyamohan@google.com
State New
Headers show
Series Introduce parallel fsck to e2fsck pass1 | expand

Commit Message

Saranya Muruganandam Nov. 18, 2020, 3:39 p.m. UTC
From: Wang Shilong <wshilong@ddn.com>

In our benchmarking for PiB size filesystem, pass5 takes
10446s to finish and 99.5% of time takes on reading bitmaps.

It makes sense to reading bitmaps using multiple threads,
a quickly benchmark show 10446s to 626s with 64 threads.

Signed-off-by: Wang Shilong <wshilong@ddn.com>
Signed-off-by: Saranya Muruganandam <saranyamohan@google.com>
---
 lib/ext2fs/rw_bitmaps.c | 289 ++++++++++++++++++++++++++++++++++------
 1 file changed, 250 insertions(+), 39 deletions(-)

Comments

Theodore Y. Ts'o Nov. 24, 2020, 2:44 a.m. UTC | #1
On Wed, Nov 18, 2020 at 07:39:32AM -0800, Saranya Muruganandam wrote:
> From: Wang Shilong <wshilong@ddn.com>
> 
> In our benchmarking for PiB size filesystem, pass5 takes
> 10446s to finish and 99.5% of time takes on reading bitmaps.
> 
> It makes sense to reading bitmaps using multiple threads,
> a quickly benchmark show 10446s to 626s with 64 threads.
> 
> Signed-off-by: Wang Shilong <wshilong@ddn.com>
> Signed-off-by: Saranya Muruganandam <saranyamohan@google.com>

Note: This patch will *explode* with much hilarity if num_threads is
greater than the number of block groups.  That's because the
ext2fs_get_avg_group() will return 1 if fs_num_threads is greater than
group_desc_count.

This will result in the group start and end limits to go beyond the
array boundaries.... and then *boom*.  So there will probably need to
be some kind of safety checks if the caller has set fs_num_threads to
a value which is much larger than is appropriate for a given file
system.


Speaking of which, relying on fs_num_threads being set by e2fsck means
that we won't get the benefits of the parallel block bitmap reads for
debugfs and dumpe2fs.  So we should think about how the other tools
should trigger read bitmaps.  And this might be something that we want
to do independent of whether we are doing parallel fsck.

Suggested approach:

1) Create create ext2fs_is_device_rotational() which returns whether
or not a particular device is a HDD, or a non-rotational device (e.g.,
SSD, GCE PD, AWS EBS, etc.), using rotational as a proxy for "reading
using multiple threads is a good thing".

2) Create an ext2fs_get_num_procs() which calls
sysconf(_SC_NPROCESSOR_ONLN) if sysconf and _SC_NPROESSOR_ONLN is
available.  If not, there may be other OS-specific ways of determining
the number of CPU's available.

3) If HAVE_PTHREADS and the number of block groups is greater than the
number of CPU's * 2, a function that (for now) we drop in libsupport
will set fs->fs_num_threads to the number of processors as the
default.  There may not be a reason to change the default for debugfs
and dumpe2fs, but for e2fsck, this would be used for the default, but
it could be over-ridden via "-E multithread=<number of threads>".

   	    		    			       - Ted
diff mbox series

Patch

diff --git a/lib/ext2fs/rw_bitmaps.c b/lib/ext2fs/rw_bitmaps.c
index d80c9eb8..960a6913 100644
--- a/lib/ext2fs/rw_bitmaps.c
+++ b/lib/ext2fs/rw_bitmaps.c
@@ -23,6 +23,7 @@ 
 #ifdef HAVE_SYS_TYPES_H
 #include <sys/types.h>
 #endif
+#include <pthread.h>
 
 #include "ext2_fs.h"
 #include "ext2fs.h"
@@ -205,22 +206,12 @@  static int bitmap_tail_verify(unsigned char *bitmap, int first, int last)
 	return 1;
 }
 
-static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
+static errcode_t read_bitmaps_range_prepare(ext2_filsys fs, int do_inode, int do_block)
 {
-	dgrp_t i;
-	char *block_bitmap = 0, *inode_bitmap = 0;
-	char *buf;
 	errcode_t retval;
 	int block_nbytes = EXT2_CLUSTERS_PER_GROUP(fs->super) / 8;
 	int inode_nbytes = EXT2_INODES_PER_GROUP(fs->super) / 8;
-	int tail_flags = 0;
-	int csum_flag;
-	unsigned int	cnt;
-	blk64_t	blk;
-	blk64_t	blk_itr = EXT2FS_B2C(fs, fs->super->s_first_data_block);
-	blk64_t   blk_cnt;
-	ext2_ino_t ino_itr = 1;
-	ext2_ino_t ino_cnt;
+	char *buf;
 
 	EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS);
 
@@ -230,11 +221,10 @@  static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
 
 	fs->write_bitmaps = ext2fs_write_bitmaps;
 
-	csum_flag = ext2fs_has_group_desc_csum(fs);
-
 	retval = ext2fs_get_mem(strlen(fs->device_name) + 80, &buf);
 	if (retval)
 		return retval;
+
 	if (do_block) {
 		if (fs->block_map)
 			ext2fs_free_block_bitmap(fs->block_map);
@@ -243,11 +233,8 @@  static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
 		retval = ext2fs_allocate_block_bitmap(fs, buf, &fs->block_map);
 		if (retval)
 			goto cleanup;
-		retval = io_channel_alloc_buf(fs->io, 0, &block_bitmap);
-		if (retval)
-			goto cleanup;
-	} else
-		block_nbytes = 0;
+	}
+
 	if (do_inode) {
 		if (fs->inode_map)
 			ext2fs_free_inode_bitmap(fs->inode_map);
@@ -256,13 +243,69 @@  static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
 		retval = ext2fs_allocate_inode_bitmap(fs, buf, &fs->inode_map);
 		if (retval)
 			goto cleanup;
-		retval = io_channel_alloc_buf(fs->io, 0, &inode_bitmap);
+	}
+	ext2fs_free_mem(&buf);
+
+	return retval;
+
+cleanup:
+	if (do_block) {
+		ext2fs_free_block_bitmap(fs->block_map);
+		fs->block_map = 0;
+	}
+	if (do_inode) {
+		ext2fs_free_inode_bitmap(fs->inode_map);
+		fs->inode_map = 0;
+	}
+	if (buf)
+		ext2fs_free_mem(&buf);
+	return retval;
+}
+
+static errcode_t read_bitmaps_range_start(ext2_filsys fs, int do_inode, int do_block,
+					  dgrp_t start, dgrp_t end, pthread_mutex_t *mutex,
+					  io_channel io)
+{
+	dgrp_t i;
+	char *block_bitmap = 0, *inode_bitmap = 0;
+	char *buf;
+	errcode_t retval;
+	int block_nbytes = EXT2_CLUSTERS_PER_GROUP(fs->super) / 8;
+	int inode_nbytes = EXT2_INODES_PER_GROUP(fs->super) / 8;
+	int tail_flags = 0;
+	int csum_flag;
+	unsigned int	cnt;
+	blk64_t	blk;
+	blk64_t	blk_itr = EXT2FS_B2C(fs, fs->super->s_first_data_block);
+	blk64_t   blk_cnt;
+	ext2_ino_t ino_itr = 1;
+	ext2_ino_t ino_cnt;
+	io_channel this_io;
+
+	if (!io)
+		this_io = fs->io;
+	else
+		this_io = io;
+
+	csum_flag = ext2fs_has_group_desc_csum(fs);
+
+	if (do_block) {
+		retval = io_channel_alloc_buf(this_io, 0, &block_bitmap);
+		if (retval)
+			goto cleanup;
+	} else {
+		block_nbytes = 0;
+	}
+
+	if (do_inode) {
+		retval = io_channel_alloc_buf(this_io, 0, &inode_bitmap);
 		if (retval)
 			goto cleanup;
-	} else
+	} else {
 		inode_nbytes = 0;
-	ext2fs_free_mem(&buf);
+	}
 
+	/* io should be null */
 	if (fs->flags & EXT2_FLAG_IMAGE_FILE) {
 		blk = (ext2fs_le32_to_cpu(fs->image_header->offset_inodemap) / fs->blocksize);
 		ino_cnt = fs->super->s_inodes_count;
@@ -303,7 +346,9 @@  static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
 		goto success_cleanup;
 	}
 
-	for (i = 0; i < fs->group_desc_count; i++) {
+	blk_itr += ((blk64_t)start * (block_nbytes << 3));
+	ino_itr += ((blk64_t)start * (inode_nbytes << 3));
+	for (i = start; i <= end; i++) {
 		if (block_bitmap) {
 			blk = ext2fs_block_bitmap_loc(fs, i);
 			if ((csum_flag &&
@@ -312,7 +357,7 @@  static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
 			    (blk >= ext2fs_blocks_count(fs->super)))
 				blk = 0;
 			if (blk) {
-				retval = io_channel_read_blk64(fs->io, blk,
+				retval = io_channel_read_blk64(this_io, blk,
 							       1, block_bitmap);
 				if (retval) {
 					retval = EXT2_ET_BLOCK_BITMAP_READ;
@@ -333,8 +378,12 @@  static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
 			} else
 				memset(block_bitmap, 0, block_nbytes);
 			cnt = block_nbytes << 3;
+			if (mutex)
+				pthread_mutex_lock(mutex);
 			retval = ext2fs_set_block_bitmap_range2(fs->block_map,
 					       blk_itr, cnt, block_bitmap);
+			if (mutex)
+				pthread_mutex_unlock(mutex);
 			if (retval)
 				goto cleanup;
 			blk_itr += block_nbytes << 3;
@@ -347,7 +396,7 @@  static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
 			    (blk >= ext2fs_blocks_count(fs->super)))
 				blk = 0;
 			if (blk) {
-				retval = io_channel_read_blk64(fs->io, blk,
+				retval = io_channel_read_blk64(this_io, blk,
 							       1, inode_bitmap);
 				if (retval) {
 					retval = EXT2_ET_INODE_BITMAP_READ;
@@ -369,29 +418,28 @@  static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
 			} else
 				memset(inode_bitmap, 0, inode_nbytes);
 			cnt = inode_nbytes << 3;
+			if (mutex)
+				pthread_mutex_lock(mutex);
 			retval = ext2fs_set_inode_bitmap_range2(fs->inode_map,
 					       ino_itr, cnt, inode_bitmap);
+			if (mutex)
+				pthread_mutex_unlock(mutex);
 			if (retval)
 				goto cleanup;
 			ino_itr += inode_nbytes << 3;
 		}
 	}
 
-	/* Mark group blocks for any BLOCK_UNINIT groups */
-	if (do_block) {
-		retval = mark_uninit_bg_group_blocks(fs);
-		if (retval)
-			goto cleanup;
-	}
-
 success_cleanup:
-	if (inode_bitmap) {
-		ext2fs_free_mem(&inode_bitmap);
-		fs->flags &= ~EXT2_FLAG_IBITMAP_TAIL_PROBLEM;
-	}
-	if (block_bitmap) {
-		ext2fs_free_mem(&block_bitmap);
-		fs->flags &= ~EXT2_FLAG_BBITMAP_TAIL_PROBLEM;
+	if (start == 0 && end == fs->group_desc_count - 1) {
+		if (inode_bitmap) {
+			ext2fs_free_mem(&inode_bitmap);
+			fs->flags &= ~EXT2_FLAG_IBITMAP_TAIL_PROBLEM;
+		}
+		if (block_bitmap) {
+			ext2fs_free_mem(&block_bitmap);
+			fs->flags &= ~EXT2_FLAG_BBITMAP_TAIL_PROBLEM;
+		}
 	}
 	fs->flags |= tail_flags;
 	return 0;
@@ -412,6 +460,169 @@  cleanup:
 	if (buf)
 		ext2fs_free_mem(&buf);
 	return retval;
+
+}
+
+static errcode_t read_bitmaps_range_end(ext2_filsys fs, int do_inode, int do_block)
+{
+	errcode_t retval = 0;
+
+	/* Mark group blocks for any BLOCK_UNINIT groups */
+	if (do_block) {
+		retval = mark_uninit_bg_group_blocks(fs);
+		if (retval)
+			goto cleanup;
+	}
+
+	return retval;
+cleanup:
+	if (do_block) {
+		ext2fs_free_block_bitmap(fs->block_map);
+		fs->block_map = 0;
+	}
+	if (do_inode) {
+		ext2fs_free_inode_bitmap(fs->inode_map);
+		fs->inode_map = 0;
+	}
+	return retval;
+}
+
+static errcode_t read_bitmaps_range(ext2_filsys fs, int do_inode, int do_block,
+				    dgrp_t start, dgrp_t end)
+{
+	errcode_t retval;
+
+	retval = read_bitmaps_range_prepare(fs, do_inode, do_block);
+	if (retval)
+		return retval;
+
+	retval = read_bitmaps_range_start(fs, do_inode, do_block, start, end, NULL, NULL);
+	if (retval)
+		return retval;
+
+	return read_bitmaps_range_end(fs, do_inode, do_block);
+}
+
+#ifdef CONFIG_PFSCK
+struct read_bitmaps_thread_info {
+	ext2_filsys	rbt_fs;
+	int 		rbt_do_inode;
+	int		rbt_do_block;
+	dgrp_t		rbt_grp_start;
+	dgrp_t		rbt_grp_end;
+	errcode_t	rbt_retval;
+	pthread_mutex_t *rbt_mutex;
+	io_channel      rbt_io;
+};
+
+static void* read_bitmaps_thread(void *data)
+{
+	struct read_bitmaps_thread_info *rbt = data;
+
+	rbt->rbt_retval = read_bitmaps_range_start(rbt->rbt_fs,
+				rbt->rbt_do_inode, rbt->rbt_do_block,
+				rbt->rbt_grp_start, rbt->rbt_grp_end,
+				rbt->rbt_mutex, rbt->rbt_io);
+	return NULL;
+}
+#endif
+
+static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
+{
+#ifdef CONFIG_PFSCK
+	pthread_attr_t	attr;
+	int num_threads = fs->fs_num_threads;
+	pthread_t *thread_ids = NULL;
+	struct read_bitmaps_thread_info *thread_infos = NULL;
+	pthread_mutex_t rbt_mutex = PTHREAD_MUTEX_INITIALIZER;
+	errcode_t retval;
+	errcode_t rc;
+	dgrp_t average_group;
+	int i;
+	io_manager manager = unix_io_manager;
+#else
+	int num_threads = 1;
+#endif
+
+	if (num_threads <= 1 || (fs->flags & EXT2_FLAG_IMAGE_FILE))
+		return read_bitmaps_range(fs, do_inode, do_block, 0, fs->group_desc_count - 1);
+
+#ifdef CONFIG_PFSCK
+	retval = pthread_attr_init(&attr);
+	if (retval)
+		return retval;
+
+	thread_ids = calloc(sizeof(pthread_t), num_threads);
+	if (!thread_ids)
+		return -ENOMEM;
+
+	thread_infos = calloc(sizeof(struct read_bitmaps_thread_info),
+				num_threads);
+	if (!thread_infos)
+		goto out;
+
+	average_group = ext2fs_get_avg_group(fs);
+	retval = read_bitmaps_range_prepare(fs, do_inode, do_block);
+	if (retval)
+		goto out;
+
+	fprintf(stdout, "Multiple threads triggered to read bitmaps\n");
+	for (i = 0; i < num_threads; i++) {
+		thread_infos[i].rbt_fs = fs;
+		thread_infos[i].rbt_do_inode = do_inode;
+		thread_infos[i].rbt_do_block = do_block;
+		thread_infos[i].rbt_mutex = &rbt_mutex;
+		if (i == 0)
+			thread_infos[i].rbt_grp_start = 0;
+		else
+			thread_infos[i].rbt_grp_start = average_group * i + 1;
+
+		if (i == num_threads - 1)
+			thread_infos[i].rbt_grp_end = fs->group_desc_count - 1;
+		else
+			thread_infos[i].rbt_grp_end = average_group * (i + 1);
+		retval = manager->open(fs->device_name, IO_FLAG_RW,
+					&thread_infos[i].rbt_io);
+		if (retval)
+			break;
+		io_channel_set_blksize(thread_infos[i].rbt_io, fs->io->block_size);
+		retval = pthread_create(&thread_ids[i], &attr,
+					&read_bitmaps_thread, &thread_infos[i]);
+		if (retval) {
+			io_channel_close(thread_infos[i].rbt_io);
+			break;
+		}
+	}
+	for (i = 0; i < num_threads; i++) {
+		if (!thread_ids[i])
+			break;
+		rc = pthread_join(thread_ids[i], NULL);
+		if (rc && !retval)
+			retval = rc;
+		rc = thread_infos[i].rbt_retval;
+		if (rc && !retval)
+			retval = rc;
+		io_channel_close(thread_infos[i].rbt_io);
+	}
+out:
+	rc = pthread_attr_destroy(&attr);
+	if (rc && !retval)
+		retval = rc;
+	free(thread_infos);
+	free(thread_ids);
+
+	if (!retval)
+		retval = read_bitmaps_range_end(fs, do_inode, do_block);
+
+	if (!retval) {
+		if (do_inode)
+			fs->flags &= ~EXT2_FLAG_IBITMAP_TAIL_PROBLEM;
+		if (do_block)
+			fs->flags &= ~EXT2_FLAG_BBITMAP_TAIL_PROBLEM;
+	}
+
+	return retval;
+#endif
 }
 
 errcode_t ext2fs_read_inode_bitmap(ext2_filsys fs)