diff mbox series

[RFC,v3,03/61] e2fsck: copy fs when using multi-thread fsck

Message ID 20201118153947.3394530-4-saranyamohan@google.com
State Changes Requested
Headers show
Series Introduce parallel fsck to e2fsck pass1 | expand

Commit Message

Saranya Muruganandam Nov. 18, 2020, 3:38 p.m. UTC
From: Li Xi <lixi@ddn.com>

This patch only copy the fs to a new one when -m is enabled.
It doesn't actually start any thread. When pass1 test finishes,
the new fs is copied back to the original context.

This patch handles the fs fields in dblist, inode_map and block_map
properly.

Signed-off-by: Li Xi <lixi@ddn.com>
Signed-off-by: Wang Shilong <wshilong@ddn.com>
Signed-off-by: Saranya Muruganandam <saranyamohan@google.com>
---
 e2fsck/pass1.c | 41 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 37 insertions(+), 4 deletions(-)

Comments

Theodore Ts'o Nov. 23, 2020, 10:12 p.m. UTC | #1
On Wed, Nov 18, 2020 at 07:38:49AM -0800, Saranya Muruganandam wrote:
> From: Li Xi <lixi@ddn.com>
> 
> This patch only copy the fs to a new one when -m is enabled.
> It doesn't actually start any thread. When pass1 test finishes,
> the new fs is copied back to the original context.
> 
> This patch handles the fs fields in dblist, inode_map and block_map
> properly.
> 
> Signed-off-by: Li Xi <lixi@ddn.com>
> Signed-off-by: Wang Shilong <wshilong@ddn.com>
> Signed-off-by: Saranya Muruganandam <saranyamohan@google.com>

I'm a bit surprised that we're not adding a ext2fs_clone_fs()
function, but instead creating an e2fsck_pass1_copy_fs() function.

Again, what's going to happen when we need to copy the fs structure
for other passes.  Also, just simply copying the fs structure seems
dangerous; how are we going to know which allocated substructures were
created before the fs structure was clonsed (and hence can't be safely
freed in a copy of the fs structure), and which substructures were
allocated *after* the fs structure is cloned, in which case they need
to be freed when the cloned fs structuers are released?

Perhaps the simplest thing to do is to assume that *everything* is
cloned.  If that results in too much memory consumed, maybe we can add
a set of flags indicating which structures should *not* be freed for a
particular fs structure, with the assumption that they will be freed
when the top-level master fs structure is closed.  There may be some
potential traps with this, since in some cases a substructure can be
released and re-allocated, in which case, if that substructure is
shared by child fs structures, they could be left pointing at already
freed copies of the data structures.

Basically, even if this works (and the fact that I saw a crash with a
data structure being double-freed in the f_multithread_ok test
strongly suggesting that it isn't), I'd be worried about making sure
that the resulting architecture is one which is robust and
maintainable, and not leaving traps for future developers when they
don't realize what the assumptions are about shared substructures.
Which strongly suggests that this should be a first-class aspect of
libext2fs and it should be clearly documented how sharing does or
doesn't work.

Cheers,

					- Ted
diff mbox series

Patch

diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
index 64d237d3..5b4947b0 100644
--- a/e2fsck/pass1.c
+++ b/e2fsck/pass1.c
@@ -49,6 +49,8 @@ 
 
 #include "e2fsck.h"
 #include <ext2fs/ext2_ext_attr.h>
+/* todo remove this finally */
+#include <ext2fs/ext2fsP.h>
 #include <e2p/e2p.h>
 
 #include "problem.h"
@@ -2084,10 +2086,23 @@  endit:
 		ctx->invalid_bitmaps++;
 }
 
+static void e2fsck_pass1_copy_fs(ext2_filsys dest, ext2_filsys src)
+{
+	memcpy(dest, src, sizeof(struct struct_ext2_filsys));
+	if (dest->dblist)
+		dest->dblist->fs = dest;
+	if (dest->inode_map)
+		dest->inode_map->fs = dest;
+	if (dest->block_map)
+		dest->block_map->fs = dest;
+}
+
 static errcode_t e2fsck_pass1_thread_prepare(e2fsck_t global_ctx, e2fsck_t *thread_ctx)
 {
 	errcode_t	retval;
 	e2fsck_t	thread_context;
+	ext2_filsys	thread_fs;
+	ext2_filsys	global_fs = global_ctx->fs;
 
 	retval = ext2fs_get_mem(sizeof(struct e2fsck_struct), &thread_context);
 	if (retval) {
@@ -2095,18 +2110,32 @@  static errcode_t e2fsck_pass1_thread_prepare(e2fsck_t global_ctx, e2fsck_t *thre
 		return retval;
 	}
 	memcpy(thread_context, global_ctx, sizeof(struct e2fsck_struct));
-	thread_context->fs->priv_data = thread_context;
 	thread_context->global_ctx = global_ctx;
 
+	retval = ext2fs_get_mem(sizeof(struct struct_ext2_filsys), &thread_fs);
+	if (retval) {
+		com_err(global_ctx->program_name, retval, "while allocating memory");
+		goto out_context;
+	}
+
+	e2fsck_pass1_copy_fs(thread_fs, global_fs);
+	thread_fs->priv_data = thread_context;
+
+	thread_context->fs = thread_fs;
 	*thread_ctx = thread_context;
 	return 0;
+out_context:
+	ext2fs_free_mem(&thread_context);
+	return retval;
 }
 
 static int e2fsck_pass1_thread_join(e2fsck_t global_ctx, e2fsck_t thread_ctx)
 {
-	int	flags = global_ctx->flags;
+	int		flags = global_ctx->flags;
+	ext2_filsys	thread_fs = thread_ctx->fs;
+	ext2_filsys	global_fs = global_ctx->fs;
 #ifdef HAVE_SETJMP_H
-	jmp_buf	old_jmp;
+	jmp_buf		old_jmp;
 
 	memcpy(old_jmp, global_ctx->abort_loc, sizeof(jmp_buf));
 #endif
@@ -2118,7 +2147,11 @@  static int e2fsck_pass1_thread_join(e2fsck_t global_ctx, e2fsck_t thread_ctx)
 	global_ctx->flags |= (flags & E2F_FLAG_SIGNAL_MASK) |
 			     (global_ctx->flags & E2F_FLAG_SIGNAL_MASK);
 
-	global_ctx->fs->priv_data = global_ctx;
+	e2fsck_pass1_copy_fs(global_fs, thread_fs);
+	global_fs->priv_data = global_ctx;
+	global_ctx->fs = global_fs;
+
+	ext2fs_free_mem(&thread_ctx->fs);
 	ext2fs_free_mem(&thread_ctx);
 	return 0;
 }