Message ID | 20201118153947.3394530-3-saranyamohan@google.com |
---|---|
State | Changes Requested |
Headers | show |
Series | Introduce parallel fsck to e2fsck pass1 | expand |
Thanks for the patch. I noticed that this patch doesn't compile. It's because it needs "global_ctx" to be present in the e2fsck_struct but it gets added later in the patch series in the patch 1e1b0216 ("LU-8465 e2fsck: open io-channel when copying fs"). Given that this and also a couple of other patches need global_ctx, should we split 1e1b0216 ("LU-8465 e2fsck: open io-channel when copying fs") should we add global_ctx in this patch or as a separate patch before this? On Wed, Nov 18, 2020 at 7:43 AM Saranya Muruganandam <saranyamohan@google.com> wrote: > > From: Li Xi <lixi@ddn.com> > > This patch only copy the context to a new one when -m is enabled. > It doesn't actually start any thread. When pass1 test finishes, > the new context is copied back to the original context. > > Since the signal handler only changes the original context, so > add global_ctx in "struct e2fsck_struct" and use that to check > whether there is any signal of canceling. > > This patch handles the long jump properly so that all the existing > tests can be passed even the context has been copied. Otherwise, > test f_expisize_ea_del would fail when aborting. > > Signed-off-by: Li Xi <lixi@ddn.com> > Signed-off-by: Wang Shilong <wshilong@ddn.com> > Signed-off-by: Saranya Muruganandam <saranyamohan@google.com> > --- > e2fsck/pass1.c | 114 +++++++++++++++++++++++++++++++++++++++++++++---- > e2fsck/unix.c | 1 + > 2 files changed, 107 insertions(+), 8 deletions(-) > > diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c > index 8eecd958..64d237d3 100644 > --- a/e2fsck/pass1.c > +++ b/e2fsck/pass1.c > @@ -1144,7 +1144,22 @@ static int quota_inum_is_reserved(ext2_filsys fs, ext2_ino_t ino) > return 0; > } > > -void e2fsck_pass1(e2fsck_t ctx) > +static int e2fsck_should_abort(e2fsck_t ctx) > +{ > + e2fsck_t global_ctx; > + > + if (ctx->flags & E2F_FLAG_SIGNAL_MASK) > + return 1; > + > + if (ctx->global_ctx) { > + global_ctx = ctx->global_ctx; > + if (global_ctx->flags & E2F_FLAG_SIGNAL_MASK) > + return 1; > + } > + return 0; > +} > + > +void e2fsck_pass1_thread(e2fsck_t ctx) > { > int i; > __u64 max_sizes; > @@ -1360,7 +1375,7 @@ void e2fsck_pass1(e2fsck_t ctx) > if (ino > ino_threshold) > pass1_readahead(ctx, &ra_group, &ino_threshold); > ehandler_operation(old_op); > - if (ctx->flags & E2F_FLAG_SIGNAL_MASK) > + if (e2fsck_should_abort(ctx)) > goto endit; > if (pctx.errcode == EXT2_ET_BAD_BLOCK_IN_INODE_TABLE) { > /* > @@ -1955,7 +1970,7 @@ void e2fsck_pass1(e2fsck_t ctx) > if (process_inode_count >= ctx->process_inode_size) { > process_inodes(ctx, block_buf); > > - if (ctx->flags & E2F_FLAG_SIGNAL_MASK) > + if (e2fsck_should_abort(ctx)) > goto endit; > } > } > @@ -2068,6 +2083,89 @@ endit: > else > ctx->invalid_bitmaps++; > } > + > +static errcode_t e2fsck_pass1_thread_prepare(e2fsck_t global_ctx, e2fsck_t *thread_ctx) > +{ > + errcode_t retval; > + e2fsck_t thread_context; > + > + retval = ext2fs_get_mem(sizeof(struct e2fsck_struct), &thread_context); > + if (retval) { > + com_err(global_ctx->program_name, retval, "while allocating memory"); > + return retval; > + } > + memcpy(thread_context, global_ctx, sizeof(struct e2fsck_struct)); > + thread_context->fs->priv_data = thread_context; > + thread_context->global_ctx = global_ctx; > + > + *thread_ctx = thread_context; > + return 0; > +} > + > +static int e2fsck_pass1_thread_join(e2fsck_t global_ctx, e2fsck_t thread_ctx) > +{ > + int flags = global_ctx->flags; > +#ifdef HAVE_SETJMP_H > + jmp_buf old_jmp; > + > + memcpy(old_jmp, global_ctx->abort_loc, sizeof(jmp_buf)); > +#endif > + memcpy(global_ctx, thread_ctx, sizeof(struct e2fsck_struct)); > +#ifdef HAVE_SETJMP_H > + memcpy(global_ctx->abort_loc, old_jmp, sizeof(jmp_buf)); > +#endif > + /* Keep the global singal flags*/ > + global_ctx->flags |= (flags & E2F_FLAG_SIGNAL_MASK) | > + (global_ctx->flags & E2F_FLAG_SIGNAL_MASK); > + > + global_ctx->fs->priv_data = global_ctx; > + ext2fs_free_mem(&thread_ctx); > + return 0; > +} > + > +void e2fsck_pass1_multithread(e2fsck_t ctx) > +{ > + errcode_t retval; > + e2fsck_t thread_ctx; > + > + retval = e2fsck_pass1_thread_prepare(ctx, &thread_ctx); > + if (retval) { > + com_err(ctx->program_name, 0, > + _("while preparing pass1 thread\n")); > + ctx->flags |= E2F_FLAG_ABORT; > + return; > + } > + > +#ifdef HAVE_SETJMP_H > + /* > + * When fatal_error() happens, jump to here. The thread > + * context's flags will be saved, but its abort_loc will > + * be overwritten by original jump buffer for the later > + * tests. > + */ > + if (setjmp(thread_ctx->abort_loc)) { > + thread_ctx->flags &= ~E2F_FLAG_SETJMP_OK; > + e2fsck_pass1_thread_join(ctx, thread_ctx); > + return; > + } > + thread_ctx->flags |= E2F_FLAG_SETJMP_OK; > +#endif > + > + e2fsck_pass1_thread(thread_ctx); > + retval = e2fsck_pass1_thread_join(ctx, thread_ctx); > + if (retval) { > + com_err(ctx->program_name, 0, > + _("while joining pass1 thread\n")); > + ctx->flags |= E2F_FLAG_ABORT; > + return; > + } > +} > + > +void e2fsck_pass1(e2fsck_t ctx) > +{ > + e2fsck_pass1_multithread(ctx); > +} > + > #undef FINISH_INODE_LOOP > > /* > @@ -2130,7 +2228,7 @@ static void process_inodes(e2fsck_t ctx, char *block_buf) > ehandler_operation(buf); > check_blocks(ctx, &pctx, block_buf, > &inodes_to_process[i].ea_ibody_quota); > - if (ctx->flags & E2F_FLAG_SIGNAL_MASK) > + if (e2fsck_should_abort(ctx)) > break; > } > ctx->stashed_inode = old_stashed_inode; > @@ -3300,7 +3398,7 @@ static void check_blocks(e2fsck_t ctx, struct problem_context *pctx, > inlinedata_fs = ext2fs_has_feature_inline_data(ctx->fs->super); > > if (check_ext_attr(ctx, pctx, block_buf, &ea_block_quota)) { > - if (ctx->flags & E2F_FLAG_SIGNAL_MASK) > + if (e2fsck_should_abort(ctx)) > goto out; > pb.num_blocks += EXT2FS_B2C(ctx->fs, ea_block_quota.blocks); > } > @@ -3355,7 +3453,7 @@ static void check_blocks(e2fsck_t ctx, struct problem_context *pctx, > } > end_problem_latch(ctx, PR_LATCH_BLOCK); > end_problem_latch(ctx, PR_LATCH_TOOBIG); > - if (ctx->flags & E2F_FLAG_SIGNAL_MASK) > + if (e2fsck_should_abort(ctx)) > goto out; > if (pctx->errcode) > fix_problem(ctx, PR_1_BLOCK_ITERATE, pctx); > @@ -3836,7 +3934,7 @@ static int process_bad_block(ext2_filsys fs, > *block_nr = 0; > return BLOCK_CHANGED; > } > - if (ctx->flags & E2F_FLAG_SIGNAL_MASK) > + if (e2fsck_should_abort(ctx)) > return BLOCK_ABORT; > } else > mark_block_used(ctx, blk); > @@ -3933,7 +4031,7 @@ static int process_bad_block(ext2_filsys fs, > *block_nr = 0; > return BLOCK_CHANGED; > } > - if (ctx->flags & E2F_FLAG_SIGNAL_MASK) > + if (e2fsck_should_abort(ctx)) > return BLOCK_ABORT; > return 0; > } > diff --git a/e2fsck/unix.c b/e2fsck/unix.c > index 051b31a5..42f616e2 100644 > --- a/e2fsck/unix.c > +++ b/e2fsck/unix.c > @@ -1445,6 +1445,7 @@ int main (int argc, char *argv[]) > } > reserve_stdio_fds(); > > + ctx->global_ctx = NULL; > set_up_logging(ctx); > if (ctx->logf) { > int i; > -- > 2.29.2.299.gdc1121823c-goog >
On Wed, Nov 18, 2020 at 07:38:48AM -0800, Saranya Muruganandam wrote: > From: Li Xi <lixi@ddn.com> > > This patch only copy the context to a new one when -m is enabled. > It doesn't actually start any thread. When pass1 test finishes, > the new context is copied back to the original context. > > Since the signal handler only changes the original context, so > add global_ctx in "struct e2fsck_struct" and use that to check > whether there is any signal of canceling. > > This patch handles the long jump properly so that all the existing > tests can be passed even the context has been copied. Otherwise, > test f_expisize_ea_del would fail when aborting. The patch description is a bit misleading. What this is really about is adding the infrastructure to start and join threads in pass #1. Since we presumably will add multiple threading support for other passes, the one-line summary and commit description should make this clear, so that in the future, when a future developer is trying to examine the 60+ commits in this patch series, it will be a bit easier for them to understand what is happening. It might also be good to explain the patch series that this only starts a single thread, since this patch series is really only about adding the multi-threading machinery. Some questions which immediately come up is whether it makes sense to have this machinery in e2fsck/pass1.c, or whether some of this is more general and should be in e2fsck/util.c --- although obviously some portion of the code when we are merging the results from the pass will be pass specific. Of course, none of this is in this commit, so it's hard for me to see whether or not it will make sense in the long run. We can refactor the code later, or in a future patch series, but the point is that it's hard for me to tell whether the existing function breakdown makes the most amount of sense in the first reading of the patches. If you have an opinion of what's the better way to do things, having looked at the whole patch series, feel free to move some of the refactoring into the earlier patches; the patch series doesn't have to reflect the developmental history of the changes, and for the purposes of patch review, it's often simpler when you change earlier patches to simplify things --- although that can make it harder since then you'll have to rework later patches. (If it's too hard, it's fine to leave things as they are.) - Ted
On Wed, Nov 18, 2020 at 07:38:48AM -0800, Saranya Muruganandam wrote: > From: Li Xi <lixi@ddn.com> > > This patch only copy the context to a new one when -m is enabled. > It doesn't actually start any thread. When pass1 test finishes, > the new context is copied back to the original context. > > Since the signal handler only changes the original context, so > add global_ctx in "struct e2fsck_struct" and use that to check > whether there is any signal of canceling. > > This patch handles the long jump properly so that all the existing > tests can be passed even the context has been copied. Otherwise, > test f_expisize_ea_del would fail when aborting. > > Signed-off-by: Li Xi <lixi@ddn.com> > Signed-off-by: Wang Shilong <wshilong@ddn.com> > Signed-off-by: Saranya Muruganandam <saranyamohan@google.com> > --- > e2fsck/pass1.c | 114 +++++++++++++++++++++++++++++++++++++++++++++---- > e2fsck/unix.c | 1 + > 2 files changed, 107 insertions(+), 8 deletions(-) > > diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c > index 8eecd958..64d237d3 100644 > --- a/e2fsck/pass1.c > +++ b/e2fsck/pass1.c > @@ -1144,7 +1144,22 @@ static int quota_inum_is_reserved(ext2_filsys fs, ext2_ino_t ino) > return 0; > } > > -void e2fsck_pass1(e2fsck_t ctx) > +static int e2fsck_should_abort(e2fsck_t ctx) > +{ > + e2fsck_t global_ctx; > + > + if (ctx->flags & E2F_FLAG_SIGNAL_MASK) > + return 1; > + > + if (ctx->global_ctx) { > + global_ctx = ctx->global_ctx; > + if (global_ctx->flags & E2F_FLAG_SIGNAL_MASK) > + return 1; > + } > + return 0; > +} > + > +void e2fsck_pass1_thread(e2fsck_t ctx) > { > int i; > __u64 max_sizes; > @@ -1360,7 +1375,7 @@ void e2fsck_pass1(e2fsck_t ctx) > if (ino > ino_threshold) > pass1_readahead(ctx, &ra_group, &ino_threshold); > ehandler_operation(old_op); > - if (ctx->flags & E2F_FLAG_SIGNAL_MASK) > + if (e2fsck_should_abort(ctx)) > goto endit; > if (pctx.errcode == EXT2_ET_BAD_BLOCK_IN_INODE_TABLE) { > /* > @@ -1955,7 +1970,7 @@ void e2fsck_pass1(e2fsck_t ctx) > if (process_inode_count >= ctx->process_inode_size) { > process_inodes(ctx, block_buf); > > - if (ctx->flags & E2F_FLAG_SIGNAL_MASK) > + if (e2fsck_should_abort(ctx)) > goto endit; > } > } > @@ -2068,6 +2083,89 @@ endit: > else > ctx->invalid_bitmaps++; > } > + > +static errcode_t e2fsck_pass1_thread_prepare(e2fsck_t global_ctx, e2fsck_t *thread_ctx) > +{ > + errcode_t retval; > + e2fsck_t thread_context; > + > + retval = ext2fs_get_mem(sizeof(struct e2fsck_struct), &thread_context); Hm, so I guess the strategy here is that parallel e2fsck makes per-thread copies of the ext2_filsys and e2fsck_t global contexts? And then after the threaded parts complete, each thread merges its per-thread contexts back into the global one, right? This means that we have to be careful to track which fields in those cloned contexts have been updated by the thread so that we can copy them back and not lose any data. I'm wondering if for future maintainability it would be better to track the per-thread data in a separate structure to make it very explicit which data (sub)structures are effectively per-thread and hence don't require locking? (I ask that mostly because I'm having a hard time figuring out which fields are supposed to be shared and which ones aren't...) --D > + if (retval) { > + com_err(global_ctx->program_name, retval, "while allocating memory"); > + return retval; > + } > + memcpy(thread_context, global_ctx, sizeof(struct e2fsck_struct)); > + thread_context->fs->priv_data = thread_context; > + thread_context->global_ctx = global_ctx; > + > + *thread_ctx = thread_context; > + return 0; > +} > + > +static int e2fsck_pass1_thread_join(e2fsck_t global_ctx, e2fsck_t thread_ctx) > +{ > + int flags = global_ctx->flags; > +#ifdef HAVE_SETJMP_H > + jmp_buf old_jmp; > + > + memcpy(old_jmp, global_ctx->abort_loc, sizeof(jmp_buf)); > +#endif > + memcpy(global_ctx, thread_ctx, sizeof(struct e2fsck_struct)); > +#ifdef HAVE_SETJMP_H > + memcpy(global_ctx->abort_loc, old_jmp, sizeof(jmp_buf)); > +#endif > + /* Keep the global singal flags*/ > + global_ctx->flags |= (flags & E2F_FLAG_SIGNAL_MASK) | > + (global_ctx->flags & E2F_FLAG_SIGNAL_MASK); > + > + global_ctx->fs->priv_data = global_ctx; > + ext2fs_free_mem(&thread_ctx); > + return 0; > +} > + > +void e2fsck_pass1_multithread(e2fsck_t ctx) > +{ > + errcode_t retval; > + e2fsck_t thread_ctx; > + > + retval = e2fsck_pass1_thread_prepare(ctx, &thread_ctx); > + if (retval) { > + com_err(ctx->program_name, 0, > + _("while preparing pass1 thread\n")); > + ctx->flags |= E2F_FLAG_ABORT; > + return; > + } > + > +#ifdef HAVE_SETJMP_H > + /* > + * When fatal_error() happens, jump to here. The thread > + * context's flags will be saved, but its abort_loc will > + * be overwritten by original jump buffer for the later > + * tests. > + */ > + if (setjmp(thread_ctx->abort_loc)) { > + thread_ctx->flags &= ~E2F_FLAG_SETJMP_OK; > + e2fsck_pass1_thread_join(ctx, thread_ctx); > + return; > + } > + thread_ctx->flags |= E2F_FLAG_SETJMP_OK; > +#endif > + > + e2fsck_pass1_thread(thread_ctx); > + retval = e2fsck_pass1_thread_join(ctx, thread_ctx); > + if (retval) { > + com_err(ctx->program_name, 0, > + _("while joining pass1 thread\n")); > + ctx->flags |= E2F_FLAG_ABORT; > + return; > + } > +} > + > +void e2fsck_pass1(e2fsck_t ctx) > +{ > + e2fsck_pass1_multithread(ctx); > +} > + > #undef FINISH_INODE_LOOP > > /* > @@ -2130,7 +2228,7 @@ static void process_inodes(e2fsck_t ctx, char *block_buf) > ehandler_operation(buf); > check_blocks(ctx, &pctx, block_buf, > &inodes_to_process[i].ea_ibody_quota); > - if (ctx->flags & E2F_FLAG_SIGNAL_MASK) > + if (e2fsck_should_abort(ctx)) > break; > } > ctx->stashed_inode = old_stashed_inode; > @@ -3300,7 +3398,7 @@ static void check_blocks(e2fsck_t ctx, struct problem_context *pctx, > inlinedata_fs = ext2fs_has_feature_inline_data(ctx->fs->super); > > if (check_ext_attr(ctx, pctx, block_buf, &ea_block_quota)) { > - if (ctx->flags & E2F_FLAG_SIGNAL_MASK) > + if (e2fsck_should_abort(ctx)) > goto out; > pb.num_blocks += EXT2FS_B2C(ctx->fs, ea_block_quota.blocks); > } > @@ -3355,7 +3453,7 @@ static void check_blocks(e2fsck_t ctx, struct problem_context *pctx, > } > end_problem_latch(ctx, PR_LATCH_BLOCK); > end_problem_latch(ctx, PR_LATCH_TOOBIG); > - if (ctx->flags & E2F_FLAG_SIGNAL_MASK) > + if (e2fsck_should_abort(ctx)) > goto out; > if (pctx->errcode) > fix_problem(ctx, PR_1_BLOCK_ITERATE, pctx); > @@ -3836,7 +3934,7 @@ static int process_bad_block(ext2_filsys fs, > *block_nr = 0; > return BLOCK_CHANGED; > } > - if (ctx->flags & E2F_FLAG_SIGNAL_MASK) > + if (e2fsck_should_abort(ctx)) > return BLOCK_ABORT; > } else > mark_block_used(ctx, blk); > @@ -3933,7 +4031,7 @@ static int process_bad_block(ext2_filsys fs, > *block_nr = 0; > return BLOCK_CHANGED; > } > - if (ctx->flags & E2F_FLAG_SIGNAL_MASK) > + if (e2fsck_should_abort(ctx)) > return BLOCK_ABORT; > return 0; > } > diff --git a/e2fsck/unix.c b/e2fsck/unix.c > index 051b31a5..42f616e2 100644 > --- a/e2fsck/unix.c > +++ b/e2fsck/unix.c > @@ -1445,6 +1445,7 @@ int main (int argc, char *argv[]) > } > reserve_stdio_fds(); > > + ctx->global_ctx = NULL; > set_up_logging(ctx); > if (ctx->logf) { > int i; > -- > 2.29.2.299.gdc1121823c-goog >
On Fri, Dec 18, 2020 at 8:01 AM Darrick J. Wong <darrick.wong@oracle.com> wrote: > > On Wed, Nov 18, 2020 at 07:38:48AM -0800, Saranya Muruganandam wrote: > > From: Li Xi <lixi@ddn.com> > > > > This patch only copy the context to a new one when -m is enabled. > > It doesn't actually start any thread. When pass1 test finishes, > > the new context is copied back to the original context. > > > > Since the signal handler only changes the original context, so > > add global_ctx in "struct e2fsck_struct" and use that to check > > whether there is any signal of canceling. > > > > This patch handles the long jump properly so that all the existing > > tests can be passed even the context has been copied. Otherwise, > > test f_expisize_ea_del would fail when aborting. > > > > Signed-off-by: Li Xi <lixi@ddn.com> > > Signed-off-by: Wang Shilong <wshilong@ddn.com> > > Signed-off-by: Saranya Muruganandam <saranyamohan@google.com> > > --- > > e2fsck/pass1.c | 114 +++++++++++++++++++++++++++++++++++++++++++++---- > > e2fsck/unix.c | 1 + > > 2 files changed, 107 insertions(+), 8 deletions(-) > > > > diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c > > index 8eecd958..64d237d3 100644 > > --- a/e2fsck/pass1.c > > +++ b/e2fsck/pass1.c > > @@ -1144,7 +1144,22 @@ static int quota_inum_is_reserved(ext2_filsys fs, ext2_ino_t ino) > > return 0; > > } > > > > -void e2fsck_pass1(e2fsck_t ctx) > > +static int e2fsck_should_abort(e2fsck_t ctx) > > +{ > > + e2fsck_t global_ctx; > > + > > + if (ctx->flags & E2F_FLAG_SIGNAL_MASK) > > + return 1; > > + > > + if (ctx->global_ctx) { > > + global_ctx = ctx->global_ctx; > > + if (global_ctx->flags & E2F_FLAG_SIGNAL_MASK) > > + return 1; > > + } > > + return 0; > > +} > > + > > +void e2fsck_pass1_thread(e2fsck_t ctx) > > { > > int i; > > __u64 max_sizes; > > @@ -1360,7 +1375,7 @@ void e2fsck_pass1(e2fsck_t ctx) > > if (ino > ino_threshold) > > pass1_readahead(ctx, &ra_group, &ino_threshold); > > ehandler_operation(old_op); > > - if (ctx->flags & E2F_FLAG_SIGNAL_MASK) > > + if (e2fsck_should_abort(ctx)) > > goto endit; > > if (pctx.errcode == EXT2_ET_BAD_BLOCK_IN_INODE_TABLE) { > > /* > > @@ -1955,7 +1970,7 @@ void e2fsck_pass1(e2fsck_t ctx) > > if (process_inode_count >= ctx->process_inode_size) { > > process_inodes(ctx, block_buf); > > > > - if (ctx->flags & E2F_FLAG_SIGNAL_MASK) > > + if (e2fsck_should_abort(ctx)) > > goto endit; > > } > > } > > @@ -2068,6 +2083,89 @@ endit: > > else > > ctx->invalid_bitmaps++; > > } > > + > > +static errcode_t e2fsck_pass1_thread_prepare(e2fsck_t global_ctx, e2fsck_t *thread_ctx) > > +{ > > + errcode_t retval; > > + e2fsck_t thread_context; > > + > > + retval = ext2fs_get_mem(sizeof(struct e2fsck_struct), &thread_context); > > Hm, so I guess the strategy here is that parallel e2fsck makes > per-thread copies of the ext2_filsys and e2fsck_t global contexts? > And then after the threaded parts complete, each thread merges its > per-thread contexts back into the global one, right? Yes. > > This means that we have to be careful to track which fields in those > cloned contexts have been updated by the thread so that we can copy them > back and not lose any data. > > I'm wondering if for future maintainability it would be better to track > the per-thread data in a separate structure to make it very explicit > which data (sub)structures are effectively per-thread and hence don't > require locking? Maybe use a per-thread structure is better maintained, but i am not sure we could remove locking completely. Locking is mostly used for fix, because fixing is serialized now and for some global structure which could be used seldomly but could simplify codes. > > (I ask that mostly because I'm having a hard time figuring out which > fields are supposed to be shared and which ones aren't...) > > --D > > > + if (retval) { > > + com_err(global_ctx->program_name, retval, "while allocating memory");
On Fri, Dec 18, 2020 at 09:13:25AM +0800, Wang Shilong wrote: > On Fri, Dec 18, 2020 at 8:01 AM Darrick J. Wong <darrick.wong@oracle.com> wrote: > > > > On Wed, Nov 18, 2020 at 07:38:48AM -0800, Saranya Muruganandam wrote: > > > From: Li Xi <lixi@ddn.com> > > > > > > This patch only copy the context to a new one when -m is enabled. > > > It doesn't actually start any thread. When pass1 test finishes, > > > the new context is copied back to the original context. > > > > > > Since the signal handler only changes the original context, so > > > add global_ctx in "struct e2fsck_struct" and use that to check > > > whether there is any signal of canceling. > > > > > > This patch handles the long jump properly so that all the existing > > > tests can be passed even the context has been copied. Otherwise, > > > test f_expisize_ea_del would fail when aborting. > > > > > > Signed-off-by: Li Xi <lixi@ddn.com> > > > Signed-off-by: Wang Shilong <wshilong@ddn.com> > > > Signed-off-by: Saranya Muruganandam <saranyamohan@google.com> > > > --- > > > e2fsck/pass1.c | 114 +++++++++++++++++++++++++++++++++++++++++++++---- > > > e2fsck/unix.c | 1 + > > > 2 files changed, 107 insertions(+), 8 deletions(-) > > > > > > diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c > > > index 8eecd958..64d237d3 100644 > > > --- a/e2fsck/pass1.c > > > +++ b/e2fsck/pass1.c > > > @@ -1144,7 +1144,22 @@ static int quota_inum_is_reserved(ext2_filsys fs, ext2_ino_t ino) > > > return 0; > > > } > > > > > > -void e2fsck_pass1(e2fsck_t ctx) > > > +static int e2fsck_should_abort(e2fsck_t ctx) > > > +{ > > > + e2fsck_t global_ctx; > > > + > > > + if (ctx->flags & E2F_FLAG_SIGNAL_MASK) > > > + return 1; > > > + > > > + if (ctx->global_ctx) { > > > + global_ctx = ctx->global_ctx; > > > + if (global_ctx->flags & E2F_FLAG_SIGNAL_MASK) > > > + return 1; > > > + } > > > + return 0; > > > +} > > > + > > > +void e2fsck_pass1_thread(e2fsck_t ctx) > > > { > > > int i; > > > __u64 max_sizes; > > > @@ -1360,7 +1375,7 @@ void e2fsck_pass1(e2fsck_t ctx) > > > if (ino > ino_threshold) > > > pass1_readahead(ctx, &ra_group, &ino_threshold); > > > ehandler_operation(old_op); > > > - if (ctx->flags & E2F_FLAG_SIGNAL_MASK) > > > + if (e2fsck_should_abort(ctx)) > > > goto endit; > > > if (pctx.errcode == EXT2_ET_BAD_BLOCK_IN_INODE_TABLE) { > > > /* > > > @@ -1955,7 +1970,7 @@ void e2fsck_pass1(e2fsck_t ctx) > > > if (process_inode_count >= ctx->process_inode_size) { > > > process_inodes(ctx, block_buf); > > > > > > - if (ctx->flags & E2F_FLAG_SIGNAL_MASK) > > > + if (e2fsck_should_abort(ctx)) > > > goto endit; > > > } > > > } > > > @@ -2068,6 +2083,89 @@ endit: > > > else > > > ctx->invalid_bitmaps++; > > > } > > > + > > > +static errcode_t e2fsck_pass1_thread_prepare(e2fsck_t global_ctx, e2fsck_t *thread_ctx) > > > +{ > > > + errcode_t retval; > > > + e2fsck_t thread_context; > > > + > > > + retval = ext2fs_get_mem(sizeof(struct e2fsck_struct), &thread_context); > > > > Hm, so I guess the strategy here is that parallel e2fsck makes > > per-thread copies of the ext2_filsys and e2fsck_t global contexts? > > And then after the threaded parts complete, each thread merges its > > per-thread contexts back into the global one, right? > > Yes. > > > > > This means that we have to be careful to track which fields in those > > cloned contexts have been updated by the thread so that we can copy them > > back and not lose any data. > > > > I'm wondering if for future maintainability it would be better to track > > the per-thread data in a separate structure to make it very explicit > > which data (sub)structures are effectively per-thread and hence don't > > require locking? > > Maybe use a per-thread structure is better maintained, but i am not sure > we could remove locking completely. > > Locking is mostly used for fix, because fixing is serialized now > and for some global structure which could be used seldomly > but could simplify codes. <nod> I was assuming that you'd still put a lock in the global structure and use it for data fields that aren't so frequently accessed. --D > > > > (I ask that mostly because I'm having a hard time figuring out which > > fields are supposed to be shared and which ones aren't...) > > > > --D > > > > > + if (retval) { > > > + com_err(global_ctx->program_name, retval, "while allocating memory");
diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c index 8eecd958..64d237d3 100644 --- a/e2fsck/pass1.c +++ b/e2fsck/pass1.c @@ -1144,7 +1144,22 @@ static int quota_inum_is_reserved(ext2_filsys fs, ext2_ino_t ino) return 0; } -void e2fsck_pass1(e2fsck_t ctx) +static int e2fsck_should_abort(e2fsck_t ctx) +{ + e2fsck_t global_ctx; + + if (ctx->flags & E2F_FLAG_SIGNAL_MASK) + return 1; + + if (ctx->global_ctx) { + global_ctx = ctx->global_ctx; + if (global_ctx->flags & E2F_FLAG_SIGNAL_MASK) + return 1; + } + return 0; +} + +void e2fsck_pass1_thread(e2fsck_t ctx) { int i; __u64 max_sizes; @@ -1360,7 +1375,7 @@ void e2fsck_pass1(e2fsck_t ctx) if (ino > ino_threshold) pass1_readahead(ctx, &ra_group, &ino_threshold); ehandler_operation(old_op); - if (ctx->flags & E2F_FLAG_SIGNAL_MASK) + if (e2fsck_should_abort(ctx)) goto endit; if (pctx.errcode == EXT2_ET_BAD_BLOCK_IN_INODE_TABLE) { /* @@ -1955,7 +1970,7 @@ void e2fsck_pass1(e2fsck_t ctx) if (process_inode_count >= ctx->process_inode_size) { process_inodes(ctx, block_buf); - if (ctx->flags & E2F_FLAG_SIGNAL_MASK) + if (e2fsck_should_abort(ctx)) goto endit; } } @@ -2068,6 +2083,89 @@ endit: else ctx->invalid_bitmaps++; } + +static errcode_t e2fsck_pass1_thread_prepare(e2fsck_t global_ctx, e2fsck_t *thread_ctx) +{ + errcode_t retval; + e2fsck_t thread_context; + + retval = ext2fs_get_mem(sizeof(struct e2fsck_struct), &thread_context); + if (retval) { + com_err(global_ctx->program_name, retval, "while allocating memory"); + return retval; + } + memcpy(thread_context, global_ctx, sizeof(struct e2fsck_struct)); + thread_context->fs->priv_data = thread_context; + thread_context->global_ctx = global_ctx; + + *thread_ctx = thread_context; + return 0; +} + +static int e2fsck_pass1_thread_join(e2fsck_t global_ctx, e2fsck_t thread_ctx) +{ + int flags = global_ctx->flags; +#ifdef HAVE_SETJMP_H + jmp_buf old_jmp; + + memcpy(old_jmp, global_ctx->abort_loc, sizeof(jmp_buf)); +#endif + memcpy(global_ctx, thread_ctx, sizeof(struct e2fsck_struct)); +#ifdef HAVE_SETJMP_H + memcpy(global_ctx->abort_loc, old_jmp, sizeof(jmp_buf)); +#endif + /* Keep the global singal flags*/ + global_ctx->flags |= (flags & E2F_FLAG_SIGNAL_MASK) | + (global_ctx->flags & E2F_FLAG_SIGNAL_MASK); + + global_ctx->fs->priv_data = global_ctx; + ext2fs_free_mem(&thread_ctx); + return 0; +} + +void e2fsck_pass1_multithread(e2fsck_t ctx) +{ + errcode_t retval; + e2fsck_t thread_ctx; + + retval = e2fsck_pass1_thread_prepare(ctx, &thread_ctx); + if (retval) { + com_err(ctx->program_name, 0, + _("while preparing pass1 thread\n")); + ctx->flags |= E2F_FLAG_ABORT; + return; + } + +#ifdef HAVE_SETJMP_H + /* + * When fatal_error() happens, jump to here. The thread + * context's flags will be saved, but its abort_loc will + * be overwritten by original jump buffer for the later + * tests. + */ + if (setjmp(thread_ctx->abort_loc)) { + thread_ctx->flags &= ~E2F_FLAG_SETJMP_OK; + e2fsck_pass1_thread_join(ctx, thread_ctx); + return; + } + thread_ctx->flags |= E2F_FLAG_SETJMP_OK; +#endif + + e2fsck_pass1_thread(thread_ctx); + retval = e2fsck_pass1_thread_join(ctx, thread_ctx); + if (retval) { + com_err(ctx->program_name, 0, + _("while joining pass1 thread\n")); + ctx->flags |= E2F_FLAG_ABORT; + return; + } +} + +void e2fsck_pass1(e2fsck_t ctx) +{ + e2fsck_pass1_multithread(ctx); +} + #undef FINISH_INODE_LOOP /* @@ -2130,7 +2228,7 @@ static void process_inodes(e2fsck_t ctx, char *block_buf) ehandler_operation(buf); check_blocks(ctx, &pctx, block_buf, &inodes_to_process[i].ea_ibody_quota); - if (ctx->flags & E2F_FLAG_SIGNAL_MASK) + if (e2fsck_should_abort(ctx)) break; } ctx->stashed_inode = old_stashed_inode; @@ -3300,7 +3398,7 @@ static void check_blocks(e2fsck_t ctx, struct problem_context *pctx, inlinedata_fs = ext2fs_has_feature_inline_data(ctx->fs->super); if (check_ext_attr(ctx, pctx, block_buf, &ea_block_quota)) { - if (ctx->flags & E2F_FLAG_SIGNAL_MASK) + if (e2fsck_should_abort(ctx)) goto out; pb.num_blocks += EXT2FS_B2C(ctx->fs, ea_block_quota.blocks); } @@ -3355,7 +3453,7 @@ static void check_blocks(e2fsck_t ctx, struct problem_context *pctx, } end_problem_latch(ctx, PR_LATCH_BLOCK); end_problem_latch(ctx, PR_LATCH_TOOBIG); - if (ctx->flags & E2F_FLAG_SIGNAL_MASK) + if (e2fsck_should_abort(ctx)) goto out; if (pctx->errcode) fix_problem(ctx, PR_1_BLOCK_ITERATE, pctx); @@ -3836,7 +3934,7 @@ static int process_bad_block(ext2_filsys fs, *block_nr = 0; return BLOCK_CHANGED; } - if (ctx->flags & E2F_FLAG_SIGNAL_MASK) + if (e2fsck_should_abort(ctx)) return BLOCK_ABORT; } else mark_block_used(ctx, blk); @@ -3933,7 +4031,7 @@ static int process_bad_block(ext2_filsys fs, *block_nr = 0; return BLOCK_CHANGED; } - if (ctx->flags & E2F_FLAG_SIGNAL_MASK) + if (e2fsck_should_abort(ctx)) return BLOCK_ABORT; return 0; } diff --git a/e2fsck/unix.c b/e2fsck/unix.c index 051b31a5..42f616e2 100644 --- a/e2fsck/unix.c +++ b/e2fsck/unix.c @@ -1445,6 +1445,7 @@ int main (int argc, char *argv[]) } reserve_stdio_fds(); + ctx->global_ctx = NULL; set_up_logging(ctx); if (ctx->logf) { int i;