diff mbox series

[v4,4/8] e2fsck: add fast commit scan pass

Message ID 20210122054504.1498532-5-user@harshads-520.kir.corp.google.com
State Accepted
Headers show
Series e2fsck: add fast commit replay path | expand

Commit Message

harshad shirwadkar Jan. 22, 2021, 5:45 a.m. UTC
From: Harshad Shirwadkar <harshadshirwadkar@gmail.com>

Add fast commit scan pass. Scan pass is responsible for following
things:

* Count total number of fast commit tags that need to be replayed
  during the replay phase.

* Validate whether the fast commit area is valid for a given
  transaction ID.

* Verify the CRC of fast commit area.

Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
---
 e2fsck/journal.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 109 insertions(+)

Comments

Andreas Dilger Sept. 18, 2023, 8:39 p.m. UTC | #1
On Jan 21, 2021, at 10:45 PM, Harshad Shirwadkar <harshadshirwadkar@gmail.com> wrote:
> 
> From: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
> 
> Add fast commit scan pass. Scan pass is responsible for following
> things:
> 
> * Count total number of fast commit tags that need to be replayed
>  during the replay phase.
> 
> * Validate whether the fast commit area is valid for a given
>  transaction ID.
> 
> * Verify the CRC of fast commit area.
> 
> Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>

I was making a fix to debugfs/journal.c today to improve performance
of the revoke hashtable, since it was performing very badly with a
large journal and lots of revokes (separate patch to be submitted).

I noticed that e2fsck/journal.c is totally missing any of the fast
commit handling that was added to in this patch.

This seems dangerous since there are some cases where debugfs and tune2fs
will do journal recovery in userspace, but it appears possible that they
would totally miss any fast commit transaction handling.

It isn't great that we have two nearly identical copies of the same code
in e2fsprogs, but looks is difficult to make them totally identical.
We could potentially play some tricks here (e.g. use a common variable
name for both "ctx" and "fs" in the code, unify copies of e2fsck_journal_*
and ext2fs_journal_* into a single file, and potentially abstract out
some of the differences (mainly from e2fsck/journal.c fixing errors
during journal recovery) into helper functions that are no-ops on the
debugfs/journal.c side.

There would still be two different *builds* of the code, with a lot of
macro expansions to hide the differences, but I think it looks possible
to at least bring these two copies more into sync.  I have some cleanups,
but I don't know much about fast commits and what should be done there.

Cheers, Andreas

> ---
> e2fsck/journal.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 109 insertions(+)
> 
> diff --git a/e2fsck/journal.c b/e2fsck/journal.c
> index 2c8e3441..f1aa0fd6 100644
> --- a/e2fsck/journal.c
> +++ b/e2fsck/journal.c
> @@ -278,6 +278,108 @@ static int process_journal_block(ext2_filsys fs,
> 	return 0;
> }
> 
> +static int ext4_fc_replay_scan(journal_t *j, struct buffer_head *bh,
> +				int off, tid_t expected_tid)
> +{
> +	e2fsck_t ctx = j->j_fs_dev->k_ctx;
> +	struct e2fsck_fc_replay_state *state;
> +	int ret = JBD2_FC_REPLAY_CONTINUE;
> +	struct ext4_fc_add_range *ext;
> +	struct ext4_fc_tl *tl;
> +	struct ext4_fc_tail *tail;
> +	__u8 *start, *end;
> +	struct ext4_fc_head *head;
> +	struct ext2fs_extent ext2fs_ex;
> +
> +	state = &ctx->fc_replay_state;
> +
> +	start = (__u8 *)bh->b_data;
> +	end = (__u8 *)bh->b_data + j->j_blocksize - 1;
> +
> +	jbd_debug(1, "Scan phase starting, expected %d", expected_tid);
> +	if (state->fc_replay_expected_off == 0) {
> +		memset(state, 0, sizeof(*state));
> +		/* Check if we can stop early */
> +		if (le16_to_cpu(((struct ext4_fc_tl *)start)->fc_tag)
> +			!= EXT4_FC_TAG_HEAD) {
> +			jbd_debug(1, "Ending early!, not a head tag");
> +			return 0;
> +		}
> +	}
> +
> +	if (off != state->fc_replay_expected_off) {
> +		ret = -EFSCORRUPTED;
> +		goto out_err;
> +	}
> +
> +	state->fc_replay_expected_off++;
> +	fc_for_each_tl(start, end, tl) {
> +		jbd_debug(3, "Scan phase, tag:%s, blk %lld\n",
> +			  tag2str(le16_to_cpu(tl->fc_tag)), bh->b_blocknr);
> +		switch (le16_to_cpu(tl->fc_tag)) {
> +		case EXT4_FC_TAG_ADD_RANGE:
> +			ext = (struct ext4_fc_add_range *)ext4_fc_tag_val(tl);
> +			ret = ext2fs_decode_extent(&ext2fs_ex, (void *)&ext->fc_ex,
> +						   sizeof(ext->fc_ex));
> +			if (ret)
> +				ret = JBD2_FC_REPLAY_STOP;
> +			else
> +				ret = JBD2_FC_REPLAY_CONTINUE;
> +		case EXT4_FC_TAG_DEL_RANGE:
> +		case EXT4_FC_TAG_LINK:
> +		case EXT4_FC_TAG_UNLINK:
> +		case EXT4_FC_TAG_CREAT:
> +		case EXT4_FC_TAG_INODE:
> +		case EXT4_FC_TAG_PAD:
> +			state->fc_cur_tag++;
> +			state->fc_crc = jbd2_chksum(j, state->fc_crc, tl,
> +					sizeof(*tl) + ext4_fc_tag_len(tl));
> +			break;
> +		case EXT4_FC_TAG_TAIL:
> +			state->fc_cur_tag++;
> +			tail = (struct ext4_fc_tail *)ext4_fc_tag_val(tl);
> +			state->fc_crc = jbd2_chksum(j, state->fc_crc, tl,
> +						sizeof(*tl) +
> +						offsetof(struct ext4_fc_tail,
> +						fc_crc));
> +			jbd_debug(1, "tail tid %d, expected %d\n",
> +					le32_to_cpu(tail->fc_tid),
> +					expected_tid);
> +			if (le32_to_cpu(tail->fc_tid) == expected_tid &&
> +				le32_to_cpu(tail->fc_crc) == state->fc_crc) {
> +				state->fc_replay_num_tags = state->fc_cur_tag;
> +			} else {
> +				ret = state->fc_replay_num_tags ?
> +					JBD2_FC_REPLAY_STOP : -EFSBADCRC;
> +			}
> +			state->fc_crc = 0;
> +			break;
> +		case EXT4_FC_TAG_HEAD:
> +			head = (struct ext4_fc_head *)ext4_fc_tag_val(tl);
> +			if (le32_to_cpu(head->fc_features) &
> +				~EXT4_FC_SUPPORTED_FEATURES) {
> +				ret = -EOPNOTSUPP;
> +				break;
> +			}
> +			if (le32_to_cpu(head->fc_tid) != expected_tid) {
> +				ret = -EINVAL;
> +				break;
> +			}
> +			state->fc_cur_tag++;
> +			state->fc_crc = jbd2_chksum(j, state->fc_crc, tl,
> +					sizeof(*tl) + ext4_fc_tag_len(tl));
> +			break;
> +		default:
> +			ret = state->fc_replay_num_tags ?
> +				JBD2_FC_REPLAY_STOP : -ECANCELED;
> +		}
> +		if (ret < 0 || ret == JBD2_FC_REPLAY_STOP)
> +			break;
> +	}
> +
> +out_err:
> +	return ret;
> +}
> /*
>  * Main recovery path entry point. This function returns JBD2_FC_REPLAY_CONTINUE
>  * to indicate that it is expecting more fast commit blocks. It returns
> @@ -286,6 +388,13 @@ static int process_journal_block(ext2_filsys fs,
> static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
> 				enum passtype pass, int off, tid_t expected_tid)
> {
> +	e2fsck_t ctx = journal->j_fs_dev->k_ctx;
> +	struct e2fsck_fc_replay_state *state = &ctx->fc_replay_state;
> +
> +	if (pass == PASS_SCAN) {
> +		state->fc_current_pass = PASS_SCAN;
> +		return ext4_fc_replay_scan(journal, bh, off, expected_tid);
> +	}
> 	return JBD2_FC_REPLAY_STOP;
> }
> 
> --
> 2.30.0.280.ga3ce27912f-goog
> 


Cheers, Andreas
Andreas Dilger Sept. 18, 2023, 9:31 p.m. UTC | #2
On Sep 18, 2023, at 2:39 PM, Andreas Dilger <adilger@dilger.ca> wrote:
> 
> On Jan 21, 2021, at 10:45 PM, Harshad Shirwadkar <harshadshirwadkar@gmail.com> wrote:
>> 
>> From: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
>> 
>> Add fast commit scan pass. Scan pass is responsible for following
>> things:
>> 
>> * Count total number of fast commit tags that need to be replayed
>> during the replay phase.
>> 
>> * Validate whether the fast commit area is valid for a given
>> transaction ID.
>> 
>> * Verify the CRC of fast commit area.
>> 
>> Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
> 
> I was making a fix to debugfs/journal.c today to improve performance
> of the revoke hashtable, since it was performing very badly with a
> large journal and lots of revokes (separate patch to be submitted).
> 
> I noticed that e2fsck/journal.c is totally missing any of the fast
> commit handling that was added to in this patch.

Sorry, this became incorrect during editing - the debugfs/journal.c
file does not have any of the fast commit handling that was added to
e2fsck/journal.

Cheers, Andreas

> This seems dangerous since there are some cases where debugfs and tune2fs
> will do journal recovery in userspace, but it appears possible that they
> would totally miss any fast commit transaction handling.
> 
> It isn't great that we have two nearly identical copies of the same code
> in e2fsprogs, but looks is difficult to make them totally identical.
> We could potentially play some tricks here (e.g. use a common variable
> name for both "ctx" and "fs" in the code, unify copies of e2fsck_journal_*
> and ext2fs_journal_* into a single file, and potentially abstract out
> some of the differences (mainly from e2fsck/journal.c fixing errors
> during journal recovery) into helper functions that are no-ops on the
> debugfs/journal.c side.
> 
> There would still be two different *builds* of the code, with a lot of
> macro expansions to hide the differences, but I think it looks possible
> to at least bring these two copies more into sync.  I have some cleanups,
> but I don't know much about fast commits and what should be done there.
> 
> Cheers, Andreas
> 
>> ---
>> e2fsck/journal.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 109 insertions(+)
>> 
>> diff --git a/e2fsck/journal.c b/e2fsck/journal.c
>> index 2c8e3441..f1aa0fd6 100644
>> --- a/e2fsck/journal.c
>> +++ b/e2fsck/journal.c
>> @@ -278,6 +278,108 @@ static int process_journal_block(ext2_filsys fs,
>> 	return 0;
>> }
>> 
>> +static int ext4_fc_replay_scan(journal_t *j, struct buffer_head *bh,
>> +				int off, tid_t expected_tid)
>> +{
>> +	e2fsck_t ctx = j->j_fs_dev->k_ctx;
>> +	struct e2fsck_fc_replay_state *state;
>> +	int ret = JBD2_FC_REPLAY_CONTINUE;
>> +	struct ext4_fc_add_range *ext;
>> +	struct ext4_fc_tl *tl;
>> +	struct ext4_fc_tail *tail;
>> +	__u8 *start, *end;
>> +	struct ext4_fc_head *head;
>> +	struct ext2fs_extent ext2fs_ex;
>> +
>> +	state = &ctx->fc_replay_state;
>> +
>> +	start = (__u8 *)bh->b_data;
>> +	end = (__u8 *)bh->b_data + j->j_blocksize - 1;
>> +
>> +	jbd_debug(1, "Scan phase starting, expected %d", expected_tid);
>> +	if (state->fc_replay_expected_off == 0) {
>> +		memset(state, 0, sizeof(*state));
>> +		/* Check if we can stop early */
>> +		if (le16_to_cpu(((struct ext4_fc_tl *)start)->fc_tag)
>> +			!= EXT4_FC_TAG_HEAD) {
>> +			jbd_debug(1, "Ending early!, not a head tag");
>> +			return 0;
>> +		}
>> +	}
>> +
>> +	if (off != state->fc_replay_expected_off) {
>> +		ret = -EFSCORRUPTED;
>> +		goto out_err;
>> +	}
>> +
>> +	state->fc_replay_expected_off++;
>> +	fc_for_each_tl(start, end, tl) {
>> +		jbd_debug(3, "Scan phase, tag:%s, blk %lld\n",
>> +			  tag2str(le16_to_cpu(tl->fc_tag)), bh->b_blocknr);
>> +		switch (le16_to_cpu(tl->fc_tag)) {
>> +		case EXT4_FC_TAG_ADD_RANGE:
>> +			ext = (struct ext4_fc_add_range *)ext4_fc_tag_val(tl);
>> +			ret = ext2fs_decode_extent(&ext2fs_ex, (void *)&ext->fc_ex,
>> +						   sizeof(ext->fc_ex));
>> +			if (ret)
>> +				ret = JBD2_FC_REPLAY_STOP;
>> +			else
>> +				ret = JBD2_FC_REPLAY_CONTINUE;
>> +		case EXT4_FC_TAG_DEL_RANGE:
>> +		case EXT4_FC_TAG_LINK:
>> +		case EXT4_FC_TAG_UNLINK:
>> +		case EXT4_FC_TAG_CREAT:
>> +		case EXT4_FC_TAG_INODE:
>> +		case EXT4_FC_TAG_PAD:
>> +			state->fc_cur_tag++;
>> +			state->fc_crc = jbd2_chksum(j, state->fc_crc, tl,
>> +					sizeof(*tl) + ext4_fc_tag_len(tl));
>> +			break;
>> +		case EXT4_FC_TAG_TAIL:
>> +			state->fc_cur_tag++;
>> +			tail = (struct ext4_fc_tail *)ext4_fc_tag_val(tl);
>> +			state->fc_crc = jbd2_chksum(j, state->fc_crc, tl,
>> +						sizeof(*tl) +
>> +						offsetof(struct ext4_fc_tail,
>> +						fc_crc));
>> +			jbd_debug(1, "tail tid %d, expected %d\n",
>> +					le32_to_cpu(tail->fc_tid),
>> +					expected_tid);
>> +			if (le32_to_cpu(tail->fc_tid) == expected_tid &&
>> +				le32_to_cpu(tail->fc_crc) == state->fc_crc) {
>> +				state->fc_replay_num_tags = state->fc_cur_tag;
>> +			} else {
>> +				ret = state->fc_replay_num_tags ?
>> +					JBD2_FC_REPLAY_STOP : -EFSBADCRC;
>> +			}
>> +			state->fc_crc = 0;
>> +			break;
>> +		case EXT4_FC_TAG_HEAD:
>> +			head = (struct ext4_fc_head *)ext4_fc_tag_val(tl);
>> +			if (le32_to_cpu(head->fc_features) &
>> +				~EXT4_FC_SUPPORTED_FEATURES) {
>> +				ret = -EOPNOTSUPP;
>> +				break;
>> +			}
>> +			if (le32_to_cpu(head->fc_tid) != expected_tid) {
>> +				ret = -EINVAL;
>> +				break;
>> +			}
>> +			state->fc_cur_tag++;
>> +			state->fc_crc = jbd2_chksum(j, state->fc_crc, tl,
>> +					sizeof(*tl) + ext4_fc_tag_len(tl));
>> +			break;
>> +		default:
>> +			ret = state->fc_replay_num_tags ?
>> +				JBD2_FC_REPLAY_STOP : -ECANCELED;
>> +		}
>> +		if (ret < 0 || ret == JBD2_FC_REPLAY_STOP)
>> +			break;
>> +	}
>> +
>> +out_err:
>> +	return ret;
>> +}
>> /*
>> * Main recovery path entry point. This function returns JBD2_FC_REPLAY_CONTINUE
>> * to indicate that it is expecting more fast commit blocks. It returns
>> @@ -286,6 +388,13 @@ static int process_journal_block(ext2_filsys fs,
>> static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
>> 				enum passtype pass, int off, tid_t expected_tid)
>> {
>> +	e2fsck_t ctx = journal->j_fs_dev->k_ctx;
>> +	struct e2fsck_fc_replay_state *state = &ctx->fc_replay_state;
>> +
>> +	if (pass == PASS_SCAN) {
>> +		state->fc_current_pass = PASS_SCAN;
>> +		return ext4_fc_replay_scan(journal, bh, off, expected_tid);
>> +	}
>> 	return JBD2_FC_REPLAY_STOP;
>> }
>> 
>> --
>> 2.30.0.280.ga3ce27912f-goog
>> 
> 
> 
> Cheers, Andreas


Cheers, Andreas
Andreas Dilger Sept. 18, 2023, 11:01 p.m. UTC | #3
https://patchwork.ozlabs.org/project/linux-ext4/list/?series=312552On Sep 18, 2023, at 2:39 PM, Andreas Dilger <adilger@dilger.ca> wrote:
> 
> On Jan 21, 2021, at 10:45 PM, Harshad Shirwadkar <harshadshirwadkar@gmail.com> wrote:
>> 
>> From: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
>> 
>> Add fast commit scan pass. Scan pass is responsible for following
>> things:
>> 
>> * Count total number of fast commit tags that need to be replayed
>> during the replay phase.
>> 
>> * Validate whether the fast commit area is valid for a given
>> transaction ID.
>> 
>> * Verify the CRC of fast commit area.
>> 
>> Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
> 
> I was making a fix to debugfs/journal.c today to improve performance
> of the revoke hashtable, since it was performing very badly with a
> large journal and lots of revokes (separate patch to be submitted).
> 
> I noticed that debugfs/journal.c is totally missing any of the fast
> commit handling that was added to e2fsck/journal.c in this patch.
> 
> This seems dangerous since there are some cases where debugfs and tune2fs
> will do journal recovery in userspace, but it appears possible that they
> would totally miss any fast commit transaction handling.
> 
> It isn't great that we have two nearly identical copies of the same code
> in e2fsprogs, but looks is difficult to make them totally identical.
> We could potentially play some tricks here (e.g. use a common variable
> name for both "ctx" and "fs" in the code, unify copies of e2fsck_journal_*
> and ext2fs_journal_* into a single file, and potentially abstract out
> some of the differences (mainly from e2fsck/journal.c fixing errors
> during journal recovery) into helper functions that are no-ops on the
> debugfs/journal.c side.
> 
> There would still be two different *builds* of the code, with a lot of
> macro expansions to hide the differences, but I think it looks possible
> to at least bring these two copies more into sync.  I have some cleanups,
> but I don't know much about fast commits and what should be done there.

I was poking in ext4 Patchwork for an unrelated reason and found the
following patch series from Alexey Lyashkov that is already cleaning
up a bunch of this code duplication by moving it into lib/support:

https://patchwork.ozlabs.org/project/linux-ext4/list/?series=312552

This is cleaning up a bunch of the common code between e2fsck and debugfs,
but doesn't get far enough to add in the missing fast commit feature,
AFAICS.

Cheers, Andreas

>> ---
>> e2fsck/journal.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 109 insertions(+)
>> 
>> diff --git a/e2fsck/journal.c b/e2fsck/journal.c
>> index 2c8e3441..f1aa0fd6 100644
>> --- a/e2fsck/journal.c
>> +++ b/e2fsck/journal.c
>> @@ -278,6 +278,108 @@ static int process_journal_block(ext2_filsys fs,
>> 	return 0;
>> }
>> 
>> +static int ext4_fc_replay_scan(journal_t *j, struct buffer_head *bh,
>> +				int off, tid_t expected_tid)
>> +{
>> +	e2fsck_t ctx = j->j_fs_dev->k_ctx;
>> +	struct e2fsck_fc_replay_state *state;
>> +	int ret = JBD2_FC_REPLAY_CONTINUE;
>> +	struct ext4_fc_add_range *ext;
>> +	struct ext4_fc_tl *tl;
>> +	struct ext4_fc_tail *tail;
>> +	__u8 *start, *end;
>> +	struct ext4_fc_head *head;
>> +	struct ext2fs_extent ext2fs_ex;
>> +
>> +	state = &ctx->fc_replay_state;
>> +
>> +	start = (__u8 *)bh->b_data;
>> +	end = (__u8 *)bh->b_data + j->j_blocksize - 1;
>> +
>> +	jbd_debug(1, "Scan phase starting, expected %d", expected_tid);
>> +	if (state->fc_replay_expected_off == 0) {
>> +		memset(state, 0, sizeof(*state));
>> +		/* Check if we can stop early */
>> +		if (le16_to_cpu(((struct ext4_fc_tl *)start)->fc_tag)
>> +			!= EXT4_FC_TAG_HEAD) {
>> +			jbd_debug(1, "Ending early!, not a head tag");
>> +			return 0;
>> +		}
>> +	}
>> +
>> +	if (off != state->fc_replay_expected_off) {
>> +		ret = -EFSCORRUPTED;
>> +		goto out_err;
>> +	}
>> +
>> +	state->fc_replay_expected_off++;
>> +	fc_for_each_tl(start, end, tl) {
>> +		jbd_debug(3, "Scan phase, tag:%s, blk %lld\n",
>> +			  tag2str(le16_to_cpu(tl->fc_tag)), bh->b_blocknr);
>> +		switch (le16_to_cpu(tl->fc_tag)) {
>> +		case EXT4_FC_TAG_ADD_RANGE:
>> +			ext = (struct ext4_fc_add_range *)ext4_fc_tag_val(tl);
>> +			ret = ext2fs_decode_extent(&ext2fs_ex, (void *)&ext->fc_ex,
>> +						   sizeof(ext->fc_ex));
>> +			if (ret)
>> +				ret = JBD2_FC_REPLAY_STOP;
>> +			else
>> +				ret = JBD2_FC_REPLAY_CONTINUE;
>> +		case EXT4_FC_TAG_DEL_RANGE:
>> +		case EXT4_FC_TAG_LINK:
>> +		case EXT4_FC_TAG_UNLINK:
>> +		case EXT4_FC_TAG_CREAT:
>> +		case EXT4_FC_TAG_INODE:
>> +		case EXT4_FC_TAG_PAD:
>> +			state->fc_cur_tag++;
>> +			state->fc_crc = jbd2_chksum(j, state->fc_crc, tl,
>> +					sizeof(*tl) + ext4_fc_tag_len(tl));
>> +			break;
>> +		case EXT4_FC_TAG_TAIL:
>> +			state->fc_cur_tag++;
>> +			tail = (struct ext4_fc_tail *)ext4_fc_tag_val(tl);
>> +			state->fc_crc = jbd2_chksum(j, state->fc_crc, tl,
>> +						sizeof(*tl) +
>> +						offsetof(struct ext4_fc_tail,
>> +						fc_crc));
>> +			jbd_debug(1, "tail tid %d, expected %d\n",
>> +					le32_to_cpu(tail->fc_tid),
>> +					expected_tid);
>> +			if (le32_to_cpu(tail->fc_tid) == expected_tid &&
>> +				le32_to_cpu(tail->fc_crc) == state->fc_crc) {
>> +				state->fc_replay_num_tags = state->fc_cur_tag;
>> +			} else {
>> +				ret = state->fc_replay_num_tags ?
>> +					JBD2_FC_REPLAY_STOP : -EFSBADCRC;
>> +			}
>> +			state->fc_crc = 0;
>> +			break;
>> +		case EXT4_FC_TAG_HEAD:
>> +			head = (struct ext4_fc_head *)ext4_fc_tag_val(tl);
>> +			if (le32_to_cpu(head->fc_features) &
>> +				~EXT4_FC_SUPPORTED_FEATURES) {
>> +				ret = -EOPNOTSUPP;
>> +				break;
>> +			}
>> +			if (le32_to_cpu(head->fc_tid) != expected_tid) {
>> +				ret = -EINVAL;
>> +				break;
>> +			}
>> +			state->fc_cur_tag++;
>> +			state->fc_crc = jbd2_chksum(j, state->fc_crc, tl,
>> +					sizeof(*tl) + ext4_fc_tag_len(tl));
>> +			break;
>> +		default:
>> +			ret = state->fc_replay_num_tags ?
>> +				JBD2_FC_REPLAY_STOP : -ECANCELED;
>> +		}
>> +		if (ret < 0 || ret == JBD2_FC_REPLAY_STOP)
>> +			break;
>> +	}
>> +
>> +out_err:
>> +	return ret;
>> +}
>> /*
>> * Main recovery path entry point. This function returns JBD2_FC_REPLAY_CONTINUE
>> * to indicate that it is expecting more fast commit blocks. It returns
>> @@ -286,6 +388,13 @@ static int process_journal_block(ext2_filsys fs,
>> static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
>> 				enum passtype pass, int off, tid_t expected_tid)
>> {
>> +	e2fsck_t ctx = journal->j_fs_dev->k_ctx;
>> +	struct e2fsck_fc_replay_state *state = &ctx->fc_replay_state;
>> +
>> +	if (pass == PASS_SCAN) {
>> +		state->fc_current_pass = PASS_SCAN;
>> +		return ext4_fc_replay_scan(journal, bh, off, expected_tid);
>> +	}
>> 	return JBD2_FC_REPLAY_STOP;
>> }
>> 
>> --
>> 2.30.0.280.ga3ce27912f-goog
>> 
> 
> 
> Cheers, Andreas
> 
> 
> 
> 
> 


Cheers, Andreas
diff mbox series

Patch

diff --git a/e2fsck/journal.c b/e2fsck/journal.c
index 2c8e3441..f1aa0fd6 100644
--- a/e2fsck/journal.c
+++ b/e2fsck/journal.c
@@ -278,6 +278,108 @@  static int process_journal_block(ext2_filsys fs,
 	return 0;
 }
 
+static int ext4_fc_replay_scan(journal_t *j, struct buffer_head *bh,
+				int off, tid_t expected_tid)
+{
+	e2fsck_t ctx = j->j_fs_dev->k_ctx;
+	struct e2fsck_fc_replay_state *state;
+	int ret = JBD2_FC_REPLAY_CONTINUE;
+	struct ext4_fc_add_range *ext;
+	struct ext4_fc_tl *tl;
+	struct ext4_fc_tail *tail;
+	__u8 *start, *end;
+	struct ext4_fc_head *head;
+	struct ext2fs_extent ext2fs_ex;
+
+	state = &ctx->fc_replay_state;
+
+	start = (__u8 *)bh->b_data;
+	end = (__u8 *)bh->b_data + j->j_blocksize - 1;
+
+	jbd_debug(1, "Scan phase starting, expected %d", expected_tid);
+	if (state->fc_replay_expected_off == 0) {
+		memset(state, 0, sizeof(*state));
+		/* Check if we can stop early */
+		if (le16_to_cpu(((struct ext4_fc_tl *)start)->fc_tag)
+			!= EXT4_FC_TAG_HEAD) {
+			jbd_debug(1, "Ending early!, not a head tag");
+			return 0;
+		}
+	}
+
+	if (off != state->fc_replay_expected_off) {
+		ret = -EFSCORRUPTED;
+		goto out_err;
+	}
+
+	state->fc_replay_expected_off++;
+	fc_for_each_tl(start, end, tl) {
+		jbd_debug(3, "Scan phase, tag:%s, blk %lld\n",
+			  tag2str(le16_to_cpu(tl->fc_tag)), bh->b_blocknr);
+		switch (le16_to_cpu(tl->fc_tag)) {
+		case EXT4_FC_TAG_ADD_RANGE:
+			ext = (struct ext4_fc_add_range *)ext4_fc_tag_val(tl);
+			ret = ext2fs_decode_extent(&ext2fs_ex, (void *)&ext->fc_ex,
+						   sizeof(ext->fc_ex));
+			if (ret)
+				ret = JBD2_FC_REPLAY_STOP;
+			else
+				ret = JBD2_FC_REPLAY_CONTINUE;
+		case EXT4_FC_TAG_DEL_RANGE:
+		case EXT4_FC_TAG_LINK:
+		case EXT4_FC_TAG_UNLINK:
+		case EXT4_FC_TAG_CREAT:
+		case EXT4_FC_TAG_INODE:
+		case EXT4_FC_TAG_PAD:
+			state->fc_cur_tag++;
+			state->fc_crc = jbd2_chksum(j, state->fc_crc, tl,
+					sizeof(*tl) + ext4_fc_tag_len(tl));
+			break;
+		case EXT4_FC_TAG_TAIL:
+			state->fc_cur_tag++;
+			tail = (struct ext4_fc_tail *)ext4_fc_tag_val(tl);
+			state->fc_crc = jbd2_chksum(j, state->fc_crc, tl,
+						sizeof(*tl) +
+						offsetof(struct ext4_fc_tail,
+						fc_crc));
+			jbd_debug(1, "tail tid %d, expected %d\n",
+					le32_to_cpu(tail->fc_tid),
+					expected_tid);
+			if (le32_to_cpu(tail->fc_tid) == expected_tid &&
+				le32_to_cpu(tail->fc_crc) == state->fc_crc) {
+				state->fc_replay_num_tags = state->fc_cur_tag;
+			} else {
+				ret = state->fc_replay_num_tags ?
+					JBD2_FC_REPLAY_STOP : -EFSBADCRC;
+			}
+			state->fc_crc = 0;
+			break;
+		case EXT4_FC_TAG_HEAD:
+			head = (struct ext4_fc_head *)ext4_fc_tag_val(tl);
+			if (le32_to_cpu(head->fc_features) &
+				~EXT4_FC_SUPPORTED_FEATURES) {
+				ret = -EOPNOTSUPP;
+				break;
+			}
+			if (le32_to_cpu(head->fc_tid) != expected_tid) {
+				ret = -EINVAL;
+				break;
+			}
+			state->fc_cur_tag++;
+			state->fc_crc = jbd2_chksum(j, state->fc_crc, tl,
+					sizeof(*tl) + ext4_fc_tag_len(tl));
+			break;
+		default:
+			ret = state->fc_replay_num_tags ?
+				JBD2_FC_REPLAY_STOP : -ECANCELED;
+		}
+		if (ret < 0 || ret == JBD2_FC_REPLAY_STOP)
+			break;
+	}
+
+out_err:
+	return ret;
+}
 /*
  * Main recovery path entry point. This function returns JBD2_FC_REPLAY_CONTINUE
  * to indicate that it is expecting more fast commit blocks. It returns
@@ -286,6 +388,13 @@  static int process_journal_block(ext2_filsys fs,
 static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
 				enum passtype pass, int off, tid_t expected_tid)
 {
+	e2fsck_t ctx = journal->j_fs_dev->k_ctx;
+	struct e2fsck_fc_replay_state *state = &ctx->fc_replay_state;
+
+	if (pass == PASS_SCAN) {
+		state->fc_current_pass = PASS_SCAN;
+		return ext4_fc_replay_scan(journal, bh, off, expected_tid);
+	}
 	return JBD2_FC_REPLAY_STOP;
 }