diff mbox series

[v5] jbd2: avoid transaction reuse after reformatting

Message ID 20201012134322.5956-1-changfengnan@hikvision.com
State Superseded
Headers show
Series [v5] jbd2: avoid transaction reuse after reformatting | expand

Commit Message

Fengnan Chang Oct. 12, 2020, 1:43 p.m. UTC
When ext4 is formatted with lazy_journal_init=1 and transactions from
the previous filesystem are still on disk, it is possible that they are
considered during a recovery after a crash. Because the checksum seed
has changed, the CRC check will fail, and the journal recovery fails
with checksum error although the journal is otherwise perfectly valid.
Fix the problem by checking commit block time stamps to determine
whether the data in the journal block is just stale or whether it is
indeed corrupt.

Signed-off-by: Fengnan Chang <changfengnan@hikvision.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/jbd2/recovery.c | 97 ++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 86 insertions(+), 11 deletions(-)

Comments

Jan Kara Oct. 12, 2020, 4:44 p.m. UTC | #1
On Mon 12-10-20 21:43:22, Fengnan Chang wrote:
> When ext4 is formatted with lazy_journal_init=1 and transactions from
> the previous filesystem are still on disk, it is possible that they are
> considered during a recovery after a crash. Because the checksum seed
> has changed, the CRC check will fail, and the journal recovery fails
> with checksum error although the journal is otherwise perfectly valid.
> Fix the problem by checking commit block time stamps to determine
> whether the data in the journal block is just stale or whether it is
> indeed corrupt.
> 
> Signed-off-by: Fengnan Chang <changfengnan@hikvision.com>
> Signed-off-by: Jan Kara <jack@suse.cz>

Thanks for the patch. Current upstream kernel actually slightly differs so
your patch will not apply. I've applied the changes to my tree and also
removed some duplication but overall the logic should be the same as in
your patch. Fstests are still running but I'll post the patch as v6 so that
Ted can pick it up.

								Honza

> ---
>  fs/jbd2/recovery.c | 97 ++++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 86 insertions(+), 11 deletions(-)
> 
> diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
> index a4967b27ffb6..4d0aff628884 100644
> --- a/fs/jbd2/recovery.c
> +++ b/fs/jbd2/recovery.c
> @@ -428,6 +428,8 @@ static int do_one_pass(journal_t *journal,
>  	__u32			crc32_sum = ~0; /* Transactional Checksums */
>  	int			descr_csum_size = 0;
>  	int			block_error = 0;
> +	bool		need_check_commit_time = false;
> +	__be64		last_trans_commit_time = 0, commit_time = 0;
>  
>  	/*
>  	 * First thing is to establish what we expect to find in the log
> @@ -520,12 +522,21 @@ static int do_one_pass(journal_t *journal,
>  			if (descr_csum_size > 0 &&
>  			    !jbd2_descriptor_block_csum_verify(journal,
>  							       bh->b_data)) {
> -				printk(KERN_ERR "JBD2: Invalid checksum "
> -				       "recovering block %lu in log\n",
> -				       next_log_block);
> -				err = -EFSBADCRC;
> -				brelse(bh);
> -				goto failed;
> +				/*
> +				 * PASS_SCAN can see stale blocks due to lazy
> +				 * journal init. Don't error out on those yet.
> +				 */
> +				if (pass != PASS_SCAN) {
> +					pr_err("JBD2: Invalid checksum recovering block %lu in log\n",
> +					       next_log_block);
> +					err = -EFSBADCRC;
> +					brelse(bh);
> +					goto failed;
> +				}
> +				need_check_commit_time = true;
> +				jbd_debug(1,
> +					"invalid descriptor block found in %lu\n",
> +					next_log_block);
>  			}
>  
>  			/* If it is a valid descriptor block, replay it
> @@ -535,6 +546,7 @@ static int do_one_pass(journal_t *journal,
>  			if (pass != PASS_REPLAY) {
>  				if (pass == PASS_SCAN &&
>  				    jbd2_has_feature_checksum(journal) &&
> +				    !need_check_commit_time &&
>  				    !info->end_transaction) {
>  					if (calc_chksums(journal, bh,
>  							&next_log_block,
> @@ -683,11 +695,50 @@ static int do_one_pass(journal_t *journal,
>  			 *	 mentioned conditions. Hence assume
>  			 *	 "Interrupted Commit".)
>  			 */
> -
> +			/*
> +			 * If need_check_commit_time is set, it means
> +			 * csum verify failed before, if commit_time is
> +			 * increasing, it's same journal, otherwise it
> +			 * is stale journal block, just end this
> +			 * recovery.
> +			 */
> +			if (pass == PASS_SCAN) {
> +				struct commit_header *cbh =
> +					(struct commit_header *)bh->b_data;
> +				commit_time =
> +					be64_to_cpu(cbh->h_commit_sec);
> +				/*
> +				 * When need check commit time, it means csum
> +				 * verify failed before, if commit time is
> +				 * increasing, it's same journal, otherwise
> +				 * not same journal, just end this recovery.
> +				 */
> +				if (need_check_commit_time) {
> +					if (commit_time >=
> +						last_trans_commit_time) {
> +						pr_err("JBD2: Invalid checksum found in transaction %u\n",
> +								next_commit_ID);
> +						err = -EFSBADCRC;
> +						brelse(bh);
> +						goto failed;
> +					}
> +					/*
> +					 * It likely does not belong to same
> +					 * journal, just end this recovery with
> +					 * success.
> +					 */
> +					jbd_debug(1, "JBD2: Invalid checksum ignored in transaction %u, likely stale data\n",
> +							next_commit_ID);
> +					err = 0;
> +					brelse(bh);
> +					goto done;
> +				}
> +			}
>  			/* Found an expected commit block: if checksums
>  			 * are present verify them in PASS_SCAN; else not
>  			 * much to do other than move on to the next sequence
> -			 * number. */
> +			 * number.
> +			 */
>  			if (pass == PASS_SCAN &&
>  			    jbd2_has_feature_checksum(journal)) {
>  				int chksum_err, chksum_seen;
> @@ -727,6 +778,13 @@ static int do_one_pass(journal_t *journal,
>  						chksum_err = 1;
>  
>  				if (chksum_err) {
> +					if (commit_time <
> +						last_trans_commit_time) {
> +						jbd_debug(1, "JBD2: Invalid commit checksum ignored in transaction %u, likely stale data\n",
> +							next_log_block);
> +						brelse(bh);
> +						goto done;
> +					}
>  					info->end_transaction = next_commit_ID;
>  
>  					if (!jbd2_has_feature_async_commit(journal)) {
> @@ -741,6 +799,12 @@ static int do_one_pass(journal_t *journal,
>  			if (pass == PASS_SCAN &&
>  			    !jbd2_commit_block_csum_verify(journal,
>  							   bh->b_data)) {
> +				if (commit_time < last_trans_commit_time) {
> +					jbd_debug(1, "JBD2: Invalid commit checksum ignored in transaction %u, likely stale data\n",
> +						next_log_block);
> +					brelse(bh);
> +					goto done;
> +				}
>  				info->end_transaction = next_commit_ID;
>  
>  				if (!jbd2_has_feature_async_commit(journal)) {
> @@ -750,11 +814,25 @@ static int do_one_pass(journal_t *journal,
>  					break;
>  				}
>  			}
> +			if (pass == PASS_SCAN)
> +				last_trans_commit_time = commit_time;
>  			brelse(bh);
>  			next_commit_ID++;
>  			continue;
>  
>  		case JBD2_REVOKE_BLOCK:
> +			/*
> +			 * Check revoke block crc in pass_scan, if csum verify
> +			 * failed, check commit block time later.
> +			 */
> +			if (pass == PASS_SCAN) {
> +				if (!jbd2_descriptor_block_csum_verify(journal,
> +						bh->b_data)) {
> +					jbd_debug(1, "invalid revoke block found in %lu\n",
> +						next_log_block);
> +					need_check_commit_time = true;
> +				}
> +			}
>  			/* If we aren't in the REVOKE pass, then we can
>  			 * just skip over this block. */
>  			if (pass != PASS_REVOKE) {
> @@ -822,9 +900,6 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
>  	offset = sizeof(jbd2_journal_revoke_header_t);
>  	rcount = be32_to_cpu(header->r_count);
>  
> -	if (!jbd2_descriptor_block_csum_verify(journal, header))
> -		return -EFSBADCRC;
> -
>  	if (jbd2_journal_has_csum_v2or3(journal))
>  		csum_size = sizeof(struct jbd2_journal_block_tail);
>  	if (rcount > journal->j_blocksize - csum_size)
> -- 
> 2.25.1
>
kernel test robot Oct. 12, 2020, 6:10 p.m. UTC | #2
Hi Fengnan,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on tytso-fscrypt/master]
[cannot apply to ext4/dev linus/master v5.9 next-20201012]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Fengnan-Chang/jbd2-avoid-transaction-reuse-after-reformatting/20201012-215959
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tytso/fscrypt.git master
config: sparc64-randconfig-s032-20201012 (attached as .config)
compiler: sparc64-linux-gcc (GCC) 9.3.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # apt-get install sparse
        # sparse version: v0.6.3-rc1-dirty
        # https://github.com/0day-ci/linux/commit/fa9e125afb86200f923868b057a3e77ec7dcb215
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Fengnan-Chang/jbd2-avoid-transaction-reuse-after-reformatting/20201012-215959
        git checkout fa9e125afb86200f923868b057a3e77ec7dcb215
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=sparc64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>


"sparse warnings: (new ones prefixed by >>)"
>> fs/jbd2/recovery.c:708:45: sparse: sparse: incorrect type in assignment (different base types) @@     expected restricted __be64 [usertype] commit_time @@     got unsigned long long [usertype] @@
   fs/jbd2/recovery.c:708:45: sparse:     expected restricted __be64 [usertype] commit_time
   fs/jbd2/recovery.c:708:45: sparse:     got unsigned long long [usertype]
   fs/jbd2/recovery.c:717:45: sparse: sparse: restricted __be64 degrades to integer
   fs/jbd2/recovery.c:718:49: sparse: sparse: restricted __be64 degrades to integer
   fs/jbd2/recovery.c:781:45: sparse: sparse: restricted __be64 degrades to integer
   fs/jbd2/recovery.c:782:49: sparse: sparse: restricted __be64 degrades to integer
   fs/jbd2/recovery.c:802:37: sparse: sparse: restricted __be64 degrades to integer
   fs/jbd2/recovery.c:802:51: sparse: sparse: restricted __be64 degrades to integer

vim +708 fs/jbd2/recovery.c

   415	
   416	static int do_one_pass(journal_t *journal,
   417				struct recovery_info *info, enum passtype pass)
   418	{
   419		unsigned int		first_commit_ID, next_commit_ID;
   420		unsigned long		next_log_block;
   421		int			err, success = 0;
   422		journal_superblock_t *	sb;
   423		journal_header_t *	tmp;
   424		struct buffer_head *	bh;
   425		unsigned int		sequence;
   426		int			blocktype;
   427		int			tag_bytes = journal_tag_bytes(journal);
   428		__u32			crc32_sum = ~0; /* Transactional Checksums */
   429		int			descr_csum_size = 0;
   430		int			block_error = 0;
   431		bool		need_check_commit_time = false;
   432		__be64		last_trans_commit_time = 0, commit_time = 0;
   433	
   434		/*
   435		 * First thing is to establish what we expect to find in the log
   436		 * (in terms of transaction IDs), and where (in terms of log
   437		 * block offsets): query the superblock.
   438		 */
   439	
   440		sb = journal->j_superblock;
   441		next_commit_ID = be32_to_cpu(sb->s_sequence);
   442		next_log_block = be32_to_cpu(sb->s_start);
   443	
   444		first_commit_ID = next_commit_ID;
   445		if (pass == PASS_SCAN)
   446			info->start_transaction = first_commit_ID;
   447	
   448		jbd_debug(1, "Starting recovery pass %d\n", pass);
   449	
   450		/*
   451		 * Now we walk through the log, transaction by transaction,
   452		 * making sure that each transaction has a commit block in the
   453		 * expected place.  Each complete transaction gets replayed back
   454		 * into the main filesystem.
   455		 */
   456	
   457		while (1) {
   458			int			flags;
   459			char *			tagp;
   460			journal_block_tag_t *	tag;
   461			struct buffer_head *	obh;
   462			struct buffer_head *	nbh;
   463	
   464			cond_resched();
   465	
   466			/* If we already know where to stop the log traversal,
   467			 * check right now that we haven't gone past the end of
   468			 * the log. */
   469	
   470			if (pass != PASS_SCAN)
   471				if (tid_geq(next_commit_ID, info->end_transaction))
   472					break;
   473	
   474			jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
   475				  next_commit_ID, next_log_block, journal->j_last);
   476	
   477			/* Skip over each chunk of the transaction looking
   478			 * either the next descriptor block or the final commit
   479			 * record. */
   480	
   481			jbd_debug(3, "JBD2: checking block %ld\n", next_log_block);
   482			err = jread(&bh, journal, next_log_block);
   483			if (err)
   484				goto failed;
   485	
   486			next_log_block++;
   487			wrap(journal, next_log_block);
   488	
   489			/* What kind of buffer is it?
   490			 *
   491			 * If it is a descriptor block, check that it has the
   492			 * expected sequence number.  Otherwise, we're all done
   493			 * here. */
   494	
   495			tmp = (journal_header_t *)bh->b_data;
   496	
   497			if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) {
   498				brelse(bh);
   499				break;
   500			}
   501	
   502			blocktype = be32_to_cpu(tmp->h_blocktype);
   503			sequence = be32_to_cpu(tmp->h_sequence);
   504			jbd_debug(3, "Found magic %d, sequence %d\n",
   505				  blocktype, sequence);
   506	
   507			if (sequence != next_commit_ID) {
   508				brelse(bh);
   509				break;
   510			}
   511	
   512			/* OK, we have a valid descriptor block which matches
   513			 * all of the sequence number checks.  What are we going
   514			 * to do with it?  That depends on the pass... */
   515	
   516			switch(blocktype) {
   517			case JBD2_DESCRIPTOR_BLOCK:
   518				/* Verify checksum first */
   519				if (jbd2_journal_has_csum_v2or3(journal))
   520					descr_csum_size =
   521						sizeof(struct jbd2_journal_block_tail);
   522				if (descr_csum_size > 0 &&
   523				    !jbd2_descriptor_block_csum_verify(journal,
   524								       bh->b_data)) {
   525					/*
   526					 * PASS_SCAN can see stale blocks due to lazy
   527					 * journal init. Don't error out on those yet.
   528					 */
   529					if (pass != PASS_SCAN) {
   530						pr_err("JBD2: Invalid checksum recovering block %lu in log\n",
   531						       next_log_block);
   532						err = -EFSBADCRC;
   533						brelse(bh);
   534						goto failed;
   535					}
   536					need_check_commit_time = true;
   537					jbd_debug(1,
   538						"invalid descriptor block found in %lu\n",
   539						next_log_block);
   540				}
   541	
   542				/* If it is a valid descriptor block, replay it
   543				 * in pass REPLAY; if journal_checksums enabled, then
   544				 * calculate checksums in PASS_SCAN, otherwise,
   545				 * just skip over the blocks it describes. */
   546				if (pass != PASS_REPLAY) {
   547					if (pass == PASS_SCAN &&
   548					    jbd2_has_feature_checksum(journal) &&
   549					    !need_check_commit_time &&
   550					    !info->end_transaction) {
   551						if (calc_chksums(journal, bh,
   552								&next_log_block,
   553								&crc32_sum)) {
   554							put_bh(bh);
   555							break;
   556						}
   557						put_bh(bh);
   558						continue;
   559					}
   560					next_log_block += count_tags(journal, bh);
   561					wrap(journal, next_log_block);
   562					put_bh(bh);
   563					continue;
   564				}
   565	
   566				/* A descriptor block: we can now write all of
   567				 * the data blocks.  Yay, useful work is finally
   568				 * getting done here! */
   569	
   570				tagp = &bh->b_data[sizeof(journal_header_t)];
   571				while ((tagp - bh->b_data + tag_bytes)
   572				       <= journal->j_blocksize - descr_csum_size) {
   573					unsigned long io_block;
   574	
   575					tag = (journal_block_tag_t *) tagp;
   576					flags = be16_to_cpu(tag->t_flags);
   577	
   578					io_block = next_log_block++;
   579					wrap(journal, next_log_block);
   580					err = jread(&obh, journal, io_block);
   581					if (err) {
   582						/* Recover what we can, but
   583						 * report failure at the end. */
   584						success = err;
   585						printk(KERN_ERR
   586							"JBD2: IO error %d recovering "
   587							"block %ld in log\n",
   588							err, io_block);
   589					} else {
   590						unsigned long long blocknr;
   591	
   592						J_ASSERT(obh != NULL);
   593						blocknr = read_tag_block(journal,
   594									 tag);
   595	
   596						/* If the block has been
   597						 * revoked, then we're all done
   598						 * here. */
   599						if (jbd2_journal_test_revoke
   600						    (journal, blocknr,
   601						     next_commit_ID)) {
   602							brelse(obh);
   603							++info->nr_revoke_hits;
   604							goto skip_write;
   605						}
   606	
   607						/* Look for block corruption */
   608						if (!jbd2_block_tag_csum_verify(
   609							journal, tag, obh->b_data,
   610							be32_to_cpu(tmp->h_sequence))) {
   611							brelse(obh);
   612							success = -EFSBADCRC;
   613							printk(KERN_ERR "JBD2: Invalid "
   614							       "checksum recovering "
   615							       "data block %llu in "
   616							       "log\n", blocknr);
   617							block_error = 1;
   618							goto skip_write;
   619						}
   620	
   621						/* Find a buffer for the new
   622						 * data being restored */
   623						nbh = __getblk(journal->j_fs_dev,
   624								blocknr,
   625								journal->j_blocksize);
   626						if (nbh == NULL) {
   627							printk(KERN_ERR
   628							       "JBD2: Out of memory "
   629							       "during recovery.\n");
   630							err = -ENOMEM;
   631							brelse(bh);
   632							brelse(obh);
   633							goto failed;
   634						}
   635	
   636						lock_buffer(nbh);
   637						memcpy(nbh->b_data, obh->b_data,
   638								journal->j_blocksize);
   639						if (flags & JBD2_FLAG_ESCAPE) {
   640							*((__be32 *)nbh->b_data) =
   641							cpu_to_be32(JBD2_MAGIC_NUMBER);
   642						}
   643	
   644						BUFFER_TRACE(nbh, "marking dirty");
   645						set_buffer_uptodate(nbh);
   646						mark_buffer_dirty(nbh);
   647						BUFFER_TRACE(nbh, "marking uptodate");
   648						++info->nr_replays;
   649						/* ll_rw_block(WRITE, 1, &nbh); */
   650						unlock_buffer(nbh);
   651						brelse(obh);
   652						brelse(nbh);
   653					}
   654	
   655				skip_write:
   656					tagp += tag_bytes;
   657					if (!(flags & JBD2_FLAG_SAME_UUID))
   658						tagp += 16;
   659	
   660					if (flags & JBD2_FLAG_LAST_TAG)
   661						break;
   662				}
   663	
   664				brelse(bh);
   665				continue;
   666	
   667			case JBD2_COMMIT_BLOCK:
   668				/*     How to differentiate between interrupted commit
   669				 *               and journal corruption ?
   670				 *
   671				 * {nth transaction}
   672				 *        Checksum Verification Failed
   673				 *			 |
   674				 *		 ____________________
   675				 *		|		     |
   676				 * 	async_commit             sync_commit
   677				 *     		|                    |
   678				 *		| GO TO NEXT    "Journal Corruption"
   679				 *		| TRANSACTION
   680				 *		|
   681				 * {(n+1)th transanction}
   682				 *		|
   683				 * 	 _______|______________
   684				 * 	|	 	      |
   685				 * Commit block found	Commit block not found
   686				 *      |		      |
   687				 * "Journal Corruption"       |
   688				 *		 _____________|_________
   689				 *     		|	           	|
   690				 *	nth trans corrupt	OR   nth trans
   691				 *	and (n+1)th interrupted     interrupted
   692				 *	before commit block
   693				 *      could reach the disk.
   694				 *	(Cannot find the difference in above
   695				 *	 mentioned conditions. Hence assume
   696				 *	 "Interrupted Commit".)
   697				 */
   698				/*
   699				 * If need_check_commit_time is set, it means
   700				 * csum verify failed before, if commit_time is
   701				 * increasing, it's same journal, otherwise it
   702				 * is stale journal block, just end this
   703				 * recovery.
   704				 */
   705				if (pass == PASS_SCAN) {
   706					struct commit_header *cbh =
   707						(struct commit_header *)bh->b_data;
 > 708					commit_time =
   709						be64_to_cpu(cbh->h_commit_sec);
   710					/*
   711					 * When need check commit time, it means csum
   712					 * verify failed before, if commit time is
   713					 * increasing, it's same journal, otherwise
   714					 * not same journal, just end this recovery.
   715					 */
   716					if (need_check_commit_time) {
   717						if (commit_time >=
   718							last_trans_commit_time) {
   719							pr_err("JBD2: Invalid checksum found in transaction %u\n",
   720									next_commit_ID);
   721							err = -EFSBADCRC;
   722							brelse(bh);
   723							goto failed;
   724						}
   725						/*
   726						 * It likely does not belong to same
   727						 * journal, just end this recovery with
   728						 * success.
   729						 */
   730						jbd_debug(1, "JBD2: Invalid checksum ignored in transaction %u, likely stale data\n",
   731								next_commit_ID);
   732						err = 0;
   733						brelse(bh);
   734						goto done;
   735					}
   736				}
   737				/* Found an expected commit block: if checksums
   738				 * are present verify them in PASS_SCAN; else not
   739				 * much to do other than move on to the next sequence
   740				 * number.
   741				 */
   742				if (pass == PASS_SCAN &&
   743				    jbd2_has_feature_checksum(journal)) {
   744					int chksum_err, chksum_seen;
   745					struct commit_header *cbh =
   746						(struct commit_header *)bh->b_data;
   747					unsigned found_chksum =
   748						be32_to_cpu(cbh->h_chksum[0]);
   749	
   750					chksum_err = chksum_seen = 0;
   751	
   752					if (info->end_transaction) {
   753						journal->j_failed_commit =
   754							info->end_transaction;
   755						brelse(bh);
   756						break;
   757					}
   758	
   759					if (crc32_sum == found_chksum &&
   760					    cbh->h_chksum_type == JBD2_CRC32_CHKSUM &&
   761					    cbh->h_chksum_size ==
   762							JBD2_CRC32_CHKSUM_SIZE)
   763					       chksum_seen = 1;
   764					else if (!(cbh->h_chksum_type == 0 &&
   765						     cbh->h_chksum_size == 0 &&
   766						     found_chksum == 0 &&
   767						     !chksum_seen))
   768					/*
   769					 * If fs is mounted using an old kernel and then
   770					 * kernel with journal_chksum is used then we
   771					 * get a situation where the journal flag has
   772					 * checksum flag set but checksums are not
   773					 * present i.e chksum = 0, in the individual
   774					 * commit blocks.
   775					 * Hence to avoid checksum failures, in this
   776					 * situation, this extra check is added.
   777					 */
   778							chksum_err = 1;
   779	
   780					if (chksum_err) {
   781						if (commit_time <
   782							last_trans_commit_time) {
   783							jbd_debug(1, "JBD2: Invalid commit checksum ignored in transaction %u, likely stale data\n",
   784								next_log_block);
   785							brelse(bh);
   786							goto done;
   787						}
   788						info->end_transaction = next_commit_ID;
   789	
   790						if (!jbd2_has_feature_async_commit(journal)) {
   791							journal->j_failed_commit =
   792								next_commit_ID;
   793							brelse(bh);
   794							break;
   795						}
   796					}
   797					crc32_sum = ~0;
   798				}
   799				if (pass == PASS_SCAN &&
   800				    !jbd2_commit_block_csum_verify(journal,
   801								   bh->b_data)) {
   802					if (commit_time < last_trans_commit_time) {
   803						jbd_debug(1, "JBD2: Invalid commit checksum ignored in transaction %u, likely stale data\n",
   804							next_log_block);
   805						brelse(bh);
   806						goto done;
   807					}
   808					info->end_transaction = next_commit_ID;
   809	
   810					if (!jbd2_has_feature_async_commit(journal)) {
   811						journal->j_failed_commit =
   812							next_commit_ID;
   813						brelse(bh);
   814						break;
   815					}
   816				}
   817				if (pass == PASS_SCAN)
   818					last_trans_commit_time = commit_time;
   819				brelse(bh);
   820				next_commit_ID++;
   821				continue;
   822	
   823			case JBD2_REVOKE_BLOCK:
   824				/*
   825				 * Check revoke block crc in pass_scan, if csum verify
   826				 * failed, check commit block time later.
   827				 */
   828				if (pass == PASS_SCAN) {
   829					if (!jbd2_descriptor_block_csum_verify(journal,
   830							bh->b_data)) {
   831						jbd_debug(1, "invalid revoke block found in %lu\n",
   832							next_log_block);
   833						need_check_commit_time = true;
   834					}
   835				}
   836				/* If we aren't in the REVOKE pass, then we can
   837				 * just skip over this block. */
   838				if (pass != PASS_REVOKE) {
   839					brelse(bh);
   840					continue;
   841				}
   842	
   843				err = scan_revoke_records(journal, bh,
   844							  next_commit_ID, info);
   845				brelse(bh);
   846				if (err)
   847					goto failed;
   848				continue;
   849	
   850			default:
   851				jbd_debug(3, "Unrecognised magic %d, end of scan.\n",
   852					  blocktype);
   853				brelse(bh);
   854				goto done;
   855			}
   856		}
   857	
   858	 done:
   859		/*
   860		 * We broke out of the log scan loop: either we came to the
   861		 * known end of the log or we found an unexpected block in the
   862		 * log.  If the latter happened, then we know that the "current"
   863		 * transaction marks the end of the valid log.
   864		 */
   865	
   866		if (pass == PASS_SCAN) {
   867			if (!info->end_transaction)
   868				info->end_transaction = next_commit_ID;
   869		} else {
   870			/* It's really bad news if different passes end up at
   871			 * different places (but possible due to IO errors). */
   872			if (info->end_transaction != next_commit_ID) {
   873				printk(KERN_ERR "JBD2: recovery pass %d ended at "
   874					"transaction %u, expected %u\n",
   875					pass, next_commit_ID, info->end_transaction);
   876				if (!success)
   877					success = -EIO;
   878			}
   879		}
   880		if (block_error && success == 0)
   881			success = -EIO;
   882		return success;
   883	
   884	 failed:
   885		return err;
   886	}
   887	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff mbox series

Patch

diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index a4967b27ffb6..4d0aff628884 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -428,6 +428,8 @@  static int do_one_pass(journal_t *journal,
 	__u32			crc32_sum = ~0; /* Transactional Checksums */
 	int			descr_csum_size = 0;
 	int			block_error = 0;
+	bool		need_check_commit_time = false;
+	__be64		last_trans_commit_time = 0, commit_time = 0;
 
 	/*
 	 * First thing is to establish what we expect to find in the log
@@ -520,12 +522,21 @@  static int do_one_pass(journal_t *journal,
 			if (descr_csum_size > 0 &&
 			    !jbd2_descriptor_block_csum_verify(journal,
 							       bh->b_data)) {
-				printk(KERN_ERR "JBD2: Invalid checksum "
-				       "recovering block %lu in log\n",
-				       next_log_block);
-				err = -EFSBADCRC;
-				brelse(bh);
-				goto failed;
+				/*
+				 * PASS_SCAN can see stale blocks due to lazy
+				 * journal init. Don't error out on those yet.
+				 */
+				if (pass != PASS_SCAN) {
+					pr_err("JBD2: Invalid checksum recovering block %lu in log\n",
+					       next_log_block);
+					err = -EFSBADCRC;
+					brelse(bh);
+					goto failed;
+				}
+				need_check_commit_time = true;
+				jbd_debug(1,
+					"invalid descriptor block found in %lu\n",
+					next_log_block);
 			}
 
 			/* If it is a valid descriptor block, replay it
@@ -535,6 +546,7 @@  static int do_one_pass(journal_t *journal,
 			if (pass != PASS_REPLAY) {
 				if (pass == PASS_SCAN &&
 				    jbd2_has_feature_checksum(journal) &&
+				    !need_check_commit_time &&
 				    !info->end_transaction) {
 					if (calc_chksums(journal, bh,
 							&next_log_block,
@@ -683,11 +695,50 @@  static int do_one_pass(journal_t *journal,
 			 *	 mentioned conditions. Hence assume
 			 *	 "Interrupted Commit".)
 			 */
-
+			/*
+			 * If need_check_commit_time is set, it means
+			 * csum verify failed before, if commit_time is
+			 * increasing, it's same journal, otherwise it
+			 * is stale journal block, just end this
+			 * recovery.
+			 */
+			if (pass == PASS_SCAN) {
+				struct commit_header *cbh =
+					(struct commit_header *)bh->b_data;
+				commit_time =
+					be64_to_cpu(cbh->h_commit_sec);
+				/*
+				 * When need check commit time, it means csum
+				 * verify failed before, if commit time is
+				 * increasing, it's same journal, otherwise
+				 * not same journal, just end this recovery.
+				 */
+				if (need_check_commit_time) {
+					if (commit_time >=
+						last_trans_commit_time) {
+						pr_err("JBD2: Invalid checksum found in transaction %u\n",
+								next_commit_ID);
+						err = -EFSBADCRC;
+						brelse(bh);
+						goto failed;
+					}
+					/*
+					 * It likely does not belong to same
+					 * journal, just end this recovery with
+					 * success.
+					 */
+					jbd_debug(1, "JBD2: Invalid checksum ignored in transaction %u, likely stale data\n",
+							next_commit_ID);
+					err = 0;
+					brelse(bh);
+					goto done;
+				}
+			}
 			/* Found an expected commit block: if checksums
 			 * are present verify them in PASS_SCAN; else not
 			 * much to do other than move on to the next sequence
-			 * number. */
+			 * number.
+			 */
 			if (pass == PASS_SCAN &&
 			    jbd2_has_feature_checksum(journal)) {
 				int chksum_err, chksum_seen;
@@ -727,6 +778,13 @@  static int do_one_pass(journal_t *journal,
 						chksum_err = 1;
 
 				if (chksum_err) {
+					if (commit_time <
+						last_trans_commit_time) {
+						jbd_debug(1, "JBD2: Invalid commit checksum ignored in transaction %u, likely stale data\n",
+							next_log_block);
+						brelse(bh);
+						goto done;
+					}
 					info->end_transaction = next_commit_ID;
 
 					if (!jbd2_has_feature_async_commit(journal)) {
@@ -741,6 +799,12 @@  static int do_one_pass(journal_t *journal,
 			if (pass == PASS_SCAN &&
 			    !jbd2_commit_block_csum_verify(journal,
 							   bh->b_data)) {
+				if (commit_time < last_trans_commit_time) {
+					jbd_debug(1, "JBD2: Invalid commit checksum ignored in transaction %u, likely stale data\n",
+						next_log_block);
+					brelse(bh);
+					goto done;
+				}
 				info->end_transaction = next_commit_ID;
 
 				if (!jbd2_has_feature_async_commit(journal)) {
@@ -750,11 +814,25 @@  static int do_one_pass(journal_t *journal,
 					break;
 				}
 			}
+			if (pass == PASS_SCAN)
+				last_trans_commit_time = commit_time;
 			brelse(bh);
 			next_commit_ID++;
 			continue;
 
 		case JBD2_REVOKE_BLOCK:
+			/*
+			 * Check revoke block crc in pass_scan, if csum verify
+			 * failed, check commit block time later.
+			 */
+			if (pass == PASS_SCAN) {
+				if (!jbd2_descriptor_block_csum_verify(journal,
+						bh->b_data)) {
+					jbd_debug(1, "invalid revoke block found in %lu\n",
+						next_log_block);
+					need_check_commit_time = true;
+				}
+			}
 			/* If we aren't in the REVOKE pass, then we can
 			 * just skip over this block. */
 			if (pass != PASS_REVOKE) {
@@ -822,9 +900,6 @@  static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
 	offset = sizeof(jbd2_journal_revoke_header_t);
 	rcount = be32_to_cpu(header->r_count);
 
-	if (!jbd2_descriptor_block_csum_verify(journal, header))
-		return -EFSBADCRC;
-
 	if (jbd2_journal_has_csum_v2or3(journal))
 		csum_size = sizeof(struct jbd2_journal_block_tail);
 	if (rcount > journal->j_blocksize - csum_size)