diff mbox series

ext4, jbd2: ensure panic when there is no need to record errno in the jbd2 sb

Message ID 20191126144537.30020-1-yi.zhang@huawei.com
State New
Headers show
Series ext4, jbd2: ensure panic when there is no need to record errno in the jbd2 sb | expand

Commit Message

Zhang Yi Nov. 26, 2019, 2:45 p.m. UTC
JBD2_REC_ERR flag used to indicate the errno has been updated when jbd2
aborted, and then __ext4_abort() and ext4_handle_error() can invoke
panic if ERRORS_PANIC is specified. But there is one exception, if jbd2
thread failed to submit commit record, it abort journal through
invoking __jbd2_journal_abort_hard() without set this flag, so we can
no longer panic. Fix this by set such flag even if there is no need to
record errno in the jbd2 super block.

Fixes: 4327ba52afd03 ("ext4, jbd2: ensure entering into panic after recording an error in superblock")
Signed-off-by: zhangyi (F) <yi.zhang@huawei.com>
Cc: <stable@vger.kernel.org>
---
 fs/ext4/super.c      |  4 ++--
 fs/jbd2/journal.c    | 46 +++++++++++++++++++++++++++++---------------
 include/linux/jbd2.h |  4 +++-
 3 files changed, 36 insertions(+), 18 deletions(-)

Comments

Jan Kara Nov. 29, 2019, 2:46 p.m. UTC | #1
On Tue 26-11-19 22:45:37, zhangyi (F) wrote:
> JBD2_REC_ERR flag used to indicate the errno has been updated when jbd2
> aborted, and then __ext4_abort() and ext4_handle_error() can invoke
> panic if ERRORS_PANIC is specified. But there is one exception, if jbd2
> thread failed to submit commit record, it abort journal through
> invoking __jbd2_journal_abort_hard() without set this flag, so we can
> no longer panic. Fix this by set such flag even if there is no need to
> record errno in the jbd2 super block.
> 
> Fixes: 4327ba52afd03 ("ext4, jbd2: ensure entering into panic after recording an error in superblock")
> Signed-off-by: zhangyi (F) <yi.zhang@huawei.com>
> Cc: <stable@vger.kernel.org>

Thanks for the patch. This indeed looks like a bug. I was trying hard to
understand why are we actually using __jbd2_journal_abort_hard() in
fs/jbd2/commit.c in the first place. And after some digging, I think it is
an oversight and we should just use jbd2_journal_abort(). The calls have been
introduced by commit 818d276ceb83a "ext4: Add the journal checksum
feature". Before that commit, we were just using jbd2_journal_abort() when
writing commit block failed. And when we use jbd2_journal_abort() from
everywhere, that will also deal with the problem you've found.

Also as a nice cleanup we could then just drop __jbd2_journal_abort_hard(),
__jbd2_journal_abort_soft() and have all the functionality in a single
function jbd2_journal_abort().

								Honza

> ---
>  fs/ext4/super.c      |  4 ++--
>  fs/jbd2/journal.c    | 46 +++++++++++++++++++++++++++++---------------
>  include/linux/jbd2.h |  4 +++-
>  3 files changed, 36 insertions(+), 18 deletions(-)
> 
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index dd654e53ba3d..76cde5fb8207 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -482,7 +482,7 @@ static void ext4_handle_error(struct super_block *sb)
>  		sb->s_flags |= SB_RDONLY;
>  	} else if (test_opt(sb, ERRORS_PANIC)) {
>  		if (EXT4_SB(sb)->s_journal &&
> -		  !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
> +		  !(EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT_FINISHED))
>  			return;
>  		panic("EXT4-fs (device %s): panic forced after error\n",
>  			sb->s_id);
> @@ -701,7 +701,7 @@ void __ext4_abort(struct super_block *sb, const char *function,
>  	}
>  	if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) {
>  		if (EXT4_SB(sb)->s_journal &&
> -		  !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
> +		  !(EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT_FINISHED))
>  			return;
>  		panic("EXT4-fs panic from previous error\n");
>  	}
> diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
> index 1c58859aa592..eb5e60df0da4 100644
> --- a/fs/jbd2/journal.c
> +++ b/fs/jbd2/journal.c
> @@ -2072,13 +2072,7 @@ int jbd2_journal_wipe(journal_t *journal, int write)
>   * Two internal functions, which provide abort to the jbd layer
>   * itself are here.
>   */
> -
> -/*
> - * Quick version for internal journal use (doesn't lock the journal).
> - * Aborts hard --- we mark the abort as occurred, but do _nothing_ else,
> - * and don't attempt to make any other journal updates.
> - */
> -void __jbd2_journal_abort_hard(journal_t *journal)
> +static void __jbd2_journal_abort(journal_t *journal)
>  {
>  	transaction_t *transaction;
>  
> @@ -2096,8 +2090,33 @@ void __jbd2_journal_abort_hard(journal_t *journal)
>  	write_unlock(&journal->j_state_lock);
>  }
>  
> -/* Soft abort: record the abort error status in the journal superblock,
> - * but don't do any other IO. */
> +/*
> + * Mark journal abort finished when the errno in the sb has been recorded
> + * or no need to record.
> + */
> +static void __jbd2_journal_finish_abort(journal_t *journal)
> +{
> +	write_lock(&journal->j_state_lock);
> +	journal->j_flags |= JBD2_ABORT_FINISHED;
> +	write_unlock(&journal->j_state_lock);
> +}
> +
> +/*
> + * Quick version for internal journal use (doesn't lock the journal).
> + * Aborts hard --- we mark the abort as occurred, but do _nothing_ else,
> + * and don't attempt to make any other journal updates.
> + */
> +void __jbd2_journal_abort_hard(journal_t *journal)
> +{
> +	/* Nothing need to be recorded, mark it as finished directly */
> +	__jbd2_journal_abort(journal);
> +	__jbd2_journal_finish_abort(journal);
> +}
> +
> +/*
> + * Soft abort: record the abort error status in the journal superblock,
> + * but don't do any other IO.
> + */
>  static void __journal_abort_soft (journal_t *journal, int errno)
>  {
>  	int old_errno;
> @@ -2116,14 +2135,11 @@ static void __journal_abort_soft (journal_t *journal, int errno)
>  	}
>  	write_unlock(&journal->j_state_lock);
>  
> -	__jbd2_journal_abort_hard(journal);
> +	__jbd2_journal_abort(journal);
>  
> -	if (errno) {
> +	if (errno)
>  		jbd2_journal_update_sb_errno(journal);
> -		write_lock(&journal->j_state_lock);
> -		journal->j_flags |= JBD2_REC_ERR;
> -		write_unlock(&journal->j_state_lock);
> -	}
> +	__jbd2_journal_finish_abort(journal);
>  }
>  
>  /**
> diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
> index 603fbc4e2f70..870f7f2f912c 100644
> --- a/include/linux/jbd2.h
> +++ b/include/linux/jbd2.h
> @@ -1248,7 +1248,9 @@ JBD2_FEATURE_INCOMPAT_FUNCS(csum3,		CSUM_V3)
>  #define JBD2_ABORT_ON_SYNCDATA_ERR	0x040	/* Abort the journal on file
>  						 * data write error in ordered
>  						 * mode */
> -#define JBD2_REC_ERR	0x080	/* The errno in the sb has been recorded */
> +#define JBD2_ABORT_FINISHED		0x080	/* Abort finished, the errno
> +						 * in the sb has been recorded
> +						 * if necessary */
>  
>  /*
>   * Function declarations for the journaling transaction and buffer
> -- 
> 2.17.2
>
Zhang Yi Nov. 30, 2019, 3:24 a.m. UTC | #2
On 2019/11/29 22:46, Jan Kara wrote:
> On Tue 26-11-19 22:45:37, zhangyi (F) wrote:
>> JBD2_REC_ERR flag used to indicate the errno has been updated when jbd2
>> aborted, and then __ext4_abort() and ext4_handle_error() can invoke
>> panic if ERRORS_PANIC is specified. But there is one exception, if jbd2
>> thread failed to submit commit record, it abort journal through
>> invoking __jbd2_journal_abort_hard() without set this flag, so we can
>> no longer panic. Fix this by set such flag even if there is no need to
>> record errno in the jbd2 super block.
>>
>> Fixes: 4327ba52afd03 ("ext4, jbd2: ensure entering into panic after recording an error in superblock")
>> Signed-off-by: zhangyi (F) <yi.zhang@huawei.com>
>> Cc: <stable@vger.kernel.org>
> 
> Thanks for the patch. This indeed looks like a bug. I was trying hard to
> understand why are we actually using __jbd2_journal_abort_hard() in
> fs/jbd2/commit.c in the first place. And after some digging, I think it is
> an oversight and we should just use jbd2_journal_abort(). The calls have been
> introduced by commit 818d276ceb83a "ext4: Add the journal checksum
> feature". Before that commit, we were just using jbd2_journal_abort() when
> writing commit block failed. And when we use jbd2_journal_abort() from
> everywhere, that will also deal with the problem you've found.
> 
> Also as a nice cleanup we could then just drop __jbd2_journal_abort_hard(),
> __jbd2_journal_abort_soft() and have all the functionality in a single
> function jbd2_journal_abort().
>

Indeed, it seems that we also need to record the errno if we failed to
submit commit block, I will remove __jbd2_journal_abort_hard() and combine
them in my next iteration.

Thanks,
Yi.
Zhang Yi Nov. 30, 2019, 2:50 p.m. UTC | #3
On 2019/11/30 11:24, zhangyi (F) wrote:
> On 2019/11/29 22:46, Jan Kara wrote:
>> On Tue 26-11-19 22:45:37, zhangyi (F) wrote:
>>> JBD2_REC_ERR flag used to indicate the errno has been updated when jbd2
>>> aborted, and then __ext4_abort() and ext4_handle_error() can invoke
>>> panic if ERRORS_PANIC is specified. But there is one exception, if jbd2
>>> thread failed to submit commit record, it abort journal through
>>> invoking __jbd2_journal_abort_hard() without set this flag, so we can
>>> no longer panic. Fix this by set such flag even if there is no need to
>>> record errno in the jbd2 super block.
>>>
>>> Fixes: 4327ba52afd03 ("ext4, jbd2: ensure entering into panic after recording an error in superblock")
>>> Signed-off-by: zhangyi (F) <yi.zhang@huawei.com>
>>> Cc: <stable@vger.kernel.org>
>>
>> Thanks for the patch. This indeed looks like a bug. I was trying hard to
>> understand why are we actually using __jbd2_journal_abort_hard() in
>> fs/jbd2/commit.c in the first place. And after some digging, I think it is
>> an oversight and we should just use jbd2_journal_abort(). The calls have been
>> introduced by commit 818d276ceb83a "ext4: Add the journal checksum
>> feature". Before that commit, we were just using jbd2_journal_abort() when
>> writing commit block failed. And when we use jbd2_journal_abort() from
>> everywhere, that will also deal with the problem you've found.
>>
>> Also as a nice cleanup we could then just drop __jbd2_journal_abort_hard(),
>> __jbd2_journal_abort_soft() and have all the functionality in a single
>> function jbd2_journal_abort().
>>
> 
> Indeed, it seems that we also need to record the errno if we failed to
> submit commit block, I will remove __jbd2_journal_abort_hard() and combine
> them in my next iteration.
> 

Hi Ted and Jan,
I am confusing about the commit fb7c02445c49 "ext4: pass
-ESHUTDOWN code to jbd2 layer" when I trying to cleanup the
__journal_abort_soft() and __jbd2_journal_abort_hard().

Before this commit, we will not record the errno if we shutdown the
filesystem no matter it has been aborted or not, so the errno will not
change.
After this commit, we record 0 to "sb->s_errno" for the first
jbd2_journal_abort(-ESHUTDOWN), and we also do not update the errno
if it has been aborted and record a no-zero errno because of the
follow checking.

+       if (journal->j_flags & JBD2_ABORT) {
+               write_unlock(&journal->j_state_lock);
+               if (!old_errno && old_errno != -ESHUTDOWN &&
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+                   errno == -ESHUTDOWN)
+                       jbd2_journal_update_sb_errno(journal);
+               return;
+       }

So the only modification of this patch is:
1) fix the lock;
2) set journal->j_errno = -ESHUTDOWN and JBD2_REC_ERR flag when we
   invoke jbd2_journal_abort(-ESHUTDOWN). These two modifications
   do not relate to the git log you mentioned.

I guess do you want to mean
  if (old_errno && old_errno != -ESHUTDOWN && errno == -ESHUTDOWN) ?

If so, why we need to overwrite the last aborted errno to 0,
if the filesystem was already aborted for some reasons, will
it cover up the issue? Am I miss something?

Thanks,
Yi.
diff mbox series

Patch

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index dd654e53ba3d..76cde5fb8207 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -482,7 +482,7 @@  static void ext4_handle_error(struct super_block *sb)
 		sb->s_flags |= SB_RDONLY;
 	} else if (test_opt(sb, ERRORS_PANIC)) {
 		if (EXT4_SB(sb)->s_journal &&
-		  !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
+		  !(EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT_FINISHED))
 			return;
 		panic("EXT4-fs (device %s): panic forced after error\n",
 			sb->s_id);
@@ -701,7 +701,7 @@  void __ext4_abort(struct super_block *sb, const char *function,
 	}
 	if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) {
 		if (EXT4_SB(sb)->s_journal &&
-		  !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
+		  !(EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT_FINISHED))
 			return;
 		panic("EXT4-fs panic from previous error\n");
 	}
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 1c58859aa592..eb5e60df0da4 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2072,13 +2072,7 @@  int jbd2_journal_wipe(journal_t *journal, int write)
  * Two internal functions, which provide abort to the jbd layer
  * itself are here.
  */
-
-/*
- * Quick version for internal journal use (doesn't lock the journal).
- * Aborts hard --- we mark the abort as occurred, but do _nothing_ else,
- * and don't attempt to make any other journal updates.
- */
-void __jbd2_journal_abort_hard(journal_t *journal)
+static void __jbd2_journal_abort(journal_t *journal)
 {
 	transaction_t *transaction;
 
@@ -2096,8 +2090,33 @@  void __jbd2_journal_abort_hard(journal_t *journal)
 	write_unlock(&journal->j_state_lock);
 }
 
-/* Soft abort: record the abort error status in the journal superblock,
- * but don't do any other IO. */
+/*
+ * Mark journal abort finished when the errno in the sb has been recorded
+ * or no need to record.
+ */
+static void __jbd2_journal_finish_abort(journal_t *journal)
+{
+	write_lock(&journal->j_state_lock);
+	journal->j_flags |= JBD2_ABORT_FINISHED;
+	write_unlock(&journal->j_state_lock);
+}
+
+/*
+ * Quick version for internal journal use (doesn't lock the journal).
+ * Aborts hard --- we mark the abort as occurred, but do _nothing_ else,
+ * and don't attempt to make any other journal updates.
+ */
+void __jbd2_journal_abort_hard(journal_t *journal)
+{
+	/* Nothing need to be recorded, mark it as finished directly */
+	__jbd2_journal_abort(journal);
+	__jbd2_journal_finish_abort(journal);
+}
+
+/*
+ * Soft abort: record the abort error status in the journal superblock,
+ * but don't do any other IO.
+ */
 static void __journal_abort_soft (journal_t *journal, int errno)
 {
 	int old_errno;
@@ -2116,14 +2135,11 @@  static void __journal_abort_soft (journal_t *journal, int errno)
 	}
 	write_unlock(&journal->j_state_lock);
 
-	__jbd2_journal_abort_hard(journal);
+	__jbd2_journal_abort(journal);
 
-	if (errno) {
+	if (errno)
 		jbd2_journal_update_sb_errno(journal);
-		write_lock(&journal->j_state_lock);
-		journal->j_flags |= JBD2_REC_ERR;
-		write_unlock(&journal->j_state_lock);
-	}
+	__jbd2_journal_finish_abort(journal);
 }
 
 /**
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 603fbc4e2f70..870f7f2f912c 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1248,7 +1248,9 @@  JBD2_FEATURE_INCOMPAT_FUNCS(csum3,		CSUM_V3)
 #define JBD2_ABORT_ON_SYNCDATA_ERR	0x040	/* Abort the journal on file
 						 * data write error in ordered
 						 * mode */
-#define JBD2_REC_ERR	0x080	/* The errno in the sb has been recorded */
+#define JBD2_ABORT_FINISHED		0x080	/* Abort finished, the errno
+						 * in the sb has been recorded
+						 * if necessary */
 
 /*
  * Function declarations for the journaling transaction and buffer