Patchwork jbd2: instrument jh on wrong transaction BUG_ON's

login
register
mail settings
Submitter Theodore Ts'o
Date Aug. 14, 2011, 2:53 a.m.
Message ID <1313290418-12695-1-git-send-email-tytso@mit.edu>
Download mbox | patch
Permalink /patch/109953/
State Superseded
Headers show

Comments

Theodore Ts'o - Aug. 14, 2011, 2:53 a.m.
These assertions have been reported as being tripped.  Instrument them
so we get more information about what might be going on.

Cc: curtw@google.com
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---

Curt, you might want to see if you can replicate that jbd2 crash with
this patch applied.  Hopefully we'll get more information about what is
going on.

 fs/jbd2/transaction.c |   42 +++++++++++++++++++++++++++++++++++++-----
 1 files changed, 37 insertions(+), 5 deletions(-)
Curt Wohlgemuth - Aug. 14, 2011, 8:01 p.m.
Hi Ted:

Unfortunately, some deref in your printk didn't go over too well:

[  970.871166] BUG: unable to handle kernel NULL pointer dereference
at 0000000000000008
[  970.872139] IP: [<ffffffff812a158e>] jbd2_journal_dirty_metadata+0x18e/0x220
[  970.872139] PGD 857b8d067 PUD 852f26067 PMD 0
[  970.872139] Oops: 0000 [#1] SMP
[  970.872139] CPU 14
...
[  970.872139] Call Trace:
[  970.872139]  [<ffffffff812a9d26>] ? jbd2_journal_add_journal_head+0x86/0x290
[  970.872139]  [<ffffffff8125c87c>] ? __ext4_journalled_writepage+0xac/0x390
[  970.872139]  [<ffffffff812848b3>] __ext4_handle_dirty_metadata+0x83/0x120
[  970.872139]  [<ffffffff81284c1e>] ? __ext4_journal_get_write_access+0x3e/0x80
[  970.872139]  [<ffffffff8125cac8>] __ext4_journalled_writepage+0x2f8/0x390
[  970.872139]  [<ffffffff8125e264>] ext4_writepage+0x234/0x2f0
[  970.872139]  [<ffffffff81155ad7>] __writepage+0x17/0x40
...

where the IP corresponds to:

(gdb) list *0xffffffff812a158e
0xffffffff812a158e is in jbd2_journal_dirty_metadata
(fs/jbd2/transaction.c:1124).
1119             */
1120            if (jh->b_transaction != transaction) {
1121                    JBUFFER_TRACE(jh, "already on other transaction");
1122                    if (unlikely(jh->b_transaction !=
1123                                 journal->j_committing_transaction)) {
1124                            printk(KERN_EMERG "JBD: %s: "
1125                                   "jh->b_transaction (%llu, %p, %u) != "
1126
"journal->j_committing_transaction (%p, %u)",
1127                                   journal->j_devname,
1128                                   (unsigned long long) bh->b_blocknr,

It seems that  jh->b_transaction is NULL, and deferencing it to print
out jh->b_transaction->t_tid causes the NULL pointer deref.

Curt

On Sat, Aug 13, 2011 at 7:53 PM, Theodore Ts'o <tytso@mit.edu> wrote:
> These assertions have been reported as being tripped.  Instrument them
> so we get more information about what might be going on.
>
> Cc: curtw@google.com
> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
> ---
>
> Curt, you might want to see if you can replicate that jbd2 crash with
> this patch applied.  Hopefully we'll get more information about what is
> going on.
>
>  fs/jbd2/transaction.c |   42 +++++++++++++++++++++++++++++++++++++-----
>  1 files changed, 37 insertions(+), 5 deletions(-)
>
> diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
> index 2d71094..693fe63 100644
> --- a/fs/jbd2/transaction.c
> +++ b/fs/jbd2/transaction.c
> @@ -1093,8 +1093,19 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
>         */
>        if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) {
>                JBUFFER_TRACE(jh, "fastpath");
> -               J_ASSERT_JH(jh, jh->b_transaction ==
> -                                       journal->j_running_transaction);
> +               if (unlikely(jh->b_transaction !=
> +                            journal->j_running_transaction)) {
> +                       printk(KERN_EMERG "JBD: %s: "
> +                              "jh->b_transaction (%llu, %p, %u) != "
> +                              "journal->j_running_transaction (%p, %u)",
> +                              journal->j_devname,
> +                              (unsigned long long) bh->b_blocknr,
> +                              jh->b_transaction,
> +                              jh->b_transaction->t_tid,
> +                              journal->j_running_transaction,
> +                              journal->j_running_transaction->t_tid);
> +                       BUG_ON(1);
> +               }
>                goto out_unlock_bh;
>        }
>
> @@ -1108,9 +1119,30 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
>         */
>        if (jh->b_transaction != transaction) {
>                JBUFFER_TRACE(jh, "already on other transaction");
> -               J_ASSERT_JH(jh, jh->b_transaction ==
> -                                       journal->j_committing_transaction);
> -               J_ASSERT_JH(jh, jh->b_next_transaction == transaction);
> +               if (unlikely(jh->b_transaction !=
> +                            journal->j_committing_transaction)) {
> +                       printk(KERN_EMERG "JBD: %s: "
> +                              "jh->b_transaction (%llu, %p, %u) != "
> +                              "journal->j_committing_transaction (%p, %u)",
> +                              journal->j_devname,
> +                              (unsigned long long) bh->b_blocknr,
> +                              jh->b_transaction,
> +                              jh->b_transaction->t_tid,
> +                              journal->j_committing_transaction,
> +                              journal->j_committing_transaction->t_tid);
> +                       BUG_ON(1);
> +               }
> +               if (unlikely(jh->b_next_transaction != transaction)) {
> +                       printk(KERN_EMERG "JBD: %s: "
> +                              "jh->b_next_transaction (%llu, %p, %u) != "
> +                              "transaction (%p, %u)",
> +                              journal->j_devname,
> +                              (unsigned long long) bh->b_blocknr,
> +                              jh->b_next_transaction,
> +                              jh->b_next_transaction->t_tid,
> +                              transaction, transaction->t_tid);
> +                       BUG_ON(1);
> +               }
>                /* And this case is illegal: we can't reuse another
>                 * transaction's data buffer, ever. */
>                goto out_unlock_bh;
> --
> 1.7.4.1.22.gec8e1.dirty
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Theodore Ts'o - Aug. 15, 2011, 2:06 a.m.
On Sun, Aug 14, 2011 at 01:01:01PM -0700, Curt Wohlgemuth wrote:
> 
> It seems that  jh->b_transaction is NULL, and deferencing it to print
> out jh->b_transaction->t_tid causes the NULL pointer deref.

Hmm, if jh->b_transaction is null, then
jbd2_journal_get_write_access() must not have been called on the
journal_head.  But the relevant code from ext4_journalled_writepage() is:

	ret = walk_page_buffers(handle, page_bufs, 0, len, NULL,
				do_journal_get_write_access);

	err = walk_page_buffers(handle, page_bufs, 0, len, NULL,
				write_end_fn);

(write_end_fn is the function which calls which ultimately calls
ext4_handle_dirty_metadata which then calls
jbd2_journal_dirty_metadata(), which is where you're seeing the BUG_ON).

I suspect what is going on is do_get_write_access() is returning an
error, which means we never set jh->b_transaction.  Hence when we call
jbd2_journal_data_metadata(), we trigger the BUG.

OK, what could cause do_get_write_access() to return an error?  Two
conditions: if the handle is aborted (due to a previous error), in
which case it returns -EROFS, or if it can't get the memory needed
make a copy of the buffer, in which case there should have been a
"do_get_write_access: OOM for frozen buffer" error message earlier in
the log.  (No, it's not prefixed with JBD2 --- it probably should be.)

Any chance you're seeing any indication of either possibility in the
messages before the BUG message?

						- Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 2d71094..693fe63 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1093,8 +1093,19 @@  int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 	 */
 	if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) {
 		JBUFFER_TRACE(jh, "fastpath");
-		J_ASSERT_JH(jh, jh->b_transaction ==
-					journal->j_running_transaction);
+		if (unlikely(jh->b_transaction !=
+			     journal->j_running_transaction)) {
+			printk(KERN_EMERG "JBD: %s: "
+			       "jh->b_transaction (%llu, %p, %u) != "
+			       "journal->j_running_transaction (%p, %u)",
+			       journal->j_devname,
+			       (unsigned long long) bh->b_blocknr,
+			       jh->b_transaction,
+			       jh->b_transaction->t_tid,
+			       journal->j_running_transaction,
+			       journal->j_running_transaction->t_tid);
+			BUG_ON(1);
+		}
 		goto out_unlock_bh;
 	}
 
@@ -1108,9 +1119,30 @@  int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 	 */
 	if (jh->b_transaction != transaction) {
 		JBUFFER_TRACE(jh, "already on other transaction");
-		J_ASSERT_JH(jh, jh->b_transaction ==
-					journal->j_committing_transaction);
-		J_ASSERT_JH(jh, jh->b_next_transaction == transaction);
+		if (unlikely(jh->b_transaction !=
+			     journal->j_committing_transaction)) {
+			printk(KERN_EMERG "JBD: %s: "
+			       "jh->b_transaction (%llu, %p, %u) != "
+			       "journal->j_committing_transaction (%p, %u)",
+			       journal->j_devname,
+			       (unsigned long long) bh->b_blocknr,
+			       jh->b_transaction,
+			       jh->b_transaction->t_tid,
+			       journal->j_committing_transaction,
+			       journal->j_committing_transaction->t_tid);
+			BUG_ON(1);
+		}
+		if (unlikely(jh->b_next_transaction != transaction)) {
+			printk(KERN_EMERG "JBD: %s: "
+			       "jh->b_next_transaction (%llu, %p, %u) != "
+			       "transaction (%p, %u)",
+			       journal->j_devname,
+			       (unsigned long long) bh->b_blocknr,
+			       jh->b_next_transaction,
+			       jh->b_next_transaction->t_tid,
+			       transaction, transaction->t_tid);
+			BUG_ON(1);
+		}
 		/* And this case is illegal: we can't reuse another
 		 * transaction's data buffer, ever. */
 		goto out_unlock_bh;