diff mbox series

ext4: fix warning when submitting superblock in ext4_commit_super()

Message ID 20220518141020.2432652-1-yi.zhang@huawei.com
State Superseded
Headers show
Series ext4: fix warning when submitting superblock in ext4_commit_super() | expand

Commit Message

Zhang Yi May 18, 2022, 2:10 p.m. UTC
We have already check the io_error and uptodate flag before submitting
the superblock buffer, and re-set the uptodate flag if it has been
failed to write out. But it was lockless and could be raced by another
ext4_commit_super(), and finally trigger '!uptodate' WARNING when
marking buffer dirty. Fix it by submit buffer directly.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
 fs/ext4/super.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

Comments

Ritesh Harjani (IBM) May 18, 2022, 5:06 p.m. UTC | #1
On 22/05/18 10:10PM, Zhang Yi wrote:
> We have already check the io_error and uptodate flag before submitting
> the superblock buffer, and re-set the uptodate flag if it has been
> failed to write out. But it was lockless and could be raced by another
> ext4_commit_super(), and finally trigger '!uptodate' WARNING when
> marking buffer dirty. Fix it by submit buffer directly.

I agree that there could be a race with multiple processes trying to call
ext4_commit_super(). Do you have a easy reproducer for this issue?

Also do you think something like below should fix the problem too?
So if you lock the buffer from checking until marking the buffer dirty, that
should avoid the race too that you are reporting.
Thoughts?


diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6900da973ce2..3447841fe654 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -6007,6 +6007,7 @@ static int ext4_commit_super(struct super_block *sb)

        ext4_update_super(sb);

+       lock_buffer(sbh);
        if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
                /*
                 * Oh, dear.  A previous attempt to write the
@@ -6023,6 +6024,7 @@ static int ext4_commit_super(struct super_block *sb)
        }
        BUFFER_TRACE(sbh, "marking dirty");
        mark_buffer_dirty(sbh);
+       unlock_buffer(sbh);
        error = __sync_dirty_buffer(sbh,
                REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0));
        if (buffer_write_io_error(sbh)) {
Zhang Yi May 19, 2022, 3:13 a.m. UTC | #2
On 2022/5/19 1:06, Ritesh Harjani wrote:
> On 22/05/18 10:10PM, Zhang Yi wrote:
>> We have already check the io_error and uptodate flag before submitting
>> the superblock buffer, and re-set the uptodate flag if it has been
>> failed to write out. But it was lockless and could be raced by another
>> ext4_commit_super(), and finally trigger '!uptodate' WARNING when
>> marking buffer dirty. Fix it by submit buffer directly.
> 
> I agree that there could be a race with multiple processes trying to call
> ext4_commit_super(). Do you have a easy reproducer for this issue?
> 

Sorry, I don't have a easy reproducer, but we can always reproduce it through
inject delay and add filters into the ext4_commit_super().

1. Apply below diff.
 static int ext4_commit_super(struct super_block *sb)
 {
 	struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
@@ -6026,9 +6027,22 @@ static int ext4_commit_super(struct super_block *sb)
 		set_buffer_uptodate(sbh);
 	}
 	BUFFER_TRACE(sbh, "marking dirty");
+	if (!strcmp(current->comm, "touch"))
+		pr_err("touch (%d) enter\n", current->pid);
+	if (!strcmp(current->comm, "mkdir")) {
+		pr_err("mkdir(%d): wait touch sync\n", current->pid);
+		msleep(1000);
+		pr_err("mkdir(%d): wait touch sync %d\n", current->pid, buffer_uptodate(sbh));
+	}
 	mark_buffer_dirty(sbh);
+	if (!strcmp(current->comm, "mkdir"))
+		pr_err("mkdir(%d): mark\n", current->pid);
 	error = __sync_dirty_buffer(sbh,
 		REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0));
+	if (error) {
+		pr_err("%s(%d) sync fail %d\n", current->comm, current->pid, buffer_uptodate(sbh));
+		msleep(2000);
+	}
 	if (buffer_write_io_error(sbh)) {
 		ext4_msg(sb, KERN_ERR, "I/O error while writing "
 		       "superblock");

2. Run this script.
#!/bin/bash
echo running > /sys/block/sdb/device/state
sleep 1
umount /mnt
mkfs.ext4 -F -E lazy_itable_init=0,lazy_journal_init=0 /dev/sdb
mount /dev/sdb -o errors=remount-ro,stripe=2048,data_err=abort /mnt
mkdir /mnt/dir_a
mkdir -p /mnt/dir_b

sync
sync

echo 3 > /proc/sys/vm/drop_caches
echo offline > /sys/block/sdb/device/state

sleep 1
mkdir /mnt/dir_a/a &
touch /mnt/dir_b/b


[ 1586.472287] ------------[ cut here ]------------
[ 1586.473834] WARNING: CPU: 14 PID: 1425 at fs/buffer.c:1081 mark_buffer_dirty+0x28f/0x330
[ 1586.476519] Modules linked in:
[ 1586.477567] CPU: 14 PID: 1425 Comm: mkdir Not tainted 5.18.0-rc7-dirty #745
[ 1586.479854] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-ppc4
[ 1586.482709] RIP: 0010:mark_buffer_dirty+0x28f/0x330
[ 1586.483400] Code: a8 00 00 00 48 83 05 8f dd 0d 03 01 48 83 e8 01 e9 df fe ff ff 48 83 05 fe e1 0d 033
[ 1586.488136] RSP: 0018:ffffa8a6c0ef3b90 EFLAGS: 00010202
[ 1586.490142] RAX: 0000000000116418 RBX: ffff93f5bd899000 RCX: 0000000000000000
[ 1586.492571] RDX: 0000000000000000 RSI: ffffffff8bef9549 RDI: ffff93f5bd899000
[ 1586.494988] RBP: ffff93f5beffd000 R08: 0000000000000000 R09: ffffa8a6c0ef39c0
[ 1586.497380] R10: 0000000000000001 R11: 0000000000000001 R12: ffff93f5b3de0000
[ 1586.499674] R13: 0000000000000000 R14: ffffffff8b849da0 R15: 0000000000000000
[ 1586.501964] FS:  00007f561455c0c0(0000) GS:ffff93fc65980000(0000) knlGS:0000000000000000
[ 1586.504493] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1586.506303] CR2: 00007f5614706f80 CR3: 0000000105534000 CR4: 00000000000006e0
[ 1586.508561] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 1586.509652] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 1586.510654] Call Trace:
[ 1586.511560]  <TASK>
[ 1586.512228]  ext4_commit_super+0xb1/0x2e0
[ 1586.513362]  ext4_handle_error+0x287/0x2a0
[ 1586.514508]  __ext4_error+0x138/0x240
[ 1586.515527]  ? __might_sleep+0x56/0xb0
[ 1586.516571]  ? __getblk_gfp+0x47/0x630
[ 1586.517636]  ext4_journal_check_start+0xd1/0xf0
[ 1586.518884]  __ext4_journal_start_sb+0x61/0x1f0
[ 1586.520126]  __ext4_new_inode+0x12ee/0x2670
[ 1586.521283]  ? ext4_lookup+0x297/0x340
[ 1586.522322]  ext4_mkdir+0x1a5/0x4f0
[ 1586.523298]  vfs_mkdir+0x7c/0x1b0
[ 1586.523981]  do_mkdirat+0x9e/0x160
[ 1586.524488]  __x64_sys_mkdir+0x41/0x60
[ 1586.525054]  do_syscall_64+0x3b/0x90
[ 1586.525590]  entry_SYSCALL_64_after_hwframe+0x44/0xae
[ 1586.526341] RIP: 0033:0x7f5614710ecb

> Also do you think something like below should fix the problem too?
> So if you lock the buffer from checking until marking the buffer dirty, that
> should avoid the race too that you are reporting.
> Thoughts?
> 

Thanks for your suggestion. I've thought about this solution and yes it's simpler
to fix the race, but I think we lock and unlock the sbh several times just for
calling standard buffer write helpers is not so good. Opencode the submit
procedure looks more clear to me. Anyway, Your solution is also fine by me.

Thanks,
Yi.

> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 6900da973ce2..3447841fe654 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -6007,6 +6007,7 @@ static int ext4_commit_super(struct super_block *sb)
> 
>         ext4_update_super(sb);
> 
> +       lock_buffer(sbh);
>         if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
>                 /*
>                  * Oh, dear.  A previous attempt to write the
> @@ -6023,6 +6024,7 @@ static int ext4_commit_super(struct super_block *sb)
>         }
>         BUFFER_TRACE(sbh, "marking dirty");
>         mark_buffer_dirty(sbh);
> +       unlock_buffer(sbh);
>         error = __sync_dirty_buffer(sbh,
>                 REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0));
>         if (buffer_write_io_error(sbh)) {
> .
>
Ritesh Harjani (IBM) May 19, 2022, 6:29 a.m. UTC | #3
On 22/05/19 11:13AM, Zhang Yi wrote:
> On 2022/5/19 1:06, Ritesh Harjani wrote:
> > On 22/05/18 10:10PM, Zhang Yi wrote:
> >> We have already check the io_error and uptodate flag before submitting
> >> the superblock buffer, and re-set the uptodate flag if it has been
> >> failed to write out. But it was lockless and could be raced by another
> >> ext4_commit_super(), and finally trigger '!uptodate' WARNING when
> >> marking buffer dirty. Fix it by submit buffer directly.
> >
> > I agree that there could be a race with multiple processes trying to call
> > ext4_commit_super(). Do you have a easy reproducer for this issue?
> >
>
> Sorry, I don't have a easy reproducer, but we can always reproduce it through
> inject delay and add filters into the ext4_commit_super().

Sure, thanks for sharing.

>
> 1. Apply below diff.
>  static int ext4_commit_super(struct super_block *sb)
>  {
>  	struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
> @@ -6026,9 +6027,22 @@ static int ext4_commit_super(struct super_block *sb)
>  		set_buffer_uptodate(sbh);
>  	}
>  	BUFFER_TRACE(sbh, "marking dirty");
> +	if (!strcmp(current->comm, "touch"))
> +		pr_err("touch (%d) enter\n", current->pid);
> +	if (!strcmp(current->comm, "mkdir")) {
> +		pr_err("mkdir(%d): wait touch sync\n", current->pid);
> +		msleep(1000);
> +		pr_err("mkdir(%d): wait touch sync %d\n", current->pid, buffer_uptodate(sbh));
> +	}
>  	mark_buffer_dirty(sbh);
> +	if (!strcmp(current->comm, "mkdir"))
> +		pr_err("mkdir(%d): mark\n", current->pid);
>  	error = __sync_dirty_buffer(sbh,
>  		REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0));
> +	if (error) {
> +		pr_err("%s(%d) sync fail %d\n", current->comm, current->pid, buffer_uptodate(sbh));
> +		msleep(2000);
> +	}
>  	if (buffer_write_io_error(sbh)) {
>  		ext4_msg(sb, KERN_ERR, "I/O error while writing "
>  		       "superblock");
>
> 2. Run this script.
> #!/bin/bash
> echo running > /sys/block/sdb/device/state
> sleep 1
> umount /mnt
> mkfs.ext4 -F -E lazy_itable_init=0,lazy_journal_init=0 /dev/sdb
> mount /dev/sdb -o errors=remount-ro,stripe=2048,data_err=abort /mnt
> mkdir /mnt/dir_a
> mkdir -p /mnt/dir_b
>
> sync
> sync
>
> echo 3 > /proc/sys/vm/drop_caches
> echo offline > /sys/block/sdb/device/state
>
> sleep 1
> mkdir /mnt/dir_a/a &
> touch /mnt/dir_b/b
>
>
> [ 1586.472287] ------------[ cut here ]------------
> [ 1586.473834] WARNING: CPU: 14 PID: 1425 at fs/buffer.c:1081 mark_buffer_dirty+0x28f/0x330
> [ 1586.476519] Modules linked in:
> [ 1586.477567] CPU: 14 PID: 1425 Comm: mkdir Not tainted 5.18.0-rc7-dirty #745
> [ 1586.479854] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-ppc4
> [ 1586.482709] RIP: 0010:mark_buffer_dirty+0x28f/0x330
> [ 1586.483400] Code: a8 00 00 00 48 83 05 8f dd 0d 03 01 48 83 e8 01 e9 df fe ff ff 48 83 05 fe e1 0d 033
> [ 1586.488136] RSP: 0018:ffffa8a6c0ef3b90 EFLAGS: 00010202
> [ 1586.490142] RAX: 0000000000116418 RBX: ffff93f5bd899000 RCX: 0000000000000000
> [ 1586.492571] RDX: 0000000000000000 RSI: ffffffff8bef9549 RDI: ffff93f5bd899000
> [ 1586.494988] RBP: ffff93f5beffd000 R08: 0000000000000000 R09: ffffa8a6c0ef39c0
> [ 1586.497380] R10: 0000000000000001 R11: 0000000000000001 R12: ffff93f5b3de0000
> [ 1586.499674] R13: 0000000000000000 R14: ffffffff8b849da0 R15: 0000000000000000
> [ 1586.501964] FS:  00007f561455c0c0(0000) GS:ffff93fc65980000(0000) knlGS:0000000000000000
> [ 1586.504493] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 1586.506303] CR2: 00007f5614706f80 CR3: 0000000105534000 CR4: 00000000000006e0
> [ 1586.508561] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [ 1586.509652] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> [ 1586.510654] Call Trace:
> [ 1586.511560]  <TASK>
> [ 1586.512228]  ext4_commit_super+0xb1/0x2e0
> [ 1586.513362]  ext4_handle_error+0x287/0x2a0
> [ 1586.514508]  __ext4_error+0x138/0x240
> [ 1586.515527]  ? __might_sleep+0x56/0xb0
> [ 1586.516571]  ? __getblk_gfp+0x47/0x630
> [ 1586.517636]  ext4_journal_check_start+0xd1/0xf0
> [ 1586.518884]  __ext4_journal_start_sb+0x61/0x1f0
> [ 1586.520126]  __ext4_new_inode+0x12ee/0x2670
> [ 1586.521283]  ? ext4_lookup+0x297/0x340
> [ 1586.522322]  ext4_mkdir+0x1a5/0x4f0
> [ 1586.523298]  vfs_mkdir+0x7c/0x1b0
> [ 1586.523981]  do_mkdirat+0x9e/0x160
> [ 1586.524488]  __x64_sys_mkdir+0x41/0x60
> [ 1586.525054]  do_syscall_64+0x3b/0x90
> [ 1586.525590]  entry_SYSCALL_64_after_hwframe+0x44/0xae
> [ 1586.526341] RIP: 0033:0x7f5614710ecb
>
> > Also do you think something like below should fix the problem too?
> > So if you lock the buffer from checking until marking the buffer dirty, that
> > should avoid the race too that you are reporting.
> > Thoughts?
> >
>
> Thanks for your suggestion. I've thought about this solution and yes it's simpler
> to fix the race, but I think we lock and unlock the sbh several times just for
> calling standard buffer write helpers is not so good. Opencode the submit
> procedure looks more clear to me.

I agree your solution was cleaner since it does not has a lot of lock/unlock.
My suggestion came in from looking at the history.
This lock was added here [1] and I think it somehow got removed in this patch[2]

[1]: https://lore.kernel.org/linux-ext4/1467285150-15977-2-git-send-email-pranjas@gmail.com/
[2]: https://lore.kernel.org/linux-ext4/20201216101844.22917-5-jack@suse.cz/

Rather then solutions, I had few queries :)
1. What are the implications of not using mark_buffer_dirty()/__sync_dirty_buffer()
2. In your solution one thing which I was not clear of, was whether we should
	call clear_buffer_dirty() before calling submit_bh(), in case if somehow(?)
	the state of the buffer was already marked dirty? Not sure how this can
	happen, but I see the logic in mark_buffer_dirty() which checks, if the
	buffer is already marked dirty, it simply returns. Then __sync_dirty_buffer()
	clears the buffer dirty state.

> Anyway, Your solution is also fine by me.

I think if we get some answers to above. It will give us more confidence on
whether should we open code submit_bh() logic or should we use
mark_buffer_dirty()/__sync_dirty_buffer() (with lock_buffer() to prevent the
warning which you reported).

-ritesh

>
> Thanks,
> Yi.
>
> > diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> > index 6900da973ce2..3447841fe654 100644
> > --- a/fs/ext4/super.c
> > +++ b/fs/ext4/super.c
> > @@ -6007,6 +6007,7 @@ static int ext4_commit_super(struct super_block *sb)
> >
> >         ext4_update_super(sb);
> >
> > +       lock_buffer(sbh);
> >         if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
> >                 /*
> >                  * Oh, dear.  A previous attempt to write the
> > @@ -6023,6 +6024,7 @@ static int ext4_commit_super(struct super_block *sb)
> >         }
> >         BUFFER_TRACE(sbh, "marking dirty");
> >         mark_buffer_dirty(sbh);
> > +       unlock_buffer(sbh);
> >         error = __sync_dirty_buffer(sbh,
> >                 REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0));
> >         if (buffer_write_io_error(sbh)) {
> > .
> >
Jan Kara May 19, 2022, 9:30 a.m. UTC | #4
On Thu 19-05-22 11:59:29, Ritesh Harjani wrote:
> On 22/05/19 11:13AM, Zhang Yi wrote:
> > On 2022/5/19 1:06, Ritesh Harjani wrote:
> > > On 22/05/18 10:10PM, Zhang Yi wrote:
> > >> We have already check the io_error and uptodate flag before submitting
> > >> the superblock buffer, and re-set the uptodate flag if it has been
> > >> failed to write out. But it was lockless and could be raced by another
> > >> ext4_commit_super(), and finally trigger '!uptodate' WARNING when
> > >> marking buffer dirty. Fix it by submit buffer directly.
> > >
> > > I agree that there could be a race with multiple processes trying to call
> > > ext4_commit_super(). Do you have a easy reproducer for this issue?
> > >
> >
> > Sorry, I don't have a easy reproducer, but we can always reproduce it through
> > inject delay and add filters into the ext4_commit_super().

...
 
> > > Also do you think something like below should fix the problem too?
> > > So if you lock the buffer from checking until marking the buffer dirty, that
> > > should avoid the race too that you are reporting.
> > > Thoughts?
> > >
> >
> > Thanks for your suggestion. I've thought about this solution and yes it's simpler
> > to fix the race, but I think we lock and unlock the sbh several times just for
> > calling standard buffer write helpers is not so good. Opencode the submit
> > procedure looks more clear to me.
> 
> I agree your solution was cleaner since it does not has a lot of lock/unlock.
> My suggestion came in from looking at the history.
> This lock was added here [1] and I think it somehow got removed in this patch[2]
> 
> [1]: https://lore.kernel.org/linux-ext4/1467285150-15977-2-git-send-email-pranjas@gmail.com/
> [2]: https://lore.kernel.org/linux-ext4/20201216101844.22917-5-jack@suse.cz/

So the reason why I've move unlock_buffer() into ext4_update_super() was
mostly so that the function does not return with buffer lock (which is an
odd calling convention) when I was adding another user of it
(flush_stashed_error_work()).

> Rather then solutions, I had few queries :)
> 1. What are the implications of not using
> mark_buffer_dirty()/__sync_dirty_buffer()

Not much. Using submit_bh() directly is fine. Just the duplication of the
checks is somewhat unpleasant.

> 2. In your solution one thing which I was not clear of, was whether we
> should call clear_buffer_dirty() before calling submit_bh(), in case if
> somehow(?) the state of the buffer was already marked dirty? Not sure how
> this can happen, but I see the logic in mark_buffer_dirty() which checks,
> if the buffer is already marked dirty, it simply returns. Then
> __sync_dirty_buffer() clears the buffer dirty state.

It could happen e.g. if there was journalled update of the superblock
before. I guess calling clear_buffer_dirty() before submit_bh() does no
harm.

Otherwise I like Yi's solution.

								Honza
Ritesh Harjani (IBM) May 19, 2022, 10:08 a.m. UTC | #5
On 22/05/19 11:30AM, Jan Kara wrote:
> On Thu 19-05-22 11:59:29, Ritesh Harjani wrote:
> > On 22/05/19 11:13AM, Zhang Yi wrote:
> > > On 2022/5/19 1:06, Ritesh Harjani wrote:
> > > > On 22/05/18 10:10PM, Zhang Yi wrote:
> > > >> We have already check the io_error and uptodate flag before submitting
> > > >> the superblock buffer, and re-set the uptodate flag if it has been
> > > >> failed to write out. But it was lockless and could be raced by another
> > > >> ext4_commit_super(), and finally trigger '!uptodate' WARNING when
> > > >> marking buffer dirty. Fix it by submit buffer directly.
> > > >
> > > > I agree that there could be a race with multiple processes trying to call
> > > > ext4_commit_super(). Do you have a easy reproducer for this issue?
> > > >
> > >
> > > Sorry, I don't have a easy reproducer, but we can always reproduce it through
> > > inject delay and add filters into the ext4_commit_super().
>
> ...
>
> > > > Also do you think something like below should fix the problem too?
> > > > So if you lock the buffer from checking until marking the buffer dirty, that
> > > > should avoid the race too that you are reporting.
> > > > Thoughts?
> > > >
> > >
> > > Thanks for your suggestion. I've thought about this solution and yes it's simpler
> > > to fix the race, but I think we lock and unlock the sbh several times just for
> > > calling standard buffer write helpers is not so good. Opencode the submit
> > > procedure looks more clear to me.
> >
> > I agree your solution was cleaner since it does not has a lot of lock/unlock.
> > My suggestion came in from looking at the history.
> > This lock was added here [1] and I think it somehow got removed in this patch[2]
> >
> > [1]: https://lore.kernel.org/linux-ext4/1467285150-15977-2-git-send-email-pranjas@gmail.com/
> > [2]: https://lore.kernel.org/linux-ext4/20201216101844.22917-5-jack@suse.cz/
>
> So the reason why I've move unlock_buffer() into ext4_update_super() was
> mostly so that the function does not return with buffer lock (which is an
> odd calling convention) when I was adding another user of it
> (flush_stashed_error_work()).
>
> > Rather then solutions, I had few queries :)
> > 1. What are the implications of not using
> > mark_buffer_dirty()/__sync_dirty_buffer()
>
> Not much. Using submit_bh() directly is fine. Just the duplication of the
> checks is somewhat unpleasant.

Ok.

>
> > 2. In your solution one thing which I was not clear of, was whether we
> > should call clear_buffer_dirty() before calling submit_bh(), in case if
> > somehow(?) the state of the buffer was already marked dirty? Not sure how
> > this can happen, but I see the logic in mark_buffer_dirty() which checks,
> > if the buffer is already marked dirty, it simply returns. Then
> > __sync_dirty_buffer() clears the buffer dirty state.
>
> It could happen e.g. if there was journalled update of the superblock
> before. I guess calling clear_buffer_dirty() before submit_bh() does no
> harm.

Makes sense.

>
> Otherwise I like Yi's solution.

I agree. Thanks for helping with the queries.

-ritesh
Zhang Yi May 19, 2022, 12:33 p.m. UTC | #6
On 2022/5/19 17:30, Jan Kara wrote:
> On Thu 19-05-22 11:59:29, Ritesh Harjani wrote:
>> On 22/05/19 11:13AM, Zhang Yi wrote:
>>> On 2022/5/19 1:06, Ritesh Harjani wrote:
>>>> On 22/05/18 10:10PM, Zhang Yi wrote:
>>>>> We have already check the io_error and uptodate flag before submitting
>>>>> the superblock buffer, and re-set the uptodate flag if it has been
>>>>> failed to write out. But it was lockless and could be raced by another
>>>>> ext4_commit_super(), and finally trigger '!uptodate' WARNING when
>>>>> marking buffer dirty. Fix it by submit buffer directly.
>>>>
>>>> I agree that there could be a race with multiple processes trying to call
>>>> ext4_commit_super(). Do you have a easy reproducer for this issue?
>>>>
>>>
>>> Sorry, I don't have a easy reproducer, but we can always reproduce it through
>>> inject delay and add filters into the ext4_commit_super().
> 
> ...
>  
>>>> Also do you think something like below should fix the problem too?
>>>> So if you lock the buffer from checking until marking the buffer dirty, that
>>>> should avoid the race too that you are reporting.
>>>> Thoughts?
>>>>
>>>
>>> Thanks for your suggestion. I've thought about this solution and yes it's simpler
>>> to fix the race, but I think we lock and unlock the sbh several times just for
>>> calling standard buffer write helpers is not so good. Opencode the submit
>>> procedure looks more clear to me.
>>
>> I agree your solution was cleaner since it does not has a lot of lock/unlock.
>> My suggestion came in from looking at the history.
>> This lock was added here [1] and I think it somehow got removed in this patch[2]
>>
>> [1]: https://lore.kernel.org/linux-ext4/1467285150-15977-2-git-send-email-pranjas@gmail.com/
>> [2]: https://lore.kernel.org/linux-ext4/20201216101844.22917-5-jack@suse.cz/
> 
> So the reason why I've move unlock_buffer() into ext4_update_super() was
> mostly so that the function does not return with buffer lock (which is an
> odd calling convention) when I was adding another user of it
> (flush_stashed_error_work()).
> 
>> Rather then solutions, I had few queries :)
>> 1. What are the implications of not using
>> mark_buffer_dirty()/__sync_dirty_buffer()
> 
> Not much. Using submit_bh() directly is fine. Just the duplication of the
> checks is somewhat unpleasant.
> 
>> 2. In your solution one thing which I was not clear of, was whether we
>> should call clear_buffer_dirty() before calling submit_bh(), in case if
>> somehow(?) the state of the buffer was already marked dirty? Not sure how
>> this can happen, but I see the logic in mark_buffer_dirty() which checks,
>> if the buffer is already marked dirty, it simply returns. Then
>> __sync_dirty_buffer() clears the buffer dirty state.
> 
> It could happen e.g. if there was journalled update of the superblock
> before. I guess calling clear_buffer_dirty() before submit_bh() does no
> harm.
> 

Thanks for point out and explain, I missed this case. Call clear_buffer_dirty()
before submit_bh() can avoid one more redundant submit by writeback process.

Thanks,
Yi.
diff mbox series

Patch

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1466fbdbc8e3..cca0a87fe4ad 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -6002,7 +6002,6 @@  static void ext4_update_super(struct super_block *sb)
 static int ext4_commit_super(struct super_block *sb)
 {
 	struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
-	int error = 0;
 
 	if (!sbh)
 		return -EINVAL;
@@ -6011,6 +6010,13 @@  static int ext4_commit_super(struct super_block *sb)
 
 	ext4_update_super(sb);
 
+	lock_buffer(sbh);
+	/* Buffer got discarded which means block device got invalidated */
+	if (!buffer_mapped(sbh)) {
+		unlock_buffer(sbh);
+		return -EIO;
+	}
+
 	if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
 		/*
 		 * Oh, dear.  A previous attempt to write the
@@ -6025,17 +6031,19 @@  static int ext4_commit_super(struct super_block *sb)
 		clear_buffer_write_io_error(sbh);
 		set_buffer_uptodate(sbh);
 	}
-	BUFFER_TRACE(sbh, "marking dirty");
-	mark_buffer_dirty(sbh);
-	error = __sync_dirty_buffer(sbh,
-		REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0));
+	get_bh(sbh);
+	sbh->b_end_io = end_buffer_write_sync;
+	submit_bh(REQ_OP_WRITE,
+		  REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0), sbh);
+	wait_on_buffer(sbh);
 	if (buffer_write_io_error(sbh)) {
 		ext4_msg(sb, KERN_ERR, "I/O error while writing "
 		       "superblock");
 		clear_buffer_write_io_error(sbh);
 		set_buffer_uptodate(sbh);
+		return -EIO;
 	}
-	return error;
+	return 0;
 }
 
 /*