diff mbox series

[-next] ext4: fix bug_on in ext4_writepages

Message ID 20220505135708.2629657-1-yebin10@huawei.com
State Superseded
Headers show
Series [-next] ext4: fix bug_on in ext4_writepages | expand

Commit Message

yebin (H) May 5, 2022, 1:57 p.m. UTC
we got issue as follows:
EXT4-fs error (device loop0): ext4_mb_generate_buddy:1141: group 0, block bitmap and bg descriptor inconsistent: 25 vs 31513 free cls
------------[ cut here ]------------
kernel BUG at fs/ext4/inode.c:2708!
invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI
CPU: 2 PID: 2147 Comm: rep Not tainted 5.18.0-rc2-next-20220413+ #155
RIP: 0010:ext4_writepages+0x1977/0x1c10
RSP: 0018:ffff88811d3e7880 EFLAGS: 00010246
RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffff88811c098000
RDX: 0000000000000000 RSI: ffff88811c098000 RDI: 0000000000000002
RBP: ffff888128140f50 R08: ffffffffb1ff6387 R09: 0000000000000000
R10: 0000000000000007 R11: ffffed10250281ea R12: 0000000000000001
R13: 00000000000000a4 R14: ffff88811d3e7bb8 R15: ffff888128141028
FS:  00007f443aed9740(0000) GS:ffff8883aef00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000020007200 CR3: 000000011c2a4000 CR4: 00000000000006e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 <TASK>
 do_writepages+0x130/0x3a0
 filemap_fdatawrite_wbc+0x83/0xa0
 filemap_flush+0xab/0xe0
 ext4_alloc_da_blocks+0x51/0x120
 __ext4_ioctl+0x1534/0x3210
 __x64_sys_ioctl+0x12c/0x170
 do_syscall_64+0x3b/0x90

It may happen as follows:
1. write inline_data inode
vfs_write
  new_sync_write
    ext4_file_write_iter
      ext4_buffered_write_iter
        generic_perform_write
	  ext4_da_write_begin
	    ext4_da_write_inline_data_begin -> If inline data size too
	    small will allocate block to write, then mapping will has
	    dirty page
	    	ext4_da_convert_inline_data_to_extent ->clear EXT4_STATE_MAY_INLINE_DATA
2. fallocate
do_vfs_ioctl
  ioctl_preallocate
    vfs_fallocate
      ext4_fallocate
        ext4_convert_inline_data
	  ext4_convert_inline_data_nolock
	    ext4_map_blocks -> fail will goto restore data
	    ext4_restore_inline_data
	      ext4_create_inline_data
	      ext4_write_inline_data
	      ext4_set_inode_state -> set inode EXT4_STATE_MAY_INLINE_DATA
3. writepages
__ext4_ioctl
  ext4_alloc_da_blocks
    filemap_flush
      filemap_fdatawrite_wbc
        do_writepages
	  ext4_writepages
	    if (ext4_has_inline_data(inode))
	      BUG_ON(ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))

To solved this issue, record origin 'EXT4_STATE_MAY_INLINE_DATA' flag, then pass
value to 'ext4_restore_inline_data', 'ext4_restore_inline_data' will
decide to if recovery 'EXT4_STATE_MAY_INLINE_DATA' flag according to parameter.

Signed-off-by: Ye Bin <yebin10@huawei.com>
---
 fs/ext4/inline.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

Comments

Jan Kara May 5, 2022, 3:47 p.m. UTC | #1
On Thu 05-05-22 21:57:08, Ye Bin wrote:
> we got issue as follows:
> EXT4-fs error (device loop0): ext4_mb_generate_buddy:1141: group 0, block bitmap and bg descriptor inconsistent: 25 vs 31513 free cls
> ------------[ cut here ]------------
> kernel BUG at fs/ext4/inode.c:2708!
> invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI
> CPU: 2 PID: 2147 Comm: rep Not tainted 5.18.0-rc2-next-20220413+ #155
> RIP: 0010:ext4_writepages+0x1977/0x1c10
> RSP: 0018:ffff88811d3e7880 EFLAGS: 00010246
> RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffff88811c098000
> RDX: 0000000000000000 RSI: ffff88811c098000 RDI: 0000000000000002
> RBP: ffff888128140f50 R08: ffffffffb1ff6387 R09: 0000000000000000
> R10: 0000000000000007 R11: ffffed10250281ea R12: 0000000000000001
> R13: 00000000000000a4 R14: ffff88811d3e7bb8 R15: ffff888128141028
> FS:  00007f443aed9740(0000) GS:ffff8883aef00000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 0000000020007200 CR3: 000000011c2a4000 CR4: 00000000000006e0
> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> Call Trace:
>  <TASK>
>  do_writepages+0x130/0x3a0
>  filemap_fdatawrite_wbc+0x83/0xa0
>  filemap_flush+0xab/0xe0
>  ext4_alloc_da_blocks+0x51/0x120
>  __ext4_ioctl+0x1534/0x3210
>  __x64_sys_ioctl+0x12c/0x170
>  do_syscall_64+0x3b/0x90
> 
> It may happen as follows:
> 1. write inline_data inode
> vfs_write
>   new_sync_write
>     ext4_file_write_iter
>       ext4_buffered_write_iter
>         generic_perform_write
> 	  ext4_da_write_begin
> 	    ext4_da_write_inline_data_begin -> If inline data size too
> 	    small will allocate block to write, then mapping will has
> 	    dirty page
> 	    	ext4_da_convert_inline_data_to_extent ->clear EXT4_STATE_MAY_INLINE_DATA
> 2. fallocate
> do_vfs_ioctl
>   ioctl_preallocate
>     vfs_fallocate
>       ext4_fallocate
>         ext4_convert_inline_data
> 	  ext4_convert_inline_data_nolock
> 	    ext4_map_blocks -> fail will goto restore data
> 	    ext4_restore_inline_data
> 	      ext4_create_inline_data
> 	      ext4_write_inline_data
> 	      ext4_set_inode_state -> set inode EXT4_STATE_MAY_INLINE_DATA
> 3. writepages
> __ext4_ioctl
>   ext4_alloc_da_blocks
>     filemap_flush
>       filemap_fdatawrite_wbc
>         do_writepages
> 	  ext4_writepages
> 	    if (ext4_has_inline_data(inode))
> 	      BUG_ON(ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
> 
> To solved this issue, record origin 'EXT4_STATE_MAY_INLINE_DATA' flag, then pass
> value to 'ext4_restore_inline_data', 'ext4_restore_inline_data' will
> decide to if recovery 'EXT4_STATE_MAY_INLINE_DATA' flag according to parameter.
> 
> Signed-off-by: Ye Bin <yebin10@huawei.com>

I think this will get also fixed by a patch from your colleague I've
reviewed here [1], won't it?

[1] https://lore.kernel.org/all/20220428165725.mvjh6mx7gr5vekqe@quack3.lan

								Honza

> ---
>  fs/ext4/inline.c | 13 +++++++++----
>  1 file changed, 9 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
> index 9c076262770d..407061c79adc 100644
> --- a/fs/ext4/inline.c
> +++ b/fs/ext4/inline.c
> @@ -1125,8 +1125,8 @@ static int ext4_update_inline_dir(handle_t *handle, struct inode *dir,
>  }
>  
>  static void ext4_restore_inline_data(handle_t *handle, struct inode *inode,
> -				     struct ext4_iloc *iloc,
> -				     void *buf, int inline_size)
> +				     struct ext4_iloc *iloc, void *buf,
> +				     int inline_size, bool has_data)
>  {
>  	int ret;
>  
> @@ -1138,7 +1138,8 @@ static void ext4_restore_inline_data(handle_t *handle, struct inode *inode,
>  		return;
>  	}
>  	ext4_write_inline_data(inode, iloc, buf, 0, inline_size);
> -	ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
> +	if (has_data)
> +		ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
>  }
>  
>  static int ext4_finish_convert_inline_dir(handle_t *handle,
> @@ -1194,6 +1195,7 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
>  	struct buffer_head *data_bh = NULL;
>  	struct ext4_map_blocks map;
>  	int inline_size;
> +	bool has_data;
>  
>  	inline_size = ext4_get_inline_size(inode);
>  	buf = kmalloc(inline_size, GFP_NOFS);
> @@ -1222,6 +1224,8 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
>  	if (error)
>  		goto out;
>  
> +	has_data = !!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
> +
>  	map.m_lblk = 0;
>  	map.m_len = 1;
>  	map.m_flags = 0;
> @@ -1262,7 +1266,8 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
>  	unlock_buffer(data_bh);
>  out_restore:
>  	if (error)
> -		ext4_restore_inline_data(handle, inode, iloc, buf, inline_size);
> +		ext4_restore_inline_data(handle, inode, iloc, buf,
> +					 inline_size, has_data);
>  
>  out:
>  	brelse(data_bh);
> -- 
> 2.31.1
>
yebin (H) May 6, 2022, 1:27 a.m. UTC | #2
On 2022/5/5 23:47, Jan Kara wrote:
> On Thu 05-05-22 21:57:08, Ye Bin wrote:
>> we got issue as follows:
>> EXT4-fs error (device loop0): ext4_mb_generate_buddy:1141: group 0, block bitmap and bg descriptor inconsistent: 25 vs 31513 free cls
>> ------------[ cut here ]------------
>> kernel BUG at fs/ext4/inode.c:2708!
>> invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI
>> CPU: 2 PID: 2147 Comm: rep Not tainted 5.18.0-rc2-next-20220413+ #155
>> RIP: 0010:ext4_writepages+0x1977/0x1c10
>> RSP: 0018:ffff88811d3e7880 EFLAGS: 00010246
>> RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffff88811c098000
>> RDX: 0000000000000000 RSI: ffff88811c098000 RDI: 0000000000000002
>> RBP: ffff888128140f50 R08: ffffffffb1ff6387 R09: 0000000000000000
>> R10: 0000000000000007 R11: ffffed10250281ea R12: 0000000000000001
>> R13: 00000000000000a4 R14: ffff88811d3e7bb8 R15: ffff888128141028
>> FS:  00007f443aed9740(0000) GS:ffff8883aef00000(0000) knlGS:0000000000000000
>> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>> CR2: 0000000020007200 CR3: 000000011c2a4000 CR4: 00000000000006e0
>> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
>> DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
>> Call Trace:
>>   <TASK>
>>   do_writepages+0x130/0x3a0
>>   filemap_fdatawrite_wbc+0x83/0xa0
>>   filemap_flush+0xab/0xe0
>>   ext4_alloc_da_blocks+0x51/0x120
>>   __ext4_ioctl+0x1534/0x3210
>>   __x64_sys_ioctl+0x12c/0x170
>>   do_syscall_64+0x3b/0x90
>>
>> It may happen as follows:
>> 1. write inline_data inode
>> vfs_write
>>    new_sync_write
>>      ext4_file_write_iter
>>        ext4_buffered_write_iter
>>          generic_perform_write
>> 	  ext4_da_write_begin
>> 	    ext4_da_write_inline_data_begin -> If inline data size too
>> 	    small will allocate block to write, then mapping will has
>> 	    dirty page
>> 	    	ext4_da_convert_inline_data_to_extent ->clear EXT4_STATE_MAY_INLINE_DATA
>> 2. fallocate
>> do_vfs_ioctl
>>    ioctl_preallocate
>>      vfs_fallocate
>>        ext4_fallocate
>>          ext4_convert_inline_data
>> 	  ext4_convert_inline_data_nolock
>> 	    ext4_map_blocks -> fail will goto restore data
>> 	    ext4_restore_inline_data
>> 	      ext4_create_inline_data
>> 	      ext4_write_inline_data
>> 	      ext4_set_inode_state -> set inode EXT4_STATE_MAY_INLINE_DATA
>> 3. writepages
>> __ext4_ioctl
>>    ext4_alloc_da_blocks
>>      filemap_flush
>>        filemap_fdatawrite_wbc
>>          do_writepages
>> 	  ext4_writepages
>> 	    if (ext4_has_inline_data(inode))
>> 	      BUG_ON(ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
>>
>> To solved this issue, record origin 'EXT4_STATE_MAY_INLINE_DATA' flag, then pass
>> value to 'ext4_restore_inline_data', 'ext4_restore_inline_data' will
>> decide to if recovery 'EXT4_STATE_MAY_INLINE_DATA' flag according to parameter.
>>
>> Signed-off-by: Ye Bin <yebin10@huawei.com>
> I think this will get also fixed by a patch from your colleague I've
> reviewed here [1], won't it?
>
> [1] https://lore.kernel.org/all/20220428165725.mvjh6mx7gr5vekqe@quack3.lan
>
> 								Honza
The issue I fixed is not the same as the isuue my colleague fixed.
>> ---
>>   fs/ext4/inline.c | 13 +++++++++----
>>   1 file changed, 9 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
>> index 9c076262770d..407061c79adc 100644
>> --- a/fs/ext4/inline.c
>> +++ b/fs/ext4/inline.c
>> @@ -1125,8 +1125,8 @@ static int ext4_update_inline_dir(handle_t *handle, struct inode *dir,
>>   }
>>   
>>   static void ext4_restore_inline_data(handle_t *handle, struct inode *inode,
>> -				     struct ext4_iloc *iloc,
>> -				     void *buf, int inline_size)
>> +				     struct ext4_iloc *iloc, void *buf,
>> +				     int inline_size, bool has_data)
>>   {
>>   	int ret;
>>   
>> @@ -1138,7 +1138,8 @@ static void ext4_restore_inline_data(handle_t *handle, struct inode *inode,
>>   		return;
>>   	}
>>   	ext4_write_inline_data(inode, iloc, buf, 0, inline_size);
>> -	ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
>> +	if (has_data)
>> +		ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
>>   }
>>   
>>   static int ext4_finish_convert_inline_dir(handle_t *handle,
>> @@ -1194,6 +1195,7 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
>>   	struct buffer_head *data_bh = NULL;
>>   	struct ext4_map_blocks map;
>>   	int inline_size;
>> +	bool has_data;
>>   
>>   	inline_size = ext4_get_inline_size(inode);
>>   	buf = kmalloc(inline_size, GFP_NOFS);
>> @@ -1222,6 +1224,8 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
>>   	if (error)
>>   		goto out;
>>   
>> +	has_data = !!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
>> +
>>   	map.m_lblk = 0;
>>   	map.m_len = 1;
>>   	map.m_flags = 0;
>> @@ -1262,7 +1266,8 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
>>   	unlock_buffer(data_bh);
>>   out_restore:
>>   	if (error)
>> -		ext4_restore_inline_data(handle, inode, iloc, buf, inline_size);
>> +		ext4_restore_inline_data(handle, inode, iloc, buf,
>> +					 inline_size, has_data);
>>   
>>   out:
>>   	brelse(data_bh);
>> -- 
>> 2.31.1
>>
Jan Kara May 6, 2022, 8:50 a.m. UTC | #3
On Fri 06-05-22 09:27:25, yebin wrote:
> On 2022/5/5 23:47, Jan Kara wrote:
> > On Thu 05-05-22 21:57:08, Ye Bin wrote:
> > > we got issue as follows:
> > > EXT4-fs error (device loop0): ext4_mb_generate_buddy:1141: group 0, block bitmap and bg descriptor inconsistent: 25 vs 31513 free cls
> > > ------------[ cut here ]------------
> > > kernel BUG at fs/ext4/inode.c:2708!
> > > invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI
> > > CPU: 2 PID: 2147 Comm: rep Not tainted 5.18.0-rc2-next-20220413+ #155
> > > RIP: 0010:ext4_writepages+0x1977/0x1c10
> > > RSP: 0018:ffff88811d3e7880 EFLAGS: 00010246
> > > RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffff88811c098000
> > > RDX: 0000000000000000 RSI: ffff88811c098000 RDI: 0000000000000002
> > > RBP: ffff888128140f50 R08: ffffffffb1ff6387 R09: 0000000000000000
> > > R10: 0000000000000007 R11: ffffed10250281ea R12: 0000000000000001
> > > R13: 00000000000000a4 R14: ffff88811d3e7bb8 R15: ffff888128141028
> > > FS:  00007f443aed9740(0000) GS:ffff8883aef00000(0000) knlGS:0000000000000000
> > > CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > > CR2: 0000000020007200 CR3: 000000011c2a4000 CR4: 00000000000006e0
> > > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> > > DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> > > Call Trace:
> > >   <TASK>
> > >   do_writepages+0x130/0x3a0
> > >   filemap_fdatawrite_wbc+0x83/0xa0
> > >   filemap_flush+0xab/0xe0
> > >   ext4_alloc_da_blocks+0x51/0x120
> > >   __ext4_ioctl+0x1534/0x3210
> > >   __x64_sys_ioctl+0x12c/0x170
> > >   do_syscall_64+0x3b/0x90
> > > 
> > > It may happen as follows:
> > > 1. write inline_data inode
> > > vfs_write
> > >    new_sync_write
> > >      ext4_file_write_iter
> > >        ext4_buffered_write_iter
> > >          generic_perform_write
> > > 	  ext4_da_write_begin
> > > 	    ext4_da_write_inline_data_begin -> If inline data size too
> > > 	    small will allocate block to write, then mapping will has
> > > 	    dirty page
> > > 	    	ext4_da_convert_inline_data_to_extent ->clear EXT4_STATE_MAY_INLINE_DATA
> > > 2. fallocate
> > > do_vfs_ioctl
> > >    ioctl_preallocate
> > >      vfs_fallocate
> > >        ext4_fallocate
> > >          ext4_convert_inline_data
> > > 	  ext4_convert_inline_data_nolock
> > > 	    ext4_map_blocks -> fail will goto restore data
> > > 	    ext4_restore_inline_data
> > > 	      ext4_create_inline_data
> > > 	      ext4_write_inline_data
> > > 	      ext4_set_inode_state -> set inode EXT4_STATE_MAY_INLINE_DATA
> > > 3. writepages
> > > __ext4_ioctl
> > >    ext4_alloc_da_blocks
> > >      filemap_flush
> > >        filemap_fdatawrite_wbc
> > >          do_writepages
> > > 	  ext4_writepages
> > > 	    if (ext4_has_inline_data(inode))
> > > 	      BUG_ON(ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
> > > 
> > > To solved this issue, record origin 'EXT4_STATE_MAY_INLINE_DATA' flag, then pass
> > > value to 'ext4_restore_inline_data', 'ext4_restore_inline_data' will
> > > decide to if recovery 'EXT4_STATE_MAY_INLINE_DATA' flag according to parameter.
> > > 
> > > Signed-off-by: Ye Bin <yebin10@huawei.com>
> > I think this will get also fixed by a patch from your colleague I've
> > reviewed here [1], won't it?
> > 
> > [1] https://lore.kernel.org/all/20220428165725.mvjh6mx7gr5vekqe@quack3.lan
> > 
> The issue I fixed is not the same as the isuue my colleague fixed.

OK, maybe I've jumped to conclusion too early but the fix I've referenced
above will protect ext4_convert_inline_data() in ext4_fallocate() with
inode->i_rwsem so I think the race you describe with ext4_da_write_begin()
cannot happen. The inline conversion path with be entered either from
ext4_da_write_begin() or from ext4_fallocate() but not from both. If I'm
missing something, please explain how you think the problem happens with
the above fix applied... Thanks!

								Honza

> > > ---
> > >   fs/ext4/inline.c | 13 +++++++++----
> > >   1 file changed, 9 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
> > > index 9c076262770d..407061c79adc 100644
> > > --- a/fs/ext4/inline.c
> > > +++ b/fs/ext4/inline.c
> > > @@ -1125,8 +1125,8 @@ static int ext4_update_inline_dir(handle_t *handle, struct inode *dir,
> > >   }
> > >   static void ext4_restore_inline_data(handle_t *handle, struct inode *inode,
> > > -				     struct ext4_iloc *iloc,
> > > -				     void *buf, int inline_size)
> > > +				     struct ext4_iloc *iloc, void *buf,
> > > +				     int inline_size, bool has_data)
> > >   {
> > >   	int ret;
> > > @@ -1138,7 +1138,8 @@ static void ext4_restore_inline_data(handle_t *handle, struct inode *inode,
> > >   		return;
> > >   	}
> > >   	ext4_write_inline_data(inode, iloc, buf, 0, inline_size);
> > > -	ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
> > > +	if (has_data)
> > > +		ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
> > >   }
> > >   static int ext4_finish_convert_inline_dir(handle_t *handle,
> > > @@ -1194,6 +1195,7 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
> > >   	struct buffer_head *data_bh = NULL;
> > >   	struct ext4_map_blocks map;
> > >   	int inline_size;
> > > +	bool has_data;
> > >   	inline_size = ext4_get_inline_size(inode);
> > >   	buf = kmalloc(inline_size, GFP_NOFS);
> > > @@ -1222,6 +1224,8 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
> > >   	if (error)
> > >   		goto out;
> > > +	has_data = !!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
> > > +
> > >   	map.m_lblk = 0;
> > >   	map.m_len = 1;
> > >   	map.m_flags = 0;
> > > @@ -1262,7 +1266,8 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
> > >   	unlock_buffer(data_bh);
> > >   out_restore:
> > >   	if (error)
> > > -		ext4_restore_inline_data(handle, inode, iloc, buf, inline_size);
> > > +		ext4_restore_inline_data(handle, inode, iloc, buf,
> > > +					 inline_size, has_data);
> > >   out:
> > >   	brelse(data_bh);
> > > -- 
> > > 2.31.1
> > > 
>
yebin (H) May 6, 2022, 12:48 p.m. UTC | #4
On 2022/5/6 16:50, Jan Kara wrote:
> On Fri 06-05-22 09:27:25, yebin wrote:
>> On 2022/5/5 23:47, Jan Kara wrote:
>>> On Thu 05-05-22 21:57:08, Ye Bin wrote:
>>>> we got issue as follows:
>>>> EXT4-fs error (device loop0): ext4_mb_generate_buddy:1141: group 0, block bitmap and bg descriptor inconsistent: 25 vs 31513 free cls
>>>> ------------[ cut here ]------------
>>>> kernel BUG at fs/ext4/inode.c:2708!
>>>> invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI
>>>> CPU: 2 PID: 2147 Comm: rep Not tainted 5.18.0-rc2-next-20220413+ #155
>>>> RIP: 0010:ext4_writepages+0x1977/0x1c10
>>>> RSP: 0018:ffff88811d3e7880 EFLAGS: 00010246
>>>> RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffff88811c098000
>>>> RDX: 0000000000000000 RSI: ffff88811c098000 RDI: 0000000000000002
>>>> RBP: ffff888128140f50 R08: ffffffffb1ff6387 R09: 0000000000000000
>>>> R10: 0000000000000007 R11: ffffed10250281ea R12: 0000000000000001
>>>> R13: 00000000000000a4 R14: ffff88811d3e7bb8 R15: ffff888128141028
>>>> FS:  00007f443aed9740(0000) GS:ffff8883aef00000(0000) knlGS:0000000000000000
>>>> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>>> CR2: 0000000020007200 CR3: 000000011c2a4000 CR4: 00000000000006e0
>>>> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
>>>> DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
>>>> Call Trace:
>>>>    <TASK>
>>>>    do_writepages+0x130/0x3a0
>>>>    filemap_fdatawrite_wbc+0x83/0xa0
>>>>    filemap_flush+0xab/0xe0
>>>>    ext4_alloc_da_blocks+0x51/0x120
>>>>    __ext4_ioctl+0x1534/0x3210
>>>>    __x64_sys_ioctl+0x12c/0x170
>>>>    do_syscall_64+0x3b/0x90
>>>>
>>>> It may happen as follows:
>>>> 1. write inline_data inode
>>>> vfs_write
>>>>     new_sync_write
>>>>       ext4_file_write_iter
>>>>         ext4_buffered_write_iter
>>>>           generic_perform_write
>>>> 	  ext4_da_write_begin
>>>> 	    ext4_da_write_inline_data_begin -> If inline data size too
>>>> 	    small will allocate block to write, then mapping will has
>>>> 	    dirty page
>>>> 	    	ext4_da_convert_inline_data_to_extent ->clear EXT4_STATE_MAY_INLINE_DATA
>>>> 2. fallocate
>>>> do_vfs_ioctl
>>>>     ioctl_preallocate
>>>>       vfs_fallocate
>>>>         ext4_fallocate
>>>>           ext4_convert_inline_data
>>>> 	  ext4_convert_inline_data_nolock
>>>> 	    ext4_map_blocks -> fail will goto restore data
>>>> 	    ext4_restore_inline_data
>>>> 	      ext4_create_inline_data
>>>> 	      ext4_write_inline_data
>>>> 	      ext4_set_inode_state -> set inode EXT4_STATE_MAY_INLINE_DATA
>>>> 3. writepages
>>>> __ext4_ioctl
>>>>     ext4_alloc_da_blocks
>>>>       filemap_flush
>>>>         filemap_fdatawrite_wbc
>>>>           do_writepages
>>>> 	  ext4_writepages
>>>> 	    if (ext4_has_inline_data(inode))
>>>> 	      BUG_ON(ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
>>>>
>>>> To solved this issue, record origin 'EXT4_STATE_MAY_INLINE_DATA' flag, then pass
>>>> value to 'ext4_restore_inline_data', 'ext4_restore_inline_data' will
>>>> decide to if recovery 'EXT4_STATE_MAY_INLINE_DATA' flag according to parameter.
>>>>
>>>> Signed-off-by: Ye Bin <yebin10@huawei.com>
>>> I think this will get also fixed by a patch from your colleague I've
>>> reviewed here [1], won't it?
>>>
>>> [1] https://lore.kernel.org/all/20220428165725.mvjh6mx7gr5vekqe@quack3.lan
>>>
>> The issue I fixed is not the same as the isuue my colleague fixed.
> OK, maybe I've jumped to conclusion too early but the fix I've referenced
> above will protect ext4_convert_inline_data() in ext4_fallocate() with
> inode->i_rwsem so I think the race you describe with ext4_da_write_begin()
> cannot happen. The inline conversion path with be entered either from
> ext4_da_write_begin() or from ext4_fallocate() but not from both. If I'm
> missing something, please explain how you think the problem happens with
> the above fix applied... Thanks!
>
> 								Honza
It's happen as follows:
step1:
ext4_file_write_iter
   ext4_buffered_write_iter
     generic_perform_write
       ext4_da_write_begin
         if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
           ext4_da_write_inline_data_begin
             ext4_da_convert_inline_data_to_extent
               __block_write_begin
               ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
             ->clear EXT4_STATE_MAY_INLINE_DATA flag,  If inline data 
size too
         small will allocate block to write, then mapping will has dirty 
page

       ext4_da_write_end
         generic_write_end

step2:
vfs_fallocate
   ext4_fallocate
     ext4_convert_inline_data
       if (ext4_has_inline_data(inode))  -> This condition is satisfied
         ext4_convert_inline_data_nolock
           ext4_map_blocks -> fail will goto restore data
           ext4_restore_inline_data
            ext4_create_inline_data
            ext4_write_inline_data
            ext4_set_inode_state -> set inode EXT4_STATE_MAY_INLINE_DATA
step3:
do_writepages
   ext4_writepages
     if (ext4_has_inline_data(inode))
       BUG_ON(ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))

As we call "ext4_destroy_inline_data" to destory inline data when delay 
allocation.
So,if we call fallocate before writepages while ext4_map_blocks return 
failed,
will lead to above issue.
This issue does not require concurrency conditions.

>>>> ---
>>>>    fs/ext4/inline.c | 13 +++++++++----
>>>>    1 file changed, 9 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
>>>> index 9c076262770d..407061c79adc 100644
>>>> --- a/fs/ext4/inline.c
>>>> +++ b/fs/ext4/inline.c
>>>> @@ -1125,8 +1125,8 @@ static int ext4_update_inline_dir(handle_t *handle, struct inode *dir,
>>>>    }
>>>>    static void ext4_restore_inline_data(handle_t *handle, struct inode *inode,
>>>> -				     struct ext4_iloc *iloc,
>>>> -				     void *buf, int inline_size)
>>>> +				     struct ext4_iloc *iloc, void *buf,
>>>> +				     int inline_size, bool has_data)
>>>>    {
>>>>    	int ret;
>>>> @@ -1138,7 +1138,8 @@ static void ext4_restore_inline_data(handle_t *handle, struct inode *inode,
>>>>    		return;
>>>>    	}
>>>>    	ext4_write_inline_data(inode, iloc, buf, 0, inline_size);
>>>> -	ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
>>>> +	if (has_data)
>>>> +		ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
>>>>    }
>>>>    static int ext4_finish_convert_inline_dir(handle_t *handle,
>>>> @@ -1194,6 +1195,7 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
>>>>    	struct buffer_head *data_bh = NULL;
>>>>    	struct ext4_map_blocks map;
>>>>    	int inline_size;
>>>> +	bool has_data;
>>>>    	inline_size = ext4_get_inline_size(inode);
>>>>    	buf = kmalloc(inline_size, GFP_NOFS);
>>>> @@ -1222,6 +1224,8 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
>>>>    	if (error)
>>>>    		goto out;
>>>> +	has_data = !!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
>>>> +
>>>>    	map.m_lblk = 0;
>>>>    	map.m_len = 1;
>>>>    	map.m_flags = 0;
>>>> @@ -1262,7 +1266,8 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
>>>>    	unlock_buffer(data_bh);
>>>>    out_restore:
>>>>    	if (error)
>>>> -		ext4_restore_inline_data(handle, inode, iloc, buf, inline_size);
>>>> +		ext4_restore_inline_data(handle, inode, iloc, buf,
>>>> +					 inline_size, has_data);
>>>>    out:
>>>>    	brelse(data_bh);
>>>> -- 
>>>> 2.31.1
>>>>
Jan Kara May 9, 2022, 12:31 p.m. UTC | #5
On Fri 06-05-22 20:48:44, yebin wrote:
> On 2022/5/6 16:50, Jan Kara wrote:
> > On Fri 06-05-22 09:27:25, yebin wrote:
> > > On 2022/5/5 23:47, Jan Kara wrote:
> > > > On Thu 05-05-22 21:57:08, Ye Bin wrote:
> > > > > we got issue as follows:
> > > > > EXT4-fs error (device loop0): ext4_mb_generate_buddy:1141: group 0, block bitmap and bg descriptor inconsistent: 25 vs 31513 free cls
> > > > > ------------[ cut here ]------------
> > > > > kernel BUG at fs/ext4/inode.c:2708!
> > > > > invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI
> > > > > CPU: 2 PID: 2147 Comm: rep Not tainted 5.18.0-rc2-next-20220413+ #155
> > > > > RIP: 0010:ext4_writepages+0x1977/0x1c10
> > > > > RSP: 0018:ffff88811d3e7880 EFLAGS: 00010246
> > > > > RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffff88811c098000
> > > > > RDX: 0000000000000000 RSI: ffff88811c098000 RDI: 0000000000000002
> > > > > RBP: ffff888128140f50 R08: ffffffffb1ff6387 R09: 0000000000000000
> > > > > R10: 0000000000000007 R11: ffffed10250281ea R12: 0000000000000001
> > > > > R13: 00000000000000a4 R14: ffff88811d3e7bb8 R15: ffff888128141028
> > > > > FS:  00007f443aed9740(0000) GS:ffff8883aef00000(0000) knlGS:0000000000000000
> > > > > CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > > > > CR2: 0000000020007200 CR3: 000000011c2a4000 CR4: 00000000000006e0
> > > > > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> > > > > DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> > > > > Call Trace:
> > > > >    <TASK>
> > > > >    do_writepages+0x130/0x3a0
> > > > >    filemap_fdatawrite_wbc+0x83/0xa0
> > > > >    filemap_flush+0xab/0xe0
> > > > >    ext4_alloc_da_blocks+0x51/0x120
> > > > >    __ext4_ioctl+0x1534/0x3210
> > > > >    __x64_sys_ioctl+0x12c/0x170
> > > > >    do_syscall_64+0x3b/0x90
> > > > > 
> > > > > It may happen as follows:
> > > > > 1. write inline_data inode
> > > > > vfs_write
> > > > >     new_sync_write
> > > > >       ext4_file_write_iter
> > > > >         ext4_buffered_write_iter
> > > > >           generic_perform_write
> > > > > 	  ext4_da_write_begin
> > > > > 	    ext4_da_write_inline_data_begin -> If inline data size too
> > > > > 	    small will allocate block to write, then mapping will has
> > > > > 	    dirty page
> > > > > 	    	ext4_da_convert_inline_data_to_extent ->clear EXT4_STATE_MAY_INLINE_DATA
> > > > > 2. fallocate
> > > > > do_vfs_ioctl
> > > > >     ioctl_preallocate
> > > > >       vfs_fallocate
> > > > >         ext4_fallocate
> > > > >           ext4_convert_inline_data
> > > > > 	  ext4_convert_inline_data_nolock
> > > > > 	    ext4_map_blocks -> fail will goto restore data
> > > > > 	    ext4_restore_inline_data
> > > > > 	      ext4_create_inline_data
> > > > > 	      ext4_write_inline_data
> > > > > 	      ext4_set_inode_state -> set inode EXT4_STATE_MAY_INLINE_DATA
> > > > > 3. writepages
> > > > > __ext4_ioctl
> > > > >     ext4_alloc_da_blocks
> > > > >       filemap_flush
> > > > >         filemap_fdatawrite_wbc
> > > > >           do_writepages
> > > > > 	  ext4_writepages
> > > > > 	    if (ext4_has_inline_data(inode))
> > > > > 	      BUG_ON(ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
> > > > > 
> > > > > To solved this issue, record origin 'EXT4_STATE_MAY_INLINE_DATA' flag, then pass
> > > > > value to 'ext4_restore_inline_data', 'ext4_restore_inline_data' will
> > > > > decide to if recovery 'EXT4_STATE_MAY_INLINE_DATA' flag according to parameter.
> > > > > 
> > > > > Signed-off-by: Ye Bin <yebin10@huawei.com>
> > > > I think this will get also fixed by a patch from your colleague I've
> > > > reviewed here [1], won't it?
> > > > 
> > > > [1] https://lore.kernel.org/all/20220428165725.mvjh6mx7gr5vekqe@quack3.lan
> > > > 
> > > The issue I fixed is not the same as the isuue my colleague fixed.
> > OK, maybe I've jumped to conclusion too early but the fix I've referenced
> > above will protect ext4_convert_inline_data() in ext4_fallocate() with
> > inode->i_rwsem so I think the race you describe with ext4_da_write_begin()
> > cannot happen. The inline conversion path with be entered either from
> > ext4_da_write_begin() or from ext4_fallocate() but not from both. If I'm
> > missing something, please explain how you think the problem happens with
> > the above fix applied... Thanks!
> > 
> > 								Honza
> It's happen as follows:
> step1:
> ext4_file_write_iter
>   ext4_buffered_write_iter
>     generic_perform_write
>       ext4_da_write_begin
>         if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
>           ext4_da_write_inline_data_begin
>             ext4_da_convert_inline_data_to_extent
>               __block_write_begin
>               ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
>             ->clear EXT4_STATE_MAY_INLINE_DATA flag,  If inline data size
> too
>         small will allocate block to write, then mapping will has dirty page
> 
>       ext4_da_write_end
>         generic_write_end
> 
> step2:
> vfs_fallocate
>   ext4_fallocate
>     ext4_convert_inline_data
>       if (ext4_has_inline_data(inode))  -> This condition is satisfied
>         ext4_convert_inline_data_nolock
>           ext4_map_blocks -> fail will goto restore data
>           ext4_restore_inline_data
>            ext4_create_inline_data
>            ext4_write_inline_data
>            ext4_set_inode_state -> set inode EXT4_STATE_MAY_INLINE_DATA
> step3:
> do_writepages
>   ext4_writepages
>     if (ext4_has_inline_data(inode))
>       BUG_ON(ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
> 
> As we call "ext4_destroy_inline_data" to destory inline data when delay
> allocation.  So,if we call fallocate before writepages while
> ext4_map_blocks return failed, will lead to above issue.  This issue does
> not require concurrency conditions.

Aha, I see now. Thanks for explanation and sorry for being a bit dense.
I'll have a look at your original patch.

								Honza
Jan Kara May 9, 2022, 1:01 p.m. UTC | #6
On Thu 05-05-22 21:57:08, Ye Bin wrote:
> we got issue as follows:
> EXT4-fs error (device loop0): ext4_mb_generate_buddy:1141: group 0, block bitmap and bg descriptor inconsistent: 25 vs 31513 free cls
> ------------[ cut here ]------------
> kernel BUG at fs/ext4/inode.c:2708!
> invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI
> CPU: 2 PID: 2147 Comm: rep Not tainted 5.18.0-rc2-next-20220413+ #155
> RIP: 0010:ext4_writepages+0x1977/0x1c10
> RSP: 0018:ffff88811d3e7880 EFLAGS: 00010246
> RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffff88811c098000
> RDX: 0000000000000000 RSI: ffff88811c098000 RDI: 0000000000000002
> RBP: ffff888128140f50 R08: ffffffffb1ff6387 R09: 0000000000000000
> R10: 0000000000000007 R11: ffffed10250281ea R12: 0000000000000001
> R13: 00000000000000a4 R14: ffff88811d3e7bb8 R15: ffff888128141028
> FS:  00007f443aed9740(0000) GS:ffff8883aef00000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 0000000020007200 CR3: 000000011c2a4000 CR4: 00000000000006e0
> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> Call Trace:
>  <TASK>
>  do_writepages+0x130/0x3a0
>  filemap_fdatawrite_wbc+0x83/0xa0
>  filemap_flush+0xab/0xe0
>  ext4_alloc_da_blocks+0x51/0x120
>  __ext4_ioctl+0x1534/0x3210
>  __x64_sys_ioctl+0x12c/0x170
>  do_syscall_64+0x3b/0x90
> 
> It may happen as follows:
> 1. write inline_data inode
> vfs_write
>   new_sync_write
>     ext4_file_write_iter
>       ext4_buffered_write_iter
>         generic_perform_write
> 	  ext4_da_write_begin
> 	    ext4_da_write_inline_data_begin -> If inline data size too
> 	    small will allocate block to write, then mapping will has
> 	    dirty page
> 	    	ext4_da_convert_inline_data_to_extent ->clear EXT4_STATE_MAY_INLINE_DATA
> 2. fallocate
> do_vfs_ioctl
>   ioctl_preallocate
>     vfs_fallocate
>       ext4_fallocate
>         ext4_convert_inline_data
> 	  ext4_convert_inline_data_nolock
> 	    ext4_map_blocks -> fail will goto restore data
> 	    ext4_restore_inline_data
> 	      ext4_create_inline_data
> 	      ext4_write_inline_data
> 	      ext4_set_inode_state -> set inode EXT4_STATE_MAY_INLINE_DATA
> 3. writepages
> __ext4_ioctl
>   ext4_alloc_da_blocks
>     filemap_flush
>       filemap_fdatawrite_wbc
>         do_writepages
> 	  ext4_writepages
> 	    if (ext4_has_inline_data(inode))
> 	      BUG_ON(ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
> 
> To solved this issue, record origin 'EXT4_STATE_MAY_INLINE_DATA' flag, then pass
> value to 'ext4_restore_inline_data', 'ext4_restore_inline_data' will
> decide to if recovery 'EXT4_STATE_MAY_INLINE_DATA' flag according to parameter.
> 
> Signed-off-by: Ye Bin <yebin10@huawei.com>

Thanks for the patch. I agree it will fix the crash you have spotted but
I'm somewhat wondering whether it would not be simpler to just move the
call to ext4_destroy_inline_data_nolock() in
ext4_convert_inline_data_nolock() later, after we have done writing
data_bh. That way we can completely remove ext4_restore_inline_data() and
as a consequence avoid problems. What do you think?

								Honza

> ---
>  fs/ext4/inline.c | 13 +++++++++----
>  1 file changed, 9 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
> index 9c076262770d..407061c79adc 100644
> --- a/fs/ext4/inline.c
> +++ b/fs/ext4/inline.c
> @@ -1125,8 +1125,8 @@ static int ext4_update_inline_dir(handle_t *handle, struct inode *dir,
>  }
>  
>  static void ext4_restore_inline_data(handle_t *handle, struct inode *inode,
> -				     struct ext4_iloc *iloc,
> -				     void *buf, int inline_size)
> +				     struct ext4_iloc *iloc, void *buf,
> +				     int inline_size, bool has_data)
>  {
>  	int ret;
>  
> @@ -1138,7 +1138,8 @@ static void ext4_restore_inline_data(handle_t *handle, struct inode *inode,
>  		return;
>  	}
>  	ext4_write_inline_data(inode, iloc, buf, 0, inline_size);
> -	ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
> +	if (has_data)
> +		ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
>  }
>  
>  static int ext4_finish_convert_inline_dir(handle_t *handle,
> @@ -1194,6 +1195,7 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
>  	struct buffer_head *data_bh = NULL;
>  	struct ext4_map_blocks map;
>  	int inline_size;
> +	bool has_data;
>  
>  	inline_size = ext4_get_inline_size(inode);
>  	buf = kmalloc(inline_size, GFP_NOFS);
> @@ -1222,6 +1224,8 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
>  	if (error)
>  		goto out;
>  
> +	has_data = !!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
> +
>  	map.m_lblk = 0;
>  	map.m_len = 1;
>  	map.m_flags = 0;
> @@ -1262,7 +1266,8 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
>  	unlock_buffer(data_bh);
>  out_restore:
>  	if (error)
> -		ext4_restore_inline_data(handle, inode, iloc, buf, inline_size);
> +		ext4_restore_inline_data(handle, inode, iloc, buf,
> +					 inline_size, has_data);
>  
>  out:
>  	brelse(data_bh);
> -- 
> 2.31.1
>
Jan Kara May 11, 2022, 10:40 a.m. UTC | #7
On Tue 10-05-22 17:48:46, yebin wrote:
> On 2022/5/9 21:01, Jan Kara wrote:
> > On Thu 05-05-22 21:57:08, Ye Bin wrote:
> > > we got issue as follows:
> > > EXT4-fs error (device loop0): ext4_mb_generate_buddy:1141: group 0, block bitmap and bg descriptor inconsistent: 25 vs 31513 free cls
> > > ------------[ cut here ]------------
> > > kernel BUG at fs/ext4/inode.c:2708!
> > > invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI
> > > CPU: 2 PID: 2147 Comm: rep Not tainted 5.18.0-rc2-next-20220413+ #155
> > > RIP: 0010:ext4_writepages+0x1977/0x1c10
> > > RSP: 0018:ffff88811d3e7880 EFLAGS: 00010246
> > > RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffff88811c098000
> > > RDX: 0000000000000000 RSI: ffff88811c098000 RDI: 0000000000000002
> > > RBP: ffff888128140f50 R08: ffffffffb1ff6387 R09: 0000000000000000
> > > R10: 0000000000000007 R11: ffffed10250281ea R12: 0000000000000001
> > > R13: 00000000000000a4 R14: ffff88811d3e7bb8 R15: ffff888128141028
> > > FS:  00007f443aed9740(0000) GS:ffff8883aef00000(0000) knlGS:0000000000000000
> > > CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > > CR2: 0000000020007200 CR3: 000000011c2a4000 CR4: 00000000000006e0
> > > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> > > DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> > > Call Trace:
> > >   <TASK>
> > >   do_writepages+0x130/0x3a0
> > >   filemap_fdatawrite_wbc+0x83/0xa0
> > >   filemap_flush+0xab/0xe0
> > >   ext4_alloc_da_blocks+0x51/0x120
> > >   __ext4_ioctl+0x1534/0x3210
> > >   __x64_sys_ioctl+0x12c/0x170
> > >   do_syscall_64+0x3b/0x90
> > > 
> > > It may happen as follows:
> > > 1. write inline_data inode
> > > vfs_write
> > >    new_sync_write
> > >      ext4_file_write_iter
> > >        ext4_buffered_write_iter
> > >          generic_perform_write
> > > 	  ext4_da_write_begin
> > > 	    ext4_da_write_inline_data_begin -> If inline data size too
> > > 	    small will allocate block to write, then mapping will has
> > > 	    dirty page
> > > 	    	ext4_da_convert_inline_data_to_extent ->clear EXT4_STATE_MAY_INLINE_DATA
> > > 2. fallocate
> > > do_vfs_ioctl
> > >    ioctl_preallocate
> > >      vfs_fallocate
> > >        ext4_fallocate
> > >          ext4_convert_inline_data
> > > 	  ext4_convert_inline_data_nolock
> > > 	    ext4_map_blocks -> fail will goto restore data
> > > 	    ext4_restore_inline_data
> > > 	      ext4_create_inline_data
> > > 	      ext4_write_inline_data
> > > 	      ext4_set_inode_state -> set inode EXT4_STATE_MAY_INLINE_DATA
> > > 3. writepages
> > > __ext4_ioctl
> > >    ext4_alloc_da_blocks
> > >      filemap_flush
> > >        filemap_fdatawrite_wbc
> > >          do_writepages
> > > 	  ext4_writepages
> > > 	    if (ext4_has_inline_data(inode))
> > > 	      BUG_ON(ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
> > > 
> > > To solved this issue, record origin 'EXT4_STATE_MAY_INLINE_DATA' flag, then pass
> > > value to 'ext4_restore_inline_data', 'ext4_restore_inline_data' will
> > > decide to if recovery 'EXT4_STATE_MAY_INLINE_DATA' flag according to parameter.
> > > 
> > > Signed-off-by: Ye Bin <yebin10@huawei.com>
> > Thanks for the patch. I agree it will fix the crash you have spotted but
> > I'm somewhat wondering whether it would not be simpler to just move the
> > call to ext4_destroy_inline_data_nolock() in
> > ext4_convert_inline_data_nolock() later, after we have done writing
> > data_bh. That way we can completely remove ext4_restore_inline_data() and
> > as a consequence avoid problems. What do you think?
> > 
> > 								Honza
> 
> It may be a good idea, but i didn't know how to handle when call
> ext4_destroy_inline_data_nolock() failed. As it may lead to
> 'ei <../cgi-bin/global.cgi?pattern=ei&type=symbol>->i_reserved_data_blocks
> <../cgi-bin/global.cgi?pattern=i_reserved_data_blocks&type=symbol>'
> incorrect, and also lead to data lost.
> I have another idea which will inlcude in v2 patch.

Well, that call failing means something is seriously wrong with the
filesystem (IO errors, metadata corruption) so we don't care much what
happens. Also currently you have the problem that restoration of inline data
can fail so I don't think there's really tangible difference.

								Honza
diff mbox series

Patch

diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 9c076262770d..407061c79adc 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1125,8 +1125,8 @@  static int ext4_update_inline_dir(handle_t *handle, struct inode *dir,
 }
 
 static void ext4_restore_inline_data(handle_t *handle, struct inode *inode,
-				     struct ext4_iloc *iloc,
-				     void *buf, int inline_size)
+				     struct ext4_iloc *iloc, void *buf,
+				     int inline_size, bool has_data)
 {
 	int ret;
 
@@ -1138,7 +1138,8 @@  static void ext4_restore_inline_data(handle_t *handle, struct inode *inode,
 		return;
 	}
 	ext4_write_inline_data(inode, iloc, buf, 0, inline_size);
-	ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
+	if (has_data)
+		ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
 }
 
 static int ext4_finish_convert_inline_dir(handle_t *handle,
@@ -1194,6 +1195,7 @@  static int ext4_convert_inline_data_nolock(handle_t *handle,
 	struct buffer_head *data_bh = NULL;
 	struct ext4_map_blocks map;
 	int inline_size;
+	bool has_data;
 
 	inline_size = ext4_get_inline_size(inode);
 	buf = kmalloc(inline_size, GFP_NOFS);
@@ -1222,6 +1224,8 @@  static int ext4_convert_inline_data_nolock(handle_t *handle,
 	if (error)
 		goto out;
 
+	has_data = !!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
+
 	map.m_lblk = 0;
 	map.m_len = 1;
 	map.m_flags = 0;
@@ -1262,7 +1266,8 @@  static int ext4_convert_inline_data_nolock(handle_t *handle,
 	unlock_buffer(data_bh);
 out_restore:
 	if (error)
-		ext4_restore_inline_data(handle, inode, iloc, buf, inline_size);
+		ext4_restore_inline_data(handle, inode, iloc, buf,
+					 inline_size, has_data);
 
 out:
 	brelse(data_bh);