diff mbox series

[21/25] ext4: make online defragmentation support large block size

Message ID 20251025032221.2905818-22-libaokun@huaweicloud.com
State Superseded
Headers show
Series ext4: enable block size larger than page size | expand

Commit Message

Baokun Li Oct. 25, 2025, 3:22 a.m. UTC
From: Zhihao Cheng <chengzhihao1@huawei.com>

There are several places assuming that block size <= PAGE_SIZE; modify
them to support large block size (bs > ps).

Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
Signed-off-by: Baokun Li <libaokun1@huawei.com>
---
 fs/ext4/move_extent.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

Comments

Jan Kara Nov. 5, 2025, 9:50 a.m. UTC | #1
On Sat 25-10-25 11:22:17, libaokun@huaweicloud.com wrote:
> From: Zhihao Cheng <chengzhihao1@huawei.com>
> 
> There are several places assuming that block size <= PAGE_SIZE, modify
> them to support large block size (bs > ps).
> 
> Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
> Signed-off-by: Baokun Li <libaokun1@huawei.com>

...

> @@ -565,7 +564,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
>  	struct inode *orig_inode = file_inode(o_filp);
>  	struct inode *donor_inode = file_inode(d_filp);
>  	struct ext4_ext_path *path = NULL;
> -	int blocks_per_page = PAGE_SIZE >> orig_inode->i_blkbits;
> +	int blocks_per_page = 1;
>  	ext4_lblk_t o_end, o_start = orig_blk;
>  	ext4_lblk_t d_start = donor_blk;
>  	int ret;
> @@ -608,6 +607,9 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
>  		return -EOPNOTSUPP;
>  	}
>  
> +	if (i_blocksize(orig_inode) < PAGE_SIZE)
> +		blocks_per_page = PAGE_SIZE >> orig_inode->i_blkbits;
> +

I think these are strange and the only reason for this is that
ext4_move_extents() tries to make life easier to move_extent_per_page() and
that doesn't really work with larger folios anymore. I think
ext4_move_extents() just shouldn't care about pages / folios at all and
pass 'cur_len' as the length to the end of extent / moved range and
move_extent_per_page() will trim the length based on the folios it has got.

Also then we can rename some of the variables and functions from 'page' to
'folio'.

								Honza

>  	/* Protect orig and donor inodes against a truncate */
>  	lock_two_nondirectories(orig_inode, donor_inode);
>  
> @@ -665,10 +667,8 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
>  		if (o_end - o_start < cur_len)
>  			cur_len = o_end - o_start;
>  
> -		orig_page_index = o_start >> (PAGE_SHIFT -
> -					       orig_inode->i_blkbits);
> -		donor_page_index = d_start >> (PAGE_SHIFT -
> -					       donor_inode->i_blkbits);
> +		orig_page_index = EXT4_LBLK_TO_P(orig_inode, o_start);
> +		donor_page_index = EXT4_LBLK_TO_P(donor_inode, d_start);
>  		offset_in_page = o_start % blocks_per_page;
>  		if (cur_len > blocks_per_page - offset_in_page)
>  			cur_len = blocks_per_page - offset_in_page;
> -- 
> 2.46.1
>
Zhang Yi Nov. 5, 2025, 10:48 a.m. UTC | #2
On 11/5/2025 5:50 PM, Jan Kara wrote:
> On Sat 25-10-25 11:22:17, libaokun@huaweicloud.com wrote:
>> From: Zhihao Cheng <chengzhihao1@huawei.com>
>>
>> There are several places assuming that block size <= PAGE_SIZE, modify
>> them to support large block size (bs > ps).
>>
>> Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
>> Signed-off-by: Baokun Li <libaokun1@huawei.com>
> 
> ...
> 
>> @@ -565,7 +564,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
>>  	struct inode *orig_inode = file_inode(o_filp);
>>  	struct inode *donor_inode = file_inode(d_filp);
>>  	struct ext4_ext_path *path = NULL;
>> -	int blocks_per_page = PAGE_SIZE >> orig_inode->i_blkbits;
>> +	int blocks_per_page = 1;
>>  	ext4_lblk_t o_end, o_start = orig_blk;
>>  	ext4_lblk_t d_start = donor_blk;
>>  	int ret;
>> @@ -608,6 +607,9 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
>>  		return -EOPNOTSUPP;
>>  	}
>>  
>> +	if (i_blocksize(orig_inode) < PAGE_SIZE)
>> +		blocks_per_page = PAGE_SIZE >> orig_inode->i_blkbits;
>> +
> 
> I think these are strange and the only reason for this is that
> ext4_move_extents() tries to make life easier to move_extent_per_page() and
> that doesn't really work with larger folios anymore. I think
> ext4_move_extents() just shouldn't care about pages / folios at all and
> pass 'cur_len' as the length to the end of extent / moved range and
> move_extent_per_page() will trim the length based on the folios it has got.
> 
> Also then we can rename some of the variables and functions from 'page' to
> 'folio'.
> 
> 								Honza

Hi, Jan!

Thank you for the suggestion. However, after merging my online defragmentation
optimization series[1], we don't need this patch at all. Baokun will rebase it
onto my series in the next iteration.

[1] https://lore.kernel.org/linux-ext4/20251013015128.499308-1-yi.zhang@huaweicloud.com/

Thanks,
Yi.

> 
>>  	/* Protect orig and donor inodes against a truncate */
>>  	lock_two_nondirectories(orig_inode, donor_inode);
>>  
>> @@ -665,10 +667,8 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
>>  		if (o_end - o_start < cur_len)
>>  			cur_len = o_end - o_start;
>>  
>> -		orig_page_index = o_start >> (PAGE_SHIFT -
>> -					       orig_inode->i_blkbits);
>> -		donor_page_index = d_start >> (PAGE_SHIFT -
>> -					       donor_inode->i_blkbits);
>> +		orig_page_index = EXT4_LBLK_TO_P(orig_inode, o_start);
>> +		donor_page_index = EXT4_LBLK_TO_P(donor_inode, d_start);
>>  		offset_in_page = o_start % blocks_per_page;
>>  		if (cur_len > blocks_per_page - offset_in_page)
>>  			cur_len = blocks_per_page - offset_in_page;
>> -- 
>> 2.46.1
>>
Baokun Li Nov. 5, 2025, 11:28 a.m. UTC | #3
On 2025-11-05 17:50, Jan Kara wrote:
> On Sat 25-10-25 11:22:17, libaokun@huaweicloud.com wrote:
>> From: Zhihao Cheng <chengzhihao1@huawei.com>
>>
>> There are several places assuming that block size <= PAGE_SIZE, modify
>> them to support large block size (bs > ps).
>>
>> Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
>> Signed-off-by: Baokun Li <libaokun1@huawei.com>
> ...
>
>> @@ -565,7 +564,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
>>  	struct inode *orig_inode = file_inode(o_filp);
>>  	struct inode *donor_inode = file_inode(d_filp);
>>  	struct ext4_ext_path *path = NULL;
>> -	int blocks_per_page = PAGE_SIZE >> orig_inode->i_blkbits;
>> +	int blocks_per_page = 1;
>>  	ext4_lblk_t o_end, o_start = orig_blk;
>>  	ext4_lblk_t d_start = donor_blk;
>>  	int ret;
>> @@ -608,6 +607,9 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
>>  		return -EOPNOTSUPP;
>>  	}
>>  
>> +	if (i_blocksize(orig_inode) < PAGE_SIZE)
>> +		blocks_per_page = PAGE_SIZE >> orig_inode->i_blkbits;
>> +
> I think these are strange and the only reason for this is that
> ext4_move_extents() tries to make life easier to move_extent_per_page() and
> that doesn't really work with larger folios anymore. I think
> ext4_move_extents() just shouldn't care about pages / folios at all and
> pass 'cur_len' as the length to the end of extent / moved range and
> move_extent_per_page() will trim the length based on the folios it has got.
>
> Also then we can rename some of the variables and functions from 'page' to
> 'folio'.

Yes, the code here doesn’t really support folios. Yi mentioned earlier
that he would make online defragmentation support large folios. So in
this patch I only avoided shifting negative values, without doing a
deeper conversion.

Yi’s conversion work looks nearly complete, so in the next version I will
rebase on top of his patches. Since his patch already removes the
function modified here, the next version will likely drop this patch or
adapt it accordingly.

Thanks for your review!


Cheers,
Baokun

>>  	/* Protect orig and donor inodes against a truncate */
>>  	lock_two_nondirectories(orig_inode, donor_inode);
>>  
>> @@ -665,10 +667,8 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
>>  		if (o_end - o_start < cur_len)
>>  			cur_len = o_end - o_start;
>>  
>> -		orig_page_index = o_start >> (PAGE_SHIFT -
>> -					       orig_inode->i_blkbits);
>> -		donor_page_index = d_start >> (PAGE_SHIFT -
>> -					       donor_inode->i_blkbits);
>> +		orig_page_index = EXT4_LBLK_TO_P(orig_inode, o_start);
>> +		donor_page_index = EXT4_LBLK_TO_P(donor_inode, d_start);
>>  		offset_in_page = o_start % blocks_per_page;
>>  		if (cur_len > blocks_per_page - offset_in_page)
>>  			cur_len = blocks_per_page - offset_in_page;
>> -- 
>> 2.46.1
>>
diff mbox series

Patch

diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 4b091c21908f..cb55cd9e7eeb 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -270,7 +270,6 @@  move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
 	int i, err2, jblocks, retries = 0;
 	int replaced_count = 0;
 	int from;
-	int blocks_per_page = PAGE_SIZE >> orig_inode->i_blkbits;
 	struct super_block *sb = orig_inode->i_sb;
 	struct buffer_head *bh = NULL;
 
@@ -288,11 +287,11 @@  move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
 		return 0;
 	}
 
-	orig_blk_offset = orig_page_offset * blocks_per_page +
-		data_offset_in_page;
+	orig_blk_offset = EXT4_P_TO_LBLK(orig_inode, orig_page_offset) +
+			  data_offset_in_page;
 
-	donor_blk_offset = donor_page_offset * blocks_per_page +
-		data_offset_in_page;
+	donor_blk_offset = EXT4_P_TO_LBLK(donor_inode, donor_page_offset) +
+			   data_offset_in_page;
 
 	/* Calculate data_size */
 	if ((orig_blk_offset + block_len_in_page - 1) ==
@@ -565,7 +564,7 @@  ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
 	struct inode *orig_inode = file_inode(o_filp);
 	struct inode *donor_inode = file_inode(d_filp);
 	struct ext4_ext_path *path = NULL;
-	int blocks_per_page = PAGE_SIZE >> orig_inode->i_blkbits;
+	int blocks_per_page = 1;
 	ext4_lblk_t o_end, o_start = orig_blk;
 	ext4_lblk_t d_start = donor_blk;
 	int ret;
@@ -608,6 +607,9 @@  ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
 		return -EOPNOTSUPP;
 	}
 
+	if (i_blocksize(orig_inode) < PAGE_SIZE)
+		blocks_per_page = PAGE_SIZE >> orig_inode->i_blkbits;
+
 	/* Protect orig and donor inodes against a truncate */
 	lock_two_nondirectories(orig_inode, donor_inode);
 
@@ -665,10 +667,8 @@  ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
 		if (o_end - o_start < cur_len)
 			cur_len = o_end - o_start;
 
-		orig_page_index = o_start >> (PAGE_SHIFT -
-					       orig_inode->i_blkbits);
-		donor_page_index = d_start >> (PAGE_SHIFT -
-					       donor_inode->i_blkbits);
+		orig_page_index = EXT4_LBLK_TO_P(orig_inode, o_start);
+		donor_page_index = EXT4_LBLK_TO_P(donor_inode, d_start);
 		offset_in_page = o_start % blocks_per_page;
 		if (cur_len > blocks_per_page - offset_in_page)
 			cur_len = blocks_per_page - offset_in_page;