diff mbox series

[v2,1/8] ext4: make ext4_mpage_readpages() support large folios

Message ID 20250512063319.3539411-2-yi.zhang@huaweicloud.com
State Awaiting Upstream
Headers show
Series ext4: enable large folio for regular files | expand

Commit Message

Zhang Yi May 12, 2025, 6:33 a.m. UTC
From: Zhang Yi <yi.zhang@huawei.com>

ext4_mpage_readpages() currently assumes that each folio is the size of
PAGE_SIZE. Modify it to atomically calculate the number of blocks per
folio and iterate through the blocks in each folio, which would allow
for support of larger folios.

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
 fs/ext4/readpage.c | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

Comments

Ojaswin Mujoo May 20, 2025, 10:41 a.m. UTC | #1
On Mon, May 12, 2025 at 02:33:12PM +0800, Zhang Yi wrote:
> From: Zhang Yi <yi.zhang@huawei.com>
> 
> ext4_mpage_readpages() currently assumes that each folio is the size of
> PAGE_SIZE. Modify it to atomically calculate the number of blocks per
> folio and iterate through the blocks in each folio, which would allow
> for support of larger folios.
> 
> Signed-off-by: Zhang Yi <yi.zhang@huawei.com>

Going through this path is pretty confusing. I'm not sure why we keep
falling back to full folio read in case of mixed or non continuous mappings.
Seems like there's some read amplification here. 

Regardless, the changes here look okay to me.

Feel free to add:

Reviewed-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>

Thanks,
ojaswin
> ---
>  fs/ext4/readpage.c | 28 +++++++++++++++++-----------
>  1 file changed, 17 insertions(+), 11 deletions(-)
> 
> diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
> index 5d3a9dc9a32d..f329daf6e5c7 100644
> --- a/fs/ext4/readpage.c
> +++ b/fs/ext4/readpage.c
> @@ -227,24 +227,30 @@ int ext4_mpage_readpages(struct inode *inode,
>  	int length;
>  	unsigned relative_block = 0;
>  	struct ext4_map_blocks map;
> -	unsigned int nr_pages = rac ? readahead_count(rac) : 1;
> +	unsigned int nr_pages, folio_pages;
>  
>  	map.m_pblk = 0;
>  	map.m_lblk = 0;
>  	map.m_len = 0;
>  	map.m_flags = 0;
>  
> -	for (; nr_pages; nr_pages--) {
> +	nr_pages = rac ? readahead_count(rac) : folio_nr_pages(folio);
> +	for (; nr_pages; nr_pages -= folio_pages) {
>  		int fully_mapped = 1;
> -		unsigned first_hole = blocks_per_page;
> +		unsigned int first_hole;
> +		unsigned int blocks_per_folio;
>  
>  		if (rac)
>  			folio = readahead_folio(rac);
> +
> +		folio_pages = folio_nr_pages(folio);
>  		prefetchw(&folio->flags);
>  
>  		if (folio_buffers(folio))
>  			goto confused;
>  
> +		blocks_per_folio = folio_size(folio) >> blkbits;
> +		first_hole = blocks_per_folio;
>  		block_in_file = next_block =
>  			(sector_t)folio->index << (PAGE_SHIFT - blkbits);
>  		last_block = block_in_file + nr_pages * blocks_per_page;
> @@ -270,7 +276,7 @@ int ext4_mpage_readpages(struct inode *inode,
>  					map.m_flags &= ~EXT4_MAP_MAPPED;
>  					break;
>  				}
> -				if (page_block == blocks_per_page)
> +				if (page_block == blocks_per_folio)
>  					break;
>  				page_block++;
>  				block_in_file++;
> @@ -281,7 +287,7 @@ int ext4_mpage_readpages(struct inode *inode,
>  		 * Then do more ext4_map_blocks() calls until we are
>  		 * done with this folio.
>  		 */
> -		while (page_block < blocks_per_page) {
> +		while (page_block < blocks_per_folio) {
>  			if (block_in_file < last_block) {
>  				map.m_lblk = block_in_file;
>  				map.m_len = last_block - block_in_file;
> @@ -296,13 +302,13 @@ int ext4_mpage_readpages(struct inode *inode,
>  			}
>  			if ((map.m_flags & EXT4_MAP_MAPPED) == 0) {
>  				fully_mapped = 0;
> -				if (first_hole == blocks_per_page)
> +				if (first_hole == blocks_per_folio)
>  					first_hole = page_block;
>  				page_block++;
>  				block_in_file++;
>  				continue;
>  			}
> -			if (first_hole != blocks_per_page)
> +			if (first_hole != blocks_per_folio)
>  				goto confused;		/* hole -> non-hole */
>  
>  			/* Contiguous blocks? */
> @@ -315,13 +321,13 @@ int ext4_mpage_readpages(struct inode *inode,
>  					/* needed? */
>  					map.m_flags &= ~EXT4_MAP_MAPPED;
>  					break;
> -				} else if (page_block == blocks_per_page)
> +				} else if (page_block == blocks_per_folio)
>  					break;
>  				page_block++;
>  				block_in_file++;
>  			}
>  		}
> -		if (first_hole != blocks_per_page) {
> +		if (first_hole != blocks_per_folio) {
>  			folio_zero_segment(folio, first_hole << blkbits,
>  					  folio_size(folio));
>  			if (first_hole == 0) {
> @@ -367,11 +373,11 @@ int ext4_mpage_readpages(struct inode *inode,
>  
>  		if (((map.m_flags & EXT4_MAP_BOUNDARY) &&
>  		     (relative_block == map.m_len)) ||
> -		    (first_hole != blocks_per_page)) {
> +		    (first_hole != blocks_per_folio)) {
>  			submit_bio(bio);
>  			bio = NULL;
>  		} else
> -			last_block_in_bio = first_block + blocks_per_page - 1;
> +			last_block_in_bio = first_block + blocks_per_folio - 1;
>  		continue;
>  	confused:
>  		if (bio) {
> -- 
> 2.46.1
>
diff mbox series

Patch

diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 5d3a9dc9a32d..f329daf6e5c7 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -227,24 +227,30 @@  int ext4_mpage_readpages(struct inode *inode,
 	int length;
 	unsigned relative_block = 0;
 	struct ext4_map_blocks map;
-	unsigned int nr_pages = rac ? readahead_count(rac) : 1;
+	unsigned int nr_pages, folio_pages;
 
 	map.m_pblk = 0;
 	map.m_lblk = 0;
 	map.m_len = 0;
 	map.m_flags = 0;
 
-	for (; nr_pages; nr_pages--) {
+	nr_pages = rac ? readahead_count(rac) : folio_nr_pages(folio);
+	for (; nr_pages; nr_pages -= folio_pages) {
 		int fully_mapped = 1;
-		unsigned first_hole = blocks_per_page;
+		unsigned int first_hole;
+		unsigned int blocks_per_folio;
 
 		if (rac)
 			folio = readahead_folio(rac);
+
+		folio_pages = folio_nr_pages(folio);
 		prefetchw(&folio->flags);
 
 		if (folio_buffers(folio))
 			goto confused;
 
+		blocks_per_folio = folio_size(folio) >> blkbits;
+		first_hole = blocks_per_folio;
 		block_in_file = next_block =
 			(sector_t)folio->index << (PAGE_SHIFT - blkbits);
 		last_block = block_in_file + nr_pages * blocks_per_page;
@@ -270,7 +276,7 @@  int ext4_mpage_readpages(struct inode *inode,
 					map.m_flags &= ~EXT4_MAP_MAPPED;
 					break;
 				}
-				if (page_block == blocks_per_page)
+				if (page_block == blocks_per_folio)
 					break;
 				page_block++;
 				block_in_file++;
@@ -281,7 +287,7 @@  int ext4_mpage_readpages(struct inode *inode,
 		 * Then do more ext4_map_blocks() calls until we are
 		 * done with this folio.
 		 */
-		while (page_block < blocks_per_page) {
+		while (page_block < blocks_per_folio) {
 			if (block_in_file < last_block) {
 				map.m_lblk = block_in_file;
 				map.m_len = last_block - block_in_file;
@@ -296,13 +302,13 @@  int ext4_mpage_readpages(struct inode *inode,
 			}
 			if ((map.m_flags & EXT4_MAP_MAPPED) == 0) {
 				fully_mapped = 0;
-				if (first_hole == blocks_per_page)
+				if (first_hole == blocks_per_folio)
 					first_hole = page_block;
 				page_block++;
 				block_in_file++;
 				continue;
 			}
-			if (first_hole != blocks_per_page)
+			if (first_hole != blocks_per_folio)
 				goto confused;		/* hole -> non-hole */
 
 			/* Contiguous blocks? */
@@ -315,13 +321,13 @@  int ext4_mpage_readpages(struct inode *inode,
 					/* needed? */
 					map.m_flags &= ~EXT4_MAP_MAPPED;
 					break;
-				} else if (page_block == blocks_per_page)
+				} else if (page_block == blocks_per_folio)
 					break;
 				page_block++;
 				block_in_file++;
 			}
 		}
-		if (first_hole != blocks_per_page) {
+		if (first_hole != blocks_per_folio) {
 			folio_zero_segment(folio, first_hole << blkbits,
 					  folio_size(folio));
 			if (first_hole == 0) {
@@ -367,11 +373,11 @@  int ext4_mpage_readpages(struct inode *inode,
 
 		if (((map.m_flags & EXT4_MAP_BOUNDARY) &&
 		     (relative_block == map.m_len)) ||
-		    (first_hole != blocks_per_page)) {
+		    (first_hole != blocks_per_folio)) {
 			submit_bio(bio);
 			bio = NULL;
 		} else
-			last_block_in_bio = first_block + blocks_per_page - 1;
+			last_block_in_bio = first_block + blocks_per_folio - 1;
 		continue;
 	confused:
 		if (bio) {