diff mbox series

[08/15] ubifs: Convert ubifs_write_begin() to use a folio

Message ID 20240120230824.2619716-9-willy@infradead.org
State Superseded
Headers show
Series ubifs folio conversion | expand

Commit Message

Matthew Wilcox Jan. 20, 2024, 11:08 p.m. UTC
Save eight calls to compound_head() by using the new folio API.
Remove a few assumptions that would break with large folios.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 fs/ubifs/file.c | 36 +++++++++++++++++-------------------
 1 file changed, 17 insertions(+), 19 deletions(-)

Comments

Zhihao Cheng Jan. 22, 2024, 11:51 a.m. UTC | #1
在 2024/1/21 7:08, Matthew Wilcox (Oracle) 写道:
> Save eight calls to compound_head() by using the new folio API.
> Remove a few assumptions that would break with large folios.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>   fs/ubifs/file.c | 36 +++++++++++++++++-------------------
>   1 file changed, 17 insertions(+), 19 deletions(-)
> 
> diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
> index 6302ce65e0b3..ef262499f228 100644
> --- a/fs/ubifs/file.c
> +++ b/fs/ubifs/file.c
> @@ -428,7 +428,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
>   	pgoff_t index = pos >> PAGE_SHIFT;
>   	int err, appending = !!(pos + len > inode->i_size);
>   	int skipped_read = 0;
> -	struct page *page;
> +	struct folio *folio;
>   
>   	ubifs_assert(c, ubifs_inode(inode)->ui_size == inode->i_size);
>   	ubifs_assert(c, !c->ro_media && !c->ro_mount);
> @@ -437,13 +437,14 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
>   		return -EROFS;
>   
>   	/* Try out the fast-path part first */
> -	page = grab_cache_page_write_begin(mapping, index);
> -	if (unlikely(!page))
> -		return -ENOMEM;
> +	folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
> +			mapping_gfp_mask(mapping));
> +	if (IS_ERR(folio))
> +		return PTR_ERR(folio);
>   
> -	if (!PageUptodate(page)) {
> +	if (!folio_test_uptodate(folio)) {
>   		/* The page is not loaded from the flash */
> -		if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE) {
> +		if (pos == folio_pos(folio) && len >= folio_size(folio)) {

Why not len == folio_size(folio)? Although 'len >= folio_size(folio)' is 
not wrong.

>   			/*
>   			 * We change whole page so no need to load it. But we
>   			 * do not know whether this page exists on the media or
> @@ -453,29 +454,27 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
>   			 * media. Thus, we are setting the @PG_checked flag
>   			 * here.
>   			 */
> -			SetPageChecked(page);
> +			folio_set_checked(folio);
>   			skipped_read = 1;
>   		} else {
> -			err = do_readpage(page);
> +			err = do_readpage(&folio->page);
>   			if (err) {
> -				unlock_page(page);
> -				put_page(page);
> +				folio_unlock(folio);
> +				folio_put(folio);
>   				return err;
>   			}
>   		}
>   	}
>   
> -	err = allocate_budget(c, page, ui, appending);
> +	err = allocate_budget(c, &folio->page, ui, appending);
>   	if (unlikely(err)) {
>   		ubifs_assert(c, err == -ENOSPC);
>   		/*
>   		 * If we skipped reading the page because we were going to
>   		 * write all of it, then it is not up to date.
>   		 */
> -		if (skipped_read) {
> -			ClearPageChecked(page);
> -			ClearPageUptodate(page);
> -		}
> +		if (skipped_read)
> +			folio_clear_checked(folio);
>   		/*
>   		 * Budgeting failed which means it would have to force
>   		 * write-back but didn't, because we set the @fast flag in the
> @@ -487,8 +486,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
>   			ubifs_assert(c, mutex_is_locked(&ui->ui_mutex));
>   			mutex_unlock(&ui->ui_mutex);
>   		}
> -		unlock_page(page);
> -		put_page(page);
> +		folio_unlock(folio);
> +		folio_put(folio);
>   
>   		return write_begin_slow(mapping, pos, len, pagep);
>   	}
> @@ -499,9 +498,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
>   	 * with @ui->ui_mutex locked if we are appending pages, and unlocked
>   	 * otherwise. This is an optimization (slightly hacky though).
>   	 */
> -	*pagep = page;
> +	*pagep = &folio->page;
>   	return 0;
> -
>   }
>   
>   /**
>
Matthew Wilcox Jan. 22, 2024, 2:43 p.m. UTC | #2
On Mon, Jan 22, 2024 at 07:51:03PM +0800, Zhihao Cheng wrote:
> > @@ -437,13 +437,14 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
> >   		return -EROFS;
> >   	/* Try out the fast-path part first */
> > -	page = grab_cache_page_write_begin(mapping, index);
> > -	if (unlikely(!page))
> > -		return -ENOMEM;
> > +	folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
> > +			mapping_gfp_mask(mapping));
> > +	if (IS_ERR(folio))
> > +		return PTR_ERR(folio);
> > -	if (!PageUptodate(page)) {
> > +	if (!folio_test_uptodate(folio)) {
> >   		/* The page is not loaded from the flash */
> > -		if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE) {
> > +		if (pos == folio_pos(folio) && len >= folio_size(folio)) {
> 
> Why not len == folio_size(folio)? Although 'len >= folio_size(folio)' is not
> wrong.

This is based on my experience with iomap.  Today, the caller passes in
min(length-of-write, PAGE_SIZE).  In order to be able to create large
folios in the write path, we want to change the caller of write_begin
to pass in the length of the write, so we can see a len which is larger
than the size of the folio we found.
Zhihao Cheng Jan. 23, 2024, 2:15 a.m. UTC | #3
在 2024/1/22 22:43, Matthew Wilcox 写道:
> On Mon, Jan 22, 2024 at 07:51:03PM +0800, Zhihao Cheng wrote:
>>> @@ -437,13 +437,14 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
>>>    		return -EROFS;
>>>    	/* Try out the fast-path part first */
>>> -	page = grab_cache_page_write_begin(mapping, index);
>>> -	if (unlikely(!page))
>>> -		return -ENOMEM;
>>> +	folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
>>> +			mapping_gfp_mask(mapping));
>>> +	if (IS_ERR(folio))
>>> +		return PTR_ERR(folio);
>>> -	if (!PageUptodate(page)) {
>>> +	if (!folio_test_uptodate(folio)) {
>>>    		/* The page is not loaded from the flash */
>>> -		if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE) {
>>> +		if (pos == folio_pos(folio) && len >= folio_size(folio)) {
>>
>> Why not len == folio_size(folio)? Although 'len >= folio_size(folio)' is not
>> wrong.
> 
> This is based on my experience with iomap.  Today, the caller passes in
> min(length-of-write, PAGE_SIZE).  In order to be able to create large
> folios in the write path, we want to change the caller of write_begin
> to pass in the length of the write, so we can see a len which is larger
> than the size of the folio we found.
> 
> .
> 

I guess you are preparing to add large folio support for the 
generic_perform_write path; may I have a look at the demo code? I'm a 
little confused by these words: "we want to change the caller of 
write_begin to pass in the length of the write". According to the 
implementation of iomap_write_iter(), I find the write length in each 
iteration is min(write_bytes, folio_size), so I guess the write length 
passed into ubifs_write_begin won't exceed folio_size.
Matthew Wilcox Jan. 23, 2024, 3:07 a.m. UTC | #4
On Tue, Jan 23, 2024 at 10:15:50AM +0800, Zhihao Cheng wrote:
> 在 2024/1/22 22:43, Matthew Wilcox 写道:
> > On Mon, Jan 22, 2024 at 07:51:03PM +0800, Zhihao Cheng wrote:
> > > Why not len == folio_size(folio)? Although 'len >= folio_size(folio)' is not
> > > wrong.
> > 
> > This is based on my experience with iomap.  Today, the caller passes in
> > min(length-of-write, PAGE_SIZE).  In order to be able to create large
> > folios in the write path, we want to change the caller of write_begin
> > to pass in the length of the write, so we can see a len which is larger
> > than the size of the folio we found.
> 
> I guess you are preparing for adding large folio support for
> generic_perform_write path, may I have a look for the demo code? I'm a

https://lore.kernel.org/linux-fsdevel/20240111192513.3505769-1-willy@infradead.org/
(now withdrawn because it breaks ext4)

> little confused for this words "we want to change the caller of write_begin
> to pass in the length of the write". According to the implementation of
> iomap_write_iter(), I find the write length in each iteration is
> min(write_bytes, folio_size), so I guess the write length passed in
> ubifs_write_begin won't exceed folio_size.

But that happens after write_begin, because we don't know what the folio
size will be until we've called write_begin.

ie we call write_begin, passing in the size remaining from the write.
If write_begin finds an existing folio in the page cache, we use it.
If not, we allocate a folio based on the size of the write.  After
write_begin returns a folio, we limit the number of bytes copied to the
size of the folio.
Zhihao Cheng Jan. 23, 2024, 4:27 a.m. UTC | #5
在 2024/1/23 11:07, Matthew Wilcox 写道:
> On Tue, Jan 23, 2024 at 10:15:50AM +0800, Zhihao Cheng wrote:
>> 在 2024/1/22 22:43, Matthew Wilcox 写道:
>>> On Mon, Jan 22, 2024 at 07:51:03PM +0800, Zhihao Cheng wrote:
>>>> Why not len == folio_size(folio)? Although 'len >= folio_size(folio)' is not
>>>> wrong.
>>>
>>> This is based on my experience with iomap.  Today, the caller passes in
>>> min(length-of-write, PAGE_SIZE).  In order to be able to create large
>>> folios in the write path, we want to change the caller of write_begin
>>> to pass in the length of the write, so we can see a len which is larger
>>> than the size of the folio we found.
>>
>> I guess you are preparing for adding large folio support for
>> generic_perform_write path, may I have a look for the demo code? I'm a
> 
> https://lore.kernel.org/linux-fsdevel/20240111192513.3505769-1-willy@infradead.org/
> (now withdrawn because it breaks ext4)
> 
>> little confused for this words "we want to change the caller of write_begin
>> to pass in the length of the write". According to the implementation of
>> iomap_write_iter(), I find the write length in each iteration is
>> min(write_bytes, folio_size), so I guess the write length passed in
>> ubifs_write_begin won't exceed folio_size.
> 
> But that happens after write_begin, because we don't know what the folio
> size will be until we've called write_begin.
> 
> ie we call write_begin, passing in the size remaining from the write.
> If write_begin finds an existing folio in the page cache, we use it.
> If not, we allocate a folio based on the size of the write.  After
> write_begin returns a folio, we limit the number of bytes copied to the
> size of the folio.
> .
> 

Makes sense. Thanks for the explanation.
Zhihao Cheng Jan. 23, 2024, 4:27 a.m. UTC | #6
在 2024/1/21 7:08, Matthew Wilcox (Oracle) 写道:
> Save eight calls to compound_head() by using the new folio API.
> Remove a few assumptions that would break with large folios.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>   fs/ubifs/file.c | 36 +++++++++++++++++-------------------
>   1 file changed, 17 insertions(+), 19 deletions(-)
> 

Reviewed-by: Zhihao Cheng <chengzhihao1@huawei.com>

> diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
> index 6302ce65e0b3..ef262499f228 100644
> --- a/fs/ubifs/file.c
> +++ b/fs/ubifs/file.c
> @@ -428,7 +428,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
>   	pgoff_t index = pos >> PAGE_SHIFT;
>   	int err, appending = !!(pos + len > inode->i_size);
>   	int skipped_read = 0;
> -	struct page *page;
> +	struct folio *folio;
>   
>   	ubifs_assert(c, ubifs_inode(inode)->ui_size == inode->i_size);
>   	ubifs_assert(c, !c->ro_media && !c->ro_mount);
> @@ -437,13 +437,14 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
>   		return -EROFS;
>   
>   	/* Try out the fast-path part first */
> -	page = grab_cache_page_write_begin(mapping, index);
> -	if (unlikely(!page))
> -		return -ENOMEM;
> +	folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
> +			mapping_gfp_mask(mapping));
> +	if (IS_ERR(folio))
> +		return PTR_ERR(folio);
>   
> -	if (!PageUptodate(page)) {
> +	if (!folio_test_uptodate(folio)) {
>   		/* The page is not loaded from the flash */
> -		if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE) {
> +		if (pos == folio_pos(folio) && len >= folio_size(folio)) {
>   			/*
>   			 * We change whole page so no need to load it. But we
>   			 * do not know whether this page exists on the media or
> @@ -453,29 +454,27 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
>   			 * media. Thus, we are setting the @PG_checked flag
>   			 * here.
>   			 */
> -			SetPageChecked(page);
> +			folio_set_checked(folio);
>   			skipped_read = 1;
>   		} else {
> -			err = do_readpage(page);
> +			err = do_readpage(&folio->page);
>   			if (err) {
> -				unlock_page(page);
> -				put_page(page);
> +				folio_unlock(folio);
> +				folio_put(folio);
>   				return err;
>   			}
>   		}
>   	}
>   
> -	err = allocate_budget(c, page, ui, appending);
> +	err = allocate_budget(c, &folio->page, ui, appending);
>   	if (unlikely(err)) {
>   		ubifs_assert(c, err == -ENOSPC);
>   		/*
>   		 * If we skipped reading the page because we were going to
>   		 * write all of it, then it is not up to date.
>   		 */
> -		if (skipped_read) {
> -			ClearPageChecked(page);
> -			ClearPageUptodate(page);
> -		}
> +		if (skipped_read)
> +			folio_clear_checked(folio);
>   		/*
>   		 * Budgeting failed which means it would have to force
>   		 * write-back but didn't, because we set the @fast flag in the
> @@ -487,8 +486,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
>   			ubifs_assert(c, mutex_is_locked(&ui->ui_mutex));
>   			mutex_unlock(&ui->ui_mutex);
>   		}
> -		unlock_page(page);
> -		put_page(page);
> +		folio_unlock(folio);
> +		folio_put(folio);
>   
>   		return write_begin_slow(mapping, pos, len, pagep);
>   	}
> @@ -499,9 +498,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
>   	 * with @ui->ui_mutex locked if we are appending pages, and unlocked
>   	 * otherwise. This is an optimization (slightly hacky though).
>   	 */
> -	*pagep = page;
> +	*pagep = &folio->page;
>   	return 0;
> -
>   }
>   
>   /**
>
diff mbox series

Patch

diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 6302ce65e0b3..ef262499f228 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -428,7 +428,7 @@  static int ubifs_write_begin(struct file *file, struct address_space *mapping,
 	pgoff_t index = pos >> PAGE_SHIFT;
 	int err, appending = !!(pos + len > inode->i_size);
 	int skipped_read = 0;
-	struct page *page;
+	struct folio *folio;
 
 	ubifs_assert(c, ubifs_inode(inode)->ui_size == inode->i_size);
 	ubifs_assert(c, !c->ro_media && !c->ro_mount);
@@ -437,13 +437,14 @@  static int ubifs_write_begin(struct file *file, struct address_space *mapping,
 		return -EROFS;
 
 	/* Try out the fast-path part first */
-	page = grab_cache_page_write_begin(mapping, index);
-	if (unlikely(!page))
-		return -ENOMEM;
+	folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
+			mapping_gfp_mask(mapping));
+	if (IS_ERR(folio))
+		return PTR_ERR(folio);
 
-	if (!PageUptodate(page)) {
+	if (!folio_test_uptodate(folio)) {
 		/* The page is not loaded from the flash */
-		if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE) {
+		if (pos == folio_pos(folio) && len >= folio_size(folio)) {
 			/*
 			 * We change whole page so no need to load it. But we
 			 * do not know whether this page exists on the media or
@@ -453,29 +454,27 @@  static int ubifs_write_begin(struct file *file, struct address_space *mapping,
 			 * media. Thus, we are setting the @PG_checked flag
 			 * here.
 			 */
-			SetPageChecked(page);
+			folio_set_checked(folio);
 			skipped_read = 1;
 		} else {
-			err = do_readpage(page);
+			err = do_readpage(&folio->page);
 			if (err) {
-				unlock_page(page);
-				put_page(page);
+				folio_unlock(folio);
+				folio_put(folio);
 				return err;
 			}
 		}
 	}
 
-	err = allocate_budget(c, page, ui, appending);
+	err = allocate_budget(c, &folio->page, ui, appending);
 	if (unlikely(err)) {
 		ubifs_assert(c, err == -ENOSPC);
 		/*
 		 * If we skipped reading the page because we were going to
 		 * write all of it, then it is not up to date.
 		 */
-		if (skipped_read) {
-			ClearPageChecked(page);
-			ClearPageUptodate(page);
-		}
+		if (skipped_read)
+			folio_clear_checked(folio);
 		/*
 		 * Budgeting failed which means it would have to force
 		 * write-back but didn't, because we set the @fast flag in the
@@ -487,8 +486,8 @@  static int ubifs_write_begin(struct file *file, struct address_space *mapping,
 			ubifs_assert(c, mutex_is_locked(&ui->ui_mutex));
 			mutex_unlock(&ui->ui_mutex);
 		}
-		unlock_page(page);
-		put_page(page);
+		folio_unlock(folio);
+		folio_put(folio);
 
 		return write_begin_slow(mapping, pos, len, pagep);
 	}
@@ -499,9 +498,8 @@  static int ubifs_write_begin(struct file *file, struct address_space *mapping,
 	 * with @ui->ui_mutex locked if we are appending pages, and unlocked
 	 * otherwise. This is an optimization (slightly hacky though).
 	 */
-	*pagep = page;
+	*pagep = &folio->page;
 	return 0;
-
 }
 
 /**