
[02/14] fs/buffer: add some new buffer read helpers

Message ID 20220831072111.3569680-3-yi.zhang@huawei.com
State Superseded
Series: buffer: remove ll_rw_block()

Commit Message

Zhang Yi Aug. 31, 2022, 7:20 a.m. UTC
The ll_rw_block() helper is fragile because it assumes that a locked
buffer means the buffer is under IO submitted by whoever holds the
lock, and it skips any buffer it fails to lock, so it is only safe on
the readahead path. Unfortunately, most filesystems still use this
helper, mistakenly, on the sync metadata read path. There is no
guarantee that whoever holds the buffer lock will always submit IO
(e.g. buffer_migrate_folio_norefs() after commit 88dbcbb3a484
("blkdev: avoid migration stalls for blkdev pages")), which could lead
to a false-positive -EIO when submitting read IO.

This patch adds some friendly buffer read helpers to prepare for
replacing ll_rw_block() and similar calls. The bh_readahead_[] helpers
must only be used on the readahead paths.
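
For reference, the fragile pattern on a sync read path looks like this
(a sketch of the pattern, e.g. as in fs/jbd2/journal.c before this
series):

	if (!buffer_uptodate(bh)) {
		ll_rw_block(REQ_OP_READ, 1, &bh);
		wait_on_buffer(bh);
		/* may fail spuriously if the lock holder did not submit IO */
		if (!buffer_uptodate(bh))
			return -EIO;
	}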

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
 fs/buffer.c                 | 68 +++++++++++++++++++++++++++++++++++++
 include/linux/buffer_head.h | 37 ++++++++++++++++++++
 2 files changed, 105 insertions(+)

Comments

Jan Kara Aug. 31, 2022, 11:30 a.m. UTC | #1
On Wed 31-08-22 15:20:59, Zhang Yi wrote:
> The ll_rw_block() helper is fragile because it assumes that a locked
> buffer means the buffer is under IO submitted by whoever holds the
> lock, and it skips any buffer it fails to lock, so it is only safe on
> the readahead path. Unfortunately, most filesystems still use this
> helper, mistakenly, on the sync metadata read path. There is no
> guarantee that whoever holds the buffer lock will always submit IO
> (e.g. buffer_migrate_folio_norefs() after commit 88dbcbb3a484
> ("blkdev: avoid migration stalls for blkdev pages")), which could lead
> to a false-positive -EIO when submitting read IO.
> 
> This patch adds some friendly buffer read helpers to prepare for
> replacing ll_rw_block() and similar calls. The bh_readahead_[] helpers
> must only be used on the readahead paths.
> 
> Signed-off-by: Zhang Yi <yi.zhang@huawei.com>

This looks mostly good. Just a few small nits below.

> diff --git a/fs/buffer.c b/fs/buffer.c
> index a0b70b3239f3..a663191903ed 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -3017,6 +3017,74 @@ int bh_uptodate_or_lock(struct buffer_head *bh)
>  }
>  EXPORT_SYMBOL(bh_uptodate_or_lock);
>  
> +/**
> + * __bh_read - Submit read for a locked buffer
> + * @bh: the buffer to read
> + * @op_flags: extra REQ_OP_* flags to apply besides REQ_OP_READ
> + * @wait: wait until reading finishes
> + *
> + * Returns zero on success (or when @wait is not set), and -EIO on error.
> + */
> +int __bh_read(struct buffer_head *bh, blk_opf_t op_flags, bool wait)
> +{
> +	int ret = 0;
> +
> +	BUG_ON(!buffer_locked(bh));
> +
> +	if (buffer_uptodate(bh)) {
> +		unlock_buffer(bh);
> +		return ret;
> +	}
> +
> +	get_bh(bh);
> +	bh->b_end_io = end_buffer_read_sync;
> +	submit_bh(REQ_OP_READ | op_flags, bh);
> +	if (wait) {
> +		wait_on_buffer(bh);
> +		if (!buffer_uptodate(bh))
> +			ret = -EIO;
> +	}
> +	return ret;
> +}
> +EXPORT_SYMBOL(__bh_read);
> +
> +/**
> + * __bh_read_batch - Submit read for a batch of unlocked buffers
> + * @bhs: array of buffers to read
> + * @nr: number of buffers in this batch
> + * @op_flags: extra REQ_OP_* flags to apply besides REQ_OP_READ
> + * @force_lock: wait to acquire the buffer lock if set, otherwise skip any
> + *              buffer that cannot be locked immediately
> + *
> + * Reads are submitted asynchronously; callers wait on the buffers as needed.
> + */
> +void __bh_read_batch(struct buffer_head *bhs[],
> +		     int nr, blk_opf_t op_flags, bool force_lock)
> +{
> +	int i;
> +
> +	for (i = 0; i < nr; i++) {
> +		struct buffer_head *bh = bhs[i];
> +
> +		if (buffer_uptodate(bh))
> +			continue;
> +		if (!trylock_buffer(bh)) {
> +			if (!force_lock)
> +				continue;
> +			lock_buffer(bh);
> +		}

This would be a bit more efficient for the force_lock case, e.g.:

		if (force_lock)
			lock_buffer(bh);
		else if (!trylock_buffer(bh))
			continue;

> +		if (buffer_uptodate(bh)) {
> +			unlock_buffer(bh);
> +			continue;
> +		}
> +
> +		bh->b_end_io = end_buffer_read_sync;
> +		get_bh(bh);
> +		submit_bh(REQ_OP_READ | op_flags, bh);
> +	}
> +}
> +EXPORT_SYMBOL(__bh_read_batch);
> +
>  /**
>   * bh_submit_read - Submit a locked buffer for reading
>   * @bh: struct buffer_head
> diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
> index c3863c417b00..8a01c07c0418 100644
> --- a/include/linux/buffer_head.h
> +++ b/include/linux/buffer_head.h
> @@ -232,6 +232,9 @@ void write_boundary_block(struct block_device *bdev,
>  			sector_t bblock, unsigned blocksize);
>  int bh_uptodate_or_lock(struct buffer_head *bh);
>  int bh_submit_read(struct buffer_head *bh);
> +int __bh_read(struct buffer_head *bh, blk_opf_t op_flags, bool wait);
> +void __bh_read_batch(struct buffer_head *bhs[],
> +		     int nr, blk_opf_t op_flags, bool force_lock);
>  
>  extern int buffer_heads_over_limit;
>  
> @@ -399,6 +402,40 @@ static inline struct buffer_head *__getblk(struct block_device *bdev,
>  	return __getblk_gfp(bdev, block, size, __GFP_MOVABLE);
>  }
>  
> +static inline void bh_readahead(struct buffer_head *bh, blk_opf_t op_flags)
> +{
> +	if (trylock_buffer(bh))
> +		__bh_read(bh, op_flags, false);
> +}
> +
> +static inline void bh_read_nowait(struct buffer_head *bh, blk_opf_t op_flags)
> +{
> +	lock_buffer(bh);
> +	__bh_read(bh, op_flags, false);
> +}
> +
> +static inline int bh_read(struct buffer_head *bh, blk_opf_t op_flags)
> +{
> +	lock_buffer(bh);
> +	return __bh_read(bh, op_flags, true);
> +}

I would use the bh_uptodate_or_lock() helper in bh_read_nowait() and
bh_read() above to avoid locking the buffer in case it is already
uptodate.
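
E.g. something like this (just an untested sketch, not the final code):

static inline void bh_read_nowait(struct buffer_head *bh, blk_opf_t op_flags)
{
	if (!bh_uptodate_or_lock(bh))
		__bh_read(bh, op_flags, false);
}

static inline int bh_read(struct buffer_head *bh, blk_opf_t op_flags)
{
	/* Skip the read (and the lock) if the buffer is already uptodate */
	if (bh_uptodate_or_lock(bh))
		return 0;
	return __bh_read(bh, op_flags, true);
}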

> +
> +static inline int bh_read_locked(struct buffer_head *bh, blk_opf_t op_flags)
> +{
> +	return __bh_read(bh, op_flags, true);
> +}

I would just drop this helper. Both ext2 and ocfs2, which use it, can
avoid it very easily (by using bh_read()).
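
E.g. a call site like this (hypothetical, not quoting either filesystem):

	lock_buffer(bh);
	err = bh_read_locked(bh, 0);

could simply become:

	err = bh_read(bh, 0);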

> +
> +static inline void bh_read_batch(struct buffer_head *bhs[], int nr)
> +{
> +	__bh_read_batch(bhs, nr, 0, true);
> +}
> +
> +static inline void bh_readahead_batch(struct buffer_head *bhs[], int nr,
> +				      blk_opf_t op_flags)
> +{
> +	__bh_read_batch(bhs, nr, op_flags, false);
> +}
> +

In the kernel it is more common to pass the number of elements in the
array as the first argument and the array itself as the second. So rather:

static inline void bh_read_batch(int nr, struct buffer_head *bhs[])

and similarly for bh_readahead_batch().
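
A caller would then look like, e.g. (sketch):

	bh_readahead_batch(nr, bhs, REQ_RAHEAD);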

								Honza
Zhang Yi Aug. 31, 2022, 1:11 p.m. UTC | #2
Thanks for the review and suggestions.

On 2022/8/31 19:30, Jan Kara wrote:
> On Wed 31-08-22 15:20:59, Zhang Yi wrote:
>> The ll_rw_block() helper is fragile because it assumes that a locked
>> buffer means the buffer is under IO submitted by whoever holds the
>> lock, and it skips any buffer it fails to lock, so it is only safe on
>> the readahead path. Unfortunately, most filesystems still use this
>> helper, mistakenly, on the sync metadata read path. There is no
>> guarantee that whoever holds the buffer lock will always submit IO
>> (e.g. buffer_migrate_folio_norefs() after commit 88dbcbb3a484
>> ("blkdev: avoid migration stalls for blkdev pages")), which could lead
>> to a false-positive -EIO when submitting read IO.
>>
>> This patch adds some friendly buffer read helpers to prepare for
>> replacing ll_rw_block() and similar calls. The bh_readahead_[] helpers
>> must only be used on the readahead paths.
>>
>> Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
> 
> This looks mostly good. Just a few small nits below.
> 
[..]
>> diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
>> index c3863c417b00..8a01c07c0418 100644
>> --- a/include/linux/buffer_head.h
>> +++ b/include/linux/buffer_head.h
[..]
>> +static inline void bh_read_nowait(struct buffer_head *bh, blk_opf_t op_flags)
>> +{
>> +	lock_buffer(bh);
>> +	__bh_read(bh, op_flags, false);
>> +}
>> +
>> +static inline int bh_read(struct buffer_head *bh, blk_opf_t op_flags)
>> +{
>> +	lock_buffer(bh);
>> +	return __bh_read(bh, op_flags, true);
>> +}
> 
> I would use bh_uptodate_or_lock() helper in the above two functions to
> avoid locking the buffer in case it is already uptodate.
> 
Yes, good point. It seems we could also remove the "if (!buffer_uptodate(bh))"
check before these two helpers in the later patches, like in fs/jbd2/journal.c:

@@ -1893,15 +1893,14 @@ static int journal_get_superblock(journal_t *journal)
 {
 	struct buffer_head *bh;
 	journal_superblock_t *sb;
-	int err = -EIO;
+	int err;

 	bh = journal->j_sb_buffer;

 	J_ASSERT(bh != NULL);
-	if (!buffer_uptodate(bh)) {
-		ll_rw_block(REQ_OP_READ, 1, &bh);
-		wait_on_buffer(bh);
-		if (!buffer_uptodate(bh)) {
-			printk(KERN_ERR
-				"JBD2: IO error reading journal superblock\n");
-			goto out;
+	err = bh_read(bh, 0);
+	if (err) {
+		printk(KERN_ERR
+			"JBD2: IO error reading journal superblock\n");
+		goto out;
...

Thanks,
Yi.

Patch

diff --git a/fs/buffer.c b/fs/buffer.c
index a0b70b3239f3..a663191903ed 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3017,6 +3017,74 @@  int bh_uptodate_or_lock(struct buffer_head *bh)
 }
 EXPORT_SYMBOL(bh_uptodate_or_lock);
 
+/**
+ * __bh_read - Submit read for a locked buffer
+ * @bh: the buffer to read
+ * @op_flags: extra REQ_OP_* flags to apply besides REQ_OP_READ
+ * @wait: wait until reading finishes
+ *
+ * Returns zero on success (or when @wait is not set), and -EIO on error.
+ */
+int __bh_read(struct buffer_head *bh, blk_opf_t op_flags, bool wait)
+{
+	int ret = 0;
+
+	BUG_ON(!buffer_locked(bh));
+
+	if (buffer_uptodate(bh)) {
+		unlock_buffer(bh);
+		return ret;
+	}
+
+	get_bh(bh);
+	bh->b_end_io = end_buffer_read_sync;
+	submit_bh(REQ_OP_READ | op_flags, bh);
+	if (wait) {
+		wait_on_buffer(bh);
+		if (!buffer_uptodate(bh))
+			ret = -EIO;
+	}
+	return ret;
+}
+EXPORT_SYMBOL(__bh_read);
+
+/**
+ * __bh_read_batch - Submit read for a batch of unlocked buffers
+ * @bhs: array of buffers to read
+ * @nr: number of buffers in this batch
+ * @op_flags: extra REQ_OP_* flags to apply besides REQ_OP_READ
+ * @force_lock: wait to acquire the buffer lock if set, otherwise skip any
+ *              buffer that cannot be locked immediately
+ *
+ * Reads are submitted asynchronously; callers wait on the buffers as needed.
+ */
+void __bh_read_batch(struct buffer_head *bhs[],
+		     int nr, blk_opf_t op_flags, bool force_lock)
+{
+	int i;
+
+	for (i = 0; i < nr; i++) {
+		struct buffer_head *bh = bhs[i];
+
+		if (buffer_uptodate(bh))
+			continue;
+		if (!trylock_buffer(bh)) {
+			if (!force_lock)
+				continue;
+			lock_buffer(bh);
+		}
+		if (buffer_uptodate(bh)) {
+			unlock_buffer(bh);
+			continue;
+		}
+
+		bh->b_end_io = end_buffer_read_sync;
+		get_bh(bh);
+		submit_bh(REQ_OP_READ | op_flags, bh);
+	}
+}
+EXPORT_SYMBOL(__bh_read_batch);
+
 /**
  * bh_submit_read - Submit a locked buffer for reading
  * @bh: struct buffer_head
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index c3863c417b00..8a01c07c0418 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -232,6 +232,9 @@  void write_boundary_block(struct block_device *bdev,
 			sector_t bblock, unsigned blocksize);
 int bh_uptodate_or_lock(struct buffer_head *bh);
 int bh_submit_read(struct buffer_head *bh);
+int __bh_read(struct buffer_head *bh, blk_opf_t op_flags, bool wait);
+void __bh_read_batch(struct buffer_head *bhs[],
+		     int nr, blk_opf_t op_flags, bool force_lock);
 
 extern int buffer_heads_over_limit;
 
@@ -399,6 +402,40 @@  static inline struct buffer_head *__getblk(struct block_device *bdev,
 	return __getblk_gfp(bdev, block, size, __GFP_MOVABLE);
 }
 
+static inline void bh_readahead(struct buffer_head *bh, blk_opf_t op_flags)
+{
+	if (trylock_buffer(bh))
+		__bh_read(bh, op_flags, false);
+}
+
+static inline void bh_read_nowait(struct buffer_head *bh, blk_opf_t op_flags)
+{
+	lock_buffer(bh);
+	__bh_read(bh, op_flags, false);
+}
+
+static inline int bh_read(struct buffer_head *bh, blk_opf_t op_flags)
+{
+	lock_buffer(bh);
+	return __bh_read(bh, op_flags, true);
+}
+
+static inline int bh_read_locked(struct buffer_head *bh, blk_opf_t op_flags)
+{
+	return __bh_read(bh, op_flags, true);
+}
+
+static inline void bh_read_batch(struct buffer_head *bhs[], int nr)
+{
+	__bh_read_batch(bhs, nr, 0, true);
+}
+
+static inline void bh_readahead_batch(struct buffer_head *bhs[], int nr,
+				      blk_opf_t op_flags)
+{
+	__bh_read_batch(bhs, nr, op_flags, false);
+}
+
 /**
  *  __bread() - reads a specified block and returns the bh
  *  @bdev: the block_device to read from