diff mbox series

[RFC,v2,for-6.8/block,15/18] buffer: add a new helper to read sb block

Message ID 20231211140753.975297-1-yukuai1@huaweicloud.com
State New
Headers show
Series block: don't access bd_inode directly from other modules | expand

Commit Message

Yu Kuai Dec. 11, 2023, 2:07 p.m. UTC
From: Yu Kuai <yukuai3@huawei.com>

Unlike __bread_gfp(), ext4 has special handing while reading sb block:

1) __GFP_NOFAIL is not set, and memory allocation can fail;
2) If buffer write failed before, set buffer uptodate and don't read
   block from disk;
3) REQ_META is set for all IO, and REQ_PRIO is set for reading xattr;
4) If failed, return error ptr instead of NULL;

This patch add a new helper __bread_gfp2() that will match above 2 and 3(
1 will be used, and 4 will still be encapsulated by ext4), and prepare to
prevent calling mapping_gfp_constraint() directly on bd_inode->i_mapping
in ext4.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 fs/buffer.c                 | 68 ++++++++++++++++++++++++++-----------
 include/linux/buffer_head.h | 18 +++++++++-
 2 files changed, 65 insertions(+), 21 deletions(-)

Comments

Jan Kara Dec. 11, 2023, 5:27 p.m. UTC | #1
On Mon 11-12-23 22:07:53, Yu Kuai wrote:
> From: Yu Kuai <yukuai3@huawei.com>
> 
> Unlike __bread_gfp(), ext4 has special handing while reading sb block:
> 
> 1) __GFP_NOFAIL is not set, and memory allocation can fail;
> 2) If buffer write failed before, set buffer uptodate and don't read
>    block from disk;
> 3) REQ_META is set for all IO, and REQ_PRIO is set for reading xattr;
> 4) If failed, return error ptr instead of NULL;
> 
> This patch add a new helper __bread_gfp2() that will match above 2 and 3(
> 1 will be used, and 4 will still be encapsulated by ext4), and prepare to
> prevent calling mapping_gfp_constraint() directly on bd_inode->i_mapping
> in ext4.
> 
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
...
> +/*
> + * This works like __bread_gfp() except:
> + * 1) If buffer write failed before, set buffer uptodate and don't read
> + * block from disk;
> + * 2) Caller can pass in additional op_flags like REQ_META;
> + */
> +struct buffer_head *
> +__bread_gfp2(struct block_device *bdev, sector_t block, unsigned int size,
> +	     blk_opf_t op_flags, gfp_t gfp)
> +{
> +	return bread_gfp(bdev, block, size, op_flags, gfp, true);
> +}
> +EXPORT_SYMBOL(__bread_gfp2);

__bread_gfp2() is not a great name, why not just using bread_gfp()
directly? I'm not a huge fan of boolean arguments but three different flags
arguments would be too much for my taste ;) so I guess I can live with
that.

								Honza
Yu Kuai Dec. 12, 2023, 1:32 a.m. UTC | #2
Hi,

在 2023/12/12 1:27, Jan Kara 写道:
> On Mon 11-12-23 22:07:53, Yu Kuai wrote:
>> From: Yu Kuai <yukuai3@huawei.com>
>>
>> Unlike __bread_gfp(), ext4 has special handing while reading sb block:
>>
>> 1) __GFP_NOFAIL is not set, and memory allocation can fail;
>> 2) If buffer write failed before, set buffer uptodate and don't read
>>     block from disk;
>> 3) REQ_META is set for all IO, and REQ_PRIO is set for reading xattr;
>> 4) If failed, return error ptr instead of NULL;
>>
>> This patch add a new helper __bread_gfp2() that will match above 2 and 3(
>> 1 will be used, and 4 will still be encapsulated by ext4), and prepare to
>> prevent calling mapping_gfp_constraint() directly on bd_inode->i_mapping
>> in ext4.
>>
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ...
>> +/*
>> + * This works like __bread_gfp() except:
>> + * 1) If buffer write failed before, set buffer uptodate and don't read
>> + * block from disk;
>> + * 2) Caller can pass in additional op_flags like REQ_META;
>> + */
>> +struct buffer_head *
>> +__bread_gfp2(struct block_device *bdev, sector_t block, unsigned int size,
>> +	     blk_opf_t op_flags, gfp_t gfp)
>> +{
>> +	return bread_gfp(bdev, block, size, op_flags, gfp, true);
>> +}
>> +EXPORT_SYMBOL(__bread_gfp2);
> 
> __bread_gfp2() is not a great name, why not just using bread_gfp()
> directly? I'm not a huge fan of boolean arguments but three different flags
> arguments would be too much for my taste ;) so I guess I can live with
> that.

I agree that __bread_gfp2 is not a greate name, if possible, I'll try to
figure out a better name for v3.

Thanks for reviewing this patchset!
Kuai
> 
> 								Honza
>
Christoph Hellwig Dec. 12, 2023, 1:25 p.m. UTC | #3
On Mon, Dec 11, 2023 at 10:07:53PM +0800, Yu Kuai wrote:
> +static __always_inline int buffer_uptodate_or_error(struct buffer_head *bh)
> +{
> +	/*
> +	 * If the buffer has the write error flag, data was failed to write
> +	 * out in the block. In this case, set buffer uptodate to prevent
> +	 * reading old data.
> +	 */
> +	if (buffer_write_io_error(bh))
> +		set_buffer_uptodate(bh);
> +	return buffer_uptodate(bh);
> +}

So - risking this blows up into a lot of nasty work: Why do we even
clear the uptodate flag on write errors?  Doing so makes not sense to
me as the data isn't any less uptodate just because we failed to write
it..
Jan Kara Dec. 12, 2023, 2:11 p.m. UTC | #4
On Tue 12-12-23 05:25:25, Christoph Hellwig wrote:
> On Mon, Dec 11, 2023 at 10:07:53PM +0800, Yu Kuai wrote:
> > +static __always_inline int buffer_uptodate_or_error(struct buffer_head *bh)
> > +{
> > +	/*
> > +	 * If the buffer has the write error flag, data was failed to write
> > +	 * out in the block. In this case, set buffer uptodate to prevent
> > +	 * reading old data.
> > +	 */
> > +	if (buffer_write_io_error(bh))
> > +		set_buffer_uptodate(bh);
> > +	return buffer_uptodate(bh);
> > +}
> 
> So - risking this blows up into a lot of nasty work: Why do we even
> clear the uptodate flag on write errors?  Doing so makes not sense to
> me as the data isn't any less uptodate just because we failed to write
> it..

Historic reasons I'd say (buffer_write_io_error isn't *that* old - from
2003 it seems). And yes, it would make a lot of sense to keep uptodate flag
set and just rely on buffer_write_io_error() but it also means going
through all buffer_uptodate() checks in filesystems and determining which
need changing to buffer_write_io_error() which is something nobody is keen
on doing ;)

								Honza
diff mbox series

Patch

diff --git a/fs/buffer.c b/fs/buffer.c
index 967f34b70aa8..188bd36c9fea 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1255,16 +1255,19 @@  void __bforget(struct buffer_head *bh)
 }
 EXPORT_SYMBOL(__bforget);
 
-static struct buffer_head *__bread_slow(struct buffer_head *bh)
+static struct buffer_head *__bread_slow(struct buffer_head *bh,
+					blk_opf_t op_flags,
+					bool check_write_error)
 {
 	lock_buffer(bh);
-	if (buffer_uptodate(bh)) {
+	if (buffer_uptodate(bh) ||
+	    (check_write_error && buffer_uptodate_or_error(bh))) {
 		unlock_buffer(bh);
 		return bh;
 	} else {
 		get_bh(bh);
 		bh->b_end_io = end_buffer_read_sync;
-		submit_bh(REQ_OP_READ, bh);
+		submit_bh(REQ_OP_READ | op_flags, bh);
 		wait_on_buffer(bh);
 		if (buffer_uptodate(bh))
 			return bh;
@@ -1445,6 +1448,31 @@  void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
 }
 EXPORT_SYMBOL(__breadahead);
 
+static struct buffer_head *
+bread_gfp(struct block_device *bdev, sector_t block, unsigned int size,
+	  blk_opf_t op_flags, gfp_t gfp, bool check_write_error)
+{
+	struct buffer_head *bh;
+
+	gfp |= mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS);
+
+	/*
+	 * Prefer looping in the allocator rather than here, at least that
+	 * code knows what it's doing.
+	 */
+	gfp |= __GFP_NOFAIL;
+
+	bh = bdev_getblk(bdev, block, size, gfp);
+	if (unlikely(!bh))
+		return NULL;
+
+	if (buffer_uptodate(bh) ||
+	    (check_write_error && buffer_uptodate_or_error(bh)))
+		return bh;
+
+	return __bread_slow(bh, op_flags, check_write_error);
+}
+
 /**
  *  __bread_gfp() - reads a specified block and returns the bh
  *  @bdev: the block_device to read from
@@ -1458,27 +1486,27 @@  EXPORT_SYMBOL(__breadahead);
  *  It returns NULL if the block was unreadable.
  */
 struct buffer_head *
-__bread_gfp(struct block_device *bdev, sector_t block,
-		   unsigned size, gfp_t gfp)
+__bread_gfp(struct block_device *bdev, sector_t block, unsigned int size,
+	    gfp_t gfp)
 {
-	struct buffer_head *bh;
-
-	gfp |= mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS);
-
-	/*
-	 * Prefer looping in the allocator rather than here, at least that
-	 * code knows what it's doing.
-	 */
-	gfp |= __GFP_NOFAIL;
-
-	bh = bdev_getblk(bdev, block, size, gfp);
-
-	if (likely(bh) && !buffer_uptodate(bh))
-		bh = __bread_slow(bh);
-	return bh;
+	return bread_gfp(bdev, block, size, 0, gfp, false);
 }
 EXPORT_SYMBOL(__bread_gfp);
 
+/*
+ * This works like __bread_gfp() except:
+ * 1) If buffer write failed before, set buffer uptodate and don't read
+ * block from disk;
+ * 2) Caller can pass in additional op_flags like REQ_META;
+ */
+struct buffer_head *
+__bread_gfp2(struct block_device *bdev, sector_t block, unsigned int size,
+	     blk_opf_t op_flags, gfp_t gfp)
+{
+	return bread_gfp(bdev, block, size, op_flags, gfp, true);
+}
+EXPORT_SYMBOL(__bread_gfp2);
+
 static void __invalidate_bh_lrus(struct bh_lru *b)
 {
 	int i;
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 5f23ee599889..751b2744b4ae 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -171,6 +171,18 @@  static __always_inline int buffer_uptodate(const struct buffer_head *bh)
 	return test_bit_acquire(BH_Uptodate, &bh->b_state);
 }
 
+static __always_inline int buffer_uptodate_or_error(struct buffer_head *bh)
+{
+	/*
+	 * If the buffer has the write error flag, data was failed to write
+	 * out in the block. In this case, set buffer uptodate to prevent
+	 * reading old data.
+	 */
+	if (buffer_write_io_error(bh))
+		set_buffer_uptodate(bh);
+	return buffer_uptodate(bh);
+}
+
 static inline unsigned long bh_offset(const struct buffer_head *bh)
 {
 	return (unsigned long)(bh)->b_data & (page_size(bh->b_page) - 1);
@@ -231,7 +243,11 @@  void __brelse(struct buffer_head *);
 void __bforget(struct buffer_head *);
 void __breadahead(struct block_device *, sector_t block, unsigned int size);
 struct buffer_head *__bread_gfp(struct block_device *,
-				sector_t block, unsigned size, gfp_t gfp);
+				sector_t block, unsigned int size, gfp_t gfp);
+struct buffer_head *__bread_gfp2(struct block_device *bdev, sector_t block,
+				 unsigned int size, blk_opf_t op_flags,
+				 gfp_t gfp);
+
 struct buffer_head *alloc_buffer_head(gfp_t gfp_flags);
 void free_buffer_head(struct buffer_head * bh);
 void unlock_buffer(struct buffer_head *bh);