diff mbox series

[v3,4/6] ext4: reorder map.m_flags checks in ext4_iomap_begin()

Message ID 8aa099e66ece73578f32cbbc411b6f3e52d53e52.1568282664.git.mbobrowski@mbobrowski.org
State Superseded
Headers show
Series ext4: port direct IO to iomap infrastructure | expand

Commit Message

Matthew Bobrowski Sept. 12, 2019, 11:04 a.m. UTC
For iomap direct IO write code path changes, we need to accommodate
the case where the block mapping flags passed to ext4_map_blocks()
will result in m_flags having both EXT4_MAP_MAPPED and
EXT4_MAP_UNWRITTEN bits set. In order for the allocated unwritten
extents to be converted properly in the end_io handler, iomap->type
must be set to IOMAP_UNWRITTEN, so we need to reshuffle the
conditional statement in order to achieve this.

This change is a no-op for the DAX code path as the block mapping flag
passed to ext4_map_blocks() when IS_DAX(inode) never results in
EXT4_MAP_MAPPED and EXT4_MAP_UNWRITTEN being set at once.

Signed-off-by: Matthew Bobrowski <mbobrowski@mbobrowski.org>
Reviewed-by: Ritesh Harjani <riteshh@linux.ibm.com>
---
 fs/ext4/inode.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

Comments

Christoph Hellwig Sept. 16, 2019, 12:05 p.m. UTC | #1
On Thu, Sep 12, 2019 at 09:04:30PM +1000, Matthew Bobrowski wrote:
> @@ -3581,10 +3581,21 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
>  		iomap->type = delalloc ? IOMAP_DELALLOC : IOMAP_HOLE;
>  		iomap->addr = IOMAP_NULL_ADDR;
>  	} else {
> -		if (map.m_flags & EXT4_MAP_MAPPED) {
> -			iomap->type = IOMAP_MAPPED;
> -		} else if (map.m_flags & EXT4_MAP_UNWRITTEN) {
> +		/*
> +		 * Flags passed to ext4_map_blocks() for direct IO
> +		 * writes can result in m_flags having both
> +		 * EXT4_MAP_MAPPED and EXT4_MAP_UNWRITTEN bits set. In
> +		 * order for allocated unwritten extents to be
> +		 * converted to written extents in the end_io handler
> +		 * correctly, we need to ensure that the iomap->type
> +		 * is also set appropriately in that case. Thus, we
> +		 * need to check whether EXT4_MAP_UNWRITTEN is set
> +		 * first.
> +		 */
> +		if (map.m_flags & EXT4_MAP_UNWRITTEN) {
>  			iomap->type = IOMAP_UNWRITTEN;
> +		} else if (map.m_flags & EXT4_MAP_MAPPED) {
> +			iomap->type = IOMAP_MAPPED;

I think much of this would benefit a lot from just being split up.
I hacked up a patch last week that split the ext4 direct I/O code
a bit, but this is completely untested and needs further splitting up,
but maybe you can take it as an inspiration for your series?
E.g. at least one helper for filling out the iomap from the ext4
map data, and one for the seek unwritten extent reporting.  The
split of the overall iomap ops seemed useful to me, but might not
be as important with the other cleanups:

---
From 7ac1a837e279e415882feae473e335b4a3d89c10 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 8 Sep 2019 10:44:28 +0200
Subject: ext4: refactor the iomap code

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/ext4/ext4.h  |   1 +
 fs/ext4/file.c  |   7 +-
 fs/ext4/inode.c | 279 ++++++++++++++++++++++++++----------------------
 3 files changed, 159 insertions(+), 128 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index bf660aa7a9e0..c8e34fe3daba 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3333,6 +3333,7 @@ static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
 }
 
 extern const struct iomap_ops ext4_iomap_ops;
+extern const struct iomap_ops ext4_report_iomap_ops;
 
 #endif	/* __KERNEL__ */
 
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 70b0438dbc94..cd2d41bc842b 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -328,7 +328,8 @@ static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf,
 	} else {
 		down_read(&EXT4_I(inode)->i_mmap_sem);
 	}
-	result = dax_iomap_fault(vmf, pe_size, &pfn, &error, &ext4_iomap_ops);
+	result = dax_iomap_fault(vmf, pe_size, &pfn, &error,
+			&ext4_iomap_ops);
 	if (write) {
 		ext4_journal_stop(handle);
 
@@ -492,12 +493,12 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
 						maxbytes, i_size_read(inode));
 	case SEEK_HOLE:
 		inode_lock_shared(inode);
-		offset = iomap_seek_hole(inode, offset, &ext4_iomap_ops);
+		offset = iomap_seek_hole(inode, offset, &ext4_report_iomap_ops);
 		inode_unlock_shared(inode);
 		break;
 	case SEEK_DATA:
 		inode_lock_shared(inode);
-		offset = iomap_seek_data(inode, offset, &ext4_iomap_ops);
+		offset = iomap_seek_data(inode, offset, &ext4_report_iomap_ops);
 		inode_unlock_shared(inode);
 		break;
 	}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 420fe3deed39..1c1b07f0cdbf 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3452,152 +3452,116 @@ static bool ext4_inode_datasync_dirty(struct inode *inode)
 	return inode->i_state & I_DIRTY_DATASYNC;
 }
 
-static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
-			    unsigned flags, struct iomap *iomap)
+static int ext4_set_iomap(struct inode *inode, struct iomap *iomap, u16 type,
+		unsigned long first_block, struct ext4_map_blocks *map)
 {
-	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	unsigned int blkbits = inode->i_blkbits;
-	unsigned long first_block, last_block;
-	struct ext4_map_blocks map;
-	bool delalloc = false;
-	int ret;
-
-	if ((offset >> blkbits) > EXT4_MAX_LOGICAL_BLOCK)
-		return -EINVAL;
-	first_block = offset >> blkbits;
-	last_block = min_t(loff_t, (offset + length - 1) >> blkbits,
-			   EXT4_MAX_LOGICAL_BLOCK);
-
-	if (flags & IOMAP_REPORT) {
-		if (ext4_has_inline_data(inode)) {
-			ret = ext4_inline_data_iomap(inode, iomap);
-			if (ret != -EAGAIN) {
-				if (ret == 0 && offset >= iomap->length)
-					ret = -ENOENT;
-				return ret;
-			}
-		}
-	} else {
-		if (WARN_ON_ONCE(ext4_has_inline_data(inode)))
-			return -ERANGE;
-	}
-
-	map.m_lblk = first_block;
-	map.m_len = last_block - first_block + 1;
-
-	if (flags & IOMAP_REPORT) {
-		ret = ext4_map_blocks(NULL, inode, &map, 0);
-		if (ret < 0)
-			return ret;
-
-		if (ret == 0) {
-			ext4_lblk_t end = map.m_lblk + map.m_len - 1;
-			struct extent_status es;
-
-			ext4_es_find_extent_range(inode, &ext4_es_is_delayed,
-						  map.m_lblk, end, &es);
-
-			if (!es.es_len || es.es_lblk > end) {
-				/* entire range is a hole */
-			} else if (es.es_lblk > map.m_lblk) {
-				/* range starts with a hole */
-				map.m_len = es.es_lblk - map.m_lblk;
-			} else {
-				ext4_lblk_t offs = 0;
-
-				if (es.es_lblk < map.m_lblk)
-					offs = map.m_lblk - es.es_lblk;
-				map.m_lblk = es.es_lblk + offs;
-				map.m_len = es.es_len - offs;
-				delalloc = true;
-			}
-		}
-	} else if (flags & IOMAP_WRITE) {
-		int dio_credits;
-		handle_t *handle;
-		int retries = 0;
-
-		/* Trim mapping request to maximum we can map at once for DIO */
-		if (map.m_len > DIO_MAX_BLOCKS)
-			map.m_len = DIO_MAX_BLOCKS;
-		dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
-retry:
-		/*
-		 * Either we allocate blocks and then we don't get unwritten
-		 * extent so we have reserved enough credits, or the blocks
-		 * are already allocated and unwritten and in that case
-		 * extent conversion fits in the credits as well.
-		 */
-		handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
-					    dio_credits);
-		if (IS_ERR(handle))
-			return PTR_ERR(handle);
-
-		ret = ext4_map_blocks(handle, inode, &map,
-				      EXT4_GET_BLOCKS_CREATE_ZERO);
-		if (ret < 0) {
-			ext4_journal_stop(handle);
-			if (ret == -ENOSPC &&
-			    ext4_should_retry_alloc(inode->i_sb, &retries))
-				goto retry;
-			return ret;
-		}
-
-		/*
-		 * If we added blocks beyond i_size, we need to make sure they
-		 * will get truncated if we crash before updating i_size in
-		 * ext4_iomap_end(). For faults we don't need to do that (and
-		 * even cannot because for orphan list operations inode_lock is
-		 * required) - if we happen to instantiate block beyond i_size,
-		 * it is because we race with truncate which has already added
-		 * the inode to the orphan list.
-		 */
-		if (!(flags & IOMAP_FAULT) && first_block + map.m_len >
-		    (i_size_read(inode) + (1 << blkbits) - 1) >> blkbits) {
-			int err;
-
-			err = ext4_orphan_add(handle, inode);
-			if (err < 0) {
-				ext4_journal_stop(handle);
-				return err;
-			}
-		}
-		ext4_journal_stop(handle);
-	} else {
-		ret = ext4_map_blocks(NULL, inode, &map, 0);
-		if (ret < 0)
-			return ret;
-	}
 
 	iomap->flags = 0;
 	if (ext4_inode_datasync_dirty(inode))
 		iomap->flags |= IOMAP_F_DIRTY;
 	iomap->bdev = inode->i_sb->s_bdev;
-	iomap->dax_dev = sbi->s_daxdev;
+	iomap->dax_dev = EXT4_SB(inode->i_sb)->s_daxdev;
 	iomap->offset = (u64)first_block << blkbits;
-	iomap->length = (u64)map.m_len << blkbits;
-
-	if (ret == 0) {
-		iomap->type = delalloc ? IOMAP_DELALLOC : IOMAP_HOLE;
+	iomap->length = (u64)map->m_len << blkbits;
+	if (type) {
+		iomap->type = type;
 		iomap->addr = IOMAP_NULL_ADDR;
 	} else {
-		if (map.m_flags & EXT4_MAP_MAPPED) {
+		if (map->m_flags & EXT4_MAP_MAPPED) {
 			iomap->type = IOMAP_MAPPED;
-		} else if (map.m_flags & EXT4_MAP_UNWRITTEN) {
+		} else if (map->m_flags & EXT4_MAP_UNWRITTEN) {
 			iomap->type = IOMAP_UNWRITTEN;
 		} else {
 			WARN_ON_ONCE(1);
 			return -EIO;
 		}
-		iomap->addr = (u64)map.m_pblk << blkbits;
+		iomap->addr = (u64)map->m_pblk << blkbits;
 	}
-
-	if (map.m_flags & EXT4_MAP_NEW)
+	if (map->m_flags & EXT4_MAP_NEW)
 		iomap->flags |= IOMAP_F_NEW;
-
 	return 0;
 }
 
+#ifdef CONFIG_FS_DAX
+static int ext4_iomap_alloc(struct inode *inode, unsigned flags,
+		unsigned long first_block, struct ext4_map_blocks *map)
+{
+	unsigned int blkbits = inode->i_blkbits;
+	int dio_credits, ret, retries = 0;
+	handle_t *handle;
+
+	/* Trim mapping request to maximum we can map at once for DIO */
+	if (map->m_len > DIO_MAX_BLOCKS)
+		map->m_len = DIO_MAX_BLOCKS;
+	dio_credits = ext4_chunk_trans_blocks(inode, map->m_len);
+retry:
+	/*
+	 * Either we allocate blocks and then we don't get unwritten extent so
+	 * we have reserved enough credits, or the blocks are already allocated
+	 * and unwritten and in that case extent conversion fits in the credits
+	 * as well.
+	 */
+	handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, dio_credits);
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+	ret = ext4_map_blocks(handle, inode, map, EXT4_GET_BLOCKS_CREATE_ZERO);
+	if (ret < 0)
+		goto journal_stop;
+
+	/*
+	 * If we added blocks beyond i_size, we need to make sure they will get
+	 * truncated if we crash before updating i_size in ext4_iomap_end().
+	 * For faults we don't need to do that (and even cannot because for
+	 * orphan list operations inode_lock is required) - if we happen to
+	 * instantiate block beyond i_size, it is because we race with truncate
+	 * which has already added the inode to the orphan list.
+	 */
+	if (!(flags & IOMAP_FAULT) && first_block + map->m_len >
+	    (i_size_read(inode) + (1 << blkbits) - 1) >> blkbits) {
+		int err;
+
+		err = ext4_orphan_add(handle, inode);
+		if (err < 0)
+			ret = err;
+	}
+journal_stop:
+	ext4_journal_stop(handle);
+	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+		goto retry;
+	return ret;
+}
+
+static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+		unsigned flags, struct iomap *iomap)
+{
+	unsigned int blkbits = inode->i_blkbits;
+	unsigned long first_block, last_block;
+	struct ext4_map_blocks map;
+	int ret;
+
+	if ((offset >> blkbits) > EXT4_MAX_LOGICAL_BLOCK)
+		return -EINVAL;
+	first_block = offset >> blkbits;
+	last_block = min_t(loff_t, (offset + length - 1) >> blkbits,
+			   EXT4_MAX_LOGICAL_BLOCK);
+
+	if (WARN_ON_ONCE(ext4_has_inline_data(inode)))
+		return -ERANGE;
+
+	map.m_lblk = first_block;
+	map.m_len = last_block - first_block + 1;
+	if (flags & IOMAP_WRITE)
+		ret = ext4_iomap_alloc(inode, flags, first_block, &map);
+	else
+		ret = ext4_map_blocks(NULL, inode, &map, 0);
+
+	if (ret < 0)
+		return ret;
+	return ext4_set_iomap(inode, iomap, ret ? 0 : IOMAP_HOLE, first_block,
+			      &map);
+}
+
 static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
 			  ssize_t written, unsigned flags, struct iomap *iomap)
 {
@@ -3654,6 +3618,71 @@ const struct iomap_ops ext4_iomap_ops = {
 	.iomap_begin		= ext4_iomap_begin,
 	.iomap_end		= ext4_iomap_end,
 };
+#endif /* CONFIG_FS_DAX */
+
+static u16 ext4_iomap_check_delalloc(struct inode *inode,
+		struct ext4_map_blocks *map)
+{
+	ext4_lblk_t end = map->m_lblk + map->m_len - 1;
+	struct extent_status es;
+
+	ext4_es_find_extent_range(inode, &ext4_es_is_delayed, map->m_lblk, end,
+			&es);
+
+	/* entire range is a hole */
+	if (!es.es_len || es.es_lblk > end)
+		return IOMAP_HOLE;
+	if (es.es_lblk <= map->m_lblk) {
+		ext4_lblk_t offs = 0;
+
+		if (es.es_lblk < map->m_lblk)
+			offs = map->m_lblk - es.es_lblk;
+		map->m_lblk = es.es_lblk + offs;
+		map->m_len = es.es_len - offs;
+		return IOMAP_DELALLOC;
+	}
+	/* range starts with a hole */
+	map->m_len = es.es_lblk - map->m_lblk;
+	return IOMAP_HOLE;
+}
+
+static int ext4_report_iomap_begin(struct inode *inode, loff_t offset,
+		loff_t length, unsigned flags, struct iomap *iomap)
+{
+	unsigned int blkbits = inode->i_blkbits;
+	unsigned long first_block, last_block;
+	struct ext4_map_blocks map;
+	u16 type = 0;
+	int ret;
+
+	if ((offset >> blkbits) > EXT4_MAX_LOGICAL_BLOCK)
+		return -EINVAL;
+	first_block = offset >> blkbits;
+	last_block = min_t(loff_t, (offset + length - 1) >> blkbits,
+			   EXT4_MAX_LOGICAL_BLOCK);
+
+	if (ext4_has_inline_data(inode)) {
+		ret = ext4_inline_data_iomap(inode, iomap);
+		if (ret != -EAGAIN) {
+			if (ret == 0 && offset >= iomap->length)
+				ret = -ENOENT;
+			return ret;
+		}
+	}
+
+	map.m_lblk = first_block;
+	map.m_len = last_block - first_block + 1;
+	ret = ext4_map_blocks(NULL, inode, &map, 0);
+	if (ret < 0)
+		return ret;
+	if (ret == 0)
+		type = ext4_iomap_check_delalloc(inode, &map);
+	return ext4_set_iomap(inode, iomap, type, first_block, &map);
+}
+
+const struct iomap_ops ext4_report_iomap_ops = {
+	.iomap_begin		= ext4_report_iomap_begin,
+};
 
 static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 			    ssize_t size, void *private)
Matthew Bobrowski Sept. 17, 2019, 12:48 p.m. UTC | #2
On Mon, Sep 16, 2019 at 05:05:33AM -0700, Christoph Hellwig wrote:
> On Thu, Sep 12, 2019 at 09:04:30PM +1000, Matthew Bobrowski wrote:
> > @@ -3581,10 +3581,21 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> >  		iomap->type = delalloc ? IOMAP_DELALLOC : IOMAP_HOLE;
> >  		iomap->addr = IOMAP_NULL_ADDR;
> >  	} else {
> > -		if (map.m_flags & EXT4_MAP_MAPPED) {
> > -			iomap->type = IOMAP_MAPPED;
> > -		} else if (map.m_flags & EXT4_MAP_UNWRITTEN) {
> > +		/*
> > +		 * Flags passed to ext4_map_blocks() for direct IO
> > +		 * writes can result in m_flags having both
> > +		 * EXT4_MAP_MAPPED and EXT4_MAP_UNWRITTEN bits set. In
> > +		 * order for allocated unwritten extents to be
> > +		 * converted to written extents in the end_io handler
> > +		 * correctly, we need to ensure that the iomap->type
> > +		 * is also set appropriately in that case. Thus, we
> > +		 * need to check whether EXT4_MAP_UNWRITTEN is set
> > +		 * first.
> > +		 */
> > +		if (map.m_flags & EXT4_MAP_UNWRITTEN) {
> >  			iomap->type = IOMAP_UNWRITTEN;
> > +		} else if (map.m_flags & EXT4_MAP_MAPPED) {
> > +			iomap->type = IOMAP_MAPPED;
> 
> I think much of this would benefit a lot from just being split up.
> I hacked up a patch last week that split the ext4 direct I/O code
> a bit, but this is completely untested and needs further splitup,
> but maybe you can take it as an inspiration for your series?

Nice, I really like this! :-)

The ext4_iomap_begin() callback is kind of already getting larger than it
should have to be and I can only see it growing moving forward, so why not
split it up now.

> E.g. at least one helper for filling out the iomap from the ext4
> map data, and one for the seek unwritten extent reporting.  The
> split of the overall iomap ops seemed useful to me, but might not
> be as important with the other cleanups:

Yeah, I think I'll leave the iomap operations as they are for now. Something
to definitely consider at a later point though.

--<M>--
Jan Kara Sept. 23, 2019, 3:08 p.m. UTC | #3
On Thu 12-09-19 21:04:30, Matthew Bobrowski wrote:
> For iomap direct IO write code path changes, we need to accommodate
> for the case where the block mapping flags passed to ext4_map_blocks()
> will result in m_flags having both EXT4_MAP_MAPPED and
> EXT4_MAP_UNWRITTEN bits set. In order for the allocated unwritten
> extents to be converted properly in the end_io handler, iomap->type
> must be set to IOMAP_UNWRITTEN, so we need to reshuffle the
> conditional statement in order to achieve this.
> 
> This change is a no-op for DAX code path as the block mapping flag
> passed to ext4_map_blocks() when IS_DAX(inode) never results in
> EXT4_MAP_MAPPED and EXT4_MAP_UNWRITTEN being set at once.
> 
> Signed-off-by: Matthew Bobrowski <mbobrowski@mbobrowski.org>
> Reviewed-by: Ritesh Harjani <riteshh@linux.ibm.com>

The patch looks good to me. You can add:

Reviewed-by: Jan Kara <jack@suse.cz>

								Honza

> ---
>  fs/ext4/inode.c | 17 ++++++++++++++---
>  1 file changed, 14 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 761ce6286b05..efb184928e51 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -3581,10 +3581,21 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
>  		iomap->type = delalloc ? IOMAP_DELALLOC : IOMAP_HOLE;
>  		iomap->addr = IOMAP_NULL_ADDR;
>  	} else {
> -		if (map.m_flags & EXT4_MAP_MAPPED) {
> -			iomap->type = IOMAP_MAPPED;
> -		} else if (map.m_flags & EXT4_MAP_UNWRITTEN) {
> +		/*
> +		 * Flags passed to ext4_map_blocks() for direct IO
> +		 * writes can result in m_flags having both
> +		 * EXT4_MAP_MAPPED and EXT4_MAP_UNWRITTEN bits set. In
> +		 * order for allocated unwritten extents to be
> +		 * converted to written extents in the end_io handler
> +		 * correctly, we need to ensure that the iomap->type
> +		 * is also set appropriately in that case. Thus, we
> +		 * need to check whether EXT4_MAP_UNWRITTEN is set
> +		 * first.
> +		 */
> +		if (map.m_flags & EXT4_MAP_UNWRITTEN) {
>  			iomap->type = IOMAP_UNWRITTEN;
> +		} else if (map.m_flags & EXT4_MAP_MAPPED) {
> +			iomap->type = IOMAP_MAPPED;
>  		} else {
>  			WARN_ON_ONCE(1);
>  			return -EIO;
> -- 
> 2.20.1
>
Matthew Bobrowski Sept. 24, 2019, 9:35 a.m. UTC | #4
On Mon, Sep 23, 2019 at 05:08:56PM +0200, Jan Kara wrote:
> On Thu 12-09-19 21:04:30, Matthew Bobrowski wrote:
> > For iomap direct IO write code path changes, we need to accommodate
> > for the case where the block mapping flags passed to ext4_map_blocks()
> > will result in m_flags having both EXT4_MAP_MAPPED and
> > EXT4_MAP_UNWRITTEN bits set. In order for the allocated unwritten
> > extents to be converted properly in the end_io handler, iomap->type
> > must be set to IOMAP_UNWRITTEN, so we need to reshuffle the
> > conditional statement in order to achieve this.
> > 
> > This change is a no-op for DAX code path as the block mapping flag
> > passed to ext4_map_blocks() when IS_DAX(inode) never results in
> > EXT4_MAP_MAPPED and EXT4_MAP_UNWRITTEN being set at once.
> > 
> > Signed-off-by: Matthew Bobrowski <mbobrowski@mbobrowski.org>
> > Reviewed-by: Ritesh Harjani <riteshh@linux.ibm.com>
> 
> The patch looks good to me. You can add:
> 
> Reviewed-by: Jan Kara <jack@suse.cz>

Thank you Jan!

Note that the updated patch series that I'll send through shortly has slightly
changed in the sense that I've split up a whole bunch of the iomap code. The
idea behind this was to reduce the overall clutter that exists within the
->iomap_begin() callback. This was recommended by Christoph, and I really like
the idea because moving forward it makes complete sense to do so. This
specific patch will remain as is though.

--<M>--
diff mbox series

Patch

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 761ce6286b05..efb184928e51 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3581,10 +3581,21 @@  static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 		iomap->type = delalloc ? IOMAP_DELALLOC : IOMAP_HOLE;
 		iomap->addr = IOMAP_NULL_ADDR;
 	} else {
-		if (map.m_flags & EXT4_MAP_MAPPED) {
-			iomap->type = IOMAP_MAPPED;
-		} else if (map.m_flags & EXT4_MAP_UNWRITTEN) {
+		/*
+		 * Flags passed to ext4_map_blocks() for direct IO
+		 * writes can result in m_flags having both
+		 * EXT4_MAP_MAPPED and EXT4_MAP_UNWRITTEN bits set. In
+		 * order for allocated unwritten extents to be
+		 * converted to written extents in the end_io handler
+		 * correctly, we need to ensure that the iomap->type
+		 * is also set appropriately in that case. Thus, we
+		 * need to check whether EXT4_MAP_UNWRITTEN is set
+		 * first.
+		 */
+		if (map.m_flags & EXT4_MAP_UNWRITTEN) {
 			iomap->type = IOMAP_UNWRITTEN;
+		} else if (map.m_flags & EXT4_MAP_MAPPED) {
+			iomap->type = IOMAP_MAPPED;
 		} else {
 			WARN_ON_ONCE(1);
 			return -EIO;