diff mbox series

[v3,08/13] ext4: fast-commit commit range tracking

Message ID 20191001074101.256523-9-harshadshirwadkar@gmail.com
State Changes Requested
Headers show
Series ext4: add fast commit support | expand

Commit Message

harshad shirwadkar Oct. 1, 2019, 7:40 a.m. UTC
With this patch, we track logical range of file offsets that need to
be committed using fast commit. This allows us to find file extents
that need to be committed during the commit time.

Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
---
 fs/ext4/ext4_jbd2.c | 34 ++++++++++++++++++++++++++++++++++
 fs/ext4/ext4_jbd2.h |  2 ++
 fs/ext4/inline.c    |  4 +++-
 fs/ext4/inode.c     | 17 ++++++++++++++++-
 4 files changed, 55 insertions(+), 2 deletions(-)

Comments

Theodore Ts'o Oct. 16, 2019, 9:36 p.m. UTC | #1
On Tue, Oct 01, 2019 at 12:40:57AM -0700, Harshad Shirwadkar wrote:
> With this patch, we track logical range of file offsets that need to
> be committed using fast commit. This allows us to find file extents
> that need to be committed during the commit time.

We don't actually need to track when data is modified in the page
cache, which is what this commit is actually doing.  We only need to
track newly allocated blocks, at granularity of the logical block
number.

That's because we only need to force out newly allocated blocks to
make sure we don't reveal stale data when we are in data=ordered mode.
And it also follows that we don't need to track logical block ranges
and submit inode data in data=writeback or data=journalled mode.

In the case where the user has actually called fsync() on the the
inode, we do a data integrity writeback in ext4_sync_file, and that's
independent on the fast commit code.

But if the file is being modified using buffered writes, or if an
already allocated block is changed, and the file has *not* been
changed, we don't need to write out those blocks on a fast commit.
For example, in the case where we are the fast commit is being
initiated via ext4_nfs_commit_metadata() -> ext4_write_inode(), we
only care about submitting data for the newly allocated blocks.  And
that's what we want to track here.

Hence, all of the callers of ext4_fc_update_commit_range() here are in
the wrong place.  (Also, they are calling ext4_fc_update_commit_range
with byte offsets, when the function is expecting logical block
numbers, but that really matter, since the existing call sites need to
be all removed and replaced with new ones in ext4_map_blocks().

       	       	   	    	     - Ted
harshad shirwadkar Oct. 30, 2019, 5:12 a.m. UTC | #2
Thanks for this, I'll remove these calls and add calls in ext4_map_blocks.

On Wed, Oct 16, 2019 at 2:36 PM Theodore Y. Ts'o <tytso@mit.edu> wrote:
>
> On Tue, Oct 01, 2019 at 12:40:57AM -0700, Harshad Shirwadkar wrote:
> > With this patch, we track logical range of file offsets that need to
> > be committed using fast commit. This allows us to find file extents
> > that need to be committed during the commit time.
>
> We don't actually need to track when data is modified in the page
> cache, which is what this commit is actually doing.  We only need to
> track newly allocated blocks, at granularity of the logical block
> number.
>
> That's because we only need to force out newly allocated blocks to
> make sure we don't reveal stale data when we are in data=ordered mode.
> And it also follows that we don't need to track logical block ranges
> and submit inode data in data=writeback or data=journalled mode.
>
> In the case where the user has actually called fsync() on the the
> inode, we do a data integrity writeback in ext4_sync_file, and that's
> independent on the fast commit code.
>
> But if the file is being modified using buffered writes, or if an
> already allocated block is changed, and the file has *not* been
> changed, we don't need to write out those blocks on a fast commit.
> For example, in the case where we are the fast commit is being
> initiated via ext4_nfs_commit_metadata() -> ext4_write_inode(), we
> only care about submitting data for the newly allocated blocks.  And
> that's what we want to track here.
>
> Hence, all of the callers of ext4_fc_update_commit_range() here are in
> the wrong place.  (Also, they are calling ext4_fc_update_commit_range
> with byte offsets, when the function is expecting logical block
Thanks for pointing that out. My code as of now works with logical
file offsets instead of logical block offsets. So I should have used
file offset type instead of logical block type for arguments of
ext4_fc_update_commit_range. But it makes sense to just use logical
block offsets everywhere. I'll fix this in next version.

> numbers, but that really matter, since the existing call sites need to
> be all removed and replaced with new ones in ext4_map_blocks().
>
>                                      - Ted
diff mbox series

Patch

diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index e70ad7a8e46e..0bb8de2139a5 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -405,6 +405,40 @@  void ext4_fc_del(struct inode *inode)
 	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
 }
 
+void ext4_fc_update_commit_range(struct inode *inode, ext4_lblk_t start,
+				 ext4_lblk_t end)
+{
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	tid_t running_txn_tid = get_running_txn_tid(inode->i_sb);
+
+	if (!ext4_should_fast_commit(inode->i_sb))
+		return;
+
+	if (inode->i_ino < EXT4_FIRST_INO(inode->i_sb))
+		ext4_debug("Special inode %ld being modified\n", inode->i_ino);
+
+	if (!EXT4_SB(inode->i_sb)->s_fc_eligible)
+		return;
+
+	write_lock(&ei->i_fc.fc_lock);
+	if (ei->i_fc.fc_tid == running_txn_tid) {
+		ei->i_fc.fc_lblk_start = ei->i_fc.fc_lblk_start < start ?
+					 ei->i_fc.fc_lblk_start : start;
+		ei->i_fc.fc_lblk_end = ei->i_fc.fc_lblk_end > end ?
+				     ei->i_fc.fc_lblk_end : end;
+		write_unlock(&ei->i_fc.fc_lock);
+		return;
+	}
+
+	ext4_reset_inode_fc_info(&ei->i_fc);
+	ei->i_fc.fc_eligible = true;
+	ei->i_fc.fc_lblk_start = start;
+	ei->i_fc.fc_lblk_end = end;
+	ei->i_fc.fc_tid = running_txn_tid;
+	write_unlock(&ei->i_fc.fc_lock);
+
+}
+
 void ext4_fc_mark_new(struct inode *inode)
 {
 	struct ext4_inode_info *ei = EXT4_I(inode);
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 65f20fbfb002..2cb7e7e1f025 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -501,6 +501,8 @@  ext4_fc_mark_ineligible(struct inode *inode)
 	spin_unlock(&sbi->s_fc_lock);
 }
 
+void ext4_fc_update_commit_range(struct inode *inode, ext4_lblk_t start,
+				 ext4_lblk_t end);
 
 void ext4_fc_mark_new(struct inode *inode);
 bool ext4_is_inode_fc_ineligible(struct inode *inode);
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index fbd561cba098..66b2c0e3f7e4 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -966,8 +966,10 @@  int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
 	 * But it's important to update i_size while still holding page lock:
 	 * page writeout could otherwise come in and zero beyond i_size.
 	 */
-	if (pos+copied > inode->i_size)
+	if (pos+copied > inode->i_size) {
+		ext4_fc_update_commit_range(inode, inode->i_size, pos + copied);
 		i_size_write(inode, pos+copied);
+	}
 	unlock_page(page);
 	put_page(page);
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 6d2efbd9aba9..ea039e3e1a4d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1549,6 +1549,8 @@  static int ext4_journalled_write_end(struct file *file,
 			SetPageUptodate(page);
 	}
 	size_changed = ext4_update_inode_size(inode, pos + copied);
+	ext4_fc_update_commit_range(inode, pos, pos + copied);
+
 	ext4_set_inode_state(inode, EXT4_STATE_JDATA);
 	EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
 	unlock_page(page);
@@ -2610,8 +2612,12 @@  static int mpage_map_and_submit_extent(handle_t *handle,
 		i_size = i_size_read(inode);
 		if (disksize > i_size)
 			disksize = i_size;
-		if (disksize > EXT4_I(inode)->i_disksize)
+		if (disksize > EXT4_I(inode)->i_disksize) {
+			ext4_fc_update_commit_range(inode,
+						    EXT4_I(inode)->i_disksize,
+						    disksize);
 			EXT4_I(inode)->i_disksize = disksize;
+		}
 		up_write(&EXT4_I(inode)->i_data_sem);
 		err2 = ext4_mark_inode_dirty(handle, inode);
 		ext4_fc_enqueue_inode(handle, inode);
@@ -3220,6 +3226,8 @@  static int ext4_da_write_end(struct file *file,
 		}
 	}
 
+	ext4_fc_update_commit_range(inode, pos, pos + copied);
+
 	if (write_mode != CONVERT_INLINE_DATA &&
 	    ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) &&
 	    ext4_has_inline_data(inode))
@@ -3627,6 +3635,7 @@  static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
 		goto orphan_del;
 	}
 
+	ext4_fc_update_commit_range(inode, offset, offset + written);
 	if (ext4_update_inode_size(inode, offset + written)) {
 		ext4_mark_inode_dirty(handle, inode);
 		ext4_fc_enqueue_inode(handle, inode);
@@ -3751,6 +3760,7 @@  static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
 		ext4_update_i_disksize(inode, inode->i_size);
 		ext4_journal_stop(handle);
 	}
+	ext4_fc_update_commit_range(inode, offset, offset + count);
 
 	BUG_ON(iocb->private == NULL);
 
@@ -3869,6 +3879,8 @@  static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
 				ext4_mark_inode_dirty(handle, inode);
 				ext4_fc_enqueue_inode(handle, inode);
 			}
+			ext4_fc_update_commit_range(inode, offset,
+						    offset + end);
 		}
 		err = ext4_journal_stop(handle);
 		if (ret == 0)
@@ -5327,6 +5339,9 @@  static int ext4_do_update_inode(handle_t *handle,
 			cpu_to_le16(ei->i_file_acl >> 32);
 	raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
 	if (ei->i_disksize != ext4_isize(inode->i_sb, raw_inode)) {
+		ext4_fc_update_commit_range(inode,
+					    ext4_isize(inode->i_sb, raw_inode),
+					    ei->i_disksize);
 		ext4_isize_set(raw_inode, ei->i_disksize);
 		need_datasync = 1;
 	}