diff mbox series

[v3,06/13] ext4: add fields that are needed to track changed files

Message ID 20191001074101.256523-7-harshadshirwadkar@gmail.com
State Changes Requested
Headers show
Series ext4: add fast commit support | expand

Commit Message

harshad shirwadkar Oct. 1, 2019, 7:40 a.m. UTC
Ext4's fast commit feature tracks changed files and maintains them in
a queue. We also remember for each file the logical block range that
needs to be committed. This patch adds these fields to ext4_inode_info
and ext4_sb_info and also adds initialization calls.

Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
---
 fs/ext4/ext4.h      | 60 +++++++++++++++++++++++++++++++++++++++++++++
 fs/ext4/ext4_jbd2.c | 20 +++++++++++++++
 fs/ext4/ext4_jbd2.h |  2 ++
 fs/ext4/ialloc.c    |  1 +
 fs/ext4/inode.c     |  1 +
 fs/ext4/super.c     |  7 ++++++
 6 files changed, 91 insertions(+)

Comments

Theodore Ts'o Oct. 16, 2019, 6:26 p.m. UTC | #1
On Tue, Oct 01, 2019 at 12:40:55AM -0700, Harshad Shirwadkar wrote:
> +/*
> + * Ext4 fast commit inode specific information
> + */
> +struct ext4_fast_commit_inode_info {

I think it would be better to move the contents of this structure
directly into ext4_inode_info, instead of adding this structure to
ext4_inode_info; the structure is never used in a free-standing
context.

> +	/*
> +	 * Flag indicating whether this inode is eligible for fast commits or
> +	 * not.
> +	 */
> +	bool fc_eligible;
> +
> +	/*
> +	 * Flag indicating whether this inode is newly created during this
> +	 * tid:subtid.
> +	 */
> +	bool fc_new;

These two bools could be replaced using EXT4_STATE_* flags.  Grep for
EXT4_STATE_NEWENTRY to see an example of how an EXT4_STATE_ flag is
defined and used.


> +	rwlock_t fc_lock;

What is this used for?  If it's only just to protect the i_fc_list
list_head, maybe name it i_fc_list_lock?

> diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
> index 764ff4c56233..ff30f3015551 100644
> --- a/fs/ext4/ialloc.c
> +++ b/fs/ext4/ialloc.c
> @@ -1131,6 +1131,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
>  
>  	ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
>  	ext4_set_inode_state(inode, EXT4_STATE_NEW);
> +	ext4_init_inode_fc_info(inode);
>  
>  	ei->i_extra_isize = sbi->s_want_extra_isize;
>  	ei->i_inline_off = 0;

I don't think this is necessary; the inode was returned by ext4_iget,
so the ext4_alloc_inode() will have already called that function.


> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 420fe3deed39..f230a888eddd 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -4996,6 +4996,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
>  	for (block = 0; block < EXT4_N_BLOCKS; block++)
>  		ei->i_data[block] = raw_inode->i_block[block];
>  	INIT_LIST_HEAD(&ei->i_orphan);
> +	ext4_init_inode_fc_info(&ei->vfs_inode);
>  

The inode here was returned by iget_locked(), which means
ext4_alloc_inode() will have been called.

> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 7725eb2105f4..c90337fc98c1 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -1139,6 +1140,7 @@ static void init_once(void *foo)
>  	init_rwsem(&ei->i_data_sem);
>  	init_rwsem(&ei->i_mmap_sem);
>  	inode_init_once(&ei->vfs_inode);
> +	ext4_init_inode_fc_info(&ei->vfs_inode);
>  }

Maybe pull the rwlock_init() out of ext4_init_inode_fc_info() and
stuff it here?

Basically, it looks like certain fields are getting redundantly
initalized a lot.  The init_once function will initialize those fields
that will be reset when the structure is released.  If we are sure
that it will be reset (e.g., the spinlock will be reset), then we can
initialize it once in init_once() and then not re-initializing in
other places, such as ext4_alloc_inode().

There are some people who think it's not worth it to avoid using
init_once, since this can cause bugs if it turns out it wasn't
properly reset at the time when the object is released.  So the other
approach is to drop the ext4_init_inode_fc_info() and then just
reinitialize the spinlock every time.  (OTOH, if someone else is still
holding on the spinlock when you release it, then reinitialize the
spinlock can *also* lead to a very hard-to-debug crash.)

	     	    	      	   		 - Ted
diff mbox series

Patch

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index becbda38b7db..c36ec23046f3 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -921,6 +921,48 @@  enum {
 	I_DATA_SEM_QUOTA,
 };
 
+/*
+ * Ext4 fast commit inode specific information
+ */
+struct ext4_fast_commit_inode_info {
+	/*
+	 * TID of when this struct was last updated. If fc_tid !=
+	 * running transaction tid, then none of the other fields in this struct
+	 * are valid. Don't directly modify fields in this struct. Use wrappers
+	 * provided in ext4_jbd2.c.
+	 */
+	tid_t fc_tid;
+	/*
+	 * Start of logical block range that needs to be committed in this fast
+	 * commit
+	 */
+	ext4_lblk_t fc_lblk_start;
+
+	/*
+	 * End of logical block range that needs to be committed in this fast
+	 * commit
+	 */
+	ext4_lblk_t fc_lblk_end;
+
+	/*
+	 * Inode number of the directory that contains this inode. This field
+	 * is onlt valid if fc_new is set.
+	 */
+	u32 fc_parent_ino;
+
+	/*
+	 * Flag indicating whether this inode is eligible for fast commits or
+	 * not.
+	 */
+	bool fc_eligible;
+
+	/*
+	 * Flag indicating whether this inode is newly created during this
+	 * tid:subtid.
+	 */
+	bool fc_new;
+	rwlock_t fc_lock;
+};
 
 /*
  * fourth extended file system inode data in memory
@@ -955,6 +997,12 @@  struct ext4_inode_info {
 
 	struct list_head i_orphan;	/* unlinked but open inodes */
 
+	struct list_head i_fc_list;	/*
+					 * inodes that need fast commit
+					 * protected by sbi->s_fc_lock.
+					 */
+	struct ext4_fast_commit_inode_info i_fc;
+
 	/*
 	 * i_disksize keeps track of what the inode size is ON DISK, not
 	 * in memory.  During truncate, i_size is set to the new size by
@@ -1058,7 +1106,9 @@  struct ext4_inode_info {
 	 * fsync and fdatasync, respectively.
 	 */
 	tid_t i_sync_tid;
+	tid_t i_sync_subtid;
 	tid_t i_datasync_tid;
+	tid_t i_datasync_subtid;
 
 #ifdef CONFIG_QUOTA
 	struct dquot *i_dquot[MAXQUOTAS];
@@ -1529,6 +1579,16 @@  struct ext4_sb_info {
 	/* Barrier between changing inodes' journal flags and writepages ops. */
 	struct percpu_rw_semaphore s_journal_flag_rwsem;
 	struct dax_device *s_daxdev;
+
+	/* Ext4 fast commit stuff */
+	bool s_fc_replay;		/* Fast commit replay in progress */
+	struct list_head s_fc_q;	/* Inodes that need fast commit. */
+	__u32 s_fc_q_cnt;		/* Number of inodes in the fc queue */
+	bool s_fc_eligible;		/*
+					 * Are changes after the last commit
+					 * eligible for fast commit?
+					 */
+	spinlock_t s_fc_lock;
 };
 
 static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 7c70b08d104c..9066bcfbee29 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -330,3 +330,23 @@  int __ext4_handle_dirty_super(const char *where, unsigned int line,
 		mark_buffer_dirty(bh);
 	return err;
 }
+
+static inline
+void ext4_reset_inode_fc_info(struct ext4_fast_commit_inode_info *i_fc)
+{
+	i_fc->fc_tid = 0;
+	i_fc->fc_lblk_start = 0;
+	i_fc->fc_lblk_end = 0;
+	i_fc->fc_parent_ino = 0;
+	i_fc->fc_eligible = false;
+	i_fc->fc_new = false;
+}
+
+void ext4_init_inode_fc_info(struct inode *inode)
+{
+	struct ext4_inode_info *ei = EXT4_I(inode);
+
+	ext4_reset_inode_fc_info(&ei->i_fc);
+	INIT_LIST_HEAD(&ei->i_fc_list);
+	rwlock_init(&ei->i_fc.fc_lock);
+}
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index ef8fcf7d0d3b..2305c1acd415 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -459,4 +459,6 @@  static inline int ext4_should_dioread_nolock(struct inode *inode)
 	return 1;
 }
 
+void ext4_init_inode_fc_info(struct inode *inode);
+
 #endif	/* _EXT4_JBD2_H */
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 764ff4c56233..ff30f3015551 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1131,6 +1131,7 @@  struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
 
 	ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
 	ext4_set_inode_state(inode, EXT4_STATE_NEW);
+	ext4_init_inode_fc_info(inode);
 
 	ei->i_extra_isize = sbi->s_want_extra_isize;
 	ei->i_inline_off = 0;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 420fe3deed39..f230a888eddd 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4996,6 +4996,7 @@  struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
 	for (block = 0; block < EXT4_N_BLOCKS; block++)
 		ei->i_data[block] = raw_inode->i_block[block];
 	INIT_LIST_HEAD(&ei->i_orphan);
+	ext4_init_inode_fc_info(&ei->vfs_inode);
 
 	/*
 	 * Set transaction id's of transactions that have to be committed
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7725eb2105f4..c90337fc98c1 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1100,6 +1100,7 @@  static struct inode *ext4_alloc_inode(struct super_block *sb)
 	ei->i_datasync_tid = 0;
 	atomic_set(&ei->i_unwritten, 0);
 	INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
+	ext4_init_inode_fc_info(&ei->vfs_inode);
 	return &ei->vfs_inode;
 }
 
@@ -1139,6 +1140,7 @@  static void init_once(void *foo)
 	init_rwsem(&ei->i_data_sem);
 	init_rwsem(&ei->i_mmap_sem);
 	inode_init_once(&ei->vfs_inode);
+	ext4_init_inode_fc_info(&ei->vfs_inode);
 }
 
 static int __init init_inodecache(void)
@@ -4301,6 +4303,11 @@  static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
 	mutex_init(&sbi->s_orphan_lock);
 
+	INIT_LIST_HEAD(&sbi->s_fc_q);
+	sbi->s_fc_q_cnt = 0;
+	sbi->s_fc_eligible = true;
+	spin_lock_init(&sbi->s_fc_lock);
+
 	sb->s_root = NULL;
 
 	needs_recovery = (es->s_last_orphan != 0 ||