diff mbox series

[04/10] ext4: clean up the JBD2 API that initializes fast commits

Message ID 20201031200518.4178786-5-harshadshirwadkar@gmail.com
State Superseded
Headers show
Series Ext4 fast commit fixes | expand

Commit Message

harshad shirwadkar Oct. 31, 2020, 8:05 p.m. UTC
This patch cleans up the jbd2_fc_init() API and its related functions
to simplify enabling fast commits and configuring the number of blocks
that fast commits will use. With this change, the number of fast
commit blocks to use is solely determined by the JBD2 layer. However,
whether or not to use fast commits is determined by the file
system. The file system just calls jbd2_fc_init() to tell the JBD2
layer that it is interested in enabling fast commits. JBD2 layer
determines how many blocks to use for fast commits (based on the value
found in the JBD2 superblock).

Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
---
 fs/ext4/fast_commit.c | 12 +-------
 fs/jbd2/commit.c      |  2 +-
 fs/jbd2/journal.c     | 66 +++++++++++++++++++++++++++----------------
 fs/jbd2/recovery.c    |  2 +-
 include/linux/jbd2.h  |  3 +-
 5 files changed, 46 insertions(+), 39 deletions(-)

Comments

Jan Kara Nov. 3, 2020, 4:29 p.m. UTC | #1
On Sat 31-10-20 13:05:12, Harshad Shirwadkar wrote:
> This patch cleans up the jbd2_fc_init() API and its related functions
> to simplify enabling fast commits and configuring the number of blocks
> that fast commits will use. With this change, the number of fast
> commit blocks to use is solely determined by the JBD2 layer. However,
> whether or not to use fast commits is determined by the file
> system. The file system just calls jbd2_fc_init() to tell the JBD2
> layer that it is interested in enabling fast commits. JBD2 layer
> determines how many blocks to use for fast commits (based on the value
> found in the JBD2 superblock).
> 
> Suggested-by: Jan Kara <jack@suse.cz>
> Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>

Thanks for the cleanup. Some comments below...

> diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
> index fa688e163a80..353534403769 100644
> --- a/fs/jbd2/commit.c
> +++ b/fs/jbd2/commit.c
> @@ -801,7 +801,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
>  		if (first_block < journal->j_tail)
>  			freed += journal->j_last - journal->j_first;
>  		/* Update tail only if we free significant amount of space */
> -		if (freed < journal->j_maxlen / 4)
> +		if (freed < (journal->j_maxlen - journal->j_fc_wbufsize) / 4)
>  			update_tail = 0;

This change seems unrelated to the API change in jbd2_fc_init(). Can you
please separate fix for journal length handling into a separate patch with
a proper changelog etc.?

Also can you perhaps rename j_maxlen to j_total_len to give better hint
that there may be multiple parts of the journal and provide wrapper
jbd2_transaction_space(journal) for the
(journal->j_maxlen - journal->j_fc_wbufsize) expression because that's kind
of implementation detail of the current fastcommit code.

> diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
> index 0c7c42bd530f..ea15f55aff5c 100644
> --- a/fs/jbd2/journal.c
> +++ b/fs/jbd2/journal.c
> @@ -1357,14 +1357,6 @@ static journal_t *journal_init_common(struct block_device *bdev,
>  	if (!journal->j_wbuf)
>  		goto err_cleanup;
>  
> -	if (journal->j_fc_wbufsize > 0) {
> -		journal->j_fc_wbuf = kmalloc_array(journal->j_fc_wbufsize,
> -					sizeof(struct buffer_head *),
> -					GFP_KERNEL);
> -		if (!journal->j_fc_wbuf)
> -			goto err_cleanup;
> -	}
> -
>  	bh = getblk_unmovable(journal->j_dev, start, journal->j_blocksize);
>  	if (!bh) {
>  		pr_err("%s: Cannot get buffer for journal superblock\n",
> @@ -1378,19 +1370,21 @@ static journal_t *journal_init_common(struct block_device *bdev,
>  
>  err_cleanup:
>  	kfree(journal->j_wbuf);
> -	kfree(journal->j_fc_wbuf);
>  	jbd2_journal_destroy_revoke(journal);
>  	kfree(journal);
>  	return NULL;
>  }
>  
> -int jbd2_fc_init(journal_t *journal, int num_fc_blks)
> +int jbd2_fc_init(journal_t *journal)
>  {
> -	journal->j_fc_wbufsize = num_fc_blks;
> -	journal->j_fc_wbuf = kmalloc_array(journal->j_fc_wbufsize,
> -				sizeof(struct buffer_head *), GFP_KERNEL);
> -	if (!journal->j_fc_wbuf)
> -		return -ENOMEM;
> +	/*
> +	 * Only set j_fc_wbufsize here to indicate that the client file
> +	 * system is interested in using fast commits. The actual number of
> +	 * fast commit blocks is found inside jbd2_superblock and is only
> +	 * valid if j_fc_wbufsize is non-zero. The real value of j_fc_wbufsize
> +	 * gets set in journal_reset().
> +	 */
> +	journal->j_fc_wbufsize = JBD2_MIN_FC_BLOCKS;
>  	return 0;
>  }

When looking at this, is there a reason why jbd2_fc_init() still exists?  I
mean why not just make the rule that the journal has FC block number set
iff FC gets enabled? Anything else seems a bit confusing to me and also
dangerous - imagine we have fs with FC running, we write some FCs and then
crash. Then on system recovery we mount with no_fc mount option. We have
just lost data on the filesystem AFAIU... So I'd just remove all the mount
options related to fastcommits and leave everything to the journal setup
(which can be modified with e2fsprogs if needed) to keep things simple.

>  EXPORT_SYMBOL(jbd2_fc_init);
> @@ -1500,7 +1494,7 @@ static void journal_fail_superblock(journal_t *journal)
>  static int journal_reset(journal_t *journal)
>  {
>  	journal_superblock_t *sb = journal->j_superblock;
> -	unsigned long long first, last;
> +	unsigned long long first, last, num_fc_blocks;
>  
>  	first = be32_to_cpu(sb->s_first);
>  	last = be32_to_cpu(sb->s_maxlen);
> @@ -1513,6 +1507,28 @@ static int journal_reset(journal_t *journal)
>  
>  	journal->j_first = first;
>  
> +	/*
> +	 * At this point, fast commit recovery has finished. Now, we solely
> +	 * rely on the file system to decide whether it wants fast commits
> +	 * or not. File system that wishes to use fast commits must have
> +	 * already called jbd2_fc_init() before we get here.
> +	 */
> +	if (journal->j_fc_wbufsize > 0)
> +		jbd2_journal_set_features(journal, 0, 0,
> +					  JBD2_FEATURE_INCOMPAT_FAST_COMMIT);
> +	else
> +		jbd2_journal_clear_features(journal, 0, 0,
> +					  JBD2_FEATURE_INCOMPAT_FAST_COMMIT);
> +
> +	/* If valid, prefer the value found in superblock over the default */
> +	num_fc_blocks = be32_to_cpu(sb->s_num_fc_blks);
> +	if (num_fc_blocks > 0 && num_fc_blocks < last)
> +		journal->j_fc_wbufsize = num_fc_blocks;
> +
> +	if (jbd2_has_feature_fast_commit(journal))
> +		journal->j_fc_wbuf = kmalloc_array(journal->j_fc_wbufsize,
> +					sizeof(struct buffer_head *), GFP_KERNEL);
> +
>  	if (jbd2_has_feature_fast_commit(journal) &&
>  	    journal->j_fc_wbufsize > 0) {
>  		journal->j_fc_last = last;
> @@ -1531,7 +1547,8 @@ static int journal_reset(journal_t *journal)
>  	journal->j_commit_sequence = journal->j_transaction_sequence - 1;
>  	journal->j_commit_request = journal->j_commit_sequence;
>  
> -	journal->j_max_transaction_buffers = journal->j_maxlen / 4;
> +	journal->j_max_transaction_buffers =
> +		(journal->j_maxlen - journal->j_fc_wbufsize) / 4;
>  
>  	/*
>  	 * As a special case, if the on-disk copy is already marked as needing
> @@ -1872,6 +1889,7 @@ static int load_superblock(journal_t *journal)
>  {
>  	int err;
>  	journal_superblock_t *sb;
> +	int num_fc_blocks;
>  
>  	err = journal_get_superblock(journal);
>  	if (err)
> @@ -1884,10 +1902,12 @@ static int load_superblock(journal_t *journal)
>  	journal->j_first = be32_to_cpu(sb->s_first);
>  	journal->j_errno = be32_to_cpu(sb->s_errno);
>  
> -	if (jbd2_has_feature_fast_commit(journal) &&
> -	    journal->j_fc_wbufsize > 0) {
> +	if (jbd2_has_feature_fast_commit(journal)) {
>  		journal->j_fc_last = be32_to_cpu(sb->s_maxlen);
> -		journal->j_last = journal->j_fc_last - journal->j_fc_wbufsize;
> +		num_fc_blocks = be32_to_cpu(sb->s_num_fc_blks);
> +		if (!num_fc_blocks || num_fc_blocks >= journal->j_fc_last)

I think this needs to be stricter - we need the check that the journal is
at least JBD2_MIN_JOURNAL_BLOCKS long (which happens at the beginning of
journal_reset()) to happen after we've subtracted fastcommit blocks...

> +			num_fc_blocks = JBD2_MIN_FC_BLOCKS;
> +		journal->j_last = journal->j_fc_last - num_fc_blocks;
>  		journal->j_fc_first = journal->j_last + 1;
>  		journal->j_fc_off = 0;
>  	} else {
...
> diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
> index eb2606133cd8..822f16cbf9b3 100644
> --- a/fs/jbd2/recovery.c
> +++ b/fs/jbd2/recovery.c
> @@ -134,7 +134,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
>  
>  	*bhp = NULL;
>  
> -	if (offset >= journal->j_maxlen) {
> +	if (offset >= journal->j_maxlen + journal->j_fc_wbufsize) {

This looks wrong since j_maxlen is currently including fastcommit blocks...

								Honza
harshad shirwadkar Nov. 4, 2020, 7:52 p.m. UTC | #2
On Tue, Nov 3, 2020 at 8:29 AM Jan Kara <jack@suse.cz> wrote:
>
> On Sat 31-10-20 13:05:12, Harshad Shirwadkar wrote:
> > This patch cleans up the jbd2_fc_init() API and its related functions
> > to simplify enabling fast commits and configuring the number of blocks
> > that fast commits will use. With this change, the number of fast
> > commit blocks to use is solely determined by the JBD2 layer. However,
> > whether or not to use fast commits is determined by the file
> > system. The file system just calls jbd2_fc_init() to tell the JBD2
> > layer that it is interested in enabling fast commits. JBD2 layer
> > determines how many blocks to use for fast commits (based on the value
> > found in the JBD2 superblock).
> >
> > Suggested-by: Jan Kara <jack@suse.cz>
> > Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
>
> Thanks for the cleanup. Some comments below...
>
> > diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
> > index fa688e163a80..353534403769 100644
> > --- a/fs/jbd2/commit.c
> > +++ b/fs/jbd2/commit.c
> > @@ -801,7 +801,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
> >               if (first_block < journal->j_tail)
> >                       freed += journal->j_last - journal->j_first;
> >               /* Update tail only if we free significant amount of space */
> > -             if (freed < journal->j_maxlen / 4)
> > +             if (freed < (journal->j_maxlen - journal->j_fc_wbufsize) / 4)
> >                       update_tail = 0;
>
> This change seems unrelated to the API change in jbd2_fc_init(). Can you
> please separate fix for journal length handling into a separate patch with
> a proper changelog etc.?
Sounds good, will do that.
>
> Also can you perhaps rename j_maxlen to j_total_len to give better hint
> that there may be multiple parts of the journal and provide wrapper
> jbd2_transaction_space(journal) for the
> (journal->j_maxlen - journal->j_fc_wbufsize) expression because that's kind
> of implementation detail of the current fastcommit code.
Ack
>
> > diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
> > index 0c7c42bd530f..ea15f55aff5c 100644
> > --- a/fs/jbd2/journal.c
> > +++ b/fs/jbd2/journal.c
> > @@ -1357,14 +1357,6 @@ static journal_t *journal_init_common(struct block_device *bdev,
> >       if (!journal->j_wbuf)
> >               goto err_cleanup;
> >
> > -     if (journal->j_fc_wbufsize > 0) {
> > -             journal->j_fc_wbuf = kmalloc_array(journal->j_fc_wbufsize,
> > -                                     sizeof(struct buffer_head *),
> > -                                     GFP_KERNEL);
> > -             if (!journal->j_fc_wbuf)
> > -                     goto err_cleanup;
> > -     }
> > -
> >       bh = getblk_unmovable(journal->j_dev, start, journal->j_blocksize);
> >       if (!bh) {
> >               pr_err("%s: Cannot get buffer for journal superblock\n",
> > @@ -1378,19 +1370,21 @@ static journal_t *journal_init_common(struct block_device *bdev,
> >
> >  err_cleanup:
> >       kfree(journal->j_wbuf);
> > -     kfree(journal->j_fc_wbuf);
> >       jbd2_journal_destroy_revoke(journal);
> >       kfree(journal);
> >       return NULL;
> >  }
> >
> > -int jbd2_fc_init(journal_t *journal, int num_fc_blks)
> > +int jbd2_fc_init(journal_t *journal)
> >  {
> > -     journal->j_fc_wbufsize = num_fc_blks;
> > -     journal->j_fc_wbuf = kmalloc_array(journal->j_fc_wbufsize,
> > -                             sizeof(struct buffer_head *), GFP_KERNEL);
> > -     if (!journal->j_fc_wbuf)
> > -             return -ENOMEM;
> > +     /*
> > +      * Only set j_fc_wbufsize here to indicate that the client file
> > +      * system is interested in using fast commits. The actual number of
> > +      * fast commit blocks is found inside jbd2_superblock and is only
> > +      * valid if j_fc_wbufsize is non-zero. The real value of j_fc_wbufsize
> > +      * gets set in journal_reset().
> > +      */
> > +     journal->j_fc_wbufsize = JBD2_MIN_FC_BLOCKS;
> >       return 0;
> >  }
>
> When looking at this, is there a reason why jbd2_fc_init() still exists?  I
> mean why not just make the rule that the journal has FC block number set
> iff FC gets enabled? Anything else seems a bit confusing to me and also
> dangerous - imagine we have fs with FC running, we write some FCs and then
> crash. Then on system recovery we mount with no_fc mount option. We have
> just lost data on the filesystem AFAIU... So I'd just remove all the mount
> options related to fastcommits and leave everything to the journal setup
> (which can be modified with e2fsprogs if needed) to keep things simple.
The problem is whether or not to use fast commits is the file system's
call. The JBD2 feature flag will be cleared on a clean unmount and if
we rely solely on the JBD2 feature flag, fast commit will be turned
off after a clean unmount. Whereas the FS compat flag is the source of
truth about whether fast commit needs to be used or not. That's why we
need an API for the file system to tell JBD2 to still do fast commits.
Mount options that override the feature flag in Ext4 were mainly meant
for debugging purposes. So, perhaps there should be a clear warning
message in the kernel if any of these options are used? Even if we get
rid of the mount options, we still need the jbd2_fc_init() API for the
FS to tell JBD2 that it wants to use fast commit. Note that even if
jbd2_fc_init() is not called, JBD2 will still try to replay fast
commit blocks.
>
> >  EXPORT_SYMBOL(jbd2_fc_init);
> > @@ -1500,7 +1494,7 @@ static void journal_fail_superblock(journal_t *journal)
> >  static int journal_reset(journal_t *journal)
> >  {
> >       journal_superblock_t *sb = journal->j_superblock;
> > -     unsigned long long first, last;
> > +     unsigned long long first, last, num_fc_blocks;
> >
> >       first = be32_to_cpu(sb->s_first);
> >       last = be32_to_cpu(sb->s_maxlen);
> > @@ -1513,6 +1507,28 @@ static int journal_reset(journal_t *journal)
> >
> >       journal->j_first = first;
> >
> > +     /*
> > +      * At this point, fast commit recovery has finished. Now, we solely
> > +      * rely on the file system to decide whether it wants fast commits
> > +      * or not. File system that wishes to use fast commits must have
> > +      * already called jbd2_fc_init() before we get here.
> > +      */
> > +     if (journal->j_fc_wbufsize > 0)
> > +             jbd2_journal_set_features(journal, 0, 0,
> > +                                       JBD2_FEATURE_INCOMPAT_FAST_COMMIT);
> > +     else
> > +             jbd2_journal_clear_features(journal, 0, 0,
> > +                                       JBD2_FEATURE_INCOMPAT_FAST_COMMIT);
> > +
> > +     /* If valid, prefer the value found in superblock over the default */
> > +     num_fc_blocks = be32_to_cpu(sb->s_num_fc_blks);
> > +     if (num_fc_blocks > 0 && num_fc_blocks < last)
> > +             journal->j_fc_wbufsize = num_fc_blocks;
> > +
> > +     if (jbd2_has_feature_fast_commit(journal))
> > +             journal->j_fc_wbuf = kmalloc_array(journal->j_fc_wbufsize,
> > +                                     sizeof(struct buffer_head *), GFP_KERNEL);
> > +
> >       if (jbd2_has_feature_fast_commit(journal) &&
> >           journal->j_fc_wbufsize > 0) {
> >               journal->j_fc_last = last;
> > @@ -1531,7 +1547,8 @@ static int journal_reset(journal_t *journal)
> >       journal->j_commit_sequence = journal->j_transaction_sequence - 1;
> >       journal->j_commit_request = journal->j_commit_sequence;
> >
> > -     journal->j_max_transaction_buffers = journal->j_maxlen / 4;
> > +     journal->j_max_transaction_buffers =
> > +             (journal->j_maxlen - journal->j_fc_wbufsize) / 4;
> >
> >       /*
> >        * As a special case, if the on-disk copy is already marked as needing
> > @@ -1872,6 +1889,7 @@ static int load_superblock(journal_t *journal)
> >  {
> >       int err;
> >       journal_superblock_t *sb;
> > +     int num_fc_blocks;
> >
> >       err = journal_get_superblock(journal);
> >       if (err)
> > @@ -1884,10 +1902,12 @@ static int load_superblock(journal_t *journal)
> >       journal->j_first = be32_to_cpu(sb->s_first);
> >       journal->j_errno = be32_to_cpu(sb->s_errno);
> >
> > -     if (jbd2_has_feature_fast_commit(journal) &&
> > -         journal->j_fc_wbufsize > 0) {
> > +     if (jbd2_has_feature_fast_commit(journal)) {
> >               journal->j_fc_last = be32_to_cpu(sb->s_maxlen);
> > -             journal->j_last = journal->j_fc_last - journal->j_fc_wbufsize;
> > +             num_fc_blocks = be32_to_cpu(sb->s_num_fc_blks);
> > +             if (!num_fc_blocks || num_fc_blocks >= journal->j_fc_last)
>
> I think this needs to be stricter - we need the check that the journal is
> at least JBD2_MIN_JOURNAL_BLOCKS long (which happens at the beginning of
> journal_reset()) to happen after we've subtracted fastcommit blocks...
So are you saying that with FC, the minimum journal size is
JBD2_MIN_JOURNAL_BLOCKS + JBD2_MIN_FC_BLOCKS? I was assuming that we
will reserve JBD2_MIN_FC_BLOCKS (256) blocks out of the total journal
size. That way the users who rely on the journal size to be 1024
blocks, won't see a difference in journal size even after turning FC
on. But I'm not sure if that's something we should care about.
>
> > +                     num_fc_blocks = JBD2_MIN_FC_BLOCKS;
> > +             journal->j_last = journal->j_fc_last - num_fc_blocks;
> >               journal->j_fc_first = journal->j_last + 1;
> >               journal->j_fc_off = 0;
> >       } else {
> ...
> > diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
> > index eb2606133cd8..822f16cbf9b3 100644
> > --- a/fs/jbd2/recovery.c
> > +++ b/fs/jbd2/recovery.c
> > @@ -134,7 +134,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
> >
> >       *bhp = NULL;
> >
> > -     if (offset >= journal->j_maxlen) {
> > +     if (offset >= journal->j_maxlen + journal->j_fc_wbufsize) {
>
> This looks wrong since j_maxlen is currently including fastcommit blocks...
Ack,

Thanks,
Harshad
>
>                                                                 Honza
> --
> Jan Kara <jack@suse.com>
> SUSE Labs, CR
Jan Kara Nov. 5, 2020, 10:30 a.m. UTC | #3
On Wed 04-11-20 11:52:24, harshad shirwadkar wrote:
> On Tue, Nov 3, 2020 at 8:29 AM Jan Kara <jack@suse.cz> wrote:
> > > -int jbd2_fc_init(journal_t *journal, int num_fc_blks)
> > > +int jbd2_fc_init(journal_t *journal)
> > >  {
> > > -     journal->j_fc_wbufsize = num_fc_blks;
> > > -     journal->j_fc_wbuf = kmalloc_array(journal->j_fc_wbufsize,
> > > -                             sizeof(struct buffer_head *), GFP_KERNEL);
> > > -     if (!journal->j_fc_wbuf)
> > > -             return -ENOMEM;
> > > +     /*
> > > +      * Only set j_fc_wbufsize here to indicate that the client file
> > > +      * system is interested in using fast commits. The actual number of
> > > +      * fast commit blocks is found inside jbd2_superblock and is only
> > > +      * valid if j_fc_wbufsize is non-zero. The real value of j_fc_wbufsize
> > > +      * gets set in journal_reset().
> > > +      */
> > > +     journal->j_fc_wbufsize = JBD2_MIN_FC_BLOCKS;
> > >       return 0;
> > >  }
> >
> > When looking at this, is there a reason why jbd2_fc_init() still exists?  I
> > mean why not just make the rule that the journal has FC block number set
> > iff FC gets enabled? Anything else seems a bit confusing to me and also
> > dangerous - imagine we have fs with FC running, we write some FCs and then
> > crash. Then on system recovery we mount with no_fc mount option. We have
> > just lost data on the filesystem AFAIU... So I'd just remove all the mount
> > options related to fastcommits and leave everything to the journal setup
> > (which can be modified with e2fsprogs if needed) to keep things simple.
> The problem is whether or not to use fast commits is the file system's
> call. The JBD2 feature flag will be cleared on a clean unmount and if
> we rely solely on the JBD2 feature flag, fast commit will be turned
> off after a clean unmount. Whereas the FS compat flag is the source of
> truth about whether fast commit needs to be used or not. That's why we
> need an API for the file system to tell JBD2 to still do fast commits.

Yes, I meant the API could be just that the filesystem either calls
jbd2_journal_set_features() with FASTCOMMIT feature or it won't. Similarly
to how e.g. JBD2_FEATURE_INCOMPAT_64BIT is handled. No need for
jbd2_fc_init() function AFAICT.

> Mount options that override the feature flag in Ext4 were mainly meant
> for debugging purposes. So, perhaps there should be a clear warning
> message in the kernel if any of these options are used? Even if we get
> rid of the mount options, we still need the jbd2_fc_init() API for the
> FS to tell JBD2 that it wants to use fast commit. Note that even if
> jbd2_fc_init() is not called, JBD2 will still try to replay fast
> commit blocks.



> > >  EXPORT_SYMBOL(jbd2_fc_init);
> > > @@ -1500,7 +1494,7 @@ static void journal_fail_superblock(journal_t *journal)
> > >  static int journal_reset(journal_t *journal)
> > >  {
> > >       journal_superblock_t *sb = journal->j_superblock;
> > > -     unsigned long long first, last;
> > > +     unsigned long long first, last, num_fc_blocks;
> > >
> > >       first = be32_to_cpu(sb->s_first);
> > >       last = be32_to_cpu(sb->s_maxlen);
> > > @@ -1513,6 +1507,28 @@ static int journal_reset(journal_t *journal)
> > >
> > >       journal->j_first = first;
> > >
> > > +     /*
> > > +      * At this point, fast commit recovery has finished. Now, we solely
> > > +      * rely on the file system to decide whether it wants fast commits
> > > +      * or not. File system that wishes to use fast commits must have
> > > +      * already called jbd2_fc_init() before we get here.
> > > +      */
> > > +     if (journal->j_fc_wbufsize > 0)
> > > +             jbd2_journal_set_features(journal, 0, 0,
> > > +                                       JBD2_FEATURE_INCOMPAT_FAST_COMMIT);
> > > +     else
> > > +             jbd2_journal_clear_features(journal, 0, 0,
> > > +                                       JBD2_FEATURE_INCOMPAT_FAST_COMMIT);
> > > +
> > > +     /* If valid, prefer the value found in superblock over the default */
> > > +     num_fc_blocks = be32_to_cpu(sb->s_num_fc_blks);
> > > +     if (num_fc_blocks > 0 && num_fc_blocks < last)
> > > +             journal->j_fc_wbufsize = num_fc_blocks;
> > > +
> > > +     if (jbd2_has_feature_fast_commit(journal))
> > > +             journal->j_fc_wbuf = kmalloc_array(journal->j_fc_wbufsize,
> > > +                                     sizeof(struct buffer_head *), GFP_KERNEL);
> > > +
> > >       if (jbd2_has_feature_fast_commit(journal) &&
> > >           journal->j_fc_wbufsize > 0) {
> > >               journal->j_fc_last = last;
> > > @@ -1531,7 +1547,8 @@ static int journal_reset(journal_t *journal)
> > >       journal->j_commit_sequence = journal->j_transaction_sequence - 1;
> > >       journal->j_commit_request = journal->j_commit_sequence;
> > >
> > > -     journal->j_max_transaction_buffers = journal->j_maxlen / 4;
> > > +     journal->j_max_transaction_buffers =
> > > +             (journal->j_maxlen - journal->j_fc_wbufsize) / 4;
> > >
> > >       /*
> > >        * As a special case, if the on-disk copy is already marked as needing
> > > @@ -1872,6 +1889,7 @@ static int load_superblock(journal_t *journal)
> > >  {
> > >       int err;
> > >       journal_superblock_t *sb;
> > > +     int num_fc_blocks;
> > >
> > >       err = journal_get_superblock(journal);
> > >       if (err)
> > > @@ -1884,10 +1902,12 @@ static int load_superblock(journal_t *journal)
> > >       journal->j_first = be32_to_cpu(sb->s_first);
> > >       journal->j_errno = be32_to_cpu(sb->s_errno);
> > >
> > > -     if (jbd2_has_feature_fast_commit(journal) &&
> > > -         journal->j_fc_wbufsize > 0) {
> > > +     if (jbd2_has_feature_fast_commit(journal)) {
> > >               journal->j_fc_last = be32_to_cpu(sb->s_maxlen);
> > > -             journal->j_last = journal->j_fc_last - journal->j_fc_wbufsize;
> > > +             num_fc_blocks = be32_to_cpu(sb->s_num_fc_blks);
> > > +             if (!num_fc_blocks || num_fc_blocks >= journal->j_fc_last)
> >
> > I think this needs to be stricter - we need the check that the journal is
> > at least JBD2_MIN_JOURNAL_BLOCKS long (which happens at the beginning of
> > journal_reset()) to happen after we've subtracted fastcommit blocks...
> So are you saying that with FC, the minimum journal size is
> JBD2_MIN_JOURNAL_BLOCKS + JBD2_MIN_FC_BLOCKS? I was assuming that we

Yes. JBD2_MIN_JOURNAL_BLOCKS is minimum number of blocks we need for normal
commits to get reasonable behavior. So as you say with fastcommits enabled,
the minimal journal size is JBD2_MIN_JOURNAL_BLOCKS + JBD2_MIN_FC_BLOCKS.

> will reserve JBD2_MIN_FC_BLOCKS (256) blocks out of the total journal
> size. That way the users who rely on the journal size to be 1024
> blocks, won't see a difference in journal size even after turning FC
> on. But I'm not sure if that's something we should care about.

Well, e2fsprogs need to check journal size when enabling fastcommits so
that we don't get invalid configurations.

								Honza
Jan Kara Nov. 5, 2020, 12:44 p.m. UTC | #4
On Thu 05-11-20 11:30:24, Jan Kara wrote:
> On Wed 04-11-20 11:52:24, harshad shirwadkar wrote:
> > On Tue, Nov 3, 2020 at 8:29 AM Jan Kara <jack@suse.cz> wrote:
> > > > -int jbd2_fc_init(journal_t *journal, int num_fc_blks)
> > > > +int jbd2_fc_init(journal_t *journal)
> > > >  {
> > > > -     journal->j_fc_wbufsize = num_fc_blks;
> > > > -     journal->j_fc_wbuf = kmalloc_array(journal->j_fc_wbufsize,
> > > > -                             sizeof(struct buffer_head *), GFP_KERNEL);
> > > > -     if (!journal->j_fc_wbuf)
> > > > -             return -ENOMEM;
> > > > +     /*
> > > > +      * Only set j_fc_wbufsize here to indicate that the client file
> > > > +      * system is interested in using fast commits. The actual number of
> > > > +      * fast commit blocks is found inside jbd2_superblock and is only
> > > > +      * valid if j_fc_wbufsize is non-zero. The real value of j_fc_wbufsize
> > > > +      * gets set in journal_reset().
> > > > +      */
> > > > +     journal->j_fc_wbufsize = JBD2_MIN_FC_BLOCKS;
> > > >       return 0;
> > > >  }
> > >
> > > When looking at this, is there a reason why jbd2_fc_init() still exists?  I
> > > mean why not just make the rule that the journal has FC block number set
> > > iff FC gets enabled? Anything else seems a bit confusing to me and also
> > > dangerous - imagine we have fs with FC running, we write some FCs and then
> > > crash. Then on system recovery we mount with no_fc mount option. We have
> > > just lost data on the filesystem AFAIU... So I'd just remove all the mount
> > > options related to fastcommits and leave everything to the journal setup
> > > (which can be modified with e2fsprogs if needed) to keep things simple.
> > The problem is whether or not to use fast commits is the file system's
> > call. The JBD2 feature flag will be cleared on a clean unmount and if
> > we rely solely on the JBD2 feature flag, fast commit will be turned
> > off after a clean unmount. Whereas the FS compat flag is the source of
> > truth about whether fast commit needs to be used or not. That's why we
> > need an API for the file system to tell JBD2 to still do fast commits.
> 
> Yes, I meant the API could be just that the filesystem either calls
> jbd2_journal_set_features() with FASTCOMMIT feature or it won't. Similarly
> to how e.g. JBD2_FEATURE_INCOMPAT_64BIT is handled. No need for
> jbd2_fc_init() function AFAICT.
> 
> > Mount options that override the feature flag in Ext4 were mainly meant
> > for debugging purposes. So, perhaps there should be a clear warning
> > message in the kernel if any of these options are used? Even if we get
> > rid of the mount options, we still need the jbd2_fc_init() API for the
> > FS to tell JBD2 that it wants to use fast commit. Note that even if
> > jbd2_fc_init() is not called, JBD2 will still try to replay fast
> > commit blocks.

I forgot to add here: I don't like "debug-only" mount options in production
kernels because users tend to try them out and:
a) occasionally get burnt by unexpected behavior
b) the options become hard to get rid of because someone starts to depend
on them.

So I'd prefer that the options are removed unless they are really essential
for debugging the feature and if they are essential, they should be clearly
marked as debug aid... (e.g. with debug in the name or so).

								Honza
harshad shirwadkar Nov. 6, 2020, 3:02 a.m. UTC | #5
Thanks Jan and Amy for the feedback. In V2, I'll drop "no_fc" mount
option which was very confusing. Also, we have a mount option called
"fc_debug_force" that forcefully turns fast commits on even if it was
not enabled by mke2fs. It's handy for running performance tests
without relying on e2fsprogs. But I understand that this option could
also be confusing. There's "debug" in its name and I will also move it
inside #ifdef CONFIG_EXT4_DEBUG so that for production, this gets
compiled out.

On Thu, Nov 5, 2020 at 5:30 AM Amy Parker <enbyamy@gmail.com> wrote:
>
> On Thu, Nov 5, 2020, 4:45 AM Jan Kara <jack@suse.cz> wrote:
>>
>> On Thu 05-11-20 11:30:24, Jan Kara wrote:
>> > On Wed 04-11-20 11:52:24, harshad shirwadkar wrote:
>> > > On Tue, Nov 3, 2020 at 8:29 AM Jan Kara <jack@suse.cz> wrote:
>> > > > > -int jbd2_fc_init(journal_t *journal, int num_fc_blks)
>> > > > > +int jbd2_fc_init(journal_t *journal)
>> > > > >  {
>> > > > > -     journal->j_fc_wbufsize = num_fc_blks;
>> > > > > -     journal->j_fc_wbuf = kmalloc_array(journal->j_fc_wbufsize,
>> > > > > -                             sizeof(struct buffer_head *), GFP_KERNEL);
>> > > > > -     if (!journal->j_fc_wbuf)
>> > > > > -             return -ENOMEM;
>> > > > > +     /*
>> > > > > +      * Only set j_fc_wbufsize here to indicate that the client file
>> > > > > +      * system is interested in using fast commits. The actual number of
>> > > > > +      * fast commit blocks is found inside jbd2_superblock and is only
>> > > > > +      * valid if j_fc_wbufsize is non-zero. The real value of j_fc_wbufsize
>> > > > > +      * gets set in journal_reset().
>> > > > > +      */
>> > > > > +     journal->j_fc_wbufsize = JBD2_MIN_FC_BLOCKS;
>> > > > >       return 0;
>> > > > >  }
>> > > >
>> > > > When looking at this, is there a reason why jbd2_fc_init() still exists?  I
>> > > > mean why not just make the rule that the journal has FC block number set
>> > > > iff FC gets enabled? Anything else seems a bit confusing to me and also
>> > > > dangerous - imagine we have fs with FC running, we write some FCs and then
>> > > > crash. Then on system recovery we mount with no_fc mount option. We have
>> > > > just lost data on the filesystem AFAIU... So I'd just remove all the mount
>> > > > options related to fastcommits and leave everything to the journal setup
>> > > > (which can be modified with e2fsprogs if needed) to keep things simple.
>> > > The problem is whether or not to use fast commits is the file system's
>> > > call. The JBD2 feature flag will be cleared on a clean unmount and if
>> > > we rely solely on the JBD2 feature flag, fast commit will be turned
>> > > off after a clean unmount. Whereas the FS compat flag is the source of
>> > > truth about whether fast commit needs to be used or not. That's why we
>> > > need an API for the file system to tell JBD2 to still do fast commits.
>> >
>> > Yes, I meant the API could be just that the filesystem either calls
>> > jbd2_journal_set_features() with FASTCOMMIT feature or it won't. Similarly
>> > to how e.g. JBD2_FEATURE_INCOMPAT_64BIT is handled. No need for
>> > jbd2_fc_init() function AFAICT.
Sounds good, I'll drop it.

Thanks,
Harshad
>> >
>> > > Mount options that override the feature flag in Ext4 were mainly meant
>> > > for debugging purposes. So, perhaps there should be a clear warning
>> > > message in the kernel if any of these options are used? Even if we get
>> > > rid of the mount options, we still need the jbd2_fc_init() API for the
>> > > FS to tell JBD2 that it wants to use fast commit. Note that even if
>> > > jbd2_fc_init() is not called, JBD2 will still try to replay fast
>> > > commit blocks.
>>
>> I forgot to add here: I don't like "debug-only" mount options in production
>> kernels because users tend to try them out and:
>> a) occasionally get burnt by unexpected behavior
>
>
> Seen that happen enough times myself, there's a
> reason I always leave notes to users of systems I
> set up to never turn on debug-only mode in day to
>
>> b) the options become hard to get rid of because someone starts to depend
>> on them.
>
>
> This occurs not just with kernel things but with all
> software in general. Leaving options in long-term
> that then need to be removed gets problematic
> pretty quickly.
>
>>
>> So I'd prefer that the options are removed unless they are really essential
>> for debugging the feature
>
>
> Yeah - remove them if we can, if they aren't essential
> then no need to keep them.
>
>> and if they are essential, they should be clearly
>> marked as debug aid... (e.g. with debug in the name or so).
>
>
> At *least* that if not more.
>
>>
>>                                                                 Honza
>>
>> --
>> Jan Kara <jack@suse.com>
>> SUSE Labs, CR
>
>
> Best regards,
> Amy Parker
> (they/them)
diff mbox series

Patch

diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 5c3af472287a..1b62d82b9622 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -2082,8 +2082,6 @@  static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
 
 void ext4_fc_init(struct super_block *sb, journal_t *journal)
 {
-	int num_fc_blocks;
-
 	/*
 	 * We set replay callback even if fast commit disabled because we may
 	 * could still have fast commit blocks that need to be replayed even if
@@ -2093,15 +2091,7 @@  void ext4_fc_init(struct super_block *sb, journal_t *journal)
 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
 		return;
 	journal->j_fc_cleanup_callback = ext4_fc_cleanup;
-	if (!buffer_uptodate(journal->j_sb_buffer)
-		&& ext4_read_bh_lock(journal->j_sb_buffer, REQ_META | REQ_PRIO,
-					true)) {
-		ext4_msg(sb, KERN_ERR, "I/O error on journal");
-		return;
-	}
-	num_fc_blocks = be32_to_cpu(journal->j_superblock->s_num_fc_blks);
-	if (jbd2_fc_init(journal, num_fc_blocks ? num_fc_blocks :
-					EXT4_NUM_FC_BLKS)) {
+	if (jbd2_fc_init(journal)) {
 		pr_warn("Error while enabling fast commits, turning off.");
 		ext4_clear_feature_fast_commit(sb);
 	}
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index fa688e163a80..353534403769 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -801,7 +801,7 @@  void jbd2_journal_commit_transaction(journal_t *journal)
 		if (first_block < journal->j_tail)
 			freed += journal->j_last - journal->j_first;
 		/* Update tail only if we free significant amount of space */
-		if (freed < journal->j_maxlen / 4)
+		if (freed < (journal->j_maxlen - journal->j_fc_wbufsize) / 4)
 			update_tail = 0;
 	}
 	J_ASSERT(commit_transaction->t_state == T_COMMIT);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 0c7c42bd530f..ea15f55aff5c 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1357,14 +1357,6 @@  static journal_t *journal_init_common(struct block_device *bdev,
 	if (!journal->j_wbuf)
 		goto err_cleanup;
 
-	if (journal->j_fc_wbufsize > 0) {
-		journal->j_fc_wbuf = kmalloc_array(journal->j_fc_wbufsize,
-					sizeof(struct buffer_head *),
-					GFP_KERNEL);
-		if (!journal->j_fc_wbuf)
-			goto err_cleanup;
-	}
-
 	bh = getblk_unmovable(journal->j_dev, start, journal->j_blocksize);
 	if (!bh) {
 		pr_err("%s: Cannot get buffer for journal superblock\n",
@@ -1378,19 +1370,21 @@  static journal_t *journal_init_common(struct block_device *bdev,
 
 err_cleanup:
 	kfree(journal->j_wbuf);
-	kfree(journal->j_fc_wbuf);
 	jbd2_journal_destroy_revoke(journal);
 	kfree(journal);
 	return NULL;
 }
 
-int jbd2_fc_init(journal_t *journal, int num_fc_blks)
+int jbd2_fc_init(journal_t *journal)
 {
-	journal->j_fc_wbufsize = num_fc_blks;
-	journal->j_fc_wbuf = kmalloc_array(journal->j_fc_wbufsize,
-				sizeof(struct buffer_head *), GFP_KERNEL);
-	if (!journal->j_fc_wbuf)
-		return -ENOMEM;
+	/*
+	 * Only set j_fc_wbufsize here to indicate that the client file
+	 * system is interested in using fast commits. The actual number of
+	 * fast commit blocks is found inside jbd2_superblock and is only
+	 * valid if j_fc_wbufsize is non-zero. The real value of j_fc_wbufsize
+	 * gets set in journal_reset().
+	 */
+	journal->j_fc_wbufsize = JBD2_MIN_FC_BLOCKS;
 	return 0;
 }
 EXPORT_SYMBOL(jbd2_fc_init);
@@ -1500,7 +1494,7 @@  static void journal_fail_superblock(journal_t *journal)
 static int journal_reset(journal_t *journal)
 {
 	journal_superblock_t *sb = journal->j_superblock;
-	unsigned long long first, last;
+	unsigned long long first, last, num_fc_blocks;
 
 	first = be32_to_cpu(sb->s_first);
 	last = be32_to_cpu(sb->s_maxlen);
@@ -1513,6 +1507,28 @@  static int journal_reset(journal_t *journal)
 
 	journal->j_first = first;
 
+	/*
+	 * At this point, fast commit recovery has finished. Now, we solely
+	 * rely on the file system to decide whether it wants fast commits
+	 * or not. File system that wishes to use fast commits must have
+	 * already called jbd2_fc_init() before we get here.
+	 */
+	if (journal->j_fc_wbufsize > 0)
+		jbd2_journal_set_features(journal, 0, 0,
+					  JBD2_FEATURE_INCOMPAT_FAST_COMMIT);
+	else
+		jbd2_journal_clear_features(journal, 0, 0,
+					  JBD2_FEATURE_INCOMPAT_FAST_COMMIT);
+
+	/* If valid, prefer the value found in superblock over the default */
+	num_fc_blocks = be32_to_cpu(sb->s_num_fc_blks);
+	if (num_fc_blocks > 0 && num_fc_blocks < last)
+		journal->j_fc_wbufsize = num_fc_blocks;
+
+	if (jbd2_has_feature_fast_commit(journal))
+		journal->j_fc_wbuf = kmalloc_array(journal->j_fc_wbufsize,
+					sizeof(struct buffer_head *), GFP_KERNEL);
+
 	if (jbd2_has_feature_fast_commit(journal) &&
 	    journal->j_fc_wbufsize > 0) {
 		journal->j_fc_last = last;
@@ -1531,7 +1547,8 @@  static int journal_reset(journal_t *journal)
 	journal->j_commit_sequence = journal->j_transaction_sequence - 1;
 	journal->j_commit_request = journal->j_commit_sequence;
 
-	journal->j_max_transaction_buffers = journal->j_maxlen / 4;
+	journal->j_max_transaction_buffers =
+		(journal->j_maxlen - journal->j_fc_wbufsize) / 4;
 
 	/*
 	 * As a special case, if the on-disk copy is already marked as needing
@@ -1872,6 +1889,7 @@  static int load_superblock(journal_t *journal)
 {
 	int err;
 	journal_superblock_t *sb;
+	int num_fc_blocks;
 
 	err = journal_get_superblock(journal);
 	if (err)
@@ -1884,10 +1902,12 @@  static int load_superblock(journal_t *journal)
 	journal->j_first = be32_to_cpu(sb->s_first);
 	journal->j_errno = be32_to_cpu(sb->s_errno);
 
-	if (jbd2_has_feature_fast_commit(journal) &&
-	    journal->j_fc_wbufsize > 0) {
+	if (jbd2_has_feature_fast_commit(journal)) {
 		journal->j_fc_last = be32_to_cpu(sb->s_maxlen);
-		journal->j_last = journal->j_fc_last - journal->j_fc_wbufsize;
+		num_fc_blocks = be32_to_cpu(sb->s_num_fc_blks);
+		if (!num_fc_blocks || num_fc_blocks >= journal->j_fc_last)
+			num_fc_blocks = JBD2_MIN_FC_BLOCKS;
+		journal->j_last = journal->j_fc_last - num_fc_blocks;
 		journal->j_fc_first = journal->j_last + 1;
 		journal->j_fc_off = 0;
 	} else {
@@ -1954,9 +1974,6 @@  int jbd2_journal_load(journal_t *journal)
 	 */
 	journal->j_flags &= ~JBD2_ABORT;
 
-	if (journal->j_fc_wbufsize > 0)
-		jbd2_journal_set_features(journal, 0, 0,
-					  JBD2_FEATURE_INCOMPAT_FAST_COMMIT);
 	/* OK, we've finished with the dynamic journal bits:
 	 * reinitialise the dynamic contents of the superblock in memory
 	 * and reset them on disk. */
@@ -2040,8 +2057,7 @@  int jbd2_journal_destroy(journal_t *journal)
 		jbd2_journal_destroy_revoke(journal);
 	if (journal->j_chksum_driver)
 		crypto_free_shash(journal->j_chksum_driver);
-	if (journal->j_fc_wbufsize > 0)
-		kfree(journal->j_fc_wbuf);
+	kfree(journal->j_fc_wbuf);
 	kfree(journal->j_wbuf);
 	kfree(journal);
 
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index eb2606133cd8..822f16cbf9b3 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -134,7 +134,7 @@  static int jread(struct buffer_head **bhp, journal_t *journal,
 
 	*bhp = NULL;
 
-	if (offset >= journal->j_maxlen) {
+	if (offset >= journal->j_maxlen + journal->j_fc_wbufsize) {
 		printk(KERN_ERR "JBD2: corrupted journal superblock\n");
 		return -EFSCORRUPTED;
 	}
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 1d5566af48ac..9b4e87a0068b 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -68,6 +68,7 @@  extern void *jbd2_alloc(size_t size, gfp_t flags);
 extern void jbd2_free(void *ptr, size_t size);
 
 #define JBD2_MIN_JOURNAL_BLOCKS 1024
+#define JBD2_MIN_FC_BLOCKS	256
 
 #ifdef __KERNEL__
 
@@ -1614,7 +1615,7 @@  extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *);
 extern int jbd2_cleanup_journal_tail(journal_t *);
 
 /* Fast commit related APIs */
-int jbd2_fc_init(journal_t *journal, int num_fc_blks);
+int jbd2_fc_init(journal_t *journal);
 int jbd2_fc_begin_commit(journal_t *journal, tid_t tid);
 int jbd2_fc_end_commit(journal_t *journal);
 int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid);