Message ID | 1465354006-15091-1-git-send-email-tytso@mit.edu |
---|---|
State | Superseded, archived |
Headers | show |
On Jun 7, 2016, at 8:46 PM, Theodore Ts'o <tytso@mit.edu> wrote: > > If there are pending blocks to be released after a commit, retrying > the allocation after a journal commit has no hope of helping. So > track how many pending deleted blocks there might be, and don't retry > if there are no pending blocks. > > Reported-by: Chao Yu <yuchao0@huawei.com> > Signed-off-by: Theodore Ts'o <tytso@mit.edu> > --- > > Oops, ignore the earlier version of this patch. I bobbled the commit > and merged in part of another change. > > fs/ext4/balloc.c | 9 ++++++++- > fs/ext4/ext4.h | 1 + > fs/ext4/ext4_jbd2.h | 10 +++++++++- > fs/ext4/mballoc.c | 12 ++++++++++-- > 4 files changed, 28 insertions(+), 4 deletions(-) > > diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c > index 3020fd7..371ac63 100644 > --- a/fs/ext4/balloc.c > +++ b/fs/ext4/balloc.c > @@ -603,7 +603,14 @@ int ext4_claim_free_clusters(struct ext4_sb_info *sbi, > */ > int ext4_should_retry_alloc(struct super_block *sb, int *retries) > { > - if (!ext4_has_free_clusters(EXT4_SB(sb), 1, 0) || > + unsigned int pending_blocks; > + > + spin_lock(&EXT4_SB(sb)->s_md_lock); > + pending_blocks = EXT4_SB(sb)->s_mb_free_pending; > + spin_unlock(&EXT4_SB(sb)->s_md_lock); Is there a benefit of having a spinlock here? The read of free_pending should be atomic. 
Cheers, Andreas > + > + if (pending_blocks == 0 || > + !ext4_has_free_clusters(EXT4_SB(sb), 1, 0) || > (*retries)++ > 3 || > !EXT4_SB(sb)->s_journal) > return 0; > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > index b84aa1c..96c73e6 100644 > --- a/fs/ext4/ext4.h > +++ b/fs/ext4/ext4.h > @@ -1430,6 +1430,7 @@ struct ext4_sb_info { > unsigned short *s_mb_offsets; > unsigned int *s_mb_maxs; > unsigned int s_group_info_size; > + unsigned int s_mb_free_pending; > > /* tunables */ > unsigned long s_stripe; > diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h > index 09c1ef3..b1d52c1 100644 > --- a/fs/ext4/ext4_jbd2.h > +++ b/fs/ext4/ext4_jbd2.h > @@ -175,6 +175,13 @@ struct ext4_journal_cb_entry { > * There is no guaranteed calling order of multiple registered callbacks on > * the same transaction. > */ > +static inline void _ext4_journal_callback_add(handle_t *handle, > + struct ext4_journal_cb_entry *jce) > +{ > + /* Add the jce to transaction's private list */ > + list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list); > +} > + > static inline void ext4_journal_callback_add(handle_t *handle, > void (*func)(struct super_block *sb, > struct ext4_journal_cb_entry *jce, > @@ -187,10 +194,11 @@ static inline void ext4_journal_callback_add(handle_t *handle, > /* Add the jce to transaction's private list */ > jce->jce_func = func; > spin_lock(&sbi->s_md_lock); > - list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list); > + _ext4_journal_callback_add(handle, jce); > spin_unlock(&sbi->s_md_lock); > } > > + > /** > * ext4_journal_callback_del: delete a registered callback > * @handle: active journal transaction handle on which callback was registered > diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c > index c1ab3ec..77249e1 100644 > --- a/fs/ext4/mballoc.c > +++ b/fs/ext4/mballoc.c > @@ -2627,6 +2627,7 @@ int ext4_mb_init(struct super_block *sb) > > spin_lock_init(&sbi->s_md_lock); > spin_lock_init(&sbi->s_bal_lock); > + 
sbi->s_mb_free_pending = 0; > > sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; > sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN; > @@ -2814,6 +2815,9 @@ static void ext4_free_data_callback(struct super_block *sb, > /* we expect to find existing buddy because it's pinned */ > BUG_ON(err != 0); > > + spin_lock(&EXT4_SB(sb)->s_md_lock); > + EXT4_SB(sb)->s_mb_free_pending -= entry->efd_count; > + spin_unlock(&EXT4_SB(sb)->s_md_lock); > > db = e4b.bd_info; > /* there are blocks to put in buddy to make them really free */ > @@ -4583,6 +4587,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, > { > ext4_group_t group = e4b->bd_group; > ext4_grpblk_t cluster; > + ext4_grpblk_t clusters = new_entry->efd_count; > struct ext4_free_data *entry; > struct ext4_group_info *db = e4b->bd_info; > struct super_block *sb = e4b->bd_sb; > @@ -4649,8 +4654,11 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, > } > } > /* Add the extent to transaction's private list */ > - ext4_journal_callback_add(handle, ext4_free_data_callback, > - &new_entry->efd_jce); > + new_entry->efd_jce.jce_func = ext4_free_data_callback; > + spin_lock(&sbi->s_md_lock); > + _ext4_journal_callback_add(handle, &new_entry->efd_jce); > + sbi->s_mb_free_pending += clusters; > + spin_unlock(&sbi->s_md_lock); > return 0; > } > > -- > 2.5.0 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html Cheers, Andreas
On Tue 07-06-16 22:46:46, Ted Tso wrote: > If there are pending blocks to be released after a commit, retrying > the allocation after a journal commit has no hope of helping. So > track how many pending deleted blocks there might be, and don't retry > if there are no pending blocks. > > Reported-by: Chao Yu <yuchao0@huawei.com> > Signed-off-by: Theodore Ts'o <tytso@mit.edu> > --- > > Oops, ignore the earlier version of this patch. I bobbled the commit > and merged in part of another change. Couple of notes below: > fs/ext4/balloc.c | 9 ++++++++- > fs/ext4/ext4.h | 1 + > fs/ext4/ext4_jbd2.h | 10 +++++++++- > fs/ext4/mballoc.c | 12 ++++++++++-- > 4 files changed, 28 insertions(+), 4 deletions(-) > > diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c > index 3020fd7..371ac63 100644 > --- a/fs/ext4/balloc.c > +++ b/fs/ext4/balloc.c > @@ -603,7 +603,14 @@ int ext4_claim_free_clusters(struct ext4_sb_info *sbi, > */ > int ext4_should_retry_alloc(struct super_block *sb, int *retries) > { > - if (!ext4_has_free_clusters(EXT4_SB(sb), 1, 0) || > + unsigned int pending_blocks; > + > + spin_lock(&EXT4_SB(sb)->s_md_lock); > + pending_blocks = EXT4_SB(sb)->s_mb_free_pending; > + spin_unlock(&EXT4_SB(sb)->s_md_lock); > + > + if (pending_blocks == 0 || > + !ext4_has_free_clusters(EXT4_SB(sb), 1, 0) || > (*retries)++ > 3 || > !EXT4_SB(sb)->s_journal) > return 0; But this is racy. Transaction commit could have finished before we called ext4_should_retry_alloc() and so we will mistakenly think there's no hope although there are blocks free now. But what you could probably do is just return 1 without forcing a transaction commit when pending_blocks == 0. 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > index b84aa1c..96c73e6 100644 > --- a/fs/ext4/ext4.h > +++ b/fs/ext4/ext4.h > @@ -1430,6 +1430,7 @@ struct ext4_sb_info { > unsigned short *s_mb_offsets; > unsigned int *s_mb_maxs; > unsigned int s_group_info_size; > + unsigned int s_mb_free_pending; > > /* tunables */ > unsigned long s_stripe; > diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h > index 09c1ef3..b1d52c1 100644 > --- a/fs/ext4/ext4_jbd2.h > +++ b/fs/ext4/ext4_jbd2.h > @@ -175,6 +175,13 @@ struct ext4_journal_cb_entry { > * There is no guaranteed calling order of multiple registered callbacks on > * the same transaction. > */ > +static inline void _ext4_journal_callback_add(handle_t *handle, > + struct ext4_journal_cb_entry *jce) > +{ > + /* Add the jce to transaction's private list */ > + list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list); > +} > + > static inline void ext4_journal_callback_add(handle_t *handle, > void (*func)(struct super_block *sb, > struct ext4_journal_cb_entry *jce, Well, since ext4_mb_free_metadata() is the only user of ext4_journal_callback_add(), ext4_journal_callback_add() won't have any user after your patch. Maybe we could just stop playing these abstraction games nobody currently uses and just implement a helper function to add freeing callback to the transaction list including increment of the pending counter. Honza
On Jun 16, 2016, at 1:20 PM, Jan Kara <jack@suse.cz> wrote: > > Well, since ext4_mb_free_metadata() is the only user of > ext4_journal_callback_add(), ext4_journal_callback_add() won't have any > user after your patch. Maybe we could just stop playing these abstraction > games nobody currently uses and just implement a helper function to add > freeing callback to the transaction list including increment of the pending > counter. We use the journal callback for Lustre servers. Please don't remove it. Cheers, Andreas
On Thu 16-06-16 14:11:28, Andreas Dilger wrote: > On Jun 16, 2016, at 1:20 PM, Jan Kara <jack@suse.cz> wrote: > > > > Well, since ext4_mb_free_metadata() is the only user of > > ext4_journal_callback_add(), ext4_journal_callback_add() won't have any > > user after your patch. Maybe we could just stop playing these abstraction > > games nobody currently uses and just implement a helper function to add > > freeing callback to the transaction list including increment of the pending > > counter. > > We use the journal callback for Lustre servers. Please don't remove it. I didn't really mean to rewrite the callback infrastructure. I just meant to change the helper function to fit in-tree users. If Lustre needs to do something else, it is free to have its own helper function which attaches callbacks to the transaction's list. Honza
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 3020fd7..371ac63 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -603,7 +603,14 @@ int ext4_claim_free_clusters(struct ext4_sb_info *sbi, */ int ext4_should_retry_alloc(struct super_block *sb, int *retries) { - if (!ext4_has_free_clusters(EXT4_SB(sb), 1, 0) || + unsigned int pending_blocks; + + spin_lock(&EXT4_SB(sb)->s_md_lock); + pending_blocks = EXT4_SB(sb)->s_mb_free_pending; + spin_unlock(&EXT4_SB(sb)->s_md_lock); + + if (pending_blocks == 0 || + !ext4_has_free_clusters(EXT4_SB(sb), 1, 0) || (*retries)++ > 3 || !EXT4_SB(sb)->s_journal) return 0; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index b84aa1c..96c73e6 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1430,6 +1430,7 @@ struct ext4_sb_info { unsigned short *s_mb_offsets; unsigned int *s_mb_maxs; unsigned int s_group_info_size; + unsigned int s_mb_free_pending; /* tunables */ unsigned long s_stripe; diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 09c1ef3..b1d52c1 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -175,6 +175,13 @@ struct ext4_journal_cb_entry { * There is no guaranteed calling order of multiple registered callbacks on * the same transaction. 
*/ +static inline void _ext4_journal_callback_add(handle_t *handle, + struct ext4_journal_cb_entry *jce) +{ + /* Add the jce to transaction's private list */ + list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list); +} + static inline void ext4_journal_callback_add(handle_t *handle, void (*func)(struct super_block *sb, struct ext4_journal_cb_entry *jce, @@ -187,10 +194,11 @@ static inline void ext4_journal_callback_add(handle_t *handle, /* Add the jce to transaction's private list */ jce->jce_func = func; spin_lock(&sbi->s_md_lock); - list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list); + _ext4_journal_callback_add(handle, jce); spin_unlock(&sbi->s_md_lock); } + /** * ext4_journal_callback_del: delete a registered callback * @handle: active journal transaction handle on which callback was registered diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index c1ab3ec..77249e1 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2627,6 +2627,7 @@ int ext4_mb_init(struct super_block *sb) spin_lock_init(&sbi->s_md_lock); spin_lock_init(&sbi->s_bal_lock); + sbi->s_mb_free_pending = 0; sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN; @@ -2814,6 +2815,9 @@ static void ext4_free_data_callback(struct super_block *sb, /* we expect to find existing buddy because it's pinned */ BUG_ON(err != 0); + spin_lock(&EXT4_SB(sb)->s_md_lock); + EXT4_SB(sb)->s_mb_free_pending -= entry->efd_count; + spin_unlock(&EXT4_SB(sb)->s_md_lock); db = e4b.bd_info; /* there are blocks to put in buddy to make them really free */ @@ -4583,6 +4587,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, { ext4_group_t group = e4b->bd_group; ext4_grpblk_t cluster; + ext4_grpblk_t clusters = new_entry->efd_count; struct ext4_free_data *entry; struct ext4_group_info *db = e4b->bd_info; struct super_block *sb = e4b->bd_sb; @@ -4649,8 +4654,11 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, } } 
/* Add the extent to transaction's private list */ - ext4_journal_callback_add(handle, ext4_free_data_callback, - &new_entry->efd_jce); + new_entry->efd_jce.jce_func = ext4_free_data_callback; + spin_lock(&sbi->s_md_lock); + _ext4_journal_callback_add(handle, &new_entry->efd_jce); + sbi->s_mb_free_pending += clusters; + spin_unlock(&sbi->s_md_lock); return 0; }
If there are no pending blocks to be released after a commit, retrying the allocation after a journal commit has no hope of helping. So track how many pending deleted blocks there might be, and don't retry if there are no pending blocks. Reported-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu> --- Oops, ignore the earlier version of this patch. I bobbled the commit and merged in part of another change. fs/ext4/balloc.c | 9 ++++++++- fs/ext4/ext4.h | 1 + fs/ext4/ext4_jbd2.h | 10 +++++++++- fs/ext4/mballoc.c | 12 ++++++++++-- 4 files changed, 28 insertions(+), 4 deletions(-)