[05/10] ext4: Fix race when reusing xattr blocks

Message ID 20220712105436.32204-5-jack@suse.cz
State Accepted
Series ext4: Fix possible fs corruption due to xattr races

Commit Message

Jan Kara July 12, 2022, 10:54 a.m. UTC
When ext4_xattr_block_set() decides to remove an xattr block, the following
race can happen:

CPU1                                    CPU2
ext4_xattr_block_set()                  ext4_xattr_release_block()
  new_bh = ext4_xattr_block_cache_find()

                                          lock_buffer(bh);
                                          ref = le32_to_cpu(BHDR(bh)->h_refcount);
                                          if (ref == 1) {
                                            ...
                                            mb_cache_entry_delete();
                                            unlock_buffer(bh);
                                            ext4_free_blocks();
                                              ...
                                              ext4_forget(..., bh, ...);
                                                jbd2_journal_revoke(..., bh);

  ext4_journal_get_write_access(..., new_bh, ...)
    do_get_write_access()
      jbd2_journal_cancel_revoke(..., new_bh);

Later the code in ext4_xattr_block_set() finds out the block got freed and
cancels reuse of the block, but the revoke stays canceled, so if the block
is later reused and the journal replayed, the filesystem can get corrupted.
If the race works out slightly differently, we can also hit assertions in
the jbd2 code.

Fix the problem by making sure that once a matching mbcache entry is found,
the code dropping the last xattr block reference (or trying to modify the
xattr block in place) waits until the mbcache entry reference is dropped.
This way, code trying to reuse an xattr block is protected from someone
concurrently dropping the last reference to that block.

Reported-and-tested-by: Ritesh Harjani <ritesh.list@gmail.com>
CC: stable@vger.kernel.org
Fixes: 82939d7999df ("ext4: convert to mbcache2")
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext4/xattr.c | 67 +++++++++++++++++++++++++++++++++----------------
 1 file changed, 45 insertions(+), 22 deletions(-)
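
The core of the fix is a delete-or-get / wait-for-unused pattern around the
mbcache entry, built on the mb_cache_entry_delete_or_get() and
mb_cache_entry_wait_unused() helpers added earlier in this series. A minimal
sketch of the pattern on the freeing side (the helper name is illustrative,
not code from the patch; kernel headers and error handling omitted):

/*
 * Either the mbcache entry is unused and is deleted right away, or we
 * get a reference back and must wait until the concurrent user (e.g.
 * the reusing side that found the entry via
 * ext4_xattr_block_cache_find()) drops it. Only then is it safe to
 * free the xattr block and revoke it in the journal.
 */
static void wait_and_unhash_xattr_entry(struct mb_cache *cache, u32 hash,
					u64 block)
{
	struct mb_cache_entry *oe;

	while ((oe = mb_cache_entry_delete_or_get(cache, hash, block))) {
		mb_cache_entry_wait_unused(oe);
		mb_cache_entry_put(cache, oe);
	}
	/* Entry is gone and unreferenced: safe to free the block. */
}

This is exactly the shape of the new loop in ext4_evict_ea_inode() below;
ext4_xattr_release_block() additionally drops and re-takes the buffer lock
before retrying.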

Comments

Ritesh Harjani (IBM) July 14, 2022, 12:26 p.m. UTC | #1
On 22/07/12 12:54PM, Jan Kara wrote:
...

Thanks Jan,
Just a note - I retested the patches only up to this one (the ones marked
for stable) with stress-ng --xattr 16, and I didn't find any issues so far
on ext2, ext3, and ext4 with default mkfs options.

I also re-ran the full v3 patch series with the same test case on all three
filesystems and didn't see any failures.
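
For reference, a reproducer in the same spirit can be as simple as the
sketch below (a hypothetical pthread-based loop, not the actual stress-ng
implementation; the mount point and xattr name are made up):

/*
 * Hypothetical reproducer sketch in the spirit of "stress-ng --xattr 16":
 * several threads repeatedly set and remove a user xattr with identical
 * values, so ext4 xattr blocks are constantly created, shared via
 * mbcache, and released.
 */
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <string.h>
#include <sys/xattr.h>
#include <unistd.h>

#define NR_THREADS 16
#define NR_LOOPS 100000

static void *worker(void *arg)
{
	long id = (long)arg;
	char path[64], value[64];
	int i, fd;

	snprintf(path, sizeof(path), "/mnt/test/file%ld", id);
	fd = open(path, O_CREAT | O_WRONLY, 0644);
	if (fd >= 0)
		close(fd);
	/* Identical values across files maximize xattr block sharing. */
	memset(value, 'x', sizeof(value));
	for (i = 0; i < NR_LOOPS; i++) {
		if (setxattr(path, "user.stress", value, sizeof(value), 0))
			perror("setxattr");
		if (removexattr(path, "user.stress"))
			perror("removexattr");
	}
	return NULL;
}

int main(void)
{
	pthread_t t[NR_THREADS];
	long i;

	for (i = 0; i < NR_THREADS; i++)
		pthread_create(&t[i], NULL, worker, (void *)i);
	for (i = 0; i < NR_THREADS; i++)
		pthread_join(t[i], NULL);
	return 0;
}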

-ritesh
Zhihao Cheng July 16, 2022, 3 a.m. UTC | #2
On 2022/7/12 18:54, Jan Kara wrote:
Hi Jan, one little question confuses me:
...
> @@ -1991,18 +2020,13 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
>   				lock_buffer(new_bh);
>   				/*
>   				 * We have to be careful about races with
> -				 * freeing, rehashing or adding references to
> -				 * xattr block. Once we hold buffer lock xattr
> -				 * block's state is stable so we can check
> -				 * whether the block got freed / rehashed or
> -				 * not.  Since we unhash mbcache entry under
> -				 * buffer lock when freeing / rehashing xattr
> -				 * block, checking whether entry is still
> -				 * hashed is reliable. Same rules hold for
> -				 * e_reusable handling.
> +				 * adding references to xattr block. Once we
> +				 * hold buffer lock xattr block's state is
> +				 * stable so we can check the additional
> +				 * reference fits.
>   				 */
> -				if (hlist_bl_unhashed(&ce->e_hash_list) ||
> -				    !ce->e_reusable) {
> +				ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
> +				if (ref > EXT4_XATTR_REFCOUNT_MAX) {

So far, we have mb_cache_entry_delete_or_get() and
mb_cache_entry_wait_unused(), so a used cache entry cannot be concurrently
removed. Removing the check 'hlist_bl_unhashed(&ce->e_hash_list)' is okay.

What is the effect of changing the other two checks, 'ref >=
EXT4_XATTR_REFCOUNT_MAX' and '!ce->e_reusable'? Is it to make the code more
obvious, e.g. checking the condition 'ref <= EXT4_XATTR_REFCOUNT_MAX'
directly rather than 'ce->e_reusable', given that we already checked
'ce->e_reusable' in ext4_xattr_block_cache_find() before?
>   					/*
>   					 * Undo everything and check mbcache
>   					 * again.
> @@ -2017,9 +2041,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
>   					new_bh = NULL;
>   					goto inserted;
>   				}
> -				ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
>   				BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
> -				if (ref >= EXT4_XATTR_REFCOUNT_MAX)
> +				if (ref == EXT4_XATTR_REFCOUNT_MAX)
>   					ce->e_reusable = 0;
>   				ea_bdebug(new_bh, "reusing; refcount now=%d",
>   					  ref);
>
Jan Kara July 25, 2022, 3:23 p.m. UTC | #3
On Sat 16-07-22 11:00:46, Zhihao Cheng wrote:
> On 2022/7/12 18:54, Jan Kara wrote:
> Hi Jan, one little question confuses me:

...

> > @@ -1991,18 +2020,13 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
> >   				lock_buffer(new_bh);
> >   				/*
> >   				 * We have to be careful about races with
> > -				 * freeing, rehashing or adding references to
> > -				 * xattr block. Once we hold buffer lock xattr
> > -				 * block's state is stable so we can check
> > -				 * whether the block got freed / rehashed or
> > -				 * not.  Since we unhash mbcache entry under
> > -				 * buffer lock when freeing / rehashing xattr
> > -				 * block, checking whether entry is still
> > -				 * hashed is reliable. Same rules hold for
> > -				 * e_reusable handling.
> > +				 * adding references to xattr block. Once we
> > +				 * hold buffer lock xattr block's state is
> > +				 * stable so we can check the additional
> > +				 * reference fits.
> >   				 */
> > -				if (hlist_bl_unhashed(&ce->e_hash_list) ||
> > -				    !ce->e_reusable) {
> > +				ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
> > +				if (ref > EXT4_XATTR_REFCOUNT_MAX) {
> 
> So far, we have mb_cache_entry_delete_or_get() and
> mb_cache_entry_wait_unused(), so a used cache entry cannot be concurrently
> removed. Removing the check 'hlist_bl_unhashed(&ce->e_hash_list)' is okay.
> 
> What is the effect of changing the other two checks, 'ref >=
> EXT4_XATTR_REFCOUNT_MAX' and '!ce->e_reusable'? Is it to make the code more
> obvious, e.g. checking the condition 'ref <= EXT4_XATTR_REFCOUNT_MAX'
> directly rather than 'ce->e_reusable', given that we already checked
> 'ce->e_reusable' in ext4_xattr_block_cache_find() before?

Well, ce->e_reusable is set if and only if BHDR(new_bh)->h_refcount <
EXT4_XATTR_REFCOUNT_MAX. So checking whether the refcount is small enough
is all that is needed and we don't need the ce->e_reusable check here.
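
Expressed as a hypothetical assertion at the point where we take the extra
reference (not something the patch adds), the invariant would read:

	/* Under the buffer lock: reusable iff one more reference fits. */
	WARN_ON(ce->e_reusable !=
		(le32_to_cpu(BHDR(new_bh)->h_refcount) <
		 EXT4_XATTR_REFCOUNT_MAX));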

								Honza
Zhihao Cheng July 26, 2022, 1:14 a.m. UTC | #4
On 2022/7/25 23:23, Jan Kara wrote:

>> So far, we have mb_cache_entry_delete_or_get() and
>> mb_cache_entry_wait_unused(), so a used cache entry cannot be concurrently
>> removed. Removing the check 'hlist_bl_unhashed(&ce->e_hash_list)' is okay.
>>
>> What is the effect of changing the other two checks, 'ref >=
>> EXT4_XATTR_REFCOUNT_MAX' and '!ce->e_reusable'? Is it to make the code more
>> obvious, e.g. checking the condition 'ref <= EXT4_XATTR_REFCOUNT_MAX'
>> directly rather than 'ce->e_reusable', given that we already checked
>> 'ce->e_reusable' in ext4_xattr_block_cache_find() before?
> 
> Well, ce->e_reusable is set if and only if BHDR(new_bh)->h_refcount <
> EXT4_XATTR_REFCOUNT_MAX. So checking whether the refcount is small enough
> is all that is needed and we don't need the ce->e_reusable check here.
> 

Thanks for replying, I get it now.

Patch

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index aadfae53d055..3a0928c8720e 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -439,9 +439,16 @@  static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
 /* Remove entry from mbcache when EA inode is getting evicted */
 void ext4_evict_ea_inode(struct inode *inode)
 {
-	if (EA_INODE_CACHE(inode))
-		mb_cache_entry_delete(EA_INODE_CACHE(inode),
-			ext4_xattr_inode_get_hash(inode), inode->i_ino);
+	struct mb_cache_entry *oe;
+
+	if (!EA_INODE_CACHE(inode))
+		return;
+	/* Wait for entry to get unused so that we can remove it */
+	while ((oe = mb_cache_entry_delete_or_get(EA_INODE_CACHE(inode),
+			ext4_xattr_inode_get_hash(inode), inode->i_ino))) {
+		mb_cache_entry_wait_unused(oe);
+		mb_cache_entry_put(EA_INODE_CACHE(inode), oe);
+	}
 }
 
 static int
@@ -1229,6 +1236,7 @@  ext4_xattr_release_block(handle_t *handle, struct inode *inode,
 	if (error)
 		goto out;
 
+retry_ref:
 	lock_buffer(bh);
 	hash = le32_to_cpu(BHDR(bh)->h_hash);
 	ref = le32_to_cpu(BHDR(bh)->h_refcount);
@@ -1238,9 +1246,18 @@  ext4_xattr_release_block(handle_t *handle, struct inode *inode,
 		 * This must happen under buffer lock for
 		 * ext4_xattr_block_set() to reliably detect freed block
 		 */
-		if (ea_block_cache)
-			mb_cache_entry_delete(ea_block_cache, hash,
-					      bh->b_blocknr);
+		if (ea_block_cache) {
+			struct mb_cache_entry *oe;
+
+			oe = mb_cache_entry_delete_or_get(ea_block_cache, hash,
+							  bh->b_blocknr);
+			if (oe) {
+				unlock_buffer(bh);
+				mb_cache_entry_wait_unused(oe);
+				mb_cache_entry_put(ea_block_cache, oe);
+				goto retry_ref;
+			}
+		}
 		get_bh(bh);
 		unlock_buffer(bh);
 
@@ -1867,9 +1884,20 @@  ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 			 * ext4_xattr_block_set() to reliably detect modified
 			 * block
 			 */
-			if (ea_block_cache)
-				mb_cache_entry_delete(ea_block_cache, hash,
-						      bs->bh->b_blocknr);
+			if (ea_block_cache) {
+				struct mb_cache_entry *oe;
+
+				oe = mb_cache_entry_delete_or_get(ea_block_cache,
+					hash, bs->bh->b_blocknr);
+				if (oe) {
+					/*
+					 * Xattr block is getting reused. Leave
+					 * it alone.
+					 */
+					mb_cache_entry_put(ea_block_cache, oe);
+					goto clone_block;
+				}
+			}
 			ea_bdebug(bs->bh, "modifying in-place");
 			error = ext4_xattr_set_entry(i, s, handle, inode,
 						     true /* is_block */);
@@ -1885,6 +1913,7 @@  ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 				goto cleanup;
 			goto inserted;
 		}
+clone_block:
 		unlock_buffer(bs->bh);
 		ea_bdebug(bs->bh, "cloning");
 		s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
@@ -1991,18 +2020,13 @@  ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 				lock_buffer(new_bh);
 				/*
 				 * We have to be careful about races with
-				 * freeing, rehashing or adding references to
-				 * xattr block. Once we hold buffer lock xattr
-				 * block's state is stable so we can check
-				 * whether the block got freed / rehashed or
-				 * not.  Since we unhash mbcache entry under
-				 * buffer lock when freeing / rehashing xattr
-				 * block, checking whether entry is still
-				 * hashed is reliable. Same rules hold for
-				 * e_reusable handling.
+				 * adding references to xattr block. Once we
+				 * hold buffer lock xattr block's state is
+				 * stable so we can check the additional
+				 * reference fits.
 				 */
-				if (hlist_bl_unhashed(&ce->e_hash_list) ||
-				    !ce->e_reusable) {
+				ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
+				if (ref > EXT4_XATTR_REFCOUNT_MAX) {
 					/*
 					 * Undo everything and check mbcache
 					 * again.
@@ -2017,9 +2041,8 @@  ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 					new_bh = NULL;
 					goto inserted;
 				}
-				ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
 				BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
-				if (ref >= EXT4_XATTR_REFCOUNT_MAX)
+				if (ref == EXT4_XATTR_REFCOUNT_MAX)
 					ce->e_reusable = 0;
 				ea_bdebug(new_bh, "reusing; refcount now=%d",
 					  ref);