diff mbox series

[2/2] ubifs: Fix ABBA deadlock between inode reclaiming and deleted inode writing

Message ID 20240710022628.1227996-3-chengzhihao1@huawei.com
State Rejected
Headers show
Series ubifs: Fix deadlock between inode reclaiming and host inode writing | expand

Commit Message

Zhihao Cheng July 10, 2024, 2:26 a.m. UTC
The inodes reclaiming process(See function prune_icache_sb) collects
all reclaimable inodes and mark them with I_FREEING flag at first, at
that time, other processes will be stuck if they try getting these
inodes(See function find_inode_fast), then the reclaiming process
destroy the inodes.
In deleted inode writing function ubifs_jnl_write_inode(), UBIFS holds
BASEHD's wbuf->io_mutex while traversing all xattr inodes, which could
race with inodes reclaiming process(The reclaiming process could try
locking BASEHD's wbuf->io_mutex in inode evicting function), then an
ABBA deadlock problem would happens as following:

 1. File A has inode ia and a xattr(with inode ixa), regular file B has
    inode ib and a xattr.
 2. getfattr(A, xattr_buf) // ixa is added into lru // lru->ixa
 3. Then, following three processes running like this:

        PA                PB                        PC
                echo 2 > /proc/sys/vm/drop_caches
                // ib and ia area added into lru, lru->ixa->ib->ia
                 shrink_slab
                  prune_icache_sb
                   list_lru_walk_one
                    inode_lru_isolate
                     ixa->inode->i_state |= I_FREEING // set inode state
                    inode_lru_isolate
                     __iget(ib)
                     spin_unlock(&ib->i_lock)
                     spin_unlock(lru_lock)
                                                   rm file B
                                                    ib->nlink = 0
                                                    iput(ib)
 rm file A
  iput(ia)
   ubifs_evict_inode(ia)
    ubifs_jnl_delete_inode(ia)
     ubifs_jnl_write_inode(ia)
      make_reservation(BASEHD) // Lock wbuf->io_mutex
      ubifs_iget(ixa->i_ino)
       iget_locked
        find_inode_fast
	 __wait_on_freeing_inode(ixa)
          |          iput(ib) // ib->nlink is 0, do evict
          |           ubifs_evict_inode
          |            ubifs_jnl_delete_inode(ib)
          ↓             ubifs_jnl_write_inode
     ABBA deadlock ←-----make_reservation(BASEHD)
                   dispose_list // cannot be executed by prune_icache_sb
                    wake_up_bit(&inode->i_state)

The root cause is that UBIFS tries getting xattr inode with holding
BASEHD's wbuf->io_mutex, so the problem can be fixed by getting all
xattr inodes before locking BASEHD's wbuf->io_mutex.

Fixes: 7959cf3a7506 ("ubifs: journal: Handle xattrs like files")
Cc: stable@vger.kernel.org
Link: https://bugzilla.kernel.org/show_bug.cgi?id=219022
Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
---
 fs/ubifs/journal.c | 110 +++++++++++++++++++++++++++------------------
 1 file changed, 67 insertions(+), 43 deletions(-)
diff mbox series

Patch

diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index a5c7c499cc29..7556d87934b7 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -971,7 +971,8 @@  int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
  */
 int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
 {
-	int err, lnum, offs;
+	int err, lnum, offs, i = 0;
+	struct inode **xinos = NULL;
 	struct ubifs_ino_node *ino, *ino_start;
 	struct ubifs_inode *ui = ubifs_inode(inode);
 	int sync = 0, write_len = 0, ilen = UBIFS_INO_NODE_SZ;
@@ -982,44 +983,24 @@  int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
 	dbg_jnl("ino %lu, nlink %u", inode->i_ino, inode->i_nlink);
 
 	if (kill_xattrs) {
+		union ubifs_key key;
+		struct fscrypt_name nm = {0};
+		struct ubifs_dent_node *xent, *pxent = NULL;
+		struct super_block *sb = c->vfs_sb;
+
 		if (ui->xattr_cnt > ubifs_xattr_max_cnt(c)) {
 			ubifs_err(c, "Cannot delete inode, it has too much xattrs!");
 			ubifs_ro_mode(c, err);
 			return -EPERM;
 		}
-	}
-
-	/*
-	 * If the inode is being deleted, do not write the attached data. No
-	 * need to synchronize the write-buffer either.
-	 */
-	if (!last_reference) {
-		ilen += ui->data_len;
-		sync = IS_SYNC(inode);
-	} else if (kill_xattrs) {
-		write_len += UBIFS_INO_NODE_SZ * ui->xattr_cnt;
-	}
-
-	if (ubifs_authenticated(c))
-		write_len += ALIGN(ilen, 8) + ubifs_auth_node_sz(c);
-	else
-		write_len += ilen;
 
-	ino_start = ino = kmalloc(write_len, GFP_NOFS);
-	if (!ino)
-		return -ENOMEM;
-
-	/* Make reservation before allocating sequence numbers */
-	err = make_reservation(c, BASEHD, write_len);
-	if (err)
-		goto out_free;
-
-	if (kill_xattrs) {
-		union ubifs_key key;
-		struct fscrypt_name nm = {0};
-		struct inode *xino;
-		struct ubifs_dent_node *xent, *pxent = NULL;
+		xinos = kmalloc(sizeof(*xinos) * ui->xattr_cnt, GFP_NOFS);
+		if (!xinos) {
+			ubifs_err(c, "Cannot allocate memory for xattr inodes");
+			return -ENOMEM;
+		}
 
+		i = 0;
 		lowest_xent_key(c, &key, inode->i_ino);
 		while (1) {
 			xent = ubifs_tnc_next_ent(c, &key, &nm);
@@ -1029,34 +1010,71 @@  int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
 					break;
 
 				kfree(pxent);
-				goto out_release;
+				goto out_iput;
 			}
 
 			fname_name(&nm) = xent->name;
 			fname_len(&nm) = le16_to_cpu(xent->nlen);
 
-			xino = ubifs_iget(c->vfs_sb, le64_to_cpu(xent->inum));
-			if (IS_ERR(xino)) {
-				err = PTR_ERR(xino);
+			xinos[i] = ubifs_iget(sb, le64_to_cpu(xent->inum));
+			if (IS_ERR(xinos[i])) {
+				err = PTR_ERR(xinos[i]);
 				ubifs_err(c, "dead directory entry '%s', error %d",
 					  xent->name, err);
 				ubifs_ro_mode(c, err);
 				kfree(pxent);
 				kfree(xent);
-				goto out_release;
+				goto out_iput;
 			}
-			ubifs_assert(c, ubifs_inode(xino)->xattr);
-
-			clear_nlink(xino);
-			pack_inode(c, ino, xino, 0);
-			ino = (void *)ino + UBIFS_INO_NODE_SZ;
-			iput(xino);
+			ubifs_assert(c, ubifs_inode(xinos[i])->xattr);
+			ubifs_assert(c, i < ui->xattr_cnt);
 
 			kfree(pxent);
 			pxent = xent;
 			key_read(c, &xent->key, &key);
+			i++;
 		}
 		kfree(pxent);
+		ubifs_assert(c, i == ui->xattr_cnt);
+	}
+
+	/*
+	 * If the inode is being deleted, do not write the attached data. No
+	 * need to synchronize the write-buffer either.
+	 */
+	if (!last_reference) {
+		ilen += ui->data_len;
+		sync = IS_SYNC(inode);
+	} else if (kill_xattrs) {
+		write_len += UBIFS_INO_NODE_SZ * ui->xattr_cnt;
+	}
+
+	if (ubifs_authenticated(c))
+		write_len += ALIGN(ilen, 8) + ubifs_auth_node_sz(c);
+	else
+		write_len += ilen;
+
+	ino_start = ino = kmalloc(write_len, GFP_NOFS);
+	if (!ino) {
+		err = -ENOMEM;
+		goto out_iput;
+	}
+
+	/* Make reservation before allocating sequence numbers */
+	err = make_reservation(c, BASEHD, write_len);
+	if (err)
+		goto out_free;
+
+	if (kill_xattrs) {
+		for (i = 0; i < ui->xattr_cnt; i++) {
+			clear_nlink(xinos[i]);
+			pack_inode(c, ino, xinos[i], 0);
+			ino = (void *)ino + UBIFS_INO_NODE_SZ;
+			iput(xinos[i]);
+		}
+
+		kfree(xinos);
+		xinos = NULL;
 	}
 
 	pack_inode(c, ino, inode, 1);
@@ -1103,6 +1121,12 @@  int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
 	finish_reservation(c);
 out_free:
 	kfree(ino_start);
+out_iput:
+	if (xinos) {
+		while (--i >= 0)
+			iput(xinos[i]);
+		kfree(xinos);
+	}
 	return err;
 }