diff mbox series

[SRU,F,I,J,5/6] nbd: don't clear 'NBD_CMD_INFLIGHT' flag if request is not completed

Message ID 20220622051731.23563-6-matthew.ruffell@canonical.com
State New
Headers show
Series nbd: requests can become stuck when disconnecting from server with qemu-nbd | expand

Commit Message

Matthew Ruffell June 22, 2022, 5:17 a.m. UTC
From: Yu Kuai <yukuai3@huawei.com>

BugLink: https://bugs.launchpad.net/bugs/1896350

Otherwise io will hung because request will only be completed if the
cmd has the flag 'NBD_CMD_INFLIGHT'.

Fixes: 07175cb1baf4 ("nbd: make sure request completion won't concurrent")
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Link: https://lore.kernel.org/r/20220521073749.3146892-4-yukuai3@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
(backported from 2895f1831e911ca87d4efdf43e35eb72a0c7e66e)
[mruffell: context adjustment removing percpu_ref_put in recv_work()]
Signed-off-by: Matthew Ruffell <matthew.ruffell@canonical.com>
---
 drivers/block/nbd.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index d525ad238e2d..f97f3a5adf28 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -411,13 +411,14 @@  static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
 	if (!mutex_trylock(&cmd->lock))
 		return BLK_EH_RESET_TIMER;
 
-	if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
+	if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
 		mutex_unlock(&cmd->lock);
 		return BLK_EH_DONE;
 	}
 
 	if (!refcount_inc_not_zero(&nbd->config_refs)) {
 		cmd->status = BLK_STS_TIMEOUT;
+		__clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
 		mutex_unlock(&cmd->lock);
 		goto done;
 	}
@@ -486,6 +487,7 @@  static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
 	dev_err_ratelimited(nbd_to_dev(nbd), "Connection timed out\n");
 	set_bit(NBD_RT_TIMEDOUT, &config->runtime_flags);
 	cmd->status = BLK_STS_IOERR;
+	__clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
 	mutex_unlock(&cmd->lock);
 	sock_shutdown(nbd);
 	nbd_config_put(nbd);
@@ -745,7 +747,7 @@  static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
 	cmd = blk_mq_rq_to_pdu(req);
 
 	mutex_lock(&cmd->lock);
-	if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
+	if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
 		dev_err(disk_to_dev(nbd->disk), "Suspicious reply %d (status %u flags %lu)",
 			tag, cmd->status, cmd->flags);
 		ret = -ENOENT;
@@ -832,8 +834,16 @@  static void recv_work(struct work_struct *work)
 		}
 
 		rq = blk_mq_rq_from_pdu(cmd);
-		if (likely(!blk_should_fake_timeout(rq->q)))
-			blk_mq_complete_request(rq);
+		if (likely(!blk_should_fake_timeout(rq->q))) {
+			bool complete;
+
+			mutex_lock(&cmd->lock);
+			complete = __test_and_clear_bit(NBD_CMD_INFLIGHT,
+							&cmd->flags);
+			mutex_unlock(&cmd->lock);
+			if (complete)
+				blk_mq_complete_request(rq);
+		}
 	}
 	nbd_config_put(nbd);
 	atomic_dec(&config->recv_threads);