From patchwork Tue Apr 24 13:08:19 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Vladimir Sementsov-Ogievskiy X-Patchwork-Id: 903417 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=nongnu.org (client-ip=2001:4830:134:3::11; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=virtuozzo.com Received: from lists.gnu.org (lists.gnu.org [IPv6:2001:4830:134:3::11]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 40VkDg50WQz9s0v for ; Tue, 24 Apr 2018 23:09:07 +1000 (AEST) Received: from localhost ([::1]:58413 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1fAxgv-0007hU-OX for incoming@patchwork.ozlabs.org; Tue, 24 Apr 2018 09:09:05 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:35572) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1fAxgL-0007gl-6Z for qemu-devel@nongnu.org; Tue, 24 Apr 2018 09:08:33 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1fAxgG-0005U0-5g for qemu-devel@nongnu.org; Tue, 24 Apr 2018 09:08:29 -0400 Received: from relay.sw.ru ([185.231.240.75]:45490) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1fAxgF-0005Na-TL; Tue, 24 Apr 2018 09:08:24 -0400 Received: from msk-vpn.virtuozzo.com ([195.214.232.6] helo=kvm.sw.ru) by relay.sw.ru with esmtp (Exim 4.90_1) (envelope-from ) id 1fAxgD-0007sT-KX; Tue, 24 Apr 2018 16:08:21 +0300 From: Vladimir Sementsov-Ogievskiy To: qemu-devel@nongnu.org, qemu-block@nongnu.org Date: Tue, 24 Apr 2018 16:08:19 +0300 Message-Id: 
<20180424130821.50987-2-vsementsov@virtuozzo.com> X-Mailer: git-send-email 2.11.1 In-Reply-To: <20180424130821.50987-1-vsementsov@virtuozzo.com> References: <20180424130821.50987-1-vsementsov@virtuozzo.com> X-detected-operating-system: by eggs.gnu.org: GNU/Linux 3.x [fuzzy] X-Received-From: 185.231.240.75 Subject: [Qemu-devel] [RFC 1/3] block: add bdrv_reconnect X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: kwolf@redhat.com, vsementsov@virtuozzo.com, famz@redhat.com, armbru@redhat.com, mreitz@redhat.com, den@openvz.org, pbonzini@redhat.com Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" It will be used to reconnect NBD connection. Signed-off-by: Vladimir Sementsov-Ogievskiy --- include/block/block.h | 2 ++ include/block/block_int.h | 3 +++ block.c | 22 ++++++++++++++++++++++ 3 files changed, 27 insertions(+) diff --git a/include/block/block.h b/include/block/block.h index cdec3639a3..912e3f3dcc 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -604,4 +604,6 @@ bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name, */ void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size); void bdrv_unregister_buf(BlockDriverState *bs, void *host); + +int bdrv_reconnect(BlockDriverState *bs, Error **errp); #endif diff --git a/include/block/block_int.h b/include/block/block_int.h index c4dd1d4bb8..ab9018f1c4 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -475,6 +475,9 @@ struct BlockDriver { */ void (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size); void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host); + + int (*bdrv_reconnect)(BlockDriverState *bs, Error **errp); + QLIST_ENTRY(BlockDriver) list; }; diff --git a/block.c b/block.c index a2caadf0a0..fab4413d59 100644 --- a/block.c +++ b/block.c @@ -4095,6 +4095,28 @@ int 
bdrv_has_zero_init(BlockDriverState *bs) return 0; } +int bdrv_reconnect(BlockDriverState *bs, Error **errp) +{ + int ret; + + if (bs->drv && bs->drv->bdrv_reconnect) { + return bs->drv->bdrv_reconnect(bs, errp); + } + + if (bs->backing) { + ret = bdrv_reconnect(bs->backing->bs, errp); + if (ret < 0) { + return ret; + } + } + + if (bs->file) { + return bdrv_reconnect(bs->file->bs, errp); + } + + return 0; +} + bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs) { BlockDriverInfo bdi; From patchwork Tue Apr 24 13:08:21 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Vladimir Sementsov-Ogievskiy X-Patchwork-Id: 903421 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=nongnu.org (client-ip=2001:4830:134:3::11; helo=lists.gnu.org; envelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=virtuozzo.com Received: from lists.gnu.org (lists.gnu.org [IPv6:2001:4830:134:3::11]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 40VkGt3J33z9rxx for ; Tue, 24 Apr 2018 23:11:02 +1000 (AEST) Received: from localhost ([::1]:58439 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1fAxim-0000lD-GH for incoming@patchwork.ozlabs.org; Tue, 24 Apr 2018 09:11:00 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:35573) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1fAxgL-0007gm-6n for qemu-devel@nongnu.org; Tue, 24 Apr 2018 09:08:34 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1fAxgG-0005V0-Gy for qemu-devel@nongnu.org; Tue, 24 Apr 2018 09:08:29 -0400 Received: from relay.sw.ru ([185.231.240.75]:45498) by 
eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1fAxgG-0005PC-8h; Tue, 24 Apr 2018 09:08:24 -0400 Received: from msk-vpn.virtuozzo.com ([195.214.232.6] helo=kvm.sw.ru) by relay.sw.ru with esmtp (Exim 4.90_1) (envelope-from ) id 1fAxgE-0007sT-5G; Tue, 24 Apr 2018 16:08:22 +0300 From: Vladimir Sementsov-Ogievskiy To: qemu-devel@nongnu.org, qemu-block@nongnu.org Date: Tue, 24 Apr 2018 16:08:21 +0300 Message-Id: <20180424130821.50987-4-vsementsov@virtuozzo.com> X-Mailer: git-send-email 2.11.1 In-Reply-To: <20180424130821.50987-1-vsementsov@virtuozzo.com> References: <20180424130821.50987-1-vsementsov@virtuozzo.com> X-detected-operating-system: by eggs.gnu.org: GNU/Linux 3.x [fuzzy] X-Received-From: 185.231.240.75 Subject: [Qemu-devel] [RFC 3/3] blk: add 'reconnect' error action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: kwolf@redhat.com, vsementsov@virtuozzo.com, famz@redhat.com, armbru@redhat.com, mreitz@redhat.com, den@openvz.org, pbonzini@redhat.com Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" The new action works as follows: first, without stopping the VM, it tries bdrv_reconnect several times, pausing between attempts. Then, if reconnecting still fails, it falls through to the 'stop' error action. 
TODO: - qapi docs - support other disks (only scsi here) - support block jobs - add configuration of timeout and tries count parameters Signed-off-by: Vladimir Sementsov-Ogievskiy --- qapi/block-core.json | 4 ++-- block/block-backend.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++- hw/scsi/scsi-disk.c | 4 +++- 3 files changed, 52 insertions(+), 4 deletions(-) diff --git a/qapi/block-core.json b/qapi/block-core.json index c50517bff3..d4d87dbd4f 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1028,7 +1028,7 @@ # Since: 1.3 ## { 'enum': 'BlockdevOnError', - 'data': ['report', 'ignore', 'enospc', 'stop', 'auto'] } + 'data': ['report', 'ignore', 'enospc', 'stop', 'auto', 'reconnect'] } ## # @MirrorSyncMode: @@ -4351,7 +4351,7 @@ # Since: 2.1 ## { 'enum': 'BlockErrorAction', - 'data': [ 'ignore', 'report', 'stop' ] } + 'data': [ 'ignore', 'report', 'stop', 'reconnect' ] } ## diff --git a/block/block-backend.c b/block/block-backend.c index 681b240b12..81eb9a7bd0 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -89,6 +89,11 @@ struct BlockBackend { */ unsigned int in_flight; AioWait wait; + + bool reconnect_failed; /* TODO: worth tri-state variable? 
*/ + bool reconnecting; + unsigned int reconnect_max; + uint64_t reconnect_ns; }; typedef struct BlockBackendAIOCB { @@ -322,6 +327,8 @@ BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm) blk->refcnt = 1; blk->perm = perm; blk->shared_perm = shared_perm; + blk->reconnect_max = 10; /* TODO configure */ + blk->reconnect_ns = 5000000000; /* 5 seconds, TODO configure */ blk_set_enable_write_cache(blk, true); block_acct_init(&blk->stats); @@ -1079,6 +1086,7 @@ void blk_iostatus_disable(BlockBackend *blk) void blk_iostatus_reset(BlockBackend *blk) { + blk->reconnect_failed = false; if (blk_iostatus_is_enabled(blk)) { BlockDriverState *bs = blk_bs(blk); blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK; @@ -1635,6 +1643,9 @@ BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read, BlockdevOnError on_err = blk_get_on_error(blk, is_read); switch (on_err) { + case BLOCKDEV_ON_ERROR_RECONNECT: + return blk->reconnect_failed ? BLOCK_ERROR_ACTION_STOP : + BLOCK_ERROR_ACTION_RECONNECT; case BLOCKDEV_ON_ERROR_ENOSPC: return (error == ENOSPC) ? BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT; @@ -1665,6 +1676,29 @@ static void send_qmp_error_event(BlockBackend *blk, &error_abort); } + +static void coroutine_fn blk_reconnect_co(void *opaque) +{ + BlockBackend *blk = opaque; + int i; + + for (i = 0; i < blk->reconnect_max; i++) { + int ret; + + qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, blk->reconnect_ns); + + ret = bdrv_reconnect(blk_bs(blk), NULL); + if (ret == 0) { + blk->reconnecting = false; + blk_iostatus_reset(blk); + return; + } + } + + blk->reconnecting = false; + blk->reconnect_failed = true; +} + /* This is done by device models because, while the block layer knows * about the error, it does not know whether an operation comes from * the device or the block layer (from a job, for example). 
@@ -1674,7 +1708,19 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction action, { assert(error >= 0); - if (action == BLOCK_ERROR_ACTION_STOP) { + if (action == BLOCK_ERROR_ACTION_RECONNECT) { + Coroutine *co; + blk_iostatus_set_err(blk, error); + + if (blk->reconnecting || blk->reconnect_failed) { + return; + } + + blk->reconnecting = true; + + co = qemu_coroutine_create(blk_reconnect_co, blk); + aio_co_enter(blk_get_aio_context(blk), co); + } else if (action == BLOCK_ERROR_ACTION_STOP) { /* First set the iostatus, so that "info block" returns an iostatus * that matches the events raised so far (an additional error iostatus * is fine, but not a lost one). diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index ded23d36ca..f1c166dfda 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -474,7 +474,9 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed) } blk_error_action(s->qdev.conf.blk, action, is_read, error); - if (action == BLOCK_ERROR_ACTION_STOP) { + if (action == BLOCK_ERROR_ACTION_STOP || + action == BLOCK_ERROR_ACTION_RECONNECT) + { scsi_req_retry(&r->req); } return action != BLOCK_ERROR_ACTION_IGNORE;