Message ID | d5aacb323b4dcb1c9acb699e4ece02d0b0c3b2fa.1350901963.git.nick@bytemark.co.uk |
---|---|
State | New |
Headers | show |
Am 22.10.2012 13:09, schrieb nick@bytemark.co.uk: > > The previous behaviour when an NBD connection broke was to fail just the broken I/O request > and (sometimes) never unlock send_mutex. Now we explicitly call nbd_teardown_connection and > fail all NBD requests in the "inflight" state - this allows werror/rerror settings to be > applied to those requests all at once. > > When a new request (or a request that was pending, but not yet inflight) is issued against > the NBD driver, if we're not connected to the NBD server, we attempt to connect (and fail > the new request immediately if that doesn't work). Doesn't this block the vcpu while qemu is trying to establish a new connection? When the network is down, I think this can take quite a while. I would actually expect that this is one of the cases where qemu as a whole would seem to hang. Kevin
Il 22/10/2012 13:09, nick@bytemark.co.uk ha scritto: > diff --git a/block/nbd.c b/block/nbd.c > index 9536408..1caae89 100644 > --- a/block/nbd.c > +++ b/block/nbd.c > @@ -71,6 +71,9 @@ typedef struct BDRVNBDState { > char *host_spec; > } BDRVNBDState; > > +static int nbd_establish_connection(BDRVNBDState *s); > +static void nbd_teardown_connection(BDRVNBDState *s, bool send_disconnect); > + > static bool nbd_is_connected(BDRVNBDState *s) > { > return s->sock >= 0; > @@ -152,6 +155,20 @@ static int nbd_have_request(void *opaque) > return s->in_flight > 0; > } > > +static void nbd_abort_inflight_requests(BDRVNBDState *s) > +{ > + int i; > + Coroutine *co; > + > + s->reply.handle = 0; > + for (i = 0; i < MAX_NBD_REQUESTS; i++) { > + co = s->recv_coroutine[i]; > + if (co && co != qemu_coroutine_self()) { > + qemu_coroutine_enter(co, NULL); > + } > + } > +} I think this is quite risky. Does it work if you shutdown(s, 2) the socket, then wait for the number of pending requests (not just those in_flight---also those that are waiting) to become 0, and only then close it? (For the wait you can add another Coroutine * field to BDRVNBDState, and reenter it in nbd_coroutine_end if the number of pending requests becomes zero). > static void nbd_reply_ready(void *opaque) > { > BDRVNBDState *s = opaque; > @@ -168,8 +185,9 @@ static void nbd_reply_ready(void *opaque) > return; > } > if (ret < 0) { > - s->reply.handle = 0; > - goto fail; > + nbd_teardown_connection(s, false); > + nbd_abort_inflight_requests(s); This is also problematic because you first close the socket, which means that something else can grab the file descriptor. But the original file descriptor is in use (in qemu_co_recvv or qemu_co_sendv) until after nbd_abort_inflight_requests returns. So the correct order would be something like this: assert(nbd_is_connected(s)); shutdown(s->sock, 2); nbd_abort_inflight_requests(s); nbd_teardown_connection(s, false); where (if my theory is right) the shutdown should immediately cause the socket to become readable and writable. > + return; > } > } > > @@ -178,20 +196,15 @@ static void nbd_reply_ready(void *opaque) > * one coroutine is called until the reply finishes. */ > i = HANDLE_TO_INDEX(s, s->reply.handle); > if (i >= MAX_NBD_REQUESTS) { > - goto fail; > + nbd_teardown_connection(s, false); > + nbd_abort_inflight_requests(s); > + return; > } > > if (s->recv_coroutine[i]) { > qemu_coroutine_enter(s->recv_coroutine[i], NULL); > return; > } > - > -fail: > - for (i = 0; i < MAX_NBD_REQUESTS; i++) { > - if (s->recv_coroutine[i]) { > - qemu_coroutine_enter(s->recv_coroutine[i], NULL); > - } > - } > } > > static void nbd_restart_write(void *opaque) > @@ -206,6 +219,13 @@ static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request, > int rc, ret; > > qemu_co_mutex_lock(&s->send_mutex); > + > + if (!nbd_is_connected(s) && nbd_establish_connection(s) != 0) { > + nbd_abort_inflight_requests(s); > + qemu_co_mutex_unlock(&s->send_mutex); > + return -EIO; Do you really need to abort all the requests, or only this one? Paolo > + } > + > s->send_coroutine = qemu_coroutine_self(); > qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write, > nbd_have_request, s); > @@ -214,6 +234,9 @@ static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request, > ret = qemu_co_sendv(s->sock, qiov->iov, qiov->niov, > offset, request->len); > if (ret != request->len) { > + s->send_coroutine = NULL; > + nbd_teardown_connection(s, false); > + qemu_co_mutex_unlock(&s->send_mutex); > return -EIO; > } > } > @@ -259,9 +282,8 @@ static void nbd_coroutine_end(BDRVNBDState *s, struct nbd_request *request) > } > } > > -static int nbd_establish_connection(BlockDriverState *bs) > +static int nbd_establish_connection(BDRVNBDState *s) > { > - BDRVNBDState *s = bs->opaque; > int sock; > int ret; > off_t size; > @@ -302,19 +324,23 @@ static int nbd_establish_connection(BlockDriverState *bs) > return 0; > } > > -static void nbd_teardown_connection(BlockDriverState *bs) > +static void nbd_teardown_connection(BDRVNBDState *s, bool send_disconnect) > { > - BDRVNBDState *s = bs->opaque; > + > struct nbd_request request; > > - request.type = NBD_CMD_DISC; > - request.from = 0; > - request.len = 0; > - nbd_send_request(s->sock, &request); > + if (nbd_is_connected(s)) { > + if (send_disconnect) { > + request.type = NBD_CMD_DISC; > + request.from = 0; > + request.len = 0; > + nbd_send_request(s->sock, &request); > + } > > - qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL, NULL); > - closesocket(s->sock); > - s->sock = -1; > + qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL, NULL); > + closesocket(s->sock); > + s->sock = -1; /* Makes nbd_is_connected() return true */ > + } > } > > static int nbd_open(BlockDriverState *bs, const char* filename, int flags) > @@ -336,7 +362,7 @@ static int nbd_open(BlockDriverState *bs, const char* filename, int flags) > /* establish TCP connection, return error if it fails > * TODO: Configurable retry-until-timeout behaviour. > */ > - result = nbd_establish_connection(bs); > + result = nbd_establish_connection(s); > > return result; > } > @@ -494,7 +520,7 @@ static void nbd_close(BlockDriverState *bs) > g_free(s->export_name); > g_free(s->host_spec); > > - nbd_teardown_connection(bs); > + nbd_teardown_connection(s, true); > } > > static int64_t nbd_getlength(BlockDriverState *bs) >
diff --git a/block/nbd.c b/block/nbd.c index 9536408..1caae89 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -71,6 +71,9 @@ typedef struct BDRVNBDState { char *host_spec; } BDRVNBDState; +static int nbd_establish_connection(BDRVNBDState *s); +static void nbd_teardown_connection(BDRVNBDState *s, bool send_disconnect); + static bool nbd_is_connected(BDRVNBDState *s) { return s->sock >= 0; @@ -152,6 +155,20 @@ static int nbd_have_request(void *opaque) return s->in_flight > 0; } +static void nbd_abort_inflight_requests(BDRVNBDState *s) +{ + int i; + Coroutine *co; + + s->reply.handle = 0; + for (i = 0; i < MAX_NBD_REQUESTS; i++) { + co = s->recv_coroutine[i]; + if (co && co != qemu_coroutine_self()) { + qemu_coroutine_enter(co, NULL); + } + } +} + static void nbd_reply_ready(void *opaque) { BDRVNBDState *s = opaque; @@ -168,8 +185,9 @@ static void nbd_reply_ready(void *opaque) return; } if (ret < 0) { - s->reply.handle = 0; - goto fail; + nbd_teardown_connection(s, false); + nbd_abort_inflight_requests(s); + return; } } @@ -178,20 +196,15 @@ static void nbd_reply_ready(void *opaque) * one coroutine is called until the reply finishes. */ i = HANDLE_TO_INDEX(s, s->reply.handle); if (i >= MAX_NBD_REQUESTS) { - goto fail; + nbd_teardown_connection(s, false); + nbd_abort_inflight_requests(s); + return; } if (s->recv_coroutine[i]) { qemu_coroutine_enter(s->recv_coroutine[i], NULL); return; } - -fail: - for (i = 0; i < MAX_NBD_REQUESTS; i++) { - if (s->recv_coroutine[i]) { - qemu_coroutine_enter(s->recv_coroutine[i], NULL); - } - } } static void nbd_restart_write(void *opaque) @@ -206,6 +219,13 @@ static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request, int rc, ret; qemu_co_mutex_lock(&s->send_mutex); + + if (!nbd_is_connected(s) && nbd_establish_connection(s) != 0) { + nbd_abort_inflight_requests(s); + qemu_co_mutex_unlock(&s->send_mutex); + return -EIO; + } + s->send_coroutine = qemu_coroutine_self(); qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write, nbd_have_request, s); @@ -214,6 +234,9 @@ static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request, ret = qemu_co_sendv(s->sock, qiov->iov, qiov->niov, offset, request->len); if (ret != request->len) { + s->send_coroutine = NULL; + nbd_teardown_connection(s, false); + qemu_co_mutex_unlock(&s->send_mutex); return -EIO; } } @@ -259,9 +282,8 @@ static void nbd_coroutine_end(BDRVNBDState *s, struct nbd_request *request) } } -static int nbd_establish_connection(BlockDriverState *bs) +static int nbd_establish_connection(BDRVNBDState *s) { - BDRVNBDState *s = bs->opaque; int sock; int ret; off_t size; @@ -302,19 +324,23 @@ static int nbd_establish_connection(BlockDriverState *bs) return 0; } -static void nbd_teardown_connection(BlockDriverState *bs) +static void nbd_teardown_connection(BDRVNBDState *s, bool send_disconnect) { - BDRVNBDState *s = bs->opaque; + struct nbd_request request; - request.type = NBD_CMD_DISC; - request.from = 0; - request.len = 0; - nbd_send_request(s->sock, &request); + if (nbd_is_connected(s)) { + if (send_disconnect) { + request.type = NBD_CMD_DISC; + request.from = 0; + request.len = 0; + nbd_send_request(s->sock, &request); + } - qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL, NULL); - closesocket(s->sock); - s->sock = -1; + qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL, NULL); + closesocket(s->sock); + s->sock = -1; /* Makes nbd_is_connected() return true */ + } } static int nbd_open(BlockDriverState *bs, const char* filename, int flags) @@ -336,7 +362,7 @@ static int nbd_open(BlockDriverState *bs, const char* filename, int flags) /* establish TCP connection, return error if it fails * TODO: Configurable retry-until-timeout behaviour. */ - result = nbd_establish_connection(bs); + result = nbd_establish_connection(s); return result; } @@ -494,7 +520,7 @@ static void nbd_close(BlockDriverState *bs) g_free(s->export_name); g_free(s->host_spec); - nbd_teardown_connection(bs); + nbd_teardown_connection(s, true); } static int64_t nbd_getlength(BlockDriverState *bs)
The previous behaviour when an NBD connection broke was to fail just the broken I/O request and (sometimes) never unlock send_mutex. Now we explicitly call nbd_teardown_connection and fail all NBD requests in the "inflight" state - this allows werror/rerror settings to be applied to those requests all at once. When a new request (or a request that was pending, but not yet inflight) is issued against the NBD driver, if we're not connected to the NBD server, we attempt to connect (and fail the new request immediately if that doesn't work). This isn't ideal, as it can lead to many failed attempts to connect to the NBD server in short order, but at least allows us to get over temporary failures in this state. Signed-off-by: Nick Thomas <nick@bytemark.co.uk> --- block/nbd.c | 72 ++++++++++++++++++++++++++++++++++++++++------------------ 1 files changed, 49 insertions(+), 23 deletions(-)