Patchwork [2/3] nbd: Explicitly disconnect and fail inflight I/O requests on error, then reconnect next I/O request.

Submitter Nicholas Thomas
Date Oct. 22, 2012, 11:09 a.m.
Message ID <d5aacb323b4dcb1c9acb699e4ece02d0b0c3b2fa.1350901963.git.nick@bytemark.co.uk>
Permalink /patch/193212/
State New

Comments

Nicholas Thomas - Oct. 22, 2012, 11:09 a.m.
The previous behaviour when an NBD connection broke was to fail just the broken I/O request
and (sometimes) never unlock send_mutex. Now we explicitly call nbd_teardown_connection and
fail all NBD requests in the "inflight" state - this allows werror/rerror settings to be
applied to those requests all at once.

When a new request (or a request that was pending, but not yet inflight) is issued against
the NBD driver, if we're not connected to the NBD server, we attempt to connect (and fail
the new request immediately if that doesn't work). This isn't ideal, as it can lead to many
failed attempts to connect to the NBD server in short order, but at least allows us to get
over temporary failures in this state.

Signed-off-by: Nick Thomas <nick@bytemark.co.uk>
---
 block/nbd.c |   72 ++++++++++++++++++++++++++++++++++++++++------------------
 1 files changed, 49 insertions(+), 23 deletions(-)
Kevin Wolf - Oct. 23, 2012, 10:40 a.m.
On 22.10.2012 13:09, nick@bytemark.co.uk wrote:
> 
> The previous behaviour when an NBD connection broke was to fail just the broken I/O request
> and (sometimes) never unlock send_mutex. Now we explicitly call nbd_teardown_connection and
> fail all NBD requests in the "inflight" state - this allows werror/rerror settings to be
> applied to those requests all at once.
> 
> When a new request (or a request that was pending, but not yet inflight) is issued against
> the NBD driver, if we're not connected to the NBD server, we attempt to connect (and fail
> the new request immediately if that doesn't work). 

Doesn't this block the vcpu while qemu is trying to establish a new
connection? When the network is down, I think this can take quite a
while. I would actually expect that this is one of the cases where qemu
as a whole would seem to hang.
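
For reference, the usual way to avoid that stall is a non-blocking
connect, where the thread never sleeps inside connect(2) and the result
is picked up from a poll loop later. A minimal POSIX sketch (not using
QEMU's socket helpers; assume addr has already been filled in):

    int sock = socket(AF_INET, SOCK_STREAM, 0);
    fcntl(sock, F_SETFL, fcntl(sock, F_GETFL, 0) | O_NONBLOCK);
    if (connect(sock, (struct sockaddr *)&addr, sizeof(addr)) < 0 &&
        errno != EINPROGRESS) {
        close(sock);    /* immediate failure, e.g. ENETUNREACH */
        return -errno;
    }
    /* Otherwise the connect completes asynchronously: wait for the
     * socket to become writable, then fetch the result with
     * getsockopt(sock, SOL_SOCKET, SO_ERROR, &err, &len). */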

Kevin
Paolo Bonzini - Oct. 24, 2012, 2:31 p.m.
On 22/10/2012 13:09, nick@bytemark.co.uk wrote:
> diff --git a/block/nbd.c b/block/nbd.c
> index 9536408..1caae89 100644
> --- a/block/nbd.c
> +++ b/block/nbd.c
> @@ -71,6 +71,9 @@ typedef struct BDRVNBDState {
>      char *host_spec;
>  } BDRVNBDState;
>  
> +static int nbd_establish_connection(BDRVNBDState *s);
> +static void nbd_teardown_connection(BDRVNBDState *s, bool send_disconnect);
> +
>  static bool nbd_is_connected(BDRVNBDState *s)
>  {
>      return s->sock >= 0;
> @@ -152,6 +155,20 @@ static int nbd_have_request(void *opaque)
>      return s->in_flight > 0;
>  }
>  
> +static void nbd_abort_inflight_requests(BDRVNBDState *s)
> +{
> +    int i;
> +    Coroutine *co;
> +
> +    s->reply.handle = 0;
> +    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
> +        co = s->recv_coroutine[i];
> +        if (co && co != qemu_coroutine_self()) {
> +            qemu_coroutine_enter(co, NULL);
> +        }
> +    }
> +}

I think this is quite risky.  Does it work if you shutdown(s->sock, 2)
the socket, then wait for the number of pending requests (not just those
in_flight, but also those that are waiting) to become 0, and only then
close it?

(For the wait you can add another Coroutine * field to BDRVNBDState, and
reenter it in nbd_coroutine_end if the number of pending requests
becomes zero).
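
Something like this, perhaps (the pending_requests counter and
teardown_co field are made-up names, not in the patch):

    /* in nbd_coroutine_end(), once a request is done: */
    if (--s->pending_requests == 0 && s->teardown_co) {
        /* last request drained; wake whoever is waiting to close */
        qemu_coroutine_enter(s->teardown_co, NULL);
    }

    /* in the teardown path, after shutdown(s->sock, 2): */
    if (s->pending_requests > 0) {
        s->teardown_co = qemu_coroutine_self();
        qemu_coroutine_yield();    /* resumed when the count hits 0 */
        s->teardown_co = NULL;
    }
    closesocket(s->sock);
    s->sock = -1;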

>  static void nbd_reply_ready(void *opaque)
>  {
>      BDRVNBDState *s = opaque;
> @@ -168,8 +185,9 @@ static void nbd_reply_ready(void *opaque)
>              return;
>          }
>          if (ret < 0) {
> -            s->reply.handle = 0;
> -            goto fail;
> +            nbd_teardown_connection(s, false);
> +            nbd_abort_inflight_requests(s);

This is also problematic because you first close the socket, which means
that something else can grab the file descriptor.  But the original file
descriptor is in use (in qemu_co_recvv or qemu_co_sendv) until after
nbd_abort_inflight_requests returns.

So the correct order would be something like this:

           assert(nbd_is_connected(s));
           shutdown(s->sock, 2);
           nbd_abort_inflight_requests(s);
           nbd_teardown_connection(s, false);

where (if my theory is right) the shutdown should immediately cause the
socket to become readable and writable.
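
That matches normal TCP semantics: after shutdown(fd, 2) a pending or
subsequent recv() returns 0 (EOF) and send() fails with EPIPE, so
select/poll report the descriptor as both readable and writable, and the
coroutines stuck in qemu_co_recvv/qemu_co_sendv should be woken with an
error instead of blocking forever. Standalone POSIX illustration:

    shutdown(fd, SHUT_RDWR);                      /* == shutdown(fd, 2) */
    char buf[64];
    ssize_t n = recv(fd, buf, sizeof(buf), 0);    /* returns 0 (EOF) */
    n = send(fd, buf, sizeof(buf), MSG_NOSIGNAL); /* -1, errno == EPIPE */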

> +            return;
>          }
>      }
>  
> @@ -178,20 +196,15 @@ static void nbd_reply_ready(void *opaque)
>       * one coroutine is called until the reply finishes.  */
>      i = HANDLE_TO_INDEX(s, s->reply.handle);
>      if (i >= MAX_NBD_REQUESTS) {
> -        goto fail;
> +        nbd_teardown_connection(s, false);
> +        nbd_abort_inflight_requests(s);
> +        return;
>      }
>  
>      if (s->recv_coroutine[i]) {
>          qemu_coroutine_enter(s->recv_coroutine[i], NULL);
>          return;
>      }
> -
> -fail:
> -    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
> -        if (s->recv_coroutine[i]) {
> -            qemu_coroutine_enter(s->recv_coroutine[i], NULL);
> -        }
> -    }
>  }
>  
>  static void nbd_restart_write(void *opaque)
> @@ -206,6 +219,13 @@ static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request,
>      int rc, ret;
>  
>      qemu_co_mutex_lock(&s->send_mutex);
> +
> +    if (!nbd_is_connected(s) && nbd_establish_connection(s) != 0) {
> +        nbd_abort_inflight_requests(s);
> +        qemu_co_mutex_unlock(&s->send_mutex);
> +        return -EIO;

Do you really need to abort all the requests, or only this one?
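
i.e. just dropping the abort, so only the caller's request fails
(sketch of the alternative):

    if (!nbd_is_connected(s) && nbd_establish_connection(s) != 0) {
        /* fail only the request we were asked to send; anything that
         * was in flight already failed when the connection broke */
        qemu_co_mutex_unlock(&s->send_mutex);
        return -EIO;
    }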

Paolo

> +    }
> +
>      s->send_coroutine = qemu_coroutine_self();
>      qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write,
>                              nbd_have_request, s);
> @@ -214,6 +234,9 @@ static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request,
>          ret = qemu_co_sendv(s->sock, qiov->iov, qiov->niov,
>                              offset, request->len);
>          if (ret != request->len) {
> +            s->send_coroutine = NULL;
> +            nbd_teardown_connection(s, false);
> +            qemu_co_mutex_unlock(&s->send_mutex);
>              return -EIO;
>          }
>      }
> @@ -259,9 +282,8 @@ static void nbd_coroutine_end(BDRVNBDState *s, struct nbd_request *request)
>      }
>  }
>  
> -static int nbd_establish_connection(BlockDriverState *bs)
> +static int nbd_establish_connection(BDRVNBDState *s)
>  {
> -    BDRVNBDState *s = bs->opaque;
>      int sock;
>      int ret;
>      off_t size;
> @@ -302,19 +324,23 @@ static int nbd_establish_connection(BlockDriverState *bs)
>      return 0;
>  }
>  
> -static void nbd_teardown_connection(BlockDriverState *bs)
> +static void nbd_teardown_connection(BDRVNBDState *s, bool send_disconnect)
>  {
> -    BDRVNBDState *s = bs->opaque;
> +
>      struct nbd_request request;
>  
> -    request.type = NBD_CMD_DISC;
> -    request.from = 0;
> -    request.len = 0;
> -    nbd_send_request(s->sock, &request);
> +    if (nbd_is_connected(s)) {
> +        if (send_disconnect) {
> +            request.type = NBD_CMD_DISC;
> +            request.from = 0;
> +            request.len = 0;
> +            nbd_send_request(s->sock, &request);
> +        }
>  
> -    qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL, NULL);
> -    closesocket(s->sock);
> -    s->sock = -1;
> +        qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL, NULL);
> +        closesocket(s->sock);
> +        s->sock = -1; /* Makes nbd_is_connected() return false */
> +    }
>  }
>  
>  static int nbd_open(BlockDriverState *bs, const char* filename, int flags)
> @@ -336,7 +362,7 @@ static int nbd_open(BlockDriverState *bs, const char* filename, int flags)
>      /* establish TCP connection, return error if it fails
>       * TODO: Configurable retry-until-timeout behaviour.
>       */
> -    result = nbd_establish_connection(bs);
> +    result = nbd_establish_connection(s);
>  
>      return result;
>  }
> @@ -494,7 +520,7 @@ static void nbd_close(BlockDriverState *bs)
>      g_free(s->export_name);
>      g_free(s->host_spec);
>  
> -    nbd_teardown_connection(bs);
> +    nbd_teardown_connection(s, true);
>  }
>  
>  static int64_t nbd_getlength(BlockDriverState *bs)
>

Patch

diff --git a/block/nbd.c b/block/nbd.c
index 9536408..1caae89 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -71,6 +71,9 @@  typedef struct BDRVNBDState {
     char *host_spec;
 } BDRVNBDState;
 
+static int nbd_establish_connection(BDRVNBDState *s);
+static void nbd_teardown_connection(BDRVNBDState *s, bool send_disconnect);
+
 static bool nbd_is_connected(BDRVNBDState *s)
 {
     return s->sock >= 0;
@@ -152,6 +155,20 @@  static int nbd_have_request(void *opaque)
     return s->in_flight > 0;
 }
 
+static void nbd_abort_inflight_requests(BDRVNBDState *s)
+{
+    int i;
+    Coroutine *co;
+
+    s->reply.handle = 0;
+    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
+        co = s->recv_coroutine[i];
+        if (co && co != qemu_coroutine_self()) {
+            qemu_coroutine_enter(co, NULL);
+        }
+    }
+}
+
 static void nbd_reply_ready(void *opaque)
 {
     BDRVNBDState *s = opaque;
@@ -168,8 +185,9 @@  static void nbd_reply_ready(void *opaque)
             return;
         }
         if (ret < 0) {
-            s->reply.handle = 0;
-            goto fail;
+            nbd_teardown_connection(s, false);
+            nbd_abort_inflight_requests(s);
+            return;
         }
     }
 
@@ -178,20 +196,15 @@  static void nbd_reply_ready(void *opaque)
      * one coroutine is called until the reply finishes.  */
     i = HANDLE_TO_INDEX(s, s->reply.handle);
     if (i >= MAX_NBD_REQUESTS) {
-        goto fail;
+        nbd_teardown_connection(s, false);
+        nbd_abort_inflight_requests(s);
+        return;
     }
 
     if (s->recv_coroutine[i]) {
         qemu_coroutine_enter(s->recv_coroutine[i], NULL);
         return;
     }
-
-fail:
-    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
-        if (s->recv_coroutine[i]) {
-            qemu_coroutine_enter(s->recv_coroutine[i], NULL);
-        }
-    }
 }
 
 static void nbd_restart_write(void *opaque)
@@ -206,6 +219,13 @@  static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request,
     int rc, ret;
 
     qemu_co_mutex_lock(&s->send_mutex);
+
+    if (!nbd_is_connected(s) && nbd_establish_connection(s) != 0) {
+        nbd_abort_inflight_requests(s);
+        qemu_co_mutex_unlock(&s->send_mutex);
+        return -EIO;
+    }
+
     s->send_coroutine = qemu_coroutine_self();
     qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write,
                             nbd_have_request, s);
@@ -214,6 +234,9 @@  static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request,
         ret = qemu_co_sendv(s->sock, qiov->iov, qiov->niov,
                             offset, request->len);
         if (ret != request->len) {
+            s->send_coroutine = NULL;
+            nbd_teardown_connection(s, false);
+            qemu_co_mutex_unlock(&s->send_mutex);
             return -EIO;
         }
     }
@@ -259,9 +282,8 @@  static void nbd_coroutine_end(BDRVNBDState *s, struct nbd_request *request)
     }
 }
 
-static int nbd_establish_connection(BlockDriverState *bs)
+static int nbd_establish_connection(BDRVNBDState *s)
 {
-    BDRVNBDState *s = bs->opaque;
     int sock;
     int ret;
     off_t size;
@@ -302,19 +324,23 @@  static int nbd_establish_connection(BlockDriverState *bs)
     return 0;
 }
 
-static void nbd_teardown_connection(BlockDriverState *bs)
+static void nbd_teardown_connection(BDRVNBDState *s, bool send_disconnect)
 {
-    BDRVNBDState *s = bs->opaque;
+
     struct nbd_request request;
 
-    request.type = NBD_CMD_DISC;
-    request.from = 0;
-    request.len = 0;
-    nbd_send_request(s->sock, &request);
+    if (nbd_is_connected(s)) {
+        if (send_disconnect) {
+            request.type = NBD_CMD_DISC;
+            request.from = 0;
+            request.len = 0;
+            nbd_send_request(s->sock, &request);
+        }
 
-    qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL, NULL);
-    closesocket(s->sock);
-    s->sock = -1;
+        qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL, NULL);
+        closesocket(s->sock);
+        s->sock = -1; /* Makes nbd_is_connected() return false */
+    }
 }
 
 static int nbd_open(BlockDriverState *bs, const char* filename, int flags)
@@ -336,7 +362,7 @@  static int nbd_open(BlockDriverState *bs, const char* filename, int flags)
     /* establish TCP connection, return error if it fails
      * TODO: Configurable retry-until-timeout behaviour.
      */
-    result = nbd_establish_connection(bs);
+    result = nbd_establish_connection(s);
 
     return result;
 }
@@ -494,7 +520,7 @@  static void nbd_close(BlockDriverState *bs)
     g_free(s->export_name);
     g_free(s->host_spec);
 
-    nbd_teardown_connection(bs);
+    nbd_teardown_connection(s, true);
 }
 
 static int64_t nbd_getlength(BlockDriverState *bs)