Patchwork [3/3] nbd: Move reconnection attempts from each new I/O request to a 5-second timer

login
register
mail settings
Submitter Nicholas Thomas
Date Oct. 22, 2012, 11:09 a.m.
Message ID <25ca669db15dddbe1c0c7aeebc29febef851d817.1350901963.git.nick@bytemark.co.uk>
Download mbox | patch
Permalink /patch/193228/
State New
Headers show

Comments

Nicholas Thomas - Oct. 22, 2012, 11:09 a.m.
We don't want the guest to be able to control the reconnection rate (by sending
lots of I/O requests to the block device), so we try once every 5 seconds and
fail I/O requests that happen while we're disconnected.

TODO: Allow the reconnect delay to be specified as an option.
TODO: Queue, rather than fail, I/O requests when disconnected

Signed-off-by: Nick Thomas <nick@bytemark.co.uk>
---
 block/nbd.c |   54 ++++++++++++++++++++++++++++++++++++++++++++----------
 1 files changed, 44 insertions(+), 10 deletions(-)

Patch

diff --git a/block/nbd.c b/block/nbd.c
index 1caae89..e55a9a7 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -50,6 +50,8 @@ 
 #define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
 #define INDEX_TO_HANDLE(bs, index)  ((index)  ^ ((uint64_t)(intptr_t)bs))
 
+#define RECONNECT_DELAY 5000
+
 typedef struct BDRVNBDState {
     int sock;
     uint32_t nbdflags;
@@ -62,6 +64,8 @@  typedef struct BDRVNBDState {
     Coroutine *send_coroutine;
     int in_flight;
 
+    QEMUTimer *recon_timer;
+
     Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
     struct nbd_reply reply;
 
@@ -125,6 +129,27 @@  out:
     return err;
 }
 
+static void nbd_reconnect(void *opaque)
+{
+    BDRVNBDState *s = opaque;
+
+    if (nbd_is_connected(s)) {
+        logout("Already connected to NBD server, disarming timer\n");
+        qemu_del_timer(s->recon_timer);
+        return;
+    }
+
+    if (nbd_establish_connection(s) == 0) {
+        logout("Reconnected to server, disabling reconnect timer\n");
+        qemu_del_timer(s->recon_timer);
+    } else {
+        logout("Failed to reconnect, trying again in %ims\n", RECONNECT_DELAY);
+        int64_t delay = qemu_get_clock_ms(rt_clock) + RECONNECT_DELAY;
+        qemu_mod_timer(s->recon_timer, delay);
+    }
+
+}
+
 static void nbd_coroutine_start(BDRVNBDState *s, struct nbd_request *request)
 {
     int i;
@@ -155,11 +180,15 @@  static int nbd_have_request(void *opaque)
     return s->in_flight > 0;
 }
 
-static void nbd_abort_inflight_requests(BDRVNBDState *s)
+static void nbd_disconnect_for_reconnect(BDRVNBDState *s)
 {
     int i;
     Coroutine *co;
+    int64_t recon_time;
 
+    nbd_teardown_connection(s, false);
+
+    /* Abort all in-flight requests. TODO: these should be retried */
     s->reply.handle = 0;
     for (i = 0; i < MAX_NBD_REQUESTS; i++) {
         co = s->recv_coroutine[i];
@@ -167,6 +196,9 @@  static void nbd_abort_inflight_requests(BDRVNBDState *s)
             qemu_coroutine_enter(co, NULL);
         }
     }
+
+    recon_time = qemu_get_clock_ms(rt_clock) + RECONNECT_DELAY;
+    qemu_mod_timer(s->recon_timer, recon_time);
 }
 
 static void nbd_reply_ready(void *opaque)
@@ -185,8 +217,7 @@  static void nbd_reply_ready(void *opaque)
             return;
         }
         if (ret < 0) {
-            nbd_teardown_connection(s, false);
-            nbd_abort_inflight_requests(s);
+            nbd_disconnect_for_reconnect(s);
             return;
         }
     }
@@ -196,8 +227,7 @@  static void nbd_reply_ready(void *opaque)
      * one coroutine is called until the reply finishes.  */
     i = HANDLE_TO_INDEX(s, s->reply.handle);
     if (i >= MAX_NBD_REQUESTS) {
-        nbd_teardown_connection(s, false);
-        nbd_abort_inflight_requests(s);
+        nbd_disconnect_for_reconnect(s);
         return;
     }
 
@@ -218,14 +248,14 @@  static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request,
 {
     int rc, ret;
 
-    qemu_co_mutex_lock(&s->send_mutex);
-
-    if (!nbd_is_connected(s) && nbd_establish_connection(s) != 0) {
-        nbd_abort_inflight_requests(s);
-        qemu_co_mutex_unlock(&s->send_mutex);
+    /* TODO: We should be able to add this to the queue regardless, once
+     * coroutines retry if not connected */
+    if (!nbd_is_connected(s)) {
         return -EIO;
     }
 
+    qemu_co_mutex_lock(&s->send_mutex);
+
     s->send_coroutine = qemu_coroutine_self();
     qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write,
                             nbd_have_request, s);
@@ -349,6 +379,7 @@  static int nbd_open(BlockDriverState *bs, const char* filename, int flags)
     int result;
 
     s->sock = -1;
+    s->recon_timer = qemu_new_timer_ms(rt_clock, nbd_reconnect, s);
 
     qemu_co_mutex_init(&s->send_mutex);
     qemu_co_mutex_init(&s->free_sema);
@@ -520,6 +551,9 @@  static void nbd_close(BlockDriverState *bs)
     g_free(s->export_name);
     g_free(s->host_spec);
 
+    qemu_del_timer(s->recon_timer);
+    qemu_free_timer(s->recon_timer);
+
     nbd_teardown_connection(s, true);
 }