diff mbox

migration: add timeout option for tcp migration send/receive socket

Message ID D3E216785288A145B7BC975F83A2ED103FEE66F7@szxeml556-mbx.china.huawei.com
State New
Headers show

Commit Message

Zhanghaoyu (A) June 30, 2013, 5:12 a.m. UTC
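When the network is disconnected during live migration, the migration thread gets stuck in sendmsg(), because the migration socket is now in blocking mode (O_NONBLOCK is cleared).

Set a 60-second send timeout (SO_SNDTIMEO) on the outgoing TCP migration socket and a 60-second receive timeout (SO_RCVTIMEO) on the accepted incoming socket, so that a blocked send/receive call fails with an error instead of hanging indefinitely.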

Signed-off-by: Zeng Junliang <zengjunliang@huawei.com>
---
 include/migration/migration.h |    4 ++++
 migration-tcp.c               |   23 ++++++++++++++++++++++-
 2 files changed, 26 insertions(+), 1 deletions(-)

Comments

Anthony Liguori July 2, 2013, 2:53 p.m. UTC | #1
"Zhanghaoyu (A)" <haoyu.zhang@huawei.com> writes:

> When network disconnection occurs during live migration, the migration thread will be stuck in the function sendmsg(), as the migration socket is in ~O_NONBLOCK mode now.
>
> Signed-off-by: Zeng Junliang <zengjunliang@huawei.com>

Unconditionally setting an arbitrary timeout is a bad idea.

Should we send a signal on cancel to break the migration thread out of
sendmsg?

Regards,

Anthony Liguori

> ---
>  include/migration/migration.h |    4 ++++
>  migration-tcp.c               |   23 ++++++++++++++++++++++-
>  2 files changed, 26 insertions(+), 1 deletions(-)
>
> diff --git a/include/migration/migration.h b/include/migration/migration.h
> index f0640e0..1a56248 100644
> --- a/include/migration/migration.h
> +++ b/include/migration/migration.h
> @@ -23,6 +23,8 @@
>  #include "qapi-types.h"
>  #include "exec/cpu-common.h"
>  
> +#define QEMU_MIGRATE_SOCKET_OP_TIMEOUT 60
> +
>  struct MigrationParams {
>      bool blk;
>      bool shared;
> @@ -109,6 +111,8 @@ uint64_t xbzrle_mig_pages_transferred(void);
>  uint64_t xbzrle_mig_pages_overflow(void);
>  uint64_t xbzrle_mig_pages_cache_miss(void);
>  
> +int tcp_migration_set_socket_timeout(int fd, int optname, int timeout_in_sec);
> +
>  /**
>   * @migrate_add_blocker - prevent migration from proceeding
>   *
> diff --git a/migration-tcp.c b/migration-tcp.c
> index b20ee58..860238b 100644
> --- a/migration-tcp.c
> +++ b/migration-tcp.c
> @@ -29,11 +29,28 @@
>      do { } while (0)
>  #endif
>  
> +int tcp_migration_set_socket_timeout(int fd, int optname, int timeout_in_sec)
> +{
> +    struct timeval timeout;
> +    int ret = 0;
> +
> +    if (fd < 0 || timeout_in_sec < 0 ||
> +        (optname != SO_RCVTIMEO && optname != SO_SNDTIMEO))
> +        return -1;
> +
> +    timeout.tv_sec = timeout_in_sec;
> +    timeout.tv_usec = 0;
> +
> +    ret = qemu_setsockopt(fd, SOL_SOCKET, optname, &timeout, sizeof(timeout));
> +
> +    return ret;
> +}
> +
>  static void tcp_wait_for_connect(int fd, void *opaque)
>  {
>      MigrationState *s = opaque;
>  
> -    if (fd < 0) {
> +    if (tcp_migration_set_socket_timeout(fd, SO_SNDTIMEO, QEMU_MIGRATE_SOCKET_OP_TIMEOUT) < 0) {
>          DPRINTF("migrate connect error\n");
>          s->file = NULL;
>          migrate_fd_error(s);
> @@ -76,6 +93,10 @@ static void tcp_accept_incoming_migration(void *opaque)
>          goto out;
>      }
>  
> +    if (tcp_migration_set_socket_timeout(c, SO_RCVTIMEO, QEMU_MIGRATE_SOCKET_OP_TIMEOUT) < 0) {
> +        fprintf(stderr, "set tcp migration socket receive timeout error\n");
> +        goto out;
> +    }
>      process_incoming_migration(f);
>      return;
>  
> -- 
> 1.7.3.1.msysgit.0
diff mbox

Patch

diff --git a/include/migration/migration.h b/include/migration/migration.h
index f0640e0..1a56248 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -23,6 +23,8 @@ 
 #include "qapi-types.h"
 #include "exec/cpu-common.h"
 
+#define QEMU_MIGRATE_SOCKET_OP_TIMEOUT 60
+
 struct MigrationParams {
     bool blk;
     bool shared;
@@ -109,6 +111,8 @@  uint64_t xbzrle_mig_pages_transferred(void);
 uint64_t xbzrle_mig_pages_overflow(void);
 uint64_t xbzrle_mig_pages_cache_miss(void);
 
+int tcp_migration_set_socket_timeout(int fd, int optname, int timeout_in_sec);
+
 /**
  * @migrate_add_blocker - prevent migration from proceeding
  *
diff --git a/migration-tcp.c b/migration-tcp.c
index b20ee58..860238b 100644
--- a/migration-tcp.c
+++ b/migration-tcp.c
@@ -29,11 +29,28 @@ 
     do { } while (0)
 #endif
 
+int tcp_migration_set_socket_timeout(int fd, int optname, int timeout_in_sec)
+{
+    struct timeval timeout;
+    int ret = 0;
+
+    if (fd < 0 || timeout_in_sec < 0 ||
+        (optname != SO_RCVTIMEO && optname != SO_SNDTIMEO))
+        return -1;
+
+    timeout.tv_sec = timeout_in_sec;
+    timeout.tv_usec = 0;
+
+    ret = qemu_setsockopt(fd, SOL_SOCKET, optname, &timeout, sizeof(timeout));
+
+    return ret;
+}
+
 static void tcp_wait_for_connect(int fd, void *opaque)
 {
     MigrationState *s = opaque;
 
-    if (fd < 0) {
+    if (tcp_migration_set_socket_timeout(fd, SO_SNDTIMEO, QEMU_MIGRATE_SOCKET_OP_TIMEOUT) < 0) {
         DPRINTF("migrate connect error\n");
         s->file = NULL;
         migrate_fd_error(s);
@@ -76,6 +93,10 @@  static void tcp_accept_incoming_migration(void *opaque)
         goto out;
     }
 
+    if (tcp_migration_set_socket_timeout(c, SO_RCVTIMEO, QEMU_MIGRATE_SOCKET_OP_TIMEOUT) < 0) {
+        fprintf(stderr, "set tcp migration socket receive timeout error\n");
+        goto out;
+    }
     process_incoming_migration(f);
     return;