Patchwork [RFC,RDMA,support,v4:,09/10] check for QMP string and bypass nonblock() calls

login
register
mail settings
Submitter mrhines@linux.vnet.ibm.com
Date March 18, 2013, 3:19 a.m.
Message ID <1363576743-6146-10-git-send-email-mrhines@linux.vnet.ibm.com>
Download mbox | patch
Permalink /patch/228353/
State New
Headers show

Comments

mrhines@linux.vnet.ibm.com - March 18, 2013, 3:19 a.m.
From: "Michael R. Hines" <mrhines@us.ibm.com>

Since we're not using TCP anymore, we skip these calls.

Also print a little extra text while debugging, like "gbps"
which is helpful to know how the link is being utilized.

Signed-off-by: Michael R. Hines <mrhines@us.ibm.com>
---
 include/migration/migration.h |    3 +++
 migration.c                   |   19 +++++++++++++------
 2 files changed, 16 insertions(+), 6 deletions(-)
Paolo Bonzini - March 18, 2013, 8:47 a.m.
Il 18/03/2013 04:19, mrhines@linux.vnet.ibm.com ha scritto:
> From: "Michael R. Hines" <mrhines@us.ibm.com>
> 
> Since we're not using TCP anymore, we skip these calls.
> 
> Also print a little extra text while debugging, like "gbps"
> which is helpful to know how the link is being utilized.
> 
> Signed-off-by: Michael R. Hines <mrhines@us.ibm.com>
> ---
>  include/migration/migration.h |    3 +++
>  migration.c                   |   19 +++++++++++++------
>  2 files changed, 16 insertions(+), 6 deletions(-)
> 
> diff --git a/include/migration/migration.h b/include/migration/migration.h
> index bb617fd..88ab5f6 100644
> --- a/include/migration/migration.h
> +++ b/include/migration/migration.h
> @@ -20,6 +20,7 @@
>  #include "qemu/notify.h"
>  #include "qapi/error.h"
>  #include "migration/vmstate.h"
> +#include "migration/rdma.h"
>  #include "qapi-types.h"
>  
>  struct MigrationParams {
> @@ -102,6 +103,7 @@ uint64_t xbzrle_mig_bytes_transferred(void);
>  uint64_t xbzrle_mig_pages_transferred(void);
>  uint64_t xbzrle_mig_pages_overflow(void);
>  uint64_t xbzrle_mig_pages_cache_miss(void);
> +uint64_t delta_norm_mig_bytes_transferred(void);

Please add the protocol under the
>  
>  /**
>   * @migrate_add_blocker - prevent migration from proceeding
> @@ -122,6 +124,7 @@ int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
>  int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen);
>  
>  int migrate_use_xbzrle(void);
> +void *migrate_use_rdma(QEMUFile *f);

Perhaps you can add a new function to QEMUFile send_page?  And if it
returns -ENOTSUP, proceed with the normal is_dup_page + put_buffer.  I
wonder if that lets use remove migrate_use_rdma() completely.

Also, if QEMUFileRDMA is moved to rdma.c, the number of public and
stubbed functions should decrease noticeably.  There is a patch on the
list to move QEMUFile to its own source file.  You could incorporate it
in your series.

>  int64_t migrate_xbzrle_cache_size(void);
>  
>  int64_t xbzrle_cache_resize(int64_t new_size);
> diff --git a/migration.c b/migration.c
> index 185d112..634437a 100644
> --- a/migration.c
> +++ b/migration.c
> @@ -15,6 +15,7 @@
>  
>  #include "qemu-common.h"
>  #include "migration/migration.h"
> +#include "migration/rdma.h"
>  #include "monitor/monitor.h"
>  #include "migration/qemu-file.h"
>  #include "sysemu/sysemu.h"
> @@ -77,6 +78,8 @@ void qemu_start_incoming_migration(const char *uri, Error **errp)
>  
>      if (strstart(uri, "tcp:", &p))
>          tcp_start_incoming_migration(p, errp);
> +    else if (strstart(uri, "rdma:", &p))
> +        rdma_start_incoming_migration(p, errp);
>  #if !defined(WIN32)
>      else if (strstart(uri, "exec:", &p))
>          exec_start_incoming_migration(p, errp);
> @@ -118,10 +121,11 @@ static void process_incoming_migration_co(void *opaque)
>  void process_incoming_migration(QEMUFile *f)
>  {
>      Coroutine *co = qemu_coroutine_create(process_incoming_migration_co);
> -    int fd = qemu_get_fd(f);
> -
> -    assert(fd != -1);
> -    socket_set_nonblock(fd);
> +    if(!migrate_use_rdma(f)) {
> +        int fd = qemu_get_fd(f);
> +        assert(fd != -1);
> +        socket_set_nonblock(fd);

Is this because qemu_get_fd(f) returns -1 for RDMA?  Then, you can
instead put socket_set_nonblock under an if(fd != -1).

> +    }
>      qemu_coroutine_enter(co, f);
>  }
>  
> @@ -404,6 +408,8 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
>  
>      if (strstart(uri, "tcp:", &p)) {
>          tcp_start_outgoing_migration(s, p, &local_err);
> +    } else if (strstart(uri, "rdma:", &p)) {
> +        rdma_start_outgoing_migration(s, p, &local_err);
>  #if !defined(WIN32)
>      } else if (strstart(uri, "exec:", &p)) {
>          exec_start_outgoing_migration(s, p, &local_err);
> @@ -545,8 +551,9 @@ static void *migration_thread(void *opaque)
>              max_size = bandwidth * migrate_max_downtime() / 1000000;
>  
>              DPRINTF("transferred %" PRIu64 " time_spent %" PRIu64
> -                    " bandwidth %g max_size %" PRId64 "\n",
> -                    transferred_bytes, time_spent, bandwidth, max_size);
> +                    " bandwidth %g (%0.2f mbps) max_size %" PRId64 "\n",
> +                    transferred_bytes, time_spent, 
> +                    bandwidth, Gbps(transferred_bytes, time_spent), max_size);
>              /* if we haven't sent anything, we don't want to recalculate
>                 10000 is a small enough number for our purposes */
>              if (s->dirty_bytes_rate && transferred_bytes > 10000) {
> 

Otherwise looks good.
mrhines@linux.vnet.ibm.com - March 18, 2013, 8:37 p.m.
Comments inline.......

On 03/18/2013 04:47 AM, Paolo Bonzini wrote:
>   int migrate_use_xbzrle(void);
> +void *migrate_use_rdma(QEMUFile *f);
> Perhaps you can add a new function to QEMUFile send_page?  And if it
> returns -ENOTSUP, proceed with the normal is_dup_page + put_buffer.  I
> wonder if that lets use remove migrate_use_rdma() completely.

That's great - I'll make the modification......

>   void process_incoming_migration(QEMUFile *f)
>   {
>       Coroutine *co = qemu_coroutine_create(process_incoming_migration_co);
> -    int fd = qemu_get_fd(f);
> -
> -    assert(fd != -1);
> -    socket_set_nonblock(fd);
> +    if(!migrate_use_rdma(f)) {
> +        int fd = qemu_get_fd(f);
> +        assert(fd != -1);
> +        socket_set_nonblock(fd);
> Is this because qemu_get_fd(f) returns -1 for RDMA?  Then, you can
> instead put socket_set_nonblock under an if(fd != -1).

Yes, I proposed doing that check (for -1) in a previous RFC,
but you told me to remove it and make a separate patch =)

Is it OK to keep it in this patch?

> Otherwise looks good. 

Thanks for taking the time =)
Paolo Bonzini - March 19, 2013, 9:23 a.m.
Il 18/03/2013 21:37, Michael R. Hines ha scritto:
>>
>> +    if(!migrate_use_rdma(f)) {
>> +        int fd = qemu_get_fd(f);
>> +        assert(fd != -1);
>> +        socket_set_nonblock(fd);
>> Is this because qemu_get_fd(f) returns -1 for RDMA?  Then, you can
>> instead put socket_set_nonblock under an if(fd != -1).
> 
> Yes, I proposed doing that check (for -1) in a previous RFC,
> but you told me to remove it and make a separate patch =)
> 
> Is it OK to keep it in this patch?

Yes---this is a separate patch.  Apologies if you had the if(fd != -1)
before. :)  In fact, both the if(fd != -1) and the
if(!migrate_use_rdma(f)) are bad, but I prefer to eliminate as many uses
as possible of migrate_use_rdma.

The reason why they are bad, is that we try to operate on the socket in
a non-blocking manner, so that the monitor keeps working during incoming
migration.  We do it with non-blocking sockets because incoming
migration does not (yet?) have a separate thread, and is not a
bottleneck (the VM is not running, so it's not a problem to hold the big
QEMU lock for extended periods of time).

Does librdmacm support non-blocking operation, similar to select() or
poll()?  Perhaps we can add support for that later.

Paolo
mrhines@linux.vnet.ibm.com - March 19, 2013, 1:08 p.m.
On 03/19/2013 05:23 AM, Paolo Bonzini wrote:
>
> Yes---this is a separate patch.  Apologies if you had the if(fd != -1)
> before. :)  In fact, both the if(fd != -1) and the
> if(!migrate_use_rdma(f)) are bad, but I prefer to eliminate as many uses
> as possible of migrate_use_rdma.
I agree. In my current patch I've eliminated all of them.

> Does librdmacm support non-blocking operation, similar to select() or
> poll()?  Perhaps we can add support for that later.

Yes, it does, actually. The library provides what is called an "event 
channel".
(This term is overloaded by other technologies, but that's OK).

An event channel is a file descriptor provided by (I believe) the rdma_cm
kernel module driver.

When you poll on this file descriptor, it can tell you all sorts of things
just like other files or sockets like when data is ready or when
events of interest have completed (like the completion queue has elements).

In my current patch, I'm using this during 
"rdma_accept_incoming_connection()",
but I'm not currently using it for the rest of the coroutine on the 
receiver side.
Paolo Bonzini - March 19, 2013, 1:20 p.m.
Il 19/03/2013 14:08, Michael R. Hines ha scritto:
> On 03/19/2013 05:23 AM, Paolo Bonzini wrote:
>>
>> Yes---this is a separate patch.  Apologies if you had the if(fd != -1)
>> before. :)  In fact, both the if(fd != -1) and the
>> if(!migrate_use_rdma(f)) are bad, but I prefer to eliminate as many uses
>> as possible of migrate_use_rdma.
> I agree. In my current patch I've eliminated all of them.

Very nice.  It remains to be seen how many are replaced by checks on the
QEMUFileOps :) but it cannot be worse!

>> Does librdmacm support non-blocking operation, similar to select() or
>> poll()?  Perhaps we can add support for that later.
> 
> Yes, it does, actually. The library provides what is called an "event
> channel".
> (This term is overloaded by other technologies, but that's OK).
> 
> An event channel is a file descriptor provided by (I believe) the rdma_cm
> kernel module driver.
> 
> When you poll on this file descriptor, it can tell you all sorts of things
> just like other files or sockets like when data is ready or when
> events of interest have completed (like the completion queue has elements).
> 
> In my current patch, I'm using this during
> "rdma_accept_incoming_connection()",
> but I'm not currently using it for the rest of the coroutine on the
> receiver side.

Ok, this can be added later.

Paolo

Patch

diff --git a/include/migration/migration.h b/include/migration/migration.h
index bb617fd..88ab5f6 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -20,6 +20,7 @@ 
 #include "qemu/notify.h"
 #include "qapi/error.h"
 #include "migration/vmstate.h"
+#include "migration/rdma.h"
 #include "qapi-types.h"
 
 struct MigrationParams {
@@ -102,6 +103,7 @@  uint64_t xbzrle_mig_bytes_transferred(void);
 uint64_t xbzrle_mig_pages_transferred(void);
 uint64_t xbzrle_mig_pages_overflow(void);
 uint64_t xbzrle_mig_pages_cache_miss(void);
+uint64_t delta_norm_mig_bytes_transferred(void);
 
 /**
  * @migrate_add_blocker - prevent migration from proceeding
@@ -122,6 +124,7 @@  int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
 int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen);
 
 int migrate_use_xbzrle(void);
+void *migrate_use_rdma(QEMUFile *f);
 int64_t migrate_xbzrle_cache_size(void);
 
 int64_t xbzrle_cache_resize(int64_t new_size);
diff --git a/migration.c b/migration.c
index 185d112..634437a 100644
--- a/migration.c
+++ b/migration.c
@@ -15,6 +15,7 @@ 
 
 #include "qemu-common.h"
 #include "migration/migration.h"
+#include "migration/rdma.h"
 #include "monitor/monitor.h"
 #include "migration/qemu-file.h"
 #include "sysemu/sysemu.h"
@@ -77,6 +78,8 @@  void qemu_start_incoming_migration(const char *uri, Error **errp)
 
     if (strstart(uri, "tcp:", &p))
         tcp_start_incoming_migration(p, errp);
+    else if (strstart(uri, "rdma:", &p))
+        rdma_start_incoming_migration(p, errp);
 #if !defined(WIN32)
     else if (strstart(uri, "exec:", &p))
         exec_start_incoming_migration(p, errp);
@@ -118,10 +121,11 @@  static void process_incoming_migration_co(void *opaque)
 void process_incoming_migration(QEMUFile *f)
 {
     Coroutine *co = qemu_coroutine_create(process_incoming_migration_co);
-    int fd = qemu_get_fd(f);
-
-    assert(fd != -1);
-    socket_set_nonblock(fd);
+    if(!migrate_use_rdma(f)) {
+        int fd = qemu_get_fd(f);
+        assert(fd != -1);
+        socket_set_nonblock(fd);
+    }
     qemu_coroutine_enter(co, f);
 }
 
@@ -404,6 +408,8 @@  void qmp_migrate(const char *uri, bool has_blk, bool blk,
 
     if (strstart(uri, "tcp:", &p)) {
         tcp_start_outgoing_migration(s, p, &local_err);
+    } else if (strstart(uri, "rdma:", &p)) {
+        rdma_start_outgoing_migration(s, p, &local_err);
 #if !defined(WIN32)
     } else if (strstart(uri, "exec:", &p)) {
         exec_start_outgoing_migration(s, p, &local_err);
@@ -545,8 +551,9 @@  static void *migration_thread(void *opaque)
             max_size = bandwidth * migrate_max_downtime() / 1000000;
 
             DPRINTF("transferred %" PRIu64 " time_spent %" PRIu64
-                    " bandwidth %g max_size %" PRId64 "\n",
-                    transferred_bytes, time_spent, bandwidth, max_size);
+                    " bandwidth %g (%0.2f mbps) max_size %" PRId64 "\n",
+                    transferred_bytes, time_spent, 
+                    bandwidth, Gbps(transferred_bytes, time_spent), max_size);
             /* if we haven't sent anything, we don't want to recalculate
                10000 is a small enough number for our purposes */
             if (s->dirty_bytes_rate && transferred_bytes > 10000) {