Patchwork [RFC,RDMA,support,v6:,4/7] Introduce two new capabilities

login
register
mail settings
Submitter mrhines@linux.vnet.ibm.com
Date April 10, 2013, 4:29 a.m.
Message ID <1365568180-19593-5-git-send-email-mrhines@linux.vnet.ibm.com>
Download mbox | patch
Permalink /patch/235289/
State New
Headers show

Comments

mrhines@linux.vnet.ibm.com - April 10, 2013, 4:29 a.m.
From: "Michael R. Hines" <mrhines@us.ibm.com>

RDMA performs very slowly with zero-page checking.
Without the ability to disable it, RDMA throughput and
latency promises and high performance links cannot be
fully realized.

On the other hand, dynamic page registration support is also
included in the RDMA protocol. This second capability also
cannot be fully realized without the ability to enable zero
page scanning.

So, we have two new capabilities which work together:

1. migrate_set_capability check_for_zero on|off (default on)
2. migrate_set_capability chunk_register_destination on|off (default off)

Signed-off-by: Michael R. Hines <mrhines@us.ibm.com>
---
 include/migration/qemu-file.h |   15 +++++++++++++++
 migration.c                   |   33 +++++++++++++++++++++++++++++++--
 qapi-schema.json              |    2 +-
 3 files changed, 47 insertions(+), 3 deletions(-)
Paolo Bonzini - April 10, 2013, 7:50 a.m.
Il 10/04/2013 06:29, mrhines@linux.vnet.ibm.com ha scritto:
> From: "Michael R. Hines" <mrhines@us.ibm.com>
> 
> RDMA performs very slowly with zero-page checking.
> Without the ability to disable it, RDMA throughput and
> latency promises and high performance links cannot be
> fully realized.
> 
> On the other hand, dynamic page registration support is also
> included in the RDMA protocol. This second capability also
> cannot be fully realized without the ability to enable zero
> page scanning.
> 
> So, we have two new capabilities which work together:
> 
> 1. migrate_set_capability check_for_zero on|off (default on)
> 2. migrate_set_capability chunk_register_destination on|off (default off)
> 
> Signed-off-by: Michael R. Hines <mrhines@us.ibm.com>

Michael, please listen to _all_ review comments.

1) I asked you to place check_for_zero in a separate patch.

2) Again, patch 3 cannot compile without this one.  The code should
compile after each patch, with and without --enable-rdma.

> ---
>  include/migration/qemu-file.h |   15 +++++++++++++++
>  migration.c                   |   33 +++++++++++++++++++++++++++++++--
>  qapi-schema.json              |    2 +-
>  3 files changed, 47 insertions(+), 3 deletions(-)
> 
> diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
> index 623c434..b6f3256 100644
> --- a/include/migration/qemu-file.h
> +++ b/include/migration/qemu-file.h
> @@ -57,12 +57,15 @@ typedef int (QEMUFileGetFD)(void *opaque);
>  typedef ssize_t (QEMUFileWritevBufferFunc)(void *opaque, struct iovec *iov,
>                                             int iovcnt);
>  
> +typedef struct QEMURamControlOps QEMURamControlOps;
> +
>  typedef struct QEMUFileOps {
>      QEMUFilePutBufferFunc *put_buffer;
>      QEMUFileGetBufferFunc *get_buffer;
>      QEMUFileCloseFunc *close;
>      QEMUFileGetFD *get_fd;
>      QEMUFileWritevBufferFunc *writev_buffer;
> +    const QEMURamControlOps *ram_control;

Why a separate member?  You can put them directly here.

>  } QEMUFileOps;
>  
>  QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops);
> @@ -80,6 +83,18 @@ void qemu_put_byte(QEMUFile *f, int v);
>   * The buffer should be available till it is sent asynchronously.
>   */
>  void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, int size);
> +void qemu_file_set_error(QEMUFile *f, int ret);
> +
> +void qemu_rdma_cleanup(void *opaque);
> +int qemu_rdma_close(void *opaque);
> +int qemu_rdma_get_fd(void *opaque);
> +int qemu_rdma_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size);
> +int qemu_rdma_put_buffer(void *opaque, const uint8_t *buf, 
> +                            int64_t pos, int size);
> +bool qemu_file_mode_is_not_valid(const char * mode);
> +
> +extern const QEMUFileOps rdma_read_ops;
> +extern const QEMUFileOps rdma_write_ops;

Please put these in migration-rdma.c.  You do not need them here.

>  static inline void qemu_put_ubyte(QEMUFile *f, unsigned int v)
>  {
> diff --git a/migration.c b/migration.c
> index 3b4b467..875cee3 100644
> --- a/migration.c
> +++ b/migration.c
> @@ -66,6 +66,7 @@ MigrationState *migrate_get_current(void)
>          .state = MIG_STATE_SETUP,
>          .bandwidth_limit = MAX_THROTTLE,
>          .xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE,
> +        .enabled_capabilities[MIGRATION_CAPABILITY_CHECK_FOR_ZERO] = true,
>      };
>  
>      return &current_migration;
> @@ -77,6 +78,10 @@ void qemu_start_incoming_migration(const char *uri, Error **errp)
>  
>      if (strstart(uri, "tcp:", &p))
>          tcp_start_incoming_migration(p, errp);
> +#ifdef CONFIG_RDMA
> +    else if (strstart(uri, "rdma:", &p))
> +        rdma_start_incoming_migration(p, errp);
> +#endif
>  #if !defined(WIN32)
>      else if (strstart(uri, "exec:", &p))
>          exec_start_incoming_migration(p, errp);
> @@ -120,8 +125,10 @@ void process_incoming_migration(QEMUFile *f)
>      Coroutine *co = qemu_coroutine_create(process_incoming_migration_co);
>      int fd = qemu_get_fd(f);
>  
> -    assert(fd != -1);
> -    qemu_set_nonblock(fd);
> +    if(fd != -2) { /* rdma returns -2 */

Please do not invent a special return value, this will not be maintainable.

Just sacrifice the assert for now, as we had agreed in the past.

Paolo

> +        assert(fd != -1);
> +        qemu_set_nonblock(fd);
> +    }
>      qemu_coroutine_enter(co, f);
>  }
>  
> @@ -405,6 +412,10 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
>  
>      if (strstart(uri, "tcp:", &p)) {
>          tcp_start_outgoing_migration(s, p, &local_err);
> +#ifdef CONFIG_RDMA
> +    } else if (strstart(uri, "rdma:", &p)) {
> +        rdma_start_outgoing_migration(s, p, &local_err);
> +#endif
>  #if !defined(WIN32)
>      } else if (strstart(uri, "exec:", &p)) {
>          exec_start_outgoing_migration(s, p, &local_err);
> @@ -474,6 +485,24 @@ void qmp_migrate_set_downtime(double value, Error **errp)
>      max_downtime = (uint64_t)value;
>  }
>  
> +bool migrate_chunk_register_destination(void)
> +{
> +    MigrationState *s;
> +
> +    s = migrate_get_current();
> +
> +    return s->enabled_capabilities[MIGRATION_CAPABILITY_CHUNK_REGISTER_DESTINATION];
> +}
> +
> +bool migrate_check_for_zero(void)
> +{
> +    MigrationState *s;
> +
> +    s = migrate_get_current();
> +
> +    return s->enabled_capabilities[MIGRATION_CAPABILITY_CHECK_FOR_ZERO];
> +}
> +
>  int migrate_use_xbzrle(void)
>  {
>      MigrationState *s;
> diff --git a/qapi-schema.json b/qapi-schema.json
> index db542f6..7ebcf99 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -602,7 +602,7 @@
>  # Since: 1.2
>  ##
>  { 'enum': 'MigrationCapability',
> -  'data': ['xbzrle'] }
> +  'data': ['xbzrle', 'check_for_zero', 'chunk_register_destination'] }
>  
>  ##
>  # @MigrationCapabilityStatus
>
mrhines@linux.vnet.ibm.com - April 10, 2013, 12:34 p.m.
On 04/10/2013 03:50 AM, Paolo Bonzini wrote:
> Il 10/04/2013 06:29, mrhines@linux.vnet.ibm.com ha scritto:
>> From: "Michael R. Hines" <mrhines@us.ibm.com>
>>
>> RDMA performs very slowly with zero-page checking.
>> Without the ability to disable it, RDMA throughput and
>> latency promises and high performance links cannot be
>> fully realized.
>>
>> On the other hand, dynamic page registration support is also
>> included in the RDMA protocol. This second capability also
>> cannot be fully realized without the ability to enable zero
>> page scanning.
>>
>> So, we have two new capabilities which work together:
>>
>> 1. migrate_set_capability check_for_zero on|off (default on)
>> 2. migrate_set_capability chunk_register_destination on|off (default off)
>>
>> Signed-off-by: Michael R. Hines <mrhines@us.ibm.com>
> Michael, please listen to _all_ review comments.
>
> 1) I asked you to place check_for_zero in a separate patch.
>
> 2) Again, patch 3 cannot compile without this one.  The code should
> compile after each patch, with and without --enable-rdma.

My apologies - I misunderstood the request. I am indeed addressing every 
comment.

When you said separate, I thought you meant a different patch series 
altpgether.

This is my first time, so the meaning of "separate" was not clear =)

And yes, patch 3 does in fact compile both with and without --enable-rdma.
>> ---
>>   include/migration/qemu-file.h |   15 +++++++++++++++
>>   migration.c                   |   33 +++++++++++++++++++++++++++++++--
>>   qapi-schema.json              |    2 +-
>>   3 files changed, 47 insertions(+), 3 deletions(-)
>>
>> diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
>> index 623c434..b6f3256 100644
>> --- a/include/migration/qemu-file.h
>> +++ b/include/migration/qemu-file.h
>> @@ -57,12 +57,15 @@ typedef int (QEMUFileGetFD)(void *opaque);
>>   typedef ssize_t (QEMUFileWritevBufferFunc)(void *opaque, struct iovec *iov,
>>                                              int iovcnt);
>>   
>> +typedef struct QEMURamControlOps QEMURamControlOps;
>> +
>>   typedef struct QEMUFileOps {
>>       QEMUFilePutBufferFunc *put_buffer;
>>       QEMUFileGetBufferFunc *get_buffer;
>>       QEMUFileCloseFunc *close;
>>       QEMUFileGetFD *get_fd;
>>       QEMUFileWritevBufferFunc *writev_buffer;
>> +    const QEMURamControlOps *ram_control;
> Why a separate member?  You can put them directly here.

Acknowledged.

>>   } QEMUFileOps;
>>   
>>   QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops);
>> @@ -80,6 +83,18 @@ void qemu_put_byte(QEMUFile *f, int v);
>>    * The buffer should be available till it is sent asynchronously.
>>    */
>>   void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, int size);
>> +void qemu_file_set_error(QEMUFile *f, int ret);
>> +
>> +void qemu_rdma_cleanup(void *opaque);
>> +int qemu_rdma_close(void *opaque);
>> +int qemu_rdma_get_fd(void *opaque);
>> +int qemu_rdma_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size);
>> +int qemu_rdma_put_buffer(void *opaque, const uint8_t *buf,
>> +                            int64_t pos, int size);
>> +bool qemu_file_mode_is_not_valid(const char * mode);
>> +
>> +extern const QEMUFileOps rdma_read_ops;
>> +extern const QEMUFileOps rdma_write_ops;
> Please put these in migration-rdma.c.  You do not need them here.
>

Acknowledged.

>>   static inline void qemu_put_ubyte(QEMUFile *f, unsigned int v)
>>   {
>> diff --git a/migration.c b/migration.c
>> index 3b4b467..875cee3 100644
>> --- a/migration.c
>> +++ b/migration.c
>> @@ -66,6 +66,7 @@ MigrationState *migrate_get_current(void)
>>           .state = MIG_STATE_SETUP,
>>           .bandwidth_limit = MAX_THROTTLE,
>>           .xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE,
>> +        .enabled_capabilities[MIGRATION_CAPABILITY_CHECK_FOR_ZERO] = true,
>>       };
>>   
>>       return &current_migration;
>> @@ -77,6 +78,10 @@ void qemu_start_incoming_migration(const char *uri, Error **errp)
>>   
>>       if (strstart(uri, "tcp:", &p))
>>           tcp_start_incoming_migration(p, errp);
>> +#ifdef CONFIG_RDMA
>> +    else if (strstart(uri, "rdma:", &p))
>> +        rdma_start_incoming_migration(p, errp);
>> +#endif
>>   #if !defined(WIN32)
>>       else if (strstart(uri, "exec:", &p))
>>           exec_start_incoming_migration(p, errp);
>> @@ -120,8 +125,10 @@ void process_incoming_migration(QEMUFile *f)
>>       Coroutine *co = qemu_coroutine_create(process_incoming_migration_co);
>>       int fd = qemu_get_fd(f);
>>   
>> -    assert(fd != -1);
>> -    qemu_set_nonblock(fd);
>> +    if(fd != -2) { /* rdma returns -2 */
> Please do not invent a special return value, this will not be maintainable.
>
> Just sacrifice the assert for now, as we had agreed in the past.

You flip-flopped on me =)
You said conditionalize it, then make a separate patch, then delete it 
altogether =)

But I'm fine with delete =)
Paolo Bonzini - April 10, 2013, 12:40 p.m.
Il 10/04/2013 14:34, Michael R. Hines ha scritto:
> On 04/10/2013 03:50 AM, Paolo Bonzini wrote:
>> Il 10/04/2013 06:29, mrhines@linux.vnet.ibm.com ha scritto:
>>> From: "Michael R. Hines" <mrhines@us.ibm.com>
>>>
>>> RDMA performs very slowly with zero-page checking.
>>> Without the ability to disable it, RDMA throughput and
>>> latency promises and high performance links cannot be
>>> fully realized.
>>>
>>> On the other hand, dynamic page registration support is also
>>> included in the RDMA protocol. This second capability also
>>> cannot be fully realized without the ability to enable zero
>>> page scanning.
>>>
>>> So, we have two new capabilities which work together:
>>>
>>> 1. migrate_set_capability check_for_zero on|off (default on)
>>> 2. migrate_set_capability chunk_register_destination on|off (default
>>> off)
>>>
>>> Signed-off-by: Michael R. Hines <mrhines@us.ibm.com>
>> Michael, please listen to _all_ review comments.
>>
>> 1) I asked you to place check_for_zero in a separate patch.
>>
>> 2) Again, patch 3 cannot compile without this one.  The code should
>> compile after each patch, with and without --enable-rdma.
> 
> My apologies - I misunderstood the request. I am indeed addressing every
> comment.
> 
> When you said separate, I thought you meant a different patch series
> altpgether.
> 
> This is my first time, so the meaning of "separate" was not clear =)
> 
> And yes, patch 3 does in fact compile both with and without --enable-rdma.

How does this:

+#ifdef CONFIG_RDMA
+const QEMURamControlOps qemu_rdma_write_control = {
+    .before_ram_iterate = qemu_ram_registration_start,
+    .after_ram_iterate = qemu_rdma_registration_stop,
+    .register_ram_iterate = qemu_rdma_registration_handle,
+    .save_page = qemu_rdma_save_page,
+};

compile (and link) successfully without a definition of
qemu_ram_registration_start, which is in patch 5?  (Honest question).

Similarly, patch 5 cannot link without qemu_ram_foreach_block.

Anyway, patch 1 does not compile with --enable-rdma. :)

> You flip-flopped on me =)
> You said conditionalize it, then make a separate patch, then delete it
> altogether =)

Make qemu_set_nonblock conditional, dropping the assert.

Do not touch the implementation of qemu_set_nonblock.  (Yes, this was a
mess).

Paolo
Orit Wasserman - April 10, 2013, 12:47 p.m.
On 04/10/2013 07:29 AM, mrhines@linux.vnet.ibm.com wrote:
> From: "Michael R. Hines" <mrhines@us.ibm.com>
> 
> RDMA performs very slowly with zero-page checking.
> Without the ability to disable it, RDMA throughput and
> latency promises and high performance links cannot be
> fully realized.
> 
> On the other hand, dynamic page registration support is also
> included in the RDMA protocol. This second capability also
> cannot be fully realized without the ability to enable zero
> page scanning.
> 
> So, we have two new capabilities which work together:
> 
> 1. migrate_set_capability check_for_zero on|off (default on)
This is not a capability but a migration parameter, the command
should be named migrate_check_for_zero (on|off) and you will need to add a flag
to MigrationParams (and it should be in a separate patch as Paolo already requested)
> 2. migrate_set_capability chunk_register_destination on|off (default off)
This is indeed a capability as it effect both source and destination

Orit 
> 
> Signed-off-by: Michael R. Hines <mrhines@us.ibm.com>
> ---
>  include/migration/qemu-file.h |   15 +++++++++++++++
>  migration.c                   |   33 +++++++++++++++++++++++++++++++--
>  qapi-schema.json              |    2 +-
>  3 files changed, 47 insertions(+), 3 deletions(-)
> 
> diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
> index 623c434..b6f3256 100644
> --- a/include/migration/qemu-file.h
> +++ b/include/migration/qemu-file.h
> @@ -57,12 +57,15 @@ typedef int (QEMUFileGetFD)(void *opaque);
>  typedef ssize_t (QEMUFileWritevBufferFunc)(void *opaque, struct iovec *iov,
>                                             int iovcnt);
>  
> +typedef struct QEMURamControlOps QEMURamControlOps;
> +
>  typedef struct QEMUFileOps {
>      QEMUFilePutBufferFunc *put_buffer;
>      QEMUFileGetBufferFunc *get_buffer;
>      QEMUFileCloseFunc *close;
>      QEMUFileGetFD *get_fd;
>      QEMUFileWritevBufferFunc *writev_buffer;
> +    const QEMURamControlOps *ram_control;
>  } QEMUFileOps;
>  
>  QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops);
> @@ -80,6 +83,18 @@ void qemu_put_byte(QEMUFile *f, int v);
>   * The buffer should be available till it is sent asynchronously.
>   */
>  void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, int size);
> +void qemu_file_set_error(QEMUFile *f, int ret);
> +
> +void qemu_rdma_cleanup(void *opaque);
> +int qemu_rdma_close(void *opaque);
> +int qemu_rdma_get_fd(void *opaque);
> +int qemu_rdma_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size);
> +int qemu_rdma_put_buffer(void *opaque, const uint8_t *buf, 
> +                            int64_t pos, int size);
> +bool qemu_file_mode_is_not_valid(const char * mode);
> +
> +extern const QEMUFileOps rdma_read_ops;
> +extern const QEMUFileOps rdma_write_ops;
>  
>  static inline void qemu_put_ubyte(QEMUFile *f, unsigned int v)
>  {
> diff --git a/migration.c b/migration.c
> index 3b4b467..875cee3 100644
> --- a/migration.c
> +++ b/migration.c
> @@ -66,6 +66,7 @@ MigrationState *migrate_get_current(void)
>          .state = MIG_STATE_SETUP,
>          .bandwidth_limit = MAX_THROTTLE,
>          .xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE,
> +        .enabled_capabilities[MIGRATION_CAPABILITY_CHECK_FOR_ZERO] = true,
>      };
>  
>      return &current_migration;
> @@ -77,6 +78,10 @@ void qemu_start_incoming_migration(const char *uri, Error **errp)
>  
>      if (strstart(uri, "tcp:", &p))
>          tcp_start_incoming_migration(p, errp);
> +#ifdef CONFIG_RDMA
> +    else if (strstart(uri, "rdma:", &p))
> +        rdma_start_incoming_migration(p, errp);
> +#endif
>  #if !defined(WIN32)
>      else if (strstart(uri, "exec:", &p))
>          exec_start_incoming_migration(p, errp);
> @@ -120,8 +125,10 @@ void process_incoming_migration(QEMUFile *f)
>      Coroutine *co = qemu_coroutine_create(process_incoming_migration_co);
>      int fd = qemu_get_fd(f);
>  
> -    assert(fd != -1);
> -    qemu_set_nonblock(fd);
> +    if(fd != -2) { /* rdma returns -2 */
> +        assert(fd != -1);
> +        qemu_set_nonblock(fd);
> +    }
>      qemu_coroutine_enter(co, f);
>  }
>  
> @@ -405,6 +412,10 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
>  
>      if (strstart(uri, "tcp:", &p)) {
>          tcp_start_outgoing_migration(s, p, &local_err);
> +#ifdef CONFIG_RDMA
> +    } else if (strstart(uri, "rdma:", &p)) {
> +        rdma_start_outgoing_migration(s, p, &local_err);
> +#endif
>  #if !defined(WIN32)
>      } else if (strstart(uri, "exec:", &p)) {
>          exec_start_outgoing_migration(s, p, &local_err);
> @@ -474,6 +485,24 @@ void qmp_migrate_set_downtime(double value, Error **errp)
>      max_downtime = (uint64_t)value;
>  }
>  
> +bool migrate_chunk_register_destination(void)
> +{
> +    MigrationState *s;
> +
> +    s = migrate_get_current();
> +
> +    return s->enabled_capabilities[MIGRATION_CAPABILITY_CHUNK_REGISTER_DESTINATION];
> +}
> +
> +bool migrate_check_for_zero(void)
> +{
> +    MigrationState *s;
> +
> +    s = migrate_get_current();
> +
> +    return s->enabled_capabilities[MIGRATION_CAPABILITY_CHECK_FOR_ZERO];
> +}
> +
>  int migrate_use_xbzrle(void)
>  {
>      MigrationState *s;
> diff --git a/qapi-schema.json b/qapi-schema.json
> index db542f6..7ebcf99 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -602,7 +602,7 @@
>  # Since: 1.2
>  ##
>  { 'enum': 'MigrationCapability',
> -  'data': ['xbzrle'] }
> +  'data': ['xbzrle', 'check_for_zero', 'chunk_register_destination'] }
>  
>  ##
>  # @MigrationCapabilityStatus
>
mrhines@linux.vnet.ibm.com - April 10, 2013, 1:10 p.m.
On 04/10/2013 08:40 AM, Paolo Bonzini wrote:
> Il 10/04/2013 14:34, Michael R. Hines ha scritto:
>> On 04/10/2013 03:50 AM, Paolo Bonzini wrote:
>>> Il 10/04/2013 06:29, mrhines@linux.vnet.ibm.com ha scritto:
>>>> From: "Michael R. Hines" <mrhines@us.ibm.com>
>>>>
>>>> RDMA performs very slowly with zero-page checking.
>>>> Without the ability to disable it, RDMA throughput and
>>>> latency promises and high performance links cannot be
>>>> fully realized.
>>>>
>>>> On the other hand, dynamic page registration support is also
>>>> included in the RDMA protocol. This second capability also
>>>> cannot be fully realized without the ability to enable zero
>>>> page scanning.
>>>>
>>>> So, we have two new capabilities which work together:
>>>>
>>>> 1. migrate_set_capability check_for_zero on|off (default on)
>>>> 2. migrate_set_capability chunk_register_destination on|off (default
>>>> off)
>>>>
>>>> Signed-off-by: Michael R. Hines <mrhines@us.ibm.com>
>>> Michael, please listen to _all_ review comments.
>>>
>>> 1) I asked you to place check_for_zero in a separate patch.
>>>
>>> 2) Again, patch 3 cannot compile without this one.  The code should
>>> compile after each patch, with and without --enable-rdma.
>> My apologies - I misunderstood the request. I am indeed addressing every
>> comment.
>>
>> When you said separate, I thought you meant a different patch series
>> altpgether.
>>
>> This is my first time, so the meaning of "separate" was not clear =)
>>
>> And yes, patch 3 does in fact compile both with and without --enable-rdma.
> How does this:
>
> +#ifdef CONFIG_RDMA
> +const QEMURamControlOps qemu_rdma_write_control = {
> +    .before_ram_iterate = qemu_ram_registration_start,
> +    .after_ram_iterate = qemu_rdma_registration_stop,
> +    .register_ram_iterate = qemu_rdma_registration_handle,
> +    .save_page = qemu_rdma_save_page,
> +};
>
> compile (and link) successfully without a definition of
> qemu_ram_registration_start, which is in patch 5?  (Honest question).
>
> Similarly, patch 5 cannot link without qemu_ram_foreach_block.
>
> Anyway, patch 1 does not compile with --enable-rdma. :)

Yes, all 7 patches do compile in both cases - that code is 
conditionalized with CONFIG_RDMA.

What exactly is the problem there?

I toggled the enable/disable flags several times and recompiled just to 
be sure.

Also about qemu_ram_foreach_block: Why does this depend on patch 5, exactly?

qemu_ram_foreach_block is not conditionalized by anything.

>> You flip-flopped on me =)
>> You said conditionalize it, then make a separate patch, then delete it
>> altogether =)
> Make qemu_set_nonblock conditional, dropping the assert.
>
> Do not touch the implementation of qemu_set_nonblock.  (Yes, this was a
> mess).
>
> Paolo

Acknowledged.
Paolo Bonzini - April 10, 2013, 1:13 p.m.
Il 10/04/2013 15:10, Michael R. Hines ha scritto:
> 
> 
> Also about qemu_ram_foreach_block: Why does this depend on patch 5,
> exactly?

It's patch 5 that depends on qemu_ram_foreach_block.  I'm not sure how
it is possible to link patch 5 if exec.c still does not provide
qemu_ram_foreach_block.

Paolo

> qemu_ram_foreach_block is not conditionalized by anything.
mrhines@linux.vnet.ibm.com - April 10, 2013, 2:03 p.m.
On 04/10/2013 09:13 AM, Paolo Bonzini wrote:
> Il 10/04/2013 15:10, Michael R. Hines ha scritto:
>>
>> Also about qemu_ram_foreach_block: Why does this depend on patch 5,
>> exactly?

Understood - I will move the patch up in the series and make it more clear.

Patch

diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
index 623c434..b6f3256 100644
--- a/include/migration/qemu-file.h
+++ b/include/migration/qemu-file.h
@@ -57,12 +57,15 @@  typedef int (QEMUFileGetFD)(void *opaque);
 typedef ssize_t (QEMUFileWritevBufferFunc)(void *opaque, struct iovec *iov,
                                            int iovcnt);
 
+typedef struct QEMURamControlOps QEMURamControlOps;
+
 typedef struct QEMUFileOps {
     QEMUFilePutBufferFunc *put_buffer;
     QEMUFileGetBufferFunc *get_buffer;
     QEMUFileCloseFunc *close;
     QEMUFileGetFD *get_fd;
     QEMUFileWritevBufferFunc *writev_buffer;
+    const QEMURamControlOps *ram_control;
 } QEMUFileOps;
 
 QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops);
@@ -80,6 +83,18 @@  void qemu_put_byte(QEMUFile *f, int v);
  * The buffer should be available till it is sent asynchronously.
  */
 void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, int size);
+void qemu_file_set_error(QEMUFile *f, int ret);
+
+void qemu_rdma_cleanup(void *opaque);
+int qemu_rdma_close(void *opaque);
+int qemu_rdma_get_fd(void *opaque);
+int qemu_rdma_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size);
+int qemu_rdma_put_buffer(void *opaque, const uint8_t *buf, 
+                            int64_t pos, int size);
+bool qemu_file_mode_is_not_valid(const char * mode);
+
+extern const QEMUFileOps rdma_read_ops;
+extern const QEMUFileOps rdma_write_ops;
 
 static inline void qemu_put_ubyte(QEMUFile *f, unsigned int v)
 {
diff --git a/migration.c b/migration.c
index 3b4b467..875cee3 100644
--- a/migration.c
+++ b/migration.c
@@ -66,6 +66,7 @@  MigrationState *migrate_get_current(void)
         .state = MIG_STATE_SETUP,
         .bandwidth_limit = MAX_THROTTLE,
         .xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE,
+        .enabled_capabilities[MIGRATION_CAPABILITY_CHECK_FOR_ZERO] = true,
     };
 
     return &current_migration;
@@ -77,6 +78,10 @@  void qemu_start_incoming_migration(const char *uri, Error **errp)
 
     if (strstart(uri, "tcp:", &p))
         tcp_start_incoming_migration(p, errp);
+#ifdef CONFIG_RDMA
+    else if (strstart(uri, "rdma:", &p))
+        rdma_start_incoming_migration(p, errp);
+#endif
 #if !defined(WIN32)
     else if (strstart(uri, "exec:", &p))
         exec_start_incoming_migration(p, errp);
@@ -120,8 +125,10 @@  void process_incoming_migration(QEMUFile *f)
     Coroutine *co = qemu_coroutine_create(process_incoming_migration_co);
     int fd = qemu_get_fd(f);
 
-    assert(fd != -1);
-    qemu_set_nonblock(fd);
+    if(fd != -2) { /* rdma returns -2 */
+        assert(fd != -1);
+        qemu_set_nonblock(fd);
+    }
     qemu_coroutine_enter(co, f);
 }
 
@@ -405,6 +412,10 @@  void qmp_migrate(const char *uri, bool has_blk, bool blk,
 
     if (strstart(uri, "tcp:", &p)) {
         tcp_start_outgoing_migration(s, p, &local_err);
+#ifdef CONFIG_RDMA
+    } else if (strstart(uri, "rdma:", &p)) {
+        rdma_start_outgoing_migration(s, p, &local_err);
+#endif
 #if !defined(WIN32)
     } else if (strstart(uri, "exec:", &p)) {
         exec_start_outgoing_migration(s, p, &local_err);
@@ -474,6 +485,24 @@  void qmp_migrate_set_downtime(double value, Error **errp)
     max_downtime = (uint64_t)value;
 }
 
+bool migrate_chunk_register_destination(void)
+{
+    MigrationState *s;
+
+    s = migrate_get_current();
+
+    return s->enabled_capabilities[MIGRATION_CAPABILITY_CHUNK_REGISTER_DESTINATION];
+}
+
+bool migrate_check_for_zero(void)
+{
+    MigrationState *s;
+
+    s = migrate_get_current();
+
+    return s->enabled_capabilities[MIGRATION_CAPABILITY_CHECK_FOR_ZERO];
+}
+
 int migrate_use_xbzrle(void)
 {
     MigrationState *s;
diff --git a/qapi-schema.json b/qapi-schema.json
index db542f6..7ebcf99 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -602,7 +602,7 @@ 
 # Since: 1.2
 ##
 { 'enum': 'MigrationCapability',
-  'data': ['xbzrle'] }
+  'data': ['xbzrle', 'check_for_zero', 'chunk_register_destination'] }
 
 ##
 # @MigrationCapabilityStatus