
[RFC RDMA support v4: 07/10] connection-establishment for RDMA

Message ID 1363576743-6146-8-git-send-email-mrhines@linux.vnet.ibm.com
State New

Commit Message

mrhines@linux.vnet.ibm.com March 18, 2013, 3:19 a.m. UTC
From: "Michael R. Hines" <mrhines@us.ibm.com>


Signed-off-by: Michael R. Hines <mrhines@us.ibm.com>
---
 migration-rdma.c |  205 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 205 insertions(+)
 create mode 100644 migration-rdma.c

Comments

Paolo Bonzini March 18, 2013, 8:56 a.m. UTC | #1
Il 18/03/2013 04:19, mrhines@linux.vnet.ibm.com ha scritto:
> From: "Michael R. Hines" <mrhines@us.ibm.com>
> 
> 
> Signed-off-by: Michael R. Hines <mrhines@us.ibm.com>
> ---
>  migration-rdma.c |  205 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 205 insertions(+)
>  create mode 100644 migration-rdma.c
> 
> diff --git a/migration-rdma.c b/migration-rdma.c
> new file mode 100644
> index 0000000..e1ea055
> --- /dev/null
> +++ b/migration-rdma.c
> @@ -0,0 +1,205 @@
> +/*
> + *  Copyright (C) 2013 Michael R. Hines <mrhines@us.ibm.com>
> + *  Copyright (C) 2013 Jiuxing Liu <jl@us.ibm.com>
> + *
> + *  This program is free software; you can redistribute it and/or modify
> + *  it under the terms of the GNU General Public License as published by
> + *  the Free Software Foundation; under version 2 of the License.
> + *
> + *  This program is distributed in the hope that it will be useful,
> + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
> + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + *  GNU General Public License for more details.
> + *
> + *  You should have received a copy of the GNU General Public License
> + *  along with this program; if not, see <http://www.gnu.org/licenses/>.
> + */
> +#include "migration/rdma.h"
> +#include "qemu-common.h"
> +#include "migration/migration.h"
> +#include "migration/qemu-file.h"
> +#include <stdio.h>
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <netdb.h>
> +#include <arpa/inet.h>
> +#include <string.h>
> +
> +//#define DEBUG_MIGRATION_RDMA
> +
> +#ifdef DEBUG_MIGRATION_RDMA
> +#define DPRINTF(fmt, ...) \
> +    do { printf("migration-rdma: " fmt, ## __VA_ARGS__); } while (0)
> +#else
> +#define DPRINTF(fmt, ...) \
> +    do { } while (0)
> +#endif
> +
> +static int rdma_accept_incoming_migration(RDMAData *rdma, Error **errp)
> +{
> +    int ret;
> +
> +    ret = qemu_rdma_migrate_listen(rdma, rdma->host, rdma->port);
> +    if (ret) {
> +        qemu_rdma_print("rdma migration: error listening!");
> +        goto err_rdma_server_wait;
> +    }
> +
> +    ret = qemu_rdma_alloc_qp(&rdma->rdma_ctx);
> +    if (ret) {
> +        qemu_rdma_print("rdma migration: error allocating qp!");
> +        goto err_rdma_server_wait;
> +    }
> +
> +    ret = qemu_rdma_migrate_accept(&rdma->rdma_ctx, NULL, NULL, NULL, 0);
> +    if (ret) {
> +        qemu_rdma_print("rdma migration: error accepting connection!");
> +        goto err_rdma_server_wait;
> +    }
> +
> +    ret = qemu_rdma_post_recv_qemu_file(rdma);
> +    if (ret) {
> +        qemu_rdma_print("rdma migration: error posting second qemu file recv!");
> +        goto err_rdma_server_wait;
> +    }
> +
> +    ret = qemu_rdma_post_send_remote_info(rdma);
> +    if (ret) {
> +        qemu_rdma_print("rdma migration: error sending remote info!");
> +        goto err_rdma_server_wait;
> +    }
> +
> +    ret = qemu_rdma_wait_for_wrid(rdma, RDMA_WRID_SEND_REMOTE_INFO);
> +    if (ret < 0) {
> +        qemu_rdma_print("rdma migration: polling remote info error!");
> +        goto err_rdma_server_wait;
> +    }

In a "socket-like" abstraction, all of these steps except the initial
listen are part of "accept".  Please move them to
qemu_rdma_migrate_accept (possibly renaming the existing
qemu_rdma_migrate_accept to a different name).

Similarly, perhaps you can merge qemu_rdma_server_prepare and
qemu_rdma_migrate_listen.

Try to make the public API between modules as small as possible (but not
smaller :)), so that you can easily document it without too many
references to RDMA concepts.

Thanks,

Paolo
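
As a sketch of the consolidation suggested above, the incoming path could hide everything after listening behind one accept-like call. The qemu_rdma_listen() and qemu_rdma_accept() helpers below are hypothetical names chosen for illustration, not functions from this patch; the rest reuses calls that do appear in the series.

/*
 * Hypothetical reshaping of rdma_start_incoming_migration() with a
 * socket-like listen/accept split.  qemu_rdma_listen() would fold in
 * today's qemu_rdma_server_prepare()/qemu_rdma_migrate_listen(), and
 * qemu_rdma_accept() would cover QP allocation, rdma accept, posting
 * the first receive and the remote-info exchange.
 */
static int rdma_incoming_sketch(const char *host_port, Error **errp)
{
    RDMAData *rdma = g_malloc0(sizeof(*rdma));
    QEMUFile *f;

    if (qemu_rdma_data_init(rdma, host_port, errp) < 0) {
        return -EINVAL;
    }

    /* Bind and listen on rdma->host:rdma->port (assumed helper). */
    if (qemu_rdma_listen(rdma, rdma->host, rdma->port, errp) < 0) {
        goto err;
    }

    /* Everything up to a usable connection, behind one call (assumed helper). */
    if (qemu_rdma_accept(rdma, errp) < 0) {
        goto err;
    }

    f = qemu_fopen_rdma(rdma, "rb");
    if (f == NULL) {
        goto err;
    }
    process_incoming_migration(f);
    return 0;

err:
    qemu_rdma_cleanup(rdma);
    return -1;
}
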

> +    rdma->total_bytes = 0;
> +    rdma->enabled = 1;
> +    qemu_rdma_dump_gid("server_connect", rdma->rdma_ctx.cm_id);
> +    return 0;
> +
> +err_rdma_server_wait:
> +    qemu_rdma_cleanup(rdma);
> +    return -1;
> +
> +}
> +
> +int rdma_start_incoming_migration(const char * host_port, Error **errp)
> +{
> +    RDMAData *rdma = g_malloc0(sizeof(RDMAData));
> +    QEMUFile *f;
> +    int ret;
> +
> +    if ((ret = qemu_rdma_data_init(rdma, host_port, errp)) < 0)
> +        return ret; 
> +
> +    ret = qemu_rdma_server_init(rdma, NULL);
> +
> +    DPRINTF("Starting RDMA-based incoming migration\n");
> +
> +    if (!ret) {
> +        DPRINTF("qemu_rdma_server_init success\n");
> +        ret = qemu_rdma_server_prepare(rdma, NULL);
> +
> +        if (!ret) {
> +            DPRINTF("qemu_rdma_server_prepare success\n");
> +
> +            ret = rdma_accept_incoming_migration(rdma, NULL);
> +            if(!ret)
> +                DPRINTF("qemu_rdma_accept_incoming_migration success\n");
> +            f = qemu_fopen_rdma(rdma, "rb");
> +            if (f == NULL) {
> +                fprintf(stderr, "could not qemu_fopen RDMA\n");
> +                ret = -EIO;
> +            }
> +
> +            process_incoming_migration(f);
> +        }
> +    }
> +
> +    return ret;
> +}
> +
> +void rdma_start_outgoing_migration(void *opaque, const char *host_port, Error **errp)
> +{
> +    RDMAData *rdma = g_malloc0(sizeof(RDMAData));
> +    MigrationState *s = opaque;
> +    int ret;
> +
> +    if (qemu_rdma_data_init(rdma, host_port, errp) < 0)
> +        return; 
> +
> +    ret = qemu_rdma_client_init(rdma, NULL);
> +    if(!ret) {
> +        DPRINTF("qemu_rdma_client_init success\n");
> +        ret = qemu_rdma_client_connect(rdma, NULL);
> +
> +        if(!ret) {
> +            s->file = qemu_fopen_rdma(rdma, "wb");
> +            DPRINTF("qemu_rdma_client_connect success\n");
> +            migrate_fd_connect(s);
> +            return;
> +        }
> +    }
> +
> +    migrate_fd_error(s);
> +}
> +
> +size_t save_rdma_page(QEMUFile *f, ram_addr_t block_offset, ram_addr_t offset, int cont, size_t size)
> +{
> +    int ret;
> +    size_t bytes_sent = 0;
> +    ram_addr_t current_addr;
> +    RDMAData * rdma = migrate_use_rdma(f);
> +
> +    current_addr = block_offset + offset;
> +
> +    /*
> +     * Add this page to the current 'chunk'. If the chunk
> +     * is full, an actual RDMA write will occur.
> +     */
> +    if ((ret = qemu_rdma_write(rdma, current_addr, size)) < 0) {
> +        fprintf(stderr, "rdma migration: write error! %d\n", ret);
> +        return ret;
> +    }
> +
> +    /*
> +     * Drain the Completion Queue if possible.
> +     * If not, the end of the iteration will do this
> +     * again to make sure we don't overflow the
> +     * request queue. 
> +     */
> +    while (1) {
> +        int ret = qemu_rdma_poll(rdma);
> +        if (ret == RDMA_WRID_NONE) {
> +            break;
> +        }
> +        if (ret < 0) {
> +            fprintf(stderr, "rdma migration: polling error! %d\n", ret);
> +            return ret;
> +        }
> +    }
> +
> +    bytes_sent += size;
> +    return bytes_sent;
> +}
> +
> +size_t qemu_rdma_fill(void * opaque, uint8_t *buf, int size)
> +{
> +    RDMAData * rdma = opaque;
> +    size_t len = 0;
> +
> +    if(rdma->qemu_file_len) {
> +        DPRINTF("RDMA %" PRId64 " of %d bytes already in buffer\n",
> +	    rdma->qemu_file_len, size);
> +
> +        len = MIN(size, rdma->qemu_file_len);
> +        memcpy(buf, rdma->qemu_file_curr, len);
> +        rdma->qemu_file_curr += len;
> +        rdma->qemu_file_len -= len;
> +    }
> +
> +    return len;
> +}
>
mrhines@linux.vnet.ibm.com March 18, 2013, 8:26 p.m. UTC | #2
Acknowledged.

On 03/18/2013 04:56 AM, Paolo Bonzini wrote:
> In a "socket-like" abstraction, all of these steps except the initial 
> listen are part of "accept". Please move them to 
> qemu_rdma_migrate_accept (possibly renaming the existing 
> qemu_rdma_migrate_accept to a different name). Similarly, perhaps you 
> can merge qemu_rdma_server_prepare and qemu_rdma_migrate_listen. Try 
> to make the public API between modules as small as possible (but not 
> smaller :)), so that you can easily document it without too many 
> references to RDMA concepts. Thanks, Paolo

Patch

diff --git a/migration-rdma.c b/migration-rdma.c
new file mode 100644
index 0000000..e1ea055
--- /dev/null
+++ b/migration-rdma.c
@@ -0,0 +1,205 @@ 
+/*
+ *  Copyright (C) 2013 Michael R. Hines <mrhines@us.ibm.com>
+ *  Copyright (C) 2013 Jiuxing Liu <jl@us.ibm.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include "migration/rdma.h"
+#include "qemu-common.h"
+#include "migration/migration.h"
+#include "migration/qemu-file.h"
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netdb.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+//#define DEBUG_MIGRATION_RDMA
+
+#ifdef DEBUG_MIGRATION_RDMA
+#define DPRINTF(fmt, ...) \
+    do { printf("migration-rdma: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
+
+static int rdma_accept_incoming_migration(RDMAData *rdma, Error **errp)
+{
+    int ret;
+
+    ret = qemu_rdma_migrate_listen(rdma, rdma->host, rdma->port);
+    if (ret) {
+        qemu_rdma_print("rdma migration: error listening!");
+        goto err_rdma_server_wait;
+    }
+
+    ret = qemu_rdma_alloc_qp(&rdma->rdma_ctx);
+    if (ret) {
+        qemu_rdma_print("rdma migration: error allocating qp!");
+        goto err_rdma_server_wait;
+    }
+
+    ret = qemu_rdma_migrate_accept(&rdma->rdma_ctx, NULL, NULL, NULL, 0);
+    if (ret) {
+        qemu_rdma_print("rdma migration: error accepting connection!");
+        goto err_rdma_server_wait;
+    }
+
+    ret = qemu_rdma_post_recv_qemu_file(rdma);
+    if (ret) {
+        qemu_rdma_print("rdma migration: error posting second qemu file recv!");
+        goto err_rdma_server_wait;
+    }
+
+    ret = qemu_rdma_post_send_remote_info(rdma);
+    if (ret) {
+        qemu_rdma_print("rdma migration: error sending remote info!");
+        goto err_rdma_server_wait;
+    }
+
+    ret = qemu_rdma_wait_for_wrid(rdma, RDMA_WRID_SEND_REMOTE_INFO);
+    if (ret < 0) {
+        qemu_rdma_print("rdma migration: polling remote info error!");
+        goto err_rdma_server_wait;
+    }
+
+    rdma->total_bytes = 0;
+    rdma->enabled = 1;
+    qemu_rdma_dump_gid("server_connect", rdma->rdma_ctx.cm_id);
+    return 0;
+
+err_rdma_server_wait:
+    qemu_rdma_cleanup(rdma);
+    return -1;
+
+}
+
+int rdma_start_incoming_migration(const char * host_port, Error **errp)
+{
+    RDMAData *rdma = g_malloc0(sizeof(RDMAData));
+    QEMUFile *f;
+    int ret;
+
+    if ((ret = qemu_rdma_data_init(rdma, host_port, errp)) < 0)
+        return ret; 
+
+    ret = qemu_rdma_server_init(rdma, NULL);
+
+    DPRINTF("Starting RDMA-based incoming migration\n");
+
+    if (!ret) {
+        DPRINTF("qemu_rdma_server_init success\n");
+        ret = qemu_rdma_server_prepare(rdma, NULL);
+
+        if (!ret) {
+            DPRINTF("qemu_rdma_server_prepare success\n");
+
+            ret = rdma_accept_incoming_migration(rdma, NULL);
+            if(!ret)
+                DPRINTF("qemu_rdma_accept_incoming_migration success\n");
+            f = qemu_fopen_rdma(rdma, "rb");
+            if (f == NULL) {
+                fprintf(stderr, "could not qemu_fopen RDMA\n");
+                ret = -EIO;
+            }
+
+            process_incoming_migration(f);
+        }
+    }
+
+    return ret;
+}
+
+void rdma_start_outgoing_migration(void *opaque, const char *host_port, Error **errp)
+{
+    RDMAData *rdma = g_malloc0(sizeof(RDMAData));
+    MigrationState *s = opaque;
+    int ret;
+
+    if (qemu_rdma_data_init(rdma, host_port, errp) < 0)
+        return; 
+
+    ret = qemu_rdma_client_init(rdma, NULL);
+    if(!ret) {
+        DPRINTF("qemu_rdma_client_init success\n");
+        ret = qemu_rdma_client_connect(rdma, NULL);
+
+        if(!ret) {
+            s->file = qemu_fopen_rdma(rdma, "wb");
+            DPRINTF("qemu_rdma_client_connect success\n");
+            migrate_fd_connect(s);
+            return;
+        }
+    }
+
+    migrate_fd_error(s);
+}
+
+size_t save_rdma_page(QEMUFile *f, ram_addr_t block_offset, ram_addr_t offset, int cont, size_t size)
+{
+    int ret;
+    size_t bytes_sent = 0;
+    ram_addr_t current_addr;
+    RDMAData * rdma = migrate_use_rdma(f);
+
+    current_addr = block_offset + offset;
+
+    /*
+     * Add this page to the current 'chunk'. If the chunk
+     * is full, an actual RDMA write will occur.
+     */
+    if ((ret = qemu_rdma_write(rdma, current_addr, size)) < 0) {
+        fprintf(stderr, "rdma migration: write error! %d\n", ret);
+        return ret;
+    }
+
+    /*
+     * Drain the Completion Queue if possible.
+     * If not, the end of the iteration will do this
+     * again to make sure we don't overflow the
+     * request queue. 
+     */
+    while (1) {
+        int ret = qemu_rdma_poll(rdma);
+        if (ret == RDMA_WRID_NONE) {
+            break;
+        }
+        if (ret < 0) {
+            fprintf(stderr, "rdma migration: polling error! %d\n", ret);
+            return ret;
+        }
+    }
+
+    bytes_sent += size;
+    return bytes_sent;
+}
+
+size_t qemu_rdma_fill(void * opaque, uint8_t *buf, int size)
+{
+    RDMAData * rdma = opaque;
+    size_t len = 0;
+
+    if(rdma->qemu_file_len) {
+        DPRINTF("RDMA %" PRId64 " of %d bytes already in buffer\n",
+	    rdma->qemu_file_len, size);
+
+        len = MIN(size, rdma->qemu_file_len);
+        memcpy(buf, rdma->qemu_file_curr, len);
+        rdma->qemu_file_curr += len;
+        rdma->qemu_file_len -= len;
+    }
+
+    return len;
+}
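
The chunking behaviour that save_rdma_page() relies on lives in qemu_rdma_write(), which is not part of this patch. As a rough illustration of the idea its comment describes (merge contiguous pages into a pending chunk and issue a single RDMA write only when a page no longer extends it), something along these lines could work; the structure, flush callback and size limit are assumptions for illustration, not code from the series.

/*
 * Simplified sketch of page "chunking": contiguous pages grow one
 * pending chunk, and an actual RDMA write is issued only when a page
 * does not extend the chunk or the chunk hits a size limit.
 */
#define RDMA_CHUNK_MAX (1024 * 1024)   /* hypothetical 1 MB limit */

typedef struct {
    ram_addr_t chunk_start;   /* guest address where the chunk begins */
    size_t     chunk_len;     /* bytes accumulated so far */
} RDMAChunkState;

static int rdma_chunk_add(RDMAChunkState *c, ram_addr_t addr, size_t len,
                          int (*flush)(ram_addr_t start, size_t len))
{
    /* The new page extends the current chunk: just grow it. */
    if (c->chunk_len && addr == c->chunk_start + c->chunk_len &&
        c->chunk_len + len <= RDMA_CHUNK_MAX) {
        c->chunk_len += len;
        return 0;
    }

    /* Otherwise flush whatever is pending with one RDMA write... */
    if (c->chunk_len) {
        int ret = flush(c->chunk_start, c->chunk_len);
        if (ret < 0) {
            return ret;
        }
    }

    /* ...and start a new chunk with this page. */
    c->chunk_start = addr;
    c->chunk_len = len;
    return 0;
}
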