Message ID | 1533562177-16447-10-git-send-email-lidongchen@tencent.com |
---|---|
State | New |
Headers | show |
Series | Enable postcopy RDMA live migration | expand |
* Lidong Chen (jemmy858585@gmail.com) wrote: > The destination qemu only poll the comp_channel->fd in > qemu_rdma_wait_comp_channel. But when source qemu disconnnect > the rdma connection, the destination qemu should be notified. > > Signed-off-by: Lidong Chen <lidongchen@tencent.com> OK, this could do with an update to the migration_incoming_co comment in migration.h, since previously it was only used by colo; if we merge this first please post a patch to update the comment. Other than that, I think I'm OK: Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > --- > migration/migration.c | 3 ++- > migration/rdma.c | 32 +++++++++++++++++++++++++++++++- > 2 files changed, 33 insertions(+), 2 deletions(-) > > diff --git a/migration/migration.c b/migration/migration.c > index df0c2cf..f7d6e26 100644 > --- a/migration/migration.c > +++ b/migration/migration.c > @@ -389,6 +389,7 @@ static void process_incoming_migration_co(void *opaque) > int ret; > > assert(mis->from_src_file); > + mis->migration_incoming_co = qemu_coroutine_self(); > mis->largest_page_size = qemu_ram_pagesize_largest(); > postcopy_state_set(POSTCOPY_INCOMING_NONE); > migrate_set_state(&mis->state, MIGRATION_STATUS_NONE, > @@ -418,7 +419,6 @@ static void process_incoming_migration_co(void *opaque) > > /* we get COLO info, and know if we are in COLO mode */ > if (!ret && migration_incoming_enable_colo()) { > - mis->migration_incoming_co = qemu_coroutine_self(); > qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming", > colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE); > mis->have_colo_incoming_thread = true; > @@ -442,6 +442,7 @@ static void process_incoming_migration_co(void *opaque) > } > mis->bh = qemu_bh_new(process_incoming_migration_bh, mis); > qemu_bh_schedule(mis->bh); > + mis->migration_incoming_co = NULL; > } > > static void migration_incoming_setup(QEMUFile *f) > diff --git a/migration/rdma.c b/migration/rdma.c > index 1affc46..ae07515 100644 > --- a/migration/rdma.c > 
+++ b/migration/rdma.c > @@ -3226,6 +3226,35 @@ err: > > static void rdma_accept_incoming_migration(void *opaque); > > +static void rdma_cm_poll_handler(void *opaque) > +{ > + RDMAContext *rdma = opaque; > + int ret; > + struct rdma_cm_event *cm_event; > + MigrationIncomingState *mis = migration_incoming_get_current(); > + > + ret = rdma_get_cm_event(rdma->channel, &cm_event); > + if (ret) { > + error_report("get_cm_event failed %d", errno); > + return; > + } > + rdma_ack_cm_event(cm_event); > + > + if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED || > + cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) { > + error_report("receive cm event, cm event is %d", cm_event->event); > + rdma->error_state = -EPIPE; > + if (rdma->return_path) { > + rdma->return_path->error_state = -EPIPE; > + } > + > + if (mis->migration_incoming_co) { > + qemu_coroutine_enter(mis->migration_incoming_co); > + } > + return; > + } > +} > + > static int qemu_rdma_accept(RDMAContext *rdma) > { > RDMACapabilities cap; > @@ -3326,7 +3355,8 @@ static int qemu_rdma_accept(RDMAContext *rdma) > NULL, > (void *)(intptr_t)rdma->return_path); > } else { > - qemu_set_fd_handler(rdma->channel->fd, NULL, NULL, NULL); > + qemu_set_fd_handler(rdma->channel->fd, rdma_cm_poll_handler, > + NULL, rdma); > } > > ret = rdma_accept(rdma->cm_id, &conn_param); > -- > 1.8.3.1 > -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On Fri, Aug 17, 2018 at 10:01 PM, Dr. David Alan Gilbert <dgilbert@redhat.com> wrote: > * Lidong Chen (jemmy858585@gmail.com) wrote: >> The destination qemu only poll the comp_channel->fd in >> qemu_rdma_wait_comp_channel. But when source qemu disconnnect >> the rdma connection, the destination qemu should be notified. >> >> Signed-off-by: Lidong Chen <lidongchen@tencent.com> > > OK, this could do with an update to the migration_incoming_co comment in > migration.h, since previously it was only used by colo; if we merge this > first please post a patch to update the comment. How about? /* The coroutine we should enter back for incoming migration */ Coroutine *migration_incoming_co; > > Other than that, I think I'm OK: > > Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > >> --- >> migration/migration.c | 3 ++- >> migration/rdma.c | 32 +++++++++++++++++++++++++++++++- >> 2 files changed, 33 insertions(+), 2 deletions(-) >> >> diff --git a/migration/migration.c b/migration/migration.c >> index df0c2cf..f7d6e26 100644 >> --- a/migration/migration.c >> +++ b/migration/migration.c >> @@ -389,6 +389,7 @@ static void process_incoming_migration_co(void *opaque) >> int ret; >> >> assert(mis->from_src_file); >> + mis->migration_incoming_co = qemu_coroutine_self(); >> mis->largest_page_size = qemu_ram_pagesize_largest(); >> postcopy_state_set(POSTCOPY_INCOMING_NONE); >> migrate_set_state(&mis->state, MIGRATION_STATUS_NONE, >> @@ -418,7 +419,6 @@ static void process_incoming_migration_co(void *opaque) >> >> /* we get COLO info, and know if we are in COLO mode */ >> if (!ret && migration_incoming_enable_colo()) { >> - mis->migration_incoming_co = qemu_coroutine_self(); >> qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming", >> colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE); >> mis->have_colo_incoming_thread = true; >> @@ -442,6 +442,7 @@ static void process_incoming_migration_co(void *opaque) >> } >> mis->bh = 
qemu_bh_new(process_incoming_migration_bh, mis); >> qemu_bh_schedule(mis->bh); >> + mis->migration_incoming_co = NULL; >> } >> >> static void migration_incoming_setup(QEMUFile *f) >> diff --git a/migration/rdma.c b/migration/rdma.c >> index 1affc46..ae07515 100644 >> --- a/migration/rdma.c >> +++ b/migration/rdma.c >> @@ -3226,6 +3226,35 @@ err: >> >> static void rdma_accept_incoming_migration(void *opaque); >> >> +static void rdma_cm_poll_handler(void *opaque) >> +{ >> + RDMAContext *rdma = opaque; >> + int ret; >> + struct rdma_cm_event *cm_event; >> + MigrationIncomingState *mis = migration_incoming_get_current(); >> + >> + ret = rdma_get_cm_event(rdma->channel, &cm_event); >> + if (ret) { >> + error_report("get_cm_event failed %d", errno); >> + return; >> + } >> + rdma_ack_cm_event(cm_event); >> + >> + if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED || >> + cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) { >> + error_report("receive cm event, cm event is %d", cm_event->event); >> + rdma->error_state = -EPIPE; >> + if (rdma->return_path) { >> + rdma->return_path->error_state = -EPIPE; >> + } >> + >> + if (mis->migration_incoming_co) { >> + qemu_coroutine_enter(mis->migration_incoming_co); >> + } >> + return; >> + } >> +} >> + >> static int qemu_rdma_accept(RDMAContext *rdma) >> { >> RDMACapabilities cap; >> @@ -3326,7 +3355,8 @@ static int qemu_rdma_accept(RDMAContext *rdma) >> NULL, >> (void *)(intptr_t)rdma->return_path); >> } else { >> - qemu_set_fd_handler(rdma->channel->fd, NULL, NULL, NULL); >> + qemu_set_fd_handler(rdma->channel->fd, rdma_cm_poll_handler, >> + NULL, rdma); >> } >> >> ret = rdma_accept(rdma->cm_id, &conn_param); >> -- >> 1.8.3.1 >> > -- > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff --git a/migration/migration.c b/migration/migration.c index df0c2cf..f7d6e26 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -389,6 +389,7 @@ static void process_incoming_migration_co(void *opaque) int ret; assert(mis->from_src_file); + mis->migration_incoming_co = qemu_coroutine_self(); mis->largest_page_size = qemu_ram_pagesize_largest(); postcopy_state_set(POSTCOPY_INCOMING_NONE); migrate_set_state(&mis->state, MIGRATION_STATUS_NONE, @@ -418,7 +419,6 @@ static void process_incoming_migration_co(void *opaque) /* we get COLO info, and know if we are in COLO mode */ if (!ret && migration_incoming_enable_colo()) { - mis->migration_incoming_co = qemu_coroutine_self(); qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming", colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE); mis->have_colo_incoming_thread = true; @@ -442,6 +442,7 @@ static void process_incoming_migration_co(void *opaque) } mis->bh = qemu_bh_new(process_incoming_migration_bh, mis); qemu_bh_schedule(mis->bh); + mis->migration_incoming_co = NULL; } static void migration_incoming_setup(QEMUFile *f) diff --git a/migration/rdma.c b/migration/rdma.c index 1affc46..ae07515 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -3226,6 +3226,35 @@ err: static void rdma_accept_incoming_migration(void *opaque); +static void rdma_cm_poll_handler(void *opaque) +{ + RDMAContext *rdma = opaque; + int ret; + struct rdma_cm_event *cm_event; + MigrationIncomingState *mis = migration_incoming_get_current(); + + ret = rdma_get_cm_event(rdma->channel, &cm_event); + if (ret) { + error_report("get_cm_event failed %d", errno); + return; + } + rdma_ack_cm_event(cm_event); + + if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED || + cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) { + error_report("receive cm event, cm event is %d", cm_event->event); + rdma->error_state = -EPIPE; + if (rdma->return_path) { + rdma->return_path->error_state = -EPIPE; + } + + if (mis->migration_incoming_co) { + 
qemu_coroutine_enter(mis->migration_incoming_co); + } + return; + } +} + static int qemu_rdma_accept(RDMAContext *rdma) { RDMACapabilities cap; @@ -3326,7 +3355,8 @@ static int qemu_rdma_accept(RDMAContext *rdma) NULL, (void *)(intptr_t)rdma->return_path); } else { - qemu_set_fd_handler(rdma->channel->fd, NULL, NULL, NULL); + qemu_set_fd_handler(rdma->channel->fd, rdma_cm_poll_handler, + NULL, rdma); } ret = rdma_accept(rdma->cm_id, &conn_param);
The destination qemu only polls the comp_channel->fd in qemu_rdma_wait_comp_channel. But when the source qemu disconnects the rdma connection, the destination qemu should be notified. Signed-off-by: Lidong Chen <lidongchen@tencent.com> --- migration/migration.c | 3 ++- migration/rdma.c | 32 +++++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 2 deletions(-)