Message ID | 1450167779-9960-22-git-send-email-zhang.zhanghailiang@huawei.com |
---|---|
State | New |
Headers | show |
* zhanghailiang (zhang.zhanghailiang@huawei.com) wrote: > We leave users to choose whatever heartbeat solution they want, if the heartbeat > is lost, or other errors they detect, they can use experimental command > 'x_colo_lost_heartbeat' to tell COLO to do failover, COLO will do operations > accordingly. > > For example, if the command is sent to the PVM, the Primary side will > exit COLO mode and take over operation. If sent to the Secondary, the > secondary will run failover work, then take over server operation to > become the new Primary. > > Cc: Luiz Capitulino <lcapitulino@redhat.com> > Cc: Eric Blake <eblake@redhat.com> > Cc: Markus Armbruster <armbru@redhat.com> > Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com> > Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > --- > v11: > - Add more comments for x-colo-lost-heartbeat command (Eric's suggestion) > - Return 'enum' instead of 'int' for get_colo_mode() (Eric's suggestion) > v10: > - Rename command colo_lost_hearbeat to experimental 'x_colo_lost_heartbeat' > > Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com> > --- > hmp-commands.hx | 15 +++++++++++++++ > hmp.c | 8 ++++++++ > hmp.h | 1 + > include/migration/colo.h | 3 +++ > include/migration/failover.h | 20 ++++++++++++++++++++ > migration/Makefile.objs | 2 +- > migration/colo-comm.c | 11 +++++++++++ > migration/colo-failover.c | 41 +++++++++++++++++++++++++++++++++++++++++ > migration/colo.c | 1 + > qapi-schema.json | 29 +++++++++++++++++++++++++++++ > qmp-commands.hx | 19 +++++++++++++++++++ > stubs/migration-colo.c | 8 ++++++++ > 12 files changed, 157 insertions(+), 1 deletion(-) > create mode 100644 include/migration/failover.h > create mode 100644 migration/colo-failover.c > > diff --git a/hmp-commands.hx b/hmp-commands.hx > index bb52e4d..a381b0b 100644 > --- a/hmp-commands.hx > +++ b/hmp-commands.hx > @@ -1039,6 +1039,21 @@ migration (or once already in postcopy). 
> ETEXI > > { > + .name = "x_colo_lost_heartbeat", > + .args_type = "", > + .params = "", > + .help = "Tell COLO that heartbeat is lost,\n\t\t\t" > + "a failover or takeover is needed.", > + .mhandler.cmd = hmp_x_colo_lost_heartbeat, > + }, > + > +STEXI > +@item x_colo_lost_heartbeat > +@findex x_colo_lost_heartbeat > +Tell COLO that heartbeat is lost, a failover or takeover is needed. > +ETEXI > + > + { > .name = "client_migrate_info", > .args_type = "protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?", > .params = "protocol hostname port tls-port cert-subject", > diff --git a/hmp.c b/hmp.c > index ee87d38..dc6dc30 100644 > --- a/hmp.c > +++ b/hmp.c > @@ -1310,6 +1310,14 @@ void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict) > hmp_handle_error(mon, &err); > } > > +void hmp_x_colo_lost_heartbeat(Monitor *mon, const QDict *qdict) > +{ > + Error *err = NULL; > + > + qmp_x_colo_lost_heartbeat(&err); > + hmp_handle_error(mon, &err); > +} > + > void hmp_set_password(Monitor *mon, const QDict *qdict) > { > const char *protocol = qdict_get_str(qdict, "protocol"); > diff --git a/hmp.h b/hmp.h > index a8c5b5a..864a300 100644 > --- a/hmp.h > +++ b/hmp.h > @@ -70,6 +70,7 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict); > void hmp_migrate_set_cache_size(Monitor *mon, const QDict *qdict); > void hmp_client_migrate_info(Monitor *mon, const QDict *qdict); > void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict); > +void hmp_x_colo_lost_heartbeat(Monitor *mon, const QDict *qdict); > void hmp_set_password(Monitor *mon, const QDict *qdict); > void hmp_expire_password(Monitor *mon, const QDict *qdict); > void hmp_eject(Monitor *mon, const QDict *qdict); > diff --git a/include/migration/colo.h b/include/migration/colo.h > index 2676c4a..ba27719 100644 > --- a/include/migration/colo.h > +++ b/include/migration/colo.h > @@ -17,6 +17,7 @@ > #include "migration/migration.h" > #include "qemu/coroutine_int.h" > #include "qemu/thread.h" > 
+#include "qemu/main-loop.h" > > bool colo_supported(void); > void colo_info_mig_init(void); > @@ -29,4 +30,6 @@ bool migration_incoming_enable_colo(void); > void migration_incoming_exit_colo(void); > void *colo_process_incoming_thread(void *opaque); > bool migration_incoming_in_colo_state(void); > + > +COLOMode get_colo_mode(void); > #endif > diff --git a/include/migration/failover.h b/include/migration/failover.h > new file mode 100644 > index 0000000..1785b52 > --- /dev/null > +++ b/include/migration/failover.h > @@ -0,0 +1,20 @@ > +/* > + * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) > + * (a.k.a. Fault Tolerance or Continuous Replication) > + * > + * Copyright (c) 2015 HUAWEI TECHNOLOGIES CO.,LTD. > + * Copyright (c) 2015 FUJITSU LIMITED > + * Copyright (c) 2015 Intel Corporation > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or > + * later. See the COPYING file in the top-level directory. > + */ > + > +#ifndef QEMU_FAILOVER_H > +#define QEMU_FAILOVER_H > + > +#include "qemu-common.h" > + > +void failover_request_active(Error **errp); > + > +#endif > diff --git a/migration/Makefile.objs b/migration/Makefile.objs > index 81b5713..920d1e7 100644 > --- a/migration/Makefile.objs > +++ b/migration/Makefile.objs > @@ -1,6 +1,6 @@ > common-obj-y += migration.o tcp.o > -common-obj-$(CONFIG_COLO) += colo.o > common-obj-y += colo-comm.o > +common-obj-$(CONFIG_COLO) += colo.o colo-failover.o > common-obj-y += vmstate.o > common-obj-y += qemu-file.o qemu-file-buf.o qemu-file-unix.o qemu-file-stdio.o > common-obj-y += xbzrle.o postcopy-ram.o > diff --git a/migration/colo-comm.c b/migration/colo-comm.c > index 30df3d3..58a6488 100644 > --- a/migration/colo-comm.c > +++ b/migration/colo-comm.c > @@ -20,6 +20,17 @@ typedef struct { > > static COLOInfo colo_info; > > +COLOMode get_colo_mode(void) > +{ > + if (migration_in_colo_state()) { > + return COLO_MODE_PRIMARY; > + } else if (migration_incoming_in_colo_state()) { > 
+ return COLO_MODE_SECONDARY; > + } else { > + return COLO_MODE_UNKNOWN; > + } > +} > + > static void colo_info_pre_save(void *opaque) > { > COLOInfo *s = opaque; > diff --git a/migration/colo-failover.c b/migration/colo-failover.c > new file mode 100644 > index 0000000..e3897c6 > --- /dev/null > +++ b/migration/colo-failover.c > @@ -0,0 +1,41 @@ > +/* > + * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) > + * (a.k.a. Fault Tolerance or Continuous Replication) > + * > + * Copyright (c) 2015 HUAWEI TECHNOLOGIES CO., LTD. > + * Copyright (c) 2015 FUJITSU LIMITED > + * Copyright (c) 2015 Intel Corporation > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or > + * later. See the COPYING file in the top-level directory. > + */ > + > +#include "migration/colo.h" > +#include "migration/failover.h" > +#include "qmp-commands.h" > +#include "qapi/qmp/qerror.h" > + > +static QEMUBH *failover_bh; > + > +static void colo_failover_bh(void *opaque) > +{ > + qemu_bh_delete(failover_bh); > + failover_bh = NULL; > + /*TODO: Do failover work */ > +} > + > +void failover_request_active(Error **errp) > +{ > + failover_bh = qemu_bh_new(colo_failover_bh, NULL); > + qemu_bh_schedule(failover_bh); > +} > + > +void qmp_x_colo_lost_heartbeat(Error **errp) > +{ > + if (get_colo_mode() == COLO_MODE_UNKNOWN) { > + error_setg(errp, QERR_FEATURE_DISABLED, "colo"); > + return; > + } > + > + failover_request_active(errp); > +} > diff --git a/migration/colo.c b/migration/colo.c > index ca5df44..7098497 100644 > --- a/migration/colo.c > +++ b/migration/colo.c > @@ -17,6 +17,7 @@ > #include "trace.h" > #include "qemu/error-report.h" > #include "qemu/sockets.h" > +#include "migration/failover.h" > > /* colo buffer */ > #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024) > diff --git a/qapi-schema.json b/qapi-schema.json > index a5699a7..feb7d53 100644 > --- a/qapi-schema.json > +++ b/qapi-schema.json > @@ -761,6 +761,35 @@ > 'vmstate-send', 
'vmstate-size','vmstate-received', > 'vmstate-loaded' ] } > > +## > +# @COLOMode > +# > +# The colo mode > +# > +# @unknown: unknown mode > +# > +# @primary: master side > +# > +# @secondary: slave side > +# > +# Since: 2.6 > +## > +{ 'enum': 'COLOMode', > + 'data': [ 'unknown', 'primary', 'secondary'] } > + > +## > +# @x-colo-lost-heartbeat > +# > +# Tell qemu that heartbeat is lost, request it to do takeover procedures. > +# If this command is sent to the PVM, the Primary side will exit COLO mode. > +# If sent to the Secondary, the Secondary side will run failover work, > +# then takes over server operation to become the service VM. > +# > +# Since: 2.6 > +## > +{ 'command': 'x-colo-lost-heartbeat' } > + > +## > # @MouseInfo: > # > # Information about a mouse device. > diff --git a/qmp-commands.hx b/qmp-commands.hx > index 89756c9..76ad208 100644 > --- a/qmp-commands.hx > +++ b/qmp-commands.hx > @@ -805,6 +805,25 @@ Example: > EQMP > > { > + .name = "x-colo-lost-heartbeat", > + .args_type = "", > + .mhandler.cmd_new = qmp_marshal_x_colo_lost_heartbeat, > + }, > + > +SQMP > +x-colo-lost-heartbeat > +-------------------- > + > +Tell COLO that heartbeat is lost, a failover or takeover is needed. 
> + > +Example: > + > +-> { "execute": "x-colo-lost-heartbeat" } > +<- { "return": {} } > + > +EQMP > + > + { > .name = "client_migrate_info", > .args_type = "protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?", > .params = "protocol hostname port tls-port cert-subject", > diff --git a/stubs/migration-colo.c b/stubs/migration-colo.c > index c12516e..5028f63 100644 > --- a/stubs/migration-colo.c > +++ b/stubs/migration-colo.c > @@ -11,6 +11,7 @@ > */ > > #include "migration/colo.h" > +#include "qmp-commands.h" > > bool colo_supported(void) > { > @@ -35,3 +36,10 @@ void *colo_process_incoming_thread(void *opaque) > { > return NULL; > } > + > +void qmp_x_colo_lost_heartbeat(Error **errp) > +{ > + error_setg(errp, "COLO is not supported, please rerun configure" > + " with --enable-colo option in order to support" > + " COLO feature"); > +} > -- > 1.8.3.1 > > -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
zhanghailiang <zhang.zhanghailiang@huawei.com> writes: > We leave users to choose whatever heartbeat solution they want, if the heartbeat > is lost, or other errors they detect, they can use experimental command > 'x_colo_lost_heartbeat' to tell COLO to do failover, COLO will do operations > accordingly. > > For example, if the command is sent to the PVM, the Primary side will > exit COLO mode and take over operation. If sent to the Secondary, the > secondary will run failover work, then take over server operation to > become the new Primary. > > Cc: Luiz Capitulino <lcapitulino@redhat.com> > Cc: Eric Blake <eblake@redhat.com> > Cc: Markus Armbruster <armbru@redhat.com> > Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com> > Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com> > --- > v11: > - Add more comments for x-colo-lost-heartbeat command (Eric's suggestion) > - Return 'enum' instead of 'int' for get_colo_mode() (Eric's suggestion) > v10: > - Rename command colo_lost_hearbeat to experimental 'x_colo_lost_heartbeat' > > Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com> > --- > hmp-commands.hx | 15 +++++++++++++++ > hmp.c | 8 ++++++++ > hmp.h | 1 + > include/migration/colo.h | 3 +++ > include/migration/failover.h | 20 ++++++++++++++++++++ > migration/Makefile.objs | 2 +- > migration/colo-comm.c | 11 +++++++++++ > migration/colo-failover.c | 41 +++++++++++++++++++++++++++++++++++++++++ > migration/colo.c | 1 + > qapi-schema.json | 29 +++++++++++++++++++++++++++++ > qmp-commands.hx | 19 +++++++++++++++++++ > stubs/migration-colo.c | 8 ++++++++ > 12 files changed, 157 insertions(+), 1 deletion(-) > create mode 100644 include/migration/failover.h > create mode 100644 migration/colo-failover.c > > diff --git a/hmp-commands.hx b/hmp-commands.hx > index bb52e4d..a381b0b 100644 > --- a/hmp-commands.hx > +++ b/hmp-commands.hx > @@ -1039,6 +1039,21 @@ migration (or once already in postcopy). 
> ETEXI > > { > + .name = "x_colo_lost_heartbeat", > + .args_type = "", > + .params = "", > + .help = "Tell COLO that heartbeat is lost,\n\t\t\t" > + "a failover or takeover is needed.", > + .mhandler.cmd = hmp_x_colo_lost_heartbeat, > + }, > + > +STEXI > +@item x_colo_lost_heartbeat > +@findex x_colo_lost_heartbeat > +Tell COLO that heartbeat is lost, a failover or takeover is needed. > +ETEXI > + > + { > .name = "client_migrate_info", > .args_type = "protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?", > .params = "protocol hostname port tls-port cert-subject", > diff --git a/hmp.c b/hmp.c > index ee87d38..dc6dc30 100644 > --- a/hmp.c > +++ b/hmp.c > @@ -1310,6 +1310,14 @@ void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict) > hmp_handle_error(mon, &err); > } > > +void hmp_x_colo_lost_heartbeat(Monitor *mon, const QDict *qdict) > +{ > + Error *err = NULL; > + > + qmp_x_colo_lost_heartbeat(&err); > + hmp_handle_error(mon, &err); > +} > + > void hmp_set_password(Monitor *mon, const QDict *qdict) > { > const char *protocol = qdict_get_str(qdict, "protocol"); > diff --git a/hmp.h b/hmp.h > index a8c5b5a..864a300 100644 > --- a/hmp.h > +++ b/hmp.h > @@ -70,6 +70,7 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict); > void hmp_migrate_set_cache_size(Monitor *mon, const QDict *qdict); > void hmp_client_migrate_info(Monitor *mon, const QDict *qdict); > void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict); > +void hmp_x_colo_lost_heartbeat(Monitor *mon, const QDict *qdict); > void hmp_set_password(Monitor *mon, const QDict *qdict); > void hmp_expire_password(Monitor *mon, const QDict *qdict); > void hmp_eject(Monitor *mon, const QDict *qdict); > diff --git a/include/migration/colo.h b/include/migration/colo.h > index 2676c4a..ba27719 100644 > --- a/include/migration/colo.h > +++ b/include/migration/colo.h > @@ -17,6 +17,7 @@ > #include "migration/migration.h" > #include "qemu/coroutine_int.h" > #include "qemu/thread.h" > 
+#include "qemu/main-loop.h" > > bool colo_supported(void); > void colo_info_mig_init(void); > @@ -29,4 +30,6 @@ bool migration_incoming_enable_colo(void); > void migration_incoming_exit_colo(void); > void *colo_process_incoming_thread(void *opaque); > bool migration_incoming_in_colo_state(void); > + > +COLOMode get_colo_mode(void); > #endif > diff --git a/include/migration/failover.h b/include/migration/failover.h > new file mode 100644 > index 0000000..1785b52 > --- /dev/null > +++ b/include/migration/failover.h > @@ -0,0 +1,20 @@ > +/* > + * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) > + * (a.k.a. Fault Tolerance or Continuous Replication) > + * > + * Copyright (c) 2015 HUAWEI TECHNOLOGIES CO.,LTD. > + * Copyright (c) 2015 FUJITSU LIMITED > + * Copyright (c) 2015 Intel Corporation > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or > + * later. See the COPYING file in the top-level directory. > + */ > + > +#ifndef QEMU_FAILOVER_H > +#define QEMU_FAILOVER_H > + > +#include "qemu-common.h" > + > +void failover_request_active(Error **errp); > + > +#endif > diff --git a/migration/Makefile.objs b/migration/Makefile.objs > index 81b5713..920d1e7 100644 > --- a/migration/Makefile.objs > +++ b/migration/Makefile.objs > @@ -1,6 +1,6 @@ > common-obj-y += migration.o tcp.o > -common-obj-$(CONFIG_COLO) += colo.o > common-obj-y += colo-comm.o > +common-obj-$(CONFIG_COLO) += colo.o colo-failover.o > common-obj-y += vmstate.o > common-obj-y += qemu-file.o qemu-file-buf.o qemu-file-unix.o qemu-file-stdio.o > common-obj-y += xbzrle.o postcopy-ram.o > diff --git a/migration/colo-comm.c b/migration/colo-comm.c > index 30df3d3..58a6488 100644 > --- a/migration/colo-comm.c > +++ b/migration/colo-comm.c > @@ -20,6 +20,17 @@ typedef struct { > > static COLOInfo colo_info; > > +COLOMode get_colo_mode(void) > +{ > + if (migration_in_colo_state()) { > + return COLO_MODE_PRIMARY; > + } else if (migration_incoming_in_colo_state()) { > 
+ return COLO_MODE_SECONDARY; > + } else { > + return COLO_MODE_UNKNOWN; > + } > +} > + > static void colo_info_pre_save(void *opaque) > { > COLOInfo *s = opaque; > diff --git a/migration/colo-failover.c b/migration/colo-failover.c > new file mode 100644 > index 0000000..e3897c6 > --- /dev/null > +++ b/migration/colo-failover.c > @@ -0,0 +1,41 @@ > +/* > + * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) > + * (a.k.a. Fault Tolerance or Continuous Replication) > + * > + * Copyright (c) 2015 HUAWEI TECHNOLOGIES CO., LTD. > + * Copyright (c) 2015 FUJITSU LIMITED > + * Copyright (c) 2015 Intel Corporation > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or > + * later. See the COPYING file in the top-level directory. > + */ > + > +#include "migration/colo.h" > +#include "migration/failover.h" > +#include "qmp-commands.h" > +#include "qapi/qmp/qerror.h" > + > +static QEMUBH *failover_bh; > + > +static void colo_failover_bh(void *opaque) > +{ > + qemu_bh_delete(failover_bh); > + failover_bh = NULL; > + /*TODO: Do failover work */ > +} > + > +void failover_request_active(Error **errp) > +{ > + failover_bh = qemu_bh_new(colo_failover_bh, NULL); > + qemu_bh_schedule(failover_bh); > +} > + > +void qmp_x_colo_lost_heartbeat(Error **errp) > +{ > + if (get_colo_mode() == COLO_MODE_UNKNOWN) { > + error_setg(errp, QERR_FEATURE_DISABLED, "colo"); > + return; > + } > + > + failover_request_active(errp); > +} > diff --git a/migration/colo.c b/migration/colo.c > index ca5df44..7098497 100644 > --- a/migration/colo.c > +++ b/migration/colo.c > @@ -17,6 +17,7 @@ > #include "trace.h" > #include "qemu/error-report.h" > #include "qemu/sockets.h" > +#include "migration/failover.h" > > /* colo buffer */ > #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024) > diff --git a/qapi-schema.json b/qapi-schema.json > index a5699a7..feb7d53 100644 > --- a/qapi-schema.json > +++ b/qapi-schema.json > @@ -761,6 +761,35 @@ > 'vmstate-send', 
'vmstate-size','vmstate-received', > 'vmstate-loaded' ] } > > +## > +# @COLOMode > +# > +# The colo mode This is rather terse for an ignorant reader like me. > +# > +# @unknown: unknown mode What does "unknown mode" mean, and how can it happen? > +# > +# @primary: master side > +# > +# @secondary: slave side > +# > +# Since: 2.6 > +## > +{ 'enum': 'COLOMode', > + 'data': [ 'unknown', 'primary', 'secondary'] } > + > +## > +# @x-colo-lost-heartbeat > +# > +# Tell qemu that heartbeat is lost, request it to do takeover procedures. > +# If this command is sent to the PVM, the Primary side will exit COLO mode. > +# If sent to the Secondary, the Secondary side will run failover work, > +# then takes over server operation to become the service VM. > +# > +# Since: 2.6 > +## > +{ 'command': 'x-colo-lost-heartbeat' } > + > +## > # @MouseInfo: > # > # Information about a mouse device. > diff --git a/qmp-commands.hx b/qmp-commands.hx > index 89756c9..76ad208 100644 > --- a/qmp-commands.hx > +++ b/qmp-commands.hx > @@ -805,6 +805,25 @@ Example: > EQMP > > { > + .name = "x-colo-lost-heartbeat", > + .args_type = "", > + .mhandler.cmd_new = qmp_marshal_x_colo_lost_heartbeat, > + }, > + > +SQMP > +x-colo-lost-heartbeat > +-------------------- > + > +Tell COLO that heartbeat is lost, a failover or takeover is needed. 
> + > +Example: > + > +-> { "execute": "x-colo-lost-heartbeat" } > +<- { "return": {} } > + > +EQMP > + > + { > .name = "client_migrate_info", > .args_type = "protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?", > .params = "protocol hostname port tls-port cert-subject", > diff --git a/stubs/migration-colo.c b/stubs/migration-colo.c > index c12516e..5028f63 100644 > --- a/stubs/migration-colo.c > +++ b/stubs/migration-colo.c > @@ -11,6 +11,7 @@ > */ > > #include "migration/colo.h" > +#include "qmp-commands.h" > > bool colo_supported(void) > { > @@ -35,3 +36,10 @@ void *colo_process_incoming_thread(void *opaque) > { > return NULL; > } > + > +void qmp_x_colo_lost_heartbeat(Error **errp) > +{ > + error_setg(errp, "COLO is not supported, please rerun configure" > + " with --enable-colo option in order to support" > + " COLO feature"); > +}
On 2015/12/19 17:38, Markus Armbruster wrote: > zhanghailiang <zhang.zhanghailiang@huawei.com> writes: > >> We leave users to choose whatever heartbeat solution they want, if the heartbeat >> is lost, or other errors they detect, they can use experimental command >> 'x_colo_lost_heartbeat' to tell COLO to do failover, COLO will do operations >> accordingly. >> >> For example, if the command is sent to the PVM, the Primary side will >> exit COLO mode and take over operation. If sent to the Secondary, the >> secondary will run failover work, then take over server operation to >> become the new Primary. >> >> Cc: Luiz Capitulino <lcapitulino@redhat.com> >> Cc: Eric Blake <eblake@redhat.com> >> Cc: Markus Armbruster <armbru@redhat.com> >> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com> >> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com> >> --- >> v11: >> - Add more comments for x-colo-lost-heartbeat command (Eric's suggestion) >> - Return 'enum' instead of 'int' for get_colo_mode() (Eric's suggestion) >> v10: >> - Rename command colo_lost_hearbeat to experimental 'x_colo_lost_heartbeat' >> >> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com> >> --- >> hmp-commands.hx | 15 +++++++++++++++ >> hmp.c | 8 ++++++++ >> hmp.h | 1 + >> include/migration/colo.h | 3 +++ >> include/migration/failover.h | 20 ++++++++++++++++++++ >> migration/Makefile.objs | 2 +- >> migration/colo-comm.c | 11 +++++++++++ >> migration/colo-failover.c | 41 +++++++++++++++++++++++++++++++++++++++++ >> migration/colo.c | 1 + >> qapi-schema.json | 29 +++++++++++++++++++++++++++++ >> qmp-commands.hx | 19 +++++++++++++++++++ >> stubs/migration-colo.c | 8 ++++++++ >> 12 files changed, 157 insertions(+), 1 deletion(-) >> create mode 100644 include/migration/failover.h >> create mode 100644 migration/colo-failover.c >> >> diff --git a/hmp-commands.hx b/hmp-commands.hx >> index bb52e4d..a381b0b 100644 >> --- a/hmp-commands.hx >> +++ b/hmp-commands.hx >> @@ -1039,6 +1039,21 @@ 
migration (or once already in postcopy). >> ETEXI >> >> { >> + .name = "x_colo_lost_heartbeat", >> + .args_type = "", >> + .params = "", >> + .help = "Tell COLO that heartbeat is lost,\n\t\t\t" >> + "a failover or takeover is needed.", >> + .mhandler.cmd = hmp_x_colo_lost_heartbeat, >> + }, >> + >> +STEXI >> +@item x_colo_lost_heartbeat >> +@findex x_colo_lost_heartbeat >> +Tell COLO that heartbeat is lost, a failover or takeover is needed. >> +ETEXI >> + >> + { >> .name = "client_migrate_info", >> .args_type = "protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?", >> .params = "protocol hostname port tls-port cert-subject", >> diff --git a/hmp.c b/hmp.c >> index ee87d38..dc6dc30 100644 >> --- a/hmp.c >> +++ b/hmp.c >> @@ -1310,6 +1310,14 @@ void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict) >> hmp_handle_error(mon, &err); >> } >> >> +void hmp_x_colo_lost_heartbeat(Monitor *mon, const QDict *qdict) >> +{ >> + Error *err = NULL; >> + >> + qmp_x_colo_lost_heartbeat(&err); >> + hmp_handle_error(mon, &err); >> +} >> + >> void hmp_set_password(Monitor *mon, const QDict *qdict) >> { >> const char *protocol = qdict_get_str(qdict, "protocol"); >> diff --git a/hmp.h b/hmp.h >> index a8c5b5a..864a300 100644 >> --- a/hmp.h >> +++ b/hmp.h >> @@ -70,6 +70,7 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict); >> void hmp_migrate_set_cache_size(Monitor *mon, const QDict *qdict); >> void hmp_client_migrate_info(Monitor *mon, const QDict *qdict); >> void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict); >> +void hmp_x_colo_lost_heartbeat(Monitor *mon, const QDict *qdict); >> void hmp_set_password(Monitor *mon, const QDict *qdict); >> void hmp_expire_password(Monitor *mon, const QDict *qdict); >> void hmp_eject(Monitor *mon, const QDict *qdict); >> diff --git a/include/migration/colo.h b/include/migration/colo.h >> index 2676c4a..ba27719 100644 >> --- a/include/migration/colo.h >> +++ b/include/migration/colo.h >> @@ -17,6 +17,7 @@ 
>> #include "migration/migration.h" >> #include "qemu/coroutine_int.h" >> #include "qemu/thread.h" >> +#include "qemu/main-loop.h" >> >> bool colo_supported(void); >> void colo_info_mig_init(void); >> @@ -29,4 +30,6 @@ bool migration_incoming_enable_colo(void); >> void migration_incoming_exit_colo(void); >> void *colo_process_incoming_thread(void *opaque); >> bool migration_incoming_in_colo_state(void); >> + >> +COLOMode get_colo_mode(void); >> #endif >> diff --git a/include/migration/failover.h b/include/migration/failover.h >> new file mode 100644 >> index 0000000..1785b52 >> --- /dev/null >> +++ b/include/migration/failover.h >> @@ -0,0 +1,20 @@ >> +/* >> + * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) >> + * (a.k.a. Fault Tolerance or Continuous Replication) >> + * >> + * Copyright (c) 2015 HUAWEI TECHNOLOGIES CO.,LTD. >> + * Copyright (c) 2015 FUJITSU LIMITED >> + * Copyright (c) 2015 Intel Corporation >> + * >> + * This work is licensed under the terms of the GNU GPL, version 2 or >> + * later. See the COPYING file in the top-level directory. 
>> + */ >> + >> +#ifndef QEMU_FAILOVER_H >> +#define QEMU_FAILOVER_H >> + >> +#include "qemu-common.h" >> + >> +void failover_request_active(Error **errp); >> + >> +#endif >> diff --git a/migration/Makefile.objs b/migration/Makefile.objs >> index 81b5713..920d1e7 100644 >> --- a/migration/Makefile.objs >> +++ b/migration/Makefile.objs >> @@ -1,6 +1,6 @@ >> common-obj-y += migration.o tcp.o >> -common-obj-$(CONFIG_COLO) += colo.o >> common-obj-y += colo-comm.o >> +common-obj-$(CONFIG_COLO) += colo.o colo-failover.o >> common-obj-y += vmstate.o >> common-obj-y += qemu-file.o qemu-file-buf.o qemu-file-unix.o qemu-file-stdio.o >> common-obj-y += xbzrle.o postcopy-ram.o >> diff --git a/migration/colo-comm.c b/migration/colo-comm.c >> index 30df3d3..58a6488 100644 >> --- a/migration/colo-comm.c >> +++ b/migration/colo-comm.c >> @@ -20,6 +20,17 @@ typedef struct { >> >> static COLOInfo colo_info; >> >> +COLOMode get_colo_mode(void) >> +{ >> + if (migration_in_colo_state()) { >> + return COLO_MODE_PRIMARY; >> + } else if (migration_incoming_in_colo_state()) { >> + return COLO_MODE_SECONDARY; >> + } else { >> + return COLO_MODE_UNKNOWN; >> + } >> +} >> + >> static void colo_info_pre_save(void *opaque) >> { >> COLOInfo *s = opaque; >> diff --git a/migration/colo-failover.c b/migration/colo-failover.c >> new file mode 100644 >> index 0000000..e3897c6 >> --- /dev/null >> +++ b/migration/colo-failover.c >> @@ -0,0 +1,41 @@ >> +/* >> + * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) >> + * (a.k.a. Fault Tolerance or Continuous Replication) >> + * >> + * Copyright (c) 2015 HUAWEI TECHNOLOGIES CO., LTD. >> + * Copyright (c) 2015 FUJITSU LIMITED >> + * Copyright (c) 2015 Intel Corporation >> + * >> + * This work is licensed under the terms of the GNU GPL, version 2 or >> + * later. See the COPYING file in the top-level directory. 
>> + */ >> + >> +#include "migration/colo.h" >> +#include "migration/failover.h" >> +#include "qmp-commands.h" >> +#include "qapi/qmp/qerror.h" >> + >> +static QEMUBH *failover_bh; >> + >> +static void colo_failover_bh(void *opaque) >> +{ >> + qemu_bh_delete(failover_bh); >> + failover_bh = NULL; >> + /*TODO: Do failover work */ >> +} >> + >> +void failover_request_active(Error **errp) >> +{ >> + failover_bh = qemu_bh_new(colo_failover_bh, NULL); >> + qemu_bh_schedule(failover_bh); >> +} >> + >> +void qmp_x_colo_lost_heartbeat(Error **errp) >> +{ >> + if (get_colo_mode() == COLO_MODE_UNKNOWN) { >> + error_setg(errp, QERR_FEATURE_DISABLED, "colo"); >> + return; >> + } >> + >> + failover_request_active(errp); >> +} >> diff --git a/migration/colo.c b/migration/colo.c >> index ca5df44..7098497 100644 >> --- a/migration/colo.c >> +++ b/migration/colo.c >> @@ -17,6 +17,7 @@ >> #include "trace.h" >> #include "qemu/error-report.h" >> #include "qemu/sockets.h" >> +#include "migration/failover.h" >> >> /* colo buffer */ >> #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024) >> diff --git a/qapi-schema.json b/qapi-schema.json >> index a5699a7..feb7d53 100644 >> --- a/qapi-schema.json >> +++ b/qapi-schema.json >> @@ -761,6 +761,35 @@ >> 'vmstate-send', 'vmstate-size','vmstate-received', >> 'vmstate-loaded' ] } >> >> +## >> +# @COLOMode >> +# >> +# The colo mode > > This is rather terse for an ignorant reader like me. > Hmm, this is used to distinguish the Primary and Secondary sides; I will add more comments. >> +# >> +# @unknown: unknown mode > > What does "unknown mode" mean, and how can it happen? > It will never happen; I will remove it. :) >> +# >> +# @primary: master side >> +# >> +# @secondary: slave side >> +# >> +# Since: 2.6 >> +## >> +{ 'enum': 'COLOMode', >> + 'data': [ 'unknown', 'primary', 'secondary'] } >> + >> +## >> +# @x-colo-lost-heartbeat >> +# >> +# Tell qemu that heartbeat is lost, request it to do takeover procedures. 
>> +# If this command is sent to the PVM, the Primary side will exit COLO mode. >> +# If sent to the Secondary, the Secondary side will run failover work, >> +# then takes over server operation to become the service VM. >> +# >> +# Since: 2.6 >> +## >> +{ 'command': 'x-colo-lost-heartbeat' } >> + >> +## >> # @MouseInfo: >> # >> # Information about a mouse device. >> diff --git a/qmp-commands.hx b/qmp-commands.hx >> index 89756c9..76ad208 100644 >> --- a/qmp-commands.hx >> +++ b/qmp-commands.hx >> @@ -805,6 +805,25 @@ Example: >> EQMP >> >> { >> + .name = "x-colo-lost-heartbeat", >> + .args_type = "", >> + .mhandler.cmd_new = qmp_marshal_x_colo_lost_heartbeat, >> + }, >> + >> +SQMP >> +x-colo-lost-heartbeat >> +-------------------- >> + >> +Tell COLO that heartbeat is lost, a failover or takeover is needed. >> + >> +Example: >> + >> +-> { "execute": "x-colo-lost-heartbeat" } >> +<- { "return": {} } >> + >> +EQMP >> + >> + { >> .name = "client_migrate_info", >> .args_type = "protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?", >> .params = "protocol hostname port tls-port cert-subject", >> diff --git a/stubs/migration-colo.c b/stubs/migration-colo.c >> index c12516e..5028f63 100644 >> --- a/stubs/migration-colo.c >> +++ b/stubs/migration-colo.c >> @@ -11,6 +11,7 @@ >> */ >> >> #include "migration/colo.h" >> +#include "qmp-commands.h" >> >> bool colo_supported(void) >> { >> @@ -35,3 +36,10 @@ void *colo_process_incoming_thread(void *opaque) >> { >> return NULL; >> } >> + >> +void qmp_x_colo_lost_heartbeat(Error **errp) >> +{ >> + error_setg(errp, "COLO is not supported, please rerun configure" >> + " with --enable-colo option in order to support" >> + " COLO feature"); >> +} > > . >
On 2015/12/22 21:50, Hailiang Zhang wrote: > On 2015/12/19 17:38, Markus Armbruster wrote: >> zhanghailiang <zhang.zhanghailiang@huawei.com> writes: >> >>> We leave users to choose whatever heartbeat solution they want, if the heartbeat >>> is lost, or other errors they detect, they can use experimental command >>> 'x_colo_lost_heartbeat' to tell COLO to do failover, COLO will do operations >>> accordingly. >>> >>> For example, if the command is sent to the PVM, the Primary side will >>> exit COLO mode and take over operation. If sent to the Secondary, the >>> secondary will run failover work, then take over server operation to >>> become the new Primary. >>> >>> Cc: Luiz Capitulino <lcapitulino@redhat.com> >>> Cc: Eric Blake <eblake@redhat.com> >>> Cc: Markus Armbruster <armbru@redhat.com> >>> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com> >>> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com> >>> --- >>> v11: >>> - Add more comments for x-colo-lost-heartbeat command (Eric's suggestion) >>> - Return 'enum' instead of 'int' for get_colo_mode() (Eric's suggestion) >>> v10: >>> - Rename command colo_lost_hearbeat to experimental 'x_colo_lost_heartbeat' >>> >>> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com> >>> --- >>> hmp-commands.hx | 15 +++++++++++++++ >>> hmp.c | 8 ++++++++ >>> hmp.h | 1 + >>> include/migration/colo.h | 3 +++ >>> include/migration/failover.h | 20 ++++++++++++++++++++ >>> migration/Makefile.objs | 2 +- >>> migration/colo-comm.c | 11 +++++++++++ >>> migration/colo-failover.c | 41 +++++++++++++++++++++++++++++++++++++++++ >>> migration/colo.c | 1 + >>> qapi-schema.json | 29 +++++++++++++++++++++++++++++ >>> qmp-commands.hx | 19 +++++++++++++++++++ >>> stubs/migration-colo.c | 8 ++++++++ >>> 12 files changed, 157 insertions(+), 1 deletion(-) >>> create mode 100644 include/migration/failover.h >>> create mode 100644 migration/colo-failover.c >>> >>> diff --git a/hmp-commands.hx b/hmp-commands.hx >>> index 
bb52e4d..a381b0b 100644 >>> --- a/hmp-commands.hx >>> +++ b/hmp-commands.hx >>> @@ -1039,6 +1039,21 @@ migration (or once already in postcopy). >>> ETEXI >>> >>> { >>> + .name = "x_colo_lost_heartbeat", >>> + .args_type = "", >>> + .params = "", >>> + .help = "Tell COLO that heartbeat is lost,\n\t\t\t" >>> + "a failover or takeover is needed.", >>> + .mhandler.cmd = hmp_x_colo_lost_heartbeat, >>> + }, >>> + >>> +STEXI >>> +@item x_colo_lost_heartbeat >>> +@findex x_colo_lost_heartbeat >>> +Tell COLO that heartbeat is lost, a failover or takeover is needed. >>> +ETEXI >>> + >>> + { >>> .name = "client_migrate_info", >>> .args_type = "protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?", >>> .params = "protocol hostname port tls-port cert-subject", >>> diff --git a/hmp.c b/hmp.c >>> index ee87d38..dc6dc30 100644 >>> --- a/hmp.c >>> +++ b/hmp.c >>> @@ -1310,6 +1310,14 @@ void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict) >>> hmp_handle_error(mon, &err); >>> } >>> >>> +void hmp_x_colo_lost_heartbeat(Monitor *mon, const QDict *qdict) >>> +{ >>> + Error *err = NULL; >>> + >>> + qmp_x_colo_lost_heartbeat(&err); >>> + hmp_handle_error(mon, &err); >>> +} >>> + >>> void hmp_set_password(Monitor *mon, const QDict *qdict) >>> { >>> const char *protocol = qdict_get_str(qdict, "protocol"); >>> diff --git a/hmp.h b/hmp.h >>> index a8c5b5a..864a300 100644 >>> --- a/hmp.h >>> +++ b/hmp.h >>> @@ -70,6 +70,7 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict); >>> void hmp_migrate_set_cache_size(Monitor *mon, const QDict *qdict); >>> void hmp_client_migrate_info(Monitor *mon, const QDict *qdict); >>> void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict); >>> +void hmp_x_colo_lost_heartbeat(Monitor *mon, const QDict *qdict); >>> void hmp_set_password(Monitor *mon, const QDict *qdict); >>> void hmp_expire_password(Monitor *mon, const QDict *qdict); >>> void hmp_eject(Monitor *mon, const QDict *qdict); >>> diff --git 
a/include/migration/colo.h b/include/migration/colo.h >>> index 2676c4a..ba27719 100644 >>> --- a/include/migration/colo.h >>> +++ b/include/migration/colo.h >>> @@ -17,6 +17,7 @@ >>> #include "migration/migration.h" >>> #include "qemu/coroutine_int.h" >>> #include "qemu/thread.h" >>> +#include "qemu/main-loop.h" >>> >>> bool colo_supported(void); >>> void colo_info_mig_init(void); >>> @@ -29,4 +30,6 @@ bool migration_incoming_enable_colo(void); >>> void migration_incoming_exit_colo(void); >>> void *colo_process_incoming_thread(void *opaque); >>> bool migration_incoming_in_colo_state(void); >>> + >>> +COLOMode get_colo_mode(void); >>> #endif >>> diff --git a/include/migration/failover.h b/include/migration/failover.h >>> new file mode 100644 >>> index 0000000..1785b52 >>> --- /dev/null >>> +++ b/include/migration/failover.h >>> @@ -0,0 +1,20 @@ >>> +/* >>> + * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) >>> + * (a.k.a. Fault Tolerance or Continuous Replication) >>> + * >>> + * Copyright (c) 2015 HUAWEI TECHNOLOGIES CO.,LTD. >>> + * Copyright (c) 2015 FUJITSU LIMITED >>> + * Copyright (c) 2015 Intel Corporation >>> + * >>> + * This work is licensed under the terms of the GNU GPL, version 2 or >>> + * later. See the COPYING file in the top-level directory. 
>>> + */ >>> + >>> +#ifndef QEMU_FAILOVER_H >>> +#define QEMU_FAILOVER_H >>> + >>> +#include "qemu-common.h" >>> + >>> +void failover_request_active(Error **errp); >>> + >>> +#endif >>> diff --git a/migration/Makefile.objs b/migration/Makefile.objs >>> index 81b5713..920d1e7 100644 >>> --- a/migration/Makefile.objs >>> +++ b/migration/Makefile.objs >>> @@ -1,6 +1,6 @@ >>> common-obj-y += migration.o tcp.o >>> -common-obj-$(CONFIG_COLO) += colo.o >>> common-obj-y += colo-comm.o >>> +common-obj-$(CONFIG_COLO) += colo.o colo-failover.o >>> common-obj-y += vmstate.o >>> common-obj-y += qemu-file.o qemu-file-buf.o qemu-file-unix.o qemu-file-stdio.o >>> common-obj-y += xbzrle.o postcopy-ram.o >>> diff --git a/migration/colo-comm.c b/migration/colo-comm.c >>> index 30df3d3..58a6488 100644 >>> --- a/migration/colo-comm.c >>> +++ b/migration/colo-comm.c >>> @@ -20,6 +20,17 @@ typedef struct { >>> >>> static COLOInfo colo_info; >>> >>> +COLOMode get_colo_mode(void) >>> +{ >>> + if (migration_in_colo_state()) { >>> + return COLO_MODE_PRIMARY; >>> + } else if (migration_incoming_in_colo_state()) { >>> + return COLO_MODE_SECONDARY; >>> + } else { >>> + return COLO_MODE_UNKNOWN; >>> + } >>> +} >>> + >>> static void colo_info_pre_save(void *opaque) >>> { >>> COLOInfo *s = opaque; >>> diff --git a/migration/colo-failover.c b/migration/colo-failover.c >>> new file mode 100644 >>> index 0000000..e3897c6 >>> --- /dev/null >>> +++ b/migration/colo-failover.c >>> @@ -0,0 +1,41 @@ >>> +/* >>> + * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) >>> + * (a.k.a. Fault Tolerance or Continuous Replication) >>> + * >>> + * Copyright (c) 2015 HUAWEI TECHNOLOGIES CO., LTD. >>> + * Copyright (c) 2015 FUJITSU LIMITED >>> + * Copyright (c) 2015 Intel Corporation >>> + * >>> + * This work is licensed under the terms of the GNU GPL, version 2 or >>> + * later. See the COPYING file in the top-level directory. 
>>> + */ >>> + >>> +#include "migration/colo.h" >>> +#include "migration/failover.h" >>> +#include "qmp-commands.h" >>> +#include "qapi/qmp/qerror.h" >>> + >>> +static QEMUBH *failover_bh; >>> + >>> +static void colo_failover_bh(void *opaque) >>> +{ >>> + qemu_bh_delete(failover_bh); >>> + failover_bh = NULL; >>> + /*TODO: Do failover work */ >>> +} >>> + >>> +void failover_request_active(Error **errp) >>> +{ >>> + failover_bh = qemu_bh_new(colo_failover_bh, NULL); >>> + qemu_bh_schedule(failover_bh); >>> +} >>> + >>> +void qmp_x_colo_lost_heartbeat(Error **errp) >>> +{ >>> + if (get_colo_mode() == COLO_MODE_UNKNOWN) { >>> + error_setg(errp, QERR_FEATURE_DISABLED, "colo"); >>> + return; >>> + } >>> + >>> + failover_request_active(errp); >>> +} >>> diff --git a/migration/colo.c b/migration/colo.c >>> index ca5df44..7098497 100644 >>> --- a/migration/colo.c >>> +++ b/migration/colo.c >>> @@ -17,6 +17,7 @@ >>> #include "trace.h" >>> #include "qemu/error-report.h" >>> #include "qemu/sockets.h" >>> +#include "migration/failover.h" >>> >>> /* colo buffer */ >>> #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024) >>> diff --git a/qapi-schema.json b/qapi-schema.json >>> index a5699a7..feb7d53 100644 >>> --- a/qapi-schema.json >>> +++ b/qapi-schema.json >>> @@ -761,6 +761,35 @@ >>> 'vmstate-send', 'vmstate-size','vmstate-received', >>> 'vmstate-loaded' ] } >>> >>> +## >>> +# @COLOMode >>> +# >>> +# The colo mode >> >> This is rather terse for an ignorant reader like me. >> > > Hmm, this is used to distinguish Primary and Secondary sides, I will > add more comments. > >>> +# >>> +# @unknown: unknown mode >> >> What does "unknown mode" mean, and how can it happen? >> > > It will never happen, i will remove it. :) > Er, I made a mistake: we do need this 'unknown' mode, which indicates that we are not in COLO mode. I will add more comments about it. 
>>> +# >>> +# @primary: master side >>> +# >>> +# @secondary: slave side >>> +# >>> +# Since: 2.6 >>> +## >>> +{ 'enum': 'COLOMode', >>> + 'data': [ 'unknown', 'primary', 'secondary'] } >>> + >>> +## >>> +# @x-colo-lost-heartbeat >>> +# >>> +# Tell qemu that heartbeat is lost, request it to do takeover procedures. >>> +# If this command is sent to the PVM, the Primary side will exit COLO mode. >>> +# If sent to the Secondary, the Secondary side will run failover work, >>> +# then takes over server operation to become the service VM. >>> +# >>> +# Since: 2.6 >>> +## >>> +{ 'command': 'x-colo-lost-heartbeat' } >>> + >>> +## >>> # @MouseInfo: >>> # >>> # Information about a mouse device. >>> diff --git a/qmp-commands.hx b/qmp-commands.hx >>> index 89756c9..76ad208 100644 >>> --- a/qmp-commands.hx >>> +++ b/qmp-commands.hx >>> @@ -805,6 +805,25 @@ Example: >>> EQMP >>> >>> { >>> + .name = "x-colo-lost-heartbeat", >>> + .args_type = "", >>> + .mhandler.cmd_new = qmp_marshal_x_colo_lost_heartbeat, >>> + }, >>> + >>> +SQMP >>> +x-colo-lost-heartbeat >>> +-------------------- >>> + >>> +Tell COLO that heartbeat is lost, a failover or takeover is needed. 
>>> + >>> +Example: >>> + >>> +-> { "execute": "x-colo-lost-heartbeat" } >>> +<- { "return": {} } >>> + >>> +EQMP >>> + >>> + { >>> .name = "client_migrate_info", >>> .args_type = "protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?", >>> .params = "protocol hostname port tls-port cert-subject", >>> diff --git a/stubs/migration-colo.c b/stubs/migration-colo.c >>> index c12516e..5028f63 100644 >>> --- a/stubs/migration-colo.c >>> +++ b/stubs/migration-colo.c >>> @@ -11,6 +11,7 @@ >>> */ >>> >>> #include "migration/colo.h" >>> +#include "qmp-commands.h" >>> >>> bool colo_supported(void) >>> { >>> @@ -35,3 +36,10 @@ void *colo_process_incoming_thread(void *opaque) >>> { >>> return NULL; >>> } >>> + >>> +void qmp_x_colo_lost_heartbeat(Error **errp) >>> +{ >>> + error_setg(errp, "COLO is not supported, please rerun configure" >>> + " with --enable-colo option in order to support" >>> + " COLO feature"); >>> +} >> >> . >> >
diff --git a/hmp-commands.hx b/hmp-commands.hx index bb52e4d..a381b0b 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1039,6 +1039,21 @@ migration (or once already in postcopy). ETEXI { + .name = "x_colo_lost_heartbeat", + .args_type = "", + .params = "", + .help = "Tell COLO that heartbeat is lost,\n\t\t\t" + "a failover or takeover is needed.", + .mhandler.cmd = hmp_x_colo_lost_heartbeat, + }, + +STEXI +@item x_colo_lost_heartbeat +@findex x_colo_lost_heartbeat +Tell COLO that heartbeat is lost, a failover or takeover is needed. +ETEXI + + { .name = "client_migrate_info", .args_type = "protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?", .params = "protocol hostname port tls-port cert-subject", diff --git a/hmp.c b/hmp.c index ee87d38..dc6dc30 100644 --- a/hmp.c +++ b/hmp.c @@ -1310,6 +1310,14 @@ void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict) hmp_handle_error(mon, &err); } +void hmp_x_colo_lost_heartbeat(Monitor *mon, const QDict *qdict) +{ + Error *err = NULL; + + qmp_x_colo_lost_heartbeat(&err); + hmp_handle_error(mon, &err); +} + void hmp_set_password(Monitor *mon, const QDict *qdict) { const char *protocol = qdict_get_str(qdict, "protocol"); diff --git a/hmp.h b/hmp.h index a8c5b5a..864a300 100644 --- a/hmp.h +++ b/hmp.h @@ -70,6 +70,7 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict); void hmp_migrate_set_cache_size(Monitor *mon, const QDict *qdict); void hmp_client_migrate_info(Monitor *mon, const QDict *qdict); void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict); +void hmp_x_colo_lost_heartbeat(Monitor *mon, const QDict *qdict); void hmp_set_password(Monitor *mon, const QDict *qdict); void hmp_expire_password(Monitor *mon, const QDict *qdict); void hmp_eject(Monitor *mon, const QDict *qdict); diff --git a/include/migration/colo.h b/include/migration/colo.h index 2676c4a..ba27719 100644 --- a/include/migration/colo.h +++ b/include/migration/colo.h @@ -17,6 +17,7 @@ #include 
"migration/migration.h" #include "qemu/coroutine_int.h" #include "qemu/thread.h" +#include "qemu/main-loop.h" bool colo_supported(void); void colo_info_mig_init(void); @@ -29,4 +30,6 @@ bool migration_incoming_enable_colo(void); void migration_incoming_exit_colo(void); void *colo_process_incoming_thread(void *opaque); bool migration_incoming_in_colo_state(void); + +COLOMode get_colo_mode(void); #endif diff --git a/include/migration/failover.h b/include/migration/failover.h new file mode 100644 index 0000000..1785b52 --- /dev/null +++ b/include/migration/failover.h @@ -0,0 +1,20 @@ +/* + * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) + * (a.k.a. Fault Tolerance or Continuous Replication) + * + * Copyright (c) 2015 HUAWEI TECHNOLOGIES CO.,LTD. + * Copyright (c) 2015 FUJITSU LIMITED + * Copyright (c) 2015 Intel Corporation + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_FAILOVER_H +#define QEMU_FAILOVER_H + +#include "qemu-common.h" + +void failover_request_active(Error **errp); + +#endif diff --git a/migration/Makefile.objs b/migration/Makefile.objs index 81b5713..920d1e7 100644 --- a/migration/Makefile.objs +++ b/migration/Makefile.objs @@ -1,6 +1,6 @@ common-obj-y += migration.o tcp.o -common-obj-$(CONFIG_COLO) += colo.o common-obj-y += colo-comm.o +common-obj-$(CONFIG_COLO) += colo.o colo-failover.o common-obj-y += vmstate.o common-obj-y += qemu-file.o qemu-file-buf.o qemu-file-unix.o qemu-file-stdio.o common-obj-y += xbzrle.o postcopy-ram.o diff --git a/migration/colo-comm.c b/migration/colo-comm.c index 30df3d3..58a6488 100644 --- a/migration/colo-comm.c +++ b/migration/colo-comm.c @@ -20,6 +20,17 @@ typedef struct { static COLOInfo colo_info; +COLOMode get_colo_mode(void) +{ + if (migration_in_colo_state()) { + return COLO_MODE_PRIMARY; + } else if (migration_incoming_in_colo_state()) { + return COLO_MODE_SECONDARY; + } else { + 
return COLO_MODE_UNKNOWN; + } +} + static void colo_info_pre_save(void *opaque) { COLOInfo *s = opaque; diff --git a/migration/colo-failover.c b/migration/colo-failover.c new file mode 100644 index 0000000..e3897c6 --- /dev/null +++ b/migration/colo-failover.c @@ -0,0 +1,41 @@ +/* + * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) + * (a.k.a. Fault Tolerance or Continuous Replication) + * + * Copyright (c) 2015 HUAWEI TECHNOLOGIES CO., LTD. + * Copyright (c) 2015 FUJITSU LIMITED + * Copyright (c) 2015 Intel Corporation + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#include "migration/colo.h" +#include "migration/failover.h" +#include "qmp-commands.h" +#include "qapi/qmp/qerror.h" + +static QEMUBH *failover_bh; + +static void colo_failover_bh(void *opaque) +{ + qemu_bh_delete(failover_bh); + failover_bh = NULL; + /*TODO: Do failover work */ +} + +void failover_request_active(Error **errp) +{ + failover_bh = qemu_bh_new(colo_failover_bh, NULL); + qemu_bh_schedule(failover_bh); +} + +void qmp_x_colo_lost_heartbeat(Error **errp) +{ + if (get_colo_mode() == COLO_MODE_UNKNOWN) { + error_setg(errp, QERR_FEATURE_DISABLED, "colo"); + return; + } + + failover_request_active(errp); +} diff --git a/migration/colo.c b/migration/colo.c index ca5df44..7098497 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -17,6 +17,7 @@ #include "trace.h" #include "qemu/error-report.h" #include "qemu/sockets.h" +#include "migration/failover.h" /* colo buffer */ #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024) diff --git a/qapi-schema.json b/qapi-schema.json index a5699a7..feb7d53 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -761,6 +761,35 @@ 'vmstate-send', 'vmstate-size','vmstate-received', 'vmstate-loaded' ] } +## +# @COLOMode +# +# The colo mode +# +# @unknown: unknown mode +# +# @primary: master side +# +# @secondary: slave side +# +# Since: 2.6 +## +{ 
'enum': 'COLOMode', + 'data': [ 'unknown', 'primary', 'secondary'] } + +## +# @x-colo-lost-heartbeat +# +# Tell qemu that heartbeat is lost, request it to do takeover procedures. +# If this command is sent to the PVM, the Primary side will exit COLO mode. +# If sent to the Secondary, the Secondary side will run failover work, +# then takes over server operation to become the service VM. +# +# Since: 2.6 +## +{ 'command': 'x-colo-lost-heartbeat' } + +## # @MouseInfo: # # Information about a mouse device. diff --git a/qmp-commands.hx b/qmp-commands.hx index 89756c9..76ad208 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -805,6 +805,25 @@ Example: EQMP { + .name = "x-colo-lost-heartbeat", + .args_type = "", + .mhandler.cmd_new = qmp_marshal_x_colo_lost_heartbeat, + }, + +SQMP +x-colo-lost-heartbeat +-------------------- + +Tell COLO that heartbeat is lost, a failover or takeover is needed. + +Example: + +-> { "execute": "x-colo-lost-heartbeat" } +<- { "return": {} } + +EQMP + + { .name = "client_migrate_info", .args_type = "protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?", .params = "protocol hostname port tls-port cert-subject", diff --git a/stubs/migration-colo.c b/stubs/migration-colo.c index c12516e..5028f63 100644 --- a/stubs/migration-colo.c +++ b/stubs/migration-colo.c @@ -11,6 +11,7 @@ */ #include "migration/colo.h" +#include "qmp-commands.h" bool colo_supported(void) { @@ -35,3 +36,10 @@ void *colo_process_incoming_thread(void *opaque) { return NULL; } + +void qmp_x_colo_lost_heartbeat(Error **errp) +{ + error_setg(errp, "COLO is not supported, please rerun configure" + " with --enable-colo option in order to support" + " COLO feature"); +}