difference which is the inaccuracy value which is caused by
block device flushing and also the qemu_get_clock_ns() is
being used in there and subsequent calls of this function
may result into minor inaccuracies (in the matter smaller than
of milliseconds).

I also did the testing with various migration speed settings
(using the set_migrate_speed monitor command) for 7 GiB
RHEL-6 i386 guest running bonnie++ test for 14 GiB (2x RAM)
and the results were as follows:

Max.speed     |    Memory transferred       |    Time (s)
--------------+-----------------------------+----------------
     32m      |       12 925 676 bytes      |      199 s
     64m      |        7 745 224 bytes      |       26 s
    128m      |        7 674 188 bytes      |       16 s
    256m      |        7 628 988 bytes      |       16 s
    512m      |        7 599 837 bytes      |       15 s
  1024m (1g)  |        7 592 934 bytes      |       14 s
     10g      |        7 583 824 bytes      |       13 s

This has been tested on the 1 GiB network using the remote
migration.

The output of the command for last iteration (shown as an
example was):

(qemu) info migrate-times
Total transferred memory: 7583824 kbytes
Total migration time: 13.552894 s
Waiting for input data: 6.942414 s
Time difference (inaccuracy): 0.018257 s
Times for total stage 1: 0.020247 s
Times for total stage 2: 6.355092 s
Times for total stage 3: 0.253398 s
Times for total total: 6.628737 s
Times for ram stage 1: 0.020238 s
Times for ram stage 2: 6.353832 s
Times for ram stage 3: 0.228953 s
Times for ram total: 6.603023 s
(qemu)

So please review. This patch could be useful for getting
the migration stage times.

Thanks,
Michal

Signed-off-by: Michal Novotny <minovotn@redhat.com>
---
 arch_init.c       |   12 +++++-
 block-migration.c |    5 ++
 hmp-commands.hx   |    2 +
 migration.c       |  105 +++++++++++++++++++++++++++++++++++++++++++++
 migration.h       |    4 ++
 monitor.c         |    8 +++
 savevm.c          |   50 ++++++++++++++++++---
 sysemu.h          |    6 +++
 vl.c              |  123 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 9 files changed, 307 insertions(+), 8 deletions(-)

diff --git a/arch_init.c b/arch_init.c
index 484b39d..684ae3c 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -252,8 +252,12 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
 {
     ram_addr_t addr;
     uint64_t bytes_transferred_last;
+    uint64_t t_start;
     double bwidth = 0;
     uint64_t expected_time = 0;
+    int retval = 0;
+
+    t_start = qemu_get_clock_ns(host_clock);
 
     if (stage < 0) {
         cpu_physical_memory_set_dirty_tracking(0);
@@ -272,6 +276,10 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
         last_offset = 0;
         sort_ram_list();
 
+        time_set("ram", 1, 0);
+        time_set("ram", 2, 0);
+        time_set("ram", 3, 0);
+
         /* Make sure all dirty bits are set */
         QLIST_FOREACH(block, &ram_list.blocks, next) {
             for (addr = block->offset; addr < block->offset + block->length;
@@ -331,8 +339,10 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
 
     expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
+    retval = (stage == 2) && (expected_time <= migrate_max_downtime());
 
-    return (stage == 2) && (expected_time <= migrate_max_downtime());
+    time_add2("ram", stage, qemu_get_clock_ns(host_clock), t_start);
+    return retval;
 }
 
 static inline void *host_from_stream_offset(QEMUFile *f,
diff --git a/block-migration.c b/block-migration.c
index 0936c7d..b53a1f4 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -17,6 +17,8 @@
 #include "qemu-queue.h"
 #include "qemu-timer.h"
 #include "monitor.h"
+#include "qemu-timer.h"
+#include "sysemu.h"
 #include "block-migration.h"
 #include "migration.h"
 #include "blockdev.h"
@@ -556,6 +558,7 @@ static void blk_mig_cleanup(Monitor *mon)
 
 static int block_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
 {
+    uint64_t t_start = qemu_get_clock_ns(host_clock);
     DPRINTF("Enter save live stage %d submitted %d transferred %d\n",
             stage, block_mig_state.submitted, block_mig_state.transferred);
 
@@ -633,6 +636,8 @@ static int block_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
 
     qemu_put_be64(f, BLK_MIG_FLAG_EOS);
 
+    time_add2("disk", stage, qemu_get_clock_ns(host_clock), t_start);
+
     return ((stage == 2) && is_stage2_completed());
 }
 
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 6ad8806..83b33c8 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1345,6 +1345,8 @@ show CPU statistics
 show user network stack connection states
 @item info migrate
 show migration status
+@item info migrate-times
+show migration times for last non-active migration
 @item info balloon
 show balloon information
 @item info qtree
diff --git a/migration.c b/migration.c
index af3a1f2..dee61d4 100644
--- a/migration.c
+++ b/migration.c
@@ -20,6 +20,7 @@
 #include "qemu_socket.h"
 #include "block-migration.h"
 #include "qemu-objects.h"
+#include "qemu-timer.h"
 
 //#define DEBUG_MIGRATION
 
@@ -39,6 +40,8 @@ static MigrationState *current_migration;
 static NotifierList migration_state_notifiers =
     NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
 
+static uint64_t t_last_put_ready;
+
 int qemu_start_incoming_migration(const char *uri)
 {
     const char *p;
@@ -224,6 +227,99 @@ static void migrate_put_status(QDict *qdict, const char *name,
     qdict_put_obj(qdict, name, obj);
 }
 
+static void migrate_times_print_status(Monitor *mon, const char *name,
+                                       const QDict *status_dict)
+{
+    QDict *qdict;
+    qdict = qobject_to_qdict(qdict_get(status_dict, name));
+
+    monitor_printf(mon, "Times for %s stage 1: %.6f s\n", name,
+                        qdict_get_int(qdict, "stage1") / 1000000.);
+    monitor_printf(mon, "Times for %s stage 2: %.6f s\n", name,
+                        qdict_get_int(qdict, "stage2") / 1000000.);
+    monitor_printf(mon, "Times for %s stage 3: %.6f s\n", name,
+                        qdict_get_int(qdict, "stage3") / 1000000.);
+    monitor_printf(mon, "Times for %s total: %.6f s\n", name,
+                        qdict_get_int(qdict, "total") / 1000000.);
+}
+
+void do_info_migrate_times_print(Monitor *mon, const QObject *data)
+{
+    QDict *qdict;
+    MigrationState *s = current_migration;
+
+    if (s && (s->get_status(s) == MIG_STATE_ACTIVE) ) {
+        monitor_printf(mon, "Cannot get migration result times when migration is still running\n");
+        return;
+    }
+
+    qdict = qobject_to_qdict(data);
+
+    monitor_printf(mon, "Total transferred memory: %"PRIu64" kbytes\n",
+                   qdict_get_int(qdict, "transferred-memory") >> 10);
+
+    monitor_printf(mon, "Total migration time: %.6f s\n",
+                   qdict_get_int(qdict, "migration-time") / 1000000.);
+
+    monitor_printf(mon, "Waiting for input data: %.6f s\n",
+                   qdict_get_int(qdict, "wait-input") / 1000000.);
+
+    if (qdict_haskey(qdict, "total")) {
+        QDict *qdict2;
+        qdict2 = qobject_to_qdict(qdict_get(qdict, "total"));
+
+        monitor_printf(mon, "Time difference (inaccuracy): %.6f s\n",
+                       abs(qdict_get_int(qdict, "migration-time") -
+                        qdict_get_int(qdict, "wait-input") -
+                        qdict_get_int(qdict2, "total")) / 1000000.);
+
+        migrate_times_print_status(mon, "total", qdict);
+    }
+
+    if (qdict_haskey(qdict, "ram")) {
+        migrate_times_print_status(mon, "ram", qdict);
+    }
+
+    if (qdict_haskey(qdict, "disk")) {
+        migrate_times_print_status(mon, "disk", qdict);
+    }
+}
+
+static void migrate_times_put_status(QDict *qdict, const char *name)
+{
+    QObject *obj;
+    uint64_t t0, t1, t2, t3;
+
+    t1 = time_get(name, 1) / 1000;
+    t2 = time_get(name, 2) / 1000;
+    t3 = time_get(name, 3) / 1000;
+    t0 = t1 + t2 + t3;
+
+    obj = qobject_from_jsonf("{ 'stage1': %" PRId64 ", "
+                               "'stage2': %" PRId64 ", "
+                               "'stage3': %" PRId64 ", "
+                               "'total' : %" PRId64 "  }", t1, t2, t3, t0);
+
+    qdict_put_obj(qdict, name, obj);
+}
+
+void do_info_migrate_times(Monitor *mon, QObject **ret_data)
+{
+    QDict *qdict;
+
+    qdict = qdict_new();
+    qdict_put(qdict, "migration-time", qint_from_int( save_total_time() / 1000 ) );
+    qdict_put(qdict, "wait-input", qint_from_int( time_get("wait-input", -1) / 1000 ) );
+    qdict_put(qdict, "transferred-memory", qint_from_int( ram_bytes_transferred() ) );
+
+    migrate_times_put_status(qdict, "total");
+    if (time_get("disk", 1) > 0)
+        migrate_times_put_status(qdict, "disk");
+    migrate_times_put_status(qdict, "ram");
+
+    *ret_data = QOBJECT(qdict);
+}
+
 void do_info_migrate(Monitor *mon, QObject **ret_data)
 {
     QDict *qdict;
@@ -344,6 +440,11 @@ void migrate_fd_connect(FdMigrationState *s)
 {
     int ret;
 
+    time_set("total", 1, 0);
+    time_set("total", 2, 0);
+    time_set("total", 3, 0);
+    time_set("wait-input", -1, 0);
+
     s->file = qemu_fopen_ops_buffered(s,
                                       s->bandwidth_limit,
                                       migrate_fd_put_buffer,
@@ -352,6 +453,7 @@ void migrate_fd_connect(FdMigrationState *s)
                                       migrate_fd_close);
 
     DPRINTF("beginning savevm\n");
+    t_last_put_ready = qemu_get_clock_ns(host_clock);
     ret = qemu_savevm_state_begin(s->mon, s->file, s->mig_state.blk,
                                   s->mig_state.shared);
     if (ret < 0) {
@@ -373,6 +475,7 @@ void migrate_fd_put_ready(void *opaque)
     }
 
     DPRINTF("iterate\n");
+    time_add2("wait-input", -1, qemu_get_clock_ns(host_clock), t_last_put_ready);
     if (qemu_savevm_state_iterate(s->mon, s->file) == 1) {
         int state;
         int old_vm_running = vm_running;
@@ -397,6 +500,8 @@ void migrate_fd_put_ready(void *opaque)
         s->state = state;
         notifier_list_notify(&migration_state_notifiers);
     }
+
+    t_last_put_ready = qemu_get_clock_ns(host_clock);
 }
 
 int migrate_fd_get_status(MigrationState *mig_state)
diff --git a/migration.h b/migration.h
index 050c56c..ef61f3f 100644
--- a/migration.h
+++ b/migration.h
@@ -70,6 +70,10 @@ void do_info_migrate_print(Monitor *mon, const QObject *data);
 
 void do_info_migrate(Monitor *mon, QObject **ret_data);
 
+void do_info_migrate_times_print(Monitor *mon, const QObject *data);
+
+void do_info_migrate_times(Monitor *mon, QObject **ret_data);
+
 int exec_start_incoming_migration(const char *host_port);
 
 MigrationState *exec_start_outgoing_migration(Monitor *mon,
diff --git a/monitor.c b/monitor.c
index 67ceb46..ac555f3 100644
--- a/monitor.c
+++ b/monitor.c
@@ -3060,6 +3060,14 @@ static const mon_cmd_t info_cmds[] = {
         .mhandler.info_new = do_info_migrate,
     },
     {
+        .name       = "migrate-times",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show migration times",
+        .user_print = do_info_migrate_times_print,
+        .mhandler.info_new = do_info_migrate_times,
+    },
+    {
         .name       = "balloon",
         .args_type  = "",
         .params     = "",
diff --git a/savevm.c b/savevm.c
index 8139bc7..3ca9b8a 100644
--- a/savevm.c
+++ b/savevm.c
@@ -1447,6 +1447,14 @@ static void vmstate_save(QEMUFile *f, SaveStateEntry *se)
 #define QEMU_VM_SECTION_FULL         0x04
 #define QEMU_VM_SUBSECTION           0x05
 
+static uint64_t t_save_start;
+static uint64_t t_save_total;
+
+uint64_t save_total_time(void)
+{
+    return t_save_total;
+}
+
 bool qemu_savevm_state_blocked(Monitor *mon)
 {
     SaveStateEntry *se;
@@ -1465,6 +1473,9 @@ int qemu_savevm_state_begin(Monitor *mon, QEMUFile *f, int blk_enable,
                             int shared)
 {
     SaveStateEntry *se;
+    int ret = 0;
+
+    t_save_start = qemu_get_clock_ns(host_clock);
 
     QTAILQ_FOREACH(se, &savevm_handlers, entry) {
         if(se->set_params == NULL) {
@@ -1472,7 +1483,7 @@ int qemu_savevm_state_begin(Monitor *mon, QEMUFile *f, int blk_enable,
 	}
 	se->set_params(blk_enable, shared, se->opaque);
     }
-    
+
     qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
     qemu_put_be32(f, QEMU_VM_FILE_VERSION);
 
@@ -1499,17 +1510,21 @@ int qemu_savevm_state_begin(Monitor *mon, QEMUFile *f, int blk_enable,
 
     if (qemu_file_has_error(f)) {
         qemu_savevm_state_cancel(mon, f);
-        return -EIO;
+        ret = -EIO;
     }
 
-    return 0;
+    time_add2("total", 1, qemu_get_clock_ns(host_clock), t_save_start);
+    return ret;
 }
 
 int qemu_savevm_state_iterate(Monitor *mon, QEMUFile *f)
 {
     SaveStateEntry *se;
+    uint64_t t_start;
     int ret = 1;
 
+    t_start = qemu_get_clock_ns(host_clock);
+
     QTAILQ_FOREACH(se, &savevm_handlers, entry) {
         if (se->save_live_state == NULL)
             continue;
@@ -1528,20 +1543,30 @@ int qemu_savevm_state_iterate(Monitor *mon, QEMUFile *f)
         }
     }
 
-    if (ret)
-        return 1;
+    if (ret) {
+        ret = 1;
+        goto out;
+    }
 
     if (qemu_file_has_error(f)) {
         qemu_savevm_state_cancel(mon, f);
-        return -EIO;
+        ret = -EIO;
     }
+    else
+        ret = 0;
 
-    return 0;
+out:
+    time_add2("total", 2, qemu_get_clock_ns(host_clock), t_start);
+    return ret;
 }
 
 int qemu_savevm_state_complete(Monitor *mon, QEMUFile *f)
 {
     SaveStateEntry *se;
+    uint64_t t_start;
+    uint64_t t_help;
+
+    t_start = qemu_get_clock_ns(host_clock);
 
     cpu_synchronize_all_states();
 
@@ -1582,6 +1607,10 @@ int qemu_savevm_state_complete(Monitor *mon, QEMUFile *f)
     if (qemu_file_has_error(f))
         return -EIO;
 
+    t_help = qemu_get_clock_ns(host_clock);
+    t_save_total = t_help - t_save_start;
+    time_add2("total", 3, t_help, t_start);
+
     return 0;
 }
 
@@ -1589,6 +1618,13 @@ void qemu_savevm_state_cancel(Monitor *mon, QEMUFile *f)
 {
     SaveStateEntry *se;
 
+    time_set("ram", 1, 0);
+    time_set("ram", 2, 0);
+    time_set("ram", 3, 0);
+    time_set("disk", 1, 0);
+    time_set("disk", 2, 0);
+    time_set("disk", 3, 0);
+
     QTAILQ_FOREACH(se, &savevm_handlers, entry) {
         if (se->save_live_state) {
             se->save_live_state(mon, f, -1, se->opaque);
diff --git a/sysemu.h b/sysemu.h
index d3013f5..c533706 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -77,6 +77,12 @@ int qemu_savevm_state_complete(Monitor *mon, QEMUFile *f);
 void qemu_savevm_state_cancel(Monitor *mon, QEMUFile *f);
 int qemu_loadvm_state(QEMUFile *f);
 
+uint64_t time_get(const char *name, int stage);
+void time_set(const char *name, int stage, uint64_t tv);
+void time_add(const char *name, int stage, uint64_t tv);
+void time_add2(const char *name, int stage, uint64_t time1, uint64_t time2);
+uint64_t save_total_time(void);
+
 /* SLIRP */
 void do_info_slirp(Monitor *mon);
 
diff --git a/vl.c b/vl.c
index fcd7395..340cbe3 100644
--- a/vl.c
+++ b/vl.c
@@ -229,6 +229,12 @@ unsigned int nb_prom_envs = 0;
 const char *prom_envs[MAX_PROM_ENVS];
 int boot_menu;
 
+/* Times */
+static uint64_t time_save_total[3];
+static uint64_t time_save_disk[3];
+static uint64_t time_save_ram[3];
+static uint64_t time_save_wait_input;
+
 typedef struct FWBootEntry FWBootEntry;
 
 struct FWBootEntry {
@@ -348,6 +354,123 @@ int qemu_timedate_diff(struct tm *tm)
     return seconds - time(NULL);
 }
 
+static void time_set_ram(int stage, uint64_t tv)
+{
+    if ((stage < 0) || (stage > 3))
+        return;
+
+    time_save_ram[stage - 1] = tv;
+}
+
+static void time_set_disk(int stage, uint64_t tv)
+{
+    if ((stage < 0) || (stage > 3))
+        return;
+
+    time_save_disk[stage - 1] = tv;
+}
+
+static void time_set_total(int stage, uint64_t tv)
+{
+    if ((stage < 0) || (stage > 3))
+        return;
+
+    time_save_total[stage - 1] = tv;
+}
+
+void time_set(const char *name, int stage, uint64_t tv)
+{
+    if (strcmp(name, "ram") == 0)
+        time_set_ram(stage, tv);
+    if (strcmp(name, "disk") == 0)
+        time_set_disk(stage, tv);
+    if (strcmp(name, "wait-input") == 0)
+        time_save_wait_input = tv;
+    if (strcmp(name, "total") == 0)
+        time_set_total(stage, tv);
+}
+
+static uint64_t time_get_ram(int stage) {
+    if ((stage < 0) || (stage > 3))
+        return 0;
+
+    return time_save_ram[stage - 1];
+}
+
+static uint64_t time_get_disk(int stage) {
+    if ((stage < 0) || (stage > 3))
+        return 0;
+
+    return time_save_disk[stage - 1];
+}
+
+static uint64_t time_get_total(int stage) {
+    if ((stage < 0) || (stage > 3))
+        return 0;
+
+    return time_save_total[stage - 1];
+}
+
+uint64_t time_get(const char *name, int stage)
+{
+    if (strcmp(name, "ram") == 0)
+        return time_get_ram(stage);
+    if (strcmp(name, "disk") == 0)
+        return time_get_disk(stage);
+    if (strcmp(name, "wait-input") == 0)
+        return time_save_wait_input;
+    if (strcmp(name, "total") == 0)
+        return time_get_total(stage);
+
+    return 0;
+}
+
+static void time_add_ram(int stage, uint64_t tv)
+{
+    if ((stage < 0) || (stage > 3))
+        return;
+
+    time_save_ram[stage - 1] += tv;
+}
+
+static void time_add_disk(int stage, uint64_t tv)
+{
+    if ((stage < 0) || (stage > 3))
+        return;
+
+    time_save_disk[stage - 1] += tv;
+}
+
+static void time_add_wait_input(uint64_t tv)
+{
+    time_save_wait_input += tv;
+}
+
+static void time_add_total(int stage, uint64_t tv)
+{
+    if ((stage < 0) || (stage > 3))
+        return;
+
+    time_save_total[stage - 1] += tv;
+}
+
+void time_add(const char *name, int stage, uint64_t tv)
+{
+    if (strcmp(name, "ram") == 0)
+        time_add_ram(stage, tv);
+    if (strcmp(name, "disk") == 0)
+        time_add_disk(stage, tv);
+    if (strcmp(name, "wait-input") == 0)
+        time_add_wait_input(tv);
+    if (strcmp(name, "total") == 0)
+        time_add_total(stage, tv);
+}
+
+void time_add2(const char *name, int stage, uint64_t time1, uint64_t time2)
+{
+    time_add(name, stage, time1 - time2);
+}
+
 void rtc_change_mon_event(struct tm *tm)
 {
     QObject *data;
