diff mbox

[RFC,1/1] COLO: Add primary side rolling statistics

Message ID 1425562294-1616-2-git-send-email-dgilbert@redhat.com
State New
Headers show

Commit Message

Dr. David Alan Gilbert March 5, 2015, 1:31 p.m. UTC
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>

Record rolling statistics on the COLO primary side for:
  Checkpoint lifetime (ms)
  Pause time due to checkpoint (ms)
  Checkpoint size (bytes)

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 hmp.c                         | 12 ++++++++++++
 include/migration/migration.h |  3 +++
 migration/colo.c              | 15 +++++++++++++++
 migration/migration.c         | 30 ++++++++++++++++++++++++++++++
 qapi-schema.json              | 11 ++++++++++-
 5 files changed, 70 insertions(+), 1 deletion(-)
diff mbox

Patch

diff --git a/hmp.c b/hmp.c
index c724efa..2b17ed0 100644
--- a/hmp.c
+++ b/hmp.c
@@ -197,6 +197,18 @@  void hmp_info_migrate(Monitor *mon, const QDict *qdict)
                            info->setup_time);
         }
     }
+    if (info->has_colo_checkpoint_stats) {
+        monitor_printf_RollingStats(mon, "colo checkpoint (ms)",
+                           info->colo_checkpoint_stats);
+    }
+    if (info->has_colo_paused_stats) {
+        monitor_printf_RollingStats(mon, "colo paused time (ms)",
+                           info->colo_paused_stats);
+    }
+    if (info->has_colo_size_stats) {
+        monitor_printf_RollingStats(mon, "colo checkpoint size",
+                           info->colo_size_stats);
+    }
 
     if (info->has_ram) {
         monitor_printf(mon, "transferred ram: %" PRIu64 " kbytes\n",
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 9893467..564edaa 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -64,6 +64,9 @@  struct MigrationState
     int64_t setup_time;
     int64_t dirty_sync_count;
     RStats *expected_downtime_stats;
+    RStats *colo_checkpoint_stats;
+    RStats *colo_paused_stats;
+    RStats *colo_size_stats;
 };
 
 enum {
diff --git a/migration/colo.c b/migration/colo.c
index 042dec8..653ef25 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -15,6 +15,7 @@ 
 #include "sysemu/sysemu.h"
 #include "migration/migration-colo.h"
 #include "qemu/error-report.h"
+#include "qemu/rolling-stats.h"
 #include "migration/migration-failover.h"
 #include "net/colo-nic.h"
 #include "block/block.h"
@@ -272,6 +273,7 @@  static int do_colo_transaction(MigrationState *s, QEMUFile *control)
     int ret;
     size_t size;
     QEMUFile *trans = NULL;
+    int64_t stop_time, start_time;
 
     ret = colo_ctl_put(s->file, COLO_CHECKPOINT_NEW);
     if (ret < 0) {
@@ -295,6 +297,7 @@  static int do_colo_transaction(MigrationState *s, QEMUFile *control)
         goto out;
     }
     /* suspend and save vm state to colo buffer */
+    stop_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
     qemu_mutex_lock_iothread();
     vm_stop_force_state(RUN_STATE_COLO);
     qemu_mutex_unlock_iothread();
@@ -343,6 +346,9 @@  static int do_colo_transaction(MigrationState *s, QEMUFile *control)
     if (ret < 0) {
         goto out;
     }
+
+    rstats_add_value(s->colo_size_stats, size, stop_time);
+
     ret = colo_ctl_get(control, COLO_CHECKPOINT_RECEIVED);
     if (ret < 0) {
         goto out;
@@ -366,6 +372,11 @@  static int do_colo_transaction(MigrationState *s, QEMUFile *control)
     qemu_mutex_lock_iothread();
     vm_start();
     qemu_mutex_unlock_iothread();
+    start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
+    rstats_add_value(s->colo_paused_stats,
+                     start_time - stop_time,
+                     start_time);
+
     DPRINTF("vm resume to run again\n");
 
 out:
@@ -450,6 +461,10 @@  static void *colo_thread(void *opaque)
             DPRINTF("Net packets is not consistent!!!\n");
         }
 
+        current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
+        rstats_add_value(s->colo_checkpoint_stats,
+                         current_time - start_time,
+                         current_time);
         /* start a colo checkpoint */
         if (do_colo_transaction(s, colo_control)) {
             goto out;
diff --git a/migration/migration.c b/migration/migration.c
index 794d94a..7c0517a 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -244,6 +244,21 @@  MigrationInfo *qmp_query_migrate(Error **errp)
     case MIG_STATE_COLO:
         info->has_status = true;
         info->status = g_strdup("colo");
+        if (s->colo_checkpoint_stats) {
+            info->colo_checkpoint_stats =
+                rstats_as_RollingStats(s->colo_checkpoint_stats);
+            info->has_colo_checkpoint_stats = true;
+        }
+        if (s->colo_paused_stats) {
+            info->colo_paused_stats =
+                rstats_as_RollingStats(s->colo_paused_stats);
+            info->has_colo_paused_stats = true;
+        }
+        if (s->colo_size_stats) {
+            info->colo_size_stats =
+                rstats_as_RollingStats(s->colo_size_stats);
+            info->has_colo_size_stats = true;
+        }
         /* TODO: display COLO specific informations(checkpoint info etc.),*/
         break;
     case MIG_STATE_COMPLETED:
@@ -433,6 +448,21 @@  static MigrationState *migrate_init(const MigrationParams *params)
     } else {
         rstats_reset(s->expected_downtime_stats);
     }
+    if (!s->colo_checkpoint_stats) {
+        s->colo_checkpoint_stats = rstats_init(10, 0.2);
+    } else {
+        rstats_reset(s->colo_checkpoint_stats);
+    }
+    if (!s->colo_paused_stats) {
+        s->colo_paused_stats = rstats_init(10, 0.2);
+    } else {
+        rstats_reset(s->colo_paused_stats);
+    }
+    if (!s->colo_size_stats) {
+        s->colo_size_stats = rstats_init(10, 0.2);
+    } else {
+        rstats_reset(s->colo_size_stats);
+    }
     s->bandwidth_limit = bandwidth_limit;
     s->state = MIG_STATE_SETUP;
     trace_migrate_set_state(MIG_STATE_SETUP);
diff --git a/qapi-schema.json b/qapi-schema.json
index 2ec35c7..f2a666c 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -486,6 +486,12 @@ 
 # @expected-downtime-stats: #optional more detailed statistics from the
 #        downtime estimation.
 #
+# @colo-checkpoint-stats: #optional The lifetime of COLO checkpoints (ms)
+#
+# @colo-paused-stats: #optional The time paused (ms) as COLO took checkpoints
+#
+# @colo-size-stats: #optional The size of COLO checkpoints (bytes)
+#
 # Since: 0.14.0
 ##
 { 'type': 'MigrationInfo',
@@ -496,7 +502,10 @@ 
            '*expected-downtime': 'int',
            '*downtime': 'int',
            '*setup-time': 'int',
-           '*expected-downtime-stats': 'RollingStats' } }
+           '*expected-downtime-stats': 'RollingStats',
+           '*colo-checkpoint-stats': 'RollingStats',
+           '*colo-paused-stats': 'RollingStats',
+           '*colo-size-stats': 'RollingStats' } }
 
 ##
 # @query-migrate