diff mbox series

[v2,3/5] migration: Add per vmstate downtime tracepoints

Message ID 20231030163346.765724-4-peterx@redhat.com
State New
Headers show
Series migration: Downtime tracepoints | expand

Commit Message

Peter Xu Oct. 30, 2023, 4:33 p.m. UTC
We have a bunch of savevm_section* tracepoints, they're good to analyze
migration stream, but not always suitable if someone would like to analyze
the migration downtime.  Two major problems:

  - savevm_section* tracepoints are dumping all sections, we only care
    about the sections that contribute to the downtime

  - They don't have an identifier to show the type of sections, so no way
    to filter downtime information either easily.

We can add type into the tracepoints, but instead of doing so, this patch
kept them untouched, instead of adding a bunch of downtime specific
tracepoints, so one can enable "vmstate_downtime*" tracepoints and get a
full picture of how the downtime is distributed across iterative and
non-iterative vmstate save/load.

Note that here both save() and load() need to be traced, because both of
them may contribute to the downtime.  The contribution is not a simple "add
them together", though: consider when the src is doing a save() of device1
while the dest can be load()ing for device2, so they can happen
concurrently.

Tracking both sides make sense because device load() and save() can be
imbalanced, one device can save() super fast, but load() super slow, vice
versa.  We can't figure that out without tracing both.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 migration/savevm.c     | 49 ++++++++++++++++++++++++++++++++++++++----
 migration/trace-events |  2 ++
 2 files changed, 47 insertions(+), 4 deletions(-)

Comments

Juan Quintela Oct. 31, 2023, 12:52 p.m. UTC | #1
Peter Xu <peterx@redhat.com> wrote:
> We have a bunch of savevm_section* tracepoints, they're good to analyze
> migration stream, but not always suitable if someone would like to analyze
> the migration downtime.  Two major problems:
>
>   - savevm_section* tracepoints are dumping all sections, we only care
>     about the sections that contribute to the downtime
>
>   - They don't have an identifier to show the type of sections, so no way
>     to filter downtime information either easily.
>
> We can add type into the tracepoints, but instead of doing so, this patch
> kept them untouched, instead of adding a bunch of downtime specific
> tracepoints, so one can enable "vmstate_downtime*" tracepoints and get a
> full picture of how the downtime is distributed across iterative and
> non-iterative vmstate save/load.
>
> Note that here both save() and load() need to be traced, because both of
> them may contribute to the downtime.  The contribution is not a simple "add
> them together", though: consider when the src is doing a save() of device1
> while the dest can be load()ing for device2, so they can happen
> concurrently.
>
> Tracking both sides make sense because device load() and save() can be
> imbalanced, one device can save() super fast, but load() super slow, vice
> versa.  We can't figure that out without tracing both.
>
> Signed-off-by: Peter Xu <peterx@redhat.com>

Reviewed-by: Juan Quintela <quintela@redhat.com>

queued.

>  static
>  int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
>  {
> +    int64_t start_ts_each, end_ts_each;
>      SaveStateEntry *se;
>      int ret;
>  
> @@ -1475,6 +1476,8 @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
>                  continue;
>              }
>          }
> +
> +        start_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
I still think that:
           int64_t start_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);

>          trace_savevm_section_start(se->idstr, se->section_id);
>  
>          save_section_header(f, se, QEMU_VM_SECTION_END);
> @@ -1486,6 +1489,9 @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
>              qemu_file_set_error(f, ret);
>              return -1;
>          }
> +        end_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);

and

           int64_t end_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);

is clearer.

Having to pass the type thing is not "pleasant", but I can't think of a
better way to do it.

Later, Juan.
diff mbox series

Patch

diff --git a/migration/savevm.c b/migration/savevm.c
index 8622f229e5..cd6d6ba493 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1459,6 +1459,7 @@  void qemu_savevm_state_complete_postcopy(QEMUFile *f)
 static
 int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
 {
+    int64_t start_ts_each, end_ts_each;
     SaveStateEntry *se;
     int ret;
 
@@ -1475,6 +1476,8 @@  int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
                 continue;
             }
         }
+
+        start_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
         trace_savevm_section_start(se->idstr, se->section_id);
 
         save_section_header(f, se, QEMU_VM_SECTION_END);
@@ -1486,6 +1489,9 @@  int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
             qemu_file_set_error(f, ret);
             return -1;
         }
+        end_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+        trace_vmstate_downtime_save("iterable", se->idstr, se->instance_id,
+                                    end_ts_each - start_ts_each);
     }
 
     return 0;
@@ -1496,6 +1502,7 @@  int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
                                                     bool inactivate_disks)
 {
     MigrationState *ms = migrate_get_current();
+    int64_t start_ts_each, end_ts_each;
     JSONWriter *vmdesc = ms->vmdesc;
     int vmdesc_len;
     SaveStateEntry *se;
@@ -1507,11 +1514,17 @@  int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
             continue;
         }
 
+        start_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+
         ret = vmstate_save(f, se, vmdesc);
         if (ret) {
             qemu_file_set_error(f, ret);
             return ret;
         }
+
+        end_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+        trace_vmstate_downtime_save("non-iterable", se->idstr, se->instance_id,
+                                    end_ts_each - start_ts_each);
     }
 
     if (inactivate_disks) {
@@ -2506,9 +2519,12 @@  static bool check_section_footer(QEMUFile *f, SaveStateEntry *se)
 }
 
 static int
-qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)
+qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis,
+                               uint8_t type)
 {
+    bool trace_downtime = (type == QEMU_VM_SECTION_FULL);
     uint32_t instance_id, version_id, section_id;
+    int64_t start_ts, end_ts;
     SaveStateEntry *se;
     char idstr[256];
     int ret;
@@ -2557,12 +2573,23 @@  qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)
         return -EINVAL;
     }
 
+    if (trace_downtime) {
+        start_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+    }
+
     ret = vmstate_load(f, se);
     if (ret < 0) {
         error_report("error while loading state for instance 0x%"PRIx32" of"
                      " device '%s'", instance_id, idstr);
         return ret;
     }
+
+    if (trace_downtime) {
+        end_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+        trace_vmstate_downtime_load("non-iterable", se->idstr,
+                                    se->instance_id, end_ts - start_ts);
+    }
+
     if (!check_section_footer(f, se)) {
         return -EINVAL;
     }
@@ -2571,8 +2598,11 @@  qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)
 }
 
 static int
-qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis)
+qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis,
+                             uint8_t type)
 {
+    bool trace_downtime = (type == QEMU_VM_SECTION_END);
+    int64_t start_ts, end_ts;
     uint32_t section_id;
     SaveStateEntry *se;
     int ret;
@@ -2597,12 +2627,23 @@  qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis)
         return -EINVAL;
     }
 
+    if (trace_downtime) {
+        start_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+    }
+
     ret = vmstate_load(f, se);
     if (ret < 0) {
         error_report("error while loading state section id %d(%s)",
                      section_id, se->idstr);
         return ret;
     }
+
+    if (trace_downtime) {
+        end_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+        trace_vmstate_downtime_load("iterable", se->idstr,
+                                    se->instance_id, end_ts - start_ts);
+    }
+
     if (!check_section_footer(f, se)) {
         return -EINVAL;
     }
@@ -2791,14 +2832,14 @@  retry:
         switch (section_type) {
         case QEMU_VM_SECTION_START:
         case QEMU_VM_SECTION_FULL:
-            ret = qemu_loadvm_section_start_full(f, mis);
+            ret = qemu_loadvm_section_start_full(f, mis, section_type);
             if (ret < 0) {
                 goto out;
             }
             break;
         case QEMU_VM_SECTION_PART:
         case QEMU_VM_SECTION_END:
-            ret = qemu_loadvm_section_part_end(f, mis);
+            ret = qemu_loadvm_section_part_end(f, mis, section_type);
             if (ret < 0) {
                 goto out;
             }
diff --git a/migration/trace-events b/migration/trace-events
index fa9486dffe..5820add1f3 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -48,6 +48,8 @@  savevm_state_cleanup(void) ""
 savevm_state_complete_precopy(void) ""
 vmstate_save(const char *idstr, const char *vmsd_name) "%s, %s"
 vmstate_load(const char *idstr, const char *vmsd_name) "%s, %s"
+vmstate_downtime_save(const char *type, const char *idstr, uint32_t instance_id, int64_t downtime) "type=%s idstr=%s instance_id=%d downtime=%"PRIi64
+vmstate_downtime_load(const char *type, const char *idstr, uint32_t instance_id, int64_t downtime) "type=%s idstr=%s instance_id=%d downtime=%"PRIi64
 postcopy_pause_incoming(void) ""
 postcopy_pause_incoming_continued(void) ""
 postcopy_page_req_sync(void *host_addr) "sync page req %p"