[v3,07/10] migration: Add migration_rp_wait|kick()

Message ID 20231004220240.167175-8-peterx@redhat.com
State New
Series migration: Better error handling in rp thread, allow failures in recover

Commit Message

Peter Xu Oct. 4, 2023, 10:02 p.m. UTC
It's just a simple pair of wrappers around rp_sem, one for wait() and one
for kick(), to make its usage clearer.  It also prepares the semaphore to
be used for other things.

Reviewed-by: Fabiano Rosas <farosas@suse.de>
Signed-off-by: Peter Xu <peterx@redhat.com>
---
 migration/migration.h | 15 +++++++++++++++
 migration/migration.c | 14 ++++++++++++--
 migration/ram.c       | 16 +++++++---------
 3 files changed, 34 insertions(+), 11 deletions(-)
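
For context, the pairing the two helpers encode looks like this (a minimal
sketch; condition_satisfied() is a hypothetical placeholder for whatever
the caller waits on, not code from this patch):

    /* Migration thread: block until the return path makes progress. */
    while (!condition_satisfied(s)) {
        migration_rp_wait(s);   /* qemu_sem_wait(&s->rp_state.rp_sem) */
    }

    /* Return path thread: wake the migration thread so it re-checks. */
    migration_rp_kick(s);       /* qemu_sem_post(&s->rp_state.rp_sem) */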

Comments

Juan Quintela Oct. 5, 2023, 7:49 a.m. UTC | #1
Peter Xu <peterx@redhat.com> wrote:
> It's just a simple pair of wrappers around rp_sem, one for wait() and
> one for kick(), to make its usage clearer.  It also prepares the
> semaphore to be used for other things.
>
> Reviewed-by: Fabiano Rosas <farosas@suse.de>
> Signed-off-by: Peter Xu <peterx@redhat.com>

Reviewed-by: Juan Quintela <quintela@redhat.com>

I agree with the idea, but I think that the problem is the name of the
semaphore.


> +void migration_rp_wait(MigrationState *s)
> +{
> +    qemu_sem_wait(&s->rp_state.rp_sem);

I am not sure if it would be better to have the wrappers or just rename
the semaphore.

If we rename the semaphore to return_path_ready, this becomes:

    qemu_sem_wait(&s->rp_state.return_path_ready);

    qemu_sem_post(&s->rp_state.return_path_ready);

Or something similar?
Peter Xu Oct. 5, 2023, 8:47 p.m. UTC | #2
On Thu, Oct 05, 2023 at 09:49:25AM +0200, Juan Quintela wrote:
> Peter Xu <peterx@redhat.com> wrote:
> > It's just a simple pair of wrappers around rp_sem, one for wait() and
> > one for kick(), to make its usage clearer.  It also prepares the
> > semaphore to be used for other things.
> >
> > Reviewed-by: Fabiano Rosas <farosas@suse.de>
> > Signed-off-by: Peter Xu <peterx@redhat.com>
> 
> Reviewed-by: Juan Quintela <quintela@redhat.com>
> 
> I agree with the idea, but I think that the problem is the name of the
> semaphore.
> 
> > +void migration_rp_wait(MigrationState *s)
> > +{
> > +    qemu_sem_wait(&s->rp_state.rp_sem);
> 
> I am not sure if it would be better to have the wrappers or just rename
> the semaphore.
> 
> If we rename the semaphore to return_path_ready, this becomes:
> 
>     qemu_sem_wait(&s->rp_state.return_path_ready);
> 
>     qemu_sem_post(&s->rp_state.return_path_ready);
> 
> Or something similar?

I'd prefer keeping a pair of helpers, but I'm open to other suggestions,
e.g. I can rename the sem at the same time, or have a better name just for
the helpers.

Thanks,
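
For comparison, the combined option mentioned above (keep the helpers and
also rename the semaphore) could look like the sketch below; the field
name return_path_ready comes from Juan's suggestion and is not part of
this patch:

    void migration_rp_wait(MigrationState *s)
    {
        qemu_sem_wait(&s->rp_state.return_path_ready);
    }

    void migration_rp_kick(MigrationState *s)
    {
        qemu_sem_post(&s->rp_state.return_path_ready);
    }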

Patch

diff --git a/migration/migration.h b/migration/migration.h
index 33a7831da4..573aa69f19 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -315,6 +315,12 @@  struct MigrationState {
          * be cleared in the rp_thread!
          */
         bool          rp_thread_created;
+        /*
+         * Used to synchronize between the migration main thread and the
+         * return path thread.  The migration thread can wait() on this
+         * sem, while other threads (e.g., return path thread) can kick it
+         * using a post().
+         */
         QemuSemaphore rp_sem;
         /*
          * We post to this when we got one PONG from dest. So far it's an
@@ -526,4 +532,13 @@  void migration_populate_vfio_info(MigrationInfo *info);
 void migration_reset_vfio_bytes_transferred(void);
 void postcopy_temp_page_reset(PostcopyTmpPage *tmp_page);
 
+/* Migration thread waiting for return path thread. */
+void migration_rp_wait(MigrationState *s);
+/*
+ * Kick the migration thread waiting for return path messages.  NOTE: the
+ * name can be slightly confusing (it could be read as "kick the rp
+ * thread"); just remember that the target is always the migration thread.
+ */
+void migration_rp_kick(MigrationState *s);
+
 #endif
diff --git a/migration/migration.c b/migration/migration.c
index b28b504b4c..1b7ed2d35a 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1749,6 +1749,16 @@  void qmp_migrate_continue(MigrationStatus state, Error **errp)
     qemu_sem_post(&s->pause_sem);
 }
 
+void migration_rp_wait(MigrationState *s)
+{
+    qemu_sem_wait(&s->rp_state.rp_sem);
+}
+
+void migration_rp_kick(MigrationState *s)
+{
+    qemu_sem_post(&s->rp_state.rp_sem);
+}
+
 static struct rp_cmd_args {
     ssize_t     len; /* -1 = variable */
     const char *name;
@@ -1820,7 +1830,7 @@  static int migrate_handle_rp_resume_ack(MigrationState *s,
                       MIGRATION_STATUS_POSTCOPY_ACTIVE);
 
     /* Notify send thread that time to continue send pages */
-    qemu_sem_post(&s->rp_state.rp_sem);
+    migration_rp_kick(s);
 
     return 0;
 }
@@ -2447,7 +2457,7 @@  static int postcopy_resume_handshake(MigrationState *s)
     qemu_savevm_send_postcopy_resume(s->to_dst_file);
 
     while (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
-        qemu_sem_wait(&s->rp_state.rp_sem);
+        migration_rp_wait(s);
     }
 
     if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
diff --git a/migration/ram.c b/migration/ram.c
index ef4af3fbce..43ba62be83 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -4143,7 +4143,7 @@  static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs)
 
     /* Wait until all the ramblocks' dirty bitmap synced */
     while (qatomic_read(&rs->postcopy_bmap_sync_requested)) {
-        qemu_sem_wait(&s->rp_state.rp_sem);
+        migration_rp_wait(s);
     }
 
     trace_ram_dirty_bitmap_sync_complete();
@@ -4151,11 +4151,6 @@  static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs)
     return 0;
 }
 
-static void ram_dirty_bitmap_reload_notify(MigrationState *s)
-{
-    qemu_sem_post(&s->rp_state.rp_sem);
-}
-
 /*
  * Read the received bitmap, revert it as the initial dirty bitmap.
  * This is only used when the postcopy migration is paused but wants
@@ -4238,10 +4233,13 @@  int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block, Error **errp)
     qatomic_dec(&rs->postcopy_bmap_sync_requested);
 
     /*
-     * We succeeded to sync bitmap for current ramblock. If this is
-     * the last one to sync, we need to notify the main send thread.
+     * We succeeded in syncing the bitmap for the current ramblock.
+     * Always kick the migration thread to check whether all requested
+     * bitmaps are reloaded.  NOTE: it's racy to only kick when
+     * requested==0, because we don't know whether the migration thread
+     * may still be increasing it.
      */
-    ram_dirty_bitmap_reload_notify(s);
+    migration_rp_kick(s);
 
     ret = 0;
 out:
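
To see why the unconditional kick matters, here is the racy variant the
new comment warns against (a sketch, not code from this patch):

    /* Racy: only kick once all requested bitmaps appear reloaded. */
    if (qatomic_read(&rs->postcopy_bmap_sync_requested) == 0) {
        migration_rp_kick(s);
    }

Because the migration thread may still be increasing
postcopy_bmap_sync_requested, the value read after a decrement is not
reliable: the last decrementer can see a transient non-zero value, skip
the kick, and leave the migration thread blocked in migration_rp_wait().
Kicking on every reloaded ramblock is safe because the waiter sits in a
while loop and re-checks the counter itself after each wakeup.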