diff mbox series

[RFC,v1,20/26] kvm: vmi: intercept live migration

Message ID 20200415005938.23895-21-alazar@bitdefender.com
State New
Headers show
Series VM introspection | expand

Commit Message

Adalbert Lazăr April 15, 2020, 12:59 a.m. UTC
From: Marian Rotariu <marian.c.rotariu@gmail.com>

It is possible that the introspection tool has made some changes inside
the introspected VM which can make the guest crash if the introspection
connection is suddenly closed.

When the live migration starts, for now, the introspection tool is
signaled to remove its hooks from the introspected VM.

CC: Juan Quintela <quintela@redhat.com>
CC: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Signed-off-by: Marian Rotariu <marian.c.rotariu@gmail.com>
Signed-off-by: Adalbert Lazăr <alazar@bitdefender.com>
---
 accel/kvm/vmi.c                | 31 +++++++++++++++++++++++++++----
 include/sysemu/vmi-intercept.h |  1 +
 migration/migration.c          | 18 +++++++++++++++---
 migration/migration.h          |  2 ++
 4 files changed, 45 insertions(+), 7 deletions(-)

Comments

Dr. David Alan Gilbert April 27, 2020, 7:08 p.m. UTC | #1
* Adalbert Lazăr (alazar@bitdefender.com) wrote:
> From: Marian Rotariu <marian.c.rotariu@gmail.com>
> 
> It is possible that the introspection tool has made some changes inside
> the introspected VM which can make the guest crash if the introspection
> connection is suddenly closed.
> 
> When the live migration starts, for now, the introspection tool is
> signaled to remove its hooks from the introspected VM.
> 
> CC: Juan Quintela <quintela@redhat.com>
> CC: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> Signed-off-by: Marian Rotariu <marian.c.rotariu@gmail.com>
> Signed-off-by: Adalbert Lazăr <alazar@bitdefender.com>

OK, so this isn't too intrusive to the migration code; and other than
renaming 'start_live_migration_thread' to
'start_outgoing_migration_thread' I think I'd be OK with this,

but it might depend what your overall aim is.

For example, you might be better intercepting each migration_state
change in your notifier, that's much finer grain than just the start of
migration.

The other thing I worry about is that there doesn't seem to be much
guard against odd orderings of things - for example, what happens
if the introspection client was to issue the INTERCEPT_MIGRATE command
twice while a migration was already running?  Or before an actual
incoming channel connection had happened?

Dave

> ---
>  accel/kvm/vmi.c                | 31 +++++++++++++++++++++++++++----
>  include/sysemu/vmi-intercept.h |  1 +
>  migration/migration.c          | 18 +++++++++++++++---
>  migration/migration.h          |  2 ++
>  4 files changed, 45 insertions(+), 7 deletions(-)
> 
> diff --git a/accel/kvm/vmi.c b/accel/kvm/vmi.c
> index 90906478b4..ea7191e48d 100644
> --- a/accel/kvm/vmi.c
> +++ b/accel/kvm/vmi.c
> @@ -21,6 +21,8 @@
>  #include "chardev/char.h"
>  #include "chardev/char-fe.h"
>  #include "migration/vmstate.h"
> +#include "migration/migration.h"
> +#include "migration/misc.h"
>  
>  #include "sysemu/vmi-intercept.h"
>  #include "sysemu/vmi-handshake.h"
> @@ -58,6 +60,7 @@ typedef struct VMIntrospection {
>      int64_t vm_start_time;
>  
>      Notifier machine_ready;
> +    Notifier migration_state_change;
>      bool created_from_command_line;
>  
>      bool kvmi_hooked;
> @@ -74,9 +77,11 @@ static const char *action_string[] = {
>      "suspend",
>      "resume",
>      "force-reset",
> +    "migrate",
>  };
>  
>  static bool suspend_pending;
> +static bool migrate_pending;
>  
>  #define TYPE_VM_INTROSPECTION "introspection"
>  
> @@ -88,6 +93,15 @@ static bool suspend_pending;
>  static Error *vm_introspection_init(VMIntrospection *i);
>  static void vm_introspection_reset(void *opaque);
>  
> +static void migration_state_notifier(Notifier *notifier, void *data)
> +{
> +    MigrationState *s = data;
> +
> +    if (migration_has_failed(s)) {
> +        migrate_pending = false;
> +    }
> +}
> +
>  static void machine_ready(Notifier *notifier, void *data)
>  {
>      VMIntrospection *i = container_of(notifier, VMIntrospection, machine_ready);
> @@ -144,6 +158,9 @@ static void complete(UserCreatable *uc, Error **errp)
>  
>      ic->uniq = i;
>  
> +    i->migration_state_change.notify = migration_state_notifier;
> +    add_migration_state_change_notifier(&i->migration_state_change);
> +
>      qemu_register_reset(vm_introspection_reset, i);
>  }
>  
> @@ -478,6 +495,9 @@ static void continue_with_the_intercepted_action(VMIntrospection *i)
>      case VMI_INTERCEPT_SUSPEND:
>          vm_stop(RUN_STATE_PAUSED);
>          break;
> +    case VMI_INTERCEPT_MIGRATE:
> +        start_live_migration_thread(migrate_get_current());
> +        break;
>      default:
>          error_report("VMI: %s: unexpected action %d",
>                       __func__, i->intercepted_action);
> @@ -571,9 +591,9 @@ static void chr_event_open(VMIntrospection *i)
>  {
>      Error *local_err = NULL;
>  
> -    if (suspend_pending) {
> -        info_report("VMI: %s: too soon (suspend=%d)",
> -                    __func__, suspend_pending);
> +    if (suspend_pending || migrate_pending) {
> +        info_report("VMI: %s: too soon (suspend=%d, migrate=%d)",
> +                    __func__, suspend_pending, migrate_pending);
>          maybe_disable_socket_reconnect(i);
>          qemu_chr_fe_disconnect(&i->sock);
>          return;
> @@ -608,7 +628,7 @@ static void chr_event_close(VMIntrospection *i)
>      cancel_unhook_timer(i);
>      cancel_handshake_timer(i);
>  
> -    if (suspend_pending) {
> +    if (suspend_pending || migrate_pending) {
>          maybe_disable_socket_reconnect(i);
>  
>          if (i->intercepted_action != VMI_INTERCEPT_NONE) {
> @@ -680,6 +700,9 @@ static bool record_intercept_action(VMI_intercept_command action)
>          break;
>      case VMI_INTERCEPT_FORCE_RESET:
>          break;
> +    case VMI_INTERCEPT_MIGRATE:
> +        migrate_pending = true;
> +        break;
>      default:
>          return false;
>      }
> diff --git a/include/sysemu/vmi-intercept.h b/include/sysemu/vmi-intercept.h
> index ef591b49e7..b4a9a3faa7 100644
> --- a/include/sysemu/vmi-intercept.h
> +++ b/include/sysemu/vmi-intercept.h
> @@ -15,6 +15,7 @@ typedef enum {
>      VMI_INTERCEPT_SUSPEND,
>      VMI_INTERCEPT_RESUME,
>      VMI_INTERCEPT_FORCE_RESET,
> +    VMI_INTERCEPT_MIGRATE,
>  } VMI_intercept_command;
>  
>  bool vm_introspection_intercept(VMI_intercept_command ic, Error **errp);
> diff --git a/migration/migration.c b/migration/migration.c
> index 187ac0410c..222037d739 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -55,6 +55,8 @@
>  #include "qemu/queue.h"
>  #include "multifd.h"
>  
> +#include "sysemu/vmi-intercept.h"
> +
>  #define MAX_THROTTLE  (32 << 20)      /* Migration transfer speed throttling */
>  
>  /* Amount of time to allocate to each "chunk" of bandwidth-throttled
> @@ -3471,6 +3473,13 @@ static void *migration_thread(void *opaque)
>      return NULL;
>  }
>  
> +void start_live_migration_thread(MigrationState *s)
> +{
> +    qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
> +                    QEMU_THREAD_JOINABLE);
> +    s->migration_thread_running = true;
> +}
> +
>  void migrate_fd_connect(MigrationState *s, Error *error_in)
>  {
>      Error *local_err = NULL;
> @@ -3534,9 +3543,12 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
>          migrate_fd_cleanup(s);
>          return;
>      }
> -    qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
> -                       QEMU_THREAD_JOINABLE);
> -    s->migration_thread_running = true;
> +
> +    if (vm_introspection_intercept(VMI_INTERCEPT_MIGRATE, &error_in)) {
> +        return;
> +    }
> +
> +    start_live_migration_thread(s);
>  }
>  
>  void migration_global_dump(Monitor *mon)
> diff --git a/migration/migration.h b/migration/migration.h
> index 507284e563..eb5668e1f2 100644
> --- a/migration/migration.h
> +++ b/migration/migration.h
> @@ -263,6 +263,8 @@ struct MigrationState
>      uint8_t clear_bitmap_shift;
>  };
>  
> +void start_live_migration_thread(MigrationState *s);
> +
>  void migrate_set_state(int *state, int old_state, int new_state);
>  
>  void migration_fd_process_incoming(QEMUFile *f, Error **errp);
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Adalbert Lazăr April 28, 2020, 12:14 p.m. UTC | #2
On Mon, 27 Apr 2020 20:08:55 +0100, "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> * Adalbert Lazăr (alazar@bitdefender.com) wrote:
> > From: Marian Rotariu <marian.c.rotariu@gmail.com>
> > 
> > It is possible that the introspection tool has made some changes inside
> > the introspected VM which can make the guest crash if the introspection
> > connection is suddenly closed.
> > 
> > When the live migration starts, for now, the introspection tool is
> > signaled to remove its hooks from the introspected VM.
> > 
> > CC: Juan Quintela <quintela@redhat.com>
> > CC: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> > Signed-off-by: Marian Rotariu <marian.c.rotariu@gmail.com>
> > Signed-off-by: Adalbert Lazăr <alazar@bitdefender.com>
> 
> OK, so this isn't too intrusive to the migration code; and other than
> renaming 'start_live_migration_thread' to
> 'start_outgoing_migration_thread' I think I'd be OK with this,
> 
> but it might depend what your overall aim is.
> 
> For example, you might be better intercepting each migration_state
> change in your notifier, that's much finer grain than just the start of
> migration.

Thank you, Dave.

We want to intercept the live migration and 'block' it while the guest
is running (some changes made to the guest by the introspection app have
to be undone while the vCPUs are in certain states).

I'm not sure what the best way is to block these kinds of events
(including the pause/shutdown commands). If calling main_loop_wait()
is enough (patch [22/26] kvm: vmi: add 'async_unhook' property [1])
then we can drop a lot of code.

The use of a notifier would be nice, but from what I understand, we can't
block the migration from a notification callback.

> The other thing I worry about is that there doesn't seem to be much
> guard against odd orderings of things - for example, what happens
> if the introspection client was to issue the  INTERCEPT_MIGRATE command
> twice while a migration was already running?  Or before an actual
> incoming channel connetion had happened?
> 
> Dave

Sorry that I haven't described the interception. When we intercept
an action that we want to 'block', we set a static variable first,
regardless of whether the introspection channel is connected, and:

   - if the introspection channel is not connected we don't block the
   action, but this (variable) will prevent the activation of this
   channel until the action (ie. migrate) is completed (a). I assume
   that there could be only one migrate (or suspend/pause) user command
   at any given time (b).

   - if the introspection channel is connected, the introspection app
   is signaled to start its unhook/undo process. We let the code flow
   continue, but the action (migrate/suspend/pause) is delayed until
   the introspection channel is closed. Meanwhile, any other intercepted
   action will not be blocked/delayed (c), but the fact that these actions
   are in progress is saved to static variables and the introspection
   channel won't be reactivated.

Indeed, there are cases that are not handled well:

  a) if the migration is started and canceled before the introspection
  object is created (through QMP), the introspection channel will be
  disabled until the next migration starts and finishes.

  b) if a migration command has been delayed, a following migrate command
  (if this is possible) won't be delayed and we will have two migration
  threads started.

  c) if a migration command has been delayed, a following suspend/pause
  command won't be delayed and the introspection app might not have
  enough time to finish its unhook/undo process.

[1]: https://lore.kernel.org/qemu-devel/20200415005938.23895-23-alazar@bitdefender.com/

> > ---
> >  accel/kvm/vmi.c                | 31 +++++++++++++++++++++++++++----
> >  include/sysemu/vmi-intercept.h |  1 +
> >  migration/migration.c          | 18 +++++++++++++++---
> >  migration/migration.h          |  2 ++
> >  4 files changed, 45 insertions(+), 7 deletions(-)
> > 
> > diff --git a/accel/kvm/vmi.c b/accel/kvm/vmi.c
> > index 90906478b4..ea7191e48d 100644
> > --- a/accel/kvm/vmi.c
> > +++ b/accel/kvm/vmi.c
> > @@ -21,6 +21,8 @@
> >  #include "chardev/char.h"
> >  #include "chardev/char-fe.h"
> >  #include "migration/vmstate.h"
> > +#include "migration/migration.h"
> > +#include "migration/misc.h"
> >  
> >  #include "sysemu/vmi-intercept.h"
> >  #include "sysemu/vmi-handshake.h"
> > @@ -58,6 +60,7 @@ typedef struct VMIntrospection {
> >      int64_t vm_start_time;
> >  
> >      Notifier machine_ready;
> > +    Notifier migration_state_change;
> >      bool created_from_command_line;
> >  
> >      bool kvmi_hooked;
> > @@ -74,9 +77,11 @@ static const char *action_string[] = {
> >      "suspend",
> >      "resume",
> >      "force-reset",
> > +    "migrate",
> >  };
> >  
> >  static bool suspend_pending;
> > +static bool migrate_pending;
> >  
> >  #define TYPE_VM_INTROSPECTION "introspection"
> >  
> > @@ -88,6 +93,15 @@ static bool suspend_pending;
> >  static Error *vm_introspection_init(VMIntrospection *i);
> >  static void vm_introspection_reset(void *opaque);
> >  
> > +static void migration_state_notifier(Notifier *notifier, void *data)
> > +{
> > +    MigrationState *s = data;
> > +
> > +    if (migration_has_failed(s)) {
> > +        migrate_pending = false;
> > +    }
> > +}
> > +
> >  static void machine_ready(Notifier *notifier, void *data)
> >  {
> >      VMIntrospection *i = container_of(notifier, VMIntrospection, machine_ready);
> > @@ -144,6 +158,9 @@ static void complete(UserCreatable *uc, Error **errp)
> >  
> >      ic->uniq = i;
> >  
> > +    i->migration_state_change.notify = migration_state_notifier;
> > +    add_migration_state_change_notifier(&i->migration_state_change);
> > +
> >      qemu_register_reset(vm_introspection_reset, i);
> >  }
> >  
> > @@ -478,6 +495,9 @@ static void continue_with_the_intercepted_action(VMIntrospection *i)
> >      case VMI_INTERCEPT_SUSPEND:
> >          vm_stop(RUN_STATE_PAUSED);
> >          break;
> > +    case VMI_INTERCEPT_MIGRATE:
> > +        start_live_migration_thread(migrate_get_current());
> > +        break;
> >      default:
> >          error_report("VMI: %s: unexpected action %d",
> >                       __func__, i->intercepted_action);
> > @@ -571,9 +591,9 @@ static void chr_event_open(VMIntrospection *i)
> >  {
> >      Error *local_err = NULL;
> >  
> > -    if (suspend_pending) {
> > -        info_report("VMI: %s: too soon (suspend=%d)",
> > -                    __func__, suspend_pending);
> > +    if (suspend_pending || migrate_pending) {
> > +        info_report("VMI: %s: too soon (suspend=%d, migrate=%d)",
> > +                    __func__, suspend_pending, migrate_pending);
> >          maybe_disable_socket_reconnect(i);
> >          qemu_chr_fe_disconnect(&i->sock);
> >          return;
> > @@ -608,7 +628,7 @@ static void chr_event_close(VMIntrospection *i)
> >      cancel_unhook_timer(i);
> >      cancel_handshake_timer(i);
> >  
> > -    if (suspend_pending) {
> > +    if (suspend_pending || migrate_pending) {
> >          maybe_disable_socket_reconnect(i);
> >  
> >          if (i->intercepted_action != VMI_INTERCEPT_NONE) {
> > @@ -680,6 +700,9 @@ static bool record_intercept_action(VMI_intercept_command action)
> >          break;
> >      case VMI_INTERCEPT_FORCE_RESET:
> >          break;
> > +    case VMI_INTERCEPT_MIGRATE:
> > +        migrate_pending = true;
> > +        break;
> >      default:
> >          return false;
> >      }
> > diff --git a/include/sysemu/vmi-intercept.h b/include/sysemu/vmi-intercept.h
> > index ef591b49e7..b4a9a3faa7 100644
> > --- a/include/sysemu/vmi-intercept.h
> > +++ b/include/sysemu/vmi-intercept.h
> > @@ -15,6 +15,7 @@ typedef enum {
> >      VMI_INTERCEPT_SUSPEND,
> >      VMI_INTERCEPT_RESUME,
> >      VMI_INTERCEPT_FORCE_RESET,
> > +    VMI_INTERCEPT_MIGRATE,
> >  } VMI_intercept_command;
> >  
> >  bool vm_introspection_intercept(VMI_intercept_command ic, Error **errp);
> > diff --git a/migration/migration.c b/migration/migration.c
> > index 187ac0410c..222037d739 100644
> > --- a/migration/migration.c
> > +++ b/migration/migration.c
> > @@ -55,6 +55,8 @@
> >  #include "qemu/queue.h"
> >  #include "multifd.h"
> >  
> > +#include "sysemu/vmi-intercept.h"
> > +
> >  #define MAX_THROTTLE  (32 << 20)      /* Migration transfer speed throttling */
> >  
> >  /* Amount of time to allocate to each "chunk" of bandwidth-throttled
> > @@ -3471,6 +3473,13 @@ static void *migration_thread(void *opaque)
> >      return NULL;
> >  }
> >  
> > +void start_live_migration_thread(MigrationState *s)
> > +{
> > +    qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
> > +                    QEMU_THREAD_JOINABLE);
> > +    s->migration_thread_running = true;
> > +}
> > +
> >  void migrate_fd_connect(MigrationState *s, Error *error_in)
> >  {
> >      Error *local_err = NULL;
> > @@ -3534,9 +3543,12 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
> >          migrate_fd_cleanup(s);
> >          return;
> >      }
> > -    qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
> > -                       QEMU_THREAD_JOINABLE);
> > -    s->migration_thread_running = true;
> > +
> > +    if (vm_introspection_intercept(VMI_INTERCEPT_MIGRATE, &error_in)) {
> > +        return;
> > +    }
> > +
> > +    start_live_migration_thread(s);
> >  }
> >  
> >  void migration_global_dump(Monitor *mon)
> > diff --git a/migration/migration.h b/migration/migration.h
> > index 507284e563..eb5668e1f2 100644
> > --- a/migration/migration.h
> > +++ b/migration/migration.h
> > @@ -263,6 +263,8 @@ struct MigrationState
> >      uint8_t clear_bitmap_shift;
> >  };
> >  
> > +void start_live_migration_thread(MigrationState *s);
> > +
> >  void migrate_set_state(int *state, int old_state, int new_state);
> >  
> >  void migration_fd_process_incoming(QEMUFile *f, Error **errp);
> > 
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>
Dr. David Alan Gilbert April 28, 2020, 12:24 p.m. UTC | #3
* Adalbert Lazăr (alazar@bitdefender.com) wrote:
> On Mon, 27 Apr 2020 20:08:55 +0100, "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > * Adalbert Lazăr (alazar@bitdefender.com) wrote:
> > > From: Marian Rotariu <marian.c.rotariu@gmail.com>
> > > 
> > > It is possible that the introspection tool has made some changes inside
> > > the introspected VM which can make the guest crash if the introspection
> > > connection is suddenly closed.
> > > 
> > > When the live migration starts, for now, the introspection tool is
> > > signaled to remove its hooks from the introspected VM.
> > > 
> > > CC: Juan Quintela <quintela@redhat.com>
> > > CC: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> > > Signed-off-by: Marian Rotariu <marian.c.rotariu@gmail.com>
> > > Signed-off-by: Adalbert Lazăr <alazar@bitdefender.com>
> > 
> > OK, so this isn't too intrusive to the migration code; and other than
> > renaming 'start_live_migration_thread' to
> > 'start_outgoing_migration_thread' I think I'd be OK with this,
> > 
> > but it might depend what your overall aim is.
> > 
> > For example, you might be better intercepting each migration_state
> > change in your notifier, that's much finer grain than just the start of
> > migration.
> 
> Thank you, Dave.
> 
> We want to intercept the live migration and 'block' it while the guest
> is running (some changes made to the guest by the introspection app has
> to be undone while the vCPUs are in certain states).
> 
> I'm not sure what is the best way to block these kind of events
> (including the pause/shutdown commands). If calling main_loop_wait()
> is enough (patch [22/26] kvm: vmi: add 'async_unhook' property [1])
> then we can drop a lot of code.
> 
> The use of a notifier will be nice, but from what I understand, we can't
> block the migration from a notification callback.

Oh, if your intention is *just* to block a migration starting then you
can use 'migrate_add_blocker' - see hw/9pfs/9p.c for an example where
it's used and then removed; they use it to stop migration while the fs
 is mounted.  That causes an attempt to start a migration to give an
error (of your choosing).

> > The other thing I worry about is that there doesn't seem to be much
> > guard against odd orderings of things - for example, what happens
> > if the introspection client was to issue the  INTERCEPT_MIGRATE command
> > twice while a migration was already running?  Or before an actual
> > incoming channel connetion had happened?
> > 
> > Dave
> 
> Sorry that I haven't described the interception. When we intercept
> an action that we want to 'block', we set a static variable first,
> regardless if the introspection channel is connected or not, and :
> 
>    - if the introspection channel is not connected we don't block the
>    action, but this (variable) will prevent the activation of this
>    channel until the action (ie. migrate) is completed (a). I assume
>    that there could be only one migrate (or suspend/pause) user command
>    at any given time (b).
> 
>    - if the introspection channel is connected, the introspection app
>    is signaled to start its unhook/undo process. We let the code flow
>    continue, but the action (migrate/suspend/pause) is delayed until
>    the introspection channel is closed. Meanwhile, any other intercepted
>    action will not be blocked/delayed (c), but the fact that these actions
>    are in progress is saved to static variables and the introspecton
>    channel won't be reactivated.
> 
> Indeed, there are cases that are not handled well:
> 
>   a) if the migration is started and canceled before the introspection
>   object is created (through QMP), the introspection channel will be
>   disabled until the next migration starts and finishes.
> 
>   b) if a migration command has been delayed, a following migrate command
>   (if this is possible) won't be delayed and we will have two migration
>   threads started.
> 
>   c) if a migration command has been delayed, a following suspend/pause
>   command won't be delayed and the introspection app might not have
>   enough time to finish its unhook/undo process.

Yeh that sounds a bit messy.

Dave


> [1]: https://lore.kernel.org/qemu-devel/20200415005938.23895-23-alazar@bitdefender.com/
> 
> > > ---
> > >  accel/kvm/vmi.c                | 31 +++++++++++++++++++++++++++----
> > >  include/sysemu/vmi-intercept.h |  1 +
> > >  migration/migration.c          | 18 +++++++++++++++---
> > >  migration/migration.h          |  2 ++
> > >  4 files changed, 45 insertions(+), 7 deletions(-)
> > > 
> > > diff --git a/accel/kvm/vmi.c b/accel/kvm/vmi.c
> > > index 90906478b4..ea7191e48d 100644
> > > --- a/accel/kvm/vmi.c
> > > +++ b/accel/kvm/vmi.c
> > > @@ -21,6 +21,8 @@
> > >  #include "chardev/char.h"
> > >  #include "chardev/char-fe.h"
> > >  #include "migration/vmstate.h"
> > > +#include "migration/migration.h"
> > > +#include "migration/misc.h"
> > >  
> > >  #include "sysemu/vmi-intercept.h"
> > >  #include "sysemu/vmi-handshake.h"
> > > @@ -58,6 +60,7 @@ typedef struct VMIntrospection {
> > >      int64_t vm_start_time;
> > >  
> > >      Notifier machine_ready;
> > > +    Notifier migration_state_change;
> > >      bool created_from_command_line;
> > >  
> > >      bool kvmi_hooked;
> > > @@ -74,9 +77,11 @@ static const char *action_string[] = {
> > >      "suspend",
> > >      "resume",
> > >      "force-reset",
> > > +    "migrate",
> > >  };
> > >  
> > >  static bool suspend_pending;
> > > +static bool migrate_pending;
> > >  
> > >  #define TYPE_VM_INTROSPECTION "introspection"
> > >  
> > > @@ -88,6 +93,15 @@ static bool suspend_pending;
> > >  static Error *vm_introspection_init(VMIntrospection *i);
> > >  static void vm_introspection_reset(void *opaque);
> > >  
> > > +static void migration_state_notifier(Notifier *notifier, void *data)
> > > +{
> > > +    MigrationState *s = data;
> > > +
> > > +    if (migration_has_failed(s)) {
> > > +        migrate_pending = false;
> > > +    }
> > > +}
> > > +
> > >  static void machine_ready(Notifier *notifier, void *data)
> > >  {
> > >      VMIntrospection *i = container_of(notifier, VMIntrospection, machine_ready);
> > > @@ -144,6 +158,9 @@ static void complete(UserCreatable *uc, Error **errp)
> > >  
> > >      ic->uniq = i;
> > >  
> > > +    i->migration_state_change.notify = migration_state_notifier;
> > > +    add_migration_state_change_notifier(&i->migration_state_change);
> > > +
> > >      qemu_register_reset(vm_introspection_reset, i);
> > >  }
> > >  
> > > @@ -478,6 +495,9 @@ static void continue_with_the_intercepted_action(VMIntrospection *i)
> > >      case VMI_INTERCEPT_SUSPEND:
> > >          vm_stop(RUN_STATE_PAUSED);
> > >          break;
> > > +    case VMI_INTERCEPT_MIGRATE:
> > > +        start_live_migration_thread(migrate_get_current());
> > > +        break;
> > >      default:
> > >          error_report("VMI: %s: unexpected action %d",
> > >                       __func__, i->intercepted_action);
> > > @@ -571,9 +591,9 @@ static void chr_event_open(VMIntrospection *i)
> > >  {
> > >      Error *local_err = NULL;
> > >  
> > > -    if (suspend_pending) {
> > > -        info_report("VMI: %s: too soon (suspend=%d)",
> > > -                    __func__, suspend_pending);
> > > +    if (suspend_pending || migrate_pending) {
> > > +        info_report("VMI: %s: too soon (suspend=%d, migrate=%d)",
> > > +                    __func__, suspend_pending, migrate_pending);
> > >          maybe_disable_socket_reconnect(i);
> > >          qemu_chr_fe_disconnect(&i->sock);
> > >          return;
> > > @@ -608,7 +628,7 @@ static void chr_event_close(VMIntrospection *i)
> > >      cancel_unhook_timer(i);
> > >      cancel_handshake_timer(i);
> > >  
> > > -    if (suspend_pending) {
> > > +    if (suspend_pending || migrate_pending) {
> > >          maybe_disable_socket_reconnect(i);
> > >  
> > >          if (i->intercepted_action != VMI_INTERCEPT_NONE) {
> > > @@ -680,6 +700,9 @@ static bool record_intercept_action(VMI_intercept_command action)
> > >          break;
> > >      case VMI_INTERCEPT_FORCE_RESET:
> > >          break;
> > > +    case VMI_INTERCEPT_MIGRATE:
> > > +        migrate_pending = true;
> > > +        break;
> > >      default:
> > >          return false;
> > >      }
> > > diff --git a/include/sysemu/vmi-intercept.h b/include/sysemu/vmi-intercept.h
> > > index ef591b49e7..b4a9a3faa7 100644
> > > --- a/include/sysemu/vmi-intercept.h
> > > +++ b/include/sysemu/vmi-intercept.h
> > > @@ -15,6 +15,7 @@ typedef enum {
> > >      VMI_INTERCEPT_SUSPEND,
> > >      VMI_INTERCEPT_RESUME,
> > >      VMI_INTERCEPT_FORCE_RESET,
> > > +    VMI_INTERCEPT_MIGRATE,
> > >  } VMI_intercept_command;
> > >  
> > >  bool vm_introspection_intercept(VMI_intercept_command ic, Error **errp);
> > > diff --git a/migration/migration.c b/migration/migration.c
> > > index 187ac0410c..222037d739 100644
> > > --- a/migration/migration.c
> > > +++ b/migration/migration.c
> > > @@ -55,6 +55,8 @@
> > >  #include "qemu/queue.h"
> > >  #include "multifd.h"
> > >  
> > > +#include "sysemu/vmi-intercept.h"
> > > +
> > >  #define MAX_THROTTLE  (32 << 20)      /* Migration transfer speed throttling */
> > >  
> > >  /* Amount of time to allocate to each "chunk" of bandwidth-throttled
> > > @@ -3471,6 +3473,13 @@ static void *migration_thread(void *opaque)
> > >      return NULL;
> > >  }
> > >  
> > > +void start_live_migration_thread(MigrationState *s)
> > > +{
> > > +    qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
> > > +                    QEMU_THREAD_JOINABLE);
> > > +    s->migration_thread_running = true;
> > > +}
> > > +
> > >  void migrate_fd_connect(MigrationState *s, Error *error_in)
> > >  {
> > >      Error *local_err = NULL;
> > > @@ -3534,9 +3543,12 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
> > >          migrate_fd_cleanup(s);
> > >          return;
> > >      }
> > > -    qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
> > > -                       QEMU_THREAD_JOINABLE);
> > > -    s->migration_thread_running = true;
> > > +
> > > +    if (vm_introspection_intercept(VMI_INTERCEPT_MIGRATE, &error_in)) {
> > > +        return;
> > > +    }
> > > +
> > > +    start_live_migration_thread(s);
> > >  }
> > >  
> > >  void migration_global_dump(Monitor *mon)
> > > diff --git a/migration/migration.h b/migration/migration.h
> > > index 507284e563..eb5668e1f2 100644
> > > --- a/migration/migration.h
> > > +++ b/migration/migration.h
> > > @@ -263,6 +263,8 @@ struct MigrationState
> > >      uint8_t clear_bitmap_shift;
> > >  };
> > >  
> > > +void start_live_migration_thread(MigrationState *s);
> > > +
> > >  void migrate_set_state(int *state, int old_state, int new_state);
> > >  
> > >  void migration_fd_process_incoming(QEMUFile *f, Error **errp);
> > > 
> > --
> > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> > 
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Adalbert Lazăr April 28, 2020, 1:16 p.m. UTC | #4
On Tue, 28 Apr 2020 13:24:39 +0100, "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> * Adalbert Lazăr (alazar@bitdefender.com) wrote:
> > On Mon, 27 Apr 2020 20:08:55 +0100, "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > > * Adalbert Lazăr (alazar@bitdefender.com) wrote:
> > > > From: Marian Rotariu <marian.c.rotariu@gmail.com>
> > > > 
> > > > It is possible that the introspection tool has made some changes inside
> > > > the introspected VM which can make the guest crash if the introspection
> > > > connection is suddenly closed.
> > > > 
> > > > When the live migration starts, for now, the introspection tool is
> > > > signaled to remove its hooks from the introspected VM.
> > > > 
> > > > CC: Juan Quintela <quintela@redhat.com>
> > > > CC: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> > > > Signed-off-by: Marian Rotariu <marian.c.rotariu@gmail.com>
> > > > Signed-off-by: Adalbert Lazăr <alazar@bitdefender.com>
> > > 
> > > OK, so this isn't too intrusive to the migration code; and other than
> > > renaming 'start_live_migration_thread' to
> > > 'start_outgoing_migration_thread' I think I'd be OK with this,
> > > 
> > > but it might depend what your overall aim is.
> > > 
> > > For example, you might be better intercepting each migration_state
> > > change in your notifier, that's much finer grain than just the start of
> > > migration.
> > 
> > Thank you, Dave.
> > 
> > We want to intercept the live migration and 'block' it while the guest
> > is running (some changes made to the guest by the introspection app has
> > to be undone while the vCPUs are in certain states).
> > 
> > I'm not sure what is the best way to block these kind of events
> > (including the pause/shutdown commands). If calling main_loop_wait()
> > is enough (patch [22/26] kvm: vmi: add 'async_unhook' property [1])
> > then we can drop a lot of code.
> > 
> > The use of a notifier will be nice, but from what I understand, we can't
> > block the migration from a notification callback.
> 
> Oh, if your intention is *just* to block a migration starting then you
> can use 'migrate_add_blocker' - see hw/9pfs/9p.c for an example where
> it's used and then removed; they use it to stop migration while the fs
>  is mounted.  That causes an attempt to start a migration to give an
> error (of your choosing).

One use case is to do VM introspection all the time the guest is running.
From the user perspective, the pause/suspend/shutdown/snapshot/migrate
commands should work regardless of whether the VM is currently introspected
or not. Our first option was to delay these commands for a couple of
seconds when the VM is introspected, while the introspection app reverts
its changes, without blocking the vCPUs.

I'll see if we can mix the migrate notifier with migrate_add_blocker(),
or add a new migration state. To block the migration (with an error)
is our second option, because the user doing this might not be allowed
to stop the VM introspection.

Thank you,
Adalbert
Dr. David Alan Gilbert April 28, 2020, 1:43 p.m. UTC | #5
* Adalbert Lazăr (alazar@bitdefender.com) wrote:
> On Tue, 28 Apr 2020 13:24:39 +0100, "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > * Adalbert Lazăr (alazar@bitdefender.com) wrote:
> > > On Mon, 27 Apr 2020 20:08:55 +0100, "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > > > * Adalbert Lazăr (alazar@bitdefender.com) wrote:
> > > > > From: Marian Rotariu <marian.c.rotariu@gmail.com>
> > > > > 
> > > > > It is possible that the introspection tool has made some changes inside
> > > > > the introspected VM which can make the guest crash if the introspection
> > > > > connection is suddenly closed.
> > > > > 
> > > > > When the live migration starts, for now, the introspection tool is
> > > > > signaled to remove its hooks from the introspected VM.
> > > > > 
> > > > > CC: Juan Quintela <quintela@redhat.com>
> > > > > CC: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> > > > > Signed-off-by: Marian Rotariu <marian.c.rotariu@gmail.com>
> > > > > Signed-off-by: Adalbert Lazăr <alazar@bitdefender.com>
> > > > 
> > > > OK, so this isn't too intrusive to the migration code; and other than
> > > > renaming 'start_live_migration_thread' to
> > > > 'start_outgoing_migration_thread' I think I'd be OK with this,
> > > > 
> > > > but it might depend what your overall aim is.
> > > > 
> > > > For example, you might be better intercepting each migration_state
> > > > change in your notifier, that's much finer grain than just the start of
> > > > migration.
> > > 
> > > Thank you, Dave.
> > > 
> > > We want to intercept the live migration and 'block' it while the guest
> > > is running (some changes made to the guest by the introspection app has
> > > to be undone while the vCPUs are in certain states).
> > > 
> > > I'm not sure what is the best way to block these kind of events
> > > (including the pause/shutdown commands). If calling main_loop_wait()
> > > is enough (patch [22/26] kvm: vmi: add 'async_unhook' property [1])
> > > then we can drop a lot of code.
> > > 
> > > The use of a notifier will be nice, but from what I understand, we can't
> > > block the migration from a notification callback.
> > 
> > Oh, if your intention is *just* to block a migration starting then you
> > can use 'migrate_add_blocker' - see hw/9pfs/9p.c for an example where
> > it's used and then removed; they use it to stop migration while the fs
> >  is mounted.  That causes an attempt to start a migration to give an
> > error (of your choosing).
> 
> One use case is to do VM introspection all the time the guest is running.
> From the user perspective, the pause/suspend/shutdown/snapshot/migrate
> commands should work regardless if the VM is currently introspected
> or not. Our first option was to delay these commands for a couple of
> seconds when the VM is introspected, while the introspection app reverts
> its changes, without blocking the vCPUs.

Ah OK, so it's not really about blocking it completely; just delaying it
a bit; in that case add_blocker is the wrong thing.

> I'll see if we can mix the migrate notifier with migrate_add_blocker(),
> or add a new migration state. To block the migration (with an error)
> is our second option, because the user doing this might not be allowed
> to stop the VM introspection.

Maybe the right thing is to do something just like
MIGRATION_STATUS_WAIT_UNPLUG, it's right near the start of the thread.
Again its job is just to make the migration wait while it does some
stuff before it can let migration continue.

Dave

> Thank you,
> Adalbert
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Adalbert Lazăr April 28, 2020, 2:38 p.m. UTC | #6
On Tue, 28 Apr 2020 14:43:20 +0100, "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> * Adalbert Lazăr (alazar@bitdefender.com) wrote:
> > One use case is to do VM introspection all the time the guest is running.
> > From the user perspective, the pause/suspend/shutdown/snapshot/migrate
> > commands should work regardless if the VM is currently introspected
> > or not. Our first option was to delay these commands for a couple of
> > seconds when the VM is introspected, while the introspection app reverts
> > its changes, without blocking the vCPUs.
> 
> Ah OK, so it's not really about blocking it completely; just delaying it
> a bit; in that case add_blocker is the wrong thing.
> 
> > I'll see if we can mix the migrate notifier with migrate_add_blocker(),
> > or add a new migration state. To block the migration (with an error)
> > is our second option, because the user doing this might not be allowed
> > to stop the VM introspection.
> 
> Maybe the right thing is to do something just like
> MIGRATION_STATUS_WAIT_UNPLUG, it's right near the start of the thread.
> Again its job is just to make the migration wait while it does some
> stuff before it can let migration continue.
> 

This is it! Thank you, Dave.

We already register a VMStateDescription structure to save the VM start time
([18/26] kvm: vmi: store/restore 'vm_start_time' on migrate/snapshot [1]).
All we have to do is set up the dev_unplug_pending callback and
return true when the introspection channel is still active.

[1]: https://lore.kernel.org/qemu-devel/20200415005938.23895-19-alazar@bitdefender.com/
diff mbox series

Patch

diff --git a/accel/kvm/vmi.c b/accel/kvm/vmi.c
index 90906478b4..ea7191e48d 100644
--- a/accel/kvm/vmi.c
+++ b/accel/kvm/vmi.c
@@ -21,6 +21,8 @@ 
 #include "chardev/char.h"
 #include "chardev/char-fe.h"
 #include "migration/vmstate.h"
+#include "migration/migration.h"
+#include "migration/misc.h"
 
 #include "sysemu/vmi-intercept.h"
 #include "sysemu/vmi-handshake.h"
@@ -58,6 +60,7 @@  typedef struct VMIntrospection {
     int64_t vm_start_time;
 
     Notifier machine_ready;
+    Notifier migration_state_change;
     bool created_from_command_line;
 
     bool kvmi_hooked;
@@ -74,9 +77,11 @@  static const char *action_string[] = {
     "suspend",
     "resume",
     "force-reset",
+    "migrate",
 };
 
 static bool suspend_pending;
+static bool migrate_pending;
 
 #define TYPE_VM_INTROSPECTION "introspection"
 
@@ -88,6 +93,15 @@  static bool suspend_pending;
 static Error *vm_introspection_init(VMIntrospection *i);
 static void vm_introspection_reset(void *opaque);
 
+static void migration_state_notifier(Notifier *notifier, void *data)
+{
+    MigrationState *s = data;
+
+    if (migration_has_failed(s)) {
+        migrate_pending = false;
+    }
+}
+
 static void machine_ready(Notifier *notifier, void *data)
 {
     VMIntrospection *i = container_of(notifier, VMIntrospection, machine_ready);
@@ -144,6 +158,9 @@  static void complete(UserCreatable *uc, Error **errp)
 
     ic->uniq = i;
 
+    i->migration_state_change.notify = migration_state_notifier;
+    add_migration_state_change_notifier(&i->migration_state_change);
+
     qemu_register_reset(vm_introspection_reset, i);
 }
 
@@ -478,6 +495,9 @@  static void continue_with_the_intercepted_action(VMIntrospection *i)
     case VMI_INTERCEPT_SUSPEND:
         vm_stop(RUN_STATE_PAUSED);
         break;
+    case VMI_INTERCEPT_MIGRATE:
+        start_live_migration_thread(migrate_get_current());
+        break;
     default:
         error_report("VMI: %s: unexpected action %d",
                      __func__, i->intercepted_action);
@@ -571,9 +591,9 @@  static void chr_event_open(VMIntrospection *i)
 {
     Error *local_err = NULL;
 
-    if (suspend_pending) {
-        info_report("VMI: %s: too soon (suspend=%d)",
-                    __func__, suspend_pending);
+    if (suspend_pending || migrate_pending) {
+        info_report("VMI: %s: too soon (suspend=%d, migrate=%d)",
+                    __func__, suspend_pending, migrate_pending);
         maybe_disable_socket_reconnect(i);
         qemu_chr_fe_disconnect(&i->sock);
         return;
@@ -608,7 +628,7 @@  static void chr_event_close(VMIntrospection *i)
     cancel_unhook_timer(i);
     cancel_handshake_timer(i);
 
-    if (suspend_pending) {
+    if (suspend_pending || migrate_pending) {
         maybe_disable_socket_reconnect(i);
 
         if (i->intercepted_action != VMI_INTERCEPT_NONE) {
@@ -680,6 +700,9 @@  static bool record_intercept_action(VMI_intercept_command action)
         break;
     case VMI_INTERCEPT_FORCE_RESET:
         break;
+    case VMI_INTERCEPT_MIGRATE:
+        migrate_pending = true;
+        break;
     default:
         return false;
     }
diff --git a/include/sysemu/vmi-intercept.h b/include/sysemu/vmi-intercept.h
index ef591b49e7..b4a9a3faa7 100644
--- a/include/sysemu/vmi-intercept.h
+++ b/include/sysemu/vmi-intercept.h
@@ -15,6 +15,7 @@  typedef enum {
     VMI_INTERCEPT_SUSPEND,
     VMI_INTERCEPT_RESUME,
     VMI_INTERCEPT_FORCE_RESET,
+    VMI_INTERCEPT_MIGRATE,
 } VMI_intercept_command;
 
 bool vm_introspection_intercept(VMI_intercept_command ic, Error **errp);
diff --git a/migration/migration.c b/migration/migration.c
index 187ac0410c..222037d739 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -55,6 +55,8 @@ 
 #include "qemu/queue.h"
 #include "multifd.h"
 
+#include "sysemu/vmi-intercept.h"
+
 #define MAX_THROTTLE  (32 << 20)      /* Migration transfer speed throttling */
 
 /* Amount of time to allocate to each "chunk" of bandwidth-throttled
@@ -3471,6 +3473,13 @@  static void *migration_thread(void *opaque)
     return NULL;
 }
 
+void start_live_migration_thread(MigrationState *s)
+{
+    qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
+                    QEMU_THREAD_JOINABLE);
+    s->migration_thread_running = true;
+}
+
 void migrate_fd_connect(MigrationState *s, Error *error_in)
 {
     Error *local_err = NULL;
@@ -3534,9 +3543,12 @@  void migrate_fd_connect(MigrationState *s, Error *error_in)
         migrate_fd_cleanup(s);
         return;
     }
-    qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
-                       QEMU_THREAD_JOINABLE);
-    s->migration_thread_running = true;
+
+    if (vm_introspection_intercept(VMI_INTERCEPT_MIGRATE, &error_in)) {
+        return;
+    }
+
+    start_live_migration_thread(s);
 }
 
 void migration_global_dump(Monitor *mon)
diff --git a/migration/migration.h b/migration/migration.h
index 507284e563..eb5668e1f2 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -263,6 +263,8 @@  struct MigrationState
     uint8_t clear_bitmap_shift;
 };
 
+void start_live_migration_thread(MigrationState *s);
+
 void migrate_set_state(int *state, int old_state, int new_state);
 
 void migration_fd_process_incoming(QEMUFile *f, Error **errp);