diff mbox

[RESEND,V3,4/6] migration: add postcopy downtime into MigrationIncommingState

Message ID 1493362658-8179-5-git-send-email-a.perevalov@samsung.com
State New
Headers show

Commit Message

Alexey Perevalov April 28, 2017, 6:57 a.m. UTC
This patch adds a request to kernel space for UFFD_FEATURE_THREAD_ID,
in case this feature is provided by the kernel.

DowntimeContext is encapsulated inside migration.c.

Signed-off-by: Alexey Perevalov <a.perevalov@samsung.com>
---
 include/migration/migration.h | 12 ++++++++++++
 migration/migration.c         | 33 +++++++++++++++++++++++++++++++++
 migration/postcopy-ram.c      |  8 ++++++++
 3 files changed, 53 insertions(+)

Comments

Peter Xu April 28, 2017, 9:38 a.m. UTC | #1
On Fri, Apr 28, 2017 at 09:57:36AM +0300, Alexey Perevalov wrote:
> This patch add request to kernel space for UFFD_FEATURE_THREAD_ID,
> in case when this feature is provided by kernel.
> 
> DowntimeContext is incapsulated inside migration.c.
> 
> Signed-off-by: Alexey Perevalov <a.perevalov@samsung.com>
> ---
>  include/migration/migration.h | 12 ++++++++++++
>  migration/migration.c         | 33 +++++++++++++++++++++++++++++++++
>  migration/postcopy-ram.c      |  8 ++++++++
>  3 files changed, 53 insertions(+)
> 
> diff --git a/include/migration/migration.h b/include/migration/migration.h
> index ba1a16c..e8fb68f 100644
> --- a/include/migration/migration.h
> +++ b/include/migration/migration.h
> @@ -83,6 +83,8 @@ typedef enum {
>      POSTCOPY_INCOMING_END
>  } PostcopyState;
>  
> +struct DowntimeContext;

Nit: shall we embed something like "Postcopy" (or short form) into
this struct name? Since the whole thing is really tailored for
postcopy, only.

> +
>  /* State for the incoming migration */
>  struct MigrationIncomingState {
>      QEMUFile *from_src_file;
> @@ -123,10 +125,20 @@ struct MigrationIncomingState {
>  
>      /* See savevm.c */
>      LoadStateEntry_Head loadvm_handlers;
> +
> +    /*
> +     * DowntimeContext to keep information for postcopy
> +     * live migration, to calculate downtime
> +     * */
> +    struct DowntimeContext *downtime_ctx;
>  };
>  
>  MigrationIncomingState *migration_incoming_get_current(void);
>  void migration_incoming_state_destroy(void);
> +/*
> + * Functions to work with downtime context
> + */
> +struct DowntimeContext *downtime_context_new(void);
>  
>  struct MigrationState
>  {
> diff --git a/migration/migration.c b/migration/migration.c
> index 569a7f6..ec76e5c 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -77,6 +77,18 @@ static NotifierList migration_state_notifiers =
>  
>  static bool deferred_incoming;
>  
> +typedef struct DowntimeContext {
> +    /* time when page fault initiated per vCPU */
> +    int64_t *page_fault_vcpu_time;
> +    /* page address per vCPU */
> +    uint64_t *vcpu_addr;
> +    int64_t total_downtime;
> +    /* downtime per vCPU */
> +    int64_t *vcpu_downtime;
> +    /* point in time when last page fault was initiated */
> +    int64_t last_begin;
> +} DowntimeContext;
> +
>  /*
>   * Current state of incoming postcopy; note this is not part of
>   * MigrationIncomingState since it's state is used during cleanup
> @@ -116,6 +128,23 @@ MigrationState *migrate_get_current(void)
>      return &current_migration;
>  }
>  
> +struct DowntimeContext *downtime_context_new(void)
> +{
> +    DowntimeContext *ctx = g_new0(DowntimeContext, 1);
> +    ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus);
> +    ctx->vcpu_addr = g_new0(uint64_t, smp_cpus);
> +    ctx->vcpu_downtime = g_new0(int64_t, smp_cpus);
> +    return ctx;
> +}
> +
> +static void destroy_downtime_context(struct DowntimeContext *ctx)
> +{
> +    g_free(ctx->page_fault_vcpu_time);
> +    g_free(ctx->vcpu_addr);
> +    g_free(ctx->vcpu_downtime);
> +    g_free(ctx);
> +}
> +
>  MigrationIncomingState *migration_incoming_get_current(void)
>  {
>      static bool once;
> @@ -138,6 +167,10 @@ void migration_incoming_state_destroy(void)
>  
>      qemu_event_destroy(&mis->main_thread_load_event);
>      loadvm_free_handlers(mis);
> +    if (mis->downtime_ctx) {
> +        destroy_downtime_context(mis->downtime_ctx);
> +        mis->downtime_ctx = NULL;
> +    }
>  }
>  
>  
> diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> index 21e7150..f3688f5 100644
> --- a/migration/postcopy-ram.c
> +++ b/migration/postcopy-ram.c
> @@ -132,6 +132,14 @@ static bool ufd_version_check(int ufd, MigrationIncomingState *mis)
>          return false;
>      }
>  
> +#ifdef UFFD_FEATURE_THREAD_ID
> +    if (mis && UFFD_FEATURE_THREAD_ID & supported_features) {
> +        /* kernel supports that feature */
> +        mis->downtime_ctx = downtime_context_new();
> +        new_features |= UFFD_FEATURE_THREAD_ID;

So here I know why in patch 2 new_features == 0... 

If I were you, I would like the series be done in below 4 patches:

1. update header
2. introduce THREAD_ID feature, and enable it conditionally
3. squash all the downtime thing (downtime context, calculation) in
   one patch here
4. introduce trace

IMHO that's clearer and easier for review. But I'm okay with current
as well as long as the maintainers (Dave/Juan) won't disagree. :)

Thanks,

> +    }
> +#endif
> +
>      /* request features */
>      if (new_features && !request_ufd_features(ufd, new_features)) {
>          error_report("ufd_version_check failed: features %" PRIu64,
> -- 
> 1.9.1
>
Alexey Perevalov April 28, 2017, 10:03 a.m. UTC | #2
On 04/28/2017 12:38 PM, Peter Xu wrote:
> On Fri, Apr 28, 2017 at 09:57:36AM +0300, Alexey Perevalov wrote:
>> This patch add request to kernel space for UFFD_FEATURE_THREAD_ID,
>> in case when this feature is provided by kernel.
>>
>> DowntimeContext is incapsulated inside migration.c.
>>
>> Signed-off-by: Alexey Perevalov <a.perevalov@samsung.com>
>> ---
>>   include/migration/migration.h | 12 ++++++++++++
>>   migration/migration.c         | 33 +++++++++++++++++++++++++++++++++
>>   migration/postcopy-ram.c      |  8 ++++++++
>>   3 files changed, 53 insertions(+)
>>
>> diff --git a/include/migration/migration.h b/include/migration/migration.h
>> index ba1a16c..e8fb68f 100644
>> --- a/include/migration/migration.h
>> +++ b/include/migration/migration.h
>> @@ -83,6 +83,8 @@ typedef enum {
>>       POSTCOPY_INCOMING_END
>>   } PostcopyState;
>>   
>> +struct DowntimeContext;
> Nit: shall we embed something like "Postcopy" (or short form) into
> this struct name? Since the whole thing is really tailored for
> postcopy, only.
Yes, that is a postcopy-only structure, so maybe PostcopyDowntimeContext
is more readable.

>
>> +
>>   /* State for the incoming migration */
>>   struct MigrationIncomingState {
>>       QEMUFile *from_src_file;
>> @@ -123,10 +125,20 @@ struct MigrationIncomingState {
>>   
>>       /* See savevm.c */
>>       LoadStateEntry_Head loadvm_handlers;
>> +
>> +    /*
>> +     * DowntimeContext to keep information for postcopy
>> +     * live migration, to calculate downtime
>> +     * */
>> +    struct DowntimeContext *downtime_ctx;
>>   };
>>   
>>   MigrationIncomingState *migration_incoming_get_current(void);
>>   void migration_incoming_state_destroy(void);
>> +/*
>> + * Functions to work with downtime context
>> + */
>> +struct DowntimeContext *downtime_context_new(void);
>>   
>>   struct MigrationState
>>   {
>> diff --git a/migration/migration.c b/migration/migration.c
>> index 569a7f6..ec76e5c 100644
>> --- a/migration/migration.c
>> +++ b/migration/migration.c
>> @@ -77,6 +77,18 @@ static NotifierList migration_state_notifiers =
>>   
>>   static bool deferred_incoming;
>>   
>> +typedef struct DowntimeContext {
>> +    /* time when page fault initiated per vCPU */
>> +    int64_t *page_fault_vcpu_time;
>> +    /* page address per vCPU */
>> +    uint64_t *vcpu_addr;
>> +    int64_t total_downtime;
>> +    /* downtime per vCPU */
>> +    int64_t *vcpu_downtime;
>> +    /* point in time when last page fault was initiated */
>> +    int64_t last_begin;
>> +} DowntimeContext;
>> +
>>   /*
>>    * Current state of incoming postcopy; note this is not part of
>>    * MigrationIncomingState since it's state is used during cleanup
>> @@ -116,6 +128,23 @@ MigrationState *migrate_get_current(void)
>>       return &current_migration;
>>   }
>>   
>> +struct DowntimeContext *downtime_context_new(void)
>> +{
>> +    DowntimeContext *ctx = g_new0(DowntimeContext, 1);
>> +    ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus);
>> +    ctx->vcpu_addr = g_new0(uint64_t, smp_cpus);
>> +    ctx->vcpu_downtime = g_new0(int64_t, smp_cpus);
>> +    return ctx;
>> +}
>> +
>> +static void destroy_downtime_context(struct DowntimeContext *ctx)
>> +{
>> +    g_free(ctx->page_fault_vcpu_time);
>> +    g_free(ctx->vcpu_addr);
>> +    g_free(ctx->vcpu_downtime);
>> +    g_free(ctx);
>> +}
>> +
>>   MigrationIncomingState *migration_incoming_get_current(void)
>>   {
>>       static bool once;
>> @@ -138,6 +167,10 @@ void migration_incoming_state_destroy(void)
>>   
>>       qemu_event_destroy(&mis->main_thread_load_event);
>>       loadvm_free_handlers(mis);
>> +    if (mis->downtime_ctx) {
>> +        destroy_downtime_context(mis->downtime_ctx);
>> +        mis->downtime_ctx = NULL;
>> +    }
>>   }
>>   
>>   
>> diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
>> index 21e7150..f3688f5 100644
>> --- a/migration/postcopy-ram.c
>> +++ b/migration/postcopy-ram.c
>> @@ -132,6 +132,14 @@ static bool ufd_version_check(int ufd, MigrationIncomingState *mis)
>>           return false;
>>       }
>>   
>> +#ifdef UFFD_FEATURE_THREAD_ID
>> +    if (mis && UFFD_FEATURE_THREAD_ID & supported_features) {
>> +        /* kernel supports that feature */
>> +        mis->downtime_ctx = downtime_context_new();
>> +        new_features |= UFFD_FEATURE_THREAD_ID;
> So here I know why in patch 2 new_features == 0...
>
> If I were you, I would like the series be done in below 4 patches:
>
> 1. update header
> 2. introduce THREAD_ID feature, and enable it conditionally
> 3. squash all the downtime thing (downtime context, calculation) in
>     one patch here
> 4. introduce trace
>
> IMHO that's clearer and easier for review. But I'm okay with current
> as well as long as the maintainers (Dave/Juan) won't disagree. :)
In previous series, David asked me to split one patch into 2
[Qemu-devel] [PATCH 3/6] migration: add UFFD_FEATURE_THREAD_ID feature 
support

 >There seem to be two parts to this:
 >  a) Adding the mis parameter to ufd_version_check
 >  b) Asking for the feature

 >Please split it into two patches.

So in current patch set, I also added re-factoring, which was missed before
"migration: split ufd_version_check onto receive/request features part"

>
> Thanks,
>
>> +    }
>> +#endif
>> +
>>       /* request features */
>>       if (new_features && !request_ufd_features(ufd, new_features)) {
>>           error_report("ufd_version_check failed: features %" PRIu64,
>> -- 
>> 1.9.1
>>
Peter Xu April 28, 2017, 10:07 a.m. UTC | #3
On Fri, Apr 28, 2017 at 01:03:45PM +0300, Alexey Perevalov wrote:

[...]

> >>diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> >>index 21e7150..f3688f5 100644
> >>--- a/migration/postcopy-ram.c
> >>+++ b/migration/postcopy-ram.c
> >>@@ -132,6 +132,14 @@ static bool ufd_version_check(int ufd, MigrationIncomingState *mis)
> >>          return false;
> >>      }
> >>+#ifdef UFFD_FEATURE_THREAD_ID
> >>+    if (mis && UFFD_FEATURE_THREAD_ID & supported_features) {
> >>+        /* kernel supports that feature */
> >>+        mis->downtime_ctx = downtime_context_new();
> >>+        new_features |= UFFD_FEATURE_THREAD_ID;
> >So here I know why in patch 2 new_features == 0...
> >
> >If I were you, I would like the series be done in below 4 patches:
> >
> >1. update header
> >2. introduce THREAD_ID feature, and enable it conditionally
> >3. squash all the downtime thing (downtime context, calculation) in
> >    one patch here
> >4. introduce trace
> >
> >IMHO that's clearer and easier for review. But I'm okay with current
> >as well as long as the maintainers (Dave/Juan) won't disagree. :)
> In previous series, David asked me to split one patch into 2
> [Qemu-devel] [PATCH 3/6] migration: add UFFD_FEATURE_THREAD_ID feature
> support
> 
> >There seem to be two parts to this:
> >  a) Adding the mis parameter to ufd_version_check
> >  b) Asking for the feature
> 
> >Please split it into two patches.
> 
> So in current patch set, I also added re-factoring, which was missed before
> "migration: split ufd_version_check onto receive/request features part"

Sure. As long as Dave agrees, I'm okay with either way.
Dr. David Alan Gilbert April 28, 2017, 4:22 p.m. UTC | #4
* Peter Xu (peterx@redhat.com) wrote:
> On Fri, Apr 28, 2017 at 01:03:45PM +0300, Alexey Perevalov wrote:
> 
> [...]
> 
> > >>diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> > >>index 21e7150..f3688f5 100644
> > >>--- a/migration/postcopy-ram.c
> > >>+++ b/migration/postcopy-ram.c
> > >>@@ -132,6 +132,14 @@ static bool ufd_version_check(int ufd, MigrationIncomingState *mis)
> > >>          return false;
> > >>      }
> > >>+#ifdef UFFD_FEATURE_THREAD_ID
> > >>+    if (mis && UFFD_FEATURE_THREAD_ID & supported_features) {
> > >>+        /* kernel supports that feature */
> > >>+        mis->downtime_ctx = downtime_context_new();
> > >>+        new_features |= UFFD_FEATURE_THREAD_ID;
> > >So here I know why in patch 2 new_features == 0...
> > >
> > >If I were you, I would like the series be done in below 4 patches:
> > >
> > >1. update header
> > >2. introduce THREAD_ID feature, and enable it conditionally
> > >3. squash all the downtime thing (downtime context, calculation) in
> > >    one patch here
> > >4. introduce trace
> > >
> > >IMHO that's clearer and easier for review. But I'm okay with current
> > >as well as long as the maintainers (Dave/Juan) won't disagree. :)
> > In previous series, David asked me to split one patch into 2
> > [Qemu-devel] [PATCH 3/6] migration: add UFFD_FEATURE_THREAD_ID feature
> > support
> > 
> > >There seem to be two parts to this:
> > >  a) Adding the mis parameter to ufd_version_check
> > >  b) Asking for the feature
> > 
> > >Please split it into two patches.
> > 
> > So in current patch set, I also added re-factoring, which was missed before
> > "migration: split ufd_version_check onto receive/request features part"
> 
> Sure. As long as Dave agrees, I'm okay with either way.

I'm OK with the split, it pretty much matches what I asked last time I think.

The question I still have is how is this memory-expensive feature turned
on and off by the user?
Also I think Peter had some ideas for simpler data structures, how did
that play out?

Dave


> -- 
> Peter Xu
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Alexey Perevalov April 29, 2017, 9:16 a.m. UTC | #5
On Fri, Apr 28, 2017 at 05:22:05PM +0100, Dr. David Alan Gilbert wrote:
> * Peter Xu (peterx@redhat.com) wrote:
> > On Fri, Apr 28, 2017 at 01:03:45PM +0300, Alexey Perevalov wrote:
> > 
> > [...]
> > 
> > > >>diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> > > >>index 21e7150..f3688f5 100644
> > > >>--- a/migration/postcopy-ram.c
> > > >>+++ b/migration/postcopy-ram.c
> > > >>@@ -132,6 +132,14 @@ static bool ufd_version_check(int ufd, MigrationIncomingState *mis)
> > > >>          return false;
> > > >>      }
> > > >>+#ifdef UFFD_FEATURE_THREAD_ID
> > > >>+    if (mis && UFFD_FEATURE_THREAD_ID & supported_features) {
> > > >>+        /* kernel supports that feature */
> > > >>+        mis->downtime_ctx = downtime_context_new();
> > > >>+        new_features |= UFFD_FEATURE_THREAD_ID;
> > > >So here I know why in patch 2 new_features == 0...
> > > >
> > > >If I were you, I would like the series be done in below 4 patches:
> > > >
> > > >1. update header
> > > >2. introduce THREAD_ID feature, and enable it conditionally
> > > >3. squash all the downtime thing (downtime context, calculation) in
> > > >    one patch here
> > > >4. introduce trace
> > > >
> > > >IMHO that's clearer and easier for review. But I'm okay with current
> > > >as well as long as the maintainers (Dave/Juan) won't disagree. :)
> > > In previous series, David asked me to split one patch into 2
> > > [Qemu-devel] [PATCH 3/6] migration: add UFFD_FEATURE_THREAD_ID feature
> > > support
> > > 
> > > >There seem to be two parts to this:
> > > >  a) Adding the mis parameter to ufd_version_check
> > > >  b) Asking for the feature
> > > 
> > > >Please split it into two patches.
> > > 
> > > So in current patch set, I also added re-factoring, which was missed before
> > > "migration: split ufd_version_check onto receive/request features part"
> > 
> > Sure. As long as Dave agrees, I'm okay with either way.
> 
> I'm OK with the split, it pretty much matches what I asked last time I think.
> 
> The question I still have is how is this memory-expensive feature turned
> on and off by the user?
> Also I think Peter had some ideas for simpler data structures, how did
> that play out?
Maybe introduce it as extension of MigrationParameter,
I mean { "execute": "migrate-set-parameters" , "arguments":
	{ "calculate-postcopy-downtime": 1 } }


> 
> Dave
> 
> 
> > -- 
> > Peter Xu
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>
Eric Blake April 29, 2017, 3:02 p.m. UTC | #6
On 04/29/2017 04:16 AM, Alexey wrote:

>>>>
>>>>> There seem to be two parts to this:
>>>>>  a) Adding the mis parameter to ufd_version_check
>>>>>  b) Asking for the feature
>>>>
>>>>> Please split it into two patches.

Also, fix the typo in the subject line: s/Incomming/Incoming/
Dr. David Alan Gilbert May 2, 2017, 8:51 a.m. UTC | #7
* Alexey (a.perevalov@samsung.com) wrote:
> On Fri, Apr 28, 2017 at 05:22:05PM +0100, Dr. David Alan Gilbert wrote:
> > * Peter Xu (peterx@redhat.com) wrote:
> > > On Fri, Apr 28, 2017 at 01:03:45PM +0300, Alexey Perevalov wrote:
> > > 
> > > [...]
> > > 
> > > > >>diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> > > > >>index 21e7150..f3688f5 100644
> > > > >>--- a/migration/postcopy-ram.c
> > > > >>+++ b/migration/postcopy-ram.c
> > > > >>@@ -132,6 +132,14 @@ static bool ufd_version_check(int ufd, MigrationIncomingState *mis)
> > > > >>          return false;
> > > > >>      }
> > > > >>+#ifdef UFFD_FEATURE_THREAD_ID
> > > > >>+    if (mis && UFFD_FEATURE_THREAD_ID & supported_features) {
> > > > >>+        /* kernel supports that feature */
> > > > >>+        mis->downtime_ctx = downtime_context_new();
> > > > >>+        new_features |= UFFD_FEATURE_THREAD_ID;
> > > > >So here I know why in patch 2 new_features == 0...
> > > > >
> > > > >If I were you, I would like the series be done in below 4 patches:
> > > > >
> > > > >1. update header
> > > > >2. introduce THREAD_ID feature, and enable it conditionally
> > > > >3. squash all the downtime thing (downtime context, calculation) in
> > > > >    one patch here
> > > > >4. introduce trace
> > > > >
> > > > >IMHO that's clearer and easier for review. But I'm okay with current
> > > > >as well as long as the maintainers (Dave/Juan) won't disagree. :)
> > > > In previous series, David asked me to split one patch into 2
> > > > [Qemu-devel] [PATCH 3/6] migration: add UFFD_FEATURE_THREAD_ID feature
> > > > support
> > > > 
> > > > >There seem to be two parts to this:
> > > > >  a) Adding the mis parameter to ufd_version_check
> > > > >  b) Asking for the feature
> > > > 
> > > > >Please split it into two patches.
> > > > 
> > > > So in current patch set, I also added re-factoring, which was missed before
> > > > "migration: split ufd_version_check onto receive/request features part"
> > > 
> > > Sure. As long as Dave agrees, I'm okay with either way.
> > 
> > I'm OK with the split, it pretty much matches what I asked last time I think.
> > 
> > The question I still have is how is this memory-expensive feature turned
> > on and off by the user?
> > Also I think Peter had some ideas for simpler data structures, how did
> > that play out?
> Maybe introduce it as extension of MigrationParameter,
> I mean { "execute": "migrate-set-parameters" , "arguments":
> 	{ "calculate-postcopy-downtime": 1 } }

Use migrate-set-capabilities, they're effectively the same but just booleans.

Dave

> 
> > 
> > Dave
> > 
> > 
> > > -- 
> > > Peter Xu
> > --
> > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> > 
> 
> -- 
> 
> BR
> Alexey
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Alexey Perevalov May 4, 2017, 1:09 p.m. UTC | #8
On Tue, May 02, 2017 at 09:51:44AM +0100, Dr. David Alan Gilbert wrote:
> * Alexey (a.perevalov@samsung.com) wrote:
> > On Fri, Apr 28, 2017 at 05:22:05PM +0100, Dr. David Alan Gilbert wrote:
> > > * Peter Xu (peterx@redhat.com) wrote:
> > > > On Fri, Apr 28, 2017 at 01:03:45PM +0300, Alexey Perevalov wrote:
> > > > 
> > > > [...]
> > > > 
> > > > > >>diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> > > > > >>index 21e7150..f3688f5 100644
> > > > > >>--- a/migration/postcopy-ram.c
> > > > > >>+++ b/migration/postcopy-ram.c
> > > > > >>@@ -132,6 +132,14 @@ static bool ufd_version_check(int ufd, MigrationIncomingState *mis)
> > > > > >>          return false;
> > > > > >>      }
> > > > > >>+#ifdef UFFD_FEATURE_THREAD_ID
> > > > > >>+    if (mis && UFFD_FEATURE_THREAD_ID & supported_features) {
> > > > > >>+        /* kernel supports that feature */
> > > > > >>+        mis->downtime_ctx = downtime_context_new();
> > > > > >>+        new_features |= UFFD_FEATURE_THREAD_ID;
> > > > > >So here I know why in patch 2 new_features == 0...
> > > > > >
> > > > > >If I were you, I would like the series be done in below 4 patches:
> > > > > >
> > > > > >1. update header
> > > > > >2. introduce THREAD_ID feature, and enable it conditionally
> > > > > >3. squash all the downtime thing (downtime context, calculation) in
> > > > > >    one patch here
> > > > > >4. introduce trace
> > > > > >
> > > > > >IMHO that's clearer and easier for review. But I'm okay with current
> > > > > >as well as long as the maintainers (Dave/Juan) won't disagree. :)
> > > > > In previous series, David asked me to split one patch into 2
> > > > > [Qemu-devel] [PATCH 3/6] migration: add UFFD_FEATURE_THREAD_ID feature
> > > > > support
> > > > > 
> > > > > >There seem to be two parts to this:
> > > > > >  a) Adding the mis parameter to ufd_version_check
> > > > > >  b) Asking for the feature
> > > > > 
> > > > > >Please split it into two patches.
> > > > > 
> > > > > So in current patch set, I also added re-factoring, which was missed before
> > > > > "migration: split ufd_version_check onto receive/request features part"
> > > > 
> > > > Sure. As long as Dave agrees, I'm okay with either way.
> > > 
> > > I'm OK with the split, it pretty much matches what I asked last time I think.
> > > 
> > > The question I still have is how is this memory-expensive feature turned
> > > on and off by the user?
> > > Also I think Peter had some ideas for simpler data structures, how did
> > > that play out?
> > Maybe introduce it as extension of MigrationParameter,
> > I mean { "execute": "migrate-set-parameters" , "arguments":
> > 	{ "calculate-postcopy-downtime": 1 } }
> 
> Use migrate-set-capabilities, they're effectively the same but just booleans.

For me it's not so clear where to set that capability: on the destination or on the source
side. The user sets the postcopy ram capability on the source side, so probably on the
source side the user wants to set postcopy-downtime.
If I'm not wrong, neither capabilities nor parameters are transferred
from source to destination.

I wanted to pass it in MIG_CMD_POSTCOPY_ADVISE, but it holds only 2
uint64, and they are already occupied.
Like with RETURN PATH protocol, MIG couldn't be extended w/o breaking backward
compatibility. Length for cmd is transmitted, but compared with
predefined len from mig_cmd_args.

Maybe just increase QEMU_VM_FILE_VERSION in this case, it will be
possible to return downtime back to source by return path.
For supporting backward compatibility keep several versions of mig_cmd_args
per QEMU_VM_FILE_VERSION. 


> Dave
> 
> > 
> > > 
> > > Dave
> > > 
> > > 
> > > > -- 
> > > > Peter Xu
> > > --
> > > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> > > 
> > 
> > -- 
> > 
> > BR
> > Alexey
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>
Dr. David Alan Gilbert May 5, 2017, 2:11 p.m. UTC | #9
* Alexey (a.perevalov@samsung.com) wrote:
> On Tue, May 02, 2017 at 09:51:44AM +0100, Dr. David Alan Gilbert wrote:
> > * Alexey (a.perevalov@samsung.com) wrote:
> > > On Fri, Apr 28, 2017 at 05:22:05PM +0100, Dr. David Alan Gilbert wrote:
> > > > * Peter Xu (peterx@redhat.com) wrote:
> > > > > On Fri, Apr 28, 2017 at 01:03:45PM +0300, Alexey Perevalov wrote:
> > > > > 
> > > > > [...]
> > > > > 
> > > > > > >>diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> > > > > > >>index 21e7150..f3688f5 100644
> > > > > > >>--- a/migration/postcopy-ram.c
> > > > > > >>+++ b/migration/postcopy-ram.c
> > > > > > >>@@ -132,6 +132,14 @@ static bool ufd_version_check(int ufd, MigrationIncomingState *mis)
> > > > > > >>          return false;
> > > > > > >>      }
> > > > > > >>+#ifdef UFFD_FEATURE_THREAD_ID
> > > > > > >>+    if (mis && UFFD_FEATURE_THREAD_ID & supported_features) {
> > > > > > >>+        /* kernel supports that feature */
> > > > > > >>+        mis->downtime_ctx = downtime_context_new();
> > > > > > >>+        new_features |= UFFD_FEATURE_THREAD_ID;
> > > > > > >So here I know why in patch 2 new_features == 0...
> > > > > > >
> > > > > > >If I were you, I would like the series be done in below 4 patches:
> > > > > > >
> > > > > > >1. update header
> > > > > > >2. introduce THREAD_ID feature, and enable it conditionally
> > > > > > >3. squash all the downtime thing (downtime context, calculation) in
> > > > > > >    one patch here
> > > > > > >4. introduce trace
> > > > > > >
> > > > > > >IMHO that's clearer and easier for review. But I'm okay with current
> > > > > > >as well as long as the maintainers (Dave/Juan) won't disagree. :)
> > > > > > In previous series, David asked me to split one patch into 2
> > > > > > [Qemu-devel] [PATCH 3/6] migration: add UFFD_FEATURE_THREAD_ID feature
> > > > > > support
> > > > > > 
> > > > > > >There seem to be two parts to this:
> > > > > > >  a) Adding the mis parameter to ufd_version_check
> > > > > > >  b) Asking for the feature
> > > > > > 
> > > > > > >Please split it into two patches.
> > > > > > 
> > > > > > So in current patch set, I also added re-factoring, which was missed before
> > > > > > "migration: split ufd_version_check onto receive/request features part"
> > > > > 
> > > > > Sure. As long as Dave agrees, I'm okay with either way.
> > > > 
> > > > I'm OK with the split, it pretty much matches what I asked last time I think.
> > > > 
> > > > The question I still have is how is this memory-expensive feature turned
> > > > on and off by the user?
> > > > Also I think Peter had some ideas for simpler data structures, how did
> > > > that play out?
> > > Maybe introduce it as extension of MigrationParameter,
> > > I mean { "execute": "migrate-set-parameters" , "arguments":
> > > 	{ "calculate-postcopy-downtime": 1 } }
> > 
> > Use migrate-set-capabilities, they're effectively the same but just booleans.
> 
> For me it's not so clear, where to set that capability, on destination or on source
> side. User sets postcopy ram capability on source side, probably on
> source side user wants to set postcopy-downtime.
> If I'm not wrong, neither capabilities nor parameters are transferring
> from source to destination.

Use a capability on the destination specifically for this; it's OK to set capabilities
on the destination, and actually libvirt already sets some for us.

One question: Now we're using Peter's idea, so you don't have that big tree
structure, what are the costs now - is it as big a problem as it was?

> I wanted to pass in in MIG_CMD_POSTCOPY_ADVISE, but it holds only 2
> uint64, and they are already occupied.
> Like with RETURN PATH protocol, MIG couldn't be extended w/o breaking backward
> compatibility. Length for cmd is transmitted, but compared with
> predefined len from mig_cmd_args.
> 
> Maybe just increase QEMU_VM_FILE_VERSION in this case, it will be
> possible to return downtime back to source by return path.
> For supporting backward compatibility keep several versions of mig_cmd_args
> per QEMU_VM_FILE_VERSION. 

No, we'll only change file version on some massive improvement.

Dave

> 
> > Dave
> > 
> > > 
> > > > 
> > > > Dave
> > > > 
> > > > 
> > > > > -- 
> > > > > Peter Xu
> > > > --
> > > > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> > > > 
> > > 
> > > -- 
> > > 
> > > BR
> > > Alexey
> > --
> > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> > 
> 
> -- 
> 
> BR
> Alexey
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Alexey Perevalov May 5, 2017, 4:25 p.m. UTC | #10
On Fri, May 05, 2017 at 03:11:14PM +0100, Dr. David Alan Gilbert wrote:
> * Alexey (a.perevalov@samsung.com) wrote:
> > On Tue, May 02, 2017 at 09:51:44AM +0100, Dr. David Alan Gilbert wrote:
> > > * Alexey (a.perevalov@samsung.com) wrote:
> > > > On Fri, Apr 28, 2017 at 05:22:05PM +0100, Dr. David Alan Gilbert wrote:
> > > > > * Peter Xu (peterx@redhat.com) wrote:
> > > > > > On Fri, Apr 28, 2017 at 01:03:45PM +0300, Alexey Perevalov wrote:
> > > > > > 
> > > > > > [...]
> > > > > > 
> > > > > > > >>diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> > > > > > > >>index 21e7150..f3688f5 100644
> > > > > > > >>--- a/migration/postcopy-ram.c
> > > > > > > >>+++ b/migration/postcopy-ram.c
> > > > > > > >>@@ -132,6 +132,14 @@ static bool ufd_version_check(int ufd, MigrationIncomingState *mis)
> > > > > > > >>          return false;
> > > > > > > >>      }
> > > > > > > >>+#ifdef UFFD_FEATURE_THREAD_ID
> > > > > > > >>+    if (mis && UFFD_FEATURE_THREAD_ID & supported_features) {
> > > > > > > >>+        /* kernel supports that feature */
> > > > > > > >>+        mis->downtime_ctx = downtime_context_new();
> > > > > > > >>+        new_features |= UFFD_FEATURE_THREAD_ID;
> > > > > > > >So here I know why in patch 2 new_features == 0...
> > > > > > > >
> > > > > > > >If I were you, I would like the series be done in below 4 patches:
> > > > > > > >
> > > > > > > >1. update header
> > > > > > > >2. introduce THREAD_ID feature, and enable it conditionally
> > > > > > > >3. squash all the downtime thing (downtime context, calculation) in
> > > > > > > >    one patch here
> > > > > > > >4. introduce trace
> > > > > > > >
> > > > > > > >IMHO that's clearer and easier for review. But I'm okay with current
> > > > > > > >as well as long as the maintainers (Dave/Juan) won't disagree. :)
> > > > > > > In previous series, David asked me to split one patch into 2
> > > > > > > [Qemu-devel] [PATCH 3/6] migration: add UFFD_FEATURE_THREAD_ID feature
> > > > > > > support
> > > > > > > 
> > > > > > > >There seem to be two parts to this:
> > > > > > > >  a) Adding the mis parameter to ufd_version_check
> > > > > > > >  b) Asking for the feature
> > > > > > > 
> > > > > > > >Please split it into two patches.
> > > > > > > 
> > > > > > > So in current patch set, I also added re-factoring, which was missed before
> > > > > > > "migration: split ufd_version_check onto receive/request features part"
> > > > > > 
> > > > > > Sure. As long as Dave agrees, I'm okay with either way.
> > > > > 
> > > > > I'm OK with the split, it pretty much matches what I asked last time I think.
> > > > > 
> > > > > The question I still have is how is this memory-expensive feature turned
> > > > > on and off by the user?
> > > > > Also I think Peter had some ideas for simpler data structures, how did
> > > > > that play out?
> > > > Maybe introduce it as extension of MigrationParameter,
> > > > I mean { "execute": "migrate-set-parameters" , "arguments":
> > > > 	{ "calculate-postcopy-downtime": 1 } }
> > > 
> > > Use migrate-set-capabilities, they're effectively the same but just booleans.
> > 
> > For me it's not so clear, where to set that capability, on destination or on source
> > side. User sets postcopy ram capability on source side, probably on
> > source side user wants to set postcopy-downtime.
> > If I'm not wrong, neither capabilities nor parameters are transferring
> > from source to destination.
> 
> Use a capability on the destination specifically for this; it's OK to set capabilities
> on the destination, and actually libvirt already sets some for us.
> 
> One question: Now we're using Peter's idea, so you don't have that big tree
> structure, what are the costs now - is it as big a problem as it was?
It was a tree keyed by page address, so in the worst case, when facing a huge
number of pages (terabytes of RAM with a 4 KB page size), that structure became
big and consumed a lot of memory, although lookup wasn't so bad thanks to the
tree: a logarithmic-complexity search was performed in both _begin and _end.
Now _begin has O(1) complexity when filling the necessary fields, since
cpu_index is used as an array index (although we still need to look up
cpu_index by thread_id, see below), and _end has O(n) complexity, where n is the number of vCPUs.
The size of the PostcopyDowntimeContext depends only on the number of vCPUs.
It's roughly vCPU_count * (int64_t for page_fault_vcpu_time + uint64_t for
vcpu_addr + int64_t for vcpu_downtime) + int64_t for last_begin + int for the
number of suspended vCPUs + int64_t for the total downtime.
For 2046 vCPUs that comes to 49124 bytes. The algorithm doesn't depend
on the number of pages, only on the number of vCPUs, which in the common
case is obviously much smaller.

for recall:
typedef struct PostcopyDowntimeContext {
    /* time when page fault initiated per vCPU */
    int64_t *page_fault_vcpu_time;
    /* page address per vCPU */
    uint64_t *vcpu_addr;
    int64_t total_downtime;
    /* downtime per vCPU */
    int64_t *vcpu_downtime;
    /* point in time when last page fault was initiated */
    int64_t last_begin;
    /* number of vCPU are suspended */
    int smp_cpus_down;
} PostcopyDowntimeContext;


Also keep in mind get_mem_fault_cpu_index, where QEMU iterates over the CPUs
on every page fault to look up cpu_index by the process's thread id.
Peter suggested a hash there, but I think a tree will be enough; servers
with about 2000 CPUs are only just starting to be sold. I prepared a patch
set, but didn't include the lookup tree in get_mem_fault_cpu_index — it's
not so clear how to handle it, in particular the lifetime of that lookup tree:
should it be just a part of PostcopyDowntimeContext, or generic code? BTW a similar
(linear) lookup is done in qemu_get_cpu by cpu_index, so I think it
would be useful to have macros to construct/destruct a search tree for
CPUs keyed on a unique field.


> 
> > I wanted to pass in in MIG_CMD_POSTCOPY_ADVISE, but it holds only 2
> > uint64, and they are already occupied.
> > Like with RETURN PATH protocol, MIG couldn't be extended w/o breaking backward
> > compatibility. Length for cmd is transmitted, but compared with
> > predefined len from mig_cmd_args.
> > 
> > Maybe just increase QEMU_VM_FILE_VERSION in this case, it will be
> > possible to return downtime back to source by return path.
> > For supporting backward compatibility keep several versions of mig_cmd_args
> > per QEMU_VM_FILE_VERSION. 
> 
> No, we'll only change file version on some massive improvement.
> 
> Dave
> 
> > 
> > > Dave
> > > 
> > > > 
> > > > > 
> > > > > Dave
> > > > > 
> > > > > 
> > > > > > -- 
> > > > > > Peter Xu
> > > > > --
> > > > > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> > > > > 
> > > > 
> > > > -- 
> > > > 
> > > > BR
> > > > Alexey
> > > --
> > > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> > > 
> > 
> > -- 
> > 
> > BR
> > Alexey
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>
diff mbox

Patch

diff --git a/include/migration/migration.h b/include/migration/migration.h
index ba1a16c..e8fb68f 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -83,6 +83,8 @@  typedef enum {
     POSTCOPY_INCOMING_END
 } PostcopyState;
 
+struct DowntimeContext;
+
 /* State for the incoming migration */
 struct MigrationIncomingState {
     QEMUFile *from_src_file;
@@ -123,10 +125,20 @@  struct MigrationIncomingState {
 
     /* See savevm.c */
     LoadStateEntry_Head loadvm_handlers;
+
+    /*
+     * DowntimeContext to keep information for postcopy
+     * live migration, to calculate downtime
+     * */
+    struct DowntimeContext *downtime_ctx;
 };
 
 MigrationIncomingState *migration_incoming_get_current(void);
 void migration_incoming_state_destroy(void);
+/*
+ * Functions to work with downtime context
+ */
+struct DowntimeContext *downtime_context_new(void);
 
 struct MigrationState
 {
diff --git a/migration/migration.c b/migration/migration.c
index 569a7f6..ec76e5c 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -77,6 +77,18 @@  static NotifierList migration_state_notifiers =
 
 static bool deferred_incoming;
 
+typedef struct DowntimeContext {
+    /* time when page fault initiated per vCPU */
+    int64_t *page_fault_vcpu_time;
+    /* page address per vCPU */
+    uint64_t *vcpu_addr;
+    int64_t total_downtime;
+    /* downtime per vCPU */
+    int64_t *vcpu_downtime;
+    /* point in time when last page fault was initiated */
+    int64_t last_begin;
+} DowntimeContext;
+
 /*
  * Current state of incoming postcopy; note this is not part of
  * MigrationIncomingState since it's state is used during cleanup
@@ -116,6 +128,23 @@  MigrationState *migrate_get_current(void)
     return &current_migration;
 }
 
+struct DowntimeContext *downtime_context_new(void)
+{
+    DowntimeContext *ctx = g_new0(DowntimeContext, 1);
+    ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus);
+    ctx->vcpu_addr = g_new0(uint64_t, smp_cpus);
+    ctx->vcpu_downtime = g_new0(int64_t, smp_cpus);
+    return ctx;
+}
+
+static void destroy_downtime_context(struct DowntimeContext *ctx)
+{
+    g_free(ctx->page_fault_vcpu_time);
+    g_free(ctx->vcpu_addr);
+    g_free(ctx->vcpu_downtime);
+    g_free(ctx);
+}
+
 MigrationIncomingState *migration_incoming_get_current(void)
 {
     static bool once;
@@ -138,6 +167,10 @@  void migration_incoming_state_destroy(void)
 
     qemu_event_destroy(&mis->main_thread_load_event);
     loadvm_free_handlers(mis);
+    if (mis->downtime_ctx) {
+        destroy_downtime_context(mis->downtime_ctx);
+        mis->downtime_ctx = NULL;
+    }
 }
 
 
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 21e7150..f3688f5 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -132,6 +132,14 @@  static bool ufd_version_check(int ufd, MigrationIncomingState *mis)
         return false;
     }
 
+#ifdef UFFD_FEATURE_THREAD_ID
+    if (mis && UFFD_FEATURE_THREAD_ID & supported_features) {
+        /* kernel supports that feature */
+        mis->downtime_ctx = downtime_context_new();
+        new_features |= UFFD_FEATURE_THREAD_ID;
+    }
+#endif
+
     /* request features */
     if (new_features && !request_ufd_features(ufd, new_features)) {
         error_report("ufd_version_check failed: features %" PRIu64,