diff mbox series

[v6,21/28] migration: setup ramstate for resume

Message ID 20180208103132.28452-22-peterx@redhat.com
State New
Headers show
Series [v6,01/28] migration: better error handling with QEMUFile | expand

Commit Message

Peter Xu Feb. 8, 2018, 10:31 a.m. UTC
After we have updated the dirty bitmaps of the ramblocks, we also need to
update the critical fields in RAMState to make sure it is ready for a resume.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 migration/ram.c        | 40 +++++++++++++++++++++++++++++++++++++++-
 migration/trace-events |  1 +
 2 files changed, 40 insertions(+), 1 deletion(-)

Comments

Dr. David Alan Gilbert Feb. 13, 2018, 6:17 p.m. UTC | #1
* Peter Xu (peterx@redhat.com) wrote:
> After we updated the dirty bitmaps of ramblocks, we also need to update
> the critical fields in RAMState to make sure it is ready for a resume.
> 
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>  migration/ram.c        | 40 +++++++++++++++++++++++++++++++++++++++-
>  migration/trace-events |  1 +
>  2 files changed, 40 insertions(+), 1 deletion(-)
> 
> diff --git a/migration/ram.c b/migration/ram.c
> index a2a4b05d5c..d275875f54 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -2250,6 +2250,36 @@ static int ram_init_all(RAMState **rsp)
>      return 0;
>  }
>  
> +static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
> +{
> +    RAMBlock *block;
> +    long pages = 0;
> +
> +    /*
> +     * Postcopy is not using xbzrle/compression, so no need for that.
> +     * Also, since source are already halted, we don't need to care
> +     * about dirty page logging as well.
> +     */
> +
> +    RAMBLOCK_FOREACH(block) {
> +        pages += bitmap_count_one(block->bmap,
> +                                  block->used_length >> TARGET_PAGE_BITS);
> +    }
> +
> +    /* This may not be aligned with current bitmaps. Recalculate. */
> +    rs->migration_dirty_pages = pages;

migration_dirty_pages is uint64_t - so we should probably do the cast
above and keep 'pages' as uint64_t.

> +    rs->last_seen_block = NULL;
> +    rs->last_sent_block = NULL;
> +    rs->last_page = 0;
> +    rs->last_version = ram_list.version;

Do you need to explicitly set
       rs->ram_bulk_stage = false;
here?

If the failure happened just after the start of postcopy and no
requested pages had been sent, I think it might still be set.


> +    /* Update RAMState cache of output QEMUFile */
> +    rs->f = out;
> +
> +    trace_ram_state_resume_prepare(pages);
> +}
> +
>  /*
>   * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
>   * long-running RCU critical section.  When rcu-reclaims in the code
> @@ -3178,8 +3208,16 @@ out:
>  static int ram_resume_prepare(MigrationState *s, void *opaque)
>  {
>      RAMState *rs = *(RAMState **)opaque;
> +    int ret;
>  
> -    return ram_dirty_bitmap_sync_all(s, rs);
> +    ret = ram_dirty_bitmap_sync_all(s, rs);
> +    if (ret) {
> +        return ret;
> +    }
> +
> +    ram_state_resume_prepare(rs, s->to_dst_file);
> +
> +    return 0;
>  }
>  
>  static SaveVMHandlers savevm_ram_handlers = {
> diff --git a/migration/trace-events b/migration/trace-events
> index 45b1d89217..f5913ff51c 100644
> --- a/migration/trace-events
> +++ b/migration/trace-events
> @@ -88,6 +88,7 @@ ram_dirty_bitmap_reload_complete(char *str) "%s"
>  ram_dirty_bitmap_sync_start(void) ""
>  ram_dirty_bitmap_sync_wait(void) ""
>  ram_dirty_bitmap_sync_complete(void) ""
> +ram_state_resume_prepare(long v) "%ld"
>  
>  # migration/migration.c
>  await_return_path_close_on_source_close(void) ""

Dave

> -- 
> 2.14.3
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Peter Xu Feb. 14, 2018, 4:20 a.m. UTC | #2
On Tue, Feb 13, 2018 at 06:17:51PM +0000, Dr. David Alan Gilbert wrote:
> * Peter Xu (peterx@redhat.com) wrote:
> > After we updated the dirty bitmaps of ramblocks, we also need to update
> > the critical fields in RAMState to make sure it is ready for a resume.
> > 
> > Signed-off-by: Peter Xu <peterx@redhat.com>
> > ---
> >  migration/ram.c        | 40 +++++++++++++++++++++++++++++++++++++++-
> >  migration/trace-events |  1 +
> >  2 files changed, 40 insertions(+), 1 deletion(-)
> > 
> > diff --git a/migration/ram.c b/migration/ram.c
> > index a2a4b05d5c..d275875f54 100644
> > --- a/migration/ram.c
> > +++ b/migration/ram.c
> > @@ -2250,6 +2250,36 @@ static int ram_init_all(RAMState **rsp)
> >      return 0;
> >  }
> >  
> > +static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
> > +{
> > +    RAMBlock *block;
> > +    long pages = 0;
> > +
> > +    /*
> > +     * Postcopy is not using xbzrle/compression, so no need for that.
> > +     * Also, since source are already halted, we don't need to care
> > +     * about dirty page logging as well.
> > +     */
> > +
> > +    RAMBLOCK_FOREACH(block) {
> > +        pages += bitmap_count_one(block->bmap,
> > +                                  block->used_length >> TARGET_PAGE_BITS);
> > +    }
> > +
> > +    /* This may not be aligned with current bitmaps. Recalculate. */
> > +    rs->migration_dirty_pages = pages;
> 
> migration_dirty_pages is uint64_t - so we should probably do the cast
> above and keep 'pages' as uint64_t.

Sure.

> 
> > +    rs->last_seen_block = NULL;
> > +    rs->last_sent_block = NULL;
> > +    rs->last_page = 0;
> > +    rs->last_version = ram_list.version;
> 
> Do you need to explicitly set
>        rs->ram_bulk_stage = false;
> 
> if the failure happened just after the start of postcopy and no
> requested pages had been sent, I think it might still  be set?

Could you elaborate on what would go wrong if it's still set?

Thanks,
Dr. David Alan Gilbert Feb. 14, 2018, 6:40 p.m. UTC | #3
* Peter Xu (peterx@redhat.com) wrote:
> On Tue, Feb 13, 2018 at 06:17:51PM +0000, Dr. David Alan Gilbert wrote:
> > * Peter Xu (peterx@redhat.com) wrote:
> > > After we updated the dirty bitmaps of ramblocks, we also need to update
> > > the critical fields in RAMState to make sure it is ready for a resume.
> > > 
> > > Signed-off-by: Peter Xu <peterx@redhat.com>
> > > ---
> > >  migration/ram.c        | 40 +++++++++++++++++++++++++++++++++++++++-
> > >  migration/trace-events |  1 +
> > >  2 files changed, 40 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/migration/ram.c b/migration/ram.c
> > > index a2a4b05d5c..d275875f54 100644
> > > --- a/migration/ram.c
> > > +++ b/migration/ram.c
> > > @@ -2250,6 +2250,36 @@ static int ram_init_all(RAMState **rsp)
> > >      return 0;
> > >  }
> > >  
> > > +static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
> > > +{
> > > +    RAMBlock *block;
> > > +    long pages = 0;
> > > +
> > > +    /*
> > > +     * Postcopy is not using xbzrle/compression, so no need for that.
> > > +     * Also, since source are already halted, we don't need to care
> > > +     * about dirty page logging as well.
> > > +     */
> > > +
> > > +    RAMBLOCK_FOREACH(block) {
> > > +        pages += bitmap_count_one(block->bmap,
> > > +                                  block->used_length >> TARGET_PAGE_BITS);
> > > +    }
> > > +
> > > +    /* This may not be aligned with current bitmaps. Recalculate. */
> > > +    rs->migration_dirty_pages = pages;
> > 
> > migration_dirty_pages is uint64_t - so we should probably do the cast
> > above and keep 'pages' as uint64_t.
> 
> Sure.
> 
> > 
> > > +    rs->last_seen_block = NULL;
> > > +    rs->last_sent_block = NULL;
> > > +    rs->last_page = 0;
> > > +    rs->last_version = ram_list.version;
> > 
> > Do you need to explicitly set
> >        rs->ram_bulk_stage = false;
> > 
> > if the failure happened just after the start of postcopy and no
> > requested pages had been sent, I think it might still  be set?
> 
> Could you elaborate what would go wrong even if it's still set?

I think it might start sending all pages rather than just those
that are dirty/needed; see migration_bitmap_find_dirty().

Dave

> Thanks,
> 
> -- 
> Peter Xu
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Peter Xu Feb. 22, 2018, 7:34 a.m. UTC | #4
On Wed, Feb 14, 2018 at 06:40:46PM +0000, Dr. David Alan Gilbert wrote:
> * Peter Xu (peterx@redhat.com) wrote:
> > On Tue, Feb 13, 2018 at 06:17:51PM +0000, Dr. David Alan Gilbert wrote:
> > > * Peter Xu (peterx@redhat.com) wrote:
> > > > After we updated the dirty bitmaps of ramblocks, we also need to update
> > > > the critical fields in RAMState to make sure it is ready for a resume.
> > > > 
> > > > Signed-off-by: Peter Xu <peterx@redhat.com>
> > > > ---
> > > >  migration/ram.c        | 40 +++++++++++++++++++++++++++++++++++++++-
> > > >  migration/trace-events |  1 +
> > > >  2 files changed, 40 insertions(+), 1 deletion(-)
> > > > 
> > > > diff --git a/migration/ram.c b/migration/ram.c
> > > > index a2a4b05d5c..d275875f54 100644
> > > > --- a/migration/ram.c
> > > > +++ b/migration/ram.c
> > > > @@ -2250,6 +2250,36 @@ static int ram_init_all(RAMState **rsp)
> > > >      return 0;
> > > >  }
> > > >  
> > > > +static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
> > > > +{
> > > > +    RAMBlock *block;
> > > > +    long pages = 0;
> > > > +
> > > > +    /*
> > > > +     * Postcopy is not using xbzrle/compression, so no need for that.
> > > > +     * Also, since source are already halted, we don't need to care
> > > > +     * about dirty page logging as well.
> > > > +     */
> > > > +
> > > > +    RAMBLOCK_FOREACH(block) {
> > > > +        pages += bitmap_count_one(block->bmap,
> > > > +                                  block->used_length >> TARGET_PAGE_BITS);
> > > > +    }
> > > > +
> > > > +    /* This may not be aligned with current bitmaps. Recalculate. */
> > > > +    rs->migration_dirty_pages = pages;
> > > 
> > > migration_dirty_pages is uint64_t - so we should probably do the cast
> > > above and keep 'pages' as uint64_t.
> > 
> > Sure.
> > 
> > > 
> > > > +    rs->last_seen_block = NULL;
> > > > +    rs->last_sent_block = NULL;
> > > > +    rs->last_page = 0;
> > > > +    rs->last_version = ram_list.version;
> > > 
> > > Do you need to explicitly set
> > >        rs->ram_bulk_stage = false;
> > > 
> > > if the failure happened just after the start of postcopy and no
> > > requested pages had been sent, I think it might still  be set?
> > 
> > Could you elaborate what would go wrong even if it's still set?
> 
> I think it might start sending all pages rather than just those
> that are dirty/needed;  see migration_bitmap_find_dirty.

Ah yes.  I should turn it off.
diff mbox series

Patch

diff --git a/migration/ram.c b/migration/ram.c
index a2a4b05d5c..d275875f54 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2250,6 +2250,36 @@  static int ram_init_all(RAMState **rsp)
     return 0;
 }
 
+static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
+{
+    RAMBlock *block;
+    long pages = 0;
+
+    /*
+     * Postcopy is not using xbzrle/compression, so no need for that.
+     * Also, since source are already halted, we don't need to care
+     * about dirty page logging as well.
+     */
+
+    RAMBLOCK_FOREACH(block) {
+        pages += bitmap_count_one(block->bmap,
+                                  block->used_length >> TARGET_PAGE_BITS);
+    }
+
+    /* This may not be aligned with current bitmaps. Recalculate. */
+    rs->migration_dirty_pages = pages;
+
+    rs->last_seen_block = NULL;
+    rs->last_sent_block = NULL;
+    rs->last_page = 0;
+    rs->last_version = ram_list.version;
+
+    /* Update RAMState cache of output QEMUFile */
+    rs->f = out;
+
+    trace_ram_state_resume_prepare(pages);
+}
+
 /*
  * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
  * long-running RCU critical section.  When rcu-reclaims in the code
@@ -3178,8 +3208,16 @@  out:
 static int ram_resume_prepare(MigrationState *s, void *opaque)
 {
     RAMState *rs = *(RAMState **)opaque;
+    int ret;
 
-    return ram_dirty_bitmap_sync_all(s, rs);
+    ret = ram_dirty_bitmap_sync_all(s, rs);
+    if (ret) {
+        return ret;
+    }
+
+    ram_state_resume_prepare(rs, s->to_dst_file);
+
+    return 0;
 }
 
 static SaveVMHandlers savevm_ram_handlers = {
diff --git a/migration/trace-events b/migration/trace-events
index 45b1d89217..f5913ff51c 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -88,6 +88,7 @@  ram_dirty_bitmap_reload_complete(char *str) "%s"
 ram_dirty_bitmap_sync_start(void) ""
 ram_dirty_bitmap_sync_wait(void) ""
 ram_dirty_bitmap_sync_complete(void) ""
+ram_state_resume_prepare(long v) "%ld"
 
 # migration/migration.c
 await_return_path_close_on_source_close(void) ""