
[v7,32/42] Page request: Consume pages off the post-copy queue

Message ID 1434450415-11339-33-git-send-email-dgilbert@redhat.com
State New

Commit Message

Dr. David Alan Gilbert June 16, 2015, 10:26 a.m. UTC
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>

When transmitting RAM pages, consume pages that have been queued by
MIG_RPCOMM_REQPAGE commands and send them ahead of normal page scanning.

Note:
  a) After a queued page the linear walk carries on from just after the
unqueued page; there is a reasonable chance that the destination
was about to ask for other nearby pages anyway.

  b) We have to be careful of any assumptions that the page-walking
code makes; in particular, it takes some shortcuts on its first linear
walk that break as soon as we send a queued page.

  c) We have to be careful not to break up host-page-sized chunks, since
that makes it harder to place the pages on the destination.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 migration/ram.c | 227 ++++++++++++++++++++++++++++++++++++++++++++------------
 trace-events    |   2 +
 2 files changed, 183 insertions(+), 46 deletions(-)

Comments

Juan Quintela July 14, 2015, 9:40 a.m. UTC | #1
"Dr. David Alan Gilbert (git)" <dgilbert@redhat.com> wrote:
> From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
>
> When transmitting RAM pages, consume pages that have been queued by
> MIG_RPCOMM_REQPAGE commands and send them ahead of normal page scanning.
>
> Note:
>   a) After a queued page the linear walk carries on from after the
> unqueued page; there is a reasonable chance that the destination
> was about to ask for other closeby pages anyway.
>
>   b) We have to be careful of any assumptions that the page walking
> code makes, in particular it does some short cuts on its first linear
> walk that break as soon as we do a queued page.
>
>   c) We have to be careful to not break up host-page size chunks, since
> this makes it harder to place the pages on the destination.
>
> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>


> +static bool last_was_from_queue;

Are we using this variable later in the series?

>  static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
>  {
>      migration_dirty_pages +=
> @@ -923,6 +933,41 @@ static int ram_save_compressed_page(QEMUFile *f, RAMBlock *block,
>      return pages;
>  }
>  
> +/*
> + * Unqueue a page from the queue fed by postcopy page requests
> + *
> + * Returns:      The RAMBlock* to transmit from (or NULL if the queue is empty)
> + *      ms:      MigrationState in
> + *  offset:      the byte offset within the RAMBlock for the start of the page
> + * ram_addr_abs: global offset in the dirty/sent bitmaps
> + */
> +static RAMBlock *ram_save_unqueue_page(MigrationState *ms, ram_addr_t *offset,
> +                                       ram_addr_t *ram_addr_abs)
> +{
> +    RAMBlock *result = NULL;
> +    qemu_mutex_lock(&ms->src_page_req_mutex);
> +    if (!QSIMPLEQ_EMPTY(&ms->src_page_requests)) {
> +        struct MigrationSrcPageRequest *entry =
> +                                    QSIMPLEQ_FIRST(&ms->src_page_requests);
> +        result = entry->rb;
> +        *offset = entry->offset;
> +        *ram_addr_abs = (entry->offset + entry->rb->offset) & TARGET_PAGE_MASK;
> +
> +        if (entry->len > TARGET_PAGE_SIZE) {
> +            entry->len -= TARGET_PAGE_SIZE;
> +            entry->offset += TARGET_PAGE_SIZE;
> +        } else {
> +            memory_region_unref(result->mr);

Here is the unref, but I still don't understand why we don't need to
undo it in the error case in the previous patch.

> +            QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
> +            g_free(entry);
> +        }
> +    }
> +    qemu_mutex_unlock(&ms->src_page_req_mutex);
> +
> +    return result;
> +}
> +
> +
>  /**
>   * Queue the pages for transmission, e.g. a request from postcopy destination
>   *   ms: MigrationStatus in which the queue is held
> @@ -987,6 +1032,58 @@ err:
>  

> @@ -997,65 +1094,102 @@ err:
>   * @f: QEMUFile where to send the data
>   * @last_stage: if we are at the completion stage
>   * @bytes_transferred: increase it with the number of transferred bytes
> + *
> + * On systems where host-page-size > target-page-size it will send all the
> + * pages in a host page that are dirty.
>   */
>  
>  static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
>                                     uint64_t *bytes_transferred)
>  {
> +    MigrationState *ms = migrate_get_current();
>      RAMBlock *block = last_seen_block;
> +    RAMBlock *tmpblock;
>      ram_addr_t offset = last_offset;
> +    ram_addr_t tmpoffset;
>      bool complete_round = false;
>      int pages = 0;
> -    MemoryRegion *mr;
>      ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
>                                   ram_addr_t space */
>  
> -    if (!block)
> +    if (!block) {
>          block = QLIST_FIRST_RCU(&ram_list.blocks);
> +        last_was_from_queue = false;
> +    }
>  
> -    while (true) {
> -        mr = block->mr;
> -        offset = migration_bitmap_find_and_reset_dirty(mr, offset,
> -                                                       &dirty_ram_abs);
> -        if (complete_round && block == last_seen_block &&
> -            offset >= last_offset) {
> -            break;
> -        }
> -        if (offset >= block->used_length) {
> -            offset = 0;
> -            block = QLIST_NEXT_RCU(block, next);
> -            if (!block) {
> -                block = QLIST_FIRST_RCU(&ram_list.blocks);
> -                complete_round = true;
> -                ram_bulk_stage = false;
> -                if (migrate_use_xbzrle()) {
> -                    /* If xbzrle is on, stop using the data compression at this
> -                     * point. In theory, xbzrle can do better than compression.
> -                     */
> -                    flush_compressed_data(f);
> -                    compression_switch = false;
> -                }
> +    while (true) { /* Until we send a block or run out of stuff to send */
> +        tmpblock = ram_save_unqueue_page(ms, &tmpoffset, &dirty_ram_abs);

This function was ugly.  You already split it in the past.  This patch
makes it even more complicated.  Can we try something like adding a
ram_find_next_page() and moving some of the code from inside the while
loop into it?

Once we're here, can we agree to send the next N pages (if they are
contiguous) when we receive a queued request?  Yes, deciding N means
testing and measuring, and that can wait until this is integrated.
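
Something along these lines, perhaps (untested sketch; the helper name and
the value of N are made up and would need measuring), reusing
ram_save_host_page() from this patch so host pages stay whole:

    #define QUEUED_READAHEAD_PAGES 4   /* made-up value, needs testing */

    /* Sketch: after servicing a queued page at (block, offset), keep
     * sending a few more host pages while we stay inside the block. */
    static int ram_save_queued_readahead(MigrationState *ms, QEMUFile *f,
                                         RAMBlock *block, ram_addr_t offset,
                                         bool last_stage,
                                         uint64_t *bytes_transferred,
                                         ram_addr_t dirty_ram_abs)
    {
        int i, pages = 0;

        for (i = 0; i < QUEUED_READAHEAD_PAGES; i++) {
            int tmppages;
            ram_addr_t prev = offset;

            if (offset >= block->used_length) {
                break;          /* let the normal walk continue from here */
            }
            tmppages = ram_save_host_page(ms, f, block, &offset, last_stage,
                                          bytes_transferred, dirty_ram_abs);
            if (tmppages < 0) {
                return tmppages;
            }
            pages += tmppages;
            /* ram_save_host_page() leaves offset at the last target page
             * it looked at; step both trackers to the next host page. */
            dirty_ram_abs += (offset - prev) + TARGET_PAGE_SIZE;
            offset += TARGET_PAGE_SIZE;
        }
        return pages;
    }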

> +
> +        if (tmpblock) {
> +            /* We've got a block from the postcopy queue */
> +            trace_ram_find_and_save_block_postcopy(tmpblock->idstr,
> +                                                   (uint64_t)tmpoffset,
> +                                                   (uint64_t)dirty_ram_abs);
> +            /*
> +             * We're sending this page, and since it's postcopy nothing else
> +             * will dirty it, and we must make sure it doesn't get sent again
> +             * even if this queue request was received after the background
> +             * search already sent it.
> +             */
> +            if (!test_bit(dirty_ram_abs >> TARGET_PAGE_BITS,
> +                          migration_bitmap)) {

I think this test can be inside ram_save_unqueue_page()

I.e. rename to:

ram_save_get_next_queued_page()
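
Something like this, perhaps (untested, just to show the shape; it wraps
the unqueue helper from this patch and keeps popping until it finds a
page that is still dirty):

    static RAMBlock *ram_save_get_next_queued_page(MigrationState *ms,
                                                   ram_addr_t *offset,
                                                   ram_addr_t *ram_addr_abs)
    {
        RAMBlock *block;
        bool dirty;

        do {
            block = ram_save_unqueue_page(ms, offset, ram_addr_abs);
            dirty = block &&
                    test_bit(*ram_addr_abs >> TARGET_PAGE_BITS,
                             migration_bitmap);
            if (block && !dirty) {
                /* Already sent by the background walk; skip it */
                trace_ram_find_and_save_block_postcopy_not_dirty(
                    block->idstr, (uint64_t)*offset,
                    (uint64_t)*ram_addr_abs,
                    test_bit(*ram_addr_abs >> TARGET_PAGE_BITS,
                             ms->sentmap));
            }
        } while (block && !dirty);

        return block;
    }
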
Amit Shah July 27, 2015, 6:05 a.m. UTC | #2
On (Tue) 16 Jun 2015 [11:26:45], Dr. David Alan Gilbert (git) wrote:
> From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> 
> When transmitting RAM pages, consume pages that have been queued by
> MIG_RPCOMM_REQPAGE commands and send them ahead of normal page scanning.

It's slightly confusing with 'consume': we're /servicing/ requests from
the dest at the src here rather than /consuming/ pages sent by src at
the dest.  If you find 'service' better than 'consume', please update
the commit msg+log.

> Note:
>   a) After a queued page the linear walk carries on from after the
> unqueued page; there is a reasonable chance that the destination
> was about to ask for other closeby pages anyway.
> 
>   b) We have to be careful of any assumptions that the page walking
> code makes, in particular it does some short cuts on its first linear
> walk that break as soon as we do a queued page.
> 
>   c) We have to be careful to not break up host-page size chunks, since
> this makes it harder to place the pages on the destination.
> 
> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

Reviewed-by: Amit Shah <amit.shah@redhat.com>

> +static int ram_save_host_page(MigrationState *ms, QEMUFile *f, RAMBlock* block,
> +                              ram_addr_t *offset, bool last_stage,
> +                              uint64_t *bytes_transferred,
> +                              ram_addr_t dirty_ram_abs)
> +{
> +    int tmppages, pages = 0;
> +    do {
> +        /* Check the pages is dirty and if it is send it */
> +        if (migration_bitmap_clear_dirty(dirty_ram_abs)) {
> +            if (compression_switch && migrate_use_compression()) {
> +                tmppages = ram_save_compressed_page(f, block, *offset,
> +                                                    last_stage,
> +                                                    bytes_transferred);
> +            } else {
> +                tmppages = ram_save_page(f, block, *offset, last_stage,
> +                                         bytes_transferred);
> +            }

Something for the future: we should just have ram_save_page which does
compression (or not); and even encryption (or not), and so on.
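
E.g. something like this (just a sketch, the name is made up; encryption
would slot into the same place):

    static int ram_save_target_page(QEMUFile *f, RAMBlock *block,
                                    ram_addr_t offset, bool last_stage,
                                    uint64_t *bytes_transferred)
    {
        /* Pick the transform internally so callers never look at
         * compression_switch themselves. */
        if (compression_switch && migrate_use_compression()) {
            return ram_save_compressed_page(f, block, offset, last_stage,
                                            bytes_transferred);
        }
        return ram_save_page(f, block, offset, last_stage, bytes_transferred);
    }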

> +
> +            if (tmppages < 0) {
> +                return tmppages;
> +            } else {
> +                if (ms->sentmap) {
> +                    set_bit(dirty_ram_abs >> TARGET_PAGE_BITS, ms->sentmap);
> +                }
> +            }

This else could be dropped as the if stmt returns.
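
i.e. just (sketch of the simplification, not the posted code):

            if (tmppages < 0) {
                return tmppages;
            }
            if (ms->sentmap) {
                set_bit(dirty_ram_abs >> TARGET_PAGE_BITS, ms->sentmap);
            }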

> +            pages += tmppages;
> +        }
> +        *offset += TARGET_PAGE_SIZE;
> +        dirty_ram_abs += TARGET_PAGE_SIZE;
> +    } while (*offset & (qemu_host_page_size - 1));
> +
> +    /* The offset we leave with is the last one we looked at */
> +    *offset -= TARGET_PAGE_SIZE;
> +    return pages;
> +}
> +
> +/**
>   * ram_find_and_save_block: Finds a dirty page and sends it to f
>   *
>   * Called within an RCU critical section.
> @@ -997,65 +1094,102 @@ err:
>   * @f: QEMUFile where to send the data
>   * @last_stage: if we are at the completion stage
>   * @bytes_transferred: increase it with the number of transferred bytes
> + *
> + * On systems where host-page-size > target-page-size it will send all the
> + * pages in a host page that are dirty.
>   */
>  
>  static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
>                                     uint64_t *bytes_transferred)
>  {
> +    MigrationState *ms = migrate_get_current();
>      RAMBlock *block = last_seen_block;
> +    RAMBlock *tmpblock;
>      ram_addr_t offset = last_offset;
> +    ram_addr_t tmpoffset;
>      bool complete_round = false;
>      int pages = 0;
> -    MemoryRegion *mr;
>      ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
>                                   ram_addr_t space */
>  
> -    if (!block)
> +    if (!block) {
>          block = QLIST_FIRST_RCU(&ram_list.blocks);
> +        last_was_from_queue = false;
> +    }
>  
> -    while (true) {
> -        mr = block->mr;
> -        offset = migration_bitmap_find_and_reset_dirty(mr, offset,
> -                                                       &dirty_ram_abs);
> -        if (complete_round && block == last_seen_block &&
> -            offset >= last_offset) {
> -            break;
> -        }
> -        if (offset >= block->used_length) {
> -            offset = 0;
> -            block = QLIST_NEXT_RCU(block, next);
> -            if (!block) {
> -                block = QLIST_FIRST_RCU(&ram_list.blocks);
> -                complete_round = true;
> -                ram_bulk_stage = false;
> -                if (migrate_use_xbzrle()) {
> -                    /* If xbzrle is on, stop using the data compression at this
> -                     * point. In theory, xbzrle can do better than compression.
> -                     */
> -                    flush_compressed_data(f);
> -                    compression_switch = false;
> -                }
> +    while (true) { /* Until we send a block or run out of stuff to send */
> +        tmpblock = ram_save_unqueue_page(ms, &tmpoffset, &dirty_ram_abs);
> +
> +        if (tmpblock) {
> +            /* We've got a block from the postcopy queue */
> +            trace_ram_find_and_save_block_postcopy(tmpblock->idstr,
> +                                                   (uint64_t)tmpoffset,
> +                                                   (uint64_t)dirty_ram_abs);
> +            /*
> +             * We're sending this page, and since it's postcopy nothing else
> +             * will dirty it, and we must make sure it doesn't get sent again
> +             * even if this queue request was received after the background
> +             * search already sent it.
> +             */
> +            if (!test_bit(dirty_ram_abs >> TARGET_PAGE_BITS,
> +                          migration_bitmap)) {
> +                trace_ram_find_and_save_block_postcopy_not_dirty(
> +                    tmpblock->idstr, (uint64_t)tmpoffset,
> +                    (uint64_t)dirty_ram_abs,
> +                    test_bit(dirty_ram_abs >> TARGET_PAGE_BITS, ms->sentmap));
> +
> +                continue;
>              }
> +            /*
> +             * As soon as we start servicing pages out of order, then we have
> +             * to kill the bulk stage, since the bulk stage assumes
> +             * in (migration_bitmap_find_and_reset_dirty) that every page is
> +             * dirty, that's no longer true.
> +             */
> +            ram_bulk_stage = false;
> +            /*
> +             * We want the background search to continue from the queued page
> +             * since the guest is likely to want other pages near to the page
> +             * it just requested.
> +             */
> +            block = tmpblock;
> +            offset = tmpoffset;
>          } else {
> -            if (compression_switch && migrate_use_compression()) {
> -                pages = ram_save_compressed_page(f, block, offset, last_stage,
> -                                                 bytes_transferred);
> -            } else {
> -                pages = ram_save_page(f, block, offset, last_stage,
> -                                      bytes_transferred);
> +            MemoryRegion *mr;
> +            /* priority queue empty, so just search for something dirty */
> +            mr = block->mr;
> +            offset = migration_bitmap_find_dirty(mr, offset, &dirty_ram_abs);
> +            if (complete_round && block == last_seen_block &&
> +                offset >= last_offset) {
> +                break;
>              }
> -
> -            /* if page is unmodified, continue to the next */
> -            if (pages > 0) {
> -                MigrationState *ms = migrate_get_current();
> -                if (ms->sentmap) {
> -                    set_bit(dirty_ram_abs >> TARGET_PAGE_BITS, ms->sentmap);
> +            if (offset >= block->used_length) {
> +                offset = 0;
> +                block = QLIST_NEXT_RCU(block, next);
> +                if (!block) {
> +                    block = QLIST_FIRST_RCU(&ram_list.blocks);
> +                    complete_round = true;
> +                    ram_bulk_stage = false;
> +                    if (migrate_use_xbzrle()) {
> +                        /* If xbzrle is on, stop using the data compression at
> +                         * this point. In theory, xbzrle can do better than
> +                         * compression.
> +                         */
> +                        flush_compressed_data(f);
> +                        compression_switch = false;
> +                    }
>                  }
> -
> -                last_sent_block = block;
> -                break;
> +                continue; /* pick an offset in the new block */
>              }
>          }
> +
> +        pages = ram_save_host_page(ms, f, block, &offset, last_stage,
> +                                   bytes_transferred, dirty_ram_abs);
> +
> +        /* if page is unmodified, continue to the next */
> +        if (pages > 0) {
> +            break;
> +        }

This function could use splitting into multiple ones.


		Amit

Dr. David Alan Gilbert Sept. 16, 2015, 6:36 p.m. UTC | #3
* Juan Quintela (quintela@redhat.com) wrote:
> "Dr. David Alan Gilbert (git)" <dgilbert@redhat.com> wrote:
> > From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> >
> > When transmitting RAM pages, consume pages that have been queued by
> > MIG_RPCOMM_REQPAGE commands and send them ahead of normal page scanning.
> >
> > Note:
> >   a) After a queued page the linear walk carries on from after the
> > unqueued page; there is a reasonable chance that the destination
> > was about to ask for other closeby pages anyway.
> >
> >   b) We have to be careful of any assumptions that the page walking
> > code makes, in particular it does some short cuts on its first linear
> > walk that break as soon as we do a queued page.
> >
> >   c) We have to be careful to not break up host-page size chunks, since
> > this makes it harder to place the pages on the destination.
> >
> > Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> 
> 
> > +static bool last_was_from_queue;
> 
> Are we using this variable later in the series?

That was a straggler; I've killed it off.

> >  static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
> >  {
> >      migration_dirty_pages +=
> > @@ -923,6 +933,41 @@ static int ram_save_compressed_page(QEMUFile *f, RAMBlock *block,
> >      return pages;
> >  }
> >  
> > +/*
> > + * Unqueue a page from the queue fed by postcopy page requests
> > + *
> > + * Returns:      The RAMBlock* to transmit from (or NULL if the queue is empty)
> > + *      ms:      MigrationState in
> > + *  offset:      the byte offset within the RAMBlock for the start of the page
> > + * ram_addr_abs: global offset in the dirty/sent bitmaps
> > + */
> > +static RAMBlock *ram_save_unqueue_page(MigrationState *ms, ram_addr_t *offset,
> > +                                       ram_addr_t *ram_addr_abs)
> > +{
> > +    RAMBlock *result = NULL;
> > +    qemu_mutex_lock(&ms->src_page_req_mutex);
> > +    if (!QSIMPLEQ_EMPTY(&ms->src_page_requests)) {
> > +        struct MigrationSrcPageRequest *entry =
> > +                                    QSIMPLEQ_FIRST(&ms->src_page_requests);
> > +        result = entry->rb;
> > +        *offset = entry->offset;
> > +        *ram_addr_abs = (entry->offset + entry->rb->offset) & TARGET_PAGE_MASK;
> > +
> > +        if (entry->len > TARGET_PAGE_SIZE) {
> > +            entry->len -= TARGET_PAGE_SIZE;
> > +            entry->offset += TARGET_PAGE_SIZE;
> > +        } else {
> > +            memory_region_unref(result->mr);
> 
> Here it is the unref, but I still don't understand why we don't need to
> undo that on the error case on previous patch.

I've added an unref to the 'flush_page_queue' routine that's
called during cleanup; thus we take a ref whenever anything is added to
the queue, and release it either when we remove it to use it, or during
cleanup.
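
Roughly like this (a sketch only; the real flush_page_queue lives in the
earlier queueing patch, so the locking and exact shape may differ):

    static void flush_page_queue(MigrationState *ms)
    {
        struct MigrationSrcPageRequest *mspr, *next_mspr;

        qemu_mutex_lock(&ms->src_page_req_mutex);
        QSIMPLEQ_FOREACH_SAFE(mspr, &ms->src_page_requests, next_req,
                              next_mspr) {
            /* Release the reference taken when the request was queued */
            memory_region_unref(mspr->rb->mr);
            QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
            g_free(mspr);
        }
        qemu_mutex_unlock(&ms->src_page_req_mutex);
    }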

> > +            QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
> > +            g_free(entry);
> > +        }
> > +    }
> > +    qemu_mutex_unlock(&ms->src_page_req_mutex);
> > +
> > +    return result;
> > +}
> > +
> > +
> >  /**
> >   * Queue the pages for transmission, e.g. a request from postcopy destination
> >   *   ms: MigrationStatus in which the queue is held
> > @@ -987,6 +1032,58 @@ err:
> >  
> 
> > @@ -997,65 +1094,102 @@ err:
> >   * @f: QEMUFile where to send the data
> >   * @last_stage: if we are at the completion stage
> >   * @bytes_transferred: increase it with the number of transferred bytes
> > + *
> > + * On systems where host-page-size > target-page-size it will send all the
> > + * pages in a host page that are dirty.
> >   */
> >  
> >  static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
> >                                     uint64_t *bytes_transferred)
> >  {
> > +    MigrationState *ms = migrate_get_current();
> >      RAMBlock *block = last_seen_block;
> > +    RAMBlock *tmpblock;
> >      ram_addr_t offset = last_offset;
> > +    ram_addr_t tmpoffset;
> >      bool complete_round = false;
> >      int pages = 0;
> > -    MemoryRegion *mr;
> >      ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
> >                                   ram_addr_t space */
> >  
> > -    if (!block)
> > +    if (!block) {
> >          block = QLIST_FIRST_RCU(&ram_list.blocks);
> > +        last_was_from_queue = false;
> > +    }
> >  
> > -    while (true) {
> > -        mr = block->mr;
> > -        offset = migration_bitmap_find_and_reset_dirty(mr, offset,
> > -                                                       &dirty_ram_abs);
> > -        if (complete_round && block == last_seen_block &&
> > -            offset >= last_offset) {
> > -            break;
> > -        }
> > -        if (offset >= block->used_length) {
> > -            offset = 0;
> > -            block = QLIST_NEXT_RCU(block, next);
> > -            if (!block) {
> > -                block = QLIST_FIRST_RCU(&ram_list.blocks);
> > -                complete_round = true;
> > -                ram_bulk_stage = false;
> > -                if (migrate_use_xbzrle()) {
> > -                    /* If xbzrle is on, stop using the data compression at this
> > -                     * point. In theory, xbzrle can do better than compression.
> > -                     */
> > -                    flush_compressed_data(f);
> > -                    compression_switch = false;
> > -                }
> > +    while (true) { /* Until we send a block or run out of stuff to send */
> > +        tmpblock = ram_save_unqueue_page(ms, &tmpoffset, &dirty_ram_abs);
> 
> This function was ugly.  You already split it in the past.  This patch
> makes it even more complicated.  Can we try something like add a
> 
> ram_find_next_page() and try to put some of the code inside the while
> there?

I've just posted a pair of patches separately that do this; please let
me know if they're on the right lines; they can be applied without postcopy.

> Once here, can we agree to send the next N pages (if they are contiguos)
> if we receive a queued request?  Yeap, deciding N means testing and measuring.
> And can wait for this to be integrated.

Yes, we could do that; at the moment I'm working in host-page-sized chunks.

> > +
> > +        if (tmpblock) {
> > +            /* We've got a block from the postcopy queue */
> > +            trace_ram_find_and_save_block_postcopy(tmpblock->idstr,
> > +                                                   (uint64_t)tmpoffset,
> > +                                                   (uint64_t)dirty_ram_abs);
> > +            /*
> > +             * We're sending this page, and since it's postcopy nothing else
> > +             * will dirty it, and we must make sure it doesn't get sent again
> > +             * even if this queue request was received after the background
> > +             * search already sent it.
> > +             */
> > +            if (!test_bit(dirty_ram_abs >> TARGET_PAGE_BITS,
> > +                          migration_bitmap)) {
> 
> I think this test can be inside ram_save_unqueue_page()
> 
> I.e. rename to:
> 
> ram_save_get_next_queued_page()

Renamed to the shorter get_queued_page (it's static anyway).

Dave

--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK

Dr. David Alan Gilbert Sept. 16, 2015, 6:48 p.m. UTC | #4
* Amit Shah (amit.shah@redhat.com) wrote:
> On (Tue) 16 Jun 2015 [11:26:45], Dr. David Alan Gilbert (git) wrote:
> > From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> > 
> > When transmitting RAM pages, consume pages that have been queued by
> > MIG_RPCOMM_REQPAGE commands and send them ahead of normal page scanning.
> 
> It's slightly confusing with 'consume': we're /servicing/ requests from
> the dest at the src here rather than /consuming/ pages sent by src at
> the dest.  If you find 'service' better than 'consume', please update
> the commit msg+log.

'consume' is a fairly normal term for taking an item off a queue and
processing it.

> > Note:
> >   a) After a queued page the linear walk carries on from after the
> > unqueued page; there is a reasonable chance that the destination
> > was about to ask for other closeby pages anyway.
> > 
> >   b) We have to be careful of any assumptions that the page walking
> > code makes, in particular it does some short cuts on its first linear
> > walk that break as soon as we do a queued page.
> > 
> >   c) We have to be careful to not break up host-page size chunks, since
> > this makes it harder to place the pages on the destination.
> > 
> > Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> 
> Reviewed-by: Amit Shah <amit.shah@redhat.com>
> 
> > +static int ram_save_host_page(MigrationState *ms, QEMUFile *f, RAMBlock* block,
> > +                              ram_addr_t *offset, bool last_stage,
> > +                              uint64_t *bytes_transferred,
> > +                              ram_addr_t dirty_ram_abs)
> > +{
> > +    int tmppages, pages = 0;
> > +    do {
> > +        /* Check the pages is dirty and if it is send it */
> > +        if (migration_bitmap_clear_dirty(dirty_ram_abs)) {
> > +            if (compression_switch && migrate_use_compression()) {
> > +                tmppages = ram_save_compressed_page(f, block, *offset,
> > +                                                    last_stage,
> > +                                                    bytes_transferred);
> > +            } else {
> > +                tmppages = ram_save_page(f, block, *offset, last_stage,
> > +                                         bytes_transferred);
> > +            }
> 
> Something for the future: we should just have ram_save_page which does
> compression (or not); and even encryption (or not), and so on.

Yep, in my current world that's now a 'ram_save_host_page' function
that has that buried in it.

> > +
> > +            if (tmppages < 0) {
> > +                return tmppages;
> > +            } else {
> > +                if (ms->sentmap) {
> > +                    set_bit(dirty_ram_abs >> TARGET_PAGE_BITS, ms->sentmap);
> > +                }
> > +            }
> 
> This else could be dropped as the if stmt returns.

Done

> > +            pages += tmppages;
> > +        }
> > +        *offset += TARGET_PAGE_SIZE;
> > +        dirty_ram_abs += TARGET_PAGE_SIZE;
> > +    } while (*offset & (qemu_host_page_size - 1));
> > +
> > +    /* The offset we leave with is the last one we looked at */
> > +    *offset -= TARGET_PAGE_SIZE;
> > +    return pages;
> > +}
> > +
> > +/**
> >   * ram_find_and_save_block: Finds a dirty page and sends it to f
> >   *
> >   * Called within an RCU critical section.
> > @@ -997,65 +1094,102 @@ err:
> >   * @f: QEMUFile where to send the data
> >   * @last_stage: if we are at the completion stage
> >   * @bytes_transferred: increase it with the number of transferred bytes
> > + *
> > + * On systems where host-page-size > target-page-size it will send all the
> > + * pages in a host page that are dirty.
> >   */
> >  
> >  static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
> >                                     uint64_t *bytes_transferred)
> >  {
> > +    MigrationState *ms = migrate_get_current();
> >      RAMBlock *block = last_seen_block;
> > +    RAMBlock *tmpblock;
> >      ram_addr_t offset = last_offset;
> > +    ram_addr_t tmpoffset;
> >      bool complete_round = false;
> >      int pages = 0;
> > -    MemoryRegion *mr;
> >      ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
> >                                   ram_addr_t space */
> >  
> > -    if (!block)
> > +    if (!block) {
> >          block = QLIST_FIRST_RCU(&ram_list.blocks);
> > +        last_was_from_queue = false;
> > +    }
> >  
> > -    while (true) {
> > -        mr = block->mr;
> > -        offset = migration_bitmap_find_and_reset_dirty(mr, offset,
> > -                                                       &dirty_ram_abs);
> > -        if (complete_round && block == last_seen_block &&
> > -            offset >= last_offset) {
> > -            break;
> > -        }
> > -        if (offset >= block->used_length) {
> > -            offset = 0;
> > -            block = QLIST_NEXT_RCU(block, next);
> > -            if (!block) {
> > -                block = QLIST_FIRST_RCU(&ram_list.blocks);
> > -                complete_round = true;
> > -                ram_bulk_stage = false;
> > -                if (migrate_use_xbzrle()) {
> > -                    /* If xbzrle is on, stop using the data compression at this
> > -                     * point. In theory, xbzrle can do better than compression.
> > -                     */
> > -                    flush_compressed_data(f);
> > -                    compression_switch = false;
> > -                }
> > +    while (true) { /* Until we send a block or run out of stuff to send */
> > +        tmpblock = ram_save_unqueue_page(ms, &tmpoffset, &dirty_ram_abs);
> > +
> > +        if (tmpblock) {
> > +            /* We've got a block from the postcopy queue */
> > +            trace_ram_find_and_save_block_postcopy(tmpblock->idstr,
> > +                                                   (uint64_t)tmpoffset,
> > +                                                   (uint64_t)dirty_ram_abs);
> > +            /*
> > +             * We're sending this page, and since it's postcopy nothing else
> > +             * will dirty it, and we must make sure it doesn't get sent again
> > +             * even if this queue request was received after the background
> > +             * search already sent it.
> > +             */
> > +            if (!test_bit(dirty_ram_abs >> TARGET_PAGE_BITS,
> > +                          migration_bitmap)) {
> > +                trace_ram_find_and_save_block_postcopy_not_dirty(
> > +                    tmpblock->idstr, (uint64_t)tmpoffset,
> > +                    (uint64_t)dirty_ram_abs,
> > +                    test_bit(dirty_ram_abs >> TARGET_PAGE_BITS, ms->sentmap));
> > +
> > +                continue;
> >              }
> > +            /*
> > +             * As soon as we start servicing pages out of order, then we have
> > +             * to kill the bulk stage, since the bulk stage assumes
> > +             * in (migration_bitmap_find_and_reset_dirty) that every page is
> > +             * dirty, that's no longer true.
> > +             */
> > +            ram_bulk_stage = false;
> > +            /*
> > +             * We want the background search to continue from the queued page
> > +             * since the guest is likely to want other pages near to the page
> > +             * it just requested.
> > +             */
> > +            block = tmpblock;
> > +            offset = tmpoffset;
> >          } else {
> > -            if (compression_switch && migrate_use_compression()) {
> > -                pages = ram_save_compressed_page(f, block, offset, last_stage,
> > -                                                 bytes_transferred);
> > -            } else {
> > -                pages = ram_save_page(f, block, offset, last_stage,
> > -                                      bytes_transferred);
> > +            MemoryRegion *mr;
> > +            /* priority queue empty, so just search for something dirty */
> > +            mr = block->mr;
> > +            offset = migration_bitmap_find_dirty(mr, offset, &dirty_ram_abs);
> > +            if (complete_round && block == last_seen_block &&
> > +                offset >= last_offset) {
> > +                break;
> >              }
> > -
> > -            /* if page is unmodified, continue to the next */
> > -            if (pages > 0) {
> > -                MigrationState *ms = migrate_get_current();
> > -                if (ms->sentmap) {
> > -                    set_bit(dirty_ram_abs >> TARGET_PAGE_BITS, ms->sentmap);
> > +            if (offset >= block->used_length) {
> > +                offset = 0;
> > +                block = QLIST_NEXT_RCU(block, next);
> > +                if (!block) {
> > +                    block = QLIST_FIRST_RCU(&ram_list.blocks);
> > +                    complete_round = true;
> > +                    ram_bulk_stage = false;
> > +                    if (migrate_use_xbzrle()) {
> > +                        /* If xbzrle is on, stop using the data compression at
> > +                         * this point. In theory, xbzrle can do better than
> > +                         * compression.
> > +                         */
> > +                        flush_compressed_data(f);
> > +                        compression_switch = false;
> > +                    }
> >                  }
> > -
> > -                last_sent_block = block;
> > -                break;
> > +                continue; /* pick an offset in the new block */
> >              }
> >          }
> > +
> > +        pages = ram_save_host_page(ms, f, block, &offset, last_stage,
> > +                                   bytes_transferred, dirty_ram_abs);
> > +
> > +        /* if page is unmodified, continue to the next */
> > +        if (pages > 0) {
> > +            break;
> > +        }
> 
> This function could use splitting into multiple ones.

Done; a separate pair of patches is on the list to do that split; please review.

Dave

> 
> 
> 		Amit
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK

Patch

diff --git a/migration/ram.c b/migration/ram.c
index da3e9ea..316834b 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -222,6 +222,7 @@  static RAMBlock *last_seen_block;
 /* This is the last block from where we have sent data */
 static RAMBlock *last_sent_block;
 static ram_addr_t last_offset;
+static bool last_was_from_queue;
 static unsigned long *migration_bitmap;
 static uint64_t migration_dirty_pages;
 static uint32_t last_version;
@@ -503,9 +504,9 @@  static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
  * Returns: byte offset within memory region of the start of a dirty page
  */
 static inline
-ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
-                                                 ram_addr_t start,
-                                                 ram_addr_t *ram_addr_abs)
+ram_addr_t migration_bitmap_find_dirty(MemoryRegion *mr,
+                                       ram_addr_t start,
+                                       ram_addr_t *ram_addr_abs)
 {
     unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
     unsigned long nr = base + (start >> TARGET_PAGE_BITS);
@@ -520,14 +521,23 @@  ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
         next = find_next_bit(migration_bitmap, size, nr);
     }
 
-    if (next < size) {
-        clear_bit(next, migration_bitmap);
-        migration_dirty_pages--;
-    }
     *ram_addr_abs = next << TARGET_PAGE_BITS;
     return (next - base) << TARGET_PAGE_BITS;
 }
 
+static inline bool migration_bitmap_clear_dirty(ram_addr_t addr)
+{
+    bool ret;
+    int nr = addr >> TARGET_PAGE_BITS;
+
+    ret = test_and_clear_bit(nr, migration_bitmap);
+
+    if (ret) {
+        migration_dirty_pages--;
+    }
+    return ret;
+}
+
 static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
 {
     migration_dirty_pages +=
@@ -923,6 +933,41 @@  static int ram_save_compressed_page(QEMUFile *f, RAMBlock *block,
     return pages;
 }
 
+/*
+ * Unqueue a page from the queue fed by postcopy page requests
+ *
+ * Returns:      The RAMBlock* to transmit from (or NULL if the queue is empty)
+ *      ms:      MigrationState in
+ *  offset:      the byte offset within the RAMBlock for the start of the page
+ * ram_addr_abs: global offset in the dirty/sent bitmaps
+ */
+static RAMBlock *ram_save_unqueue_page(MigrationState *ms, ram_addr_t *offset,
+                                       ram_addr_t *ram_addr_abs)
+{
+    RAMBlock *result = NULL;
+    qemu_mutex_lock(&ms->src_page_req_mutex);
+    if (!QSIMPLEQ_EMPTY(&ms->src_page_requests)) {
+        struct MigrationSrcPageRequest *entry =
+                                    QSIMPLEQ_FIRST(&ms->src_page_requests);
+        result = entry->rb;
+        *offset = entry->offset;
+        *ram_addr_abs = (entry->offset + entry->rb->offset) & TARGET_PAGE_MASK;
+
+        if (entry->len > TARGET_PAGE_SIZE) {
+            entry->len -= TARGET_PAGE_SIZE;
+            entry->offset += TARGET_PAGE_SIZE;
+        } else {
+            memory_region_unref(result->mr);
+            QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
+            g_free(entry);
+        }
+    }
+    qemu_mutex_unlock(&ms->src_page_req_mutex);
+
+    return result;
+}
+
+
 /**
  * Queue the pages for transmission, e.g. a request from postcopy destination
  *   ms: MigrationStatus in which the queue is held
@@ -987,6 +1032,58 @@  err:
 
 
 /**
+ * ram_save_host_page: Starting at *offset send pages upto the end
+ *                     of the current host page.  It's valid for the initial
+ *                     offset to point into the middle of a host page
+ *                     in which case the remainder of the hostpage is sent.
+ *                     Only dirty target pages are sent.
+ *
+ * Returns: Number of pages written.
+ *
+ * @f: QEMUFile where to send the data
+ * @block: pointer to block that contains the page we want to send
+ * @offset: offset inside the block for the page; updated to last target page
+ *          sent
+ * @last_stage: if we are at the completion stage
+ * @bytes_transferred: increase it with the number of transferred bytes
+ */
+static int ram_save_host_page(MigrationState *ms, QEMUFile *f, RAMBlock* block,
+                              ram_addr_t *offset, bool last_stage,
+                              uint64_t *bytes_transferred,
+                              ram_addr_t dirty_ram_abs)
+{
+    int tmppages, pages = 0;
+    do {
+        /* Check the pages is dirty and if it is send it */
+        if (migration_bitmap_clear_dirty(dirty_ram_abs)) {
+            if (compression_switch && migrate_use_compression()) {
+                tmppages = ram_save_compressed_page(f, block, *offset,
+                                                    last_stage,
+                                                    bytes_transferred);
+            } else {
+                tmppages = ram_save_page(f, block, *offset, last_stage,
+                                         bytes_transferred);
+            }
+
+            if (tmppages < 0) {
+                return tmppages;
+            } else {
+                if (ms->sentmap) {
+                    set_bit(dirty_ram_abs >> TARGET_PAGE_BITS, ms->sentmap);
+                }
+            }
+            pages += tmppages;
+        }
+        *offset += TARGET_PAGE_SIZE;
+        dirty_ram_abs += TARGET_PAGE_SIZE;
+    } while (*offset & (qemu_host_page_size - 1));
+
+    /* The offset we leave with is the last one we looked at */
+    *offset -= TARGET_PAGE_SIZE;
+    return pages;
+}
+
+/**
  * ram_find_and_save_block: Finds a dirty page and sends it to f
  *
  * Called within an RCU critical section.
@@ -997,65 +1094,102 @@  err:
  * @f: QEMUFile where to send the data
  * @last_stage: if we are at the completion stage
  * @bytes_transferred: increase it with the number of transferred bytes
+ *
+ * On systems where host-page-size > target-page-size it will send all the
+ * pages in a host page that are dirty.
  */
 
 static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
                                    uint64_t *bytes_transferred)
 {
+    MigrationState *ms = migrate_get_current();
     RAMBlock *block = last_seen_block;
+    RAMBlock *tmpblock;
     ram_addr_t offset = last_offset;
+    ram_addr_t tmpoffset;
     bool complete_round = false;
     int pages = 0;
-    MemoryRegion *mr;
     ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
                                  ram_addr_t space */
 
-    if (!block)
+    if (!block) {
         block = QLIST_FIRST_RCU(&ram_list.blocks);
+        last_was_from_queue = false;
+    }
 
-    while (true) {
-        mr = block->mr;
-        offset = migration_bitmap_find_and_reset_dirty(mr, offset,
-                                                       &dirty_ram_abs);
-        if (complete_round && block == last_seen_block &&
-            offset >= last_offset) {
-            break;
-        }
-        if (offset >= block->used_length) {
-            offset = 0;
-            block = QLIST_NEXT_RCU(block, next);
-            if (!block) {
-                block = QLIST_FIRST_RCU(&ram_list.blocks);
-                complete_round = true;
-                ram_bulk_stage = false;
-                if (migrate_use_xbzrle()) {
-                    /* If xbzrle is on, stop using the data compression at this
-                     * point. In theory, xbzrle can do better than compression.
-                     */
-                    flush_compressed_data(f);
-                    compression_switch = false;
-                }
+    while (true) { /* Until we send a block or run out of stuff to send */
+        tmpblock = ram_save_unqueue_page(ms, &tmpoffset, &dirty_ram_abs);
+
+        if (tmpblock) {
+            /* We've got a block from the postcopy queue */
+            trace_ram_find_and_save_block_postcopy(tmpblock->idstr,
+                                                   (uint64_t)tmpoffset,
+                                                   (uint64_t)dirty_ram_abs);
+            /*
+             * We're sending this page, and since it's postcopy nothing else
+             * will dirty it, and we must make sure it doesn't get sent again
+             * even if this queue request was received after the background
+             * search already sent it.
+             */
+            if (!test_bit(dirty_ram_abs >> TARGET_PAGE_BITS,
+                          migration_bitmap)) {
+                trace_ram_find_and_save_block_postcopy_not_dirty(
+                    tmpblock->idstr, (uint64_t)tmpoffset,
+                    (uint64_t)dirty_ram_abs,
+                    test_bit(dirty_ram_abs >> TARGET_PAGE_BITS, ms->sentmap));
+
+                continue;
             }
+            /*
+             * As soon as we start servicing pages out of order, then we have
+             * to kill the bulk stage, since the bulk stage assumes
+             * in (migration_bitmap_find_and_reset_dirty) that every page is
+             * dirty, that's no longer true.
+             */
+            ram_bulk_stage = false;
+            /*
+             * We want the background search to continue from the queued page
+             * since the guest is likely to want other pages near to the page
+             * it just requested.
+             */
+            block = tmpblock;
+            offset = tmpoffset;
         } else {
-            if (compression_switch && migrate_use_compression()) {
-                pages = ram_save_compressed_page(f, block, offset, last_stage,
-                                                 bytes_transferred);
-            } else {
-                pages = ram_save_page(f, block, offset, last_stage,
-                                      bytes_transferred);
+            MemoryRegion *mr;
+            /* priority queue empty, so just search for something dirty */
+            mr = block->mr;
+            offset = migration_bitmap_find_dirty(mr, offset, &dirty_ram_abs);
+            if (complete_round && block == last_seen_block &&
+                offset >= last_offset) {
+                break;
             }
-
-            /* if page is unmodified, continue to the next */
-            if (pages > 0) {
-                MigrationState *ms = migrate_get_current();
-                if (ms->sentmap) {
-                    set_bit(dirty_ram_abs >> TARGET_PAGE_BITS, ms->sentmap);
+            if (offset >= block->used_length) {
+                offset = 0;
+                block = QLIST_NEXT_RCU(block, next);
+                if (!block) {
+                    block = QLIST_FIRST_RCU(&ram_list.blocks);
+                    complete_round = true;
+                    ram_bulk_stage = false;
+                    if (migrate_use_xbzrle()) {
+                        /* If xbzrle is on, stop using the data compression at
+                         * this point. In theory, xbzrle can do better than
+                         * compression.
+                         */
+                        flush_compressed_data(f);
+                        compression_switch = false;
+                    }
                 }
-
-                last_sent_block = block;
-                break;
+                continue; /* pick an offset in the new block */
             }
         }
+
+        pages = ram_save_host_page(ms, f, block, &offset, last_stage,
+                                   bytes_transferred, dirty_ram_abs);
+
+        /* if page is unmodified, continue to the next */
+        if (pages > 0) {
+            break;
+        }
     }
 
     last_seen_block = block;
@@ -1148,6 +1282,7 @@  static void reset_ram_globals(void)
     last_offset = 0;
     last_version = ram_list.version;
     ram_bulk_stage = true;
+    last_was_from_queue = false;
 }
 
 #define MAX_WAIT 50 /* ms, half buffered_file limit */
diff --git a/trace-events b/trace-events
index 04f6682..cb707c7 100644
--- a/trace-events
+++ b/trace-events
@@ -1231,6 +1231,8 @@  qemu_file_fclose(void) ""
 migration_bitmap_sync_start(void) ""
 migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64""
 migration_throttle(void) ""
+ram_find_and_save_block_postcopy(const char *block_name, uint64_t tmp_offset, uint64_t ram_addr) "%s/%" PRIx64 " ram_addr=%" PRIx64
+ram_find_and_save_block_postcopy_not_dirty(const char *block_name, uint64_t tmp_offset, uint64_t ram_addr, int sent) "%s/%" PRIx64 " ram_addr=%" PRIx64 " (sent=%d)"
 ram_postcopy_send_discard_bitmap(void) ""
 ram_save_queue_pages(const char *rbname, size_t start, size_t len) "%s: start: %zx len: %zx"