Message ID | 1443515898-3594-41-git-send-email-dgilbert@redhat.com |
---|---|
State | New |
Headers | show |
"Dr. David Alan Gilbert (git)" <dgilbert@redhat.com> wrote: > From: "Dr. David Alan Gilbert" <dgilbert@redhat.com> > > When transmitting RAM pages, consume pages that have been queued by > MIG_RPCOMM_REQPAGE commands and send them ahead of normal page scanning. > > Note: > a) After a queued page the linear walk carries on from after the > unqueued page; there is a reasonable chance that the destination > was about to ask for other closeby pages anyway. > > b) We have to be careful of any assumptions that the page walking > code makes, in particular it does some short cuts on its first linear > walk that break as soon as we do a queued page. > > c) We have to be careful to not break up host-page size chunks, since > this makes it harder to place the pages on the destination. > > Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > --- > migration/ram.c | 195 +++++++++++++++++++++++++++++++++++++++++++++++--------- > trace-events | 2 + > 2 files changed, 168 insertions(+), 29 deletions(-) > > diff --git a/migration/ram.c b/migration/ram.c > index 5771983..487e838 100644 > --- a/migration/ram.c > +++ b/migration/ram.c > @@ -516,9 +516,9 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data, > * Returns: byte offset within memory region of the start of a dirty page > */ > static inline > -ram_addr_t migration_bitmap_find_and_reset_dirty(RAMBlock *rb, > - ram_addr_t start, > - ram_addr_t *ram_addr_abs) > +ram_addr_t migration_bitmap_find_dirty(RAMBlock *rb, > + ram_addr_t start, > + ram_addr_t *ram_addr_abs) > { > unsigned long base = rb->offset >> TARGET_PAGE_BITS; > unsigned long nr = base + (start >> TARGET_PAGE_BITS); > @@ -535,15 +535,24 @@ ram_addr_t migration_bitmap_find_and_reset_dirty(RAMBlock *rb, > next = find_next_bit(bitmap, size, nr); > } > > - if (next < size) { > - clear_bit(next, bitmap); > - migration_dirty_pages--; > - } > *ram_addr_abs = next << TARGET_PAGE_BITS; > return (next - base) << TARGET_PAGE_BITS; > } > > -/* Called 
with rcu_read_lock() to protect migration_bitmap */ > +static inline bool migration_bitmap_clear_dirty(ram_addr_t addr) > +{ > + bool ret; > + int nr = addr >> TARGET_PAGE_BITS; > + unsigned long *bitmap = atomic_rcu_read(&migration_bitmap); > + > + ret = test_and_clear_bit(nr, bitmap); > + > + if (ret) { > + migration_dirty_pages--; > + } > + return ret; > +} > + > static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length) > { > unsigned long *bitmap; > @@ -960,9 +969,8 @@ static int ram_save_compressed_page(QEMUFile *f, RAMBlock *block, > static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss, > bool *again, ram_addr_t *ram_addr_abs) > { > - pss->offset = migration_bitmap_find_and_reset_dirty(pss->block, > - pss->offset, > - ram_addr_abs); > + pss->offset = migration_bitmap_find_dirty(pss->block, pss->offset, > + ram_addr_abs); > if (pss->complete_round && pss->block == last_seen_block && > pss->offset >= last_offset) { > /* > @@ -1001,6 +1009,88 @@ static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss, > } > } > > +/* > + * Unqueue a page from the queue fed by postcopy page requests; skips pages > + * that are already sent (!dirty) > + * > + * Returns: true if a queued page is found > + * ms: MigrationState in > + * pss: PageSearchStatus structure updated with found block/offset > + * ram_addr_abs: global offset in the dirty/sent bitmaps > + */ > +static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss, > + ram_addr_t *ram_addr_abs) > +{ > + RAMBlock *block; > + ram_addr_t offset; > + bool dirty; > + > + do { > + block = NULL; > + qemu_mutex_lock(&ms->src_page_req_mutex); > + if (!QSIMPLEQ_EMPTY(&ms->src_page_requests)) { > + struct MigrationSrcPageRequest *entry = > + QSIMPLEQ_FIRST(&ms->src_page_requests); > + block = entry->rb; > + offset = entry->offset; > + *ram_addr_abs = (entry->offset + entry->rb->offset) & > + TARGET_PAGE_MASK; > + > + if (entry->len > TARGET_PAGE_SIZE) { > + entry->len -= 
TARGET_PAGE_SIZE; > + entry->offset += TARGET_PAGE_SIZE; > + } else { > + memory_region_unref(block->mr); > + QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req); > + g_free(entry); > + } > + } > + qemu_mutex_unlock(&ms->src_page_req_mutex); Can we split this chunk with a name like: it_is_complicated_to_get_the_first_queued_page(&ms, &block, &offset, ram_addr_abs) or something like that? Yes, we can improve naming here. > + > + /* > + * We're sending this page, and since it's postcopy nothing else > + * will dirty it, and we must make sure it doesn't get sent again > + * even if this queue request was received after the background > + * search already sent it. > + */ > + if (block) { > + dirty = test_bit(*ram_addr_abs >> TARGET_PAGE_BITS, > + migration_bitmap); You need to do the atomic_rcu_read(&migration_bitmap) dance, no? Why don't you do here a test_and_clear_bit() and then you don't have to change migration_bitmap_find_and_reset_dirty() All our migration code works with ram address, but we need basically everywhere page numbers. I am not sure if things will get clearer/more complicated if we changed the conventions to use page_number instead of ram_addr_abs. But this one is completely independent of this patch. > + if (!dirty) { > + trace_get_queued_page_not_dirty( > + block->idstr, (uint64_t)offset, > + (uint64_t)*ram_addr_abs, > + test_bit(*ram_addr_abs >> TARGET_PAGE_BITS, ms->sentmap)); > + } else { > + trace_get_queued_page(block->idstr, > + (uint64_t)offset, > + (uint64_t)*ram_addr_abs); > + } > + } > + > + } while (block && !dirty); > + > + if (block) { > + /* > + * As soon as we start servicing pages out of order, then we have > + * to kill the bulk stage, since the bulk stage assumes > + * in (migration_bitmap_find_and_reset_dirty) that every page is > + * dirty, that's no longer true.
> + */ > + ram_bulk_stage = false; > + > + /* > + * We want the background search to continue from the queued page > + * since the guest is likely to want other pages near to the page > + * it just requested. > + */ > + pss->block = block; > + pss->offset = offset; > + } > + > + return !!block; > +} > + > /** > * flush_page_queue: Flush any remaining pages in the ram request queue > * it should be empty at the end anyway, but in error cases there may be > @@ -1087,6 +1177,57 @@ err: > > > /** > + * ram_save_host_page: Starting at *offset send pages upto the end > + * of the current host page. It's valid for the initial > + * offset to point into the middle of a host page > + * in which case the remainder of the hostpage is sent. > + * Only dirty target pages are sent. > + * > + * Returns: Number of pages written. > + * > + * @f: QEMUFile where to send the data > + * @block: pointer to block that contains the page we want to send > + * @offset: offset inside the block for the page; updated to last target page > + * sent > + * @last_stage: if we are at the completion stage > + * @bytes_transferred: increase it with the number of transferred bytes > + */ > +static int ram_save_host_page(MigrationState *ms, QEMUFile *f, RAMBlock* block, > + ram_addr_t *offset, bool last_stage, > + uint64_t *bytes_transferred, > + ram_addr_t dirty_ram_abs) > +{ > + int tmppages, pages = 0; > + do { > + /* Check the pages is dirty and if it is send it */ > + if (migration_bitmap_clear_dirty(dirty_ram_abs)) { > + if (compression_switch && migrate_use_compression()) { > + tmppages = ram_save_compressed_page(f, block, *offset, > + last_stage, > + bytes_transferred); > + } else { > + tmppages = ram_save_page(f, block, *offset, last_stage, > + bytes_transferred); > + } > + > + if (tmppages < 0) { > + return tmppages; > + } > + if (ms->sentmap) { > + set_bit(dirty_ram_abs >> TARGET_PAGE_BITS, ms->sentmap); > + } > + pages += tmppages; > + } > + *offset += TARGET_PAGE_SIZE; > + dirty_ram_abs += 
TARGET_PAGE_SIZE; > + } while (*offset & (qemu_host_page_size - 1)); > + > + /* The offset we leave with is the last one we looked at */ > + *offset -= TARGET_PAGE_SIZE; > + return pages; > +} Split this function first to make changes easier to gasp? We are doing (at least) two quite different things here. > + > +/** > * ram_find_and_save_block: Finds a dirty page and sends it to f > * > * Called within an RCU critical section. > @@ -1097,12 +1238,16 @@ err: > * @f: QEMUFile where to send the data > * @last_stage: if we are at the completion stage > * @bytes_transferred: increase it with the number of transferred bytes > + * > + * On systems where host-page-size > target-page-size it will send all the > + * pages in a host page that are dirty. > */ > > static int ram_find_and_save_block(QEMUFile *f, bool last_stage, > uint64_t *bytes_transferred) > { > PageSearchStatus pss; > + MigrationState *ms = migrate_get_current(); > int pages = 0; > bool again, found; > ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in > @@ -1117,26 +1262,18 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage, > } > > do { > - found = find_dirty_block(f, &pss, &again, &dirty_ram_abs); > + again = true; > + found = get_queued_page(ms, &pss, &dirty_ram_abs); > > - if (found) { > - if (compression_switch && migrate_use_compression()) { > - pages = ram_save_compressed_page(f, pss.block, pss.offset, > - last_stage, > - bytes_transferred); > - } else { > - pages = ram_save_page(f, pss.block, pss.offset, last_stage, > - bytes_transferred); > - } > + if (!found) { > + /* priority queue empty, so just search for something dirty */ > + found = find_dirty_block(f, &pss, &again, &dirty_ram_abs); > + } > > - /* if page is unmodified, continue to the next */ > - if (pages > 0) { > - MigrationState *ms = migrate_get_current(); > - last_sent_block = pss.block; > - if (ms->sentmap) { > - set_bit(dirty_ram_abs >> TARGET_PAGE_BITS, ms->sentmap); > - } > - } > + if (found) { 
> + pages = ram_save_host_page(ms, f, pss.block, &pss.offset, > + last_stage, bytes_transferred, > + dirty_ram_abs); > } > } while (!pages && again); Using two loops here? This is the code after your changes: do { again = true; found = get_queued_page(ms, &pss, &dirty_ram_abs); if (!found) { /* priority queue empty, so just search for something dirty */ found = find_dirty_block(f, &pss, &again, &dirty_ram_abs); } if (found) { pages = ram_save_host_page(ms, f, pss.block, &pss.offset, last_stage, bytes_transferred, dirty_ram_abs); } } while (!pages && again); while (get_queued_page(ms, &pss, &dirty_ram_abs)) { pages = ram_save_host_page(ms, f, pss.block, &pss.offset, last_stage, bytes_transferred, dirty_ram_abs); } do { /* priority queue empty, so just search for something dirty */ found = find_dirty_block(f, &pss, &again, &dirty_ram_abs); if (found) { pages = ram_save_host_page(ms, f, pss.block, &pss.offset, last_stage, bytes_transferred, dirty_ram_abs); } } while (!pages && again); We repeat the ram_save_host_page() call, but IMHO, it is easier to see what we are doing, and especially how we get out of the loop. Later, Juan. > > diff --git a/trace-events b/trace-events > index e40f00e..9e4206b 100644 > --- a/trace-events > +++ b/trace-events > @@ -1244,6 +1244,8 @@ vmstate_subsection_load_good(const char *parent) "%s" > qemu_file_fclose(void) "" > > # migration/ram.c > +get_queued_page(const char *block_name, uint64_t tmp_offset, uint64_t ram_addr) "%s/%" PRIx64 " ram_addr=%" PRIx64 > +get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, uint64_t ram_addr, int sent) "%s/%" PRIx64 " ram_addr=%" PRIx64 " (sent=%d)" > migration_bitmap_sync_start(void) "" > migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64"" > migration_throttle(void) ""
* Juan Quintela (quintela@redhat.com) wrote: > "Dr. David Alan Gilbert (git)" <dgilbert@redhat.com> wrote: > > From: "Dr. David Alan Gilbert" <dgilbert@redhat.com> > > > > When transmitting RAM pages, consume pages that have been queued by > > MIG_RPCOMM_REQPAGE commands and send them ahead of normal page scanning. > > > > Note: > > a) After a queued page the linear walk carries on from after the > > unqueued page; there is a reasonable chance that the destination > > was about to ask for other closeby pages anyway. > > > > b) We have to be careful of any assumptions that the page walking > > code makes, in particular it does some short cuts on its first linear > > walk that break as soon as we do a queued page. > > > > c) We have to be careful to not break up host-page size chunks, since > > this makes it harder to place the pages on the destination. > > > > Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > > --- > > migration/ram.c | 195 +++++++++++++++++++++++++++++++++++++++++++++++--------- > > trace-events | 2 + > > 2 files changed, 168 insertions(+), 29 deletions(-) > > > > diff --git a/migration/ram.c b/migration/ram.c > > index 5771983..487e838 100644 > > --- a/migration/ram.c > > +++ b/migration/ram.c > > @@ -516,9 +516,9 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data, > > * Returns: byte offset within memory region of the start of a dirty page > > */ > > static inline > > -ram_addr_t migration_bitmap_find_and_reset_dirty(RAMBlock *rb, > > - ram_addr_t start, > > - ram_addr_t *ram_addr_abs) > > +ram_addr_t migration_bitmap_find_dirty(RAMBlock *rb, > > + ram_addr_t start, > > + ram_addr_t *ram_addr_abs) > > { > > unsigned long base = rb->offset >> TARGET_PAGE_BITS; > > unsigned long nr = base + (start >> TARGET_PAGE_BITS); > > @@ -535,15 +535,24 @@ ram_addr_t migration_bitmap_find_and_reset_dirty(RAMBlock *rb, > > next = find_next_bit(bitmap, size, nr); > > } > > > > - if (next < size) { > > - clear_bit(next, bitmap); > > - 
migration_dirty_pages--; > > - } > > *ram_addr_abs = next << TARGET_PAGE_BITS; > > return (next - base) << TARGET_PAGE_BITS; > > } > > > > -/* Called with rcu_read_lock() to protect migration_bitmap */ > > +static inline bool migration_bitmap_clear_dirty(ram_addr_t addr) > > +{ > > + bool ret; > > + int nr = addr >> TARGET_PAGE_BITS; > > + unsigned long *bitmap = atomic_rcu_read(&migration_bitmap); > > + > > + ret = test_and_clear_bit(nr, bitmap); > > + > > + if (ret) { > > + migration_dirty_pages--; > > + } > > + return ret; > > +} > > + > > static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length) > > { > > unsigned long *bitmap; > > @@ -960,9 +969,8 @@ static int ram_save_compressed_page(QEMUFile *f, RAMBlock *block, > > static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss, > > bool *again, ram_addr_t *ram_addr_abs) > > { > > - pss->offset = migration_bitmap_find_and_reset_dirty(pss->block, > > - pss->offset, > > - ram_addr_abs); > > + pss->offset = migration_bitmap_find_dirty(pss->block, pss->offset, > > + ram_addr_abs); > > if (pss->complete_round && pss->block == last_seen_block && > > pss->offset >= last_offset) { > > /* > > @@ -1001,6 +1009,88 @@ static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss, > > } > > } > > > > +/* > > + * Unqueue a page from the queue fed by postcopy page requests; skips pages > > + * that are already sent (!dirty) > > + * > > + * Returns: true if a queued page is found > > + * ms: MigrationState in > > + * pss: PageSearchStatus structure updated with found block/offset > > + * ram_addr_abs: global offset in the dirty/sent bitmaps > > + */ > > +static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss, > > + ram_addr_t *ram_addr_abs) > > +{ > > + RAMBlock *block; > > + ram_addr_t offset; > > + bool dirty; > > + > > + do { > > + block = NULL; > > + qemu_mutex_lock(&ms->src_page_req_mutex); > > + if (!QSIMPLEQ_EMPTY(&ms->src_page_requests)) { > > + struct MigrationSrcPageRequest 
*entry = > > + QSIMPLEQ_FIRST(&ms->src_page_requests); > > + block = entry->rb; > > + offset = entry->offset; > > + *ram_addr_abs = (entry->offset + entry->rb->offset) & > > + TARGET_PAGE_MASK; > > + > > + if (entry->len > TARGET_PAGE_SIZE) { > > + entry->len -= TARGET_PAGE_SIZE; > > + entry->offset += TARGET_PAGE_SIZE; > > + } else { > > + memory_region_unref(block->mr); > > + QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req); > > + g_free(entry); > > + } > > + } > > + qemu_mutex_unlock(&ms->src_page_req_mutex); > > Can we spilt this chunk with a name like: > > it_is_complicated_to_get_the_first_queued_pagge(&ms, &block, &offset, > ram_addr_abs) or something like that? > > Yes, we can improve naming here. Done; we still have get_queued_page and it called 'unqueue_page' that does the first half of that. > > + > > + /* > > + * We're sending this page, and since it's postcopy nothing else > > + * will dirty it, and we must make sure it doesn't get sent again > > + * even if this queue request was received after the background > > + * search already sent it. > > + */ > > + if (block) { > > + dirty = test_bit(*ram_addr_abs >> TARGET_PAGE_BITS, > > + migration_bitmap); > > > You need to do the atomic_rcu_read(&migration_bitmap) dance, no? Done. > Why don't you do here a test_and_clear_bit() and then you don't have to > change migration_bintmap_find_and_reset_dirty() Because it gets messy with the host pages; we're only 'finding' the first target-page in a host page, and leaving the host-page code to do the work on the whole of the host page, so I want to leave the dirty bits for it to deal with. > All our migration code works with ram address, but we need basically > everywhere page numbers. I am not sure if things will get clearer/more > complicated if we changed the conventions to use page_number insntead of > ram_addr_abs. But this one is completely independent of this patch. Yes; it's VERY confusing. 
> > + if (!dirty) { > > + trace_get_queued_page_not_dirty( > > + block->idstr, (uint64_t)offset, > > + (uint64_t)*ram_addr_abs, > > + test_bit(*ram_addr_abs >> TARGET_PAGE_BITS, ms->sentmap)); > > + } else { > > + trace_get_queued_page(block->idstr, > > + (uint64_t)offset, > > + (uint64_t)*ram_addr_abs); > > + } > > + } > > + > > + } while (block && !dirty); > > + > > + if (block) { > > + /* > > + * As soon as we start servicing pages out of order, then we have > > + * to kill the bulk stage, since the bulk stage assumes > > + * in (migration_bitmap_find_and_reset_dirty) that every page is > > + * dirty, that's no longer true. > > + */ > > + ram_bulk_stage = false; > > + > > + /* > > + * We want the background search to continue from the queued page > > + * since the guest is likely to want other pages near to the page > > + * it just requested. > > + */ > > + pss->block = block; > > + pss->offset = offset; > > + } > > + > > + return !!block; > > +} > > + > > /** > > * flush_page_queue: Flush any remaining pages in the ram request queue > > * it should be empty at the end anyway, but in error cases there may be > > @@ -1087,6 +1177,57 @@ err: > > > > > > /** > > + * ram_save_host_page: Starting at *offset send pages upto the end > > + * of the current host page. It's valid for the initial > > + * offset to point into the middle of a host page > > + * in which case the remainder of the hostpage is sent. > > + * Only dirty target pages are sent. > > + * > > + * Returns: Number of pages written. 
> > + * > > + * @f: QEMUFile where to send the data > > + * @block: pointer to block that contains the page we want to send > > + * @offset: offset inside the block for the page; updated to last target page > > + * sent > > + * @last_stage: if we are at the completion stage > > + * @bytes_transferred: increase it with the number of transferred bytes > > + */ > > +static int ram_save_host_page(MigrationState *ms, QEMUFile *f, RAMBlock* block, > > + ram_addr_t *offset, bool last_stage, > > + uint64_t *bytes_transferred, > > + ram_addr_t dirty_ram_abs) > > +{ > > + int tmppages, pages = 0; > > + do { > > + /* Check the pages is dirty and if it is send it */ > > + if (migration_bitmap_clear_dirty(dirty_ram_abs)) { > > + if (compression_switch && migrate_use_compression()) { > > + tmppages = ram_save_compressed_page(f, block, *offset, > > + last_stage, > > + bytes_transferred); > > + } else { > > + tmppages = ram_save_page(f, block, *offset, last_stage, > > + bytes_transferred); > > + } > > + > > + if (tmppages < 0) { > > + return tmppages; > > + } > > + if (ms->sentmap) { > > + set_bit(dirty_ram_abs >> TARGET_PAGE_BITS, ms->sentmap); > > + } > > + pages += tmppages; > > + } > > + *offset += TARGET_PAGE_SIZE; > > + dirty_ram_abs += TARGET_PAGE_SIZE; > > + } while (*offset & (qemu_host_page_size - 1)); > > + > > + /* The offset we leave with is the last one we looked at */ > > + *offset -= TARGET_PAGE_SIZE; > > + return pages; > > +} > > Split this function first to make changes easier to gasp? > > We are doing (at least) two quite different things here. Done; ram_save_host_page now calls ram_save_target_page to do the meat of it. > > + > > +/** > > * ram_find_and_save_block: Finds a dirty page and sends it to f > > * > > * Called within an RCU critical section. 
> > @@ -1097,12 +1238,16 @@ err: > > * @f: QEMUFile where to send the data > > * @last_stage: if we are at the completion stage > > * @bytes_transferred: increase it with the number of transferred bytes > > + * > > + * On systems where host-page-size > target-page-size it will send all the > > + * pages in a host page that are dirty. > > */ > > > > static int ram_find_and_save_block(QEMUFile *f, bool last_stage, > > uint64_t *bytes_transferred) > > { > > PageSearchStatus pss; > > + MigrationState *ms = migrate_get_current(); > > int pages = 0; > > bool again, found; > > ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in > > @@ -1117,26 +1262,18 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage, > > } > > > > do { > > - found = find_dirty_block(f, &pss, &again, &dirty_ram_abs); > > + again = true; > > + found = get_queued_page(ms, &pss, &dirty_ram_abs); > > > > - if (found) { > > - if (compression_switch && migrate_use_compression()) { > > - pages = ram_save_compressed_page(f, pss.block, pss.offset, > > - last_stage, > > - bytes_transferred); > > - } else { > > - pages = ram_save_page(f, pss.block, pss.offset, last_stage, > > - bytes_transferred); > > - } > > + if (!found) { > > + /* priority queue empty, so just search for something dirty */ > > + found = find_dirty_block(f, &pss, &again, &dirty_ram_abs); > > + } > > > > - /* if page is unmodified, continue to the next */ > > - if (pages > 0) { > > - MigrationState *ms = migrate_get_current(); > > - last_sent_block = pss.block; > > - if (ms->sentmap) { > > - set_bit(dirty_ram_abs >> TARGET_PAGE_BITS, ms->sentmap); > > - } > > - } > > + if (found) { > > + pages = ram_save_host_page(ms, f, pss.block, &pss.offset, > > + last_stage, bytes_transferred, > > + dirty_ram_abs); > > } > > } while (!pages && again); > > > Using too loops here? 
> This is the code after your changes: > > > do { > again = true; > found = get_queued_page(ms, &pss, &dirty_ram_abs); > > if (!found) { > /* priority queue empty, so just search for something dirty */ > found = find_dirty_block(f, &pss, &again, &dirty_ram_abs); > } > > if (found) { > pages = ram_save_host_page(ms, f, pss.block, &pss.offset, > last_stage, bytes_transferred, > dirty_ram_abs); > } > } while (!pages && again); > > > while (get_queued_page(ms, &pss, &dirty_ram_abs)) { > pages = ram_save_host_page(ms, f, pss.block, &pss.offset, > last_stage, bytes_transferred, > dirty_ram_abs); > } > > > > do { > /* priority queue empty, so just search for something dirty */ > found = find_dirty_block(f, &pss, &again, &dirty_ram_abs); > > if (found) { > pages = ram_save_host_page(ms, f, pss.block, &pss.offset, > last_stage, bytes_transferred, > dirty_ram_abs); > } > } while (!pages && again); > > > We repeat the ram_save_host_page() call, but IMHO, it is easrier to see > what we are doing, and specially how we get out of the loop. Note you've changed the behaviour there; my loop sends only one (host) page (preferably from the queue, but failing that finds a dirty one); yours sends *all* the queued pages and then tries to find a dirty one. That might not be a bad change, and I don't think it will break anything higher up, but it's not the same behaviour. Dave > > Later, Juan. 
> > > > > > diff --git a/trace-events b/trace-events > > index e40f00e..9e4206b 100644 > > --- a/trace-events > > +++ b/trace-events > > @@ -1244,6 +1244,8 @@ vmstate_subsection_load_good(const char *parent) "%s" > > qemu_file_fclose(void) "" > > > > # migration/ram.c > > +get_queued_page(const char *block_name, uint64_t tmp_offset, uint64_t ram_addr) "%s/%" PRIx64 " ram_addr=%" PRIx64 > > +get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, uint64_t ram_addr, int sent) "%s/%" PRIx64 " ram_addr=%" PRIx64 " (sent=%d)" > > migration_bitmap_sync_start(void) "" > > migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64"" > > migration_throttle(void) "" -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff --git a/migration/ram.c b/migration/ram.c index 5771983..487e838 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -516,9 +516,9 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data, * Returns: byte offset within memory region of the start of a dirty page */ static inline -ram_addr_t migration_bitmap_find_and_reset_dirty(RAMBlock *rb, - ram_addr_t start, - ram_addr_t *ram_addr_abs) +ram_addr_t migration_bitmap_find_dirty(RAMBlock *rb, + ram_addr_t start, + ram_addr_t *ram_addr_abs) { unsigned long base = rb->offset >> TARGET_PAGE_BITS; unsigned long nr = base + (start >> TARGET_PAGE_BITS); @@ -535,15 +535,24 @@ ram_addr_t migration_bitmap_find_and_reset_dirty(RAMBlock *rb, next = find_next_bit(bitmap, size, nr); } - if (next < size) { - clear_bit(next, bitmap); - migration_dirty_pages--; - } *ram_addr_abs = next << TARGET_PAGE_BITS; return (next - base) << TARGET_PAGE_BITS; } -/* Called with rcu_read_lock() to protect migration_bitmap */ +static inline bool migration_bitmap_clear_dirty(ram_addr_t addr) +{ + bool ret; + int nr = addr >> TARGET_PAGE_BITS; + unsigned long *bitmap = atomic_rcu_read(&migration_bitmap); + + ret = test_and_clear_bit(nr, bitmap); + + if (ret) { + migration_dirty_pages--; + } + return ret; +} + static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length) { unsigned long *bitmap; @@ -960,9 +969,8 @@ static int ram_save_compressed_page(QEMUFile *f, RAMBlock *block, static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss, bool *again, ram_addr_t *ram_addr_abs) { - pss->offset = migration_bitmap_find_and_reset_dirty(pss->block, - pss->offset, - ram_addr_abs); + pss->offset = migration_bitmap_find_dirty(pss->block, pss->offset, + ram_addr_abs); if (pss->complete_round && pss->block == last_seen_block && pss->offset >= last_offset) { /* @@ -1001,6 +1009,88 @@ static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss, } } +/* + * Unqueue a page from the queue fed by postcopy page requests; skips 
pages + * that are already sent (!dirty) + * + * Returns: true if a queued page is found + * ms: MigrationState in + * pss: PageSearchStatus structure updated with found block/offset + * ram_addr_abs: global offset in the dirty/sent bitmaps + */ +static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss, + ram_addr_t *ram_addr_abs) +{ + RAMBlock *block; + ram_addr_t offset; + bool dirty; + + do { + block = NULL; + qemu_mutex_lock(&ms->src_page_req_mutex); + if (!QSIMPLEQ_EMPTY(&ms->src_page_requests)) { + struct MigrationSrcPageRequest *entry = + QSIMPLEQ_FIRST(&ms->src_page_requests); + block = entry->rb; + offset = entry->offset; + *ram_addr_abs = (entry->offset + entry->rb->offset) & + TARGET_PAGE_MASK; + + if (entry->len > TARGET_PAGE_SIZE) { + entry->len -= TARGET_PAGE_SIZE; + entry->offset += TARGET_PAGE_SIZE; + } else { + memory_region_unref(block->mr); + QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req); + g_free(entry); + } + } + qemu_mutex_unlock(&ms->src_page_req_mutex); + + /* + * We're sending this page, and since it's postcopy nothing else + * will dirty it, and we must make sure it doesn't get sent again + * even if this queue request was received after the background + * search already sent it. + */ + if (block) { + dirty = test_bit(*ram_addr_abs >> TARGET_PAGE_BITS, + migration_bitmap); + if (!dirty) { + trace_get_queued_page_not_dirty( + block->idstr, (uint64_t)offset, + (uint64_t)*ram_addr_abs, + test_bit(*ram_addr_abs >> TARGET_PAGE_BITS, ms->sentmap)); + } else { + trace_get_queued_page(block->idstr, + (uint64_t)offset, + (uint64_t)*ram_addr_abs); + } + } + + } while (block && !dirty); + + if (block) { + /* + * As soon as we start servicing pages out of order, then we have + * to kill the bulk stage, since the bulk stage assumes + * in (migration_bitmap_find_and_reset_dirty) that every page is + * dirty, that's no longer true. 
+ */ + ram_bulk_stage = false; + + /* + * We want the background search to continue from the queued page + * since the guest is likely to want other pages near to the page + * it just requested. + */ + pss->block = block; + pss->offset = offset; + } + + return !!block; +} + /** * flush_page_queue: Flush any remaining pages in the ram request queue * it should be empty at the end anyway, but in error cases there may be @@ -1087,6 +1177,57 @@ err: /** + * ram_save_host_page: Starting at *offset send pages upto the end + * of the current host page. It's valid for the initial + * offset to point into the middle of a host page + * in which case the remainder of the hostpage is sent. + * Only dirty target pages are sent. + * + * Returns: Number of pages written. + * + * @f: QEMUFile where to send the data + * @block: pointer to block that contains the page we want to send + * @offset: offset inside the block for the page; updated to last target page + * sent + * @last_stage: if we are at the completion stage + * @bytes_transferred: increase it with the number of transferred bytes + */ +static int ram_save_host_page(MigrationState *ms, QEMUFile *f, RAMBlock* block, + ram_addr_t *offset, bool last_stage, + uint64_t *bytes_transferred, + ram_addr_t dirty_ram_abs) +{ + int tmppages, pages = 0; + do { + /* Check the pages is dirty and if it is send it */ + if (migration_bitmap_clear_dirty(dirty_ram_abs)) { + if (compression_switch && migrate_use_compression()) { + tmppages = ram_save_compressed_page(f, block, *offset, + last_stage, + bytes_transferred); + } else { + tmppages = ram_save_page(f, block, *offset, last_stage, + bytes_transferred); + } + + if (tmppages < 0) { + return tmppages; + } + if (ms->sentmap) { + set_bit(dirty_ram_abs >> TARGET_PAGE_BITS, ms->sentmap); + } + pages += tmppages; + } + *offset += TARGET_PAGE_SIZE; + dirty_ram_abs += TARGET_PAGE_SIZE; + } while (*offset & (qemu_host_page_size - 1)); + + /* The offset we leave with is the last one we looked at 
*/ + *offset -= TARGET_PAGE_SIZE; + return pages; +} + +/** * ram_find_and_save_block: Finds a dirty page and sends it to f * * Called within an RCU critical section. @@ -1097,12 +1238,16 @@ err: * @f: QEMUFile where to send the data * @last_stage: if we are at the completion stage * @bytes_transferred: increase it with the number of transferred bytes + * + * On systems where host-page-size > target-page-size it will send all the + * pages in a host page that are dirty. */ static int ram_find_and_save_block(QEMUFile *f, bool last_stage, uint64_t *bytes_transferred) { PageSearchStatus pss; + MigrationState *ms = migrate_get_current(); int pages = 0; bool again, found; ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in @@ -1117,26 +1262,18 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage, } do { - found = find_dirty_block(f, &pss, &again, &dirty_ram_abs); + again = true; + found = get_queued_page(ms, &pss, &dirty_ram_abs); - if (found) { - if (compression_switch && migrate_use_compression()) { - pages = ram_save_compressed_page(f, pss.block, pss.offset, - last_stage, - bytes_transferred); - } else { - pages = ram_save_page(f, pss.block, pss.offset, last_stage, - bytes_transferred); - } + if (!found) { + /* priority queue empty, so just search for something dirty */ + found = find_dirty_block(f, &pss, &again, &dirty_ram_abs); + } - /* if page is unmodified, continue to the next */ - if (pages > 0) { - MigrationState *ms = migrate_get_current(); - last_sent_block = pss.block; - if (ms->sentmap) { - set_bit(dirty_ram_abs >> TARGET_PAGE_BITS, ms->sentmap); - } - } + if (found) { + pages = ram_save_host_page(ms, f, pss.block, &pss.offset, + last_stage, bytes_transferred, + dirty_ram_abs); } } while (!pages && again); diff --git a/trace-events b/trace-events index e40f00e..9e4206b 100644 --- a/trace-events +++ b/trace-events @@ -1244,6 +1244,8 @@ vmstate_subsection_load_good(const char *parent) "%s" qemu_file_fclose(void) "" # 
migration/ram.c +get_queued_page(const char *block_name, uint64_t tmp_offset, uint64_t ram_addr) "%s/%" PRIx64 " ram_addr=%" PRIx64 +get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, uint64_t ram_addr, int sent) "%s/%" PRIx64 " ram_addr=%" PRIx64 " (sent=%d)" migration_bitmap_sync_start(void) "" migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64"" migration_throttle(void) ""