Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/patches/807510/?format=api
{ "id": 807510, "url": "http://patchwork.ozlabs.org/api/patches/807510/?format=api", "web_url": "http://patchwork.ozlabs.org/project/qemu-devel/patch/1504081950-2528-25-git-send-email-peterx@redhat.com/", "project": { "id": 14, "url": "http://patchwork.ozlabs.org/api/projects/14/?format=api", "name": "QEMU Development", "link_name": "qemu-devel", "list_id": "qemu-devel.nongnu.org", "list_email": "qemu-devel@nongnu.org", "web_url": "", "scm_url": "", "webscm_url": "", "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<1504081950-2528-25-git-send-email-peterx@redhat.com>", "list_archive_url": null, "date": "2017-08-30T08:32:21", "name": "[RFC,v2,24/33] migration: synchronize dirty bitmap for resume", "commit_ref": null, "pull_url": null, "state": "new", "archived": false, "hash": "367d8d9c6fdcf16cf2f1491951be050eadc953a8", "submitter": { "id": 67717, "url": "http://patchwork.ozlabs.org/api/people/67717/?format=api", "name": "Peter Xu", "email": "peterx@redhat.com" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/qemu-devel/patch/1504081950-2528-25-git-send-email-peterx@redhat.com/mbox/", "series": [ { "id": 552, "url": "http://patchwork.ozlabs.org/api/series/552/?format=api", "web_url": "http://patchwork.ozlabs.org/project/qemu-devel/list/?series=552", "date": "2017-08-30T08:31:59", "name": "Migration: postcopy failure recovery", "version": 2, "mbox": "http://patchwork.ozlabs.org/series/552/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/807510/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/807510/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org>", "X-Original-To": "incoming@patchwork.ozlabs.org", "Delivered-To": "patchwork-incoming@bilbo.ozlabs.org", "Authentication-Results": [ "ozlabs.org;\n\tspf=pass (mailfrom) smtp.mailfrom=nongnu.org\n\t(client-ip=2001:4830:134:3::11; helo=lists.gnu.org;\n\tenvelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org;\n\treceiver=<UNKNOWN>)", "ext-mx05.extmail.prod.ext.phx2.redhat.com;\n\tdmarc=none (p=none dis=none) header.from=redhat.com", "ext-mx05.extmail.prod.ext.phx2.redhat.com;\n\tspf=fail smtp.mailfrom=peterx@redhat.com" ], "Received": [ "from lists.gnu.org (lists.gnu.org [IPv6:2001:4830:134:3::11])\n\t(using TLSv1 with cipher AES256-SHA (256/256 bits))\n\t(No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 3xhzjh0PStz9t2Q\n\tfor <incoming@patchwork.ozlabs.org>;\n\tWed, 30 Aug 2017 18:50:32 +1000 (AEST)", "from localhost ([::1]:49086 helo=lists.gnu.org)\n\tby lists.gnu.org with esmtp (Exim 4.71) (envelope-from\n\t<qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org>)\n\tid 1dmyhh-0004ow-Ox\n\tfor incoming@patchwork.ozlabs.org; Wed, 30 Aug 2017 04:50:29 -0400", "from eggs.gnu.org ([2001:4830:134:3::10]:35181)\n\tby lists.gnu.org with esmtp (Exim 4.71)\n\t(envelope-from <peterx@redhat.com>) id 1dmySt-0000Sd-UT\n\tfor qemu-devel@nongnu.org; Wed, 30 Aug 2017 04:35:13 -0400", "from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71)\n\t(envelope-from <peterx@redhat.com>) id 1dmySp-00049l-1x\n\tfor qemu-devel@nongnu.org; Wed, 30 Aug 2017 04:35:11 -0400", "from mx1.redhat.com ([209.132.183.28]:52278)\n\tby eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32)\n\t(Exim 4.71) (envelope-from <peterx@redhat.com>) id 1dmySo-000487-PC\n\tfor qemu-devel@nongnu.org; Wed, 30 Aug 2017 04:35:06 -0400", "from smtp.corp.redhat.com\n\t(int-mx05.intmail.prod.int.phx2.redhat.com [10.5.11.15])\n\t(using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits))\n\t(No client certificate requested)\n\tby mx1.redhat.com (Postfix) with ESMTPS id B2B0B37E90;\n\tWed, 30 Aug 2017 08:35:05 +0000 (UTC)", "from pxdev.xzpeter.org.com (dhcp-14-103.nay.redhat.com\n\t[10.66.14.103])\n\tby smtp.corp.redhat.com (Postfix) with ESMTP id E8E2384792;\n\tWed, 30 Aug 2017 08:34:58 +0000 (UTC)" ], "DMARC-Filter": "OpenDMARC Filter v1.3.2 mx1.redhat.com B2B0B37E90", "From": "Peter Xu <peterx@redhat.com>", "To": "qemu-devel@nongnu.org", "Date": "Wed, 30 Aug 2017 16:32:21 +0800", "Message-Id": "<1504081950-2528-25-git-send-email-peterx@redhat.com>", "In-Reply-To": "<1504081950-2528-1-git-send-email-peterx@redhat.com>", "References": "<1504081950-2528-1-git-send-email-peterx@redhat.com>", "X-Scanned-By": "MIMEDefang 2.79 on 10.5.11.15", "X-Greylist": "Sender IP whitelisted, not delayed by milter-greylist-4.5.16\n\t(mx1.redhat.com [10.5.110.29]);\n\tWed, 30 Aug 2017 08:35:05 +0000 (UTC)", "X-detected-operating-system": "by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic]\n\t[fuzzy]", "X-Received-From": "209.132.183.28", "Subject": "[Qemu-devel] [RFC v2 24/33] migration: synchronize dirty bitmap for\n\tresume", "X-BeenThere": "qemu-devel@nongnu.org", "X-Mailman-Version": "2.1.21", "Precedence": "list", "List-Id": "<qemu-devel.nongnu.org>", "List-Unsubscribe": "<https://lists.nongnu.org/mailman/options/qemu-devel>,\n\t<mailto:qemu-devel-request@nongnu.org?subject=unsubscribe>", "List-Archive": "<http://lists.nongnu.org/archive/html/qemu-devel/>", "List-Post": "<mailto:qemu-devel@nongnu.org>", "List-Help": "<mailto:qemu-devel-request@nongnu.org?subject=help>", "List-Subscribe": "<https://lists.nongnu.org/mailman/listinfo/qemu-devel>,\n\t<mailto:qemu-devel-request@nongnu.org?subject=subscribe>", "Cc": "Laurent Vivier <lvivier@redhat.com>,\n\tAndrea Arcangeli <aarcange@redhat.com>, \n\tJuan Quintela <quintela@redhat.com>,\n\tAlexey Perevalov <a.perevalov@samsung.com>, peterx@redhat.com,\n\t\"Dr . David Alan Gilbert\" <dgilbert@redhat.com>", "Errors-To": "qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org", "Sender": "\"Qemu-devel\"\n\t<qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org>" }, "content": "This patch implements the first part of core RAM resume logic for\npostcopy. ram_resume_prepare() is provided for the work.\n\nWhen the migration is interrupted by network failure, the dirty bitmap\non the source side will be meaningless, because even the dirty bit is\ncleared, it is still possible that the sent page was lost along the way\nto destination. Here instead of continue the migration with the old\ndirty bitmap on source, we ask the destination side to send back its\nreceived bitmap, then invert it to be our initial dirty bitmap.\n\nThe source side send thread will issue the MIG_CMD_RECV_BITMAP requests,\nonce per ramblock, to ask for the received bitmap. On destination side,\nMIG_RP_MSG_RECV_BITMAP will be issued, along with the requested bitmap.\nData will be received on the return-path thread of source, and the main\nmigration thread will be notified when all the ramblock bitmaps are\nsynchronized.\n\nSigned-off-by: Peter Xu <peterx@redhat.com>\n---\n migration/migration.c | 4 +++\n migration/migration.h | 1 +\n migration/ram.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++\n migration/trace-events | 4 +++\n 4 files changed, 76 insertions(+)", "diff": "diff --git a/migration/migration.c b/migration/migration.c\nindex 19b7f3a5..19aed72 100644\n--- a/migration/migration.c\n+++ b/migration/migration.c\n@@ -2605,6 +2605,8 @@ static void migration_instance_finalize(Object *obj)\n \n g_free(params->tls_hostname);\n g_free(params->tls_creds);\n+\n+ qemu_sem_destroy(&ms->rp_state.rp_sem);\n }\n \n static void migration_instance_init(Object *obj)\n@@ -2629,6 +2631,8 @@ static void migration_instance_init(Object *obj)\n params->has_downtime_limit = true;\n params->has_x_checkpoint_delay = true;\n params->has_block_incremental = true;\n+\n+ qemu_sem_init(&ms->rp_state.rp_sem, 1);\n }\n \n /*\ndiff --git a/migration/migration.h b/migration/migration.h\nindex a3a0582..d041369 100644\n--- a/migration/migration.h\n+++ b/migration/migration.h\n@@ -107,6 +107,7 @@ struct MigrationState\n QEMUFile *from_dst_file;\n QemuThread rp_thread;\n bool error;\n+ QemuSemaphore rp_sem;\n } rp_state;\n \n double mbps;\ndiff --git a/migration/ram.c b/migration/ram.c\nindex 5d938e3..afabcf5 100644\n--- a/migration/ram.c\n+++ b/migration/ram.c\n@@ -47,6 +47,7 @@\n #include \"exec/target_page.h\"\n #include \"qemu/rcu_queue.h\"\n #include \"migration/colo.h\"\n+#include \"savevm.h\"\n \n /***********************************************************/\n /* ram save/restore */\n@@ -295,6 +296,8 @@ struct RAMState {\n RAMBlock *last_req_rb;\n /* Queue of outstanding page requests from the destination */\n QemuMutex src_page_req_mutex;\n+ /* Ramblock counts to sync dirty bitmap. Only used for recovery */\n+ int ramblock_to_sync;\n QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests;\n };\n typedef struct RAMState RAMState;\n@@ -2770,6 +2773,56 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)\n return ret;\n }\n \n+/* Sync all the dirty bitmap with destination VM. */\n+static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs)\n+{\n+ RAMBlock *block;\n+ QEMUFile *file = s->to_dst_file;\n+ int ramblock_count = 0;\n+\n+ trace_ram_dirty_bitmap_sync_start();\n+\n+ /*\n+ * We do this in such order:\n+ *\n+ * 1. calculate block count\n+ * 2. fill in the count to N\n+ * 3. send MIG_CMD_RECV_BITMAP requests\n+ * 4. wait on the semaphore until N -> 0\n+ */\n+\n+ RAMBLOCK_FOREACH(block) {\n+ ramblock_count++;\n+ }\n+\n+ atomic_set(&rs->ramblock_to_sync, ramblock_count);\n+\n+ RAMBLOCK_FOREACH(block) {\n+ qemu_savevm_send_recv_bitmap(file, block->idstr);\n+ }\n+\n+ trace_ram_dirty_bitmap_sync_wait();\n+\n+ /* Wait until all the ramblocks' dirty bitmap synced */\n+ while (atomic_read(&rs->ramblock_to_sync)) {\n+ qemu_sem_wait(&s->rp_state.rp_sem);\n+ }\n+\n+ trace_ram_dirty_bitmap_sync_complete();\n+\n+ return 0;\n+}\n+\n+static void ram_dirty_bitmap_reload_notify(MigrationState *s)\n+{\n+ atomic_dec(&ram_state->ramblock_to_sync);\n+ if (ram_state->ramblock_to_sync == 0) {\n+ /* Make sure the other thread gets the latest */\n+ trace_ram_dirty_bitmap_sync_notify();\n+ qemu_sem_post(&s->rp_state.rp_sem);\n+ }\n+}\n+\n /*\n * Read the received bitmap, revert it as the initial dirty bitmap.\n * This is only used when the postcopy migration is paused but wants\n@@ -2841,12 +2894,25 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)\n \n trace_ram_dirty_bitmap_reload(block->idstr);\n \n+ /*\n+ * We succeeded to sync bitmap for current ramblock. If this is\n+ * the last one to sync, we need to notify the main send thread.\n+ */\n+ ram_dirty_bitmap_reload_notify(s);\n+\n ret = 0;\n out:\n free(le_bitmap);\n return ret;\n }\n \n+static int ram_resume_prepare(MigrationState *s, void *opaque)\n+{\n+ RAMState *rs = *(RAMState **)opaque;\n+\n+ return ram_dirty_bitmap_sync_all(s, rs);\n+}\n+\n static SaveVMHandlers savevm_ram_handlers = {\n .save_setup = ram_save_setup,\n .save_live_iterate = ram_save_iterate,\n@@ -2857,6 +2923,7 @@ static SaveVMHandlers savevm_ram_handlers = {\n .save_cleanup = ram_save_cleanup,\n .load_setup = ram_load_setup,\n .load_cleanup = ram_load_cleanup,\n+ .resume_prepare = ram_resume_prepare,\n };\n \n void ram_mig_init(void)\ndiff --git a/migration/trace-events b/migration/trace-events\nindex 61b0d49..8962916 100644\n--- a/migration/trace-events\n+++ b/migration/trace-events\n@@ -81,6 +81,10 @@ ram_postcopy_send_discard_bitmap(void) \"\"\n ram_save_page(const char *rbname, uint64_t offset, void *host) \"%s: offset: 0x%\" PRIx64 \" host: %p\"\n ram_save_queue_pages(const char *rbname, size_t start, size_t len) \"%s: start: 0x%zx len: 0x%zx\"\n ram_dirty_bitmap_reload(char *str) \"%s\"\n+ram_dirty_bitmap_sync_start(void) \"\"\n+ram_dirty_bitmap_sync_wait(void) \"\"\n+ram_dirty_bitmap_sync_notify(void) \"\"\n+ram_dirty_bitmap_sync_complete(void) \"\"\n \n # migration/migration.c\n await_return_path_close_on_source_close(void) \"\"\n", "prefixes": [ "RFC", "v2", "24/33" ] }