From patchwork Mon Aug 15 21:08:40 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Paolo Bonzini X-Patchwork-Id: 110097 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [140.186.70.17]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id 60738B6F70 for ; Tue, 16 Aug 2011 07:39:05 +1000 (EST) Received: from localhost ([::1]:53216 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Qt4Qe-0007AM-Co for incoming@patchwork.ozlabs.org; Mon, 15 Aug 2011 17:10:36 -0400 Received: from eggs.gnu.org ([140.186.70.92]:49500) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Qt4Q2-0005q2-5C for qemu-devel@nongnu.org; Mon, 15 Aug 2011 17:09:59 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1Qt4Pz-0000KE-00 for qemu-devel@nongnu.org; Mon, 15 Aug 2011 17:09:57 -0400 Received: from mail-pz0-f42.google.com ([209.85.210.42]:56686) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Qt4Py-0000G7-RE for qemu-devel@nongnu.org; Mon, 15 Aug 2011 17:09:54 -0400 Received: by mail-pz0-f42.google.com with SMTP id 37so9398728pzk.29 for ; Mon, 15 Aug 2011 14:09:54 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=gamma; h=sender:from:to:subject:date:message-id:x-mailer:in-reply-to :references; bh=rWNiES3jOHUcstwlVomIc9ohSFYNn+wmr/A2lNXZEQw=; b=gbfd8cC7a1R78vgVbdRLM4Hsf7PCeC/TMkRnFDiFQkSrrZbqDinhZvYxiBUuqjHjLp VyDiKb8nDa3awj+rVK0xju22Cq3LRZozlJa/NM9SP/2AhVy6kJyOpuxvsHJ3oxI3g8nB aNKD5E3Y45DJYCGh/tMAw5MGVAULz3K81Na0U= Received: by 10.142.213.9 with SMTP id l9mr1972612wfg.233.1313442594460; Mon, 15 Aug 2011 14:09:54 -0700 (PDT) Received: from localhost.localdomain ([216.123.155.199]) by mx.google.com with ESMTPS id i8sm1064587pbi.92.2011.08.15.14.09.52 (version=TLSv1/SSLv3 cipher=OTHER); Mon, 15 Aug 2011 14:09:53 -0700 (PDT) From: Paolo Bonzini To: qemu-devel@nongnu.org Date: Mon, 15 Aug 2011 14:08:40 -0700 Message-Id: <1313442520-12062-14-git-send-email-pbonzini@redhat.com> X-Mailer: git-send-email 1.7.6 In-Reply-To: <1313442520-12062-1-git-send-email-pbonzini@redhat.com> References: <1313442520-12062-1-git-send-email-pbonzini@redhat.com> X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.6 (newer, 2) X-Received-From: 209.85.210.42 Subject: [Qemu-devel] [RFC PATCH 13/13] RCUify ram_list X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Incomplete because the users of qemu_get_ram_ptr should be wrapped with rcu_read_lock/rcu_read_unlock. Happens to work because those are nops anyway. :) Signed-off-by: Paolo Bonzini --- arch_init.c | 14 +++++++ cpu-all.h | 4 ++ exec.c | 124 ++++++++++++++++++++++++++++++++++++++++------------------ 3 files changed, 103 insertions(+), 39 deletions(-) diff --git a/arch_init.c b/arch_init.c index 484b39d..f5a567b 100644 --- a/arch_init.c +++ b/arch_init.c @@ -117,6 +117,7 @@ static int ram_save_block(QEMUFile *f) ram_addr_t current_addr; int bytes_sent = 0; + rcu_read_lock(); if (!block) block = QLIST_FIRST(&ram_list.blocks); @@ -167,6 +168,7 @@ static int ram_save_block(QEMUFile *f) current_addr = block->offset + offset; } while (current_addr != last_block->offset + last_offset); + rcu_read_unlock(); last_block = block; last_offset = offset; @@ -181,6 +183,7 @@ static ram_addr_t ram_save_remaining(void) RAMBlock *block; ram_addr_t count = 0; + rcu_read_lock(); QLIST_FOREACH(block, &ram_list.blocks, next) { ram_addr_t addr; for (addr = block->offset; addr < block->offset + block->length; @@ -190,6 +193,7 @@ static ram_addr_t ram_save_remaining(void) } } } + rcu_read_unlock(); return count; } @@ -209,8 +213,10 @@ uint64_t ram_bytes_total(void) RAMBlock *block; uint64_t total = 0; + rcu_read_lock(); QLIST_FOREACH(block, &ram_list.blocks, next) total += block->length; + rcu_read_unlock(); return total; } @@ -232,6 +238,7 @@ static void sort_ram_list(void) RAMBlock *block, *nblock, **blocks; int n; n = 0; + //qemu_mutex_lock(&ram_list.mutex); QLIST_FOREACH(block, &ram_list.blocks, next) { ++n; } @@ -245,6 +252,7 @@ static void sort_ram_list(void) while (--n >= 0) { QLIST_INSERT_HEAD(&ram_list.blocks, blocks[n], next); } + //qemu_mutex_unlock(&ram_list.mutex); qemu_free(blocks); } @@ -273,6 +281,7 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque) sort_ram_list(); /* Make sure all dirty bits are set */ + rcu_read_lock(); QLIST_FOREACH(block, &ram_list.blocks, next) { for (addr = block->offset; addr < block->offset + block->length; addr += TARGET_PAGE_SIZE) { @@ -282,17 +291,20 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque) } } } + rcu_read_unlock(); /* Enable dirty memory tracking */ cpu_physical_memory_set_dirty_tracking(1); qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE); + rcu_read_lock(); QLIST_FOREACH(block, &ram_list.blocks, next) { qemu_put_byte(f, strlen(block->idstr)); qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); qemu_put_be64(f, block->length); } + rcu_read_unlock(); } bytes_transferred_last = bytes_transferred; @@ -374,6 +386,7 @@ int ram_load(QEMUFile *f, void *opaque, int version_id) return -EINVAL; } + rcu_read_lock(); do { addr = qemu_get_be64(f); @@ -453,6 +466,7 @@ int ram_load(QEMUFile *f, void *opaque, int version_id) return -EIO; } } while (!(flags & RAM_SAVE_FLAG_EOS)); + rcu_read_unlock(); return 0; } diff --git a/cpu-all.h b/cpu-all.h index 083d9e6..7ed3f75 100644 --- a/cpu-all.h +++ b/cpu-all.h @@ -21,6 +21,7 @@ #include "qemu-common.h" #include "cpu-common.h" +#include "rcu.h" /* some important defines: * @@ -475,11 +476,14 @@ extern ram_addr_t ram_size; #define RAM_PREALLOC_MASK (1 << 0) typedef struct RAMBlock { + struct rcu_head h; uint8_t *host; ram_addr_t offset; ram_addr_t length; uint32_t flags; + // should be protected by its own lock + RCU on the read side QLIST_ENTRY(RAMBlock) next; + // protected by the iothread lock + RCU to on the read side QLIST_ENTRY(RAMBlock) next_mru; char idstr[256]; #if defined(__linux__) && !defined(TARGET_S390X) diff --git a/exec.c b/exec.c index d25e3cc..6a7cec7 100644 --- a/exec.c +++ b/exec.c @@ -2935,6 +2935,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name, } pstrcat(new_block->idstr, sizeof(new_block->idstr), name); + //qemu_mutex_lock(&ram_list.mutex); QLIST_FOREACH(block, &ram_list.blocks, next) { if (!strcmp(block->idstr, new_block->idstr)) { fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n", @@ -2986,6 +2987,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name, new_block->length = size; QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next); + //qemu_mutex_unlock(&ram_list.mutex); QLIST_INSERT_HEAD(&ram_list.blocks_mru, new_block, next_mru); ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty, @@ -3008,52 +3010,62 @@ void qemu_ram_free_from_ptr(ram_addr_t addr) { RAMBlock *block; + //qemu_mutex_lock(&ram_list.mutex); QLIST_FOREACH(block, &ram_list.blocks, next) { if (addr == block->offset) { QLIST_REMOVE(block, next); QLIST_REMOVE(block, next_mru); - qemu_free(block); + call_rcu(&block->h, rcu_free); return; } } + //qemu_mutex_unlock(&ram_list.mutex); +} + +static void rcu_free_block(struct rcu_head *h) +{ + RAMBlock *block = container_of(h, RAMBlock, h); + if (block->flags & RAM_PREALLOC_MASK) { + ; + } else if (mem_path) { +#if defined (__linux__) && !defined(TARGET_S390X) + if (block->fd) { + munmap(block->host, block->length); + close(block->fd); + } else { + qemu_vfree(block->host); + } +#else + abort(); +#endif + } else { +#if defined(TARGET_S390X) && defined(CONFIG_KVM) + munmap(block->host, block->length); +#else + if (xen_enabled()) { + xen_invalidate_map_cache_entry(block->host); + } else { + qemu_vfree(block->host); + } +#endif + } + qemu_free(block); } void qemu_ram_free(ram_addr_t addr) { RAMBlock *block; + //qemu_mutex_lock(&ram_list.mutex); QLIST_FOREACH(block, &ram_list.blocks, next) { if (addr == block->offset) { QLIST_REMOVE(block, next); QLIST_REMOVE(block, next_mru); - if (block->flags & RAM_PREALLOC_MASK) { - ; - } else if (mem_path) { -#if defined (__linux__) && !defined(TARGET_S390X) - if (block->fd) { - munmap(block->host, block->length); - close(block->fd); - } else { - qemu_vfree(block->host); - } -#else - abort(); -#endif - } else { -#if defined(TARGET_S390X) && defined(CONFIG_KVM) - munmap(block->host, block->length); -#else - if (xen_enabled()) { - xen_invalidate_map_cache_entry(block->host); - } else { - qemu_vfree(block->host); - } -#endif - } - qemu_free(block); + call_rcu(&block->h, rcu_free_block); return; } } + //qemu_mutex_unlock(&ram_list.mutex); } @@ -3065,6 +3077,7 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) int flags; void *area, *vaddr; + rcu_read_lock(); QLIST_FOREACH(block, &ram_list.blocks, next) { offset = addr - block->offset; if (offset < block->length) { @@ -3072,8 +3085,9 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) if (block->flags & RAM_PREALLOC_MASK) { ; } else { + /* No need to munmap the memory, mmap will discard the old mapping + * atomically. */ flags = MAP_FIXED; - munmap(vaddr, length); if (mem_path) { #if defined(__linux__) && !defined(TARGET_S390X) if (block->fd) { @@ -3112,9 +3126,10 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) } qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE); } - return; + break; } } + rcu_read_unlock(); } #endif /* !_WIN32 */ @@ -3128,8 +3143,16 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) */ void *qemu_get_ram_ptr(ram_addr_t addr) { + uint8_t *p = NULL; RAMBlock *block; + /* RCU protects the "existence" of the blocks, the iothread lock + * protects the next_mru chain. This rcu_read_lock() is most + * likely nested, since the caller probably wants to do something + * with the result as well! FIXME: this is not done anywhere yet. + * Due to our RCU implementation we can avoid that, but it's not + * clean. */ + rcu_read_lock(); QLIST_FOREACH(block, &ram_list.blocks_mru, next_mru) { if (addr - block->offset < block->length) { /* Move this entry to to start of the list. */ @@ -3143,20 +3166,24 @@ void *qemu_get_ram_ptr(ram_addr_t addr) * In that case just map until the end of the page. */ if (block->offset == 0) { - return xen_map_cache(addr, 0, 0); + p = xen_map_cache(addr, 0, 0); + break; } else if (block->host == NULL) { block->host = xen_map_cache(block->offset, block->length, 1); } } - return block->host + (addr - block->offset); + p = block->host + (addr - block->offset); + break; } } + rcu_read_unlock(); - fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr); - abort(); - - return NULL; + if (!p) { + fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr); + abort(); + } + return p; } /* Return a host pointer to ram allocated with qemu_ram_alloc. @@ -3164,8 +3191,10 @@ void *qemu_get_ram_ptr(ram_addr_t addr) */ void *qemu_safe_ram_ptr(ram_addr_t addr) { + uint8_t *p = NULL; RAMBlock *block; + rcu_read_lock(); QLIST_FOREACH(block, &ram_list.blocks, next) { if (addr - block->offset < block->length) { if (xen_enabled()) { @@ -3174,20 +3203,25 @@ void *qemu_safe_ram_ptr(ram_addr_t addr) * In that case just map until the end of the page. */ if (block->offset == 0) { - return xen_map_cache(addr, 0, 0); + p = xen_map_cache(addr, 0, 0); + break; } else if (block->host == NULL) { block->host = xen_map_cache(block->offset, block->length, 1); } } - return block->host + (addr - block->offset); + p = block->host + (addr - block->offset); + break; } } + rcu_read_unlock(); - fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr); - abort(); + if (!p) { + fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr); + abort(); + } - return NULL; + return p; } /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr @@ -3202,13 +3236,16 @@ void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size) } else { RAMBlock *block; + rcu_read_lock(); QLIST_FOREACH(block, &ram_list.blocks, next) { if (addr - block->offset < block->length) { if (addr - block->offset + *size > block->length) *size = block->length - addr + block->offset; + rcu_read_unlock(); return block->host + (addr - block->offset); } } + rcu_read_unlock(); fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr); abort(); @@ -3230,6 +3267,13 @@ int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr) return 0; } + /* RCU protects the "existence" of the blocks, the iothread lock + * protects the next_mru chain. This rcu_read_lock() is most + * likely nested, since the caller probably wants to do something + * with the result as well! FIXME: this is not done anywhere yet. + * Due to our RCU implementation we can avoid that, but it's not + * clean. */ + rcu_read_lock(); QLIST_FOREACH(block, &ram_list.blocks_mru, next_mru) { /* This case append when the block is not mapped. */ if (block->host == NULL) { @@ -3237,9 +3281,11 @@ int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr) } if (host - block->host < block->length) { *ram_addr = block->offset + (host - block->host); + rcu_read_unlock(); return 0; } } + rcu_read_unlock(); return -1; }