From patchwork Fri Sep 17 11:15:02 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Anthony PERARD X-Patchwork-Id: 65068 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [199.232.76.165]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id 323C2B70AA for ; Fri, 17 Sep 2010 21:33:30 +1000 (EST) Received: from localhost ([127.0.0.1]:37028 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1OwZBi-0000Eg-7j for incoming@patchwork.ozlabs.org; Fri, 17 Sep 2010 07:33:06 -0400 Received: from [140.186.70.92] (port=37147 helo=eggs.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1OwYvo-0006Qx-St for qemu-devel@nongnu.org; Fri, 17 Sep 2010 07:16:43 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.69) (envelope-from ) id 1OwYvm-00083M-8k for qemu-devel@nongnu.org; Fri, 17 Sep 2010 07:16:40 -0400 Received: from smtp02.citrix.com ([66.165.176.63]:45944) by eggs.gnu.org with esmtp (Exim 4.69) (envelope-from ) id 1OwYvl-00081M-VJ for qemu-devel@nongnu.org; Fri, 17 Sep 2010 07:16:38 -0400 X-IronPort-AV: E=Sophos;i="4.56,381,1280721600"; d="scan'208";a="114266519" Received: from ftlpexchmx01.citrite.net ([10.9.154.126]) by FTLPIPO02.CITRIX.COM with ESMTP; 17 Sep 2010 07:16:37 -0400 Received: from smtp01.ad.xensource.com ([10.219.128.104]) by FTLPEXCHMX01.citrite.net with Microsoft SMTPSVC(6.0.3790.4675); Fri, 17 Sep 2010 07:16:26 -0400 Received: from localhost.localdomain (perard.cam.xci-test.com [10.80.248.106]) by smtp01.ad.xensource.com (8.13.1/8.13.1) with ESMTP id o8HBFpau017817; Fri, 17 Sep 2010 04:16:36 -0700 From: anthony.perard@citrix.com To: qemu-devel@nongnu.org Date: Fri, 17 Sep 2010 12:15:02 +0100 Message-Id: <1284722107-28550-8-git-send-email-anthony.perard@citrix.com> X-Mailer: git-send-email 1.6.5 In-Reply-To: <1284722107-28550-1-git-send-email-anthony.perard@citrix.com> References: <1284722107-28550-1-git-send-email-anthony.perard@citrix.com> X-OriginalArrivalTime: 17 Sep 2010 11:16:26.0303 (UTC) FILETIME=[C4BD9CF0:01CB5659] X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. Cc: anthony.perard@citrix.com, xen-devel@lists.xensource.com, Stefano.Stabellini@eu.citrix.com Subject: [Qemu-devel] [PATCH RFC V3 07/12] xen: Introduce the Xen mapcache X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org From: Anthony PERARD The mapcache maps chucks of guest memory on demand, unmaps them when they are not needed anymore. Each call to qemu_get_ram_ptr makes a call to qemu_map_cache with the lock option, so mapcache will not unmap these ram_ptr. Signed-off-by: Anthony PERARD --- Makefile.target | 2 +- exec.c | 36 ++++++- hw/xen.h | 4 + xen-all.c | 63 ++++++++++++ xen-stub.c | 4 + xen_mapcache.c | 302 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ xen_mapcache.h | 26 +++++ 7 files changed, 432 insertions(+), 5 deletions(-) create mode 100644 xen_mapcache.c create mode 100644 xen_mapcache.h diff --git a/Makefile.target b/Makefile.target index 6b390e6..ea14393 100644 --- a/Makefile.target +++ b/Makefile.target @@ -183,7 +183,7 @@ QEMU_CFLAGS += $(VNC_PNG_CFLAGS) # xen backend driver support obj-$(CONFIG_XEN) += xen_machine_pv.o xen_domainbuild.o -obj-$(CONFIG_XEN) += xen-all.o +obj-$(CONFIG_XEN) += xen-all.o xen_mapcache.o obj-$(CONFIG_NO_XEN) += xen-stub.o # xen full virtualized machine diff --git a/exec.c b/exec.c index 380dab5..f5888eb 100644 --- a/exec.c +++ b/exec.c @@ -60,6 +60,9 @@ #endif #endif +#include "hw/xen.h" +#include "xen_mapcache.h" + //#define DEBUG_TB_INVALIDATE //#define DEBUG_FLUSH //#define DEBUG_TLB @@ -2833,6 +2836,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name, } } + new_block->offset = find_ram_offset(size); if (host) { new_block->host = host; } else { @@ -2856,15 +2860,17 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name, PROT_EXEC|PROT_READ|PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); #else - new_block->host = qemu_vmalloc(size); + if (xen_enabled()) { + xen_ram_alloc(new_block->offset, size); + } else { + new_block->host = qemu_vmalloc(size); + } #endif #ifdef MADV_MERGEABLE madvise(new_block->host, size, MADV_MERGEABLE); #endif } } - - new_block->offset = find_ram_offset(size); new_block->length = size; QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next); @@ -2905,7 +2911,11 @@ void qemu_ram_free(ram_addr_t addr) #if defined(TARGET_S390X) && defined(CONFIG_KVM) munmap(block->host, block->length); #else - qemu_vfree(block->host); + if (xen_enabled()) { + qemu_invalidate_entry(block->host); + } else { + qemu_vfree(block->host); + } #endif } qemu_free(block); @@ -2931,6 +2941,14 @@ void *qemu_get_ram_ptr(ram_addr_t addr) if (addr - block->offset < block->length) { QLIST_REMOVE(block, next); QLIST_INSERT_HEAD(&ram_list.blocks, block, next); + if (xen_enabled()) { + /* We need to check if the requested address is in the RAM + * because we don't want to map the entire memory in QEMU. + */ + if (block->offset == 0) + return qemu_map_cache(addr, 0, 1); + block->host = qemu_map_cache(block->offset, block->length, 1); + } return block->host + (addr - block->offset); } } @@ -2949,11 +2967,18 @@ ram_addr_t qemu_ram_addr_from_host(void *ptr) uint8_t *host = ptr; QLIST_FOREACH(block, &ram_list.blocks, next) { + /* This case append when the block is not mapped. */ + if (block->host == NULL) + continue; if (host - block->host < block->length) { return block->offset + (host - block->host); } } + if (xen_enabled()) { + return qemu_ram_addr_from_mapcache(ptr); + } + fprintf(stderr, "Bad ram pointer %p\n", ptr); abort(); @@ -3728,6 +3753,9 @@ void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len, if (is_write) { cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len); } + if (xen_enabled()) { + qemu_invalidate_entry(buffer); + } qemu_vfree(bounce.buffer); bounce.buffer = NULL; cpu_notify_map_clients(); diff --git a/hw/xen.h b/hw/xen.h index c5189b1..2b62ff5 100644 --- a/hw/xen.h +++ b/hw/xen.h @@ -34,4 +34,8 @@ void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len); int xen_init(int smp_cpus); +#ifdef NEED_CPU_H +void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size); +#endif + #endif /* QEMU_HW_XEN_H */ diff --git a/xen-all.c b/xen-all.c index 765f87a..4e0b061 100644 --- a/xen-all.c +++ b/xen-all.c @@ -12,6 +12,8 @@ #include "hw/xen_common.h" #include "hw/xen_backend.h" +#include "xen_mapcache.h" + /* Xen specific function for piix pci */ int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num) @@ -52,6 +54,63 @@ qemu_irq *i8259_xen_init(void) return qemu_allocate_irqs(i8259_set_irq, NULL, 16); } + +/* Memory Ops */ + +static void xen_ram_init(ram_addr_t ram_size) +{ + RAMBlock *new_block; + ram_addr_t below_4g_mem_size, above_4g_mem_size = 0; + + new_block = qemu_mallocz(sizeof (*new_block)); + pstrcpy(new_block->idstr, sizeof (new_block->idstr), "xen.ram"); + new_block->host = NULL; + new_block->offset = 0; + new_block->length = ram_size; + + QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next); + + ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty, + new_block->length >> TARGET_PAGE_BITS); + memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS), + 0xff, new_block->length >> TARGET_PAGE_BITS); + + if (ram_size >= 0xe0000000 ) { + above_4g_mem_size = ram_size - 0xe0000000; + below_4g_mem_size = 0xe0000000; + } else { + below_4g_mem_size = ram_size; + } + + cpu_register_physical_memory(0, below_4g_mem_size, new_block->offset); +#if TARGET_PHYS_ADDR_BITS > 32 + if (above_4g_mem_size > 0) { + cpu_register_physical_memory(0x100000000ULL, above_4g_mem_size, + new_block->offset + below_4g_mem_size); + } +#endif +} + +void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size) +{ + unsigned long nr_pfn; + xen_pfn_t *pfn_list; + int i; + + nr_pfn = size >> TARGET_PAGE_BITS; + pfn_list = qemu_malloc(sizeof (*pfn_list) * nr_pfn); + + for (i = 0; i < nr_pfn; i++) + pfn_list[i] = (ram_addr >> TARGET_PAGE_BITS) + i; + + if (xc_domain_memory_populate_physmap(xen_xc, xen_domid, nr_pfn, 0, 0, pfn_list)) { + hw_error("xen: failed to populate ram at %lx", ram_addr); + } + + qemu_free(pfn_list); +} + + /* Initialise Xen */ int xen_init(int smp_cpus) @@ -62,5 +121,9 @@ int xen_init(int smp_cpus) return -1; } + /* Init RAM management */ + qemu_map_cache_init(); + xen_ram_init(ram_size); + return 0; } diff --git a/xen-stub.c b/xen-stub.c index 07e64bc..c9f477d 100644 --- a/xen-stub.c +++ b/xen-stub.c @@ -24,6 +24,10 @@ void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len) { } +void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size) +{ +} + int xen_init(int smp_cpus) { return -ENOSYS; diff --git a/xen_mapcache.c b/xen_mapcache.c new file mode 100644 index 0000000..8e3bf6c --- /dev/null +++ b/xen_mapcache.c @@ -0,0 +1,302 @@ +#include "config.h" + +#include "hw/xen_backend.h" +#include "blockdev.h" + +#include +#include + +#include "xen_mapcache.h" + + +//#define MAPCACHE_DEBUG + +#ifdef MAPCACHE_DEBUG +#define DPRINTF(fmt, ...) do { \ + fprintf(stderr, "xen_mapcache: " fmt, ## __VA_ARGS__); \ +} while (0) +#else +#define DPRINTF(fmt, ...) do { } while (0) +#endif + +#if defined(MAPCACHE) + +#define BITS_PER_LONG (sizeof(long)*8) +#define BITS_TO_LONGS(bits) \ + (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) +#define DECLARE_BITMAP(name,bits) \ + unsigned long name[BITS_TO_LONGS(bits)] +#define test_bit(bit,map) \ + (!!((map)[(bit)/BITS_PER_LONG] & (1UL << ((bit)%BITS_PER_LONG)))) + +typedef struct MapCacheEntry { + unsigned long paddr_index; + uint8_t *vaddr_base; + DECLARE_BITMAP(valid_mapping, MCACHE_BUCKET_SIZE>>XC_PAGE_SHIFT); + uint8_t lock; + struct MapCacheEntry *next; +} MapCacheEntry; + +typedef struct MapCacheRev { + uint8_t *vaddr_req; + unsigned long paddr_index; + QTAILQ_ENTRY(MapCacheRev) next; +} MapCacheRev; + +typedef struct MapCache { + MapCacheEntry *entry; + unsigned long nr_buckets; + QTAILQ_HEAD(map_cache_head, MapCacheRev) locked_entries; + + /* For most cases (>99.9%), the page address is the same. */ + unsigned long last_address_index; + uint8_t *last_address_vaddr; +} MapCache; + +static MapCache *mapcache; + + +int qemu_map_cache_init(void) +{ + unsigned long size; + + mapcache = qemu_mallocz(sizeof (MapCache)); + + QTAILQ_INIT(&mapcache->locked_entries); + mapcache->last_address_index = ~0UL; + + mapcache->nr_buckets = (((MAX_MCACHE_SIZE >> XC_PAGE_SHIFT) + + (1UL << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) - 1) >> + (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)); + + /* + * Use mmap() directly: lets us allocate a big hash table with no up-front + * cost in storage space. The OS will allocate memory only for the buckets + * that we actually use. All others will contain all zeroes. + */ + size = mapcache->nr_buckets * sizeof(MapCacheEntry); + size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1); + DPRINTF("qemu_map_cache_init, nr_buckets = %lx size %lu\n", mapcache->nr_buckets, size); + mapcache->entry = mmap(NULL, size, PROT_READ|PROT_WRITE, + MAP_SHARED|MAP_ANON, -1, 0); + if (mapcache->entry == MAP_FAILED) { + errno = ENOMEM; + return -1; + } + + return 0; +} + +static void qemu_remap_bucket(MapCacheEntry *entry, + target_phys_addr_t size, + unsigned long address_index) +{ + uint8_t *vaddr_base; + xen_pfn_t *pfns; + int *err; + unsigned int i, j; + + pfns = qemu_mallocz((size >> XC_PAGE_SHIFT) * sizeof (xen_pfn_t)); + err = qemu_mallocz((size >> XC_PAGE_SHIFT) * sizeof (int)); + + if (entry->vaddr_base != NULL) { + errno = munmap(entry->vaddr_base, size); + if (errno) { + fprintf(stderr, "unmap fails %d\n", errno); + exit(-1); + } + } + + for (i = 0; i < size >> XC_PAGE_SHIFT; i++) { + pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-XC_PAGE_SHIFT)) + i; + } + + vaddr_base = xc_map_foreign_bulk(xen_xc, xen_domid, PROT_READ|PROT_WRITE, + pfns, err, + size >> XC_PAGE_SHIFT); + if (vaddr_base == NULL) { + fprintf(stderr, "xc_map_foreign_bulk error %d\n", errno); + exit(-1); + } + + entry->vaddr_base = vaddr_base; + entry->paddr_index = address_index; + + for (i = 0; i < size >> XC_PAGE_SHIFT; i += BITS_PER_LONG) { + unsigned long word = 0; + j = ((i + BITS_PER_LONG) > (size >> XC_PAGE_SHIFT)) ? + (size >> XC_PAGE_SHIFT) % BITS_PER_LONG : BITS_PER_LONG; + while (j > 0) { + word = (word << 1) | !err[i + --j]; + } + entry->valid_mapping[i / BITS_PER_LONG] = word; + } + + qemu_free(pfns); + qemu_free(err); +} + +uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, uint8_t lock) +{ + MapCacheEntry *entry, *pentry = NULL; + unsigned long address_index = phys_addr >> MCACHE_BUCKET_SHIFT; + unsigned long address_offset = phys_addr & (MCACHE_BUCKET_SIZE-1); + + if (address_index == mapcache->last_address_index && !lock) + return mapcache->last_address_vaddr + address_offset; + + entry = &mapcache->entry[address_index % mapcache->nr_buckets]; + + while (entry && entry->lock && entry->paddr_index != address_index && entry->vaddr_base) { + pentry = entry; + entry = entry->next; + } + if (!entry) { + entry = qemu_mallocz(sizeof(MapCacheEntry)); + pentry->next = entry; + qemu_remap_bucket(entry, size ? : MCACHE_BUCKET_SIZE, address_index); + } else if (!entry->lock) { + if (!entry->vaddr_base || entry->paddr_index != address_index || !test_bit(address_offset>>XC_PAGE_SHIFT, entry->valid_mapping)) + qemu_remap_bucket(entry, size ? : MCACHE_BUCKET_SIZE, address_index); + } + + if (!test_bit(address_offset>>XC_PAGE_SHIFT, entry->valid_mapping)) { + mapcache->last_address_index = ~0UL; + return NULL; + } + + mapcache->last_address_index = address_index; + mapcache->last_address_vaddr = entry->vaddr_base; + if (lock) { + MapCacheRev *reventry = qemu_mallocz(sizeof(MapCacheRev)); + entry->lock++; + reventry->vaddr_req = mapcache->last_address_vaddr + address_offset; + reventry->paddr_index = mapcache->last_address_index; + QTAILQ_INSERT_TAIL(&mapcache->locked_entries, reventry, next); + } + + return mapcache->last_address_vaddr + address_offset; +} + +ram_addr_t qemu_ram_addr_from_mapcache(void *ptr) +{ + MapCacheRev *reventry; + unsigned long paddr_index; + int found = 0; + + QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { + if (reventry->vaddr_req == ptr) { + paddr_index = reventry->paddr_index; + found = 1; + break; + } + } + if (!found) { + fprintf(stderr, "qemu_ram_addr_from_mapcache, could not find %p\n", ptr); + QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { + DPRINTF(" %lx -> %p is present\n", reventry->paddr_index, reventry->vaddr_req); + } + abort(); + return 0; + } + + return paddr_index << MCACHE_BUCKET_SHIFT; +} + +void qemu_invalidate_entry(uint8_t *buffer) +{ + MapCacheEntry *entry = NULL, *pentry = NULL; + MapCacheRev *reventry; + unsigned long paddr_index; + int found = 0; + + if (mapcache->last_address_vaddr == buffer) + mapcache->last_address_index = ~0UL; + + QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { + if (reventry->vaddr_req == buffer) { + paddr_index = reventry->paddr_index; + found = 1; + break; + } + } + if (!found) { + DPRINTF("qemu_invalidate_entry, could not find %p\n", buffer); + QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { + DPRINTF(" %lx -> %p is present\n", reventry->paddr_index, reventry->vaddr_req); + } + return; + } + QTAILQ_REMOVE(&mapcache->locked_entries, reventry, next); + qemu_free(reventry); + + entry = &mapcache->entry[paddr_index % mapcache->nr_buckets]; + while (entry && entry->paddr_index != paddr_index) { + pentry = entry; + entry = entry->next; + } + if (!entry) { + DPRINTF("Trying to unmap address %p that is not in the mapcache!\n", buffer); + return; + } + entry->lock--; + if (entry->lock > 0 || pentry == NULL) + return; + + pentry->next = entry->next; + errno = munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE); + if (errno) { + fprintf(stderr, "unmap fails %d\n", errno); + exit(-1); + } + qemu_free(entry); +} + +void qemu_invalidate_map_cache(void) +{ + unsigned long i; + MapCacheRev *reventry; + + qemu_aio_flush(); + + QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { + DPRINTF("There should be no locked mappings at this time, but %lx -> %p is present\n", reventry->paddr_index, reventry->vaddr_req); + } + + mapcache_lock(); + + for (i = 0; i < mapcache->nr_buckets; i++) { + MapCacheEntry *entry = &mapcache->entry[i]; + + if (entry->vaddr_base == NULL) + continue; + + errno = munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE); + if (errno) { + fprintf(stderr, "unmap fails %d\n", errno); + exit(-1); + } + + entry->paddr_index = 0; + entry->vaddr_base = NULL; + } + + mapcache->last_address_index = ~0UL; + mapcache->last_address_vaddr = NULL; + + mapcache_unlock(); +} +#else +uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, uint8_t lock) +{ + return qemu_get_ram_ptr(phys_addr); +} + +void qemu_invalidate_map_cache(void) +{ +} + +void qemu_invalidate_entry(uint8_t *buffer) +{ +} +#endif /* !MAPCACHE */ diff --git a/xen_mapcache.h b/xen_mapcache.h new file mode 100644 index 0000000..5a6730f --- /dev/null +++ b/xen_mapcache.h @@ -0,0 +1,26 @@ +#ifndef XEN_MAPCACHE_H +#define XEN_MAPCACHE_H + +#if (defined(__i386__) || defined(__x86_64__)) +# define MAPCACHE +# if defined(__i386__) +# define MAX_MCACHE_SIZE 0x40000000 /* 1GB max for x86 */ +# define MCACHE_BUCKET_SHIFT 16 +# elif defined(__x86_64__) +# define MAX_MCACHE_SIZE 0x1000000000 /* 64GB max for x86_64 */ +# define MCACHE_BUCKET_SHIFT 20 +# endif +# define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT) +#endif + +int qemu_map_cache_init(void); +uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, uint8_t lock); +ram_addr_t qemu_ram_addr_from_mapcache(void *ptr); +void qemu_invalidate_entry(uint8_t *buffer); +void qemu_invalidate_map_cache(void); + +#define mapcache_lock() ((void)0) +#define mapcache_unlock() ((void)0) + + +#endif /* !XEN_MAPCACHE_H */