Patchwork [10/15] xen: Introduce the Xen mapcache

login
register
mail settings
Submitter Stefano Stabellini
Date Aug. 23, 2010, 9:50 a.m.
Message ID <1282557052-14285-10-git-send-email-stefano.stabellini@eu.citrix.com>
Download mbox | patch
Permalink /patch/62474/
State New
Headers show

Comments

Stefano Stabellini - Aug. 23, 2010, 9:50 a.m.
From: Anthony PERARD <anthony.perard@citrix.com>

Introduce a mapcache to handle the 64bit address space of the guest
from a 32bit userland process (Qemu).
The mapcache maps chucks of guest memory on demand, unmaps them when
they are not needed anymore.

Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
---
 hw/xen_machine_fv.c       |    7 ++
 target-xen/qemu-xen.h     |   15 +++
 target-xen/xen_mapcache.c |  247 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 269 insertions(+), 0 deletions(-)

Patch

diff --git a/hw/xen_machine_fv.c b/hw/xen_machine_fv.c
index bcf1101..cb40d22 100644
--- a/hw/xen_machine_fv.c
+++ b/hw/xen_machine_fv.c
@@ -82,6 +82,13 @@  static void xen_init_fv(ram_addr_t ram_size,
         exit(1);
     }
 
+#if defined(__i386__) || defined(__x86_64__)
+    if (qemu_map_cache_init()) {
+        fprintf(stderr, "qemu_map_cache_init returned: error %d\n", errno);
+        exit(-1);
+    }
+#endif
+
     xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_IOREQ_PFN, &ioreq_pfn);
     fprintf(stderr, "shared page at pfn %lx\n", ioreq_pfn);
     shared_page = xc_map_foreign_range(xen_xc, xen_domid, XC_PAGE_SIZE,
diff --git a/target-xen/qemu-xen.h b/target-xen/qemu-xen.h
index 79a4638..e4a7030 100644
--- a/target-xen/qemu-xen.h
+++ b/target-xen/qemu-xen.h
@@ -13,6 +13,21 @@ 
 
 /* xen_mapcache.c */
 
+#if (defined(__i386__) || defined(__x86_64__)) && !defined(QEMU_TOOL)
+#define MAPCACHE
+
+#if defined(__i386__)
+#define MAX_MCACHE_SIZE    0x40000000 /* 1GB max for x86 */
+#define MCACHE_BUCKET_SHIFT 16
+#elif defined(__x86_64__)
+#define MAX_MCACHE_SIZE    0x1000000000 /* 64GB max for x86_64 */
+#define MCACHE_BUCKET_SHIFT 20
+#endif
+
+#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT)
+#endif
+
+int qemu_map_cache_init(void);
 uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, uint8_t lock);
 void     qemu_invalidate_entry(uint8_t *buffer);
 void     qemu_invalidate_map_cache(void);
diff --git a/target-xen/xen_mapcache.c b/target-xen/xen_mapcache.c
index 39daae2..5baaeb9 100644
--- a/target-xen/xen_mapcache.c
+++ b/target-xen/xen_mapcache.c
@@ -1,5 +1,251 @@ 
+#include "config.h"
+
+#include "hw/xen_backend.h"
 #include "qemu-xen.h"
 
+#include <xen/hvm/params.h>
+#include <sys/mman.h>
+
+#define DPRINTF(fmt, ...) do { \
+    fprintf(stderr, "xen_mapcache: " fmt, ## __VA_ARGS__); \
+} while (0)
+
+#if defined(MAPCACHE)
+
+#define BITS_PER_LONG (sizeof(long)*8)
+#define BITS_TO_LONGS(bits) \
+    (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
+#define DECLARE_BITMAP(name,bits) \
+    unsigned long name[BITS_TO_LONGS(bits)]
+#define test_bit(bit,map) \
+    (!!((map)[(bit)/BITS_PER_LONG] & (1UL << ((bit)%BITS_PER_LONG))))
+
+typedef struct MapCacheEntry {
+    unsigned long paddr_index;
+    uint8_t *vaddr_base;
+    DECLARE_BITMAP(valid_mapping, MCACHE_BUCKET_SIZE>>XC_PAGE_SHIFT);
+    uint8_t lock;
+    struct MapCacheEntry *next;
+} MapCacheEntry;
+
+typedef struct MapCacheRev {
+    uint8_t *vaddr_req;
+    unsigned long paddr_index;
+    QTAILQ_ENTRY(MapCacheRev) next;
+} MapCacheRev;
+
+typedef struct MapCache {
+    MapCacheEntry *entry;
+    unsigned long nr_buckets;
+    QTAILQ_HEAD(map_cache_head, MapCacheRev) locked_entries;
+
+    /* For most cases (>99.9%), the page address is the same. */
+    unsigned long last_address_index;
+    uint8_t      *last_address_vaddr;
+} MapCache;
+
+static MapCache *mapcache;
+
+
+int qemu_map_cache_init(void)
+{
+    unsigned long size;
+
+    mapcache = qemu_mallocz(sizeof (MapCache));
+
+    QTAILQ_INIT(&mapcache->locked_entries);
+    mapcache->last_address_index = ~0UL;
+
+    mapcache->nr_buckets = (((MAX_MCACHE_SIZE >> XC_PAGE_SHIFT) +
+                   (1UL << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) - 1) >>
+                  (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT));
+
+    /*
+     * Use mmap() directly: lets us allocate a big hash table with no up-front
+     * cost in storage space. The OS will allocate memory only for the buckets
+     * that we actually use. All others will contain all zeroes.
+     */
+    size = mapcache->nr_buckets * sizeof(MapCacheEntry);
+    size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1);
+    DPRINTF("qemu_map_cache_init, nr_buckets = %lx size %lu\n", mapcache->nr_buckets, size);
+    mapcache->entry = mmap(NULL, size, PROT_READ|PROT_WRITE,
+                          MAP_SHARED|MAP_ANON, -1, 0);
+    if (mapcache->entry == MAP_FAILED) {
+        errno = ENOMEM;
+        return -1;
+    }
+
+    return 0;
+}
+
+static void qemu_remap_bucket(MapCacheEntry *entry,
+                              unsigned long address_index)
+{
+    uint8_t *vaddr_base;
+    xen_pfn_t pfns[MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT];
+    int err[MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT];
+    unsigned int i, j;
+
+    if (entry->vaddr_base != NULL) {
+        errno = munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE);
+        if (errno) {
+            fprintf(stderr, "unmap fails %d\n", errno);
+            exit(-1);
+        }
+    }
+
+    for (i = 0; i < MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT; i++) {
+        pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-XC_PAGE_SHIFT)) + i;
+    }
+
+    vaddr_base = xc_map_foreign_bulk(xen_xc, xen_domid, PROT_READ|PROT_WRITE,
+                                     pfns, err,
+                                     MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT);
+    if (vaddr_base == NULL) {
+        fprintf(stderr, "xc_map_foreign_bulk error %d\n", errno);
+        exit(-1);
+    }
+
+    entry->vaddr_base  = vaddr_base;
+    entry->paddr_index = address_index;
+
+    for (i = 0; i < MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT; i += BITS_PER_LONG) {
+        unsigned long word = 0;
+        j = ((i + BITS_PER_LONG) > (MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT)) ?
+            (MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT) % BITS_PER_LONG : BITS_PER_LONG;
+        while (j > 0) {
+            word = (word << 1) | !err[i + --j];
+        }
+        entry->valid_mapping[i / BITS_PER_LONG] = word;
+    }
+}
+
+uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, uint8_t lock)
+{
+    MapCacheEntry *entry, *pentry = NULL;
+    unsigned long address_index  = phys_addr >> MCACHE_BUCKET_SHIFT;
+    unsigned long address_offset = phys_addr & (MCACHE_BUCKET_SIZE-1);
+
+    if (address_index == mapcache->last_address_index && !lock)
+        return mapcache->last_address_vaddr + address_offset;
+
+    entry = &mapcache->entry[address_index % mapcache->nr_buckets];
+
+    while (entry && entry->lock && entry->paddr_index != address_index && entry->vaddr_base) {
+        pentry = entry;
+        entry = entry->next;
+    }
+    if (!entry) {
+        entry = qemu_mallocz(sizeof(MapCacheEntry));
+        pentry->next = entry;
+        qemu_remap_bucket(entry, address_index);
+    } else if (!entry->lock) {
+        if (!entry->vaddr_base || entry->paddr_index != address_index || !test_bit(address_offset>>XC_PAGE_SHIFT, entry->valid_mapping))
+            qemu_remap_bucket(entry, address_index);
+    }
+
+    if (!test_bit(address_offset>>XC_PAGE_SHIFT, entry->valid_mapping)) {
+        mapcache->last_address_index = ~0UL;
+        return NULL;
+    }
+
+    mapcache->last_address_index = address_index;
+    mapcache->last_address_vaddr = entry->vaddr_base;
+    if (lock) {
+        MapCacheRev *reventry = qemu_mallocz(sizeof(MapCacheRev));
+        entry->lock++;
+        reventry->vaddr_req = mapcache->last_address_vaddr + address_offset;
+        reventry->paddr_index = mapcache->last_address_index;
+        QTAILQ_INSERT_TAIL(&mapcache->locked_entries, reventry, next);
+    }
+
+    return mapcache->last_address_vaddr + address_offset;
+}
+
+void qemu_invalidate_entry(uint8_t *buffer)
+{
+    MapCacheEntry *entry = NULL, *pentry = NULL;
+    MapCacheRev *reventry;
+    unsigned long paddr_index;
+    int found = 0;
+
+    if (mapcache->last_address_vaddr == buffer)
+        mapcache->last_address_index =  ~0UL;
+
+    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+        if (reventry->vaddr_req == buffer) {
+            paddr_index = reventry->paddr_index;
+            found = 1;
+            break;
+        }
+    }
+    if (!found) {
+        DPRINTF("qemu_invalidate_entry, could not find %p\n", buffer);
+        QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+            DPRINTF("   %lx -> %p is present\n", reventry->paddr_index, reventry->vaddr_req);
+        }
+        return;
+    }
+    QTAILQ_REMOVE(&mapcache->locked_entries, reventry, next);
+    qemu_free(reventry);
+
+    entry = &mapcache->entry[paddr_index % mapcache->nr_buckets];
+    while (entry && entry->paddr_index != paddr_index) {
+        pentry = entry;
+        entry = entry->next;
+    }
+    if (!entry) {
+        DPRINTF("Trying to unmap address %p that is not in the mapcache!\n", buffer);
+        return;
+    }
+    entry->lock--;
+    if (entry->lock > 0 || pentry == NULL)
+        return;
+
+    pentry->next = entry->next;
+    errno = munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE);
+    if (errno) {
+        fprintf(stderr, "unmap fails %d\n", errno);
+        exit(-1);
+    }
+    qemu_free(entry);
+}
+
+void qemu_invalidate_map_cache(void)
+{
+    unsigned long i;
+    MapCacheRev *reventry;
+
+    qemu_aio_flush();
+
+    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+        DPRINTF("There should be no locked mappings at this time, but %lx -> %p is present\n", reventry->paddr_index, reventry->vaddr_req);
+    }
+
+    mapcache_lock();
+
+    for (i = 0; i < mapcache->nr_buckets; i++) {
+        MapCacheEntry *entry = &mapcache->entry[i];
+
+        if (entry->vaddr_base == NULL)
+            continue;
+
+        errno = munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE);
+        if (errno) {
+            fprintf(stderr, "unmap fails %d\n", errno);
+            exit(-1);
+        }
+
+        entry->paddr_index = 0;
+        entry->vaddr_base  = NULL;
+    }
+
+    mapcache->last_address_index =  ~0UL;
+    mapcache->last_address_vaddr = NULL;
+
+    mapcache_unlock();
+}
+#else
 uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, uint8_t lock)
 {
     return phys_ram_addr(phys_addr);
@@ -12,3 +258,4 @@  void qemu_invalidate_map_cache(void)
 void qemu_invalidate_entry(uint8_t *buffer)
 {
 }
+#endif /* !MAPCACHE */