Patchwork [V6,09/15] xen: Introduce the Xen mapcache

login
register
mail settings
Submitter Anthony PERARD
Date Oct. 21, 2010, 5:36 p.m.
Message ID <1287682587-18642-10-git-send-email-anthony.perard@citrix.com>
Download mbox | patch
Permalink /patch/68764/
State New
Headers show

Comments

Anthony PERARD - Oct. 21, 2010, 5:36 p.m.
From: Jun Nakajima <jun.nakajima@intel.com>

On an IA32 or IA32 PAE host we currently cannot, in general, create an
HVM guest with more than 2G of memory, because it is almost impossible
for QEMU to find a large enough contiguous virtual address range to
map the HVM guest's whole physical address space.
This patch fixes the issue by mapping guest memory dynamically, in
small blocks.

Each call to qemu_get_ram_ptr calls qemu_map_cache with the lock option
set, so the mapcache will not unmap the returned ram_ptr.

Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
---
 Makefile.target     |    3 +
 configure           |    3 +
 exec.c              |   40 ++++++-
 hw/xen.h            |   10 ++
 hw/xen_common.h     |    2 +
 xen-all.c           |   64 +++++++++++
 xen-mapcache-stub.c |   33 ++++++
 xen-mapcache.c      |  301 +++++++++++++++++++++++++++++++++++++++++++++++++++
 xen-mapcache.h      |   14 +++
 xen-stub.c          |    4 +
 10 files changed, 470 insertions(+), 4 deletions(-)
 create mode 100644 xen-mapcache-stub.c
 create mode 100644 xen-mapcache.c
 create mode 100644 xen-mapcache.h
Alexander Graf - Nov. 15, 2010, 11:43 a.m.
On 21.10.2010, at 19:36, Anthony.Perard@citrix.com wrote:

> From: Jun Nakajima <jun.nakajima@intel.com>
> 
> On IA32 host or IA32 PAE host, at present, generally, we can't create
> an HVM guest with more than 2G memory, because generally it's almost
> impossible for Qemu to find a large enough and consecutive virtual
> address space to map an HVM guest's whole physical address space.
> The attached patch fixes this issue using dynamic mapping based on
> little blocks of memory.
> 
> Each call to qemu_get_ram_ptr makes a call to qemu_map_cache with the
> lock option, so mapcache will not unmap these ram_ptr.
> 
> Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
> Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
> Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
> ---
> Makefile.target     |    3 +
> configure           |    3 +
> exec.c              |   40 ++++++-
> hw/xen.h            |   10 ++
> hw/xen_common.h     |    2 +
> xen-all.c           |   64 +++++++++++
> xen-mapcache-stub.c |   33 ++++++
> xen-mapcache.c      |  301 +++++++++++++++++++++++++++++++++++++++++++++++++++
> xen-mapcache.h      |   14 +++
> xen-stub.c          |    4 +
> 10 files changed, 470 insertions(+), 4 deletions(-)
> create mode 100644 xen-mapcache-stub.c
> create mode 100644 xen-mapcache.c
> create mode 100644 xen-mapcache.h
> 
> diff --git a/Makefile.target b/Makefile.target
> index db84edb..5646582 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -187,8 +187,11 @@ QEMU_CFLAGS += $(VNC_PNG_CFLAGS)
> obj-$(CONFIG_XEN) += xen_machine_pv.o xen_domainbuild.o
> 
> # xen support
> +CONFIG_NO_XEN_MAPCACHE = $(if $(subst n,,$(CONFIG_XEN_MAPCACHE)),n,y)
> obj-$(CONFIG_XEN) += xen-all.o
> obj-$(CONFIG_NO_XEN) += xen-stub.o
> +obj-$(CONFIG_XEN_MAPCACHE) += xen-mapcache.o
> +obj-$(CONFIG_NO_XEN_MAPCACHE) += xen-mapcache-stub.o
> 
> # xen full virtualized machine
> obj-i386-$(CONFIG_XEN) += xen_machine_fv.o
> diff --git a/configure b/configure
> index f6a7073..d5a8553 100755
> --- a/configure
> +++ b/configure
> @@ -2943,6 +2943,9 @@ case "$target_arch2" in
>   i386|x86_64)
>     if test "$xen" = "yes" -a "$target_softmmu" = "yes" ; then
>       echo "CONFIG_XEN=y" >> $config_target_mak
> +      if test "$cpu" = "i386" -o "$cpu" = "x86_64"; then
> +          echo "CONFIG_XEN_MAPCACHE=y" >> $config_target_mak
> +      fi
>     fi
> esac
> case "$target_arch2" in
> diff --git a/exec.c b/exec.c
> index 631d8c5..d2cded6 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -39,6 +39,7 @@
> #include "hw/qdev.h"
> #include "osdep.h"
> #include "kvm.h"
> +#include "hw/xen.h"
> #include "qemu-timer.h"
> #if defined(CONFIG_USER_ONLY)
> #include <qemu.h>
> @@ -58,6 +59,8 @@
> #include <libutil.h>
> #endif
> #endif
> +#else /* !CONFIG_USER_ONLY */
> +#include "xen-mapcache.h"
> #endif
> 
> //#define DEBUG_TB_INVALIDATE
> @@ -2834,6 +2837,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
>         }
>     }
> 
> +    new_block->offset = find_ram_offset(size);
>     if (host) {
>         new_block->host = host;
>     } else {
> @@ -2855,13 +2859,15 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
>                                    PROT_EXEC|PROT_READ|PROT_WRITE,
>                                    MAP_SHARED | MAP_ANONYMOUS, -1, 0);
> #else
> -            new_block->host = qemu_vmalloc(size);
> +            if (xen_mapcache_enabled()) {
> +                xen_ram_alloc(new_block->offset, size);
> +            } else {
> +                new_block->host = qemu_vmalloc(size);
> +            }
> #endif
>             qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
>         }
>     }
> -
> -    new_block->offset = find_ram_offset(size);
>     new_block->length = size;
> 
>     QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
> @@ -2902,7 +2908,11 @@ void qemu_ram_free(ram_addr_t addr)
> #if defined(TARGET_S390X) && defined(CONFIG_KVM)
>                 munmap(block->host, block->length);
> #else
> -                qemu_vfree(block->host);
> +                if (xen_mapcache_enabled()) {
> +                    qemu_invalidate_entry(block->host);
> +                } else {
> +                    qemu_vfree(block->host);
> +                }
> #endif
>             }
>             qemu_free(block);
> @@ -2928,6 +2938,15 @@ void *qemu_get_ram_ptr(ram_addr_t addr)
>         if (addr - block->offset < block->length) {
>             QLIST_REMOVE(block, next);
>             QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
> +            if (xen_mapcache_enabled()) {
> +                /* We need to check if the requested address is in the RAM
> +                 * because we don't want to map the entire memory in QEMU.
> +                 */
> +                if (block->offset == 0) {
> +                    return qemu_map_cache(addr, 0, 1);
> +                }
> +                block->host = qemu_map_cache(block->offset, block->length, 1);
> +            }
>             return block->host + (addr - block->offset);
>         }
>     }
> @@ -2944,11 +2963,21 @@ int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
>     uint8_t *host = ptr;
> 
>     QLIST_FOREACH(block, &ram_list.blocks, next) {
> +        /* This case append when the block is not mapped. */
> +        if (block->host == NULL) {
> +            continue;
> +        }
>         if (host - block->host < block->length) {
>             *ram_addr = block->offset + (host - block->host);
>             return 0;
>         }
>     }
> +
> +    if (xen_mapcache_enabled()) {
> +        *ram_addr = qemu_ram_addr_from_mapcache(ptr);
> +        return 0;
> +    }
> +
>     return -1;
> }
> 
> @@ -3733,6 +3762,9 @@ void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
>     if (is_write) {
>         cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
>     }
> +    if (xen_enabled()) {
> +        qemu_invalidate_entry(buffer);
> +    }
>     qemu_vfree(bounce.buffer);
>     bounce.buffer = NULL;
>     cpu_notify_map_clients();
> diff --git a/hw/xen.h b/hw/xen.h
> index c5189b1..0261ae6 100644
> --- a/hw/xen.h
> +++ b/hw/xen.h
> @@ -28,10 +28,20 @@ extern int xen_allowed;
> #define xen_enabled() (0)
> #endif
> 
> +#if defined CONFIG_XEN_MAPCACHE
> +#  define xen_mapcache_enabled() (xen_enabled())

/*
 * Report whether the Xen mapcache is in use.
 *
 * When the build defines CONFIG_XEN_MAPCACHE the answer is whatever
 * xen_enabled() yields; in every other build the result is always 0.
 */
static inline int xen_mapcache_enabled(void)
{
    int enabled = 0;

#if defined(CONFIG_XEN_MAPCACHE)
    enabled = xen_enabled();
#endif

    return enabled;
}

> +#else
> +#  define xen_mapcache_enabled() (0)
> +#endif
> +
> int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num);
> void xen_piix3_set_irq(void *opaque, int irq_num, int level);
> void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len);
> 
> int xen_init(int smp_cpus);
> 
> +#if defined(NEED_CPU_H) && !defined(CONFIG_USER_ONLY)
> +void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size);
> +#endif
> +
> #endif /* QEMU_HW_XEN_H */
> diff --git a/hw/xen_common.h b/hw/xen_common.h
> index a24bcb3..2773b45 100644
> --- a/hw/xen_common.h
> +++ b/hw/xen_common.h
> @@ -36,6 +36,8 @@ typedef int qemu_xc_interface;
>     xc_gnttab_map_grant_refs(gnt, count, domids, refs, flags)
> #  define xc_gnttab_munmap(xc, gnt, pages, niov) xc_gnttab_munmap(gnt, pages, niov)
> #  define xc_gnttab_close(xc, dev)               xc_gnttab_close(dev)
> +#  define xc_map_foreign_bulk(xc, domid, opts, pfns, err, size) \
> +    xc_map_foreign_batch(xc, domid, opts, pfns, size)

See my comment on the introduction of these defines.


Alex

Patch

diff --git a/Makefile.target b/Makefile.target
index db84edb..5646582 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -187,8 +187,11 @@  QEMU_CFLAGS += $(VNC_PNG_CFLAGS)
 obj-$(CONFIG_XEN) += xen_machine_pv.o xen_domainbuild.o
 
 # xen support
+CONFIG_NO_XEN_MAPCACHE = $(if $(subst n,,$(CONFIG_XEN_MAPCACHE)),n,y)
 obj-$(CONFIG_XEN) += xen-all.o
 obj-$(CONFIG_NO_XEN) += xen-stub.o
+obj-$(CONFIG_XEN_MAPCACHE) += xen-mapcache.o
+obj-$(CONFIG_NO_XEN_MAPCACHE) += xen-mapcache-stub.o
 
 # xen full virtualized machine
 obj-i386-$(CONFIG_XEN) += xen_machine_fv.o
diff --git a/configure b/configure
index f6a7073..d5a8553 100755
--- a/configure
+++ b/configure
@@ -2943,6 +2943,9 @@  case "$target_arch2" in
   i386|x86_64)
     if test "$xen" = "yes" -a "$target_softmmu" = "yes" ; then
       echo "CONFIG_XEN=y" >> $config_target_mak
+      if test "$cpu" = "i386" -o "$cpu" = "x86_64"; then
+          echo "CONFIG_XEN_MAPCACHE=y" >> $config_target_mak
+      fi
     fi
 esac
 case "$target_arch2" in
diff --git a/exec.c b/exec.c
index 631d8c5..d2cded6 100644
--- a/exec.c
+++ b/exec.c
@@ -39,6 +39,7 @@ 
 #include "hw/qdev.h"
 #include "osdep.h"
 #include "kvm.h"
+#include "hw/xen.h"
 #include "qemu-timer.h"
 #if defined(CONFIG_USER_ONLY)
 #include <qemu.h>
@@ -58,6 +59,8 @@ 
 #include <libutil.h>
 #endif
 #endif
+#else /* !CONFIG_USER_ONLY */
+#include "xen-mapcache.h"
 #endif
 
 //#define DEBUG_TB_INVALIDATE
@@ -2834,6 +2837,7 @@  ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
         }
     }
 
+    new_block->offset = find_ram_offset(size);
     if (host) {
         new_block->host = host;
     } else {
@@ -2855,13 +2859,15 @@  ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
                                    PROT_EXEC|PROT_READ|PROT_WRITE,
                                    MAP_SHARED | MAP_ANONYMOUS, -1, 0);
 #else
-            new_block->host = qemu_vmalloc(size);
+            if (xen_mapcache_enabled()) {
+                xen_ram_alloc(new_block->offset, size);
+            } else {
+                new_block->host = qemu_vmalloc(size);
+            }
 #endif
             qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
         }
     }
-
-    new_block->offset = find_ram_offset(size);
     new_block->length = size;
 
     QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
@@ -2902,7 +2908,11 @@  void qemu_ram_free(ram_addr_t addr)
 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
                 munmap(block->host, block->length);
 #else
-                qemu_vfree(block->host);
+                if (xen_mapcache_enabled()) {
+                    qemu_invalidate_entry(block->host);
+                } else {
+                    qemu_vfree(block->host);
+                }
 #endif
             }
             qemu_free(block);
@@ -2928,6 +2938,15 @@  void *qemu_get_ram_ptr(ram_addr_t addr)
         if (addr - block->offset < block->length) {
             QLIST_REMOVE(block, next);
             QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
+            if (xen_mapcache_enabled()) {
+                /* We need to check if the requested address is in the RAM
+                 * because we don't want to map the entire memory in QEMU.
+                 */
+                if (block->offset == 0) {
+                    return qemu_map_cache(addr, 0, 1);
+                }
+                block->host = qemu_map_cache(block->offset, block->length, 1);
+            }
             return block->host + (addr - block->offset);
         }
     }
@@ -2944,11 +2963,21 @@  int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
     uint8_t *host = ptr;
 
     QLIST_FOREACH(block, &ram_list.blocks, next) {
+        /* This case happens when the block is not mapped. */
+        if (block->host == NULL) {
+            continue;
+        }
         if (host - block->host < block->length) {
             *ram_addr = block->offset + (host - block->host);
             return 0;
         }
     }
+
+    if (xen_mapcache_enabled()) {
+        *ram_addr = qemu_ram_addr_from_mapcache(ptr);
+        return 0;
+    }
+
     return -1;
 }
 
@@ -3733,6 +3762,9 @@  void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
     if (is_write) {
         cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
     }
+    if (xen_enabled()) {
+        qemu_invalidate_entry(buffer);
+    }
     qemu_vfree(bounce.buffer);
     bounce.buffer = NULL;
     cpu_notify_map_clients();
diff --git a/hw/xen.h b/hw/xen.h
index c5189b1..0261ae6 100644
--- a/hw/xen.h
+++ b/hw/xen.h
@@ -28,10 +28,20 @@  extern int xen_allowed;
 #define xen_enabled() (0)
 #endif
 
+#if defined CONFIG_XEN_MAPCACHE
+#  define xen_mapcache_enabled() (xen_enabled())
+#else
+#  define xen_mapcache_enabled() (0)
+#endif
+
 int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num);
 void xen_piix3_set_irq(void *opaque, int irq_num, int level);
 void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len);
 
 int xen_init(int smp_cpus);
 
+#if defined(NEED_CPU_H) && !defined(CONFIG_USER_ONLY)
+void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size);
+#endif
+
 #endif /* QEMU_HW_XEN_H */
diff --git a/hw/xen_common.h b/hw/xen_common.h
index a24bcb3..2773b45 100644
--- a/hw/xen_common.h
+++ b/hw/xen_common.h
@@ -36,6 +36,8 @@  typedef int qemu_xc_interface;
     xc_gnttab_map_grant_refs(gnt, count, domids, refs, flags)
 #  define xc_gnttab_munmap(xc, gnt, pages, niov) xc_gnttab_munmap(gnt, pages, niov)
 #  define xc_gnttab_close(xc, dev)               xc_gnttab_close(dev)
+#  define xc_map_foreign_bulk(xc, domid, opts, pfns, err, size) \
+    xc_map_foreign_batch(xc, domid, opts, pfns, size)
 #else
 typedef xc_interface *qemu_xc_interface;
 #  define XC_HANDLER_INITIAL_VALUE NULL
diff --git a/xen-all.c b/xen-all.c
index 90c03eb..3048c4d 100644
--- a/xen-all.c
+++ b/xen-all.c
@@ -12,6 +12,8 @@ 
 #include "hw/xen_common.h"
 #include "hw/xen_backend.h"
 
+#include "xen-mapcache.h"
+
 /* Xen specific function for piix pci */
 
 int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)
@@ -54,6 +56,64 @@  qemu_irq *i8259_xen_init(void)
     return qemu_allocate_irqs(i8259_set_irq, NULL, 16);
 }
 
+
+/* Memory Ops */
+
+static void xen_ram_init(ram_addr_t ram_size)
+{
+    RAMBlock *new_block;
+    ram_addr_t below_4g_mem_size, above_4g_mem_size = 0;
+
+    new_block = qemu_mallocz(sizeof (*new_block));
+    pstrcpy(new_block->idstr, sizeof (new_block->idstr), "xen.ram");
+    new_block->host = NULL;
+    new_block->offset = 0;
+    new_block->length = ram_size;
+
+    QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
+
+    ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty,
+                                       new_block->length >> TARGET_PAGE_BITS);
+    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
+           0xff, new_block->length >> TARGET_PAGE_BITS);
+
+    if (ram_size >= 0xe0000000 ) {
+        above_4g_mem_size = ram_size - 0xe0000000;
+        below_4g_mem_size = 0xe0000000;
+    } else {
+        below_4g_mem_size = ram_size;
+    }
+
+    cpu_register_physical_memory(0, below_4g_mem_size, new_block->offset);
+#if TARGET_PHYS_ADDR_BITS > 32
+    if (above_4g_mem_size > 0) {
+        cpu_register_physical_memory(0x100000000ULL, above_4g_mem_size,
+                                     new_block->offset + below_4g_mem_size);
+    }
+#endif
+}
+
+void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size)
+{
+    unsigned long nr_pfn;
+    xen_pfn_t *pfn_list;
+    int i;
+
+    nr_pfn = size >> TARGET_PAGE_BITS;
+    pfn_list = qemu_malloc(sizeof (*pfn_list) * nr_pfn);
+
+    for (i = 0; i < nr_pfn; i++) {
+        pfn_list[i] = (ram_addr >> TARGET_PAGE_BITS) + i;
+    }
+
+    if (xc_domain_memory_populate_physmap(xen_xc, xen_domid, nr_pfn, 0, 0, pfn_list)) {
+        hw_error("xen: failed to populate ram at %lx", ram_addr);
+    }
+
+    qemu_free(pfn_list);
+}
+
+
 /* Initialise Xen */
 
 int xen_init(int smp_cpus)
@@ -64,5 +124,9 @@  int xen_init(int smp_cpus)
         return -1;
     }
 
+    /* Init RAM management */
+    qemu_map_cache_init();
+    xen_ram_init(ram_size);
+
     return 0;
 }
diff --git a/xen-mapcache-stub.c b/xen-mapcache-stub.c
new file mode 100644
index 0000000..69ce2e7
--- /dev/null
+++ b/xen-mapcache-stub.c
@@ -0,0 +1,33 @@ 
+#include "config.h"
+
+#include "exec-all.h"
+#include "qemu-common.h"
+#include "cpu-common.h"
+#include "xen-mapcache.h"
+
+int qemu_map_cache_init(void)
+{
+    return 0;
+}
+
+uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, uint8_t lock)
+{
+    return qemu_get_ram_ptr(phys_addr);
+}
+
+void qemu_map_cache_unlock(void *buffer)
+{
+}
+
+ram_addr_t qemu_ram_addr_from_mapcache(void *ptr)
+{
+    return -1;
+}
+
+void qemu_invalidate_map_cache(void)
+{
+}
+
+void qemu_invalidate_entry(uint8_t *buffer)
+{
+}
diff --git a/xen-mapcache.c b/xen-mapcache.c
new file mode 100644
index 0000000..3e1cca9
--- /dev/null
+++ b/xen-mapcache.c
@@ -0,0 +1,301 @@ 
+#include "config.h"
+
+#include "hw/xen_backend.h"
+#include "blockdev.h"
+
+#include <xen/hvm/params.h>
+#include <sys/mman.h>
+
+#include "xen-mapcache.h"
+
+
+//#define MAPCACHE_DEBUG
+
+#ifdef MAPCACHE_DEBUG
+#  define DPRINTF(fmt, ...) do { \
+    fprintf(stderr, "xen_mapcache: " fmt, ## __VA_ARGS__); \
+} while (0)
+#else
+#  define DPRINTF(fmt, ...) do { } while (0)
+#endif
+
+#if defined(__i386__)
+#  define MAX_MCACHE_SIZE    0x40000000 /* 1GB max for x86 */
+#  define MCACHE_BUCKET_SHIFT 16
+#elif defined(__x86_64__)
+#  define MAX_MCACHE_SIZE    0x1000000000 /* 64GB max for x86_64 */
+#  define MCACHE_BUCKET_SHIFT 20
+#endif
+#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT)
+
+#define BITS_PER_LONG (sizeof(long) * 8)
+#define BITS_TO_LONGS(bits) (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG)
+#define DECLARE_BITMAP(name, bits) unsigned long name[BITS_TO_LONGS(bits)]
+#define test_bit(bit, map) \
+    (!!((map)[(bit) / BITS_PER_LONG] & (1UL << ((bit) % BITS_PER_LONG))))
+
+typedef struct MapCacheEntry {
+    target_phys_addr_t paddr_index;
+    uint8_t *vaddr_base;
+    DECLARE_BITMAP(valid_mapping, MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT);
+    uint8_t lock;
+    struct MapCacheEntry *next;
+} MapCacheEntry;
+
+typedef struct MapCacheRev {
+    uint8_t *vaddr_req;
+    target_phys_addr_t paddr_index;
+    QTAILQ_ENTRY(MapCacheRev) next;
+} MapCacheRev;
+
+typedef struct MapCache {
+    MapCacheEntry *entry;
+    unsigned long nr_buckets;
+    QTAILQ_HEAD(map_cache_head, MapCacheRev) locked_entries;
+
+    /* For most cases (>99.9%), the page address is the same. */
+    target_phys_addr_t last_address_index;
+    uint8_t *last_address_vaddr;
+} MapCache;
+
+static MapCache *mapcache;
+
+
+int qemu_map_cache_init(void)
+{
+    unsigned long size;
+
+    mapcache = qemu_mallocz(sizeof (MapCache));
+
+    QTAILQ_INIT(&mapcache->locked_entries);
+    mapcache->last_address_index = -1;
+
+    mapcache->nr_buckets = (((MAX_MCACHE_SIZE >> XC_PAGE_SHIFT) +
+                   (1UL << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) - 1) >>
+                  (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT));
+
+    /*
+     * Use mmap() directly: lets us allocate a big hash table with no up-front
+     * cost in storage space. The OS will allocate memory only for the buckets
+     * that we actually use. All others will contain all zeroes.
+     */
+    size = mapcache->nr_buckets * sizeof (MapCacheEntry);
+    size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1);
+    DPRINTF("qemu_map_cache_init, nr_buckets = %lx size %lu\n", mapcache->nr_buckets, size);
+    mapcache->entry = mmap(NULL, size, PROT_READ|PROT_WRITE,
+                          MAP_SHARED|MAP_ANON, -1, 0);
+    if (mapcache->entry == MAP_FAILED) {
+        return -1;
+    }
+
+    return 0;
+}
+
+static void qemu_remap_bucket(MapCacheEntry *entry,
+                              target_phys_addr_t size,
+                              target_phys_addr_t address_index)
+{
+    uint8_t *vaddr_base;
+    xen_pfn_t *pfns;
+    int *err;
+    unsigned int i, j;
+    target_phys_addr_t nb_pfn = size >> XC_PAGE_SHIFT;
+
+    pfns = qemu_mallocz(nb_pfn * sizeof (xen_pfn_t));
+    err = qemu_mallocz(nb_pfn * sizeof (int));
+
+    if (entry->vaddr_base != NULL) {
+        if (munmap(entry->vaddr_base, size) != 0) {
+            perror("unmap fails");
+            exit(-1);
+        }
+    }
+
+    for (i = 0; i < nb_pfn; i++) {
+        pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-XC_PAGE_SHIFT)) + i;
+    }
+
+    vaddr_base = xc_map_foreign_bulk(xen_xc, xen_domid, PROT_READ|PROT_WRITE,
+                                     pfns, err, nb_pfn);
+    if (vaddr_base == NULL) {
+        perror("xc_map_foreign_bulk");
+        exit(-1);
+    }
+
+    entry->vaddr_base = vaddr_base;
+    entry->paddr_index = address_index;
+
+    for (i = 0; i < nb_pfn; i += BITS_PER_LONG) {
+        unsigned long word = 0;
+        if ((i + BITS_PER_LONG) > nb_pfn) {
+            j = nb_pfn % BITS_PER_LONG;
+        } else {
+            j = BITS_PER_LONG;
+        }
+        while (j > 0) {
+            word = (word << 1) | !err[i + --j];
+        }
+        entry->valid_mapping[i / BITS_PER_LONG] = word;
+    }
+
+    qemu_free(pfns);
+    qemu_free(err);
+}
+
+uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, uint8_t lock)
+{
+    MapCacheEntry *entry, *pentry = NULL;
+    target_phys_addr_t address_index  = phys_addr >> MCACHE_BUCKET_SHIFT;
+    target_phys_addr_t address_offset = phys_addr & (MCACHE_BUCKET_SIZE - 1);
+
+    if (address_index == mapcache->last_address_index && !lock) {
+        return mapcache->last_address_vaddr + address_offset;
+    }
+
+    entry = &mapcache->entry[address_index % mapcache->nr_buckets];
+
+    while (entry && entry->lock && entry->paddr_index != address_index && entry->vaddr_base) {
+        pentry = entry;
+        entry = entry->next;
+    }
+    if (!entry) {
+        entry = qemu_mallocz(sizeof (MapCacheEntry));
+        pentry->next = entry;
+        qemu_remap_bucket(entry, size ? : MCACHE_BUCKET_SIZE, address_index);
+    } else if (!entry->lock) {
+        if (!entry->vaddr_base || entry->paddr_index != address_index ||
+            !test_bit(address_offset >> XC_PAGE_SHIFT, entry->valid_mapping)) {
+            qemu_remap_bucket(entry, size ? : MCACHE_BUCKET_SIZE, address_index);
+        }
+    }
+
+    if (!test_bit(address_offset >> XC_PAGE_SHIFT, entry->valid_mapping)) {
+        mapcache->last_address_index = -1;
+        return NULL;
+    }
+
+    mapcache->last_address_index = address_index;
+    mapcache->last_address_vaddr = entry->vaddr_base;
+    if (lock) {
+        MapCacheRev *reventry = qemu_mallocz(sizeof(MapCacheRev));
+        entry->lock++;
+        reventry->vaddr_req = mapcache->last_address_vaddr + address_offset;
+        reventry->paddr_index = mapcache->last_address_index;
+        QTAILQ_INSERT_TAIL(&mapcache->locked_entries, reventry, next);
+    }
+
+    return mapcache->last_address_vaddr + address_offset;
+}
+
+ram_addr_t qemu_ram_addr_from_mapcache(void *ptr)
+{
+    MapCacheRev *reventry;
+    target_phys_addr_t paddr_index;
+    int found = 0;
+
+    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+        if (reventry->vaddr_req == ptr) {
+            paddr_index = reventry->paddr_index;
+            found = 1;
+            break;
+        }
+    }
+    if (!found) {
+        fprintf(stderr, "qemu_ram_addr_from_mapcache, could not find %p\n", ptr);
+        QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+            DPRINTF("   %lx -> %p is present\n", reventry->paddr_index,
+                    reventry->vaddr_req);
+        }
+        abort();
+        return 0;
+    }
+
+    return paddr_index << MCACHE_BUCKET_SHIFT;
+}
+
+void qemu_invalidate_entry(uint8_t *buffer)
+{
+    MapCacheEntry *entry = NULL, *pentry = NULL;
+    MapCacheRev *reventry;
+    target_phys_addr_t paddr_index;
+    int found = 0;
+
+    if (mapcache->last_address_vaddr == buffer) {
+        mapcache->last_address_index = -1;
+    }
+
+    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+        if (reventry->vaddr_req == buffer) {
+            paddr_index = reventry->paddr_index;
+            found = 1;
+            break;
+        }
+    }
+    if (!found) {
+        DPRINTF("qemu_invalidate_entry, could not find %p\n", buffer);
+        QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+            DPRINTF("   %lx -> %p is present\n", reventry->paddr_index, reventry->vaddr_req);
+        }
+        return;
+    }
+    QTAILQ_REMOVE(&mapcache->locked_entries, reventry, next);
+    qemu_free(reventry);
+
+    entry = &mapcache->entry[paddr_index % mapcache->nr_buckets];
+    while (entry && entry->paddr_index != paddr_index) {
+        pentry = entry;
+        entry = entry->next;
+    }
+    if (!entry) {
+        DPRINTF("Trying to unmap address %p that is not in the mapcache!\n", buffer);
+        return;
+    }
+    entry->lock--;
+    if (entry->lock > 0 || pentry == NULL) {
+        return;
+    }
+
+    pentry->next = entry->next;
+    if (munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE) != 0) {
+        perror("unmap fails");
+        exit(-1);
+    }
+    qemu_free(entry);
+}
+
+void qemu_invalidate_map_cache(void)
+{
+    unsigned long i;
+    MapCacheRev *reventry;
+
+    qemu_aio_flush();
+
+    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+        DPRINTF("There should be no locked mappings at this time, "
+                "but %lx -> %p is present\n",
+                reventry->paddr_index, reventry->vaddr_req);
+    }
+
+    mapcache_lock();
+
+    for (i = 0; i < mapcache->nr_buckets; i++) {
+        MapCacheEntry *entry = &mapcache->entry[i];
+
+        if (entry->vaddr_base == NULL) {
+            continue;
+        }
+
+        if (munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE) != 0) {
+            perror("unmap fails");
+            exit(-1);
+        }
+
+        entry->paddr_index = 0;
+        entry->vaddr_base = NULL;
+    }
+
+    mapcache->last_address_index = -1;
+    mapcache->last_address_vaddr = NULL;
+
+    mapcache_unlock();
+}
diff --git a/xen-mapcache.h b/xen-mapcache.h
new file mode 100644
index 0000000..86a017b
--- /dev/null
+++ b/xen-mapcache.h
@@ -0,0 +1,14 @@ 
+#ifndef XEN_MAPCACHE_H
+#define XEN_MAPCACHE_H
+
+int      qemu_map_cache_init(void);
+uint8_t  *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, uint8_t lock);
+void     qemu_map_cache_unlock(void *phys_addr);
+ram_addr_t qemu_ram_addr_from_mapcache(void *ptr);
+void     qemu_invalidate_entry(uint8_t *buffer);
+void     qemu_invalidate_map_cache(void);
+
+#define mapcache_lock()   ((void)0)
+#define mapcache_unlock() ((void)0)
+
+#endif /* !XEN_MAPCACHE_H */
diff --git a/xen-stub.c b/xen-stub.c
index 07e64bc..c9f477d 100644
--- a/xen-stub.c
+++ b/xen-stub.c
@@ -24,6 +24,10 @@  void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len)
 {
 }
 
+void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size)
+{
+}
+
 int xen_init(int smp_cpus)
 {
     return -ENOSYS;