Patchwork [V11,09/15] xen: Introduce the Xen mapcache

login
register
mail settings
Submitter Anthony PERARD
Date March 1, 2011, 6:35 p.m.
Message ID <1299004529-31290-10-git-send-email-anthony.perard@citrix.com>
Download mbox | patch
Permalink /patch/84975/
State New
Headers show

Comments

Anthony PERARD - March 1, 2011, 6:35 p.m.
From: Jun Nakajima <jun.nakajima@intel.com>

On IA32 host or IA32 PAE host, at present, generally, we can't create
an HVM guest with more than 2G memory, because generally it's almost
impossible for Qemu to find a large enough and consecutive virtual
address space to map an HVM guest's whole physical address space.
The attached patch fixes this issue using dynamic mapping based on
little blocks of memory.

Each call to qemu_get_ram_ptr makes a call to qemu_map_cache with the
lock option, so mapcache will not unmap these ram_ptr.

Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
---
 Makefile.target     |    3 +
 configure           |    3 +
 exec.c              |   40 ++++++-
 hw/xen.h            |   13 ++
 hw/xen_common.h     |    9 ++
 xen-all.c           |   64 +++++++++++
 xen-mapcache-stub.c |   40 +++++++
 xen-mapcache.c      |  310 +++++++++++++++++++++++++++++++++++++++++++++++++++
 xen-mapcache.h      |   22 ++++
 xen-stub.c          |    4 +
 10 files changed, 504 insertions(+), 4 deletions(-)
 create mode 100644 xen-mapcache-stub.c
 create mode 100644 xen-mapcache.c
 create mode 100644 xen-mapcache.h
Alexander Graf - March 23, 2011, 12:33 p.m.
On 01.03.2011, at 19:35, Anthony.Perard@citrix.com wrote:

> From: Jun Nakajima <jun.nakajima@intel.com>
> 
> On IA32 host or IA32 PAE host, at present, generally, we can't create
> an HVM guest with more than 2G memory, because generally it's almost
> impossible for Qemu to find a large enough and consecutive virtual
> address space to map an HVM guest's whole physical address space.
> The attached patch fixes this issue using dynamic mapping based on
> little blocks of memory.
> 
> Each call to qemu_get_ram_ptr makes a call to qemu_map_cache with the
> lock option, so mapcache will not unmap these ram_ptr.
> 
> Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
> Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
> Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
> ---
> Makefile.target     |    3 +
> configure           |    3 +
> exec.c              |   40 ++++++-
> hw/xen.h            |   13 ++
> hw/xen_common.h     |    9 ++
> xen-all.c           |   64 +++++++++++
> xen-mapcache-stub.c |   40 +++++++
> xen-mapcache.c      |  310 +++++++++++++++++++++++++++++++++++++++++++++++++++
> xen-mapcache.h      |   22 ++++
> xen-stub.c          |    4 +
> 10 files changed, 504 insertions(+), 4 deletions(-)
> create mode 100644 xen-mapcache-stub.c
> create mode 100644 xen-mapcache.c
> create mode 100644 xen-mapcache.h
> 
> diff --git a/Makefile.target b/Makefile.target
> index c539b1e..dcdd51d 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -214,8 +214,11 @@ else
> CONFIG_NO_XEN = y
> endif
> # xen support
> +CONFIG_NO_XEN_MAPCACHE = $(if $(subst n,,$(CONFIG_XEN_MAPCACHE)),n,y)
> obj-i386-$(CONFIG_XEN) += xen-all.o
> obj-$(CONFIG_NO_XEN) += xen-stub.o
> +obj-i386-$(CONFIG_XEN_MAPCACHE) += xen-mapcache.o
> +obj-$(CONFIG_NO_XEN_MAPCACHE) += xen-mapcache-stub.o
> 
> obj-i386-$(CONFIG_XEN) += xen_platform.o
> 
> diff --git a/configure b/configure
> index a84d974..4fd8696 100755
> --- a/configure
> +++ b/configure
> @@ -3172,6 +3172,9 @@ case "$target_arch2" in
>   i386|x86_64)
>     if test "$xen" = "yes" -a "$target_softmmu" = "yes" ; then
>       echo "CONFIG_XEN=y" >> $config_target_mak
> +      if test "$cpu" = "i386" -o "$cpu" = "x86_64"; then
> +          echo "CONFIG_XEN_MAPCACHE=y" >> $config_target_mak
> +      fi
>     fi
> esac
> case "$target_arch2" in
> diff --git a/exec.c b/exec.c
> index d611100..558595a 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -32,6 +32,7 @@
> #include "hw/qdev.h"
> #include "osdep.h"
> #include "kvm.h"
> +#include "hw/xen.h"
> #include "qemu-timer.h"
> #if defined(CONFIG_USER_ONLY)
> #include <qemu.h>
> @@ -51,6 +52,8 @@
> #include <libutil.h>
> #endif
> #endif
> +#else /* !CONFIG_USER_ONLY */
> +#include "xen-mapcache.h"
> #endif
> 
> //#define DEBUG_TB_INVALIDATE
> @@ -2865,6 +2868,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
>         }
>     }
> 
> +    new_block->offset = find_ram_offset(size);
>     if (host) {
>         new_block->host = host;
>     } else {
> @@ -2886,13 +2890,15 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
>                                    PROT_EXEC|PROT_READ|PROT_WRITE,
>                                    MAP_SHARED | MAP_ANONYMOUS, -1, 0);
> #else
> -            new_block->host = qemu_vmalloc(size);
> +            if (xen_mapcache_enabled()) {
> +                xen_ram_alloc(new_block->offset, size);
> +            } else {
> +                new_block->host = qemu_vmalloc(size);
> +            }
> #endif
>             qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
>         }
>     }
> -
> -    new_block->offset = find_ram_offset(size);
>     new_block->length = size;
> 
>     QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
> @@ -2933,7 +2939,11 @@ void qemu_ram_free(ram_addr_t addr)
> #if defined(TARGET_S390X) && defined(CONFIG_KVM)
>                 munmap(block->host, block->length);
> #else
> -                qemu_vfree(block->host);
> +                if (xen_mapcache_enabled()) {
> +                    qemu_invalidate_entry(block->host);
> +                } else {
> +                    qemu_vfree(block->host);
> +                }
> #endif
>             }
>             qemu_free(block);
> @@ -2959,6 +2969,15 @@ void *qemu_get_ram_ptr(ram_addr_t addr)
>         if (addr - block->offset < block->length) {
>             QLIST_REMOVE(block, next);
>             QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
> +            if (xen_mapcache_enabled()) {
> +                /* We need to check if the requested address is in the RAM
> +                 * because we don't want to map the entire memory in QEMU.
> +                 */
> +                if (block->offset == 0) {
> +                    return qemu_map_cache(addr, 0, 1);
> +                }
> +                block->host = qemu_map_cache(block->offset, block->length, 1);
> +            }
>             return block->host + (addr - block->offset);
>         }
>     }
> @@ -2994,11 +3013,21 @@ int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
>     uint8_t *host = ptr;
> 
>     QLIST_FOREACH(block, &ram_list.blocks, next) {
> +        /* This case append when the block is not mapped. */
> +        if (block->host == NULL) {
> +            continue;
> +        }
>         if (host - block->host < block->length) {
>             *ram_addr = block->offset + (host - block->host);
>             return 0;
>         }
>     }
> +
> +    if (xen_mapcache_enabled()) {
> +        *ram_addr = qemu_ram_addr_from_mapcache(ptr);
> +        return 0;
> +    }
> +
>     return -1;
> }
> 
> @@ -3909,6 +3938,9 @@ void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
>     if (is_write) {
>         cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
>     }
> +    if (xen_enabled()) {
> +        qemu_invalidate_entry(buffer);
> +    }
>     qemu_vfree(bounce.buffer);
>     bounce.buffer = NULL;
>     cpu_notify_map_clients();
> diff --git a/hw/xen.h b/hw/xen.h
> index 12d4e5f..e26d061 100644
> --- a/hw/xen.h
> +++ b/hw/xen.h
> @@ -31,6 +31,15 @@ static inline int xen_enabled(void)
> #endif
> }
> 
> +static inline int xen_mapcache_enabled(void)
> +{
> +#ifdef CONFIG_XEN_MAPCACHE
> +    return xen_enabled();
> +#else
> +    return 0;
> +#endif
> +}
> +
> int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num);
> void xen_piix3_set_irq(void *opaque, int irq_num, int level);
> void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len);
> @@ -41,6 +50,10 @@ void pci_xen_platform_init(PCIBus *bus);
> 
> int xen_init(void);
> 
> +#if defined(NEED_CPU_H) && !defined(CONFIG_USER_ONLY)
> +void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size);
> +#endif
> +
> #if defined(CONFIG_XEN) && CONFIG_XEN_CTRL_INTERFACE_VERSION < 400
> #  define HVM_MAX_VCPUS 32
> #endif
> diff --git a/hw/xen_common.h b/hw/xen_common.h
> index 7e123ec..5a36642 100644
> --- a/hw/xen_common.h
> +++ b/hw/xen_common.h
> @@ -50,6 +50,15 @@ static inline int xc_fd(int xen_xc)
> }
> 
> 
> +static inline int xc_domain_populate_physmap_exact
> +    (XenXC xc_handle, uint32_t domid, unsigned long nr_extents,
> +     unsigned int extent_order, unsigned int mem_flags, xen_pfn_t *extent_start)
> +{
> +    return xc_domain_memory_populate_physmap
> +        (xc_handle, domid, nr_extents, extent_order, mem_flags, extent_start);
> +}
> +
> +
> /* Xen 4.1 */
> #else
> 
> diff --git a/xen-all.c b/xen-all.c
> index 761f2a0..03d1e90 100644
> --- a/xen-all.c
> +++ b/xen-all.c
> @@ -10,6 +10,8 @@
> #include "hw/xen_common.h"
> #include "hw/xen_backend.h"
> 
> +#include "xen-mapcache.h"
> +
> /* Xen specific function for piix pci */
> 
> int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)
> @@ -52,6 +54,64 @@ qemu_irq *xen_interrupt_controller_init(void)
>     return qemu_allocate_irqs(xen_set_irq, NULL, 16);
> }
> 
> +
> +/* Memory Ops */
> +
> +static void xen_ram_init(ram_addr_t ram_size)
> +{
> +    RAMBlock *new_block;
> +    ram_addr_t below_4g_mem_size, above_4g_mem_size = 0;
> +
> +    new_block = qemu_mallocz(sizeof (*new_block));
> +    pstrcpy(new_block->idstr, sizeof (new_block->idstr), "xen.ram");
> +    new_block->host = NULL;
> +    new_block->offset = 0;
> +    new_block->length = ram_size;
> +
> +    QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
> +
> +    ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty,
> +                                       new_block->length >> TARGET_PAGE_BITS);
> +    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
> +           0xff, new_block->length >> TARGET_PAGE_BITS);
> +
> +    if (ram_size >= 0xe0000000 ) {
> +        above_4g_mem_size = ram_size - 0xe0000000;
> +        below_4g_mem_size = 0xe0000000;
> +    } else {
> +        below_4g_mem_size = ram_size;
> +    }
> +
> +    cpu_register_physical_memory(0, below_4g_mem_size, new_block->offset);
> +#if TARGET_PHYS_ADDR_BITS > 32
> +    if (above_4g_mem_size > 0) {
> +        cpu_register_physical_memory(0x100000000ULL, above_4g_mem_size,
> +                                     new_block->offset + below_4g_mem_size);
> +    }
> +#endif
> +}
> +
> +void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size)
> +{
> +    unsigned long nr_pfn;
> +    xen_pfn_t *pfn_list;
> +    int i;
> +
> +    nr_pfn = size >> TARGET_PAGE_BITS;
> +    pfn_list = qemu_malloc(sizeof (*pfn_list) * nr_pfn);
> +
> +    for (i = 0; i < nr_pfn; i++) {
> +        pfn_list[i] = (ram_addr >> TARGET_PAGE_BITS) + i;
> +    }
> +
> +    if (xc_domain_populate_physmap_exact(xen_xc, xen_domid, nr_pfn, 0, 0, pfn_list)) {
> +        hw_error("xen: failed to populate ram at %lx", ram_addr);
> +    }
> +
> +    qemu_free(pfn_list);
> +}
> +
> +
> /* Initialise Xen */
> 
> int xen_init(void)
> @@ -62,5 +122,9 @@ int xen_init(void)
>         return -1;
>     }
> 
> +    /* Init RAM management */
> +    qemu_map_cache_init();
> +    xen_ram_init(ram_size);
> +
>     return 0;
> }
> diff --git a/xen-mapcache-stub.c b/xen-mapcache-stub.c
> new file mode 100644
> index 0000000..541bee6
> --- /dev/null
> +++ b/xen-mapcache-stub.c
> @@ -0,0 +1,40 @@
> +/*
> + * Copyright (C) 2011       Citrix Ltd.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "config.h"
> +
> +#include "exec-all.h"
> +#include "qemu-common.h"
> +#include "cpu-common.h"
> +#include "xen-mapcache.h"
> +
> +void qemu_map_cache_init(void)
> +{
> +}
> +
> +uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, uint8_t lock)
> +{
> +    return qemu_get_ram_ptr(phys_addr);
> +}
> +
> +void qemu_map_cache_unlock(void *buffer)
> +{
> +}
> +
> +ram_addr_t qemu_ram_addr_from_mapcache(void *ptr)
> +{
> +    return -1;
> +}
> +
> +void qemu_invalidate_map_cache(void)
> +{
> +}
> +
> +void qemu_invalidate_entry(uint8_t *buffer)
> +{
> +}
> diff --git a/xen-mapcache.c b/xen-mapcache.c
> new file mode 100644
> index 0000000..d7f44a7
> --- /dev/null
> +++ b/xen-mapcache.c
> @@ -0,0 +1,310 @@
> +/*
> + * Copyright (C) 2011       Citrix Ltd.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "config.h"
> +
> +#include <sys/resource.h>
> +
> +#include "hw/xen_backend.h"
> +#include "blockdev.h"
> +
> +#include <xen/hvm/params.h>
> +#include <sys/mman.h>
> +
> +#include "xen-mapcache.h"
> +
> +
> +//#define MAPCACHE_DEBUG
> +
> +#ifdef MAPCACHE_DEBUG
> +#  define DPRINTF(fmt, ...) do { \
> +    fprintf(stderr, "xen_mapcache: " fmt, ## __VA_ARGS__); \
> +} while (0)
> +#else
> +#  define DPRINTF(fmt, ...) do { } while (0)
> +#endif
> +
> +#if defined(__i386__)
> +#  define MCACHE_BUCKET_SHIFT 16
> +#elif defined(__x86_64__)
> +#  define MCACHE_BUCKET_SHIFT 20
> +#endif
> +#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT)
> +
> +#define BITS_PER_LONG (sizeof(long) * 8)
> +#define BITS_TO_LONGS(bits) (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG)
> +#define DECLARE_BITMAP(name, bits) unsigned long name[BITS_TO_LONGS(bits)]
> +
> +typedef struct MapCacheEntry {
> +    target_phys_addr_t paddr_index;
> +    uint8_t *vaddr_base;
> +    DECLARE_BITMAP(valid_mapping, MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT);
> +    uint8_t lock;
> +    struct MapCacheEntry *next;
> +} MapCacheEntry;
> +
> +typedef struct MapCacheRev {
> +    uint8_t *vaddr_req;
> +    target_phys_addr_t paddr_index;
> +    QTAILQ_ENTRY(MapCacheRev) next;
> +} MapCacheRev;
> +
> +typedef struct MapCache {
> +    MapCacheEntry *entry;
> +    unsigned long nr_buckets;
> +    QTAILQ_HEAD(map_cache_head, MapCacheRev) locked_entries;
> +
> +    /* For most cases (>99.9%), the page address is the same. */
> +    target_phys_addr_t last_address_index;
> +    uint8_t *last_address_vaddr;
> +    unsigned long max_mcache_size;
> +    unsigned int mcache_bucket_shift;
> +} MapCache;
> +
> +static MapCache *mapcache;
> +
> +static inline int test_bit(unsigned int bit, const unsigned long *map)
> +{
> +    return !!((map)[(bit) / BITS_PER_LONG] & (1UL << ((bit) % BITS_PER_LONG)));
> +}

We have a bitmap framework in qemu now. Please use that :). See bitmap.h / bitops.h / bitops.c.


Alex
Anthony PERARD - March 28, 2011, 4:28 p.m.
On Wed, Mar 23, 2011 at 12:33, Alexander Graf <agraf@suse.de> wrote:
> We have a bitmap framework in qemu now. Please use that :). See bitmap.h / bitops.h / bitops.c.

Will do.

Thanks,

Patch

diff --git a/Makefile.target b/Makefile.target
index c539b1e..dcdd51d 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -214,8 +214,11 @@  else
 CONFIG_NO_XEN = y
 endif
 # xen support
+CONFIG_NO_XEN_MAPCACHE = $(if $(subst n,,$(CONFIG_XEN_MAPCACHE)),n,y)
 obj-i386-$(CONFIG_XEN) += xen-all.o
 obj-$(CONFIG_NO_XEN) += xen-stub.o
+obj-i386-$(CONFIG_XEN_MAPCACHE) += xen-mapcache.o
+obj-$(CONFIG_NO_XEN_MAPCACHE) += xen-mapcache-stub.o
 
 obj-i386-$(CONFIG_XEN) += xen_platform.o
 
diff --git a/configure b/configure
index a84d974..4fd8696 100755
--- a/configure
+++ b/configure
@@ -3172,6 +3172,9 @@  case "$target_arch2" in
   i386|x86_64)
     if test "$xen" = "yes" -a "$target_softmmu" = "yes" ; then
       echo "CONFIG_XEN=y" >> $config_target_mak
+      if test "$cpu" = "i386" -o "$cpu" = "x86_64"; then
+          echo "CONFIG_XEN_MAPCACHE=y" >> $config_target_mak
+      fi
     fi
 esac
 case "$target_arch2" in
diff --git a/exec.c b/exec.c
index d611100..558595a 100644
--- a/exec.c
+++ b/exec.c
@@ -32,6 +32,7 @@ 
 #include "hw/qdev.h"
 #include "osdep.h"
 #include "kvm.h"
+#include "hw/xen.h"
 #include "qemu-timer.h"
 #if defined(CONFIG_USER_ONLY)
 #include <qemu.h>
@@ -51,6 +52,8 @@ 
 #include <libutil.h>
 #endif
 #endif
+#else /* !CONFIG_USER_ONLY */
+#include "xen-mapcache.h"
 #endif
 
 //#define DEBUG_TB_INVALIDATE
@@ -2865,6 +2868,7 @@  ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
         }
     }
 
+    new_block->offset = find_ram_offset(size);
     if (host) {
         new_block->host = host;
     } else {
@@ -2886,13 +2890,15 @@  ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
                                    PROT_EXEC|PROT_READ|PROT_WRITE,
                                    MAP_SHARED | MAP_ANONYMOUS, -1, 0);
 #else
-            new_block->host = qemu_vmalloc(size);
+            if (xen_mapcache_enabled()) {
+                xen_ram_alloc(new_block->offset, size);
+            } else {
+                new_block->host = qemu_vmalloc(size);
+            }
 #endif
             qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
         }
     }
-
-    new_block->offset = find_ram_offset(size);
     new_block->length = size;
 
     QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
@@ -2933,7 +2939,11 @@  void qemu_ram_free(ram_addr_t addr)
 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
                 munmap(block->host, block->length);
 #else
-                qemu_vfree(block->host);
+                if (xen_mapcache_enabled()) {
+                    qemu_invalidate_entry(block->host);
+                } else {
+                    qemu_vfree(block->host);
+                }
 #endif
             }
             qemu_free(block);
@@ -2959,6 +2969,15 @@  void *qemu_get_ram_ptr(ram_addr_t addr)
         if (addr - block->offset < block->length) {
             QLIST_REMOVE(block, next);
             QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
+            if (xen_mapcache_enabled()) {
+                /* We need to check if the requested address is in the RAM
+                 * because we don't want to map the entire memory in QEMU.
+                 */
+                if (block->offset == 0) {
+                    return qemu_map_cache(addr, 0, 1);
+                }
+                block->host = qemu_map_cache(block->offset, block->length, 1);
+            }
             return block->host + (addr - block->offset);
         }
     }
@@ -2994,11 +3013,21 @@  int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
     uint8_t *host = ptr;
 
     QLIST_FOREACH(block, &ram_list.blocks, next) {
+        /* This case append when the block is not mapped. */
+        if (block->host == NULL) {
+            continue;
+        }
         if (host - block->host < block->length) {
             *ram_addr = block->offset + (host - block->host);
             return 0;
         }
     }
+
+    if (xen_mapcache_enabled()) {
+        *ram_addr = qemu_ram_addr_from_mapcache(ptr);
+        return 0;
+    }
+
     return -1;
 }
 
@@ -3909,6 +3938,9 @@  void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
     if (is_write) {
         cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
     }
+    if (xen_enabled()) {
+        qemu_invalidate_entry(buffer);
+    }
     qemu_vfree(bounce.buffer);
     bounce.buffer = NULL;
     cpu_notify_map_clients();
diff --git a/hw/xen.h b/hw/xen.h
index 12d4e5f..e26d061 100644
--- a/hw/xen.h
+++ b/hw/xen.h
@@ -31,6 +31,15 @@  static inline int xen_enabled(void)
 #endif
 }
 
+static inline int xen_mapcache_enabled(void)
+{
+#ifdef CONFIG_XEN_MAPCACHE
+    return xen_enabled();
+#else
+    return 0;
+#endif
+}
+
 int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num);
 void xen_piix3_set_irq(void *opaque, int irq_num, int level);
 void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len);
@@ -41,6 +50,10 @@  void pci_xen_platform_init(PCIBus *bus);
 
 int xen_init(void);
 
+#if defined(NEED_CPU_H) && !defined(CONFIG_USER_ONLY)
+void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size);
+#endif
+
 #if defined(CONFIG_XEN) && CONFIG_XEN_CTRL_INTERFACE_VERSION < 400
 #  define HVM_MAX_VCPUS 32
 #endif
diff --git a/hw/xen_common.h b/hw/xen_common.h
index 7e123ec..5a36642 100644
--- a/hw/xen_common.h
+++ b/hw/xen_common.h
@@ -50,6 +50,15 @@  static inline int xc_fd(int xen_xc)
 }
 
 
+static inline int xc_domain_populate_physmap_exact
+    (XenXC xc_handle, uint32_t domid, unsigned long nr_extents,
+     unsigned int extent_order, unsigned int mem_flags, xen_pfn_t *extent_start)
+{
+    return xc_domain_memory_populate_physmap
+        (xc_handle, domid, nr_extents, extent_order, mem_flags, extent_start);
+}
+
+
 /* Xen 4.1 */
 #else
 
diff --git a/xen-all.c b/xen-all.c
index 761f2a0..03d1e90 100644
--- a/xen-all.c
+++ b/xen-all.c
@@ -10,6 +10,8 @@ 
 #include "hw/xen_common.h"
 #include "hw/xen_backend.h"
 
+#include "xen-mapcache.h"
+
 /* Xen specific function for piix pci */
 
 int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)
@@ -52,6 +54,64 @@  qemu_irq *xen_interrupt_controller_init(void)
     return qemu_allocate_irqs(xen_set_irq, NULL, 16);
 }
 
+
+/* Memory Ops */
+
+static void xen_ram_init(ram_addr_t ram_size)
+{
+    RAMBlock *new_block;
+    ram_addr_t below_4g_mem_size, above_4g_mem_size = 0;
+
+    new_block = qemu_mallocz(sizeof (*new_block));
+    pstrcpy(new_block->idstr, sizeof (new_block->idstr), "xen.ram");
+    new_block->host = NULL;
+    new_block->offset = 0;
+    new_block->length = ram_size;
+
+    QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
+
+    ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty,
+                                       new_block->length >> TARGET_PAGE_BITS);
+    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
+           0xff, new_block->length >> TARGET_PAGE_BITS);
+
+    if (ram_size >= 0xe0000000 ) {
+        above_4g_mem_size = ram_size - 0xe0000000;
+        below_4g_mem_size = 0xe0000000;
+    } else {
+        below_4g_mem_size = ram_size;
+    }
+
+    cpu_register_physical_memory(0, below_4g_mem_size, new_block->offset);
+#if TARGET_PHYS_ADDR_BITS > 32
+    if (above_4g_mem_size > 0) {
+        cpu_register_physical_memory(0x100000000ULL, above_4g_mem_size,
+                                     new_block->offset + below_4g_mem_size);
+    }
+#endif
+}
+
+void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size)
+{
+    unsigned long nr_pfn;
+    xen_pfn_t *pfn_list;
+    int i;
+
+    nr_pfn = size >> TARGET_PAGE_BITS;
+    pfn_list = qemu_malloc(sizeof (*pfn_list) * nr_pfn);
+
+    for (i = 0; i < nr_pfn; i++) {
+        pfn_list[i] = (ram_addr >> TARGET_PAGE_BITS) + i;
+    }
+
+    if (xc_domain_populate_physmap_exact(xen_xc, xen_domid, nr_pfn, 0, 0, pfn_list)) {
+        hw_error("xen: failed to populate ram at %lx", ram_addr);
+    }
+
+    qemu_free(pfn_list);
+}
+
+
 /* Initialise Xen */
 
 int xen_init(void)
@@ -62,5 +122,9 @@  int xen_init(void)
         return -1;
     }
 
+    /* Init RAM management */
+    qemu_map_cache_init();
+    xen_ram_init(ram_size);
+
     return 0;
 }
diff --git a/xen-mapcache-stub.c b/xen-mapcache-stub.c
new file mode 100644
index 0000000..541bee6
--- /dev/null
+++ b/xen-mapcache-stub.c
@@ -0,0 +1,40 @@ 
+/*
+ * Copyright (C) 2011       Citrix Ltd.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "config.h"
+
+#include "exec-all.h"
+#include "qemu-common.h"
+#include "cpu-common.h"
+#include "xen-mapcache.h"
+
+void qemu_map_cache_init(void)
+{
+}
+
+uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, uint8_t lock)
+{
+    return qemu_get_ram_ptr(phys_addr);
+}
+
+void qemu_map_cache_unlock(void *buffer)
+{
+}
+
+ram_addr_t qemu_ram_addr_from_mapcache(void *ptr)
+{
+    return -1;
+}
+
+void qemu_invalidate_map_cache(void)
+{
+}
+
+void qemu_invalidate_entry(uint8_t *buffer)
+{
+}
diff --git a/xen-mapcache.c b/xen-mapcache.c
new file mode 100644
index 0000000..d7f44a7
--- /dev/null
+++ b/xen-mapcache.c
@@ -0,0 +1,310 @@ 
+/*
+ * Copyright (C) 2011       Citrix Ltd.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "config.h"
+
+#include <sys/resource.h>
+
+#include "hw/xen_backend.h"
+#include "blockdev.h"
+
+#include <xen/hvm/params.h>
+#include <sys/mman.h>
+
+#include "xen-mapcache.h"
+
+
+//#define MAPCACHE_DEBUG
+
+#ifdef MAPCACHE_DEBUG
+#  define DPRINTF(fmt, ...) do { \
+    fprintf(stderr, "xen_mapcache: " fmt, ## __VA_ARGS__); \
+} while (0)
+#else
+#  define DPRINTF(fmt, ...) do { } while (0)
+#endif
+
+#if defined(__i386__)
+#  define MCACHE_BUCKET_SHIFT 16
+#elif defined(__x86_64__)
+#  define MCACHE_BUCKET_SHIFT 20
+#endif
+#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT)
+
+#define BITS_PER_LONG (sizeof(long) * 8)
+#define BITS_TO_LONGS(bits) (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG)
+#define DECLARE_BITMAP(name, bits) unsigned long name[BITS_TO_LONGS(bits)]
+
+typedef struct MapCacheEntry {
+    target_phys_addr_t paddr_index;
+    uint8_t *vaddr_base;
+    DECLARE_BITMAP(valid_mapping, MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT);
+    uint8_t lock;
+    struct MapCacheEntry *next;
+} MapCacheEntry;
+
+typedef struct MapCacheRev {
+    uint8_t *vaddr_req;
+    target_phys_addr_t paddr_index;
+    QTAILQ_ENTRY(MapCacheRev) next;
+} MapCacheRev;
+
+typedef struct MapCache {
+    MapCacheEntry *entry;
+    unsigned long nr_buckets;
+    QTAILQ_HEAD(map_cache_head, MapCacheRev) locked_entries;
+
+    /* For most cases (>99.9%), the page address is the same. */
+    target_phys_addr_t last_address_index;
+    uint8_t *last_address_vaddr;
+    unsigned long max_mcache_size;
+    unsigned int mcache_bucket_shift;
+} MapCache;
+
+static MapCache *mapcache;
+
+static inline int test_bit(unsigned int bit, const unsigned long *map)
+{
+    return !!((map)[(bit) / BITS_PER_LONG] & (1UL << ((bit) % BITS_PER_LONG)));
+}
+
+void qemu_map_cache_init(void)
+{
+    unsigned long size;
+    struct rlimit rlimit_as;
+
+    mapcache = qemu_mallocz(sizeof (MapCache));
+
+    QTAILQ_INIT(&mapcache->locked_entries);
+    mapcache->last_address_index = -1;
+
+    getrlimit(RLIMIT_AS, &rlimit_as);
+    rlimit_as.rlim_cur = rlimit_as.rlim_max;
+    setrlimit(RLIMIT_AS, &rlimit_as);
+    mapcache->max_mcache_size = rlimit_as.rlim_max;
+
+    mapcache->nr_buckets =
+        (((mapcache->max_mcache_size >> XC_PAGE_SHIFT) +
+          (1UL << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) - 1) >>
+         (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT));
+
+    size = mapcache->nr_buckets * sizeof (MapCacheEntry);
+    size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1);
+    DPRINTF("qemu_map_cache_init, nr_buckets = %lx size %lu\n", mapcache->nr_buckets, size);
+    mapcache->entry = qemu_mallocz(size);
+}
+
+static void qemu_remap_bucket(MapCacheEntry *entry,
+                              target_phys_addr_t size,
+                              target_phys_addr_t address_index)
+{
+    uint8_t *vaddr_base;
+    xen_pfn_t *pfns;
+    int *err;
+    unsigned int i, j;
+    target_phys_addr_t nb_pfn = size >> XC_PAGE_SHIFT;
+
+    pfns = qemu_mallocz(nb_pfn * sizeof (xen_pfn_t));
+    err = qemu_mallocz(nb_pfn * sizeof (int));
+
+    if (entry->vaddr_base != NULL) {
+        if (munmap(entry->vaddr_base, size) != 0) {
+            perror("unmap fails");
+            exit(-1);
+        }
+    }
+
+    for (i = 0; i < nb_pfn; i++) {
+        pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-XC_PAGE_SHIFT)) + i;
+    }
+
+    vaddr_base = xc_map_foreign_bulk(xen_xc, xen_domid, PROT_READ|PROT_WRITE,
+                                     pfns, err, nb_pfn);
+    if (vaddr_base == NULL) {
+        perror("xc_map_foreign_bulk");
+        exit(-1);
+    }
+
+    entry->vaddr_base = vaddr_base;
+    entry->paddr_index = address_index;
+
+    for (i = 0; i < nb_pfn; i += BITS_PER_LONG) {
+        unsigned long word = 0;
+        if ((i + BITS_PER_LONG) > nb_pfn) {
+            j = nb_pfn % BITS_PER_LONG;
+        } else {
+            j = BITS_PER_LONG;
+        }
+        while (j > 0) {
+            word = (word << 1) | !err[i + --j];
+        }
+        entry->valid_mapping[i / BITS_PER_LONG] = word;
+    }
+
+    qemu_free(pfns);
+    qemu_free(err);
+}
+
+uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, uint8_t lock)
+{
+    MapCacheEntry *entry, *pentry = NULL;
+    target_phys_addr_t address_index  = phys_addr >> MCACHE_BUCKET_SHIFT;
+    target_phys_addr_t address_offset = phys_addr & (MCACHE_BUCKET_SIZE - 1);
+
+    if (address_index == mapcache->last_address_index && !lock) {
+        return mapcache->last_address_vaddr + address_offset;
+    }
+
+    entry = &mapcache->entry[address_index % mapcache->nr_buckets];
+
+    while (entry && entry->lock && entry->paddr_index != address_index && entry->vaddr_base) {
+        pentry = entry;
+        entry = entry->next;
+    }
+    if (!entry) {
+        entry = qemu_mallocz(sizeof (MapCacheEntry));
+        pentry->next = entry;
+        qemu_remap_bucket(entry, size ? : MCACHE_BUCKET_SIZE, address_index);
+    } else if (!entry->lock) {
+        if (!entry->vaddr_base || entry->paddr_index != address_index ||
+            !test_bit(address_offset >> XC_PAGE_SHIFT, entry->valid_mapping)) {
+            qemu_remap_bucket(entry, size ? : MCACHE_BUCKET_SIZE, address_index);
+        }
+    }
+
+    if (!test_bit(address_offset >> XC_PAGE_SHIFT, entry->valid_mapping)) {
+        mapcache->last_address_index = -1;
+        return NULL;
+    }
+
+    mapcache->last_address_index = address_index;
+    mapcache->last_address_vaddr = entry->vaddr_base;
+    if (lock) {
+        MapCacheRev *reventry = qemu_mallocz(sizeof(MapCacheRev));
+        entry->lock++;
+        reventry->vaddr_req = mapcache->last_address_vaddr + address_offset;
+        reventry->paddr_index = mapcache->last_address_index;
+        QTAILQ_INSERT_TAIL(&mapcache->locked_entries, reventry, next);
+    }
+
+    return mapcache->last_address_vaddr + address_offset;
+}
+
+ram_addr_t qemu_ram_addr_from_mapcache(void *ptr)
+{
+    MapCacheRev *reventry;
+    target_phys_addr_t paddr_index;
+    int found = 0;
+
+    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+        if (reventry->vaddr_req == ptr) {
+            paddr_index = reventry->paddr_index;
+            found = 1;
+            break;
+        }
+    }
+    if (!found) {
+        fprintf(stderr, "qemu_ram_addr_from_mapcache, could not find %p\n", ptr);
+        QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+            DPRINTF("   "TARGET_FMT_plx" -> %p is present\n", reventry->paddr_index,
+                    reventry->vaddr_req);
+        }
+        abort();
+        return 0;
+    }
+
+    return paddr_index << MCACHE_BUCKET_SHIFT;
+}
+
+void qemu_invalidate_entry(uint8_t *buffer)
+{
+    MapCacheEntry *entry = NULL, *pentry = NULL;
+    MapCacheRev *reventry;
+    target_phys_addr_t paddr_index;
+    int found = 0;
+
+    if (mapcache->last_address_vaddr == buffer) {
+        mapcache->last_address_index = -1;
+    }
+
+    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+        if (reventry->vaddr_req == buffer) {
+            paddr_index = reventry->paddr_index;
+            found = 1;
+            break;
+        }
+    }
+    if (!found) {
+        DPRINTF("qemu_invalidate_entry, could not find %p\n", buffer);
+        QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+            DPRINTF("   "TARGET_FMT_plx" -> %p is present\n", reventry->paddr_index, reventry->vaddr_req);
+        }
+        return;
+    }
+    QTAILQ_REMOVE(&mapcache->locked_entries, reventry, next);
+    qemu_free(reventry);
+
+    entry = &mapcache->entry[paddr_index % mapcache->nr_buckets];
+    while (entry && entry->paddr_index != paddr_index) {
+        pentry = entry;
+        entry = entry->next;
+    }
+    if (!entry) {
+        DPRINTF("Trying to unmap address %p that is not in the mapcache!\n", buffer);
+        return;
+    }
+    entry->lock--;
+    if (entry->lock > 0 || pentry == NULL) {
+        return;
+    }
+
+    pentry->next = entry->next;
+    if (munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE) != 0) {
+        perror("unmap fails");
+        exit(-1);
+    }
+    qemu_free(entry);
+}
+
+void qemu_invalidate_map_cache(void)
+{
+    unsigned long i;
+    MapCacheRev *reventry;
+
+    /* Flush pending AIO before destroying the mapcache */
+    qemu_aio_flush();
+
+    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+        DPRINTF("There should be no locked mappings at this time, "
+                "but "TARGET_FMT_plx" -> %p is present\n",
+                reventry->paddr_index, reventry->vaddr_req);
+    }
+
+    mapcache_lock();
+
+    for (i = 0; i < mapcache->nr_buckets; i++) {
+        MapCacheEntry *entry = &mapcache->entry[i];
+
+        if (entry->vaddr_base == NULL) {
+            continue;
+        }
+
+        if (munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE) != 0) {
+            perror("unmap fails");
+            exit(-1);
+        }
+
+        entry->paddr_index = 0;
+        entry->vaddr_base = NULL;
+    }
+
+    mapcache->last_address_index = -1;
+    mapcache->last_address_vaddr = NULL;
+
+    mapcache_unlock();
+}
diff --git a/xen-mapcache.h b/xen-mapcache.h
new file mode 100644
index 0000000..ef3717f
--- /dev/null
+++ b/xen-mapcache.h
@@ -0,0 +1,22 @@ 
+/*
+ * Copyright (C) 2011       Citrix Ltd.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef XEN_MAPCACHE_H
+#define XEN_MAPCACHE_H
+
+void     qemu_map_cache_init(void);
+uint8_t  *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, uint8_t lock);
+void     qemu_map_cache_unlock(void *phys_addr);
+ram_addr_t qemu_ram_addr_from_mapcache(void *ptr);
+void     qemu_invalidate_entry(uint8_t *buffer);
+void     qemu_invalidate_map_cache(void);
+
+#define mapcache_lock()   ((void)0)
+#define mapcache_unlock() ((void)0)
+
+#endif /* !XEN_MAPCACHE_H */
diff --git a/xen-stub.c b/xen-stub.c
index e335b67..eebc223 100644
--- a/xen-stub.c
+++ b/xen-stub.c
@@ -22,6 +22,10 @@  void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len)
 {
 }
 
+void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size)
+{
+}
+
 qemu_irq *xen_interrupt_controller_init(void)
 {
     return NULL;