Message ID | 1281622202-3453-10-git-send-email-stefano.stabellini@eu.citrix.com |
---|---|
State | New |
Headers | show |
On 08/12/2010 09:09 AM, stefano.stabellini@eu.citrix.com wrote: > From: Anthony PERARD<anthony.perard@citrix.com> > > Introduce a mapcache to handle the 64bit address space of the guest > from a 32bit userland process (Qemu). > The mapcache maps chunks of guest memory on demand, unmaps them when > they are not needed anymore. > > Signed-off-by: Anthony PERARD<anthony.perard@citrix.com> > Signed-off-by: Stefano Stabellini<stefano.stabellini@eu.citrix.com> > --- > hw/xen_machine_fv.c | 7 ++ > target-xen/qemu-xen.h | 15 +++ > target-xen/xen_mapcache.c | 233 +++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 255 insertions(+), 0 deletions(-) > > diff --git a/hw/xen_machine_fv.c b/hw/xen_machine_fv.c > index b1bc88d..58237d6 100644 > --- a/hw/xen_machine_fv.c > +++ b/hw/xen_machine_fv.c > @@ -84,6 +84,13 @@ static void xen_init_fv(ram_addr_t ram_size, > exit(1); > } > > +#if defined(__i386__) || defined(__x86_64__) > + if (qemu_map_cache_init()) { > + fprintf(stderr, "qemu_map_cache_init returned: error %d\n", errno); > + exit(-1); > + } > +#endif > + > xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_IOREQ_PFN,&ioreq_pfn); > fprintf(stderr, "shared page at pfn %lx\n", ioreq_pfn); > shared_page = xc_map_foreign_range(xen_xc, xen_domid, XC_PAGE_SIZE, > diff --git a/target-xen/qemu-xen.h b/target-xen/qemu-xen.h > index 79a4638..e4a7030 100644 > --- a/target-xen/qemu-xen.h > +++ b/target-xen/qemu-xen.h > @@ -13,6 +13,21 @@ > > /* xen_mapcache.c */ > > +#if (defined(__i386__) || defined(__x86_64__))&& !defined(QEMU_TOOL) > +#define MAPCACHE > + > +#if defined(__i386__) > +#define MAX_MCACHE_SIZE 0x40000000 /* 1GB max for x86 */ > +#define MCACHE_BUCKET_SHIFT 16 > +#elif defined(__x86_64__) > +#define MAX_MCACHE_SIZE 0x1000000000 /* 64GB max for x86_64 */ > +#define MCACHE_BUCKET_SHIFT 20 > +#endif > + > +#define MCACHE_BUCKET_SIZE (1UL<< MCACHE_BUCKET_SHIFT) > +#endif > + > +int qemu_map_cache_init(void); > uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, 
uint8_t lock); > void qemu_invalidate_entry(uint8_t *buffer); > void qemu_invalidate_map_cache(void); > diff --git a/target-xen/xen_mapcache.c b/target-xen/xen_mapcache.c > index 39daae2..efe036c 100644 > --- a/target-xen/xen_mapcache.c > +++ b/target-xen/xen_mapcache.c > @@ -1,5 +1,237 @@ > +#include "config.h" > + > +#include "hw/xen_backend.h" > #include "qemu-xen.h" > > +#include<xen/hvm/params.h> > +#include<sys/mman.h> > + > +#if defined(MAPCACHE) > + > +#define BITS_PER_LONG (sizeof(long)*8) > +#define BITS_TO_LONGS(bits) \ > + (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) > +#define DECLARE_BITMAP(name,bits) \ > + unsigned long name[BITS_TO_LONGS(bits)] > +#define test_bit(bit,map) \ > + (!!((map)[(bit)/BITS_PER_LONG]& (1UL<< ((bit)%BITS_PER_LONG)))) > + > +struct map_cache { > + unsigned long paddr_index; > + uint8_t *vaddr_base; > + DECLARE_BITMAP(valid_mapping, MCACHE_BUCKET_SIZE>>XC_PAGE_SHIFT); > + uint8_t lock; > + struct map_cache *next; > +}; > + > +struct map_cache_rev { > + uint8_t *vaddr_req; > + unsigned long paddr_index; > + QTAILQ_ENTRY(map_cache_rev) next; > +}; > > CODING_STYLE > +static struct map_cache *mapcache_entry; > +static unsigned long nr_buckets; > +QTAILQ_HEAD(map_cache_head, map_cache_rev) locked_entries = QTAILQ_HEAD_INITIALIZER(locked_entries); > + > +/* For most cases (>99.9%), the page address is the same. */ > +static unsigned long last_address_index = ~0UL; > +static uint8_t *last_address_vaddr; > Should refactor away global state. > +int qemu_map_cache_init(void) > +{ > + unsigned long size; > + > + nr_buckets = (((MAX_MCACHE_SIZE>> XC_PAGE_SHIFT) + > + (1UL<< (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) - 1)>> > + (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)); > + > + /* > + * Use mmap() directly: lets us allocate a big hash table with no up-front > + * cost in storage space. The OS will allocate memory only for the buckets > + * that we actually use. All others will contain all zeroes. 
> + */ > + size = nr_buckets * sizeof(struct map_cache); > + size = (size + XC_PAGE_SIZE - 1)& ~(XC_PAGE_SIZE - 1); > + fprintf(stderr, "qemu_map_cache_init nr_buckets = %lx size %lu\n", nr_buckets, size); > + mapcache_entry = mmap(NULL, size, PROT_READ|PROT_WRITE, > + MAP_SHARED|MAP_ANON, -1, 0); > + if (mapcache_entry == MAP_FAILED) { > + errno = ENOMEM; > + return -1; > + } > + > + return 0; > +} > + > +static void qemu_remap_bucket(struct map_cache *entry, > + unsigned long address_index) > +{ > + uint8_t *vaddr_base; > + xen_pfn_t pfns[MCACHE_BUCKET_SIZE>> XC_PAGE_SHIFT]; > + int err[MCACHE_BUCKET_SIZE>> XC_PAGE_SHIFT]; > + unsigned int i, j; > + > + if (entry->vaddr_base != NULL) { > + errno = munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE); > + if (errno) { > + fprintf(stderr, "unmap fails %d\n", errno); > + exit(-1); > + } > + } > + > + for (i = 0; i< MCACHE_BUCKET_SIZE>> XC_PAGE_SHIFT; i++) { > + pfns[i] = (address_index<< (MCACHE_BUCKET_SHIFT-XC_PAGE_SHIFT)) + i; > + } > + > + vaddr_base = xc_map_foreign_bulk(xen_xc, xen_domid, PROT_READ|PROT_WRITE, > + pfns, err, > + MCACHE_BUCKET_SIZE>> XC_PAGE_SHIFT); > + if (vaddr_base == NULL) { > + fprintf(stderr, "xc_map_foreign_bulk error %d\n", errno); > + exit(-1); > + } > + > + entry->vaddr_base = vaddr_base; > + entry->paddr_index = address_index; > + > + for (i = 0; i< MCACHE_BUCKET_SIZE>> XC_PAGE_SHIFT; i += BITS_PER_LONG) { > + unsigned long word = 0; > + j = ((i + BITS_PER_LONG)> (MCACHE_BUCKET_SIZE>> XC_PAGE_SHIFT)) ? 
> + (MCACHE_BUCKET_SIZE>> XC_PAGE_SHIFT) % BITS_PER_LONG : BITS_PER_LONG; > + while (j> 0) { > + word = (word<< 1) | !err[i + --j]; > + } > + entry->valid_mapping[i / BITS_PER_LONG] = word; > + } > +} > + > +uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, uint8_t lock) > +{ > + struct map_cache *entry, *pentry = NULL; > + unsigned long address_index = phys_addr>> MCACHE_BUCKET_SHIFT; > + unsigned long address_offset = phys_addr& (MCACHE_BUCKET_SIZE-1); > + > + if (address_index == last_address_index&& !lock) > + return last_address_vaddr + address_offset; > + > + entry =&mapcache_entry[address_index % nr_buckets]; > + > + while (entry&& entry->lock&& entry->paddr_index != address_index&& entry->vaddr_base) { > + pentry = entry; > + entry = entry->next; > + } > + if (!entry) { > + entry = qemu_mallocz(sizeof(struct map_cache)); > + pentry->next = entry; > + qemu_remap_bucket(entry, address_index); > + } else if (!entry->lock) { > + if (!entry->vaddr_base || entry->paddr_index != address_index || !test_bit(address_offset>>XC_PAGE_SHIFT, entry->valid_mapping)) > + qemu_remap_bucket(entry, address_index); > + } > + > + if (!test_bit(address_offset>>XC_PAGE_SHIFT, entry->valid_mapping)) { > + last_address_index = ~0UL; > + return NULL; > + } > + > + last_address_index = address_index; > + last_address_vaddr = entry->vaddr_base; > + if (lock) { > + struct map_cache_rev *reventry = qemu_mallocz(sizeof(struct map_cache_rev)); > + entry->lock++; > + reventry->vaddr_req = last_address_vaddr + address_offset; > + reventry->paddr_index = last_address_index; > + QTAILQ_INSERT_TAIL(&locked_entries, reventry, next); > + } > + > + return last_address_vaddr + address_offset; > +} > + > +void qemu_invalidate_entry(uint8_t *buffer) > +{ > + struct map_cache *entry = NULL, *pentry = NULL; > + struct map_cache_rev *reventry; > + unsigned long paddr_index; > + int found = 0; > + > + if (last_address_vaddr == buffer) > + last_address_index = ~0UL; > + > + 
QTAILQ_FOREACH(reventry,&locked_entries, next) { > + if (reventry->vaddr_req == buffer) { > + paddr_index = reventry->paddr_index; > + found = 1; > + break; > + } > + } > + if (!found) { > + fprintf(stderr, "qemu_invalidate_entry: could not find %p\n", buffer); > + QTAILQ_FOREACH(reventry,&locked_entries, next) { > + fprintf(stderr, " %lx -> %p is present\n", reventry->paddr_index, reventry->vaddr_req); > + } > + return; > + } > + QTAILQ_REMOVE(&locked_entries, reventry, next); > + qemu_free(reventry); > + > + entry =&mapcache_entry[paddr_index % nr_buckets]; > + while (entry&& entry->paddr_index != paddr_index) { > + pentry = entry; > + entry = entry->next; > + } > + if (!entry) { > + fprintf(stderr, "Trying to unmap address %p that is not in the mapcache!\n", buffer); > + return; > + } > + entry->lock--; > + if (entry->lock> 0 || pentry == NULL) > + return; > + > + pentry->next = entry->next; > + errno = munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE); > + if (errno) { > + fprintf(stderr, "unmap fails %d\n", errno); > + exit(-1); > + } > + qemu_free(entry); > +} > + > +void qemu_invalidate_map_cache(void) > +{ > + unsigned long i; > + struct map_cache_rev *reventry; > + > + qemu_aio_flush(); > + > + QTAILQ_FOREACH(reventry,&locked_entries, next) { > + fprintf(stderr, "There should be no locked mappings at this time, but %lx -> %p is present\n", reventry->paddr_index, reventry->vaddr_req); > + } > + > + mapcache_lock(); > + > + for (i = 0; i< nr_buckets; i++) { > + struct map_cache *entry =&mapcache_entry[i]; > + > + if (entry->vaddr_base == NULL) > + continue; > + > + errno = munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE); > + if (errno) { > + fprintf(stderr, "unmap fails %d\n", errno); > + exit(-1); > + } > + > + entry->paddr_index = 0; > + entry->vaddr_base = NULL; > + } > + > + last_address_index = ~0UL; > + last_address_vaddr = NULL; > + > + mapcache_unlock(); > +} > +#else > uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, uint8_t lock) > { > return 
phys_ram_addr(phys_addr); > @@ -12,3 +244,4 @@ void qemu_invalidate_map_cache(void) > void qemu_invalidate_entry(uint8_t *buffer) > { > } > +#endif /* !MAPCACHE */ > This should really tie into the RAMBlock infrastructure. Regards, Anthony Liguori
diff --git a/hw/xen_machine_fv.c b/hw/xen_machine_fv.c index b1bc88d..58237d6 100644 --- a/hw/xen_machine_fv.c +++ b/hw/xen_machine_fv.c @@ -84,6 +84,13 @@ static void xen_init_fv(ram_addr_t ram_size, exit(1); } +#if defined(__i386__) || defined(__x86_64__) + if (qemu_map_cache_init()) { + fprintf(stderr, "qemu_map_cache_init returned: error %d\n", errno); + exit(-1); + } +#endif + xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_IOREQ_PFN, &ioreq_pfn); fprintf(stderr, "shared page at pfn %lx\n", ioreq_pfn); shared_page = xc_map_foreign_range(xen_xc, xen_domid, XC_PAGE_SIZE, diff --git a/target-xen/qemu-xen.h b/target-xen/qemu-xen.h index 79a4638..e4a7030 100644 --- a/target-xen/qemu-xen.h +++ b/target-xen/qemu-xen.h @@ -13,6 +13,21 @@ /* xen_mapcache.c */ +#if (defined(__i386__) || defined(__x86_64__)) && !defined(QEMU_TOOL) +#define MAPCACHE + +#if defined(__i386__) +#define MAX_MCACHE_SIZE 0x40000000 /* 1GB max for x86 */ +#define MCACHE_BUCKET_SHIFT 16 +#elif defined(__x86_64__) +#define MAX_MCACHE_SIZE 0x1000000000 /* 64GB max for x86_64 */ +#define MCACHE_BUCKET_SHIFT 20 +#endif + +#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT) +#endif + +int qemu_map_cache_init(void); uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, uint8_t lock); void qemu_invalidate_entry(uint8_t *buffer); void qemu_invalidate_map_cache(void); diff --git a/target-xen/xen_mapcache.c b/target-xen/xen_mapcache.c index 39daae2..efe036c 100644 --- a/target-xen/xen_mapcache.c +++ b/target-xen/xen_mapcache.c @@ -1,5 +1,237 @@ +#include "config.h" + +#include "hw/xen_backend.h" #include "qemu-xen.h" +#include <xen/hvm/params.h> +#include <sys/mman.h> + +#if defined(MAPCACHE) + +#define BITS_PER_LONG (sizeof(long)*8) +#define BITS_TO_LONGS(bits) \ + (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) +#define DECLARE_BITMAP(name,bits) \ + unsigned long name[BITS_TO_LONGS(bits)] +#define test_bit(bit,map) \ + (!!((map)[(bit)/BITS_PER_LONG] & (1UL << ((bit)%BITS_PER_LONG)))) + +struct map_cache { + 
unsigned long paddr_index; + uint8_t *vaddr_base; + DECLARE_BITMAP(valid_mapping, MCACHE_BUCKET_SIZE>>XC_PAGE_SHIFT); + uint8_t lock; + struct map_cache *next; +}; + +struct map_cache_rev { + uint8_t *vaddr_req; + unsigned long paddr_index; + QTAILQ_ENTRY(map_cache_rev) next; +}; + +static struct map_cache *mapcache_entry; +static unsigned long nr_buckets; +QTAILQ_HEAD(map_cache_head, map_cache_rev) locked_entries = QTAILQ_HEAD_INITIALIZER(locked_entries); + +/* For most cases (>99.9%), the page address is the same. */ +static unsigned long last_address_index = ~0UL; +static uint8_t *last_address_vaddr; + +int qemu_map_cache_init(void) +{ + unsigned long size; + + nr_buckets = (((MAX_MCACHE_SIZE >> XC_PAGE_SHIFT) + + (1UL << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) - 1) >> + (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)); + + /* + * Use mmap() directly: lets us allocate a big hash table with no up-front + * cost in storage space. The OS will allocate memory only for the buckets + * that we actually use. All others will contain all zeroes. 
+ */ + size = nr_buckets * sizeof(struct map_cache); + size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1); + fprintf(stderr, "qemu_map_cache_init nr_buckets = %lx size %lu\n", nr_buckets, size); + mapcache_entry = mmap(NULL, size, PROT_READ|PROT_WRITE, + MAP_SHARED|MAP_ANON, -1, 0); + if (mapcache_entry == MAP_FAILED) { + errno = ENOMEM; + return -1; + } + + return 0; +} + +static void qemu_remap_bucket(struct map_cache *entry, + unsigned long address_index) +{ + uint8_t *vaddr_base; + xen_pfn_t pfns[MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT]; + int err[MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT]; + unsigned int i, j; + + if (entry->vaddr_base != NULL) { + errno = munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE); + if (errno) { + fprintf(stderr, "unmap fails %d\n", errno); + exit(-1); + } + } + + for (i = 0; i < MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT; i++) { + pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-XC_PAGE_SHIFT)) + i; + } + + vaddr_base = xc_map_foreign_bulk(xen_xc, xen_domid, PROT_READ|PROT_WRITE, + pfns, err, + MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT); + if (vaddr_base == NULL) { + fprintf(stderr, "xc_map_foreign_bulk error %d\n", errno); + exit(-1); + } + + entry->vaddr_base = vaddr_base; + entry->paddr_index = address_index; + + for (i = 0; i < MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT; i += BITS_PER_LONG) { + unsigned long word = 0; + j = ((i + BITS_PER_LONG) > (MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT)) ? 
+ (MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT) % BITS_PER_LONG : BITS_PER_LONG; + while (j > 0) { + word = (word << 1) | !err[i + --j]; + } + entry->valid_mapping[i / BITS_PER_LONG] = word; + } +} + +uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, uint8_t lock) +{ + struct map_cache *entry, *pentry = NULL; + unsigned long address_index = phys_addr >> MCACHE_BUCKET_SHIFT; + unsigned long address_offset = phys_addr & (MCACHE_BUCKET_SIZE-1); + + if (address_index == last_address_index && !lock) + return last_address_vaddr + address_offset; + + entry = &mapcache_entry[address_index % nr_buckets]; + + while (entry && entry->lock && entry->paddr_index != address_index && entry->vaddr_base) { + pentry = entry; + entry = entry->next; + } + if (!entry) { + entry = qemu_mallocz(sizeof(struct map_cache)); + pentry->next = entry; + qemu_remap_bucket(entry, address_index); + } else if (!entry->lock) { + if (!entry->vaddr_base || entry->paddr_index != address_index || !test_bit(address_offset>>XC_PAGE_SHIFT, entry->valid_mapping)) + qemu_remap_bucket(entry, address_index); + } + + if (!test_bit(address_offset>>XC_PAGE_SHIFT, entry->valid_mapping)) { + last_address_index = ~0UL; + return NULL; + } + + last_address_index = address_index; + last_address_vaddr = entry->vaddr_base; + if (lock) { + struct map_cache_rev *reventry = qemu_mallocz(sizeof(struct map_cache_rev)); + entry->lock++; + reventry->vaddr_req = last_address_vaddr + address_offset; + reventry->paddr_index = last_address_index; + QTAILQ_INSERT_TAIL(&locked_entries, reventry, next); + } + + return last_address_vaddr + address_offset; +} + +void qemu_invalidate_entry(uint8_t *buffer) +{ + struct map_cache *entry = NULL, *pentry = NULL; + struct map_cache_rev *reventry; + unsigned long paddr_index; + int found = 0; + + if (last_address_vaddr == buffer) + last_address_index = ~0UL; + + QTAILQ_FOREACH(reventry, &locked_entries, next) { + if (reventry->vaddr_req == buffer) { + paddr_index = reventry->paddr_index; + found = 
1; + break; + } + } + if (!found) { + fprintf(stderr, "qemu_invalidate_entry: could not find %p\n", buffer); + QTAILQ_FOREACH(reventry, &locked_entries, next) { + fprintf(stderr, " %lx -> %p is present\n", reventry->paddr_index, reventry->vaddr_req); + } + return; + } + QTAILQ_REMOVE(&locked_entries, reventry, next); + qemu_free(reventry); + + entry = &mapcache_entry[paddr_index % nr_buckets]; + while (entry && entry->paddr_index != paddr_index) { + pentry = entry; + entry = entry->next; + } + if (!entry) { + fprintf(stderr, "Trying to unmap address %p that is not in the mapcache!\n", buffer); + return; + } + entry->lock--; + if (entry->lock > 0 || pentry == NULL) + return; + + pentry->next = entry->next; + errno = munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE); + if (errno) { + fprintf(stderr, "unmap fails %d\n", errno); + exit(-1); + } + qemu_free(entry); +} + +void qemu_invalidate_map_cache(void) +{ + unsigned long i; + struct map_cache_rev *reventry; + + qemu_aio_flush(); + + QTAILQ_FOREACH(reventry, &locked_entries, next) { + fprintf(stderr, "There should be no locked mappings at this time, but %lx -> %p is present\n", reventry->paddr_index, reventry->vaddr_req); + } + + mapcache_lock(); + + for (i = 0; i < nr_buckets; i++) { + struct map_cache *entry = &mapcache_entry[i]; + + if (entry->vaddr_base == NULL) + continue; + + errno = munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE); + if (errno) { + fprintf(stderr, "unmap fails %d\n", errno); + exit(-1); + } + + entry->paddr_index = 0; + entry->vaddr_base = NULL; + } + + last_address_index = ~0UL; + last_address_vaddr = NULL; + + mapcache_unlock(); +} +#else uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, uint8_t lock) { return phys_ram_addr(phys_addr); @@ -12,3 +244,4 @@ void qemu_invalidate_map_cache(void) void qemu_invalidate_entry(uint8_t *buffer) { } +#endif /* !MAPCACHE */