Message ID | 1444985258-6605-3-git-send-email-mst@redhat.com |
---|---|
State | New |
Headers | show |
On 16/10/2015 10:49, Michael S. Tsirkin wrote: > Anonymous and file-backed RAM allocation are now almost exactly the same. > > Reduce code duplication by moving RAM mmap code out of oslib-posix.c and > exec.c. > > Reported-by: Marc-André Lureau <mlureau@redhat.com> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> > Acked-by: Paolo Bonzini <pbonzini@redhat.com> > --- > include/qemu/mmap-alloc.h | 10 +++++++ > exec.c | 47 +++++++------------------------ > util/mmap-alloc.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++ > util/oslib-posix.c | 28 +++---------------- > util/Makefile.objs | 1 + > 5 files changed, 96 insertions(+), 61 deletions(-) > create mode 100644 include/qemu/mmap-alloc.h > create mode 100644 util/mmap-alloc.c > > diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h > new file mode 100644 > index 0000000..56388e6 > --- /dev/null > +++ b/include/qemu/mmap-alloc.h > @@ -0,0 +1,10 @@ > +#ifndef QEMU_MMAP_ALLOC > +#define QEMU_MMAP_ALLOC > + > +#include "qemu-common.h" > + > +void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared); > + > +void qemu_ram_munmap(void *ptr, size_t size); > + > +#endif > diff --git a/exec.c b/exec.c > index 7d90a52..4505dc7 100644 > --- a/exec.c > +++ b/exec.c > @@ -55,6 +55,9 @@ > #include "exec/ram_addr.h" > > #include "qemu/range.h" > +#ifndef _WIN32 > +#include "qemu/mmap-alloc.h" > +#endif > > //#define DEBUG_SUBPAGE > > @@ -84,9 +87,9 @@ static MemoryRegion io_mem_unassigned; > */ > #define RAM_RESIZEABLE (1 << 2) > > -/* An extra page is mapped on top of this RAM. > +/* RAM is backed by an mmapped file. > */ > -#define RAM_EXTRA (1 << 3) > +#define RAM_FILE (1 << 3) > #endif > > struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus); > @@ -1188,13 +1191,10 @@ static void *file_ram_alloc(RAMBlock *block, > char *filename; > char *sanitized_name; > char *c; > - void *ptr; > - void *area = NULL; > + void *area; > int fd; > uint64_t hpagesize; > - uint64_t total; > Error *local_err = NULL; > - size_t offset; > > hpagesize = gethugepagesize(path, &local_err); > if (local_err) { > @@ -1238,7 +1238,6 @@ static void *file_ram_alloc(RAMBlock *block, > g_free(filename); > > memory = ROUND_UP(memory, hpagesize); > - total = memory + hpagesize; > > /* > * ftruncate is not supported by hugetlbfs in older > @@ -1250,40 +1249,14 @@ static void *file_ram_alloc(RAMBlock *block, > perror("ftruncate"); > } > > - ptr = mmap(0, total, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, > - -1, 0); > - if (ptr == MAP_FAILED) { > - error_setg_errno(errp, errno, > - "unable to allocate memory range for hugepages"); > - close(fd); > - goto error; > - } > - > - offset = QEMU_ALIGN_UP((uintptr_t)ptr, hpagesize) - (uintptr_t)ptr; > - > - area = mmap(ptr + offset, memory, PROT_READ | PROT_WRITE, > - (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE) | > - MAP_FIXED, > - fd, 0); > + area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED); > if (area == MAP_FAILED) { > error_setg_errno(errp, errno, > "unable to map backing store for hugepages"); > - munmap(ptr, total); > close(fd); > goto error; > } > > - if (offset > 0) { > - munmap(ptr, offset); > - } > - ptr += offset; > - total -= offset; > - > - if (total > memory + getpagesize()) { > - munmap(ptr + memory + getpagesize(), > - total - memory - getpagesize()); > - } > - > if (mem_prealloc) { > os_mem_prealloc(fd, area, memory); > } > @@ -1601,7 +1574,7 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, > new_block->used_length = size; > new_block->max_length = size; > new_block->flags = share ? RAM_SHARED : 0; > - new_block->flags |= RAM_EXTRA; > + new_block->flags |= RAM_FILE; > new_block->host = file_ram_alloc(new_block, size, > mem_path, errp); > if (!new_block->host) { > @@ -1703,8 +1676,8 @@ static void reclaim_ramblock(RAMBlock *block) > xen_invalidate_map_cache_entry(block->host); > #ifndef _WIN32 > } else if (block->fd >= 0) { > - if (block->flags & RAM_EXTRA) { > - munmap(block->host, block->max_length + getpagesize()); > + if (block->flags & RAM_FILE) { > + qemu_ram_munmap(block->host, block->max_length); > } else { > munmap(block->host, block->max_length); > } > diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c > new file mode 100644 > index 0000000..1394269 > --- /dev/null > +++ b/util/mmap-alloc.c > @@ -0,0 +1,71 @@ > +/* > + * Support for RAM backed by mmaped host memory. > + * > + * Copyright (c) 2015 Red Hat, Inc. > + * > + * Authors: > + * Michael S. Tsirkin <mst@redhat.com> > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or > + * later. See the COPYING file in the top-level directory. > + */ > +#include <qemu/mmap-alloc.h> > +#include <sys/types.h> > +#include <sys/mman.h> > +#include <assert.h> > + > +void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) > +{ > + /* > + * Note: this always allocates at least one extra page of virtual address > + * space, even if size is already aligned. > + */ > + size_t total = size + align; > + void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); > + size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr; > + void *ptr1; > + > + if (ptr == MAP_FAILED) { > + return NULL; Existing code is checking for MAP_FAILED (and NULL is a valid return value of mmap), so you should return ptr. > + } > + > + /* Make sure align is a power of 2 */ > + assert(!(align & (align - 1))); > + /* Always align to host page size */ > + assert(align >= getpagesize()); > + > + ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE, > + MAP_FIXED | > + (fd == -1 ? MAP_ANONYMOUS : 0) | > + (shared ? MAP_SHARED : MAP_PRIVATE), > + fd, 0); > + if (ptr1 == MAP_FAILED) { > + munmap(ptr, total); > + return NULL; Same here---return ptr1 instead of NULL. Paolo > + } > + > + ptr += offset; > + total -= offset; > + > + if (offset > 0) { > + munmap(ptr - offset, offset); > + } > + > + /* > + * Leave a single PROT_NONE page allocated after the RAM block, to serve as > + * a guard page guarding against potential buffer overflows. > + */ > + if (total > size + getpagesize()) { > + munmap(ptr + size + getpagesize(), total - size - getpagesize()); > + } > + > + return ptr; > +} > + > +void qemu_ram_munmap(void *ptr, size_t size) > +{ > + if (ptr) { > + /* Unmap both the RAM block and the guard page */ > + munmap(ptr, size + getpagesize()); > + } > +} > diff --git a/util/oslib-posix.c b/util/oslib-posix.c > index a0fcdc2..892d2d8 100644 > --- a/util/oslib-posix.c > +++ b/util/oslib-posix.c > @@ -72,6 +72,8 @@ extern int daemon(int, int); > #include <sys/sysctl.h> > #endif > > +#include <qemu/mmap-alloc.h> > + > int qemu_get_thread_id(void) > { > #if defined(__linux__) > @@ -128,10 +130,7 @@ void *qemu_memalign(size_t alignment, size_t size) > void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment) > { > size_t align = QEMU_VMALLOC_ALIGN; > - size_t total = size + align; > - void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); > - size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr; > - void *ptr1; > + void *ptr = qemu_ram_mmap(-1, size, align, false); > > if (ptr == MAP_FAILED) { > return NULL; > @@ -141,23 +140,6 @@ void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment) > *alignment = align; > } > > - ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE, > - MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); > - if (ptr1 == MAP_FAILED) { > - munmap(ptr, total); > - return NULL; > - } > - > - ptr += offset; > - total -= offset; > - > - if (offset > 0) { > - munmap(ptr - offset, offset); > - } > - if (total > size + getpagesize()) { > - munmap(ptr + size + getpagesize(), total - size - getpagesize()); > - } > - > trace_qemu_anon_ram_alloc(size, ptr); > return ptr; > } > @@ -171,9 +153,7 @@ void qemu_vfree(void *ptr) > void qemu_anon_ram_free(void *ptr, size_t size) > { > trace_qemu_anon_ram_free(ptr, size); > - if (ptr) { > - munmap(ptr, size + getpagesize()); > - } > + qemu_ram_munmap(ptr, size); > } > > void qemu_set_block(int fd) > diff --git a/util/Makefile.objs b/util/Makefile.objs > index 114d657..f011b81 100644 > --- a/util/Makefile.objs > +++ b/util/Makefile.objs > @@ -1,6 +1,7 @@ > util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o > util-obj-$(CONFIG_WIN32) += oslib-win32.o qemu-thread-win32.o event_notifier-win32.o > util-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o event_notifier-posix.o qemu-openpty.o > +util-obj-$(CONFIG_POSIX) += mmap-alloc.o > util-obj-y += envlist.o path.o module.o > util-obj-$(call lnot,$(CONFIG_INT128)) += host-utils.o > util-obj-y += bitmap.o bitops.o hbitmap.o >
diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h new file mode 100644 index 0000000..56388e6 --- /dev/null +++ b/include/qemu/mmap-alloc.h @@ -0,0 +1,10 @@ +#ifndef QEMU_MMAP_ALLOC +#define QEMU_MMAP_ALLOC + +#include "qemu-common.h" + +void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared); + +void qemu_ram_munmap(void *ptr, size_t size); + +#endif diff --git a/exec.c b/exec.c index 7d90a52..4505dc7 100644 --- a/exec.c +++ b/exec.c @@ -55,6 +55,9 @@ #include "exec/ram_addr.h" #include "qemu/range.h" +#ifndef _WIN32 +#include "qemu/mmap-alloc.h" +#endif //#define DEBUG_SUBPAGE @@ -84,9 +87,9 @@ static MemoryRegion io_mem_unassigned; */ #define RAM_RESIZEABLE (1 << 2) -/* An extra page is mapped on top of this RAM. +/* RAM is backed by an mmapped file. */ -#define RAM_EXTRA (1 << 3) +#define RAM_FILE (1 << 3) #endif struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus); @@ -1188,13 +1191,10 @@ static void *file_ram_alloc(RAMBlock *block, char *filename; char *sanitized_name; char *c; - void *ptr; - void *area = NULL; + void *area; int fd; uint64_t hpagesize; - uint64_t total; Error *local_err = NULL; - size_t offset; hpagesize = gethugepagesize(path, &local_err); if (local_err) { @@ -1238,7 +1238,6 @@ static void *file_ram_alloc(RAMBlock *block, g_free(filename); memory = ROUND_UP(memory, hpagesize); - total = memory + hpagesize; /* * ftruncate is not supported by hugetlbfs in older @@ -1250,40 +1249,14 @@ static void *file_ram_alloc(RAMBlock *block, perror("ftruncate"); } - ptr = mmap(0, total, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, - -1, 0); - if (ptr == MAP_FAILED) { - error_setg_errno(errp, errno, - "unable to allocate memory range for hugepages"); - close(fd); - goto error; - } - - offset = QEMU_ALIGN_UP((uintptr_t)ptr, hpagesize) - (uintptr_t)ptr; - - area = mmap(ptr + offset, memory, PROT_READ | PROT_WRITE, - (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE) | - MAP_FIXED, - fd, 0); + area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED); if (area == MAP_FAILED) { error_setg_errno(errp, errno, "unable to map backing store for hugepages"); - munmap(ptr, total); close(fd); goto error; } - if (offset > 0) { - munmap(ptr, offset); - } - ptr += offset; - total -= offset; - - if (total > memory + getpagesize()) { - munmap(ptr + memory + getpagesize(), - total - memory - getpagesize()); - } - if (mem_prealloc) { os_mem_prealloc(fd, area, memory); } @@ -1601,7 +1574,7 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, new_block->used_length = size; new_block->max_length = size; new_block->flags = share ? RAM_SHARED : 0; - new_block->flags |= RAM_EXTRA; + new_block->flags |= RAM_FILE; new_block->host = file_ram_alloc(new_block, size, mem_path, errp); if (!new_block->host) { @@ -1703,8 +1676,8 @@ static void reclaim_ramblock(RAMBlock *block) xen_invalidate_map_cache_entry(block->host); #ifndef _WIN32 } else if (block->fd >= 0) { - if (block->flags & RAM_EXTRA) { - munmap(block->host, block->max_length + getpagesize()); + if (block->flags & RAM_FILE) { + qemu_ram_munmap(block->host, block->max_length); } else { munmap(block->host, block->max_length); } diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c new file mode 100644 index 0000000..1394269 --- /dev/null +++ b/util/mmap-alloc.c @@ -0,0 +1,71 @@ +/* + * Support for RAM backed by mmaped host memory. + * + * Copyright (c) 2015 Red Hat, Inc. + * + * Authors: + * Michael S. Tsirkin <mst@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ +#include <qemu/mmap-alloc.h> +#include <sys/types.h> +#include <sys/mman.h> +#include <assert.h> + +void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) +{ + /* + * Note: this always allocates at least one extra page of virtual address + * space, even if size is already aligned. + */ + size_t total = size + align; + void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr; + void *ptr1; + + if (ptr == MAP_FAILED) { + return NULL; + } + + /* Make sure align is a power of 2 */ + assert(!(align & (align - 1))); + /* Always align to host page size */ + assert(align >= getpagesize()); + + ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE, + MAP_FIXED | + (fd == -1 ? MAP_ANONYMOUS : 0) | + (shared ? MAP_SHARED : MAP_PRIVATE), + fd, 0); + if (ptr1 == MAP_FAILED) { + munmap(ptr, total); + return NULL; + } + + ptr += offset; + total -= offset; + + if (offset > 0) { + munmap(ptr - offset, offset); + } + + /* + * Leave a single PROT_NONE page allocated after the RAM block, to serve as + * a guard page guarding against potential buffer overflows. + */ + if (total > size + getpagesize()) { + munmap(ptr + size + getpagesize(), total - size - getpagesize()); + } + + return ptr; +} + +void qemu_ram_munmap(void *ptr, size_t size) +{ + if (ptr) { + /* Unmap both the RAM block and the guard page */ + munmap(ptr, size + getpagesize()); + } +} diff --git a/util/oslib-posix.c b/util/oslib-posix.c index a0fcdc2..892d2d8 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -72,6 +72,8 @@ extern int daemon(int, int); #include <sys/sysctl.h> #endif +#include <qemu/mmap-alloc.h> + int qemu_get_thread_id(void) { #if defined(__linux__) @@ -128,10 +130,7 @@ void *qemu_memalign(size_t alignment, size_t size) void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment) { size_t align = QEMU_VMALLOC_ALIGN; - size_t total = size + align; - void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr; - void *ptr1; + void *ptr = qemu_ram_mmap(-1, size, align, false); if (ptr == MAP_FAILED) { return NULL; @@ -141,23 +140,6 @@ void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment) *alignment = align; } - ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - if (ptr1 == MAP_FAILED) { - munmap(ptr, total); - return NULL; - } - - ptr += offset; - total -= offset; - - if (offset > 0) { - munmap(ptr - offset, offset); - } - if (total > size + getpagesize()) { - munmap(ptr + size + getpagesize(), total - size - getpagesize()); - } - trace_qemu_anon_ram_alloc(size, ptr); return ptr; } @@ -171,9 +153,7 @@ void qemu_vfree(void *ptr) void qemu_anon_ram_free(void *ptr, size_t size) { trace_qemu_anon_ram_free(ptr, size); - if (ptr) { - munmap(ptr, size + getpagesize()); - } + qemu_ram_munmap(ptr, size); } void qemu_set_block(int fd) diff --git a/util/Makefile.objs b/util/Makefile.objs index 114d657..f011b81 100644 --- a/util/Makefile.objs +++ b/util/Makefile.objs @@ -1,6 +1,7 @@ util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o util-obj-$(CONFIG_WIN32) += oslib-win32.o qemu-thread-win32.o event_notifier-win32.o util-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o event_notifier-posix.o qemu-openpty.o +util-obj-$(CONFIG_POSIX) += mmap-alloc.o util-obj-y += envlist.o path.o module.o util-obj-$(call lnot,$(CONFIG_INT128)) += host-utils.o util-obj-y += bitmap.o bitops.o hbitmap.o