Patchwork [uq/master,2/2] Add option to use file backed guest memory

login
register
mail settings
Submitter Marcelo Tosatti
Date March 1, 2010, 11:25 p.m.
Message ID <20100301232508.GA13703@amt.cnet>
Download mbox | patch
Permalink /patch/46618/
State New
Headers show

Comments

Marcelo Tosatti - March 1, 2010, 11:25 p.m.
Hi Paul,

Thank you for reviewing.

On Sun, Feb 28, 2010 at 01:28:16AM +0000, Paul Brook wrote:
> IMHO it would be better to check the mem_path != NULL here, rather than 
> burying the check in file_ram_alloc.
> 
> >+    if (memory < hpagesize) {
> >+        return NULL;
> >+    }
> 
> Ah, so it's actually "allocate memory in $path, if you feel like it". Good job 
> we aren't relying on this for correctness.  At minimum I recommend documenting 
> this heuristic.

More like "allocate memory in $path, if it is larger than a hugepage."

Huge pages are an optimization.

> 
> >+    if (!new_block->host) {
> > #if defined(TARGET_S390X) && defined(CONFIG_KVM)
> >-    /* XXX S390 KVM requires the topmost vma of the RAM to be < 256GB */
> 
> By my reading this implies -mempath is probably broken on s390 KVM?
> 
> >+DEF("mem-path", HAS_ARG, QEMU_OPTION_mempath,
> >+    "-mem-path FILE  provide backing storage for guest RAM\n")
> >+STEXI
> >+@item -mem-path @var{path}
> >+Allocate guest RAM from a temporarily created file in @var{path}.
> >+ETEXI
> 
> You should mention that this is only useful when PATH happens to be a linux 
> hugetlbfs mount.

It can be used with a file, since it's mapped as MAP_PRIVATE.

Can you check whether the patch below properly addresses your concerns.


Add option to use file backed guest memory

Port qemu-kvm's -mem-path and -mem-prealloc options. These are useful 
for backing guest memory with huge pages via hugetlbfs.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
CC: john cooper <john.cooper@redhat.com>
Marcelo Tosatti - March 1, 2010, 11:32 p.m.
On Mon, Mar 01, 2010 at 08:25:08PM -0300, Marcelo Tosatti wrote:
> Hi Paul,
> 
> Thank you for reviewing.
> 
> On Sun, Feb 28, 2010 at 01:28:16AM +0000, Paul Brook wrote:
> > IMHO it would be better to check the mem_path != NULL here, rather than 
> > burying the check in file_ram_alloc.
> > 
> > >+    if (memory < hpagesize) {
> > >+        return NULL;
> > >+    }
> > 
> > Ah, so it's actually "allocate memory in $path, if you feel like it". Good job 
> > we aren't relying on this for correctness.  At minimum I recommend documenting 
> > this heuristic.
> 
> More like "allocate memory in $path, if it is larger than a hugepage."
> 
> Huge pages are an optimization.
> 
> > 
> > >+    if (!new_block->host) {
> > > #if defined(TARGET_S390X) && defined(CONFIG_KVM)
> > >-    /* XXX S390 KVM requires the topmost vma of the RAM to be < 256GB */
> > 
> > By my reading this implies -mempath is probably broken on s390 KVM?
> > 
> > >+DEF("mem-path", HAS_ARG, QEMU_OPTION_mempath,
> > >+    "-mem-path FILE  provide backing storage for guest RAM\n")
> > >+STEXI
> > >+@item -mem-path @var{path}
> > >+Allocate guest RAM from a temporarily created file in @var{path}.
> > >+ETEXI
> > 
> > You should mention that this is only useful when PATH happens to be a linux 
> > hugetlbfs mount.
> 
> It can be used with a file, since it's mapped as MAP_PRIVATE.

I meant a non-hugetlbfs-backed file.

Patch

Index: qemu/cpu-all.h
===================================================================
--- qemu.orig/cpu-all.h
+++ qemu/cpu-all.h
@@ -847,6 +847,9 @@  extern uint8_t *phys_ram_dirty;
 extern ram_addr_t ram_size;
 extern ram_addr_t last_ram_offset;
 
+extern const char *mem_path;
+extern int mem_prealloc;
+
 /* physical memory access */
 
 /* MMIO pages are identified by a combination of an IO device index and
Index: qemu/exec.c
===================================================================
--- qemu.orig/exec.c
+++ qemu/exec.c
@@ -2529,6 +2529,99 @@  void qemu_flush_coalesced_mmio_buffer(vo
         kvm_flush_coalesced_mmio_buffer();
 }
 
+#if defined(__linux__) && !defined(TARGET_S390X)
+
+#include <sys/vfs.h>
+
+#define HUGETLBFS_MAGIC       0x958458f6
+
+static long gethugepagesize(const char *path)
+{
+    struct statfs fs;
+    int ret;
+
+    do {
+	    ret = statfs(path, &fs);
+    } while (ret != 0 && errno == EINTR);
+
+    if (ret != 0) {
+	    perror("statfs");
+	    return 0;
+    }
+
+    if (fs.f_type != HUGETLBFS_MAGIC)
+	    fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
+
+    return fs.f_bsize;
+}
+
+static void *file_ram_alloc(ram_addr_t memory, const char *path)
+{
+    char *filename;
+    void *area;
+    int fd;
+#ifdef MAP_POPULATE
+    int flags;
+#endif
+    unsigned long hpagesize;
+
+    hpagesize = gethugepagesize(path);
+    if (!hpagesize) {
+	return NULL;
+    }
+
+    if (memory < hpagesize) {
+        return NULL;
+    }
+
+    if (kvm_enabled() && !kvm_has_sync_mmu()) {
+        fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
+        return NULL;
+    }
+
+    if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
+	return NULL;
+    }
+
+    fd = mkstemp(filename);
+    if (fd < 0) {
+	perror("mkstemp");
+	free(filename);
+	return NULL;
+    }
+    unlink(filename);
+    free(filename);
+
+    memory = (memory+hpagesize-1) & ~(hpagesize-1);
+
+    /*
+     * ftruncate is not supported by hugetlbfs in older
+     * hosts, so don't bother bailing out on errors.
+     * If anything goes wrong with it under other filesystems,
+     * mmap will fail.
+     */
+    if (ftruncate(fd, memory))
+	perror("ftruncate");
+
+#ifdef MAP_POPULATE
+    /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
+     * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
+     * to sidestep this quirk.
+     */
+    flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
+    area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
+#else
+    area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+#endif
+    if (area == MAP_FAILED) {
+	perror("file_ram_alloc: can't mmap RAM pages");
+	close(fd);
+	return (NULL);
+    }
+    return area;
+}
+#endif
+
 ram_addr_t qemu_ram_alloc(ram_addr_t size)
 {
     RAMBlock *new_block;
@@ -2536,16 +2629,28 @@  ram_addr_t qemu_ram_alloc(ram_addr_t siz
     size = TARGET_PAGE_ALIGN(size);
     new_block = qemu_malloc(sizeof(*new_block));
 
+    if (mem_path) {
+#if defined (__linux__) && !defined(TARGET_S390X)
+        new_block->host = file_ram_alloc(size, mem_path);
+        if (!new_block->host)
+            exit(1);
+#else
+        fprintf(stderr, "-mem-path option unsupported\n");
+        exit(1);
+#endif
+    } else {
 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
-    /* XXX S390 KVM requires the topmost vma of the RAM to be < 256GB */
-    new_block->host = mmap((void*)0x1000000, size, PROT_EXEC|PROT_READ|PROT_WRITE,
-                           MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+        /* XXX S390 KVM requires the topmost vma of the RAM to be < 256GB */
+        new_block->host = mmap((void*)0x1000000, size,
+                                PROT_EXEC|PROT_READ|PROT_WRITE,
+                                MAP_SHARED | MAP_ANONYMOUS, -1, 0);
 #else
-    new_block->host = qemu_vmalloc(size);
+        new_block->host = qemu_vmalloc(size);
 #endif
 #ifdef MADV_MERGEABLE
-    madvise(new_block->host, size, MADV_MERGEABLE);
+        madvise(new_block->host, size, MADV_MERGEABLE);
 #endif
+    }
     new_block->offset = last_ram_offset;
     new_block->length = size;
 
Index: qemu/qemu-options.hx
===================================================================
--- qemu.orig/qemu-options.hx
+++ qemu/qemu-options.hx
@@ -314,6 +314,22 @@  a suffix of ``M'' or ``G'' can be used t
 gigabytes respectively.
 ETEXI
 
+DEF("mem-path", HAS_ARG, QEMU_OPTION_mempath,
+    "-mem-path FILE  provide backing storage for guest RAM\n")
+STEXI
+@item -mem-path @var{path}
+Allocate guest RAM from a temporarily created file in @var{path}.
+ETEXI
+
+#ifdef MAP_POPULATE
+DEF("mem-prealloc", 0, QEMU_OPTION_mem_prealloc,
+    "-mem-prealloc   preallocate guest memory (use with -mem-path)\n")
+STEXI
+@item -mem-prealloc
+Preallocate memory when using -mem-path.
+ETEXI
+#endif
+
 DEF("k", HAS_ARG, QEMU_OPTION_k,
     "-k language     use keyboard layout (for example 'fr' for French)\n")
 STEXI
Index: qemu/vl.c
===================================================================
--- qemu.orig/vl.c
+++ qemu/vl.c
@@ -185,6 +185,10 @@  enum vga_retrace_method vga_retrace_meth
 DisplayType display_type = DT_DEFAULT;
 const char* keyboard_layout = NULL;
 ram_addr_t ram_size;
+const char *mem_path = NULL;
+#ifdef MAP_POPULATE
+int mem_prealloc = 0; /* force preallocation of physical target memory */
+#endif
 int nb_nics;
 NICInfo nd_table[MAX_NICS];
 int vm_running;
@@ -5216,6 +5220,14 @@  int main(int argc, char **argv, char **e
                 ram_size = value;
                 break;
             }
+            case QEMU_OPTION_mempath:
+                mem_path = optarg;
+                break;
+#ifdef MAP_POPULATE
+            case QEMU_OPTION_mem_prealloc:
+                mem_prealloc = 1;
+                break;
+#endif
             case QEMU_OPTION_d:
                 {
                     int mask;