diff mbox

[v3,08/32] exec: allow memory to be allocated from any kind of path

Message ID 1444535584-18220-9-git-send-email-guangrong.xiao@linux.intel.com
State New
Headers show

Commit Message

Xiao Guangrong Oct. 11, 2015, 3:52 a.m. UTC
Currently, file_ram_alloc() is designed for hugetlbfs; however, the memory
of an nvdimm can come from either a raw pmem device, e.g. /dev/pmem, or a
file located on a DAX-enabled filesystem.

So this patch lets it work on any kind of path.

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 exec.c | 55 ++++++++++++++-----------------------------------------
 1 file changed, 14 insertions(+), 41 deletions(-)

Comments

Michael S. Tsirkin Oct. 12, 2015, 10:08 a.m. UTC | #1
On Sun, Oct 11, 2015 at 11:52:40AM +0800, Xiao Guangrong wrote:
> Currently file_ram_alloc() is designed for hugetlbfs, however, the memory
> of nvdimm can come from either raw pmem device eg, /dev/pmem, or the file
> locates at DAX enabled filesystem
> 
> So this patch let it work on any kind of path
> 
> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>

This conflicts with map alloc rework.
Please rebase this on top of my tree.


> ---
>  exec.c | 55 ++++++++++++++-----------------------------------------
>  1 file changed, 14 insertions(+), 41 deletions(-)
> 
> diff --git a/exec.c b/exec.c
> index 7d90a52..70cb0ef 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -1154,32 +1154,6 @@ void qemu_mutex_unlock_ramlist(void)
>  }
>  
>  #ifdef __linux__
> -
> -#include <sys/vfs.h>
> -
> -#define HUGETLBFS_MAGIC       0x958458f6
> -
> -static long gethugepagesize(const char *path, Error **errp)
> -{
> -    struct statfs fs;
> -    int ret;
> -
> -    do {
> -        ret = statfs(path, &fs);
> -    } while (ret != 0 && errno == EINTR);
> -
> -    if (ret != 0) {
> -        error_setg_errno(errp, errno, "failed to get page size of file %s",
> -                         path);
> -        return 0;
> -    }
> -
> -    if (fs.f_type != HUGETLBFS_MAGIC)
> -        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
> -
> -    return fs.f_bsize;

What this *actually* is trying to warn against is that
mapping a regular file (as opposed to hugetlbfs)
means transparent huge pages don't work.

So I don't think we should drop this warning completely.
Either let's add the nvdimm magic, or simply check the
page size.


> -}
> -
>  static void *file_ram_alloc(RAMBlock *block,
>                              ram_addr_t memory,
>                              const char *path,
> @@ -1191,22 +1165,21 @@ static void *file_ram_alloc(RAMBlock *block,
>      void *ptr;
>      void *area = NULL;
>      int fd;
> -    uint64_t hpagesize;
> +    uint64_t pagesize;
>      uint64_t total;
> -    Error *local_err = NULL;
>      size_t offset;
>  
> -    hpagesize = gethugepagesize(path, &local_err);
> -    if (local_err) {
> -        error_propagate(errp, local_err);
> +    pagesize = qemu_file_get_page_size(path);
> +    if (!pagesize) {
> +        error_setg(errp, "can't get page size for %s", path);
>          goto error;
>      }
> -    block->mr->align = hpagesize;
> +    block->mr->align = pagesize;
>  
> -    if (memory < hpagesize) {
> +    if (memory < pagesize) {
>          error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
> -                   "or larger than huge page size 0x%" PRIx64,
> -                   memory, hpagesize);
> +                   "or larger than page size 0x%" PRIx64,
> +                   memory, pagesize);
>          goto error;
>      }
>  
> @@ -1230,15 +1203,15 @@ static void *file_ram_alloc(RAMBlock *block,
>      fd = mkstemp(filename);
>      if (fd < 0) {
>          error_setg_errno(errp, errno,
> -                         "unable to create backing store for hugepages");
> +                         "unable to create backing store for path %s", path);
>          g_free(filename);
>          goto error;
>      }
>      unlink(filename);
>      g_free(filename);
>  
> -    memory = ROUND_UP(memory, hpagesize);
> -    total = memory + hpagesize;
> +    memory = ROUND_UP(memory, pagesize);
> +    total = memory + pagesize;
>  
>      /*
>       * ftruncate is not supported by hugetlbfs in older
> @@ -1254,12 +1227,12 @@ static void *file_ram_alloc(RAMBlock *block,
>                  -1, 0);
>      if (ptr == MAP_FAILED) {
>          error_setg_errno(errp, errno,
> -                         "unable to allocate memory range for hugepages");
> +                         "unable to allocate memory range for path %s", path);
>          close(fd);
>          goto error;
>      }
>  
> -    offset = QEMU_ALIGN_UP((uintptr_t)ptr, hpagesize) - (uintptr_t)ptr;
> +    offset = QEMU_ALIGN_UP((uintptr_t)ptr, pagesize) - (uintptr_t)ptr;
>  
>      area = mmap(ptr + offset, memory, PROT_READ | PROT_WRITE,
>                  (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE) |
> @@ -1267,7 +1240,7 @@ static void *file_ram_alloc(RAMBlock *block,
>                  fd, 0);
>      if (area == MAP_FAILED) {
>          error_setg_errno(errp, errno,
> -                         "unable to map backing store for hugepages");
> +                         "unable to map backing store for path %s", path);
>          munmap(ptr, total);
>          close(fd);
>          goto error;
> -- 
> 1.8.3.1
Xiao Guangrong Oct. 13, 2015, 3:31 a.m. UTC | #2
On 10/12/2015 06:08 PM, Michael S. Tsirkin wrote:
> On Sun, Oct 11, 2015 at 11:52:40AM +0800, Xiao Guangrong wrote:
>> Currently file_ram_alloc() is designed for hugetlbfs, however, the memory
>> of nvdimm can come from either raw pmem device eg, /dev/pmem, or the file
>> locates at DAX enabled filesystem
>>
>> So this patch let it work on any kind of path
>>
>> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
>
> This conflicts with map alloc rework.
> Please rebase this on top of my tree.
>

Okay, thanks for the reminder. I did it based on the upstream QEMU tree;
I will redo it on top of the pci branch of your tree instead.

>
>> ---
>>   exec.c | 55 ++++++++++++++-----------------------------------------
>>   1 file changed, 14 insertions(+), 41 deletions(-)
>>
>> diff --git a/exec.c b/exec.c
>> index 7d90a52..70cb0ef 100644
>> --- a/exec.c
>> +++ b/exec.c
>> @@ -1154,32 +1154,6 @@ void qemu_mutex_unlock_ramlist(void)
>>   }
>>
>>   #ifdef __linux__
>> -
>> -#include <sys/vfs.h>
>> -
>> -#define HUGETLBFS_MAGIC       0x958458f6
>> -
>> -static long gethugepagesize(const char *path, Error **errp)
>> -{
>> -    struct statfs fs;
>> -    int ret;
>> -
>> -    do {
>> -        ret = statfs(path, &fs);
>> -    } while (ret != 0 && errno == EINTR);
>> -
>> -    if (ret != 0) {
>> -        error_setg_errno(errp, errno, "failed to get page size of file %s",
>> -                         path);
>> -        return 0;
>> -    }
>> -
>> -    if (fs.f_type != HUGETLBFS_MAGIC)
>> -        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
>> -
>> -    return fs.f_bsize;
>
> What this *actually* is trying to warn against is that
> mapping a regular file (as opposed to hugetlbfs)
> means transparent huge pages don't work.
>
> So I don't think we should drop this warning completely.
> Either let's add the nvdimm magic, or simply check the
> page size.

Checking the page size sounds good; I will check:
if (pagesize != getpagesize()) {
	...print something...
}

I agree with you that showing the info is needed; however, 'Warning'
might scare some users. How about dropping this word, or just showing
“Memory is not allocated from HugeTLBFS”?
diff mbox

Patch

diff --git a/exec.c b/exec.c
index 7d90a52..70cb0ef 100644
--- a/exec.c
+++ b/exec.c
@@ -1154,32 +1154,6 @@  void qemu_mutex_unlock_ramlist(void)
 }
 
 #ifdef __linux__
-
-#include <sys/vfs.h>
-
-#define HUGETLBFS_MAGIC       0x958458f6
-
-static long gethugepagesize(const char *path, Error **errp)
-{
-    struct statfs fs;
-    int ret;
-
-    do {
-        ret = statfs(path, &fs);
-    } while (ret != 0 && errno == EINTR);
-
-    if (ret != 0) {
-        error_setg_errno(errp, errno, "failed to get page size of file %s",
-                         path);
-        return 0;
-    }
-
-    if (fs.f_type != HUGETLBFS_MAGIC)
-        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
-
-    return fs.f_bsize;
-}
-
 static void *file_ram_alloc(RAMBlock *block,
                             ram_addr_t memory,
                             const char *path,
@@ -1191,22 +1165,21 @@  static void *file_ram_alloc(RAMBlock *block,
     void *ptr;
     void *area = NULL;
     int fd;
-    uint64_t hpagesize;
+    uint64_t pagesize;
     uint64_t total;
-    Error *local_err = NULL;
     size_t offset;
 
-    hpagesize = gethugepagesize(path, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
+    pagesize = qemu_file_get_page_size(path);
+    if (!pagesize) {
+        error_setg(errp, "can't get page size for %s", path);
         goto error;
     }
-    block->mr->align = hpagesize;
+    block->mr->align = pagesize;
 
-    if (memory < hpagesize) {
+    if (memory < pagesize) {
         error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
-                   "or larger than huge page size 0x%" PRIx64,
-                   memory, hpagesize);
+                   "or larger than page size 0x%" PRIx64,
+                   memory, pagesize);
         goto error;
     }
 
@@ -1230,15 +1203,15 @@  static void *file_ram_alloc(RAMBlock *block,
     fd = mkstemp(filename);
     if (fd < 0) {
         error_setg_errno(errp, errno,
-                         "unable to create backing store for hugepages");
+                         "unable to create backing store for path %s", path);
         g_free(filename);
         goto error;
     }
     unlink(filename);
     g_free(filename);
 
-    memory = ROUND_UP(memory, hpagesize);
-    total = memory + hpagesize;
+    memory = ROUND_UP(memory, pagesize);
+    total = memory + pagesize;
 
     /*
      * ftruncate is not supported by hugetlbfs in older
@@ -1254,12 +1227,12 @@  static void *file_ram_alloc(RAMBlock *block,
                 -1, 0);
     if (ptr == MAP_FAILED) {
         error_setg_errno(errp, errno,
-                         "unable to allocate memory range for hugepages");
+                         "unable to allocate memory range for path %s", path);
         close(fd);
         goto error;
     }
 
-    offset = QEMU_ALIGN_UP((uintptr_t)ptr, hpagesize) - (uintptr_t)ptr;
+    offset = QEMU_ALIGN_UP((uintptr_t)ptr, pagesize) - (uintptr_t)ptr;
 
     area = mmap(ptr + offset, memory, PROT_READ | PROT_WRITE,
                 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE) |
@@ -1267,7 +1240,7 @@  static void *file_ram_alloc(RAMBlock *block,
                 fd, 0);
     if (area == MAP_FAILED) {
         error_setg_errno(errp, errno,
-                         "unable to map backing store for hugepages");
+                         "unable to map backing store for path %s", path);
         munmap(ptr, total);
         close(fd);
         goto error;