diff mbox

[01/38] exec: Fix memory allocation when memory path names new file

Message ID 1456771254-17511-2-git-send-email-armbru@redhat.com
State New
Headers show

Commit Message

Markus Armbruster Feb. 29, 2016, 6:40 p.m. UTC
Commit 8d31d6b extended file_ram_alloc() to accept file names in
addition to directory names.  Even though it passes O_CREAT to open(),
it actually works only for existing files.  Reproducer adapted from
the commit's qemu-doc.texi update:

    $ qemu-system-x86_64 -object memory-backend-file,size=2M,mem-path=/dev/hugepages/my-shmem-file,id=mb1
    qemu-system-x86_64: -object memory-backend-file,size=2M,mem-path=/dev/hugepages/my-shmem-file,id=mb1: failed to get page size of file /dev/hugepages/my-shmem-file: No such file or directory

Rearrange the code to create the file (if necessary) before getting
its page size.

While there, replace "hugepages" by "guest RAM" in error messages,
because host memory backends can be used for purposes other than huge
pages, e.g. /dev/shm/ shared memory.  Help text of -mem-path agrees.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
---
 exec.c | 50 +++++++++++++++++++++++++++++++-------------------
 1 file changed, 31 insertions(+), 19 deletions(-)

Comments

Paolo Bonzini March 1, 2016, 11:35 a.m. UTC | #1
On 29/02/2016 19:40, Markus Armbruster wrote:
> -    if (!stat(path, &st) && S_ISDIR(st.st_mode)) {
> +    ret = stat(path, &st);
> +    if (!ret && S_ISDIR(st.st_mode)) {
> +        /* path names a directory -> create a temporary file there */
>          /* Make name safe to use with mkstemp by replacing '/' with '_'. */
>          sanitized_name = g_strdup(memory_region_name(block->mr));
>          for (c = sanitized_name; *c != '\0'; c++) {
> @@ -1282,13 +1271,32 @@ static void *file_ram_alloc(RAMBlock *block,
>              unlink(filename);
>          }
>          g_free(filename);
> +    } else if (!ret) {
> +        /* path names an existing file -> use it */
> +        fd = open(path, O_RDWR);
>      } else {
> +        /* create a new file */
>          fd = open(path, O_RDWR | O_CREAT, 0644);
> +        unlink_on_error = true;
>      }

While at it, let's avoid TOCTTOU conditions:

    for (;;) {
        fd = open(path, O_RDWR);
        if (fd != -1) {
            break;
        }
        if (errno == ENOENT) {
            fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
            if (fd != -1) {
                unlink_on_error = true;
                break;
            }
        } else if (errno == EISDIR) {
            ... mkstemp ...
            if (fd != -1) {
                unlink_on_error = true;
                break;
            }
        }
        if (errno != EEXIST && errno != EINTR) {
            goto error;
        }
    }

and use fstatfs in gethugepagesize.

Paolo
Markus Armbruster March 1, 2016, 11:58 a.m. UTC | #2
Paolo Bonzini <pbonzini@redhat.com> writes:

> On 29/02/2016 19:40, Markus Armbruster wrote:
>> -    if (!stat(path, &st) && S_ISDIR(st.st_mode)) {
>> +    ret = stat(path, &st);
>> +    if (!ret && S_ISDIR(st.st_mode)) {
>> +        /* path names a directory -> create a temporary file there */
>>          /* Make name safe to use with mkstemp by replacing '/' with '_'. */
>>          sanitized_name = g_strdup(memory_region_name(block->mr));
>>          for (c = sanitized_name; *c != '\0'; c++) {
>> @@ -1282,13 +1271,32 @@ static void *file_ram_alloc(RAMBlock *block,
>>              unlink(filename);
>>          }
>>          g_free(filename);
>> +    } else if (!ret) {
>> +        /* path names an existing file -> use it */
>> +        fd = open(path, O_RDWR);
>>      } else {
>> +        /* create a new file */
>>          fd = open(path, O_RDWR | O_CREAT, 0644);
>> +        unlink_on_error = true;
>>      }
>
> While at it, let's avoid TOCTTOU conditions:
>
>     for (;;) {
>         fd = open(path, O_RDWR);
>         if (fd != -1) {
>             break;
>         }
>         if (errno == ENOENT) {
>             fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
>             if (fd != -1) {
>                 unlink_on_error = true;
>                 break;
>             }
>         } else if (errno == EISDIR) {
>             ... mkstemp ...
>             if (fd != -1) {
>                 unlink_on_error = true;
>                 break;
>             }
>         }
>         if (errno != EEXIST && errno != EINTR) {
>             goto error;
>         }
>     }
>
> and use fstatfs in gethugepagesize.

Good point, will do!
Markus Armbruster March 4, 2016, 6:50 p.m. UTC | #3
Paolo Bonzini <pbonzini@redhat.com> writes:

> On 29/02/2016 19:40, Markus Armbruster wrote:
>> -    if (!stat(path, &st) && S_ISDIR(st.st_mode)) {
>> +    ret = stat(path, &st);
>> +    if (!ret && S_ISDIR(st.st_mode)) {
>> +        /* path names a directory -> create a temporary file there */
>>          /* Make name safe to use with mkstemp by replacing '/' with '_'. */
>>          sanitized_name = g_strdup(memory_region_name(block->mr));
>>          for (c = sanitized_name; *c != '\0'; c++) {
>> @@ -1282,13 +1271,32 @@ static void *file_ram_alloc(RAMBlock *block,
>>              unlink(filename);
>>          }
>>          g_free(filename);
>> +    } else if (!ret) {
>> +        /* path names an existing file -> use it */
>> +        fd = open(path, O_RDWR);
>>      } else {
>> +        /* create a new file */
>>          fd = open(path, O_RDWR | O_CREAT, 0644);
>> +        unlink_on_error = true;
>>      }
>
> While at it, let's avoid TOCTTOU conditions:
>
>     for (;;) {
>         fd = open(path, O_RDWR);
>         if (fd != -1) {
>             break;
>         }
>         if (errno == ENOENT) {
>             fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
>             if (fd != -1) {
>                 unlink_on_error = true;
>                 break;
>             }
>         } else if (errno == EISDIR) {
>             ... mkstemp ...
>             if (fd != -1) {
>                 unlink_on_error = true;
>                 break;
>             }
>         }
>         if (errno != EEXIST && errno != EINTR) {
>             goto error;
>         }
>     }
>
> and use fstatfs in gethugepagesize.

A question on gethugepagesize().  We have a couple of copies.

Here's target-ppc/kvm.c's:

    static long gethugepagesize(const char *mem_path)
    {
        struct statfs fs;
        int ret;

        do {
            ret = statfs(mem_path, &fs);
        } while (ret != 0 && errno == EINTR);

        if (ret != 0) {
            fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                    strerror(errno));
            exit(1);
        }

    #define HUGETLBFS_MAGIC       0x958458f6

        if (fs.f_type != HUGETLBFS_MAGIC) {
            /* Explicit mempath, but it's ordinary pages */
            return getpagesize();
        }

        /* It's hugepage, return the huge page size */
        return fs.f_bsize;
    }

I guess the use of HUGETLBFS_MAGIC is fine since kvm.c is Linux-specific.

There's another one in ivshmem_server.c, functionally identical and
wrapped in CONFIG_LINUX.

Here's exec.c's:

    #define HUGETLBFS_MAGIC       0x958458f6

    static long gethugepagesize(const char *path, Error **errp)
    {
        struct statfs fs;
        int ret;

        do {
            ret = statfs(path, &fs);
        } while (ret != 0 && errno == EINTR);

        if (ret != 0) {
            error_setg_errno(errp, errno, "failed to get page size of file %s",
                             path);
            return 0;
        }

        return fs.f_bsize;
    }

Before commit bfc2a1a, it additionally had

    if (fs.f_type != HUGETLBFS_MAGIC)
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);

Note the lack of "if not hugetlbfs, use getpagesize()" logic.

Here's util/mmap-alloc.c's:

    #define HUGETLBFS_MAGIC       0x958458f6

    #ifdef CONFIG_LINUX
    #include <sys/vfs.h>
    #endif

    size_t qemu_fd_getpagesize(int fd)
    {
    #ifdef CONFIG_LINUX
        struct statfs fs;
        int ret;

        if (fd != -1) {
            do {
                ret = fstatfs(fd, &fs);
            } while (ret != 0 && errno == EINTR);

            if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) {
                return fs.f_bsize;
            }
        }
    #endif

        return getpagesize();
    }

Would you like me to convert the others users to this one and drop the
dupes?
Paolo Bonzini March 7, 2016, 1:12 p.m. UTC | #4
On 04/03/2016 19:50, Markus Armbruster wrote:
> There's another one in ivshmem_server.c, functionally identical and
> wrapped in CONFIG_LINUX.

Not quite identical, since it returns -1 for non-hugetlbfs.  It should
return getpagesize().

> Here's exec.c's:
> 
>     #define HUGETLBFS_MAGIC       0x958458f6
> 
>     static long gethugepagesize(const char *path, Error **errp)
>     {
>         struct statfs fs;
>         int ret;
> 
>         do {
>             ret = statfs(path, &fs);
>         } while (ret != 0 && errno == EINTR);
> 
>         if (ret != 0) {
>             error_setg_errno(errp, errno, "failed to get page size of file %s",
>                              path);
>             return 0;
>         }
> 
>         return fs.f_bsize;
>     }
> 
> Before commit bfc2a1a, it additionally had
> 
>     if (fs.f_type != HUGETLBFS_MAGIC)
>         fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
> 
> Note the lack of "if not hugetlbfs, use getpagesize()" logic.
> 
> Here's util/mmap-alloc.c's:
> 
>     #define HUGETLBFS_MAGIC       0x958458f6
> 
>     #ifdef CONFIG_LINUX
>     #include <sys/vfs.h>
>     #endif
> 
>     size_t qemu_fd_getpagesize(int fd)
>     {
>     #ifdef CONFIG_LINUX
>         struct statfs fs;
>         int ret;
> 
>         if (fd != -1) {
>             do {
>                 ret = fstatfs(fd, &fs);
>             } while (ret != 0 && errno == EINTR);
> 
>             if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) {
>                 return fs.f_bsize;
>             }
>         }
>     #endif
> 
>         return getpagesize();
>     }
> 
> Would you like me to convert the others users to this one and drop the
> dupes?

That would be great, since all of them really should use fstatfs instead
of statfs.

Paolo
diff mbox

Patch

diff --git a/exec.c b/exec.c
index c62c439..81b0063 100644
--- a/exec.c
+++ b/exec.c
@@ -1235,36 +1235,25 @@  static void *file_ram_alloc(RAMBlock *block,
                             const char *path,
                             Error **errp)
 {
+    bool unlink_on_error = false;
     struct stat st;
     char *filename;
     char *sanitized_name;
     char *c;
     void *area;
-    int fd;
+    int ret, fd;
     uint64_t hpagesize;
     Error *local_err = NULL;
 
-    hpagesize = gethugepagesize(path, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        goto error;
-    }
-    block->mr->align = hpagesize;
-
-    if (memory < hpagesize) {
-        error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
-                   "or larger than huge page size 0x%" PRIx64,
-                   memory, hpagesize);
-        goto error;
-    }
-
     if (kvm_enabled() && !kvm_has_sync_mmu()) {
         error_setg(errp,
                    "host lacks kvm mmu notifiers, -mem-path unsupported");
-        goto error;
+        return NULL;
     }
 
-    if (!stat(path, &st) && S_ISDIR(st.st_mode)) {
+    ret = stat(path, &st);
+    if (!ret && S_ISDIR(st.st_mode)) {
+        /* path names a directory -> create a temporary file there */
         /* Make name safe to use with mkstemp by replacing '/' with '_'. */
         sanitized_name = g_strdup(memory_region_name(block->mr));
         for (c = sanitized_name; *c != '\0'; c++) {
@@ -1282,13 +1271,32 @@  static void *file_ram_alloc(RAMBlock *block,
             unlink(filename);
         }
         g_free(filename);
+    } else if (!ret) {
+        /* path names an existing file -> use it */
+        fd = open(path, O_RDWR);
     } else {
+        /* create a new file */
         fd = open(path, O_RDWR | O_CREAT, 0644);
+        unlink_on_error = true;
     }
 
     if (fd < 0) {
         error_setg_errno(errp, errno,
-                         "unable to create backing store for hugepages");
+                         "unable to create backing store for guest RAM");
+        return NULL;
+    }
+
+    hpagesize = gethugepagesize(path, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        goto error;
+    }
+    block->mr->align = hpagesize;
+
+    if (memory < hpagesize) {
+        error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
+                   "or larger than page size 0x%" PRIx64,
+                   memory, hpagesize);
         goto error;
     }
 
@@ -1307,7 +1315,7 @@  static void *file_ram_alloc(RAMBlock *block,
     area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
     if (area == MAP_FAILED) {
         error_setg_errno(errp, errno,
-                         "unable to map backing store for hugepages");
+                         "unable to map backing store for guest RAM");
         close(fd);
         goto error;
     }
@@ -1320,6 +1328,10 @@  static void *file_ram_alloc(RAMBlock *block,
     return area;
 
 error:
+    if (unlink_on_error) {
+        unlink(path);
+    }
+    close(fd);
     return NULL;
 }
 #endif