diff mbox

[v2,33/45] ivshmem-server: fix hugetlbfs support

Message ID 1438043577-28636-34-git-send-email-marcandre.lureau@redhat.com
State New
Headers show

Commit Message

Marc-André Lureau July 28, 2015, 12:32 a.m. UTC
From: Marc-André Lureau <marcandre.lureau@gmail.com>

As pointed out on the ML by Andrew Jones, glibc no longer permits
creating POSIX shm on hugetlbfs directly. When given a hugetlbfs path,
create a shareable file there.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
---
 contrib/ivshmem-server/ivshmem-server.c | 47 ++++++++++++++++++++++++++++++++-
 contrib/ivshmem-server/ivshmem-server.h |  3 +--
 contrib/ivshmem-server/main.c           |  5 ++--
 3 files changed, 49 insertions(+), 6 deletions(-)

Comments

Andrew Jones July 28, 2015, 7:33 a.m. UTC | #1
On Tue, Jul 28, 2015 at 02:32:45AM +0200, Marc-André Lureau wrote:
> From: Marc-André Lureau <marcandre.lureau@gmail.com>
> 
> As pointed out on the ML by Andrew Jones, glibc no longer permits
> creating POSIX shm on hugetlbfs directly. When given a hugetlbfs path,
> create a shareable file there.
> 
> Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> ---
>  contrib/ivshmem-server/ivshmem-server.c | 47 ++++++++++++++++++++++++++++++++-
>  contrib/ivshmem-server/ivshmem-server.h |  3 +--
>  contrib/ivshmem-server/main.c           |  5 ++--
>  3 files changed, 49 insertions(+), 6 deletions(-)
> 
> diff --git a/contrib/ivshmem-server/ivshmem-server.c b/contrib/ivshmem-server/ivshmem-server.c
> index 972fda2..4bf774b 100644
> --- a/contrib/ivshmem-server/ivshmem-server.c
> +++ b/contrib/ivshmem-server/ivshmem-server.c
> @@ -11,6 +11,7 @@
>  #include <sys/socket.h>
>  #include <sys/un.h>
>  #include <sys/eventfd.h>
> +#include <sys/vfs.h>
>  
>  #include "qemu-common.h"
>  #include "qemu/queue.h"
> @@ -271,15 +272,59 @@ ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path,
>      return 0;
>  }
>  
> +#define HUGETLBFS_MAGIC       0x958458f6
> +
> +static long gethugepagesize(const char *path)
> +{
> +    struct statfs fs;
> +    int ret;
> +
> +    do {
> +        ret = statfs(path, &fs);
> +    } while (ret != 0 && errno == EINTR);
> +
> +    if (ret != 0) {
> +        if (errno != ENOENT) {
> +            fprintf(stderr, "cannot stat shm file %s: %s\n", path,
> +                    strerror(errno));
> +        }
> +        return -1;
> +    }
> +
> +    if (fs.f_type != HUGETLBFS_MAGIC) {
> +        return -1;
> +    }
> +
> +    return fs.f_bsize;
> +}
> +
> +
> +
few extra lines here
>  /* open shm, create and bind to the unix socket */
>  int
>  ivshmem_server_start(IvshmemServer *server)
>  {
>      struct sockaddr_un sun;
>      int shm_fd, sock_fd, ret;
> +    long hpagesize;
> +    gchar *filename;
>  
>      /* open shm file */
> -    shm_fd = shm_open(server->shm_path, O_CREAT|O_RDWR, S_IRWXU);
> +    hpagesize = gethugepagesize(server->shm_path);
> +    if (hpagesize > 0) {
> +        if (server->shm_size < hpagesize) {
should be >, but isn't this forcing the shared memory to be less than
or equal to the size of a single hugepage? I think we should allow up to
nr-pages * page-size.

Also, I'm not sure we want the dependency, but what about libhugetlbfs?
It has hugetlbfs_test_path

> +            fprintf(stderr, "hugepage must be at least of size: %ld\n",
> +                    hpagesize);
> +            return -1;
> +        }
> +        filename = g_strdup_printf("%s/ivshmem.XXXXXX", server->shm_path);
> +        shm_fd = mkstemp(filename);

Shouldn't we change the perms for shm_fd to match the non-hugetlbfs
case? Or change the non-hugetlbfs case to match mkstemp? Actually,
probably the latter, because I don't think we want the region to have
execute perms, or do we? Also, I guess the plan is to pass the hugetlbfs
file descriptor around if other host processes need to know where the
memory is, as we never allow a full path. Should we do the same for shm?
i.e. keep them anonymous too and always pass file descriptors?

> +        unlink(filename);
> +        g_free(filename);
> +    } else {
> +        shm_fd = shm_open(server->shm_path, O_CREAT|O_RDWR, S_IRWXU);
> +    }
> +
>      if (shm_fd < 0) {
>          fprintf(stderr, "cannot open shm file %s: %s\n", server->shm_path,
>                  strerror(errno));
> diff --git a/contrib/ivshmem-server/ivshmem-server.h b/contrib/ivshmem-server/ivshmem-server.h
> index 2176d5e..e9b0e7a 100644
> --- a/contrib/ivshmem-server/ivshmem-server.h
> +++ b/contrib/ivshmem-server/ivshmem-server.h
> @@ -81,8 +81,7 @@ typedef struct IvshmemServer {
>   * @server:         A pointer to an uninitialized IvshmemServer structure
>   * @unix_sock_path: The pointer to the unix socket file name
>   * @shm_path:       Path to the shared memory. The path corresponds to a POSIX
> - *                  shm name. To use a real file, for instance in a hugetlbfs,
> - *                  it is possible to use /../../abspath/to/file.
> + *                  shm name or a hugetlbfs mount point.
>   * @shm_size:       Size of shared memory
>   * @n_vectors:      Number of interrupt vectors per client
>   * @verbose:        True to enable verbose mode
> diff --git a/contrib/ivshmem-server/main.c b/contrib/ivshmem-server/main.c
> index 84ffc4d..cd8d9ed 100644
> --- a/contrib/ivshmem-server/main.c
> +++ b/contrib/ivshmem-server/main.c
> @@ -47,9 +47,8 @@ ivshmem_server_usage(const char *name, int code)
>                      "     to listen to.\n"
>                      "     Default=%s\n", IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH);
>      fprintf(stderr, "  -m <shm_path>: path to the shared memory.\n"
> -                    "     The path corresponds to a POSIX shm name. To use a\n"
> -                    "     real file, for instance in a hugetlbfs, use\n"
> -                    "     /../../abspath/to/file.\n"
> +                    "     The path corresponds to a POSIX shm name or a\n"
> +                    "     hugetlbfs mount point.\n"
>                      "     default=%s\n", IVSHMEM_SERVER_DEFAULT_SHM_PATH);
>      fprintf(stderr, "  -l <size>: size of shared memory in bytes. The suffix\n"
>                      "     K, M and G can be used (ex: 1K means 1024).\n"
> -- 
> 2.4.3
> 
> 

Thanks,
drew
Marc-André Lureau July 28, 2015, 6:02 p.m. UTC | #2
Hi

On Tue, Jul 28, 2015 at 9:33 AM, Andrew Jones <drjones@redhat.com> wrote:
> On Tue, Jul 28, 2015 at 02:32:45AM +0200, Marc-André Lureau wrote:
>> +
>> +    return fs.f_bsize;
>> +}
>> +
>> +
>> +
> few extra lines here

cut, thanks

>>  /* open shm, create and bind to the unix socket */
>>  int
>>  ivshmem_server_start(IvshmemServer *server)
>>  {
>>      struct sockaddr_un sun;
>>      int shm_fd, sock_fd, ret;
>> +    long hpagesize;
>> +    gchar *filename;
>>
>>      /* open shm file */
>> -    shm_fd = shm_open(server->shm_path, O_CREAT|O_RDWR, S_IRWXU);
>> +    hpagesize = gethugepagesize(server->shm_path);
>> +    if (hpagesize > 0) {
>> +        if (server->shm_size < hpagesize) {
> should be >, but isn't this forcing the shared memory to be less than
> or equal to the size of a single hugepage? I think we should allow up to
> nr-pages * page-size.
>

Oops, I clearly didn't test this patch correctly, or it's an outdated version.

This check is quite useless anyway, since the ftruncate call that follows will check whether the file can be given that size.

> Also, I'm not sure we want the dependency, but what about libhugetlbfs?
> It has hugetlbfs_test_path

I don't know that library; it's not on my system either :)
After installing it, it doesn't look like an API but rather a preload library.

Imho, it's not necessary at this point.

>
>> +            fprintf(stderr, "hugepage must be at least of size: %ld\n",
>> +                    hpagesize);
>> +            return -1;
>> +        }
>> +        filename = g_strdup_printf("%s/ivshmem.XXXXXX", server->shm_path);
>> +        shm_fd = mkstemp(filename);
>
> Shouldn't we change the perms for shm_fd to match the non-hugetlbfs
> case? Or change the non-hugetlbfs case to match mkstemp? Actually,
> probably the latter, because I don't think we want the region to have

Good question. I wonder what the exec mode actually means for shm, as
the execution in memory is rather defined by mmap PROT_EXEC.
It doesn't make much sense to create executable files/shm.

I'll make a followup rfc patch to fix shm_open() args in both server & qemu

> execute perms, or do we? Also, I guess the plan is to pass the hugetlbfs
> file descriptor around if other host processes need to know where the
> memory is, as we never allow a full path. Should we do the same for shm?
> i.e. keep them anonymous too and always pass file descriptors?

It's already only passing the fd, in any case.

>> +        unlink(filename);
>> +        g_free(filename);
>> +    } else {
>> +        shm_fd = shm_open(server->shm_path, O_CREAT|O_RDWR, S_IRWXU);
>> +    }
>> +
>>      if (shm_fd < 0) {
>>          fprintf(stderr, "cannot open shm file %s: %s\n", server->shm_path,
>>                  strerror(errno));
>> diff --git a/contrib/ivshmem-server/ivshmem-server.h b/contrib/ivshmem-server/ivshmem-server.h
>> index 2176d5e..e9b0e7a 100644
>> --- a/contrib/ivshmem-server/ivshmem-server.h
>> +++ b/contrib/ivshmem-server/ivshmem-server.h
>> @@ -81,8 +81,7 @@ typedef struct IvshmemServer {
>>   * @server:         A pointer to an uninitialized IvshmemServer structure
>>   * @unix_sock_path: The pointer to the unix socket file name
>>   * @shm_path:       Path to the shared memory. The path corresponds to a POSIX
>> - *                  shm name. To use a real file, for instance in a hugetlbfs,
>> - *                  it is possible to use /../../abspath/to/file.
>> + *                  shm name or a hugetlbfs mount point.
>>   * @shm_size:       Size of shared memory
>>   * @n_vectors:      Number of interrupt vectors per client
>>   * @verbose:        True to enable verbose mode
>> diff --git a/contrib/ivshmem-server/main.c b/contrib/ivshmem-server/main.c
>> index 84ffc4d..cd8d9ed 100644
>> --- a/contrib/ivshmem-server/main.c
>> +++ b/contrib/ivshmem-server/main.c
>> @@ -47,9 +47,8 @@ ivshmem_server_usage(const char *name, int code)
>>                      "     to listen to.\n"
>>                      "     Default=%s\n", IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH);
>>      fprintf(stderr, "  -m <shm_path>: path to the shared memory.\n"
>> -                    "     The path corresponds to a POSIX shm name. To use a\n"
>> -                    "     real file, for instance in a hugetlbfs, use\n"
>> -                    "     /../../abspath/to/file.\n"
>> +                    "     The path corresponds to a POSIX shm name or a\n"
>> +                    "     hugetlbfs mount point.\n"
>>                      "     default=%s\n", IVSHMEM_SERVER_DEFAULT_SHM_PATH);
>>      fprintf(stderr, "  -l <size>: size of shared memory in bytes. The suffix\n"
>>                      "     K, M and G can be used (ex: 1K means 1024).\n"
>> --
>> 2.4.3
>>
>>

thanks
Andrew Jones July 29, 2015, 6:30 a.m. UTC | #3
On Tue, Jul 28, 2015 at 08:02:54PM +0200, Marc-André Lureau wrote:
> Hi
> 
> On Tue, Jul 28, 2015 at 9:33 AM, Andrew Jones <drjones@redhat.com> wrote:
> > On Tue, Jul 28, 2015 at 02:32:45AM +0200, Marc-André Lureau wrote:
> >> +
> >> +    return fs.f_bsize;
> >> +}
> >> +
> >> +
> >> +
> > few extra lines here
> 
> cut, thanks
> 
> >>  /* open shm, create and bind to the unix socket */
> >>  int
> >>  ivshmem_server_start(IvshmemServer *server)
> >>  {
> >>      struct sockaddr_un sun;
> >>      int shm_fd, sock_fd, ret;
> >> +    long hpagesize;
> >> +    gchar *filename;
> >>
> >>      /* open shm file */
> >> -    shm_fd = shm_open(server->shm_path, O_CREAT|O_RDWR, S_IRWXU);
> >> +    hpagesize = gethugepagesize(server->shm_path);
> >> +    if (hpagesize > 0) {
> >> +        if (server->shm_size < hpagesize) {
> > should be >, but isn't this forcing the shared memory to be less than
> > or equal to the size of a single hugepage? I think we should allow up to
> > nr-pages * page-size.
> >
> 
> oops, I clearly didn't test correctly this patch, or it's an outdated version.
> 
> This check is quite useless since ftruncate will check if it can be sized after.
> 
> > Also, I'm not sure we want the dependency, but what about libhugetlbfs?
> > It has hugetlbfs_test_path
> 
> I don't know that library, it's not on my system either :)
> After installing it, it doesn't look like an API but rather a preload library

$ dnf info libhugetlbfs-devel | grep Desc
Description : Contains header files for building with libhugetlbfs.

> 
> Imho, it's not necessary at this point.

OK

> 
> >
> >> +            fprintf(stderr, "hugepage must be at least of size: %ld\n",
> >> +                    hpagesize);
> >> +            return -1;
> >> +        }
> >> +        filename = g_strdup_printf("%s/ivshmem.XXXXXX", server->shm_path);
> >> +        shm_fd = mkstemp(filename);
> >
> > Shouldn't we change the perms for shm_fd to match the non-hugetlbfs
> > case? Or change the non-hugetlbfs case to match mkstemp? Actually,
> > probably the latter, because I don't think we want the region to have
> 
> Good question. I wonder what the exec mode actually means for shm, as
> the execution in memory is rather defined by mmap PROT_EXEC.
> It doesn't make much sense to create executable files/shm.
> 
> I'll make a followup rfc patch to fix shm_open() args in both server & qemu
> 
> > execute perms, or do we? Also, I guess the plan is to pass the hugetlbfs
> > file descriptor around if other host processes need to know where the
> > memory is, as we never allow a full path. Should we do the same for shm?
> > i.e. keep them anonymous too and always pass file descriptors?
> 
> it's already only passing fd, in any case

Sounds good.

Thanks,
drew
> 
> >> +        unlink(filename);
> >> +        g_free(filename);
> >> +    } else {
> >> +        shm_fd = shm_open(server->shm_path, O_CREAT|O_RDWR, S_IRWXU);
> >> +    }
> >> +
> >>      if (shm_fd < 0) {
> >>          fprintf(stderr, "cannot open shm file %s: %s\n", server->shm_path,
> >>                  strerror(errno));
> >> diff --git a/contrib/ivshmem-server/ivshmem-server.h b/contrib/ivshmem-server/ivshmem-server.h
> >> index 2176d5e..e9b0e7a 100644
> >> --- a/contrib/ivshmem-server/ivshmem-server.h
> >> +++ b/contrib/ivshmem-server/ivshmem-server.h
> >> @@ -81,8 +81,7 @@ typedef struct IvshmemServer {
> >>   * @server:         A pointer to an uninitialized IvshmemServer structure
> >>   * @unix_sock_path: The pointer to the unix socket file name
> >>   * @shm_path:       Path to the shared memory. The path corresponds to a POSIX
> >> - *                  shm name. To use a real file, for instance in a hugetlbfs,
> >> - *                  it is possible to use /../../abspath/to/file.
> >> + *                  shm name or a hugetlbfs mount point.
> >>   * @shm_size:       Size of shared memory
> >>   * @n_vectors:      Number of interrupt vectors per client
> >>   * @verbose:        True to enable verbose mode
> >> diff --git a/contrib/ivshmem-server/main.c b/contrib/ivshmem-server/main.c
> >> index 84ffc4d..cd8d9ed 100644
> >> --- a/contrib/ivshmem-server/main.c
> >> +++ b/contrib/ivshmem-server/main.c
> >> @@ -47,9 +47,8 @@ ivshmem_server_usage(const char *name, int code)
> >>                      "     to listen to.\n"
> >>                      "     Default=%s\n", IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH);
> >>      fprintf(stderr, "  -m <shm_path>: path to the shared memory.\n"
> >> -                    "     The path corresponds to a POSIX shm name. To use a\n"
> >> -                    "     real file, for instance in a hugetlbfs, use\n"
> >> -                    "     /../../abspath/to/file.\n"
> >> +                    "     The path corresponds to a POSIX shm name or a\n"
> >> +                    "     hugetlbfs mount point.\n"
> >>                      "     default=%s\n", IVSHMEM_SERVER_DEFAULT_SHM_PATH);
> >>      fprintf(stderr, "  -l <size>: size of shared memory in bytes. The suffix\n"
> >>                      "     K, M and G can be used (ex: 1K means 1024).\n"
> >> --
> >> 2.4.3
> >>
> >>
> 
> thanks
> 
> 
> -- 
> Marc-André Lureau
>
diff mbox

Patch

diff --git a/contrib/ivshmem-server/ivshmem-server.c b/contrib/ivshmem-server/ivshmem-server.c
index 972fda2..4bf774b 100644
--- a/contrib/ivshmem-server/ivshmem-server.c
+++ b/contrib/ivshmem-server/ivshmem-server.c
@@ -11,6 +11,7 @@ 
 #include <sys/socket.h>
 #include <sys/un.h>
 #include <sys/eventfd.h>
+#include <sys/vfs.h>
 
 #include "qemu-common.h"
 #include "qemu/queue.h"
@@ -271,15 +272,59 @@  ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path,
     return 0;
 }
 
+#define HUGETLBFS_MAGIC       0x958458f6
+
+static long gethugepagesize(const char *path)
+{
+    struct statfs fs;
+    int ret;
+
+    do {
+        ret = statfs(path, &fs);
+    } while (ret != 0 && errno == EINTR);
+
+    if (ret != 0) {
+        if (errno != ENOENT) {
+            fprintf(stderr, "cannot stat shm file %s: %s\n", path,
+                    strerror(errno));
+        }
+        return -1;
+    }
+
+    if (fs.f_type != HUGETLBFS_MAGIC) {
+        return -1;
+    }
+
+    return fs.f_bsize;
+}
+
+
+
 /* open shm, create and bind to the unix socket */
 int
 ivshmem_server_start(IvshmemServer *server)
 {
     struct sockaddr_un sun;
     int shm_fd, sock_fd, ret;
+    long hpagesize;
+    gchar *filename;
 
     /* open shm file */
-    shm_fd = shm_open(server->shm_path, O_CREAT|O_RDWR, S_IRWXU);
+    hpagesize = gethugepagesize(server->shm_path);
+    if (hpagesize > 0) {
+        if (server->shm_size < hpagesize) {
+            fprintf(stderr, "hugepage must be at least of size: %ld\n",
+                    hpagesize);
+            return -1;
+        }
+        filename = g_strdup_printf("%s/ivshmem.XXXXXX", server->shm_path);
+        shm_fd = mkstemp(filename);
+        unlink(filename);
+        g_free(filename);
+    } else {
+        shm_fd = shm_open(server->shm_path, O_CREAT|O_RDWR, S_IRWXU);
+    }
+
     if (shm_fd < 0) {
         fprintf(stderr, "cannot open shm file %s: %s\n", server->shm_path,
                 strerror(errno));
diff --git a/contrib/ivshmem-server/ivshmem-server.h b/contrib/ivshmem-server/ivshmem-server.h
index 2176d5e..e9b0e7a 100644
--- a/contrib/ivshmem-server/ivshmem-server.h
+++ b/contrib/ivshmem-server/ivshmem-server.h
@@ -81,8 +81,7 @@  typedef struct IvshmemServer {
  * @server:         A pointer to an uninitialized IvshmemServer structure
  * @unix_sock_path: The pointer to the unix socket file name
  * @shm_path:       Path to the shared memory. The path corresponds to a POSIX
- *                  shm name. To use a real file, for instance in a hugetlbfs,
- *                  it is possible to use /../../abspath/to/file.
+ *                  shm name or a hugetlbfs mount point.
  * @shm_size:       Size of shared memory
  * @n_vectors:      Number of interrupt vectors per client
  * @verbose:        True to enable verbose mode
diff --git a/contrib/ivshmem-server/main.c b/contrib/ivshmem-server/main.c
index 84ffc4d..cd8d9ed 100644
--- a/contrib/ivshmem-server/main.c
+++ b/contrib/ivshmem-server/main.c
@@ -47,9 +47,8 @@  ivshmem_server_usage(const char *name, int code)
                     "     to listen to.\n"
                     "     Default=%s\n", IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH);
     fprintf(stderr, "  -m <shm_path>: path to the shared memory.\n"
-                    "     The path corresponds to a POSIX shm name. To use a\n"
-                    "     real file, for instance in a hugetlbfs, use\n"
-                    "     /../../abspath/to/file.\n"
+                    "     The path corresponds to a POSIX shm name or a\n"
+                    "     hugetlbfs mount point.\n"
                     "     default=%s\n", IVSHMEM_SERVER_DEFAULT_SHM_PATH);
     fprintf(stderr, "  -l <size>: size of shared memory in bytes. The suffix\n"
                     "     K, M and G can be used (ex: 1K means 1024).\n"