diff mbox

[v3,03/16] util: add memfd helpers

Message ID 1438864852-4939-4-git-send-email-marcandre.lureau@redhat.com
State New
Headers show

Commit Message

Marc-André Lureau Aug. 6, 2015, 12:40 p.m. UTC
From: Marc-André Lureau <marcandre.lureau@redhat.com>

Add qemu_memfd_alloc/free() helpers.

The function helps to allocate and seal a memfd, and implements an
open/unlink/mmap fallback for systems that do not support memfd.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
---
 include/qemu/memfd.h |  4 +++
 util/memfd.c         | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 76 insertions(+), 2 deletions(-)

Comments

Michael S. Tsirkin Sept. 29, 2015, 2:57 p.m. UTC | #1
On Thu, Aug 06, 2015 at 02:40:39PM +0200, marcandre.lureau@redhat.com wrote:
> From: Marc-André Lureau <marcandre.lureau@redhat.com>
> 
> Add qemu_memfd_alloc/free() helpers.
> 
> The function helps to allocate and seal a memfd, and implements an
> open/unlink/mmap fallback for system that do not support memfd.
> 
> Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> ---
>  include/qemu/memfd.h |  4 +++
>  util/memfd.c         | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++--
>  2 files changed, 76 insertions(+), 2 deletions(-)
> 
> diff --git a/include/qemu/memfd.h b/include/qemu/memfd.h
> index 8b1fe6a..950fb88 100644
> --- a/include/qemu/memfd.h
> +++ b/include/qemu/memfd.h
> @@ -17,4 +17,8 @@
>  #define F_SEAL_WRITE    0x0008  /* prevent writes */
>  #endif
>  
> +void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals,
> +                       int *fd);
> +void qemu_memfd_free(void *ptr, size_t size, int fd);
> +
>  #endif /* QEMU_MEMFD_H */
> diff --git a/util/memfd.c b/util/memfd.c
> index a98d57e..8b2b785 100644
> --- a/util/memfd.c
> +++ b/util/memfd.c
> @@ -27,6 +27,14 @@
>  
>  #include "config-host.h"
>  
> +#include <glib.h>
> +#include <glib/gprintf.h>
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <fcntl.h>
> +#include <sys/mman.h>
> +
>  #include "qemu/memfd.h"
>  
>  #ifdef CONFIG_MEMFD
> @@ -44,13 +52,75 @@
>  #define MFD_ALLOW_SEALING 0x0002U
>  #endif
>  
> -static inline int memfd_create(const char *name, unsigned int flags)
> +static int memfd_create(const char *name, unsigned int flags)
>  {
>      return syscall(__NR_memfd_create, name, flags);
>  }
>  #else /* !LINUX */
> -static inline int memfd_create(const char *name, unsigned int flags)
> +static int memfd_create(const char *name, unsigned int flags)
>  {
>      return -1;
>  }
>  #endif
> +
> +void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals,
> +                       int *fd)
> +{
> +    void *ptr;
> +    int mfd;
> +
> +    mfd = memfd_create(name, MFD_ALLOW_SEALING|MFD_CLOEXEC);


Hmm. Does this interact correctly with the -mem-prealloc flag?

> +    if (mfd != -1) {
> +        if (ftruncate(mfd, size) == -1) {

Any limitations on size?

> +            perror("ftruncate");
> +            close(mfd);
> +            return NULL;
> +        }
> +
> +        if (fcntl(mfd, F_ADD_SEALS, seals) == -1) {
> +            perror("fcntl");
> +            close(mfd);
> +            return NULL;
> +        }
> +    } else {
> +        const char *tmpdir = getenv("TMPDIR");
> +        gchar *fname;
> +
> +        tmpdir = tmpdir ? tmpdir : "/tmp";
> +
> +        fname = g_strdup_printf("%s/memfd-XXXXXX", tmpdir);

This means there's now work to be done to set up SELinux
to allow QEMU to create memfd files under /tmp.

Maybe it's better to just fail gracefully for now.

> +        mfd = mkstemp(fname);
> +        unlink(fname);
> +        g_free(fname);
> +
> +        if (mfd == -1) {
> +            perror("mkstemp");
> +            return NULL;
> +        }
> +
> +        if (ftruncate(mfd, size) == -1) {
> +            perror("ftruncate");
> +            close(mfd);
> +            return NULL;
> +        }
> +    }
> +
> +    ptr = mmap(0, size, PROT_READ|PROT_WRITE, MAP_SHARED, mfd, 0);

Please add spaces around | here and elsewhere.


> +    if (ptr == MAP_FAILED) {
> +        perror("mmap");
> +        close(mfd);
> +        return NULL;
> +    }
> +
> +    *fd = mfd;
> +    return ptr;
> +}
> +
> +void qemu_memfd_free(void *ptr, size_t size, int fd)
> +{
> +    if (ptr) {
> +        munmap(ptr, size);
> +    }
> +
> +    close(fd);

I notice you close fd unconditionally, but it's only returned
on success above. So on failure this will close an uninitialized fd?

> +}
> -- 
> 2.4.3
Marc-Andre Lureau Sept. 29, 2015, 3:25 p.m. UTC | #2
----- Original Message -----
> On Thu, Aug 06, 2015 at 02:40:39PM +0200, marcandre.lureau@redhat.com wrote:
> > From: Marc-André Lureau <marcandre.lureau@redhat.com>
> > 
> > Add qemu_memfd_alloc/free() helpers.
> > 
> > The function helps to allocate and seal a memfd, and implements an
> > open/unlink/mmap fallback for system that do not support memfd.
> > 
> > Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> > ---
> >  include/qemu/memfd.h |  4 +++
> >  util/memfd.c         | 74
> >  ++++++++++++++++++++++++++++++++++++++++++++++++++--
> >  2 files changed, 76 insertions(+), 2 deletions(-)
> > 
> > diff --git a/include/qemu/memfd.h b/include/qemu/memfd.h
> > index 8b1fe6a..950fb88 100644
> > --- a/include/qemu/memfd.h
> > +++ b/include/qemu/memfd.h
> > @@ -17,4 +17,8 @@
> >  #define F_SEAL_WRITE    0x0008  /* prevent writes */
> >  #endif
> >  
> > +void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals,
> > +                       int *fd);
> > +void qemu_memfd_free(void *ptr, size_t size, int fd);
> > +
> >  #endif /* QEMU_MEMFD_H */
> > diff --git a/util/memfd.c b/util/memfd.c
> > index a98d57e..8b2b785 100644
> > --- a/util/memfd.c
> > +++ b/util/memfd.c
> > @@ -27,6 +27,14 @@
> >  
> >  #include "config-host.h"
> >  
> > +#include <glib.h>
> > +#include <glib/gprintf.h>
> > +
> > +#include <stdio.h>
> > +#include <stdlib.h>
> > +#include <fcntl.h>
> > +#include <sys/mman.h>
> > +
> >  #include "qemu/memfd.h"
> >  
> >  #ifdef CONFIG_MEMFD
> > @@ -44,13 +52,75 @@
> >  #define MFD_ALLOW_SEALING 0x0002U
> >  #endif
> >  
> > -static inline int memfd_create(const char *name, unsigned int flags)
> > +static int memfd_create(const char *name, unsigned int flags)
> >  {
> >      return syscall(__NR_memfd_create, name, flags);
> >  }
> >  #else /* !LINUX */
> > -static inline int memfd_create(const char *name, unsigned int flags)
> > +static int memfd_create(const char *name, unsigned int flags)
> >  {
> >      return -1;
> >  }
> >  #endif
> > +
> > +void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals,
> > +                       int *fd)
> > +{
> > +    void *ptr;
> > +    int mfd;
> > +
> > +    mfd = memfd_create(name, MFD_ALLOW_SEALING|MFD_CLOEXEC);
> 
> 
> Hmm. Does this interact correctly with the -mem-prealloc flag?

It's unrelated, IMHO. It's just a helper here.

In the rest of the series, it's used at runtime when migrating with variable size (today's code doesn't prealloc that either).

> 
> > +    if (mfd != -1) {
> > +        if (ftruncate(mfd, size) == -1) {
> 
> Any limitations on size?

Not that I know of (from reading memfd_create).

> 
> > +            perror("ftruncate");
> > +            close(mfd);
> > +            return NULL;
> > +        }
> > +
> > +        if (fcntl(mfd, F_ADD_SEALS, seals) == -1) {
> > +            perror("fcntl");
> > +            close(mfd);
> > +            return NULL;
> > +        }
> > +    } else {
> > +        const char *tmpdir = getenv("TMPDIR");
> > +        gchar *fname;
> > +
> > +        tmpdir = tmpdir ? tmpdir : "/tmp";
> > +
> > +        fname = g_strdup_printf("%s/memfd-XXXXXX", tmpdir);
> 
> This means there's now work to be done to set up selinux
> to allow QEMU creating memfd under /tmp.

doesn't sound unreasonable to me

> 
> Maybe it's better to just fail gracefully for now.

It's a fallback, but sure, we can remove it and add it back later if needed.

> > +        mfd = mkstemp(fname);
> > +        unlink(fname);
> > +        g_free(fname);
> > +
> > +        if (mfd == -1) {
> > +            perror("mkstemp");
> > +            return NULL;
> > +        }
> > +
> > +        if (ftruncate(mfd, size) == -1) {
> > +            perror("ftruncate");
> > +            close(mfd);
> > +            return NULL;
> > +        }
> > +    }
> > +
> > +    ptr = mmap(0, size, PROT_READ|PROT_WRITE, MAP_SHARED, mfd, 0);
> 
> Pls add space around | here and elsewhere.
> 

ok
> 
> > +    if (ptr == MAP_FAILED) {
> > +        perror("mmap");
> > +        close(mfd);
> > +        return NULL;
> > +    }
> > +
> > +    *fd = mfd;
> > +    return ptr;
> > +}
> > +
> > +void qemu_memfd_free(void *ptr, size_t size, int fd)
> > +{
> > +    if (ptr) {
> > +        munmap(ptr, size);
> > +    }
> > +
> > +    close(fd);
> 
> I notice you close fd unconditionally, but it's only returned
> on success above. So this will close an uninitialized one?

Ok, I'll add a -1 check
Michael S. Tsirkin Sept. 29, 2015, 3:41 p.m. UTC | #3
On Tue, Sep 29, 2015 at 11:25:04AM -0400, Marc-André Lureau wrote:
> 
> 
> ----- Original Message -----
> > On Thu, Aug 06, 2015 at 02:40:39PM +0200, marcandre.lureau@redhat.com wrote:
> > > From: Marc-André Lureau <marcandre.lureau@redhat.com>
> > > 
> > > Add qemu_memfd_alloc/free() helpers.
> > > 
> > > The function helps to allocate and seal a memfd, and implements an
> > > open/unlink/mmap fallback for system that do not support memfd.
> > > 
> > > Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> > > ---
> > >  include/qemu/memfd.h |  4 +++
> > >  util/memfd.c         | 74
> > >  ++++++++++++++++++++++++++++++++++++++++++++++++++--
> > >  2 files changed, 76 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/include/qemu/memfd.h b/include/qemu/memfd.h
> > > index 8b1fe6a..950fb88 100644
> > > --- a/include/qemu/memfd.h
> > > +++ b/include/qemu/memfd.h
> > > @@ -17,4 +17,8 @@
> > >  #define F_SEAL_WRITE    0x0008  /* prevent writes */
> > >  #endif
> > >  
> > > +void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals,
> > > +                       int *fd);
> > > +void qemu_memfd_free(void *ptr, size_t size, int fd);
> > > +
> > >  #endif /* QEMU_MEMFD_H */
> > > diff --git a/util/memfd.c b/util/memfd.c
> > > index a98d57e..8b2b785 100644
> > > --- a/util/memfd.c
> > > +++ b/util/memfd.c
> > > @@ -27,6 +27,14 @@
> > >  
> > >  #include "config-host.h"
> > >  
> > > +#include <glib.h>
> > > +#include <glib/gprintf.h>
> > > +
> > > +#include <stdio.h>
> > > +#include <stdlib.h>
> > > +#include <fcntl.h>
> > > +#include <sys/mman.h>
> > > +
> > >  #include "qemu/memfd.h"
> > >  
> > >  #ifdef CONFIG_MEMFD
> > > @@ -44,13 +52,75 @@
> > >  #define MFD_ALLOW_SEALING 0x0002U
> > >  #endif
> > >  
> > > -static inline int memfd_create(const char *name, unsigned int flags)
> > > +static int memfd_create(const char *name, unsigned int flags)
> > >  {
> > >      return syscall(__NR_memfd_create, name, flags);
> > >  }
> > >  #else /* !LINUX */
> > > -static inline int memfd_create(const char *name, unsigned int flags)
> > > +static int memfd_create(const char *name, unsigned int flags)
> > >  {
> > >      return -1;
> > >  }
> > >  #endif
> > > +
> > > +void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals,
> > > +                       int *fd)
> > > +{
> > > +    void *ptr;
> > > +    int mfd;
> > > +
> > > +    mfd = memfd_create(name, MFD_ALLOW_SEALING|MFD_CLOEXEC);
> > 
> > 
> > Hmm. Does this interact correctly with the -mem-prealloc flag?
> 
> It's unrelated imho. It's helper here.
> 
> In the rest of the series, it's used at runtime when migrating with variable size (today code doesn't prealloc that either)

Yes, but I think this means it will fault on access even if the user
requested -realtime mlock=on.

> > 
> > > +    if (mfd != -1) {
> > > +        if (ftruncate(mfd, size) == -1) {
> > 
> > Any limitations on size?
> 
> not that I know (reading memfd_create)
> 
> > 
> > > +            perror("ftruncate");
> > > +            close(mfd);
> > > +            return NULL;
> > > +        }
> > > +
> > > +        if (fcntl(mfd, F_ADD_SEALS, seals) == -1) {
> > > +            perror("fcntl");
> > > +            close(mfd);
> > > +            return NULL;
> > > +        }
> > > +    } else {
> > > +        const char *tmpdir = getenv("TMPDIR");
> > > +        gchar *fname;
> > > +
> > > +        tmpdir = tmpdir ? tmpdir : "/tmp";
> > > +
> > > +        fname = g_strdup_printf("%s/memfd-XXXXXX", tmpdir);
> > 
> > This means there's now work to be done to set up selinux
> > to allow QEMU creating memfd under /tmp.
> 
> doesn't sound unreasonable to me
> 
> > 
> > Maybe it's better to just fail gracefully for now.
> 
> it's a fallback, but sure we can remove it and add it back later if needed
> 
> > > +        mfd = mkstemp(fname);
> > > +        unlink(fname);
> > > +        g_free(fname);
> > > +
> > > +        if (mfd == -1) {
> > > +            perror("mkstemp");
> > > +            return NULL;
> > > +        }
> > > +
> > > +        if (ftruncate(mfd, size) == -1) {
> > > +            perror("ftruncate");
> > > +            close(mfd);
> > > +            return NULL;
> > > +        }
> > > +    }
> > > +
> > > +    ptr = mmap(0, size, PROT_READ|PROT_WRITE, MAP_SHARED, mfd, 0);
> > 
> > Pls add space around | here and elsewhere.
> > 
> 
> ok
> > 
> > > +    if (ptr == MAP_FAILED) {
> > > +        perror("mmap");
> > > +        close(mfd);
> > > +        return NULL;
> > > +    }
> > > +
> > > +    *fd = mfd;
> > > +    return ptr;
> > > +}
> > > +
> > > +void qemu_memfd_free(void *ptr, size_t size, int fd)
> > > +{
> > > +    if (ptr) {
> > > +        munmap(ptr, size);
> > > +    }
> > > +
> > > +    close(fd);
> > 
> > I notice you close fd unconditionally, but it's only returned
> > on success above. So this will close an uninitialized one?
> 
> Ok, I'll add a -1 check

That will only work well if you set *fd to -1 on error above ...
diff mbox

Patch

diff --git a/include/qemu/memfd.h b/include/qemu/memfd.h
index 8b1fe6a..950fb88 100644
--- a/include/qemu/memfd.h
+++ b/include/qemu/memfd.h
@@ -17,4 +17,8 @@ 
 #define F_SEAL_WRITE    0x0008  /* prevent writes */
 #endif
 
+void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals,
+                       int *fd);
+void qemu_memfd_free(void *ptr, size_t size, int fd);
+
 #endif /* QEMU_MEMFD_H */
diff --git a/util/memfd.c b/util/memfd.c
index a98d57e..8b2b785 100644
--- a/util/memfd.c
+++ b/util/memfd.c
@@ -27,6 +27,14 @@ 
 
 #include "config-host.h"
 
+#include <glib.h>
+#include <glib/gprintf.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+
 #include "qemu/memfd.h"
 
 #ifdef CONFIG_MEMFD
@@ -44,13 +52,75 @@ 
 #define MFD_ALLOW_SEALING 0x0002U
 #endif
 
-static inline int memfd_create(const char *name, unsigned int flags)
+static int memfd_create(const char *name, unsigned int flags)
 {
     return syscall(__NR_memfd_create, name, flags);
 }
 #else /* !LINUX */
-static inline int memfd_create(const char *name, unsigned int flags)
+static int memfd_create(const char *name, unsigned int flags)
 {
     return -1;
 }
 #endif
+
+void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals,
+                       int *fd)
+{
+    void *ptr;
+    int mfd;
+
+    mfd = memfd_create(name, MFD_ALLOW_SEALING|MFD_CLOEXEC);
+    if (mfd != -1) {
+        if (ftruncate(mfd, size) == -1) {
+            perror("ftruncate");
+            close(mfd);
+            return NULL;
+        }
+
+        if (fcntl(mfd, F_ADD_SEALS, seals) == -1) {
+            perror("fcntl");
+            close(mfd);
+            return NULL;
+        }
+    } else {
+        const char *tmpdir = getenv("TMPDIR");
+        gchar *fname;
+
+        tmpdir = tmpdir ? tmpdir : "/tmp";
+
+        fname = g_strdup_printf("%s/memfd-XXXXXX", tmpdir);
+        mfd = mkstemp(fname);
+        unlink(fname);
+        g_free(fname);
+
+        if (mfd == -1) {
+            perror("mkstemp");
+            return NULL;
+        }
+
+        if (ftruncate(mfd, size) == -1) {
+            perror("ftruncate");
+            close(mfd);
+            return NULL;
+        }
+    }
+
+    ptr = mmap(0, size, PROT_READ|PROT_WRITE, MAP_SHARED, mfd, 0);
+    if (ptr == MAP_FAILED) {
+        perror("mmap");
+        close(mfd);
+        return NULL;
+    }
+
+    *fd = mfd;
+    return ptr;
+}
+
+void qemu_memfd_free(void *ptr, size_t size, int fd)
+{
+    if (ptr) {
+        munmap(ptr, size);
+    }
+
+    close(fd);
+}