diff mbox

[v2,4/9] vhost: alloc shareable log

Message ID 1438105003-29501-5-git-send-email-marcandre.lureau@redhat.com
State New
Headers show

Commit Message

Marc-André Lureau July 28, 2015, 5:36 p.m. UTC
If the backend is of type VHOST_BACKEND_TYPE_USER, allocate
shareable memory.

Note: vhost_log_get() can use a global "vhost_log" that can be shared by
several vhost devices. We may want instead a common shareable log and a
common non-shareable one.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
---
 hw/virtio/vhost.c         | 38 +++++++++++++++++++++++++++++++-------
 include/hw/virtio/vhost.h |  3 ++-
 2 files changed, 33 insertions(+), 8 deletions(-)

Comments

Dr. David Alan Gilbert July 29, 2015, 4:53 p.m. UTC | #1
* Marc-André Lureau (marcandre.lureau@redhat.com) wrote:
> If the backend is of type VHOST_BACKEND_TYPE_USER, allocate
> shareable memory.
> 
> Note: vhost_log_get() can use a global "vhost_log" that can be shared by
> several vhost devices. We may want instead a common shareable log and a
> common non-shareable one.
> 
> Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> ---
>  hw/virtio/vhost.c         | 38 +++++++++++++++++++++++++++++++-------
>  include/hw/virtio/vhost.h |  3 ++-
>  2 files changed, 33 insertions(+), 8 deletions(-)
> 
> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> index 2712c6f..862e786 100644
> --- a/hw/virtio/vhost.c
> +++ b/hw/virtio/vhost.c
> @@ -18,6 +18,7 @@
>  #include "qemu/atomic.h"
>  #include "qemu/range.h"
>  #include "qemu/error-report.h"
> +#include "qemu/memfd.h"
>  #include <linux/vhost.h>
>  #include "exec/address-spaces.h"
>  #include "hw/virtio/virtio-bus.h"
> @@ -286,20 +287,34 @@ static uint64_t vhost_get_log_size(struct vhost_dev *dev)
>      }
>      return log_size;
>  }
> -static struct vhost_log *vhost_log_alloc(uint64_t size)
> +
> +static struct vhost_log *vhost_log_alloc(uint64_t size, bool share)
>  {
> -    struct vhost_log *log = g_malloc0(sizeof *log + size * sizeof(*(log->log)));
> +    struct vhost_log *log;
> +    uint64_t logsize = size * sizeof(*(log->log));
> +    int fd = -1;
> +
> +    log = g_new0(struct vhost_log, 1);
> +    if (share) {
> +        log->log = qemu_memfd_alloc("vhost-log", logsize,
> +                                    F_SEAL_GROW|F_SEAL_SHRINK|F_SEAL_SEAL, &fd);
> +        memset(log->log, 0, logsize);

qemu_memfd_alloc can return NULL, can't it? So that needs checking.

> +    } else {
> +        log->log = g_malloc0(logsize);

I know the old code also used g_malloc0, but if the log isn't 'small'
then g_try_malloc0 is possibly safer and properly returns an error
if the log can't be allocated.

Dave

> +    }
>  
>      log->size = size;
>      log->refcnt = 1;
> +    log->fd = fd;
>  
>      return log;
>  }
>  
> -static struct vhost_log *vhost_log_get(uint64_t size)
> +static struct vhost_log *vhost_log_get(uint64_t size, bool share)
>  {
> -    if (!vhost_log || vhost_log->size != size) {
> -        vhost_log = vhost_log_alloc(size);
> +    if (!vhost_log || vhost_log->size != size ||
> +        (share && vhost_log->fd == -1)) {
> +        vhost_log = vhost_log_alloc(size, share);
>      } else {
>          ++vhost_log->refcnt;
>      }
> @@ -324,13 +339,21 @@ static void vhost_log_put(struct vhost_dev *dev, bool sync)
>          if (vhost_log == log) {
>              vhost_log = NULL;
>          }
> +
> +        if (log->fd == -1) {
> +            g_free(log->log);
> +        } else {
> +            qemu_memfd_free(log->log, log->size * sizeof(*(log->log)),
> +                            log->fd);
> +        }
>          g_free(log);
>      }
>  }
>  
>  static inline void vhost_dev_log_resize(struct vhost_dev* dev, uint64_t size)
>  {
> -    struct vhost_log *log = vhost_log_get(size);
> +    bool share = dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER;
> +    struct vhost_log *log = vhost_log_get(size, share);
>      uint64_t log_base = (uintptr_t)log->log;
>      int r;
>  
> @@ -1136,9 +1159,10 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
>  
>      if (hdev->log_enabled) {
>          uint64_t log_base;
> +        bool share = hdev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER;
>  
>          hdev->log_size = vhost_get_log_size(hdev);
> -        hdev->log = vhost_log_get(hdev->log_size);
> +        hdev->log = vhost_log_get(hdev->log_size, share);
>          log_base = (uintptr_t)hdev->log->log;
>          r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_LOG_BASE,
>                                          hdev->log_size ? &log_base : NULL);
> diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
> index 6467c73..ab1dcac 100644
> --- a/include/hw/virtio/vhost.h
> +++ b/include/hw/virtio/vhost.h
> @@ -31,7 +31,8 @@ typedef unsigned long vhost_log_chunk_t;
>  struct vhost_log {
>      unsigned long long size;
>      int refcnt;
> -    vhost_log_chunk_t log[0];
> +    int fd;
> +    vhost_log_chunk_t *log;
>  };
>  
>  struct vhost_memory;
> -- 
> 2.4.3
> 
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Marc-Andre Lureau July 29, 2015, 5:04 p.m. UTC | #2
Hi

----- Original Message -----
> * Marc-André Lureau (marcandre.lureau@redhat.com) wrote:
> > If the backend is of type VHOST_BACKEND_TYPE_USER, allocate
> > shareable memory.
> > 
> > Note: vhost_log_get() can use a global "vhost_log" that can be shared by
> > several vhost devices. We may want instead a common shareable log and a
> > common non-shareable one.
> > 
> > Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> > ---
> >  hw/virtio/vhost.c         | 38 +++++++++++++++++++++++++++++++-------
> >  include/hw/virtio/vhost.h |  3 ++-
> >  2 files changed, 33 insertions(+), 8 deletions(-)
> > 
> > diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> > index 2712c6f..862e786 100644
> > --- a/hw/virtio/vhost.c
> > +++ b/hw/virtio/vhost.c
> > @@ -18,6 +18,7 @@
> >  #include "qemu/atomic.h"
> >  #include "qemu/range.h"
> >  #include "qemu/error-report.h"
> > +#include "qemu/memfd.h"
> >  #include <linux/vhost.h>
> >  #include "exec/address-spaces.h"
> >  #include "hw/virtio/virtio-bus.h"
> > @@ -286,20 +287,34 @@ static uint64_t vhost_get_log_size(struct vhost_dev
> > *dev)
> >      }
> >      return log_size;
> >  }
> > -static struct vhost_log *vhost_log_alloc(uint64_t size)
> > +
> > +static struct vhost_log *vhost_log_alloc(uint64_t size, bool share)
> >  {
> > -    struct vhost_log *log = g_malloc0(sizeof *log + size *
> > sizeof(*(log->log)));
> > +    struct vhost_log *log;
> > +    uint64_t logsize = size * sizeof(*(log->log));
> > +    int fd = -1;
> > +
> > +    log = g_new0(struct vhost_log, 1);
> > +    if (share) {
> > +        log->log = qemu_memfd_alloc("vhost-log", logsize,
> > +                                    F_SEAL_GROW|F_SEAL_SHRINK|F_SEAL_SEAL,
> > &fd);
> > +        memset(log->log, 0, logsize);
> 
> qemu_memfd_alloc can return NULL can't it - so that needs checking?
> 
> > +    } else {
> > +        log->log = g_malloc0(logsize);
> 
> I know the old code also used g_malloc0, but if the log isn't 'small'
> then g_try_malloc0 is possibly safer and properly return errors
> if it can't be allocated.

Yeah, I agree it's better to check for the return value here (as you pointed out, I followed the existing pattern).

Maybe we are just screwed if it happens; live migration shouldn't succeed if it can't be done properly, imho.

What's your take on this Michael?

cheers
Dr. David Alan Gilbert July 29, 2015, 5:08 p.m. UTC | #3
* Marc-André Lureau (mlureau@redhat.com) wrote:
> Hi
> 
> ----- Original Message -----
> > * Marc-André Lureau (marcandre.lureau@redhat.com) wrote:
> > > If the backend is of type VHOST_BACKEND_TYPE_USER, allocate
> > > shareable memory.
> > > 
> > > Note: vhost_log_get() can use a global "vhost_log" that can be shared by
> > > several vhost devices. We may want instead a common shareable log and a
> > > common non-shareable one.
> > > 
> > > Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> > > ---
> > >  hw/virtio/vhost.c         | 38 +++++++++++++++++++++++++++++++-------
> > >  include/hw/virtio/vhost.h |  3 ++-
> > >  2 files changed, 33 insertions(+), 8 deletions(-)
> > > 
> > > diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> > > index 2712c6f..862e786 100644
> > > --- a/hw/virtio/vhost.c
> > > +++ b/hw/virtio/vhost.c
> > > @@ -18,6 +18,7 @@
> > >  #include "qemu/atomic.h"
> > >  #include "qemu/range.h"
> > >  #include "qemu/error-report.h"
> > > +#include "qemu/memfd.h"
> > >  #include <linux/vhost.h>
> > >  #include "exec/address-spaces.h"
> > >  #include "hw/virtio/virtio-bus.h"
> > > @@ -286,20 +287,34 @@ static uint64_t vhost_get_log_size(struct vhost_dev
> > > *dev)
> > >      }
> > >      return log_size;
> > >  }
> > > -static struct vhost_log *vhost_log_alloc(uint64_t size)
> > > +
> > > +static struct vhost_log *vhost_log_alloc(uint64_t size, bool share)
> > >  {
> > > -    struct vhost_log *log = g_malloc0(sizeof *log + size *
> > > sizeof(*(log->log)));
> > > +    struct vhost_log *log;
> > > +    uint64_t logsize = size * sizeof(*(log->log));
> > > +    int fd = -1;
> > > +
> > > +    log = g_new0(struct vhost_log, 1);
> > > +    if (share) {
> > > +        log->log = qemu_memfd_alloc("vhost-log", logsize,
> > > +                                    F_SEAL_GROW|F_SEAL_SHRINK|F_SEAL_SEAL,
> > > &fd);
> > > +        memset(log->log, 0, logsize);
> > 
> > qemu_memfd_alloc can return NULL can't it - so that needs checking?
> > 
> > > +    } else {
> > > +        log->log = g_malloc0(logsize);
> > 
> > I know the old code also used g_malloc0, but if the log isn't 'small'
> > then g_try_malloc0 is possibly safer and properly return errors
> > if it can't be allocated.
> 
> Yeah, I agree it's better to check for the return value here (as you pointed out, I followed the existing pattern).
> 
> Maybe we are just screwed if it happens, live migration shouldn't succeed if it can't be done properly imho.

Probably; we try to be particularly careful about memory allocations on
the migration source, since the VM is running just fine there, and it's
unfortunate to kill it at that point.

Dave

> What's your take on this Michael?
> 
> cheers
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Michael S. Tsirkin July 29, 2015, 5:22 p.m. UTC | #4
On Wed, Jul 29, 2015 at 01:04:51PM -0400, Marc-André Lureau wrote:
> Hi
> 
> ----- Original Message -----
> > * Marc-André Lureau (marcandre.lureau@redhat.com) wrote:
> > > If the backend is of type VHOST_BACKEND_TYPE_USER, allocate
> > > shareable memory.
> > > 
> > > Note: vhost_log_get() can use a global "vhost_log" that can be shared by
> > > several vhost devices. We may want instead a common shareable log and a
> > > common non-shareable one.
> > > 
> > > Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> > > ---
> > >  hw/virtio/vhost.c         | 38 +++++++++++++++++++++++++++++++-------
> > >  include/hw/virtio/vhost.h |  3 ++-
> > >  2 files changed, 33 insertions(+), 8 deletions(-)
> > > 
> > > diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> > > index 2712c6f..862e786 100644
> > > --- a/hw/virtio/vhost.c
> > > +++ b/hw/virtio/vhost.c
> > > @@ -18,6 +18,7 @@
> > >  #include "qemu/atomic.h"
> > >  #include "qemu/range.h"
> > >  #include "qemu/error-report.h"
> > > +#include "qemu/memfd.h"
> > >  #include <linux/vhost.h>
> > >  #include "exec/address-spaces.h"
> > >  #include "hw/virtio/virtio-bus.h"
> > > @@ -286,20 +287,34 @@ static uint64_t vhost_get_log_size(struct vhost_dev
> > > *dev)
> > >      }
> > >      return log_size;
> > >  }
> > > -static struct vhost_log *vhost_log_alloc(uint64_t size)
> > > +
> > > +static struct vhost_log *vhost_log_alloc(uint64_t size, bool share)
> > >  {
> > > -    struct vhost_log *log = g_malloc0(sizeof *log + size *
> > > sizeof(*(log->log)));
> > > +    struct vhost_log *log;
> > > +    uint64_t logsize = size * sizeof(*(log->log));
> > > +    int fd = -1;
> > > +
> > > +    log = g_new0(struct vhost_log, 1);
> > > +    if (share) {
> > > +        log->log = qemu_memfd_alloc("vhost-log", logsize,
> > > +                                    F_SEAL_GROW|F_SEAL_SHRINK|F_SEAL_SEAL,
> > > &fd);
> > > +        memset(log->log, 0, logsize);
> > 
> > qemu_memfd_alloc can return NULL can't it - so that needs checking?
> > 
> > > +    } else {
> > > +        log->log = g_malloc0(logsize);
> > 
> > I know the old code also used g_malloc0, but if the log isn't 'small'
> > then g_try_malloc0 is possibly safer and properly return errors
> > if it can't be allocated.
> 
> Yeah, I agree it's better to check for the return value here (as you pointed out, I followed the existing pattern).
> 
> Maybe we are just screwed if it happens, live migration shouldn't succeed if it can't be done properly imho.
> 
> What's your take on this Michael?
> 
> cheers

I guess we could fail migration in that case ...
Since current code uses g_malloc0 I feel this can be addressed by
a separate patch later.

--  
MST
diff mbox

Patch

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 2712c6f..862e786 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -18,6 +18,7 @@ 
 #include "qemu/atomic.h"
 #include "qemu/range.h"
 #include "qemu/error-report.h"
+#include "qemu/memfd.h"
 #include <linux/vhost.h>
 #include "exec/address-spaces.h"
 #include "hw/virtio/virtio-bus.h"
@@ -286,20 +287,34 @@  static uint64_t vhost_get_log_size(struct vhost_dev *dev)
     }
     return log_size;
 }
-static struct vhost_log *vhost_log_alloc(uint64_t size)
+
+static struct vhost_log *vhost_log_alloc(uint64_t size, bool share)
 {
-    struct vhost_log *log = g_malloc0(sizeof *log + size * sizeof(*(log->log)));
+    struct vhost_log *log;
+    uint64_t logsize = size * sizeof(*(log->log));
+    int fd = -1;
+
+    log = g_new0(struct vhost_log, 1);
+    if (share) {
+        log->log = qemu_memfd_alloc("vhost-log", logsize,
+                                    F_SEAL_GROW|F_SEAL_SHRINK|F_SEAL_SEAL, &fd);
+        memset(log->log, 0, logsize);
+    } else {
+        log->log = g_malloc0(logsize);
+    }
 
     log->size = size;
     log->refcnt = 1;
+    log->fd = fd;
 
     return log;
 }
 
-static struct vhost_log *vhost_log_get(uint64_t size)
+static struct vhost_log *vhost_log_get(uint64_t size, bool share)
 {
-    if (!vhost_log || vhost_log->size != size) {
-        vhost_log = vhost_log_alloc(size);
+    if (!vhost_log || vhost_log->size != size ||
+        (share && vhost_log->fd == -1)) {
+        vhost_log = vhost_log_alloc(size, share);
     } else {
         ++vhost_log->refcnt;
     }
@@ -324,13 +339,21 @@  static void vhost_log_put(struct vhost_dev *dev, bool sync)
         if (vhost_log == log) {
             vhost_log = NULL;
         }
+
+        if (log->fd == -1) {
+            g_free(log->log);
+        } else {
+            qemu_memfd_free(log->log, log->size * sizeof(*(log->log)),
+                            log->fd);
+        }
         g_free(log);
     }
 }
 
 static inline void vhost_dev_log_resize(struct vhost_dev* dev, uint64_t size)
 {
-    struct vhost_log *log = vhost_log_get(size);
+    bool share = dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER;
+    struct vhost_log *log = vhost_log_get(size, share);
     uint64_t log_base = (uintptr_t)log->log;
     int r;
 
@@ -1136,9 +1159,10 @@  int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
 
     if (hdev->log_enabled) {
         uint64_t log_base;
+        bool share = hdev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER;
 
         hdev->log_size = vhost_get_log_size(hdev);
-        hdev->log = vhost_log_get(hdev->log_size);
+        hdev->log = vhost_log_get(hdev->log_size, share);
         log_base = (uintptr_t)hdev->log->log;
         r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_LOG_BASE,
                                         hdev->log_size ? &log_base : NULL);
diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index 6467c73..ab1dcac 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -31,7 +31,8 @@  typedef unsigned long vhost_log_chunk_t;
 struct vhost_log {
     unsigned long long size;
     int refcnt;
-    vhost_log_chunk_t log[0];
+    int fd;
+    vhost_log_chunk_t *log;
 };
 
 struct vhost_memory;