
vhost-user: add separate memslot counter for vhost-user

Message ID 20200928131731.69684-1-chenjiajun8@huawei.com
State New
Series vhost-user: add separate memslot counter for vhost-user

Commit Message

chenjiajun Sept. 28, 2020, 1:17 p.m. UTC
Currently used_memslots is set to dev->mem->nregions, which is correct
for vhost-kernel but not for vhost-user: vhost-user only uses the memory
regions that are backed by a file descriptor, and not all memory regions
have one.
This matters in some scenarios. For example, if used_memslots is 8 but
only 5 of those slots are actually used by vhost-user, hot plugging new
RAM fails because vhost_has_free_slot() returns false, even though the
hot plug would in fact be safe.
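
For illustration only, below is a minimal standalone model of the accounting
difference (this is not QEMU code; the struct and helper names are invented
for the sketch). With 8 regions of which only 5 are fd-backed and a backend
limit of 8 slots, counting every region reports the device as full, while
counting only fd-backed regions still leaves room to hot plug:

/*
 * Toy model of the accounting difference: vhost-kernel counts every
 * region, while vhost-user should only count regions backed by a file
 * descriptor, because only those are sent to the backend.
 */
#include <stdio.h>
#include <stdbool.h>

struct region {
    bool has_fd;   /* fd-backed (file/memfd) vs. anonymous RAM */
};

static unsigned int kernel_used_memslots(const struct region *r, int n)
{
    (void)r;
    return n;      /* old behaviour: every region counts */
}

static unsigned int user_used_memslots(const struct region *r, int n)
{
    unsigned int used = 0;
    for (int i = 0; i < n; i++) {
        if (r[i].has_fd) {
            used++;   /* only fd-backed regions go to the vhost-user backend */
        }
    }
    return used;
}

int main(void)
{
    /* the scenario from the commit message: 8 regions, 5 fd-backed */
    struct region regs[8] = {
        {true}, {true}, {true}, {true}, {true},
        {false}, {false}, {false},
    };
    unsigned int limit = 8;   /* assume the backend advertises 8 memslots */

    printf("count all regions:    %u -> free slot: %s\n",
           kernel_used_memslots(regs, 8),
           kernel_used_memslots(regs, 8) < limit ? "yes" : "no");
    printf("count fd-backed only: %u -> free slot: %s\n",
           user_used_memslots(regs, 8),
           user_used_memslots(regs, 8) < limit ? "yes" : "no");
    return 0;
}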

--
ChangeList:
v3:
-make used_memslots a member of struct vhost_dev instead of a global static value

v2:
-eliminating useless used_memslots_exceeded variable and used_memslots_is_exceeded() API

v1:
-vhost-user: add separate memslot counter for vhost-user

Signed-off-by: Jiajun Chen <chenjiajun8@huawei.com>
Signed-off-by: Jianjay Zhou <jianjay.zhou@huawei.com>
---
 hw/virtio/vhost-backend.c         | 12 ++++++++++
 hw/virtio/vhost-user.c            | 25 +++++++++++++++++++++
 hw/virtio/vhost.c                 | 37 +++++++++++++++++++++++--------
 include/hw/virtio/vhost-backend.h |  5 +++++
 include/hw/virtio/vhost.h         |  1 +
 net/vhost-user.c                  |  7 ++++++
 6 files changed, 78 insertions(+), 9 deletions(-)

Comments

Raphael Norwitz Oct. 2, 2020, 2:05 a.m. UTC | #1
On Mon, Sep 28, 2020 at 9:17 AM Jiajun Chen <chenjiajun8@huawei.com> wrote:
>
> Used_memslots is equal to dev->mem->nregions now, it is true for
> vhost kernel, but not for vhost user, which uses the memory regions
> that have file descriptor. In fact, not all of the memory regions
> have file descriptor.
> It is usefully in some scenarios, e.g. used_memslots is 8, and only
> 5 memory slots can be used by vhost user, it is failed to hot plug
> a new memory RAM because vhost_has_free_slot just returned false,
> but we can hot plug it safely in fact.
>
> --
> ChangeList:
> v3:
> -make used_memslots a member of struct vhost_dev instead of a global static value
>
> v2:
> -eliminating useless used_memslots_exceeded variable and used_memslots_is_exceeded() API
>
> v1:
> -vhost-user: add separate memslot counter for vhost-user
>
> Signed-off-by: Jiajun Chen <chenjiajun8@huawei.com>
> Signed-off-by: Jianjay Zhou <jianjay.zhou@huawei.com>

I'm happy with this from a vhost/vhost-user perspective. vhost-backend
change looks good too. I'm a little confused by what's going on with
net/vhost-user.c.

> ---
>  hw/virtio/vhost-backend.c         | 12 ++++++++++
>  hw/virtio/vhost-user.c            | 25 +++++++++++++++++++++
>  hw/virtio/vhost.c                 | 37 +++++++++++++++++++++++--------
>  include/hw/virtio/vhost-backend.h |  5 +++++
>  include/hw/virtio/vhost.h         |  1 +
>  net/vhost-user.c                  |  7 ++++++
>  6 files changed, 78 insertions(+), 9 deletions(-)
>

> diff --git a/net/vhost-user.c b/net/vhost-user.c
> index 17532daaf3..7e93955537 100644
> --- a/net/vhost-user.c
> +++ b/net/vhost-user.c
> @@ -20,6 +20,7 @@
>  #include "qemu/error-report.h"
>  #include "qemu/option.h"
>  #include "trace.h"
> +#include "include/hw/virtio/vhost.h"
>
>  typedef struct NetVhostUserState {
>      NetClientState nc;
> @@ -347,6 +348,12 @@ static int net_vhost_user_init(NetClientState *peer, const char *device,
>          qemu_chr_fe_set_handlers(&s->chr, NULL, NULL,
>                                   net_vhost_user_event, NULL, nc0->name, NULL,
>                                   true);

Can you elaborate on this check here? What does it have to do with
fixing memslots accounting? Maybe it should be in a separate change?

> +
> +        if (!vhost_has_free_slot()) {
> +            error_report("used memslots exceeded the backend limit, quit "
> +                         "loop");
> +            goto err;
> +        }
>      } while (!s->started);
>
>      assert(s->vhost_net);
> --
> 2.27.0.dirty
>
Igor Mammedov Oct. 6, 2020, 9:48 a.m. UTC | #2
On Mon, 28 Sep 2020 21:17:31 +0800
Jiajun Chen <chenjiajun8@huawei.com> wrote:

> Used_memslots is equal to dev->mem->nregions now, it is true for
> vhost kernel, but not for vhost user, which uses the memory regions
> that have file descriptor. In fact, not all of the memory regions
> have file descriptor.
> It is usefully in some scenarios, e.g. used_memslots is 8, and only
> 5 memory slots can be used by vhost user, it is failed to hot plug
> a new memory RAM because vhost_has_free_slot just returned false,
> but we can hot plug it safely in fact.

I had an impression that all guest RAM has to be shared with vhost,
so combination of anon and fd based RAM couldn't work.
Am I wrong?

> 
> --
> ChangeList:
> v3:
> -make used_memslots a member of struct vhost_dev instead of a global static value
it's global resource, so why?

> 
> v2:
> -eliminating useless used_memslots_exceeded variable and used_memslots_is_exceeded() API
> 
> v1:
> -vhost-user: add separate memslot counter for vhost-user
> 
> Signed-off-by: Jiajun Chen <chenjiajun8@huawei.com>
> Signed-off-by: Jianjay Zhou <jianjay.zhou@huawei.com>
> ---
>  hw/virtio/vhost-backend.c         | 12 ++++++++++
>  hw/virtio/vhost-user.c            | 25 +++++++++++++++++++++
>  hw/virtio/vhost.c                 | 37 +++++++++++++++++++++++--------
>  include/hw/virtio/vhost-backend.h |  5 +++++
>  include/hw/virtio/vhost.h         |  1 +
>  net/vhost-user.c                  |  7 ++++++
>  6 files changed, 78 insertions(+), 9 deletions(-)
> 
> diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
> index 782b1d67d9..7016f23ec5 100644
> --- a/hw/virtio/vhost-backend.c
> +++ b/hw/virtio/vhost-backend.c
> @@ -238,6 +238,16 @@ static void vhost_kernel_set_iotlb_callback(struct vhost_dev *dev,
>          qemu_set_fd_handler((uintptr_t)dev->opaque, NULL, NULL, NULL);
>  }
>  
> +static void vhost_kernel_set_used_memslots(struct vhost_dev *dev)
> +{
> +    dev->used_memslots = dev->mem->nregions;
> +}
> +
> +static unsigned int vhost_kernel_get_used_memslots(struct vhost_dev *dev)
> +{
> +    return dev->used_memslots;
> +}
> +
>  static const VhostOps kernel_ops = {
>          .backend_type = VHOST_BACKEND_TYPE_KERNEL,
>          .vhost_backend_init = vhost_kernel_init,
> @@ -269,6 +279,8 @@ static const VhostOps kernel_ops = {
>  #endif /* CONFIG_VHOST_VSOCK */
>          .vhost_set_iotlb_callback = vhost_kernel_set_iotlb_callback,
>          .vhost_send_device_iotlb_msg = vhost_kernel_send_device_iotlb_msg,
> +        .vhost_set_used_memslots = vhost_kernel_set_used_memslots,
> +        .vhost_get_used_memslots = vhost_kernel_get_used_memslots,
>  };
>  #endif
>  
> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> index 31231218dc..5dea64d8a8 100644
> --- a/hw/virtio/vhost-user.c
> +++ b/hw/virtio/vhost-user.c
> @@ -2354,6 +2354,29 @@ void vhost_user_cleanup(VhostUserState *user)
>      user->chr = NULL;
>  }
>  
> +static void vhost_user_set_used_memslots(struct vhost_dev *dev)
> +{
> +    int i;
> +    dev->used_memslots = 0;
> +
> +    for (i = 0; i < dev->mem->nregions; ++i) {
> +        struct vhost_memory_region *reg = dev->mem->regions + i;
> +        ram_addr_t offset;
> +        MemoryRegion *mr;
> +        int fd;
> +
> +        mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
> +        if (mr && fd > 0) {
> +            dev->used_memslots++;
> +        }
> +    }
> +}
> +
> +static unsigned int vhost_user_get_used_memslots(struct vhost_dev *dev)
> +{
> +    return dev->used_memslots;
> +}
> +
>  const VhostOps user_ops = {
>          .backend_type = VHOST_BACKEND_TYPE_USER,
>          .vhost_backend_init = vhost_user_backend_init,
> @@ -2387,4 +2410,6 @@ const VhostOps user_ops = {
>          .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
>          .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
>          .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
> +        .vhost_set_used_memslots = vhost_user_set_used_memslots,
> +        .vhost_get_used_memslots = vhost_user_get_used_memslots,
>  };
> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> index 1a1384e7a6..98b967669b 100644
> --- a/hw/virtio/vhost.c
> +++ b/hw/virtio/vhost.c
> @@ -45,20 +45,20 @@
>  static struct vhost_log *vhost_log;
>  static struct vhost_log *vhost_log_shm;
>  
> -static unsigned int used_memslots;
>  static QLIST_HEAD(, vhost_dev) vhost_devices =
>      QLIST_HEAD_INITIALIZER(vhost_devices);
>  
>  bool vhost_has_free_slot(void)
>  {
> -    unsigned int slots_limit = ~0U;
>      struct vhost_dev *hdev;
>  
>      QLIST_FOREACH(hdev, &vhost_devices, entry) {
> -        unsigned int r = hdev->vhost_ops->vhost_backend_memslots_limit(hdev);
> -        slots_limit = MIN(slots_limit, r);
> +        if (hdev->vhost_ops->vhost_get_used_memslots(hdev) >=
> +            hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) {
> +            return false;
> +        }
>      }
> -    return slots_limit > used_memslots;
> +    return true;
>  }
>  
>  static void vhost_dev_sync_region(struct vhost_dev *dev,
> @@ -502,7 +502,6 @@ static void vhost_commit(MemoryListener *listener)
>                         dev->n_mem_sections * sizeof dev->mem->regions[0];
>      dev->mem = g_realloc(dev->mem, regions_size);
>      dev->mem->nregions = dev->n_mem_sections;
> -    used_memslots = dev->mem->nregions;
>      for (i = 0; i < dev->n_mem_sections; i++) {
>          struct vhost_memory_region *cur_vmr = dev->mem->regions + i;
>          struct MemoryRegionSection *mrs = dev->mem_sections + i;
> @@ -678,6 +677,7 @@ static void vhost_region_add_section(struct vhost_dev *dev,
>          dev->tmp_sections[dev->n_tmp_sections - 1].fv = NULL;
>          memory_region_ref(section->mr);
>      }
> +    dev->vhost_ops->vhost_set_used_memslots(dev);
>  }
>  
>  /* Used for both add and nop callbacks */
> @@ -693,6 +693,17 @@ static void vhost_region_addnop(MemoryListener *listener,
>      vhost_region_add_section(dev, section);
>  }
>  
> +static void vhost_region_del(MemoryListener *listener,
> +                             MemoryRegionSection *section)
> +{
> +    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
> +                                         memory_listener);
> +    if (!vhost_section(dev, section)) {
> +        return;
> +    }
> +    dev->vhost_ops->vhost_set_used_memslots(dev);
> +}
> +
>  static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
>  {
>      struct vhost_iommu *iommu = container_of(n, struct vhost_iommu, n);
> @@ -1300,6 +1311,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
>      hdev->memory_listener = (MemoryListener) {
>          .begin = vhost_begin,
>          .commit = vhost_commit,
> +        .region_del = vhost_region_del,
>          .region_add = vhost_region_addnop,
>          .region_nop = vhost_region_addnop,
>          .log_start = vhost_log_start,
> @@ -1346,9 +1358,16 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
>      memory_listener_register(&hdev->memory_listener, &address_space_memory);
>      QLIST_INSERT_HEAD(&vhost_devices, hdev, entry);
>  
> -    if (used_memslots > hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) {
> -        error_report("vhost backend memory slots limit is less"
> -                " than current number of present memory slots");
> +    /*
> +     * If we started a VM without any vhost device,
> +     * for the first time vhost device hot-plug
> +     * (vhost_get_used_memslots is always 0),
> +     * so it needs to double check here.
> +     */
> +    if (hdev->vhost_ops->vhost_get_used_memslots(hdev) >
> +        hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) {
> +        error_report("vhost backend memory slots limit is less than"
> +                     " current number of present memory slots");
>          r = -1;
>          if (busyloop_timeout) {
>              goto fail_busyloop;
> diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
> index 8825bd278f..6569c95a43 100644
> --- a/include/hw/virtio/vhost-backend.h
> +++ b/include/hw/virtio/vhost-backend.h
> @@ -124,6 +124,9 @@ typedef int (*vhost_get_device_id_op)(struct vhost_dev *dev, uint32_t *dev_id);
>  
>  typedef bool (*vhost_force_iommu_op)(struct vhost_dev *dev);
>  
> +typedef void (*vhost_set_used_memslots_op)(struct vhost_dev *dev);
> +typedef unsigned int (*vhost_get_used_memslots_op)(struct vhost_dev *dev);
> +
>  typedef struct VhostOps {
>      VhostBackendType backend_type;
>      vhost_backend_init vhost_backend_init;
> @@ -168,6 +171,8 @@ typedef struct VhostOps {
>      vhost_vq_get_addr_op  vhost_vq_get_addr;
>      vhost_get_device_id_op vhost_get_device_id;
>      vhost_force_iommu_op vhost_force_iommu;
> +    vhost_set_used_memslots_op vhost_set_used_memslots;
> +    vhost_get_used_memslots_op vhost_get_used_memslots;
>  } VhostOps;
>  
>  extern const VhostOps user_ops;
> diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
> index 767a95ec0b..5ded21f86d 100644
> --- a/include/hw/virtio/vhost.h
> +++ b/include/hw/virtio/vhost.h
> @@ -90,6 +90,7 @@ struct vhost_dev {
>      QLIST_HEAD(, vhost_iommu) iommu_list;
>      IOMMUNotifier n;
>      const VhostDevConfigOps *config_ops;
> +    unsigned int used_memslots;
>  };
>  
>  struct vhost_net {
> diff --git a/net/vhost-user.c b/net/vhost-user.c
> index 17532daaf3..7e93955537 100644
> --- a/net/vhost-user.c
> +++ b/net/vhost-user.c
> @@ -20,6 +20,7 @@
>  #include "qemu/error-report.h"
>  #include "qemu/option.h"
>  #include "trace.h"
> +#include "include/hw/virtio/vhost.h"
>  
>  typedef struct NetVhostUserState {
>      NetClientState nc;
> @@ -347,6 +348,12 @@ static int net_vhost_user_init(NetClientState *peer, const char *device,
>          qemu_chr_fe_set_handlers(&s->chr, NULL, NULL,
>                                   net_vhost_user_event, NULL, nc0->name, NULL,
>                                   true);
> +
> +        if (!vhost_has_free_slot()) {
> +            error_report("used memslots exceeded the backend limit, quit "
> +                         "loop");
> +            goto err;
> +        }
>      } while (!s->started);
>  
>      assert(s->vhost_net);
chenjiajun Oct. 12, 2020, 11:12 a.m. UTC | #3
On 2020/10/2 10:05, Raphael Norwitz wrote:
> On Mon, Sep 28, 2020 at 9:17 AM Jiajun Chen <chenjiajun8@huawei.com> wrote:
>>
>> Used_memslots is equal to dev->mem->nregions now, it is true for
>> vhost kernel, but not for vhost user, which uses the memory regions
>> that have file descriptor. In fact, not all of the memory regions
>> have file descriptor.
>> It is usefully in some scenarios, e.g. used_memslots is 8, and only
>> 5 memory slots can be used by vhost user, it is failed to hot plug
>> a new memory RAM because vhost_has_free_slot just returned false,
>> but we can hot plug it safely in fact.
>>
>> --
>> ChangeList:
>> v3:
>> -make used_memslots a member of struct vhost_dev instead of a global static value
>>
>> v2:
>> -eliminating useless used_memslots_exceeded variable and used_memslots_is_exceeded() API
>>
>> v1:
>> -vhost-user: add separate memslot counter for vhost-user
>>
>> Signed-off-by: Jiajun Chen <chenjiajun8@huawei.com>
>> Signed-off-by: Jianjay Zhou <jianjay.zhou@huawei.com>
> 
> I'm happy with this from a vhost/vhost-user perspective. vhost-backend
> change looks good too. I'm a little confused by what's going on with
> net/vhost-user.c.
> 
>> ---
>>  hw/virtio/vhost-backend.c         | 12 ++++++++++
>>  hw/virtio/vhost-user.c            | 25 +++++++++++++++++++++
>>  hw/virtio/vhost.c                 | 37 +++++++++++++++++++++++--------
>>  include/hw/virtio/vhost-backend.h |  5 +++++
>>  include/hw/virtio/vhost.h         |  1 +
>>  net/vhost-user.c                  |  7 ++++++
>>  6 files changed, 78 insertions(+), 9 deletions(-)
>>
> 
>> diff --git a/net/vhost-user.c b/net/vhost-user.c
>> index 17532daaf3..7e93955537 100644
>> --- a/net/vhost-user.c
>> +++ b/net/vhost-user.c
>> @@ -20,6 +20,7 @@
>>  #include "qemu/error-report.h"
>>  #include "qemu/option.h"
>>  #include "trace.h"
>> +#include "include/hw/virtio/vhost.h"
>>
>>  typedef struct NetVhostUserState {
>>      NetClientState nc;
>> @@ -347,6 +348,12 @@ static int net_vhost_user_init(NetClientState *peer, const char *device,
>>          qemu_chr_fe_set_handlers(&s->chr, NULL, NULL,
>>                                   net_vhost_user_event, NULL, nc0->name, NULL,
>>                                   true);
> 
> Can you elaborate on this check here? What does it have to do with
> fixing memslots accounting? Maybe it should be in a separate change?
> 
When the number of virtual machine memslots exceeds the upper limit supported by the backend,
the QEMU main thread may enter an endless loop and cannot process other requests.
The number of memslots will not decrease automatically, so a check is added here to exit the loop
in this scenario. For a newly started virtual machine, boot fails; for a hot-plugged network card,
the hot plug fails.
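
Roughly, as a standalone toy model (this is not the actual net_vhost_user_init()
code; the helper names are invented), the problem looks like this: the init loop
keeps retrying until the backend reports started, and since nothing inside the
loop can make the exceeded memslot limit go away, the loop would never terminate
without the added bail-out:

/*
 * Toy model of the init retry loop: if the memslot limit is already
 * exceeded and cannot recover, the loop must bail out instead of
 * spinning forever on the QEMU main thread.
 */
#include <stdio.h>
#include <stdbool.h>

static bool backend_started;            /* stands in for s->started */

static bool has_free_slot(void)
{
    return false;                       /* limit exceeded; will not shrink */
}

static int init_model(void)
{
    do {
        /* connecting to the backend and setting up handlers happens here */
        if (!has_free_slot()) {
            fprintf(stderr, "used memslots exceeded the backend limit, quit loop\n");
            return -1;                  /* the patch's 'goto err' */
        }
        /* without the check above, this loop spins until backend_started
         * becomes true, which never happens in this situation */
    } while (!backend_started);
    return 0;
}

int main(void)
{
    return init_model() ? 1 : 0;
}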
>> +
>> +        if (!vhost_has_free_slot()) {
>> +            error_report("used memslots exceeded the backend limit, quit "
>> +                         "loop");
>> +            goto err;
>> +        }
>>      } while (!s->started);
>>
>>      assert(s->vhost_net);
>> --
>> 2.27.0.dirty
>>
> .
>
Raphael Norwitz Oct. 14, 2020, 12:58 a.m. UTC | #4
On Tue, Oct 6, 2020 at 5:48 AM Igor Mammedov <imammedo@redhat.com> wrote:
>
> On Mon, 28 Sep 2020 21:17:31 +0800
> Jiajun Chen <chenjiajun8@huawei.com> wrote:
>
> > Used_memslots is equal to dev->mem->nregions now, it is true for
> > vhost kernel, but not for vhost user, which uses the memory regions
> > that have file descriptor. In fact, not all of the memory regions
> > have file descriptor.
> > It is usefully in some scenarios, e.g. used_memslots is 8, and only
> > 5 memory slots can be used by vhost user, it is failed to hot plug
> > a new memory RAM because vhost_has_free_slot just returned false,
> > but we can hot plug it safely in fact.
>
> I had an impression that all guest RAM has to be shared with vhost,
> so combination of anon and fd based RAM couldn't work.
> Am I wrong?

I'm not sure about the kernel backend, but I've tested adding anon
memory to a VM with a vhost-user-scsi device and it works (eventually
the VM crashed, but I could see the guest recognized the anon RAM).
The vhost-user code is designed to work with both. I'm not sure I see
a use case, but if there is one, this would be a valid issue. Maybe
Jiajun or Jianjay can elaborate.

>
> >
> > --
> > ChangeList:
> > v3:
> > -make used_memslots a member of struct vhost_dev instead of a global static value
> it's global resource, so why?

I suggested it because I thought it made the code a little cleaner.
I'm not opposed to changing it back, or having it stored at the
vhost_user level.
Raphael Norwitz Oct. 14, 2020, 1:22 a.m. UTC | #5
On Mon, Oct 12, 2020 at 7:12 AM chenjiajun <chenjiajun8@huawei.com> wrote:
>
>
>
> On 2020/10/2 10:05, Raphael Norwitz wrote:
> > On Mon, Sep 28, 2020 at 9:17 AM Jiajun Chen <chenjiajun8@huawei.com> wrote:
> >>
> >> Used_memslots is equal to dev->mem->nregions now, it is true for
> >> vhost kernel, but not for vhost user, which uses the memory regions
> >> that have file descriptor. In fact, not all of the memory regions
> >> have file descriptor.
> >> It is usefully in some scenarios, e.g. used_memslots is 8, and only
> >> 5 memory slots can be used by vhost user, it is failed to hot plug
> >> a new memory RAM because vhost_has_free_slot just returned false,
> >> but we can hot plug it safely in fact.
> >>
> >> --
> >> ChangeList:
> >> v3:
> >> -make used_memslots a member of struct vhost_dev instead of a global static value
> >>
> >> v2:
> >> -eliminating useless used_memslots_exceeded variable and used_memslots_is_exceeded() API
> >>
> >> v1:
> >> -vhost-user: add separate memslot counter for vhost-user
> >>
> >> Signed-off-by: Jiajun Chen <chenjiajun8@huawei.com>
> >> Signed-off-by: Jianjay Zhou <jianjay.zhou@huawei.com>
> >
> > I'm happy with this from a vhost/vhost-user perspective. vhost-backend
> > change looks good too. I'm a little confused by what's going on with
> > net/vhost-user.c.
> >
> >> ---
> >>  hw/virtio/vhost-backend.c         | 12 ++++++++++
> >>  hw/virtio/vhost-user.c            | 25 +++++++++++++++++++++
> >>  hw/virtio/vhost.c                 | 37 +++++++++++++++++++++++--------
> >>  include/hw/virtio/vhost-backend.h |  5 +++++
> >>  include/hw/virtio/vhost.h         |  1 +
> >>  net/vhost-user.c                  |  7 ++++++
> >>  6 files changed, 78 insertions(+), 9 deletions(-)
> >>
> >
> >> diff --git a/net/vhost-user.c b/net/vhost-user.c
> >> index 17532daaf3..7e93955537 100644
> >> --- a/net/vhost-user.c
> >> +++ b/net/vhost-user.c
> >> @@ -20,6 +20,7 @@
> >>  #include "qemu/error-report.h"
> >>  #include "qemu/option.h"
> >>  #include "trace.h"
> >> +#include "include/hw/virtio/vhost.h"
> >>
> >>  typedef struct NetVhostUserState {
> >>      NetClientState nc;
> >> @@ -347,6 +348,12 @@ static int net_vhost_user_init(NetClientState *peer, const char *device,
> >>          qemu_chr_fe_set_handlers(&s->chr, NULL, NULL,
> >>                                   net_vhost_user_event, NULL, nc0->name, NULL,
> >>                                   true);
> >
> > Can you elaborate on this check here? What does it have to do with
> > fixing memslots accounting? Maybe it should be in a separate change?
> >
> When the number of virtual machine memslots exceeds the upper limit of the back-end support,
> QEMU main thread may enters an endless loop and cannot process other requests.
> And number of memslots will not automatically decrease, so add a check here to exit from loop
> in this scenario. For the newly started virtual machine, boot fails; for the hot plug network card,
> hot plug fails.

I don't understand what you mean by "number of memslots will not
automatically decrease". Where did this happen before and what changes
when the new memslots counter is introduced?

> >> +
> >> +        if (!vhost_has_free_slot()) {
> >> +            error_report("used memslots exceeded the backend limit, quit "
> >> +                         "loop");
> >> +            goto err;
> >> +        }
> >>      } while (!s->started);
> >>
> >>      assert(s->vhost_net);
> >> --
> >> 2.27.0.dirty
> >>
> > .
> >
Michael S. Tsirkin Oct. 14, 2020, 7:08 a.m. UTC | #6
On Tue, Oct 13, 2020 at 08:58:59PM -0400, Raphael Norwitz wrote:
> On Tue, Oct 6, 2020 at 5:48 AM Igor Mammedov <imammedo@redhat.com> wrote:
> >
> > On Mon, 28 Sep 2020 21:17:31 +0800
> > Jiajun Chen <chenjiajun8@huawei.com> wrote:
> >
> > > Used_memslots is equal to dev->mem->nregions now, it is true for
> > > vhost kernel, but not for vhost user, which uses the memory regions
> > > that have file descriptor. In fact, not all of the memory regions
> > > have file descriptor.
> > > It is usefully in some scenarios, e.g. used_memslots is 8, and only
> > > 5 memory slots can be used by vhost user, it is failed to hot plug
> > > a new memory RAM because vhost_has_free_slot just returned false,
> > > but we can hot plug it safely in fact.
> >
> > I had an impression that all guest RAM has to be shared with vhost,
> > so combination of anon and fd based RAM couldn't work.
> > Am I wrong?
> 
> I'm not sure about the kernel backend, but I've tested adding anon
> memory to a VM with a vhost-user-scsi device and it works (eventually
> the VM crashed, but I could see the guest recognized the anon RAM).
> The vhost-user code is designed to work with both. I'm not sure I see
> a use case, but if there is one, this would be a valid issue. Maybe
> Jiajun or Jianjay can elaborate.

Hmm does not vhost-user skip all regions that do not have an fd?


        mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
        if (fd > 0) {
            if (track_ramblocks) {
                assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS);
                trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name,
                                                      reg->memory_size,
                                                      reg->guest_phys_addr,
                                                      reg->userspace_addr,
                                                      offset);
                u->region_rb_offset[i] = offset;
                u->region_rb[i] = mr->ram_block;
            } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) {
                error_report("Failed preparing vhost-user memory table msg");
                return -1;
            }
            vhost_user_fill_msg_region(&region_buffer, reg, offset);
            msg->payload.memory.regions[*fd_num] = region_buffer;
            fds[(*fd_num)++] = fd;
        } else if (track_ramblocks) {
            u->region_rb_offset[i] = 0;
            u->region_rb[i] = NULL;
        }



In your test, is it possible that you were lucky and guest did not send
any data from anon memory to the device?



> >
> > >
> > > --
> > > ChangeList:
> > > v3:
> > > -make used_memslots a member of struct vhost_dev instead of a global static value
> > it's global resource, so why?
> 
> I suggested it because I thought it made the code a little cleaner.
> I'm not opposed to changing it back, or having it stored at the
> vhost_user level.
Raphael Norwitz Oct. 14, 2020, 4:11 p.m. UTC | #7
On Wed, Oct 14, 2020 at 3:08 AM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Tue, Oct 13, 2020 at 08:58:59PM -0400, Raphael Norwitz wrote:
> > On Tue, Oct 6, 2020 at 5:48 AM Igor Mammedov <imammedo@redhat.com> wrote:
> > >
> > > On Mon, 28 Sep 2020 21:17:31 +0800
> > > Jiajun Chen <chenjiajun8@huawei.com> wrote:
> > >
> > > > Used_memslots is equal to dev->mem->nregions now, it is true for
> > > > vhost kernel, but not for vhost user, which uses the memory regions
> > > > that have file descriptor. In fact, not all of the memory regions
> > > > have file descriptor.
> > > > It is usefully in some scenarios, e.g. used_memslots is 8, and only
> > > > 5 memory slots can be used by vhost user, it is failed to hot plug
> > > > a new memory RAM because vhost_has_free_slot just returned false,
> > > > but we can hot plug it safely in fact.
> > >
> > > I had an impression that all guest RAM has to be shared with vhost,
> > > so combination of anon and fd based RAM couldn't work.
> > > Am I wrong?
> >
> > I'm not sure about the kernel backend, but I've tested adding anon
> > memory to a VM with a vhost-user-scsi device and it works (eventually
> > the VM crashed, but I could see the guest recognized the anon RAM).
> > The vhost-user code is designed to work with both. I'm not sure I see
> > a use case, but if there is one, this would be a valid issue. Maybe
> > Jiajun or Jianjay can elaborate.
>
> Hmm does not vhost-user skip all regions that do not have an fd?
>
>
>         mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
>         if (fd > 0) {
>             if (track_ramblocks) {
>                 assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS);
>                 trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name,
>                                                       reg->memory_size,
>                                                       reg->guest_phys_addr,
>                                                       reg->userspace_addr,
>                                                       offset);
>                 u->region_rb_offset[i] = offset;
>                 u->region_rb[i] = mr->ram_block;
>             } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) {
>                 error_report("Failed preparing vhost-user memory table msg");
>                 return -1;
>             }
>             vhost_user_fill_msg_region(&region_buffer, reg, offset);
>             msg->payload.memory.regions[*fd_num] = region_buffer;
>             fds[(*fd_num)++] = fd;
>         } else if (track_ramblocks) {
>             u->region_rb_offset[i] = 0;
>             u->region_rb[i] = NULL;
>         }
>
>
>
> In your test, is it possible that you were lucky and guest did not send
> any data from anon memory to the device?

Yes - vhost-user skips the region and does not send anon memory to the
device, but it does not fail the hot-add operation.

In my test the fd > 0 check definitely failed and went on to add the
memory without sending it to the backend. I understand why this can be
problematic (it did eventually crash the VM), but it seems like we
allow it as of today. I can't think of a valid reason why you would
want anon and FD ram together, but I figured there may be a reason
since the vhost-user code allows for it. Should we maybe block that
path altogether instead of patching it up?

>
>
>
> > >
> > > >
> > > > --
> > > > ChangeList:
> > > > v3:
> > > > -make used_memslots a member of struct vhost_dev instead of a global static value
> > > it's global resource, so why?
> >
> > I suggested it because I thought it made the code a little cleaner.
> > I'm not opposed to changing it back, or having it stored at the
> > vhost_user level.
>
Michael S. Tsirkin Oct. 14, 2020, 4:26 p.m. UTC | #8
On Wed, Oct 14, 2020 at 12:11:34PM -0400, Raphael Norwitz wrote:
> On Wed, Oct 14, 2020 at 3:08 AM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Tue, Oct 13, 2020 at 08:58:59PM -0400, Raphael Norwitz wrote:
> > > On Tue, Oct 6, 2020 at 5:48 AM Igor Mammedov <imammedo@redhat.com> wrote:
> > > >
> > > > On Mon, 28 Sep 2020 21:17:31 +0800
> > > > Jiajun Chen <chenjiajun8@huawei.com> wrote:
> > > >
> > > > > Used_memslots is equal to dev->mem->nregions now, it is true for
> > > > > vhost kernel, but not for vhost user, which uses the memory regions
> > > > > that have file descriptor. In fact, not all of the memory regions
> > > > > have file descriptor.
> > > > > It is usefully in some scenarios, e.g. used_memslots is 8, and only
> > > > > 5 memory slots can be used by vhost user, it is failed to hot plug
> > > > > a new memory RAM because vhost_has_free_slot just returned false,
> > > > > but we can hot plug it safely in fact.
> > > >
> > > > I had an impression that all guest RAM has to be shared with vhost,
> > > > so combination of anon and fd based RAM couldn't work.
> > > > Am I wrong?
> > >
> > > I'm not sure about the kernel backend, but I've tested adding anon
> > > memory to a VM with a vhost-user-scsi device and it works (eventually
> > > the VM crashed, but I could see the guest recognized the anon RAM).
> > > The vhost-user code is designed to work with both. I'm not sure I see
> > > a use case, but if there is one, this would be a valid issue. Maybe
> > > Jiajun or Jianjay can elaborate.
> >
> > Hmm does not vhost-user skip all regions that do not have an fd?
> >
> >
> >         mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
> >         if (fd > 0) {
> >             if (track_ramblocks) {
> >                 assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS);
> >                 trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name,
> >                                                       reg->memory_size,
> >                                                       reg->guest_phys_addr,
> >                                                       reg->userspace_addr,
> >                                                       offset);
> >                 u->region_rb_offset[i] = offset;
> >                 u->region_rb[i] = mr->ram_block;
> >             } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) {
> >                 error_report("Failed preparing vhost-user memory table msg");
> >                 return -1;
> >             }
> >             vhost_user_fill_msg_region(&region_buffer, reg, offset);
> >             msg->payload.memory.regions[*fd_num] = region_buffer;
> >             fds[(*fd_num)++] = fd;
> >         } else if (track_ramblocks) {
> >             u->region_rb_offset[i] = 0;
> >             u->region_rb[i] = NULL;
> >         }
> >
> >
> >
> > In your test, is it possible that you were lucky and guest did not send
> > any data from anon memory to the device?
> 
> Yes - vhost-user skips the region and does not send anon memory to the
> device, but it does not fail the hot-add operation.
> 
> In my test the fd > 0 check definitely failed and went on to add the
> memory without sending it to the backend. I understand why this can be
> problematic (it did eventually crash the VM), but it seems like we
> allow it as of today. I can't think of a valid reason why you would
> want anon and FD ram together, but I figured there may be a reason
> since the vhost-user code allows for it. Should we maybe block that
> path altogether instead of patching it up?


Hmm where do we patch it up? Reason we might have non FD MRs is IIUC
due to things like IO regions...


> >
> >
> >
> > > >
> > > > >
> > > > > --
> > > > > ChangeList:
> > > > > v3:
> > > > > -make used_memslots a member of struct vhost_dev instead of a global static value
> > > > it's global resource, so why?
> > >
> > > I suggested it because I thought it made the code a little cleaner.
> > > I'm not opposed to changing it back, or having it stored at the
> > > vhost_user level.
> >
Raphael Norwitz Oct. 14, 2020, 5:21 p.m. UTC | #9
On Wed, Oct 14, 2020 at 12:26 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Wed, Oct 14, 2020 at 12:11:34PM -0400, Raphael Norwitz wrote:
> > On Wed, Oct 14, 2020 at 3:08 AM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Tue, Oct 13, 2020 at 08:58:59PM -0400, Raphael Norwitz wrote:
> > > > On Tue, Oct 6, 2020 at 5:48 AM Igor Mammedov <imammedo@redhat.com> wrote:
> > > > >
> > > > > On Mon, 28 Sep 2020 21:17:31 +0800
> > > > > Jiajun Chen <chenjiajun8@huawei.com> wrote:
> > > > >
> > > > > > Used_memslots is equal to dev->mem->nregions now, it is true for
> > > > > > vhost kernel, but not for vhost user, which uses the memory regions
> > > > > > that have file descriptor. In fact, not all of the memory regions
> > > > > > have file descriptor.
> > > > > > It is usefully in some scenarios, e.g. used_memslots is 8, and only
> > > > > > 5 memory slots can be used by vhost user, it is failed to hot plug
> > > > > > a new memory RAM because vhost_has_free_slot just returned false,
> > > > > > but we can hot plug it safely in fact.
> > > > >
> > > > > I had an impression that all guest RAM has to be shared with vhost,
> > > > > so combination of anon and fd based RAM couldn't work.
> > > > > Am I wrong?
> > > >
> > > > I'm not sure about the kernel backend, but I've tested adding anon
> > > > memory to a VM with a vhost-user-scsi device and it works (eventually
> > > > the VM crashed, but I could see the guest recognized the anon RAM).
> > > > The vhost-user code is designed to work with both. I'm not sure I see
> > > > a use case, but if there is one, this would be a valid issue. Maybe
> > > > Jiajun or Jianjay can elaborate.
> > >
> > > Hmm does not vhost-user skip all regions that do not have an fd?
> > >
> > >
> > >         mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
> > >         if (fd > 0) {
> > >             if (track_ramblocks) {
> > >                 assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS);
> > >                 trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name,
> > >                                                       reg->memory_size,
> > >                                                       reg->guest_phys_addr,
> > >                                                       reg->userspace_addr,
> > >                                                       offset);
> > >                 u->region_rb_offset[i] = offset;
> > >                 u->region_rb[i] = mr->ram_block;
> > >             } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) {
> > >                 error_report("Failed preparing vhost-user memory table msg");
> > >                 return -1;
> > >             }
> > >             vhost_user_fill_msg_region(&region_buffer, reg, offset);
> > >             msg->payload.memory.regions[*fd_num] = region_buffer;
> > >             fds[(*fd_num)++] = fd;
> > >         } else if (track_ramblocks) {
> > >             u->region_rb_offset[i] = 0;
> > >             u->region_rb[i] = NULL;
> > >         }
> > >
> > >
> > >
> > > In your test, is it possible that you were lucky and guest did not send
> > > any data from anon memory to the device?
> >
> > Yes - vhost-user skips the region and does not send anon memory to the
> > device, but it does not fail the hot-add operation.
> >
> > In my test the fd > 0 check definitely failed and went on to add the
> > memory without sending it to the backend. I understand why this can be
> > problematic (it did eventually crash the VM), but it seems like we
> > allow it as of today. I can't think of a valid reason why you would
> > want anon and FD ram together, but I figured there may be a reason
> > since the vhost-user code allows for it. Should we maybe block that
> > path altogether instead of patching it up?
>
>
> Hmm where do we patch it up? Reason we might have non FD MRs is IIUC
> due to things like IO regions...

The issue is that today such non FD MRs count towards the vhost-user
max ramslots limit even though there is no good reason for them to. By
"patching it up", I mean accepting this change, which makes it so that
the vhost-user max ramslots limit only applies to FD RAM regions.

>
>
> > >
> > >
> > >
> > > > >
> > > > > >
> > > > > > --
> > > > > > ChangeList:
> > > > > > v3:
> > > > > > -make used_memslots a member of struct vhost_dev instead of a global static value
> > > > > it's global resource, so why?
> > > >
> > > > I suggested it because I thought it made the code a little cleaner.
> > > > I'm not opposed to changing it back, or having it stored at the
> > > > vhost_user level.
> > >
>
Michael S. Tsirkin Oct. 14, 2020, 5:54 p.m. UTC | #10
On Wed, Oct 14, 2020 at 01:21:39PM -0400, Raphael Norwitz wrote:
> On Wed, Oct 14, 2020 at 12:26 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Wed, Oct 14, 2020 at 12:11:34PM -0400, Raphael Norwitz wrote:
> > > On Wed, Oct 14, 2020 at 3:08 AM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > >
> > > > On Tue, Oct 13, 2020 at 08:58:59PM -0400, Raphael Norwitz wrote:
> > > > > On Tue, Oct 6, 2020 at 5:48 AM Igor Mammedov <imammedo@redhat.com> wrote:
> > > > > >
> > > > > > On Mon, 28 Sep 2020 21:17:31 +0800
> > > > > > Jiajun Chen <chenjiajun8@huawei.com> wrote:
> > > > > >
> > > > > > > Used_memslots is equal to dev->mem->nregions now, it is true for
> > > > > > > vhost kernel, but not for vhost user, which uses the memory regions
> > > > > > > that have file descriptor. In fact, not all of the memory regions
> > > > > > > have file descriptor.
> > > > > > > It is usefully in some scenarios, e.g. used_memslots is 8, and only
> > > > > > > 5 memory slots can be used by vhost user, it is failed to hot plug
> > > > > > > a new memory RAM because vhost_has_free_slot just returned false,
> > > > > > > but we can hot plug it safely in fact.
> > > > > >
> > > > > > I had an impression that all guest RAM has to be shared with vhost,
> > > > > > so combination of anon and fd based RAM couldn't work.
> > > > > > Am I wrong?
> > > > >
> > > > > I'm not sure about the kernel backend, but I've tested adding anon
> > > > > memory to a VM with a vhost-user-scsi device and it works (eventually
> > > > > the VM crashed, but I could see the guest recognized the anon RAM).
> > > > > The vhost-user code is designed to work with both. I'm not sure I see
> > > > > a use case, but if there is one, this would be a valid issue. Maybe
> > > > > Jiajun or Jianjay can elaborate.
> > > >
> > > > Hmm does not vhost-user skip all regions that do not have an fd?
> > > >
> > > >
> > > >         mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
> > > >         if (fd > 0) {
> > > >             if (track_ramblocks) {
> > > >                 assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS);
> > > >                 trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name,
> > > >                                                       reg->memory_size,
> > > >                                                       reg->guest_phys_addr,
> > > >                                                       reg->userspace_addr,
> > > >                                                       offset);
> > > >                 u->region_rb_offset[i] = offset;
> > > >                 u->region_rb[i] = mr->ram_block;
> > > >             } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) {
> > > >                 error_report("Failed preparing vhost-user memory table msg");
> > > >                 return -1;
> > > >             }
> > > >             vhost_user_fill_msg_region(&region_buffer, reg, offset);
> > > >             msg->payload.memory.regions[*fd_num] = region_buffer;
> > > >             fds[(*fd_num)++] = fd;
> > > >         } else if (track_ramblocks) {
> > > >             u->region_rb_offset[i] = 0;
> > > >             u->region_rb[i] = NULL;
> > > >         }
> > > >
> > > >
> > > >
> > > > In your test, is it possible that you were lucky and guest did not send
> > > > any data from anon memory to the device?
> > >
> > > Yes - vhost-user skips the region and does not send anon memory to the
> > > device, but it does not fail the hot-add operation.
> > >
> > > In my test the fd > 0 check definitely failed and went on to add the
> > > memory without sending it to the backend. I understand why this can be
> > > problematic (it did eventually crash the VM), but it seems like we
> > > allow it as of today. I can't think of a valid reason why you would
> > > want anon and FD ram together, but I figured there may be a reason
> > > since the vhost-user code allows for it. Should we maybe block that
> > > path altogether instead of patching it up?
> >
> >
> > Hmm where do we patch it up? Reason we might have non FD MRs is IIUC
> > due to things like IO regions...
> 
> The issue is that today such non FD MRs count towards the vhost-user
> max ramslots limit even though there is no good reason for them to. By
> "patching it up", I mean accepting this change, which makes it so that
> the vhost-user max ramslots limit only applies to FD RAM regions.

I don't really remember, maybe one can get these things with things
like ROMs ...

> >
> >
> > > >
> > > >
> > > >
> > > > > >
> > > > > > >
> > > > > > > --
> > > > > > > ChangeList:
> > > > > > > v3:
> > > > > > > -make used_memslots a member of struct vhost_dev instead of a global static value
> > > > > > it's global resource, so why?
> > > > >
> > > > > I suggested it because I thought it made the code a little cleaner.
> > > > > I'm not opposed to changing it back, or having it stored at the
> > > > > vhost_user level.
> > > >
> >
Igor Mammedov Oct. 21, 2020, 2:34 p.m. UTC | #11
On Wed, 14 Oct 2020 12:11:34 -0400
Raphael Norwitz <raphael.s.norwitz@gmail.com> wrote:

> On Wed, Oct 14, 2020 at 3:08 AM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Tue, Oct 13, 2020 at 08:58:59PM -0400, Raphael Norwitz wrote:  
> > > On Tue, Oct 6, 2020 at 5:48 AM Igor Mammedov <imammedo@redhat.com> wrote:  
> > > >
> > > > On Mon, 28 Sep 2020 21:17:31 +0800
> > > > Jiajun Chen <chenjiajun8@huawei.com> wrote:
> > > >  
> > > > > Used_memslots is equal to dev->mem->nregions now, it is true for
> > > > > vhost kernel, but not for vhost user, which uses the memory regions
> > > > > that have file descriptor. In fact, not all of the memory regions
> > > > > have file descriptor.

> > > > > It is usefully in some scenarios, e.g. used_memslots is 8, and only
> > > > > 5 memory slots can be used by vhost user, it is failed to hot plug
> > > > > a new memory RAM because vhost_has_free_slot just returned false,
> > > > > but we can hot plug it safely in fact.  
can you find out what these extra 3 memory regions are and why they are
filtered out from the regions that are passed to vhost-user?

> > > >
> > > > I had an impression that all guest RAM has to be shared with vhost,
> > > > so combination of anon and fd based RAM couldn't work.
> > > > Am I wrong?  
> > >
> > > I'm not sure about the kernel backend, but I've tested adding anon
> > > memory to a VM with a vhost-user-scsi device and it works (eventually
> > > the VM crashed, but I could see the guest recognized the anon RAM).
> > > The vhost-user code is designed to work with both. I'm not sure I see
> > > a use case, but if there is one, this would be a valid issue. Maybe
> > > Jiajun or Jianjay can elaborate.  
> >
> > Hmm does not vhost-user skip all regions that do not have an fd?
> >
> >
> >         mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
> >         if (fd > 0) {
> >             if (track_ramblocks) {
> >                 assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS);
> >                 trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name,
> >                                                       reg->memory_size,
> >                                                       reg->guest_phys_addr,
> >                                                       reg->userspace_addr,
> >                                                       offset);
> >                 u->region_rb_offset[i] = offset;
> >                 u->region_rb[i] = mr->ram_block;
> >             } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) {
> >                 error_report("Failed preparing vhost-user memory table msg");
> >                 return -1;
> >             }
> >             vhost_user_fill_msg_region(&region_buffer, reg, offset);
> >             msg->payload.memory.regions[*fd_num] = region_buffer;
> >             fds[(*fd_num)++] = fd;
> >         } else if (track_ramblocks) {
> >             u->region_rb_offset[i] = 0;
> >             u->region_rb[i] = NULL;
> >         }
> >
> >
> >
> > In your test, is it possible that you were lucky and guest did not send
> > any data from anon memory to the device?  
> 
> Yes - vhost-user skips the region and does not send anon memory to the
> device, but it does not fail the hot-add operation.
> 
> In my test the fd > 0 check definitely failed and went on to add the
> memory without sending it to the backend. I understand why this can be
> problematic (it did eventually crash the VM), but it seems like we
> allow it as of today. I can't think of a valid reason why you would
> want anon and FD ram together, but I figured there may be a reason
> since the vhost-user code allows for it. Should we maybe block that
> path altogether instead of patching it up?

I'm more inclined to disable mixed anon and FD RAM (provided that's really
the case) whenever vhost(-user) is used, or to disable hot plugging a
vhost-user device when the machine has mixed RAM.
Otherwise it's just a time bomb, waiting until the guest OS tries to transmit
data that it just allocated from anon RAM.
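
A purely hypothetical sketch of that alternative, as a standalone toy model
(not part of this patch; the helper name is invented): reject the configuration
up front when any RAM region is not fd-backed, instead of silently skipping it
later:

/*
 * Toy model: refuse vhost-user device init when the machine mixes
 * fd-backed and anonymous RAM, rather than dropping anonymous regions.
 */
#include <stdio.h>
#include <stdbool.h>

struct region {
    bool has_fd;
};

static int require_all_fd_backed(const struct region *r, int n)
{
    for (int i = 0; i < n; i++) {
        if (!r[i].has_fd) {
            fprintf(stderr, "vhost-user needs fd-backed RAM; "
                    "region %d is anonymous, refusing to initialize\n", i);
            return -1;
        }
    }
    return 0;
}

int main(void)
{
    struct region regs[3] = { {true}, {true}, {false} };
    return require_all_fd_backed(regs, 3) ? 1 : 0;
}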


> > > >  
> > > > >
> > > > > --
> > > > > ChangeList:
> > > > > v3:
> > > > > -make used_memslots a member of struct vhost_dev instead of a global static value  
> > > > it's global resource, so why?  
> > >
> > > I suggested it because I thought it made the code a little cleaner.
> > > I'm not opposed to changing it back, or having it stored at the
> > > vhost_user level.  
> >  
>
Michael S. Tsirkin Oct. 30, 2020, 8:39 a.m. UTC | #12
On Mon, Sep 28, 2020 at 09:17:31PM +0800, Jiajun Chen wrote:
> Used_memslots is equal to dev->mem->nregions now, it is true for
> vhost kernel, but not for vhost user, which uses the memory regions
> that have file descriptor. In fact, not all of the memory regions
> have file descriptor.
> It is usefully in some scenarios, e.g. used_memslots is 8, and only
> 5 memory slots can be used by vhost user, it is failed to hot plug
> a new memory RAM because vhost_has_free_slot just returned false,
> but we can hot plug it safely in fact.


At this point I dropped this; if you are going to resubmit, please include
data on the qemu invocation that manifests the problem.

> --
> ChangeList:
> v3:
> -make used_memslots a member of struct vhost_dev instead of a global static value
> 
> v2:
> -eliminating useless used_memslots_exceeded variable and used_memslots_is_exceeded() API
> 
> v1:
> -vhost-user: add separate memslot counter for vhost-user
> 
> Signed-off-by: Jiajun Chen <chenjiajun8@huawei.com>
> Signed-off-by: Jianjay Zhou <jianjay.zhou@huawei.com>
> ---
>  hw/virtio/vhost-backend.c         | 12 ++++++++++
>  hw/virtio/vhost-user.c            | 25 +++++++++++++++++++++
>  hw/virtio/vhost.c                 | 37 +++++++++++++++++++++++--------
>  include/hw/virtio/vhost-backend.h |  5 +++++
>  include/hw/virtio/vhost.h         |  1 +
>  net/vhost-user.c                  |  7 ++++++
>  6 files changed, 78 insertions(+), 9 deletions(-)
> 
> diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
> index 782b1d67d9..7016f23ec5 100644
> --- a/hw/virtio/vhost-backend.c
> +++ b/hw/virtio/vhost-backend.c
> @@ -238,6 +238,16 @@ static void vhost_kernel_set_iotlb_callback(struct vhost_dev *dev,
>          qemu_set_fd_handler((uintptr_t)dev->opaque, NULL, NULL, NULL);
>  }
>  
> +static void vhost_kernel_set_used_memslots(struct vhost_dev *dev)
> +{
> +    dev->used_memslots = dev->mem->nregions;
> +}
> +
> +static unsigned int vhost_kernel_get_used_memslots(struct vhost_dev *dev)
> +{
> +    return dev->used_memslots;
> +}
> +
>  static const VhostOps kernel_ops = {
>          .backend_type = VHOST_BACKEND_TYPE_KERNEL,
>          .vhost_backend_init = vhost_kernel_init,
> @@ -269,6 +279,8 @@ static const VhostOps kernel_ops = {
>  #endif /* CONFIG_VHOST_VSOCK */
>          .vhost_set_iotlb_callback = vhost_kernel_set_iotlb_callback,
>          .vhost_send_device_iotlb_msg = vhost_kernel_send_device_iotlb_msg,
> +        .vhost_set_used_memslots = vhost_kernel_set_used_memslots,
> +        .vhost_get_used_memslots = vhost_kernel_get_used_memslots,
>  };
>  #endif
>  
> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> index 31231218dc..5dea64d8a8 100644
> --- a/hw/virtio/vhost-user.c
> +++ b/hw/virtio/vhost-user.c
> @@ -2354,6 +2354,29 @@ void vhost_user_cleanup(VhostUserState *user)
>      user->chr = NULL;
>  }
>  
> +static void vhost_user_set_used_memslots(struct vhost_dev *dev)
> +{
> +    int i;
> +    dev->used_memslots = 0;
> +
> +    for (i = 0; i < dev->mem->nregions; ++i) {
> +        struct vhost_memory_region *reg = dev->mem->regions + i;
> +        ram_addr_t offset;
> +        MemoryRegion *mr;
> +        int fd;
> +
> +        mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
> +        if (mr && fd > 0) {
> +            dev->used_memslots++;
> +        }
> +    }
> +}
> +
> +static unsigned int vhost_user_get_used_memslots(struct vhost_dev *dev)
> +{
> +    return dev->used_memslots;
> +}
> +
>  const VhostOps user_ops = {
>          .backend_type = VHOST_BACKEND_TYPE_USER,
>          .vhost_backend_init = vhost_user_backend_init,
> @@ -2387,4 +2410,6 @@ const VhostOps user_ops = {
>          .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
>          .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
>          .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
> +        .vhost_set_used_memslots = vhost_user_set_used_memslots,
> +        .vhost_get_used_memslots = vhost_user_get_used_memslots,
>  };
> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> index 1a1384e7a6..98b967669b 100644
> --- a/hw/virtio/vhost.c
> +++ b/hw/virtio/vhost.c
> @@ -45,20 +45,20 @@
>  static struct vhost_log *vhost_log;
>  static struct vhost_log *vhost_log_shm;
>  
> -static unsigned int used_memslots;
>  static QLIST_HEAD(, vhost_dev) vhost_devices =
>      QLIST_HEAD_INITIALIZER(vhost_devices);
>  
>  bool vhost_has_free_slot(void)
>  {
> -    unsigned int slots_limit = ~0U;
>      struct vhost_dev *hdev;
>  
>      QLIST_FOREACH(hdev, &vhost_devices, entry) {
> -        unsigned int r = hdev->vhost_ops->vhost_backend_memslots_limit(hdev);
> -        slots_limit = MIN(slots_limit, r);
> +        if (hdev->vhost_ops->vhost_get_used_memslots(hdev) >=
> +            hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) {
> +            return false;
> +        }
>      }
> -    return slots_limit > used_memslots;
> +    return true;
>  }
>  
>  static void vhost_dev_sync_region(struct vhost_dev *dev,
> @@ -502,7 +502,6 @@ static void vhost_commit(MemoryListener *listener)
>                         dev->n_mem_sections * sizeof dev->mem->regions[0];
>      dev->mem = g_realloc(dev->mem, regions_size);
>      dev->mem->nregions = dev->n_mem_sections;
> -    used_memslots = dev->mem->nregions;
>      for (i = 0; i < dev->n_mem_sections; i++) {
>          struct vhost_memory_region *cur_vmr = dev->mem->regions + i;
>          struct MemoryRegionSection *mrs = dev->mem_sections + i;
> @@ -678,6 +677,7 @@ static void vhost_region_add_section(struct vhost_dev *dev,
>          dev->tmp_sections[dev->n_tmp_sections - 1].fv = NULL;
>          memory_region_ref(section->mr);
>      }
> +    dev->vhost_ops->vhost_set_used_memslots(dev);
>  }
>  
>  /* Used for both add and nop callbacks */
> @@ -693,6 +693,17 @@ static void vhost_region_addnop(MemoryListener *listener,
>      vhost_region_add_section(dev, section);
>  }
>  
> +static void vhost_region_del(MemoryListener *listener,
> +                             MemoryRegionSection *section)
> +{
> +    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
> +                                         memory_listener);
> +    if (!vhost_section(dev, section)) {
> +        return;
> +    }
> +    dev->vhost_ops->vhost_set_used_memslots(dev);
> +}
> +
>  static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
>  {
>      struct vhost_iommu *iommu = container_of(n, struct vhost_iommu, n);
> @@ -1300,6 +1311,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
>      hdev->memory_listener = (MemoryListener) {
>          .begin = vhost_begin,
>          .commit = vhost_commit,
> +        .region_del = vhost_region_del,
>          .region_add = vhost_region_addnop,
>          .region_nop = vhost_region_addnop,
>          .log_start = vhost_log_start,
> @@ -1346,9 +1358,16 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
>      memory_listener_register(&hdev->memory_listener, &address_space_memory);
>      QLIST_INSERT_HEAD(&vhost_devices, hdev, entry);
>  
> -    if (used_memslots > hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) {
> -        error_report("vhost backend memory slots limit is less"
> -                " than current number of present memory slots");
> +    /*
> +     * If we started a VM without any vhost device,
> +     * for the first time vhost device hot-plug
> +     * (vhost_get_used_memslots is always 0),
> +     * so it needs to double check here.
> +     */
> +    if (hdev->vhost_ops->vhost_get_used_memslots(hdev) >
> +        hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) {
> +        error_report("vhost backend memory slots limit is less than"
> +                     " current number of present memory slots");
>          r = -1;
>          if (busyloop_timeout) {
>              goto fail_busyloop;
> diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
> index 8825bd278f..6569c95a43 100644
> --- a/include/hw/virtio/vhost-backend.h
> +++ b/include/hw/virtio/vhost-backend.h
> @@ -124,6 +124,9 @@ typedef int (*vhost_get_device_id_op)(struct vhost_dev *dev, uint32_t *dev_id);
>  
>  typedef bool (*vhost_force_iommu_op)(struct vhost_dev *dev);
>  
> +typedef void (*vhost_set_used_memslots_op)(struct vhost_dev *dev);
> +typedef unsigned int (*vhost_get_used_memslots_op)(struct vhost_dev *dev);
> +
>  typedef struct VhostOps {
>      VhostBackendType backend_type;
>      vhost_backend_init vhost_backend_init;
> @@ -168,6 +171,8 @@ typedef struct VhostOps {
>      vhost_vq_get_addr_op  vhost_vq_get_addr;
>      vhost_get_device_id_op vhost_get_device_id;
>      vhost_force_iommu_op vhost_force_iommu;
> +    vhost_set_used_memslots_op vhost_set_used_memslots;
> +    vhost_get_used_memslots_op vhost_get_used_memslots;
>  } VhostOps;
>  
>  extern const VhostOps user_ops;
> diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
> index 767a95ec0b..5ded21f86d 100644
> --- a/include/hw/virtio/vhost.h
> +++ b/include/hw/virtio/vhost.h
> @@ -90,6 +90,7 @@ struct vhost_dev {
>      QLIST_HEAD(, vhost_iommu) iommu_list;
>      IOMMUNotifier n;
>      const VhostDevConfigOps *config_ops;
> +    unsigned int used_memslots;
>  };
>  
>  struct vhost_net {
> diff --git a/net/vhost-user.c b/net/vhost-user.c
> index 17532daaf3..7e93955537 100644
> --- a/net/vhost-user.c
> +++ b/net/vhost-user.c
> @@ -20,6 +20,7 @@
>  #include "qemu/error-report.h"
>  #include "qemu/option.h"
>  #include "trace.h"
> +#include "include/hw/virtio/vhost.h"
>  
>  typedef struct NetVhostUserState {
>      NetClientState nc;
> @@ -347,6 +348,12 @@ static int net_vhost_user_init(NetClientState *peer, const char *device,
>          qemu_chr_fe_set_handlers(&s->chr, NULL, NULL,
>                                   net_vhost_user_event, NULL, nc0->name, NULL,
>                                   true);
> +
> +        if (!vhost_has_free_slot()) {
> +            error_report("used memslots exceeded the backend limit, quit "
> +                         "loop");
> +            goto err;
> +        }
>      } while (!s->started);
>  
>      assert(s->vhost_net);
> -- 
> 2.27.0.dirty

Patch

diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
index 782b1d67d9..7016f23ec5 100644
--- a/hw/virtio/vhost-backend.c
+++ b/hw/virtio/vhost-backend.c
@@ -238,6 +238,16 @@  static void vhost_kernel_set_iotlb_callback(struct vhost_dev *dev,
         qemu_set_fd_handler((uintptr_t)dev->opaque, NULL, NULL, NULL);
 }
 
+static void vhost_kernel_set_used_memslots(struct vhost_dev *dev)
+{
+    dev->used_memslots = dev->mem->nregions;
+}
+
+static unsigned int vhost_kernel_get_used_memslots(struct vhost_dev *dev)
+{
+    return dev->used_memslots;
+}
+
 static const VhostOps kernel_ops = {
         .backend_type = VHOST_BACKEND_TYPE_KERNEL,
         .vhost_backend_init = vhost_kernel_init,
@@ -269,6 +279,8 @@  static const VhostOps kernel_ops = {
 #endif /* CONFIG_VHOST_VSOCK */
         .vhost_set_iotlb_callback = vhost_kernel_set_iotlb_callback,
         .vhost_send_device_iotlb_msg = vhost_kernel_send_device_iotlb_msg,
+        .vhost_set_used_memslots = vhost_kernel_set_used_memslots,
+        .vhost_get_used_memslots = vhost_kernel_get_used_memslots,
 };
 #endif
 
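The two callbacks kernel_ops gains above let each backend decide what counts as a consumed memslot instead of relying on one global counter. Below is a minimal standalone sketch of that delegation through function pointers; it is not part of the patch, and toy_dev/toy_ops are made-up stand-ins for vhost_dev/VhostOps. The kernel-style backend simply mirrors the number of regions:

    #include <stdio.h>

    /* Toy model only: toy_dev/toy_ops are illustrative, not QEMU types. */
    struct toy_dev;

    struct toy_ops {
        void (*set_used_memslots)(struct toy_dev *dev);
        unsigned int (*get_used_memslots)(struct toy_dev *dev);
    };

    struct toy_dev {
        const struct toy_ops *ops;
        unsigned int nregions;      /* stands in for dev->mem->nregions */
        unsigned int used_memslots; /* per-device counter, as in this patch */
    };

    /* kernel-style accounting: every region occupies one backend slot */
    static void kernel_style_set_used_memslots(struct toy_dev *dev)
    {
        dev->used_memslots = dev->nregions;
    }

    static unsigned int toy_get_used_memslots(struct toy_dev *dev)
    {
        return dev->used_memslots;
    }

    static const struct toy_ops kernel_style_ops = {
        .set_used_memslots = kernel_style_set_used_memslots,
        .get_used_memslots = toy_get_used_memslots,
    };

    int main(void)
    {
        struct toy_dev dev = { .ops = &kernel_style_ops, .nregions = 8 };

        dev.ops->set_used_memslots(&dev);
        printf("used memslots: %u\n", dev.ops->get_used_memslots(&dev));
        return 0;
    }
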
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 31231218dc..5dea64d8a8 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -2354,6 +2354,29 @@  void vhost_user_cleanup(VhostUserState *user)
     user->chr = NULL;
 }
 
+static void vhost_user_set_used_memslots(struct vhost_dev *dev)
+{
+    int i;
+    dev->used_memslots = 0;
+
+    for (i = 0; i < dev->mem->nregions; ++i) {
+        struct vhost_memory_region *reg = dev->mem->regions + i;
+        ram_addr_t offset;
+        MemoryRegion *mr;
+        int fd;
+
+        mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
+        if (mr && fd > 0) {
+            dev->used_memslots++;
+        }
+    }
+}
+
+static unsigned int vhost_user_get_used_memslots(struct vhost_dev *dev)
+{
+    return dev->used_memslots;
+}
+
 const VhostOps user_ops = {
         .backend_type = VHOST_BACKEND_TYPE_USER,
         .vhost_backend_init = vhost_user_backend_init,
@@ -2387,4 +2410,6 @@  const VhostOps user_ops = {
         .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
         .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
         .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
+        .vhost_set_used_memslots = vhost_user_set_used_memslots,
+        .vhost_get_used_memslots = vhost_user_get_used_memslots,
 };
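
For vhost-user, vhost_user_set_used_memslots() above counts only the regions that resolve to a MemoryRegion with a backing file descriptor, since only fd-backed memory can be shared with the vhost-user backend. The following standalone sketch applies the same fd > 0 filter to made-up data; it is illustrative only, and the region layout and fd values are invented:

    #include <stdio.h>

    /* Toy model only: the regions and fd values below are invented. */
    struct toy_region {
        int fd;   /* -1: no backing file descriptor */
    };

    static unsigned int count_fd_backed(const struct toy_region *regs, int n)
    {
        unsigned int used = 0;

        for (int i = 0; i < n; i++) {
            /* same condition as vhost_user_set_used_memslots() */
            if (regs[i].fd > 0) {
                used++;
            }
        }
        return used;
    }

    int main(void)
    {
        /* three guest memory regions, only two of them fd-backed */
        struct toy_region regs[] = { { .fd = 12 }, { .fd = -1 }, { .fd = 13 } };

        printf("fd-backed regions: %u of 3\n", count_fd_backed(regs, 3));
        return 0;
    }
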
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 1a1384e7a6..98b967669b 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -45,20 +45,20 @@ 
 static struct vhost_log *vhost_log;
 static struct vhost_log *vhost_log_shm;
 
-static unsigned int used_memslots;
 static QLIST_HEAD(, vhost_dev) vhost_devices =
     QLIST_HEAD_INITIALIZER(vhost_devices);
 
 bool vhost_has_free_slot(void)
 {
-    unsigned int slots_limit = ~0U;
     struct vhost_dev *hdev;
 
     QLIST_FOREACH(hdev, &vhost_devices, entry) {
-        unsigned int r = hdev->vhost_ops->vhost_backend_memslots_limit(hdev);
-        slots_limit = MIN(slots_limit, r);
+        if (hdev->vhost_ops->vhost_get_used_memslots(hdev) >=
+            hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) {
+            return false;
+        }
     }
-    return slots_limit > used_memslots;
+    return true;
 }
 
 static void vhost_dev_sync_region(struct vhost_dev *dev,
@@ -502,7 +502,6 @@  static void vhost_commit(MemoryListener *listener)
                        dev->n_mem_sections * sizeof dev->mem->regions[0];
     dev->mem = g_realloc(dev->mem, regions_size);
     dev->mem->nregions = dev->n_mem_sections;
-    used_memslots = dev->mem->nregions;
     for (i = 0; i < dev->n_mem_sections; i++) {
         struct vhost_memory_region *cur_vmr = dev->mem->regions + i;
         struct MemoryRegionSection *mrs = dev->mem_sections + i;
@@ -678,6 +677,7 @@  static void vhost_region_add_section(struct vhost_dev *dev,
         dev->tmp_sections[dev->n_tmp_sections - 1].fv = NULL;
         memory_region_ref(section->mr);
     }
+    dev->vhost_ops->vhost_set_used_memslots(dev);
 }
 
 /* Used for both add and nop callbacks */
@@ -693,6 +693,17 @@  static void vhost_region_addnop(MemoryListener *listener,
     vhost_region_add_section(dev, section);
 }
 
+static void vhost_region_del(MemoryListener *listener,
+                             MemoryRegionSection *section)
+{
+    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
+                                         memory_listener);
+    if (!vhost_section(dev, section)) {
+        return;
+    }
+    dev->vhost_ops->vhost_set_used_memslots(dev);
+}
+
 static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
 {
     struct vhost_iommu *iommu = container_of(n, struct vhost_iommu, n);
@@ -1300,6 +1311,7 @@  int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
     hdev->memory_listener = (MemoryListener) {
         .begin = vhost_begin,
         .commit = vhost_commit,
+        .region_del = vhost_region_del,
         .region_add = vhost_region_addnop,
         .region_nop = vhost_region_addnop,
         .log_start = vhost_log_start,
@@ -1346,9 +1358,16 @@  int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
     memory_listener_register(&hdev->memory_listener, &address_space_memory);
     QLIST_INSERT_HEAD(&vhost_devices, hdev, entry);
 
-    if (used_memslots > hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) {
-        error_report("vhost backend memory slots limit is less"
-                " than current number of present memory slots");
+    /*
+     * If the VM was started without any vhost device, then on the
+     * first vhost device hot-plug no earlier memslot check has
+     * happened (vhost_get_used_memslots() was always 0), so it
+     * needs to be double-checked here.
+     */
+    if (hdev->vhost_ops->vhost_get_used_memslots(hdev) >
+        hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) {
+        error_report("vhost backend memory slots limit is less than"
+                     " current number of present memory slots");
         r = -1;
         if (busyloop_timeout) {
             goto fail_busyloop;
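
With the per-device counters, the reworked vhost_has_free_slot() above only reports exhaustion when some registered backend has actually reached its own limit, rather than comparing a single global region count against the smallest limit. A standalone model of that check follows; it is a sketch only, and the device counts and limits are made up for the example:

    #include <stdbool.h>
    #include <stdio.h>

    /* Toy model only: counts and limits below are invented. */
    struct toy_dev {
        unsigned int used_memslots;
        unsigned int memslots_limit;
    };

    static bool toy_has_free_slot(const struct toy_dev *devs, int n)
    {
        for (int i = 0; i < n; i++) {
            if (devs[i].used_memslots >= devs[i].memslots_limit) {
                return false;   /* this backend cannot take another region */
            }
        }
        return true;            /* every backend still has room */
    }

    int main(void)
    {
        struct toy_dev devs[] = {
            { .used_memslots = 5, .memslots_limit = 8 },   /* e.g. a vhost-user device */
            { .used_memslots = 6, .memslots_limit = 64 },  /* e.g. a vhost-kernel device */
        };

        printf("memory hot plug allowed: %s\n",
               toy_has_free_slot(devs, 2) ? "yes" : "no");
        return 0;
    }
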
diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
index 8825bd278f..6569c95a43 100644
--- a/include/hw/virtio/vhost-backend.h
+++ b/include/hw/virtio/vhost-backend.h
@@ -124,6 +124,9 @@  typedef int (*vhost_get_device_id_op)(struct vhost_dev *dev, uint32_t *dev_id);
 
 typedef bool (*vhost_force_iommu_op)(struct vhost_dev *dev);
 
+typedef void (*vhost_set_used_memslots_op)(struct vhost_dev *dev);
+typedef unsigned int (*vhost_get_used_memslots_op)(struct vhost_dev *dev);
+
 typedef struct VhostOps {
     VhostBackendType backend_type;
     vhost_backend_init vhost_backend_init;
@@ -168,6 +171,8 @@  typedef struct VhostOps {
     vhost_vq_get_addr_op  vhost_vq_get_addr;
     vhost_get_device_id_op vhost_get_device_id;
     vhost_force_iommu_op vhost_force_iommu;
+    vhost_set_used_memslots_op vhost_set_used_memslots;
+    vhost_get_used_memslots_op vhost_get_used_memslots;
 } VhostOps;
 
 extern const VhostOps user_ops;
diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index 767a95ec0b..5ded21f86d 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -90,6 +90,7 @@  struct vhost_dev {
     QLIST_HEAD(, vhost_iommu) iommu_list;
     IOMMUNotifier n;
     const VhostDevConfigOps *config_ops;
+    unsigned int used_memslots;
 };
 
 struct vhost_net {
diff --git a/net/vhost-user.c b/net/vhost-user.c
index 17532daaf3..7e93955537 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -20,6 +20,7 @@ 
 #include "qemu/error-report.h"
 #include "qemu/option.h"
 #include "trace.h"
+#include "include/hw/virtio/vhost.h"
 
 typedef struct NetVhostUserState {
     NetClientState nc;
@@ -347,6 +348,12 @@  static int net_vhost_user_init(NetClientState *peer, const char *device,
         qemu_chr_fe_set_handlers(&s->chr, NULL, NULL,
                                  net_vhost_user_event, NULL, nc0->name, NULL,
                                  true);
+
+        if (!vhost_has_free_slot()) {
+            error_report("used memslots exceeded the backend limit, quit "
+                         "loop");
+            goto err;
+        }
     } while (!s->started);
 
     assert(s->vhost_net);