[v4,3/4] vhost-user: add shared_object msg

Message ID 20230626073426.285659-4-aesteve@redhat.com
State New
Series Virtio shared dma-buf

Commit Message

Albert Esteve June 26, 2023, 7:34 a.m. UTC
Add three new vhost-user protocol
`VHOST_USER_BACKEND_SHARED_OBJECT_*` messages.
These new messages are sent from vhost-user
back-ends to interact with the virtio-dmabuf
table in order to add, remove, or look up
virtio dma-buf shared objects.

The action taken in the front-end depends
on the type stored in the payload struct.

In the libvhost-user library, add helper
functions that allow back-ends to send these
messages and interact with the virtio shared
objects hash table.

The messages can only be sent after successfully
negotiating a new VHOST_USER_PROTOCOL_F_SHARED_OBJECT
vhost-user protocol feature bit.

Signed-off-by: Albert Esteve <aesteve@redhat.com>
---
 docs/interop/vhost-user.rst               |  42 +++++++++
 hw/virtio/vhost-user.c                    |  99 +++++++++++++++++++++
 subprojects/libvhost-user/libvhost-user.c | 101 ++++++++++++++++++++++
 subprojects/libvhost-user/libvhost-user.h |  53 +++++++++++-
 4 files changed, 294 insertions(+), 1 deletion(-)

Comments

Michael S. Tsirkin July 10, 2023, 7:03 p.m. UTC | #1
On Mon, Jun 26, 2023 at 09:34:25AM +0200, Albert Esteve wrote:
> Add three new vhost-user protocol
> `VHOST_USER_BACKEND_SHARED_OBJECT_*` messages.
> These new messages are sent from vhost-user
> back-ends to interact with the virtio-dmabuf
> table in order to add, remove, or look up
> virtio dma-buf shared objects.
> 
> The action taken in the front-end depends
> on the type stored in the payload struct.
> 
> In the libvhost-user library, add helper
> functions that allow back-ends to send these
> messages and interact with the virtio shared
> objects hash table.
> 
> The messages can only be sent after successfully
> negotiating a new VHOST_USER_PROTOCOL_F_SHARED_OBJECT
> vhost-user protocol feature bit.
> 
> Signed-off-by: Albert Esteve <aesteve@redhat.com>

It bothers me that apparently, any backend can now
make qemu allocate any amount of memory by sending
lots of add messages.

Any way to limit this? If not - at least let's make this
a property that's opt-in?


Albert Esteve July 17, 2023, 11:42 a.m. UTC | #2
Hi Michael,

True. It may be a good idea to impose a limit on the number of entries that
can be added to the table, and fail to add new entries once it reaches the
limit.

Not sure what would be a good limit though. For example,
https://www.kernel.org/doc/html/v4.9/media/uapi/v4l/vidioc-reqbufs.html#c.v4l2_requestbuffers
does not limit the number of buffers that can be allocated simultaneously;
it is an unsigned 32-bit value.
However, I guess 16 bits (65535) would suffice to cover the vast majority
of use cases. Or even lower, and it can be adjusted later, as this API
gets (more) used.
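For illustration, the check could live in the ADD handler on top of what
this patch already does. A minimal sketch, with made-up names
(VIRTIO_DMABUF_MAX_ENTRIES and virtio_dmabuf_num_entries() are not part
of this series):

#define VIRTIO_DMABUF_MAX_ENTRIES 65536

static int
vhost_user_backend_handle_shared_object_add(VhostUserShared *object,
                                            int dmabuf_fd)
{
    QemuUUID uuid;

    /* Refuse to grow the shared table past the (hypothetical) cap. */
    if (virtio_dmabuf_num_entries() >= VIRTIO_DMABUF_MAX_ENTRIES) {
        return -ENOSPC;
    }
    memcpy(uuid.data, object->uuid, sizeof(object->uuid));
    return virtio_add_dmabuf(&uuid, dmabuf_fd);
}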

Does that make sense?

Thanks.
BR,
Albert

On Mon, Jul 10, 2023 at 9:03 PM Michael S. Tsirkin <mst@redhat.com> wrote:

> On Mon, Jun 26, 2023 at 09:34:25AM +0200, Albert Esteve wrote:
> > Add three new vhost-user protocol
> > `VHOST_USER_BACKEND_SHARED_OBJECT_*` messages.
> > These new messages are sent from vhost-user
> > back-ends to interact with the virtio-dmabuf
> > table in order to add, remove, or look up
> > virtio dma-buf shared objects.
> >
> > The action taken in the front-end depends
> > on the type stored in the payload struct.
> >
> > In the libvhost-user library, add helper
> > functions that allow back-ends to send these
> > messages and interact with the virtio shared
> > objects hash table.
> >
> > The messages can only be sent after successfully
> > negotiating a new VHOST_USER_PROTOCOL_F_SHARED_OBJECT
> > vhost-user protocol feature bit.
> >
> > Signed-off-by: Albert Esteve <aesteve@redhat.com>
>
> It bothers me that apparently, any backend can now
> make qemu allocate any amount of memory by sending
> lots of add messages.
>
> Any way to limit this? If not - at least let's make this
> a property that's opt-in?
>
Gerd Hoffmann July 17, 2023, 2:10 p.m. UTC | #3
On Mon, Jul 17, 2023 at 01:42:02PM +0200, Albert Esteve wrote:
> Hi Michael,
> 
> True. It may be a good idea to impose a limit in the number of entries that
> can be added to the table.
> And fail to add new entries once it reaches the limit.
> 
> Not sure what would be a good limit though. For example,
> https://www.kernel.org/doc/html/v4.9/media/uapi/v4l/vidioc-reqbufs.html#c.v4l2_requestbuffers
> does not limit the number of buffers that can be allocated simultaneously,
> it is an unsigned 32-bits value.
> However, I guess 16-bits (65535) would suffice to cover the vast majority
> of usecases. Or even lower, and
> can be adjusted later, as this API gets (more) used.

virtio-gpu does accounting on the total amount of memory (look for
'hostmem').  That is only used when virgl is *not* used; with virgl
it is much harder to figure out how much host memory is actually used.
Probably the virglrenderer library would have to implement that.

If we want to apply limits to the memory used by buffers, it probably makes
sense to do the same, i.e. account for the total amount of memory used.
dma-bufs have a fixed size, so that should be doable without too much
trouble.  Might need some changes to the API because that'll give us a
few new possible failure modes.
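
Roughly like this, as a sketch of the accounting idea (the budget and the
running counter are made-up names; the lseek() trick relies on dma-buf fds
reporting their size via SEEK_END):

#include <errno.h>
#include <stdint.h>
#include <unistd.h>

static uint64_t dmabuf_hostmem_used;

/* Charge a dma-buf against a total host-memory budget before adding it. */
static int dmabuf_account(int dmabuf_fd, uint64_t hostmem_limit)
{
    off_t size = lseek(dmabuf_fd, 0, SEEK_END);

    if (size < 0) {
        return -errno;      /* fd does not report a size */
    }
    lseek(dmabuf_fd, 0, SEEK_SET);
    if (dmabuf_hostmem_used + size > hostmem_limit) {
        return -ENOSPC;     /* would exceed the budget */
    }
    dmabuf_hostmem_used += size;
    return 0;
}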

take care,
  Gerd
Michael S. Tsirkin July 17, 2023, 2:11 p.m. UTC | #4
On Mon, Jul 17, 2023 at 01:42:02PM +0200, Albert Esteve wrote:
> Hi Michael,
> 
> True. It may be a good idea to impose a limit on the number of entries that
> can be added to the table, and fail to add new entries once it reaches the
> limit.
> 
> Not sure what would be a good limit though. For example, https://www.kernel.org/doc/html/v4.9/media/uapi/v4l/vidioc-reqbufs.html#c.v4l2_requestbuffers
> does not limit the number of buffers that can be allocated simultaneously;
> it is an unsigned 32-bit value.
> However, I guess 16 bits (65535) would suffice to cover the vast majority
> of use cases. Or even lower, and it can be adjusted later, as this API
> gets (more) used.
> 
> Does that make sense?
> 
> Thanks.
> BR,
> Albert

let's not top-post please.

Maybe. Another concern is qemu running out of FDs with a bad backend.

Question: why does qemu have to maintain these UUIDs in its memory?

Can't it query the backend with UUID and get the fd back?

And then, the hash table in QEMU becomes just a cache
to speed up lookups.
Albert Esteve July 27, 2023, 2:48 p.m. UTC | #5
On Mon, Jul 17, 2023 at 4:11 PM Michael S. Tsirkin <mst@redhat.com> wrote:

>
> On Mon, Jul 17, 2023 at 01:42:02PM +0200, Albert Esteve wrote:
> > Hi Michael,
> >
> > True. It may be a good idea to impose a limit on the number of entries
> > that can be added to the table, and fail to add new entries once it
> > reaches the limit.
> >
> > Not sure what would be a good limit though. For example, https://www.kernel.org/doc/html/v4.9/media/uapi/v4l/vidioc-reqbufs.html#c.v4l2_requestbuffers
> > does not limit the number of buffers that can be allocated simultaneously;
> > it is an unsigned 32-bit value.
> > However, I guess 16 bits (65535) would suffice to cover the vast majority
> > of use cases. Or even lower, and it can be adjusted later, as this API
> > gets (more) used.
> >
> > Does that make sense?
> >
> > Thanks.
> > BR,
> > Albert
>
> let's not top-post please.
>
> Maybe. Another concern is qemu running out of FDs with a bad backend.
>
> Question: why does qemu have to maintain these UUIDs in its memory?
>
> Can't it query the backend with UUID and get the fd back?
>

In the end, we have one backend sharing an object with other backends.
From the importer's POV, it does not know who the exporter is, so it cannot
go poking other backends until it finds the one that is holding a resource
with the same UUID; it relies on qemu providing this information.

If we do not want qemu to hold the fds, we could, for instance, store
references to backends that act as exporters. And then, once an importer
requests a specific object by its UUID, we ask the exporter(s) for the fd,
hoping to find it (a rough sketch of that flow is below).

But the current solution seems a better fit for the shared objects virtio
feature. I would be more keen to look into something like what Gerd
suggested, limiting the memory that we use.

Nonetheless, in qemu we are storing fds, and not mmapping the dmabufs.
So I think limiting the number of entries should suffice, to ensure
that we do not run out of FDs, and memory.
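
To make the cache idea concrete, the miss path could look roughly like
this (virtio_lookup_exporter() and vhost_backend_query_shared_object()
are made-up names for the pieces that would have to be added):

static int shared_object_lookup_cached(QemuUUID *uuid)
{
    /* Fast path: the UUID is already cached in the virtio-dmabuf table. */
    int dmabuf_fd = virtio_lookup_dmabuf(uuid);

    if (dmabuf_fd == -1) {
        /* Slow path: ask the recorded exporter back-end for the fd... */
        struct vhost_dev *exporter = virtio_lookup_exporter(uuid);
        if (!exporter) {
            return -1;
        }
        dmabuf_fd = vhost_backend_query_shared_object(exporter, uuid);
        /* ...and re-populate the cache on success. */
        if (dmabuf_fd != -1) {
            virtio_add_dmabuf(uuid, dmabuf_fd);
        }
    }
    return dmabuf_fd;
}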


>
> And then, the hash table in QEMU becomes just a cache
> to speed up lookups.
>
> --
> MST
>
>
Michael S. Tsirkin July 27, 2023, 2:56 p.m. UTC | #6
On Thu, Jul 27, 2023 at 04:48:30PM +0200, Albert Esteve wrote:
> 
> 
> On Mon, Jul 17, 2023 at 4:11 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
>     On Mon, Jul 17, 2023 at 01:42:02PM +0200, Albert Esteve wrote:
>     > Hi Michael,
>     >
>     > True. It may be a good idea to impose a limit on the number of entries
>     > that can be added to the table, and fail to add new entries once it
>     > reaches the limit.
>     >
>     > Not sure what would be a good limit though. For example, https://www.kernel.org/doc/html/v4.9/media/uapi/v4l/vidioc-reqbufs.html#c.v4l2_requestbuffers
>     > does not limit the number of buffers that can be allocated
>     > simultaneously; it is an unsigned 32-bit value.
>     > However, I guess 16 bits (65535) would suffice to cover the vast
>     > majority of use cases. Or even lower, and it can be adjusted later,
>     > as this API gets (more) used.
>     >
>     > Does that make sense?
>     >
>     > Thanks.
>     > BR,
>     > Albert
> 
>     let's not top-post please.
> 
>     Maybe. Another concern is qemu running out of FDs with a bad backend.
> 
>     Question: why does qemu have to maintain these UUIDs in its memory?
> 
>     Can't it query the backend with UUID and get the fd back?
> 
> 
> In the end, we have one backend sharing an object with other backends.
> From the importer's POV, it does not know who the exporter is, so it cannot
> go poking other backends until it finds the one that is holding a resource
> with the same UUID; it relies on qemu providing this information.
>
> If we do not want qemu to hold the fds, we could, for instance, store
> references to backends that act as exporters. And then, once an importer
> requests a specific object by its UUID, we ask the exporter(s) for the fd,
> hoping to find it.


Right, I'd do this. And then the existing table can be regarded
as a cache.

> But the current solution seems a better fit for the shared objects virtio
> feature. I would be more keen to look into something like what Gerd
> suggested, limiting the memory that we use.
>
> Nonetheless, in qemu we are storing fds, and not mmapping the dmabufs.
> So I think limiting the number of entries should suffice, to ensure
> that we do not run out of FDs, and memory.

My point is that you really don't know how much to limit it.
If there's an ability to drop entries then you
can do this, and cache things in memory.


> 
> 
>     And then, the hash table in QEMU becomes just a cache
>     to speed up lookups.
> 
>     --
>     MST
> 
>
Albert Esteve July 28, 2023, 9:05 a.m. UTC | #7
On Thu, Jul 27, 2023 at 4:57 PM Michael S. Tsirkin <mst@redhat.com> wrote:

> On Thu, Jul 27, 2023 at 04:48:30PM +0200, Albert Esteve wrote:
> >
> >
> > On Mon, Jul 17, 2023 at 4:11 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> >     On Mon, Jul 17, 2023 at 01:42:02PM +0200, Albert Esteve wrote:
> >     > Hi Michael,
> >     >
> >     > True. It may be a good idea to impose a limit on the number of
> >     > entries that can be added to the table, and fail to add new entries
> >     > once it reaches the limit.
> >     >
> >     > Not sure what would be a good limit though. For example, https://www.kernel.org/doc/html/v4.9/media/uapi/v4l/vidioc-reqbufs.html#c.v4l2_requestbuffers
> >     > does not limit the number of buffers that can be allocated
> >     > simultaneously; it is an unsigned 32-bit value.
> >     > However, I guess 16 bits (65535) would suffice to cover the vast
> >     > majority of use cases. Or even lower, and it can be adjusted later,
> >     > as this API gets (more) used.
> >     >
> >     > Does that make sense?
> >     >
> >     > Thanks.
> >     > BR,
> >     > Albert
> >
> >     let's not top-post please.
> >
> >     Maybe. Another concern is qemu running out of FDs with a bad backend.
> >
> >     Question: why does qemu have to maintain these UUIDs in its memory?
> >
> >     Can't it query the backend with UUID and get the fd back?
> >
> >
> > In the end, we have one backend sharing an object with other backends.
> > From the importer's POV, it does not know who the exporter is, so it cannot
> > go poking other backends until it finds the one that is holding a resource
> > with the same UUID; it relies on qemu providing this information.
> >
> > If we do not want qemu to hold the fds, we could, for instance, store
> > references to backends that act as exporters. And then, once an importer
> > requests a specific object by its UUID, we ask the exporter(s) for the fd,
> > hoping to find it.
>
>
> Right, I'd do this. And then the existing table can be regarded
> as a cache.
>

It is true that it is not easy to find a limit that fits all use cases,
and the cache proposal could result in a more maintainable
solution in the long term.

I'll explore this and post a proposal for the next version
of the patch. It will mean having a bigger changeset, so
I'll try to push something as clean as possible.

BR,
Albert


>
> > But the current solution seems a better fit for the shared objects virtio
> > feature. I would be more keen to look into something like what Gerd
> > suggested, limiting the memory that we use.
> >
> > Nonetheless, in qemu we are storing fds, and not mmapping the dmabufs.
> > So I think limiting the number of entries should suffice, to ensure
> > that we do not run out of FDs, and memory.
>
> My point is that you really don't know how much to limit it.
> If there's an ability to drop entries then you
> can do this, and cache things in memory.
>
>
> >
> >
> >     And then, the hash table in QEMU becomes just a cache
> >     to speed up lookups.
> >
> >     --
> >     MST
> >
> >
>
>

Patch

diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
index 5a070adbc1..bca5600ff1 100644
--- a/docs/interop/vhost-user.rst
+++ b/docs/interop/vhost-user.rst
@@ -1528,6 +1528,48 @@  is sent by the front-end.
 
   The state.num field is currently reserved and must be set to 0.
 
+``VHOST_USER_BACKEND_SHARED_OBJECT_ADD``
+  :id: 6
+  :equivalent ioctl: N/A
+  :request payload: ``struct VhostUserShared``
+  :reply payload: N/A
+
+  When the ``VHOST_USER_PROTOCOL_F_SHARED_OBJECT`` protocol
+  feature has been successfully negotiated, this message can be submitted
+  by the back-end to add a new dma-buf fd to the virtio-dmabuf shared
+  table. The fd gets associated with a UUID.
+  If ``VHOST_USER_PROTOCOL_F_REPLY_ACK`` is negotiated, and the back-end sets
+  the ``VHOST_USER_NEED_REPLY`` flag, the front-end must respond with zero when
+  the operation is successfully completed, or non-zero otherwise.
+
+``VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE``
+  :id: 7
+  :equivalent ioctl: N/A
+  :request payload: ``struct VhostUserShared``
+  :reply payload: N/A
+
+  When the ``VHOST_USER_PROTOCOL_F_SHARED_OBJECT`` protocol
+  feature has been successfully negotiated, this message can be submitted
+  by the back-end to remove a dma-buf fd from the virtio-dmabuf shared
+  table. The shared table will remove the dma-buf
+  fd associated with the UUID. If ``VHOST_USER_PROTOCOL_F_REPLY_ACK`` is
+  negotiated, and the back-end sets the ``VHOST_USER_NEED_REPLY`` flag, the
+  front-end must respond with zero when the operation is successfully
+  completed, or non-zero otherwise.
+
+``VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP``
+  :id: 8
+  :equivalent ioctl: N/A
+  :request payload: ``struct VhostUserShared``
+  :reply payload: dmabuf fd and ``u64``
+
+  When the ``VHOST_USER_PROTOCOL_F_SHARED_OBJECT`` protocol
+  feature has been successfully negotiated, this message can be submitted
+  by the back-end to retrieve a given dma-buf fd from the virtio-dmabuf
+  shared table given a UUID. The front-end will reply with the fd and a zero
+  payload when the operation is successful, or non-zero otherwise. Note that
+  if the operation fails, no fd is sent to the back-end.
+
 .. _reply_ack:
 
 VHOST_USER_PROTOCOL_F_REPLY_ACK
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 74a2a28663..e340c39a19 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -10,6 +10,7 @@ 
 
 #include "qemu/osdep.h"
 #include "qapi/error.h"
+#include "hw/virtio/virtio-dmabuf.h"
 #include "hw/virtio/vhost.h"
 #include "hw/virtio/vhost-user.h"
 #include "hw/virtio/vhost-backend.h"
@@ -20,6 +21,7 @@ 
 #include "sysemu/kvm.h"
 #include "qemu/error-report.h"
 #include "qemu/main-loop.h"
+#include "qemu/uuid.h"
 #include "qemu/sockets.h"
 #include "sysemu/runstate.h"
 #include "sysemu/cryptodev.h"
@@ -73,6 +75,7 @@  enum VhostUserProtocolFeature {
     /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. */
     VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15,
     VHOST_USER_PROTOCOL_F_STATUS = 16,
+    VHOST_USER_PROTOCOL_F_SHARED_OBJECT = 17,
     VHOST_USER_PROTOCOL_F_MAX
 };
 
@@ -128,6 +131,9 @@  typedef enum VhostUserSlaveRequest {
     VHOST_USER_BACKEND_IOTLB_MSG = 1,
     VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2,
     VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3,
+    VHOST_USER_BACKEND_SHARED_OBJECT_ADD = 6,
+    VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE = 7,
+    VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP = 8,
     VHOST_USER_BACKEND_MAX
 }  VhostUserSlaveRequest;
 
@@ -190,6 +196,10 @@  typedef struct VhostUserInflight {
     uint16_t queue_size;
 } VhostUserInflight;
 
+typedef struct VhostUserShared {
+    unsigned char uuid[16];
+} VhostUserShared;
+
 typedef struct {
     VhostUserRequest request;
 
@@ -214,6 +224,7 @@  typedef union {
         VhostUserCryptoSession session;
         VhostUserVringArea area;
         VhostUserInflight inflight;
+        VhostUserShared object;
 } VhostUserPayload;
 
 typedef struct VhostUserMsg {
@@ -1582,6 +1593,83 @@  static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
     return 0;
 }
 
+static int
+vhost_user_backend_handle_shared_object_add(VhostUserShared *object,
+                                            int dmabuf_fd)
+{
+    QemuUUID uuid;
+
+    memcpy(uuid.data, object->uuid, sizeof(object->uuid));
+    return virtio_add_dmabuf(&uuid, dmabuf_fd);
+}
+
+static int
+vhost_user_backend_handle_shared_object_remove(VhostUserShared *object)
+{
+    QemuUUID uuid;
+
+    memcpy(uuid.data, object->uuid, sizeof(object->uuid));
+    return virtio_remove_resource(&uuid);
+}
+
+static bool
+vhost_user_backend_send_dmabuf_fd(QIOChannel *ioc, VhostUserHeader *hdr,
+                                  VhostUserPayload *payload)
+{
+    Error *local_err = NULL;
+    struct iovec iov[2];
+
+    if (hdr->flags & VHOST_USER_NEED_REPLY_MASK) {
+        hdr->flags &= ~VHOST_USER_NEED_REPLY_MASK;
+    }
+    hdr->flags |= VHOST_USER_REPLY_MASK;
+
+    hdr->size = sizeof(payload->u64);
+
+    iov[0].iov_base = hdr;
+    iov[0].iov_len = VHOST_USER_HDR_SIZE;
+    iov[1].iov_base = payload;
+    iov[1].iov_len = hdr->size;
+
+    if (qio_channel_writev_all(ioc, iov, ARRAY_SIZE(iov), &local_err)) {
+        error_report_err(local_err);
+        return false;
+    }
+    return true;
+}
+
+static int
+vhost_user_backend_handle_shared_object_lookup(struct vhost_user *u,
+                                               QIOChannel *ioc,
+                                               VhostUserHeader *hdr,
+                                               VhostUserPayload *payload)
+{
+    QemuUUID uuid;
+    CharBackend *chr = u->user->chr;
+    int dmabuf_fd = -1;
+    int fd_num = 0;
+
+    memcpy(uuid.data, payload->object.uuid, sizeof(payload->object.uuid));
+
+    dmabuf_fd = virtio_lookup_dmabuf(&uuid);
+    if (dmabuf_fd != -1) {
+        fd_num++;
+    }
+
+    payload->u64 = 0;
+    if (qemu_chr_fe_set_msgfds(chr, &dmabuf_fd, fd_num) < 0) {
+        error_report("Failed to set msg fds.");
+        payload->u64 = -EINVAL;
+    }
+
+    if (!vhost_user_backend_send_dmabuf_fd(ioc, hdr, payload)) {
+        error_report("Failed to write response msg.");
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
 static void close_slave_channel(struct vhost_user *u)
 {
     g_source_destroy(u->slave_src);
@@ -1639,6 +1727,17 @@  static gboolean slave_read(QIOChannel *ioc, GIOCondition condition,
         ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area,
                                                           fd ? fd[0] : -1);
         break;
+    case VHOST_USER_BACKEND_SHARED_OBJECT_ADD:
+        ret = vhost_user_backend_handle_shared_object_add(&payload.object,
+                                                          fd ? fd[0] : -1);
+        break;
+    case VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE:
+        ret = vhost_user_backend_handle_shared_object_remove(&payload.object);
+        break;
+    case VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP:
+        ret = vhost_user_backend_handle_shared_object_lookup(dev->opaque, ioc,
+                                                             &hdr, &payload);
+        break;
     default:
         error_report("Received unexpected msg type: %d.", hdr.request);
         ret = -EINVAL;
diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c
index 8fb61e2df2..672d8292a0 100644
--- a/subprojects/libvhost-user/libvhost-user.c
+++ b/subprojects/libvhost-user/libvhost-user.c
@@ -1403,6 +1403,107 @@  bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd,
     return vu_process_message_reply(dev, &vmsg);
 }
 
+bool
+vu_get_shared_object(VuDev *dev, unsigned char uuid[UUID_LEN], int *dmabuf_fd)
+{
+    bool result = false;
+    VhostUserMsg msg_reply;
+    VhostUserMsg msg = {
+        .request = VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP,
+        .size = sizeof(msg.payload.object),
+        .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
+    };
+
+    memcpy(msg.payload.object.uuid, uuid, sizeof(uuid[0]) * UUID_LEN);
+
+    if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SHARED_OBJECT)) {
+        return false;
+    }
+
+    pthread_mutex_lock(&dev->slave_mutex);
+    if (!vu_message_write(dev, dev->slave_fd, &msg)) {
+        goto out;
+    }
+
+    if (!vu_message_read_default(dev, dev->slave_fd, &msg_reply)) {
+        goto out;
+    }
+
+    if (msg_reply.request != msg.request) {
+        DPRINT("Received unexpected msg type. Expected %d, received %d",
+               msg.request, msg_reply.request);
+        goto out;
+    }
+
+    if (msg_reply.fd_num != 1) {
+        DPRINT("Received unexpected number of fds. Expected 1, received %d",
+               msg_reply.fd_num);
+        goto out;
+    }
+
+    *dmabuf_fd = msg_reply.fds[0];
+    result = *dmabuf_fd > 0 && msg_reply.payload.u64 == 0;
+out:
+    pthread_mutex_unlock(&dev->slave_mutex);
+
+    return result;
+}
+
+static bool
+vu_send_message(VuDev *dev, VhostUserMsg *vmsg)
+{
+    bool result = false;
+    pthread_mutex_lock(&dev->slave_mutex);
+    if (!vu_message_write(dev, dev->slave_fd, vmsg)) {
+        goto out;
+    }
+
+    result = true;
+out:
+    pthread_mutex_unlock(&dev->slave_mutex);
+
+    return result;
+}
+
+bool
+vu_add_shared_object(VuDev *dev, unsigned char uuid[UUID_LEN], int dmabuf_fd)
+{
+    int fd_num = 0;
+    VhostUserMsg msg = {
+        .request = VHOST_USER_BACKEND_SHARED_OBJECT_ADD,
+        .size = sizeof(msg.payload.object),
+        .flags = VHOST_USER_VERSION,
+    };
+
+    msg.fds[fd_num++] = dmabuf_fd;
+    msg.fd_num = fd_num;
+    memcpy(msg.payload.object.uuid, uuid, sizeof(uuid[0]) * UUID_LEN);
+
+    if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SHARED_OBJECT)) {
+        return false;
+    }
+
+    return vu_send_message(dev, &msg);
+}
+
+bool
+vu_rm_shared_object(VuDev *dev, unsigned char uuid[UUID_LEN])
+{
+    VhostUserMsg msg = {
+        .request = VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE,
+        .size = sizeof(msg.payload.object),
+        .flags = VHOST_USER_VERSION,
+    };
+
+    memcpy(msg.payload.object.uuid, uuid, sizeof(uuid[0]) * UUID_LEN);
+
+    if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SHARED_OBJECT)) {
+        return false;
+    }
+
+    return vu_send_message(dev, &msg);
+}
+
 static bool
 vu_set_vring_call_exec(VuDev *dev, VhostUserMsg *vmsg)
 {
diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h
index 49208cceaa..907af1bcda 100644
--- a/subprojects/libvhost-user/libvhost-user.h
+++ b/subprojects/libvhost-user/libvhost-user.h
@@ -64,7 +64,8 @@  enum VhostUserProtocolFeature {
     VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
     VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14,
     VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15,
-
+    /* Feature 16 is reserved for VHOST_USER_PROTOCOL_F_STATUS. */
+    VHOST_USER_PROTOCOL_F_SHARED_OBJECT = 17,
     VHOST_USER_PROTOCOL_F_MAX
 };
 
@@ -119,6 +120,9 @@  typedef enum VhostUserSlaveRequest {
     VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3,
     VHOST_USER_BACKEND_VRING_CALL = 4,
     VHOST_USER_BACKEND_VRING_ERR = 5,
+    VHOST_USER_BACKEND_SHARED_OBJECT_ADD = 6,
+    VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE = 7,
+    VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP = 8,
     VHOST_USER_BACKEND_MAX
 }  VhostUserSlaveRequest;
 
@@ -172,6 +176,12 @@  typedef struct VhostUserInflight {
     uint16_t queue_size;
 } VhostUserInflight;
 
+#define UUID_LEN 16
+
+typedef struct VhostUserShared {
+    unsigned char uuid[UUID_LEN];
+} VhostUserShared;
+
 #if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__))
 # define VU_PACKED __attribute__((gcc_struct, packed))
 #else
@@ -199,6 +209,7 @@  typedef struct VhostUserMsg {
         VhostUserConfig config;
         VhostUserVringArea area;
         VhostUserInflight inflight;
+        VhostUserShared object;
     } payload;
 
     int fds[VHOST_MEMORY_BASELINE_NREGIONS];
@@ -539,6 +550,46 @@  void vu_set_queue_handler(VuDev *dev, VuVirtq *vq,
 bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd,
                                 int size, int offset);
 
+/**
+ * vu_get_shared_object:
+ * @dev: a VuDev context
+ * @uuid: UUID of the shared object
+ * @dmabuf_fd: output dma-buf file descriptor
+ *
+ * Looks up a virtio shared object (i.e., dma-buf fd) associated with the
+ * received UUID. The result, if found, is stored in the dmabuf_fd argument.
+ *
+ * Returns: whether the virtio object was found.
+ */
+bool vu_get_shared_object(VuDev *dev, unsigned char uuid[UUID_LEN],
+                          int *dmabuf_fd);
+
+/**
+ * vu_add_shared_object:
+ * @dev: a VuDev context
+ * @uuid: UUID of the shared object
+ * @dmabuf_fd: dma-buf file descriptor to add
+ *
+ * Stores a new shared object (i.e., dma-buf fd) in the hash table, and
+ * associates it with the received UUID.
+ *
+ * Returns: TRUE on success, FALSE on failure.
+ */
+bool vu_add_shared_object(VuDev *dev, unsigned char uuid[UUID_LEN],
+                          int dmabuf_fd);
+
+/**
+ * vu_rm_shared_object:
+ * @dev: a VuDev context
+ * @uuid: UUID of the shared object
+ *
+ * Removes a shared object (i.e., dma-buf fd) associated with the
+ * received UUID from the hash table.
+ *
+ * Returns: TRUE on success, FALSE on failure.
+ */
+bool vu_rm_shared_object(VuDev *dev, unsigned char uuid[UUID_LEN]);
+
 /**
  * vu_queue_set_notification:
  * @dev: a VuDev context
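
For reference, a minimal sketch of how a back-end might drive the helpers
added by this patch (error handling and the UUID source are illustrative,
not part of the series itself):

#include <string.h>
#include "libvhost-user.h"

/* Share a dma-buf under 'uuid', look it up again, then drop the entry. */
static void example_shared_object_flow(VuDev *dev,
                                       unsigned char uuid[UUID_LEN],
                                       int dmabuf_fd)
{
    int fd = -1;

    /* Exporter side: publish the fd in the front-end's shared table. */
    if (!vu_add_shared_object(dev, uuid, dmabuf_fd)) {
        return;
    }

    /* Importer side (possibly a different back-end): retrieve the fd. */
    if (vu_get_shared_object(dev, uuid, &fd)) {
        /* ... import the dma-buf through fd ... */
    }

    /* Remove the table entry once the underlying resource is destroyed. */
    vu_rm_shared_object(dev, uuid);
}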