
[v2,2/2] vdpa: send CVQ state load commands in parallel

Message ID 7d800315d04359d0bb91f61ec348eda1bdb972be.1683371965.git.yin31149@gmail.com
State New
Series Send all the SVQ control commands in parallel

Commit Message

Hawkins Jiawei May 6, 2023, 2:06 p.m. UTC
This patch introduces vhost_vdpa_net_cvq_add() and refactors
vhost_vdpa_net_load*(), so that QEMU can send CVQ state load
commands in parallel.

To be more specific, this patch introduces vhost_vdpa_net_cvq_add()
to add SVQ control commands to SVQ and kick the device without
polling the device used buffers. QEMU does not poll and check the
device used buffers in vhost_vdpa_net_load() until all CVQ state
load commands have been sent to the device.

Moreover, to avoid overwriting the single `s->cvq_cmd_out_buffer`
and `s->status` buffers shared by all CVQ state load commands
when those commands are sent in parallel, this patch introduces
`out_cursor` and `in_cursor` in vhost_vdpa_net_load(), pointing
to the next available buffer for the out descriptor and the in
descriptor respectively, so that each CVQ state load command can
use its own unique buffer.
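
For illustration, a minimal sketch of the cursor scheme (the names follow
the patch; treating the shared pages as simple bump allocators is an
interpretation, not code from the patch):

    /* Both cursors start at the beginning of the shared, pre-mapped pages */
    void *out_cursor = s->cvq_cmd_out_buffer;    /* for out descriptors */
    virtio_net_ctrl_ack *in_cursor = s->status;  /* for in descriptors  */

    /* After a command is queued, the cursors advance so that the next
     * command gets a disjoint slice of the same pages */
    out_cursor += out_len;  /* skip this command's header + data */
    in_cursor += 1;         /* one virtio_net_ctrl_ack per command */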

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1578
Signed-off-by: Hawkins Jiawei <yin31149@gmail.com>
---
 net/vhost-vdpa.c | 152 +++++++++++++++++++++++++++++++++++++----------
 1 file changed, 120 insertions(+), 32 deletions(-)

Comments

Jason Wang May 17, 2023, 5:22 a.m. UTC | #1
On Sat, May 6, 2023 at 10:07 PM Hawkins Jiawei <yin31149@gmail.com> wrote:
>
> [...]
>
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index 10804c7200..14e31ca5c5 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -590,6 +590,44 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
>      vhost_vdpa_net_client_stop(nc);
>  }
>
> +/**
> + * vhost_vdpa_net_cvq_add() adds SVQ control commands to SVQ,
> + * kicks the device but does not poll the device used buffers.
> + *
> + * Return the number of elements added to SVQ on success.
> + */
> +static int vhost_vdpa_net_cvq_add(VhostVDPAState *s,
> +                                void **out_cursor, size_t out_len,

Can we track things like cursors in e.g. VhostVDPAState?

> +                                virtio_net_ctrl_ack **in_cursor, size_t in_len)
> +{
> +    /* Buffers for the device */
> +    const struct iovec out = {
> +        .iov_base = *out_cursor,
> +        .iov_len = out_len,
> +    };
> +    const struct iovec in = {
> +        .iov_base = *in_cursor,
> +        .iov_len = sizeof(virtio_net_ctrl_ack),
> +    };
> +    VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
> +    int r;
> +
> +    r = vhost_svq_add(svq, &out, 1, &in, 1, NULL);
> +    if (unlikely(r != 0)) {
> +        if (unlikely(r == -ENOSPC)) {
> +            qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
> +                          __func__);
> +        }
> +        return r;
> +    }
> +
> +    /* Update the cursor */
> +    *out_cursor += out_len;
> +    *in_cursor += 1;
> +
> +    return 1;
> +}
> +
>  /**
>   * vhost_vdpa_net_cvq_add_and_wait() adds SVQ control commands to SVQ,
>   * kicks the device and polls the device used buffers.
> @@ -628,69 +666,82 @@ static ssize_t vhost_vdpa_net_cvq_add_and_wait(VhostVDPAState *s,
>      return vhost_svq_poll(svq);
>  }
>
> -static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class,
> -                                       uint8_t cmd, const void *data,
> -                                       size_t data_size)
> +
> +/**
> + * vhost_vdpa_net_load_cmd() restores the NIC state through SVQ.
> + *
> + * Return the number of elements added to SVQ on success.
> + */
> +static int vhost_vdpa_net_load_cmd(VhostVDPAState *s,
> +                                void **out_cursor, uint8_t class, uint8_t cmd,
> +                                const void *data, size_t data_size,
> +                                virtio_net_ctrl_ack **in_cursor)
>  {
>      const struct virtio_net_ctrl_hdr ctrl = {
>          .class = class,
>          .cmd = cmd,
>      };
>
> -    assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl));
> +    assert(sizeof(ctrl) < vhost_vdpa_net_cvq_cmd_page_len() -
> +                          (*out_cursor - s->cvq_cmd_out_buffer));
> +    assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl) -
> +                       (*out_cursor - s->cvq_cmd_out_buffer));
>
> -    memcpy(s->cvq_cmd_out_buffer, &ctrl, sizeof(ctrl));
> -    memcpy(s->cvq_cmd_out_buffer + sizeof(ctrl), data, data_size);
> +    memcpy(*out_cursor, &ctrl, sizeof(ctrl));
> +    memcpy(*out_cursor + sizeof(ctrl), data, data_size);
>
> -    return vhost_vdpa_net_cvq_add_and_wait(s, sizeof(ctrl) + data_size,
> -                                  sizeof(virtio_net_ctrl_ack));
> +    return vhost_vdpa_net_cvq_add(s, out_cursor, sizeof(ctrl) + data_size,
> +                                  in_cursor, sizeof(virtio_net_ctrl_ack));
>  }
>
> -static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n)
> +/**
> + * vhost_vdpa_net_load_mac() restores the NIC MAC address through SVQ.
> + *
> + * Return the number of elements added to SVQ on success.
> + */
> +static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n,
> +                            void **out_cursor, virtio_net_ctrl_ack **in_cursor)
>  {
>      uint64_t features = n->parent_obj.guest_features;
>      if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) {
> -        ssize_t dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MAC,
> -                                                  VIRTIO_NET_CTRL_MAC_ADDR_SET,
> -                                                  n->mac, sizeof(n->mac));
> -        if (unlikely(dev_written < 0)) {
> -            return dev_written;
> -        }
> -
> -        return *s->status != VIRTIO_NET_OK;
> +        return vhost_vdpa_net_load_cmd(s, out_cursor, VIRTIO_NET_CTRL_MAC,
> +                                       VIRTIO_NET_CTRL_MAC_ADDR_SET,
> +                                       n->mac, sizeof(n->mac), in_cursor);
>      }
>
>      return 0;
>  }
>
> -static int vhost_vdpa_net_load_mq(VhostVDPAState *s,
> -                                  const VirtIONet *n)
> +/**
> + * vhost_vdpa_net_load_mq() restores the NIC mq state through SVQ.
> + *
> + * Return the number of elements added to SVQ on success.
> + */
> +static int vhost_vdpa_net_load_mq(VhostVDPAState *s, const VirtIONet *n,
> +                            void **out_cursor, virtio_net_ctrl_ack **in_cursor)
>  {
>      struct virtio_net_ctrl_mq mq;
>      uint64_t features = n->parent_obj.guest_features;
> -    ssize_t dev_written;
>
>      if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) {
>          return 0;
>      }
>
>      mq.virtqueue_pairs = cpu_to_le16(n->curr_queue_pairs);
> -    dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MQ,
> -                                          VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &mq,
> -                                          sizeof(mq));
> -    if (unlikely(dev_written < 0)) {
> -        return dev_written;
> -    }
> -
> -    return *s->status != VIRTIO_NET_OK;
> +    return vhost_vdpa_net_load_cmd(s, out_cursor, VIRTIO_NET_CTRL_MQ,
> +                                   VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
> +                                   &mq, sizeof(mq), in_cursor);
>  }
>
>  static int vhost_vdpa_net_load(NetClientState *nc)
>  {
>      VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> +    VhostShadowVirtqueue *svq;
> +    void *out_cursor;
> +    virtio_net_ctrl_ack *in_cursor;
>      struct vhost_vdpa *v = &s->vhost_vdpa;
>      const VirtIONet *n;
> -    int r;
> +    ssize_t cmds_in_flight = 0, dev_written, r;
>
>      assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
>
> @@ -699,14 +750,51 @@ static int vhost_vdpa_net_load(NetClientState *nc)
>      }
>
>      n = VIRTIO_NET(v->dev->vdev);
> -    r = vhost_vdpa_net_load_mac(s, n);
> +    out_cursor = s->cvq_cmd_out_buffer;
> +    in_cursor = s->status;
> +
> +    r = vhost_vdpa_net_load_mac(s, n, &out_cursor, &in_cursor);
>      if (unlikely(r < 0)) {
>          return r;
>      }
> -    r = vhost_vdpa_net_load_mq(s, n);
> -    if (unlikely(r)) {
> +    cmds_in_flight += r;
> +
> +    r = vhost_vdpa_net_load_mq(s, n, &out_cursor, &in_cursor);
> +    if (unlikely(r < 0)) {
>          return r;
>      }
> +    cmds_in_flight += r;
> +
> +    /* Poll for all used buffers from the device */
> +    svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
> +    while (cmds_in_flight > 0) {
> +        /*
> +         * We can poll here since we have held the BQL from the time we
> +         * sent the descriptor. Also, we need to take the answer before
> +         * SVQ pulls by itself, when the BQL is released.
> +         */
> +        dev_written = vhost_svq_poll(svq);

I'd tweak vhost_svq_poll to accept cmds_in_flight.

Thanks
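
A minimal sketch of the suggested tweak (the exact signature and return
convention are assumptions, not the final API):

    /* Hypothetical: wait for `num` used buffers in a single call */
    size_t vhost_svq_poll(VhostShadowVirtqueue *svq, size_t num);

    /* vhost_vdpa_net_load() could then collapse its polling loop into: */
    dev_written = vhost_svq_poll(svq, cmds_in_flight);
    if (unlikely(dev_written < cmds_in_flight)) {
        return -EINVAL;  /* timed out or missing used buffers */
    }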

> +
> +        if (unlikely(!dev_written)) {
> +            /*
> +             * vhost_svq_poll() returns 0 when something goes wrong,
> +             * e.g. QEMU has waited too long or there is no available
> +             * used buffer from the device, so there is no need to
> +             * continue polling in this case.
> +             */
> +            return -EINVAL;
> +        }
> +
> +        --cmds_in_flight;
> +    }
> +
> +    /* Check the buffers written by the device */
> +    for (virtio_net_ctrl_ack *status = s->status; status < in_cursor;
> +         ++status) {
> +        if (*status != VIRTIO_NET_OK) {
> +            return -EINVAL;
> +        }
> +    }
>
>      return 0;
>  }
> --
> 2.25.1
>
Eugenio Perez Martin May 17, 2023, 8:21 a.m. UTC | #2
On Wed, May 17, 2023 at 7:22 AM Jason Wang <jasowang@redhat.com> wrote:
>
> On Sat, May 6, 2023 at 10:07 PM Hawkins Jiawei <yin31149@gmail.com> wrote:
> >
> > [...]
> > +static int vhost_vdpa_net_cvq_add(VhostVDPAState *s,
> > +                                void **out_cursor, size_t out_len,
>
> Can we track things like cursors in e.g. VhostVDPAState?
>

Cursors will only be used at device startup. After that, cursors are
always the full buffer. Do we want to "pollute" VhostVDPAState with
things that will not be used after the startup?

Maybe we can create a struct for them and then pass just this struct?

Thanks!
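
A minimal sketch of such a struct, assuming it only needs to group the two
cursors (the name matches the `VhostVDPACursor` mentioned later in this
thread; the field names are assumptions):

    /* Hypothetical: bundles both cursors so callers pass one pointer */
    typedef struct VhostVDPACursor {
        void *out;                /* next free byte in s->cvq_cmd_out_buffer */
        virtio_net_ctrl_ack *in;  /* next free ack slot in s->status */
    } VhostVDPACursor;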

Hawkins Jiawei May 17, 2023, 3:01 p.m. UTC | #3
Sorry for forgetting cc when replying to the email.

On Wed, 17 May 2023 at 16:22, Eugenio Perez Martin <eperezma@redhat.com> wrote:
>
> On Wed, May 17, 2023 at 7:22 AM Jason Wang <jasowang@redhat.com> wrote:
> >
> > On Sat, May 6, 2023 at 10:07 PM Hawkins Jiawei <yin31149@gmail.com> wrote:
> > >
> > > [...]
> > > +static int vhost_vdpa_net_cvq_add(VhostVDPAState *s,
> > > +                                void **out_cursor, size_t out_len,
> >
> > Can we track things like cursors in e.g. VhostVDPAState?
> >
>
> Cursors will only be used at device startup. After that, cursors are
> always the full buffer. Do we want to "pollute" VhostVDPAState with
> things that will not be used after the startup?
>
> Maybe we can create a struct for them and then pass just this struct?

Yes. Currently, the new version of vhost_vdpa_net_cvq_add() is only
called in vhost_vdpa_net_load() at startup, so these cursors will not be
used after startup.

If needed, we can create a `VhostVDPACursor` as suggested by Eugenio.

>
> Thanks!
>
> > > [...]
> > > +        dev_written = vhost_svq_poll(svq);
> >
> > I'd tweak vhost_svq_poll to accept cmds_in_flight.

That sounds great!
I will refactor the code here and send the v2 patch after
your patch.

Thanks!

Jason Wang May 18, 2023, 5:46 a.m. UTC | #4
On Wed, May 17, 2023 at 11:02 PM Hawkins Jiawei <yin31149@gmail.com> wrote:
>
> Sorry for forgetting cc when replying to the email.
>
> On Wed, 17 May 2023 at 16:22, Eugenio Perez Martin <eperezma@redhat.com> wrote:
> >
> > On Wed, May 17, 2023 at 7:22 AM Jason Wang <jasowang@redhat.com> wrote:
> > >
> > > On Sat, May 6, 2023 at 10:07 PM Hawkins Jiawei <yin31149@gmail.com> wrote:
> > > >
> > > > [...]
> > > > +static int vhost_vdpa_net_cvq_add(VhostVDPAState *s,
> > > > +                                void **out_cursor, size_t out_len,
> > >
> > > > Can we track things like cursors in e.g. VhostVDPAState?
> > >
> >
> > Cursors will only be used at device startup. After that, cursors are
> > always the full buffer. Do we want to "pollute" VhostVDPAState with
> > things that will not be used after the startup?

So it's the cursor of the cvq_cmd_out_buffer; it would be more elegant
to keep it where the cvq_cmd_out_buffer is placed. That can avoid
passing cursors through several levels.

Or it would be even better to have some buffer allocation helpers to
alloc and free in and out buffers.

Thanks
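
A rough sketch of what such helpers might look like (the names and
signatures are assumptions, not an existing API):

    /* Hypothetical helpers: hand out disjoint slices of the pre-mapped
     * CVQ pages instead of threading raw cursors through every level */
    static void *vhost_vdpa_net_cvq_out_alloc(VhostVDPAState *s, size_t len);
    static virtio_net_ctrl_ack *vhost_vdpa_net_cvq_in_alloc(VhostVDPAState *s);
    static void vhost_vdpa_net_cvq_bufs_reset(VhostVDPAState *s);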

Eugenio Perez Martin May 18, 2023, 6 a.m. UTC | #5
On Thu, May 18, 2023 at 7:47 AM Jason Wang <jasowang@redhat.com> wrote:
>
> On Wed, May 17, 2023 at 11:02 PM Hawkins Jiawei <yin31149@gmail.com> wrote:
> >
> > Sorry for forgetting cc when replying to the email.
> >
> > On Wed, 17 May 2023 at 16:22, Eugenio Perez Martin <eperezma@redhat.com> wrote:
> > >
> > > On Wed, May 17, 2023 at 7:22 AM Jason Wang <jasowang@redhat.com> wrote:
> > > >
> > > > On Sat, May 6, 2023 at 10:07 PM Hawkins Jiawei <yin31149@gmail.com> wrote:
> > > > >
> > > > > [...]
> > > > > +static int vhost_vdpa_net_cvq_add(VhostVDPAState *s,
> > > > > +                                void **out_cursor, size_t out_len,
> > > >
> > > > > Can we track things like cursors in e.g. VhostVDPAState?
> > > >
> > >
> > > Cursors will only be used at device startup. After that, cursors are
> > > always the full buffer. Do we want to "pollute" VhostVDPAState with
> > > things that will not be used after the startup?
>
> So it's the cursor of the cvq_cmd_out_buffer; it would be more elegant
> to keep it where the cvq_cmd_out_buffer is placed. That can avoid
> passing cursors through several levels.
>

That's right, there is no reason to update the cursors at
vhost_vdpa_net_cvq_add(). It can be done at the caller.

> Or it would be even better to have some buffer allocation helpers to
> alloc and free in and out buffers.
>

I think that's overkill, as the buffers are always the in and out CVQ
buffers. They are allocated at qemu initialization, and mapped /
unmapped at device start / stop for now.

Managing them dynamically would require code to map them at any time, etc.

Thanks!
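
For illustration, a sketch of moving the cursor update out of
vhost_vdpa_net_cvq_add() and into its caller (this shape is an assumption):

    r = vhost_vdpa_net_cvq_add(s, *out_cursor, sizeof(ctrl) + data_size,
                               *in_cursor, sizeof(virtio_net_ctrl_ack));
    if (unlikely(r < 0)) {
        return r;
    }
    /* Advance the cursors here, in the caller, only on success */
    *out_cursor += sizeof(ctrl) + data_size;
    *in_cursor += 1;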

> Thanks
>
> > >
> > > Maybe we can create a struct for them and then pass just this struct?
> >
> > Yes, Currently, the new version of vhost_vdpa_net_cvq_add() is only
> > called in vhost_vdpa_net_load() at startup, so these cursors will not be
> > used after startup.
> >
> > If needed, we can create a `VhostVDPACursor` as suggested by Eugenio.
> >
> > >
> > > Thanks!
> > >
> > > > > +                                virtio_net_ctrl_ack **in_cursor, size_t in_len)
> > > > > +{
> > > > > +    /* Buffers for the device */
> > > > > +    const struct iovec out = {
> > > > > +        .iov_base = *out_cursor,
> > > > > +        .iov_len = out_len,
> > > > > +    };
> > > > > +    const struct iovec in = {
> > > > > +        .iov_base = *in_cursor,
> > > > > +        .iov_len = sizeof(virtio_net_ctrl_ack),
> > > > > +    };
> > > > > +    VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
> > > > > +    int r;
> > > > > +
> > > > > +    r = vhost_svq_add(svq, &out, 1, &in, 1, NULL);
> > > > > +    if (unlikely(r != 0)) {
> > > > > +        if (unlikely(r == -ENOSPC)) {
> > > > > +            qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
> > > > > +                          __func__);
> > > > > +        }
> > > > > +        return r;
> > > > > +    }
> > > > > +
> > > > > +    /* Update the cursor */
> > > > > +    *out_cursor += out_len;
> > > > > +    *in_cursor += 1;
> > > > > +
> > > > > +    return 1;
> > > > > +}
> > > > > +
> > > > >  /**
> > > > >   * vhost_vdpa_net_cvq_add_and_wait() adds SVQ control commands to SVQ,
> > > > >   * kicks the device and polls the device used buffers.
> > > > > @@ -628,69 +666,82 @@ static ssize_t vhost_vdpa_net_cvq_add_and_wait(VhostVDPAState *s,
> > > > >      return vhost_svq_poll(svq);
> > > > >  }
> > > > >
> > > > > -static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class,
> > > > > -                                       uint8_t cmd, const void *data,
> > > > > -                                       size_t data_size)
> > > > > +
> > > > > +/**
> > > > > + * vhost_vdpa_net_load_cmd() restores the NIC state through SVQ.
> > > > > + *
> > > > > + * Return the number of elements added to SVQ if success.
> > > > > + */
> > > > > +static int vhost_vdpa_net_load_cmd(VhostVDPAState *s,
> > > > > +                                void **out_cursor, uint8_t class, uint8_t cmd,
> > > > > +                                const void *data, size_t data_size,
> > > > > +                                virtio_net_ctrl_ack **in_cursor)
> > > > >  {
> > > > >      const struct virtio_net_ctrl_hdr ctrl = {
> > > > >          .class = class,
> > > > >          .cmd = cmd,
> > > > >      };
> > > > >
> > > > > -    assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl));
> > > > > +    assert(sizeof(ctrl) < vhost_vdpa_net_cvq_cmd_page_len() -
> > > > > +                          (*out_cursor - s->cvq_cmd_out_buffer));
> > > > > +    assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl) -
> > > > > +                       (*out_cursor - s->cvq_cmd_out_buffer));
> > > > >
> > > > > -    memcpy(s->cvq_cmd_out_buffer, &ctrl, sizeof(ctrl));
> > > > > -    memcpy(s->cvq_cmd_out_buffer + sizeof(ctrl), data, data_size);
> > > > > +    memcpy(*out_cursor, &ctrl, sizeof(ctrl));
> > > > > +    memcpy(*out_cursor + sizeof(ctrl), data, data_size);
> > > > >
> > > > > -    return vhost_vdpa_net_cvq_add_and_wait(s, sizeof(ctrl) + data_size,
> > > > > -                                  sizeof(virtio_net_ctrl_ack));
> > > > > +    return vhost_vdpa_net_cvq_add(s, out_cursor, sizeof(ctrl) + data_size,
> > > > > +                                  in_cursor, sizeof(virtio_net_ctrl_ack));
> > > > >  }
> > > > >
> > > > > -static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n)
> > > > > +/**
> > > > > + * vhost_vdpa_net_load_mac() restores the NIC mac through SVQ.
> > > > > + *
> > > > > + * Return the number of elements added to SVQ if success.
> > > > > + */
> > > > > +static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n,
> > > > > +                            void **out_cursor, virtio_net_ctrl_ack **in_cursor)
> > > > >  {
> > > > >      uint64_t features = n->parent_obj.guest_features;
> > > > >      if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) {
> > > > > -        ssize_t dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MAC,
> > > > > -                                                  VIRTIO_NET_CTRL_MAC_ADDR_SET,
> > > > > -                                                  n->mac, sizeof(n->mac));
> > > > > -        if (unlikely(dev_written < 0)) {
> > > > > -            return dev_written;
> > > > > -        }
> > > > > -
> > > > > -        return *s->status != VIRTIO_NET_OK;
> > > > > +        return vhost_vdpa_net_load_cmd(s, out_cursor, VIRTIO_NET_CTRL_MAC,
> > > > > +                                       VIRTIO_NET_CTRL_MAC_ADDR_SET,
> > > > > +                                       n->mac, sizeof(n->mac), in_cursor);
> > > > >      }
> > > > >
> > > > >      return 0;
> > > > >  }
> > > > >
> > > > > -static int vhost_vdpa_net_load_mq(VhostVDPAState *s,
> > > > > -                                  const VirtIONet *n)
> > > > > +/**
> > > > > + * vhost_vdpa_net_load_mac() restores the NIC mq state through SVQ.
> > > > > + *
> > > > > + * Return the number of elements added to SVQ if success.
> > > > > + */
> > > > > +static int vhost_vdpa_net_load_mq(VhostVDPAState *s, const VirtIONet *n,
> > > > > +                            void **out_cursor, virtio_net_ctrl_ack **in_cursor)
> > > > >  {
> > > > >      struct virtio_net_ctrl_mq mq;
> > > > >      uint64_t features = n->parent_obj.guest_features;
> > > > > -    ssize_t dev_written;
> > > > >
> > > > >      if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) {
> > > > >          return 0;
> > > > >      }
> > > > >
> > > > >      mq.virtqueue_pairs = cpu_to_le16(n->curr_queue_pairs);
> > > > > -    dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MQ,
> > > > > -                                          VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &mq,
> > > > > -                                          sizeof(mq));
> > > > > -    if (unlikely(dev_written < 0)) {
> > > > > -        return dev_written;
> > > > > -    }
> > > > > -
> > > > > -    return *s->status != VIRTIO_NET_OK;
> > > > > +    return vhost_vdpa_net_load_cmd(s, out_cursor, VIRTIO_NET_CTRL_MQ,
> > > > > +                                   VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
> > > > > +                                   &mq, sizeof(mq), in_cursor);
> > > > >  }
> > > > >
> > > > >  static int vhost_vdpa_net_load(NetClientState *nc)
> > > > >  {
> > > > >      VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> > > > > +    VhostShadowVirtqueue *svq;
> > > > > +    void *out_cursor;
> > > > > +    virtio_net_ctrl_ack *in_cursor;
> > > > >      struct vhost_vdpa *v = &s->vhost_vdpa;
> > > > >      const VirtIONet *n;
> > > > > -    int r;
> > > > > +    ssize_t cmds_in_flight = 0, dev_written, r;
> > > > >
> > > > >      assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> > > > >
> > > > > @@ -699,14 +750,51 @@ static int vhost_vdpa_net_load(NetClientState *nc)
> > > > >      }
> > > > >
> > > > >      n = VIRTIO_NET(v->dev->vdev);
> > > > > -    r = vhost_vdpa_net_load_mac(s, n);
> > > > > +    out_cursor = s->cvq_cmd_out_buffer;
> > > > > +    in_cursor = s->status;
> > > > > +
> > > > > +    r = vhost_vdpa_net_load_mac(s, n, &out_cursor, &in_cursor);
> > > > >      if (unlikely(r < 0))
> > > > >          return r;
> > > > >      }
> > > > > -    r = vhost_vdpa_net_load_mq(s, n);
> > > > > -    if (unlikely(r)) {
> > > > > +    cmds_in_flight += r;
> > > > > +
> > > > > +    r = vhost_vdpa_net_load_mq(s, n, &out_cursor, &in_cursor);
> > > > > +    if (unlikely(r < 0)) {
> > > > >          return r;
> > > > >      }
> > > > > +    cmds_in_flight += r;
> > > > > +
> > > > > +    /* Poll for all used buffer from device */
> > > > > +    svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
> > > > > +    while (cmds_in_flight > 0) {
> > > > > +        /*
> > > > > +         * We can poll here since we've had BQL from the time we sent the
> > > > > +         * descriptor. Also, we need to take the answer before SVQ pulls
> > > > > +         * by itself, when BQL is released
> > > > > +         */
> > > > > +        dev_written = vhost_svq_poll(svq);
> > > >
> > > > I'd tweak vhost_svq_poll to accept cmds_in_flight.
> >
> > That sounds great!
> > I will refactor the code here and send the v2 patch after
> > your patch.
> >
> > Thanks!
> >
> > > >
> > > > Thanks
> > > >
> > > > > +
> > > > > +        if (unlikely(!dev_written)) {
> > > > > +            /*
> > > > > +             * vhost_svq_poll() returns 0 when something goes wrong, such as
> > > > > +             * QEMU waiting too long or no used buffer being available from
> > > > > +             * the device; there is no need to continue polling in this
> > > > > +             * case.
> > > > > +             */
> > > > > +            return -EINVAL;
> > > > > +        }
> > > > > +
> > > > > +        --cmds_in_flight;
> > > > > +    }
> > > > > +
> > > > > +    /* Check the buffers written by the device */
> > > > > +    for (virtio_net_ctrl_ack *status = s->status; status < in_cursor;
> > > > > +         ++status) {
> > > > > +        if (*status != VIRTIO_NET_OK) {
> > > > > +            return -EINVAL;
> > > > > +        }
> > > > > +    }
> > > > >
> > > > >      return 0;
> > > > >  }
> > > > > --
> > > > > 2.25.1
> > > > >
> > > >
> > >
> >
>
Jason Wang May 18, 2023, 6:12 a.m. UTC | #6
On Thu, May 18, 2023 at 2:00 PM Eugenio Perez Martin
<eperezma@redhat.com> wrote:
>
> On Thu, May 18, 2023 at 7:47 AM Jason Wang <jasowang@redhat.com> wrote:
> >
> > On Wed, May 17, 2023 at 11:02 PM Hawkins Jiawei <yin31149@gmail.com> wrote:
> > >
> > > Sorry for forgetting to cc when replying to the email.
> > >
> > > On Wed, 17 May 2023 at 16:22, Eugenio Perez Martin <eperezma@redhat.com> wrote:
> > > >
> > > > On Wed, May 17, 2023 at 7:22 AM Jason Wang <jasowang@redhat.com> wrote:
> > > > >
> > > > > On Sat, May 6, 2023 at 10:07 PM Hawkins Jiawei <yin31149@gmail.com> wrote:
> > > > > >
> > > > > > This patch introduces the vhost_vdpa_net_cvq_add() and
> > > > > > refactors the vhost_vdpa_net_load*(), so that QEMU can
> > > > > > send CVQ state load commands in parallel.
> > > > > >
> > > > > > To be more specific, this patch introduces vhost_vdpa_net_cvq_add()
> > > > > > to add SVQ control commands to SVQ and kick the device,
> > > > > > but does not poll the device used buffers. QEMU will not
> > > > > > poll and check the device used buffers in vhost_vdpa_net_load()
> > > > > > until all CVQ state load commands have been sent to the device.
> > > > > >
> > > > > > What's more, in order to avoid buffer overwriting caused by
> > > > > > using `svq->cvq_cmd_out_buffer` and `svq->status` as the
> > > > > > buffer for all CVQ state load commands when sending
> > > > > > CVQ state load commands in parallel, this patch introduces
> > > > > > `out_cursor` and `in_cursor` in vhost_vdpa_net_load(),
> > > > > > pointing to the available buffer for in descriptor and
> > > > > > out descriptor, so that different CVQ state load commands can
> > > > > > use their unique buffer.
> > > > > >
> > > > > > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1578
> > > > > > Signed-off-by: Hawkins Jiawei <yin31149@gmail.com>
> > > > > > ---
> > > > > >  net/vhost-vdpa.c | 152 +++++++++++++++++++++++++++++++++++++----------
> > > > > >  1 file changed, 120 insertions(+), 32 deletions(-)
> > > > > >
> > > > > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > > > > > index 10804c7200..14e31ca5c5 100644
> > > > > > --- a/net/vhost-vdpa.c
> > > > > > +++ b/net/vhost-vdpa.c
> > > > > > @@ -590,6 +590,44 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
> > > > > >      vhost_vdpa_net_client_stop(nc);
> > > > > >  }
> > > > > >
> > > > > > +/**
> > > > > > + * vhost_vdpa_net_cvq_add() adds SVQ control commands to SVQ,
> > > > > > + * kicks the device but does not poll the device used buffers.
> > > > > > + *
> > > > > > + * Return the number of elements added to SVQ on success.
> > > > > > + */
> > > > > > +static int vhost_vdpa_net_cvq_add(VhostVDPAState *s,
> > > > > > +                                void **out_cursor, size_t out_len,
> > > > >
> > > > > Can we track things like cursors in e.g VhostVDPAState ?
> > > > >
> > > >
> > > > Cursors will only be used at device startup. After that, cursors are
> > > > always the full buffer. Do we want to "pollute" VhostVDPAState with
> > > > things that will not be used after startup?
> >
> > So it's the cursor of the cvq_cmd_out_buffer; it would be more elegant
> > to keep it where the cvq_cmd_out_buffer is placed. That can avoid
> > passing cursors through several levels.
> >
>
> That's right, there is no reason to update at vhost_vdpa_net_cvq_add.
> It can be done at the caller.
>
> > Or it would be even better to have some buffer allocation helpers to
> > alloc and free in and out buffers.
> >
>
> I think that's overkill, as the buffers are always the in and out CVQ
> buffers. They are allocated at qemu initialization, and mapped /
> unmapped at device start / stop for now.

It's not a must; we can do it if it has more users, for sure.
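
A minimal sketch of such helpers, in case it helps the discussion
(the helper names and the mmap()-based allocation are assumptions,
not code from this series; requires <sys/mman.h>):

    static void *vhost_vdpa_cvq_alloc_page(size_t size)
    {
        void *buf = mmap(NULL, size, PROT_READ | PROT_WRITE,
                         MAP_SHARED | MAP_ANONYMOUS, -1, 0);

        return buf == MAP_FAILED ? NULL : buf;
    }

    static void vhost_vdpa_cvq_free_page(void *buf, size_t size)
    {
        if (buf) {
            munmap(buf, size);
        }
    }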

>
> Managing them dynamically would require code to map them at any time, etc.

Thanks

>
> Thanks!
>
> > Thanks
> >
> > > >
> > > > Maybe we can create a struct for them and then pass just this struct?
> > >
> > > Yes. Currently, the new version of vhost_vdpa_net_cvq_add() is only
> > > called in vhost_vdpa_net_load() at startup, so these cursors will not be
> > > used after startup.
> > >
> > > If needed, we can create a `VhostVDPACursor` as suggested by Eugenio.
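> > >
> > > A possible shape for it (purely hypothetical; field names are
> > > illustrative):
> > >
> > >     typedef struct VhostVDPACursor {
> > >         void *out;               /* next free byte in cvq_cmd_out_buffer */
> > >         virtio_net_ctrl_ack *in; /* next free ack slot in s->status */
> > >     } VhostVDPACursor;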
> > >
> > > >
> > > > Thanks!
> > > >
> > > > > > +                                virtio_net_ctrl_ack **in_cursor, size_t in_len)
> > > > > > +{
> > > > > > +    /* Buffers for the device */
> > > > > > +    const struct iovec out = {
> > > > > > +        .iov_base = *out_cursor,
> > > > > > +        .iov_len = out_len,
> > > > > > +    };
> > > > > > +    const struct iovec in = {
> > > > > > +        .iov_base = *in_cursor,
> > > > > > +        .iov_len = sizeof(virtio_net_ctrl_ack),
> > > > > > +    };
> > > > > > +    VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
> > > > > > +    int r;
> > > > > > +
> > > > > > +    r = vhost_svq_add(svq, &out, 1, &in, 1, NULL);
> > > > > > +    if (unlikely(r != 0)) {
> > > > > > +        if (unlikely(r == -ENOSPC)) {
> > > > > > +            qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
> > > > > > +                          __func__);
> > > > > > +        }
> > > > > > +        return r;
> > > > > > +    }
> > > > > > +
> > > > > > +    /* Update the cursor */
> > > > > > +    *out_cursor += out_len;
> > > > > > +    *in_cursor += 1;
> > > > > > +
> > > > > > +    return 1;
> > > > > > +}
> > > > > > +
> > > > > >  /**
> > > > > >   * vhost_vdpa_net_cvq_add_and_wait() adds SVQ control commands to SVQ,
> > > > > >   * kicks the device and polls the device used buffers.
> > > > > > @@ -628,69 +666,82 @@ static ssize_t vhost_vdpa_net_cvq_add_and_wait(VhostVDPAState *s,
> > > > > >      return vhost_svq_poll(svq);
> > > > > >  }
> > > > > >
> > > > > > -static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class,
> > > > > > -                                       uint8_t cmd, const void *data,
> > > > > > -                                       size_t data_size)
> > > > > > +
> > > > > > +/**
> > > > > > + * vhost_vdpa_net_load_cmd() restores the NIC state through SVQ.
> > > > > > + *
> > > > > > + * Return the number of elements added to SVQ on success.
> > > > > > + */
> > > > > > +static int vhost_vdpa_net_load_cmd(VhostVDPAState *s,
> > > > > > +                                void **out_cursor, uint8_t class, uint8_t cmd,
> > > > > > +                                const void *data, size_t data_size,
> > > > > > +                                virtio_net_ctrl_ack **in_cursor)
> > > > > >  {
> > > > > >      const struct virtio_net_ctrl_hdr ctrl = {
> > > > > >          .class = class,
> > > > > >          .cmd = cmd,
> > > > > >      };
> > > > > >
> > > > > > -    assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl));
> > > > > > +    assert(sizeof(ctrl) < vhost_vdpa_net_cvq_cmd_page_len() -
> > > > > > +                          (*out_cursor - s->cvq_cmd_out_buffer));
> > > > > > +    assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl) -
> > > > > > +                       (*out_cursor - s->cvq_cmd_out_buffer));
> > > > > >
> > > > > > -    memcpy(s->cvq_cmd_out_buffer, &ctrl, sizeof(ctrl));
> > > > > > -    memcpy(s->cvq_cmd_out_buffer + sizeof(ctrl), data, data_size);
> > > > > > +    memcpy(*out_cursor, &ctrl, sizeof(ctrl));
> > > > > > +    memcpy(*out_cursor + sizeof(ctrl), data, data_size);
> > > > > >
> > > > > > -    return vhost_vdpa_net_cvq_add_and_wait(s, sizeof(ctrl) + data_size,
> > > > > > -                                  sizeof(virtio_net_ctrl_ack));
> > > > > > +    return vhost_vdpa_net_cvq_add(s, out_cursor, sizeof(ctrl) + data_size,
> > > > > > +                                  in_cursor, sizeof(virtio_net_ctrl_ack));
> > > > > >  }
> > > > > >
> > > > > > -static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n)
> > > > > > +/**
> > > > > > + * vhost_vdpa_net_load_mac() restores the NIC mac through SVQ.
> > > > > > + *
> > > > > > + * Return the number of elements added to SVQ on success.
> > > > > > + */
> > > > > > +static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n,
> > > > > > +                            void **out_cursor, virtio_net_ctrl_ack **in_cursor)
> > > > > >  {
> > > > > >      uint64_t features = n->parent_obj.guest_features;
> > > > > >      if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) {
> > > > > > -        ssize_t dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MAC,
> > > > > > -                                                  VIRTIO_NET_CTRL_MAC_ADDR_SET,
> > > > > > -                                                  n->mac, sizeof(n->mac));
> > > > > > -        if (unlikely(dev_written < 0)) {
> > > > > > -            return dev_written;
> > > > > > -        }
> > > > > > -
> > > > > > -        return *s->status != VIRTIO_NET_OK;
> > > > > > +        return vhost_vdpa_net_load_cmd(s, out_cursor, VIRTIO_NET_CTRL_MAC,
> > > > > > +                                       VIRTIO_NET_CTRL_MAC_ADDR_SET,
> > > > > > +                                       n->mac, sizeof(n->mac), in_cursor);
> > > > > >      }
> > > > > >
> > > > > >      return 0;
> > > > > >  }
> > > > > >
> > > > > > -static int vhost_vdpa_net_load_mq(VhostVDPAState *s,
> > > > > > -                                  const VirtIONet *n)
> > > > > > +/**
> > > > > > + * vhost_vdpa_net_load_mq() restores the NIC mq state through SVQ.
> > > > > > + *
> > > > > > + * Return the number of elements added to SVQ on success.
> > > > > > + */
> > > > > > +static int vhost_vdpa_net_load_mq(VhostVDPAState *s, const VirtIONet *n,
> > > > > > +                            void **out_cursor, virtio_net_ctrl_ack **in_cursor)
> > > > > >  {
> > > > > >      struct virtio_net_ctrl_mq mq;
> > > > > >      uint64_t features = n->parent_obj.guest_features;
> > > > > > -    ssize_t dev_written;
> > > > > >
> > > > > >      if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) {
> > > > > >          return 0;
> > > > > >      }
> > > > > >
> > > > > >      mq.virtqueue_pairs = cpu_to_le16(n->curr_queue_pairs);
> > > > > > -    dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MQ,
> > > > > > -                                          VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &mq,
> > > > > > -                                          sizeof(mq));
> > > > > > -    if (unlikely(dev_written < 0)) {
> > > > > > -        return dev_written;
> > > > > > -    }
> > > > > > -
> > > > > > -    return *s->status != VIRTIO_NET_OK;
> > > > > > +    return vhost_vdpa_net_load_cmd(s, out_cursor, VIRTIO_NET_CTRL_MQ,
> > > > > > +                                   VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
> > > > > > +                                   &mq, sizeof(mq), in_cursor);
> > > > > >  }
> > > > > >
> > > > > >  static int vhost_vdpa_net_load(NetClientState *nc)
> > > > > >  {
> > > > > >      VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> > > > > > +    VhostShadowVirtqueue *svq;
> > > > > > +    void *out_cursor;
> > > > > > +    virtio_net_ctrl_ack *in_cursor;
> > > > > >      struct vhost_vdpa *v = &s->vhost_vdpa;
> > > > > >      const VirtIONet *n;
> > > > > > -    int r;
> > > > > > +    ssize_t cmds_in_flight = 0, dev_written, r;
> > > > > >
> > > > > >      assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> > > > > >
> > > > > > @@ -699,14 +750,51 @@ static int vhost_vdpa_net_load(NetClientState *nc)
> > > > > >      }
> > > > > >
> > > > > >      n = VIRTIO_NET(v->dev->vdev);
> > > > > > -    r = vhost_vdpa_net_load_mac(s, n);
> > > > > > +    out_cursor = s->cvq_cmd_out_buffer;
> > > > > > +    in_cursor = s->status;
> > > > > > +
> > > > > > +    r = vhost_vdpa_net_load_mac(s, n, &out_cursor, &in_cursor);
> > > > > >      if (unlikely(r < 0)) {
> > > > > >          return r;
> > > > > >      }
> > > > > > -    r = vhost_vdpa_net_load_mq(s, n);
> > > > > > -    if (unlikely(r)) {
> > > > > > +    cmds_in_flight += r;
> > > > > > +
> > > > > > +    r = vhost_vdpa_net_load_mq(s, n, &out_cursor, &in_cursor);
> > > > > > +    if (unlikely(r < 0)) {
> > > > > >          return r;
> > > > > >      }
> > > > > > +    cmds_in_flight += r;
> > > > > > +
> > > > > > +    /* Poll for all used buffers from the device */
> > > > > > +    svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
> > > > > > +    while (cmds_in_flight > 0) {
> > > > > > +        /*
> > > > > > +         * We can poll here since we've had BQL from the time we sent the
> > > > > > +         * descriptor. Also, we need to take the answer before SVQ pulls
> > > > > > +         * by itself, when BQL is released
> > > > > > +         */
> > > > > > +        dev_written = vhost_svq_poll(svq);
> > > > >
> > > > > I'd tweak vhost_svq_poll to accept cmds_in_flight.
> > >
> > > That sounds great!
> > > I will refactor the code here and send the v2 patch after
> > > your patch.
> > >
> > > Thanks!
> > >
> > > > >
> > > > > Thanks
> > > > >
> > > > > > +
> > > > > > +        if (unlikely(!dev_written)) {
> > > > > > +            /*
> > > > > > +             * vhost_svq_poll() returns 0 when something goes wrong, such as
> > > > > > +             * QEMU waiting too long or no used buffer being available from
> > > > > > +             * the device; there is no need to continue polling in this
> > > > > > +             * case.
> > > > > > +             */
> > > > > > +            return -EINVAL;
> > > > > > +        }
> > > > > > +
> > > > > > +        --cmds_in_flight;
> > > > > > +    }
> > > > > > +
> > > > > > +    /* Check the buffers written by the device */
> > > > > > +    for (virtio_net_ctrl_ack *status = s->status; status < in_cursor;
> > > > > > +         ++status) {
> > > > > > +        if (*status != VIRTIO_NET_OK) {
> > > > > > +            return -EINVAL;
> > > > > > +        }
> > > > > > +    }
> > > > > >
> > > > > >      return 0;
> > > > > >  }
> > > > > > --
> > > > > > 2.25.1
> > > > > >
> > > > >
> > > >
> > >
> >
>
Hawkins Jiawei May 18, 2023, 6:54 a.m. UTC | #7
On 2023/5/18 14:12, Jason Wang wrote:
> On Thu, May 18, 2023 at 2:00 PM Eugenio Perez Martin
> <eperezma@redhat.com> wrote:
>>
>> On Thu, May 18, 2023 at 7:47 AM Jason Wang <jasowang@redhat.com> wrote:
>>>
>>> On Wed, May 17, 2023 at 11:02 PM Hawkins Jiawei <yin31149@gmail.com> wrote:
>>>>
>>>> Sorry for forgetting to cc when replying to the email.
>>>>
>>>> On Wed, 17 May 2023 at 16:22, Eugenio Perez Martin <eperezma@redhat.com> wrote:
>>>>>
>>>>> On Wed, May 17, 2023 at 7:22 AM Jason Wang <jasowang@redhat.com> wrote:
>>>>>>
>>>>>> On Sat, May 6, 2023 at 10:07 PM Hawkins Jiawei <yin31149@gmail.com> wrote:
>>>>>>>
>>>>>>> This patch introduces the vhost_vdpa_net_cvq_add() and
>>>>>>> refactors the vhost_vdpa_net_load*(), so that QEMU can
>>>>>>> send CVQ state load commands in parallel.
>>>>>>>
>>>>>>> To be more specific, this patch introduces vhost_vdpa_net_cvq_add()
>>>>>>> to add SVQ control commands to SVQ and kick the device,
>>>>>>> but does not poll the device used buffers. QEMU will not
>>>>>>> poll and check the device used buffers in vhost_vdpa_net_load()
>>>>>>> until all CVQ state load commands have been sent to the device.
>>>>>>>
>>>>>>> What's more, in order to avoid buffer overwriting caused by
>>>>>>> using `svq->cvq_cmd_out_buffer` and `svq->status` as the
>>>>>>> buffer for all CVQ state load commands when sending
>>>>>>> CVQ state load commands in parallel, this patch introduces
>>>>>>> `out_cursor` and `in_cursor` in vhost_vdpa_net_load(),
>>>>>>> pointing to the available buffer for in descriptor and
>>>>>>> out descriptor, so that different CVQ state load commands can
>>>>>>> use their unique buffer.
>>>>>>>
>>>>>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1578
>>>>>>> Signed-off-by: Hawkins Jiawei <yin31149@gmail.com>
>>>>>>> ---
>>>>>>>   net/vhost-vdpa.c | 152 +++++++++++++++++++++++++++++++++++++----------
>>>>>>>   1 file changed, 120 insertions(+), 32 deletions(-)
>>>>>>>
>>>>>>> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
>>>>>>> index 10804c7200..14e31ca5c5 100644
>>>>>>> --- a/net/vhost-vdpa.c
>>>>>>> +++ b/net/vhost-vdpa.c
>>>>>>> @@ -590,6 +590,44 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
>>>>>>>       vhost_vdpa_net_client_stop(nc);
>>>>>>>   }
>>>>>>>
>>>>>>> +/**
>>>>>>> + * vhost_vdpa_net_cvq_add() adds SVQ control commands to SVQ,
>>>>>>> + * kicks the device but does not poll the device used buffers.
>>>>>>> + *
>>>>>>> + * Return the number of elements added to SVQ on success.
>>>>>>> + */
>>>>>>> +static int vhost_vdpa_net_cvq_add(VhostVDPAState *s,
>>>>>>> +                                void **out_cursor, size_t out_len,
>>>>>>
>>>>>> Can we track things like cursors in e.g VhostVDPAState ?
>>>>>>
>>>>>
>>>>> Cursors will only be used at device startup. After that, cursors are
>>>>> always the full buffer. Do we want to "pollute" VhostVDPAState with
>>>>> things that will not be used after startup?
>>>
>>> So it's the cursor of the cvq_cmd_out_buffer; it would be more elegant
>>> to keep it where the cvq_cmd_out_buffer is placed. That can avoid
>>> passing cursors through several levels.
>>>
>>
>> That's right, there is no reason to update at vhost_vdpa_net_cvq_add.
>> It can be done at the caller.

But in any case, these cursors need to be passed as separate parameters
to vhost_vdpa_net_cvq_add(), so that they can be used for the
`struct iovec` `iov_base` field, right? Since we always pass these
parameters anyway, I also update them together inside
vhost_vdpa_net_cvq_add() in this patch.

If we want to avoid passing cursors through several levels, would it be
okay to back up `cvq_cmd_out_buffer` and `status` in
vhost_vdpa_net_load(), access and update them through `VhostVDPAState`
directly during loading, and restore them when loading finishes?
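
A minimal sketch of that alternative, assuming hypothetical
cvq_out_cursor/cvq_in_cursor fields added to VhostVDPAState
(illustrative names only, not part of this series):

    /* hypothetical fields in VhostVDPAState */
    void *cvq_out_cursor;
    virtio_net_ctrl_ack *cvq_in_cursor;

    static void vhost_vdpa_net_cvq_cursor_reset(VhostVDPAState *s)
    {
        /*
         * Outside of device startup the cursors always mean "the full
         * buffer", so vhost_vdpa_net_load() would restore them to the
         * buffer bases once all the acks have been polled.
         */
        s->cvq_out_cursor = s->cvq_cmd_out_buffer;
        s->cvq_in_cursor = s->status;
    }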

>>
>>> Or it would be even better to have some buffer allocation helpers to
>>> alloc and free in and out buffers.
>>>
>>
>> I think that's overkill, as the buffers are always the in and out CVQ
>> buffers. They are allocated at qemu initialization, and mapped /
>> unmapped at device start / stop for now.
>
> It's not a must; we can do it if it has more users, for sure.
>
>>
>> Managing them dynamically would require code to map them at any time, etc.
>
> Thanks
>
>>
>> Thanks!
>>
>>> Thanks
>>>
>>>>>
>>>>> Maybe we can create a struct for them and then pass just this struct?
>>>>
>>>> Yes. Currently, the new version of vhost_vdpa_net_cvq_add() is only
>>>> called in vhost_vdpa_net_load() at startup, so these cursors will not be
>>>> used after startup.
>>>>
>>>> If needed, we can create a `VhostVDPACursor` as suggested by Eugenio.
>>>>
>>>>>
>>>>> Thanks!
>>>>>
>>>>>>> +                                virtio_net_ctrl_ack **in_cursor, size_t in_len)
>>>>>>> +{
>>>>>>> +    /* Buffers for the device */
>>>>>>> +    const struct iovec out = {
>>>>>>> +        .iov_base = *out_cursor,
>>>>>>> +        .iov_len = out_len,
>>>>>>> +    };
>>>>>>> +    const struct iovec in = {
>>>>>>> +        .iov_base = *in_cursor,
>>>>>>> +        .iov_len = sizeof(virtio_net_ctrl_ack),
>>>>>>> +    };
>>>>>>> +    VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
>>>>>>> +    int r;
>>>>>>> +
>>>>>>> +    r = vhost_svq_add(svq, &out, 1, &in, 1, NULL);
>>>>>>> +    if (unlikely(r != 0)) {
>>>>>>> +        if (unlikely(r == -ENOSPC)) {
>>>>>>> +            qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
>>>>>>> +                          __func__);
>>>>>>> +        }
>>>>>>> +        return r;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    /* Update the cursor */
>>>>>>> +    *out_cursor += out_len;
>>>>>>> +    *in_cursor += 1;
>>>>>>> +
>>>>>>> +    return 1;
>>>>>>> +}
>>>>>>> +
>>>>>>>   /**
>>>>>>>    * vhost_vdpa_net_cvq_add_and_wait() adds SVQ control commands to SVQ,
>>>>>>>    * kicks the device and polls the device used buffers.
>>>>>>> @@ -628,69 +666,82 @@ static ssize_t vhost_vdpa_net_cvq_add_and_wait(VhostVDPAState *s,
>>>>>>>       return vhost_svq_poll(svq);
>>>>>>>   }
>>>>>>>
>>>>>>> -static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class,
>>>>>>> -                                       uint8_t cmd, const void *data,
>>>>>>> -                                       size_t data_size)
>>>>>>> +
>>>>>>> +/**
>>>>>>> + * vhost_vdpa_net_load_cmd() restores the NIC state through SVQ.
>>>>>>> + *
>>>>>>> + * Return the number of elements added to SVQ on success.
>>>>>>> + */
>>>>>>> +static int vhost_vdpa_net_load_cmd(VhostVDPAState *s,
>>>>>>> +                                void **out_cursor, uint8_t class, uint8_t cmd,
>>>>>>> +                                const void *data, size_t data_size,
>>>>>>> +                                virtio_net_ctrl_ack **in_cursor)
>>>>>>>   {
>>>>>>>       const struct virtio_net_ctrl_hdr ctrl = {
>>>>>>>           .class = class,
>>>>>>>           .cmd = cmd,
>>>>>>>       };
>>>>>>>
>>>>>>> -    assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl));
>>>>>>> +    assert(sizeof(ctrl) < vhost_vdpa_net_cvq_cmd_page_len() -
>>>>>>> +                          (*out_cursor - s->cvq_cmd_out_buffer));
>>>>>>> +    assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl) -
>>>>>>> +                       (*out_cursor - s->cvq_cmd_out_buffer));
>>>>>>>
>>>>>>> -    memcpy(s->cvq_cmd_out_buffer, &ctrl, sizeof(ctrl));
>>>>>>> -    memcpy(s->cvq_cmd_out_buffer + sizeof(ctrl), data, data_size);
>>>>>>> +    memcpy(*out_cursor, &ctrl, sizeof(ctrl));
>>>>>>> +    memcpy(*out_cursor + sizeof(ctrl), data, data_size);
>>>>>>>
>>>>>>> -    return vhost_vdpa_net_cvq_add_and_wait(s, sizeof(ctrl) + data_size,
>>>>>>> -                                  sizeof(virtio_net_ctrl_ack));
>>>>>>> +    return vhost_vdpa_net_cvq_add(s, out_cursor, sizeof(ctrl) + data_size,
>>>>>>> +                                  in_cursor, sizeof(virtio_net_ctrl_ack));
>>>>>>>   }
>>>>>>>
>>>>>>> -static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n)
>>>>>>> +/**
>>>>>>> + * vhost_vdpa_net_load_mac() restores the NIC mac through SVQ.
>>>>>>> + *
>>>>>>> + * Return the number of elements added to SVQ on success.
>>>>>>> + */
>>>>>>> +static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n,
>>>>>>> +                            void **out_cursor, virtio_net_ctrl_ack **in_cursor)
>>>>>>>   {
>>>>>>>       uint64_t features = n->parent_obj.guest_features;
>>>>>>>       if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) {
>>>>>>> -        ssize_t dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MAC,
>>>>>>> -                                                  VIRTIO_NET_CTRL_MAC_ADDR_SET,
>>>>>>> -                                                  n->mac, sizeof(n->mac));
>>>>>>> -        if (unlikely(dev_written < 0)) {
>>>>>>> -            return dev_written;
>>>>>>> -        }
>>>>>>> -
>>>>>>> -        return *s->status != VIRTIO_NET_OK;
>>>>>>> +        return vhost_vdpa_net_load_cmd(s, out_cursor, VIRTIO_NET_CTRL_MAC,
>>>>>>> +                                       VIRTIO_NET_CTRL_MAC_ADDR_SET,
>>>>>>> +                                       n->mac, sizeof(n->mac), in_cursor);
>>>>>>>       }
>>>>>>>
>>>>>>>       return 0;
>>>>>>>   }
>>>>>>>
>>>>>>> -static int vhost_vdpa_net_load_mq(VhostVDPAState *s,
>>>>>>> -                                  const VirtIONet *n)
>>>>>>> +/**
>>>>>>> + * vhost_vdpa_net_load_mq() restores the NIC mq state through SVQ.
>>>>>>> + *
>>>>>>> + * Return the number of elements added to SVQ on success.
>>>>>>> + */
>>>>>>> +static int vhost_vdpa_net_load_mq(VhostVDPAState *s, const VirtIONet *n,
>>>>>>> +                            void **out_cursor, virtio_net_ctrl_ack **in_cursor)
>>>>>>>   {
>>>>>>>       struct virtio_net_ctrl_mq mq;
>>>>>>>       uint64_t features = n->parent_obj.guest_features;
>>>>>>> -    ssize_t dev_written;
>>>>>>>
>>>>>>>       if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) {
>>>>>>>           return 0;
>>>>>>>       }
>>>>>>>
>>>>>>>       mq.virtqueue_pairs = cpu_to_le16(n->curr_queue_pairs);
>>>>>>> -    dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MQ,
>>>>>>> -                                          VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &mq,
>>>>>>> -                                          sizeof(mq));
>>>>>>> -    if (unlikely(dev_written < 0)) {
>>>>>>> -        return dev_written;
>>>>>>> -    }
>>>>>>> -
>>>>>>> -    return *s->status != VIRTIO_NET_OK;
>>>>>>> +    return vhost_vdpa_net_load_cmd(s, out_cursor, VIRTIO_NET_CTRL_MQ,
>>>>>>> +                                   VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
>>>>>>> +                                   &mq, sizeof(mq), in_cursor);
>>>>>>>   }
>>>>>>>
>>>>>>>   static int vhost_vdpa_net_load(NetClientState *nc)
>>>>>>>   {
>>>>>>>       VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
>>>>>>> +    VhostShadowVirtqueue *svq;
>>>>>>> +    void *out_cursor;
>>>>>>> +    virtio_net_ctrl_ack *in_cursor;
>>>>>>>       struct vhost_vdpa *v = &s->vhost_vdpa;
>>>>>>>       const VirtIONet *n;
>>>>>>> -    int r;
>>>>>>> +    ssize_t cmds_in_flight = 0, dev_written, r;
>>>>>>>
>>>>>>>       assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
>>>>>>>
>>>>>>> @@ -699,14 +750,51 @@ static int vhost_vdpa_net_load(NetClientState *nc)
>>>>>>>       }
>>>>>>>
>>>>>>>       n = VIRTIO_NET(v->dev->vdev);
>>>>>>> -    r = vhost_vdpa_net_load_mac(s, n);
>>>>>>> +    out_cursor = s->cvq_cmd_out_buffer;
>>>>>>> +    in_cursor = s->status;
>>>>>>> +
>>>>>>> +    r = vhost_vdpa_net_load_mac(s, n, &out_cursor, &in_cursor);
>>>>>>>      if (unlikely(r < 0)) {
>>>>>>>           return r;
>>>>>>>       }
>>>>>>> -    r = vhost_vdpa_net_load_mq(s, n);
>>>>>>> -    if (unlikely(r)) {
>>>>>>> +    cmds_in_flight += r;
>>>>>>> +
>>>>>>> +    r = vhost_vdpa_net_load_mq(s, n, &out_cursor, &in_cursor);
>>>>>>> +    if (unlikely(r < 0)) {
>>>>>>>           return r;
>>>>>>>       }
>>>>>>> +    cmds_in_flight += r;
>>>>>>> +
>>>>>>> +    /* Poll for all used buffers from the device */
>>>>>>> +    svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
>>>>>>> +    while (cmds_in_flight > 0) {
>>>>>>> +        /*
>>>>>>> +         * We can poll here since we've had BQL from the time we sent the
>>>>>>> +         * descriptor. Also, we need to take the answer before SVQ pulls
>>>>>>> +         * by itself, when BQL is released
>>>>>>> +         */
>>>>>>> +        dev_written = vhost_svq_poll(svq);
>>>>>>
>>>>>> I'd tweak vhost_svq_poll to accept cmds_in_flight.
>>>>
>>>> That sounds great!
>>>> I will refactor the code here and send the v2 patch after
>>>> your patch.
>>>>
>>>> Thanks!
>>>>
>>>>>>
>>>>>> Thanks
>>>>>>
>>>>>>> +
>>>>>>> +        if (unlikely(!dev_written)) {
>>>>>>> +            /*
>>>>>>> +             * vhost_svq_poll() returns 0 when something goes wrong, such as
>>>>>>> +             * QEMU waiting too long or no used buffer being available from
>>>>>>> +             * the device; there is no need to continue polling in this
>>>>>>> +             * case.
>>>>>>> +             */
>>>>>>> +            return -EINVAL;
>>>>>>> +        }
>>>>>>> +
>>>>>>> +        --cmds_in_flight;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    /* Check the buffers written by the device */
>>>>>>> +    for (virtio_net_ctrl_ack *status = s->status; status < in_cursor;
>>>>>>> +         ++status) {
>>>>>>> +        if (*status != VIRTIO_NET_OK) {
>>>>>>> +            return -EINVAL;
>>>>>>> +        }
>>>>>>> +    }
>>>>>>>
>>>>>>>       return 0;
>>>>>>>   }
>>>>>>> --
>>>>>>> 2.25.1
>>>>>>>
>>>>>>
>>>>>
>>>>
>>>
>>
>
diff mbox series

Patch

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 10804c7200..14e31ca5c5 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -590,6 +590,44 @@  static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
     vhost_vdpa_net_client_stop(nc);
 }
 
+/**
+ * vhost_vdpa_net_cvq_add() adds SVQ control commands to SVQ,
+ * kicks the device but does not poll the device used buffers.
+ *
+ * Return the number of elements added to SVQ on success.
+ */
+static int vhost_vdpa_net_cvq_add(VhostVDPAState *s,
+                                void **out_cursor, size_t out_len,
+                                virtio_net_ctrl_ack **in_cursor, size_t in_len)
+{
+    /* Buffers for the device */
+    const struct iovec out = {
+        .iov_base = *out_cursor,
+        .iov_len = out_len,
+    };
+    const struct iovec in = {
+        .iov_base = *in_cursor,
+        .iov_len = sizeof(virtio_net_ctrl_ack),
+    };
+    VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
+    int r;
+
+    r = vhost_svq_add(svq, &out, 1, &in, 1, NULL);
+    if (unlikely(r != 0)) {
+        if (unlikely(r == -ENOSPC)) {
+            qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
+                          __func__);
+        }
+        return r;
+    }
+
+    /* Update the cursor */
+    *out_cursor += out_len;
+    *in_cursor += 1;
+
+    return 1;
+}
+
 /**
  * vhost_vdpa_net_cvq_add_and_wait() adds SVQ control commands to SVQ,
  * kicks the device and polls the device used buffers.
@@ -628,69 +666,82 @@  static ssize_t vhost_vdpa_net_cvq_add_and_wait(VhostVDPAState *s,
     return vhost_svq_poll(svq);
 }
 
-static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class,
-                                       uint8_t cmd, const void *data,
-                                       size_t data_size)
+
+/**
+ * vhost_vdpa_net_load_cmd() restores the NIC state through SVQ.
+ *
+ * Return the number of elements added to SVQ on success.
+ */
+static int vhost_vdpa_net_load_cmd(VhostVDPAState *s,
+                                void **out_cursor, uint8_t class, uint8_t cmd,
+                                const void *data, size_t data_size,
+                                virtio_net_ctrl_ack **in_cursor)
 {
     const struct virtio_net_ctrl_hdr ctrl = {
         .class = class,
         .cmd = cmd,
     };
 
-    assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl));
+    assert(sizeof(ctrl) < vhost_vdpa_net_cvq_cmd_page_len() -
+                          (*out_cursor - s->cvq_cmd_out_buffer));
+    assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl) -
+                       (*out_cursor - s->cvq_cmd_out_buffer));
 
-    memcpy(s->cvq_cmd_out_buffer, &ctrl, sizeof(ctrl));
-    memcpy(s->cvq_cmd_out_buffer + sizeof(ctrl), data, data_size);
+    memcpy(*out_cursor, &ctrl, sizeof(ctrl));
+    memcpy(*out_cursor + sizeof(ctrl), data, data_size);
 
-    return vhost_vdpa_net_cvq_add_and_wait(s, sizeof(ctrl) + data_size,
-                                  sizeof(virtio_net_ctrl_ack));
+    return vhost_vdpa_net_cvq_add(s, out_cursor, sizeof(ctrl) + data_size,
+                                  in_cursor, sizeof(virtio_net_ctrl_ack));
 }
 
-static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n)
+/**
+ * vhost_vdpa_net_load_mac() restores the NIC mac through SVQ.
+ *
+ * Return the number of elements added to SVQ on success.
+ */
+static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n,
+                            void **out_cursor, virtio_net_ctrl_ack **in_cursor)
 {
     uint64_t features = n->parent_obj.guest_features;
     if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) {
-        ssize_t dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MAC,
-                                                  VIRTIO_NET_CTRL_MAC_ADDR_SET,
-                                                  n->mac, sizeof(n->mac));
-        if (unlikely(dev_written < 0)) {
-            return dev_written;
-        }
-
-        return *s->status != VIRTIO_NET_OK;
+        return vhost_vdpa_net_load_cmd(s, out_cursor, VIRTIO_NET_CTRL_MAC,
+                                       VIRTIO_NET_CTRL_MAC_ADDR_SET,
+                                       n->mac, sizeof(n->mac), in_cursor);
     }
 
     return 0;
 }
 
-static int vhost_vdpa_net_load_mq(VhostVDPAState *s,
-                                  const VirtIONet *n)
+/**
+ * vhost_vdpa_net_load_mq() restores the NIC mq state through SVQ.
+ *
+ * Return the number of elements added to SVQ on success.
+ */
+static int vhost_vdpa_net_load_mq(VhostVDPAState *s, const VirtIONet *n,
+                            void **out_cursor, virtio_net_ctrl_ack **in_cursor)
 {
     struct virtio_net_ctrl_mq mq;
     uint64_t features = n->parent_obj.guest_features;
-    ssize_t dev_written;
 
     if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) {
         return 0;
     }
 
     mq.virtqueue_pairs = cpu_to_le16(n->curr_queue_pairs);
-    dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MQ,
-                                          VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &mq,
-                                          sizeof(mq));
-    if (unlikely(dev_written < 0)) {
-        return dev_written;
-    }
-
-    return *s->status != VIRTIO_NET_OK;
+    return vhost_vdpa_net_load_cmd(s, out_cursor, VIRTIO_NET_CTRL_MQ,
+                                   VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
+                                   &mq, sizeof(mq), in_cursor);
 }
 
 static int vhost_vdpa_net_load(NetClientState *nc)
 {
     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
+    VhostShadowVirtqueue *svq;
+    void *out_cursor;
+    virtio_net_ctrl_ack *in_cursor;
     struct vhost_vdpa *v = &s->vhost_vdpa;
     const VirtIONet *n;
-    int r;
+    ssize_t cmds_in_flight = 0, dev_written, r;
 
     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
 
@@ -699,14 +750,51 @@  static int vhost_vdpa_net_load(NetClientState *nc)
     }
 
     n = VIRTIO_NET(v->dev->vdev);
-    r = vhost_vdpa_net_load_mac(s, n);
+    out_cursor = s->cvq_cmd_out_buffer;
+    in_cursor = s->status;
+
+    r = vhost_vdpa_net_load_mac(s, n, &out_cursor, &in_cursor);
     if (unlikely(r < 0)) {
         return r;
     }
-    r = vhost_vdpa_net_load_mq(s, n);
-    if (unlikely(r)) {
+    cmds_in_flight += r;
+
+    r = vhost_vdpa_net_load_mq(s, n, &out_cursor, &in_cursor);
+    if (unlikely(r < 0)) {
         return r;
     }
+    cmds_in_flight += r;
+
+    /* Poll for all used buffers from the device */
+    svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
+    while (cmds_in_flight > 0) {
+        /*
+         * We can poll here since we've had BQL from the time we sent the
+         * descriptor. Also, we need to take the answer before SVQ pulls
+         * by itself, when BQL is released
+         */
+        dev_written = vhost_svq_poll(svq);
+
+        if (unlikely(!dev_written)) {
+            /*
+             * vhost_svq_poll() returns 0 when something goes wrong, such as
+             * QEMU waiting too long or no used buffer being available from
+             * the device; there is no need to continue polling in this
+             * case.
+             */
+            return -EINVAL;
+        }
+
+        --cmds_in_flight;
+    }
+
+    /* Check the buffers written by the device */
+    for (virtio_net_ctrl_ack *status = s->status; status < in_cursor;
+         ++status) {
+        if (*status != VIRTIO_NET_OK) {
+            return -EINVAL;
+        }
+    }
 
     return 0;
 }
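
A standalone model of the cursor bookkeeping above (plain C, no QEMU
dependencies; buffer sizes and command payloads are illustrative):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    typedef uint8_t virtio_net_ctrl_ack;
    enum { VIRTIO_NET_OK = 0 };

    static uint8_t out_buf[4096];            /* models cvq_cmd_out_buffer */
    static virtio_net_ctrl_ack in_buf[64];   /* models s->status */

    /*
     * Models vhost_vdpa_net_cvq_add(): place one command at the out
     * cursor, reserve one ack slot at the in cursor, advance both.
     */
    static int cvq_add(void **out_cursor, const void *cmd, size_t len,
                       virtio_net_ctrl_ack **in_cursor)
    {
        assert((uint8_t *)*out_cursor + len <= out_buf + sizeof(out_buf));
        memcpy(*out_cursor, cmd, len);
        *out_cursor = (uint8_t *)*out_cursor + len;
        *in_cursor += 1;
        return 1; /* one element added */
    }

    int main(void)
    {
        void *out_cursor = out_buf;
        virtio_net_ctrl_ack *in_cursor = in_buf;
        int cmds_in_flight = 0;

        cmds_in_flight += cvq_add(&out_cursor, "mac", 3, &in_cursor);
        cmds_in_flight += cvq_add(&out_cursor, "mq", 2, &in_cursor);

        /* The device writes one ack per command; fake that here. */
        for (int i = 0; i < cmds_in_flight; i++) {
            in_buf[i] = VIRTIO_NET_OK;
        }

        /* Check the buffers written by the device, as in the patch. */
        for (virtio_net_ctrl_ack *status = in_buf; status < in_cursor;
             ++status) {
            if (*status != VIRTIO_NET_OK) {
                return 1;
            }
        }
        printf("all %d commands acked\n", cmds_in_flight);
        return 0;
    }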