[v4,06/11] virtio: get avail bytes check for packed ring

Message ID 1550118402-4057-7-git-send-email-wexu@redhat.com
State New
Series packed ring virtio-net backends support

Commit Message

Wei Xu Feb. 14, 2019, 4:26 a.m. UTC
From: Wei Xu <wexu@redhat.com>

Add packed ring headcount check.

Common parts of the split/packed ring code are kept.

Signed-off-by: Wei Xu <wexu@redhat.com>
---
 hw/virtio/virtio.c | 197 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 179 insertions(+), 18 deletions(-)

Comments

Jason Wang Feb. 18, 2019, 7:27 a.m. UTC | #1
On 2019/2/14 12:26 PM, wexu@redhat.com wrote:
> From: Wei Xu <wexu@redhat.com>
>
> Add packed ring headcount check.
>
> Common parts of the split/packed ring code are kept.
>
> Signed-off-by: Wei Xu <wexu@redhat.com>
> ---
>   hw/virtio/virtio.c | 197 ++++++++++++++++++++++++++++++++++++++++++++++++-----
>   1 file changed, 179 insertions(+), 18 deletions(-)
>
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index f2ff980..832287b 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -368,6 +368,17 @@ int virtio_queue_ready(VirtQueue *vq)
>       return vq->vring.avail != 0;
>   }
>   
> +static void vring_packed_desc_read(VirtIODevice *vdev, VRingPackedDesc *desc,
> +                            MemoryRegionCache *cache, int i)
> +{
> +    address_space_read_cached(cache, i * sizeof(VRingPackedDesc),
> +                              desc, sizeof(VRingPackedDesc));
> +    virtio_tswap16s(vdev, &desc->flags);
> +    virtio_tswap64s(vdev, &desc->addr);
> +    virtio_tswap32s(vdev, &desc->len);
> +    virtio_tswap16s(vdev, &desc->id);
> +}
> +
>   static void vring_packed_desc_read_flags(VirtIODevice *vdev,
>                       VRingPackedDesc *desc, MemoryRegionCache *cache, int i)
>   {
> @@ -667,9 +678,9 @@ static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
>       return VIRTQUEUE_READ_DESC_MORE;
>   }
>   
> -void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
> -                               unsigned int *out_bytes,
> -                               unsigned max_in_bytes, unsigned max_out_bytes)
> +static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
> +                            unsigned int *in_bytes, unsigned int *out_bytes,
> +                            unsigned max_in_bytes, unsigned max_out_bytes)
>   {
>       VirtIODevice *vdev = vq->vdev;
>       unsigned int max, idx;
> @@ -679,27 +690,12 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
>       int64_t len = 0;
>       int rc;
>   
> -    if (unlikely(!vq->vring.desc)) {
> -        if (in_bytes) {
> -            *in_bytes = 0;
> -        }
> -        if (out_bytes) {
> -            *out_bytes = 0;
> -        }
> -        return;
> -    }
> -
>       rcu_read_lock();
>       idx = vq->last_avail_idx;
>       total_bufs = in_total = out_total = 0;
>   
>       max = vq->vring.num;
>       caches = vring_get_region_caches(vq);
> -    if (caches->desc.len < max * sizeof(VRingDesc)) {
> -        virtio_error(vdev, "Cannot map descriptor ring");
> -        goto err;
> -    }
> -
>       while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
>           MemoryRegionCache *desc_cache = &caches->desc;
>           unsigned int num_bufs;
> @@ -792,6 +788,171 @@ err:
>       goto done;
>   }
>   
> +static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
> +                            unsigned int *in_bytes, unsigned int *out_bytes,
> +                            unsigned max_in_bytes, unsigned max_out_bytes)
> +{
> +    VirtIODevice *vdev = vq->vdev;
> +    unsigned int max, idx;
> +    unsigned int total_bufs, in_total, out_total;
> +    MemoryRegionCache *desc_cache;
> +    VRingMemoryRegionCaches *caches;
> +    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
> +    int64_t len = 0;
> +    VRingPackedDesc desc;
> +    bool wrap_counter;
> +
> +    rcu_read_lock();
> +    idx = vq->last_avail_idx;
> +    wrap_counter = vq->last_avail_wrap_counter;
> +    total_bufs = in_total = out_total = 0;
> +
> +    max = vq->vring.num;
> +    caches = vring_get_region_caches(vq);
> +    desc_cache = &caches->desc;
> +    vring_packed_desc_read_flags(vdev, &desc, desc_cache, idx);
> +    while (is_desc_avail(&desc, wrap_counter)) {
> +        unsigned int num_bufs;
> +        unsigned int i = 0;
> +
> +        num_bufs = total_bufs;
> +
> +        /* Make sure flags has been read before all the fields. */
> +        smp_rmb();
> +        vring_packed_desc_read(vdev, &desc, desc_cache, idx);


It's better to have a single function that deals with reading the flags 
and the descriptor and checks its availability for the packed ring.
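
For example, something along these lines (an untested sketch; the
vring_packed_desc_read_avail() name is made up here and is not part of
this series):

static bool vring_packed_desc_read_avail(VirtIODevice *vdev,
                                         VRingPackedDesc *desc,
                                         MemoryRegionCache *cache,
                                         int i, bool wrap_counter)
{
    /* Read only the flags first and check the avail/used bits. */
    vring_packed_desc_read_flags(vdev, desc, cache, i);
    if (!is_desc_avail(desc, wrap_counter)) {
        return false;
    }

    /* Make sure the flags are read before all the other fields. */
    smp_rmb();
    vring_packed_desc_read(vdev, desc, cache, i);

    return true;
}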


> +
> +        if (desc.flags & VRING_DESC_F_INDIRECT) {
> +            if (desc.len % sizeof(VRingPackedDesc)) {
> +                virtio_error(vdev, "Invalid size for indirect buffer table");
> +                goto err;
> +            }
> +
> +            /* If we've got too many, that implies a descriptor loop. */
> +            if (num_bufs >= max) {
> +                virtio_error(vdev, "Looped descriptor");
> +                goto err;
> +            }
> +
> +            /* loop over the indirect descriptor table */
> +            len = address_space_cache_init(&indirect_desc_cache,
> +                                           vdev->dma_as,
> +                                           desc.addr, desc.len, false);
> +            desc_cache = &indirect_desc_cache;
> +            if (len < desc.len) {
> +                virtio_error(vdev, "Cannot map indirect buffer");
> +                goto err;
> +            }
> +
> +            max = desc.len / sizeof(VRingPackedDesc);
> +            num_bufs = i = 0;
> +            vring_packed_desc_read(vdev, &desc, desc_cache, i);
> +        }
> +
> +        do {
> +            /* If we've got too many, that implies a descriptor loop. */
> +            if (++num_bufs > max) {
> +                virtio_error(vdev, "Looped descriptor");
> +                goto err;
> +            }
> +
> +            if (desc.flags & VRING_DESC_F_WRITE) {
> +                in_total += desc.len;
> +            } else {
> +                out_total += desc.len;
> +            }
> +            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
> +                goto done;
> +            }
> +
> +            if (desc_cache == &indirect_desc_cache) {
> +                if (++i >= max) {
> +                    break;
> +                }
> +                vring_packed_desc_read(vdev, &desc, desc_cache, i);
> +            } else {
> +                if (++idx >= vq->vring.num) {
> +                    idx -= vq->vring.num;
> +                    wrap_counter ^= 1;
> +                }
> +                vring_packed_desc_read(vdev, &desc, desc_cache, idx);
> +            }
> +            /* Make sure flags has been read after all the other fields */
> +            smp_rmb();


I don't think we need this according to the spec:

"

The driver always makes the first descriptor in the list
available after the rest of the list has been written out into
the ring. This guarantees that the device will never observe a
partial scatter/gather list in the ring.

"

So when the first is available, the rest of the chain should be 
available too, otherwise the device may see a partial chain.
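
In other words, a single barrier after checking the chain head should be
enough, e.g. (only a sketch, reusing the helpers from this patch):

vring_packed_desc_read_flags(vdev, &desc, desc_cache, idx);
while (is_desc_avail(&desc, wrap_counter)) {
    /*
     * Pairs with the driver's write barrier: once the head's flags
     * show it available, the whole chain is already visible.
     */
    smp_rmb();
    vring_packed_desc_read(vdev, &desc, desc_cache, idx);
    /* ... walk the rest of the chain without further barriers ... */
}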

Thanks


> +        } while (desc.flags & VRING_DESC_F_NEXT);
> +
> +        if (desc_cache == &indirect_desc_cache) {
> +            address_space_cache_destroy(&indirect_desc_cache);
> +            total_bufs++;
> +            /* We missed one step on for indirect desc */
> +            idx++;
> +            if (++idx >= vq->vring.num) {
> +                idx -= vq->vring.num;
> +                wrap_counter ^= 1;
> +            }
> +        } else {
> +            total_bufs = num_bufs;
> +        }
> +
> +        desc_cache = &caches->desc;
> +        vring_packed_desc_read_flags(vdev, &desc, desc_cache, idx);
> +    }
> +
> +    /* Record the index and wrap counter for a kick we want */
> +    vq->shadow_avail_idx = idx;
> +    vq->avail_wrap_counter = wrap_counter;
> +done:
> +    address_space_cache_destroy(&indirect_desc_cache);
> +    if (in_bytes) {
> +        *in_bytes = in_total;
> +    }
> +    if (out_bytes) {
> +        *out_bytes = out_total;
> +    }
> +    rcu_read_unlock();
> +    return;
> +
> +err:
> +    in_total = out_total = 0;
> +    goto done;
> +}
> +
> +void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
> +                               unsigned int *out_bytes,
> +                               unsigned max_in_bytes, unsigned max_out_bytes)
> +{
> +    uint16_t desc_size;
> +    VRingMemoryRegionCaches *caches;
> +
> +    if (unlikely(!vq->vring.desc)) {
> +        goto err;
> +    }
> +
> +    caches = vring_get_region_caches(vq);
> +    desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
> +                                sizeof(VRingPackedDesc) : sizeof(VRingDesc);
> +    if (caches->desc.len < vq->vring.num * desc_size) {
> +        virtio_error(vq->vdev, "Cannot map descriptor ring");
> +        goto err;
> +    }
> +
> +    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
> +        virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
> +                                         max_in_bytes, max_out_bytes);
> +    } else {
> +        virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
> +                                        max_in_bytes, max_out_bytes);
> +    }
> +
> +    return;
> +err:
> +    if (in_bytes) {
> +        *in_bytes = 0;
> +    }
> +    if (out_bytes) {
> +        *out_bytes = 0;
> +    }
> +}
> +
>   int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
>                             unsigned int out_bytes)
>   {
Wei Xu Feb. 18, 2019, 5:07 p.m. UTC | #2
On Mon, Feb 18, 2019 at 03:27:21PM +0800, Jason Wang wrote:
> 
> On 2019/2/14 12:26 PM, wexu@redhat.com wrote:
> >From: Wei Xu <wexu@redhat.com>
> >
> >Add packed ring headcount check.
> >
> >Common parts of the split/packed ring code are kept.
> >
> >Signed-off-by: Wei Xu <wexu@redhat.com>
> >---
> >  hw/virtio/virtio.c | 197 ++++++++++++++++++++++++++++++++++++++++++++++++-----
> >  1 file changed, 179 insertions(+), 18 deletions(-)
> >
> >diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> >index f2ff980..832287b 100644
> >--- a/hw/virtio/virtio.c
> >+++ b/hw/virtio/virtio.c
> >@@ -368,6 +368,17 @@ int virtio_queue_ready(VirtQueue *vq)
> >      return vq->vring.avail != 0;
> >  }
> >+static void vring_packed_desc_read(VirtIODevice *vdev, VRingPackedDesc *desc,
> >+                            MemoryRegionCache *cache, int i)
> >+{
> >+    address_space_read_cached(cache, i * sizeof(VRingPackedDesc),
> >+                              desc, sizeof(VRingPackedDesc));
> >+    virtio_tswap16s(vdev, &desc->flags);
> >+    virtio_tswap64s(vdev, &desc->addr);
> >+    virtio_tswap32s(vdev, &desc->len);
> >+    virtio_tswap16s(vdev, &desc->id);
> >+}
> >+
> >  static void vring_packed_desc_read_flags(VirtIODevice *vdev,
> >                      VRingPackedDesc *desc, MemoryRegionCache *cache, int i)
> >  {
> >@@ -667,9 +678,9 @@ static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
> >      return VIRTQUEUE_READ_DESC_MORE;
> >  }
> >-void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
> >-                               unsigned int *out_bytes,
> >-                               unsigned max_in_bytes, unsigned max_out_bytes)
> >+static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
> >+                            unsigned int *in_bytes, unsigned int *out_bytes,
> >+                            unsigned max_in_bytes, unsigned max_out_bytes)
> >  {
> >      VirtIODevice *vdev = vq->vdev;
> >      unsigned int max, idx;
> >@@ -679,27 +690,12 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
> >      int64_t len = 0;
> >      int rc;
> >-    if (unlikely(!vq->vring.desc)) {
> >-        if (in_bytes) {
> >-            *in_bytes = 0;
> >-        }
> >-        if (out_bytes) {
> >-            *out_bytes = 0;
> >-        }
> >-        return;
> >-    }
> >-
> >      rcu_read_lock();
> >      idx = vq->last_avail_idx;
> >      total_bufs = in_total = out_total = 0;
> >      max = vq->vring.num;
> >      caches = vring_get_region_caches(vq);
> >-    if (caches->desc.len < max * sizeof(VRingDesc)) {
> >-        virtio_error(vdev, "Cannot map descriptor ring");
> >-        goto err;
> >-    }
> >-
> >      while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
> >          MemoryRegionCache *desc_cache = &caches->desc;
> >          unsigned int num_bufs;
> >@@ -792,6 +788,171 @@ err:
> >      goto done;
> >  }
> >+static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
> >+                            unsigned int *in_bytes, unsigned int *out_bytes,
> >+                            unsigned max_in_bytes, unsigned max_out_bytes)
> >+{
> >+    VirtIODevice *vdev = vq->vdev;
> >+    unsigned int max, idx;
> >+    unsigned int total_bufs, in_total, out_total;
> >+    MemoryRegionCache *desc_cache;
> >+    VRingMemoryRegionCaches *caches;
> >+    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
> >+    int64_t len = 0;
> >+    VRingPackedDesc desc;
> >+    bool wrap_counter;
> >+
> >+    rcu_read_lock();
> >+    idx = vq->last_avail_idx;
> >+    wrap_counter = vq->last_avail_wrap_counter;
> >+    total_bufs = in_total = out_total = 0;
> >+
> >+    max = vq->vring.num;
> >+    caches = vring_get_region_caches(vq);
> >+    desc_cache = &caches->desc;
> >+    vring_packed_desc_read_flags(vdev, &desc, desc_cache, idx);
> >+    while (is_desc_avail(&desc, wrap_counter)) {
> >+        unsigned int num_bufs;
> >+        unsigned int i = 0;
> >+
> >+        num_bufs = total_bufs;
> >+
> >+        /* Make sure flags has been read before all the fields. */
> >+        smp_rmb();
> >+        vring_packed_desc_read(vdev, &desc, desc_cache, idx);
> 
> 
> It's better to have a single function that deals with reading the flags
> and the descriptor and checks its availability for the packed ring.

There is something different between the split and packed rings here.
For the split ring, 'avail_idx' and the descriptors are used separately,
so their interfaces are straightforward, while for the packed ring the
flags and the data fields of the descriptors are mixed together and are
accessed independently, so it seems good to use the accessors the way
they are supposed to work. :)

Another neat way would be to combine the two operations into a new
helper, but that would introduce passing the memory region cache as a
parameter.

So personally I prefer to keep it unchanged, do you still want it sorted
out?

> 
> 
> >+
> >+        if (desc.flags & VRING_DESC_F_INDIRECT) {
> >+            if (desc.len % sizeof(VRingPackedDesc)) {
> >+                virtio_error(vdev, "Invalid size for indirect buffer table");
> >+                goto err;
> >+            }
> >+
> >+            /* If we've got too many, that implies a descriptor loop. */
> >+            if (num_bufs >= max) {
> >+                virtio_error(vdev, "Looped descriptor");
> >+                goto err;
> >+            }
> >+
> >+            /* loop over the indirect descriptor table */
> >+            len = address_space_cache_init(&indirect_desc_cache,
> >+                                           vdev->dma_as,
> >+                                           desc.addr, desc.len, false);
> >+            desc_cache = &indirect_desc_cache;
> >+            if (len < desc.len) {
> >+                virtio_error(vdev, "Cannot map indirect buffer");
> >+                goto err;
> >+            }
> >+
> >+            max = desc.len / sizeof(VRingPackedDesc);
> >+            num_bufs = i = 0;
> >+            vring_packed_desc_read(vdev, &desc, desc_cache, i);
> >+        }
> >+
> >+        do {
> >+            /* If we've got too many, that implies a descriptor loop. */
> >+            if (++num_bufs > max) {
> >+                virtio_error(vdev, "Looped descriptor");
> >+                goto err;
> >+            }
> >+
> >+            if (desc.flags & VRING_DESC_F_WRITE) {
> >+                in_total += desc.len;
> >+            } else {
> >+                out_total += desc.len;
> >+            }
> >+            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
> >+                goto done;
> >+            }
> >+
> >+            if (desc_cache == &indirect_desc_cache) {
> >+                if (++i >= max) {
> >+                    break;
> >+                }
> >+                vring_packed_desc_read(vdev, &desc, desc_cache, i);
> >+            } else {
> >+                if (++idx >= vq->vring.num) {
> >+                    idx -= vq->vring.num;
> >+                    wrap_counter ^= 1;
> >+                }
> >+                vring_packed_desc_read(vdev, &desc, desc_cache, idx);
> >+            }
> >+            /* Make sure flags has been read after all the other fields */
> >+            smp_rmb();
> 
> 
> I don't think we need this according to the spec:
> 
> "
> 
> The driver always makes the first descriptor in the list
> available after the rest of the list has been written out into
> the ring. This guarantees that the device will never observe a
> partial scatter/gather list in the ring.
> 
> "
> 
> So when the first is available, the rest of the chain should be
> available too, otherwise the device may see a partial chain.

As always, I will remove it, thanks a lot.

Wei

> 
> Thanks
> 
> 
> >+        } while (desc.flags & VRING_DESC_F_NEXT);
> >+
> >+        if (desc_cache == &indirect_desc_cache) {
> >+            address_space_cache_destroy(&indirect_desc_cache);
> >+            total_bufs++;
> >+            /* We missed one step on for indirect desc */
> >+            idx++;
> >+            if (++idx >= vq->vring.num) {
> >+                idx -= vq->vring.num;
> >+                wrap_counter ^= 1;
> >+            }
> >+        } else {
> >+            total_bufs = num_bufs;
> >+        }
> >+
> >+        desc_cache = &caches->desc;
> >+        vring_packed_desc_read_flags(vdev, &desc, desc_cache, idx);
> >+    }
> >+
> >+    /* Record the index and wrap counter for a kick we want */
> >+    vq->shadow_avail_idx = idx;
> >+    vq->avail_wrap_counter = wrap_counter;
> >+done:
> >+    address_space_cache_destroy(&indirect_desc_cache);
> >+    if (in_bytes) {
> >+        *in_bytes = in_total;
> >+    }
> >+    if (out_bytes) {
> >+        *out_bytes = out_total;
> >+    }
> >+    rcu_read_unlock();
> >+    return;
> >+
> >+err:
> >+    in_total = out_total = 0;
> >+    goto done;
> >+}
> >+
> >+void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
> >+                               unsigned int *out_bytes,
> >+                               unsigned max_in_bytes, unsigned max_out_bytes)
> >+{
> >+    uint16_t desc_size;
> >+    VRingMemoryRegionCaches *caches;
> >+
> >+    if (unlikely(!vq->vring.desc)) {
> >+        goto err;
> >+    }
> >+
> >+    caches = vring_get_region_caches(vq);
> >+    desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
> >+                                sizeof(VRingPackedDesc) : sizeof(VRingDesc);
> >+    if (caches->desc.len < vq->vring.num * desc_size) {
> >+        virtio_error(vq->vdev, "Cannot map descriptor ring");
> >+        goto err;
> >+    }
> >+
> >+    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
> >+        virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
> >+                                         max_in_bytes, max_out_bytes);
> >+    } else {
> >+        virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
> >+                                        max_in_bytes, max_out_bytes);
> >+    }
> >+
> >+    return;
> >+err:
> >+    if (in_bytes) {
> >+        *in_bytes = 0;
> >+    }
> >+    if (out_bytes) {
> >+        *out_bytes = 0;
> >+    }
> >+}
> >+
> >  int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
> >                            unsigned int out_bytes)
> >  {
>
Jason Wang Feb. 19, 2019, 6:24 a.m. UTC | #3
On 2019/2/19 1:07 AM, Wei Xu wrote:
> On Mon, Feb 18, 2019 at 03:27:21PM +0800, Jason Wang wrote:
>> On 2019/2/14 12:26 PM, wexu@redhat.com wrote:
>>> From: Wei Xu <wexu@redhat.com>
>>>
>>> Add packed ring headcount check.
>>>
>>> Common parts of the split/packed ring code are kept.
>>>
>>> Signed-off-by: Wei Xu <wexu@redhat.com>
>>> ---
>>>   hw/virtio/virtio.c | 197 ++++++++++++++++++++++++++++++++++++++++++++++++-----
>>>   1 file changed, 179 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
>>> index f2ff980..832287b 100644
>>> --- a/hw/virtio/virtio.c
>>> +++ b/hw/virtio/virtio.c
>>> @@ -368,6 +368,17 @@ int virtio_queue_ready(VirtQueue *vq)
>>>       return vq->vring.avail != 0;
>>>   }
>>> +static void vring_packed_desc_read(VirtIODevice *vdev, VRingPackedDesc *desc,
>>> +                            MemoryRegionCache *cache, int i)
>>> +{
>>> +    address_space_read_cached(cache, i * sizeof(VRingPackedDesc),
>>> +                              desc, sizeof(VRingPackedDesc));
>>> +    virtio_tswap16s(vdev, &desc->flags);
>>> +    virtio_tswap64s(vdev, &desc->addr);
>>> +    virtio_tswap32s(vdev, &desc->len);
>>> +    virtio_tswap16s(vdev, &desc->id);
>>> +}
>>> +
>>>   static void vring_packed_desc_read_flags(VirtIODevice *vdev,
>>>                       VRingPackedDesc *desc, MemoryRegionCache *cache, int i)
>>>   {
>>> @@ -667,9 +678,9 @@ static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
>>>       return VIRTQUEUE_READ_DESC_MORE;
>>>   }
>>> -void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
>>> -                               unsigned int *out_bytes,
>>> -                               unsigned max_in_bytes, unsigned max_out_bytes)
>>> +static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
>>> +                            unsigned int *in_bytes, unsigned int *out_bytes,
>>> +                            unsigned max_in_bytes, unsigned max_out_bytes)
>>>   {
>>>       VirtIODevice *vdev = vq->vdev;
>>>       unsigned int max, idx;
>>> @@ -679,27 +690,12 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
>>>       int64_t len = 0;
>>>       int rc;
>>> -    if (unlikely(!vq->vring.desc)) {
>>> -        if (in_bytes) {
>>> -            *in_bytes = 0;
>>> -        }
>>> -        if (out_bytes) {
>>> -            *out_bytes = 0;
>>> -        }
>>> -        return;
>>> -    }
>>> -
>>>       rcu_read_lock();
>>>       idx = vq->last_avail_idx;
>>>       total_bufs = in_total = out_total = 0;
>>>       max = vq->vring.num;
>>>       caches = vring_get_region_caches(vq);
>>> -    if (caches->desc.len < max * sizeof(VRingDesc)) {
>>> -        virtio_error(vdev, "Cannot map descriptor ring");
>>> -        goto err;
>>> -    }
>>> -
>>>       while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
>>>           MemoryRegionCache *desc_cache = &caches->desc;
>>>           unsigned int num_bufs;
>>> @@ -792,6 +788,171 @@ err:
>>>       goto done;
>>>   }
>>> +static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
>>> +                            unsigned int *in_bytes, unsigned int *out_bytes,
>>> +                            unsigned max_in_bytes, unsigned max_out_bytes)
>>> +{
>>> +    VirtIODevice *vdev = vq->vdev;
>>> +    unsigned int max, idx;
>>> +    unsigned int total_bufs, in_total, out_total;
>>> +    MemoryRegionCache *desc_cache;
>>> +    VRingMemoryRegionCaches *caches;
>>> +    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
>>> +    int64_t len = 0;
>>> +    VRingPackedDesc desc;
>>> +    bool wrap_counter;
>>> +
>>> +    rcu_read_lock();
>>> +    idx = vq->last_avail_idx;
>>> +    wrap_counter = vq->last_avail_wrap_counter;
>>> +    total_bufs = in_total = out_total = 0;
>>> +
>>> +    max = vq->vring.num;
>>> +    caches = vring_get_region_caches(vq);
>>> +    desc_cache = &caches->desc;
>>> +    vring_packed_desc_read_flags(vdev, &desc, desc_cache, idx);
>>> +    while (is_desc_avail(&desc, wrap_counter)) {
>>> +        unsigned int num_bufs;
>>> +        unsigned int i = 0;
>>> +
>>> +        num_bufs = total_bufs;
>>> +
>>> +        /* Make sure flags has been read before all the fields. */
>>> +        smp_rmb();
>>> +        vring_packed_desc_read(vdev, &desc, desc_cache, idx);
>>
>> It's better to have a single function that deals with reading the flags
>> and the descriptor and checks its availability for the packed ring.
> There is something different between the split and packed rings here.
> For the split ring, 'avail_idx' and the descriptors are used separately,
> so their interfaces are straightforward, while for the packed ring the
> flags and the data fields of the descriptors are mixed together and are
> accessed independently, so it seems good to use the accessors the way
> they are supposed to work. :)
>
> Another neat way would be to combine the two operations into a new
> helper, but that would introduce passing the memory region cache as a
> parameter.
>
> So personally I prefer to keep it unchanged, do you still want it sorted
> out?


It's as simple as another helper that calls read_flags() and desc_read()?

Btw, it's better to use consistent naming for the function, e.g. 
vring_packed_flags_read().
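
With such a combined helper (using the illustrative
vring_packed_desc_read_avail() sketched earlier in this thread, not a
function from this series), the loop head in
virtqueue_packed_get_avail_bytes() could then simply become:

while (vring_packed_desc_read_avail(vdev, &desc, desc_cache,
                                    idx, wrap_counter)) {
    /* process one descriptor (chain) ... */
}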

Thanks


>
>>
>>> +
>>> +        if (desc.flags & VRING_DESC_F_INDIRECT) {
>>> +            if (desc.len % sizeof(VRingPackedDesc)) {
>>> +                virtio_error(vdev, "Invalid size for indirect buffer table");
>>> +                goto err;
>>> +            }
>>> +
>>> +            /* If we've got too many, that implies a descriptor loop. */
>>> +            if (num_bufs >= max) {
>>> +                virtio_error(vdev, "Looped descriptor");
>>> +                goto err;
>>> +            }
>>> +
>>> +            /* loop over the indirect descriptor table */
>>> +            len = address_space_cache_init(&indirect_desc_cache,
>>> +                                           vdev->dma_as,
>>> +                                           desc.addr, desc.len, false);
>>> +            desc_cache = &indirect_desc_cache;
>>> +            if (len < desc.len) {
>>> +                virtio_error(vdev, "Cannot map indirect buffer");
>>> +                goto err;
>>> +            }
>>> +
>>> +            max = desc.len / sizeof(VRingPackedDesc);
>>> +            num_bufs = i = 0;
>>> +            vring_packed_desc_read(vdev, &desc, desc_cache, i);
>>> +        }
>>> +
>>> +        do {
>>> +            /* If we've got too many, that implies a descriptor loop. */
>>> +            if (++num_bufs > max) {
>>> +                virtio_error(vdev, "Looped descriptor");
>>> +                goto err;
>>> +            }
>>> +
>>> +            if (desc.flags & VRING_DESC_F_WRITE) {
>>> +                in_total += desc.len;
>>> +            } else {
>>> +                out_total += desc.len;
>>> +            }
>>> +            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
>>> +                goto done;
>>> +            }
>>> +
>>> +            if (desc_cache == &indirect_desc_cache) {
>>> +                if (++i >= max) {
>>> +                    break;
>>> +                }
>>> +                vring_packed_desc_read(vdev, &desc, desc_cache, i);
>>> +            } else {
>>> +                if (++idx >= vq->vring.num) {
>>> +                    idx -= vq->vring.num;
>>> +                    wrap_counter ^= 1;
>>> +                }
>>> +                vring_packed_desc_read(vdev, &desc, desc_cache, idx);
>>> +            }
>>> +            /* Make sure flags has been read after all the other fields */
>>> +            smp_rmb();
>>
>> I don't think we need this according to the spec:
>>
>> "
>>
>> The driver always makes the first descriptor in the list
>> available after the rest of the list has been written out into
>> the ring. This guarantees that the device will never observe a
>> partial scatter/gather list in the ring.
>>
>> "
>>
>> So when the first is available, the rest of the chain should be
>> available too, otherwise the device may see a partial chain.
> As always, I will remove it, thanks a lot.
>
> Wei
>
>> Thanks
>>
>>
>>> +        } while (desc.flags & VRING_DESC_F_NEXT);
>>> +
>>> +        if (desc_cache == &indirect_desc_cache) {
>>> +            address_space_cache_destroy(&indirect_desc_cache);
>>> +            total_bufs++;
>>> +            /* We missed one step on for indirect desc */
>>> +            idx++;
>>> +            if (++idx >= vq->vring.num) {
>>> +                idx -= vq->vring.num;
>>> +                wrap_counter ^= 1;
>>> +            }
>>> +        } else {
>>> +            total_bufs = num_bufs;
>>> +        }
>>> +
>>> +        desc_cache = &caches->desc;
>>> +        vring_packed_desc_read_flags(vdev, &desc, desc_cache, idx);
>>> +    }
>>> +
>>> +    /* Record the index and wrap counter for a kick we want */
>>> +    vq->shadow_avail_idx = idx;
>>> +    vq->avail_wrap_counter = wrap_counter;
>>> +done:
>>> +    address_space_cache_destroy(&indirect_desc_cache);
>>> +    if (in_bytes) {
>>> +        *in_bytes = in_total;
>>> +    }
>>> +    if (out_bytes) {
>>> +        *out_bytes = out_total;
>>> +    }
>>> +    rcu_read_unlock();
>>> +    return;
>>> +
>>> +err:
>>> +    in_total = out_total = 0;
>>> +    goto done;
>>> +}
>>> +
>>> +void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
>>> +                               unsigned int *out_bytes,
>>> +                               unsigned max_in_bytes, unsigned max_out_bytes)
>>> +{
>>> +    uint16_t desc_size;
>>> +    VRingMemoryRegionCaches *caches;
>>> +
>>> +    if (unlikely(!vq->vring.desc)) {
>>> +        goto err;
>>> +    }
>>> +
>>> +    caches = vring_get_region_caches(vq);
>>> +    desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
>>> +                                sizeof(VRingPackedDesc) : sizeof(VRingDesc);
>>> +    if (caches->desc.len < vq->vring.num * desc_size) {
>>> +        virtio_error(vq->vdev, "Cannot map descriptor ring");
>>> +        goto err;
>>> +    }
>>> +
>>> +    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
>>> +        virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
>>> +                                         max_in_bytes, max_out_bytes);
>>> +    } else {
>>> +        virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
>>> +                                        max_in_bytes, max_out_bytes);
>>> +    }
>>> +
>>> +    return;
>>> +err:
>>> +    if (in_bytes) {
>>> +        *in_bytes = 0;
>>> +    }
>>> +    if (out_bytes) {
>>> +        *out_bytes = 0;
>>> +    }
>>> +}
>>> +
>>>   int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
>>>                             unsigned int out_bytes)
>>>   {
Wei Xu Feb. 19, 2019, 8:24 a.m. UTC | #4
On Tue, Feb 19, 2019 at 02:24:11PM +0800, Jason Wang wrote:
> 
> On 2019/2/19 1:07 AM, Wei Xu wrote:
> >On Mon, Feb 18, 2019 at 03:27:21PM +0800, Jason Wang wrote:
> >>On 2019/2/14 12:26 PM, wexu@redhat.com wrote:
> >>>From: Wei Xu <wexu@redhat.com>
> >>>
> >>>Add packed ring headcount check.
> >>>
> >>>Common parts of the split/packed ring code are kept.
> >>>
> >>>Signed-off-by: Wei Xu <wexu@redhat.com>
> >>>---
> >>>  hw/virtio/virtio.c | 197 ++++++++++++++++++++++++++++++++++++++++++++++++-----
> >>>  1 file changed, 179 insertions(+), 18 deletions(-)
> >>>
> >>>diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> >>>index f2ff980..832287b 100644
> >>>--- a/hw/virtio/virtio.c
> >>>+++ b/hw/virtio/virtio.c
> >>>@@ -368,6 +368,17 @@ int virtio_queue_ready(VirtQueue *vq)
> >>>      return vq->vring.avail != 0;
> >>>  }
> >>>+static void vring_packed_desc_read(VirtIODevice *vdev, VRingPackedDesc *desc,
> >>>+                            MemoryRegionCache *cache, int i)
> >>>+{
> >>>+    address_space_read_cached(cache, i * sizeof(VRingPackedDesc),
> >>>+                              desc, sizeof(VRingPackedDesc));
> >>>+    virtio_tswap16s(vdev, &desc->flags);
> >>>+    virtio_tswap64s(vdev, &desc->addr);
> >>>+    virtio_tswap32s(vdev, &desc->len);
> >>>+    virtio_tswap16s(vdev, &desc->id);
> >>>+}
> >>>+
> >>>  static void vring_packed_desc_read_flags(VirtIODevice *vdev,
> >>>                      VRingPackedDesc *desc, MemoryRegionCache *cache, int i)
> >>>  {
> >>>@@ -667,9 +678,9 @@ static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
> >>>      return VIRTQUEUE_READ_DESC_MORE;
> >>>  }
> >>>-void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
> >>>-                               unsigned int *out_bytes,
> >>>-                               unsigned max_in_bytes, unsigned max_out_bytes)
> >>>+static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
> >>>+                            unsigned int *in_bytes, unsigned int *out_bytes,
> >>>+                            unsigned max_in_bytes, unsigned max_out_bytes)
> >>>  {
> >>>      VirtIODevice *vdev = vq->vdev;
> >>>      unsigned int max, idx;
> >>>@@ -679,27 +690,12 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
> >>>      int64_t len = 0;
> >>>      int rc;
> >>>-    if (unlikely(!vq->vring.desc)) {
> >>>-        if (in_bytes) {
> >>>-            *in_bytes = 0;
> >>>-        }
> >>>-        if (out_bytes) {
> >>>-            *out_bytes = 0;
> >>>-        }
> >>>-        return;
> >>>-    }
> >>>-
> >>>      rcu_read_lock();
> >>>      idx = vq->last_avail_idx;
> >>>      total_bufs = in_total = out_total = 0;
> >>>      max = vq->vring.num;
> >>>      caches = vring_get_region_caches(vq);
> >>>-    if (caches->desc.len < max * sizeof(VRingDesc)) {
> >>>-        virtio_error(vdev, "Cannot map descriptor ring");
> >>>-        goto err;
> >>>-    }
> >>>-
> >>>      while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
> >>>          MemoryRegionCache *desc_cache = &caches->desc;
> >>>          unsigned int num_bufs;
> >>>@@ -792,6 +788,171 @@ err:
> >>>      goto done;
> >>>  }
> >>>+static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
> >>>+                            unsigned int *in_bytes, unsigned int *out_bytes,
> >>>+                            unsigned max_in_bytes, unsigned max_out_bytes)
> >>>+{
> >>>+    VirtIODevice *vdev = vq->vdev;
> >>>+    unsigned int max, idx;
> >>>+    unsigned int total_bufs, in_total, out_total;
> >>>+    MemoryRegionCache *desc_cache;
> >>>+    VRingMemoryRegionCaches *caches;
> >>>+    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
> >>>+    int64_t len = 0;
> >>>+    VRingPackedDesc desc;
> >>>+    bool wrap_counter;
> >>>+
> >>>+    rcu_read_lock();
> >>>+    idx = vq->last_avail_idx;
> >>>+    wrap_counter = vq->last_avail_wrap_counter;
> >>>+    total_bufs = in_total = out_total = 0;
> >>>+
> >>>+    max = vq->vring.num;
> >>>+    caches = vring_get_region_caches(vq);
> >>>+    desc_cache = &caches->desc;
> >>>+    vring_packed_desc_read_flags(vdev, &desc, desc_cache, idx);
> >>>+    while (is_desc_avail(&desc, wrap_counter)) {
> >>>+        unsigned int num_bufs;
> >>>+        unsigned int i = 0;
> >>>+
> >>>+        num_bufs = total_bufs;
> >>>+
> >>>+        /* Make sure flags has been read before all the fields. */
> >>>+        smp_rmb();
> >>>+        vring_packed_desc_read(vdev, &desc, desc_cache, idx);
> >>
> >>It's better to have a single function that deals with reading the flags
> >>and the descriptor and checks its availability for the packed ring.
> >There is something different between the split and packed rings here.
> >For the split ring, 'avail_idx' and the descriptors are used separately,
> >so their interfaces are straightforward, while for the packed ring the
> >flags and the data fields of the descriptors are mixed together and are
> >accessed independently, so it seems good to use the accessors the way
> >they are supposed to work. :)
> >
> >Another neat way would be to combine the two operations into a new
> >helper, but that would introduce passing the memory region cache as a
> >parameter.
> >
> >So personally I prefer to keep it unchanged, do you still want it sorted
> >out?
> 
> 
> It's as simple as another helper that calls read_flags() and desc_read()?

OK, will try one.

> 
> Btw, it's better to use consistent naming for the function, e.g.
> vring_packed_flags_read().

OK, thanks.

Wei
> 
> Thanks
> 
> 
> >
> >>
> >>>+
> >>>+        if (desc.flags & VRING_DESC_F_INDIRECT) {
> >>>+            if (desc.len % sizeof(VRingPackedDesc)) {
> >>>+                virtio_error(vdev, "Invalid size for indirect buffer table");
> >>>+                goto err;
> >>>+            }
> >>>+
> >>>+            /* If we've got too many, that implies a descriptor loop. */
> >>>+            if (num_bufs >= max) {
> >>>+                virtio_error(vdev, "Looped descriptor");
> >>>+                goto err;
> >>>+            }
> >>>+
> >>>+            /* loop over the indirect descriptor table */
> >>>+            len = address_space_cache_init(&indirect_desc_cache,
> >>>+                                           vdev->dma_as,
> >>>+                                           desc.addr, desc.len, false);
> >>>+            desc_cache = &indirect_desc_cache;
> >>>+            if (len < desc.len) {
> >>>+                virtio_error(vdev, "Cannot map indirect buffer");
> >>>+                goto err;
> >>>+            }
> >>>+
> >>>+            max = desc.len / sizeof(VRingPackedDesc);
> >>>+            num_bufs = i = 0;
> >>>+            vring_packed_desc_read(vdev, &desc, desc_cache, i);
> >>>+        }
> >>>+
> >>>+        do {
> >>>+            /* If we've got too many, that implies a descriptor loop. */
> >>>+            if (++num_bufs > max) {
> >>>+                virtio_error(vdev, "Looped descriptor");
> >>>+                goto err;
> >>>+            }
> >>>+
> >>>+            if (desc.flags & VRING_DESC_F_WRITE) {
> >>>+                in_total += desc.len;
> >>>+            } else {
> >>>+                out_total += desc.len;
> >>>+            }
> >>>+            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
> >>>+                goto done;
> >>>+            }
> >>>+
> >>>+            if (desc_cache == &indirect_desc_cache) {
> >>>+                if (++i >= max) {
> >>>+                    break;
> >>>+                }
> >>>+                vring_packed_desc_read(vdev, &desc, desc_cache, i);
> >>>+            } else {
> >>>+                if (++idx >= vq->vring.num) {
> >>>+                    idx -= vq->vring.num;
> >>>+                    wrap_counter ^= 1;
> >>>+                }
> >>>+                vring_packed_desc_read(vdev, &desc, desc_cache, idx);
> >>>+            }
> >>>+            /* Make sure flags has been read after all the other fields */
> >>>+            smp_rmb();
> >>
> >>I don't think we need this according to the spec:
> >>
> >>"
> >>
> >>The driver always makes the first descriptor in the list
> >>available after the rest of the list has been written out into
> >>the ring. This guarantees that the device will never observe a
> >>partial scatter/gather list in the ring.
> >>
> >>"
> >>
> >>So when the first is available, the rest of the chain should be
> >>available too, otherwise the device may see a partial chain.
> >As always, I will remove it, thanks a lot.
> >
> >Wei
> >
> >>Thanks
> >>
> >>
> >>>+        } while (desc.flags & VRING_DESC_F_NEXT);
> >>>+
> >>>+        if (desc_cache == &indirect_desc_cache) {
> >>>+            address_space_cache_destroy(&indirect_desc_cache);
> >>>+            total_bufs++;
> >>>+            /* We missed one step on for indirect desc */
> >>>+            idx++;
> >>>+            if (++idx >= vq->vring.num) {
> >>>+                idx -= vq->vring.num;
> >>>+                wrap_counter ^= 1;
> >>>+            }
> >>>+        } else {
> >>>+            total_bufs = num_bufs;
> >>>+        }
> >>>+
> >>>+        desc_cache = &caches->desc;
> >>>+        vring_packed_desc_read_flags(vdev, &desc, desc_cache, idx);
> >>>+    }
> >>>+
> >>>+    /* Record the index and wrap counter for a kick we want */
> >>>+    vq->shadow_avail_idx = idx;
> >>>+    vq->avail_wrap_counter = wrap_counter;
> >>>+done:
> >>>+    address_space_cache_destroy(&indirect_desc_cache);
> >>>+    if (in_bytes) {
> >>>+        *in_bytes = in_total;
> >>>+    }
> >>>+    if (out_bytes) {
> >>>+        *out_bytes = out_total;
> >>>+    }
> >>>+    rcu_read_unlock();
> >>>+    return;
> >>>+
> >>>+err:
> >>>+    in_total = out_total = 0;
> >>>+    goto done;
> >>>+}
> >>>+
> >>>+void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
> >>>+                               unsigned int *out_bytes,
> >>>+                               unsigned max_in_bytes, unsigned max_out_bytes)
> >>>+{
> >>>+    uint16_t desc_size;
> >>>+    VRingMemoryRegionCaches *caches;
> >>>+
> >>>+    if (unlikely(!vq->vring.desc)) {
> >>>+        goto err;
> >>>+    }
> >>>+
> >>>+    caches = vring_get_region_caches(vq);
> >>>+    desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
> >>>+                                sizeof(VRingPackedDesc) : sizeof(VRingDesc);
> >>>+    if (caches->desc.len < vq->vring.num * desc_size) {
> >>>+        virtio_error(vq->vdev, "Cannot map descriptor ring");
> >>>+        goto err;
> >>>+    }
> >>>+
> >>>+    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
> >>>+        virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
> >>>+                                         max_in_bytes, max_out_bytes);
> >>>+    } else {
> >>>+        virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
> >>>+                                        max_in_bytes, max_out_bytes);
> >>>+    }
> >>>+
> >>>+    return;
> >>>+err:
> >>>+    if (in_bytes) {
> >>>+        *in_bytes = 0;
> >>>+    }
> >>>+    if (out_bytes) {
> >>>+        *out_bytes = 0;
> >>>+    }
> >>>+}
> >>>+
> >>>  int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
> >>>                            unsigned int out_bytes)
> >>>  {

Patch

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index f2ff980..832287b 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -368,6 +368,17 @@  int virtio_queue_ready(VirtQueue *vq)
     return vq->vring.avail != 0;
 }
 
+static void vring_packed_desc_read(VirtIODevice *vdev, VRingPackedDesc *desc,
+                            MemoryRegionCache *cache, int i)
+{
+    address_space_read_cached(cache, i * sizeof(VRingPackedDesc),
+                              desc, sizeof(VRingPackedDesc));
+    virtio_tswap16s(vdev, &desc->flags);
+    virtio_tswap64s(vdev, &desc->addr);
+    virtio_tswap32s(vdev, &desc->len);
+    virtio_tswap16s(vdev, &desc->id);
+}
+
 static void vring_packed_desc_read_flags(VirtIODevice *vdev,
                     VRingPackedDesc *desc, MemoryRegionCache *cache, int i)
 {
@@ -667,9 +678,9 @@  static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
     return VIRTQUEUE_READ_DESC_MORE;
 }
 
-void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
-                               unsigned int *out_bytes,
-                               unsigned max_in_bytes, unsigned max_out_bytes)
+static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
+                            unsigned int *in_bytes, unsigned int *out_bytes,
+                            unsigned max_in_bytes, unsigned max_out_bytes)
 {
     VirtIODevice *vdev = vq->vdev;
     unsigned int max, idx;
@@ -679,27 +690,12 @@  void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
     int64_t len = 0;
     int rc;
 
-    if (unlikely(!vq->vring.desc)) {
-        if (in_bytes) {
-            *in_bytes = 0;
-        }
-        if (out_bytes) {
-            *out_bytes = 0;
-        }
-        return;
-    }
-
     rcu_read_lock();
     idx = vq->last_avail_idx;
     total_bufs = in_total = out_total = 0;
 
     max = vq->vring.num;
     caches = vring_get_region_caches(vq);
-    if (caches->desc.len < max * sizeof(VRingDesc)) {
-        virtio_error(vdev, "Cannot map descriptor ring");
-        goto err;
-    }
-
     while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
         MemoryRegionCache *desc_cache = &caches->desc;
         unsigned int num_bufs;
@@ -792,6 +788,171 @@  err:
     goto done;
 }
 
+static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
+                            unsigned int *in_bytes, unsigned int *out_bytes,
+                            unsigned max_in_bytes, unsigned max_out_bytes)
+{
+    VirtIODevice *vdev = vq->vdev;
+    unsigned int max, idx;
+    unsigned int total_bufs, in_total, out_total;
+    MemoryRegionCache *desc_cache;
+    VRingMemoryRegionCaches *caches;
+    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
+    int64_t len = 0;
+    VRingPackedDesc desc;
+    bool wrap_counter;
+
+    rcu_read_lock();
+    idx = vq->last_avail_idx;
+    wrap_counter = vq->last_avail_wrap_counter;
+    total_bufs = in_total = out_total = 0;
+
+    max = vq->vring.num;
+    caches = vring_get_region_caches(vq);
+    desc_cache = &caches->desc;
+    vring_packed_desc_read_flags(vdev, &desc, desc_cache, idx);
+    while (is_desc_avail(&desc, wrap_counter)) {
+        unsigned int num_bufs;
+        unsigned int i = 0;
+
+        num_bufs = total_bufs;
+
+        /* Make sure flags has been read before all the fields. */
+        smp_rmb();
+        vring_packed_desc_read(vdev, &desc, desc_cache, idx);
+
+        if (desc.flags & VRING_DESC_F_INDIRECT) {
+            if (desc.len % sizeof(VRingPackedDesc)) {
+                virtio_error(vdev, "Invalid size for indirect buffer table");
+                goto err;
+            }
+
+            /* If we've got too many, that implies a descriptor loop. */
+            if (num_bufs >= max) {
+                virtio_error(vdev, "Looped descriptor");
+                goto err;
+            }
+
+            /* loop over the indirect descriptor table */
+            len = address_space_cache_init(&indirect_desc_cache,
+                                           vdev->dma_as,
+                                           desc.addr, desc.len, false);
+            desc_cache = &indirect_desc_cache;
+            if (len < desc.len) {
+                virtio_error(vdev, "Cannot map indirect buffer");
+                goto err;
+            }
+
+            max = desc.len / sizeof(VRingPackedDesc);
+            num_bufs = i = 0;
+            vring_packed_desc_read(vdev, &desc, desc_cache, i);
+        }
+
+        do {
+            /* If we've got too many, that implies a descriptor loop. */
+            if (++num_bufs > max) {
+                virtio_error(vdev, "Looped descriptor");
+                goto err;
+            }
+
+            if (desc.flags & VRING_DESC_F_WRITE) {
+                in_total += desc.len;
+            } else {
+                out_total += desc.len;
+            }
+            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
+                goto done;
+            }
+
+            if (desc_cache == &indirect_desc_cache) {
+                if (++i >= max) {
+                    break;
+                }
+                vring_packed_desc_read(vdev, &desc, desc_cache, i);
+            } else {
+                if (++idx >= vq->vring.num) {
+                    idx -= vq->vring.num;
+                    wrap_counter ^= 1;
+                }
+                vring_packed_desc_read(vdev, &desc, desc_cache, idx);
+            }
+            /* Make sure flags has been read after all the other fields */
+            smp_rmb();
+        } while (desc.flags & VRING_DESC_F_NEXT);
+
+        if (desc_cache == &indirect_desc_cache) {
+            address_space_cache_destroy(&indirect_desc_cache);
+            total_bufs++;
+            /* We missed one step on for indirect desc */
+            idx++;
+            if (++idx >= vq->vring.num) {
+                idx -= vq->vring.num;
+                wrap_counter ^= 1;
+            }
+        } else {
+            total_bufs = num_bufs;
+        }
+
+        desc_cache = &caches->desc;
+        vring_packed_desc_read_flags(vdev, &desc, desc_cache, idx);
+    }
+
+    /* Record the index and wrap counter for a kick we want */
+    vq->shadow_avail_idx = idx;
+    vq->avail_wrap_counter = wrap_counter;
+done:
+    address_space_cache_destroy(&indirect_desc_cache);
+    if (in_bytes) {
+        *in_bytes = in_total;
+    }
+    if (out_bytes) {
+        *out_bytes = out_total;
+    }
+    rcu_read_unlock();
+    return;
+
+err:
+    in_total = out_total = 0;
+    goto done;
+}
+
+void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
+                               unsigned int *out_bytes,
+                               unsigned max_in_bytes, unsigned max_out_bytes)
+{
+    uint16_t desc_size;
+    VRingMemoryRegionCaches *caches;
+
+    if (unlikely(!vq->vring.desc)) {
+        goto err;
+    }
+
+    caches = vring_get_region_caches(vq);
+    desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
+                                sizeof(VRingPackedDesc) : sizeof(VRingDesc);
+    if (caches->desc.len < vq->vring.num * desc_size) {
+        virtio_error(vq->vdev, "Cannot map descriptor ring");
+        goto err;
+    }
+
+    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+        virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
+                                         max_in_bytes, max_out_bytes);
+    } else {
+        virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
+                                        max_in_bytes, max_out_bytes);
+    }
+
+    return;
+err:
+    if (in_bytes) {
+        *in_bytes = 0;
+    }
+    if (out_bytes) {
+        *out_bytes = 0;
+    }
+}
+
 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
                           unsigned int out_bytes)
 {