diff mbox series

[v4,5/5] virtio: Enable IOMMU

Message ID 20191204032138.127624-6-aik@ozlabs.ru
State Superseded
Headers show
Series virtio: Enable iommu_platform=on | expand

Commit Message

Alexey Kardashevskiy Dec. 4, 2019, 3:21 a.m. UTC
When QEMU is started with iommu_platform=on, the guest driver must accept
it or the device will fail.

This enables IOMMU support for virtio-net, -scsi, -block, -serial, -9p
devices. -serial and -9p are only compile tested though.

For virtio-net we map all RX buffers once and TX when xmit() is called
and unmap older pages when we are about to reuse the VQ descriptor.
As all other devices are synchronous, we unmap IOMMU pages right after
completion of a transaction.

This depends on QEMU's:
https://patchwork.ozlabs.org/patch/1194067/

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Tested-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
Changes:
v4:
* ditched vqs->id in virtio_queue_init_vq

v2:
* added Mike's fs/dma-instance-function.fs
* total rework
---
 lib/libvirtio/virtio.h        |  5 +++
 lib/libvirtio/virtio-9p.c     |  4 ++
 lib/libvirtio/virtio-blk.c    |  4 ++
 lib/libvirtio/virtio-net.c    |  5 ++-
 lib/libvirtio/virtio-scsi.c   |  5 +++
 lib/libvirtio/virtio-serial.c | 12 +++--
 lib/libvirtio/virtio.c        | 82 ++++++++++++++++++++++++++++++++++-
 7 files changed, 111 insertions(+), 6 deletions(-)

Comments

Michael Roth Dec. 4, 2019, 9:16 p.m. UTC | #1
Quoting Alexey Kardashevskiy (2019-12-03 21:21:38)
> When QEMU is started with iommu_platform=on, the guest driver must accept
> it or the device will fail.
> 
> This enables IOMMU support for virtio-net, -scsi, -block, -serial, -9p
> devices. -serial and -9p are only compile tested though.
> 
> For virtio-net we map all RX buffers once and TX when xmit() is called
> and unmap older pages when we are about to reuse the VQ descriptor.
> As all other devices are synchronous, we unmap IOMMU pages right after
> completion of a transaction.
> 
> This depends on QEMU's:
> https://patchwork.ozlabs.org/patch/1194067/
> 
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> Tested-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> ---
> Changes:
> v4:
> * ditched vqs->id in virtio_queue_init_vq
> 
> v2:
> * added Mike's fs/dma-instance-function.fs
> * total rework
> ---
>  lib/libvirtio/virtio.h        |  5 +++
>  lib/libvirtio/virtio-9p.c     |  4 ++
>  lib/libvirtio/virtio-blk.c    |  4 ++
>  lib/libvirtio/virtio-net.c    |  5 ++-
>  lib/libvirtio/virtio-scsi.c   |  5 +++
>  lib/libvirtio/virtio-serial.c | 12 +++--
>  lib/libvirtio/virtio.c        | 82 ++++++++++++++++++++++++++++++++++-
>  7 files changed, 111 insertions(+), 6 deletions(-)
> 
> diff --git a/lib/libvirtio/virtio.h b/lib/libvirtio/virtio.h
> index 7efc1e524d77..c4eafe40dd31 100644
> --- a/lib/libvirtio/virtio.h
> +++ b/lib/libvirtio/virtio.h
> @@ -29,6 +29,7 @@
>  #define VIRTIO_F_RING_INDIRECT_DESC    BIT(28)
>  #define VIRTIO_F_RING_EVENT_IDX                BIT(29)
>  #define VIRTIO_F_VERSION_1             BIT(32)
> +#define VIRTIO_F_IOMMU_PLATFORM         BIT(33)
> 
>  #define VIRTIO_TIMEOUT                 5000 /* 5 sec timeout */
> 
> @@ -83,6 +84,8 @@ struct vqs {
>         struct vring_desc *desc;
>         struct vring_avail *avail;
>         struct vring_used *used;
> +       void **desc_gpas; /* to get gpa from desc->addr (which is ioba) */
> +       uint64_t bus_desc;
>  };
> 
>  struct virtio_device {
> @@ -108,6 +111,8 @@ extern struct vring_used *virtio_get_vring_used(struct virtio_device *dev, int q
>  extern void virtio_fill_desc(struct vqs *vq, int id, uint64_t features,
>                               uint64_t addr, uint32_t len,
>                               uint16_t flags, uint16_t next);
> +extern void virtio_free_desc(struct vqs *vq, int id, uint64_t features);
> +void *virtio_desc_addr(struct virtio_device *vdev, int queue, int id);
>  extern struct vqs *virtio_queue_init_vq(struct virtio_device *dev, unsigned int id);
>  extern void virtio_queue_term_vq(struct virtio_device *dev, struct vqs *vq, unsigned int id);
> 
> diff --git a/lib/libvirtio/virtio-9p.c b/lib/libvirtio/virtio-9p.c
> index 426069fe9509..76078612b06e 100644
> --- a/lib/libvirtio/virtio-9p.c
> +++ b/lib/libvirtio/virtio-9p.c
> @@ -129,6 +129,10 @@ static int virtio_9p_transact(void *opaque, uint8_t *tx, int tx_size, uint8_t *r
>                 // do something better
>                 mb();
>         }
> +
> +       virtio_free_desc(vq, id, dev->features);
> +       virtio_free_desc(vq, id + 1, dev->features);
> +
>         if (i == 0) {
>                 return -1;
>         }
> diff --git a/lib/libvirtio/virtio-blk.c b/lib/libvirtio/virtio-blk.c
> index a0dadbb0d6a8..0363038e559d 100644
> --- a/lib/libvirtio/virtio-blk.c
> +++ b/lib/libvirtio/virtio-blk.c
> @@ -195,6 +195,10 @@ virtioblk_transfer(struct virtio_device *dev, char *buf, uint64_t blocknum,
>                         break;
>         }
> 
> +       virtio_free_desc(vq, id, dev->features);
> +       virtio_free_desc(vq, id + 1, dev->features);
> +       virtio_free_desc(vq, id + 2, dev->features);
> +
>         if (status == 0)
>                 return cnt;
> 
> diff --git a/lib/libvirtio/virtio-net.c b/lib/libvirtio/virtio-net.c
> index ae67883020ef..5a0d19088527 100644
> --- a/lib/libvirtio/virtio-net.c
> +++ b/lib/libvirtio/virtio-net.c
> @@ -255,6 +255,9 @@ static int virtionet_xmit(struct virtio_net *vnet, char *buf, int len)
>         idx = virtio_modern16_to_cpu(vdev, vq_tx->avail->idx);
>         id = (idx * 2) % vq_tx->size;
> 
> +       virtio_free_desc(vq_tx, id, vdev->features);
> +       virtio_free_desc(vq_tx, id + 1, vdev->features);
> +
>         /* Set up virtqueue descriptor for header */
>         virtio_fill_desc(vq_tx, id, vdev->features, (uint64_t)nethdr,
>                          net_hdr_size, VRING_DESC_F_NEXT, id + 1);
> @@ -317,7 +320,7 @@ static int virtionet_receive(struct virtio_net *vnet, char *buf, int maxlen)
>  #endif
> 
>         /* Copy data to destination buffer */
> -       memcpy(buf, (void *)virtio_modern64_to_cpu(vdev, vq_rx->desc[id].addr), len);
> +       memcpy(buf, virtio_desc_addr(vdev, VQ_RX, id), len);
> 
>         /* Move indices to next entries */
>         last_rx_idx = last_rx_idx + 1;
> diff --git a/lib/libvirtio/virtio-scsi.c b/lib/libvirtio/virtio-scsi.c
> index ae87e97e7330..96285e3891af 100644
> --- a/lib/libvirtio/virtio-scsi.c
> +++ b/lib/libvirtio/virtio-scsi.c
> @@ -81,6 +81,11 @@ int virtioscsi_send(struct virtio_device *dev,
>                         break;
>         }
> 
> +       virtio_free_desc(vq, id, dev->features);
> +       virtio_free_desc(vq, id + 1, dev->features);
> +       if (!(buf == NULL || buf_len == 0))
> +               virtio_free_desc(vq, id + 2, dev->features);
> +
>         return 0;
>  }
> 
> diff --git a/lib/libvirtio/virtio-serial.c b/lib/libvirtio/virtio-serial.c
> index b8b898fc8bea..8826be96c24e 100644
> --- a/lib/libvirtio/virtio-serial.c
> +++ b/lib/libvirtio/virtio-serial.c
> @@ -108,7 +108,7 @@ void virtio_serial_shutdown(struct virtio_device *dev)
> 
>  int virtio_serial_putchar(struct virtio_device *dev, char c)
>  {
> -       int id;
> +       int id, ret;
>         uint32_t time;
>         volatile uint16_t *current_used_idx;
>         uint16_t last_used_idx, avail_idx;
> @@ -133,17 +133,21 @@ int virtio_serial_putchar(struct virtio_device *dev, char c)
>         virtio_queue_notify(dev, TX_Q);
> 
>         /* Wait for host to consume the descriptor */
> +       ret = 1;
>         time = SLOF_GetTimer() + VIRTIO_TIMEOUT;
>         while (*current_used_idx == last_used_idx) {
>                 // do something better
>                 mb();
>                 if (time < SLOF_GetTimer()) {
>                         printf("virtio_serial_putchar failed! \n");
> -                       return 0;
> +                       ret = 0;
> +                       break;
>                 }
>         }
> 
> -       return 1;
> +       virtio_free_desc(vq, id, dev->features);
> +
> +       return ret;
>  }
> 
>  char virtio_serial_getchar(struct virtio_device *dev)
> @@ -163,7 +167,7 @@ char virtio_serial_getchar(struct virtio_device *dev)
>                 % vq_rx->size;
> 
>         /* Copy data to destination buffer */
> -       memcpy(buf, (void *)virtio_modern64_to_cpu(dev, vq_rx->desc[id - 1].addr), RX_ELEM_SIZE);
> +       memcpy(buf, virtio_desc_addr(dev, RX_Q, id - 1), RX_ELEM_SIZE);
> 
>         /* Move indices to next entries */
>         last_rx_idx = last_rx_idx + 1;
> diff --git a/lib/libvirtio/virtio.c b/lib/libvirtio/virtio.c
> index 3e615c65fc2c..9a0c3a96371a 100644
> --- a/lib/libvirtio/virtio.c
> +++ b/lib/libvirtio/virtio.c
> @@ -273,6 +273,17 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features,
>         next %= vq->size;
> 
>         if (features & VIRTIO_F_VERSION_1) {
> +               if (features & VIRTIO_F_IOMMU_PLATFORM) {
> +                       void *gpa = (void *) addr;
> +
> +                       if (!vq->desc_gpas) {
> +                               fprintf(stderr, "IOMMU setup has not been done!\n");
> +                               return;
> +                       }
> +
> +                       addr = SLOF_dma_map_in(gpa, len, 0);
> +                       vq->desc_gpas[id] = gpa;
> +               }
>                 desc->addr = cpu_to_le64(addr);
>                 desc->len = cpu_to_le32(len);
>                 desc->flags = cpu_to_le16(flags);
> @@ -285,6 +296,32 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features,
>         }
>  }
> 
> +void virtio_free_desc(struct vqs *vq, int id, uint64_t features)
> +{
> +       struct vring_desc *desc;
> +
> +       id %= vq->size;
> +       desc = &vq->desc[id];
> +
> +       if (features & VIRTIO_F_VERSION_1) {
> +               if (features & VIRTIO_F_IOMMU_PLATFORM) {
> +                       SLOF_dma_map_out(le64_to_cpu(desc->addr),
> +                                        0, le32_to_cpu(desc->len));
> +                       vq->desc_gpas[id] = NULL;
> +               }
> +       }
> +}
> +
> +void *virtio_desc_addr(struct virtio_device *vdev, int queue, int id)
> +{
> +       struct vqs *vq = &vdev->vq[queue];
> +
> +       if (vq->desc_gpas)
> +               return vq->desc_gpas[id];
> +
> +       return (void *) virtio_modern64_to_cpu(vdev, vq->desc[id].addr);
> +}
> +
>  /**
>   * Reset virtio device
>   */
> @@ -326,6 +363,19 @@ static void virtio_set_qaddr(struct virtio_device *dev, int queue, unsigned long
>                 uint64_t q_used;
>                 uint32_t q_size = virtio_get_qsize(dev, queue);
> 
> +               if (dev->features & VIRTIO_F_IOMMU_PLATFORM) {
> +                       unsigned long cb;
> +
> +                       cb = q_size * sizeof(struct vring_desc);
> +                       cb += sizeof(struct vring_avail) + sizeof(uint16_t) * q_size;
> +                       cb = VQ_ALIGN(cb);
> +                       cb += sizeof(struct vring_used) + sizeof(uint16_t) * q_size;

Shouldn't this be:

  cb += sizeof(struct vring_used) + sizeof(struct vring_used_elem) * q_size;

The subsequent VQ_ALIGN probably masks it in the current code.

> +                       cb = VQ_ALIGN(cb);
> +                       q_desc = SLOF_dma_map_in((void *)q_desc, cb, 0);
> +
> +                       dev->vq[queue].bus_desc = q_desc;
> +               }
> +
>                 virtio_pci_write64(dev->common.addr + offset_of(struct virtio_dev_common, q_desc), q_desc);
>                 q_avail = q_desc + q_size * sizeof(struct vring_desc);
>                 virtio_pci_write64(dev->common.addr + offset_of(struct virtio_dev_common, q_avail), q_avail);
> @@ -372,14 +422,41 @@ struct vqs *virtio_queue_init_vq(struct virtio_device *dev, unsigned int id)
> 
>         vq->avail->flags = virtio_cpu_to_modern16(dev, VRING_AVAIL_F_NO_INTERRUPT);
>         vq->avail->idx = 0;
> +       if (dev->features & VIRTIO_F_IOMMU_PLATFORM)
> +               vq->desc_gpas = SLOF_alloc_mem_aligned(
> +                       vq->size * sizeof(vq->desc_gpas[0]), 4096);
> 
>         return vq;
>  }
> 
>  void virtio_queue_term_vq(struct virtio_device *dev, struct vqs *vq, unsigned int id)
>  {
> -       if (vq->desc)
> +       if (vq->desc_gpas) {
> +               int i;
> +
> +               for (i = 0; i < vq->size; ++i)
> +                       virtio_free_desc(vq, i, dev->features);

Since virtio_free_desc() calls dma-map-out regardless of whether the
IOBA was unmapped via a previous call by the driver, isn't there a chance
we unmap a desc->addr IOBA that has since been re-used elsewhere? I
guess it's unlikely for SLOF but seems like it could become an issue.

Also not sure what side-effects we might expect for unused/uninitialized
descriptors. In init-dma-window-vars we reserve DMA addr 0 so other code
doesn't have to distinguish from NULL; seems like there's a chance that
could get unmapped here.

Maybe we should test for desc_gpas[i] before calling virtio_free_desc(),
or maybe build that check into virtio_free_desc() itself?

Also, is there a case where a driver would legitimately call term_vq()
without calling virtio_free_desc() for a previously used descriptor?
Even if they eventually get cleaned up by term_vq() it seems there's a
chance the descriptor gets re-used prior to that and we still end up
leaking the ioba. Maybe we should print a warning in this case to detect
leaks in the driver code.

> +
> +               memset(vq->desc_gpas, 0, vq->size * sizeof(vq->desc_gpas[0]));

Is the memset really needed if we're freeing it right after?

> +               SLOF_free_mem(vq->desc_gpas,
> +                       vq->size * sizeof(vq->desc_gpas[0]));
> +       }
> +       if (vq->desc) {
> +               if (dev->features & VIRTIO_F_IOMMU_PLATFORM) {
> +                       unsigned long cb;
> +                       uint32_t q_size = virtio_get_qsize(dev, id);
> +
> +                       cb = q_size * sizeof(struct vring_desc);
> +                       cb += sizeof(struct vring_avail) + sizeof(uint16_t) * q_size;
> +                       cb = VQ_ALIGN(cb);
> +                       cb += sizeof(struct vring_used) + sizeof(uint16_t) * q_size;

As per prior comment:

  cb += sizeof(struct vring_used) + sizeof(struct vring_used_elem) * q_size;


Looks good otherwise.

> +                       cb = VQ_ALIGN(cb);
> +
> +                       SLOF_dma_map_out(vq->bus_desc, 0, cb);
> +               }
> +
>                 SLOF_free_mem(vq->desc, virtio_vring_size(vq->size));
> +       }
>         memset(vq, 0, sizeof(*vq));
>  }
> 
> @@ -473,6 +550,9 @@ int virtio_negotiate_guest_features(struct virtio_device *dev, uint64_t features
>                 return -1;
>         }
> 
> +       if (host_features & VIRTIO_F_IOMMU_PLATFORM)
> +               features |= VIRTIO_F_IOMMU_PLATFORM;
> +
>         virtio_set_guest_features(dev,  features);
>         host_features = virtio_get_host_features(dev);
>         if ((host_features & features) != features) {
> -- 
> 2.17.1
>
Alexey Kardashevskiy Dec. 5, 2019, 11:21 p.m. UTC | #2
On 05/12/2019 08:16, Michael Roth wrote:
> Quoting Alexey Kardashevskiy (2019-12-03 21:21:38)
>> When QEMU is started with iommu_platform=on, the guest driver must accept
>> it or the device will fail.
>>
>> This enables IOMMU support for virtio-net, -scsi, -block, -serial, -9p
>> devices. -serial and -9p are only compile tested though.
>>
>> For virtio-net we map all RX buffers once and TX when xmit() is called
>> and unmap older pages when we are about to reuse the VQ descriptor.
>> As all other devices are synchronous, we unmap IOMMU pages right after
>> completion of a transaction.
>>
>> This depends on QEMU's:
>> https://patchwork.ozlabs.org/patch/1194067/
>>
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>> Tested-by: Michael Roth <mdroth@linux.vnet.ibm.com>
>> ---
>> Changes:
>> v4:
>> * ditched vqs->id in virtio_queue_init_vq
>>
>> v2:
>> * added Mike's fs/dma-instance-function.fs
>> * total rework
>> ---
>>  lib/libvirtio/virtio.h        |  5 +++
>>  lib/libvirtio/virtio-9p.c     |  4 ++
>>  lib/libvirtio/virtio-blk.c    |  4 ++
>>  lib/libvirtio/virtio-net.c    |  5 ++-
>>  lib/libvirtio/virtio-scsi.c   |  5 +++
>>  lib/libvirtio/virtio-serial.c | 12 +++--
>>  lib/libvirtio/virtio.c        | 82 ++++++++++++++++++++++++++++++++++-
>>  7 files changed, 111 insertions(+), 6 deletions(-)
>>
>> diff --git a/lib/libvirtio/virtio.h b/lib/libvirtio/virtio.h
>> index 7efc1e524d77..c4eafe40dd31 100644
>> --- a/lib/libvirtio/virtio.h
>> +++ b/lib/libvirtio/virtio.h
>> @@ -29,6 +29,7 @@
>>  #define VIRTIO_F_RING_INDIRECT_DESC    BIT(28)
>>  #define VIRTIO_F_RING_EVENT_IDX                BIT(29)
>>  #define VIRTIO_F_VERSION_1             BIT(32)
>> +#define VIRTIO_F_IOMMU_PLATFORM         BIT(33)
>>
>>  #define VIRTIO_TIMEOUT                 5000 /* 5 sec timeout */
>>
>> @@ -83,6 +84,8 @@ struct vqs {
>>         struct vring_desc *desc;
>>         struct vring_avail *avail;
>>         struct vring_used *used;
>> +       void **desc_gpas; /* to get gpa from desc->addr (which is ioba) */
>> +       uint64_t bus_desc;
>>  };
>>
>>  struct virtio_device {
>> @@ -108,6 +111,8 @@ extern struct vring_used *virtio_get_vring_used(struct virtio_device *dev, int q
>>  extern void virtio_fill_desc(struct vqs *vq, int id, uint64_t features,
>>                               uint64_t addr, uint32_t len,
>>                               uint16_t flags, uint16_t next);
>> +extern void virtio_free_desc(struct vqs *vq, int id, uint64_t features);
>> +void *virtio_desc_addr(struct virtio_device *vdev, int queue, int id);
>>  extern struct vqs *virtio_queue_init_vq(struct virtio_device *dev, unsigned int id);
>>  extern void virtio_queue_term_vq(struct virtio_device *dev, struct vqs *vq, unsigned int id);
>>
>> diff --git a/lib/libvirtio/virtio-9p.c b/lib/libvirtio/virtio-9p.c
>> index 426069fe9509..76078612b06e 100644
>> --- a/lib/libvirtio/virtio-9p.c
>> +++ b/lib/libvirtio/virtio-9p.c
>> @@ -129,6 +129,10 @@ static int virtio_9p_transact(void *opaque, uint8_t *tx, int tx_size, uint8_t *r
>>                 // do something better
>>                 mb();
>>         }
>> +
>> +       virtio_free_desc(vq, id, dev->features);
>> +       virtio_free_desc(vq, id + 1, dev->features);
>> +
>>         if (i == 0) {
>>                 return -1;
>>         }
>> diff --git a/lib/libvirtio/virtio-blk.c b/lib/libvirtio/virtio-blk.c
>> index a0dadbb0d6a8..0363038e559d 100644
>> --- a/lib/libvirtio/virtio-blk.c
>> +++ b/lib/libvirtio/virtio-blk.c
>> @@ -195,6 +195,10 @@ virtioblk_transfer(struct virtio_device *dev, char *buf, uint64_t blocknum,
>>                         break;
>>         }
>>
>> +       virtio_free_desc(vq, id, dev->features);
>> +       virtio_free_desc(vq, id + 1, dev->features);
>> +       virtio_free_desc(vq, id + 2, dev->features);
>> +
>>         if (status == 0)
>>                 return cnt;
>>
>> diff --git a/lib/libvirtio/virtio-net.c b/lib/libvirtio/virtio-net.c
>> index ae67883020ef..5a0d19088527 100644
>> --- a/lib/libvirtio/virtio-net.c
>> +++ b/lib/libvirtio/virtio-net.c
>> @@ -255,6 +255,9 @@ static int virtionet_xmit(struct virtio_net *vnet, char *buf, int len)
>>         idx = virtio_modern16_to_cpu(vdev, vq_tx->avail->idx);
>>         id = (idx * 2) % vq_tx->size;
>>
>> +       virtio_free_desc(vq_tx, id, vdev->features);
>> +       virtio_free_desc(vq_tx, id + 1, vdev->features);
>> +
>>         /* Set up virtqueue descriptor for header */
>>         virtio_fill_desc(vq_tx, id, vdev->features, (uint64_t)nethdr,
>>                          net_hdr_size, VRING_DESC_F_NEXT, id + 1);
>> @@ -317,7 +320,7 @@ static int virtionet_receive(struct virtio_net *vnet, char *buf, int maxlen)
>>  #endif
>>
>>         /* Copy data to destination buffer */
>> -       memcpy(buf, (void *)virtio_modern64_to_cpu(vdev, vq_rx->desc[id].addr), len);
>> +       memcpy(buf, virtio_desc_addr(vdev, VQ_RX, id), len);
>>
>>         /* Move indices to next entries */
>>         last_rx_idx = last_rx_idx + 1;
>> diff --git a/lib/libvirtio/virtio-scsi.c b/lib/libvirtio/virtio-scsi.c
>> index ae87e97e7330..96285e3891af 100644
>> --- a/lib/libvirtio/virtio-scsi.c
>> +++ b/lib/libvirtio/virtio-scsi.c
>> @@ -81,6 +81,11 @@ int virtioscsi_send(struct virtio_device *dev,
>>                         break;
>>         }
>>
>> +       virtio_free_desc(vq, id, dev->features);
>> +       virtio_free_desc(vq, id + 1, dev->features);
>> +       if (!(buf == NULL || buf_len == 0))
>> +               virtio_free_desc(vq, id + 2, dev->features);
>> +
>>         return 0;
>>  }
>>
>> diff --git a/lib/libvirtio/virtio-serial.c b/lib/libvirtio/virtio-serial.c
>> index b8b898fc8bea..8826be96c24e 100644
>> --- a/lib/libvirtio/virtio-serial.c
>> +++ b/lib/libvirtio/virtio-serial.c
>> @@ -108,7 +108,7 @@ void virtio_serial_shutdown(struct virtio_device *dev)
>>
>>  int virtio_serial_putchar(struct virtio_device *dev, char c)
>>  {
>> -       int id;
>> +       int id, ret;
>>         uint32_t time;
>>         volatile uint16_t *current_used_idx;
>>         uint16_t last_used_idx, avail_idx;
>> @@ -133,17 +133,21 @@ int virtio_serial_putchar(struct virtio_device *dev, char c)
>>         virtio_queue_notify(dev, TX_Q);
>>
>>         /* Wait for host to consume the descriptor */
>> +       ret = 1;
>>         time = SLOF_GetTimer() + VIRTIO_TIMEOUT;
>>         while (*current_used_idx == last_used_idx) {
>>                 // do something better
>>                 mb();
>>                 if (time < SLOF_GetTimer()) {
>>                         printf("virtio_serial_putchar failed! \n");
>> -                       return 0;
>> +                       ret = 0;
>> +                       break;
>>                 }
>>         }
>>
>> -       return 1;
>> +       virtio_free_desc(vq, id, dev->features);
>> +
>> +       return ret;
>>  }
>>
>>  char virtio_serial_getchar(struct virtio_device *dev)
>> @@ -163,7 +167,7 @@ char virtio_serial_getchar(struct virtio_device *dev)
>>                 % vq_rx->size;
>>
>>         /* Copy data to destination buffer */
>> -       memcpy(buf, (void *)virtio_modern64_to_cpu(dev, vq_rx->desc[id - 1].addr), RX_ELEM_SIZE);
>> +       memcpy(buf, virtio_desc_addr(dev, RX_Q, id - 1), RX_ELEM_SIZE);
>>
>>         /* Move indices to next entries */
>>         last_rx_idx = last_rx_idx + 1;
>> diff --git a/lib/libvirtio/virtio.c b/lib/libvirtio/virtio.c
>> index 3e615c65fc2c..9a0c3a96371a 100644
>> --- a/lib/libvirtio/virtio.c
>> +++ b/lib/libvirtio/virtio.c
>> @@ -273,6 +273,17 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features,
>>         next %= vq->size;
>>
>>         if (features & VIRTIO_F_VERSION_1) {
>> +               if (features & VIRTIO_F_IOMMU_PLATFORM) {
>> +                       void *gpa = (void *) addr;
>> +
>> +                       if (!vq->desc_gpas) {
>> +                               fprintf(stderr, "IOMMU setup has not been done!\n");
>> +                               return;
>> +                       }
>> +
>> +                       addr = SLOF_dma_map_in(gpa, len, 0);
>> +                       vq->desc_gpas[id] = gpa;
>> +               }
>>                 desc->addr = cpu_to_le64(addr);
>>                 desc->len = cpu_to_le32(len);
>>                 desc->flags = cpu_to_le16(flags);
>> @@ -285,6 +296,32 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features,
>>         }
>>  }
>>
>> +void virtio_free_desc(struct vqs *vq, int id, uint64_t features)
>> +{
>> +       struct vring_desc *desc;
>> +
>> +       id %= vq->size;
>> +       desc = &vq->desc[id];
>> +
>> +       if (features & VIRTIO_F_VERSION_1) {
>> +               if (features & VIRTIO_F_IOMMU_PLATFORM) {
>> +                       SLOF_dma_map_out(le64_to_cpu(desc->addr),
>> +                                        0, le32_to_cpu(desc->len));
>> +                       vq->desc_gpas[id] = NULL;
>> +               }
>> +       }
>> +}
>> +
>> +void *virtio_desc_addr(struct virtio_device *vdev, int queue, int id)
>> +{
>> +       struct vqs *vq = &vdev->vq[queue];
>> +
>> +       if (vq->desc_gpas)
>> +               return vq->desc_gpas[id];
>> +
>> +       return (void *) virtio_modern64_to_cpu(vdev, vq->desc[id].addr);
>> +}
>> +
>>  /**
>>   * Reset virtio device
>>   */
>> @@ -326,6 +363,19 @@ static void virtio_set_qaddr(struct virtio_device *dev, int queue, unsigned long
>>                 uint64_t q_used;
>>                 uint32_t q_size = virtio_get_qsize(dev, queue);
>>
>> +               if (dev->features & VIRTIO_F_IOMMU_PLATFORM) {
>> +                       unsigned long cb;
>> +
>> +                       cb = q_size * sizeof(struct vring_desc);
>> +                       cb += sizeof(struct vring_avail) + sizeof(uint16_t) * q_size;
>> +                       cb = VQ_ALIGN(cb);
>> +                       cb += sizeof(struct vring_used) + sizeof(uint16_t) * q_size;
> 
> Shouldn't this be:
> 
>   cb += sizeof(struct vring_used) + sizeof(struct vring_used_elem) * q_size;


Yes it should, good find!


> 
> The subsequent VQ_ALIGN probably masks it in the current code.
> 
>> +                       cb = VQ_ALIGN(cb);
>> +                       q_desc = SLOF_dma_map_in((void *)q_desc, cb, 0);
>> +
>> +                       dev->vq[queue].bus_desc = q_desc;
>> +               }
>> +
>>                 virtio_pci_write64(dev->common.addr + offset_of(struct virtio_dev_common, q_desc), q_desc);
>>                 q_avail = q_desc + q_size * sizeof(struct vring_desc);
>>                 virtio_pci_write64(dev->common.addr + offset_of(struct virtio_dev_common, q_avail), q_avail);
>> @@ -372,14 +422,41 @@ struct vqs *virtio_queue_init_vq(struct virtio_device *dev, unsigned int id)
>>
>>         vq->avail->flags = virtio_cpu_to_modern16(dev, VRING_AVAIL_F_NO_INTERRUPT);
>>         vq->avail->idx = 0;
>> +       if (dev->features & VIRTIO_F_IOMMU_PLATFORM)
>> +               vq->desc_gpas = SLOF_alloc_mem_aligned(
>> +                       vq->size * sizeof(vq->desc_gpas[0]), 4096);
>>
>>         return vq;
>>  }
>>
>>  void virtio_queue_term_vq(struct virtio_device *dev, struct vqs *vq, unsigned int id)
>>  {
>> -       if (vq->desc)
>> +       if (vq->desc_gpas) {
>> +               int i;
>> +
>> +               for (i = 0; i < vq->size; ++i)
>> +                       virtio_free_desc(vq, i, dev->features);
> 
> Since virtio_free_desc() calls dma-map-out regardless of whether the
> IOBA was unmapped via a previous call by the driver, isn't there a chance
> we unmap a desc->addr IOBA that has since been re-used elsewhere? I
> guess it's unlikely for SLOF but seems like it could become an issue.


As far as I understand it, only one instance of any driver can be open
at the time so the DMA window is used exclusevely by this particular
device. So when a queue is being terminated, I expect no other user around.


> Also not sure what side-effects we might expect for unused/uninitialized
> descriptors. In init-dma-window-vars we reserve DMA addr 0 so other code
> doesn't have to distinguish from NULL; seems like there's a chance that
> could get unmapped here.
> 
> Maybe we should test for desc_gpas[i] before calling virtio_free_desc(),
> or maybe build that check into virtio_free_desc() itself?

It still makes sense and makes things cleaner, I'll do that.


> Also, is there a case where a driver would legitimately call term_vq()
> without calling virtio_free_desc() for a previously used descriptor?
> Even if they eventually get cleaned up by term_vq() it seems there's a
> chance the descriptor gets re-used prior to that and we still end up
> leaking the ioba. Maybe we should print a warning in this case to detect
> leaks in the driver code.


virtio-net does this - if it ever sent anything, a couple of buffers are
always mapped (see virtionet_xmit()); receive buffers are simply always
mapped until virtio_queue_term_vq() is called. Every other device
releases a descriptor straight away. So I am not sure how I add an easy
detection for all cases but virtio-net.


> 
>> +
>> +               memset(vq->desc_gpas, 0, vq->size * sizeof(vq->desc_gpas[0]));
> 
> Is the memset really needed if we're freeing it right after?

No, not really, I'll remove it.

> 
>> +               SLOF_free_mem(vq->desc_gpas,
>> +                       vq->size * sizeof(vq->desc_gpas[0]));
>> +       }
>> +       if (vq->desc) {
>> +               if (dev->features & VIRTIO_F_IOMMU_PLATFORM) {
>> +                       unsigned long cb;
>> +                       uint32_t q_size = virtio_get_qsize(dev, id);
>> +
>> +                       cb = q_size * sizeof(struct vring_desc);
>> +                       cb += sizeof(struct vring_avail) + sizeof(uint16_t) * q_size;
>> +                       cb = VQ_ALIGN(cb);
>> +                       cb += sizeof(struct vring_used) + sizeof(uint16_t) * q_size;
> 
> As per prior comment:
> 
>   cb += sizeof(struct vring_used) + sizeof(struct vring_used_elem) * q_size;


Oh right, fixed. Thanks,

> 
> 
> Looks good otherwise.
> 
>> +                       cb = VQ_ALIGN(cb);
>> +
>> +                       SLOF_dma_map_out(vq->bus_desc, 0, cb);
>> +               }
>> +
>>                 SLOF_free_mem(vq->desc, virtio_vring_size(vq->size));
>> +       }
>>         memset(vq, 0, sizeof(*vq));
>>  }
>>
>> @@ -473,6 +550,9 @@ int virtio_negotiate_guest_features(struct virtio_device *dev, uint64_t features
>>                 return -1;
>>         }
>>
>> +       if (host_features & VIRTIO_F_IOMMU_PLATFORM)
>> +               features |= VIRTIO_F_IOMMU_PLATFORM;
>> +
>>         virtio_set_guest_features(dev,  features);
>>         host_features = virtio_get_host_features(dev);
>>         if ((host_features & features) != features) {
>> -- 
>> 2.17.1
>>
diff mbox series

Patch

diff --git a/lib/libvirtio/virtio.h b/lib/libvirtio/virtio.h
index 7efc1e524d77..c4eafe40dd31 100644
--- a/lib/libvirtio/virtio.h
+++ b/lib/libvirtio/virtio.h
@@ -29,6 +29,7 @@ 
 #define VIRTIO_F_RING_INDIRECT_DESC	BIT(28)
 #define VIRTIO_F_RING_EVENT_IDX		BIT(29)
 #define VIRTIO_F_VERSION_1		BIT(32)
+#define VIRTIO_F_IOMMU_PLATFORM         BIT(33)
 
 #define VIRTIO_TIMEOUT		        5000 /* 5 sec timeout */
 
@@ -83,6 +84,8 @@  struct vqs {
 	struct vring_desc *desc;
 	struct vring_avail *avail;
 	struct vring_used *used;
+	void **desc_gpas; /* to get gpa from desc->addr (which is ioba) */
+	uint64_t bus_desc;
 };
 
 struct virtio_device {
@@ -108,6 +111,8 @@  extern struct vring_used *virtio_get_vring_used(struct virtio_device *dev, int q
 extern void virtio_fill_desc(struct vqs *vq, int id, uint64_t features,
                              uint64_t addr, uint32_t len,
                              uint16_t flags, uint16_t next);
+extern void virtio_free_desc(struct vqs *vq, int id, uint64_t features);
+void *virtio_desc_addr(struct virtio_device *vdev, int queue, int id);
 extern struct vqs *virtio_queue_init_vq(struct virtio_device *dev, unsigned int id);
 extern void virtio_queue_term_vq(struct virtio_device *dev, struct vqs *vq, unsigned int id);
 
diff --git a/lib/libvirtio/virtio-9p.c b/lib/libvirtio/virtio-9p.c
index 426069fe9509..76078612b06e 100644
--- a/lib/libvirtio/virtio-9p.c
+++ b/lib/libvirtio/virtio-9p.c
@@ -129,6 +129,10 @@  static int virtio_9p_transact(void *opaque, uint8_t *tx, int tx_size, uint8_t *r
 		// do something better
 		mb();
 	}
+
+	virtio_free_desc(vq, id, dev->features);
+	virtio_free_desc(vq, id + 1, dev->features);
+
 	if (i == 0) {
 		return -1;
 	}
diff --git a/lib/libvirtio/virtio-blk.c b/lib/libvirtio/virtio-blk.c
index a0dadbb0d6a8..0363038e559d 100644
--- a/lib/libvirtio/virtio-blk.c
+++ b/lib/libvirtio/virtio-blk.c
@@ -195,6 +195,10 @@  virtioblk_transfer(struct virtio_device *dev, char *buf, uint64_t blocknum,
 			break;
 	}
 
+	virtio_free_desc(vq, id, dev->features);
+	virtio_free_desc(vq, id + 1, dev->features);
+	virtio_free_desc(vq, id + 2, dev->features);
+
 	if (status == 0)
 		return cnt;
 
diff --git a/lib/libvirtio/virtio-net.c b/lib/libvirtio/virtio-net.c
index ae67883020ef..5a0d19088527 100644
--- a/lib/libvirtio/virtio-net.c
+++ b/lib/libvirtio/virtio-net.c
@@ -255,6 +255,9 @@  static int virtionet_xmit(struct virtio_net *vnet, char *buf, int len)
 	idx = virtio_modern16_to_cpu(vdev, vq_tx->avail->idx);
 	id = (idx * 2) % vq_tx->size;
 
+	virtio_free_desc(vq_tx, id, vdev->features);
+	virtio_free_desc(vq_tx, id + 1, vdev->features);
+
 	/* Set up virtqueue descriptor for header */
 	virtio_fill_desc(vq_tx, id, vdev->features, (uint64_t)nethdr,
 			 net_hdr_size, VRING_DESC_F_NEXT, id + 1);
@@ -317,7 +320,7 @@  static int virtionet_receive(struct virtio_net *vnet, char *buf, int maxlen)
 #endif
 
 	/* Copy data to destination buffer */
-	memcpy(buf, (void *)virtio_modern64_to_cpu(vdev, vq_rx->desc[id].addr), len);
+	memcpy(buf, virtio_desc_addr(vdev, VQ_RX, id), len);
 
 	/* Move indices to next entries */
 	last_rx_idx = last_rx_idx + 1;
diff --git a/lib/libvirtio/virtio-scsi.c b/lib/libvirtio/virtio-scsi.c
index ae87e97e7330..96285e3891af 100644
--- a/lib/libvirtio/virtio-scsi.c
+++ b/lib/libvirtio/virtio-scsi.c
@@ -81,6 +81,11 @@  int virtioscsi_send(struct virtio_device *dev,
 			break;
 	}
 
+	virtio_free_desc(vq, id, dev->features);
+	virtio_free_desc(vq, id + 1, dev->features);
+	if (!(buf == NULL || buf_len == 0))
+		virtio_free_desc(vq, id + 2, dev->features);
+
 	return 0;
 }
 
diff --git a/lib/libvirtio/virtio-serial.c b/lib/libvirtio/virtio-serial.c
index b8b898fc8bea..8826be96c24e 100644
--- a/lib/libvirtio/virtio-serial.c
+++ b/lib/libvirtio/virtio-serial.c
@@ -108,7 +108,7 @@  void virtio_serial_shutdown(struct virtio_device *dev)
 
 int virtio_serial_putchar(struct virtio_device *dev, char c)
 {
-	int id;
+	int id, ret;
 	uint32_t time;
 	volatile uint16_t *current_used_idx;
 	uint16_t last_used_idx, avail_idx;
@@ -133,17 +133,21 @@  int virtio_serial_putchar(struct virtio_device *dev, char c)
 	virtio_queue_notify(dev, TX_Q);
 
 	/* Wait for host to consume the descriptor */
+	ret = 1;
 	time = SLOF_GetTimer() + VIRTIO_TIMEOUT;
 	while (*current_used_idx == last_used_idx) {
 		// do something better
 		mb();
 		if (time < SLOF_GetTimer()) {
 			printf("virtio_serial_putchar failed! \n");
-			return 0;
+			ret = 0;
+			break;
 		}
 	}
 
-	return 1;
+	virtio_free_desc(vq, id, dev->features);
+
+	return ret;
 }
 
 char virtio_serial_getchar(struct virtio_device *dev)
@@ -163,7 +167,7 @@  char virtio_serial_getchar(struct virtio_device *dev)
 		% vq_rx->size;
 
 	/* Copy data to destination buffer */
-	memcpy(buf, (void *)virtio_modern64_to_cpu(dev, vq_rx->desc[id - 1].addr), RX_ELEM_SIZE);
+	memcpy(buf, virtio_desc_addr(dev, RX_Q, id - 1), RX_ELEM_SIZE);
 
 	/* Move indices to next entries */
 	last_rx_idx = last_rx_idx + 1;
diff --git a/lib/libvirtio/virtio.c b/lib/libvirtio/virtio.c
index 3e615c65fc2c..9a0c3a96371a 100644
--- a/lib/libvirtio/virtio.c
+++ b/lib/libvirtio/virtio.c
@@ -273,6 +273,17 @@  void virtio_fill_desc(struct vqs *vq, int id, uint64_t features,
 	next %= vq->size;
 
 	if (features & VIRTIO_F_VERSION_1) {
+		if (features & VIRTIO_F_IOMMU_PLATFORM) {
+			void *gpa = (void *) addr;
+
+			if (!vq->desc_gpas) {
+				fprintf(stderr, "IOMMU setup has not been done!\n");
+				return;
+			}
+
+			addr = SLOF_dma_map_in(gpa, len, 0);
+			vq->desc_gpas[id] = gpa;
+		}
 		desc->addr = cpu_to_le64(addr);
 		desc->len = cpu_to_le32(len);
 		desc->flags = cpu_to_le16(flags);
@@ -285,6 +296,32 @@  void virtio_fill_desc(struct vqs *vq, int id, uint64_t features,
 	}
 }
 
+void virtio_free_desc(struct vqs *vq, int id, uint64_t features)
+{
+	struct vring_desc *desc;
+
+	id %= vq->size;
+	desc = &vq->desc[id];
+
+	if (features & VIRTIO_F_VERSION_1) {
+		if (features & VIRTIO_F_IOMMU_PLATFORM) {
+			SLOF_dma_map_out(le64_to_cpu(desc->addr),
+					 0, le32_to_cpu(desc->len));
+			vq->desc_gpas[id] = NULL;
+		}
+	}
+}
+
+void *virtio_desc_addr(struct virtio_device *vdev, int queue, int id)
+{
+	struct vqs *vq = &vdev->vq[queue];
+
+	if (vq->desc_gpas)
+		return vq->desc_gpas[id];
+
+	return (void *) virtio_modern64_to_cpu(vdev, vq->desc[id].addr);
+}
+
 /**
  * Reset virtio device
  */
@@ -326,6 +363,19 @@  static void virtio_set_qaddr(struct virtio_device *dev, int queue, unsigned long
 		uint64_t q_used;
 		uint32_t q_size = virtio_get_qsize(dev, queue);
 
+		if (dev->features & VIRTIO_F_IOMMU_PLATFORM) {
+			unsigned long cb;
+
+			cb = q_size * sizeof(struct vring_desc);
+			cb += sizeof(struct vring_avail) + sizeof(uint16_t) * q_size;
+			cb = VQ_ALIGN(cb);
+			cb += sizeof(struct vring_used) + sizeof(uint16_t) * q_size;
+			cb = VQ_ALIGN(cb);
+			q_desc = SLOF_dma_map_in((void *)q_desc, cb, 0);
+
+			dev->vq[queue].bus_desc = q_desc;
+		}
+
 		virtio_pci_write64(dev->common.addr + offset_of(struct virtio_dev_common, q_desc), q_desc);
 		q_avail = q_desc + q_size * sizeof(struct vring_desc);
 		virtio_pci_write64(dev->common.addr + offset_of(struct virtio_dev_common, q_avail), q_avail);
@@ -372,14 +422,41 @@  struct vqs *virtio_queue_init_vq(struct virtio_device *dev, unsigned int id)
 
 	vq->avail->flags = virtio_cpu_to_modern16(dev, VRING_AVAIL_F_NO_INTERRUPT);
 	vq->avail->idx = 0;
+	if (dev->features & VIRTIO_F_IOMMU_PLATFORM)
+		vq->desc_gpas = SLOF_alloc_mem_aligned(
+			vq->size * sizeof(vq->desc_gpas[0]), 4096);
 
 	return vq;
 }
 
 void virtio_queue_term_vq(struct virtio_device *dev, struct vqs *vq, unsigned int id)
 {
-	if (vq->desc)
+	if (vq->desc_gpas) {
+		int i;
+
+		for (i = 0; i < vq->size; ++i)
+			virtio_free_desc(vq, i, dev->features);
+
+		memset(vq->desc_gpas, 0, vq->size * sizeof(vq->desc_gpas[0]));
+		SLOF_free_mem(vq->desc_gpas,
+			vq->size * sizeof(vq->desc_gpas[0]));
+	}
+	if (vq->desc) {
+		if (dev->features & VIRTIO_F_IOMMU_PLATFORM) {
+			unsigned long cb;
+			uint32_t q_size = virtio_get_qsize(dev, id);
+
+			cb = q_size * sizeof(struct vring_desc);
+			cb += sizeof(struct vring_avail) + sizeof(uint16_t) * q_size;
+			cb = VQ_ALIGN(cb);
+			cb += sizeof(struct vring_used) + sizeof(uint16_t) * q_size;
+			cb = VQ_ALIGN(cb);
+
+			SLOF_dma_map_out(vq->bus_desc, 0, cb);
+		}
+
 		SLOF_free_mem(vq->desc, virtio_vring_size(vq->size));
+	}
 	memset(vq, 0, sizeof(*vq));
 }
 
@@ -473,6 +550,9 @@  int virtio_negotiate_guest_features(struct virtio_device *dev, uint64_t features
 		return -1;
 	}
 
+	if (host_features & VIRTIO_F_IOMMU_PLATFORM)
+		features |= VIRTIO_F_IOMMU_PLATFORM;
+
 	virtio_set_guest_features(dev,  features);
 	host_features = virtio_get_host_features(dev);
 	if ((host_features & features) != features) {