Message ID | 20191204032138.127624-6-aik@ozlabs.ru |
---|---|
State | Superseded |
Headers | show |
Series | virtio: Enable iommu_platform=on | expand |
Quoting Alexey Kardashevskiy (2019-12-03 21:21:38) > When QEMU is started with iommu_platform=on, the guest driver must accept > it or the device will fail. > > This enables IOMMU support for virtio-net, -scsi, -block, -serial, -9p > devices. -serial and -9p are only compile tested though. > > For virtio-net we map all RX buffers once and TX when xmit() is called > and unmap older pages when we are about to reuse the VQ descriptor. > As all other devices are synchronous, we unmap IOMMU pages right after > completion of a transaction. > > This depends on QEMU's: > https://patchwork.ozlabs.org/patch/1194067/ > > Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> > Tested-by: Michael Roth <mdroth@linux.vnet.ibm.com> > --- > Changes: > v4: > * ditched vqs->id in virtio_queue_init_vq > > v2: > * added Mike's fs/dma-instance-function.fs > * total rework > --- > lib/libvirtio/virtio.h | 5 +++ > lib/libvirtio/virtio-9p.c | 4 ++ > lib/libvirtio/virtio-blk.c | 4 ++ > lib/libvirtio/virtio-net.c | 5 ++- > lib/libvirtio/virtio-scsi.c | 5 +++ > lib/libvirtio/virtio-serial.c | 12 +++-- > lib/libvirtio/virtio.c | 82 ++++++++++++++++++++++++++++++++++- > 7 files changed, 111 insertions(+), 6 deletions(-) > > diff --git a/lib/libvirtio/virtio.h b/lib/libvirtio/virtio.h > index 7efc1e524d77..c4eafe40dd31 100644 > --- a/lib/libvirtio/virtio.h > +++ b/lib/libvirtio/virtio.h > @@ -29,6 +29,7 @@ > #define VIRTIO_F_RING_INDIRECT_DESC BIT(28) > #define VIRTIO_F_RING_EVENT_IDX BIT(29) > #define VIRTIO_F_VERSION_1 BIT(32) > +#define VIRTIO_F_IOMMU_PLATFORM BIT(33) > > #define VIRTIO_TIMEOUT 5000 /* 5 sec timeout */ > > @@ -83,6 +84,8 @@ struct vqs { > struct vring_desc *desc; > struct vring_avail *avail; > struct vring_used *used; > + void **desc_gpas; /* to get gpa from desc->addr (which is ioba) */ > + uint64_t bus_desc; > }; > > struct virtio_device { > @@ -108,6 +111,8 @@ extern struct vring_used *virtio_get_vring_used(struct virtio_device *dev, int q > extern void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, > uint64_t addr, uint32_t len, > uint16_t flags, uint16_t next); > +extern void virtio_free_desc(struct vqs *vq, int id, uint64_t features); > +void *virtio_desc_addr(struct virtio_device *vdev, int queue, int id); > extern struct vqs *virtio_queue_init_vq(struct virtio_device *dev, unsigned int id); > extern void virtio_queue_term_vq(struct virtio_device *dev, struct vqs *vq, unsigned int id); > > diff --git a/lib/libvirtio/virtio-9p.c b/lib/libvirtio/virtio-9p.c > index 426069fe9509..76078612b06e 100644 > --- a/lib/libvirtio/virtio-9p.c > +++ b/lib/libvirtio/virtio-9p.c > @@ -129,6 +129,10 @@ static int virtio_9p_transact(void *opaque, uint8_t *tx, int tx_size, uint8_t *r > // do something better > mb(); > } > + > + virtio_free_desc(vq, id, dev->features); > + virtio_free_desc(vq, id + 1, dev->features); > + > if (i == 0) { > return -1; > } > diff --git a/lib/libvirtio/virtio-blk.c b/lib/libvirtio/virtio-blk.c > index a0dadbb0d6a8..0363038e559d 100644 > --- a/lib/libvirtio/virtio-blk.c > +++ b/lib/libvirtio/virtio-blk.c > @@ -195,6 +195,10 @@ virtioblk_transfer(struct virtio_device *dev, char *buf, uint64_t blocknum, > break; > } > > + virtio_free_desc(vq, id, dev->features); > + virtio_free_desc(vq, id + 1, dev->features); > + virtio_free_desc(vq, id + 2, dev->features); > + > if (status == 0) > return cnt; > > diff --git a/lib/libvirtio/virtio-net.c b/lib/libvirtio/virtio-net.c > index ae67883020ef..5a0d19088527 100644 > --- a/lib/libvirtio/virtio-net.c > +++ b/lib/libvirtio/virtio-net.c > @@ -255,6 +255,9 @@ static int virtionet_xmit(struct virtio_net *vnet, char *buf, int len) > idx = virtio_modern16_to_cpu(vdev, vq_tx->avail->idx); > id = (idx * 2) % vq_tx->size; > > + virtio_free_desc(vq_tx, id, vdev->features); > + virtio_free_desc(vq_tx, id + 1, vdev->features); > + > /* Set up virtqueue descriptor for header */ > virtio_fill_desc(vq_tx, id, vdev->features, (uint64_t)nethdr, > net_hdr_size, VRING_DESC_F_NEXT, id + 1); > @@ -317,7 +320,7 @@ static int virtionet_receive(struct virtio_net *vnet, char *buf, int maxlen) > #endif > > /* Copy data to destination buffer */ > - memcpy(buf, (void *)virtio_modern64_to_cpu(vdev, vq_rx->desc[id].addr), len); > + memcpy(buf, virtio_desc_addr(vdev, VQ_RX, id), len); > > /* Move indices to next entries */ > last_rx_idx = last_rx_idx + 1; > diff --git a/lib/libvirtio/virtio-scsi.c b/lib/libvirtio/virtio-scsi.c > index ae87e97e7330..96285e3891af 100644 > --- a/lib/libvirtio/virtio-scsi.c > +++ b/lib/libvirtio/virtio-scsi.c > @@ -81,6 +81,11 @@ int virtioscsi_send(struct virtio_device *dev, > break; > } > > + virtio_free_desc(vq, id, dev->features); > + virtio_free_desc(vq, id + 1, dev->features); > + if (!(buf == NULL || buf_len == 0)) > + virtio_free_desc(vq, id + 2, dev->features); > + > return 0; > } > > diff --git a/lib/libvirtio/virtio-serial.c b/lib/libvirtio/virtio-serial.c > index b8b898fc8bea..8826be96c24e 100644 > --- a/lib/libvirtio/virtio-serial.c > +++ b/lib/libvirtio/virtio-serial.c > @@ -108,7 +108,7 @@ void virtio_serial_shutdown(struct virtio_device *dev) > > int virtio_serial_putchar(struct virtio_device *dev, char c) > { > - int id; > + int id, ret; > uint32_t time; > volatile uint16_t *current_used_idx; > uint16_t last_used_idx, avail_idx; > @@ -133,17 +133,21 @@ int virtio_serial_putchar(struct virtio_device *dev, char c) > virtio_queue_notify(dev, TX_Q); > > /* Wait for host to consume the descriptor */ > + ret = 1; > time = SLOF_GetTimer() + VIRTIO_TIMEOUT; > while (*current_used_idx == last_used_idx) { > // do something better > mb(); > if (time < SLOF_GetTimer()) { > printf("virtio_serial_putchar failed! \n"); > - return 0; > + ret = 0; > + break; > } > } > > - return 1; > + virtio_free_desc(vq, id, dev->features); > + > + return ret; > } > > char virtio_serial_getchar(struct virtio_device *dev) > @@ -163,7 +167,7 @@ char virtio_serial_getchar(struct virtio_device *dev) > % vq_rx->size; > > /* Copy data to destination buffer */ > - memcpy(buf, (void *)virtio_modern64_to_cpu(dev, vq_rx->desc[id - 1].addr), RX_ELEM_SIZE); > + memcpy(buf, virtio_desc_addr(dev, RX_Q, id - 1), RX_ELEM_SIZE); > > /* Move indices to next entries */ > last_rx_idx = last_rx_idx + 1; > diff --git a/lib/libvirtio/virtio.c b/lib/libvirtio/virtio.c > index 3e615c65fc2c..9a0c3a96371a 100644 > --- a/lib/libvirtio/virtio.c > +++ b/lib/libvirtio/virtio.c > @@ -273,6 +273,17 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, > next %= vq->size; > > if (features & VIRTIO_F_VERSION_1) { > + if (features & VIRTIO_F_IOMMU_PLATFORM) { > + void *gpa = (void *) addr; > + > + if (!vq->desc_gpas) { > + fprintf(stderr, "IOMMU setup has not been done!\n"); > + return; > + } > + > + addr = SLOF_dma_map_in(gpa, len, 0); > + vq->desc_gpas[id] = gpa; > + } > desc->addr = cpu_to_le64(addr); > desc->len = cpu_to_le32(len); > desc->flags = cpu_to_le16(flags); > @@ -285,6 +296,32 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, > } > } > > +void virtio_free_desc(struct vqs *vq, int id, uint64_t features) > +{ > + struct vring_desc *desc; > + > + id %= vq->size; > + desc = &vq->desc[id]; > + > + if (features & VIRTIO_F_VERSION_1) { > + if (features & VIRTIO_F_IOMMU_PLATFORM) { > + SLOF_dma_map_out(le64_to_cpu(desc->addr), > + 0, le32_to_cpu(desc->len)); > + vq->desc_gpas[id] = NULL; > + } > + } > +} > + > +void *virtio_desc_addr(struct virtio_device *vdev, int queue, int id) > +{ > + struct vqs *vq = &vdev->vq[queue]; > + > + if (vq->desc_gpas) > + return vq->desc_gpas[id]; > + > + return (void *) virtio_modern64_to_cpu(vdev, vq->desc[id].addr); > +} > + > /** > * Reset virtio device > */ > @@ -326,6 +363,19 @@ static void virtio_set_qaddr(struct virtio_device *dev, int queue, unsigned long > uint64_t q_used; > uint32_t q_size = virtio_get_qsize(dev, queue); > > + if (dev->features & VIRTIO_F_IOMMU_PLATFORM) { > + unsigned long cb; > + > + cb = q_size * sizeof(struct vring_desc); > + cb += sizeof(struct vring_avail) + sizeof(uint16_t) * q_size; > + cb = VQ_ALIGN(cb); > + cb += sizeof(struct vring_used) + sizeof(uint16_t) * q_size; Shouldn't this be: cb += sizeof(struct vring_used) + sizeof(struct vring_used_elem) * q_size; The subsequent VQ_ALIGN probably masks it in the current code. > + cb = VQ_ALIGN(cb); > + q_desc = SLOF_dma_map_in((void *)q_desc, cb, 0); > + > + dev->vq[queue].bus_desc = q_desc; > + } > + > virtio_pci_write64(dev->common.addr + offset_of(struct virtio_dev_common, q_desc), q_desc); > q_avail = q_desc + q_size * sizeof(struct vring_desc); > virtio_pci_write64(dev->common.addr + offset_of(struct virtio_dev_common, q_avail), q_avail); > @@ -372,14 +422,41 @@ struct vqs *virtio_queue_init_vq(struct virtio_device *dev, unsigned int id) > > vq->avail->flags = virtio_cpu_to_modern16(dev, VRING_AVAIL_F_NO_INTERRUPT); > vq->avail->idx = 0; > + if (dev->features & VIRTIO_F_IOMMU_PLATFORM) > + vq->desc_gpas = SLOF_alloc_mem_aligned( > + vq->size * sizeof(vq->desc_gpas[0]), 4096); > > return vq; > } > > void virtio_queue_term_vq(struct virtio_device *dev, struct vqs *vq, unsigned int id) > { > - if (vq->desc) > + if (vq->desc_gpas) { > + int i; > + > + for (i = 0; i < vq->size; ++i) > + virtio_free_desc(vq, i, dev->features); Since virtio_free_desc() calls dma-map-out regardless of whether the IOBA was unmapped via a previous call by the driver, isn't there a chance we unmap a desc->addr IOBA that has since been re-used elsewhere? I guess it's unlikely for SLOF but seems like it could become an issue. Also not sure what side-effects we might expect for unused/uninitialized descriptors. In init-dma-window-vars we reserve DMA addr 0 so other code doesn't have to distinguish from NULL; seems like there's a chance that could get unmapped here. Maybe we should test for desc_gpas[i] before calling virtio_free_desc(), or maybe build that check into virtio_free_desc() itself? Also, is there a case where a driver would legitimately call term_vq() without calling virtio_free_desc() for a previously used descriptor? Even if they eventually get cleaned up by term_vq() it seems there's a chance the descriptor gets re-used prior to that and we still end up leaking the ioba. Maybe we should print a warning in this case to detect leaks in the driver code. > + > + memset(vq->desc_gpas, 0, vq->size * sizeof(vq->desc_gpas[0])); Is the memset really needed if we're freeing it right after? > + SLOF_free_mem(vq->desc_gpas, > + vq->size * sizeof(vq->desc_gpas[0])); > + } > + if (vq->desc) { > + if (dev->features & VIRTIO_F_IOMMU_PLATFORM) { > + unsigned long cb; > + uint32_t q_size = virtio_get_qsize(dev, id); > + > + cb = q_size * sizeof(struct vring_desc); > + cb += sizeof(struct vring_avail) + sizeof(uint16_t) * q_size; > + cb = VQ_ALIGN(cb); > + cb += sizeof(struct vring_used) + sizeof(uint16_t) * q_size; As per prior comment: cb += sizeof(struct vring_used) + sizeof(struct vring_used_elem) * q_size; Looks good otherwise. > + cb = VQ_ALIGN(cb); > + > + SLOF_dma_map_out(vq->bus_desc, 0, cb); > + } > + > SLOF_free_mem(vq->desc, virtio_vring_size(vq->size)); > + } > memset(vq, 0, sizeof(*vq)); > } > > @@ -473,6 +550,9 @@ int virtio_negotiate_guest_features(struct virtio_device *dev, uint64_t features > return -1; > } > > + if (host_features & VIRTIO_F_IOMMU_PLATFORM) > + features |= VIRTIO_F_IOMMU_PLATFORM; > + > virtio_set_guest_features(dev, features); > host_features = virtio_get_host_features(dev); > if ((host_features & features) != features) { > -- > 2.17.1 >
On 05/12/2019 08:16, Michael Roth wrote: > Quoting Alexey Kardashevskiy (2019-12-03 21:21:38) >> When QEMU is started with iommu_platform=on, the guest driver must accept >> it or the device will fail. >> >> This enables IOMMU support for virtio-net, -scsi, -block, -serial, -9p >> devices. -serial and -9p are only compile tested though. >> >> For virtio-net we map all RX buffers once and TX when xmit() is called >> and unmap older pages when we are about to reuse the VQ descriptor. >> As all other devices are synchronous, we unmap IOMMU pages right after >> completion of a transaction. >> >> This depends on QEMU's: >> https://patchwork.ozlabs.org/patch/1194067/ >> >> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> >> Tested-by: Michael Roth <mdroth@linux.vnet.ibm.com> >> --- >> Changes: >> v4: >> * ditched vqs->id in virtio_queue_init_vq >> >> v2: >> * added Mike's fs/dma-instance-function.fs >> * total rework >> --- >> lib/libvirtio/virtio.h | 5 +++ >> lib/libvirtio/virtio-9p.c | 4 ++ >> lib/libvirtio/virtio-blk.c | 4 ++ >> lib/libvirtio/virtio-net.c | 5 ++- >> lib/libvirtio/virtio-scsi.c | 5 +++ >> lib/libvirtio/virtio-serial.c | 12 +++-- >> lib/libvirtio/virtio.c | 82 ++++++++++++++++++++++++++++++++++- >> 7 files changed, 111 insertions(+), 6 deletions(-) >> >> diff --git a/lib/libvirtio/virtio.h b/lib/libvirtio/virtio.h >> index 7efc1e524d77..c4eafe40dd31 100644 >> --- a/lib/libvirtio/virtio.h >> +++ b/lib/libvirtio/virtio.h >> @@ -29,6 +29,7 @@ >> #define VIRTIO_F_RING_INDIRECT_DESC BIT(28) >> #define VIRTIO_F_RING_EVENT_IDX BIT(29) >> #define VIRTIO_F_VERSION_1 BIT(32) >> +#define VIRTIO_F_IOMMU_PLATFORM BIT(33) >> >> #define VIRTIO_TIMEOUT 5000 /* 5 sec timeout */ >> >> @@ -83,6 +84,8 @@ struct vqs { >> struct vring_desc *desc; >> struct vring_avail *avail; >> struct vring_used *used; >> + void **desc_gpas; /* to get gpa from desc->addr (which is ioba) */ >> + uint64_t bus_desc; >> }; >> >> struct virtio_device { >> @@ -108,6 +111,8 @@ extern struct vring_used *virtio_get_vring_used(struct virtio_device *dev, int q >> extern void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, >> uint64_t addr, uint32_t len, >> uint16_t flags, uint16_t next); >> +extern void virtio_free_desc(struct vqs *vq, int id, uint64_t features); >> +void *virtio_desc_addr(struct virtio_device *vdev, int queue, int id); >> extern struct vqs *virtio_queue_init_vq(struct virtio_device *dev, unsigned int id); >> extern void virtio_queue_term_vq(struct virtio_device *dev, struct vqs *vq, unsigned int id); >> >> diff --git a/lib/libvirtio/virtio-9p.c b/lib/libvirtio/virtio-9p.c >> index 426069fe9509..76078612b06e 100644 >> --- a/lib/libvirtio/virtio-9p.c >> +++ b/lib/libvirtio/virtio-9p.c >> @@ -129,6 +129,10 @@ static int virtio_9p_transact(void *opaque, uint8_t *tx, int tx_size, uint8_t *r >> // do something better >> mb(); >> } >> + >> + virtio_free_desc(vq, id, dev->features); >> + virtio_free_desc(vq, id + 1, dev->features); >> + >> if (i == 0) { >> return -1; >> } >> diff --git a/lib/libvirtio/virtio-blk.c b/lib/libvirtio/virtio-blk.c >> index a0dadbb0d6a8..0363038e559d 100644 >> --- a/lib/libvirtio/virtio-blk.c >> +++ b/lib/libvirtio/virtio-blk.c >> @@ -195,6 +195,10 @@ virtioblk_transfer(struct virtio_device *dev, char *buf, uint64_t blocknum, >> break; >> } >> >> + virtio_free_desc(vq, id, dev->features); >> + virtio_free_desc(vq, id + 1, dev->features); >> + virtio_free_desc(vq, id + 2, dev->features); >> + >> if (status == 0) >> return cnt; >> >> diff --git a/lib/libvirtio/virtio-net.c b/lib/libvirtio/virtio-net.c >> index ae67883020ef..5a0d19088527 100644 >> --- a/lib/libvirtio/virtio-net.c >> +++ b/lib/libvirtio/virtio-net.c >> @@ -255,6 +255,9 @@ static int virtionet_xmit(struct virtio_net *vnet, char *buf, int len) >> idx = virtio_modern16_to_cpu(vdev, vq_tx->avail->idx); >> id = (idx * 2) % vq_tx->size; >> >> + virtio_free_desc(vq_tx, id, vdev->features); >> + virtio_free_desc(vq_tx, id + 1, vdev->features); >> + >> /* Set up virtqueue descriptor for header */ >> virtio_fill_desc(vq_tx, id, vdev->features, (uint64_t)nethdr, >> net_hdr_size, VRING_DESC_F_NEXT, id + 1); >> @@ -317,7 +320,7 @@ static int virtionet_receive(struct virtio_net *vnet, char *buf, int maxlen) >> #endif >> >> /* Copy data to destination buffer */ >> - memcpy(buf, (void *)virtio_modern64_to_cpu(vdev, vq_rx->desc[id].addr), len); >> + memcpy(buf, virtio_desc_addr(vdev, VQ_RX, id), len); >> >> /* Move indices to next entries */ >> last_rx_idx = last_rx_idx + 1; >> diff --git a/lib/libvirtio/virtio-scsi.c b/lib/libvirtio/virtio-scsi.c >> index ae87e97e7330..96285e3891af 100644 >> --- a/lib/libvirtio/virtio-scsi.c >> +++ b/lib/libvirtio/virtio-scsi.c >> @@ -81,6 +81,11 @@ int virtioscsi_send(struct virtio_device *dev, >> break; >> } >> >> + virtio_free_desc(vq, id, dev->features); >> + virtio_free_desc(vq, id + 1, dev->features); >> + if (!(buf == NULL || buf_len == 0)) >> + virtio_free_desc(vq, id + 2, dev->features); >> + >> return 0; >> } >> >> diff --git a/lib/libvirtio/virtio-serial.c b/lib/libvirtio/virtio-serial.c >> index b8b898fc8bea..8826be96c24e 100644 >> --- a/lib/libvirtio/virtio-serial.c >> +++ b/lib/libvirtio/virtio-serial.c >> @@ -108,7 +108,7 @@ void virtio_serial_shutdown(struct virtio_device *dev) >> >> int virtio_serial_putchar(struct virtio_device *dev, char c) >> { >> - int id; >> + int id, ret; >> uint32_t time; >> volatile uint16_t *current_used_idx; >> uint16_t last_used_idx, avail_idx; >> @@ -133,17 +133,21 @@ int virtio_serial_putchar(struct virtio_device *dev, char c) >> virtio_queue_notify(dev, TX_Q); >> >> /* Wait for host to consume the descriptor */ >> + ret = 1; >> time = SLOF_GetTimer() + VIRTIO_TIMEOUT; >> while (*current_used_idx == last_used_idx) { >> // do something better >> mb(); >> if (time < SLOF_GetTimer()) { >> printf("virtio_serial_putchar failed! \n"); >> - return 0; >> + ret = 0; >> + break; >> } >> } >> >> - return 1; >> + virtio_free_desc(vq, id, dev->features); >> + >> + return ret; >> } >> >> char virtio_serial_getchar(struct virtio_device *dev) >> @@ -163,7 +167,7 @@ char virtio_serial_getchar(struct virtio_device *dev) >> % vq_rx->size; >> >> /* Copy data to destination buffer */ >> - memcpy(buf, (void *)virtio_modern64_to_cpu(dev, vq_rx->desc[id - 1].addr), RX_ELEM_SIZE); >> + memcpy(buf, virtio_desc_addr(dev, RX_Q, id - 1), RX_ELEM_SIZE); >> >> /* Move indices to next entries */ >> last_rx_idx = last_rx_idx + 1; >> diff --git a/lib/libvirtio/virtio.c b/lib/libvirtio/virtio.c >> index 3e615c65fc2c..9a0c3a96371a 100644 >> --- a/lib/libvirtio/virtio.c >> +++ b/lib/libvirtio/virtio.c >> @@ -273,6 +273,17 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, >> next %= vq->size; >> >> if (features & VIRTIO_F_VERSION_1) { >> + if (features & VIRTIO_F_IOMMU_PLATFORM) { >> + void *gpa = (void *) addr; >> + >> + if (!vq->desc_gpas) { >> + fprintf(stderr, "IOMMU setup has not been done!\n"); >> + return; >> + } >> + >> + addr = SLOF_dma_map_in(gpa, len, 0); >> + vq->desc_gpas[id] = gpa; >> + } >> desc->addr = cpu_to_le64(addr); >> desc->len = cpu_to_le32(len); >> desc->flags = cpu_to_le16(flags); >> @@ -285,6 +296,32 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, >> } >> } >> >> +void virtio_free_desc(struct vqs *vq, int id, uint64_t features) >> +{ >> + struct vring_desc *desc; >> + >> + id %= vq->size; >> + desc = &vq->desc[id]; >> + >> + if (features & VIRTIO_F_VERSION_1) { >> + if (features & VIRTIO_F_IOMMU_PLATFORM) { >> + SLOF_dma_map_out(le64_to_cpu(desc->addr), >> + 0, le32_to_cpu(desc->len)); >> + vq->desc_gpas[id] = NULL; >> + } >> + } >> +} >> + >> +void *virtio_desc_addr(struct virtio_device *vdev, int queue, int id) >> +{ >> + struct vqs *vq = &vdev->vq[queue]; >> + >> + if (vq->desc_gpas) >> + return vq->desc_gpas[id]; >> + >> + return (void *) virtio_modern64_to_cpu(vdev, vq->desc[id].addr); >> +} >> + >> /** >> * Reset virtio device >> */ >> @@ -326,6 +363,19 @@ static void virtio_set_qaddr(struct virtio_device *dev, int queue, unsigned long >> uint64_t q_used; >> uint32_t q_size = virtio_get_qsize(dev, queue); >> >> + if (dev->features & VIRTIO_F_IOMMU_PLATFORM) { >> + unsigned long cb; >> + >> + cb = q_size * sizeof(struct vring_desc); >> + cb += sizeof(struct vring_avail) + sizeof(uint16_t) * q_size; >> + cb = VQ_ALIGN(cb); >> + cb += sizeof(struct vring_used) + sizeof(uint16_t) * q_size; > > Shouldn't this be: > > cb += sizeof(struct vring_used) + sizeof(struct vring_used_elem) * q_size; Yes it should, good find! > > The subsequent VQ_ALIGN probably masks it in the current code. > >> + cb = VQ_ALIGN(cb); >> + q_desc = SLOF_dma_map_in((void *)q_desc, cb, 0); >> + >> + dev->vq[queue].bus_desc = q_desc; >> + } >> + >> virtio_pci_write64(dev->common.addr + offset_of(struct virtio_dev_common, q_desc), q_desc); >> q_avail = q_desc + q_size * sizeof(struct vring_desc); >> virtio_pci_write64(dev->common.addr + offset_of(struct virtio_dev_common, q_avail), q_avail); >> @@ -372,14 +422,41 @@ struct vqs *virtio_queue_init_vq(struct virtio_device *dev, unsigned int id) >> >> vq->avail->flags = virtio_cpu_to_modern16(dev, VRING_AVAIL_F_NO_INTERRUPT); >> vq->avail->idx = 0; >> + if (dev->features & VIRTIO_F_IOMMU_PLATFORM) >> + vq->desc_gpas = SLOF_alloc_mem_aligned( >> + vq->size * sizeof(vq->desc_gpas[0]), 4096); >> >> return vq; >> } >> >> void virtio_queue_term_vq(struct virtio_device *dev, struct vqs *vq, unsigned int id) >> { >> - if (vq->desc) >> + if (vq->desc_gpas) { >> + int i; >> + >> + for (i = 0; i < vq->size; ++i) >> + virtio_free_desc(vq, i, dev->features); > > Since virtio_free_desc() calls dma-map-out regardless of whether the > IOBA was unmapped via a previous call by the driver, isn't there a chance > we unmap a desc->addr IOBA that has since been re-used elsewhere? I > guess it's unlikely for SLOF but seems like it could become an issue. As far as I understand it, only one instance of any driver can be open at the time so the DMA window is used exclusevely by this particular device. So when a queue is being terminated, I expect no other user around. > Also not sure what side-effects we might expect for unused/uninitialized > descriptors. In init-dma-window-vars we reserve DMA addr 0 so other code > doesn't have to distinguish from NULL; seems like there's a chance that > could get unmapped here. > > Maybe we should test for desc_gpas[i] before calling virtio_free_desc(), > or maybe build that check into virtio_free_desc() itself? It still makes sense and makes things cleaner, I'll do that. > Also, is there a case where a driver would legitimately call term_vq() > without calling virtio_free_desc() for a previously used descriptor? > Even if they eventually get cleaned up by term_vq() it seems there's a > chance the descriptor gets re-used prior to that and we still end up > leaking the ioba. Maybe we should print a warning in this case to detect > leaks in the driver code. virtio-net does this - if it ever sent anything, a couple of buffers are always mapped (see virtionet_xmit()); receive buffers are simply always mapped until virtio_queue_term_vq() is called. Every other device releases a descriptor straight away. So I am not sure how I add an easy detection for all cases but virtio-net. > >> + >> + memset(vq->desc_gpas, 0, vq->size * sizeof(vq->desc_gpas[0])); > > Is the memset really needed if we're freeing it right after? No, not really, I'll remove it. > >> + SLOF_free_mem(vq->desc_gpas, >> + vq->size * sizeof(vq->desc_gpas[0])); >> + } >> + if (vq->desc) { >> + if (dev->features & VIRTIO_F_IOMMU_PLATFORM) { >> + unsigned long cb; >> + uint32_t q_size = virtio_get_qsize(dev, id); >> + >> + cb = q_size * sizeof(struct vring_desc); >> + cb += sizeof(struct vring_avail) + sizeof(uint16_t) * q_size; >> + cb = VQ_ALIGN(cb); >> + cb += sizeof(struct vring_used) + sizeof(uint16_t) * q_size; > > As per prior comment: > > cb += sizeof(struct vring_used) + sizeof(struct vring_used_elem) * q_size; Oh right, fixed. Thanks, > > > Looks good otherwise. > >> + cb = VQ_ALIGN(cb); >> + >> + SLOF_dma_map_out(vq->bus_desc, 0, cb); >> + } >> + >> SLOF_free_mem(vq->desc, virtio_vring_size(vq->size)); >> + } >> memset(vq, 0, sizeof(*vq)); >> } >> >> @@ -473,6 +550,9 @@ int virtio_negotiate_guest_features(struct virtio_device *dev, uint64_t features >> return -1; >> } >> >> + if (host_features & VIRTIO_F_IOMMU_PLATFORM) >> + features |= VIRTIO_F_IOMMU_PLATFORM; >> + >> virtio_set_guest_features(dev, features); >> host_features = virtio_get_host_features(dev); >> if ((host_features & features) != features) { >> -- >> 2.17.1 >>
diff --git a/lib/libvirtio/virtio.h b/lib/libvirtio/virtio.h index 7efc1e524d77..c4eafe40dd31 100644 --- a/lib/libvirtio/virtio.h +++ b/lib/libvirtio/virtio.h @@ -29,6 +29,7 @@ #define VIRTIO_F_RING_INDIRECT_DESC BIT(28) #define VIRTIO_F_RING_EVENT_IDX BIT(29) #define VIRTIO_F_VERSION_1 BIT(32) +#define VIRTIO_F_IOMMU_PLATFORM BIT(33) #define VIRTIO_TIMEOUT 5000 /* 5 sec timeout */ @@ -83,6 +84,8 @@ struct vqs { struct vring_desc *desc; struct vring_avail *avail; struct vring_used *used; + void **desc_gpas; /* to get gpa from desc->addr (which is ioba) */ + uint64_t bus_desc; }; struct virtio_device { @@ -108,6 +111,8 @@ extern struct vring_used *virtio_get_vring_used(struct virtio_device *dev, int q extern void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, uint64_t addr, uint32_t len, uint16_t flags, uint16_t next); +extern void virtio_free_desc(struct vqs *vq, int id, uint64_t features); +void *virtio_desc_addr(struct virtio_device *vdev, int queue, int id); extern struct vqs *virtio_queue_init_vq(struct virtio_device *dev, unsigned int id); extern void virtio_queue_term_vq(struct virtio_device *dev, struct vqs *vq, unsigned int id); diff --git a/lib/libvirtio/virtio-9p.c b/lib/libvirtio/virtio-9p.c index 426069fe9509..76078612b06e 100644 --- a/lib/libvirtio/virtio-9p.c +++ b/lib/libvirtio/virtio-9p.c @@ -129,6 +129,10 @@ static int virtio_9p_transact(void *opaque, uint8_t *tx, int tx_size, uint8_t *r // do something better mb(); } + + virtio_free_desc(vq, id, dev->features); + virtio_free_desc(vq, id + 1, dev->features); + if (i == 0) { return -1; } diff --git a/lib/libvirtio/virtio-blk.c b/lib/libvirtio/virtio-blk.c index a0dadbb0d6a8..0363038e559d 100644 --- a/lib/libvirtio/virtio-blk.c +++ b/lib/libvirtio/virtio-blk.c @@ -195,6 +195,10 @@ virtioblk_transfer(struct virtio_device *dev, char *buf, uint64_t blocknum, break; } + virtio_free_desc(vq, id, dev->features); + virtio_free_desc(vq, id + 1, dev->features); + virtio_free_desc(vq, id + 2, dev->features); + if (status == 0) return cnt; diff --git a/lib/libvirtio/virtio-net.c b/lib/libvirtio/virtio-net.c index ae67883020ef..5a0d19088527 100644 --- a/lib/libvirtio/virtio-net.c +++ b/lib/libvirtio/virtio-net.c @@ -255,6 +255,9 @@ static int virtionet_xmit(struct virtio_net *vnet, char *buf, int len) idx = virtio_modern16_to_cpu(vdev, vq_tx->avail->idx); id = (idx * 2) % vq_tx->size; + virtio_free_desc(vq_tx, id, vdev->features); + virtio_free_desc(vq_tx, id + 1, vdev->features); + /* Set up virtqueue descriptor for header */ virtio_fill_desc(vq_tx, id, vdev->features, (uint64_t)nethdr, net_hdr_size, VRING_DESC_F_NEXT, id + 1); @@ -317,7 +320,7 @@ static int virtionet_receive(struct virtio_net *vnet, char *buf, int maxlen) #endif /* Copy data to destination buffer */ - memcpy(buf, (void *)virtio_modern64_to_cpu(vdev, vq_rx->desc[id].addr), len); + memcpy(buf, virtio_desc_addr(vdev, VQ_RX, id), len); /* Move indices to next entries */ last_rx_idx = last_rx_idx + 1; diff --git a/lib/libvirtio/virtio-scsi.c b/lib/libvirtio/virtio-scsi.c index ae87e97e7330..96285e3891af 100644 --- a/lib/libvirtio/virtio-scsi.c +++ b/lib/libvirtio/virtio-scsi.c @@ -81,6 +81,11 @@ int virtioscsi_send(struct virtio_device *dev, break; } + virtio_free_desc(vq, id, dev->features); + virtio_free_desc(vq, id + 1, dev->features); + if (!(buf == NULL || buf_len == 0)) + virtio_free_desc(vq, id + 2, dev->features); + return 0; } diff --git a/lib/libvirtio/virtio-serial.c b/lib/libvirtio/virtio-serial.c index b8b898fc8bea..8826be96c24e 100644 --- a/lib/libvirtio/virtio-serial.c +++ b/lib/libvirtio/virtio-serial.c @@ -108,7 +108,7 @@ void virtio_serial_shutdown(struct virtio_device *dev) int virtio_serial_putchar(struct virtio_device *dev, char c) { - int id; + int id, ret; uint32_t time; volatile uint16_t *current_used_idx; uint16_t last_used_idx, avail_idx; @@ -133,17 +133,21 @@ int virtio_serial_putchar(struct virtio_device *dev, char c) virtio_queue_notify(dev, TX_Q); /* Wait for host to consume the descriptor */ + ret = 1; time = SLOF_GetTimer() + VIRTIO_TIMEOUT; while (*current_used_idx == last_used_idx) { // do something better mb(); if (time < SLOF_GetTimer()) { printf("virtio_serial_putchar failed! \n"); - return 0; + ret = 0; + break; } } - return 1; + virtio_free_desc(vq, id, dev->features); + + return ret; } char virtio_serial_getchar(struct virtio_device *dev) @@ -163,7 +167,7 @@ char virtio_serial_getchar(struct virtio_device *dev) % vq_rx->size; /* Copy data to destination buffer */ - memcpy(buf, (void *)virtio_modern64_to_cpu(dev, vq_rx->desc[id - 1].addr), RX_ELEM_SIZE); + memcpy(buf, virtio_desc_addr(dev, RX_Q, id - 1), RX_ELEM_SIZE); /* Move indices to next entries */ last_rx_idx = last_rx_idx + 1; diff --git a/lib/libvirtio/virtio.c b/lib/libvirtio/virtio.c index 3e615c65fc2c..9a0c3a96371a 100644 --- a/lib/libvirtio/virtio.c +++ b/lib/libvirtio/virtio.c @@ -273,6 +273,17 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, next %= vq->size; if (features & VIRTIO_F_VERSION_1) { + if (features & VIRTIO_F_IOMMU_PLATFORM) { + void *gpa = (void *) addr; + + if (!vq->desc_gpas) { + fprintf(stderr, "IOMMU setup has not been done!\n"); + return; + } + + addr = SLOF_dma_map_in(gpa, len, 0); + vq->desc_gpas[id] = gpa; + } desc->addr = cpu_to_le64(addr); desc->len = cpu_to_le32(len); desc->flags = cpu_to_le16(flags); @@ -285,6 +296,32 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, } } +void virtio_free_desc(struct vqs *vq, int id, uint64_t features) +{ + struct vring_desc *desc; + + id %= vq->size; + desc = &vq->desc[id]; + + if (features & VIRTIO_F_VERSION_1) { + if (features & VIRTIO_F_IOMMU_PLATFORM) { + SLOF_dma_map_out(le64_to_cpu(desc->addr), + 0, le32_to_cpu(desc->len)); + vq->desc_gpas[id] = NULL; + } + } +} + +void *virtio_desc_addr(struct virtio_device *vdev, int queue, int id) +{ + struct vqs *vq = &vdev->vq[queue]; + + if (vq->desc_gpas) + return vq->desc_gpas[id]; + + return (void *) virtio_modern64_to_cpu(vdev, vq->desc[id].addr); +} + /** * Reset virtio device */ @@ -326,6 +363,19 @@ static void virtio_set_qaddr(struct virtio_device *dev, int queue, unsigned long uint64_t q_used; uint32_t q_size = virtio_get_qsize(dev, queue); + if (dev->features & VIRTIO_F_IOMMU_PLATFORM) { + unsigned long cb; + + cb = q_size * sizeof(struct vring_desc); + cb += sizeof(struct vring_avail) + sizeof(uint16_t) * q_size; + cb = VQ_ALIGN(cb); + cb += sizeof(struct vring_used) + sizeof(uint16_t) * q_size; + cb = VQ_ALIGN(cb); + q_desc = SLOF_dma_map_in((void *)q_desc, cb, 0); + + dev->vq[queue].bus_desc = q_desc; + } + virtio_pci_write64(dev->common.addr + offset_of(struct virtio_dev_common, q_desc), q_desc); q_avail = q_desc + q_size * sizeof(struct vring_desc); virtio_pci_write64(dev->common.addr + offset_of(struct virtio_dev_common, q_avail), q_avail); @@ -372,14 +422,41 @@ struct vqs *virtio_queue_init_vq(struct virtio_device *dev, unsigned int id) vq->avail->flags = virtio_cpu_to_modern16(dev, VRING_AVAIL_F_NO_INTERRUPT); vq->avail->idx = 0; + if (dev->features & VIRTIO_F_IOMMU_PLATFORM) + vq->desc_gpas = SLOF_alloc_mem_aligned( + vq->size * sizeof(vq->desc_gpas[0]), 4096); return vq; } void virtio_queue_term_vq(struct virtio_device *dev, struct vqs *vq, unsigned int id) { - if (vq->desc) + if (vq->desc_gpas) { + int i; + + for (i = 0; i < vq->size; ++i) + virtio_free_desc(vq, i, dev->features); + + memset(vq->desc_gpas, 0, vq->size * sizeof(vq->desc_gpas[0])); + SLOF_free_mem(vq->desc_gpas, + vq->size * sizeof(vq->desc_gpas[0])); + } + if (vq->desc) { + if (dev->features & VIRTIO_F_IOMMU_PLATFORM) { + unsigned long cb; + uint32_t q_size = virtio_get_qsize(dev, id); + + cb = q_size * sizeof(struct vring_desc); + cb += sizeof(struct vring_avail) + sizeof(uint16_t) * q_size; + cb = VQ_ALIGN(cb); + cb += sizeof(struct vring_used) + sizeof(uint16_t) * q_size; + cb = VQ_ALIGN(cb); + + SLOF_dma_map_out(vq->bus_desc, 0, cb); + } + SLOF_free_mem(vq->desc, virtio_vring_size(vq->size)); + } memset(vq, 0, sizeof(*vq)); } @@ -473,6 +550,9 @@ int virtio_negotiate_guest_features(struct virtio_device *dev, uint64_t features return -1; } + if (host_features & VIRTIO_F_IOMMU_PLATFORM) + features |= VIRTIO_F_IOMMU_PLATFORM; + virtio_set_guest_features(dev, features); host_features = virtio_get_host_features(dev); if ((host_features & features) != features) {