Message ID | 20191120235801.4928-5-aik@ozlabs.ru |
---|---|
State | Superseded |
Headers | show |
Series | virtio: Enable iommu_platform=on | expand |
Quoting Alexey Kardashevskiy (2019-11-20 17:58:01) > When QEMU is started with iommu_platform=on, the guest driver must accept > it or the device will fail. > > This enables IOMMU support for virtio-net, -scsi, -block, -serial, -9p > devices. -serial and -9p are only compile tested though. > > For virtio-net we map all RX buffers once and TX when xmit() is called > and unmap older pages when we are about to reuse the VQ descriptor. > As all other devices are synchronous, we unmap IOMMU pages right after > completion of a transaction. > > This depends on QEMU's: > https://patchwork.ozlabs.org/patch/1194067/ > > Copied-bits-and-pieces-from: Michael Roth <mdroth@linux.vnet.ibm.com> > Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> > --- > Changes: > v2: > * added Mike's fs/dma-instance-function.fs > * total rework > --- > board-qemu/slof/Makefile | 1 + > lib/libvirtio/virtio.h | 5 ++ > lib/libvirtio/virtio-9p.c | 4 ++ > lib/libvirtio/virtio-blk.c | 4 ++ > lib/libvirtio/virtio-net.c | 5 +- > lib/libvirtio/virtio-scsi.c | 5 ++ > lib/libvirtio/virtio-serial.c | 12 +++-- > lib/libvirtio/virtio.c | 82 +++++++++++++++++++++++++++++++- > board-qemu/slof/OF.fs | 3 ++ > slof/fs/dma-instance-function.fs | 28 +++++++++++ > 10 files changed, 143 insertions(+), 6 deletions(-) > create mode 100644 slof/fs/dma-instance-function.fs > > diff --git a/board-qemu/slof/Makefile b/board-qemu/slof/Makefile > index 2263e751bde9..d7ed2d7a6f18 100644 > --- a/board-qemu/slof/Makefile > +++ b/board-qemu/slof/Makefile > @@ -99,6 +99,7 @@ OF_FFS_FILES = \ > $(SLOFCMNDIR)/fs/graphics.fs \ > $(SLOFCMNDIR)/fs/generic-disk.fs \ > $(SLOFCMNDIR)/fs/dma-function.fs \ > + $(SLOFCMNDIR)/fs/dma-instance-function.fs \ > $(SLOFCMNDIR)/fs/pci-device.fs \ > $(SLOFCMNDIR)/fs/pci-bridge.fs \ > $(SLOFCMNDIR)/fs/pci-properties.fs \ > diff --git a/lib/libvirtio/virtio.h b/lib/libvirtio/virtio.h > index 4927a97f9f5f..250120b3112b 100644 > --- a/lib/libvirtio/virtio.h > +++ b/lib/libvirtio/virtio.h > @@ -29,6 +29,7 @@ > #define VIRTIO_F_RING_INDIRECT_DESC BIT(28) > #define VIRTIO_F_RING_EVENT_IDX BIT(29) > #define VIRTIO_F_VERSION_1 BIT(32) > +#define VIRTIO_F_IOMMU_PLATFORM BIT(33) > > #define VIRTIO_TIMEOUT 5000 /* 5 sec timeout */ > > @@ -84,6 +85,8 @@ struct vqs { > struct vring_desc *desc; > struct vring_avail *avail; > struct vring_used *used; > + void **desc_gpas; /* to get gpa from desc->addr (which is ioba) */ > + uint64_t bus_desc; > }; > > struct virtio_device { > @@ -109,6 +112,8 @@ extern struct vring_used *virtio_get_vring_used(struct virtio_device *dev, int q > extern void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, > uint64_t addr, uint32_t len, > uint16_t flags, uint16_t next); > +extern void virtio_free_desc(struct vqs *vq, int id, uint64_t features); > +void *virtio_desc_addr(struct virtio_device *vdev, int queue, int id); > extern struct vqs *virtio_queue_init_vq(struct virtio_device *dev, unsigned int id); > extern void virtio_queue_term_vq(struct virtio_device *dev, struct vqs *vq, unsigned int id); > > diff --git a/lib/libvirtio/virtio-9p.c b/lib/libvirtio/virtio-9p.c > index 426069fe9509..76078612b06e 100644 > --- a/lib/libvirtio/virtio-9p.c > +++ b/lib/libvirtio/virtio-9p.c > @@ -129,6 +129,10 @@ static int virtio_9p_transact(void *opaque, uint8_t *tx, int tx_size, uint8_t *r > // do something better > mb(); > } > + > + virtio_free_desc(vq, id, dev->features); > + virtio_free_desc(vq, id + 1, dev->features); > + > if (i == 0) { > return -1; > } > diff --git a/lib/libvirtio/virtio-blk.c b/lib/libvirtio/virtio-blk.c > index a0dadbb0d6a8..0363038e559d 100644 > --- a/lib/libvirtio/virtio-blk.c > +++ b/lib/libvirtio/virtio-blk.c > @@ -195,6 +195,10 @@ virtioblk_transfer(struct virtio_device *dev, char *buf, uint64_t blocknum, > break; > } > > + virtio_free_desc(vq, id, dev->features); > + virtio_free_desc(vq, id + 1, dev->features); > + virtio_free_desc(vq, id + 2, dev->features); > + > if (status == 0) > return cnt; > > diff --git a/lib/libvirtio/virtio-net.c b/lib/libvirtio/virtio-net.c > index ae67883020ef..5a0d19088527 100644 > --- a/lib/libvirtio/virtio-net.c > +++ b/lib/libvirtio/virtio-net.c > @@ -255,6 +255,9 @@ static int virtionet_xmit(struct virtio_net *vnet, char *buf, int len) > idx = virtio_modern16_to_cpu(vdev, vq_tx->avail->idx); > id = (idx * 2) % vq_tx->size; > > + virtio_free_desc(vq_tx, id, vdev->features); > + virtio_free_desc(vq_tx, id + 1, vdev->features); > + > /* Set up virtqueue descriptor for header */ > virtio_fill_desc(vq_tx, id, vdev->features, (uint64_t)nethdr, > net_hdr_size, VRING_DESC_F_NEXT, id + 1); > @@ -317,7 +320,7 @@ static int virtionet_receive(struct virtio_net *vnet, char *buf, int maxlen) > #endif > > /* Copy data to destination buffer */ > - memcpy(buf, (void *)virtio_modern64_to_cpu(vdev, vq_rx->desc[id].addr), len); > + memcpy(buf, virtio_desc_addr(vdev, VQ_RX, id), len); > > /* Move indices to next entries */ > last_rx_idx = last_rx_idx + 1; > diff --git a/lib/libvirtio/virtio-scsi.c b/lib/libvirtio/virtio-scsi.c > index 251661e8d466..44573d10e24c 100644 > --- a/lib/libvirtio/virtio-scsi.c > +++ b/lib/libvirtio/virtio-scsi.c > @@ -81,6 +81,11 @@ int virtioscsi_send(struct virtio_device *dev, > break; > } > > + virtio_free_desc(vq, id, dev->features); > + virtio_free_desc(vq, id + 1, dev->features); > + if (!(buf == NULL || buf_len == 0)) > + virtio_free_desc(vq, id + 2, dev->features); > + > return 0; > } > > diff --git a/lib/libvirtio/virtio-serial.c b/lib/libvirtio/virtio-serial.c > index 78ab0e25d7db..66fbfe8b6cc6 100644 > --- a/lib/libvirtio/virtio-serial.c > +++ b/lib/libvirtio/virtio-serial.c > @@ -108,7 +108,7 @@ void virtio_serial_shutdown(struct virtio_device *dev) > > int virtio_serial_putchar(struct virtio_device *dev, char c) > { > - int id; > + int id, ret; > uint32_t vq_size, time; > volatile uint16_t *current_used_idx; > uint16_t last_used_idx, avail_idx; > @@ -135,17 +135,21 @@ int virtio_serial_putchar(struct virtio_device *dev, char c) > virtio_queue_notify(dev, TX_Q); > > /* Wait for host to consume the descriptor */ > + ret = 1; > time = SLOF_GetTimer() + VIRTIO_TIMEOUT; > while (*current_used_idx == last_used_idx) { > // do something better > mb(); > if (time < SLOF_GetTimer()) { > printf("virtio_serial_putchar failed! \n"); > - return 0; > + ret = 0; > + break; > } > } > > - return 1; > + virtio_free_desc(vq, id, dev->features); > + > + return ret; > } > > char virtio_serial_getchar(struct virtio_device *dev) > @@ -165,7 +169,7 @@ char virtio_serial_getchar(struct virtio_device *dev) > % vq_rx->size; > > /* Copy data to destination buffer */ > - memcpy(buf, (void *)virtio_modern64_to_cpu(dev, vq_rx->desc[id - 1].addr), RX_ELEM_SIZE); > + memcpy(buf, virtio_desc_addr(dev, RX_Q, id - 1), RX_ELEM_SIZE); > > /* Move indices to next entries */ > last_rx_idx = last_rx_idx + 1; > diff --git a/lib/libvirtio/virtio.c b/lib/libvirtio/virtio.c > index fff0c7e54c19..d4db5f656888 100644 > --- a/lib/libvirtio/virtio.c > +++ b/lib/libvirtio/virtio.c > @@ -273,6 +273,17 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, > next %= vq->size; > > if (features & VIRTIO_F_VERSION_1) { > + if (features & VIRTIO_F_IOMMU_PLATFORM) { > + void *gpa = (void *) addr; > + > + if (!vq->desc_gpas) { > + fprintf(stderr, "IOMMU setup has not been done!\n"); > + return; > + } > + > + addr = SLOF_dma_map_in(gpa, len, 0); > + vq->desc_gpas[id] = gpa; > + } > desc->addr = cpu_to_le64(addr); > desc->len = cpu_to_le32(len); > desc->flags = cpu_to_le16(flags); > @@ -285,6 +296,32 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, > } > } > > +void virtio_free_desc(struct vqs *vq, int id, uint64_t features) > +{ > + struct vring_desc *desc; > + > + id %= vq->size; > + desc = &vq->desc[id]; > + > + if (features & VIRTIO_F_VERSION_1) { > + if (features & VIRTIO_F_IOMMU_PLATFORM) { > + SLOF_dma_map_out(le64_to_cpu(desc->addr), > + 0, le32_to_cpu(desc->len)); > + vq->desc_gpas[id] = NULL; > + } > + } > +} On a 4.18 RHEL8 kernel I was unable to boot from virtio-blk, it complains the disk isn't bootable and the H_PUT_TCE traces show: qemu-system-ppc-19019 [045] .... 786844.067356: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x1000,0xe46b003,0xdc73350 qemu-system-ppc-19019 [045] .... 786844.067358: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS qemu-system-ppc-19019 [045] .... 786844.067361: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x2000,0xe46c003,0xdc73350 qemu-system-ppc-19019 [045] .... 786844.067362: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS qemu-system-ppc-19019 [041] .... 786844.070532: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x3000,0xe67b003,0xdc73490 qemu-system-ppc-19019 [041] .... 786844.070533: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS qemu-system-ppc-19019 [041] .... 786844.070817: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x4000,0xe469003,0xdc73490 qemu-system-ppc-19019 [041] .... 786844.070818: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS qemu-system-ppc-19019 [041] .... 786844.071102: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x5000,0xe67a003,0xdc73490 qemu-system-ppc-19019 [041] .... 786844.071103: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS qemu-system-ppc-19019 [041] .... 786844.071123: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS qemu-system-ppc-19019 [041] .... 786844.073532: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x3228,0x0,0xdc73488 qemu-system-ppc-19019 [041] .... 786844.073534: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_PARAMETER If I disable in-kernel TCE acceleration, or use TCG, it seems to work fine. If I add the following hack it seems to fix things: @@ -305,7 +307,8 @@ void virtio_free_desc(struct vqs *vq, int id, uint64_t features) if (features & VIRTIO_F_VERSION_1) { if (features & VIRTIO_F_IOMMU_PLATFORM) { - SLOF_dma_map_out(le64_to_cpu(desc->addr), + fprintf(stderr, "mapping out desc->addr: %llx\n", le64_to_cpu(desc->addr)); + SLOF_dma_map_out(le64_to_cpu(desc->addr) & ~(0xfff), 0, le32_to_cpu(desc->len)); vq->desc_gpas[id] = NULL; } I guess the kernel is a bit stricter about unmapping unaligned TCE entries? If so, maybe we should align automatically in dma-map-out? > + > +void *virtio_desc_addr(struct virtio_device *vdev, int queue, int id) > +{ > + struct vqs *vq = &vdev->vq[queue]; > + > + if (vq->desc_gpas) > + return vq->desc_gpas[id]; > + > + return (void *) virtio_modern64_to_cpu(vdev, vq->desc[id].addr); > +} > + > /** > * Reset virtio device > */ <snip> > diff --git a/board-qemu/slof/OF.fs b/board-qemu/slof/OF.fs > index a85f6c558e67..3e117ad03e09 100644 > --- a/board-qemu/slof/OF.fs > +++ b/board-qemu/slof/OF.fs > @@ -143,6 +143,9 @@ check-for-nvramrc > > 8a0 cp > > +\ For DMA functions used by client/package instances. > +#include "dma-instance-function.fs" > + > \ The client interface. > #include "client.fs" > \ ELF binary file format. > diff --git a/slof/fs/dma-instance-function.fs b/slof/fs/dma-instance-function.fs > new file mode 100644 > index 000000000000..057181be0f44 > --- /dev/null > +++ b/slof/fs/dma-instance-function.fs > @@ -0,0 +1,28 @@ > +\ ****************************************************************************/ > +\ * Copyright (c) 2011 IBM Corporation I copy/pasted the license header and forgot to update, should probably change to 2019. Also, if you think you need it: Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com> > +\ * All rights reserved. > +\ * This program and the accompanying materials > +\ * are made available under the terms of the BSD License > +\ * which accompanies this distribution, and is available at > +\ * http://www.opensource.org/licenses/bsd-license.php > +\ * > +\ * Contributors: > +\ * IBM Corporation - initial implementation > +\ ****************************************************************************/ > + > +\ DMA memory allocation functions > +: dma-alloc ( size -- virt ) > + s" dma-alloc" $call-parent > +; > + > +: dma-free ( virt size -- ) > + s" dma-free" $call-parent > +; > + > +: dma-map-in ( virt size cacheable? -- devaddr ) > + s" dma-map-in" $call-parent > +; > + > +: dma-map-out ( virt devaddr size -- ) > + s" dma-map-out" $call-parent > +; > -- > 2.17.1 > > _______________________________________________ > SLOF mailing list > SLOF@lists.ozlabs.org > https://lists.ozlabs.org/listinfo/slof
On Thu, Nov 21, 2019 at 06:09:56PM -0600, Michael Roth wrote: > Quoting Alexey Kardashevskiy (2019-11-20 17:58:01) > > When QEMU is started with iommu_platform=on, the guest driver must accept > > it or the device will fail. > > > > This enables IOMMU support for virtio-net, -scsi, -block, -serial, -9p > > devices. -serial and -9p are only compile tested though. > > > > For virtio-net we map all RX buffers once and TX when xmit() is called > > and unmap older pages when we are about to reuse the VQ descriptor. > > As all other devices are synchronous, we unmap IOMMU pages right after > > completion of a transaction. > > > > This depends on QEMU's: > > https://patchwork.ozlabs.org/patch/1194067/ > > > > Copied-bits-and-pieces-from: Michael Roth <mdroth@linux.vnet.ibm.com> > > Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> > > --- > > Changes: > > v2: > > * added Mike's fs/dma-instance-function.fs > > * total rework > > --- > > board-qemu/slof/Makefile | 1 + > > lib/libvirtio/virtio.h | 5 ++ > > lib/libvirtio/virtio-9p.c | 4 ++ > > lib/libvirtio/virtio-blk.c | 4 ++ > > lib/libvirtio/virtio-net.c | 5 +- > > lib/libvirtio/virtio-scsi.c | 5 ++ > > lib/libvirtio/virtio-serial.c | 12 +++-- > > lib/libvirtio/virtio.c | 82 +++++++++++++++++++++++++++++++- > > board-qemu/slof/OF.fs | 3 ++ > > slof/fs/dma-instance-function.fs | 28 +++++++++++ > > 10 files changed, 143 insertions(+), 6 deletions(-) > > create mode 100644 slof/fs/dma-instance-function.fs > > > > diff --git a/board-qemu/slof/Makefile b/board-qemu/slof/Makefile > > index 2263e751bde9..d7ed2d7a6f18 100644 > > --- a/board-qemu/slof/Makefile > > +++ b/board-qemu/slof/Makefile > > @@ -99,6 +99,7 @@ OF_FFS_FILES = \ > > $(SLOFCMNDIR)/fs/graphics.fs \ > > $(SLOFCMNDIR)/fs/generic-disk.fs \ > > $(SLOFCMNDIR)/fs/dma-function.fs \ > > + $(SLOFCMNDIR)/fs/dma-instance-function.fs \ > > $(SLOFCMNDIR)/fs/pci-device.fs \ > > $(SLOFCMNDIR)/fs/pci-bridge.fs \ > > $(SLOFCMNDIR)/fs/pci-properties.fs \ > > diff --git a/lib/libvirtio/virtio.h b/lib/libvirtio/virtio.h > > index 4927a97f9f5f..250120b3112b 100644 > > --- a/lib/libvirtio/virtio.h > > +++ b/lib/libvirtio/virtio.h > > @@ -29,6 +29,7 @@ > > #define VIRTIO_F_RING_INDIRECT_DESC BIT(28) > > #define VIRTIO_F_RING_EVENT_IDX BIT(29) > > #define VIRTIO_F_VERSION_1 BIT(32) > > +#define VIRTIO_F_IOMMU_PLATFORM BIT(33) > > > > #define VIRTIO_TIMEOUT 5000 /* 5 sec timeout */ > > > > @@ -84,6 +85,8 @@ struct vqs { > > struct vring_desc *desc; > > struct vring_avail *avail; > > struct vring_used *used; > > + void **desc_gpas; /* to get gpa from desc->addr (which is ioba) */ > > + uint64_t bus_desc; > > }; > > > > struct virtio_device { > > @@ -109,6 +112,8 @@ extern struct vring_used *virtio_get_vring_used(struct virtio_device *dev, int q > > extern void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, > > uint64_t addr, uint32_t len, > > uint16_t flags, uint16_t next); > > +extern void virtio_free_desc(struct vqs *vq, int id, uint64_t features); > > +void *virtio_desc_addr(struct virtio_device *vdev, int queue, int id); > > extern struct vqs *virtio_queue_init_vq(struct virtio_device *dev, unsigned int id); > > extern void virtio_queue_term_vq(struct virtio_device *dev, struct vqs *vq, unsigned int id); > > > > diff --git a/lib/libvirtio/virtio-9p.c b/lib/libvirtio/virtio-9p.c > > index 426069fe9509..76078612b06e 100644 > > --- a/lib/libvirtio/virtio-9p.c > > +++ b/lib/libvirtio/virtio-9p.c > > @@ -129,6 +129,10 @@ static int virtio_9p_transact(void *opaque, uint8_t *tx, int tx_size, uint8_t *r > > // do something better > > mb(); > > } > > + > > + virtio_free_desc(vq, id, dev->features); > > + virtio_free_desc(vq, id + 1, dev->features); > > + > > if (i == 0) { > > return -1; > > } > > diff --git a/lib/libvirtio/virtio-blk.c b/lib/libvirtio/virtio-blk.c > > index a0dadbb0d6a8..0363038e559d 100644 > > --- a/lib/libvirtio/virtio-blk.c > > +++ b/lib/libvirtio/virtio-blk.c > > @@ -195,6 +195,10 @@ virtioblk_transfer(struct virtio_device *dev, char *buf, uint64_t blocknum, > > break; > > } > > > > + virtio_free_desc(vq, id, dev->features); > > + virtio_free_desc(vq, id + 1, dev->features); > > + virtio_free_desc(vq, id + 2, dev->features); > > + > > if (status == 0) > > return cnt; > > > > diff --git a/lib/libvirtio/virtio-net.c b/lib/libvirtio/virtio-net.c > > index ae67883020ef..5a0d19088527 100644 > > --- a/lib/libvirtio/virtio-net.c > > +++ b/lib/libvirtio/virtio-net.c > > @@ -255,6 +255,9 @@ static int virtionet_xmit(struct virtio_net *vnet, char *buf, int len) > > idx = virtio_modern16_to_cpu(vdev, vq_tx->avail->idx); > > id = (idx * 2) % vq_tx->size; > > > > + virtio_free_desc(vq_tx, id, vdev->features); > > + virtio_free_desc(vq_tx, id + 1, vdev->features); > > + > > /* Set up virtqueue descriptor for header */ > > virtio_fill_desc(vq_tx, id, vdev->features, (uint64_t)nethdr, > > net_hdr_size, VRING_DESC_F_NEXT, id + 1); > > @@ -317,7 +320,7 @@ static int virtionet_receive(struct virtio_net *vnet, char *buf, int maxlen) > > #endif > > > > /* Copy data to destination buffer */ > > - memcpy(buf, (void *)virtio_modern64_to_cpu(vdev, vq_rx->desc[id].addr), len); > > + memcpy(buf, virtio_desc_addr(vdev, VQ_RX, id), len); > > > > /* Move indices to next entries */ > > last_rx_idx = last_rx_idx + 1; > > diff --git a/lib/libvirtio/virtio-scsi.c b/lib/libvirtio/virtio-scsi.c > > index 251661e8d466..44573d10e24c 100644 > > --- a/lib/libvirtio/virtio-scsi.c > > +++ b/lib/libvirtio/virtio-scsi.c > > @@ -81,6 +81,11 @@ int virtioscsi_send(struct virtio_device *dev, > > break; > > } > > > > + virtio_free_desc(vq, id, dev->features); > > + virtio_free_desc(vq, id + 1, dev->features); > > + if (!(buf == NULL || buf_len == 0)) > > + virtio_free_desc(vq, id + 2, dev->features); > > + > > return 0; > > } > > > > diff --git a/lib/libvirtio/virtio-serial.c b/lib/libvirtio/virtio-serial.c > > index 78ab0e25d7db..66fbfe8b6cc6 100644 > > --- a/lib/libvirtio/virtio-serial.c > > +++ b/lib/libvirtio/virtio-serial.c > > @@ -108,7 +108,7 @@ void virtio_serial_shutdown(struct virtio_device *dev) > > > > int virtio_serial_putchar(struct virtio_device *dev, char c) > > { > > - int id; > > + int id, ret; > > uint32_t vq_size, time; > > volatile uint16_t *current_used_idx; > > uint16_t last_used_idx, avail_idx; > > @@ -135,17 +135,21 @@ int virtio_serial_putchar(struct virtio_device *dev, char c) > > virtio_queue_notify(dev, TX_Q); > > > > /* Wait for host to consume the descriptor */ > > + ret = 1; > > time = SLOF_GetTimer() + VIRTIO_TIMEOUT; > > while (*current_used_idx == last_used_idx) { > > // do something better > > mb(); > > if (time < SLOF_GetTimer()) { > > printf("virtio_serial_putchar failed! \n"); > > - return 0; > > + ret = 0; > > + break; > > } > > } > > > > - return 1; > > + virtio_free_desc(vq, id, dev->features); > > + > > + return ret; > > } > > > > char virtio_serial_getchar(struct virtio_device *dev) > > @@ -165,7 +169,7 @@ char virtio_serial_getchar(struct virtio_device *dev) > > % vq_rx->size; > > > > /* Copy data to destination buffer */ > > - memcpy(buf, (void *)virtio_modern64_to_cpu(dev, vq_rx->desc[id - 1].addr), RX_ELEM_SIZE); > > + memcpy(buf, virtio_desc_addr(dev, RX_Q, id - 1), RX_ELEM_SIZE); > > > > /* Move indices to next entries */ > > last_rx_idx = last_rx_idx + 1; > > diff --git a/lib/libvirtio/virtio.c b/lib/libvirtio/virtio.c > > index fff0c7e54c19..d4db5f656888 100644 > > --- a/lib/libvirtio/virtio.c > > +++ b/lib/libvirtio/virtio.c > > @@ -273,6 +273,17 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, > > next %= vq->size; > > > > if (features & VIRTIO_F_VERSION_1) { > > + if (features & VIRTIO_F_IOMMU_PLATFORM) { > > + void *gpa = (void *) addr; > > + > > + if (!vq->desc_gpas) { > > + fprintf(stderr, "IOMMU setup has not been done!\n"); > > + return; > > + } > > + > > + addr = SLOF_dma_map_in(gpa, len, 0); > > + vq->desc_gpas[id] = gpa; > > + } > > desc->addr = cpu_to_le64(addr); > > desc->len = cpu_to_le32(len); > > desc->flags = cpu_to_le16(flags); > > @@ -285,6 +296,32 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, > > } > > } > > > > +void virtio_free_desc(struct vqs *vq, int id, uint64_t features) > > +{ > > + struct vring_desc *desc; > > + > > + id %= vq->size; > > + desc = &vq->desc[id]; > > + > > + if (features & VIRTIO_F_VERSION_1) { > > + if (features & VIRTIO_F_IOMMU_PLATFORM) { > > + SLOF_dma_map_out(le64_to_cpu(desc->addr), > > + 0, le32_to_cpu(desc->len)); > > + vq->desc_gpas[id] = NULL; > > + } > > + } > > +} > > On a 4.18 RHEL8 kernel I was unable to boot from virtio-blk, it complains the > disk isn't bootable and the H_PUT_TCE traces show: > > qemu-system-ppc-19019 [045] .... 786844.067356: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x1000,0xe46b003,0xdc73350 > qemu-system-ppc-19019 [045] .... 786844.067358: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS > qemu-system-ppc-19019 [045] .... 786844.067361: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x2000,0xe46c003,0xdc73350 > qemu-system-ppc-19019 [045] .... 786844.067362: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS > qemu-system-ppc-19019 [041] .... 786844.070532: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x3000,0xe67b003,0xdc73490 > qemu-system-ppc-19019 [041] .... 786844.070533: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS > qemu-system-ppc-19019 [041] .... 786844.070817: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x4000,0xe469003,0xdc73490 > qemu-system-ppc-19019 [041] .... 786844.070818: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS > qemu-system-ppc-19019 [041] .... 786844.071102: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x5000,0xe67a003,0xdc73490 > qemu-system-ppc-19019 [041] .... 786844.071103: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS > qemu-system-ppc-19019 [041] .... 786844.071123: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS > qemu-system-ppc-19019 [041] .... 786844.073532: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x3228,0x0,0xdc73488 > qemu-system-ppc-19019 [041] .... 786844.073534: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_PARAMETER > > If I disable in-kernel TCE acceleration, or use TCG, it seems to work > fine. If I add the following hack it seems to fix things: > > @@ -305,7 +307,8 @@ void virtio_free_desc(struct vqs *vq, int id, uint64_t features) > > if (features & VIRTIO_F_VERSION_1) { > if (features & VIRTIO_F_IOMMU_PLATFORM) { > - SLOF_dma_map_out(le64_to_cpu(desc->addr), > + fprintf(stderr, "mapping out desc->addr: %llx\n", le64_to_cpu(desc->addr)); > + SLOF_dma_map_out(le64_to_cpu(desc->addr) & ~(0xfff), > 0, le32_to_cpu(desc->len)); > vq->desc_gpas[id] = NULL; > } > > I guess the kernel is a bit stricter about unmapping unaligned TCE entries? If > so, maybe we should align automatically in dma-map-out? Yes, I think we should. It's probably also a good idea to match the qemu and kernel behaviour to each other. PAPR doesn't seem to specifically say what to do about unaligned IOBAs, but I'm inclined to make qemu stricter rather than making the kernel less strict. Any counter opinions?
On 22/11/2019 11:09, Michael Roth wrote: > Quoting Alexey Kardashevskiy (2019-11-20 17:58:01) >> When QEMU is started with iommu_platform=on, the guest driver must accept >> it or the device will fail. >> >> This enables IOMMU support for virtio-net, -scsi, -block, -serial, -9p >> devices. -serial and -9p are only compile tested though. >> >> For virtio-net we map all RX buffers once and TX when xmit() is called >> and unmap older pages when we are about to reuse the VQ descriptor. >> As all other devices are synchronous, we unmap IOMMU pages right after >> completion of a transaction. >> >> This depends on QEMU's: >> https://patchwork.ozlabs.org/patch/1194067/ >> >> Copied-bits-and-pieces-from: Michael Roth <mdroth@linux.vnet.ibm.com> >> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> >> --- >> Changes: >> v2: >> * added Mike's fs/dma-instance-function.fs >> * total rework >> --- >> board-qemu/slof/Makefile | 1 + >> lib/libvirtio/virtio.h | 5 ++ >> lib/libvirtio/virtio-9p.c | 4 ++ >> lib/libvirtio/virtio-blk.c | 4 ++ >> lib/libvirtio/virtio-net.c | 5 +- >> lib/libvirtio/virtio-scsi.c | 5 ++ >> lib/libvirtio/virtio-serial.c | 12 +++-- >> lib/libvirtio/virtio.c | 82 +++++++++++++++++++++++++++++++- >> board-qemu/slof/OF.fs | 3 ++ >> slof/fs/dma-instance-function.fs | 28 +++++++++++ >> 10 files changed, 143 insertions(+), 6 deletions(-) >> create mode 100644 slof/fs/dma-instance-function.fs >> >> diff --git a/board-qemu/slof/Makefile b/board-qemu/slof/Makefile >> index 2263e751bde9..d7ed2d7a6f18 100644 >> --- a/board-qemu/slof/Makefile >> +++ b/board-qemu/slof/Makefile >> @@ -99,6 +99,7 @@ OF_FFS_FILES = \ >> $(SLOFCMNDIR)/fs/graphics.fs \ >> $(SLOFCMNDIR)/fs/generic-disk.fs \ >> $(SLOFCMNDIR)/fs/dma-function.fs \ >> + $(SLOFCMNDIR)/fs/dma-instance-function.fs \ >> $(SLOFCMNDIR)/fs/pci-device.fs \ >> $(SLOFCMNDIR)/fs/pci-bridge.fs \ >> $(SLOFCMNDIR)/fs/pci-properties.fs \ >> diff --git a/lib/libvirtio/virtio.h b/lib/libvirtio/virtio.h >> index 4927a97f9f5f..250120b3112b 100644 >> --- a/lib/libvirtio/virtio.h >> +++ b/lib/libvirtio/virtio.h >> @@ -29,6 +29,7 @@ >> #define VIRTIO_F_RING_INDIRECT_DESC BIT(28) >> #define VIRTIO_F_RING_EVENT_IDX BIT(29) >> #define VIRTIO_F_VERSION_1 BIT(32) >> +#define VIRTIO_F_IOMMU_PLATFORM BIT(33) >> >> #define VIRTIO_TIMEOUT 5000 /* 5 sec timeout */ >> >> @@ -84,6 +85,8 @@ struct vqs { >> struct vring_desc *desc; >> struct vring_avail *avail; >> struct vring_used *used; >> + void **desc_gpas; /* to get gpa from desc->addr (which is ioba) */ >> + uint64_t bus_desc; >> }; >> >> struct virtio_device { >> @@ -109,6 +112,8 @@ extern struct vring_used *virtio_get_vring_used(struct virtio_device *dev, int q >> extern void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, >> uint64_t addr, uint32_t len, >> uint16_t flags, uint16_t next); >> +extern void virtio_free_desc(struct vqs *vq, int id, uint64_t features); >> +void *virtio_desc_addr(struct virtio_device *vdev, int queue, int id); >> extern struct vqs *virtio_queue_init_vq(struct virtio_device *dev, unsigned int id); >> extern void virtio_queue_term_vq(struct virtio_device *dev, struct vqs *vq, unsigned int id); >> >> diff --git a/lib/libvirtio/virtio-9p.c b/lib/libvirtio/virtio-9p.c >> index 426069fe9509..76078612b06e 100644 >> --- a/lib/libvirtio/virtio-9p.c >> +++ b/lib/libvirtio/virtio-9p.c >> @@ -129,6 +129,10 @@ static int virtio_9p_transact(void *opaque, uint8_t *tx, int tx_size, uint8_t *r >> // do something better >> mb(); >> } >> + >> + virtio_free_desc(vq, id, dev->features); >> + virtio_free_desc(vq, id + 1, dev->features); >> + >> if (i == 0) { >> return -1; >> } >> diff --git a/lib/libvirtio/virtio-blk.c b/lib/libvirtio/virtio-blk.c >> index a0dadbb0d6a8..0363038e559d 100644 >> --- a/lib/libvirtio/virtio-blk.c >> +++ b/lib/libvirtio/virtio-blk.c >> @@ -195,6 +195,10 @@ virtioblk_transfer(struct virtio_device *dev, char *buf, uint64_t blocknum, >> break; >> } >> >> + virtio_free_desc(vq, id, dev->features); >> + virtio_free_desc(vq, id + 1, dev->features); >> + virtio_free_desc(vq, id + 2, dev->features); >> + >> if (status == 0) >> return cnt; >> >> diff --git a/lib/libvirtio/virtio-net.c b/lib/libvirtio/virtio-net.c >> index ae67883020ef..5a0d19088527 100644 >> --- a/lib/libvirtio/virtio-net.c >> +++ b/lib/libvirtio/virtio-net.c >> @@ -255,6 +255,9 @@ static int virtionet_xmit(struct virtio_net *vnet, char *buf, int len) >> idx = virtio_modern16_to_cpu(vdev, vq_tx->avail->idx); >> id = (idx * 2) % vq_tx->size; >> >> + virtio_free_desc(vq_tx, id, vdev->features); >> + virtio_free_desc(vq_tx, id + 1, vdev->features); >> + >> /* Set up virtqueue descriptor for header */ >> virtio_fill_desc(vq_tx, id, vdev->features, (uint64_t)nethdr, >> net_hdr_size, VRING_DESC_F_NEXT, id + 1); >> @@ -317,7 +320,7 @@ static int virtionet_receive(struct virtio_net *vnet, char *buf, int maxlen) >> #endif >> >> /* Copy data to destination buffer */ >> - memcpy(buf, (void *)virtio_modern64_to_cpu(vdev, vq_rx->desc[id].addr), len); >> + memcpy(buf, virtio_desc_addr(vdev, VQ_RX, id), len); >> >> /* Move indices to next entries */ >> last_rx_idx = last_rx_idx + 1; >> diff --git a/lib/libvirtio/virtio-scsi.c b/lib/libvirtio/virtio-scsi.c >> index 251661e8d466..44573d10e24c 100644 >> --- a/lib/libvirtio/virtio-scsi.c >> +++ b/lib/libvirtio/virtio-scsi.c >> @@ -81,6 +81,11 @@ int virtioscsi_send(struct virtio_device *dev, >> break; >> } >> >> + virtio_free_desc(vq, id, dev->features); >> + virtio_free_desc(vq, id + 1, dev->features); >> + if (!(buf == NULL || buf_len == 0)) >> + virtio_free_desc(vq, id + 2, dev->features); >> + >> return 0; >> } >> >> diff --git a/lib/libvirtio/virtio-serial.c b/lib/libvirtio/virtio-serial.c >> index 78ab0e25d7db..66fbfe8b6cc6 100644 >> --- a/lib/libvirtio/virtio-serial.c >> +++ b/lib/libvirtio/virtio-serial.c >> @@ -108,7 +108,7 @@ void virtio_serial_shutdown(struct virtio_device *dev) >> >> int virtio_serial_putchar(struct virtio_device *dev, char c) >> { >> - int id; >> + int id, ret; >> uint32_t vq_size, time; >> volatile uint16_t *current_used_idx; >> uint16_t last_used_idx, avail_idx; >> @@ -135,17 +135,21 @@ int virtio_serial_putchar(struct virtio_device *dev, char c) >> virtio_queue_notify(dev, TX_Q); >> >> /* Wait for host to consume the descriptor */ >> + ret = 1; >> time = SLOF_GetTimer() + VIRTIO_TIMEOUT; >> while (*current_used_idx == last_used_idx) { >> // do something better >> mb(); >> if (time < SLOF_GetTimer()) { >> printf("virtio_serial_putchar failed! \n"); >> - return 0; >> + ret = 0; >> + break; >> } >> } >> >> - return 1; >> + virtio_free_desc(vq, id, dev->features); >> + >> + return ret; >> } >> >> char virtio_serial_getchar(struct virtio_device *dev) >> @@ -165,7 +169,7 @@ char virtio_serial_getchar(struct virtio_device *dev) >> % vq_rx->size; >> >> /* Copy data to destination buffer */ >> - memcpy(buf, (void *)virtio_modern64_to_cpu(dev, vq_rx->desc[id - 1].addr), RX_ELEM_SIZE); >> + memcpy(buf, virtio_desc_addr(dev, RX_Q, id - 1), RX_ELEM_SIZE); >> >> /* Move indices to next entries */ >> last_rx_idx = last_rx_idx + 1; >> diff --git a/lib/libvirtio/virtio.c b/lib/libvirtio/virtio.c >> index fff0c7e54c19..d4db5f656888 100644 >> --- a/lib/libvirtio/virtio.c >> +++ b/lib/libvirtio/virtio.c >> @@ -273,6 +273,17 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, >> next %= vq->size; >> >> if (features & VIRTIO_F_VERSION_1) { >> + if (features & VIRTIO_F_IOMMU_PLATFORM) { >> + void *gpa = (void *) addr; >> + >> + if (!vq->desc_gpas) { >> + fprintf(stderr, "IOMMU setup has not been done!\n"); >> + return; >> + } >> + >> + addr = SLOF_dma_map_in(gpa, len, 0); >> + vq->desc_gpas[id] = gpa; >> + } >> desc->addr = cpu_to_le64(addr); >> desc->len = cpu_to_le32(len); >> desc->flags = cpu_to_le16(flags); >> @@ -285,6 +296,32 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, >> } >> } >> >> +void virtio_free_desc(struct vqs *vq, int id, uint64_t features) >> +{ >> + struct vring_desc *desc; >> + >> + id %= vq->size; >> + desc = &vq->desc[id]; >> + >> + if (features & VIRTIO_F_VERSION_1) { >> + if (features & VIRTIO_F_IOMMU_PLATFORM) { >> + SLOF_dma_map_out(le64_to_cpu(desc->addr), >> + 0, le32_to_cpu(desc->len)); >> + vq->desc_gpas[id] = NULL; >> + } >> + } >> +} > > On a 4.18 RHEL8 kernel I was unable to boot from virtio-blk, it complains the > disk isn't bootable and the H_PUT_TCE traces show: > > qemu-system-ppc-19019 [045] .... 786844.067356: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x1000,0xe46b003,0xdc73350 > qemu-system-ppc-19019 [045] .... 786844.067358: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS > qemu-system-ppc-19019 [045] .... 786844.067361: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x2000,0xe46c003,0xdc73350 > qemu-system-ppc-19019 [045] .... 786844.067362: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS > qemu-system-ppc-19019 [041] .... 786844.070532: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x3000,0xe67b003,0xdc73490 > qemu-system-ppc-19019 [041] .... 786844.070533: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS > qemu-system-ppc-19019 [041] .... 786844.070817: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x4000,0xe469003,0xdc73490 > qemu-system-ppc-19019 [041] .... 786844.070818: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS > qemu-system-ppc-19019 [041] .... 786844.071102: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x5000,0xe67a003,0xdc73490 > qemu-system-ppc-19019 [041] .... 786844.071103: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS > qemu-system-ppc-19019 [041] .... 786844.071123: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS > qemu-system-ppc-19019 [041] .... 786844.073532: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x3228,0x0,0xdc73488 > qemu-system-ppc-19019 [041] .... 786844.073534: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_PARAMETER > > If I disable in-kernel TCE acceleration, or use TCG, it seems to work > fine. If I add the following hack it seems to fix things: > > @@ -305,7 +307,8 @@ void virtio_free_desc(struct vqs *vq, int id, uint64_t features) > > if (features & VIRTIO_F_VERSION_1) { > if (features & VIRTIO_F_IOMMU_PLATFORM) { > - SLOF_dma_map_out(le64_to_cpu(desc->addr), > + fprintf(stderr, "mapping out desc->addr: %llx\n", le64_to_cpu(desc->addr)); > + SLOF_dma_map_out(le64_to_cpu(desc->addr) & ~(0xfff), > 0, le32_to_cpu(desc->len)); > vq->desc_gpas[id] = NULL; > } > > I guess the kernel is a bit stricter about unmapping unaligned TCE entries? If > so, maybe we should align automatically in dma-map-out? I thought I did that as well in 1/4, I'll double check. Hmmm. > >> + >> +void *virtio_desc_addr(struct virtio_device *vdev, int queue, int id) >> +{ >> + struct vqs *vq = &vdev->vq[queue]; >> + >> + if (vq->desc_gpas) >> + return vq->desc_gpas[id]; >> + >> + return (void *) virtio_modern64_to_cpu(vdev, vq->desc[id].addr); >> +} >> + >> /** >> * Reset virtio device >> */ > > <snip> > >> diff --git a/board-qemu/slof/OF.fs b/board-qemu/slof/OF.fs >> index a85f6c558e67..3e117ad03e09 100644 >> --- a/board-qemu/slof/OF.fs >> +++ b/board-qemu/slof/OF.fs >> @@ -143,6 +143,9 @@ check-for-nvramrc >> >> 8a0 cp >> >> +\ For DMA functions used by client/package instances. >> +#include "dma-instance-function.fs" >> + >> \ The client interface. >> #include "client.fs" >> \ ELF binary file format. >> diff --git a/slof/fs/dma-instance-function.fs b/slof/fs/dma-instance-function.fs >> new file mode 100644 >> index 000000000000..057181be0f44 >> --- /dev/null >> +++ b/slof/fs/dma-instance-function.fs >> @@ -0,0 +1,28 @@ >> +\ ****************************************************************************/ >> +\ * Copyright (c) 2011 IBM Corporation > > I copy/pasted the license header and forgot to update, should probably change > to 2019. > > Also, if you think you need it: > > Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com> Yes I do, thanks! >> +\ * All rights reserved. >> +\ * This program and the accompanying materials >> +\ * are made available under the terms of the BSD License >> +\ * which accompanies this distribution, and is available at >> +\ * http://www.opensource.org/licenses/bsd-license.php >> +\ * >> +\ * Contributors: >> +\ * IBM Corporation - initial implementation >> +\ ****************************************************************************/ >> + >> +\ DMA memory allocation functions >> +: dma-alloc ( size -- virt ) >> + s" dma-alloc" $call-parent >> +; >> + >> +: dma-free ( virt size -- ) >> + s" dma-free" $call-parent >> +; >> + >> +: dma-map-in ( virt size cacheable? -- devaddr ) >> + s" dma-map-in" $call-parent >> +; >> + >> +: dma-map-out ( virt devaddr size -- ) >> + s" dma-map-out" $call-parent >> +; >> -- >> 2.17.1 >> >> _______________________________________________ >> SLOF mailing list >> SLOF@lists.ozlabs.org >> https://lists.ozlabs.org/listinfo/slof
On 22/11/2019 12:49, Alexey Kardashevskiy wrote: > > > On 22/11/2019 11:09, Michael Roth wrote: >> Quoting Alexey Kardashevskiy (2019-11-20 17:58:01) >>> When QEMU is started with iommu_platform=on, the guest driver must accept >>> it or the device will fail. >>> >>> This enables IOMMU support for virtio-net, -scsi, -block, -serial, -9p >>> devices. -serial and -9p are only compile tested though. >>> >>> For virtio-net we map all RX buffers once and TX when xmit() is called >>> and unmap older pages when we are about to reuse the VQ descriptor. >>> As all other devices are synchronous, we unmap IOMMU pages right after >>> completion of a transaction. >>> >>> This depends on QEMU's: >>> https://patchwork.ozlabs.org/patch/1194067/ >>> >>> Copied-bits-and-pieces-from: Michael Roth <mdroth@linux.vnet.ibm.com> >>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> >>> --- >>> Changes: >>> v2: >>> * added Mike's fs/dma-instance-function.fs >>> * total rework >>> --- >>> board-qemu/slof/Makefile | 1 + >>> lib/libvirtio/virtio.h | 5 ++ >>> lib/libvirtio/virtio-9p.c | 4 ++ >>> lib/libvirtio/virtio-blk.c | 4 ++ >>> lib/libvirtio/virtio-net.c | 5 +- >>> lib/libvirtio/virtio-scsi.c | 5 ++ >>> lib/libvirtio/virtio-serial.c | 12 +++-- >>> lib/libvirtio/virtio.c | 82 +++++++++++++++++++++++++++++++- >>> board-qemu/slof/OF.fs | 3 ++ >>> slof/fs/dma-instance-function.fs | 28 +++++++++++ >>> 10 files changed, 143 insertions(+), 6 deletions(-) >>> create mode 100644 slof/fs/dma-instance-function.fs >>> >>> diff --git a/board-qemu/slof/Makefile b/board-qemu/slof/Makefile >>> index 2263e751bde9..d7ed2d7a6f18 100644 >>> --- a/board-qemu/slof/Makefile >>> +++ b/board-qemu/slof/Makefile >>> @@ -99,6 +99,7 @@ OF_FFS_FILES = \ >>> $(SLOFCMNDIR)/fs/graphics.fs \ >>> $(SLOFCMNDIR)/fs/generic-disk.fs \ >>> $(SLOFCMNDIR)/fs/dma-function.fs \ >>> + $(SLOFCMNDIR)/fs/dma-instance-function.fs \ >>> $(SLOFCMNDIR)/fs/pci-device.fs \ >>> $(SLOFCMNDIR)/fs/pci-bridge.fs \ >>> $(SLOFCMNDIR)/fs/pci-properties.fs \ >>> diff --git a/lib/libvirtio/virtio.h b/lib/libvirtio/virtio.h >>> index 4927a97f9f5f..250120b3112b 100644 >>> --- a/lib/libvirtio/virtio.h >>> +++ b/lib/libvirtio/virtio.h >>> @@ -29,6 +29,7 @@ >>> #define VIRTIO_F_RING_INDIRECT_DESC BIT(28) >>> #define VIRTIO_F_RING_EVENT_IDX BIT(29) >>> #define VIRTIO_F_VERSION_1 BIT(32) >>> +#define VIRTIO_F_IOMMU_PLATFORM BIT(33) >>> >>> #define VIRTIO_TIMEOUT 5000 /* 5 sec timeout */ >>> >>> @@ -84,6 +85,8 @@ struct vqs { >>> struct vring_desc *desc; >>> struct vring_avail *avail; >>> struct vring_used *used; >>> + void **desc_gpas; /* to get gpa from desc->addr (which is ioba) */ >>> + uint64_t bus_desc; >>> }; >>> >>> struct virtio_device { >>> @@ -109,6 +112,8 @@ extern struct vring_used *virtio_get_vring_used(struct virtio_device *dev, int q >>> extern void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, >>> uint64_t addr, uint32_t len, >>> uint16_t flags, uint16_t next); >>> +extern void virtio_free_desc(struct vqs *vq, int id, uint64_t features); >>> +void *virtio_desc_addr(struct virtio_device *vdev, int queue, int id); >>> extern struct vqs *virtio_queue_init_vq(struct virtio_device *dev, unsigned int id); >>> extern void virtio_queue_term_vq(struct virtio_device *dev, struct vqs *vq, unsigned int id); >>> >>> diff --git a/lib/libvirtio/virtio-9p.c b/lib/libvirtio/virtio-9p.c >>> index 426069fe9509..76078612b06e 100644 >>> --- a/lib/libvirtio/virtio-9p.c >>> +++ b/lib/libvirtio/virtio-9p.c >>> @@ -129,6 +129,10 @@ static int virtio_9p_transact(void *opaque, uint8_t *tx, int tx_size, uint8_t *r >>> // do something better >>> mb(); >>> } >>> + >>> + virtio_free_desc(vq, id, dev->features); >>> + virtio_free_desc(vq, id + 1, dev->features); >>> + >>> if (i == 0) { >>> return -1; >>> } >>> diff --git a/lib/libvirtio/virtio-blk.c b/lib/libvirtio/virtio-blk.c >>> index a0dadbb0d6a8..0363038e559d 100644 >>> --- a/lib/libvirtio/virtio-blk.c >>> +++ b/lib/libvirtio/virtio-blk.c >>> @@ -195,6 +195,10 @@ virtioblk_transfer(struct virtio_device *dev, char *buf, uint64_t blocknum, >>> break; >>> } >>> >>> + virtio_free_desc(vq, id, dev->features); >>> + virtio_free_desc(vq, id + 1, dev->features); >>> + virtio_free_desc(vq, id + 2, dev->features); >>> + >>> if (status == 0) >>> return cnt; >>> >>> diff --git a/lib/libvirtio/virtio-net.c b/lib/libvirtio/virtio-net.c >>> index ae67883020ef..5a0d19088527 100644 >>> --- a/lib/libvirtio/virtio-net.c >>> +++ b/lib/libvirtio/virtio-net.c >>> @@ -255,6 +255,9 @@ static int virtionet_xmit(struct virtio_net *vnet, char *buf, int len) >>> idx = virtio_modern16_to_cpu(vdev, vq_tx->avail->idx); >>> id = (idx * 2) % vq_tx->size; >>> >>> + virtio_free_desc(vq_tx, id, vdev->features); >>> + virtio_free_desc(vq_tx, id + 1, vdev->features); >>> + >>> /* Set up virtqueue descriptor for header */ >>> virtio_fill_desc(vq_tx, id, vdev->features, (uint64_t)nethdr, >>> net_hdr_size, VRING_DESC_F_NEXT, id + 1); >>> @@ -317,7 +320,7 @@ static int virtionet_receive(struct virtio_net *vnet, char *buf, int maxlen) >>> #endif >>> >>> /* Copy data to destination buffer */ >>> - memcpy(buf, (void *)virtio_modern64_to_cpu(vdev, vq_rx->desc[id].addr), len); >>> + memcpy(buf, virtio_desc_addr(vdev, VQ_RX, id), len); >>> >>> /* Move indices to next entries */ >>> last_rx_idx = last_rx_idx + 1; >>> diff --git a/lib/libvirtio/virtio-scsi.c b/lib/libvirtio/virtio-scsi.c >>> index 251661e8d466..44573d10e24c 100644 >>> --- a/lib/libvirtio/virtio-scsi.c >>> +++ b/lib/libvirtio/virtio-scsi.c >>> @@ -81,6 +81,11 @@ int virtioscsi_send(struct virtio_device *dev, >>> break; >>> } >>> >>> + virtio_free_desc(vq, id, dev->features); >>> + virtio_free_desc(vq, id + 1, dev->features); >>> + if (!(buf == NULL || buf_len == 0)) >>> + virtio_free_desc(vq, id + 2, dev->features); >>> + >>> return 0; >>> } >>> >>> diff --git a/lib/libvirtio/virtio-serial.c b/lib/libvirtio/virtio-serial.c >>> index 78ab0e25d7db..66fbfe8b6cc6 100644 >>> --- a/lib/libvirtio/virtio-serial.c >>> +++ b/lib/libvirtio/virtio-serial.c >>> @@ -108,7 +108,7 @@ void virtio_serial_shutdown(struct virtio_device *dev) >>> >>> int virtio_serial_putchar(struct virtio_device *dev, char c) >>> { >>> - int id; >>> + int id, ret; >>> uint32_t vq_size, time; >>> volatile uint16_t *current_used_idx; >>> uint16_t last_used_idx, avail_idx; >>> @@ -135,17 +135,21 @@ int virtio_serial_putchar(struct virtio_device *dev, char c) >>> virtio_queue_notify(dev, TX_Q); >>> >>> /* Wait for host to consume the descriptor */ >>> + ret = 1; >>> time = SLOF_GetTimer() + VIRTIO_TIMEOUT; >>> while (*current_used_idx == last_used_idx) { >>> // do something better >>> mb(); >>> if (time < SLOF_GetTimer()) { >>> printf("virtio_serial_putchar failed! \n"); >>> - return 0; >>> + ret = 0; >>> + break; >>> } >>> } >>> >>> - return 1; >>> + virtio_free_desc(vq, id, dev->features); >>> + >>> + return ret; >>> } >>> >>> char virtio_serial_getchar(struct virtio_device *dev) >>> @@ -165,7 +169,7 @@ char virtio_serial_getchar(struct virtio_device *dev) >>> % vq_rx->size; >>> >>> /* Copy data to destination buffer */ >>> - memcpy(buf, (void *)virtio_modern64_to_cpu(dev, vq_rx->desc[id - 1].addr), RX_ELEM_SIZE); >>> + memcpy(buf, virtio_desc_addr(dev, RX_Q, id - 1), RX_ELEM_SIZE); >>> >>> /* Move indices to next entries */ >>> last_rx_idx = last_rx_idx + 1; >>> diff --git a/lib/libvirtio/virtio.c b/lib/libvirtio/virtio.c >>> index fff0c7e54c19..d4db5f656888 100644 >>> --- a/lib/libvirtio/virtio.c >>> +++ b/lib/libvirtio/virtio.c >>> @@ -273,6 +273,17 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, >>> next %= vq->size; >>> >>> if (features & VIRTIO_F_VERSION_1) { >>> + if (features & VIRTIO_F_IOMMU_PLATFORM) { >>> + void *gpa = (void *) addr; >>> + >>> + if (!vq->desc_gpas) { >>> + fprintf(stderr, "IOMMU setup has not been done!\n"); >>> + return; >>> + } >>> + >>> + addr = SLOF_dma_map_in(gpa, len, 0); >>> + vq->desc_gpas[id] = gpa; >>> + } >>> desc->addr = cpu_to_le64(addr); >>> desc->len = cpu_to_le32(len); >>> desc->flags = cpu_to_le16(flags); >>> @@ -285,6 +296,32 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, >>> } >>> } >>> >>> +void virtio_free_desc(struct vqs *vq, int id, uint64_t features) >>> +{ >>> + struct vring_desc *desc; >>> + >>> + id %= vq->size; >>> + desc = &vq->desc[id]; >>> + >>> + if (features & VIRTIO_F_VERSION_1) { >>> + if (features & VIRTIO_F_IOMMU_PLATFORM) { >>> + SLOF_dma_map_out(le64_to_cpu(desc->addr), >>> + 0, le32_to_cpu(desc->len)); >>> + vq->desc_gpas[id] = NULL; >>> + } >>> + } >>> +} >> >> On a 4.18 RHEL8 kernel I was unable to boot from virtio-blk, it complains the >> disk isn't bootable and the H_PUT_TCE traces show: >> >> qemu-system-ppc-19019 [045] .... 786844.067356: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x1000,0xe46b003,0xdc73350 >> qemu-system-ppc-19019 [045] .... 786844.067358: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS >> qemu-system-ppc-19019 [045] .... 786844.067361: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x2000,0xe46c003,0xdc73350 >> qemu-system-ppc-19019 [045] .... 786844.067362: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS >> qemu-system-ppc-19019 [041] .... 786844.070532: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x3000,0xe67b003,0xdc73490 >> qemu-system-ppc-19019 [041] .... 786844.070533: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS >> qemu-system-ppc-19019 [041] .... 786844.070817: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x4000,0xe469003,0xdc73490 >> qemu-system-ppc-19019 [041] .... 786844.070818: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS >> qemu-system-ppc-19019 [041] .... 786844.071102: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x5000,0xe67a003,0xdc73490 >> qemu-system-ppc-19019 [041] .... 786844.071103: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS >> qemu-system-ppc-19019 [041] .... 786844.071123: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS >> qemu-system-ppc-19019 [041] .... 786844.073532: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x3228,0x0,0xdc73488 >> qemu-system-ppc-19019 [041] .... 786844.073534: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_PARAMETER >> >> If I disable in-kernel TCE acceleration, or use TCG, it seems to work >> fine. If I add the following hack it seems to fix things: >> >> @@ -305,7 +307,8 @@ void virtio_free_desc(struct vqs *vq, int id, uint64_t features) >> >> if (features & VIRTIO_F_VERSION_1) { >> if (features & VIRTIO_F_IOMMU_PLATFORM) { >> - SLOF_dma_map_out(le64_to_cpu(desc->addr), >> + fprintf(stderr, "mapping out desc->addr: %llx\n", le64_to_cpu(desc->addr)); >> + SLOF_dma_map_out(le64_to_cpu(desc->addr) & ~(0xfff), >> 0, le32_to_cpu(desc->len)); >> vq->desc_gpas[id] = NULL; >> } >> >> I guess the kernel is a bit stricter about unmapping unaligned TCE entries? If >> so, maybe we should align automatically in dma-map-out? > > > I thought I did that as well in 1/4, I'll double check. Hmmm. v2 of these patches did not align (or, trunc, really) in dma-map-out but v3 do, it is "swap dma-trunc swap" in dma-map-out of 1/4. SO I am really confused now. It is true that checking differs between qemu's and kvm's h_put_tce handlers, I added this (see below) to my qemu and it still works, can you please try in your tree and capture the spapr_iommu_pci_put traces? Thanks, diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index 3d3bcc86496a..feae27bf11fa 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -564,6 +564,9 @@ static target_ulong h_put_tce(PowerPCCPU *cpu, SpaprMachineState *spapr, if (tcet) { hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift); + if (ioba & ~page_mask) + abort(); +
On Fri, Nov 22, 2019 at 02:14:10PM +1100, Alexey Kardashevskiy wrote: > > > On 22/11/2019 12:49, Alexey Kardashevskiy wrote: > > > > > > On 22/11/2019 11:09, Michael Roth wrote: > >> Quoting Alexey Kardashevskiy (2019-11-20 17:58:01) > >>> When QEMU is started with iommu_platform=on, the guest driver must accept > >>> it or the device will fail. > >>> > >>> This enables IOMMU support for virtio-net, -scsi, -block, -serial, -9p > >>> devices. -serial and -9p are only compile tested though. > >>> > >>> For virtio-net we map all RX buffers once and TX when xmit() is called > >>> and unmap older pages when we are about to reuse the VQ descriptor. > >>> As all other devices are synchronous, we unmap IOMMU pages right after > >>> completion of a transaction. > >>> > >>> This depends on QEMU's: > >>> https://patchwork.ozlabs.org/patch/1194067/ > >>> > >>> Copied-bits-and-pieces-from: Michael Roth <mdroth@linux.vnet.ibm.com> > >>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> > >>> --- > >>> Changes: > >>> v2: > >>> * added Mike's fs/dma-instance-function.fs > >>> * total rework > >>> --- > >>> board-qemu/slof/Makefile | 1 + > >>> lib/libvirtio/virtio.h | 5 ++ > >>> lib/libvirtio/virtio-9p.c | 4 ++ > >>> lib/libvirtio/virtio-blk.c | 4 ++ > >>> lib/libvirtio/virtio-net.c | 5 +- > >>> lib/libvirtio/virtio-scsi.c | 5 ++ > >>> lib/libvirtio/virtio-serial.c | 12 +++-- > >>> lib/libvirtio/virtio.c | 82 +++++++++++++++++++++++++++++++- > >>> board-qemu/slof/OF.fs | 3 ++ > >>> slof/fs/dma-instance-function.fs | 28 +++++++++++ > >>> 10 files changed, 143 insertions(+), 6 deletions(-) > >>> create mode 100644 slof/fs/dma-instance-function.fs > >>> > >>> diff --git a/board-qemu/slof/Makefile b/board-qemu/slof/Makefile > >>> index 2263e751bde9..d7ed2d7a6f18 100644 > >>> --- a/board-qemu/slof/Makefile > >>> +++ b/board-qemu/slof/Makefile > >>> @@ -99,6 +99,7 @@ OF_FFS_FILES = \ > >>> $(SLOFCMNDIR)/fs/graphics.fs \ > >>> $(SLOFCMNDIR)/fs/generic-disk.fs \ > >>> $(SLOFCMNDIR)/fs/dma-function.fs \ > >>> + $(SLOFCMNDIR)/fs/dma-instance-function.fs \ > >>> $(SLOFCMNDIR)/fs/pci-device.fs \ > >>> $(SLOFCMNDIR)/fs/pci-bridge.fs \ > >>> $(SLOFCMNDIR)/fs/pci-properties.fs \ > >>> diff --git a/lib/libvirtio/virtio.h b/lib/libvirtio/virtio.h > >>> index 4927a97f9f5f..250120b3112b 100644 > >>> --- a/lib/libvirtio/virtio.h > >>> +++ b/lib/libvirtio/virtio.h > >>> @@ -29,6 +29,7 @@ > >>> #define VIRTIO_F_RING_INDIRECT_DESC BIT(28) > >>> #define VIRTIO_F_RING_EVENT_IDX BIT(29) > >>> #define VIRTIO_F_VERSION_1 BIT(32) > >>> +#define VIRTIO_F_IOMMU_PLATFORM BIT(33) > >>> > >>> #define VIRTIO_TIMEOUT 5000 /* 5 sec timeout */ > >>> > >>> @@ -84,6 +85,8 @@ struct vqs { > >>> struct vring_desc *desc; > >>> struct vring_avail *avail; > >>> struct vring_used *used; > >>> + void **desc_gpas; /* to get gpa from desc->addr (which is ioba) */ > >>> + uint64_t bus_desc; > >>> }; > >>> > >>> struct virtio_device { > >>> @@ -109,6 +112,8 @@ extern struct vring_used *virtio_get_vring_used(struct virtio_device *dev, int q > >>> extern void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, > >>> uint64_t addr, uint32_t len, > >>> uint16_t flags, uint16_t next); > >>> +extern void virtio_free_desc(struct vqs *vq, int id, uint64_t features); > >>> +void *virtio_desc_addr(struct virtio_device *vdev, int queue, int id); > >>> extern struct vqs *virtio_queue_init_vq(struct virtio_device *dev, unsigned int id); > >>> extern void virtio_queue_term_vq(struct virtio_device *dev, struct vqs *vq, unsigned int id); > >>> > >>> diff --git a/lib/libvirtio/virtio-9p.c b/lib/libvirtio/virtio-9p.c > >>> index 426069fe9509..76078612b06e 100644 > >>> --- a/lib/libvirtio/virtio-9p.c > >>> +++ b/lib/libvirtio/virtio-9p.c > >>> @@ -129,6 +129,10 @@ static int virtio_9p_transact(void *opaque, uint8_t *tx, int tx_size, uint8_t *r > >>> // do something better > >>> mb(); > >>> } > >>> + > >>> + virtio_free_desc(vq, id, dev->features); > >>> + virtio_free_desc(vq, id + 1, dev->features); > >>> + > >>> if (i == 0) { > >>> return -1; > >>> } > >>> diff --git a/lib/libvirtio/virtio-blk.c b/lib/libvirtio/virtio-blk.c > >>> index a0dadbb0d6a8..0363038e559d 100644 > >>> --- a/lib/libvirtio/virtio-blk.c > >>> +++ b/lib/libvirtio/virtio-blk.c > >>> @@ -195,6 +195,10 @@ virtioblk_transfer(struct virtio_device *dev, char *buf, uint64_t blocknum, > >>> break; > >>> } > >>> > >>> + virtio_free_desc(vq, id, dev->features); > >>> + virtio_free_desc(vq, id + 1, dev->features); > >>> + virtio_free_desc(vq, id + 2, dev->features); > >>> + > >>> if (status == 0) > >>> return cnt; > >>> > >>> diff --git a/lib/libvirtio/virtio-net.c b/lib/libvirtio/virtio-net.c > >>> index ae67883020ef..5a0d19088527 100644 > >>> --- a/lib/libvirtio/virtio-net.c > >>> +++ b/lib/libvirtio/virtio-net.c > >>> @@ -255,6 +255,9 @@ static int virtionet_xmit(struct virtio_net *vnet, char *buf, int len) > >>> idx = virtio_modern16_to_cpu(vdev, vq_tx->avail->idx); > >>> id = (idx * 2) % vq_tx->size; > >>> > >>> + virtio_free_desc(vq_tx, id, vdev->features); > >>> + virtio_free_desc(vq_tx, id + 1, vdev->features); > >>> + > >>> /* Set up virtqueue descriptor for header */ > >>> virtio_fill_desc(vq_tx, id, vdev->features, (uint64_t)nethdr, > >>> net_hdr_size, VRING_DESC_F_NEXT, id + 1); > >>> @@ -317,7 +320,7 @@ static int virtionet_receive(struct virtio_net *vnet, char *buf, int maxlen) > >>> #endif > >>> > >>> /* Copy data to destination buffer */ > >>> - memcpy(buf, (void *)virtio_modern64_to_cpu(vdev, vq_rx->desc[id].addr), len); > >>> + memcpy(buf, virtio_desc_addr(vdev, VQ_RX, id), len); > >>> > >>> /* Move indices to next entries */ > >>> last_rx_idx = last_rx_idx + 1; > >>> diff --git a/lib/libvirtio/virtio-scsi.c b/lib/libvirtio/virtio-scsi.c > >>> index 251661e8d466..44573d10e24c 100644 > >>> --- a/lib/libvirtio/virtio-scsi.c > >>> +++ b/lib/libvirtio/virtio-scsi.c > >>> @@ -81,6 +81,11 @@ int virtioscsi_send(struct virtio_device *dev, > >>> break; > >>> } > >>> > >>> + virtio_free_desc(vq, id, dev->features); > >>> + virtio_free_desc(vq, id + 1, dev->features); > >>> + if (!(buf == NULL || buf_len == 0)) > >>> + virtio_free_desc(vq, id + 2, dev->features); > >>> + > >>> return 0; > >>> } > >>> > >>> diff --git a/lib/libvirtio/virtio-serial.c b/lib/libvirtio/virtio-serial.c > >>> index 78ab0e25d7db..66fbfe8b6cc6 100644 > >>> --- a/lib/libvirtio/virtio-serial.c > >>> +++ b/lib/libvirtio/virtio-serial.c > >>> @@ -108,7 +108,7 @@ void virtio_serial_shutdown(struct virtio_device *dev) > >>> > >>> int virtio_serial_putchar(struct virtio_device *dev, char c) > >>> { > >>> - int id; > >>> + int id, ret; > >>> uint32_t vq_size, time; > >>> volatile uint16_t *current_used_idx; > >>> uint16_t last_used_idx, avail_idx; > >>> @@ -135,17 +135,21 @@ int virtio_serial_putchar(struct virtio_device *dev, char c) > >>> virtio_queue_notify(dev, TX_Q); > >>> > >>> /* Wait for host to consume the descriptor */ > >>> + ret = 1; > >>> time = SLOF_GetTimer() + VIRTIO_TIMEOUT; > >>> while (*current_used_idx == last_used_idx) { > >>> // do something better > >>> mb(); > >>> if (time < SLOF_GetTimer()) { > >>> printf("virtio_serial_putchar failed! \n"); > >>> - return 0; > >>> + ret = 0; > >>> + break; > >>> } > >>> } > >>> > >>> - return 1; > >>> + virtio_free_desc(vq, id, dev->features); > >>> + > >>> + return ret; > >>> } > >>> > >>> char virtio_serial_getchar(struct virtio_device *dev) > >>> @@ -165,7 +169,7 @@ char virtio_serial_getchar(struct virtio_device *dev) > >>> % vq_rx->size; > >>> > >>> /* Copy data to destination buffer */ > >>> - memcpy(buf, (void *)virtio_modern64_to_cpu(dev, vq_rx->desc[id - 1].addr), RX_ELEM_SIZE); > >>> + memcpy(buf, virtio_desc_addr(dev, RX_Q, id - 1), RX_ELEM_SIZE); > >>> > >>> /* Move indices to next entries */ > >>> last_rx_idx = last_rx_idx + 1; > >>> diff --git a/lib/libvirtio/virtio.c b/lib/libvirtio/virtio.c > >>> index fff0c7e54c19..d4db5f656888 100644 > >>> --- a/lib/libvirtio/virtio.c > >>> +++ b/lib/libvirtio/virtio.c > >>> @@ -273,6 +273,17 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, > >>> next %= vq->size; > >>> > >>> if (features & VIRTIO_F_VERSION_1) { > >>> + if (features & VIRTIO_F_IOMMU_PLATFORM) { > >>> + void *gpa = (void *) addr; > >>> + > >>> + if (!vq->desc_gpas) { > >>> + fprintf(stderr, "IOMMU setup has not been done!\n"); > >>> + return; > >>> + } > >>> + > >>> + addr = SLOF_dma_map_in(gpa, len, 0); > >>> + vq->desc_gpas[id] = gpa; > >>> + } > >>> desc->addr = cpu_to_le64(addr); > >>> desc->len = cpu_to_le32(len); > >>> desc->flags = cpu_to_le16(flags); > >>> @@ -285,6 +296,32 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, > >>> } > >>> } > >>> > >>> +void virtio_free_desc(struct vqs *vq, int id, uint64_t features) > >>> +{ > >>> + struct vring_desc *desc; > >>> + > >>> + id %= vq->size; > >>> + desc = &vq->desc[id]; > >>> + > >>> + if (features & VIRTIO_F_VERSION_1) { > >>> + if (features & VIRTIO_F_IOMMU_PLATFORM) { > >>> + SLOF_dma_map_out(le64_to_cpu(desc->addr), > >>> + 0, le32_to_cpu(desc->len)); > >>> + vq->desc_gpas[id] = NULL; > >>> + } > >>> + } > >>> +} > >> > >> On a 4.18 RHEL8 kernel I was unable to boot from virtio-blk, it complains the > >> disk isn't bootable and the H_PUT_TCE traces show: > >> > >> qemu-system-ppc-19019 [045] .... 786844.067356: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x1000,0xe46b003,0xdc73350 > >> qemu-system-ppc-19019 [045] .... 786844.067358: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS > >> qemu-system-ppc-19019 [045] .... 786844.067361: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x2000,0xe46c003,0xdc73350 > >> qemu-system-ppc-19019 [045] .... 786844.067362: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS > >> qemu-system-ppc-19019 [041] .... 786844.070532: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x3000,0xe67b003,0xdc73490 > >> qemu-system-ppc-19019 [041] .... 786844.070533: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS > >> qemu-system-ppc-19019 [041] .... 786844.070817: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x4000,0xe469003,0xdc73490 > >> qemu-system-ppc-19019 [041] .... 786844.070818: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS > >> qemu-system-ppc-19019 [041] .... 786844.071102: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x5000,0xe67a003,0xdc73490 > >> qemu-system-ppc-19019 [041] .... 786844.071103: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS > >> qemu-system-ppc-19019 [041] .... 786844.071123: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS > >> qemu-system-ppc-19019 [041] .... 786844.073532: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x3228,0x0,0xdc73488 Note here .......................................................................................................^^^^^^ > >> qemu-system-ppc-19019 [041] .... 786844.073534: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_PARAMETER > >> > >> If I disable in-kernel TCE acceleration, or use TCG, it seems to work > >> fine. If I add the following hack it seems to fix things: > >> > >> @@ -305,7 +307,8 @@ void virtio_free_desc(struct vqs *vq, int id, uint64_t features) > >> > >> if (features & VIRTIO_F_VERSION_1) { > >> if (features & VIRTIO_F_IOMMU_PLATFORM) { > >> - SLOF_dma_map_out(le64_to_cpu(desc->addr), > >> + fprintf(stderr, "mapping out desc->addr: %llx\n", le64_to_cpu(desc->addr)); > >> + SLOF_dma_map_out(le64_to_cpu(desc->addr) & ~(0xfff), > >> 0, le32_to_cpu(desc->len)); > >> vq->desc_gpas[id] = NULL; > >> } > >> > >> I guess the kernel is a bit stricter about unmapping unaligned TCE entries? If > >> so, maybe we should align automatically in dma-map-out? > > > > > > I thought I did that as well in 1/4, I'll double check. Hmmm. > > > v2 of these patches did not align (or, trunc, really) in dma-map-out but > v3 do, it is "swap dma-trunc swap" in dma-map-out of 1/4. SO I am really > confused now. Huh, so am I. Because somehow or other we are seeing a call to H_PUT_TCE with an unaligned IOBA in Mike's trace above. > > It is true that checking differs between qemu's and kvm's h_put_tce > handlers, I added this (see below) to my qemu and it still works, can > you please try in your tree and capture the spapr_iommu_pci_put traces? > Thanks, > > > diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c > index 3d3bcc86496a..feae27bf11fa 100644 > --- a/hw/ppc/spapr_iommu.c > +++ b/hw/ppc/spapr_iommu.c > @@ -564,6 +564,9 @@ static target_ulong h_put_tce(PowerPCCPU *cpu, > SpaprMachineState *spapr, > if (tcet) { > hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift); > > + if (ioba & ~page_mask) > + abort(); > + > > > >
Quoting Alexey Kardashevskiy (2019-11-21 21:14:10) > > > On 22/11/2019 12:49, Alexey Kardashevskiy wrote: > > > > > > On 22/11/2019 11:09, Michael Roth wrote: > >> Quoting Alexey Kardashevskiy (2019-11-20 17:58:01) > >>> When QEMU is started with iommu_platform=on, the guest driver must accept > >>> it or the device will fail. > >>> > >>> This enables IOMMU support for virtio-net, -scsi, -block, -serial, -9p > >>> devices. -serial and -9p are only compile tested though. > >>> > >>> For virtio-net we map all RX buffers once and TX when xmit() is called > >>> and unmap older pages when we are about to reuse the VQ descriptor. > >>> As all other devices are synchronous, we unmap IOMMU pages right after > >>> completion of a transaction. > >>> > >>> This depends on QEMU's: > >>> https://patchwork.ozlabs.org/patch/1194067/ > >>> > >>> Copied-bits-and-pieces-from: Michael Roth <mdroth@linux.vnet.ibm.com> > >>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> > >>> --- > >>> Changes: > >>> v2: > >>> * added Mike's fs/dma-instance-function.fs > >>> * total rework > >>> --- > >>> board-qemu/slof/Makefile | 1 + > >>> lib/libvirtio/virtio.h | 5 ++ > >>> lib/libvirtio/virtio-9p.c | 4 ++ > >>> lib/libvirtio/virtio-blk.c | 4 ++ > >>> lib/libvirtio/virtio-net.c | 5 +- > >>> lib/libvirtio/virtio-scsi.c | 5 ++ > >>> lib/libvirtio/virtio-serial.c | 12 +++-- > >>> lib/libvirtio/virtio.c | 82 +++++++++++++++++++++++++++++++- > >>> board-qemu/slof/OF.fs | 3 ++ > >>> slof/fs/dma-instance-function.fs | 28 +++++++++++ > >>> 10 files changed, 143 insertions(+), 6 deletions(-) > >>> create mode 100644 slof/fs/dma-instance-function.fs > >>> > >>> diff --git a/board-qemu/slof/Makefile b/board-qemu/slof/Makefile > >>> index 2263e751bde9..d7ed2d7a6f18 100644 > >>> --- a/board-qemu/slof/Makefile > >>> +++ b/board-qemu/slof/Makefile > >>> @@ -99,6 +99,7 @@ OF_FFS_FILES = \ > >>> $(SLOFCMNDIR)/fs/graphics.fs \ > >>> $(SLOFCMNDIR)/fs/generic-disk.fs \ > >>> $(SLOFCMNDIR)/fs/dma-function.fs \ > >>> + $(SLOFCMNDIR)/fs/dma-instance-function.fs \ > >>> $(SLOFCMNDIR)/fs/pci-device.fs \ > >>> $(SLOFCMNDIR)/fs/pci-bridge.fs \ > >>> $(SLOFCMNDIR)/fs/pci-properties.fs \ > >>> diff --git a/lib/libvirtio/virtio.h b/lib/libvirtio/virtio.h > >>> index 4927a97f9f5f..250120b3112b 100644 > >>> --- a/lib/libvirtio/virtio.h > >>> +++ b/lib/libvirtio/virtio.h > >>> @@ -29,6 +29,7 @@ > >>> #define VIRTIO_F_RING_INDIRECT_DESC BIT(28) > >>> #define VIRTIO_F_RING_EVENT_IDX BIT(29) > >>> #define VIRTIO_F_VERSION_1 BIT(32) > >>> +#define VIRTIO_F_IOMMU_PLATFORM BIT(33) > >>> > >>> #define VIRTIO_TIMEOUT 5000 /* 5 sec timeout */ > >>> > >>> @@ -84,6 +85,8 @@ struct vqs { > >>> struct vring_desc *desc; > >>> struct vring_avail *avail; > >>> struct vring_used *used; > >>> + void **desc_gpas; /* to get gpa from desc->addr (which is ioba) */ > >>> + uint64_t bus_desc; > >>> }; > >>> > >>> struct virtio_device { > >>> @@ -109,6 +112,8 @@ extern struct vring_used *virtio_get_vring_used(struct virtio_device *dev, int q > >>> extern void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, > >>> uint64_t addr, uint32_t len, > >>> uint16_t flags, uint16_t next); > >>> +extern void virtio_free_desc(struct vqs *vq, int id, uint64_t features); > >>> +void *virtio_desc_addr(struct virtio_device *vdev, int queue, int id); > >>> extern struct vqs *virtio_queue_init_vq(struct virtio_device *dev, unsigned int id); > >>> extern void virtio_queue_term_vq(struct virtio_device *dev, struct vqs *vq, unsigned int id); > >>> > >>> diff --git a/lib/libvirtio/virtio-9p.c b/lib/libvirtio/virtio-9p.c > >>> index 426069fe9509..76078612b06e 100644 > >>> --- a/lib/libvirtio/virtio-9p.c > >>> +++ b/lib/libvirtio/virtio-9p.c > >>> @@ -129,6 +129,10 @@ static int virtio_9p_transact(void *opaque, uint8_t *tx, int tx_size, uint8_t *r > >>> // do something better > >>> mb(); > >>> } > >>> + > >>> + virtio_free_desc(vq, id, dev->features); > >>> + virtio_free_desc(vq, id + 1, dev->features); > >>> + > >>> if (i == 0) { > >>> return -1; > >>> } > >>> diff --git a/lib/libvirtio/virtio-blk.c b/lib/libvirtio/virtio-blk.c > >>> index a0dadbb0d6a8..0363038e559d 100644 > >>> --- a/lib/libvirtio/virtio-blk.c > >>> +++ b/lib/libvirtio/virtio-blk.c > >>> @@ -195,6 +195,10 @@ virtioblk_transfer(struct virtio_device *dev, char *buf, uint64_t blocknum, > >>> break; > >>> } > >>> > >>> + virtio_free_desc(vq, id, dev->features); > >>> + virtio_free_desc(vq, id + 1, dev->features); > >>> + virtio_free_desc(vq, id + 2, dev->features); > >>> + > >>> if (status == 0) > >>> return cnt; > >>> > >>> diff --git a/lib/libvirtio/virtio-net.c b/lib/libvirtio/virtio-net.c > >>> index ae67883020ef..5a0d19088527 100644 > >>> --- a/lib/libvirtio/virtio-net.c > >>> +++ b/lib/libvirtio/virtio-net.c > >>> @@ -255,6 +255,9 @@ static int virtionet_xmit(struct virtio_net *vnet, char *buf, int len) > >>> idx = virtio_modern16_to_cpu(vdev, vq_tx->avail->idx); > >>> id = (idx * 2) % vq_tx->size; > >>> > >>> + virtio_free_desc(vq_tx, id, vdev->features); > >>> + virtio_free_desc(vq_tx, id + 1, vdev->features); > >>> + > >>> /* Set up virtqueue descriptor for header */ > >>> virtio_fill_desc(vq_tx, id, vdev->features, (uint64_t)nethdr, > >>> net_hdr_size, VRING_DESC_F_NEXT, id + 1); > >>> @@ -317,7 +320,7 @@ static int virtionet_receive(struct virtio_net *vnet, char *buf, int maxlen) > >>> #endif > >>> > >>> /* Copy data to destination buffer */ > >>> - memcpy(buf, (void *)virtio_modern64_to_cpu(vdev, vq_rx->desc[id].addr), len); > >>> + memcpy(buf, virtio_desc_addr(vdev, VQ_RX, id), len); > >>> > >>> /* Move indices to next entries */ > >>> last_rx_idx = last_rx_idx + 1; > >>> diff --git a/lib/libvirtio/virtio-scsi.c b/lib/libvirtio/virtio-scsi.c > >>> index 251661e8d466..44573d10e24c 100644 > >>> --- a/lib/libvirtio/virtio-scsi.c > >>> +++ b/lib/libvirtio/virtio-scsi.c > >>> @@ -81,6 +81,11 @@ int virtioscsi_send(struct virtio_device *dev, > >>> break; > >>> } > >>> > >>> + virtio_free_desc(vq, id, dev->features); > >>> + virtio_free_desc(vq, id + 1, dev->features); > >>> + if (!(buf == NULL || buf_len == 0)) > >>> + virtio_free_desc(vq, id + 2, dev->features); > >>> + > >>> return 0; > >>> } > >>> > >>> diff --git a/lib/libvirtio/virtio-serial.c b/lib/libvirtio/virtio-serial.c > >>> index 78ab0e25d7db..66fbfe8b6cc6 100644 > >>> --- a/lib/libvirtio/virtio-serial.c > >>> +++ b/lib/libvirtio/virtio-serial.c > >>> @@ -108,7 +108,7 @@ void virtio_serial_shutdown(struct virtio_device *dev) > >>> > >>> int virtio_serial_putchar(struct virtio_device *dev, char c) > >>> { > >>> - int id; > >>> + int id, ret; > >>> uint32_t vq_size, time; > >>> volatile uint16_t *current_used_idx; > >>> uint16_t last_used_idx, avail_idx; > >>> @@ -135,17 +135,21 @@ int virtio_serial_putchar(struct virtio_device *dev, char c) > >>> virtio_queue_notify(dev, TX_Q); > >>> > >>> /* Wait for host to consume the descriptor */ > >>> + ret = 1; > >>> time = SLOF_GetTimer() + VIRTIO_TIMEOUT; > >>> while (*current_used_idx == last_used_idx) { > >>> // do something better > >>> mb(); > >>> if (time < SLOF_GetTimer()) { > >>> printf("virtio_serial_putchar failed! \n"); > >>> - return 0; > >>> + ret = 0; > >>> + break; > >>> } > >>> } > >>> > >>> - return 1; > >>> + virtio_free_desc(vq, id, dev->features); > >>> + > >>> + return ret; > >>> } > >>> > >>> char virtio_serial_getchar(struct virtio_device *dev) > >>> @@ -165,7 +169,7 @@ char virtio_serial_getchar(struct virtio_device *dev) > >>> % vq_rx->size; > >>> > >>> /* Copy data to destination buffer */ > >>> - memcpy(buf, (void *)virtio_modern64_to_cpu(dev, vq_rx->desc[id - 1].addr), RX_ELEM_SIZE); > >>> + memcpy(buf, virtio_desc_addr(dev, RX_Q, id - 1), RX_ELEM_SIZE); > >>> > >>> /* Move indices to next entries */ > >>> last_rx_idx = last_rx_idx + 1; > >>> diff --git a/lib/libvirtio/virtio.c b/lib/libvirtio/virtio.c > >>> index fff0c7e54c19..d4db5f656888 100644 > >>> --- a/lib/libvirtio/virtio.c > >>> +++ b/lib/libvirtio/virtio.c > >>> @@ -273,6 +273,17 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, > >>> next %= vq->size; > >>> > >>> if (features & VIRTIO_F_VERSION_1) { > >>> + if (features & VIRTIO_F_IOMMU_PLATFORM) { > >>> + void *gpa = (void *) addr; > >>> + > >>> + if (!vq->desc_gpas) { > >>> + fprintf(stderr, "IOMMU setup has not been done!\n"); > >>> + return; > >>> + } > >>> + > >>> + addr = SLOF_dma_map_in(gpa, len, 0); > >>> + vq->desc_gpas[id] = gpa; > >>> + } > >>> desc->addr = cpu_to_le64(addr); > >>> desc->len = cpu_to_le32(len); > >>> desc->flags = cpu_to_le16(flags); > >>> @@ -285,6 +296,32 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, > >>> } > >>> } > >>> > >>> +void virtio_free_desc(struct vqs *vq, int id, uint64_t features) > >>> +{ > >>> + struct vring_desc *desc; > >>> + > >>> + id %= vq->size; > >>> + desc = &vq->desc[id]; > >>> + > >>> + if (features & VIRTIO_F_VERSION_1) { > >>> + if (features & VIRTIO_F_IOMMU_PLATFORM) { > >>> + SLOF_dma_map_out(le64_to_cpu(desc->addr), > >>> + 0, le32_to_cpu(desc->len)); > >>> + vq->desc_gpas[id] = NULL; > >>> + } > >>> + } > >>> +} > >> > >> On a 4.18 RHEL8 kernel I was unable to boot from virtio-blk, it complains the > >> disk isn't bootable and the H_PUT_TCE traces show: > >> > >> qemu-system-ppc-19019 [045] .... 786844.067356: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x1000,0xe46b003,0xdc73350 > >> qemu-system-ppc-19019 [045] .... 786844.067358: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS > >> qemu-system-ppc-19019 [045] .... 786844.067361: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x2000,0xe46c003,0xdc73350 > >> qemu-system-ppc-19019 [045] .... 786844.067362: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS > >> qemu-system-ppc-19019 [041] .... 786844.070532: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x3000,0xe67b003,0xdc73490 > >> qemu-system-ppc-19019 [041] .... 786844.070533: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS > >> qemu-system-ppc-19019 [041] .... 786844.070817: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x4000,0xe469003,0xdc73490 > >> qemu-system-ppc-19019 [041] .... 786844.070818: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS > >> qemu-system-ppc-19019 [041] .... 786844.071102: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x5000,0xe67a003,0xdc73490 > >> qemu-system-ppc-19019 [041] .... 786844.071103: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS > >> qemu-system-ppc-19019 [041] .... 786844.071123: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS > >> qemu-system-ppc-19019 [041] .... 786844.073532: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x3228,0x0,0xdc73488 > >> qemu-system-ppc-19019 [041] .... 786844.073534: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_PARAMETER > >> > >> If I disable in-kernel TCE acceleration, or use TCG, it seems to work > >> fine. If I add the following hack it seems to fix things: > >> > >> @@ -305,7 +307,8 @@ void virtio_free_desc(struct vqs *vq, int id, uint64_t features) > >> > >> if (features & VIRTIO_F_VERSION_1) { > >> if (features & VIRTIO_F_IOMMU_PLATFORM) { > >> - SLOF_dma_map_out(le64_to_cpu(desc->addr), > >> + fprintf(stderr, "mapping out desc->addr: %llx\n", le64_to_cpu(desc->addr)); > >> + SLOF_dma_map_out(le64_to_cpu(desc->addr) & ~(0xfff), > >> 0, le32_to_cpu(desc->len)); > >> vq->desc_gpas[id] = NULL; > >> } > >> > >> I guess the kernel is a bit stricter about unmapping unaligned TCE entries? If > >> so, maybe we should align automatically in dma-map-out? > > > > > > I thought I did that as well in 1/4, I'll double check. Hmmm. > > > v2 of these patches did not align (or, trunc, really) in dma-map-out but > v3 do, it is "swap dma-trunc swap" in dma-map-out of 1/4. SO I am really > confused now. Yah sorry I think the issue was on my end. I thought: https://github.com/aik/SLOF/commits/virtio-iommu reflected this series, but it does contain an older version of the dma-map* patch: index f136324..be529c9 100644 --- a/board-qemu/slof/pci-phb.fs +++ b/board-qemu/slof/pci-phb.fs @@ -146,6 +146,7 @@ setup-puid \ grub does not align allocated addresses to the size so when mapping, \ we might ask bm-alloc for an extra IOMMU page : dma-align ( size virt -- aligned-size ) tce-mask and + tce-ps #aligned ; +: dma-trunc ( addr -- addr&~fff) tce-mask not and ; : dma-map-in ( virt size cachable? -- devaddr ) phb-debug? IF cr ." dma-map-in called: " .s cr THEN @@ -163,7 +164,7 @@ setup-puid swap ( size dev-addr virt ) 2dup tce-mask and or >r \ add page offset to the return value - tce-mask not and 3 OR \ Trunk virt and add read and write perm + dma-trunc 3 OR \ Truncate and add read and write perm rot ( virt dev-addr size r: dev-addr ) 0 ?DO @@ -179,10 +180,10 @@ setup-puid : dma-map-out ( virt devaddr size -- ) phb-debug? IF cr ." dma-map-out called: " .s cr THEN (init-dma-window-vars) - rot drop ( devaddr size ) + rot drop ( devaddr size ) over dma-align - 2dup swap tce-mask not and swap bm-handle -rot - bm-free + swap dma-trunc swap ( devaddr-trunc size-extended ) + 2dup bm-handle -rot bm-free 0 ?DO dup 0 dma-window-liobn -rot I haven't been able to re-test yet since something seems to have gone awry with the test system last night, but I assume that's what the issue was. Sorry for the confusion. > > It is true that checking differs between qemu's and kvm's h_put_tce > handlers, I added this (see below) to my qemu and it still works, can > you please try in your tree and capture the spapr_iommu_pci_put traces? > Thanks, > > > diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c > index 3d3bcc86496a..feae27bf11fa 100644 > --- a/hw/ppc/spapr_iommu.c > +++ b/hw/ppc/spapr_iommu.c > @@ -564,6 +564,9 @@ static target_ulong h_put_tce(PowerPCCPU *cpu, > SpaprMachineState *spapr, > if (tcet) { > hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift); > > + if (ioba & ~page_mask) > + abort(); > + > > > > > -- > Alexey
On 23/11/2019 01:42, Michael Roth wrote: > Quoting Alexey Kardashevskiy (2019-11-21 21:14:10) >> >> >> On 22/11/2019 12:49, Alexey Kardashevskiy wrote: >>> >>> >>> On 22/11/2019 11:09, Michael Roth wrote: >>>> Quoting Alexey Kardashevskiy (2019-11-20 17:58:01) >>>>> When QEMU is started with iommu_platform=on, the guest driver must accept >>>>> it or the device will fail. >>>>> >>>>> This enables IOMMU support for virtio-net, -scsi, -block, -serial, -9p >>>>> devices. -serial and -9p are only compile tested though. >>>>> >>>>> For virtio-net we map all RX buffers once and TX when xmit() is called >>>>> and unmap older pages when we are about to reuse the VQ descriptor. >>>>> As all other devices are synchronous, we unmap IOMMU pages right after >>>>> completion of a transaction. >>>>> >>>>> This depends on QEMU's: >>>>> https://patchwork.ozlabs.org/patch/1194067/ >>>>> >>>>> Copied-bits-and-pieces-from: Michael Roth <mdroth@linux.vnet.ibm.com> >>>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> >>>>> --- >>>>> Changes: >>>>> v2: >>>>> * added Mike's fs/dma-instance-function.fs >>>>> * total rework >>>>> --- >>>>> board-qemu/slof/Makefile | 1 + >>>>> lib/libvirtio/virtio.h | 5 ++ >>>>> lib/libvirtio/virtio-9p.c | 4 ++ >>>>> lib/libvirtio/virtio-blk.c | 4 ++ >>>>> lib/libvirtio/virtio-net.c | 5 +- >>>>> lib/libvirtio/virtio-scsi.c | 5 ++ >>>>> lib/libvirtio/virtio-serial.c | 12 +++-- >>>>> lib/libvirtio/virtio.c | 82 +++++++++++++++++++++++++++++++- >>>>> board-qemu/slof/OF.fs | 3 ++ >>>>> slof/fs/dma-instance-function.fs | 28 +++++++++++ >>>>> 10 files changed, 143 insertions(+), 6 deletions(-) >>>>> create mode 100644 slof/fs/dma-instance-function.fs >>>>> >>>>> diff --git a/board-qemu/slof/Makefile b/board-qemu/slof/Makefile >>>>> index 2263e751bde9..d7ed2d7a6f18 100644 >>>>> --- a/board-qemu/slof/Makefile >>>>> +++ b/board-qemu/slof/Makefile >>>>> @@ -99,6 +99,7 @@ OF_FFS_FILES = \ >>>>> $(SLOFCMNDIR)/fs/graphics.fs \ >>>>> $(SLOFCMNDIR)/fs/generic-disk.fs \ >>>>> $(SLOFCMNDIR)/fs/dma-function.fs \ >>>>> + $(SLOFCMNDIR)/fs/dma-instance-function.fs \ >>>>> $(SLOFCMNDIR)/fs/pci-device.fs \ >>>>> $(SLOFCMNDIR)/fs/pci-bridge.fs \ >>>>> $(SLOFCMNDIR)/fs/pci-properties.fs \ >>>>> diff --git a/lib/libvirtio/virtio.h b/lib/libvirtio/virtio.h >>>>> index 4927a97f9f5f..250120b3112b 100644 >>>>> --- a/lib/libvirtio/virtio.h >>>>> +++ b/lib/libvirtio/virtio.h >>>>> @@ -29,6 +29,7 @@ >>>>> #define VIRTIO_F_RING_INDIRECT_DESC BIT(28) >>>>> #define VIRTIO_F_RING_EVENT_IDX BIT(29) >>>>> #define VIRTIO_F_VERSION_1 BIT(32) >>>>> +#define VIRTIO_F_IOMMU_PLATFORM BIT(33) >>>>> >>>>> #define VIRTIO_TIMEOUT 5000 /* 5 sec timeout */ >>>>> >>>>> @@ -84,6 +85,8 @@ struct vqs { >>>>> struct vring_desc *desc; >>>>> struct vring_avail *avail; >>>>> struct vring_used *used; >>>>> + void **desc_gpas; /* to get gpa from desc->addr (which is ioba) */ >>>>> + uint64_t bus_desc; >>>>> }; >>>>> >>>>> struct virtio_device { >>>>> @@ -109,6 +112,8 @@ extern struct vring_used *virtio_get_vring_used(struct virtio_device *dev, int q >>>>> extern void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, >>>>> uint64_t addr, uint32_t len, >>>>> uint16_t flags, uint16_t next); >>>>> +extern void virtio_free_desc(struct vqs *vq, int id, uint64_t features); >>>>> +void *virtio_desc_addr(struct virtio_device *vdev, int queue, int id); >>>>> extern struct vqs *virtio_queue_init_vq(struct virtio_device *dev, unsigned int id); >>>>> extern void virtio_queue_term_vq(struct virtio_device *dev, struct vqs *vq, unsigned int id); >>>>> >>>>> diff --git a/lib/libvirtio/virtio-9p.c b/lib/libvirtio/virtio-9p.c >>>>> index 426069fe9509..76078612b06e 100644 >>>>> --- a/lib/libvirtio/virtio-9p.c >>>>> +++ b/lib/libvirtio/virtio-9p.c >>>>> @@ -129,6 +129,10 @@ static int virtio_9p_transact(void *opaque, uint8_t *tx, int tx_size, uint8_t *r >>>>> // do something better >>>>> mb(); >>>>> } >>>>> + >>>>> + virtio_free_desc(vq, id, dev->features); >>>>> + virtio_free_desc(vq, id + 1, dev->features); >>>>> + >>>>> if (i == 0) { >>>>> return -1; >>>>> } >>>>> diff --git a/lib/libvirtio/virtio-blk.c b/lib/libvirtio/virtio-blk.c >>>>> index a0dadbb0d6a8..0363038e559d 100644 >>>>> --- a/lib/libvirtio/virtio-blk.c >>>>> +++ b/lib/libvirtio/virtio-blk.c >>>>> @@ -195,6 +195,10 @@ virtioblk_transfer(struct virtio_device *dev, char *buf, uint64_t blocknum, >>>>> break; >>>>> } >>>>> >>>>> + virtio_free_desc(vq, id, dev->features); >>>>> + virtio_free_desc(vq, id + 1, dev->features); >>>>> + virtio_free_desc(vq, id + 2, dev->features); >>>>> + >>>>> if (status == 0) >>>>> return cnt; >>>>> >>>>> diff --git a/lib/libvirtio/virtio-net.c b/lib/libvirtio/virtio-net.c >>>>> index ae67883020ef..5a0d19088527 100644 >>>>> --- a/lib/libvirtio/virtio-net.c >>>>> +++ b/lib/libvirtio/virtio-net.c >>>>> @@ -255,6 +255,9 @@ static int virtionet_xmit(struct virtio_net *vnet, char *buf, int len) >>>>> idx = virtio_modern16_to_cpu(vdev, vq_tx->avail->idx); >>>>> id = (idx * 2) % vq_tx->size; >>>>> >>>>> + virtio_free_desc(vq_tx, id, vdev->features); >>>>> + virtio_free_desc(vq_tx, id + 1, vdev->features); >>>>> + >>>>> /* Set up virtqueue descriptor for header */ >>>>> virtio_fill_desc(vq_tx, id, vdev->features, (uint64_t)nethdr, >>>>> net_hdr_size, VRING_DESC_F_NEXT, id + 1); >>>>> @@ -317,7 +320,7 @@ static int virtionet_receive(struct virtio_net *vnet, char *buf, int maxlen) >>>>> #endif >>>>> >>>>> /* Copy data to destination buffer */ >>>>> - memcpy(buf, (void *)virtio_modern64_to_cpu(vdev, vq_rx->desc[id].addr), len); >>>>> + memcpy(buf, virtio_desc_addr(vdev, VQ_RX, id), len); >>>>> >>>>> /* Move indices to next entries */ >>>>> last_rx_idx = last_rx_idx + 1; >>>>> diff --git a/lib/libvirtio/virtio-scsi.c b/lib/libvirtio/virtio-scsi.c >>>>> index 251661e8d466..44573d10e24c 100644 >>>>> --- a/lib/libvirtio/virtio-scsi.c >>>>> +++ b/lib/libvirtio/virtio-scsi.c >>>>> @@ -81,6 +81,11 @@ int virtioscsi_send(struct virtio_device *dev, >>>>> break; >>>>> } >>>>> >>>>> + virtio_free_desc(vq, id, dev->features); >>>>> + virtio_free_desc(vq, id + 1, dev->features); >>>>> + if (!(buf == NULL || buf_len == 0)) >>>>> + virtio_free_desc(vq, id + 2, dev->features); >>>>> + >>>>> return 0; >>>>> } >>>>> >>>>> diff --git a/lib/libvirtio/virtio-serial.c b/lib/libvirtio/virtio-serial.c >>>>> index 78ab0e25d7db..66fbfe8b6cc6 100644 >>>>> --- a/lib/libvirtio/virtio-serial.c >>>>> +++ b/lib/libvirtio/virtio-serial.c >>>>> @@ -108,7 +108,7 @@ void virtio_serial_shutdown(struct virtio_device *dev) >>>>> >>>>> int virtio_serial_putchar(struct virtio_device *dev, char c) >>>>> { >>>>> - int id; >>>>> + int id, ret; >>>>> uint32_t vq_size, time; >>>>> volatile uint16_t *current_used_idx; >>>>> uint16_t last_used_idx, avail_idx; >>>>> @@ -135,17 +135,21 @@ int virtio_serial_putchar(struct virtio_device *dev, char c) >>>>> virtio_queue_notify(dev, TX_Q); >>>>> >>>>> /* Wait for host to consume the descriptor */ >>>>> + ret = 1; >>>>> time = SLOF_GetTimer() + VIRTIO_TIMEOUT; >>>>> while (*current_used_idx == last_used_idx) { >>>>> // do something better >>>>> mb(); >>>>> if (time < SLOF_GetTimer()) { >>>>> printf("virtio_serial_putchar failed! \n"); >>>>> - return 0; >>>>> + ret = 0; >>>>> + break; >>>>> } >>>>> } >>>>> >>>>> - return 1; >>>>> + virtio_free_desc(vq, id, dev->features); >>>>> + >>>>> + return ret; >>>>> } >>>>> >>>>> char virtio_serial_getchar(struct virtio_device *dev) >>>>> @@ -165,7 +169,7 @@ char virtio_serial_getchar(struct virtio_device *dev) >>>>> % vq_rx->size; >>>>> >>>>> /* Copy data to destination buffer */ >>>>> - memcpy(buf, (void *)virtio_modern64_to_cpu(dev, vq_rx->desc[id - 1].addr), RX_ELEM_SIZE); >>>>> + memcpy(buf, virtio_desc_addr(dev, RX_Q, id - 1), RX_ELEM_SIZE); >>>>> >>>>> /* Move indices to next entries */ >>>>> last_rx_idx = last_rx_idx + 1; >>>>> diff --git a/lib/libvirtio/virtio.c b/lib/libvirtio/virtio.c >>>>> index fff0c7e54c19..d4db5f656888 100644 >>>>> --- a/lib/libvirtio/virtio.c >>>>> +++ b/lib/libvirtio/virtio.c >>>>> @@ -273,6 +273,17 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, >>>>> next %= vq->size; >>>>> >>>>> if (features & VIRTIO_F_VERSION_1) { >>>>> + if (features & VIRTIO_F_IOMMU_PLATFORM) { >>>>> + void *gpa = (void *) addr; >>>>> + >>>>> + if (!vq->desc_gpas) { >>>>> + fprintf(stderr, "IOMMU setup has not been done!\n"); >>>>> + return; >>>>> + } >>>>> + >>>>> + addr = SLOF_dma_map_in(gpa, len, 0); >>>>> + vq->desc_gpas[id] = gpa; >>>>> + } >>>>> desc->addr = cpu_to_le64(addr); >>>>> desc->len = cpu_to_le32(len); >>>>> desc->flags = cpu_to_le16(flags); >>>>> @@ -285,6 +296,32 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, >>>>> } >>>>> } >>>>> >>>>> +void virtio_free_desc(struct vqs *vq, int id, uint64_t features) >>>>> +{ >>>>> + struct vring_desc *desc; >>>>> + >>>>> + id %= vq->size; >>>>> + desc = &vq->desc[id]; >>>>> + >>>>> + if (features & VIRTIO_F_VERSION_1) { >>>>> + if (features & VIRTIO_F_IOMMU_PLATFORM) { >>>>> + SLOF_dma_map_out(le64_to_cpu(desc->addr), >>>>> + 0, le32_to_cpu(desc->len)); >>>>> + vq->desc_gpas[id] = NULL; >>>>> + } >>>>> + } >>>>> +} >>>> >>>> On a 4.18 RHEL8 kernel I was unable to boot from virtio-blk, it complains the >>>> disk isn't bootable and the H_PUT_TCE traces show: >>>> >>>> qemu-system-ppc-19019 [045] .... 786844.067356: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x1000,0xe46b003,0xdc73350 >>>> qemu-system-ppc-19019 [045] .... 786844.067358: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS >>>> qemu-system-ppc-19019 [045] .... 786844.067361: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x2000,0xe46c003,0xdc73350 >>>> qemu-system-ppc-19019 [045] .... 786844.067362: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS >>>> qemu-system-ppc-19019 [041] .... 786844.070532: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x3000,0xe67b003,0xdc73490 >>>> qemu-system-ppc-19019 [041] .... 786844.070533: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS >>>> qemu-system-ppc-19019 [041] .... 786844.070817: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x4000,0xe469003,0xdc73490 >>>> qemu-system-ppc-19019 [041] .... 786844.070818: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS >>>> qemu-system-ppc-19019 [041] .... 786844.071102: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x5000,0xe67a003,0xdc73490 >>>> qemu-system-ppc-19019 [041] .... 786844.071103: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS >>>> qemu-system-ppc-19019 [041] .... 786844.071123: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS >>>> qemu-system-ppc-19019 [041] .... 786844.073532: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x3228,0x0,0xdc73488 >>>> qemu-system-ppc-19019 [041] .... 786844.073534: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_PARAMETER >>>> >>>> If I disable in-kernel TCE acceleration, or use TCG, it seems to work >>>> fine. If I add the following hack it seems to fix things: >>>> >>>> @@ -305,7 +307,8 @@ void virtio_free_desc(struct vqs *vq, int id, uint64_t features) >>>> >>>> if (features & VIRTIO_F_VERSION_1) { >>>> if (features & VIRTIO_F_IOMMU_PLATFORM) { >>>> - SLOF_dma_map_out(le64_to_cpu(desc->addr), >>>> + fprintf(stderr, "mapping out desc->addr: %llx\n", le64_to_cpu(desc->addr)); >>>> + SLOF_dma_map_out(le64_to_cpu(desc->addr) & ~(0xfff), >>>> 0, le32_to_cpu(desc->len)); >>>> vq->desc_gpas[id] = NULL; >>>> } >>>> >>>> I guess the kernel is a bit stricter about unmapping unaligned TCE entries? If >>>> so, maybe we should align automatically in dma-map-out? >>> >>> >>> I thought I did that as well in 1/4, I'll double check. Hmmm. >> >> >> v2 of these patches did not align (or, trunc, really) in dma-map-out but >> v3 do, it is "swap dma-trunc swap" in dma-map-out of 1/4. SO I am really >> confused now. > > Yah sorry I think the issue was on my end. I thought: > > https://github.com/aik/SLOF/commits/virtio-iommu My bad, I should have pushed it out since I started pushing them out. The question now is if we are good to go and I can push these out and send SLOF blob update so people can actually start testing it? Thanks, > > reflected this series, but it does contain an older version of the > dma-map* patch: > > index f136324..be529c9 100644 > --- a/board-qemu/slof/pci-phb.fs > +++ b/board-qemu/slof/pci-phb.fs > @@ -146,6 +146,7 @@ setup-puid > \ grub does not align allocated addresses to the size so when mapping, > \ we might ask bm-alloc for an extra IOMMU page > : dma-align ( size virt -- aligned-size ) tce-mask and + tce-ps #aligned ; > +: dma-trunc ( addr -- addr&~fff) tce-mask not and ; > > : dma-map-in ( virt size cachable? -- devaddr ) > phb-debug? IF cr ." dma-map-in called: " .s cr THEN > @@ -163,7 +164,7 @@ setup-puid > swap ( size dev-addr virt ) > 2dup tce-mask and or >r \ add page offset to the return value > > - tce-mask not and 3 OR \ Trunk virt and add read and write perm > + dma-trunc 3 OR \ Truncate and add read and write perm > rot ( virt dev-addr size r: dev-addr ) > 0 > ?DO > @@ -179,10 +180,10 @@ setup-puid > : dma-map-out ( virt devaddr size -- ) > phb-debug? IF cr ." dma-map-out called: " .s cr THEN > (init-dma-window-vars) > - rot drop ( devaddr size ) > + rot drop ( devaddr size ) > over dma-align > - 2dup swap tce-mask not and swap bm-handle -rot > - bm-free > + swap dma-trunc swap ( devaddr-trunc size-extended ) > + 2dup bm-handle -rot bm-free > 0 > ?DO > dup 0 dma-window-liobn -rot > > > I haven't been able to re-test yet since something seems to have gone > awry with the test system last night, but I assume that's what the issue > was. Sorry for the confusion. > >> >> It is true that checking differs between qemu's and kvm's h_put_tce >> handlers, I added this (see below) to my qemu and it still works, can >> you please try in your tree and capture the spapr_iommu_pci_put traces? >> Thanks, >> >> >> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c >> index 3d3bcc86496a..feae27bf11fa 100644 >> --- a/hw/ppc/spapr_iommu.c >> +++ b/hw/ppc/spapr_iommu.c >> @@ -564,6 +564,9 @@ static target_ulong h_put_tce(PowerPCCPU *cpu, >> SpaprMachineState *spapr, >> if (tcet) { >> hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift); >> >> + if (ioba & ~page_mask) >> + abort(); >> + >> >> >> >> >> -- >> Alexey
Quoting Alexey Kardashevskiy (2019-11-27 18:15:03) > > > On 23/11/2019 01:42, Michael Roth wrote: > > Quoting Alexey Kardashevskiy (2019-11-21 21:14:10) > >> > >> > >> On 22/11/2019 12:49, Alexey Kardashevskiy wrote: > >>> > >>> > >>> On 22/11/2019 11:09, Michael Roth wrote: > >>>> Quoting Alexey Kardashevskiy (2019-11-20 17:58:01) > >>>>> When QEMU is started with iommu_platform=on, the guest driver must accept > >>>>> it or the device will fail. > >>>>> > >>>>> This enables IOMMU support for virtio-net, -scsi, -block, -serial, -9p > >>>>> devices. -serial and -9p are only compile tested though. > >>>>> > >>>>> For virtio-net we map all RX buffers once and TX when xmit() is called > >>>>> and unmap older pages when we are about to reuse the VQ descriptor. > >>>>> As all other devices are synchronous, we unmap IOMMU pages right after > >>>>> completion of a transaction. > >>>>> > >>>>> This depends on QEMU's: > >>>>> https://patchwork.ozlabs.org/patch/1194067/ > >>>>> > >>>>> Copied-bits-and-pieces-from: Michael Roth <mdroth@linux.vnet.ibm.com> > >>>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> > >>>>> --- > >>>>> Changes: > >>>>> v2: > >>>>> * added Mike's fs/dma-instance-function.fs > >>>>> * total rework > >>>>> --- > >>>>> board-qemu/slof/Makefile | 1 + > >>>>> lib/libvirtio/virtio.h | 5 ++ > >>>>> lib/libvirtio/virtio-9p.c | 4 ++ > >>>>> lib/libvirtio/virtio-blk.c | 4 ++ > >>>>> lib/libvirtio/virtio-net.c | 5 +- > >>>>> lib/libvirtio/virtio-scsi.c | 5 ++ > >>>>> lib/libvirtio/virtio-serial.c | 12 +++-- > >>>>> lib/libvirtio/virtio.c | 82 +++++++++++++++++++++++++++++++- > >>>>> board-qemu/slof/OF.fs | 3 ++ > >>>>> slof/fs/dma-instance-function.fs | 28 +++++++++++ > >>>>> 10 files changed, 143 insertions(+), 6 deletions(-) > >>>>> create mode 100644 slof/fs/dma-instance-function.fs > >>>>> > >>>>> diff --git a/board-qemu/slof/Makefile b/board-qemu/slof/Makefile > >>>>> index 2263e751bde9..d7ed2d7a6f18 100644 > >>>>> --- a/board-qemu/slof/Makefile > >>>>> +++ b/board-qemu/slof/Makefile > >>>>> @@ -99,6 +99,7 @@ OF_FFS_FILES = \ > >>>>> $(SLOFCMNDIR)/fs/graphics.fs \ > >>>>> $(SLOFCMNDIR)/fs/generic-disk.fs \ > >>>>> $(SLOFCMNDIR)/fs/dma-function.fs \ > >>>>> + $(SLOFCMNDIR)/fs/dma-instance-function.fs \ > >>>>> $(SLOFCMNDIR)/fs/pci-device.fs \ > >>>>> $(SLOFCMNDIR)/fs/pci-bridge.fs \ > >>>>> $(SLOFCMNDIR)/fs/pci-properties.fs \ > >>>>> diff --git a/lib/libvirtio/virtio.h b/lib/libvirtio/virtio.h > >>>>> index 4927a97f9f5f..250120b3112b 100644 > >>>>> --- a/lib/libvirtio/virtio.h > >>>>> +++ b/lib/libvirtio/virtio.h > >>>>> @@ -29,6 +29,7 @@ > >>>>> #define VIRTIO_F_RING_INDIRECT_DESC BIT(28) > >>>>> #define VIRTIO_F_RING_EVENT_IDX BIT(29) > >>>>> #define VIRTIO_F_VERSION_1 BIT(32) > >>>>> +#define VIRTIO_F_IOMMU_PLATFORM BIT(33) > >>>>> > >>>>> #define VIRTIO_TIMEOUT 5000 /* 5 sec timeout */ > >>>>> > >>>>> @@ -84,6 +85,8 @@ struct vqs { > >>>>> struct vring_desc *desc; > >>>>> struct vring_avail *avail; > >>>>> struct vring_used *used; > >>>>> + void **desc_gpas; /* to get gpa from desc->addr (which is ioba) */ > >>>>> + uint64_t bus_desc; > >>>>> }; > >>>>> > >>>>> struct virtio_device { > >>>>> @@ -109,6 +112,8 @@ extern struct vring_used *virtio_get_vring_used(struct virtio_device *dev, int q > >>>>> extern void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, > >>>>> uint64_t addr, uint32_t len, > >>>>> uint16_t flags, uint16_t next); > >>>>> +extern void virtio_free_desc(struct vqs *vq, int id, uint64_t features); > >>>>> +void *virtio_desc_addr(struct virtio_device *vdev, int queue, int id); > >>>>> extern struct vqs *virtio_queue_init_vq(struct virtio_device *dev, unsigned int id); > >>>>> extern void virtio_queue_term_vq(struct virtio_device *dev, struct vqs *vq, unsigned int id); > >>>>> > >>>>> diff --git a/lib/libvirtio/virtio-9p.c b/lib/libvirtio/virtio-9p.c > >>>>> index 426069fe9509..76078612b06e 100644 > >>>>> --- a/lib/libvirtio/virtio-9p.c > >>>>> +++ b/lib/libvirtio/virtio-9p.c > >>>>> @@ -129,6 +129,10 @@ static int virtio_9p_transact(void *opaque, uint8_t *tx, int tx_size, uint8_t *r > >>>>> // do something better > >>>>> mb(); > >>>>> } > >>>>> + > >>>>> + virtio_free_desc(vq, id, dev->features); > >>>>> + virtio_free_desc(vq, id + 1, dev->features); > >>>>> + > >>>>> if (i == 0) { > >>>>> return -1; > >>>>> } > >>>>> diff --git a/lib/libvirtio/virtio-blk.c b/lib/libvirtio/virtio-blk.c > >>>>> index a0dadbb0d6a8..0363038e559d 100644 > >>>>> --- a/lib/libvirtio/virtio-blk.c > >>>>> +++ b/lib/libvirtio/virtio-blk.c > >>>>> @@ -195,6 +195,10 @@ virtioblk_transfer(struct virtio_device *dev, char *buf, uint64_t blocknum, > >>>>> break; > >>>>> } > >>>>> > >>>>> + virtio_free_desc(vq, id, dev->features); > >>>>> + virtio_free_desc(vq, id + 1, dev->features); > >>>>> + virtio_free_desc(vq, id + 2, dev->features); > >>>>> + > >>>>> if (status == 0) > >>>>> return cnt; > >>>>> > >>>>> diff --git a/lib/libvirtio/virtio-net.c b/lib/libvirtio/virtio-net.c > >>>>> index ae67883020ef..5a0d19088527 100644 > >>>>> --- a/lib/libvirtio/virtio-net.c > >>>>> +++ b/lib/libvirtio/virtio-net.c > >>>>> @@ -255,6 +255,9 @@ static int virtionet_xmit(struct virtio_net *vnet, char *buf, int len) > >>>>> idx = virtio_modern16_to_cpu(vdev, vq_tx->avail->idx); > >>>>> id = (idx * 2) % vq_tx->size; > >>>>> > >>>>> + virtio_free_desc(vq_tx, id, vdev->features); > >>>>> + virtio_free_desc(vq_tx, id + 1, vdev->features); > >>>>> + > >>>>> /* Set up virtqueue descriptor for header */ > >>>>> virtio_fill_desc(vq_tx, id, vdev->features, (uint64_t)nethdr, > >>>>> net_hdr_size, VRING_DESC_F_NEXT, id + 1); > >>>>> @@ -317,7 +320,7 @@ static int virtionet_receive(struct virtio_net *vnet, char *buf, int maxlen) > >>>>> #endif > >>>>> > >>>>> /* Copy data to destination buffer */ > >>>>> - memcpy(buf, (void *)virtio_modern64_to_cpu(vdev, vq_rx->desc[id].addr), len); > >>>>> + memcpy(buf, virtio_desc_addr(vdev, VQ_RX, id), len); > >>>>> > >>>>> /* Move indices to next entries */ > >>>>> last_rx_idx = last_rx_idx + 1; > >>>>> diff --git a/lib/libvirtio/virtio-scsi.c b/lib/libvirtio/virtio-scsi.c > >>>>> index 251661e8d466..44573d10e24c 100644 > >>>>> --- a/lib/libvirtio/virtio-scsi.c > >>>>> +++ b/lib/libvirtio/virtio-scsi.c > >>>>> @@ -81,6 +81,11 @@ int virtioscsi_send(struct virtio_device *dev, > >>>>> break; > >>>>> } > >>>>> > >>>>> + virtio_free_desc(vq, id, dev->features); > >>>>> + virtio_free_desc(vq, id + 1, dev->features); > >>>>> + if (!(buf == NULL || buf_len == 0)) > >>>>> + virtio_free_desc(vq, id + 2, dev->features); > >>>>> + > >>>>> return 0; > >>>>> } > >>>>> > >>>>> diff --git a/lib/libvirtio/virtio-serial.c b/lib/libvirtio/virtio-serial.c > >>>>> index 78ab0e25d7db..66fbfe8b6cc6 100644 > >>>>> --- a/lib/libvirtio/virtio-serial.c > >>>>> +++ b/lib/libvirtio/virtio-serial.c > >>>>> @@ -108,7 +108,7 @@ void virtio_serial_shutdown(struct virtio_device *dev) > >>>>> > >>>>> int virtio_serial_putchar(struct virtio_device *dev, char c) > >>>>> { > >>>>> - int id; > >>>>> + int id, ret; > >>>>> uint32_t vq_size, time; > >>>>> volatile uint16_t *current_used_idx; > >>>>> uint16_t last_used_idx, avail_idx; > >>>>> @@ -135,17 +135,21 @@ int virtio_serial_putchar(struct virtio_device *dev, char c) > >>>>> virtio_queue_notify(dev, TX_Q); > >>>>> > >>>>> /* Wait for host to consume the descriptor */ > >>>>> + ret = 1; > >>>>> time = SLOF_GetTimer() + VIRTIO_TIMEOUT; > >>>>> while (*current_used_idx == last_used_idx) { > >>>>> // do something better > >>>>> mb(); > >>>>> if (time < SLOF_GetTimer()) { > >>>>> printf("virtio_serial_putchar failed! \n"); > >>>>> - return 0; > >>>>> + ret = 0; > >>>>> + break; > >>>>> } > >>>>> } > >>>>> > >>>>> - return 1; > >>>>> + virtio_free_desc(vq, id, dev->features); > >>>>> + > >>>>> + return ret; > >>>>> } > >>>>> > >>>>> char virtio_serial_getchar(struct virtio_device *dev) > >>>>> @@ -165,7 +169,7 @@ char virtio_serial_getchar(struct virtio_device *dev) > >>>>> % vq_rx->size; > >>>>> > >>>>> /* Copy data to destination buffer */ > >>>>> - memcpy(buf, (void *)virtio_modern64_to_cpu(dev, vq_rx->desc[id - 1].addr), RX_ELEM_SIZE); > >>>>> + memcpy(buf, virtio_desc_addr(dev, RX_Q, id - 1), RX_ELEM_SIZE); > >>>>> > >>>>> /* Move indices to next entries */ > >>>>> last_rx_idx = last_rx_idx + 1; > >>>>> diff --git a/lib/libvirtio/virtio.c b/lib/libvirtio/virtio.c > >>>>> index fff0c7e54c19..d4db5f656888 100644 > >>>>> --- a/lib/libvirtio/virtio.c > >>>>> +++ b/lib/libvirtio/virtio.c > >>>>> @@ -273,6 +273,17 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, > >>>>> next %= vq->size; > >>>>> > >>>>> if (features & VIRTIO_F_VERSION_1) { > >>>>> + if (features & VIRTIO_F_IOMMU_PLATFORM) { > >>>>> + void *gpa = (void *) addr; > >>>>> + > >>>>> + if (!vq->desc_gpas) { > >>>>> + fprintf(stderr, "IOMMU setup has not been done!\n"); > >>>>> + return; > >>>>> + } > >>>>> + > >>>>> + addr = SLOF_dma_map_in(gpa, len, 0); > >>>>> + vq->desc_gpas[id] = gpa; > >>>>> + } > >>>>> desc->addr = cpu_to_le64(addr); > >>>>> desc->len = cpu_to_le32(len); > >>>>> desc->flags = cpu_to_le16(flags); > >>>>> @@ -285,6 +296,32 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, > >>>>> } > >>>>> } > >>>>> > >>>>> +void virtio_free_desc(struct vqs *vq, int id, uint64_t features) > >>>>> +{ > >>>>> + struct vring_desc *desc; > >>>>> + > >>>>> + id %= vq->size; > >>>>> + desc = &vq->desc[id]; > >>>>> + > >>>>> + if (features & VIRTIO_F_VERSION_1) { > >>>>> + if (features & VIRTIO_F_IOMMU_PLATFORM) { > >>>>> + SLOF_dma_map_out(le64_to_cpu(desc->addr), > >>>>> + 0, le32_to_cpu(desc->len)); > >>>>> + vq->desc_gpas[id] = NULL; > >>>>> + } > >>>>> + } > >>>>> +} > >>>> > >>>> On a 4.18 RHEL8 kernel I was unable to boot from virtio-blk, it complains the > >>>> disk isn't bootable and the H_PUT_TCE traces show: > >>>> > >>>> qemu-system-ppc-19019 [045] .... 786844.067356: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x1000,0xe46b003,0xdc73350 > >>>> qemu-system-ppc-19019 [045] .... 786844.067358: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS > >>>> qemu-system-ppc-19019 [045] .... 786844.067361: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x2000,0xe46c003,0xdc73350 > >>>> qemu-system-ppc-19019 [045] .... 786844.067362: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS > >>>> qemu-system-ppc-19019 [041] .... 786844.070532: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x3000,0xe67b003,0xdc73490 > >>>> qemu-system-ppc-19019 [041] .... 786844.070533: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS > >>>> qemu-system-ppc-19019 [041] .... 786844.070817: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x4000,0xe469003,0xdc73490 > >>>> qemu-system-ppc-19019 [041] .... 786844.070818: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS > >>>> qemu-system-ppc-19019 [041] .... 786844.071102: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x5000,0xe67a003,0xdc73490 > >>>> qemu-system-ppc-19019 [041] .... 786844.071103: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS > >>>> qemu-system-ppc-19019 [041] .... 786844.071123: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS > >>>> qemu-system-ppc-19019 [041] .... 786844.073532: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x3228,0x0,0xdc73488 > >>>> qemu-system-ppc-19019 [041] .... 786844.073534: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_PARAMETER > >>>> > >>>> If I disable in-kernel TCE acceleration, or use TCG, it seems to work > >>>> fine. If I add the following hack it seems to fix things: > >>>> > >>>> @@ -305,7 +307,8 @@ void virtio_free_desc(struct vqs *vq, int id, uint64_t features) > >>>> > >>>> if (features & VIRTIO_F_VERSION_1) { > >>>> if (features & VIRTIO_F_IOMMU_PLATFORM) { > >>>> - SLOF_dma_map_out(le64_to_cpu(desc->addr), > >>>> + fprintf(stderr, "mapping out desc->addr: %llx\n", le64_to_cpu(desc->addr)); > >>>> + SLOF_dma_map_out(le64_to_cpu(desc->addr) & ~(0xfff), > >>>> 0, le32_to_cpu(desc->len)); > >>>> vq->desc_gpas[id] = NULL; > >>>> } > >>>> > >>>> I guess the kernel is a bit stricter about unmapping unaligned TCE entries? If > >>>> so, maybe we should align automatically in dma-map-out? > >>> > >>> > >>> I thought I did that as well in 1/4, I'll double check. Hmmm. > >> > >> > >> v2 of these patches did not align (or, trunc, really) in dma-map-out but > >> v3 do, it is "swap dma-trunc swap" in dma-map-out of 1/4. SO I am really > >> confused now. > > > > Yah sorry I think the issue was on my end. I thought: > > > > https://github.com/aik/SLOF/commits/virtio-iommu > > > My bad, I should have pushed it out since I started pushing them out. > > The question now is if we are good to go and I can push these out and > send SLOF blob update so people can actually start testing it? Thanks, System came back after clearing GARD record, re-tested disk boot for virtio-blk/virtio-scsi, and netboot via virtio-net, with iommu on/off and everything seems to be working. Still need to verify in an actual uv-capable system, but in terms of base IOMMU enablement the series looks good: Tested-by: Michael Roth <mdroth@linux.vnet.ibm.com> > > > > > > reflected this series, but it does contain an older version of the > > dma-map* patch: > > > > index f136324..be529c9 100644 > > --- a/board-qemu/slof/pci-phb.fs > > +++ b/board-qemu/slof/pci-phb.fs > > @@ -146,6 +146,7 @@ setup-puid > > \ grub does not align allocated addresses to the size so when mapping, > > \ we might ask bm-alloc for an extra IOMMU page > > : dma-align ( size virt -- aligned-size ) tce-mask and + tce-ps #aligned ; > > +: dma-trunc ( addr -- addr&~fff) tce-mask not and ; > > > > : dma-map-in ( virt size cachable? -- devaddr ) > > phb-debug? IF cr ." dma-map-in called: " .s cr THEN > > @@ -163,7 +164,7 @@ setup-puid > > swap ( size dev-addr virt ) > > 2dup tce-mask and or >r \ add page offset to the return value > > > > - tce-mask not and 3 OR \ Trunk virt and add read and write perm > > + dma-trunc 3 OR \ Truncate and add read and write perm > > rot ( virt dev-addr size r: dev-addr ) > > 0 > > ?DO > > @@ -179,10 +180,10 @@ setup-puid > > : dma-map-out ( virt devaddr size -- ) > > phb-debug? IF cr ." dma-map-out called: " .s cr THEN > > (init-dma-window-vars) > > - rot drop ( devaddr size ) > > + rot drop ( devaddr size ) > > over dma-align > > - 2dup swap tce-mask not and swap bm-handle -rot > > - bm-free > > + swap dma-trunc swap ( devaddr-trunc size-extended ) > > + 2dup bm-handle -rot bm-free > > 0 > > ?DO > > dup 0 dma-window-liobn -rot > > > > > > I haven't been able to re-test yet since something seems to have gone > > awry with the test system last night, but I assume that's what the issue > > was. Sorry for the confusion. > > > >> > >> It is true that checking differs between qemu's and kvm's h_put_tce > >> handlers, I added this (see below) to my qemu and it still works, can > >> you please try in your tree and capture the spapr_iommu_pci_put traces? > >> Thanks, > >> > >> > >> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c > >> index 3d3bcc86496a..feae27bf11fa 100644 > >> --- a/hw/ppc/spapr_iommu.c > >> +++ b/hw/ppc/spapr_iommu.c > >> @@ -564,6 +564,9 @@ static target_ulong h_put_tce(PowerPCCPU *cpu, > >> SpaprMachineState *spapr, > >> if (tcet) { > >> hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift); > >> > >> + if (ioba & ~page_mask) > >> + abort(); > >> + > >> > >> > >> > >> > >> -- > >> Alexey > > -- > Alexey
On 03/12/2019 13:49, Michael Roth wrote: > Quoting Alexey Kardashevskiy (2019-11-27 18:15:03) >> >> >> On 23/11/2019 01:42, Michael Roth wrote: >>> Quoting Alexey Kardashevskiy (2019-11-21 21:14:10) >>>> >>>> >>>> On 22/11/2019 12:49, Alexey Kardashevskiy wrote: >>>>> >>>>> >>>>> On 22/11/2019 11:09, Michael Roth wrote: >>>>>> Quoting Alexey Kardashevskiy (2019-11-20 17:58:01) >>>>>>> When QEMU is started with iommu_platform=on, the guest driver must accept >>>>>>> it or the device will fail. >>>>>>> >>>>>>> This enables IOMMU support for virtio-net, -scsi, -block, -serial, -9p >>>>>>> devices. -serial and -9p are only compile tested though. >>>>>>> >>>>>>> For virtio-net we map all RX buffers once and TX when xmit() is called >>>>>>> and unmap older pages when we are about to reuse the VQ descriptor. >>>>>>> As all other devices are synchronous, we unmap IOMMU pages right after >>>>>>> completion of a transaction. >>>>>>> >>>>>>> This depends on QEMU's: >>>>>>> https://patchwork.ozlabs.org/patch/1194067/ >>>>>>> >>>>>>> Copied-bits-and-pieces-from: Michael Roth <mdroth@linux.vnet.ibm.com> >>>>>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> >>>>>>> --- >>>>>>> Changes: >>>>>>> v2: >>>>>>> * added Mike's fs/dma-instance-function.fs >>>>>>> * total rework >>>>>>> --- >>>>>>> board-qemu/slof/Makefile | 1 + >>>>>>> lib/libvirtio/virtio.h | 5 ++ >>>>>>> lib/libvirtio/virtio-9p.c | 4 ++ >>>>>>> lib/libvirtio/virtio-blk.c | 4 ++ >>>>>>> lib/libvirtio/virtio-net.c | 5 +- >>>>>>> lib/libvirtio/virtio-scsi.c | 5 ++ >>>>>>> lib/libvirtio/virtio-serial.c | 12 +++-- >>>>>>> lib/libvirtio/virtio.c | 82 +++++++++++++++++++++++++++++++- >>>>>>> board-qemu/slof/OF.fs | 3 ++ >>>>>>> slof/fs/dma-instance-function.fs | 28 +++++++++++ >>>>>>> 10 files changed, 143 insertions(+), 6 deletions(-) >>>>>>> create mode 100644 slof/fs/dma-instance-function.fs >>>>>>> >>>>>>> diff --git a/board-qemu/slof/Makefile b/board-qemu/slof/Makefile >>>>>>> index 2263e751bde9..d7ed2d7a6f18 100644 >>>>>>> --- a/board-qemu/slof/Makefile >>>>>>> +++ b/board-qemu/slof/Makefile >>>>>>> @@ -99,6 +99,7 @@ OF_FFS_FILES = \ >>>>>>> $(SLOFCMNDIR)/fs/graphics.fs \ >>>>>>> $(SLOFCMNDIR)/fs/generic-disk.fs \ >>>>>>> $(SLOFCMNDIR)/fs/dma-function.fs \ >>>>>>> + $(SLOFCMNDIR)/fs/dma-instance-function.fs \ >>>>>>> $(SLOFCMNDIR)/fs/pci-device.fs \ >>>>>>> $(SLOFCMNDIR)/fs/pci-bridge.fs \ >>>>>>> $(SLOFCMNDIR)/fs/pci-properties.fs \ >>>>>>> diff --git a/lib/libvirtio/virtio.h b/lib/libvirtio/virtio.h >>>>>>> index 4927a97f9f5f..250120b3112b 100644 >>>>>>> --- a/lib/libvirtio/virtio.h >>>>>>> +++ b/lib/libvirtio/virtio.h >>>>>>> @@ -29,6 +29,7 @@ >>>>>>> #define VIRTIO_F_RING_INDIRECT_DESC BIT(28) >>>>>>> #define VIRTIO_F_RING_EVENT_IDX BIT(29) >>>>>>> #define VIRTIO_F_VERSION_1 BIT(32) >>>>>>> +#define VIRTIO_F_IOMMU_PLATFORM BIT(33) >>>>>>> >>>>>>> #define VIRTIO_TIMEOUT 5000 /* 5 sec timeout */ >>>>>>> >>>>>>> @@ -84,6 +85,8 @@ struct vqs { >>>>>>> struct vring_desc *desc; >>>>>>> struct vring_avail *avail; >>>>>>> struct vring_used *used; >>>>>>> + void **desc_gpas; /* to get gpa from desc->addr (which is ioba) */ >>>>>>> + uint64_t bus_desc; >>>>>>> }; >>>>>>> >>>>>>> struct virtio_device { >>>>>>> @@ -109,6 +112,8 @@ extern struct vring_used *virtio_get_vring_used(struct virtio_device *dev, int q >>>>>>> extern void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, >>>>>>> uint64_t addr, uint32_t len, >>>>>>> uint16_t flags, uint16_t next); >>>>>>> +extern void virtio_free_desc(struct vqs *vq, int id, uint64_t features); >>>>>>> +void *virtio_desc_addr(struct virtio_device *vdev, int queue, int id); >>>>>>> extern struct vqs *virtio_queue_init_vq(struct virtio_device *dev, unsigned int id); >>>>>>> extern void virtio_queue_term_vq(struct virtio_device *dev, struct vqs *vq, unsigned int id); >>>>>>> >>>>>>> diff --git a/lib/libvirtio/virtio-9p.c b/lib/libvirtio/virtio-9p.c >>>>>>> index 426069fe9509..76078612b06e 100644 >>>>>>> --- a/lib/libvirtio/virtio-9p.c >>>>>>> +++ b/lib/libvirtio/virtio-9p.c >>>>>>> @@ -129,6 +129,10 @@ static int virtio_9p_transact(void *opaque, uint8_t *tx, int tx_size, uint8_t *r >>>>>>> // do something better >>>>>>> mb(); >>>>>>> } >>>>>>> + >>>>>>> + virtio_free_desc(vq, id, dev->features); >>>>>>> + virtio_free_desc(vq, id + 1, dev->features); >>>>>>> + >>>>>>> if (i == 0) { >>>>>>> return -1; >>>>>>> } >>>>>>> diff --git a/lib/libvirtio/virtio-blk.c b/lib/libvirtio/virtio-blk.c >>>>>>> index a0dadbb0d6a8..0363038e559d 100644 >>>>>>> --- a/lib/libvirtio/virtio-blk.c >>>>>>> +++ b/lib/libvirtio/virtio-blk.c >>>>>>> @@ -195,6 +195,10 @@ virtioblk_transfer(struct virtio_device *dev, char *buf, uint64_t blocknum, >>>>>>> break; >>>>>>> } >>>>>>> >>>>>>> + virtio_free_desc(vq, id, dev->features); >>>>>>> + virtio_free_desc(vq, id + 1, dev->features); >>>>>>> + virtio_free_desc(vq, id + 2, dev->features); >>>>>>> + >>>>>>> if (status == 0) >>>>>>> return cnt; >>>>>>> >>>>>>> diff --git a/lib/libvirtio/virtio-net.c b/lib/libvirtio/virtio-net.c >>>>>>> index ae67883020ef..5a0d19088527 100644 >>>>>>> --- a/lib/libvirtio/virtio-net.c >>>>>>> +++ b/lib/libvirtio/virtio-net.c >>>>>>> @@ -255,6 +255,9 @@ static int virtionet_xmit(struct virtio_net *vnet, char *buf, int len) >>>>>>> idx = virtio_modern16_to_cpu(vdev, vq_tx->avail->idx); >>>>>>> id = (idx * 2) % vq_tx->size; >>>>>>> >>>>>>> + virtio_free_desc(vq_tx, id, vdev->features); >>>>>>> + virtio_free_desc(vq_tx, id + 1, vdev->features); >>>>>>> + >>>>>>> /* Set up virtqueue descriptor for header */ >>>>>>> virtio_fill_desc(vq_tx, id, vdev->features, (uint64_t)nethdr, >>>>>>> net_hdr_size, VRING_DESC_F_NEXT, id + 1); >>>>>>> @@ -317,7 +320,7 @@ static int virtionet_receive(struct virtio_net *vnet, char *buf, int maxlen) >>>>>>> #endif >>>>>>> >>>>>>> /* Copy data to destination buffer */ >>>>>>> - memcpy(buf, (void *)virtio_modern64_to_cpu(vdev, vq_rx->desc[id].addr), len); >>>>>>> + memcpy(buf, virtio_desc_addr(vdev, VQ_RX, id), len); >>>>>>> >>>>>>> /* Move indices to next entries */ >>>>>>> last_rx_idx = last_rx_idx + 1; >>>>>>> diff --git a/lib/libvirtio/virtio-scsi.c b/lib/libvirtio/virtio-scsi.c >>>>>>> index 251661e8d466..44573d10e24c 100644 >>>>>>> --- a/lib/libvirtio/virtio-scsi.c >>>>>>> +++ b/lib/libvirtio/virtio-scsi.c >>>>>>> @@ -81,6 +81,11 @@ int virtioscsi_send(struct virtio_device *dev, >>>>>>> break; >>>>>>> } >>>>>>> >>>>>>> + virtio_free_desc(vq, id, dev->features); >>>>>>> + virtio_free_desc(vq, id + 1, dev->features); >>>>>>> + if (!(buf == NULL || buf_len == 0)) >>>>>>> + virtio_free_desc(vq, id + 2, dev->features); >>>>>>> + >>>>>>> return 0; >>>>>>> } >>>>>>> >>>>>>> diff --git a/lib/libvirtio/virtio-serial.c b/lib/libvirtio/virtio-serial.c >>>>>>> index 78ab0e25d7db..66fbfe8b6cc6 100644 >>>>>>> --- a/lib/libvirtio/virtio-serial.c >>>>>>> +++ b/lib/libvirtio/virtio-serial.c >>>>>>> @@ -108,7 +108,7 @@ void virtio_serial_shutdown(struct virtio_device *dev) >>>>>>> >>>>>>> int virtio_serial_putchar(struct virtio_device *dev, char c) >>>>>>> { >>>>>>> - int id; >>>>>>> + int id, ret; >>>>>>> uint32_t vq_size, time; >>>>>>> volatile uint16_t *current_used_idx; >>>>>>> uint16_t last_used_idx, avail_idx; >>>>>>> @@ -135,17 +135,21 @@ int virtio_serial_putchar(struct virtio_device *dev, char c) >>>>>>> virtio_queue_notify(dev, TX_Q); >>>>>>> >>>>>>> /* Wait for host to consume the descriptor */ >>>>>>> + ret = 1; >>>>>>> time = SLOF_GetTimer() + VIRTIO_TIMEOUT; >>>>>>> while (*current_used_idx == last_used_idx) { >>>>>>> // do something better >>>>>>> mb(); >>>>>>> if (time < SLOF_GetTimer()) { >>>>>>> printf("virtio_serial_putchar failed! \n"); >>>>>>> - return 0; >>>>>>> + ret = 0; >>>>>>> + break; >>>>>>> } >>>>>>> } >>>>>>> >>>>>>> - return 1; >>>>>>> + virtio_free_desc(vq, id, dev->features); >>>>>>> + >>>>>>> + return ret; >>>>>>> } >>>>>>> >>>>>>> char virtio_serial_getchar(struct virtio_device *dev) >>>>>>> @@ -165,7 +169,7 @@ char virtio_serial_getchar(struct virtio_device *dev) >>>>>>> % vq_rx->size; >>>>>>> >>>>>>> /* Copy data to destination buffer */ >>>>>>> - memcpy(buf, (void *)virtio_modern64_to_cpu(dev, vq_rx->desc[id - 1].addr), RX_ELEM_SIZE); >>>>>>> + memcpy(buf, virtio_desc_addr(dev, RX_Q, id - 1), RX_ELEM_SIZE); >>>>>>> >>>>>>> /* Move indices to next entries */ >>>>>>> last_rx_idx = last_rx_idx + 1; >>>>>>> diff --git a/lib/libvirtio/virtio.c b/lib/libvirtio/virtio.c >>>>>>> index fff0c7e54c19..d4db5f656888 100644 >>>>>>> --- a/lib/libvirtio/virtio.c >>>>>>> +++ b/lib/libvirtio/virtio.c >>>>>>> @@ -273,6 +273,17 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, >>>>>>> next %= vq->size; >>>>>>> >>>>>>> if (features & VIRTIO_F_VERSION_1) { >>>>>>> + if (features & VIRTIO_F_IOMMU_PLATFORM) { >>>>>>> + void *gpa = (void *) addr; >>>>>>> + >>>>>>> + if (!vq->desc_gpas) { >>>>>>> + fprintf(stderr, "IOMMU setup has not been done!\n"); >>>>>>> + return; >>>>>>> + } >>>>>>> + >>>>>>> + addr = SLOF_dma_map_in(gpa, len, 0); >>>>>>> + vq->desc_gpas[id] = gpa; >>>>>>> + } >>>>>>> desc->addr = cpu_to_le64(addr); >>>>>>> desc->len = cpu_to_le32(len); >>>>>>> desc->flags = cpu_to_le16(flags); >>>>>>> @@ -285,6 +296,32 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, >>>>>>> } >>>>>>> } >>>>>>> >>>>>>> +void virtio_free_desc(struct vqs *vq, int id, uint64_t features) >>>>>>> +{ >>>>>>> + struct vring_desc *desc; >>>>>>> + >>>>>>> + id %= vq->size; >>>>>>> + desc = &vq->desc[id]; >>>>>>> + >>>>>>> + if (features & VIRTIO_F_VERSION_1) { >>>>>>> + if (features & VIRTIO_F_IOMMU_PLATFORM) { >>>>>>> + SLOF_dma_map_out(le64_to_cpu(desc->addr), >>>>>>> + 0, le32_to_cpu(desc->len)); >>>>>>> + vq->desc_gpas[id] = NULL; >>>>>>> + } >>>>>>> + } >>>>>>> +} >>>>>> >>>>>> On a 4.18 RHEL8 kernel I was unable to boot from virtio-blk, it complains the >>>>>> disk isn't bootable and the H_PUT_TCE traces show: >>>>>> >>>>>> qemu-system-ppc-19019 [045] .... 786844.067356: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x1000,0xe46b003,0xdc73350 >>>>>> qemu-system-ppc-19019 [045] .... 786844.067358: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS >>>>>> qemu-system-ppc-19019 [045] .... 786844.067361: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x2000,0xe46c003,0xdc73350 >>>>>> qemu-system-ppc-19019 [045] .... 786844.067362: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS >>>>>> qemu-system-ppc-19019 [041] .... 786844.070532: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x3000,0xe67b003,0xdc73490 >>>>>> qemu-system-ppc-19019 [041] .... 786844.070533: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS >>>>>> qemu-system-ppc-19019 [041] .... 786844.070817: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x4000,0xe469003,0xdc73490 >>>>>> qemu-system-ppc-19019 [041] .... 786844.070818: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS >>>>>> qemu-system-ppc-19019 [041] .... 786844.071102: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x5000,0xe67a003,0xdc73490 >>>>>> qemu-system-ppc-19019 [041] .... 786844.071103: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS >>>>>> qemu-system-ppc-19019 [041] .... 786844.071123: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_SUCCESS >>>>>> qemu-system-ppc-19019 [041] .... 786844.073532: kvm_hcall_enter: VCPU 0: hcall=H_PUT_TCE GPR4-7=0x80000000,0x3228,0x0,0xdc73488 >>>>>> qemu-system-ppc-19019 [041] .... 786844.073534: kvm_hcall_exit: VCPU 0: ret=RESUME_GUEST hcall_rc=H_PARAMETER >>>>>> >>>>>> If I disable in-kernel TCE acceleration, or use TCG, it seems to work >>>>>> fine. If I add the following hack it seems to fix things: >>>>>> >>>>>> @@ -305,7 +307,8 @@ void virtio_free_desc(struct vqs *vq, int id, uint64_t features) >>>>>> >>>>>> if (features & VIRTIO_F_VERSION_1) { >>>>>> if (features & VIRTIO_F_IOMMU_PLATFORM) { >>>>>> - SLOF_dma_map_out(le64_to_cpu(desc->addr), >>>>>> + fprintf(stderr, "mapping out desc->addr: %llx\n", le64_to_cpu(desc->addr)); >>>>>> + SLOF_dma_map_out(le64_to_cpu(desc->addr) & ~(0xfff), >>>>>> 0, le32_to_cpu(desc->len)); >>>>>> vq->desc_gpas[id] = NULL; >>>>>> } >>>>>> >>>>>> I guess the kernel is a bit stricter about unmapping unaligned TCE entries? If >>>>>> so, maybe we should align automatically in dma-map-out? >>>>> >>>>> >>>>> I thought I did that as well in 1/4, I'll double check. Hmmm. >>>> >>>> >>>> v2 of these patches did not align (or, trunc, really) in dma-map-out but >>>> v3 do, it is "swap dma-trunc swap" in dma-map-out of 1/4. SO I am really >>>> confused now. >>> >>> Yah sorry I think the issue was on my end. I thought: >>> >>> https://github.com/aik/SLOF/commits/virtio-iommu >> >> >> My bad, I should have pushed it out since I started pushing them out. >> >> The question now is if we are good to go and I can push these out and >> send SLOF blob update so people can actually start testing it? Thanks, > > System came back after clearing GARD record, re-tested disk boot for > virtio-blk/virtio-scsi, and netboot via virtio-net, with iommu on/off and > everything seems to be working. > > Still need to verify in an actual uv-capable system, but in terms of base > IOMMU enablement the series looks good: > > Tested-by: Michael Roth <mdroth@linux.vnet.ibm.com> Oh great, thanks! Are going to give it some more review or I just push it out and send slof blob update to qemu? David, how much do you want to see many rb's for these? Thanks, > >> >> >>> >>> reflected this series, but it does contain an older version of the >>> dma-map* patch: >>> >>> index f136324..be529c9 100644 >>> --- a/board-qemu/slof/pci-phb.fs >>> +++ b/board-qemu/slof/pci-phb.fs >>> @@ -146,6 +146,7 @@ setup-puid >>> \ grub does not align allocated addresses to the size so when mapping, >>> \ we might ask bm-alloc for an extra IOMMU page >>> : dma-align ( size virt -- aligned-size ) tce-mask and + tce-ps #aligned ; >>> +: dma-trunc ( addr -- addr&~fff) tce-mask not and ; >>> >>> : dma-map-in ( virt size cachable? -- devaddr ) >>> phb-debug? IF cr ." dma-map-in called: " .s cr THEN >>> @@ -163,7 +164,7 @@ setup-puid >>> swap ( size dev-addr virt ) >>> 2dup tce-mask and or >r \ add page offset to the return value >>> >>> - tce-mask not and 3 OR \ Trunk virt and add read and write perm >>> + dma-trunc 3 OR \ Truncate and add read and write perm >>> rot ( virt dev-addr size r: dev-addr ) >>> 0 >>> ?DO >>> @@ -179,10 +180,10 @@ setup-puid >>> : dma-map-out ( virt devaddr size -- ) >>> phb-debug? IF cr ." dma-map-out called: " .s cr THEN >>> (init-dma-window-vars) >>> - rot drop ( devaddr size ) >>> + rot drop ( devaddr size ) >>> over dma-align >>> - 2dup swap tce-mask not and swap bm-handle -rot >>> - bm-free >>> + swap dma-trunc swap ( devaddr-trunc size-extended ) >>> + 2dup bm-handle -rot bm-free >>> 0 >>> ?DO >>> dup 0 dma-window-liobn -rot >>> >>> >>> I haven't been able to re-test yet since something seems to have gone >>> awry with the test system last night, but I assume that's what the issue >>> was. Sorry for the confusion. >>> >>>> >>>> It is true that checking differs between qemu's and kvm's h_put_tce >>>> handlers, I added this (see below) to my qemu and it still works, can >>>> you please try in your tree and capture the spapr_iommu_pci_put traces? >>>> Thanks, >>>> >>>> >>>> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c >>>> index 3d3bcc86496a..feae27bf11fa 100644 >>>> --- a/hw/ppc/spapr_iommu.c >>>> +++ b/hw/ppc/spapr_iommu.c >>>> @@ -564,6 +564,9 @@ static target_ulong h_put_tce(PowerPCCPU *cpu, >>>> SpaprMachineState *spapr, >>>> if (tcet) { >>>> hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift); >>>> >>>> + if (ioba & ~page_mask) >>>> + abort(); >>>> + >>>> >>>> >>>> >>>> >>>> -- >>>> Alexey >> >> -- >> Alexey
diff --git a/board-qemu/slof/Makefile b/board-qemu/slof/Makefile index 2263e751bde9..d7ed2d7a6f18 100644 --- a/board-qemu/slof/Makefile +++ b/board-qemu/slof/Makefile @@ -99,6 +99,7 @@ OF_FFS_FILES = \ $(SLOFCMNDIR)/fs/graphics.fs \ $(SLOFCMNDIR)/fs/generic-disk.fs \ $(SLOFCMNDIR)/fs/dma-function.fs \ + $(SLOFCMNDIR)/fs/dma-instance-function.fs \ $(SLOFCMNDIR)/fs/pci-device.fs \ $(SLOFCMNDIR)/fs/pci-bridge.fs \ $(SLOFCMNDIR)/fs/pci-properties.fs \ diff --git a/lib/libvirtio/virtio.h b/lib/libvirtio/virtio.h index 4927a97f9f5f..250120b3112b 100644 --- a/lib/libvirtio/virtio.h +++ b/lib/libvirtio/virtio.h @@ -29,6 +29,7 @@ #define VIRTIO_F_RING_INDIRECT_DESC BIT(28) #define VIRTIO_F_RING_EVENT_IDX BIT(29) #define VIRTIO_F_VERSION_1 BIT(32) +#define VIRTIO_F_IOMMU_PLATFORM BIT(33) #define VIRTIO_TIMEOUT 5000 /* 5 sec timeout */ @@ -84,6 +85,8 @@ struct vqs { struct vring_desc *desc; struct vring_avail *avail; struct vring_used *used; + void **desc_gpas; /* to get gpa from desc->addr (which is ioba) */ + uint64_t bus_desc; }; struct virtio_device { @@ -109,6 +112,8 @@ extern struct vring_used *virtio_get_vring_used(struct virtio_device *dev, int q extern void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, uint64_t addr, uint32_t len, uint16_t flags, uint16_t next); +extern void virtio_free_desc(struct vqs *vq, int id, uint64_t features); +void *virtio_desc_addr(struct virtio_device *vdev, int queue, int id); extern struct vqs *virtio_queue_init_vq(struct virtio_device *dev, unsigned int id); extern void virtio_queue_term_vq(struct virtio_device *dev, struct vqs *vq, unsigned int id); diff --git a/lib/libvirtio/virtio-9p.c b/lib/libvirtio/virtio-9p.c index 426069fe9509..76078612b06e 100644 --- a/lib/libvirtio/virtio-9p.c +++ b/lib/libvirtio/virtio-9p.c @@ -129,6 +129,10 @@ static int virtio_9p_transact(void *opaque, uint8_t *tx, int tx_size, uint8_t *r // do something better mb(); } + + virtio_free_desc(vq, id, dev->features); + virtio_free_desc(vq, id + 1, dev->features); + if (i == 0) { return -1; } diff --git a/lib/libvirtio/virtio-blk.c b/lib/libvirtio/virtio-blk.c index a0dadbb0d6a8..0363038e559d 100644 --- a/lib/libvirtio/virtio-blk.c +++ b/lib/libvirtio/virtio-blk.c @@ -195,6 +195,10 @@ virtioblk_transfer(struct virtio_device *dev, char *buf, uint64_t blocknum, break; } + virtio_free_desc(vq, id, dev->features); + virtio_free_desc(vq, id + 1, dev->features); + virtio_free_desc(vq, id + 2, dev->features); + if (status == 0) return cnt; diff --git a/lib/libvirtio/virtio-net.c b/lib/libvirtio/virtio-net.c index ae67883020ef..5a0d19088527 100644 --- a/lib/libvirtio/virtio-net.c +++ b/lib/libvirtio/virtio-net.c @@ -255,6 +255,9 @@ static int virtionet_xmit(struct virtio_net *vnet, char *buf, int len) idx = virtio_modern16_to_cpu(vdev, vq_tx->avail->idx); id = (idx * 2) % vq_tx->size; + virtio_free_desc(vq_tx, id, vdev->features); + virtio_free_desc(vq_tx, id + 1, vdev->features); + /* Set up virtqueue descriptor for header */ virtio_fill_desc(vq_tx, id, vdev->features, (uint64_t)nethdr, net_hdr_size, VRING_DESC_F_NEXT, id + 1); @@ -317,7 +320,7 @@ static int virtionet_receive(struct virtio_net *vnet, char *buf, int maxlen) #endif /* Copy data to destination buffer */ - memcpy(buf, (void *)virtio_modern64_to_cpu(vdev, vq_rx->desc[id].addr), len); + memcpy(buf, virtio_desc_addr(vdev, VQ_RX, id), len); /* Move indices to next entries */ last_rx_idx = last_rx_idx + 1; diff --git a/lib/libvirtio/virtio-scsi.c b/lib/libvirtio/virtio-scsi.c index 251661e8d466..44573d10e24c 100644 --- a/lib/libvirtio/virtio-scsi.c +++ b/lib/libvirtio/virtio-scsi.c @@ -81,6 +81,11 @@ int virtioscsi_send(struct virtio_device *dev, break; } + virtio_free_desc(vq, id, dev->features); + virtio_free_desc(vq, id + 1, dev->features); + if (!(buf == NULL || buf_len == 0)) + virtio_free_desc(vq, id + 2, dev->features); + return 0; } diff --git a/lib/libvirtio/virtio-serial.c b/lib/libvirtio/virtio-serial.c index 78ab0e25d7db..66fbfe8b6cc6 100644 --- a/lib/libvirtio/virtio-serial.c +++ b/lib/libvirtio/virtio-serial.c @@ -108,7 +108,7 @@ void virtio_serial_shutdown(struct virtio_device *dev) int virtio_serial_putchar(struct virtio_device *dev, char c) { - int id; + int id, ret; uint32_t vq_size, time; volatile uint16_t *current_used_idx; uint16_t last_used_idx, avail_idx; @@ -135,17 +135,21 @@ int virtio_serial_putchar(struct virtio_device *dev, char c) virtio_queue_notify(dev, TX_Q); /* Wait for host to consume the descriptor */ + ret = 1; time = SLOF_GetTimer() + VIRTIO_TIMEOUT; while (*current_used_idx == last_used_idx) { // do something better mb(); if (time < SLOF_GetTimer()) { printf("virtio_serial_putchar failed! \n"); - return 0; + ret = 0; + break; } } - return 1; + virtio_free_desc(vq, id, dev->features); + + return ret; } char virtio_serial_getchar(struct virtio_device *dev) @@ -165,7 +169,7 @@ char virtio_serial_getchar(struct virtio_device *dev) % vq_rx->size; /* Copy data to destination buffer */ - memcpy(buf, (void *)virtio_modern64_to_cpu(dev, vq_rx->desc[id - 1].addr), RX_ELEM_SIZE); + memcpy(buf, virtio_desc_addr(dev, RX_Q, id - 1), RX_ELEM_SIZE); /* Move indices to next entries */ last_rx_idx = last_rx_idx + 1; diff --git a/lib/libvirtio/virtio.c b/lib/libvirtio/virtio.c index fff0c7e54c19..d4db5f656888 100644 --- a/lib/libvirtio/virtio.c +++ b/lib/libvirtio/virtio.c @@ -273,6 +273,17 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, next %= vq->size; if (features & VIRTIO_F_VERSION_1) { + if (features & VIRTIO_F_IOMMU_PLATFORM) { + void *gpa = (void *) addr; + + if (!vq->desc_gpas) { + fprintf(stderr, "IOMMU setup has not been done!\n"); + return; + } + + addr = SLOF_dma_map_in(gpa, len, 0); + vq->desc_gpas[id] = gpa; + } desc->addr = cpu_to_le64(addr); desc->len = cpu_to_le32(len); desc->flags = cpu_to_le16(flags); @@ -285,6 +296,32 @@ void virtio_fill_desc(struct vqs *vq, int id, uint64_t features, } } +void virtio_free_desc(struct vqs *vq, int id, uint64_t features) +{ + struct vring_desc *desc; + + id %= vq->size; + desc = &vq->desc[id]; + + if (features & VIRTIO_F_VERSION_1) { + if (features & VIRTIO_F_IOMMU_PLATFORM) { + SLOF_dma_map_out(le64_to_cpu(desc->addr), + 0, le32_to_cpu(desc->len)); + vq->desc_gpas[id] = NULL; + } + } +} + +void *virtio_desc_addr(struct virtio_device *vdev, int queue, int id) +{ + struct vqs *vq = &vdev->vq[queue]; + + if (vq->desc_gpas) + return vq->desc_gpas[id]; + + return (void *) virtio_modern64_to_cpu(vdev, vq->desc[id].addr); +} + /** * Reset virtio device */ @@ -326,6 +363,19 @@ static void virtio_set_qaddr(struct virtio_device *dev, int queue, unsigned long uint64_t q_used; uint32_t q_size = virtio_get_qsize(dev, queue); + if (dev->features & VIRTIO_F_IOMMU_PLATFORM) { + unsigned long cb; + + cb = q_size * sizeof(struct vring_desc); + cb += sizeof(struct vring_avail) + sizeof(uint16_t) * q_size; + cb = VQ_ALIGN(cb); + cb += sizeof(struct vring_used) + sizeof(uint16_t) * q_size; + cb = VQ_ALIGN(cb); + q_desc = SLOF_dma_map_in((void *)q_desc, cb, 0); + + dev->vq[queue].bus_desc = q_desc; + } + virtio_pci_write64(dev->common.addr + offset_of(struct virtio_dev_common, q_desc), q_desc); q_avail = q_desc + q_size * sizeof(struct vring_desc); virtio_pci_write64(dev->common.addr + offset_of(struct virtio_dev_common, q_avail), q_avail); @@ -373,14 +423,41 @@ struct vqs *virtio_queue_init_vq(struct virtio_device *dev, unsigned int id) vq->avail->flags = virtio_cpu_to_modern16(dev, VRING_AVAIL_F_NO_INTERRUPT); vq->avail->idx = 0; + if (dev->features & VIRTIO_F_IOMMU_PLATFORM) + vq->desc_gpas = SLOF_alloc_mem_aligned( + vq->size * sizeof(vq->desc_gpas[0]), 4096); return vq; } void virtio_queue_term_vq(struct virtio_device *dev, struct vqs *vq, unsigned int id) { - if (vq->desc) + if (vq->desc_gpas) { + int i; + + for (i = 0; i < vq->size; ++i) + virtio_free_desc(vq, i, dev->features); + + memset(vq->desc_gpas, 0, vq->size * sizeof(vq->desc_gpas[0])); + SLOF_free_mem(vq->desc_gpas, + vq->size * sizeof(vq->desc_gpas[0])); + } + if (vq->desc) { + if (dev->features & VIRTIO_F_IOMMU_PLATFORM) { + unsigned long cb; + uint32_t q_size = virtio_get_qsize(dev, vq->id); + + cb = q_size * sizeof(struct vring_desc); + cb += sizeof(struct vring_avail) + sizeof(uint16_t) * q_size; + cb = VQ_ALIGN(cb); + cb += sizeof(struct vring_used) + sizeof(uint16_t) * q_size; + cb = VQ_ALIGN(cb); + + SLOF_dma_map_out(vq->bus_desc, 0, cb); + } + SLOF_free_mem(vq->desc, virtio_vring_size(vq->size)); + } memset(vq, 0, sizeof(*vq)); } @@ -474,6 +551,9 @@ int virtio_negotiate_guest_features(struct virtio_device *dev, uint64_t features return -1; } + if (host_features & VIRTIO_F_IOMMU_PLATFORM) + features |= VIRTIO_F_IOMMU_PLATFORM; + virtio_set_guest_features(dev, features); host_features = virtio_get_host_features(dev); if ((host_features & features) != features) { diff --git a/board-qemu/slof/OF.fs b/board-qemu/slof/OF.fs index a85f6c558e67..3e117ad03e09 100644 --- a/board-qemu/slof/OF.fs +++ b/board-qemu/slof/OF.fs @@ -143,6 +143,9 @@ check-for-nvramrc 8a0 cp +\ For DMA functions used by client/package instances. +#include "dma-instance-function.fs" + \ The client interface. #include "client.fs" \ ELF binary file format. diff --git a/slof/fs/dma-instance-function.fs b/slof/fs/dma-instance-function.fs new file mode 100644 index 000000000000..057181be0f44 --- /dev/null +++ b/slof/fs/dma-instance-function.fs @@ -0,0 +1,28 @@ +\ ****************************************************************************/ +\ * Copyright (c) 2011 IBM Corporation +\ * All rights reserved. +\ * This program and the accompanying materials +\ * are made available under the terms of the BSD License +\ * which accompanies this distribution, and is available at +\ * http://www.opensource.org/licenses/bsd-license.php +\ * +\ * Contributors: +\ * IBM Corporation - initial implementation +\ ****************************************************************************/ + +\ DMA memory allocation functions +: dma-alloc ( size -- virt ) + s" dma-alloc" $call-parent +; + +: dma-free ( virt size -- ) + s" dma-free" $call-parent +; + +: dma-map-in ( virt size cacheable? -- devaddr ) + s" dma-map-in" $call-parent +; + +: dma-map-out ( virt devaddr size -- ) + s" dma-map-out" $call-parent +;
When QEMU is started with iommu_platform=on, the guest driver must accept it or the device will fail. This enables IOMMU support for virtio-net, -scsi, -block, -serial, -9p devices. -serial and -9p are only compile tested though. For virtio-net we map all RX buffers once and TX when xmit() is called and unmap older pages when we are about to reuse the VQ descriptor. As all other devices are synchronous, we unmap IOMMU pages right after completion of a transaction. This depends on QEMU's: https://patchwork.ozlabs.org/patch/1194067/ Copied-bits-and-pieces-from: Michael Roth <mdroth@linux.vnet.ibm.com> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> --- Changes: v2: * added Mike's fs/dma-instance-function.fs * total rework --- board-qemu/slof/Makefile | 1 + lib/libvirtio/virtio.h | 5 ++ lib/libvirtio/virtio-9p.c | 4 ++ lib/libvirtio/virtio-blk.c | 4 ++ lib/libvirtio/virtio-net.c | 5 +- lib/libvirtio/virtio-scsi.c | 5 ++ lib/libvirtio/virtio-serial.c | 12 +++-- lib/libvirtio/virtio.c | 82 +++++++++++++++++++++++++++++++- board-qemu/slof/OF.fs | 3 ++ slof/fs/dma-instance-function.fs | 28 +++++++++++ 10 files changed, 143 insertions(+), 6 deletions(-) create mode 100644 slof/fs/dma-instance-function.fs