Message ID | 011b01d122af$c60f7ce0$522e76a0$@samsung.com |
---|---|
State | New |
Headers | show |
Looks like this would be ok for virtio-ccw (as it does not call virtio_queue_set_host_notifier_forwarding) Question is might something like this for virtio-ccw useful as well? We cannot use memory_region, though, we would need to handle that in our diagnose code. Conny, FYI. On 11/19/2015 10:50 AM, Pavel Fedin wrote: > If you happen to have a kernel with ioeventfd support enabled, but > missing support for them in KVM, and you attempt to enable vhost by > setting vhost=on, qemu aborts with error: > > kvm_mem_ioeventfd_add: error adding ioeventfd: Function not implemented > > This patch adds a mechanism which allows to emulate KVM binding by > triggering the related notifiers via the userspace. The first time the > emulation is used, a warning is displayed, so that the user knows about > potential performance impact: > > 2015-11-19T09:35:16.618380Z qemu-system-aarch64: KVM does not support eventfd binding, using userspace event forwarding (slow) > > This problem can be observed with libvirt, which checks for /dev/vhost-net > availability and just inserts "vhost=on" automatically in this case; on an > ARM64 system using stock kernel 3.18.0 with CONFIG_IOEVENTFD enabled in > expert settings. 
> > Signed-off-by: Pavel Fedin <p.fedin@samsung.com> > --- > hw/virtio/virtio-mmio.c | 15 +++++++++--- > hw/virtio/virtio-pci.c | 61 ++++++++++++++++++++++++++-------------------- > hw/virtio/virtio.c | 24 +++++++++++++++++- > include/hw/virtio/virtio.h | 1 + > 4 files changed, 69 insertions(+), 32 deletions(-) > > diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c > index 16621fa..69d4cbc 100644 > --- a/hw/virtio/virtio-mmio.c > +++ b/hw/virtio/virtio-mmio.c > @@ -110,11 +110,18 @@ static int virtio_mmio_set_host_notifier_internal(VirtIOMMIOProxy *proxy, > return r; > } > virtio_queue_set_host_notifier_fd_handler(vq, true, set_handler); > - memory_region_add_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4, > - true, n, notifier); > + > + if (kvm_eventfds_enabled()) { > + memory_region_add_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4, > + true, n, notifier); > + } else if (!set_handler) { > + virtio_queue_set_host_notifier_forwarding(vq); > + } > } else { > - memory_region_del_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4, > - true, n, notifier); > + if (kvm_eventfds_enabled()) { > + memory_region_del_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4, > + true, n, notifier); > + } > virtio_queue_set_host_notifier_fd_handler(vq, false, false); > event_notifier_cleanup(notifier); > } > diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c > index 96be4fd..b27a630 100644 > --- a/hw/virtio/virtio-pci.c > +++ b/hw/virtio/virtio-pci.c > @@ -293,41 +293,48 @@ static int virtio_pci_set_host_notifier_internal(VirtIOPCIProxy *proxy, > return r; > } > virtio_queue_set_host_notifier_fd_handler(vq, true, set_handler); > - if (modern) { > - if (fast_mmio) { > - memory_region_add_eventfd(modern_mr, modern_addr, 0, > - false, n, notifier); > - } else { > - memory_region_add_eventfd(modern_mr, modern_addr, 2, > - false, n, notifier); > + > + if (kvm_eventfds_enabled()) { > + if (modern) { > + if (fast_mmio) { > + memory_region_add_eventfd(modern_mr, 
modern_addr, 0, > + false, n, notifier); > + } else { > + memory_region_add_eventfd(modern_mr, modern_addr, 2, > + false, n, notifier); > + } > + if (modern_pio) { > + memory_region_add_eventfd(modern_notify_mr, 0, 2, > + true, n, notifier); > + } > } > - if (modern_pio) { > - memory_region_add_eventfd(modern_notify_mr, 0, 2, > - true, n, notifier); > + if (legacy) { > + memory_region_add_eventfd(legacy_mr, legacy_addr, 2, > + true, n, notifier); > } > - } > - if (legacy) { > - memory_region_add_eventfd(legacy_mr, legacy_addr, 2, > - true, n, notifier); > + } else if (!set_handler) { > + virtio_queue_set_host_notifier_forwarding(vq); > } > } else { > - if (modern) { > - if (fast_mmio) { > - memory_region_del_eventfd(modern_mr, modern_addr, 0, > - false, n, notifier); > - } else { > - memory_region_del_eventfd(modern_mr, modern_addr, 2, > - false, n, notifier); > + if (kvm_eventfds_enabled()) { > + if (modern) { > + if (fast_mmio) { > + memory_region_del_eventfd(modern_mr, modern_addr, 0, > + false, n, notifier); > + } else { > + memory_region_del_eventfd(modern_mr, modern_addr, 2, > + false, n, notifier); > + } > + if (modern_pio) { > + memory_region_del_eventfd(modern_notify_mr, 0, 2, > + true, n, notifier); > + } > } > - if (modern_pio) { > - memory_region_del_eventfd(modern_notify_mr, 0, 2, > + if (legacy) { > + memory_region_del_eventfd(legacy_mr, legacy_addr, 2, > true, n, notifier); > } > } > - if (legacy) { > - memory_region_del_eventfd(legacy_mr, legacy_addr, 2, > - true, n, notifier); > - } > virtio_queue_set_host_notifier_fd_handler(vq, false, false); > event_notifier_cleanup(notifier); > } > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > index 1edef59..6fe268f 100644 > --- a/hw/virtio/virtio.c > +++ b/hw/virtio/virtio.c > @@ -89,6 +89,7 @@ struct VirtQueue > VirtIODevice *vdev; > EventNotifier guest_notifier; > EventNotifier host_notifier; > + bool forward_host_notifier; > QLIST_ENTRY(VirtQueue) node; > }; > > @@ -969,7 +970,13 @@ void 
virtio_queue_notify_vq(VirtQueue *vq) > > void virtio_queue_notify(VirtIODevice *vdev, int n) > { > - virtio_queue_notify_vq(&vdev->vq[n]); > + VirtQueue *vq = &vdev->vq[n]; > + > + if (vq->forward_host_notifier) { > + event_notifier_set(&vq->host_notifier); > + } else { > + virtio_queue_notify_vq(&vdev->vq[n]); > + } > } > > uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) > @@ -1715,6 +1722,21 @@ void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign, > /* Test and clear notifier before after disabling event, > * in case poll callback didn't have time to run. */ > virtio_queue_host_notifier_read(&vq->host_notifier); > + > + vq->forward_host_notifier = false; > + } > +} > + > +static bool forwarding_warning; > + > +void virtio_queue_set_host_notifier_forwarding(VirtQueue *vq) > +{ > + vq->forward_host_notifier = true; > + > + if (!forwarding_warning) { > + forwarding_warning = true; > + error_report("KVM does not support eventfd binding, " > + "using userspace event forwarding (slow)"); > } > } > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h > index 205fadf..f288ccb 100644 > --- a/include/hw/virtio/virtio.h > +++ b/include/hw/virtio/virtio.h > @@ -245,6 +245,7 @@ void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign, > EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq); > void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign, > bool set_handler); > +void virtio_queue_set_host_notifier_forwarding(VirtQueue *vq); > void virtio_queue_notify_vq(VirtQueue *vq); > void virtio_irq(VirtQueue *vq); > VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector); >
Hello! > Looks like this would be ok for virtio-ccw (as it does not call > virtio_queue_set_host_notifier_forwarding) Yes, this is the reason why i intentionally did not insert extra logic into virtio_queue_set_host_notifier_fd_handler(), but made it into separate function. I don't know whether KVM on S390 reports KVM_CAP_IOEVENTFD. > Question is might something like this for virtio-ccw useful as well? Yes, provided you have something to bind userspace actions to. Looks like S390 doesn't use MMIO here at all, but some obscure (for me) thing, called "subchannel". Another question is whether you can have kernel without KVM_CAP_IOEVENTFD on S390. Was it introduced since the beginning, or some time later? > We cannot use memory_region, though, we would need to handle that in our > diagnose code. Sorry, this phrase is a complete mystery to me, i have no idea what is diagnose code, as well as have never seen S390 in real life. I think if you have motivation, you could implement userspace forwarding for ccw yourself, since you know the thing and know how to do it. For MMIO it was indeed very simple. Kind regards, Pavel Fedin Expert Engineer Samsung Electronics Research center Russia
On Thu, Nov 19, 2015 at 12:50:49PM +0300, Pavel Fedin wrote: > If you happen to have a kernel with ioeventfd support enabled, but > missing support for them in KVM, and you attempt to enable vhost by > setting vhost=on, qemu aborts with error: > > kvm_mem_ioeventfd_add: error adding ioeventfd: Function not implemented > > This patch adds a mechanism which allows to emulate KVM binding by > triggering the related notifiers via the userspace. The first time the > emulation is used, a warning is displayed, so that the user knows about > potential performance impact: > > 2015-11-19T09:35:16.618380Z qemu-system-aarch64: KVM does not support eventfd binding, using userspace event forwarding (slow) > > This problem can be observed with libvirt, which checks for /dev/vhost-net > availability and just inserts "vhost=on" automatically in this case; on an > ARM64 system using stock kernel 3.18.0 with CONFIG_IOEVENTFD enabled in > expert settings. > > Signed-off-by: Pavel Fedin <p.fedin@samsung.com> OK, that's better, thanks! Why do we need if (kvm_eventfds_enabled()) { memory_region_add_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4, true, n, notifier); } else if (!set_handler) { virtio_queue_set_host_notifier_forwarding(vq); } everywhere though? Can't memory_region_add_eventfd DTRT depending on kvm etc? 
> --- > hw/virtio/virtio-mmio.c | 15 +++++++++--- > hw/virtio/virtio-pci.c | 61 ++++++++++++++++++++++++++-------------------- > hw/virtio/virtio.c | 24 +++++++++++++++++- > include/hw/virtio/virtio.h | 1 + > 4 files changed, 69 insertions(+), 32 deletions(-) > > diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c > index 16621fa..69d4cbc 100644 > --- a/hw/virtio/virtio-mmio.c > +++ b/hw/virtio/virtio-mmio.c > @@ -110,11 +110,18 @@ static int virtio_mmio_set_host_notifier_internal(VirtIOMMIOProxy *proxy, > return r; > } > virtio_queue_set_host_notifier_fd_handler(vq, true, set_handler); > - memory_region_add_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4, > - true, n, notifier); > + > + if (kvm_eventfds_enabled()) { > + memory_region_add_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4, > + true, n, notifier); > + } else if (!set_handler) { > + virtio_queue_set_host_notifier_forwarding(vq); > + } > } else { > - memory_region_del_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4, > - true, n, notifier); > + if (kvm_eventfds_enabled()) { > + memory_region_del_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4, > + true, n, notifier); > + } > virtio_queue_set_host_notifier_fd_handler(vq, false, false); > event_notifier_cleanup(notifier); > } > diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c > index 96be4fd..b27a630 100644 > --- a/hw/virtio/virtio-pci.c > +++ b/hw/virtio/virtio-pci.c > @@ -293,41 +293,48 @@ static int virtio_pci_set_host_notifier_internal(VirtIOPCIProxy *proxy, > return r; > } > virtio_queue_set_host_notifier_fd_handler(vq, true, set_handler); > - if (modern) { > - if (fast_mmio) { > - memory_region_add_eventfd(modern_mr, modern_addr, 0, > - false, n, notifier); > - } else { > - memory_region_add_eventfd(modern_mr, modern_addr, 2, > - false, n, notifier); > + > + if (kvm_eventfds_enabled()) { > + if (modern) { > + if (fast_mmio) { > + memory_region_add_eventfd(modern_mr, modern_addr, 0, > + false, n, notifier); > + } else { > 
+ memory_region_add_eventfd(modern_mr, modern_addr, 2, > + false, n, notifier); > + } > + if (modern_pio) { > + memory_region_add_eventfd(modern_notify_mr, 0, 2, > + true, n, notifier); > + } > } > - if (modern_pio) { > - memory_region_add_eventfd(modern_notify_mr, 0, 2, > - true, n, notifier); > + if (legacy) { > + memory_region_add_eventfd(legacy_mr, legacy_addr, 2, > + true, n, notifier); > } > - } > - if (legacy) { > - memory_region_add_eventfd(legacy_mr, legacy_addr, 2, > - true, n, notifier); > + } else if (!set_handler) { > + virtio_queue_set_host_notifier_forwarding(vq); > } > } else { > - if (modern) { > - if (fast_mmio) { > - memory_region_del_eventfd(modern_mr, modern_addr, 0, > - false, n, notifier); > - } else { > - memory_region_del_eventfd(modern_mr, modern_addr, 2, > - false, n, notifier); > + if (kvm_eventfds_enabled()) { > + if (modern) { > + if (fast_mmio) { > + memory_region_del_eventfd(modern_mr, modern_addr, 0, > + false, n, notifier); > + } else { > + memory_region_del_eventfd(modern_mr, modern_addr, 2, > + false, n, notifier); > + } > + if (modern_pio) { > + memory_region_del_eventfd(modern_notify_mr, 0, 2, > + true, n, notifier); > + } > } > - if (modern_pio) { > - memory_region_del_eventfd(modern_notify_mr, 0, 2, > + if (legacy) { > + memory_region_del_eventfd(legacy_mr, legacy_addr, 2, > true, n, notifier); > } > } > - if (legacy) { > - memory_region_del_eventfd(legacy_mr, legacy_addr, 2, > - true, n, notifier); > - } > virtio_queue_set_host_notifier_fd_handler(vq, false, false); > event_notifier_cleanup(notifier); > } > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > index 1edef59..6fe268f 100644 > --- a/hw/virtio/virtio.c > +++ b/hw/virtio/virtio.c > @@ -89,6 +89,7 @@ struct VirtQueue > VirtIODevice *vdev; > EventNotifier guest_notifier; > EventNotifier host_notifier; > + bool forward_host_notifier; > QLIST_ENTRY(VirtQueue) node; > }; > > @@ -969,7 +970,13 @@ void virtio_queue_notify_vq(VirtQueue *vq) > > void 
virtio_queue_notify(VirtIODevice *vdev, int n) > { > - virtio_queue_notify_vq(&vdev->vq[n]); > + VirtQueue *vq = &vdev->vq[n]; > + > + if (vq->forward_host_notifier) { > + event_notifier_set(&vq->host_notifier); > + } else { > + virtio_queue_notify_vq(&vdev->vq[n]); > + } > } > > uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) > @@ -1715,6 +1722,21 @@ void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign, > /* Test and clear notifier before after disabling event, > * in case poll callback didn't have time to run. */ > virtio_queue_host_notifier_read(&vq->host_notifier); > + > + vq->forward_host_notifier = false; > + } > +} > + > +static bool forwarding_warning; > + > +void virtio_queue_set_host_notifier_forwarding(VirtQueue *vq) > +{ > + vq->forward_host_notifier = true; > + > + if (!forwarding_warning) { > + forwarding_warning = true; > + error_report("KVM does not support eventfd binding, " > + "using userspace event forwarding (slow)"); > } > } > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h > index 205fadf..f288ccb 100644 > --- a/include/hw/virtio/virtio.h > +++ b/include/hw/virtio/virtio.h > @@ -245,6 +245,7 @@ void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign, > EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq); > void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign, > bool set_handler); > +void virtio_queue_set_host_notifier_forwarding(VirtQueue *vq); > void virtio_queue_notify_vq(VirtQueue *vq); > void virtio_irq(VirtQueue *vq); > VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector); > -- > 1.9.5.msysgit.0 >
On Thu, 19 Nov 2015 13:18:13 +0300 Pavel Fedin <p.fedin@samsung.com> wrote: > Hello! > > > Looks like this would be ok for virtio-ccw (as it does not call > > virtio_queue_set_host_notifier_forwarding) > > Yes, this is the reason why i intentionally did not insert extra logic into virtio_queue_set_host_notifier_fd_handler(), but made > it into separate function. I don't know whether KVM on S390 reports KVM_CAP_IOEVENTFD. Yes, it does. > > > Question is might something like this for virtio-ccw useful as well? > > Yes, provided you have something to bind userspace actions to. Looks like S390 doesn't use MMIO here at all, but some obscure (for > me) thing, called "subchannel". :) Think of a "subchannel" as a communication channel between the operating system and the device. It is used to transmit commands that can do various things like read, write, or configure stuff. Basically, stuff that is handled via mmio on other platforms. > Another question is whether you can have kernel without KVM_CAP_IOEVENTFD on S390. Was it introduced since the beginning, or some > time later? There are old kernels around that have the needed basic support for the channel subsystem (needed for virtio-ccw), but not the ioeventfd support. > > > We cannot use memory_region, though, we would need to handle that in our > > diagnose code. > > Sorry, this phrase is a complete mystery to me, i have no idea what is diagnose code, as well as have never seen S390 in real life. "diagnose" is a hypercall on the s390. For virtio-ccw, guest->host notification happens via a special hypercall (diagnose 0x500). Therefore, we need to hook into our implementation of that diagnose for our handling. > I think if you have motivation, you could implement userspace forwarding for ccw yourself, since you know the thing and know how to > do it. For MMIO it was indeed very simple. I'll take a peek at your patch.
Hello! > OK, that's better, thanks! Yes, indeed this was simple. > Why do we need > if (kvm_eventfds_enabled()) { > memory_region_add_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4, > true, n, notifier); > } else if (!set_handler) { > virtio_queue_set_host_notifier_forwarding(vq); > } > everywhere though? > > Can't memory_region_add_eventfd DTRT depending on kvm etc? You know... I took a look at this, and yes, i could simply hook up emulation into memory region handlers. And everything that expects KVM eventfd binding will magically start working, probably rendering some bypass code obsolete. I have only one concern against this. qemu is a large piece of software, consisting of lots of components. I cannot test absolutely everything in every configuration. I suggest, old code was written with the assumption that if memory_region_add_eventfd() works, we are really using KVM acceleration. If we break this assumption, how much code will mysteriously misbehave instead of throwing "function not supported" error, which is quite easy to trace down and fix? What i would refactor, perhaps, is to add a return code to memory_region_add_eventfd(), so that it can signal failure instead of a critical abort, this would allow to get rid of kvm_eventfds_enabled() accompanying checks. Kind regards, Pavel Fedin Expert Engineer Samsung Electronics Research center Russia
On Thu, Nov 19, 2015 at 02:44:37PM +0300, Pavel Fedin wrote: > Hello! > > > OK, that's better, thanks! > > Yes, indeed this was simple. > > > Why do we need > > if (kvm_eventfds_enabled()) { > > memory_region_add_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4, > > true, n, notifier); > > } else if (!set_handler) { > > virtio_queue_set_host_notifier_forwarding(vq); > > } > > everywhere though? > > > > Can't memory_region_add_eventfd DTRT depending on kvm etc? > > You know... I took a look at this, and yes, i could simply hook up emulation into memory region handlers. And everything that > expects KVM eventfd binding will magically start working, probably rendering some bypass code obsolete. > I have only one concern against this. qemu is a large piece of software, consisting of lots of components. I cannot test absolutely > everything in every configuration. I suggest, old code was written with the assumption that if memory_region_add_eventfd() works, we > are really using KVM acceleration. If we break this assumption, how much code will mysteriously misbehave instead of throwing First of all, memory_region_add_eventfd simply exits on failure. It seems unlikely you will break something which isn't already broken. 
Further: $ git grep memory_region_add_eventfd hw/misc/ivshmem.c: memory_region_add_eventfd(&s->ivshmem_mmio, hw/misc/pci-testdev.c: memory_region_add_eventfd(test->mr, hw/virtio/virtio-mmio.c: memory_region_add_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4, hw/virtio/virtio-pci.c: memory_region_add_eventfd(modern_mr, modern_addr, 0, hw/virtio/virtio-pci.c: memory_region_add_eventfd(modern_mr, modern_addr, 2, hw/virtio/virtio-pci.c: memory_region_add_eventfd(modern_notify_mr, 0, 2, hw/virtio/virtio-pci.c: memory_region_add_eventfd(legacy_mr, legacy_addr, 2, include/exec/memory.h: * memory_region_add_eventfd: Request an eventfd to be triggered when a word include/exec/memory.h:void memory_region_add_eventfd(MemoryRegion *mr, include/exec/memory.h: * memory_region_add_eventfd() call. memory.c:void memory_region_add_eventfd(MemoryRegion *mr, Not such a big deal to audit all call sites, is it? Cc memory API maintainer for an opinion. Paolo, do you see anything wrong with making memory_region_add_eventfd work (slowly) without kvm ioeventfd support? > "function not supported" error, which is quite easy to trace down and fix? That's a problem with switching from vhost to virtio too, apparently you decided it's not a big deal there, why a change of heart? > What i would refactor, perhaps, is to add a return code to memory_region_add_eventfd(), so that it can signal failure instead of a > critical abort, this would allow to get rid of kvm_eventfds_enabled() accompanying checks. > > Kind regards, > Pavel Fedin > Expert Engineer > Samsung Electronics Research center Russia
Hello! > > I think if you have motivation, you could implement userspace forwarding for ccw yourself, > since you know the thing and know how to > > do it. For MMIO it was indeed very simple. > > I'll take a peek at your patch. This is actually a note for Michael. CCW support is one more reason for handling forwarding at the virtio device level, not at the memory region level. Kind regards, Pavel Fedin Expert Engineer Samsung Electronics Research center Russia
On Thu, Nov 19, 2015 at 03:02:12PM +0300, Pavel Fedin wrote: > Hello! > > > > I think if you have motivation, you could implement userspace forwarding for ccw yourself, > > since you know the thing and know how to > > > do it. For MMIO it was indeed very simple. > > > > I'll take a peek at your patch. > > This is actually a note for Michael. ccw support is one more reason for handling forwarding on virtio device level, not on memory > region level. > > Kind regards, > Pavel Fedin > Expert Engineer > Samsung Electronics Research center Russia > I don't know what is the right solution for CCW but patching pci and mmio like you did will not help CCW at all.
On 19/11/2015 13:01, Michael S. Tsirkin wrote: >> > You know... I took a look at this, and yes, i could simply hook up emulation into memory region handlers. And everything that >> > expects KVM eventfd binding will magically start working, probably rendering some bypass code obsolete. >> > I have only one concern against this. qemu is a large piece of software, consisting of lots of components. I cannot test absolutely >> > everything in every configuration. I suggest, old code was written with the assumption that if memory_region_add_eventfd() works, we >> > are really using KVM acceleration. If we break this assumption, how much code will mysteriously misbehave instead of throwing > First of all, memory_region_add_eventfd simply exits on failure. > It seems unlikely you will break something which isn't already > broken. > > Further: > > $ git grep memory_region_add_eventfd > hw/misc/ivshmem.c: memory_region_add_eventfd(&s->ivshmem_mmio, > hw/misc/pci-testdev.c: memory_region_add_eventfd(test->mr, > hw/virtio/virtio-mmio.c: memory_region_add_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4, > hw/virtio/virtio-pci.c: memory_region_add_eventfd(modern_mr, modern_addr, 0, > hw/virtio/virtio-pci.c: memory_region_add_eventfd(modern_mr, modern_addr, 2, > hw/virtio/virtio-pci.c: memory_region_add_eventfd(modern_notify_mr, 0, 2, > hw/virtio/virtio-pci.c: memory_region_add_eventfd(legacy_mr, legacy_addr, 2, > include/exec/memory.h: * memory_region_add_eventfd: Request an eventfd to be triggered when a word > include/exec/memory.h:void memory_region_add_eventfd(MemoryRegion *mr, > include/exec/memory.h: * memory_region_add_eventfd() call. > memory.c:void memory_region_add_eventfd(MemoryRegion *mr, > > Not such a bit deal to audit all call sites, is it? > > Cc memory API maintainer for an opinion. > > Paolo, do you see anything wrong with making > memory_region_add_eventfd work (slowly) without kvm ioeventfd support? > Sure, it's even been on the todo list for a while. 
Stefan Hajnoczi had a patch, the only ugly thing was that it slowed down a little all non-ioeventfd accesses, but I guess that's okay because it's probably not measurable. http://lists.nongnu.org/archive/html/qemu-devel/2015-07/msg04710.html Paolo Paolo
On Thu, Nov 19, 2015 at 01:06:17PM +0100, Paolo Bonzini wrote: > > > On 19/11/2015 13:01, Michael S. Tsirkin wrote: > >> > You know... I took a look at this, and yes, i could simply hook up emulation into memory region handlers. And everything that > >> > expects KVM eventfd binding will magically start working, probably rendering some bypass code obsolete. > >> > I have only one concern against this. qemu is a large piece of software, consisting of lots of components. I cannot test absolutely > >> > everything in every configuration. I suggest, old code was written with the assumption that if memory_region_add_eventfd() works, we > >> > are really using KVM acceleration. If we break this assumption, how much code will mysteriously misbehave instead of throwing > > First of all, memory_region_add_eventfd simply exits on failure. > > It seems unlikely you will break something which isn't already > > broken. > > > > Further: > > > > $ git grep memory_region_add_eventfd > > hw/misc/ivshmem.c: memory_region_add_eventfd(&s->ivshmem_mmio, > > hw/misc/pci-testdev.c: memory_region_add_eventfd(test->mr, > > hw/virtio/virtio-mmio.c: memory_region_add_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4, > > hw/virtio/virtio-pci.c: memory_region_add_eventfd(modern_mr, modern_addr, 0, > > hw/virtio/virtio-pci.c: memory_region_add_eventfd(modern_mr, modern_addr, 2, > > hw/virtio/virtio-pci.c: memory_region_add_eventfd(modern_notify_mr, 0, 2, > > hw/virtio/virtio-pci.c: memory_region_add_eventfd(legacy_mr, legacy_addr, 2, > > include/exec/memory.h: * memory_region_add_eventfd: Request an eventfd to be triggered when a word > > include/exec/memory.h:void memory_region_add_eventfd(MemoryRegion *mr, > > include/exec/memory.h: * memory_region_add_eventfd() call. > > memory.c:void memory_region_add_eventfd(MemoryRegion *mr, > > > > Not such a bit deal to audit all call sites, is it? > > > > Cc memory API maintainer for an opinion. 
> > > > Paolo, do you see anything wrong with making > > memory_region_add_eventfd work (slowly) without kvm ioeventfd support? > > > > Sure, it's even been on the todo list for a while. Stefan Hajnoczi had > a patch, the only ugly thing was that it slowed down a little all > non-ioeventfd accesses, but I guess that's okay because it's probably > not measurable. > > http://lists.nongnu.org/archive/html/qemu-devel/2015-07/msg04710.html > > Paolo > > Paolo Maybe it can be improved to probe the kernel and only take action if kvm does not support ioeventfd.
Hello! > > Paolo, do you see anything wrong with making > > memory_region_add_eventfd work (slowly) without kvm ioeventfd support? > > > > Sure, it's even been on the todo list for a while. Stefan Hajnoczi had > a patch, the only ugly thing was that it slowed down a little all > non-ioeventfd accesses, but I guess that's okay because it's probably > not measurable. > > http://lists.nongnu.org/archive/html/qemu-devel/2015-07/msg04710.html Ok, good. So, this has already been tested. Ok, i think i can further extend this patch by: 1. Add a flag to disable memory_region_dispatch_write_ioeventfds(), to be set by KVM code for example. So that with KVM we have even smaller performance degradation. 2. Issue a one-shot warning, like in my current patch, so that the user knows that his/her kernel prevents from getting the best performance. Will it be OK then? Cc'ed also Cornelia because he's taking part in CCW discussion. And the same kind of generic event forwarding could be implemented for CCW then. Kind regards, Pavel Fedin Expert Engineer Samsung Electronics Research center Russia
On 19/11/2015 13:17, Pavel Fedin wrote: > Ok, good. So, this has already been tested. Ok, i think i can further extend this patch by: > 1. Add a flag to disable memory_region_dispatch_write_ioeventfds(), to be set by KVM code for example. So that with KVM we have even > smaller performance degradation. > 2. Issue a one-shot warning, like in my current patch, so that the user knows that his/her kernel prevents from getting the best > performance. > Will it be OK then? Yes! However QEMU pretty much always tries to use ioeventfd for virtio. I'm not sure you want to warn outside the special cases of vhost or dataplane... So perhaps it's better to warn in the vhost and dataplane case specifically, rather than always. Paolo
On Thu, 19 Nov 2015 15:17:57 +0300 Pavel Fedin <p.fedin@samsung.com> wrote: > Hello! > > > > Paolo, do you see anything wrong with making > > > memory_region_add_eventfd work (slowly) without kvm ioeventfd support? > > > > > > > Sure, it's even been on the todo list for a while. Stefan Hajnoczi had > > a patch, the only ugly thing was that it slowed down a little all > > non-ioeventfd accesses, but I guess that's okay because it's probably > > not measurable. > > > > http://lists.nongnu.org/archive/html/qemu-devel/2015-07/msg04710.html If I read this correctly, memory regions already keep track of ioeventfds and this patch can simply trigger them manually if that had not already been done? > > Ok, good. So, this has already been tested. Ok, i think i can further extend this patch by: > 1. Add a flag to disable memory_region_dispatch_write_ioeventfds(), to be set by KVM code for example. So that with KVM we have even > smaller performance degradation. > 2. Issue a one-shot warning, like in my current patch, so that the user knows that his/her kernel prevents from getting the best > performance. > Will it be OK then? > > Cc'ed also Cornelia because he's taking part in CCW discussion. And the same kind of generic event forwarding could be implemented > for CCW then. Only that we don't have such a nice tracking structure that memory regions already have. The intercept handler for diagnose 500 already ends up at the correct device (its id is one of the parameters), so I'd need to check for an existing notifier on the virtqueue and trigger that instead of calling virtio_queue_notify directly?
Hello! > If I read this correctly, memory regions already keep track of > ioeventfds and this patch can simply trigger them manually if that had > not already been done? Yes, exactly, and this is the new idea. > Only that we don't have such a nice tracking structure that memory > regions already have. > > The intercept handler for diagnose 500 already ends up at the correct > device (its id is one of the parameters), so I'd need to check for an > existing notifier on the virtqueue and trigger that instead of calling > virtio_queue_notify directly? Yes, absolutely correct, but only if the notifier has actually been installed. Pure userspace virtio will not do it, AFAIK, if we don't have KVM_CAP_IOEVENTFD. Take a careful look at virtio-mmio.c, you'll see that host notifiers are actually installed from two places: 1. virtio_mmio_start_ioeventfd() 2. virtio_mmio_set_host_notifier() Userspace implementation of virtio uses (1) path, while vhost uses (2). Note also that (1) does not do anything at all if KVM does not support ioeventfds. So, (1) case breaks up into two: 1a. userspace virtio, notifications triggered by handling MMIO writes in userspace 1b. userspace virtio, notifications triggered by ioeventfds, which are bound to MMIO in kernel I don't know why we have (1b) at all, because i'm in doubt there would be any noticeable performance advantage. But, i believe, there was some reason to implement it. Both (1) and (2) end up in the same internal routine, and the only difference is 'bool set_handler', which is set to false for vhost. This is what i rely on, and this is what you should detect too. PCI backend is a little bit more complicated to read, but it works absolutely in the same way. Kind regards, Pavel Fedin Expert Engineer Samsung Electronics Research center Russia
diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c index 16621fa..69d4cbc 100644 --- a/hw/virtio/virtio-mmio.c +++ b/hw/virtio/virtio-mmio.c @@ -110,11 +110,18 @@ static int virtio_mmio_set_host_notifier_internal(VirtIOMMIOProxy *proxy, return r; } virtio_queue_set_host_notifier_fd_handler(vq, true, set_handler); - memory_region_add_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4, - true, n, notifier); + + if (kvm_eventfds_enabled()) { + memory_region_add_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4, + true, n, notifier); + } else if (!set_handler) { + virtio_queue_set_host_notifier_forwarding(vq); + } } else { - memory_region_del_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4, - true, n, notifier); + if (kvm_eventfds_enabled()) { + memory_region_del_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4, + true, n, notifier); + } virtio_queue_set_host_notifier_fd_handler(vq, false, false); event_notifier_cleanup(notifier); } diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 96be4fd..b27a630 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -293,41 +293,48 @@ static int virtio_pci_set_host_notifier_internal(VirtIOPCIProxy *proxy, return r; } virtio_queue_set_host_notifier_fd_handler(vq, true, set_handler); - if (modern) { - if (fast_mmio) { - memory_region_add_eventfd(modern_mr, modern_addr, 0, - false, n, notifier); - } else { - memory_region_add_eventfd(modern_mr, modern_addr, 2, - false, n, notifier); + + if (kvm_eventfds_enabled()) { + if (modern) { + if (fast_mmio) { + memory_region_add_eventfd(modern_mr, modern_addr, 0, + false, n, notifier); + } else { + memory_region_add_eventfd(modern_mr, modern_addr, 2, + false, n, notifier); + } + if (modern_pio) { + memory_region_add_eventfd(modern_notify_mr, 0, 2, + true, n, notifier); + } } - if (modern_pio) { - memory_region_add_eventfd(modern_notify_mr, 0, 2, - true, n, notifier); + if (legacy) { + memory_region_add_eventfd(legacy_mr, legacy_addr, 2, + true, n, 
notifier); } - } - if (legacy) { - memory_region_add_eventfd(legacy_mr, legacy_addr, 2, - true, n, notifier); + } else if (!set_handler) { + virtio_queue_set_host_notifier_forwarding(vq); } } else { - if (modern) { - if (fast_mmio) { - memory_region_del_eventfd(modern_mr, modern_addr, 0, - false, n, notifier); - } else { - memory_region_del_eventfd(modern_mr, modern_addr, 2, - false, n, notifier); + if (kvm_eventfds_enabled()) { + if (modern) { + if (fast_mmio) { + memory_region_del_eventfd(modern_mr, modern_addr, 0, + false, n, notifier); + } else { + memory_region_del_eventfd(modern_mr, modern_addr, 2, + false, n, notifier); + } + if (modern_pio) { + memory_region_del_eventfd(modern_notify_mr, 0, 2, + true, n, notifier); + } } - if (modern_pio) { - memory_region_del_eventfd(modern_notify_mr, 0, 2, + if (legacy) { + memory_region_del_eventfd(legacy_mr, legacy_addr, 2, true, n, notifier); } } - if (legacy) { - memory_region_del_eventfd(legacy_mr, legacy_addr, 2, - true, n, notifier); - } virtio_queue_set_host_notifier_fd_handler(vq, false, false); event_notifier_cleanup(notifier); } diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 1edef59..6fe268f 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -89,6 +89,7 @@ struct VirtQueue VirtIODevice *vdev; EventNotifier guest_notifier; EventNotifier host_notifier; + bool forward_host_notifier; QLIST_ENTRY(VirtQueue) node; }; @@ -969,7 +970,13 @@ void virtio_queue_notify_vq(VirtQueue *vq) void virtio_queue_notify(VirtIODevice *vdev, int n) { - virtio_queue_notify_vq(&vdev->vq[n]); + VirtQueue *vq = &vdev->vq[n]; + + if (vq->forward_host_notifier) { + event_notifier_set(&vq->host_notifier); + } else { + virtio_queue_notify_vq(&vdev->vq[n]); + } } uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) @@ -1715,6 +1722,21 @@ void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign, /* Test and clear notifier before after disabling event, * in case poll callback didn't have time to run. 
*/ virtio_queue_host_notifier_read(&vq->host_notifier); + + vq->forward_host_notifier = false; + } +} + +static bool forwarding_warning; + +void virtio_queue_set_host_notifier_forwarding(VirtQueue *vq) +{ + vq->forward_host_notifier = true; + + if (!forwarding_warning) { + forwarding_warning = true; + error_report("KVM does not support eventfd binding, " + "using userspace event forwarding (slow)"); } } diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 205fadf..f288ccb 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -245,6 +245,7 @@ void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign, EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq); void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign, bool set_handler); +void virtio_queue_set_host_notifier_forwarding(VirtQueue *vq); void virtio_queue_notify_vq(VirtQueue *vq); void virtio_irq(VirtQueue *vq); VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector);
If you happen to have a kernel with ioeventfd support enabled, but missing support for them in KVM, and you attempt to enable vhost by setting vhost=on, qemu aborts with error: kvm_mem_ioeventfd_add: error adding ioeventfd: Function not implemented This patch adds a mechanism which allows emulating KVM ioeventfd binding by triggering the related notifiers via userspace. The first time the emulation is used, a warning is displayed, so that the user knows about the potential performance impact: 2015-11-19T09:35:16.618380Z qemu-system-aarch64: KVM does not support eventfd binding, using userspace event forwarding (slow) This problem can be observed with libvirt, which checks for /dev/vhost-net availability and just inserts "vhost=on" automatically in this case; on an ARM64 system using stock kernel 3.18.0 with CONFIG_IOEVENTFD enabled in expert settings. Signed-off-by: Pavel Fedin <p.fedin@samsung.com> --- hw/virtio/virtio-mmio.c | 15 +++++++++--- hw/virtio/virtio-pci.c | 61 ++++++++++++++++++++++++++-------------------- hw/virtio/virtio.c | 24 +++++++++++++++++- include/hw/virtio/virtio.h | 1 + 4 files changed, 69 insertions(+), 32 deletions(-)