
virtio: Implement userspace forwarding for host notifiers

Message ID 011b01d122af$c60f7ce0$522e76a0$@samsung.com
State New

Commit Message

Pavel Fedin Nov. 19, 2015, 9:50 a.m. UTC
If you happen to have a kernel with ioeventfd support enabled, but
no support for it in KVM, and you attempt to enable vhost by
setting vhost=on, QEMU aborts with the error:

kvm_mem_ioeventfd_add: error adding ioeventfd: Function not implemented

This patch adds a mechanism that emulates KVM eventfd binding by
triggering the related notifiers from userspace. The first time the
emulation is used, a warning is displayed so that the user knows about
the potential performance impact:

2015-11-19T09:35:16.618380Z qemu-system-aarch64: KVM does not support eventfd binding, using userspace event forwarding (slow)

This problem can be observed with libvirt, which checks for /dev/vhost-net
availability and automatically inserts "vhost=on" when it is present; for
example, on an ARM64 system using stock kernel 3.18.0 with CONFIG_IOEVENTFD
enabled in the expert settings.

Signed-off-by: Pavel Fedin <p.fedin@samsung.com>
---
 hw/virtio/virtio-mmio.c    | 15 +++++++++---
 hw/virtio/virtio-pci.c     | 61 ++++++++++++++++++++++++++--------------------
 hw/virtio/virtio.c         | 24 +++++++++++++++++-
 include/hw/virtio/virtio.h |  1 +
 4 files changed, 69 insertions(+), 32 deletions(-)

Comments

Christian Borntraeger Nov. 19, 2015, 9:56 a.m. UTC | #1
Looks like this would be ok for virtio-ccw (as it does not call
virtio_queue_set_host_notifier_forwarding).
The question is: might something like this be useful for virtio-ccw as well?

We cannot use a memory_region, though; we would need to handle that in our
diagnose code.

Conny, FYI.




On 11/19/2015 10:50 AM, Pavel Fedin wrote:
> If you happen to have a kernel with ioeventfd support enabled, but
> missing support for them in KVM, and you attempt to enable vhost by
> setting vhost=on, qemu aborts with error:
> 
> kvm_mem_ioeventfd_add: error adding ioeventfd: Function not implemented
> 
> This patch adds a mechanism which allows to emulate KVM binding by
> triggering the related notifiers via the userspace. The first time the
> emulation is used, a warning is displayed, so that the user knows about
> potential performance impact:
> 
> 2015-11-19T09:35:16.618380Z qemu-system-aarch64: KVM does not support eventfd binding, using userspace event forwarding (slow)
> 
> This problem can be observed with libvirt, which checks for /dev/vhost-net
> availability and just inserts "vhost=on" automatically in this case; on an
> ARM64 system using stock kernel 3.18.0 with CONFIG_IOEVENTFD enabled in
> expert settings.
> 
> Signed-off-by: Pavel Fedin <p.fedin@samsung.com>
> ---
>  hw/virtio/virtio-mmio.c    | 15 +++++++++---
>  hw/virtio/virtio-pci.c     | 61 ++++++++++++++++++++++++++--------------------
>  hw/virtio/virtio.c         | 24 +++++++++++++++++-
>  include/hw/virtio/virtio.h |  1 +
>  4 files changed, 69 insertions(+), 32 deletions(-)
> 
> diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c
> index 16621fa..69d4cbc 100644
> --- a/hw/virtio/virtio-mmio.c
> +++ b/hw/virtio/virtio-mmio.c
> @@ -110,11 +110,18 @@ static int virtio_mmio_set_host_notifier_internal(VirtIOMMIOProxy *proxy,
>              return r;
>          }
>          virtio_queue_set_host_notifier_fd_handler(vq, true, set_handler);
> -        memory_region_add_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4,
> -                                  true, n, notifier);
> +
> +        if (kvm_eventfds_enabled()) {
> +            memory_region_add_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4,
> +                                      true, n, notifier);
> +        } else if (!set_handler) {
> +            virtio_queue_set_host_notifier_forwarding(vq);
> +        }
>      } else {
> -        memory_region_del_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4,
> -                                  true, n, notifier);
> +        if (kvm_eventfds_enabled()) {
> +            memory_region_del_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4,
> +                                      true, n, notifier);
> +        }
>          virtio_queue_set_host_notifier_fd_handler(vq, false, false);
>          event_notifier_cleanup(notifier);
>      }
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index 96be4fd..b27a630 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -293,41 +293,48 @@ static int virtio_pci_set_host_notifier_internal(VirtIOPCIProxy *proxy,
>              return r;
>          }
>          virtio_queue_set_host_notifier_fd_handler(vq, true, set_handler);
> -        if (modern) {
> -            if (fast_mmio) {
> -                memory_region_add_eventfd(modern_mr, modern_addr, 0,
> -                                          false, n, notifier);
> -            } else {
> -                memory_region_add_eventfd(modern_mr, modern_addr, 2,
> -                                          false, n, notifier);
> +
> +        if (kvm_eventfds_enabled()) {
> +            if (modern) {
> +                if (fast_mmio) {
> +                    memory_region_add_eventfd(modern_mr, modern_addr, 0,
> +                                              false, n, notifier);
> +                } else {
> +                    memory_region_add_eventfd(modern_mr, modern_addr, 2,
> +                                              false, n, notifier);
> +                }
> +                if (modern_pio) {
> +                    memory_region_add_eventfd(modern_notify_mr, 0, 2,
> +                                                  true, n, notifier);
> +                }
>              }
> -            if (modern_pio) {
> -                memory_region_add_eventfd(modern_notify_mr, 0, 2,
> -                                              true, n, notifier);
> +            if (legacy) {
> +                memory_region_add_eventfd(legacy_mr, legacy_addr, 2,
> +                                          true, n, notifier);
>              }
> -        }
> -        if (legacy) {
> -            memory_region_add_eventfd(legacy_mr, legacy_addr, 2,
> -                                      true, n, notifier);
> +        } else if (!set_handler) {
> +            virtio_queue_set_host_notifier_forwarding(vq);
>          }
>      } else {
> -        if (modern) {
> -            if (fast_mmio) {
> -                memory_region_del_eventfd(modern_mr, modern_addr, 0,
> -                                          false, n, notifier);
> -            } else {
> -                memory_region_del_eventfd(modern_mr, modern_addr, 2,
> -                                          false, n, notifier);
> +        if (kvm_eventfds_enabled()) {
> +            if (modern) {
> +                if (fast_mmio) {
> +                    memory_region_del_eventfd(modern_mr, modern_addr, 0,
> +                                              false, n, notifier);
> +                } else {
> +                    memory_region_del_eventfd(modern_mr, modern_addr, 2,
> +                                              false, n, notifier);
> +                }
> +                if (modern_pio) {
> +                    memory_region_del_eventfd(modern_notify_mr, 0, 2,
> +                                              true, n, notifier);
> +                }
>              }
> -            if (modern_pio) {
> -                memory_region_del_eventfd(modern_notify_mr, 0, 2,
> +            if (legacy) {
> +                memory_region_del_eventfd(legacy_mr, legacy_addr, 2,
>                                            true, n, notifier);
>              }
>          }
> -        if (legacy) {
> -            memory_region_del_eventfd(legacy_mr, legacy_addr, 2,
> -                                      true, n, notifier);
> -        }
>          virtio_queue_set_host_notifier_fd_handler(vq, false, false);
>          event_notifier_cleanup(notifier);
>      }
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index 1edef59..6fe268f 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -89,6 +89,7 @@ struct VirtQueue
>      VirtIODevice *vdev;
>      EventNotifier guest_notifier;
>      EventNotifier host_notifier;
> +    bool forward_host_notifier;
>      QLIST_ENTRY(VirtQueue) node;
>  };
> 
> @@ -969,7 +970,13 @@ void virtio_queue_notify_vq(VirtQueue *vq)
> 
>  void virtio_queue_notify(VirtIODevice *vdev, int n)
>  {
> -    virtio_queue_notify_vq(&vdev->vq[n]);
> +    VirtQueue *vq = &vdev->vq[n];
> +
> +    if (vq->forward_host_notifier) {
> +        event_notifier_set(&vq->host_notifier);
> +    } else {
> +        virtio_queue_notify_vq(&vdev->vq[n]);
> +    }
>  }
> 
>  uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
> @@ -1715,6 +1722,21 @@ void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
>          /* Test and clear notifier before after disabling event,
>           * in case poll callback didn't have time to run. */
>          virtio_queue_host_notifier_read(&vq->host_notifier);
> +
> +        vq->forward_host_notifier = false;
> +    }
> +}
> +
> +static bool forwarding_warning;
> +
> +void virtio_queue_set_host_notifier_forwarding(VirtQueue *vq)
> +{
> +    vq->forward_host_notifier = true;
> +
> +    if (!forwarding_warning) {
> +        forwarding_warning = true;
> +        error_report("KVM does not support eventfd binding, "
> +                     "using userspace event forwarding (slow)");
>      }
>  }
> 
> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> index 205fadf..f288ccb 100644
> --- a/include/hw/virtio/virtio.h
> +++ b/include/hw/virtio/virtio.h
> @@ -245,6 +245,7 @@ void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
>  EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
>  void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
>                                                 bool set_handler);
> +void virtio_queue_set_host_notifier_forwarding(VirtQueue *vq);
>  void virtio_queue_notify_vq(VirtQueue *vq);
>  void virtio_irq(VirtQueue *vq);
>  VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector);
>
Pavel Fedin Nov. 19, 2015, 10:18 a.m. UTC | #2
Hello!

> Looks like this would be ok for virtio-ccw (as it does not call
> virtio_queue_set_host_notifier_forwarding)

 Yes, this is the reason why I intentionally did not insert the extra logic into virtio_queue_set_host_notifier_fd_handler(), but
put it into a separate function instead. I don't know whether KVM on S390 reports KVM_CAP_IOEVENTFD.

> Question is might something like this for virtio-ccw useful as well?

 Yes, provided you have something to bind userspace actions to. It looks like S390 doesn't use MMIO here at all, but some (to me)
obscure thing called a "subchannel".
 Another question is whether you can have a kernel without KVM_CAP_IOEVENTFD on S390. Has it been there from the beginning, or was
it added some time later?

> We cannot use memory_region, though, we would need to handle that in our
> diagnose code.

 Sorry, this phrase is a complete mystery to me; I have no idea what the diagnose code is, and I have never seen an S390 in real
life. I think that if you have the motivation, you could implement userspace forwarding for ccw yourself, since you know the
hardware and how to do it. For MMIO it was indeed very simple.

Kind regards,
Pavel Fedin
Expert Engineer
Samsung Electronics Research center Russia
Michael S. Tsirkin Nov. 19, 2015, 10:55 a.m. UTC | #3
On Thu, Nov 19, 2015 at 12:50:49PM +0300, Pavel Fedin wrote:
> If you happen to have a kernel with ioeventfd support enabled, but
> missing support for them in KVM, and you attempt to enable vhost by
> setting vhost=on, qemu aborts with error:
> 
> kvm_mem_ioeventfd_add: error adding ioeventfd: Function not implemented
> 
> This patch adds a mechanism which allows to emulate KVM binding by
> triggering the related notifiers via the userspace. The first time the
> emulation is used, a warning is displayed, so that the user knows about
> potential performance impact:
> 
> 2015-11-19T09:35:16.618380Z qemu-system-aarch64: KVM does not support eventfd binding, using userspace event forwarding (slow)
> 
> This problem can be observed with libvirt, which checks for /dev/vhost-net
> availability and just inserts "vhost=on" automatically in this case; on an
> ARM64 system using stock kernel 3.18.0 with CONFIG_IOEVENTFD enabled in
> expert settings.
> 
> Signed-off-by: Pavel Fedin <p.fedin@samsung.com>

OK, that's better, thanks!
Why do we need
        if (kvm_eventfds_enabled()) {
            memory_region_add_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4,
                                    true, n, notifier);
        } else if (!set_handler) {
           virtio_queue_set_host_notifier_forwarding(vq);
        }
everywhere though?

Can't memory_region_add_eventfd DTRT depending on kvm etc?
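
For illustration only (this is not part of the patch): if memory_region_add_eventfd() itself fell back to userspace forwarding
when KVM lacks ioeventfd support, the virtio-mmio call site could presumably stay close to its pre-patch shape. A sketch, reusing
the variables of virtio_mmio_set_host_notifier_internal() and assuming that fallback exists:

    /* Sketch only: assumes memory_region_add_eventfd()/del_eventfd() handle
     * the !kvm_eventfds_enabled() case internally, so the caller no longer
     * has to distinguish kernel and userspace dispatch. */
    if (assign) {
        r = event_notifier_init(notifier, 1);
        if (r < 0) {
            return r;
        }
        virtio_queue_set_host_notifier_fd_handler(vq, true, set_handler);
        memory_region_add_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4,
                                  true, n, notifier);
    } else {
        memory_region_del_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4,
                                  true, n, notifier);
        virtio_queue_set_host_notifier_fd_handler(vq, false, false);
        event_notifier_cleanup(notifier);
    }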

> ---
>  hw/virtio/virtio-mmio.c    | 15 +++++++++---
>  hw/virtio/virtio-pci.c     | 61 ++++++++++++++++++++++++++--------------------
>  hw/virtio/virtio.c         | 24 +++++++++++++++++-
>  include/hw/virtio/virtio.h |  1 +
>  4 files changed, 69 insertions(+), 32 deletions(-)
> 
> diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c
> index 16621fa..69d4cbc 100644
> --- a/hw/virtio/virtio-mmio.c
> +++ b/hw/virtio/virtio-mmio.c
> @@ -110,11 +110,18 @@ static int virtio_mmio_set_host_notifier_internal(VirtIOMMIOProxy *proxy,
>              return r;
>          }
>          virtio_queue_set_host_notifier_fd_handler(vq, true, set_handler);
> -        memory_region_add_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4,
> -                                  true, n, notifier);
> +
> +        if (kvm_eventfds_enabled()) {
> +            memory_region_add_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4,
> +                                      true, n, notifier);
> +        } else if (!set_handler) {
> +            virtio_queue_set_host_notifier_forwarding(vq);
> +        }
>      } else {
> -        memory_region_del_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4,
> -                                  true, n, notifier);
> +        if (kvm_eventfds_enabled()) {
> +            memory_region_del_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4,
> +                                      true, n, notifier);
> +        }
>          virtio_queue_set_host_notifier_fd_handler(vq, false, false);
>          event_notifier_cleanup(notifier);
>      }
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index 96be4fd..b27a630 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -293,41 +293,48 @@ static int virtio_pci_set_host_notifier_internal(VirtIOPCIProxy *proxy,
>              return r;
>          }
>          virtio_queue_set_host_notifier_fd_handler(vq, true, set_handler);
> -        if (modern) {
> -            if (fast_mmio) {
> -                memory_region_add_eventfd(modern_mr, modern_addr, 0,
> -                                          false, n, notifier);
> -            } else {
> -                memory_region_add_eventfd(modern_mr, modern_addr, 2,
> -                                          false, n, notifier);
> +
> +        if (kvm_eventfds_enabled()) {
> +            if (modern) {
> +                if (fast_mmio) {
> +                    memory_region_add_eventfd(modern_mr, modern_addr, 0,
> +                                              false, n, notifier);
> +                } else {
> +                    memory_region_add_eventfd(modern_mr, modern_addr, 2,
> +                                              false, n, notifier);
> +                }
> +                if (modern_pio) {
> +                    memory_region_add_eventfd(modern_notify_mr, 0, 2,
> +                                                  true, n, notifier);
> +                }
>              }
> -            if (modern_pio) {
> -                memory_region_add_eventfd(modern_notify_mr, 0, 2,
> -                                              true, n, notifier);
> +            if (legacy) {
> +                memory_region_add_eventfd(legacy_mr, legacy_addr, 2,
> +                                          true, n, notifier);
>              }
> -        }
> -        if (legacy) {
> -            memory_region_add_eventfd(legacy_mr, legacy_addr, 2,
> -                                      true, n, notifier);
> +        } else if (!set_handler) {
> +            virtio_queue_set_host_notifier_forwarding(vq);
>          }
>      } else {
> -        if (modern) {
> -            if (fast_mmio) {
> -                memory_region_del_eventfd(modern_mr, modern_addr, 0,
> -                                          false, n, notifier);
> -            } else {
> -                memory_region_del_eventfd(modern_mr, modern_addr, 2,
> -                                          false, n, notifier);
> +        if (kvm_eventfds_enabled()) {
> +            if (modern) {
> +                if (fast_mmio) {
> +                    memory_region_del_eventfd(modern_mr, modern_addr, 0,
> +                                              false, n, notifier);
> +                } else {
> +                    memory_region_del_eventfd(modern_mr, modern_addr, 2,
> +                                              false, n, notifier);
> +                }
> +                if (modern_pio) {
> +                    memory_region_del_eventfd(modern_notify_mr, 0, 2,
> +                                              true, n, notifier);
> +                }
>              }
> -            if (modern_pio) {
> -                memory_region_del_eventfd(modern_notify_mr, 0, 2,
> +            if (legacy) {
> +                memory_region_del_eventfd(legacy_mr, legacy_addr, 2,
>                                            true, n, notifier);
>              }
>          }
> -        if (legacy) {
> -            memory_region_del_eventfd(legacy_mr, legacy_addr, 2,
> -                                      true, n, notifier);
> -        }
>          virtio_queue_set_host_notifier_fd_handler(vq, false, false);
>          event_notifier_cleanup(notifier);
>      }
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index 1edef59..6fe268f 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -89,6 +89,7 @@ struct VirtQueue
>      VirtIODevice *vdev;
>      EventNotifier guest_notifier;
>      EventNotifier host_notifier;
> +    bool forward_host_notifier;
>      QLIST_ENTRY(VirtQueue) node;
>  };
>  
> @@ -969,7 +970,13 @@ void virtio_queue_notify_vq(VirtQueue *vq)
>  
>  void virtio_queue_notify(VirtIODevice *vdev, int n)
>  {
> -    virtio_queue_notify_vq(&vdev->vq[n]);
> +    VirtQueue *vq = &vdev->vq[n];
> +
> +    if (vq->forward_host_notifier) {
> +        event_notifier_set(&vq->host_notifier);
> +    } else {
> +        virtio_queue_notify_vq(&vdev->vq[n]);
> +    }
>  }
>  
>  uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
> @@ -1715,6 +1722,21 @@ void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
>          /* Test and clear notifier before after disabling event,
>           * in case poll callback didn't have time to run. */
>          virtio_queue_host_notifier_read(&vq->host_notifier);
> +
> +        vq->forward_host_notifier = false;
> +    }
> +}
> +
> +static bool forwarding_warning;
> +
> +void virtio_queue_set_host_notifier_forwarding(VirtQueue *vq)
> +{
> +    vq->forward_host_notifier = true;
> +
> +    if (!forwarding_warning) {
> +        forwarding_warning = true;
> +        error_report("KVM does not support eventfd binding, "
> +                     "using userspace event forwarding (slow)");
>      }
>  }
>  
> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> index 205fadf..f288ccb 100644
> --- a/include/hw/virtio/virtio.h
> +++ b/include/hw/virtio/virtio.h
> @@ -245,6 +245,7 @@ void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
>  EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
>  void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
>                                                 bool set_handler);
> +void virtio_queue_set_host_notifier_forwarding(VirtQueue *vq);
>  void virtio_queue_notify_vq(VirtQueue *vq);
>  void virtio_irq(VirtQueue *vq);
>  VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector);
> -- 
> 1.9.5.msysgit.0
>
Cornelia Huck Nov. 19, 2015, 11:41 a.m. UTC | #4
On Thu, 19 Nov 2015 13:18:13 +0300
Pavel Fedin <p.fedin@samsung.com> wrote:

>  Hello!
> 
> > Looks like this would be ok for virtio-ccw (as it does not call
> > virtio_queue_set_host_notifier_forwarding)
> 
>  Yes, this is the reason why i intentionally did not insert extra logic into virtio_queue_set_host_notifier_fd_handler(), but made
> it into separate function. I don't know whether KVM on S390 reports KVM_CAP_IOEVENTFD.

Yes, it does.

> 
> > Question is might something like this for virtio-ccw useful as well?
> 
>  Yes, provided you have something to bind userspace actions to. Looks like S390 doesn't use MMIO here at all, but some obscure (for
> me) thing, called "subchannel".

:) Think of a "subchannel" as a communication channel between the
operating system and the device. It is used to transmit commands that
can do various things like read, write, or configure stuff. Basically,
stuff that is handled via mmio on other platforms.

>  Another question is whether you can have kernel without KVM_CAP_IOEVENTFD on S390. Was it introduced since the beginning, or some
> time later?

There are old kernels around that have the needed basic support for the
channel subsystem (needed for virtio-ccw), but not the ioeventfd
support.

> 
> > We cannot use memory_region, though, we would need to handle that in our
> > diagnose code.
> 
>  Sorry, this phrase is a complete mystery to me, i have no idea what is diagnose code, as well as have never seen S390 in real life.

"diagnose" is a hypercall on the s390. For virtio-ccw, guest->host
notification happens via a special hypercall (diagnose 0x500).
Therefore, we need to hook into our implementation of that diagnose for
our handling.

> I think if you have motivation, you could implement userspace forwarding for ccw yourself, since you know the thing and know how to
> do it. For MMIO it was indeed very simple.

I'll take a peek at your patch.
Pavel Fedin Nov. 19, 2015, 11:44 a.m. UTC | #5
Hello!

> OK, that's better, thanks!

 Yes, indeed this was simple.

> Why do we need
>         if (kvm_eventfds_enabled()) {
>             memory_region_add_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4,
>                                     true, n, notifier);
>         } else if (!set_handler) {
>            virtio_queue_set_host_notifier_forwarding(vq);
>         }
> everywhere though?
> 
> Can't memory_region_add_eventfd DTRT depending on kvm etc?

 You know... I took a look at this, and yes, I could simply hook the emulation into the memory region handlers. Then everything that
expects KVM eventfd binding would magically start working, probably rendering some bypass code obsolete.
 I have only one concern about this. QEMU is a large piece of software, consisting of lots of components, and I cannot test
absolutely everything in every configuration. I suspect the old code was written with the assumption that if
memory_region_add_eventfd() works, we are really using KVM acceleration. If we break this assumption, how much code will
mysteriously misbehave instead of throwing a "function not supported" error, which is quite easy to trace down and fix?
 What I would refactor, perhaps, is to add a return code to memory_region_add_eventfd(), so that it can signal failure instead of
aborting; this would let us get rid of the accompanying kvm_eventfds_enabled() checks. A sketch of that idea follows below.
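
A minimal sketch of that refactoring idea; memory_region_add_eventfd_try() is a made-up name, and today the abort actually happens
later in the KVM memory listener, so this only conveys the intent:

    /* Hypothetical wrapper (not an existing QEMU API): report failure to the
     * caller instead of letting the KVM memory listener abort later on. */
    static int memory_region_add_eventfd_try(MemoryRegion *mr, hwaddr addr,
                                             unsigned size, bool match_data,
                                             uint64_t data, EventNotifier *e)
    {
        if (!kvm_eventfds_enabled()) {
            return -ENOSYS;  /* caller can fall back to userspace forwarding */
        }
        memory_region_add_eventfd(mr, addr, size, match_data, data, e);
        return 0;
    }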

Kind regards,
Pavel Fedin
Expert Engineer
Samsung Electronics Research center Russia
Michael S. Tsirkin Nov. 19, 2015, 12:01 p.m. UTC | #6
On Thu, Nov 19, 2015 at 02:44:37PM +0300, Pavel Fedin wrote:
>  Hello!
> 
> > OK, that's better, thanks!
> 
>  Yes, indeed this was simple.
> 
> > Why do we need
> >         if (kvm_eventfds_enabled()) {
> >             memory_region_add_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4,
> >                                     true, n, notifier);
> >         } else if (!set_handler) {
> >            virtio_queue_set_host_notifier_forwarding(vq);
> >         }
> > everywhere though?
> > 
> > Can't memory_region_add_eventfd DTRT depending on kvm etc?
> 
>  You know... I took a look at this, and yes, i could simply hook up emulation into memory region handlers. And everything that
> expects KVM eventfd binding will magically start working, probably rendering some bypass code obsolete.
>  I have only one concern against this. qemu is a large piece of software, consisting of lots of components. I cannot test absolutely
> everything in every configuration. I suggest, old code was written with the assumption that if memory_region_add_eventfd() works, we
> are really using KVM acceleration. If we break this assumption, how much code will mysteriously misbehave instead of throwing

First of all, memory_region_add_eventfd simply exits on failure.
It seems unlikely you will break something which isn't already
broken.

Further:

$ git grep memory_region_add_eventfd
hw/misc/ivshmem.c:    memory_region_add_eventfd(&s->ivshmem_mmio,
hw/misc/pci-testdev.c:    memory_region_add_eventfd(test->mr,
hw/virtio/virtio-mmio.c:        memory_region_add_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4,
hw/virtio/virtio-pci.c:                memory_region_add_eventfd(modern_mr, modern_addr, 0,
hw/virtio/virtio-pci.c:                memory_region_add_eventfd(modern_mr, modern_addr, 2,
hw/virtio/virtio-pci.c:                memory_region_add_eventfd(modern_notify_mr, 0, 2,
hw/virtio/virtio-pci.c:            memory_region_add_eventfd(legacy_mr, legacy_addr, 2,
include/exec/memory.h: * memory_region_add_eventfd: Request an eventfd to be triggered when a word
include/exec/memory.h:void memory_region_add_eventfd(MemoryRegion *mr,
include/exec/memory.h: * memory_region_add_eventfd() call.
memory.c:void memory_region_add_eventfd(MemoryRegion *mr,

Not such a big deal to audit all call sites, is it?

Cc memory API maintainer for an opinion.

Paolo, do you see anything wrong with making
memory_region_add_eventfd work (slowly) without kvm ioeventfd support?

> "function not supported" error, which is quite easy to trace down and fix?

That's a problem with switching from vhost to virtio too;
apparently you decided it's not a big deal there,
so why the change of heart?

>  What i would refactor, perhaps, is to add a return code to memory_region_add_eventfd(), so that it can signal failure instead of a
> critical abort, this would allow to get rid of kvm_eventfds_enabled() accompanying checks.
> 
> Kind regards,
> Pavel Fedin
> Expert Engineer
> Samsung Electronics Research center Russia
Pavel Fedin Nov. 19, 2015, 12:02 p.m. UTC | #7
Hello!

> > I think if you have motivation, you could implement userspace forwarding for ccw yourself,
> since you know the thing and know how to
> > do it. For MMIO it was indeed very simple.
> 
> I'll take a peek at your patch.

 This is actually a note for Michael: ccw support is one more reason for handling forwarding at the virtio device level, not at the
memory region level.

Kind regards,
Pavel Fedin
Expert Engineer
Samsung Electronics Research center Russia
Michael S. Tsirkin Nov. 19, 2015, 12:05 p.m. UTC | #8
On Thu, Nov 19, 2015 at 03:02:12PM +0300, Pavel Fedin wrote:
>  Hello!
> 
> > > I think if you have motivation, you could implement userspace forwarding for ccw yourself,
> > since you know the thing and know how to
> > > do it. For MMIO it was indeed very simple.
> > 
> > I'll take a peek at your patch.
> 
>  This is actually a note for Michael. ccw support is one more reason for handling forwarding on virtio device level, not on memory
> region level.
> 
> Kind regards,
> Pavel Fedin
> Expert Engineer
> Samsung Electronics Research center Russia
> 

I don't know what the right solution for CCW is, but patching
PCI and MMIO like you did will not help CCW at all.
Paolo Bonzini Nov. 19, 2015, 12:06 p.m. UTC | #9
On 19/11/2015 13:01, Michael S. Tsirkin wrote:
>> >  You know... I took a look at this, and yes, i could simply hook up emulation into memory region handlers. And everything that
>> > expects KVM eventfd binding will magically start working, probably rendering some bypass code obsolete.
>> >  I have only one concern against this. qemu is a large piece of software, consisting of lots of components. I cannot test absolutely
>> > everything in every configuration. I suggest, old code was written with the assumption that if memory_region_add_eventfd() works, we
>> > are really using KVM acceleration. If we break this assumption, how much code will mysteriously misbehave instead of throwing
> First of all, memory_region_add_eventfd simply exits on failure.
> It seems unlikely you will break something which isn't already
> broken.
> 
> Further:
> 
> $ git grep memory_region_add_eventfd
> hw/misc/ivshmem.c:    memory_region_add_eventfd(&s->ivshmem_mmio,
> hw/misc/pci-testdev.c:    memory_region_add_eventfd(test->mr,
> hw/virtio/virtio-mmio.c:        memory_region_add_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4,
> hw/virtio/virtio-pci.c:                memory_region_add_eventfd(modern_mr, modern_addr, 0,
> hw/virtio/virtio-pci.c:                memory_region_add_eventfd(modern_mr, modern_addr, 2,
> hw/virtio/virtio-pci.c:                memory_region_add_eventfd(modern_notify_mr, 0, 2,
> hw/virtio/virtio-pci.c:            memory_region_add_eventfd(legacy_mr, legacy_addr, 2,
> include/exec/memory.h: * memory_region_add_eventfd: Request an eventfd to be triggered when a word
> include/exec/memory.h:void memory_region_add_eventfd(MemoryRegion *mr,
> include/exec/memory.h: * memory_region_add_eventfd() call.
> memory.c:void memory_region_add_eventfd(MemoryRegion *mr,
> 
> Not such a bit deal to audit all call sites, is it?
> 
> Cc memory API maintainer for an opinion.
> 
> Paolo, do you see anything wrong with making
> memory_region_add_eventfd work (slowly) without kvm ioeventfd support?
> 

Sure, it's even been on the todo list for a while. Stefan Hajnoczi had
a patch; the only ugly thing was that it slowed down all non-ioeventfd
accesses a little, but I guess that's okay because it's probably not
measurable.

http://lists.nongnu.org/archive/html/qemu-devel/2015-07/msg04710.html
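
For reference, the core of that approach is dispatching the ioeventfds registered on a region from userspace. Roughly, as a sketch
only — the field names are recalled from memory.c and may not match the referenced patch exactly:

    /* Approximate sketch: walk the ioeventfds registered on the region and
     * fire a matching one instead of relying on KVM. */
    static bool memory_region_dispatch_write_eventfds(MemoryRegion *mr,
                                                      hwaddr addr,
                                                      uint64_t data,
                                                      unsigned size)
    {
        unsigned i;

        for (i = 0; i < mr->ioeventfd_nb; i++) {
            MemoryRegionIoeventfd *fd = &mr->ioeventfds[i];

            if (int128_get64(fd->addr.start) != addr) {
                continue;
            }
            if (fd->match_data && fd->data != data) {
                continue;
            }
            event_notifier_set(fd->e);
            return true;      /* write was consumed by an ioeventfd */
        }
        return false;         /* fall through to the normal write handler */
    }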

Paolo
Michael S. Tsirkin Nov. 19, 2015, 12:10 p.m. UTC | #10
On Thu, Nov 19, 2015 at 01:06:17PM +0100, Paolo Bonzini wrote:
> 
> 
> On 19/11/2015 13:01, Michael S. Tsirkin wrote:
> >> >  You know... I took a look at this, and yes, i could simply hook up emulation into memory region handlers. And everything that
> >> > expects KVM eventfd binding will magically start working, probably rendering some bypass code obsolete.
> >> >  I have only one concern against this. qemu is a large piece of software, consisting of lots of components. I cannot test absolutely
> >> > everything in every configuration. I suggest, old code was written with the assumption that if memory_region_add_eventfd() works, we
> >> > are really using KVM acceleration. If we break this assumption, how much code will mysteriously misbehave instead of throwing
> > First of all, memory_region_add_eventfd simply exits on failure.
> > It seems unlikely you will break something which isn't already
> > broken.
> > 
> > Further:
> > 
> > $ git grep memory_region_add_eventfd
> > hw/misc/ivshmem.c:    memory_region_add_eventfd(&s->ivshmem_mmio,
> > hw/misc/pci-testdev.c:    memory_region_add_eventfd(test->mr,
> > hw/virtio/virtio-mmio.c:        memory_region_add_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4,
> > hw/virtio/virtio-pci.c:                memory_region_add_eventfd(modern_mr, modern_addr, 0,
> > hw/virtio/virtio-pci.c:                memory_region_add_eventfd(modern_mr, modern_addr, 2,
> > hw/virtio/virtio-pci.c:                memory_region_add_eventfd(modern_notify_mr, 0, 2,
> > hw/virtio/virtio-pci.c:            memory_region_add_eventfd(legacy_mr, legacy_addr, 2,
> > include/exec/memory.h: * memory_region_add_eventfd: Request an eventfd to be triggered when a word
> > include/exec/memory.h:void memory_region_add_eventfd(MemoryRegion *mr,
> > include/exec/memory.h: * memory_region_add_eventfd() call.
> > memory.c:void memory_region_add_eventfd(MemoryRegion *mr,
> > 
> > Not such a bit deal to audit all call sites, is it?
> > 
> > Cc memory API maintainer for an opinion.
> > 
> > Paolo, do you see anything wrong with making
> > memory_region_add_eventfd work (slowly) without kvm ioeventfd support?
> > 
> 
> Sure, it's even been on the todo list for a while.  Stefan Hajnoczi had
> a patch, the only ugly thing was that it slowed down a little all
> non-ioeventfd accesses, but I guess that's okay because it's probably
> not measurable.
> 
> http://lists.nongnu.org/archive/html/qemu-devel/2015-07/msg04710.html
> 
> Paolo
> 
> Paolo

Maybe it can be improved to probe the kernel and only take action if KVM
does not support ioeventfd.
Pavel Fedin Nov. 19, 2015, 12:17 p.m. UTC | #11
Hello!

> > Paolo, do you see anything wrong with making
> > memory_region_add_eventfd work (slowly) without kvm ioeventfd support?
> >
> 
> Sure, it's even been on the todo list for a while.  Stefan Hajnoczi had
> a patch, the only ugly thing was that it slowed down a little all
> non-ioeventfd accesses, but I guess that's okay because it's probably
> not measurable.
> 
> http://lists.nongnu.org/archive/html/qemu-devel/2015-07/msg04710.html

 Ok, good. So this has already been tested. I think I can further extend this patch by:
1. Adding a flag to disable memory_region_dispatch_write_ioeventfds(), to be set by the KVM code for example, so that with KVM we
have even less performance degradation.
2. Issuing a one-shot warning, like in my current patch, so that the user knows that his/her kernel prevents getting the best
performance.
 Will that be OK? A rough sketch of both points follows below.
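
 Sketch only — the flag and its setter are made-up names, not existing QEMU APIs:

    /* The idea: KVM code flips the flag when the kernel dispatches
     * ioeventfds itself, and the userspace dispatch path warns once when it
     * actually has to take over. */
    static bool ioeventfds_handled_in_kernel;

    void memory_region_set_kernel_ioeventfd_dispatch(bool enabled)
    {
        ioeventfds_handled_in_kernel = enabled;   /* e.g. called from kvm-all.c */
    }

    /* checked at the top of memory_region_dispatch_write_ioeventfds() */
    static bool userspace_ioeventfd_dispatch_wanted(void)
    {
        static bool warned;

        if (ioeventfds_handled_in_kernel) {
            return false;                  /* KVM fires the eventfds itself */
        }
        if (!warned) {
            warned = true;
            error_report("KVM does not support eventfd binding, "
                         "dispatching ioeventfds in userspace (slow)");
        }
        return true;
    }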

 Also Cc'ed Cornelia, because she is taking part in the CCW discussion; the same kind of generic event forwarding could then be
implemented for CCW.

Kind regards,
Pavel Fedin
Expert Engineer
Samsung Electronics Research center Russia
Paolo Bonzini Nov. 19, 2015, 12:31 p.m. UTC | #12
On 19/11/2015 13:17, Pavel Fedin wrote:
>  Ok, good. So, this has already been tested. Ok, i think i can further extend this patch by:
> 1. Add a flag to disable memory_region_dispatch_write_ioeventfds(), to be set by KVM code for example. So that with KVM we have even
> smaller performance degradation.
> 2. Issue a one-shot warning, like in my current patch, so that the user knows that his/her kernel prevents from getting the best
> performance.
>  Will it be OK then?

Yes! However, QEMU pretty much always tries to use ioeventfd for virtio.
I'm not sure you want to warn outside the special cases of vhost or
dataplane... So perhaps it's better to warn in the vhost and dataplane
case specifically, rather than always.

Paolo
Cornelia Huck Nov. 19, 2015, 12:55 p.m. UTC | #13
On Thu, 19 Nov 2015 15:17:57 +0300
Pavel Fedin <p.fedin@samsung.com> wrote:

>  Hello!
> 
> > > Paolo, do you see anything wrong with making
> > > memory_region_add_eventfd work (slowly) without kvm ioeventfd support?
> > >
> > 
> > Sure, it's even been on the todo list for a while.  Stefan Hajnoczi had
> > a patch, the only ugly thing was that it slowed down a little all
> > non-ioeventfd accesses, but I guess that's okay because it's probably
> > not measurable.
> > 
> > http://lists.nongnu.org/archive/html/qemu-devel/2015-07/msg04710.html

If I read this correctly, memory regions already keep track of
ioeventfds, and this patch can simply trigger them manually if that has
not already been done?

> 
>  Ok, good. So, this has already been tested. Ok, i think i can further extend this patch by:
> 1. Add a flag to disable memory_region_dispatch_write_ioeventfds(), to be set by KVM code for example. So that with KVM we have even
> smaller performance degradation.
> 2. Issue a one-shot warning, like in my current patch, so that the user knows that his/her kernel prevents from getting the best
> performance.
>  Will it be OK then?
> 
>  Cc'ed also Cornelia because he's taking part in CCW discussion. And the same kind of generic event forwarding could be implemented
> for CCW then.

Only that we don't have the nice tracking structure that memory
regions already have.

The intercept handler for diagnose 500 already ends up at the correct
device (its ID is one of the parameters), so I'd need to check for an
existing notifier on the virtqueue and trigger that instead of calling
virtio_queue_notify directly?
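
Roughly, yes — purely as an illustration, the decision could look like the sketch below. The helper name is made up, and the exact
hook point on the ccw side is an assumption, not code from this patch:

    /* Sketch only: in the diagnose 0x500 notify handler, once the VirtQueue
     * has been resolved from the subchannel and queue index.
     * virtio_queue_host_notifier_installed() is an assumed helper telling
     * whether a host notifier has been set up for this queue. */
    static void virtio_ccw_notify_vq(VirtQueue *vq)
    {
        if (virtio_queue_host_notifier_installed(vq)) {
            /* vhost-style backend: kick its eventfd */
            event_notifier_set(virtio_queue_get_host_notifier(vq));
        } else {
            /* plain userspace virtio: process the queue directly */
            virtio_queue_notify_vq(vq);
        }
    }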
Pavel Fedin Nov. 19, 2015, 4:03 p.m. UTC | #14
Hello!

> If I read this correctly, memory regions already keep track of
> ioeventfds and this patch can simply trigger them manually if that had
> not already been done?

 Yes, exactly, and this is the new idea.

> Only that we don't have such a nice tracking structure that memory
> regions already have.
> 
> The intercept handler for diagnose 500 already ends up at the correct
> device (its id is one of the parameters), so I'd need to check for an
> existing notifier on the virtqueue and trigger that instead of calling
> virtio_queue_notify directly?

 Yes, absolutely correct, but only if the notifier has actually been installed. Pure userspace virtio will not do it, AFAIK, if we
don't have KVM_CAP_IOEVENTFD. Take a careful look at virtio-mmio.c and you'll see that host notifiers are actually installed from
two places:
1. virtio_mmio_start_ioeventfd()
2. virtio_mmio_set_host_notifier()

 The userspace implementation of virtio uses path (1), while vhost uses (2). Note also that (1) does not do anything at all if KVM
does not support ioeventfds. So, case (1) breaks up into two:
 1a. userspace virtio, notifications triggered by handling MMIO writes in userspace
 1b. userspace virtio, notifications triggered by ioeventfds, which are bound to MMIO in the kernel
 I don't know why we have (1b) at all, because I doubt there would be any noticeable performance advantage; but, I believe, there
was some reason to implement it.

 Both (1) and (2) end up in the same internal routine, and the only difference is 'bool set_handler', which is set to false for
vhost. This is what I rely on and what you should detect too; a rough sketch follows below.

 The PCI backend is a little more complicated to read, but it works in exactly the same way.
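
 To make the set_handler distinction concrete, here is roughly what the assign path of a ccw counterpart of those internal routines
could look like — a sketch only, assuming virtio-ccw gained the same virtio_queue_set_host_notifier_forwarding() call; the
registration step is deliberately left abstract, since ccw would not use memory_region_add_eventfd():

    /* Sketch of a hypothetical virtio_ccw_set_host_notifier_internal()
     * assign path, mirroring the mmio/pci versions in this patch.
     * set_handler is true on the userspace-virtio path (1) and false on
     * the vhost path (2). */
    if (assign) {
        r = event_notifier_init(notifier, 1);
        if (r < 0) {
            return r;
        }
        virtio_queue_set_host_notifier_fd_handler(vq, true, set_handler);
        if (kvm_eventfds_enabled()) {
            /* register the notifier with the s390/ccw notification
             * mechanism here (ccw-specific, not memory_region_add_eventfd) */
        } else if (!set_handler) {
            /* vhost without kernel ioeventfd support: forward in userspace */
            virtio_queue_set_host_notifier_forwarding(vq);
        }
    }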

Kind regards,
Pavel Fedin
Expert Engineer
Samsung Electronics Research center Russia

Patch

diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c
index 16621fa..69d4cbc 100644
--- a/hw/virtio/virtio-mmio.c
+++ b/hw/virtio/virtio-mmio.c
@@ -110,11 +110,18 @@  static int virtio_mmio_set_host_notifier_internal(VirtIOMMIOProxy *proxy,
             return r;
         }
         virtio_queue_set_host_notifier_fd_handler(vq, true, set_handler);
-        memory_region_add_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4,
-                                  true, n, notifier);
+
+        if (kvm_eventfds_enabled()) {
+            memory_region_add_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4,
+                                      true, n, notifier);
+        } else if (!set_handler) {
+            virtio_queue_set_host_notifier_forwarding(vq);
+        }
     } else {
-        memory_region_del_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4,
-                                  true, n, notifier);
+        if (kvm_eventfds_enabled()) {
+            memory_region_del_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4,
+                                      true, n, notifier);
+        }
         virtio_queue_set_host_notifier_fd_handler(vq, false, false);
         event_notifier_cleanup(notifier);
     }
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 96be4fd..b27a630 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -293,41 +293,48 @@  static int virtio_pci_set_host_notifier_internal(VirtIOPCIProxy *proxy,
             return r;
         }
         virtio_queue_set_host_notifier_fd_handler(vq, true, set_handler);
-        if (modern) {
-            if (fast_mmio) {
-                memory_region_add_eventfd(modern_mr, modern_addr, 0,
-                                          false, n, notifier);
-            } else {
-                memory_region_add_eventfd(modern_mr, modern_addr, 2,
-                                          false, n, notifier);
+
+        if (kvm_eventfds_enabled()) {
+            if (modern) {
+                if (fast_mmio) {
+                    memory_region_add_eventfd(modern_mr, modern_addr, 0,
+                                              false, n, notifier);
+                } else {
+                    memory_region_add_eventfd(modern_mr, modern_addr, 2,
+                                              false, n, notifier);
+                }
+                if (modern_pio) {
+                    memory_region_add_eventfd(modern_notify_mr, 0, 2,
+                                                  true, n, notifier);
+                }
             }
-            if (modern_pio) {
-                memory_region_add_eventfd(modern_notify_mr, 0, 2,
-                                              true, n, notifier);
+            if (legacy) {
+                memory_region_add_eventfd(legacy_mr, legacy_addr, 2,
+                                          true, n, notifier);
             }
-        }
-        if (legacy) {
-            memory_region_add_eventfd(legacy_mr, legacy_addr, 2,
-                                      true, n, notifier);
+        } else if (!set_handler) {
+            virtio_queue_set_host_notifier_forwarding(vq);
         }
     } else {
-        if (modern) {
-            if (fast_mmio) {
-                memory_region_del_eventfd(modern_mr, modern_addr, 0,
-                                          false, n, notifier);
-            } else {
-                memory_region_del_eventfd(modern_mr, modern_addr, 2,
-                                          false, n, notifier);
+        if (kvm_eventfds_enabled()) {
+            if (modern) {
+                if (fast_mmio) {
+                    memory_region_del_eventfd(modern_mr, modern_addr, 0,
+                                              false, n, notifier);
+                } else {
+                    memory_region_del_eventfd(modern_mr, modern_addr, 2,
+                                              false, n, notifier);
+                }
+                if (modern_pio) {
+                    memory_region_del_eventfd(modern_notify_mr, 0, 2,
+                                              true, n, notifier);
+                }
             }
-            if (modern_pio) {
-                memory_region_del_eventfd(modern_notify_mr, 0, 2,
+            if (legacy) {
+                memory_region_del_eventfd(legacy_mr, legacy_addr, 2,
                                           true, n, notifier);
             }
         }
-        if (legacy) {
-            memory_region_del_eventfd(legacy_mr, legacy_addr, 2,
-                                      true, n, notifier);
-        }
         virtio_queue_set_host_notifier_fd_handler(vq, false, false);
         event_notifier_cleanup(notifier);
     }
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 1edef59..6fe268f 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -89,6 +89,7 @@  struct VirtQueue
     VirtIODevice *vdev;
     EventNotifier guest_notifier;
     EventNotifier host_notifier;
+    bool forward_host_notifier;
     QLIST_ENTRY(VirtQueue) node;
 };
 
@@ -969,7 +970,13 @@  void virtio_queue_notify_vq(VirtQueue *vq)
 
 void virtio_queue_notify(VirtIODevice *vdev, int n)
 {
-    virtio_queue_notify_vq(&vdev->vq[n]);
+    VirtQueue *vq = &vdev->vq[n];
+
+    if (vq->forward_host_notifier) {
+        event_notifier_set(&vq->host_notifier);
+    } else {
+        virtio_queue_notify_vq(&vdev->vq[n]);
+    }
 }
 
 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
@@ -1715,6 +1722,21 @@  void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
         /* Test and clear notifier before after disabling event,
          * in case poll callback didn't have time to run. */
         virtio_queue_host_notifier_read(&vq->host_notifier);
+
+        vq->forward_host_notifier = false;
+    }
+}
+
+static bool forwarding_warning;
+
+void virtio_queue_set_host_notifier_forwarding(VirtQueue *vq)
+{
+    vq->forward_host_notifier = true;
+
+    if (!forwarding_warning) {
+        forwarding_warning = true;
+        error_report("KVM does not support eventfd binding, "
+                     "using userspace event forwarding (slow)");
     }
 }
 
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 205fadf..f288ccb 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -245,6 +245,7 @@  void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
 void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
                                                bool set_handler);
+void virtio_queue_set_host_notifier_forwarding(VirtQueue *vq);
 void virtio_queue_notify_vq(VirtQueue *vq);
 void virtio_irq(VirtQueue *vq);
 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector);