diff mbox

memory: emulate ioeventfd

Message ID 006e01d12377$0b9c2d40$22d487c0$@samsung.com
State New
Headers show

Commit Message

Pavel Fedin Nov. 20, 2015, 9:37 a.m. UTC
The ioeventfd mechanism is used by vhost, dataplane, and virtio-pci to
turn guest MMIO/PIO writes into eventfd file descriptor events.  This
allows arbitrary threads to be notified when the guest writes to a
specific MMIO/PIO address.

qtest and TCG do not support ioeventfd because memory writes are not
checked against registered ioeventfds in QEMU.  This patch implements
that check in memory_region_dispatch_write() so qtest can use ioeventfd.

This patch also fixes vhost aborting on some misconfigured old kernels,
such as 3.18.0 on ARM.  On those kernels CONFIG_EVENTFD can be enabled
explicitly in the expert settings while MMIO binding support in KVM is
still missing.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Pavel Fedin <p.fedin@samsung.com>
---
RFC => PATCH:
- Add !kvm_eventfds_enabled() conditions to bypass eventfd injection when not needed
- Rename "ioeventfd" to "eventfd", just to make words shorter
- Add a one-shot warning about missing MMIO bindings in KVM
---
 kvm-all.c |  6 ++++--
 memory.c  | 42 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 2 deletions(-)

Comments

Michael S. Tsirkin Nov. 20, 2015, 9:41 a.m. UTC | #1
On Fri, Nov 20, 2015 at 12:37:16PM +0300, Pavel Fedin wrote:
> The ioeventfd mechanism is used by vhost, dataplane, and virtio-pci to
> turn guest MMIO/PIO writes into eventfd file descriptor events.  This
> allows arbitrary threads to be notified when the guest writes to a
> specific MMIO/PIO address.
> 
> qtest and TCG do not support ioeventfd because memory writes are not
> checked against registered ioeventfds in QEMU.  This patch implements
> this in memory_region_dispatch_write() so qtest can use ioeventfd.
> 
> Also this patch fixes vhost aborting on some misconfigured old kernels
> like 3.18.0 on ARM. It is possible to explicitly enable CONFIG_EVENTFD
> in expert settings, while MMIO binding support in KVM will still be
> missing.
> 
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> Signed-off-by: Pavel Fedin <p.fedin@samsung.com>

Reviewed-by: Michael S. Tsirkin <mst@redhat.com>

> ---
> RFC => PATCH:
> - Add !kvm_eventfds_enabled() conditions to bypass eventfd injection when not needed
> - Renamed "ioeventfd" to "eventfd", just to make words shorter
> - Add a one-shot warning about missing MMIO bindings in KVM
> ---
>  kvm-all.c |  6 ++++--
>  memory.c  | 42 ++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 46 insertions(+), 2 deletions(-)
> 
> diff --git a/kvm-all.c b/kvm-all.c
> index ddb007a..70f5cec 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -1633,8 +1633,10 @@ static int kvm_init(MachineState *ms)
>  
>      kvm_state = s;
>  
> -    s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add;
> -    s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del;
> +    if (kvm_eventfds_allowed) {
> +        s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add;
> +        s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del;
> +    }
>      s->memory_listener.listener.coalesced_mmio_add = kvm_coalesce_mmio_region;
>      s->memory_listener.listener.coalesced_mmio_del = kvm_uncoalesce_mmio_region;
>  
> diff --git a/memory.c b/memory.c
> index e193658..4d138fb 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -18,12 +18,14 @@
>  #include "exec/ioport.h"
>  #include "qapi/visitor.h"
>  #include "qemu/bitops.h"
> +#include "qemu/error-report.h"
>  #include "qom/object.h"
>  #include "trace.h"
>  #include <assert.h>
>  
>  #include "exec/memory-internal.h"
>  #include "exec/ram_addr.h"
> +#include "sysemu/kvm.h"
>  #include "sysemu/sysemu.h"
>  
>  //#define DEBUG_UNASSIGNED
> @@ -1141,6 +1143,32 @@ MemTxResult memory_region_dispatch_read(MemoryRegion *mr,
>      return r;
>  }
>  
> +/* Return true if an eventfd was signalled */
> +static bool memory_region_dispatch_write_eventfds(MemoryRegion *mr,
> +                                                    hwaddr addr,
> +                                                    uint64_t data,
> +                                                    unsigned size,
> +                                                    MemTxAttrs attrs)
> +{
> +    MemoryRegionIoeventfd ioeventfd = {
> +        .addr = addrrange_make(int128_make64(addr), int128_make64(size)),
> +        .data = data,
> +    };
> +    unsigned i;
> +
> +    for (i = 0; i < mr->ioeventfd_nb; i++) {
> +        ioeventfd.match_data = mr->ioeventfds[i].match_data;
> +        ioeventfd.e = mr->ioeventfds[i].e;
> +
> +        if (memory_region_ioeventfd_equal(ioeventfd, mr->ioeventfds[i])) {
> +            event_notifier_set(ioeventfd.e);
> +            return true;
> +        }
> +    }
> +
> +    return false;
> +}
> +
>  MemTxResult memory_region_dispatch_write(MemoryRegion *mr,
>                                           hwaddr addr,
>                                           uint64_t data,
> @@ -1154,6 +1182,11 @@ MemTxResult memory_region_dispatch_write(MemoryRegion *mr,
>  
>      adjust_endianness(mr, &data, size);
>  
> +    if ((!kvm_eventfds_enabled()) &&
> +        memory_region_dispatch_write_eventfds(mr, addr, data, size, attrs)) {
> +        return MEMTX_OK;
> +    }
> +
>      if (mr->ops->write) {
>          return access_with_adjusted_size(addr, &data, size,
>                                           mr->ops->impl.min_access_size,
> @@ -1672,6 +1705,8 @@ void memory_region_clear_global_locking(MemoryRegion *mr)
>      mr->global_locking = false;
>  }
>  
> +static bool userspace_eventfd_warning;
> +
>  void memory_region_add_eventfd(MemoryRegion *mr,
>                                 hwaddr addr,
>                                 unsigned size,
> @@ -1688,6 +1723,13 @@ void memory_region_add_eventfd(MemoryRegion *mr,
>      };
>      unsigned i;
>  
> +    if (kvm_enabled() && (!(kvm_eventfds_enabled() ||
> +                            userspace_eventfd_warning))) {
> +        userspace_eventfd_warning = true;
> +        error_report("Using eventfd without MMIO binding in KVM. "
> +                     "Suboptimal performance expected");
> +    }
> +
>      if (size) {
>          adjust_endianness(mr, &mrfd.data, size);
>      }
> -- 
> 1.9.5.msysgit.0
>
Paolo Bonzini Nov. 20, 2015, 3:59 p.m. UTC | #2
On 20/11/2015 10:37, Pavel Fedin wrote:
> The ioeventfd mechanism is used by vhost, dataplane, and virtio-pci to
> turn guest MMIO/PIO writes into eventfd file descriptor events.  This
> allows arbitrary threads to be notified when the guest writes to a
> specific MMIO/PIO address.
> 
> qtest and TCG do not support ioeventfd because memory writes are not
> checked against registered ioeventfds in QEMU.  This patch implements
> this in memory_region_dispatch_write() so qtest can use ioeventfd.
> 
> Also this patch fixes vhost aborting on some misconfigured old kernels
> like 3.18.0 on ARM. It is possible to explicitly enable CONFIG_EVENTFD
> in expert settings, while MMIO binding support in KVM will still be
> missing.
> 
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> Signed-off-by: Pavel Fedin <p.fedin@samsung.com>
> ---
> RFC => PATCH:
> - Add !kvm_eventfds_enabled() conditions to bypass eventfd injection when not needed
> - Renamed "ioeventfd" to "eventfd", just to make words shorter
> - Add a one-shot warning about missing MMIO bindings in KVM
> ---
>  kvm-all.c |  6 ++++--
>  memory.c  | 42 ++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 46 insertions(+), 2 deletions(-)
> 
> diff --git a/kvm-all.c b/kvm-all.c
> index ddb007a..70f5cec 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -1633,8 +1633,10 @@ static int kvm_init(MachineState *ms)
>  
>      kvm_state = s;
>  
> -    s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add;
> -    s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del;
> +    if (kvm_eventfds_allowed) {
> +        s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add;
> +        s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del;
> +    }
>      s->memory_listener.listener.coalesced_mmio_add = kvm_coalesce_mmio_region;
>      s->memory_listener.listener.coalesced_mmio_del = kvm_uncoalesce_mmio_region;
>  
> diff --git a/memory.c b/memory.c
> index e193658..4d138fb 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -18,12 +18,14 @@
>  #include "exec/ioport.h"
>  #include "qapi/visitor.h"
>  #include "qemu/bitops.h"
> +#include "qemu/error-report.h"
>  #include "qom/object.h"
>  #include "trace.h"
>  #include <assert.h>
>  
>  #include "exec/memory-internal.h"
>  #include "exec/ram_addr.h"
> +#include "sysemu/kvm.h"
>  #include "sysemu/sysemu.h"
>  
>  //#define DEBUG_UNASSIGNED
> @@ -1141,6 +1143,32 @@ MemTxResult memory_region_dispatch_read(MemoryRegion *mr,
>      return r;
>  }
>  
> +/* Return true if an eventfd was signalled */
> +static bool memory_region_dispatch_write_eventfds(MemoryRegion *mr,
> +                                                    hwaddr addr,
> +                                                    uint64_t data,
> +                                                    unsigned size,
> +                                                    MemTxAttrs attrs)
> +{
> +    MemoryRegionIoeventfd ioeventfd = {
> +        .addr = addrrange_make(int128_make64(addr), int128_make64(size)),
> +        .data = data,
> +    };
> +    unsigned i;
> +
> +    for (i = 0; i < mr->ioeventfd_nb; i++) {
> +        ioeventfd.match_data = mr->ioeventfds[i].match_data;
> +        ioeventfd.e = mr->ioeventfds[i].e;
> +
> +        if (memory_region_ioeventfd_equal(ioeventfd, mr->ioeventfds[i])) {
> +            event_notifier_set(ioeventfd.e);
> +            return true;
> +        }
> +    }
> +
> +    return false;
> +}
> +
>  MemTxResult memory_region_dispatch_write(MemoryRegion *mr,
>                                           hwaddr addr,
>                                           uint64_t data,
> @@ -1154,6 +1182,11 @@ MemTxResult memory_region_dispatch_write(MemoryRegion *mr,
>  
>      adjust_endianness(mr, &data, size);
>  
> +    if ((!kvm_eventfds_enabled()) &&
> +        memory_region_dispatch_write_eventfds(mr, addr, data, size, attrs)) {
> +        return MEMTX_OK;
> +    }
> +
>      if (mr->ops->write) {
>          return access_with_adjusted_size(addr, &data, size,
>                                           mr->ops->impl.min_access_size,
> @@ -1672,6 +1705,8 @@ void memory_region_clear_global_locking(MemoryRegion *mr)
>      mr->global_locking = false;
>  }
>  
> +static bool userspace_eventfd_warning;
> +
>  void memory_region_add_eventfd(MemoryRegion *mr,
>                                 hwaddr addr,
>                                 unsigned size,
> @@ -1688,6 +1723,13 @@ void memory_region_add_eventfd(MemoryRegion *mr,
>      };
>      unsigned i;
>  
> +    if (kvm_enabled() && (!(kvm_eventfds_enabled() ||
> +                            userspace_eventfd_warning))) {
> +        userspace_eventfd_warning = true;
> +        error_report("Using eventfd without MMIO binding in KVM. "
> +                     "Suboptimal performance expected");
> +    }
> +
>      if (size) {
>          adjust_endianness(mr, &mrfd.data, size);
>      }
> 

Queued for 2.6.

Paolo
diff mbox

Patch

diff --git a/kvm-all.c b/kvm-all.c
index ddb007a..70f5cec 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1633,8 +1633,10 @@  static int kvm_init(MachineState *ms)
 
     kvm_state = s;
 
-    s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add;
-    s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del;
+    if (kvm_eventfds_allowed) {
+        s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add;
+        s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del;
+    }
     s->memory_listener.listener.coalesced_mmio_add = kvm_coalesce_mmio_region;
     s->memory_listener.listener.coalesced_mmio_del = kvm_uncoalesce_mmio_region;
 
diff --git a/memory.c b/memory.c
index e193658..4d138fb 100644
--- a/memory.c
+++ b/memory.c
@@ -18,12 +18,14 @@ 
 #include "exec/ioport.h"
 #include "qapi/visitor.h"
 #include "qemu/bitops.h"
+#include "qemu/error-report.h"
 #include "qom/object.h"
 #include "trace.h"
 #include <assert.h>
 
 #include "exec/memory-internal.h"
 #include "exec/ram_addr.h"
+#include "sysemu/kvm.h"
 #include "sysemu/sysemu.h"
 
 //#define DEBUG_UNASSIGNED
@@ -1141,6 +1143,32 @@  MemTxResult memory_region_dispatch_read(MemoryRegion *mr,
     return r;
 }
 
+/* Return true if an eventfd was signalled */
+static bool memory_region_dispatch_write_eventfds(MemoryRegion *mr,
+                                                    hwaddr addr,
+                                                    uint64_t data,
+                                                    unsigned size,
+                                                    MemTxAttrs attrs)
+{
+    MemoryRegionIoeventfd ioeventfd = {
+        .addr = addrrange_make(int128_make64(addr), int128_make64(size)),
+        .data = data,
+    };
+    unsigned i;
+
+    for (i = 0; i < mr->ioeventfd_nb; i++) {
+        ioeventfd.match_data = mr->ioeventfds[i].match_data;
+        ioeventfd.e = mr->ioeventfds[i].e;
+
+        if (memory_region_ioeventfd_equal(ioeventfd, mr->ioeventfds[i])) {
+            event_notifier_set(ioeventfd.e);
+            return true;
+        }
+    }
+
+    return false;
+}
+
 MemTxResult memory_region_dispatch_write(MemoryRegion *mr,
                                          hwaddr addr,
                                          uint64_t data,
@@ -1154,6 +1182,11 @@  MemTxResult memory_region_dispatch_write(MemoryRegion *mr,
 
     adjust_endianness(mr, &data, size);
 
+    if ((!kvm_eventfds_enabled()) &&
+        memory_region_dispatch_write_eventfds(mr, addr, data, size, attrs)) {
+        return MEMTX_OK;
+    }
+
     if (mr->ops->write) {
         return access_with_adjusted_size(addr, &data, size,
                                          mr->ops->impl.min_access_size,
@@ -1672,6 +1705,8 @@  void memory_region_clear_global_locking(MemoryRegion *mr)
     mr->global_locking = false;
 }
 
+static bool userspace_eventfd_warning;
+
 void memory_region_add_eventfd(MemoryRegion *mr,
                                hwaddr addr,
                                unsigned size,
@@ -1688,6 +1723,13 @@  void memory_region_add_eventfd(MemoryRegion *mr,
     };
     unsigned i;
 
+    if (kvm_enabled() && (!(kvm_eventfds_enabled() ||
+                            userspace_eventfd_warning))) {
+        userspace_eventfd_warning = true;
+        error_report("Using eventfd without MMIO binding in KVM. "
+                     "Suboptimal performance expected");
+    }
+
     if (size) {
         adjust_endianness(mr, &mrfd.data, size);
     }