diff mbox series

[4/4] membarrier: add --enable-membarrier

Message ID 20180309132922.24211-5-pbonzini@redhat.com
State New
Headers show
Series Optionally use membarrier system call for RCU | expand

Commit Message

Paolo Bonzini March 9, 2018, 1:29 p.m. UTC
Actually enable the global memory barriers if supported by the OS.
Because only recent versions of Linux include the support, they
are disabled by default.  Note that it also has to be disabled
for QEMU to run under Wine.

Before this patch, rcutorture reports 85 ns/read for my machine,
after the patch it reports 12.5 ns/read.  On the other hand updates
go from 50 *micro*seconds to 20 *milli*seconds.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 configure                     | 41 ++++++++++++++++++++++++++++++++++-
 include/qemu/sys_membarrier.h | 10 +++++++++
 util/Makefile.objs            |  1 +
 util/sys_membarrier.c         | 50 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 101 insertions(+), 1 deletion(-)
 create mode 100644 util/sys_membarrier.c

Comments

Emilio Cota March 22, 2018, 1:29 a.m. UTC | #1
On Fri, Mar 09, 2018 at 14:29:22 +0100, Paolo Bonzini wrote:
> Actually enable the global memory barriers if supported by the OS.
> Because only recent versions of Linux include the support, they
> are disabled by default.  Note that it also has to be disabled
> for QEMU to run under Wine.
> 
> Before this patch, rcutorture reports 85 ns/read for my machine,
> after the patch it reports 12.5 ns/read.  On the other hand updates
> go from 50 *micro*seconds to 20 *milli*seconds.

It is indeed hard to see a large impact on performance given the
large size of our critical sections. But hey, rcu_read_unlock
goes down from 0.24% to 0.08% of execution time when booting
aarch64 linux!

As we remove bottlenecks though we should be able to gain
more benefits from this, at least in MTTCG where vcpu threads
exit the execution loop quite often.

I did some tests on qht-bench, moving the rcu_read_lock/unlock
pair to wrap each lookup instead of wrapping the entire test.
The results are great; without membarrier lookup throughput
goes down by half; with it, throughput only goes down by 5%.

(snip)
> +##########################################
> +# check for usable membarrier system call
> +if test "$membarrier" = "yes"; then
> +    have_membarrier=no
> +    if test "$mingw32" = "yes" ; then
> +        have_membarrier=yes
> +    elif test "$linux" = "yes" ; then
> +        cat > $TMPC << EOF
> +    #include <linux/membarrier.h>
> +    #include <sys/syscall.h>
> +    #include <unistd.h>
> +    int main(void) {
> +        syscall(__NR_membarrier, MEMBARRIER_CMD_QUERY, 0);
> +        syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0);
> +    }

I think we should also check here that MEMBARRIER_CMD_SHARED is
actually supported; it is possible for a kernel to have
the system call yet not support it (e.g. when the
kernel is compiled with nohz_full). Instead of failing at run-time
(in smp_mb_global_init) we should perhaps bark at configure time
as well.

Other than that the patches look good. Thanks for doing this!

		Emilio
Paolo Bonzini March 22, 2018, 8:57 a.m. UTC | #2
On 22/03/2018 02:29, Emilio G. Cota wrote:
> It is indeed hard to see a large impact on performance given the
> large size of our critical sections. But hey, rcu_read_unlock
> goes down from 0.24% to 0.08% of execution time when booting
> aarch64 linux!

I expect something a little better than 0.15% from virtio, especially
once I re-enable MemoryRegionCache in 2.13.

>> +    int main(void) {
>> +        syscall(__NR_membarrier, MEMBARRIER_CMD_QUERY, 0);
>> +        syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0);
>> +    }
> 
> I think we should also check here that MEMBARRIER_CMD_SHARED is
> actually supported;

Checking run-time constraints at compile-time is a bit pointless.  My
idea was to add a third mode where the choice is done at run-time, but
that wouldn't have made 2.12.

Paolo
diff mbox series

Patch

diff --git a/configure b/configure
index 6f3921c02a..4fcd18f136 100755
--- a/configure
+++ b/configure
@@ -342,7 +342,7 @@  attr=""
 libattr=""
 xfs=""
 tcg="yes"
-
+membarrier=""
 vhost_net="no"
 vhost_crypto="no"
 vhost_scsi="no"
@@ -1161,6 +1161,10 @@  for opt do
   ;;
   --enable-attr) attr="yes"
   ;;
+  --disable-membarrier) membarrier="no"
+  ;;
+  --enable-membarrier) membarrier="yes"
+  ;;
   --disable-blobs) blobs="no"
   ;;
   --with-pkgversion=*) pkgversion=" ($optarg)"
@@ -1577,6 +1581,7 @@  disabled with --disable-FEATURE, default is enabled if available:
   xen-pci-passthrough
   brlapi          BrlAPI (Braile)
   curl            curl connectivity
+  membarrier      membarrier system call (for Linux 4.14+ or Windows)
   fdt             fdt device tree
   bluez           bluez stack connectivity
   kvm             KVM acceleration support
@@ -5137,6 +5142,35 @@  if compile_prog "" "" ; then
     have_fsxattr=yes
 fi
 
+##########################################
+# check for usable membarrier system call
+if test "$membarrier" = "yes"; then
+    have_membarrier=no
+    if test "$mingw32" = "yes" ; then
+        have_membarrier=yes
+    elif test "$linux" = "yes" ; then
+        cat > $TMPC << EOF
+    #include <linux/membarrier.h>
+    #include <sys/syscall.h>
+    #include <unistd.h>
+    int main(void) {
+        syscall(__NR_membarrier, MEMBARRIER_CMD_QUERY, 0);
+        syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0);
+    }
+EOF
+        if compile_prog "" "" ; then
+            have_membarrier=yes
+        fi
+    fi
+    if test "$have_membarrier" = "no"; then
+      feature_not_found "membarrier" "membarrier system call not available"
+    fi
+else
+    # Do not enable it by default even for Mingw32, because it doesn't
+    # work on Wine.
+    membarrier=no
+fi
+
 ##########################################
 # check if rtnetlink.h exists and is useful
 have_rtnetlink=no
@@ -5763,6 +5798,7 @@  fi
 echo "malloc trim support $malloc_trim"
 echo "RDMA support      $rdma"
 echo "fdt support       $fdt"
+echo "membarrier        $membarrier"
 echo "preadv support    $preadv"
 echo "fdatasync         $fdatasync"
 echo "madvise           $madvise"
@@ -6245,6 +6281,9 @@  fi
 if test "$fdt" = "yes" ; then
   echo "CONFIG_FDT=y" >> $config_host_mak
 fi
+if test "$membarrier" = "yes" ; then
+  echo "CONFIG_MEMBARRIER=y" >> $config_host_mak
+fi
 if test "$signalfd" = "yes" ; then
   echo "CONFIG_SIGNALFD=y" >> $config_host_mak
 fi
diff --git a/include/qemu/sys_membarrier.h b/include/qemu/sys_membarrier.h
index 9ce7f5210b..316e3dc4a2 100644
--- a/include/qemu/sys_membarrier.h
+++ b/include/qemu/sys_membarrier.h
@@ -9,9 +9,19 @@ 
 #ifndef QEMU_SYS_MEMBARRIER_H
 #define QEMU_SYS_MEMBARRIER_H 1
 
+#ifdef CONFIG_MEMBARRIER
+/* Only block reordering at the compiler level in the performance-critical
+ * side.  The slow side forces processor-level ordering on all other cores
+ * through a system call.
+ */
+extern void smp_mb_global_init(void);
+extern void smp_mb_global(void);
+#define smp_mb_placeholder()       barrier()
+#else
 /* Keep it simple, execute a real memory barrier on both sides.  */
 static inline void smp_mb_global_init(void) {}
 #define smp_mb_global()            smp_mb()
 #define smp_mb_placeholder()       smp_mb()
+#endif
 
 #endif
diff --git a/util/Makefile.objs b/util/Makefile.objs
index ae90b9963d..728c3541db 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -33,6 +33,7 @@  util-obj-y += throttle.o
 util-obj-y += getauxval.o
 util-obj-y += readline.o
 util-obj-y += rcu.o
+util-obj-$(CONFIG_MEMBARRIER) += sys_membarrier.o
 util-obj-y += qemu-coroutine.o qemu-coroutine-lock.o qemu-coroutine-io.o
 util-obj-y += qemu-coroutine-sleep.o
 util-obj-y += coroutine-$(CONFIG_COROUTINE_BACKEND).o
diff --git a/util/sys_membarrier.c b/util/sys_membarrier.c
new file mode 100644
index 0000000000..8dcb53e63e
--- /dev/null
+++ b/util/sys_membarrier.c
@@ -0,0 +1,50 @@ 
+/*
+ * Process-global memory barriers
+ *
+ * Copyright (c) 2018 Red Hat, Inc.
+ *
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
+ */
+
+#include <qemu/osdep.h>
+#include <qemu/sys_membarrier.h>
+#include <qemu/error-report.h>
+
+#ifdef CONFIG_LINUX
+#include <linux/membarrier.h>
+#include <sys/syscall.h>
+
+static int
+membarrier(int cmd, int flags)
+{
+    return syscall(__NR_membarrier, cmd, flags);
+}
+#endif
+
+void smp_mb_global(void)
+{
+#if defined CONFIG_WIN32
+    FlushProcessWriteBuffers();
+#elif defined CONFIG_LINUX
+    membarrier(MEMBARRIER_CMD_SHARED, 0);
+#else
+#error --enable-membarrier is not supported on this operating system.
+#endif
+}
+
+void smp_mb_global_init(void)
+{
+#ifdef CONFIG_LINUX
+    int ret = membarrier(MEMBARRIER_CMD_QUERY, 0);
+    if (ret < 0) {
+        error_report("This QEMU binary requires the membarrier system call.");
+        error_report("Please upgrade your system to a newer version of Linux");
+        exit(1);
+    }
+    if (!(ret & MEMBARRIER_CMD_SHARED)) {
+        error_report("This QEMU binary requires MEMBARRIER_CMD_SHARED support.");
+        error_report("Please upgrade your system to a newer version of Linux");
+        exit(1);
+    }
+#endif
+}