Patchwork [1/2] host-utils: Use __int128 for mul[us]64

login
register
mail settings
Submitter Richard Henderson
Date Jan. 28, 2013, 6:52 p.m.
Message ID <1359399154-13050-2-git-send-email-rth@twiddle.net>
Download mbox | patch
Permalink /patch/216302/
State New
Headers show

Comments

Richard Henderson - Jan. 28, 2013, 6:52 p.m.
Replace some x86_64 specific inline assembly with something that
all 64-bit hosts ought to optimize well.  At worst this becomes a
call to the gcc __multi3 routine, which is no worse than our
implementation in util/host-utils.c.

With gcc 4.7, we get identical code generation for x86_64.  We
now get native multiplication on ia64 and s390x hosts.  With minor
improvements to gcc we can get it for ppc64 as well.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 configure                 | 20 ++++++++++++++++++++
 include/qemu/host-utils.h | 17 ++++++++---------
 util/host-utils.c         |  4 ++--
 3 files changed, 30 insertions(+), 11 deletions(-)
Peter Maydell - Jan. 28, 2013, 7:02 p.m.
On 28 January 2013 18:52, Richard Henderson <rth@twiddle.net> wrote:
> Replace some x86_64 specific inline assembly with something that
> all 64-bit hosts ought to optimize well.  At worst this becomes a
> call to the gcc __multi3 routine, which is no worse than our
> implementation in util/host-utils.c.

Hurrah for dropping inline assembly.

> +########################################
> +# check if __int128 is usable.
> +
> +int128=no
> +cat > $TMPC << EOF
> +int main (void) {
> +  __int128 a = 0;
> +  unsigned __int128 b = 1;
> +  a = a + b;
> +  a = a * b;
> +  return 0;
> +}

Have you written the test like this because you know
there are compilers out there that implement addition
but not multiplication (or unsigned but not signed, or
whatever), or just out of a vague sense of caution?

-- PMM
Richard Henderson - Jan. 28, 2013, 7:06 p.m.
On 01/28/2013 11:02 AM, Peter Maydell wrote:
> Have you written the test like this because you know
> there are compilers out there that implement addition
> but not multiplication (or unsigned but not signed, or
> whatever), or just out of a vague sense of caution?

Vague sense of caution.  I tried to use all of the things
that I was actually going to use in the qemu source.


r~
Blue Swirl - Jan. 28, 2013, 8:29 p.m.
On Mon, Jan 28, 2013 at 6:52 PM, Richard Henderson <rth@twiddle.net> wrote:
> Replace some x86_64 specific inline assembly with something that
> all 64-bit hosts ought to optimize well.  At worst this becomes a
> call to the gcc __multi3 routine, which is no worse than our
> implementation in util/host-utils.c.
>
> With gcc 4.7, we get identical code generation for x86_64.  We
> now get native multiplication on ia64 and s390x hosts.  With minor
> improvements to gcc we can get it for ppc64 as well.

In OpenBIOS we use __int128_t and __uint128_t, which are closer in
form to standard types. Could you use those instead of __int128 and
unsigned  __int128?

>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  configure                 | 20 ++++++++++++++++++++
>  include/qemu/host-utils.h | 17 ++++++++---------
>  util/host-utils.c         |  4 ++--
>  3 files changed, 30 insertions(+), 11 deletions(-)
>
> diff --git a/configure b/configure
> index b7635e4..ecf1cbc 100755
> --- a/configure
> +++ b/configure
> @@ -3150,6 +3150,22 @@ if compile_prog "" "" ; then
>      cpuid_h=yes
>  fi
>
> +########################################
> +# check if __int128 is usable.
> +
> +int128=no
> +cat > $TMPC << EOF
> +int main (void) {
> +  __int128 a = 0;
> +  unsigned __int128 b = 1;
> +  a = a + b;
> +  a = a * b;
> +  return 0;
> +}
> +EOF
> +if compile_prog "" "" ; then
> +    int128=yes
> +fi
>
>  ##########################################
>  # End of CC checks
> @@ -3692,6 +3708,10 @@ if test "$cpuid_h" = "yes" ; then
>    echo "CONFIG_CPUID_H=y" >> $config_host_mak
>  fi
>
> +if test "$int128" = "yes" ; then
> +  echo "CONFIG_INT128=y" >> $config_host_mak
> +fi
> +
>  if test "$glusterfs" = "yes" ; then
>    echo "CONFIG_GLUSTERFS=y" >> $config_host_mak
>  fi
> diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
> index 81c9a75..01f6610 100644
> --- a/include/qemu/host-utils.h
> +++ b/include/qemu/host-utils.h
> @@ -27,22 +27,21 @@
>
>  #include "qemu/compiler.h"   /* QEMU_GNUC_PREREQ */
>
> -#if defined(__x86_64__)
> -#define __HAVE_FAST_MULU64__
> +#ifdef CONFIG_INT128
>  static inline void mulu64(uint64_t *plow, uint64_t *phigh,
>                            uint64_t a, uint64_t b)
>  {
> -    __asm__ ("mul %0\n\t"
> -             : "=d" (*phigh), "=a" (*plow)
> -             : "a" (a), "0" (b));
> +    unsigned __int128 r = (unsigned __int128)a * b;

__uint128_t?

> +    *plow = r;
> +    *phigh = r >> 64;
>  }
> -#define __HAVE_FAST_MULS64__
> +
>  static inline void muls64(uint64_t *plow, uint64_t *phigh,
>                            int64_t a, int64_t b)
>  {
> -    __asm__ ("imul %0\n\t"
> -             : "=d" (*phigh), "=a" (*plow)
> -             : "a" (a), "0" (b));
> +    __int128 r = (__int128)a * b;
> +    *plow = r;
> +    *phigh = r >> 64;
>  }
>  #else
>  void muls64(uint64_t *phigh, uint64_t *plow, int64_t a, int64_t b);
> diff --git a/util/host-utils.c b/util/host-utils.c
> index 5e3915a..2d06a2c 100644
> --- a/util/host-utils.c
> +++ b/util/host-utils.c
> @@ -30,7 +30,7 @@
>  //#define DEBUG_MULDIV
>
>  /* Long integer helpers */
> -#if !defined(__x86_64__)
> +#ifndef CONFIG_INT128
>  static void add128 (uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b)
>  {
>      *plow += a;
> @@ -102,4 +102,4 @@ void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
>             a, b, *phigh, *plow);
>  #endif
>  }
> -#endif /* !defined(__x86_64__) */
> +#endif /* !CONFIG_INT128 */
> --
> 1.7.11.7
>
>

Patch

diff --git a/configure b/configure
index b7635e4..ecf1cbc 100755
--- a/configure
+++ b/configure
@@ -3150,6 +3150,22 @@  if compile_prog "" "" ; then
     cpuid_h=yes
 fi
 
+########################################
+# check if __int128 is usable.
+
+int128=no
+cat > $TMPC << EOF
+int main (void) {
+  __int128 a = 0;
+  unsigned __int128 b = 1;
+  a = a + b;
+  a = a * b;
+  return 0;
+}
+EOF
+if compile_prog "" "" ; then
+    int128=yes
+fi
 
 ##########################################
 # End of CC checks
@@ -3692,6 +3708,10 @@  if test "$cpuid_h" = "yes" ; then
   echo "CONFIG_CPUID_H=y" >> $config_host_mak
 fi
 
+if test "$int128" = "yes" ; then
+  echo "CONFIG_INT128=y" >> $config_host_mak
+fi
+
 if test "$glusterfs" = "yes" ; then
   echo "CONFIG_GLUSTERFS=y" >> $config_host_mak
 fi
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
index 81c9a75..01f6610 100644
--- a/include/qemu/host-utils.h
+++ b/include/qemu/host-utils.h
@@ -27,22 +27,21 @@ 
 
 #include "qemu/compiler.h"   /* QEMU_GNUC_PREREQ */
 
-#if defined(__x86_64__)
-#define __HAVE_FAST_MULU64__
+#ifdef CONFIG_INT128
 static inline void mulu64(uint64_t *plow, uint64_t *phigh,
                           uint64_t a, uint64_t b)
 {
-    __asm__ ("mul %0\n\t"
-             : "=d" (*phigh), "=a" (*plow)
-             : "a" (a), "0" (b));
+    unsigned __int128 r = (unsigned __int128)a * b;
+    *plow = r;
+    *phigh = r >> 64;
 }
-#define __HAVE_FAST_MULS64__
+
 static inline void muls64(uint64_t *plow, uint64_t *phigh,
                           int64_t a, int64_t b)
 {
-    __asm__ ("imul %0\n\t"
-             : "=d" (*phigh), "=a" (*plow)
-             : "a" (a), "0" (b));
+    __int128 r = (__int128)a * b;
+    *plow = r;
+    *phigh = r >> 64;
 }
 #else
 void muls64(uint64_t *phigh, uint64_t *plow, int64_t a, int64_t b);
diff --git a/util/host-utils.c b/util/host-utils.c
index 5e3915a..2d06a2c 100644
--- a/util/host-utils.c
+++ b/util/host-utils.c
@@ -30,7 +30,7 @@ 
 //#define DEBUG_MULDIV
 
 /* Long integer helpers */
-#if !defined(__x86_64__)
+#ifndef CONFIG_INT128
 static void add128 (uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b)
 {
     *plow += a;
@@ -102,4 +102,4 @@  void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
            a, b, *phigh, *plow);
 #endif
 }
-#endif /* !defined(__x86_64__) */
+#endif /* !CONFIG_INT128 */