Message ID | 1359399154-13050-2-git-send-email-rth@twiddle.net |
---|---|
State | New |
Headers | show |
On 28 January 2013 18:52, Richard Henderson <rth@twiddle.net> wrote: > Replace some x86_64 specific inline assembly with something that > all 64-bit hosts ought to optimize well. At worst this becomes a > call to the gcc __multi3 routine, which is no worse than our > implementation in util/host-utils.c. Hurrah for dropping inline assembly. > +######################################## > +# check if __int128 is usable. > + > +int128=no > +cat > $TMPC << EOF > +int main (void) { > + __int128 a = 0; > + unsigned __int128 b = 1; > + a = a + b; > + a = a * b; > + return 0; > +} Have you written the test like this because you know there are compilers out there that implement addition but not multiplication (or unsigned but not signed, or whatever), or just out of a vague sense of caution? -- PMM
On 01/28/2013 11:02 AM, Peter Maydell wrote: > Have you written the test like this because you know > there are compilers out there that implement addition > but not multiplication (or unsigned but not signed, or > whatever), or just out of a vague sense of caution? Vague sense of caution. I tried to use all of the things that I was actually going to use in the qemu source. r~
On Mon, Jan 28, 2013 at 6:52 PM, Richard Henderson <rth@twiddle.net> wrote: > Replace some x86_64 specific inline assembly with something that > all 64-bit hosts ought to optimize well. At worst this becomes a > call to the gcc __multi3 routine, which is no worse than our > implementation in util/host-utils.c. > > With gcc 4.7, we get identical code generation for x86_64. We > now get native multiplication on ia64 and s390x hosts. With minor > improvements to gcc we can get it for ppc64 as well. In OpenBIOS we use __int128_t and __uint128_t, which are closer in form to standard types. Could you use those instead of __int128 and unsigned __int128? > > Signed-off-by: Richard Henderson <rth@twiddle.net> > --- > configure | 20 ++++++++++++++++++++ > include/qemu/host-utils.h | 17 ++++++++--------- > util/host-utils.c | 4 ++-- > 3 files changed, 30 insertions(+), 11 deletions(-) > > diff --git a/configure b/configure > index b7635e4..ecf1cbc 100755 > --- a/configure > +++ b/configure > @@ -3150,6 +3150,22 @@ if compile_prog "" "" ; then > cpuid_h=yes > fi > > +######################################## > +# check if __int128 is usable. > + > +int128=no > +cat > $TMPC << EOF > +int main (void) { > + __int128 a = 0; > + unsigned __int128 b = 1; > + a = a + b; > + a = a * b; > + return 0; > +} > +EOF > +if compile_prog "" "" ; then > + int128=yes > +fi > > ########################################## > # End of CC checks > @@ -3692,6 +3708,10 @@ if test "$cpuid_h" = "yes" ; then > echo "CONFIG_CPUID_H=y" >> $config_host_mak > fi > > +if test "$int128" = "yes" ; then > + echo "CONFIG_INT128=y" >> $config_host_mak > +fi > + > if test "$glusterfs" = "yes" ; then > echo "CONFIG_GLUSTERFS=y" >> $config_host_mak > fi > diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h > index 81c9a75..01f6610 100644 > --- a/include/qemu/host-utils.h > +++ b/include/qemu/host-utils.h > @@ -27,22 +27,21 @@ > > #include "qemu/compiler.h" /* QEMU_GNUC_PREREQ */ > > -#if defined(__x86_64__) > -#define __HAVE_FAST_MULU64__ > +#ifdef CONFIG_INT128 > static inline void mulu64(uint64_t *plow, uint64_t *phigh, > uint64_t a, uint64_t b) > { > - __asm__ ("mul %0\n\t" > - : "=d" (*phigh), "=a" (*plow) > - : "a" (a), "0" (b)); > + unsigned __int128 r = (unsigned __int128)a * b; __uint128_t? > + *plow = r; > + *phigh = r >> 64; > } > -#define __HAVE_FAST_MULS64__ > + > static inline void muls64(uint64_t *plow, uint64_t *phigh, > int64_t a, int64_t b) > { > - __asm__ ("imul %0\n\t" > - : "=d" (*phigh), "=a" (*plow) > - : "a" (a), "0" (b)); > + __int128 r = (__int128)a * b; > + *plow = r; > + *phigh = r >> 64; > } > #else > void muls64(uint64_t *phigh, uint64_t *plow, int64_t a, int64_t b); > diff --git a/util/host-utils.c b/util/host-utils.c > index 5e3915a..2d06a2c 100644 > --- a/util/host-utils.c > +++ b/util/host-utils.c > @@ -30,7 +30,7 @@ > //#define DEBUG_MULDIV > > /* Long integer helpers */ > -#if !defined(__x86_64__) > +#ifndef CONFIG_INT128 > static void add128 (uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b) > { > *plow += a; > @@ -102,4 +102,4 @@ void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b) > a, b, *phigh, *plow); > #endif > } > -#endif /* !defined(__x86_64__) */ > +#endif /* !CONFIG_INT128 */ > -- > 1.7.11.7 > >
diff --git a/configure b/configure index b7635e4..ecf1cbc 100755 --- a/configure +++ b/configure @@ -3150,6 +3150,22 @@ if compile_prog "" "" ; then cpuid_h=yes fi +######################################## +# check if __int128 is usable. + +int128=no +cat > $TMPC << EOF +int main (void) { + __int128 a = 0; + unsigned __int128 b = 1; + a = a + b; + a = a * b; + return 0; +} +EOF +if compile_prog "" "" ; then + int128=yes +fi ########################################## # End of CC checks @@ -3692,6 +3708,10 @@ if test "$cpuid_h" = "yes" ; then echo "CONFIG_CPUID_H=y" >> $config_host_mak fi +if test "$int128" = "yes" ; then + echo "CONFIG_INT128=y" >> $config_host_mak +fi + if test "$glusterfs" = "yes" ; then echo "CONFIG_GLUSTERFS=y" >> $config_host_mak fi diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h index 81c9a75..01f6610 100644 --- a/include/qemu/host-utils.h +++ b/include/qemu/host-utils.h @@ -27,22 +27,21 @@ #include "qemu/compiler.h" /* QEMU_GNUC_PREREQ */ -#if defined(__x86_64__) -#define __HAVE_FAST_MULU64__ +#ifdef CONFIG_INT128 static inline void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b) { - __asm__ ("mul %0\n\t" - : "=d" (*phigh), "=a" (*plow) - : "a" (a), "0" (b)); + unsigned __int128 r = (unsigned __int128)a * b; + *plow = r; + *phigh = r >> 64; } -#define __HAVE_FAST_MULS64__ + static inline void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b) { - __asm__ ("imul %0\n\t" - : "=d" (*phigh), "=a" (*plow) - : "a" (a), "0" (b)); + __int128 r = (__int128)a * b; + *plow = r; + *phigh = r >> 64; } #else void muls64(uint64_t *phigh, uint64_t *plow, int64_t a, int64_t b); diff --git a/util/host-utils.c b/util/host-utils.c index 5e3915a..2d06a2c 100644 --- a/util/host-utils.c +++ b/util/host-utils.c @@ -30,7 +30,7 @@ //#define DEBUG_MULDIV /* Long integer helpers */ -#if !defined(__x86_64__) +#ifndef CONFIG_INT128 static void add128 (uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b) { *plow += a; @@ -102,4 +102,4 @@ void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b) a, b, *phigh, *plow); #endif } -#endif /* !defined(__x86_64__) */ +#endif /* !CONFIG_INT128 */
Replace some x86_64 specific inline assembly with something that all 64-bit hosts ought to optimize well. At worst this becomes a call to the gcc __multi3 routine, which is no worse than our implementation in util/host-utils.c. With gcc 4.7, we get identical code generation for x86_64. We now get native multiplication on ia64 and s390x hosts. With minor improvements to gcc we can get it for ppc64 as well. Signed-off-by: Richard Henderson <rth@twiddle.net> --- configure | 20 ++++++++++++++++++++ include/qemu/host-utils.h | 17 ++++++++--------- util/host-utils.c | 4 ++-- 3 files changed, 30 insertions(+), 11 deletions(-)