diff mbox

PR70117, ppc long double isinf

Message ID 20160407080354.GJ18129@bubble.grove.modra.org
State New
Headers show

Commit Message

Alan Modra April 7, 2016, 8:03 a.m. UTC
On Wed, Apr 06, 2016 at 06:49:19PM +0930, Alan Modra wrote:
> On Wed, Apr 06, 2016 at 10:46:48AM +0200, Richard Biener wrote:
> > Can you add a testcase or two for the isnormal () case?
> 
> Sure.  I'll adapt the testcase I was using to verify the output,

Revised testcase - target fixed, compiled at -O2 with volatile vars so
we're testing optimized builtins with non-constant data.


> >  What does XLC do here?
> 
> Not sure, sorry.  I don't have xlc handy.  Will try later.

It seems that to compile 128-bit long double with xlc, I need xlc128,
and I don't have that..  For a double, xlc implements isnormal() on
power8 by moving the fpr argument to a gpr followed by a bunch of bit
twiddling to test the exponent.  xlc's sequence isn't as good as it
could be, 15 insns.  The ideal ought to be the following, I think,
which gcc compiles to 8 insns on power8 (and could be 7 insns if a
useless sign extension was eliminated).

int
bit_isnormal (double x)
{
  union { double d; uint64_t l; } val;
  val.d = x;
  uint64_t exp = (val.l >> 52) & 0x7ff;
  return exp - 1 < 0x7fe;
}

The above is around twice as fast as fold_builtin_interclass_mathfn
implementation of isnormal() for double, on power8.  I expect a bit
twiddling implementation for IBM extended would show similar or better
improvement.

However I'm not inclined to pursue this, especially for gcc-6.  The
patch I posted for isnormal() IBM extended is already faster (about
65% average timing on power8) than what existed previously.

Comments

Richard Biener April 7, 2016, 9:32 a.m. UTC | #1
On April 7, 2016 10:03:54 AM GMT+02:00, Alan Modra <amodra@gmail.com> wrote:
>On Wed, Apr 06, 2016 at 06:49:19PM +0930, Alan Modra wrote:
>> On Wed, Apr 06, 2016 at 10:46:48AM +0200, Richard Biener wrote:
>> > Can you add a testcase or two for the isnormal () case?
>> 
>> Sure.  I'll adapt the testcase I was using to verify the output,
>
>Revised testcase - target fixed, compiled at -O2 with volatile vars so
>we're testing optimized builtins with non-constant data.
>
>diff --git a/gcc/testsuite/gcc.target/powerpc/pr70117.c
>b/gcc/testsuite/gcc.target/powerpc/pr70117.c
>new file mode 100644
>index 0000000..f1fdedb
>--- /dev/null
>+++ b/gcc/testsuite/gcc.target/powerpc/pr70117.c
>@@ -0,0 +1,92 @@
>+/* { dg-do run { target { powerpc*-*-linux* powerpc*-*-darwin*
>powerpc*-*-aix* rs6000-*-* } } } */
>+/* { dg-options "-std=c99 -mlong-double-128 -O2" } */
>+
>+#include <float.h>
>+
>+union gl_long_double_union
>+{
>+  struct { double hi; double lo; } dd;
>+  long double ld;
>+};
>+
>+/* This is gnulib's LDBL_MAX which, being 107 bits in precision, is
>+   slightly larger than gcc's 106 bit precision LDBL_MAX.  */
>+volatile union gl_long_double_union gl_LDBL_MAX =
>+  { { DBL_MAX, DBL_MAX / (double)134217728UL / (double)134217728UL }
>};
>+
>+volatile double min_denorm = 0x1p-1074;
>+volatile double ld_low = 0x1p-969;
>+volatile double dinf = 1.0/0.0;
>+volatile double dnan = 0.0/0.0;
>+
>+int
>+main (void)
>+{
>+  long double ld;
>+
>+  ld = gl_LDBL_MAX.ld;
>+  if (__builtin_isinfl (ld))
>+    __builtin_abort ();
>+  ld = -gl_LDBL_MAX.ld;
>+  if (__builtin_isinfl (ld))
>+    __builtin_abort ();
>+
>+  ld = gl_LDBL_MAX.ld;
>+  if (!__builtin_isfinite (ld))
>+    __builtin_abort ();
>+  ld = -gl_LDBL_MAX.ld;
>+  if (!__builtin_isfinite (ld))
>+    __builtin_abort ();
>+
>+  ld = ld_low;
>+  if (!__builtin_isnormal (ld))
>+    __builtin_abort ();
>+  ld = -ld_low;
>+  if (!__builtin_isnormal (ld))
>+    __builtin_abort ();
>+
>+  ld = -min_denorm;
>+  ld += ld_low;
>+  if (__builtin_isnormal (ld))
>+    __builtin_abort ();
>+  ld = min_denorm;
>+  ld -= ld_low;
>+  if (__builtin_isnormal (ld))
>+    __builtin_abort ();
>+
>+  ld = 0.0;
>+  if (__builtin_isnormal (ld))
>+    __builtin_abort ();
>+  ld = -0.0;
>+  if (__builtin_isnormal (ld))
>+    __builtin_abort ();
>+
>+  ld = LDBL_MAX;
>+  if (!__builtin_isnormal (ld))
>+    __builtin_abort ();
>+  ld = -LDBL_MAX;
>+  if (!__builtin_isnormal (ld))
>+    __builtin_abort ();
>+
>+  ld = gl_LDBL_MAX.ld;
>+  if (!__builtin_isnormal (ld))
>+    __builtin_abort ();
>+  ld = -gl_LDBL_MAX.ld;
>+  if (!__builtin_isnormal (ld))
>+    __builtin_abort ();
>+
>+  ld = dinf;
>+  if (__builtin_isnormal (ld))
>+    __builtin_abort ();
>+  ld = -dinf;
>+  if (__builtin_isnormal (ld))
>+    __builtin_abort ();
>+
>+  ld = dnan;
>+  if (__builtin_isnormal (ld))
>+    __builtin_abort ();
>+  ld = -dnan;
>+  if (__builtin_isnormal (ld))
>+    __builtin_abort ();
>+  return 0;
>+}
>
>> >  What does XLC do here?
>> 
>> Not sure, sorry.  I don't have xlc handy.  Will try later.
>
>It seems that to compile 128-bit long double with xlc, I need xlc128,
>and I don't have that..  For a double, xlc implements isnormal() on
>power8 by moving the fpr argument to a gpr followed by a bunch of bit
>twiddling to test the exponent.  xlc's sequence isn't as good as it
>could be, 15 insns.  The ideal ought to be the following, I think,
>which gcc compiles to 8 insns on power8 (and could be 7 insns if a
>useless sign extension was eliminated).
>
>int
>bit_isnormal (double x)
>{
>  union { double d; uint64_t l; } val;
>  val.d = x;
>  uint64_t exp = (val.l >> 52) & 0x7ff;
>  return exp - 1 < 0x7fe;
>}
>
>The above is around twice as fast as fold_builtin_interclass_mathfn
>implementation of isnormal() for double, on power8.  I expect a bit
>twiddling implementation for IBM extended would show similar or better
>improvement.
>
>However I'm not inclined to pursue this, especially for gcc-6.  The
>patch I posted for isnormal() IBM extended is already faster (about
>65% average timing on power8) than what existed previously.

That's good to know.  I think the patch is OK but please seek approval from a ppc maintainer as well

Thanks,
Richard.
Alan Modra April 7, 2016, 2:17 p.m. UTC | #2
On Thu, Apr 07, 2016 at 11:32:58AM +0200, Richard Biener wrote:
> That's good to know.  I think the patch is OK but please seek approval from a ppc maintainer as well

There's only one of those.  David?  Thread starts here
https://gcc.gnu.org/ml/gcc-patches/2016-04/msg00213.html
David Edelsohn April 7, 2016, 2:43 p.m. UTC | #3
On Thu, Apr 7, 2016 at 10:17 AM, Alan Modra <amodra@gmail.com> wrote:
> On Thu, Apr 07, 2016 at 11:32:58AM +0200, Richard Biener wrote:
>> That's good to know.  I think the patch is OK but please seek approval from a ppc maintainer as well
>
> There's only one of those.  David?  Thread starts here
> https://gcc.gnu.org/ml/gcc-patches/2016-04/msg00213.html

Yes, I have been following this entertaining thread.

This is okay.

By the way, xlc -qldbl128 should enable 128 bit.

Thanks, David
Alan Modra April 8, 2016, 3:03 a.m. UTC | #4
On Thu, Apr 07, 2016 at 10:43:31AM -0400, David Edelsohn wrote:
> Yes, I have been following this entertaining thread.

How to waste lots of time over one bit.  Floating point is like that.
:-)

I see the bug was opened against 5.3, so OK to commit there after a
few days and maybe 4.9 too, Richard?
Richard Biener April 8, 2016, 5:41 a.m. UTC | #5
On April 8, 2016 5:03:04 AM GMT+02:00, Alan Modra <amodra@gmail.com> wrote:
>On Thu, Apr 07, 2016 at 10:43:31AM -0400, David Edelsohn wrote:
>> Yes, I have been following this entertaining thread.
>
>How to waste lots of time over one bit.  Floating point is like that.
>:-)
>
>I see the bug was opened against 5.3, so OK to commit there after a
>few days and maybe 4.9 too, Richard?

Yes please.

Richard.
diff mbox

Patch

diff --git a/gcc/testsuite/gcc.target/powerpc/pr70117.c b/gcc/testsuite/gcc.target/powerpc/pr70117.c
new file mode 100644
index 0000000..f1fdedb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr70117.c
@@ -0,0 +1,92 @@ 
+/* { dg-do run { target { powerpc*-*-linux* powerpc*-*-darwin* powerpc*-*-aix* rs6000-*-* } } } */
+/* { dg-options "-std=c99 -mlong-double-128 -O2" } */
+
+#include <float.h>
+
+union gl_long_double_union
+{
+  struct { double hi; double lo; } dd;
+  long double ld;
+};
+
+/* This is gnulib's LDBL_MAX which, being 107 bits in precision, is
+   slightly larger than gcc's 106 bit precision LDBL_MAX.  */
+volatile union gl_long_double_union gl_LDBL_MAX =
+  { { DBL_MAX, DBL_MAX / (double)134217728UL / (double)134217728UL } };
+
+volatile double min_denorm = 0x1p-1074;
+volatile double ld_low = 0x1p-969;
+volatile double dinf = 1.0/0.0;
+volatile double dnan = 0.0/0.0;
+
+int
+main (void)
+{
+  long double ld;
+
+  ld = gl_LDBL_MAX.ld;
+  if (__builtin_isinfl (ld))
+    __builtin_abort ();
+  ld = -gl_LDBL_MAX.ld;
+  if (__builtin_isinfl (ld))
+    __builtin_abort ();
+
+  ld = gl_LDBL_MAX.ld;
+  if (!__builtin_isfinite (ld))
+    __builtin_abort ();
+  ld = -gl_LDBL_MAX.ld;
+  if (!__builtin_isfinite (ld))
+    __builtin_abort ();
+
+  ld = ld_low;
+  if (!__builtin_isnormal (ld))
+    __builtin_abort ();
+  ld = -ld_low;
+  if (!__builtin_isnormal (ld))
+    __builtin_abort ();
+
+  ld = -min_denorm;
+  ld += ld_low;
+  if (__builtin_isnormal (ld))
+    __builtin_abort ();
+  ld = min_denorm;
+  ld -= ld_low;
+  if (__builtin_isnormal (ld))
+    __builtin_abort ();
+
+  ld = 0.0;
+  if (__builtin_isnormal (ld))
+    __builtin_abort ();
+  ld = -0.0;
+  if (__builtin_isnormal (ld))
+    __builtin_abort ();
+
+  ld = LDBL_MAX;
+  if (!__builtin_isnormal (ld))
+    __builtin_abort ();
+  ld = -LDBL_MAX;
+  if (!__builtin_isnormal (ld))
+    __builtin_abort ();
+
+  ld = gl_LDBL_MAX.ld;
+  if (!__builtin_isnormal (ld))
+    __builtin_abort ();
+  ld = -gl_LDBL_MAX.ld;
+  if (!__builtin_isnormal (ld))
+    __builtin_abort ();
+
+  ld = dinf;
+  if (__builtin_isnormal (ld))
+    __builtin_abort ();
+  ld = -dinf;
+  if (__builtin_isnormal (ld))
+    __builtin_abort ();
+
+  ld = dnan;
+  if (__builtin_isnormal (ld))
+    __builtin_abort ();
+  ld = -dnan;
+  if (__builtin_isnormal (ld))
+    __builtin_abort ();
+  return 0;
+}