diff mbox

[4/6,ARC] Handle FPX NaN within optimized floating point library.

Message ID 1460990028-5718-5-git-send-email-claziss@synopsys.com
State New
Headers show

Commit Message

Claudiu Zissulescu April 18, 2016, 2:33 p.m. UTC
OK to apply?
Claudiu

gcc/
2016-04-18  Claudiu Zissulescu  <claziss@synopsys.com>

	* testsuite/gcc.target/arc/ieee_eq.c: New test.

libgcc/
2016-04-18  Claudiu Zissulescu  <claziss@synopsys.com>

	* config/arc/ieee-754/eqdf2.S: Handle FPX NaN.
---
 gcc/testsuite/gcc.target/arc/ieee_eq.c | 47 ++++++++++++++++++++++++++++++++++
 libgcc/config/arc/ieee-754/eqdf2.S     | 13 ++++++----
 2 files changed, 55 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arc/ieee_eq.c

Comments

Joern Wolfgang Rennecke April 28, 2016, 11:27 a.m. UTC | #1
On 18/04/16 15:33, Claudiu Zissulescu wrote:
> OK to apply?
No.  You are clobbering DBL0H.

Besides, why would you change any of the code, apart from the argument
to #ifdef and the comments?
Claudiu Zissulescu April 28, 2016, 11:35 a.m. UTC | #2
> Besides, why would you change any of the code, apart from the argument
> to #ifdef and the comments?

It is not working/giving wrong results. I think, the test shows you this if you run it without all the libgcc mods.
Joern Wolfgang Rennecke April 28, 2016, 11:41 a.m. UTC | #3
On 28/04/16 12:35, Claudiu Zissulescu wrote:
>> Besides, why would you change any of the code, apart from the argument
>> to #ifdef and the comments?
> It is not working/giving wrong results. I think, the test shows you this if you run it without all the libgcc mods.
I can't.

Where exactly does the test go wrong?
Can you show a trace of __eqdf2 with register values?
Claudiu Zissulescu April 28, 2016, 11:43 a.m. UTC | #4
> 
> Where exactly does the test go wrong?

I will try to trace it back when I develop it. It passed too long since then. Probably something related with big-endian.
Claudiu Zissulescu April 28, 2016, 2:11 p.m. UTC | #5
Hi,

> Where exactly does the test go wrong?

The test which fails is this one: 
	TEST_EQ (double, __DBL_MAX__, __DBL_MAX__, 1);
From the test file included in the patch.

> Can you show a trace of __eqdf2 with register values?

Sure thing, running for ARC700, using original implementation and enabled guarded code for FPX handling:

[0x000002a2] 0xc000                 K Z    ld_s           r0,[sp,0x0] : lw [0x5000c0c0] => 0xffffffff : (w1) r0 <= 0xffffffff *
[0x000002a4] 0xc101                 K Z    ld_s           r1,[sp,0x4] : lw [0x5000c0c4] => 0x7fefffff : (w1) r1 <= 0x7fefffff *
[0x000002a6] 0xc202                 K Z    ld_s           r2,[sp,0x8] : lw [0x5000c0c8] => 0xffffffff : (w1) r2 <= 0xffffffff *
[0x000002a8] 0xc303                 K Z    ld_s           r3,[sp,0xc] : lw [0x5000c0cc] => 0x7fefffff : (w1) r3 <= 0x7fefffff *
[0x000002aa] 0x0aea0000             K Z    bl             0x2e8 : (w0) r31 <= 0x000002ae *
[0x00000590] 0x091d00e1             K Z    brne.d         r1,r3,0x1c
[0x00000594] 0x2153050c             K Z    bmsk           r12,r1,0x14 : (w0) r12 <= 0x000fffff *
[0x00000598] 0x200580be             K Z    or.f           0,r0,r2 *
[0x0000059c] 0x24cf1562             K  N   bset.ne        r12,r12,0x15 : (w0) r12 <= 0x002fffff *
[0x000005a0] 0x2414904c             K  N   add1.f         r12,r12,r1 : (w0) r12 <= 0x000ffffd *
[0x000005a4] 0x7fe0                 K   C  j_s.d          [blink] *
[0x000005a6] 0x20cc8086             KD  C  cmp.cc         r0,r2
 
For reference, the routine:

	.global __eqdf2
	.balign 4
	HIDDEN_FUNC(__eqdf2)
	/* Good performance as long as the difference in high word is
	   well predictable (as seen from the branch predictor).  */
__eqdf2:
	brne.d DBL0H,DBL1H,.Lhighdiff
	bmsk    r12,DBL0H,20
#ifndef __HS__
	/* The next two instructions are required to recognize the FPX
	NaN, which has a pattern like this: 0x7ff0_0000_8000_0000, as
	oposite to 0x7ff8_0000_0000_0000.  */
	or.f    0,DBL0L,DBL1L
	bset.ne r12,r12,21
#endif /* __HS__ */
	add1.f	r12,r12,DBL0H /* set c iff NaN; also, clear z if NaN.  */
	j_s.d	[blink]
	cmp.cc	DBL0L,DBL1L
	.balign 4
.Lhighdiff:
	or	r12,DBL0H,DBL1H
	or.f	0,DBL0L,DBL1L
	j_s.d	[blink]
	bmsk.eq.f r12,r12,30
	ENDFUNC(__eqdf2)

All those results were collected using nsimfree.

Please let me know if you need more info,
Claudiu
Joern Wolfgang Rennecke April 28, 2016, 3:02 p.m. UTC | #6
On 28/04/16 15:11, Claudiu Zissulescu wrote:
> Sure thing, running for ARC700, using original implementation and enabled guarded code for FPX handling:
>
> [0x000002a2] 0xc000                 K Z    ld_s           r0,[sp,0x0] : lw [0x5000c0c0] => 0xffffffff : (w1) r0 <= 0xffffffff *
> [0x000002a4] 0xc101                 K Z    ld_s           r1,[sp,0x4] : lw [0x5000c0c4] => 0x7fefffff : (w1) r1 <= 0x7fefffff *
> [0x000002a6] 0xc202                 K Z    ld_s           r2,[sp,0x8] : lw [0x5000c0c8] => 0xffffffff : (w1) r2 <= 0xffffffff *
> [0x000002a8] 0xc303                 K Z    ld_s           r3,[sp,0xc] : lw [0x5000c0cc] => 0x7fefffff : (w1) r3 <= 0x7fefffff *
> [0x000002aa] 0x0aea0000             K Z    bl             0x2e8 : (w0) r31 <= 0x000002ae *
> [0x00000590] 0x091d00e1             K Z    brne.d         r1,r3,0x1c
> [0x00000594] 0x2153050c             K Z    bmsk           r12,r1,0x14 : (w0) r12 <= 0x000fffff *
> [0x00000598] 0x200580be             K Z    or.f           0,r0,r2 *
> [0x0000059c] 0x24cf1562             K  N   bset.ne        r12,r12,0x15 : (w0) r12 <= 0x002fffff *
> [0x000005a0] 0x2414904c             K  N   add1.f         r12,r12,r1 : (w0) r12 <= 0x000ffffd *
> [0x000005a4] 0x7fe0                 K   C  j_s.d          [blink] *
> [0x000005a6] 0x20cc8086             KD  C  cmp.cc         r0,r2
>   
>   
I see, we basically have an overflow.
I think the DPFP_COMPAT / __HS__ variant should be something like:

         brne DBL0H,DBL1H,.Lhighdiff
         mov_s r12,0x00200000
         or.f 0,DBL0L,DBL1L
         bset.ne r12,r12,0

         add1.f  r12,r12,DBL0H /* set c iff NaN; also, clear z if NaN.  */
         j_s.d   [blink]
         cmp.cc  DBL0L,DBL1L
...

Where the mov_s could be replaced with something else that loads the 
same value,
depending on what instructions are supported.
diff mbox

Patch

diff --git a/gcc/testsuite/gcc.target/arc/ieee_eq.c b/gcc/testsuite/gcc.target/arc/ieee_eq.c
new file mode 100644
index 0000000..70aebad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/ieee_eq.c
@@ -0,0 +1,47 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+#include <stdio.h>
+#include <float.h>
+
+#define TEST_EQ(TYPE,X,Y,RES)				\
+  do {							\
+    volatile TYPE a, b;					\
+    a = (TYPE) X;					\
+    b = (TYPE) Y;					\
+    if ((a == b) != RES)				\
+      {							\
+	printf ("Runtime computation error @%d. %g "	\
+		"!= %g\n", __LINE__, a, b);		\
+	error = 1;					\
+      }							\
+  } while (0)
+
+#ifndef __HS__
+/* Special type of NaN found when using double FPX instructions.  */
+static const unsigned long long __nan = 0x7FF0000080000000ULL;
+# define W (*(double *) &__nan)
+#else
+# define W __builtin_nan ("")
+#endif
+
+#define Q __builtin_nan ("")
+#define H __builtin_inf ()
+
+int main (void)
+{
+  int error = 0;
+
+  TEST_EQ (double, 1, 1, 1);
+  TEST_EQ (double, 1, 2, 0);
+  TEST_EQ (double, W, W, 0);
+  TEST_EQ (double, Q, Q, 0);
+  TEST_EQ (double, __DBL_MAX__, __DBL_MAX__, 1);
+  TEST_EQ (double, __DBL_MIN__, __DBL_MIN__, 1);
+  TEST_EQ (double, H, H, 1);
+
+  if (error)
+    __builtin_abort ();
+
+  return 0;
+}
diff --git a/libgcc/config/arc/ieee-754/eqdf2.S b/libgcc/config/arc/ieee-754/eqdf2.S
index bc7d88e..3b23e04 100644
--- a/libgcc/config/arc/ieee-754/eqdf2.S
+++ b/libgcc/config/arc/ieee-754/eqdf2.S
@@ -58,11 +58,14 @@  __eqdf2:
 	   well predictable (as seen from the branch predictor).  */
 __eqdf2:
 	brne.d DBL0H,DBL1H,.Lhighdiff
-	bmsk r12,DBL0H,20
-#ifdef DPFP_COMPAT
-	or.f 0,DBL0L,DBL1L
-	bset.ne r12,r12,21
-#endif /* DPFP_COMPAT */
+#ifndef __HS__
+	/* The next two instructions are required to recognize the FPX
+	NaN, which has a pattern like this: 0x7ff0_0000_8000_0000, as
+	oposite to 0x7ff8_0000_0000_0000.  */
+	or.f    0,DBL0L,DBL1L
+	bset.ne DBL0H,DBL0H,19
+#endif /* __HS__ */
+	bmsk    r12,DBL0H,20
 	add1.f	r12,r12,DBL0H /* set c iff NaN; also, clear z if NaN.  */
 	j_s.d	[blink]
 	cmp.cc	DBL0L,DBL1L