Patchwork [AArch64] Fix unordered comparisons to floating-point vcond.

login
register
mail settings
Submitter James Greenhalgh
Date Jan. 18, 2013, 11:55 a.m.
Message ID <1358510126-9282-1-git-send-email-james.greenhalgh@arm.com>
Download mbox | patch
Permalink /patch/213582/
State New
Headers show

Comments

James Greenhalgh - Jan. 18, 2013, 11:55 a.m.
Hi,

When I added support for floating-point vcond on aarch64,
I didn't consider the various UNORDERED cases gcc required
support for.  This could in some circumstances lead to
an ICE.

This patch fixes that oversight, adding support for
the UNLT, UNLE, UNEQ, UNGE, UNGT, ORDERED and UNORDERED
cases to the AArch64 backend.

I've tested this patch with no regressions on aarch64-none-elf.

Is this OK to commit?

Regards,
James Greenhalgh

---

gcc/

2013-01-18  James Greenhalgh  <james.greenhalgh@arm.com>

	* config/aarch64/aarch64-simd.md
	(aarch64_vcond_internal<mode>): Handle unordered cases.
	* config/aarch64/iterators.md (v_cmp_result): New.

gcc/testsuite/

2013-01-18  James Greenhalgh  <james.greenhalgh@arm.com>

	* gcc.target/aarch64/vect-fcm-gt-f.c: Change expected output.
	* gcc.target/aarch64/vect-fcm-gt-d.c: Likewise.
	* gcc.target/aarch64/vect-fcm-ge-f.c: Likewise.
	* gcc.target/aarch64/vect-fcm-ge-d.c: Likewise.
	* gcc.target/aarch64/vect-fcm-eq-f.c: Likewise.
Marcus Shawcroft - Jan. 18, 2013, 4:21 p.m.
OK
/Marcus

On 18 January 2013 11:55, James Greenhalgh <james.greenhalgh@arm.com> wrote:
>
> Hi,
>
> When I added support for floating-point vcond on aarch64,
> I didn't consider the various UNORDERED cases gcc required
> support for.  This could in some circumstances lead to
> an ICE.
>
> This patch fixes that oversight, adding support for
> the UNLT, UNLE, UNEQ, UNGE, UNGT, ORDERED and UNORDERED
> cases to the AArch64 backend.
>
> I've tested this patch with no regressions on aarch64-none-elf.
>
> Is this OK to commit?
>
> Regards,
> James Greenhalgh
>
> ---
>
> gcc/
>
> 2013-01-18  James Greenhalgh  <james.greenhalgh@arm.com>
>
>         * config/aarch64/aarch64-simd.md
>         (aarch64_vcond_internal<mode>): Handle unordered cases.
>         * config/aarch64/iterators.md (v_cmp_result): New.
>
> gcc/testsuite/
>
> 2013-01-18  James Greenhalgh  <james.greenhalgh@arm.com>
>
>         * gcc.target/aarch64/vect-fcm-gt-f.c: Change expected output.
>         * gcc.target/aarch64/vect-fcm-gt-d.c: Likewise.
>         * gcc.target/aarch64/vect-fcm-ge-f.c: Likewise.
>         * gcc.target/aarch64/vect-fcm-ge-d.c: Likewise.
>         * gcc.target/aarch64/vect-fcm-eq-f.c: Likewise.

Patch

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 50297a9..19cc87d 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1586,37 +1586,128 @@ 
   "TARGET_SIMD"
 {
   int inverse = 0;
+  int swap_bsl_operands = 0;
   rtx mask = gen_reg_rtx (<V_cmp_result>mode);
+  rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
 
-  if (!REG_P (operands[5])
-      && (operands[5] != CONST0_RTX (<MODE>mode)))
-    operands[5] = force_reg (<MODE>mode, operands[5]);
+  rtx (*base_comparison) (rtx, rtx, rtx);
+  rtx (*complimentary_comparison) (rtx, rtx, rtx);
+
+  switch (GET_CODE (operands[3]))
+    {
+    case GE:
+    case LE:
+    case EQ:
+      if (!REG_P (operands[5])
+	  && (operands[5] != CONST0_RTX (<MODE>mode)))
+	operands[5] = force_reg (<MODE>mode, operands[5]);
+      break;
+    default:
+      if (!REG_P (operands[5]))
+	operands[5] = force_reg (<MODE>mode, operands[5]);
+    }
 
   switch (GET_CODE (operands[3]))
     {
     case LT:
+    case UNLT:
       inverse = 1;
       /* Fall through.  */
     case GE:
-      emit_insn (gen_aarch64_cmge<mode> (mask, operands[4], operands[5]));
+    case UNGE:
+    case ORDERED:
+    case UNORDERED:
+      base_comparison = gen_aarch64_cmge<mode>;
+      complimentary_comparison = gen_aarch64_cmgt<mode>;
       break;
     case LE:
+    case UNLE:
       inverse = 1;
       /* Fall through.  */
     case GT:
-      emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
+    case UNGT:
+      base_comparison = gen_aarch64_cmgt<mode>;
+      complimentary_comparison = gen_aarch64_cmge<mode>;
       break;
+    case EQ:
     case NE:
-      inverse = 1;
-      /* Fall through.  */
+    case UNEQ:
+      base_comparison = gen_aarch64_cmeq<mode>;
+      complimentary_comparison = gen_aarch64_cmeq<mode>;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  switch (GET_CODE (operands[3]))
+    {
+    case LT:
+    case LE:
+    case GT:
+    case GE:
     case EQ:
-      emit_insn (gen_aarch64_cmeq<mode> (mask, operands[4], operands[5]));
+      /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
+	 As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
+	 a GE b -> a GE b
+	 a GT b -> a GT b
+	 a LE b -> b GE a
+	 a LT b -> b GT a
+	 a EQ b -> a EQ b  */
+
+      if (!inverse)
+	emit_insn (base_comparison (mask, operands[4], operands[5]));
+      else
+	emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
+      break;
+    case UNLT:
+    case UNLE:
+    case UNGT:
+    case UNGE:
+    case NE:
+      /* FCM returns false for lanes which are unordered, so if we use
+	 the inverse of the comparison we actually want to emit, then
+	 swap the operands to BSL, we will end up with the correct result.
+	 Note that a NE NaN and NaN NE b are true for all a, b.
+
+	 Our transformations are:
+	 a GE b -> !(b GT a)
+	 a GT b -> !(b GE a)
+	 a LE b -> !(a GT b)
+	 a LT b -> !(a GE b)
+	 a NE b -> !(a EQ b)  */
+
+      if (inverse)
+	emit_insn (base_comparison (mask, operands[4], operands[5]));
+      else
+	emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
+
+      swap_bsl_operands = 1;
+      break;
+    case UNEQ:
+      /* We check (a > b ||  b > a).  combining these comparisons give us
+	 true iff !(a != b && a ORDERED b), swapping the operands to BSL
+	 will then give us (a == b ||  a UNORDERED b) as intended.  */
+
+      emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
+      emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[5], operands[4]));
+      emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
+      swap_bsl_operands = 1;
+      break;
+    case UNORDERED:
+       /* Operands are ORDERED iff (a > b || b >= a).
+	 Swapping the operands to BSL will give the UNORDERED case.  */
+     swap_bsl_operands = 1;
+     /* Fall through.  */
+    case ORDERED:
+      emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[4], operands[5]));
+      emit_insn (gen_aarch64_cmge<mode> (mask, operands[5], operands[4]));
+      emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
       break;
     default:
       gcc_unreachable ();
     }
 
-  if (inverse)
+  if (swap_bsl_operands)
     emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask, operands[2],
 				    operands[1]));
   else
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 3a57494..b66418e 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -430,6 +430,14 @@ 
 				(V2SF "V2SI") (V4SF  "V4SI")
 				(V2DF "V2DI")])
 
+;; Lower case mode of results of comparison operations.
+(define_mode_attr v_cmp_result [(V8QI "v8qi") (V16QI "v16qi")
+				(V4HI "v4hi") (V8HI  "v8hi")
+				(V2SI "v2si") (V4SI  "v4si")
+				(DI   "di")   (V2DI  "v2di")
+				(V2SF "v2si") (V4SF  "v4si")
+				(V2DF "v2di")])
+
 ;; Vm for lane instructions is restricted to FP_LO_REGS.
 (define_mode_attr vwx [(V4HI "x") (V8HI "x") (HI "x")
 		       (V2SI "w") (V4SI "w") (SI "w")])
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c
index a177d28..b6fb5ae 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c
@@ -8,7 +8,7 @@ 
 #include "vect-fcm.x"
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
-/* { dg-final { scan-assembler-times "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 2 } } */
-/* { dg-final { scan-assembler-times "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" 1 } } */
+/* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
+/* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 /* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c
index 01f3880..283d34f 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c
@@ -8,7 +8,7 @@ 
 #include "vect-fcm.x"
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
-/* { dg-final { scan-assembler-times "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" 2 } } */
-/* { dg-final { scan-assembler-times "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" 1 } } */
+/* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */
+/* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 /* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c
index 6027593..868e1f8 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c
@@ -8,7 +8,7 @@ 
 #include "vect-fcm.x"
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
-/* { dg-final { scan-assembler-times "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 2 } } */
-/* { dg-final { scan-assembler-times "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" 1 } } */
+/* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
+/* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 /* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c
index 0337d70..e3258f3 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c
@@ -8,7 +8,7 @@ 
 #include "vect-fcm.x"
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
-/* { dg-final { scan-assembler-times "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" 2 } } */
-/* { dg-final { scan-assembler-times "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" 1 } } */
+/* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */
+/* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 /* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c
index b812a39..ed8b452 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c
@@ -8,7 +8,7 @@ 
 #include "vect-fcm.x"
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
-/* { dg-final { scan-assembler-times "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 2 } } */
-/* { dg-final { scan-assembler-times "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" 1 } } */
+/* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
+/* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 /* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c
index 5e012a4..e90a875 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c
@@ -8,7 +8,7 @@ 
 #include "vect-fcm.x"
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
-/* { dg-final { scan-assembler-times "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" 2 } } */
-/* { dg-final { scan-assembler-times "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" 1 } } */
+/* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */
+/* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 /* { dg-final { cleanup-saved-temps } } */