Patchwork [AArch64] Support float->int conversions in vector registers.

login
register
mail settings
Submitter James Greenhalgh
Date May 1, 2013, 3:09 p.m.
Message ID <1367420955-23543-1-git-send-email-james.greenhalgh@arm.com>
Download mbox | patch
Permalink /patch/240776/
State New
Headers show

Comments

James Greenhalgh - May 1, 2013, 3:09 p.m.
Hi,

The fcvt instructions also have forms which leave their integer
result as a scalar in the SIMD register set.

This patch adds those alternatives for the lceil family
of standard patterns.

Regression tested on aarch64-none-elf with no regressions.

Thanks,
James

---
2013-05-01  James Greenhalgh  <james.greenhalgh@arm.com>

gcc/

	* config/aarch64/aarch64.md
	(l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2): Add vector
	register to vector register alternative.
	(fix_trunc<GPF:mode><GPI:mode>2): Likewise.
	(fixuns_trunc<GPF:mode><GPI:mode>2): Likewise.

gcc/testsuite/

	* gcc.target/aarch64/scalar-fcvt.c: New.
Marcus Shawcroft - May 1, 2013, 3:14 p.m.
On 01/05/13 16:09, James Greenhalgh wrote:
>
> Hi,
>
> The fcvt instructions also have forms which leave their integer
> result as a scalar in the SIMD register set.
>
> This patch adds those alternatives for the lceil family
> of standard patterns.
>
> Regression tested on aarch64-none-elf with no regressions.
>
> Thanks,
> James
>
> ---
> 2013-05-01  James Greenhalgh  <james.greenhalgh@arm.com>
>
> gcc/
>
> 	* config/aarch64/aarch64.md
> 	(l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2): Add vector
> 	register to vector register alternative.
> 	(fix_trunc<GPF:mode><GPI:mode>2): Likewise.
> 	(fixuns_trunc<GPF:mode><GPI:mode>2): Likewise.
>
> gcc/testsuite/
>
> 	* gcc.target/aarch64/scalar-fcvt.c: New.
>


OK
/Marcus

Patch

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 50acdaa..80a0190 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -3231,11 +3231,14 @@ 
 ;; frcvt floating-point round to integer and convert standard patterns.
 ;; Expands to lbtrunc, lceil, lfloor, lround.
 (define_insn "l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2"
-  [(set (match_operand:GPI 0 "register_operand" "=r")
-	(FIXUORS:GPI (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
-		      FCVT)))]
+  [(set (match_operand:GPI 0 "register_operand" "=r,w")
+	(FIXUORS:GPI (unspec:GPF
+			[(match_operand:GPF 1 "register_operand" "w,w")]
+			FCVT)))]
   "TARGET_FLOAT"
-  "fcvt<frint_suffix><su>\\t%<GPI:w>0, %<GPF:s>1"
+  "@
+   fcvt<frint_suffix><su>\\t%<GPI:w>0, %<GPF:s>1
+   fcvt<frint_suffix><su>\\t%<GPF:v>0, %<GPF:v>1"
   [(set_attr "v8type" "fcvtf2i")
    (set_attr "mode" "<GPF:MODE>")
    (set_attr "mode2" "<GPI:MODE>")]
@@ -3324,20 +3327,24 @@ 
 )
 
 (define_insn "fix_trunc<GPF:mode><GPI:mode>2"
-  [(set (match_operand:GPI 0 "register_operand" "=r")
-        (fix:GPI (match_operand:GPF 1 "register_operand" "w")))]
+  [(set (match_operand:GPI 0 "register_operand" "=r,w")
+	(fix:GPI (match_operand:GPF 1 "register_operand" "w,w")))]
   "TARGET_FLOAT"
-  "fcvtzs\\t%<GPI:w>0, %<GPF:s>1"
+  "@
+  fcvtzs\\t%<GPI:w>0, %<GPF:s>1
+  fcvtzs\\t%<GPF:v>0, %<GPF:v>1"
   [(set_attr "v8type" "fcvtf2i")
    (set_attr "mode" "<GPF:MODE>")
    (set_attr "mode2" "<GPI:MODE>")]
 )
 
 (define_insn "fixuns_trunc<GPF:mode><GPI:mode>2"
-  [(set (match_operand:GPI 0 "register_operand" "=r")
-        (unsigned_fix:GPI (match_operand:GPF 1 "register_operand" "w")))]
+  [(set (match_operand:GPI 0 "register_operand" "=r,w")
+	(unsigned_fix:GPI (match_operand:GPF 1 "register_operand" "w,w")))]
   "TARGET_FLOAT"
-  "fcvtzu\\t%<GPI:w>0, %<GPF:s>1"
+  "@
+   fcvtzu\\t%<GPI:w>0, %<GPF:s>1
+   fcvtzu\\t%<GPF:v>0, %<GPF:v>1"
   [(set_attr "v8type" "fcvtf2i")
    (set_attr "mode" "<GPF:MODE>")
    (set_attr "mode2" "<GPI:MODE>")]
diff --git a/gcc/testsuite/gcc.target/aarch64/scalar-fcvt.c b/gcc/testsuite/gcc.target/aarch64/scalar-fcvt.c
new file mode 100644
index 0000000..4b122e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/scalar-fcvt.c
@@ -0,0 +1,121 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+#define NUM_TESTS 8
+
+float input_f32[] = {0.1f, -0.1f, 0.4f, 10.3f, 200.0f, -800.0f, -13.0f, -0.5f};
+double input_f64[] = {0.1, -0.1, 0.4, 10.3, 200.0, -800.0, -13.0, -0.5};
+
+/* Used to force a variable to a SIMD register.  */
+#define force_simd(V1)   asm volatile ("dup %d0, %1.d[0]"	\
+	   : "=w"(V1)						\
+	   : "w"(V1)						\
+	   : /* No clobbers */);
+
+#define TEST(SUFFIX, WIDTH, US, UNSIGNED, W)				\
+int									\
+test_fcvt##SUFFIX##US##_float##WIDTH##_t (void)				\
+{									\
+  int ret = 0;								\
+  int i = 0;								\
+  UNSIGNED##int##WIDTH##_t output[NUM_TESTS];				\
+									\
+  for (i = 0; i < NUM_TESTS; i++)					\
+    {									\
+      /* Inhibit optimization of our calculation in general		\
+	 purpose registers.  */						\
+      asm volatile ("" : : : "memory");					\
+      float##WIDTH##_t f1 = input_f##WIDTH[i];				\
+      output[i] = vcvt##SUFFIX##W##_##US##WIDTH##_f##WIDTH (f1);	\
+    }									\
+									\
+  for (i = 0; i < NUM_TESTS; i++)					\
+    {									\
+      /* Inhibit optimization of our calculation in SIMD registers.  */ \
+      asm volatile ("" : : : "memory");					\
+      float##WIDTH##_t f1 = input_f##WIDTH[i];				\
+      UNSIGNED##int##WIDTH##_t r =					\
+		vcvt##SUFFIX##W##_##US##WIDTH##_f##WIDTH (f1);		\
+      force_simd (r)							\
+      output[i] ^= r;							\
+    }									\
+									\
+  for (i = 0; i < NUM_TESTS; i++)					\
+    ret |= output[i];							\
+									\
+  return ret;								\
+}
+
+#define BUILD_VARIANTS(SUFFIX)						\
+TEST (SUFFIX, 32, u, u, s)						\
+TEST (SUFFIX, 32, s,  , s)						\
+TEST (SUFFIX, 64, u, u, d)						\
+TEST (SUFFIX, 64, s,  , d)						\
+
+BUILD_VARIANTS ()
+/* { dg-final { scan-assembler "fcvtzu\\ts\[0-9\]+, s\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtzu\\tw\[0-9\]+, s\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtzs\\ts\[0-9\]+, s\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtzs\\tw\[0-9\]+, s\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtzu\\td\[0-9\]+, d\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtzu\\tx\[0-9\]+, d\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtzs\\td\[0-9\]+, d\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtzs\\tx\[0-9\]+, d\[0-9\]+" } } */
+BUILD_VARIANTS (a)
+/* { dg-final { scan-assembler "fcvtau\\ts\[0-9\]+, s\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtau\\tw\[0-9\]+, s\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtas\\ts\[0-9\]+, s\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtas\\tw\[0-9\]+, s\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtau\\td\[0-9\]+, d\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtau\\tx\[0-9\]+, d\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtas\\td\[0-9\]+, d\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtas\\tx\[0-9\]+, d\[0-9\]+" } } */
+BUILD_VARIANTS (m)
+/* { dg-final { scan-assembler "fcvtmu\\ts\[0-9\]+, s\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtmu\\tw\[0-9\]+, s\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtms\\ts\[0-9\]+, s\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtms\\tw\[0-9\]+, s\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtmu\\td\[0-9\]+, d\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtmu\\tx\[0-9\]+, d\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtms\\td\[0-9\]+, d\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtms\\tx\[0-9\]+, d\[0-9\]+" } } */
+BUILD_VARIANTS (n)
+/* { dg-final { scan-assembler "fcvtnu\\ts\[0-9\]+, s\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtnu\\tw\[0-9\]+, s\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtns\\ts\[0-9\]+, s\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtns\\tw\[0-9\]+, s\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtnu\\td\[0-9\]+, d\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtnu\\tx\[0-9\]+, d\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtns\\td\[0-9\]+, d\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtns\\tx\[0-9\]+, d\[0-9\]+" } } */
+BUILD_VARIANTS (p)
+/* { dg-final { scan-assembler "fcvtpu\\ts\[0-9\]+, s\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtpu\\tw\[0-9\]+, s\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtps\\ts\[0-9\]+, s\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtps\\tw\[0-9\]+, s\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtpu\\td\[0-9\]+, d\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtpu\\tx\[0-9\]+, d\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtps\\td\[0-9\]+, d\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtps\\tx\[0-9\]+, d\[0-9\]+" } } */
+
+#undef TEST
+#define TEST(SUFFIX, WIDTH, US, UNSIGNED, W)				\
+  if (test_fcvt##SUFFIX##US##_float##WIDTH##_t ())			\
+    abort ();
+
+int
+main (int argc, char **argv)
+{
+  BUILD_VARIANTS ()
+  BUILD_VARIANTS (a)
+  BUILD_VARIANTS (m)
+  BUILD_VARIANTS (n)
+  BUILD_VARIANTS (p)
+  return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */