diff mbox series

[AArch64] Use scvtf fbits option where appropriate

Message ID d5c7b8da-a2fc-353a-7609-1c5ec2d770a8@arm.com
State New
Headers show
Series [AArch64] Use scvtf fbits option where appropriate | expand

Commit Message

Joel Hutton June 18, 2019, 3:34 p.m. UTC
On 18/06/2019 11:37, Richard Earnshaw (lists) wrote:
> Start sentences with a capital letter.  End them with a full stop.
> "inequal" isn't a word: you probably mean "unequal".

I've fixed this; the iterator is, however, defined as 'fcvt_iesize'
and described in the adjacent comment in iterators.md as 'inequal'.
I've addressed your other comments.

On 18/06/2019 13:30, Richard Sandiford wrote:
> Wilco Dijkstra <Wilco.Dijkstra@arm.com> writes:
>>   > +/* If X is a positive CONST_DOUBLE with a value that is the 
>> reciprocal of a
>>   > +   power of 2 (i.e 1/2^n) return the number of float bits. e.g. 
>> for x==(1/2^n)
>>   > +   return n. Otherwise return -1.  */
>>   > +int
>>   > +aarch64_fpconst_pow2_recip (rtx x)
>>   > +{
>>   > +  REAL_VALUE_TYPE r0;
>>   > +
>>   > +  if (!CONST_DOUBLE_P (x))
>>   > +    return -1;
>>> CONST_DOUBLE can be used for things other than floating point.  You
>>> should really check that the mode on the double in is in class 
>>> MODE_FLOAT.
>>   Several other functions (eg aarch64_fpconst_pow_of_2) do the same 
>> since
>> this function is only called with HF/SF/DF mode. We could add an 
>> assert for
>> SCALAR_FLOAT_MODE_P (but then aarch64_fpconst_pow_of_2 should do
>> the same).
> IMO we should leave it as-is.  aarch64.h has:
I've gone with the majority and left it as-is, but I don't have strong 
feelings on it.
diff mbox series

Patch

From 1e44ef7e999527a0b03316cf0ea002f8d4437052 Mon Sep 17 00:00:00 2001
From: Joel Hutton <Joel.Hutton@arm.com>
Date: Thu, 13 Jun 2019 11:08:56 +0100
Subject: [PATCH] SCVTF fbits

---
 gcc/config/aarch64/aarch64-protos.h           |   1 +
 gcc/config/aarch64/aarch64.c                  |  23 +++
 gcc/config/aarch64/aarch64.md                 |  39 +++++
 gcc/config/aarch64/constraints.md             |   7 +
 gcc/config/aarch64/predicates.md              |   4 +
 gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c | 140 ++++++++++++++++++
 6 files changed, 214 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 1e3b1c91db1..ad1ba458a3f 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -494,6 +494,7 @@  enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx);
 enum reg_class aarch64_regno_regclass (unsigned);
 int aarch64_asm_preferred_eh_data_format (int, int);
 int aarch64_fpconst_pow_of_2 (rtx);
+int aarch64_fpconst_pow2_recip (rtx);
 machine_mode aarch64_hard_regno_caller_save_mode (unsigned, unsigned,
 						       machine_mode);
 int aarch64_uxt_size (int, HOST_WIDE_INT);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 9a035dd9ed8..028da32174d 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -18707,6 +18707,29 @@  aarch64_fpconst_pow_of_2 (rtx x)
   return exact_log2 (real_to_integer (r));
 }
 
+/* If X is a positive CONST_DOUBLE whose value is the reciprocal of a power
+   of 2 (i.e. 1/2^n) with 1 <= n <= 31, return n, the number of fractional
+   bits.  Otherwise return -1.  */
+
+int
+aarch64_fpconst_pow2_recip (rtx x)
+{
+  REAL_VALUE_TYPE r0;
+
+  if (!CONST_DOUBLE_P (x))
+    return -1;
+
+  r0 = *CONST_DOUBLE_REAL_VALUE (x);
+  if (exact_real_inverse (DFmode, &r0)
+      && !REAL_VALUE_NEGATIVE (r0))
+    {
+	int ret = exact_log2 (real_to_integer (&r0));
+	if (ret >= 1 && ret <= 31)
+	    return ret;
+    }
+  return -1;
+}
+
 /* If X is a vector of equal CONST_DOUBLE values and that value is
    Y, return the aarch64_fpconst_pow_of_2 of Y.  Otherwise return -1.  */
 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 526c7fb0dab..c7c6a18b0ff 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -6016,6 +6016,44 @@ 
   [(set_attr "type" "f_cvtf2i")]
 )
 
+;; Equal width integer to fp and multiply combine.
+(define_insn "*aarch64_<su_optab>cvtf_<fcvt_target>_<GPF:mode>2_mult"
+  [(set (match_operand:GPF 0 "register_operand" "=w,w")
+	(mult:GPF (FLOATUORS:GPF
+		   (match_operand:<FCVT_TARGET> 1 "register_operand" "w,?r"))
+		   (match_operand:GPF 2 "aarch64_fp_pow2_recip" "Dt,Dt")))]
+  "TARGET_FLOAT"
+  {
+    operands[2] = GEN_INT (aarch64_fpconst_pow2_recip (operands[2]));
+    switch (which_alternative)
+    {
+      case 0:
+	return "<su_optab>cvtf\t%<GPF:s>0, %<s>1, #%2";
+      case 1:
+	return "<su_optab>cvtf\t%<GPF:s>0, %<w1>1, #%2";
+      default:
+	gcc_unreachable ();
+    }
+  }
+  [(set_attr "type" "neon_int_to_fp_<Vetype>,f_cvti2f")
+   (set_attr "arch" "simd,fp")]
+)
+
+;; Unequal width integer to fp and multiply combine.
+(define_insn "*aarch64_<su_optab>cvtf_<fcvt_iesize>_<GPF:mode>2_mult"
+  [(set (match_operand:GPF 0 "register_operand" "=w")
+	(mult:GPF (FLOATUORS:GPF
+		   (match_operand:<FCVT_IESIZE> 1 "register_operand" "r"))
+		   (match_operand:GPF 2 "aarch64_fp_pow2_recip" "Dt")))]
+  "TARGET_FLOAT"
+  {
+    operands[2] = GEN_INT (aarch64_fpconst_pow2_recip (operands[2]));
+    return "<su_optab>cvtf\t%<GPF:s>0, %<w2>1, #%2";
+  }
+  [(set_attr "type" "f_cvti2f")]
+)
+
+;; Equal width integer to fp conversion.
 (define_insn "<optab><fcvt_target><GPF:mode>2"
   [(set (match_operand:GPF 0 "register_operand" "=w,w")
         (FLOATUORS:GPF (match_operand:<FCVT_TARGET> 1 "register_operand" "w,?r")))]
@@ -6027,6 +6065,7 @@ 
    (set_attr "arch" "simd,fp")]
 )
 
+;; Unequal width integer to fp conversion.
 (define_insn "<optab><fcvt_iesize><GPF:mode>2"
   [(set (match_operand:GPF 0 "register_operand" "=w")
         (FLOATUORS:GPF (match_operand:<FCVT_IESIZE> 1 "register_operand" "r")))]
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 21f9549e660..b0caa13b435 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -329,6 +329,13 @@ 
       (match_test "aarch64_simd_scalar_immediate_valid_for_move (op,
 						 QImode)")))
 
+(define_constraint "Dt"
+  "@internal
+ A const_double which is the reciprocal of an exact power of two, can be
+ used in an scvtf with fract bits operation"
+ (and (match_code "const_double")
+      (match_test "aarch64_fpconst_pow2_recip (op) > 0")))
+
 (define_constraint "Dl"
   "@internal
  A constraint that matches vector of immediates for left shifts."
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 10100ca830a..da295981286 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -98,6 +98,10 @@ 
   (and (match_code "const_double")
 	(match_test "aarch64_fpconst_pow_of_2 (op) > 0")))
 
+(define_predicate "aarch64_fp_pow2_recip"
+  (and (match_code "const_double")
+       (match_test "aarch64_fpconst_pow2_recip (op) > 0")))
+
 (define_predicate "aarch64_fp_vec_pow2"
   (match_test "aarch64_vec_fpconst_pow_of_2 (op) > 0"))
 
diff --git a/gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c b/gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c
new file mode 100644
index 00000000000..e8d1de6279b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c
@@ -0,0 +1,140 @@ 
+/* { dg-do run } */
+/* { dg-options "-save-temps -O2 -fno-inline" } */
+
+#define FUNC_DEFS(__a)			\
+	float				\
+fsfoo##__a (int x)			\
+{					\
+	return ((float) x)/(1u << __a);	\
+}					\
+float					\
+fusfoo##__a (unsigned int x)		\
+{					\
+	return ((float) x)/(1u << __a);	\
+}					\
+float					\
+fslfoo##__a (long x)			\
+{					\
+	return ((float) x)/(1u << __a);	\
+}					\
+float					\
+fulfoo##__a (unsigned long x)		\
+{					\
+	return ((float) x)/(1u << __a);	\
+}					\
+
+#define FUNC_DEFD(__a)			\
+double					\
+dsfoo##__a (int x)			\
+{					\
+	return ((double) x)/(1u << __a);\
+}					\
+double					\
+dusfoo##__a (unsigned int x)		\
+{					\
+	return ((double) x)/(1u << __a);\
+}					\
+double					\
+dslfoo##__a (long x)			\
+{					\
+	return ((double) x)/(1u << __a);\
+}					\
+double					\
+dulfoo##__a (unsigned long x)		\
+{					\
+	return ((double) x)/(1u << __a);\
+}
+
+FUNC_DEFS (4)
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#4" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#4" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#4" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#4" 1 } } */
+
+FUNC_DEFD (4)
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#4" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#4" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#4" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#4" 1 } } */
+
+FUNC_DEFS (8)
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#8" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#8" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#8" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#8" 1 } } */
+
+FUNC_DEFD (8)
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#8" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#8" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#8" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#8" 1 } } */
+
+FUNC_DEFS (16)
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#16" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#16" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#16" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#16" 1 } } */
+
+FUNC_DEFD (16)
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#16" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#16" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#16" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#16" 1 } } */
+
+FUNC_DEFS (31)
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#31" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#31" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#31" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#31" 1 } } */
+
+FUNC_DEFD (31)
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#31" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#31" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#31" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#31" 1 } } */
+
+#define FUNC_TESTS(__a, __b)						\
+do	/* Each float helper must return exactly __b / 2^__a.  */	\
+{									\
+	if (fsfoo##__a (__b) != ((int) (__b)) * (1.0f/(1u << __a)))	\
+	  __builtin_abort ();						\
+	if (fusfoo##__a (__b) != ((int) (__b)) * (1.0f/(1u << __a)))	\
+	  __builtin_abort ();						\
+	if (fslfoo##__a (__b) != ((int) (__b)) * (1.0f/(1u << __a)))	\
+	  __builtin_abort ();						\
+	if (fulfoo##__a (__b) != ((int) (__b)) * (1.0f/(1u << __a)))	\
+	  __builtin_abort ();						\
+} while (0)
+
+#define FUNC_TESTD(__a, __b)						\
+do	/* Each double helper must return exactly __b / 2^__a.  */	\
+{									\
+	if (dsfoo##__a (__b) != ((int) (__b)) * (1.0/(1u << __a)))	\
+	  __builtin_abort ();						\
+	if (dusfoo##__a (__b) != ((int) (__b)) * (1.0/(1u << __a)))	\
+	  __builtin_abort ();						\
+	if (dslfoo##__a (__b) != ((int) (__b)) * (1.0/(1u << __a)))	\
+	  __builtin_abort ();						\
+	if (dulfoo##__a (__b) != ((int) (__b)) * (1.0/(1u << __a)))	\
+	  __builtin_abort ();						\
+} while (0)
+
+	int
+main (void)
+{
+	int i;
+
+	for (i = 0; i < 32; i ++)	/* Exercise inputs 0..31.  */
+	{
+		FUNC_TESTS (4, i);
+		FUNC_TESTS (8, i);
+		FUNC_TESTS (16, i);
+		FUNC_TESTS (31, i);
+
+		FUNC_TESTD (4, i);
+		FUNC_TESTD (8, i);
+		FUNC_TESTD (16, i);
+		FUNC_TESTD (31, i);
+	}
+	return 0;
+}
-- 
2.17.1