@@ -1097,6 +1097,14 @@ aarch64_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_clzv4si];
return NULL_TREE;
}
+ case BUILT_IN_CTZ: /* Select a SIMD ctz builtin decl for the checked vector shape.  */
+ {
+ if (AARCH64_CHECK_BUILTIN_MODE (2, S)) /* Macro defined above this hunk; presumably 2 lanes of SImode -- TODO confirm.  */
+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv2si];
+ else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) /* Presumably 4 lanes of SImode -- TODO confirm.  */
+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv4si];
+ return NULL_TREE; /* Neither check matched: no decl is returned.  */
+ }
#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
(out_mode == N##Imode && out_n == C \
@@ -46,6 +46,7 @@
BUILTIN_VD_BHSI (BINOP, addp, 0)
VAR1 (UNOP, addp, 0, di)
BUILTIN_VDQ_BHSI (UNOP, clz, 2)
+ BUILTIN_VS (UNOP, ctz, 2)
BUILTIN_VALL (GETLANE, be_checked_get_lane, 0)
@@ -303,6 +303,20 @@
[(set_attr "type" "neon_rbit")]
)
+(define_expand "ctz<mode>2" ; Emits bswap, byte-lane rbit, then clz.
+  [(set (match_operand:VS 0 "register_operand")
+        (ctz:VS (match_operand:VS 1 "register_operand")))]
+  "TARGET_SIMD"
+  { /* Operand 0 doubles as the scratch for every intermediate result.  */
+     emit_insn (gen_bswap<mode> (operands[0], operands[1]));
+     rtx op0_castsi2qi = simplify_gen_subreg (<VS:VSI2QI>mode, operands[0],
+					      <MODE>mode, 0); /* QI-lane view.  */
+     emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
+     emit_insn (gen_clz<mode>2 (operands[0], operands[0])); /* Final count.  */
+     DONE;
+  }
+)
+
(define_insn "*aarch64_mul3_elt<mode>"
[(set (match_operand:VMUL 0 "register_operand" "=w")
(mult:VMUL
@@ -183,6 +183,9 @@
;; All byte modes.
(define_mode_iterator VB [V8QI V16QI])
+;; 2 and 4 lane SI modes; iterated over by the ctz<mode>2 expander.
+(define_mode_iterator VS [V2SI V4SI])
+
(define_mode_iterator TX [TI TF])
;; Opaque structure modes.
@@ -670,6 +673,9 @@
(V2DI "p") (V2DF "p")
(V2SF "p") (V4SF "v")])
+(define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi")]) ; VS mode -> QI vector mode name, lower case (used in pattern names).
+(define_mode_attr VSI2QI [(V2SI "V8QI") (V4SI "V16QI")]) ; VS mode -> QI vector mode name, upper case (used as a machine mode).
+
;; -------------------------------------------------------------------
;; Code Iterators
;; -------------------------------------------------------------------
new file mode 100644
@@ -0,0 +1,41 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fno-inline" } */
+
+extern void abort ();
+
+#define TEST(name, subname, count) /* Defines count_tz_<name>.  */ \
+void \
+count_tz_##name (unsigned *__restrict a, int *__restrict b) \
+{ \
+ int i; \
+ for (i = 0; i < count; i++) /* Fixed trip count of `count' elements.  */ \
+ b[i] = __builtin_##subname (a[i]); /* b[i] = __builtin_<subname> (a[i]).  */ \
+}
+
+#define CHECK(name, count, input, output) /* Uses the caller's `i' and `r'.  */ \
+ count_tz_##name (input, output); /* Run the function under test.  */ \
+ for (i = 0; i < count; i++) \
+ { \
+ if (output[i] != r[i]) /* Any mismatch with the expected r[] aborts.  */ \
+ abort (); \
+ }
+
+TEST (v4si, ctz, 4) /* Defines count_tz_v4si: __builtin_ctz over 4 elements.  */
+TEST (v2si, ctz, 2) /* Defines count_tz_v2si: __builtin_ctz over 2 elements.  */
+/* { dg-final { scan-assembler "clz\tv\[0-9\]+\.4s" } } */
+/* { dg-final { scan-assembler "clz\tv\[0-9\]+\.2s" } } */
+
+int
+main ()
+{
+  unsigned int x4[4] = { 0x0, 0xFF80, 0x1FFFF, 0xFF000000 };
+  int r[4] = { 32, 7, 0, 24 };  /* Expected count for each x4 element.  */
+  int d4[4] = { -1, -1, -1, -1 }, d2[2] = { -1, -1 }, i;
+  /* Poisoned, separate outputs: stale v4si data cannot mask a v2si bug.  */
+  CHECK (v4si, 4, x4, d4);
+  CHECK (v2si, 2, x4, d2);
+  /* NOTE(review): r[0] == 32 for x4[0] == 0 relies on target ctz (0).  */
+  return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */