===================================================================
@@ -167,6 +167,10 @@ DEF_INTERNAL_OPTAB_FN (COND_IOR, ECF_CON
cond_ior, cond_binary)
DEF_INTERNAL_OPTAB_FN (COND_XOR, ECF_CONST | ECF_NOTHROW,
cond_xor, cond_binary)
+DEF_INTERNAL_OPTAB_FN (COND_SHL, ECF_CONST | ECF_NOTHROW,
+ cond_ashl, cond_binary)
+DEF_INTERNAL_SIGNED_OPTAB_FN (COND_SHR, ECF_CONST | ECF_NOTHROW, first,
+ cond_ashr, cond_lshr, cond_binary)
DEF_INTERNAL_OPTAB_FN (COND_FMA, ECF_CONST, cond_fma, cond_ternary)
DEF_INTERNAL_OPTAB_FN (COND_FMS, ECF_CONST, cond_fms, cond_ternary)
===================================================================
@@ -3286,7 +3286,9 @@ #define FOR_EACH_CODE_MAPPING(T) \
T (MAX_EXPR, IFN_COND_MAX) \
T (BIT_AND_EXPR, IFN_COND_AND) \
T (BIT_IOR_EXPR, IFN_COND_IOR) \
- T (BIT_XOR_EXPR, IFN_COND_XOR)
+ T (BIT_XOR_EXPR, IFN_COND_XOR) \
+ T (LSHIFT_EXPR, IFN_COND_SHL) \
+ T (RSHIFT_EXPR, IFN_COND_SHR)
/* Return a function that only performs CODE when a certain condition is met
and that uses a given fallback value otherwise. For example, if CODE is
===================================================================
@@ -83,12 +83,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
plus minus
mult trunc_div trunc_mod rdiv
min max
- bit_and bit_ior bit_xor)
+ bit_and bit_ior bit_xor
+ lshift rshift)
(define_operator_list COND_BINARY
IFN_COND_ADD IFN_COND_SUB
IFN_COND_MUL IFN_COND_DIV IFN_COND_MOD IFN_COND_RDIV
IFN_COND_MIN IFN_COND_MAX
- IFN_COND_AND IFN_COND_IOR IFN_COND_XOR)
+ IFN_COND_AND IFN_COND_IOR IFN_COND_XOR
+ IFN_COND_SHL IFN_COND_SHR)
/* Same for ternary operations. */
(define_operator_list UNCOND_TERNARY
===================================================================
@@ -230,6 +230,9 @@ OPTAB_D (cond_umod_optab, "cond_umod$a")
OPTAB_D (cond_and_optab, "cond_and$a")
OPTAB_D (cond_ior_optab, "cond_ior$a")
OPTAB_D (cond_xor_optab, "cond_xor$a")
+OPTAB_D (cond_ashl_optab, "cond_ashl$a")
+OPTAB_D (cond_ashr_optab, "cond_ashr$a")
+OPTAB_D (cond_lshr_optab, "cond_lshr$a")
OPTAB_D (cond_smin_optab, "cond_smin$a")
OPTAB_D (cond_smax_optab, "cond_smax$a")
OPTAB_D (cond_umin_optab, "cond_umin$a")
===================================================================
@@ -129,7 +129,11 @@ create_convert_operand_to (class expand_
/* Make OP describe an input operand that should have the same value
as VALUE, after any mode conversion that the backend might request.
If VALUE is a CONST_INT, it should be treated as having mode MODE.
- UNSIGNED_P says whether VALUE is unsigned. */
+ UNSIGNED_P says whether VALUE is unsigned.
+
+ The conversion of VALUE can include a combination of numerical
+ conversion (as for convert_modes) and duplicating a scalar to fill
+ a vector (if VALUE is a scalar but the operand is a vector). */
static inline void
create_convert_operand_from (class expand_operand *op, rtx value,
===================================================================
@@ -7212,7 +7212,7 @@ maybe_legitimize_operand_same_code (enum
maybe_legitimize_operand (enum insn_code icode, unsigned int opno,
class expand_operand *op)
{
- machine_mode mode, imode;
+ machine_mode mode, imode, tmode;
mode = op->mode;
switch (op->type)
@@ -7259,9 +7259,17 @@ maybe_legitimize_operand (enum insn_code
gcc_assert (mode != VOIDmode);
imode = insn_data[(int) icode].operand[opno].mode;
+ tmode = (VECTOR_MODE_P (imode) && !VECTOR_MODE_P (mode)
+ ? GET_MODE_INNER (imode) : imode);
+ if (tmode != VOIDmode && tmode != mode)
+ {
+ op->value = convert_modes (tmode, mode, op->value, op->unsigned_p);
+ mode = tmode;
+ }
if (imode != VOIDmode && imode != mode)
{
- op->value = convert_modes (imode, mode, op->value, op->unsigned_p);
+ gcc_assert (VECTOR_MODE_P (imode) && !VECTOR_MODE_P (mode));
+ op->value = expand_vector_broadcast (imode, op->value);
mode = imode;
}
goto input;
===================================================================
@@ -1280,6 +1280,7 @@ (define_code_iterator SVE_INT_UNARY [abs
;; SVE integer binary operations.
(define_code_iterator SVE_INT_BINARY [plus minus mult smax umax smin umin
+ ashift ashiftrt lshiftrt
and ior xor])
;; SVE integer binary division operations.
@@ -1475,6 +1476,9 @@ (define_code_attr sve_int_op [(plus "add
(smax "smax")
(umin "umin")
(umax "umax")
+ (ashift "lsl")
+ (ashiftrt "asr")
+ (lshiftrt "lsr")
(and "and")
(ior "orr")
(xor "eor")
@@ -1484,17 +1488,20 @@ (define_code_attr sve_int_op [(plus "add
(popcount "cnt")])
(define_code_attr sve_int_op_rev [(plus "add")
- (minus "subr")
- (mult "mul")
- (div "sdivr")
- (udiv "udivr")
- (smin "smin")
- (smax "smax")
- (umin "umin")
- (umax "umax")
- (and "and")
- (ior "orr")
- (xor "eor")])
+ (minus "subr")
+ (mult "mul")
+ (div "sdivr")
+ (udiv "udivr")
+ (smin "smin")
+ (smax "smax")
+ (umin "umin")
+ (umax "umax")
+ (ashift "lslr")
+ (ashiftrt "asrr")
+ (lshiftrt "lsrr")
+ (and "and")
+ (ior "orr")
+ (xor "eor")])
;; The floating-point SVE instruction that implements an rtx code.
(define_code_attr sve_fp_op [(plus "fadd")
@@ -1535,6 +1542,9 @@ (define_code_attr sve_pred_int_rhs2_oper
(umax "register_operand")
(smin "register_operand")
(umin "register_operand")
+ (ashift "aarch64_sve_lshift_operand")
+ (ashiftrt "aarch64_sve_rshift_operand")
+ (lshiftrt "aarch64_sve_rshift_operand")
(and "aarch64_sve_pred_and_operand")
(ior "register_operand")
(xor "register_operand")])
===================================================================
@@ -1772,7 +1772,10 @@ (define_insn "*one_cmpl<mode>3"
;; Includes:
;; - ADD (merging form only)
;; - AND (merging form only)
+;; - ASR (merging form only)
;; - EOR (merging form only)
+;; - LSL (merging form only)
+;; - LSR (merging form only)
;; - MUL
;; - ORR (merging form only)
;; - SMAX
@@ -2405,6 +2408,49 @@ (define_insn "*post_ra_v<optab><mode>3"
"<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
)
+;; Predicated integer shift, merging with the first input.
+(define_insn "*cond_<optab><mode>_2_const"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (ASHIFT:SVE_I
+ (match_operand:SVE_I 2 "register_operand" "0, w")
+ (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm"))
+ (match_dup 2)]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "@
+ <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Predicated integer shift, merging with an independent value.
+(define_insn_and_rewrite "*cond_<optab><mode>_any_const"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, &w, ?&w")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (ASHIFT:SVE_I
+ (match_operand:SVE_I 2 "register_operand" "w, w, w")
+ (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm"))
+ (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
+ "@
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ #"
+ "&& reload_completed
+ && register_operand (operands[4], <MODE>mode)
+ && !rtx_equal_p (operands[0], operands[4])"
+ {
+ emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
+ operands[4], operands[1]));
+ operands[4] = operands[2] = operands[0];
+ }
+ [(set_attr "movprfx" "yes")]
+)
+
;; -------------------------------------------------------------------------
;; ---- [FP] General binary arithmetic corresponding to rtx codes
;; -------------------------------------------------------------------------
===================================================================
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE, NAME, OP) \
+ void __attribute__ ((noipa)) \
+ test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \
+ TYPE *__restrict b, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ r[i] = a[i] > 20 ? b[i] OP 3 : b[i]; \
+ }
+
+#define TEST_TYPE(T, TYPE) \
+ T (TYPE, shl, <<) \
+ T (TYPE, shr, >>)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int8_t) \
+ TEST_TYPE (T, uint8_t) \
+ TEST_TYPE (T, int16_t) \
+ TEST_TYPE (T, uint16_t) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */
+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
===================================================================
@@ -0,0 +1,27 @@
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "cond_shift_1.c"
+
+#define N 99
+
+#define TEST_LOOP(TYPE, NAME, OP) \
+ { \
+ TYPE r[N], a[N], b[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ a[i] = (i & 1 ? i : 3 * i); \
+ b[i] = (i >> 4) << (i & 15); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ test_##TYPE##_##NAME (r, a, b, N); \
+ for (int i = 0; i < N; ++i) \
+ if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP 3 : b[i])) \
+ __builtin_abort (); \
+ }
+
+int main ()
+{
+ TEST_ALL (TEST_LOOP)
+ return 0;
+}
===================================================================
@@ -0,0 +1,52 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE, NAME, OP) \
+ void __attribute__ ((noipa)) \
+ test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \
+ TYPE *__restrict b, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ r[i] = a[i] > 20 ? b[i] OP 3 : a[i]; \
+ }
+
+#define TEST_TYPE(T, TYPE) \
+ T (TYPE, shl, <<) \
+ T (TYPE, shr, >>)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int8_t) \
+ TEST_TYPE (T, uint8_t) \
+ TEST_TYPE (T, int16_t) \
+ TEST_TYPE (T, uint16_t) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
===================================================================
@@ -0,0 +1,27 @@
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "cond_shift_2.c"
+
+#define N 99
+
+#define TEST_LOOP(TYPE, NAME, OP) \
+ { \
+ TYPE r[N], a[N], b[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ a[i] = (i & 1 ? i : 3 * i); \
+ b[i] = (i >> 4) << (i & 15); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ test_##TYPE##_##NAME (r, a, b, N); \
+ for (int i = 0; i < N; ++i) \
+ if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP 3 : a[i])) \
+ __builtin_abort (); \
+ }
+
+int main ()
+{
+ TEST_ALL (TEST_LOOP)
+ return 0;
+}
===================================================================
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE, NAME, OP) \
+ void __attribute__ ((noipa)) \
+ test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \
+ TYPE *__restrict b, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ r[i] = a[i] > 20 ? b[i] OP 3 : 72; \
+ }
+
+#define TEST_TYPE(T, TYPE) \
+ T (TYPE, shl, <<) \
+ T (TYPE, shr, >>)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int8_t) \
+ TEST_TYPE (T, uint8_t) \
+ TEST_TYPE (T, int16_t) \
+ TEST_TYPE (T, uint16_t) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */
+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
+/* { dg-final { scan-assembler-times {\tsel\t} 16 } } */
===================================================================
@@ -0,0 +1,27 @@
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "cond_shift_3.c"
+
+#define N 99
+
+#define TEST_LOOP(TYPE, NAME, OP) \
+ { \
+ TYPE r[N], a[N], b[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ a[i] = (i & 1 ? i : 3 * i); \
+ b[i] = (i >> 4) << (i & 15); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ test_##TYPE##_##NAME (r, a, b, N); \
+ for (int i = 0; i < N; ++i) \
+ if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP 3 : 72)) \
+ __builtin_abort (); \
+ }
+
+int main ()
+{
+ TEST_ALL (TEST_LOOP)
+ return 0;
+}
===================================================================
@@ -0,0 +1,52 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE, NAME, OP) \
+ void __attribute__ ((noipa)) \
+ test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \
+ TYPE *__restrict b, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ r[i] = a[i] > 20 ? b[i] OP 3 : 0; \
+ }
+
+#define TEST_TYPE(T, TYPE) \
+ T (TYPE, shl, <<) \
+ T (TYPE, shr, >>)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int8_t) \
+ TEST_TYPE (T, uint8_t) \
+ TEST_TYPE (T, int16_t) \
+ TEST_TYPE (T, uint16_t) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/z, z[0-9]+\.b\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/z, z[0-9]+\.d\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
===================================================================
@@ -0,0 +1,27 @@
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "cond_shift_4.c"
+
+#define N 99
+
+#define TEST_LOOP(TYPE, NAME, OP) \
+ { \
+ TYPE r[N], a[N], b[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ a[i] = (i & 1 ? i : 3 * i); \
+ b[i] = (i >> 4) << (i & 15); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ test_##TYPE##_##NAME (r, a, b, N); \
+ for (int i = 0; i < N; ++i) \
+ if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP 3 : 0)) \
+ __builtin_abort (); \
+ }
+
+int main ()
+{
+ TEST_ALL (TEST_LOOP)
+ return 0;
+}
===================================================================
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE, NAME, OP) \
+ void __attribute__ ((noipa)) \
+ test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \
+ TYPE *__restrict b, TYPE *__restrict c, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ r[i] = a[i] > 20 ? b[i] OP c[i] : b[i]; \
+ }
+
+#define TEST_TYPE(T, TYPE) \
+ T (TYPE, shl, <<) \
+ T (TYPE, shr, >>)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */
+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
===================================================================
@@ -0,0 +1,28 @@
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "cond_shift_5.c"
+
+#define N 99
+
+#define TEST_LOOP(TYPE, NAME, OP) \
+ { \
+ TYPE r[N], a[N], b[N], c[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ a[i] = (i & 1 ? i : 3 * i); \
+ b[i] = (i >> 4) << (i & 15); \
+ c[i] = ~i & 7; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ test_##TYPE##_##NAME (r, a, b, c, N); \
+ for (int i = 0; i < N; ++i) \
+ if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP c[i] : b[i])) \
+ __builtin_abort (); \
+ }
+
+int main ()
+{
+ TEST_ALL (TEST_LOOP)
+ return 0;
+}
===================================================================
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE, NAME, OP) \
+ void __attribute__ ((noipa)) \
+ test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \
+ TYPE *__restrict b, TYPE *__restrict c, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ r[i] = a[i] > 20 ? b[i] OP c[i] : c[i]; \
+ }
+
+#define TEST_TYPE(T, TYPE) \
+ T (TYPE, shl, <<) \
+ T (TYPE, shr, >>)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tlslr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tasrr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tlsrr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */
+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
===================================================================
@@ -0,0 +1,28 @@
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "cond_shift_6.c"
+
+#define N 99
+
+#define TEST_LOOP(TYPE, NAME, OP) \
+ { \
+ TYPE r[N], a[N], b[N], c[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ a[i] = (i & 1 ? i : 3 * i); \
+ b[i] = (i >> 4) << (i & 15); \
+ c[i] = ~i & 7; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ test_##TYPE##_##NAME (r, a, b, c, N); \
+ for (int i = 0; i < N; ++i) \
+ if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP c[i] : c[i])) \
+ __builtin_abort (); \
+ }
+
+int main ()
+{
+ TEST_ALL (TEST_LOOP)
+ return 0;
+}
===================================================================
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE, NAME, OP) \
+ void __attribute__ ((noipa)) \
+ test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \
+ TYPE *__restrict b, TYPE *__restrict c, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ r[i] = a[i] > 20 ? b[i] OP c[i] : a[i]; \
+ }
+
+#define TEST_TYPE(T, TYPE) \
+ T (TYPE, shl, <<) \
+ T (TYPE, shr, >>)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
===================================================================
@@ -0,0 +1,28 @@
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "cond_shift_7.c"
+
+#define N 99
+
+#define TEST_LOOP(TYPE, NAME, OP) \
+ { \
+ TYPE r[N], a[N], b[N], c[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ a[i] = (i & 1 ? i : 3 * i); \
+ b[i] = (i >> 4) << (i & 15); \
+ c[i] = ~i & 7; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ test_##TYPE##_##NAME (r, a, b, c, N); \
+ for (int i = 0; i < N; ++i) \
+ if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP c[i] : a[i])) \
+ __builtin_abort (); \
+ }
+
+int main ()
+{
+ TEST_ALL (TEST_LOOP)
+ return 0;
+}
===================================================================
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE, NAME, OP) \
+ void __attribute__ ((noipa)) \
+ test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \
+ TYPE *__restrict b, TYPE *__restrict c, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ r[i] = a[i] > 20 ? b[i] OP c[i] : 91; \
+ }
+
+#define TEST_TYPE(T, TYPE) \
+ T (TYPE, shl, <<) \
+ T (TYPE, shr, >>)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */
+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
+/* { dg-final { scan-assembler-times {\tsel\t} 8 } } */
===================================================================
@@ -0,0 +1,28 @@
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "cond_shift_8.c"
+
+#define N 99
+
+#define TEST_LOOP(TYPE, NAME, OP) \
+ { \
+ TYPE r[N], a[N], b[N], c[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ a[i] = (i & 1 ? i : 3 * i); \
+ b[i] = (i >> 4) << (i & 15); \
+ c[i] = ~i & 7; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ test_##TYPE##_##NAME (r, a, b, c, N); \
+ for (int i = 0; i < N; ++i) \
+ if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP c[i] : 91)) \
+ __builtin_abort (); \
+ }
+
+int main ()
+{
+ TEST_ALL (TEST_LOOP)
+ return 0;
+}
===================================================================
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE, NAME, OP) \
+ void __attribute__ ((noipa)) \
+ test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \
+ TYPE *__restrict b, TYPE *__restrict c, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ r[i] = a[i] > 20 ? b[i] OP c[i] : 0; \
+ }
+
+#define TEST_TYPE(T, TYPE) \
+ T (TYPE, shl, <<) \
+ T (TYPE, shr, >>)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, int32_t) \
+ TEST_TYPE (T, uint32_t) \
+ TEST_TYPE (T, int64_t) \
+ TEST_TYPE (T, uint64_t)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tlslr?\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tlslr?\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tasrr?\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tasrr?\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tlsrr?\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsrr?\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/z, z[0-9]+\.d\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
===================================================================
@@ -0,0 +1,28 @@
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "cond_shift_9.c"
+
+#define N 99
+
+#define TEST_LOOP(TYPE, NAME, OP) \
+ { \
+ TYPE r[N], a[N], b[N], c[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ a[i] = (i & 1 ? i : 3 * i); \
+ b[i] = (i >> 4) << (i & 15); \
+ c[i] = ~i & 7; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ test_##TYPE##_##NAME (r, a, b, c, N); \
+ for (int i = 0; i < N; ++i) \
+ if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP c[i] : 0)) \
+ __builtin_abort (); \
+ }
+
+int main ()
+{
+ TEST_ALL (TEST_LOOP)
+ return 0;
+}