@@ -247,16 +247,47 @@ static bool arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
static bool
arc_vector_mode_supported_p (machine_mode mode)
{
- if (!TARGET_SIMD_SET)
- return false;
+ switch (mode) /* Each vector mode is gated by the option that enables it.  */
+ {
+ case V2HImode: /* Two halfwords.  */
+ return TARGET_PLUS_DMPY;
+ case V4HImode: /* Four halfwords.  */
+ case V2SImode: /* Two words.  */
+ return TARGET_PLUS_QMACW;
+ case V4SImode: /* Same TARGET_SIMD_SET gate as the code being replaced.  */
+ case V8HImode:
+ return TARGET_SIMD_SET;
- if ((mode == V4SImode)
- || (mode == V8HImode))
- return true;
+ default: /* Any other mode is reported as unsupported.  */
+ return false;
+ }
+}
- return false;
+/* Target hook TARGET_VECTORIZE_PREFERRED_SIMD_MODE: vector mode for scalar MODE.  */
+
+static enum machine_mode
+arc_preferred_simd_mode (enum machine_mode mode)
+{
+  /* Halfword elements: four lanes with QMACW, two lanes otherwise.  */
+  if (mode == HImode)
+    return TARGET_PLUS_QMACW ? V4HImode : V2HImode;
+
+  /* Word elements always map to the two-lane vector mode.  */
+  if (mode == SImode)
+    return V2SImode;
+
+  /* Every other scalar mode falls back to word_mode.  */
+  return word_mode;
}
+/* Hook TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES: 8 | 4 with QMACW, else 0.  */
+static unsigned int
+arc_autovectorize_vector_sizes (void)
+{
+  if (!TARGET_PLUS_QMACW)
+    return 0;
+  return 8 | 4;
+}
/* TARGET_PRESERVE_RELOAD_P is still awaiting patch re-evaluation / review. */
static bool arc_preserve_reload_p (rtx in) ATTRIBUTE_UNUSED;
@@ -345,6 +376,12 @@ static void arc_finalize_pic (void);
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arc_vector_mode_supported_p
+#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
+#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arc_preferred_simd_mode
+
+#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
+#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES arc_autovectorize_vector_sizes
+
#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P arc_can_use_doloop_p
@@ -1214,7 +1251,12 @@ arc_init_reg_tables (void)
arc_mode_class[i] = 0;
break;
case MODE_VECTOR_INT:
- arc_mode_class [i] = (1<< (int) V_MODE);
+ if (GET_MODE_SIZE (m) == 4)
+ arc_mode_class[i] = (1 << (int) S_MODE);
+ else if (GET_MODE_SIZE (m) == 8)
+ arc_mode_class[i] = (1 << (int) D_MODE);
+ else
+ arc_mode_class[i] = (1 << (int) V_MODE);
break;
case MODE_CC:
default:
@@ -5277,6 +5319,15 @@ arc_builtin_decl (unsigned id, bool initialize_p ATTRIBUTE_UNUSED)
static void
arc_init_builtins (void)
{
+ tree V4HI_type_node;
+ tree V2SI_type_node;
+ tree V2HI_type_node;
+
+ /* Vector types based on HS SIMD elements. */
+ V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
+ V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
+ V2HI_type_node = build_vector_type_for_mode (intHI_type_node, V2HImode);
+
tree pcvoid_type_node
= build_pointer_type (build_qualified_type (void_type_node,
TYPE_QUAL_CONST));
@@ -5341,6 +5392,28 @@ arc_init_builtins (void)
tree v8hi_ftype_v8hi
= build_function_type_list (V8HI_type_node, V8HI_type_node,
NULL_TREE);
+ /* ARCv2 SIMD types. */
+ tree long_ftype_v4hi_v4hi
+ = build_function_type_list (long_long_integer_type_node,
+ V4HI_type_node, V4HI_type_node, NULL_TREE);
+ tree int_ftype_v2hi_v2hi
+ = build_function_type_list (integer_type_node,
+ V2HI_type_node, V2HI_type_node, NULL_TREE);
+ tree v2si_ftype_v2hi_v2hi
+ = build_function_type_list (V2SI_type_node,
+ V2HI_type_node, V2HI_type_node, NULL_TREE);
+ tree v2hi_ftype_v2hi_v2hi
+ = build_function_type_list (V2HI_type_node,
+ V2HI_type_node, V2HI_type_node, NULL_TREE);
+ tree v2si_ftype_v2si_v2si
+ = build_function_type_list (V2SI_type_node,
+ V2SI_type_node, V2SI_type_node, NULL_TREE);
+ tree v4hi_ftype_v4hi_v4hi
+ = build_function_type_list (V4HI_type_node,
+ V4HI_type_node, V4HI_type_node, NULL_TREE);
+ tree long_ftype_v2si_v2hi
+ = build_function_type_list (long_long_integer_type_node,
+ V2SI_type_node, V2HI_type_node, NULL_TREE);
/* Add the builtins. */
#define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK) \
@@ -8706,6 +8779,31 @@ arc_split_move (rtx *operands)
return;
}
+ if (TARGET_PLUS_QMACW
+ && GET_CODE (operands[1]) == CONST_VECTOR)
+ {
+ HOST_WIDE_INT intval0, intval1;
+ if (GET_MODE (operands[1]) == V2SImode)
+ {
+ intval0 = INTVAL (XVECEXP (operands[1], 0, 0));
+ intval1 = INTVAL (XVECEXP (operands[1], 0, 1));
+ }
+ else
+ {
+ intval1 = INTVAL (XVECEXP (operands[1], 0, 3)) << 16;
+ intval1 |= INTVAL (XVECEXP (operands[1], 0, 2)) & 0xFFFF;
+ intval0 = INTVAL (XVECEXP (operands[1], 0, 1)) << 16;
+ intval0 |= INTVAL (XVECEXP (operands[1], 0, 0)) & 0xFFFF;
+ }
+ xop[0] = gen_rtx_REG (SImode, REGNO (operands[0]));
+ xop[3] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ xop[2] = GEN_INT (trunc_int_for_mode (intval0, SImode));
+ xop[1] = GEN_INT (trunc_int_for_mode (intval1, SImode));
+ emit_move_insn (xop[0], xop[2]);
+ emit_move_insn (xop[3], xop[1]);
+ return;
+ }
+
for (i = 0; i < 2; i++)
{
if (MEM_P (operands[i]) && auto_inc_p (XEXP (operands[i], 0)))
@@ -1723,6 +1723,12 @@ enum
/* Any multiplication feature macro. */
#define TARGET_ANY_MPY \
(TARGET_MPY || TARGET_MUL64_SET || TARGET_MULMAC_32BY16_SET)
+/* PLUS_DMPY feature macro: TARGET_HS with arc_mpy_option above 6.  */
+#define TARGET_PLUS_DMPY ((arc_mpy_option > 6) && TARGET_HS)
+/* PLUS_MACD feature macro: TARGET_HS with arc_mpy_option above 7; implies PLUS_DMPY.  */
+#define TARGET_PLUS_MACD ((arc_mpy_option > 7) && TARGET_HS)
+/* PLUS_QMACW feature macro: TARGET_HS with arc_mpy_option above 8; implies PLUS_MACD.  */
+#define TARGET_PLUS_QMACW ((arc_mpy_option > 8) && TARGET_HS)
/* ARC600 and ARC601 feature macro. */
#define TARGET_ARC600_FAMILY (TARGET_ARC600 || TARGET_ARC601)
@@ -193,3 +193,30 @@ DEF_BUILTIN (VINTI, 1, void_ftype_int, vinti_insn, TARGET_SIMD_SET)
/* END SIMD marker. */
DEF_BUILTIN (SIMD_END, 0, void_ftype_void, nothing, 0)
+
+/* ARCv2 SIMD instructions that use/clobber the accumulator reg. */
+DEF_BUILTIN (QMACH, 2, long_ftype_v4hi_v4hi, qmach, TARGET_PLUS_QMACW)
+DEF_BUILTIN (QMACHU, 2, long_ftype_v4hi_v4hi, qmachu, TARGET_PLUS_QMACW)
+DEF_BUILTIN (QMPYH, 2, long_ftype_v4hi_v4hi, qmpyh, TARGET_PLUS_QMACW)
+DEF_BUILTIN (QMPYHU, 2, long_ftype_v4hi_v4hi, qmpyhu, TARGET_PLUS_QMACW)
+
+DEF_BUILTIN (DMACH, 2, int_ftype_v2hi_v2hi, dmach, TARGET_PLUS_DMPY)
+DEF_BUILTIN (DMACHU, 2, int_ftype_v2hi_v2hi, dmachu, TARGET_PLUS_DMPY)
+DEF_BUILTIN (DMPYH, 2, int_ftype_v2hi_v2hi, dmpyh, TARGET_PLUS_DMPY)
+DEF_BUILTIN (DMPYHU, 2, int_ftype_v2hi_v2hi, dmpyhu, TARGET_PLUS_DMPY)
+
+DEF_BUILTIN (DMACWH, 2, long_ftype_v2si_v2hi, dmacwh, TARGET_PLUS_QMACW)
+DEF_BUILTIN (DMACWHU, 2, long_ftype_v2si_v2hi, dmacwhu, TARGET_PLUS_QMACW)
+
+DEF_BUILTIN (VMAC2H, 2, v2si_ftype_v2hi_v2hi, vmac2h, TARGET_PLUS_MACD)
+DEF_BUILTIN (VMAC2HU, 2, v2si_ftype_v2hi_v2hi, vmac2hu, TARGET_PLUS_MACD)
+DEF_BUILTIN (VMPY2H, 2, v2si_ftype_v2hi_v2hi, vmpy2h, TARGET_PLUS_MACD)
+DEF_BUILTIN (VMPY2HU, 2, v2si_ftype_v2hi_v2hi, vmpy2hu, TARGET_PLUS_MACD)
+
+/* Combined add/sub HS SIMD instructions. */
+DEF_BUILTIN (VADDSUB2H, 2, v2hi_ftype_v2hi_v2hi, addsubv2hi3, TARGET_PLUS_DMPY)
+DEF_BUILTIN (VSUBADD2H, 2, v2hi_ftype_v2hi_v2hi, subaddv2hi3, TARGET_PLUS_DMPY)
+DEF_BUILTIN (VADDSUB, 2, v2si_ftype_v2si_v2si, addsubv2si3, TARGET_PLUS_QMACW)
+DEF_BUILTIN (VSUBADD, 2, v2si_ftype_v2si_v2si, subaddv2si3, TARGET_PLUS_QMACW)
+DEF_BUILTIN (VADDSUB4H, 2, v4hi_ftype_v4hi_v4hi, addsubv4hi3, TARGET_PLUS_QMACW)
+DEF_BUILTIN (VSUBADD4H, 2, v4hi_ftype_v4hi_v4hi, subaddv4hi3, TARGET_PLUS_QMACW)
@@ -1288,3 +1288,574 @@
[(set_attr "type" "simd_vcontrol")
(set_attr "length" "4")
(set_attr "cond" "nocond")])
+
+;; New ARCv2 SIMD extensions
+
+;;64-bit vectors of halfwords and words
+(define_mode_iterator VWH [V4HI V2SI])
+
+;;double element vectors
+(define_mode_iterator VDV [V2HI V2SI])
+(define_mode_attr V_addsub [(V2HI "HI") (V2SI "SI")])
+(define_mode_attr V_addsub_suffix [(V2HI "2h") (V2SI "")])
+
+;;all vectors
+(define_mode_iterator VCT [V2HI V4HI V2SI])
+(define_mode_attr V_suffix [(V2HI "2h") (V4HI "4h") (V2SI "2")])
+
+;; Widening operations.
+(define_code_iterator SE [sign_extend zero_extend])
+(define_code_attr V_US [(sign_extend "s") (zero_extend "u")])
+(define_code_attr V_US_suffix [(sign_extend "") (zero_extend "u")])
+
+
+;; Move patterns
+(define_expand "movv2hi" ;; V2HI move expander: finishes with DONE when prepare_move_operands returns nonzero.
+ [(set (match_operand:V2HI 0 "move_dest_operand" "")
+ (match_operand:V2HI 1 "general_operand" ""))]
+ "" ;; Always enabled.
+ "{
+ if (prepare_move_operands (operands, V2HImode))
+ DONE;
+ }")
+
+(define_insn_and_split "*movv2hi_insn" ;; V2HI move; a constant-vector source is split into one SImode move.
+  [(set (match_operand:V2HI 0 "nonimmediate_operand" "=r,r,r,m")
+        (match_operand:V2HI 1 "general_operand" "i,r,m,r"))]
+  "(register_operand (operands[0], V2HImode)
+    || register_operand (operands[1], V2HImode))"
+  "@
+   #
+   mov%? %0, %1
+   ld%U1%V1 %0,%1
+   st%U0%V0 %1,%0"
+  "reload_completed && GET_CODE (operands[1]) == CONST_VECTOR" ;; Only the immediate alternative is split.
+  [(set (match_dup 0) (match_dup 2))]
+  {
+    /* Mask lane 1 to 16 bits first so a negative element is never left-shifted.  */
+    HOST_WIDE_INT intval = (INTVAL (XVECEXP (operands[1], 0, 1)) & 0xFFFF) << 16;
+    intval |= INTVAL (XVECEXP (operands[1], 0, 0)) & 0xFFFF; /* Lane 0 fills the low half.  */
+    operands[0] = gen_rtx_REG (SImode, REGNO (operands[0])); /* Same register, viewed as SImode.  */
+    operands[2] = GEN_INT (trunc_int_for_mode (intval, SImode));
+  }
+  [(set_attr "type" "move,move,load,store")
+   (set_attr "predicable" "yes,yes,no,no")
+   (set_attr "iscompact" "false,false,false,false")
+   ])
+
+(define_expand "movmisalignv2hi"
+ [(set (match_operand:V2HI 0 "general_operand" "")
+ (match_operand:V2HI 1 "general_operand" ""))]
+ ""
+{
+ if (!register_operand (operands[0], V2HImode)
+ && !register_operand (operands[1], V2HImode))
+ operands[1] = force_reg (V2HImode, operands[1]);
+})
+
+(define_expand "mov<mode>"
+ [(set (match_operand:VWH 0 "move_dest_operand" "")
+ (match_operand:VWH 1 "general_operand" ""))]
+ ""
+ "{
+ if (GET_CODE (operands[0]) == MEM)
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+ }")
+
+(define_insn_and_split "*mov<mode>_insn"
+ [(set (match_operand:VWH 0 "move_dest_operand" "=r,r,r,m")
+ (match_operand:VWH 1 "general_operand" "i,r,m,r"))]
+ "TARGET_PLUS_QMACW
+ && (register_operand (operands[0], <MODE>mode)
+ || register_operand (operands[1], <MODE>mode))"
+ "*
+{
+ switch (which_alternative)
+ {
+ default:
+ return \"#\";
+
+ case 1:
+ return \"vadd2 %0, %1, 0\";
+
+ case 2:
+ if (TARGET_LL64)
+ return \"ldd%U1%V1 %0,%1\";
+ return \"#\";
+
+ case 3:
+ if (TARGET_LL64)
+ return \"std%U0%V0 %1,%0\";
+ return \"#\";
+ }
+}"
+ "reload_completed"
+ [(const_int 0)]
+ {
+ arc_split_move (operands);
+ DONE;
+ }
+ [(set_attr "type" "move,move,load,store")
+ (set_attr "predicable" "yes,no,no,no")
+ (set_attr "iscompact" "false,false,false,false")
+ ])
+
+(define_expand "movmisalign<mode>"
+ [(set (match_operand:VWH 0 "general_operand" "")
+ (match_operand:VWH 1 "general_operand" ""))]
+ ""
+{
+ if (!register_operand (operands[0], <MODE>mode)
+ && !register_operand (operands[1], <MODE>mode))
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+})
+
+(define_insn "bswapv2hi2"
+ [(set (match_operand:V2HI 0 "register_operand" "=r,r")
+ (bswap:V2HI (match_operand:V2HI 1 "nonmemory_operand" "r,i")))]
+ "TARGET_V2 && TARGET_SWAP"
+ "swape %0, %1"
+ [(set_attr "length" "4,8")
+ (set_attr "type" "two_cycle_core")])
+
+;; Simple arithmetic insns
+(define_insn "add<mode>3"
+ [(set (match_operand:VCT 0 "register_operand" "=r,r")
+ (plus:VCT (match_operand:VCT 1 "register_operand" "0,r")
+ (match_operand:VCT 2 "register_operand" "r,r")))]
+ "TARGET_PLUS_DMPY"
+ "vadd<V_suffix>%? %0, %1, %2"
+ [(set_attr "length" "4")
+ (set_attr "type" "multi")
+ (set_attr "predicable" "yes,no")
+ (set_attr "cond" "canuse,nocond")])
+
+(define_insn "sub<mode>3"
+ [(set (match_operand:VCT 0 "register_operand" "=r,r")
+ (minus:VCT (match_operand:VCT 1 "register_operand" "0,r")
+ (match_operand:VCT 2 "register_operand" "r,r")))]
+ "TARGET_PLUS_DMPY"
+ "vsub<V_suffix>%? %0, %1, %2"
+ [(set_attr "length" "4")
+ (set_attr "type" "multi")
+ (set_attr "predicable" "yes,no")
+ (set_attr "cond" "canuse,nocond")])
+
+;; Combined arithmetic ops
+(define_insn "addsub<mode>3"
+ [(set (match_operand:VDV 0 "register_operand" "=r,r")
+ (vec_concat:VDV
+ (plus:<V_addsub> (vec_select:<V_addsub> (match_operand:VDV 1 "register_operand" "0,r")
+ (parallel [(const_int 0)]))
+ (vec_select:<V_addsub> (match_operand:VDV 2 "register_operand" "r,r")
+ (parallel [(const_int 0)])))
+ (minus:<V_addsub> (vec_select:<V_addsub> (match_dup 1) (parallel [(const_int 1)]))
+ (vec_select:<V_addsub> (match_dup 2) (parallel [(const_int 1)])))))]
+ "TARGET_PLUS_DMPY"
+ "vaddsub<V_addsub_suffix>%? %0, %1, %2"
+ [(set_attr "length" "4")
+ (set_attr "type" "multi")
+ (set_attr "predicable" "yes,no")
+ (set_attr "cond" "canuse,nocond")])
+
+(define_insn "subadd<mode>3"
+ [(set (match_operand:VDV 0 "register_operand" "=r,r")
+ (vec_concat:VDV
+ (minus:<V_addsub> (vec_select:<V_addsub> (match_operand:VDV 1 "register_operand" "0,r")
+ (parallel [(const_int 0)]))
+ (vec_select:<V_addsub> (match_operand:VDV 2 "register_operand" "r,r")
+ (parallel [(const_int 0)])))
+ (plus:<V_addsub> (vec_select:<V_addsub> (match_dup 1) (parallel [(const_int 1)]))
+ (vec_select:<V_addsub> (match_dup 2) (parallel [(const_int 1)])))))]
+ "TARGET_PLUS_DMPY"
+ "vsubadd<V_addsub_suffix>%? %0, %1, %2"
+ [(set_attr "length" "4")
+ (set_attr "type" "multi")
+ (set_attr "predicable" "yes,no")
+ (set_attr "cond" "canuse,nocond")])
+
+(define_insn "addsubv4hi3"
+ [(set (match_operand:V4HI 0 "even_register_operand" "=r,r")
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (plus:HI (vec_select:HI (match_operand:V4HI 1 "even_register_operand" "0,r")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_operand:V4HI 2 "even_register_operand" "r,r")
+ (parallel [(const_int 0)])))
+ (minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))))
+ (vec_concat:V2HI
+ (plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)])))
+ (minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ ))]
+ "TARGET_PLUS_QMACW"
+ "vaddsub4h%? %0, %1, %2"
+ [(set_attr "length" "4")
+ (set_attr "type" "multi")
+ (set_attr "predicable" "yes,no")
+ (set_attr "cond" "canuse,nocond")])
+
+(define_insn "subaddv4hi3"
+ [(set (match_operand:V4HI 0 "even_register_operand" "=r,r")
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (minus:HI (vec_select:HI (match_operand:V4HI 1 "even_register_operand" "0,r")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_operand:V4HI 2 "even_register_operand" "r,r")
+ (parallel [(const_int 0)])))
+ (plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))))
+ (vec_concat:V2HI
+ (minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)])))
+ (plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ ))]
+ "TARGET_PLUS_QMACW"
+ "vsubadd4h%? %0, %1, %2"
+ [(set_attr "length" "4")
+ (set_attr "type" "multi")
+ (set_attr "predicable" "yes,no")
+ (set_attr "cond" "canuse,nocond")])
+
+;; Multiplication
+(define_insn "dmpyh<V_US_suffix>"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI
+ (mult:SI
+ (SE:SI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "0,r")
+ (parallel [(const_int 0)])))
+ (SE:SI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "r,r")
+ (parallel [(const_int 0)]))))
+ (mult:SI
+ (SE:SI (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (SE:SI (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))))))
+ (set (reg:DI ARCV2_ACC)
+ (zero_extend:DI
+ (plus:SI
+ (mult:SI
+ (SE:SI (vec_select:HI (match_dup 1) (parallel [(const_int 0)])))
+ (SE:SI (vec_select:HI (match_dup 2) (parallel [(const_int 0)]))))
+ (mult:SI
+ (SE:SI (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (SE:SI (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))))]
+ "TARGET_PLUS_DMPY"
+ "dmpy<V_US_suffix>%? %0, %1, %2"
+ [(set_attr "length" "4")
+ (set_attr "type" "multi")
+ (set_attr "predicable" "yes,no")
+ (set_attr "cond" "canuse,nocond")])
+
+;; We can use dmac as well here. To be investigated which version
+;; brings more.
+(define_expand "sdot_prodv2hi" ;; operand 0 = operand 3 + result of the signed dmpyh pattern on operands 1 and 2.
+ [(match_operand:SI 0 "register_operand" "")
+ (match_operand:V2HI 1 "register_operand" "")
+ (match_operand:V2HI 2 "register_operand" "")
+ (match_operand:SI 3 "register_operand" "")]
+ "TARGET_PLUS_DMPY"
+{
+ rtx t = gen_reg_rtx (SImode); /* Fresh pseudo for the dmpyh result.  */
+ emit_insn (gen_dmpyh (t, operands[1], operands[2]));
+ emit_insn (gen_addsi3 (operands[0], operands[3], t)); /* Add the incoming sum operand.  */
+ DONE;
+})
+
+(define_expand "udot_prodv2hi" ;; operand 0 = operand 3 + result of the unsigned dmpyhu pattern on operands 1 and 2.
+ [(match_operand:SI 0 "register_operand" "")
+ (match_operand:V2HI 1 "register_operand" "")
+ (match_operand:V2HI 2 "register_operand" "")
+ (match_operand:SI 3 "register_operand" "")]
+ "TARGET_PLUS_DMPY"
+{
+ rtx t = gen_reg_rtx (SImode); /* Fresh pseudo for the dmpyhu result.  */
+ emit_insn (gen_dmpyhu (t, operands[1], operands[2]));
+ emit_insn (gen_addsi3 (operands[0], operands[3], t)); /* Add the incoming sum operand.  */
+ DONE;
+})
+
+(define_insn "arc_vec_<V_US>mult_lo_v4hi"
+ [(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
+ (mult:V2SI (SE:V2SI (vec_select:V2HI
+ (match_operand:V4HI 1 "even_register_operand" "0,r")
+ (parallel [(const_int 0) (const_int 1)])))
+ (SE:V2SI (vec_select:V2HI
+ (match_operand:V4HI 2 "even_register_operand" "r,r")
+ (parallel [(const_int 0) (const_int 1)])))))
+ (set (reg:V2SI ARCV2_ACC)
+ (mult:V2SI (SE:V2SI (vec_select:V2HI (match_dup 1)
+ (parallel [(const_int 0) (const_int 1)])))
+ (SE:V2SI (vec_select:V2HI (match_dup 2)
+ (parallel [(const_int 0) (const_int 1)])))))
+ ]
+ "TARGET_PLUS_MACD"
+ "vmpy2h<V_US_suffix>%? %0, %1, %2"
+ [(set_attr "length" "4")
+ (set_attr "type" "multi")
+ (set_attr "predicable" "yes,no")
+ (set_attr "cond" "canuse,nocond")])
+
+(define_insn "arc_vec_<V_US>multacc_lo_v4hi"
+ [(set (reg:V2SI ARCV2_ACC)
+ (mult:V2SI (SE:V2SI (vec_select:V2HI
+ (match_operand:V4HI 0 "even_register_operand" "r")
+ (parallel [(const_int 0) (const_int 1)])))
+ (SE:V2SI (vec_select:V2HI
+ (match_operand:V4HI 1 "even_register_operand" "r")
+ (parallel [(const_int 0) (const_int 1)])))))
+ ]
+ "TARGET_PLUS_MACD"
+ "vmpy2h<V_US_suffix>%? 0, %0, %1"
+ [(set_attr "length" "4")
+ (set_attr "type" "multi")
+ (set_attr "predicable" "no")
+ (set_attr "cond" "nocond")])
+
+(define_expand "vec_widen_<V_US>mult_lo_v4hi"
+ [(set (match_operand:V2SI 0 "even_register_operand" "")
+ (mult:V2SI (SE:V2SI (vec_select:V2HI
+ (match_operand:V4HI 1 "even_register_operand" "")
+ (parallel [(const_int 0) (const_int 1)])))
+ (SE:V2SI (vec_select:V2HI
+ (match_operand:V4HI 2 "even_register_operand" "")
+ (parallel [(const_int 0) (const_int 1)])))))]
+ "TARGET_PLUS_QMACW"
+ {
+ emit_insn (gen_arc_vec_<V_US>mult_lo_v4hi (operands[0],
+ operands[1],
+ operands[2]));
+ DONE;
+ }
+)
+
+(define_insn "arc_vec_<V_US>mult_hi_v4hi" ;; Widening multiply of lanes 2 and 3 of two V4HI inputs; also sets ARCV2_ACC.
+ [(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
+ (mult:V2SI (SE:V2SI (vec_select:V2HI
+ (match_operand:V4HI 1 "even_register_operand" "0,r")
+ (parallel [(const_int 2) (const_int 3)])))
+ (SE:V2SI (vec_select:V2HI
+ (match_operand:V4HI 2 "even_register_operand" "r,r")
+ (parallel [(const_int 2) (const_int 3)])))))
+ (set (reg:V2SI ARCV2_ACC) ;; The accumulator receives the same products as operand 0.
+ (mult:V2SI (SE:V2SI (vec_select:V2HI (match_dup 1)
+ (parallel [(const_int 2) (const_int 3)])))
+ (SE:V2SI (vec_select:V2HI (match_dup 2)
+ (parallel [(const_int 2) (const_int 3)])))))
+ ]
+ "TARGET_PLUS_QMACW"
+ "vmpy2h<V_US_suffix>%? %0, %R1, %R2" ;; NOTE(review): %R presumably prints the second register of each input pair -- confirm in arc_print_operand.
+ [(set_attr "length" "4")
+ (set_attr "type" "multi")
+ (set_attr "predicable" "yes,no")
+ (set_attr "cond" "canuse,nocond")])
+
+(define_expand "vec_widen_<V_US>mult_hi_v4hi" ;; Standard-name wrapper around arc_vec_<V_US>mult_hi_v4hi.
+  [(set (match_operand:V2SI 0 "even_register_operand" "")
+        (mult:V2SI (SE:V2SI (vec_select:V2HI
+                             (match_operand:V4HI 1 "even_register_operand" "")
+                             (parallel [(const_int 2) (const_int 3)])))
+                   (SE:V2SI (vec_select:V2HI
+                             (match_operand:V4HI 2 "even_register_operand" "")
+                             (parallel [(const_int 2) (const_int 3)])))))]
+  "TARGET_PLUS_QMACW" ;; Must not be weaker than the condition of the insn emitted below.
+  {
+    emit_insn (gen_arc_vec_<V_US>mult_hi_v4hi (operands[0],
+                                               operands[1],
+                                               operands[2]));
+    DONE; /* The pattern above is only a template; the insn was emitted by hand.  */
+  }
+)
+
+(define_insn "arc_vec_<V_US>mac_hi_v4hi"
+ [(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
+ (plus:V2SI
+ (reg:V2SI ARCV2_ACC)
+ (mult:V2SI (SE:V2SI (vec_select:V2HI
+ (match_operand:V4HI 1 "even_register_operand" "0,r")
+ (parallel [(const_int 2) (const_int 3)])))
+ (SE:V2SI (vec_select:V2HI
+ (match_operand:V4HI 2 "even_register_operand" "r,r")
+ (parallel [(const_int 2) (const_int 3)]))))))
+ (set (reg:V2SI ARCV2_ACC)
+ (plus:V2SI
+ (reg:V2SI ARCV2_ACC)
+ (mult:V2SI (SE:V2SI (vec_select:V2HI (match_dup 1)
+ (parallel [(const_int 2) (const_int 3)])))
+ (SE:V2SI (vec_select:V2HI (match_dup 2)
+ (parallel [(const_int 2) (const_int 3)]))))))
+ ]
+ "TARGET_PLUS_MACD"
+ "vmac2h<V_US_suffix>%? %0, %R1, %R2"
+ [(set_attr "length" "4")
+ (set_attr "type" "multi")
+ (set_attr "predicable" "yes,no")
+ (set_attr "cond" "canuse,nocond")])
+
+;; Builtins
+(define_insn "dmach"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI [(match_operand:V2HI 1 "register_operand" "0,r")
+ (match_operand:V2HI 2 "register_operand" "r,r")
+ (reg:DI ARCV2_ACC)]
+ UNSPEC_ARC_DMACH))
+ (clobber (reg:DI ARCV2_ACC))]
+ "TARGET_PLUS_DMPY"
+ "dmach%? %0, %1, %2"
+ [(set_attr "length" "4")
+ (set_attr "type" "multi")
+ (set_attr "predicable" "yes,no")
+ (set_attr "cond" "canuse,nocond")])
+
+(define_insn "dmachu"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI [(match_operand:V2HI 1 "register_operand" "0,r")
+ (match_operand:V2HI 2 "register_operand" "r,r")
+ (reg:DI ARCV2_ACC)]
+ UNSPEC_ARC_DMACHU))
+ (clobber (reg:DI ARCV2_ACC))]
+ "TARGET_PLUS_DMPY"
+ "dmachu%? %0, %1, %2"
+ [(set_attr "length" "4")
+ (set_attr "type" "multi")
+ (set_attr "predicable" "yes,no")
+ (set_attr "cond" "canuse,nocond")])
+
+(define_insn "dmacwh"
+ [(set (match_operand:DI 0 "even_register_operand" "=r,r")
+ (unspec:DI [(match_operand:V2SI 1 "even_register_operand" "0,r")
+ (match_operand:V2HI 2 "register_operand" "r,r")
+ (reg:DI ARCV2_ACC)]
+ UNSPEC_ARC_DMACWH))
+ (clobber (reg:DI ARCV2_ACC))]
+ "TARGET_PLUS_QMACW"
+ "dmacwh%? %0, %1, %2"
+ [(set_attr "length" "4")
+ (set_attr "type" "multi")
+ (set_attr "predicable" "yes,no")
+ (set_attr "cond" "canuse,nocond")])
+
+(define_insn "dmacwhu" ;; Builtin insn for dmacwhu; mirrors the dmacwh pattern.
+  [(set (match_operand:DI 0 "even_register_operand" "=r,r") ;; Same destination predicate as dmacwh.
+        (unspec:DI [(match_operand:V2SI 1 "even_register_operand" "0,r")
+                    (match_operand:V2HI 2 "register_operand" "r,r")
+                    (reg:DI ARCV2_ACC)] ;; The accumulator is listed as an input ...
+                   UNSPEC_ARC_DMACWHU))
+   (clobber (reg:DI ARCV2_ACC))] ;; ... and is clobbered by the insn.
+  "TARGET_PLUS_QMACW"
+  "dmacwhu%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "vmac2h"
+ [(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
+ (unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r")
+ (match_operand:V2HI 2 "register_operand" "r,r")
+ (reg:DI ARCV2_ACC)]
+ UNSPEC_ARC_VMAC2H))
+ (clobber (reg:DI ARCV2_ACC))]
+ "TARGET_PLUS_MACD"
+ "vmac2h%? %0, %1, %2"
+ [(set_attr "length" "4")
+ (set_attr "type" "multi")
+ (set_attr "predicable" "yes,no")
+ (set_attr "cond" "canuse,nocond")])
+
+(define_insn "vmac2hu"
+ [(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
+ (unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r")
+ (match_operand:V2HI 2 "register_operand" "r,r")
+ (reg:DI ARCV2_ACC)]
+ UNSPEC_ARC_VMAC2HU))
+ (clobber (reg:DI ARCV2_ACC))]
+ "TARGET_PLUS_MACD"
+ "vmac2hu%? %0, %1, %2"
+ [(set_attr "length" "4")
+ (set_attr "type" "multi")
+ (set_attr "predicable" "yes,no")
+ (set_attr "cond" "canuse,nocond")])
+
+(define_insn "vmpy2h"
+ [(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
+ (unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r")
+ (match_operand:V2HI 2 "register_operand" "r,r")]
+ UNSPEC_ARC_VMPY2H))
+ (clobber (reg:DI ARCV2_ACC))]
+ "TARGET_PLUS_MACD"
+ "vmpy2h%? %0, %1, %2"
+ [(set_attr "length" "4")
+ (set_attr "type" "multi")
+ (set_attr "predicable" "yes,no")
+ (set_attr "cond" "canuse,nocond")])
+
+(define_insn "vmpy2hu"
+ [(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
+ (unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r")
+ (match_operand:V2HI 2 "register_operand" "r,r")]
+ UNSPEC_ARC_VMPY2HU))
+ (clobber (reg:DI ARCV2_ACC))]
+ "TARGET_PLUS_MACD"
+ "vmpy2hu%? %0, %1, %2"
+ [(set_attr "length" "4")
+ (set_attr "type" "multi")
+ (set_attr "predicable" "yes,no")
+ (set_attr "cond" "canuse,nocond")])
+
+(define_insn "qmach"
+ [(set (match_operand:DI 0 "even_register_operand" "=r,r")
+ (unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r")
+ (match_operand:V4HI 2 "even_register_operand" "r,r")
+ (reg:DI ARCV2_ACC)]
+ UNSPEC_ARC_QMACH))
+ (clobber (reg:DI ARCV2_ACC))]
+ "TARGET_PLUS_QMACW"
+ "qmach%? %0, %1, %2"
+ [(set_attr "length" "4")
+ (set_attr "type" "multi")
+ (set_attr "predicable" "yes,no")
+ (set_attr "cond" "canuse,nocond")])
+
+(define_insn "qmachu"
+ [(set (match_operand:DI 0 "even_register_operand" "=r,r")
+ (unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r")
+ (match_operand:V4HI 2 "even_register_operand" "r,r")
+ (reg:DI ARCV2_ACC)]
+ UNSPEC_ARC_QMACHU))
+ (clobber (reg:DI ARCV2_ACC))]
+ "TARGET_PLUS_QMACW"
+ "qmachu%? %0, %1, %2"
+ [(set_attr "length" "4")
+ (set_attr "type" "multi")
+ (set_attr "predicable" "yes,no")
+ (set_attr "cond" "canuse,nocond")])
+
+(define_insn "qmpyh"
+ [(set (match_operand:DI 0 "even_register_operand" "=r,r")
+ (unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r")
+ (match_operand:V4HI 2 "even_register_operand" "r,r")]
+ UNSPEC_ARC_QMPYH))
+ (clobber (reg:DI ARCV2_ACC))]
+ "TARGET_PLUS_QMACW"
+ "qmpyh%? %0, %1, %2"
+ [(set_attr "length" "4")
+ (set_attr "type" "multi")
+ (set_attr "predicable" "yes,no")
+ (set_attr "cond" "canuse,nocond")])
+
+(define_insn "qmpyhu"
+ [(set (match_operand:DI 0 "even_register_operand" "=r,r")
+ (unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r")
+ (match_operand:V4HI 2 "even_register_operand" "r,r")]
+ UNSPEC_ARC_QMPYHU))
+ (clobber (reg:DI ARCV2_ACC))]
+ "TARGET_PLUS_QMACW"
+ "qmpyhu%? %0, %1, %2"
+ [(set_attr "length" "4")
+ (set_attr "type" "multi")
+ (set_attr "predicable" "yes,no")
+ (set_attr "cond" "canuse,nocond")])
new file mode 100644
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=archs -O2 -Werror-implicit-function-declaration -mmpy-option=9" } */
+
+#define STEST(name, rettype, op1type, op2type) \
+ rettype test_ ## name \
+ (op1type a, op2type b) \
+ { \
+ return __builtin_arc_ ## name (a, b); \
+ }
+
+typedef short v2hi __attribute__ ((vector_size (4)));
+typedef short v4hi __attribute__ ((vector_size (8)));
+typedef int v2si __attribute__ ((vector_size (8)));
+
+STEST (qmach, long long, v4hi, v4hi)
+STEST (qmachu, long long, v4hi, v4hi)
+STEST (qmpyh, long long, v4hi, v4hi)
+STEST (qmpyhu, long long, v4hi, v4hi)
+
+STEST (dmach, int, v2hi, v2hi)
+STEST (dmachu, int, v2hi, v2hi)
+STEST (dmpyh, int, v2hi, v2hi)
+STEST (dmpyhu, int, v2hi, v2hi)
+
+STEST (dmacwh, long long, v2si, v2hi) /* Return type matches the builtin's long long result.  */
+STEST (dmacwhu, long long, v2si, v2hi)
+
+STEST (vmac2h, v2si, v2hi, v2hi)
+STEST (vmac2hu, v2si, v2hi, v2hi)
+STEST (vmpy2h, v2si, v2hi, v2hi)
+STEST (vmpy2hu, v2si, v2hi, v2hi)
+
+STEST (vaddsub2h, v2hi, v2hi, v2hi)
+STEST (vsubadd2h, v2hi, v2hi, v2hi)
+STEST (vaddsub, v2si, v2si, v2si)
+STEST (vsubadd, v2si, v2si, v2si)
+STEST (vaddsub4h, v4hi, v4hi, v4hi)
+STEST (vsubadd4h, v4hi, v4hi, v4hi)