@@ -163,6 +163,12 @@ (define_mode_attr avx_avx2
(V4SI "avx2") (V2DI "avx2")
(V8SI "avx2") (V4DI "avx2")])
+(define_mode_attr vec_avx2
+ [(V16QI "vec") (V32QI "avx2")
+ (V8HI "vec") (V16HI "avx2")
+ (V4SI "vec") (V8SI "avx2")
+ (V2DI "vec") (V4DI "avx2")])
+
;; Mapping of logic-shift operators
(define_code_iterator lshift [lshiftrt ashift])
@@ -4838,10 +4844,10 @@ (define_insn "*<sse2_avx2>_<plusminus_in
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
-(define_insn_and_split "mulv16qi3"
- [(set (match_operand:V16QI 0 "register_operand" "")
- (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
- (match_operand:V16QI 2 "register_operand" "")))]
+(define_insn_and_split "mul<mode>3"
+ [(set (match_operand:VI1_AVX2 0 "register_operand" "")
+ (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "")
+ (match_operand:VI1_AVX2 2 "register_operand" "")))]
"TARGET_SSE2
&& can_create_pseudo_p ()"
"#"
@@ -4850,34 +4856,41 @@ (define_insn_and_split "mulv16qi3"
{
rtx t[6];
int i;
+ enum machine_mode mulmode = <sseunpackmode>mode;
for (i = 0; i < 6; ++i)
- t[i] = gen_reg_rtx (V16QImode);
+ t[i] = gen_reg_rtx (<MODE>mode);
/* Unpack data such that we've got a source byte in each low byte of
each word. We don't care what goes into the high byte of each word.
Rather than trying to get zero in there, most convenient is to let
it be a copy of the low byte. */
- emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
- emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
- emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
- emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
+ emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[0], operands[1],
+ operands[1]));
+ emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[1], operands[2],
+ operands[2]));
+ emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[2], operands[1],
+ operands[1]));
+ emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[3], operands[2],
+ operands[2]));
/* Multiply words. The end-of-line annotations here give a picture of what
the output of that instruction looks like. Dot means don't care; the
letters are the bytes of the result with A being the most significant. */
- emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
- gen_lowpart (V8HImode, t[0]),
- gen_lowpart (V8HImode, t[1])));
- emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
- gen_lowpart (V8HImode, t[2]),
- gen_lowpart (V8HImode, t[3])));
+ emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[4]),
+ gen_rtx_MULT (mulmode, /* .A.B.C.D.E.F.G.H */
+ gen_lowpart (mulmode, t[0]),
+ gen_lowpart (mulmode, t[1]))));
+ emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[5]),
+ gen_rtx_MULT (mulmode, /* .I.J.K.L.M.N.O.P */
+ gen_lowpart (mulmode, t[2]),
+ gen_lowpart (mulmode, t[3]))));
/* Extract the even bytes and merge them back together. */
ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
set_unique_reg_note (get_last_insn (), REG_EQUAL,
- gen_rtx_MULT (V16QImode, operands[1], operands[2]));
+ gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
DONE;
})