@@ -1303,3 +1303,8 @@ (define_predicate "avx2_pblendw_operand"
HOST_WIDE_INT low = val & 0xff;
return val == ((low << 8) | low);
})
+
+;; Return true if OP is a nonimmediate_operand or a CONST_VECTOR.
+(define_predicate "general_vector_operand"
+ (ior (match_operand 0 "nonimmediate_operand")
+ (match_code "const_vector")))
@@ -40827,6 +40827,24 @@ ix86_expand_vecop_qihi (enum rtx_code co
gen_rtx_fmt_ee (code, qimode, op1, op2));
}
+/* Helper function of ix86_expand_mul_widen_evenodd.  Return true
+   if OP is a CONST_VECTOR with all odd elements equal to their
+   preceding element.  */
+
+static bool
+const_vector_equal_evenodd_p (rtx op)
+{
+ enum machine_mode mode = GET_MODE (op);
+ int i, nunits = GET_MODE_NUNITS (mode);
+ if (GET_CODE (op) != CONST_VECTOR
+ || nunits != CONST_VECTOR_NUNITS (op))
+ return false;
+ for (i = 0; i < nunits; i += 2)
+ if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
+ return false;
+ return true;
+}
+
void
ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
bool uns_p, bool odd_p)
@@ -40834,6 +40852,12 @@ ix86_expand_mul_widen_evenodd (rtx dest,
enum machine_mode mode = GET_MODE (op1);
enum machine_mode wmode = GET_MODE (dest);
rtx x;
+ rtx orig_op1 = op1, orig_op2 = op2;
+
+ if (!nonimmediate_operand (op1, mode))
+ op1 = force_reg (mode, op1);
+ if (!nonimmediate_operand (op2, mode))
+ op2 = force_reg (mode, op2);
/* We only play even/odd games with vectors of SImode. */
gcc_assert (mode == V4SImode || mode == V8SImode);
@@ -40852,10 +40876,12 @@ ix86_expand_mul_widen_evenodd (rtx dest,
}
x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
- op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
- x, NULL, 1, OPTAB_DIRECT);
- op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
- x, NULL, 1, OPTAB_DIRECT);
+ if (!const_vector_equal_evenodd_p (orig_op1))
+ op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
+ x, NULL, 1, OPTAB_DIRECT);
+ if (!const_vector_equal_evenodd_p (orig_op2))
+ op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
+ x, NULL, 1, OPTAB_DIRECT);
op1 = gen_lowpart (mode, op1);
op2 = gen_lowpart (mode, op2);
}
@@ -5631,14 +5631,16 @@ (define_insn "*sse2_pmaddwd"
(define_expand "mul<mode>3"
[(set (match_operand:VI4_AVX2 0 "register_operand")
(mult:VI4_AVX2
- (match_operand:VI4_AVX2 1 "nonimmediate_operand")
- (match_operand:VI4_AVX2 2 "nonimmediate_operand")))]
+ (match_operand:VI4_AVX2 1 "general_vector_operand")
+ (match_operand:VI4_AVX2 2 "general_vector_operand")))]
"TARGET_SSE2"
{
if (TARGET_SSE4_1)
{
- if (CONSTANT_P (operands[2]))
- operands[2] = force_const_mem (<MODE>mode, operands[2]);
+ if (!nonimmediate_operand (operands[1], <MODE>mode))
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+ if (!nonimmediate_operand (operands[2], <MODE>mode))
+ operands[2] = force_reg (<MODE>mode, operands[2]);
ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
}
else
@@ -5702,8 +5704,8 @@ (define_expand "vec_widen_<s>mult_lo_<mo
;; named patterns, but signed V4SI needs special help for plain SSE2.
(define_expand "vec_widen_smult_even_v4si"
[(match_operand:V2DI 0 "register_operand")
- (match_operand:V4SI 1 "register_operand")
- (match_operand:V4SI 2 "register_operand")]
+ (match_operand:V4SI 1 "nonimmediate_operand")
+ (match_operand:V4SI 2 "nonimmediate_operand")]
"TARGET_SSE2"
{
ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
@@ -5714,8 +5716,8 @@ (define_expand "vec_widen_smult_even_v4s
(define_expand "vec_widen_<s>mult_odd_<mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
(any_extend:<sseunpackmode>
- (match_operand:VI4_AVX2 1 "register_operand"))
- (match_operand:VI4_AVX2 2 "register_operand")]
+ (match_operand:VI4_AVX2 1 "general_vector_operand"))
+ (match_operand:VI4_AVX2 2 "general_vector_operand")]
"TARGET_SSE2"
{
ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],