@@ -13811,10 +13811,17 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
wsmode = GET_MODE_INNER (wvmode);
val = convert_modes (wsmode, smode, val, true);
- x = expand_simple_binop (wsmode, ASHIFT, val,
- GEN_INT (GET_MODE_BITSIZE (smode)),
- NULL_RTX, 1, OPTAB_LIB_WIDEN);
- val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
+
+ if (smode == QImode && !TARGET_PARTIAL_REG_STALL)
+ emit_insn (gen_insv_1 (wsmode, val, val));
+ else
+ {
+ x = expand_simple_binop (wsmode, ASHIFT, val,
+ GEN_INT (GET_MODE_BITSIZE (smode)),
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
+ val = expand_simple_binop (wsmode, IOR, val, x, x, 1,
+ OPTAB_LIB_WIDEN);
+ }
x = gen_reg_rtx (wvmode);
ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
@@ -14788,6 +14795,9 @@ ix86_expand_vector_set_var (rtx target, rtx val, rtx idx)
case E_V8DFmode:
cmp_mode = V8DImode;
break;
+ case E_V2SFmode:
+ cmp_mode = V2SImode;
+ break;
case E_V4SFmode:
cmp_mode = V4SImode;
break;
@@ -14809,9 +14819,11 @@ ix86_expand_vector_set_var (rtx target, rtx val, rtx idx)
idxv = gen_reg_rtx (cmp_mode);
idx_tmp = convert_to_mode (GET_MODE_INNER (cmp_mode), idx, 1);
- ok = ix86_expand_vector_init_duplicate (false, mode, valv, val);
+ ok = ix86_expand_vector_init_duplicate (TARGET_MMX_WITH_SSE,
+ mode, valv, val);
gcc_assert (ok);
- ok = ix86_expand_vector_init_duplicate (false, cmp_mode, idxv, idx_tmp);
+ ok = ix86_expand_vector_init_duplicate (TARGET_MMX_WITH_SSE,
+ cmp_mode, idxv, idx_tmp);
gcc_assert (ok);
vec[0] = target;
vec[1] = valv;
@@ -1279,11 +1279,14 @@ (define_insn "*mmx_concatv2sf"
(define_expand "vec_setv2sf"
[(match_operand:V2SF 0 "register_operand")
(match_operand:SF 1 "register_operand")
- (match_operand 2 "const_int_operand")]
+ (match_operand 2 "vec_setm_mmx_operand")]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
{
- ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
- INTVAL (operands[2]));
+ if (CONST_INT_P (operands[2]))
+ ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
+ INTVAL (operands[2]));
+ else
+ ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
DONE;
})
@@ -2989,11 +2992,14 @@ (define_insn "*mmx_concatv2si"
(define_expand "vec_setv2si"
[(match_operand:V2SI 0 "register_operand")
(match_operand:SI 1 "register_operand")
- (match_operand 2 "const_int_operand")]
+ (match_operand 2 "vec_setm_mmx_operand")]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
{
- ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
- INTVAL (operands[2]));
+ if (CONST_INT_P (operands[2]))
+ ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
+ INTVAL (operands[2]));
+ else
+ ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
DONE;
})
@@ -3145,11 +3151,14 @@ (define_expand "vec_initv2sisi"
(define_expand "vec_setv4hi"
[(match_operand:V4HI 0 "register_operand")
(match_operand:HI 1 "register_operand")
- (match_operand 2 "const_int_operand")]
+ (match_operand 2 "vec_setm_mmx_operand")]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
{
- ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
- INTVAL (operands[2]));
+ if (CONST_INT_P (operands[2]))
+ ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
+ INTVAL (operands[2]));
+ else
+ ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
DONE;
})
@@ -3177,11 +3186,14 @@ (define_expand "vec_initv4hihi"
(define_expand "vec_setv8qi"
[(match_operand:V8QI 0 "register_operand")
(match_operand:QI 1 "register_operand")
- (match_operand 2 "const_int_operand")]
+ (match_operand 2 "vec_setm_mmx_operand")]
"TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
{
- ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
- INTVAL (operands[2]));
+ if (CONST_INT_P (operands[2]))
+ ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
+ INTVAL (operands[2]));
+ else
+ ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
DONE;
})
@@ -1026,6 +1026,12 @@ (define_predicate "vec_setm_operand"
(match_test "TARGET_AVX2"))
(match_code "const_int")))
+(define_predicate "vec_setm_mmx_operand"
+ (ior (and (match_operand 0 "register_operand")
+ (match_test "TARGET_SSE4_1")
+ (match_test "TARGET_MMX_WITH_SSE"))
+ (match_code "const_int")))
+
;; True for registers, or 1 or -1. Used to optimize double-word shifts.
(define_predicate "reg_or_pm1_operand"
(ior (match_operand 0 "register_operand")
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-msse4.1 -O2" } */
+/* { dg-final { scan-assembler-times {(?n)v?pcmpeq[bwd]} 4 } } */
+/* { dg-final { scan-assembler-times {(?n)v?p?blendv} 4 } } */
+
+typedef char v8qi __attribute__ ((vector_size (8)));
+typedef short v4hi __attribute__ ((vector_size (8)));
+typedef int v2si __attribute__ ((vector_size (8)));
+typedef float v2sf __attribute__ ((vector_size (8)));
+
+#define FOO(VTYPE, TYPE) \
+ VTYPE \
+ __attribute__ ((noipa)) \
+ foo_##VTYPE (VTYPE a, TYPE b, unsigned int c) \
+ { \
+ a[c] = b; \
+ return a; \
+ } \
+
+FOO (v8qi, char);
+
+FOO (v4hi, short);
+
+FOO (v2si, int);
+
+FOO (v2sf, float);
new file mode 100644
@@ -0,0 +1,45 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.1" } */
+
+
+#ifndef CHECK
+#define CHECK "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK
+
+#include "sse4_1-vec-set-1.c"
+
+#define CALC_TEST(vtype, type, N, idx) \
+do \
+ { \
+ int i,val = idx * idx - idx * 3 + 16; \
+ type res[N],exp[N]; \
+ vtype resv; \
+ for (i = 0; i < N; i++) \
+ { \
+ res[i] = i * i - i * 3 + 15; \
+ exp[i] = res[i]; \
+ } \
+ exp[idx] = val; \
+ resv = foo_##vtype (*(vtype *)&res[0], val, idx); \
+ for (i = 0; i < N; i++) \
+ { \
+ if (resv[i] != exp[i]) \
+ abort (); \
+ } \
+ } \
+while (0)
+
+static void
+TEST (void)
+{
+ CALC_TEST (v8qi, char, 8, 5);
+ CALC_TEST (v4hi, short, 4, 2);
+ CALC_TEST (v2si, int, 2, 1);
+}