@@ -23294,6 +23294,16 @@ ix86_expand_vecop_qihi_partial (enum rtx_code code, rtx dest, rtx op1, rtx op2)
else
qop2 = op2;
+ qdest = gen_reg_rtx (V16QImode);
+
+ if (CONST_INT_P (op2)
+ && (code == ASHIFT || code == LSHIFTRT || code == ASHIFTRT)
+ && ix86_expand_vec_shift_qihi_constant (code, qdest, qop1, qop2))
+ {
+ emit_move_insn (dest, gen_lowpart (qimode, qdest));
+ return;
+ }
+
switch (code)
{
case MULT:
@@ -23358,8 +23368,6 @@ ix86_expand_vecop_qihi_partial (enum rtx_code code, rtx dest, rtx op1, rtx op2)
bool ok;
int i;
- qdest = gen_reg_rtx (V16QImode);
-
/* Merge the data back into the right place. */
d.target = qdest;
d.op0 = qres;
@@ -20580,6 +20580,37 @@ ix86_shift_rotate_cost (const struct processor_costs *cost,
switch (mode)
{
+ case V4QImode:
+ case V8QImode:
+ if (TARGET_AVX2)
+ /* Use vpbroadcast. */
+ extra = cost->sse_op;
+ else
+ extra = cost->sse_load[2];
+
+ if (constant_op1)
+ {
+ if (code == ASHIFTRT)
+ {
+ count = 4;
+ extra *= 2;
+ }
+ else
+ count = 2;
+ }
+ else if (TARGET_AVX512BW && TARGET_AVX512VL)
+ {
+ count = 3;
+ return ix86_vec_cost (mode, cost->sse_op * count);
+ }
+ else if (TARGET_SSE4_1)
+ count = 4;
+ else if (code == ASHIFTRT)
+ count = 5;
+ else
+ count = 4;
+ return ix86_vec_cost (mode, cost->sse_op * count) + extra;
+
case V16QImode:
if (TARGET_XOP)
{
@@ -20600,7 +20631,12 @@ ix86_shift_rotate_cost (const struct processor_costs *cost,
}
/* FALLTHRU */
case V32QImode:
- extra = (mode == V16QImode) ? cost->sse_load[2] : cost->sse_load[3];
+ if (TARGET_AVX2)
+ /* Use vpbroadcast. */
+ extra = cost->sse_op;
+ else
+ extra = (mode == V16QImode) ? cost->sse_load[2] : cost->sse_load[3];
+
if (constant_op1)
{
if (code == ASHIFTRT)
@@ -2680,6 +2680,28 @@
(const_string "0")))
(set_attr "mode" "TI")])
+(define_expand "<insn>v8qi3" ; Expander for the any_shift codes on a V8QI vector.
+ [(set (match_operand:V8QI 0 "register_operand")
+ (any_shift:V8QI (match_operand:V8QI 1 "register_operand")
+ (match_operand:DI 2 "nonmemory_operand")))] ; Shift count is a DImode non-memory operand.
+ "TARGET_MMX_WITH_SSE"
+{
+ ix86_expand_vecop_qihi_partial (<CODE>, operands[0], /* Helper receives the rtx code, dest and both sources.  */
+ operands[1], operands[2]);
+ DONE; /* Expansion is finished here; the template above is not emitted.  */
+})
+
+(define_expand "<insn>v4qi3" ; Expander for the any_shift codes on a V4QI vector.
+ [(set (match_operand:V4QI 0 "register_operand")
+ (any_shift:V4QI (match_operand:V4QI 1 "register_operand")
+ (match_operand:DI 2 "nonmemory_operand")))] ; Shift count is a DImode non-memory operand.
+ "TARGET_SSE2"
+{
+ ix86_expand_vecop_qihi_partial (<CODE>, operands[0], /* Same helper as the V8QI expander.  */
+ operands[1], operands[2]);
+ DONE; /* Expansion is finished here; the template above is not emitted.  */
+})
+
(define_insn_and_split "<insn>v2qi3"
[(set (match_operand:V2QI 0 "register_operand" "=Q")
(any_shift:V2QI
new file mode 100644
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+
+#define N 4 /* Vector size in bytes.  */
+
+typedef unsigned char __vu __attribute__ ((__vector_size__ (N))); /* N unsigned byte lanes.  */
+typedef signed char __vi __attribute__ ((__vector_size__ (N))); /* N signed byte lanes.  */
+
+__vu sll (__vu a, int n) /* Left shift of all lanes by a run-time count.  */
+{
+ return a << n;
+}
+
+__vu sll_c (__vu a) /* Left shift of all lanes by the constant 5.  */
+{
+ return a << 5;
+}
+
+/* { dg-final { scan-assembler-times "psllw" 2 } } */
+
+__vu srl (__vu a, int n) /* Right shift of unsigned lanes by a run-time count.  */
+{
+ return a >> n;
+}
+
+__vu srl_c (__vu a) /* Right shift of unsigned lanes by the constant 5.  */
+{
+ return a >> 5;
+}
+
+/* { dg-final { scan-assembler-times "psrlw" 2 } } */
+
+__vi sra (__vi a, int n) /* Right shift of signed lanes by a run-time count.  */
+{
+ return a >> n;
+}
+
+__vi sra_c (__vi a) /* Right shift of signed lanes by the constant 5.  */
+{
+ return a >> 5;
+}
+
+/* { dg-final { scan-assembler-times "psraw" 2 } } */
new file mode 100644
@@ -0,0 +1,43 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+
+#define N 8 /* Vector size in bytes.  */
+
+typedef unsigned char __vu __attribute__ ((__vector_size__ (N))); /* N unsigned byte lanes.  */
+typedef signed char __vi __attribute__ ((__vector_size__ (N))); /* N signed byte lanes.  */
+
+__vu sll (__vu a, int n) /* Left shift of all lanes by a run-time count.  */
+{
+ return a << n;
+}
+
+__vu sll_c (__vu a) /* Left shift of all lanes by the constant 5.  */
+{
+ return a << 5;
+}
+
+/* { dg-final { scan-assembler-times "psllw" 2 } } */
+
+__vu srl (__vu a, int n) /* Right shift of unsigned lanes by a run-time count.  */
+{
+ return a >> n;
+}
+
+__vu srl_c (__vu a) /* Right shift of unsigned lanes by the constant 5.  */
+{
+ return a >> 5;
+}
+
+/* { dg-final { scan-assembler-times "psrlw" 2 } } */
+
+__vi sra (__vi a, int n) /* Right shift of signed lanes by a run-time count.  */
+{
+ return a >> n;
+}
+
+__vi sra_c (__vi a) /* Right shift of signed lanes by the constant 5.  */
+{
+ return a >> 5;
+}
+
+/* { dg-final { scan-assembler-times "psraw" 2 } } */