@@ -3721,7 +3721,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
{
op_true = force_reg (mode, op_true);
- gen = gen_mmx_pblendvb;
+ gen = gen_mmx_pblendvb64;
if (mode != V8QImode)
d = gen_reg_rtx (V8QImode);
op_false = gen_lowpart (V8QImode, op_false);
@@ -3729,6 +3729,20 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
cmp = gen_lowpart (V8QImode, cmp);
}
break;
+ case E_V4QImode:
+ case E_V2HImode:
+ if (TARGET_SSE4_1)
+ {
+ op_true = force_reg (mode, op_true);
+
+ gen = gen_mmx_pblendvb32;
+ if (mode != V4QImode)
+ d = gen_reg_rtx (V4QImode);
+ op_false = gen_lowpart (V4QImode, op_false);
+ op_true = gen_lowpart (V4QImode, op_true);
+ cmp = gen_lowpart (V4QImode, cmp);
+ }
+ break;
case E_V16QImode:
case E_V8HImode:
case E_V4SImode:
@@ -4241,6 +4255,12 @@ ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
else if (code == GT && TARGET_SSE4_1)
gen = gen_sminv8qi3;
break;
+ case E_V4QImode:
+ if (code == GTU && TARGET_SSE2)
+ gen = gen_uminv4qi3;
+ else if (code == GT && TARGET_SSE4_1)
+ gen = gen_sminv4qi3;
+ break;
case E_V8HImode:
if (code == GTU && TARGET_SSE4_1)
gen = gen_uminv8hi3;
@@ -4253,6 +4273,12 @@ ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
else if (code == GT && TARGET_SSE2)
gen = gen_sminv4hi3;
break;
+ case E_V2HImode:
+ if (code == GTU && TARGET_SSE4_1)
+ gen = gen_uminv2hi3;
+ else if (code == GT && TARGET_SSE2)
+ gen = gen_sminv2hi3;
+ break;
case E_V4SImode:
if (TARGET_SSE4_1)
gen = (code == GTU) ? gen_uminv4si3 : gen_sminv4si3;
@@ -4327,8 +4353,10 @@ ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
case E_V16HImode:
case E_V16QImode:
case E_V8QImode:
+ case E_V4QImode:
case E_V8HImode:
case E_V4HImode:
+ case E_V2HImode:
/* Perform a parallel unsigned saturating subtraction. */
x = gen_reg_rtx (mode);
emit_insn (gen_rtx_SET
@@ -1403,6 +1403,20 @@ (define_insn "*mmx_<insn><mode>3"
(set_attr "type" "mmxadd,sseadd,sseadd")
(set_attr "mode" "DI,TI,TI")])
+(define_insn "*<insn><mode>3"  ;; sat_plusminus on VI_32 operands; alt 0 is the legacy SSE form, alt 1 the AVX form
+ [(set (match_operand:VI_32 0 "register_operand" "=x,Yw")
+ (sat_plusminus:VI_32
+ (match_operand:VI_32 1 "register_operand" "<comm>0,Yw")  ;; alt 0: operand 1 must match operand 0 (the 0 constraint)
+ (match_operand:VI_32 2 "register_operand" "x,Yw")))]
+ "TARGET_SSE2
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"  ;; insn condition: SSE2 plus the generic binary-operand check
+ "@
+ p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
+ vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"  ;; second template is the three-operand v-prefixed encoding
+ [(set_attr "isa" "noavx,avx")  ;; one entry per alternative, in constraint order
+ (set_attr "type" "sseadd")
+ (set_attr "mode" "TI")])
+
(define_expand "mmx_mulv4hi3"
[(set (match_operand:V4HI 0 "register_operand")
(mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand")
@@ -2032,6 +2046,20 @@ (define_insn "*mmx_eq<mode>3"
(set_attr "type" "mmxcmp,ssecmp,ssecmp")
(set_attr "mode" "DI,TI,TI")])
+(define_insn "*eq<mode>3"  ;; eq compare of two VI_32 register operands, emitted as pcmpeq / vpcmpeq
+ [(set (match_operand:VI_32 0 "register_operand" "=x,x")
+ (eq:VI_32
+ (match_operand:VI_32 1 "register_operand" "%0,x")  ;; % marks operands 1 and 2 as commutative; alt 0 ties operand 1 to operand 0
+ (match_operand:VI_32 2 "register_operand" "x,x")))]
+ "TARGET_SSE2
+ && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"  ;; insn condition: SSE2 plus the generic binary-operand check
+ "@
+ pcmpeq<mmxvecsize>\t{%2, %0|%0, %2}
+ vpcmpeq<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"  ;; alt 1: three-operand AVX form
+ [(set_attr "isa" "noavx,avx")  ;; one entry per alternative, in constraint order
+ (set_attr "type" "ssecmp")
+ (set_attr "mode" "TI")])
+
(define_insn "mmx_gt<mode>3"
[(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x")
(gt:MMXMODEI
@@ -2047,6 +2075,19 @@ (define_insn "mmx_gt<mode>3"
(set_attr "type" "mmxcmp,ssecmp,ssecmp")
(set_attr "mode" "DI,TI,TI")])
+(define_insn "*gt<mode>3"  ;; gt (signed greater-than) compare of two VI_32 register operands via pcmpgt / vpcmpgt
+ [(set (match_operand:VI_32 0 "register_operand" "=x,x")
+ (gt:VI_32
+ (match_operand:VI_32 1 "register_operand" "0,x")  ;; no % modifier here: the operands of gt are not interchangeable
+ (match_operand:VI_32 2 "register_operand" "x,x")))]
+ "TARGET_SSE2"  ;; the only enabling condition for this pattern
+ "@
+ pcmpgt<mmxvecsize>\t{%2, %0|%0, %2}
+ vpcmpgt<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"  ;; alt 1: three-operand AVX form
+ [(set_attr "isa" "noavx,avx")  ;; one entry per alternative, in constraint order
+ (set_attr "type" "ssecmp")
+ (set_attr "mode" "TI")])
+
(define_expand "vec_cmp<mode><mode>"
[(set (match_operand:MMXMODEI 0 "register_operand")
(match_operator:MMXMODEI 1 ""
@@ -2059,6 +2100,18 @@ (define_expand "vec_cmp<mode><mode>"
DONE;
})
+(define_expand "vec_cmp<mode><mode>"  ;; VI_32 vector compare expander; all work is delegated to ix86_expand_int_vec_cmp
+ [(set (match_operand:VI_32 0 "register_operand")
+ (match_operator:VI_32 1 ""  ;; no predicate function is named for the comparison operator
+ [(match_operand:VI_32 2 "register_operand")
+ (match_operand:VI_32 3 "register_operand")]))]
+ "TARGET_SSE2"  ;; expander is available whenever SSE2 is enabled
+{
+ bool ok = ix86_expand_int_vec_cmp (operands);
+ gcc_assert (ok);
+ DONE;
+})
+
(define_expand "vec_cmpu<mode><mode>"
[(set (match_operand:MMXMODEI 0 "register_operand")
(match_operator:MMXMODEI 1 ""
@@ -2071,6 +2124,18 @@ (define_expand "vec_cmpu<mode><mode>"
DONE;
})
+(define_expand "vec_cmpu<mode><mode>"  ;; vec_cmpu variant for VI_32; makes the same ix86_expand_int_vec_cmp call as vec_cmp
+ [(set (match_operand:VI_32 0 "register_operand")
+ (match_operator:VI_32 1 ""  ;; no predicate function is named for the comparison operator
+ [(match_operand:VI_32 2 "register_operand")
+ (match_operand:VI_32 3 "register_operand")]))]
+ "TARGET_SSE2"  ;; expander is available whenever SSE2 is enabled
+{
+ bool ok = ix86_expand_int_vec_cmp (operands);
+ gcc_assert (ok);
+ DONE;
+})
+
(define_expand "vcond<MMXMODE124:mode><MMXMODEI:mode>"
[(set (match_operand:MMXMODE124 0 "register_operand")
(if_then_else:MMXMODE124
@@ -2088,6 +2153,21 @@ (define_expand "vcond<MMXMODE124:mode><MMXMODEI:mode>"
DONE;
})
+(define_expand "vcond<mode><mode>"  ;; VI_32 conditional select; expansion is delegated to ix86_expand_int_vcond
+ [(set (match_operand:VI_32 0 "register_operand")
+ (if_then_else:VI_32
+ (match_operator 3 ""  ;; the comparison, applied to operands 4 and 5; no predicate function is named
+ [(match_operand:VI_32 4 "register_operand")
+ (match_operand:VI_32 5 "register_operand")])
+ (match_operand:VI_32 1)  ;; then-arm of the if_then_else; no predicate given
+ (match_operand:VI_32 2)))]  ;; else-arm of the if_then_else; no predicate given
+ "TARGET_SSE2"
+{
+ bool ok = ix86_expand_int_vcond (operands);
+ gcc_assert (ok);
+ DONE;
+})
+
(define_expand "vcondu<MMXMODE124:mode><MMXMODEI:mode>"
[(set (match_operand:MMXMODE124 0 "register_operand")
(if_then_else:MMXMODE124
@@ -2105,6 +2185,21 @@ (define_expand "vcondu<MMXMODE124:mode><MMXMODEI:mode>"
DONE;
})
+(define_expand "vcondu<mode><mode>"  ;; vcondu variant for VI_32; makes the same ix86_expand_int_vcond call as vcond
+ [(set (match_operand:VI_32 0 "register_operand")
+ (if_then_else:VI_32
+ (match_operator 3 ""  ;; the comparison, applied to operands 4 and 5; no predicate function is named
+ [(match_operand:VI_32 4 "register_operand")
+ (match_operand:VI_32 5 "register_operand")])
+ (match_operand:VI_32 1)  ;; then-arm of the if_then_else; no predicate given
+ (match_operand:VI_32 2)))]  ;; else-arm of the if_then_else; no predicate given
+ "TARGET_SSE2"
+{
+ bool ok = ix86_expand_int_vcond (operands);
+ gcc_assert (ok);
+ DONE;
+})
+
(define_expand "vcond_mask_<mode><mmxintvecmodelower>"
[(set (match_operand:MMXMODE124 0 "register_operand")
(vec_merge:MMXMODE124
@@ -2118,7 +2213,20 @@ (define_expand "vcond_mask_<mode><mmxintvecmodelower>"
DONE;
})
-(define_insn "mmx_pblendvb"
+(define_expand "vcond_mask_<mode><mode>"  ;; VI_32 masked select; expansion is delegated to ix86_expand_sse_movcc
+ [(set (match_operand:VI_32 0 "register_operand")
+ (vec_merge:VI_32
+ (match_operand:VI_32 1 "register_operand")  ;; passed as the op_true argument
+ (match_operand:VI_32 2 "register_operand")  ;; passed as the op_false argument
+ (match_operand:VI_32 3 "register_operand")))]  ;; the mask; passed as the cmp argument
+ "TARGET_SSE2"
+{
+ ix86_expand_sse_movcc (operands[0], operands[3],
+ operands[1], operands[2]);
+ DONE;
+})
+
+(define_insn "mmx_pblendvb64"
[(set (match_operand:V8QI 0 "register_operand" "=Yr,*x,x")
(unspec:V8QI
[(match_operand:V8QI 1 "register_operand" "0,0,x")
@@ -2138,6 +2246,26 @@ (define_insn "mmx_pblendvb"
(set_attr "btver2_decode" "vector")
(set_attr "mode" "TI")])
+(define_insn "mmx_pblendvb32"  ;; V4QI blend expressed as UNSPEC_BLENDV; emitted as pblendvb / vpblendvb
+ [(set (match_operand:V4QI 0 "register_operand" "=Yr,*x,x")
+ (unspec:V4QI
+ [(match_operand:V4QI 1 "register_operand" "0,0,x")  ;; alts 0-1 tie operand 1 to the destination
+ (match_operand:V4QI 2 "register_operand" "Yr,*x,x")
+ (match_operand:V4QI 3 "register_operand" "Yz,Yz,x")]  ;; NOTE(review): Yz presumably pins the mask to xmm0 for the non-VEX form; confirm
+ UNSPEC_BLENDV))]
+ "TARGET_SSE4_1"  ;; stricter than the SSE2 condition used by the VI_32 compare patterns in this patch
+ "@
+ pblendvb\t{%3, %2, %0|%0, %2, %3}
+ pblendvb\t{%3, %2, %0|%0, %2, %3}
+ vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"  ;; alt 2: four-operand VEX form with a separate destination
+ [(set_attr "isa" "noavx,noavx,avx")  ;; only alt 2 (the vpblendvb template) is tagged avx
+ (set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "*,*,1")  ;; per-alternative values; only alt 2 sets it to 1
+ (set_attr "prefix" "orig,orig,vex")
+ (set_attr "btver2_decode" "vector")
+ (set_attr "mode" "TI")])
+
;; XOP parallel XMM conditional moves
(define_insn "*xop_pcmov_<mode>"
[(set (match_operand:MMXMODE124 0 "register_operand" "=x")
@@ -2149,6 +2277,16 @@ (define_insn "*xop_pcmov_<mode>"
"vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "sse4arg")])
+(define_insn "*xop_pcmov_<mode>"  ;; VI_32 if_then_else matched to a single vpcmov; one alternative only
+ [(set (match_operand:VI_32 0 "register_operand" "=x")
+ (if_then_else:VI_32
+ (match_operand:VI_32 3 "register_operand" "x")  ;; the condition operand of the if_then_else
+ (match_operand:VI_32 1 "register_operand" "x")  ;; then-arm
+ (match_operand:VI_32 2 "register_operand" "x")))]  ;; else-arm
+ "TARGET_XOP"  ;; pattern is enabled only when XOP is available
+ "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sse4arg")])
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel integral logical operations
new file mode 100644
@@ -0,0 +1,17 @@
+/* PR target/100637: conditional select on 4-byte vectors of char, built with -O2 -msse2.  */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+typedef unsigned char __attribute__((__vector_size__ (4))) __v4qu; /* 4-byte vector of unsigned char */
+typedef char __attribute__((__vector_size__ (4))) __v4qi; /* 4-byte vector of plain char */
+
+__v4qu au, bu; /* unsigned values to select between */
+__v4qi as, bs; /* plain-char values to select between */
+
+__v4qu uu (__v4qu a, __v4qu b) { return (a > b) ? au : bu; } /* unsigned compare, unsigned result */
+__v4qu us (__v4qi a, __v4qi b) { return (a > b) ? au : bu; } /* plain-char compare, unsigned result */
+__v4qi su (__v4qu a, __v4qu b) { return (a > b) ? as : bs; } /* unsigned compare, plain-char result */
+__v4qi ss (__v4qi a, __v4qi b) { return (a > b) ? as : bs; } /* plain-char compare, plain-char result */
+
+/* { dg-final { scan-assembler-times "pcmpeqb" 2 } } */
+/* { dg-final { scan-assembler-times "pcmpgtb" 2 } } */
new file mode 100644
@@ -0,0 +1,17 @@
+/* PR target/100637: conditional select on 4-byte vectors of short, built with -O2 -msse2.  */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+typedef unsigned short __attribute__((__vector_size__ (4))) __v2hu; /* 4-byte vector of unsigned short */
+typedef short __attribute__((__vector_size__ (4))) __v2hi; /* 4-byte vector of short */
+
+__v2hu au, bu; /* unsigned values to select between */
+__v2hi as, bs; /* signed values to select between */
+
+__v2hu uu (__v2hu a, __v2hu b) { return (a > b) ? au : bu; } /* unsigned compare, unsigned result */
+__v2hu us (__v2hi a, __v2hi b) { return (a > b) ? au : bu; } /* signed compare, unsigned result */
+__v2hi su (__v2hu a, __v2hu b) { return (a > b) ? as : bs; } /* unsigned compare, signed result */
+__v2hi ss (__v2hi a, __v2hi b) { return (a > b) ? as : bs; } /* signed compare, signed result */
+
+/* { dg-final { scan-assembler-times "pcmpeqw" 2 } } */
+/* { dg-final { scan-assembler-times "pcmpgtw" 2 } } */
new file mode 100644
@@ -0,0 +1,21 @@
+/* PR target/100637: all six comparisons on 4-byte char vectors, plus four unsigned ones.  */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+typedef char vec __attribute__((vector_size(4))); /* 4-byte vector of plain char */
+
+vec lt (vec a, vec b) { return a < b; } /* each function returns the vector comparison result as vec */
+vec le (vec a, vec b) { return a <= b; }
+vec eq (vec a, vec b) { return a == b; }
+vec ne (vec a, vec b) { return a != b; }
+vec ge (vec a, vec b) { return a >= b; }
+vec gt (vec a, vec b) { return a > b; }
+
+typedef unsigned char uvec __attribute__((vector_size(4))); /* unsigned operands; results below are still vec */
+
+vec ltu (uvec a, uvec b) { return a < b; } /* eq and ne have no unsigned variant in this file */
+vec leu (uvec a, uvec b) { return a <= b; }
+vec geu (uvec a, uvec b) { return a >= b; }
+vec gtu (uvec a, uvec b) { return a > b; }
+
+/* { dg-final { scan-assembler-not "cmpb" } } */
new file mode 100644
@@ -0,0 +1,21 @@
+/* PR target/100637: all six comparisons on 4-byte short vectors, plus four unsigned ones.  */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+typedef short vec __attribute__((vector_size(4))); /* 4-byte vector of short */
+
+vec lt (vec a, vec b) { return a < b; } /* each function returns the vector comparison result as vec */
+vec le (vec a, vec b) { return a <= b; }
+vec eq (vec a, vec b) { return a == b; }
+vec ne (vec a, vec b) { return a != b; }
+vec ge (vec a, vec b) { return a >= b; }
+vec gt (vec a, vec b) { return a > b; }
+
+typedef unsigned short uvec __attribute__((vector_size(4))); /* unsigned operands; results below are still vec */
+
+vec ltu (uvec a, uvec b) { return a < b; } /* eq and ne have no unsigned variant in this file */
+vec leu (uvec a, uvec b) { return a <= b; }
+vec geu (uvec a, uvec b) { return a >= b; }
+vec gtu (uvec a, uvec b) { return a > b; }
+
+/* { dg-final { scan-assembler-not "cmpw" } } */