@@ -639,7 +639,7 @@ (define_attr "use_carry" "0,1" (const_st
(define_attr "movu" "0,1" (const_string "0"))
;; Used to control the "enabled" attribute on a per-instruction basis.
-(define_attr "isa" "base,sse2,sse2_noavx,sse3,sse4,sse4_noavx,noavx,avx,bmi2"
+(define_attr "isa" "base,sse2,sse2_noavx,sse3,sse4,sse4_noavx,noavx,avx,avx2,noavx2,bmi2"
(const_string "base"))
(define_attr "enabled" ""
@@ -652,6 +652,8 @@ (define_attr "enabled" ""
(symbol_ref "TARGET_SSE4_1 && !TARGET_AVX")
(eq_attr "isa" "avx") (symbol_ref "TARGET_AVX")
(eq_attr "isa" "noavx") (symbol_ref "!TARGET_AVX")
+ (eq_attr "isa" "avx2") (symbol_ref "TARGET_AVX2")
+ (eq_attr "isa" "noavx2") (symbol_ref "!TARGET_AVX2")
(eq_attr "isa" "bmi2") (symbol_ref "TARGET_BMI2")
]
(const_int 1)))
@@ -3808,6 +3808,18 @@ (define_insn "avx2_vec_dup<mode>"
(set_attr "prefix" "vex")
(set_attr "mode" "<MODE>")])
+(define_insn "avx2_vec_dupv8sf_1" ; Broadcast pattern: operand 0 gets element 0 of V8SF operand 1 duplicated into every element.
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (vec_duplicate:V8SF
+ (vec_select:SF ; extract one SF element from operand 1 ...
+ (match_operand:V8SF 1 "register_operand" "x") ; source is register-only (no memory alternative)
+ (parallel [(const_int 0)]))))] ; ... specifically element 0
+ "TARGET_AVX2" ; pattern is only available when AVX2 is enabled
+ "vbroadcastss\t{%x1, %0|%0, %x1}" ; NOTE(review): %x1 presumably prints operand 1 under its 128-bit (xmm) register name -- confirm against the operand printer.
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
(define_insn "vec_dupv4sf"
[(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
(vec_duplicate:V4SF
@@ -11876,6 +11888,19 @@ (define_insn "avx2_pbroadcast<mode>"
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn "avx2_pbroadcast<mode>_1" ; Broadcast pattern over the VI_256 modes: operand 0 gets element 0 of operand 1 duplicated into every element.
+ [(set (match_operand:VI_256 0 "register_operand" "=x")
+ (vec_duplicate:VI_256
+ (vec_select:<ssescalarmode> ; extract one scalar element from the full-width operand 1 ...
+ (match_operand:VI_256 1 "nonimmediate_operand" "xm") ; source may be a register or memory
+ (parallel [(const_int 0)]))))] ; ... specifically element 0
+ "TARGET_AVX2" ; pattern is only available when AVX2 is enabled
+ "vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}" ; NOTE(review): the single alternative also accepts memory, yet the x modifier is applied to operand 1 -- confirm the modifier is harmless for MEM operands (both AT&T and Intel syntax).
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn "avx2_permvarv8si"
[(set (match_operand:V8SI 0 "register_operand" "=x")
(unspec:V8SI
@@ -11967,16 +11992,18 @@ (define_mode_iterator AVX_VEC_DUP_MODE
[V8SI V8SF V4DI V4DF])
(define_insn "vec_dup<mode>"
- [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x")
+ [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x") ; now three alternatives, one per output template below
(vec_duplicate:AVX_VEC_DUP_MODE
- (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,?x")))]
+ (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,?x")))] ; alt 0: memory source; alt 1 (new): register source; alt 2: register source marked with '?'
"TARGET_AVX"
"@
vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
+ vbroadcast<ssescalarmodesuffix>\t{%x1, %0|%0, %x1}
#"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
+ (set_attr "isa" "*,avx2,noavx2") ; per-alternative enablement: alt 1 requires TARGET_AVX2, alt 2 (the "#" template) requires !TARGET_AVX2; "*" presumably keeps alt 0 on the default isa -- confirm.
(set_attr "mode" "V8SF")])
(define_insn "avx2_vbroadcasti128_<mode>"
@@ -11995,7 +12022,7 @@ (define_split
[(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
(vec_duplicate:AVX_VEC_DUP_MODE
(match_operand:<ssescalarmode> 1 "register_operand")))]
- "TARGET_AVX && reload_completed"
+ "TARGET_AVX && !TARGET_AVX2 && reload_completed"
[(set (match_dup 2)
(vec_duplicate:<ssehalfvecmode> (match_dup 1)))
(set (match_dup 0)
@@ -12057,7 +12084,7 @@ (define_insn_and_split "*avx_vperm_broad
[(match_operand 3 "const_int_operand" "C,n,n")])))]
"TARGET_AVX"
"#"
- "&& reload_completed"
+ "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
[(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
{
rtx op0 = operands[0], op1 = operands[1];
@@ -12067,6 +12094,13 @@ (define_insn_and_split "*avx_vperm_broad
{
int mask;
+ if (TARGET_AVX2 && elt == 0)
+ {
+ emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
+ op1)));
+ DONE;
+ }
+
/* Shuffle element we care about into all elements of the 128-bit lane.
The other lane gets shuffled too, but we don't care. */
if (<MODE>mode == V4DFmode)
@@ -35834,7 +35834,7 @@ valid_perm_using_mode_p (enum machine_mo
}
/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
- in terms of pshufb, vpperm, vpermq, vpermd or vperm2i128. */
+ in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
static bool
expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
@@ -35908,6 +35908,9 @@ expand_vec_perm_pshufb (struct expand_ve
if (valid_perm_using_mode_p (V8SImode, d))
vmode = V8SImode;
}
+ /* Or if vpermps can be used. */
+ else if (d->vmode == V8SFmode)
+ vmode = V8SImode;
if (vmode == V32QImode)
{
@@ -35950,6 +35953,12 @@ expand_vec_perm_pshufb (struct expand_ve
gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
vperm = force_reg (vmode, vperm);
+ if (vmode == V8SImode && d->vmode == V8SFmode)
+ {
+ vmode = V8SFmode;
+ vperm = gen_lowpart (vmode, vperm);
+ }
+
target = gen_lowpart (vmode, d->target);
op0 = gen_lowpart (vmode, d->op0);
if (d->op0 == d->op1)
@@ -35958,6 +35967,8 @@ expand_vec_perm_pshufb (struct expand_ve
emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
else if (vmode == V32QImode)
emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
+ else if (vmode == V8SFmode)
+ emit_insn (gen_avx2_permvarv8sf (target, vperm, op0));
else
emit_insn (gen_avx2_permvarv8si (target, vperm, op0));
}
@@ -36006,20 +36017,17 @@ expand_vec_perm_1 (struct expand_vec_per
else if (broadcast_perm && TARGET_AVX2)
{
- /* Use vpbroadcast{b,w,d}. */
+ /* Use vpbroadcast{b,w,d} or vbroadcastss. */
- rtx op = d->op0, (*gen) (rtx, rtx) = NULL;
+ rtx (*gen) (rtx, rtx) = NULL;
switch (d->vmode)
{
case V32QImode:
- op = gen_lowpart (V16QImode, op);
- gen = gen_avx2_pbroadcastv32qi;
+ gen = gen_avx2_pbroadcastv32qi_1;
break;
case V16HImode:
- op = gen_lowpart (V8HImode, op);
- gen = gen_avx2_pbroadcastv16hi;
+ gen = gen_avx2_pbroadcastv16hi_1;
break;
case V8SImode:
- op = gen_lowpart (V4SImode, op);
- gen = gen_avx2_pbroadcastv8si;
+ gen = gen_avx2_pbroadcastv8si_1;
break;
case V16QImode:
gen = gen_avx2_pbroadcastv16qi;
@@ -36027,13 +36035,16 @@ expand_vec_perm_1 (struct expand_vec_per
case V8HImode:
gen = gen_avx2_pbroadcastv8hi;
break;
+ case V8SFmode:
+ gen = gen_avx2_vec_dupv8sf_1;
+ break;
/* For other modes prefer other shuffles this function creates. */
default: break;
}
if (gen != NULL)
{
if (!d->testing_p)
- emit_insn (gen (d->target, op));
+ emit_insn (gen (d->target, d->op0));
return true;
}
}
@@ -36101,7 +36112,7 @@ expand_vec_perm_1 (struct expand_vec_per
return true;
/* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
- vpshufb, vpermd or vpermq variable permutation. */
+ vpshufb, vpermd, vpermps or vpermq variable permutation. */
if (expand_vec_perm_pshufb (d))
return true;