diff mbox

Some further XMM16+ improvements

Message ID 20160503185725.GG26501@tucnak.zalov.cz
State New
Headers show

Commit Message

Jakub Jelinek May 3, 2016, 6:57 p.m. UTC
Hi!

This patch improves code generation e.g. on the first attached testcase
and allows accepting the second one.

I've noticed we don't allow TFmode or V1TImode in xmm16+ regs at all,
while they are allowed in xmm0-xmm15, so IMHO should be ok even with
AVX512VL.

Wonder if it wouldn't be better to add a new constraint that would act
like v constraint for TARGET_AVX512VL and like x constraint otherwise,
that might greatly simplify the i386.md changes in this patch.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk,
or with some changes?  Haven't figured out how to test the *andnot*
and *<logic>* patterns though.

2016-05-03  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/i386.h (VALID_AVX512VL_128_REG_MODE): Allow
	TFmode and V1TImode in xmm16+ registers for TARGET_AVX512VL.
	* config/i386/i386.md (avx512fvecmode): New mode attr.
	(*pushtf): Use v constraint instead of x.
	(*movtf_internal): Likewise.  For TARGET_AVX512VL and
	xmm16+ registers, use vmovdqu64 or vmovdqa64 instructions.
	(*absneg<mode>2): Add avx512vl alternatives.
	(*absnegtf2_sse): Likewise.
	(copysign<mode>3_const, copysign<mode>3_var): Likewise.
	* config/i386/sse.md (*andnot<mode>3): Add avx512vl and
	avx512f alternatives.
	(*andnottf3, *<code><mode>3, *<code>tf3): Likewise.


	Jakub
/* Absolute value of a float kept in a variable bound to xmm16.
   Each empty asm takes `a` as a "+v" (read-write) operand, so `a` counts
   as both used and modified immediately before and after the
   __builtin_fabsf call.  Presumably part of the "first attached testcase"
   mentioned in the commit message.  */
void
f1 (float x)
{
  register float a __asm ("xmm16");
  a = x;
  asm volatile ("" : "+v" (a));
  a = __builtin_fabsf (a);
  asm volatile ("" : "+v" (a));
}

/* Float copysign with both inputs bound to extended registers:
   `a` to xmm16 and `b` to xmm17.  The first empty asm lists both
   variables as "+v" operands; the second lists only the result `a`.  */
void
f2 (float x, float y)
{
  register float a __asm ("xmm16"), b __asm ("xmm17");
  a = x;
  b = y;
  asm volatile ("" : "+v" (a), "+v" (b));
  a = __builtin_copysignf (a, b);
  asm volatile ("" : "+v" (a));
}

/* Float negation of a value bound to xmm16, bracketed by empty asm
   statements that take `a` as a "+v" (read-write) operand.  */
void
f3 (float x)
{
  register float a __asm ("xmm16");
  a = x;
  asm volatile ("" : "+v" (a));
  a = -a;
  asm volatile ("" : "+v" (a));
}

/* Double-precision counterpart of the float fabs case: absolute value
   of a double bound to xmm16, bracketed by empty asm statements that
   take `a` as a "+v" (read-write) operand.  */
void
f4 (double x)
{
  register double a __asm ("xmm16");
  a = x;
  asm volatile ("" : "+v" (a));
  a = __builtin_fabs (a);
  asm volatile ("" : "+v" (a));
}

/* Double copysign with `a` bound to xmm16 and `b` bound to xmm17.
   Both variables are "+v" operands of the first empty asm; only the
   result `a` is an operand of the second.  */
void
f5 (double x, double y)
{
  register double a __asm ("xmm16"), b __asm ("xmm17");
  a = x;
  b = y;
  asm volatile ("" : "+v" (a), "+v" (b));
  a = __builtin_copysign (a, b);
  asm volatile ("" : "+v" (a));
}

/* Double negation of a value bound to xmm16, bracketed by empty asm
   statements that take `a` as a "+v" (read-write) operand.  */
void
f6 (double x)
{
  register double a __asm ("xmm16");
  a = x;
  asm volatile ("" : "+v" (a));
  a = -a;
  asm volatile ("" : "+v" (a));
}
/* NOTE: the function names restart at f1 here, so this and the following
   functions cannot share a translation unit with the ones above;
   presumably this is the start of the second attached testcase.

   Absolute value of a __float128 bound to xmm16, bracketed by empty asm
   statements that take `a` as a "+v" (read-write) operand.  */
void
f1 (__float128 x)
{
  register __float128 a __asm ("xmm16");
  a = x;
  asm volatile ("" : "+v" (a));
  a = __builtin_fabsq (a);
  asm volatile ("" : "+v" (a));
}

/* __float128 copysign with `a` bound to xmm16 and `b` bound to xmm17.
   Both variables are "+v" operands of the first empty asm; only the
   result `a` is an operand of the second.  */
void
f2 (__float128 x, __float128 y)
{
  register __float128 a __asm ("xmm16"), b __asm ("xmm17");
  a = x;
  b = y;
  asm volatile ("" : "+v" (a), "+v" (b));
  a = __builtin_copysignq (a, b);
  asm volatile ("" : "+v" (a));
}

/* __float128 negation of a value bound to xmm16, bracketed by empty asm
   statements that take `a` as a "+v" (read-write) operand.  */
void
f3 (__float128 x)
{
  register __float128 a __asm ("xmm16");
  a = x;
  asm volatile ("" : "+v" (a));
  a = -a;
  asm volatile ("" : "+v" (a));
}

/* Moves a 128-bit integer through two extended registers: scalar `a` is
   bound to xmm16 and `b`, a 16-byte vector of __int128_t (i.e. a single
   element), is bound to xmm17.  Stores 1 into `a`, copies it into b[0]
   and returns b[0]; an empty asm with a "+v" operand follows each store.
   Presumably this covers the V1TImode case named in the ChangeLog.  */
__int128_t
f4 (void)
{
  register __int128_t a __asm ("xmm16");
  register __int128_t __attribute__((vector_size (16))) b __asm ("xmm17");
  a = 1;
  asm volatile ("" : "+v" (a));
  b[0] = a;
  asm volatile ("" : "+v" (b));
  return b[0];
}

Comments

Kirill Yukhin May 5, 2016, 9:49 a.m. UTC | #1
Hi Jakub,
On 03 May 20:57, Jakub Jelinek wrote:
> Hi!
> 
> This patch improves code generation e.g. on the first attached testcase
> and allows accepting the second one.
> 
> I've noticed we don't allow TFmode or V1TImode in xmm16+ regs at all,
> while they are allowed in xmm0-xmm15, so IMHO should be ok even with
> AVX512VL.
> 
> Wonder if it wouldn't be better to add a new constraint that would act
> like v constraint for TARGET_AVX512VL and like x constraint otherwise,
> that might greatly simplify the i386.md changes in this patch.
Good idea, I thought about that myself. IMHO this might be a follow up.

> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk,
> or with some changes?  Haven't figured out how to test the *andnot*
> and *<logic>* patterns though.
Are you going to commit testcases?
Yeah, tests for FP *logic* look odd, so I am OK for not having them.

--
Thanks, K


> 2016-05-03  Jakub Jelinek  <jakub@redhat.com>
> 
> 	* config/i386/i386.h (VALID_AVX512VL_128_REG_MODE): Allow
> 	TFmode and V1TImode in xmm16+ registers for TARGET_AVX512VL.
> 	* config/i386/i386.md (avx512fvecmode): New mode attr.
> 	(*pushtf): Use v constraint instead of x.
> 	(*movtf_internal): Likewise.  For TARGET_AVX512VL and
> 	xmm16+ registers, use vmovdqu64 or vmovdqa64 instructions.
> 	(*absneg<mode>2): Add avx512vl alternatives.
> 	(*absnegtf2_sse): Likewise.
> 	(copysign<mode>3_const, copysign<mode>3_var): Likewise.
> 	* config/i386/sse.md (*andnot<mode>3): Add avx512vl and
> 	avx512f alternatives.
> 	(*andnottf3, *<code><mode>3, *<code>tf3): Likewise.
> 
> --- gcc/config/i386/i386.h.jj	2016-03-30 16:00:17.000000000 +0200
> +++ gcc/config/i386/i386.h	2016-05-03 15:55:46.656342870 +0200
> @@ -1126,7 +1126,8 @@ extern const char *host_detect_local_cpu
>  
>  #define VALID_AVX512VL_128_REG_MODE(MODE)				\
>    ((MODE) == V2DImode || (MODE) == V2DFmode || (MODE) == V16QImode	\
> -   || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode)
> +   || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode	\
> +   || (MODE) == TFmode || (MODE) == V1TImode)
>  
>  #define VALID_SSE2_REG_MODE(MODE)					\
>    ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode	\
> --- gcc/config/i386/i386.md.jj	2016-05-03 14:16:14.000000000 +0200
> +++ gcc/config/i386/i386.md	2016-05-03 17:13:46.643545826 +0200
> @@ -1165,6 +1165,10 @@ (define_mode_attr ssevecmode
>  (define_mode_attr ssevecmodelower
>    [(QI "v16qi") (HI "v8hi") (SI "v4si") (DI "v2di") (SF "v4sf") (DF "v2df")])
>  
> +;; AVX512F vector mode corresponding to a scalar mode
> +(define_mode_attr avx512fvecmode
> +  [(QI "V64QI") (HI "V32HI") (SI "V16SI") (DI "V8DI") (SF "V16SF") (DF "V8DF")])
> +
>  ;; Instruction suffix for REX 64bit operators.
>  (define_mode_attr rex64suffix [(SI "") (DI "{q}")])
>  
> @@ -2928,7 +2932,7 @@ (define_insn "*insvqi"
>  
>  (define_insn "*pushtf"
>    [(set (match_operand:TF 0 "push_operand" "=<,<")
> -	(match_operand:TF 1 "general_no_elim_operand" "x,*roF"))]
> +	(match_operand:TF 1 "general_no_elim_operand" "v,*roF"))]
>    "TARGET_64BIT || TARGET_SSE"
>  {
>    /* This insn should be already split before reg-stack.  */
> @@ -3107,8 +3111,8 @@ (define_expand "mov<mode>"
>    "ix86_expand_move (<MODE>mode, operands); DONE;")
>  
>  (define_insn "*movtf_internal"
> -  [(set (match_operand:TF 0 "nonimmediate_operand" "=x,x ,m,?*r ,!o")
> -	(match_operand:TF 1 "general_operand"	   "C ,xm,x,*roF,*rC"))]
> +  [(set (match_operand:TF 0 "nonimmediate_operand" "=v,v ,m,?*r ,!o")
> +	(match_operand:TF 1 "general_operand"	   "C ,vm,v,*roF,*rC"))]
>    "(TARGET_64BIT || TARGET_SSE)
>     && !(MEM_P (operands[0]) && MEM_P (operands[1]))
>     && (!can_create_pseudo_p ()
> @@ -3133,6 +3137,10 @@ (define_insn "*movtf_internal"
>  	{
>  	  if (get_attr_mode (insn) == MODE_V4SF)
>  	    return "%vmovups\t{%1, %0|%0, %1}";
> +	  else if (TARGET_AVX512VL
> +		   && (EXT_REX_SSE_REG_P (operands[0])
> +		       || EXT_REX_SSE_REG_P (operands[1])))
> +	    return "vmovdqu64\t{%1, %0|%0, %1}";
>  	  else
>  	    return "%vmovdqu\t{%1, %0|%0, %1}";
>  	}
> @@ -3140,6 +3148,10 @@ (define_insn "*movtf_internal"
>  	{
>  	  if (get_attr_mode (insn) == MODE_V4SF)
>  	    return "%vmovaps\t{%1, %0|%0, %1}";
> +	  else if (TARGET_AVX512VL
> +		   && (EXT_REX_SSE_REG_P (operands[0])
> +		       || EXT_REX_SSE_REG_P (operands[1])))
> +	    return "vmovdqa64\t{%1, %0|%0, %1}";
>  	  else
>  	    return "%vmovdqa\t{%1, %0|%0, %1}";
>  	}
> @@ -9253,10 +9265,10 @@ (define_expand "<code><mode>2"
>    "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
>  
>  (define_insn "*absneg<mode>2"
> -  [(set (match_operand:MODEF 0 "register_operand" "=x,x,f,!r")
> +  [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v,f,!r")
>  	(match_operator:MODEF 3 "absneg_operator"
> -	  [(match_operand:MODEF 1 "register_operand" "0,x,0,0")]))
> -   (use (match_operand:<ssevecmode> 2 "nonimmediate_operand" "xm,0,X,X"))
> +	  [(match_operand:MODEF 1 "register_operand" "0,x,0,v,0,0")]))
> +   (use (match_operand:<ssevecmode> 2 "nonimmediate_operand" "xm,0,vm,0,X,X"))
>     (clobber (reg:CC FLAGS_REG))]
>    "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
>     || TARGET_80387"
> @@ -9265,11 +9277,14 @@ (define_insn "*absneg<mode>2"
>       (if_then_else
>         (match_test ("SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"))
>         (if_then_else
> -	 (eq_attr "alternative" "2")
> +	 (eq_attr "alternative" "4")
>  	 (symbol_ref "TARGET_MIX_SSE_I387")
> -	 (symbol_ref "true"))
> +	 (if_then_else
> +	   (eq_attr "alternative" "2,3")
> +	   (symbol_ref "TARGET_AVX512VL != 0")
> +	   (symbol_ref "true")))
>         (if_then_else
> -	 (eq_attr "alternative" "2,3")
> +	 (eq_attr "alternative" "4,5")
>  	 (symbol_ref "true")
>  	 (symbol_ref "false"))))])
>  
> @@ -9289,13 +9304,14 @@ (define_expand "<code>tf2"
>    "ix86_expand_fp_absneg_operator (<CODE>, TFmode, operands); DONE;")
>  
>  (define_insn "*absnegtf2_sse"
> -  [(set (match_operand:TF 0 "register_operand" "=x,x")
> +  [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
>  	(match_operator:TF 3 "absneg_operator"
> -	  [(match_operand:TF 1 "register_operand" "0,x")]))
> -   (use (match_operand:TF 2 "nonimmediate_operand" "xm,0"))
> +	  [(match_operand:TF 1 "register_operand" "0,x,0,v")]))
> +   (use (match_operand:TF 2 "nonimmediate_operand" "xm,0,vm,0"))
>     (clobber (reg:CC FLAGS_REG))]
>    "TARGET_SSE"
> -  "#")
> +  "#"
> +  [(set_attr "isa" "*,*,avx512vl,avx512vl")])
>  
>  ;; Splitters for fp abs and neg.
>  
> @@ -9473,31 +9489,35 @@ (define_expand "copysign<mode>3"
>    "ix86_expand_copysign (operands); DONE;")
>  
>  (define_insn_and_split "copysign<mode>3_const"
> -  [(set (match_operand:CSGNMODE 0 "register_operand" "=x")
> +  [(set (match_operand:CSGNMODE 0 "register_operand" "=x,v")
>  	(unspec:CSGNMODE
> -	  [(match_operand:<CSGNVMODE> 1 "vector_move_operand" "xmC")
> -	   (match_operand:CSGNMODE 2 "register_operand" "0")
> -	   (match_operand:<CSGNVMODE> 3 "nonimmediate_operand" "xm")]
> +	  [(match_operand:<CSGNVMODE> 1 "vector_move_operand" "xmC,vmC")
> +	   (match_operand:CSGNMODE 2 "register_operand" "0,0")
> +	   (match_operand:<CSGNVMODE> 3 "nonimmediate_operand" "xm,vm")]
>  	  UNSPEC_COPYSIGN))]
>    "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
>     || (TARGET_SSE && (<MODE>mode == TFmode))"
>    "#"
>    "&& reload_completed"
>    [(const_int 0)]
> -  "ix86_split_copysign_const (operands); DONE;")
> +  "ix86_split_copysign_const (operands); DONE;"
> +  [(set_attr "isa" "*,avx512vl")])
>  
>  (define_insn "copysign<mode>3_var"
> -  [(set (match_operand:CSGNMODE 0 "register_operand" "=x,x,x,x,x")
> +  [(set (match_operand:CSGNMODE 0 "register_operand" "=x,x,x,x,x,v,v,v,v,v")
>  	(unspec:CSGNMODE
> -	  [(match_operand:CSGNMODE 2 "register_operand"	"x,0,0,x,x")
> -	   (match_operand:CSGNMODE 3 "register_operand"	"1,1,x,1,x")
> -	   (match_operand:<CSGNVMODE> 4 "nonimmediate_operand" "X,xm,xm,0,0")
> -	   (match_operand:<CSGNVMODE> 5 "nonimmediate_operand" "0,xm,1,xm,1")]
> +	  [(match_operand:CSGNMODE 2 "register_operand"	"x,0,0,x,x,v,0,0,v,v")
> +	   (match_operand:CSGNMODE 3 "register_operand"	"1,1,x,1,x,1,1,v,1,v")
> +	   (match_operand:<CSGNVMODE> 4
> +	     "nonimmediate_operand" "X,xm,xm,0,0,X,vm,vm,0,0")
> +	   (match_operand:<CSGNVMODE> 5
> +	     "nonimmediate_operand" "0,xm,1,xm,1,0,vm,1,vm,1")]
>  	  UNSPEC_COPYSIGN))
> -   (clobber (match_scratch:<CSGNVMODE> 1 "=x,x,x,x,x"))]
> +   (clobber (match_scratch:<CSGNVMODE> 1 "=x,x,x,x,x,v,v,v,v,v"))]
>    "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
>     || (TARGET_SSE && (<MODE>mode == TFmode))"
> -  "#")
> +  "#"
> +  [(set_attr "isa" "*,*,*,*,*,avx512vl,avx512vl,avx512vl,avx512vl,avx512vl")])
>  
>  (define_split
>    [(set (match_operand:CSGNMODE 0 "register_operand")
> --- gcc/config/i386/sse.md.jj	2016-05-03 13:34:09.946986488 +0200
> +++ gcc/config/i386/sse.md	2016-05-03 17:38:02.486935094 +0200
> @@ -3000,11 +3013,11 @@ (define_expand "copysign<mode>3"
>  ;; because the native instructions read the full 128-bits.
>  
>  (define_insn "*andnot<mode>3"
> -  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
> +  [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
>  	(and:MODEF
>  	  (not:MODEF
> -	    (match_operand:MODEF 1 "register_operand" "0,x"))
> -	    (match_operand:MODEF 2 "register_operand" "x,x")))]
> +	    (match_operand:MODEF 1 "register_operand" "0,x,v,v"))
> +	    (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
>    "SSE_FLOAT_MODE_P (<MODE>mode)"
>  {
>    static char buf[32];
> @@ -3020,6 +3033,24 @@ (define_insn "*andnot<mode>3"
>      case 1:
>        ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
>        break;
> +    case 2:
> +      if (TARGET_AVX512DQ)
> +	ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
> +      else
> +	{
> +	  suffix = <MODE>mode == DFmode ? "q" : "d";
> +	  ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
> +	}
> +      break;
> +    case 3:
> +      if (TARGET_AVX512DQ)
> +	ops = "vandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
> +      else
> +	{
> +	  suffix = <MODE>mode == DFmode ? "q" : "d";
> +	  ops = "vpandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
> +	}
> +      break;
>      default:
>        gcc_unreachable ();
>      }
> @@ -3027,11 +3058,19 @@ (define_insn "*andnot<mode>3"
>    snprintf (buf, sizeof (buf), ops, suffix);
>    return buf;
>  }
> -  [(set_attr "isa" "noavx,avx")
> +  [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
>     (set_attr "type" "sselog")
> -   (set_attr "prefix" "orig,vex")
> +   (set_attr "prefix" "orig,vex,evex,evex")
>     (set (attr "mode")
> -	(cond [(and (match_test "<MODE_SIZE> == 16")
> +	(cond [(eq_attr "alternative" "2")
> +		 (if_then_else (match_test "TARGET_AVX512DQ")
> +			       (const_string "<ssevecmode>")
> +			       (const_string "TI"))
> +	       (eq_attr "alternative" "3")
> +		 (if_then_else (match_test "TARGET_AVX512DQ")
> +			       (const_string "<avx512fvecmode>")
> +			       (const_string "XI"))
> +	       (and (match_test "<MODE_SIZE> == 16")
>  		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
>  		 (const_string "V4SF")
>  	       (match_test "TARGET_AVX")
> @@ -3042,16 +3081,17 @@ (define_insn "*andnot<mode>3"
>  	       (const_string "<ssevecmode>")))])
>  
>  (define_insn "*andnottf3"
> -  [(set (match_operand:TF 0 "register_operand" "=x,x")
> +  [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
>  	(and:TF
> -	  (not:TF (match_operand:TF 1 "register_operand" "0,x"))
> -	  (match_operand:TF 2 "vector_operand" "xBm,xm")))]
> +	  (not:TF (match_operand:TF 1 "register_operand" "0,x,v,v"))
> +	  (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
>    "TARGET_SSE"
>  {
>    static char buf[32];
>    const char *ops;
>    const char *tmp
> -    = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn";
> +    = (which_alternative >= 2 ? "pandnq"
> +       : get_attr_mode (insn) == MODE_V4SF ? "andnps" : "pandn");
>  
>    switch (which_alternative)
>      {
> @@ -3059,8 +3099,12 @@ (define_insn "*andnottf3"
>        ops = "%s\t{%%2, %%0|%%0, %%2}";
>        break;
>      case 1:
> +    case 2:
>        ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
>        break;
> +    case 3:
> +      ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
> +      break;
>      default:
>        gcc_unreachable ();
>      }
> @@ -3068,7 +3112,7 @@ (define_insn "*andnottf3"
>    snprintf (buf, sizeof (buf), ops, tmp);
>    return buf;
>  }
> -  [(set_attr "isa" "noavx,avx")
> +  [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
>     (set_attr "type" "sselog")
>     (set (attr "prefix_data16")
>       (if_then_else
> @@ -3076,9 +3120,13 @@ (define_insn "*andnottf3"
>  	    (eq_attr "mode" "TI"))
>         (const_string "1")
>         (const_string "*")))
> -   (set_attr "prefix" "orig,vex")
> +   (set_attr "prefix" "orig,vex,evex,evex")
>     (set (attr "mode")
> -	(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
> +	(cond [(eq_attr "alternative" "2")
> +		 (const_string "TI")
> +	       (eq_attr "alternative" "3")
> +		 (const_string "XI")
> +	       (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
>  		 (const_string "V4SF")
>  	       (match_test "TARGET_AVX")
>  		 (const_string "TI")
> @@ -3089,10 +3137,10 @@ (define_insn "*andnottf3"
>  	       (const_string "TI")))])
>  
>  (define_insn "*<code><mode>3"
> -  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
> +  [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
>  	(any_logic:MODEF
> -	  (match_operand:MODEF 1 "register_operand" "%0,x")
> -	  (match_operand:MODEF 2 "register_operand" "x,x")))]
> +	  (match_operand:MODEF 1 "register_operand" "%0,x,v,v")
> +	  (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
>    "SSE_FLOAT_MODE_P (<MODE>mode)"
>  {
>    static char buf[32];
> @@ -3105,9 +3153,26 @@ (define_insn "*<code><mode>3"
>      case 0:
>        ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
>        break;
> +    case 2:
> +      if (!TARGET_AVX512DQ)
> +	{
> +	  suffix = <MODE>mode == DFmode ? "q" : "d";
> +	  ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
> +	  break;
> +	}
> +      /* FALLTHRU */
>      case 1:
>        ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
>        break;
> +    case 3:
> +      if (TARGET_AVX512DQ)
> +	ops = "v<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
> +      else
> +	{
> +	  suffix = <MODE>mode == DFmode ? "q" : "d";
> +	  ops = "vp<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
> +	}
> +      break;
>      default:
>        gcc_unreachable ();
>      }
> @@ -3115,11 +3180,19 @@ (define_insn "*<code><mode>3"
>    snprintf (buf, sizeof (buf), ops, suffix);
>    return buf;
>  }
> -  [(set_attr "isa" "noavx,avx")
> +  [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
>     (set_attr "type" "sselog")
> -   (set_attr "prefix" "orig,vex")
> +   (set_attr "prefix" "orig,vex,evex,evex")
>     (set (attr "mode")
> -	(cond [(and (match_test "<MODE_SIZE> == 16")
> +	(cond [(eq_attr "alternative" "2")
> +		 (if_then_else (match_test "TARGET_AVX512DQ")
> +			       (const_string "<ssevecmode>")
> +			       (const_string "TI"))
> +	       (eq_attr "alternative" "3")
> +		 (if_then_else (match_test "TARGET_AVX512DQ")
> +			       (const_string "<avx512fvecmode>")
> +			       (const_string "XI"))
> +	       (and (match_test "<MODE_SIZE> == 16")
>  		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
>  		 (const_string "V4SF")
>  	       (match_test "TARGET_AVX")
> @@ -3138,17 +3211,18 @@ (define_expand "<code>tf3"
>    "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
>  
>  (define_insn "*<code>tf3"
> -  [(set (match_operand:TF 0 "register_operand" "=x,x")
> +  [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
>  	(any_logic:TF
> -	  (match_operand:TF 1 "vector_operand" "%0,x")
> -	  (match_operand:TF 2 "vector_operand" "xBm,xm")))]
> +	  (match_operand:TF 1 "vector_operand" "%0,x,v,v")
> +	  (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
>    "TARGET_SSE
>     && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
>  {
>    static char buf[32];
>    const char *ops;
>    const char *tmp
> -    = (get_attr_mode (insn) == MODE_V4SF) ? "<logic>ps" : "p<logic>";
> +    = (which_alternative >= 2 ? "p<logic>q"
> +       : get_attr_mode (insn) == MODE_V4SF ? "<logic>ps" : "p<logic>");
>  
>    switch (which_alternative)
>      {
> @@ -3156,8 +3230,12 @@ (define_insn "*<code>tf3"
>        ops = "%s\t{%%2, %%0|%%0, %%2}";
>        break;
>      case 1:
> +    case 2:
>        ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
>        break;
> +    case 3:
> +      ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
> +      break;
>      default:
>        gcc_unreachable ();
>      }
> @@ -3165,7 +3243,7 @@ (define_insn "*<code>tf3"
>    snprintf (buf, sizeof (buf), ops, tmp);
>    return buf;
>  }
> -  [(set_attr "isa" "noavx,avx")
> +  [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
>     (set_attr "type" "sselog")
>     (set (attr "prefix_data16")
>       (if_then_else
> @@ -3173,9 +3251,13 @@ (define_insn "*<code>tf3"
>  	    (eq_attr "mode" "TI"))
>         (const_string "1")
>         (const_string "*")))
> -   (set_attr "prefix" "orig,vex")
> +   (set_attr "prefix" "orig,vex,evex,evex")
>     (set (attr "mode")
> -	(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
> +	(cond [(eq_attr "alternative" "2")
> +		 (const_string "TI")
> +	       (eq_attr "alternative" "3")
> +		 (const_string "XI")
> +	       (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
>  		 (const_string "V4SF")
>  	       (match_test "TARGET_AVX")
>  		 (const_string "TI")
> 
> 	Jakub

> void
> f1 (float x)
> {
>   register float a __asm ("xmm16");
>   a = x;
>   asm volatile ("" : "+v" (a));
>   a = __builtin_fabsf (a);
>   asm volatile ("" : "+v" (a));
> }
> 
> void
> f2 (float x, float y)
> {
>   register float a __asm ("xmm16"), b __asm ("xmm17");
>   a = x;
>   b = y;
>   asm volatile ("" : "+v" (a), "+v" (b));
>   a = __builtin_copysignf (a, b);
>   asm volatile ("" : "+v" (a));
> }
> 
> void
> f3 (float x)
> {
>   register float a __asm ("xmm16");
>   a = x;
>   asm volatile ("" : "+v" (a));
>   a = -a;
>   asm volatile ("" : "+v" (a));
> }
> 
> void
> f4 (double x)
> {
>   register double a __asm ("xmm16");
>   a = x;
>   asm volatile ("" : "+v" (a));
>   a = __builtin_fabs (a);
>   asm volatile ("" : "+v" (a));
> }
> 
> void
> f5 (double x, double y)
> {
>   register double a __asm ("xmm16"), b __asm ("xmm17");
>   a = x;
>   b = y;
>   asm volatile ("" : "+v" (a), "+v" (b));
>   a = __builtin_copysign (a, b);
>   asm volatile ("" : "+v" (a));
> }
> 
> void
> f6 (double x)
> {
>   register double a __asm ("xmm16");
>   a = x;
>   asm volatile ("" : "+v" (a));
>   a = -a;
>   asm volatile ("" : "+v" (a));
> }

> void
> f1 (__float128 x)
> {
>   register __float128 a __asm ("xmm16");
>   a = x;
>   asm volatile ("" : "+v" (a));
>   a = __builtin_fabsq (a);
>   asm volatile ("" : "+v" (a));
> }
> 
> void
> f2 (__float128 x, __float128 y)
> {
>   register __float128 a __asm ("xmm16"), b __asm ("xmm17");
>   a = x;
>   b = y;
>   asm volatile ("" : "+v" (a), "+v" (b));
>   a = __builtin_copysignq (a, b);
>   asm volatile ("" : "+v" (a));
> }
> 
> void
> f3 (__float128 x)
> {
>   register __float128 a __asm ("xmm16");
>   a = x;
>   asm volatile ("" : "+v" (a));
>   a = -a;
>   asm volatile ("" : "+v" (a));
> }
> 
> __int128_t
> f4 (void)
> {
>   register __int128_t a __asm ("xmm16");
>   register __int128_t __attribute__((vector_size (16))) b __asm ("xmm17");
>   a = 1;
>   asm volatile ("" : "+v" (a));
>   b[0] = a;
>   asm volatile ("" : "+v" (b));
>   return b[0];
> }
Jakub Jelinek May 5, 2016, 9:56 a.m. UTC | #2
On Thu, May 05, 2016 at 12:49:57PM +0300, Kirill Yukhin wrote:
> Hi Jakub,
> On 03 May 20:57, Jakub Jelinek wrote:
> > This patch improves code generation e.g. on the first attached testcase
> > and allows accepting the second one.
> > 
> > I've noticed we don't allow TFmode or V1TImode in xmm16+ regs at all,
> > while they are allowed in xmm0-xmm15, so IMHO should be ok even with
> > AVX512VL.
> > 
> > Wonder if it wouldn't be better to add a new constraint that would act
> > like v constraint for TARGET_AVX512VL and like x constraint otherwise,
> > that might greatly simplify the i386.md changes in this patch.
> Good idea, I thought about that myself. IMHO this might be a follow up.

Ok, will add that to todo.

> > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk,
> > or with some changes?  Haven't figured out how to test the *andnot*
> > and *<logic>* patterns though.
> Are you going to commit testcases?

I didn't mean to in this case, but guess I could (as for the other patches,
dg-do assemble only, I think trying to scan the assembly might be too fragile,
it is up to the RA to decide).

> Yeah, tests for FP *logic* look odd, so I am OK for not having them.

So, is the patch ok for trunk with the two testcases turned into
dg-do assemble tests, or do you want me to repost with that, or add the
Yv constraint right away, something else?

	Jakub
Kirill Yukhin May 5, 2016, 10:34 a.m. UTC | #3
On 05 May 11:56, Jakub Jelinek wrote:
> On Thu, May 05, 2016 at 12:49:57PM +0300, Kirill Yukhin wrote:
> > Hi Jakub,
> > On 03 May 20:57, Jakub Jelinek wrote:
> > > This patch improves code generation e.g. on the first attached testcase
> > > and allows accepting the second one.
> > > 
> > > I've noticed we don't allow TFmode or V1TImode in xmm16+ regs at all,
> > > while they are allowed in xmm0-xmm15, so IMHO should be ok even with
> > > AVX512VL.
> > > 
> > > Wonder if it wouldn't be better to add a new constraint that would act
> > > like v constraint for TARGET_AVX512VL and like x constraint otherwise,
> > > that might greatly simplify the i386.md changes in this patch.
> > Good idea, I thought about that myself. IMHO this might be a follow up.
> 
> Ok, will add that to todo.
> 
> > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk,
> > > or with some changes?  Haven't figured out how to test the *andnot*
> > > and *<logic>* patterns though.
> > Are you going to commit testcases?
> 
> I didn't mean to in this case, but guess I could (as for the other patches,
> dg-do assemble only, I think trying to scan the assembly might be too fragile,
> it is up to the RA to decide).
> 
> > Yeah, tests for FP *logic* look odd, so I am OK for not having them.
> 
> So, is the patch ok for trunk with the two testcases turned into
> dg-do assemble tests, or do you want me to repost with that, or add the
> Yv constraint right away, something else?
Nope. Patch is pre-OK. Thanks!
> 
> 	Jakub

--
K
diff mbox

Patch

--- gcc/config/i386/i386.h.jj	2016-03-30 16:00:17.000000000 +0200
+++ gcc/config/i386/i386.h	2016-05-03 15:55:46.656342870 +0200
@@ -1126,7 +1126,8 @@  extern const char *host_detect_local_cpu
 
 #define VALID_AVX512VL_128_REG_MODE(MODE)				\
   ((MODE) == V2DImode || (MODE) == V2DFmode || (MODE) == V16QImode	\
-   || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode)
+   || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode	\
+   || (MODE) == TFmode || (MODE) == V1TImode)
 
 #define VALID_SSE2_REG_MODE(MODE)					\
   ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode	\
--- gcc/config/i386/i386.md.jj	2016-05-03 14:16:14.000000000 +0200
+++ gcc/config/i386/i386.md	2016-05-03 17:13:46.643545826 +0200
@@ -1165,6 +1165,10 @@  (define_mode_attr ssevecmode
 (define_mode_attr ssevecmodelower
   [(QI "v16qi") (HI "v8hi") (SI "v4si") (DI "v2di") (SF "v4sf") (DF "v2df")])
 
+;; AVX512F vector mode corresponding to a scalar mode
+(define_mode_attr avx512fvecmode
+  [(QI "V64QI") (HI "V32HI") (SI "V16SI") (DI "V8DI") (SF "V16SF") (DF "V8DF")])
+
 ;; Instruction suffix for REX 64bit operators.
 (define_mode_attr rex64suffix [(SI "") (DI "{q}")])
 
@@ -2928,7 +2932,7 @@  (define_insn "*insvqi"
 
 (define_insn "*pushtf"
   [(set (match_operand:TF 0 "push_operand" "=<,<")
-	(match_operand:TF 1 "general_no_elim_operand" "x,*roF"))]
+	(match_operand:TF 1 "general_no_elim_operand" "v,*roF"))]
   "TARGET_64BIT || TARGET_SSE"
 {
   /* This insn should be already split before reg-stack.  */
@@ -3107,8 +3111,8 @@  (define_expand "mov<mode>"
   "ix86_expand_move (<MODE>mode, operands); DONE;")
 
 (define_insn "*movtf_internal"
-  [(set (match_operand:TF 0 "nonimmediate_operand" "=x,x ,m,?*r ,!o")
-	(match_operand:TF 1 "general_operand"	   "C ,xm,x,*roF,*rC"))]
+  [(set (match_operand:TF 0 "nonimmediate_operand" "=v,v ,m,?*r ,!o")
+	(match_operand:TF 1 "general_operand"	   "C ,vm,v,*roF,*rC"))]
   "(TARGET_64BIT || TARGET_SSE)
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (!can_create_pseudo_p ()
@@ -3133,6 +3137,10 @@  (define_insn "*movtf_internal"
 	{
 	  if (get_attr_mode (insn) == MODE_V4SF)
 	    return "%vmovups\t{%1, %0|%0, %1}";
+	  else if (TARGET_AVX512VL
+		   && (EXT_REX_SSE_REG_P (operands[0])
+		       || EXT_REX_SSE_REG_P (operands[1])))
+	    return "vmovdqu64\t{%1, %0|%0, %1}";
 	  else
 	    return "%vmovdqu\t{%1, %0|%0, %1}";
 	}
@@ -3140,6 +3148,10 @@  (define_insn "*movtf_internal"
 	{
 	  if (get_attr_mode (insn) == MODE_V4SF)
 	    return "%vmovaps\t{%1, %0|%0, %1}";
+	  else if (TARGET_AVX512VL
+		   && (EXT_REX_SSE_REG_P (operands[0])
+		       || EXT_REX_SSE_REG_P (operands[1])))
+	    return "vmovdqa64\t{%1, %0|%0, %1}";
 	  else
 	    return "%vmovdqa\t{%1, %0|%0, %1}";
 	}
@@ -9253,10 +9265,10 @@  (define_expand "<code><mode>2"
   "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
 
 (define_insn "*absneg<mode>2"
-  [(set (match_operand:MODEF 0 "register_operand" "=x,x,f,!r")
+  [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v,f,!r")
 	(match_operator:MODEF 3 "absneg_operator"
-	  [(match_operand:MODEF 1 "register_operand" "0,x,0,0")]))
-   (use (match_operand:<ssevecmode> 2 "nonimmediate_operand" "xm,0,X,X"))
+	  [(match_operand:MODEF 1 "register_operand" "0,x,0,v,0,0")]))
+   (use (match_operand:<ssevecmode> 2 "nonimmediate_operand" "xm,0,vm,0,X,X"))
    (clobber (reg:CC FLAGS_REG))]
   "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
    || TARGET_80387"
@@ -9265,11 +9277,14 @@  (define_insn "*absneg<mode>2"
      (if_then_else
        (match_test ("SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"))
        (if_then_else
-	 (eq_attr "alternative" "2")
+	 (eq_attr "alternative" "4")
 	 (symbol_ref "TARGET_MIX_SSE_I387")
-	 (symbol_ref "true"))
+	 (if_then_else
+	   (eq_attr "alternative" "2,3")
+	   (symbol_ref "TARGET_AVX512VL != 0")
+	   (symbol_ref "true")))
        (if_then_else
-	 (eq_attr "alternative" "2,3")
+	 (eq_attr "alternative" "4,5")
 	 (symbol_ref "true")
 	 (symbol_ref "false"))))])
 
@@ -9289,13 +9304,14 @@  (define_expand "<code>tf2"
   "ix86_expand_fp_absneg_operator (<CODE>, TFmode, operands); DONE;")
 
 (define_insn "*absnegtf2_sse"
-  [(set (match_operand:TF 0 "register_operand" "=x,x")
+  [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
 	(match_operator:TF 3 "absneg_operator"
-	  [(match_operand:TF 1 "register_operand" "0,x")]))
-   (use (match_operand:TF 2 "nonimmediate_operand" "xm,0"))
+	  [(match_operand:TF 1 "register_operand" "0,x,0,v")]))
+   (use (match_operand:TF 2 "nonimmediate_operand" "xm,0,vm,0"))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_SSE"
-  "#")
+  "#"
+  [(set_attr "isa" "*,*,avx512vl,avx512vl")])
 
 ;; Splitters for fp abs and neg.
 
@@ -9473,31 +9489,35 @@  (define_expand "copysign<mode>3"
   "ix86_expand_copysign (operands); DONE;")
 
 (define_insn_and_split "copysign<mode>3_const"
-  [(set (match_operand:CSGNMODE 0 "register_operand" "=x")
+  [(set (match_operand:CSGNMODE 0 "register_operand" "=x,v")
 	(unspec:CSGNMODE
-	  [(match_operand:<CSGNVMODE> 1 "vector_move_operand" "xmC")
-	   (match_operand:CSGNMODE 2 "register_operand" "0")
-	   (match_operand:<CSGNVMODE> 3 "nonimmediate_operand" "xm")]
+	  [(match_operand:<CSGNVMODE> 1 "vector_move_operand" "xmC,vmC")
+	   (match_operand:CSGNMODE 2 "register_operand" "0,0")
+	   (match_operand:<CSGNVMODE> 3 "nonimmediate_operand" "xm,vm")]
 	  UNSPEC_COPYSIGN))]
   "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
    || (TARGET_SSE && (<MODE>mode == TFmode))"
   "#"
   "&& reload_completed"
   [(const_int 0)]
-  "ix86_split_copysign_const (operands); DONE;")
+  "ix86_split_copysign_const (operands); DONE;"
+  [(set_attr "isa" "*,avx512vl")])
 
 (define_insn "copysign<mode>3_var"
-  [(set (match_operand:CSGNMODE 0 "register_operand" "=x,x,x,x,x")
+  [(set (match_operand:CSGNMODE 0 "register_operand" "=x,x,x,x,x,v,v,v,v,v")
 	(unspec:CSGNMODE
-	  [(match_operand:CSGNMODE 2 "register_operand"	"x,0,0,x,x")
-	   (match_operand:CSGNMODE 3 "register_operand"	"1,1,x,1,x")
-	   (match_operand:<CSGNVMODE> 4 "nonimmediate_operand" "X,xm,xm,0,0")
-	   (match_operand:<CSGNVMODE> 5 "nonimmediate_operand" "0,xm,1,xm,1")]
+	  [(match_operand:CSGNMODE 2 "register_operand"	"x,0,0,x,x,v,0,0,v,v")
+	   (match_operand:CSGNMODE 3 "register_operand"	"1,1,x,1,x,1,1,v,1,v")
+	   (match_operand:<CSGNVMODE> 4
+	     "nonimmediate_operand" "X,xm,xm,0,0,X,vm,vm,0,0")
+	   (match_operand:<CSGNVMODE> 5
+	     "nonimmediate_operand" "0,xm,1,xm,1,0,vm,1,vm,1")]
 	  UNSPEC_COPYSIGN))
-   (clobber (match_scratch:<CSGNVMODE> 1 "=x,x,x,x,x"))]
+   (clobber (match_scratch:<CSGNVMODE> 1 "=x,x,x,x,x,v,v,v,v,v"))]
   "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
    || (TARGET_SSE && (<MODE>mode == TFmode))"
-  "#")
+  "#"
+  [(set_attr "isa" "*,*,*,*,*,avx512vl,avx512vl,avx512vl,avx512vl,avx512vl")])
 
 (define_split
   [(set (match_operand:CSGNMODE 0 "register_operand")
--- gcc/config/i386/sse.md.jj	2016-05-03 13:34:09.946986488 +0200
+++ gcc/config/i386/sse.md	2016-05-03 17:38:02.486935094 +0200
@@ -3000,11 +3013,11 @@  (define_expand "copysign<mode>3"
 ;; because the native instructions read the full 128-bits.
 
 (define_insn "*andnot<mode>3"
-  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
+  [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
 	(and:MODEF
 	  (not:MODEF
-	    (match_operand:MODEF 1 "register_operand" "0,x"))
-	    (match_operand:MODEF 2 "register_operand" "x,x")))]
+	    (match_operand:MODEF 1 "register_operand" "0,x,v,v"))
+	    (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
   "SSE_FLOAT_MODE_P (<MODE>mode)"
 {
-  static char buf[32];
+  static char buf[128];	/* 32 bytes truncates the %g templates.  */
@@ -3020,6 +3033,24 @@  (define_insn "*andnot<mode>3"
     case 1:
       ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
       break;
+    case 2:
+      if (TARGET_AVX512DQ)
+	ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
+      else
+	{
+	  suffix = <MODE>mode == DFmode ? "q" : "d";
+	  ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
+	}
+      break;
+    case 3:
+      if (TARGET_AVX512DQ)
+	ops = "vandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
+      else
+	{
+	  suffix = <MODE>mode == DFmode ? "q" : "d";
+	  ops = "vpandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
+	}
+      break;
     default:
       gcc_unreachable ();
     }
@@ -3027,11 +3058,19 @@  (define_insn "*andnot<mode>3"
   snprintf (buf, sizeof (buf), ops, suffix);
   return buf;
 }
-  [(set_attr "isa" "noavx,avx")
+  [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
    (set_attr "type" "sselog")
-   (set_attr "prefix" "orig,vex")
+   (set_attr "prefix" "orig,vex,evex,evex")
    (set (attr "mode")
-	(cond [(and (match_test "<MODE_SIZE> == 16")
+	(cond [(eq_attr "alternative" "2")
+		 (if_then_else (match_test "TARGET_AVX512DQ")
+			       (const_string "<ssevecmode>")
+			       (const_string "TI"))
+	       (eq_attr "alternative" "3")
+		 (if_then_else (match_test "TARGET_AVX512DQ")
+			       (const_string "<avx512fvecmode>")
+			       (const_string "XI"))
+	       (and (match_test "<MODE_SIZE> == 16")
 		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
 		 (const_string "V4SF")
 	       (match_test "TARGET_AVX")
@@ -3042,16 +3081,17 @@  (define_insn "*andnot<mode>3"
 	       (const_string "<ssevecmode>")))])
 
 (define_insn "*andnottf3"
-  [(set (match_operand:TF 0 "register_operand" "=x,x")
+  [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
 	(and:TF
-	  (not:TF (match_operand:TF 1 "register_operand" "0,x"))
-	  (match_operand:TF 2 "vector_operand" "xBm,xm")))]
+	  (not:TF (match_operand:TF 1 "register_operand" "0,x,v,v"))
+	  (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
   "TARGET_SSE"
 {
-  static char buf[32];
+  static char buf[128];	/* 32 bytes truncates the %g templates.  */
   const char *ops;
   const char *tmp
-    = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn";
+    = (which_alternative >= 2 ? "pandnq"
+       : get_attr_mode (insn) == MODE_V4SF ? "andnps" : "pandn");
 
   switch (which_alternative)
     {
@@ -3059,8 +3099,12 @@  (define_insn "*andnottf3"
       ops = "%s\t{%%2, %%0|%%0, %%2}";
       break;
     case 1:
+    case 2:
       ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
       break;
+    case 3:
+      ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
+      break;
     default:
       gcc_unreachable ();
     }
@@ -3068,7 +3112,7 @@  (define_insn "*andnottf3"
   snprintf (buf, sizeof (buf), ops, tmp);
   return buf;
 }
-  [(set_attr "isa" "noavx,avx")
+  [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
    (set_attr "type" "sselog")
    (set (attr "prefix_data16")
      (if_then_else
@@ -3076,9 +3120,13 @@  (define_insn "*andnottf3"
 	    (eq_attr "mode" "TI"))
        (const_string "1")
        (const_string "*")))
-   (set_attr "prefix" "orig,vex")
+   (set_attr "prefix" "orig,vex,evex,evex")
    (set (attr "mode")
-	(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+	(cond [(eq_attr "alternative" "2")
+		 (const_string "TI")
+	       (eq_attr "alternative" "3")
+		 (const_string "XI")
+	       (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
 		 (const_string "V4SF")
 	       (match_test "TARGET_AVX")
 		 (const_string "TI")
@@ -3089,10 +3137,10 @@  (define_insn "*andnottf3"
 	       (const_string "TI")))])
 
 (define_insn "*<code><mode>3"
-  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
+  [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
 	(any_logic:MODEF
-	  (match_operand:MODEF 1 "register_operand" "%0,x")
-	  (match_operand:MODEF 2 "register_operand" "x,x")))]
+	  (match_operand:MODEF 1 "register_operand" "%0,x,v,v")
+	  (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
   "SSE_FLOAT_MODE_P (<MODE>mode)"
 {
-  static char buf[32];
+  static char buf[128];	/* 32 bytes truncates the %g templates.  */
@@ -3105,9 +3153,26 @@  (define_insn "*<code><mode>3"
     case 0:
       ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
       break;
+    case 2:
+      if (!TARGET_AVX512DQ)
+	{
+	  suffix = <MODE>mode == DFmode ? "q" : "d";
+	  ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
+	  break;
+	}
+      /* FALLTHRU */
     case 1:
       ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
       break;
+    case 3:
+      if (TARGET_AVX512DQ)
+	ops = "v<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
+      else
+	{
+	  suffix = <MODE>mode == DFmode ? "q" : "d";
+	  ops = "vp<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
+	}
+      break;
     default:
       gcc_unreachable ();
     }
@@ -3115,11 +3180,19 @@  (define_insn "*<code><mode>3"
   snprintf (buf, sizeof (buf), ops, suffix);
   return buf;
 }
-  [(set_attr "isa" "noavx,avx")
+  [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
    (set_attr "type" "sselog")
-   (set_attr "prefix" "orig,vex")
+   (set_attr "prefix" "orig,vex,evex,evex")
    (set (attr "mode")
-	(cond [(and (match_test "<MODE_SIZE> == 16")
+	(cond [(eq_attr "alternative" "2")
+		 (if_then_else (match_test "TARGET_AVX512DQ")
+			       (const_string "<ssevecmode>")
+			       (const_string "TI"))
+	       (eq_attr "alternative" "3")
+		 (if_then_else (match_test "TARGET_AVX512DQ")
+			       (const_string "<avx512fvecmode>")
+			       (const_string "XI"))
+	       (and (match_test "<MODE_SIZE> == 16")
 		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
 		 (const_string "V4SF")
 	       (match_test "TARGET_AVX")
@@ -3138,17 +3211,18 @@  (define_expand "<code>tf3"
   "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
 
 (define_insn "*<code>tf3"
-  [(set (match_operand:TF 0 "register_operand" "=x,x")
+  [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
 	(any_logic:TF
-	  (match_operand:TF 1 "vector_operand" "%0,x")
-	  (match_operand:TF 2 "vector_operand" "xBm,xm")))]
+	  (match_operand:TF 1 "vector_operand" "%0,x,v,v")
+	  (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
   "TARGET_SSE
    && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
 {
-  static char buf[32];
+  static char buf[128];	/* 32 bytes truncates the %g templates.  */
   const char *ops;
   const char *tmp
-    = (get_attr_mode (insn) == MODE_V4SF) ? "<logic>ps" : "p<logic>";
+    = (which_alternative >= 2 ? "p<logic>q"
+       : get_attr_mode (insn) == MODE_V4SF ? "<logic>ps" : "p<logic>");
 
   switch (which_alternative)
     {
@@ -3156,8 +3230,12 @@  (define_insn "*<code>tf3"
       ops = "%s\t{%%2, %%0|%%0, %%2}";
       break;
     case 1:
+    case 2:
       ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
       break;
+    case 3:
+      ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
+      break;
     default:
       gcc_unreachable ();
     }
@@ -3165,7 +3243,7 @@  (define_insn "*<code>tf3"
   snprintf (buf, sizeof (buf), ops, tmp);
   return buf;
 }
-  [(set_attr "isa" "noavx,avx")
+  [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
    (set_attr "type" "sselog")
    (set (attr "prefix_data16")
      (if_then_else
@@ -3173,9 +3251,13 @@  (define_insn "*<code>tf3"
 	    (eq_attr "mode" "TI"))
        (const_string "1")
        (const_string "*")))
-   (set_attr "prefix" "orig,vex")
+   (set_attr "prefix" "orig,vex,evex,evex")
    (set (attr "mode")
-	(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+	(cond [(eq_attr "alternative" "2")
+		 (const_string "TI")
+	       (eq_attr "alternative" "3")
+		 (const_string "XI")
+	       (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
 		 (const_string "V4SF")
 	       (match_test "TARGET_AVX")
 		 (const_string "TI")