diff mbox

PATCH: Emit vzerouppers after reload

Message ID 20101102180606.GA4551@intel.com
State New
Headers show

Commit Message

H.J. Lu Nov. 2, 2010, 6:06 p.m. UTC
Hi,

This patch changes vzeroupper optimization to emit vzerouppers after
reload.  I checked in it as approved by Uros offline.

Thanks.


H.J.
---
gcc/

2010-11-02  Uros Bizjak  <ubizjak@gmail.com>
	    H.J. Lu  <hongjiu.lu@intel.com>

	* config/i386/i386-protos.h (ix86_split_call_vzeroupper): New.
	(ix86_split_call_pop_vzeroupper): Likewise.

	* config/i386/i386.c (move_or_delete_vzeroupper_2): Rewrite
	the loop.
	(ix86_expand_call): Use UNSPEC_CALL_NEEDS_VZEROUPPER.
	(ix86_split_call_vzeroupper): New.
	(ix86_split_call_pop_vzeroupper): Likewise.

	* config/i386/i386.md (UNSPEC_CALL_NEEDS_VZEROUPPER): New.
	(*call_pop_0_vzeroupper): Likewise.
	(*call_pop_1_vzeroupper): Likewise.
	(*sibcall_pop_1_vzeroupper): Likewise.
	(*call_0_vzeroupper): Likewise.
	(*call_1_vzeroupper): Likewise.
	(*sibcall_1_vzeroupper): Likewise.
	(*call_1_rex64_vzeroupper): Likewise.
	(*call_1_rex64_ms_sysv_vzeroupper): New.
	(*call_1_rex64_large_vzeroupper): Likewise.
	(*sibcall_1_rex64_vzeroupper): Likewise.
	(*call_value_pop_0_vzeroupper): New.
	(*call_value_pop_1_vzeroupper): Likewise.
	(*sibcall_value_pop_1_vzeroupper): Likewise.
	(*call_value_0_vzeroupper): New.
	(*call_value_0_rex64_vzeroupper): Use
	(*call_value_0_rex64_ms_sysv_vzeroupper): Likewise.
	(*call_value_1_vzeroupper): Likewise.
	(*sibcall_value_1_vzeroupper): Likewise.
	(*call_value_1_rex64_vzeroupper): Likewise.
	(*call_value_1_rex64_ms_sysv_vzeroupper): Likewise.
	(*call_value_1_rex64_large_vzeroupper): Likewise.
	(*sibcall_value_1_rex64_vzeroupper): Likewise.

gcc/testsuite/

2010-11-02  H.J. Lu  <hongjiu.lu@intel.com>

	* gcc.target/i386/avx-vzeroupper-15.c: New.
	* gcc.target/i386/avx-vzeroupper-16.c: Likewise.
	* gcc.target/i386/avx-vzeroupper-17.c: Likewise.
	* gcc.target/i386/avx-vzeroupper-18.c: Likewise.

	PR target/46253
	* gcc.target/i386/pr46253.c: New.
diff mbox

Patch

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index f6f9071..db758df 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -119,6 +119,8 @@  extern void ix86_expand_sse_unpack (rtx[], bool, bool);
 extern void ix86_expand_sse4_unpack (rtx[], bool, bool);
 extern bool ix86_expand_int_addcc (rtx[]);
 extern rtx ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
+extern void ix86_split_call_vzeroupper (rtx, rtx);
+extern void ix86_split_call_pop_vzeroupper (rtx, rtx);
 extern void x86_initialize_trampoline (rtx, rtx, rtx);
 extern rtx ix86_zero_extend_to_Pmode (rtx);
 extern void ix86_split_long_move (rtx[]);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 32d6371..0130bd4 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -108,163 +108,119 @@  check_avx256_stores (rtx dest, const_rtx set, void *data)
 static void
 move_or_delete_vzeroupper_2 (basic_block bb, bool upper_128bits_set)
 {
-  rtx curr_insn, next_insn, prev_insn, insn;
+  rtx insn;
+  rtx vzeroupper_insn = NULL_RTX;
+  rtx pat;
+  int avx256;
 
   if (dump_file)
     fprintf (dump_file, " BB [%i] entry: upper 128bits: %d\n",
 	     bb->index, upper_128bits_set);
 
-  for (curr_insn = BB_HEAD (bb);
-       curr_insn && curr_insn != NEXT_INSN (BB_END (bb));
-       curr_insn = next_insn)
+  insn = BB_HEAD (bb);
+  while (insn != BB_END (bb))
     {
-      int avx256;
+      insn = NEXT_INSN (insn);
 
-      next_insn = NEXT_INSN (curr_insn);
-
-      if (!NONDEBUG_INSN_P (curr_insn))
+      if (!NONDEBUG_INSN_P (insn))
 	continue;
 
-      /* Search for vzeroupper.  */
-      insn = PATTERN (curr_insn);
-      if (GET_CODE (insn) == UNSPEC_VOLATILE
-	  && XINT (insn, 1) == UNSPECV_VZEROUPPER)
+      /* Move vzeroupper before jump/call.  */
+      if (JUMP_P (insn) || CALL_P (insn))
+	{
+	  if (!vzeroupper_insn)
+	    continue;
+
+	  if (PREV_INSN (insn) != vzeroupper_insn)
+	    {
+	      if (dump_file)
+		{
+		  fprintf (dump_file, "Move vzeroupper after:\n");
+		  print_rtl_single (dump_file, PREV_INSN (insn));
+		  fprintf (dump_file, "before:\n");
+		  print_rtl_single (dump_file, insn);
+		}
+	      reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
+				  PREV_INSN (insn));
+	    }
+	  vzeroupper_insn = NULL_RTX;
+	  continue;
+	}
+
+      pat = PATTERN (insn);
+
+      /* Check insn for vzeroupper intrinsic.  */
+      if (GET_CODE (pat) == UNSPEC_VOLATILE
+	  && XINT (pat, 1) == UNSPECV_VZEROUPPER)
 	{
-	  /* Found vzeroupper.  */
 	  if (dump_file)
 	    {
+	      /* Found vzeroupper intrinsic.  */
 	      fprintf (dump_file, "Found vzeroupper:\n");
-	      print_rtl_single (dump_file, curr_insn);
+	      print_rtl_single (dump_file, insn);
 	    }
 	}
       else
 	{
-	  /* Check vzeroall intrinsic.  */
-	  if (GET_CODE (insn) == PARALLEL
-	      && GET_CODE (XVECEXP (insn, 0, 0)) == UNSPEC_VOLATILE
-	      && XINT (XVECEXP (insn, 0, 0), 1) == UNSPECV_VZEROALL)
-	    upper_128bits_set = false;
-	  else if (!upper_128bits_set)
+	  /* Check insn for vzeroall intrinsic.  */
+	  if (GET_CODE (pat) == PARALLEL
+	      && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
+	      && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
 	    {
-	      /* Check if upper 128bits of AVX registers are used.  */
-	      note_stores (insn, check_avx256_stores,
-			   &upper_128bits_set);
+	      upper_128bits_set = false;
+
+	      /* Delete pending vzeroupper insertion.  */
+	      if (vzeroupper_insn)
+		{
+		  delete_insn (vzeroupper_insn);
+		  vzeroupper_insn = NULL_RTX;
+		}
 	    }
+	  else if (!upper_128bits_set)
+	    note_stores (pat, check_avx256_stores, &upper_128bits_set);
 	  continue;
 	}
 
-      avx256 = INTVAL (XVECEXP (insn, 0, 0));
+      /* Process vzeroupper intrinsic.  */
+      avx256 = INTVAL (XVECEXP (pat, 0, 0));
 
       if (!upper_128bits_set)
 	{
 	  /* Since the upper 128bits are cleared, callee must not pass
 	     256bit AVX register.  We only need to check if callee
 	     returns 256bit AVX register.  */
-	  upper_128bits_set = avx256 == callee_return_avx256;
+	  upper_128bits_set = (avx256 == callee_return_avx256);
 
-	  /* Remove unnecessary vzeroupper since upper 128bits are
-	     cleared.  */
+	  /* Remove unnecessary vzeroupper since
+	     upper 128bits are cleared.  */
 	  if (dump_file)
 	    {
 	      fprintf (dump_file, "Delete redundant vzeroupper:\n");
-	      print_rtl_single (dump_file, curr_insn);
+	      print_rtl_single (dump_file, insn);
 	    }
-	  delete_insn (curr_insn);
-	  continue;
+	  delete_insn (insn);
 	}
       else if (avx256 == callee_return_pass_avx256
 	       || avx256 == callee_pass_avx256)
 	{
 	  /* Callee passes 256bit AVX register.  Check if callee
 	     returns 256bit AVX register.  */
-	  upper_128bits_set = avx256 == callee_return_pass_avx256;
+	  upper_128bits_set = (avx256 == callee_return_pass_avx256);
 
-	  /* Must remove vzeroupper since callee passes 256bit AVX
-	     register.  */
+	  /* Must remove vzeroupper since
+	     callee passes in 256bit AVX register.  */
 	  if (dump_file)
 	    {
 	      fprintf (dump_file, "Delete callee pass vzeroupper:\n");
-	      print_rtl_single (dump_file, curr_insn);
-	    }
-	  delete_insn (curr_insn);
-	  continue;
-	}
-
-      /* Find the jump after vzeroupper.  */
-      prev_insn = curr_insn;
-      if (avx256 == vzeroupper_intrinsic)
-	{
-	  /* For vzeroupper intrinsic, check if there is another
-	     vzeroupper.  */
-	  insn = NEXT_INSN (curr_insn);
-	  while (insn)
-	    {
-	      if (NONJUMP_INSN_P (insn)
-		  && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
-		  && XINT (PATTERN (insn), 1) == UNSPECV_VZEROUPPER)
-		{
-		  if (dump_file)
-		    {
-		      fprintf (dump_file,
-			       "Delete redundant vzeroupper intrinsic:\n");
-		      print_rtl_single (dump_file, curr_insn);
-		    }
-		  delete_insn (curr_insn);
-		  insn = NULL;
-		  continue;
-		}
-
-	      if (JUMP_P (insn) || CALL_P (insn))
-		break;
-	      prev_insn = insn;
-	      insn = NEXT_INSN (insn);
-	      if (insn == NEXT_INSN (BB_END (bb)))
-		break;
+	      print_rtl_single (dump_file, insn);
 	    }
-
-	  /* Continue if redundant vzeroupper intrinsic is deleted.  */
-	  if (!insn)
-	    continue;
+	  delete_insn (insn);
 	}
       else
 	{
-	  /* Find the next jump/call.  */
-	  insn = NEXT_INSN (curr_insn);
-	  while (insn)
-	    {
-	      if (JUMP_P (insn) || CALL_P (insn))
-		break;
-	      prev_insn = insn;
-	      insn = NEXT_INSN (insn);
-	      if (insn == NEXT_INSN (BB_END (bb)))
-		break;
-	    }
-
-	  if (!insn)
-	    gcc_unreachable();
+	  upper_128bits_set = false;
+	  vzeroupper_insn = insn;
 	}
-
-      /* Keep vzeroupper.  */
-      upper_128bits_set = false;
-
-      /* Also allow label as the next instruction.  */
-      if (insn == NEXT_INSN (BB_END (bb)) && !LABEL_P (insn))
-	gcc_unreachable();
-
-      /* Move vzeroupper before jump/call if neeeded.  */
-      if (curr_insn != prev_insn)
-	{
-	  reorder_insns_nobb (curr_insn, curr_insn, prev_insn);
-	  if (dump_file)
-	    {
-	      fprintf (dump_file, "Move vzeroupper after:\n");
-	      print_rtl_single (dump_file, prev_insn);
-	      fprintf (dump_file, "before:\n");
-	      print_rtl_single (dump_file, insn);
-	    }
-	}
-
-      next_insn = NEXT_INSN (insn);
     }
 
   BLOCK_INFO (bb)->upper_128bits_set = upper_128bits_set;
@@ -21495,10 +21451,12 @@  ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
 			       + 2, vec));
     }
 
-  /* Emit vzeroupper if needed.  */
+  /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration.  */
   if (TARGET_VZEROUPPER && cfun->machine->use_avx256_p)
     {
+      rtx unspec;
       int avx256;
+
       cfun->machine->use_vzeroupper_p = 1;
       if (cfun->machine->callee_pass_avx256_p)
 	{
@@ -21511,7 +21469,11 @@  ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
 	avx256 = callee_return_avx256;
       else
 	avx256 = call_no_avx256;
-      emit_insn (gen_avx_vzeroupper (GEN_INT (avx256))); 
+
+      unspec = gen_rtx_UNSPEC (VOIDmode,
+			       gen_rtvec (1, GEN_INT (avx256)),
+			       UNSPEC_CALL_NEEDS_VZEROUPPER);
+      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, unspec));
     }
 
   call = emit_call_insn (call);
@@ -21521,6 +21483,24 @@  ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
   return call;
 }
 
+void
+ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
+{
+  rtx call = XVECEXP (PATTERN (insn), 0, 0);
+  emit_insn (gen_avx_vzeroupper (vzeroupper));
+  emit_call_insn (call);
+}
+
+void
+ix86_split_call_pop_vzeroupper (rtx insn, rtx vzeroupper)
+{
+  rtx call = XVECEXP (PATTERN (insn), 0, 0);
+  rtx pop = XVECEXP (PATTERN (insn), 0, 1);
+  emit_insn (gen_avx_vzeroupper (vzeroupper));
+  emit_call_insn (gen_rtx_PARALLEL (VOIDmode,
+				    gen_rtvec (2, call, pop)));
+}
+
 /* Output the assembly for a call instruction.  */
 
 const char *
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index feaf781..278bd77 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -105,6 +105,7 @@ 
   UNSPEC_LD_MPIC	; load_macho_picbase
   UNSPEC_TRUNC_NOOP
   UNSPEC_DIV_ALREADY_SPLIT
+  UNSPEC_CALL_NEEDS_VZEROUPPER
 
   ;; For SSE/MMX support:
   UNSPEC_FIX_NOTRUNC
@@ -11260,6 +11261,21 @@ 
   DONE;
 })
 
+(define_insn_and_split "*call_pop_0_vzeroupper"
+  [(call (mem:QI (match_operand:SI 0 "constant_call_address_operand" ""))
+	 (match_operand:SI 1 "" ""))
+   (set (reg:SI SP_REG)
+	(plus:SI (reg:SI SP_REG)
+		 (match_operand:SI 2 "immediate_operand" "")))
+   (unspec [(match_operand 3 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_pop_vzeroupper (curr_insn, operands[3]); DONE;"
+  [(set_attr "type" "call")])
+
 (define_insn "*call_pop_0"
   [(call (mem:QI (match_operand:SI 0 "constant_call_address_operand" ""))
 	 (match_operand:SI 1 "" ""))
@@ -11275,6 +11291,21 @@ 
 }
   [(set_attr "type" "call")])
 
+(define_insn_and_split "*call_pop_1_vzeroupper"
+  [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm"))
+	 (match_operand:SI 1 "" ""))
+   (set (reg:SI SP_REG)
+	(plus:SI (reg:SI SP_REG)
+		 (match_operand:SI 2 "immediate_operand" "i")))
+   (unspec [(match_operand 3 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_pop_vzeroupper (curr_insn, operands[3]); DONE;"
+  [(set_attr "type" "call")])
+
 (define_insn "*call_pop_1"
   [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm"))
 	 (match_operand:SI 1 "" ""))
@@ -11289,6 +11320,21 @@ 
 }
   [(set_attr "type" "call")])
 
+(define_insn_and_split "*sibcall_pop_1_vzeroupper"
+  [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U"))
+	 (match_operand:SI 1 "" ""))
+   (set (reg:SI SP_REG)
+	(plus:SI (reg:SI SP_REG)
+		 (match_operand:SI 2 "immediate_operand" "i,i")))
+   (unspec [(match_operand 3 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_pop_vzeroupper (curr_insn, operands[3]); DONE;"
+  [(set_attr "type" "call")])
+
 (define_insn "*sibcall_pop_1"
   [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U"))
 	 (match_operand:SI 1 "" ""))
@@ -11321,6 +11367,18 @@ 
   DONE;
 })
 
+(define_insn_and_split "*call_0_vzeroupper"
+  [(call (mem:QI (match_operand 0 "constant_call_address_operand" ""))
+	 (match_operand 1 "" ""))
+   (unspec [(match_operand 2 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
+  [(set_attr "type" "call")])
+
 (define_insn "*call_0"
   [(call (mem:QI (match_operand 0 "constant_call_address_operand" ""))
 	 (match_operand 1 "" ""))]
@@ -11328,6 +11386,18 @@ 
   { return ix86_output_call_insn (insn, operands[0], 0); }
   [(set_attr "type" "call")])
 
+(define_insn_and_split "*call_1_vzeroupper"
+  [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm"))
+	 (match_operand 1 "" ""))
+   (unspec [(match_operand 2 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
+  [(set_attr "type" "call")])
+
 (define_insn "*call_1"
   [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm"))
 	 (match_operand 1 "" ""))]
@@ -11335,6 +11405,18 @@ 
   { return ix86_output_call_insn (insn, operands[0], 0); }
   [(set_attr "type" "call")])
 
+(define_insn_and_split "*sibcall_1_vzeroupper"
+  [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U"))
+	 (match_operand 1 "" ""))
+   (unspec [(match_operand 2 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
+  [(set_attr "type" "call")])
+
 (define_insn "*sibcall_1"
   [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U"))
 	 (match_operand 1 "" ""))]
@@ -11342,6 +11424,19 @@ 
   { return ix86_output_call_insn (insn, operands[0], 0); }
   [(set_attr "type" "call")])
 
+(define_insn_and_split "*call_1_rex64_vzeroupper"
+  [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm"))
+	 (match_operand 1 "" ""))
+   (unspec [(match_operand 2 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)
+   && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
+  [(set_attr "type" "call")])
+
 (define_insn "*call_1_rex64"
   [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm"))
 	 (match_operand 1 "" ""))]
@@ -11350,6 +11445,32 @@ 
   { return ix86_output_call_insn (insn, operands[0], 0); }
   [(set_attr "type" "call")])
 
+(define_insn_and_split "*call_1_rex64_ms_sysv_vzeroupper"
+  [(parallel
+    [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm"))
+	   (match_operand 1 "" ""))
+     (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
+     (clobber (reg:TI XMM6_REG))
+     (clobber (reg:TI XMM7_REG))
+     (clobber (reg:TI XMM8_REG))
+     (clobber (reg:TI XMM9_REG))
+     (clobber (reg:TI XMM10_REG))
+     (clobber (reg:TI XMM11_REG))
+     (clobber (reg:TI XMM12_REG))
+     (clobber (reg:TI XMM13_REG))
+     (clobber (reg:TI XMM14_REG))
+     (clobber (reg:TI XMM15_REG))
+     (clobber (reg:DI SI_REG))
+     (clobber (reg:DI DI_REG))])
+   (unspec [(match_operand 2 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
+  [(set_attr "type" "call")])
+
 (define_insn "*call_1_rex64_ms_sysv"
   [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm"))
 	 (match_operand 1 "" ""))
@@ -11370,6 +11491,18 @@ 
   { return ix86_output_call_insn (insn, operands[0], 0); }
   [(set_attr "type" "call")])
 
+(define_insn_and_split "*call_1_rex64_large_vzeroupper"
+  [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rm"))
+	 (match_operand 1 "" ""))
+   (unspec [(match_operand 2 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
+  [(set_attr "type" "call")])
+
 (define_insn "*call_1_rex64_large"
   [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rm"))
 	 (match_operand 1 "" ""))]
@@ -11377,6 +11510,18 @@ 
   { return ix86_output_call_insn (insn, operands[0], 0); }
   [(set_attr "type" "call")])
 
+(define_insn_and_split "*sibcall_1_rex64_vzeroupper"
+  [(call (mem:QI (match_operand:DI 0 "sibcall_insn_operand" "s,U"))
+	 (match_operand 1 "" ""))
+   (unspec [(match_operand 2 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
+  [(set_attr "type" "call")])
+
 (define_insn "*sibcall_1_rex64"
   [(call (mem:QI (match_operand:DI 0 "sibcall_insn_operand" "s,U"))
 	 (match_operand 1 "" ""))]
@@ -17123,6 +17268,22 @@ 
 ;; Call-value patterns last so that the wildcard operand does not
 ;; disrupt insn-recog's switch tables.
 
+(define_insn_and_split "*call_value_pop_0_vzeroupper"
+  [(set (match_operand 0 "" "")
+	(call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
+	      (match_operand:SI 2 "" "")))
+   (set (reg:SI SP_REG)
+	(plus:SI (reg:SI SP_REG)
+		 (match_operand:SI 3 "immediate_operand" "")))
+   (unspec [(match_operand 4 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_pop_vzeroupper (curr_insn, operands[4]); DONE;"
+  [(set_attr "type" "callv")])
+
 (define_insn "*call_value_pop_0"
   [(set (match_operand 0 "" "")
 	(call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
@@ -17134,6 +17295,22 @@ 
   { return ix86_output_call_insn (insn, operands[1], 1); }
   [(set_attr "type" "callv")])
 
+(define_insn_and_split "*call_value_pop_1_vzeroupper"
+  [(set (match_operand 0 "" "")
+	(call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm"))
+	      (match_operand:SI 2 "" "")))
+   (set (reg:SI SP_REG)
+	(plus:SI (reg:SI SP_REG)
+		 (match_operand:SI 3 "immediate_operand" "i")))
+   (unspec [(match_operand 4 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_pop_vzeroupper (curr_insn, operands[4]); DONE;"
+  [(set_attr "type" "callv")])
+
 (define_insn "*call_value_pop_1"
   [(set (match_operand 0 "" "")
 	(call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm"))
@@ -17145,6 +17322,22 @@ 
   { return ix86_output_call_insn (insn, operands[1], 1); }
   [(set_attr "type" "callv")])
 
+(define_insn_and_split "*sibcall_value_pop_1_vzeroupper"
+  [(set (match_operand 0 "" "")
+	(call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U"))
+	      (match_operand:SI 2 "" "")))
+   (set (reg:SI SP_REG)
+	(plus:SI (reg:SI SP_REG)
+		 (match_operand:SI 3 "immediate_operand" "i,i")))
+   (unspec [(match_operand 4 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_pop_vzeroupper (curr_insn, operands[4]); DONE;"
+  [(set_attr "type" "callv")])
+
 (define_insn "*sibcall_value_pop_1"
   [(set (match_operand 0 "" "")
 	(call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U"))
@@ -17156,6 +17349,19 @@ 
   { return ix86_output_call_insn (insn, operands[1], 1); }
   [(set_attr "type" "callv")])
 
+(define_insn_and_split "*call_value_0_vzeroupper"
+  [(set (match_operand 0 "" "")
+	(call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
+	      (match_operand:SI 2 "" "")))
+   (unspec [(match_operand 3 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+  [(set_attr "type" "callv")])
+
 (define_insn "*call_value_0"
   [(set (match_operand 0 "" "")
 	(call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
@@ -17164,6 +17370,19 @@ 
   { return ix86_output_call_insn (insn, operands[1], 1); }
   [(set_attr "type" "callv")])
 
+(define_insn_and_split "*call_value_0_rex64_vzeroupper"
+  [(set (match_operand 0 "" "")
+	(call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
+	      (match_operand:DI 2 "const_int_operand" "")))
+   (unspec [(match_operand 3 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && TARGET_64BIT"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+  [(set_attr "type" "callv")])
+
 (define_insn "*call_value_0_rex64"
   [(set (match_operand 0 "" "")
 	(call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
@@ -17172,6 +17391,33 @@ 
   { return ix86_output_call_insn (insn, operands[1], 1); }
   [(set_attr "type" "callv")])
 
+(define_insn_and_split "*call_value_0_rex64_ms_sysv_vzeroupper"
+  [(parallel
+    [(set (match_operand 0 "" "")
+	  (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
+		(match_operand:DI 2 "const_int_operand" "")))
+     (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
+     (clobber (reg:TI XMM6_REG))
+     (clobber (reg:TI XMM7_REG))
+     (clobber (reg:TI XMM8_REG))
+     (clobber (reg:TI XMM9_REG))
+     (clobber (reg:TI XMM10_REG))
+     (clobber (reg:TI XMM11_REG))
+     (clobber (reg:TI XMM12_REG))
+     (clobber (reg:TI XMM13_REG))
+     (clobber (reg:TI XMM14_REG))
+     (clobber (reg:TI XMM15_REG))
+     (clobber (reg:DI SI_REG))
+     (clobber (reg:DI DI_REG))])
+   (unspec [(match_operand 3 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+  [(set_attr "type" "callv")])
+
 (define_insn "*call_value_0_rex64_ms_sysv"
   [(set (match_operand 0 "" "")
 	(call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
@@ -17193,6 +17439,19 @@ 
   { return ix86_output_call_insn (insn, operands[1], 1); }
   [(set_attr "type" "callv")])
 
+(define_insn_and_split "*call_value_1_vzeroupper"
+  [(set (match_operand 0 "" "")
+	(call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm"))
+	      (match_operand:SI 2 "" "")))
+   (unspec [(match_operand 3 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+  [(set_attr "type" "callv")])
+
 (define_insn "*call_value_1"
   [(set (match_operand 0 "" "")
 	(call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm"))
@@ -17201,6 +17460,19 @@ 
   { return ix86_output_call_insn (insn, operands[1], 1); }
   [(set_attr "type" "callv")])
 
+(define_insn_and_split "*sibcall_value_1_vzeroupper"
+  [(set (match_operand 0 "" "")
+	(call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U"))
+	      (match_operand:SI 2 "" "")))
+   (unspec [(match_operand 3 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+  [(set_attr "type" "callv")])
+
 (define_insn "*sibcall_value_1"
   [(set (match_operand 0 "" "")
 	(call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U"))
@@ -17209,6 +17481,20 @@ 
   { return ix86_output_call_insn (insn, operands[1], 1); }
   [(set_attr "type" "callv")])
 
+(define_insn_and_split "*call_value_1_rex64_vzeroupper"
+  [(set (match_operand 0 "" "")
+	(call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
+	      (match_operand:DI 2 "" "")))
+   (unspec [(match_operand 3 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)
+   && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+  [(set_attr "type" "callv")])
+
 (define_insn "*call_value_1_rex64"
   [(set (match_operand 0 "" "")
 	(call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
@@ -17218,6 +17504,33 @@ 
   { return ix86_output_call_insn (insn, operands[1], 1); }
   [(set_attr "type" "callv")])
 
+(define_insn_and_split "*call_value_1_rex64_ms_sysv_vzeroupper"
+  [(parallel
+    [(set (match_operand 0 "" "")
+	  (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
+		(match_operand:DI 2 "" "")))
+     (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
+     (clobber (reg:TI XMM6_REG))
+     (clobber (reg:TI XMM7_REG))
+     (clobber (reg:TI XMM8_REG))
+     (clobber (reg:TI XMM9_REG))
+     (clobber (reg:TI XMM10_REG))
+     (clobber (reg:TI XMM11_REG))
+     (clobber (reg:TI XMM12_REG))
+     (clobber (reg:TI XMM13_REG))
+     (clobber (reg:TI XMM14_REG))
+     (clobber (reg:TI XMM15_REG))
+     (clobber (reg:DI SI_REG))
+     (clobber (reg:DI DI_REG))])
+   (unspec [(match_operand 3 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+  [(set_attr "type" "callv")])
+
 (define_insn "*call_value_1_rex64_ms_sysv"
   [(set (match_operand 0 "" "")
 	(call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
@@ -17239,6 +17552,19 @@ 
   { return ix86_output_call_insn (insn, operands[1], 1); }
   [(set_attr "type" "callv")])
 
+(define_insn_and_split "*call_value_1_rex64_large_vzeroupper"
+  [(set (match_operand 0 "" "")
+	(call (mem:QI (match_operand:DI 1 "call_insn_operand" "rm"))
+	      (match_operand:DI 2 "" "")))
+   (unspec [(match_operand 3 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+  [(set_attr "type" "callv")])
+
 (define_insn "*call_value_1_rex64_large"
   [(set (match_operand 0 "" "")
 	(call (mem:QI (match_operand:DI 1 "call_insn_operand" "rm"))
@@ -17247,6 +17573,19 @@ 
   { return ix86_output_call_insn (insn, operands[1], 1); }
   [(set_attr "type" "callv")])
 
+(define_insn_and_split "*sibcall_value_1_rex64_vzeroupper"
+  [(set (match_operand 0 "" "")
+	(call (mem:QI (match_operand:DI 1 "sibcall_insn_operand" "s,U"))
+	      (match_operand:DI 2 "" "")))
+   (unspec [(match_operand 3 "const_int_operand" "")]
+   	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
+  "TARGET_VZEROUPPER && TARGET_64BIT && SIBLING_CALL_P (insn)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+  [(set_attr "type" "callv")])
+
 (define_insn "*sibcall_value_1_rex64"
   [(set (match_operand 0 "" "")
 	(call (mem:QI (match_operand:DI 1 "sibcall_insn_operand" "s,U"))
diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-15.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-15.c
new file mode 100644
index 0000000..134a3dd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-15.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O0 -mavx -mtune=generic -dp" } */
+
+#include <immintrin.h>
+
+extern __m256 x, y;
+extern void (*bar) (void);
+
+void
+foo ()
+{
+  x = y;
+  bar ();
+}
+
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-16.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-16.c
new file mode 100644
index 0000000..3fb099d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-16.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O0 -mavx -mabi=ms -mtune=generic -dp" } */
+
+typedef float __m256 __attribute__ ((__vector_size__ (32), __may_alias__));
+
+extern __m256 x;
+
+extern __m256 __attribute__ ((sysv_abi))  bar (__m256);
+
+void
+foo (void)
+{
+  bar (x);
+}
+
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */
+/* { dg-final { scan-assembler-times "\\*call_value_0_rex64_ms_sysv" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-17.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-17.c
new file mode 100644
index 0000000..2f3cfd2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-17.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O0 -mavx -mabi=ms -mtune=generic -dp" } */
+
+typedef float __m256 __attribute__ ((__vector_size__ (32), __may_alias__));
+
+extern __m256 x;
+
+extern __m256 __attribute__ ((sysv_abi)) (*bar) (__m256);
+
+void
+foo (void)
+{
+  bar (x);
+}
+
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */
+/* { dg-final { scan-assembler-times "\\*call_value_1_rex64_ms_sysv" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-18.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-18.c
new file mode 100644
index 0000000..541f77d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-18.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O0 -mavx -mabi=ms -mtune=generic -dp" } */
+
+typedef float __m256 __attribute__ ((__vector_size__ (32), __may_alias__));
+
+extern __m256 x;
+
+extern void __attribute__ ((sysv_abi))  bar (__m256);
+
+void
+foo (void)
+{
+  bar (x);
+}
+
+/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
+/* { dg-final { scan-assembler-times "\\*call_1_rex64_ms_sysv" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr46253.c b/gcc/testsuite/gcc.target/i386/pr46253.c
new file mode 100644
index 0000000..406790a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr46253.c
@@ -0,0 +1,15 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O -g -mf16c -mtune=generic -dp" } */
+
+typedef __m256i __attribute__ ((__vector_size__ (32)));
+
+__m256i bar (void);
+void foo (void)
+{
+  int i = 0;
+  bar ();
+  __builtin_ia32_vzeroupper ();
+  while (++i);
+}
+
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */