Patchwork Vzeroupper placement/47440

login
register
mail settings
Submitter Uros Bizjak
Date Nov. 16, 2012, 7:50 a.m.
Message ID <CAFULd4ZiXFgfd=tZh12Odm41imCSMGpv+i+ad-TNFs1BPxBB1g@mail.gmail.com>
Download mbox | patch
Permalink /patch/199506/
State New
Headers show

Comments

Uros Bizjak - Nov. 16, 2012, 7:50 a.m.
On Fri, Nov 9, 2012 at 2:28 PM, Uros Bizjak <ubizjak@gmail.com> wrote:

> Finally, having a post-reload mode-switching pass, we can double-check
> that there are no live SSE registers at the vzeroupper insertion point.
> As vzeroupper is only an optimization, we want to play it safe and
> cancel vzeroupper insertion in this case.
>
> There is no degradation for x86_64 gABI targets, since all SSE
> registers are call-clobbered. Vzeroupper is conditionally inserted
> just before the call insn, where all registers are saved to the stack
> and are already dead. The vzeroupper at function exit is not problematic.

Patch was committed to mainline SVN with the following ChangeLog:

2012-11-16  Uros Bizjak  <ubizjak@gmail.com>

	* config/i386/i386-protos.h (ix86_emit_mode_set): Add third argument.
	* config/i386/i386.h (EMIT_MODE_SET): Update.
	* config/i386/i386.c (ix86_avx_emit_vzeroupper): New function.
	(ix86_emit_mode_set) <AVX_U128>: Call ix86_avx_emit_vzeroupper.

Bootstrapped and regression tested on x86_64-pc-linux-gnu {,-m32},
configured with --with-arch=corei7-avx --with-tune=corei7-avx.

Uros.

Patch

Index: i386-protos.h
===================================================================
--- i386-protos.h	(revision 193549)
+++ i386-protos.h	(working copy)
@@ -172,8 +172,11 @@ 
 extern int ix86_mode_after (int, int, rtx);
 extern int ix86_mode_entry (int);
 extern int ix86_mode_exit (int);
-extern void ix86_emit_mode_set (int, int);
 
+#ifdef HARD_CONST
+extern void ix86_emit_mode_set (int, int, HARD_REG_SET);
+#endif
+
 extern void x86_order_regs_for_local_alloc (void);
 extern void x86_function_profiler (FILE *, int);
 extern void x86_emit_floatuns (rtx [2]);
Index: i386.c
===================================================================
--- i386.c	(revision 193549)
+++ i386.c	(working copy)
@@ -15477,16 +15477,38 @@ 
   emit_move_insn (new_mode, reg);
 }
 
+/* Emit vzeroupper.  */
+
+void
+ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
+{
+  int i;
+
+  /* Cancel automatic vzeroupper insertion if there are
+     live call-saved SSE registers at the insertion point.  */
+
+  for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
+    if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
+      return;
+
+  if (TARGET_64BIT)
+    for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
+      if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
+	return;
+
+  emit_insn (gen_avx_vzeroupper ());
+}
+
 /* Generate one or more insns to set ENTITY to MODE.  */
 
 void
-ix86_emit_mode_set (int entity, int mode)
+ix86_emit_mode_set (int entity, int mode, HARD_REG_SET regs_live)
 {
   switch (entity)
     {
     case AVX_U128:
       if (mode == AVX_U128_CLEAN)
-	emit_insn (gen_avx_vzeroupper ());
+	ix86_avx_emit_vzeroupper (regs_live);
       break;
     case I387_TRUNC:
     case I387_FLOOR:
Index: i386.h
===================================================================
--- i386.h	(revision 193549)
+++ i386.h	(working copy)
@@ -2226,7 +2226,7 @@ 
    are to be inserted.  */
 
 #define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \
-  ix86_emit_mode_set ((ENTITY), (MODE))
+  ix86_emit_mode_set ((ENTITY), (MODE), (HARD_REGS_LIVE))
 
 /* Avoid renaming of stack registers, as doing so in combination with
    scheduling just increases amount of live registers at time and in