Patchwork [off-list] Re: [PATCH] Vzeroupper placement/47440

login
register
mail settings
Submitter Uros Bizjak
Date Nov. 9, 2012, 1:28 p.m.
Message ID <CAFULd4YRSMz4Ap9GVfFnYCb=X3f7Ac_H_xMGtJqqU33zAh4vcQ@mail.gmail.com>
Download mbox | patch
Permalink /patch/198060/
State New
Headers show

Comments

Uros Bizjak - Nov. 9, 2012, 1:28 p.m.
On Fri, Nov 9, 2012 at 1:47 PM, Uros Bizjak <ubizjak@gmail.com> wrote:

>>> >> These assert should tell you what is wrong with the control flow.
>>> >> Please look at control_flow_insn_p, which condition returns true.
>>> >
>>> > There is a note after call insn.
>>> >
>>> > (call_insn:TI 908 35558 50534 1681 (call (mem:QI (symbol_ref:DI
>>> > ("_gfortran_stop_string") [flags 0x41] <function_decl 0x7ffff7eb6200
>>> > _gfortran_stop_string>) [0 _gfortran_stop_string S1 A8])
>>> >         (const_int 0 [0])) huygens.fppized.f90:190 616 {*call}
>>> >      (expr_list:REG_DEAD (reg:DI 5 di)
>>> >         (expr_list:REG_DEAD (reg:SI 4 si)
>>> >             (expr_list:REG_NORETURN (const_int 0 [0])
>>> >                 (nil))))
>>> >     (expr_list:REG_FRAME_RELATED_EXPR (use (reg:DI 5 di))
>>> >         (expr_list:REG_BR_PRED (use (reg:SI 4 si))
>>> >             (nil))))
>>> > (note 50534 908 909 1681 (expr_list:REG_DEP_TRUE (concat:DI (reg:DI 5 di)
>>> >         (const_int 0 [0]))
>>> >     (expr_list:REG_DEP_TRUE (concat:SI (reg:SI 4 si)
>>> >             (const_int 0 [0]))
>>> >         (nil))) NOTE_INSN_CALL_ARG_LOCATION)
>>> >
>>>
>>> Huh, this RTX is ignored:
>>
>> NOTE_INSN_CALL_ARG_LOCATION is fine, even after a REG_NORETURN call.
>> It is just a way how to pass call argument details to dwarf2out.
>> If you have a pass after var-tracking, you need to skip over it.
>
> Yes, but postreload mode switching should come before pro_and_epilogue
> anyway, otherwise create_pre_exit won't work:
>
> --mode-switching.c (222)--
>         /* If this function returns a value at the end, we have to
>            insert the final mode switch before the return value copy
>            to its hard register.  */
>         if (EDGE_COUNT (EXIT_BLOCK_PTR->preds) == 1
>             && NONJUMP_INSN_P ((last_insn = BB_END (src_bb)))
>             && GET_CODE (PATTERN (last_insn)) == USE
>             && GET_CODE ((ret_reg = XEXP (PATTERN (last_insn), 0))) == REG)
> --mode-switching.2 (228)--
>
> I believe that this will work OK if the pass is inserted before
> prologue generation pass.

Finally, having a post-reload mode-switching pass, we can double-check
that there are no live SSE registers at vzeroupper insertion point. As
vzeroupper is only an optimization, we want to play safe and cancel
vzeroupper insertion in this case

There is no degradation for x86_64 gABI targets, since all SSE
registers are call-clobbered. Vzeroupper is conditionally inserted
just before call insn, where all registers are saved to stack and
already dead. The vzeroupper at function exit is not problematic.

Uros.

Patch

Index: i386-protos.h
===================================================================
--- i386-protos.h	(revision 193329)
+++ i386-protos.h	(working copy)
@@ -172,8 +172,11 @@  extern int ix86_mode_needed (int, rtx);
 extern int ix86_mode_after (int, int, rtx);
 extern int ix86_mode_entry (int);
 extern int ix86_mode_exit (int);
-extern void ix86_emit_mode_set (int, int);
 
+#ifdef HARD_CONST
+extern void ix86_emit_mode_set (int, int, HARD_REG_SET);
+#endif
+
 extern void x86_order_regs_for_local_alloc (void);
 extern void x86_function_profiler (FILE *, int);
 extern void x86_emit_floatuns (rtx [2]);
Index: i386.c
===================================================================
--- i386.c	(revision 193329)
+++ i386.c	(working copy)
@@ -15277,16 +15284,38 @@  emit_i387_cw_initialization (int mode)
   emit_move_insn (new_mode, reg);
 }
 
+/* Emit vzeroupper.  */
+
+void
+ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
+{
+  int i;
+
+  /* Cancel automatic vzeroupper insertion if there are
+     live call-saved SSE registers at the insertion point.  */
+
+  for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
+    if (!call_used_regs[i] && TEST_HARD_REG_BIT (regs_live, i))
+      return;
+
+  if (TARGET_64BIT)
+    for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
+      if (!call_used_regs[i] && TEST_HARD_REG_BIT (regs_live, i))
+	return;
+
+  emit_insn (gen_avx_vzeroupper ());
+}
+
 /* Generate one or more insns to set ENTITY to MODE.  */
 
 void
-ix86_emit_mode_set (int entity, int mode)
+ix86_emit_mode_set (int entity, int mode, HARD_REG_SET regs_live)
 {
   switch (entity)
     {
     case AVX_U128:
       if (mode == AVX_U128_CLEAN)
-	emit_insn (gen_avx_vzeroupper ());
+	ix86_avx_emit_vzeroupper (regs_live);
       break;
     case I387_TRUNC:
     case I387_FLOOR:
Index: i386.h
===================================================================
--- i386.h	(revision 193329)
+++ i386.h	(working copy)
@@ -2223,7 +2227,7 @@  enum avx_u128_state
    are to be inserted.  */
 
 #define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \
-  ix86_emit_mode_set ((ENTITY), (MODE))
+  ix86_emit_mode_set ((ENTITY), (MODE), (HARD_REGS_LIVE))
 
 /* Avoid renaming of stack registers, as doing so in combination with
    scheduling just increases amount of live registers at time and in