diff mbox

[rs6000] Force lvx and stvx for prologue saves and epilogue restores of Altivec regs

Message ID 1429537832.3022.23.camel@gnopaine
State New
Headers show

Commit Message

Bill Schmidt April 20, 2015, 1:50 p.m. UTC
Hi,

I've added an assert to this patch to detect a related condition.  Prior
to this patch, the prologue code was causing us to incorrectly call the
expand-time routine rs6000_emit_vsx_le_store().  This was harmless, but
we should have caught this sooner.  The patch removes this problem, but
we should still be on the lookout for such behavior in the future.  I'll
commit this version after 5.1 is released, if there are no objections.

Thanks,
Bill


2015-04-20  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	* config/rs6000/altivec.md (*altivec_lvx_<mode>_internal): Remove
	asterisk from name so this can be generated directly.
	(*altivec_stvx_<mode>_internal): Likewise.
	* config/rs6000/rs6000.c (rs6000_emit_le_vsx_store): Add assert
	that this is never called during or after reload/lra.
	(rs6000_frame_related): Remove split_reg
	argument and logic that references it.
	(emit_frame_save): Remove last parameter from call to
	rs6000_frame_related.
	(rs6000_emit_prologue): Remove last parameter from eight calls to
	rs6000_frame_related.  Force generation of stvx instruction for
	Altivec register saves.  Remove split_reg handling, which is no
	longer needed.
	(rs6000_emit_epilogue):  Force generation of lvx instruction for
	Altivec register restores.





On Tue, 2015-04-07 at 11:11 -0400, David Edelsohn wrote:
> On Mon, Apr 6, 2015 at 2:19 PM, Bill Schmidt
> <wschmidt@linux.vnet.ibm.com> wrote:
> > Hi,
> >
> > The prologue and epilogue code to save/restore Altivec registers uses
> > the generic emit_move_insn logic.  This means that when VSX is available
> > on a little-endian target, we will generate xxswapd/stxvd2x for prologue
> > saves, and lxvd2x/xxswapd for epilogue restores.  This happens too late
> > to be cleaned up by swap optimization.  Since the stack save slots are
> > aligned, we should always use lvx and stvx for this purpose instead.
> > This improves performance on LE targets, is performance-neutral for BE,
> > and is always safe.
> >
> > This change causes the previously failing test
> > gcc.target/powerpc/swaps-p8-2.c to succeed again.  This test started
> > failing when an unrelated change raised register pressure on the vector
> > registers, causing us to generate the above save/restore sequences.  The
> > test was testing whether all xxswapd instructions had been removed.
> > With this change, we no longer generate them for the save/restores, and
> > the test succeeds.
> >
> > Because we no longer generate the two-instruction sequences, we no
> > longer need the code in rs6000_frame_related to identify the true source
> > register for the two-instruction store.  I've removed that along with
> > the extra argument it required, and adjusted the callers.
> >
> > Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
> > regressions, and one fixed failure.  Is this ok for trunk after 5.1
> > branches?  I'd also like to backport this as far as 4.8 for the
> > performance improvement.
> >
> > Thanks,
> > Bill
> >
> >
diff mbox

Patch

Index: gcc/config/rs6000/altivec.md
===================================================================
--- gcc/config/rs6000/altivec.md	(revision 222230)
+++ gcc/config/rs6000/altivec.md	(working copy)
@@ -2455,7 +2455,7 @@ 
     }
 })
 
-(define_insn "*altivec_lvx_<mode>_internal"
+(define_insn "altivec_lvx_<mode>_internal"
   [(parallel
     [(set (match_operand:VM2 0 "register_operand" "=v")
 	  (match_operand:VM2 1 "memory_operand" "Z"))
@@ -2478,7 +2478,7 @@ 
     }
 })
 
-(define_insn "*altivec_stvx_<mode>_internal"
+(define_insn "altivec_stvx_<mode>_internal"
   [(parallel
     [(set (match_operand:VM2 0 "memory_operand" "=Z")
 	  (match_operand:VM2 1 "register_operand" "v"))
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 222230)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -8371,6 +8371,11 @@  rs6000_emit_le_vsx_store (rtx dest, rtx source, ma
 {
   rtx tmp, permute_src, permute_tmp;
 
+  /* This should never be called during or after reload, because it does
+     not re-permute the source register.  It is intended only for use
+     during expand.  */
+  gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);
+
   /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode,
      V1TImode).  */
   if (mode == TImode || mode == V1TImode)
@@ -22768,7 +22773,7 @@  output_probe_stack_range (rtx reg1, rtx reg2)
 
 static rtx
 rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
-		      rtx reg2, rtx rreg, rtx split_reg)
+		      rtx reg2, rtx rreg)
 {
   rtx real, temp;
 
@@ -22859,11 +22864,6 @@  rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE
 	  }
     }
 
-  /* If a store insn has been split into multiple insns, the
-     true source register is given by split_reg.  */
-  if (split_reg != NULL_RTX)
-    real = gen_rtx_SET (VOIDmode, SET_DEST (real), split_reg);
-
   RTX_FRAME_RELATED_P (insn) = 1;
   add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
 
@@ -22971,7 +22971,7 @@  emit_frame_save (rtx frame_reg, machine_mode mode,
   reg = gen_rtx_REG (mode, regno);
   insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
   return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
-			       NULL_RTX, NULL_RTX, NULL_RTX);
+			       NULL_RTX, NULL_RTX);
 }
 
 /* Emit an offset memory reference suitable for a frame store, while
@@ -23551,7 +23551,7 @@  rs6000_emit_prologue (void)
 
       insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
       rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
-			    treg, GEN_INT (-info->total_size), NULL_RTX);
+			    treg, GEN_INT (-info->total_size));
       sp_off = frame_off = info->total_size;
     }
 
@@ -23636,7 +23636,7 @@  rs6000_emit_prologue (void)
 
 	  insn = emit_move_insn (mem, reg);
 	  rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
-				NULL_RTX, NULL_RTX, NULL_RTX);
+				NULL_RTX, NULL_RTX);
 	  END_USE (0);
 	}
     }
@@ -23692,7 +23692,7 @@  rs6000_emit_prologue (void)
 				     info->lr_save_offset,
 				     DFmode, sel);
       rs6000_frame_related (insn, ptr_reg, sp_off,
-			    NULL_RTX, NULL_RTX, NULL_RTX);
+			    NULL_RTX, NULL_RTX);
       if (lr)
 	END_USE (0);
     }
@@ -23771,7 +23771,7 @@  rs6000_emit_prologue (void)
 					 SAVRES_SAVE | SAVRES_GPR);
 
 	  rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
-				NULL_RTX, NULL_RTX, NULL_RTX);
+				NULL_RTX, NULL_RTX);
 	}
 
       /* Move the static chain pointer back.  */
@@ -23821,7 +23821,7 @@  rs6000_emit_prologue (void)
 				     info->lr_save_offset + ptr_off,
 				     reg_mode, sel);
       rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
-			    NULL_RTX, NULL_RTX, NULL_RTX);
+			    NULL_RTX, NULL_RTX);
       if (lr)
 	END_USE (0);
     }
@@ -23837,7 +23837,7 @@  rs6000_emit_prologue (void)
 			     info->gp_save_offset + frame_off + reg_size * i);
       insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
       rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
-			    NULL_RTX, NULL_RTX, NULL_RTX);
+			    NULL_RTX, NULL_RTX);
     }
   else if (!WORLD_SAVE_P (info))
     {
@@ -24160,7 +24160,7 @@  rs6000_emit_prologue (void)
 				     info->altivec_save_offset + ptr_off,
 				     0, V4SImode, SAVRES_SAVE | SAVRES_VR);
       rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
-			    NULL_RTX, NULL_RTX, NULL_RTX);
+			    NULL_RTX, NULL_RTX);
       if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
 	{
 	  /* The oddity mentioned above clobbered our frame reg.  */
@@ -24176,7 +24176,7 @@  rs6000_emit_prologue (void)
       for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
 	if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
 	  {
-	    rtx areg, savereg, mem, split_reg;
+	    rtx areg, savereg, mem;
 	    int offset;
 
 	    offset = (info->altivec_save_offset + frame_off
@@ -24192,20 +24192,13 @@  rs6000_emit_prologue (void)
 	    mem = gen_frame_mem (V4SImode,
 				 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
 
-	    insn = emit_move_insn (mem, savereg);
+	    /* Rather than emitting a generic move, force use of the stvx
+	       instruction, which we always want.  In particular we don't
+	       want xxpermdi/stxvd2x for little endian.  */
+	    insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
 
-	    /* When we split a VSX store into two insns, we need to make
-	       sure the DWARF info knows which register we are storing.
-	       Pass it in to be used on the appropriate note.  */
-	    if (!BYTES_BIG_ENDIAN
-		&& GET_CODE (PATTERN (insn)) == SET
-		&& GET_CODE (SET_SRC (PATTERN (insn))) == VEC_SELECT)
-	      split_reg = savereg;
-	    else
-	      split_reg = NULL_RTX;
-
 	    rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
-				  areg, GEN_INT (offset), split_reg);
+				  areg, GEN_INT (offset));
 	  }
     }
 
@@ -24847,7 +24840,10 @@  rs6000_emit_epilogue (int sibcall)
 		mem = gen_frame_mem (V4SImode, addr);
 
 		reg = gen_rtx_REG (V4SImode, i);
-		emit_move_insn (reg, mem);
+		/* Rather than emitting a generic move, force use of the
+		   lvx instruction, which we always want.  In particular
+		   we don't want lxvd2x/xxpermdi for little endian.  */
+		(void) emit_insn (gen_altivec_lvx_v4si_internal (reg, mem));
 	      }
 	}
 
@@ -25050,7 +25046,10 @@  rs6000_emit_epilogue (int sibcall)
 		mem = gen_frame_mem (V4SImode, addr);
 
 		reg = gen_rtx_REG (V4SImode, i);
-		emit_move_insn (reg, mem);
+		/* Rather than emitting a generic move, force use of the
+		   lvx instruction, which we always want.  In particular
+		   we don't want lxvd2x/xxpermdi for little endian.  */
+		(void) emit_insn (gen_altivec_lvx_v4si_internal (reg, mem));
 	      }
 	}