diff mbox

[rs6000] PR target/57150, do not use VSX instructions for long double caller saves

Message ID 20130503230523.GA15350@ibm-tiger.the-meissners.org
State New
Headers show

Commit Message

Michael Meissner May 3, 2013, 11:05 p.m. UTC
In debugging the glibc function __ieee754_scalbl when compiling for a power7
target, we discovered that the compiler was using VSX load and store
instructions to save long double types that are passed in floating point
registers.  However, since long double types are passed as two scalar floating
point values, it wastes space, and takes extra setup to use the VSX
instructions.  This patch only uses scalar load/store instructions to save long
double and _Decimal128, which has the same problem.

I have bootstrapped the compiler and found no regressions in the code.  Is this
patch acceptable to check into the trunk, and gcc 4.8/4.7 branches?

[gcc]
2013-05-03  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/57150
	* config/rs6000/rs6000.h (HARD_REGNO_CALLER_SAVE_MODE): Use DFmode
	to save TFmode registers and DImode to save TDmode registers for
	caller save operations.
	(HARD_REGNO_CALL_PART_CLOBBERED): TFmode and TDmode do not need to
	mark being partially clobbered since they only use the first
	double word.

	* config/rs6000/rs6000.c (rs6000_init_hard_regno_mode_ok): TFmode
	and TDmode only use the upper 64-bits of each VSX register.

[gcc/testsuite]
2013-05-03  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/57150
	* gcc.target/powerpc/pr57150.c: New file.

Comments

David Edelsohn May 4, 2013, 12:03 a.m. UTC | #1
On Fri, May 3, 2013 at 7:05 PM, Michael Meissner
<meissner@linux.vnet.ibm.com> wrote:
> In debugging the glibc function __ieee754_scalbl when compiling for a power7
> target, we discovered that the compiler was using VSX load and store
> instructions to save long double types that are passed in floating point
> registers.  However, since long double types are passed as two scalar floating
> point values, it wastes space, and takes extra setup to use the VSX
> instructions.  This patch only uses scalar load/store instructions to save long
> double and _Decimal128, which has the same problem.
>
> I have bootstrapped the compiler and found no regressions in the code.  Is this
> patch acceptable to check into the trunk, and gcc 4.8/4.7 branches?
>
> [gcc]
> 2013-05-03  Michael Meissner  <meissner@linux.vnet.ibm.com>
>
>         PR target/57150
>         * config/rs6000/rs6000.h (HARD_REGNO_CALLER_SAVE_MODE): Use DFmode
>         to save TFmode registers and DImode to save TDmode registers for
>         caller save operations.
>         (HARD_REGNO_CALL_PART_CLOBBERED): TFmode and TDmode do not need to
>         mark being partially clobbered since they only use the first
>         double word.
>
>         * config/rs6000/rs6000.c (rs6000_init_hard_regno_mode_ok): TFmode
>         and TDmode only use the upper 64-bits of each VSX register.
>
> [gcc/testsuite]
> 2013-05-03  Michael Meissner  <meissner@linux.vnet.ibm.com>
>
>         PR target/57150
>         * gcc.target/powerpc/pr57150.c: New file.

Okay.

Thanks, David
diff mbox

Patch

Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 198584)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -2335,8 +2335,16 @@  rs6000_init_hard_regno_mode_ok (bool glo
 	reg_size = UNITS_PER_WORD;
 
       for (m = 0; m < NUM_MACHINE_MODES; ++m)
-	rs6000_class_max_nregs[m][c]
-	  = (GET_MODE_SIZE (m) + reg_size - 1) / reg_size;
+	{
+	  int reg_size2 = reg_size;
+
+	  /* TFmode/TDmode always takes 2 registers, even in VSX.  */
+	  if (m == TDmode || m == TFmode)
+	    reg_size2 = UNITS_PER_FP_WORD;
+
+	  rs6000_class_max_nregs[m][c]
+	    = (GET_MODE_SIZE (m) + reg_size2 - 1) / reg_size2;
+	}
     }
 
   if (TARGET_E500_DOUBLE)
Index: gcc/config/rs6000/rs6000.h
===================================================================
--- gcc/config/rs6000/rs6000.h	(revision 198584)
+++ gcc/config/rs6000/rs6000.h	(working copy)
@@ -1071,12 +1071,17 @@  extern unsigned rs6000_pointer_size;
 #define HARD_REGNO_NREGS(REGNO, MODE) rs6000_hard_regno_nregs[(MODE)][(REGNO)]
 
 /* When setting up caller-save slots (MODE == VOIDmode) ensure we allocate
-   enough space to account for vectors in FP regs. */
+   enough space to account for vectors in FP regs.  However, TFmode/TDmode
+   should not use VSX instructions to do a caller save. */
 #define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE)			\
   (TARGET_VSX								\
    && ((MODE) == VOIDmode || ALTIVEC_OR_VSX_VECTOR_MODE (MODE))		\
-   && FP_REGNO_P (REGNO)				\
-   ? V2DFmode						\
+   && FP_REGNO_P (REGNO)						\
+   ? V2DFmode								\
+   : ((MODE) == TFmode && FP_REGNO_P (REGNO))				\
+   ? DFmode								\
+   : ((MODE) == TDmode && FP_REGNO_P (REGNO))				\
+   ? DImode								\
    : choose_hard_reg_mode ((REGNO), (NREGS), false))
 
 #define HARD_REGNO_CALL_PART_CLOBBERED(REGNO, MODE)			\
@@ -1084,7 +1089,8 @@  extern unsigned rs6000_pointer_size;
      && (GET_MODE_SIZE (MODE) > 4)					\
      && INT_REGNO_P (REGNO)) ? 1 : 0)					\
    || (TARGET_VSX && FP_REGNO_P (REGNO)					\
-       && GET_MODE_SIZE (MODE) > 8))
+       && GET_MODE_SIZE (MODE) > 8 && ((MODE) != TDmode) 		\
+       && ((MODE) != TFmode)))
 
 #define VSX_VECTOR_MODE(MODE)		\
 	 ((MODE) == V4SFmode		\
Index: gcc/testsuite/gcc.target/powerpc/pr57150.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/pr57150.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/pr57150.c	(revision 0)
@@ -0,0 +1,23 @@ 
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O3 -mcpu=power7 -fcaller-saves" } */
+/* { dg-final { scan-assembler-not "lxvd2x" } } */
+/* { dg-final { scan-assembler-not "lxvw4x" } } */
+/* { dg-final { scan-assembler-not "lvx" } } */
+/* { dg-final { scan-assembler-not "stxvd2x" } } */
+/* { dg-final { scan-assembler-not "stxvw4x" } } */
+/* { dg-final { scan-assembler-not "stvx" } } */
+
+/* Ensure caller save on long double does not use VSX instructions.  */
+
+extern long double modify (long double);
+
+void
+sum (long double *ptr, long double value, unsigned long n)
+{
+  unsigned long i;
+
+  for (i = 0; i < n; i++)
+    ptr[i] += modify (value);
+}