diff mbox

PR target/65614, Prefer to use LFD on powerpc for constants

Message ID 20150403190739.GA24779@ibm-tiger.the-meissners.org
State New
Headers show

Commit Message

Michael Meissner April 3, 2015, 7:07 p.m. UTC
In my fix for PR target/65240, I removed the special -ffast-math code that
delayed dealing with constants until reload time.  With that patch, constants are
now pushed to memory earlier, and the compiler uses LFS (load floating point
single) to load double precision constants.  When you use the LRA register
allocator (-mlra), it uses the Altivec registers for scalar data more
frequently, and there appear to be interactions between values loaded up as
single constants that are moved to the Altivec registers via XXLOR.

This patch makes (float_extend (mem)) slightly more costly than just (mem) and
the code in expr.c will not compress the constant.  In addition, for scalar
single precision moves it uses copy sign (XSCPSGNDP) instead of or (XXLOR) to move the data.  The
copy sign instruction deals with single precision values that would create
denormals.  While working in the code, I also noticed that truncdfsf2 did not
have support for ISA 2.07, so I added support for it.

I have done bootstraps and make check with no regressions (after fixing the two
tests that were checking that LFS was used).  I have also built and run the
Spec 2006 benchmark bwaves with the patch, and it now runs when compiled with
-mlra and upper register support.  Is the patch ok to commit to trunk?

[gcc]
2015-04-03  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/65614
	* config/rs6000/rs6000.c (rs6000_rtx_costs): Make FLOAT_EXTEND
	more expensive, so that LFD is used to load double constants, and
	not LFS.

	* config/rs6000/rs6000.md (extendsfdf2_fpr): Generate XSCPSGNDP
	instead of XXLOR to copy SFmode to clear out dirty bits created
	when SFmode denormals are generated.
	(mov<mode>_hardfloat, FMOVE32 case): Likewise.
	(truncdfsf2_fpr): Add support for ISA 2.07 XSRSP instruction.

[gcc/testsuite]
2015-04-03  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/65614
	* gcc.target/powerpc/compress-float-ppc-pic.c: Run test on power5
	to get floating point compression.
	* gcc.target/powerpc/compress-float-ppc.c: Likewise.

Comments

David Edelsohn April 3, 2015, 7:21 p.m. UTC | #1
On Fri, Apr 3, 2015 at 3:07 PM, Michael Meissner
<meissner@linux.vnet.ibm.com> wrote:
> In my fix for PR target/65240, I removed the special -ffast-math code that
> delayed dealing with constants until reload time.  In this patch, constants are
> now pushed to memory earlier, and the compiler uses LFS (load floating point
> single) to load double precision constants.  When you use the LRA register
> allocator (-mlra), it uses the Altivec registers for scalar data more
> frequently, and there appears to be interactions between values loaded up as
> single constants that are moved to the Altivec registers via XXLOR.
>
> This patch makes (float_extend (mem)) slightly more costly than just (mem) and
> the code in expr.c will not compress the constant.  In addition, for scalar
> single precision moves it uses copy sign instead of or to move the data.  The
> copy sign instruction deals with single precision values that would create
> denormals.  While working in the code, I also noticed that truncdfsf2 did not
> have support for ISA 2.07, so I added support for it.
>
> I have done bootstraps and make check with no regressions (after fixing the two
> tests that were checking that LFS was used).  I have also built and run the
> Spec 2006 benchmark bwaves with the patch, and it now runs when compiled with
> -mlra and upper register support.  Is the patch ok to commit to trunk?

The FLOAT_EXTEND cost should be based on the processor tuning, not the ISA.

- David
diff mbox

Patch

Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 221802)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -30479,8 +30479,10 @@  rs6000_rtx_costs (rtx x, int code, int o
       return false;
 
     case FLOAT_EXTEND:
+      /* Make converts on newer machines slightly more expensive to encourage
+	 expr.c to not use a LFS instead of LFD to load constants.  */
       if (mode == DFmode)
-	*total = 0;
+	*total = (TARGET_VSX || TARGET_POPCNTD) ? 1 : 0;
       else
 	*total = rs6000_cost->fp;
       return false;
Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md	(revision 221802)
+++ gcc/config/rs6000/rs6000.md	(working copy)
@@ -5222,7 +5222,7 @@  (define_insn_and_split "*extendsfdf2_fpr
    fmr %0,%1
    lfs%U1%X1 %0,%1
    #
-   xxlor %x0,%x1,%x1
+   xscpsgndp %x0,%x1,%x1
    lxsspx %x0,%y1"
   "&& reload_completed && REG_P (operands[1]) && REGNO (operands[0]) == REGNO (operands[1])"
   [(const_int 0)]
@@ -5230,7 +5230,7 @@  (define_insn_and_split "*extendsfdf2_fpr
   emit_note (NOTE_INSN_DELETED);
   DONE;
 }
-  [(set_attr "type" "fp,fp,fpload,fp,vecsimple,fpload")])
+  [(set_attr "type" "fp,fp,fpload,fp,fp,fpload")])
 
 (define_expand "truncdfsf2"
   [(set (match_operand:SF 0 "gpc_reg_operand" "")
@@ -5239,10 +5239,12 @@  (define_expand "truncdfsf2"
   "")
 
 (define_insn "*truncdfsf2_fpr"
-  [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
-	(float_truncate:SF (match_operand:DF 1 "gpc_reg_operand" "d")))]
+  [(set (match_operand:SF 0 "gpc_reg_operand" "=f,wy")
+	(float_truncate:SF (match_operand:DF 1 "gpc_reg_operand" "d,ws")))]
   "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
-  "frsp %0,%1"
+  "@
+   frsp %0,%1
+   xsrsp %x0,%x1"
   [(set_attr "type" "fp")])
 
 ;; This expander is here to avoid FLOAT_WORDS_BIGENDIAN tests in
@@ -8058,7 +8060,7 @@  (define_insn "mov<mode>_hardfloat"
    lwz%U1%X1 %0,%1
    stw%U0%X0 %1,%0
    fmr %0,%1
-   xxlor %x0,%x1,%x1
+   xscpsgndp %x0,%x1,%x1
    xxlxor %x0,%x0,%x0
    li %0,0
    <f32_li>
@@ -8070,7 +8072,7 @@  (define_insn "mov<mode>_hardfloat"
    mt%0 %1
    mf%1 %0
    nop"
-  [(set_attr "type" "*,load,store,fp,vecsimple,vecsimple,integer,fpload,fpstore,fpload,fpstore,mftgpr,mffgpr,mtjmpr,mfjmpr,*")
+  [(set_attr "type" "*,load,store,fp,fp,vecsimple,integer,fpload,fpstore,fpload,fpstore,mftgpr,mffgpr,mtjmpr,mfjmpr,*")
    (set_attr "length" "4")])
 
 (define_insn "*mov<mode>_softfloat"
Index: gcc/testsuite/gcc.target/powerpc/compress-float-ppc.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/compress-float-ppc.c	(revision 221802)
+++ gcc/testsuite/gcc.target/powerpc/compress-float-ppc.c	(working copy)
@@ -1,5 +1,7 @@ 
 /* { dg-do compile { target powerpc_fprs } } */
-/* { dg-options "-O2" } */
+/* { dg-options "-O2 -mcpu=power5" } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power5" } } */
+
 double foo (double x) {
   return x + 1.75;
 }
Index: gcc/testsuite/gcc.target/powerpc/compress-float-ppc-pic.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/compress-float-ppc-pic.c	(revision 221802)
+++ gcc/testsuite/gcc.target/powerpc/compress-float-ppc-pic.c	(working copy)
@@ -1,5 +1,7 @@ 
 /* { dg-do compile { target powerpc_fprs } } */
-/* { dg-options "-O2 -fpic" } */
+/* { dg-options "-O2 -fpic -mcpu=power5" } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power5" } } */
+
 double foo (double x) {
   return x + 1.75;
 }