Patchwork [rs6000] Update Power7 scheduling

login
register
mail settings
Submitter Pat Haugen
Date Oct. 27, 2011, 10:14 p.m.
Message ID <4EA9D7DF.7010202@linux.vnet.ibm.com>
Download mbox | patch
Permalink /patch/122257/
State New
Headers show

Comments

Pat Haugen - Oct. 27, 2011, 10:14 p.m.
The following patch fixes some issues with the Power7 scheduling description.
The patch is performance-neutral on cpu2006 (I was actually hoping to see some
improvements, but it is still the right thing to do since it describes the
hardware more accurately).

Bootstrapped and regression-tested on powerpc64-linux with no new regressions. Ok for trunk?

-Pat


2011-10-27  Pat Haugen <pthaugen@us.ibm.com>

	* config/rs6000/rs6000.md (define_attr "type"): Add vecdouble.
	* config/rs6000/vsx.md (VStype_simple, VStype_mul): Use vecdouble
	type for V2DF.
	(VStype_div): Use vector types for V2DF/V4SF.
	(VStype_sqrt): Use *sqrt types.
	(VS_spdp_type): Change type to vecdouble.
	(*vsx_fmav2df4, *vsx_nfmsv2df4, vsx_xvcvdpsxws, vsx_xvcvdpuxws,
	vsx_xvcvuxdsp, vsx_xvcvsxwdp, vsx_xvcvuxwdp, vsx_xvcvspsxds,
	vsx_xvcvspuxds): Likewise.
	(*vsx_fms<mode>4): Set type via <VStype_mul>.
	(*vsx_eq_<mode>_p, *vsx_gt_<mode>_p, *vsx_ge_<mode>_p): Set type via
	<VStype_simple>.
	* config/rs6000/power7.md (power7-vecstore): Correct VSU pipe.
	(power7-fpcompare, power7-sdiv, power7-ddiv, power7-sqrt,
	power7-dsqrt): Correct insn latency.
	(power7-vecsimple): Add veccmp type and correct dispatch/VSU values.
	(power7-veccmp): Delete.
	(power7-vecfloat): Correct latency/dispatch/VSU values.
	(define_bypass "power7-vecfloat"): Correct latency and types.
	(power7-veccomplex, power7-vecperm): Correct dispatch/VSU values.
	(power7-vecdouble, power7-vecfdiv, power7-vecdiv): New.
David Edelsohn - Oct. 28, 2011, 2:13 p.m.
On Thu, Oct 27, 2011 at 6:14 PM, Pat Haugen <pthaugen@linux.vnet.ibm.com> wrote:
> The following patch fixes some issues with the Power7 scheduling
> description. The patch is performance-neutral on cpu2006 (I was actually
> hoping to see some improvements, but it is still the right thing to do since
> it describes the hardware more accurately).
>
> Bootstrapped and regression-tested on powerpc64-linux with no new regressions. Ok for trunk?
>
> -Pat
>
>
> 2011-10-27  Pat Haugen <pthaugen@us.ibm.com>
>
>        * config/rs6000/rs6000.md (define_attr "type"): Add vecdouble.
>        * config/rs6000/vsx.md (VStype_simple, VStype_mul): Use vecdouble
>        type for V2DF.
>        (VStype_div): Use vector types for V2DF/V4SF.
>        (VStype_sqrt): Use *sqrt types.
>        (VS_spdp_type): Change type to vecdouble.
>        (*vsx_fmav2df4, *vsx_nfmsv2df4, vsx_xvcvdpsxws, vsx_xvcvdpuxws,
>        vsx_xvcvuxdsp, vsx_xvcvsxwdp, vsx_xvcvuxwdp, vsx_xvcvspsxds,
>        vsx_xvcvspuxds): Likewise.
>        (*vsx_fms<mode>4): Set type via <VStype_mul>.
>        (*vsx_eq_<mode>_p, *vsx_gt_<mode>_p, *vsx_ge_<mode>_p): Set type via
>        <VStype_simple>.
>        * config/rs6000/power7.md (power7-vecstore): Correct VSU pipe.
>        (power7-fpcompare, power7-sdiv, power7-ddiv, power7-sqrt,
>        power7-dsqrt): Correct insn latency.
>        (power7-vecsimple): Add veccmp type and correct dispatch/VSU values.
>        (power7-veccmp): Delete.
>        (power7-vecfloat): Correct latency/dispatch/VSU values.
>        (define_bypass "power7-vecfloat"): Correct latency and types.
>        (power7-veccomplex, power7-vecperm): Correct dispatch/VSU values.
>        (power7-vecdouble, power7-vecfdiv, power7-vecdiv): New.

Okay.

Thanks, David

Patch

Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md	(revision 180100)
+++ gcc/config/rs6000/rs6000.md	(working copy)
@@ -144,7 +144,7 @@  (define_c_enum "unspecv"
 
 ;; Define an insn type attribute.  This is used in function unit delay
 ;; computations.
-(define_attr "type" "integer,two,three,load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,store,store_ux,store_u,fpload,fpload_ux,fpload_u,fpstore,fpstore_ux,fpstore_u,vecload,vecstore,imul,imul2,imul3,lmul,idiv,ldiv,insert_word,branch,cmp,fast_compare,compare,var_delayed_compare,delayed_compare,imul_compare,lmul_compare,fpcompare,cr_logical,delayed_cr,mfcr,mfcrf,mtcr,mfjmpr,mtjmpr,fp,fpsimple,dmul,sdiv,ddiv,ssqrt,dsqrt,jmpreg,brinc,vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,vecfloat,vecfdiv,isync,sync,load_l,store_c,shift,trap,insert_dword,var_shift_rotate,cntlz,exts,mffgpr,mftgpr,isel"
+(define_attr "type" "integer,two,three,load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,store,store_ux,store_u,fpload,fpload_ux,fpload_u,fpstore,fpstore_ux,fpstore_u,vecload,vecstore,imul,imul2,imul3,lmul,idiv,ldiv,insert_word,branch,cmp,fast_compare,compare,var_delayed_compare,delayed_compare,imul_compare,lmul_compare,fpcompare,cr_logical,delayed_cr,mfcr,mfcrf,mtcr,mfjmpr,mtjmpr,fp,fpsimple,dmul,sdiv,ddiv,ssqrt,dsqrt,jmpreg,brinc,vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,vecfloat,vecfdiv,vecdouble,isync,sync,load_l,store_c,shift,trap,insert_dword,var_shift_rotate,cntlz,exts,mffgpr,mftgpr,isel"
   (const_string "integer"))
 
 ;; Define floating point instruction sub-types for use with Xfpu.md
Index: gcc/config/rs6000/vsx.md
===================================================================
--- gcc/config/rs6000/vsx.md	(revision 180100)
+++ gcc/config/rs6000/vsx.md	(working copy)
@@ -120,7 +120,7 @@  (define_mode_attr VSv	[(V16QI "v")
 			 (DF    "s")])
 
 ;; Appropriate type for add ops (and other simple FP ops)
-(define_mode_attr VStype_simple	[(V2DF "vecfloat")
+(define_mode_attr VStype_simple	[(V2DF "vecdouble")
 				 (V4SF "vecfloat")
 				 (DF   "fp")])
 
@@ -129,7 +129,7 @@  (define_mode_attr VSfptype_simple [(V2DF
 				   (DF   "fp_addsub_d")])
 
 ;; Appropriate type for multiply ops
-(define_mode_attr VStype_mul	[(V2DF "vecfloat")
+(define_mode_attr VStype_mul	[(V2DF "vecdouble")
 				 (V4SF "vecfloat")
 				 (DF   "dmul")])
 
@@ -137,10 +137,9 @@  (define_mode_attr VSfptype_mul	[(V2DF "f
 				 (V4SF "fp_mul_s")
 				 (DF   "fp_mul_d")])
 
-;; Appropriate type for divide ops.  For now, just lump the vector divide with
-;; the scalar divides
-(define_mode_attr VStype_div	[(V2DF "ddiv")
-				 (V4SF "sdiv")
+;; Appropriate type for divide ops.
+(define_mode_attr VStype_div	[(V2DF "vecdiv")
+				 (V4SF "vecfdiv")
 				 (DF   "ddiv")])
 
 (define_mode_attr VSfptype_div	[(V2DF "fp_div_d")
@@ -150,8 +149,8 @@  (define_mode_attr VSfptype_div	[(V2DF "f
 ;; Appropriate type for sqrt ops.  For now, just lump the vector sqrt with
 ;; the scalar sqrt
 (define_mode_attr VStype_sqrt	[(V2DF "dsqrt")
-				 (V4SF "sdiv")
-				 (DF   "ddiv")])
+				 (V4SF "ssqrt")
+				 (DF   "dsqrt")])
 
 (define_mode_attr VSfptype_sqrt	[(V2DF "fp_sqrt_d")
 				 (V4SF "fp_sqrt_s")
@@ -171,8 +170,8 @@  (define_mode_attr VS_spdp_insn [(DF	"xsc
 				(V2DF	"xvcvdpsp")])
 
 (define_mode_attr VS_spdp_type [(DF	"fp")
-				(V4SF	"vecfloat")
-				(V2DF	"vecfloat")])
+				(V4SF	"vecdouble")
+				(V2DF	"vecdouble")])
 
 ;; Map the scalar mode for a vector type
 (define_mode_attr VS_scalar [(V2DF	"DF")
@@ -572,7 +571,7 @@  (define_insn "*vsx_fmav2df4"
    xvmaddmdp %x0,%x1,%x3
    xvmaddadp %x0,%x1,%x2
    xvmaddmdp %x0,%x1,%x3"
-  [(set_attr "type" "vecfloat")])
+  [(set_attr "type" "vecdouble")])
 
 (define_insn "*vsx_fmsdf4"
   [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d")
@@ -604,7 +603,7 @@  (define_insn "*vsx_fms<mode>4"
    x<VSv>msubm<VSs> %x0,%x1,%x3
    x<VSv>msuba<VSs> %x0,%x1,%x2
    x<VSv>msubm<VSs> %x0,%x1,%x3"
-  [(set_attr "type" "vecfloat")])
+  [(set_attr "type" "<VStype_mul>")])
 
 (define_insn "*vsx_nfmadf4"
   [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d")
@@ -688,7 +687,7 @@  (define_insn "*vsx_nfmsv2df4"
    xvnmsubmdp %x0,%x1,%x3
    xvnmsubadp %x0,%x1,%x2
    xvnmsubmdp %x0,%x1,%x3"
-  [(set_attr "type" "vecfloat")])
+  [(set_attr "type" "vecdouble")])
 
 ;; Vector conditional expressions (no scalar version for these instructions)
 (define_insn "vsx_eq<mode>"
@@ -741,7 +740,7 @@  (define_insn "*vsx_eq_<mode>_p"
 		  (match_dup 2)))]
   "VECTOR_UNIT_VSX_P (<MODE>mode)"
   "xvcmpeq<VSs>. %x0,%x1,%x2"
-  [(set_attr "type" "veccmp")])
+  [(set_attr "type" "<VStype_simple>")])
 
 (define_insn "*vsx_gt_<mode>_p"
   [(set (reg:CC 74)
@@ -754,7 +753,7 @@  (define_insn "*vsx_gt_<mode>_p"
 		  (match_dup 2)))]
   "VECTOR_UNIT_VSX_P (<MODE>mode)"
   "xvcmpgt<VSs>. %x0,%x1,%x2"
-  [(set_attr "type" "veccmp")])
+  [(set_attr "type" "<VStype_simple>")])
 
 (define_insn "*vsx_ge_<mode>_p"
   [(set (reg:CC 74)
@@ -767,7 +766,7 @@  (define_insn "*vsx_ge_<mode>_p"
 		  (match_dup 2)))]
   "VECTOR_UNIT_VSX_P (<MODE>mode)"
   "xvcmpge<VSs>. %x0,%x1,%x2"
-  [(set_attr "type" "veccmp")])
+  [(set_attr "type" "<VStype_simple>")])
 
 ;; Vector select
 (define_insn "*vsx_xxsel<mode>"
@@ -948,7 +947,7 @@  (define_insn "vsx_xvcvdpsxws"
 		     UNSPEC_VSX_CVDPSXWS))]
   "VECTOR_UNIT_VSX_P (V2DFmode)"
   "xvcvdpsxws %x0,%x1"
-  [(set_attr "type" "vecfloat")])
+  [(set_attr "type" "vecdouble")])
 
 (define_insn "vsx_xvcvdpuxws"
   [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
@@ -956,7 +955,7 @@  (define_insn "vsx_xvcvdpuxws"
 		     UNSPEC_VSX_CVDPUXWS))]
   "VECTOR_UNIT_VSX_P (V2DFmode)"
   "xvcvdpuxws %x0,%x1"
-  [(set_attr "type" "vecfloat")])
+  [(set_attr "type" "vecdouble")])
 
 (define_insn "vsx_xvcvsxdsp"
   [(set (match_operand:V4SI 0 "vsx_register_operand" "=wd,?wa")
@@ -972,7 +971,7 @@  (define_insn "vsx_xvcvuxdsp"
 		     UNSPEC_VSX_CVUXDSP))]
   "VECTOR_UNIT_VSX_P (V2DFmode)"
   "xvcvuxwdp %x0,%x1"
-  [(set_attr "type" "vecfloat")])
+  [(set_attr "type" "vecdouble")])
 
 ;; Convert from 32-bit to 64-bit types
 (define_insn "vsx_xvcvsxwdp"
@@ -981,7 +980,7 @@  (define_insn "vsx_xvcvsxwdp"
 		     UNSPEC_VSX_CVSXWDP))]
   "VECTOR_UNIT_VSX_P (V2DFmode)"
   "xvcvsxwdp %x0,%x1"
-  [(set_attr "type" "vecfloat")])
+  [(set_attr "type" "vecdouble")])
 
 (define_insn "vsx_xvcvuxwdp"
   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
@@ -989,7 +988,7 @@  (define_insn "vsx_xvcvuxwdp"
 		     UNSPEC_VSX_CVUXWDP))]
   "VECTOR_UNIT_VSX_P (V2DFmode)"
   "xvcvuxwdp %x0,%x1"
-  [(set_attr "type" "vecfloat")])
+  [(set_attr "type" "vecdouble")])
 
 (define_insn "vsx_xvcvspsxds"
   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
@@ -997,7 +996,7 @@  (define_insn "vsx_xvcvspsxds"
 		     UNSPEC_VSX_CVSPSXDS))]
   "VECTOR_UNIT_VSX_P (V2DFmode)"
   "xvcvspsxds %x0,%x1"
-  [(set_attr "type" "vecfloat")])
+  [(set_attr "type" "vecdouble")])
 
 (define_insn "vsx_xvcvspuxds"
   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
@@ -1005,7 +1004,7 @@  (define_insn "vsx_xvcvspuxds"
 		     UNSPEC_VSX_CVSPUXDS))]
   "VECTOR_UNIT_VSX_P (V2DFmode)"
   "xvcvspuxds %x0,%x1"
-  [(set_attr "type" "vecfloat")])
+  [(set_attr "type" "vecdouble")])
 
 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
 ;; since the xsrdpiz instruction does not truncate the value if the floating
Index: gcc/config/rs6000/power7.md
===================================================================
--- gcc/config/rs6000/power7.md	(revision 180100)
+++ gcc/config/rs6000/power7.md	(working copy)
@@ -139,7 +139,7 @@  (define_insn_reservation "power7-vecload
 (define_insn_reservation "power7-vecstore" 6
   (and (eq_attr "type" "vecstore")
        (eq_attr "cpu" "power7"))
-  "DU_power7,LSU_power7+VSU_power7")
+  "DU_power7,LSU_power7+vsu2_power7")
 
 (define_insn_reservation "power7-sync" 11
   (and (eq_attr "type" "sync")
@@ -265,54 +265,69 @@  (define_insn_reservation "power7-fp" 6
 
 (define_bypass 8 "power7-fp" "power7-branch")
 
-(define_insn_reservation "power7-fpcompare" 4
+(define_insn_reservation "power7-fpcompare" 8
   (and (eq_attr "type" "fpcompare")
        (eq_attr "cpu" "power7"))
   "DU_power7,VSU_power7")
 
-(define_insn_reservation "power7-sdiv" 26
+(define_insn_reservation "power7-sdiv" 27
   (and (eq_attr "type" "sdiv")
        (eq_attr "cpu" "power7"))
   "DU_power7,VSU_power7")
 
-(define_insn_reservation "power7-ddiv" 32
+(define_insn_reservation "power7-ddiv" 33
   (and (eq_attr "type" "ddiv")
        (eq_attr "cpu" "power7"))
   "DU_power7,VSU_power7")
 
-(define_insn_reservation "power7-sqrt" 31
+(define_insn_reservation "power7-sqrt" 32
   (and (eq_attr "type" "ssqrt")
        (eq_attr "cpu" "power7"))
   "DU_power7,VSU_power7")
 
-(define_insn_reservation "power7-dsqrt" 43
+(define_insn_reservation "power7-dsqrt" 44
   (and (eq_attr "type" "dsqrt")
        (eq_attr "cpu" "power7"))
   "DU_power7,VSU_power7")
 
 (define_insn_reservation "power7-vecsimple" 2
-  (and (eq_attr "type" "vecsimple")
+  (and (eq_attr "type" "vecsimple,veccmp")
        (eq_attr "cpu" "power7"))
-  "du1_power7,VSU_power7")
+  "DU_power7,vsu1_power7")
 
-(define_insn_reservation "power7-veccmp" 7
-  (and (eq_attr "type" "veccmp")
-       (eq_attr "cpu" "power7"))
-  "du1_power7,VSU_power7")
-
-(define_insn_reservation "power7-vecfloat" 7
+(define_insn_reservation "power7-vecfloat" 6
   (and (eq_attr "type" "vecfloat")
        (eq_attr "cpu" "power7"))
-  "du1_power7,VSU_power7")
+  "DU_power7,vsu1_power7")
 
-(define_bypass 6 "power7-vecfloat" "power7-vecfloat")
+(define_bypass 7 "power7-vecfloat" "power7-vecsimple,power7-veccomplex,\
+				    power7-vecperm")
 
 (define_insn_reservation "power7-veccomplex" 7
   (and (eq_attr "type" "veccomplex")
        (eq_attr "cpu" "power7"))
-  "du1_power7,VSU_power7")
+  "DU_power7,vsu1_power7")
 
 (define_insn_reservation "power7-vecperm" 3
   (and (eq_attr "type" "vecperm")
        (eq_attr "cpu" "power7"))
-  "du2_power7,VSU_power7")
+  "DU_power7,vsu2_power7")
+
+(define_insn_reservation "power7-vecdouble" 6
+  (and (eq_attr "type" "vecdouble")
+       (eq_attr "cpu" "power7"))
+  "DU_power7,VSU_power7")
+
+(define_bypass 7 "power7-vecdouble" "power7-vecsimple,power7-veccomplex,\
+				    power7-vecperm")
+
+(define_insn_reservation "power7-vecfdiv" 26
+  (and (eq_attr "type" "vecfdiv")
+       (eq_attr "cpu" "power7"))
+  "DU_power7,VSU_power7")
+
+(define_insn_reservation "power7-vecdiv" 32
+  (and (eq_attr "type" "vecdiv")
+       (eq_attr "cpu" "power7"))
+  "DU_power7,VSU_power7")
+