Patchwork [ARM] NEON scheduling tweak

login
register
mail settings
Submitter Andrew Stubbs
Date Feb. 22, 2011, 12:27 p.m.
Message ID <4D63AB9C.8050101@codesourcery.com>
Download mbox | patch
Permalink /patch/83963/
State New
Headers show

Comments

Andrew Stubbs - Feb. 22, 2011, 12:27 p.m.
The attached is an old patch, originally by Mark Shinwell, that has been 
used in CodeSourcery toolchains for some time.

I've forward ported it to GCC 4.6, and benchmarked it. I find that it 
gives >2% (geomean) speed improvement in a popular benchmark running on 
Cortex-A9 NEON (Panda board).

OK for stage 1?

Andrew

Patch

2011-02-22  Andrew Stubbs  <ams@codesourcery.com>
	    Mark Shinwell  <shinwell@codesourcery.com>

	gcc/
	* config/arm/vfp.md (*arm_movsi_vfp, *thumb2_movsi_vfp)
	(*arm_movdi_vfp, *thumb2_movdi_vfp, *movsf_vfp, *thumb2_movsf_vfp)
	(*movdf_vfp, *thumb2_movdf_vfp, *movsfcc_vfp, *thumb2_movsfcc_vfp)
	(*movdfcc_vfp, *thumb2_movdfcc_vfp): Add neon_type.
	* config/arm/arm.md (neon_type): Update comment.

--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -337,8 +337,6 @@ 
 (define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched")))
 
 ;; Classification of NEON instructions for scheduling purposes.
-;; Do not set this attribute and the "type" attribute together in
-;; any one instruction pattern.
 (define_attr "neon_type"
    "neon_int_1,\
    neon_int_2,\
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -83,6 +83,7 @@ 
   "
   [(set_attr "predicable" "yes")
    (set_attr "type" "*,*,*,*,load1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores")
+   (set_attr "neon_type" "*,*,*,*,*,*,neon_mcr,neon_mrc,neon_vmov,*,*")
    (set_attr "insn" "mov,mov,mvn,mov,*,*,*,*,*,*,*")
    (set_attr "pool_range"     "*,*,*,*,4096,*,*,*,*,1020,*")
    (set_attr "neg_pool_range" "*,*,*,*,4084,*,*,*,*,1008,*")]
@@ -125,6 +126,7 @@ 
   "
   [(set_attr "predicable" "yes")
    (set_attr "type" "*,*,*,*,load1,load1,store1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores")
+   (set_attr "neon_type" "*,*,*,*,*,*,*,*,neon_mcr,neon_mrc,neon_vmov,*,*")
    (set_attr "insn" "mov,mov,mvn,mov,*,*,*,*,*,*,*,*,*")
    (set_attr "pool_range"     "*,*,*,*,1020,4096,*,*,*,*,*,1020,*")
    (set_attr "neg_pool_range" "*,*,*,*,   0,   0,*,*,*,*,*,1008,*")]
@@ -163,6 +165,7 @@ 
     }
   "
   [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarithd,f_loadd,f_stored")
+   (set_attr "neon_type" "*,*,*,neon_mcr_2_mcrr,neon_mrrc,neon_vmov,*,*")
    (set (attr "length") (cond [(eq_attr "alternative" "0,1,2") (const_int 8)
 			       (eq_attr "alternative" "5")
 				(if_then_else
@@ -201,6 +204,7 @@ 
     }
   "
   [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarithd,f_loadd,f_stored")
+   (set_attr "neon_type" "*,*,*,neon_mcr_2_mcrr,neon_mrrc,neon_vmov,*,*")
    (set (attr "length") (cond [(eq_attr "alternative" "0,1,2") (const_int 8)
 			       (eq_attr "alternative" "5")
 				(if_then_else
@@ -355,6 +359,7 @@ 
   [(set_attr "predicable" "yes")
    (set_attr "type"
      "r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,*")
+   (set_attr "neon_type" "neon_mcr,neon_mrc,*,*,*,*,*,neon_vmov,*")
    (set_attr "insn" "*,*,*,*,*,*,*,*,mov")
    (set_attr "pool_range" "*,*,*,1020,*,4096,*,*,*")
    (set_attr "neg_pool_range" "*,*,*,1008,*,4080,*,*,*")]
@@ -392,6 +397,7 @@ 
   [(set_attr "predicable" "yes")
    (set_attr "type"
      "r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,*")
+   (set_attr "neon_type" "neon_mcr,neon_mrc,*,*,*,*,*,neon_vmov,*")
    (set_attr "insn" "*,*,*,*,*,*,*,*,mov")
    (set_attr "pool_range" "*,*,*,1020,*,4092,*,*,*")
    (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")]
@@ -435,6 +441,7 @@ 
   "
   [(set_attr "type"
      "r_2_f,f_2_r,fconstd,f_loadd,f_stored,load2,store2,ffarithd,*")
+   (set_attr "neon_type" "neon_mcr_2_mcrr,neon_mrrc,*,*,*,*,*,neon_vmov,*")
    (set (attr "length") (cond [(eq_attr "alternative" "3,4,8") (const_int 8)
 			       (eq_attr "alternative" "7")
 				(if_then_else
@@ -479,6 +486,7 @@ 
   "
   [(set_attr "type"
      "r_2_f,f_2_r,fconstd,load2,store2,f_loadd,f_stored,ffarithd,*")
+   (set_attr "neon_type" "neon_mcr_2_mcrr,neon_mrrc,*,*,*,*,*,neon_vmov,*")
    (set (attr "length") (cond [(eq_attr "alternative" "3,4,8") (const_int 8)
 			       (eq_attr "alternative" "7")
 				(if_then_else
@@ -514,7 +522,8 @@ 
    fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1"
    [(set_attr "conds" "use")
     (set_attr "length" "4,4,8,4,4,8,4,4,8")
-    (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
+    (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")
+    (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr,neon_mcr,neon_mcr,neon_mrc,neon_mrc,neon_mrc")]
 )
 
 (define_insn "*thumb2_movsfcc_vfp"
@@ -537,7 +546,8 @@ 
    ite\\t%D3\;fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1"
    [(set_attr "conds" "use")
     (set_attr "length" "6,6,10,6,6,10,6,6,10")
-    (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
+    (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")
+    (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr,neon_mcr,neon_mcr,neon_mrc,neon_mrc,neon_mrc")]
 )
 
 (define_insn "*movdfcc_vfp"
@@ -560,7 +570,8 @@ 
    fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1"
    [(set_attr "conds" "use")
     (set_attr "length" "4,4,8,4,4,8,4,4,8")
-    (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
+    (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")
+    (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mrrc,neon_mrrc,neon_mrrc")]
 )
 
 (define_insn "*thumb2_movdfcc_vfp"
@@ -583,7 +594,8 @@ 
    ite\\t%D3\;fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1"
    [(set_attr "conds" "use")
     (set_attr "length" "6,6,10,6,6,10,6,6,10")
-    (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
+    (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")
+    (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mrrc,neon_mrrc,neon_mrrc")]
 )