diff mbox

[AArch64,2/2] Fix memory sizes to load/store patterns

Message ID 1497275640-6630-2-git-send-email-james.greenhalgh@arm.com
State New
Headers show

Commit Message

James Greenhalgh June 12, 2017, 1:54 p.m. UTC
Hi,

There seems to be a partial misconception in the AArch64 backend that
load1/load2 referred to the number of registers to load, rather than the
number of words to load. This patch fixes that using the new "number of
bytes" types added in the previous patch.

That means using the load_16 and store_16 types that were defined in the
previous patch for the first time in the AArch64 backend. To ensure
continuity for scheduling models, I've just split this out from load_8.
Please update your models if this is very wrong!

Bootstrapped on aarch64-none-linux-gnu with no issue.

OK?

Thanks,
James

---
2017-06-12  James Greenhalgh  <james.greenhalgh@arm.com>

	* config/aarch64/aarch64.md (movdi_aarch64): Set load/store
	types correctly.
	(movti_aarch64): Likewise.
	(movdf_aarch64): Likewise.
	(movtf_aarch64): Likewise.
	(load_pairdi): Likewise.
	(store_pairdi): Likewise.
	(load_pairdf): Likewise.
	(store_pairdf): Likewise.
	(loadwb_pair<GPI:mode>_<P:mode>): Likewise.
	(storewb_pair<GPI:mode>_<P:mode>): Likewise.
	(ldr_got_small_<mode>): Likewise.
	(ldr_got_small_28k_<mode>): Likewise.
	(ldr_got_tiny): Likewise.
	* config/aarch64/iterators.md (ldst_sz): New.
	(ldpstp_sz): Likewise.
	* config/aarch64/thunderx.md (thunderx_storepair): Split store_8
	to store_16.
	(thunderx_load): Split load_8 to load_16.
	* config/aarch64/thunderx2t99.md (thunderx2t99_loadpair): Split
	load_8 to load_16.
	(thunderx2t99_storepair_basic): Split store_8 to store_16.
	* config/arm/xgene1.md (xgene1_load_pair): Split load_8 to load_16.
	(xgene1_store_pair): Split store_8 to store_16.

Comments

James Greenhalgh June 21, 2017, 10:50 a.m. UTC | #1
*ping*

Thanks,
James

On Mon, Jun 12, 2017 at 02:54:00PM +0100, James Greenhalgh wrote:
> 
> Hi,
> 
> There seems to be a partial misconception in the AArch64 backend that
> load1/load2 referred to the number of registers to load, rather than the
> number of words to load. This patch fixes that using the new "number of
> bytes" types added in the previous patch.
> 
> That means using the load_16 and store_16 types that were defined in the
> previous patch for the first time in the AArch64 backend. To ensure
> continuity for scheduling models, I've just split this out from load_8.
> Please update your models if this is very wrong!
> 
> Bootstrapped on aarch64-none-linux-gnu with no issue.
> 
> OK?
> 
> Thanks,
> James
> 
> ---
> 2017-06-12  James Greenhalgh  <james.greenhalgh@arm.com>
> 
> 	* config/aarch64/aarch64.md (movdi_aarch64): Set load/store
> 	types correctly.
> 	(movti_aarch64): Likewise.
> 	(movdf_aarch64): Likewise.
> 	(movtf_aarch64): Likewise.
> 	(load_pairdi): Likewise.
> 	(store_pairdi): Likewise.
> 	(load_pairdf): Likewise.
> 	(store_pairdf): Likewise.
> 	(loadwb_pair<GPI:mode>_<P:mode>): Likewise.
> 	(storewb_pair<GPI:mode>_<P:mode>): Likewise.
> 	(ldr_got_small_<mode>): Likewise.
> 	(ldr_got_small_28k_<mode>): Likewise.
> 	(ldr_got_tiny): Likewise.
> 	* config/aarch64/iterators.md (ldst_sz): New.
> 	(ldpstp_sz): Likewise.
> 	* config/aarch64/thunderx.md (thunderx_storepair): Split store_8
> 	to store_16.
> 	(thunderx_load): Split load_8 to load_16.
> 	* config/aarch64/thunderx2t99.md (thunderx2t99_loadpair): Split
> 	load_8 to load_16.
> 	(thunderx2t99_storepair_basic): Split store_8 to store_16.
> 	* config/arm/xgene1.md (xgene1_load_pair): Split load_8 to load_16.
> 	(xgene1_store_pair): Split store_8 to store_16.
> 

> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 11295a6..a1385e3 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -981,7 +981,7 @@
>         DONE;
>      }"
>    [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,\
> -                     load_4,load_4,store_4,store_4,\
> +                     load_8,load_8,store_8,store_8,\
>                       adr,adr,f_mcr,f_mrc,fmov,neon_move")
>     (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
>     (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
> @@ -1026,7 +1026,8 @@
>     ldr\\t%q0, %1
>     str\\t%q1, %0"
>    [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \
> -		             load_8,store_8,store_8,f_loadd,f_stored")
> +		             load_16,store_16,store_16,\
> +                             load_16,store_16")
>     (set_attr "length" "8,8,8,4,4,4,4,4,4")
>     (set_attr "simd" "*,*,*,yes,*,*,*,*,*")
>     (set_attr "fp" "*,*,*,*,*,*,*,yes,yes")]
> @@ -1121,7 +1122,7 @@
>     str\\t%x1, %0
>     mov\\t%x0, %x1"
>    [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\
> -                     f_loadd,f_stored,load_4,store_4,mov_reg")
> +                     f_loadd,f_stored,load_8,store_8,mov_reg")
>     (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
>  )
>  
> @@ -1145,7 +1146,7 @@
>     stp\\t%1, %H1, %0
>     stp\\txzr, xzr, %0"
>    [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\
> -                     f_loadd,f_stored,load_8,store_8,store_8")
> +                     f_loadd,f_stored,load_16,store_16,store_16")
>     (set_attr "length" "4,8,8,8,4,4,4,4,4,4,4")
>     (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*,*")]
>  )
> @@ -1209,7 +1210,7 @@
>    "@
>     ldp\\t%x0, %x2, %1
>     ldp\\t%d0, %d2, %1"
> -  [(set_attr "type" "load_8,neon_load1_2reg")
> +  [(set_attr "type" "load_16,neon_load1_2reg")
>     (set_attr "fp" "*,yes")]
>  )
>  
> @@ -1244,7 +1245,7 @@
>    "@
>     stp\\t%x1, %x3, %0
>     stp\\t%d1, %d3, %0"
> -  [(set_attr "type" "store_8,neon_store1_2reg")
> +  [(set_attr "type" "store_16,neon_store1_2reg")
>     (set_attr "fp" "*,yes")]
>  )
>  
> @@ -1278,7 +1279,7 @@
>    "@
>     ldp\\t%d0, %d2, %1
>     ldp\\t%x0, %x2, %1"
> -  [(set_attr "type" "neon_load1_2reg,load_8")
> +  [(set_attr "type" "neon_load1_2reg,load_16")
>     (set_attr "fp" "yes,*")]
>  )
>  
> @@ -1312,7 +1313,7 @@
>    "@
>     stp\\t%d1, %d3, %0
>     stp\\t%x1, %x3, %0"
> -  [(set_attr "type" "neon_store1_2reg,store_8")
> +  [(set_attr "type" "neon_store1_2reg,store_16")
>     (set_attr "fp" "yes,*")]
>  )
>  
> @@ -1330,7 +1331,7 @@
>                     (match_operand:P 5 "const_int_operand" "n"))))])]
>    "INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)"
>    "ldp\\t%<w>2, %<w>3, [%1], %4"
> -  [(set_attr "type" "load_8")]
> +  [(set_attr "type" "load_<ldpstp_sz>")]
>  )
>  
>  (define_insn "loadwb_pair<GPF:mode>_<P:mode>"
> @@ -1363,7 +1364,7 @@
>            (match_operand:GPI 3 "register_operand" "r"))])]
>    "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)"
>    "stp\\t%<w>2, %<w>3, [%0, %4]!"
> -  [(set_attr "type" "store_8")]
> +  [(set_attr "type" "store_<ldpstp_sz>")]
>  )
>  
>  (define_insn "storewb_pair<GPF:mode>_<P:mode>"
> @@ -5139,7 +5140,7 @@
>  		    UNSPEC_GOTSMALLPIC))]
>    ""
>    "ldr\\t%<w>0, [%1, #:got_lo12:%a2]"
> -  [(set_attr "type" "load_4")]
> +  [(set_attr "type" "load_<ldst_sz>")]
>  )
>  
>  (define_insn "ldr_got_small_sidi"
> @@ -5162,7 +5163,7 @@
>  		    UNSPEC_GOTSMALLPIC28K))]
>    ""
>    "ldr\\t%<w>0, [%1, #:<got_modifier>:%a2]"
> -  [(set_attr "type" "load_4")]
> +  [(set_attr "type" "load_<ldst_sz>")]
>  )
>  
>  (define_insn "ldr_got_small_28k_sidi"
> @@ -5183,7 +5184,7 @@
>  		   UNSPEC_GOTTINYPIC))]
>    ""
>    "ldr\\t%0, %L1"
> -  [(set_attr "type" "load_4")]
> +  [(set_attr "type" "load_8")]
>  )
>  
>  (define_insn "aarch64_load_tp_hard"
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index 43be7fd..a65c3aa 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -384,6 +384,11 @@
>  ;; 32-bit version and "%x0" in the 64-bit version.
>  (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")])
>  
> +;; The size of access, in bytes.
> +(define_mode_attr ldst_sz [(SI "4") (DI "8")])
> +;; Likewise for load/store pair.
> +(define_mode_attr ldpstp_sz [(SI "8") (DI "16")])
> +
>  ;; For inequal width int to float conversion
>  (define_mode_attr w1 [(HF "w") (SF "w") (DF "x")])
>  (define_mode_attr w2 [(HF "x") (SF "x") (DF "w")])
> diff --git a/gcc/config/aarch64/thunderx.md b/gcc/config/aarch64/thunderx.md
> index c18da2f..84ac6cd 100644
> --- a/gcc/config/aarch64/thunderx.md
> +++ b/gcc/config/aarch64/thunderx.md
> @@ -100,7 +100,7 @@
>  ;; Store pair are single issued
>  (define_insn_reservation "thunderx_storepair" 1
>    (and (eq_attr "tune" "thunderx")
> -       (eq_attr "type" "store_8"))
> +       (eq_attr "type" "store_8,store_16"))
>    "thunderx_pipe0 + thunderx_pipe1")
>  
>  ;; Prefetch are single issued
> @@ -112,7 +112,7 @@
>  ;; loads (and load pairs) from L1 take 3 cycles in pipe 0
>  (define_insn_reservation "thunderx_load" 3
>    (and (eq_attr "tune" "thunderx")
> -       (eq_attr "type" "load_4, load_8"))
> +       (eq_attr "type" "load_4, load_8, load_16"))
>    "thunderx_pipe0")
>  
>  (define_insn_reservation "thunderx_brj" 1
> diff --git a/gcc/config/aarch64/thunderx2t99.md b/gcc/config/aarch64/thunderx2t99.md
> index 632396f..4e39610 100644
> --- a/gcc/config/aarch64/thunderx2t99.md
> +++ b/gcc/config/aarch64/thunderx2t99.md
> @@ -128,7 +128,7 @@
>  
>  (define_insn_reservation "thunderx2t99_loadpair" 5
>    (and (eq_attr "tune" "thunderx2t99")
> -       (eq_attr "type" "load_8"))
> +       (eq_attr "type" "load_8,load_16"))
>    "thunderx2t99_i012,thunderx2t99_ls01")
>  
>  (define_insn_reservation "thunderx2t99_store_basic" 1
> @@ -138,7 +138,7 @@
>  
>  (define_insn_reservation "thunderx2t99_storepair_basic" 1
>    (and (eq_attr "tune" "thunderx2t99")
> -       (eq_attr "type" "store_8"))
> +       (eq_attr "type" "store_8,store_16"))
>    "thunderx2t99_ls01,thunderx2t99_sd")
>  
>  ;; FP data processing instructions.
> diff --git a/gcc/config/arm/xgene1.md b/gcc/config/arm/xgene1.md
> index 7e70408..0b457ee 100644
> --- a/gcc/config/arm/xgene1.md
> +++ b/gcc/config/arm/xgene1.md
> @@ -92,12 +92,12 @@
>  
>  (define_insn_reservation "xgene1_load_pair" 6
>    (and (eq_attr "tune" "xgene1")
> -       (eq_attr "type" "load_8"))
> +       (eq_attr "type" "load_8, load_16"))
>    "xgene1_decodeIsolated")
>  
>  (define_insn_reservation "xgene1_store_pair" 2
>    (and (eq_attr "tune" "xgene1")
> -       (eq_attr "type" "store_8"))
> +       (eq_attr "type" "store_8, store_16"))
>    "xgene1_decodeIsolated")
>  
>  (define_insn_reservation "xgene1_fp_load1" 10
James Greenhalgh July 3, 2017, 10:46 a.m. UTC | #2
On Wed, Jun 21, 2017 at 11:50:08AM +0100, James Greenhalgh wrote:
> *ping*

Ping*2

Thanks,
James

> On Mon, Jun 12, 2017 at 02:54:00PM +0100, James Greenhalgh wrote:
> > 
> > Hi,
> > 
> > There seems to be a partial misconception in the AArch64 backend that
> > load1/load2 referred to the number of registers to load, rather than the
> > number of words to load. This patch fixes that using the new "number of
> > bytes" types added in the previous patch.
> > 
> > That means using the load_16 and store_16 types that were defined in the
> > previous patch for the first time in the AArch64 backend. To ensure
> > continuity for scheduling models, I've just split this out from load_8.
> > Please update your models if this is very wrong!
> > 
> > Bootstrapped on aarch64-none-linux-gnu with no issue.
> > 
> > OK?
> > 
> > Thanks,
> > James
> > 
> > ---
> > 2017-06-12  James Greenhalgh  <james.greenhalgh@arm.com>
> > 
> > 	* config/aarch64/aarch64.md (movdi_aarch64): Set load/store
> > 	types correctly.
> > 	(movti_aarch64): Likewise.
> > 	(movdf_aarch64): Likewise.
> > 	(movtf_aarch64): Likewise.
> > 	(load_pairdi): Likewise.
> > 	(store_pairdi): Likewise.
> > 	(load_pairdf): Likewise.
> > 	(store_pairdf): Likewise.
> > 	(loadwb_pair<GPI:mode>_<P:mode>): Likewise.
> > 	(storewb_pair<GPI:mode>_<P:mode>): Likewise.
> > 	(ldr_got_small_<mode>): Likewise.
> > 	(ldr_got_small_28k_<mode>): Likewise.
> > 	(ldr_got_tiny): Likewise.
> > 	* config/aarch64/iterators.md (ldst_sz): New.
> > 	(ldpstp_sz): Likewise.
> > 	* config/aarch64/thunderx.md (thunderx_storepair): Split store_8
> > 	to store_16.
> > 	(thunderx_load): Split load_8 to load_16.
> > 	* config/aarch64/thunderx2t99.md (thunderx2t99_loadpair): Split
> > 	load_8 to load_16.
> > 	(thunderx2t99_storepair_basic): Split store_8 to store_16.
> > 	* config/arm/xgene1.md (xgene1_load_pair): Split load_8 to load_16.
> > 	(xgene1_store_pair): Split store_8 to store_16.
> > 
> 
> > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> > index 11295a6..a1385e3 100644
> > --- a/gcc/config/aarch64/aarch64.md
> > +++ b/gcc/config/aarch64/aarch64.md
> > @@ -981,7 +981,7 @@
> >         DONE;
> >      }"
> >    [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,\
> > -                     load_4,load_4,store_4,store_4,\
> > +                     load_8,load_8,store_8,store_8,\
> >                       adr,adr,f_mcr,f_mrc,fmov,neon_move")
> >     (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
> >     (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
> > @@ -1026,7 +1026,8 @@
> >     ldr\\t%q0, %1
> >     str\\t%q1, %0"
> >    [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \
> > -		             load_8,store_8,store_8,f_loadd,f_stored")
> > +		             load_16,store_16,store_16,\
> > +                             load_16,store_16")
> >     (set_attr "length" "8,8,8,4,4,4,4,4,4")
> >     (set_attr "simd" "*,*,*,yes,*,*,*,*,*")
> >     (set_attr "fp" "*,*,*,*,*,*,*,yes,yes")]
> > @@ -1121,7 +1122,7 @@
> >     str\\t%x1, %0
> >     mov\\t%x0, %x1"
> >    [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\
> > -                     f_loadd,f_stored,load_4,store_4,mov_reg")
> > +                     f_loadd,f_stored,load_8,store_8,mov_reg")
> >     (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
> >  )
> >  
> > @@ -1145,7 +1146,7 @@
> >     stp\\t%1, %H1, %0
> >     stp\\txzr, xzr, %0"
> >    [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\
> > -                     f_loadd,f_stored,load_8,store_8,store_8")
> > +                     f_loadd,f_stored,load_16,store_16,store_16")
> >     (set_attr "length" "4,8,8,8,4,4,4,4,4,4,4")
> >     (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*,*")]
> >  )
> > @@ -1209,7 +1210,7 @@
> >    "@
> >     ldp\\t%x0, %x2, %1
> >     ldp\\t%d0, %d2, %1"
> > -  [(set_attr "type" "load_8,neon_load1_2reg")
> > +  [(set_attr "type" "load_16,neon_load1_2reg")
> >     (set_attr "fp" "*,yes")]
> >  )
> >  
> > @@ -1244,7 +1245,7 @@
> >    "@
> >     stp\\t%x1, %x3, %0
> >     stp\\t%d1, %d3, %0"
> > -  [(set_attr "type" "store_8,neon_store1_2reg")
> > +  [(set_attr "type" "store_16,neon_store1_2reg")
> >     (set_attr "fp" "*,yes")]
> >  )
> >  
> > @@ -1278,7 +1279,7 @@
> >    "@
> >     ldp\\t%d0, %d2, %1
> >     ldp\\t%x0, %x2, %1"
> > -  [(set_attr "type" "neon_load1_2reg,load_8")
> > +  [(set_attr "type" "neon_load1_2reg,load_16")
> >     (set_attr "fp" "yes,*")]
> >  )
> >  
> > @@ -1312,7 +1313,7 @@
> >    "@
> >     stp\\t%d1, %d3, %0
> >     stp\\t%x1, %x3, %0"
> > -  [(set_attr "type" "neon_store1_2reg,store_8")
> > +  [(set_attr "type" "neon_store1_2reg,store_16")
> >     (set_attr "fp" "yes,*")]
> >  )
> >  
> > @@ -1330,7 +1331,7 @@
> >                     (match_operand:P 5 "const_int_operand" "n"))))])]
> >    "INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)"
> >    "ldp\\t%<w>2, %<w>3, [%1], %4"
> > -  [(set_attr "type" "load_8")]
> > +  [(set_attr "type" "load_<ldpstp_sz>")]
> >  )
> >  
> >  (define_insn "loadwb_pair<GPF:mode>_<P:mode>"
> > @@ -1363,7 +1364,7 @@
> >            (match_operand:GPI 3 "register_operand" "r"))])]
> >    "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)"
> >    "stp\\t%<w>2, %<w>3, [%0, %4]!"
> > -  [(set_attr "type" "store_8")]
> > +  [(set_attr "type" "store_<ldpstp_sz>")]
> >  )
> >  
> >  (define_insn "storewb_pair<GPF:mode>_<P:mode>"
> > @@ -5139,7 +5140,7 @@
> >  		    UNSPEC_GOTSMALLPIC))]
> >    ""
> >    "ldr\\t%<w>0, [%1, #:got_lo12:%a2]"
> > -  [(set_attr "type" "load_4")]
> > +  [(set_attr "type" "load_<ldst_sz>")]
> >  )
> >  
> >  (define_insn "ldr_got_small_sidi"
> > @@ -5162,7 +5163,7 @@
> >  		    UNSPEC_GOTSMALLPIC28K))]
> >    ""
> >    "ldr\\t%<w>0, [%1, #:<got_modifier>:%a2]"
> > -  [(set_attr "type" "load_4")]
> > +  [(set_attr "type" "load_<ldst_sz>")]
> >  )
> >  
> >  (define_insn "ldr_got_small_28k_sidi"
> > @@ -5183,7 +5184,7 @@
> >  		   UNSPEC_GOTTINYPIC))]
> >    ""
> >    "ldr\\t%0, %L1"
> > -  [(set_attr "type" "load_4")]
> > +  [(set_attr "type" "load_8")]
> >  )
> >  
> >  (define_insn "aarch64_load_tp_hard"
> > diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> > index 43be7fd..a65c3aa 100644
> > --- a/gcc/config/aarch64/iterators.md
> > +++ b/gcc/config/aarch64/iterators.md
> > @@ -384,6 +384,11 @@
> >  ;; 32-bit version and "%x0" in the 64-bit version.
> >  (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")])
> >  
> > +;; The size of access, in bytes.
> > +(define_mode_attr ldst_sz [(SI "4") (DI "8")])
> > +;; Likewise for load/store pair.
> > +(define_mode_attr ldpstp_sz [(SI "8") (DI "16")])
> > +
> >  ;; For inequal width int to float conversion
> >  (define_mode_attr w1 [(HF "w") (SF "w") (DF "x")])
> >  (define_mode_attr w2 [(HF "x") (SF "x") (DF "w")])
> > diff --git a/gcc/config/aarch64/thunderx.md b/gcc/config/aarch64/thunderx.md
> > index c18da2f..84ac6cd 100644
> > --- a/gcc/config/aarch64/thunderx.md
> > +++ b/gcc/config/aarch64/thunderx.md
> > @@ -100,7 +100,7 @@
> >  ;; Store pair are single issued
> >  (define_insn_reservation "thunderx_storepair" 1
> >    (and (eq_attr "tune" "thunderx")
> > -       (eq_attr "type" "store_8"))
> > +       (eq_attr "type" "store_8,store_16"))
> >    "thunderx_pipe0 + thunderx_pipe1")
> >  
> >  ;; Prefetch are single issued
> > @@ -112,7 +112,7 @@
> >  ;; loads (and load pairs) from L1 take 3 cycles in pipe 0
> >  (define_insn_reservation "thunderx_load" 3
> >    (and (eq_attr "tune" "thunderx")
> > -       (eq_attr "type" "load_4, load_8"))
> > +       (eq_attr "type" "load_4, load_8, load_16"))
> >    "thunderx_pipe0")
> >  
> >  (define_insn_reservation "thunderx_brj" 1
> > diff --git a/gcc/config/aarch64/thunderx2t99.md b/gcc/config/aarch64/thunderx2t99.md
> > index 632396f..4e39610 100644
> > --- a/gcc/config/aarch64/thunderx2t99.md
> > +++ b/gcc/config/aarch64/thunderx2t99.md
> > @@ -128,7 +128,7 @@
> >  
> >  (define_insn_reservation "thunderx2t99_loadpair" 5
> >    (and (eq_attr "tune" "thunderx2t99")
> > -       (eq_attr "type" "load_8"))
> > +       (eq_attr "type" "load_8,load_16"))
> >    "thunderx2t99_i012,thunderx2t99_ls01")
> >  
> >  (define_insn_reservation "thunderx2t99_store_basic" 1
> > @@ -138,7 +138,7 @@
> >  
> >  (define_insn_reservation "thunderx2t99_storepair_basic" 1
> >    (and (eq_attr "tune" "thunderx2t99")
> > -       (eq_attr "type" "store_8"))
> > +       (eq_attr "type" "store_8,store_16"))
> >    "thunderx2t99_ls01,thunderx2t99_sd")
> >  
> >  ;; FP data processing instructions.
> > diff --git a/gcc/config/arm/xgene1.md b/gcc/config/arm/xgene1.md
> > index 7e70408..0b457ee 100644
> > --- a/gcc/config/arm/xgene1.md
> > +++ b/gcc/config/arm/xgene1.md
> > @@ -92,12 +92,12 @@
> >  
> >  (define_insn_reservation "xgene1_load_pair" 6
> >    (and (eq_attr "tune" "xgene1")
> > -       (eq_attr "type" "load_8"))
> > +       (eq_attr "type" "load_8, load_16"))
> >    "xgene1_decodeIsolated")
> >  
> >  (define_insn_reservation "xgene1_store_pair" 2
> >    (and (eq_attr "tune" "xgene1")
> > -       (eq_attr "type" "store_8"))
> > +       (eq_attr "type" "store_8, store_16"))
> >    "xgene1_decodeIsolated")
> >  
> >  (define_insn_reservation "xgene1_fp_load1" 10
>
James Greenhalgh July 3, 2017, 10:47 a.m. UTC | #3
On Wed, Jun 21, 2017 at 11:50:08AM +0100, James Greenhalgh wrote:
> *ping*

*ping*x2

Thanks,
James

> On Mon, Jun 12, 2017 at 02:54:00PM +0100, James Greenhalgh wrote:
> > 
> > Hi,
> > 
> > There seems to be a partial misconception in the AArch64 backend that
> > load1/load2 referred to the number of registers to load, rather than the
> > number of words to load. This patch fixes that using the new "number of
> > bytes" types added in the previous patch.
> > 
> > That means using the load_16 and store_16 types that were defined in the
> > previous patch for the first time in the AArch64 backend. To ensure
> > continuity for scheduling models, I've just split this out from load_8.
> > Please update your models if this is very wrong!
> > 
> > Bootstrapped on aarch64-none-linux-gnu with no issue.
> > 
> > OK?
> > 
> > Thanks,
> > James
> > 
> > ---
> > 2017-06-12  James Greenhalgh  <james.greenhalgh@arm.com>
> > 
> > 	* config/aarch64/aarch64.md (movdi_aarch64): Set load/store
> > 	types correctly.
> > 	(movti_aarch64): Likewise.
> > 	(movdf_aarch64): Likewise.
> > 	(movtf_aarch64): Likewise.
> > 	(load_pairdi): Likewise.
> > 	(store_pairdi): Likewise.
> > 	(load_pairdf): Likewise.
> > 	(store_pairdf): Likewise.
> > 	(loadwb_pair<GPI:mode>_<P:mode>): Likewise.
> > 	(storewb_pair<GPI:mode>_<P:mode>): Likewise.
> > 	(ldr_got_small_<mode>): Likewise.
> > 	(ldr_got_small_28k_<mode>): Likewise.
> > 	(ldr_got_tiny): Likewise.
> > 	* config/aarch64/iterators.md (ldst_sz): New.
> > 	(ldpstp_sz): Likewise.
> > 	* config/aarch64/thunderx.md (thunderx_storepair): Split store_8
> > 	to store_16.
> > 	(thunderx_load): Split load_8 to load_16.
> > 	* config/aarch64/thunderx2t99.md (thunderx2t99_loadpair): Split
> > 	load_8 to load_16.
> > 	(thunderx2t99_storepair_basic): Split store_8 to store_16.
> > 	* config/arm/xgene1.md (xgene1_load_pair): Split load_8 to load_16.
> > 	(xgene1_store_pair): Split store_8 to store_16.
> > 
> 
> > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> > index 11295a6..a1385e3 100644
> > --- a/gcc/config/aarch64/aarch64.md
> > +++ b/gcc/config/aarch64/aarch64.md
> > @@ -981,7 +981,7 @@
> >         DONE;
> >      }"
> >    [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,\
> > -                     load_4,load_4,store_4,store_4,\
> > +                     load_8,load_8,store_8,store_8,\
> >                       adr,adr,f_mcr,f_mrc,fmov,neon_move")
> >     (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
> >     (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
> > @@ -1026,7 +1026,8 @@
> >     ldr\\t%q0, %1
> >     str\\t%q1, %0"
> >    [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \
> > -		             load_8,store_8,store_8,f_loadd,f_stored")
> > +		             load_16,store_16,store_16,\
> > +                             load_16,store_16")
> >     (set_attr "length" "8,8,8,4,4,4,4,4,4")
> >     (set_attr "simd" "*,*,*,yes,*,*,*,*,*")
> >     (set_attr "fp" "*,*,*,*,*,*,*,yes,yes")]
> > @@ -1121,7 +1122,7 @@
> >     str\\t%x1, %0
> >     mov\\t%x0, %x1"
> >    [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\
> > -                     f_loadd,f_stored,load_4,store_4,mov_reg")
> > +                     f_loadd,f_stored,load_8,store_8,mov_reg")
> >     (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
> >  )
> >  
> > @@ -1145,7 +1146,7 @@
> >     stp\\t%1, %H1, %0
> >     stp\\txzr, xzr, %0"
> >    [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\
> > -                     f_loadd,f_stored,load_8,store_8,store_8")
> > +                     f_loadd,f_stored,load_16,store_16,store_16")
> >     (set_attr "length" "4,8,8,8,4,4,4,4,4,4,4")
> >     (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*,*")]
> >  )
> > @@ -1209,7 +1210,7 @@
> >    "@
> >     ldp\\t%x0, %x2, %1
> >     ldp\\t%d0, %d2, %1"
> > -  [(set_attr "type" "load_8,neon_load1_2reg")
> > +  [(set_attr "type" "load_16,neon_load1_2reg")
> >     (set_attr "fp" "*,yes")]
> >  )
> >  
> > @@ -1244,7 +1245,7 @@
> >    "@
> >     stp\\t%x1, %x3, %0
> >     stp\\t%d1, %d3, %0"
> > -  [(set_attr "type" "store_8,neon_store1_2reg")
> > +  [(set_attr "type" "store_16,neon_store1_2reg")
> >     (set_attr "fp" "*,yes")]
> >  )
> >  
> > @@ -1278,7 +1279,7 @@
> >    "@
> >     ldp\\t%d0, %d2, %1
> >     ldp\\t%x0, %x2, %1"
> > -  [(set_attr "type" "neon_load1_2reg,load_8")
> > +  [(set_attr "type" "neon_load1_2reg,load_16")
> >     (set_attr "fp" "yes,*")]
> >  )
> >  
> > @@ -1312,7 +1313,7 @@
> >    "@
> >     stp\\t%d1, %d3, %0
> >     stp\\t%x1, %x3, %0"
> > -  [(set_attr "type" "neon_store1_2reg,store_8")
> > +  [(set_attr "type" "neon_store1_2reg,store_16")
> >     (set_attr "fp" "yes,*")]
> >  )
> >  
> > @@ -1330,7 +1331,7 @@
> >                     (match_operand:P 5 "const_int_operand" "n"))))])]
> >    "INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)"
> >    "ldp\\t%<w>2, %<w>3, [%1], %4"
> > -  [(set_attr "type" "load_8")]
> > +  [(set_attr "type" "load_<ldpstp_sz>")]
> >  )
> >  
> >  (define_insn "loadwb_pair<GPF:mode>_<P:mode>"
> > @@ -1363,7 +1364,7 @@
> >            (match_operand:GPI 3 "register_operand" "r"))])]
> >    "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)"
> >    "stp\\t%<w>2, %<w>3, [%0, %4]!"
> > -  [(set_attr "type" "store_8")]
> > +  [(set_attr "type" "store_<ldpstp_sz>")]
> >  )
> >  
> >  (define_insn "storewb_pair<GPF:mode>_<P:mode>"
> > @@ -5139,7 +5140,7 @@
> >  		    UNSPEC_GOTSMALLPIC))]
> >    ""
> >    "ldr\\t%<w>0, [%1, #:got_lo12:%a2]"
> > -  [(set_attr "type" "load_4")]
> > +  [(set_attr "type" "load_<ldst_sz>")]
> >  )
> >  
> >  (define_insn "ldr_got_small_sidi"
> > @@ -5162,7 +5163,7 @@
> >  		    UNSPEC_GOTSMALLPIC28K))]
> >    ""
> >    "ldr\\t%<w>0, [%1, #:<got_modifier>:%a2]"
> > -  [(set_attr "type" "load_4")]
> > +  [(set_attr "type" "load_<ldst_sz>")]
> >  )
> >  
> >  (define_insn "ldr_got_small_28k_sidi"
> > @@ -5183,7 +5184,7 @@
> >  		   UNSPEC_GOTTINYPIC))]
> >    ""
> >    "ldr\\t%0, %L1"
> > -  [(set_attr "type" "load_4")]
> > +  [(set_attr "type" "load_8")]
> >  )
> >  
> >  (define_insn "aarch64_load_tp_hard"
> > diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> > index 43be7fd..a65c3aa 100644
> > --- a/gcc/config/aarch64/iterators.md
> > +++ b/gcc/config/aarch64/iterators.md
> > @@ -384,6 +384,11 @@
> >  ;; 32-bit version and "%x0" in the 64-bit version.
> >  (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")])
> >  
> > +;; The size of access, in bytes.
> > +(define_mode_attr ldst_sz [(SI "4") (DI "8")])
> > +;; Likewise for load/store pair.
> > +(define_mode_attr ldpstp_sz [(SI "8") (DI "16")])
> > +
> >  ;; For inequal width int to float conversion
> >  (define_mode_attr w1 [(HF "w") (SF "w") (DF "x")])
> >  (define_mode_attr w2 [(HF "x") (SF "x") (DF "w")])
> > diff --git a/gcc/config/aarch64/thunderx.md b/gcc/config/aarch64/thunderx.md
> > index c18da2f..84ac6cd 100644
> > --- a/gcc/config/aarch64/thunderx.md
> > +++ b/gcc/config/aarch64/thunderx.md
> > @@ -100,7 +100,7 @@
> >  ;; Store pair are single issued
> >  (define_insn_reservation "thunderx_storepair" 1
> >    (and (eq_attr "tune" "thunderx")
> > -       (eq_attr "type" "store_8"))
> > +       (eq_attr "type" "store_8,store_16"))
> >    "thunderx_pipe0 + thunderx_pipe1")
> >  
> >  ;; Prefetch are single issued
> > @@ -112,7 +112,7 @@
> >  ;; loads (and load pairs) from L1 take 3 cycles in pipe 0
> >  (define_insn_reservation "thunderx_load" 3
> >    (and (eq_attr "tune" "thunderx")
> > -       (eq_attr "type" "load_4, load_8"))
> > +       (eq_attr "type" "load_4, load_8, load_16"))
> >    "thunderx_pipe0")
> >  
> >  (define_insn_reservation "thunderx_brj" 1
> > diff --git a/gcc/config/aarch64/thunderx2t99.md b/gcc/config/aarch64/thunderx2t99.md
> > index 632396f..4e39610 100644
> > --- a/gcc/config/aarch64/thunderx2t99.md
> > +++ b/gcc/config/aarch64/thunderx2t99.md
> > @@ -128,7 +128,7 @@
> >  
> >  (define_insn_reservation "thunderx2t99_loadpair" 5
> >    (and (eq_attr "tune" "thunderx2t99")
> > -       (eq_attr "type" "load_8"))
> > +       (eq_attr "type" "load_8,load_16"))
> >    "thunderx2t99_i012,thunderx2t99_ls01")
> >  
> >  (define_insn_reservation "thunderx2t99_store_basic" 1
> > @@ -138,7 +138,7 @@
> >  
> >  (define_insn_reservation "thunderx2t99_storepair_basic" 1
> >    (and (eq_attr "tune" "thunderx2t99")
> > -       (eq_attr "type" "store_8"))
> > +       (eq_attr "type" "store_8,store_16"))
> >    "thunderx2t99_ls01,thunderx2t99_sd")
> >  
> >  ;; FP data processing instructions.
> > diff --git a/gcc/config/arm/xgene1.md b/gcc/config/arm/xgene1.md
> > index 7e70408..0b457ee 100644
> > --- a/gcc/config/arm/xgene1.md
> > +++ b/gcc/config/arm/xgene1.md
> > @@ -92,12 +92,12 @@
> >  
> >  (define_insn_reservation "xgene1_load_pair" 6
> >    (and (eq_attr "tune" "xgene1")
> > -       (eq_attr "type" "load_8"))
> > +       (eq_attr "type" "load_8, load_16"))
> >    "xgene1_decodeIsolated")
> >  
> >  (define_insn_reservation "xgene1_store_pair" 2
> >    (and (eq_attr "tune" "xgene1")
> > -       (eq_attr "type" "store_8"))
> > +       (eq_attr "type" "store_8, store_16"))
> >    "xgene1_decodeIsolated")
> >  
> >  (define_insn_reservation "xgene1_fp_load1" 10
>
James Greenhalgh July 27, 2017, 6:09 p.m. UTC | #4
On Mon, Jul 03, 2017 at 11:46:58AM +0100, James Greenhalgh wrote:
> On Wed, Jun 21, 2017 at 11:50:08AM +0100, James Greenhalgh wrote:
> > *ping*
> 
> Ping*2

Ping*3

Thanks,
James

> 
> Thanks,
> James
> 
> > On Mon, Jun 12, 2017 at 02:54:00PM +0100, James Greenhalgh wrote:
> > > 
> > > Hi,
> > > 
> > > There seems to be a partial misconception in the AArch64 backend that
> > > load1/load2 referred to the number of registers to load, rather than the
> > > number of words to load. This patch fixes that using the new "number of
> > > bytes" types added in the previous patch.
> > > 
> > > That means using the load_16 and store_16 types that were defined in the
> > > previous patch for the first time in the AArch64 backend. To ensure
> > > continuity for scheduling models, I've just split this out from load_8.
> > > Please update your models if this is very wrong!
> > > 
> > > Bootstrapped on aarch64-none-linux-gnu with no issue.
> > > 
> > > OK?
> > > 
> > > Thanks,
> > > James
> > > 
> > > ---
> > > 2017-06-12  James Greenhalgh  <james.greenhalgh@arm.com>
> > > 
> > > 	* config/aarch64/aarch64.md (movdi_aarch64): Set load/store
> > > 	types correctly.
> > > 	(movti_aarch64): Likewise.
> > > 	(movdf_aarch64): Likewise.
> > > 	(movtf_aarch64): Likewise.
> > > 	(load_pairdi): Likewise.
> > > 	(store_pairdi): Likewise.
> > > 	(load_pairdf): Likewise.
> > > 	(store_pairdf): Likewise.
> > > 	(loadwb_pair<GPI:mode>_<P:mode>): Likewise.
> > > 	(storewb_pair<GPI:mode>_<P:mode>): Likewise.
> > > 	(ldr_got_small_<mode>): Likewise.
> > > 	(ldr_got_small_28k_<mode>): Likewise.
> > > 	(ldr_got_tiny): Likewise.
> > > 	* config/aarch64/iterators.md (ldst_sz): New.
> > > 	(ldpstp_sz): Likewise.
> > > 	* config/aarch64/thunderx.md (thunderx_storepair): Split store_8
> > > 	to store_16.
> > > 	(thunderx_load): Split load_8 to load_16.
> > > 	* config/aarch64/thunderx2t99.md (thunderx2t99_loadpair): Split
> > > 	load_8 to load_16.
> > > 	(thunderx2t99_storepair_basic): Split store_8 to store_16.
> > > 	* config/arm/xgene1.md (xgene1_load_pair): Split load_8 to load_16.
> > > 	(xgene1_store_pair): Split store_8 to store_16.
> > > 
> > 
> > > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> > > index 11295a6..a1385e3 100644
> > > --- a/gcc/config/aarch64/aarch64.md
> > > +++ b/gcc/config/aarch64/aarch64.md
> > > @@ -981,7 +981,7 @@
> > >         DONE;
> > >      }"
> > >    [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,\
> > > -                     load_4,load_4,store_4,store_4,\
> > > +                     load_8,load_8,store_8,store_8,\
> > >                       adr,adr,f_mcr,f_mrc,fmov,neon_move")
> > >     (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
> > >     (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
> > > @@ -1026,7 +1026,8 @@
> > >     ldr\\t%q0, %1
> > >     str\\t%q1, %0"
> > >    [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \
> > > -		             load_8,store_8,store_8,f_loadd,f_stored")
> > > +		             load_16,store_16,store_16,\
> > > +                             load_16,store_16")
> > >     (set_attr "length" "8,8,8,4,4,4,4,4,4")
> > >     (set_attr "simd" "*,*,*,yes,*,*,*,*,*")
> > >     (set_attr "fp" "*,*,*,*,*,*,*,yes,yes")]
> > > @@ -1121,7 +1122,7 @@
> > >     str\\t%x1, %0
> > >     mov\\t%x0, %x1"
> > >    [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\
> > > -                     f_loadd,f_stored,load_4,store_4,mov_reg")
> > > +                     f_loadd,f_stored,load_8,store_8,mov_reg")
> > >     (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
> > >  )
> > >  
> > > @@ -1145,7 +1146,7 @@
> > >     stp\\t%1, %H1, %0
> > >     stp\\txzr, xzr, %0"
> > >    [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\
> > > -                     f_loadd,f_stored,load_8,store_8,store_8")
> > > +                     f_loadd,f_stored,load_16,store_16,store_16")
> > >     (set_attr "length" "4,8,8,8,4,4,4,4,4,4,4")
> > >     (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*,*")]
> > >  )
> > > @@ -1209,7 +1210,7 @@
> > >    "@
> > >     ldp\\t%x0, %x2, %1
> > >     ldp\\t%d0, %d2, %1"
> > > -  [(set_attr "type" "load_8,neon_load1_2reg")
> > > +  [(set_attr "type" "load_16,neon_load1_2reg")
> > >     (set_attr "fp" "*,yes")]
> > >  )
> > >  
> > > @@ -1244,7 +1245,7 @@
> > >    "@
> > >     stp\\t%x1, %x3, %0
> > >     stp\\t%d1, %d3, %0"
> > > -  [(set_attr "type" "store_8,neon_store1_2reg")
> > > +  [(set_attr "type" "store_16,neon_store1_2reg")
> > >     (set_attr "fp" "*,yes")]
> > >  )
> > >  
> > > @@ -1278,7 +1279,7 @@
> > >    "@
> > >     ldp\\t%d0, %d2, %1
> > >     ldp\\t%x0, %x2, %1"
> > > -  [(set_attr "type" "neon_load1_2reg,load_8")
> > > +  [(set_attr "type" "neon_load1_2reg,load_16")
> > >     (set_attr "fp" "yes,*")]
> > >  )
> > >  
> > > @@ -1312,7 +1313,7 @@
> > >    "@
> > >     stp\\t%d1, %d3, %0
> > >     stp\\t%x1, %x3, %0"
> > > -  [(set_attr "type" "neon_store1_2reg,store_8")
> > > +  [(set_attr "type" "neon_store1_2reg,store_16")
> > >     (set_attr "fp" "yes,*")]
> > >  )
> > >  
> > > @@ -1330,7 +1331,7 @@
> > >                     (match_operand:P 5 "const_int_operand" "n"))))])]
> > >    "INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)"
> > >    "ldp\\t%<w>2, %<w>3, [%1], %4"
> > > -  [(set_attr "type" "load_8")]
> > > +  [(set_attr "type" "load_<ldpstp_sz>")]
> > >  )
> > >  
> > >  (define_insn "loadwb_pair<GPF:mode>_<P:mode>"
> > > @@ -1363,7 +1364,7 @@
> > >            (match_operand:GPI 3 "register_operand" "r"))])]
> > >    "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)"
> > >    "stp\\t%<w>2, %<w>3, [%0, %4]!"
> > > -  [(set_attr "type" "store_8")]
> > > +  [(set_attr "type" "store_<ldpstp_sz>")]
> > >  )
> > >  
> > >  (define_insn "storewb_pair<GPF:mode>_<P:mode>"
> > > @@ -5139,7 +5140,7 @@
> > >  		    UNSPEC_GOTSMALLPIC))]
> > >    ""
> > >    "ldr\\t%<w>0, [%1, #:got_lo12:%a2]"
> > > -  [(set_attr "type" "load_4")]
> > > +  [(set_attr "type" "load_<ldst_sz>")]
> > >  )
> > >  
> > >  (define_insn "ldr_got_small_sidi"
> > > @@ -5162,7 +5163,7 @@
> > >  		    UNSPEC_GOTSMALLPIC28K))]
> > >    ""
> > >    "ldr\\t%<w>0, [%1, #:<got_modifier>:%a2]"
> > > -  [(set_attr "type" "load_4")]
> > > +  [(set_attr "type" "load_<ldst_sz>")]
> > >  )
> > >  
> > >  (define_insn "ldr_got_small_28k_sidi"
> > > @@ -5183,7 +5184,7 @@
> > >  		   UNSPEC_GOTTINYPIC))]
> > >    ""
> > >    "ldr\\t%0, %L1"
> > > -  [(set_attr "type" "load_4")]
> > > +  [(set_attr "type" "load_8")]
> > >  )
> > >  
> > >  (define_insn "aarch64_load_tp_hard"
> > > diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> > > index 43be7fd..a65c3aa 100644
> > > --- a/gcc/config/aarch64/iterators.md
> > > +++ b/gcc/config/aarch64/iterators.md
> > > @@ -384,6 +384,11 @@
> > >  ;; 32-bit version and "%x0" in the 64-bit version.
> > >  (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")])
> > >  
> > > +;; The size of access, in bytes.
> > > +(define_mode_attr ldst_sz [(SI "4") (DI "8")])
> > > +;; Likewise for load/store pair.
> > > +(define_mode_attr ldpstp_sz [(SI "8") (DI "16")])
> > > +
> > >  ;; For inequal width int to float conversion
> > >  (define_mode_attr w1 [(HF "w") (SF "w") (DF "x")])
> > >  (define_mode_attr w2 [(HF "x") (SF "x") (DF "w")])
> > > diff --git a/gcc/config/aarch64/thunderx.md b/gcc/config/aarch64/thunderx.md
> > > index c18da2f..84ac6cd 100644
> > > --- a/gcc/config/aarch64/thunderx.md
> > > +++ b/gcc/config/aarch64/thunderx.md
> > > @@ -100,7 +100,7 @@
> > >  ;; Store pair are single issued
> > >  (define_insn_reservation "thunderx_storepair" 1
> > >    (and (eq_attr "tune" "thunderx")
> > > -       (eq_attr "type" "store_8"))
> > > +       (eq_attr "type" "store_8,store_16"))
> > >    "thunderx_pipe0 + thunderx_pipe1")
> > >  
> > >  ;; Prefetch are single issued
> > > @@ -112,7 +112,7 @@
> > >  ;; loads (and load pairs) from L1 take 3 cycles in pipe 0
> > >  (define_insn_reservation "thunderx_load" 3
> > >    (and (eq_attr "tune" "thunderx")
> > > -       (eq_attr "type" "load_4, load_8"))
> > > +       (eq_attr "type" "load_4, load_8, load_16"))
> > >    "thunderx_pipe0")
> > >  
> > >  (define_insn_reservation "thunderx_brj" 1
> > > diff --git a/gcc/config/aarch64/thunderx2t99.md b/gcc/config/aarch64/thunderx2t99.md
> > > index 632396f..4e39610 100644
> > > --- a/gcc/config/aarch64/thunderx2t99.md
> > > +++ b/gcc/config/aarch64/thunderx2t99.md
> > > @@ -128,7 +128,7 @@
> > >  
> > >  (define_insn_reservation "thunderx2t99_loadpair" 5
> > >    (and (eq_attr "tune" "thunderx2t99")
> > > -       (eq_attr "type" "load_8"))
> > > +       (eq_attr "type" "load_8,load_16"))
> > >    "thunderx2t99_i012,thunderx2t99_ls01")
> > >  
> > >  (define_insn_reservation "thunderx2t99_store_basic" 1
> > > @@ -138,7 +138,7 @@
> > >  
> > >  (define_insn_reservation "thunderx2t99_storepair_basic" 1
> > >    (and (eq_attr "tune" "thunderx2t99")
> > > -       (eq_attr "type" "store_8"))
> > > +       (eq_attr "type" "store_8,store_16"))
> > >    "thunderx2t99_ls01,thunderx2t99_sd")
> > >  
> > >  ;; FP data processing instructions.
> > > diff --git a/gcc/config/arm/xgene1.md b/gcc/config/arm/xgene1.md
> > > index 7e70408..0b457ee 100644
> > > --- a/gcc/config/arm/xgene1.md
> > > +++ b/gcc/config/arm/xgene1.md
> > > @@ -92,12 +92,12 @@
> > >  
> > >  (define_insn_reservation "xgene1_load_pair" 6
> > >    (and (eq_attr "tune" "xgene1")
> > > -       (eq_attr "type" "load_8"))
> > > +       (eq_attr "type" "load_8, load_16"))
> > >    "xgene1_decodeIsolated")
> > >  
> > >  (define_insn_reservation "xgene1_store_pair" 2
> > >    (and (eq_attr "tune" "xgene1")
> > > -       (eq_attr "type" "store_8"))
> > > +       (eq_attr "type" "store_8, store_16"))
> > >    "xgene1_decodeIsolated")
> > >  
> > >  (define_insn_reservation "xgene1_fp_load1" 10
> >
diff mbox

Patch

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 11295a6..a1385e3 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -981,7 +981,7 @@ 
        DONE;
     }"
   [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,\
-                     load_4,load_4,store_4,store_4,\
+                     load_8,load_8,store_8,store_8,\
                      adr,adr,f_mcr,f_mrc,fmov,neon_move")
    (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
    (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
@@ -1026,7 +1026,8 @@ 
    ldr\\t%q0, %1
    str\\t%q1, %0"
   [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \
-		             load_8,store_8,store_8,f_loadd,f_stored")
+		             load_16,store_16,store_16,\
+                             load_16,store_16")
    (set_attr "length" "8,8,8,4,4,4,4,4,4")
    (set_attr "simd" "*,*,*,yes,*,*,*,*,*")
    (set_attr "fp" "*,*,*,*,*,*,*,yes,yes")]
@@ -1121,7 +1122,7 @@ 
    str\\t%x1, %0
    mov\\t%x0, %x1"
   [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\
-                     f_loadd,f_stored,load_4,store_4,mov_reg")
+                     f_loadd,f_stored,load_8,store_8,mov_reg")
    (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
 )
 
@@ -1145,7 +1146,7 @@ 
    stp\\t%1, %H1, %0
    stp\\txzr, xzr, %0"
   [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\
-                     f_loadd,f_stored,load_8,store_8,store_8")
+                     f_loadd,f_stored,load_16,store_16,store_16")
    (set_attr "length" "4,8,8,8,4,4,4,4,4,4,4")
    (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*,*")]
 )
@@ -1209,7 +1210,7 @@ 
   "@
    ldp\\t%x0, %x2, %1
    ldp\\t%d0, %d2, %1"
-  [(set_attr "type" "load_8,neon_load1_2reg")
+  [(set_attr "type" "load_16,neon_load1_2reg")
    (set_attr "fp" "*,yes")]
 )
 
@@ -1244,7 +1245,7 @@ 
   "@
    stp\\t%x1, %x3, %0
    stp\\t%d1, %d3, %0"
-  [(set_attr "type" "store_8,neon_store1_2reg")
+  [(set_attr "type" "store_16,neon_store1_2reg")
    (set_attr "fp" "*,yes")]
 )
 
@@ -1278,7 +1279,7 @@ 
   "@
    ldp\\t%d0, %d2, %1
    ldp\\t%x0, %x2, %1"
-  [(set_attr "type" "neon_load1_2reg,load_8")
+  [(set_attr "type" "neon_load1_2reg,load_16")
    (set_attr "fp" "yes,*")]
 )
 
@@ -1312,7 +1313,7 @@ 
   "@
    stp\\t%d1, %d3, %0
    stp\\t%x1, %x3, %0"
-  [(set_attr "type" "neon_store1_2reg,store_8")
+  [(set_attr "type" "neon_store1_2reg,store_16")
    (set_attr "fp" "yes,*")]
 )
 
@@ -1330,7 +1331,7 @@ 
                    (match_operand:P 5 "const_int_operand" "n"))))])]
   "INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)"
   "ldp\\t%<w>2, %<w>3, [%1], %4"
-  [(set_attr "type" "load_8")]
+  [(set_attr "type" "load_<ldpstp_sz>")]
 )
 
 (define_insn "loadwb_pair<GPF:mode>_<P:mode>"
@@ -1363,7 +1364,7 @@ 
           (match_operand:GPI 3 "register_operand" "r"))])]
   "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)"
   "stp\\t%<w>2, %<w>3, [%0, %4]!"
-  [(set_attr "type" "store_8")]
+  [(set_attr "type" "store_<ldpstp_sz>")]
 )
 
 (define_insn "storewb_pair<GPF:mode>_<P:mode>"
@@ -5139,7 +5140,7 @@ 
 		    UNSPEC_GOTSMALLPIC))]
   ""
   "ldr\\t%<w>0, [%1, #:got_lo12:%a2]"
-  [(set_attr "type" "load_4")]
+  [(set_attr "type" "load_<ldst_sz>")]
 )
 
 (define_insn "ldr_got_small_sidi"
@@ -5162,7 +5163,7 @@ 
 		    UNSPEC_GOTSMALLPIC28K))]
   ""
   "ldr\\t%<w>0, [%1, #:<got_modifier>:%a2]"
-  [(set_attr "type" "load_4")]
+  [(set_attr "type" "load_<ldst_sz>")]
 )
 
 (define_insn "ldr_got_small_28k_sidi"
@@ -5183,7 +5184,7 @@ 
 		   UNSPEC_GOTTINYPIC))]
   ""
   "ldr\\t%0, %L1"
-  [(set_attr "type" "load_4")]
+  [(set_attr "type" "load_8")]
 )
 
 (define_insn "aarch64_load_tp_hard"
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 43be7fd..a65c3aa 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -384,6 +384,11 @@ 
 ;; 32-bit version and "%x0" in the 64-bit version.
 (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")])
 
+;; The size of a single-register load/store access, in bytes.
+(define_mode_attr ldst_sz [(SI "4") (DI "8")])
+;; The total size of a load/store pair access (both registers), in bytes.
+(define_mode_attr ldpstp_sz [(SI "8") (DI "16")])
+
 ;; For inequal width int to float conversion
 (define_mode_attr w1 [(HF "w") (SF "w") (DF "x")])
 (define_mode_attr w2 [(HF "x") (SF "x") (DF "w")])
diff --git a/gcc/config/aarch64/thunderx.md b/gcc/config/aarch64/thunderx.md
index c18da2f..84ac6cd 100644
--- a/gcc/config/aarch64/thunderx.md
+++ b/gcc/config/aarch64/thunderx.md
@@ -100,7 +100,7 @@ 
 ;; Store pair are single issued
 (define_insn_reservation "thunderx_storepair" 1
   (and (eq_attr "tune" "thunderx")
-       (eq_attr "type" "store_8"))
+       (eq_attr "type" "store_8,store_16"))
   "thunderx_pipe0 + thunderx_pipe1")
 
 ;; Prefetch are single issued
@@ -112,7 +112,7 @@ 
 ;; loads (and load pairs) from L1 take 3 cycles in pipe 0
 (define_insn_reservation "thunderx_load" 3
   (and (eq_attr "tune" "thunderx")
-       (eq_attr "type" "load_4, load_8"))
+       (eq_attr "type" "load_4, load_8, load_16"))
   "thunderx_pipe0")
 
 (define_insn_reservation "thunderx_brj" 1
diff --git a/gcc/config/aarch64/thunderx2t99.md b/gcc/config/aarch64/thunderx2t99.md
index 632396f..4e39610 100644
--- a/gcc/config/aarch64/thunderx2t99.md
+++ b/gcc/config/aarch64/thunderx2t99.md
@@ -128,7 +128,7 @@ 
 
 (define_insn_reservation "thunderx2t99_loadpair" 5
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "load_8"))
+       (eq_attr "type" "load_8,load_16"))
   "thunderx2t99_i012,thunderx2t99_ls01")
 
 (define_insn_reservation "thunderx2t99_store_basic" 1
@@ -138,7 +138,7 @@ 
 
 (define_insn_reservation "thunderx2t99_storepair_basic" 1
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "store_8"))
+       (eq_attr "type" "store_8,store_16"))
   "thunderx2t99_ls01,thunderx2t99_sd")
 
 ;; FP data processing instructions.
diff --git a/gcc/config/arm/xgene1.md b/gcc/config/arm/xgene1.md
index 7e70408..0b457ee 100644
--- a/gcc/config/arm/xgene1.md
+++ b/gcc/config/arm/xgene1.md
@@ -92,12 +92,12 @@ 
 
 (define_insn_reservation "xgene1_load_pair" 6
   (and (eq_attr "tune" "xgene1")
-       (eq_attr "type" "load_8"))
+       (eq_attr "type" "load_8, load_16"))
   "xgene1_decodeIsolated")
 
 (define_insn_reservation "xgene1_store_pair" 2
   (and (eq_attr "tune" "xgene1")
-       (eq_attr "type" "store_8"))
+       (eq_attr "type" "store_8, store_16"))
   "xgene1_decodeIsolated")
 
 (define_insn_reservation "xgene1_fp_load1" 10