Message ID | 1552612714-32492-1-git-send-email-apinski@marvell.com |
---|---|
State | New |
Headers | show |
Series | [AARCH64] Improve vector generation cost model | expand |
On Thu, Mar 14, 2019 at 6:19 PM <apinski@marvell.com> wrote: > > From: Andrew Pinski <apinski@marvell.com> > > Hi, > On OcteonTX2, ld1r and ld1 (with a single lane) are split > into two different micro-ops unlike most other targets. > This adds three extra costs to the cost table: > ld1_dup: used for "ld1r {v0.4s}, [x0]" > merge_dup: used for "dup v0.4s, v0.4s[0]" and "ins v0.4s[0], v0.4s[0]" > ld1_merge: used fir "ld1 {v0.4s}[0], [x0]" > > OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions. Ping? It has been 1.5 months now. > > Thanks, > Andrew Pinski > > ChangeLog: > * config/arm/aarch-common-protos.h (vector_cost_table): > Add merge_dup, ld1_merge, and ld1_dup. > * config/aarch64/aarch64-cost-tables.h (qdf24xx_extra_costs): > Update for the new fields. > (thunderx_extra_costs): Likewise. > (thunderx2t99_extra_costs): Likewise. > (tsv110_extra_costs): Likewise. > * config/arm/aarch-cost-tables.h (generic_extra_costs): Likewise. > (cortexa53_extra_costs): Likewise. > (cortexa57_extra_costs): Likewise. > (exynosm1_extra_costs): Likewise. > (xgene1_extra_costs): Likewise. > * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle vec_dup of a memory. > Hanlde vec_merge of a memory. > > Signed-off-by: Andrew Pinski <apinski@marvell.com> > --- > gcc/config/aarch64/aarch64-cost-tables.h | 20 +++++++++++++++---- > gcc/config/aarch64/aarch64.c | 22 +++++++++++++++++++++ > gcc/config/arm/aarch-common-protos.h | 3 +++ > gcc/config/arm/aarch-cost-tables.h | 25 +++++++++++++++++++----- > 4 files changed, 61 insertions(+), 9 deletions(-) > > diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h > index 5c9442e1b89..9a7c70ba595 100644 > --- a/gcc/config/aarch64/aarch64-cost-tables.h > +++ b/gcc/config/aarch64/aarch64-cost-tables.h > @@ -123,7 +123,10 @@ const struct cpu_cost_table qdf24xx_extra_costs = > }, > /* Vector */ > { > - COSTS_N_INSNS (1) /* alu. */ > + COSTS_N_INSNS (1), /* Alu. */ > + COSTS_N_INSNS (1), /* dup_merge. */ > + COSTS_N_INSNS (1), /* ld1_merge. */ > + COSTS_N_INSNS (1) /* ld1_dup. */ > } > }; > > @@ -227,7 +230,10 @@ const struct cpu_cost_table thunderx_extra_costs = > }, > /* Vector */ > { > - COSTS_N_INSNS (1) /* Alu. */ > + COSTS_N_INSNS (1), /* Alu. */ > + COSTS_N_INSNS (1), /* dup_merge. */ > + COSTS_N_INSNS (1), /* ld1_merge. */ > + COSTS_N_INSNS (1) /* ld1_dup. */ > } > }; > > @@ -330,7 +336,10 @@ const struct cpu_cost_table thunderx2t99_extra_costs = > }, > /* Vector */ > { > - COSTS_N_INSNS (1) /* Alu. */ > + COSTS_N_INSNS (1), /* Alu. */ > + COSTS_N_INSNS (1), /* dup_merge. */ > + COSTS_N_INSNS (1), /* ld1_merge. */ > + COSTS_N_INSNS (1) /* ld1_dup. */ > } > }; > > @@ -434,7 +443,10 @@ const struct cpu_cost_table tsv110_extra_costs = > }, > /* Vector */ > { > - COSTS_N_INSNS (1) /* alu. */ > + COSTS_N_INSNS (1), /* Alu. */ > + COSTS_N_INSNS (1), /* dup_merge. */ > + COSTS_N_INSNS (1), /* ld1_merge. */ > + COSTS_N_INSNS (1) /* ld1_dup. */ > } > }; > > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index b38505b0872..dc4d3d39af8 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -10568,6 +10568,28 @@ cost_plus: > } > break; > > + case VEC_DUPLICATE: > + if (!speed) > + return false; > + > + if (GET_CODE (XEXP (x, 0)) == MEM) > + *cost += extra_cost->vect.ld1_dup; > + else > + *cost += extra_cost->vect.merge_dup; > + return true; > + > + case VEC_MERGE: > + if (speed && GET_CODE (XEXP (x, 0)) == VEC_DUPLICATE) > + { > + if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MEM) > + *cost += extra_cost->vect.ld1_merge; > + else > + *cost += extra_cost->vect.merge_dup; > + return true; > + } > + break; > + > + > case TRUNCATE: > > /* Decompose <su>muldi3_highpart. */ > diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h > index 11cd5145bbc..dbc1282402a 100644 > --- a/gcc/config/arm/aarch-common-protos.h > +++ b/gcc/config/arm/aarch-common-protos.h > @@ -131,6 +131,9 @@ struct fp_cost_table > struct vector_cost_table > { > const int alu; > + const int merge_dup; > + const int ld1_merge; > + const int ld1_dup; > }; > > struct cpu_cost_table > diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h > index bc33efadc6c..a51bc668f56 100644 > --- a/gcc/config/arm/aarch-cost-tables.h > +++ b/gcc/config/arm/aarch-cost-tables.h > @@ -121,7 +121,10 @@ const struct cpu_cost_table generic_extra_costs = > }, > /* Vector */ > { > - COSTS_N_INSNS (1) /* alu. */ > + COSTS_N_INSNS (1), /* alu. */ > + COSTS_N_INSNS (1), /* dup_merge. */ > + COSTS_N_INSNS (1), /* ld1_merge. */ > + COSTS_N_INSNS (1) /* ld1_dup. */ > } > }; > > @@ -224,7 +227,10 @@ const struct cpu_cost_table cortexa53_extra_costs = > }, > /* Vector */ > { > - COSTS_N_INSNS (1) /* alu. */ > + COSTS_N_INSNS (1), /* alu. */ > + COSTS_N_INSNS (1), /* dup_merge. */ > + COSTS_N_INSNS (1), /* ld1_merge. */ > + COSTS_N_INSNS (1) /* ld1_dup. */ > } > }; > > @@ -327,7 +333,10 @@ const struct cpu_cost_table cortexa57_extra_costs = > }, > /* Vector */ > { > - COSTS_N_INSNS (1) /* alu. */ > + COSTS_N_INSNS (1), /* alu. */ > + COSTS_N_INSNS (1), /* dup_merge. */ > + COSTS_N_INSNS (1), /* ld1_merge. */ > + COSTS_N_INSNS (1) /* ld1_dup. */ > } > }; > > @@ -430,7 +439,10 @@ const struct cpu_cost_table exynosm1_extra_costs = > }, > /* Vector */ > { > - COSTS_N_INSNS (0) /* alu. */ > + COSTS_N_INSNS (0), /* alu. */ > + COSTS_N_INSNS (0), /* dup_merge. */ > + COSTS_N_INSNS (0), /* ld1_merge. */ > + COSTS_N_INSNS (0) /* ld1_dup. */ > } > }; > > @@ -533,7 +545,10 @@ const struct cpu_cost_table xgene1_extra_costs = > }, > /* Vector */ > { > - COSTS_N_INSNS (2) /* alu. */ > + COSTS_N_INSNS (2), /* alu. */ > + COSTS_N_INSNS (2), /* dup_merge. */ > + COSTS_N_INSNS (2), /* ld1_merge. */ > + COSTS_N_INSNS (2) /* ld1_dup. */ > } > }; > > -- > 2.17.1 >
On Thu, May 2, 2019 at 9:10 AM Andrew Pinski <pinskia@gmail.com> wrote: > > On Thu, Mar 14, 2019 at 6:19 PM <apinski@marvell.com> wrote: > > > > From: Andrew Pinski <apinski@marvell.com> > > > > Hi, > > On OcteonTX2, ld1r and ld1 (with a single lane) are split > > into two different micro-ops unlike most other targets. > > This adds three extra costs to the cost table: > > ld1_dup: used for "ld1r {v0.4s}, [x0]" > > merge_dup: used for "dup v0.4s, v0.4s[0]" and "ins v0.4s[0], v0.4s[0]" > > ld1_merge: used fir "ld1 {v0.4s}[0], [x0]" > > > > OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions. > > Ping? It has been 1.5 months now. Ping? I have bootstrapped and tested on aarch64-linux-gnu recently with the patch. Or does this has to wait until Stage 1? Thanks, Andrew > > > > > Thanks, > > Andrew Pinski > > > > ChangeLog: > > * config/arm/aarch-common-protos.h (vector_cost_table): > > Add merge_dup, ld1_merge, and ld1_dup. > > * config/aarch64/aarch64-cost-tables.h (qdf24xx_extra_costs): > > Update for the new fields. > > (thunderx_extra_costs): Likewise. > > (thunderx2t99_extra_costs): Likewise. > > (tsv110_extra_costs): Likewise. > > * config/arm/aarch-cost-tables.h (generic_extra_costs): Likewise. > > (cortexa53_extra_costs): Likewise. > > (cortexa57_extra_costs): Likewise. > > (exynosm1_extra_costs): Likewise. > > (xgene1_extra_costs): Likewise. > > * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle vec_dup of a memory. > > Hanlde vec_merge of a memory. > > > > Signed-off-by: Andrew Pinski <apinski@marvell.com> > > --- > > gcc/config/aarch64/aarch64-cost-tables.h | 20 +++++++++++++++---- > > gcc/config/aarch64/aarch64.c | 22 +++++++++++++++++++++ > > gcc/config/arm/aarch-common-protos.h | 3 +++ > > gcc/config/arm/aarch-cost-tables.h | 25 +++++++++++++++++++----- > > 4 files changed, 61 insertions(+), 9 deletions(-) > > > > diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h > > index 5c9442e1b89..9a7c70ba595 100644 > > --- a/gcc/config/aarch64/aarch64-cost-tables.h > > +++ b/gcc/config/aarch64/aarch64-cost-tables.h > > @@ -123,7 +123,10 @@ const struct cpu_cost_table qdf24xx_extra_costs = > > }, > > /* Vector */ > > { > > - COSTS_N_INSNS (1) /* alu. */ > > + COSTS_N_INSNS (1), /* Alu. */ > > + COSTS_N_INSNS (1), /* dup_merge. */ > > + COSTS_N_INSNS (1), /* ld1_merge. */ > > + COSTS_N_INSNS (1) /* ld1_dup. */ > > } > > }; > > > > @@ -227,7 +230,10 @@ const struct cpu_cost_table thunderx_extra_costs = > > }, > > /* Vector */ > > { > > - COSTS_N_INSNS (1) /* Alu. */ > > + COSTS_N_INSNS (1), /* Alu. */ > > + COSTS_N_INSNS (1), /* dup_merge. */ > > + COSTS_N_INSNS (1), /* ld1_merge. */ > > + COSTS_N_INSNS (1) /* ld1_dup. */ > > } > > }; > > > > @@ -330,7 +336,10 @@ const struct cpu_cost_table thunderx2t99_extra_costs = > > }, > > /* Vector */ > > { > > - COSTS_N_INSNS (1) /* Alu. */ > > + COSTS_N_INSNS (1), /* Alu. */ > > + COSTS_N_INSNS (1), /* dup_merge. */ > > + COSTS_N_INSNS (1), /* ld1_merge. */ > > + COSTS_N_INSNS (1) /* ld1_dup. */ > > } > > }; > > > > @@ -434,7 +443,10 @@ const struct cpu_cost_table tsv110_extra_costs = > > }, > > /* Vector */ > > { > > - COSTS_N_INSNS (1) /* alu. */ > > + COSTS_N_INSNS (1), /* Alu. */ > > + COSTS_N_INSNS (1), /* dup_merge. */ > > + COSTS_N_INSNS (1), /* ld1_merge. */ > > + COSTS_N_INSNS (1) /* ld1_dup. */ > > } > > }; > > > > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > > index b38505b0872..dc4d3d39af8 100644 > > --- a/gcc/config/aarch64/aarch64.c > > +++ b/gcc/config/aarch64/aarch64.c > > @@ -10568,6 +10568,28 @@ cost_plus: > > } > > break; > > > > + case VEC_DUPLICATE: > > + if (!speed) > > + return false; > > + > > + if (GET_CODE (XEXP (x, 0)) == MEM) > > + *cost += extra_cost->vect.ld1_dup; > > + else > > + *cost += extra_cost->vect.merge_dup; > > + return true; > > + > > + case VEC_MERGE: > > + if (speed && GET_CODE (XEXP (x, 0)) == VEC_DUPLICATE) > > + { > > + if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MEM) > > + *cost += extra_cost->vect.ld1_merge; > > + else > > + *cost += extra_cost->vect.merge_dup; > > + return true; > > + } > > + break; > > + > > + > > case TRUNCATE: > > > > /* Decompose <su>muldi3_highpart. */ > > diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h > > index 11cd5145bbc..dbc1282402a 100644 > > --- a/gcc/config/arm/aarch-common-protos.h > > +++ b/gcc/config/arm/aarch-common-protos.h > > @@ -131,6 +131,9 @@ struct fp_cost_table > > struct vector_cost_table > > { > > const int alu; > > + const int merge_dup; > > + const int ld1_merge; > > + const int ld1_dup; > > }; > > > > struct cpu_cost_table > > diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h > > index bc33efadc6c..a51bc668f56 100644 > > --- a/gcc/config/arm/aarch-cost-tables.h > > +++ b/gcc/config/arm/aarch-cost-tables.h > > @@ -121,7 +121,10 @@ const struct cpu_cost_table generic_extra_costs = > > }, > > /* Vector */ > > { > > - COSTS_N_INSNS (1) /* alu. */ > > + COSTS_N_INSNS (1), /* alu. */ > > + COSTS_N_INSNS (1), /* dup_merge. */ > > + COSTS_N_INSNS (1), /* ld1_merge. */ > > + COSTS_N_INSNS (1) /* ld1_dup. */ > > } > > }; > > > > @@ -224,7 +227,10 @@ const struct cpu_cost_table cortexa53_extra_costs = > > }, > > /* Vector */ > > { > > - COSTS_N_INSNS (1) /* alu. */ > > + COSTS_N_INSNS (1), /* alu. */ > > + COSTS_N_INSNS (1), /* dup_merge. */ > > + COSTS_N_INSNS (1), /* ld1_merge. */ > > + COSTS_N_INSNS (1) /* ld1_dup. */ > > } > > }; > > > > @@ -327,7 +333,10 @@ const struct cpu_cost_table cortexa57_extra_costs = > > }, > > /* Vector */ > > { > > - COSTS_N_INSNS (1) /* alu. */ > > + COSTS_N_INSNS (1), /* alu. */ > > + COSTS_N_INSNS (1), /* dup_merge. */ > > + COSTS_N_INSNS (1), /* ld1_merge. */ > > + COSTS_N_INSNS (1) /* ld1_dup. */ > > } > > }; > > > > @@ -430,7 +439,10 @@ const struct cpu_cost_table exynosm1_extra_costs = > > }, > > /* Vector */ > > { > > - COSTS_N_INSNS (0) /* alu. */ > > + COSTS_N_INSNS (0), /* alu. */ > > + COSTS_N_INSNS (0), /* dup_merge. */ > > + COSTS_N_INSNS (0), /* ld1_merge. */ > > + COSTS_N_INSNS (0) /* ld1_dup. */ > > } > > }; > > > > @@ -533,7 +545,10 @@ const struct cpu_cost_table xgene1_extra_costs = > > }, > > /* Vector */ > > { > > - COSTS_N_INSNS (2) /* alu. */ > > + COSTS_N_INSNS (2), /* alu. */ > > + COSTS_N_INSNS (2), /* dup_merge. */ > > + COSTS_N_INSNS (2), /* ld1_merge. */ > > + COSTS_N_INSNS (2) /* ld1_dup. */ > > } > > }; > > > > -- > > 2.17.1 > >
Hi Andrew, On 3/15/19 1:18 AM, apinski@marvell.com wrote: > From: Andrew Pinski <apinski@marvell.com> > > Hi, > On OcteonTX2, ld1r and ld1 (with a single lane) are split > into two different micro-ops unlike most other targets. > This adds three extra costs to the cost table: > ld1_dup: used for "ld1r {v0.4s}, [x0]" > merge_dup: used for "dup v0.4s, v0.4s[0]" and "ins v0.4s[0], v0.4s[0]" > ld1_merge: used fir "ld1 {v0.4s}[0], [x0]" > > OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions. > Sorry for the slow reply, missed it on gcc-patches :( > Thanks, > Andrew Pinski > > ChangeLog: > * config/arm/aarch-common-protos.h (vector_cost_table): > Add merge_dup, ld1_merge, and ld1_dup. > * config/aarch64/aarch64-cost-tables.h (qdf24xx_extra_costs): > Update for the new fields. > (thunderx_extra_costs): Likewise. > (thunderx2t99_extra_costs): Likewise. > (tsv110_extra_costs): Likewise. > * config/arm/aarch-cost-tables.h (generic_extra_costs): Likewise. > (cortexa53_extra_costs): Likewise. > (cortexa57_extra_costs): Likewise. > (exynosm1_extra_costs): Likewise. > (xgene1_extra_costs): Likewise. > * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle vec_dup of a > memory. > Hanlde vec_merge of a memory. > > Signed-off-by: Andrew Pinski <apinski@marvell.com> > --- > gcc/config/aarch64/aarch64-cost-tables.h | 20 +++++++++++++++---- > gcc/config/aarch64/aarch64.c | 22 +++++++++++++++++++++ > gcc/config/arm/aarch-common-protos.h | 3 +++ > gcc/config/arm/aarch-cost-tables.h | 25 +++++++++++++++++++----- > 4 files changed, 61 insertions(+), 9 deletions(-) > > diff --git a/gcc/config/aarch64/aarch64-cost-tables.h > b/gcc/config/aarch64/aarch64-cost-tables.h > index 5c9442e1b89..9a7c70ba595 100644 > --- a/gcc/config/aarch64/aarch64-cost-tables.h > +++ b/gcc/config/aarch64/aarch64-cost-tables.h > @@ -123,7 +123,10 @@ const struct cpu_cost_table qdf24xx_extra_costs = > }, > /* Vector */ > { > - COSTS_N_INSNS (1) /* alu. */ > + COSTS_N_INSNS (1), /* Alu. */ > + COSTS_N_INSNS (1), /* dup_merge. */ > + COSTS_N_INSNS (1), /* ld1_merge. */ > + COSTS_N_INSNS (1) /* ld1_dup. */ > } > }; > > @@ -227,7 +230,10 @@ const struct cpu_cost_table thunderx_extra_costs = > }, > /* Vector */ > { > - COSTS_N_INSNS (1) /* Alu. */ > + COSTS_N_INSNS (1), /* Alu. */ > + COSTS_N_INSNS (1), /* dup_merge. */ > + COSTS_N_INSNS (1), /* ld1_merge. */ > + COSTS_N_INSNS (1) /* ld1_dup. */ > } > }; > > @@ -330,7 +336,10 @@ const struct cpu_cost_table > thunderx2t99_extra_costs = > }, > /* Vector */ > { > - COSTS_N_INSNS (1) /* Alu. */ > + COSTS_N_INSNS (1), /* Alu. */ > + COSTS_N_INSNS (1), /* dup_merge. */ > + COSTS_N_INSNS (1), /* ld1_merge. */ > + COSTS_N_INSNS (1) /* ld1_dup. */ > } > }; > > @@ -434,7 +443,10 @@ const struct cpu_cost_table tsv110_extra_costs = > }, > /* Vector */ > { > - COSTS_N_INSNS (1) /* alu. */ > + COSTS_N_INSNS (1), /* Alu. */ > + COSTS_N_INSNS (1), /* dup_merge. */ > + COSTS_N_INSNS (1), /* ld1_merge. */ > + COSTS_N_INSNS (1) /* ld1_dup. */ > } > }; > > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index b38505b0872..dc4d3d39af8 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -10568,6 +10568,28 @@ cost_plus: > } > break; > > + case VEC_DUPLICATE: > + if (!speed) > + return false; If I read the code right, before this patch we would be returning true for !speed i.e. not recursing. Do we want to trigger a recursion now? > + > + if (GET_CODE (XEXP (x, 0)) == MEM) > + *cost += extra_cost->vect.ld1_dup; Please use MEM_P here. > + else > + *cost += extra_cost->vect.merge_dup; > + return true; > + > + case VEC_MERGE: > + if (speed && GET_CODE (XEXP (x, 0)) == VEC_DUPLICATE) > + { > + if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MEM) And here. Thanks, Kyrill > + *cost += extra_cost->vect.ld1_merge; > + else > + *cost += extra_cost->vect.merge_dup; > + return true; > + } > + break; > + > + > case TRUNCATE: > > /* Decompose <su>muldi3_highpart. */ > diff --git a/gcc/config/arm/aarch-common-protos.h > b/gcc/config/arm/aarch-common-protos.h > index 11cd5145bbc..dbc1282402a 100644 > --- a/gcc/config/arm/aarch-common-protos.h > +++ b/gcc/config/arm/aarch-common-protos.h > @@ -131,6 +131,9 @@ struct fp_cost_table > struct vector_cost_table > { > const int alu; > + const int merge_dup; > + const int ld1_merge; > + const int ld1_dup; > }; > > struct cpu_cost_table > diff --git a/gcc/config/arm/aarch-cost-tables.h > b/gcc/config/arm/aarch-cost-tables.h > index bc33efadc6c..a51bc668f56 100644 > --- a/gcc/config/arm/aarch-cost-tables.h > +++ b/gcc/config/arm/aarch-cost-tables.h > @@ -121,7 +121,10 @@ const struct cpu_cost_table generic_extra_costs = > }, > /* Vector */ > { > - COSTS_N_INSNS (1) /* alu. */ > + COSTS_N_INSNS (1), /* alu. */ > + COSTS_N_INSNS (1), /* dup_merge. */ > + COSTS_N_INSNS (1), /* ld1_merge. */ > + COSTS_N_INSNS (1) /* ld1_dup. */ > } > }; > > @@ -224,7 +227,10 @@ const struct cpu_cost_table cortexa53_extra_costs = > }, > /* Vector */ > { > - COSTS_N_INSNS (1) /* alu. */ > + COSTS_N_INSNS (1), /* alu. */ > + COSTS_N_INSNS (1), /* dup_merge. */ > + COSTS_N_INSNS (1), /* ld1_merge. */ > + COSTS_N_INSNS (1) /* ld1_dup. */ > } > }; > > @@ -327,7 +333,10 @@ const struct cpu_cost_table cortexa57_extra_costs = > }, > /* Vector */ > { > - COSTS_N_INSNS (1) /* alu. */ > + COSTS_N_INSNS (1), /* alu. */ > + COSTS_N_INSNS (1), /* dup_merge. */ > + COSTS_N_INSNS (1), /* ld1_merge. */ > + COSTS_N_INSNS (1) /* ld1_dup. */ > } > }; > > @@ -430,7 +439,10 @@ const struct cpu_cost_table exynosm1_extra_costs = > }, > /* Vector */ > { > - COSTS_N_INSNS (0) /* alu. */ > + COSTS_N_INSNS (0), /* alu. */ > + COSTS_N_INSNS (0), /* dup_merge. */ > + COSTS_N_INSNS (0), /* ld1_merge. */ > + COSTS_N_INSNS (0) /* ld1_dup. */ > } > }; > > @@ -533,7 +545,10 @@ const struct cpu_cost_table xgene1_extra_costs = > }, > /* Vector */ > { > - COSTS_N_INSNS (2) /* alu. */ > + COSTS_N_INSNS (2), /* alu. */ > + COSTS_N_INSNS (2), /* dup_merge. */ > + COSTS_N_INSNS (2), /* ld1_merge. */ > + COSTS_N_INSNS (2) /* ld1_dup. */ > } > }; > > -- > 2.17.1 >
diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h index 5c9442e1b89..9a7c70ba595 100644 --- a/gcc/config/aarch64/aarch64-cost-tables.h +++ b/gcc/config/aarch64/aarch64-cost-tables.h @@ -123,7 +123,10 @@ const struct cpu_cost_table qdf24xx_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (1) /* alu. */ + COSTS_N_INSNS (1), /* Alu. */ + COSTS_N_INSNS (1), /* dup_merge. */ + COSTS_N_INSNS (1), /* ld1_merge. */ + COSTS_N_INSNS (1) /* ld1_dup. */ } }; @@ -227,7 +230,10 @@ const struct cpu_cost_table thunderx_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (1) /* Alu. */ + COSTS_N_INSNS (1), /* Alu. */ + COSTS_N_INSNS (1), /* dup_merge. */ + COSTS_N_INSNS (1), /* ld1_merge. */ + COSTS_N_INSNS (1) /* ld1_dup. */ } }; @@ -330,7 +336,10 @@ const struct cpu_cost_table thunderx2t99_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (1) /* Alu. */ + COSTS_N_INSNS (1), /* Alu. */ + COSTS_N_INSNS (1), /* dup_merge. */ + COSTS_N_INSNS (1), /* ld1_merge. */ + COSTS_N_INSNS (1) /* ld1_dup. */ } }; @@ -434,7 +443,10 @@ const struct cpu_cost_table tsv110_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (1) /* alu. */ + COSTS_N_INSNS (1), /* Alu. */ + COSTS_N_INSNS (1), /* dup_merge. */ + COSTS_N_INSNS (1), /* ld1_merge. */ + COSTS_N_INSNS (1) /* ld1_dup. */ } }; diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index b38505b0872..dc4d3d39af8 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -10568,6 +10568,28 @@ cost_plus: } break; + case VEC_DUPLICATE: + if (!speed) + return false; + + if (GET_CODE (XEXP (x, 0)) == MEM) + *cost += extra_cost->vect.ld1_dup; + else + *cost += extra_cost->vect.merge_dup; + return true; + + case VEC_MERGE: + if (speed && GET_CODE (XEXP (x, 0)) == VEC_DUPLICATE) + { + if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MEM) + *cost += extra_cost->vect.ld1_merge; + else + *cost += extra_cost->vect.merge_dup; + return true; + } + break; + + case TRUNCATE: /* Decompose <su>muldi3_highpart. */ diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h index 11cd5145bbc..dbc1282402a 100644 --- a/gcc/config/arm/aarch-common-protos.h +++ b/gcc/config/arm/aarch-common-protos.h @@ -131,6 +131,9 @@ struct fp_cost_table struct vector_cost_table { const int alu; + const int merge_dup; + const int ld1_merge; + const int ld1_dup; }; struct cpu_cost_table diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h index bc33efadc6c..a51bc668f56 100644 --- a/gcc/config/arm/aarch-cost-tables.h +++ b/gcc/config/arm/aarch-cost-tables.h @@ -121,7 +121,10 @@ const struct cpu_cost_table generic_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (1) /* alu. */ + COSTS_N_INSNS (1), /* alu. */ + COSTS_N_INSNS (1), /* dup_merge. */ + COSTS_N_INSNS (1), /* ld1_merge. */ + COSTS_N_INSNS (1) /* ld1_dup. */ } }; @@ -224,7 +227,10 @@ const struct cpu_cost_table cortexa53_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (1) /* alu. */ + COSTS_N_INSNS (1), /* alu. */ + COSTS_N_INSNS (1), /* dup_merge. */ + COSTS_N_INSNS (1), /* ld1_merge. */ + COSTS_N_INSNS (1) /* ld1_dup. */ } }; @@ -327,7 +333,10 @@ const struct cpu_cost_table cortexa57_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (1) /* alu. */ + COSTS_N_INSNS (1), /* alu. */ + COSTS_N_INSNS (1), /* dup_merge. */ + COSTS_N_INSNS (1), /* ld1_merge. */ + COSTS_N_INSNS (1) /* ld1_dup. */ } }; @@ -430,7 +439,10 @@ const struct cpu_cost_table exynosm1_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (0) /* alu. */ + COSTS_N_INSNS (0), /* alu. */ + COSTS_N_INSNS (0), /* dup_merge. */ + COSTS_N_INSNS (0), /* ld1_merge. */ + COSTS_N_INSNS (0) /* ld1_dup. */ } }; @@ -533,7 +545,10 @@ const struct cpu_cost_table xgene1_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (2) /* alu. */ + COSTS_N_INSNS (2), /* alu. */ + COSTS_N_INSNS (2), /* dup_merge. */ + COSTS_N_INSNS (2), /* ld1_merge. */ + COSTS_N_INSNS (2) /* ld1_dup. */ } };