Message ID | 5530EC32.4030806@linaro.org |
---|---|
State | New |
Headers | show |
On 17/04/15 12:19, Kugan wrote: > Hi James, > > Here is an attempt along this line. Is this what you have in mind? > Trying to keep functionality as before so that we can tune the > parameters later. Not fully tested yet. Hi Kugan, I'm not doing a full review here, just have a comment inline. Thanks, Kyrill > > Thanks, > Kugan > > cost.txt > > > diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h > index ae2b547..ed9432e 100644 > --- a/gcc/config/aarch64/aarch64-cost-tables.h > +++ b/gcc/config/aarch64/aarch64-cost-tables.h > @@ -121,7 +121,9 @@ const struct cpu_cost_table thunderx_extra_costs = > }, > /* Vector */ > { > - COSTS_N_INSNS (1) /* Alu. */ > + COSTS_N_INSNS (1), /* Alu. */ > + COSTS_N_INSNS (1), /* Load. */ > + COSTS_N_INSNS (1) /* Store. */ > } What about the other CPU vector costs? Also, tune_params would need updating. > }; > > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index cba3c1a..c2d4a53 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -5499,16 +5499,6 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, > above this default. */ > *cost = COSTS_N_INSNS (1); > > - /* TODO: The cost infrastructure currently does not handle > - vector operations. Assume that all vector operations > - are equally expensive. */ > - if (VECTOR_MODE_P (mode)) > - { > - if (speed) > - *cost += extra_cost->vect.alu; > - return true; > - } > - > switch (code) > { > case SET: > @@ -5523,7 +5513,9 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, > if (speed) > { > rtx address = XEXP (op0, 0); > - if (GET_MODE_CLASS (mode) == MODE_INT) > + if (VECTOR_MODE_P (mode)) > + *cost += extra_cost->vect.store; > + else if (GET_MODE_CLASS (mode) == MODE_INT) > *cost += extra_cost->ldst.store; > else if (mode == SFmode) > *cost += extra_cost->ldst.storef; > @@ -5544,10 +5536,17 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, > > /* Fall through. */ > case REG: > + if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1)) > + { > + /* The cost is 1 per vector-register copied. */ > + int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1) > + / GET_MODE_SIZE (V4SImode); > + *cost = COSTS_N_INSNS (n_minus_1 + 1); > + } > /* const0_rtx is in general free, but we will use an > instruction to set a register to 0. */ > - if (REG_P (op1) || op1 == const0_rtx) > - { > + else if (REG_P (op1) || op1 == const0_rtx) > + { > /* The cost is 1 per register copied. */ > int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1) > / UNITS_PER_WORD; > @@ -5570,6 +5569,7 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, > && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0))) > >= INTVAL (XEXP (op0, 1)))) > op1 = XEXP (op1, 0); > + gcc_assert (!VECTOR_MODE_P (mode)); We shouldn't assert in rtx costs. If some control flow logic gets buggy, at worst we'd return a wrong rtx cost and make a suboptimal decision. This shouldn't ever lead to a crash, IMO. > > if (CONST_INT_P (op1)) > { > @@ -5621,8 +5621,10 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, > case CONST_DOUBLE: > if (speed) > { > + if (VECTOR_MODE_P (mode)) > + *cost += extra_cost->vect.alu; > /* mov[df,sf]_aarch64. */ > - if (aarch64_float_const_representable_p (x)) > + else if (aarch64_float_const_representable_p (x)) > /* FMOV (scalar immediate). */ > *cost += extra_cost->fp[mode == DFmode].fpconst; > else if (!aarch64_float_const_zero_rtx_p (x)) > @@ -5650,7 +5652,9 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, > approximation for the additional cost of the addressing > mode. */ > rtx address = XEXP (x, 0); > - if (GET_MODE_CLASS (mode) == MODE_INT) > + if (VECTOR_MODE_P (mode)) > + *cost += extra_cost->vect.load; > + else if (GET_MODE_CLASS (mode) == MODE_INT) > *cost += extra_cost->ldst.load; > else if (mode == SFmode) > *cost += extra_cost->ldst.loadf; > @@ -5705,7 +5709,12 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, > case CLRSB: > case CLZ: > if (speed) > - *cost += extra_cost->alu.clz; > + { > + if (VECTOR_MODE_P (mode)) > + *cost += extra_cost->vect.alu; > + else > + *cost += extra_cost->alu.clz; > + } > > return false; > > @@ -5790,6 +5799,13 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, > return false; > } > > + /* VCMP. */ > + if (VECTOR_MODE_P (mode)) > + { > + if (speed) > + *cost += extra_cost->vect.alu; > + return true; > + } > return false; > > case MINUS: > @@ -5808,8 +5824,13 @@ cost_minus: > *cost += rtx_cost (op0, MINUS, 0, speed); > > if (speed) > - /* SUB(S) (immediate). */ > - *cost += extra_cost->alu.arith; > + { > + if (VECTOR_MODE_P (mode)) > + *cost += extra_cost->vect.alu; > + /* SUB(S) (immediate). */ > + else > + *cost += extra_cost->alu.arith; > + } > return true; > > } > @@ -5818,8 +5839,12 @@ cost_minus: > if (aarch64_rtx_arith_op_extract_p (op1, mode)) > { > if (speed) > - *cost += extra_cost->alu.arith_shift; > - > + { > + if (VECTOR_MODE_P (mode)) > + *cost += extra_cost->vect.alu; > + else > + *cost += extra_cost->alu.arith_shift; > + } > *cost += rtx_cost (XEXP (XEXP (op1, 0), 0), > (enum rtx_code) GET_CODE (op1), > 0, speed); > @@ -5844,7 +5869,10 @@ cost_minus: > > if (speed) > { > - if (GET_MODE_CLASS (mode) == MODE_INT) > + if (VECTOR_MODE_P (mode)) > + /* Vector SUB. */ > + *cost += extra_cost->vect.alu; > + else if (GET_MODE_CLASS (mode) == MODE_INT) > /* SUB(S). */ > *cost += extra_cost->alu.arith; > else if (GET_MODE_CLASS (mode) == MODE_FLOAT) > @@ -5878,8 +5906,13 @@ cost_plus: > *cost += rtx_cost (op0, PLUS, 0, speed); > > if (speed) > - /* ADD (immediate). */ > - *cost += extra_cost->alu.arith; > + { > + if (VECTOR_MODE_P (mode)) > + *cost += extra_cost->vect.alu; > + /* ADD (immediate). */ > + else > + *cost += extra_cost->alu.arith; > + } > return true; > } > > @@ -5887,8 +5920,12 @@ cost_plus: > if (aarch64_rtx_arith_op_extract_p (op0, mode)) > { > if (speed) > - *cost += extra_cost->alu.arith_shift; > - > + { > + if (VECTOR_MODE_P (mode)) > + *cost += extra_cost->vect.alu; > + else > + *cost += extra_cost->alu.arith_shift; > + } > *cost += rtx_cost (XEXP (XEXP (op0, 0), 0), > (enum rtx_code) GET_CODE (op0), > 0, speed); > @@ -5913,7 +5950,10 @@ cost_plus: > > if (speed) > { > - if (GET_MODE_CLASS (mode) == MODE_INT) > + if (VECTOR_MODE_P (mode)) > + /* Vector ADD. */ > + *cost += extra_cost->vect.alu; > + else if (GET_MODE_CLASS (mode) == MODE_INT) > /* ADD. */ > *cost += extra_cost->alu.arith; > else if (GET_MODE_CLASS (mode) == MODE_FLOAT) > @@ -5927,8 +5967,12 @@ cost_plus: > *cost = COSTS_N_INSNS (1); > > if (speed) > - *cost += extra_cost->alu.rev; > - > + { > + if (VECTOR_MODE_P (mode)) > + *cost += extra_cost->vect.alu; > + else > + *cost += extra_cost->alu.rev; > + } > return false; > > case IOR: > @@ -5936,10 +5980,14 @@ cost_plus: > { > *cost = COSTS_N_INSNS (1); > > - if (speed) > - *cost += extra_cost->alu.rev; > - > - return true; > + if (speed) > + { > + if (VECTOR_MODE_P (mode)) > + *cost += extra_cost->vect.alu; > + else > + *cost += extra_cost->alu.rev; > + } > + return true; > } > /* Fall through. */ > case XOR: > @@ -5948,6 +5996,13 @@ cost_plus: > op0 = XEXP (x, 0); > op1 = XEXP (x, 1); > > + if (VECTOR_MODE_P (mode)) > + { > + if (speed) > + *cost += extra_cost->vect.alu; > + return true; > + } > + > if (code == AND > && GET_CODE (op0) == MULT > && CONST_INT_P (XEXP (op0, 1)) > @@ -6013,10 +6068,15 @@ cost_plus: > return false; > > case NOT: > - /* MVN. */ > if (speed) > - *cost += extra_cost->alu.logical; > - > + { > + /* VNEG. */ > + if (VECTOR_MODE_P (mode)) > + *cost += extra_cost->vect.alu; > + /* MVN. */ > + else > + *cost += extra_cost->alu.logical; > + } > /* The logical instruction could have the shifted register form, > but the cost is the same if the shift is processed as a separate > instruction, so we don't bother with it here. */ > @@ -6057,13 +6117,18 @@ cost_plus: > > /* UXTB/UXTH. */ > if (speed) > - *cost += extra_cost->alu.extend; > - > + { > + if (VECTOR_MODE_P (mode)) > + *cost += extra_cost->vect.alu; > + else > + *cost += extra_cost->alu.extend; > + } > return false; > > case SIGN_EXTEND: > if (MEM_P (XEXP (x, 0))) > { > + gcc_assert (!VECTOR_MODE_P (mode)); Same here. > /* LDRSH. */ > if (speed) > { > @@ -6078,7 +6143,12 @@ cost_plus: > } > > if (speed) > - *cost += extra_cost->alu.extend; > + { > + if (VECTOR_MODE_P (mode)) > + *cost += extra_cost->vect.alu; > + else > + *cost += extra_cost->alu.extend; > + } > return false; > > case ASHIFT: > @@ -6087,10 +6157,16 @@ cost_plus: > > if (CONST_INT_P (op1)) > { > - /* LSL (immediate), UBMF, UBFIZ and friends. These are all > - aliases. */ > if (speed) > - *cost += extra_cost->alu.shift; > + { > + /* VSHL (immediate). */ > + if (VECTOR_MODE_P (mode)) > + *cost += extra_cost->vect.alu; > + /* LSL (immediate), UBMF, UBFIZ and friends. These are all > + aliases. */ > + else > + *cost += extra_cost->alu.shift; > + } > > /* We can incorporate zero/sign extend for free. */ > if (GET_CODE (op0) == ZERO_EXTEND > @@ -6102,10 +6178,15 @@ cost_plus: > } > else > { > - /* LSLV. */ > if (speed) > - *cost += extra_cost->alu.shift_reg; > - > + { > + /* VSHL (register). */ > + if (VECTOR_MODE_P (mode)) > + *cost += extra_cost->vect.alu; > + /* LSLV. */ > + else > + *cost += extra_cost->alu.shift_reg; > + } > return false; /* All arguments need to be in registers. */ > } > > @@ -6120,18 +6201,27 @@ cost_plus: > { > /* ASR (immediate) and friends. */ > if (speed) > - *cost += extra_cost->alu.shift; > + { > + if (VECTOR_MODE_P (mode)) > + *cost += extra_cost->vect.alu; > + else > + *cost += extra_cost->alu.shift; > + } > > *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed); > return true; > } > else > { > - > - /* ASR (register) and friends. */ > if (speed) > - *cost += extra_cost->alu.shift_reg; > - > + { > + /* VAHR (register). */ > + if (VECTOR_MODE_P (mode)) > + *cost += extra_cost->vect.alu; > + /* ASR (register) and friends. */ > + else > + *cost += extra_cost->alu.shift_reg; > + } > return false; /* All arguments need to be in registers. */ > } > > @@ -6179,7 +6269,12 @@ cost_plus: > case SIGN_EXTRACT: > /* UBFX/SBFX. */ > if (speed) > - *cost += extra_cost->alu.bfx; > + { > + if (VECTOR_MODE_P (mode)) > + *cost += extra_cost->vect.alu; > + else > + *cost += extra_cost->alu.bfx; > + } > > /* We can trust that the immediates used will be correct (there > are no by-register forms), so we need only cost op0. */ > @@ -6196,7 +6291,9 @@ cost_plus: > case UMOD: > if (speed) > { > - if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) > + if (VECTOR_MODE_P (mode)) > + *cost += extra_cost->vect.alu; > + else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) > *cost += (extra_cost->mult[GET_MODE (x) == DImode].add > + extra_cost->mult[GET_MODE (x) == DImode].idiv); > else if (GET_MODE (x) == DFmode) > @@ -6213,7 +6310,9 @@ cost_plus: > case SQRT: > if (speed) > { > - if (GET_MODE_CLASS (mode) == MODE_INT) > + if (VECTOR_MODE_P (mode)) > + *cost += extra_cost->vect.alu; > + else if (GET_MODE_CLASS (mode) == MODE_INT) > /* There is no integer SQRT, so only DIV and UDIV can get > here. */ > *cost += extra_cost->mult[mode == DImode].idiv; > @@ -6245,7 +6344,12 @@ cost_plus: > op2 = XEXP (x, 2); > > if (speed) > - *cost += extra_cost->fp[mode == DFmode].fma; > + { > + if (VECTOR_MODE_P (mode)) > + *cost += extra_cost->vect.alu; > + else > + *cost += extra_cost->fp[mode == DFmode].fma; > + } > > /* FMSUB, FNMADD, and FNMSUB are free. */ > if (GET_CODE (op0) == NEG) > @@ -6285,7 +6389,13 @@ cost_plus: > > case FLOAT_EXTEND: > if (speed) > - *cost += extra_cost->fp[mode == DFmode].widen; > + { > + if (VECTOR_MODE_P (mode)) > + /*Vector convertion. */ > + *cost += extra_cost->vect.alu; > + else > + *cost += extra_cost->fp[mode == DFmode].widen; > + } > return false; > > case FLOAT_TRUNCATE: > @@ -6311,8 +6421,13 @@ cost_plus: > } > > if (speed) > - *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint; > - > + { > + /* FCVT. */ > + if (VECTOR_MODE_P (mode)) > + *cost += extra_cost->vect.alu; > + else > + *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint; > + } > *cost += rtx_cost (x, (enum rtx_code) code, 0, speed); > return true; > > @@ -6321,7 +6436,12 @@ cost_plus: > { > /* FABS and FNEG are analogous. */ > if (speed) > - *cost += extra_cost->fp[mode == DFmode].neg; > + { > + if (VECTOR_MODE_P (mode)) > + *cost += extra_cost->vect.alu; > + else > + *cost += extra_cost->fp[mode == DFmode].neg; > + } > } > else > { > @@ -6338,10 +6458,13 @@ cost_plus: > case SMIN: > if (speed) > { > + if (VECTOR_MODE_P (mode)) > + *cost += extra_cost->vect.alu; > /* FMAXNM/FMINNM/FMAX/FMIN. > TODO: This may not be accurate for all implementations, but > we do not model this in the cost tables. */ > - *cost += extra_cost->fp[mode == DFmode].addsub; > + else > + *cost += extra_cost->fp[mode == DFmode].addsub; > } > return false; > > diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h > index 3ee7ebf..c8e1d2e 100644 > --- a/gcc/config/arm/aarch-common-protos.h > +++ b/gcc/config/arm/aarch-common-protos.h > @@ -124,6 +124,8 @@ struct fp_cost_table > struct vector_cost_table > { > const int alu; > + const int load; > + const int store; > }; > > struct cpu_cost_table > diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h > index 05e96a9..257902c 100644 > --- a/gcc/config/arm/aarch-cost-tables.h > +++ b/gcc/config/arm/aarch-cost-tables.h > @@ -119,7 +119,9 @@ const struct cpu_cost_table generic_extra_costs = > }, > /* Vector */ > { > - COSTS_N_INSNS (1) /* alu. */ > + COSTS_N_INSNS (1), /* alu. */ > + COSTS_N_INSNS (1), /* Load. */ > + COSTS_N_INSNS (1) /* Store. */ > } > }; > > @@ -220,7 +222,9 @@ const struct cpu_cost_table cortexa53_extra_costs = > }, > /* Vector */ > { > - COSTS_N_INSNS (1) /* alu. */ > + COSTS_N_INSNS (1), /* alu. */ > + COSTS_N_INSNS (1), /* Load. */ > + COSTS_N_INSNS (1) /* Store. */ > } > }; > > @@ -321,7 +325,9 @@ const struct cpu_cost_table cortexa57_extra_costs = > }, > /* Vector */ > { > - COSTS_N_INSNS (1) /* alu. */ > + COSTS_N_INSNS (1), /* alu. */ > + COSTS_N_INSNS (1), /* Load. */ > + COSTS_N_INSNS (1) /* Store. */ > } > }; > > @@ -422,7 +428,9 @@ const struct cpu_cost_table xgene1_extra_costs = > }, > /* Vector */ > { > - COSTS_N_INSNS (2) /* alu. */ > + COSTS_N_INSNS (2), /* alu. */ > + COSTS_N_INSNS (1), /* Load. */ > + COSTS_N_INSNS (1), /* Store. */ > } > }; >
On Fri, Apr 17, 2015 at 12:19:14PM +0100, Kugan wrote: > >> My point is that adding your patch while keeping the logic at the top > >> which claims to catch ALL vector operations makes for less readable > >> code. > >> > >> At the very least you'll need to update this comment: > >> > >> /* TODO: The cost infrastructure currently does not handle > >> vector operations. Assume that all vector operations > >> are equally expensive. */ > >> > >> to make it clear that this doesn't catch vector set operations. > >> > >> But fixing the comment doesn't improve the messy code so I'd certainly > >> prefer to see one of the other approaches which have been discussed. > > > > I see your point. Let me work on this based on your suggestions above. > > Hi James, > > Here is an attempt along this line. Is this what you have in mind? > Trying to keep functionality as before so that we can tune the > parameters later. Not fully tested yet. Hi Kugan, Sorry to have dropped out of the thread for a while, I'm currently travelling in the US. This is along the lines of what I had in mind, thanks for digging through and doing it. It needs a little polishing, just neaten up the rough edges of comments and where they sit next to the new if conditionals, and of course, testing, and I have a few comments below. Thanks, James > diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h > index ae2b547..ed9432e 100644 > --- a/gcc/config/aarch64/aarch64-cost-tables.h > +++ b/gcc/config/aarch64/aarch64-cost-tables.h > @@ -121,7 +121,9 @@ const struct cpu_cost_table thunderx_extra_costs = > }, > /* Vector */ > { > - COSTS_N_INSNS (1) /* Alu. */ > + COSTS_N_INSNS (1), /* Alu. */ > + COSTS_N_INSNS (1), /* Load. */ > + COSTS_N_INSNS (1) /* Store. */ > } > }; Can you push the Load/Stores in to the LD/ST section above and give them a name like loadv/storev. > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index cba3c1a..c2d4a53 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c <snip> > @@ -5570,6 +5569,7 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, > && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0))) > >= INTVAL (XEXP (op0, 1)))) > op1 = XEXP (op1, 0); > + gcc_assert (!VECTOR_MODE_P (mode)); As Kyrill asked, please drop this.
diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h index ae2b547..ed9432e 100644 --- a/gcc/config/aarch64/aarch64-cost-tables.h +++ b/gcc/config/aarch64/aarch64-cost-tables.h @@ -121,7 +121,9 @@ const struct cpu_cost_table thunderx_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (1) /* Alu. */ + COSTS_N_INSNS (1), /* Alu. */ + COSTS_N_INSNS (1), /* Load. */ + COSTS_N_INSNS (1) /* Store. */ } }; diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index cba3c1a..c2d4a53 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -5499,16 +5499,6 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, above this default. */ *cost = COSTS_N_INSNS (1); - /* TODO: The cost infrastructure currently does not handle - vector operations. Assume that all vector operations - are equally expensive. */ - if (VECTOR_MODE_P (mode)) - { - if (speed) - *cost += extra_cost->vect.alu; - return true; - } - switch (code) { case SET: @@ -5523,7 +5513,9 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, if (speed) { rtx address = XEXP (op0, 0); - if (GET_MODE_CLASS (mode) == MODE_INT) + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.store; + else if (GET_MODE_CLASS (mode) == MODE_INT) *cost += extra_cost->ldst.store; else if (mode == SFmode) *cost += extra_cost->ldst.storef; @@ -5544,10 +5536,17 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, /* Fall through. */ case REG: + if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1)) + { + /* The cost is 1 per vector-register copied. */ + int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1) + / GET_MODE_SIZE (V4SImode); + *cost = COSTS_N_INSNS (n_minus_1 + 1); + } /* const0_rtx is in general free, but we will use an instruction to set a register to 0. */ - if (REG_P (op1) || op1 == const0_rtx) - { + else if (REG_P (op1) || op1 == const0_rtx) + { /* The cost is 1 per register copied. */ int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1) / UNITS_PER_WORD; @@ -5570,6 +5569,7 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0))) >= INTVAL (XEXP (op0, 1)))) op1 = XEXP (op1, 0); + gcc_assert (!VECTOR_MODE_P (mode)); if (CONST_INT_P (op1)) { @@ -5621,8 +5621,10 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, case CONST_DOUBLE: if (speed) { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; /* mov[df,sf]_aarch64. */ - if (aarch64_float_const_representable_p (x)) + else if (aarch64_float_const_representable_p (x)) /* FMOV (scalar immediate). */ *cost += extra_cost->fp[mode == DFmode].fpconst; else if (!aarch64_float_const_zero_rtx_p (x)) @@ -5650,7 +5652,9 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, approximation for the additional cost of the addressing mode. */ rtx address = XEXP (x, 0); - if (GET_MODE_CLASS (mode) == MODE_INT) + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.load; + else if (GET_MODE_CLASS (mode) == MODE_INT) *cost += extra_cost->ldst.load; else if (mode == SFmode) *cost += extra_cost->ldst.loadf; @@ -5705,7 +5709,12 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, case CLRSB: case CLZ: if (speed) - *cost += extra_cost->alu.clz; + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + *cost += extra_cost->alu.clz; + } return false; @@ -5790,6 +5799,13 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, return false; } + /* VCMP. */ + if (VECTOR_MODE_P (mode)) + { + if (speed) + *cost += extra_cost->vect.alu; + return true; + } return false; case MINUS: @@ -5808,8 +5824,13 @@ cost_minus: *cost += rtx_cost (op0, MINUS, 0, speed); if (speed) - /* SUB(S) (immediate). */ - *cost += extra_cost->alu.arith; + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + /* SUB(S) (immediate). */ + else + *cost += extra_cost->alu.arith; + } return true; } @@ -5818,8 +5839,12 @@ cost_minus: if (aarch64_rtx_arith_op_extract_p (op1, mode)) { if (speed) - *cost += extra_cost->alu.arith_shift; - + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + *cost += extra_cost->alu.arith_shift; + } *cost += rtx_cost (XEXP (XEXP (op1, 0), 0), (enum rtx_code) GET_CODE (op1), 0, speed); @@ -5844,7 +5869,10 @@ cost_minus: if (speed) { - if (GET_MODE_CLASS (mode) == MODE_INT) + if (VECTOR_MODE_P (mode)) + /* Vector SUB. */ + *cost += extra_cost->vect.alu; + else if (GET_MODE_CLASS (mode) == MODE_INT) /* SUB(S). */ *cost += extra_cost->alu.arith; else if (GET_MODE_CLASS (mode) == MODE_FLOAT) @@ -5878,8 +5906,13 @@ cost_plus: *cost += rtx_cost (op0, PLUS, 0, speed); if (speed) - /* ADD (immediate). */ - *cost += extra_cost->alu.arith; + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + /* ADD (immediate). */ + else + *cost += extra_cost->alu.arith; + } return true; } @@ -5887,8 +5920,12 @@ cost_plus: if (aarch64_rtx_arith_op_extract_p (op0, mode)) { if (speed) - *cost += extra_cost->alu.arith_shift; - + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + *cost += extra_cost->alu.arith_shift; + } *cost += rtx_cost (XEXP (XEXP (op0, 0), 0), (enum rtx_code) GET_CODE (op0), 0, speed); @@ -5913,7 +5950,10 @@ cost_plus: if (speed) { - if (GET_MODE_CLASS (mode) == MODE_INT) + if (VECTOR_MODE_P (mode)) + /* Vector ADD. */ + *cost += extra_cost->vect.alu; + else if (GET_MODE_CLASS (mode) == MODE_INT) /* ADD. */ *cost += extra_cost->alu.arith; else if (GET_MODE_CLASS (mode) == MODE_FLOAT) @@ -5927,8 +5967,12 @@ cost_plus: *cost = COSTS_N_INSNS (1); if (speed) - *cost += extra_cost->alu.rev; - + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + *cost += extra_cost->alu.rev; + } return false; case IOR: @@ -5936,10 +5980,14 @@ cost_plus: { *cost = COSTS_N_INSNS (1); - if (speed) - *cost += extra_cost->alu.rev; - - return true; + if (speed) + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + *cost += extra_cost->alu.rev; + } + return true; } /* Fall through. */ case XOR: @@ -5948,6 +5996,13 @@ cost_plus: op0 = XEXP (x, 0); op1 = XEXP (x, 1); + if (VECTOR_MODE_P (mode)) + { + if (speed) + *cost += extra_cost->vect.alu; + return true; + } + if (code == AND && GET_CODE (op0) == MULT && CONST_INT_P (XEXP (op0, 1)) @@ -6013,10 +6068,15 @@ cost_plus: return false; case NOT: - /* MVN. */ if (speed) - *cost += extra_cost->alu.logical; - + { + /* VNEG. */ + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + /* MVN. */ + else + *cost += extra_cost->alu.logical; + } /* The logical instruction could have the shifted register form, but the cost is the same if the shift is processed as a separate instruction, so we don't bother with it here. */ @@ -6057,13 +6117,18 @@ cost_plus: /* UXTB/UXTH. */ if (speed) - *cost += extra_cost->alu.extend; - + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + *cost += extra_cost->alu.extend; + } return false; case SIGN_EXTEND: if (MEM_P (XEXP (x, 0))) { + gcc_assert (!VECTOR_MODE_P (mode)); /* LDRSH. */ if (speed) { @@ -6078,7 +6143,12 @@ cost_plus: } if (speed) - *cost += extra_cost->alu.extend; + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + *cost += extra_cost->alu.extend; + } return false; case ASHIFT: @@ -6087,10 +6157,16 @@ cost_plus: if (CONST_INT_P (op1)) { - /* LSL (immediate), UBMF, UBFIZ and friends. These are all - aliases. */ if (speed) - *cost += extra_cost->alu.shift; + { + /* VSHL (immediate). */ + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + /* LSL (immediate), UBMF, UBFIZ and friends. These are all + aliases. */ + else + *cost += extra_cost->alu.shift; + } /* We can incorporate zero/sign extend for free. */ if (GET_CODE (op0) == ZERO_EXTEND @@ -6102,10 +6178,15 @@ cost_plus: } else { - /* LSLV. */ if (speed) - *cost += extra_cost->alu.shift_reg; - + { + /* VSHL (register). */ + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + /* LSLV. */ + else + *cost += extra_cost->alu.shift_reg; + } return false; /* All arguments need to be in registers. */ } @@ -6120,18 +6201,27 @@ cost_plus: { /* ASR (immediate) and friends. */ if (speed) - *cost += extra_cost->alu.shift; + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + *cost += extra_cost->alu.shift; + } *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed); return true; } else { - - /* ASR (register) and friends. */ if (speed) - *cost += extra_cost->alu.shift_reg; - + { + /* VAHR (register). */ + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + /* ASR (register) and friends. */ + else + *cost += extra_cost->alu.shift_reg; + } return false; /* All arguments need to be in registers. */ } @@ -6179,7 +6269,12 @@ cost_plus: case SIGN_EXTRACT: /* UBFX/SBFX. */ if (speed) - *cost += extra_cost->alu.bfx; + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + *cost += extra_cost->alu.bfx; + } /* We can trust that the immediates used will be correct (there are no by-register forms), so we need only cost op0. */ @@ -6196,7 +6291,9 @@ cost_plus: case UMOD: if (speed) { - if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) *cost += (extra_cost->mult[GET_MODE (x) == DImode].add + extra_cost->mult[GET_MODE (x) == DImode].idiv); else if (GET_MODE (x) == DFmode) @@ -6213,7 +6310,9 @@ cost_plus: case SQRT: if (speed) { - if (GET_MODE_CLASS (mode) == MODE_INT) + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else if (GET_MODE_CLASS (mode) == MODE_INT) /* There is no integer SQRT, so only DIV and UDIV can get here. */ *cost += extra_cost->mult[mode == DImode].idiv; @@ -6245,7 +6344,12 @@ cost_plus: op2 = XEXP (x, 2); if (speed) - *cost += extra_cost->fp[mode == DFmode].fma; + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + *cost += extra_cost->fp[mode == DFmode].fma; + } /* FMSUB, FNMADD, and FNMSUB are free. */ if (GET_CODE (op0) == NEG) @@ -6285,7 +6389,13 @@ cost_plus: case FLOAT_EXTEND: if (speed) - *cost += extra_cost->fp[mode == DFmode].widen; + { + if (VECTOR_MODE_P (mode)) + /*Vector convertion. */ + *cost += extra_cost->vect.alu; + else + *cost += extra_cost->fp[mode == DFmode].widen; + } return false; case FLOAT_TRUNCATE: @@ -6311,8 +6421,13 @@ cost_plus: } if (speed) - *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint; - + { + /* FCVT. */ + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint; + } *cost += rtx_cost (x, (enum rtx_code) code, 0, speed); return true; @@ -6321,7 +6436,12 @@ cost_plus: { /* FABS and FNEG are analogous. */ if (speed) - *cost += extra_cost->fp[mode == DFmode].neg; + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else + *cost += extra_cost->fp[mode == DFmode].neg; + } } else { @@ -6338,10 +6458,13 @@ cost_plus: case SMIN: if (speed) { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; /* FMAXNM/FMINNM/FMAX/FMIN. TODO: This may not be accurate for all implementations, but we do not model this in the cost tables. */ - *cost += extra_cost->fp[mode == DFmode].addsub; + else + *cost += extra_cost->fp[mode == DFmode].addsub; } return false; diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h index 3ee7ebf..c8e1d2e 100644 --- a/gcc/config/arm/aarch-common-protos.h +++ b/gcc/config/arm/aarch-common-protos.h @@ -124,6 +124,8 @@ struct fp_cost_table struct vector_cost_table { const int alu; + const int load; + const int store; }; struct cpu_cost_table diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h index 05e96a9..257902c 100644 --- a/gcc/config/arm/aarch-cost-tables.h +++ b/gcc/config/arm/aarch-cost-tables.h @@ -119,7 +119,9 @@ const struct cpu_cost_table generic_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (1) /* alu. */ + COSTS_N_INSNS (1), /* alu. */ + COSTS_N_INSNS (1), /* Load. */ + COSTS_N_INSNS (1) /* Store. */ } }; @@ -220,7 +222,9 @@ const struct cpu_cost_table cortexa53_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (1) /* alu. */ + COSTS_N_INSNS (1), /* alu. */ + COSTS_N_INSNS (1), /* Load. */ + COSTS_N_INSNS (1) /* Store. */ } }; @@ -321,7 +325,9 @@ const struct cpu_cost_table cortexa57_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (1) /* alu. */ + COSTS_N_INSNS (1), /* alu. */ + COSTS_N_INSNS (1), /* Load. */ + COSTS_N_INSNS (1) /* Store. */ } }; @@ -422,7 +428,9 @@ const struct cpu_cost_table xgene1_extra_costs = }, /* Vector */ { - COSTS_N_INSNS (2) /* alu. */ + COSTS_N_INSNS (2), /* alu. */ + COSTS_N_INSNS (1), /* Load. */ + COSTS_N_INSNS (1), /* Store. */ } };