Message ID | 20221021135203.626255-2-dimitrije.milosevic@syrmia.com |
---|---|
State | New |
Headers | show |
Series | ivopts: Fix candidate selection for architectures with limited addressing modes. | expand |
On Fri, Oct 21, 2022 at 3:56 PM Dimitrije Milosevic <dimitrije.milosevic@syrmia.com> wrote: > > From: Dimitrije Milošević <dimitrije.milosevic@syrmia.com> > > This patch reverts the computation of address cost complexity > to the legacy one. After f9f69dd, complexity is calculated > using the valid_mem_ref_p target hook. Architectures like > Mips only allow BASE + OFFSET addressing modes, which in turn > prevents the calculation of complexity for other addressing > modes, resulting in non-optimal candidate selection. I don't follow how only having BASE + OFFSET addressing prevents calculation of complexity for other addressing modes? Can you explain? Do you have a testcase that shows how both changes improve IV selection for MIPS? > > gcc/ChangeLog: > > * tree-ssa-address.cc (multiplier_allowed_in_address_p): Change > to non-static. > * tree-ssa-address.h (multiplier_allowed_in_address_p): Declare. > * tree-ssa-loop-ivopts.cc (compute_symbol_and_var_present): Reintroduce. > (compute_min_and_max_offset): Likewise. > (get_address_cost): Revert > complexity calculation. > > Signed-off-by: Dimitrije Milosevic <dimitrije.milosevic@syrmia.com> > --- > gcc/tree-ssa-address.cc | 2 +- > gcc/tree-ssa-address.h | 2 + > gcc/tree-ssa-loop-ivopts.cc | 214 ++++++++++++++++++++++++++++++++++-- > 3 files changed, 207 insertions(+), 11 deletions(-) > > diff --git a/gcc/tree-ssa-address.cc b/gcc/tree-ssa-address.cc > index ba7b7c93162..442f54f0165 100644 > --- a/gcc/tree-ssa-address.cc > +++ b/gcc/tree-ssa-address.cc > @@ -561,7 +561,7 @@ add_to_parts (struct mem_address *parts, tree elt) > validity for a memory reference accessing memory of mode MODE in address > space AS. */ > > -static bool > +bool > multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode, > addr_space_t as) > { > diff --git a/gcc/tree-ssa-address.h b/gcc/tree-ssa-address.h > index 95143a099b9..09f36ee2f19 100644 > --- a/gcc/tree-ssa-address.h > +++ b/gcc/tree-ssa-address.h > @@ -38,6 +38,8 @@ tree create_mem_ref (gimple_stmt_iterator *, tree, > class aff_tree *, tree, tree, tree, bool); > extern void copy_ref_info (tree, tree); > tree maybe_fold_tmr (tree); > +bool multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode, > + addr_space_t as); > > extern unsigned int preferred_mem_scale_factor (tree base, > machine_mode mem_mode, > diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc > index a6f926a68ef..d53ba05a4f6 100644 > --- a/gcc/tree-ssa-loop-ivopts.cc > +++ b/gcc/tree-ssa-loop-ivopts.cc > @@ -4774,6 +4774,135 @@ get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset, > return infinite_cost; > } > > +static void > +compute_symbol_and_var_present (tree e1, tree e2, > + bool *symbol_present, bool *var_present) > +{ > + poly_uint64_pod off1, off2; > + > + e1 = strip_offset (e1, &off1); > + e2 = strip_offset (e2, &off2); > + > + STRIP_NOPS (e1); > + STRIP_NOPS (e2); > + > + if (TREE_CODE (e1) == ADDR_EXPR) > + { > + poly_int64_pod diff; > + if (ptr_difference_const (e1, e2, &diff)) > + { > + *symbol_present = false; > + *var_present = false; > + return; > + } > + > + if (integer_zerop (e2)) > + { > + tree core; > + poly_int64_pod bitsize; > + poly_int64_pod bitpos; > + widest_int mul; > + tree toffset; > + machine_mode mode; > + int unsignedp, reversep, volatilep; > + > + core = get_inner_reference (TREE_OPERAND (e1, 0), &bitsize, &bitpos, > + &toffset, &mode, &unsignedp, &reversep, &volatilep); > + > + if (toffset != 0 > + || !constant_multiple_p (bitpos, BITS_PER_UNIT, &mul) > + || reversep > + || !VAR_P (core)) > + { > + *symbol_present = false; > + *var_present = true; > + return; > + } > + > + if (TREE_STATIC (core) > + || DECL_EXTERNAL (core)) > + { > + *symbol_present = true; > + *var_present = false; > + return; > + } > + > + *symbol_present = false; > + *var_present = true; > + return; > + } > + > + *symbol_present = false; > + *var_present = true; > + } > + *symbol_present = false; > + > + if (operand_equal_p (e1, e2, 0)) > + { > + *var_present = false; > + return; > + } > + > + *var_present = true; > +} > + > +static void > +compute_min_and_max_offset (addr_space_t as, > + machine_mode mem_mode, poly_int64_pod *min_offset, > + poly_int64_pod *max_offset) > +{ > + machine_mode address_mode = targetm.addr_space.address_mode (as); > + HOST_WIDE_INT i; > + poly_int64_pod off, width; > + rtx addr; > + rtx reg1; > + > + reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1); > + > + width = GET_MODE_BITSIZE (address_mode) - 1; > + if (known_gt (width, HOST_BITS_PER_WIDE_INT - 1)) > + width = HOST_BITS_PER_WIDE_INT - 1; > + gcc_assert (width.is_constant ()); > + addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX); > + > + off = 0; > + for (i = width.to_constant (); i >= 0; i--) > + { > + off = -(HOST_WIDE_INT_1U << i); > + XEXP (addr, 1) = gen_int_mode (off, address_mode); > + if (memory_address_addr_space_p (mem_mode, addr, as)) > + break; > + } > + if (i == -1) > + *min_offset = 0; > + else > + *min_offset = off; > + // *min_offset = (i == -1? 0 : off); > + > + for (i = width.to_constant (); i >= 0; i--) > + { > + off = (HOST_WIDE_INT_1U << i) - 1; > + XEXP (addr, 1) = gen_int_mode (off, address_mode); > + if (memory_address_addr_space_p (mem_mode, addr, as)) > + break; > + /* For some strict-alignment targets, the offset must be naturally > + aligned. Try an aligned offset if mem_mode is not QImode. */ > + off = mem_mode != QImode > + ? (HOST_WIDE_INT_1U << i) > + - (GET_MODE_SIZE (mem_mode)) > + : 0; > + if (known_gt (off, 0)) > + { > + XEXP (addr, 1) = gen_int_mode (off, address_mode); > + if (memory_address_addr_space_p (mem_mode, addr, as)) > + break; > + } > + } > + if (i == -1) > + off = 0; > + *max_offset = off; > +} > + > /* Return cost of computing USE's address expression by using CAND. > AFF_INV and AFF_VAR represent invariant and variant parts of the > address expression, respectively. If AFF_INV is simple, store > @@ -4802,6 +4931,13 @@ get_address_cost (struct ivopts_data *data, struct iv_use *use, > /* Only true if ratio != 1. */ > bool ok_with_ratio_p = false; > bool ok_without_ratio_p = false; > + tree ubase = use->iv->base; > + tree cbase = cand->iv->base, cstep = cand->iv->step; > + tree utype = TREE_TYPE (ubase), ctype; > + unsigned HOST_WIDE_INT cstepi; > + bool symbol_present = false, var_present = false, stmt_is_after_increment; > + poly_int64_pod min_offset, max_offset; > + bool offset_p, ratio_p; > > if (!aff_combination_const_p (aff_inv)) > { > @@ -4915,16 +5051,74 @@ get_address_cost (struct ivopts_data *data, struct iv_use *use, > gcc_assert (memory_address_addr_space_p (mem_mode, addr, as)); > cost += address_cost (addr, mem_mode, as, speed); > > - if (parts.symbol != NULL_TREE) > - cost.complexity += 1; > - /* Don't increase the complexity of adding a scaled index if it's > - the only kind of index that the target allows. */ > - if (parts.step != NULL_TREE && ok_without_ratio_p) > - cost.complexity += 1; > - if (parts.base != NULL_TREE && parts.index != NULL_TREE) > - cost.complexity += 1; > - if (parts.offset != NULL_TREE && !integer_zerop (parts.offset)) > - cost.complexity += 1; > + if (cst_and_fits_in_hwi (cstep)) > + cstepi = int_cst_value (cstep); > + else > + cstepi = 0; > + > + STRIP_NOPS (cbase); > + ctype = TREE_TYPE (cbase); > + > + stmt_is_after_increment = stmt_after_increment (data->current_loop, cand, > + use->stmt); > + > + if (cst_and_fits_in_hwi (cbase)) > + compute_symbol_and_var_present (ubase, build_int_cst (utype, 0), > + &symbol_present, &var_present); > + else if (ratio == 1) > + { > + tree real_cbase = cbase; > + > + /* Check to see if any adjustment is needed. */ > + if (!cst_and_fits_in_hwi (cstep) && stmt_is_after_increment) > + { > + aff_tree real_cbase_aff; > + aff_tree cstep_aff; > + > + tree_to_aff_combination (cbase, TREE_TYPE (real_cbase), > + &real_cbase_aff); > + tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff); > + > + aff_combination_add (&real_cbase_aff, &cstep_aff); > + real_cbase = aff_combination_to_tree (&real_cbase_aff); > + } > + compute_symbol_and_var_present (ubase, real_cbase, > + &symbol_present, &var_present); > + } > + else if (!POINTER_TYPE_P (ctype) > + && multiplier_allowed_in_address_p > + (ratio, mem_mode, > + TYPE_ADDR_SPACE (TREE_TYPE (utype)))) > + { > + tree real_cbase = cbase; > + > + if (cstepi == 0 && stmt_is_after_increment) > + { > + if (POINTER_TYPE_P (ctype)) > + real_cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep); > + else > + real_cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep); > + } > + real_cbase = fold_build2 (MULT_EXPR, ctype, real_cbase, > + build_int_cst (ctype, ratio)); > + compute_symbol_and_var_present (ubase, real_cbase, > + &symbol_present, &var_present); > + } > + else > + { > + compute_symbol_and_var_present (ubase, build_int_cst (utype, 0), > + &symbol_present, &var_present); > + } > + > + compute_min_and_max_offset (as, mem_mode, &min_offset, &max_offset); > + offset_p = maybe_ne (aff_inv->offset, 0) > + && known_le (min_offset, aff_inv->offset) > + && known_le (aff_inv->offset, max_offset); > + ratio_p = (ratio != 1 > + && multiplier_allowed_in_address_p (ratio, mem_mode, as)); > + > + cost.complexity = (symbol_present != 0) + (var_present != 0) > + + offset_p + ratio_p; > > return cost; > } > -- > 2.25.1 >
Hi Richard, > I don't follow how only having BASE + OFFSET addressing prevents > calculation of complexity for other addressing modes? Can you explain? It's the valid_mem_ref_p target hook that prevents complexity calculation for other addressing modes (for Mips and RISC-V). Here's the snippet of the algorithm (after f9f69dd) for the complexity calculation, which is located at the beginning of the get_address_cost function in tree-ssa-loop-ivopts.cc: if (!aff_combination_const_p (aff_inv)) { parts.index = integer_one_node; /* Addressing mode "base + index". */ ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts); if (ratio != 1) { parts.step = wide_int_to_tree (type, ratio); /* Addressing mode "base + index << scale". */ ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts); if (!ok_with_ratio_p) parts.step = NULL_TREE; } ... The algorithm "builds up" the actual addressing mode step-by-step, starting from BASE + INDEX. However, if valid_mem_ref_p returns negative, parts.* is set to NULL_TREE and we bail out. For Mips (or RISC-V), it always returns negative (given we are building the addressing mode up from BASE + INDEX), since Mips allows BASE + OFFSET only (see the case PLUS in mips_classify_address in config/mips/mips.cc). The result is that all addressing modes besides BASE + OFFSET, for Mips (or RISC-V) have complexities of 0. f9f69dd introduced calls to valid_mem_ref_p target hook, which were not there before, and I'm not sure why exactly. > Do you have a testcase that shows how both changes improve IV selection > for MIPS? Certainly, consider the following test case: void daxpy(float *vector1, float *vector2, int n, float fp_const) { for (int i = 0; i < n; ++i) vector1[i] += fp_const * vector2[i]; } void dgefa(float *vector, int m, int n, int l) { for (int i = 0; i < n - 1; ++i) { for (int j = i + 1; j < n; ++j) { float t = vector[m * j + l]; daxpy(&vector[m * i + i + 1], &vector[m * j + i + 1], n - (i + 1), t); } } } At the third inner loop (which gets inlined from daxpy), an unoptimal candidate selection takes place. Worth noting is that f9f69dd doesn't change the costs (they are, however, multiplied by a factor, but what was lesser/greater before is lesser/greater after). Here's how complexities stand: ===== Before f9f69dd ===== Group 1: cand cost compl. inv.expr. inv.vars 1 13 1 3; NIL; 2 13 2 4; NIL; 4 9 1 5; NIL; 5 1 0 NIL; NIL; 7 9 1 3; NIL; ===== Before f9f69dd ===== ===== After f9f69dd ===== Group 1: cand cost compl. inv.expr. inv.vars 1 10 0 4; NIL; 2 10 0 5; NIL; 4 6 0 6; NIL; 5 1 0 NIL; NIL; 7 6 0 4; NIL; ===== After f9f69dd ===== Notice how all complexities are zero, even though the candidates have different addressing modes. For this particular example, this leads to a different candidate selection: ===== Before f9f69dd ===== Selected IV set for loop 3 at fp_foo.c:3, 10 avg niters, 2 IVs: Candidate 4: Var befor: ivtmp.17_52 Var after: ivtmp.17_103 Incr POS: before exit test IV struct: Type: unsigned long Base: (unsigned long) (vector_27(D) + _10) Step: 4 Object: (void *) vector_27(D) Biv: N Overflowness wrto loop niter: Overflow Candidate 5: Var befor: ivtmp.18_99 Var after: ivtmp.18_98 Incr POS: before exit test IV struct: Type: unsigned long Base: (unsigned long) (vector_27(D) + _14) Step: 4 Object: (void *) vector_27(D) Biv: N Overflowness wrto loop niter: Overflow ===== Before f9f69dd ===== ===== After f9f69dd ===== Selected IV set for loop 3 at fp_foo.c:3, 10 avg niters, 1 IVs: Candidate 4: Var befor: ivtmp.17_52 Var after: ivtmp.17_103 Incr POS: before exit test IV struct: Type: unsigned long Base: (unsigned long) (vector_27(D) + _10) Step: 4 Object: (void *) vector_27(D) Biv: N Overflowness wrto loop niter: Overflow ===== After f9f69dd ===== which, in turn, leads to the following assembly sequence: ===== Before f9f69dd ===== .L83: lwc1 $f5,0($3) lwc1 $f8,0($2) lwc1 $f7,4($2) lwc1 $f6,8($2) lwc1 $f9,12($2) lwc1 $f10,16($2) maddf.s $f8,$f0,$f5 lwc1 $f11,20($2) lwc1 $f12,24($2) lwc1 $f13,28($2) ld $12,72($sp) swc1 $f8,0($2) lwc1 $f14,4($3) maddf.s $f7,$f0,$f14 swc1 $f7,4($2) lwc1 $f15,8($3) maddf.s $f6,$f0,$f15 swc1 $f6,8($2) lwc1 $f16,12($3) maddf.s $f9,$f0,$f16 swc1 $f9,12($2) lwc1 $f17,16($3) maddf.s $f10,$f0,$f17 swc1 $f10,16($2) lwc1 $f18,20($3) maddf.s $f11,$f0,$f18 swc1 $f11,20($2) lwc1 $f19,24($3) maddf.s $f12,$f0,$f19 swc1 $f12,24($2) lwc1 $f20,28($3) maddf.s $f13,$f0,$f20 swc1 $f13,28($2) daddiu $2,$2,32 bne $2,$12,.L83 daddiu $3,$3,32 ... ===== Before f9f69dd ===== ===== After f9f69dd ===== .L93: dsubu $18,$2,$4 lwc1 $f13,0($2) daddu $19,$18,$5 daddiu $16,$2,4 lwc1 $f14,0($19) dsubu $17,$16,$4 daddu $25,$17,$5 lwc1 $f15,4($2) daddiu $19,$2,12 daddiu $20,$2,8 maddf.s $f13,$f1,$f14 dsubu $16,$19,$4 daddiu $17,$2,16 dsubu $18,$20,$4 daddu $19,$16,$5 daddiu $16,$2,20 lwc1 $f10,8($2) daddu $20,$18,$5 lwc1 $f16,12($2) dsubu $18,$17,$4 lwc1 $f17,16($2) dsubu $17,$16,$4 lwc1 $f18,20($2) daddiu $16,$2,24 lwc1 $f20,24($2) daddu $18,$18,$5 swc1 $f13,0($2) daddu $17,$17,$5 lwc1 $f19,0($25) daddiu $25,$2,28 lwc1 $f11,28($2) daddiu $2,$2,32 dsubu $16,$16,$4 dsubu $25,$25,$4 maddf.s $f15,$f1,$f19 daddu $16,$16,$5 daddu $25,$25,$5 swc1 $f15,-28($2) lwc1 $f21,0($20) ld $20,48($sp) maddf.s $f10,$f1,$f21 swc1 $f10,-24($2) lwc1 $f22,0($19) maddf.s $f16,$f1,$f22 swc1 $f16,-20($2) lwc1 $f23,0($18) maddf.s $f17,$f1,$f23 swc1 $f17,-16($2) lwc1 $f0,0($17) maddf.s $f18,$f1,$f0 swc1 $f18,-12($2) lwc1 $f7,0($16) maddf.s $f20,$f1,$f7 swc1 $f20,-8($2) lwc1 $f12,0($25) maddf.s $f11,$f1,$f12 bne $2,$20,.L93 swc1 $f11,-4($2) ... ===== After f9f69dd ===== Notice the additional instructions used for index calculation, due to unoptimal candidate selection. Regards, Dimitrije From: Richard Biener <richard.guenther@gmail.com> Sent: Tuesday, October 25, 2022 1:08 PM To: Dimitrije Milosevic <Dimitrije.Milosevic@Syrmia.com> Cc: gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>; Djordje Todorovic <Djordje.Todorovic@syrmia.com> Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity. On Fri, Oct 21, 2022 at 3:56 PM Dimitrije Milosevic <dimitrije.milosevic@syrmia.com> wrote: > > From: Dimitrije Milošević <dimitrije.milosevic@syrmia.com> > > This patch reverts the computation of address cost complexity > to the legacy one. After f9f69dd, complexity is calculated > using the valid_mem_ref_p target hook. Architectures like > Mips only allow BASE + OFFSET addressing modes, which in turn > prevents the calculation of complexity for other addressing > modes, resulting in non-optimal candidate selection. I don't follow how only having BASE + OFFSET addressing prevents calculation of complexity for other addressing modes? Can you explain? Do you have a testcase that shows how both changes improve IV selection for MIPS? > > gcc/ChangeLog: > > * tree-ssa-address.cc (multiplier_allowed_in_address_p): Change > to non-static. > * tree-ssa-address.h (multiplier_allowed_in_address_p): Declare. > * tree-ssa-loop-ivopts.cc (compute_symbol_and_var_present): Reintroduce. > (compute_min_and_max_offset): Likewise. > (get_address_cost): Revert > complexity calculation. > > Signed-off-by: Dimitrije Milosevic <dimitrije.milosevic@syrmia.com> > --- > gcc/tree-ssa-address.cc | 2 +- > gcc/tree-ssa-address.h | 2 + > gcc/tree-ssa-loop-ivopts.cc | 214 ++++++++++++++++++++++++++++++++++-- > 3 files changed, 207 insertions(+), 11 deletions(-) > > diff --git a/gcc/tree-ssa-address.cc b/gcc/tree-ssa-address.cc > index ba7b7c93162..442f54f0165 100644 > --- a/gcc/tree-ssa-address.cc > +++ b/gcc/tree-ssa-address.cc > @@ -561,7 +561,7 @@ add_to_parts (struct mem_address *parts, tree elt) > validity for a memory reference accessing memory of mode MODE in address > space AS. */ > > -static bool > +bool > multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode, > addr_space_t as) > { > diff --git a/gcc/tree-ssa-address.h b/gcc/tree-ssa-address.h > index 95143a099b9..09f36ee2f19 100644 > --- a/gcc/tree-ssa-address.h > +++ b/gcc/tree-ssa-address.h > @@ -38,6 +38,8 @@ tree create_mem_ref (gimple_stmt_iterator *, tree, > class aff_tree *, tree, tree, tree, bool); > extern void copy_ref_info (tree, tree); > tree maybe_fold_tmr (tree); > +bool multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode, > + addr_space_t as); > > extern unsigned int preferred_mem_scale_factor (tree base, > machine_mode mem_mode, > diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc > index a6f926a68ef..d53ba05a4f6 100644 > --- a/gcc/tree-ssa-loop-ivopts.cc > +++ b/gcc/tree-ssa-loop-ivopts.cc > @@ -4774,6 +4774,135 @@ get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset, > return infinite_cost; > } > > +static void > +compute_symbol_and_var_present (tree e1, tree e2, > + bool *symbol_present, bool *var_present) > +{ > + poly_uint64_pod off1, off2; > + > + e1 = strip_offset (e1, &off1); > + e2 = strip_offset (e2, &off2); > + > + STRIP_NOPS (e1); > + STRIP_NOPS (e2); > + > + if (TREE_CODE (e1) == ADDR_EXPR) > + { > + poly_int64_pod diff; > + if (ptr_difference_const (e1, e2, &diff)) > + { > + *symbol_present = false; > + *var_present = false; > + return; > + } > + > + if (integer_zerop (e2)) > + { > + tree core; > + poly_int64_pod bitsize; > + poly_int64_pod bitpos; > + widest_int mul; > + tree toffset; > + machine_mode mode; > + int unsignedp, reversep, volatilep; > + > + core = get_inner_reference (TREE_OPERAND (e1, 0), &bitsize, &bitpos, > + &toffset, &mode, &unsignedp, &reversep, &volatilep); > + > + if (toffset != 0 > + || !constant_multiple_p (bitpos, BITS_PER_UNIT, &mul) > + || reversep > + || !VAR_P (core)) > + { > + *symbol_present = false; > + *var_present = true; > + return; > + } > + > + if (TREE_STATIC (core) > + || DECL_EXTERNAL (core)) > + { > + *symbol_present = true; > + *var_present = false; > + return; > + } > + > + *symbol_present = false; > + *var_present = true; > + return; > + } > + > + *symbol_present = false; > + *var_present = true; > + } > + *symbol_present = false; > + > + if (operand_equal_p (e1, e2, 0)) > + { > + *var_present = false; > + return; > + } > + > + *var_present = true; > +} > + > +static void > +compute_min_and_max_offset (addr_space_t as, > + machine_mode mem_mode, poly_int64_pod *min_offset, > + poly_int64_pod *max_offset) > +{ > + machine_mode address_mode = targetm.addr_space.address_mode (as); > + HOST_WIDE_INT i; > + poly_int64_pod off, width; > + rtx addr; > + rtx reg1; > + > + reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1); > + > + width = GET_MODE_BITSIZE (address_mode) - 1; > + if (known_gt (width, HOST_BITS_PER_WIDE_INT - 1)) > + width = HOST_BITS_PER_WIDE_INT - 1; > + gcc_assert (width.is_constant ()); > + addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX); > + > + off = 0; > + for (i = width.to_constant (); i >= 0; i--) > + { > + off = -(HOST_WIDE_INT_1U << i); > + XEXP (addr, 1) = gen_int_mode (off, address_mode); > + if (memory_address_addr_space_p (mem_mode, addr, as)) > + break; > + } > + if (i == -1) > + *min_offset = 0; > + else > + *min_offset = off; > + // *min_offset = (i == -1? 0 : off); > + > + for (i = width.to_constant (); i >= 0; i--) > + { > + off = (HOST_WIDE_INT_1U << i) - 1; > + XEXP (addr, 1) = gen_int_mode (off, address_mode); > + if (memory_address_addr_space_p (mem_mode, addr, as)) > + break; > + /* For some strict-alignment targets, the offset must be naturally > + aligned. Try an aligned offset if mem_mode is not QImode. */ > + off = mem_mode != QImode > + ? (HOST_WIDE_INT_1U << i) > + - (GET_MODE_SIZE (mem_mode)) > + : 0; > + if (known_gt (off, 0)) > + { > + XEXP (addr, 1) = gen_int_mode (off, address_mode); > + if (memory_address_addr_space_p (mem_mode, addr, as)) > + break; > + } > + } > + if (i == -1) > + off = 0; > + *max_offset = off; > +} > + > /* Return cost of computing USE's address expression by using CAND. > AFF_INV and AFF_VAR represent invariant and variant parts of the > address expression, respectively. If AFF_INV is simple, store > @@ -4802,6 +4931,13 @@ get_address_cost (struct ivopts_data *data, struct iv_use *use, > /* Only true if ratio != 1. */ > bool ok_with_ratio_p = false; > bool ok_without_ratio_p = false; > + tree ubase = use->iv->base; > + tree cbase = cand->iv->base, cstep = cand->iv->step; > + tree utype = TREE_TYPE (ubase), ctype; > + unsigned HOST_WIDE_INT cstepi; > + bool symbol_present = false, var_present = false, stmt_is_after_increment; > + poly_int64_pod min_offset, max_offset; > + bool offset_p, ratio_p; > > if (!aff_combination_const_p (aff_inv)) > { > @@ -4915,16 +5051,74 @@ get_address_cost (struct ivopts_data *data, struct iv_use *use, > gcc_assert (memory_address_addr_space_p (mem_mode, addr, as)); > cost += address_cost (addr, mem_mode, as, speed); > > - if (parts.symbol != NULL_TREE) > - cost.complexity += 1; > - /* Don't increase the complexity of adding a scaled index if it's > - the only kind of index that the target allows. */ > - if (parts.step != NULL_TREE && ok_without_ratio_p) > - cost.complexity += 1; > - if (parts.base != NULL_TREE && parts.index != NULL_TREE) > - cost.complexity += 1; > - if (parts.offset != NULL_TREE && !integer_zerop (parts.offset)) > - cost.complexity += 1; > + if (cst_and_fits_in_hwi (cstep)) > + cstepi = int_cst_value (cstep); > + else > + cstepi = 0; > + > + STRIP_NOPS (cbase); > + ctype = TREE_TYPE (cbase); > + > + stmt_is_after_increment = stmt_after_increment (data->current_loop, cand, > + use->stmt); > + > + if (cst_and_fits_in_hwi (cbase)) > + compute_symbol_and_var_present (ubase, build_int_cst (utype, 0), > + &symbol_present, &var_present); > + else if (ratio == 1) > + { > + tree real_cbase = cbase; > + > + /* Check to see if any adjustment is needed. */ > + if (!cst_and_fits_in_hwi (cstep) && stmt_is_after_increment) > + { > + aff_tree real_cbase_aff; > + aff_tree cstep_aff; > + > + tree_to_aff_combination (cbase, TREE_TYPE (real_cbase), > + &real_cbase_aff); > + tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff); > + > + aff_combination_add (&real_cbase_aff, &cstep_aff); > + real_cbase = aff_combination_to_tree (&real_cbase_aff); > + } > + compute_symbol_and_var_present (ubase, real_cbase, > + &symbol_present, &var_present); > + } > + else if (!POINTER_TYPE_P (ctype) > + && multiplier_allowed_in_address_p > + (ratio, mem_mode, > + TYPE_ADDR_SPACE (TREE_TYPE (utype)))) > + { > + tree real_cbase = cbase; > + > + if (cstepi == 0 && stmt_is_after_increment) > + { > + if (POINTER_TYPE_P (ctype)) > + real_cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep); > + else > + real_cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep); > + } > + real_cbase = fold_build2 (MULT_EXPR, ctype, real_cbase, > + build_int_cst (ctype, ratio)); > + compute_symbol_and_var_present (ubase, real_cbase, > + &symbol_present, &var_present); > + } > + else > + { > + compute_symbol_and_var_present (ubase, build_int_cst (utype, 0), > + &symbol_present, &var_present); > + } > + > + compute_min_and_max_offset (as, mem_mode, &min_offset, &max_offset); > + offset_p = maybe_ne (aff_inv->offset, 0) > + && known_le (min_offset, aff_inv->offset) > + && known_le (aff_inv->offset, max_offset); > + ratio_p = (ratio != 1 > + && multiplier_allowed_in_address_p (ratio, mem_mode, as)); > + > + cost.complexity = (symbol_present != 0) + (var_present != 0) > + + offset_p + ratio_p; > > return cost; > } > -- > 2.25.1 >
On 10/21/22 07:52, Dimitrije Milosevic wrote: > From: Dimitrije Milošević <dimitrije.milosevic@syrmia.com> > > This patch reverts the computation of address cost complexity > to the legacy one. After f9f69dd, complexity is calculated > using the valid_mem_ref_p target hook. Architectures like > Mips only allow BASE + OFFSET addressing modes, which in turn > prevents the calculation of complexity for other addressing > modes, resulting in non-optimal candidate selection. > > gcc/ChangeLog: > > * tree-ssa-address.cc (multiplier_allowed_in_address_p): Change > to non-static. > * tree-ssa-address.h (multiplier_allowed_in_address_p): Declare. > * tree-ssa-loop-ivopts.cc (compute_symbol_and_var_present): Reintroduce. > (compute_min_and_max_offset): Likewise. > (get_address_cost): Revert > complexity calculation. THe part I don't understand is, if you only have BASE+OFF, why does preventing the calculation of more complex addressing modes matter? ie, what's the point of computing the cost of something like base + off + scaled index when the target can't utilize it? jeff
Hi Jeff, > THe part I don't understand is, if you only have BASE+OFF, why does > preventing the calculation of more complex addressing modes matter? ie, > what's the point of computing the cost of something like base + off + > scaled index when the target can't utilize it? Well, the complexities of all addressing modes other than BASE + OFFSET are equal to 0. For targets like Mips, which only has BASE + OFFSET, it would still be more complex to use a candidate with BASE + INDEX << SCALE + OFFSET than a candidate with BASE + INDEX, for example, as it has to compensate the lack of other addressing modes somehow. If complexities for both of those are equal to 0, in cases where complexities decide which candidate is to be chosen, a more complex candidate may be picked. Regards, Dimitrije From: Jeff Law <jeffreyalaw@gmail.com> Sent: Friday, October 28, 2022 1:02 AM To: Dimitrije Milosevic <Dimitrije.Milosevic@Syrmia.com>; gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org> Cc: Djordje Todorovic <Djordje.Todorovic@syrmia.com> Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity. On 10/21/22 07:52, Dimitrije Milosevic wrote: > From: Dimitrije Milošević <dimitrije.milosevic@syrmia.com> > > This patch reverts the computation of address cost complexity > to the legacy one. After f9f69dd, complexity is calculated > using the valid_mem_ref_p target hook. Architectures like > Mips only allow BASE + OFFSET addressing modes, which in turn > prevents the calculation of complexity for other addressing > modes, resulting in non-optimal candidate selection. > > gcc/ChangeLog: > > * tree-ssa-address.cc (multiplier_allowed_in_address_p): Change > to non-static. > * tree-ssa-address.h (multiplier_allowed_in_address_p): Declare. > * tree-ssa-loop-ivopts.cc (compute_symbol_and_var_present): Reintroduce. > (compute_min_and_max_offset): Likewise. > (get_address_cost): Revert > complexity calculation. THe part I don't understand is, if you only have BASE+OFF, why does preventing the calculation of more complex addressing modes matter? ie, what's the point of computing the cost of something like base + off + scaled index when the target can't utilize it? jeff
On Fri, Oct 28, 2022 at 8:43 AM Dimitrije Milosevic <Dimitrije.Milosevic@syrmia.com> wrote: > > Hi Jeff, > > > THe part I don't understand is, if you only have BASE+OFF, why does > > preventing the calculation of more complex addressing modes matter? ie, > > what's the point of computing the cost of something like base + off + > > scaled index when the target can't utilize it? > > Well, the complexities of all addressing modes other than BASE + OFFSET are > equal to 0. For targets like Mips, which only has BASE + OFFSET, it would still > be more complex to use a candidate with BASE + INDEX << SCALE + OFFSET > than a candidate with BASE + INDEX, for example, as it has to compensate > the lack of other addressing modes somehow. If complexities for both of > those are equal to 0, in cases where complexities decide which candidate is > to be chosen, a more complex candidate may be picked. But something is wrong then - it shouldn't ever pick a candidate with an addressing mode that isn't supported? So you say that the cost of expressing 'BASE + INDEX << SCALE + OFFSET' as 'BASE + OFFSET' is not computed accurately? The function tries to compensate for that, maybe you can point out where it goes wrong? That is, at the end it adjusts cost and complexity based on what it scrapped before, maybe that is just a bit incomplete? Note the original author of this is not available so it would help (maybe also yourself) to walk through the function with a specific candidate / use where you think the complexity (or cost) is wrong? > Regards, > Dimitrije > > > From: Jeff Law <jeffreyalaw@gmail.com> > Sent: Friday, October 28, 2022 1:02 AM > To: Dimitrije Milosevic <Dimitrije.Milosevic@Syrmia.com>; gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org> > Cc: Djordje Todorovic <Djordje.Todorovic@syrmia.com> > Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity. > > > On 10/21/22 07:52, Dimitrije Milosevic wrote: > > From: Dimitrije Milošević <dimitrije.milosevic@syrmia.com> > > > > This patch reverts the computation of address cost complexity > > to the legacy one. After f9f69dd, complexity is calculated > > using the valid_mem_ref_p target hook. Architectures like > > Mips only allow BASE + OFFSET addressing modes, which in turn > > prevents the calculation of complexity for other addressing > > modes, resulting in non-optimal candidate selection. > > > > gcc/ChangeLog: > > > > * tree-ssa-address.cc (multiplier_allowed_in_address_p): Change > > to non-static. > > * tree-ssa-address.h (multiplier_allowed_in_address_p): Declare. > > * tree-ssa-loop-ivopts.cc (compute_symbol_and_var_present): Reintroduce. > > (compute_min_and_max_offset): Likewise. > > (get_address_cost): Revert > > complexity calculation. > > THe part I don't understand is, if you only have BASE+OFF, why does > preventing the calculation of more complex addressing modes matter? ie, > what's the point of computing the cost of something like base + off + > scaled index when the target can't utilize it? > > > jeff >
Hi Richard, > But something is wrong then - it shouldn't ever pick a candidate with > an addressing > mode that isn't supported? Test case I presented in [0] only has non-"BASE + OFFSET" candidates. Correct me if I'm wrong, but the candidate selection algorithm doesn't take into account which addressing modes are supported by the target? > So you say that the cost of expressing > 'BASE + INDEX << SCALE + OFFSET' as 'BASE + OFFSET' is not computed > accurately? I just took it as an example, but yes. > The function tries to compensate for that, maybe you can point out > where it goes wrong? > That is, at the end it adjusts cost and complexity based on what it > scrapped before, maybe > that is just a bit incomplete? I think the cost.cost part is mostly okay, as the costs are just scaled (what was lesser/higher before f9f69dd is lesser/higher after f9f69dd). As far as the adjustments go, I don't think they are complete. On the other hand, as complexity is a valid part of address costs, and it can be used as a tie-breaker, I feel like it should serve a purpose, even for targets like Mips which are limited when it comes to addressing modes, rather than being equal to 0. I guess an alternative would be to fully cover cost.cost adjustments, and leave the complexities to be 0 for non-supported addressing modes. Currently, they are implemented as follows: if (simple_inv) simple_inv = (aff_inv == NULL || aff_combination_const_p (aff_inv) || aff_combination_singleton_var_p (aff_inv)); if (!aff_combination_zero_p (aff_inv)) comp_inv = aff_combination_to_tree (aff_inv); if (comp_inv != NULL_TREE) cost = force_var_cost (data, comp_inv, inv_vars); if (ratio != 1 && parts.step == NULL_TREE) var_cost += mult_by_coeff_cost (ratio, addr_mode, speed); if (comp_inv != NULL_TREE && parts.index == NULL_TREE) var_cost += add_cost (speed, addr_mode); > Note the original author of this is not available so it would help > (maybe also yourself) to > walk through the function with a specific candidate / use where you > think the complexity > (or cost) is wrong? I'd like to refer to [0] where candidate costs didn't get adjusted to cover the lack of complexity calculation. Would love to hear your thoughts. [0] https://gcc.gnu.org/pipermail/gcc-patches/2022-October/604304.html Regards, Dimitrije From: Richard Biener <richard.guenther@gmail.com> Sent: Friday, October 28, 2022 9:00 AM To: Dimitrije Milosevic <Dimitrije.Milosevic@Syrmia.com> Cc: Jeff Law <jeffreyalaw@gmail.com>; gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>; Djordje Todorovic <Djordje.Todorovic@syrmia.com> Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity. On Fri, Oct 28, 2022 at 8:43 AM Dimitrije Milosevic <Dimitrije.Milosevic@syrmia.com> wrote: > > Hi Jeff, > > > THe part I don't understand is, if you only have BASE+OFF, why does > > preventing the calculation of more complex addressing modes matter? ie, > > what's the point of computing the cost of something like base + off + > > scaled index when the target can't utilize it? > > Well, the complexities of all addressing modes other than BASE + OFFSET are > equal to 0. For targets like Mips, which only has BASE + OFFSET, it would still > be more complex to use a candidate with BASE + INDEX << SCALE + OFFSET > than a candidate with BASE + INDEX, for example, as it has to compensate > the lack of other addressing modes somehow. If complexities for both of > those are equal to 0, in cases where complexities decide which candidate is > to be chosen, a more complex candidate may be picked. But something is wrong then - it shouldn't ever pick a candidate with an addressing mode that isn't supported? So you say that the cost of expressing 'BASE + INDEX << SCALE + OFFSET' as 'BASE + OFFSET' is not computed accurately? The function tries to compensate for that, maybe you can point out where it goes wrong? That is, at the end it adjusts cost and complexity based on what it scrapped before, maybe that is just a bit incomplete? Note the original author of this is not available so it would help (maybe also yourself) to walk through the function with a specific candidate / use where you think the complexity (or cost) is wrong? > Regards, > Dimitrije > > > From: Jeff Law <jeffreyalaw@gmail.com> > Sent: Friday, October 28, 2022 1:02 AM > To: Dimitrije Milosevic <Dimitrije.Milosevic@Syrmia.com>; gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org> > Cc: Djordje Todorovic <Djordje.Todorovic@syrmia.com> > Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity. > > > On 10/21/22 07:52, Dimitrije Milosevic wrote: > > From: Dimitrije Milošević <dimitrije.milosevic@syrmia.com> > > > > This patch reverts the computation of address cost complexity > > to the legacy one. After f9f69dd, complexity is calculated > > using the valid_mem_ref_p target hook. Architectures like > > Mips only allow BASE + OFFSET addressing modes, which in turn > > prevents the calculation of complexity for other addressing > > modes, resulting in non-optimal candidate selection. > > > > gcc/ChangeLog: > > > > * tree-ssa-address.cc (multiplier_allowed_in_address_p): Change > > to non-static. > > * tree-ssa-address.h (multiplier_allowed_in_address_p): Declare. > > * tree-ssa-loop-ivopts.cc (compute_symbol_and_var_present): Reintroduce. > > (compute_min_and_max_offset): Likewise. > > (get_address_cost): Revert > > complexity calculation. > > THe part I don't understand is, if you only have BASE+OFF, why does > preventing the calculation of more complex addressing modes matter? ie, > what's the point of computing the cost of something like base + off + > scaled index when the target can't utilize it? > > > jeff >
On 10/28/22 01:00, Richard Biener wrote: > On Fri, Oct 28, 2022 at 8:43 AM Dimitrije Milosevic > <Dimitrije.Milosevic@syrmia.com> wrote: >> Hi Jeff, >> >>> THe part I don't understand is, if you only have BASE+OFF, why does >>> preventing the calculation of more complex addressing modes matter? ie, >>> what's the point of computing the cost of something like base + off + >>> scaled index when the target can't utilize it? >> Well, the complexities of all addressing modes other than BASE + OFFSET are >> equal to 0. For targets like Mips, which only has BASE + OFFSET, it would still >> be more complex to use a candidate with BASE + INDEX << SCALE + OFFSET >> than a candidate with BASE + INDEX, for example, as it has to compensate >> the lack of other addressing modes somehow. If complexities for both of >> those are equal to 0, in cases where complexities decide which candidate is >> to be chosen, a more complex candidate may be picked. > But something is wrong then - it shouldn't ever pick a candidate with > an addressing > mode that isn't supported? So you say that the cost of expressing > 'BASE + INDEX << SCALE + OFFSET' as 'BASE + OFFSET' is not computed > accurately? This is exactly what I was trying to get to. If the addressing mode isn't supported, then we shouldn't be picking it as a candidate. If it is, then we've probably got a problem somewhere else in this code and this patch is likely papering over it. Jeff
Hi Jeff, > This is exactly what I was trying to get to. If the addressing mode > isn't supported, then we shouldn't be picking it as a candidate. If it > is, then we've probably got a problem somewhere else in this code and > this patch is likely papering over it. I'll take a deeper look into the candidate selection algorithm then. Will get back to you. Regards, Dimitrije
On Wed, Nov 2, 2022 at 9:40 AM Dimitrije Milosevic <Dimitrije.Milosevic@syrmia.com> wrote: > > Hi Jeff, > > > This is exactly what I was trying to get to. If the addressing mode > > isn't supported, then we shouldn't be picking it as a candidate. If it > > is, then we've probably got a problem somewhere else in this code and > > this patch is likely papering over it. I'm not sure this is accurate but at least the cost of using an unsupported addressing mode should be at least that of the compensating code to mangle it to a supported form. > I'll take a deeper look into the candidate selection algorithm then. Will > get back to you. Thanks - as said the unfortunate situation is that both the original author and the one who did the last bigger reworks of the code are gone. Richard. > Regards, > Dimitrije > > ________________________________________ > From: Jeff Law <jeffreyalaw@gmail.com> > Sent: Tuesday, November 1, 2022 7:46 PM > To: Richard Biener; Dimitrije Milosevic > Cc: gcc-patches@gcc.gnu.org; Djordje Todorovic > Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity. > > > On 10/28/22 01:00, Richard Biener wrote: > > On Fri, Oct 28, 2022 at 8:43 AM Dimitrije Milosevic > > <Dimitrije.Milosevic@syrmia.com> wrote: > >> Hi Jeff, > >> > >>> THe part I don't understand is, if you only have BASE+OFF, why does > >>> preventing the calculation of more complex addressing modes matter? ie, > >>> what's the point of computing the cost of something like base + off + > >>> scaled index when the target can't utilize it? > >> Well, the complexities of all addressing modes other than BASE + OFFSET are > >> equal to 0. For targets like Mips, which only has BASE + OFFSET, it would still > >> be more complex to use a candidate with BASE + INDEX << SCALE + OFFSET > >> than a candidate with BASE + INDEX, for example, as it has to compensate > >> the lack of other addressing modes somehow. If complexities for both of > >> those are equal to 0, in cases where complexities decide which candidate is > >> to be chosen, a more complex candidate may be picked. > > But something is wrong then - it shouldn't ever pick a candidate with > > an addressing > > mode that isn't supported? So you say that the cost of expressing > > 'BASE + INDEX << SCALE + OFFSET' as 'BASE + OFFSET' is not computed > > accurately? > > This is exactly what I was trying to get to. If the addressing mode > isn't supported, then we shouldn't be picking it as a candidate. If it > is, then we've probably got a problem somewhere else in this code and > this patch is likely papering over it. > > > Jeff >
Hi Richard, Sorry for the delayed response, I couldn't find the time to fully focus on this topic. > I'm not sure this is accurate but at least the cost of using an unsupported > addressing mode should be at least that of the compensating code to > mangle it to a supported form. I'm pretty sure IVOPTS does not filter out candidates which aren't supported by the target architecture. It does, however, adjust the cost for a subset of those. The adjustment code modifies only the cost part of the address cost (which consists of a cost and a complexity). Having said this, I'd propose two approaches: 1. Cover all cases of unsupported addressing modes (if needed, I'm not entirely sure they aren't already covered), leaving complexity for unsupported addressing modes zero. 2. Revert the complexity calculation (which my initial patch does), leaving everything else as it is. 3. A combination of both - if the control path gets into the adjustment code, we use the reverted complexity calculation. I'd love to get feedback regarding this, so I could focus on a concrete approach. Kind regards, Dimitrije From: Richard Biener <richard.guenther@gmail.com> Sent: Monday, November 7, 2022 2:35 PM To: Dimitrije Milosevic <Dimitrije.Milosevic@Syrmia.com> Cc: Jeff Law <jeffreyalaw@gmail.com>; gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>; Djordje Todorovic <Djordje.Todorovic@syrmia.com> Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity. On Wed, Nov 2, 2022 at 9:40 AM Dimitrije Milosevic <Dimitrije.Milosevic@syrmia.com> wrote: > > Hi Jeff, > > > This is exactly what I was trying to get to. If the addressing mode > > isn't supported, then we shouldn't be picking it as a candidate. If it > > is, then we've probably got a problem somewhere else in this code and > > this patch is likely papering over it. I'm not sure this is accurate but at least the cost of using an unsupported addressing mode should be at least that of the compensating code to mangle it to a supported form. > I'll take a deeper look into the candidate selection algorithm then. Will > get back to you. Thanks - as said the unfortunate situation is that both the original author and the one who did the last bigger reworks of the code are gone. Richard. > Regards, > Dimitrije > > ________________________________________ > From: Jeff Law <jeffreyalaw@gmail.com> > Sent: Tuesday, November 1, 2022 7:46 PM > To: Richard Biener; Dimitrije Milosevic > Cc: gcc-patches@gcc.gnu.org; Djordje Todorovic > Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity. > > > On 10/28/22 01:00, Richard Biener wrote: > > On Fri, Oct 28, 2022 at 8:43 AM Dimitrije Milosevic > > <Dimitrije.Milosevic@syrmia.com> wrote: > >> Hi Jeff, > >> > >>> THe part I don't understand is, if you only have BASE+OFF, why does > >>> preventing the calculation of more complex addressing modes matter? ie, > >>> what's the point of computing the cost of something like base + off + > >>> scaled index when the target can't utilize it? > >> Well, the complexities of all addressing modes other than BASE + OFFSET are > >> equal to 0. For targets like Mips, which only has BASE + OFFSET, it would still > >> be more complex to use a candidate with BASE + INDEX << SCALE + OFFSET > >> than a candidate with BASE + INDEX, for example, as it has to compensate > >> the lack of other addressing modes somehow. If complexities for both of > >> those are equal to 0, in cases where complexities decide which candidate is > >> to be chosen, a more complex candidate may be picked. > > But something is wrong then - it shouldn't ever pick a candidate with > > an addressing > > mode that isn't supported? So you say that the cost of expressing > > 'BASE + INDEX << SCALE + OFFSET' as 'BASE + OFFSET' is not computed > > accurately? > > This is exactly what I was trying to get to. If the addressing mode > isn't supported, then we shouldn't be picking it as a candidate. If it > is, then we've probably got a problem somewhere else in this code and > this patch is likely papering over it. > > > Jeff >
On Thu, Dec 15, 2022 at 4:26 PM Dimitrije Milosevic <Dimitrije.Milosevic@syrmia.com> wrote: > > Hi Richard, > > Sorry for the delayed response, I couldn't find the time to fully focus on this topic. > > > I'm not sure this is accurate but at least the cost of using an unsupported > > addressing mode should be at least that of the compensating code to > > mangle it to a supported form. > > I'm pretty sure IVOPTS does not filter out candidates which aren't supported by > the target architecture. It does, however, adjust the cost for a subset of those. > The adjustment code modifies only the cost part of the address cost (which > consists of a cost and a complexity). > Having said this, I'd propose two approaches: > 1. Cover all cases of unsupported addressing modes (if needed, I'm not entirely > sure they aren't already covered), leaving complexity for unsupported > addressing modes zero. The only documentation on complexity I find is int64_t cost; /* The runtime cost. */ unsigned complexity; /* The estimate of the complexity of the code for the computation (in no concrete units -- complexity field should be larger for more complex expressions and addressing modes). */ and complexity is used as tie-breaker only when cost is equal. Given that shouldn't unsupported addressing modes have higher complexity? I'll note that there's nothing "unsupported", each "unsupported" address computation is lowered into supported pieces. "unsupported" maybe means that "cost" isn't fully covered by address-cost and compensation stmts might be costed in quantities not fully compatible with that? That said, "complexity" seems to only complicate things :/ We do have the tie-breaker on prefering less IVs. complexity was added in r0-85562-g6e8c65f6621fb0 as part of fixing PR34711. > 2. Revert the complexity calculation (which my initial patch does), leaving > everything else as it is. > 3. A combination of both - if the control path gets into the adjustment code, we > use the reverted complexity calculation. If it's really only about the "complexity" value then each compensation step should add to the complexity? > I'd love to get feedback regarding this, so I could focus on a concrete approach. > > Kind regards, > Dimitrije > > From: Richard Biener <richard.guenther@gmail.com> > Sent: Monday, November 7, 2022 2:35 PM > To: Dimitrije Milosevic <Dimitrije.Milosevic@Syrmia.com> > Cc: Jeff Law <jeffreyalaw@gmail.com>; gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>; Djordje Todorovic <Djordje.Todorovic@syrmia.com> > Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity. > > On Wed, Nov 2, 2022 at 9:40 AM Dimitrije Milosevic > <Dimitrije.Milosevic@syrmia.com> wrote: > > > > Hi Jeff, > > > > > This is exactly what I was trying to get to. If the addressing mode > > > isn't supported, then we shouldn't be picking it as a candidate. If it > > > is, then we've probably got a problem somewhere else in this code and > > > this patch is likely papering over it. > > I'm not sure this is accurate but at least the cost of using an unsupported > addressing mode should be at least that of the compensating code to > mangle it to a supported form. > > > I'll take a deeper look into the candidate selection algorithm then. Will > > get back to you. > > Thanks - as said the unfortunate situation is that both the original author and > the one who did the last bigger reworks of the code are gone. > > Richard. > > > Regards, > > Dimitrije > > > > ________________________________________ > > From: Jeff Law <jeffreyalaw@gmail.com> > > Sent: Tuesday, November 1, 2022 7:46 PM > > To: Richard Biener; Dimitrije Milosevic > > Cc: gcc-patches@gcc.gnu.org; Djordje Todorovic > > Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity. > > > > > > On 10/28/22 01:00, Richard Biener wrote: > > > On Fri, Oct 28, 2022 at 8:43 AM Dimitrije Milosevic > > > <Dimitrije.Milosevic@syrmia.com> wrote: > > >> Hi Jeff, > > >> > > >>> THe part I don't understand is, if you only have BASE+OFF, why does > > >>> preventing the calculation of more complex addressing modes matter? ie, > > >>> what's the point of computing the cost of something like base + off + > > >>> scaled index when the target can't utilize it? > > >> Well, the complexities of all addressing modes other than BASE + OFFSET are > > >> equal to 0. For targets like Mips, which only has BASE + OFFSET, it would still > > >> be more complex to use a candidate with BASE + INDEX << SCALE + OFFSET > > >> than a candidate with BASE + INDEX, for example, as it has to compensate > > >> the lack of other addressing modes somehow. If complexities for both of > > >> those are equal to 0, in cases where complexities decide which candidate is > > >> to be chosen, a more complex candidate may be picked. > > > But something is wrong then - it shouldn't ever pick a candidate with > > > an addressing > > > mode that isn't supported? So you say that the cost of expressing > > > 'BASE + INDEX << SCALE + OFFSET' as 'BASE + OFFSET' is not computed > > > accurately? > > > > This is exactly what I was trying to get to. If the addressing mode > > isn't supported, then we shouldn't be picking it as a candidate. If it > > is, then we've probably got a problem somewhere else in this code and > > this patch is likely papering over it. > > > > > > Jeff > >
Hi Richard, > The only documentation on complexity I find is > > int64_t cost; /* The runtime cost. */ > unsigned complexity; /* The estimate of the complexity of the code for > the computation (in no concrete units -- > complexity field should be larger for more > complex expressions and addressing modes). */ > > and complexity is used as tie-breaker only when cost is equal. Given that > shouldn't unsupported addressing modes have higher complexity? I'll note > that there's nothing "unsupported", each "unsupported" address computation > is lowered into supported pieces. "unsupported" maybe means that > "cost" isn't fully covered by address-cost and compensation stmts might > be costed in quantities not fully compatible with that? Correct, that's what I was aiming for initially - before f9f69dd that was the case, "unsupported" addressing modes had higher complexities. Also, that's what I meant by "unsupported" as well, thanks. > That said, "complexity" seems to only complicate things :/ We do have the > tie-breaker on preferring less IVs. complexity was added in > r0-85562-g6e8c65f6621fb0 as part of fixing PR34711. I agree that the complexity part is just (kind of) out there, not really strongly defined. I'm not sure how to feel about merging complexity into the cost part of an address cost, though. > If it's really only about the "complexity" value then each > compensation step should > add to the complexity? That could be the way to go. Also worth verifying is that we compensate for each case of an unsupported addressing mode. Kind regards, Dimitrije From: Richard Biener <richard.guenther@gmail.com> Sent: Friday, December 16, 2022 10:58 AM To: Dimitrije Milosevic <Dimitrije.Milosevic@Syrmia.com> Cc: Jeff Law <jeffreyalaw@gmail.com>; gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>; Djordje Todorovic <Djordje.Todorovic@syrmia.com> Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity. On Thu, Dec 15, 2022 at 4:26 PM Dimitrije Milosevic <Dimitrije.Milosevic@syrmia.com> wrote: > > Hi Richard, > > Sorry for the delayed response, I couldn't find the time to fully focus on this topic. > > > I'm not sure this is accurate but at least the cost of using an unsupported > > addressing mode should be at least that of the compensating code to > > mangle it to a supported form. > > I'm pretty sure IVOPTS does not filter out candidates which aren't supported by > the target architecture. It does, however, adjust the cost for a subset of those. > The adjustment code modifies only the cost part of the address cost (which > consists of a cost and a complexity). > Having said this, I'd propose two approaches: > 1. Cover all cases of unsupported addressing modes (if needed, I'm not entirely > sure they aren't already covered), leaving complexity for unsupported > addressing modes zero. The only documentation on complexity I find is int64_t cost; /* The runtime cost. */ unsigned complexity; /* The estimate of the complexity of the code for the computation (in no concrete units -- complexity field should be larger for more complex expressions and addressing modes). */ and complexity is used as tie-breaker only when cost is equal. Given that shouldn't unsupported addressing modes have higher complexity? I'll note that there's nothing "unsupported", each "unsupported" address computation is lowered into supported pieces. "unsupported" maybe means that "cost" isn't fully covered by address-cost and compensation stmts might be costed in quantities not fully compatible with that? That said, "complexity" seems to only complicate things :/ We do have the tie-breaker on prefering less IVs. complexity was added in r0-85562-g6e8c65f6621fb0 as part of fixing PR34711. > 2. Revert the complexity calculation (which my initial patch does), leaving > everything else as it is. > 3. A combination of both - if the control path gets into the adjustment code, we > use the reverted complexity calculation. If it's really only about the "complexity" value then each compensation step should add to the complexity? > I'd love to get feedback regarding this, so I could focus on a concrete approach. > > Kind regards, > Dimitrije > > From: Richard Biener <richard.guenther@gmail.com> > Sent: Monday, November 7, 2022 2:35 PM > To: Dimitrije Milosevic <Dimitrije.Milosevic@Syrmia.com> > Cc: Jeff Law <jeffreyalaw@gmail.com>; gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>; Djordje Todorovic <Djordje.Todorovic@syrmia.com> > Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity. > > On Wed, Nov 2, 2022 at 9:40 AM Dimitrije Milosevic > <Dimitrije.Milosevic@syrmia.com> wrote: > > > > Hi Jeff, > > > > > This is exactly what I was trying to get to. If the addressing mode > > > isn't supported, then we shouldn't be picking it as a candidate. If it > > > is, then we've probably got a problem somewhere else in this code and > > > this patch is likely papering over it. > > I'm not sure this is accurate but at least the cost of using an unsupported > addressing mode should be at least that of the compensating code to > mangle it to a supported form. > > > I'll take a deeper look into the candidate selection algorithm then. Will > > get back to you. > > Thanks - as said the unfortunate situation is that both the original author and > the one who did the last bigger reworks of the code are gone. > > Richard. > > > Regards, > > Dimitrije > > > > ________________________________________ > > From: Jeff Law <jeffreyalaw@gmail.com> > > Sent: Tuesday, November 1, 2022 7:46 PM > > To: Richard Biener; Dimitrije Milosevic > > Cc: gcc-patches@gcc.gnu.org; Djordje Todorovic > > Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity. > > > > > > On 10/28/22 01:00, Richard Biener wrote: > > > On Fri, Oct 28, 2022 at 8:43 AM Dimitrije Milosevic > > > <Dimitrije.Milosevic@syrmia.com> wrote: > > >> Hi Jeff, > > >> > > >>> THe part I don't understand is, if you only have BASE+OFF, why does > > >>> preventing the calculation of more complex addressing modes matter? ie, > > >>> what's the point of computing the cost of something like base + off + > > >>> scaled index when the target can't utilize it? > > >> Well, the complexities of all addressing modes other than BASE + OFFSET are > > >> equal to 0. For targets like Mips, which only has BASE + OFFSET, it would still > > >> be more complex to use a candidate with BASE + INDEX << SCALE + OFFSET > > >> than a candidate with BASE + INDEX, for example, as it has to compensate > > >> the lack of other addressing modes somehow. If complexities for both of > > >> those are equal to 0, in cases where complexities decide which candidate is > > >> to be chosen, a more complex candidate may be picked. > > > But something is wrong then - it shouldn't ever pick a candidate with > > > an addressing > > > mode that isn't supported? So you say that the cost of expressing > > > 'BASE + INDEX << SCALE + OFFSET' as 'BASE + OFFSET' is not computed > > > accurately? > > > > This is exactly what I was trying to get to. If the addressing mode > > isn't supported, then we shouldn't be picking it as a candidate. If it > > is, then we've probably got a problem somewhere else in this code and > > this patch is likely papering over it. > > > > > > Jeff > >
On Fri, Dec 16, 2022 at 12:37 PM Dimitrije Milosevic <Dimitrije.Milosevic@syrmia.com> wrote: > > > Hi Richard, > > > The only documentation on complexity I find is > > > > int64_t cost; /* The runtime cost. */ > > unsigned complexity; /* The estimate of the complexity of the code for > > the computation (in no concrete units -- > > complexity field should be larger for more > > complex expressions and addressing modes). */ > > > > and complexity is used as tie-breaker only when cost is equal. Given that > > shouldn't unsupported addressing modes have higher complexity? I'll note > > that there's nothing "unsupported", each "unsupported" address computation > > is lowered into supported pieces. "unsupported" maybe means that > > "cost" isn't fully covered by address-cost and compensation stmts might > > be costed in quantities not fully compatible with that? > > Correct, that's what I was aiming for initially - before f9f69dd that was the case, > "unsupported" addressing modes had higher complexities. > Also, that's what I meant by "unsupported" as well, thanks. > > > That said, "complexity" seems to only complicate things :/ We do have the > > tie-breaker on preferring less IVs. complexity was added in > > r0-85562-g6e8c65f6621fb0 as part of fixing PR34711. > > I agree that the complexity part is just (kind of) out there, not really strongly > defined. I'm not sure how to feel about merging complexity into the cost part > of an address cost, though. > > > If it's really only about the "complexity" value then each > > compensation step should > > add to the complexity? > > That could be the way to go. Also worth verifying is that we compensate for > each case of an unsupported addressing mode. Yes. Also given complexity is only a tie-breaker we should cost the compensation somehow, but then complexity doesn't look necessary ... Meh. > > Kind regards, > Dimitrije > > From: Richard Biener <richard.guenther@gmail.com> > Sent: Friday, December 16, 2022 10:58 AM > To: Dimitrije Milosevic <Dimitrije.Milosevic@Syrmia.com> > Cc: Jeff Law <jeffreyalaw@gmail.com>; gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>; Djordje Todorovic <Djordje.Todorovic@syrmia.com> > Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity. > > On Thu, Dec 15, 2022 at 4:26 PM Dimitrije Milosevic > <Dimitrije.Milosevic@syrmia.com> wrote: > > > > Hi Richard, > > > > Sorry for the delayed response, I couldn't find the time to fully focus on this topic. > > > > > I'm not sure this is accurate but at least the cost of using an unsupported > > > addressing mode should be at least that of the compensating code to > > > mangle it to a supported form. > > > > I'm pretty sure IVOPTS does not filter out candidates which aren't supported by > > the target architecture. It does, however, adjust the cost for a subset of those. > > The adjustment code modifies only the cost part of the address cost (which > > consists of a cost and a complexity). > > Having said this, I'd propose two approaches: > > 1. Cover all cases of unsupported addressing modes (if needed, I'm not entirely > > sure they aren't already covered), leaving complexity for unsupported > > addressing modes zero. > > The only documentation on complexity I find is > > int64_t cost; /* The runtime cost. */ > unsigned complexity; /* The estimate of the complexity of the code for > the computation (in no concrete units -- > complexity field should be larger for more > complex expressions and addressing modes). */ > > and complexity is used as tie-breaker only when cost is equal. Given that > shouldn't unsupported addressing modes have higher complexity? I'll note > that there's nothing "unsupported", each "unsupported" address computation > is lowered into supported pieces. "unsupported" maybe means that > "cost" isn't fully covered by address-cost and compensation stmts might > be costed in quantities not fully compatible with that? > > That said, "complexity" seems to only complicate things :/ We do have the > tie-breaker on prefering less IVs. complexity was added in > r0-85562-g6e8c65f6621fb0 as part of fixing PR34711. > > > 2. Revert the complexity calculation (which my initial patch does), leaving > > everything else as it is. > > 3. A combination of both - if the control path gets into the adjustment code, we > > use the reverted complexity calculation. > > If it's really only about the "complexity" value then each > compensation step should > add to the complexity? > > > I'd love to get feedback regarding this, so I could focus on a concrete approach. > > > > Kind regards, > > Dimitrije > > > > From: Richard Biener <richard.guenther@gmail.com> > > Sent: Monday, November 7, 2022 2:35 PM > > To: Dimitrije Milosevic <Dimitrije.Milosevic@Syrmia.com> > > Cc: Jeff Law <jeffreyalaw@gmail.com>; gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>; Djordje Todorovic <Djordje.Todorovic@syrmia.com> > > Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity. > > > > On Wed, Nov 2, 2022 at 9:40 AM Dimitrije Milosevic > > <Dimitrije.Milosevic@syrmia.com> wrote: > > > > > > Hi Jeff, > > > > > > > This is exactly what I was trying to get to. If the addressing mode > > > > isn't supported, then we shouldn't be picking it as a candidate. If it > > > > is, then we've probably got a problem somewhere else in this code and > > > > this patch is likely papering over it. > > > > I'm not sure this is accurate but at least the cost of using an unsupported > > addressing mode should be at least that of the compensating code to > > mangle it to a supported form. > > > > > I'll take a deeper look into the candidate selection algorithm then. Will > > > get back to you. > > > > Thanks - as said the unfortunate situation is that both the original author and > > the one who did the last bigger reworks of the code are gone. > > > > Richard. > > > > > Regards, > > > Dimitrije > > > > > > ________________________________________ > > > From: Jeff Law <jeffreyalaw@gmail.com> > > > Sent: Tuesday, November 1, 2022 7:46 PM > > > To: Richard Biener; Dimitrije Milosevic > > > Cc: gcc-patches@gcc.gnu.org; Djordje Todorovic > > > Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity. > > > > > > > > > On 10/28/22 01:00, Richard Biener wrote: > > > > On Fri, Oct 28, 2022 at 8:43 AM Dimitrije Milosevic > > > > <Dimitrije.Milosevic@syrmia.com> wrote: > > > >> Hi Jeff, > > > >> > > > >>> THe part I don't understand is, if you only have BASE+OFF, why does > > > >>> preventing the calculation of more complex addressing modes matter? ie, > > > >>> what's the point of computing the cost of something like base + off + > > > >>> scaled index when the target can't utilize it? > > > >> Well, the complexities of all addressing modes other than BASE + OFFSET are > > > >> equal to 0. For targets like Mips, which only has BASE + OFFSET, it would still > > > >> be more complex to use a candidate with BASE + INDEX << SCALE + OFFSET > > > >> than a candidate with BASE + INDEX, for example, as it has to compensate > > > >> the lack of other addressing modes somehow. If complexities for both of > > > >> those are equal to 0, in cases where complexities decide which candidate is > > > >> to be chosen, a more complex candidate may be picked. > > > > But something is wrong then - it shouldn't ever pick a candidate with > > > > an addressing > > > > mode that isn't supported? So you say that the cost of expressing > > > > 'BASE + INDEX << SCALE + OFFSET' as 'BASE + OFFSET' is not computed > > > > accurately? > > > > > > This is exactly what I was trying to get to. If the addressing mode > > > isn't supported, then we shouldn't be picking it as a candidate. If it > > > is, then we've probably got a problem somewhere else in this code and > > > this patch is likely papering over it. > > > > > > > > > Jeff > > >
diff --git a/gcc/tree-ssa-address.cc b/gcc/tree-ssa-address.cc index ba7b7c93162..442f54f0165 100644 --- a/gcc/tree-ssa-address.cc +++ b/gcc/tree-ssa-address.cc @@ -561,7 +561,7 @@ add_to_parts (struct mem_address *parts, tree elt) validity for a memory reference accessing memory of mode MODE in address space AS. */ -static bool +bool multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode, addr_space_t as) { diff --git a/gcc/tree-ssa-address.h b/gcc/tree-ssa-address.h index 95143a099b9..09f36ee2f19 100644 --- a/gcc/tree-ssa-address.h +++ b/gcc/tree-ssa-address.h @@ -38,6 +38,8 @@ tree create_mem_ref (gimple_stmt_iterator *, tree, class aff_tree *, tree, tree, tree, bool); extern void copy_ref_info (tree, tree); tree maybe_fold_tmr (tree); +bool multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode, + addr_space_t as); extern unsigned int preferred_mem_scale_factor (tree base, machine_mode mem_mode, diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc index a6f926a68ef..d53ba05a4f6 100644 --- a/gcc/tree-ssa-loop-ivopts.cc +++ b/gcc/tree-ssa-loop-ivopts.cc @@ -4774,6 +4774,135 @@ get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset, return infinite_cost; } +static void +compute_symbol_and_var_present (tree e1, tree e2, + bool *symbol_present, bool *var_present) +{ + poly_uint64_pod off1, off2; + + e1 = strip_offset (e1, &off1); + e2 = strip_offset (e2, &off2); + + STRIP_NOPS (e1); + STRIP_NOPS (e2); + + if (TREE_CODE (e1) == ADDR_EXPR) + { + poly_int64_pod diff; + if (ptr_difference_const (e1, e2, &diff)) + { + *symbol_present = false; + *var_present = false; + return; + } + + if (integer_zerop (e2)) + { + tree core; + poly_int64_pod bitsize; + poly_int64_pod bitpos; + widest_int mul; + tree toffset; + machine_mode mode; + int unsignedp, reversep, volatilep; + + core = get_inner_reference (TREE_OPERAND (e1, 0), &bitsize, &bitpos, + &toffset, &mode, &unsignedp, &reversep, &volatilep); + + if (toffset != 0 + || !constant_multiple_p (bitpos, BITS_PER_UNIT, &mul) + || reversep + || !VAR_P (core)) + { + *symbol_present = false; + *var_present = true; + return; + } + + if (TREE_STATIC (core) + || DECL_EXTERNAL (core)) + { + *symbol_present = true; + *var_present = false; + return; + } + + *symbol_present = false; + *var_present = true; + return; + } + + *symbol_present = false; + *var_present = true; + } + *symbol_present = false; + + if (operand_equal_p (e1, e2, 0)) + { + *var_present = false; + return; + } + + *var_present = true; +} + +static void +compute_min_and_max_offset (addr_space_t as, + machine_mode mem_mode, poly_int64_pod *min_offset, + poly_int64_pod *max_offset) +{ + machine_mode address_mode = targetm.addr_space.address_mode (as); + HOST_WIDE_INT i; + poly_int64_pod off, width; + rtx addr; + rtx reg1; + + reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1); + + width = GET_MODE_BITSIZE (address_mode) - 1; + if (known_gt (width, HOST_BITS_PER_WIDE_INT - 1)) + width = HOST_BITS_PER_WIDE_INT - 1; + gcc_assert (width.is_constant ()); + addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX); + + off = 0; + for (i = width.to_constant (); i >= 0; i--) + { + off = -(HOST_WIDE_INT_1U << i); + XEXP (addr, 1) = gen_int_mode (off, address_mode); + if (memory_address_addr_space_p (mem_mode, addr, as)) + break; + } + if (i == -1) + *min_offset = 0; + else + *min_offset = off; + // *min_offset = (i == -1? 0 : off); + + for (i = width.to_constant (); i >= 0; i--) + { + off = (HOST_WIDE_INT_1U << i) - 1; + XEXP (addr, 1) = gen_int_mode (off, address_mode); + if (memory_address_addr_space_p (mem_mode, addr, as)) + break; + /* For some strict-alignment targets, the offset must be naturally + aligned. Try an aligned offset if mem_mode is not QImode. */ + off = mem_mode != QImode + ? (HOST_WIDE_INT_1U << i) + - (GET_MODE_SIZE (mem_mode)) + : 0; + if (known_gt (off, 0)) + { + XEXP (addr, 1) = gen_int_mode (off, address_mode); + if (memory_address_addr_space_p (mem_mode, addr, as)) + break; + } + } + if (i == -1) + off = 0; + *max_offset = off; +} + /* Return cost of computing USE's address expression by using CAND. AFF_INV and AFF_VAR represent invariant and variant parts of the address expression, respectively. If AFF_INV is simple, store @@ -4802,6 +4931,13 @@ get_address_cost (struct ivopts_data *data, struct iv_use *use, /* Only true if ratio != 1. */ bool ok_with_ratio_p = false; bool ok_without_ratio_p = false; + tree ubase = use->iv->base; + tree cbase = cand->iv->base, cstep = cand->iv->step; + tree utype = TREE_TYPE (ubase), ctype; + unsigned HOST_WIDE_INT cstepi; + bool symbol_present = false, var_present = false, stmt_is_after_increment; + poly_int64_pod min_offset, max_offset; + bool offset_p, ratio_p; if (!aff_combination_const_p (aff_inv)) { @@ -4915,16 +5051,74 @@ get_address_cost (struct ivopts_data *data, struct iv_use *use, gcc_assert (memory_address_addr_space_p (mem_mode, addr, as)); cost += address_cost (addr, mem_mode, as, speed); - if (parts.symbol != NULL_TREE) - cost.complexity += 1; - /* Don't increase the complexity of adding a scaled index if it's - the only kind of index that the target allows. */ - if (parts.step != NULL_TREE && ok_without_ratio_p) - cost.complexity += 1; - if (parts.base != NULL_TREE && parts.index != NULL_TREE) - cost.complexity += 1; - if (parts.offset != NULL_TREE && !integer_zerop (parts.offset)) - cost.complexity += 1; + if (cst_and_fits_in_hwi (cstep)) + cstepi = int_cst_value (cstep); + else + cstepi = 0; + + STRIP_NOPS (cbase); + ctype = TREE_TYPE (cbase); + + stmt_is_after_increment = stmt_after_increment (data->current_loop, cand, + use->stmt); + + if (cst_and_fits_in_hwi (cbase)) + compute_symbol_and_var_present (ubase, build_int_cst (utype, 0), + &symbol_present, &var_present); + else if (ratio == 1) + { + tree real_cbase = cbase; + + /* Check to see if any adjustment is needed. */ + if (!cst_and_fits_in_hwi (cstep) && stmt_is_after_increment) + { + aff_tree real_cbase_aff; + aff_tree cstep_aff; + + tree_to_aff_combination (cbase, TREE_TYPE (real_cbase), + &real_cbase_aff); + tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff); + + aff_combination_add (&real_cbase_aff, &cstep_aff); + real_cbase = aff_combination_to_tree (&real_cbase_aff); + } + compute_symbol_and_var_present (ubase, real_cbase, + &symbol_present, &var_present); + } + else if (!POINTER_TYPE_P (ctype) + && multiplier_allowed_in_address_p + (ratio, mem_mode, + TYPE_ADDR_SPACE (TREE_TYPE (utype)))) + { + tree real_cbase = cbase; + + if (cstepi == 0 && stmt_is_after_increment) + { + if (POINTER_TYPE_P (ctype)) + real_cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep); + else + real_cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep); + } + real_cbase = fold_build2 (MULT_EXPR, ctype, real_cbase, + build_int_cst (ctype, ratio)); + compute_symbol_and_var_present (ubase, real_cbase, + &symbol_present, &var_present); + } + else + { + compute_symbol_and_var_present (ubase, build_int_cst (utype, 0), + &symbol_present, &var_present); + } + + compute_min_and_max_offset (as, mem_mode, &min_offset, &max_offset); + offset_p = maybe_ne (aff_inv->offset, 0) + && known_le (min_offset, aff_inv->offset) + && known_le (aff_inv->offset, max_offset); + ratio_p = (ratio != 1 + && multiplier_allowed_in_address_p (ratio, mem_mode, as)); + + cost.complexity = (symbol_present != 0) + (var_present != 0) + + offset_p + ratio_p; return cost; }