diff mbox series

[1/2] ivopts: Revert computation of address cost complexity.

Message ID 20221021135203.626255-2-dimitrije.milosevic@syrmia.com
State New
Headers show
Series ivopts: Fix candidate selection for architectures with limited addressing modes. | expand

Commit Message

Dimitrije Milošević Oct. 21, 2022, 1:52 p.m. UTC
From: Dimitrije Milošević <dimitrije.milosevic@syrmia.com>

This patch reverts the computation of address cost complexity
to the legacy one. After f9f69dd, complexity is calculated
using the valid_mem_ref_p target hook. Architectures like
Mips only allow BASE + OFFSET addressing modes, which in turn
prevents the calculation of complexity for other addressing
modes, resulting in non-optimal candidate selection.

gcc/ChangeLog:

	* tree-ssa-address.cc (multiplier_allowed_in_address_p): Change
	to non-static.
	* tree-ssa-address.h (multiplier_allowed_in_address_p): Declare.
	* tree-ssa-loop-ivopts.cc (compute_symbol_and_var_present): Reintroduce.
	(compute_min_and_max_offset): Likewise.
	(get_address_cost): Revert
	complexity calculation.

Signed-off-by: Dimitrije Milosevic <dimitrije.milosevic@syrmia.com>
---
 gcc/tree-ssa-address.cc     |   2 +-
 gcc/tree-ssa-address.h      |   2 +
 gcc/tree-ssa-loop-ivopts.cc | 214 ++++++++++++++++++++++++++++++++++--
 3 files changed, 207 insertions(+), 11 deletions(-)

Comments

Richard Biener Oct. 25, 2022, 11:08 a.m. UTC | #1
On Fri, Oct 21, 2022 at 3:56 PM Dimitrije Milosevic
<dimitrije.milosevic@syrmia.com> wrote:
>
> From: Dimitrije Milošević <dimitrije.milosevic@syrmia.com>
>
> This patch reverts the computation of address cost complexity
> to the legacy one. After f9f69dd, complexity is calculated
> using the valid_mem_ref_p target hook. Architectures like
> Mips only allow BASE + OFFSET addressing modes, which in turn
> prevents the calculation of complexity for other addressing
> modes, resulting in non-optimal candidate selection.

I don't follow how only having BASE + OFFSET addressing prevents
calculation of complexity for other addressing modes?  Can you explain?

Do you have a testcase that shows how both changes improve IV selection
for MIPS?

>
> gcc/ChangeLog:
>
>         * tree-ssa-address.cc (multiplier_allowed_in_address_p): Change
>         to non-static.
>         * tree-ssa-address.h (multiplier_allowed_in_address_p): Declare.
>         * tree-ssa-loop-ivopts.cc (compute_symbol_and_var_present): Reintroduce.
>         (compute_min_and_max_offset): Likewise.
>         (get_address_cost): Revert
>         complexity calculation.
>
> Signed-off-by: Dimitrije Milosevic <dimitrije.milosevic@syrmia.com>
> ---
>  gcc/tree-ssa-address.cc     |   2 +-
>  gcc/tree-ssa-address.h      |   2 +
>  gcc/tree-ssa-loop-ivopts.cc | 214 ++++++++++++++++++++++++++++++++++--
>  3 files changed, 207 insertions(+), 11 deletions(-)
>
> diff --git a/gcc/tree-ssa-address.cc b/gcc/tree-ssa-address.cc
> index ba7b7c93162..442f54f0165 100644
> --- a/gcc/tree-ssa-address.cc
> +++ b/gcc/tree-ssa-address.cc
> @@ -561,7 +561,7 @@ add_to_parts (struct mem_address *parts, tree elt)
>     validity for a memory reference accessing memory of mode MODE in address
>     space AS.  */
>
> -static bool
> +bool
>  multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
>                                  addr_space_t as)
>  {
> diff --git a/gcc/tree-ssa-address.h b/gcc/tree-ssa-address.h
> index 95143a099b9..09f36ee2f19 100644
> --- a/gcc/tree-ssa-address.h
> +++ b/gcc/tree-ssa-address.h
> @@ -38,6 +38,8 @@ tree create_mem_ref (gimple_stmt_iterator *, tree,
>                      class aff_tree *, tree, tree, tree, bool);
>  extern void copy_ref_info (tree, tree);
>  tree maybe_fold_tmr (tree);
> +bool multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
> +                                addr_space_t as);
>
>  extern unsigned int preferred_mem_scale_factor (tree base,
>                                                 machine_mode mem_mode,
> diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc
> index a6f926a68ef..d53ba05a4f6 100644
> --- a/gcc/tree-ssa-loop-ivopts.cc
> +++ b/gcc/tree-ssa-loop-ivopts.cc
> @@ -4774,6 +4774,135 @@ get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
>    return infinite_cost;
>  }
>
> +static void
> +compute_symbol_and_var_present (tree e1, tree e2,
> +       bool *symbol_present, bool *var_present)
> +{
> +  poly_uint64_pod off1, off2;
> +
> +  e1 = strip_offset (e1, &off1);
> +  e2 = strip_offset (e2, &off2);
> +
> +  STRIP_NOPS (e1);
> +  STRIP_NOPS (e2);
> +
> +  if (TREE_CODE (e1) == ADDR_EXPR)
> +    {
> +      poly_int64_pod diff;
> +      if (ptr_difference_const (e1, e2, &diff))
> +  {
> +    *symbol_present = false;
> +    *var_present = false;
> +    return;
> +  }
> +
> +      if (integer_zerop (e2))
> +  {
> +    tree core;
> +    poly_int64_pod bitsize;
> +    poly_int64_pod bitpos;
> +    widest_int mul;
> +    tree toffset;
> +    machine_mode mode;
> +    int unsignedp, reversep, volatilep;
> +
> +    core = get_inner_reference (TREE_OPERAND (e1, 0), &bitsize, &bitpos,
> +      &toffset, &mode, &unsignedp, &reversep, &volatilep);
> +
> +    if (toffset != 0
> +    || !constant_multiple_p (bitpos, BITS_PER_UNIT, &mul)
> +    || reversep
> +    || !VAR_P (core))
> +      {
> +    *symbol_present = false;
> +    *var_present = true;
> +    return;
> +      }
> +
> +    if (TREE_STATIC (core)
> +    || DECL_EXTERNAL (core))
> +      {
> +    *symbol_present = true;
> +    *var_present = false;
> +    return;
> +      }
> +
> +    *symbol_present = false;
> +    *var_present = true;
> +    return;
> +  }
> +
> +      *symbol_present = false;
> +      *var_present = true;
> +    }
> +  *symbol_present = false;
> +
> +  if (operand_equal_p (e1, e2, 0))
> +    {
> +      *var_present = false;
> +      return;
> +    }
> +
> +  *var_present = true;
> +}
> +
> +static void
> +compute_min_and_max_offset (addr_space_t as,
> +       machine_mode mem_mode, poly_int64_pod *min_offset,
> +       poly_int64_pod *max_offset)
> +{
> +  machine_mode address_mode = targetm.addr_space.address_mode (as);
> +  HOST_WIDE_INT i;
> +  poly_int64_pod off, width;
> +  rtx addr;
> +  rtx reg1;
> +
> +  reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
> +
> +  width = GET_MODE_BITSIZE (address_mode) - 1;
> +  if (known_gt (width, HOST_BITS_PER_WIDE_INT - 1))
> +         width = HOST_BITS_PER_WIDE_INT - 1;
> +  gcc_assert (width.is_constant ());
> +  addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
> +
> +  off = 0;
> +  for (i = width.to_constant (); i >= 0; i--)
> +    {
> +      off = -(HOST_WIDE_INT_1U << i);
> +      XEXP (addr, 1) = gen_int_mode (off, address_mode);
> +      if (memory_address_addr_space_p (mem_mode, addr, as))
> +    break;
> +    }
> +  if (i == -1)
> +    *min_offset = 0;
> +  else
> +    *min_offset = off;
> +  // *min_offset = (i == -1? 0 : off);
> +
> +  for (i = width.to_constant (); i >= 0; i--)
> +    {
> +      off = (HOST_WIDE_INT_1U << i) - 1;
> +      XEXP (addr, 1) = gen_int_mode (off, address_mode);
> +      if (memory_address_addr_space_p (mem_mode, addr, as))
> +    break;
> +    /* For some strict-alignment targets, the offset must be naturally
> +      aligned.  Try an aligned offset if mem_mode is not QImode.  */
> +      off = mem_mode != QImode
> +      ? (HOST_WIDE_INT_1U << i)
> +      - (GET_MODE_SIZE (mem_mode))
> +      : 0;
> +      if (known_gt (off, 0))
> +    {
> +      XEXP (addr, 1) = gen_int_mode (off, address_mode);
> +      if (memory_address_addr_space_p (mem_mode, addr, as))
> +    break;
> +    }
> +    }
> +  if (i == -1)
> +         off = 0;
> +  *max_offset = off;
> +}
> +
>  /* Return cost of computing USE's address expression by using CAND.
>     AFF_INV and AFF_VAR represent invariant and variant parts of the
>     address expression, respectively.  If AFF_INV is simple, store
> @@ -4802,6 +4931,13 @@ get_address_cost (struct ivopts_data *data, struct iv_use *use,
>    /* Only true if ratio != 1.  */
>    bool ok_with_ratio_p = false;
>    bool ok_without_ratio_p = false;
> +  tree ubase = use->iv->base;
> +  tree cbase = cand->iv->base, cstep = cand->iv->step;
> +  tree utype = TREE_TYPE (ubase), ctype;
> +  unsigned HOST_WIDE_INT cstepi;
> +  bool symbol_present = false, var_present = false, stmt_is_after_increment;
> +  poly_int64_pod min_offset, max_offset;
> +  bool offset_p, ratio_p;
>
>    if (!aff_combination_const_p (aff_inv))
>      {
> @@ -4915,16 +5051,74 @@ get_address_cost (struct ivopts_data *data, struct iv_use *use,
>    gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
>    cost += address_cost (addr, mem_mode, as, speed);
>
> -  if (parts.symbol != NULL_TREE)
> -    cost.complexity += 1;
> -  /* Don't increase the complexity of adding a scaled index if it's
> -     the only kind of index that the target allows.  */
> -  if (parts.step != NULL_TREE && ok_without_ratio_p)
> -    cost.complexity += 1;
> -  if (parts.base != NULL_TREE && parts.index != NULL_TREE)
> -    cost.complexity += 1;
> -  if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
> -    cost.complexity += 1;
> +  if (cst_and_fits_in_hwi (cstep))
> +    cstepi = int_cst_value (cstep);
> +  else
> +    cstepi = 0;
> +
> +  STRIP_NOPS (cbase);
> +  ctype = TREE_TYPE (cbase);
> +
> +  stmt_is_after_increment = stmt_after_increment (data->current_loop, cand,
> +    use->stmt);
> +
> +  if (cst_and_fits_in_hwi (cbase))
> +    compute_symbol_and_var_present (ubase, build_int_cst (utype, 0),
> +      &symbol_present, &var_present);
> +  else if (ratio == 1)
> +    {
> +      tree real_cbase = cbase;
> +
> +      /* Check to see if any adjustment is needed.  */
> +      if (!cst_and_fits_in_hwi (cstep) && stmt_is_after_increment)
> +       {
> +         aff_tree real_cbase_aff;
> +         aff_tree cstep_aff;
> +
> +         tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
> +                                  &real_cbase_aff);
> +         tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
> +
> +         aff_combination_add (&real_cbase_aff, &cstep_aff);
> +         real_cbase = aff_combination_to_tree (&real_cbase_aff);
> +       }
> +    compute_symbol_and_var_present (ubase, real_cbase,
> +      &symbol_present, &var_present);
> +    }
> +  else if (!POINTER_TYPE_P (ctype)
> +          && multiplier_allowed_in_address_p
> +               (ratio, mem_mode,
> +                       TYPE_ADDR_SPACE (TREE_TYPE (utype))))
> +    {
> +      tree real_cbase = cbase;
> +
> +      if (cstepi == 0 && stmt_is_after_increment)
> +       {
> +         if (POINTER_TYPE_P (ctype))
> +           real_cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
> +         else
> +           real_cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
> +       }
> +      real_cbase = fold_build2 (MULT_EXPR, ctype, real_cbase,
> +                               build_int_cst (ctype, ratio));
> +    compute_symbol_and_var_present (ubase, real_cbase,
> +      &symbol_present, &var_present);
> +    }
> +  else
> +    {
> +    compute_symbol_and_var_present (ubase, build_int_cst (utype, 0),
> +      &symbol_present, &var_present);
> +    }
> +
> +  compute_min_and_max_offset (as, mem_mode, &min_offset, &max_offset);
> +  offset_p = maybe_ne (aff_inv->offset, 0)
> +       && known_le (min_offset, aff_inv->offset)
> +       && known_le (aff_inv->offset, max_offset);
> +  ratio_p = (ratio != 1
> +            && multiplier_allowed_in_address_p (ratio, mem_mode, as));
> +
> +  cost.complexity = (symbol_present != 0) + (var_present != 0)
> +       + offset_p + ratio_p;
>
>    return cost;
>  }
> --
> 2.25.1
>
Dimitrije Milošević Oct. 25, 2022, 1 p.m. UTC | #2
Hi Richard,

> I don't follow how only having BASE + OFFSET addressing prevents
> calculation of complexity for other addressing modes?  Can you explain?

It's the valid_mem_ref_p target hook that prevents complexity calculation
for other addressing modes (for Mips and RISC-V).
Here's the snippet of the algorithm (after f9f69dd) for the complexity 
calculation, which is located at the beginning of the get_address_cost
function in tree-ssa-loop-ivopts.cc:

  if (!aff_combination_const_p (aff_inv))
    {
      parts.index = integer_one_node;
      /* Addressing mode "base + index".  */
      ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
      if (ratio != 1)
	{
	  parts.step = wide_int_to_tree (type, ratio);
	  /* Addressing mode "base + index << scale".  */
	  ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
	  if (!ok_with_ratio_p)
	    parts.step = NULL_TREE;
	}
      ...

The algorithm "builds up" the actual addressing mode step-by-step,
starting from BASE + INDEX. However, if valid_mem_ref_p returns
negative, parts.* is set to NULL_TREE and we bail out. For Mips (or 
RISC-V), it always returns negative (given we are building the addressing
mode up from BASE + INDEX), since Mips allows BASE + OFFSET only
(see the case PLUS in mips_classify_address in config/mips/mips.cc).
The result is that all addressing modes besides BASE + OFFSET, for Mips
(or RISC-V) have complexities of 0. f9f69dd introduced calls to valid_mem_ref_p
target hook, which were not there before, and I'm not sure why exactly.

> Do you have a testcase that shows how both changes improve IV selection
> for MIPS?

Certainly, consider the following test case:

void daxpy(float *vector1, float *vector2, int n, float fp_const)
{
	for (int i = 0; i < n; ++i)
		vector1[i] += fp_const * vector2[i];
}

void dgefa(float *vector, int m, int n, int l)
{
	for (int i = 0; i < n - 1; ++i)
	{
		for (int j = i + 1; j < n; ++j)
		{
			float t = vector[m * j + l];
			daxpy(&vector[m * i + i + 1], &vector[m * j + i + 1], n - (i + 1), t);
		}
	}
}

At the third inner loop (which gets inlined from daxpy), an unoptimal candidate
selection takes place.
Worth noting is that f9f69dd doesn't change the costs (they are, however, multiplied by
a factor, but what was lesser/greater before is lesser/greater after). Here's how complexities
stand:

===== Before f9f69dd =====
Group 1:
  cand	cost	compl.	inv.expr.	inv.vars
  1	13	1	3;	NIL;
  2	13	2	4;	NIL;
  4	9	1	5;	NIL;
  5	1	0	NIL;	NIL;
  7	9	1	3;	NIL;
===== Before f9f69dd =====
===== After f9f69dd =====
Group 1:
  cand	cost	compl.	inv.expr.	inv.vars
  1	10	0	4;	NIL;
  2	10	0	5;	NIL;
  4	6	0	6;	NIL;
  5	1	0	NIL;	NIL;
  7	6	0	4;	NIL;
===== After f9f69dd =====

Notice how all complexities are zero, even though the candidates have
different addressing modes.

For this particular example, this leads to a different candidate selection:

===== Before f9f69dd =====
Selected IV set for loop 3 at fp_foo.c:3, 10 avg niters, 2 IVs:
Candidate 4:
  Var befor: ivtmp.17_52
  Var after: ivtmp.17_103
  Incr POS: before exit test
  IV struct:
    Type:	unsigned long
    Base:	(unsigned long) (vector_27(D) + _10)
    Step:	4
    Object:	(void *) vector_27(D)
    Biv:	N
    Overflowness wrto loop niter:	Overflow
Candidate 5:
  Var befor: ivtmp.18_99
  Var after: ivtmp.18_98
  Incr POS: before exit test
  IV struct:
    Type:	unsigned long
    Base:	(unsigned long) (vector_27(D) + _14)
    Step:	4
    Object:	(void *) vector_27(D)
    Biv:	N
    Overflowness wrto loop niter:	Overflow
===== Before f9f69dd =====
===== After f9f69dd =====
Selected IV set for loop 3 at fp_foo.c:3, 10 avg niters, 1 IVs:
Candidate 4:
  Var befor: ivtmp.17_52
  Var after: ivtmp.17_103
  Incr POS: before exit test
  IV struct:
    Type:	unsigned long
    Base:	(unsigned long) (vector_27(D) + _10)
    Step:	4
    Object:	(void *) vector_27(D)
    Biv:	N
    Overflowness wrto loop niter:	Overflow
===== After f9f69dd =====

which, in turn, leads to the following assembly sequence:

===== Before f9f69dd =====
.L83:
	lwc1	$f5,0($3)
	lwc1	$f8,0($2)
	lwc1	$f7,4($2)
	lwc1	$f6,8($2)
	lwc1	$f9,12($2)
	lwc1	$f10,16($2)
	maddf.s	$f8,$f0,$f5
	lwc1	$f11,20($2)
	lwc1	$f12,24($2)
	lwc1	$f13,28($2)
	ld	$12,72($sp)
	swc1	$f8,0($2)
	lwc1	$f14,4($3)
	maddf.s	$f7,$f0,$f14
	swc1	$f7,4($2)
	lwc1	$f15,8($3)
	maddf.s	$f6,$f0,$f15
	swc1	$f6,8($2)
	lwc1	$f16,12($3)
	maddf.s	$f9,$f0,$f16
	swc1	$f9,12($2)
	lwc1	$f17,16($3)
	maddf.s	$f10,$f0,$f17
	swc1	$f10,16($2)
	lwc1	$f18,20($3)
	maddf.s	$f11,$f0,$f18
	swc1	$f11,20($2)
	lwc1	$f19,24($3)
	maddf.s	$f12,$f0,$f19
	swc1	$f12,24($2)
	lwc1	$f20,28($3)
	maddf.s	$f13,$f0,$f20
	swc1	$f13,28($2)
	daddiu	$2,$2,32
	bne	$2,$12,.L83
	daddiu	$3,$3,32
        ...
===== Before f9f69dd =====
===== After f9f69dd =====
.L93:
	dsubu	$18,$2,$4
	lwc1	$f13,0($2)
	daddu	$19,$18,$5
	daddiu	$16,$2,4
	lwc1	$f14,0($19)
	dsubu	$17,$16,$4
	daddu	$25,$17,$5
	lwc1	$f15,4($2)
	daddiu	$19,$2,12
	daddiu	$20,$2,8
	maddf.s	$f13,$f1,$f14
	dsubu	$16,$19,$4
	daddiu	$17,$2,16
	dsubu	$18,$20,$4
	daddu	$19,$16,$5
	daddiu	$16,$2,20
	lwc1	$f10,8($2)
	daddu	$20,$18,$5
	lwc1	$f16,12($2)
	dsubu	$18,$17,$4
	lwc1	$f17,16($2)
	dsubu	$17,$16,$4
	lwc1	$f18,20($2)
	daddiu	$16,$2,24
	lwc1	$f20,24($2)
	daddu	$18,$18,$5
	swc1	$f13,0($2)
	daddu	$17,$17,$5
	lwc1	$f19,0($25)
	daddiu	$25,$2,28
	lwc1	$f11,28($2)
	daddiu	$2,$2,32
	dsubu	$16,$16,$4
	dsubu	$25,$25,$4
	maddf.s	$f15,$f1,$f19
	daddu	$16,$16,$5
	daddu	$25,$25,$5
	swc1	$f15,-28($2)
	lwc1	$f21,0($20)
	ld	$20,48($sp)
	maddf.s	$f10,$f1,$f21
	swc1	$f10,-24($2)
	lwc1	$f22,0($19)
	maddf.s	$f16,$f1,$f22
	swc1	$f16,-20($2)
	lwc1	$f23,0($18)
	maddf.s	$f17,$f1,$f23
	swc1	$f17,-16($2)
	lwc1	$f0,0($17)
	maddf.s	$f18,$f1,$f0
	swc1	$f18,-12($2)
	lwc1	$f7,0($16)
	maddf.s	$f20,$f1,$f7
	swc1	$f20,-8($2)
	lwc1	$f12,0($25)
	maddf.s	$f11,$f1,$f12
	bne	$2,$20,.L93
	swc1	$f11,-4($2)
        ...
===== After f9f69dd =====

Notice the additional instructions used for index calculation, due to
unoptimal candidate selection.

Regards,
Dimitrije

From: Richard Biener <richard.guenther@gmail.com>
Sent: Tuesday, October 25, 2022 1:08 PM
To: Dimitrije Milosevic <Dimitrije.Milosevic@Syrmia.com>
Cc: gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>; Djordje Todorovic <Djordje.Todorovic@syrmia.com>
Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity. 
 
On Fri, Oct 21, 2022 at 3:56 PM Dimitrije Milosevic
<dimitrije.milosevic@syrmia.com> wrote:
>
> From: Dimitrije Milošević <dimitrije.milosevic@syrmia.com>
>
> This patch reverts the computation of address cost complexity
> to the legacy one. After f9f69dd, complexity is calculated
> using the valid_mem_ref_p target hook. Architectures like
> Mips only allow BASE + OFFSET addressing modes, which in turn
> prevents the calculation of complexity for other addressing
> modes, resulting in non-optimal candidate selection.

I don't follow how only having BASE + OFFSET addressing prevents
calculation of complexity for other addressing modes?  Can you explain?

Do you have a testcase that shows how both changes improve IV selection
for MIPS?

>
> gcc/ChangeLog:
>
>         * tree-ssa-address.cc (multiplier_allowed_in_address_p): Change
>         to non-static.
>         * tree-ssa-address.h (multiplier_allowed_in_address_p): Declare.
>         * tree-ssa-loop-ivopts.cc (compute_symbol_and_var_present): Reintroduce.
>         (compute_min_and_max_offset): Likewise.
>         (get_address_cost): Revert
>         complexity calculation.
>
> Signed-off-by: Dimitrije Milosevic <dimitrije.milosevic@syrmia.com>
> ---
>  gcc/tree-ssa-address.cc     |   2 +-
>  gcc/tree-ssa-address.h      |   2 +
>  gcc/tree-ssa-loop-ivopts.cc | 214 ++++++++++++++++++++++++++++++++++--
>  3 files changed, 207 insertions(+), 11 deletions(-)
>
> diff --git a/gcc/tree-ssa-address.cc b/gcc/tree-ssa-address.cc
> index ba7b7c93162..442f54f0165 100644
> --- a/gcc/tree-ssa-address.cc
> +++ b/gcc/tree-ssa-address.cc
> @@ -561,7 +561,7 @@ add_to_parts (struct mem_address *parts, tree elt)
>     validity for a memory reference accessing memory of mode MODE in address
>     space AS.  */
>
> -static bool
> +bool
>  multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
>                                  addr_space_t as)
>  {
> diff --git a/gcc/tree-ssa-address.h b/gcc/tree-ssa-address.h
> index 95143a099b9..09f36ee2f19 100644
> --- a/gcc/tree-ssa-address.h
> +++ b/gcc/tree-ssa-address.h
> @@ -38,6 +38,8 @@ tree create_mem_ref (gimple_stmt_iterator *, tree,
>                      class aff_tree *, tree, tree, tree, bool);
>  extern void copy_ref_info (tree, tree);
>  tree maybe_fold_tmr (tree);
> +bool multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
> +                                addr_space_t as);
>
>  extern unsigned int preferred_mem_scale_factor (tree base,
>                                                 machine_mode mem_mode,
> diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc
> index a6f926a68ef..d53ba05a4f6 100644
> --- a/gcc/tree-ssa-loop-ivopts.cc
> +++ b/gcc/tree-ssa-loop-ivopts.cc
> @@ -4774,6 +4774,135 @@ get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
>    return infinite_cost;
>  }
>
> +static void
> +compute_symbol_and_var_present (tree e1, tree e2,
> +       bool *symbol_present, bool *var_present)
> +{
> +  poly_uint64_pod off1, off2;
> +
> +  e1 = strip_offset (e1, &off1);
> +  e2 = strip_offset (e2, &off2);
> +
> +  STRIP_NOPS (e1);
> +  STRIP_NOPS (e2);
> +
> +  if (TREE_CODE (e1) == ADDR_EXPR)
> +    {
> +      poly_int64_pod diff;
> +      if (ptr_difference_const (e1, e2, &diff))
> +  {
> +    *symbol_present = false;
> +    *var_present = false;
> +    return;
> +  }
> +
> +      if (integer_zerop (e2))
> +  {
> +    tree core;
> +    poly_int64_pod bitsize;
> +    poly_int64_pod bitpos;
> +    widest_int mul;
> +    tree toffset;
> +    machine_mode mode;
> +    int unsignedp, reversep, volatilep;
> +
> +    core = get_inner_reference (TREE_OPERAND (e1, 0), &bitsize, &bitpos,
> +      &toffset, &mode, &unsignedp, &reversep, &volatilep);
> +
> +    if (toffset != 0
> +    || !constant_multiple_p (bitpos, BITS_PER_UNIT, &mul)
> +    || reversep
> +    || !VAR_P (core))
> +      {
> +    *symbol_present = false;
> +    *var_present = true;
> +    return;
> +      }
> +
> +    if (TREE_STATIC (core)
> +    || DECL_EXTERNAL (core))
> +      {
> +    *symbol_present = true;
> +    *var_present = false;
> +    return;
> +      }
> +
> +    *symbol_present = false;
> +    *var_present = true;
> +    return;
> +  }
> +
> +      *symbol_present = false;
> +      *var_present = true;
> +    }
> +  *symbol_present = false;
> +
> +  if (operand_equal_p (e1, e2, 0))
> +    {
> +      *var_present = false;
> +      return;
> +    }
> +
> +  *var_present = true;
> +}
> +
> +static void
> +compute_min_and_max_offset (addr_space_t as,
> +       machine_mode mem_mode, poly_int64_pod *min_offset,
> +       poly_int64_pod *max_offset)
> +{
> +  machine_mode address_mode = targetm.addr_space.address_mode (as);
> +  HOST_WIDE_INT i;
> +  poly_int64_pod off, width;
> +  rtx addr;
> +  rtx reg1;
> +
> +  reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
> +
> +  width = GET_MODE_BITSIZE (address_mode) - 1;
> +  if (known_gt (width, HOST_BITS_PER_WIDE_INT - 1))
> +         width = HOST_BITS_PER_WIDE_INT - 1;
> +  gcc_assert (width.is_constant ());
> +  addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
> +
> +  off = 0;
> +  for (i = width.to_constant (); i >= 0; i--)
> +    {
> +      off = -(HOST_WIDE_INT_1U << i);
> +      XEXP (addr, 1) = gen_int_mode (off, address_mode);
> +      if (memory_address_addr_space_p (mem_mode, addr, as))
> +    break;
> +    }
> +  if (i == -1)
> +    *min_offset = 0;
> +  else
> +    *min_offset = off;
> +  // *min_offset = (i == -1? 0 : off);
> +
> +  for (i = width.to_constant (); i >= 0; i--)
> +    {
> +      off = (HOST_WIDE_INT_1U << i) - 1;
> +      XEXP (addr, 1) = gen_int_mode (off, address_mode);
> +      if (memory_address_addr_space_p (mem_mode, addr, as))
> +    break;
> +    /* For some strict-alignment targets, the offset must be naturally
> +      aligned.  Try an aligned offset if mem_mode is not QImode.  */
> +      off = mem_mode != QImode
> +      ? (HOST_WIDE_INT_1U << i)
> +      - (GET_MODE_SIZE (mem_mode))
> +      : 0;
> +      if (known_gt (off, 0))
> +    {
> +      XEXP (addr, 1) = gen_int_mode (off, address_mode);
> +      if (memory_address_addr_space_p (mem_mode, addr, as))
> +    break;
> +    }
> +    }
> +  if (i == -1)
> +         off = 0;
> +  *max_offset = off;
> +}
> +
>  /* Return cost of computing USE's address expression by using CAND.
>     AFF_INV and AFF_VAR represent invariant and variant parts of the
>     address expression, respectively.  If AFF_INV is simple, store
> @@ -4802,6 +4931,13 @@ get_address_cost (struct ivopts_data *data, struct iv_use *use,
>    /* Only true if ratio != 1.  */
>    bool ok_with_ratio_p = false;
>    bool ok_without_ratio_p = false;
> +  tree ubase = use->iv->base;
> +  tree cbase = cand->iv->base, cstep = cand->iv->step;
> +  tree utype = TREE_TYPE (ubase), ctype;
> +  unsigned HOST_WIDE_INT cstepi;
> +  bool symbol_present = false, var_present = false, stmt_is_after_increment;
> +  poly_int64_pod min_offset, max_offset;
> +  bool offset_p, ratio_p;
>
>    if (!aff_combination_const_p (aff_inv))
>      {
> @@ -4915,16 +5051,74 @@ get_address_cost (struct ivopts_data *data, struct iv_use *use,
>    gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
>    cost += address_cost (addr, mem_mode, as, speed);
>
> -  if (parts.symbol != NULL_TREE)
> -    cost.complexity += 1;
> -  /* Don't increase the complexity of adding a scaled index if it's
> -     the only kind of index that the target allows.  */
> -  if (parts.step != NULL_TREE && ok_without_ratio_p)
> -    cost.complexity += 1;
> -  if (parts.base != NULL_TREE && parts.index != NULL_TREE)
> -    cost.complexity += 1;
> -  if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
> -    cost.complexity += 1;
> +  if (cst_and_fits_in_hwi (cstep))
> +    cstepi = int_cst_value (cstep);
> +  else
> +    cstepi = 0;
> +
> +  STRIP_NOPS (cbase);
> +  ctype = TREE_TYPE (cbase);
> +
> +  stmt_is_after_increment = stmt_after_increment (data->current_loop, cand,
> +    use->stmt);
> +
> +  if (cst_and_fits_in_hwi (cbase))
> +    compute_symbol_and_var_present (ubase, build_int_cst (utype, 0),
> +      &symbol_present, &var_present);
> +  else if (ratio == 1)
> +    {
> +      tree real_cbase = cbase;
> +
> +      /* Check to see if any adjustment is needed.  */
> +      if (!cst_and_fits_in_hwi (cstep) && stmt_is_after_increment)
> +       {
> +         aff_tree real_cbase_aff;
> +         aff_tree cstep_aff;
> +
> +         tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
> +                                  &real_cbase_aff);
> +         tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
> +
> +         aff_combination_add (&real_cbase_aff, &cstep_aff);
> +         real_cbase = aff_combination_to_tree (&real_cbase_aff);
> +       }
> +    compute_symbol_and_var_present (ubase, real_cbase,
> +      &symbol_present, &var_present);
> +    }
> +  else if (!POINTER_TYPE_P (ctype)
> +          && multiplier_allowed_in_address_p
> +               (ratio, mem_mode,
> +                       TYPE_ADDR_SPACE (TREE_TYPE (utype))))
> +    {
> +      tree real_cbase = cbase;
> +
> +      if (cstepi == 0 && stmt_is_after_increment)
> +       {
> +         if (POINTER_TYPE_P (ctype))
> +           real_cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
> +         else
> +           real_cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
> +       }
> +      real_cbase = fold_build2 (MULT_EXPR, ctype, real_cbase,
> +                               build_int_cst (ctype, ratio));
> +    compute_symbol_and_var_present (ubase, real_cbase,
> +      &symbol_present, &var_present);
> +    }
> +  else
> +    {
> +    compute_symbol_and_var_present (ubase, build_int_cst (utype, 0),
> +      &symbol_present, &var_present);
> +    }
> +
> +  compute_min_and_max_offset (as, mem_mode, &min_offset, &max_offset);
> +  offset_p = maybe_ne (aff_inv->offset, 0)
> +       && known_le (min_offset, aff_inv->offset)
> +       && known_le (aff_inv->offset, max_offset);
> +  ratio_p = (ratio != 1
> +            && multiplier_allowed_in_address_p (ratio, mem_mode, as));
> +
> +  cost.complexity = (symbol_present != 0) + (var_present != 0)
> +       + offset_p + ratio_p;
>
>    return cost;
>  }
> --
> 2.25.1
>
Jeff Law Oct. 27, 2022, 11:02 p.m. UTC | #3
On 10/21/22 07:52, Dimitrije Milosevic wrote:
> From: Dimitrije Milošević <dimitrije.milosevic@syrmia.com>
>
> This patch reverts the computation of address cost complexity
> to the legacy one. After f9f69dd, complexity is calculated
> using the valid_mem_ref_p target hook. Architectures like
> Mips only allow BASE + OFFSET addressing modes, which in turn
> prevents the calculation of complexity for other addressing
> modes, resulting in non-optimal candidate selection.
>
> gcc/ChangeLog:
>
> 	* tree-ssa-address.cc (multiplier_allowed_in_address_p): Change
> 	to non-static.
> 	* tree-ssa-address.h (multiplier_allowed_in_address_p): Declare.
> 	* tree-ssa-loop-ivopts.cc (compute_symbol_and_var_present): Reintroduce.
> 	(compute_min_and_max_offset): Likewise.
> 	(get_address_cost): Revert
> 	complexity calculation.

THe part I don't understand is, if you only have BASE+OFF, why does 
preventing the calculation of more complex addressing modes matter?  ie, 
what's the point of computing the cost of something like base + off + 
scaled index when the target can't utilize it?


jeff
Dimitrije Milošević Oct. 28, 2022, 6:43 a.m. UTC | #4
Hi Jeff,

> THe part I don't understand is, if you only have BASE+OFF, why does 
> preventing the calculation of more complex addressing modes matter?  ie, 
> what's the point of computing the cost of something like base + off + 
> scaled index when the target can't utilize it?

Well, the complexities of all addressing modes other than BASE + OFFSET are
equal to 0. For targets like Mips, which only has BASE + OFFSET, it would still
be more complex to use a candidate with BASE + INDEX << SCALE + OFFSET
than a candidate with BASE + INDEX, for example, as it has to compensate
the lack of other addressing modes somehow. If complexities for both of
those are equal to 0, in cases where complexities decide which candidate is
to be chosen, a more complex candidate may be picked.

Regards,
Dimitrije


From: Jeff Law <jeffreyalaw@gmail.com>
Sent: Friday, October 28, 2022 1:02 AM
To: Dimitrije Milosevic <Dimitrije.Milosevic@Syrmia.com>; gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>
Cc: Djordje Todorovic <Djordje.Todorovic@syrmia.com>
Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity. 
 

On 10/21/22 07:52, Dimitrije Milosevic wrote:
> From: Dimitrije Milošević <dimitrije.milosevic@syrmia.com>
>
> This patch reverts the computation of address cost complexity
> to the legacy one. After f9f69dd, complexity is calculated
> using the valid_mem_ref_p target hook. Architectures like
> Mips only allow BASE + OFFSET addressing modes, which in turn
> prevents the calculation of complexity for other addressing
> modes, resulting in non-optimal candidate selection.
>
> gcc/ChangeLog:
>
>        * tree-ssa-address.cc (multiplier_allowed_in_address_p): Change
>        to non-static.
>        * tree-ssa-address.h (multiplier_allowed_in_address_p): Declare.
>        * tree-ssa-loop-ivopts.cc (compute_symbol_and_var_present): Reintroduce.
>        (compute_min_and_max_offset): Likewise.
>        (get_address_cost): Revert
>        complexity calculation.

THe part I don't understand is, if you only have BASE+OFF, why does 
preventing the calculation of more complex addressing modes matter?  ie, 
what's the point of computing the cost of something like base + off + 
scaled index when the target can't utilize it?


jeff
Richard Biener Oct. 28, 2022, 7 a.m. UTC | #5
On Fri, Oct 28, 2022 at 8:43 AM Dimitrije Milosevic
<Dimitrije.Milosevic@syrmia.com> wrote:
>
> Hi Jeff,
>
> > THe part I don't understand is, if you only have BASE+OFF, why does
> > preventing the calculation of more complex addressing modes matter?  ie,
> > what's the point of computing the cost of something like base + off +
> > scaled index when the target can't utilize it?
>
> Well, the complexities of all addressing modes other than BASE + OFFSET are
> equal to 0. For targets like Mips, which only has BASE + OFFSET, it would still
> be more complex to use a candidate with BASE + INDEX << SCALE + OFFSET
> than a candidate with BASE + INDEX, for example, as it has to compensate
> the lack of other addressing modes somehow. If complexities for both of
> those are equal to 0, in cases where complexities decide which candidate is
> to be chosen, a more complex candidate may be picked.

But something is wrong then - it shouldn't ever pick a candidate with
an addressing
mode that isn't supported?  So you say that the cost of expressing
'BASE + INDEX << SCALE + OFFSET' as 'BASE + OFFSET' is not computed
accurately?

The function tries to compensate for that, maybe you can point out
where it goes wrong?
That is, at the end it adjusts cost and complexity based on what it
scrapped before, maybe
that is just a bit incomplete?

Note the original author of this is not available so it would help
(maybe also yourself) to
walk through the function with a specific candidate / use where you
think the complexity
(or cost) is wrong?


> Regards,
> Dimitrije
>
>
> From: Jeff Law <jeffreyalaw@gmail.com>
> Sent: Friday, October 28, 2022 1:02 AM
> To: Dimitrije Milosevic <Dimitrije.Milosevic@Syrmia.com>; gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>
> Cc: Djordje Todorovic <Djordje.Todorovic@syrmia.com>
> Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity.
>
>
> On 10/21/22 07:52, Dimitrije Milosevic wrote:
> > From: Dimitrije Milošević <dimitrije.milosevic@syrmia.com>
> >
> > This patch reverts the computation of address cost complexity
> > to the legacy one. After f9f69dd, complexity is calculated
> > using the valid_mem_ref_p target hook. Architectures like
> > Mips only allow BASE + OFFSET addressing modes, which in turn
> > prevents the calculation of complexity for other addressing
> > modes, resulting in non-optimal candidate selection.
> >
> > gcc/ChangeLog:
> >
> >        * tree-ssa-address.cc (multiplier_allowed_in_address_p): Change
> >        to non-static.
> >        * tree-ssa-address.h (multiplier_allowed_in_address_p): Declare.
> >        * tree-ssa-loop-ivopts.cc (compute_symbol_and_var_present): Reintroduce.
> >        (compute_min_and_max_offset): Likewise.
> >        (get_address_cost): Revert
> >        complexity calculation.
>
> THe part I don't understand is, if you only have BASE+OFF, why does
> preventing the calculation of more complex addressing modes matter?  ie,
> what's the point of computing the cost of something like base + off +
> scaled index when the target can't utilize it?
>
>
> jeff
>
Dimitrije Milošević Oct. 28, 2022, 1:39 p.m. UTC | #6
Hi Richard,

> But something is wrong then - it shouldn't ever pick a candidate with
> an addressing
> mode that isn't supported?

Test case I presented in [0] only has non-"BASE + OFFSET" candidates. Correct me
if I'm wrong, but the candidate selection algorithm doesn't take into account
which addressing modes are supported by the target?

> So you say that the cost of expressing
> 'BASE + INDEX << SCALE + OFFSET' as 'BASE + OFFSET' is not computed
> accurately?

I just took it as an example, but yes.

> The function tries to compensate for that, maybe you can point out
> where it goes wrong?
> That is, at the end it adjusts cost and complexity based on what it
> scrapped before, maybe
> that is just a bit incomplete?

I think the cost.cost part is mostly okay, as the costs are just scaled 
(what was lesser/higher before f9f69dd is lesser/higher after f9f69dd).
As far as the adjustments go, I don't think they are complete. 
On the other hand, as complexity is a valid part of address costs, and
it can be used as a tie-breaker, I feel like it should serve a purpose, 
even for targets like Mips which are limited when it comes to 
addressing modes, rather than being equal to 0.

I guess an alternative would be to fully cover cost.cost adjustments, and
leave the complexities to be 0 for non-supported addressing modes.
Currently, they are implemented as follows:

  if (simple_inv)
    simple_inv = (aff_inv == NULL
		  || aff_combination_const_p (aff_inv)
		  || aff_combination_singleton_var_p (aff_inv));
  if (!aff_combination_zero_p (aff_inv))
    comp_inv = aff_combination_to_tree (aff_inv);
  if (comp_inv != NULL_TREE)
    cost = force_var_cost (data, comp_inv, inv_vars);
  if (ratio != 1 && parts.step == NULL_TREE)
    var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
  if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
    var_cost += add_cost (speed, addr_mode);

> Note the original author of this is not available so it would help
> (maybe also yourself) to
> walk through the function with a specific candidate / use where you
> think the complexity
> (or cost) is wrong?

I'd like to refer to [0] where candidate costs didn't get adjusted to 
cover the lack of complexity calculation.

Would love to hear your thoughts.

[0] https://gcc.gnu.org/pipermail/gcc-patches/2022-October/604304.html

Regards,
Dimitrije

From: Richard Biener <richard.guenther@gmail.com>
Sent: Friday, October 28, 2022 9:00 AM
To: Dimitrije Milosevic <Dimitrije.Milosevic@Syrmia.com>
Cc: Jeff Law <jeffreyalaw@gmail.com>; gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>; Djordje Todorovic <Djordje.Todorovic@syrmia.com>
Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity. 
 
On Fri, Oct 28, 2022 at 8:43 AM Dimitrije Milosevic
<Dimitrije.Milosevic@syrmia.com> wrote:
>
> Hi Jeff,
>
> > THe part I don't understand is, if you only have BASE+OFF, why does
> > preventing the calculation of more complex addressing modes matter?  ie,
> > what's the point of computing the cost of something like base + off +
> > scaled index when the target can't utilize it?
>
> Well, the complexities of all addressing modes other than BASE + OFFSET are
> equal to 0. For targets like Mips, which only has BASE + OFFSET, it would still
> be more complex to use a candidate with BASE + INDEX << SCALE + OFFSET
> than a candidate with BASE + INDEX, for example, as it has to compensate
> the lack of other addressing modes somehow. If complexities for both of
> those are equal to 0, in cases where complexities decide which candidate is
> to be chosen, a more complex candidate may be picked.

But something is wrong then - it shouldn't ever pick a candidate with
an addressing
mode that isn't supported?  So you say that the cost of expressing
'BASE + INDEX << SCALE + OFFSET' as 'BASE + OFFSET' is not computed
accurately?

The function tries to compensate for that, maybe you can point out
where it goes wrong?
That is, at the end it adjusts cost and complexity based on what it
scrapped before, maybe
that is just a bit incomplete?

Note the original author of this is not available so it would help
(maybe also yourself) to
walk through the function with a specific candidate / use where you
think the complexity
(or cost) is wrong?


> Regards,
> Dimitrije
>
>
> From: Jeff Law <jeffreyalaw@gmail.com>
> Sent: Friday, October 28, 2022 1:02 AM
> To: Dimitrije Milosevic <Dimitrije.Milosevic@Syrmia.com>; gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>
> Cc: Djordje Todorovic <Djordje.Todorovic@syrmia.com>
> Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity.
>
>
> On 10/21/22 07:52, Dimitrije Milosevic wrote:
> > From: Dimitrije Milošević <dimitrije.milosevic@syrmia.com>
> >
> > This patch reverts the computation of address cost complexity
> > to the legacy one. After f9f69dd, complexity is calculated
> > using the valid_mem_ref_p target hook. Architectures like
> > Mips only allow BASE + OFFSET addressing modes, which in turn
> > prevents the calculation of complexity for other addressing
> > modes, resulting in non-optimal candidate selection.
> >
> > gcc/ChangeLog:
> >
> >        * tree-ssa-address.cc (multiplier_allowed_in_address_p): Change
> >        to non-static.
> >        * tree-ssa-address.h (multiplier_allowed_in_address_p): Declare.
> >        * tree-ssa-loop-ivopts.cc (compute_symbol_and_var_present): Reintroduce.
> >        (compute_min_and_max_offset): Likewise.
> >        (get_address_cost): Revert
> >        complexity calculation.
>
> THe part I don't understand is, if you only have BASE+OFF, why does
> preventing the calculation of more complex addressing modes matter?  ie,
> what's the point of computing the cost of something like base + off +
> scaled index when the target can't utilize it?
>
>
> jeff
>
Jeff Law Nov. 1, 2022, 6:46 p.m. UTC | #7
On 10/28/22 01:00, Richard Biener wrote:
> On Fri, Oct 28, 2022 at 8:43 AM Dimitrije Milosevic
> <Dimitrije.Milosevic@syrmia.com> wrote:
>> Hi Jeff,
>>
>>> THe part I don't understand is, if you only have BASE+OFF, why does
>>> preventing the calculation of more complex addressing modes matter?  ie,
>>> what's the point of computing the cost of something like base + off +
>>> scaled index when the target can't utilize it?
>> Well, the complexities of all addressing modes other than BASE + OFFSET are
>> equal to 0. For targets like Mips, which only has BASE + OFFSET, it would still
>> be more complex to use a candidate with BASE + INDEX << SCALE + OFFSET
>> than a candidate with BASE + INDEX, for example, as it has to compensate
>> the lack of other addressing modes somehow. If complexities for both of
>> those are equal to 0, in cases where complexities decide which candidate is
>> to be chosen, a more complex candidate may be picked.
> But something is wrong then - it shouldn't ever pick a candidate with
> an addressing
> mode that isn't supported?  So you say that the cost of expressing
> 'BASE + INDEX << SCALE + OFFSET' as 'BASE + OFFSET' is not computed
> accurately?

This is exactly what I was trying to get to.   If the addressing mode 
isn't supported, then we shouldn't be picking it as a candidate.  If it 
is, then we've probably got a problem somewhere else in this code and 
this patch is likely papering over it.


Jeff
Dimitrije Milošević Nov. 2, 2022, 8:40 a.m. UTC | #8
Hi Jeff,

> This is exactly what I was trying to get to.   If the addressing mode
> isn't supported, then we shouldn't be picking it as a candidate.  If it
> is, then we've probably got a problem somewhere else in this code and
> this patch is likely papering over it.

I'll take a deeper look into the candidate selection algorithm then. Will
get back to you.

Regards,
Dimitrije
Richard Biener Nov. 7, 2022, 1:35 p.m. UTC | #9
On Wed, Nov 2, 2022 at 9:40 AM Dimitrije Milosevic
<Dimitrije.Milosevic@syrmia.com> wrote:
>
> Hi Jeff,
>
> > This is exactly what I was trying to get to.   If the addressing mode
> > isn't supported, then we shouldn't be picking it as a candidate.  If it
> > is, then we've probably got a problem somewhere else in this code and
> > this patch is likely papering over it.

I'm not sure this is accurate but at least the cost of using an unsupported
addressing mode should be at least that of the compensating code to
mangle it to a supported form.

> I'll take a deeper look into the candidate selection algorithm then. Will
> get back to you.

Thanks - as said the unfortunate situation is that both the original author and
the one who did the last bigger reworks of the code are gone.

Richard.

> Regards,
> Dimitrije
>
> ________________________________________
> From: Jeff Law <jeffreyalaw@gmail.com>
> Sent: Tuesday, November 1, 2022 7:46 PM
> To: Richard Biener; Dimitrije Milosevic
> Cc: gcc-patches@gcc.gnu.org; Djordje Todorovic
> Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity.
>
>
> On 10/28/22 01:00, Richard Biener wrote:
> > On Fri, Oct 28, 2022 at 8:43 AM Dimitrije Milosevic
> > <Dimitrije.Milosevic@syrmia.com> wrote:
> >> Hi Jeff,
> >>
> >>> THe part I don't understand is, if you only have BASE+OFF, why does
> >>> preventing the calculation of more complex addressing modes matter?  ie,
> >>> what's the point of computing the cost of something like base + off +
> >>> scaled index when the target can't utilize it?
> >> Well, the complexities of all addressing modes other than BASE + OFFSET are
> >> equal to 0. For targets like Mips, which only has BASE + OFFSET, it would still
> >> be more complex to use a candidate with BASE + INDEX << SCALE + OFFSET
> >> than a candidate with BASE + INDEX, for example, as it has to compensate
> >> the lack of other addressing modes somehow. If complexities for both of
> >> those are equal to 0, in cases where complexities decide which candidate is
> >> to be chosen, a more complex candidate may be picked.
> > But something is wrong then - it shouldn't ever pick a candidate with
> > an addressing
> > mode that isn't supported?  So you say that the cost of expressing
> > 'BASE + INDEX << SCALE + OFFSET' as 'BASE + OFFSET' is not computed
> > accurately?
>
> This is exactly what I was trying to get to.   If the addressing mode
> isn't supported, then we shouldn't be picking it as a candidate.  If it
> is, then we've probably got a problem somewhere else in this code and
> this patch is likely papering over it.
>
>
> Jeff
>
Dimitrije Milošević Dec. 15, 2022, 3:26 p.m. UTC | #10
Hi Richard,

Sorry for the delayed response, I couldn't find the time to fully focus on this topic.

> I'm not sure this is accurate but at least the cost of using an unsupported
> addressing mode should be at least that of the compensating code to
> mangle it to a supported form.

I'm pretty sure IVOPTS does not filter out candidates which aren't supported by
the target architecture. It does, however, adjust the cost for a subset of those.
The adjustment code modifies only the cost part of the address cost (which
consists of a cost and a complexity).
Having said this, I'd propose two approaches:
    1. Cover all cases of unsupported addressing modes (if needed, I'm not entirely
        sure they aren't already covered), leaving complexity for unsupported
        addressing modes zero.
    2. Revert the complexity calculation (which my initial patch does), leaving
        everything else as it is.
    3. A combination of both - if the control path gets into the adjustment code, we
        use the reverted complexity calculation.
I'd love to get feedback regarding this, so I could focus on a concrete approach.

Kind regards,
Dimitrije

From: Richard Biener <richard.guenther@gmail.com>
Sent: Monday, November 7, 2022 2:35 PM
To: Dimitrije Milosevic <Dimitrije.Milosevic@Syrmia.com>
Cc: Jeff Law <jeffreyalaw@gmail.com>; gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>; Djordje Todorovic <Djordje.Todorovic@syrmia.com>
Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity. 
 
On Wed, Nov 2, 2022 at 9:40 AM Dimitrije Milosevic
<Dimitrije.Milosevic@syrmia.com> wrote:
>
> Hi Jeff,
>
> > This is exactly what I was trying to get to.   If the addressing mode
> > isn't supported, then we shouldn't be picking it as a candidate.  If it
> > is, then we've probably got a problem somewhere else in this code and
> > this patch is likely papering over it.

I'm not sure this is accurate but at least the cost of using an unsupported
addressing mode should be at least that of the compensating code to
mangle it to a supported form.

> I'll take a deeper look into the candidate selection algorithm then. Will
> get back to you.

Thanks - as said the unfortunate situation is that both the original author and
the one who did the last bigger reworks of the code are gone.

Richard.

> Regards,
> Dimitrije
>
> ________________________________________
> From: Jeff Law <jeffreyalaw@gmail.com>
> Sent: Tuesday, November 1, 2022 7:46 PM
> To: Richard Biener; Dimitrije Milosevic
> Cc: gcc-patches@gcc.gnu.org; Djordje Todorovic
> Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity.
>
>
> On 10/28/22 01:00, Richard Biener wrote:
> > On Fri, Oct 28, 2022 at 8:43 AM Dimitrije Milosevic
> > <Dimitrije.Milosevic@syrmia.com> wrote:
> >> Hi Jeff,
> >>
> >>> THe part I don't understand is, if you only have BASE+OFF, why does
> >>> preventing the calculation of more complex addressing modes matter?  ie,
> >>> what's the point of computing the cost of something like base + off +
> >>> scaled index when the target can't utilize it?
> >> Well, the complexities of all addressing modes other than BASE + OFFSET are
> >> equal to 0. For targets like Mips, which only has BASE + OFFSET, it would still
> >> be more complex to use a candidate with BASE + INDEX << SCALE + OFFSET
> >> than a candidate with BASE + INDEX, for example, as it has to compensate
> >> the lack of other addressing modes somehow. If complexities for both of
> >> those are equal to 0, in cases where complexities decide which candidate is
> >> to be chosen, a more complex candidate may be picked.
> > But something is wrong then - it shouldn't ever pick a candidate with
> > an addressing
> > mode that isn't supported?  So you say that the cost of expressing
> > 'BASE + INDEX << SCALE + OFFSET' as 'BASE + OFFSET' is not computed
> > accurately?
>
> This is exactly what I was trying to get to.   If the addressing mode
> isn't supported, then we shouldn't be picking it as a candidate.  If it
> is, then we've probably got a problem somewhere else in this code and
> this patch is likely papering over it.
>
>
> Jeff
>
Richard Biener Dec. 16, 2022, 9:58 a.m. UTC | #11
On Thu, Dec 15, 2022 at 4:26 PM Dimitrije Milosevic
<Dimitrije.Milosevic@syrmia.com> wrote:
>
> Hi Richard,
>
> Sorry for the delayed response, I couldn't find the time to fully focus on this topic.
>
> > I'm not sure this is accurate but at least the cost of using an unsupported
> > addressing mode should be at least that of the compensating code to
> > mangle it to a supported form.
>
> I'm pretty sure IVOPTS does not filter out candidates which aren't supported by
> the target architecture. It does, however, adjust the cost for a subset of those.
> The adjustment code modifies only the cost part of the address cost (which
> consists of a cost and a complexity).
> Having said this, I'd propose two approaches:
>     1. Cover all cases of unsupported addressing modes (if needed, I'm not entirely
>         sure they aren't already covered), leaving complexity for unsupported
>         addressing modes zero.

The only documentation on complexity I find is

  int64_t cost;         /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */

and complexity is used as tie-breaker only when cost is equal.  Given that
shouldn't unsupported addressing modes have higher complexity?  I'll note
that there's nothing "unsupported", each "unsupported" address computation
is lowered into supported pieces.  "unsupported" maybe means that
"cost" isn't fully covered by address-cost and compensation stmts might
be costed in quantities not fully compatible with that?

That said, "complexity" seems to only complicate things :/  We do have the
tie-breaker on prefering less IVs.  complexity was added in
r0-85562-g6e8c65f6621fb0 as part of fixing PR34711.

>     2. Revert the complexity calculation (which my initial patch does), leaving
>         everything else as it is.
>     3. A combination of both - if the control path gets into the adjustment code, we
>         use the reverted complexity calculation.

If it's really only about the "complexity" value then each
compensation step should
add to the complexity?

> I'd love to get feedback regarding this, so I could focus on a concrete approach.
>
> Kind regards,
> Dimitrije
>
> From: Richard Biener <richard.guenther@gmail.com>
> Sent: Monday, November 7, 2022 2:35 PM
> To: Dimitrije Milosevic <Dimitrije.Milosevic@Syrmia.com>
> Cc: Jeff Law <jeffreyalaw@gmail.com>; gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>; Djordje Todorovic <Djordje.Todorovic@syrmia.com>
> Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity.
>
> On Wed, Nov 2, 2022 at 9:40 AM Dimitrije Milosevic
> <Dimitrije.Milosevic@syrmia.com> wrote:
> >
> > Hi Jeff,
> >
> > > This is exactly what I was trying to get to.   If the addressing mode
> > > isn't supported, then we shouldn't be picking it as a candidate.  If it
> > > is, then we've probably got a problem somewhere else in this code and
> > > this patch is likely papering over it.
>
> I'm not sure this is accurate but at least the cost of using an unsupported
> addressing mode should be at least that of the compensating code to
> mangle it to a supported form.
>
> > I'll take a deeper look into the candidate selection algorithm then. Will
> > get back to you.
>
> Thanks - as said the unfortunate situation is that both the original author and
> the one who did the last bigger reworks of the code are gone.
>
> Richard.
>
> > Regards,
> > Dimitrije
> >
> > ________________________________________
> > From: Jeff Law <jeffreyalaw@gmail.com>
> > Sent: Tuesday, November 1, 2022 7:46 PM
> > To: Richard Biener; Dimitrije Milosevic
> > Cc: gcc-patches@gcc.gnu.org; Djordje Todorovic
> > Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity.
> >
> >
> > On 10/28/22 01:00, Richard Biener wrote:
> > > On Fri, Oct 28, 2022 at 8:43 AM Dimitrije Milosevic
> > > <Dimitrije.Milosevic@syrmia.com> wrote:
> > >> Hi Jeff,
> > >>
> > >>> THe part I don't understand is, if you only have BASE+OFF, why does
> > >>> preventing the calculation of more complex addressing modes matter?  ie,
> > >>> what's the point of computing the cost of something like base + off +
> > >>> scaled index when the target can't utilize it?
> > >> Well, the complexities of all addressing modes other than BASE + OFFSET are
> > >> equal to 0. For targets like Mips, which only has BASE + OFFSET, it would still
> > >> be more complex to use a candidate with BASE + INDEX << SCALE + OFFSET
> > >> than a candidate with BASE + INDEX, for example, as it has to compensate
> > >> the lack of other addressing modes somehow. If complexities for both of
> > >> those are equal to 0, in cases where complexities decide which candidate is
> > >> to be chosen, a more complex candidate may be picked.
> > > But something is wrong then - it shouldn't ever pick a candidate with
> > > an addressing
> > > mode that isn't supported?  So you say that the cost of expressing
> > > 'BASE + INDEX << SCALE + OFFSET' as 'BASE + OFFSET' is not computed
> > > accurately?
> >
> > This is exactly what I was trying to get to.   If the addressing mode
> > isn't supported, then we shouldn't be picking it as a candidate.  If it
> > is, then we've probably got a problem somewhere else in this code and
> > this patch is likely papering over it.
> >
> >
> > Jeff
> >
Dimitrije Milošević Dec. 16, 2022, 11:37 a.m. UTC | #12
Hi Richard,

> The only documentation on complexity I find is
>
>   int64_t cost;         /* The runtime cost.  */
>   unsigned complexity;  /* The estimate of the complexity of the code for
>                            the computation (in no concrete units --
>                            complexity field should be larger for more
>                            complex expressions and addressing modes).  */
>
> and complexity is used as tie-breaker only when cost is equal.  Given that
> shouldn't unsupported addressing modes have higher complexity?  I'll note
> that there's nothing "unsupported", each "unsupported" address computation
> is lowered into supported pieces.  "unsupported" maybe means that
> "cost" isn't fully covered by address-cost and compensation stmts might
> be costed in quantities not fully compatible with that?

Correct, that's what I was aiming for initially - before f9f69dd that was the case,
"unsupported" addressing modes had higher complexities.
Also, that's what I meant by "unsupported" as well, thanks.

> That said, "complexity" seems to only complicate things :/  We do have the
> tie-breaker on preferring less IVs.  complexity was added in
> r0-85562-g6e8c65f6621fb0 as part of fixing PR34711.

I agree that the complexity part is just (kind of) out there, not really strongly
defined. I'm not sure how to feel about merging complexity into the cost part
of an address cost, though.

> If it's really only about the "complexity" value then each
> compensation step should
> add to the complexity?

That could be the way to go. Also worth verifying is that we compensate for
each case of an unsupported addressing mode.

Kind regards,
Dimitrije

From: Richard Biener <richard.guenther@gmail.com>
Sent: Friday, December 16, 2022 10:58 AM
To: Dimitrije Milosevic <Dimitrije.Milosevic@Syrmia.com>
Cc: Jeff Law <jeffreyalaw@gmail.com>; gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>; Djordje Todorovic <Djordje.Todorovic@syrmia.com>
Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity. 
 
On Thu, Dec 15, 2022 at 4:26 PM Dimitrije Milosevic
<Dimitrije.Milosevic@syrmia.com> wrote:
>
> Hi Richard,
>
> Sorry for the delayed response, I couldn't find the time to fully focus on this topic.
>
> > I'm not sure this is accurate but at least the cost of using an unsupported
> > addressing mode should be at least that of the compensating code to
> > mangle it to a supported form.
>
> I'm pretty sure IVOPTS does not filter out candidates which aren't supported by
> the target architecture. It does, however, adjust the cost for a subset of those.
> The adjustment code modifies only the cost part of the address cost (which
> consists of a cost and a complexity).
> Having said this, I'd propose two approaches:
>     1. Cover all cases of unsupported addressing modes (if needed, I'm not entirely
>         sure they aren't already covered), leaving complexity for unsupported
>         addressing modes zero.

The only documentation on complexity I find is

  int64_t cost;         /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */

and complexity is used as tie-breaker only when cost is equal.  Given that
shouldn't unsupported addressing modes have higher complexity?  I'll note
that there's nothing "unsupported", each "unsupported" address computation
is lowered into supported pieces.  "unsupported" maybe means that
"cost" isn't fully covered by address-cost and compensation stmts might
be costed in quantities not fully compatible with that?

That said, "complexity" seems to only complicate things :/  We do have the
tie-breaker on prefering less IVs.  complexity was added in
r0-85562-g6e8c65f6621fb0 as part of fixing PR34711.

>     2. Revert the complexity calculation (which my initial patch does), leaving
>         everything else as it is.
>     3. A combination of both - if the control path gets into the adjustment code, we
>         use the reverted complexity calculation.

If it's really only about the "complexity" value then each
compensation step should
add to the complexity?

> I'd love to get feedback regarding this, so I could focus on a concrete approach.
>
> Kind regards,
> Dimitrije
>
> From: Richard Biener <richard.guenther@gmail.com>
> Sent: Monday, November 7, 2022 2:35 PM
> To: Dimitrije Milosevic <Dimitrije.Milosevic@Syrmia.com>
> Cc: Jeff Law <jeffreyalaw@gmail.com>; gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>; Djordje Todorovic <Djordje.Todorovic@syrmia.com>
> Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity.
>
> On Wed, Nov 2, 2022 at 9:40 AM Dimitrije Milosevic
> <Dimitrije.Milosevic@syrmia.com> wrote:
> >
> > Hi Jeff,
> >
> > > This is exactly what I was trying to get to.   If the addressing mode
> > > isn't supported, then we shouldn't be picking it as a candidate.  If it
> > > is, then we've probably got a problem somewhere else in this code and
> > > this patch is likely papering over it.
>
> I'm not sure this is accurate but at least the cost of using an unsupported
> addressing mode should be at least that of the compensating code to
> mangle it to a supported form.
>
> > I'll take a deeper look into the candidate selection algorithm then. Will
> > get back to you.
>
> Thanks - as said the unfortunate situation is that both the original author and
> the one who did the last bigger reworks of the code are gone.
>
> Richard.
>
> > Regards,
> > Dimitrije
> >
> > ________________________________________
> > From: Jeff Law <jeffreyalaw@gmail.com>
> > Sent: Tuesday, November 1, 2022 7:46 PM
> > To: Richard Biener; Dimitrije Milosevic
> > Cc: gcc-patches@gcc.gnu.org; Djordje Todorovic
> > Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity.
> >
> >
> > On 10/28/22 01:00, Richard Biener wrote:
> > > On Fri, Oct 28, 2022 at 8:43 AM Dimitrije Milosevic
> > > <Dimitrije.Milosevic@syrmia.com> wrote:
> > >> Hi Jeff,
> > >>
> > >>> THe part I don't understand is, if you only have BASE+OFF, why does
> > >>> preventing the calculation of more complex addressing modes matter?  ie,
> > >>> what's the point of computing the cost of something like base + off +
> > >>> scaled index when the target can't utilize it?
> > >> Well, the complexities of all addressing modes other than BASE + OFFSET are
> > >> equal to 0. For targets like Mips, which only has BASE + OFFSET, it would still
> > >> be more complex to use a candidate with BASE + INDEX << SCALE + OFFSET
> > >> than a candidate with BASE + INDEX, for example, as it has to compensate
> > >> the lack of other addressing modes somehow. If complexities for both of
> > >> those are equal to 0, in cases where complexities decide which candidate is
> > >> to be chosen, a more complex candidate may be picked.
> > > But something is wrong then - it shouldn't ever pick a candidate with
> > > an addressing
> > > mode that isn't supported?  So you say that the cost of expressing
> > > 'BASE + INDEX << SCALE + OFFSET' as 'BASE + OFFSET' is not computed
> > > accurately?
> >
> > This is exactly what I was trying to get to.   If the addressing mode
> > isn't supported, then we shouldn't be picking it as a candidate.  If it
> > is, then we've probably got a problem somewhere else in this code and
> > this patch is likely papering over it.
> >
> >
> > Jeff
> >
Richard Biener Dec. 16, 2022, 11:58 a.m. UTC | #13
On Fri, Dec 16, 2022 at 12:37 PM Dimitrije Milosevic
<Dimitrije.Milosevic@syrmia.com> wrote:
>
>
> Hi Richard,
>
> > The only documentation on complexity I find is
> >
> >   int64_t cost;         /* The runtime cost.  */
> >   unsigned complexity;  /* The estimate of the complexity of the code for
> >                            the computation (in no concrete units --
> >                            complexity field should be larger for more
> >                            complex expressions and addressing modes).  */
> >
> > and complexity is used as tie-breaker only when cost is equal.  Given that
> > shouldn't unsupported addressing modes have higher complexity?  I'll note
> > that there's nothing "unsupported", each "unsupported" address computation
> > is lowered into supported pieces.  "unsupported" maybe means that
> > "cost" isn't fully covered by address-cost and compensation stmts might
> > be costed in quantities not fully compatible with that?
>
> Correct, that's what I was aiming for initially - before f9f69dd that was the case,
> "unsupported" addressing modes had higher complexities.
> Also, that's what I meant by "unsupported" as well, thanks.
>
> > That said, "complexity" seems to only complicate things :/  We do have the
> > tie-breaker on preferring less IVs.  complexity was added in
> > r0-85562-g6e8c65f6621fb0 as part of fixing PR34711.
>
> I agree that the complexity part is just (kind of) out there, not really strongly
> defined. I'm not sure how to feel about merging complexity into the cost part
> of an address cost, though.
>
> > If it's really only about the "complexity" value then each
> > compensation step should
> > add to the complexity?
>
> That could be the way to go. Also worth verifying is that we compensate for
> each case of an unsupported addressing mode.

Yes.  Also given complexity is only a tie-breaker we should cost the
compensation
somehow, but then complexity doesn't look necessary ...

Meh.

>
> Kind regards,
> Dimitrije
>
> From: Richard Biener <richard.guenther@gmail.com>
> Sent: Friday, December 16, 2022 10:58 AM
> To: Dimitrije Milosevic <Dimitrije.Milosevic@Syrmia.com>
> Cc: Jeff Law <jeffreyalaw@gmail.com>; gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>; Djordje Todorovic <Djordje.Todorovic@syrmia.com>
> Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity.
>
> On Thu, Dec 15, 2022 at 4:26 PM Dimitrije Milosevic
> <Dimitrije.Milosevic@syrmia.com> wrote:
> >
> > Hi Richard,
> >
> > Sorry for the delayed response, I couldn't find the time to fully focus on this topic.
> >
> > > I'm not sure this is accurate but at least the cost of using an unsupported
> > > addressing mode should be at least that of the compensating code to
> > > mangle it to a supported form.
> >
> > I'm pretty sure IVOPTS does not filter out candidates which aren't supported by
> > the target architecture. It does, however, adjust the cost for a subset of those.
> > The adjustment code modifies only the cost part of the address cost (which
> > consists of a cost and a complexity).
> > Having said this, I'd propose two approaches:
> >     1. Cover all cases of unsupported addressing modes (if needed, I'm not entirely
> >         sure they aren't already covered), leaving complexity for unsupported
> >         addressing modes zero.
>
> The only documentation on complexity I find is
>
>   int64_t cost;         /* The runtime cost.  */
>   unsigned complexity;  /* The estimate of the complexity of the code for
>                            the computation (in no concrete units --
>                            complexity field should be larger for more
>                            complex expressions and addressing modes).  */
>
> and complexity is used as tie-breaker only when cost is equal.  Given that
> shouldn't unsupported addressing modes have higher complexity?  I'll note
> that there's nothing "unsupported", each "unsupported" address computation
> is lowered into supported pieces.  "unsupported" maybe means that
> "cost" isn't fully covered by address-cost and compensation stmts might
> be costed in quantities not fully compatible with that?
>
> That said, "complexity" seems to only complicate things :/  We do have the
> tie-breaker on prefering less IVs.  complexity was added in
> r0-85562-g6e8c65f6621fb0 as part of fixing PR34711.
>
> >     2. Revert the complexity calculation (which my initial patch does), leaving
> >         everything else as it is.
> >     3. A combination of both - if the control path gets into the adjustment code, we
> >         use the reverted complexity calculation.
>
> If it's really only about the "complexity" value then each
> compensation step should
> add to the complexity?
>
> > I'd love to get feedback regarding this, so I could focus on a concrete approach.
> >
> > Kind regards,
> > Dimitrije
> >
> > From: Richard Biener <richard.guenther@gmail.com>
> > Sent: Monday, November 7, 2022 2:35 PM
> > To: Dimitrije Milosevic <Dimitrije.Milosevic@Syrmia.com>
> > Cc: Jeff Law <jeffreyalaw@gmail.com>; gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>; Djordje Todorovic <Djordje.Todorovic@syrmia.com>
> > Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity.
> >
> > On Wed, Nov 2, 2022 at 9:40 AM Dimitrije Milosevic
> > <Dimitrije.Milosevic@syrmia.com> wrote:
> > >
> > > Hi Jeff,
> > >
> > > > This is exactly what I was trying to get to.   If the addressing mode
> > > > isn't supported, then we shouldn't be picking it as a candidate.  If it
> > > > is, then we've probably got a problem somewhere else in this code and
> > > > this patch is likely papering over it.
> >
> > I'm not sure this is accurate but at least the cost of using an unsupported
> > addressing mode should be at least that of the compensating code to
> > mangle it to a supported form.
> >
> > > I'll take a deeper look into the candidate selection algorithm then. Will
> > > get back to you.
> >
> > Thanks - as said the unfortunate situation is that both the original author and
> > the one who did the last bigger reworks of the code are gone.
> >
> > Richard.
> >
> > > Regards,
> > > Dimitrije
> > >
> > > ________________________________________
> > > From: Jeff Law <jeffreyalaw@gmail.com>
> > > Sent: Tuesday, November 1, 2022 7:46 PM
> > > To: Richard Biener; Dimitrije Milosevic
> > > Cc: gcc-patches@gcc.gnu.org; Djordje Todorovic
> > > Subject: Re: [PATCH 1/2] ivopts: Revert computation of address cost complexity.
> > >
> > >
> > > On 10/28/22 01:00, Richard Biener wrote:
> > > > On Fri, Oct 28, 2022 at 8:43 AM Dimitrije Milosevic
> > > > <Dimitrije.Milosevic@syrmia.com> wrote:
> > > >> Hi Jeff,
> > > >>
> > > >>> THe part I don't understand is, if you only have BASE+OFF, why does
> > > >>> preventing the calculation of more complex addressing modes matter?  ie,
> > > >>> what's the point of computing the cost of something like base + off +
> > > >>> scaled index when the target can't utilize it?
> > > >> Well, the complexities of all addressing modes other than BASE + OFFSET are
> > > >> equal to 0. For targets like Mips, which only has BASE + OFFSET, it would still
> > > >> be more complex to use a candidate with BASE + INDEX << SCALE + OFFSET
> > > >> than a candidate with BASE + INDEX, for example, as it has to compensate
> > > >> the lack of other addressing modes somehow. If complexities for both of
> > > >> those are equal to 0, in cases where complexities decide which candidate is
> > > >> to be chosen, a more complex candidate may be picked.
> > > > But something is wrong then - it shouldn't ever pick a candidate with
> > > > an addressing
> > > > mode that isn't supported?  So you say that the cost of expressing
> > > > 'BASE + INDEX << SCALE + OFFSET' as 'BASE + OFFSET' is not computed
> > > > accurately?
> > >
> > > This is exactly what I was trying to get to.   If the addressing mode
> > > isn't supported, then we shouldn't be picking it as a candidate.  If it
> > > is, then we've probably got a problem somewhere else in this code and
> > > this patch is likely papering over it.
> > >
> > >
> > > Jeff
> > >
diff mbox series

Patch

diff --git a/gcc/tree-ssa-address.cc b/gcc/tree-ssa-address.cc
index ba7b7c93162..442f54f0165 100644
--- a/gcc/tree-ssa-address.cc
+++ b/gcc/tree-ssa-address.cc
@@ -561,7 +561,7 @@  add_to_parts (struct mem_address *parts, tree elt)
    validity for a memory reference accessing memory of mode MODE in address
    space AS.  */
 
-static bool
+bool
 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
 				 addr_space_t as)
 {
diff --git a/gcc/tree-ssa-address.h b/gcc/tree-ssa-address.h
index 95143a099b9..09f36ee2f19 100644
--- a/gcc/tree-ssa-address.h
+++ b/gcc/tree-ssa-address.h
@@ -38,6 +38,8 @@  tree create_mem_ref (gimple_stmt_iterator *, tree,
 		     class aff_tree *, tree, tree, tree, bool);
 extern void copy_ref_info (tree, tree);
 tree maybe_fold_tmr (tree);
+bool multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
+				 addr_space_t as);
 
 extern unsigned int preferred_mem_scale_factor (tree base,
 						machine_mode mem_mode,
diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc
index a6f926a68ef..d53ba05a4f6 100644
--- a/gcc/tree-ssa-loop-ivopts.cc
+++ b/gcc/tree-ssa-loop-ivopts.cc
@@ -4774,6 +4774,135 @@  get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
   return infinite_cost;
 }
 
+static void
+compute_symbol_and_var_present (tree e1, tree e2,
+       bool *symbol_present, bool *var_present)
+{
+  poly_uint64_pod off1, off2;
+
+  e1 = strip_offset (e1, &off1);
+  e2 = strip_offset (e2, &off2);
+
+  STRIP_NOPS (e1);
+  STRIP_NOPS (e2);
+
+  if (TREE_CODE (e1) == ADDR_EXPR)
+    {
+      poly_int64_pod diff;
+      if (ptr_difference_const (e1, e2, &diff))
+  {
+    *symbol_present = false;
+    *var_present = false;
+    return;
+  }
+
+      if (integer_zerop (e2))
+  {
+    tree core;
+    poly_int64_pod bitsize;
+    poly_int64_pod bitpos;
+    widest_int mul;
+    tree toffset;
+    machine_mode mode;
+    int unsignedp, reversep, volatilep;
+
+    core = get_inner_reference (TREE_OPERAND (e1, 0), &bitsize, &bitpos,
+      &toffset, &mode, &unsignedp, &reversep, &volatilep);
+
+    if (toffset != 0
+    || !constant_multiple_p (bitpos, BITS_PER_UNIT, &mul)
+    || reversep
+    || !VAR_P (core))
+      {
+    *symbol_present = false;
+    *var_present = true;
+    return;
+      }
+
+    if (TREE_STATIC (core)
+    || DECL_EXTERNAL (core))
+      {
+    *symbol_present = true;
+    *var_present = false;
+    return;
+      }
+
+    *symbol_present = false;
+    *var_present = true;
+    return;
+  }
+
+      *symbol_present = false;
+      *var_present = true;
+    }
+  *symbol_present = false;
+
+  if (operand_equal_p (e1, e2, 0))
+    {
+      *var_present = false;
+      return;
+    }
+
+  *var_present = true;
+}
+
+static void
+compute_min_and_max_offset (addr_space_t as,
+       machine_mode mem_mode, poly_int64_pod *min_offset,
+       poly_int64_pod *max_offset)
+{
+  machine_mode address_mode = targetm.addr_space.address_mode (as);
+  HOST_WIDE_INT i;
+  poly_int64_pod off, width;
+  rtx addr;
+  rtx reg1;
+
+  reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
+
+  width = GET_MODE_BITSIZE (address_mode) - 1;
+  if (known_gt (width, HOST_BITS_PER_WIDE_INT - 1))
+	  width = HOST_BITS_PER_WIDE_INT - 1;
+  gcc_assert (width.is_constant ());
+  addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
+
+  off = 0;
+  for (i = width.to_constant (); i >= 0; i--)
+    {
+      off = -(HOST_WIDE_INT_1U << i);
+      XEXP (addr, 1) = gen_int_mode (off, address_mode);
+      if (memory_address_addr_space_p (mem_mode, addr, as))
+    break;
+    }
+  if (i == -1)
+    *min_offset = 0;
+  else
+    *min_offset = off;
+  // *min_offset = (i == -1? 0 : off);
+
+  for (i = width.to_constant (); i >= 0; i--)
+    {
+      off = (HOST_WIDE_INT_1U << i) - 1;
+      XEXP (addr, 1) = gen_int_mode (off, address_mode);
+      if (memory_address_addr_space_p (mem_mode, addr, as))
+    break;
+    /* For some strict-alignment targets, the offset must be naturally
+      aligned.  Try an aligned offset if mem_mode is not QImode.  */
+      off = mem_mode != QImode
+      ? (HOST_WIDE_INT_1U << i)
+      - (GET_MODE_SIZE (mem_mode))
+      : 0;
+      if (known_gt (off, 0))
+    {
+      XEXP (addr, 1) = gen_int_mode (off, address_mode);
+      if (memory_address_addr_space_p (mem_mode, addr, as))
+    break;
+    }
+    }
+  if (i == -1)
+	  off = 0;
+  *max_offset = off;
+}
+
 /* Return cost of computing USE's address expression by using CAND.
    AFF_INV and AFF_VAR represent invariant and variant parts of the
    address expression, respectively.  If AFF_INV is simple, store
@@ -4802,6 +4931,13 @@  get_address_cost (struct ivopts_data *data, struct iv_use *use,
   /* Only true if ratio != 1.  */
   bool ok_with_ratio_p = false;
   bool ok_without_ratio_p = false;
+  tree ubase = use->iv->base;
+  tree cbase = cand->iv->base, cstep = cand->iv->step;
+  tree utype = TREE_TYPE (ubase), ctype;
+  unsigned HOST_WIDE_INT cstepi;
+  bool symbol_present = false, var_present = false, stmt_is_after_increment;
+  poly_int64_pod min_offset, max_offset;
+  bool offset_p, ratio_p;
 
   if (!aff_combination_const_p (aff_inv))
     {
@@ -4915,16 +5051,74 @@  get_address_cost (struct ivopts_data *data, struct iv_use *use,
   gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
   cost += address_cost (addr, mem_mode, as, speed);
 
-  if (parts.symbol != NULL_TREE)
-    cost.complexity += 1;
-  /* Don't increase the complexity of adding a scaled index if it's
-     the only kind of index that the target allows.  */
-  if (parts.step != NULL_TREE && ok_without_ratio_p)
-    cost.complexity += 1;
-  if (parts.base != NULL_TREE && parts.index != NULL_TREE)
-    cost.complexity += 1;
-  if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
-    cost.complexity += 1;
+  if (cst_and_fits_in_hwi (cstep))
+    cstepi = int_cst_value (cstep);
+  else
+    cstepi = 0;
+
+  STRIP_NOPS (cbase);
+  ctype = TREE_TYPE (cbase);
+
+  stmt_is_after_increment = stmt_after_increment (data->current_loop, cand,
+    use->stmt);
+
+  if (cst_and_fits_in_hwi (cbase))
+    compute_symbol_and_var_present (ubase, build_int_cst (utype, 0),
+      &symbol_present, &var_present);
+  else if (ratio == 1)
+    {
+      tree real_cbase = cbase;
+
+      /* Check to see if any adjustment is needed.  */
+      if (!cst_and_fits_in_hwi (cstep) && stmt_is_after_increment)
+	{
+	  aff_tree real_cbase_aff;
+	  aff_tree cstep_aff;
+
+	  tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
+				   &real_cbase_aff);
+	  tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
+
+	  aff_combination_add (&real_cbase_aff, &cstep_aff);
+	  real_cbase = aff_combination_to_tree (&real_cbase_aff);
+	}
+    compute_symbol_and_var_present (ubase, real_cbase,
+      &symbol_present, &var_present);
+    }
+  else if (!POINTER_TYPE_P (ctype)
+	   && multiplier_allowed_in_address_p
+		(ratio, mem_mode,
+			TYPE_ADDR_SPACE (TREE_TYPE (utype))))
+    {
+      tree real_cbase = cbase;
+
+      if (cstepi == 0 && stmt_is_after_increment)
+	{
+	  if (POINTER_TYPE_P (ctype))
+	    real_cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
+	  else
+	    real_cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
+	}
+      real_cbase = fold_build2 (MULT_EXPR, ctype, real_cbase,
+				build_int_cst (ctype, ratio));
+    compute_symbol_and_var_present (ubase, real_cbase,
+      &symbol_present, &var_present);
+    }
+  else
+    {
+    compute_symbol_and_var_present (ubase, build_int_cst (utype, 0),
+      &symbol_present, &var_present);
+    }
+
+  compute_min_and_max_offset (as, mem_mode, &min_offset, &max_offset);
+  offset_p = maybe_ne (aff_inv->offset, 0)
+       && known_le (min_offset, aff_inv->offset)
+       && known_le (aff_inv->offset, max_offset);
+  ratio_p = (ratio != 1
+	     && multiplier_allowed_in_address_p (ratio, mem_mode, as));
+
+  cost.complexity = (symbol_present != 0) + (var_present != 0)
+       + offset_p + ratio_p;
 
   return cost;
 }