diff mbox

Fix PR46556 (straight-line strength reduction, part 2)

Message ID 1340919932.2861.15.camel@gnopaine
State New
Headers show

Commit Message

Bill Schmidt June 28, 2012, 9:45 p.m. UTC
Here's a relatively small piece of strength reduction that solves that
pesky addressing bug that got me looking at this in the first place...

The main part of the code is the stuff that was reviewed last year, but
which needed to find a good home.  So hopefully that's in pretty good
shape.  I recast base_cand_map as an htab again since I now need to look
up trees other than SSA names.  I plan to put together a follow-up patch
to change code and commentary references so that "base_name" becomes
"base_expr".  Doing that now would clutter up the patch too much.

Bootstrapped and tested on powerpc64-linux-gnu with no new regressions.
Ok for trunk?

Thanks,
Bill


gcc:

	PR tree-optimization/46556
	* gimple-ssa-strength-reduction.c (enum cand_kind): Add CAND_REF.
	(base_cand_map): Change to hash table.
	(base_cand_hash): New function.
	(base_cand_free): Likewise.
	(base_cand_eq): Likewise.
	(lookup_cand): Change base_cand_map to hash table.
	(find_basis_for_candidate): Likewise.
	(base_cand_from_table): Exclude CAND_REF.
	(restructure_reference): New function.
	(slsr_process_ref): Likewise.
	(find_candidates_in_block): Call slsr_process_ref.
	(dump_candidate): Handle CAND_REF.
	(base_cand_dump_callback): New function.
	(dump_cand_chains): Change base_cand_map to hash table.
	(replace_ref): New function.
	(replace_refs): Likewise.
	(analyze_candidates_and_replace): Call replace_refs.
	(execute_strength_reduction): Change base_cand_map to hash table.

gcc/testsuite:

	PR tree-optimization/46556
	* testsuite/gcc.dg/tree-ssa/slsr-27.c: New.
	* testsuite/gcc.dg/tree-ssa/slsr-28.c: New.
	* testsuite/gcc.dg/tree-ssa/slsr-29.c: New.

Comments

Bill Schmidt July 22, 2012, 3:19 p.m. UTC | #1
Ping...

On Thu, 2012-06-28 at 16:45 -0500, William J. Schmidt wrote:
> Here's a relatively small piece of strength reduction that solves that
> pesky addressing bug that got me looking at this in the first place...
> 
> The main part of the code is the stuff that was reviewed last year, but
> which needed to find a good home.  So hopefully that's in pretty good
> shape.  I recast base_cand_map as an htab again since I now need to look
> up trees other than SSA names.  I plan to put together a follow-up patch
> to change code and commentary references so that "base_name" becomes
> "base_expr".  Doing that now would clutter up the patch too much.
> 
> Bootstrapped and tested on powerpc64-linux-gnu with no new regressions.
> Ok for trunk?
> 
> Thanks,
> Bill
> 
> 
> gcc:
> 
> 	PR tree-optimization/46556
> 	* gimple-ssa-strength-reduction.c (enum cand_kind): Add CAND_REF.
> 	(base_cand_map): Change to hash table.
> 	(base_cand_hash): New function.
> 	(base_cand_free): Likewise.
> 	(base_cand_eq): Likewise.
> 	(lookup_cand): Change base_cand_map to hash table.
> 	(find_basis_for_candidate): Likewise.
> 	(base_cand_from_table): Exclude CAND_REF.
> 	(restructure_reference): New function.
> 	(slsr_process_ref): Likewise.
> 	(find_candidates_in_block): Call slsr_process_ref.
> 	(dump_candidate): Handle CAND_REF.
> 	(base_cand_dump_callback): New function.
> 	(dump_cand_chains): Change base_cand_map to hash table.
> 	(replace_ref): New function.
> 	(replace_refs): Likewise.
> 	(analyze_candidates_and_replace): Call replace_refs.
> 	(execute_strength_reduction): Change base_cand_map to hash table.
> 
> gcc/testsuite:
> 
> 	PR tree-optimization/46556
> 	* testsuite/gcc.dg/tree-ssa/slsr-27.c: New.
> 	* testsuite/gcc.dg/tree-ssa/slsr-28.c: New.
> 	* testsuite/gcc.dg/tree-ssa/slsr-29.c: New.
> 
> 
> Index: gcc/testsuite/gcc.dg/tree-ssa/slsr-27.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/slsr-27.c	(revision 0)
> +++ gcc/testsuite/gcc.dg/tree-ssa/slsr-27.c	(revision 0)
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-dom2" } */
> +
> +struct x
> +{
> +  int a[16];
> +  int b[16];
> +  int c[16];
> +};
> +
> +extern void foo (int, int, int);
> +
> +void
> +f (struct x *p, unsigned int n)
> +{
> +  foo (p->a[n], p->c[n], p->b[n]);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\\* 4;" 1 "dom2" } } */
> +/* { dg-final { scan-tree-dump-times "p_\\d\+\\(D\\) \\+ D" 1 "dom2" } } */
> +/* { dg-final { scan-tree-dump-times "MEM\\\[\\(struct x \\*\\)D" 3 "dom2" } } */
> +/* { dg-final { cleanup-tree-dump "dom2" } } */
> Index: gcc/testsuite/gcc.dg/tree-ssa/slsr-28.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/slsr-28.c	(revision 0)
> +++ gcc/testsuite/gcc.dg/tree-ssa/slsr-28.c	(revision 0)
> @@ -0,0 +1,26 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-dom2" } */
> +
> +struct x
> +{
> +  int a[16];
> +  int b[16];
> +  int c[16];
> +};
> +
> +extern void foo (int, int, int);
> +
> +void
> +f (struct x *p, unsigned int n)
> +{
> +  foo (p->a[n], p->c[n], p->b[n]);
> +  if (n > 12)
> +    foo (p->a[n], p->c[n], p->b[n]);
> +  else if (n > 3)
> +    foo (p->b[n], p->a[n], p->c[n]);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\\* 4;" 1 "dom2" } } */
> +/* { dg-final { scan-tree-dump-times "p_\\d\+\\(D\\) \\+ D" 1 "dom2" } } */
> +/* { dg-final { scan-tree-dump-times "MEM\\\[\\(struct x \\*\\)D" 9 "dom2" } } */
> +/* { dg-final { cleanup-tree-dump "dom2" } } */
> Index: gcc/testsuite/gcc.dg/tree-ssa/slsr-29.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/slsr-29.c	(revision 0)
> +++ gcc/testsuite/gcc.dg/tree-ssa/slsr-29.c	(revision 0)
> @@ -0,0 +1,28 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-dom2" } */
> +
> +struct x
> +{
> +  int a[16];
> +  int b[16];
> +  int c[16];
> +};
> +
> +extern void foo (int, int, int);
> +
> +void
> +f (struct x *p, unsigned int n)
> +{
> +  foo (p->a[n], p->c[n], p->b[n]);
> +  if (n > 3)
> +    {
> +      foo (p->a[n], p->c[n], p->b[n]);
> +      if (n > 12)
> +	foo (p->b[n], p->a[n], p->c[n]);
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\\* 4;" 1 "dom2" } } */
> +/* { dg-final { scan-tree-dump-times "p_\\d\+\\(D\\) \\+ D" 1 "dom2" } } */
> +/* { dg-final { scan-tree-dump-times "MEM\\\[\\(struct x \\*\\)D" 9 "dom2" } } */
> +/* { dg-final { cleanup-tree-dump "dom2" } } */
> Index: gcc/gimple-ssa-strength-reduction.c
> ===================================================================
> --- gcc/gimple-ssa-strength-reduction.c	(revision 189025)
> +++ gcc/gimple-ssa-strength-reduction.c	(working copy)
> @@ -32,7 +32,7 @@ along with GCC; see the file COPYING3.  If not see
>     2) Explicit multiplies, unknown constant multipliers,
>        no conditional increments. (data gathering complete,
>        replacements pending)
> -   3) Implicit multiplies in addressing expressions. (pending)
> +   3) Implicit multiplies in addressing expressions. (complete)
>     4) Explicit multiplies, conditional increments. (pending)
>  
>     It would also be possible to apply strength reduction to divisions
> @@ -107,9 +107,49 @@ along with GCC; see the file COPYING3.  If not see
>  
>     as a strength reduction opportunity, even though this S1 would
>     also be replaceable by the S1' above.  This can be added if it
> -   comes up in practice.  */
> +   comes up in practice.
>  
> +   Strength reduction in addressing
> +   --------------------------------
> +   There is another kind of candidate known as CAND_REF.  A CAND_REF
> +   describes a statement containing a memory reference having 
> +   complex addressing that might benefit from strength reduction.
> +   Specifically, we are interested in references for which 
> +   get_inner_reference returns a base address, offset, and bitpos as
> +   follows:
>  
> +     base:    MEM_REF (T1, C1)
> +     offset:  MULT_EXPR (PLUS_EXPR (T2, C2), C3)
> +     bitpos:  C4 * BITS_PER_UNIT
> +
> +   Here T1 and T2 are arbitrary trees, and C1, C2, C3, C4 are 
> +   arbitrary integer constants.  Note that C2 may be zero, in which
> +   case the offset will be MULT_EXPR (T2, C3).
> +
> +   When this pattern is recognized, the original memory reference
> +   can be replaced with:
> +
> +     MEM_REF (POINTER_PLUS_EXPR (T1, MULT_EXPR (T2, C3)),
> +              C1 + (C2 * C3) + C4)
> +
> +   which distributes the multiply to allow constant folding.  When
> +   two or more addressing expressions can be represented by MEM_REFs
> +   of this form, differing only in the constants C1, C2, and C4,
> +   making this substitution produces more efficient addressing during
> +   the RTL phases.  When there are not at least two expressions with
> +   the same values of T1, T2, and C3, there is nothing to be gained
> +   by the replacement.
> +
> +   Strength reduction of CAND_REFs uses the same infrastructure as
> +   that used by CAND_MULTs and CAND_ADDs.  We record T1 in the base (B)
> +   field, MULT_EXPR (T2, C3) in the stride (S) field, and 
> +   C1 + (C2 * C3) + C4 in the index (i) field.  A basis for a CAND_REF
> +   is thus another CAND_REF with the same B and S values.  When at 
> +   least two CAND_REFs are chained together using the basis relation,
> +   each of them is replaced as above, resulting in improved code
> +   generation for addressing.  */
> +
> +
>  /* Index into the candidate vector, offset by 1.  VECs are zero-based,
>     while cand_idx's are one-based, with zero indicating null.  */
>  typedef unsigned cand_idx;
> @@ -118,7 +158,8 @@ typedef unsigned cand_idx;
>  enum cand_kind
>  {
>    CAND_MULT,
> -  CAND_ADD
> +  CAND_ADD,
> +  CAND_REF
>  };
>  
>  struct slsr_cand_d
> @@ -137,7 +178,9 @@ struct slsr_cand_d
>  
>    /* The type of the candidate.  This is normally the type of base_name,
>       but casts may have occurred when combining feeding instructions.
> -     A candidate can only be a basis for candidates of the same final type.  */
> +     A candidate can only be a basis for candidates of the same final type.
> +     (For CAND_REFs, this is the type to be used for operand 1 of the
> +     replacement MEM_REF.)  */
>    tree cand_type;
>  
>    /* The kind of candidate (CAND_MULT, etc.).  */
> @@ -211,8 +254,8 @@ static struct pointer_map_t *stmt_cand_map;
>  /* Obstack for candidates.  */
>  static struct obstack cand_obstack;
>  
> -/* Array mapping from base SSA names to chains of candidates.  */
> -static cand_chain_t *base_cand_map;
> +/* Hash table embodying a mapping from base names to chains of candidates.  */
> +static htab_t base_cand_map;
>  
>  /* Obstack for candidate chains.  */
>  static struct obstack chain_obstack;
> @@ -225,6 +268,33 @@ lookup_cand (cand_idx idx)
>    return VEC_index (slsr_cand_t, cand_vec, idx - 1);
>  }
>  
> +/* Callback to produce a hash value for a candidate chain header.  */
> +
> +static hashval_t
> +base_cand_hash (const void *p)
> +{
> +  tree base_expr = ((const_cand_chain_t) p)->base_name;
> +  return iterative_hash_expr (base_expr, 0);
> +}
> +
> +/* Callback when an element is removed from the hash table.
> +   We never remove entries until the entire table is released.  */
> +
> +static void
> +base_cand_free (void *p ATTRIBUTE_UNUSED)
> +{
> +}
> +
> +/* Callback to return true if two candidate chain headers are equal.  */
> +
> +static int
> +base_cand_eq (const void *p1, const void *p2)
> +{
> +  const_cand_chain_t const chain1 = (const_cand_chain_t) p1;
> +  const_cand_chain_t const chain2 = (const_cand_chain_t) p2;
> +  return operand_equal_p (chain1->base_name, chain2->base_name, 0);
> +}
> +
>  /* Use the base name from candidate C to look for possible candidates
>     that can serve as a basis for C.  Each potential basis must also
>     appear in a block that dominates the candidate statement and have
> @@ -235,11 +305,12 @@ lookup_cand (cand_idx idx)
>  static int
>  find_basis_for_candidate (slsr_cand_t c)
>  {
> +  cand_chain mapping_key;
>    cand_chain_t chain;
>    slsr_cand_t basis = NULL;
>  
> -  gcc_assert (TREE_CODE (c->base_name) == SSA_NAME);
> -  chain = base_cand_map[SSA_NAME_VERSION (c->base_name)];
> +  mapping_key.base_name = c->base_name;
> +  chain = (cand_chain_t) htab_find (base_cand_map, &mapping_key);
>  
>    for (; chain; chain = chain->next)
>      {
> @@ -273,23 +344,23 @@ find_basis_for_candidate (slsr_cand_t c)
>  static void
>  record_potential_basis (slsr_cand_t c)
>  {
> -  cand_chain_t node, head;
> -  int index;
> +  cand_chain_t node;
> +  void **slot;
>  
>    node = (cand_chain_t) obstack_alloc (&chain_obstack, sizeof (cand_chain));
>    node->base_name = c->base_name;
>    node->cand = c;
>    node->next = NULL;
> -  index = SSA_NAME_VERSION (c->base_name);
> -  head = base_cand_map[index];
> +  slot = htab_find_slot (base_cand_map, node, INSERT);
>  
> -  if (head)
> +  if (*slot)
>      {
> +      cand_chain_t head = (cand_chain_t) (*slot);
>        node->next = head->next;
>        head->next = node;
>      }
>    else
> -    base_cand_map[index] = node;
> +    *slot = node;
>  }
>  
>  /* Allocate storage for a new candidate and initialize its fields.
> @@ -390,10 +461,11 @@ base_cand_from_table (tree base_in)
>      return (slsr_cand_t) NULL;
>  
>    result = (slsr_cand_t *) pointer_map_contains (stmt_cand_map, def);
> -  if (!result)
> -    return (slsr_cand_t) NULL;
> +  
> +  if (result && (*result)->kind != CAND_REF)
> +    return *result;
>  
> -  return *result;
> +  return (slsr_cand_t) NULL;
>  }
>  
>  /* Add an entry to the statement-to-candidate mapping.  */
> @@ -406,6 +478,127 @@ add_cand_for_stmt (gimple gs, slsr_cand_t c)
>    *slot = c;
>  }
>  
> +/* Look for the following pattern:
> +
> +    *PBASE:    MEM_REF (T1, C1)
> +
> +    *POFFSET:  MULT_EXPR (T2, C3)        [C2 is zero]
> +                     or
> +               MULT_EXPR (PLUS_EXPR (T2, C2), C3)
> +                     or
> +               MULT_EXPR (MINUS_EXPR (T2, -C2), C3)
> +
> +    *PINDEX:   C4 * BITS_PER_UNIT
> +
> +   If not present, leave the input values unchanged and return FALSE.
> +   Otherwise, modify the input values as follows and return TRUE:
> +
> +    *PBASE:    T1
> +    *POFFSET:  MULT_EXPR (T2, C3)
> +    *PINDEX:   C1 + (C2 * C3) + C4  */
> +
> +static bool
> +restructure_reference (tree *pbase, tree *poffset, double_int *pindex,
> +		       tree *ptype)
> +{
> +  tree base = *pbase, offset = *poffset;
> +  double_int index = *pindex;
> +  double_int bpu = uhwi_to_double_int (BITS_PER_UNIT);
> +  tree mult_op0, mult_op1, t1, t2, type;
> +  double_int c1, c2, c3, c4;
> +
> +  if (!base
> +      || !offset
> +      || TREE_CODE (base) != MEM_REF
> +      || TREE_CODE (offset) != MULT_EXPR
> +      || TREE_CODE (TREE_OPERAND (offset, 1)) != INTEGER_CST
> +      || !double_int_zero_p (double_int_umod (index, bpu, FLOOR_MOD_EXPR)))
> +    return false;
> +
> +  t1 = TREE_OPERAND (base, 0);
> +  c1 = mem_ref_offset (base);
> +  type = TREE_TYPE (TREE_OPERAND (base, 1));
> +
> +  mult_op0 = TREE_OPERAND (offset, 0);
> +  mult_op1 = TREE_OPERAND (offset, 1);
> +
> +  c3 = tree_to_double_int (mult_op1);
> +
> +  if (TREE_CODE (mult_op0) == PLUS_EXPR)
> +
> +    if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
> +      {
> +	t2 = TREE_OPERAND (mult_op0, 0);
> +	c2 = tree_to_double_int (TREE_OPERAND (mult_op0, 1));
> +      }
> +    else
> +      return false;
> +
> +  else if (TREE_CODE (mult_op0) == MINUS_EXPR)
> +
> +    if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
> +      {
> +	t2 = TREE_OPERAND (mult_op0, 0);
> +	c2 = double_int_neg (tree_to_double_int (TREE_OPERAND (mult_op0, 1)));
> +      }
> +    else
> +      return false;
> +
> +  else
> +    {
> +      t2 = mult_op0;
> +      c2 = double_int_zero;
> +    }
> +
> +  c4 = double_int_udiv (index, bpu, FLOOR_DIV_EXPR);
> +
> +  *pbase = t1;
> +  *poffset = fold_build2 (MULT_EXPR, sizetype, t2,
> +			  double_int_to_tree (sizetype, c3));
> +  *pindex = double_int_add (double_int_add (c1, double_int_mul (c2, c3)), c4);
> +  *ptype = type;
> +
> +  return true;
> +}
> +
> +/* Given GS which contains a data reference, create a CAND_REF entry in
> +   the candidate table and attempt to find a basis.  */
> +
> +static void
> +slsr_process_ref (gimple gs)
> +{
> +  tree ref_expr, base, offset, type;
> +  HOST_WIDE_INT bitsize, bitpos;
> +  enum machine_mode mode;
> +  int unsignedp, volatilep;
> +  double_int index;
> +  slsr_cand_t c;
> +
> +  if (gimple_vdef (gs))
> +    ref_expr = gimple_assign_lhs (gs);
> +  else
> +    ref_expr = gimple_assign_rhs1 (gs);
> +
> +  if (!handled_component_p (ref_expr)
> +      || TREE_CODE (ref_expr) == BIT_FIELD_REF
> +      || (TREE_CODE (ref_expr) == COMPONENT_REF
> +	  && DECL_BIT_FIELD (TREE_OPERAND (ref_expr, 1))))
> +    return;
> +
> +  base = get_inner_reference (ref_expr, &bitsize, &bitpos, &offset, &mode,
> +			      &unsignedp, &volatilep, false);
> +  index = uhwi_to_double_int (bitpos);
> +
> +  if (!restructure_reference (&base, &offset, &index, &type))
> +    return;
> +
> +  c = alloc_cand_and_find_basis (CAND_REF, gs, base, index, offset,
> +				 type, 0);
> +
> +  /* Add the candidate to the statement-candidate mapping.  */
> +  add_cand_for_stmt (gs, c);
> +}
> +
>  /* Create a candidate entry for a statement GS, where GS multiplies
>     two SSA names BASE_IN and STRIDE_IN.  Propagate any known information
>     about the two SSA names into the new candidate.  Return the new
> @@ -1056,8 +1249,12 @@ find_candidates_in_block (struct dom_walk_data *wa
>      {
>        gimple gs = gsi_stmt (gsi);
>  
> -      if (is_gimple_assign (gs)
> -	  && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs)))))
> +      if (gimple_vuse (gs) && gimple_assign_single_p (gs))
> +	slsr_process_ref (gs);
> +
> +      else if (is_gimple_assign (gs)
> +	       && SCALAR_INT_MODE_P
> +	            (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs)))))
>  	{
>  	  tree rhs1 = NULL_TREE, rhs2 = NULL_TREE;
>  
> @@ -1151,6 +1348,15 @@ dump_candidate (slsr_cand_t c)
>        print_generic_expr (dump_file, c->stride, 0);
>        fputs (") : ", dump_file);
>        break;
> +    case CAND_REF:
> +      fputs ("     REF  : ", dump_file);
> +      print_generic_expr (dump_file, c->base_name, 0);
> +      fputs (" + (", dump_file);
> +      print_generic_expr (dump_file, c->stride, 0);
> +      fputs (") + ", dump_file);
> +      dump_double_int (dump_file, c->index, false);
> +      fputs (" : ", dump_file);
> +      break;
>      default:
>        gcc_unreachable ();
>      }
> @@ -1181,36 +1387,33 @@ dump_cand_vec (void)
>      dump_candidate (c);
>  }
>  
> -/* Dump the candidate chains.  */
> +/* Callback used to dump the candidate chains hash table.  */
>  
> -static void
> -dump_cand_chains (void)
> +static int
> +base_cand_dump_callback (void **slot, void *ignored ATTRIBUTE_UNUSED)
>  {
> -  unsigned i;
> +  const_cand_chain_t chain = *((const_cand_chain_t *) slot);
> +  cand_chain_t p;
>  
> -  fprintf (dump_file, "\nStrength reduction candidate chains:\n\n");
> +  print_generic_expr (dump_file, chain->base_name, 0);
> +  fprintf (dump_file, " -> %d", chain->cand->cand_num);
>  
> -  for (i = 0; i < num_ssa_names; i++)
> -    {
> -      const_cand_chain_t chain = base_cand_map[i];
> +  for (p = chain->next; p; p = p->next)
> +    fprintf (dump_file, " -> %d", p->cand->cand_num);
>  
> -      if (chain)
> -	{
> -	  cand_chain_t p;
> +  fputs ("\n", dump_file);
> +  return 1;
> +}
>  
> -	  print_generic_expr (dump_file, chain->base_name, 0);
> -	  fprintf (dump_file, " -> %d", chain->cand->cand_num);
> +/* Dump the candidate chains.  */
>  
> -	  for (p = chain->next; p; p = p->next)
> -	    fprintf (dump_file, " -> %d", p->cand->cand_num);
> -
> -	  fputs ("\n", dump_file);
> -	}
> -    }
> -
> +static void
> +dump_cand_chains (void)
> +{
> +  fprintf (dump_file, "\nStrength reduction candidate chains:\n\n");
> +  htab_traverse_noresize (base_cand_map, base_cand_dump_callback, NULL);
>    fputs ("\n", dump_file);
>  }
> -
>  
>  /* Recursive helper for unconditional_cands_with_known_stride_p.
>     Returns TRUE iff C, its siblings, and its dependents are all
> @@ -1246,6 +1449,53 @@ unconditional_cands_with_known_stride_p (slsr_cand
>    return unconditional_cands (lookup_cand (root->dependent));
>  }
>  
> +/* Replace *EXPR in candidate C with an equivalent strength-reduced
> +   data reference.  */
> +
> +static void
> +replace_ref (tree *expr, slsr_cand_t c)
> +{
> +  tree add_expr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (c->base_name),
> +			       c->base_name, c->stride);
> +  tree mem_ref = fold_build2 (MEM_REF, TREE_TYPE (*expr), add_expr,
> +			      double_int_to_tree (c->cand_type, c->index));
> +  
> +  /* Gimplify the base addressing expression for the new MEM_REF tree.  */
> +  gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
> +  TREE_OPERAND (mem_ref, 0)
> +    = force_gimple_operand_gsi (&gsi, TREE_OPERAND (mem_ref, 0),
> +				/*simple_p=*/true, NULL,
> +				/*before=*/true, GSI_SAME_STMT);
> +  copy_ref_info (mem_ref, *expr);
> +  *expr = mem_ref;
> +  update_stmt (c->cand_stmt);
> +}
> +
> +/* Replace CAND_REF candidate C, each sibling of candidate C, and each
> +   dependent of candidate C with an equivalent strength-reduced data
> +   reference.  */
> +
> +static void
> +replace_refs (slsr_cand_t c)
> +{
> +  if (gimple_vdef (c->cand_stmt))
> +    {
> +      tree *lhs = gimple_assign_lhs_ptr (c->cand_stmt);
> +      replace_ref (lhs, c);
> +    }
> +  else
> +    {
> +      tree *rhs = gimple_assign_rhs1_ptr (c->cand_stmt);
> +      replace_ref (rhs, c);
> +    }
> +
> +  if (c->sibling)
> +    replace_refs (lookup_cand (c->sibling));
> +
> +  if (c->dependent)
> +    replace_refs (lookup_cand (c->dependent));
> +}
> +
>  /* Calculate the increment required for candidate C relative to 
>     its basis.  */
>  
> @@ -1413,13 +1663,18 @@ analyze_candidates_and_replace (void)
>  
>        first_dep = lookup_cand (c->dependent);
>  
> +      /* If this is a chain of CAND_REFs, unconditionally replace
> +	 each of them with a strength-reduced data reference.  */
> +      if (c->kind == CAND_REF)
> +	replace_refs (c);
> +
>        /* If the common stride of all related candidates is a
>  	 known constant, and none of these has a phi-dependence,
>  	 then all replacements are considered profitable.
>  	 Each replaces a multiply by a single add, with the
>  	 possibility that a feeding add also goes dead as a
>  	 result.  */
> -      if (unconditional_cands_with_known_stride_p (c))
> +      else if (unconditional_cands_with_known_stride_p (c))
>  	replace_dependents (first_dep);
>  
>        /* TODO:  When the stride is an SSA name, it may still be
> @@ -1428,9 +1683,6 @@ analyze_candidates_and_replace (void)
>  	 can be reused, or are less expensive to calculate than
>  	 the replaced statements.  */
>  
> -      /* TODO:  Strength-reduce data references with implicit
> -	 multiplication in their addressing expressions.  */
> -
>        /* TODO:  When conditional increments occur so that a 
>  	 candidate is dependent upon a phi-basis, the cost of
>  	 introducing a temporary must be accounted for.  */
> @@ -1455,8 +1707,8 @@ execute_strength_reduction (void)
>    gcc_obstack_init (&chain_obstack);
>  
>    /* Allocate the mapping from base names to candidate chains.  */
> -  base_cand_map = XNEWVEC (cand_chain_t, num_ssa_names);
> -  memset (base_cand_map, 0, num_ssa_names * sizeof (cand_chain_t));
> +  base_cand_map = htab_create (500, base_cand_hash,
> +			       base_cand_eq, base_cand_free);
>  
>    /* Initialize the loop optimizer.  We need to detect flow across
>       back edges, and this gives us dominator information as well.  */
> @@ -1490,7 +1742,7 @@ execute_strength_reduction (void)
>    /* Free resources.  */
>    fini_walk_dominator_tree (&walk_data);
>    loop_optimizer_finalize ();
> -  free (base_cand_map);
> +  htab_delete (base_cand_map);
>    obstack_free (&chain_obstack, NULL);
>    pointer_map_destroy (stmt_cand_map);
>    VEC_free (slsr_cand_t, heap, cand_vec);
>
Richard Biener Aug. 1, 2012, 12:41 p.m. UTC | #2
On Sun, Jul 22, 2012 at 5:19 PM, William J. Schmidt
<wschmidt@linux.vnet.ibm.com> wrote:
> Ping...
>
> On Thu, 2012-06-28 at 16:45 -0500, William J. Schmidt wrote:
>> Here's a relatively small piece of strength reduction that solves that
>> pesky addressing bug that got me looking at this in the first place...
>>
>> The main part of the code is the stuff that was reviewed last year, but
>> which needed to find a good home.  So hopefully that's in pretty good
>> shape.  I recast base_cand_map as an htab again since I now need to look
>> up trees other than SSA names.  I plan to put together a follow-up patch
>> to change code and commentary references so that "base_name" becomes
>> "base_expr".  Doing that now would clutter up the patch too much.
>>
>> Bootstrapped and tested on powerpc64-linux-gnu with no new regressions.
>> Ok for trunk?

Ok.

Thanks,
Richard.

>> Thanks,
>> Bill
>>
>>
>> gcc:
>>
>>       PR tree-optimization/46556
>>       * gimple-ssa-strength-reduction.c (enum cand_kind): Add CAND_REF.
>>       (base_cand_map): Change to hash table.
>>       (base_cand_hash): New function.
>>       (base_cand_free): Likewise.
>>       (base_cand_eq): Likewise.
>>       (lookup_cand): Change base_cand_map to hash table.
>>       (find_basis_for_candidate): Likewise.
>>       (base_cand_from_table): Exclude CAND_REF.
>>       (restructure_reference): New function.
>>       (slsr_process_ref): Likewise.
>>       (find_candidates_in_block): Call slsr_process_ref.
>>       (dump_candidate): Handle CAND_REF.
>>       (base_cand_dump_callback): New function.
>>       (dump_cand_chains): Change base_cand_map to hash table.
>>       (replace_ref): New function.
>>       (replace_refs): Likewise.
>>       (analyze_candidates_and_replace): Call replace_refs.
>>       (execute_strength_reduction): Change base_cand_map to hash table.
>>
>> gcc/testsuite:
>>
>>       PR tree-optimization/46556
>>       * testsuite/gcc.dg/tree-ssa/slsr-27.c: New.
>>       * testsuite/gcc.dg/tree-ssa/slsr-28.c: New.
>>       * testsuite/gcc.dg/tree-ssa/slsr-29.c: New.
>>
>>
>> Index: gcc/testsuite/gcc.dg/tree-ssa/slsr-27.c
>> ===================================================================
>> --- gcc/testsuite/gcc.dg/tree-ssa/slsr-27.c   (revision 0)
>> +++ gcc/testsuite/gcc.dg/tree-ssa/slsr-27.c   (revision 0)
>> @@ -0,0 +1,22 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -fdump-tree-dom2" } */
>> +
>> +struct x
>> +{
>> +  int a[16];
>> +  int b[16];
>> +  int c[16];
>> +};
>> +
>> +extern void foo (int, int, int);
>> +
>> +void
>> +f (struct x *p, unsigned int n)
>> +{
>> +  foo (p->a[n], p->c[n], p->b[n]);
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "\\* 4;" 1 "dom2" } } */
>> +/* { dg-final { scan-tree-dump-times "p_\\d\+\\(D\\) \\+ D" 1 "dom2" } } */
>> +/* { dg-final { scan-tree-dump-times "MEM\\\[\\(struct x \\*\\)D" 3 "dom2" } } */
>> +/* { dg-final { cleanup-tree-dump "dom2" } } */
>> Index: gcc/testsuite/gcc.dg/tree-ssa/slsr-28.c
>> ===================================================================
>> --- gcc/testsuite/gcc.dg/tree-ssa/slsr-28.c   (revision 0)
>> +++ gcc/testsuite/gcc.dg/tree-ssa/slsr-28.c   (revision 0)
>> @@ -0,0 +1,26 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -fdump-tree-dom2" } */
>> +
>> +struct x
>> +{
>> +  int a[16];
>> +  int b[16];
>> +  int c[16];
>> +};
>> +
>> +extern void foo (int, int, int);
>> +
>> +void
>> +f (struct x *p, unsigned int n)
>> +{
>> +  foo (p->a[n], p->c[n], p->b[n]);
>> +  if (n > 12)
>> +    foo (p->a[n], p->c[n], p->b[n]);
>> +  else if (n > 3)
>> +    foo (p->b[n], p->a[n], p->c[n]);
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "\\* 4;" 1 "dom2" } } */
>> +/* { dg-final { scan-tree-dump-times "p_\\d\+\\(D\\) \\+ D" 1 "dom2" } } */
>> +/* { dg-final { scan-tree-dump-times "MEM\\\[\\(struct x \\*\\)D" 9 "dom2" } } */
>> +/* { dg-final { cleanup-tree-dump "dom2" } } */
>> Index: gcc/testsuite/gcc.dg/tree-ssa/slsr-29.c
>> ===================================================================
>> --- gcc/testsuite/gcc.dg/tree-ssa/slsr-29.c   (revision 0)
>> +++ gcc/testsuite/gcc.dg/tree-ssa/slsr-29.c   (revision 0)
>> @@ -0,0 +1,28 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -fdump-tree-dom2" } */
>> +
>> +struct x
>> +{
>> +  int a[16];
>> +  int b[16];
>> +  int c[16];
>> +};
>> +
>> +extern void foo (int, int, int);
>> +
>> +void
>> +f (struct x *p, unsigned int n)
>> +{
>> +  foo (p->a[n], p->c[n], p->b[n]);
>> +  if (n > 3)
>> +    {
>> +      foo (p->a[n], p->c[n], p->b[n]);
>> +      if (n > 12)
>> +     foo (p->b[n], p->a[n], p->c[n]);
>> +    }
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "\\* 4;" 1 "dom2" } } */
>> +/* { dg-final { scan-tree-dump-times "p_\\d\+\\(D\\) \\+ D" 1 "dom2" } } */
>> +/* { dg-final { scan-tree-dump-times "MEM\\\[\\(struct x \\*\\)D" 9 "dom2" } } */
>> +/* { dg-final { cleanup-tree-dump "dom2" } } */
>> Index: gcc/gimple-ssa-strength-reduction.c
>> ===================================================================
>> --- gcc/gimple-ssa-strength-reduction.c       (revision 189025)
>> +++ gcc/gimple-ssa-strength-reduction.c       (working copy)
>> @@ -32,7 +32,7 @@ along with GCC; see the file COPYING3.  If not see
>>     2) Explicit multiplies, unknown constant multipliers,
>>        no conditional increments. (data gathering complete,
>>        replacements pending)
>> -   3) Implicit multiplies in addressing expressions. (pending)
>> +   3) Implicit multiplies in addressing expressions. (complete)
>>     4) Explicit multiplies, conditional increments. (pending)
>>
>>     It would also be possible to apply strength reduction to divisions
>> @@ -107,9 +107,49 @@ along with GCC; see the file COPYING3.  If not see
>>
>>     as a strength reduction opportunity, even though this S1 would
>>     also be replaceable by the S1' above.  This can be added if it
>> -   comes up in practice.  */
>> +   comes up in practice.
>>
>> +   Strength reduction in addressing
>> +   --------------------------------
>> +   There is another kind of candidate known as CAND_REF.  A CAND_REF
>> +   describes a statement containing a memory reference having
>> +   complex addressing that might benefit from strength reduction.
>> +   Specifically, we are interested in references for which
>> +   get_inner_reference returns a base address, offset, and bitpos as
>> +   follows:
>>
>> +     base:    MEM_REF (T1, C1)
>> +     offset:  MULT_EXPR (PLUS_EXPR (T2, C2), C3)
>> +     bitpos:  C4 * BITS_PER_UNIT
>> +
>> +   Here T1 and T2 are arbitrary trees, and C1, C2, C3, C4 are
>> +   arbitrary integer constants.  Note that C2 may be zero, in which
>> +   case the offset will be MULT_EXPR (T2, C3).
>> +
>> +   When this pattern is recognized, the original memory reference
>> +   can be replaced with:
>> +
>> +     MEM_REF (POINTER_PLUS_EXPR (T1, MULT_EXPR (T2, C3)),
>> +              C1 + (C2 * C3) + C4)
>> +
>> +   which distributes the multiply to allow constant folding.  When
>> +   two or more addressing expressions can be represented by MEM_REFs
>> +   of this form, differing only in the constants C1, C2, and C4,
>> +   making this substitution produces more efficient addressing during
>> +   the RTL phases.  When there are not at least two expressions with
>> +   the same values of T1, T2, and C3, there is nothing to be gained
>> +   by the replacement.
>> +
>> +   Strength reduction of CAND_REFs uses the same infrastructure as
>> +   that used by CAND_MULTs and CAND_ADDs.  We record T1 in the base (B)
>> +   field, MULT_EXPR (T2, C3) in the stride (S) field, and
>> +   C1 + (C2 * C3) + C4 in the index (i) field.  A basis for a CAND_REF
>> +   is thus another CAND_REF with the same B and S values.  When at
>> +   least two CAND_REFs are chained together using the basis relation,
>> +   each of them is replaced as above, resulting in improved code
>> +   generation for addressing.  */
>> +
>> +
>>  /* Index into the candidate vector, offset by 1.  VECs are zero-based,
>>     while cand_idx's are one-based, with zero indicating null.  */
>>  typedef unsigned cand_idx;
>> @@ -118,7 +158,8 @@ typedef unsigned cand_idx;
>>  enum cand_kind
>>  {
>>    CAND_MULT,
>> -  CAND_ADD
>> +  CAND_ADD,
>> +  CAND_REF
>>  };
>>
>>  struct slsr_cand_d
>> @@ -137,7 +178,9 @@ struct slsr_cand_d
>>
>>    /* The type of the candidate.  This is normally the type of base_name,
>>       but casts may have occurred when combining feeding instructions.
>> -     A candidate can only be a basis for candidates of the same final type.  */
>> +     A candidate can only be a basis for candidates of the same final type.
>> +     (For CAND_REFs, this is the type to be used for operand 1 of the
>> +     replacement MEM_REF.)  */
>>    tree cand_type;
>>
>>    /* The kind of candidate (CAND_MULT, etc.).  */
>> @@ -211,8 +254,8 @@ static struct pointer_map_t *stmt_cand_map;
>>  /* Obstack for candidates.  */
>>  static struct obstack cand_obstack;
>>
>> -/* Array mapping from base SSA names to chains of candidates.  */
>> -static cand_chain_t *base_cand_map;
>> +/* Hash table embodying a mapping from base names to chains of candidates.  */
>> +static htab_t base_cand_map;
>>
>>  /* Obstack for candidate chains.  */
>>  static struct obstack chain_obstack;
>> @@ -225,6 +268,33 @@ lookup_cand (cand_idx idx)
>>    return VEC_index (slsr_cand_t, cand_vec, idx - 1);
>>  }
>>
>> +/* Callback to produce a hash value for a candidate chain header.  */
>> +
>> +static hashval_t
>> +base_cand_hash (const void *p)
>> +{
>> +  tree base_expr = ((const_cand_chain_t) p)->base_name;
>> +  return iterative_hash_expr (base_expr, 0);
>> +}
>> +
>> +/* Callback when an element is removed from the hash table.
>> +   We never remove entries until the entire table is released.  */
>> +
>> +static void
>> +base_cand_free (void *p ATTRIBUTE_UNUSED)
>> +{
>> +}
>> +
>> +/* Callback to return true if two candidate chain headers are equal.  */
>> +
>> +static int
>> +base_cand_eq (const void *p1, const void *p2)
>> +{
>> +  const_cand_chain_t const chain1 = (const_cand_chain_t) p1;
>> +  const_cand_chain_t const chain2 = (const_cand_chain_t) p2;
>> +  return operand_equal_p (chain1->base_name, chain2->base_name, 0);
>> +}
>> +
>>  /* Use the base name from candidate C to look for possible candidates
>>     that can serve as a basis for C.  Each potential basis must also
>>     appear in a block that dominates the candidate statement and have
>> @@ -235,11 +305,12 @@ lookup_cand (cand_idx idx)
>>  static int
>>  find_basis_for_candidate (slsr_cand_t c)
>>  {
>> +  cand_chain mapping_key;
>>    cand_chain_t chain;
>>    slsr_cand_t basis = NULL;
>>
>> -  gcc_assert (TREE_CODE (c->base_name) == SSA_NAME);
>> -  chain = base_cand_map[SSA_NAME_VERSION (c->base_name)];
>> +  mapping_key.base_name = c->base_name;
>> +  chain = (cand_chain_t) htab_find (base_cand_map, &mapping_key);
>>
>>    for (; chain; chain = chain->next)
>>      {
>> @@ -273,23 +344,23 @@ find_basis_for_candidate (slsr_cand_t c)
>>  static void
>>  record_potential_basis (slsr_cand_t c)
>>  {
>> -  cand_chain_t node, head;
>> -  int index;
>> +  cand_chain_t node;
>> +  void **slot;
>>
>>    node = (cand_chain_t) obstack_alloc (&chain_obstack, sizeof (cand_chain));
>>    node->base_name = c->base_name;
>>    node->cand = c;
>>    node->next = NULL;
>> -  index = SSA_NAME_VERSION (c->base_name);
>> -  head = base_cand_map[index];
>> +  slot = htab_find_slot (base_cand_map, node, INSERT);
>>
>> -  if (head)
>> +  if (*slot)
>>      {
>> +      cand_chain_t head = (cand_chain_t) (*slot);
>>        node->next = head->next;
>>        head->next = node;
>>      }
>>    else
>> -    base_cand_map[index] = node;
>> +    *slot = node;
>>  }
>>
>>  /* Allocate storage for a new candidate and initialize its fields.
>> @@ -390,10 +461,11 @@ base_cand_from_table (tree base_in)
>>      return (slsr_cand_t) NULL;
>>
>>    result = (slsr_cand_t *) pointer_map_contains (stmt_cand_map, def);
>> -  if (!result)
>> -    return (slsr_cand_t) NULL;
>> +
>> +  if (result && (*result)->kind != CAND_REF)
>> +    return *result;
>>
>> -  return *result;
>> +  return (slsr_cand_t) NULL;
>>  }
>>
>>  /* Add an entry to the statement-to-candidate mapping.  */
>> @@ -406,6 +478,127 @@ add_cand_for_stmt (gimple gs, slsr_cand_t c)
>>    *slot = c;
>>  }
>>
>> +/* Look for the following pattern:
>> +
>> +    *PBASE:    MEM_REF (T1, C1)
>> +
>> +    *POFFSET:  MULT_EXPR (T2, C3)        [C2 is zero]
>> +                     or
>> +               MULT_EXPR (PLUS_EXPR (T2, C2), C3)
>> +                     or
>> +               MULT_EXPR (MINUS_EXPR (T2, -C2), C3)
>> +
>> +    *PINDEX:   C4 * BITS_PER_UNIT
>> +
>> +   If not present, leave the input values unchanged and return FALSE.
>> +   Otherwise, modify the input values as follows and return TRUE:
>> +
>> +    *PBASE:    T1
>> +    *POFFSET:  MULT_EXPR (T2, C3)
>> +    *PINDEX:   C1 + (C2 * C3) + C4  */
>> +
>> +static bool
>> +restructure_reference (tree *pbase, tree *poffset, double_int *pindex,
>> +                    tree *ptype)
>> +{
>> +  tree base = *pbase, offset = *poffset;
>> +  double_int index = *pindex;
>> +  double_int bpu = uhwi_to_double_int (BITS_PER_UNIT);
>> +  tree mult_op0, mult_op1, t1, t2, type;
>> +  double_int c1, c2, c3, c4;
>> +
>> +  if (!base
>> +      || !offset
>> +      || TREE_CODE (base) != MEM_REF
>> +      || TREE_CODE (offset) != MULT_EXPR
>> +      || TREE_CODE (TREE_OPERAND (offset, 1)) != INTEGER_CST
>> +      || !double_int_zero_p (double_int_umod (index, bpu, FLOOR_MOD_EXPR)))
>> +    return false;
>> +
>> +  t1 = TREE_OPERAND (base, 0);
>> +  c1 = mem_ref_offset (base);
>> +  type = TREE_TYPE (TREE_OPERAND (base, 1));
>> +
>> +  mult_op0 = TREE_OPERAND (offset, 0);
>> +  mult_op1 = TREE_OPERAND (offset, 1);
>> +
>> +  c3 = tree_to_double_int (mult_op1);
>> +
>> +  if (TREE_CODE (mult_op0) == PLUS_EXPR)
>> +
>> +    if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
>> +      {
>> +     t2 = TREE_OPERAND (mult_op0, 0);
>> +     c2 = tree_to_double_int (TREE_OPERAND (mult_op0, 1));
>> +      }
>> +    else
>> +      return false;
>> +
>> +  else if (TREE_CODE (mult_op0) == MINUS_EXPR)
>> +
>> +    if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
>> +      {
>> +     t2 = TREE_OPERAND (mult_op0, 0);
>> +     c2 = double_int_neg (tree_to_double_int (TREE_OPERAND (mult_op0, 1)));
>> +      }
>> +    else
>> +      return false;
>> +
>> +  else
>> +    {
>> +      t2 = mult_op0;
>> +      c2 = double_int_zero;
>> +    }
>> +
>> +  c4 = double_int_udiv (index, bpu, FLOOR_DIV_EXPR);
>> +
>> +  *pbase = t1;
>> +  *poffset = fold_build2 (MULT_EXPR, sizetype, t2,
>> +                       double_int_to_tree (sizetype, c3));
>> +  *pindex = double_int_add (double_int_add (c1, double_int_mul (c2, c3)), c4);
>> +  *ptype = type;
>> +
>> +  return true;
>> +}
>> +
>> +/* Given GS which contains a data reference, create a CAND_REF entry in
>> +   the candidate table and attempt to find a basis.  */
>> +
>> +static void
>> +slsr_process_ref (gimple gs)
>> +{
>> +  tree ref_expr, base, offset, type;
>> +  HOST_WIDE_INT bitsize, bitpos;
>> +  enum machine_mode mode;
>> +  int unsignedp, volatilep;
>> +  double_int index;
>> +  slsr_cand_t c;
>> +
>> +  if (gimple_vdef (gs))
>> +    ref_expr = gimple_assign_lhs (gs);
>> +  else
>> +    ref_expr = gimple_assign_rhs1 (gs);
>> +
>> +  if (!handled_component_p (ref_expr)
>> +      || TREE_CODE (ref_expr) == BIT_FIELD_REF
>> +      || (TREE_CODE (ref_expr) == COMPONENT_REF
>> +       && DECL_BIT_FIELD (TREE_OPERAND (ref_expr, 1))))
>> +    return;
>> +
>> +  base = get_inner_reference (ref_expr, &bitsize, &bitpos, &offset, &mode,
>> +                           &unsignedp, &volatilep, false);
>> +  index = uhwi_to_double_int (bitpos);
>> +
>> +  if (!restructure_reference (&base, &offset, &index, &type))
>> +    return;
>> +
>> +  c = alloc_cand_and_find_basis (CAND_REF, gs, base, index, offset,
>> +                              type, 0);
>> +
>> +  /* Add the candidate to the statement-candidate mapping.  */
>> +  add_cand_for_stmt (gs, c);
>> +}
>> +
>>  /* Create a candidate entry for a statement GS, where GS multiplies
>>     two SSA names BASE_IN and STRIDE_IN.  Propagate any known information
>>     about the two SSA names into the new candidate.  Return the new
>> @@ -1056,8 +1249,12 @@ find_candidates_in_block (struct dom_walk_data *wa
>>      {
>>        gimple gs = gsi_stmt (gsi);
>>
>> -      if (is_gimple_assign (gs)
>> -       && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs)))))
>> +      if (gimple_vuse (gs) && gimple_assign_single_p (gs))
>> +     slsr_process_ref (gs);
>> +
>> +      else if (is_gimple_assign (gs)
>> +            && SCALAR_INT_MODE_P
>> +                 (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs)))))
>>       {
>>         tree rhs1 = NULL_TREE, rhs2 = NULL_TREE;
>>
>> @@ -1151,6 +1348,15 @@ dump_candidate (slsr_cand_t c)
>>        print_generic_expr (dump_file, c->stride, 0);
>>        fputs (") : ", dump_file);
>>        break;
>> +    case CAND_REF:
>> +      fputs ("     REF  : ", dump_file);
>> +      print_generic_expr (dump_file, c->base_name, 0);
>> +      fputs (" + (", dump_file);
>> +      print_generic_expr (dump_file, c->stride, 0);
>> +      fputs (") + ", dump_file);
>> +      dump_double_int (dump_file, c->index, false);
>> +      fputs (" : ", dump_file);
>> +      break;
>>      default:
>>        gcc_unreachable ();
>>      }
>> @@ -1181,36 +1387,33 @@ dump_cand_vec (void)
>>      dump_candidate (c);
>>  }
>>
>> -/* Dump the candidate chains.  */
>> +/* Callback used to dump the candidate chains hash table.  */
>>
>> -static void
>> -dump_cand_chains (void)
>> +static int
>> +base_cand_dump_callback (void **slot, void *ignored ATTRIBUTE_UNUSED)
>>  {
>> -  unsigned i;
>> +  const_cand_chain_t chain = *((const_cand_chain_t *) slot);
>> +  cand_chain_t p;
>>
>> -  fprintf (dump_file, "\nStrength reduction candidate chains:\n\n");
>> +  print_generic_expr (dump_file, chain->base_name, 0);
>> +  fprintf (dump_file, " -> %d", chain->cand->cand_num);
>>
>> -  for (i = 0; i < num_ssa_names; i++)
>> -    {
>> -      const_cand_chain_t chain = base_cand_map[i];
>> +  for (p = chain->next; p; p = p->next)
>> +    fprintf (dump_file, " -> %d", p->cand->cand_num);
>>
>> -      if (chain)
>> -     {
>> -       cand_chain_t p;
>> +  fputs ("\n", dump_file);
>> +  return 1;
>> +}
>>
>> -       print_generic_expr (dump_file, chain->base_name, 0);
>> -       fprintf (dump_file, " -> %d", chain->cand->cand_num);
>> +/* Dump the candidate chains.  */
>>
>> -       for (p = chain->next; p; p = p->next)
>> -         fprintf (dump_file, " -> %d", p->cand->cand_num);
>> -
>> -       fputs ("\n", dump_file);
>> -     }
>> -    }
>> -
>> +static void
>> +dump_cand_chains (void)
>> +{
>> +  fprintf (dump_file, "\nStrength reduction candidate chains:\n\n");
>> +  htab_traverse_noresize (base_cand_map, base_cand_dump_callback, NULL);
>>    fputs ("\n", dump_file);
>>  }
>> -
>>
>>  /* Recursive helper for unconditional_cands_with_known_stride_p.
>>     Returns TRUE iff C, its siblings, and its dependents are all
>> @@ -1246,6 +1449,53 @@ unconditional_cands_with_known_stride_p (slsr_cand
>>    return unconditional_cands (lookup_cand (root->dependent));
>>  }
>>
>> +/* Replace *EXPR in candidate C with an equivalent strength-reduced
>> +   data reference.  */
>> +
>> +static void
>> +replace_ref (tree *expr, slsr_cand_t c)
>> +{
>> +  tree add_expr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (c->base_name),
>> +                            c->base_name, c->stride);
>> +  tree mem_ref = fold_build2 (MEM_REF, TREE_TYPE (*expr), add_expr,
>> +                           double_int_to_tree (c->cand_type, c->index));
>> +
>> +  /* Gimplify the base addressing expression for the new MEM_REF tree.  */
>> +  gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
>> +  TREE_OPERAND (mem_ref, 0)
>> +    = force_gimple_operand_gsi (&gsi, TREE_OPERAND (mem_ref, 0),
>> +                             /*simple_p=*/true, NULL,
>> +                             /*before=*/true, GSI_SAME_STMT);
>> +  copy_ref_info (mem_ref, *expr);
>> +  *expr = mem_ref;
>> +  update_stmt (c->cand_stmt);
>> +}
>> +
>> +/* Replace CAND_REF candidate C, each sibling of candidate C, and each
>> +   dependent of candidate C with an equivalent strength-reduced data
>> +   reference.  */
>> +
>> +static void
>> +replace_refs (slsr_cand_t c)
>> +{
>> +  if (gimple_vdef (c->cand_stmt))
>> +    {
>> +      tree *lhs = gimple_assign_lhs_ptr (c->cand_stmt);
>> +      replace_ref (lhs, c);
>> +    }
>> +  else
>> +    {
>> +      tree *rhs = gimple_assign_rhs1_ptr (c->cand_stmt);
>> +      replace_ref (rhs, c);
>> +    }
>> +
>> +  if (c->sibling)
>> +    replace_refs (lookup_cand (c->sibling));
>> +
>> +  if (c->dependent)
>> +    replace_refs (lookup_cand (c->dependent));
>> +}
>> +
>>  /* Calculate the increment required for candidate C relative to
>>     its basis.  */
>>
>> @@ -1413,13 +1663,18 @@ analyze_candidates_and_replace (void)
>>
>>        first_dep = lookup_cand (c->dependent);
>>
>> +      /* If this is a chain of CAND_REFs, unconditionally replace
>> +      each of them with a strength-reduced data reference.  */
>> +      if (c->kind == CAND_REF)
>> +     replace_refs (c);
>> +
>>        /* If the common stride of all related candidates is a
>>        known constant, and none of these has a phi-dependence,
>>        then all replacements are considered profitable.
>>        Each replaces a multiply by a single add, with the
>>        possibility that a feeding add also goes dead as a
>>        result.  */
>> -      if (unconditional_cands_with_known_stride_p (c))
>> +      else if (unconditional_cands_with_known_stride_p (c))
>>       replace_dependents (first_dep);
>>
>>        /* TODO:  When the stride is an SSA name, it may still be
>> @@ -1428,9 +1683,6 @@ analyze_candidates_and_replace (void)
>>        can be reused, or are less expensive to calculate than
>>        the replaced statements.  */
>>
>> -      /* TODO:  Strength-reduce data references with implicit
>> -      multiplication in their addressing expressions.  */
>> -
>>        /* TODO:  When conditional increments occur so that a
>>        candidate is dependent upon a phi-basis, the cost of
>>        introducing a temporary must be accounted for.  */
>> @@ -1455,8 +1707,8 @@ execute_strength_reduction (void)
>>    gcc_obstack_init (&chain_obstack);
>>
>>    /* Allocate the mapping from base names to candidate chains.  */
>> -  base_cand_map = XNEWVEC (cand_chain_t, num_ssa_names);
>> -  memset (base_cand_map, 0, num_ssa_names * sizeof (cand_chain_t));
>> +  base_cand_map = htab_create (500, base_cand_hash,
>> +                            base_cand_eq, base_cand_free);
>>
>>    /* Initialize the loop optimizer.  We need to detect flow across
>>       back edges, and this gives us dominator information as well.  */
>> @@ -1490,7 +1742,7 @@ execute_strength_reduction (void)
>>    /* Free resources.  */
>>    fini_walk_dominator_tree (&walk_data);
>>    loop_optimizer_finalize ();
>> -  free (base_cand_map);
>> +  htab_delete (base_cand_map);
>>    obstack_free (&chain_obstack, NULL);
>>    pointer_map_destroy (stmt_cand_map);
>>    VEC_free (slsr_cand_t, heap, cand_vec);
>>
>
diff mbox

Patch

Index: gcc/testsuite/gcc.dg/tree-ssa/slsr-27.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/slsr-27.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/slsr-27.c	(revision 0)
@@ -0,0 +1,22 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-dom2" } */
+
+struct x
+{
+  int a[16];
+  int b[16];
+  int c[16];
+};
+
+extern void foo (int, int, int);
+
+void
+f (struct x *p, unsigned int n)
+{
+  foo (p->a[n], p->c[n], p->b[n]);
+}
+
+/* { dg-final { scan-tree-dump-times "\\* 4;" 1 "dom2" } } */
+/* { dg-final { scan-tree-dump-times "p_\\d\+\\(D\\) \\+ D" 1 "dom2" } } */
+/* { dg-final { scan-tree-dump-times "MEM\\\[\\(struct x \\*\\)D" 3 "dom2" } } */
+/* { dg-final { cleanup-tree-dump "dom2" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/slsr-28.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/slsr-28.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/slsr-28.c	(revision 0)
@@ -0,0 +1,26 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-dom2" } */
+
+struct x
+{
+  int a[16];
+  int b[16];
+  int c[16];
+};
+
+extern void foo (int, int, int);
+
+void
+f (struct x *p, unsigned int n)
+{
+  foo (p->a[n], p->c[n], p->b[n]);
+  if (n > 12)
+    foo (p->a[n], p->c[n], p->b[n]);
+  else if (n > 3)
+    foo (p->b[n], p->a[n], p->c[n]);
+}
+
+/* { dg-final { scan-tree-dump-times "\\* 4;" 1 "dom2" } } */
+/* { dg-final { scan-tree-dump-times "p_\\d\+\\(D\\) \\+ D" 1 "dom2" } } */
+/* { dg-final { scan-tree-dump-times "MEM\\\[\\(struct x \\*\\)D" 9 "dom2" } } */
+/* { dg-final { cleanup-tree-dump "dom2" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/slsr-29.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/slsr-29.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/slsr-29.c	(revision 0)
@@ -0,0 +1,28 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-dom2" } */
+
+struct x
+{
+  int a[16];
+  int b[16];
+  int c[16];
+};
+
+extern void foo (int, int, int);
+
+void
+f (struct x *p, unsigned int n)
+{
+  foo (p->a[n], p->c[n], p->b[n]);
+  if (n > 3)
+    {
+      foo (p->a[n], p->c[n], p->b[n]);
+      if (n > 12)
+	foo (p->b[n], p->a[n], p->c[n]);
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "\\* 4;" 1 "dom2" } } */
+/* { dg-final { scan-tree-dump-times "p_\\d\+\\(D\\) \\+ D" 1 "dom2" } } */
+/* { dg-final { scan-tree-dump-times "MEM\\\[\\(struct x \\*\\)D" 9 "dom2" } } */
+/* { dg-final { cleanup-tree-dump "dom2" } } */
Index: gcc/gimple-ssa-strength-reduction.c
===================================================================
--- gcc/gimple-ssa-strength-reduction.c	(revision 189025)
+++ gcc/gimple-ssa-strength-reduction.c	(working copy)
@@ -32,7 +32,7 @@  along with GCC; see the file COPYING3.  If not see
    2) Explicit multiplies, unknown constant multipliers,
       no conditional increments. (data gathering complete,
       replacements pending)
-   3) Implicit multiplies in addressing expressions. (pending)
+   3) Implicit multiplies in addressing expressions. (complete)
    4) Explicit multiplies, conditional increments. (pending)
 
    It would also be possible to apply strength reduction to divisions
@@ -107,9 +107,49 @@  along with GCC; see the file COPYING3.  If not see
 
    as a strength reduction opportunity, even though this S1 would
    also be replaceable by the S1' above.  This can be added if it
-   comes up in practice.  */
+   comes up in practice.
 
+   Strength reduction in addressing
+   --------------------------------
+   There is another kind of candidate known as CAND_REF.  A CAND_REF
+   describes a statement containing a memory reference having 
+   complex addressing that might benefit from strength reduction.
+   Specifically, we are interested in references for which 
+   get_inner_reference returns a base address, offset, and bitpos as
+   follows:
 
+     base:    MEM_REF (T1, C1)
+     offset:  MULT_EXPR (PLUS_EXPR (T2, C2), C3)
+     bitpos:  C4 * BITS_PER_UNIT
+
+   Here T1 and T2 are arbitrary trees, and C1, C2, C3, C4 are 
+   arbitrary integer constants.  Note that C2 may be zero, in which
+   case the offset will be MULT_EXPR (T2, C3).
+
+   When this pattern is recognized, the original memory reference
+   can be replaced with:
+
+     MEM_REF (POINTER_PLUS_EXPR (T1, MULT_EXPR (T2, C3)),
+              C1 + (C2 * C3) + C4)
+
+   which distributes the multiply to allow constant folding.  When
+   two or more addressing expressions can be represented by MEM_REFs
+   of this form, differing only in the constants C1, C2, and C4,
+   making this substitution produces more efficient addressing during
+   the RTL phases.  When there are not at least two expressions with
+   the same values of T1, T2, and C3, there is nothing to be gained
+   by the replacement.
+
+   Strength reduction of CAND_REFs uses the same infrastructure as
+   that used by CAND_MULTs and CAND_ADDs.  We record T1 in the base (B)
+   field, MULT_EXPR (T2, C3) in the stride (S) field, and 
+   C1 + (C2 * C3) + C4 in the index (i) field.  A basis for a CAND_REF
+   is thus another CAND_REF with the same B and S values.  When at 
+   least two CAND_REFs are chained together using the basis relation,
+   each of them is replaced as above, resulting in improved code
+   generation for addressing.  */
+
+
 /* Index into the candidate vector, offset by 1.  VECs are zero-based,
    while cand_idx's are one-based, with zero indicating null.  */
 typedef unsigned cand_idx;
@@ -118,7 +158,8 @@  typedef unsigned cand_idx;
 enum cand_kind
 {
   CAND_MULT,
-  CAND_ADD
+  CAND_ADD,
+  CAND_REF
 };
 
 struct slsr_cand_d
@@ -137,7 +178,9 @@  struct slsr_cand_d
 
   /* The type of the candidate.  This is normally the type of base_name,
      but casts may have occurred when combining feeding instructions.
-     A candidate can only be a basis for candidates of the same final type.  */
+     A candidate can only be a basis for candidates of the same final type.
+     (For CAND_REFs, this is the type to be used for operand 1 of the
+     replacement MEM_REF.)  */
   tree cand_type;
 
   /* The kind of candidate (CAND_MULT, etc.).  */
@@ -211,8 +254,8 @@  static struct pointer_map_t *stmt_cand_map;
 /* Obstack for candidates.  */
 static struct obstack cand_obstack;
 
-/* Array mapping from base SSA names to chains of candidates.  */
-static cand_chain_t *base_cand_map;
+/* Hash table embodying a mapping from base names to chains of candidates.  */
+static htab_t base_cand_map;
 
 /* Obstack for candidate chains.  */
 static struct obstack chain_obstack;
@@ -225,6 +268,33 @@  lookup_cand (cand_idx idx)
   return VEC_index (slsr_cand_t, cand_vec, idx - 1);
 }
 
+/* Callback to produce a hash value for a candidate chain header.  */
+
+static hashval_t
+base_cand_hash (const void *p)
+{
+  tree base_expr = ((const_cand_chain_t) p)->base_name;
+  return iterative_hash_expr (base_expr, 0);
+}
+
+/* Callback when an element is removed from the hash table.
+   We never remove entries until the entire table is released.  */
+
+static void
+base_cand_free (void *p ATTRIBUTE_UNUSED)
+{
+}
+
+/* Callback to return true if two candidate chain headers are equal.  */
+
+static int
+base_cand_eq (const void *p1, const void *p2)
+{
+  const_cand_chain_t const chain1 = (const_cand_chain_t) p1;
+  const_cand_chain_t const chain2 = (const_cand_chain_t) p2;
+  return operand_equal_p (chain1->base_name, chain2->base_name, 0);
+}
+
 /* Use the base name from candidate C to look for possible candidates
    that can serve as a basis for C.  Each potential basis must also
    appear in a block that dominates the candidate statement and have
@@ -235,11 +305,12 @@  lookup_cand (cand_idx idx)
 static int
 find_basis_for_candidate (slsr_cand_t c)
 {
+  cand_chain mapping_key;
   cand_chain_t chain;
   slsr_cand_t basis = NULL;
 
-  gcc_assert (TREE_CODE (c->base_name) == SSA_NAME);
-  chain = base_cand_map[SSA_NAME_VERSION (c->base_name)];
+  mapping_key.base_name = c->base_name;
+  chain = (cand_chain_t) htab_find (base_cand_map, &mapping_key);
 
   for (; chain; chain = chain->next)
     {
@@ -273,23 +344,23 @@  find_basis_for_candidate (slsr_cand_t c)
 static void
 record_potential_basis (slsr_cand_t c)
 {
-  cand_chain_t node, head;
-  int index;
+  cand_chain_t node;
+  void **slot;
 
   node = (cand_chain_t) obstack_alloc (&chain_obstack, sizeof (cand_chain));
   node->base_name = c->base_name;
   node->cand = c;
   node->next = NULL;
-  index = SSA_NAME_VERSION (c->base_name);
-  head = base_cand_map[index];
+  slot = htab_find_slot (base_cand_map, node, INSERT);
 
-  if (head)
+  if (*slot)
     {
+      cand_chain_t head = (cand_chain_t) (*slot);
       node->next = head->next;
       head->next = node;
     }
   else
-    base_cand_map[index] = node;
+    *slot = node;
 }
 
 /* Allocate storage for a new candidate and initialize its fields.
@@ -390,10 +461,11 @@  base_cand_from_table (tree base_in)
     return (slsr_cand_t) NULL;
 
   result = (slsr_cand_t *) pointer_map_contains (stmt_cand_map, def);
-  if (!result)
-    return (slsr_cand_t) NULL;
+  
+  if (result && (*result)->kind != CAND_REF)
+    return *result;
 
-  return *result;
+  return (slsr_cand_t) NULL;
 }
 
 /* Add an entry to the statement-to-candidate mapping.  */
@@ -406,6 +478,127 @@  add_cand_for_stmt (gimple gs, slsr_cand_t c)
   *slot = c;
 }
 
+/* Look for the following pattern:
+
+    *PBASE:    MEM_REF (T1, C1)
+
+    *POFFSET:  MULT_EXPR (T2, C3)        [C2 is zero]
+                     or
+               MULT_EXPR (PLUS_EXPR (T2, C2), C3)
+                     or
+               MULT_EXPR (MINUS_EXPR (T2, -C2), C3)
+
+    *PINDEX:   C4 * BITS_PER_UNIT
+
+   If not present, leave the input values unchanged and return FALSE.
+   Otherwise, modify the input values as follows and return TRUE:
+
+    *PBASE:    T1
+    *POFFSET:  MULT_EXPR (T2, C3)
+    *PINDEX:   C1 + (C2 * C3) + C4  */
+
+static bool
+restructure_reference (tree *pbase, tree *poffset, double_int *pindex,
+		       tree *ptype)
+{
+  tree base = *pbase, offset = *poffset;
+  double_int index = *pindex;
+  double_int bpu = uhwi_to_double_int (BITS_PER_UNIT);
+  tree mult_op0, mult_op1, t1, t2, type;
+  double_int c1, c2, c3, c4;
+
+  if (!base
+      || !offset
+      || TREE_CODE (base) != MEM_REF
+      || TREE_CODE (offset) != MULT_EXPR
+      || TREE_CODE (TREE_OPERAND (offset, 1)) != INTEGER_CST
+      || !double_int_zero_p (double_int_umod (index, bpu, FLOOR_MOD_EXPR)))
+    return false;
+
+  t1 = TREE_OPERAND (base, 0);
+  c1 = mem_ref_offset (base);
+  type = TREE_TYPE (TREE_OPERAND (base, 1));
+
+  mult_op0 = TREE_OPERAND (offset, 0);
+  mult_op1 = TREE_OPERAND (offset, 1);
+
+  c3 = tree_to_double_int (mult_op1);
+
+  if (TREE_CODE (mult_op0) == PLUS_EXPR)
+
+    if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
+      {
+	t2 = TREE_OPERAND (mult_op0, 0);
+	c2 = tree_to_double_int (TREE_OPERAND (mult_op0, 1));
+      }
+    else
+      return false;
+
+  else if (TREE_CODE (mult_op0) == MINUS_EXPR)
+
+    if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
+      {
+	t2 = TREE_OPERAND (mult_op0, 0);
+	c2 = double_int_neg (tree_to_double_int (TREE_OPERAND (mult_op0, 1)));
+      }
+    else
+      return false;
+
+  else
+    {
+      t2 = mult_op0;
+      c2 = double_int_zero;
+    }
+
+  c4 = double_int_udiv (index, bpu, FLOOR_DIV_EXPR);
+
+  *pbase = t1;
+  *poffset = fold_build2 (MULT_EXPR, sizetype, t2,
+			  double_int_to_tree (sizetype, c3));
+  *pindex = double_int_add (double_int_add (c1, double_int_mul (c2, c3)), c4);
+  *ptype = type;
+
+  return true;
+}
+
+/* Given GS which contains a data reference, create a CAND_REF entry in
+   the candidate table and attempt to find a basis.  */
+
+static void
+slsr_process_ref (gimple gs)
+{
+  tree ref_expr, base, offset, type;
+  HOST_WIDE_INT bitsize, bitpos;
+  enum machine_mode mode;
+  int unsignedp, volatilep;
+  double_int index;
+  slsr_cand_t c;
+
+  if (gimple_vdef (gs))
+    ref_expr = gimple_assign_lhs (gs);
+  else
+    ref_expr = gimple_assign_rhs1 (gs);
+
+  if (!handled_component_p (ref_expr)
+      || TREE_CODE (ref_expr) == BIT_FIELD_REF
+      || (TREE_CODE (ref_expr) == COMPONENT_REF
+	  && DECL_BIT_FIELD (TREE_OPERAND (ref_expr, 1))))
+    return;
+
+  base = get_inner_reference (ref_expr, &bitsize, &bitpos, &offset, &mode,
+			      &unsignedp, &volatilep, false);
+  index = uhwi_to_double_int (bitpos);
+
+  if (!restructure_reference (&base, &offset, &index, &type))
+    return;
+
+  c = alloc_cand_and_find_basis (CAND_REF, gs, base, index, offset,
+				 type, 0);
+
+  /* Add the candidate to the statement-candidate mapping.  */
+  add_cand_for_stmt (gs, c);
+}
+
 /* Create a candidate entry for a statement GS, where GS multiplies
    two SSA names BASE_IN and STRIDE_IN.  Propagate any known information
    about the two SSA names into the new candidate.  Return the new
@@ -1056,8 +1249,12 @@  find_candidates_in_block (struct dom_walk_data *wa
     {
       gimple gs = gsi_stmt (gsi);
 
-      if (is_gimple_assign (gs)
-	  && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs)))))
+      if (gimple_vuse (gs) && gimple_assign_single_p (gs))
+	slsr_process_ref (gs);
+
+      else if (is_gimple_assign (gs)
+	       && SCALAR_INT_MODE_P
+	            (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs)))))
 	{
 	  tree rhs1 = NULL_TREE, rhs2 = NULL_TREE;
 
@@ -1151,6 +1348,15 @@  dump_candidate (slsr_cand_t c)
       print_generic_expr (dump_file, c->stride, 0);
       fputs (") : ", dump_file);
       break;
+    case CAND_REF:
+      fputs ("     REF  : ", dump_file);
+      print_generic_expr (dump_file, c->base_name, 0);
+      fputs (" + (", dump_file);
+      print_generic_expr (dump_file, c->stride, 0);
+      fputs (") + ", dump_file);
+      dump_double_int (dump_file, c->index, false);
+      fputs (" : ", dump_file);
+      break;
     default:
       gcc_unreachable ();
     }
@@ -1181,36 +1387,33 @@  dump_cand_vec (void)
     dump_candidate (c);
 }
 
-/* Dump the candidate chains.  */
+/* Callback used to dump the candidate chains hash table.  */
 
-static void
-dump_cand_chains (void)
+static int
+base_cand_dump_callback (void **slot, void *ignored ATTRIBUTE_UNUSED)
 {
-  unsigned i;
+  const_cand_chain_t chain = *((const_cand_chain_t *) slot);
+  cand_chain_t p;
 
-  fprintf (dump_file, "\nStrength reduction candidate chains:\n\n");
+  print_generic_expr (dump_file, chain->base_name, 0);
+  fprintf (dump_file, " -> %d", chain->cand->cand_num);
 
-  for (i = 0; i < num_ssa_names; i++)
-    {
-      const_cand_chain_t chain = base_cand_map[i];
+  for (p = chain->next; p; p = p->next)
+    fprintf (dump_file, " -> %d", p->cand->cand_num);
 
-      if (chain)
-	{
-	  cand_chain_t p;
+  fputs ("\n", dump_file);
+  return 1;
+}
 
-	  print_generic_expr (dump_file, chain->base_name, 0);
-	  fprintf (dump_file, " -> %d", chain->cand->cand_num);
+/* Dump the candidate chains.  */
 
-	  for (p = chain->next; p; p = p->next)
-	    fprintf (dump_file, " -> %d", p->cand->cand_num);
-
-	  fputs ("\n", dump_file);
-	}
-    }
-
+static void
+dump_cand_chains (void)
+{
+  fprintf (dump_file, "\nStrength reduction candidate chains:\n\n");
+  htab_traverse_noresize (base_cand_map, base_cand_dump_callback, NULL);
   fputs ("\n", dump_file);
 }
-
 
 /* Recursive helper for unconditional_cands_with_known_stride_p.
    Returns TRUE iff C, its siblings, and its dependents are all
@@ -1246,6 +1449,53 @@  unconditional_cands_with_known_stride_p (slsr_cand
   return unconditional_cands (lookup_cand (root->dependent));
 }
 
+/* Replace *EXPR in candidate C with an equivalent strength-reduced
+   data reference.  */
+
+static void
+replace_ref (tree *expr, slsr_cand_t c)
+{
+  tree add_expr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (c->base_name),
+			       c->base_name, c->stride);
+  tree mem_ref = fold_build2 (MEM_REF, TREE_TYPE (*expr), add_expr,
+			      double_int_to_tree (c->cand_type, c->index));
+  
+  /* Gimplify the base addressing expression for the new MEM_REF tree.  */
+  gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
+  TREE_OPERAND (mem_ref, 0)
+    = force_gimple_operand_gsi (&gsi, TREE_OPERAND (mem_ref, 0),
+				/*simple_p=*/true, NULL,
+				/*before=*/true, GSI_SAME_STMT);
+  copy_ref_info (mem_ref, *expr);
+  *expr = mem_ref;
+  update_stmt (c->cand_stmt);
+}
+
+/* Replace CAND_REF candidate C, each sibling of candidate C, and each
+   dependent of candidate C with an equivalent strength-reduced data
+   reference.  */
+
+static void
+replace_refs (slsr_cand_t c)
+{
+  if (gimple_vdef (c->cand_stmt))
+    {
+      tree *lhs = gimple_assign_lhs_ptr (c->cand_stmt);
+      replace_ref (lhs, c);
+    }
+  else
+    {
+      tree *rhs = gimple_assign_rhs1_ptr (c->cand_stmt);
+      replace_ref (rhs, c);
+    }
+
+  if (c->sibling)
+    replace_refs (lookup_cand (c->sibling));
+
+  if (c->dependent)
+    replace_refs (lookup_cand (c->dependent));
+}
+
 /* Calculate the increment required for candidate C relative to 
    its basis.  */
 
@@ -1413,13 +1663,18 @@  analyze_candidates_and_replace (void)
 
       first_dep = lookup_cand (c->dependent);
 
+      /* If this is a chain of CAND_REFs, unconditionally replace
+	 each of them with a strength-reduced data reference.  */
+      if (c->kind == CAND_REF)
+	replace_refs (c);
+
       /* If the common stride of all related candidates is a
 	 known constant, and none of these has a phi-dependence,
 	 then all replacements are considered profitable.
 	 Each replaces a multiply by a single add, with the
 	 possibility that a feeding add also goes dead as a
 	 result.  */
-      if (unconditional_cands_with_known_stride_p (c))
+      else if (unconditional_cands_with_known_stride_p (c))
 	replace_dependents (first_dep);
 
       /* TODO:  When the stride is an SSA name, it may still be
@@ -1428,9 +1683,6 @@  analyze_candidates_and_replace (void)
 	 can be reused, or are less expensive to calculate than
 	 the replaced statements.  */
 
-      /* TODO:  Strength-reduce data references with implicit
-	 multiplication in their addressing expressions.  */
-
       /* TODO:  When conditional increments occur so that a 
 	 candidate is dependent upon a phi-basis, the cost of
 	 introducing a temporary must be accounted for.  */
@@ -1455,8 +1707,8 @@  execute_strength_reduction (void)
   gcc_obstack_init (&chain_obstack);
 
   /* Allocate the mapping from base names to candidate chains.  */
-  base_cand_map = XNEWVEC (cand_chain_t, num_ssa_names);
-  memset (base_cand_map, 0, num_ssa_names * sizeof (cand_chain_t));
+  base_cand_map = htab_create (500, base_cand_hash,
+			       base_cand_eq, base_cand_free);
 
   /* Initialize the loop optimizer.  We need to detect flow across
      back edges, and this gives us dominator information as well.  */
@@ -1490,7 +1742,7 @@  execute_strength_reduction (void)
   /* Free resources.  */
   fini_walk_dominator_tree (&walk_data);
   loop_optimizer_finalize ();
-  free (base_cand_map);
+  htab_delete (base_cand_map);
   obstack_free (&chain_obstack, NULL);
   pointer_map_destroy (stmt_cand_map);
   VEC_free (slsr_cand_t, heap, cand_vec);