diff mbox

[GCC8,22/33] Generate TMR in new reassociation order

Message ID CAHFci29WO5e9QTDN6CP=CAQcPDQ4c4R6afUMtdd4cC-S21B9-g@mail.gmail.com
State New
Headers show

Commit Message

Bin.Cheng May 2, 2017, 3 p.m. UTC
On Tue, May 2, 2017 at 3:09 PM, Richard Biener
<richard.guenther@gmail.com> wrote:
> On Wed, Apr 26, 2017 at 12:20 PM, Bin.Cheng <amker.cheng@gmail.com> wrote:
>> This is another one where context diff might help.  No code change
>> from previous version.
>
> This isn't a context diff.
Thanks for reviewing.  I used git diff -U20 to generate patch.  Maybe
20 is too small?

>
> Anyways, changes like using 'tmp' really interfere with creating a
> useful diff so it's hard
> to review no-op changes from the real meat.  I spot re-ordering and
> doing parts.offset
> in a different way first.
>
> I wonder if we can do better by first re-factoring fields of mem_address to how
> TARGET_MEM_REF is laid out now -- merge symbol and base and introduce
> index2 so that create_mem_ref_raw becomes a 1:1 mapping.
Probably.  Note the mapping shall be done in addr_to_parts?  Changes
in create_mem_ref tries to simplify address expression not supported
by current target into supported forms.

>
> Anyway, the patch looks fine (after much staring) but it could really need some
> more commenting on what we try to do in what order and why.
Simple comments added as in updated patch.  Will commit this updated version.

Thanks,
bin
>
> Thanks,
> Richard.
>
>> Thanks,
>> bin
>>
>> On Tue, Apr 18, 2017 at 11:49 AM, Bin Cheng <Bin.Cheng@arm.com> wrote:
>>> Hi,
>>> This patch generates TMR for ivopts in new re-association order.  General idea is,
>>> by querying target's addressing mode, we put as much address computation as possible
>>> in memory reference.  For computation that has to be done outside of memory reference,
>>> we re-associate the address expression in new order so that loop invariant expression
>>> is kept and exposed for later lim pass.
>>> Is it OK?
>>>
>>> Thanks,
>>> bin
>>> 2017-04-11  Bin Cheng  <bin.cheng@arm.com>
>>>
>>>         * tree-ssa-address.c: Include header file.
>>>         (move_hint_to_base): Return TRUE if BASE_HINT is moved to memory
>>>         address.
>>>         (add_to_parts): Refactor.
>>>         (addr_to_parts): New parameter.  Update use of move_hint_to_base.
>>>         (create_mem_ref): Update use of addr_to_parts.  Re-associate addr
>>>         in new order.

Comments

Marc Glisse May 2, 2017, 3:45 p.m. UTC | #1
On Tue, 2 May 2017, Bin.Cheng wrote:

> On Tue, May 2, 2017 at 3:09 PM, Richard Biener
> <richard.guenther@gmail.com> wrote:
>> On Wed, Apr 26, 2017 at 12:20 PM, Bin.Cheng <amker.cheng@gmail.com> wrote:
>>> This is another one where context diff might help.  No code change
>>> from previous version.
>>
>> This isn't a context diff.
> Thanks for reviewing.  I used git diff -U20 to generate patch.  Maybe
> 20 is too small?

See option -c (instead of -u) in man diff.
diff mbox

Patch

diff --git a/gcc/tree-ssa-address.c b/gcc/tree-ssa-address.c
index 8aefed6..8257fde 100644
--- a/gcc/tree-ssa-address.c
+++ b/gcc/tree-ssa-address.c
@@ -29,40 +29,41 @@  along with GCC; see the file COPYING3.  If not see
 #include "tree.h"
 #include "gimple.h"
 #include "memmodel.h"
 #include "stringpool.h"
 #include "tree-vrp.h"
 #include "tree-ssanames.h"
 #include "expmed.h"
 #include "insn-config.h"
 #include "emit-rtl.h"
 #include "recog.h"
 #include "tree-pretty-print.h"
 #include "fold-const.h"
 #include "stor-layout.h"
 #include "gimple-iterator.h"
 #include "gimplify-me.h"
 #include "tree-ssa-loop-ivopts.h"
 #include "expr.h"
 #include "tree-dfa.h"
 #include "dumpfile.h"
 #include "tree-affine.h"
+#include "gimplify.h"
 
 /* FIXME: We compute address costs using RTL.  */
 #include "tree-ssa-address.h"
 
 /* TODO -- handling of symbols (according to Richard Hendersons
    comments, http://gcc.gnu.org/ml/gcc-patches/2005-04/msg00949.html):
 
    There are at least 5 different kinds of symbols that we can run up against:
 
      (1) binds_local_p, small data area.
      (2) binds_local_p, eg local statics
      (3) !binds_local_p, eg global variables
      (4) thread local, local_exec
      (5) thread local, !local_exec
 
    Now, (1) won't appear often in an array context, but it certainly can.
    All you have to do is set -GN high enough, or explicitly mark any
    random object __attribute__((section (".sdata"))).
 
    All of these affect whether or not a symbol is in fact a valid address.
@@ -410,71 +411,73 @@  move_fixed_address_to_symbol (struct mem_address *parts, aff_tree *addr)
   tree val = NULL_TREE;
 
   for (i = 0; i < addr->n; i++)
     {
       if (addr->elts[i].coef != 1)
 	continue;
 
       val = addr->elts[i].val;
       if (TREE_CODE (val) == ADDR_EXPR
 	  && fixed_address_object_p (TREE_OPERAND (val, 0)))
 	break;
     }
 
   if (i == addr->n)
     return;
 
   parts->symbol = val;
   aff_combination_remove_elt (addr, i);
 }
 
-/* If ADDR contains an instance of BASE_HINT, move it to PARTS->base.  */
+/* Return true if ADDR contains an instance of BASE_HINT and it's moved to
+   PARTS->base.  */
 
-static void
+static bool
 move_hint_to_base (tree type, struct mem_address *parts, tree base_hint,
 		   aff_tree *addr)
 {
   unsigned i;
   tree val = NULL_TREE;
   int qual;
 
   for (i = 0; i < addr->n; i++)
     {
       if (addr->elts[i].coef != 1)
 	continue;
 
       val = addr->elts[i].val;
       if (operand_equal_p (val, base_hint, 0))
 	break;
     }
 
   if (i == addr->n)
-    return;
+    return false;
 
   /* Cast value to appropriate pointer type.  We cannot use a pointer
      to TYPE directly, as the back-end will assume registers of pointer
      type are aligned, and just the base itself may not actually be.
      We use void pointer to the type's address space instead.  */
   qual = ENCODE_QUAL_ADDR_SPACE (TYPE_ADDR_SPACE (type));
   type = build_qualified_type (void_type_node, qual);
   parts->base = fold_convert (build_pointer_type (type), val);
   aff_combination_remove_elt (addr, i);
+  return true;
 }
 
 /* If ADDR contains an address of a dereferenced pointer, move it to
    PARTS->base.  */
 
 static void
 move_pointer_to_base (struct mem_address *parts, aff_tree *addr)
 {
   unsigned i;
   tree val = NULL_TREE;
 
   for (i = 0; i < addr->n; i++)
     {
       if (addr->elts[i].coef != 1)
 	continue;
 
       val = addr->elts[i].val;
       if (POINTER_TYPE_P (TREE_TYPE (val)))
 	break;
     }
@@ -518,42 +521,41 @@  add_to_parts (struct mem_address *parts, tree elt)
 {
   tree type;
 
   if (!parts->index)
     {
       parts->index = fold_convert (sizetype, elt);
       return;
     }
 
   if (!parts->base)
     {
       parts->base = elt;
       return;
     }
 
   /* Add ELT to base.  */
   type = TREE_TYPE (parts->base);
   if (POINTER_TYPE_P (type))
     parts->base = fold_build_pointer_plus (parts->base, elt);
   else
-    parts->base = fold_build2 (PLUS_EXPR, type,
-			       parts->base, elt);
+    parts->base = fold_build2 (PLUS_EXPR, type, parts->base, elt);
 }
 
 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
    validity for a memory reference accessing memory of mode MODE in address
    space AS.  */
 
 static bool
 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
 				 addr_space_t as)
 {
 #define MAX_RATIO 128
   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
   static vec<sbitmap> valid_mult_list;
   sbitmap valid_mult;
 
   if (data_index >= valid_mult_list.length ())
     valid_mult_list.safe_grow_cleared (data_index + 1);
 
   valid_mult = valid_mult_list[data_index];
   if (!valid_mult)
@@ -651,87 +653,84 @@  most_expensive_mult_to_index (tree type, struct mem_address *parts,
 	  continue;
 	}
 
       elt = fold_convert (sizetype, addr->elts[i].val);
       if (mult_elt)
 	mult_elt = fold_build2 (op_code, sizetype, mult_elt, elt);
       else if (op_code == PLUS_EXPR)
 	mult_elt = elt;
       else
 	mult_elt = fold_build1 (NEGATE_EXPR, sizetype, elt);
     }
   addr->n = j;
 
   parts->index = mult_elt;
   parts->step = wide_int_to_tree (sizetype, best_mult);
 }
 
 /* Splits address ADDR for a memory access of type TYPE into PARTS.
    If BASE_HINT is non-NULL, it specifies an SSA name to be used
    preferentially as base of the reference, and IV_CAND is the selected
-   iv candidate used in ADDR.
+   iv candidate used in ADDR.  Store true to VAR_IN_BASE if variant
+   part of address is split to PARTS.base.
 
    TODO -- be more clever about the distribution of the elements of ADDR
    to PARTS.  Some architectures do not support anything but single
    register in address, possibly with a small integer offset; while
    create_mem_ref will simplify the address to an acceptable shape
    later, it would be more efficient to know that asking for complicated
    addressing modes is useless.  */
 
 static void
-addr_to_parts (tree type, aff_tree *addr, tree iv_cand,
-	       tree base_hint, struct mem_address *parts,
-               bool speed)
+addr_to_parts (tree type, aff_tree *addr, tree iv_cand, tree base_hint,
+	       struct mem_address *parts, bool *var_in_base, bool speed)
 {
   tree part;
   unsigned i;
 
   parts->symbol = NULL_TREE;
   parts->base = NULL_TREE;
   parts->index = NULL_TREE;
   parts->step = NULL_TREE;
 
   if (addr->offset != 0)
     parts->offset = wide_int_to_tree (sizetype, addr->offset);
   else
     parts->offset = NULL_TREE;
 
   /* Try to find a symbol.  */
   move_fixed_address_to_symbol (parts, addr);
 
-  /* No need to do address parts reassociation if the number of parts
-     is <= 2 -- in that case, no loop invariant code motion can be
-     exposed.  */
-
-  if (!base_hint && (addr->n > 2))
+  /* Since at the moment there is no reliable way to know how to
+     distinguish between pointer and its offset, we decide if var
+     part is the pointer based on guess.  */
+  *var_in_base = (base_hint != NULL && parts->symbol == NULL);
+  if (*var_in_base)
+    *var_in_base = move_hint_to_base (type, parts, base_hint, addr);
+  else
     move_variant_to_index (parts, addr, iv_cand);
 
-  /* First move the most expensive feasible multiplication
-     to index.  */
+  /* First move the most expensive feasible multiplication to index.  */
   if (!parts->index)
     most_expensive_mult_to_index (type, parts, addr, speed);
 
-  /* Try to find a base of the reference.  Since at the moment
-     there is no reliable way how to distinguish between pointer and its
-     offset, this is just a guess.  */
-  if (!parts->symbol && base_hint)
-    move_hint_to_base (type, parts, base_hint, addr);
+  /* Move pointer into base.  */
   if (!parts->symbol && !parts->base)
     move_pointer_to_base (parts, addr);
 
   /* Then try to process the remaining elements.  */
   for (i = 0; i < addr->n; i++)
     {
       part = fold_convert (sizetype, addr->elts[i].val);
       if (addr->elts[i].coef != 1)
 	part = fold_build2 (MULT_EXPR, sizetype, part,
 			    wide_int_to_tree (sizetype, addr->elts[i].coef));
       add_to_parts (parts, part);
     }
   if (addr->rest)
     add_to_parts (parts, fold_convert (sizetype, addr->rest));
 }
 
 /* Force the PARTS to register.  */
 
 static void
 gimplify_mem_ref_parts (gimple_stmt_iterator *gsi, struct mem_address *parts)
@@ -739,129 +738,201 @@  gimplify_mem_ref_parts (gimple_stmt_iterator *gsi, struct mem_address *parts)
   if (parts->base)
     parts->base = force_gimple_operand_gsi_1 (gsi, parts->base,
 					    is_gimple_mem_ref_addr, NULL_TREE,
 					    true, GSI_SAME_STMT);
   if (parts->index)
     parts->index = force_gimple_operand_gsi (gsi, parts->index,
 					     true, NULL_TREE,
 					     true, GSI_SAME_STMT);
 }
 
 /* Creates and returns a TARGET_MEM_REF for address ADDR.  If necessary
    computations are emitted in front of GSI.  TYPE is the mode
    of created memory reference. IV_CAND is the selected iv candidate in ADDR,
    and BASE_HINT is non NULL if IV_CAND comes from a base address
    object.  */
 
 tree
 create_mem_ref (gimple_stmt_iterator *gsi, tree type, aff_tree *addr,
 		tree alias_ptr_type, tree iv_cand, tree base_hint, bool speed)
 {
+  bool var_in_base;
   tree mem_ref, tmp;
   struct mem_address parts;
 
-  addr_to_parts (type, addr, iv_cand, base_hint, &parts, speed);
+  addr_to_parts (type, addr, iv_cand, base_hint, &parts, &var_in_base, speed);
   gimplify_mem_ref_parts (gsi, &parts);
   mem_ref = create_mem_ref_raw (type, alias_ptr_type, &parts, true);
   if (mem_ref)
     return mem_ref;
 
   /* The expression is too complicated.  Try making it simpler.  */
 
+  /* Merge symbol into other parts.  */
+  if (parts.symbol)
+    {
+      tmp = parts.symbol;
+      parts.symbol = NULL_TREE;
+      gcc_assert (is_gimple_val (tmp));
+
+      if (parts.base)
+	{
+	  gcc_assert (useless_type_conversion_p (sizetype,
+						 TREE_TYPE (parts.base)));
+
+	  if (parts.index)
+	    {
+	      /* Add the symbol to base, eventually forcing it to register.  */
+	      tmp = fold_build_pointer_plus (tmp, parts.base);
+	      tmp = force_gimple_operand_gsi_1 (gsi, tmp,
+						is_gimple_mem_ref_addr,
+						NULL_TREE, true,
+						GSI_SAME_STMT);
+	    }
+	  else
+	    {
+	      /* Move base to index, then move the symbol to base.  */
+	      parts.index = parts.base;
+	    }
+	  parts.base = tmp;
+	}
+      else
+	parts.base = tmp;
+
+      mem_ref = create_mem_ref_raw (type, alias_ptr_type, &parts, true);
+      if (mem_ref)
+	return mem_ref;
+    }
+
+  /* Move multiplication to index by transforming address expression:
+       [... + index << step + ...]
+     into:
+       index' = index << step;
+       [... + index' + ,,,].  */
   if (parts.step && !integer_onep (parts.step))
     {
-      /* Move the multiplication to index.  */
       gcc_assert (parts.index);
       parts.index = force_gimple_operand_gsi (gsi,
 				fold_build2 (MULT_EXPR, sizetype,
 					     parts.index, parts.step),
 				true, NULL_TREE, true, GSI_SAME_STMT);
       parts.step = NULL_TREE;
 
       mem_ref = create_mem_ref_raw (type, alias_ptr_type, &parts, true);
       if (mem_ref)
 	return mem_ref;
     }
 
-  if (parts.symbol)
+  /* Add offset to invariant part by transforming address expression:
+       [base + index + offset]
+     into:
+       base' = base + offset;
+       [base' + index]
+     or:
+       index' = index + offset;
+       [base + index']
+     depending on which one is invariant.  */
+  if (parts.offset && !integer_zerop (parts.offset))
     {
-      tmp = parts.symbol;
-      gcc_assert (is_gimple_val (tmp));
+      tree old_base = unshare_expr (parts.base);
+      tree old_index = unshare_expr (parts.index);
+      tree old_offset = unshare_expr (parts.offset);
 
-      /* Add the symbol to base, eventually forcing it to register.  */
-      if (parts.base)
+      tmp = parts.offset;
+      parts.offset = NULL_TREE;
+      /* Add offset to invariant part.  */
+      if (!var_in_base)
 	{
-	  gcc_assert (useless_type_conversion_p
-				(sizetype, TREE_TYPE (parts.base)));
-
-	  if (parts.index)
+	  if (parts.base)
 	    {
-	      parts.base = force_gimple_operand_gsi_1 (gsi,
-			fold_build_pointer_plus (tmp, parts.base),
-			is_gimple_mem_ref_addr, NULL_TREE, true, GSI_SAME_STMT);
+	      tmp = fold_build_pointer_plus (parts.base, tmp);
+	      tmp = force_gimple_operand_gsi_1 (gsi, tmp,
+						is_gimple_mem_ref_addr,
+						NULL_TREE, true,
+						GSI_SAME_STMT);
 	    }
-	  else
+	  parts.base = tmp;
+	}
+      else
+	{
+	  if (parts.index)
 	    {
-	      parts.index = parts.base;
-	      parts.base = tmp;
+	      tmp = fold_build_pointer_plus (parts.index, tmp);
+	      tmp = force_gimple_operand_gsi_1 (gsi, tmp,
+						is_gimple_mem_ref_addr,
+						NULL_TREE, true,
+						GSI_SAME_STMT);
 	    }
+	  parts.index = tmp;
 	}
-      else
-	parts.base = tmp;
-      parts.symbol = NULL_TREE;
 
       mem_ref = create_mem_ref_raw (type, alias_ptr_type, &parts, true);
       if (mem_ref)
 	return mem_ref;
+
+      /* Restore parts.base, index and offset so that we can check if
+	 [base + offset] addressing mode is supported in next step.
+	 This is necessary for targets only support [base + offset],
+	 but not [base + index] addressing mode.  */
+      parts.base = old_base;
+      parts.index = old_index;
+      parts.offset = old_offset;
     }
 
+  /* Transform [base + index + ...] into:
+       base' = base + index;
+       [base' + ...].  */
   if (parts.index)
     {
+      tmp = parts.index;
+      parts.index = NULL_TREE;
       /* Add index to base.  */
       if (parts.base)
 	{
-	  parts.base = force_gimple_operand_gsi_1 (gsi,
-			fold_build_pointer_plus (parts.base, parts.index),
-			is_gimple_mem_ref_addr, NULL_TREE, true, GSI_SAME_STMT);
+	  tmp = fold_build_pointer_plus (parts.base, tmp);
+	  tmp = force_gimple_operand_gsi_1 (gsi, tmp,
+					    is_gimple_mem_ref_addr,
+					    NULL_TREE, true, GSI_SAME_STMT);
 	}
-      else
-	parts.base = parts.index;
-      parts.index = NULL_TREE;
+      parts.base = tmp;
 
       mem_ref = create_mem_ref_raw (type, alias_ptr_type, &parts, true);
       if (mem_ref)
 	return mem_ref;
     }
 
+  /* Transform [base + offset] into:
+       base' = base + offset;
+       [base'].  */
   if (parts.offset && !integer_zerop (parts.offset))
     {
-      /* Try adding offset to base.  */
+      tmp = parts.offset;
+      parts.offset = NULL_TREE;
+      /* Add offset to base.  */
       if (parts.base)
 	{
-	  parts.base = force_gimple_operand_gsi_1 (gsi,
-			fold_build_pointer_plus (parts.base, parts.offset),
-			is_gimple_mem_ref_addr, NULL_TREE, true, GSI_SAME_STMT);
+	  tmp = fold_build_pointer_plus (parts.base, tmp);
+	  tmp = force_gimple_operand_gsi_1 (gsi, tmp,
+					    is_gimple_mem_ref_addr,
+					    NULL_TREE, true, GSI_SAME_STMT);
 	}
-      else
-	parts.base = parts.offset;
-
-      parts.offset = NULL_TREE;
+      parts.base = tmp;
 
       mem_ref = create_mem_ref_raw (type, alias_ptr_type, &parts, true);
       if (mem_ref)
 	return mem_ref;
     }
 
   /* Verify that the address is in the simplest possible shape
      (only a register).  If we cannot create such a memory reference,
      something is really wrong.  */
   gcc_assert (parts.symbol == NULL_TREE);
   gcc_assert (parts.index == NULL_TREE);
   gcc_assert (!parts.step || integer_onep (parts.step));
   gcc_assert (!parts.offset || integer_zerop (parts.offset));
   gcc_unreachable ();
 }
 
 /* Copies components of the address from OP to ADDR.  */
 
 void
 get_address_description (tree op, struct mem_address *addr)