diff mbox

Fix PR46556 (poor address generation)

Message ID 4E8D5D2A.1000208@gnu.org
State New
Headers show

Commit Message

Paolo Bonzini Oct. 6, 2011, 7:47 a.m. UTC
On 10/05/2011 10:16 PM, William J. Schmidt wrote:
> OK, I see.  If there's a better place downstream to make a swizzle, I'm
> certainly fine with that.
>
> I disabled locally_poor_mem_replacement and added some dump information
> in should_replace_address to show the costs for the replacement I'm
> trying to avoid:
>
> In should_replace_address:
>    old_rtx = (reg/f:DI 125 [ D.2036 ])
>    new_rtx = (plus:DI (reg/v/f:DI 126 [ p ])
>          (reg:DI 128))
>    address_cost (old_rtx) = 0
>    address_cost (new_rtx) = 0
>    set_src_cost (old_rtx) = 0
>    set_src_cost (new_rtx) = 4
>
> In insn 11, replacing
>   (mem/s:SI (reg/f:DI 125 [ D.2036 ]) [2 p_1(D)->a S4 A32])
>   with (mem/s:SI (plus:DI (reg/v/f:DI 126 [ p ])
>              (reg:DI 128)) [2 p_1(D)->a S4 A32])
> Changed insn 11
> deferring rescan insn with uid = 11.
> deferring rescan insn with uid = 11.

And IIUC the other address is based on pseudo 125 as well, but the 
combination is (plus (plus (reg 126) (reg 128)) (const_int X)) and 
cannot be represented on ppc.  I think _this_ is the problem, so I'm 
afraid your patch could cause pessimizations on x86 for example.  On 
x86, which has a cheap REG+REG+CONST addressing mode, it is much better 
to propagate pseudo 125 so that you can delete the set altogether.

However, indeed there is no downstream pass that undoes the 
transformation.  Perhaps we can do it in CSE, since this _is_ CSE after 
all. :)  The attached untested (uncompiled) patch is an attempt.

Paolo

Comments

Bill Schmidt Oct. 6, 2011, 1:07 p.m. UTC | #1
On Thu, 2011-10-06 at 09:47 +0200, Paolo Bonzini wrote:
> And IIUC the other address is based on pseudo 125 as well, but the 
> combination is (plus (plus (reg 126) (reg 128)) (const_int X)) and 
> cannot be represented on ppc.  I think _this_ is the problem, so I'm 
> afraid your patch could cause pessimizations on x86 for example.  On 
> x86, which has a cheap REG+REG+CONST addressing mode, it is much better 
> to propagate pseudo 125 so that you can delete the set altogether.
> 
> However, indeed there is no downstream pass that undoes the 
> transformation.  Perhaps we can do it in CSE, since this _is_ CSE after 
> all. :)  The attached untested (uncompiled) patch is an attempt.
> 
> Paolo

Thanks, Paolo!  This makes good sense.  I will play with your (second :)
patch and let you know how it goes.

Bill
diff mbox

Patch

Index: cse.c
===================================================================
--- cse.c	(revision 177688)
+++ cse.c	(working copy)
@@ -3136,6 +3136,75 @@  find_comparison_args (enum rtx_code code
   return code;
 }
 
+static rtx
+lookup_addr (rtx insn, rtx *loc, enum machine_mode mode)
+{
+  struct table_elt *elt, *p;
+  int regno;
+  int hash;
+  int base_cost;
+  rtx addr = *loc;
+  rtx exp;
+
+  /* Try to reuse existing registers for addresses, in hope of shortening
+     live ranges for the registers that compose the addresses.  This happens
+     when you have
+
+         (set (reg C) (plus (reg A) (reg B))
+         (set (reg D) (mem (reg C)))
+         (set (reg E) (mem (plus (reg C) (const_int X))))
+
+     In this case fwprop will try to propagate into the addresses, but
+     if propagation into reg E fails, the only result will have been to
+     uselessly lengthen the live range of A and B.  */
+
+  if (!REG_P (addr))
+    return;
+
+  regno = REGNO (addr);
+  if (regno == FRAME_POINTER_REGNUM
+      || regno == HARD_FRAME_POINTER_REGNUM
+      || regno == ARG_POINTER_REGNUM)
+    return;
+
+   /* If this address is not in the hash table, we can't look for equivalences
+      of the whole address.  Also, ignore if volatile.  */
+
+  {
+    int save_do_not_record = do_not_record;
+    int save_hash_arg_in_memory = hash_arg_in_memory;
+    int addr_volatile;
+
+    do_not_record = 0;
+    hash = HASH (addr, Pmode);
+    addr_volatile = do_not_record;
+    do_not_record = save_do_not_record;
+    hash_arg_in_memory = save_hash_arg_in_memory;
+
+    if (addr_volatile)
+      return;
+  }
+
+  /* Try to find a REG that holds the same address.  */
+
+  elt = lookup (addr, hash, Pmode);
+  if (!elt)
+    return;
+
+  base_cost = address_cost (*loc, mode);
+  for (p = elt->first_same_value; p; p = p->next_same_value)
+    {
+      exp = p->exp;
+      if (REG_P (exp)
+          && exp_equiv_p (exp, exp, 1, false)
+          && address_cost (exp, mode) > base_cost)
+        break;
+    }
+
+  if (p)
+    validate_change (insn, loc, canon_reg (copy_rtx (exp), NULL_RTX), 0));
+}
+
 /* If X is a nontrivial arithmetic operation on an argument for which
    a constant value can be determined, return the result of operating
    on that value, as a constant.  Otherwise, return X, possibly with
@@ -3180,6 +3249,12 @@  fold_rtx (rtx x, rtx insn)
   switch (code)
     {
     case MEM:
+      if ((new_rtx = equiv_constant (x)) != NULL_RTX)
+        return new_rtx;
+      if (insn)
+        lookup_addr (insn, &XEXP (x, 0), GET_MODE (x));
+      return x;
+
     case SUBREG:
       if ((new_rtx = equiv_constant (x)) != NULL_RTX)
         return new_rtx;
Index: passes.c
===================================================================
--- passes.c	(revision 177688)
+++ passes.c	(working copy)
@@ -1448,9 +1448,9 @@  init_optimization_passes (void)
 	}
       NEXT_PASS (pass_web);
       NEXT_PASS (pass_rtl_cprop);
+      NEXT_PASS (pass_rtl_fwprop_addr);
       NEXT_PASS (pass_cse2);
       NEXT_PASS (pass_rtl_dse1);
-      NEXT_PASS (pass_rtl_fwprop_addr);
       NEXT_PASS (pass_inc_dec);
       NEXT_PASS (pass_initialize_regs);
       NEXT_PASS (pass_ud_rtl_dce);