diff mbox

[RFA,middle-end/53623] Improve extension elimination

Message ID 52B4AC16.8040300@redhat.com
State New
Headers show

Commit Message

Jeff Law Dec. 20, 2013, 8:44 p.m. UTC
On 12/20/13 10:25, Jakub Jelinek wrote:
> Yes.  So my suggestion actually was not correct for that:
>    && !reg_overlap_mentioned_p (dest, XEXP (src, 0))
> because the first extension above has r1:SI and r2:DI which don't
> overlap, only r1:DI and r2:DI overlap.  So it probably should be checked
> in combine_reaching_defs instead where you have already both the registers
> in the right modes available and can call reg_overlap_mentioned_p on them
> directly.  One argument would be SET_DEST (def_insn) and one SET_DEST
> (cand->insn), right?
Here's the updated version.

1. Minor test tweak per Uros's suggestion.
2. Fix formatting
3. Add testing for two destinations overlapping per above.

Bootstrapped and regression tested on x86_64-unknown-linux-gnu.  Ok for 
the trunk?

jeff
* ree.c (combine_set_extension): Handle case where source
	and destination registers in an extension insn are different.
	(combine_reaching_defs): Allow source and destination
	registers in extension to be different under limited
	circumstances.
	(add_removable_extension): Remove restriction that the
	source and destination registers in the extension are the
	same.
	(find_and_remove_re): Emit a copy from the extension's
	destination to its source after the defining insn if
	the source and destination registers are different.


testsuite/

	* gcc.target/i386/pr53623.c: New test.

Comments

Jakub Jelinek Dec. 20, 2013, 9:10 p.m. UTC | #1
On Fri, Dec 20, 2013 at 01:44:06PM -0700, Jeff Law wrote:
> @@ -342,7 +354,8 @@ combine_set_extension (ext_cand *cand, rtx curr_insn, rtx *orig_set)
>        if (dump_file)
>          {
>            fprintf (dump_file,
> -		   "Tentatively merged extension with definition:\n");
> +		   "Tentatively merged extension with definition %s:\n",
> +		   (copy_needed) ? "(copy needed)" : "");

Missed this, you don't need the parens around the first copy_needed.

Ok with that change, thanks.

	Jakub
Jeff Law Jan. 8, 2014, 6:04 a.m. UTC | #2
On 12/20/13 13:44, Jeff Law wrote:
> On 12/20/13 10:25, Jakub Jelinek wrote:
>> Yes.  So my suggestion actually was not correct for that:
>>    && !reg_overlap_mentioned_p (dest, XEXP (src, 0))
>> because the first extension above has r1:SI and r2:DI which don't
>> overlap, only r1:DI and r2:DI overlap.  So it probably should be checked
>> in combine_reaching_defs instead where you have already both the
>> registers
>> in the right modes available and can call reg_overlap_mentioned_p on them
>> directly.  One argument would be SET_DEST (def_insn) and one SET_DEST
>> (cand->insn), right?
> Here's the updated version.
>
> 1. Minor test tweak per Uros's suggestion.
> 2. Fix formatting
> 3. Add testing for two destinations overlapping per above.
>
> Bootstrapped and regression tested on x86_64-unknown-linux-gnu.  Ok for
> the trunk?
Committed after private email approval from Jakub.  I made one 
additional trivial change (missing whitespace in a comment).

jeff
Eric Botcazou Jan. 8, 2014, 8:14 a.m. UTC | #3
> Committed after private email approval from Jakub.  I made one
> additional trivial change (missing whitespace in a comment).

This breaks bootstrap with RTL checking enabled:

/home/eric/svn/gcc/libgcc/config/libbid/bid64_noncomp.c:119:1: internal 
compiler error: RTL check: expected code 'set' or 'clobber', have 'parallel' 
in combine_reaching_defs, at ree.c:711
 }
 ^
0x9c5fcf rtl_check_failed_code2(rtx_def const*, rtx_code, rtx_code, char 
const*, int, char const*)
        /home/eric/svn/gcc/gcc/rtl.c:783
0x14626da combine_reaching_defs
        /home/eric/svn/gcc/gcc/ree.c:711
0x1464ae9 find_and_remove_re
        /home/eric/svn/gcc/gcc/ree.c:957
0x1464ae9 rest_of_handle_ree
        /home/eric/svn/gcc/gcc/ree.c:1019
0x1464ae9 execute
        /home/eric/svn/gcc/gcc/ree.c:1058
Please submit a full bug report,
with preprocessed source if appropriate.
Please include the complete backtrace with any bug report.
See <http://gcc.gnu.org/bugs.html> for instructions.
make[3]: *** [bid64_noncomp.o] Error 1
make[3]: *** Waiting for unfinished jobs....
Jeff Law Jan. 8, 2014, 1:54 p.m. UTC | #4
On 01/08/14 01:14, Eric Botcazou wrote:
>> Committed after private email approval from Jakub.  I made one
>> additional trivial change (missing whitespace in a comment).
>
> This breaks bootstrap with RTL checking enabled:
[ ... ]
Thanks.  I'm on it.
jeff
diff mbox

Patch

diff --git a/gcc/ree.c b/gcc/ree.c
index 9938e98..873b6d4 100644
--- a/gcc/ree.c
+++ b/gcc/ree.c
@@ -282,8 +282,20 @@  static bool
 combine_set_extension (ext_cand *cand, rtx curr_insn, rtx *orig_set)
 {
   rtx orig_src = SET_SRC (*orig_set);
-  rtx new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (*orig_set)));
   rtx new_set;
+  rtx cand_pat = PATTERN (cand->insn);
+
+  /* If the extension's source/destination registers are not the same
+     then we need to change the original load to reference the destination
+     of the extension.  Then we need to emit a copy from that destination
+     to the original destination of the load.  */
+  rtx new_reg;
+  bool copy_needed
+    = (REGNO (SET_DEST (cand_pat)) != REGNO (XEXP (SET_SRC (cand_pat), 0)));
+  if (copy_needed)
+    new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (cand_pat)));
+  else
+    new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (*orig_set)));
 
   /* Merge constants by directly moving the constant into the register under
      some conditions.  Recall that RTL constants are sign-extended.  */
@@ -342,7 +354,8 @@  combine_set_extension (ext_cand *cand, rtx curr_insn, rtx *orig_set)
       if (dump_file)
         {
           fprintf (dump_file,
-		   "Tentatively merged extension with definition:\n");
+		   "Tentatively merged extension with definition %s:\n",
+		   (copy_needed) ? "(copy needed)" : "");
           print_rtl_single (dump_file, curr_insn);
         }
       return true;
@@ -662,6 +675,53 @@  combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
   if (!outcome)
     return false;
 
+  /* If the destination operand of the extension is a different
+     register than the source operand, then additional restrictions
+     are needed.  */
+  if ((REGNO (SET_DEST (PATTERN (cand->insn)))
+       != REGNO (XEXP (SET_SRC (PATTERN (cand->insn)), 0))))
+    {
+      /* In theory we could handle more than one reaching def, it
+	 just makes the code to update the insn stream more complex.  */
+      if (state->defs_list.length () != 1)
+	return false;
+
+      /* We require the candidate not already be modified.  This may
+	 be overly conservative.  */
+      if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE)
+	return false;
+
+      /* There's only one reaching def.  */
+      rtx def_insn = state->defs_list[0];
+
+      /* The defining statementmust not have been modified either.  */
+      if (state->modified[INSN_UID (def_insn)].kind != EXT_MODIFIED_NONE)
+	return false;
+
+      /* The defining statement and candidate insn must be in the same block.
+	 This is merely to keep the test for safety and updating the insn
+	 stream simple.  */
+      if (BLOCK_FOR_INSN (cand->insn) != BLOCK_FOR_INSN (def_insn))
+	return false;
+
+      /* If there is an overlap between the destination of DEF_INSN and
+	 CAND->insn, then this transformation is not safe.  Note we have
+	 to test in the widened mode.  */
+      rtx tmp_reg = gen_rtx_REG (GET_MODE (SET_DEST (PATTERN (cand->insn))),
+				 REGNO (SET_DEST (PATTERN (def_insn))));
+      if (reg_overlap_mentioned_p (tmp_reg, SET_DEST (PATTERN (cand->insn))))
+	return false;
+
+      /* The destination register of the extension insn must not be
+	 used or set between the def_insn and cand->insn exclusive.  */
+      if (reg_used_between_p (SET_DEST (PATTERN (cand->insn)),
+			      def_insn, cand->insn)
+	  || reg_set_between_p (SET_DEST (PATTERN (cand->insn)),
+				def_insn, cand->insn))
+	return false;
+    }
+
+
   /* If cand->insn has been already modified, update cand->mode to a wider
      mode if possible, or punt.  */
   if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE)
@@ -778,8 +838,7 @@  add_removable_extension (const_rtx expr, rtx insn,
 
   if (REG_P (dest)
       && (code == SIGN_EXTEND || code == ZERO_EXTEND)
-      && REG_P (XEXP (src, 0))
-      && REGNO (dest) == REGNO (XEXP (src, 0)))
+      && REG_P (XEXP (src, 0)))
     {
       struct df_link *defs, *def;
       ext_cand *cand;
@@ -863,6 +922,7 @@  find_and_remove_re (void)
   int num_re_opportunities = 0, num_realized = 0, i;
   vec<ext_cand> reinsn_list;
   auto_vec<rtx> reinsn_del_list;
+  auto_vec<rtx> reinsn_copy_list;
   ext_state state;
 
   /* Construct DU chain to get all reaching definitions of each
@@ -899,11 +959,41 @@  find_and_remove_re (void)
           if (dump_file)
             fprintf (dump_file, "Eliminated the extension.\n");
           num_realized++;
-          reinsn_del_list.safe_push (curr_cand->insn);
+	  if (REGNO (SET_DEST (PATTERN (curr_cand->insn)))
+	      != REGNO (XEXP (SET_SRC (PATTERN (curr_cand->insn)), 0)))
+	    {
+              reinsn_copy_list.safe_push (curr_cand->insn);
+              reinsn_copy_list.safe_push (state.defs_list[0]);
+	    }
+	  reinsn_del_list.safe_push (curr_cand->insn);
 	  state.modified[INSN_UID (curr_cand->insn)].deleted = 1;
         }
     }
 
+  /* The copy list contains pairs of insns which describe copies we
+     need to insert into the INSN stream.
+
+     The first insn in each pair is the extension insn, from which
+     we derive the source and destination of the copy.
+
+     The second insn in each pair is the memory reference where the
+     extension will ultimately happen.  We emit the new copy
+     immediately after this insn.
+
+     It may first appear that the arguments for the copy are reversed.
+     Remember that the memory reference will be changed to refer to the
+     destination of the extention.  So we're actually emitting a copy
+     from the new destination to the old destination.  */
+  for (unsigned int i = 0; i < reinsn_copy_list.length (); i += 2)
+    {
+      rtx curr_insn = reinsn_copy_list[i];
+      rtx pat = PATTERN (curr_insn);
+      rtx new_reg = gen_rtx_REG (GET_MODE (SET_DEST (pat)),
+				 REGNO (XEXP (SET_SRC (pat), 0)));
+      rtx set = gen_rtx_SET (VOIDmode, new_reg, SET_DEST (pat));
+      emit_insn_after (set, reinsn_copy_list[i + 1]);
+    }
+
   /* Delete all useless extensions here in one sweep.  */
   FOR_EACH_VEC_ELT (reinsn_del_list, i, curr_insn)
     delete_insn (curr_insn);
diff --git a/gcc/testsuite/gcc.target/i386/pr53623.c b/gcc/testsuite/gcc.target/i386/pr53623.c
new file mode 100644
index 0000000..35c578b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr53623.c
@@ -0,0 +1,25 @@ 
+/* { dg-do compile { target {! ia32 } } } */
+/* { dg-options "-O2 -fdump-rtl-ree" } */
+
+
+#include <stdint.h>
+
+typedef (*inst_t)(int64_t rdi, int64_t rsi, int64_t rdx);
+
+int16_t code[256];
+inst_t dispatch[256];
+
+void an_inst(int64_t rdi, int64_t rsi, int64_t rdx) {
+  rdx = code[rdx];
+  uint8_t inst = (uint8_t) rdx;
+  rdx >>= 8;
+  dispatch[inst](rdi, rsi, rdx);
+}
+
+int main(void) {
+  return 0;
+}
+
+/* { dg-final { scan-rtl-dump "copy needed" "ree" } } */
+/* { dg-final { cleanup-rtl-dump "ree" } } */
+