diff mbox

Don't combine param and return value copies

Message ID 20150523083305.GB4350@bubble.grove.modra.org
State New
Headers show

Commit Message

Alan Modra May 23, 2015, 8:33 a.m. UTC
This stops combine messing with parameter and return value copies
from/to hard registers.  Bootstrapped and regression tested
powerpc64le-linux, powerpc64-linux and x86_64-linux.  In looking at a
number of different powerpc64le gcc/*.o files, I noticed a few code
generation improvements.  There were cases where a register copy was
no longer needed, cmp used in place of mr., and rlwinm instead of
rldicl.  x86_64 gcc/*.o showed no changes (apart from combine.o of
course), but should see a small improvement in compile time with this
change.

The "clear next_use when !can_combine_p" change is to fix a non-bug.
Given

1) insn defining rn
   ...
2) insn defining rn but !can_combine_p
   ...
3) insn using rn

then create_log_links might create a link from (3) to (1), I thought.
However, can_combine_p doesn't currently allow this to happen.
Obviously, any can_combine_p result depending on regno shouldn't give
a different result at (1) and (2), but there is also at test of
DF_REF_PRE_POST_MODIFY that can.  The saving grace is that pre/post
modify insns also use the register, which means next_use[rn] will
point at (2), not (3), when (1) is processed.

I came across this because at one stage I considered modifying
can_combine_p.  Someone who does so in the future might trigger the
above problem, so I thought it worth posting the change.

OK for mainline?

	* combine.c (set_return_regs): New function.
	(twiddle_first_block, twiddle_last_block): New functions.
	(create_log_links): Exclude instructions copying parameter
	values from hard regs to pseudos, and instructions copying
	return value pseudos to hard regs.  Clear next_use when
	!can_combine_p.

Comments

Andrew Pinski May 23, 2015, 9:17 a.m. UTC | #1
On Sat, May 23, 2015 at 1:33 AM, Alan Modra <amodra@gmail.com> wrote:
> This stops combine messing with parameter and return value copies
> from/to hard registers.  Bootstrapped and regression tested
> powerpc64le-linux, powerpc64-linux and x86_64-linux.  In looking at a
> number of different powerpc64le gcc/*.o files, I noticed a few code
> generation improvements.  There were cases where a register copy was
> no longer needed, cmp used in place of mr., and rlwinm instead of
> rldicl.  x86_64 gcc/*.o showed no changes (apart from combine.o of
> course), but should see a small improvement in compile time with this
> change.

I noticed this a while back but I never got around to fixing this.
This should also improve other RISC targets like AARCH64 and MIPS
where I had saw issues like this too.

Thanks,
Andrew

>
> The "clear next_use when !can_combine_p" change is to fix a non-bug.
> Given
>
> 1) insn defining rn
>    ...
> 2) insn defining rn but !can_combine_p
>    ...
> 3) insn using rn
>
> then create_log_links might create a link from (3) to (1), I thought.
> However, can_combine_p doesn't currently allow this to happen.
> Obviously, any can_combine_p result depending on regno shouldn't give
> a different result at (1) and (2), but there is also at test of
> DF_REF_PRE_POST_MODIFY that can.  The saving grace is that pre/post
> modify insns also use the register, which means next_use[rn] will
> point at (2), not (3), when (1) is processed.
>
> I came across this because at one stage I considered modifying
> can_combine_p.  Someone who does so in the future might trigger the
> above problem, so I thought it worth posting the change.
>
> OK for mainline?
>
>         * combine.c (set_return_regs): New function.
>         (twiddle_first_block, twiddle_last_block): New functions.
>         (create_log_links): Exclude instructions copying parameter
>         values from hard regs to pseudos, and instructions copying
>         return value pseudos to hard regs.  Clear next_use when
>         !can_combine_p.
>
> Index: gcc/combine.c
> ===================================================================
> --- gcc/combine.c       (revision 223463)
> +++ gcc/combine.c       (working copy)
> @@ -1048,9 +1048,79 @@ can_combine_use_p (df_ref use)
>    return true;
>  }
>
> -/* Fill in log links field for all insns.  */
> +/* Used to build set of return value regs.  Add X to the set.  */
>
>  static void
> +set_return_regs (rtx x, void *arg)
> +{
> +  HARD_REG_SET *regs = (HARD_REG_SET *) arg;
> +
> +  add_to_hard_reg_set (regs, GET_MODE (x), REGNO (x));
> +}
> +
> +/* Twiddle BLOCK_FOR_INSN to TO for instructions in the first block BB
> +   that we don't want to combine with other instructions.  */
> +
> +static void
> +twiddle_first_block (basic_block bb, basic_block to)
> +{
> +  rtx_insn *insn;
> +
> +  FOR_BB_INSNS (bb, insn)
> +    {
> +      if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_FUNCTION_BEG)
> +       break;
> +      if (!NONDEBUG_INSN_P (insn))
> +       continue;
> +
> +      /* reg,reg copies from parameter hard regs.  */
> +      rtx set = single_set (insn);
> +      if (set
> +         && REG_P (SET_DEST (set))
> +         && REG_P (SET_SRC (set))
> +         && HARD_REGISTER_P (SET_SRC (set)))
> +       set_block_for_insn (insn, to);
> +    }
> +}
> +
> +/* Twiddle BLOCK_FOR_INSN to TO for instructions in the last block BB
> +   that we don't want to combine with other instructions.  */
> +
> +static void
> +twiddle_last_block (basic_block bb, basic_block to, HARD_REG_SET return_regs)
> +{
> +  rtx_insn *insn;
> +
> +  FOR_BB_INSNS_REVERSE (bb, insn)
> +    {
> +      if (CALL_P (insn))
> +       break;
> +      if (!NONDEBUG_INSN_P (insn))
> +       continue;
> +
> +      rtx reg = NULL_RTX;
> +      /* use_return_regster added USEs.  */
> +      if (GET_CODE (PATTERN (insn)) == USE)
> +       reg = XEXP (PATTERN (insn), 0);
> +      else
> +       {
> +         /* reg,reg copies that set return value hard regs.  */
> +         rtx set = single_set (insn);
> +         if (set && REG_P (SET_SRC (set)))
> +           reg = SET_DEST (set);
> +       }
> +      if (reg
> +         && REG_P (reg)
> +         && HARD_REGISTER_P (reg)
> +         && overlaps_hard_reg_set_p (return_regs,
> +                                     GET_MODE (reg), REGNO (reg)))
> +       set_block_for_insn (insn, to);
> +    }
> +}
> +
> +/* Fill in log links field for all insns that we wish to combine.  */
> +
> +static void
>  create_log_links (void)
>  {
>    basic_block bb;
> @@ -1057,9 +1127,28 @@ create_log_links (void)
>    rtx_insn **next_use;
>    rtx_insn *insn;
>    df_ref def, use;
> +  HARD_REG_SET return_regs;
>
>    next_use = XCNEWVEC (rtx_insn *, max_reg_num ());
>
> +  /* Don't combine instructions copying parameter values from hard
> +     regs to pseudos.  Exclude such instructions from LOG_LINKS by
> +     temporarily zapping BLOCK_FOR_INSN.  */
> +
> +  bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
> +  twiddle_first_block (bb, 0);
> +
> +  /* Similarly, don't combine instructions copying return values
> +     from pseudos to hard regs.  */
> +
> +  CLEAR_HARD_REG_SET (return_regs);
> +  diddle_return_value (set_return_regs, &return_regs);
> +  if (!hard_reg_set_empty_p (return_regs))
> +    {
> +      bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
> +      twiddle_last_block (bb, 0, return_regs);
> +    }
> +
>    /* Pass through each block from the end, recording the uses of each
>       register and establishing log links when def is encountered.
>       Note that we do not clear next_use array in order to save time,
> @@ -1087,12 +1176,12 @@ create_log_links (void)
>                if (!next_use[regno])
>                  continue;
>
> +             use_insn = next_use[regno];
> +             next_use[regno] = NULL;
> +
>               if (!can_combine_def_p (def))
>                 continue;
>
> -             use_insn = next_use[regno];
> -             next_use[regno] = NULL;
> -
>               if (BLOCK_FOR_INSN (use_insn) != bb)
>                 continue;
>
> @@ -1103,7 +1192,7 @@ create_log_links (void)
>                  we might wind up changing the semantics of the insn,
>                  even if reload can make what appear to be valid
>                  assignments later.  */
> -             if (regno < FIRST_PSEUDO_REGISTER
> +             if (HARD_REGISTER_NUM_P (regno)
>                   && asm_noperands (PATTERN (use_insn)) >= 0)
>                 continue;
>
> @@ -1124,6 +1213,16 @@ create_log_links (void)
>          }
>      }
>
> +  /* Repair BLOCK_FOR_INSN.  */
> +
> +  bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
> +  twiddle_first_block (bb, bb);
> +  if (!hard_reg_set_empty_p (return_regs))
> +    {
> +      bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
> +      twiddle_last_block (bb, bb, return_regs);
> +    }
> +
>    free (next_use);
>  }
>
>
> --
> Alan Modra
> Australia Development Lab, IBM
Segher Boessenkool May 23, 2015, 1:45 p.m. UTC | #2
Hi Alan,

On Sat, May 23, 2015 at 06:03:05PM +0930, Alan Modra wrote:
> This stops combine messing with parameter and return value copies
> from/to hard registers.  Bootstrapped and regression tested
> powerpc64le-linux, powerpc64-linux and x86_64-linux.  In looking at a
> number of different powerpc64le gcc/*.o files, I noticed a few code
> generation improvements.  There were cases where a register copy was
> no longer needed, cmp used in place of mr., and rlwinm instead of
> rldicl.  x86_64 gcc/*.o showed no changes (apart from combine.o of
> course), but should see a small improvement in compile time with this
> change.

Thanks.  Did you see improvements around return as well, or mostly /
only related to the function start?

> The "clear next_use when !can_combine_p" change is to fix a non-bug.
> Given
> 
> 1) insn defining rn
>    ...
> 2) insn defining rn but !can_combine_p
>    ...
> 3) insn using rn
> 
> then create_log_links might create a link from (3) to (1), I thought.
> However, can_combine_p doesn't currently allow this to happen.
> Obviously, any can_combine_p result depending on regno shouldn't give
> a different result at (1) and (2), but there is also at test of
> DF_REF_PRE_POST_MODIFY that can.  The saving grace is that pre/post
> modify insns also use the register, which means next_use[rn] will
> point at (2), not (3), when (1) is processed.
> 
> I came across this because at one stage I considered modifying
> can_combine_p.  Someone who does so in the future might trigger the
> above problem, so I thought it worth posting the change.

I agree it looks like a bug waiting to happen.  Please post it as a
separate patch though.

> +/* Twiddle BLOCK_FOR_INSN to TO for instructions in the first block BB
> +   that we don't want to combine with other instructions.  */
> +
> +static void
> +twiddle_first_block (basic_block bb, basic_block to)

I don't like this much.  Messing with global state makes things harder
to change later.  If it is hard to come up with a good name for a
factor, it usually means it is not such a good factor.

You can do these inside can_combine_{def,use}_p as far as I can see?
Probably need to give those an extra parameter then: for _def, a bool
that says "don't allow moves from hard regs", set when the scan has
encountered a NOTE_INSN_FUNCTION_BEG in this bb; and for _use, a regset
of those hard regs we don't want to allow moves to (those seen in USEs
later in that same block).  Or do it in the main loop itself, _{def,use}
are each called in one place only; whatever works best.

What makes return values different from CALL args here, btw?  Why do
you not want to combine return values, but you leave alone call args?

> +/* Fill in log links field for all insns that we wish to combine.  */

Please don't change this comment; it was more correct before.

> @@ -1103,7 +1192,7 @@ create_log_links (void)
>  		 we might wind up changing the semantics of the insn,
>  		 even if reload can make what appear to be valid
>  		 assignments later.  */
> -	      if (regno < FIRST_PSEUDO_REGISTER
> +	      if (HARD_REGISTER_NUM_P (regno)
>  		  && asm_noperands (PATTERN (use_insn)) >= 0)
>  		continue;

An independent change.


Segher
diff mbox

Patch

Index: gcc/combine.c
===================================================================
--- gcc/combine.c	(revision 223463)
+++ gcc/combine.c	(working copy)
@@ -1048,9 +1048,79 @@  can_combine_use_p (df_ref use)
   return true;
 }
 
-/* Fill in log links field for all insns.  */
+/* Used to build set of return value regs.  Add X to the set.  */
 
 static void
+set_return_regs (rtx x, void *arg)
+{
+  HARD_REG_SET *regs = (HARD_REG_SET *) arg;
+
+  add_to_hard_reg_set (regs, GET_MODE (x), REGNO (x));
+}
+
+/* Twiddle BLOCK_FOR_INSN to TO for instructions in the first block BB
+   that we don't want to combine with other instructions.  */
+
+static void
+twiddle_first_block (basic_block bb, basic_block to)
+{
+  rtx_insn *insn;
+
+  FOR_BB_INSNS (bb, insn)
+    {
+      if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_FUNCTION_BEG)
+	break;
+      if (!NONDEBUG_INSN_P (insn))
+	continue;
+
+      /* reg,reg copies from parameter hard regs.  */
+      rtx set = single_set (insn);
+      if (set
+	  && REG_P (SET_DEST (set))
+	  && REG_P (SET_SRC (set))
+	  && HARD_REGISTER_P (SET_SRC (set)))
+	set_block_for_insn (insn, to);
+    }
+}
+
+/* Twiddle BLOCK_FOR_INSN to TO for instructions in the last block BB
+   that we don't want to combine with other instructions.  */
+
+static void
+twiddle_last_block (basic_block bb, basic_block to, HARD_REG_SET return_regs)
+{
+  rtx_insn *insn;
+
+  FOR_BB_INSNS_REVERSE (bb, insn)
+    {
+      if (CALL_P (insn))
+	break;
+      if (!NONDEBUG_INSN_P (insn))
+	continue;
+
+      rtx reg = NULL_RTX;
+      /* use_return_regster added USEs.  */
+      if (GET_CODE (PATTERN (insn)) == USE)
+	reg = XEXP (PATTERN (insn), 0);
+      else
+	{
+	  /* reg,reg copies that set return value hard regs.  */
+	  rtx set = single_set (insn);
+	  if (set && REG_P (SET_SRC (set)))
+	    reg = SET_DEST (set);
+	}
+      if (reg
+	  && REG_P (reg)
+	  && HARD_REGISTER_P (reg)
+	  && overlaps_hard_reg_set_p (return_regs,
+				      GET_MODE (reg), REGNO (reg)))
+	set_block_for_insn (insn, to);
+    }
+}
+
+/* Fill in log links field for all insns that we wish to combine.  */
+
+static void
 create_log_links (void)
 {
   basic_block bb;
@@ -1057,9 +1127,28 @@  create_log_links (void)
   rtx_insn **next_use;
   rtx_insn *insn;
   df_ref def, use;
+  HARD_REG_SET return_regs;
 
   next_use = XCNEWVEC (rtx_insn *, max_reg_num ());
 
+  /* Don't combine instructions copying parameter values from hard
+     regs to pseudos.  Exclude such instructions from LOG_LINKS by
+     temporarily zapping BLOCK_FOR_INSN.  */
+
+  bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
+  twiddle_first_block (bb, 0);
+
+  /* Similarly, don't combine instructions copying return values
+     from pseudos to hard regs.  */
+
+  CLEAR_HARD_REG_SET (return_regs);
+  diddle_return_value (set_return_regs, &return_regs);
+  if (!hard_reg_set_empty_p (return_regs))
+    {
+      bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
+      twiddle_last_block (bb, 0, return_regs);
+    }
+
   /* Pass through each block from the end, recording the uses of each
      register and establishing log links when def is encountered.
      Note that we do not clear next_use array in order to save time,
@@ -1087,12 +1176,12 @@  create_log_links (void)
               if (!next_use[regno])
                 continue;
 
+	      use_insn = next_use[regno];
+	      next_use[regno] = NULL;
+
 	      if (!can_combine_def_p (def))
 		continue;
 
-	      use_insn = next_use[regno];
-	      next_use[regno] = NULL;
-
 	      if (BLOCK_FOR_INSN (use_insn) != bb)
 		continue;
 
@@ -1103,7 +1192,7 @@  create_log_links (void)
 		 we might wind up changing the semantics of the insn,
 		 even if reload can make what appear to be valid
 		 assignments later.  */
-	      if (regno < FIRST_PSEUDO_REGISTER
+	      if (HARD_REGISTER_NUM_P (regno)
 		  && asm_noperands (PATTERN (use_insn)) >= 0)
 		continue;
 
@@ -1124,6 +1213,16 @@  create_log_links (void)
         }
     }
 
+  /* Repair BLOCK_FOR_INSN.  */
+
+  bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
+  twiddle_first_block (bb, bb);
+  if (!hard_reg_set_empty_p (return_regs))
+    {
+      bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
+      twiddle_last_block (bb, bb, return_regs);
+    }
+
   free (next_use);
 }