diff mbox series

Add a new pattern in 4-insn combine

Message ID 1fa8c69e-0010-06d9-deaf-f1f7d3352842@linux.ibm.com
State New
Headers show
Series Add a new pattern in 4-insn combine | expand

Commit Message

HAO CHEN GUI Nov. 30, 2020, 3:08 a.m. UTC
Hi,

   This patch adds a new pattern (combining 4 insns into 3 insns) to 4-insn 
combine. In the patch, newpat is split twice; the resulting newpat, newi2pat 
and newi1pat replace i3, i2 and i1 respectively. The 4-to-3 combine is 
attempted only at the end, after all former attempts have failed. In the 
4-insn combine pre-check, zero and sign extends are now counted, since the 
patch targets issue 1 listed in PR65010.

   The attachments are the patch diff file and change log file.

   Bootstrapped and tested on powerpc64le, ARM and x86 with no 
regressions. Is this okay for trunk? Any recommendations? Thanks a lot.
* combine.c (combine_validate_cost): Add an argument for newi1pat.
	(try_combine): Add a 4-insn combine pattern for optimizing rtx
	sign_extend (op:zero_extend, zero_extend).

Comments

HAO CHEN GUI Dec. 11, 2020, 2:14 a.m. UTC | #1
Segher,

     Gentle ping this:

https://gcc.gnu.org/pipermail/gcc-patches/2020-November/560573.html

On 30/11/2020 上午 11:08, HAO CHEN GUI wrote:
> Hi,
>
>   This patch adds a new pattern(combine 4 insns to 3 insns) in 4-insn 
> combine. In the patch, newpat is split twice. The newpat, newi2pat and 
> newi1pat replace i3, i2 and i1 respectively. The 4 to 3 combine is 
> done at the end where all former attempts fail. In 4 insn combine 
> pre-check, the zero and sign extend are added as the patch is for the 
> issue 1 listed in pr65010.
>
>   The attachments are the patch diff file and change log file.
>
>   Bootstrapped and tested on powerpc64le, ARM and x86 with no 
> regressions. Is this okay for trunk? Any recommendations? Thanks a lot.
>
HAO CHEN GUI Jan. 4, 2021, 2:03 a.m. UTC | #2
Segher,

     Gentle ping this:

https://gcc.gnu.org/pipermail/gcc-patches/2020-November/560573.html

Thanks a lot.


On 11/12/2020 上午 10:14, HAO CHEN GUI wrote:
> Segher,
>
>     Gentle ping this:
>
> https://gcc.gnu.org/pipermail/gcc-patches/2020-November/560573.html
HAO CHEN GUI Jan. 14, 2021, 8:44 a.m. UTC | #3
Segher,

     Gentle ping this:

https://gcc.gnu.org/pipermail/gcc-patches/2020-November/560573.html

Thanks a lot.

On 4/1/2021 上午 10:03, HAO CHEN GUI wrote:
> Segher,
>
>     Gentle ping this:
>
> https://gcc.gnu.org/pipermail/gcc-patches/2020-November/560573.html
>
> Thanks a lot.
>
>
> On 11/12/2020 上午 10:14, HAO CHEN GUI wrote:
>> Segher,
>>
>>     Gentle ping this:
>>
>> https://gcc.gnu.org/pipermail/gcc-patches/2020-November/560573.html
Segher Boessenkool Jan. 15, 2021, 7:01 p.m. UTC | #4
On Mon, Nov 30, 2020 at 11:08:22AM +0800, HAO CHEN GUI wrote:
>   This patch adds a new pattern(combine 4 insns to 3 insns) in 4-insn 
> combine. In the patch, newpat is split twice. The newpat, newi2pat and 

As I said before, that has a lot of problems, and is only suitable for
stage 1 (after many fixes!).

I don't think this is a good idea at all, fwiw.  Before we even should
think about doing 4->3 combinations, we should stop severely limiting
what combinations of 4 insns we allow; and we do that because it will
slow down the compiler a lot, for no big gain.


Segher
diff mbox series

Patch

diff --git a/gcc/combine.c b/gcc/combine.c
index c88382efbd3..1a7c508217e 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -851,10 +851,11 @@  do_SUBST_LINK (struct insn_link **into, struct insn_link *newval)
 
 static bool
 combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
-		       rtx newpat, rtx newi2pat, rtx newotherpat)
+		       rtx newpat, rtx newi2pat, rtx newotherpat,
+		       rtx newi1pat)
 {
   int i0_cost, i1_cost, i2_cost, i3_cost;
-  int new_i2_cost, new_i3_cost;
+  int new_i1_cost, new_i2_cost, new_i3_cost;
   int old_cost, new_cost;
 
   /* Lookup the original insn_costs.  */
@@ -915,6 +916,20 @@  combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
       new_i2_cost = 0;
     }
 
+  if (newi1pat)
+    {
+	tmp = PATTERN (i1);
+	PATTERN (i1) = newi1pat;
+	tmpi = INSN_CODE (i1);
+	INSN_CODE (i1) = -1;
+	new_i1_cost = insn_cost (i1, optimize_this_for_speed_p);
+	PATTERN (i1) = tmp;
+	INSN_CODE (i1) = tmpi;
+	new_cost = new_i1_cost > 0 ? new_i1_cost + new_cost : 0;
+    }
+  else
+    new_i1_cost = 0;
+
   if (undobuf.other_insn)
     {
       int old_other_cost, new_other_cost;
@@ -958,7 +973,10 @@  combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
 	fprintf (dump_file, "%d + ", i1_cost);
       fprintf (dump_file, "%d + %d = %d\n", i2_cost, i3_cost, old_cost);
 
-      if (newi2pat)
+      if (newi1pat)
+	fprintf (dump_file, "replacement costs %d + %d + %d = %d\n",
+		 new_i1_cost, new_i2_cost, new_i3_cost, new_cost);
+      else if (newi2pat)
 	fprintf (dump_file, "replacement costs %d + %d = %d\n",
 		 new_i2_cost, new_i3_cost, new_cost);
       else
@@ -973,7 +991,10 @@  combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
   INSN_COST (i3) = new_i3_cost;
   if (i1)
     {
-      INSN_COST (i1) = 0;
+      if (newi1pat)
+	INSN_COST (i1) = new_i1_cost;
+      else
+	INSN_COST (i1) = 0;
       if (i0)
 	INSN_COST (i0) = 0;
     }
@@ -2671,8 +2692,8 @@  static rtx_insn *
 try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 	     int *new_direct_jump_p, rtx_insn *last_combined_insn)
 {
-  /* New patterns for I3 and I2, respectively.  */
-  rtx newpat, newi2pat = 0;
+  /* New patterns for I3, I2 and I1, respectively.  */
+  rtx newpat, newi2pat = 0, newi1pat = 0;
   rtvec newpat_vec_with_clobbers = 0;
   int substed_i2 = 0, substed_i1 = 0, substed_i0 = 0;
   /* Indicates need to preserve SET in I0, I1 or I2 in I3 if it is not
@@ -2682,8 +2703,9 @@  try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
   int total_sets;
   /* Nonzero if I2's or I1's body now appears in I3.  */
   int i2_is_used = 0, i1_is_used = 0;
-  /* INSN_CODEs for new I3, new I2, and user of condition code.  */
-  int insn_code_number, i2_code_number = 0, other_code_number = 0;
+  /* INSN_CODEs for new I3, new I2, new I1, and user of condition code.  */
+  int insn_code_number, i2_code_number = 0, i1_code_number = 0;
+  int other_code_number = 0;
   /* Contains I3 if the destination of I3 is used in its source, which means
      that the old life of I3 is being killed.  If that usage is placed into
      I2 and not in I3, a REG_DEAD note must be made.  */
@@ -2702,7 +2724,7 @@  try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
   int i2dest_killed = 0, i1dest_killed = 0, i0dest_killed = 0;
   int i1_feeds_i2_n = 0, i0_feeds_i2_n = 0, i0_feeds_i1_n = 0;
   /* Notes that must be added to REG_NOTES in I3 and I2.  */
-  rtx new_i3_notes, new_i2_notes;
+  rtx new_i3_notes, new_i2_notes, new_i1_notes;
   /* Notes that we substituted I3 into I2 instead of the normal case.  */
   int i3_subst_into_i2 = 0;
   /* Notes that I1, I2 or I3 is a MULT operation.  */
@@ -2735,6 +2757,7 @@  try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
       int i;
       int ngood = 0;
       int nshift = 0;
+      int nextend = 0;
       rtx set0, set3;
 
       if (!flag_expensive_optimizations)
@@ -2758,6 +2781,8 @@  try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 	  else if (GET_CODE (src) == ASHIFT || GET_CODE (src) == ASHIFTRT
 		   || GET_CODE (src) == LSHIFTRT)
 	    nshift++;
+	  else if (GET_CODE (src) == SIGN_EXTEND || GET_CODE (src) == ZERO_EXTEND)
+	    nextend++;
 	}
 
       /* If I0 loads a memory and I3 sets the same memory, then I1 and I2
@@ -2787,7 +2812,7 @@  try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 	  && rtx_referenced_p (XEXP (SET_DEST (set3), 0), SET_SRC (set0)))
 	ngood += 2;
 
-      if (ngood < 2 && nshift < 2)
+      if (ngood < 2 && nshift < 2 && nextend < 2)
 	return 0;
     }
 
@@ -3399,6 +3424,12 @@  try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 	      i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0, 0);
 	    }
 
+	  if (i0)
+	    {
+	      subst_low_luid = DF_INSN_LUID (i0);
+	      i0src = subst (i0src, pc_rtx, pc_rtx, 0, 0, 0);
+	    }
+
 	  subst_low_luid = DF_INSN_LUID (i2);
 	  i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0);
 	}
@@ -3991,6 +4022,61 @@  try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 	     don't use one now.  */
 	  if (i2_code_number >= 0 && ! (split_code == MULT && ! have_mult))
 	    insn_code_number = recog_for_combine (&newpat, i3, &new_i3_notes);
+
+	  if (i0 && i1
+	      && GET_CODE (newpat) == SET
+	      && BINARY_P (SET_SRC (newpat))
+	      && insn_code_number < 0
+	      && i2_code_number >= 0)
+	    {
+	      newi1pat = NULL_RTX;
+	      rtx newi1dest;
+	      machine_mode new_mode;
+
+	      if ((split = find_split_point (&newpat, i3, false)) != 0
+		  && (!HAVE_cc0 || REG_P (i1dest))
+		  && (GET_MODE (*split) == GET_MODE (i1dest)
+		      || GET_MODE (*split) == VOIDmode
+		      || can_change_dest_mode (i1dest, added_sets_1,
+					       GET_MODE (*split)))
+		  && !reg_referenced_p (i1dest, newpat)
+		  && !modified_between_p (*split, i1, i3))
+		{
+		  new_mode = GET_MODE (*split);
+		  if (REGNO (i1dest) < FIRST_PSEUDO_REGISTER)
+		    newi1dest = gen_rtx_REG (new_mode, REGNO (i1dest));
+		  else
+		    {
+		      SUBST_MODE (regno_reg_rtx[REGNO (i1dest)], new_mode);
+		      newi1dest = regno_reg_rtx[REGNO (i1dest)];
+		    }
+		  newi1pat =  gen_rtx_SET (newi1dest, *split);
+		  SUBST (*split, newi1dest);
+
+		  i1_code_number = recog_for_combine (&newi1pat, i2,
+						      &new_i1_notes);
+
+		  if (i1_code_number < 0)
+		    {
+		      undo_all ();
+		      return 0;
+		    }
+		  else
+		    {
+		      /* swap newi1pat and newi2pat if dest of newi2pat is set
+			 in newi1pat.  */
+		      if (reg_referenced_p (newdest, newi1pat))
+			{
+			    std::swap (i1dest, i2dest);
+			    std::swap (newi1pat, newi2pat);
+			    std::swap (i1_code_number, i2_code_number);
+			}
+
+		      insn_code_number = recog_for_combine (&newpat, i3,
+							    &new_i3_notes);
+		    }
+		}
+	    }
 	}
     }
 
@@ -4209,11 +4295,12 @@  try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 
   /* Only allow this combination if insn_cost reports that the
      replacement instructions are cheaper than the originals.  */
-  if (!combine_validate_cost (i0, i1, i2, i3, newpat, newi2pat, other_pat))
-    {
-      undo_all ();
-      return 0;
-    }
+    if (!combine_validate_cost (i0, i1, i2, i3, newpat, newi2pat, other_pat,
+				newi1pat))
+      {
+	undo_all ();
+	return 0;
+      }
 
   if (MAY_HAVE_DEBUG_BIND_INSNS)
     {
@@ -4242,6 +4329,12 @@  try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 		/* Put back the new mode.  */
 		adjust_reg_mode (reg, new_mode);
 	      }
+	    else if (reg == i1dest)
+	      {
+		propagate_for_debug (i1, last_combined_insn, reg, i1src,
+				     this_basic_block);
+		adjust_reg_mode (reg, new_mode);
+	      }
 	    else
 	      {
 		rtx tempreg = gen_raw_REG (old_mode, REGNO (reg));
@@ -4446,6 +4539,7 @@  try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
     reset_used_flags (i0notes);
     reset_used_flags (newpat);
     reset_used_flags (newi2pat);
+    reset_used_flags (newi1pat);
     if (undobuf.other_insn)
       reset_used_flags (PATTERN (undobuf.other_insn));
 
@@ -4455,6 +4549,7 @@  try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
     i0notes = copy_rtx_if_shared (i0notes);
     newpat = copy_rtx_if_shared (newpat);
     newi2pat = copy_rtx_if_shared (newi2pat);
+    newi1pat = copy_rtx_if_shared (newi1pat);
     if (undobuf.other_insn)
       reset_used_flags (PATTERN (undobuf.other_insn));
 
@@ -4552,10 +4647,21 @@  try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
       {
 	LOG_LINKS (i1) = NULL;
 	REG_NOTES (i1) = 0;
-	if (MAY_HAVE_DEBUG_BIND_INSNS)
-	  propagate_for_debug (i1, last_combined_insn, i1dest, i1src,
-			       this_basic_block);
-	SET_INSN_DELETED (i1);
+	if (newi1pat)
+	  {
+	    if (MAY_HAVE_DEBUG_BIND_INSNS)
+	      propagate_for_debug (i1, last_combined_insn, i1dest, i1src,
+				   this_basic_block);
+	    INSN_CODE (i1) = i1_code_number;
+	    PATTERN (i1) = newi1pat;
+	  }
+	else
+	  {
+	    if (MAY_HAVE_DEBUG_BIND_INSNS)
+	    propagate_for_debug (i1, last_combined_insn, i1dest, i1src,
+				 this_basic_block);
+	    SET_INSN_DELETED (i1);
+	  }
       }
 
     if (i0)
@@ -4579,6 +4685,8 @@  try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
       from_luid = DF_INSN_LUID (i1);
     else
       from_luid = DF_INSN_LUID (i2);
+    if (newi1pat)
+      move_deaths (newi1pat, NULL_RTX, from_luid, i1, &midnotes);
     if (newi2pat)
       move_deaths (newi2pat, NULL_RTX, from_luid, i2, &midnotes);
     move_deaths (newpat, newi2pat, from_luid, i3, &midnotes);
@@ -4604,6 +4712,10 @@  try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
        know these are REG_UNUSED and want them to go to the desired insn,
        so we always pass it as i3.  */
 
+    if (newi1pat && new_i1_notes)
+      distribute_notes (new_i1_notes, i1, i1, NULL, NULL_RTX, NULL_RTX,
+			NULL_RTX);
+
     if (newi2pat && new_i2_notes)
       distribute_notes (new_i2_notes, i2, i2, NULL, NULL_RTX, NULL_RTX,
 			NULL_RTX);
@@ -4738,6 +4850,8 @@  try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
     /* Update reg_stat[].nonzero_bits et al for any changes that may have
        been made to this insn.  The order is important, because newi2pat
        can affect nonzero_bits of newpat.  */
+    if (newi1pat)
+      note_pattern_stores (newi1pat, set_nonzero_bits_and_sign_copies, NULL);
     if (newi2pat)
       note_pattern_stores (newi2pat, set_nonzero_bits_and_sign_copies, NULL);
     note_pattern_stores (newpat, set_nonzero_bits_and_sign_copies, NULL);