diff mbox

[rtl-optimization] : Enhance post-reload compare elimination pass to handle arithmetic operations with implicit extensions

Message ID CAFULd4am-xFj14X-Y190YYiqDd-qmvLxqYhKUhYqEYDAW7ubuw@mail.gmail.com
State New
Headers show

Commit Message

Uros Bizjak April 24, 2012, 6:24 p.m. UTC
On Tue, Apr 24, 2012 at 6:56 PM, Uros Bizjak <ubizjak@gmail.com> wrote:

> Back to converting x86 to post-reload compare elimination pass.
>
> Arithmetic operations on x86_64 can implicitly zero-extend the result
> and set flags according to the non-extended result. The following testcase
> should exercise both features:
>
> 2012-04-24  Uros Bizjak  <ubizjak@gmail.com>
>
>        * compare-elim.c (try_eliminate_compare): Also handle operands with
>        implicit extensions.
>
> The patch is lightly tested on x86_64-pc-linux-gnu, together with the
> attached WIP patch.
>
> Opinions? Since it looks quite safe, is it OK for mainline?

The full bootstrap and regression tests passed on x86_64-pc-linux-gnu
{,-m32} with the attached x86 WIP patch (the updated patch disables some
interfering peephole2s). I am confident that the compare-elim.c change
doesn't cause regressions, so this message is a formal request for
inclusion of the patch.

BTW: This is also the first time x86 has bootstrapped with the
post-reload compare elimination pass enabled.

Uros.
diff mbox

Patch

Index: i386.c
===================================================================
--- i386.c	(revision 186721)
+++ i386.c	(working copy)
@@ -17861,19 +17861,32 @@  ix86_split_copysign_var (rtx operands[])
   emit_insn (gen_rtx_SET (VOIDmode, dest, x));
 }
 
-/* Return TRUE or FALSE depending on whether the first SET in INSN
-   has source and destination with matching CC modes, and that the
+/* Return TRUE or FALSE depending on whether the first SET from COMPARE
+   in INSN has source and destination with matching CC modes, and that the
    CC mode is at least as constrained as REQ_MODE.  */
 
 bool
 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
 {
-  rtx set;
+  rtx pat, set;
   enum machine_mode set_mode;
+  int i;
 
-  set = PATTERN (insn);
-  if (GET_CODE (set) == PARALLEL)
-    set = XVECEXP (set, 0, 0);
+  pat = PATTERN (insn);
+  if (GET_CODE (pat) == PARALLEL)
+    {
+      set = NULL_RTX;
+      for (i = 0; i < XVECLEN (pat, 0); i++)
+	{
+	  set = XVECEXP (pat, 0, i);
+	  if (GET_CODE (set) == SET
+	      && GET_CODE (SET_SRC (set)) == COMPARE)
+	    break;
+	}
+    }
+  else
+    set = pat;
+
   gcc_assert (GET_CODE (set) == SET);
   gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
 
@@ -39090,6 +39103,8 @@  ix86_autovectorize_vector_sizes (void)
 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
 #undef TARGET_CC_MODES_COMPATIBLE
 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
+#undef TARGET_FLAGS_REGNUM
+#define TARGET_FLAGS_REGNUM FLAGS_REG
 
 #undef TARGET_MACHINE_DEPENDENT_REORG
 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
Index: i386.md
===================================================================
--- i386.md	(revision 186769)
+++ i386.md	(working copy)
@@ -5808,14 +5808,14 @@ 
 	(zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))])
 
 (define_insn "*add<mode>_2"
-  [(set (reg FLAGS_REG)
+  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>,<r>m,<r>")
+	(plus:SWI
+	  (match_operand:SWI 1 "nonimmediate_operand" "%0,0,<r>")
+	  (match_operand:SWI 2 "<general_operand>" "<g>,<r><i>,0")))
+   (set (reg FLAGS_REG)
 	(compare
-	  (plus:SWI
-	    (match_operand:SWI 1 "nonimmediate_operand" "%0,0,<r>")
-	    (match_operand:SWI 2 "<general_operand>" "<g>,<r><i>,0"))
-	  (const_int 0)))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>,<r>m,<r>")
-	(plus:SWI (match_dup 1) (match_dup 2)))]
+	  (plus:SWI (match_dup 1) (match_dup 2))
+	  (const_int 0)))]
   "ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
 {
@@ -5857,13 +5857,14 @@ 
 
 ;; See comment for addsi_1_zext why we do use nonimmediate_operand
 (define_insn "*addsi_2_zext"
-  [(set (reg FLAGS_REG)
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(zero_extend:DI
+	  (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r")
+		   (match_operand:SI 2 "x86_64_general_operand" "rme,0"))))
+   (set (reg FLAGS_REG)
 	(compare
-	  (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r")
-		   (match_operand:SI 2 "x86_64_general_operand" "rme,0"))
-	  (const_int 0)))
-   (set (match_operand:DI 0 "register_operand" "=r,r")
-	(zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
+	  (plus:SI (match_dup 1) (match_dup 2))
+	  (const_int 0)))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (PLUS, SImode, operands)"
 {
@@ -6090,7 +6091,7 @@ 
 	    (match_operand:SWI 2 "<general_operand>" "<g>,0"))
 	  (const_int 0)))
    (clobber (match_scratch:SWI 0 "=<r>,<r>"))]
-  "ix86_match_ccmode (insn, CCGOCmode)
+  "0 && ix86_match_ccmode (insn, CCGOCmode)
    && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
 {
   switch (get_attr_type (insn))
@@ -17257,7 +17258,7 @@ 
 	      (clobber (reg:CC FLAGS_REG))])
    (set (match_dup 1) (match_dup 0))
    (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))]
-  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+  "0 && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
    && peep2_reg_dead_p (4, operands[0])
    && !reg_overlap_mentioned_p (operands[0], operands[1])
    && ix86_match_ccmode (peep2_next_insn (3),
@@ -17284,7 +17285,7 @@ 
 	      (clobber (reg:CC FLAGS_REG))])
    (set (match_dup 1) (match_dup 0))
    (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))]
-  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+  "0 && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
    && GET_CODE (operands[2]) != MINUS
    && peep2_reg_dead_p (3, operands[0])
    && !reg_overlap_mentioned_p (operands[0], operands[1])
@@ -17313,7 +17314,7 @@ 
 	      (clobber (reg:CC FLAGS_REG))])
    (set (match_dup 1) (match_dup 0))
    (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))]
-  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+  "0 && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
    && REG_P (operands[0]) && REG_P (operands[4])
    && REGNO (operands[0]) == REGNO (operands[4])
    && peep2_reg_dead_p (4, operands[0])