Patchwork [rtl-optimization] : Enhance post-reload compare elimination pass to handle arithmetic operations with implicit extensions

login
register
mail settings
Submitter Uros Bizjak
Date April 24, 2012, 4:56 p.m.
Message ID <CAFULd4Y2oKS1qmOyKBURrWKUStbjuqA1eLjNvY1rRcYb92rt_g@mail.gmail.com>
Download mbox | patch
Permalink /patch/154724/
State New
Headers show

Comments

Uros Bizjak - April 24, 2012, 4:56 p.m.
Hello!

Back to converting x86 to post-reload compare elimination pass.

Arithmetic operations in x86_64 can implicitly zero extend the result,
and set flags according to the non-extended result. Following testcase
should exercise both features:

--cut here--
void foo (long int, int);

void test (unsigned int a, unsigned int b)
{
  long int x = a + b;
  int f = a + b > 0;

  foo (x, f);
}
--cut here--

However, with pre-reload implementation, gcc was able to merge
operation + compare, while ignoring implicit extension:

test:
        addl    %esi, %edi      # 8     *addsi_2/3
        setne   %sil    # 19    *setcc_qi
        movl    %edi, %edi      # 11    *zero_extendsidi2_rex64/1
        movzbl  %sil, %esi      # 20    *zero_extendqisi2
        jmp     foo     # 14    *sibcall

Unpatched post-reload compare elimination was able to merge operation
+ zero extension (please note that attached WIP target patch is
needed):

test:
        addl    %esi, %edi      # 7     addsi_1_zext/2
        xorl    %esi, %esi      # 22    *movsi_xor
        testl   %edi, %edi      # 23    *cmpsi_ccno_1/1
        setne   %sil    # 24    *setcc_qi_slp
        jmp     foo     # 14    *sibcall

And patched post-reload compare finally produces optimal code:

test:
        addl    %esi, %edi      # 7     *addsi_2_zext/2
        setne   %sil    # 19    *setcc_qi
        movzbl  %sil, %esi      # 20    *zero_extendqisi2
        jmp     foo     # 14    *sibcall

Where the operation looks like:

#(insn:TI 7 9 19 2 (parallel [
#            (set (reg:DI 5 di)
#                (zero_extend:DI (plus:SI (reg/v:SI 4 si [orig:64 b ] [64])
#                        (reg/v:SI 5 di [orig:63 a ] [63]))))
#            (set (reg:CCZ 17 flags)
#                (compare:CCZ (plus:SI (reg/v:SI 4 si [orig:64 b ] [64])
#                        (reg/v:SI 5 di [orig:63 a ] [63]))
#                    (const_int 0 [0])))
#        ]) cmpadd.c:5 261 {*addsi_2_zext}
#     (expr_list:REG_DEAD (reg/v:SI 4 si [orig:64 b ] [64])
#        (nil)))
        addl    %esi, %edi      # 7     *addsi_2_zext/2 [length = 2]

Attached patch teaches post-reload compare optimization how to handle
arithmetic operations with implicit extensions. The input is:

(insn 7 4 8 2 (parallel [
            (set (reg:DI 5 di)
                (zero_extend:DI (plus:SI (reg/v:SI 4 si [orig:64 b ] [64])
                        (reg/v:SI 5 di [orig:63 a ] [63]))))
            (clobber (reg:CC 17 flags))
        ]) cmpadd.c:5 253 {addsi_1_zext}
     (nil))

(insn 8 7 9 2 (set (reg:CCZ 17 flags)
        (compare:CCZ (reg:SI 5 di [orig:59 D.1714 ] [59])
            (const_int 0 [0]))) cmpadd.c:6 2 {*cmpsi_ccno_1}
     (nil))

It simply checks if REGNOs of registers are the same, the mode of the
compared register (against zero!) is checked to the mode of the inner
part of the extension instruction.

2012-04-24  Uros Bizjak  <ubizjak@gmail.com>

	* compare-elim.c (try_eliminate_compare): Also handle operands with
	implicit extensions.

Patch is lightly tested on x86_64-pc-linux-gnu, together with attached
WIP patch.

Opinions? Since it looks quite safe, is it OK for mainline?

Uros.

Index: i386.md
===================================================================
--- i386.md	(revision 186769)
+++ i386.md	(working copy)
@@ -5808,14 +5808,14 @@
 	(zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))])
 
 (define_insn "*add<mode>_2"
-  [(set (reg FLAGS_REG)
+  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>,<r>m,<r>")
+	(plus:SWI
+	  (match_operand:SWI 1 "nonimmediate_operand" "%0,0,<r>")
+	  (match_operand:SWI 2 "<general_operand>" "<g>,<r><i>,0")))
+   (set (reg FLAGS_REG)
 	(compare
-	  (plus:SWI
-	    (match_operand:SWI 1 "nonimmediate_operand" "%0,0,<r>")
-	    (match_operand:SWI 2 "<general_operand>" "<g>,<r><i>,0"))
-	  (const_int 0)))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>,<r>m,<r>")
-	(plus:SWI (match_dup 1) (match_dup 2)))]
+	  (plus:SWI (match_dup 1) (match_dup 2))
+	  (const_int 0)))]
   "ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
 {
@@ -5857,13 +5857,14 @@
 
 ;; See comment for addsi_1_zext why we do use nonimmediate_operand
 (define_insn "*addsi_2_zext"
-  [(set (reg FLAGS_REG)
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(zero_extend:DI
+	  (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r")
+		   (match_operand:SI 2 "x86_64_general_operand" "rme,0"))))
+   (set (reg FLAGS_REG)
 	(compare
-	  (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r")
-		   (match_operand:SI 2 "x86_64_general_operand" "rme,0"))
-	  (const_int 0)))
-   (set (match_operand:DI 0 "register_operand" "=r,r")
-	(zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
+	  (plus:SI (match_dup 1) (match_dup 2))
+	  (const_int 0)))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (PLUS, SImode, operands)"
 {
@@ -6090,7 +6091,7 @@
 	    (match_operand:SWI 2 "<general_operand>" "<g>,0"))
 	  (const_int 0)))
    (clobber (match_scratch:SWI 0 "=<r>,<r>"))]
-  "ix86_match_ccmode (insn, CCGOCmode)
+  "0 && ix86_match_ccmode (insn, CCGOCmode)
    && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
 {
   switch (get_attr_type (insn))
Index: i386.c
===================================================================
--- i386.c	(revision 186721)
+++ i386.c	(working copy)
@@ -17861,19 +17861,31 @@
   emit_insn (gen_rtx_SET (VOIDmode, dest, x));
 }
 
-/* Return TRUE or FALSE depending on whether the first SET in INSN
-   has source and destination with matching CC modes, and that the
+/* Return TRUE or FALSE depending on whether the first SET from COMPARE
+   in INSN has source and destination with matching CC modes, and that the
    CC mode is at least as constrained as REQ_MODE.  */
 
 bool
 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
 {
-  rtx set;
+  rtx pat, set;
   enum machine_mode set_mode;
+  int i;
 
-  set = PATTERN (insn);
-  if (GET_CODE (set) == PARALLEL)
-    set = XVECEXP (set, 0, 0);
+  pat = PATTERN (insn);
+  if (GET_CODE (pat) == PARALLEL)
+    {
+      for (i = 0; i < XVECLEN (pat, 0); i++)
+	{
+	  set = XVECEXP (pat, 0, i);
+	  if (GET_CODE (set) == SET
+	      && GET_CODE (SET_SRC (set)) == COMPARE)
+	    break;
+	}
+    }
+  else
+    set = pat;
+
   gcc_assert (GET_CODE (set) == SET);
   gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
 
@@ -39090,6 +39102,8 @@
 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
 #undef TARGET_CC_MODES_COMPATIBLE
 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
+#undef TARGET_FLAGS_REGNUM
+#define TARGET_FLAGS_REGNUM FLAGS_REG
 
 #undef TARGET_MACHINE_DEPENDENT_REORG
 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
Richard Henderson - April 24, 2012, 6:40 p.m.
On 04/24/12 09:56, Uros Bizjak wrote:
> 2012-04-24  Uros Bizjak  <ubizjak@gmail.com>
> 
> 	* compare-elim.c (try_eliminate_compare): Also handle operands with
> 	implicit extensions.

Ok.


r~

Patch

Index: compare-elim.c
===================================================================
--- compare-elim.c	(revision 186721)
+++ compare-elim.c	(working copy)
@@ -563,10 +563,26 @@ 
      Validate that PREV_CLOBBER itself does in fact refer to IN_A.  Do
      recall that we've already validated the shape of PREV_CLOBBER.  */
   x = XVECEXP (PATTERN (insn), 0, 0);
-  if (!rtx_equal_p (SET_DEST (x), in_a))
+  if (rtx_equal_p (SET_DEST (x), in_a))
+    cmp_src = SET_SRC (x);
+
+  /* Also check operations with implicit extensions, e.g.:
+     [(set (reg:DI)
+	   (zero_extend:DI (plus:SI (reg:SI)(reg:SI))))
+      (set (reg:CCZ flags)
+	   (compare:CCZ
+	     (plus:SI (reg:SI)(reg:SI))
+	     (const_int 0)))]				*/
+  else if (REG_P (SET_DEST (x))
+	   && REG_P (in_a)
+	   && REGNO (SET_DEST (x)) == REGNO (in_a)
+	   && (GET_CODE (SET_SRC (x)) == ZERO_EXTEND
+	       || GET_CODE (SET_SRC (x)) == SIGN_EXTEND)
+	   && GET_MODE (XEXP (SET_SRC (x), 0)) == GET_MODE (in_a))
+    cmp_src = XEXP (SET_SRC (x), 0);
+  else
     return false;
-  cmp_src = SET_SRC (x);
-  
+
   /* Determine if we ought to use a different CC_MODE here.  */
   flags = maybe_select_cc_mode (cmp, cmp_src, cmp->in_b);
   if (flags == NULL)
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 186721)
+++ config/i386/i386.c	(working copy)
@@ -17861,19 +17861,31 @@ 
   emit_insn (gen_rtx_SET (VOIDmode, dest, x));
 }
 
-/* Return TRUE or FALSE depending on whether the first SET in INSN
-   has source and destination with matching CC modes, and that the
+/* Return TRUE or FALSE depending on whether the first SET from COMPARE
+   in INSN has source and destination with matching CC modes, and that the
    CC mode is at least as constrained as REQ_MODE.  */
 
 bool
 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
 {
-  rtx set;
+  rtx pat, set;
   enum machine_mode set_mode;
+  int i;
 
-  set = PATTERN (insn);
-  if (GET_CODE (set) == PARALLEL)
-    set = XVECEXP (set, 0, 0);
+  pat = PATTERN (insn);
+  if (GET_CODE (pat) == PARALLEL)
+    {
+      for (i = 0; i < XVECLEN (pat, 0); i++)
+	{
+	  set = XVECEXP (pat, 0, i);
+	  if (GET_CODE (set) == SET
+	      && GET_CODE (SET_SRC (set)) == COMPARE)
+	    break;
+	}
+    }
+  else
+    set = pat;
+
   gcc_assert (GET_CODE (set) == SET);
   gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
 
@@ -39090,6 +39102,8 @@ 
 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
 #undef TARGET_CC_MODES_COMPATIBLE
 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
+#undef TARGET_FLAGS_REGNUM
+#define TARGET_FLAGS_REGNUM FLAGS_REG
 
 #undef TARGET_MACHINE_DEPENDENT_REORG
 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 186769)
+++ config/i386/i386.md	(working copy)
@@ -5808,14 +5808,14 @@ 
 	(zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))])
 
 (define_insn "*add<mode>_2"
-  [(set (reg FLAGS_REG)
+  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>,<r>m,<r>")
+	(plus:SWI
+	  (match_operand:SWI 1 "nonimmediate_operand" "%0,0,<r>")
+	  (match_operand:SWI 2 "<general_operand>" "<g>,<r><i>,0")))
+   (set (reg FLAGS_REG)
 	(compare
-	  (plus:SWI
-	    (match_operand:SWI 1 "nonimmediate_operand" "%0,0,<r>")
-	    (match_operand:SWI 2 "<general_operand>" "<g>,<r><i>,0"))
-	  (const_int 0)))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>,<r>m,<r>")
-	(plus:SWI (match_dup 1) (match_dup 2)))]
+	  (plus:SWI (match_dup 1) (match_dup 2))
+	  (const_int 0)))]
   "ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
 {
@@ -5857,13 +5857,14 @@ 
 
 ;; See comment for addsi_1_zext why we do use nonimmediate_operand
 (define_insn "*addsi_2_zext"
-  [(set (reg FLAGS_REG)
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(zero_extend:DI
+	  (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r")
+		   (match_operand:SI 2 "x86_64_general_operand" "rme,0"))))
+   (set (reg FLAGS_REG)
 	(compare
-	  (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r")
-		   (match_operand:SI 2 "x86_64_general_operand" "rme,0"))
-	  (const_int 0)))
-   (set (match_operand:DI 0 "register_operand" "=r,r")
-	(zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
+	  (plus:SI (match_dup 1) (match_dup 2))
+	  (const_int 0)))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (PLUS, SImode, operands)"
 {
@@ -6090,7 +6091,7 @@ 
 	    (match_operand:SWI 2 "<general_operand>" "<g>,0"))
 	  (const_int 0)))
    (clobber (match_scratch:SWI 0 "=<r>,<r>"))]
-  "ix86_match_ccmode (insn, CCGOCmode)
+  "0 && ix86_match_ccmode (insn, CCGOCmode)
    && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
 {
   switch (get_attr_type (insn))