Patchwork [RFC,i386] : Experimental patch to implement post-reload compare elimination

login
register
mail settings
Submitter Uros Bizjak
Date April 26, 2012, 3:18 p.m.
Message ID <CAFULd4aRVYnrHQ7CiGbn9wA_UNydRwXXL7qRgBJNs-fiUd4saQ@mail.gmail.com>
Download mbox | patch
Permalink /patch/155291/
State New
Headers show

Comments

Uros Bizjak - April 26, 2012, 3:18 p.m.
Hello!

The attached patch implements post-reload compare elimination for the
x86 target. The patch converts arithmetic and logic patterns (that is:
plus, minus, and, or, xor, neg, one's complement) to the patterns
recognized by the post-reload cmp elimination pass, and updates all
relevant splitters and peephole2 RTXes to "conforming" patterns. The
patch bootstraps OK and regression testing shows no regressions, so the
patch can be considered production quality.

The purpose of the patch however, is to look at potential issues with
post-reload cmp elimination pass.

First, some numbers:

1. compiling combine.i:

without the patch: 2225 cmp, 1279 test instructions
with the patch: 2228 cmp, 1278 test instructions

2. compiling insn-attrtab.i:

without the patch: 17763 cmp, 11988 test instructions
with the patch: 17756 cmp, 12023 test instructions

So, the post-reload compare elimination pass leaves some ~40 additional
test instructions when compiling insn-attrtab.i.

Looking at the differences, there are two problems:

1. Post-reload splitters convert add (and some other) patterns to LEA
RTXes, where the FLAGS_REG clobber is not present:

The register allocator (RA) is free to allocate registers to a PLUS RTX
so that it forms a non-destructive add (this RTX results in an LEA insn).
Unfortunately, LEA has no "interesting" FLAGS_REG clobber, so the
following compare can't be merged with the preceding arithmetic
instruction. This problem is not present in the pre-reload compare
elimination pass, and it results in quite a few non-merged compares.

2. The post-reload splitter converts non-SImode patterns to SImode to
avoid size prefixes:

;; Avoid redundant prefixes by splitting HImode arithmetic to SImode.

Post-reload compare elimination pass is not able to undo this change,
and it results in:

       orl     %eax, %r12d     # 10751 *iorsi_1/1      [length = 3]
       testb   %r12b, %r12b    # 5774  *cmpqi_ccno_1/1 [length = 3]

Although the second problem can be "fixed" by restricting the splitter
with "epilogue_completed", I don't think the problem with LEAs can be
fixed by restricting the relevant splitters to split4 pass.

All in all, my opinion is that the post-reload compare elimination pass
is not that effective for x86, and brings more trouble than gain.
Maybe we can complement the pre-reload pass with the post-reload one, but
this would involve many new patterns (insn, splitter and peephole2
patterns) for a very small (if any) gain.

Any other opinions?

2012-04-25  Uros Bizjak  <ubizjak@gmail.com>

        * config/i386/i386.c (TARGET_FLAGS_REGNUM): Define.
        (ix86_match_ccmode): Find first SET of flags reg from COMPARE RTX.
        * config/i386/i386.md (*add<mode>_2): Put compare RTX after
        operator RTX, as expected by post-reload compare elimination pass.
        (*addsi_2_zext): Ditto.
        (*add<mode>_5): Remove.
        (*sub<mode>_2): Put compare RTX after operator RTX.
        (*subsi_2_zext): Ditto.
        (*anddi_2): Ditto.
        (*andqi_2_maybe_si): Ditto.
        (*and<mode>_2): Ditto.
        (*andsi_2_zext): Ditto.
        (*<code><mode>_2): Ditto.
        (*<code>si_2_zext): Ditto.
        (*neg<dwi>2_doubleword): Update RTX.
        (*neg<mode>2_cmpz): Put compare RTX after operator RTX.
        (*negsi2_cmpz_zext): Ditto.
        (*one_cmpl<mode>2_2): Ditto. Update corresponding splitter.
        (*one_cmplsi2_2_zext): Ditto.  Update corresponding splitter.

        * config/i386/i386.md: Update peephole2 patterns for changed RTXes.

The patch was tested on x86_64-pc-linux-gnu {,-m32} and is NOT
intended to be committed to SVN.

Uros.

Patch

Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 186819)
+++ config/i386/i386.c	(working copy)
@@ -17861,19 +17861,32 @@  ix86_split_copysign_var (rtx operands[])
   emit_insn (gen_rtx_SET (VOIDmode, dest, x));
 }
 
-/* Return TRUE or FALSE depending on whether the first SET in INSN
-   has source and destination with matching CC modes, and that the
+/* Return TRUE or FALSE depending on whether the first SET from COMPARE
+   in INSN has source and destination with matching CC modes, and that the
    CC mode is at least as constrained as REQ_MODE.  */
 
 bool
 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
 {
-  rtx set;
+  rtx pat, set;
   enum machine_mode set_mode;
+  int i;
 
-  set = PATTERN (insn);
-  if (GET_CODE (set) == PARALLEL)
-    set = XVECEXP (set, 0, 0);
+  pat = PATTERN (insn);
+  if (GET_CODE (pat) == PARALLEL)
+    {
+      set = NULL_RTX;
+      for (i = 0; i < XVECLEN (pat, 0); i++)
+	{
+	  set = XVECEXP (pat, 0, i);
+	  if (GET_CODE (set) == SET
+	      && GET_CODE (SET_SRC (set)) == COMPARE)
+	    break;
+	}
+    }
+  else
+    set = pat;
+
   gcc_assert (GET_CODE (set) == SET);
   gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
 
@@ -39448,6 +39461,8 @@  ix86_autovectorize_vector_sizes (void)
 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
 #undef TARGET_CC_MODES_COMPATIBLE
 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
+#undef TARGET_FLAGS_REGNUM
+#define TARGET_FLAGS_REGNUM FLAGS_REG
 
 #undef TARGET_MACHINE_DEPENDENT_REORG
 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 186819)
+++ config/i386/i386.md	(working copy)
@@ -5808,14 +5808,14 @@ 
 	(zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))])
 
 (define_insn "*add<mode>_2"
-  [(set (reg FLAGS_REG)
+  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>,<r>m,<r>")
+	(plus:SWI
+	  (match_operand:SWI 1 "nonimmediate_operand" "%0,0,<r>")
+	  (match_operand:SWI 2 "<general_operand>" "<g>,<r><i>,0")))
+   (set (reg FLAGS_REG)
 	(compare
-	  (plus:SWI
-	    (match_operand:SWI 1 "nonimmediate_operand" "%0,0,<r>")
-	    (match_operand:SWI 2 "<general_operand>" "<g>,<r><i>,0"))
-	  (const_int 0)))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>,<r>m,<r>")
-	(plus:SWI (match_dup 1) (match_dup 2)))]
+	  (plus:SWI (match_dup 1) (match_dup 2))
+	  (const_int 0)))]
   "ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
 {
@@ -5857,13 +5857,14 @@ 
 
 ;; See comment for addsi_1_zext why we do use nonimmediate_operand
 (define_insn "*addsi_2_zext"
-  [(set (reg FLAGS_REG)
-	(compare
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(zero_extend:DI
 	  (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r")
-		   (match_operand:SI 2 "x86_64_general_operand" "rme,0"))
-	  (const_int 0)))
-   (set (match_operand:DI 0 "register_operand" "=r,r")
-	(zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
+		   (match_operand:SI 2 "x86_64_general_operand" "rme,0"))))
+   (set (reg FLAGS_REG)
+	(compare
+	  (plus:SI (match_dup 1) (match_dup 2))
+	  (const_int 0)))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (PLUS, SImode, operands)"
 {
@@ -6082,53 +6083,6 @@ 
 	(const_string "*")))
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*add<mode>_5"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (plus:SWI
-	    (match_operand:SWI 1 "nonimmediate_operand" "%0,<r>")
-	    (match_operand:SWI 2 "<general_operand>" "<g>,0"))
-	  (const_int 0)))
-   (clobber (match_scratch:SWI 0 "=<r>,<r>"))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
-{
-  switch (get_attr_type (insn))
-    {
-    case TYPE_INCDEC:
-      if (operands[2] == const1_rtx)
-        return "inc{<imodesuffix>}\t%0";
-      else
-        {
-          gcc_assert (operands[2] == constm1_rtx);
-          return "dec{<imodesuffix>}\t%0";
-	}
-
-    default:
-      if (which_alternative == 1)
-	{
-	  rtx tmp;
-	  tmp = operands[1], operands[1] = operands[2], operands[2] = tmp;
-	}
-
-      gcc_assert (rtx_equal_p (operands[0], operands[1]));
-      if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
-        return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
-
-      return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
-    }
-}
-  [(set (attr "type")
-     (if_then_else (match_operand:SWI 2 "incdec_operand")
-	(const_string "incdec")
-	(const_string "alu")))
-   (set (attr "length_immediate")
-      (if_then_else
-	(and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
-	(const_string "1")
-	(const_string "*")))
-   (set_attr "mode" "<MODE>")])
-
 (define_insn "*addqi_ext_1_rex64"
   [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
 			 (const_int 8)
@@ -6425,14 +6379,14 @@ 
    (set_attr "mode" "QI")])
 
 (define_insn "*sub<mode>_2"
-  [(set (reg FLAGS_REG)
+  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+	(minus:SWI
+	  (match_operand:SWI 1 "nonimmediate_operand" "0,0")
+	  (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m")))
+   (set (reg FLAGS_REG)
 	(compare
-	  (minus:SWI
-	    (match_operand:SWI 1 "nonimmediate_operand" "0,0")
-	    (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m"))
-	  (const_int 0)))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
-	(minus:SWI (match_dup 1) (match_dup 2)))]
+	  (minus:SWI (match_dup 1) (match_dup 2))
+	  (const_int 0)))]
   "ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
   "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
@@ -6440,15 +6394,14 @@ 
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*subsi_2_zext"
-  [(set (reg FLAGS_REG)
-	(compare
-	  (minus:SI (match_operand:SI 1 "register_operand" "0")
-		    (match_operand:SI 2 "x86_64_general_operand" "rme"))
-	  (const_int 0)))
-   (set (match_operand:DI 0 "register_operand" "=r")
+  [(set (match_operand:DI 0 "register_operand" "=r")
 	(zero_extend:DI
-	  (minus:SI (match_dup 1)
-		    (match_dup 2))))]
+	  (minus:SI (match_operand:SI 1 "register_operand" "0")
+		    (match_operand:SI 2 "x86_64_general_operand" "rme"))))
+   (set (reg FLAGS_REG)
+	(compare
+	  (minus:SI (match_dup 1) (match_dup 2))
+	  (const_int 0)))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (MINUS, SImode, operands)"
   "sub{l}\t{%2, %k0|%k0, %2}"
@@ -6473,8 +6426,7 @@ 
 		 (match_operand:SI 2 "x86_64_general_operand" "rme")))
    (set (match_operand:DI 0 "register_operand" "=r")
 	(zero_extend:DI
-	  (minus:SI (match_dup 1)
-		    (match_dup 2))))]
+	  (minus:SI (match_dup 1) (match_dup 2))))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)
    && ix86_binary_operator_ok (MINUS, SImode, operands)"
   "sub{l}\t{%2, %1|%1, %2}"
@@ -7899,14 +7851,14 @@ 
   "operands[0] = gen_lowpart (SImode, operands[0]);")
 
 (define_insn "*anddi_2"
-  [(set (reg FLAGS_REG)
-	(compare
-	 (and:DI
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm")
+	(and:DI
 	  (match_operand:DI 1 "nonimmediate_operand" "%0,0,0")
-	  (match_operand:DI 2 "x86_64_szext_general_operand" "Z,rem,re"))
-	 (const_int 0)))
-   (set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm")
-	(and:DI (match_dup 1) (match_dup 2)))]
+	  (match_operand:DI 2 "x86_64_szext_general_operand" "Z,rem,re")))
+   (set (reg FLAGS_REG)
+	(compare
+	  (and:DI (match_dup 1) (match_dup 2))
+	  (const_int 0)))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
    && ix86_binary_operator_ok (AND, DImode, operands)"
   "@
@@ -7917,13 +7869,13 @@ 
    (set_attr "mode" "SI,DI,DI")])
 
 (define_insn "*andqi_2_maybe_si"
-  [(set (reg FLAGS_REG)
-	(compare (and:QI
-		  (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
-		  (match_operand:QI 2 "general_operand" "qmn,qn,n"))
-		 (const_int 0)))
-   (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm,*r")
-	(and:QI (match_dup 1) (match_dup 2)))]
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=q,qm,*r")
+	(and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
+		(match_operand:QI 2 "general_operand" "qmn,qn,n")))
+   (set (reg FLAGS_REG)
+	(compare
+	  (and:QI (match_dup 1) (match_dup 2))
+	  (const_int 0)))]
   "ix86_binary_operator_ok (AND, QImode, operands)
    && ix86_match_ccmode (insn,
 			 CONST_INT_P (operands[2])
@@ -7941,13 +7893,14 @@ 
    (set_attr "mode" "QI,QI,SI")])
 
 (define_insn "*and<mode>_2"
-  [(set (reg FLAGS_REG)
-	(compare (and:SWI124
-		  (match_operand:SWI124 1 "nonimmediate_operand" "%0,0")
-		  (match_operand:SWI124 2 "<general_operand>" "<g>,<r><i>"))
-		 (const_int 0)))
-   (set (match_operand:SWI124 0 "nonimmediate_operand" "=<r>,<r>m")
-	(and:SWI124 (match_dup 1) (match_dup 2)))]
+  [(set (match_operand:SWI124 0 "nonimmediate_operand" "=<r>,<r>m")
+	(and:SWI124
+	  (match_operand:SWI124 1 "nonimmediate_operand" "%0,0")
+	  (match_operand:SWI124 2 "<general_operand>" "<g>,<r><i>")))
+   (set (reg FLAGS_REG)
+	(compare
+	  (and:SWI124 (match_dup 1) (match_dup 2))
+	  (const_int 0)))]
   "ix86_match_ccmode (insn, CCNOmode)
    && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
   "and{<imodesuffix>}\t{%2, %0|%0, %2}"
@@ -7956,13 +7909,13 @@ 
 
 ;; See comment for addsi_1_zext why we do use nonimmediate_operand
 (define_insn "*andsi_2_zext"
-  [(set (reg FLAGS_REG)
-	(compare (and:SI
-		  (match_operand:SI 1 "nonimmediate_operand" "%0")
-		  (match_operand:SI 2 "x86_64_general_operand" "rme"))
-		 (const_int 0)))
-   (set (match_operand:DI 0 "register_operand" "=r")
-	(zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))]
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+		  (match_operand:SI 2 "x86_64_general_operand" "rme"))))
+   (set (reg FLAGS_REG)
+	(compare (and:SI (match_dup 1) (match_dup 2))
+		 (const_int 0)))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
    && ix86_binary_operator_ok (AND, SImode, operands)"
   "and{l}\t{%2, %k0|%k0, %2}"
@@ -8210,13 +8163,14 @@ 
    (set_attr "mode" "QI")])
 
 (define_insn "*<code><mode>_2"
-  [(set (reg FLAGS_REG)
-	(compare (any_or:SWI
-		  (match_operand:SWI 1 "nonimmediate_operand" "%0,0")
-		  (match_operand:SWI 2 "<general_operand>" "<g>,<r><i>"))
-		 (const_int 0)))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>,<r>m")
-	(any_or:SWI (match_dup 1) (match_dup 2)))]
+  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>,<r>m")
+	(any_or:SWI
+	  (match_operand:SWI 1 "nonimmediate_operand" "%0,0")
+	  (match_operand:SWI 2 "<general_operand>" "<g>,<r><i>")))
+   (set (reg FLAGS_REG)
+	(compare
+	  (any_or:SWI (match_dup 1) (match_dup 2))
+	  (const_int 0)))]
   "ix86_match_ccmode (insn, CCNOmode)
    && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
   "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
@@ -8226,12 +8180,14 @@ 
 ;; See comment for addsi_1_zext why we do use nonimmediate_operand
 ;; ??? Special case for immediate operand is missing - it is tricky.
 (define_insn "*<code>si_2_zext"
-  [(set (reg FLAGS_REG)
-	(compare (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
-			    (match_operand:SI 2 "x86_64_general_operand" "rme"))
-		 (const_int 0)))
-   (set (match_operand:DI 0 "register_operand" "=r")
-	(zero_extend:DI (any_or:SI (match_dup 1) (match_dup 2))))]
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+		     (match_operand:SI 2 "x86_64_general_operand" "rme"))))
+   (set (reg FLAGS_REG)
+	(compare
+	  (any_or:SI (match_dup 1) (match_dup 2))
+	  (const_int 0)))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
    && ix86_binary_operator_ok (<CODE>, SImode, operands)"
   "<logic>{l}\t{%2, %k0|%k0, %2}"
@@ -8266,19 +8222,6 @@ 
   [(set_attr "type" "alu1")
    (set_attr "mode" "QI")])
 
-(define_insn "*<code><mode>_3"
-  [(set (reg FLAGS_REG)
-	(compare (any_or:SWI
-		  (match_operand:SWI 1 "nonimmediate_operand" "%0")
-		  (match_operand:SWI 2 "<general_operand>" "<g>"))
-		 (const_int 0)))
-   (clobber (match_scratch:SWI 0 "=<r>"))]
-  "ix86_match_ccmode (insn, CCNOmode)
-   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
-  "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "alu")
-   (set_attr "mode" "<MODE>")])
-
 (define_insn "*<code>qi_ext_0"
   [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
 			 (const_int 8)
@@ -8484,9 +8427,9 @@ 
   "#"
   "reload_completed"
   [(parallel
-    [(set (reg:CCZ FLAGS_REG)
-	  (compare:CCZ (neg:DWIH (match_dup 1)) (const_int 0)))
-     (set (match_dup 0) (neg:DWIH (match_dup 1)))])
+    [(set (match_dup 0) (neg:DWIH (match_dup 1)))
+     (set (reg:CCZ FLAGS_REG)
+	  (compare:CCZ (neg:DWIH (match_dup 1)) (const_int 0)))])
    (parallel
     [(set (match_dup 2)
 	  (plus:DWIH (match_dup 3)
@@ -8514,7 +8457,7 @@ 
 	(lshiftrt:DI
 	  (neg:DI (ashift:DI (match_operand:DI 1 "register_operand" "0")
 			     (const_int 32)))
-	(const_int 32)))
+	  (const_int 32)))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands)"
   "neg{l}\t%k0"
@@ -8526,30 +8469,30 @@ 
 ;; flag being the only useful item.
 
 (define_insn "*neg<mode>2_cmpz"
-  [(set (reg:CCZ FLAGS_REG)
+  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+	(neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")))
+   (set (reg:CCZ FLAGS_REG)
 	(compare:CCZ
-	  (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0"))
-		   (const_int 0)))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
-	(neg:SWI (match_dup 1)))]
+	  (neg:SWI (match_dup 1))
+	  (const_int 0)))]
   "ix86_unary_operator_ok (NEG, <MODE>mode, operands)"
   "neg{<imodesuffix>}\t%0"
   [(set_attr "type" "negnot")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*negsi2_cmpz_zext"
-  [(set (reg:CCZ FLAGS_REG)
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(lshiftrt:DI
+	  (neg:DI (ashift:DI
+		    (match_operand:DI 1 "register_operand" "0")
+		    (const_int 32)))
+	  (const_int 32)))
+   (set (reg:CCZ FLAGS_REG)
 	(compare:CCZ
-	  (lshiftrt:DI
-	    (neg:DI (ashift:DI
-		      (match_operand:DI 1 "register_operand" "0")
-		      (const_int 32)))
-	    (const_int 32))
-	  (const_int 0)))
-   (set (match_operand:DI 0 "register_operand" "=r")
-	(lshiftrt:DI (neg:DI (ashift:DI (match_dup 1)
-					(const_int 32)))
-		     (const_int 32)))]
+	  (lshiftrt:DI (neg:DI (ashift:DI (match_dup 1)
+					  (const_int 32)))
+		       (const_int 32))
+	(const_int 0)))]
   "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands)"
   "neg{l}\t%k0"
   [(set_attr "type" "negnot")
@@ -8864,11 +8807,12 @@ 
    (set_attr "mode" "SI")])
 
 (define_insn "*one_cmpl<mode>2_2"
-  [(set (reg FLAGS_REG)
-	(compare (not:SWI (match_operand:SWI 1 "nonimmediate_operand" "0"))
-		 (const_int 0)))
-   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
-	(not:SWI (match_dup 1)))]
+  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+	(not:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")))
+   (set (reg FLAGS_REG)
+	(compare
+	  (not:SWI (match_dup 1))
+	  (const_int 0)))]
   "ix86_match_ccmode (insn, CCNOmode)
    && ix86_unary_operator_ok (NOT, <MODE>mode, operands)"
   "#"
@@ -8876,26 +8820,28 @@ 
    (set_attr "mode" "<MODE>")])
 
 (define_split
-  [(set (match_operand 0 "flags_reg_operand")
+  [(set (match_operand:SWI 1 "nonimmediate_operand")
+	(not:SWI (match_operand:SWI 3 "nonimmediate_operand")))
+   (set (match_operand 0 "flags_reg_operand")
 	(match_operator 2 "compare_operator"
-	  [(not:SWI (match_operand:SWI 3 "nonimmediate_operand"))
-	   (const_int 0)]))
-   (set (match_operand:SWI 1 "nonimmediate_operand")
-	(not:SWI (match_dup 3)))]
+	  [(not:SWI (match_dup 3))
+	   (const_int 0)]))]
   "ix86_match_ccmode (insn, CCNOmode)"
-  [(parallel [(set (match_dup 0)
+  [(parallel [(set (match_dup 1)
+		   (xor:SWI (match_dup 3) (const_int -1)))
+	      (set (match_dup 0)
 		   (match_op_dup 2 [(xor:SWI (match_dup 3) (const_int -1))
-				    (const_int 0)]))
-	      (set (match_dup 1)
-		   (xor:SWI (match_dup 3) (const_int -1)))])])
+				    (const_int 0)]))])])
 
 ;; ??? Currently never generated - xor is used instead.
 (define_insn "*one_cmplsi2_2_zext"
-  [(set (reg FLAGS_REG)
-	(compare (not:SI (match_operand:SI 1 "register_operand" "0"))
-		 (const_int 0)))
-   (set (match_operand:DI 0 "register_operand" "=r")
-	(zero_extend:DI (not:SI (match_dup 1))))]
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (not:SI (match_operand:SI 1 "register_operand" "0"))))
+   (set (reg FLAGS_REG)
+	(compare
+	  (not:SI (match_dup 1))
+	  (const_int 0)))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
    && ix86_unary_operator_ok (NOT, SImode, operands)"
   "#"
@@ -8903,18 +8849,19 @@ 
    (set_attr "mode" "SI")])
 
 (define_split
-  [(set (match_operand 0 "flags_reg_operand")
+  [(set (match_operand:DI 1 "register_operand")
+	(zero_extend:DI
+	  (not:SI (match_operand:SI 3 "register_operand"))))
+   (set (match_operand 0 "flags_reg_operand")
 	(match_operator 2 "compare_operator"
-	  [(not:SI (match_operand:SI 3 "register_operand"))
-	   (const_int 0)]))
-   (set (match_operand:DI 1 "register_operand")
-	(zero_extend:DI (not:SI (match_dup 3))))]
+	  [(not:SI (match_dup 3))
+	   (const_int 0)]))]
   "ix86_match_ccmode (insn, CCNOmode)"
-  [(parallel [(set (match_dup 0)
+  [(parallel [(set (match_dup 1)
+		   (zero_extend:DI (xor:SI (match_dup 3) (const_int -1))))
+	      (set (match_dup 0)
 		   (match_op_dup 2 [(xor:SI (match_dup 3) (const_int -1))
-				    (const_int 0)]))
-	      (set (match_dup 1)
-		   (zero_extend:DI (xor:SI (match_dup 3) (const_int -1))))])])
+				    (const_int 0)]))])])
 
 ;; Shift instructions
 
@@ -11212,8 +11159,8 @@ 
 })
 
 (define_peephole2
-  [(parallel [(set (reg FLAGS_REG) (match_operand 0))
-	      (match_operand 4)])
+  [(parallel [(match_operand 4)
+	      (set (reg FLAGS_REG) (match_operand 0))])
    (set (match_operand:QI 1 "register_operand")
 	(match_operator:QI 2 "ix86_comparison_operator"
 	  [(reg FLAGS_REG) (const_int 0)]))
@@ -11222,8 +11169,8 @@ 
   "(peep2_reg_dead_p (3, operands[1])
     || operands_match_p (operands[1], operands[3]))
    && ! reg_overlap_mentioned_p (operands[3], operands[0])"
-  [(parallel [(set (match_dup 5) (match_dup 0))
-	      (match_dup 4)])
+  [(parallel [(match_dup 4)
+  	      (set (match_dup 5) (match_dup 0))])
    (set (strict_low_part (match_dup 6))
 	(match_dup 2))]
 {
@@ -11254,8 +11201,8 @@ 
 })
 
 (define_peephole2
-  [(parallel [(set (reg FLAGS_REG) (match_operand 0))
-	      (match_operand 4)])
+  [(parallel [(match_operand 4)
+	      (set (reg FLAGS_REG) (match_operand 0))])
    (set (match_operand:QI 1 "register_operand")
 	(match_operator:QI 2 "ix86_comparison_operator"
 	  [(reg FLAGS_REG) (const_int 0)]))
@@ -11265,8 +11212,8 @@ 
   "(peep2_reg_dead_p (3, operands[1])
     || operands_match_p (operands[1], operands[3]))
    && ! reg_overlap_mentioned_p (operands[3], operands[0])"
-  [(parallel [(set (match_dup 5) (match_dup 0))
-	      (match_dup 4)])
+  [(parallel [(match_dup 4)
+	      (set (match_dup 5) (match_dup 0))])
    (set (strict_low_part (match_dup 6))
 	(match_dup 2))]
 {
@@ -16877,24 +16824,24 @@ 
 ; instruction size is unchanged, except in the %eax case for
 ; which it is increased by one byte, hence the ! optimize_size.
 (define_split
-  [(set (match_operand 0 "flags_reg_operand")
+  [(set (match_operand 1 "register_operand")
+	(and (match_operand 3 "aligned_operand")
+	     (match_operand 4 "const_int_operand")))
+   (set (match_operand 0 "flags_reg_operand")
 	(match_operator 2 "compare_operator"
-	  [(and (match_operand 3 "aligned_operand")
-		(match_operand 4 "const_int_operand"))
-	   (const_int 0)]))
-   (set (match_operand 1 "register_operand")
-	(and (match_dup 3) (match_dup 4)))]
+	  [(and (match_dup 3) (match_dup 4))
+	   (const_int 0)]))]
   "! TARGET_PARTIAL_REG_STALL && reload_completed
    && optimize_insn_for_speed_p ()
    && ((GET_MODE (operands[1]) == HImode && ! TARGET_FAST_PREFIX)
        || (GET_MODE (operands[1]) == QImode && TARGET_PROMOTE_QImode))
    /* Ensure that the operand will remain sign-extended immediate.  */
    && ix86_match_ccmode (insn, INTVAL (operands[4]) >= 0 ? CCNOmode : CCZmode)"
-  [(parallel [(set (match_dup 0)
+  [(parallel [(set (match_dup 1)
+		   (and:SI (match_dup 3) (match_dup 4)))
+	      (set (match_dup 0)
 		   (match_op_dup 2 [(and:SI (match_dup 3) (match_dup 4))
-			            (const_int 0)]))
-	      (set (match_dup 1)
-		   (and:SI (match_dup 3) (match_dup 4)))])]
+			            (const_int 0)]))])]
 {
   operands[4]
     = gen_int_mode (INTVAL (operands[4])
@@ -17087,11 +17034,11 @@ 
        || satisfies_constraint_K (operands[3]))
    && peep2_reg_dead_p (1, operands[2])"
   [(parallel
-     [(set (match_dup 0)
+     [(set (match_dup 2)
+	   (and:SI (match_dup 2) (match_dup 3)))
+      (set (match_dup 0)
 	   (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3))
-		            (const_int 0)]))
-      (set (match_dup 2)
-	   (and:SI (match_dup 2) (match_dup 3)))])])
+		            (const_int 0)]))])])
 
 ;; We don't need to handle HImode case, because it will be promoted to SImode
 ;; on ! TARGET_PARTIAL_REG_STALL
@@ -17107,11 +17054,11 @@ 
    && true_regnum (operands[2]) != AX_REG
    && peep2_reg_dead_p (1, operands[2])"
   [(parallel
-     [(set (match_dup 0)
+     [(set (match_dup 2)
+	   (and:QI (match_dup 2) (match_dup 3)))
+      (set (match_dup 0)
 	   (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3))
-		            (const_int 0)]))
-      (set (match_dup 2)
-	   (and:QI (match_dup 2) (match_dup 3)))])])
+		            (const_int 0)]))])])
 
 (define_peephole2
   [(set (match_operand 0 "flags_reg_operand")
@@ -17264,9 +17211,9 @@ 
 			 (GET_CODE (operands[3]) == PLUS
 			  || GET_CODE (operands[3]) == MINUS)
 			 ? CCGOCmode : CCNOmode)"
-  [(parallel [(set (match_dup 4) (match_dup 5))
-	      (set (match_dup 1) (match_op_dup 3 [(match_dup 1)
-						  (match_dup 2)]))])]
+  [(parallel [(set (match_dup 1) (match_op_dup 3 [(match_dup 1)
+						  (match_dup 2)]))
+	      (set (match_dup 4) (match_dup 5))])]
 {
   operands[4] = SET_DEST (PATTERN (peep2_next_insn (3)));
   operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[3]), <MODE>mode,
@@ -17291,9 +17238,9 @@ 
    && ix86_match_ccmode (peep2_next_insn (2),
 			 GET_CODE (operands[2]) == PLUS
 			 ? CCGOCmode : CCNOmode)"
-  [(parallel [(set (match_dup 3) (match_dup 4))
-	      (set (match_dup 1) (match_op_dup 2 [(match_dup 1)
-						  (match_dup 0)]))])]
+  [(parallel [(set (match_dup 1) (match_op_dup 2 [(match_dup 1)
+						  (match_dup 0)]))
+	      (set (match_dup 3) (match_dup 4))])]
 {
   operands[3] = SET_DEST (PATTERN (peep2_next_insn (2)));
   operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), <MODE>mode,
@@ -17325,8 +17272,8 @@ 
 			 (GET_CODE (operands[3]) == PLUS
 			  || GET_CODE (operands[3]) == MINUS)
 			 ? CCGOCmode : CCNOmode)"
-  [(parallel [(set (match_dup 4) (match_dup 5))
-	      (set (match_dup 1) (match_dup 6))])]
+  [(parallel [(set (match_dup 1) (match_dup 6))
+	      (set (match_dup 4) (match_dup 5))])]
 {
   operands[2] = gen_lowpart (<MODE>mode, operands[2]);
   operands[4] = SET_DEST (PATTERN (peep2_next_insn (3)));