Patchwork [i386] : Add zero-extended variants of PLUS and MULT simple LEA peephole2s.

login
register
mail settings
Submitter Uros Bizjak
Date Aug. 10, 2012, 5:24 p.m.
Message ID <CAFULd4aFP0B5w7dnebC9h0_7J0+SNswaeGzCgk=ZXk3KHFUr0Q@mail.gmail.com>
Download mbox | patch
Permalink /patch/176553/
State New
Headers show

Comments

Uros Bizjak - Aug. 10, 2012, 5:24 p.m.
Hello!

The attached patch adds zero-extended variants of the PLUS and MULT simple
LEA peephole2s.  The patch also disables the PLUS peephole2s on
TARGET_OPT_AGU targets (atom), since there the relevant LEAs have already
been split according to LEA vs. ADD priority.

2012-08-10  Uros Bizjak  <ubizjak@gmail.com>

	* config/i386/i386.md (simple LEA peephole2s): Add zero-extend
	variants of PLUS and MULT simple LEA patterns.  Disable PLUS
	patterns for TARGET_OPT_AGU.

Tested on x86_64-pc-linux-gnu {,-m32} and committed to mainline SVN.

Uros.

Patch

Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 190298)
+++ config/i386/i386.md	(working copy)
@@ -17317,12 +17317,15 @@ 
 
 ;; Attempt to convert simple lea to add/shift.
 ;; These can be created by move expanders.
+;; Disable PLUS peepholes on TARGET_OPT_AGU, since all
+;; relevant lea instructions were already split.
 
 (define_peephole2
   [(set (match_operand:SWI48 0 "register_operand")
   	(plus:SWI48 (match_dup 0)
 		    (match_operand:SWI48 1 "<nonmemory_operand>")))]
-  "peep2_regno_dead_p (0, FLAGS_REG)"
+  "!TARGET_OPT_AGU
+   && peep2_regno_dead_p (0, FLAGS_REG)"
   [(parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 1)))
 	      (clobber (reg:CC FLAGS_REG))])])
 
@@ -17330,7 +17333,8 @@ 
   [(set (match_operand:SWI48 0 "register_operand")
   	(plus:SWI48 (match_operand:SWI48 1 "<nonmemory_operand>")
 		    (match_dup 0)))]
-  "peep2_regno_dead_p (0, FLAGS_REG)"
+  "!TARGET_OPT_AGU
+   && peep2_regno_dead_p (0, FLAGS_REG)"
   [(parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 1)))
 	      (clobber (reg:CC FLAGS_REG))])])
 
@@ -17338,9 +17342,9 @@ 
   [(set (match_operand:SI 0 "register_operand")
   	(subreg:SI (plus:DI (match_operand:DI 1 "register_operand")
 			    (match_operand:DI 2 "nonmemory_operand")) 0))]
-  "TARGET_64BIT
-   && peep2_regno_dead_p (0, FLAGS_REG)
-   && REGNO (operands[0]) == REGNO (operands[1])"
+  "TARGET_64BIT && !TARGET_OPT_AGU
+   && REGNO (operands[0]) == REGNO (operands[1])
+   && peep2_regno_dead_p (0, FLAGS_REG)"
   [(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 2)))
 	      (clobber (reg:CC FLAGS_REG))])]
   "operands[2] = gen_lowpart (SImode, operands[2]);")
@@ -17349,27 +17353,81 @@ 
   [(set (match_operand:SI 0 "register_operand")
   	(subreg:SI (plus:DI (match_operand:DI 1 "nonmemory_operand")
 			    (match_operand:DI 2 "register_operand")) 0))]
-  "TARGET_64BIT
-   && peep2_regno_dead_p (0, FLAGS_REG)
-   && REGNO (operands[0]) == REGNO (operands[2])"
+  "TARGET_64BIT && !TARGET_OPT_AGU
+   && REGNO (operands[0]) == REGNO (operands[2])
+   && peep2_regno_dead_p (0, FLAGS_REG)"
   [(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 1)))
 	      (clobber (reg:CC FLAGS_REG))])]
   "operands[1] = gen_lowpart (SImode, operands[1]);")
 
 (define_peephole2
+  [(set (match_operand:DI 0 "register_operand")	; dest = zext (reg + x)
+  	(zero_extend:DI
+	  (plus:SI (match_operand:SI 1 "register_operand")
+		   (match_operand:SI 2 "nonmemory_operand"))))]
+  "TARGET_64BIT && !TARGET_OPT_AGU
+   && REGNO (operands[0]) == REGNO (operands[1])
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0)	; same add, with explicit flags clobber
+		   (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))
+	      (clobber (reg:CC FLAGS_REG))])])
+
+(define_peephole2	; dest = zext (x + reg), reg in dest's register
+  [(set (match_operand:DI 0 "register_operand")
+  	(zero_extend:DI
+	  (plus:SI (match_operand:SI 1 "nonmemory_operand")
+		   (match_operand:SI 2 "register_operand"))))]
+  "TARGET_64BIT && !TARGET_OPT_AGU
+   && REGNO (operands[0]) == REGNO (operands[2])
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0)	; register operand is emitted first
+		   (zero_extend:DI (plus:SI (match_dup 2) (match_dup 1))))
+	      (clobber (reg:CC FLAGS_REG))])])
+
+(define_peephole2	; dest = zext (low SImode part of DImode (dest + x))
+  [(set (match_operand:DI 0 "register_operand")
+  	(zero_extend:DI
+	  (subreg:SI (plus:DI (match_dup 0)
+			      (match_operand:DI 1 "nonmemory_operand")) 0)))]
+  "TARGET_64BIT && !TARGET_OPT_AGU
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0)	; SImode add, with explicit flags clobber
+		   (zero_extend:DI (plus:SI (match_dup 2) (match_dup 1))))
+	      (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[1] = gen_lowpart (SImode, operands[1]);	/* SImode view of the addend.  */
+  operands[2] = gen_lowpart (SImode, operands[0]);	/* SImode view of the destination.  */
+})
+
+(define_peephole2	; dest = zext (low SImode part of DImode (x + dest))
+  [(set (match_operand:DI 0 "register_operand")
+  	(zero_extend:DI
+	  (subreg:SI (plus:DI (match_operand:DI 1 "nonmemory_operand")
+		     	      (match_dup 0)) 0)))]
+  "TARGET_64BIT && !TARGET_OPT_AGU
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0)	; SImode add, with explicit flags clobber
+		   (zero_extend:DI (plus:SI (match_dup 2) (match_dup 1))))
+	      (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[1] = gen_lowpart (SImode, operands[1]);	/* SImode view of the addend.  */
+  operands[2] = gen_lowpart (SImode, operands[0]);	/* SImode view of the destination.  */
+})
+
+(define_peephole2
   [(set (match_operand:SWI48 0 "register_operand")
   	(mult:SWI48 (match_dup 0)
 		    (match_operand:SWI48 1 "const_int_operand")))]
   "exact_log2 (INTVAL (operands[1])) >= 0
    && peep2_regno_dead_p (0, FLAGS_REG)"
-  [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 2)))
+  [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))
 	      (clobber (reg:CC FLAGS_REG))])]
-  "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
+  "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
 
 (define_peephole2
   [(set (match_operand:SI 0 "register_operand")
   	(subreg:SI (mult:DI (match_operand:DI 1 "register_operand")
-		   (match_operand:DI 2 "const_int_operand")) 0))]
+			    (match_operand:DI 2 "const_int_operand")) 0))]
   "TARGET_64BIT
    && exact_log2 (INTVAL (operands[2])) >= 0
    && REGNO (operands[0]) == REGNO (operands[1])
@@ -17378,6 +17436,36 @@ 
 	      (clobber (reg:CC FLAGS_REG))])]
   "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));")
 
+(define_peephole2	; dest = zext (reg * 2^n) -> zext (reg << n), reg in dest's register
+  [(set (match_operand:DI 0 "register_operand")
+  	(zero_extend:DI
+	  (mult:SI (match_operand:SI 1 "register_operand")
+		   (match_operand:SI 2 "const_int_operand"))))]
+  "TARGET_64BIT
+   && exact_log2 (INTVAL (operands[2])) >= 0
+   && REGNO (operands[0]) == REGNO (operands[1])
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0)	; zero_extend carries DImode, matching operand 0
+		   (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));")	; multiplier -> shift count
+
+(define_peephole2	; dest = zext (low SImode part of DImode (dest * 2^n)) -> SImode shift
+  [(set (match_operand:DI 0 "register_operand")
+  	(zero_extend:DI
+  	  (subreg:SI (mult:DI (match_dup 0)
+			      (match_operand:DI 1 "const_int_operand")) 0)))]
+  "TARGET_64BIT
+   && exact_log2 (INTVAL (operands[1])) >= 0
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0)	; SImode shift, with explicit flags clobber
+		   (zero_extend:DI (ashift:SI (match_dup 2) (match_dup 1))))
+	      (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));	/* Multiplier -> shift count.  */
+  operands[2] = gen_lowpart (SImode, operands[0]);	/* SImode view of the destination.  */
+})
+
 ;; The ESP adjustments can be done by the push and pop instructions.  Resulting
 ;; code is shorter, since push is only 1 byte, while add imm, %esp is 3 bytes.
 ;; On many CPUs it is also faster, since special hardware to avoid esp