Patchwork Add peepholes to optimize reg0=mem1;reg0 op=nonmem2;mem1=reg0;if(reg0!=0) (PR rtl-optimization/49095)

login
register
mail settings
Submitter Jakub Jelinek
Date May 27, 2011, 4:40 p.m.
Message ID <20110527164052.GC17079@tyan-ft48-01.lab.bos.redhat.com>
Download mbox | patch
Permalink /patch/97707/
State New
Headers show

Comments

Jakub Jelinek - May 27, 2011, 4:40 p.m.
Hi!

This patch adds a few peephole2s to help optimize if (!--*x) etc.
This is something the combiner doesn't and can't easily handle, because in
reg0 = mem1
reg0 {+,-,&,|,^}= x
mem1 = reg0
cc = compare (reg0, 0)
reg0 is used also by both the store and compare and there is no dependence
in between them, so for reg0 = mem1; reg0 {+,-,&,|,^}= x; mem1 = reg0
alone it can't do anything because reg0 is still needed and mem1 {+,-,&,|,^}= x
doesn't set it, and the compare, being second user of reg0, doesn't have any
LOG_LINKS and thus try_combine isn't called for it at all.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2011-05-27  Jakub Jelinek  <jakub@redhat.com>

	PR rtl-optimization/49095
	* config/i386/predicates.md (plusminuslogic_operator): New predicate.
	* config/i386/i386.md: Add peepholes for mem {+,-,&,|,^}= x; mem != 0.

	* gcc.target/i386/pr49095.c: New test.


	Jakub
Uros Bizjak - May 29, 2011, 6:41 p.m.
On Fri, May 27, 2011 at 6:40 PM, Jakub Jelinek <jakub@redhat.com> wrote:

> This patch adds a few peephole2s to help optimize if (!--*x) etc.
> This is something the combiner doesn't and can't easily handle, because in
> reg0 = mem1
> reg0 {+,-,&,|,^}= x
> mem1 = reg0
> cc = compare (reg0, 0)
> reg0 is used also by both the store and compare and there is no dependence
> in between them, so for reg0 = mem1; reg0 {+,-,&,|,^}= x; mem1 = reg0
> alone it can't do anything because reg0 is still needed and mem1 {+,-,&,|,^}= x
> doesn't set it, and the compare, being second user of reg0, doesn't have any
> LOG_LINKS and thus try_combine isn't called for it at all.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2011-05-27  Jakub Jelinek  <jakub@redhat.com>
>
>        PR rtl-optimization/49095
>        * config/i386/predicates.md (plusminuslogic_operator): New predicate.
>        * config/i386/i386.md: Add peepholes for mem {+,-,&,|,^}= x; mem != 0.
>
>        * gcc.target/i386/pr49095.c: New test.

This is OK.

On a related note, we probably want to set TARGET_READ_MODIFY_WRITE in
ix86_option_override_internal when optimize_size is in effect.

Thanks,
Uros.

Patch

--- gcc/config/i386/predicates.md.jj	2011-05-11 19:39:00.000000000 +0200
+++ gcc/config/i386/predicates.md	2011-05-27 10:55:11.000000000 +0200
@@ -1066,6 +1066,10 @@  (define_predicate "mult_operator"
 (define_predicate "div_operator"
   (match_code "div"))
 
+;; Return true if this is a plus, minus, and, ior or xor operation.
+(define_predicate "plusminuslogic_operator"
+  (match_code "plus,minus,and,ior,xor"))
+
 ;; Return true if this is a float extend operation.
 (define_predicate "float_operator"
   (match_code "float"))
--- gcc/config/i386/i386.md.jj	2011-05-25 16:30:04.000000000 +0200
+++ gcc/config/i386/i386.md	2011-05-27 13:42:11.000000000 +0200
@@ -16852,6 +16852,91 @@  (define_peephole2
               (clobber (reg:CC FLAGS_REG))])
    (set (match_dup 0) (match_dup 2))])
 
+;; Attempt to use arith or logical operations with memory outputs with
+;; setting of flags.
+(define_peephole2
+  [(set (match_operand:SWI 0 "register_operand" "")
+	(match_operand:SWI 1 "memory_operand" ""))
+   (parallel [(set (match_dup 0)
+		   (match_operator:SWI 3 "plusminuslogic_operator"
+		     [(match_dup 0)
+		      (match_operand:SWI 2 "<nonmemory_operand>" "")]))
+	      (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 1) (match_dup 0))
+   (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))]
+  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && peep2_reg_dead_p (4, operands[0])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && ix86_match_ccmode (peep2_next_insn (3),
+			 (GET_CODE (operands[3]) == PLUS
+			  || GET_CODE (operands[3]) == MINUS)
+			 ? CCGOCmode : CCNOmode)"
+  [(parallel [(set (match_dup 4) (match_dup 5))
+	      (set (match_dup 1) (match_op_dup 3 [(match_dup 1)
+						  (match_dup 2)]))])]
+  "operands[4] = SET_DEST (PATTERN (peep2_next_insn (3)));
+   operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[3]), <MODE>mode,
+				 copy_rtx (operands[1]),
+				 copy_rtx (operands[2]));
+   operands[5] = gen_rtx_COMPARE (GET_MODE (operands[4]),
+				  operands[5], const0_rtx);")
+
+(define_peephole2
+  [(parallel [(set (match_operand:SWI 0 "register_operand" "")
+		   (match_operator:SWI 2 "plusminuslogic_operator"
+		     [(match_dup 0)
+		      (match_operand:SWI 1 "memory_operand" "")]))
+	      (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 1) (match_dup 0))
+   (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))]
+  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && GET_CODE (operands[2]) != MINUS
+   && peep2_reg_dead_p (3, operands[0])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && ix86_match_ccmode (peep2_next_insn (2),
+			 GET_CODE (operands[2]) == PLUS
+			 ? CCGOCmode : CCNOmode)"
+  [(parallel [(set (match_dup 3) (match_dup 4))
+	      (set (match_dup 1) (match_op_dup 2 [(match_dup 1)
+						  (match_dup 0)]))])]
+  "operands[3] = SET_DEST (PATTERN (peep2_next_insn (2)));
+   operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), <MODE>mode,
+				 copy_rtx (operands[1]),
+				 copy_rtx (operands[0]));
+   operands[4] = gen_rtx_COMPARE (GET_MODE (operands[3]),
+				  operands[4], const0_rtx);")
+
+(define_peephole2
+  [(set (match_operand:SWI12 0 "register_operand" "")
+	(match_operand:SWI12 1 "memory_operand" ""))
+   (parallel [(set (match_operand:SI 4 "register_operand" "")
+		   (match_operator:SI 3 "plusminuslogic_operator"
+		     [(match_dup 4)
+		      (match_operand:SI 2 "nonmemory_operand" "")]))
+	      (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 1) (match_dup 0))
+   (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))]
+  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && REG_P (operands[0]) && REG_P (operands[4])
+   && REGNO (operands[0]) == REGNO (operands[4])
+   && peep2_reg_dead_p (4, operands[0])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && ix86_match_ccmode (peep2_next_insn (3),
+			 (GET_CODE (operands[3]) == PLUS
+			  || GET_CODE (operands[3]) == MINUS)
+			 ? CCGOCmode : CCNOmode)"
+  [(parallel [(set (match_dup 4) (match_dup 5))
+	      (set (match_dup 1) (match_dup 6))])]
+  "operands[2] = gen_lowpart (<MODE>mode, operands[2]);
+   operands[4] = SET_DEST (PATTERN (peep2_next_insn (3)));
+   operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[3]), <MODE>mode,
+				 copy_rtx (operands[1]), operands[2]);
+   operands[5] = gen_rtx_COMPARE (GET_MODE (operands[4]),
+				  operands[5], const0_rtx);
+   operands[6] = gen_rtx_fmt_ee (GET_CODE (operands[3]), <MODE>mode,
+				 copy_rtx (operands[1]),
+				 copy_rtx (operands[2]));")
+
 ;; Attempt to always use XOR for zeroing registers.
 (define_peephole2
   [(set (match_operand 0 "register_operand" "")
--- gcc/testsuite/gcc.target/i386/pr49095.c.jj	2011-05-27 12:10:39.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/pr49095.c	2011-05-27 13:50:24.000000000 +0200
@@ -0,0 +1,73 @@ 
+/* PR rtl-optimization/49095 */
+/* { dg-do compile } */
+/* { dg-options "-Os" } */
+/* { dg-options "-Os -mregparm=2" { target ilp32 } } */
+
+void foo (void *);
+
+int *
+f1 (int *x)
+{
+  if (!--*x)
+    foo (x);
+  return x;
+}
+
+int
+g1 (int x)
+{
+  if (!--x)
+    foo ((void *) 0);
+  return x;
+}
+
+#define F(T, OP, OPN) \
+T *			\
+f##T##OPN (T *x, T y)	\
+{			\
+  *x OP y;		\
+  if (!*x)		\
+    foo (x);		\
+  return x;		\
+}			\
+			\
+T			\
+g##T##OPN (T x, T y)	\
+{			\
+  x OP y;		\
+  if (!x)		\
+    foo ((void *) 0);	\
+  return x;		\
+}			\
+			\
+T *			\
+h##T##OPN (T *x)	\
+{			\
+  *x OP 24;		\
+  if (!*x)		\
+    foo (x);		\
+  return x;		\
+}			\
+			\
+T			\
+i##T##OPN (T x, T y)	\
+{			\
+  x OP 24;		\
+  if (!x)		\
+    foo ((void *) 0);	\
+  return x;		\
+}
+
+#define G(T) \
+F (T, +=, plus)		\
+F (T, -=, minus)	\
+F (T, &=, and)		\
+F (T, |=, or)		\
+F (T, ^=, xor)
+
+G (char)
+G (short)
+G (int)
+G (long)
+
+/* { dg-final { scan-assembler-not "test\[lq\]" } } */