diff mbox

[AVR] : Better support CPSE (a bit)

Message ID 4F22D3B9.9030501@gjlay.de
State New
Headers show

Commit Message

Georg-Johann Lay Jan. 27, 2012, 4:41 p.m. UTC
This patch aims at better support of CPSE instruction in cases where otherwise
code like

     TST   Rn     ; *cmpqi
     BREQ  .+2    ; branch
     RJMP  .Lm

would be produced.  As branch is not a single instruction, it cannot be skipped.

peephole seems to be the only feature that can do this optimization. I tried a
combine pattern and it works will and is straight forward, but then it's hard
to detect the situations where TST is superfluous because some arithmetic
instruct already set cc0.

I also tried movqicc to emit code and to prefer jumps that please CPSE, but the
middle end has it's own understanding of what the code should look like and
it's merely impossible to work against that in the backend. Moreover there was
changes (like for PR45416 to feature specific target) that turned out to give
worse code for AVR. But as so often the "what the hell is AVR?" target is left
behind.

So here is the patch even though I really don't like peephole[2]. It passes
without regressions and has a bit of code cleanup, too.

Ok to apply?

Johann

	* config/avr/avr-protos.h (lpm_reg_rtx, lpm_addr_reg_rtx,
	tmp_reg_rtx, zero_reg_rtx, all_regs_rtx, rampz_rtx): Make global.
	* config/avr/avr.c: Ditto.
	(avr_regnames): Remove because unused.
	* config/avr/avr.md (*cpse.ne): New peephole.
	(*cpse.eq): New peephole from former cpse peepholes.

Comments

Denis Chertykov Jan. 27, 2012, 5:47 p.m. UTC | #1
2012/1/27 Georg-Johann Lay <avr@gjlay.de>:
> This patch aims at better support of CPSE instruction in cases where otherwise
> code like
>
>     TST   Rn     ; *cmpqi
>     BREQ  .+2    ; branch
>     RJMP  .Lm
>
> would be produced.  As branch is not a single instruction, it cannot be skipped.
>
> peephole seems to be the only feature that can do this optimization. I tried a
> combine pattern and it works will and is straight forward, but then it's hard
> to detect the situations where TST is superfluous because some arithmetic
> instruct already set cc0.
>
> I also tried movqicc to emit code and to prefer jumps that please CPSE, but the
> middle end has it's own understanding of what the code should look like and
> it's merely impossible to work against that in the backend. Moreover there was
> changes (like for PR45416 to feature specific target) that turned out to give
> worse code for AVR. But as so often the "what the hell is AVR?" target is left
> behind.
>
> So here is the patch even though I really don't like peephole[2]. It passes
> without regressions and has a bit of code cleanup, too.
>
> Ok to apply?
>
> Johann
>
>        * config/avr/avr-protos.h (lpm_reg_rtx, lpm_addr_reg_rtx,
>        tmp_reg_rtx, zero_reg_rtx, all_regs_rtx, rampz_rtx): Make global.
>        * config/avr/avr.c: Ditto.
>        (avr_regnames): Remove because unused.
>        * config/avr/avr.md (*cpse.ne): New peephole.
>        (*cpse.eq): New peephole from former cpse peepholes.

Approved.

Denis.
diff mbox

Patch

Index: config/avr/avr.md
===================================================================
--- config/avr/avr.md	(revision 183529)
+++ config/avr/avr.md	(working copy)
@@ -5010,27 +5010,61 @@  (define_peephole
           AS1 (jmp,%1));
 }")
 
-(define_peephole
+
+(define_peephole ; "*cpse.eq"
   [(set (cc0)
-	(compare (match_operand:QI 0 "register_operand" "")
-		 (const_int 0)))
+        (compare (match_operand:QI 1 "register_operand" "r,r")
+                 (match_operand:QI 2 "reg_or_0_operand" "r,L")))
    (set (pc)
-	(if_then_else (eq (cc0) (const_int 0))
-		      (label_ref (match_operand 1 "" ""))
-		      (pc)))]
-  "jump_over_one_insn_p (insn, operands[1])"
-  "cpse %0,__zero_reg__")
+        (if_then_else (eq (cc0)
+                          (const_int 0))
+                      (label_ref (match_operand 0 "" ""))
+                      (pc)))]
+  "jump_over_one_insn_p (insn, operands[0])"
+  "@
+	cpse %1,%2
+	cpse %1,__zero_reg__")
 
-(define_peephole
+;; This peephole avoids code like
+;;
+;;     TST   Rn     ; *cmpqi
+;;     BREQ  .+2    ; branch
+;;     RJMP  .Lm
+;;
+;; Notice that the peephole is always shorter than cmpqi + branch.
+;; The reason to write it as peephole is that sequences like
+;;     
+;;     AND   Rm, Rn
+;;     BRNE  .La
+;;
+;; shall not be superseeded.  With a respective combine pattern
+;; the latter sequence would be 
+;;     
+;;     AND   Rm, Rn
+;;     CPSE  Rm, __zero_reg__
+;;     RJMP  .La
+;;
+;; and thus longer and slower and not easy to be rolled back.
+
+(define_peephole ; "*cpse.ne"
   [(set (cc0)
-        (compare (match_operand:QI 0 "register_operand" "")
-		 (match_operand:QI 1 "register_operand" "")))
+        (compare (match_operand:QI 1 "register_operand" "")
+                 (match_operand:QI 2 "reg_or_0_operand" "")))
    (set (pc)
-	(if_then_else (eq (cc0) (const_int 0))
-		      (label_ref (match_operand 2 "" ""))
+        (if_then_else (ne (cc0)
+                          (const_int 0))
+		      (label_ref (match_operand 0 "" ""))
 		      (pc)))]
-  "jump_over_one_insn_p (insn, operands[2])"
-  "cpse %0,%1")
+  "!AVR_HAVE_JMP_CALL
+   || !avr_current_device->errata_skip"
+  {
+    if (operands[2] == const0_rtx)
+      operands[2] = zero_reg_rtx;
+
+    return 3 == avr_jump_mode (operands[0], insn)
+      ? "cpse %1,%2\;jmp %0"
+      : "cpse %1,%2\;rjmp %0";
+  })
 
 ;;pppppppppppppppppppppppppppppppppppppppppppppppppppp
 ;;prologue/epilogue support instructions
Index: config/avr/avr-protos.h
===================================================================
--- config/avr/avr-protos.h	(revision 183529)
+++ config/avr/avr-protos.h	(working copy)
@@ -124,6 +124,14 @@  extern bool avr_mem_flash_p (rtx);
 extern bool avr_mem_memx_p (rtx);
 extern bool avr_load_libgcc_p (rtx);
 extern bool avr_xload_libgcc_p (enum machine_mode);
+
+extern rtx lpm_reg_rtx;
+extern rtx lpm_addr_reg_rtx;
+extern rtx tmp_reg_rtx;
+extern rtx zero_reg_rtx;
+extern rtx all_regs_rtx[32];
+extern rtx rampz_rtx;
+
 #endif /* RTX_CODE */
 
 #ifdef REAL_VALUE_TYPE
Index: config/avr/avr.c
===================================================================
--- config/avr/avr.c	(revision 183529)
+++ config/avr/avr.c	(working copy)
@@ -140,30 +140,33 @@  static bool avr_rtx_costs (rtx, int, int
 #define FIRST_CUM_REG 26
 
 /* Implicit target register of LPM instruction (R0) */
-static GTY(()) rtx lpm_reg_rtx;
+extern GTY(()) rtx lpm_reg_rtx;
+rtx lpm_reg_rtx;
 
 /* (Implicit) address register of LPM instruction (R31:R30 = Z) */
-static GTY(()) rtx lpm_addr_reg_rtx;
+extern GTY(()) rtx lpm_addr_reg_rtx;
+rtx lpm_addr_reg_rtx;
 
-/* Temporary register RTX (gen_rtx_REG (QImode, TMP_REGNO)) */
-static GTY(()) rtx tmp_reg_rtx;
+/* Temporary register RTX (reg:QI TMP_REGNO) */
+extern GTY(()) rtx tmp_reg_rtx;
+rtx tmp_reg_rtx;
+
+/* Zeroed register RTX (reg:QI ZERO_REGNO) */
+extern GTY(()) rtx zero_reg_rtx;
+rtx zero_reg_rtx;
 
-/* Zeroed register RTX (gen_rtx_REG (QImode, ZERO_REGNO)) */
-static GTY(()) rtx zero_reg_rtx;
+/* RTXs for all general purpose registers as QImode */
+extern GTY(()) rtx all_regs_rtx[32];
+rtx all_regs_rtx[32];
 
 /* RAMPZ special function register */
-static GTY(()) rtx rampz_rtx;
+extern GTY(()) rtx rampz_rtx;
+rtx rampz_rtx;
 
 /* RTX containing the strings "" and "e", respectively */
 static GTY(()) rtx xstring_empty;
 static GTY(()) rtx xstring_e;
 
-/* RTXs for all general purpose registers as QImode */
-static GTY(()) rtx all_regs_rtx[32];
-
-/* AVR register names {"r0", "r1", ..., "r31"} */
-static const char *const avr_regnames[] = REGISTER_NAMES;
-
 /* Preprocessor macros to define depending on MCU type.  */
 const char *avr_extra_arch_macro;