diff mbox

[AVR] : QI builtins for parity, popcount, 1<< n

Message ID 4DFB9EF8.2010109@gjlay.de
State New
Headers show

Commit Message

Georg-Johann Lay June 17, 2011, 6:37 p.m. UTC
Georg-Johann Lay schrieb:

> To come back to the original topic, here is a tentative patch for
> better popcount and parity:
> 
> 	* config/avr/t-avr (LIB1ASMFUNCS): Rename _loop_ffsqi2 to
> 	_ffsqi2_nz.
> 	* config/avr/libgcc.S: Ditto. Rename __loop_ffsqi2 to __ffsqi2_nz.
> 	(__ctzsi2, __ctzhi2): Map zero to 255.
> 	(__popcounthi2): Use r27 instead of r30.
> 	(__popcountdi2): Use r30 instead of r27.
> 	* config/avr/avr.md (parityhi2): New expander.
> 	(popcounthi2): New expander.
> 	(popcountsi2): New expander.
> 	(*parityhi2.libgcc): New insn.
> 	(*parityqihi2.libgcc): New insn.
> 	(*popcounthi2.libgcc): New insn.
> 	(*popcountsi2.libgcc): New insn.
> 	(*popcountqi2.libgcc): New insn.
> 	(*popcountqihi2.libgcc): New insn_and_split.
> 
> Johann

Oops, picked the wrong file.
diff mbox

Patch

Index: config/avr/libgcc.S
===================================================================
--- config/avr/libgcc.S	(revision 175149)
+++ config/avr/libgcc.S	(working copy)
@@ -935,7 +935,7 @@  DEFUN __ffssi2
     brne 1f
     ret
 1:  mov  r24, r22
-    XJMP __loop_ffsqi2
+    XJMP __ffsqi2_nz
 ENDF __ffssi2
 #endif /* defined (L_ffssi2) */
 
@@ -946,7 +946,7 @@  ENDF __ffssi2
 DEFUN __ffshi2
     clr  r26
     cpse r24, __zero_reg__
-1:  XJMP __loop_ffsqi2
+1:  XJMP __ffsqi2_nz
     ldi  r26, 8
     or   r24, r25
     brne 1b
@@ -954,20 +954,20 @@  DEFUN __ffshi2
 ENDF __ffshi2
 #endif /* defined (L_ffshi2) */
 
-#if defined (L_loop_ffsqi2)
+#if defined (L_ffsqi2_nz)
 ;; Helper for ffshi2, ffssi2
 ;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
 ;; r24 must be != 0
 ;; clobbers: r26
-DEFUN __loop_ffsqi2
+DEFUN __ffsqi2_nz
     inc  r26
     lsr  r24
-    brcc __loop_ffsqi2
+    brcc __ffsqi2_nz
     mov  r24, r26
     clr  r25
     ret    
-ENDF __loop_ffsqi2
-#endif /* defined (L_loop_ffsqi2) */
+ENDF __ffsqi2_nz
+#endif /* defined (L_ffsqi2_nz) */
 
 
 /**********************************
@@ -977,12 +977,11 @@  ENDF __loop_ffsqi2
 #if defined (L_ctzsi2)
 ;; count trailing zeros
 ;; r25:r24 = ctz32 (r25:r22)
-;; ctz(0) = 32
+;; ctz(0) = 255
+;; Note that ctz(0) is undefined for GCC.
 DEFUN __ctzsi2
     XCALL __ffssi2
     dec  r24
-    sbrc r24, 7
-    ldi  r24, 32
     ret
 ENDF __ctzsi2
 #endif /* defined (L_ctzsi2) */
@@ -990,12 +989,11 @@  ENDF __ctzsi2
 #if defined (L_ctzhi2)
 ;; count trailing zeros
 ;; r25:r24 = ctz16 (r25:r24)
-;; ctz(0) = 16
+;; ctz(0) = 255
+;; Note that ctz(0) is undefined for GCC.
 DEFUN __ctzhi2
     XCALL __ffshi2
     dec  r24
-    sbrc r24, 7
-    ldi  r24, 16
     ret
 ENDF __ctzhi2
 #endif /* defined (L_ctzhi2) */
@@ -1129,13 +1127,13 @@  ENDF __parityqi2
 #if defined (L_popcounthi2)
 ;; population count
 ;; r25:r24 = popcount16 (r25:r24)
-;; clobbers: r30, __tmp_reg__
+;; clobbers: r27, __tmp_reg__
 DEFUN __popcounthi2
     XCALL __popcountqi2
-    mov  r30, r24
+    mov  r27, r24
     mov  r24, r25
     XCALL __popcountqi2
-    add  r24, r30
+    add  r24, r27
     clr  r25
     ret
 ENDF __popcounthi2
@@ -1144,7 +1142,7 @@  ENDF __popcounthi2
 #if defined (L_popcountsi2)
 ;; population count
 ;; r25:r24 = popcount32 (r25:r22)
-;; clobbers: r26, r30, __tmp_reg__
+;; clobbers: r26, r27, __tmp_reg__
 DEFUN __popcountsi2
     XCALL __popcounthi2
     mov   r26, r24
@@ -1162,13 +1160,13 @@  ENDF __popcountsi2
 ;; clobbers: r22, r23, r26, r27, r30, __tmp_reg__
 DEFUN __popcountdi2
     XCALL __popcountsi2
-    mov   r27, r24
+    mov   r30, r24
     mov_l r22, r18
     mov_h r23, r19
     mov_l r24, r20
     mov_h r25, r21
     XCALL __popcountsi2
-    add   r24, r27
+    add   r24, r30
     ret
 ENDF __popcountdi2
 #endif /* defined (L_popcountdi2) */
Index: config/avr/avr.md
===================================================================
--- config/avr/avr.md	(revision 175149)
+++ config/avr/avr.md	(working copy)
@@ -3321,6 +3321,92 @@  (define_insn "delay_cycles_4"
   [(set_attr "length" "9")
    (set_attr "cc" "clobber")])
 
+(define_expand "parityhi2"
+  [(set (reg:HI 24)
+        (match_operand:HI 1 "register_operand" ""))
+   (set (reg:HI 24)
+        (parity:HI (reg:HI 24)))
+   (set (match_operand:HI 0 "register_operand" "")
+        (reg:HI 24))]
+  ""
+  "")
+
+(define_insn "*parityhi2.libgcc"
+  [(set (reg:HI 24)
+        (parity:HI (reg:HI 24)))]
+  ""
+  "%~call __parityhi2"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*parityqihi2.libgcc"
+  [(set (reg:HI 24)
+        (parity:HI (reg:QI 24)))]
+  ""
+  "%~call __parityqi2"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+(define_expand "popcounthi2"
+  [(set (reg:HI 24)
+        (match_operand:HI 1 "register_operand" ""))
+   (parallel[(set (reg:HI 24)
+                  (popcount:HI (reg:HI 24)))
+             (clobber (reg:QI 27))])
+   (set (match_operand:HI 0 "register_operand" "")
+        (reg:HI 24))]
+  ""
+  "")
+
+(define_expand "popcountsi2"
+  [(set (reg:SI 22)
+        (match_operand:SI 1 "register_operand" ""))
+   (parallel[(set (reg:HI 24)
+                  (popcount:HI (reg:SI 22)))
+             (clobber (reg:HI 26))])
+   (set (match_operand:SI 0 "register_operand" "")
+        (zero_extend:SI (reg:HI 24)))]
+  ""
+  "")
+
+(define_insn "*popcounthi2.libgcc"
+  [(set (reg:HI 24)
+        (popcount:HI (reg:HI 24)))
+   (clobber (reg:QI 27))]
+  ""
+  "%~call __popcounthi2"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*popcountsi2.libgcc"
+  [(set (reg:HI 24)
+        (popcount:HI (reg:SI 22)))
+   (clobber (reg:HI 26))]
+  ""
+  "%~call __popcountsi2"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*popcountqi2.libgcc"
+  [(set (reg:QI 24)
+        (popcount:QI (reg:QI 24)))]
+  ""
+  "%~call __popcountqi2"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+(define_insn_and_split "*popcountqihi2.libgcc"
+  [(set (reg:HI 24)
+        (popcount:HI (reg:QI 24)))]
+  ""
+  "#"
+  ""
+  [(set (reg:QI 24)
+        (popcount:QI (reg:QI 24)))
+   (set (reg:QI 25)
+        (const_int 0))]
+  "")
+
 ;; CPU instructions
 
 ;; NOP taking 1 or 2 Ticks 
Index: config/avr/t-avr
===================================================================
--- config/avr/t-avr	(revision 175149)
+++ config/avr/t-avr	(working copy)
@@ -53,7 +53,7 @@  LIB1ASMFUNCS = \
 	_dtors \
 	_ffssi2 \
 	_ffshi2 \
-	_loop_ffsqi2 \
+	_ffsqi2_nz \
 	_ctzsi2 \
 	_ctzhi2 \
 	_clzdi2 \