
[avr]: Speed up 64-bit shifts in libgcc

Message ID 513488C3.1000608@gjlay.de
State New

Commit Message

Georg-Johann Lay March 4, 2013, 11:42 a.m. UTC
This patch improves the speed of the 64-bit shift and rotate operations.

These operations were implemented as pure bit-wise loops, so their speed was
unreasonably low for such basic arithmetic.

The new implementation first shifts byte-wise; only the remaining offset
(count mod 8) is shifted bit-wise.
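
In C terms, the new strategy looks roughly like this sketch (for
illustration only, not the libgcc source):

    #include <stdint.h>

    /* Shift right by n in 0..63: move whole bytes first, then shift
       the remaining n % 8 bits one at a time.  */
    uint64_t lshr64_model (uint64_t x, unsigned n)
    {
        while (n >= 8)          /* one mov chain per whole byte */
        {
            x >>= 8;
            n -= 8;
        }
        while (n--)             /* one lsr/ror chain per leftover bit */
            x >>= 1;
        return x;
    }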

The new code needs a few more instructions, but 64-bit arithmetic takes a lot
of code anyway.  Basic arithmetic should still run reasonably fast and not
take 600 or more ticks for a simple shift.
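
(Rough arithmetic behind that number: the old loop body is 8 single-cycle
shift/rotate instructions plus DEC and a taken BRNE, i.e. about 11 cycles
per bit, so a shift by 56 already costs around 56 * 11 = 616 cycles.  The
byte-wise version needs at most 7 byte-move rounds plus at most 7 bit
rounds instead.)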

Ok for trunk?

Johann


	* config/avr/lib1funcs.S (__ashrdi3, __lshrdi3, __ashldi3)
	(__rotldi3): Shift bytewise if applicable.
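
For context: these routines implement plain 64-bit shifts from C, for which
AVR has no native support.  A function like

    #include <stdint.h>

    uint64_t shift_down (uint64_t x, unsigned n)
    {
        return x >> n;
    }

typically compiles to a call to __lshrdi3 on this target.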

Comments

Denis Chertykov March 4, 2013, 11:45 a.m. UTC | #1
2013/3/4 Georg-Johann Lay <avr@gjlay.de>:
> This patch improves the speed of the 64-bit shift and rotate operations.
>
> These operations were implemented as pure bit-wise loops, so their speed was
> unreasonably low for such basic arithmetic.
>
> The new implementation first shifts byte-wise; only the remaining offset
> (count mod 8) is shifted bit-wise.
>
> The new code needs a few more instructions, but 64-bit arithmetic takes a lot
> of code anyway.  Basic arithmetic should still run reasonably fast and not
> take 600 or more ticks for a simple shift.
>
> Ok for trunk?
>
> Johann
>
>
>         * config/avr/lib1funcs.S (__ashrdi3, __lshrdi3, __ashldi3)
>         (__rotldi3): Shift bytewise if applicable.
>

Approved.

Denis.

Patch

Index: config/avr/lib1funcs.S
===================================================================
--- config/avr/lib1funcs.S	(revision 196329)
+++ config/avr/lib1funcs.S	(working copy)
@@ -3030,64 +3030,73 @@  ENDF __bswapdi2
 ;; Arithmetic shift right
 ;; r25:r18 = ashr64 (r25:r18, r17:r16)
 DEFUN __ashrdi3
-    push r16
-    andi r16, 63
-    breq 2f
-1:  asr  r25
-    ror  r24
-    ror  r23
-    ror  r22
-    ror  r21
-    ror  r20
-    ror  r19
-    ror  r18
-    dec  r16
-    brne 1b
-2:  pop  r16
-    ret
-ENDF __ashrdi3
-#endif /* defined (L_ashrdi3) */
+    bst     r25, 7
+    bld     __zero_reg__, 0
+    ;; FALLTHRU
+ENDF  __ashrdi3
 
-#if defined (L_lshrdi3)
 ;; Logic shift right
 ;; r25:r18 = lshr64 (r25:r18, r17:r16)
 DEFUN __lshrdi3
-    push r16
-    andi r16, 63
-    breq 2f
-1:  lsr  r25
-    ror  r24
-    ror  r23
-    ror  r22
-    ror  r21
-    ror  r20
-    ror  r19
-    ror  r18
-    dec  r16
-    brne 1b
-2:  pop  r16
+    lsr     __zero_reg__
+    sbc     __tmp_reg__, __tmp_reg__
+    push    r16
+0:  cpi     r16, 8
+    brlo 2f
+    subi    r16, 8
+    mov     r18, r19
+    mov     r19, r20
+    mov     r20, r21
+    mov     r21, r22
+    mov     r22, r23
+    mov     r23, r24
+    mov     r24, r25
+    mov     r25, __tmp_reg__
+    rjmp 0b
+1:  asr     __tmp_reg__
+    ror     r25
+    ror     r24
+    ror     r23
+    ror     r22
+    ror     r21
+    ror     r20
+    ror     r19
+    ror     r18
+2:  dec     r16
+    brpl 1b
+    pop     r16
     ret
 ENDF __lshrdi3
-#endif /* defined (L_lshrdi3) */
+#endif /* defined (L_ashrdi3) */
 
 #if defined (L_ashldi3)
 ;; Shift left
 ;; r25:r18 = ashl64 (r25:r18, r17:r16)
 DEFUN __ashldi3
-    push r16
-    andi r16, 63
-    breq 2f
-1:  lsl  r18
-    rol  r19
-    rol  r20
-    rol  r21
-    rol  r22
-    rol  r23
-    rol  r24
-    rol  r25
-    dec  r16
-    brne 1b
-2:  pop  r16
+    push    r16
+0:  cpi     r16, 8
+    brlo 2f
+    mov     r25, r24
+    mov     r24, r23
+    mov     r23, r22
+    mov     r22, r21
+    mov     r21, r20
+    mov     r20, r19
+    mov     r19, r18
+    clr     r18
+    subi    r16, 8
+    rjmp 0b
+1:  lsl     r18
+    rol     r19
+    rol     r20
+    rol     r21
+    rol     r22
+    rol     r23
+    rol     r24
+    rol     r25
+2:  dec     r16
+    brpl 1b
+    pop     r16
     ret
 ENDF __ashldi3
 #endif /* defined (L_ashldi3) */
@@ -3096,21 +3105,32 @@  ENDF __ashldi3
 ;; Shift left
 ;; r25:r18 = rotl64 (r25:r18, r17:r16)
 DEFUN __rotldi3
-    push r16
-    andi r16, 63
-    breq 2f
-1:  lsl  r18
-    rol  r19
-    rol  r20
-    rol  r21
-    rol  r22
-    rol  r23
-    rol  r24
-    rol  r25
-    adc  r18, __zero_reg__
-    dec  r16
-    brne 1b
-2:  pop  r16
+    push    r16
+0:  cpi     r16, 8
+    brlo 2f
+    subi    r16, 8
+    mov     __tmp_reg__, r25
+    mov     r25, r24
+    mov     r24, r23
+    mov     r23, r22
+    mov     r22, r21
+    mov     r21, r20
+    mov     r20, r19
+    mov     r19, r18
+    mov     r18, __tmp_reg__
+    rjmp 0b
+1:  lsl     r18
+    rol     r19
+    rol     r20
+    rol     r21
+    rol     r22
+    rol     r23
+    rol     r24
+    rol     r25
+    adc     r18, __zero_reg__
+2:  dec     r16
+    brpl 1b
+    pop     r16
     ret
 ENDF __rotldi3
 #endif /* defined (L_rotldi3) */
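
A note on the structure above: __ashrdi3 just records the sign bit in
__zero_reg__ and falls through to __lshrdi3, where "lsr" plus "sbc" turn
it into a 0x00 or 0xFF fill byte in __tmp_reg__, so one loop serves both
the arithmetic and the logical shift.  The rotate uses the same
byte-then-bit pattern; in C terms it behaves roughly like this sketch
(illustration only, not the libgcc source):

    #include <stdint.h>

    /* Rotate left by n in 0..63: whole bytes first, then single bits.
       The "adc r18, __zero_reg__" in the assembly feeds the bit shifted
       out at the top back into bit 0.  */
    uint64_t rotl64_model (uint64_t x, unsigned n)
    {
        while (n >= 8)
        {
            x = (x << 8) | (x >> 56);
            n -= 8;
        }
        while (n--)
            x = (x << 1) | (x >> 63);
        return x;
    }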