From patchwork Thu Dec 8 15:44:10 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Georg-Johann Lay X-Patchwork-Id: 130190 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id 6D1531007D1 for ; Fri, 9 Dec 2011 02:44:44 +1100 (EST) Received: (qmail 9777 invoked by alias); 8 Dec 2011 15:44:41 -0000 Received: (qmail 9765 invoked by uid 22791); 8 Dec 2011 15:44:39 -0000 X-SWARE-Spam-Status: No, hits=-1.5 required=5.0 tests=AWL, BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, RCVD_IN_DNSWL_NONE, TW_IV X-Spam-Check-By: sourceware.org Received: from mo-p00-ob.rzone.de (HELO mo-p00-ob.rzone.de) (81.169.146.160) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Thu, 08 Dec 2011 15:44:21 +0000 X-RZG-AUTH: :LXoWVUeid/7A29J/hMvvT2k715jHQaJercGObUOFkj18odoYNahU4Q== X-RZG-CLASS-ID: mo00 Received: from [192.168.0.22] (business-188-111-022-002.static.arcor-ip.net [188.111.22.2]) by smtp.strato.de (jimi mo13) (RZmta 26.10 AUTH) with ESMTPA id y03710nB8EuDMN ; Thu, 8 Dec 2011 16:44:11 +0100 (MET) Message-ID: <4EE0DB4A.6050604@gjlay.de> Date: Thu, 08 Dec 2011 16:44:10 +0100 From: Georg-Johann Lay User-Agent: Thunderbird 2.0.0.24 (X11/20100302) MIME-Version: 1.0 To: gcc-patches@gcc.gnu.org CC: Denis Chertykov , Eric Weddington Subject: [Path,AVR]: Implement __muldi3 in asm X-IsSubscribed: yes Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org These are assembler implementations of DImode (64-bit) multiplication. 
Tested without regressions, the only change in the test suite I get is for gcc.c-torture/execute/arith-rand-ll.c execution, -O0 UNTESTED -> PASS because the former vanilla C implementation ran into timeout. Ok for trunk? Johann * config/avr/t-avr (LIB1ASMFUNCS): Add _muldi3. * config/avr/lib1funcs.S (__muldi3): New function. Index: libgcc/config/avr/lib1funcs.S =================================================================== --- libgcc/config/avr/lib1funcs.S (revision 182106) +++ libgcc/config/avr/lib1funcs.S (working copy) @@ -464,6 +464,249 @@ ENDF __mulsi3 #undef C3 #endif /* __AVR_HAVE_MUL__ */ + +/******************************************************* + Multiplication 64 x 64 +*******************************************************/ + +#if defined (L_muldi3) + +;; A[] = A[] * B[] + +;; A[0..7]: In: Multiplicand +;; Out: Product +#define A0 18 +#define A1 A0+1 +#define A2 A0+2 +#define A3 A0+3 +#define A4 A0+4 +#define A5 A0+5 +#define A6 A0+6 +#define A7 A0+7 + +;; B[0..7]: In: Multiplier +#define B0 10 +#define B1 B0+1 +#define B2 B0+2 +#define B3 B0+3 +#define B4 B0+4 +#define B5 B0+5 +#define B6 B0+6 +#define B7 B0+7 + +#if defined (__AVR_HAVE_MUL__) + +;; Define C[] for convenience +;; Notice that parts of C[] overlap A[] respective B[] +#define C0 16 +#define C1 C0+1 +#define C2 20 +#define C3 C2+1 +#define C4 28 +#define C5 C4+1 +#define C6 C4+2 +#define C7 C4+3 + +;; A[] *= B[] +;; R25:R18 *= R17:R10 +;; Ordinary ABI-Function + +DEFUN __muldi3 + push r29 + push r28 + push r17 + push r16 + + ;; Counting in Words, we have to perform a 4 * 4 Multiplication + + ;; 3 * 0 + 0 * 3 + mul A7,B0 $ $ mov C7,r0 + mul A0,B7 $ $ add C7,r0 + mul A6,B1 $ $ add C7,r0 + mul A6,B0 $ mov C6,r0 $ add C7,r1 + mul B6,A1 $ $ add C7,r0 + mul B6,A0 $ add C6,r0 $ adc C7,r1 + + ;; 1 * 2 + mul A2,B4 $ add C6,r0 $ adc C7,r1 + mul A3,B4 $ $ add C7,r0 + mul A2,B5 $ $ add C7,r0 + + push A5 + push A4 + push B1 + push B0 + push A3 + push A2 + + ;; 0 * 0 + wmov 26, B0 + XCALL 
__umulhisi3 + wmov C0, 22 + wmov C2, 24 + + ;; 0 * 2 + wmov 26, B4 + XCALL __umulhisi3 $ wmov C4,22 $ add C6,24 $ adc C7,25 + + wmov 26, B2 + ;; 0 * 1 + rcall __muldi3_6 + + pop A0 + pop A1 + ;; 1 * 1 + wmov 26, B2 + XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25 + + pop r26 + pop r27 + ;; 1 * 0 + rcall __muldi3_6 + + pop A0 + pop A1 + ;; 2 * 0 + XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25 + + ;; 2 * 1 + wmov 26, B2 + XCALL __umulhisi3 $ $ $ add C6,22 $ adc C7,23 + + ;; A[] = C[] + wmov A0, C0 + ;; A2 = C2 already + wmov A4, C4 + wmov A6, C6 + + clr __zero_reg__ + pop r16 + pop r17 + pop r28 + pop r29 + ret + +__muldi3_6: + XCALL __umulhisi3 + add C2, 22 + adc C3, 23 + adc C4, 24 + adc C5, 25 + brcc 0f + adiw C6, 1 +0: ret +ENDF __muldi3 + +#undef C7 +#undef C6 +#undef C5 +#undef C4 +#undef C3 +#undef C2 +#undef C1 +#undef C0 + +#else /* !HAVE_MUL */ + +#define C0 26 +#define C1 C0+1 +#define C2 C0+2 +#define C3 C0+3 +#define C4 C0+4 +#define C5 C0+5 +#define C6 0 +#define C7 C6+1 + +#define Loop 9 + +;; A[] *= B[] +;; R25:R18 *= R17:R10 +;; Ordinary ABI-Function + +DEFUN __muldi3 + push r29 + push r28 + push Loop + + ldi C0, 64 + mov Loop, C0 + + ;; C[] = 0 + clr __tmp_reg__ + wmov C0, 0 + wmov C2, 0 + wmov C4, 0 + +0: ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[] + ;; where N = 64 - Loop. + ;; Notice that B[] = B[] >>> 64 so after this Routine has finished, + ;; B[] will have its initial Value again. + LSR B7 $ ror B6 $ ror B5 $ ror B4 + ror B3 $ ror B2 $ ror B1 $ ror B0 + + ;; If the N-th Bit of B[] was set then... + brcc 1f + ;; ...finish Rotation... 
+ ori B7, 1 << 7 + + ;; ...and add A[] * 2^N to the Result C[] + ADD C0,A0 $ adc C1,A1 $ adc C2,A2 $ adc C3,A3 + adc C4,A4 $ adc C5,A5 $ adc C6,A6 $ adc C7,A7 + +1: ;; Multiply A[] by 2 + LSL A0 $ rol A1 $ rol A2 $ rol A3 + rol A4 $ rol A5 $ rol A6 $ rol A7 + + dec Loop + brne 0b + + ;; We expanded the Result in C[] + ;; Copy Result to the Return Register A[] + wmov A0, C0 + wmov A2, C2 + wmov A4, C4 + wmov A6, C6 + + clr __zero_reg__ + pop Loop + pop r28 + pop r29 + ret +ENDF __muldi3 + +#undef Loop + +#undef C7 +#undef C6 +#undef C5 +#undef C4 +#undef C3 +#undef C2 +#undef C1 +#undef C0 + +#endif /* HAVE_MUL */ + +#undef B7 +#undef B6 +#undef B5 +#undef B4 +#undef B3 +#undef B2 +#undef B1 +#undef B0 + +#undef A7 +#undef A6 +#undef A5 +#undef A4 +#undef A3 +#undef A2 +#undef A1 +#undef A0 + +#endif /* L_muldi3 */ + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Index: libgcc/config/avr/t-avr =================================================================== --- libgcc/config/avr/t-avr (revision 182106) +++ libgcc/config/avr/t-avr (working copy) @@ -16,6 +16,7 @@ LIB1ASMFUNCS = \ _udivmodsi4 \ _divmodsi4 \ _divdi3 _udivdi3 \ + _muldi3 \ _udivmod64 \ _negdi2 \ _prologue \