Patchwork New bfin divsi/udivsi implementations

login
register
mail settings
Submitter Henderson, Stuart
Date May 3, 2011, 1:27 p.m.
Message ID <05E9E85E39C35B4D96ED3A3190E35A10A49CB860CC@LIMKCMBX1.ad.analog.com>
Download mbox | patch
Permalink /patch/93789/
State New
Headers show

Comments

Henderson, Stuart - May 3, 2011, 1:27 p.m.
Ping
---

The attached patch updates the blackfin ___divsi3 and ___udivsi3 implementations (and updates ___umodsi3 to match), as well as adding .size directives to all functions in the file.


2011-03-24  Stuart Henderson  <stuart.henderson@analog.com>

    * gcc/config/bfin/lib1funcs.asm (___divsi3): New implementation,
    add .size directive and unguard .text directive.
    (___udivsi3): New implementation and add .size directive.
    (___umodsi3): Update to match new ___divsi3/___udivsi3 implementations
    and add .size directive.
    (___modsi3): Add .size directive.
    (___umulsi3_highpart): Likewise.
    (___smulsi3_highpart): Likewise.



Thanks,
Stu

Patch

Index: gcc/config/bfin/lib1funcs.asm
===================================================================
--- gcc/config/bfin/lib1funcs.asm       (revision 171215)
+++ gcc/config/bfin/lib1funcs.asm       (working copy)
@@ -23,36 +23,101 @@ 
 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 <http://www.gnu.org/licenses/>.  */

+#if defined(__ELF__) && defined(__linux__)
+.section .note.GNU-stack,"",%progbits
+#endif
+
+.text
+
 #ifdef L_divsi3
-.text
 .align 2
 .global ___divsi3;
 .type ___divsi3, STT_FUNC;

 ___divsi3:
-        [--SP]= RETS;
-       [--SP] = R7;

-       R2 = -R0;
-        CC = R0 < 0;
-       IF CC R0 = R2;
-       R7 = CC;
-
+.Ls_main_branch:
+       R3 = R0 ^ R1;
+       R2 = - R0;
+       R0 = MAX(R0,R2);
        R2 = -R1;
-        CC = R1 < 0;
-       IF CC R1 = R2;
-       R2 = CC;
-       R7 = R7 ^ R2;
+       R1 = MAX(R1,R2);
+       R2 = R0 >> 1;
+       CC = R2 < R1 (IU);
+       IF CC JUMP .Ls_Q_has_only_one_bit;

-        CALL ___udivsi3;
+       P1 = R3;
+       R3 = R1>>1;
+       R3.L = SIGNBITS R3;
+       R1 = LSHIFT R1 BY R3.L;
+       R2 = R1 << 15;
+       CC = R2 == 0;
+       IF !CC JUMP .Ls_use_sfw_D_has_16bit_or_more;

-       CC = R7;
+       R2.L = SIGNBITS R0;
+       R0 = LSHIFT R0 BY R2.L;
+       R2.L = R3.L - R2.L (NS);
+       P2 = R2;
+       CC = R0 == R1;
+       IF CC JUMP .Ls_N_is_MIN_D_is_1_bit_set;
+
+       R1 >>= 17;
+
+.Ls_use_divq_main_branch:
+       AQ = CC;
+
+       LOOP(s_lp_use_divq) LC0 = P2;
+       LOOP_BEGIN s_lp_use_divq;
+       DIVQ(R0, R1);
+       LOOP_END s_lp_use_divq;
+
+       R0 = EXTRACT(R0, R2.L) (Z);
        R1 = -R0;
+       CC = P1<0;
        IF CC R0 = R1;
+       RTS;

-       R7 = [SP++];
-        RETS = [SP++];
-        RTS;
+.Ls_N_is_MIN_D_is_1_bit_set:
+       R0 = 1;
+       R0 = LSHIFT R0 BY R2.L;
+       R1 = -R0;
+       CC = P1 < 0;
+       IF CC R0 = R1;
+       RTS;
+
+.Ls_use_sfw_D_has_16bit_or_more:
+       R2 = R0 >> 1;
+       R2.L = SIGNBITS R2;
+       R3.H = R3.L - R2.L (NS);
+       R3 = R3 >>16;
+       P2 = R3;
+       R0 = LSHIFT R0 BY R2.L;
+       R0 = R0 - R1;
+       CC = !BITTST(R0, 31);
+       R1 >>>= 1;
+
+       LOOP(s__use_sfw_loop) LC0 = P2;
+       LOOP_BEGIN s__use_sfw_loop;
+       R0 = R0 + R1, R2 = R0 - R1;
+       IF CC R0 = R2;
+       R0 = ROT R0 BY 1;
+       LOOP_END s__use_sfw_loop;
+
+       R0 = EXTRACT(R0, R3.L)(Z);
+       R0 = ROT R0 BY 1;
+       R1 = -R0;
+       CC = P1<0;
+       IF CC R0 = R1;
+       RTS;
+
+.Ls_Q_has_only_one_bit:
+       CC = R1 <= R0 (IU);
+       R0 = CC;
+       R1 = -R0;
+       CC = R3<0;
+       IF CC R0 = R1;
+       RTS;
+       .size ___divsi3, .-___divsi3;
 #endif

 #ifdef L_modsi3
@@ -71,6 +136,8 @@ 
        R0 = R1 - R2;
        RETS = [SP++];
        RTS;
+
+.size ___modsi3, .-___modsi3
 #endif

 #ifdef L_udivsi3
@@ -79,26 +146,87 @@ 
 .type ___udivsi3, STT_FUNC;

 ___udivsi3:
-        P0 = 32;
-        LSETUP (0f, 1f) LC0 = P0;
-       /* upper half of dividend */
-        R3 = 0;
-0:
-       /* The first time round in the loop we shift in garbage, but since we
-          perform 33 shifts, it doesn't matter.  */
+.Lu_main_branch:
+       R2 = R0 >> 1;
+       CC = R2 < R1 (IU);
+       IF CC JUMP .Lu_Q_has_only_one_bit;
+
+       R3 = R1 >> 1;
+       R3.L = SIGNBITS R3;
+       R1 = LSHIFT R1 BY R3.L;
+       R2 = R1 << 15;
+       CC = R2 == 0;
+       IF !CC JUMP .Lu_use_sfw_D_has_16bit_or_more;
+
+       CC = R0 < 0;
+       IF CC JUMP .Lu_MSB_of_N_is_1;
+
+       R1.L = SIGNBITS R0;
+       R2.L = R3.L - R1.L (NS);
+       P2 = R2;
+       R0 = LSHIFT R0 BY R1.L;
+       R1 >>= 17;
+
+.Lu_use_divq_main_branch:
+       AQ = CC;
+
+       LOOP(u_lp_use_divq_when_MSB_of_N_is_0) LC0 = P2;
+       LOOP_BEGIN u_lp_use_divq_when_MSB_of_N_is_0;
+       DIVQ(R0, R1);
+       LOOP_END u_lp_use_divq_when_MSB_of_N_is_0;
+
+       R0 = EXTRACT(R0, R2.L) (Z);
+       RTS;
+
+.Lu_MSB_of_N_is_1:
+       R3 = R3.L (Z);
+       P2 = R3;
+       R0 = R0 - R1;
+       R1 >>= 17;
+
+.Lu_use_divq_when_MSB_of_N_is_1:
+       R2 = ~R0;
+       R2 = R2 >> 31;
+       CC = BITTST(R0, 31);
+       AQ = CC;
+
+       LOOP(u_lp_use_divq_MSB_of_N_is_1) LC0 = P2;
+       LOOP_BEGIN u_lp_use_divq_MSB_of_N_is_1;
+       DIVQ(R0, R1);
+       LOOP_END u_lp_use_divq_MSB_of_N_is_1;
+
+       R2 = LSHIFT R2 BY R3.L;
+       R0 = EXTRACT(R0, R3.L) (Z);
+       R0 = R0+R2;
+       RTS;
+
+.Lu_use_sfw_D_has_16bit_or_more:
+       R2 = R0>>1;
+       R2.L = SIGNBITS R2;
+       R3.H = R3.L - R2.L (NS);
+       R3 = R3 >> 16;
+       P2 = R3;
+       R0 = LSHIFT R0 BY R2.L;
+       R0 = R0 - R1;
+       CC = !BITTST(R0, 31);
+       R1 >>>= 1;
+
+       LOOP(u__use_sfw_loop) LC0 = P2;
+       LOOP_BEGIN u__use_sfw_loop;
+       R0 = R0 + R1, R2 = R0 - R1;
+       IF CC R0 = R2;
        R0 = ROT R0 BY 1;
-       R3 = ROT R3 BY 1;
-       R2 = R3 - R1;
-        CC = R3 < R1 (IU);
-1:
-       /* Last instruction of the loop.  */
-       IF ! CC R3 = R2;
+       LOOP_END u__use_sfw_loop;

-       /* Shift in the last bit.  */
+       R0 = EXTRACT(R0, R3.L)(Z);
        R0 = ROT R0 BY 1;
-       /* R0 is the result, R3 contains the remainder.  */
-       R0 = ~ R0;
-        RTS;
+       RTS;
+
+.Lu_Q_has_only_one_bit:
+       CC = R1 <= R0 (IU);
+       R0 = CC;
+       RTS;
+       .size ___udivsi3, .-___udivsi3;
 #endif

 #ifdef L_umodsi3
@@ -108,10 +236,17 @@ 

 ___umodsi3:
        [--SP] = RETS;
+       [--SP] = R0;
+       [--SP] = R1;
        CALL ___udivsi3;
-       R0 = R3;
-       RETS = [SP++];
-       RTS;
+       R2 = [SP++];
+       R1 = [SP++];
+       R2 *= R0;
+       R0 = R1 - R2;
+       RETS = [SP++];
+       RTS;
+
+.size ___umodsi3, .-___umodsi3
 #endif

 #ifdef L_umulsi3_highpart
@@ -128,6 +263,8 @@ 
        A0 += A1;
        R0 = A0 (FU);
        RTS;
+
+.size ___umulsi3_highpart, .-___umulsi3_highpart
 #endif

 #ifdef L_smulsi3_highpart
@@ -143,4 +280,6 @@ 
        A1 = A1 >>> 16;
        R0 = (A0 += A1);
        RTS;
+
+.size ___smulsi3_highpart, .-___smulsi3_highpart
 #endif