Comments
Patch
===================================================================
@@ -23,36 +23,101 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
+#if defined(__ELF__) && defined(__linux__)
+.section .note.GNU-stack,"",%progbits /* empty .note.GNU-stack section: by GNU toolchain convention this marks the object as not needing an executable stack (only emitted for Linux ELF targets, per the guard) */
+#endif
+
+.text /* switch back to the code section for everything below - this directive is no longer inside the L_divsi3 guard */
+
#ifdef L_divsi3
-.text
.align 2
.global ___divsi3;
.type ___divsi3, STT_FUNC;
___divsi3:
- [--SP]= RETS;
- [--SP] = R7;
- R2 = -R0;
- CC = R0 < 0;
- IF CC R0 = R2;
- R7 = CC;
-
+.Ls_main_branch:
+ R3 = R0 ^ R1; /* ___divsi3: signed quotient of R0 (N) by R1 (D), result left in R0. Bit 31 of N ^ D is set exactly when the operand signs differ, so R3 records whether the quotient must be negated */
+ R2 = - R0; /* R2 = -N */
+ R0 = MAX(R0,R2); /* R0 = max(N, -N), the absolute value of the dividend (NOTE(review): N = INT_MIN appears to be covered by .Ls_N_is_MIN_D_is_1_bit_set - confirm) */
R2 = -R1;
- CC = R1 < 0;
- IF CC R1 = R2;
- R2 = CC;
- R7 = R7 ^ R2;
+ R1 = MAX(R1,R2); /* R1 = max(D, -D), using the R2 = -D computed on the previous line */
+ R2 = R0 >> 1; /* R2 = abs(N) >> 1 */
+ CC = R2 < R1 (IU); /* unsigned compare: is abs(N) >> 1 below abs(D) */
+ IF CC JUMP .Ls_Q_has_only_one_bit; /* if so the quotient magnitude can only be 0 or 1 - take the short path */
- CALL ___udivsi3;
+ P1 = R3; /* park the sign word in P1 because R3 is reused as scratch from here on */
+ R3 = R1>>1; /* R3 = abs(D) >> 1 */
+ R3.L = SIGNBITS R3; /* R3.L = SIGNBITS(abs(D) >> 1), used below as the left-shift count that normalizes D (presumably moving its top set bit to bit 31 - confirm against the SIGNBITS definition) */
+ R1 = LSHIFT R1 BY R3.L; /* normalize D by that count */
+ R2 = R1 << 15; /* drops the top 15 bits, leaving only the low 17 bits of the normalized D */
+ CC = R2 == 0; /* CC = those low 17 bits are all zero */
+ IF !CC JUMP .Ls_use_sfw_D_has_16bit_or_more; /* some low bit set: per the label D has 16 or more significant bits, so use the shift-and-subtract loop instead of DIVQ */
- CC = R7;
+ R2.L = SIGNBITS R0; /* R2 is 0 here (fall-through of the test above), so only the low half changes: R2.L = SIGNBITS(abs(N)) */
+ R0 = LSHIFT R0 BY R2.L; /* normalize N by its own shift count */
+ R2.L = R3.L - R2.L (NS); /* R2.L = D shift count minus N shift count */
+ P2 = R2; /* that difference is the trip count of the DIVQ loop below */
+ CC = R0 == R1; /* normalized N equal to normalized D */
+ IF CC JUMP .Ls_N_is_MIN_D_is_1_bit_set; /* handled separately - per the label this is N = INT_MIN with a single-bit (power of two) D */
+
+ R1 >>= 17; /* move the normalized D down by 17 bits (presumably into the divisor position DIVQ expects - TODO confirm) */
+
+.Ls_use_divq_main_branch:
+ AQ = CC; /* CC is 0 on this fall-through (the IF CC JUMP above was not taken), so AQ starts cleared - assumes the shift above leaves CC untouched */
+
+ LOOP(s_lp_use_divq) LC0 = P2; /* hardware loop, P2 iterations */
+ LOOP_BEGIN s_lp_use_divq;
+ DIVQ(R0, R1); /* one DIVQ division step per iteration */
+ LOOP_END s_lp_use_divq;
+
+ R0 = EXTRACT(R0, R2.L) (Z); /* zero-extended bit-field extract controlled by R2.L - presumably the low R2.L quotient bits, TODO confirm against the EXTRACT control encoding */
R1 = -R0;
+ CC = P1<0; /* saved sign word negative means N and D had opposite signs */
IF CC R0 = R1;
+ RTS; /* return with the quotient in R0, negated by the conditional move above when the signs differed */
- R7 = [SP++];
- RETS = [SP++];
- RTS;
+.Ls_N_is_MIN_D_is_1_bit_set:
+ R0 = 1; /* build the quotient magnitude directly ... */
+ R0 = LSHIFT R0 BY R2.L; /* ... as 1 << R2.L, where R2.L is the shift-count difference computed above */
+ R1 = -R0; /* negated copy */
+ CC = P1 < 0; /* signs differed */
+ IF CC R0 = R1; /* pick the negated copy in that case */
+ RTS;
+
+.Ls_use_sfw_D_has_16bit_or_more:
+ R2 = R0 >> 1; /* R2 = abs(N) >> 1 */
+ R2.L = SIGNBITS R2; /* R2.L = SIGNBITS(abs(N) >> 1), the normalization shift count for N on this path */
+ R3.H = R3.L - R2.L (NS); /* R3.H = D shift count minus N shift count */
+ R3 = R3 >>16; /* bring that difference down to the low half and clear the rest of R3 */
+ P2 = R3; /* loop trip count */
+ R0 = LSHIFT R0 BY R2.L; /* normalize N */
+ R0 = R0 - R1; /* first trial subtraction of the normalized D */
+ CC = !BITTST(R0, 31); /* CC = 1 when bit 31 of the difference is clear */
+ R1 >>>= 1; /* shift the normalized D right by one with the >>> form (arithmetic shift per Blackfin syntax - confirm) */
+
+ LOOP(s__use_sfw_loop) LC0 = P2; /* hardware loop, P2 iterations */
+ LOOP_BEGIN s__use_sfw_loop;
+ R0 = R0 + R1, R2 = R0 - R1; /* produce both R0 + R1 and R0 - R1 in one instruction */
+ IF CC R0 = R2; /* keep the difference when CC is set, the sum otherwise */
+ R0 = ROT R0 BY 1; /* rotate left by one bit, presumably through CC so the current CC enters bit 0 and bit 31 becomes the next CC - confirm against the ROT definition */
+ LOOP_END s__use_sfw_loop;
+
+ R0 = EXTRACT(R0, R3.L)(Z); /* zero-extended bit-field extract controlled by R3.L (the loop count) - presumably the accumulated quotient bits */
+ R0 = ROT R0 BY 1; /* one more rotate, presumably shifting in the final quotient bit */
+ R1 = -R0; /* negated copy */
+ CC = P1<0; /* signs differed */
+ IF CC R0 = R1; /* apply the sign */
+ RTS;
+
+.Ls_Q_has_only_one_bit:
+ CC = R1 <= R0 (IU); /* CC = abs(D) <= abs(N), unsigned */
+ R0 = CC; /* the quotient magnitude is exactly that 0 or 1 */
+ R1 = -R0; /* negated copy */
+ CC = R3<0; /* R3 still holds N ^ D here - P1 has not been loaded on this path */
+ IF CC R0 = R1; /* apply the sign */
+ RTS;
+ .size ___divsi3, .-___divsi3; /* symbol size = bytes from the ___divsi3 label to here */
#endif
#ifdef L_modsi3
@@ -71,6 +136,8 @@
R0 = R1 - R2;
RETS = [SP++];
RTS;
+
+.size ___modsi3, .-___modsi3
#endif
#ifdef L_udivsi3
@@ -79,26 +146,87 @@
.type ___udivsi3, STT_FUNC;
___udivsi3:
- P0 = 32;
- LSETUP (0f, 1f) LC0 = P0;
- /* upper half of dividend */
- R3 = 0;
-0:
- /* The first time round in the loop we shift in garbage, but since we
perform 33 shifts, it doesn't matter. */
+.Lu_main_branch:
+ R2 = R0 >> 1; /* ___udivsi3: unsigned quotient of R0 (N) by R1 (D), result left in R0. R2 = N >> 1 */
+ CC = R2 < R1 (IU); /* unsigned compare: is N >> 1 below D */
+ IF CC JUMP .Lu_Q_has_only_one_bit; /* if so the quotient can only be 0 or 1 - take the short path */
+
+ R3 = R1 >> 1; /* R3 = D >> 1 */
+ R3.L = SIGNBITS R3; /* R3.L = SIGNBITS(D >> 1), used below as the left-shift count that normalizes D (presumably moving its top set bit to bit 31 - confirm against the SIGNBITS definition) */
+ R1 = LSHIFT R1 BY R3.L; /* normalize D by that count */
+ R2 = R1 << 15; /* drops the top 15 bits, leaving only the low 17 bits of the normalized D */
+ CC = R2 == 0; /* CC = those low 17 bits are all zero */
+ IF !CC JUMP .Lu_use_sfw_D_has_16bit_or_more; /* some low bit set: per the label D has 16 or more significant bits, so use the shift-and-subtract loop instead of DIVQ */
+
+ CC = R0 < 0; /* signed test, i.e. is bit 31 of N set */
+ IF CC JUMP .Lu_MSB_of_N_is_1; /* N with its top bit set gets its own DIVQ setup */
+
+ R1.L = SIGNBITS R0; /* the low 17 bits of the normalized D are zero on this path, so the low half of R1 is free to hold SIGNBITS(N) - the R1 shift by 17 below discards it again */
+ R2.L = R3.L - R1.L (NS); /* R2 is 0 here (fall-through of the test above): R2.L = D shift count minus N shift count */
+ P2 = R2; /* that difference is the trip count of the DIVQ loop below */
+ R0 = LSHIFT R0 BY R1.L; /* normalize N by its own shift count */
+ R1 >>= 17; /* move the normalized D down by 17 bits (presumably into the divisor position DIVQ expects - TODO confirm) */
+
+.Lu_use_divq_main_branch:
+ AQ = CC; /* CC is 0 on this fall-through (the jump to .Lu_MSB_of_N_is_1 was not taken), so AQ starts cleared - assumes the instructions in between leave CC untouched */
+
+ LOOP(u_lp_use_divq_when_MSB_of_N_is_0) LC0 = P2; /* hardware loop, P2 iterations */
+ LOOP_BEGIN u_lp_use_divq_when_MSB_of_N_is_0;
+ DIVQ(R0, R1); /* one DIVQ division step per iteration */
+ LOOP_END u_lp_use_divq_when_MSB_of_N_is_0;
+
+ R0 = EXTRACT(R0, R2.L) (Z); /* zero-extended bit-field extract controlled by R2.L - presumably the low R2.L quotient bits, TODO confirm against the EXTRACT control encoding */
+ RTS;
+
+.Lu_MSB_of_N_is_1:
+ R3 = R3.L (Z); /* zero-extend the D shift count to the full register */
+ P2 = R3; /* and use it as the DIVQ loop trip count */
+ R0 = R0 - R1; /* subtract the normalized D from N up front */
+ R1 >>= 17; /* move the normalized D down by 17 bits, as on the main DIVQ path */
+
+.Lu_use_divq_when_MSB_of_N_is_1:
+ R2 = ~R0; /* invert the difference ... */
+ R2 = R2 >> 31; /* ... so R2 = 1 when bit 31 of the difference is clear, else 0 */
+ CC = BITTST(R0, 31); /* CC = bit 31 of the difference */
+ AQ = CC; /* seed AQ with that bit before the DIVQ steps */
+
+ LOOP(u_lp_use_divq_MSB_of_N_is_1) LC0 = P2; /* hardware loop, P2 iterations */
+ LOOP_BEGIN u_lp_use_divq_MSB_of_N_is_1;
+ DIVQ(R0, R1); /* one DIVQ division step per iteration */
+ LOOP_END u_lp_use_divq_MSB_of_N_is_1;
+
+ R2 = LSHIFT R2 BY R3.L; /* move the saved 0 or 1 up to bit position R3.L */
+ R0 = EXTRACT(R0, R3.L) (Z); /* zero-extended bit-field extract controlled by R3.L - presumably the low R3.L quotient bits */
+ R0 = R0+R2; /* combine the extracted bits with the saved top bit */
+ RTS;
+
+.Lu_use_sfw_D_has_16bit_or_more:
+ R2 = R0>>1; /* R2 = N >> 1 */
+ R2.L = SIGNBITS R2; /* R2.L = SIGNBITS(N >> 1), the normalization shift count for N on this path */
+ R3.H = R3.L - R2.L (NS); /* R3.H = D shift count minus N shift count */
+ R3 = R3 >> 16; /* bring that difference down to the low half and clear the rest of R3 */
+ P2 = R3; /* loop trip count */
+ R0 = LSHIFT R0 BY R2.L; /* normalize N */
+ R0 = R0 - R1; /* first trial subtraction of the normalized D */
+ CC = !BITTST(R0, 31); /* CC = 1 when bit 31 of the difference is clear */
+ R1 >>>= 1; /* shift the normalized D right by one with the >>> form (arithmetic shift per Blackfin syntax - confirm) */
+
+ LOOP(u__use_sfw_loop) LC0 = P2; /* hardware loop, P2 iterations */
+ LOOP_BEGIN u__use_sfw_loop;
+ R0 = R0 + R1, R2 = R0 - R1; /* produce both R0 + R1 and R0 - R1 in one instruction */
+ IF CC R0 = R2; /* keep the difference when CC is set, the sum otherwise */
R0 = ROT R0 BY 1;
- R3 = ROT R3 BY 1;
- R2 = R3 - R1;
- CC = R3 < R1 (IU);
-1:
- /* Last instruction of the loop. */
- IF ! CC R3 = R2;
+ LOOP_END u__use_sfw_loop; /* the ROT just above is the last instruction of the new loop body */
- /* Shift in the last bit. */
+ R0 = EXTRACT(R0, R3.L)(Z); /* zero-extended bit-field extract controlled by R3.L (the loop count) - presumably the accumulated quotient bits */
R0 = ROT R0 BY 1;
- /* R0 is the result, R3 contains the remainder. */
- R0 = ~ R0;
- RTS;
+ RTS; /* quotient in R0 - unlike the removed implementation, no remainder is documented as being left in R3 */
+
+.Lu_Q_has_only_one_bit:
+ CC = R1 <= R0 (IU); /* CC = D <= N, unsigned */
+ R0 = CC; /* the quotient is exactly that 0 or 1 */
+ RTS;
+ .size ___udivsi3, .-___udivsi3; /* symbol size = bytes from the ___udivsi3 label to here */
#endif
#ifdef L_umodsi3
@@ -108,10 +236,17 @@
___umodsi3:
[--SP] = RETS;
+ [--SP] = R0; /* ___umodsi3: unsigned remainder of R0 (N) by R1 (D), result left in R0. Save N across the call */
+ [--SP] = R1; /* save D across the call */
CALL ___udivsi3;
- R0 = R3;
- RETS = [SP++];
- RTS;
+ R2 = [SP++]; /* R2 = D (pushed last, so popped first) */
+ R1 = [SP++]; /* R1 = N */
+ R2 *= R0; /* R2 = D * quotient, with the quotient returned by ___udivsi3 in R0 */
+ R0 = R1 - R2; /* remainder = N - D * quotient, recomputed here instead of being read from R3 as the removed code did */
+ RETS = [SP++]; /* restore the return address saved on entry */
+ RTS;
+
+.size ___umodsi3, .-___umodsi3
#endif
#ifdef L_umulsi3_highpart
@@ -128,6 +263,8 @@
A0 += A1;
R0 = A0 (FU);
RTS;
+
+.size ___umulsi3_highpart, .-___umulsi3_highpart
#endif
#ifdef L_smulsi3_highpart
@@ -143,4 +280,6 @@
A1 = A1 >>> 16;
R0 = (A0 += A1);
RTS;
+
+.size ___smulsi3_highpart, .-___smulsi3_highpart
#endif
Ping --- The attached patch updates the Blackfin ___divsi3 and ___udivsi3 implementations (and updates ___umodsi3 to match), and adds .size directives to all functions in the file. 2011-03-24 Stuart Henderson <stuart.henderson@analog.com> * gcc/config/bfin/lib1funcs.asm (___divsi3): New implementation, add .size directive and unguard .text directive. (___udivsi3): New implementation and add .size directive. (___umodsi3): Update to match new ___divsi3/___udivsi3 implementations and add .size directive. (___modsi3): Add .size directive. (___umulsi3_highpart): Likewise. (___smulsi3_highpart): Likewise. Thanks, Stu