diff mbox

[ARM,libgcc] New aeabi_idiv function for armv6-m

Message ID 003301d0092b$3325e6b0$9971b410$@arm.com
State New
Headers show

Commit Message

Hale Wang Nov. 26, 2014, 3:43 a.m. UTC
Hi,

This patch ports the aeabi_idiv routine from Linaro Cortex-Strings
(https://git.linaro.org/toolchain/cortex-strings.git), which was contributed
by ARM under Free BSD license. 

The new aeabi_idiv routine is used to replace the one in
libgcc/config/arm/lib1funcs.S. This replacement happens within the Thumb1
wrapper. The new routine is under LGPLv3 license.

The main advantage of this version is that it can improve the performance of
the aeabi_idiv function for Thumb1. This solution will also increase the
code size. So it will only be used if __OPTIMIZE_SIZE__ is not defined.

Make check passed for armv6-m.

OK for trunk?

Thanks,
Hale Wang

libgcc/ChangeLog:

2014-11-26  Hale Wang  <hale.wang@arm.com>

	* config/arm/lib1funcs.S: Add new wrapper.

*/
 /* ------------------------------------------------------------------------
*/
@@ -937,6 +1066,7 @@ LSYM(Lgot_result):
 
 	FUNC_START udivsi3
 	FUNC_ALIAS aeabi_uidiv udivsi3
+#if defined(__OPTIMIZE_SIZE__)
 
 	cmp	divisor, #0
 	beq	LSYM(Ldiv0)
@@ -954,6 +1084,13 @@ LSYM(udivsi3_skip_div0_test):
 	pop	{ work }
 	RET
 
+#else
+
+LSYM(udivsi3_skip_div0_test):
+	THUMB1_Div_Positive
+
+#endif
+
 #elif defined(__ARM_ARCH_EXT_IDIV__)
 
 	ARM_FUNC_START udivsi3
@@ -1066,7 +1203,7 @@ LSYM(Lover10):
 	RET
 	
 #else  /* ARM version.  */
-	
+
 	FUNC_START umodsi3
 
 	subs	r2, r1, #1			@ compare divisor with 1
@@ -1091,8 +1228,9 @@ LSYM(Lover10):
 
 #if defined(__prefer_thumb__)
 
-	FUNC_START divsi3	
+	FUNC_START divsi3
 	FUNC_ALIAS aeabi_idiv divsi3
+#if defined(__OPTIMIZE_SIZE__)
 
 	cmp	divisor, #0
 	beq	LSYM(Ldiv0)
@@ -1115,7 +1253,7 @@ LSYM(Lover11):
 	blo	LSYM(Lgot_result)
 
 	THUMB_DIV_MOD_BODY 0
-	
+
 	mov	r0, result
 	mov	work, ip
 	cmp	work, #0
@@ -1124,6 +1262,20 @@ LSYM(Lover11):
 LSYM(Lover12):
 	pop	{ work }
 	RET
+#else
+
+LSYM(divsi3_skip_div0_test):
+	cpy	curbit, dividend
+	orr	curbit, divisor
+	bmi	LSYM(Lthumb1_div_negative)
+
+LSYM(Lthumb1_div_positive):
+	THUMB1_Div_Positive
+
+LSYM(Lthumb1_div_negative):
+	THUMB1_Div_Negative
+
+#endif /* __OPTIMIZE_SIZE__ */
 
 #elif defined(__ARM_ARCH_EXT_IDIV__)
 
@@ -1136,8 +1288,8 @@ LSYM(Lover12):
 	RET
 
 #else /* ARM/Thumb-2 version.  */
-	
-	ARM_FUNC_START divsi3	
+
+	ARM_FUNC_START divsi3
 	ARM_FUNC_ALIAS aeabi_idiv divsi3
 
 	cmp	r1, #0
===============================================

Comments

Joey Ye Nov. 27, 2014, 2 a.m. UTC | #1
OK applying to arm/embedded-4_9-branch, though you still need
maintainer approval into trunk.

- Joey

On Wed, Nov 26, 2014 at 11:43 AM, Hale Wang <hale.wang@arm.com> wrote:
> Hi,
>
> This patch ports the aeabi_idiv routine from Linaro Cortex-Strings
> (https://git.linaro.org/toolchain/cortex-strings.git), which was contributed
> by ARM under Free BSD license.
>
> The new aeabi_idiv routine is used to replace the one in
> libgcc/config/arm/lib1funcs.S. This replacement happens within the Thumb1
> wrapper. The new routine is under LGPLv3 license.
>
> The main advantage of this version is that it can improve the performance of
> the aeabi_idiv function for Thumb1. This solution will also increase the
> code size. So it will only be used if __OPTIMIZE_SIZE__ is not defined.
>
> Make check passed for armv6-m.
>
> OK for trunk?
>
> Thanks,
> Hale Wang
>
> libgcc/ChangeLog:
>
> 2014-11-26  Hale Wang  <hale.wang@arm.com>
>
>         * config/arm/lib1funcs.S: Add new wrapper.
>
> ===============================================
> diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S
> index b617137..de66c81 100644
> --- a/libgcc/config/arm/lib1funcs.S
> +++ b/libgcc/config/arm/lib1funcs.S
> @@ -306,34 +306,12 @@ LSYM(Lend_fde):
>  #ifdef __ARM_EABI__
>  .macro THUMB_LDIV0 name signed
>  #if defined(__ARM_ARCH_6M__)
> -       .ifc \signed, unsigned
> -       cmp     r0, #0
> -       beq     1f
> -       mov     r0, #0
> -       mvn     r0, r0          @ 0xffffffff
> -1:
> -       .else
> -       cmp     r0, #0
> -       beq     2f
> -       blt     3f
> +
> +       push    {r0, lr}
>         mov     r0, #0
> -       mvn     r0, r0
> -       lsr     r0, r0, #1      @ 0x7fffffff
> -       b       2f
> -3:     mov     r0, #0x80
> -       lsl     r0, r0, #24     @ 0x80000000
> -2:
> -       .endif
> -       push    {r0, r1, r2}
> -       ldr     r0, 4f
> -       adr     r1, 4f
> -       add     r0, r1
> -       str     r0, [sp, #8]
> -       @ We know we are not on armv4t, so pop pc is safe.
> -       pop     {r0, r1, pc}
> -       .align  2
> -4:
> -       .word   __aeabi_idiv0 - 4b
> +       bl      SYM(__aeabi_idiv0)
> +       pop     {r1, pc}
> +
>  #elif defined(__thumb2__)
>         .syntax unified
>         .ifc \signed, unsigned
> @@ -927,7 +905,158 @@ LSYM(Lover7):
>         add     dividend, work
>    .endif
>  LSYM(Lgot_result):
> -.endm
> +.endm
> +
> +#if defined(__prefer_thumb__) && !defined(__OPTIMIZE_SIZE__)
> +.macro BranchToDiv n, label
> +       lsr     curbit, dividend, \n
> +       cmp     curbit, divisor
> +       blo     \label
> +.endm
> +
> +.macro DoDiv n
> +       lsr     curbit, dividend, \n
> +       cmp     curbit, divisor
> +       bcc     1f
> +       lsl     curbit, divisor, \n
> +       sub     dividend, dividend, curbit
> +
> +1:     adc     result, result
> +.endm
> +
> +.macro THUMB1_Div_Positive
> +       mov     result, #0
> +       BranchToDiv #1, LSYM(Lthumb1_div1)
> +       BranchToDiv #4, LSYM(Lthumb1_div4)
> +       BranchToDiv #8, LSYM(Lthumb1_div8)
> +       BranchToDiv #12, LSYM(Lthumb1_div12)
> +       BranchToDiv #16, LSYM(Lthumb1_div16)
> +LSYM(Lthumb1_div_large_positive):
> +       mov     result, #0xff
> +       lsl     divisor, divisor, #8
> +       rev     result, result
> +       lsr     curbit, dividend, #16
> +       cmp     curbit, divisor
> +       blo     1f
> +       asr     result, #8
> +       lsl     divisor, divisor, #8
> +       beq     LSYM(Ldivbyzero_waypoint)
> +
> +1:     lsr     curbit, dividend, #12
> +       cmp     curbit, divisor
> +       blo     LSYM(Lthumb1_div12)
> +       b       LSYM(Lthumb1_div16)
> +LSYM(Lthumb1_div_loop):
> +       lsr     divisor, divisor, #8
> +LSYM(Lthumb1_div16):
> +       Dodiv   #15
> +       Dodiv   #14
> +       Dodiv   #13
> +       Dodiv   #12
> +LSYM(Lthumb1_div12):
> +       Dodiv   #11
> +       Dodiv   #10
> +       Dodiv   #9
> +       Dodiv   #8
> +       bcs     LSYM(Lthumb1_div_loop)
> +LSYM(Lthumb1_div8):
> +       Dodiv   #7
> +       Dodiv   #6
> +       Dodiv   #5
> +LSYM(Lthumb1_div5):
> +       Dodiv   #4
> +LSYM(Lthumb1_div4):
> +       Dodiv   #3
> +LSYM(Lthumb1_div3):
> +       Dodiv   #2
> +LSYM(Lthumb1_div2):
> +       Dodiv   #1
> +LSYM(Lthumb1_div1):
> +       sub     divisor, dividend, divisor
> +       bcs     1f
> +       cpy     divisor, dividend
> +
> +1:     adc     result, result
> +       cpy     dividend, result
> +       RET
> +
> +LSYM(Ldivbyzero_waypoint):
> +       b       LSYM(Ldiv0)
> +.endm
> +
> +.macro THUMB1_Div_Negative
> +       lsr     result, divisor, #31
> +       beq     1f
> +       neg     divisor, divisor
> +
> +1:     asr     curbit, dividend, #32
> +       bcc     2f
> +       neg     dividend, dividend
> +
> +2:     eor     curbit, result
> +       mov     result, #0
> +       cpy     ip, curbit
> +       BranchToDiv #4, LSYM(Lthumb1_div_negative4)
> +       BranchToDiv #8, LSYM(Lthumb1_div_negative8)
> +LSYM(Lthumb1_div_large):
> +       mov     result, #0xfc
> +       lsl     divisor, divisor, #6
> +       rev     result, result
> +       lsr     curbit, dividend, #8
> +       cmp     curbit, divisor
> +       blo     LSYM(Lthumb1_div_negative8)
> +
> +       lsl     divisor, divisor, #6
> +       asr     result, result, #6
> +       cmp     curbit, divisor
> +       blo     LSYM(Lthumb1_div_negative8)
> +
> +       lsl     divisor, divisor, #6
> +       asr     result, result, #6
> +       cmp     curbit, divisor
> +       blo     LSYM(Lthumb1_div_negative8)
> +
> +       lsl     divisor, divisor, #6
> +       beq     LSYM(Ldivbyzero_negative)
> +       asr     result, result, #6
> +       b       LSYM(Lthumb1_div_negative8)
> +LSYM(Lthumb1_div_negative_loop):
> +       lsr     divisor, divisor, #6
> +LSYM(Lthumb1_div_negative8):
> +       DoDiv   #7
> +       DoDiv   #6
> +       DoDiv   #5
> +       DoDiv   #4
> +LSYM(Lthumb1_div_negative4):
> +       DoDiv   #3
> +       DoDiv   #2
> +       bcs     LSYM(Lthumb1_div_negative_loop)
> +       DoDiv   #1
> +       sub     divisor, dividend, divisor
> +       bcs     1f
> +       cpy     divisor, dividend
> +
> +1:     cpy     curbit, ip
> +       adc     result, result
> +       asr     curbit, curbit, #1
> +       cpy     dividend, result
> +       bcc     2f
> +       neg     dividend, dividend
> +       cmp     curbit, #0
> +
> +2:     bpl     3f
> +       neg     divisor, divisor
> +
> +3:     RET
> +
> +LSYM(Ldivbyzero_negative):
> +       cpy     curbit, ip
> +       asr     curbit, curbit, #1
> +       bcc     LSYM(Ldiv0)
> +       neg     dividend, dividend
> +.endm
> +#endif /* ARM Thumb version.  */
> +
>  /* ------------------------------------------------------------------------
> */
>  /*             Start of the Real Functions
> */
>  /* ------------------------------------------------------------------------
> */
> @@ -937,6 +1066,7 @@ LSYM(Lgot_result):
>
>         FUNC_START udivsi3
>         FUNC_ALIAS aeabi_uidiv udivsi3
> +#if defined(__OPTIMIZE_SIZE__)
>
>         cmp     divisor, #0
>         beq     LSYM(Ldiv0)
> @@ -954,6 +1084,13 @@ LSYM(udivsi3_skip_div0_test):
>         pop     { work }
>         RET
>
> +#else
> +
> +LSYM(udivsi3_skip_div0_test):
> +       THUMB1_Div_Positive
> +
> +#endif
> +
>  #elif defined(__ARM_ARCH_EXT_IDIV__)
>
>         ARM_FUNC_START udivsi3
> @@ -1066,7 +1203,7 @@ LSYM(Lover10):
>         RET
>
>  #else  /* ARM version.  */
> -
> +
>         FUNC_START umodsi3
>
>         subs    r2, r1, #1                      @ compare divisor with 1
> @@ -1091,8 +1228,9 @@ LSYM(Lover10):
>
>  #if defined(__prefer_thumb__)
>
> -       FUNC_START divsi3
> +       FUNC_START divsi3
>         FUNC_ALIAS aeabi_idiv divsi3
> +#if defined(__OPTIMIZE_SIZE__)
>
>         cmp     divisor, #0
>         beq     LSYM(Ldiv0)
> @@ -1115,7 +1253,7 @@ LSYM(Lover11):
>         blo     LSYM(Lgot_result)
>
>         THUMB_DIV_MOD_BODY 0
> -
> +
>         mov     r0, result
>         mov     work, ip
>         cmp     work, #0
> @@ -1124,6 +1262,20 @@ LSYM(Lover11):
>  LSYM(Lover12):
>         pop     { work }
>         RET
> +#else
> +
> +LSYM(divsi3_skip_div0_test):
> +       cpy     curbit, dividend
> +       orr     curbit, divisor
> +       bmi     LSYM(Lthumb1_div_negative)
> +
> +LSYM(Lthumb1_div_positive):
> +       THUMB1_Div_Positive
> +
> +LSYM(Lthumb1_div_negative):
> +       THUMB1_Div_Negative
> +
> +#endif /* __OPTIMIZE_SIZE__ */
>
>  #elif defined(__ARM_ARCH_EXT_IDIV__)
>
> @@ -1136,8 +1288,8 @@ LSYM(Lover12):
>         RET
>
>  #else /* ARM/Thumb-2 version.  */
> -
> -       ARM_FUNC_START divsi3
> +
> +       ARM_FUNC_START divsi3
>         ARM_FUNC_ALIAS aeabi_idiv divsi3
>
>         cmp     r1, #0
> ===============================================
diff mbox

Patch

===============================================
diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S
index b617137..de66c81 100644
--- a/libgcc/config/arm/lib1funcs.S
+++ b/libgcc/config/arm/lib1funcs.S
@@ -306,34 +306,12 @@  LSYM(Lend_fde):
 #ifdef __ARM_EABI__
 .macro THUMB_LDIV0 name signed
 #if defined(__ARM_ARCH_6M__)
-	.ifc \signed, unsigned
-	cmp	r0, #0
-	beq	1f
-	mov	r0, #0
-	mvn	r0, r0		@ 0xffffffff
-1:
-	.else
-	cmp	r0, #0
-	beq	2f
-	blt	3f
+
+	push	{r0, lr}
 	mov	r0, #0
-	mvn	r0, r0
-	lsr	r0, r0, #1	@ 0x7fffffff
-	b	2f
-3:	mov	r0, #0x80
-	lsl	r0, r0, #24	@ 0x80000000
-2:
-	.endif
-	push	{r0, r1, r2}
-	ldr	r0, 4f
-	adr	r1, 4f
-	add	r0, r1
-	str	r0, [sp, #8]
-	@ We know we are not on armv4t, so pop pc is safe.
-	pop	{r0, r1, pc}
-	.align	2
-4:
-	.word	__aeabi_idiv0 - 4b
+	bl	SYM(__aeabi_idiv0)
+	pop	{r1, pc}
+
 #elif defined(__thumb2__)
 	.syntax unified
 	.ifc \signed, unsigned
@@ -927,7 +905,158 @@  LSYM(Lover7):
 	add	dividend, work
   .endif
 LSYM(Lgot_result):
-.endm	
+.endm
+
+#if defined(__prefer_thumb__) && !defined(__OPTIMIZE_SIZE__)
+.macro BranchToDiv n, label
+	lsr	curbit, dividend, \n
+	cmp	curbit, divisor
+	blo	\label
+.endm
+
+.macro DoDiv n
+	lsr	curbit, dividend, \n
+	cmp	curbit, divisor
+	bcc	1f
+	lsl	curbit, divisor, \n
+	sub	dividend, dividend, curbit
+
+1:	adc	result, result
+.endm
+
+.macro THUMB1_Div_Positive
+	mov	result, #0
+	BranchToDiv #1, LSYM(Lthumb1_div1)
+	BranchToDiv #4, LSYM(Lthumb1_div4)
+	BranchToDiv #8, LSYM(Lthumb1_div8)
+	BranchToDiv #12, LSYM(Lthumb1_div12)
+	BranchToDiv #16, LSYM(Lthumb1_div16)
+LSYM(Lthumb1_div_large_positive):
+	mov	result, #0xff
+	lsl	divisor, divisor, #8
+	rev	result, result
+	lsr	curbit, dividend, #16
+	cmp	curbit, divisor
+	blo	1f
+	asr	result, #8
+	lsl	divisor, divisor, #8
+	beq	LSYM(Ldivbyzero_waypoint)
+
+1:	lsr	curbit, dividend, #12
+	cmp	curbit, divisor
+	blo	LSYM(Lthumb1_div12)
+	b	LSYM(Lthumb1_div16)
+LSYM(Lthumb1_div_loop):
+	lsr	divisor, divisor, #8
+LSYM(Lthumb1_div16):
+	Dodiv	#15
+	Dodiv	#14
+	Dodiv	#13
+	Dodiv	#12
+LSYM(Lthumb1_div12):
+	Dodiv	#11
+	Dodiv	#10
+	Dodiv	#9
+	Dodiv	#8
+	bcs	LSYM(Lthumb1_div_loop)
+LSYM(Lthumb1_div8):
+	Dodiv	#7
+	Dodiv	#6
+	Dodiv	#5
+LSYM(Lthumb1_div5):
+	Dodiv	#4
+LSYM(Lthumb1_div4):
+	Dodiv	#3
+LSYM(Lthumb1_div3):
+	Dodiv	#2
+LSYM(Lthumb1_div2):
+	Dodiv	#1
+LSYM(Lthumb1_div1):
+	sub	divisor, dividend, divisor
+	bcs	1f
+	cpy	divisor, dividend
+
+1:	adc	result, result
+	cpy	dividend, result
+	RET
+
+LSYM(Ldivbyzero_waypoint):
+	b	LSYM(Ldiv0)
+.endm
+
+.macro THUMB1_Div_Negative
+	lsr	result, divisor, #31
+	beq	1f
+	neg	divisor, divisor
+
+1:	asr	curbit, dividend, #32
+	bcc	2f
+	neg	dividend, dividend
+
+2:	eor	curbit, result
+	mov	result, #0
+	cpy	ip, curbit
+	BranchToDiv #4, LSYM(Lthumb1_div_negative4)
+	BranchToDiv #8, LSYM(Lthumb1_div_negative8)
+LSYM(Lthumb1_div_large):
+	mov	result, #0xfc
+	lsl	divisor, divisor, #6
+	rev	result, result
+	lsr	curbit, dividend, #8
+	cmp	curbit, divisor
+	blo	LSYM(Lthumb1_div_negative8)
+
+	lsl	divisor, divisor, #6
+	asr	result, result, #6
+	cmp	curbit, divisor
+	blo	LSYM(Lthumb1_div_negative8)
+
+	lsl	divisor, divisor, #6
+	asr	result, result, #6
+	cmp	curbit, divisor
+	blo	LSYM(Lthumb1_div_negative8)
+
+	lsl	divisor, divisor, #6
+	beq	LSYM(Ldivbyzero_negative)
+	asr	result, result, #6
+	b	LSYM(Lthumb1_div_negative8)
+LSYM(Lthumb1_div_negative_loop):
+	lsr	divisor, divisor, #6
+LSYM(Lthumb1_div_negative8):
+	DoDiv	#7
+	DoDiv	#6
+	DoDiv	#5
+	DoDiv	#4
+LSYM(Lthumb1_div_negative4):
+	DoDiv	#3
+	DoDiv	#2
+	bcs	LSYM(Lthumb1_div_negative_loop)
+	DoDiv	#1
+	sub	divisor, dividend, divisor
+	bcs	1f
+	cpy	divisor, dividend
+
+1:	cpy	curbit, ip
+	adc	result, result
+	asr	curbit, curbit, #1
+	cpy	dividend, result
+	bcc	2f
+	neg	dividend, dividend
+	cmp	curbit, #0
+
+2:	bpl	3f
+	neg	divisor, divisor
+
+3:	RET
+
+LSYM(Ldivbyzero_negative):
+	cpy	curbit, ip
+	asr	curbit, curbit, #1
+	bcc	LSYM(Ldiv0)
+	neg	dividend, dividend
+.endm
+#endif /* ARM Thumb version.  */
+
 /* ------------------------------------------------------------------------
*/
 /*		Start of the Real Functions