diff mbox

[AVR] : Cleanup libgcc.S

Message ID 4E37D023.6040405@gjlay.de
State New
Headers show

Commit Message

Georg-Johann Lay Aug. 2, 2011, 10:23 a.m. UTC
This patch fixes RCALL/RJMP instructions to other modules by replacing them
with XCALL and XJMP, respectively.

Moreover, now each function (except _cleanup) is enclosed in DEFUN/ENDF pairs
so that size information is available for each function.

Ok?

Johann

	* config/avr/libgcc.S: Gather related functions in the
	same input section.
 	(__mulqihi3, __umulqihi3, __divmodqi4, __divmodhi4, __udivmodsi4,
	__divmodsi4): Use XCALL/XJMP instead of rcall/rjmp for external
	references.
	(__udivmodqi4, __divmodqi4, __udivmodhi4, __divmodhi4,
	__udivmodsi4, __divmodsi4, __prologue_saves__,
	__epilogue_restores__, _exit, __tablejump2__, __tablejump__,
	__do_copy_data, __do_clear_bss, __do_global_ctors,
	__do_global_dtors, __tablejump_elpm__): Enclose in DEFUN/ENDF.

Comments

Denis Chertykov Aug. 2, 2011, 11:59 a.m. UTC | #1
2011/8/2 Georg-Johann Lay <avr@gjlay.de>:
> This patch fixes RCALL/RJMP instructions to other modules by replacing them
> with XCALL resp. XJMP.
>
> Moreover, now each function (except _cleanup) is enclosed in DEFUN/ENDF pairs
> so that size information is available for each function.
>
> Ok?

Approved.

Denis.
Weddington, Eric Aug. 2, 2011, 12:27 p.m. UTC | #2
> -----Original Message-----
> From: Georg-Johann Lay [mailto:avr@gjlay.de]
> Sent: Tuesday, August 02, 2011 4:24 AM
> To: gcc-patches@gcc.gnu.org
> Cc: Anatoly Sokolov; Denis Chertykov; Weddington, Eric
> Subject: [Patch,AVR]: Cleanup libgcc.S
> 
> This patch fixes RCALL/RJMP instructions to other modules by replacing
> them
> with XCALL resp. XJMP.
> 

Hi Johann,

Do we want to add a binutils bug report for the FIXME that you just added in this patch?

It should be fairly easy to add SORT to the default linker scripts.

Eric Weddington
Georg-Johann Lay Aug. 3, 2011, 10:42 a.m. UTC | #3
http://gcc.gnu.org/ml/gcc-patches/2011-08/msg00140.html

Weddington, Eric wrote:

>>
>> This patch fixes RCALL/RJMP instructions to other modules by replacing
>> them
>> with XCALL resp. XJMP.
>>
> 
> Hi Johann,
> 
> Do we want to add a binutils bug report for the FIXME that you just added in this patch?

Yes I think so.  libgcc.S would be more efficient and easier to read because
it would need no special treatment of the skip bug.

I think avr-libc could take advantage of that feature, too, e.g. getting rid
of relocation truncation issues like #33698.  For avr-libc it's a bit harder,
of course, because many modules need __addsf3 et al. But large routines like
printf stuff could be factored out.

> It should be fairly easy to add SORT to the default linker scripts.
> 
> Eric Weddington

What version?  Is it already possible for 2.21?

Johann
Weddington, Eric Aug. 5, 2011, 3:48 a.m. UTC | #4
> -----Original Message-----
> From: Georg-Johann Lay [mailto:avr@gjlay.de]
> Sent: Wednesday, August 03, 2011 4:42 AM
> To: Weddington, Eric
> Cc: gcc-patches@gcc.gnu.org; Anatoly Sokolov; Denis Chertykov; Joerg
> Wunsch
> Subject: Re: [Patch,AVR]: Cleanup libgcc.S
> 
> 
> > It should be fairly easy to add SORT to the default linker scripts.
> >
> 
> What version?  Is already possible for 2.21?
> 

AFAIK binutils 2.21.1 is the latest release. We could do it for 2.22 I suppose.
diff mbox

Patch

Index: config/avr/libgcc.S
===================================================================
--- config/avr/libgcc.S	(revision 177070)
+++ config/avr/libgcc.S	(working copy)
@@ -34,7 +34,15 @@  see the files COPYING3 and COPYING.RUNTI
    This can make better code because GCC knows exactly which
    of the call-used registers (not all of them) are clobbered.  */
 
-	.section .text.libgcc, "ax", @progbits
+/* FIXME:  At present, there is no SORT directive in the linker
+           script so that we must not assume that different modules
+           in the same input section like .libgcc.text.mul will be
+           located close together.  Therefore, we cannot use
+           RCALL/RJMP to call a function like __udivmodhi4 from
+           __divmodhi4 and have to use lengthy XCALL/XJMP even
+           though they are in the same input section and all same
+           input sections together are small enough to reach every
+           location with a RCALL/RJMP instruction.  */
 
 	.macro	mov_l  r_dest, r_src
 #if defined (__AVR_HAVE_MOVW__)
@@ -72,6 +80,8 @@  see the files COPYING3 and COPYING.RUNTI
 .endm
 
 
+.section .text.libgcc.mul, "ax", @progbits
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 /* Note: mulqi3, mulhi3 are open-coded on the enhanced core.  */
 #if !defined (__AVR_HAVE_MUL__)
@@ -112,7 +122,7 @@  DEFUN __mulqihi3
 	clr	r23
 	sbrc	r22, 7
 	dec	r22
-	rjmp	__mulhi3
+	XJMP	__mulhi3
 ENDF __mulqihi3:
 #endif /* defined (L_mulqihi3) */
 
@@ -120,7 +130,7 @@  ENDF __mulqihi3:
 DEFUN __umulqihi3
 	clr	r25
 	clr	r23
-	rjmp	__mulhi3
+	XJMP	__mulhi3
 ENDF __umulqihi3
 #endif /* defined (L_umulqihi3) */
 
@@ -447,6 +457,8 @@  ENDF __mulsi3
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 	
 
+.section .text.libgcc.div, "ax", @progbits
+
 /*******************************************************
        Division 8 / 8 => (result + remainder)
 *******************************************************/
@@ -456,9 +468,7 @@  ENDF __mulsi3
 #define	r_cnt	r23	/* loop count */
 
 #if defined (L_udivmodqi4)
-	.global	__udivmodqi4
-	.func	__udivmodqi4
-__udivmodqi4:
+DEFUN __udivmodqi4
 	sub	r_rem,r_rem	; clear remainder and carry
 	ldi	r_cnt,9		; init loop counter
 	rjmp	__udivmodqi4_ep	; jump to entry point
@@ -474,13 +484,11 @@  __udivmodqi4_ep:
 	com	r_arg1		; complement result 
 				; because C flag was complemented in loop
 	ret
-	.endfunc
+ENDF __udivmodqi4
 #endif /* defined (L_udivmodqi4) */
 
 #if defined (L_divmodqi4)
-	.global	__divmodqi4
-	.func	__divmodqi4
-__divmodqi4:
+DEFUN __divmodqi4
         bst     r_arg1,7	; store sign of dividend
         mov     __tmp_reg__,r_arg1
         eor     __tmp_reg__,r_arg2; r0.7 is sign of result
@@ -488,7 +496,7 @@  __divmodqi4:
 	neg     r_arg1		; dividend negative : negate
         sbrc	r_arg2,7
 	neg     r_arg2		; divisor negative : negate
-	rcall	__udivmodqi4	; do the unsigned div/mod
+	XCALL	__udivmodqi4	; do the unsigned div/mod
 	brtc	__divmodqi4_1
 	neg	r_rem		; correct remainder sign
 __divmodqi4_1:
@@ -496,7 +504,7 @@  __divmodqi4_1:
 	neg	r_arg1		; correct result sign
 __divmodqi4_exit:
 	ret
-	.endfunc
+ENDF __divmodqi4
 #endif /* defined (L_divmodqi4) */
 
 #undef r_rem
@@ -522,9 +530,7 @@  __divmodqi4_exit:
 #define	r_cnt	r21	/* loop count */
 
 #if defined (L_udivmodhi4)
-	.global	__udivmodhi4
-	.func	__udivmodhi4
-__udivmodhi4:
+DEFUN __udivmodhi4
 	sub	r_remL,r_remL
 	sub	r_remH,r_remH	; clear remainder and carry
 	ldi	r_cnt,17	; init loop counter
@@ -550,13 +556,11 @@  __udivmodhi4_ep:
 	mov_l	r_arg1L, r_remL		; remainder
 	mov_h	r_arg1H, r_remH
 	ret
-	.endfunc
+ENDF __udivmodhi4
 #endif /* defined (L_udivmodhi4) */
 
 #if defined (L_divmodhi4)
-	.global	__divmodhi4
-	.func	__divmodhi4
-__divmodhi4:
+DEFUN __divmodhi4
 	.global	_div
 _div:
         bst     r_arg1H,7	; store sign of dividend
@@ -565,7 +569,7 @@  _div:
 	rcall	__divmodhi4_neg1 ; dividend negative : negate
 	sbrc	r_arg2H,7
 	rcall	__divmodhi4_neg2 ; divisor negative : negate
-	rcall	__udivmodhi4	; do the unsigned div/mod
+	XCALL	__udivmodhi4	; do the unsigned div/mod
 	rcall	__divmodhi4_neg1 ; correct remainder sign
 	tst	__tmp_reg__
 	brpl	__divmodhi4_exit
@@ -581,7 +585,7 @@  __divmodhi4_neg1:
 	neg	r_arg1L		; correct dividend/remainder sign
 	sbci	r_arg1H,0xff
 	ret
-	.endfunc
+ENDF __divmodhi4
 #endif /* defined (L_divmodhi4) */
 
 #undef r_remH  
@@ -618,9 +622,7 @@  __divmodhi4_neg1:
 #define	r_cnt __zero_reg__  /* loop count (0 after the loop!) */
 
 #if defined (L_udivmodsi4)
-	.global	__udivmodsi4
-	.func	__udivmodsi4
-__udivmodsi4:
+DEFUN __udivmodsi4
 	ldi	r_remL, 33	; init loop counter
 	mov	r_cnt, r_remL
 	sub	r_remL,r_remL
@@ -664,20 +666,18 @@  __udivmodsi4_ep:
 	mov_l	r_arg1HL, r_remHL
 	mov_h	r_arg1HH, r_remHH
 	ret
-	.endfunc
+ENDF __udivmodsi4
 #endif /* defined (L_udivmodsi4) */
 
 #if defined (L_divmodsi4)
-	.global	__divmodsi4
-	.func	__divmodsi4
-__divmodsi4:
+DEFUN __divmodsi4
         bst     r_arg1HH,7	; store sign of dividend
         mov     __tmp_reg__,r_arg1HH
         eor     __tmp_reg__,r_arg2HH   ; r0.7 is sign of result
 	rcall	__divmodsi4_neg1 ; dividend negative : negate
 	sbrc	r_arg2HH,7
 	rcall	__divmodsi4_neg2 ; divisor negative : negate
-	rcall	__udivmodsi4	; do the unsigned div/mod
+	XCALL	__udivmodsi4	; do the unsigned div/mod
 	rcall	__divmodsi4_neg1 ; correct remainder sign
 	rol	__tmp_reg__
 	brcc	__divmodsi4_exit
@@ -701,17 +701,18 @@  __divmodsi4_neg1:
 	sbci	r_arg1HL,0xff
 	sbci	r_arg1HH,0xff
 	ret
-	.endfunc
+ENDF __divmodsi4
 #endif /* defined (L_divmodsi4) */
 
+
+.section .text.libgcc.prologue, "ax", @progbits
+    
 /**********************************
  * This is a prologue subroutine
  **********************************/
 #if defined (L_prologue)
 
-	.global	__prologue_saves__
-	.func	__prologue_saves__
-__prologue_saves__:
+DEFUN __prologue_saves__
 	push r2
 	push r3
 	push r4
@@ -745,7 +746,7 @@  __prologue_saves__:
 	ijmp
 #endif
 
-.endfunc
+ENDF __prologue_saves__
 #endif /* defined (L_prologue) */
 
 /*
@@ -753,9 +754,7 @@  __prologue_saves__:
  */
 #if defined (L_epilogue)
 
-	.global	__epilogue_restores__
-	.func	__epilogue_restores__
-__epilogue_restores__:
+DEFUN __epilogue_restores__
 	ldd	r2,Y+18
 	ldd	r3,Y+17
 	ldd	r4,Y+16
@@ -784,17 +783,15 @@  __epilogue_restores__:
 	mov_l	r28, r26
 	mov_h	r29, r27
 	ret
-.endfunc
+ENDF __epilogue_restores__
 #endif /* defined (L_epilogue) */
 
 #ifdef L_exit
 	.section .fini9,"ax",@progbits
-	.global _exit
-	.func	_exit
-_exit:
+DEFUN _exit
 	.weak	exit
 exit:
-	.endfunc
+ENDF _exit
 
 	/* Code from .fini8 ... .fini1 sections inserted by ld script.  */
 
@@ -812,14 +809,17 @@  _cleanup:
 .endfunc
 #endif /* defined (L_cleanup) */
 
+
+.section .text.libgcc, "ax", @progbits
+    
 #ifdef L_tablejump
-	.global __tablejump2__
-	.func	__tablejump2__
-__tablejump2__:
+DEFUN __tablejump2__
 	lsl	r30
 	rol	r31
-	.global __tablejump__
-__tablejump__:
+    ;; FALLTHRU
+ENDF __tablejump2__
+
+DEFUN __tablejump__
 #if defined (__AVR_HAVE_LPMX__)
 	lpm	__tmp_reg__, Z+
 	lpm	r31, Z
@@ -842,13 +842,12 @@  __tablejump__:
 #endif
 	ret
 #endif
-	.endfunc
+ENDF __tablejump__
 #endif /* defined (L_tablejump) */
 
 #ifdef L_copy_data
 	.section .init4,"ax",@progbits
-	.global __do_copy_data
-__do_copy_data:
+DEFUN __do_copy_data
 #if defined(__AVR_HAVE_ELPMX__)
 	ldi	r17, hi8(__data_end)
 	ldi	r26, lo8(__data_start)
@@ -905,14 +904,14 @@  __do_copy_data:
 	cpc	r27, r17
 	brne	.L__do_copy_data_loop
 #endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
+ENDF __do_copy_data
 #endif /* L_copy_data */
 
 /* __do_clear_bss is only necessary if there is anything in .bss section.  */
 
 #ifdef L_clear_bss
 	.section .init4,"ax",@progbits
-	.global __do_clear_bss
-__do_clear_bss:
+DEFUN __do_clear_bss
 	ldi	r17, hi8(__bss_end)
 	ldi	r26, lo8(__bss_start)
 	ldi	r27, hi8(__bss_start)
@@ -923,6 +922,7 @@  __do_clear_bss:
 	cpi	r26, lo8(__bss_end)
 	cpc	r27, r17
 	brne	.do_clear_bss_loop
+ENDF __do_clear_bss
 #endif /* L_clear_bss */
 
 /* __do_global_ctors and __do_global_dtors are only necessary
@@ -930,9 +930,8 @@  __do_clear_bss:
 
 #ifdef L_ctors
 	.section .init6,"ax",@progbits
-	.global	__do_global_ctors
+DEFUN __do_global_ctors
 #if defined(__AVR_HAVE_RAMPZ__)
-__do_global_ctors:
 	ldi	r17, hi8(__ctors_start)
 	ldi	r28, lo8(__ctors_end)
 	ldi	r29, hi8(__ctors_end)
@@ -952,7 +951,6 @@  __do_global_ctors:
 	cpc	r16, r24
 	brne	.L__do_global_ctors_loop
 #else
-__do_global_ctors:
 	ldi	r17, hi8(__ctors_start)
 	ldi	r28, lo8(__ctors_end)
 	ldi	r29, hi8(__ctors_end)
@@ -967,13 +965,13 @@  __do_global_ctors:
 	cpc	r29, r17
 	brne	.L__do_global_ctors_loop
 #endif /* defined(__AVR_HAVE_RAMPZ__) */
+ENDF __do_global_ctors
 #endif /* L_ctors */
 
 #ifdef L_dtors
 	.section .fini6,"ax",@progbits
-	.global	__do_global_dtors
+DEFUN __do_global_dtors
 #if defined(__AVR_HAVE_RAMPZ__)
-__do_global_dtors:
 	ldi	r17, hi8(__dtors_end)
 	ldi	r28, lo8(__dtors_start)
 	ldi	r29, hi8(__dtors_start)
@@ -993,7 +991,6 @@  __do_global_dtors:
 	cpc	r16, r24
 	brne	.L__do_global_dtors_loop
 #else
-__do_global_dtors:
 	ldi	r17, hi8(__dtors_end)
 	ldi	r28, lo8(__dtors_start)
 	ldi	r29, hi8(__dtors_start)
@@ -1008,12 +1005,13 @@  __do_global_dtors:
 	cpc	r29, r17
 	brne	.L__do_global_dtors_loop
 #endif /* defined(__AVR_HAVE_RAMPZ__) */
+ENDF __do_global_dtors
 #endif /* L_dtors */
 
+.section .text.libgcc, "ax", @progbits
+    
 #ifdef L_tablejump_elpm
-	.global __tablejump_elpm__
-	.func	__tablejump_elpm__
-__tablejump_elpm__:
+DEFUN __tablejump_elpm__
 #if defined (__AVR_HAVE_ELPM__)
 #if defined (__AVR_HAVE_LPMX__)
 	elpm	__tmp_reg__, Z+
@@ -1037,10 +1035,12 @@  __tablejump_elpm__:
 	ret
 #endif
 #endif /* defined (__AVR_HAVE_ELPM__) */
-	.endfunc
+ENDF __tablejump_elpm__
 #endif /* defined (L_tablejump_elpm) */
 
 
+.section .text.libgcc.builtins, "ax", @progbits
+
 /**********************************
  * Find first set Bit (ffs)
  **********************************/
@@ -1440,6 +1440,8 @@  DEFUN __ashldi3
 ENDF __ashldi3
 #endif /* defined (L_ashldi3) */
 
+
+.section .text.libgcc.fmul, "ax", @progbits
 
 /***********************************************************/    
 ;;; Softmul versions of FMUL, FMULS and FMULSU to implement