Patchwork [Darwin/PPC64] Fix boehm-gc for PPC64

login
register
mail settings
Submitter IainS
Date Dec. 2, 2010, 10:22 a.m.
Message ID <0DA938AC-A8F4-403C-8218-CCA4D9E8937D@sandoe-acoustics.co.uk>
Download mbox | patch
Permalink /patch/73950/
State New
Headers show

Comments

IainS - Dec. 2, 2010, 10:22 a.m.
well, I imagine this has never worked ...

... unfortunately, 'make check' doesn't seem to exercise the multi-lib
for boehm-gc ...
(unless one does it by hand)

Anyway the patch turns a bunch of seg-faults into working tests for   
'make check' in the ppc64/boehm-gc dir.
(and, incidentally, allows the ppc libjava build to proceed with fewer  
errors for m64).

tested only on powerpc-darwin9.

I'd welcome comments on whether the EH frames could be done better/more
automatically.

OK for trunk?
Iain


boehm-gc:

	* powerpc_darwin_mach_dep.s:  Update for m64.  Add eh frames.
	Do not build or use the picsymbol stub for Darwin >= 9.
	* tests/test.c (reverse_test):  Modify count for ppc64.
	* pthread_support.c (GC_get_thread_stack_base): Correct a debug
	statement.
Jack Howarth - Dec. 2, 2010, 2:12 p.m.
On Thu, Dec 02, 2010 at 10:22:17AM +0000, IainS wrote:
> well, I imagine this has never worked ...
>
> ... unfortunately,  'make check' doesn't seem to exercise the multi-lib 
> for boehm-gc ...
> (unless one does it by hand)
>
> Anyway the patch turns a bunch of seg-faults into working tests for   
> 'make check' in the ppc64/boehm-gc dir.
> (and, incidentally, allows the ppc libjava build to proceed with fewer  
> errors for m64).

Iain,
   The remaining failures in libjava are likely due to the fact that
libffi in FSF gcc was never ported to ppc64 on darwin. Apple has a set
of ppc64 patches buried in their libffi sources but no one has ever
tried to extract them.
            Jack

>
> tested only on powerpc-darwin9.
>
> I'd welcome comments on whether the eh could be done better/more  
> automatically,
>
> OK for trunk?
> Iain
>
>
> boehm-gc:
>
> 	* powerpc_darwin_mach_dep.s:  Update for m64.  Add eh frames.
> 	Do not build or use the picsymbol stub for Darwin >= 9.
> 	* tests/test.c (reverse_test):  Modify count for ppc64.
> 	* pthread_support.c (GC_get_thread_stack_base): Correct a debug  
> statement.
>
>

> Index: boehm-gc/powerpc_darwin_mach_dep.s
> ===================================================================
> --- boehm-gc/powerpc_darwin_mach_dep.s	(revision 167325)
> +++ boehm-gc/powerpc_darwin_mach_dep.s	(working copy)
> @@ -4,12 +4,38 @@
>  #define MODE_CHOICE(x, y) x
>  #endif
>  
> -#define lgu     MODE_CHOICE(lwzu, ldu)
> +#define machine_choice	MODE_CHOICE(ppc7400,ppc64)
>  
> -#define g_long  MODE_CHOICE(long, quad)         /* usage is ".g_long" */
> +; Define some pseudo-opcodes for size-independent load & store of GPRs ...
> +#define lgu		MODE_CHOICE(lwzu, ldu)
> +#define lg		MODE_CHOICE(lwz,ld)
> +#define sg		MODE_CHOICE(stw,std)
> +#define sgu		MODE_CHOICE(stwu,stdu)
>  
> -#define LOG2_GPR_BYTES  MODE_CHOICE(2,3)        /* log2(GPR_BYTES) */
> +; ... and the size of GPRs and their storage indicator.
> +#define GPR_BYTES	MODE_CHOICE(4,8)
> +#define LOG2_GPR_BYTES	MODE_CHOICE(2,3)	/* log2(GPR_BYTES) */
> +#define g_long		MODE_CHOICE(long, quad)	/* usage is ".g_long" */
>  
> +; From the ABI doc: "Mac OS X ABI Function Call Guide" Version 2009-02-04.
> +#define LINKAGE_SIZE	MODE_CHOICE(24,48)
> +#define PARAM_AREA	MODE_CHOICE(32,64)
> +#define SAVED_LR_OFFSET	MODE_CHOICE(8,16)	/* save position for lr */
> +
> +; The whole stack frame **MUST** be 16byte-aligned.
> +#define SAVE_SIZE (LINKAGE_SIZE+PARAM_AREA)
> +
> +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 1050
> +; We no longer need the pic symbol stub for Darwin >= 9.
> +#define BLGCP1	_GC_push_one
> +#undef WANT_STUB
> +#else
> +#define BLGCP1	L_GC_push_one$stub
> +#define WANT_STUB
> +#endif
> +
> +	.machine machine_choice
> +
>  ; GC_push_regs function. Under some optimization levels GCC will clobber
>  ; some of the non-volatile registers before we get a chance to save them
>  ; therefore, this cannot be inline asm.
> @@ -21,61 +47,116 @@ _GC_push_regs:
>      
>      ; Prolog
>  	mflr r0
> -	stw r0,8(r1)
> -	stwu r1,-80(r1)
> +	sg r0,SAVED_LR_OFFSET(r1)
> +	sgu r1,-SAVE_SIZE(r1)
>  
> -	; Push r13-r31
> +L_body:
> +    ; 'Push' r13-r31
>  	mr r3,r13
> -	bl L_GC_push_one$stub
> +	bl BLGCP1
>  	mr r3,r14
> -	bl L_GC_push_one$stub
> +	bl BLGCP1
>  	mr r3,r15
> -	bl L_GC_push_one$stub
> +	bl BLGCP1
>  	mr r3,r16
> -	bl L_GC_push_one$stub
> +	bl BLGCP1
>  	mr r3,r17
> -	bl L_GC_push_one$stub
> +	bl BLGCP1
>  	mr r3,r18
> -	bl L_GC_push_one$stub
> +	bl BLGCP1
>  	mr r3,r19
> -	bl L_GC_push_one$stub
> +	bl BLGCP1
>  	mr r3,r20
> -	bl L_GC_push_one$stub
> +	bl BLGCP1
>  	mr r3,r21
> -	bl L_GC_push_one$stub
> +	bl BLGCP1
>  	mr r3,r22
> -	bl L_GC_push_one$stub
> +	bl BLGCP1
>  	mr r3,r23
> -	bl L_GC_push_one$stub
> +	bl BLGCP1
>  	mr r3,r24
> -	bl L_GC_push_one$stub
> +	bl BLGCP1
>  	mr r3,r25
> -	bl L_GC_push_one$stub
> +	bl BLGCP1
>  	mr r3,r26
> -	bl L_GC_push_one$stub
> +	bl BLGCP1
>  	mr r3,r27
> -	bl L_GC_push_one$stub
> +	bl BLGCP1
>  	mr r3,r28
> -	bl L_GC_push_one$stub
> +	bl BLGCP1
>  	mr r3,r29
> -	bl L_GC_push_one$stub
> +	bl BLGCP1
>  	mr r3,r30
> -	bl L_GC_push_one$stub
> +	bl BLGCP1
>  	mr r3,r31
> -	bl L_GC_push_one$stub
> +	bl BLGCP1
>  
> -    ; 
> -    lwz r0,88(r1)
> -    addi r1,r1,80
> +L_epilog:
> +    ; Epilog
> +	lg r0,SAVE_SIZE+SAVED_LR_OFFSET(r1)
> +	addi r1,r1,SAVE_SIZE
>  	mtlr r0
>      	
> -	; Return
> +    ; Return
>  	blr
> +LFE0:
>  
> -; PIC stuff, generated by GCC
> +; eh frames, for those that want 'em.
>  
> -.data
> -.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
> +#define EH_DATA_ALIGN_FACT MODE_CHOICE(0x7c,0x78)
> +#define EH_FRAME_OFFSET MODE_CHOICE(0x40,0x70)
> +
> +	.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
> +EH_frame1:
> +	.set L$set$0,LECIE1-LSCIE1
> +	.long L$set$0	; Length of Common Information Entry
> +LSCIE1:
> +	.long	0	; CIE Identifier Tag
> +	.byte	0x1	; CIE Version
> +	.ascii "zR\0"	; CIE Augmentation
> +	.byte	0x1	; uleb128 0x1; CIE Code Alignment Factor
> +	.byte	EH_DATA_ALIGN_FACT	; sleb128 -4/-8; CIE Data Alignment Factor
> +	.byte	0x41	; CIE RA Column
> +	.byte	0x1	; uleb128 0x1; Augmentation size
> +	.byte	0x10	; FDE Encoding (pcrel)
> +	.byte	0xc	; DW_CFA_def_cfa
> +	.byte	0x1	; uleb128 0x1
> +	.byte	0	; uleb128 0
> +	.align	LOG2_GPR_BYTES
> +LECIE1:
> +
> +	.globl _GC_push_regs.eh
> +_GC_push_regs.eh:
> +LSFDE1:
> +	.set L$set$1,LEFDE1-LASFDE1
> +	.long L$set$1	; FDE Length
> +LASFDE1:
> +	.long	LASFDE1-EH_frame1	; FDE CIE offset
> +	.g_long	_GC_push_regs-.	; FDE initial location
> +	.set L$set$2,LFE0-_GC_push_regs
> +	.g_long L$set$2	; FDE address range
> +	.byte	0	; uleb128 0; Augmentation size
> +	.byte	0x4	; DW_CFA_advance_loc4
> +	.set L$set$3,L_body-_GC_push_regs
> +	.long L$set$3
> +	.byte	0xe	; DW_CFA_def_cfa_offset
> +	.byte	EH_FRAME_OFFSET	; uleb128 0x40/0x70
> +	.byte	0x11	; DW_CFA_offset_extended_sf
> +	.byte	0x41	; uleb128 0x41
> +	.byte	0x7e	; sleb128 -2
> +	.byte	0x4	; DW_CFA_advance_loc4
> +	.set L$set$4,L_epilog-L_body
> +	.long L$set$4
> +	.byte	0xe	; DW_CFA_def_cfa_offset
> +	.byte	0	; uleb128 0
> +	.align	LOG2_GPR_BYTES
> +LEFDE1:
> +
> +	.data
> +#ifdef WANT_STUB
> +; PIC stub stuff, generated by GCC
> +
> +	.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
>  	.align LOG2_GPR_BYTES
>  L_GC_push_one$stub:
>  	.indirect_symbol _GC_push_one
> @@ -88,8 +169,10 @@ L0$_GC_push_one:
>  	lgu r12,lo16(L_GC_push_one$lazy_ptr-L0$_GC_push_one)(r11)
>  	mtctr r12
>  	bctr
> -.data
> -.lazy_symbol_pointer
> +
> +	.data
> +	.lazy_symbol_pointer
>  L_GC_push_one$lazy_ptr:
>  	.indirect_symbol _GC_push_one
>  	.g_long dyld_stub_binding_helper
> +#endif
> Index: boehm-gc/tests/test.c
> ===================================================================
> --- boehm-gc/tests/test.c	(revision 167325)
> +++ boehm-gc/tests/test.c	(working copy)
> @@ -624,7 +624,12 @@ void reverse_test()
>  	    /* OSF has limited stack space by default, and large frames. */
>  #           define BIG 200
>  #	  else
> -#           define BIG 4500
> +#	    if defined(__MACH__) && defined(__ppc64__)
> +	      /* Small stack and largish frames.  */
> +#             define BIG 2500	      
> +#	    else
> +#             define BIG 4500
> +#	    endif
>  #	  endif
>  #	endif
>  #     endif
> Index: boehm-gc/pthread_support.c
> ===================================================================
> --- boehm-gc/pthread_support.c	(revision 167325)
> +++ boehm-gc/pthread_support.c	(working copy)
> @@ -1158,7 +1158,7 @@ GC_PTR GC_get_thread_stack_base()
>  
>  # else
>  #   ifdef DEBUG_THREADS
> -	GC_printf1("Can not determine stack base for attached thread");
> +	GC_printf0("Can not determine stack base for attached thread");
>  #   endif
>    return 0;
>  # endif

>
>
>
IainS - Dec. 2, 2010, 2:19 p.m.
On 2 Dec 2010, at 14:12, Jack Howarth wrote:

> On Thu, Dec 02, 2010 at 10:22:17AM +0000, IainS wrote:
>> well, I imagine this has never worked ...
>>
>> ... unfortunately,  'make check' doesn't seem to exercise the multi- 
>> lib
>> for boehm-gc ...
>> (unless one does it by hand)
>>
>> Anyway the patch turns a bunch of seg-faults into working tests for
>> 'make check' in the ppc64/boehm-gc dir.
>> (and, incidentally, allows the ppc libjava build to proceed with  
>> fewer
>> errors for m64).
>
> Iain,
>   The remaining failures in libjava are likely due to the fact that
> libffi in FSF gcc was never ported to ppc64 on darwin.

yeah...

> . Apple has a set
> of ppc64 patches buried in their libffi sources but no one has ever
> tried to extract them.

... I didn't know about the Apple patches :-(

I've been rolling my own....

Ah well, at least that gives me another place to look ... if mine  
don't work out.

cheers
Iain

>>
>> tested only on powerpc-darwin9.
>>
>> I'd welcome comments on whether the eh could be done better/more
>> automatically,
>>
>> OK for trunk?
>> Iain
>>
>>
>> boehm-gc:
>>
>> 	* powerpc_darwin_mach_dep.s:  Update for m64.  Add eh frames.
>> 	Do not build or use the picsymbol stub for Darwin >= 9.
>> 	* tests/test.c (reverse_test):  Modify count for ppc64.
>> 	* pthread_support.c (GC_get_thread_stack_base): Correct a debug
>> statement.
>>
>>
>
>> Index: boehm-gc/powerpc_darwin_mach_dep.s
>> ===================================================================
>> --- boehm-gc/powerpc_darwin_mach_dep.s	(revision 167325)
>> +++ boehm-gc/powerpc_darwin_mach_dep.s	(working copy)
>> @@ -4,12 +4,38 @@
>> #define MODE_CHOICE(x, y) x
>> #endif
>>
>> -#define lgu     MODE_CHOICE(lwzu, ldu)
>> +#define machine_choice	MODE_CHOICE(ppc7400,ppc64)
>>
>> -#define g_long  MODE_CHOICE(long, quad)         /* usage is  
>> ".g_long" */
>> +; Define some pseudo-opcodes for size-independent load & store of  
>> GPRs ...
>> +#define lgu		MODE_CHOICE(lwzu, ldu)
>> +#define lg		MODE_CHOICE(lwz,ld)
>> +#define sg		MODE_CHOICE(stw,std)
>> +#define sgu		MODE_CHOICE(stwu,stdu)
>>
>> -#define LOG2_GPR_BYTES  MODE_CHOICE(2,3)        /* log2(GPR_BYTES)  
>> */
>> +; ... and the size of GPRs and their storage indicator.
>> +#define GPR_BYTES	MODE_CHOICE(4,8)
>> +#define LOG2_GPR_BYTES	MODE_CHOICE(2,3)	/* log2(GPR_BYTES) */
>> +#define g_long		MODE_CHOICE(long, quad)	/* usage is ".g_long" */
>>
>> +; From the ABI doc: "Mac OS X ABI Function Call Guide" Version  
>> 2009-02-04.
>> +#define LINKAGE_SIZE	MODE_CHOICE(24,48)
>> +#define PARAM_AREA	MODE_CHOICE(32,64)
>> +#define SAVED_LR_OFFSET	MODE_CHOICE(8,16)	/* save position for lr */
>> +
>> +; The whole stack frame **MUST** be 16byte-aligned.
>> +#define SAVE_SIZE (LINKAGE_SIZE+PARAM_AREA)
>> +
>> +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) &&  
>> __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 1050
>> +; We no longer need the pic symbol stub for Darwin >= 9.
>> +#define BLGCP1	_GC_push_one
>> +#undef WANT_STUB
>> +#else
>> +#define BLGCP1	L_GC_push_one$stub
>> +#define WANT_STUB
>> +#endif
>> +
>> +	.machine machine_choice
>> +
>> ; GC_push_regs function. Under some optimization levels GCC will  
>> clobber
>> ; some of the non-volatile registers before we get a chance to save  
>> them
>> ; therefore, this cannot be inline asm.
>> @@ -21,61 +47,116 @@ _GC_push_regs:
>>
>>     ; Prolog
>> 	mflr r0
>> -	stw r0,8(r1)
>> -	stwu r1,-80(r1)
>> +	sg r0,SAVED_LR_OFFSET(r1)
>> +	sgu r1,-SAVE_SIZE(r1)
>>
>> -	; Push r13-r31
>> +L_body:
>> +    ; 'Push' r13-r31
>> 	mr r3,r13
>> -	bl L_GC_push_one$stub
>> +	bl BLGCP1
>> 	mr r3,r14
>> -	bl L_GC_push_one$stub
>> +	bl BLGCP1
>> 	mr r3,r15
>> -	bl L_GC_push_one$stub
>> +	bl BLGCP1
>> 	mr r3,r16
>> -	bl L_GC_push_one$stub
>> +	bl BLGCP1
>> 	mr r3,r17
>> -	bl L_GC_push_one$stub
>> +	bl BLGCP1
>> 	mr r3,r18
>> -	bl L_GC_push_one$stub
>> +	bl BLGCP1
>> 	mr r3,r19
>> -	bl L_GC_push_one$stub
>> +	bl BLGCP1
>> 	mr r3,r20
>> -	bl L_GC_push_one$stub
>> +	bl BLGCP1
>> 	mr r3,r21
>> -	bl L_GC_push_one$stub
>> +	bl BLGCP1
>> 	mr r3,r22
>> -	bl L_GC_push_one$stub
>> +	bl BLGCP1
>> 	mr r3,r23
>> -	bl L_GC_push_one$stub
>> +	bl BLGCP1
>> 	mr r3,r24
>> -	bl L_GC_push_one$stub
>> +	bl BLGCP1
>> 	mr r3,r25
>> -	bl L_GC_push_one$stub
>> +	bl BLGCP1
>> 	mr r3,r26
>> -	bl L_GC_push_one$stub
>> +	bl BLGCP1
>> 	mr r3,r27
>> -	bl L_GC_push_one$stub
>> +	bl BLGCP1
>> 	mr r3,r28
>> -	bl L_GC_push_one$stub
>> +	bl BLGCP1
>> 	mr r3,r29
>> -	bl L_GC_push_one$stub
>> +	bl BLGCP1
>> 	mr r3,r30
>> -	bl L_GC_push_one$stub
>> +	bl BLGCP1
>> 	mr r3,r31
>> -	bl L_GC_push_one$stub
>> +	bl BLGCP1
>>
>> -    ;
>> -    lwz r0,88(r1)
>> -    addi r1,r1,80
>> +L_epilog:
>> +    ; Epilog
>> +	lg r0,SAVE_SIZE+SAVED_LR_OFFSET(r1)
>> +	addi r1,r1,SAVE_SIZE
>> 	mtlr r0
>>     	
>> -	; Return
>> +    ; Return
>> 	blr
>> +LFE0:
>>
>> -; PIC stuff, generated by GCC
>> +; eh frames, for those that want 'em.
>>
>> -.data
>> -.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
>> +#define EH_DATA_ALIGN_FACT MODE_CHOICE(0x7c,0x78)
>> +#define EH_FRAME_OFFSET MODE_CHOICE(0x40,0x70)
>> +
>> +	.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms 
>> +live_support
>> +EH_frame1:
>> +	.set L$set$0,LECIE1-LSCIE1
>> +	.long L$set$0	; Length of Common Information Entry
>> +LSCIE1:
>> +	.long	0	; CIE Identifier Tag
>> +	.byte	0x1	; CIE Version
>> +	.ascii "zR\0"	; CIE Augmentation
>> +	.byte	0x1	; uleb128 0x1; CIE Code Alignment Factor
>> +	.byte	EH_DATA_ALIGN_FACT	; sleb128 -4/-8; CIE Data Alignment Factor
>> +	.byte	0x41	; CIE RA Column
>> +	.byte	0x1	; uleb128 0x1; Augmentation size
>> +	.byte	0x10	; FDE Encoding (pcrel)
>> +	.byte	0xc	; DW_CFA_def_cfa
>> +	.byte	0x1	; uleb128 0x1
>> +	.byte	0	; uleb128 0
>> +	.align	LOG2_GPR_BYTES
>> +LECIE1:
>> +
>> +	.globl _GC_push_regs.eh
>> +_GC_push_regs.eh:
>> +LSFDE1:
>> +	.set L$set$1,LEFDE1-LASFDE1
>> +	.long L$set$1	; FDE Length
>> +LASFDE1:
>> +	.long	LASFDE1-EH_frame1	; FDE CIE offset
>> +	.g_long	_GC_push_regs-.	; FDE initial location
>> +	.set L$set$2,LFE0-_GC_push_regs
>> +	.g_long L$set$2	; FDE address range
>> +	.byte	0	; uleb128 0; Augmentation size
>> +	.byte	0x4	; DW_CFA_advance_loc4
>> +	.set L$set$3,L_body-_GC_push_regs
>> +	.long L$set$3
>> +	.byte	0xe	; DW_CFA_def_cfa_offset
>> +	.byte	EH_FRAME_OFFSET	; uleb128 0x40/0x70
>> +	.byte	0x11	; DW_CFA_offset_extended_sf
>> +	.byte	0x41	; uleb128 0x41
>> +	.byte	0x7e	; sleb128 -2
>> +	.byte	0x4	; DW_CFA_advance_loc4
>> +	.set L$set$4,L_epilog-L_body
>> +	.long L$set$4
>> +	.byte	0xe	; DW_CFA_def_cfa_offset
>> +	.byte	0	; uleb128 0
>> +	.align	LOG2_GPR_BYTES
>> +LEFDE1:
>> +
>> +	.data
>> +#ifdef WANT_STUB
>> +; PIC stub stuff, generated by GCC
>> +
>> +	.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
>> 	.align LOG2_GPR_BYTES
>> L_GC_push_one$stub:
>> 	.indirect_symbol _GC_push_one
>> @@ -88,8 +169,10 @@ L0$_GC_push_one:
>> 	lgu r12,lo16(L_GC_push_one$lazy_ptr-L0$_GC_push_one)(r11)
>> 	mtctr r12
>> 	bctr
>> -.data
>> -.lazy_symbol_pointer
>> +
>> +	.data
>> +	.lazy_symbol_pointer
>> L_GC_push_one$lazy_ptr:
>> 	.indirect_symbol _GC_push_one
>> 	.g_long dyld_stub_binding_helper
>> +#endif
>> Index: boehm-gc/tests/test.c
>> ===================================================================
>> --- boehm-gc/tests/test.c	(revision 167325)
>> +++ boehm-gc/tests/test.c	(working copy)
>> @@ -624,7 +624,12 @@ void reverse_test()
>> 	    /* OSF has limited stack space by default, and large frames. */
>> #           define BIG 200
>> #	  else
>> -#           define BIG 4500
>> +#	    if defined(__MACH__) && defined(__ppc64__)
>> +	      /* Small stack and largish frames.  */
>> +#             define BIG 2500	
>> +#	    else
>> +#             define BIG 4500
>> +#	    endif
>> #	  endif
>> #	endif
>> #     endif
>> Index: boehm-gc/pthread_support.c
>> ===================================================================
>> --- boehm-gc/pthread_support.c	(revision 167325)
>> +++ boehm-gc/pthread_support.c	(working copy)
>> @@ -1158,7 +1158,7 @@ GC_PTR GC_get_thread_stack_base()
>>
>> # else
>> #   ifdef DEBUG_THREADS
>> -	GC_printf1("Can not determine stack base for attached thread");
>> +	GC_printf0("Can not determine stack base for attached thread");
>> #   endif
>>   return 0;
>> # endif
>
>>
>>
>>
>
>>
Jack Howarth - Dec. 2, 2010, 2:39 p.m.
On Thu, Dec 02, 2010 at 02:19:10PM +0000, IainS wrote:
>
> On 2 Dec 2010, at 14:12, Jack Howarth wrote:
>
>> On Thu, Dec 02, 2010 at 10:22:17AM +0000, IainS wrote:
>>> well, I imagine this has never worked ...
>>>
>>> ... unfortunately,  'make check' doesn't seem to exercise the multi- 
>>> lib
>>> for boehm-gc ...
>>> (unless one does it by hand)
>>>
>>> Anyway the patch turns a bunch of seg-faults into working tests for
>>> 'make check' in the ppc64/boehm-gc dir.
>>> (and, incidentally, allows the ppc libjava build to proceed with  
>>> fewer
>>> errors for m64).
>>
>> Iain,
>>   The remaining failures in libjava are likely due to the fact that
>> libffi in FSF gcc was never ported to ppc64 on darwin.
>
> yeah...
>
>> . Apple has a set
>> of ppc64 patches buried in their libffi sources but no one has ever
>> tried to extract them.
>
> ... I didn't know about the Apple patches :-(
>
> I've been rolling my own....
>
> Ah well, at least that gives me another place to look ... if mine don't 
> work out.

Iain,
   My understanding is that Apple's libffi is based on that from the PyObjC
project. I do see a ppc64-darwin-_closure.S file in the libffi-src tree there...

http://svn.red-bean.com/pyobjc/trunk/pyobjc/pyobjc-core/libffi-src/powerpc/

however I suspect it would take a bit of work to merge that into the FSF gcc
libffi sources. Also, I am unclear on what the license situation would be for
using their code in GPLv3 sources.

http://svn.red-bean.com/pyobjc/trunk/pyobjc/pyobjc-core/libffi-src/LICENSE

        Jack
   
>
> cheers
> Iain
>
>>>
>>> tested only on powerpc-darwin9.
>>>
>>> I'd welcome comments on whether the eh could be done better/more
>>> automatically,
>>>
>>> OK for trunk?
>>> Iain
>>>
>>>
>>> boehm-gc:
>>>
>>> 	* powerpc_darwin_mach_dep.s:  Update for m64.  Add eh frames.
>>> 	Do not build or use the picsymbol stub for Darwin >= 9.
>>> 	* tests/test.c (reverse_test):  Modify count for ppc64.
>>> 	* pthread_support.c (GC_get_thread_stack_base): Correct a debug
>>> statement.
>>>
>>>
>>
>>> Index: boehm-gc/powerpc_darwin_mach_dep.s
>>> ===================================================================
>>> --- boehm-gc/powerpc_darwin_mach_dep.s	(revision 167325)
>>> +++ boehm-gc/powerpc_darwin_mach_dep.s	(working copy)
>>> @@ -4,12 +4,38 @@
>>> #define MODE_CHOICE(x, y) x
>>> #endif
>>>
>>> -#define lgu     MODE_CHOICE(lwzu, ldu)
>>> +#define machine_choice	MODE_CHOICE(ppc7400,ppc64)
>>>
>>> -#define g_long  MODE_CHOICE(long, quad)         /* usage is  
>>> ".g_long" */
>>> +; Define some pseudo-opcodes for size-independent load & store of  
>>> GPRs ...
>>> +#define lgu		MODE_CHOICE(lwzu, ldu)
>>> +#define lg		MODE_CHOICE(lwz,ld)
>>> +#define sg		MODE_CHOICE(stw,std)
>>> +#define sgu		MODE_CHOICE(stwu,stdu)
>>>
>>> -#define LOG2_GPR_BYTES  MODE_CHOICE(2,3)        /* log2(GPR_BYTES)  
>>> */
>>> +; ... and the size of GPRs and their storage indicator.
>>> +#define GPR_BYTES	MODE_CHOICE(4,8)
>>> +#define LOG2_GPR_BYTES	MODE_CHOICE(2,3)	/* log2(GPR_BYTES) */
>>> +#define g_long		MODE_CHOICE(long, quad)	/* usage is ".g_long" */
>>>
>>> +; From the ABI doc: "Mac OS X ABI Function Call Guide" Version  
>>> 2009-02-04.
>>> +#define LINKAGE_SIZE	MODE_CHOICE(24,48)
>>> +#define PARAM_AREA	MODE_CHOICE(32,64)
>>> +#define SAVED_LR_OFFSET	MODE_CHOICE(8,16)	/* save position for lr */
>>> +
>>> +; The whole stack frame **MUST** be 16byte-aligned.
>>> +#define SAVE_SIZE (LINKAGE_SIZE+PARAM_AREA)
>>> +
>>> +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) &&  
>>> __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 1050
>>> +; We no longer need the pic symbol stub for Darwin >= 9.
>>> +#define BLGCP1	_GC_push_one
>>> +#undef WANT_STUB
>>> +#else
>>> +#define BLGCP1	L_GC_push_one$stub
>>> +#define WANT_STUB
>>> +#endif
>>> +
>>> +	.machine machine_choice
>>> +
>>> ; GC_push_regs function. Under some optimization levels GCC will  
>>> clobber
>>> ; some of the non-volatile registers before we get a chance to save  
>>> them
>>> ; therefore, this cannot be inline asm.
>>> @@ -21,61 +47,116 @@ _GC_push_regs:
>>>
>>>     ; Prolog
>>> 	mflr r0
>>> -	stw r0,8(r1)
>>> -	stwu r1,-80(r1)
>>> +	sg r0,SAVED_LR_OFFSET(r1)
>>> +	sgu r1,-SAVE_SIZE(r1)
>>>
>>> -	; Push r13-r31
>>> +L_body:
>>> +    ; 'Push' r13-r31
>>> 	mr r3,r13
>>> -	bl L_GC_push_one$stub
>>> +	bl BLGCP1
>>> 	mr r3,r14
>>> -	bl L_GC_push_one$stub
>>> +	bl BLGCP1
>>> 	mr r3,r15
>>> -	bl L_GC_push_one$stub
>>> +	bl BLGCP1
>>> 	mr r3,r16
>>> -	bl L_GC_push_one$stub
>>> +	bl BLGCP1
>>> 	mr r3,r17
>>> -	bl L_GC_push_one$stub
>>> +	bl BLGCP1
>>> 	mr r3,r18
>>> -	bl L_GC_push_one$stub
>>> +	bl BLGCP1
>>> 	mr r3,r19
>>> -	bl L_GC_push_one$stub
>>> +	bl BLGCP1
>>> 	mr r3,r20
>>> -	bl L_GC_push_one$stub
>>> +	bl BLGCP1
>>> 	mr r3,r21
>>> -	bl L_GC_push_one$stub
>>> +	bl BLGCP1
>>> 	mr r3,r22
>>> -	bl L_GC_push_one$stub
>>> +	bl BLGCP1
>>> 	mr r3,r23
>>> -	bl L_GC_push_one$stub
>>> +	bl BLGCP1
>>> 	mr r3,r24
>>> -	bl L_GC_push_one$stub
>>> +	bl BLGCP1
>>> 	mr r3,r25
>>> -	bl L_GC_push_one$stub
>>> +	bl BLGCP1
>>> 	mr r3,r26
>>> -	bl L_GC_push_one$stub
>>> +	bl BLGCP1
>>> 	mr r3,r27
>>> -	bl L_GC_push_one$stub
>>> +	bl BLGCP1
>>> 	mr r3,r28
>>> -	bl L_GC_push_one$stub
>>> +	bl BLGCP1
>>> 	mr r3,r29
>>> -	bl L_GC_push_one$stub
>>> +	bl BLGCP1
>>> 	mr r3,r30
>>> -	bl L_GC_push_one$stub
>>> +	bl BLGCP1
>>> 	mr r3,r31
>>> -	bl L_GC_push_one$stub
>>> +	bl BLGCP1
>>>
>>> -    ;
>>> -    lwz r0,88(r1)
>>> -    addi r1,r1,80
>>> +L_epilog:
>>> +    ; Epilog
>>> +	lg r0,SAVE_SIZE+SAVED_LR_OFFSET(r1)
>>> +	addi r1,r1,SAVE_SIZE
>>> 	mtlr r0
>>>     	
>>> -	; Return
>>> +    ; Return
>>> 	blr
>>> +LFE0:
>>>
>>> -; PIC stuff, generated by GCC
>>> +; eh frames, for those that want 'em.
>>>
>>> -.data
>>> -.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
>>> +#define EH_DATA_ALIGN_FACT MODE_CHOICE(0x7c,0x78)
>>> +#define EH_FRAME_OFFSET MODE_CHOICE(0x40,0x70)
>>> +
>>> +	.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms 
>>> +live_support
>>> +EH_frame1:
>>> +	.set L$set$0,LECIE1-LSCIE1
>>> +	.long L$set$0	; Length of Common Information Entry
>>> +LSCIE1:
>>> +	.long	0	; CIE Identifier Tag
>>> +	.byte	0x1	; CIE Version
>>> +	.ascii "zR\0"	; CIE Augmentation
>>> +	.byte	0x1	; uleb128 0x1; CIE Code Alignment Factor
>>> +	.byte	EH_DATA_ALIGN_FACT	; sleb128 -4/-8; CIE Data Alignment Factor
>>> +	.byte	0x41	; CIE RA Column
>>> +	.byte	0x1	; uleb128 0x1; Augmentation size
>>> +	.byte	0x10	; FDE Encoding (pcrel)
>>> +	.byte	0xc	; DW_CFA_def_cfa
>>> +	.byte	0x1	; uleb128 0x1
>>> +	.byte	0	; uleb128 0
>>> +	.align	LOG2_GPR_BYTES
>>> +LECIE1:
>>> +
>>> +	.globl _GC_push_regs.eh
>>> +_GC_push_regs.eh:
>>> +LSFDE1:
>>> +	.set L$set$1,LEFDE1-LASFDE1
>>> +	.long L$set$1	; FDE Length
>>> +LASFDE1:
>>> +	.long	LASFDE1-EH_frame1	; FDE CIE offset
>>> +	.g_long	_GC_push_regs-.	; FDE initial location
>>> +	.set L$set$2,LFE0-_GC_push_regs
>>> +	.g_long L$set$2	; FDE address range
>>> +	.byte	0	; uleb128 0; Augmentation size
>>> +	.byte	0x4	; DW_CFA_advance_loc4
>>> +	.set L$set$3,L_body-_GC_push_regs
>>> +	.long L$set$3
>>> +	.byte	0xe	; DW_CFA_def_cfa_offset
>>> +	.byte	EH_FRAME_OFFSET	; uleb128 0x40/0x70
>>> +	.byte	0x11	; DW_CFA_offset_extended_sf
>>> +	.byte	0x41	; uleb128 0x41
>>> +	.byte	0x7e	; sleb128 -2
>>> +	.byte	0x4	; DW_CFA_advance_loc4
>>> +	.set L$set$4,L_epilog-L_body
>>> +	.long L$set$4
>>> +	.byte	0xe	; DW_CFA_def_cfa_offset
>>> +	.byte	0	; uleb128 0
>>> +	.align	LOG2_GPR_BYTES
>>> +LEFDE1:
>>> +
>>> +	.data
>>> +#ifdef WANT_STUB
>>> +; PIC stub stuff, generated by GCC
>>> +
>>> +	.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
>>> 	.align LOG2_GPR_BYTES
>>> L_GC_push_one$stub:
>>> 	.indirect_symbol _GC_push_one
>>> @@ -88,8 +169,10 @@ L0$_GC_push_one:
>>> 	lgu r12,lo16(L_GC_push_one$lazy_ptr-L0$_GC_push_one)(r11)
>>> 	mtctr r12
>>> 	bctr
>>> -.data
>>> -.lazy_symbol_pointer
>>> +
>>> +	.data
>>> +	.lazy_symbol_pointer
>>> L_GC_push_one$lazy_ptr:
>>> 	.indirect_symbol _GC_push_one
>>> 	.g_long dyld_stub_binding_helper
>>> +#endif
>>> Index: boehm-gc/tests/test.c
>>> ===================================================================
>>> --- boehm-gc/tests/test.c	(revision 167325)
>>> +++ boehm-gc/tests/test.c	(working copy)
>>> @@ -624,7 +624,12 @@ void reverse_test()
>>> 	    /* OSF has limited stack space by default, and large frames. */
>>> #           define BIG 200
>>> #	  else
>>> -#           define BIG 4500
>>> +#	    if defined(__MACH__) && defined(__ppc64__)
>>> +	      /* Small stack and largish frames.  */
>>> +#             define BIG 2500	
>>> +#	    else
>>> +#             define BIG 4500
>>> +#	    endif
>>> #	  endif
>>> #	endif
>>> #     endif
>>> Index: boehm-gc/pthread_support.c
>>> ===================================================================
>>> --- boehm-gc/pthread_support.c	(revision 167325)
>>> +++ boehm-gc/pthread_support.c	(working copy)
>>> @@ -1158,7 +1158,7 @@ GC_PTR GC_get_thread_stack_base()
>>>
>>> # else
>>> #   ifdef DEBUG_THREADS
>>> -	GC_printf1("Can not determine stack base for attached thread");
>>> +	GC_printf0("Can not determine stack base for attached thread");
>>> #   endif
>>>   return 0;
>>> # endif
>>
>>>
>>>
>>>
>>
>>>
Mike Stump - Dec. 2, 2010, 8:13 p.m.
On Dec 2, 2010, at 2:22 AM, IainS wrote:
> Anyway the patch turns a bunch of seg-faults into working tests for  'make check' in the ppc64/boehm-gc dir.

> (and, incidentally, allows the ppc libjava build to proceed with fewer errors for m64).

> I'd welcome comments on whether the eh could be done better/more automatically,

Pretty common writing that type of code with .s files...  The only thing that might make writing it easier would be to write the code in a .c/.C file.

> OK for trunk?

Ok.
IainS - Dec. 10, 2010, 10:09 a.m.
On 2 Dec 2010, at 20:13, Mike Stump wrote:

> On Dec 2, 2010, at 2:22 AM, IainS wrote:
>> Anyway the patch turns a bunch of seg-faults into working tests  
>> for  'make check' in the ppc64/boehm-gc dir.
>
>> (and, incidentally, allows the ppc libjava build to proceed with  
>> fewer errors for m64).
>
>> I'd welcome comments on whether the eh could be done better/more  
>> automatically,
>
> Pretty common writing that type of code with .s files...  The only  
> thing that might make writing it easier would be to write the code  
> in a .c/.C file.
>
>> OK for trunk?
>
> Ok.
r167681
Iain

Patch

Index: boehm-gc/powerpc_darwin_mach_dep.s
===================================================================
--- boehm-gc/powerpc_darwin_mach_dep.s	(revision 167325)
+++ boehm-gc/powerpc_darwin_mach_dep.s	(working copy)
@@ -4,12 +4,38 @@ 
 #define MODE_CHOICE(x, y) x
 #endif
 
-#define lgu     MODE_CHOICE(lwzu, ldu)
+#define machine_choice	MODE_CHOICE(ppc7400,ppc64)
 
-#define g_long  MODE_CHOICE(long, quad)         /* usage is ".g_long" */
+; Define some pseudo-opcodes for size-independent load & store of GPRs ...
+#define lgu		MODE_CHOICE(lwzu, ldu)
+#define lg		MODE_CHOICE(lwz,ld)
+#define sg		MODE_CHOICE(stw,std)
+#define sgu		MODE_CHOICE(stwu,stdu)
 
-#define LOG2_GPR_BYTES  MODE_CHOICE(2,3)        /* log2(GPR_BYTES) */
+; ... and the size of GPRs and their storage indicator.
+#define GPR_BYTES	MODE_CHOICE(4,8)
+#define LOG2_GPR_BYTES	MODE_CHOICE(2,3)	/* log2(GPR_BYTES) */
+#define g_long		MODE_CHOICE(long, quad)	/* usage is ".g_long" */
 
+; From the ABI doc: "Mac OS X ABI Function Call Guide" Version 2009-02-04.
+#define LINKAGE_SIZE	MODE_CHOICE(24,48)
+#define PARAM_AREA	MODE_CHOICE(32,64)
+#define SAVED_LR_OFFSET	MODE_CHOICE(8,16)	/* save position for lr */
+
+; The whole stack frame **MUST** be 16byte-aligned, so round the sum up to a multiple of 16 (m32: 56 -> 64, m64: 112 stays 112).
+#define SAVE_SIZE (((LINKAGE_SIZE+PARAM_AREA)+15)&~15)
+
+#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 1050
+; We no longer need the pic symbol stub for Darwin >= 9.
+#define BLGCP1	_GC_push_one
+#undef WANT_STUB
+#else
+#define BLGCP1	L_GC_push_one$stub
+#define WANT_STUB
+#endif
+
+	.machine machine_choice
+
 ; GC_push_regs function. Under some optimization levels GCC will clobber
 ; some of the non-volatile registers before we get a chance to save them
 ; therefore, this cannot be inline asm.
@@ -21,61 +47,116 @@  _GC_push_regs:
     
     ; Prolog
 	mflr r0
-	stw r0,8(r1)
-	stwu r1,-80(r1)
+	sg r0,SAVED_LR_OFFSET(r1)
+	sgu r1,-SAVE_SIZE(r1)
 
-	; Push r13-r31
+L_body:
+    ; 'Push' r13-r31
 	mr r3,r13
-	bl L_GC_push_one$stub
+	bl BLGCP1
 	mr r3,r14
-	bl L_GC_push_one$stub
+	bl BLGCP1
 	mr r3,r15
-	bl L_GC_push_one$stub
+	bl BLGCP1
 	mr r3,r16
-	bl L_GC_push_one$stub
+	bl BLGCP1
 	mr r3,r17
-	bl L_GC_push_one$stub
+	bl BLGCP1
 	mr r3,r18
-	bl L_GC_push_one$stub
+	bl BLGCP1
 	mr r3,r19
-	bl L_GC_push_one$stub
+	bl BLGCP1
 	mr r3,r20
-	bl L_GC_push_one$stub
+	bl BLGCP1
 	mr r3,r21
-	bl L_GC_push_one$stub
+	bl BLGCP1
 	mr r3,r22
-	bl L_GC_push_one$stub
+	bl BLGCP1
 	mr r3,r23
-	bl L_GC_push_one$stub
+	bl BLGCP1
 	mr r3,r24
-	bl L_GC_push_one$stub
+	bl BLGCP1
 	mr r3,r25
-	bl L_GC_push_one$stub
+	bl BLGCP1
 	mr r3,r26
-	bl L_GC_push_one$stub
+	bl BLGCP1
 	mr r3,r27
-	bl L_GC_push_one$stub
+	bl BLGCP1
 	mr r3,r28
-	bl L_GC_push_one$stub
+	bl BLGCP1
 	mr r3,r29
-	bl L_GC_push_one$stub
+	bl BLGCP1
 	mr r3,r30
-	bl L_GC_push_one$stub
+	bl BLGCP1
 	mr r3,r31
-	bl L_GC_push_one$stub
+	bl BLGCP1
 
-    ; 
-    lwz r0,88(r1)
-    addi r1,r1,80
+L_epilog:
+    ; Epilog
+	lg r0,SAVE_SIZE+SAVED_LR_OFFSET(r1)
+	addi r1,r1,SAVE_SIZE
 	mtlr r0
     	
-	; Return
+    ; Return
 	blr
+LFE0:
 
-; PIC stuff, generated by GCC
+; eh frames, for those that want 'em.
 
-.data
-.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
+#define EH_DATA_ALIGN_FACT MODE_CHOICE(0x7c,0x78)	/* sleb128 -4/-8: CIE data alignment factor */
+#define EH_FRAME_OFFSET MODE_CHOICE(0x40,0x70)	/* CFA offset once the frame is pushed; keep equal to SAVE_SIZE; emitted as a single-byte uleb128, so must stay below 0x80 */
+
+	.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
+EH_frame1:
+	.set L$set$0,LECIE1-LSCIE1
+	.long L$set$0	; Length of Common Information Entry
+LSCIE1:
+	.long	0	; CIE Identifier Tag
+	.byte	0x1	; CIE Version
+	.ascii "zR\0"	; CIE Augmentation
+	.byte	0x1	; uleb128 0x1; CIE Code Alignment Factor
+	.byte	EH_DATA_ALIGN_FACT	; sleb128 -4/-8; CIE Data Alignment Factor
+	.byte	0x41	; CIE RA Column
+	.byte	0x1	; uleb128 0x1; Augmentation size
+	.byte	0x10	; FDE Encoding (pcrel)
+	.byte	0xc	; DW_CFA_def_cfa
+	.byte	0x1	; uleb128 0x1
+	.byte	0	; uleb128 0
+	.align	LOG2_GPR_BYTES
+LECIE1:
+
+	.globl _GC_push_regs.eh
+_GC_push_regs.eh:
+LSFDE1:
+	.set L$set$1,LEFDE1-LASFDE1
+	.long L$set$1	; FDE Length
+LASFDE1:
+	.long	LASFDE1-EH_frame1	; FDE CIE offset
+	.g_long	_GC_push_regs-.	; FDE initial location
+	.set L$set$2,LFE0-_GC_push_regs
+	.g_long L$set$2	; FDE address range
+	.byte	0	; uleb128 0; Augmentation size
+	.byte	0x4	; DW_CFA_advance_loc4
+	.set L$set$3,L_body-_GC_push_regs
+	.long L$set$3
+	.byte	0xe	; DW_CFA_def_cfa_offset
+	.byte	EH_FRAME_OFFSET	; uleb128 0x40/0x70
+	.byte	0x11	; DW_CFA_offset_extended_sf
+	.byte	0x41	; uleb128 0x41
+	.byte	0x7e	; sleb128 -2
+	.byte	0x4	; DW_CFA_advance_loc4
+	.set L$set$4,L_epilog-L_body
+	.long L$set$4
+	.byte	0xe	; DW_CFA_def_cfa_offset
+	.byte	0	; uleb128 0
+	.align	LOG2_GPR_BYTES
+LEFDE1:
+
+	.data
+#ifdef WANT_STUB
+; PIC stub stuff, generated by GCC
+
+	.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
 	.align LOG2_GPR_BYTES
 L_GC_push_one$stub:
 	.indirect_symbol _GC_push_one
@@ -88,8 +169,10 @@  L0$_GC_push_one:
 	lgu r12,lo16(L_GC_push_one$lazy_ptr-L0$_GC_push_one)(r11)
 	mtctr r12
 	bctr
-.data
-.lazy_symbol_pointer
+
+	.data
+	.lazy_symbol_pointer
 L_GC_push_one$lazy_ptr:
 	.indirect_symbol _GC_push_one
 	.g_long dyld_stub_binding_helper
+#endif
Index: boehm-gc/tests/test.c
===================================================================
--- boehm-gc/tests/test.c	(revision 167325)
+++ boehm-gc/tests/test.c	(working copy)
@@ -624,7 +624,12 @@  void reverse_test()
 	    /* OSF has limited stack space by default, and large frames. */
 #           define BIG 200
 #	  else
-#           define BIG 4500
+#	    if defined(__MACH__) && defined(__ppc64__)
+	      /* Small stack and largish frames.  */
+#             define BIG 2500	      
+#	    else
+#             define BIG 4500
+#	    endif
 #	  endif
 #	endif
 #     endif
Index: boehm-gc/pthread_support.c
===================================================================
--- boehm-gc/pthread_support.c	(revision 167325)
+++ boehm-gc/pthread_support.c	(working copy)
@@ -1158,7 +1158,7 @@  GC_PTR GC_get_thread_stack_base()
 
 # else
 #   ifdef DEBUG_THREADS
-	GC_printf1("Can not determine stack base for attached thread");
+	GC_printf0("Can not determine stack base for attached thread");
 #   endif
   return 0;
 # endif