Message ID | 0DA938AC-A8F4-403C-8218-CCA4D9E8937D@sandoe-acoustics.co.uk |
---|---|
State | New |
Headers | show |
On Thu, Dec 02, 2010 at 10:22:17AM +0000, IainS wrote: > well, I imagine this has never worked ... > > ... unfortunately, 'make check' doesn't seem to exercise the multi-lib > for boehm-gc ... > (unless one does it by hand) > > Anyway the patch turns a bunch of seg-faults into working tests for > 'make check' in the ppc64/boehm-gc dir. > (and, incidentally, allows the ppc libjava build to proceed with fewer > errors for m64). Iain, The remaining failures in libjava are likely due to the fact that libffi in FSF gcc was never ported to ppc64 on darwin. Apple has a set of ppc64 patches buried in their libffi sources but no one has ever tried to extract them. Jack > > tested only on powerpc-darwin9. > > I'd welcome comments on whether the eh could be done better/more > automatically, > > OK for trunk? > Iain > > > boehm-gc: > > * powerpc_darwin_mach_dep.s: Update for m64. Add eh frames. > Do not build or use the picsymbol stub for Darwin >= 9. > * tests/test.c (reverse_test): Modify count for ppc64. > * pthread_support.c (GC_get_thread_stack_base): Correct a debug > statement. > > > Index: boehm-gc/powerpc_darwin_mach_dep.s > =================================================================== > --- boehm-gc/powerpc_darwin_mach_dep.s (revision 167325) > +++ boehm-gc/powerpc_darwin_mach_dep.s (working copy) > @@ -4,12 +4,38 @@ > #define MODE_CHOICE(x, y) x > #endif > > -#define lgu MODE_CHOICE(lwzu, ldu) > +#define machine_choice MODE_CHOICE(ppc7400,ppc64) > > -#define g_long MODE_CHOICE(long, quad) /* usage is ".g_long" */ > +; Define some pseudo-opcodes for size-independent load & store of GPRs ... > +#define lgu MODE_CHOICE(lwzu, ldu) > +#define lg MODE_CHOICE(lwz,ld) > +#define sg MODE_CHOICE(stw,std) > +#define sgu MODE_CHOICE(stwu,stdu) > > -#define LOG2_GPR_BYTES MODE_CHOICE(2,3) /* log2(GPR_BYTES) */ > +; ... and the size of GPRs and their storage indicator. 
> +#define GPR_BYTES MODE_CHOICE(4,8) > +#define LOG2_GPR_BYTES MODE_CHOICE(2,3) /* log2(GPR_BYTES) */ > +#define g_long MODE_CHOICE(long, quad) /* usage is ".g_long" */ > > +; From the ABI doc: "Mac OS X ABI Function Call Guide" Version 2009-02-04. > +#define LINKAGE_SIZE MODE_CHOICE(24,48) > +#define PARAM_AREA MODE_CHOICE(32,64) > +#define SAVED_LR_OFFSET MODE_CHOICE(8,16) /* save position for lr */ > + > +; The whole stack frame **MUST** be 16byte-aligned. > +#define SAVE_SIZE (LINKAGE_SIZE+PARAM_AREA) > + > +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 1050 > +; We no longer need the pic symbol stub for Darwin >= 9. > +#define BLGCP1 _GC_push_one > +#undef WANT_STUB > +#else > +#define BLGCP1 L_GC_push_one$stub > +#define WANT_STUB > +#endif > + > + .machine machine_choice > + > ; GC_push_regs function. Under some optimization levels GCC will clobber > ; some of the non-volatile registers before we get a chance to save them > ; therefore, this cannot be inline asm. 
> @@ -21,61 +47,116 @@ _GC_push_regs: > > ; Prolog > mflr r0 > - stw r0,8(r1) > - stwu r1,-80(r1) > + sg r0,SAVED_LR_OFFSET(r1) > + sgu r1,-SAVE_SIZE(r1) > > - ; Push r13-r31 > +L_body: > + ; 'Push' r13-r31 > mr r3,r13 > - bl L_GC_push_one$stub > + bl BLGCP1 > mr r3,r14 > - bl L_GC_push_one$stub > + bl BLGCP1 > mr r3,r15 > - bl L_GC_push_one$stub > + bl BLGCP1 > mr r3,r16 > - bl L_GC_push_one$stub > + bl BLGCP1 > mr r3,r17 > - bl L_GC_push_one$stub > + bl BLGCP1 > mr r3,r18 > - bl L_GC_push_one$stub > + bl BLGCP1 > mr r3,r19 > - bl L_GC_push_one$stub > + bl BLGCP1 > mr r3,r20 > - bl L_GC_push_one$stub > + bl BLGCP1 > mr r3,r21 > - bl L_GC_push_one$stub > + bl BLGCP1 > mr r3,r22 > - bl L_GC_push_one$stub > + bl BLGCP1 > mr r3,r23 > - bl L_GC_push_one$stub > + bl BLGCP1 > mr r3,r24 > - bl L_GC_push_one$stub > + bl BLGCP1 > mr r3,r25 > - bl L_GC_push_one$stub > + bl BLGCP1 > mr r3,r26 > - bl L_GC_push_one$stub > + bl BLGCP1 > mr r3,r27 > - bl L_GC_push_one$stub > + bl BLGCP1 > mr r3,r28 > - bl L_GC_push_one$stub > + bl BLGCP1 > mr r3,r29 > - bl L_GC_push_one$stub > + bl BLGCP1 > mr r3,r30 > - bl L_GC_push_one$stub > + bl BLGCP1 > mr r3,r31 > - bl L_GC_push_one$stub > + bl BLGCP1 > > - ; > - lwz r0,88(r1) > - addi r1,r1,80 > +L_epilog: > + ; Epilog > + lg r0,SAVE_SIZE+SAVED_LR_OFFSET(r1) > + addi r1,r1,SAVE_SIZE > mtlr r0 > > - ; Return > + ; Return > blr > +LFE0: > > -; PIC stuff, generated by GCC > +; eh frames, for those that want 'em. 
> > -.data > -.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 > +#define EH_DATA_ALIGN_FACT MODE_CHOICE(0x7c,0x78) > +#define EH_FRAME_OFFSET MODE_CHOICE(0x40,0x70) > + > + .section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support > +EH_frame1: > + .set L$set$0,LECIE1-LSCIE1 > + .long L$set$0 ; Length of Common Information Entry > +LSCIE1: > + .long 0 ; CIE Identifier Tag > + .byte 0x1 ; CIE Version > + .ascii "zR\0" ; CIE Augmentation > + .byte 0x1 ; uleb128 0x1; CIE Code Alignment Factor > + .byte EH_DATA_ALIGN_FACT ; sleb128 -4/-8; CIE Data Alignment Factor > + .byte 0x41 ; CIE RA Column > + .byte 0x1 ; uleb128 0x1; Augmentation size > + .byte 0x10 ; FDE Encoding (pcrel) > + .byte 0xc ; DW_CFA_def_cfa > + .byte 0x1 ; uleb128 0x1 > + .byte 0 ; uleb128 0 > + .align LOG2_GPR_BYTES > +LECIE1: > + > + .globl _GC_push_regs.eh > +_GC_push_regs.eh: > +LSFDE1: > + .set L$set$1,LEFDE1-LASFDE1 > + .long L$set$1 ; FDE Length > +LASFDE1: > + .long LASFDE1-EH_frame1 ; FDE CIE offset > + .g_long _GC_push_regs-. 
; FDE initial location > + .set L$set$2,LFE0-_GC_push_regs > + .g_long L$set$2 ; FDE address range > + .byte 0 ; uleb128 0; Augmentation size > + .byte 0x4 ; DW_CFA_advance_loc4 > + .set L$set$3,L_body-_GC_push_regs > + .long L$set$3 > + .byte 0xe ; DW_CFA_def_cfa_offset > + .byte EH_FRAME_OFFSET ; uleb128 0x40/0x70 > + .byte 0x11 ; DW_CFA_offset_extended_sf > + .byte 0x41 ; uleb128 0x41 > + .byte 0x7e ; sleb128 -2 > + .byte 0x4 ; DW_CFA_advance_loc4 > + .set L$set$4,L_epilog-L_body > + .long L$set$4 > + .byte 0xe ; DW_CFA_def_cfa_offset > + .byte 0 ; uleb128 0 > + .align LOG2_GPR_BYTES > +LEFDE1: > + > + .data > +#ifdef WANT_STUB > +; PIC stub stuff, generated by GCC > + > + .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 > .align LOG2_GPR_BYTES > L_GC_push_one$stub: > .indirect_symbol _GC_push_one > @@ -88,8 +169,10 @@ L0$_GC_push_one: > lgu r12,lo16(L_GC_push_one$lazy_ptr-L0$_GC_push_one)(r11) > mtctr r12 > bctr > -.data > -.lazy_symbol_pointer > + > + .data > + .lazy_symbol_pointer > L_GC_push_one$lazy_ptr: > .indirect_symbol _GC_push_one > .g_long dyld_stub_binding_helper > +#endif > Index: boehm-gc/tests/test.c > =================================================================== > --- boehm-gc/tests/test.c (revision 167325) > +++ boehm-gc/tests/test.c (working copy) > @@ -624,7 +624,12 @@ void reverse_test() > /* OSF has limited stack space by default, and large frames. */ > # define BIG 200 > # else > -# define BIG 4500 > +# if defined(__MACH__) && defined(__ppc64__) > + /* Small stack and largish frames. 
*/ > +# define BIG 2500 > +# else > +# define BIG 4500 > +# endif > # endif > # endif > # endif > Index: boehm-gc/pthread_support.c > =================================================================== > --- boehm-gc/pthread_support.c (revision 167325) > +++ boehm-gc/pthread_support.c (working copy) > @@ -1158,7 +1158,7 @@ GC_PTR GC_get_thread_stack_base() > > # else > # ifdef DEBUG_THREADS > - GC_printf1("Can not determine stack base for attached thread"); > + GC_printf0("Can not determine stack base for attached thread"); > # endif > return 0; > # endif > > >
On 2 Dec 2010, at 14:12, Jack Howarth wrote: > On Thu, Dec 02, 2010 at 10:22:17AM +0000, IainS wrote: >> well, I imagine this has never worked ... >> >> ... unfortunately, 'make check' doesn't seem to exercise the multi- >> lib >> for boehm-gc ... >> (unless one does it by hand) >> >> Anyway the patch turns a bunch of seg-faults into working tests for >> 'make check' in the ppc64/boehm-gc dir. >> (and, incidentally, allows the ppc libjava build to proceed with >> fewer >> errors for m64). > > Iain, > The remaining failures in libjava are likely due to the fact that > libffi in FSF gcc was never ported to ppc64 on darwin. yeah... > . Apple has a set > of ppc64 patches buried in their libffi sources but no one has ever > tried to extract them. ... I didn't know about the Apple patches :-( I've been rolling my own.... Ah well, at least that gives me another place to look ... if mine don't work out. cheers Iain >> >> tested only on powerpc-darwin9. >> >> I'd welcome comments on whether the eh could be done better/more >> automatically, >> >> OK for trunk? >> Iain >> >> >> boehm-gc: >> >> * powerpc_darwin_mach_dep.s: Update for m64. Add eh frames. >> Do not build or use the picsymbol stub for Darwin >= 9. >> * tests/test.c (reverse_test): Modify count for ppc64. >> * pthread_support.c (GC_get_thread_stack_base): Correct a debug >> statement. >> >> > >> Index: boehm-gc/powerpc_darwin_mach_dep.s >> =================================================================== >> --- boehm-gc/powerpc_darwin_mach_dep.s (revision 167325) >> +++ boehm-gc/powerpc_darwin_mach_dep.s (working copy) >> @@ -4,12 +4,38 @@ >> #define MODE_CHOICE(x, y) x >> #endif >> >> -#define lgu MODE_CHOICE(lwzu, ldu) >> +#define machine_choice MODE_CHOICE(ppc7400,ppc64) >> >> -#define g_long MODE_CHOICE(long, quad) /* usage is >> ".g_long" */ >> +; Define some pseudo-opcodes for size-independent load & store of >> GPRs ... 
>> +#define lgu MODE_CHOICE(lwzu, ldu) >> +#define lg MODE_CHOICE(lwz,ld) >> +#define sg MODE_CHOICE(stw,std) >> +#define sgu MODE_CHOICE(stwu,stdu) >> >> -#define LOG2_GPR_BYTES MODE_CHOICE(2,3) /* log2(GPR_BYTES) >> */ >> +; ... and the size of GPRs and their storage indicator. >> +#define GPR_BYTES MODE_CHOICE(4,8) >> +#define LOG2_GPR_BYTES MODE_CHOICE(2,3) /* log2(GPR_BYTES) */ >> +#define g_long MODE_CHOICE(long, quad) /* usage is ".g_long" */ >> >> +; From the ABI doc: "Mac OS X ABI Function Call Guide" Version >> 2009-02-04. >> +#define LINKAGE_SIZE MODE_CHOICE(24,48) >> +#define PARAM_AREA MODE_CHOICE(32,64) >> +#define SAVED_LR_OFFSET MODE_CHOICE(8,16) /* save position for lr */ >> + >> +; The whole stack frame **MUST** be 16byte-aligned. >> +#define SAVE_SIZE (LINKAGE_SIZE+PARAM_AREA) >> + >> +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && >> __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 1050 >> +; We no longer need the pic symbol stub for Darwin >= 9. >> +#define BLGCP1 _GC_push_one >> +#undef WANT_STUB >> +#else >> +#define BLGCP1 L_GC_push_one$stub >> +#define WANT_STUB >> +#endif >> + >> + .machine machine_choice >> + >> ; GC_push_regs function. Under some optimization levels GCC will >> clobber >> ; some of the non-volatile registers before we get a chance to save >> them >> ; therefore, this cannot be inline asm. 
>> @@ -21,61 +47,116 @@ _GC_push_regs: >> >> ; Prolog >> mflr r0 >> - stw r0,8(r1) >> - stwu r1,-80(r1) >> + sg r0,SAVED_LR_OFFSET(r1) >> + sgu r1,-SAVE_SIZE(r1) >> >> - ; Push r13-r31 >> +L_body: >> + ; 'Push' r13-r31 >> mr r3,r13 >> - bl L_GC_push_one$stub >> + bl BLGCP1 >> mr r3,r14 >> - bl L_GC_push_one$stub >> + bl BLGCP1 >> mr r3,r15 >> - bl L_GC_push_one$stub >> + bl BLGCP1 >> mr r3,r16 >> - bl L_GC_push_one$stub >> + bl BLGCP1 >> mr r3,r17 >> - bl L_GC_push_one$stub >> + bl BLGCP1 >> mr r3,r18 >> - bl L_GC_push_one$stub >> + bl BLGCP1 >> mr r3,r19 >> - bl L_GC_push_one$stub >> + bl BLGCP1 >> mr r3,r20 >> - bl L_GC_push_one$stub >> + bl BLGCP1 >> mr r3,r21 >> - bl L_GC_push_one$stub >> + bl BLGCP1 >> mr r3,r22 >> - bl L_GC_push_one$stub >> + bl BLGCP1 >> mr r3,r23 >> - bl L_GC_push_one$stub >> + bl BLGCP1 >> mr r3,r24 >> - bl L_GC_push_one$stub >> + bl BLGCP1 >> mr r3,r25 >> - bl L_GC_push_one$stub >> + bl BLGCP1 >> mr r3,r26 >> - bl L_GC_push_one$stub >> + bl BLGCP1 >> mr r3,r27 >> - bl L_GC_push_one$stub >> + bl BLGCP1 >> mr r3,r28 >> - bl L_GC_push_one$stub >> + bl BLGCP1 >> mr r3,r29 >> - bl L_GC_push_one$stub >> + bl BLGCP1 >> mr r3,r30 >> - bl L_GC_push_one$stub >> + bl BLGCP1 >> mr r3,r31 >> - bl L_GC_push_one$stub >> + bl BLGCP1 >> >> - ; >> - lwz r0,88(r1) >> - addi r1,r1,80 >> +L_epilog: >> + ; Epilog >> + lg r0,SAVE_SIZE+SAVED_LR_OFFSET(r1) >> + addi r1,r1,SAVE_SIZE >> mtlr r0 >> >> - ; Return >> + ; Return >> blr >> +LFE0: >> >> -; PIC stuff, generated by GCC >> +; eh frames, for those that want 'em. 
>> >> -.data >> -.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 >> +#define EH_DATA_ALIGN_FACT MODE_CHOICE(0x7c,0x78) >> +#define EH_FRAME_OFFSET MODE_CHOICE(0x40,0x70) >> + >> + .section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms >> +live_support >> +EH_frame1: >> + .set L$set$0,LECIE1-LSCIE1 >> + .long L$set$0 ; Length of Common Information Entry >> +LSCIE1: >> + .long 0 ; CIE Identifier Tag >> + .byte 0x1 ; CIE Version >> + .ascii "zR\0" ; CIE Augmentation >> + .byte 0x1 ; uleb128 0x1; CIE Code Alignment Factor >> + .byte EH_DATA_ALIGN_FACT ; sleb128 -4/-8; CIE Data Alignment Factor >> + .byte 0x41 ; CIE RA Column >> + .byte 0x1 ; uleb128 0x1; Augmentation size >> + .byte 0x10 ; FDE Encoding (pcrel) >> + .byte 0xc ; DW_CFA_def_cfa >> + .byte 0x1 ; uleb128 0x1 >> + .byte 0 ; uleb128 0 >> + .align LOG2_GPR_BYTES >> +LECIE1: >> + >> + .globl _GC_push_regs.eh >> +_GC_push_regs.eh: >> +LSFDE1: >> + .set L$set$1,LEFDE1-LASFDE1 >> + .long L$set$1 ; FDE Length >> +LASFDE1: >> + .long LASFDE1-EH_frame1 ; FDE CIE offset >> + .g_long _GC_push_regs-. 
; FDE initial location >> + .set L$set$2,LFE0-_GC_push_regs >> + .g_long L$set$2 ; FDE address range >> + .byte 0 ; uleb128 0; Augmentation size >> + .byte 0x4 ; DW_CFA_advance_loc4 >> + .set L$set$3,L_body-_GC_push_regs >> + .long L$set$3 >> + .byte 0xe ; DW_CFA_def_cfa_offset >> + .byte EH_FRAME_OFFSET ; uleb128 0x40/0x70 >> + .byte 0x11 ; DW_CFA_offset_extended_sf >> + .byte 0x41 ; uleb128 0x41 >> + .byte 0x7e ; sleb128 -2 >> + .byte 0x4 ; DW_CFA_advance_loc4 >> + .set L$set$4,L_epilog-L_body >> + .long L$set$4 >> + .byte 0xe ; DW_CFA_def_cfa_offset >> + .byte 0 ; uleb128 0 >> + .align LOG2_GPR_BYTES >> +LEFDE1: >> + >> + .data >> +#ifdef WANT_STUB >> +; PIC stub stuff, generated by GCC >> + >> + .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 >> .align LOG2_GPR_BYTES >> L_GC_push_one$stub: >> .indirect_symbol _GC_push_one >> @@ -88,8 +169,10 @@ L0$_GC_push_one: >> lgu r12,lo16(L_GC_push_one$lazy_ptr-L0$_GC_push_one)(r11) >> mtctr r12 >> bctr >> -.data >> -.lazy_symbol_pointer >> + >> + .data >> + .lazy_symbol_pointer >> L_GC_push_one$lazy_ptr: >> .indirect_symbol _GC_push_one >> .g_long dyld_stub_binding_helper >> +#endif >> Index: boehm-gc/tests/test.c >> =================================================================== >> --- boehm-gc/tests/test.c (revision 167325) >> +++ boehm-gc/tests/test.c (working copy) >> @@ -624,7 +624,12 @@ void reverse_test() >> /* OSF has limited stack space by default, and large frames. */ >> # define BIG 200 >> # else >> -# define BIG 4500 >> +# if defined(__MACH__) && defined(__ppc64__) >> + /* Small stack and largish frames. 
*/ >> +# define BIG 2500 >> +# else >> +# define BIG 4500 >> +# endif >> # endif >> # endif >> # endif >> Index: boehm-gc/pthread_support.c >> =================================================================== >> --- boehm-gc/pthread_support.c (revision 167325) >> +++ boehm-gc/pthread_support.c (working copy) >> @@ -1158,7 +1158,7 @@ GC_PTR GC_get_thread_stack_base() >> >> # else >> # ifdef DEBUG_THREADS >> - GC_printf1("Can not determine stack base for attached thread"); >> + GC_printf0("Can not determine stack base for attached thread"); >> # endif >> return 0; >> # endif > >> >> >> > >>
On Thu, Dec 02, 2010 at 02:19:10PM +0000, IainS wrote: > > On 2 Dec 2010, at 14:12, Jack Howarth wrote: > >> On Thu, Dec 02, 2010 at 10:22:17AM +0000, IainS wrote: >>> well, I imagine this has never worked ... >>> >>> ... unfortunately, 'make check' doesn't seem to exercise the multi- >>> lib >>> for boehm-gc ... >>> (unless one does it by hand) >>> >>> Anyway the patch turns a bunch of seg-faults into working tests for >>> 'make check' in the ppc64/boehm-gc dir. >>> (and, incidentally, allows the ppc libjava build to proceed with >>> fewer >>> errors for m64). >> >> Iain, >> The remaining failures in libjava are likely due to the fact that >> libffi in FSF gcc was never ported to ppc64 on darwin. > > yeah... > >> . Apple has a set >> of ppc64 patches buried in their libffi sources but no one has ever >> tried to extract them. > > ... I didn't know about the Apple patches :-( > > I've been rolling my own.... > > Ah well, at least that gives me another place to look ... if mine don't > work out. Iain, My understanding is that Apple's libffi is based on that from the PyObjC project. I do see a ppc64-darwin-_closure.S file in the libffi-src tree there... http://svn.red-bean.com/pyobjc/trunk/pyobjc/pyobjc-core/libffi-src/powerpc/ however I suspect it would take a bit of work to merge that into the FSF gcc libffi sources. Also, I am unclear on what the license situation would be for using their code in GPLv3 sources. http://svn.red-bean.com/pyobjc/trunk/pyobjc/pyobjc-core/libffi-src/LICENSE Jack > > cheers > Iain > >>> >>> tested only on powerpc-darwin9. >>> >>> I'd welcome comments on whether the eh could be done better/more >>> automatically, >>> >>> OK for trunk? >>> Iain >>> >>> >>> boehm-gc: >>> >>> * powerpc_darwin_mach_dep.s: Update for m64. Add eh frames. >>> Do not build or use the picsymbol stub for Darwin >= 9. >>> * tests/test.c (reverse_test): Modify count for ppc64. >>> * pthread_support.c (GC_get_thread_stack_base): Correct a debug >>> statement. 
>>> >>> >> >>> Index: boehm-gc/powerpc_darwin_mach_dep.s >>> =================================================================== >>> --- boehm-gc/powerpc_darwin_mach_dep.s (revision 167325) >>> +++ boehm-gc/powerpc_darwin_mach_dep.s (working copy) >>> @@ -4,12 +4,38 @@ >>> #define MODE_CHOICE(x, y) x >>> #endif >>> >>> -#define lgu MODE_CHOICE(lwzu, ldu) >>> +#define machine_choice MODE_CHOICE(ppc7400,ppc64) >>> >>> -#define g_long MODE_CHOICE(long, quad) /* usage is >>> ".g_long" */ >>> +; Define some pseudo-opcodes for size-independent load & store of >>> GPRs ... >>> +#define lgu MODE_CHOICE(lwzu, ldu) >>> +#define lg MODE_CHOICE(lwz,ld) >>> +#define sg MODE_CHOICE(stw,std) >>> +#define sgu MODE_CHOICE(stwu,stdu) >>> >>> -#define LOG2_GPR_BYTES MODE_CHOICE(2,3) /* log2(GPR_BYTES) >>> */ >>> +; ... and the size of GPRs and their storage indicator. >>> +#define GPR_BYTES MODE_CHOICE(4,8) >>> +#define LOG2_GPR_BYTES MODE_CHOICE(2,3) /* log2(GPR_BYTES) */ >>> +#define g_long MODE_CHOICE(long, quad) /* usage is ".g_long" */ >>> >>> +; From the ABI doc: "Mac OS X ABI Function Call Guide" Version >>> 2009-02-04. >>> +#define LINKAGE_SIZE MODE_CHOICE(24,48) >>> +#define PARAM_AREA MODE_CHOICE(32,64) >>> +#define SAVED_LR_OFFSET MODE_CHOICE(8,16) /* save position for lr */ >>> + >>> +; The whole stack frame **MUST** be 16byte-aligned. >>> +#define SAVE_SIZE (LINKAGE_SIZE+PARAM_AREA) >>> + >>> +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && >>> __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 1050 >>> +; We no longer need the pic symbol stub for Darwin >= 9. >>> +#define BLGCP1 _GC_push_one >>> +#undef WANT_STUB >>> +#else >>> +#define BLGCP1 L_GC_push_one$stub >>> +#define WANT_STUB >>> +#endif >>> + >>> + .machine machine_choice >>> + >>> ; GC_push_regs function. Under some optimization levels GCC will >>> clobber >>> ; some of the non-volatile registers before we get a chance to save >>> them >>> ; therefore, this cannot be inline asm. 
>>> @@ -21,61 +47,116 @@ _GC_push_regs: >>> >>> ; Prolog >>> mflr r0 >>> - stw r0,8(r1) >>> - stwu r1,-80(r1) >>> + sg r0,SAVED_LR_OFFSET(r1) >>> + sgu r1,-SAVE_SIZE(r1) >>> >>> - ; Push r13-r31 >>> +L_body: >>> + ; 'Push' r13-r31 >>> mr r3,r13 >>> - bl L_GC_push_one$stub >>> + bl BLGCP1 >>> mr r3,r14 >>> - bl L_GC_push_one$stub >>> + bl BLGCP1 >>> mr r3,r15 >>> - bl L_GC_push_one$stub >>> + bl BLGCP1 >>> mr r3,r16 >>> - bl L_GC_push_one$stub >>> + bl BLGCP1 >>> mr r3,r17 >>> - bl L_GC_push_one$stub >>> + bl BLGCP1 >>> mr r3,r18 >>> - bl L_GC_push_one$stub >>> + bl BLGCP1 >>> mr r3,r19 >>> - bl L_GC_push_one$stub >>> + bl BLGCP1 >>> mr r3,r20 >>> - bl L_GC_push_one$stub >>> + bl BLGCP1 >>> mr r3,r21 >>> - bl L_GC_push_one$stub >>> + bl BLGCP1 >>> mr r3,r22 >>> - bl L_GC_push_one$stub >>> + bl BLGCP1 >>> mr r3,r23 >>> - bl L_GC_push_one$stub >>> + bl BLGCP1 >>> mr r3,r24 >>> - bl L_GC_push_one$stub >>> + bl BLGCP1 >>> mr r3,r25 >>> - bl L_GC_push_one$stub >>> + bl BLGCP1 >>> mr r3,r26 >>> - bl L_GC_push_one$stub >>> + bl BLGCP1 >>> mr r3,r27 >>> - bl L_GC_push_one$stub >>> + bl BLGCP1 >>> mr r3,r28 >>> - bl L_GC_push_one$stub >>> + bl BLGCP1 >>> mr r3,r29 >>> - bl L_GC_push_one$stub >>> + bl BLGCP1 >>> mr r3,r30 >>> - bl L_GC_push_one$stub >>> + bl BLGCP1 >>> mr r3,r31 >>> - bl L_GC_push_one$stub >>> + bl BLGCP1 >>> >>> - ; >>> - lwz r0,88(r1) >>> - addi r1,r1,80 >>> +L_epilog: >>> + ; Epilog >>> + lg r0,SAVE_SIZE+SAVED_LR_OFFSET(r1) >>> + addi r1,r1,SAVE_SIZE >>> mtlr r0 >>> >>> - ; Return >>> + ; Return >>> blr >>> +LFE0: >>> >>> -; PIC stuff, generated by GCC >>> +; eh frames, for those that want 'em. 
>>> >>> -.data >>> -.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 >>> +#define EH_DATA_ALIGN_FACT MODE_CHOICE(0x7c,0x78) >>> +#define EH_FRAME_OFFSET MODE_CHOICE(0x40,0x70) >>> + >>> + .section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms >>> +live_support >>> +EH_frame1: >>> + .set L$set$0,LECIE1-LSCIE1 >>> + .long L$set$0 ; Length of Common Information Entry >>> +LSCIE1: >>> + .long 0 ; CIE Identifier Tag >>> + .byte 0x1 ; CIE Version >>> + .ascii "zR\0" ; CIE Augmentation >>> + .byte 0x1 ; uleb128 0x1; CIE Code Alignment Factor >>> + .byte EH_DATA_ALIGN_FACT ; sleb128 -4/-8; CIE Data Alignment Factor >>> + .byte 0x41 ; CIE RA Column >>> + .byte 0x1 ; uleb128 0x1; Augmentation size >>> + .byte 0x10 ; FDE Encoding (pcrel) >>> + .byte 0xc ; DW_CFA_def_cfa >>> + .byte 0x1 ; uleb128 0x1 >>> + .byte 0 ; uleb128 0 >>> + .align LOG2_GPR_BYTES >>> +LECIE1: >>> + >>> + .globl _GC_push_regs.eh >>> +_GC_push_regs.eh: >>> +LSFDE1: >>> + .set L$set$1,LEFDE1-LASFDE1 >>> + .long L$set$1 ; FDE Length >>> +LASFDE1: >>> + .long LASFDE1-EH_frame1 ; FDE CIE offset >>> + .g_long _GC_push_regs-. 
; FDE initial location >>> + .set L$set$2,LFE0-_GC_push_regs >>> + .g_long L$set$2 ; FDE address range >>> + .byte 0 ; uleb128 0; Augmentation size >>> + .byte 0x4 ; DW_CFA_advance_loc4 >>> + .set L$set$3,L_body-_GC_push_regs >>> + .long L$set$3 >>> + .byte 0xe ; DW_CFA_def_cfa_offset >>> + .byte EH_FRAME_OFFSET ; uleb128 0x40/0x70 >>> + .byte 0x11 ; DW_CFA_offset_extended_sf >>> + .byte 0x41 ; uleb128 0x41 >>> + .byte 0x7e ; sleb128 -2 >>> + .byte 0x4 ; DW_CFA_advance_loc4 >>> + .set L$set$4,L_epilog-L_body >>> + .long L$set$4 >>> + .byte 0xe ; DW_CFA_def_cfa_offset >>> + .byte 0 ; uleb128 0 >>> + .align LOG2_GPR_BYTES >>> +LEFDE1: >>> + >>> + .data >>> +#ifdef WANT_STUB >>> +; PIC stub stuff, generated by GCC >>> + >>> + .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 >>> .align LOG2_GPR_BYTES >>> L_GC_push_one$stub: >>> .indirect_symbol _GC_push_one >>> @@ -88,8 +169,10 @@ L0$_GC_push_one: >>> lgu r12,lo16(L_GC_push_one$lazy_ptr-L0$_GC_push_one)(r11) >>> mtctr r12 >>> bctr >>> -.data >>> -.lazy_symbol_pointer >>> + >>> + .data >>> + .lazy_symbol_pointer >>> L_GC_push_one$lazy_ptr: >>> .indirect_symbol _GC_push_one >>> .g_long dyld_stub_binding_helper >>> +#endif >>> Index: boehm-gc/tests/test.c >>> =================================================================== >>> --- boehm-gc/tests/test.c (revision 167325) >>> +++ boehm-gc/tests/test.c (working copy) >>> @@ -624,7 +624,12 @@ void reverse_test() >>> /* OSF has limited stack space by default, and large frames. */ >>> # define BIG 200 >>> # else >>> -# define BIG 4500 >>> +# if defined(__MACH__) && defined(__ppc64__) >>> + /* Small stack and largish frames. 
*/ >>> +# define BIG 2500 >>> +# else >>> +# define BIG 4500 >>> +# endif >>> # endif >>> # endif >>> # endif >>> Index: boehm-gc/pthread_support.c >>> =================================================================== >>> --- boehm-gc/pthread_support.c (revision 167325) >>> +++ boehm-gc/pthread_support.c (working copy) >>> @@ -1158,7 +1158,7 @@ GC_PTR GC_get_thread_stack_base() >>> >>> # else >>> # ifdef DEBUG_THREADS >>> - GC_printf1("Can not determine stack base for attached thread"); >>> + GC_printf0("Can not determine stack base for attached thread"); >>> # endif >>> return 0; >>> # endif >> >>> >>> >>> >> >>>
On Dec 2, 2010, at 2:22 AM, IainS wrote:
> Anyway the patch turns a bunch of seg-faults into working tests for 'make check' in the ppc64/boehm-gc dir.
> (and, incidentally, allows the ppc libjava build to proceed with fewer errors for m64).

> I'd welcome comments on whether the eh could be done better/more automatically,

Pretty common writing that type of code with .s files... The only thing that might make writing it easier would be to write the code in a .c/.C file.

> OK for trunk?

Ok.
On 2 Dec 2010, at 20:13, Mike Stump wrote:
> On Dec 2, 2010, at 2:22 AM, IainS wrote:
>> Anyway the patch turns a bunch of seg-faults into working tests
>> for 'make check' in the ppc64/boehm-gc dir.
>
>> (and, incidentally, allows the ppc libjava build to proceed with
>> fewer errors for m64).
>
>> I'd welcome comments on whether the eh could be done better/more
>> automatically,
>
> Pretty common writing that type of code with .s files... The only
> thing that might make writing it easier would be to write the code
> in a .c/.C file.
>
>> OK for trunk?
>
> Ok.

r167681
Iain
Index: boehm-gc/powerpc_darwin_mach_dep.s =================================================================== --- boehm-gc/powerpc_darwin_mach_dep.s (revision 167325) +++ boehm-gc/powerpc_darwin_mach_dep.s (working copy) @@ -4,12 +4,38 @@ #define MODE_CHOICE(x, y) x #endif -#define lgu MODE_CHOICE(lwzu, ldu) +#define machine_choice MODE_CHOICE(ppc7400,ppc64) -#define g_long MODE_CHOICE(long, quad) /* usage is ".g_long" */ +; Define some pseudo-opcodes for size-independent load & store of GPRs ... +#define lgu MODE_CHOICE(lwzu, ldu) +#define lg MODE_CHOICE(lwz,ld) +#define sg MODE_CHOICE(stw,std) +#define sgu MODE_CHOICE(stwu,stdu) -#define LOG2_GPR_BYTES MODE_CHOICE(2,3) /* log2(GPR_BYTES) */ +; ... and the size of GPRs and their storage indicator. +#define GPR_BYTES MODE_CHOICE(4,8) +#define LOG2_GPR_BYTES MODE_CHOICE(2,3) /* log2(GPR_BYTES) */ +#define g_long MODE_CHOICE(long, quad) /* usage is ".g_long" */ +; From the ABI doc: "Mac OS X ABI Function Call Guide" Version 2009-02-04. +#define LINKAGE_SIZE MODE_CHOICE(24,48) +#define PARAM_AREA MODE_CHOICE(32,64) +#define SAVED_LR_OFFSET MODE_CHOICE(8,16) /* save position for lr */ + +; The whole stack frame **MUST** be 16byte-aligned. +#define SAVE_SIZE (LINKAGE_SIZE+PARAM_AREA) + +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 1050 +; We no longer need the pic symbol stub for Darwin >= 9. +#define BLGCP1 _GC_push_one +#undef WANT_STUB +#else +#define BLGCP1 L_GC_push_one$stub +#define WANT_STUB +#endif + + .machine machine_choice + ; GC_push_regs function. Under some optimization levels GCC will clobber ; some of the non-volatile registers before we get a chance to save them ; therefore, this cannot be inline asm. 
@@ -21,61 +47,116 @@ _GC_push_regs: ; Prolog mflr r0 - stw r0,8(r1) - stwu r1,-80(r1) + sg r0,SAVED_LR_OFFSET(r1) + sgu r1,-SAVE_SIZE(r1) - ; Push r13-r31 +L_body: + ; 'Push' r13-r31 mr r3,r13 - bl L_GC_push_one$stub + bl BLGCP1 mr r3,r14 - bl L_GC_push_one$stub + bl BLGCP1 mr r3,r15 - bl L_GC_push_one$stub + bl BLGCP1 mr r3,r16 - bl L_GC_push_one$stub + bl BLGCP1 mr r3,r17 - bl L_GC_push_one$stub + bl BLGCP1 mr r3,r18 - bl L_GC_push_one$stub + bl BLGCP1 mr r3,r19 - bl L_GC_push_one$stub + bl BLGCP1 mr r3,r20 - bl L_GC_push_one$stub + bl BLGCP1 mr r3,r21 - bl L_GC_push_one$stub + bl BLGCP1 mr r3,r22 - bl L_GC_push_one$stub + bl BLGCP1 mr r3,r23 - bl L_GC_push_one$stub + bl BLGCP1 mr r3,r24 - bl L_GC_push_one$stub + bl BLGCP1 mr r3,r25 - bl L_GC_push_one$stub + bl BLGCP1 mr r3,r26 - bl L_GC_push_one$stub + bl BLGCP1 mr r3,r27 - bl L_GC_push_one$stub + bl BLGCP1 mr r3,r28 - bl L_GC_push_one$stub + bl BLGCP1 mr r3,r29 - bl L_GC_push_one$stub + bl BLGCP1 mr r3,r30 - bl L_GC_push_one$stub + bl BLGCP1 mr r3,r31 - bl L_GC_push_one$stub + bl BLGCP1 - ; - lwz r0,88(r1) - addi r1,r1,80 +L_epilog: + ; Epilog + lg r0,SAVE_SIZE+SAVED_LR_OFFSET(r1) + addi r1,r1,SAVE_SIZE mtlr r0 - ; Return + ; Return blr +LFE0: -; PIC stuff, generated by GCC +; eh frames, for those that want 'em. 
-.data -.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 +#define EH_DATA_ALIGN_FACT MODE_CHOICE(0x7c,0x78) +#define EH_FRAME_OFFSET MODE_CHOICE(0x40,0x70) + + .section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support +EH_frame1: + .set L$set$0,LECIE1-LSCIE1 + .long L$set$0 ; Length of Common Information Entry +LSCIE1: + .long 0 ; CIE Identifier Tag + .byte 0x1 ; CIE Version + .ascii "zR\0" ; CIE Augmentation + .byte 0x1 ; uleb128 0x1; CIE Code Alignment Factor + .byte EH_DATA_ALIGN_FACT ; sleb128 -4/-8; CIE Data Alignment Factor + .byte 0x41 ; CIE RA Column + .byte 0x1 ; uleb128 0x1; Augmentation size + .byte 0x10 ; FDE Encoding (pcrel) + .byte 0xc ; DW_CFA_def_cfa + .byte 0x1 ; uleb128 0x1 + .byte 0 ; uleb128 0 + .align LOG2_GPR_BYTES +LECIE1: + + .globl _GC_push_regs.eh +_GC_push_regs.eh: +LSFDE1: + .set L$set$1,LEFDE1-LASFDE1 + .long L$set$1 ; FDE Length +LASFDE1: + .long LASFDE1-EH_frame1 ; FDE CIE offset + .g_long _GC_push_regs-. ; FDE initial location + .set L$set$2,LFE0-_GC_push_regs + .g_long L$set$2 ; FDE address range + .byte 0 ; uleb128 0; Augmentation size + .byte 0x4 ; DW_CFA_advance_loc4 + .set L$set$3,L_body-_GC_push_regs + .long L$set$3 + .byte 0xe ; DW_CFA_def_cfa_offset + .byte EH_FRAME_OFFSET ; uleb128 0x40/0x70 + .byte 0x11 ; DW_CFA_offset_extended_sf + .byte 0x41 ; uleb128 0x41 + .byte 0x7e ; sleb128 -2 + .byte 0x4 ; DW_CFA_advance_loc4 + .set L$set$4,L_epilog-L_body + .long L$set$4 + .byte 0xe ; DW_CFA_def_cfa_offset + .byte 0 ; uleb128 0 + .align LOG2_GPR_BYTES +LEFDE1: + + .data +#ifdef WANT_STUB +; PIC stub stuff, generated by GCC + + .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 .align LOG2_GPR_BYTES L_GC_push_one$stub: .indirect_symbol _GC_push_one @@ -88,8 +169,10 @@ L0$_GC_push_one: lgu r12,lo16(L_GC_push_one$lazy_ptr-L0$_GC_push_one)(r11) mtctr r12 bctr -.data -.lazy_symbol_pointer + + .data + .lazy_symbol_pointer L_GC_push_one$lazy_ptr: .indirect_symbol _GC_push_one .g_long 
dyld_stub_binding_helper +#endif Index: boehm-gc/tests/test.c =================================================================== --- boehm-gc/tests/test.c (revision 167325) +++ boehm-gc/tests/test.c (working copy) @@ -624,7 +624,12 @@ void reverse_test() /* OSF has limited stack space by default, and large frames. */ # define BIG 200 # else -# define BIG 4500 +# if defined(__MACH__) && defined(__ppc64__) + /* Small stack and largish frames. */ +# define BIG 2500 +# else +# define BIG 4500 +# endif # endif # endif # endif Index: boehm-gc/pthread_support.c =================================================================== --- boehm-gc/pthread_support.c (revision 167325) +++ boehm-gc/pthread_support.c (working copy) @@ -1158,7 +1158,7 @@ GC_PTR GC_get_thread_stack_base() # else # ifdef DEBUG_THREADS - GC_printf1("Can not determine stack base for attached thread"); + GC_printf0("Can not determine stack base for attached thread"); # endif return 0; # endif