diff mbox

[RFC] Port libitm to powerpc

Message ID 49A2BE0E-A002-44EE-837D-8A41B83A6498@sandoe-acoustics.co.uk
State New
Headers show

Commit Message

Iain Sandoe Dec. 2, 2011, 11:36 p.m. UTC
Hi Richard,

On 2 Dec 2011, at 22:59, Richard Henderson wrote:

> On 12/02/2011 05:37 AM, Iain Sandoe wrote:
>> Richard: things that I did, intentionally, differently (and I'm not  
>> sure are correct).
>>
>> 1. I saved the CR
>> 2. Once the vrs are saved, I update the VRsave mask to reflect that.
>
> Don't update VRsave.  This gives the OS license to clobber those  
> registers on task switch, which means that you'd need to reload all  
> of those registers and restore VRsave after calling  
> _GTM_begin_transaction.

thanks (will fix when updating).

>> +	mffs f0
>> +	stfd f31,-8(r1)
>
> You didn't actually save FPSCRS, only copied it to f0.

it's saved after the stack is updated - because of being outside the  
red zone.

>> +#ifdef __ppc64__
>> +  double fpscr;			/* ??? should we save this.  */
>> +  unsigned int vscrpad;	
>> +  unsigned int vscr;		/* VRsave */
>> +#else
>> +  double fpscr;			/* ??? should we save this.  */
>> +  unsigned int vscrpad[2];	
>> +  unsigned int vscr;		/* VRsave */
>> +#endif
>
> Why the difference?

...because the VRsave reg is included in the red-zone for m32 but not  
for m64...

it also makes the alignment padding explicit ...

>> +  unsigned int cr;		/* Saved CR.  */
>> +#ifdef __ppc64__
>> +  unsigned int crpad;
>> +#endif
>
> Why not just make it unsigned long and be done with it?  There's  
> nothing saying that you can't use lg/stg for the gp that holds the  
> crs.  Nothing except for your corresponding longjmp cares about the  
> layout.

yeah .. it got like that because of ...

> I personally think the whole thing would be much easier to read  
> without relying on the redzone.  Aside from that, there's actually  
> very little real difference in the two files.  Essentially, you're  
> storing the registers in a different order because the prologue  
> does, just so you can make use of the redzone.

OK -  I guess I got carried away with thinking that I might be able to  
re-use the save_world () routine - but that doesn't look feasible  
after all so....

> The aix abi saves r2; darwin 32-bit saves r13.  One extra register  
> in both cases, which could use the same slot.

... will take another look tomorrow....
.... although we still have some syntax issues that might make sharing  
the original code somewhat ugly....

====

... in the meantime, I found two obvious stupid typos in my current  
version - the attached runs without any (unexpected) fails;

Iain

Native configuration is powerpc-apple-darwin9

                 === libitm tests ===

Schedule of variations:
     unix/-m32
     unix/-m64

Running target unix/-m32
Using /usr/local/dejagnu-1-4-4/share/dejagnu/baseboards/unix.exp as  
board description file for target.
Using /usr/local/dejagnu-1-4-4/share/dejagnu/config/unix.exp as  
generic interface file for target.
Using /GCC/gcc-live-trunk/libitm/testsuite/config/default.exp as tool- 
and-target-specific interface file.
Running /GCC/gcc-live-trunk/libitm/testsuite/libitm.c/c.exp ...
Running /GCC/gcc-live-trunk/libitm/testsuite/libitm.c++/c++.exp ...
WARNING: libitm.c++/static_ctor.C compilation failed to produce  
executable

                 === libitm Summary for unix/-m32 ===

# of expected passes            23
# of expected failures          5
Running target unix/-m64
Using /usr/local/dejagnu-1-4-4/share/dejagnu/baseboards/unix.exp as  
board description file for target.
Using /usr/local/dejagnu-1-4-4/share/dejagnu/config/unix.exp as  
generic interface file for target.
Using /GCC/gcc-live-trunk/gcc/testsuite/config/default.exp as tool-and- 
target-specific interface file.
WARNING: libitm.c++/static_ctor.C compilation failed to produce  
executable

                 === libitm Summary for unix/-m64 ===

# of expected passes            23
# of expected failures          5

                 === libitm Summary ===

# of expected passes            46
# of expected failures          10
diff mbox

Patch

Index: libitm/config/darwin/powerpc/sjlj.S
===================================================================
--- libitm/config/darwin/powerpc/sjlj.S	(revision 0)
+++ libitm/config/darwin/powerpc/sjlj.S	(revision 0)
@@ -0,0 +1,345 @@ 
+/* Copyright (C) 2011 Free Software Foundation, Inc.
+   Contributed by Iain Sandoe <iains@gcc.gnu.org>.
+
+   This file is part of the GNU Transactional Memory Library (libitm).
+
+   Libitm is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+
+#if defined(__ppc64__)		/* MODE_CHOICE (m32, m64) selects by word size.  */
+#define MODE_CHOICE(x, y) y
+#else
+#define MODE_CHOICE(x, y) x
+#endif
+
+#define MACHINE MODE_CHOICE(ppc7400,ppc64)	/* operand of ".machine" below */
+#define g_long  MODE_CHOICE(long, quad)         /* usage is ".g_long" */
+#define GPR_BYTES       MODE_CHOICE(4,8)        /* size of a GPR in bytes */
+#define LOG2_GPR_BYTES  MODE_CHOICE(2,3)        /* log2(GPR_BYTES) */
+
+#define cmpg    MODE_CHOICE(cmpw, cmpd)		/* GPR-width compare */
+#define lg      MODE_CHOICE(lwz, ld)		/* GPR-width load */
+#define sg     MODE_CHOICE(stw, std)		/* GPR-width store */
+#define lgx     MODE_CHOICE(lwzx, ldx)		/* load, indexed */
+#define sgx    MODE_CHOICE(stwx, stdx)		/* store, indexed */
+#define lgu     MODE_CHOICE(lwzu, ldu)		/* load with update */
+#define sgu    MODE_CHOICE(stwu, stdu)		/* store with update */
+#define lgux    MODE_CHOICE(lwzux, ldux)	/* load with update, indexed */
+#define sgux   MODE_CHOICE(stwux, stdux)	/* store with update, indexed */
+#define lgwa    MODE_CHOICE(lwz, lwa)		/* 32-bit word load */
+
+/* Stack frame constants: byte offsets from the incoming r1, (m32,m64).  */
+#define RESC_OFFSET MODE_CHOICE(12,24)		/* Currently unused in this file.  */
+#define SAVED_LR_OFFSET MODE_CHOICE(8,16)	/* where LR is stashed */
+#define SAVED_CR_OFFSET MODE_CHOICE(4,8)	/* where CR is stashed */
+#define RED_ZONE_SIZE MODE_CHOICE(224,288)	/* bytes used below r1 before the frame is allocated */
+#define LINKAGE_SIZE MODE_CHOICE(24,48)
+#define GPR_COUNT MODE_CHOICE(19,18)		/* r13-r31 (m32), r14-r31 (m64) */
+
+/* For Darwin we will assume that Altivec is present.  If the code should be
+   ported to support G3, then the vecSave/Restore sections should be
+   conditionalized on __cpu_has_altivec from the system framework (as is done in
+   save_world () from libgcc).  */
+
+# define VECS_SIZ	12*16	/* v20-v31, 16 bytes each */
+
+#ifdef __ppc64__
+# define VRSAVE_SIZ	4	/* VRsave word sits just above the FPSCR slot + pad */
+# define FPSCRPAD	4
+#else
+/* Included in red zone.  */
+# define VRSAVE_SIZ	0
+# define FPSCRPAD	8
+#endif
+
+#define FPSCR		8	/* size of the FPSCR save slot (written with stfd) */
+
+#define VECSCR (VECS_SIZ + VRSAVE_SIZ + FPSCRPAD + FPSCR)	/* 208 in both modes */
+
+/* The red zone is enough to accommodate all the call-saved regs other than
+   the Vecs.  NEXT_FRAME is the space reserved for the start of the next
+   frame.  */
+
+/* We call GTM_begin_transaction (uint32_t, gtm_jmpbuf*).  So we must provide
+   space for those params in the stack frame so that the callee can save them.
+*/
+#define NEXT_FRAME (LINKAGE_SIZE + 2 * GPR_BYTES)	/* 32 (m32), 64 (m64) */
+
+#define FRAME_SIZE (((NEXT_FRAME + RED_ZONE_SIZE + VECSCR + 15) / 16) * 16)	/* rounded up to 16: 464 (m32), 560 (m64) */
+
+/* From the start of the gtm_jmpbuf; evaluated values are (m32, m64).  */
+#define VRSAVE_OFF (VECS_SIZ + FPSCR + FPSCRPAD)	/* 208, 204 */
+#define FPSCRS_OFF VECS_SIZ				/* 192, 192 */
+#define GPRS_OFF (VRSAVE_OFF + 4)			/* 212, 208 */
+#define FPRS_OFF (GPRS_OFF + GPR_COUNT * GPR_BYTES)	/* 288, 352 */
+#define PREV_OFF (FPRS_OFF + 8 * 18)			/* 432, 496: the incoming r1 */
+
+	.text
+
+	.machine MACHINE
+	
+	.align	4
+	/*  _ITM_beginTransaction(uint32_t, ...)
+	    Capture the call-saved registers in a gtm_jmpbuf built on the stack
+	    and pass it, with the unchanged first argument, to GTM_begin_transaction.  */
+	.globl __ITM_beginTransaction
+__ITM_beginTransaction:
+
+	mflr	r0
+	sg r0,SAVED_LR_OFFSET(r1)	/* Stash return addr.  */
+	
+	mfcr r4
+	stw r4,SAVED_CR_OFFSET(r1)	/* stash CR  */		
+
+	/* ??? Check me: this overwrites the back-chain word at 0(r1).  */
+	sg r1,0(r1)			/* Save stack ptr.  */
+
+	/* ??? Determine when FPRs not present.  */
+	/* ??? Test r3 for pr_hasNoFloatUpdate and skip the fp save.
+	   This is not yet set by the compiler.  */
+
+	stfd f14,-144(r1)
+	stfd f15,-136(r1)
+	stfd f16,-128(r1)
+	stfd f17,-120(r1)
+	stfd f18,-112(r1)
+	stfd f19,-104(r1)
+	stfd f20,-96(r1)
+	stfd f21,-88(r1)
+	stfd f22,-80(r1)
+	stfd f23,-72(r1)
+	stfd f24,-64(r1)
+	stfd f25,-56(r1)
+	stfd f26,-48(r1)
+	stfd f27,-40(r1)
+	stfd f28,-32(r1)
+	stfd f29,-24(r1)
+	stfd f30,-16(r1)
+	mffs f0				/* FPSCR -> f0; stored once the frame exists.  */
+	stfd f31,-8(r1)
+
+#ifndef __ppc64__
+	/* This might not be the best approach - stmw could be slower than the
+	   one-by-one store.
+
+	   | gpr13
+	   ======= -220 (r1)
+	   | 4 bytes of VRSave 
+	   ======= -224 (r1)
+	   | 8 bytes pad.
+	   ======= -232 (r1)
+	   | 8 bytes of FPSCR
+	   ======= -240 (r1) (16 byte boundary)
+	   | vecs...
+	*/
+	stmw r13,-220(r1)				
+
+	addi r4,r1,-240	/* set r4 pointing at the fpscr save */		
+
+#else
+
+	/* | gpr14
+	   ======= -288 (r1)
+	*/
+
+	sg r14,(-304 +  2 * GPR_BYTES)(r1)
+	sg r15,(-304 +  3 * GPR_BYTES)(r1)
+	sg r16,(-304 +  4 * GPR_BYTES)(r1)
+	sg r17,(-304 +  5 * GPR_BYTES)(r1)
+	sg r18,(-304 +  6 * GPR_BYTES)(r1)
+	sg r19,(-304 +  7 * GPR_BYTES)(r1)
+	sg r20,(-304 +  8 * GPR_BYTES)(r1)
+	sg r21,(-304 +  9 * GPR_BYTES)(r1)
+	sg r22,(-304 + 10 * GPR_BYTES)(r1)
+	sg r23,(-304 + 11 * GPR_BYTES)(r1)
+	sg r24,(-304 + 12 * GPR_BYTES)(r1)
+	sg r25,(-304 + 13 * GPR_BYTES)(r1)
+	sg r26,(-304 + 14 * GPR_BYTES)(r1)
+	sg r27,(-304 + 15 * GPR_BYTES)(r1)
+	sg r28,(-304 + 16 * GPR_BYTES)(r1)
+	sg r29,(-304 + 17 * GPR_BYTES)(r1)
+	sg r30,(-304 + 18 * GPR_BYTES)(r1)
+	sg r31,(-304 + 19 * GPR_BYTES)(r1)
+
+	/* ===
+	   | 4 bytes of VRSave | 4 of padding
+	   ======= -296 (r1)
+	   | 8 bytes fpscr.
+	   ======= -304 (r1) (16 byte boundary).
+	   | vecs...
+	*/
+
+	addi r4,r1,-304		/* set r4 pointing at the fpscr save */			
+#endif
+
+	sgu r1,-FRAME_SIZE(r1)	/* allocate stack frame  */
+
+	/* ??? Determine when VRs not present.  */
+	/* ??? Test r3 for pr_hasNoVectorUpdate and skip the vr save.
+	   This is not yet set by the compiler.  */
+
+	mfspr r0,VRsave
+	addi  r5,r4,-16		/* Now r5 points at V31 save.  */
+#ifndef __ppc64__
+	stw r0,16(r4)		/* VRsave lives at -224(R1)  */
+#else
+	stw r0,12(r4)		/* VRsave lives at -292(R1)  */
+#endif
+	stfd f0,0(r4)		/* save FPSCR.  */
+	
+	/* r4 and r5 leapfrog downwards 32 bytes at a time, 16 bytes apart.  */
+	addi r4,r4,-32
+	stvx v31,0,r5
+	addi r5,r5,-32
+	stvx v30,0,r4
+	addi r4,r4,-32
+	stvx v29,0,r5
+	addi r5,r5,-32
+	stvx v28,0,r4
+	addi r4,r4,-32
+	stvx v27,0,r5
+	addi r5,r5,-32
+	stvx v26,0,r4
+	addi r4,r4,-32
+	stvx v25,0,r5
+	addi r5,r5,-32
+	stvx v24,0,r4
+	addi r4,r4,-32
+	stvx v23,0,r5
+	addi r5,r5,-32
+	stvx v22,0,r4
+	addi r4,r4,-32
+	stvx v21,0,r5
+	stvx v20,0,r4
+	/* VRsave itself is left exactly as the caller had it; only the copy
+	   stored in the jmpbuf above is kept, for _GTM_longjmp to reinstate.  */
+
+	/* r4 points at our saved data "gtm_jmpbuf *".  */
+	/* r3 is unchanged.  */
+	/* GTM_begin_transaction (uint32_t prop, gtm_jmpbuf *bf) */
+	bl	_GTM_begin_transaction
+	nop
+
+	/* Normal return: r3 is whatever the callee left there.  */
+	lg	r0,(SAVED_LR_OFFSET + FRAME_SIZE)(r1)
+	mtlr	r0
+	addi	r1, r1, FRAME_SIZE
+	blr
+	/* End of _ITM_beginTransaction.  */
+
+	/* uint32_t GTM_longjmp (uint32_t, const gtm_jmpbuf *, uint32_t) */ 
+
+	.private_extern	_GTM_longjmp
+_GTM_longjmp:
+
+	/* ??? Determine when VRs not present.  */
+	/* ??? Test r5 for pr_hasNoVectorUpdate and skip the vr restore.
+	   This is not yet set by the compiler.  */
+	
+	mr r14,r4		/* r14/r15 are scratch cursors; reloaded below.  */
+	lwz r0,VRSAVE_OFF(r4)	/* Saved VRsave value.  */
+	addi r15,r4,16	
+
+	lvx v20,0,r14		/* The vector save area starts at offset 0.  */
+	addi r14,r14,32
+	lvx v21,0,r15
+	addi r15,r15,32
+	lvx v22,0,r14
+	addi r14,r14,32
+	lvx v23,0,r15
+	addi r15,r15,32
+	lvx v24,0,r14
+	addi r14,r14,32
+	lvx v25,0,r15
+	addi r15,r15,32
+	lvx v26,0,r14
+	addi r14,r14,32
+	lvx v27,0,r15
+	addi r15,r15,32
+	lvx v28,0,r14
+	addi r14,r14,32
+	lvx v29,0,r15
+	addi r15,r15,32
+	lvx v30,0,r14
+	addi r14,r14,32
+	lvx v31,0,r15
+
+	mtspr VRsave,r0		/* Restored... */
+	
+#ifndef __ppc64__
+	lmw r13,GPRS_OFF(r4)	/* r13-r31 in one go.  */
+#else
+	lg r14,(GPRS_OFF +  0 * GPR_BYTES)(r4)
+	lg r15,(GPRS_OFF +  1 * GPR_BYTES)(r4)
+	lg r16,(GPRS_OFF +  2 * GPR_BYTES)(r4)
+	lg r17,(GPRS_OFF +  3 * GPR_BYTES)(r4)
+	lg r18,(GPRS_OFF +  4 * GPR_BYTES)(r4)
+	lg r19,(GPRS_OFF +  5 * GPR_BYTES)(r4)
+	lg r20,(GPRS_OFF +  6 * GPR_BYTES)(r4)
+	lg r21,(GPRS_OFF +  7 * GPR_BYTES)(r4)
+	lg r22,(GPRS_OFF +  8 * GPR_BYTES)(r4)
+	lg r23,(GPRS_OFF +  9 * GPR_BYTES)(r4)
+	lg r24,(GPRS_OFF + 10 * GPR_BYTES)(r4)
+	lg r25,(GPRS_OFF + 11 * GPR_BYTES)(r4)
+	lg r26,(GPRS_OFF + 12 * GPR_BYTES)(r4)
+	lg r27,(GPRS_OFF + 13 * GPR_BYTES)(r4)
+	lg r28,(GPRS_OFF + 14 * GPR_BYTES)(r4)
+	lg r29,(GPRS_OFF + 15 * GPR_BYTES)(r4)
+	lg r30,(GPRS_OFF + 16 * GPR_BYTES)(r4)
+	lg r31,(GPRS_OFF + 17 * GPR_BYTES)(r4)
+#endif
+
+	/* ??? Determine when FPRs not present.  */
+	/* ??? Test r5 for pr_hasNoFloatUpdate and skip the fp load.
+	   This is not yet set by the compiler.  */
+
+	lfd f0,FPSCRS_OFF(r4)	/* Saved FPSCR image, applied after the FPR loads.  */
+
+	lfd f14,(FPRS_OFF +   0)(r4)
+	lfd f15,(FPRS_OFF +   8)(r4)
+	lfd f16,(FPRS_OFF +  16)(r4)
+	lfd f17,(FPRS_OFF +  24)(r4)
+	lfd f18,(FPRS_OFF +  32)(r4)
+	lfd f19,(FPRS_OFF +  40)(r4)
+	lfd f20,(FPRS_OFF +  48)(r4)
+	lfd f21,(FPRS_OFF +  56)(r4)
+	lfd f22,(FPRS_OFF +  64)(r4)
+	lfd f23,(FPRS_OFF +  72)(r4)
+	lfd f24,(FPRS_OFF +  80)(r4)
+	lfd f25,(FPRS_OFF +  88)(r4)
+	lfd f26,(FPRS_OFF +  96)(r4)
+	lfd f27,(FPRS_OFF + 104)(r4)
+	lfd f28,(FPRS_OFF + 112)(r4)
+	lfd f29,(FPRS_OFF + 120)(r4)
+	lfd f30,(FPRS_OFF + 128)(r4)
+	lfd f31,(FPRS_OFF + 136)(r4)
+	
+	mtfsf 0xff,f0			/* Restore fpscr.  */
+
+	/* Finally reload CR, LR and SP from the words saved at PREV_OFF.  */
+	lwz r0,(PREV_OFF + SAVED_CR_OFFSET)(r4)
+	mtcr r0				/* Restore CR.  */
+	lg r0,(PREV_OFF + SAVED_LR_OFFSET)(r4)
+	mtlr r0				/* Ret addr.  */
+	lg r1,(PREV_OFF + 0)(r4)	/* SP.  */
+	blr				/* r3 has not been written in this routine.  */
+
+	/* End of GTM_longjmp */
+	/* End of sjlj.S */
+
Index: libitm/config/darwin/powerpc/target.h
===================================================================
--- libitm/config/darwin/powerpc/target.h	(revision 0)
+++ libitm/config/darwin/powerpc/target.h	(revision 0)
@@ -0,0 +1,88 @@ 
+/* Copyright (C) 2011 Free Software Foundation, Inc.
+   Contributed by Iain Sandoe <iains@gcc.gnu.org>.
+   Based on libitm/powerpc/target.h by Richard Henderson.
+
+   This file is part of the GNU Transactional Memory Library (libitm).
+
+   Libitm is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+namespace GTM HIDDEN {
+
+/* We will assume that the code is to be built for a processor with
+   Altivec.  If it is desired to make this run on G3 for Darwin < 9 then the
+   vecSave code in sjlj.S will need to be made conditional on __cpu_has_altivec
+   from the system framework (as is done in Darwin's 'save_world ()' routine in
+   libgcc).  */
+
+typedef int v128 __attribute__((vector_size(16), may_alias, aligned(16)));
+/* Field order and sizes are mirrored by the hand-written *_OFF offsets in sjlj.S.  */
+typedef struct gtm_jmpbuf
+{
+  v128 vr[12];			/* vr20-vr31 */
+#ifdef __ppc64__
+  double fpscr;			/* FPSCR image.  ??? should we save this.  */
+  unsigned int vscrpad;		/* Explicit alignment padding.  */
+  unsigned int vscr;		/* Holds VRsave, despite the name.  */
+#else
+  double fpscr;			/* FPSCR image.  ??? should we save this.  */
+  unsigned int vscrpad[2];	/* Explicit alignment padding.  */
+  unsigned int vscr;		/* Holds VRsave, despite the name.  */
+#endif
+#ifdef __ppc64__
+  unsigned long gr[18];		/* r14-r31 */
+#else
+  unsigned long gr[19];		/* r13-r31 */
+#endif
+  double fr[18];		/* f14-f31 */
+  void *backchain;		/* Word at 0(r1) on entry; sjlj.S writes r1 here.  */
+  unsigned int cr;		/* Saved CR.  */
+#ifdef __ppc64__
+  unsigned int crpad;		/* Pads cr out to 8 bytes.  */
+#endif
+  unsigned long pc;		/* Saved LR.  */
+  void *savsp;			/* Saved SP.  NOTE(review): sjlj.S does not appear to store here - confirm.  */
+} gtm_jmpbuf;
+
+/* The size of one line in hardware caches (in bytes). */
+#ifdef __ppc64__
+#  define HW_CACHELINE_SIZE 128
+#else
+#  define HW_CACHELINE_SIZE 64
+#endif
+/* Spin-wait hint: the asm body is empty, so this is only a compiler barrier.  */
+static inline void
+cpu_relax (void)
+{
+  __asm volatile ("" : : : "memory");
+}
+/* Read barrier, implemented with the full __sync_synchronize () builtin.  */
+static inline void
+atomic_read_barrier (void)
+{
+  __sync_synchronize ();
+}
+/* Write barrier, implemented with the full __sync_synchronize () builtin.  */
+static inline void
+atomic_write_barrier (void)
+{
+  __sync_synchronize ();
+}
+
+} // namespace GTM
Index: libitm/configure.tgt
===================================================================
--- libitm/configure.tgt	(revision 181931)
+++ libitm/configure.tgt	(working copy)
@@ -46,7 +46,8 @@  fi
 # Map the target cpu to an ARCH sub-directory.  At the same time,
 # work out any special compilation flags as necessary.
 case "${target_cpu}" in
-  alpha*)	ARCH=alpha ;;
+  alpha*)		ARCH=alpha ;;
+  rs6000 | powerpc*)	ARCH=powerpc ;;
 
   i[3456]86)
 	case " ${CC} ${CFLAGS} " in
@@ -90,6 +91,15 @@  case "${target}" in
 	fi
 	;;
 
+  powerpc*-*-darwin*)
+	config_path="darwin/$ARCH $config_path" 
+	;;
+
+  powerpc*-*-aix* | rs6000-*-aix*)
+	# The system ought to be supported, but sjlj.S has not been ported.
+	UNSUPPORTED=1
+	;;
+
   *-*-gnu* | *-*-k*bsd*-gnu \
   | *-*-netbsd* | *-*-freebsd* | *-*-openbsd* \
   | *-*-solaris2* | *-*-sysv4* | *-*-irix6* | *-*-osf* | *-*-hpux11* \