diff mbox

[RFC] Port libitm to powerpc

Message ID 882A9065-D914-420D-80EA-A97F1AE8A69D@sandoe-acoustics.co.uk
State New
Headers show

Commit Message

Iain Sandoe Dec. 2, 2011, 1:37 p.m. UTC
On 1 Dec 2011, at 23:28, Iain Sandoe wrote:
>> now I'm slightly confused - do we need to preserve it across the
>> call or not?
>
> erm.  not well phrased.
>
> I am trying to get a grasp on what determines the set of registers  
> that should be saved.
>
> Initially, I was thinking that it was the "call-saved" set - which,  
> in the Darwin ABI is silent about the FPSCR (consistent with  
> Joseph's remark - although I note that the ABI doc, in most cases,  
> states YES/NO for each register).
>
> Now I'm wondering if the saved set needs to include most/all of the  
> set that are saved for exceptions?

Notwithstanding the questions above, (which still stand) - attached is  
a first stab at the Darwin version.

David: I steered clear of using R2 - so it might have some bits useful  
for Aix too.

Richard: things that I did, intentionally, differently (and I'm not  
sure are correct).

1. I saved the CR
2. Once the vrs are saved, I update the VRsave mask to reflect that.

I used the config machinery - because the sjlj.S code ended up looking  
more different than similar.

two failures to track down .... but maybe the answers to the questions  
above will produce a reason ;-)

cheers
Iain

=--------=

Native configuration is powerpc-apple-darwin9

                 === libitm tests ===

Schedule of variations:
     unix/-m32
     unix/-m64

Running target unix/-m32
Using /usr/local/dejagnu-1-4-4/share/dejagnu/baseboards/unix.exp as  
board description file for target.
Using /usr/local/dejagnu-1-4-4/share/dejagnu/config/unix.exp as  
generic interface file for target.
Using /GCC/gcc-live-trunk/libitm/testsuite/config/default.exp as tool- 
and-target-specific interface file.
Running /GCC/gcc-live-trunk/libitm/testsuite/libitm.c/c.exp ...
FAIL: libitm.c/cancel.c execution test
FAIL: libitm.c/simple-2.c execution test
Running /GCC/gcc-live-trunk/libitm/testsuite/libitm.c++/c++.exp ...
WARNING: libitm.c++/static_ctor.C compilation failed to produce  
executable

                 === libitm Summary for unix/-m32 ===

# of expected passes            21
# of unexpected failures        2
# of expected failures          5
Running target unix/-m64
Using /usr/local/dejagnu-1-4-4/share/dejagnu/baseboards/unix.exp as  
board description file for target.
Using /usr/local/dejagnu-1-4-4/share/dejagnu/config/unix.exp as  
generic interface file for target.
Using /GCC/gcc-live-trunk/libitm/testsuite/config/default.exp as tool- 
and-target-specific interface file.
Running /GCC/gcc-live-trunk/libitm/testsuite/libitm.c/c.exp ...
FAIL: libitm.c/cancel.c execution test
FAIL: libitm.c/simple-2.c execution test
Running /GCC/gcc-live-trunk/libitm/testsuite/libitm.c++/c++.exp ...
WARNING: libitm.c++/static_ctor.C compilation failed to produce  
executable

                 === libitm Summary for unix/-m64 ===

# of expected passes            21
# of unexpected failures        2
# of expected failures          5

                 === libitm Summary ===

# of expected passes            42
# of unexpected failures        4
# of expected failures          10

==--===

Comments

Richard Henderson Dec. 2, 2011, 10:59 p.m. UTC | #1
On 12/02/2011 05:37 AM, Iain Sandoe wrote:
> Richard: things that I did, intentionally, differently (and I'm not sure are correct).
> 
> 1. I saved the CR
> 2. Once the vrs are saved, I update the VRsave mask to reflect that.

Don't update VRsave.  This gives the OS license to clobber those registers on task switch, which means that you'd need to reload all of those registers and restore VRsave after calling _GTM_begin_transaction.

> +	mffs f0
> +	stfd f31,-8(r1)

You didn't actually save FPSCRS, only copied it to f0.

> +#ifdef __ppc64__
> +  double fpscr;			/* ??? should we save this.  */
> +  unsigned int vscrpad;	
> +  unsigned int vscr;		/* VRsave */
> +#else
> +  double fpscr;			/* ??? should we save this.  */
> +  unsigned int vscrpad[2];	
> +  unsigned int vscr;		/* VRsave */
> +#endif

Why the difference?

> +  unsigned int cr;		/* Saved CR.  */
> +#ifdef __ppc64__
> +  unsigned int crpad;
> +#endif

Why not just make it unsigned long and be done with it?  There's nothing saying that you can't use lg/stg for the gp that holds the crs.  Nothing except for your corresponding longjmp cares about the layout.

I personally think the whole thing would be much easier to read without relying on the redzone.  Aside from that, there's actually very little real difference in the two files.  Essentially, you're storing the registers in a different order because the prologue does, just so you can make use of the redzone.  

The aix abi saves r2; darwin 32-bit saves r13.  One extra register in both cases, which could use the same slot.


r~
Mike Stump Dec. 3, 2011, 3:59 p.m. UTC | #2
Once you want to check it in, consider any Darwin aspects pre-approved.
diff mbox

Patch

Index: libitm/config/darwin/powerpc/sjlj.S
===================================================================
--- libitm/config/darwin/powerpc/sjlj.S	(revision 0)
+++ libitm/config/darwin/powerpc/sjlj.S	(revision 0)
@@ -0,0 +1,335 @@ 
+/* Copyright (C) 2011 Free Software Foundation, Inc.
+   Contributed by Iain Sandoe <iains@gcc.gnu.org>.
+
+   This file is part of the GNU Transactional Memory Library (libitm).
+
+   Libitm is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+
+#if defined(__ppc64__)
+#define MODE_CHOICE(x, y) y	/* 64-bit build: pick the second argument */
+#else
+#define MODE_CHOICE(x, y) x	/* 32-bit build: pick the first argument */
+#endif
+
+#define MACHINE MODE_CHOICE(ppc7400,ppc64)	/* operand of .machine below */
+#define g_long  MODE_CHOICE(long, quad)         /* usage is ".g_long" */
+#define GPR_BYTES       MODE_CHOICE(4,8)        /* size of a GPR in bytes */
+#define LOG2_GPR_BYTES  MODE_CHOICE(2,3)        /* log2(GPR_BYTES) */
+/* GPR-width mnemonics: the word form for 32-bit, the doubleword form for 64.  */
+#define cmpg    MODE_CHOICE(cmpw, cmpd)
+#define lg      MODE_CHOICE(lwz, ld)
+#define stg     MODE_CHOICE(stw, std)
+#define lgx     MODE_CHOICE(lwzx, ldx)
+#define stgx    MODE_CHOICE(stwx, stdx)
+#define lgu     MODE_CHOICE(lwzu, ldu)
+#define stgu    MODE_CHOICE(stwu, stdu)
+#define lgux    MODE_CHOICE(lwzux, ldux)
+#define stgux   MODE_CHOICE(stwux, stdux)
+#define lgwa    MODE_CHOICE(lwz, lwa)
+
+/* Stack frame constants, given as (32-bit, 64-bit) values.  */
+#define SAVED_LR_OFFSET MODE_CHOICE(8,16)	/* LR slot, from the entry r1 */
+#define SAVED_CR_OFFSET MODE_CHOICE(4,8)	/* CR slot, from the entry r1 */
+#define RED_ZONE_SIZE MODE_CHOICE(224,288)
+#define LINKAGE_SIZE MODE_CHOICE(24,48)
+#define GPR_COUNT MODE_CHOICE(19,18)	/* r13-r31 (32-bit) or r14-r31 (64-bit) */
+
+/* For Darwin we will assume that Altivec is present.  If the code should be
+   ported to support G3, then the vecSave/Restore sections should be
+   conditionalized on __cpu_has_altivec from the system framework (as is done in
+   save_world () from libgcc).  */
+
+# define VECS_SIZ	12*16	/* v20-v31, 16 bytes each */
+
+#ifdef __ppc64__
+# define VRSAVE_SIZ	4
+# define FPSCRPAD	4
+#else
+/* The VRsave word is the lowest word of the red zone, at -224(r1).  */
+# define VRSAVE_SIZ	0
+# define FPSCRPAD	8
+#endif
+
+#define FPSCR		8	/* the FPSCR image is stored with stfd */
+
+#define VECSCR (VECS_SIZ + VRSAVE_SIZ + FPSCRPAD + FPSCR)
+
+/* The red zone is enough to accommodate all the call-saved regs other than
+   the Vecs.  LINKAGE_SIZE is the space reserved for the start of the next
+   frame.  The sum is rounded up to a multiple of 16 (32-bit: 456 -> 464).  */
+
+#define FRAME_SIZE (((LINKAGE_SIZE + RED_ZONE_SIZE + VECSCR + 15) / 16) * 16)
+
+/* Byte offsets from the start of the gtm_jmpbuf, i.e. from the v20 slot.  */
+#define VRSAVE_OFF (VECS_SIZ + FPSCR + FPSCRPAD)
+#define FPSCRS_OFF VECS_SIZ
+#define GPRS_OFF (VRSAVE_OFF + 4)
+#define FPRS_OFF (GPRS_OFF + GPR_COUNT * GPR_BYTES)
+#define PREV_OFF (FPRS_OFF + 8 * 18)	/* just past f14-f31 (18 x 8 bytes) */
+
+	.text
+
+	.machine MACHINE
+	
+	.align	4
+	
+	/*  _ITM_beginTransaction(uint32_t, ...) */
+	
+	.globl __ITM_beginTransaction
+__ITM_beginTransaction:
+	/* r3 is passed through untouched; r0, r4, r5 and f0 are used as scratch.  */
+	mflr	r0
+	stg r0,SAVED_LR_OFFSET(r1)	/* Stash return addr.  */
+	
+	mfcr r4
+	stw r4,SAVED_CR_OFFSET(r1)	/* stash CR  */
+
+	/* ??? Determine when FPRs not present.  */
+	/* ??? Test r3 for pr_hasNoFloatUpdate and skip the fp save.
+	   This is not yet set by the compiler.  */
+
+	stfd f14,-144(r1)
+	stfd f15,-136(r1)
+	stfd f16,-128(r1)
+	stfd f17,-120(r1)
+	stfd f18,-112(r1)
+	stfd f19,-104(r1)
+	stfd f20,-96(r1)
+	stfd f21,-88(r1)
+	stfd f22,-80(r1)
+	stfd f23,-72(r1)
+	stfd f24,-64(r1)
+	stfd f25,-56(r1)
+	stfd f26,-48(r1)
+	stfd f27,-40(r1)
+	stfd f28,-32(r1)
+	stfd f29,-24(r1)
+	stfd f30,-16(r1)
+	mffs f0			/* FPSCR -> f0; stored below once r4 is set up.  */
+	stfd f31,-8(r1)
+
+#ifndef __ppc64__
+	/* This might not be the best approach - stmw could be slower than the
+	   one-by-one store.  Offsets below are from the entry r1.
+
+	   | gpr13
+	   ======= -220 (r1)
+	   | 4 bytes of VRSave
+	   ======= -224 (r1)
+	   | 8 bytes pad.
+	   ======= -232 (r1)
+	   | 8 bytes of FPSCR
+	   ======= -240 (r1) (16 byte boundary)
+	   | vecs...
+	*/
+	stmw r13,-220(r1)
+
+	addi r4,r1,-240	/* set r4 pointing at the fpscr save */
+
+#else
+
+	/* | gpr14
+	   ======= -288 (r1)
+	*/
+
+	stg r14,(-304 +  2 * GPR_BYTES)(r1)
+	stg r15,(-304 +  3 * GPR_BYTES)(r1)
+	stg r16,(-304 +  4 * GPR_BYTES)(r1)
+	stg r17,(-304 +  5 * GPR_BYTES)(r1)
+	stg r18,(-304 +  6 * GPR_BYTES)(r1)
+	stg r19,(-304 +  7 * GPR_BYTES)(r1)
+	stg r20,(-304 +  8 * GPR_BYTES)(r1)
+	stg r21,(-304 +  9 * GPR_BYTES)(r1)
+	stg r22,(-304 + 10 * GPR_BYTES)(r1)
+	stg r23,(-304 + 11 * GPR_BYTES)(r1)
+	stg r24,(-304 + 12 * GPR_BYTES)(r1)
+	stg r25,(-304 + 13 * GPR_BYTES)(r1)
+	stg r26,(-304 + 14 * GPR_BYTES)(r1)
+	stg r27,(-304 + 15 * GPR_BYTES)(r1)
+	stg r28,(-304 + 16 * GPR_BYTES)(r1)
+	stg r29,(-304 + 17 * GPR_BYTES)(r1)
+	stg r30,(-304 + 18 * GPR_BYTES)(r1)
+	stg r31,(-304 + 19 * GPR_BYTES)(r1)
+
+	/* ===
+	   | 4 bytes of padding (-296) | 4 bytes of VRsave (-292)
+	   ======= -296 (r1)
+	   | 8 bytes fpscr.
+	   ======= -304 (r1) (16 byte boundary).
+	   | vecs...
+	*/
+
+	addi r4,r1,-304		/* set r4 pointing at the fpscr save */
+#endif
+	stgu r1,-FRAME_SIZE(r1)	/* allocate stack frame  */
+	/* NOTE(review): the jmpbuf word at PREV_OFF is 0(entry r1); nothing here
+	   stores r1 there, so confirm it holds the SP GTM_longjmp should load.  */
+	/* ??? Determine when VRs not present.  */
+	/* ??? Test r3 for pr_hasNoVectorUpdate and skip the vr save.
+	   This is not yet set by the compiler.  */
+
+	mfspr r0,VRsave
+	addi  r5,r4,-16		/* Now r5 points at V31 save.  */
+#ifndef __ppc64__
+	stw r0,16(r4)		/* VRsave lives at -224 from the entry r1  */
+#else
+	stw r0,12(r4)		/* VRsave lives at -292 from the entry r1  */
+#endif
+	stfd f0,0(r4)		/* save FPSCR (fetched into f0 by mffs above).  */
+
+	addi r4,r4,-32
+	stvx v31,0,r5
+	addi r5,r5,-32
+	stvx v30,0,r4
+	addi r4,r4,-32
+	stvx v29,0,r5
+	addi r5,r5,-32
+	stvx v28,0,r4
+	addi r4,r4,-32
+	stvx v27,0,r5
+	addi r5,r5,-32
+	stvx v26,0,r4
+	addi r4,r4,-32
+	stvx v25,0,r5
+	addi r5,r5,-32
+	stvx v24,0,r4
+	addi r4,r4,-32
+	stvx v23,0,r5
+	addi r5,r5,-32
+	stvx v22,0,r4
+	addi r4,r4,-32
+	stvx v21,0,r5
+	stvx v20,0,r4
+	/* VRsave itself is left exactly as the caller had it: this routine
+	   returns normally, and GTM_longjmp reloads the value saved above.  */
+	/* Point r4 at our saved data "gtm_jmpbuf *", i.e. the v20 slot.  That is
+	   LINKAGE_SIZE(r1) only when FRAME_SIZE needed no rounding (not -m32).  */
+	addi r4,r1,FRAME_SIZE - RED_ZONE_SIZE - VECSCR
+
+	bl	_GTM_begin_transaction
+	nop
+
+	lg	r0,(SAVED_LR_OFFSET + FRAME_SIZE)(r1)
+	mtlr	r0
+	addi	r1, r1, FRAME_SIZE
+	blr
+
+	/* End of _ITM_beginTransaction.  */
+
+	/* uint32_t GTM_longjmp (uint32_t, const gtm_jmpbuf *, uint32_t) */ 
+
+	.private_extern	_GTM_longjmp
+_GTM_longjmp:
+	/* r4 = gtm_jmpbuf to restore from; r3 and r5 are not touched here.  */
+	/* ??? Determine when VRs not present.  */
+	/* ??? Test r5 for pr_hasNoVectorUpdate and skip the vr restore.
+	   This is not yet set by the compiler.  */
+	/* r14/r15 serve as scratch pointers until the GPR reload below.  */
+	mr r14,r4
+	lwz r0,VRSAVE_OFF(r4)
+	addi r15,r4,16	
+
+	lvx v20,0,r14
+	addi r14,r14,32
+	lvx v21,0,r15
+	addi r15,r15,32
+	lvx v22,0,r14
+	addi r14,r14,32
+	lvx v23,0,r15
+	addi r15,r15,32
+	lvx v24,0,r14
+	addi r14,r14,32
+	lvx v25,0,r15
+	addi r15,r15,32
+	lvx v26,0,r14
+	addi r14,r14,32
+	lvx v27,0,r15
+	addi r15,r15,32
+	lvx v28,0,r14
+	addi r14,r14,32
+	lvx v29,0,r15
+	addi r15,r15,32
+	lvx v30,0,r14
+	addi r14,r14,32
+	lvx v31,0,r15
+
+	mtspr VRsave,r0		/* Restored... */
+	
+#ifndef __ppc64__
+	lmw r13,GPRS_OFF(r4)
+#else
+	lg r14,(GPRS_OFF +  0 * GPR_BYTES)(r4)
+	lg r15,(GPRS_OFF +  1 * GPR_BYTES)(r4)
+	lg r16,(GPRS_OFF +  2 * GPR_BYTES)(r4)
+	lg r17,(GPRS_OFF +  3 * GPR_BYTES)(r4)
+	lg r18,(GPRS_OFF +  4 * GPR_BYTES)(r4)
+	lg r19,(GPRS_OFF +  5 * GPR_BYTES)(r4)
+	lg r20,(GPRS_OFF +  6 * GPR_BYTES)(r4)
+	lg r21,(GPRS_OFF +  7 * GPR_BYTES)(r4)
+	lg r22,(GPRS_OFF +  8 * GPR_BYTES)(r4)
+	lg r23,(GPRS_OFF +  9 * GPR_BYTES)(r4)
+	lg r24,(GPRS_OFF + 10 * GPR_BYTES)(r4)
+	lg r25,(GPRS_OFF + 11 * GPR_BYTES)(r4)
+	lg r26,(GPRS_OFF + 12 * GPR_BYTES)(r4)
+	lg r27,(GPRS_OFF + 13 * GPR_BYTES)(r4)
+	lg r28,(GPRS_OFF + 14 * GPR_BYTES)(r4)
+	lg r29,(GPRS_OFF + 15 * GPR_BYTES)(r4)
+	lg r30,(GPRS_OFF + 16 * GPR_BYTES)(r4)
+	lg r31,(GPRS_OFF + 17 * GPR_BYTES)(r4)
+#endif
+
+	/* ??? Determine when FPRs not present.  */
+	/* ??? Test r5 for pr_hasNoFloatUpdate and skip the fp load.
+	   This is not yet set by the compiler.  */
+
+	lfd f0,FPSCRS_OFF(r4)
+	lfd f14,(FPRS_OFF +   0)(r4)
+	lfd f15,(FPRS_OFF +   8)(r4)
+	lfd f16,(FPRS_OFF +  16)(r4)
+	lfd f17,(FPRS_OFF +  24)(r4)
+	lfd f18,(FPRS_OFF +  32)(r4)
+	lfd f19,(FPRS_OFF +  40)(r4)
+	lfd f20,(FPRS_OFF +  48)(r4)
+	lfd f21,(FPRS_OFF +  56)(r4)
+	lfd f22,(FPRS_OFF +  64)(r4)
+	lfd f23,(FPRS_OFF +  72)(r4)
+	lfd f24,(FPRS_OFF +  80)(r4)
+	lfd f25,(FPRS_OFF +  88)(r4)
+	lfd f26,(FPRS_OFF +  96)(r4)
+	lfd f27,(FPRS_OFF + 104)(r4)
+	lfd f28,(FPRS_OFF + 112)(r4)
+	lfd f29,(FPRS_OFF + 120)(r4)
+	lfd f30,(FPRS_OFF + 128)(r4)
+	lfd f31,(FPRS_OFF + 136)(r4)
+	
+	mtfsf 0xff,f0			/* Restore fpscr.  */
+	/* So now pick up the previous data.  r4 is advanced by PREV_OFF first,
+	   so the three loads below must not add PREV_OFF a second time.  */
+	addi r4,r4,PREV_OFF
+	lwz r0,SAVED_CR_OFFSET(r4)
+	mtcr r0				/* Restore CR.  */
+	lg r0,SAVED_LR_OFFSET(r4)
+	mtlr r0				/* Ret addr.  */
+	lg r1,0(r4)	/* SP.  NOTE(review): confirm this word is the entry r1.  */
+	blr
+
+	/* End of GTM_longjump */
+
Index: libitm/config/darwin/powerpc/target.h
===================================================================
--- libitm/config/darwin/powerpc/target.h	(revision 0)
+++ libitm/config/darwin/powerpc/target.h	(revision 0)
@@ -0,0 +1,87 @@ 
+/* Copyright (C) 2011 Free Software Foundation, Inc.
+   Contributed by Iain Sandoe <iains@gcc.gnu.org>.
+   Based on libitm/powerpc/target.h by Richard Henderson.
+
+   This file is part of the GNU Transactional Memory Library (libitm).
+
+   Libitm is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+namespace GTM HIDDEN {
+
+/* This port takes Altivec for granted.  Running on G3 (Darwin < 9) would
+   require the vecSave code in sjlj.S to be made conditional on
+   __cpu_has_altivec from the system framework, which is how Darwin's
+   'world_save ()' routine in libgcc handles it.  */
+/* 16-byte, 16-byte-aligned slot for one vector register.  */
+
+typedef int v128 __attribute__((vector_size(16), may_alias, aligned(16)));
+
+/* Register save area filled in by sjlj.S; the field order and padding have
+   to agree with VRSAVE_OFF, GPRS_OFF, FPRS_OFF and PREV_OFF there.  */
+typedef struct gtm_jmpbuf
+{
+  v128 vr[12];			/* vr20-vr31 */
+  double fpscr;			/* ??? should we save this.  */
+#ifdef __ppc64__
+  unsigned int vscrpad;
+#else
+  unsigned int vscrpad[2];
+#endif
+  unsigned int vscr;		/* VRsave */
+#ifndef __ppc64__
+  unsigned long gr[19];		/* r13-r31 */
+#else
+  unsigned long gr[18];		/* r14-r31 */
+#endif
+  double fr[18];		/* f14-f31 */
+  void *cfa;			/* backchain. */
+  unsigned int cr;		/* Saved CR.  */
+#ifdef __ppc64__
+  unsigned int crpad;
+#endif
+  unsigned long pc;		/* Saved LR.  */
+} gtm_jmpbuf;
+
+/* Hardware cache line size in bytes: 32 for 32-bit, 128 for 64-bit.  */
+#ifndef __ppc64__
+#  define HW_CACHELINE_SIZE 32
+#else
+#  define HW_CACHELINE_SIZE 128
+#endif
+
+static inline void
+cpu_relax (void)
+{
+  __asm volatile ("" : : : "memory");	/* empty asm with a memory clobber */
+}
+
+static inline void
+atomic_read_barrier (void)
+{
+  __sync_synchronize ();	/* same builtin as the write barrier */
+}
+
+static inline void
+atomic_write_barrier (void)
+{
+  __sync_synchronize ();	/* same builtin as the read barrier */
+}
+
+} // namespace GTM
Index: libitm/configure.tgt
===================================================================
--- libitm/configure.tgt	(revision 181902)
+++ libitm/configure.tgt	(working copy)
@@ -46,7 +46,8 @@  fi
 # Map the target cpu to an ARCH sub-directory.  At the same time,
 # work out any special compilation flags as necessary.
 case "${target_cpu}" in
-  alpha*)	ARCH=alpha ;;
+  alpha*)		ARCH=alpha ;;
+  rs6000 | powerpc*)	ARCH=powerpc ;;
 
   i[3456]86)
 	case " ${CC} ${CFLAGS} " in
@@ -90,6 +91,15 @@  case "${target}" in
 	fi
 	;;
 
+  powerpc*-*-darwin*)
+	config_path="darwin/$ARCH $config_path" 
+	;;
+
+  powerpc*-*-aix* | rs6000-*-aix*)
+	# The system ought to be supported, but sjlj.S has not been ported.
+	UNSUPPORTED=1
+	;;
+
   *-*-gnu* | *-*-k*bsd*-gnu \
   | *-*-netbsd* | *-*-freebsd* | *-*-openbsd* \
   | *-*-solaris2* | *-*-sysv4* | *-*-irix6* | *-*-osf* | *-*-hpux11* \