Patchwork [Darwin/PPC64] Fix ABI break on long double structs.

login
register
mail settings
Submitter IainS
Date Dec. 9, 2010, 6:29 p.m.
Message ID <273835E6-6764-4FFF-82DD-9FD0FEF2F351@sandoe-acoustics.co.uk>
Download mbox | patch
Permalink /patch/74956/
State New
Headers show

Comments

IainS - Dec. 9, 2010, 6:29 p.m.
In the case that we have a structure containing exactly enough long  
doubles to straddle the split-point (between pass-in-reg and pass-on- 
stack) we currently get it wrong and believe that we can pass the  
whole entity in regs.

Incidentally, the system gcc-4.2.1 is also broken this way (dunno if  
it's worth filing a radar tho).

OK for trunk?
Iain

gcc:

	* config/rs6000/rs6000.c
	(rs6000_darwin64_record_arg_advance_recurse): Name register increment  
explicitly.
	(rs6000_darwin64_record_arg_recurse): Make sure we split long doubles  
when we
	run out of regs.   Also flag that this requires stack and therefore  
cannot be returned by
	value.
	(rs6000_darwin64_record_arg): Update comment.
	(rs6000_function_value): Likewise.

gcc/testsuite:

	* gcc.target/powerpc/darwin-split-ld-stret.c: New test.

  				   gen_rtx_REG (mode, cum->fregno++),
@@ -8679,7 +8689,7 @@ rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_
       for the chunks of memory that go in int regs.  Note we start at
       element 1; 0 is reserved for an indication of using memory, and
       may or may not be filled in below. */
-  rs6000_darwin64_record_arg_recurse (cum, type, 0, rvec, &k);
+  rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */  
0, rvec, &k);
    rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT,  
rvec, &k);

    /* If any part of the struct went on the stack put all of it there.
@@ -8807,7 +8817,7 @@ rs6000_function_arg (CUMULATIVE_ARGS *cum, enum ma

    if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
      {
-      rtx rslt = rs6000_darwin64_record_arg (cum, type, named, false);
+      rtx rslt = rs6000_darwin64_record_arg (cum, type, named, / 
*retval= */false);
        if (rslt != NULL_RTX)
  	return rslt;
        /* Else fall through to usual handling.  */
@@ -26902,7 +26912,7 @@ rs6000_function_value (const_tree valtype,
        valcum.vregno = ALTIVEC_ARG_MIN_REG;
        /* Do a trial code generation as if this were going to be  
passed as
  	 an argument; if any part goes in memory, we return NULL.  */
-      valret = rs6000_darwin64_record_arg (&valcum, valtype, true,  
true);
+      valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /*  
retval= */ true);
        if (valret)
  	return valret;
        /* Otherwise fall through to standard ABI rules.  */
Index: gcc/testsuite/gcc.target/powerpc/darwin-split-ld-stret.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/darwin-split-ld-stret.c	(revision  
0)
+++ gcc/testsuite/gcc.target/powerpc/darwin-split-ld-stret.c	(revision  
0)
@@ -0,0 +1,87 @@
+/* Check for Darwin m64 that we do not try to pass & return by value  
for a
+   struct exceeding the number of arg FPRs (the struct here straddles  
the
+   split-point).  */
+/* { dg-do run { target { powerpc*-*-darwin* && lp64 } } } */
+
+extern void abort (void);
+
+/*#define DEBUG*/
+
+#ifdef DEBUG
+extern int printf (const char *, ...);
+extern int printf$LDBL128 (const char *, ...);
+#endif
+
+typedef struct fourteen {
+  long double a, b, c, d, e, f, g;
+} fourteen_t ;
+
+fourteen_t foo (fourteen_t, fourteen_t) __attribute__ ((noinline));
+
+fourteen_t
+foo (fourteen_t aa, fourteen_t bb)
+{
+  fourteen_t r;
+
+  r.a = aa.a + bb.a;
+  r.b = aa.b + bb.b;
+  r.c = aa.c + bb.c;
+  r.d = aa.d + bb.d;
+  r.e = aa.e + bb.e;
+  r.f = aa.f + bb.f;
+  r.g = aa.g + bb.g;
+
+#ifdef DEBUG
+#ifdef __ppc64__
+  printf
+#else
+  printf$LDBL128
+#endif	
+	("%Lg %Lg %Lg %Lg %Lg %Lg %Lg %Lg %Lg %Lg %Lg %Lg %Lg %Lg: "
+	"%Lg %Lg %Lg %Lg %Lg %Lg %Lg\n",
+	aa.a, aa.b, aa.c, aa.d, aa.e, aa.f, aa.g,
+	bb.a, bb.b, bb.c, bb.d, bb.e, bb.f, bb.g,
+	r.a, r.b, r.c, r.d, r.e, r.f, r.g);
+ printf ("aa.g %ll16x %ll16x\nbb.g %ll16x %ll16x\n",
+		*(unsigned long long*)&aa.g,
+		*(unsigned long long*)(((char *)&aa.g)+8),
+		*(unsigned long long*)&bb.g,
+		*(unsigned long long*)(((char *)&bb.g)+8));
+
+#endif
+
+  __asm__ (""); /* double make sure we don't get inlined */
+  return r;
+}
+
+int
+main (void)
+{
+  fourteen_t x = { 1.L, 2.L,  3.L,  4.L,  5.L,   
6.L,-12.3456789123456789L };
+  fourteen_t y = { 8.L, 9.L, 10.L, 11.L, 12.L, 13.L,  
12.3456789123456789L };
+  fourteen_t z ;
+  long double zz;
+
+  z = foo (x,y);
+  zz = x.g + y.g;
+#ifdef DEBUG
+#ifdef __ppc64__
+	printf
+#else
+	printf$LDBL128
+#endif	
+		("  z: %Lg %Lg %Lg %Lg %Lg %Lg %Lg\n"
+		 "ret: %ll16x %ll16x\nzz : %ll16x %ll16x\n",
+		z.a, z.b, z.c, z.d, z.e, z.f, z.g,
+		*(unsigned long long*)&z.g,
+		*(unsigned long long*)(((char *)&z.g)+8),
+		*(unsigned long long*)&zz,
+		*(unsigned long long*)(((char *)&zz)+8));
+#endif
+
+  /* Yes, we really do want to do an equality test here.  */
+  if (z.g != zz)
+    abort ();
+
+  return 0;
+}
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 167645)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -8150,8 +8150,9 @@ rs6000_darwin64_record_arg_advance_recurse (CUMULA
 	  rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
 	else if (USE_FP_FOR_ARG_P (cum, mode, ftype))
 	  {
+	    unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
 	    rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
-	    cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
+	    cum->fregno += n_fpregs;
 	    /* Single-precision floats present a special problem for
 	       us, because they are smaller than an 8-byte GPR, and so
 	       the structure-packing rules combined with the standard
@@ -8186,7 +8187,7 @@ rs6000_darwin64_record_arg_advance_recurse (CUMULA
 		  }
 	      }
 	    else
-	      cum->words += (GET_MODE_SIZE (mode) + 7) >> 3;
+	      cum->words += n_fpregs;
 	  }
 	else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, type, 1))
 	  {
@@ -8612,6 +8613,7 @@ rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARG
 	  rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
 	else if (cum->named && USE_FP_FOR_ARG_P (cum, mode, ftype))
 	  {
+	    unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
 #if 0
 	    switch (mode)
 	      {
@@ -8622,6 +8624,14 @@ rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARG
 	      }
 #endif
 	    rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
+	    if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
+	      {
+		gcc_assert (cum->fregno == FP_ARG_MAX_REG
+			    && (mode == TFmode || mode == TDmode));
+		/* Long double or _Decimal128 split over regs and memory.  */
+		mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
+		cum->use_stack=1;
+	      }
 	    rvec[(*k)++]
 	      = gen_rtx_EXPR_LIST (VOIDmode,
 				   gen_rtx_REG (mode, cum->fregno++),
@@ -8679,7 +8689,7 @@ rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_
      for the chunks of memory that go in int regs.  Note we start at
      element 1; 0 is reserved for an indication of using memory, and
      may or may not be filled in below. */
-  rs6000_darwin64_record_arg_recurse (cum, type, 0, rvec, &k);
+  rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
   rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
 
   /* If any part of the struct went on the stack put all of it there.
@@ -8807,7 +8817,7 @@ rs6000_function_arg (CUMULATIVE_ARGS *cum, enum ma
 
   if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
     {
-      rtx rslt = rs6000_darwin64_record_arg (cum, type, named, false);
+      rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
       if (rslt != NULL_RTX)
 	return rslt;
       /* Else fall through to usual handling.  */
@@ -26902,7 +26912,7 @@ rs6000_function_value (const_tree valtype,
       valcum.vregno = ALTIVEC_ARG_MIN_REG;
       /* Do a trial code generation as if this were going to be passed as
 	 an argument; if any part goes in memory, we return NULL.  */
-      valret = rs6000_darwin64_record_arg (&valcum, valtype, true, true);
+      valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
       if (valret)
 	return valret;
       /* Otherwise fall through to standard ABI rules.  */
Index: gcc/testsuite/gcc.target/powerpc/darwin-split-ld-stret.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/darwin-split-ld-stret.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/darwin-split-ld-stret.c	(revision 0)
@@ -0,0 +1,87 @@
+/* Check for Darwin m64 that we do not try to pass & return by value for a
+   struct exceeding the number of arg FPRs (the struct here straddles the 
+   split-point).  */
+/* { dg-do run { target { powerpc*-*-darwin* && lp64 } } } */
+
+extern void abort (void);
+
+/*#define DEBUG*/
+
+#ifdef DEBUG
+extern int printf (const char *, ...);
+extern int printf$LDBL128 (const char *, ...);
+#endif
+
+typedef struct fourteen {
+  long double a, b, c, d, e, f, g;
+} fourteen_t ;
+
+fourteen_t foo (fourteen_t, fourteen_t) __attribute__ ((noinline));
+
+fourteen_t 
+foo (fourteen_t aa, fourteen_t bb) 
+{
+  fourteen_t r;
+
+  r.a = aa.a + bb.a;
+  r.b = aa.b + bb.b;
+  r.c = aa.c + bb.c;
+  r.d = aa.d + bb.d;
+  r.e = aa.e + bb.e;
+  r.f = aa.f + bb.f;
+  r.g = aa.g + bb.g;
+
+#ifdef DEBUG
+#ifdef __ppc64__
+  printf
+#else
+  printf$LDBL128
+#endif	  
+	("%Lg %Lg %Lg %Lg %Lg %Lg %Lg %Lg %Lg %Lg %Lg %Lg %Lg %Lg: "
+	"%Lg %Lg %Lg %Lg %Lg %Lg %Lg\n",
+	aa.a, aa.b, aa.c, aa.d, aa.e, aa.f, aa.g,
+	bb.a, bb.b, bb.c, bb.d, bb.e, bb.f, bb.g,
+	r.a, r.b, r.c, r.d, r.e, r.f, r.g);
+ printf ("aa.g %ll16x %ll16x\nbb.g %ll16x %ll16x\n",
+		*(unsigned long long*)&aa.g,
+		*(unsigned long long*)(((char *)&aa.g)+8),
+		*(unsigned long long*)&bb.g,
+		*(unsigned long long*)(((char *)&bb.g)+8));
+ 
+#endif
+
+  __asm__ (""); /* double make sure we don't get inlined */
+  return r;
+}
+
+int
+main (void)
+{
+  fourteen_t x = { 1.L, 2.L,  3.L,  4.L,  5.L,  6.L,-12.3456789123456789L };
+  fourteen_t y = { 8.L, 9.L, 10.L, 11.L, 12.L, 13.L, 12.3456789123456789L };
+  fourteen_t z ;
+  long double zz;
+  
+  z = foo (x,y);
+  zz = x.g + y.g;
+#ifdef DEBUG
+#ifdef __ppc64__
+	printf
+#else
+	printf$LDBL128
+#endif	  
+		("  z: %Lg %Lg %Lg %Lg %Lg %Lg %Lg\n"
+		 "ret: %ll16x %ll16x\nzz : %ll16x %ll16x\n",
+		z.a, z.b, z.c, z.d, z.e, z.f, z.g,
+		*(unsigned long long*)&z.g,
+		*(unsigned long long*)(((char *)&z.g)+8),
+		*(unsigned long long*)&zz,
+		*(unsigned long long*)(((char *)&zz)+8));
+#endif
+
+  /* Yes, we really do want to do an equality test here.  */
+  if (z.g != zz)
+    abort ();
+
+  return 0;
+}
Mike Stump - Dec. 9, 2010, 6:59 p.m.
On Dec 9, 2010, at 10:29 AM, IainS wrote:
> In the case that we have a structure containing exactly enough long doubles to straddle the split-point (between pass-in-reg and pass-on-stack) we currently get it wrong and believe that we can pass the whole entity in regs.
> 
> Incidentally, the system gcc-4.2.1 is also broken this way (dunno if it's worth filing a radar tho).

Probably not.

> OK for trunk?

Ok.
IainS - Dec. 10, 2010, 10:14 a.m.
On 9 Dec 2010, at 18:59, Mike Stump wrote:

> On Dec 9, 2010, at 10:29 AM, IainS wrote:
>> In the case that we have a structure containing exactly enough long  
>> doubles to straddle the split-point (between pass-in-reg and pass- 
>> on-stack) we currently get it wrong and believe that we can pass  
>> the whole entity in regs.
>>
>>

>> OK for trunk?
>
> Ok.
r167682
Iain

Patch

Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 167645)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -8150,8 +8150,9 @@  rs6000_darwin64_record_arg_advance_recurse (CUMULA
  	  rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
  	else if (USE_FP_FOR_ARG_P (cum, mode, ftype))
  	  {
+	    unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
  	    rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
-	    cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
+	    cum->fregno += n_fpregs;
  	    /* Single-precision floats present a special problem for
  	       us, because they are smaller than an 8-byte GPR, and so
  	       the structure-packing rules combined with the standard
@@ -8186,7 +8187,7 @@  rs6000_darwin64_record_arg_advance_recurse (CUMULA
  		  }
  	      }
  	    else
-	      cum->words += (GET_MODE_SIZE (mode) + 7) >> 3;
+	      cum->words += n_fpregs;
  	  }
  	else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, type, 1))
  	  {
@@ -8612,6 +8613,7 @@  rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARG
  	  rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
  	else if (cum->named && USE_FP_FOR_ARG_P (cum, mode, ftype))
  	  {
+	    unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
  #if 0
  	    switch (mode)
  	      {
@@ -8622,6 +8624,14 @@  rs6000_darwin64_record_arg_recurse  
(CUMULATIVE_ARG
  	      }
  #endif
  	    rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
+	    if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
+	      {
+		gcc_assert (cum->fregno == FP_ARG_MAX_REG
+			    && (mode == TFmode || mode == TDmode));
+		/* Long double or _Decimal128 split over regs and memory.  */
+		mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
+		cum->use_stack=1;
+	      }
  	    rvec[(*k)++]
  	      = gen_rtx_EXPR_LIST (VOIDmode,