Patchwork [RS6000] PR54009 again

login
register
mail settings
Submitter Alan Modra
Date Feb. 7, 2013, 2:39 a.m.
Message ID <20130207023917.GU5023@bubble.grove.modra.org>
Download mbox | patch
Permalink /patch/218812/
State New
Headers show

Comments

Alan Modra - Feb. 7, 2013, 2:39 a.m.
On Wed, Feb 06, 2013 at 01:24:43PM -0500, David Edelsohn wrote:
> We just need to tweak it until we close these corner cases.
> 
> Do you have a testcase that can be added?

David's "corner case" comment prompted me to look at this again, and
sure enough, there was another case that could be triggered with
-m32 -mpowerpc64.  Discussed off-list with David, final patch as
follows.  Committed revision 195835 (pr54131 test) and 195836.

gcc/
	PR target/54009
	* config/rs6000/rs6000.c (mem_operand_gpr): Check that LO_SUM
	addresses won't wrap when offsetting.
	(rs6000_secondary_reload): Provide secondary reloads needed for
	wrapping LO_SUM addresses.

gcc/testsuite/
	PR target/54131
	* gfortran.dg/pr54131.f: New test.

	PR target/54009
	* gcc.target/powerpc/pr54009.c: New test.

Patch

Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 195707)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -5135,17 +5135,14 @@ 
   if (TARGET_POWERPC64 && (offset & 3) != 0)
     return false;
 
+  extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
+  gcc_assert (extra >= 0);
+
   if (GET_CODE (addr) == LO_SUM)
-    /* We know by alignment that ABI_AIX medium/large model toc refs
-       will not cross a 32k boundary, since all entries in the
-       constant pool are naturally aligned and we check alignment for
-       other medium model toc-relative addresses.  For ABI_V4 and
-       ABI_DARWIN lo_sum addresses, we just check that 64-bit
-       offsets are 4-byte aligned.  */
-    return true;
+    /* For lo_sum addresses, we must allow any offset except one that
+       causes a wrap, so test only the low 16 bits.  */
+    offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
 
-  extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
-  gcc_assert (extra >= 0);
   return offset + 0x8000 < 0x10000u - extra;
 }
 
@@ -13823,19 +13819,36 @@ 
 	   && MEM_P (x)
 	   && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
     {
-      rtx off = address_offset (XEXP (x, 0));
-      unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
+      rtx addr = XEXP (x, 0);
+      rtx off = address_offset (addr);
 
-      if (off != NULL_RTX
-	  && (INTVAL (off) & 3) != 0
-	  && (unsigned HOST_WIDE_INT) INTVAL (off) + 0x8000 < 0x10000 - extra)
+      if (off != NULL_RTX)
 	{
-	  if (in_p)
-	    sri->icode = CODE_FOR_reload_di_load;
+	  unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
+	  unsigned HOST_WIDE_INT offset = INTVAL (off);
+
+	  /* We need a secondary reload when our legitimate_address_p
+	     says the address is good (as otherwise the entire address
+	     will be reloaded), and the offset is not a multiple of
+	     four or we have an address wrap.  Address wrap will only
+	     occur for LO_SUMs since legitimate_offset_address_p
+	     rejects addresses for 16-byte mems that will wrap.  */
+	  if (GET_CODE (addr) == LO_SUM
+	      ? (1 /* legitimate_address_p allows any offset for lo_sum */
+		 && ((offset & 3) != 0
+		     || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
+	      : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
+		 && (offset & 3) != 0))
+	    {
+	      if (in_p)
+		sri->icode = CODE_FOR_reload_di_load;
+	      else
+		sri->icode = CODE_FOR_reload_di_store;
+	      sri->extra_cost = 2;
+	      ret = NO_REGS;
+	    }
 	  else
-	    sri->icode = CODE_FOR_reload_di_store;
-	  sri->extra_cost = 2;
-	  ret = NO_REGS;
+	    default_p = true;
 	}
       else
 	default_p = true;
@@ -13845,25 +13858,43 @@ 
 	   && MEM_P (x)
 	   && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
     {
-      rtx off = address_offset (XEXP (x, 0));
-      unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
+      rtx addr = XEXP (x, 0);
+      rtx off = address_offset (addr);
 
-      /* We need a secondary reload only when our legitimate_address_p
-	 says the address is good (as otherwise the entire address
-	 will be reloaded).  So for mode sizes of 8 and 16 this will
-	 be when the offset is in the ranges [0x7ffc,0x7fff] and
-	 [0x7ff4,0x7ff7] respectively.  Note that the address we see
-	 here may have been manipulated by legitimize_reload_address.  */
-      if (off != NULL_RTX
-	  && ((unsigned HOST_WIDE_INT) INTVAL (off) - (0x8000 - extra)
-	      < UNITS_PER_WORD))
+      if (off != NULL_RTX)
 	{
-	  if (in_p)
-	    sri->icode = CODE_FOR_reload_si_load;
+	  unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
+	  unsigned HOST_WIDE_INT offset = INTVAL (off);
+
+	  /* We need a secondary reload when our legitimate_address_p
+	     says the address is good (as otherwise the entire address
+	     will be reloaded), and we have a wrap.
+
+	     legitimate_lo_sum_address_p allows LO_SUM addresses to
+	     have any offset so test for wrap in the low 16 bits.
+
+	     legitimate_offset_address_p checks for the range
+	     [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
+	     for mode size of 16.  We wrap at [0x7ffc,0x7fff] and
+	     [0x7ff4,0x7fff] respectively, so test for the
+	     intersection of these ranges, [0x7ffc,0x7fff] and
+	     [0x7ff4,0x7ff7] respectively.
+
+	     Note that the address we see here may have been
+	     manipulated by legitimize_reload_address.  */
+	  if (GET_CODE (addr) == LO_SUM
+	      ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
+	      : offset - (0x8000 - extra) < UNITS_PER_WORD)
+	    {
+	      if (in_p)
+		sri->icode = CODE_FOR_reload_si_load;
+	      else
+		sri->icode = CODE_FOR_reload_si_store;
+	      sri->extra_cost = 2;
+	      ret = NO_REGS;
+	    }
 	  else
-	    sri->icode = CODE_FOR_reload_si_store;
-	  sri->extra_cost = 2;
-	  ret = NO_REGS;
+	    default_p = true;
 	}
       else
 	default_p = true;
Index: gcc/testsuite/gcc.target/powerpc/pr54009.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/pr54009.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/pr54009.c	(revision 0)
@@ -0,0 +1,43 @@ 
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target powerpc_fprs } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "\[\+\]32768" } } */
+
+/* -O2 -m32 store to x.d in w was
+    lis 9,x+32764@ha
+    stw 10,x+32764@l(9)
+    stw 11,x+32768@l(9)  <--  wrap!  */
+
+struct big {
+  char a[32764];
+  double d __attribute__ ((aligned (4)));
+} __attribute__ ((packed));
+
+extern struct big x;
+double y;
+
+void r (void)
+{
+  double tmp = x.d;
+#if 1
+  asm ("#": "+r" (tmp)
+       : : "fr0", "fr1", "fr2", "fr3", "fr4", "fr5", "fr6", "fr7",
+           "fr8", "fr9", "fr10", "fr11", "fr12", "fr13", "fr14", "fr15",
+           "fr16", "fr17", "fr18", "fr19", "fr20", "fr21", "fr22", "fr23",
+           "fr24", "fr25", "fr26", "fr27", "fr28", "fr29", "fr30", "fr31");
+#endif
+  y = tmp;
+}
+
+void w (void)
+{
+  double tmp = y;
+#if 1
+  asm ("#": "+r" (tmp)
+       : : "fr0", "fr1", "fr2", "fr3", "fr4", "fr5", "fr6", "fr7",
+           "fr8", "fr9", "fr10", "fr11", "fr12", "fr13", "fr14", "fr15",
+           "fr16", "fr17", "fr18", "fr19", "fr20", "fr21", "fr22", "fr23",
+           "fr24", "fr25", "fr26", "fr27", "fr28", "fr29", "fr30", "fr31");
+#endif
+  x.d = tmp;
+}
Index: gcc/testsuite/gfortran.dg/pr54131.f
===================================================================
--- gcc/testsuite/gfortran.dg/pr54131.f	(revision 0)
+++ gcc/testsuite/gfortran.dg/pr54131.f	(revision 0)
@@ -0,0 +1,23 @@ 
+! { dg-do compile }
+! { dg-options "-O2 -funroll-loops" }
+
+      SUBROUTINE EFPGRD(IFCM,NAT,NVIB,NPUN,FCM,
+     *                  DEN,GRD,ENG,DIP,NVST,NFTODO,LIST)
+      IMPLICIT DOUBLE PRECISION (A-H,O-Z)
+      DIMENSION DEN(*),GRD(*),ENG(*),DIP(*),LIST(*)
+      PARAMETER (MXPT=100, MXFRG=50, MXFGPT=MXPT*MXFRG)
+      COMMON /FGRAD / DEF(3,MXFGPT),DEFT(3,MXFRG),TORQ(3,MXFRG),
+     *                ATORQ(3,MXFRG)
+      IF(NVST.EQ.0) THEN
+         CALL PUVIB(IFCM,IW,.FALSE.,NCOORD,IVIB,IATOM,ICOORD,
+     *              ENG(IENG),GRD(IGRD),DIP(IDIP))
+      END IF
+      DO 290 IVIB=1,NVIB
+               DO 220 IFRG=1,NFRG
+                  DO 215  J=1,3
+                     DEFT(J,IFRG)=GRD(INDX+J-1)
+  215             CONTINUE
+                  INDX=INDX+6
+  220          CONTINUE
+  290 CONTINUE
+      END