diff mbox

[powerpc] PR target/58673: Fix power8 quad memory/no-vsx-timode interaction (revised)

Message ID 20131017170917.GA30103@ibm-tiger.the-meissners.org
State New
Headers show

Commit Message

Michael Meissner Oct. 17, 2013, 5:09 p.m. UTC
On Mon, Oct 14, 2013 at 08:51:15PM -0400, David Edelsohn wrote:
> > [gcc]
> > 2013-10-11  Michael Meissner  <meissner@linux.vnet.ibm.com>
> >
> >         PR target/58673
> >         * config/rs6000/rs6000.c (rs6000_legitimate_address_p): Only
> >         restrict TImode addresses to single indirect registers if both
> >         -mquad-memory and -mvsx-timode are used.
> >         (rs6000_output_move_128bit): Use quad_load_store_p to determine if
> >         we should emit load/store quad.  Remove using %y for quad memory
> >         addresses.
> >
> >         * config/rs6000/rs6000.md (mov<mode>_ppc64, TI/PTImode): Add
> >         constraints to allow load/store quad on machines where TImode is
> >         not allowed in VSX registers.
> >
> > [gcc/testsuite]
> > 2013-10-11  Michael Meissner  <meissner@linux.vnet.ibm.com>
> >
> >         PR target/58673
> >         * gcc.target/powerpc/pr58673-1.c: New file to test whether
> >         -mquad-word + -mno-vsx-timode causes errors.
> >         * gcc.target/powerpc/pr58673-2.c: Likewise.
> 
> This is okay, except:
> 
> -  [(set (match_operand:TI2 0 "nonimmediate_operand" "=Y,r,r,r")
> -    (match_operand:TI2 1 "input_operand" "r,Y,r,F"))]
> +  [(set (match_operand:TI2 0 "nonimmediate_operand" "=wQ,Y,r,r,r,r")
> +    (match_operand:TI2 1 "input_operand" "r,r,wQ,Y,r,F"))]
> 
> As Ken Zadeck and Richard Sandiford noticed this morning, the "F"
> constraint seems to be a cut-and-paste error because the "F"
> constraint corresponds to an immediate scalar or vector float value,
> which never will match TImode. Should this constraint be "n"?
> 
> The GCC testsuite apparently is missing a test to ensure that a large
> TImode immediate value can be loaded on targets that support int128_t.

I changed this to use the 'n' constraint.  This is the patch I committed:

[gcc]
2013-10-17  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/58673
	* config/rs6000/rs6000.c (rs6000_legitimate_address_p): Only
	restrict TImode addresses to single indirect registers if both
	-mquad-memory and -mvsx-timode are used.
	(rs6000_output_move_128bit): Use quad_load_store_p to determine if
	we should emit load/store quad.  Remove using %y for quad memory
	addresses.

	* config/rs6000/rs6000.md (mov<mode>_ppc64, TI/PTImode): Add
	constraints to allow load/store quad on machines where TImode is
	not allowed in VSX registers.  Use 'n' instead of 'F' constraint
	for TImode to load integer constants.

[gcc/testsuite]
2013-10-17  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/58673
	* gcc.target/powerpc/pr58673-1.c: New file to test whether
	-mquad-word + -mno-vsx-timode causes errors.
	* gcc.target/powerpc/pr58673-2.c: Likewise.
diff mbox

Patch

Index: gcc/testsuite/gcc.target/powerpc/pr58673-1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/pr58673-1.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/pr58673-1.c	(revision 0)
@@ -0,0 +1,78 @@ 
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mcpu=power8 -m64 -O1" } */
+
+enum typecode
+{
+  QIcode, QUcode, HIcode, HUcode, SIcode, SUcode, DIcode, DUcode, SFcode,
+    DFcode, XFcode, Pcode, Tcode, LAST_AND_UNUSED_TYPECODE
+};
+enum bytecode_opcode
+{
+  neverneverland, drop, duplicate, over, setstackSI, adjstackSI, constQI,
+    constHI, constSI, constDI, constSF, constDF, constXF, constP, loadQI,
+    loadHI, loadSI, loadDI, loadSF, loadDF, loadXF, loadP, storeQI, storeHI,
+    storeSI, storeDI, storeSF, storeDF, storeXF, storeP, storeBLK, clearBLK,
+    addconstPSI, newlocalSI, localP, argP, convertQIHI, convertHISI,
+    convertSIDI, convertQISI, convertQUHU, convertHUSU, convertSUDU,
+    convertQUSU, convertSFDF, convertDFXF, convertHIQI, convertSIHI,
+    convertDISI, convertSIQI, convertSUQU, convertDFSF, convertXFDF,
+    convertSISF, convertSIDF, convertSIXF, convertSUSF, convertSUDF,
+    convertSUXF, convertDISF, convertDIDF, convertDIXF, convertDUSF,
+    convertDUDF, convertDUXF, convertSFSI, convertDFSI, convertXFSI,
+    convertSFSU, convertDFSU, convertXFSU, convertSFDI, convertDFDI,
+    convertXFDI, convertSFDU, convertDFDU, convertXFDU, convertPSI,
+    convertSIP, convertSIT, convertDIT, convertSFT, convertDFT, convertXFT,
+    convertPT, zxloadBI, sxloadBI, sstoreBI, addSI, addDI, addSF, addDF,
+    addXF, addPSI, subSI, subDI, subSF, subDF, subXF, subPP, mulSI, mulDI,
+    mulSU, mulDU, mulSF, mulDF, mulXF, divSI, divDI, divSU, divDU, divSF,
+    divDF, divXF, modSI, modDI, modSU, modDU, andSI, andDI, iorSI, iorDI,
+    xorSI, xorDI, lshiftSI, lshiftSU, lshiftDI, lshiftDU, rshiftSI, rshiftSU,
+    rshiftDI, rshiftDU, ltSI, ltSU, ltDI, ltDU, ltSF, ltDF, ltXF, ltP, leSI,
+    leSU, leDI, leDU, leSF, leDF, leXF, leP, geSI, geSU, geDI, geDU, geSF,
+    geDF, geXF, geP, gtSI, gtSU, gtDI, gtDU, gtSF, gtDF, gtXF, gtP, eqSI,
+    eqDI, eqSF, eqDF, eqXF, eqP, neSI, neDI, neSF, neDF, neXF, neP, negSI,
+    negDI, negSF, negDF, negXF, notSI, notDI, notT, predecQI, predecHI,
+    predecSI, predecDI, predecP, predecSF, predecDF, predecXF, predecBI,
+    preincQI, preincHI, preincSI, preincDI, preincP, preincSF, preincDF,
+    preincXF, preincBI, postdecQI, postdecHI, postdecSI, postdecDI, postdecP,
+    postdecSF, postdecDF, postdecXF, postdecBI, postincQI, postincHI,
+    postincSI, postincDI, postincP, postincSF, postincDF, postincXF,
+    postincBI, xjumpif, xjumpifnot, jump, jumpP, caseSI, caseSU, caseDI,
+    caseDU, call, returnP, ret, linenote, LAST_AND_UNUSED_OPCODE
+};
+struct binary_operator
+{
+  enum bytecode_opcode opcode;
+  enum typecode arg0;
+};
+static struct conversion_recipe
+{
+  unsigned char *opcodes;
+  int cost;
+}
+conversion_recipe[((int) LAST_AND_UNUSED_TYPECODE)][((int)
+						     LAST_AND_UNUSED_TYPECODE)];
+static struct conversion_recipe
+deduce_conversion (from, to)
+     enum typecode from, to;
+{
+  (conversion_recipe[(int) from][(int) to].
+   opcodes ? 0 : (conversion_recipe[(int) from][(int) to] =
+		  deduce_conversion (from, to), 0));
+}
+
+void
+bc_expand_binary_operation (optab, resulttype, arg0, arg1)
+     struct binary_operator optab[];
+{
+  int i, besti, cost, bestcost;
+  enum typecode resultcode, arg0code;
+  for (i = 0; optab[i].opcode != -1; ++i)
+    {
+      (conversion_recipe[(int) arg0code][(int) optab[i].arg0].
+       opcodes ? 0 : (conversion_recipe[(int) arg0code][(int) optab[i].arg0] =
+		      deduce_conversion (arg0code, optab[i].arg0), 0));
+    }
+}
Index: gcc/testsuite/gcc.target/powerpc/pr58673-2.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/pr58673-2.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/pr58673-2.c	(revision 0)
@@ -0,0 +1,217 @@ 
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mcpu=power8 -O3 -m64 -funroll-loops" } */
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+typedef long unsigned int size_t;
+typedef struct _IO_FILE FILE;
+typedef float real;
+typedef real rvec[3];
+typedef real matrix[3][3];
+typedef real tensor[3][3];
+enum
+{
+  F_BONDS, F_G96BONDS, F_MORSE, F_CUBICBONDS, F_CONNBONDS, F_HARMONIC,
+    F_ANGLES, F_G96ANGLES, F_PDIHS, F_RBDIHS, F_IDIHS, F_LJ14, F_COUL14, F_LJ,
+    F_BHAM, F_LJLR, F_DISPCORR, F_SR, F_LR, F_WPOL, F_POSRES, F_DISRES,
+    F_DISRESVIOL, F_ORIRES, F_ORIRESDEV, F_ANGRES, F_ANGRESZ, F_SHAKE,
+    F_SHAKENC, F_SETTLE, F_DUMMY2, F_DUMMY3, F_DUMMY3FD, F_DUMMY3FAD,
+    F_DUMMY3OUT, F_DUMMY4FD, F_EQM, F_EPOT, F_EKIN, F_ETOT, F_TEMP, F_PRES,
+    F_DVDL, F_DVDLKIN, F_NRE
+};
+typedef union
+{
+  struct
+  {
+  }
+  bham;
+  struct
+  {
+    real rA, krA, rB, krB;
+  }
+  harmonic;
+}
+t_iparams;
+typedef struct
+{
+  t_iparams *iparams;
+}
+t_idef;
+typedef struct
+{
+}
+t_inputrec;
+typedef struct
+{
+}
+t_commrec;
+typedef struct
+{
+}
+t_forcerec;
+typedef struct
+{
+}
+t_mdatoms;
+typedef struct
+{
+}
+t_filenm;
+enum
+{
+  eoPres, eoEpot, eoVir, eoDist, eoMu, eoForce, eoFx, eoFy, eoFz, eoPx, eoPy,
+    eoPz, eoPolarizability, eoDipole, eoObsNR, eoMemory =
+    eoObsNR, eoInter, eoUseVirial, eoNR
+};
+extern char *eoNames[eoNR];
+typedef struct
+{
+  int bPrint;
+}
+t_coupl_LJ;
+typedef struct
+{
+  int eObs;
+  t_iparams xi;
+}
+t_coupl_iparams;
+typedef struct
+{
+  real act_value[eoObsNR];
+  real av_value[eoObsNR];
+  real ref_value[eoObsNR];
+  int bObsUsed[eoObsNR];
+  int nLJ, nBU, nQ, nIP;
+  t_coupl_LJ *tcLJ;
+}
+t_coupl_rec;
+static void
+pr_ff (t_coupl_rec * tcr, real time, t_idef * idef, t_commrec * cr, int nfile,
+       t_filenm fnm[])
+{
+  static FILE *prop;
+  static FILE **out = ((void *) 0);
+  static FILE **qq = ((void *) 0);
+  static FILE **ip = ((void *) 0);
+  char buf[256];
+  char *leg[] = {
+    "C12", "C6"
+  };
+  char **raleg;
+  int i, j, index;
+  if ((prop == ((void *) 0)) && (out == ((void *) 0)) && (qq == ((void *) 0))
+      && (ip == ((void *) 0)))
+    {
+      for (i = j = 0; (i < eoObsNR); i++)
+	{
+	  if (tcr->bObsUsed[i])
+	    {
+	      raleg[j++] =
+		(__extension__
+		 (__builtin_constant_p (eoNames[i])
+		  && ((size_t) (const void *) ((eoNames[i]) + 1) -
+		      (size_t) (const void *) (eoNames[i]) ==
+		      1) ? (((const char *) (eoNames[i]))[0] ==
+			    '\0' ? (char *) calloc ((size_t) 1,
+						    (size_t) 1) : (
+									   {
+									   size_t
+									   __len
+									   =
+									   strlen
+									   (eoNames
+									    [i])
+									   +
+									   1;
+									   char
+									   *__retval
+									   =
+									   (char
+									    *)
+									   malloc
+									   (__len);
+									   __retval;}
+	    )):	    __strdup (eoNames[i])));
+	      raleg[j++] =
+		(__extension__
+		 (__builtin_constant_p (buf)
+		  && ((size_t) (const void *) ((buf) + 1) -
+		      (size_t) (const void *) (buf) ==
+		      1) ? (((const char *) (buf))[0] ==
+			    '\0' ? (char *) calloc ((size_t) 1,
+						    (size_t) 1) : (
+									   {
+									   size_t
+									   __len
+									   =
+									   strlen
+									   (buf)
+									   +
+									   1;
+									   char
+									   *__retval
+									   =
+									   (char
+									    *)
+									   malloc
+									   (__len);
+									   __retval;}
+	    )):	    __strdup (buf)));
+	    }
+	}
+      if (tcr->nLJ)
+	{
+	  for (i = 0; (i < tcr->nLJ); i++)
+	    {
+	      if (tcr->tcLJ[i].bPrint)
+		{
+		  xvgr_legend (out[i], (sizeof (leg) / sizeof ((leg)[0])),
+			       leg);
+		}
+	    }
+	}
+    }
+}
+
+void
+do_coupling (FILE * log, int nfile, t_filenm fnm[], t_coupl_rec * tcr, real t,
+	     int step, real ener[], t_forcerec * fr, t_inputrec * ir,
+	     int bMaster, t_mdatoms * md, t_idef * idef, real mu_aver,
+	     int nmols, t_commrec * cr, matrix box, tensor virial,
+	     tensor pres, rvec mu_tot, rvec x[], rvec f[], int bDoIt)
+{
+  int i, j, ati, atj, atnr2, type, ftype;
+  real deviation[eoObsNR], prdev[eoObsNR], epot0, dist, rmsf;
+  real ff6, ff12, ffa, ffb, ffc, ffq, factor, dt, mu_ind;
+  int bTest, bPrint;
+  t_coupl_iparams *tip;
+  if (bPrint)
+    {
+      pr_ff (tcr, t, idef, cr, nfile, fnm);
+    }
+  for (i = 0; (i < eoObsNR); i++)
+    {
+      deviation[i] =
+	calc_deviation (tcr->av_value[i], tcr->act_value[i],
+			tcr->ref_value[i]);
+      prdev[i] = tcr->ref_value[i] - tcr->act_value[i];
+    }
+  if (bPrint)
+    pr_dev (tcr, t, prdev, cr, nfile, fnm);
+  for (i = 0; (i < atnr2); i++)
+    {
+      factor = dt * deviation[tip->eObs];
+      switch (ftype)
+	{
+	case F_BONDS:
+	  if (fabs (tip->xi.harmonic.krA) > 1.2e-38)
+	    idef->iparams[type].harmonic.krA *=
+	      (1 + factor / tip->xi.harmonic.krA);
+	}
+    }
+}
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 203770)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -7186,12 +7186,12 @@  rs6000_legitimate_address_p (enum machin
   if (reg_offset_p
       && legitimate_constant_pool_address_p (x, mode, reg_ok_strict))
     return 1;
-  /* For TImode, if we have load/store quad, only allow register indirect
-     addresses.  This will allow the values to go in either GPRs or VSX
-     registers without reloading.  The vector types would tend to go into VSX
-     registers, so we allow REG+REG, while TImode seems somewhat split, in that
-     some uses are GPR based, and some VSX based.  */
-  if (mode == TImode && TARGET_QUAD_MEMORY)
+  /* For TImode, if we have load/store quad and TImode in VSX registers, only
+     allow register indirect addresses.  This will allow the values to go in
+     either GPRs or VSX registers without reloading.  The vector types would
+     tend to go into VSX registers, so we allow REG+REG, while TImode seems
+     somewhat split, in that some uses are GPR based, and some VSX based.  */
+  if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
     return 0;
   /* If not REG_OK_STRICT (before reload) let pass any stack offset.  */
   if (! reg_ok_strict
@@ -16064,13 +16064,8 @@  rs6000_output_move_128bit (rtx operands[
     {
       if (dest_gpr_p)
 	{
-	  if (TARGET_QUAD_MEMORY && (dest_regno & 1) == 0
-	      && quad_memory_operand (src, mode)
-	      && !reg_overlap_mentioned_p (dest, src))
-	    {
-	      /* lq/stq only has DQ-form, so avoid X-form that %y produces.  */
-	      return REG_P (XEXP (src, 0)) ? "lq %0,%1" : "lq %0,%y1";
-	    }
+	  if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
+	    return "lq %0,%1";
 	  else
 	    return "#";
 	}
@@ -16099,12 +16094,8 @@  rs6000_output_move_128bit (rtx operands[
     {
       if (src_gpr_p)
 	{
-	  if (TARGET_QUAD_MEMORY && (src_regno & 1) == 0
-	      && quad_memory_operand (dest, mode))
-	    {
-	      /* lq/stq only has DQ-form, so avoid X-form that %y produces.  */
-	      return REG_P (XEXP (dest, 0)) ? "stq %1,%0" : "stq %1,%y0";
-	    }
+ 	  if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
+	    return "stq %1,%0";
 	  else
 	    return "#";
 	}
Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md	(revision 203770)
+++ gcc/config/rs6000/rs6000.md	(working copy)
@@ -10312,15 +10312,15 @@  (define_insn "*mov<mode>_string"
 					  (const_string "conditional")))])
 
 (define_insn "*mov<mode>_ppc64"
-  [(set (match_operand:TI2 0 "nonimmediate_operand" "=Y,r,r,r")
-	(match_operand:TI2 1 "input_operand" "r,Y,r,F"))]
+  [(set (match_operand:TI2 0 "nonimmediate_operand" "=wQ,Y,r,r,r,r")
+	(match_operand:TI2 1 "input_operand" "r,r,wQ,Y,r,n"))]
   "(TARGET_POWERPC64 && VECTOR_MEM_NONE_P (<MODE>mode)
    && (gpc_reg_operand (operands[0], <MODE>mode)
        || gpc_reg_operand (operands[1], <MODE>mode)))"
 {
   return rs6000_output_move_128bit (operands);
 }
-  [(set_attr "type" "store,load,*,*")
+  [(set_attr "type" "store,store,load,load,*,*")
    (set_attr "length" "8")])
 
 (define_split