diff mbox

[powerpc] PR target/58673: Fix power8 quad memory/no-vsx-timode interaction (revised)

Message ID 20131011184326.GA12021@ibm-tiger.the-meissners.org
State New
Headers show

Commit Message

Michael Meissner Oct. 11, 2013, 6:43 p.m. UTC
Whoops, I didn't notice I had left in old code for lq/stq that I had commented
out.  This patch replaces the previous patch, and eliminates the commented out
code.  Sorry about that.

[gcc]
2013-10-11  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/58673
	* config/rs6000/rs6000.c (rs6000_legitimate_address_p): Only
	restrict TImode addresses to single indirect registers if both
	-mquad-memory and -mvsx-timode are used.
	(rs6000_output_move_128bit): Use quad_load_store_p to determine if
	we should emit load/store quad.  Remove using %y for quad memory
	addresses.

	* config/rs6000/rs6000.md (mov<mode>_ppc64, TI/PTImode): Add
	constraints to allow load/store quad on machines where TImode is
	not allowed in VSX registers.

[gcc/testsuite]
2013-10-11  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/58673
	* gcc.target/powerpc/pr58673-1.c: New file to test whether
	-mquad-word + -mno-vsx-timode causes errors.
	* gcc.target/powerpc/pr58673-2.c: Likewise.

Comments

David Edelsohn Oct. 15, 2013, 12:51 a.m. UTC | #1
> [gcc]
> 2013-10-11  Michael Meissner  <meissner@linux.vnet.ibm.com>
>
>         PR target/58673
>         * config/rs6000/rs6000.c (rs6000_legitimate_address_p): Only
>         restrict TImode addresses to single indirect registers if both
>         -mquad-memory and -mvsx-timode are used.
>         (rs6000_output_move_128bit): Use quad_load_store_p to determine if
>         we should emit load/store quad.  Remove using %y for quad memory
>         addresses.
>
>         * config/rs6000/rs6000.md (mov<mode>_ppc64, TI/PTImode): Add
>         constraints to allow load/store quad on machines where TImode is
>         not allowed in VSX registers.
>
> [gcc/testsuite]
> 2013-10-11  Michael Meissner  <meissner@linux.vnet.ibm.com>
>
>         PR target/58673
>         * gcc.target/powerpc/pr58673-1.c: New file to test whether
>         -mquad-word + -mno-vsx-timode causes errors.
>         * gcc.target/powerpc/pr58673-2.c: Likewise.

This is okay, except:

-  [(set (match_operand:TI2 0 "nonimmediate_operand" "=Y,r,r,r")
-    (match_operand:TI2 1 "input_operand" "r,Y,r,F"))]
+  [(set (match_operand:TI2 0 "nonimmediate_operand" "=wQ,Y,r,r,r,r")
+    (match_operand:TI2 1 "input_operand" "r,r,wQ,Y,r,F"))]

As Ken Zadeck and Richard Sandiford noticed this morning, the "F"
constraint seems to be a cut-and-paste error because the "F"
constraint corresponds to an immediate scalar or vector float value,
which never will match TImode. Should this constraint be "n"?

The GCC testsuite apparently is missing a test to ensure that a large
TImode immediate value can be loaded on targets that support int128_t.

Thanks, David
Michael Meissner Oct. 16, 2013, 7:08 p.m. UTC | #2
On Mon, Oct 14, 2013 at 08:51:15PM -0400, David Edelsohn wrote:
> As Ken Zadeck and Richard Sandiford noticed this morning, the "F"
> constraint seems to be a cut-and-paste error because the "F"
> constraint corresponds to an immediate scalar or vector float value,
> which never will match TImode. Should this constraint be "n"?
> 
> The GCC testsuite apparently is missing a test to ensure that a large
> TImode immediate value can be loaded on targets that support int128_t.

I'll try "n".  It probably should be something that allows CONST_DOUBLE ints as
well as CONST_INT.
David Edelsohn Oct. 16, 2013, 9:22 p.m. UTC | #3
On Wed, Oct 16, 2013 at 3:08 PM, Michael Meissner
<meissner@linux.vnet.ibm.com> wrote:

> I'll try "n".  It probably should be something that allows CONST_DOUBLE ints as
> well as CONST_INT.

The 'n' constraint matches CONST_INT and CONST_DOUBLE.

- David
diff mbox

Patch

Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 203389)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -7182,12 +7182,12 @@  rs6000_legitimate_address_p (enum machin
   if (reg_offset_p
       && legitimate_constant_pool_address_p (x, mode, reg_ok_strict))
     return 1;
-  /* For TImode, if we have load/store quad, only allow register indirect
-     addresses.  This will allow the values to go in either GPRs or VSX
-     registers without reloading.  The vector types would tend to go into VSX
-     registers, so we allow REG+REG, while TImode seems somewhat split, in that
-     some uses are GPR based, and some VSX based.  */
-  if (mode == TImode && TARGET_QUAD_MEMORY)
+  /* For TImode, if we have load/store quad and TImode in VSX registers, only
+     allow register indirect addresses.  This will allow the values to go in
+     either GPRs or VSX registers without reloading.  The vector types would
+     tend to go into VSX registers, so we allow REG+REG, while TImode seems
+     somewhat split, in that some uses are GPR based, and some VSX based.  */
+  if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
     return 0;
   /* If not REG_OK_STRICT (before reload) let pass any stack offset.  */
   if (! reg_ok_strict
@@ -16060,13 +16060,8 @@  rs6000_output_move_128bit (rtx operands[
     {
       if (dest_gpr_p)
 	{
-	  if (TARGET_QUAD_MEMORY && (dest_regno & 1) == 0
-	      && quad_memory_operand (src, mode)
-	      && !reg_overlap_mentioned_p (dest, src))
-	    {
-	      /* lq/stq only has DQ-form, so avoid X-form that %y produces.  */
-	      return REG_P (XEXP (src, 0)) ? "lq %0,%1" : "lq %0,%y1";
-	    }
+	  if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
+	    return "lq %0,%1";
 	  else
 	    return "#";
 	}
@@ -16095,12 +16090,8 @@  rs6000_output_move_128bit (rtx operands[
     {
       if (src_gpr_p)
 	{
-	  if (TARGET_QUAD_MEMORY && (src_regno & 1) == 0
-	      && quad_memory_operand (dest, mode))
-	    {
-	      /* lq/stq only has DQ-form, so avoid X-form that %y produces.  */
-	      return REG_P (XEXP (dest, 0)) ? "stq %1,%0" : "stq %1,%y0";
-	    }
+ 	  if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
+	    return "stq %1,%0";
 	  else
 	    return "#";
 	}
Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md	(revision 203389)
+++ gcc/config/rs6000/rs6000.md	(working copy)
@@ -10312,15 +10312,15 @@  (define_insn "*mov<mode>_string"
 					  (const_string "conditional")))])
 
 (define_insn "*mov<mode>_ppc64"
-  [(set (match_operand:TI2 0 "nonimmediate_operand" "=Y,r,r,r")
-	(match_operand:TI2 1 "input_operand" "r,Y,r,F"))]
+  [(set (match_operand:TI2 0 "nonimmediate_operand" "=wQ,Y,r,r,r,r")
+	(match_operand:TI2 1 "input_operand" "r,r,wQ,Y,r,F"))]
   "(TARGET_POWERPC64 && VECTOR_MEM_NONE_P (<MODE>mode)
    && (gpc_reg_operand (operands[0], <MODE>mode)
        || gpc_reg_operand (operands[1], <MODE>mode)))"
 {
   return rs6000_output_move_128bit (operands);
 }
-  [(set_attr "type" "store,load,*,*")
+  [(set_attr "type" "store,store,load,load,*,*")
    (set_attr "length" "8")])
 
 (define_split
Index: gcc/testsuite/gcc.target/powerpc/pr58673-1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/pr58673-1.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/pr58673-1.c	(revision 0)
@@ -0,0 +1,78 @@ 
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mcpu=power8 -m64 -O1" } */
+
+enum typecode
+{
+  QIcode, QUcode, HIcode, HUcode, SIcode, SUcode, DIcode, DUcode, SFcode,
+    DFcode, XFcode, Pcode, Tcode, LAST_AND_UNUSED_TYPECODE
+};
+enum bytecode_opcode
+{
+  neverneverland, drop, duplicate, over, setstackSI, adjstackSI, constQI,
+    constHI, constSI, constDI, constSF, constDF, constXF, constP, loadQI,
+    loadHI, loadSI, loadDI, loadSF, loadDF, loadXF, loadP, storeQI, storeHI,
+    storeSI, storeDI, storeSF, storeDF, storeXF, storeP, storeBLK, clearBLK,
+    addconstPSI, newlocalSI, localP, argP, convertQIHI, convertHISI,
+    convertSIDI, convertQISI, convertQUHU, convertHUSU, convertSUDU,
+    convertQUSU, convertSFDF, convertDFXF, convertHIQI, convertSIHI,
+    convertDISI, convertSIQI, convertSUQU, convertDFSF, convertXFDF,
+    convertSISF, convertSIDF, convertSIXF, convertSUSF, convertSUDF,
+    convertSUXF, convertDISF, convertDIDF, convertDIXF, convertDUSF,
+    convertDUDF, convertDUXF, convertSFSI, convertDFSI, convertXFSI,
+    convertSFSU, convertDFSU, convertXFSU, convertSFDI, convertDFDI,
+    convertXFDI, convertSFDU, convertDFDU, convertXFDU, convertPSI,
+    convertSIP, convertSIT, convertDIT, convertSFT, convertDFT, convertXFT,
+    convertPT, zxloadBI, sxloadBI, sstoreBI, addSI, addDI, addSF, addDF,
+    addXF, addPSI, subSI, subDI, subSF, subDF, subXF, subPP, mulSI, mulDI,
+    mulSU, mulDU, mulSF, mulDF, mulXF, divSI, divDI, divSU, divDU, divSF,
+    divDF, divXF, modSI, modDI, modSU, modDU, andSI, andDI, iorSI, iorDI,
+    xorSI, xorDI, lshiftSI, lshiftSU, lshiftDI, lshiftDU, rshiftSI, rshiftSU,
+    rshiftDI, rshiftDU, ltSI, ltSU, ltDI, ltDU, ltSF, ltDF, ltXF, ltP, leSI,
+    leSU, leDI, leDU, leSF, leDF, leXF, leP, geSI, geSU, geDI, geDU, geSF,
+    geDF, geXF, geP, gtSI, gtSU, gtDI, gtDU, gtSF, gtDF, gtXF, gtP, eqSI,
+    eqDI, eqSF, eqDF, eqXF, eqP, neSI, neDI, neSF, neDF, neXF, neP, negSI,
+    negDI, negSF, negDF, negXF, notSI, notDI, notT, predecQI, predecHI,
+    predecSI, predecDI, predecP, predecSF, predecDF, predecXF, predecBI,
+    preincQI, preincHI, preincSI, preincDI, preincP, preincSF, preincDF,
+    preincXF, preincBI, postdecQI, postdecHI, postdecSI, postdecDI, postdecP,
+    postdecSF, postdecDF, postdecXF, postdecBI, postincQI, postincHI,
+    postincSI, postincDI, postincP, postincSF, postincDF, postincXF,
+    postincBI, xjumpif, xjumpifnot, jump, jumpP, caseSI, caseSU, caseDI,
+    caseDU, call, returnP, ret, linenote, LAST_AND_UNUSED_OPCODE
+};
+struct binary_operator
+{
+  enum bytecode_opcode opcode;
+  enum typecode arg0;
+};
+static struct conversion_recipe
+{
+  unsigned char *opcodes;
+  int cost;
+}
+conversion_recipe[((int) LAST_AND_UNUSED_TYPECODE)][((int)
+						     LAST_AND_UNUSED_TYPECODE)];
+static struct conversion_recipe
+deduce_conversion (from, to)
+     enum typecode from, to;
+{
+  (conversion_recipe[(int) from][(int) to].
+   opcodes ? 0 : (conversion_recipe[(int) from][(int) to] =
+		  deduce_conversion (from, to), 0));
+}
+
+void
+bc_expand_binary_operation (optab, resulttype, arg0, arg1)
+     struct binary_operator optab[];
+{
+  int i, besti, cost, bestcost;
+  enum typecode resultcode, arg0code;
+  for (i = 0; optab[i].opcode != -1; ++i)
+    {
+      (conversion_recipe[(int) arg0code][(int) optab[i].arg0].
+       opcodes ? 0 : (conversion_recipe[(int) arg0code][(int) optab[i].arg0] =
+		      deduce_conversion (arg0code, optab[i].arg0), 0));
+    }
+}
Index: gcc/testsuite/gcc.target/powerpc/pr58673-2.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/pr58673-2.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/pr58673-2.c	(revision 0)
@@ -0,0 +1,217 @@ 
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mcpu=power8 -O3 -m64 -funroll-loops" } */
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+typedef long unsigned int size_t;
+typedef struct _IO_FILE FILE;
+typedef float real;
+typedef real rvec[3];
+typedef real matrix[3][3];
+typedef real tensor[3][3];
+enum
+{
+  F_BONDS, F_G96BONDS, F_MORSE, F_CUBICBONDS, F_CONNBONDS, F_HARMONIC,
+    F_ANGLES, F_G96ANGLES, F_PDIHS, F_RBDIHS, F_IDIHS, F_LJ14, F_COUL14, F_LJ,
+    F_BHAM, F_LJLR, F_DISPCORR, F_SR, F_LR, F_WPOL, F_POSRES, F_DISRES,
+    F_DISRESVIOL, F_ORIRES, F_ORIRESDEV, F_ANGRES, F_ANGRESZ, F_SHAKE,
+    F_SHAKENC, F_SETTLE, F_DUMMY2, F_DUMMY3, F_DUMMY3FD, F_DUMMY3FAD,
+    F_DUMMY3OUT, F_DUMMY4FD, F_EQM, F_EPOT, F_EKIN, F_ETOT, F_TEMP, F_PRES,
+    F_DVDL, F_DVDLKIN, F_NRE
+};
+typedef union
+{
+  struct
+  {
+  }
+  bham;
+  struct
+  {
+    real rA, krA, rB, krB;
+  }
+  harmonic;
+}
+t_iparams;
+typedef struct
+{
+  t_iparams *iparams;
+}
+t_idef;
+typedef struct
+{
+}
+t_inputrec;
+typedef struct
+{
+}
+t_commrec;
+typedef struct
+{
+}
+t_forcerec;
+typedef struct
+{
+}
+t_mdatoms;
+typedef struct
+{
+}
+t_filenm;
+enum
+{
+  eoPres, eoEpot, eoVir, eoDist, eoMu, eoForce, eoFx, eoFy, eoFz, eoPx, eoPy,
+    eoPz, eoPolarizability, eoDipole, eoObsNR, eoMemory =
+    eoObsNR, eoInter, eoUseVirial, eoNR
+};
+extern char *eoNames[eoNR];
+typedef struct
+{
+  int bPrint;
+}
+t_coupl_LJ;
+typedef struct
+{
+  int eObs;
+  t_iparams xi;
+}
+t_coupl_iparams;
+typedef struct
+{
+  real act_value[eoObsNR];
+  real av_value[eoObsNR];
+  real ref_value[eoObsNR];
+  int bObsUsed[eoObsNR];
+  int nLJ, nBU, nQ, nIP;
+  t_coupl_LJ *tcLJ;
+}
+t_coupl_rec;
+static void
+pr_ff (t_coupl_rec * tcr, real time, t_idef * idef, t_commrec * cr, int nfile,
+       t_filenm fnm[])
+{
+  static FILE *prop;
+  static FILE **out = ((void *) 0);
+  static FILE **qq = ((void *) 0);
+  static FILE **ip = ((void *) 0);
+  char buf[256];
+  char *leg[] = {
+    "C12", "C6"
+  };
+  char **raleg;
+  int i, j, index;
+  if ((prop == ((void *) 0)) && (out == ((void *) 0)) && (qq == ((void *) 0))
+      && (ip == ((void *) 0)))
+    {
+      for (i = j = 0; (i < eoObsNR); i++)
+	{
+	  if (tcr->bObsUsed[i])
+	    {
+	      raleg[j++] =
+		(__extension__
+		 (__builtin_constant_p (eoNames[i])
+		  && ((size_t) (const void *) ((eoNames[i]) + 1) -
+		      (size_t) (const void *) (eoNames[i]) ==
+		      1) ? (((const char *) (eoNames[i]))[0] ==
+			    '\0' ? (char *) calloc ((size_t) 1,
+						    (size_t) 1) : (
+									   {
+									   size_t
+									   __len
+									   =
+									   strlen
+									   (eoNames
+									    [i])
+									   +
+									   1;
+									   char
+									   *__retval
+									   =
+									   (char
+									    *)
+									   malloc
+									   (__len);
+									   __retval;}
+	    )):	    __strdup (eoNames[i])));
+	      raleg[j++] =
+		(__extension__
+		 (__builtin_constant_p (buf)
+		  && ((size_t) (const void *) ((buf) + 1) -
+		      (size_t) (const void *) (buf) ==
+		      1) ? (((const char *) (buf))[0] ==
+			    '\0' ? (char *) calloc ((size_t) 1,
+						    (size_t) 1) : (
+									   {
+									   size_t
+									   __len
+									   =
+									   strlen
+									   (buf)
+									   +
+									   1;
+									   char
+									   *__retval
+									   =
+									   (char
+									    *)
+									   malloc
+									   (__len);
+									   __retval;}
+	    )):	    __strdup (buf)));
+	    }
+	}
+      if (tcr->nLJ)
+	{
+	  for (i = 0; (i < tcr->nLJ); i++)
+	    {
+	      if (tcr->tcLJ[i].bPrint)
+		{
+		  xvgr_legend (out[i], (sizeof (leg) / sizeof ((leg)[0])),
+			       leg);
+		}
+	    }
+	}
+    }
+}
+
+void
+do_coupling (FILE * log, int nfile, t_filenm fnm[], t_coupl_rec * tcr, real t,
+	     int step, real ener[], t_forcerec * fr, t_inputrec * ir,
+	     int bMaster, t_mdatoms * md, t_idef * idef, real mu_aver,
+	     int nmols, t_commrec * cr, matrix box, tensor virial,
+	     tensor pres, rvec mu_tot, rvec x[], rvec f[], int bDoIt)
+{
+  int i, j, ati, atj, atnr2, type, ftype;
+  real deviation[eoObsNR], prdev[eoObsNR], epot0, dist, rmsf;
+  real ff6, ff12, ffa, ffb, ffc, ffq, factor, dt, mu_ind;
+  int bTest, bPrint;
+  t_coupl_iparams *tip;
+  if (bPrint)
+    {
+      pr_ff (tcr, t, idef, cr, nfile, fnm);
+    }
+  for (i = 0; (i < eoObsNR); i++)
+    {
+      deviation[i] =
+	calc_deviation (tcr->av_value[i], tcr->act_value[i],
+			tcr->ref_value[i]);
+      prdev[i] = tcr->ref_value[i] - tcr->act_value[i];
+    }
+  if (bPrint)
+    pr_dev (tcr, t, prdev, cr, nfile, fnm);
+  for (i = 0; (i < atnr2); i++)
+    {
+      factor = dt * deviation[tip->eObs];
+      switch (ftype)
+	{
+	case F_BONDS:
+	  if (fabs (tip->xi.harmonic.krA) > 1.2e-38)
+	    idef->iparams[type].harmonic.krA *=
+	      (1 + factor / tip->xi.harmonic.krA);
+	}
+    }
+}