[powerpc] PR target/58673: Fix power8 quad memory/no-vsx-timode interaction (revised)

Message ID	20131017170917.GA30103@ibm-tiger.the-meissners.org
State	New
Headers	show Return-Path: <gcc-patches-return-351540-incoming=patchwork.ozlabs.org@gcc.gnu.org> DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:date :from:to:cc:subject:message-id:references:mime-version :content-type:in-reply-to; q=dns; s=default; b=fQGy2dzMIZAHdAKTM PGJNv1y3p3gxNSdnF6n1CJxnjCUYJPMpY/XwsvtyqOrWnTHSp+I6Oz6ywyknZtG5 Ew9TFNSVm+muYHn6FlJ3Q4hVgYF67FPyUPY4ZM8cQquw4vTEIMJL1eMg20mhl7fj 4yesMaG4XGZgkXXtWpnJZb37iM= Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk Sender: gcc-patches-owner@gcc.gnu.org Gateway: Authorized Use Only! Violators will be prosecuted for <gcc-patches@gcc.gnu.org> from <meissner@ibm-tiger.the-meissners.org>; Thu, 17 Oct 2013 13:09:25 -0400 Gateway: Authorized Use Only! Violators will be prosecuted; Thu, 17 Oct 2013 13:09:22 -0400 Date: Thu, 17 Oct 2013 13:09:17 -0400 From: Michael Meissner <meissner@linux.vnet.ibm.com> To: David Edelsohn <dje.gcc@gmail.com> Cc: Michael Meissner <meissner@linux.vnet.ibm.com>, GCC Patches <gcc-patches@gcc.gnu.org> Subject: Re: [PATCH, powerpc] PR target/58673: Fix power8 quad memory/no-vsx-timode interaction (revised) Message-ID: <20131017170917.GA30103@ibm-tiger.the-meissners.org> Mail-Followup-To: Michael Meissner <meissner@linux.vnet.ibm.com>, David Edelsohn <dje.gcc@gmail.com>, GCC Patches <gcc-patches@gcc.gnu.org> References: <20131011182537.GA6220@ibm-tiger.the-meissners.org> <20131011184326.GA12021@ibm-tiger.the-meissners.org> <CAGWvnykBpuHyO_asiApJxiWUwhGDG_B5LxqDVaoYAmHSGgTmTA@mail.gmail.com> MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="/9DWx/yDrRhgMJTb" Content-Disposition: inline In-Reply-To: <CAGWvnykBpuHyO_asiApJxiWUwhGDG_B5LxqDVaoYAmHSGgTmTA@mail.gmail.com> User-Agent: Mutt/1.5.20 (2009-12-10)

Index: gcc/testsuite/gcc.target/powerpc/pr58673-1.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/pr58673-1.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/pr58673-1.c (revision 0) @@ -0,0 +1,78 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-mcpu=power8 -m64 -O1" } */ + +enum typecode +{ + QIcode, QUcode, HIcode, HUcode, SIcode, SUcode, DIcode, DUcode, SFcode, + DFcode, XFcode, Pcode, Tcode, LAST_AND_UNUSED_TYPECODE +}; +enum bytecode_opcode +{ + neverneverland, drop, duplicate, over, setstackSI, adjstackSI, constQI, + constHI, constSI, constDI, constSF, constDF, constXF, constP, loadQI, + loadHI, loadSI, loadDI, loadSF, loadDF, loadXF, loadP, storeQI, storeHI, + storeSI, storeDI, storeSF, storeDF, storeXF, storeP, storeBLK, clearBLK, + addconstPSI, newlocalSI, localP, argP, convertQIHI, convertHISI, + convertSIDI, convertQISI, convertQUHU, convertHUSU, convertSUDU, + convertQUSU, convertSFDF, convertDFXF, convertHIQI, convertSIHI, + convertDISI, convertSIQI, convertSUQU, convertDFSF, convertXFDF, + convertSISF, convertSIDF, convertSIXF, convertSUSF, convertSUDF, + convertSUXF, convertDISF, convertDIDF, convertDIXF, convertDUSF, + convertDUDF, convertDUXF, convertSFSI, convertDFSI, convertXFSI, + convertSFSU, convertDFSU, convertXFSU, convertSFDI, convertDFDI, + convertXFDI, convertSFDU, convertDFDU, convertXFDU, convertPSI, + convertSIP, convertSIT, convertDIT, convertSFT, convertDFT, convertXFT, + convertPT, zxloadBI, sxloadBI, sstoreBI, addSI, addDI, addSF, addDF, + addXF, addPSI, subSI, subDI, subSF, subDF, subXF, subPP, mulSI, mulDI, + mulSU, mulDU, mulSF, mulDF, mulXF, divSI, divDI, divSU, divDU, divSF, + divDF, divXF, modSI, modDI, modSU, modDU, andSI, andDI, iorSI, iorDI, + xorSI, xorDI, lshiftSI, lshiftSU, lshiftDI, lshiftDU, rshiftSI, rshiftSU, + rshiftDI, rshiftDU, ltSI, ltSU, ltDI, ltDU, ltSF, ltDF, ltXF, ltP, leSI, + leSU, leDI, leDU, leSF, leDF, leXF, leP, geSI, geSU, geDI, geDU, geSF, + geDF, geXF, geP, gtSI, gtSU, gtDI, gtDU, gtSF, gtDF, gtXF, gtP, eqSI, + eqDI, eqSF, eqDF, eqXF, eqP, neSI, neDI, neSF, neDF, neXF, neP, negSI, + negDI, negSF, negDF, negXF, notSI, notDI, notT, predecQI, predecHI, + predecSI, predecDI, predecP, predecSF, predecDF, predecXF, predecBI, + preincQI, preincHI, preincSI, preincDI, preincP, preincSF, preincDF, + preincXF, preincBI, postdecQI, postdecHI, postdecSI, postdecDI, postdecP, + postdecSF, postdecDF, postdecXF, postdecBI, postincQI, postincHI, + postincSI, postincDI, postincP, postincSF, postincDF, postincXF, + postincBI, xjumpif, xjumpifnot, jump, jumpP, caseSI, caseSU, caseDI, + caseDU, call, returnP, ret, linenote, LAST_AND_UNUSED_OPCODE +}; +struct binary_operator +{ + enum bytecode_opcode opcode; + enum typecode arg0; +}; +static struct conversion_recipe +{ + unsigned char *opcodes; + int cost; +} +conversion_recipe[((int) LAST_AND_UNUSED_TYPECODE)][((int) + LAST_AND_UNUSED_TYPECODE)]; +static struct conversion_recipe +deduce_conversion (from, to) + enum typecode from, to; +{ + (conversion_recipe[(int) from][(int) to]. + opcodes ? 0 : (conversion_recipe[(int) from][(int) to] = + deduce_conversion (from, to), 0)); +} + +void +bc_expand_binary_operation (optab, resulttype, arg0, arg1) + struct binary_operator optab[]; +{ + int i, besti, cost, bestcost; + enum typecode resultcode, arg0code; + for (i = 0; optab[i].opcode != -1; ++i) + { + (conversion_recipe[(int) arg0code][(int) optab[i].arg0]. + opcodes ? 0 : (conversion_recipe[(int) arg0code][(int) optab[i].arg0] = + deduce_conversion (arg0code, optab[i].arg0), 0)); + } +} Index: gcc/testsuite/gcc.target/powerpc/pr58673-2.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/pr58673-2.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/pr58673-2.c (revision 0) @@ -0,0 +1,217 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-mcpu=power8 -O3 -m64 -funroll-loops" } */ + +#include <stddef.h> +#include <stdlib.h> +#include <math.h> +#include <string.h> + +typedef long unsigned int size_t; +typedef struct _IO_FILE FILE; +typedef float real; +typedef real rvec[3]; +typedef real matrix[3][3]; +typedef real tensor[3][3]; +enum +{ + F_BONDS, F_G96BONDS, F_MORSE, F_CUBICBONDS, F_CONNBONDS, F_HARMONIC, + F_ANGLES, F_G96ANGLES, F_PDIHS, F_RBDIHS, F_IDIHS, F_LJ14, F_COUL14, F_LJ, + F_BHAM, F_LJLR, F_DISPCORR, F_SR, F_LR, F_WPOL, F_POSRES, F_DISRES, + F_DISRESVIOL, F_ORIRES, F_ORIRESDEV, F_ANGRES, F_ANGRESZ, F_SHAKE, + F_SHAKENC, F_SETTLE, F_DUMMY2, F_DUMMY3, F_DUMMY3FD, F_DUMMY3FAD, + F_DUMMY3OUT, F_DUMMY4FD, F_EQM, F_EPOT, F_EKIN, F_ETOT, F_TEMP, F_PRES, + F_DVDL, F_DVDLKIN, F_NRE +}; +typedef union +{ + struct + { + } + bham; + struct + { + real rA, krA, rB, krB; + } + harmonic; +} +t_iparams; +typedef struct +{ + t_iparams *iparams; +} +t_idef; +typedef struct +{ +} +t_inputrec; +typedef struct +{ +} +t_commrec; +typedef struct +{ +} +t_forcerec; +typedef struct +{ +} +t_mdatoms; +typedef struct +{ +} +t_filenm; +enum +{ + eoPres, eoEpot, eoVir, eoDist, eoMu, eoForce, eoFx, eoFy, eoFz, eoPx, eoPy, + eoPz, eoPolarizability, eoDipole, eoObsNR, eoMemory = + eoObsNR, eoInter, eoUseVirial, eoNR +}; +extern char *eoNames[eoNR]; +typedef struct +{ + int bPrint; +} +t_coupl_LJ; +typedef struct +{ + int eObs; + t_iparams xi; +} +t_coupl_iparams; +typedef struct +{ + real act_value[eoObsNR]; + real av_value[eoObsNR]; + real ref_value[eoObsNR]; + int bObsUsed[eoObsNR]; + int nLJ, nBU, nQ, nIP; + t_coupl_LJ *tcLJ; +} +t_coupl_rec; +static void +pr_ff (t_coupl_rec * tcr, real time, t_idef * idef, t_commrec * cr, int nfile, + t_filenm fnm[]) +{ + static FILE *prop; + static FILE **out = ((void *) 0); + static FILE **qq = ((void *) 0); + static FILE **ip = ((void *) 0); + char buf[256]; + char *leg[] = { + "C12", "C6" + }; + char **raleg; + int i, j, index; + if ((prop == ((void *) 0)) && (out == ((void *) 0)) && (qq == ((void *) 0)) + && (ip == ((void *) 0))) + { + for (i = j = 0; (i < eoObsNR); i++) + { + if (tcr->bObsUsed[i]) + { + raleg[j++] = + (__extension__ + (__builtin_constant_p (eoNames[i]) + && ((size_t) (const void *) ((eoNames[i]) + 1) - + (size_t) (const void *) (eoNames[i]) == + 1) ? (((const char *) (eoNames[i]))[0] == + '\0' ? (char *) calloc ((size_t) 1, + (size_t) 1) : ( + { + size_t + __len + = + strlen + (eoNames + [i]) + + + 1; + char + *__retval + = + (char + *) + malloc + (__len); + __retval;} + )): __strdup (eoNames[i]))); + raleg[j++] = + (__extension__ + (__builtin_constant_p (buf) + && ((size_t) (const void *) ((buf) + 1) - + (size_t) (const void *) (buf) == + 1) ? (((const char *) (buf))[0] == + '\0' ? (char *) calloc ((size_t) 1, + (size_t) 1) : ( + { + size_t + __len + = + strlen + (buf) + + + 1; + char + *__retval + = + (char + *) + malloc + (__len); + __retval;} + )): __strdup (buf))); + } + } + if (tcr->nLJ) + { + for (i = 0; (i < tcr->nLJ); i++) + { + if (tcr->tcLJ[i].bPrint) + { + xvgr_legend (out[i], (sizeof (leg) / sizeof ((leg)[0])), + leg); + } + } + } + } +} + +void +do_coupling (FILE * log, int nfile, t_filenm fnm[], t_coupl_rec * tcr, real t, + int step, real ener[], t_forcerec * fr, t_inputrec * ir, + int bMaster, t_mdatoms * md, t_idef * idef, real mu_aver, + int nmols, t_commrec * cr, matrix box, tensor virial, + tensor pres, rvec mu_tot, rvec x[], rvec f[], int bDoIt) +{ + int i, j, ati, atj, atnr2, type, ftype; + real deviation[eoObsNR], prdev[eoObsNR], epot0, dist, rmsf; + real ff6, ff12, ffa, ffb, ffc, ffq, factor, dt, mu_ind; + int bTest, bPrint; + t_coupl_iparams *tip; + if (bPrint) + { + pr_ff (tcr, t, idef, cr, nfile, fnm); + } + for (i = 0; (i < eoObsNR); i++) + { + deviation[i] = + calc_deviation (tcr->av_value[i], tcr->act_value[i], + tcr->ref_value[i]); + prdev[i] = tcr->ref_value[i] - tcr->act_value[i]; + } + if (bPrint) + pr_dev (tcr, t, prdev, cr, nfile, fnm); + for (i = 0; (i < atnr2); i++) + { + factor = dt * deviation[tip->eObs]; + switch (ftype) + { + case F_BONDS: + if (fabs (tip->xi.harmonic.krA) > 1.2e-38) + idef->iparams[type].harmonic.krA *= + (1 + factor / tip->xi.harmonic.krA); + } + } +} Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (revision 203770) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -7186,12 +7186,12 @@ rs6000_legitimate_address_p (enum machin if (reg_offset_p && legitimate_constant_pool_address_p (x, mode, reg_ok_strict)) return 1; - /* For TImode, if we have load/store quad, only allow register indirect - addresses. This will allow the values to go in either GPRs or VSX - registers without reloading. The vector types would tend to go into VSX - registers, so we allow REG+REG, while TImode seems somewhat split, in that - some uses are GPR based, and some VSX based. */ - if (mode == TImode && TARGET_QUAD_MEMORY) + /* For TImode, if we have load/store quad and TImode in VSX registers, only + allow register indirect addresses. This will allow the values to go in + either GPRs or VSX registers without reloading. The vector types would + tend to go into VSX registers, so we allow REG+REG, while TImode seems + somewhat split, in that some uses are GPR based, and some VSX based. */ + if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE) return 0; /* If not REG_OK_STRICT (before reload) let pass any stack offset. */ if (! reg_ok_strict @@ -16064,13 +16064,8 @@ rs6000_output_move_128bit (rtx operands[ { if (dest_gpr_p) { - if (TARGET_QUAD_MEMORY && (dest_regno & 1) == 0 - && quad_memory_operand (src, mode) - && !reg_overlap_mentioned_p (dest, src)) - { - /* lq/stq only has DQ-form, so avoid X-form that %y produces. */ - return REG_P (XEXP (src, 0)) ? "lq %0,%1" : "lq %0,%y1"; - } + if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src)) + return "lq %0,%1"; else return "#"; } @@ -16099,12 +16094,8 @@ rs6000_output_move_128bit (rtx operands[ { if (src_gpr_p) { - if (TARGET_QUAD_MEMORY && (src_regno & 1) == 0 - && quad_memory_operand (dest, mode)) - { - /* lq/stq only has DQ-form, so avoid X-form that %y produces. */ - return REG_P (XEXP (dest, 0)) ? "stq %1,%0" : "stq %1,%y0"; - } + if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src)) + return "stq %1,%0"; else return "#"; } Index: gcc/config/rs6000/rs6000.md =================================================================== --- gcc/config/rs6000/rs6000.md (revision 203770) +++ gcc/config/rs6000/rs6000.md (working copy) @@ -10312,15 +10312,15 @@ (define_insn "*mov<mode>_string" (const_string "conditional")))]) (define_insn "*mov<mode>_ppc64" - [(set (match_operand:TI2 0 "nonimmediate_operand" "=Y,r,r,r") - (match_operand:TI2 1 "input_operand" "r,Y,r,F"))] + [(set (match_operand:TI2 0 "nonimmediate_operand" "=wQ,Y,r,r,r,r") + (match_operand:TI2 1 "input_operand" "r,r,wQ,Y,r,n"))] "(TARGET_POWERPC64 && VECTOR_MEM_NONE_P (<MODE>mode) && (gpc_reg_operand (operands[0], <MODE>mode) || gpc_reg_operand (operands[1], <MODE>mode)))" { return rs6000_output_move_128bit (operands); } - [(set_attr "type" "store,load,*,*") + [(set_attr "type" "store,store,load,load,*,*") (set_attr "length" "8")]) (define_split

[powerpc] PR target/58673: Fix power8 quad memory/no-vsx-timode interaction (revised)

Commit Message

Patch