diff mbox

[07/16] S/390: Use wfc for scalar vector compares

Message ID 20170324141053.16840-8-krebbel@linux.vnet.ibm.com
State New
Headers show

Commit Message

Andreas Krebbel March 24, 2017, 2:10 p.m. UTC
The z13 vector support used the vector style comparison instructions
also for the scalar compares in vector registers.  However, it is much
more convenient to just use the compare scalar instruction for that
purpose.  The advantage is that this instruction generates a CC result
as our compares usually do.  So this results in quite some code to be
removed from the backend.

Regression tested on s390x.

gcc/ChangeLog:

2017-03-24  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>

	* config/s390/2964.md: Remove the single element vector compare
	instructions which are no longer used.
	* config/s390/s390.c (s390_select_ccmode): Remove handling of
	vector CCmodes.
	(s390_canonicalize_comparison): Remove handling of DFmode
	compares.
	(s390_expand_vec_compare_scalar): Remove function.
	(s390_emit_compare): Don't call s390_expand_vec_compare_scalar.
	* config/s390/s390.md ("*vec_cmp<insn_cmp>df_cconly"): Remove
	pattern.
	("*cmp<mode>_ccs"): Add wfcdb instruction.

gcc/testsuite/ChangeLog:

2017-03-24  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>

	* gcc.target/s390/vector/vec-scalar-cmp-1.c: Adjust for the
	comparison instructions used from now on.
---
 gcc/ChangeLog                                      |  14 +++
 gcc/config/s390/2964.md                            |   8 +-
 gcc/config/s390/s390.c                             | 102 +--------------------
 gcc/config/s390/s390.md                            |  26 ++----
 gcc/testsuite/ChangeLog                            |   5 +
 .../gcc.target/s390/vector/vec-scalar-cmp-1.c      |  31 +++++--
 6 files changed, 57 insertions(+), 129 deletions(-)
diff mbox

Patch

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 4dd2be6..fef571c 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,19 @@ 
 2017-03-24  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
 
+	* config/s390/2964.md: Remove the single element vector compare
+	instructions which are no longer used.
+	* config/s390/s390.c (s390_select_ccmode): Remove handling of
+	vector CCmodes.
+	(s390_canonicalize_comparison): Remove handling of DFmode
+	compares.
+	(s390_expand_vec_compare_scalar): Remove function.
+	(s390_emit_compare): Don't call s390_expand_vec_compare_scalar.
+	* config/s390/s390.md ("*vec_cmp<insn_cmp>df_cconly"): Remove
+	pattern.
+	("*cmp<mode>_ccs"): Add wfcdb instruction.
+
+2017-03-24  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
 	* config/s390/s390.md ("mov<mode>_64dfp" DD_DF): Use vleig for loading a
 	FP zero.
 	("*mov<mode>_64" DD_DF): Remove the vector instructions. These
diff --git a/gcc/config/s390/2964.md b/gcc/config/s390/2964.md
index 374e2e3..d9b6729 100644
--- a/gcc/config/s390/2964.md
+++ b/gcc/config/s390/2964.md
@@ -88,7 +88,7 @@  vsh,vsl,vsq,lxebr,cdtr,fiebr,vupllb,vupllf,vupllh,vmrhb,madbr,vtm,vmrhf,\
 vmrhg,vmrhh,axtr,fiebra,vleb,cxtr,vlef,vleg,vleh,vpkf,vpkg,vpkh,vmlob,vmlof,\
 vmloh,lxdb,ldeb,mdtr,vceqfs,adb,wflndb,lxeb,vn,vo,vchlb,vx,mxtr,vchlf,vchlg,\
 vchlh,vfcedbs,vfcedb,vceqgs,cxbr,msdbr,vcdgb,debr,vceqhs,meeb,lcxbr,vavglb,\
-vavglf,vavglg,vavglh,wfcedbs,vmrlb,vmrlf,vmrlg,vmrlh,wfchedbs,vmxb,tcdb,\
+vavglf,vavglg,vavglh,vmrlb,vmrlf,vmrlg,vmrlh,vmxb,tcdb,\
 vmahh,vsrlb,wcgdb,lcdbr,vistrbs,vrepb,wfmdb,vrepf,vrepg,vreph,ler,wcdlgb,\
 ley,vistrb,vistrf,vistrh,tceb,wfsqdb,sqeb,vsumqf,vsumqg,vesrlb,vfeezbs,\
 maebr,vesrlf,vesrlg,vesrlh,vmeb,vmef,vmeh,meebr,vflcdb,wfmadb,vperm,sxtr,\
@@ -96,7 +96,7 @@  vclzf,vgm,vgmb,vgmf,vgmg,vgmh,tdcxt,vzero,msebr,veslb,veslf,veslg,vfenezb,\
 vfenezf,vfenezh,vistrfs,vchf,vchg,vchh,vmhb,vmhf,vmhh,cdb,veslvb,ledbr,\
 veslvf,veslvg,veslvh,wclgdb,vfmdb,vmnlb,vmnlf,vmnlg,vmnlh,vclzb,vfeezfs,\
 vclzg,vclzh,mdb,vmxlb,vmxlf,vmxlg,vmxlh,ltdtr,vsbcbiq,ceb,wfddb,sebr,vistrhs,\
-lxdtr,lcebr,vab,vaf,vag,vah,ltxtr,vlpf,vlpg,vsegb,vaq,vsegf,vsegh,wfchdbs,\
+lxdtr,lcebr,vab,vaf,vag,vah,ltxtr,vlpf,vlpg,vsegb,vaq,vsegf,vsegh,\
 sdtr,cdbr,vfeezhs,le,wldeb,vfmadb,vchlbs,vacccq,vmaleb,vsel,vmalef,vmaleh,\
 vflndb,mdbr,vmlb,wflpdb,ldetr,vpksfs,vpksf,vpksg,vpksh,sqdb,mxbr,sqdbr,\
 vmaeb,veslh,vmaef,vpklsf,vpklsg,vpklsh,verllb,vchb,ddtr,verllf,verllg,verllh,\
@@ -164,7 +164,7 @@  vsl,vsq,lxebr,cdtr,fiebr,vupllb,vupllf,vupllh,vmrhb,madbr,vtm,vmrhf,vmrhg,\
 vmrhh,axtr,fiebra,vleb,cxtr,vlef,vleg,vleh,vpkf,vpkg,vpkh,vmlob,vmlof,vmloh,\
 lxdb,ldeb,vceqfs,adb,wflndb,lxeb,vn,vo,vchlb,vx,vchlf,vchlg,vchlh,vfcedbs,\
 vfcedb,vceqgs,cxbr,msdbr,vcdgb,vceqhs,meeb,lcxbr,vavglb,vavglf,vavglg,vavglh,\
-wfcedbs,vmrlb,vmrlf,vmrlg,vmrlh,wfchedbs,vmxb,tcdb,vmahh,vsrlb,wcgdb,lcdbr,\
+vmrlb,vmrlf,vmrlg,vmrlh,vmxb,tcdb,vmahh,vsrlb,wcgdb,lcdbr,\
 vistrbs,vrepb,wfmdb,vrepf,vrepg,vreph,ler,wcdlgb,ley,vistrb,vistrf,vistrh,\
 tceb,vsumqf,vsumqg,vesrlb,vfeezbs,maebr,vesrlf,vesrlg,vesrlh,vmeb,vmef,\
 vmeh,meebr,vflcdb,wfmadb,vperm,sxtr,vclzf,vgm,vgmb,vgmf,vgmg,vgmh,tdcxt,\
@@ -172,7 +172,7 @@  vzero,msebr,veslb,veslf,veslg,vfenezb,vfenezf,vfenezh,vistrfs,vchf,vchg,\
 vchh,vmhb,vmhf,vmhh,cdb,veslvb,ledbr,veslvf,veslvg,veslvh,wclgdb,vfmdb,\
 vmnlb,vmnlf,vmnlg,vmnlh,vclzb,vfeezfs,vclzg,vclzh,mdb,vmxlb,vmxlf,vmxlg,\
 vmxlh,ltdtr,vsbcbiq,ceb,sebr,vistrhs,lxdtr,lcebr,vab,vaf,vag,vah,ltxtr,\
-vlpf,vlpg,vsegb,vaq,vsegf,vsegh,wfchdbs,sdtr,cdbr,vfeezhs,le,wldeb,vfmadb,\
+vlpf,vlpg,vsegb,vaq,vsegf,vsegh,sdtr,cdbr,vfeezhs,le,wldeb,vfmadb,\
 vchlbs,vacccq,vmaleb,vsel,vmalef,vmaleh,vflndb,mdbr,vmlb,wflpdb,ldetr,vpksfs,\
 vpksf,vpksg,vpksh,vmaeb,veslh,vmaef,vpklsf,vpklsg,vpklsh,verllb,vchb,verllf,\
 verllg,verllh,wfsdb,maeb,vclgdb,vftcidb,vpksgs,vmxf,vmxg,vmxh,fidbra,vmnb,\
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 65a7546..eac39c5 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -1402,29 +1402,6 @@  s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
 machine_mode
 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
 {
-  if (TARGET_VX
-      && register_operand (op0, DFmode)
-      && register_operand (op1, DFmode))
-    {
-      /* LT, LE, UNGT, UNGE require swapping OP0 and OP1.  Either
-	 s390_emit_compare or s390_canonicalize_comparison will take
-	 care of it.  */
-      switch (code)
-	{
-	case EQ:
-	case NE:
-	  return CCVEQmode;
-	case GT:
-	case UNLE:
-	  return CCVFHmode;
-	case GE:
-	case UNLT:
-	  return CCVFHEmode;
-	default:
-	  ;
-	}
-    }
-
   switch (code)
     {
       case EQ:
@@ -1703,26 +1680,6 @@  s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
       *code = (int)swap_condition ((enum rtx_code)*code);
     }
 
-  /* Using the scalar variants of vector instructions for 64 bit FP
-     comparisons might require swapping the operands.  */
-  if (TARGET_VX
-      && register_operand (*op0, DFmode)
-      && register_operand (*op1, DFmode)
-      && (*code == LT || *code == LE || *code == UNGT || *code == UNGE))
-    {
-      rtx tmp;
-
-      switch (*code)
-	{
-	case LT:   *code = GT; break;
-	case LE:   *code = GE; break;
-	case UNGT: *code = UNLE; break;
-	case UNGE: *code = UNLT; break;
-	default: ;
-	}
-      tmp = *op0; *op0 = *op1; *op1 = tmp;
-    }
-
   /* A comparison result is compared against zero.  Replace it with
      the (perhaps inverted) original comparison.
      This probably should be done by simplify_relational_operation.  */
@@ -1749,56 +1706,6 @@  s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
     }
 }
 
-/* Helper function for s390_emit_compare.  If possible emit a 64 bit
-   FP compare using the single element variant of vector instructions.
-   Replace CODE with the comparison code to be used in the CC reg
-   compare and return the condition code register RTX in CC.  */
-
-static bool
-s390_expand_vec_compare_scalar (enum rtx_code *code, rtx cmp1, rtx cmp2,
-				rtx *cc)
-{
-  machine_mode cmp_mode;
-  bool swap_p = false;
-
-  switch (*code)
-    {
-    case EQ:   cmp_mode = CCVEQmode;  break;
-    case NE:   cmp_mode = CCVEQmode;  break;
-    case GT:   cmp_mode = CCVFHmode;  break;
-    case GE:   cmp_mode = CCVFHEmode; break;
-    case UNLE: cmp_mode = CCVFHmode;  break;
-    case UNLT: cmp_mode = CCVFHEmode; break;
-    case LT:   cmp_mode = CCVFHmode;  *code = GT;   swap_p = true; break;
-    case LE:   cmp_mode = CCVFHEmode; *code = GE;   swap_p = true; break;
-    case UNGE: cmp_mode = CCVFHmode;  *code = UNLE; swap_p = true; break;
-    case UNGT: cmp_mode = CCVFHEmode; *code = UNLT; swap_p = true; break;
-    default: return false;
-    }
-
-  if (swap_p)
-    {
-      rtx tmp = cmp2;
-      cmp2 = cmp1;
-      cmp1 = tmp;
-    }
-
-  emit_insn (gen_rtx_PARALLEL (VOIDmode,
-	       gen_rtvec (2,
-			  gen_rtx_SET (gen_rtx_REG (cmp_mode, CC_REGNUM),
-				       gen_rtx_COMPARE (cmp_mode, cmp1,
-							cmp2)),
-			  gen_rtx_CLOBBER (VOIDmode,
-					   gen_rtx_SCRATCH (V2DImode)))));
-
-  /* This is the cc reg how it will be used in the cc mode consumer.
-     It either needs to be CCVFALL or CCVFANY.  However, CC1 will
-     never be set by the scalar variants.  So it actually doesn't
-     matter which one we choose here.  */
-  *cc = gen_rtx_REG (CCVFALLmode, CC_REGNUM);
-  return true;
-}
-
 
 /* Emit a compare instruction suitable to implement the comparison
    OP0 CODE OP1.  Return the correct condition RTL to be placed in
@@ -1810,14 +1717,7 @@  s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
   machine_mode mode = s390_select_ccmode (code, op0, op1);
   rtx cc;
 
-  if (TARGET_VX
-      && register_operand (op0, DFmode)
-      && register_operand (op1, DFmode)
-      && s390_expand_vec_compare_scalar (&code, op0, op1, &cc))
-    {
-      /* Work has been done by s390_expand_vec_compare_scalar already.  */
-    }
-  else if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
+  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
     {
       /* Do not output a redundant compare instruction if a
 	 compare_and_swap pattern already computed the result and the
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 554fb37..e72d5be 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -1317,28 +1317,20 @@ 
  })
 
 
-; cxtr, cxbr, cdtr, cdbr, cebr, cdb, ceb
+; cxtr, cdtr, cxbr, cdbr, cebr, cdb, ceb, wfcdb
 (define_insn "*cmp<mode>_ccs"
   [(set (reg CC_REGNUM)
-        (compare (match_operand:FP 0 "register_operand" "f,f")
-                 (match_operand:FP 1 "general_operand"  "f,R")))]
+        (compare (match_operand:FP 0 "register_operand" "f,f,v")
+                 (match_operand:FP 1 "general_operand"  "f,R,v")))]
   "s390_match_ccmode(insn, CCSmode) && TARGET_HARD_FLOAT"
   "@
    c<xde><bt>r\t%0,%1
-   c<xde>b\t%0,%1"
-   [(set_attr "op_type" "RRE,RXE")
-    (set_attr "type"  "fsimp<mode>")
-    (set_attr "enabled" "*,<DSF>")])
-
-; wfcedbs, wfchdbs, wfchedbs
-(define_insn "*vec_cmp<insn_cmp>df_cconly"
-  [(set (reg:VFCMP CC_REGNUM)
-	(compare:VFCMP (match_operand:DF 0 "register_operand" "v")
-		       (match_operand:DF 1 "register_operand" "v")))
-   (clobber (match_scratch:V2DI 2 "=v"))]
-  "TARGET_VX && TARGET_HARD_FLOAT"
-  "wfc<asm_fcmp>dbs\t%v2,%v0,%v1"
-  [(set_attr "op_type" "VRR")])
+   c<xde>b\t%0,%1
+   wfcdb\t%0,%1"
+  [(set_attr "op_type" "RRE,RXE,VRR")
+   (set_attr "cpu_facility" "*,*,vx")
+   (set_attr "enabled" "*,<DSF>,<DFDI>")])
+
 
 ; Compare and Branch instructions
 
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 0f0877c..e6f0e2b 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,10 @@ 
 2017-03-24  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
 
+	* gcc.target/s390/vector/vec-scalar-cmp-1.c: Adjust for the
+	comparison instructions used from now on.
+
+2017-03-24  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
 	* gcc.target/s390/s390.exp (check_effective_target_vector):
 	Include target-supports.exp and move target_vector check routine
 	...
diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-scalar-cmp-1.c b/gcc/testsuite/gcc.target/s390/vector/vec-scalar-cmp-1.c
index 46a261f..ea51d0c 100644
--- a/gcc/testsuite/gcc.target/s390/vector/vec-scalar-cmp-1.c
+++ b/gcc/testsuite/gcc.target/s390/vector/vec-scalar-cmp-1.c
@@ -6,48 +6,65 @@ 
 int
 eq (double a, double b)
 {
+  asm ("" : : :
+       "f0", "f1",  "f2",  "f3",  "f4" , "f5",  "f6",  "f7",
+       "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15");
   return a == b;
 }
 
-/* { dg-final { scan-assembler "eq:\n\twfcedbs\t%v\[0-9\]*,%v0,%v2\n\tlhi\t%r2,1\n\tlochine\t%r2,0" } } */
+/* { dg-final { scan-assembler "eq:\n\[^:\]*\twfcdb\t%v\[0-9\]*,%v\[0-9\]*\n\t\[^:\]+\tlochine\t%r2,0" } } */
 
 int
 ne (double a, double b)
 {
+  asm ("" : : :
+       "f0", "f1",  "f2",  "f3",  "f4" , "f5",  "f6",  "f7",
+       "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15");
   return a != b;
 }
 
-/* { dg-final { scan-assembler "ne:\n\twfcedbs\t%v\[0-9\]*,%v0,%v2\n\tlhi\t%r2,1\n\tlochie\t%r2,0" } } */
+/* { dg-final { scan-assembler "ne:\n\[^:\]*\twfcdb\t%v\[0-9\]*,%v\[0-9\]*\n\t\[^:\]+\tlochie\t%r2,0" } } */
 
 int
 gt (double a, double b)
 {
+  asm ("" : : :
+       "f0", "f1",  "f2",  "f3",  "f4" , "f5",  "f6",  "f7",
+       "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15");
   return a > b;
 }
 
-/* { dg-final { scan-assembler "gt:\n\twfchdbs\t%v\[0-9\]*,%v0,%v2\n\tlhi\t%r2,1\n\tlochine\t%r2,0" } } */
+/* { dg-final { scan-assembler "gt:\n\[^:\]*\twfcdb\t%v\[0-9\]*,%v\[0-9\]*\n\t\[^:\]+\tlochinh\t%r2,0" } } */
 
 int
 ge (double a, double b)
 {
+  asm ("" : : :
+       "f0", "f1",  "f2",  "f3",  "f4" , "f5",  "f6",  "f7",
+       "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15");
   return a >= b;
 }
 
-/* { dg-final { scan-assembler "ge:\n\twfchedbs\t%v\[0-9\]*,%v0,%v2\n\tlhi\t%r2,1\n\tlochine\t%r2,0" } } */
+/* { dg-final { scan-assembler "ge:\n\[^:\]*\twfcdb\t%v\[0-9\]*,%v\[0-9\]*\n\t\[^:\]+\tlochinhe\t%r2,0" } } */
 
 int
 lt (double a, double b)
 {
+  asm ("" : : :
+       "f0", "f1",  "f2",  "f3",  "f4" , "f5",  "f6",  "f7",
+       "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15");
   return a < b;
 }
 
-/* { dg-final { scan-assembler "lt:\n\twfchdbs\t%v\[0-9\]*,%v2,%v0\n\tlhi\t%r2,1\n\tlochine\t%r2,0" } } */
+/* { dg-final { scan-assembler "lt:\n\[^:\]*\twfcdb\t%v\[0-9\]*,%v\[0-9\]*\n\t\[^:\]+\tlochinl\t%r2,0" } } */
 
 int
 le (double a, double b)
 {
+  asm ("" : : :
+       "f0", "f1",  "f2",  "f3",  "f4" , "f5",  "f6",  "f7",
+       "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15");
   return a <= b;
 }
 
-/* { dg-final { scan-assembler "le:\n\twfchedbs\t%v\[0-9\]*,%v2,%v0\n\tlhi\t%r2,1\n\tlochine\t%r2,0" } } */
-
+/* { dg-final { scan-assembler "le:\n\[^:\]*\twfcdb\t%v\[0-9\]*,%v\[0-9\]*\n\t\[^:\]+\tlochinle\t%r2,0" } } */