diff mbox series

[1/2] S/390: Support vector load/store alignment hints

Message ID 20200715155327.3911845-2-stefansf@linux.ibm.com
State New
Headers show
Series S/390: Support vector load/store alignment hints | expand

Commit Message

Stefan Schulze Frielinghaus July 15, 2020, 3:53 p.m. UTC
From: Andreas Krebbel <krebbel@linux.ibm.com>

The IBM z14 POP adds an optional alignment operand to the vl, vst,
vlm, and vstm instructions (vector loads and stores). Vectors residing
on 8 or 16 byte boundaries might get loaded or stored faster on some
models provided the instruction uses the proper hint operand.  A wrong
hint will hurt performance though.

The attached testcase align-1 currently fails due to:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88085

gcc/ChangeLog:

2018-11-21  Andreas Krebbel  <krebbel@linux.ibm.com>

	* configure.ac: Add check for Binutils to determine whether vector
	load/store alignment hints are supported.
	* config.in: Regenerate.
	* configure: Regenerate.
	* config/s390/s390.c (print_operand): Support new output
	modifier A.
	* config/s390/s390.md ("movti"): Append alignment hint output
	using the new output modifier 'A'.
	* config/s390/vector.md ("mov<mode>", "*vec_tf_to_v1tf")
	("*vec_ti_to_v1ti"): Likewise.

gcc/testsuite/ChangeLog:

2018-11-21  Andreas Krebbel  <krebbel@linux.ibm.com>

	* gcc.target/s390/vector/align-1.c: New test.
	* gcc.target/s390/vector/align-2.c: New test.

From-SVN: r266336

(cherry picked from commit b8923037ef1b229326b7f238580d91bbbb76b8ff)
---
 gcc/config.in                                 |  7 ++++
 gcc/config/s390/s390.c                        | 13 +++++++
 gcc/config/s390/s390.md                       |  4 +--
 gcc/config/s390/vector.md                     | 12 +++----
 gcc/configure                                 | 36 +++++++++++++++++++
 gcc/configure.ac                              |  6 ++++
 .../gcc.target/s390/vector/align-1.c          | 30 ++++++++++++++++
 .../gcc.target/s390/vector/align-2.c          | 29 +++++++++++++++
 8 files changed, 129 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/align-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/align-2.c

Comments

Andreas Krebbel July 16, 2020, 7:18 a.m. UTC | #1
On 15.07.20 17:53, Stefan Schulze Frielinghaus wrote:
> From: Andreas Krebbel <krebbel@linux.ibm.com>
> 
> The IBM z14 POP adds an optional alignment operand to the vl, vst,
> vlm, and vstm instruction (vector loads and stores). Vectors residing
> on 8 or 16 byte boundaries might get loaded or stored faster on some
> models given the instruction uses the proper hint operand.  A wrong
> hint will hurt performance though.
> 
> The attached testcase align-1 currently fails due to:
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88085
> 
> gcc/ChangeLog:
> 
> 2018-11-21  Andreas Krebbel  <krebbel@linux.ibm.com>
> 
> 	* configure.ac: Add check for Binutils to determine whether vector
> 	load/store alignments hints are being supported.
> 	* config.in: Regenerate.
> 	* configure: Regenerate.
> 	* config/s390/s390.c (print_operand): Support new output
> 	modifier A.
> 	* config/s390/s390.md ("movti"): Append alignment hint output
> 	using the new output modifier 'A'.
> 	* config/s390/vector.md ("mov<mode>", "*vec_tf_to_v1tf")
> 	("*vec_ti_to_v1ti"): Likewise.
> 
> gcc/testsuite/ChangeLog:
> 
> 2018-11-21  Andreas Krebbel  <krebbel@linux.ibm.com>
> 
> 	* gcc.target/s390/vector/align-1.c: New test.
> 	* gcc.target/s390/vector/align-2.c: New test.

Ok, thanks!

Andreas
diff mbox series

Patch

diff --git a/gcc/config.in b/gcc/config.in
index 5bccb408016..4924b8a0c32 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -717,6 +717,13 @@ 
 #endif
 
 
+/* Define if your assembler supports vl/vst/vlm/vstm with an optional
+   alignment hint argument. */
+#ifndef USED_FOR_TARGET
+#undef HAVE_AS_VECTOR_LOADSTORE_ALIGNMENT_HINTS
+#endif
+
+
 /* Define if your assembler supports VSX instructions. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_AS_VSX
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 9e7fd2b04dd..5aff2084e1b 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -7697,6 +7697,8 @@  print_operand_address (FILE *file, rtx addr)
    CODE specified the format flag.  The following format flags
    are recognized:
 
+    'A': On z14 or higher: If operand is a mem print the alignment
+	 hint usable with vl/vst prefixed by a comma.
     'C': print opcode suffix for branch condition.
     'D': print opcode suffix for inverse branch condition.
     'E': print opcode suffix for branch on index instruction.
@@ -7734,6 +7736,17 @@  print_operand (FILE *file, rtx x, int code)
 
   switch (code)
     {
+    case 'A':
+#ifdef HAVE_AS_VECTOR_LOADSTORE_ALIGNMENT_HINTS
+      if (TARGET_ARCH12 && MEM_P (x))
+	{
+	  if (MEM_ALIGN (x) >= 128)
+	    fprintf (file, ",4");
+	  else if (MEM_ALIGN (x) == 64)
+	    fprintf (file, ",3");
+	}
+#endif
+      return;
     case 'C':
       fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
       return;
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 93b1ff0b8db..7114609b676 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -1516,8 +1516,8 @@ 
    vone\t%v0
    vlvgp\t%v0,%1,%N1
    #
-   vl\t%v0,%1
-   vst\t%v1,%0
+   vl\t%v0,%1%A1
+   vst\t%v1,%0%A0
    #
    #"
   [(set_attr "op_type" "RSY,RSY,VRR,VRI,VRI,VRR,*,VRX,VRX,*,*")
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 24b200e0c57..0fcb8d2e158 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -198,8 +198,8 @@ 
   ""
   "@
    vlr\t%v0,%v1
-   vl\t%v0,%1
-   vst\t%v1,%0
+   vl\t%v0,%1%A1
+   vst\t%v1,%0%A0
    vzero\t%v0
    vone\t%v0
    vgbm\t%v0,%t1
@@ -549,8 +549,8 @@ 
   "TARGET_VX"
   "@
    vmrhg\t%v0,%1,%N1
-   vl\t%v0,%1
-   vst\t%v1,%0
+   vl\t%v0,%1%A1
+   vst\t%v1,%0%A0
    vzero\t%v0
    vlvgp\t%v0,%1,%N1"
   [(set_attr "op_type" "VRR,VRX,VRX,VRI,VRR")])
@@ -561,8 +561,8 @@ 
   "TARGET_VX"
   "@
    vlr\t%v0,%v1
-   vl\t%v0,%1
-   vst\t%v1,%0
+   vl\t%v0,%1%A1
+   vst\t%v1,%0%A0
    vzero\t%v0
    vone\t%v0
    vlvgp\t%v0,%1,%N1"
diff --git a/gcc/configure b/gcc/configure
index 97ba7d7d69c..4dd81d24241 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -27753,6 +27753,42 @@  $as_echo "#define HAVE_AS_ARCHITECTURE_MODIFIERS 1" >>confdefs.h
 
 fi
 
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for vector load/store alignment hints" >&5
+$as_echo_n "checking assembler for vector load/store alignment hints... " >&6; }
+if ${gcc_cv_as_s390_vector_loadstore_alignment_hints+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  gcc_cv_as_s390_vector_loadstore_alignment_hints=no
+    if test $in_tree_gas = yes; then
+    if test $gcc_cv_gas_vers -ge `expr \( \( 2 \* 1000 \) + 31 \) \* 1000 + 0`
+  then gcc_cv_as_s390_vector_loadstore_alignment_hints=yes
+fi
+  elif test x$gcc_cv_as != x; then
+    $as_echo '	vl %v24,0(%r15),3 ' > conftest.s
+    if { ac_try='$gcc_cv_as $gcc_cv_as_flags  -o conftest.o conftest.s >&5'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }
+    then
+	gcc_cv_as_s390_vector_loadstore_alignment_hints=yes
+    else
+      echo "configure: failed program was" >&5
+      cat conftest.s >&5
+    fi
+    rm -f conftest.o conftest.s
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_s390_vector_loadstore_alignment_hints" >&5
+$as_echo "$gcc_cv_as_s390_vector_loadstore_alignment_hints" >&6; }
+if test $gcc_cv_as_s390_vector_loadstore_alignment_hints = yes; then
+
+$as_echo "#define HAVE_AS_VECTOR_LOADSTORE_ALIGNMENT_HINTS 1" >>confdefs.h
+
+fi
+
+
     ;;
 esac
 
diff --git a/gcc/configure.ac b/gcc/configure.ac
index d6f2d5b2ed0..6173a1c4f23 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -4878,6 +4878,12 @@  pointers into PC-relative form.])
       [	.machine z13+vx ],,
       [AC_DEFINE(HAVE_AS_ARCHITECTURE_MODIFIERS, 1,
 	  [Define if your assembler supports architecture modifiers.])])
+    gcc_GAS_CHECK_FEATURE([vector load/store alignment hints],
+      gcc_cv_as_s390_vector_loadstore_alignment_hints, [2,31,0],,
+      [	vl %v24,0(%r15),3 ],,
+      [AC_DEFINE(HAVE_AS_VECTOR_LOADSTORE_ALIGNMENT_HINTS, 1,
+	  [Define if your assembler supports vl/vst/vlm/vstm with an optional alignment hint argument.])])
+
     ;;
 esac
 
diff --git a/gcc/testsuite/gcc.target/s390/vector/align-1.c b/gcc/testsuite/gcc.target/s390/vector/align-1.c
new file mode 100644
index 00000000000..cc7777ad22a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/align-1.c
@@ -0,0 +1,30 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z14" } */
+
+/* The user alignment ends up in DECL_ALIGN of the VAR_DECL and is
+   currently ignored if it is smaller than the alignment of the type.
+   In this testcase an alignment hint ",3" is emitted also for
+   accessing a4 which is wrong.
+   Hence this testcase currently fails:
+   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88085 */
+
+typedef int __attribute__((vector_size(16))) v4si;
+
+v4si a4 __attribute__((aligned(4)));
+v4si a8 __attribute__((aligned(8)));
+v4si a16 __attribute__((aligned(16)));
+v4si a32 __attribute__((aligned(32)));
+
+void
+foo (v4si a)
+{
+  a4 += a;  /* vl ...   vst ... */
+  a8 += a;  /* vl ...,3 vst ...,3 */
+  a16 += a; /* vl ...,4 vst ...,4 */
+  a32 += a; /* vl ...,4 vst ...,4 */
+}
+
+/* { dg-final { scan-assembler-times "vl\t%v\[0-9\]*,\[0-9\]*\\(%r\[0-9\]*\\),3\n" 1 } } */
+/* { dg-final { scan-assembler-times "vl\t%v\[0-9\]*,\[0-9\]*\\(%r\[0-9\]*\\),4\n" 2 } } */
+/* { dg-final { scan-assembler-times "vst\t%v\[0-9\]*,\[0-9\]*\\(%r\[0-9\]*\\),3\n" 1 } } */
+/* { dg-final { scan-assembler-times "vst\t%v\[0-9\]*,\[0-9\]*\\(%r\[0-9\]*\\),4" 2 } } */
diff --git a/gcc/testsuite/gcc.target/s390/vector/align-2.c b/gcc/testsuite/gcc.target/s390/vector/align-2.c
new file mode 100644
index 00000000000..e4e2fba6a58
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/align-2.c
@@ -0,0 +1,29 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z14" } */
+
+/* The user alignment ends up in TYPE_ALIGN of the type of the
+   VAR_DECL.  */
+
+typedef int __attribute__((vector_size(16),aligned(4))) v4si_4;
+typedef int __attribute__((vector_size(16),aligned(8))) v4si_8;
+typedef int __attribute__((vector_size(16),aligned(16))) v4si_16;
+typedef int __attribute__((vector_size(16),aligned(32))) v4si_32;
+
+v4si_4 a4;
+v4si_8 a8;
+v4si_16 a16;
+v4si_32 a32;
+
+void
+foo (v4si_8 a)
+{
+  a4 += a;  /* vl ...   vst ... */
+  a8 += a;  /* vl ...,3 vst ...,3 */
+  a16 += a; /* vl ...,4 vst ...,4 */
+  a32 += a; /* vl ...,4 vst ...,4 */
+}
+
+/* { dg-final { scan-assembler-times "vl\t%v\[0-9\]*,\[0-9\]*\\(%r\[0-9\]*\\),3\n" 1 } } */
+/* { dg-final { scan-assembler-times "vl\t%v\[0-9\]*,\[0-9\]*\\(%r\[0-9\]*\\),4\n" 2 } } */
+/* { dg-final { scan-assembler-times "vst\t%v\[0-9\]*,\[0-9\]*\\(%r\[0-9\]*\\),3\n" 1 } } */
+/* { dg-final { scan-assembler-times "vst\t%v\[0-9\]*,\[0-9\]*\\(%r\[0-9\]*\\),4" 2 } } */