[Committed] S/390: Don't unroll memory blk op loops

Message ID 20180806150149.13537-1-krebbel@linux.ibm.com
State New
Headers show
Series
  • [Committed] S/390: Don't unroll memory blk op loops
Related show

Commit Message

Andreas Krebbel Aug. 6, 2018, 3:01 p.m.
From: Andreas Krebbel <krebbel@linux.vnet.ibm.com>

gcc/ChangeLog:

2018-08-06  Andreas Krebbel  <krebbel@linux.ibm.com>

	* config/s390/s390.c (s390_loop_unroll_adjust): Prevent small
	loops with memory block operations from getting unrolled.

gcc/testsuite/ChangeLog:

2018-08-06  Andreas Krebbel  <krebbel@linux.ibm.com>

	* gcc.target/s390/nomemloopunroll-1.c: New test.
---
 gcc/config/s390/s390.c                            | 31 ++++++++++++++++++++---
 gcc/testsuite/gcc.target/s390/nomemloopunroll-1.c | 27 ++++++++++++++++++++
 2 files changed, 55 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/nomemloopunroll-1.c

Patch

diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index ec588a2..aa34f56 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -390,6 +390,11 @@  static unsigned vfu_longrunning[NUM_SIDES];
    base and index are registers of the class ADDR_REGS,
    displacement is an unsigned 12-bit immediate constant.  */
 
+/* The max number of insns of backend generated memset/memcpy/memcmp
+   loops.  This value is used in the unroll adjust hook to detect such
+   loops.  Current max is 9 coming from the memcmp loop.  */
+#define BLOCK_MEM_OPS_LOOP_INSNS 9
+
 struct s390_address
 {
   rtx base;
@@ -15385,9 +15390,29 @@  s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
   for (i = 0; i < loop->num_nodes; i++)
     FOR_BB_INSNS (bbs[i], insn)
       if (INSN_P (insn) && INSN_CODE (insn) != -1)
-	FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
-	  if (MEM_P (*iter))
-	    mem_count += 1;
+	{
+	  rtx set;
+
+	  /* The runtime of small loops with memory block operations
+	     will be determined by the memory operation.  Doing
+	     unrolling doesn't help here.  Measurements to confirm
+	     this were only done on recent CPU levels, so it is better
+	     not to change anything for older CPUs.  */
+	  if (s390_tune >= PROCESSOR_2964_Z13
+	      && loop->ninsns <= BLOCK_MEM_OPS_LOOP_INSNS
+	      && ((set = single_set (insn)) != NULL_RTX)
+	      && ((GET_MODE (SET_DEST (set)) == BLKmode
+		   && (GET_MODE (SET_SRC (set)) == BLKmode
+		       || SET_SRC (set) == const0_rtx))
+		  || (GET_CODE (SET_SRC (set)) == COMPARE
+		      && GET_MODE (XEXP (SET_SRC (set), 0)) == BLKmode
+		      && GET_MODE (XEXP (SET_SRC (set), 1)) == BLKmode)))
+	    return 1;
+
+	  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
+	    if (MEM_P (*iter))
+	      mem_count += 1;
+	}
   free (bbs);
 
   /* Prevent division by zero, and we do not need to adjust nunroll in this case.  */
diff --git a/gcc/testsuite/gcc.target/s390/nomemloopunroll-1.c b/gcc/testsuite/gcc.target/s390/nomemloopunroll-1.c
new file mode 100644
index 0000000..695d925
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/nomemloopunroll-1.c
@@ -0,0 +1,27 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -funroll-loops -march=z13" } */
+
+/* 2x mvc */
+void *
+foo (char *a, int c, long len)
+{
+  return __builtin_memset (a, c, len);
+}
+
+/* 2x mvc */
+void
+bar (char *a, char *b)
+{
+  __builtin_memcpy (a, b, 30000);
+}
+
+/* 2x clc */
+
+int
+baz (char *a, char *b)
+{
+  return __builtin_memcmp (a, b, 30000);
+}
+
+/* { dg-final { scan-assembler-times "\\\smvc\\\s" 4 } } */
+/* { dg-final { scan-assembler-times "\\\sclc\\\s" 2 } } */