diff mbox series

[V4] rs6000: Refine unroll factor in loop_unroll_adjust hook

Message ID 1573024350-106791-1-git-send-email-guojiufu@linux.ibm.com
State New
Headers show
Series [V4] rs6000: Refine unroll factor in loop_unroll_adjust hook | expand

Commit Message

Jiufu Guo Nov. 6, 2019, 7:12 a.m. UTC
From: guojiufu <guojiufu@linux.ibm.com>

Hi,

Here is updated patch combining comments and suggestions from previous review.
In this patch, loop unroll adjust hook is introduced for powerpc.  We can do
target related heuristic adjustment in this hook.  In this patch, -funroll-loops
is enabled for small loops at O2 and O3.  Based on [PATCH V3], this patch
enhanced a little by adding an option -munroll-small-loops to guard the small
loops unrolling to make it works fine with -flto.

Successfully bootstrapped and regtested on powerpc64le.

OK for trunk?

Jiufu Guo
BR.

gcc/
2019-11-06  Jiufu Guo  <guojiufu@linux.ibm.com>

	PR tree-optimization/88760
	* gcc/config/rs6000/rs6000.opt (-munroll-small-loops): New option.
	* gcc/common/config/rs6000/rs6000-common.c
	(rs6000_option_optimization_table) [OPT_LEVELS_2_PLUS_SPEED_ONLY]:
	Turn on -funroll-loops and -munroll-small-loops.
	[OPT_LEVELS_ALL]: Turn off -fweb and -frename-registers.
	* config/rs6000/rs6000.c (rs6000_option_override_internal): Remove
	set of PARAM_MAX_UNROLL_TIMES and PARAM_MAX_UNROLLED_INSNS.
	Turn off -munroll-small-loops, turn on -fweb and -frename-registers
	for explicit funroll-loops.
	(TARGET_LOOP_UNROLL_ADJUST): Add loop unroll adjust hook.
	(rs6000_loop_unroll_adjust): Define it.  Use -munroll-small-loops.

gcc.testsuite/
2019-11-06  Jiufu Guo  <guojiufu@linux.ibm.com>

	PR tree-optimization/88760
	* gcc.dg/pr59643.c: Update back to r277550.
diff mbox series

Patch

Index: gcc/common/config/rs6000/rs6000-common.c
===================================================================
--- gcc/common/config/rs6000/rs6000-common.c	(revision 277871)
+++ gcc/common/config/rs6000/rs6000-common.c	(working copy)
@@ -35,7 +35,12 @@  static const struct default_options rs6000_option_
     { OPT_LEVELS_ALL, OPT_fsplit_wide_types_early, NULL, 1 },
     /* Enable -fsched-pressure for first pass instruction scheduling.  */
     { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 },
-    { OPT_LEVELS_2_PLUS, OPT_funroll_loops, NULL, 1 },
+    /* Enable  -funroll-loops with -munroll-small-loops.  */
+    { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_funroll_loops, NULL, 1 },
+    { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_munroll_small_loops, NULL, 1 },
+    /* Disable -fweb and -frename-registers.  */
+    { OPT_LEVELS_ALL, OPT_fweb, NULL, 0 },
+    { OPT_LEVELS_ALL, OPT_frename_registers, NULL, 0 },
     { OPT_LEVELS_NONE, 0, NULL, 0 }
   };
 
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 277871)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -1428,6 +1428,9 @@  static const struct attribute_spec rs6000_attribut
 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
 
+#undef TARGET_LOOP_UNROLL_ADJUST
+#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
+
 #undef TARGET_INIT_BUILTINS
 #define TARGET_INIT_BUILTINS rs6000_init_builtins
 #undef TARGET_BUILTIN_DECL
@@ -4540,24 +4543,18 @@  rs6000_option_override_internal (bool global_init_
 			     global_options.x_param_values,
 			     global_options_set.x_param_values);
 
-      /* unroll very small loops 2 time if no -funroll-loops.  */
-      if (!global_options_set.x_flag_unroll_loops
-	  && !global_options_set.x_flag_unroll_all_loops)
+      /* Explicit -funroll-loops turns -munroll-small-loops off, and turns
+	 fweb or frename-registers on.  */
+      if ((global_options_set.x_flag_unroll_loops && flag_unroll_loops)
+	  || (global_options_set.x_flag_unroll_all_loops
+	      && flag_unroll_all_loops))
 	{
-	  maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 2,
-				 global_options.x_param_values,
-				 global_options_set.x_param_values);
-
-	  maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 20,
-				 global_options.x_param_values,
-				 global_options_set.x_param_values);
-
-	  /* If fweb or frename-registers are not specificed in command-line,
-	     do not turn them on implicitly.  */
+	  if (!global_options_set.x_unroll_small_loops)
+	    unroll_small_loops = 0;
 	  if (!global_options_set.x_flag_web)
-	    global_options.x_flag_web = 0;
+	    flag_web = 1;
 	  if (!global_options_set.x_flag_rename_registers)
-	    global_options.x_flag_rename_registers = 0;
+	    flag_rename_registers = 1;
 	}
 
       /* If using typedef char *va_list, signal that
@@ -5101,6 +5098,25 @@  rs6000_destroy_cost_data (void *data)
   free (data);
 }
 
+/*  Implement targetm.loop_unroll_adjust.  */
+
+static unsigned
+rs6000_loop_unroll_adjust (unsigned nunroll, struct loop * loop)
+{
+   if (unroll_small_loops)
+    {
+      /* TODO: This is hardcoded to 10 right now.  It can be refined, for
+	 example we may want to unroll very small loops more times (4 perhaps).
+	 We also should use a PARAM for this.  */
+      if (loop->ninsns <= 10)
+	return MIN (2, nunroll);
+      else
+	return 0;
+    }
+
+  return nunroll;
+}
+
 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
    library with vectorized intrinsics.  */
 
Index: gcc/config/rs6000/rs6000.opt
===================================================================
--- gcc/config/rs6000/rs6000.opt	(revision 277871)
+++ gcc/config/rs6000/rs6000.opt	(working copy)
@@ -501,6 +501,10 @@  moptimize-swaps
 Target Undocumented Var(rs6000_optimize_swaps) Init(1) Save
 Analyze and remove doubleword swaps from VSX computations.
 
+munroll-small-loops
+Target Undocumented Var(unroll_small_loops) Init(0) Save
+Use conservative small loop unrolling.
+
 mpower9-misc
 Target Undocumented Report Mask(P9_MISC) Var(rs6000_isa_flags)
 Use certain scalar instructions added in ISA 3.0.
Index: gcc/testsuite/gcc.dg/pr59643.c
===================================================================
--- gcc/testsuite/gcc.dg/pr59643.c	(revision 277871)
+++ gcc/testsuite/gcc.dg/pr59643.c	(working copy)
@@ -1,9 +1,6 @@ 
 /* PR tree-optimization/59643 */
 /* { dg-do compile } */
 /* { dg-options "-O3 -fdump-tree-pcom-details" } */
-/* { dg-additional-options "--param max-unrolled-insns=400" { target { powerpc*-*-* } } } */
-/* Implicit threashold of max-unrolled-insn on ppc at O3 is too small for the
-   loop of this case.  */
 
 void
 foo (double *a, double *b, double *c, double d, double e, int n)