diff mbox

Add fopt-info-oacc

Message ID 569D2059.4010105@mentor.com
State New
Headers show

Commit Message

Tom de Vries Jan. 18, 2016, 5:26 p.m. UTC
Hi,

This patch introduces a new option, -fopt-info-oacc.

When using the option like this with a kernels region in kernels-loop.c 
that parloops does not manage to parallelize:
...
$ gcc kernels-loop.c -S -O2 -fopenacc -fopt-info-oacc-all
...

we get a message:
...
kernels-loop.c:23:9: note: Kernels region executed sequentially. 
Consider mapping it to host execution, to avoid data copy penalty.
...

Any comments?

Thanks,
- Tom

Comments

Sandra Loosemore Jan. 18, 2016, 6:28 p.m. UTC | #1
On 01/18/2016 10:26 AM, Tom de Vries wrote:
> Hi,
>
> This patch introduces an option fopt-info-oacc.
>
> When using the option like this with a kernels region in kernels-loop.c
> that parloops does not manage to parallelize:
> ...
> $ gcc kernels-loop.c -S -O2 -fopenacc -fopt-info-oacc-all
> ...
>
> we get a message:
> ...
> kernels-loop.c:23:9: note: kernels region executed sequentially.
> Consider mapping it to host execution, to avoid data copy penalty.
> ...
>
> Any comments?

Needs documentation?

-Sandra
Richard Sandiford Jan. 18, 2016, 8:30 p.m. UTC | #2
Sandra Loosemore <sandra@codesourcery.com> writes:
> On 01/18/2016 10:26 AM, Tom de Vries wrote:
>> Hi,
>>
>> This patch introduces an option fopt-info-oacc.
>>
>> When using the option like this with a kernels region in kernels-loop.c
>> that parloops does not manage to parallelize:
>> ...
>> $ gcc kernels-loop.c -S -O2 -fopenacc -fopt-info-oacc-all
>> ...
>>
>> we get a message:
>> ...
>> kernels-loop.c:23:9: note: kernels region executed sequentially.
>> Consider mapping it to host execution, to avoid data copy penalty.
>> ...
>>
>> Any comments?
>
> Needs documentation?

Also, sorry for the drive-by comment, but: -fopt-info-openacc-all seems
more consistent with -fopenacc and is only three characters longer.

Thanks,
Richard
diff mbox

Patch

Add fopt-info-oacc

---
 gcc/dumpfile.c |  1 +
 gcc/dumpfile.h |  5 +++--
 gcc/omp-low.c  | 30 +++++++++++++++++++++++++++++-
 3 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/gcc/dumpfile.c b/gcc/dumpfile.c
index 144e371..e8aa0e1 100644
--- a/gcc/dumpfile.c
+++ b/gcc/dumpfile.c
@@ -137,6 +137,7 @@  static const struct dump_option_value_info optgroup_options[] =
   {"loop", OPTGROUP_LOOP},
   {"inline", OPTGROUP_INLINE},
   {"vec", OPTGROUP_VEC},
+  {"oacc", OPTGROUP_OACC},
   {"optall", OPTGROUP_ALL},
   {NULL, 0}
 };
diff --git a/gcc/dumpfile.h b/gcc/dumpfile.h
index c168cbf..6e1c657 100644
--- a/gcc/dumpfile.h
+++ b/gcc/dumpfile.h
@@ -97,9 +97,10 @@  enum tree_dump_index
 #define OPTGROUP_LOOP        (1 << 2)   /* Loop optimization passes */
 #define OPTGROUP_INLINE      (1 << 3)   /* Inlining passes */
 #define OPTGROUP_VEC         (1 << 4)   /* Vectorization passes */
-#define OPTGROUP_OTHER       (1 << 5)   /* All other passes */
+#define OPTGROUP_OACC        (1 << 5)   /* Openacc passes */
+#define OPTGROUP_OTHER       (1 << 6)   /* All other passes */
 #define OPTGROUP_ALL	     (OPTGROUP_IPA | OPTGROUP_LOOP | OPTGROUP_INLINE \
-                              | OPTGROUP_VEC | OPTGROUP_OTHER)
+                              | OPTGROUP_VEC | OPTGROUP_OACC | OPTGROUP_OTHER)
 
 /* Define a tree dump switch.  */
 struct dump_file_info
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index a6e3fe3..d5c3484 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -20139,6 +20139,34 @@  execute_oacc_device_lower ()
 	     : fn_level < 0 ? "Function is parallel offload\n"
 	     : "Function is routine level %d\n", fn_level);
 
+#if defined ACCEL_COMPILER
+  bool is_kernels = oacc_fn_attrib_kernels_p (attrs);
+  if (is_kernels)
+    {
+      bool all_one = true;
+      tree pos = TREE_VALUE (attrs);
+      for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
+	{
+	  tree tree_val = TREE_VALUE (pos);
+	  unsigned HOST_WIDE_INT val = (tree_val
+					? TREE_INT_CST_LOW (tree_val)
+					: 1);
+	  if (val != 1)
+	    {
+	      all_one = false;
+	      break;
+	    }
+	  pos = TREE_CHAIN (pos);
+	}
+
+      if (all_one)
+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, cfun->function_start_locus,
+			 "Kernels region executed sequentially.  Consider"
+			 " mapping it to host execution, to avoid data copy"
+			 " penalty.\n");
+    }
+#endif
+
   unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
   unsigned used_mask = oacc_loop_partition (loops, outer_mask);
   int dims[GOMP_DIM_MAX];
@@ -20312,7 +20340,7 @@  const pass_data pass_data_oacc_device_lower =
 {
   GIMPLE_PASS, /* type */
   "oaccdevlow", /* name */
-  OPTGROUP_NONE, /* optinfo_flags */
+  OPTGROUP_OACC, /* optinfo_flags */
   TV_NONE, /* tv_id */
   PROP_cfg, /* properties_required */
   0 /* Possibly PROP_gimple_eomp.  */, /* properties_provided */