2015-08-04 Nathan Sidwell <nathan@codesourcery.com>
gcc/
* doc/tm.texi.in (TARGET_GOACC_VALIDATE_DIMS): Add hook.
* doc/tm.texi: Regenerated.
* target.def (TARGET_GOACC): New hook prefix.
(validate_dims): New.
* targhooks.h (default_goacc_validate_dims): Declare.
* internal-fn.def: Add comments.
(GOACC_DIM_POS): Make pure.
* config/nvptx/nvptx.c (nvptx_validate_dims): New.
(TARGET_GOACC_VALIDATE_DIMS): Override.
* omp-low.h (set_oacc_fn_attrib): Leave default dims as NULL.
(oacc_xform_dim): New.
(execute_oacc_transform): Process launch dimensions. Optimize
DIM_SIZE and DIM_POS. Delete FORK & JOIN on host.
(default_oacc_validate_dims): New.
libgomp/
* testsuite/libgomp.oacc-c-c++-common/routine-1.c: Add warning.
* testsuite/libgomp.oacc-c-c++-common/routine-2.c: Add warning.
===================================================================
@@ -64,7 +64,22 @@ DEF_INTERNAL_FN (MUL_OVERFLOW, ECF_CONST
DEF_INTERNAL_FN (TSAN_FUNC_EXIT, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (VA_ARG, ECF_NOTHROW | ECF_LEAF, NULL)
DEF_INTERNAL_FN (GOACC_DATA_END_WITH_ARG, ECF_NOTHROW, ".r")
+
+/* FORK and JOIN mark the points at which partitioned execution is
+ entered or exited. We arrange for these two function to be
+ unduplicable and uncombinable in order to preserve the SESE CFG
+ property of partitioned loops. These are non-const functions to prevent
+ optimizations migrating memory accesses across a partition change
+ boundary. They take a single INTEGER_CST
+ argument and return nothing. */
DEF_INTERNAL_FN (GOACC_FORK, ECF_NOTHROW | ECF_LEAF, ".")
DEF_INTERNAL_FN (GOACC_JOIN, ECF_NOTHROW | ECF_LEAF, ".")
+
+/* DIM_SIZE and DIM_POS return the size of a particular compute
+ dimension and the executing thread's position within that
+ dimension. DIM_POS is pure (and not const) so that it isn't
+ thought to clobber memory and can be gcse'd within a single
+ parallel region, but not across FORK/JOIN boundaries. They take a
+ single INTEGER_CST argument. */
DEF_INTERNAL_FN (GOACC_DIM_SIZE, ECF_CONST | ECF_NOTHROW | ECF_LEAF, ".")
-DEF_INTERNAL_FN (GOACC_DIM_POS, ECF_NOTHROW | ECF_LEAF, ".")
+DEF_INTERNAL_FN (GOACC_DIM_POS, ECF_PURE | ECF_NOTHROW | ECF_LEAF, ".")
===================================================================
@@ -3524,6 +3524,57 @@ nvptx_expand_builtin (tree exp, rtx targ
return d->expander (exp, target, mode, ignore);
}
+/* Validate compute dimensions, fill in defaults. */
+
+static tree
+nvptx_validate_dims (tree decl, tree dims)
+{
+ tree adims[GOMP_DIM_MAX];
+ unsigned ix;
+ bool changed = false;
+ tree pos = dims;
+
+ for (ix = 0; ix != GOMP_DIM_MAX; ix++)
+ {
+ adims[ix] = TREE_VALUE (pos);
+ pos = TREE_CHAIN (pos);
+ }
+ /* Define vector size for known hardware. */
+#define PTX_VECTOR_LENGTH 32
+ /* If the worker size is not 1, the vector size must be 32. If
+ the vector size is not 1, it must be 32. */
+ if ((adims[GOMP_DIM_WORKER]
+ && TREE_INT_CST_LOW (adims[GOMP_DIM_WORKER]) != 1)
+ || (adims[GOMP_DIM_VECTOR]
+ && TREE_INT_CST_LOW (adims[GOMP_DIM_VECTOR]) != 1))
+ {
+ if (!adims[GOMP_DIM_VECTOR]
+ || TREE_INT_CST_LOW (adims[GOMP_DIM_VECTOR]) != PTX_VECTOR_LENGTH)
+ {
+ tree use = build_int_cst (integer_type_node, PTX_VECTOR_LENGTH);
+ if (adims[GOMP_DIM_VECTOR])
+ warning_at (DECL_SOURCE_LOCATION (decl), 0,
+ TREE_INT_CST_LOW (adims[GOMP_DIM_VECTOR])
+ ? "using vector_length (%E), ignoring %E"
+ : "using vector_length (%E), ignoring runtime setting",
+ use, adims[GOMP_DIM_VECTOR]);
+ adims[GOMP_DIM_VECTOR] = use;
+ }
+ }
+
+ /* Set defaults. */
+ for (ix = 0; ix != GOMP_DIM_MAX; ix++)
+ if (!adims[ix])
+ adims[ix] = integer_one_node;
+
+ /* Write results. */
+ pos = dims;
+ for (ix = 0; ix != GOMP_DIM_MAX; ix++, pos = TREE_CHAIN (pos))
+ TREE_VALUE (pos) = adims[ix];
+
+ return dims;
+}
+
#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE nvptx_option_override
@@ -3618,6 +3669,9 @@ nvptx_expand_builtin (tree exp, rtx targ
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL nvptx_builtin_decl
+#undef TARGET_GOACC_VALIDATE_DIMS
+#define TARGET_GOACC_VALIDATE_DIMS nvptx_validate_dims
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-nvptx.h"
===================================================================
@@ -1639,6 +1639,21 @@ int, (struct cgraph_node *), NULL)
HOOK_VECTOR_END (simd_clone)
+/* Functions relating to openacc. */
+#undef HOOK_PREFIX
+#define HOOK_PREFIX "TARGET_GOACC_"
+HOOK_VECTOR (TARGET_GOACC, goacc)
+
+DEFHOOK
+(validate_dims,
+"This hook should check the launch dimensions provided/ It should fill\n\
+in default values and verify non-defaults. The TREE_LIST is unshared\n\
+and may be overwritten. Diagnostics should be issued as appropriate.",
+tree, (tree, tree),
+default_goacc_validate_dims)
+
+HOOK_VECTOR_END (goacc)
+
/* Functions relating to vectorization. */
#undef HOOK_PREFIX
#define HOOK_PREFIX "TARGET_VECTORIZE_"
===================================================================
@@ -107,6 +107,8 @@ extern unsigned default_add_stmt_cost (v
extern void default_finish_cost (void *, unsigned *, unsigned *, unsigned *);
extern void default_destroy_cost_data (void *);
+extern tree default_goacc_validate_dims (tree, tree);
+
/* These are here, and not in hooks.[ch], because not all users of
hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS. */
===================================================================
@@ -5740,6 +5740,12 @@ usable. In that case, the smaller the n
to use it.
@end deftypefn
+@deftypefn {Target Hook} tree TARGET_GOACC_VALIDATE_DIMS (tree, @var{tree})
+This hook should check the launch dimensions provided/ It should fill
+in default values and verify non-defaults. The TREE_LIST is unshared
+and may be overwritten. Diagnostics should be issued as appropriate.
+@end deftypefn
+
@node Anchored Addresses
@section Anchored Addresses
@cindex anchored addresses
===================================================================
@@ -4245,6 +4245,8 @@ address; but often a machine-dependent
@hook TARGET_SIMD_CLONE_USABLE
+@hook TARGET_GOACC_VALIDATE_DIMS
+
@node Anchored Addresses
@section Anchored Addresses
@cindex anchored addresses
===================================================================
@@ -9293,7 +9293,9 @@ oacc_launch_pack (unsigned code, tree de
The attribute value is a TREE_LIST. A set of dimensions is
represented as a list of INTEGER_CST. Those that are runtime
- expres are represented as an INTEGER_CST of zero.
+ expres are represented as an INTEGER_CST of zero. Defaults are set
+ as NULL_TREE and will be filled in later by the target hook
+ TARGET_OACC_VALIDATE_DIMS.
TOOO. Normally the attribute will just contain a single such list. If
however it contains a list of lists, this will represent the use of
@@ -9317,14 +9319,12 @@ set_oacc_fn_attrib (tree clauses, tree f
for (ix = GOMP_DIM_MAX; ix--;)
{
tree clause = find_omp_clause (clauses, ids[ix]);
- tree dim;
+ tree dim = NULL_TREE;
- if (!clause)
- dim = integer_one_node;
- else
+ if (clause)
dim = OMP_CLAUSE_EXPR (clause, ids[ix]);
dims[ix] = dim;
- if (TREE_CODE (dim) != INTEGER_CST)
+ if (dim && TREE_CODE (dim) != INTEGER_CST)
{
dim = integer_zero_node;
non_const |= GOMP_DIM_MASK (ix);
@@ -14586,35 +14586,126 @@ oacc_xform_on_device (gimple_stmt_iterat
gsi_replace_with_seq (gsi, replace, false);
}
+/* Transform oacc_dim_size and oacc_dim_pos internal function calls to
+ constants, where possible. */
+
+static void
+oacc_xform_dim (gimple_stmt_iterator *gsi, gimple stmt,
+ tree dims, bool is_pos)
+{
+ tree arg = gimple_call_arg (stmt, 0);
+ unsigned axis = (unsigned)TREE_INT_CST_LOW (arg);
+ while (axis--)
+ dims = TREE_CHAIN (dims);
+ unsigned size = TREE_INT_CST_LOW (TREE_VALUE (dims));
+
+ if (size == 0)
+ /* Dimension size is dynamic. */
+ return;
+ if (is_pos)
+ {
+ if (size != 1)
+ /* Size is more than 1. */
+ return;
+ size = 0;
+ }
+
+ /* Replace the internal call with a constant. */
+ tree lhs = gimple_call_lhs (stmt);
+ gimple g = gimple_build_assign
+ (lhs, build_int_cst (unsigned_type_node, size));
+ gsi_replace (gsi, g, false);
+}
+
/* Main entry point for oacc transformations which run on the device
- compilerafter LTO, so we know what the target device is at this
+ compiler after LTO, so we know what the target device is at this
point (including the host fallback). */
static unsigned int
execute_oacc_transform ()
{
basic_block bb;
-
- if (!get_oacc_fn_attrib (current_function_decl))
+ tree attrs = get_oacc_fn_attrib (current_function_decl);
+
+ if (!attrs)
+ /* Not an offloaded function. */
return 0;
+ tree dims = TREE_VALUE (attrs);
+ if (dims)
+ dims = targetm.goacc.validate_dims (current_function_decl, dims);
+ /* Safe to overwrite, this attribute chain is unshared. */
+ TREE_VALUE (attrs) = dims;
+
FOR_ALL_BB_FN (bb, cfun)
{
- for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
- !gsi_end_p (gsi); gsi_next (&gsi))
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
{
gimple stmt = gsi_stmt (gsi);
- /* acc_on_device must be evaluated at compile time for
- constant arguments. */
- if (gimple_call_builtin_p (stmt, BUILT_IN_ACC_ON_DEVICE))
- oacc_xform_on_device (&gsi, stmt);
+ if (is_gimple_call (stmt))
+ {
+ /* acc_on_device must be evaluated at compile time for
+ constant arguments. */
+ if (gimple_call_builtin_p (stmt, BUILT_IN_ACC_ON_DEVICE))
+ oacc_xform_on_device (&gsi, stmt);
+
+ if (gimple_call_internal_p (stmt))
+ switch (gimple_call_internal_fn (stmt))
+ {
+ default: break;
+
+ case IFN_GOACC_DIM_SIZE:
+ if (dims)
+ oacc_xform_dim (&gsi, stmt, dims, false);
+ break;
+
+ case IFN_GOACC_DIM_POS:
+ if (dims)
+ oacc_xform_dim (&gsi, stmt, dims, true);
+ break;
+
+#ifndef ACCEL_COMPILER
+ case IFN_GOACC_FORK:
+ case IFN_GOACC_JOIN:
+ /* These are irrelevant on the host. */
+ replace_uses_by (gimple_vdef (stmt), gimple_vuse (stmt));
+ gsi_remove (&gsi, true);
+ /* Removal will have advanced the iterator. */
+ continue;
+#endif
+ }
+ }
+ gsi_next (&gsi);
}
}
return 0;
}
+/* Default launch dimension validator. */
+
+tree
+default_goacc_validate_dims (tree ARG_UNUSED (decl), tree dims)
+{
+ tree pos = dims;
+ for (unsigned ix = GOMP_DIM_MAX; ix--; pos = TREE_CHAIN (pos))
+ {
+ tree val = TREE_VALUE (pos);
+
+#ifdef ACCEL_COMPILER
+ if (!val)
+ val = integer_one_node;
+#else
+ /* Set to 1 on the host. */
+ val = integer_one_node;
+#endif
+ TREE_VALUE (pos) = val;
+ }
+
+ return dims;
+}
+
namespace {
const pass_data pass_data_oacc_transform =
===================================================================
@@ -1,6 +1,6 @@
/* FIXME: remove -fno-var-tracking from dg-aditional-options. */
-/* { dg-do run } */
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
/* { dg-additional-options "-fno-inline -fno-var-tracking" } */
#include <stdio.h>
@@ -23,7 +23,7 @@ main()
a = (int *)malloc (sizeof (int) * n);
-#pragma acc parallel copy (a[0:n]) vector_length (5)
+#pragma acc parallel copy (a[0:n]) vector_length (5) /* { dg-warning "using vector_length" } */
{
#pragma acc loop
for (i = 0; i < n; i++)
===================================================================
@@ -1,6 +1,6 @@
/* FIXME: remove -fno-var-tracking from dg-additional-options. */
-/* { dg-do run } */
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
/* { dg-additional-options "-fno-inline -fno-var-tracking" } */
#include <stdio.h>
@@ -24,7 +24,7 @@ main()
a = (int *)malloc (sizeof (int) * n);
-#pragma acc parallel copy (a[0:n]) vector_length (5)
+#pragma acc parallel copy (a[0:n]) vector_length (5) /* { dg-warning "using vector_length" } */
{
#pragma acc loop
for (i = 0; i < n; i++)