[openacc] Add target hook TARGET_GOACC_ADJUST_PARALLELISM
gcc/
* doc/tm.texi.in: Add placeholder for TARGET_GOACC_ADJUST_PARALLELISM.
* doc/tm.texi: Regenerate.
* omp-offload.c (oacc_loop_fixed_partitions): Use the adjust_parallelism
hook to modify this_mask.
(oacc_loop_auto_partitions): Use the adjust_parallelism hook to modify
this_mask and loop->mask.
(default_goacc_adjust_parallelism): New function.
* target.def (adjust_parallelism): New hook.
* targhooks.h (default_goacc_adjust_parallelism): Declare.
@@ -6029,6 +6029,12 @@ This hook should return the maximum size of a particular dimension,
or zero if unbounded.
@end deftypefn
+@deftypefn {Target Hook} unsigned TARGET_GOACC_ADJUST_PARALLELISM (unsigned @var{this_mask}, unsigned @var{outer_mask})
+This hook allows the accelerator compiler to remove any unused
+parallelism exposed in the current loop @var{THIS_MASK}, and the
+enclosing loop @var{OUTER_MASK}. It returns an adjusted mask.
+@end deftypefn
+
@deftypefn {Target Hook} bool TARGET_GOACC_FORK_JOIN (gcall *@var{call}, const int *@var{dims}, bool @var{is_fork})
This hook can be used to convert IFN_GOACC_FORK and IFN_GOACC_JOIN
function calls to target-specific gimple, or indicate whether they
@@ -4145,6 +4145,8 @@ address; but often a machine-dependent strategy can generate better code.
@hook TARGET_GOACC_DIM_LIMIT
+@hook TARGET_GOACC_ADJUST_PARALLELISM
+
@hook TARGET_GOACC_FORK_JOIN
@hook TARGET_GOACC_REDUCTION
@@ -1218,6 +1218,13 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
}
}
+ /* Ideally, we should be coalescing parallelism here if the
+ hardware supports it. For example, instead of partitioning a
+ loop across worker and vector axes, the hardware can sometimes
+ execute those loops together without placing extra thread
+ barriers. */
+ this_mask = targetm.goacc.adjust_parallelism (this_mask, outer_mask);
+
mask_all |= this_mask;
if (loop->flags & OLF_TILE)
@@ -1302,6 +1309,7 @@ oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask,
this_mask ^= loop->e_mask;
}
+ this_mask = targetm.goacc.adjust_parallelism (this_mask, outer_mask);
loop->mask |= this_mask;
}
@@ -1350,6 +1358,8 @@ oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask,
}
loop->mask |= this_mask;
+ loop->mask = targetm.goacc.adjust_parallelism (loop->mask, outer_mask);
+
if (!loop->mask && noisy)
warning_at (loop->loc, 0,
tiling
@@ -1684,6 +1694,15 @@ default_goacc_dim_limit (int ARG_UNUSED (axis))
#endif
}
+/* Default adjust_parallelism hook: return THIS_MASK unchanged. */
+
+unsigned
+default_goacc_adjust_parallelism (unsigned this_mask,
+ unsigned ARG_UNUSED (outer_mask))
+{
+ return this_mask;
+}
+
namespace {
const pass_data pass_data_oacc_device_lower =
@@ -1678,6 +1678,14 @@ or zero if unbounded.",
int, (int axis),
default_goacc_dim_limit)
+DEFHOOK
+(adjust_parallelism,
+"This hook allows the accelerator compiler to remove any unused\n\
+parallelism exposed in the current loop @var{THIS_MASK}, and the\n\
+enclosing loop @var{OUTER_MASK}. It returns an adjusted mask.",
+unsigned, (unsigned this_mask, unsigned outer_mask),
+default_goacc_adjust_parallelism)
+
DEFHOOK
(fork_join,
"This hook can be used to convert IFN_GOACC_FORK and IFN_GOACC_JOIN\n\
@@ -125,6 +125,7 @@ extern bool default_goacc_validate_dims (tree, int [], int);
extern int default_goacc_dim_limit (int);
extern bool default_goacc_fork_join (gcall *, const int [], bool);
extern void default_goacc_reduction (gcall *);
+extern unsigned default_goacc_adjust_parallelism (unsigned, unsigned);
/* These are here, and not in hooks.[ch], because not all users of
hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS. */
--
2.17.1