Message ID | 13f6f0f3-82d0-7464-38e8-6f2792c09227@codesourcery.com |
---|---|
State | New |
Headers | show |
Series | [og7] Update deviceptr handling in Fortran | expand |
Hi Cesar! On Mon, 7 May 2018 08:49:26 -0700, Cesar Philippidis <cesar@codesourcery.com> wrote: > This patch teaches both the Fortran FE and the gimplifier how to only > utilize one data mapping for OpenACC deviceptr clauses. [...] Thanks! (I didn't verify your code changes.) > In addition to XPASS'ing devicetpr-1.f90, this patch [...] Apart from one remaining XFAIL for "-Os" (see PR80995), I now too see the following XPASSes on my main development machine: PASS: libgomp.oacc-fortran/deviceptr-1.f90 -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -O0 (test for excess errors) PASS: libgomp.oacc-fortran/deviceptr-1.f90 -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -O0 execution test PASS: libgomp.oacc-fortran/deviceptr-1.f90 -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -O1 (test for excess errors) PASS: libgomp.oacc-fortran/deviceptr-1.f90 -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -O1 execution test [-XFAIL:-]{+XPASS:+} libgomp.oacc-fortran/deviceptr-1.f90 -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -O2 (test for excess errors) PASS: libgomp.oacc-fortran/deviceptr-1.f90 -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -O2 execution test [-XFAIL:-]{+XPASS:+} libgomp.oacc-fortran/deviceptr-1.f90 -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions (test for excess errors) PASS: libgomp.oacc-fortran/deviceptr-1.f90 -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions execution test [-XFAIL:-]{+XPASS:+} libgomp.oacc-fortran/deviceptr-1.f90 -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -O3 -g (test for excess errors) PASS: libgomp.oacc-fortran/deviceptr-1.f90 -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -O3 -g execution test XFAIL: libgomp.oacc-fortran/deviceptr-1.f90 -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -Os (test for excess errors) PASS: libgomp.oacc-fortran/deviceptr-1.f90 -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -Os execution test > I've applied this patch to og7 [...]. It was tempting to remove the > XFAIL from deviceptr-1.f90, but the test case still fails on at least > one legacy driver. That's surprising. These XFAILs were because "OpenACC kernels construct will be executed sequentially", so shouldn't have any relationship to Nvidia driver versions. If you identified such a problem (which versions and hardware exactly?), that's a separate problam and needs to be filed as a new issue, and the reference in the test case file updated. So please verify that, and/or alternatively remove the non-"-Os" XFAILs. Also please verify and resolve the following regression introduced by your patch: PASS: c-c++-common/goacc/deviceptr-4.c (test for excess errors) [-PASS:-]{+FAIL:+} c-c++-common/goacc/deviceptr-4.c scan-tree-dump-times gimple "#pragma omp target oacc_parallel.*map\\(tofrom:a" 1 [-PASS:-]{+FAIL:+} c-c++-common/goacc/deviceptr-4.c -std=c++11 scan-tree-dump-times gimple "#pragma omp target oacc_parallel.*map\\(tofrom:a" 1 PASS: c-c++-common/goacc/deviceptr-4.c -std=c++11 (test for excess errors) [-PASS:-]{+FAIL:+} c-c++-common/goacc/deviceptr-4.c -std=c++14 scan-tree-dump-times gimple "#pragma omp target oacc_parallel.*map\\(tofrom:a" 1 PASS: c-c++-common/goacc/deviceptr-4.c -std=c++14 (test for excess errors) [-PASS:-]{+FAIL:+} c-c++-common/goacc/deviceptr-4.c -std=c++98 scan-tree-dump-times gimple "#pragma omp target oacc_parallel.*map\\(tofrom:a" 1 PASS: c-c++-common/goacc/deviceptr-4.c -std=c++98 (test for excess errors) Grüße Thomas
On 05/09/2018 03:50 AM, Thomas Schwinge wrote: >> In addition to XPASS'ing devicetpr-1.f90, this patch [...] > > Apart from one remaining XFAIL for "-Os" (see PR80995), I now too see the > following XPASSes on my main development machine: > > PASS: libgomp.oacc-fortran/deviceptr-1.f90 -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -O0 (test for excess errors) > PASS: libgomp.oacc-fortran/deviceptr-1.f90 -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -O0 execution test > PASS: libgomp.oacc-fortran/deviceptr-1.f90 -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -O1 (test for excess errors) > PASS: libgomp.oacc-fortran/deviceptr-1.f90 -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -O1 execution test > [-XFAIL:-]{+XPASS:+} libgomp.oacc-fortran/deviceptr-1.f90 -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -O2 (test for excess errors) > PASS: libgomp.oacc-fortran/deviceptr-1.f90 -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -O2 execution test > [-XFAIL:-]{+XPASS:+} libgomp.oacc-fortran/deviceptr-1.f90 -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions (test for excess errors) > PASS: libgomp.oacc-fortran/deviceptr-1.f90 -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions execution test > [-XFAIL:-]{+XPASS:+} libgomp.oacc-fortran/deviceptr-1.f90 -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -O3 -g (test for excess errors) > PASS: libgomp.oacc-fortran/deviceptr-1.f90 -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -O3 -g execution test > XFAIL: libgomp.oacc-fortran/deviceptr-1.f90 -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -Os (test for excess errors) > PASS: libgomp.oacc-fortran/deviceptr-1.f90 -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -Os execution test > >> I've applied this patch to og7 [...]. It was tempting to remove the >> XFAIL from deviceptr-1.f90, but the test case still fails on at least >> one legacy driver. > > That's surprising. These XFAILs were because "OpenACC kernels construct > will be executed sequentially", so shouldn't have any relationship to > Nvidia driver versions. If you identified such a problem (which versions > and hardware exactly?), that's a separate problam and needs to be filed > as a new issue, and the reference in the test case file updated. So > please verify that, and/or alternatively remove the non-"-Os" XFAILs. You're correct. On further inspection, only -Os fails. The attached patch removes the xfails for -O2 and -O3. > Also please verify and resolve the following regression introduced by > your patch: > > PASS: c-c++-common/goacc/deviceptr-4.c (test for excess errors) > [-PASS:-]{+FAIL:+} c-c++-common/goacc/deviceptr-4.c scan-tree-dump-times gimple "#pragma omp target oacc_parallel.*map\\(tofrom:a" 1 > > [-PASS:-]{+FAIL:+} c-c++-common/goacc/deviceptr-4.c -std=c++11 scan-tree-dump-times gimple "#pragma omp target oacc_parallel.*map\\(tofrom:a" 1 > PASS: c-c++-common/goacc/deviceptr-4.c -std=c++11 (test for excess errors) > [-PASS:-]{+FAIL:+} c-c++-common/goacc/deviceptr-4.c -std=c++14 scan-tree-dump-times gimple "#pragma omp target oacc_parallel.*map\\(tofrom:a" 1 > PASS: c-c++-common/goacc/deviceptr-4.c -std=c++14 (test for excess errors) > [-PASS:-]{+FAIL:+} c-c++-common/goacc/deviceptr-4.c -std=c++98 scan-tree-dump-times gimple "#pragma omp target oacc_parallel.*map\\(tofrom:a" 1 > PASS: c-c++-common/goacc/deviceptr-4.c -std=c++98 (test for excess errors) I forgot to update the expected data mapping in devicetpr-4.c. Now, instead of implicitly adding a 'copy' clause for know deviceptr variables, the gimplifier will assign a force_deviceptr clause. I've applied the attached patch to og7 to fix both of the issues you've identified. Cesar 2018-05-09 Cesar Philippidis <cesar@codesourcery.com> gcc/testsuite/ * c-c++-common/goacc/deviceptr-4.c: Update expected data mapping. libgomp/ * libgomp.oacc-fortran/deviceptr-1.f90: Remove xfail for -O2 and -O3. diff --git a/gcc/testsuite/c-c++-common/goacc/deviceptr-4.c b/gcc/testsuite/c-c++-common/goacc/deviceptr-4.c index db1b91633a6..79a51620db9 100644 --- a/gcc/testsuite/c-c++-common/goacc/deviceptr-4.c +++ b/gcc/testsuite/c-c++-common/goacc/deviceptr-4.c @@ -8,4 +8,4 @@ subr (int *a) a[0] += 1.0; } -/* { dg-final { scan-tree-dump-times "#pragma omp target oacc_parallel.*map\\(tofrom:a" 1 "gimple" } } */ +/* { dg-final { scan-tree-dump-times "#pragma omp target oacc_parallel.*map\\(force_deviceptr:a" 1 "gimple" } } */ diff --git a/libgomp/testsuite/libgomp.oacc-fortran/deviceptr-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/deviceptr-1.f90 index 610d071393c..7c8b063b220 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/deviceptr-1.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/deviceptr-1.f90 @@ -7,7 +7,7 @@ ! regressed with the "Partially enable GOMP_MAP_FIRSTPRIVATE_POINTER in ! gfortran" changes. ! warning: OpenACC kernels construct will be executed sequentially; will by default avoid offloading to prevent data copy penalty -! { dg-xfail-if "TODO" { openacc_nvidia_accel_selected } { "-Os" "-O2" "-O3" } { "" } } +! { dg-xfail-if "TODO" { openacc_nvidia_accel_selected } { "-Os" } { "" } } subroutine subr1 (a, b) implicit none
2018-05-07 Cesar Philippidis <cesar@codesourcery.com> gcc/fortran/ * trans-openmp.c (gfc_omp_finish_clause): Don't create pointer data mappings for deviceptr clauses. (gfc_trans_omp_clauses_1): Likewise. gcc/ * gimplify.c (enum gimplify_omp_var_data): Add GOVD_DEVICETPR. (omp_notice_variable): Add GOVD_DEVICEPTR attribute when appropriate. (gimplify_scan_omp_clauses): Likewise. (gimplify_adjust_omp_clauses_1): Set GOMP_MAP_FORCE_DEVICEPTR for implicit deviceptr mappings. diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c index ef828e8ac06..1a8fb3461ef 100644 --- a/gcc/fortran/trans-openmp.c +++ b/gcc/fortran/trans-openmp.c @@ -1069,6 +1069,8 @@ gfc_omp_finish_clause (tree c, gimple_seq *pre_p) #endif tree c2 = NULL_TREE, c3 = NULL_TREE, c4 = NULL_TREE; + if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FORCE_DEVICEPTR) + return; if (POINTER_TYPE_P (TREE_TYPE (decl))) { if (!gfc_omp_privatize_by_reference (decl) @@ -2159,6 +2161,12 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses, || n->expr->ref->u.ar.type == AR_FULL)) { if (POINTER_TYPE_P (TREE_TYPE (decl)) + && n->u.map_op == OMP_MAP_FORCE_DEVICEPTR) + { + OMP_CLAUSE_DECL (node) = decl; + goto finalize_map_clause; + } + else if (POINTER_TYPE_P (TREE_TYPE (decl)) && (gfc_omp_privatize_by_reference (decl) || GFC_DECL_GET_SCALAR_POINTER (decl) || GFC_DECL_GET_SCALAR_ALLOCATABLE (decl) @@ -2168,9 +2176,7 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses, { tree orig_decl = decl; enum gomp_map_kind gmk = GOMP_MAP_FIRSTPRIVATE_POINTER; - if (n->u.map_op == OMP_MAP_FORCE_DEVICEPTR) - gmk = GOMP_MAP_POINTER; - else if (GFC_DECL_GET_SCALAR_ALLOCATABLE (decl) + if (GFC_DECL_GET_SCALAR_ALLOCATABLE (decl) && (n->sym->attr.oacc_declare_create) && clauses->update_allocatable) gmk = GOMP_MAP_ALWAYS_POINTER; diff --git a/gcc/gimplify.c b/gcc/gimplify.c index 44c03ab8310..458e9ade797 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -105,6 +105,9 @@ enum gimplify_omp_var_data /* Flag for GOVD_MAP, copy to/from private storage inside offloaded region. */ GOVD_MAP_PRIVATE = 1048576, + /* Flag for OpenACC deviceptrs. */ + GOVD_DEVICEPTR = (1<<21), + GOVD_DATA_SHARE_CLASS = (GOVD_SHARED | GOVD_PRIVATE | GOVD_FIRSTPRIVATE | GOVD_LASTPRIVATE | GOVD_REDUCTION | GOVD_LINEAR | GOVD_LOCAL) @@ -7209,6 +7212,7 @@ omp_notice_variable (struct gimplify_omp_ctx *ctx, tree decl, bool in_code) error ("variable %qE declared in enclosing " "%<host_data%> region", DECL_NAME (decl)); nflags |= GOVD_MAP; + nflags |= (n2->value & GOVD_DEVICEPTR); if (octx->region_type == ORT_ACC_DATA && (n2->value & GOVD_MAP_0LEN_ARRAY)) nflags |= GOVD_MAP_0LEN_ARRAY; @@ -8250,6 +8254,8 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ALWAYS_TO || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ALWAYS_TOFROM) flags |= GOVD_MAP_ALWAYS_TO; + else if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FORCE_DEVICEPTR) + flags |= GOVD_DEVICEPTR; goto do_add; case OMP_CLAUSE_DEPEND: @@ -8927,7 +8933,8 @@ gimplify_adjust_omp_clauses_1 (splay_tree_node n, void *data) /* Not all combinations of these GOVD_MAP flags are actually valid. */ switch (flags & (GOVD_MAP_TO_ONLY | GOVD_MAP_FORCE - | GOVD_MAP_FORCE_PRESENT)) + | GOVD_MAP_FORCE_PRESENT + | GOVD_DEVICEPTR)) { case 0: kind = GOMP_MAP_TOFROM; @@ -8944,6 +8951,9 @@ gimplify_adjust_omp_clauses_1 (splay_tree_node n, void *data) case GOVD_MAP_FORCE_PRESENT: kind = GOMP_MAP_FORCE_PRESENT; break; + case GOVD_DEVICEPTR: + kind = GOMP_MAP_FORCE_DEVICEPTR; + break; default: gcc_unreachable (); }