From patchwork Fri Sep 20 21:17:33 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Julian Brown X-Patchwork-Id: 1165445 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-509380-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="YaAUR6D5"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 46Zmld3Q0vz9s7T for ; Sat, 21 Sep 2019 07:18:03 +1000 (AEST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:cc:subject:date:message-id:mime-version :content-transfer-encoding:content-type; q=dns; s=default; b=imS HEZN/Q/qMfPKsjmsMYgQ6kRkYwmjYzTPSMRt7XFMwjiJ/bDdLCqvKiT9H/KlALQO oGJag/KNEq72rsd2OcnZ8nIMMhy227F12JpuPtG5EOqjs7S4DmrXEmnzEaV5+RP7 hHIKJMONXuSQZNb9n0aplgZp/vNYNA5LZwS1iqxw= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:cc:subject:date:message-id:mime-version :content-transfer-encoding:content-type; s=default; bh=qtbDXNDWi W9G93joiJmlVEA8gbs=; b=YaAUR6D5mhUWY2/BxmcW0kurxmnJud7AxxsOOWBqr Dw36t/cUiGfT+HxJK4OVtvgbUul6tgKHVf18WOaKHv8G8x8Q6pH9oInSA2/StKsH lTB9sXkw01gDvx9HQ6mmBa+0Dj7j5BP3GIl/PrkQDBSQ1I2S5fMnL5amjhlkeRP4 Ys= Received: (qmail 6115 invoked by alias); 20 Sep 2019 21:17:56 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 6104 invoked by uid 89); 20 Sep 2019 21:17:55 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-22.7 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, RCVD_IN_DNSWL_LOW, SPF_PASS autolearn=ham version=3.3.1 spammy=appearing, broadcast X-HELO: esa3.mentor.iphmx.com Received: from esa3.mentor.iphmx.com (HELO esa3.mentor.iphmx.com) (68.232.137.180) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Fri, 20 Sep 2019 21:17:53 +0000 IronPort-SDR: 2vKt2RPA1xFTJSX2d5lCis277tFJ65DcVLa1e9+bOWeb4EWDnIKVUA1yL+hWwQJ832dybIhGnn Zu/8I6dEaYPsR3NlcpbMtYl6pWgxD3/uaQWj8PCOAb4VYYIv2E0ENrVxbBr+szROwZ+BVwWsql wIuQNC6kw3xi1iZcfx7Y6VYiaM5Rzl/tZRrngYU8W4J8A2UAp0zp2Spm46uFsyTnTgSNgta0Nb XxDzJedwMlNQpm6XdkqJ3PGlldVEFaDxDCjVk+WS7pWwMKZyGqfHf14WjWNRkeNS+ercAl0HvZ pUw= Received: from orw-gwy-01-in.mentorg.com ([192.94.38.165]) by esa3.mentor.iphmx.com with ESMTP; 20 Sep 2019 13:17:46 -0800 IronPort-SDR: k5mKQbsTbOIopnOwReQKKblezj3Xb4/Dlvmkpm5YsXdz80OR8NlPltoCVgCf9P6neURQCUvv8U QvMiIcD9iGdIjOg+lszsP+hd1ZdXDtVLZCGc/ceKlEJVn8NUFQRaXUlFxVqqajTmoq/CdNzIBz YSbNBbjfk8/wiWg9gPEcXHQGqBQIPFeq1ET3Y/KKE9NmcCDmMxn0Y+oKJcDI1MZB0jEuaqte4Y AheNM8/EMdITkWeUABVhWmJDM1gK3/80igBiA7hYldfVjZGw3NEt3o8MSQFw91ReIzXqQl3ofd JZY= From: Julian Brown To: CC: Andrew Stubbs , Thomas Schwinge Subject: [PATCH] [og9] Handle references in OpenACC "private" clauses Date: Fri, 20 Sep 2019 14:17:33 -0700 Message-ID: <20190920211734.28104-1-julian@codesourcery.com> MIME-Version: 1.0 X-IsSubscribed: yes This patch rewrites reference-type variables appearing in OpenACC "private" clauses in a similar way to how such variables are handled in reduction clauses. Otherwise, the mechanism used to privatize reference variables is currently ill-suited to the worker-partitioning mechanism used for AMD GCN, and each worker ends up accessing worker 0's copy of those reference variables via broadcast pointers. Rewriting reference variables to non-reference-type scalars sidesteps that problem. This is intended as a somewhat temporary solution: it works for the newly-included tests, but is not very elegant. Tested with offloading to AMD GCN. I will apply to the openacc-gcc-9-branch shortly. Cheers, Julian ChangeLog gcc/ * gimplify.c (localize_reductions): Rewrite references for OMP_CLAUSE_PRIVATE also. libgomp/ * testsuite/libgomp.oacc-fortran/privatized-ref-1.f95: New test. * testsuite/libgomp.oacc-c++/privatized-ref-2.C: New test. * testsuite/libgomp.oacc-c++/privatized-ref-3.C: New test. --- gcc/ChangeLog.openacc | 5 ++ gcc/gimplify.c | 15 ++++ libgomp/ChangeLog.openacc | 6 ++ .../libgomp.oacc-c++/privatized-ref-2.C | 64 +++++++++++++++++ .../libgomp.oacc-c++/privatized-ref-3.C | 64 +++++++++++++++++ .../libgomp.oacc-fortran/privatized-ref-1.f95 | 71 +++++++++++++++++++ 6 files changed, 225 insertions(+) create mode 100644 libgomp/testsuite/libgomp.oacc-c++/privatized-ref-2.C create mode 100644 libgomp/testsuite/libgomp.oacc-c++/privatized-ref-3.C create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-1.f95 diff --git a/gcc/ChangeLog.openacc b/gcc/ChangeLog.openacc index fe584959153..523b6eb1d74 100644 --- a/gcc/ChangeLog.openacc +++ b/gcc/ChangeLog.openacc @@ -1,3 +1,8 @@ +2019-09-20 Julian Brown + + * gimplify.c (localize_reductions): Rewrite references for + OMP_CLAUSE_PRIVATE also. + 2019-09-17 Tobias Burnus * config/gcn/gcn.c (gcn_expand_scalar_to_vector_address, diff --git a/gcc/gimplify.c b/gcc/gimplify.c index d16611d3617..d95ad5d4baa 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -10879,6 +10879,21 @@ localize_reductions (tree clauses, tree body) OMP_CLAUSE_REDUCTION_PRIVATE_DECL (c) = new_var; } + else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_PRIVATE) + { + var = OMP_CLAUSE_DECL (c); + + if (!lang_hooks.decls.omp_privatize_by_reference (var)) + continue; + + type = TREE_TYPE (TREE_TYPE (var)); + new_var = create_tmp_var (type, IDENTIFIER_POINTER (DECL_NAME (var))); + + pr.ref_var = var; + pr.local_var = new_var; + + walk_tree (&body, localize_reductions_r, &pr, NULL); + } } diff --git a/libgomp/ChangeLog.openacc b/libgomp/ChangeLog.openacc index 7813760e642..d9d1c353e31 100644 --- a/libgomp/ChangeLog.openacc +++ b/libgomp/ChangeLog.openacc @@ -1,3 +1,9 @@ +2019-09-20 Julian Brown + + * testsuite/libgomp.oacc-fortran/privatized-ref-1.f95: New test. + * testsuite/libgomp.oacc-c++/privatized-ref-2.C: New test. + * testsuite/libgomp.oacc-c++/privatized-ref-3.C: New test. + 2019-09-19 Julian Brown * plugin/plugin-nvptx.c (GOMP_OFFLOAD_openacc_async_host2dev): diff --git a/libgomp/testsuite/libgomp.oacc-c++/privatized-ref-2.C b/libgomp/testsuite/libgomp.oacc-c++/privatized-ref-2.C new file mode 100644 index 00000000000..3884f163132 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/privatized-ref-2.C @@ -0,0 +1,64 @@ +/* { dg-do run } */ + +#include + +void workers (void) +{ + double res[65536]; + int i; + +#pragma acc parallel copyout(res) num_gangs(64) num_workers(64) + { + int i, j; +#pragma acc loop gang + for (i = 0; i < 256; i++) + { +#pragma acc loop worker + for (j = 0; j < 256; j++) + { + int tmpvar; + int &tmpref = tmpvar; + tmpref = (i * 256 + j) * 99; + res[i * 256 + j] = tmpref; + } + } + } + + for (i = 0; i < 65536; i++) + if (res[i] != i * 99) + abort (); +} + +void vectors (void) +{ + double res[65536]; + int i; + +#pragma acc parallel copyout(res) num_gangs(64) num_workers(64) + { + int i, j; +#pragma acc loop gang worker + for (i = 0; i < 256; i++) + { +#pragma acc loop vector + for (j = 0; j < 256; j++) + { + int tmpvar; + int &tmpref = tmpvar; + tmpref = (i * 256 + j) * 101; + res[i * 256 + j] = tmpref; + } + } + } + + for (i = 0; i < 65536; i++) + if (res[i] != i * 101) + abort (); +} + +int main (int argc, char *argv[]) +{ + workers (); + vectors (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c++/privatized-ref-3.C b/libgomp/testsuite/libgomp.oacc-c++/privatized-ref-3.C new file mode 100644 index 00000000000..c1a10cba31b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/privatized-ref-3.C @@ -0,0 +1,64 @@ +/* { dg-do run } */ + +#include + +void workers (void) +{ + double res[65536]; + int i; + +#pragma acc parallel copyout(res) num_gangs(64) num_workers(64) + { + int i, j; + int tmpvar; + int &tmpref = tmpvar; +#pragma acc loop gang + for (i = 0; i < 256; i++) + { +#pragma acc loop worker private(tmpref) + for (j = 0; j < 256; j++) + { + tmpref = (i * 256 + j) * 99; + res[i * 256 + j] = tmpref; + } + } + } + + for (i = 0; i < 65536; i++) + if (res[i] != i * 99) + abort (); +} + +void vectors (void) +{ + double res[65536]; + int i; + +#pragma acc parallel copyout(res) num_gangs(64) num_workers(64) + { + int i, j; + int tmpvar; + int &tmpref = tmpvar; +#pragma acc loop gang worker + for (i = 0; i < 256; i++) + { +#pragma acc loop vector private(tmpref) + for (j = 0; j < 256; j++) + { + tmpref = (i * 256 + j) * 101; + res[i * 256 + j] = tmpref; + } + } + } + + for (i = 0; i < 65536; i++) + if (res[i] != i * 101) + abort (); +} + +int main (int argc, char *argv[]) +{ + workers (); + vectors (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-1.f95 b/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-1.f95 new file mode 100644 index 00000000000..f16f69c1d1b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-1.f95 @@ -0,0 +1,71 @@ +! { dg-do run } + +program main + implicit none + integer :: myint + integer :: i + real :: res(65536), tmp + + res(:) = 0.0 + + myint = 5 + call workers(myint, res) + + do i=1,65536 + tmp = i * 99 + if (res(i) .ne. tmp) stop 1 + end do + + res(:) = 0.0 + + myint = 7 + call vectors(myint, res) + + do i=1,65536 + tmp = i * 101 + if (res(i) .ne. tmp) stop 2 + end do + +contains + + subroutine workers(t1, res) + implicit none + integer :: t1 + integer :: i, j + real, intent(out) :: res(:) + + !$acc parallel copyout(res) num_gangs(64) num_workers(64) + + !$acc loop gang + do i=0,255 + !$acc loop worker private(t1) + do j=1,256 + t1 = (i * 256 + j) * 99 + res(i * 256 + j) = t1 + end do + end do + + !$acc end parallel + end subroutine workers + + subroutine vectors(t1, res) + implicit none + integer :: t1 + integer :: i, j + real, intent(out) :: res(:) + + !$acc parallel copyout(res) num_gangs(64) num_workers(64) + + !$acc loop gang worker + do i=0,255 + !$acc loop vector private(t1) + do j=1,256 + t1 = (i * 256 + j) * 101 + res(i * 256 + j) = t1 + end do + end do + + !$acc end parallel + end subroutine vectors + +end program main