From patchwork Sun Dec 6 20:10:27 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: James Norris X-Patchwork-Id: 553183 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 907BF140297 for ; Mon, 7 Dec 2015 07:10:47 +1100 (AEDT) Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b=kazGyhX2; dkim-atps=neutral DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :message-id:date:from:mime-version:to:subject:content-type; q= dns; s=default; b=QzEzDKDXVpSUCNS5VpuFtgMan2Lpl3R/UaHrjPEga48poA bUctsJRjBwA/IZRSatEZz3LQqH5bKAvKyUi4D6CTl0SK+nNMBZJlV+BLJALrJg32 uV1z1dux1wWpEY2Jw28zwwDX68rkaOpYvd55eq9dHjy2DFs9jclXWMGhX1obU= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :message-id:date:from:mime-version:to:subject:content-type; s= default; bh=JHC8pduzYWQ/gttbIeinV0Cd+RY=; b=kazGyhX2v7gxIfvDejP+ 9xTyQWeR7bBO7sudUx9APNIYHJTbLBsgvq5PkwtzKXNguqOSss6om0hiZpVOUhjY 0IOXwXNTxe4Mxb6X7uBkqNaOsUCUCbzXzkoGMYjebff8vb0P7aaotajHmy6s1x7j 58qydX8RbdwrSBVr5P26TF0= Received: (qmail 51721 invoked by alias); 6 Dec 2015 20:10:40 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 51700 invoked by uid 89); 6 Dec 2015 20:10:36 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-2.1 required=5.0 tests=AWL, BAYES_00, RCVD_IN_DNSWL_LOW, SPF_PASS autolearn=ham version=3.3.2 X-HELO: relay1.mentorg.com Received: from relay1.mentorg.com (HELO relay1.mentorg.com) (192.94.38.131) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Sun, 06 Dec 2015 20:10:35 +0000 Received: from svr-orw-fem-04.mgc.mentorg.com ([147.34.97.41]) by relay1.mentorg.com with esmtp id 1a5fdf-00055t-OR from James_Norris@mentor.com ; Sun, 06 Dec 2015 12:10:31 -0800 Received: from [172.30.80.171] (147.34.91.1) by svr-orw-fem-04.mgc.mentorg.com (147.34.97.41) with Microsoft SMTP Server id 14.3.224.2; Sun, 6 Dec 2015 12:10:28 -0800 Message-ID: <56649633.8080803@codesourcery.com> Date: Sun, 6 Dec 2015 14:10:27 -0600 From: James Norris User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Thunderbird/31.7.0 MIME-Version: 1.0 To: GCC Patches , Jakub Jelinek Subject: [PATCH] Fix Fortran deviceptr clause. Hi, Attached is a patch that fixes some runtime issues dealing with the deviceptr clause in Fortran. There were some corner cases that were not being dealt with correctly, e.g., specifying the deviceptr clause and not using the variable specified by the deviceptr clause within the associated region. Also a new set of test cases has been added. Regtested on x86_64-pc-linux-gnu Ok for trunk? Thanks! Jim diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index cde0b5c..df26e7c 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,10 @@ +2015-12-XX James Norris + + * oacc-parallel.c (GOACC_parallel_keyed, GOACC_data_start): + Handle Fortran deviceptr clause combination. + * testsuite/libgomp.oacc-fortran/deviceptr-1.f90: New test. + * testsuite/libgomp.oacc-fortran/declare-1.f90: Remove erroneous test. + 2015-12-02 Thomas Schwinge * testsuite/libgomp.oacc-c-c++-common/host_data-2.c: Restrict to diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index db7cab3..bf8cbfe 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -88,6 +88,42 @@ GOACC_parallel_keyed (int device, void (*fn) (void *), thr = goacc_thread (); acc_dev = thr->dev; + for (i = 0; i < mapnum; i++) + { + unsigned short kind1 = kinds[i] & 0xff; + + /* Handle Fortran deviceptr clause. */ + if (kind1 == GOMP_MAP_FORCE_DEVICEPTR) + { + unsigned short kind2; + + if (i < (signed)mapnum - 1) + kind2 = kinds[i + 1] & 0xff; + else + kind2 = 0xffff; + + /* If the size is right, skip it. */ + if (sizes[i] == sizeof (void *)) + continue; + + /* At this point, we're dealing with a Fortran deviceptr. + If the next element is not what we're expecting, then + this is an instance of where the deviceptr variable was + not used within the region and the pointer was removed + by the gimplifier. */ + if (kind2 == GOMP_MAP_POINTER + && sizes[i + 1] == 0 + && hostaddrs[i] == *(void **)hostaddrs[i + 1]) + { + kinds[i+1] = kinds[i]; + sizes[i+1] = sizeof (void *); + } + + /* Invalidate the entry. */ + hostaddrs[i] = NULL; + } + } + /* Host fallback if "if" clause is false or if the current device is set to the host. */ if (host_fallback) @@ -172,8 +208,13 @@ GOACC_parallel_keyed (int device, void (*fn) (void *), devaddrs = gomp_alloca (sizeof (void *) * mapnum); for (i = 0; i < mapnum; i++) - devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start - + tgt->list[i].key->tgt_offset); + { + if (tgt->list[i].key != NULL) + devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start + + tgt->list[i].key->tgt_offset); + else + devaddrs[i] = NULL; + } acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, async, dims, tgt); @@ -210,6 +251,7 @@ GOACC_data_start (int device, size_t mapnum, { bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; struct target_mem_desc *tgt; + int i; #ifdef HAVE_INTTYPES_H gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", @@ -224,6 +266,42 @@ GOACC_data_start (int device, size_t mapnum, struct goacc_thread *thr = goacc_thread (); struct gomp_device_descr *acc_dev = thr->dev; + for (i = 0; i < mapnum; i++) + { + unsigned short kind1 = kinds[i] & 0xff; + + /* Handle Fortran deviceptr clause. */ + if (kind1 == GOMP_MAP_FORCE_DEVICEPTR) + { + unsigned short kind2; + + if (i < (signed)mapnum - 1) + kind2 = kinds[i + 1] & 0xff; + else + kind2 = 0xffff; + + /* If the size is right, skip it. */ + if (sizes[i] == sizeof (void *)) + continue; + + /* At this point, we're dealing with a Fortran deviceptr. + If the next element is not what we're expecting, then + this is an instance of where the deviceptr variable was + not used within the region and the pointer was removed + by the gimplifier. */ + if (kind2 == GOMP_MAP_POINTER + && sizes[i + 1] == 0 + && hostaddrs[i] == *(void **)hostaddrs[i + 1]) + { + kinds[i+1] = kinds[i]; + sizes[i+1] = sizeof (void *); + } + + /* Invalidate the entry. */ + hostaddrs[i] = NULL; + } + } + /* Host fallback or 'do nothing'. */ if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || host_fallback) diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90 index f717d1b..151f409 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90 @@ -6,24 +6,6 @@ module vars !$acc declare create (z) end module vars -subroutine subr6 (a, d) - implicit none - integer, parameter :: N = 8 - integer :: i - integer :: a(N) - !$acc declare deviceptr (a) - integer :: d(N) - - i = 0 - - !$acc parallel copy (d) - do i = 1, N - d(i) = a(i) + a(i) - end do - !$acc end parallel - -end subroutine - subroutine subr5 (a, b, c, d) implicit none integer, parameter :: N = 8 @@ -201,15 +183,6 @@ subroutine subr0 (a, b, c, d) if (d(i) .ne. 13) call abort end do - call subr6 (a, d) - - call test (a, .true.) - call test (d, .false.) - - do i = 1, N - if (d(i) .ne. 16) call abort - end do - end subroutine program main @@ -241,7 +214,7 @@ program main if (a(i) .ne. 8) call abort if (b(i) .ne. 8) call abort if (c(i) .ne. 8) call abort - if (d(i) .ne. 16) call abort + if (d(i) .ne. 13) call abort end do diff --git a/libgomp/testsuite/libgomp.oacc-fortran/deviceptr-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/deviceptr-1.f90 new file mode 100644 index 0000000..879cbf1 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/deviceptr-1.f90 @@ -0,0 +1,197 @@ +! { dg-do run } + +!! Test the deviceptr clause with various directives +!! and in combination with other directives where +!! the deviceptr variable is implied. + +subroutine subr1 (a, b) + implicit none + integer, parameter :: N = 8 + integer :: a(N) + integer :: b(N) + integer :: i = 0 + + !$acc data deviceptr (a) + + !$acc parallel copy (b) + do i = 1, N + a(i) = i * 2 + b(i) = a(i) + end do + !$acc end parallel + + !$acc end data + +end subroutine + +subroutine subr2 (a, b) + implicit none + integer, parameter :: N = 8 + integer :: a(N) + !$acc declare deviceptr (a) + integer :: b(N) + integer :: i = 0 + + !$acc parallel copy (b) + do i = 1, N + a(i) = i * 4 + b(i) = a(i) + end do + !$acc end parallel + +end subroutine + +subroutine subr3 (a, b) + implicit none + integer, parameter :: N = 8 + integer :: a(N) + !$acc declare deviceptr (a) + integer :: b(N) + integer :: i = 0 + + !$acc kernels copy (b) + do i = 1, N + a(i) = i * 8 + b(i) = a(i) + end do + !$acc end kernels + +end subroutine + +subroutine subr4 (a, b) + implicit none + integer, parameter :: N = 8 + integer :: a(N) + integer :: b(N) + integer :: i = 0 + + !$acc parallel deviceptr (a) copy (b) + do i = 1, N + a(i) = i * 16 + b(i) = a(i) + end do + !$acc end parallel + +end subroutine + +subroutine subr5 (a, b) + implicit none + integer, parameter :: N = 8 + integer :: a(N) + integer :: b(N) + integer :: i = 0 + + !$acc kernels deviceptr (a) copy (b) + do i = 1, N + a(i) = i * 32 + b(i) = a(i) + end do + !$acc end kernels + +end subroutine + +subroutine subr6 (a, b) + implicit none + integer, parameter :: N = 8 + integer :: a(N) + integer :: b(N) + integer :: i = 0 + + !$acc parallel deviceptr (a) copy (b) + do i = 1, N + b(i) = i + end do + !$acc end parallel + +end subroutine + +subroutine subr7 (a, b) + implicit none + integer, parameter :: N = 8 + integer :: a(N) + integer :: b(N) + integer :: i = 0 + + !$acc data deviceptr (a) + + !$acc parallel copy (b) + do i = 1, N + a(i) = i * 2 + b(i) = a(i) + end do + !$acc end parallel + + !$acc parallel copy (b) + do i = 1, N + a(i) = b(i) * 2 + b(i) = a(i) + end do + !$acc end parallel + + !$acc end data + +end subroutine + +program main + use iso_c_binding, only: c_ptr, c_f_pointer + implicit none + type (c_ptr) :: cp + integer, parameter :: N = 8 + integer, pointer :: fp(:) + integer :: i = 0 + integer :: b(N) + + interface + function acc_malloc (s) bind (C) + use iso_c_binding, only: c_ptr, c_size_t + integer (c_size_t), value :: s + type (c_ptr) :: acc_malloc + end function + end interface + + cp = acc_malloc (N * sizeof (fp(N))) + call c_f_pointer (cp, fp, [N]) + + call subr1 (fp, b) + + do i = 1, N + if (b(i) .ne. i * 2) call abort + end do + + call subr2 (fp, b) + + do i = 1, N + if (b(i) .ne. i * 4) call abort + end do + + call subr3 (fp, b) + + do i = 1, N + if (b(i) .ne. i * 8) call abort + end do + + call subr4 (fp, b) + + do i = 1, N + if (b(i) .ne. i * 16) call abort + end do + + call subr5 (fp, b) + + do i = 1, N + if (b(i) .ne. i * 32) call abort + end do + + call subr6 (fp, b) + + do i = 1, N + if (b(i) .ne. i) call abort + end do + + call subr7 (fp, b) + + do i = 1, N + if (b(i) .ne. i * 4) call abort + end do + +end program main