From patchwork Tue Aug 4 10:00:16 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Tom de Vries X-Patchwork-Id: 1340765 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (sender SPF authorized) smtp.mailfrom=gcc.gnu.org (client-ip=8.43.85.97; helo=sourceware.org; envelope-from=gcc-patches-bounces@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=suse.de Received: from sourceware.org (server2.sourceware.org [8.43.85.97]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 4BLVcV2zdZz9sSt for ; Tue, 4 Aug 2020 20:00:26 +1000 (AEST) Received: from server2.sourceware.org (localhost [IPv6:::1]) by sourceware.org (Postfix) with ESMTP id 648743857C5A; Tue, 4 Aug 2020 10:00:22 +0000 (GMT) X-Original-To: gcc-patches@gcc.gnu.org Delivered-To: gcc-patches@gcc.gnu.org Received: from mx2.suse.de (mx2.suse.de [195.135.220.15]) by sourceware.org (Postfix) with ESMTPS id 5E3D83857C57 for ; Tue, 4 Aug 2020 10:00:19 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.3.2 sourceware.org 5E3D83857C57 Authentication-Results: sourceware.org; dmarc=none (p=none dis=none) header.from=suse.de Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=tdevries@suse.de X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.221.27]) by mx2.suse.de (Postfix) with ESMTP id 42020AB55 for ; Tue, 4 Aug 2020 10:00:34 +0000 (UTC) Date: Tue, 4 Aug 2020 12:00:16 +0200 From: Tom de Vries To: gcc-patches@gcc.gnu.org Subject: [committed][nvptx] Handle V2DI/V2SI mode in nvptx_gen_shuffle Message-ID: <20200804100015.GA24756@delia> MIME-Version: 1.0 Content-Disposition: inline 
User-Agent: Mutt/1.10.1 (2018-07-13) X-Spam-Status: No, score=-11.0 required=5.0 tests=BAYES_00, GIT_PATCH_0, KAM_DMARC_STATUS, RCVD_IN_MSPIKE_H3, RCVD_IN_MSPIKE_WL, SPF_HELO_NONE, SPF_PASS, TXREP autolearn=ham autolearn_force=no version=3.4.2 X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on server2.sourceware.org X-BeenThere: gcc-patches@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-patches mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: gcc-patches-bounces@gcc.gnu.org Sender: "Gcc-patches" Hi, With the pr96628-part1.f90 source and -ftree-slp-vectorize, we run into an ICE due to the fact that V2DI mode is not handled in nvptx_gen_shuffle. Fix this by adding handling of V2DI as well as V2SI mode in nvptx_gen_shuffle. Built and reg-tested on x86_64 with an nvptx accelerator. Committed to trunk. Thanks, - Tom [nvptx] Handle V2DI/V2SI mode in nvptx_gen_shuffle gcc/ChangeLog: PR target/96428 * config/nvptx/nvptx.c (nvptx_gen_shuffle): Handle V2SI/V2DI. libgomp/ChangeLog: PR target/96428 * testsuite/libgomp.oacc-fortran/pr96628-part1.f90: New test. * testsuite/libgomp.oacc-fortran/pr96628-part2.f90: New test. 
--- gcc/config/nvptx/nvptx.c | 38 ++++++++++++++++++++++ .../libgomp.oacc-fortran/pr96628-part1.f90 | 20 ++++++++++++ .../libgomp.oacc-fortran/pr96628-part2.f90 | 37 +++++++++++++++++++++ 3 files changed, 95 insertions(+) diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index d8a8fb2d55b..cf53a921e5b 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -1796,6 +1796,44 @@ nvptx_gen_shuffle (rtx dst, rtx src, rtx idx, nvptx_shuffle_kind kind) end_sequence (); } break; + case E_V2SImode: + { + rtx src0 = gen_rtx_SUBREG (SImode, src, 0); + rtx src1 = gen_rtx_SUBREG (SImode, src, 4); + rtx dst0 = gen_rtx_SUBREG (SImode, dst, 0); + rtx dst1 = gen_rtx_SUBREG (SImode, dst, 4); + rtx tmp0 = gen_reg_rtx (SImode); + rtx tmp1 = gen_reg_rtx (SImode); + start_sequence (); + emit_insn (gen_movsi (tmp0, src0)); + emit_insn (gen_movsi (tmp1, src1)); + emit_insn (nvptx_gen_shuffle (tmp0, tmp0, idx, kind)); + emit_insn (nvptx_gen_shuffle (tmp1, tmp1, idx, kind)); + emit_insn (gen_movsi (dst0, tmp0)); + emit_insn (gen_movsi (dst1, tmp1)); + res = get_insns (); + end_sequence (); + } + break; + case E_V2DImode: + { + rtx src0 = gen_rtx_SUBREG (DImode, src, 0); + rtx src1 = gen_rtx_SUBREG (DImode, src, 8); + rtx dst0 = gen_rtx_SUBREG (DImode, dst, 0); + rtx dst1 = gen_rtx_SUBREG (DImode, dst, 8); + rtx tmp0 = gen_reg_rtx (DImode); + rtx tmp1 = gen_reg_rtx (DImode); + start_sequence (); + emit_insn (gen_movdi (tmp0, src0)); + emit_insn (gen_movdi (tmp1, src1)); + emit_insn (nvptx_gen_shuffle (tmp0, tmp0, idx, kind)); + emit_insn (nvptx_gen_shuffle (tmp1, tmp1, idx, kind)); + emit_insn (gen_movdi (dst0, tmp0)); + emit_insn (gen_movdi (dst1, tmp1)); + res = get_insns (); + end_sequence (); + } + break; case E_BImode: { rtx tmp = gen_reg_rtx (SImode); diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pr96628-part1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pr96628-part1.f90 new file mode 100644 index 00000000000..71219f9c467 --- /dev/null +++ 
b/libgomp/testsuite/libgomp.oacc-fortran/pr96628-part1.f90 @@ -0,0 +1,20 @@ +! { dg-do run } +! { dg-additional-sources pr96628-part2.f90 } +! { dg-additional-options "-ftree-slp-vectorize" } +! +! This file is compiled first +module m2 + real*8 :: mysum + !$acc declare device_resident(mysum) +contains + SUBROUTINE one(t) + !$acc routine + REAL*8, INTENT(IN) :: t(:) + mysum = sum(t) + END SUBROUTINE one + SUBROUTINE two(t) + !$acc routine seq + REAL*8, INTENT(INOUT) :: t(:) + t = (100.0_8*t)/sum + END SUBROUTINE two +end module m2 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pr96628-part2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pr96628-part2.f90 new file mode 100644 index 00000000000..784dc27e19e --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/pr96628-part2.f90 @@ -0,0 +1,37 @@ +! { dg-do compile { target skip-all-targets } } +! +! Main file is pr96628-part1.f90 + +MODULE m + IMPLICIT NONE + REAL*8, ALLOCATABLE :: t(:) +CONTAINS + SUBROUTINE run() + use m2 + IMPLICIT NONE + + INTEGER :: i,j ! loop indices + !$acc data present(t) + !$acc parallel + !$acc loop gang + DO j = 1,2 + !$acc loop vector + DO i = 1,2 + CALL one(t(:)) + CALL two(t(:)) + END DO + END DO + !$acc end parallel + !$acc end data + END SUBROUTINE run +END MODULE m + +use m +implicit none +integer :: i +t = [(3.0_8*i, i = 1, 100)] +!$acc data copy(t) +call run +!$acc end data +if (any (abs(t - [((300.0_8*i)/15150.0_8, i = 1, 100)]) < 10.0_8*epsilon(t))) stop 1 +end