From patchwork Mon Nov 2 19:23:19 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Nathan Sidwell X-Patchwork-Id: 539084 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 1744C140D6A for ; Tue, 3 Nov 2015 06:23:32 +1100 (AEDT) Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b=rZW0LJws; dkim-atps=neutral DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :subject:to:references:from:message-id:date:mime-version :in-reply-to:content-type; q=dns; s=default; b=Wvhc0knIXi1L3pYKl 8s744ouBz+CKwCKEl3d8JiXImqp9oJlR3dUiAcvBBnk//9sR6uG+OQ7lS/35tbtb 4QvYB92tC01fof9+p77xMfULa6B+Ud85yOcmr3Bw5rY1a8HBc+OouFO1NUZlEttp KKK+TgqQDB9qL+KMaBrdZtmQpw= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :subject:to:references:from:message-id:date:mime-version :in-reply-to:content-type; s=default; bh=MrivxBHSxeuqv/ORlT+g0YB lhgo=; b=rZW0LJwscs4SbgWOre2fEWqKmFXRu4aIST6nJftGfFrP1TtGTwB9UJt +SOtX+gtj696z79WsxlXS7/IdQwIN2qlflUvtXjaTSR/DxsHcHTa1eFWuJDCqJx6 9GCR14kTWlCvEglf8pBjleLvc1UN+33BN35ofOcNuVfQMgAUxh30= Received: (qmail 121678 invoked by alias); 2 Nov 2015 19:23:25 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 121042 invoked by uid 89); 2 Nov 2015 19:23:25 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-1.8 required=5.0 tests=BAYES_00, FREEMAIL_FROM, KAM_ASCII_DIVIDERS, RCVD_IN_DNSWL_LOW, SPF_PASS autolearn=no version=3.3.2 X-HELO: mail-qg0-f53.google.com Received: from mail-qg0-f53.google.com (HELO mail-qg0-f53.google.com) (209.85.192.53) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-GCM-SHA256 encrypted) ESMTPS; Mon, 02 Nov 2015 19:23:23 +0000 Received: by qgbb65 with SMTP id b65so124189082qgb.2 for ; Mon, 02 Nov 2015 11:23:20 -0800 (PST) X-Received: by 10.140.237.209 with SMTP id i200mr32766570qhc.92.1446492200693; Mon, 02 Nov 2015 11:23:20 -0800 (PST) Received: from ?IPv6:2601:181:c000:c497:a2a8:cdff:fe3e:b48? ([2601:181:c000:c497:a2a8:cdff:fe3e:b48]) by smtp.googlemail.com with ESMTPSA id m23sm8434853qkh.46.2015.11.02.11.23.19 (version=TLSv1.2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Mon, 02 Nov 2015 11:23:20 -0800 (PST) Subject: Re: [2/2] OpenACC routine support To: Jakub Jelinek , GCC Patches References: <5637B1CF.5060408@acm.org> From: Nathan Sidwell Message-ID: <5637B827.1020909@acm.org> Date: Mon, 2 Nov 2015 14:23:19 -0500 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.3.0 MIME-Version: 1.0 In-Reply-To: <5637B1CF.5060408@acm.org> Here are the tests for the routine support. The compiler tests check invalid combinations of gang, worker, vector & seq. The libgomp execution tests check the expected partioning occurs within loops. As with the reduction tests, these ones are taken from the execution model loop tests. ok? nathan 2015-11-02 Nathan Sidwell gcc/testsuite/ * c-c++-common/goacc/routine-1.c: New. * c-c++-common/goacc/routine-2.c: New. * c-c++-common/goacc/routine-3.c: New. * c-c++-common/goacc/routine-4.c: New. libgomp/ * testsuite/libgomp.oacc-c-c++-common/routine-g-1.c: New. * testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c: New. * testsuite/libgomp.oacc-c-c++-common/routine-v-1.c: New. * testsuite/libgomp.oacc-c-c++-common/routine-w-1.c: New. * testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c: New. Index: gcc/testsuite/c-c++-common/goacc/routine-1.c =================================================================== --- gcc/testsuite/c-c++-common/goacc/routine-1.c (revision 0) +++ gcc/testsuite/c-c++-common/goacc/routine-1.c (working copy) @@ -0,0 +1,34 @@ + +#pragma acc routine gang +void gang (void) +{ +} + +#pragma acc routine worker +void worker (void) +{ +} + +#pragma acc routine vector +void vector (void) +{ +} + +#pragma acc routine seq +void seq (void) +{ +} + +int main () +{ + +#pragma acc parallel num_gangs (32) num_workers (32) vector_length (32) + { + gang (); + worker (); + vector (); + seq (); + } + + return 0; +} Index: gcc/testsuite/c-c++-common/goacc/routine-2.c =================================================================== --- gcc/testsuite/c-c++-common/goacc/routine-2.c (revision 0) +++ gcc/testsuite/c-c++-common/goacc/routine-2.c (working copy) @@ -0,0 +1,21 @@ +#pragma acc routine gang worker /* { dg-error "multiple loop axes" } */ +void gang (void) +{ +} + +#pragma acc routine worker vector /* { dg-error "multiple loop axes" } */ +void worker (void) +{ +} + +#pragma acc routine vector seq /* { dg-error "multiple loop axes" } */ +void vector (void) +{ +} + +#pragma acc routine seq gang /* { dg-error "multiple loop axes" } */ +void seq (void) +{ +} + +#pragma acc routine (nothing) gang /* { dg-error "not been declared" } */ Index: gcc/testsuite/c-c++-common/goacc/routine-3.c =================================================================== --- gcc/testsuite/c-c++-common/goacc/routine-3.c (revision 0) +++ gcc/testsuite/c-c++-common/goacc/routine-3.c (working copy) @@ -0,0 +1,53 @@ +#pragma acc routine gang +void gang (void) /* { dg-message "declared here" 3 } */ +{ +} + +#pragma acc routine worker +void worker (void) /* { dg-message "declared here" 2 } */ +{ +} + +#pragma acc routine vector +void vector (void) /* { dg-message "declared here" 1 } */ +{ +} + +#pragma acc routine seq +void seq (void) +{ +} + +int main () +{ + +#pragma acc parallel num_gangs (32) num_workers (32) vector_length (32) + { + #pragma acc loop gang /* { dg-message "loop here" 1 } */ + for (int i = 0; i < 10; i++) + { + gang (); /* { dg-error "routine call uses same" } */ + worker (); + vector (); + seq (); + } + #pragma acc loop worker /* { dg-message "loop here" 2 } */ + for (int i = 0; i < 10; i++) + { + gang (); /* { dg-error "routine call uses same" } */ + worker (); /* { dg-error "routine call uses same" } */ + vector (); + seq (); + } + #pragma acc loop vector /* { dg-message "loop here" 3 } */ + for (int i = 0; i < 10; i++) + { + gang (); /* { dg-error "routine call uses same" } */ + worker (); /* { dg-error "routine call uses same" } */ + vector (); /* { dg-error "routine call uses same" } */ + seq (); + } + } + + return 0; +} Index: gcc/testsuite/c-c++-common/goacc/routine-4.c =================================================================== --- gcc/testsuite/c-c++-common/goacc/routine-4.c (revision 0) +++ gcc/testsuite/c-c++-common/goacc/routine-4.c (working copy) @@ -0,0 +1,41 @@ + +void gang (void); +void worker (void); +void vector (void); + +#pragma acc routine (gang) gang +#pragma acc routine (worker) worker +#pragma acc routine (vector) vector + +#pragma acc routine seq +void seq (void) +{ + gang (); /* { dg-error "routine call uses" } */ + worker (); /* { dg-error "routine call uses" } */ + vector (); /* { dg-error "routine call uses" } */ + seq (); +} + +void vector (void) /* { dg-message "declared here" 1 } */ +{ + gang (); /* { dg-error "routine call uses" } */ + worker (); /* { dg-error "routine call uses" } */ + vector (); + seq (); +} + +void worker (void) /* { dg-message "declared here" 2 } */ +{ + gang (); /* { dg-error "routine call uses" } */ + worker (); + vector (); + seq (); +} + +void gang (void) /* { dg-message "declared here" 3 } */ +{ + gang (); + worker (); + vector (); + seq (); +} Index: libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c =================================================================== --- libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c (revision 0) +++ libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c (working copy) @@ -0,0 +1,64 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O2" */ + +#include + +#define N (32*32*32+17) + +#pragma acc routine gang +void __attribute__ ((noinline)) gang (int ary[N]) +{ +#pragma acc loop gang + for (unsigned ix = 0; ix < N; ix++) + { + if (__builtin_acc_on_device (5)) + { + int g = 0, w = 0, v = 0; + + __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); + __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); + __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + ary[ix] = (g << 16) | (w << 8) | v; + } + else + ary[ix] = ix; + } +} + +int main () +{ + int ary[N]; + int ix; + int exit = 0; + int ondev = 0; + + for (ix = 0; ix < N;ix++) + ary[ix] = -1; + +#pragma acc parallel num_gangs(32) vector_length(32) copy(ary) copy(ondev) + { + ondev = __builtin_acc_on_device (5); + gang (ary); + } + + for (ix = 0; ix < N; ix++) + { + int expected = ix; + if(ondev) + { + int g = ix / ((N + 31) / 32); + int w = 0; + int v = 0; + + expected = (g << 16) | (w << 8) | v; + } + + if (ary[ix] != expected) + { + exit = 1; + printf ("ary[%d]=%x expected %x\n", ix, ary[ix], expected); + } + } + + return exit; +} Index: libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c =================================================================== --- libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c (revision 0) +++ libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c (working copy) @@ -0,0 +1,66 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O2" */ + +#include + +#define N (32*32*32+17) + +#pragma acc routine gang +void __attribute__ ((noinline)) gang (int ary[N]) +{ +#pragma acc loop gang worker vector + for (unsigned ix = 0; ix < N; ix++) + { + if (__builtin_acc_on_device (5)) + { + int g = 0, w = 0, v = 0; + + __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); + __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); + __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + ary[ix] = (g << 16) | (w << 8) | v; + } + else + ary[ix] = ix; + } +} + +int main () +{ + int ary[N]; + int ix; + int exit = 0; + int ondev = 0; + + for (ix = 0; ix < N;ix++) + ary[ix] = -1; + +#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev) + { + ondev = __builtin_acc_on_device (5); + gang (ary); + } + + for (ix = 0; ix < N; ix++) + { + int expected = ix; + if(ondev) + { + int chunk_size = (N + 32*32*32 - 1) / (32*32*32); + + int g = ix / (chunk_size * 32 * 32); + int w = ix / 32 % 32; + int v = ix % 32; + + expected = (g << 16) | (w << 8) | v; + } + + if (ary[ix] != expected) + { + exit = 1; + printf ("ary[%d]=%x expected %x\n", ix, ary[ix], expected); + } + } + + return exit; +} Index: libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c =================================================================== --- libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c (revision 0) +++ libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c (working copy) @@ -0,0 +1,64 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O2" */ + +#include + +#define N (32*32*32+17) + +#pragma acc routine vector +void __attribute__ ((noinline)) vector (int ary[N]) +{ +#pragma acc loop vector + for (unsigned ix = 0; ix < N; ix++) + { + if (__builtin_acc_on_device (5)) + { + int g = 0, w = 0, v = 0; + + __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); + __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); + __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + ary[ix] = (g << 16) | (w << 8) | v; + } + else + ary[ix] = ix; + } +} + +int main () +{ + int ary[N]; + int ix; + int exit = 0; + int ondev = 0; + + for (ix = 0; ix < N;ix++) + ary[ix] = -1; + +#pragma acc parallel vector_length(32) copy(ary) copy(ondev) + { + ondev = __builtin_acc_on_device (5); + vector (ary); + } + + for (ix = 0; ix < N; ix++) + { + int expected = ix; + if(ondev) + { + int g = 0; + int w = 0; + int v = ix % 32; + + expected = (g << 16) | (w << 8) | v; + } + + if (ary[ix] != expected) + { + exit = 1; + printf ("ary[%d]=%x expected %x\n", ix, ary[ix], expected); + } + } + + return exit; +} Index: libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c =================================================================== --- libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c (revision 0) +++ libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c (working copy) @@ -0,0 +1,64 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O2" */ + +#include + +#define N (32*32*32+17) + +#pragma acc routine worker +void __attribute__ ((noinline)) worker (int ary[N]) +{ +#pragma acc loop worker + for (unsigned ix = 0; ix < N; ix++) + { + if (__builtin_acc_on_device (5)) + { + int g = 0, w = 0, v = 0; + + __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); + __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); + __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + ary[ix] = (g << 16) | (w << 8) | v; + } + else + ary[ix] = ix; + } +} + +int main () +{ + int ary[N]; + int ix; + int exit = 0; + int ondev = 0; + + for (ix = 0; ix < N;ix++) + ary[ix] = -1; + +#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) + { + ondev = __builtin_acc_on_device (5); + worker (ary); + } + + for (ix = 0; ix < N; ix++) + { + int expected = ix; + if(ondev) + { + int g = 0; + int w = ix % 32; + int v = 0; + + expected = (g << 16) | (w << 8) | v; + } + + if (ary[ix] != expected) + { + exit = 1; + printf ("ary[%d]=%x expected %x\n", ix, ary[ix], expected); + } + } + + return exit; +} Index: libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c =================================================================== --- libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c (revision 0) +++ libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c (working copy) @@ -0,0 +1,64 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O2" */ + +#include + +#define N (32*32*32+17) + +#pragma acc routine worker +void __attribute__ ((noinline)) worker (int ary[N]) +{ +#pragma acc loop worker vector + for (unsigned ix = 0; ix < N; ix++) + { + if (__builtin_acc_on_device (5)) + { + int g = 0, w = 0, v = 0; + + __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); + __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); + __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + ary[ix] = (g << 16) | (w << 8) | v; + } + else + ary[ix] = ix; + } +} + +int main () +{ + int ary[N]; + int ix; + int exit = 0; + int ondev = 0; + + for (ix = 0; ix < N;ix++) + ary[ix] = -1; + +#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) + { + ondev = __builtin_acc_on_device (5); + worker (ary); + } + + for (ix = 0; ix < N; ix++) + { + int expected = ix; + if(ondev) + { + int g = 0; + int w = (ix / 32) % 32; + int v = ix % 32; + + expected = (g << 16) | (w << 8) | v; + } + + if (ary[ix] != expected) + { + exit = 1; + printf ("ary[%d]=%x expected %x\n", ix, ary[ix], expected); + } + } + + return exit; +}