From patchwork Mon Feb 15 18:44:10 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Alexander Monakov X-Patchwork-Id: 583063 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 7E28E1401CA for ; Tue, 16 Feb 2016 05:45:22 +1100 (AEDT) Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b=C822+b3/; dkim-atps=neutral DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:cc:subject:date:message-id:in-reply-to:references; q=dns; s= default; b=mS6VabnM6ZBimB7dINUR8jG54fznTPP4827XL4wCZaVcLAmAjXWR5 o01jbROipzfye3ps6lSyIgDkEwEVubbJqJ86IRq9Ay+1UnTFiZrkU13b/T2AeakW uxSe/AhR5kQvUlPxJr9iXyxN25iHm10ZuOpqDB8VxjQeMxr4bquqaQ= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:cc:subject:date:message-id:in-reply-to:references; s= default; bh=yVjUc5lm65c4bJIgd4d+vDH7riw=; b=C822+b3/Y0nYE8OgIK/4 u1FIaWTUuuP63fs0T1GILOErOeOSGFzF0rB4YqY1NMBAgpWad0rETd8ccYvd7B7M IaSd1amt0E5Lw//jTMi5diYlR6E6XUVdIIcDhZNMiumh8L4kgYWhjZILV9ZG8ibD EL0h84WHcotCEN35dmWTxOc= Received: (qmail 30465 invoked by alias); 15 Feb 2016 18:44:18 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 30228 invoked by uid 89); 15 Feb 2016 18:44:18 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-0.5 
required=5.0 tests=AWL, BAYES_00, KAM_LAZY_DOMAIN_SECURITY, RCVD_IN_DNSWL_NONE, RP_MATCHES_RCVD autolearn=no version=3.3.2 spammy=UD:u, ptx, H*Ad:D*acm.org, highly X-HELO: smtp.ispras.ru Received: from smtp.ispras.ru (HELO smtp.ispras.ru) (83.149.199.79) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Mon, 15 Feb 2016 18:44:15 +0000 Received: from condor.intra.ispras.ru (unknown [83.149.199.91]) by smtp.ispras.ru (Postfix) with ESMTP id 16B1B226B2; Mon, 15 Feb 2016 21:44:13 +0300 (MSK) Received: by condor.intra.ispras.ru (Postfix, from userid 23246) id F19AE1225E03; Mon, 15 Feb 2016 21:44:12 +0300 (MSK) From: Alexander Monakov To: gcc-patches@gcc.gnu.org Cc: Nathan Sidwell Subject: [gomp-nvptx 3/5] nvptx backend: set up stacks in entry code Date: Mon, 15 Feb 2016 21:44:10 +0300 Message-Id: <1455561852-9237-4-git-send-email-amonakov@ispras.ru> In-Reply-To: <1455561852-9237-1-git-send-email-amonakov@ispras.ru> References: <1455561852-9237-1-git-send-email-amonakov@ispras.ru> X-IsSubscribed: yes This patch implements the NVPTX backend part of the transition to host-allocated soft stacks. The compiler-emitted kernel entry code now accepts a pointer to stack storage and per-warp stack size, and initializes __nvptx_stacks based on that (as well as trivially initializing __nvptx_uni). The rewritten part of write_omp_entry now uses macro-expanded assembly snippets to avoid highly repetitive dynamic code accounting for 32/64-bit differences. * config/nvptx/nvptx.c (write_omp_entry): Expand entry code to initialize __nvptx_uni and __nvptx_stacks (based on pointer to storage allocated by the libgomp plugin). 
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index efd0f8e..81dd9a2 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -979,8 +979,10 @@ nvptx_init_unisimt_predicate (FILE *file) /* Emit kernel NAME for function ORIG outlined for an OpenMP 'target' region: extern void gomp_nvptx_main (void (*fn)(void*), void *fnarg); - void __attribute__((kernel)) NAME(void *arg) + void __attribute__((kernel)) NAME (void *arg, char *stack, size_t stacksize) { + __nvptx_stacks[tid.y] = stack + stacksize * (ctaid.x * ntid.y + tid.y + 1); + __nvptx_uni[tid.y] = 0; gomp_nvptx_main (ORIG, arg); } ORIG itself should not be emitted as a PTX .entry function. */ @@ -1000,21 +1002,44 @@ write_omp_entry (std::stringstream &s, const char *name, const char *orig) s << ".extern .func gomp_nvptx_main"; s << "(.param" << sfx << " %in_ar1, .param" << sfx << " %in_ar2);\n"; } - s << ".visible .entry " << name << "(.param" << sfx << " %in_ar1)\n"; - s << "{\n"; - s << "\t.reg" << sfx << " %ar1;\n"; - s << "\t.reg" << sfx << " %r1;\n"; - s << "\tld.param" << sfx << " %ar1, [%in_ar1];\n"; - s << "\tmov" << sfx << " %r1, " << orig << ";\n"; - s << "\t{\n"; - s << "\t\t.param" << sfx << " %out_arg0;\n"; - s << "\t\t.param" << sfx << " %out_arg1;\n"; - s << "\t\tst.param" << sfx << " [%out_arg0], %r1;\n"; - s << "\t\tst.param" << sfx << " [%out_arg1], %ar1;\n"; - s << "\t\tcall.uni gomp_nvptx_main, (%out_arg0, %out_arg1);\n"; - s << "\t}\n"; - s << "\tret;\n"; - s << "}\n"; +#define ENTRY_TEMPLATE(PS, PS_BYTES, MAD_PS_32) "\ + (.param.u" PS " %arg, .param.u" PS " %stack, .param.u" PS " %sz)\n\ +{\n\ + .reg.u32 %r<3>;\n\ + .reg.u" PS " %R<4>;\n\ + mov.u32 %r0, %tid.y;\n\ + mov.u32 %r1, %ntid.y;\n\ + mov.u32 %r2, %ctaid.x;\n\ + cvt.u" PS ".u32 %R1, %r0;\n\ + " MAD_PS_32 " %R1, %r1, %r2, %R1;\n\ + mov.u" PS " %R0, __nvptx_stacks;\n\ + " MAD_PS_32 " %R0, %r0, " PS_BYTES ", %R0;\n\ + ld.param.u" PS " %R2, [%stack];\n\ + ld.param.u" PS " %R3, [%sz];\n\ + add.u" PS " %R2, 
%R2, %R3;\n\ + mad.lo.u" PS " %R2, %R1, %R3, %R2;\n\ + st.shared.u" PS " [%R0], %R2;\n\ + mov.u" PS " %R0, __nvptx_uni;\n\ + " MAD_PS_32 " %R0, %r0, 4, %R0;\n\ + mov.u32 %r0, 0;\n\ + st.shared.u32 [%R0], %r0;\n\ + mov.u" PS " %R0, \0;\n\ + ld.param.u" PS " %R1, [%arg];\n\ + {\n\ + .param.u" PS " %P<2>;\n\ + st.param.u" PS " [%P0], %R0;\n\ + st.param.u" PS " [%P1], %R1;\n\ + call.uni gomp_nvptx_main, (%P0, %P1);\n\ + }\n\ + ret.uni;\n\ +}\n" + static const char template64[] = ENTRY_TEMPLATE ("64", "8", "mad.wide.u32"); + static const char template32[] = ENTRY_TEMPLATE ("32", "4", "mad.lo.u32 "); +#undef ENTRY_TEMPLATE + const char *template_1 = TARGET_ABI64 ? template64 : template32; + const char *template_2 = template_1 + strlen (template64) + 1; + s << ".visible .entry " << name << template_1 << orig << template_2; + need_softstack_decl = need_unisimt_decl = true; } /* Implement ASM_DECLARE_FUNCTION_NAME. Writes the start of a ptx