From patchwork Wed Sep 5 11:48:49 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Stubbs X-Patchwork-Id: 966326 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-485162-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="xYtpi6yP"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 42527V11jQz9s5c for ; Wed, 5 Sep 2018 21:49:57 +1000 (AEST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; q=dns; s=default; b=bp8la0MU/9gGDvlvfaOLmyBEjXbwg qknJ10njAph4dRjvTVjlo3XRlt/NsyEYyiz6vRiFsfAk49uaFtGe6FCAs0pfKNYC 73kTVLKSYsfN7KpSpXzSYmSC6fwgo161UCIXV8Y4QvLTs/sXknIDdBTIeOFpt3pH mC5IRL+0UmPjU4= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; s=default; bh=661HT4iTtuCyaUY+DjpXu8cSAhA=; b=xYt pi6yP8jfruKy7AlkMBc56NnLvYho7NdaMR97+hpZ7RzI1U7bZSrtDh/VRZFbJb/Z OKYcQSRzytzGU9VU4zdv8ANNHt9FJeGZyYnEtUyKgEvWWn5hUWgSAdCVzuwnrU3Y Hg24RB1cUNS/uO/faeaSHxxC19xdgtNiFAQawBaQ= Received: (qmail 80684 invoked by alias); 5 Sep 2018 11:49:38 -0000 Mailing-List: contact 
gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 80412 invoked by uid 89); 5 Sep 2018 11:49:26 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-24.4 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, RCVD_IN_DNSWL_NONE, SPF_PASS autolearn=ham version=3.3.2 spammy=UNITS_PER_WORD, units_per_word X-HELO: relay1.mentorg.com Received: from relay1.mentorg.com (HELO relay1.mentorg.com) (192.94.38.131) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Wed, 05 Sep 2018 11:49:23 +0000 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=svr-ies-mbx-01.mgc.mentorg.com) by relay1.mentorg.com with esmtps (TLSv1.2:ECDHE-RSA-AES256-SHA384:256) id 1fxWJ1-0005TL-ET from Andrew_Stubbs@mentor.com for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 04:49:07 -0700 Received: from build6-trusty-cs.sje.mentorg.com (137.202.0.90) by svr-ies-mbx-01.mgc.mentorg.com (139.181.222.1) with Microsoft SMTP Server (TLS) id 15.0.1320.4; Wed, 5 Sep 2018 12:49:03 +0100 From: To: Subject: [PATCH 01/25] Handle vectors that don't fit in an integer. Date: Wed, 5 Sep 2018 12:48:49 +0100 Message-ID: In-Reply-To: References: MIME-Version: 1.0 GCN vector sizes range between 64 and 512 bytes, none of which have correspondingly sized integer modes. This breaks a number of assumptions throughout the compiler, but I don't really want to create modes just for this purpose. Instead, this patch fixes up the cases that I've found, so far, such that the compiler tries something else, or fails to optimize, rather than just ICE. 2018-09-05 Andrew Stubbs Kwok Cheung Yeung Jan Hubicka Martin Jambor gcc/ * combine.c (gen_lowpart_or_truncate): Return clobber if there is no integer mode of the same size as x. 
(gen_lowpart_for_combine): Fail if there is no integer mode of the same size. * expr.c (expand_expr_real_1): Force first operand to be in memory if it is a vector register and the result is in BLKmode. * tree-vect-stmts.c (vectorizable_store): Don't ICE when int_mode_for_size fails. (vectorizable_load): Likewise. --- gcc/combine.c | 13 ++++++++++++- gcc/expr.c | 8 ++++++++ gcc/tree-vect-stmts.c | 8 ++++---- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/gcc/combine.c b/gcc/combine.c index a2649b6..cbf9dae 100644 --- a/gcc/combine.c +++ b/gcc/combine.c @@ -8621,7 +8621,13 @@ gen_lowpart_or_truncate (machine_mode mode, rtx x) { /* Bit-cast X into an integer mode. */ if (!SCALAR_INT_MODE_P (GET_MODE (x))) - x = gen_lowpart (int_mode_for_mode (GET_MODE (x)).require (), x); + { + enum machine_mode imode = + int_mode_for_mode (GET_MODE (x)).require (); + if (imode == BLKmode) + return gen_rtx_CLOBBER (mode, const0_rtx); + x = gen_lowpart (imode, x); + } x = simplify_gen_unary (TRUNCATE, int_mode_for_mode (mode).require (), x, GET_MODE (x)); } @@ -11698,6 +11704,11 @@ gen_lowpart_for_combine (machine_mode omode, rtx x) if (omode == imode) return x; + /* This can happen when there is no integer mode corresponding + to a size of vector mode. */ + if (omode == BLKmode) + goto fail; + /* We can only support MODE being wider than a word if X is a constant integer or has a mode the same size. */ if (maybe_gt (GET_MODE_SIZE (omode), UNITS_PER_WORD) diff --git a/gcc/expr.c b/gcc/expr.c index cd5cf12..776254a 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -10569,6 +10569,14 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode, || maybe_gt (bitpos + bitsize, GET_MODE_BITSIZE (mode2))); + /* If the result is in BLKmode and the underlying object is a + vector in a register, and the size of the vector is larger than + the largest integer mode, then we must force OP0 to be in memory + as this is assumed in later code. 
*/ + if (REG_P (op0) && VECTOR_MODE_P (mode2) && mode == BLKmode + && maybe_gt (bitsize, MAX_FIXED_MODE_SIZE)) + must_force_mem = 1; + /* Handle CONCAT first. */ if (GET_CODE (op0) == CONCAT && !must_force_mem) { diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 8d94fca..607a2bd 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -6702,12 +6702,12 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, supported. */ unsigned lsize = group_size * GET_MODE_BITSIZE (elmode); - elmode = int_mode_for_size (lsize, 0).require (); unsigned int lnunits = const_nunits / group_size; /* If we can't construct such a vector fall back to element extracts from the original vector type and element size stores. */ - if (mode_for_vector (elmode, lnunits).exists (&vmode) + if (int_mode_for_size (lsize, 0).exists (&elmode) + && mode_for_vector (elmode, lnunits).exists (&vmode) && VECTOR_MODE_P (vmode) && targetm.vector_mode_supported_p (vmode) && (convert_optab_handler (vec_extract_optab, @@ -7839,11 +7839,11 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, to a larger load. */ unsigned lsize = group_size * TYPE_PRECISION (TREE_TYPE (vectype)); - elmode = int_mode_for_size (lsize, 0).require (); unsigned int lnunits = const_nunits / group_size; /* If we can't construct such a vector fall back to element loads of the original vector type. 
*/ - if (mode_for_vector (elmode, lnunits).exists (&vmode) + if (int_mode_for_size (lsize, 0).exists (&elmode) + && mode_for_vector (elmode, lnunits).exists (&vmode) && VECTOR_MODE_P (vmode) && targetm.vector_mode_supported_p (vmode) && (convert_optab_handler (vec_init_optab, vmode, elmode) From patchwork Wed Sep 5 11:48:50 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Stubbs X-Patchwork-Id: 966327 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-485163-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="xV3MptIk"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 42527l0Mp5z9sCf for ; Wed, 5 Sep 2018 21:50:10 +1000 (AEST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; q=dns; s=default; b=JALg2xyPSz8XNDFHg1DhtQ8Fr+1HT wEO9IgznBPxgW26R0Z/Y+RhO/19Oi8QTBcMtkQhhhVhocr+lfbWCYYwht+HNkxHX 3L6rBvCbbwlSOx1qcKt+2FupLEdpc57dCDwdOtXMmkqXlY4OAtMlrNUI6LVdbF12 HQDFUoLkY+C6ZU= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; s=default; 
bh=aMpj6heRVtaMGmydD9jeypkyHN4=; b=xV3 MptIklvKcxaBP0wU0zA9Pai0UgxdJupSuAT/cemSq6sP8px0FOOvO/OxjSK35xWs MEJcdDlLgsA80q3l8CE8Y6RQ2keed6E1oVH8b9DkUPMdxb4RcLfYbXzYgqr5KPWX qBLBc4weqjPGuie3x1tWyNw9F4CS0aJNG+bPLp24= Received: (qmail 80749 invoked by alias); 5 Sep 2018 11:49:38 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 80427 invoked by uid 89); 5 Sep 2018 11:49:28 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-24.5 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, RCVD_IN_DNSWL_NONE, SPF_PASS autolearn=ham version=3.3.2 spammy=Hx-languages-length:1763 X-HELO: relay1.mentorg.com Received: from relay1.mentorg.com (HELO relay1.mentorg.com) (192.94.38.131) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Wed, 05 Sep 2018 11:49:23 +0000 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=svr-ies-mbx-01.mgc.mentorg.com) by relay1.mentorg.com with esmtps (TLSv1.2:ECDHE-RSA-AES256-SHA384:256) id 1fxWJ4-0005TS-0K from Andrew_Stubbs@mentor.com for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 04:49:10 -0700 Received: from build6-trusty-cs.sje.mentorg.com (137.202.0.90) by svr-ies-mbx-01.mgc.mentorg.com (139.181.222.1) with Microsoft SMTP Server (TLS) id 15.0.1320.4; Wed, 5 Sep 2018 12:49:05 +0100 From: To: Subject: [PATCH 02/25] Propagate address spaces to builtins. Date: Wed, 5 Sep 2018 12:48:50 +0100 Message-ID: In-Reply-To: References: MIME-Version: 1.0 At present, pointers passed to builtin functions, including atomic operators, are stripped of their address space properties. This doesn't seem to be deliberate, it just omits to copy them. Not only that, but it forces pointer sizes to Pmode, which isn't appropriate for all address spaces. 
This patch attempts to correct both issues. It works for GCN atomics and GCN OpenACC gang-private variables. 2018-09-05 Andrew Stubbs Julian Brown gcc/ * builtins.c (get_builtin_sync_mem): Handle address spaces. --- gcc/builtins.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/gcc/builtins.c b/gcc/builtins.c index 58ea747..361361c 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -5781,14 +5781,21 @@ static rtx get_builtin_sync_mem (tree loc, machine_mode mode) { rtx addr, mem; + int addr_space = TYPE_ADDR_SPACE (POINTER_TYPE_P (TREE_TYPE (loc)) + ? TREE_TYPE (TREE_TYPE (loc)) + : TREE_TYPE (loc)); + scalar_int_mode addr_mode = targetm.addr_space.address_mode (addr_space); - addr = expand_expr (loc, NULL_RTX, ptr_mode, EXPAND_SUM); - addr = convert_memory_address (Pmode, addr); + addr = expand_expr (loc, NULL_RTX, addr_mode, EXPAND_SUM); /* Note that we explicitly do not want any alias information for this memory, so that we kill all other live memories. Otherwise we don't satisfy the full barrier semantics of the intrinsic. */ - mem = validize_mem (gen_rtx_MEM (mode, addr)); + mem = gen_rtx_MEM (mode, addr); + + set_mem_addr_space (mem, addr_space); + + mem = validize_mem (mem); /* The alignment needs to be at least according to that of the mode. 
*/ set_mem_align (mem, MAX (GET_MODE_ALIGNMENT (mode), From patchwork Wed Sep 5 11:48:51 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Stubbs X-Patchwork-Id: 966329 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-485165-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="hFDvJK4l"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 42528D66bwz9sCf for ; Wed, 5 Sep 2018 21:50:36 +1000 (AEST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; q=dns; s=default; b=yTJnyp2pSZDJh+oDIiPpZiEw2ZX88 yuV7cjG0W6G6W3Gc7jT/St6LSIBUfowyDOYeZUjW5p47rQdBeqaHrjbmufnjg/LS zM7wkf0Ga4aEnEMHWsHZMKDDpjp5QwPW8Xi/zlmLBOnBoMm2bEV1Dkj0CllJL0Rq rOHeSEOrEoHxiw= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; s=default; bh=0hwo2AORhuVg412b+WgLNJQ77kU=; b=hFD vJK4lY23E054dTj6PJNeBvrE+zkzmwv4DycCjoTk2gw0D5Ti0PZwIoSjrMSSVUQz KDVoWaOgMpb6QwFezyOfLgKfHlzTikvc1jfjGK+1q+CjyZZT3Cv2nGzt+soVk/aN COURaYvk3EBDi7uniEmJeckbK8g4O8gCBKZDizi8= Received: (qmail 80897 invoked by alias); 5 Sep 2018 
11:49:39 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 80429 invoked by uid 89); 5 Sep 2018 11:49:28 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-24.5 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, RCVD_IN_DNSWL_NONE, SPF_PASS autolearn=ham version=3.3.2 spammy=sk:handle_, drivers X-HELO: relay1.mentorg.com Received: from relay1.mentorg.com (HELO relay1.mentorg.com) (192.94.38.131) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Wed, 05 Sep 2018 11:49:23 +0000 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=svr-ies-mbx-01.mgc.mentorg.com) by relay1.mentorg.com with esmtps (TLSv1.2:ECDHE-RSA-AES256-SHA384:256) id 1fxWJ6-0005Tk-Hv from Andrew_Stubbs@mentor.com for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 04:49:12 -0700 Received: from build6-trusty-cs.sje.mentorg.com (137.202.0.90) by svr-ies-mbx-01.mgc.mentorg.com (139.181.222.1) with Microsoft SMTP Server (TLS) id 15.0.1320.4; Wed, 5 Sep 2018 12:49:08 +0100 From: To: Subject: [PATCH 03/25] Improve TARGET_MANGLE_DECL_ASSEMBLER_NAME. Date: Wed, 5 Sep 2018 12:48:51 +0100 Message-ID: <328b94cd7d15c4d2113a676a3a70ce640b37333a.1536144068.git.ams@codesourcery.com> In-Reply-To: References: MIME-Version: 1.0 The HSA GPU drivers can't cope with binaries that have the same symbol defined multiple times, even though the names are not exported. This happens whenever there are file-scope static variables with matching names. I believe it's also an issue with switch tables. This is a bug, but outside our control, so we must work around it when multiple translation units have the same symbol defined. 
Therefore, we've implemented name mangling via TARGET_MANGLE_DECL_ASSEMBLER_NAME, but found some places where the middle-end assumes that the decl name matches the name in the source. This patch fixes up those cases by falling back to comparing the unmangled name, when a lookup fails. 2018-09-05 Julian Brown gcc/ * cgraphunit.c (handle_alias_pairs): Scan for aliases by DECL_NAME if decl assembler name doesn't match. gcc/c-family/ * c-pragma.c (maybe_apply_pending_pragma_weaks): Scan for aliases with DECL_NAME if decl assembler name doesn't match. --- gcc/c-family/c-pragma.c | 14 ++++++++++++++ gcc/cgraphunit.c | 15 +++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/gcc/c-family/c-pragma.c b/gcc/c-family/c-pragma.c index 84e4341..1c0be0c 100644 --- a/gcc/c-family/c-pragma.c +++ b/gcc/c-family/c-pragma.c @@ -323,6 +323,20 @@ maybe_apply_pending_pragma_weaks (void) continue; target = symtab_node::get_for_asmname (id); + + /* Try again if ID didn't match an assembler name by looking through + decl names. */ + if (!target) + { + symtab_node *node; + FOR_EACH_SYMBOL (node) + if (strcmp (IDENTIFIER_POINTER (id), node->name ()) == 0) + { + target = node; + break; + } + } + decl = build_decl (UNKNOWN_LOCATION, target ? TREE_CODE (target->decl) : FUNCTION_DECL, alias_id, default_function_type); diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c index ec490d7..fc3f34e 100644 --- a/gcc/cgraphunit.c +++ b/gcc/cgraphunit.c @@ -1393,6 +1393,21 @@ handle_alias_pairs (void) { symtab_node *target_node = symtab_node::get_for_asmname (p->target); + /* If the alias target didn't match a symbol's assembler name (e.g. + because it has been mangled by TARGET_MANGLE_DECL_ASSEMBLER_NAME), + try again with the unmangled decl name. 
*/ + if (!target_node) + { + symtab_node *node; + FOR_EACH_SYMBOL (node) + if (strcmp (IDENTIFIER_POINTER (p->target), + node->name ()) == 0) + { + target_node = node; + break; + } + } + /* Weakrefs with target not defined in current unit are easy to handle: they behave just as external variables except we need to note the alias flag to later output the weakref pseudo op into asm file. */ From patchwork Wed Sep 5 11:48:52 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Stubbs X-Patchwork-Id: 966328 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-485164-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="FWsmQGJY"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 4252800bNLz9s5c for ; Wed, 5 Sep 2018 21:50:23 +1000 (AEST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; q=dns; s=default; b=fNF3lOI+8KmuDcCcZSLfezOupkRT0 B6oQHCLQRRT0cCsYD0go/+0XBHU4jA4hmXO+zkdQx3fB27eVBGFDRDUxAnQGkkTm I7iep9opVsoCofXoENouHsO8BOIiH5CKD1lEcF1lPdhzPNM3nTWxwmhY0OWs+Bkr 9CORFvDEEVRmEg= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from 
:to:subject:date:message-id:in-reply-to:references:mime-version :content-type; s=default; bh=axxu3If8kVyyJt5ZbkZSwstQCd4=; b=FWs mQGJYVCGifYgyo6jCx52unJmN/RZiFaeSBKszTxglJ/iheO/bQ1OwEXUdYOYQTnh jLVDGJRM8+MxSF1CC5uRP0L6m19Hdc0+epxeXGFbVaA+8gAfBn5L0Q3Ku3YWWlEi AKlkV5ubsGuZBPawZQWGXBUP4psdCivx7BGdaHrs= Received: (qmail 80859 invoked by alias); 5 Sep 2018 11:49:39 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 80458 invoked by uid 89); 5 Sep 2018 11:49:30 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-23.1 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, SUBJ_ALL_CAPS autolearn=ham version=3.3.2 spammy=respectively, HX-detected-operating-system:fuzzy, HX-detected-operating-system:kernel, HX-detected-operating-system:Windows X-HELO: eggs.gnu.org Received: from eggs.gnu.org (HELO eggs.gnu.org) (208.118.235.92) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Wed, 05 Sep 2018 11:49:23 +0000 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1fxWJB-0001kf-FM for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:49:20 -0400 Received: from relay1.mentorg.com ([192.94.38.131]:45291) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1fxWJB-0001il-7R for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:49:17 -0400 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=svr-ies-mbx-01.mgc.mentorg.com) by relay1.mentorg.com with esmtps (TLSv1.2:ECDHE-RSA-AES256-SHA384:256) id 1fxWJ9-0005Tp-4i from Andrew_Stubbs@mentor.com for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 04:49:15 -0700 Received: from build6-trusty-cs.sje.mentorg.com (137.202.0.90) by svr-ies-mbx-01.mgc.mentorg.com (139.181.222.1) with 
Microsoft SMTP Server (TLS) id 15.0.1320.4; Wed, 5 Sep 2018 12:49:10 +0100 From: To: Subject: [PATCH 04/25] SPECIAL_REGNO_P Date: Wed, 5 Sep 2018 12:48:52 +0100 Message-ID: In-Reply-To: References: MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: Windows NT kernel [generic] [fuzzy] X-Received-From: 192.94.38.131 GCN has some registers which are special purpose, but not "fixed" because we want the register allocator to track their usage and select alternatives that use different special registers (e.g. scalar cc vs. vector cc). Sometimes this leads the regrename pass to ICE. Quite how it gets confused is not well understood, but considering such registers for renaming is surely not useful. This patch creates a new macro SPECIAL_REGNO_P which disables regrename. In other words, the register is fixed once allocated. 2018-09-05 Kwok Cheung Yeung gcc/ * defaults.h (SPECIAL_REGNO_P): Define to false by default. * regrename.c (check_new_reg_p): Do not rename to a special register. (rename_chains): Do not rename special registers. --- gcc/defaults.h | 4 ++++ gcc/regrename.c | 2 ++ 2 files changed, 6 insertions(+) diff --git a/gcc/defaults.h b/gcc/defaults.h index 9035b33..40ecf61 100644 --- a/gcc/defaults.h +++ b/gcc/defaults.h @@ -1198,6 +1198,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define NO_FUNCTION_CSE false #endif +#ifndef SPECIAL_REGNO_P +#define SPECIAL_REGNO_P(REGNO) false +#endif + #ifndef HARD_REGNO_RENAME_OK #define HARD_REGNO_RENAME_OK(FROM, TO) true #endif diff --git a/gcc/regrename.c b/gcc/regrename.c index 8424093..92e403e 100644 --- a/gcc/regrename.c +++ b/gcc/regrename.c @@ -320,6 +320,7 @@ check_new_reg_p (int reg ATTRIBUTE_UNUSED, int new_reg, if (TEST_HARD_REG_BIT (this_unavailable, new_reg + i) || fixed_regs[new_reg + i] || global_regs[new_reg + i] + || SPECIAL_REGNO_P (new_reg + i) /* Can't use regs which aren't saved by the prologue. */ || (! df_regs_ever_live_p (new_reg + i) && ! 
call_used_regs[new_reg + i]) @@ -480,6 +481,7 @@ rename_chains (void) continue; if (fixed_regs[reg] || global_regs[reg] + || SPECIAL_REGNO_P (reg) || (!HARD_FRAME_POINTER_IS_FRAME_POINTER && frame_pointer_needed && reg == HARD_FRAME_POINTER_REGNUM) || (HARD_FRAME_POINTER_IS_FRAME_POINTER && frame_pointer_needed From patchwork Wed Sep 5 11:49:37 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Stubbs X-Patchwork-Id: 966330 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-485166-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="fXodIllK"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 42528T3W1hz9s5c for ; Wed, 5 Sep 2018 21:50:49 +1000 (AEST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; q=dns; s=default; b=B1obW+HTi59opkRq9IiqSPNOPvtN7 AtXVp945dKxI6gC1D0HYHapcIRapPefiXSG/qhCjNZyIeOwdI/iNPlhkdtE1OFRx k9Ues/xwtqxK+Td7S/rXn5QRbTEFFGH6oYyOOjp2v0dYERBfUvGWrPsxNLe7EWx3 2aK0V4owhwTRU4= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; s=default; 
bh=nodIp/twAAwTzA9t0tQsLsmeGNw=; b=fXo dIllK7paaiWAglN7M0TkHavYGBFINTViDNIyjN0gCvoy01wfRuQsNFzeY/jwq1xT r/wXpRiGEV8M1VhRxKazK9IGkZZ1lZ6vO8Z7WBiwrJPFV8mm4N6sBM0qaiGFAADt G32jybC54q4YtTI9HttkTkrm4jJeDD7soTT3kTMw= Received: (qmail 83293 invoked by alias); 5 Sep 2018 11:49:59 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 83124 invoked by uid 89); 5 Sep 2018 11:49:58 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-24.5 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3 autolearn=ham version=3.3.2 spammy=967, HX-detected-operating-system:Windows X-HELO: eggs.gnu.org Received: from eggs.gnu.org (HELO eggs.gnu.org) (208.118.235.92) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Wed, 05 Sep 2018 11:49:56 +0000 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1fxWJm-0002ac-E4 for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:49:55 -0400 Received: from relay1.mentorg.com ([192.94.38.131]:45352) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1fxWJm-0002Zy-6h for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:49:54 -0400 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=svr-ies-mbx-01.mgc.mentorg.com) by relay1.mentorg.com with esmtps (TLSv1.2:ECDHE-RSA-AES256-SHA384:256) id 1fxWJl-0005YG-Bu from Andrew_Stubbs@mentor.com for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 04:49:53 -0700 Received: from build6-trusty-cs.sje.mentorg.com (137.202.0.90) by svr-ies-mbx-01.mgc.mentorg.com (139.181.222.1) with Microsoft SMTP Server (TLS) id 15.0.1320.4; Wed, 5 Sep 2018 12:49:49 +0100 From: To: Subject: [PATCH 05/25] Add sorry_at diagnostic function. 
Date: Wed, 5 Sep 2018 12:49:37 +0100 Message-ID: <0390b03b37fb75afba6a754964c2ae43f799727b.1536144068.git.ams@codesourcery.com> In-Reply-To: References: MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: Windows NT kernel [generic] [fuzzy] X-Received-From: 192.94.38.131 The plain "sorry" diagnostic only gives the "current" location, which is typically the last line of the function or translation unit by the time we get to the back end. GCN uses "sorry" to report unsupported language features, such as static constructors, so it's useful to have a "sorry_at" variant. This patch implements "sorry_at" according to the pattern of the other "at" variants. 2018-09-05 Andrew Stubbs gcc/ * diagnostic-core.h (sorry_at): New prototype. * diagnostic.c (sorry_at): New function. --- gcc/diagnostic-core.h | 1 + gcc/diagnostic.c | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/gcc/diagnostic-core.h b/gcc/diagnostic-core.h index e4ebe00..80ff395 100644 --- a/gcc/diagnostic-core.h +++ b/gcc/diagnostic-core.h @@ -96,6 +96,7 @@ extern bool permerror (location_t, const char *, ...) ATTRIBUTE_GCC_DIAG(2,3); extern bool permerror (rich_location *, const char *, ...) ATTRIBUTE_GCC_DIAG(2,3); extern void sorry (const char *, ...) ATTRIBUTE_GCC_DIAG(1,2); +extern void sorry_at (location_t, const char *, ...) ATTRIBUTE_GCC_DIAG(2,3); extern void inform (location_t, const char *, ...) ATTRIBUTE_GCC_DIAG(2,3); extern void inform (rich_location *, const char *, ...) ATTRIBUTE_GCC_DIAG(2,3); extern void inform_n (location_t, unsigned HOST_WIDE_INT, const char *, diff --git a/gcc/diagnostic.c b/gcc/diagnostic.c index aae0934..56a1140 100644 --- a/gcc/diagnostic.c +++ b/gcc/diagnostic.c @@ -1443,6 +1443,17 @@ sorry (const char *gmsgid, ...) va_end (ap); } +/* Same as above, but use location LOC instead of input_location. */ +void +sorry_at (location_t loc, const char *gmsgid, ...) 
+{ + va_list ap; + va_start (ap, gmsgid); + rich_location richloc (line_table, loc); + diagnostic_impl (&richloc, -1, gmsgid, &ap, DK_SORRY); + va_end (ap); +} + /* Return true if an error or a "sorry" has been seen. Various processing is disabled after errors. */ bool From patchwork Wed Sep 5 11:49:38 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Stubbs X-Patchwork-Id: 966332 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-485168-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="RGDYwy/s"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 42528z0SQWz9s5c for ; Wed, 5 Sep 2018 21:51:14 +1000 (AEST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; q=dns; s=default; b=MVp5Md+v9iXxNBzvvLz7C1aqtc0AT mQJyUesNHBkTVbdMa3AsswkaqTwnPuXcptJ4bD6o2LB0Jbng1S7d51cnW8SIqjOP xQtUmBH/lvRtcOmstVEGYubCdB0izJtTjTckWraNcOsEzTJJspuwuVrJVIE3/7Cl f/d8wxDgY8H9wY= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; s=default; bh=tf/470lf44t7XXA63vBeWzVy2dM=; b=RGD 
Ywy/seaosivX/SFegc1WOzWkQjaSm8RVBMJ/1X7scfhwOeDuYTaEhDyYLRCxmRcJ zgi3UPuI2KmoOwBwdBEh/bZsaP5RuA0KkeuqtjSL97EvONRA4+hx/GxVlGDP7jni A9r2Y/fZ87q47iTj+zCnjiypxssLMnVvZTnMCWGM= Received: (qmail 84059 invoked by alias); 5 Sep 2018 11:50:04 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 83953 invoked by uid 89); 5 Sep 2018 11:50:03 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-24.6 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3 autolearn=ham version=3.3.2 spammy=HX-detected-operating-system:Windows X-HELO: eggs.gnu.org Received: from eggs.gnu.org (HELO eggs.gnu.org) (208.118.235.92) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Wed, 05 Sep 2018 11:50:02 +0000 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1fxWJp-0002dG-Ej for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:50:00 -0400 Received: from relay1.mentorg.com ([192.94.38.131]:45360) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1fxWJp-0002cW-6Q for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:49:57 -0400 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=svr-ies-mbx-01.mgc.mentorg.com) by relay1.mentorg.com with esmtps (TLSv1.2:ECDHE-RSA-AES256-SHA384:256) id 1fxWJo-0005Yh-BA from Andrew_Stubbs@mentor.com for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 04:49:56 -0700 Received: from build6-trusty-cs.sje.mentorg.com (137.202.0.90) by svr-ies-mbx-01.mgc.mentorg.com (139.181.222.1) with Microsoft SMTP Server (TLS) id 15.0.1320.4; Wed, 5 Sep 2018 12:49:52 +0100 From: To: Subject: [PATCH 06/25] Remove constant vec_select restriction. 
Date: Wed, 5 Sep 2018 12:49:38 +0100 Message-ID: <5f024fdd5ac905aa67cacdc932dcfb0b647675b6.1536144068.git.ams@codesourcery.com> In-Reply-To: References: MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: Windows NT kernel [generic] [fuzzy] X-Received-From: 192.94.38.131 The vec_select operator is documented to require a const_int for the lane selector operand, but GCN has an instruction that can select the lane at runtime, so it seems reasonable to remove this restriction. This patch simply replaces assertions that the operand is constant with early exits from the optimizers. I think it's reasonable that vec_select with a non-constant operand cannot be optimized, yet. Also included is the necessary documentation tweak. 2018-09-05 Andrew Stubbs gcc/ * doc/rtl.texi: Adjust vec_select description. * simplify-rtx.c (simplify_binary_operation_1): Allow VEC_SELECT to use non-constant selectors. --- gcc/doc/rtl.texi | 11 ++++++----- gcc/simplify-rtx.c | 9 +++++++-- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/gcc/doc/rtl.texi b/gcc/doc/rtl.texi index 5b1e695..0695ad2 100644 --- a/gcc/doc/rtl.texi +++ b/gcc/doc/rtl.texi @@ -2939,11 +2939,12 @@ a set bit indicates it is taken from @var{vec1}. @item (vec_select:@var{m} @var{vec1} @var{selection}) This describes an operation that selects parts of a vector. @var{vec1} is the source vector, and @var{selection} is a @code{parallel} that contains a -@code{const_int} for each of the subparts of the result vector, giving the -number of the source subpart that should be stored into it. -The result mode @var{m} is either the submode for a single element of -@var{vec1} (if only one subpart is selected), or another vector mode -with that element submode (if multiple subparts are selected). +@code{const_int} (or another expression, if the selection can be made at +runtime) for each of the subparts of the result vector, giving the number of +the source subpart that should be stored into it. 
The result mode @var{m} is +either the submode for a single element of @var{vec1} (if only one subpart is +selected), or another vector mode with that element submode (if multiple +subparts are selected). @findex vec_concat @item (vec_concat:@var{m} @var{x1} @var{x2}) diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index a9f2586..b4c6883 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -3604,7 +3604,10 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode, gcc_assert (mode == GET_MODE_INNER (GET_MODE (trueop0))); gcc_assert (GET_CODE (trueop1) == PARALLEL); gcc_assert (XVECLEN (trueop1, 0) == 1); - gcc_assert (CONST_INT_P (XVECEXP (trueop1, 0, 0))); + + /* We can't reason about selections made at runtime. */ + if (!CONST_INT_P (XVECEXP (trueop1, 0, 0))) + return 0; if (vec_duplicate_p (trueop0, &elt0)) return elt0; @@ -3703,7 +3706,9 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode, { rtx x = XVECEXP (trueop1, 0, i); - gcc_assert (CONST_INT_P (x)); + if (!CONST_INT_P (x)) + return 0; + RTVEC_ELT (v, i) = CONST_VECTOR_ELT (trueop0, INTVAL (x)); } From patchwork Wed Sep 5 11:49:39 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Stubbs X-Patchwork-Id: 966331 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-485167-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="vGDM8Ko/"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher 
ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 42528l0hmyz9s5c for ; Wed, 5 Sep 2018 21:51:02 +1000 (AEST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; q=dns; s=default; b=nb4MsZWf903UiNgwgaK5sHnl8th+H mCrdA+IBjp2BQ/Diq2fKHDPDDewzHlAhf4efCWUNnMvYjkd+Bi92j4auiOk5wFDJ mw9uc8tLgmm0s6Jqxws1MaSDxQ7VtY55oSvQJleCGmXFMBB7Hmz3BhVbjAZRhGd8 m0C7162QXywH9o= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; s=default; bh=6rYZtjwXJRIBhuGK4RVsBEUJHgw=; b=vGD M8Ko/E/Xny837EMwNV/JYAfOEGMgFP2URCIz4AlCYmUysREL1coyKQVXSt91mQk6 5Pb4zOFNk3U6Xpa9VEWO9ffWeO6qkbsXQaB6z2zkx4d1l4o7/fNeQdVbRu2+jGQV k3oynZpb4hIKRfCCR2BU+AjKGLV8J61M9S6a/I2M= Received: (qmail 84027 invoked by alias); 5 Sep 2018 11:50:04 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 83918 invoked by uid 89); 5 Sep 2018 11:50:03 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-24.5 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3 autolearn=ham version=3.3.2 spammy=HX-detected-operating-system:Windows X-HELO: eggs.gnu.org Received: from eggs.gnu.org (HELO eggs.gnu.org) (208.118.235.92) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Wed, 05 Sep 2018 11:50:02 +0000 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1fxWJr-0002hd-Uj for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:50:00 -0400 Received: from 
relay1.mentorg.com ([192.94.38.131]:45363) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1fxWJr-0002gW-ND for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:49:59 -0400 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=svr-ies-mbx-01.mgc.mentorg.com) by relay1.mentorg.com with esmtps (TLSv1.2:ECDHE-RSA-AES256-SHA384:256) id 1fxWJq-0005Yz-Sa from Andrew_Stubbs@mentor.com for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 04:49:58 -0700 Received: from build6-trusty-cs.sje.mentorg.com (137.202.0.90) by svr-ies-mbx-01.mgc.mentorg.com (139.181.222.1) with Microsoft SMTP Server (TLS) id 15.0.1320.4; Wed, 5 Sep 2018 12:49:54 +0100 From: To: Subject: [PATCH 07/25] [pr82089] Don't sign-extend SFV 1 in BImode Date: Wed, 5 Sep 2018 12:49:39 +0100 Message-ID: In-Reply-To: References: MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: Windows NT kernel [generic] [fuzzy] X-Received-From: 192.94.38.131 This is an update of the patch posted to PR82089 long ago. We ran into the same bug on GCN, so we need this fixed as part of this series. 2018-09-05 Andrew Stubbs Tom de Vries PR82089 gcc/ * expmed.c (emit_cstore): Fix handling of result_mode == BImode and STORE_FLAG_VALUE == 1. --- gcc/expmed.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/gcc/expmed.c b/gcc/expmed.c index 29ce10b..0b87fdc 100644 --- a/gcc/expmed.c +++ b/gcc/expmed.c @@ -5464,11 +5464,18 @@ emit_cstore (rtx target, enum insn_code icode, enum rtx_code code, If STORE_FLAG_VALUE does not have the sign bit set when interpreted in MODE, we can do this conversion as unsigned, which is usually more efficient. 
*/ - if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (result_mode)) + if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (result_mode) + || (result_mode == BImode && int_target_mode != BImode)) { - convert_move (target, subtarget, - val_signbit_known_clear_p (result_mode, - STORE_FLAG_VALUE)); + gcc_assert (GET_MODE_SIZE (result_mode) != 1 + || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1); + bool unsignedp + = (GET_MODE_SIZE (result_mode) == 1 + ? STORE_FLAG_VALUE == 1 + : val_signbit_known_clear_p (result_mode, STORE_FLAG_VALUE)); + + convert_move (target, subtarget, unsignedp); + op0 = target; result_mode = int_target_mode; } From patchwork Wed Sep 5 11:49:40 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Stubbs X-Patchwork-Id: 966333 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-485169-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="yhbes/Z3"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 42529G3v9xz9s5c for ; Wed, 5 Sep 2018 21:51:30 +1000 (AEST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; q=dns; s=default; b=UwrN13qa9qQLB6g+2AG+4qGd6gtpI 
dYRoEz7q7hwuvQA7cvJ45lEGQ5TrC/4tI/PqFpCibNE6bcch+Md8vaOPBEX8FVhE tOBhx2BJKwTFXmTDUyA2pK5EN0OC8CVzIimzogeolIi1+A6FSr0syIy/Oz7I64kE hIZ19wjZnZmXlk= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; s=default; bh=3UPwfcsvjjRuBAjjiK9owcrJhw0=; b=yhb es/Z3oJBsn4SnJq2voMSzakCTnAi6pZUEfdzf234db54Ar3WPq1iwiBwOgAMjcRG cbTuSNUS+5c4sYLKXyzntQhyGt7mSRIYje5l9ZqQHOcctanI3h6rn2utJyjVr7TW 75cBwsOTH/l6i1Rvx6EfzkyoRV37LzV3DL/ySSwI= Received: (qmail 84456 invoked by alias); 5 Sep 2018 11:50:07 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 84361 invoked by uid 89); 5 Sep 2018 11:50:06 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-24.6 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3 autolearn=ham version=3.3.2 spammy=HX-detected-operating-system:Windows X-HELO: eggs.gnu.org Received: from eggs.gnu.org (HELO eggs.gnu.org) (208.118.235.92) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Wed, 05 Sep 2018 11:50:05 +0000 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1fxWJu-0002oN-CO for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:50:03 -0400 Received: from relay1.mentorg.com ([192.94.38.131]:45367) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1fxWJu-0002nR-4r for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:50:02 -0400 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=svr-ies-mbx-01.mgc.mentorg.com) by relay1.mentorg.com with esmtps (TLSv1.2:ECDHE-RSA-AES256-SHA384:256) id 1fxWJt-0005ZF-9t from Andrew_Stubbs@mentor.com for 
gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 04:50:01 -0700 Received: from build6-trusty-cs.sje.mentorg.com (137.202.0.90) by svr-ies-mbx-01.mgc.mentorg.com (139.181.222.1) with Microsoft SMTP Server (TLS) id 15.0.1320.4; Wed, 5 Sep 2018 12:49:57 +0100 From: To: Subject: [PATCH 08/25] Fix co-array allocation Date: Wed, 5 Sep 2018 12:49:40 +0100 Message-ID: <024e798b9539b765a1259cfc9cb2f1dc480b24ca.1536144068.git.ams@codesourcery.com> In-Reply-To: References: MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: Windows NT kernel [generic] [fuzzy] X-Received-From: 192.94.38.131 The Fortran front-end has a bug in which it uses "int" values for "size_t" parameters. I don't know why this isn't a problem for all 64-bit architectures, but GCN ends up with the data in the wrong argument register and/or stack slot, and bad things happen. This patch corrects the issue by setting the correct type. 2018-09-05 Kwok Cheung Yeung gcc/fortran/ * trans-expr.c (gfc_trans_structure_assign): Ensure that integer_zero_node is of sizetype when used as the first argument of a call to _gfortran_caf_register. * trans-intrinsic.c (conv_intrinsic_event_query): Convert computed index to a size_t type. * trans-stmt.c (gfc_trans_event_post_wait): Likewise. --- gcc/fortran/trans-expr.c | 2 +- gcc/fortran/trans-intrinsic.c | 3 ++- gcc/fortran/trans-stmt.c | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/gcc/fortran/trans-expr.c b/gcc/fortran/trans-expr.c index 56ce98c..91be3fb 100644 --- a/gcc/fortran/trans-expr.c +++ b/gcc/fortran/trans-expr.c @@ -7729,7 +7729,7 @@ gfc_trans_structure_assign (tree dest, gfc_expr * expr, bool init, bool coarray) suffices to recognize the data as array. 
*/ if (rank < 0) rank = 1; - size = integer_zero_node; + size = fold_convert (sizetype, integer_zero_node); desc = field; gfc_add_modify (&block, gfc_conv_descriptor_rank (desc), build_int_cst (signed_char_type_node, rank)); diff --git a/gcc/fortran/trans-intrinsic.c b/gcc/fortran/trans-intrinsic.c index b2cea93..23c13da 100644 --- a/gcc/fortran/trans-intrinsic.c +++ b/gcc/fortran/trans-intrinsic.c @@ -10732,7 +10732,8 @@ conv_intrinsic_event_query (gfc_code *code) tmp = fold_build2_loc (input_location, MULT_EXPR, integer_type_node, extent, tmp); index = fold_build2_loc (input_location, PLUS_EXPR, - integer_type_node, index, tmp); + size_type_node, index, + fold_convert (size_type_node, tmp)); if (i < ar->dimen - 1) { ubound = gfc_conv_descriptor_ubound_get (desc, gfc_rank_cst[i]); diff --git a/gcc/fortran/trans-stmt.c b/gcc/fortran/trans-stmt.c index 795d3cc..2c59675 100644 --- a/gcc/fortran/trans-stmt.c +++ b/gcc/fortran/trans-stmt.c @@ -1096,7 +1096,8 @@ gfc_trans_event_post_wait (gfc_code *code, gfc_exec_op op) tmp = fold_build2_loc (input_location, MULT_EXPR, integer_type_node, extent, tmp); index = fold_build2_loc (input_location, PLUS_EXPR, - integer_type_node, index, tmp); + size_type_node, index, + fold_convert (size_type_node, tmp)); if (i < ar->dimen - 1) { ubound = gfc_conv_descriptor_ubound_get (desc, gfc_rank_cst[i]); From patchwork Wed Sep 5 11:49:41 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Stubbs X-Patchwork-Id: 966334 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-485170-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: 
ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="Webqd6gC"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 42529W4pYMz9s5c for ; Wed, 5 Sep 2018 21:51:43 +1000 (AEST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; q=dns; s=default; b=WroPyn7T7ewCqbfTQYDWh5A6/BXdK C9ZpqUlAZJEPz6qLGvygSHjb5BF6VvCWB0+U3Aao2IJYKFCHAVzU8lGyh6G/2Yp/ /8NxL2zszGReZkWPg0RzeHp6X7i7JSSFyhVIkx4+jJzEFCFfwftsO76vCVQu7qnt 9YMRNy2Q/kTwCY= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; s=default; bh=745j7nPdKhUIZ/4rGHqy2qd2TMM=; b=Web qd6gC7WqVGXAQT3q6Z06xVtVcKHjRtYYO/hfaHcCU9Sm45y3WsiZKYI/rzbmSJ94 WLEtQTPzn7FUBkWmcW/yoNNzgy4DpfUgO4n5QUUskQ4gzEh6tm6iF3N+01UmkarO G4AdVTKbYb97sZQJjlunKg1EktAgFcGRUT5lNpFM= Received: (qmail 84694 invoked by alias); 5 Sep 2018 11:50:08 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 84588 invoked by uid 89); 5 Sep 2018 11:50:08 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-24.6 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3 autolearn=ham version=3.3.2 spammy=HX-detected-operating-system:Windows X-HELO: eggs.gnu.org Received: from eggs.gnu.org (HELO eggs.gnu.org) (208.118.235.92) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) 
with ESMTP; Wed, 05 Sep 2018 11:50:06 +0000 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1fxWJw-0002s3-KO for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:50:05 -0400 Received: from relay1.mentorg.com ([192.94.38.131]:45369) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1fxWJw-0002q9-AY for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:50:04 -0400 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=svr-ies-mbx-01.mgc.mentorg.com) by relay1.mentorg.com with esmtps (TLSv1.2:ECDHE-RSA-AES256-SHA384:256) id 1fxWJv-0005Zx-G2 from Andrew_Stubbs@mentor.com for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 04:50:03 -0700 Received: from build6-trusty-cs.sje.mentorg.com (137.202.0.90) by svr-ies-mbx-01.mgc.mentorg.com (139.181.222.1) with Microsoft SMTP Server (TLS) id 15.0.1320.4; Wed, 5 Sep 2018 12:49:59 +0100 From: To: Subject: [PATCH 09/25] Elide repeated RTL elements. Date: Wed, 5 Sep 2018 12:49:41 +0100 Message-ID: <626ff7cb294733fe61c5e7b05de436400bd9c262.1536144068.git.ams@codesourcery.com> In-Reply-To: References: MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: Windows NT kernel [generic] [fuzzy] X-Received-From: 192.94.38.131 GCN's 64-lane vectors tend to make RTL dumps very long. This patch makes them far more bearable by eliding long sequences of the same element into "repeated" messages. 2018-09-05 Andrew Stubbs Jan Hubicka Martin Jambor * print-rtl.c (print_rtx_operand_codes_E_and_V): Print how many times the same elements are repeated rather than printing all of them. 
--- gcc/print-rtl.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/gcc/print-rtl.c b/gcc/print-rtl.c index 5dd2e31..8a04264 100644 --- a/gcc/print-rtl.c +++ b/gcc/print-rtl.c @@ -370,7 +370,20 @@ rtx_writer::print_rtx_operand_codes_E_and_V (const_rtx in_rtx, int idx) m_sawclose = 1; for (int j = 0; j < XVECLEN (in_rtx, idx); j++) - print_rtx (XVECEXP (in_rtx, idx, j)); + { + int j1; + + print_rtx (XVECEXP (in_rtx, idx, j)); + for (j1 = j + 1; j1 < XVECLEN (in_rtx, idx); j1++) + if (XVECEXP (in_rtx, idx, j) != XVECEXP (in_rtx, idx, j1)) + break; + + if (j1 != j + 1) + { + fprintf (m_outfile, " repeated %ix", j1 - j); + j = j1 - 1; + } + } m_indent -= 2; } From patchwork Wed Sep 5 11:50:25 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Stubbs X-Patchwork-Id: 966335 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-485171-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="htXmV78P"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 42529m0XjPz9sCf for ; Wed, 5 Sep 2018 21:51:55 +1000 (AEST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; q=dns; s=default; 
b=aGJYvy1rFiHf6aUUvL54NWdrqCqJK nB11Jh3REfGTNj+FuB1k3hbI+ALwMLOKgUkBEIhSV2SdaHJIFL6mi4h52lutWHbw ew/yGN2rYFTGMhrQ/h+L6abNVD54/c0Z1TeZg2KgaE7cBFtZItg4egvrJy1wGZhl jKsu/wdHklo6O8= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; s=default; bh=R69NxrkeJTUhhhzYjDKrGMTkUK4=; b=htX mV78PGrYPphUwI4gs1gAlH6SBDlntOa+OHqcA9PBcVp2ddWe+dM5Pr6DcEljjgLo nZ2Hnc4FdkOiMlJil1Fg2uvFCBS4gEwOirdQSLqPegTmbThoLBeC/3S4Wkn8oZud u+Kw2PiJHbe2ZkQYy7FLXlT58dwsWm+pKmXz/jpQ= Received: (qmail 89465 invoked by alias); 5 Sep 2018 11:50:51 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 89331 invoked by uid 89); 5 Sep 2018 11:50:51 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-24.6 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3 autolearn=ham version=3.3.2 spammy=reg_equiv, masks, REG_EQUIV X-HELO: eggs.gnu.org Received: from eggs.gnu.org (HELO eggs.gnu.org) (208.118.235.92) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Wed, 05 Sep 2018 11:50:49 +0000 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1fxWKb-0004Jx-Uk for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:50:47 -0400 Received: from relay1.mentorg.com ([192.94.38.131]:45430) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1fxWKb-0004J7-Mk for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:50:45 -0400 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=svr-ies-mbx-01.mgc.mentorg.com) by relay1.mentorg.com with esmtps (TLSv1.2:ECDHE-RSA-AES256-SHA384:256) id 1fxWKa-0005gl-RO from 
Andrew_Stubbs@mentor.com for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 04:50:44 -0700 Received: from build6-trusty-cs.sje.mentorg.com (137.202.0.90) by svr-ies-mbx-01.mgc.mentorg.com (139.181.222.1) with Microsoft SMTP Server (TLS) id 15.0.1320.4; Wed, 5 Sep 2018 12:50:40 +0100 From: To: Subject: [PATCH 10/25] Convert BImode vectors. Date: Wed, 5 Sep 2018 12:50:25 +0100 Message-ID: <98eaa10bbdf6dd8d9142362184e23bc90c5d612f.1536144068.git.ams@codesourcery.com> In-Reply-To: References: MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: Windows NT kernel [generic] [fuzzy] X-Received-From: 192.94.38.131 GCN uses V64BImode to represent vector masks in the middle-end, and DImode bit-masks to represent them in the back-end. These must be converted at expand time and the most convenient way is to simply use a SUBREG. This works fine except that simplify_subreg needs to be able to convert immediates, mostly for REG_EQUAL and REG_EQUIV, and currently does not know how to convert vectors to integers where there is more than one element per byte. This patch implements such conversions for the cases that we need. I don't know why this is not a problem for other targets that use BImode vectors, such as ARM SVE, so it's possible I missed some magic somewhere? 2018-09-05 Andrew Stubbs gcc/ * simplify-rtx.c (convert_packed_vector): New function. (simplify_immed_subreg): Recognised Boolean vectors and call convert_packed_vector. --- gcc/simplify-rtx.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index b4c6883..89487f2 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -5976,6 +5976,73 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode, return 0; } +/* Convert a CONST_INT to a CONST_VECTOR, or vice versa. + + This should only occur for VECTOR_BOOL_MODE types, so the semantics + specified by that are assumed. 
In particular, the lowest value is + in the first byte. */ + +static rtx +convert_packed_vector (fixed_size_mode to_mode, rtx op, + machine_mode from_mode, unsigned int byte, + unsigned int first_elem, unsigned int inner_bytes) +{ + /* Sizes greater than HOST_WIDE_INT would need a better implementation. */ + gcc_assert (GET_MODE_SIZE (to_mode) <= sizeof (HOST_WIDE_INT)); + + if (GET_CODE (op) == CONST_VECTOR) + { + gcc_assert (!VECTOR_MODE_P (to_mode)); + + int num_elem = GET_MODE_NUNITS (from_mode).to_constant(); + int elem_bitsize = (GET_MODE_SIZE (from_mode).to_constant() + * BITS_PER_UNIT) / num_elem; + int elem_mask = (1 << elem_bitsize) - 1; + HOST_WIDE_INT subreg_mask = + (sizeof (HOST_WIDE_INT) == GET_MODE_SIZE (to_mode) + ? -1 + : (((HOST_WIDE_INT)1 << (GET_MODE_SIZE (to_mode) * BITS_PER_UNIT)) + - 1)); + + HOST_WIDE_INT val = 0; + for (int i = 0; i < num_elem; i++) + val |= ((INTVAL (CONST_VECTOR_ELT (op, i)) & elem_mask) + << (i * elem_bitsize)); + + val >>= byte * BITS_PER_UNIT; + val &= subreg_mask; + + return gen_rtx_CONST_INT (VOIDmode, val); + } + else if (GET_CODE (op) == CONST_INT) + { + /* Subregs of a vector not implemented yet. */ + gcc_assert (maybe_eq (GET_MODE_SIZE (to_mode), + GET_MODE_SIZE (from_mode))); + + gcc_assert (VECTOR_MODE_P (to_mode)); + + int num_elem = GET_MODE_NUNITS (to_mode); + int elem_bitsize = (GET_MODE_SIZE (to_mode) * BITS_PER_UNIT) / num_elem; + int elem_mask = (1 << elem_bitsize) - 1; + + rtvec val = rtvec_alloc (num_elem); + rtx *elem = &RTVEC_ELT (val, 0); + + for (int i = 0; i < num_elem; i++) + elem[i] = gen_rtx_CONST_INT (VOIDmode, + (INTVAL (op) >> (i * elem_bitsize)) + & elem_mask); + + return gen_rtx_CONST_VECTOR (to_mode, val); + } + else + { + gcc_unreachable (); + return op; + } +} + /* Evaluate a SUBREG of a CONST_INT or CONST_WIDE_INT or CONST_DOUBLE or CONST_FIXED or CONST_VECTOR, returning another CONST_INT or CONST_WIDE_INT or CONST_DOUBLE or CONST_FIXED or CONST_VECTOR. 
@@ -6017,6 +6084,15 @@ simplify_immed_subreg (fixed_size_mode outermode, rtx op, if (COMPLEX_MODE_P (outermode)) return NULL_RTX; + /* Vectors with multiple elements per byte are a special case. */ + if ((VECTOR_MODE_P (innermode) + && ((GET_MODE_NUNITS (innermode).to_constant() + / GET_MODE_SIZE(innermode).to_constant()) > 1)) + || (VECTOR_MODE_P (outermode) + && (GET_MODE_NUNITS (outermode) / GET_MODE_SIZE(outermode) > 1))) + return convert_packed_vector (outermode, op, innermode, byte, first_elem, + inner_bytes); + /* We support any size mode. */ max_bitsize = MAX (GET_MODE_BITSIZE (outermode), inner_bytes * BITS_PER_UNIT); From patchwork Wed Sep 5 11:50:26 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Stubbs X-Patchwork-Id: 966336 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-485172-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="lDctZ0Y0"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 4252B13Gwxz9s5c for ; Wed, 5 Sep 2018 21:52:09 +1000 (AEST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; q=dns; s=default; b=GpBRQ4ulOTedQ/k4vpRKTkiIt9Rw3 
r1VJE4uBkiACLTVmrI/By3H6byh+XkZpjfBSDPRZYysItQQFCIVmkiT7hOqMVntk pCXRfY8j+Ky0H6H0Q1fsyMvL8ckHKhqpOUgxrkmT4S1PXsarLh5UjBOS1ZIb6zcv sg9RItLjEyjpLU= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; s=default; bh=/GOwOhDS714tKFQXwCeVCcMOIF8=; b=lDc tZ0Y0Tr0cE2e5OInTtmq9Ic3a/a6CogmN06XR7P0wjb3b0aefb1S834e8dg/Qe2n a01CpN9EbGze7xMfrgDPcABzLJ2TgZUHAzEsT0Ra45/sD2Z5NBD08/iYjFA1PoHl OgMg5o5xNlyDa8zZxBrI2OsmTtQ6ZJphp30fRPyc= Received: (qmail 89727 invoked by alias); 5 Sep 2018 11:50:53 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 89643 invoked by uid 89); 5 Sep 2018 11:50:53 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-24.6 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3 autolearn=ham version=3.3.2 spammy=became, inactive, HX-detected-operating-system:Windows X-HELO: eggs.gnu.org Received: from eggs.gnu.org (HELO eggs.gnu.org) (208.118.235.92) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Wed, 05 Sep 2018 11:50:51 +0000 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1fxWKe-0004Np-KY for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:50:49 -0400 Received: from relay1.mentorg.com ([192.94.38.131]:45434) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1fxWKe-0004MD-Bs for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:50:48 -0400 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=svr-ies-mbx-01.mgc.mentorg.com) by relay1.mentorg.com with esmtps (TLSv1.2:ECDHE-RSA-AES256-SHA384:256) id 1fxWKd-0005gz-Gu from 
Andrew_Stubbs@mentor.com for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 04:50:47 -0700 Received: from build6-trusty-cs.sje.mentorg.com (137.202.0.90) by svr-ies-mbx-01.mgc.mentorg.com (139.181.222.1) with Microsoft SMTP Server (TLS) id 15.0.1320.4; Wed, 5 Sep 2018 12:50:43 +0100 From: To: Subject: [PATCH 11/25] Simplify vec_merge according to the mask. Date: Wed, 5 Sep 2018 12:50:26 +0100 Message-ID: <333c6a5ad2f1c56613c4734a830fc5d8214061d9.1536144068.git.ams@codesourcery.com> In-Reply-To: References: MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: Windows NT kernel [generic] [fuzzy] X-Received-From: 192.94.38.131 This patch was part of the original patch we acquired from Honza and Martin. It simplifies vector elements that are inactive, according to the mask. 2018-09-05 Jan Hubicka Martin Jambor * simplify-rtx.c (simplify_merge_mask): New function. (simplify_ternary_operation): Use it, also see if VEC_MERGEs with the same masks are used in op1 or op2. --- gcc/simplify-rtx.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index 89487f2..6f27bda 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -5578,6 +5578,65 @@ simplify_cond_clz_ctz (rtx x, rtx_code cmp_code, rtx true_val, rtx false_val) return NULL_RTX; } +/* X is an operand number OP of VEC_MERGE operation with MASK. + Try to simplify using knowledge that values outside of MASK + will not be used. 
*/ + +rtx +simplify_merge_mask (rtx x, rtx mask, int op) +{ + gcc_assert (VECTOR_MODE_P (GET_MODE (x))); + poly_uint64 nunits = GET_MODE_NUNITS (GET_MODE (x)); + if (GET_CODE (x) == VEC_MERGE && rtx_equal_p (XEXP (x, 2), mask)) /* Same MASK as the enclosing merge: replace X by its arm OP. */ + { + if (!side_effects_p (XEXP (x, 1 - op))) + return XEXP (x, op); + } + if (side_effects_p (x)) + return NULL_RTX; + if (UNARY_P (x) + && VECTOR_MODE_P (GET_MODE (XEXP (x, 0))) + && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits)) + { + rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op); + if (top0) + return simplify_gen_unary (GET_CODE (x), GET_MODE (x), top0, + GET_MODE (XEXP (x, 0))); + } + if (BINARY_P (x) + && VECTOR_MODE_P (GET_MODE (XEXP (x, 0))) + && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits) + && VECTOR_MODE_P (GET_MODE (XEXP (x, 1))) + && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits)) + { + rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op); + rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op); + if (top0 || top1) + return simplify_gen_binary (GET_CODE (x), GET_MODE (x), + top0 ? top0 : XEXP (x, 0), + top1 ? top1 : XEXP (x, 1)); + } + if (GET_RTX_CLASS (GET_CODE (x)) == RTX_TERNARY + && VECTOR_MODE_P (GET_MODE (XEXP (x, 0))) + && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits) + && VECTOR_MODE_P (GET_MODE (XEXP (x, 1))) + && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits) + && VECTOR_MODE_P (GET_MODE (XEXP (x, 2))) + && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 2))), nunits)) + { + rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op); + rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op); + rtx top2 = simplify_merge_mask (XEXP (x, 2), mask, op); + if (top0 || top1 || top2) /* A simplification of any operand, including the third, is enough. */ + return simplify_gen_ternary (GET_CODE (x), GET_MODE (x), + GET_MODE (XEXP (x, 0)), + top0 ? top0 : XEXP (x, 0), + top1 ? top1 : XEXP (x, 1), + top2 ?
top2 : XEXP (x, 2)); + } + return NULL_RTX; +} + /* Simplify CODE, an operation with result mode MODE and three operands, OP0, OP1, and OP2. OP0_MODE was the mode of OP0 before it became @@ -5967,6 +6026,28 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode, && !side_effects_p (op2) && !side_effects_p (op1)) return op0; + if (!side_effects_p (op2)) + { + rtx top0 = simplify_merge_mask (op0, op2, 0); + rtx top1 = simplify_merge_mask (op1, op2, 1); + if (top0 || top1) + return simplify_gen_ternary (code, mode, mode, + top0 ? top0 : op0, + top1 ? top1 : op1, op2); + } + + if (GET_CODE (op0) == VEC_MERGE + && rtx_equal_p (op2, XEXP (op0, 2)) + && !side_effects_p (XEXP (op0, 1)) && !side_effects_p (op2)) + return simplify_gen_ternary (code, mode, mode, + XEXP (op0, 0), op1, op2); /* Arm 1 of the inner merge is dropped. */ + + if (GET_CODE (op1) == VEC_MERGE + && rtx_equal_p (op2, XEXP (op1, 2)) + && !side_effects_p (XEXP (op1, 0)) && !side_effects_p (op2)) + return simplify_gen_ternary (code, mode, mode, + op0, XEXP (op1, 1), op2); /* Arm 0 of the inner merge is dropped. */ + break; default: From patchwork Wed Sep 5 11:50:27 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Stubbs X-Patchwork-Id: 966337 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-485173-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="elEIEaVv"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by
ozlabs.org (Postfix) with ESMTPS id 4252BF2wWBz9s5c for ; Wed, 5 Sep 2018 21:52:21 +1000 (AEST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; q=dns; s=default; b=ZGx+MpUVCLASA7wk3HNsaef3DBANR RdYECUV2HYakCdat3qh9uKX9XoY8nC7kgvaiLVH0umOdZoWyKh1ErGHnN0BeYVtV zKRSDk3vp/KS+1c1fW5uXykX79/8n0+pCztcS7Rrqe/K6UB7MGC02fxxt7A7dKrY TB1BD+OaPGcgAw= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; s=default; bh=n+SdMQUYgsvWdEe4Ut7uesbFI0g=; b=elE IEaVvK30GvoUn/Sq8r+dls5GWEs7FqnZlgo1S8sk6m1+VfAmqiI7VZGj8FNw9CdC PPhoKLKJ+jtIbl1rYiYV6aM6BlBRcENlH3cP/UDrZdtys0V17nZDOTe1XQsuC/lu rXkj+hCPUXLXrZfA/+hBm6ACFGSeSv8M69kaUdJ8= Received: (qmail 90020 invoked by alias); 5 Sep 2018 11:50:56 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 89908 invoked by uid 89); 5 Sep 2018 11:50:55 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-24.7 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3 autolearn=ham version=3.3.2 spammy=easiest X-HELO: eggs.gnu.org Received: from eggs.gnu.org (HELO eggs.gnu.org) (208.118.235.92) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Wed, 05 Sep 2018 11:50:53 +0000 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1fxWKh-0004Uc-5j for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:50:52 -0400 Received: from relay1.mentorg.com ([192.94.38.131]:45438) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) 
(Exim 4.71) (envelope-from ) id 1fxWKg-0004RJ-UY for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:50:51 -0400 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=svr-ies-mbx-01.mgc.mentorg.com) by relay1.mentorg.com with esmtps (TLSv1.2:ECDHE-RSA-AES256-SHA384:256) id 1fxWKg-0005ip-0t from Andrew_Stubbs@mentor.com for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 04:50:50 -0700 Received: from build6-trusty-cs.sje.mentorg.com (137.202.0.90) by svr-ies-mbx-01.mgc.mentorg.com (139.181.222.1) with Microsoft SMTP Server (TLS) id 15.0.1320.4; Wed, 5 Sep 2018 12:50:45 +0100 From: To: Subject: [PATCH 12/25] Make default_static_chain return NULL in non-static functions Date: Wed, 5 Sep 2018 12:50:27 +0100 Message-ID: In-Reply-To: References: MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: Windows NT kernel [generic] [fuzzy] X-Received-From: 192.94.38.131 This patch allows default_static_chain to be called from the back-end without it knowing if the function is static or not. Or, to put it another way, without duplicating the check everywhere it's used. 2018-09-05 Tom de Vries gcc/ * targhooks.c (default_static_chain): Return NULL in non-static functions. --- gcc/targhooks.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/gcc/targhooks.c b/gcc/targhooks.c index afd56f3..742cfbf 100644 --- a/gcc/targhooks.c +++ b/gcc/targhooks.c @@ -1021,8 +1021,14 @@ default_internal_arg_pointer (void) } rtx -default_static_chain (const_tree ARG_UNUSED (fndecl_or_type), bool incoming_p) +default_static_chain (const_tree fndecl_or_type, bool incoming_p) { + /* While this function won't be called by the middle-end when a static + chain isn't needed, it's also used throughout the backend so it's + easiest to keep this check centralized. 
*/ + if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type)) + return NULL; + if (incoming_p) { #ifdef STATIC_CHAIN_INCOMING_REGNUM From patchwork Wed Sep 5 11:50:28 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Stubbs X-Patchwork-Id: 966339 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-485175-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="HweSyHpj"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 4252Bj5WYQz9s5c for ; Wed, 5 Sep 2018 21:52:45 +1000 (AEST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; q=dns; s=default; b=f2jCStKvRB0O8wGJFCwzzxhUAa3O/ PcSC7tzJdPWPU/WjDyli6wGhUcnHxogMmD4Sl1yZ0Pz1Kz8DMKoMdlDtITJ5iE+y 7cYZbsUtgpsAJu6KYn5JBfWJaHvu+BT7WPBBedQqGNGsYz/aS+k+1FmvMeAuYC2K ggtZA3O//L8t40= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; s=default; bh=NysX/dyGYW7oRAo2eU3HtTnoi+o=; b=Hwe SyHpj8Xa5kyrMiqKJiQ9HbBNMgNQCHBZrJGeKVOYqcn/VFsRubEytln8d6rSsLO1 SUjJ5x68O62pt+gYdaZUi/hRpBahYIUKYcoHR1nw2BT14y55O9ZOGbUUv4VDwqvr 
XSr2h5UvvM2HWBMRRr8wQJjr2Q6jgF9KBttTrtRE= Received: (qmail 90619 invoked by alias); 5 Sep 2018 11:51:00 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 90496 invoked by uid 89); 5 Sep 2018 11:50:59 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-24.7 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3 autolearn=ham version=3.3.2 spammy=stick, caches X-HELO: eggs.gnu.org Received: from eggs.gnu.org (HELO eggs.gnu.org) (208.118.235.92) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Wed, 05 Sep 2018 11:50:58 +0000 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1fxWKj-0004Yd-If for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:50:56 -0400 Received: from relay1.mentorg.com ([192.94.38.131]:45451) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1fxWKj-0004Xp-9X for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:50:53 -0400 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=svr-ies-mbx-01.mgc.mentorg.com) by relay1.mentorg.com with esmtps (TLSv1.2:ECDHE-RSA-AES256-SHA384:256) id 1fxWKi-0005kj-Eo from Andrew_Stubbs@mentor.com for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 04:50:52 -0700 Received: from build6-trusty-cs.sje.mentorg.com (137.202.0.90) by svr-ies-mbx-01.mgc.mentorg.com (139.181.222.1) with Microsoft SMTP Server (TLS) id 15.0.1320.4; Wed, 5 Sep 2018 12:50:48 +0100 From: To: Subject: [PATCH 13/25] Create TARGET_DISABLE_CURRENT_VECTOR_SIZE Date: Wed, 5 Sep 2018 12:50:28 +0100 Message-ID: <85da45b3271492b67b7f2a6f9474ce7a153e200c.1536144068.git.ams@codesourcery.com> In-Reply-To: References: MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: Windows NT kernel 
[generic] [fuzzy] X-Received-From: 192.94.38.131 This feature probably ought to be reworked as a proper target hook, but I would like to know if this is the correct solution to the problem first. The problem is that GCN vectors have a fixed number of elements (64) and the vector size varies with element size. E.g. V64QI is 64 bytes and V64SI is 256 bytes. This is a problem because GCC has an assumption that a) vector registers are fixed size, and b) if there are multiple vector sizes you want to pick one size and stick with it for the whole function. This is a problem in various places, but mostly it's not fatal. However, get_vectype_for_scalar_type caches the vector size for the first type it encounters and then tries to apply that to all subsequent vectors, which completely destroys vectorization. The caching feature appears to be an attempt to cope with AVX having a different vector size to other x86 vector options. This patch simply disables the cache so that it must ask the backend for the preferred mode for every type. 2018-09-05 Andrew Stubbs gcc/ * tree-vect-stmts.c (get_vectype_for_scalar_type): Implement TARGET_DISABLE_CURRENT_VECTOR_SIZE. --- gcc/tree-vect-stmts.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 607a2bd..8875201 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -9945,9 +9945,12 @@ get_vectype_for_scalar_type (tree scalar_type) tree vectype; vectype = get_vectype_for_scalar_type_and_size (scalar_type, current_vector_size); +/* FIXME: use a proper target hook or macro. 
*/ +#ifndef TARGET_DISABLE_CURRENT_VECTOR_SIZE if (vectype && known_eq (current_vector_size, 0U)) current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype)); +#endif return vectype; } From patchwork Wed Sep 5 11:50:29 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Stubbs X-Patchwork-Id: 966338 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-485174-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="yi0fI5Un"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 4252BT1FwTz9s5c for ; Wed, 5 Sep 2018 21:52:32 +1000 (AEST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; q=dns; s=default; b=sKenAJJRccEWxM0h9vvqfVrSqNzlN f227BItH3dv/sn+J62ZSRM16U8saOg3qdDsgAxccMCadi0MzGU7T0KrRm3rQHIs+ Pqd0dp1QygYi3uXGf9dt8Rm4m0+Yr2DPySJdeJBdMskrinn55FN7t2K8U/2UXwo6 zegm+wozUZd298= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; s=default; bh=4YGcTn58IauYaDvmogl75gQNQvc=; b=yi0 fI5Un+rq4eaHvYu/BI6Cemo0+0zQuYYXYMjvmboNFs8cDamP3uVVuJUmeLTdIztI 
DD1mlb0UhvsuKc1nXkYYBb/phzMIJrlU/oRl/w8p5CreGoXcwx7+oBzATvqzD8I1 7dLElIqvfsGZusRuEnMoCgh5r9FG5d1Apss+lR+Q= Received: (qmail 90575 invoked by alias); 5 Sep 2018 11:51:00 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 90488 invoked by uid 89); 5 Sep 2018 11:50:59 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-24.7 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3 autolearn=ham version=3.3.2 spammy= X-HELO: eggs.gnu.org Received: from eggs.gnu.org (HELO eggs.gnu.org) (208.118.235.92) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Wed, 05 Sep 2018 11:50:58 +0000 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1fxWKm-0004eI-2S for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:50:56 -0400 Received: from relay1.mentorg.com ([192.94.38.131]:45455) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1fxWKl-0004cp-Pp for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:50:55 -0400 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=svr-ies-mbx-01.mgc.mentorg.com) by relay1.mentorg.com with esmtps (TLSv1.2:ECDHE-RSA-AES256-SHA384:256) id 1fxWKk-0005l9-Uc from Andrew_Stubbs@mentor.com for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 04:50:54 -0700 Received: from build6-trusty-cs.sje.mentorg.com (137.202.0.90) by svr-ies-mbx-01.mgc.mentorg.com (139.181.222.1) with Microsoft SMTP Server (TLS) id 15.0.1320.4; Wed, 5 Sep 2018 12:50:50 +0100 From: To: Subject: [PATCH 14/25] Disable inefficient vectorization of elementwise loads/stores. 
Date: Wed, 5 Sep 2018 12:50:29 +0100 Message-ID: In-Reply-To: References: MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: Windows NT kernel [generic] [fuzzy] X-Received-From: 192.94.38.131 If the autovectorizer tries to load a GCN 64-lane vector elementwise then it blows away the register file and produces horrible code. This patch simply disallows elementwise loads for such large vectors. Is there a better way to disable this in the middle-end? 2018-09-05 Julian Brown gcc/ * tree-vect-stmts.c (get_load_store_type): Don't use VMAT_ELEMENTWISE loads/stores with many-element (>=64) vectors. --- gcc/tree-vect-stmts.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 8875201..a333991 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -2452,6 +2452,26 @@ get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp, *memory_access_type = VMAT_CONTIGUOUS; } + /* FIXME: Element-wise accesses can be extremely expensive if we have a + large number of elements to deal with (e.g. 64 for AMD GCN) using the + current generic code expansion. Until an efficient code sequence is + supported for affected targets instead, don't attempt vectorization for + VMAT_ELEMENTWISE at all. 
*/ + if (*memory_access_type == VMAT_ELEMENTWISE) + { + poly_uint64 nelements = TYPE_VECTOR_SUBPARTS (vectype); + + if (maybe_ge (nelements, 64)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "too many elements (%u) for elementwise accesses\n", + (unsigned) nelements.to_constant ()); + + return false; + } + } + if ((*memory_access_type == VMAT_ELEMENTWISE || *memory_access_type == VMAT_STRIDED_SLP) && !nunits.is_constant ()) From patchwork Wed Sep 5 11:51:16 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Stubbs X-Patchwork-Id: 966340 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-485176-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="LXvsLKZU"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 4252Bx31Z4z9s5c for ; Wed, 5 Sep 2018 21:52:57 +1000 (AEST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; q=dns; s=default; b=degFTVbsKXJ0QUgthLMzUdwmcipV+ U9Tbek+ip/RlcW0z9i0i3XcJv7ytk8TW9XmjknssIkNJUPwcf6NqMElKaglS0WGA 4u1y4n12ZiM3tDB/XVcvr9LkcrLXuV2Z3NpPP0DEdgLsd5CGBQHb/g/3w7LdgqTF PW7T8whvm75aB0= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id 
:list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; s=default; bh=ubhGFTGjygepyCpzZM8o/9GMHJg=; b=LXv sLKZUZJB1vAEZ0RTQLY3XDnc2rgRsCjd1IshXzULLI1Zl9ZVMc+R8IbJsJyaTsIy CY8NQtu0VLuwsdT+bu/YW8eECY0AMg4KG4Y0YN1YgE0bHrKWC+hvIxMZcmmZJvlf z4P/ByBQQALXszZayhIt1aiYcSsLvHa5i5IlRGN8= Received: (qmail 95440 invoked by alias); 5 Sep 2018 11:51:40 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 95355 invoked by uid 89); 5 Sep 2018 11:51:39 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-24.7 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3 autolearn=ham version=3.3.2 spammy=alt, incidentally, freedom X-HELO: eggs.gnu.org Received: from eggs.gnu.org (HELO eggs.gnu.org) (208.118.235.92) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Wed, 05 Sep 2018 11:51:37 +0000 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1fxWLP-00064N-0l for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:51:36 -0400 Received: from relay1.mentorg.com ([192.94.38.131]:45544) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1fxWLO-00062r-Nu for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:51:34 -0400 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=svr-ies-mbx-01.mgc.mentorg.com) by relay1.mentorg.com with esmtps (TLSv1.2:ECDHE-RSA-AES256-SHA384:256) id 1fxWLN-0005ql-QS from Andrew_Stubbs@mentor.com for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 04:51:33 -0700 Received: from build6-trusty-cs.sje.mentorg.com (137.202.0.90) by svr-ies-mbx-01.mgc.mentorg.com (139.181.222.1) with Microsoft SMTP Server (TLS) id 15.0.1320.4; Wed, 5 Sep 
2018 12:51:29 +0100 From: To: Subject: [PATCH 15/25] Don't double-count early-clobber matches. Date: Wed, 5 Sep 2018 12:51:16 +0100 Message-ID: <99dbde9ad7afdc6199cfc2f024d8a039028fc208.1536144068.git.ams@codesourcery.com> In-Reply-To: References: MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: Windows NT kernel [generic] [fuzzy] X-Received-From: 192.94.38.131 Given a pattern with a number of operands: (match_operand 0 "" "=&v") (match_operand 1 "" " v0") (match_operand 2 "" " v0") (match_operand 3 "" " v0") GCC will currently increment "reject" once, for operand 0, and then decrement it once for each of the other operands, ending with reject == -2 and an assertion failure. If there's a conflict then it might try to decrement reject yet again. Incidentally, what these patterns are trying to achieve is an allocation in which operand 0 may match one of the other operands, but may not partially overlap any of them. Ideally there'd be a better way to do this. In any case, it will affect any pattern in which multiple operands may (or must) match an early-clobber operand. The patch only allows a reject-- when one has not already occurred, for that operand. 2018-09-05 Andrew Stubbs gcc/ * lra-constraints.c (process_alt_operands): Check matching_early_clobber before decrementing reject, and set matching_early_clobber after. * lra-int.h (struct lra_operand_data): Add matching_early_clobber. * lra.c (setup_operand_alternative): Initialize matching_early_clobber. 
--- gcc/lra-constraints.c | 22 ++++++++++++++-------- gcc/lra-int.h | 3 +++ gcc/lra.c | 1 + 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c index 8be4d46..55163f1 100644 --- a/gcc/lra-constraints.c +++ b/gcc/lra-constraints.c @@ -2202,7 +2202,13 @@ process_alt_operands (int only_alternative) " %d Matching earlyclobber alt:" " reject--\n", nop); - reject--; + if (!curr_static_id->operand[m] + .matching_early_clobber) + { + reject--; + curr_static_id->operand[m] + .matching_early_clobber = 1; + } } /* Otherwise we prefer no matching alternatives because it gives more freedom @@ -2948,15 +2954,11 @@ process_alt_operands (int only_alternative) curr_alt_dont_inherit_ops[curr_alt_dont_inherit_ops_num++] = last_conflict_j; losers++; - /* Early clobber was already reflected in REJECT. */ - lra_assert (reject > 0); if (lra_dump_file != NULL) fprintf (lra_dump_file, " %d Conflict early clobber reload: reject--\n", i); - reject--; - overall += LRA_LOSER_COST_FACTOR - 1; } else { @@ -2980,17 +2982,21 @@ process_alt_operands (int only_alternative) } curr_alt_win[i] = curr_alt_match_win[i] = false; losers++; - /* Early clobber was already reflected in REJECT. */ - lra_assert (reject > 0); if (lra_dump_file != NULL) fprintf (lra_dump_file, " %d Matched conflict early clobber reloads: " "reject--\n", i); + } + /* Early clobber was already reflected in REJECT. 
*/ + if (!curr_static_id->operand[i].matching_early_clobber) + { + lra_assert (reject > 0); reject--; - overall += LRA_LOSER_COST_FACTOR - 1; + curr_static_id->operand[i].matching_early_clobber = 1; } + overall += LRA_LOSER_COST_FACTOR - 1; } if (lra_dump_file != NULL) fprintf (lra_dump_file, " alt=%d,overall=%d,losers=%d,rld_nregs=%d\n", diff --git a/gcc/lra-int.h b/gcc/lra-int.h index 5267b53..f193e1f 100644 --- a/gcc/lra-int.h +++ b/gcc/lra-int.h @@ -147,6 +147,9 @@ struct lra_operand_data This field is set up every time when corresponding operand_alternative in lra_static_insn_data is set up. */ unsigned int early_clobber : 1; + /* True if there is an early clobber that has a matching alternative. + This field is used to prevent multiple matches being counted. */ + unsigned int matching_early_clobber : 1; /* True if the operand is an address. */ unsigned int is_address : 1; }; diff --git a/gcc/lra.c b/gcc/lra.c index aa768fb..01dd8b8 100644 --- a/gcc/lra.c +++ b/gcc/lra.c @@ -797,6 +797,7 @@ setup_operand_alternative (lra_insn_recog_data_t data, { static_data->operand[i].early_clobber_alts = 0; static_data->operand[i].early_clobber = false; + static_data->operand[i].matching_early_clobber = false; static_data->operand[i].is_address = false; if (static_data->operand[i].constraint[0] == '%') { From patchwork Wed Sep 5 11:51:17 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Stubbs X-Patchwork-Id: 966341 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-485177-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; 
unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="O3fndHmH"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 4252C94TSsz9s5c for ; Wed, 5 Sep 2018 21:53:09 +1000 (AEST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; q=dns; s=default; b=pKi+0/3K7OW0nyHlOYgCpKral9T+O sRUJDp+WHz+1F0mXorojWzpoT9vgHsG70baxpxnogNAJDZAWJKnyHV7BbbuwN0aF 20RgVng6XQltOQpIpkE+5MhgWWQlk+Rzuick2FBsy03SNgTqRgOl+p+l0pR6UAKX e0cOJQwXCeBH40= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; s=default; bh=X11d5rQ5Qh79sQt58UPI719QDlE=; b=O3f ndHmHrZPaMvnjCq45vFZ3Khs6qI7lLTkUrIMmUeVOj5nMxDqnP3HCBSHbJiHG8Sn lAyoPUwF1MEKLnNfM7phkxEybNGKjzFaQKiRWPwDFdFnTiPB87iqLuhAOctmkHrU gm+w4KWt7gOKgcL9nnzD/IWhZR3hWu9Uk++BGys4= Received: (qmail 95701 invoked by alias); 5 Sep 2018 11:51:42 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 95616 invoked by uid 89); 5 Sep 2018 11:51:41 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-24.7 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3 autolearn=ham version=3.3.2 spammy=begins, ICEs, ira, ices X-HELO: eggs.gnu.org Received: from eggs.gnu.org (HELO eggs.gnu.org) (208.118.235.92) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Wed, 05 Sep 2018 11:51:39 +0000 
Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1fxWLR-00067y-Hg for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:51:38 -0400 Received: from relay1.mentorg.com ([192.94.38.131]:45550) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1fxWLR-00066w-9U for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:51:37 -0400 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=svr-ies-mbx-01.mgc.mentorg.com) by relay1.mentorg.com with esmtps (TLSv1.2:ECDHE-RSA-AES256-SHA384:256) id 1fxWLQ-0005r1-DJ from Andrew_Stubbs@mentor.com for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 04:51:36 -0700 Received: from build6-trusty-cs.sje.mentorg.com (137.202.0.90) by svr-ies-mbx-01.mgc.mentorg.com (139.181.222.1) with Microsoft SMTP Server (TLS) id 15.0.1320.4; Wed, 5 Sep 2018 12:51:32 +0100 From: To: Subject: [PATCH 16/25] Fix IRA ICE. Date: Wed, 5 Sep 2018 12:51:17 +0100 Message-ID: <54304bb66c95238afe5a603eff894caf56ac19ca.1536144068.git.ams@codesourcery.com> In-Reply-To: References: MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: Windows NT kernel [generic] [fuzzy] X-Received-From: 192.94.38.131 The IRA pass makes an assumption that any pseudos created after the pass begins were created explicitly by the pass itself and therefore will have corresponding entries in its other tables. The GCN back-end, however, often creates additional pseudos, in expand patterns, to represent the necessary EXEC value, and these break IRA's assumption and cause ICEs. This patch simply has IRA skip unknown pseudos, and the problem goes away. Presumably, it's not ideal that these registers have not been processed by IRA, but it does not appear to do any real harm. 2018-09-05 Andrew Stubbs gcc/ * ira.c (setup_preferred_alternate_classes_for_new_pseudos): Skip pseudos not created by this pass. (move_unallocated_pseudos): Likewise. 
--- gcc/ira.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/gcc/ira.c b/gcc/ira.c index def194a..e0c293c 100644 --- a/gcc/ira.c +++ b/gcc/ira.c @@ -2769,7 +2769,12 @@ setup_preferred_alternate_classes_for_new_pseudos (int start) for (i = start; i < max_regno; i++) { old_regno = ORIGINAL_REGNO (regno_reg_rtx[i]); - ira_assert (i != old_regno); + + /* Skip any new pseudos not created directly by this pass. + gen_move_insn can do this on AMD GCN, for example. */ + if (i == old_regno) + continue; + setup_reg_classes (i, reg_preferred_class (old_regno), reg_alternate_class (old_regno), reg_allocno_class (old_regno)); @@ -5054,6 +5059,12 @@ move_unallocated_pseudos (void) { int idx = i - first_moveable_pseudo; rtx other_reg = pseudo_replaced_reg[idx]; + + /* Skip any new pseudos not created directly by find_moveable_pseudos. + gen_move_insn can do this on AMD GCN, for example. */ + if (!other_reg) + continue; + rtx_insn *def_insn = DF_REF_INSN (DF_REG_DEF_CHAIN (i)); /* The use must follow all definitions of OTHER_REG, so we can insert the new definition immediately after any of them. 
*/ From patchwork Wed Sep 5 11:51:18 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Stubbs X-Patchwork-Id: 966342 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-485178-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="lGXJm7Sn"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 4252CQ34bMz9s5c for ; Wed, 5 Sep 2018 21:53:22 +1000 (AEST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; q=dns; s=default; b=h7bSsbFqKWfH2qa7f43vsdSlU0JX0 wDdzwHgQR1GDMffWgjs4ihvYPA4iaeoeJ9hq4mM0NU5glhVFBH671lECVjyDhauE o7DjnRA+iMtARd5zoRI6FO9JrljRrY/xPiu4zoqDK2KC8RuIM9fEf/kV76Hl+5aT WlN03L2BE6PiPw= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; s=default; bh=tR4l8G9XuyipwoszMJNiHkxWoS8=; b=lGX Jm7SnWcwHMGeBxq6kasdYTavk/lEvvvipV7o3iZ5fC9/etlQksPd3ZFRqRMtRUUJ sOINXL6/ChPmn6nn92C+9MuldlMuNfA7EV1b0nOt6ANwa8sN7VyPfHkHB5hPr/Sh zkbusbdr0zKF2zGzJP+7l/QQ/x6nxRo68lzClG48= Received: (qmail 96200 invoked by alias); 5 Sep 2018 11:51:45 -0000 Mailing-List: contact 
gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 96113 invoked by uid 89); 5 Sep 2018 11:51:45 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-24.7 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3 autolearn=ham version=3.3.2 spammy=STOP, Hx-languages-length:1003 X-HELO: eggs.gnu.org Received: from eggs.gnu.org (HELO eggs.gnu.org) (208.118.235.92) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Wed, 05 Sep 2018 11:51:43 +0000 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1fxWLU-0006AU-A8 for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:51:42 -0400 Received: from relay1.mentorg.com ([192.94.38.131]:45561) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1fxWLU-00069g-0i for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:51:40 -0400 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=svr-ies-mbx-01.mgc.mentorg.com) by relay1.mentorg.com with esmtps (TLSv1.2:ECDHE-RSA-AES256-SHA384:256) id 1fxWLS-0005rZ-UJ from Andrew_Stubbs@mentor.com for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 04:51:38 -0700 Received: from build6-trusty-cs.sje.mentorg.com (137.202.0.90) by svr-ies-mbx-01.mgc.mentorg.com (139.181.222.1) with Microsoft SMTP Server (TLS) id 15.0.1320.4; Wed, 5 Sep 2018 12:51:34 +0100 From: To: Subject: [PATCH 17/25] Fix Fortran STOP. Date: Wed, 5 Sep 2018 12:51:18 +0100 Message-ID: <3b1ee6252e6bc42be1886f45fd4512efda27bcbd.1536144068.git.ams@codesourcery.com> In-Reply-To: References: MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: Windows NT kernel [generic] [fuzzy] X-Received-From: 192.94.38.131 The minimal libgfortran setup was created for NVPTX, but will also be used by AMD GCN. 
This patch simply removes an assumption that NVPTX is the only user. Specifically, NVPTX exit is broken, but AMD GCN exit works just fine. 2018-09-05 Andrew Stubbs libgfortran/ * runtime/minimal.c (exit): Only work around nvptx bugs on nvptx. --- libgfortran/runtime/minimal.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libgfortran/runtime/minimal.c b/libgfortran/runtime/minimal.c index 0b1efeb..8940f97 100644 --- a/libgfortran/runtime/minimal.c +++ b/libgfortran/runtime/minimal.c @@ -197,10 +197,12 @@ sys_abort (void) #define st_printf printf #undef estr_write #define estr_write printf +#if __nvptx__ /* Map "exit" to "abort"; see PR85463 '[nvptx] "exit" in offloaded region doesn't terminate process'. */ #undef exit #define exit(...) do { abort (); } while (0) +#endif #undef exit_error #define exit_error(...) do { abort (); } while (0) From patchwork Wed Sep 5 11:51:19 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Stubbs X-Patchwork-Id: 966343 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-485179-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="tPZkzKnU"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 4252Cg260sz9s5c for ; Wed, 5 Sep 2018 21:53:35 +1000 (AEST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id 
:list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; q=dns; s=default; b=MA62VSLd5SWXv5HhWdy3swEN1D+3q 021JMqvFVBmj/EM4FnEuobNoE8RkFoD3SsblwkkF+ZrUy0Q1T1WbgWykHiDOinb0 6zi0lrsoghNoJdVpHBC20uJuFWTj1vep1PuonrOyYproshuPYGyvzUkq3nFTTkQL Due8MPodk+vcOM= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; s=default; bh=EElunJUDcnFV0XFxVUqUCFDq3lo=; b=tPZ kzKnUozzboJDOVNtElHBXwFIFm1dYHaLEppBLGUACCLYDm2TUHm+Fd3nOZrS4Tq6 bkg+8fDwpUuCmujs+zcyJWu79XmVRJ77WJcoYAiCM84EMQMSYgjaIJZuBFamURNp 2KJWIhLLLhsiYHsiePIZZ95rxVgKTLxaAM7cmMmU= Received: (qmail 97316 invoked by alias); 5 Sep 2018 11:51:53 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 97190 invoked by uid 89); 5 Sep 2018 11:51:52 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-24.8 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3 autolearn=ham version=3.3.2 spammy= X-HELO: eggs.gnu.org Received: from eggs.gnu.org (HELO eggs.gnu.org) (208.118.235.92) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Wed, 05 Sep 2018 11:51:51 +0000 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1fxWLZ-0006Gm-UV for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:51:49 -0400 Received: from relay1.mentorg.com ([192.94.38.131]:45568) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1fxWLW-0006Bo-Ef for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:51:44 -0400 Received: from nat-ies.mentorg.com ([192.94.31.2] 
helo=svr-ies-mbx-01.mgc.mentorg.com) by relay1.mentorg.com with esmtps (TLSv1.2:ECDHE-RSA-AES256-SHA384:256) id 1fxWLV-0005sM-Hz from Andrew_Stubbs@mentor.com for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 04:51:41 -0700 Received: from build6-trusty-cs.sje.mentorg.com (137.202.0.90) by svr-ies-mbx-01.mgc.mentorg.com (139.181.222.1) with Microsoft SMTP Server (TLS) id 15.0.1320.4; Wed, 5 Sep 2018 12:51:37 +0100 From: To: Subject: [PATCH 18/25] Fix interleaving of Fortran stop messages Date: Wed, 5 Sep 2018 12:51:19 +0100 Message-ID: <2ba94a85fe9c01c57e474a4cc6b7171cc0adb351.1536144068.git.ams@codesourcery.com> In-Reply-To: References: MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: Windows NT kernel [generic] [fuzzy] X-Received-From: 192.94.38.131 Fortran STOP and ERROR STOP use a different function to print the "STOP" string and the message string. On GCN this results in out-of-order output, such as "<msg>ERROR STOP ". This patch fixes the problem by making estr_write use the proper Fortran write, not C printf, so both parts are now output the same way. This also ensures that both parts are output to STDERR (not that that means anything on GCN). 2018-09-05 Kwok Cheung Yeung libgfortran/ * runtime/minimal.c (estr_write): Define in terms of write. --- libgfortran/runtime/minimal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libgfortran/runtime/minimal.c b/libgfortran/runtime/minimal.c index 8940f97..b6d26fd 100644 --- a/libgfortran/runtime/minimal.c +++ b/libgfortran/runtime/minimal.c @@ -196,7 +196,7 @@ sys_abort (void) #undef st_printf #define st_printf printf #undef estr_write -#define estr_write printf +#define estr_write(X) write(STDERR_FILENO, (X), strlen (X)) #if __nvptx__ /* Map "exit" to "abort"; see PR85463 '[nvptx] "exit" in offloaded region doesn't terminate process'.
*/ From patchwork Wed Sep 5 11:51:20 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Stubbs X-Patchwork-Id: 966344 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-485180-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="AXdIkCAp"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 4252Cv2yM6z9s5c for ; Wed, 5 Sep 2018 21:53:47 +1000 (AEST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; q=dns; s=default; b=x2EzaZVOprDYSwSdkkoaNJTwlPMV8 iAz27+FVFM607ujUQ5nLt5yCnGyA5x1Svcl+j3WFZy+AkE0legfrG6VhhPTJXnN2 lntcwber68hbh+kCIji8R1o7lR+kiUQ6m3Dvg+q5DeC+Hh+QPc9kbE3A3liURmb8 GVG8+uttGJ1Wz0= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; s=default; bh=JBz9LOh2BZjT7csMF+pIuAvV2YQ=; b=AXd IkCApbiyerJX/eHH3wUaJsfc1yOKyErKPgc4Cfcvgv76vNczmBRNt6rPGGgR75aB HZVsyljN7XurOZDUyEHBPc08TlL9zsTB/Rml7WeWiEhBqQlXY8bzrcWZRaqdsUkL hTE16J03yCWA4M2hkiQZ2vDwUFUBrWYvuPxgvcxI= Received: (qmail 99212 invoked by alias); 5 Sep 2018 11:52:08 -0000 Mailing-List: contact 
gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 99100 invoked by uid 89); 5 Sep 2018 11:52:07 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-24.8 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3 autolearn=ham version=3.3.2 spammy= X-HELO: eggs.gnu.org Received: from eggs.gnu.org (HELO eggs.gnu.org) (208.118.235.92) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Wed, 05 Sep 2018 11:52:00 +0000 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1fxWLk-0006mh-8T for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:51:59 -0400 Received: from relay1.mentorg.com ([192.94.38.131]:45578) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1fxWLk-0006F0-0A for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:51:56 -0400 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=svr-ies-mbx-01.mgc.mentorg.com) by relay1.mentorg.com with esmtps (TLSv1.2:ECDHE-RSA-AES256-SHA384:256) id 1fxWLY-0005se-1a from Andrew_Stubbs@mentor.com for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 04:51:44 -0700 Received: from build6-trusty-cs.sje.mentorg.com (137.202.0.90) by svr-ies-mbx-01.mgc.mentorg.com (139.181.222.1) with Microsoft SMTP Server (TLS) id 15.0.1320.4; Wed, 5 Sep 2018 12:51:40 +0100 From: To: Subject: [PATCH 19/25] GCN libgfortran. Date: Wed, 5 Sep 2018 12:51:20 +0100 Message-ID: In-Reply-To: References: MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: Windows NT kernel [generic] [fuzzy] X-Received-From: 192.94.38.131 This patch contains the GCN port of libgfortran. We use the minimal configuration created for NVPTX. That's all that's required, besides the target-independent bug fixes posted already. 
2018-09-05 Andrew Stubbs Kwok Cheung Yeung Julian Brown Tom de Vries libgfortran/ * configure.ac: Use minimal mode for amdgcn. * configure: Regenerate. --- libgfortran/configure | 7 ++++--- libgfortran/configure.ac | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/libgfortran/configure b/libgfortran/configure index a583b67..fd8b697 100755 --- a/libgfortran/configure +++ b/libgfortran/configure @@ -5994,7 +5994,8 @@ fi # * C library support for other features such as signal, environment # variables, time functions - if test "x${target_cpu}" = xnvptx; then + if test "x${target_cpu}" = xnvptx \ + || test "x${target_cpu}" = xamdgcn; then LIBGFOR_MINIMAL_TRUE= LIBGFOR_MINIMAL_FALSE='#' else @@ -12514,7 +12515,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 12517 "configure" +#line 12518 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -12620,7 +12621,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 12623 "configure" +#line 12624 "configure" #include "confdefs.h" #if HAVE_DLFCN_H diff --git a/libgfortran/configure.ac b/libgfortran/configure.ac index 05952aa..11b629d 100644 --- a/libgfortran/configure.ac +++ b/libgfortran/configure.ac @@ -206,7 +206,8 @@ AM_CONDITIONAL(LIBGFOR_USE_SYMVER_SUN, [test "x$gfortran_use_symver" = xsun]) # * C library support for other features such as signal, environment # variables, time functions -AM_CONDITIONAL(LIBGFOR_MINIMAL, [test "x${target_cpu}" = xnvptx]) +AM_CONDITIONAL(LIBGFOR_MINIMAL, [test "x${target_cpu}" = xnvptx \ + || test "x${target_cpu}" = xamdgcn]) # Figure out whether the compiler supports "-ffunction-sections -fdata-sections", # similarly to how libstdc++ does it From patchwork Wed Sep 5 11:52:08 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Stubbs X-Patchwork-Id: 
966345 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-485181-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="o7sInGT4"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 4252D82zGKz9s5c for ; Wed, 5 Sep 2018 21:54:00 +1000 (AEST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; q=dns; s=default; b=nRN8VbfXoSh1kg0JQ8Y5GGJuSlfR9 l3IB2CqabLjDORorOr/3OODy4hNI1LG0FQ7fO4wpcl1hdOXm0H8QoWCxsMrHe9VM JFUSmC7O9h4tcFrVWA5Jz9Ups/QMofmyQ+s5JVr8MFA6mBf7sBbfmbQ24e0B+ONJ bLPGp0nKOANf5k= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; s=default; bh=adGpTSuH4h+ldR9d4nru1+OfgLY=; b=o7s InGT4EX6AWVlxuE41wWAcLYs7HVADBV0ukz2Wjb1+/ZtdUEKf/vDYvNxfm8zRDQW ufurW4WkhnirHlp4PijefKBX4TPCr8SW+B0oE69KLvVInIBRkbA9t1tOjKbgxxP9 OTBzRLiIRAxvHJ8KgnrMLweIChsohpqkZGn3Ard4= Received: (qmail 103041 invoked by alias); 5 Sep 2018 11:52:37 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list 
gcc-patches@gcc.gnu.org Received: (qmail 102885 invoked by uid 89); 5 Sep 2018 11:52:36 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-24.8 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, KAM_SHORT autolearn=ham version=3.3.2 spammy=suffice X-HELO: eggs.gnu.org Received: from eggs.gnu.org (HELO eggs.gnu.org) (208.118.235.92) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Wed, 05 Sep 2018 11:52:31 +0000 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1fxWMF-0007sL-HM for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:52:29 -0400 Received: from relay1.mentorg.com ([192.94.38.131]:45729) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1fxWMF-0007px-4z for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:52:27 -0400 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=svr-ies-mbx-01.mgc.mentorg.com) by relay1.mentorg.com with esmtps (TLSv1.2:ECDHE-RSA-AES256-SHA384:256) id 1fxWMD-00063K-Sr from Andrew_Stubbs@mentor.com for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 04:52:26 -0700 Received: from build6-trusty-cs.sje.mentorg.com (137.202.0.90) by svr-ies-mbx-01.mgc.mentorg.com (139.181.222.1) with Microsoft SMTP Server (TLS) id 15.0.1320.4; Wed, 5 Sep 2018 12:52:21 +0100 From: To: Subject: [PATCH 20/25] GCN libgcc. Date: Wed, 5 Sep 2018 12:52:08 +0100 Message-ID: In-Reply-To: References: MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: Windows NT kernel [generic] [fuzzy] X-Received-From: 192.94.38.131 This patch contains the GCN port of libgcc. I've broken it out just to keep both parts more manageable. We have the usual stuff, plus a "gomp_print" implementation intended to provide a means to output text to console without using the full printf. Originally this was because we did not have a working Newlib port, but now it provides the underlying mechanism for printf. 
It's also much lighter than printf, and therefore more suitable for debugging offload kernels (for which there is no debugger, yet). In order to work in offload kernels the same function must be present in both host and GCN toolchains. Therefore it needs to live in libgomp (hence the name). However, having found it also useful in stand alone testing I have moved the GCN implementation to libgcc. It was also necessary to provide a means to disable EMUTLS. 2018-09-05 Andrew Stubbs Kwok Cheung Yeung Julian Brown Tom de Vries libgcc/ * Makefile.in: Don't add emutls.c when --enable-emutls is "no". * config.host: Recognize amdgcn*-*-amdhsa. * config/gcn/crt0.c: New file. * config/gcn/gomp_print.c: New file. * config/gcn/lib2-divmod-hi.c: New file. * config/gcn/lib2-divmod.c: New file. * config/gcn/lib2-gcn.h: New file. * config/gcn/reduction.c: New file. * config/gcn/sfp-machine.h: New file. * config/gcn/t-amdgcn: New file. --- libgcc/Makefile.in | 2 + libgcc/config.host | 8 +++ libgcc/config/gcn/crt0.c | 23 ++++++++ libgcc/config/gcn/gomp_print.c | 99 +++++++++++++++++++++++++++++++ libgcc/config/gcn/lib2-divmod-hi.c | 117 +++++++++++++++++++++++++++++++++++++ libgcc/config/gcn/lib2-divmod.c | 117 +++++++++++++++++++++++++++++++++++++ libgcc/config/gcn/lib2-gcn.h | 49 ++++++++++++++++ libgcc/config/gcn/reduction.c | 30 ++++++++++ libgcc/config/gcn/sfp-machine.h | 51 ++++++++++++++++ libgcc/config/gcn/t-amdgcn | 25 ++++++++ 10 files changed, 521 insertions(+) create mode 100644 libgcc/config/gcn/crt0.c create mode 100644 libgcc/config/gcn/gomp_print.c create mode 100644 libgcc/config/gcn/lib2-divmod-hi.c create mode 100644 libgcc/config/gcn/lib2-divmod.c create mode 100644 libgcc/config/gcn/lib2-gcn.h create mode 100644 libgcc/config/gcn/reduction.c create mode 100644 libgcc/config/gcn/sfp-machine.h create mode 100644 libgcc/config/gcn/t-amdgcn diff --git a/libgcc/Makefile.in b/libgcc/Makefile.in index 0c5b264..6f68257 100644 --- a/libgcc/Makefile.in +++ 
b/libgcc/Makefile.in @@ -429,9 +429,11 @@ LIB2ADD += enable-execute-stack.c # While emutls.c has nothing to do with EH, it is in LIB2ADDEH* # instead of LIB2ADD because that's the way to be sure on some targets # (e.g. *-*-darwin*) only one copy of it is linked. +ifneq ($(enable_emutls),no) LIB2ADDEH += $(srcdir)/emutls.c LIB2ADDEHSTATIC += $(srcdir)/emutls.c LIB2ADDEHSHARED += $(srcdir)/emutls.c +endif # Library members defined in libgcc2.c. lib2funcs = _muldi3 _negdi2 _lshrdi3 _ashldi3 _ashrdi3 _cmpdi2 _ucmpdi2 \ diff --git a/libgcc/config.host b/libgcc/config.host index 029f656..29178da 100644 --- a/libgcc/config.host +++ b/libgcc/config.host @@ -91,6 +91,10 @@ alpha*-*-*) am33_2.0-*-linux*) cpu_type=mn10300 ;; +amdgcn*-*-*) + cpu_type=gcn + tmake_file="${tmake_file} t-softfp-sfdf t-softfp" + ;; arc*-*-*) cpu_type=arc ;; @@ -384,6 +388,10 @@ alpha*-dec-*vms*) extra_parts="$extra_parts vms-dwarf2.o vms-dwarf2eh.o" md_unwind_header=alpha/vms-unwind.h ;; +amdgcn*-*-amdhsa) + tmake_file="$tmake_file gcn/t-amdgcn" + extra_parts="crt0.o" + ;; arc*-*-elf*) tmake_file="arc/t-arc" extra_parts="crti.o crtn.o crtend.o crtbegin.o crtendS.o crtbeginS.o" diff --git a/libgcc/config/gcn/crt0.c b/libgcc/config/gcn/crt0.c new file mode 100644 index 0000000..f4f367b --- /dev/null +++ b/libgcc/config/gcn/crt0.c @@ -0,0 +1,23 @@ +/* Copyright (C) 2017 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* Provide an entry point symbol to silence a linker warning. */ +void _start() {} diff --git a/libgcc/config/gcn/gomp_print.c b/libgcc/config/gcn/gomp_print.c new file mode 100644 index 0000000..41f50c3 --- /dev/null +++ b/libgcc/config/gcn/gomp_print.c @@ -0,0 +1,99 @@ +/* Newlib may not have been built yet. */ +typedef long int64_t; +typedef long size_t; +extern char *strncpy (char *dst, const char *src, size_t length); +extern void exit(int); + +void gomp_print_string (const char *msg, const char *value); +void gomp_print_integer (const char *msg, int64_t value); +void gomp_print_double (const char *msg, double value); + +/* This struct must match the one used by gcn-run and libgomp. + It holds all the data output from a kernel (besides mapping data). + + The base address pointer can be found at kernargs+16. + + The next_output counter must be atomically incremented for each + print output. Only when the print data is fully written can the + "written" flag be set. */ +struct output { + int return_value; + int next_output; + struct printf_data { + int written; + char msg[128]; + int type; + union { + int64_t ivalue; + double dvalue; + char text[128]; + }; + } queue[1000]; +}; + +static struct printf_data * +reserve_print_slot (void) { + /* The kernargs pointer is in s[8:9]. + This will break if the enable_sgpr_* flags are ever changed. */ + char *kernargs; + asm ("s_mov_b64 %0, s[8:9]" : "=Sg"(kernargs)); + + /* The output data is at kernargs[2]. */ + struct output *data = *(struct output **)(kernargs + 16); + + /* We don't have atomic operators in C yet. 
+ "glc" means return original value. */ + int index = 0; + asm ("flat_atomic_add %0, %1, %2 glc\n\t" + "s_waitcnt 0" + : "=v"(index) + : "v"(&data->next_output), "v"(1), "e"(1l)); + + if (index >= 1000) + exit(1); + + return &(data->queue[index]); +} + +void +gomp_print_string (const char *msg, const char *value) +{ + struct printf_data *output = reserve_print_slot (); + output->type = 2; /* String. */ + + strncpy (output->msg, msg, 127); + output->msg[127] = '\0'; + strncpy (output->text, value, 127); + output->text[127] = '\0'; + + asm ("" ::: "memory"); + output->written = 1; +} + +void +gomp_print_integer (const char *msg, int64_t value) +{ + struct printf_data *output = reserve_print_slot (); + output->type = 0; /* Integer. */ + + strncpy (output->msg, msg, 127); + output->msg[127] = '\0'; + output->ivalue = value; + + asm ("" ::: "memory"); + output->written = 1; +} + +void +gomp_print_double (const char *msg, double value) +{ + struct printf_data *output = reserve_print_slot (); + output->type = 1; /* Double. */ + + strncpy (output->msg, msg, 127); + output->msg[127] = '\0'; + output->dvalue = value; + + asm ("" ::: "memory"); + output->written = 1; +} diff --git a/libgcc/config/gcn/lib2-divmod-hi.c b/libgcc/config/gcn/lib2-divmod-hi.c new file mode 100644 index 0000000..d57e145 --- /dev/null +++ b/libgcc/config/gcn/lib2-divmod-hi.c @@ -0,0 +1,117 @@ +/* Copyright (C) 2012-2017 Free Software Foundation, Inc. + Contributed by Altera and Mentor Graphics, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. 
+ +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#include "lib2-gcn.h" + +/* 16-bit HI divide and modulo as used in gcn. */ + +static UHItype +udivmodhi4 (UHItype num, UHItype den, word_type modwanted) +{ + UHItype bit = 1; + UHItype res = 0; + + while (den < num && bit && !(den & (1L<<15))) + { + den <<=1; + bit <<=1; + } + while (bit) + { + if (num >= den) + { + num -= den; + res |= bit; + } + bit >>=1; + den >>=1; + } + if (modwanted) + return num; + return res; +} + + +HItype +__divhi3 (HItype a, HItype b) +{ + word_type neg = 0; + HItype res; + + if (a < 0) + { + a = -a; + neg = !neg; + } + + if (b < 0) + { + b = -b; + neg = !neg; + } + + res = udivmodhi4 (a, b, 0); + + if (neg) + res = -res; + + return res; +} + + +HItype +__modhi3 (HItype a, HItype b) +{ + word_type neg = 0; + HItype res; + + if (a < 0) + { + a = -a; + neg = 1; + } + + if (b < 0) + b = -b; + + res = udivmodhi4 (a, b, 1); + + if (neg) + res = -res; + + return res; +} + + +UHItype +__udivhi3 (UHItype a, UHItype b) +{ + return udivmodhi4 (a, b, 0); +} + + +UHItype +__umodhi3 (UHItype a, UHItype b) +{ + return udivmodhi4 (a, b, 1); +} + diff --git a/libgcc/config/gcn/lib2-divmod.c b/libgcc/config/gcn/lib2-divmod.c new file mode 100644 index 0000000..08e7103 --- /dev/null +++ b/libgcc/config/gcn/lib2-divmod.c @@ -0,0 +1,117 @@ +/* Copyright (C) 2012-2017 Free Software Foundation, Inc. + Contributed by Altera and Mentor Graphics, Inc. 
+ +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#include "lib2-gcn.h" + +/* 32-bit SI divide and modulo as used in gcn. */ + +static USItype +udivmodsi4 (USItype num, USItype den, word_type modwanted) +{ + USItype bit = 1; + USItype res = 0; + + while (den < num && bit && !(den & (1L<<31))) + { + den <<=1; + bit <<=1; + } + while (bit) + { + if (num >= den) + { + num -= den; + res |= bit; + } + bit >>=1; + den >>=1; + } + if (modwanted) + return num; + return res; +} + + +SItype +__divsi3 (SItype a, SItype b) +{ + word_type neg = 0; + SItype res; + + if (a < 0) + { + a = -a; + neg = !neg; + } + + if (b < 0) + { + b = -b; + neg = !neg; + } + + res = udivmodsi4 (a, b, 0); + + if (neg) + res = -res; + + return res; +} + + +SItype +__modsi3 (SItype a, SItype b) +{ + word_type neg = 0; + SItype res; + + if (a < 0) + { + a = -a; + neg = 1; + } + + if (b < 0) + b = -b; + + res = udivmodsi4 (a, b, 1); + + if (neg) + res = -res; + + return res; +} + + +SItype +__udivsi3 (SItype a, SItype b) +{ + return udivmodsi4 (a, b, 0); +} + + +SItype +__umodsi3 (SItype a, SItype b) +{ + return udivmodsi4 (a, b, 1); +} + diff --git a/libgcc/config/gcn/lib2-gcn.h 
b/libgcc/config/gcn/lib2-gcn.h new file mode 100644 index 0000000..aff0bd2 --- /dev/null +++ b/libgcc/config/gcn/lib2-gcn.h @@ -0,0 +1,49 @@ +/* Integer arithmetic support for gcn. + + Copyright (C) 2012-2017 Free Software Foundation, Inc. + Contributed by Altera and Mentor Graphics, Inc. + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef LIB2_GCN_H +#define LIB2_GCN_H + +/* Types. */ + +typedef char QItype __attribute__ ((mode (QI))); +typedef unsigned char UQItype __attribute__ ((mode (QI))); +typedef short HItype __attribute__ ((mode (HI))); +typedef unsigned short UHItype __attribute__ ((mode (HI))); +typedef int SItype __attribute__ ((mode (SI))); +typedef unsigned int USItype __attribute__ ((mode (SI))); +typedef int word_type __attribute__ ((mode (__word__))); + +/* Exported functions. 
*/ +extern SItype __divsi3 (SItype, SItype); +extern SItype __modsi3 (SItype, SItype); +extern SItype __udivsi3 (SItype, SItype); +extern SItype __umodsi3 (SItype, SItype); +extern HItype __divhi3 (HItype, HItype); +extern HItype __modhi3 (HItype, HItype); +extern UHItype __udivhi3 (UHItype, UHItype); +extern UHItype __umodhi3 (UHItype, UHItype); +extern SItype __mulsi3 (SItype, SItype); + +#endif /* LIB2_GCN_H */ diff --git a/libgcc/config/gcn/reduction.c b/libgcc/config/gcn/reduction.c new file mode 100644 index 0000000..fbe9aaa --- /dev/null +++ b/libgcc/config/gcn/reduction.c @@ -0,0 +1,30 @@ +/* Oversized reductions lock variable + Copyright (C) 2017 Free Software Foundation, Inc. + Contributed by Mentor Graphics. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* We use a global lock variable for reductions on objects larger than + 64 bits. Until and unless proven that lock contention for + different reductions is a problem, a single lock will suffice. 
*/ + +unsigned volatile __reduction_lock = 0; diff --git a/libgcc/config/gcn/sfp-machine.h b/libgcc/config/gcn/sfp-machine.h new file mode 100644 index 0000000..7874081 --- /dev/null +++ b/libgcc/config/gcn/sfp-machine.h @@ -0,0 +1,51 @@ +/* Use 32-bit types here to prevent longlong.h trying to use TImode. + Once TImode works we might be better to use 64-bit here. */ + +#define _FP_W_TYPE_SIZE 32 +#define _FP_W_TYPE unsigned int +#define _FP_WS_TYPE signed int +#define _FP_I_TYPE int + +#define _FP_MUL_MEAT_S(R,X,Y) \ + _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_D(R,X,Y) \ + _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) + +#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_loop(S,R,X,Y) +#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y) + +#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) +#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1 +#define _FP_NANSIGN_S 0 +#define _FP_NANSIGN_D 0 + +#define _FP_KEEPNANFRACP 1 +#define _FP_QNANNEGATEDP 0 + +/* Someone please check this. */ +#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ + do { \ + if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \ + && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) \ + { \ + R##_s = Y##_s; \ + _FP_FRAC_COPY_##wc(R,Y); \ + } \ + else \ + { \ + R##_s = X##_s; \ + _FP_FRAC_COPY_##wc(R,X); \ + } \ + R##_c = FP_CLS_NAN; \ + } while (0) + +#define _FP_TININESS_AFTER_ROUNDING 0 + +#define __LITTLE_ENDIAN 1234 +#define __BIG_ENDIAN 4321 +#define __BYTE_ORDER __LITTLE_ENDIAN + +/* Define ALIASNAME as a strong alias for NAME. 
*/ +# define strong_alias(name, aliasname) _strong_alias(name, aliasname) +# define _strong_alias(name, aliasname) \ + extern __typeof (name) aliasname __attribute__ ((alias (#name))); diff --git a/libgcc/config/gcn/t-amdgcn b/libgcc/config/gcn/t-amdgcn new file mode 100644 index 0000000..d0c423d --- /dev/null +++ b/libgcc/config/gcn/t-amdgcn @@ -0,0 +1,25 @@ +LIB2ADD += $(srcdir)/config/gcn/gomp_print.c + +LIB2ADD += $(srcdir)/config/gcn/lib2-divmod.c \ + $(srcdir)/config/gcn/lib2-divmod-hi.c + +LIB2ADD += $(srcdir)/config/gcn/reduction.c + +LIB2ADDEH= +LIB2FUNCS_EXCLUDE=__main + +override LIB2FUNCS_ST := $(filter-out __gcc_bcmp,$(LIB2FUNCS_ST)) + +# Debug information is not useful, and probably uses broken relocations +LIBGCC2_DEBUG_CFLAGS = -g0 + +crt0.o: $(srcdir)/config/gcn/crt0.c + $(crt_compile) -c $< + +# Prevent building "advanced" stuff (for example, gcov support). We don't +# support it, and it may cause the build to fail, because of alloca usage, for +# example. +INHIBIT_LIBC_CFLAGS = -Dinhibit_libc + +# Disable emutls.c (temporarily?) 
+enable_emutls = no From patchwork Wed Sep 5 13:40:15 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Stubbs X-Patchwork-Id: 966397 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-485202-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="PFj60t7l"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 4254bd4GJVz9sCn for ; Wed, 5 Sep 2018 23:41:01 +1000 (AEST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :subject:references:from:to:message-id:date:mime-version :in-reply-to:content-type; q=dns; s=default; b=VtAbKH0bphz86WoNb uiPKRaMtO9LoQdYFfkMrwIn9req20IrG4bcMPGbWA++wungcAlt8asZ3XkETbRGQ 1ISCs0Kjof2ps7keT9GiJhYRs1K0G3bIH/mCyCj85AYauHBL7cbHvKDvLUauVq5o BS3mApa1oR5Qp7EMIkwEyl87ns= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :subject:references:from:to:message-id:date:mime-version :in-reply-to:content-type; s=default; bh=qHdhYi6qL+u3l+ATULu2zYt 7xKw=; b=PFj60t7lDR39CBcdIg0RLUKdFLp/UX65n72CYiOq2SIjjP+NAt1FRzy fmk9fPI8rRnP4OkGGS58Tlgdn06JSC+ey08+lttd/b0xcYSOndSijgGmttxVToRB VOYMMoHNKjlwQvJMViw4vdU0VRaIQEFlXjyRPId/EUYyyxqnkiXo= Received: (qmail 32463 invoked by alias); 5 Sep 2018 13:40:43 -0000 Mailing-List: contact 
gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 32434 invoked by uid 89); 5 Sep 2018 13:40:42 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-23.0 required=5.0 tests=GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, RCVD_IN_DNSWL_NONE, SPF_PASS, TIME_LIMIT_EXCEEDED, UNSUBSCRIBE_BODY autolearn=unavailable version=3.3.2 spammy=2017-2018, 20172018 X-HELO: relay1.mentorg.com Received: from relay1.mentorg.com (HELO relay1.mentorg.com) (192.94.38.131) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Wed, 05 Sep 2018 13:40:30 +0000 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=svr-ies-mbx-01.mgc.mentorg.com) by relay1.mentorg.com with esmtps (TLSv1.2:ECDHE-RSA-AES256-SHA384:256) id 1fxY2m-0001hK-10 from Andrew_Stubbs@mentor.com for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 06:40:29 -0700 Received: from [172.30.89.133] (137.202.0.90) by svr-ies-mbx-01.mgc.mentorg.com (139.181.222.1) with Microsoft SMTP Server (TLS) id 15.0.1320.4; Wed, 5 Sep 2018 14:40:22 +0100 Subject: [PATCH 21/25] GCN Back-end (part 1/2). References: From: Andrew Stubbs To: "gcc-patches@gcc.gnu.org" Message-ID: Date: Wed, 5 Sep 2018 14:40:15 +0100 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Thunderbird/52.9.1 MIME-Version: 1.0 In-Reply-To: This part initially failed to send due to size. This is the main portion of the GCN back-end, plus the configuration adjustments needed to build it. The config.sub patch is here so people can try it, but I'm aware that needs to be committed elsewhere first. The back-end contains various bits that support OpenACC and OpenMP, but the middle-end and libgomp patches are missing. I included them here because they're harmless and carving up the files seems like unnecessary effort. 
The remaining offload support will be posted at a later date. The gcn-run.c is a separate tool that can run a GCN program on a GPU using the ROCm drivers and HSA runtime libraries. 2018-09-05 Andrew Stubbs >....... Kwok Cheung Yeung >....... Julian Brown >....... Tom de Vries >....... Jan Hubicka >....... Martin Jambor >.......* config.sub: Recognize amdgcn*-*-amdhsa. >.......* configure.ac: Likewise. >.......* configure: Regenerate. >.......gcc/ >.......* common/config/gcn/gcn-common.c: New file. >.......* config.gcc: Add amdgcn*-*-amdhsa configuration. >.......* config/gcn/constraints.md: New file. >.......* config/gcn/driver-gcn.c: New file. >.......* config/gcn/gcn-builtins.def: New file. >.......* config/gcn/gcn-hsa.h: New file. >.......* config/gcn/gcn-modes.def: New file. >.......* config/gcn/gcn-opts.h: New file. >.......* config/gcn/gcn-passes.def: New file. >.......* config/gcn/gcn-protos.h: New file. >.......* config/gcn/gcn-run.c: New file. >.......* config/gcn/gcn-tree.c: New file. >.......* config/gcn/gcn-valu.md: New file. >.......* config/gcn/gcn.c: New file. >.......* config/gcn/gcn.h: New file. >.......* config/gcn/gcn.md: New file. >.......* config/gcn/gcn.opt: New file. >.......* config/gcn/mkoffload.c: New file. >.......* config/gcn/offload.h: New file. >.......* config/gcn/predicates.md: New file. >.......* config/gcn/t-gcn-hsa: New file. 
diff --git a/config.sub b/config.sub index c95acc6..33115a5 100755 --- a/config.sub +++ b/config.sub @@ -572,6 +572,7 @@ case $basic_machine in | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ | am33_2.0 \ + | amdgcn \ | arc | arceb \ | arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv6m | armv[78][arm] \ | avr | avr32 \ @@ -909,6 +910,9 @@ case $basic_machine in fx2800) basic_machine=i860-alliant ;; + amdgcn) + basic_machine=amdgcn-unknown + ;; genix) basic_machine=ns32k-ns ;; @@ -1524,6 +1528,8 @@ case $os in ;; *-eabi) ;; + amdhsa) + ;; *) echo Invalid configuration \`"$1"\': system \`"$os"\' not recognized 1>&2 exit 1 @@ -1548,6 +1554,9 @@ case $basic_machine in spu-*) os=elf ;; + amdgcn-*) + os=-amdhsa + ;; *-acorn) os=riscix1.2 ;; diff --git a/configure b/configure index dd9fbe4..fb311ce 100755 --- a/configure +++ b/configure @@ -3569,6 +3569,8 @@ case "${target}" in noconfigdirs="$noconfigdirs ld gas gdb gprof" noconfigdirs="$noconfigdirs sim target-rda" ;; + amdgcn*-*-*) + ;; arm-*-darwin*) noconfigdirs="$noconfigdirs ld gas gdb gprof" noconfigdirs="$noconfigdirs sim target-rda" diff --git a/configure.ac b/configure.ac index a0b0917..35acf25 100644 --- a/configure.ac +++ b/configure.ac @@ -903,6 +903,8 @@ case "${target}" in noconfigdirs="$noconfigdirs ld gas gdb gprof" noconfigdirs="$noconfigdirs sim target-rda" ;; + amdgcn*-*-*) + ;; arm-*-darwin*) noconfigdirs="$noconfigdirs ld gas gdb gprof" noconfigdirs="$noconfigdirs sim target-rda" diff --git a/gcc/common/config/gcn/gcn-common.c b/gcc/common/config/gcn/gcn-common.c new file mode 100644 index 0000000..275bfd5 --- /dev/null +++ b/gcc/common/config/gcn/gcn-common.c @@ -0,0 +1,38 @@ +/* Common hooks for GCN + Copyright (C) 2016-2017 Free Software Foundation, Inc. 
+ + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "common/common-target.h" +#include "common/common-target-def.h" +#include "opts.h" +#include "flags.h" +#include "params.h" + +/* Set default optimization options. */ +static const struct default_options gcn_option_optimization_table[] = + { + { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 }, + { OPT_LEVELS_NONE, 0, NULL, 0 } + }; + +#undef TARGET_OPTION_OPTIMIZATION_TABLE +#define TARGET_OPTION_OPTIMIZATION_TABLE gcn_option_optimization_table + +struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER; diff --git a/gcc/config.gcc b/gcc/config.gcc index f81cf76..d28bee5 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -312,6 +312,10 @@ alpha*-*-*) cpu_type=alpha extra_options="${extra_options} g.opt" ;; +amdgcn*) + cpu_type=gcn + use_gcc_stdint=wrap + ;; am33_2.0-*-linux*) cpu_type=mn10300 ;; @@ -1376,6 +1380,19 @@ ft32-*-elf) tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file}" tmake_file="${tmake_file} ft32/t-ft32" ;; +amdgcn-*-amdhsa) + tm_file="dbxelf.h elfos.h gcn/gcn-hsa.h gcn/gcn.h newlib-stdint.h" + tmake_file="gcn/t-gcn-hsa" + native_system_header_dir=/include + extra_modes=gcn/gcn-modes.def + extra_objs="${extra_objs} gcn-tree.o" + extra_gcc_objs="driver-gcn.o" + extra_programs="${extra_programs} gcn-run\$(exeext)" + if test 
x$enable_as_accelerator = xyes; then + extra_programs="${extra_programs} mkoffload\$(exeext)" + tm_file="${tm_file} gcn/offload.h" + fi + ;; moxie-*-elf) gas=yes gnu_ld=yes @@ -4042,6 +4059,24 @@ case "${target}" in esac ;; + amdgcn-*-*) + supported_defaults="arch tune" + + for which in arch tune; do + eval "val=\$with_$which" + case ${val} in + "" | carrizo | fiji | gfx900 ) + # OK + ;; + *) + echo "Unknown cpu used in --with-$which=$val." 1>&2 + exit 1 + ;; + esac + done + [ "x$with_arch" = x ] && with_arch=fiji + ;; + hppa*-*-*) supported_defaults="arch schedule" diff --git a/gcc/config/gcn/constraints.md b/gcc/config/gcn/constraints.md new file mode 100644 index 0000000..9ebeb97 --- /dev/null +++ b/gcc/config/gcn/constraints.md @@ -0,0 +1,139 @@ +;; Constraint definitions for GCN. +;; Copyright (C) 2016-2017 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
+ +(define_constraint "I" + "Inline integer constant" + (and (match_code "const_int") + (match_test "ival >= -16 && ival <= 64"))) + +(define_constraint "J" + "Signed integer 16-bit inline constant" + (and (match_code "const_int") + (match_test "((unsigned HOST_WIDE_INT) ival + 0x8000) < 0x10000"))) + +(define_constraint "Kf" + "Immediate constant -1" + (and (match_code "const_int") + (match_test "ival == -1"))) + +(define_constraint "L" + "Unsigned integer 15-bit constant" + (and (match_code "const_int") + (match_test "((unsigned HOST_WIDE_INT) ival) < 0x8000"))) + +(define_constraint "A" + "Inline immediate parameter" + (and (match_code "const_int,const_double,const_vector") + (match_test "gcn_inline_constant_p (op)"))) + +(define_constraint "B" + "Immediate 32-bit parameter" + (and (match_code "const_int,const_double,const_vector") + (match_test "gcn_constant_p (op)"))) + +(define_constraint "C" + "Immediate 32-bit parameter zero-extended to 64-bits" + (and (match_code "const_int,const_double,const_vector") + (match_test "gcn_constant64_p (op)"))) + +(define_constraint "DA" + "Splittable inline immediate 64-bit parameter" + (and (match_code "const_int,const_double,const_vector") + (match_test "gcn_inline_constant64_p (op)"))) + +(define_constraint "DB" + "Splittable immediate 64-bit parameter" + (match_code "const_int,const_double,const_vector")) + +(define_constraint "U" + "unspecified value" + (match_code "unspec")) + +(define_constraint "Y" + "Symbol or label for relative calls" + (match_code "symbol_ref,label_ref")) + +(define_register_constraint "v" "VGPR_REGS" + "VGPR registers") + +(define_register_constraint "Sg" "SGPR_REGS" + "SGPR registers") + +(define_register_constraint "SD" "SGPR_DST_REGS" + "registers useable as a destination of scalar operation") + +(define_register_constraint "SS" "SGPR_SRC_REGS" + "registers useable as a source of scalar operation") + +(define_register_constraint "Sm" "SGPR_MEM_SRC_REGS" + "registers useable as a source of
scalar memory operation") + +(define_register_constraint "Sv" "SGPR_VOP3A_SRC_REGS" + "registers useable as a source of VOP3A instruction") + +(define_register_constraint "ca" "ALL_CONDITIONAL_REGS" + "SCC VCCZ or EXECZ") + +(define_register_constraint "cs" "SCC_CONDITIONAL_REG" + "SCC") + +(define_register_constraint "cV" "VCC_CONDITIONAL_REG" + "VCC") + +(define_register_constraint "e" "EXEC_MASK_REG" + "EXEC") + +(define_special_memory_constraint "RB" + "Buffer memory address to scratch memory." + (and (match_code "mem") + (match_test "AS_SCRATCH_P (MEM_ADDR_SPACE (op))"))) + +(define_special_memory_constraint "RF" + "Buffer memory address to flat memory." + (and (match_code "mem") + (match_test "AS_FLAT_P (MEM_ADDR_SPACE (op)) + && gcn_flat_address_p (XEXP (op, 0), mode)"))) + +(define_special_memory_constraint "RS" + "Buffer memory address to scalar flat memory." + (and (match_code "mem") + (match_test "AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op)) + && gcn_scalar_flat_mem_p (op)"))) + +(define_special_memory_constraint "RL" + "Buffer memory address to LDS memory." + (and (match_code "mem") + (match_test "AS_LDS_P (MEM_ADDR_SPACE (op))"))) + +(define_special_memory_constraint "RG" + "Buffer memory address to GDS memory." + (and (match_code "mem") + (match_test "AS_GDS_P (MEM_ADDR_SPACE (op))"))) + +(define_special_memory_constraint "RD" + "Buffer memory address to GDS or LDS memory." + (and (match_code "mem") + (ior (match_test "AS_GDS_P (MEM_ADDR_SPACE (op))") + (match_test "AS_LDS_P (MEM_ADDR_SPACE (op))")))) + +(define_special_memory_constraint "RM" + "Memory address to global (main) memory." + (and (match_code "mem") + (match_test "AS_GLOBAL_P (MEM_ADDR_SPACE (op)) + && gcn_global_address_p (XEXP (op, 0))"))) diff --git a/gcc/config/gcn/driver-gcn.c b/gcc/config/gcn/driver-gcn.c new file mode 100644 index 0000000..21e8c69 --- /dev/null +++ b/gcc/config/gcn/driver-gcn.c @@ -0,0 +1,32 @@ +/* Subroutines for the gcc driver. 
+ Copyright (C) 2018 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" + +const char * +last_arg_spec_function (int argc, const char **argv) +{ + if (argc == 0) + return NULL; + + return argv[argc-1]; +} diff --git a/gcc/config/gcn/gcn-builtins.def b/gcc/config/gcn/gcn-builtins.def new file mode 100644 index 0000000..1cf66d2 --- /dev/null +++ b/gcc/config/gcn/gcn-builtins.def @@ -0,0 +1,116 @@ +/* Copyright (C) 2016-2018 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* The first argument to these macros is the return type of the builtin, + the rest are arguments of the builtin. 
*/ +#define _A1(a) {a, GCN_BTI_END_OF_PARAMS} +#define _A2(a,b) {a, b, GCN_BTI_END_OF_PARAMS} +#define _A3(a,b,c) {a, b, c, GCN_BTI_END_OF_PARAMS} +#define _A4(a,b,c,d) {a, b, c, d, GCN_BTI_END_OF_PARAMS} +#define _A5(a,b,c,d,e) {a, b, c, d, e, GCN_BTI_END_OF_PARAMS} + +DEF_BUILTIN (FLAT_LOAD_INT32, 1 /*CODE_FOR_flat_load_v64si*/, + "flat_load_int32", B_INSN, + _A3 (GCN_BTI_V64SI, GCN_BTI_EXEC, GCN_BTI_V64SI), + gcn_expand_builtin_1) + +DEF_BUILTIN (FLAT_LOAD_PTR_INT32, 2 /*CODE_FOR_flat_load_ptr_v64si */, + "flat_load_ptr_int32", B_INSN, + _A4 (GCN_BTI_V64SI, GCN_BTI_EXEC, GCN_BTI_SIPTR, GCN_BTI_V64SI), + gcn_expand_builtin_1) + +DEF_BUILTIN (FLAT_STORE_PTR_INT32, 3 /*CODE_FOR_flat_store_ptr_v64si */, + "flat_store_ptr_int32", B_INSN, + _A5 (GCN_BTI_VOID, GCN_BTI_EXEC, GCN_BTI_SIPTR, GCN_BTI_V64SI, + GCN_BTI_V64SI), + gcn_expand_builtin_1) + +DEF_BUILTIN (FLAT_LOAD_PTR_FLOAT, 2 /*CODE_FOR_flat_load_ptr_v64sf */, + "flat_load_ptr_float", B_INSN, + _A4 (GCN_BTI_V64SF, GCN_BTI_EXEC, GCN_BTI_SFPTR, GCN_BTI_V64SI), + gcn_expand_builtin_1) + +DEF_BUILTIN (FLAT_STORE_PTR_FLOAT, 3 /*CODE_FOR_flat_store_ptr_v64sf */, + "flat_store_ptr_float", B_INSN, + _A5 (GCN_BTI_VOID, GCN_BTI_EXEC, GCN_BTI_SFPTR, GCN_BTI_V64SI, + GCN_BTI_V64SF), + gcn_expand_builtin_1) + +DEF_BUILTIN (SQRTVF, 3 /*CODE_FOR_sqrtvf */, + "sqrtvf", B_INSN, + _A2 (GCN_BTI_V64SF, GCN_BTI_V64SF), + gcn_expand_builtin_1) + +DEF_BUILTIN (SQRTF, 3 /*CODE_FOR_sqrtf */, + "sqrtf", B_INSN, + _A2 (GCN_BTI_SF, GCN_BTI_SF), + gcn_expand_builtin_1) + +DEF_BUILTIN (CMP_SWAP, -1, + "cmp_swap", B_INSN, + _A4 (GCN_BTI_UINT, GCN_BTI_VOIDPTR, GCN_BTI_UINT, GCN_BTI_UINT), + gcn_expand_builtin_1) + +DEF_BUILTIN (CMP_SWAPLL, -1, + "cmp_swapll", B_INSN, + _A4 (GCN_BTI_LLUINT, + GCN_BTI_VOIDPTR, GCN_BTI_LLUINT, GCN_BTI_LLUINT), + gcn_expand_builtin_1) + +/* DEF_BUILTIN_BINOP_INT_FP creates many variants of a builtin function for a + given operation. 
The first argument will give base to the identifier of a + particular builtin, the second will be used to form the name of the pattern + used to expand it, and the third will be used to create the user-visible + builtin identifier. */ + +DEF_BUILTIN_BINOP_INT_FP (ADD, add, "add") +DEF_BUILTIN_BINOP_INT_FP (SUB, sub, "sub") + +DEF_BUILTIN_BINOP_INT_FP (AND, and, "and") +DEF_BUILTIN_BINOP_INT_FP (IOR, ior, "or") +DEF_BUILTIN_BINOP_INT_FP (XOR, xor, "xor") + +/* OpenMP. */ + +DEF_BUILTIN (OMP_DIM_SIZE, CODE_FOR_oacc_dim_size, + "dim_size", B_INSN, + _A2 (GCN_BTI_INT, GCN_BTI_INT), + gcn_expand_builtin_1) +DEF_BUILTIN (OMP_DIM_POS, CODE_FOR_oacc_dim_pos, + "dim_pos", B_INSN, + _A2 (GCN_BTI_INT, GCN_BTI_INT), + gcn_expand_builtin_1) + +/* OpenACC. */ + +DEF_BUILTIN (ACC_SINGLE_START, -1, "single_start", B_INSN, _A1 (GCN_BTI_BOOL), + gcn_expand_builtin_1) + +DEF_BUILTIN (ACC_SINGLE_COPY_START, -1, "single_copy_start", B_INSN, + _A1 (GCN_BTI_LDS_VOIDPTR), gcn_expand_builtin_1) + +DEF_BUILTIN (ACC_SINGLE_COPY_END, -1, "single_copy_end", B_INSN, + _A2 (GCN_BTI_VOID, GCN_BTI_LDS_VOIDPTR), gcn_expand_builtin_1) + +DEF_BUILTIN (ACC_BARRIER, -1, "acc_barrier", B_INSN, _A1 (GCN_BTI_VOID), + gcn_expand_builtin_1) + + +#undef _A1 +#undef _A2 +#undef _A3 +#undef _A4 +#undef _A5 diff --git a/gcc/config/gcn/gcn-hsa.h b/gcc/config/gcn/gcn-hsa.h new file mode 100644 index 0000000..182062d --- /dev/null +++ b/gcc/config/gcn/gcn-hsa.h @@ -0,0 +1,129 @@ +/* Copyright (C) 2016-2018 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef OBJECT_FORMAT_ELF + #error elf.h included before elfos.h +#endif + +#define TEXT_SECTION_ASM_OP "\t.section\t.text" +#define BSS_SECTION_ASM_OP "\t.section\t.bss" +#define GLOBAL_ASM_OP "\t.globl\t" +#define DATA_SECTION_ASM_OP "\t.data\t" +#define SET_ASM_OP "\t.set\t" +#define LOCAL_LABEL_PREFIX "." +#define USER_LABEL_PREFIX "" +#define ASM_COMMENT_START ";" +#define TARGET_ASM_NAMED_SECTION default_elf_asm_named_section + +#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \ + asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN) + +#undef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ + gcn_hsa_declare_function_name ((FILE), (NAME), (DECL)) + +#undef ASM_OUTPUT_ALIGNED_COMMON +#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGNMENT) \ + (fprintf ((FILE), "%s", COMMON_ASM_OP), \ + assemble_name ((FILE), (NAME)), \ + fprintf ((FILE), "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n", \ + (SIZE) > 0 ? (SIZE) : 1, (ALIGNMENT) / BITS_PER_UNIT)) + +#define ASM_OUTPUT_LABEL(FILE,NAME) \ + do { assemble_name (FILE, NAME); fputs (":\n", FILE); } while (0) + +#define ASM_OUTPUT_LABELREF(FILE, NAME) \ + asm_fprintf (FILE, "%U%s", default_strip_name_encoding (NAME)) + +extern unsigned int gcn_local_sym_hash (const char *name); + +/* The HSA runtime puts all global and local symbols into a single per-kernel + variable map. In cases where we have two local static symbols with the same + name in different compilation units, this causes multiple definition errors. + To avoid this, we add a decoration to local symbol names based on a hash of + a "module ID" passed to the compiler via the -mlocal-symbol-id option. This + is far from perfect, but we expect static local variables to be rare in + offload code. 
*/ + +#define ASM_FORMAT_PRIVATE_NAME(OUTVAR, NAME, NUMBER) \ + do { \ + (OUTVAR) = (char *) alloca (strlen (NAME) + 30); \ + if (local_symbol_id && *local_symbol_id) \ + sprintf ((OUTVAR), "%s.%u.%.8x", (NAME), (NUMBER), \ + gcn_local_sym_hash (local_symbol_id)); \ + else \ + sprintf ((OUTVAR), "%s.%u", (NAME), (NUMBER)); \ + } while (0) + +#define ASM_OUTPUT_SYMBOL_REF(FILE, X) gcn_asm_output_symbol_ref (FILE, X) + +#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \ + fprintf (FILE, "\t.word .L%d-.L%d\n", VALUE, REL) + +#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \ + fprintf (FILE, "\t.word .L%d\n", VALUE) + +#define ASM_OUTPUT_ALIGN(FILE,LOG) \ + do { if (LOG!=0) fprintf (FILE, "\t.align\t%d\n", 1<<(LOG)); } while (0) +#define ASM_OUTPUT_ALIGN_WITH_NOP(FILE,LOG) \ + do { \ + if (LOG!=0) \ + fprintf (FILE, "\t.p2alignl\t%d, 0xBF800000" \ + " ; Fill value is 's_nop 0'\n", (LOG)); \ + } while (0) + +#define ASM_APP_ON "" +#define ASM_APP_OFF "" + +/* Avoid the default in ../../gcc.c, which adds "-pthread", which is not + supported for gcn. */ +#define GOMP_SELF_SPECS "" + +/* Use LLVM assembler and linker options. */ +#define ASM_SPEC "-triple=amdgcn--amdhsa " \ + "%:last_arg(%{march=*:-mcpu=%*}) " \ + "-filetype=obj" +/* Add -mlocal-symbol-id= unless the user (or mkoffload) + passes the option explicitly on the command line. The option also causes + several dump-matching tests to fail in the testsuite, so the option is not + added when or tree dump/compare-debug options used in the testsuite are + present. + This has the potential for surprise, but a user can still use an explicit + -mlocal-symbol-id= option manually together with -fdump-tree or + -fcompare-debug options. */ +#define CC1_SPEC "%{!mlocal-symbol-id=*:%{!fdump-tree-*:" \ + "%{!fdump-ipa-*:%{!fcompare-debug*:-mlocal-symbol-id=%b}}}}" +#define LINK_SPEC "--pie" +#define LIB_SPEC "-lc" + +/* Provides a _start symbol to keep the linker happy. 
*/ +#define STARTFILE_SPEC "crt0.o%s" +#define ENDFILE_SPEC "" +#define STANDARD_STARTFILE_PREFIX_2 "" + +/* The LLVM assembler rejects multiple -mcpu options, so we must drop + all but the last. */ +extern const char *last_arg_spec_function (int argc, const char **argv); +#define EXTRA_SPEC_FUNCTIONS \ + { "last_arg", last_arg_spec_function }, + +#undef LOCAL_INCLUDE_DIR + +/* FIXME: review debug info settings */ +#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG +#define DWARF2_DEBUGGING_INFO 1 +#define DWARF2_ASM_LINE_DEBUG_INFO 1 +#define EH_FRAME_THROUGH_COLLECT2 1 diff --git a/gcc/config/gcn/gcn-modes.def b/gcc/config/gcn/gcn-modes.def new file mode 100644 index 0000000..6f273b0 --- /dev/null +++ b/gcc/config/gcn/gcn-modes.def @@ -0,0 +1,45 @@ +/* Copyright (C) 2016-2018 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Half-precision floating point */ +FLOAT_MODE (HF, 2, 0); +/* FIXME: No idea what format it is. */ +ADJUST_FLOAT_FORMAT (HF, &ieee_half_format); + +/* Mask mode. Used for the autovectorizer only, and converted to DImode + during the expand pass. */ +VECTOR_BOOL_MODE (V64BI, 64, 8); /* V64BI */ + +/* Native vector modes. 
*/ +VECTOR_MODE (INT, QI, 64); /* V64QI */ +VECTOR_MODE (INT, HI, 64); /* V64HI */ +VECTOR_MODE (INT, SI, 64); /* V64SI */ +VECTOR_MODE (INT, DI, 64); /* V64DI */ +VECTOR_MODE (INT, TI, 64); /* V64TI */ +VECTOR_MODE (FLOAT, HF, 64); /* V64HF */ +VECTOR_MODE (FLOAT, SF, 64); /* V64SF */ +VECTOR_MODE (FLOAT, DF, 64); /* V64DF */ + +/* Vector units handle reads independently and thus no large alignment + needed. */ +ADJUST_ALIGNMENT (V64QI, 1); +ADJUST_ALIGNMENT (V64HI, 2); +ADJUST_ALIGNMENT (V64SI, 4); +ADJUST_ALIGNMENT (V64DI, 8); +ADJUST_ALIGNMENT (V64TI, 16); +ADJUST_ALIGNMENT (V64HF, 2); +ADJUST_ALIGNMENT (V64SF, 4); +ADJUST_ALIGNMENT (V64DF, 8); diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h new file mode 100644 index 0000000..368e0b5 --- /dev/null +++ b/gcc/config/gcn/gcn-opts.h @@ -0,0 +1,36 @@ +/* Copyright (C) 2016-2018 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef GCN_OPTS_H +#define GCN_OPTS_H + +/* Which processor to generate code or schedule for. */ +enum processor_type +{ + PROCESSOR_CARRIZO, + PROCESSOR_FIJI, + PROCESSOR_VEGA +}; + +/* Set in gcn_option_override. 
*/ +extern int gcn_isa; + +#define TARGET_GCN3 (gcn_isa == 3) +#define TARGET_GCN3_PLUS (gcn_isa >= 3) +#define TARGET_GCN5 (gcn_isa == 5) +#define TARGET_GCN5_PLUS (gcn_isa >= 5) + +#endif diff --git a/gcc/config/gcn/gcn-passes.def b/gcc/config/gcn/gcn-passes.def new file mode 100644 index 0000000..a1e1d73 --- /dev/null +++ b/gcc/config/gcn/gcn-passes.def @@ -0,0 +1,19 @@ +/* Copyright (C) 2017-2018 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +INSERT_PASS_AFTER (pass_omp_target_link, 1, pass_omp_gcn); diff --git a/gcc/config/gcn/gcn-protos.h b/gcc/config/gcn/gcn-protos.h new file mode 100644 index 0000000..16ec3ed --- /dev/null +++ b/gcc/config/gcn/gcn-protos.h @@ -0,0 +1,144 @@ +/* Copyright (C) 2016-2018 Free Software Foundation, Inc. + + This file is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This file is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . 
*/ + +#ifndef _GCN_PROTOS_ +#define _GCN_PROTOS_ + +extern void gcn_asm_output_symbol_ref (FILE *file, rtx x); +extern tree gcn_builtin_decl (unsigned code, bool initialize_p); +extern bool gcn_can_split_p (machine_mode, rtx); +extern bool gcn_constant64_p (rtx); +extern bool gcn_constant_p (rtx); +extern rtx gcn_convert_mask_mode (rtx reg); +extern char * gcn_expand_dpp_shr_insn (machine_mode, const char *, int, int); +extern void gcn_expand_epilogue (); +extern void gcn_expand_prologue (); +extern rtx gcn_expand_reduc_scalar (machine_mode, rtx, int); +extern rtx gcn_expand_scalar_to_vector_address (machine_mode, rtx, rtx, rtx); +extern void gcn_expand_vector_init (rtx, rtx); +extern bool gcn_flat_address_p (rtx, machine_mode); +extern bool gcn_fp_constant_p (rtx, bool); +extern rtx gcn_full_exec (); +extern rtx gcn_full_exec_reg (); +extern rtx gcn_gen_undef (machine_mode); +extern bool gcn_global_address_p (rtx); +extern tree gcn_goacc_adjust_propagation_record (tree record_type, bool sender, + const char *name); +extern void gcn_goacc_adjust_gangprivate_decl (tree var); +extern void gcn_goacc_reduction (gcall *call); +extern bool gcn_hard_regno_rename_ok (unsigned int from_reg, + unsigned int to_reg); +extern machine_mode gcn_hard_regno_caller_save_mode (unsigned int regno, + unsigned int nregs, + machine_mode regmode); +extern bool gcn_hard_regno_mode_ok (int regno, machine_mode mode); +extern int gcn_hard_regno_nregs (int regno, machine_mode mode); +extern void gcn_hsa_declare_function_name (FILE *file, const char *name, + tree decl); +extern HOST_WIDE_INT gcn_initial_elimination_offset (int, int); +extern bool gcn_inline_constant64_p (rtx); +extern bool gcn_inline_constant_p (rtx); +extern int gcn_inline_fp_constant_p (rtx, bool); +extern reg_class gcn_mode_code_base_reg_class (machine_mode, addr_space_t, + int, int); +extern rtx gcn_oacc_dim_pos (int dim); +extern rtx gcn_oacc_dim_size (int dim); +extern rtx gcn_operand_doublepart (machine_mode, rtx, int); 
+extern rtx gcn_operand_part (machine_mode, rtx, int); +extern bool gcn_regno_mode_code_ok_for_base_p (int, machine_mode, + addr_space_t, int, int); +extern reg_class gcn_regno_reg_class (int regno); +extern rtx gcn_scalar_exec (); +extern rtx gcn_scalar_exec_reg (); +extern bool gcn_scalar_flat_address_p (rtx); +extern bool gcn_scalar_flat_mem_p (rtx); +extern bool gcn_sgpr_move_p (rtx, rtx); +extern bool gcn_valid_move_p (machine_mode, rtx, rtx); +extern rtx gcn_vec_constant (machine_mode, int); +extern rtx gcn_vec_constant (machine_mode, rtx); +extern bool gcn_vgpr_move_p (rtx, rtx); +extern void print_operand_address (FILE *file, register rtx addr); +extern void print_operand (FILE *file, rtx x, int code); +extern bool regno_ok_for_index_p (int); + +enum gcn_cvt_t +{ + fix_trunc_cvt, + fixuns_trunc_cvt, + float_cvt, + floatuns_cvt, + extend_cvt, + trunc_cvt +}; + +extern bool gcn_valid_cvt_p (machine_mode from, machine_mode to, + enum gcn_cvt_t op); + +#ifdef TREE_CODE +extern void gcn_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, + int); +class gimple_opt_pass; +extern gimple_opt_pass *make_pass_omp_gcn (gcc::context *ctxt); +#endif + +/* Return true if MODE is valid for 1 VGPR register. */ + +inline bool +vgpr_1reg_mode_p (machine_mode mode) +{ + return (mode == SImode || mode == SFmode || mode == HImode || mode == QImode + || mode == V64QImode || mode == V64HImode || mode == V64SImode + || mode == V64HFmode || mode == V64SFmode || mode == BImode); +} + +/* Return true if MODE is valid for 1 SGPR register. */ + +inline bool +sgpr_1reg_mode_p (machine_mode mode) +{ + return (mode == SImode || mode == SFmode || mode == HImode + || mode == QImode || mode == BImode); +} + +/* Return true if MODE is valid for pair of VGPR registers. */ + +inline bool +vgpr_2reg_mode_p (machine_mode mode) +{ + return (mode == DImode || mode == DFmode + || mode == V64DImode || mode == V64DFmode); +} + +/* Return true if MODE can be handled directly by VGPR operations. 
*/ + +inline bool +vgpr_vector_mode_p (machine_mode mode) +{ + return (mode == V64QImode || mode == V64HImode + || mode == V64SImode || mode == V64DImode + || mode == V64HFmode || mode == V64SFmode || mode == V64DFmode); +} + + +/* Return true if MODE is valid for pair of SGPR registers. */ + +inline bool +sgpr_2reg_mode_p (machine_mode mode) +{ + return mode == DImode || mode == DFmode || mode == V64BImode; +} + +#endif diff --git a/gcc/config/gcn/gcn-run.c b/gcc/config/gcn/gcn-run.c new file mode 100644 index 0000000..3dea343 --- /dev/null +++ b/gcc/config/gcn/gcn-run.c @@ -0,0 +1,854 @@ +/* Run a stand-alone AMD GCN kernel. + + Copyright 2017 Mentor Graphics Corporation + Copyright 2018 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* This program will run a compiled stand-alone GCN kernel on a GPU. + + The kernel entry point's signature must use a standard main signature: + + int main(int argc, char **argv) +*/ + +#include <stdint.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <malloc.h> +#include <string.h> +#include <dlfcn.h> +#include <unistd.h> +#include <elf.h> +#include <signal.h> + +/* These probably won't be in elf.h for a while.
*/ +#ifndef R_AMDGPU_NONE +#define R_AMDGPU_NONE 0 +#define R_AMDGPU_ABS32_LO 1 /* (S + A) & 0xFFFFFFFF */ +#define R_AMDGPU_ABS32_HI 2 /* (S + A) >> 32 */ +#define R_AMDGPU_ABS64 3 /* S + A */ +#define R_AMDGPU_REL32 4 /* S + A - P */ +#define R_AMDGPU_REL64 5 /* S + A - P */ +#define R_AMDGPU_ABS32 6 /* S + A */ +#define R_AMDGPU_GOTPCREL 7 /* G + GOT + A - P */ +#define R_AMDGPU_GOTPCREL32_LO 8 /* (G + GOT + A - P) & 0xFFFFFFFF */ +#define R_AMDGPU_GOTPCREL32_HI 9 /* (G + GOT + A - P) >> 32 */ +#define R_AMDGPU_REL32_LO 10 /* (S + A - P) & 0xFFFFFFFF */ +#define R_AMDGPU_REL32_HI 11 /* (S + A - P) >> 32 */ +#define reserved 12 +#define R_AMDGPU_RELATIVE64 13 /* B + A */ +#endif + +#include "hsa.h" + +#ifndef HSA_RUNTIME_LIB +#define HSA_RUNTIME_LIB "libhsa-runtime64.so" +#endif + +#ifndef VERSION_STRING +#define VERSION_STRING "(version unknown)" +#endif + +bool debug = false; + +hsa_agent_t device = { 0 }; +hsa_queue_t *queue = NULL; +uint64_t kernel = 0; +hsa_executable_t executable = { 0 }; + +hsa_region_t kernargs_region = { 0 }; +uint32_t kernarg_segment_size = 0; +uint32_t group_segment_size = 0; +uint32_t private_segment_size = 0; + +static void +usage (const char *progname) +{ + printf ("Usage: %s [options] kernel [kernel-args]\n\n" + "Options:\n" + " --help\n" + " --version\n" + " --debug\n", progname); +} + +static void +version (const char *progname) +{ + printf ("%s " VERSION_STRING "\n", progname); +} + +/* As an HSA runtime is dlopened, following structure defines the necessary + function pointers. + Code adapted from libgomp. */ + +struct hsa_runtime_fn_info +{ + /* HSA runtime. 
*/ + hsa_status_t (*hsa_status_string_fn) (hsa_status_t status, + const char **status_string); + hsa_status_t (*hsa_agent_get_info_fn) (hsa_agent_t agent, + hsa_agent_info_t attribute, + void *value); + hsa_status_t (*hsa_init_fn) (void); + hsa_status_t (*hsa_iterate_agents_fn) + (hsa_status_t (*callback) (hsa_agent_t agent, void *data), void *data); + hsa_status_t (*hsa_region_get_info_fn) (hsa_region_t region, + hsa_region_info_t attribute, + void *value); + hsa_status_t (*hsa_queue_create_fn) + (hsa_agent_t agent, uint32_t size, hsa_queue_type_t type, + void (*callback) (hsa_status_t status, hsa_queue_t *source, void *data), + void *data, uint32_t private_segment_size, + uint32_t group_segment_size, hsa_queue_t **queue); + hsa_status_t (*hsa_agent_iterate_regions_fn) + (hsa_agent_t agent, + hsa_status_t (*callback) (hsa_region_t region, void *data), void *data); + hsa_status_t (*hsa_executable_destroy_fn) (hsa_executable_t executable); + hsa_status_t (*hsa_executable_create_fn) + (hsa_profile_t profile, hsa_executable_state_t executable_state, + const char *options, hsa_executable_t *executable); + hsa_status_t (*hsa_executable_global_variable_define_fn) + (hsa_executable_t executable, const char *variable_name, void *address); + hsa_status_t (*hsa_executable_load_code_object_fn) + (hsa_executable_t executable, hsa_agent_t agent, + hsa_code_object_t code_object, const char *options); + hsa_status_t (*hsa_executable_freeze_fn) (hsa_executable_t executable, + const char *options); + hsa_status_t (*hsa_signal_create_fn) (hsa_signal_value_t initial_value, + uint32_t num_consumers, + const hsa_agent_t *consumers, + hsa_signal_t *signal); + hsa_status_t (*hsa_memory_allocate_fn) (hsa_region_t region, size_t size, + void **ptr); + hsa_status_t (*hsa_memory_copy_fn) (void *dst, const void *src, + size_t size); + hsa_status_t (*hsa_memory_free_fn) (void *ptr); + hsa_status_t (*hsa_signal_destroy_fn) (hsa_signal_t signal); + hsa_status_t (*hsa_executable_get_symbol_fn) + 
(hsa_executable_t executable, const char *module_name, + const char *symbol_name, hsa_agent_t agent, int32_t call_convention, + hsa_executable_symbol_t *symbol); + hsa_status_t (*hsa_executable_symbol_get_info_fn) + (hsa_executable_symbol_t executable_symbol, + hsa_executable_symbol_info_t attribute, void *value); + void (*hsa_signal_store_relaxed_fn) (hsa_signal_t signal, + hsa_signal_value_t value); + hsa_signal_value_t (*hsa_signal_wait_acquire_fn) + (hsa_signal_t signal, hsa_signal_condition_t condition, + hsa_signal_value_t compare_value, uint64_t timeout_hint, + hsa_wait_state_t wait_state_hint); + hsa_signal_value_t (*hsa_signal_wait_relaxed_fn) + (hsa_signal_t signal, hsa_signal_condition_t condition, + hsa_signal_value_t compare_value, uint64_t timeout_hint, + hsa_wait_state_t wait_state_hint); + hsa_status_t (*hsa_queue_destroy_fn) (hsa_queue_t *queue); + hsa_status_t (*hsa_code_object_deserialize_fn) + (void *serialized_code_object, size_t serialized_code_object_size, + const char *options, hsa_code_object_t *code_object); + uint64_t (*hsa_queue_load_write_index_relaxed_fn) + (const hsa_queue_t *queue); + void (*hsa_queue_store_write_index_relaxed_fn) + (const hsa_queue_t *queue, uint64_t value); + hsa_status_t (*hsa_shut_down_fn) (); +}; + +/* HSA runtime functions that are initialized in init_hsa_context. + Code adapted from libgomp. 
*/ + +static struct hsa_runtime_fn_info hsa_fns; + +#define DLSYM_FN(function) \ + hsa_fns.function##_fn = dlsym (handle, #function); \ + if (hsa_fns.function##_fn == NULL) \ + goto fail; + +static void +init_hsa_runtime_functions (void) +{ + void *handle = dlopen (HSA_RUNTIME_LIB, RTLD_LAZY); + if (handle == NULL) + { + fprintf (stderr, + "The HSA runtime is required to run GCN kernels on hardware.\n" + "%s: File not found or could not be opened\n", + HSA_RUNTIME_LIB); + exit (1); + } + + DLSYM_FN (hsa_status_string) + DLSYM_FN (hsa_agent_get_info) + DLSYM_FN (hsa_init) + DLSYM_FN (hsa_iterate_agents) + DLSYM_FN (hsa_region_get_info) + DLSYM_FN (hsa_queue_create) + DLSYM_FN (hsa_agent_iterate_regions) + DLSYM_FN (hsa_executable_destroy) + DLSYM_FN (hsa_executable_create) + DLSYM_FN (hsa_executable_global_variable_define) + DLSYM_FN (hsa_executable_load_code_object) + DLSYM_FN (hsa_executable_freeze) + DLSYM_FN (hsa_signal_create) + DLSYM_FN (hsa_memory_allocate) + DLSYM_FN (hsa_memory_copy) + DLSYM_FN (hsa_memory_free) + DLSYM_FN (hsa_signal_destroy) + DLSYM_FN (hsa_executable_get_symbol) + DLSYM_FN (hsa_executable_symbol_get_info) + DLSYM_FN (hsa_signal_wait_acquire) + DLSYM_FN (hsa_signal_wait_relaxed) + DLSYM_FN (hsa_signal_store_relaxed) + DLSYM_FN (hsa_queue_destroy) + DLSYM_FN (hsa_code_object_deserialize) + DLSYM_FN (hsa_queue_load_write_index_relaxed) + DLSYM_FN (hsa_queue_store_write_index_relaxed) + DLSYM_FN (hsa_shut_down) + + return; + +fail: + fprintf (stderr, "Failed to find HSA functions in " HSA_RUNTIME_LIB "\n"); + exit (1); +} + +#undef DLSYM_FN + +/* Report a fatal error STR together with the HSA error corresponding to + STATUS and terminate execution of the current process. 
*/ + +static void +hsa_fatal (const char *str, hsa_status_t status) +{ + const char *hsa_error_msg; + hsa_fns.hsa_status_string_fn (status, &hsa_error_msg); + fprintf (stderr, "%s: FAILED\nHSA Runtime message: %s\n", str, + hsa_error_msg); + exit (1); +} + +/* Helper macros to ensure we check the return values from the HSA Runtime. + These just keep the rest of the code a bit cleaner. */ + +#define XHSA_CMP(FN, CMP, MSG) \ + do { \ + hsa_status_t status = (FN); \ + if (!(CMP)) \ + hsa_fatal ((MSG), status); \ + else if (debug) \ + fprintf (stderr, "%s: OK\n", (MSG)); \ + } while (0) +#define XHSA(FN, MSG) XHSA_CMP(FN, status == HSA_STATUS_SUCCESS, MSG) + +/* Callback of hsa_iterate_agents. + Called once for each available device, and returns "break" when a + suitable one has been found. */ + +static hsa_status_t +get_gpu_agent (hsa_agent_t agent, void *data __attribute__ ((unused))) +{ + hsa_device_type_t device_type; + XHSA (hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_DEVICE, + &device_type), + "Get agent type"); + + /* Select only GPU devices. */ + /* TODO: support selecting from multiple GPUs. */ + if (HSA_DEVICE_TYPE_GPU == device_type) + { + device = agent; + return HSA_STATUS_INFO_BREAK; + } + + /* The device was not suitable. */ + return HSA_STATUS_SUCCESS; +} + +/* Callback of hsa_iterate_regions. + Called once for each available memory region, and returns "break" when a + suitable one has been found. */ + +static hsa_status_t +get_kernarg_region (hsa_region_t region, void *data __attribute__ ((unused))) +{ + /* Reject non-global regions. */ + hsa_region_segment_t segment; + hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_SEGMENT, &segment); + if (HSA_REGION_SEGMENT_GLOBAL != segment) + return HSA_STATUS_SUCCESS; + + /* Find a region with the KERNARG flag set. 
*/ + hsa_region_global_flag_t flags; + hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_GLOBAL_FLAGS, + &flags); + if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG) + { + kernargs_region = region; + return HSA_STATUS_INFO_BREAK; + } + + /* The region was not suitable. */ + return HSA_STATUS_SUCCESS; +} + +/* Initialize the HSA Runtime library and GPU device. */ + +static void +init_device () +{ + /* Load the shared library and find the API functions. */ + init_hsa_runtime_functions (); + + /* Initialize the HSA Runtime. */ + XHSA (hsa_fns.hsa_init_fn (), + "Initialize run-time"); + + /* Select a suitable device. + The call-back function, get_gpu_agent, does the selection. */ + XHSA_CMP (hsa_fns.hsa_iterate_agents_fn (get_gpu_agent, NULL), + status == HSA_STATUS_SUCCESS || status == HSA_STATUS_INFO_BREAK, + "Find a device"); + + /* Initialize the queue used for launching kernels. */ + uint32_t queue_size = 0; + XHSA (hsa_fns.hsa_agent_get_info_fn (device, HSA_AGENT_INFO_QUEUE_MAX_SIZE, + &queue_size), + "Find max queue size"); + XHSA (hsa_fns.hsa_queue_create_fn (device, queue_size, + HSA_QUEUE_TYPE_SINGLE, NULL, + NULL, UINT32_MAX, UINT32_MAX, &queue), + "Set up a device queue"); + + /* Select a memory region for the kernel arguments. + The call-back function, get_kernarg_region, does the selection. */ + XHSA_CMP (hsa_fns.hsa_agent_iterate_regions_fn (device, get_kernarg_region, + NULL), + status == HSA_STATUS_SUCCESS || status == HSA_STATUS_INFO_BREAK, + "Locate kernargs memory"); +} + + +/* Read a whole input file. + Code copied from mkoffload. */ + +static char * +read_file (const char *filename, size_t *plen) +{ + size_t alloc = 16384; + size_t base = 0; + char *buffer; + + FILE *stream = fopen (filename, "rb"); + if (!stream) + { + perror (filename); + exit (1); + } + + if (!fseek (stream, 0, SEEK_END)) + { + /* Get the file size. 
*/
+      long s = ftell (stream);
+      if (s >= 0)
+        alloc = s + 100;
+      fseek (stream, 0, SEEK_SET);
+    }
+  buffer = malloc (alloc);
+
+  for (;;)
+    {
+      size_t n = fread (buffer + base, 1, alloc - base - 1, stream);
+
+      if (!n)
+        break;
+      base += n;
+      if (base + 1 == alloc)
+        {
+          alloc *= 2;
+          buffer = realloc (buffer, alloc);
+        }
+    }
+  buffer[base] = 0;
+  *plen = base;
+
+  fclose (stream);
+
+  return buffer;
+}
+
+/* Read a HSA Code Object (HSACO) from file, and load it into the device.  */
+
+static void
+load_image (const char *filename)
+{
+  size_t image_size;
+  Elf64_Ehdr *image = (void *) read_file (filename, &image_size);
+
+  /* An "executable" consists of one or more code objects.  */
+  XHSA (hsa_fns.hsa_executable_create_fn (HSA_PROFILE_FULL,
+                                          HSA_EXECUTABLE_STATE_UNFROZEN, "",
+                                          &executable),
+        "Initialize GCN executable");
+
+  /* Hide relocations from the HSA runtime loader.
+     Keep a copy of the unmodified section headers to use later.  */
+  Elf64_Shdr *image_sections =
+    (Elf64_Shdr *) ((char *) image + image->e_shoff);
+  Elf64_Shdr *sections = malloc (sizeof (Elf64_Shdr) * image->e_shnum);
+  memcpy (sections, image_sections, sizeof (Elf64_Shdr) * image->e_shnum);
+  for (int i = image->e_shnum - 1; i >= 0; i--)
+    {
+      if (image_sections[i].sh_type == SHT_RELA
+          || image_sections[i].sh_type == SHT_REL)
+        /* Change section type to something harmless.  */
+        image_sections[i].sh_type = SHT_NOTE;
+    }
+
+  /* Add the HSACO to the executable.  */
+  hsa_code_object_t co = { 0 };
+  XHSA (hsa_fns.hsa_code_object_deserialize_fn (image, image_size, NULL, &co),
+        "Deserialize GCN code object");
+  XHSA (hsa_fns.hsa_executable_load_code_object_fn (executable, device, co,
+                                                    ""),
+        "Load GCN code object");
+
+  /* We're done modifying the executable.  */
+  XHSA (hsa_fns.hsa_executable_freeze_fn (executable, ""),
+        "Freeze GCN executable");
+
+  /* Locate the "main" function, and read the kernel's properties.  */
+  hsa_executable_symbol_t symbol;
+  XHSA (hsa_fns.hsa_executable_get_symbol_fn (executable, NULL, "main",
+                                              device, 0, &symbol),
+        "Find 'main' function");
+  XHSA (hsa_fns.hsa_executable_symbol_get_info_fn
+            (symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kernel),
+        "Extract kernel object");
+  XHSA (hsa_fns.hsa_executable_symbol_get_info_fn
+            (symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
+             &kernarg_segment_size),
+        "Extract kernarg segment size");
+  XHSA (hsa_fns.hsa_executable_symbol_get_info_fn
+            (symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
+             &group_segment_size),
+        "Extract group segment size");
+  XHSA (hsa_fns.hsa_executable_symbol_get_info_fn
+            (symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
+             &private_segment_size),
+        "Extract private segment size");
+
+  /* Find main function in ELF, and calculate actual load offset.  */
+  Elf64_Addr load_offset;
+  XHSA (hsa_fns.hsa_executable_symbol_get_info_fn
+            (symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS,
+             &load_offset),
+        "Extract 'main' symbol address");
+  for (int i = 0; i < image->e_shnum; i++)
+    if (sections[i].sh_type == SHT_SYMTAB)
+      {
+        Elf64_Shdr *strtab = &sections[sections[i].sh_link];
+        char *strings = (char *) image + strtab->sh_offset;
+
+        for (size_t offset = 0;
+             offset < sections[i].sh_size;
+             offset += sections[i].sh_entsize)
+          {
+            Elf64_Sym *sym = (Elf64_Sym *) ((char *) image
+                                            + sections[i].sh_offset + offset);
+            if (strcmp ("main", strings + sym->st_name) == 0)
+              {
+                load_offset -= sym->st_value;
+                goto found_main;
+              }
+          }
+      }
+  /* We only get here when main was not found.
+     This should never happen.  */
+  fprintf (stderr, "Error: main function not found.\n");
+  abort ();
+found_main:;
+
+  /* Find dynamic symbol table.  */
+  Elf64_Shdr *dynsym = NULL;
+  for (int i = 0; i < image->e_shnum; i++)
+    if (sections[i].sh_type == SHT_DYNSYM)
+      {
+        dynsym = &sections[i];
+        break;
+      }
+
+  /* Fix up relocations.  */
+  for (int i = 0; i < image->e_shnum; i++)
+    {
+      if (sections[i].sh_type == SHT_RELA)
+        for (size_t offset = 0;
+             offset < sections[i].sh_size;
+             offset += sections[i].sh_entsize)
+          {
+            Elf64_Rela *reloc = (Elf64_Rela *) ((char *) image
+                                                + sections[i].sh_offset
+                                                + offset);
+            Elf64_Sym *sym =
+              (dynsym
+               ? (Elf64_Sym *) ((char *) image
+                                + dynsym->sh_offset
+                                + (dynsym->sh_entsize
+                                   * ELF64_R_SYM (reloc->r_info))) : NULL);
+
+            int64_t S = (sym ? sym->st_value : 0);
+            int64_t P = reloc->r_offset + load_offset;
+            int64_t A = reloc->r_addend;
+            int64_t B = load_offset;
+            int64_t V, size;
+            switch (ELF64_R_TYPE (reloc->r_info))
+              {
+              case R_AMDGPU_ABS32_LO:
+                V = (S + A) & 0xFFFFFFFF;
+                size = 4;
+                break;
+              case R_AMDGPU_ABS32_HI:
+                V = (S + A) >> 32;
+                size = 4;
+                break;
+              case R_AMDGPU_ABS64:
+                V = S + A;
+                size = 8;
+                break;
+              case R_AMDGPU_REL32:
+                V = S + A - P;
+                size = 4;
+                break;
+              case R_AMDGPU_REL64:
+                /* FIXME
+                   LLD seems to emit REL64 where the assembler has ABS64.
+                   This is clearly wrong because it's not what the compiler
+                   is expecting.  Let's assume, for now, that it's a bug.
+                   In any case, GCN kernels are always self contained and
+                   therefore relative relocations will have been resolved
+                   already, so this should be a safe workaround.  */
+                V = S + A /* - P */ ;
+                size = 8;
+                break;
+              case R_AMDGPU_ABS32:
+                V = S + A;
+                size = 4;
+                break;
+              /* TODO R_AMDGPU_GOTPCREL */
+              /* TODO R_AMDGPU_GOTPCREL32_LO */
+              /* TODO R_AMDGPU_GOTPCREL32_HI */
+              case R_AMDGPU_REL32_LO:
+                V = (S + A - P) & 0xFFFFFFFF;
+                size = 4;
+                break;
+              case R_AMDGPU_REL32_HI:
+                V = (S + A - P) >> 32;
+                size = 4;
+                break;
+              case R_AMDGPU_RELATIVE64:
+                V = B + A;
+                size = 8;
+                break;
+              default:
+                fprintf (stderr, "Error: unsupported relocation type.\n");
+                exit (1);
+              }
+            XHSA (hsa_fns.hsa_memory_copy_fn ((void *) P, &V, size),
+                  "Fix up relocation");
+          }
+    }
+}
+
+/* Allocate some device memory from the kernargs region.
+ The returned address will be 32-bit (with excess zeroed on 64-bit host), + and accessible via the same address on both host and target (via + __flat_scalar GCN address space). */ + +static void * +device_malloc (size_t size) +{ + void *result; + XHSA (hsa_fns.hsa_memory_allocate_fn (kernargs_region, size, &result), + "Allocate device memory"); + return result; +} + +/* These are the device pointers that will be transferred to the target. + The HSA Runtime points the kernargs register here. + They correspond to function signature: + int main (int argc, char *argv[], int *return_value) + The compiler expects this, for kernel functions, and will + automatically assign the exit value to *return_value. */ +struct kernargs +{ + /* Kernargs. */ + int32_t argc; + int64_t argv; + int64_t out_ptr; + int64_t heap_ptr; + + /* Output data. */ + struct output + { + int return_value; + int next_output; + struct printf_data + { + int written; + char msg[128]; + int type; + union + { + int64_t ivalue; + double dvalue; + char text[128]; + }; + } queue[1000]; + } output_data; + + struct heap + { + int64_t size; + char data[0]; + } heap; +}; + +/* Print any console output from the kernel. + We print all entries from print_index to the next entry without a "written" + flag. Subsequent calls should use the returned print_index value to resume + from the same point. 
*/ +void +gomp_print_output (struct kernargs *kernargs, int *print_index) +{ + static bool warned_p = false; + + int limit = (sizeof (kernargs->output_data.queue) + / sizeof (kernargs->output_data.queue[0])); + + int i; + for (i = *print_index; i < limit; i++) + { + struct printf_data *data = &kernargs->output_data.queue[i]; + + if (!data->written) + break; + + switch (data->type) + { + case 0: + printf ("%.128s%ld\n", data->msg, data->ivalue); + break; + case 1: + printf ("%.128s%f\n", data->msg, data->dvalue); + break; + case 2: + printf ("%.128s%.128s\n", data->msg, data->text); + break; + case 3: + printf ("%.128s%.128s", data->msg, data->text); + break; + } + + data->written = 0; + } + + if (kernargs->output_data.next_output > limit && !warned_p) + { + printf ("WARNING: GCN print buffer exhausted.\n"); + warned_p = true; + } + + *print_index = i; +} + +/* Execute an already-loaded kernel on the device. */ + +static void +run (void *kernargs) +{ + /* A "signal" is used to launch and monitor the kernel. */ + hsa_signal_t signal; + XHSA (hsa_fns.hsa_signal_create_fn (1, 0, NULL, &signal), + "Create signal"); + + /* Configure for a single-worker kernel. 
*/ + uint64_t index = hsa_fns.hsa_queue_load_write_index_relaxed_fn (queue); + const uint32_t queueMask = queue->size - 1; + hsa_kernel_dispatch_packet_t *dispatch_packet = + &(((hsa_kernel_dispatch_packet_t *) (queue->base_address))[index & + queueMask]); + dispatch_packet->setup |= 3 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet->workgroup_size_x = (uint16_t) 1; + dispatch_packet->workgroup_size_y = (uint16_t) 64; + dispatch_packet->workgroup_size_z = (uint16_t) 1; + dispatch_packet->grid_size_x = 1; + dispatch_packet->grid_size_y = 64; + dispatch_packet->grid_size_z = 1; + dispatch_packet->completion_signal = signal; + dispatch_packet->kernel_object = kernel; + dispatch_packet->kernarg_address = (void *) kernargs; + dispatch_packet->private_segment_size = private_segment_size; + dispatch_packet->group_segment_size = group_segment_size; + + uint16_t header = 0; + header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + + __atomic_store_n ((uint32_t *) dispatch_packet, + header | (dispatch_packet->setup << 16), + __ATOMIC_RELEASE); + + if (debug) + fprintf (stderr, "Launch kernel\n"); + + hsa_fns.hsa_queue_store_write_index_relaxed_fn (queue, index + 1); + hsa_fns.hsa_signal_store_relaxed_fn (queue->doorbell_signal, index); + /* Kernel running ...... 
*/ + int print_index = 0; + while (hsa_fns.hsa_signal_wait_relaxed_fn (signal, HSA_SIGNAL_CONDITION_LT, + 1, 1000000, + HSA_WAIT_STATE_ACTIVE) != 0) + { + usleep (10000); + gomp_print_output (kernargs, &print_index); + } + + gomp_print_output (kernargs, &print_index); + + if (debug) + fprintf (stderr, "Kernel exited\n"); + + XHSA (hsa_fns.hsa_signal_destroy_fn (signal), + "Clean up signal"); +} + +int +main (int argc, char *argv[]) +{ + int kernel_arg = 0; + for (int i = 1; i < argc; i++) + { + if (!strcmp (argv[i], "--help")) + { + usage (argv[0]); + return 0; + } + else if (!strcmp (argv[i], "--version")) + { + version (argv[0]); + return 0; + } + else if (!strcmp (argv[i], "--debug")) + debug = true; + else if (argv[i][0] == '-') + { + usage (argv[0]); + return 1; + } + else + { + kernel_arg = i; + break; + } + } + + if (!kernel_arg) + { + /* No kernel arguments were found. */ + usage (argv[0]); + return 1; + } + + /* The remaining arguments are for the GCN kernel. */ + int kernel_argc = argc - kernel_arg; + char **kernel_argv = &argv[kernel_arg]; + + init_device (); + load_image (kernel_argv[0]); + + /* Calculate size of function parameters + argv data. */ + size_t args_size = 0; + for (int i = 0; i < kernel_argc; i++) + args_size += strlen (kernel_argv[i]) + 1; + + /* Allocate device memory for both function parameters and the argv + data. */ + size_t heap_size = 10 * 1024 * 1024; /* 10MB. */ + struct kernargs *kernargs = device_malloc (sizeof (*kernargs) + heap_size); + struct argdata + { + int64_t argv_data[kernel_argc]; + char strings[args_size]; + } *args = device_malloc (sizeof (struct argdata)); + + /* Write the data to the target. */ + kernargs->argc = kernel_argc; + kernargs->argv = (int64_t) args->argv_data; + kernargs->out_ptr = (int64_t) &kernargs->output_data; + kernargs->output_data.return_value = 0xcafe0000; /* Default return value. 
*/ + kernargs->output_data.next_output = 0; + for (unsigned i = 0; i < (sizeof (kernargs->output_data.queue) + / sizeof (kernargs->output_data.queue[0])); i++) + kernargs->output_data.queue[i].written = 0; + int offset = 0; + for (int i = 0; i < kernel_argc; i++) + { + size_t arg_len = strlen (kernel_argv[i]) + 1; + args->argv_data[i] = (int64_t) &args->strings[offset]; + memcpy (&args->strings[offset], kernel_argv[i], arg_len + 1); + offset += arg_len; + } + kernargs->heap_ptr = (int64_t) &kernargs->heap; + kernargs->heap.size = heap_size; + + /* Run the kernel on the GPU. */ + run (kernargs); + unsigned int return_value = + (unsigned int) kernargs->output_data.return_value; + + unsigned int upper = (return_value & ~0xffff) >> 16; + if (upper == 0xcafe) + printf ("Kernel exit value was never set\n"); + else if (upper == 0xffff) + ; /* Set by exit. */ + else if (upper == 0) + ; /* Set by return from main. */ + else + printf ("Possible kernel exit value corruption, 2 most significant bytes " + "aren't 0xffff, 0xcafe, or 0: 0x%x\n", return_value); + + if (upper == 0xffff) + { + unsigned int signal = (return_value >> 8) & 0xff; + if (signal == SIGABRT) + printf ("Kernel aborted\n"); + else if (signal != 0) + printf ("Kernel received unkown signal\n"); + } + + if (debug) + printf ("Kernel exit value: %d\n", return_value & 0xff); + + /* Clean shut down. */ + XHSA (hsa_fns.hsa_memory_free_fn (kernargs), + "Clean up device memory"); + XHSA (hsa_fns.hsa_executable_destroy_fn (executable), + "Clean up GCN executable"); + XHSA (hsa_fns.hsa_queue_destroy_fn (queue), + "Clean up device queue"); + XHSA (hsa_fns.hsa_shut_down_fn (), + "Shut down run-time"); + + return return_value & 0xff; +} diff --git a/gcc/config/gcn/gcn-tree.c b/gcc/config/gcn/gcn-tree.c new file mode 100644 index 0000000..0365baf --- /dev/null +++ b/gcc/config/gcn/gcn-tree.c @@ -0,0 +1,715 @@ +/* Copyright (C) 2017-2018 Free Software Foundation, Inc. + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* {{{ Includes. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "target.h" +#include "tree.h" +#include "gimple.h" +#include "tree-pass.h" +#include "gimple-iterator.h" +#include "cfghooks.h" +#include "cfgloop.h" +#include "tm_p.h" +#include "stringpool.h" +#include "fold-const.h" +#include "varasm.h" +#include "omp-low.h" +#include "omp-general.h" +#include "internal-fn.h" +#include "tree-vrp.h" +#include "tree-ssanames.h" +#include "tree-ssa-operands.h" +#include "gimplify.h" +#include "tree-phinodes.h" +#include "cgraph.h" +#include "targhooks.h" +#include "langhooks-def.h" + +/* }}} */ +/* {{{ OMP GCN pass. 
*/
+
+/* Walk every statement of the current function and replace calls to
+   __builtin_omp_get_thread_num and __builtin_omp_get_team_num with calls
+   to the GCN dim_pos builtin.  Returns the TODO flags required by the
+   replacements made (TODO_update_ssa if any call was rewritten).  */
+
+unsigned int
+execute_omp_gcn (void)
+{
+  tree thr_num_id
+    = DECL_NAME (builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM));
+  tree team_num_id
+    = DECL_NAME (builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM));
+  unsigned int todo = 0;
+  basic_block bb;
+
+  FOR_EACH_BB_FN (bb, cfun)
+    for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
+         gsi_next (&gsi))
+      {
+        gimple *call = gsi_stmt (gsi);
+        if (!is_gimple_call (call))
+          continue;
+
+        tree decl = gimple_call_fndecl (call);
+        if (!decl)
+          continue;
+
+        /* Pick the dimension argument:
+             lhs = __builtin_omp_get_thread_num ()
+               becomes  lhs = __builtin_gcn_dim_pos (1)
+             lhs = __builtin_omp_get_team_num ()
+               becomes  lhs = __builtin_gcn_dim_pos (0)
+           Any other callee is left alone.  */
+        tree decl_id = DECL_NAME (decl);
+        int dim;
+        if (decl_id == thr_num_id)
+          dim = 1;
+        else if (decl_id == team_num_id)
+          dim = 0;
+        else
+          continue;
+
+        if (dump_file && (dump_flags & TDF_DETAILS))
+          fprintf (dump_file,
+                   "Replace '%s' with __builtin_gcn_dim_pos.\n",
+                   IDENTIFIER_POINTER (decl_id));
+
+        tree fn = targetm.builtin_decl (GCN_BUILTIN_OMP_DIM_POS, 0);
+        tree fnarg = build_int_cst (unsigned_type_node, dim);
+        gimple *stmt = gimple_build_call (fn, 1, fnarg);
+        gimple_call_set_lhs (stmt, gimple_get_lhs (call));
+        gsi_replace (&gsi, stmt, true);
+
+        todo |= TODO_update_ssa;
+      }
+
+  return todo;
+}
+
+namespace
+{
+
+  const pass_data pass_data_omp_gcn = {
+    GIMPLE_PASS,
+    "omp_gcn",  /* name */
+    OPTGROUP_NONE,  /* optinfo_flags */
+    TV_NONE,  /* tv_id */
+    0,  /* properties_required */
+    0,  /* properties_provided */
+    0,  /* properties_destroyed */
+    0,  /* todo_flags_start */
+    TODO_df_finish,  /* todo_flags_finish */
+  };
+
+  /* GIMPLE pass wrapper around execute_omp_gcn; gated on -fopenmp.  */
+  class pass_omp_gcn : public gimple_opt_pass
+  {
+  public:
+    pass_omp_gcn (gcc::context *ctxt)
+      : gimple_opt_pass (pass_data_omp_gcn, ctxt)
+    {
+    }
+
+    /* opt_pass methods: */
+    virtual bool gate (function *)
+    {
+      return flag_openmp;
+    }
+
+    virtual unsigned int execute (function *)
+    {
+      return execute_omp_gcn ();
+    }
+
+  };  /* class pass_omp_gcn.  */
+
+}  /* anon namespace.  */
+
+gimple_opt_pass *
+make_pass_omp_gcn (gcc::context *ctxt)
+{
+  return new pass_omp_gcn (ctxt);
+}
+
+/* }}}  */
+/* {{{ OpenACC reductions.  */
+
+/* Global lock variable, needed for 128bit worker & gang reductions.  */
+
+static GTY(()) tree global_lock_var;
+
+/* Return the address of the global lock variable, creating its decl
+   (an external, public, volatile unsigned named "__reduction_lock") on
+   first use.  */
+
+static tree
+gcn_global_lock_addr ()
+{
+  if (!global_lock_var)
+    {
+      tree name = get_identifier ("__reduction_lock");
+      tree type = build_qualified_type (unsigned_type_node,
+                                        TYPE_QUAL_VOLATILE);
+      tree lock = build_decl (BUILTINS_LOCATION, VAR_DECL, name, type);
+
+      global_lock_var = lock;
+      DECL_ARTIFICIAL (lock) = 1;
+      DECL_EXTERNAL (lock) = 1;
+      TREE_STATIC (lock) = 1;
+      TREE_PUBLIC (lock) = 1;
+      TREE_USED (lock) = 1;
+      mark_addressable (lock);
+      mark_decl_referenced (lock);
+    }
+
+  return build_fold_addr_expr (global_lock_var);
+}
+
+/* Helper function for gcn_reduction_update.
+
+   Insert code to locklessly update *PTR with *PTR OP VAR just before
+   GSI.  We use a lockless scheme for nearly all cases, which looks
+   like:
+     actual = initval (OP);
+     do {
+       guess = actual;
+       write = guess OP myval;
+       actual = cmp&swap (ptr, guess, write)
+     } while (actual bit-different-to guess);
+   return write;
+
+   This relies on a cmp&swap instruction, which is available for 32- and
+   64-bit types.  Larger types must use a locking scheme.  
*/
+
+static tree
+gcn_lockless_update (location_t loc, gimple_stmt_iterator *gsi,
+                     tree ptr, tree var, tree_code op)
+{
+  unsigned fn = GCN_BUILTIN_CMP_SWAP;
+  tree_code code = NOP_EXPR;
+  tree arg_type = unsigned_type_node;
+  tree var_type = TREE_TYPE (var);
+
+  if (TREE_CODE (var_type) == COMPLEX_TYPE
+      || TREE_CODE (var_type) == REAL_TYPE)
+    code = VIEW_CONVERT_EXPR;
+
+  if (TYPE_SIZE (var_type) == TYPE_SIZE (long_long_unsigned_type_node))
+    {
+      arg_type = long_long_unsigned_type_node;
+      fn = GCN_BUILTIN_CMP_SWAPLL;
+    }
+
+  tree swap_fn = gcn_builtin_decl (fn, true);
+
+  gimple_seq init_seq = NULL;
+  tree init_var = make_ssa_name (arg_type);
+  tree init_expr = omp_reduction_init_op (loc, op, var_type);
+  init_expr = fold_build1 (code, arg_type, init_expr);
+  gimplify_assign (init_var, init_expr, &init_seq);
+  gimple *init_end = gimple_seq_last (init_seq);
+
+  gsi_insert_seq_before (gsi, init_seq, GSI_SAME_STMT);
+
+  /* Split the block just after the init stmts.  The destination of the
+     new edge is LOOP_BB, which is registered as both header and latch of
+     the loop created at the end of this function.  */
+  basic_block pre_bb = gsi_bb (*gsi);
+  edge pre_edge = split_block (pre_bb, init_end);
+  basic_block loop_bb = pre_edge->dest;
+  pre_bb = pre_edge->src;
+  /* Reset the iterator.  */
+  *gsi = gsi_for_stmt (gsi_stmt (*gsi));
+
+  tree expect_var = make_ssa_name (arg_type);
+  tree actual_var = make_ssa_name (arg_type);
+  tree write_var = make_ssa_name (arg_type);
+
+  /* Build and insert the reduction calculation.  EXPECT_VAR is converted
+     to VAR's type, combined with VAR using OP, and converted back to
+     ARG_TYPE for the swap builtin.  */
+  gimple_seq red_seq = NULL;
+  tree write_expr = fold_build1 (code, var_type, expect_var);
+  write_expr = fold_build2 (op, var_type, write_expr, var);
+  write_expr = fold_build1 (code, arg_type, write_expr);
+  gimplify_assign (write_var, write_expr, &red_seq);
+
+  gsi_insert_seq_before (gsi, red_seq, GSI_SAME_STMT);
+
+  /* Build & insert the cmp&swap sequence.  ACTUAL_VAR receives the result
+     of the swap builtin and is then compared for equality with
+     EXPECT_VAR.  */
+  gimple_seq latch_seq = NULL;
+  tree swap_expr = build_call_expr_loc (loc, swap_fn, 3,
+                                        ptr, expect_var, write_var);
+  gimplify_assign (actual_var, swap_expr, &latch_seq);
+
+  gcond *cond = gimple_build_cond (EQ_EXPR, actual_var, expect_var,
+                                   NULL_TREE, NULL_TREE);
+  gimple_seq_add_stmt (&latch_seq, cond);
+
+  gimple *latch_end = gimple_seq_last (latch_seq);
+  gsi_insert_seq_before (gsi, latch_seq, GSI_SAME_STMT);
+
+  /* Split the block just after the latch stmts.  The edge created by the
+     split becomes the "true" exit edge; a new "false" edge loops back to
+     LOOP_BB.  */
+  edge post_edge = split_block (loop_bb, latch_end);
+  basic_block post_bb = post_edge->dest;
+  loop_bb = post_edge->src;
+  *gsi = gsi_for_stmt (gsi_stmt (*gsi));
+
+  post_edge->flags ^= EDGE_TRUE_VALUE | EDGE_FALLTHRU;
+  /* post_edge->probability = profile_probability::even ();
+     NOTE(review): the edge probabilities are left unset here, whereas
+     gcn_lockfull_update sets both of its edges to even; confirm this
+     difference is intended.  */
+  edge loop_edge = make_edge (loop_bb, loop_bb, EDGE_FALSE_VALUE);
+  /* loop_edge->probability = profile_probability::even ();  */
+  set_immediate_dominator (CDI_DOMINATORS, loop_bb, pre_bb);
+  set_immediate_dominator (CDI_DOMINATORS, post_bb, loop_bb);
+
+  gphi *phi = create_phi_node (expect_var, loop_bb);
+  add_phi_arg (phi, init_var, pre_edge, loc);
+  add_phi_arg (phi, actual_var, loop_edge, loc);
+
+  loop *loop = alloc_loop ();
+  loop->header = loop_bb;
+  loop->latch = loop_bb;
+  add_loop (loop, loop_bb->loop_father);
+
+  return fold_build1 (code, var_type, write_var);
+}
+
+/* Helper function for gcn_reduction_update.
+
+   Insert code to lockfully update *PTR with *PTR OP VAR just before
+   GSI.  This is necessary for types larger than 64 bits, where there
+   is no cmp&swap instruction to implement a lockless scheme.  We use
+   a lock variable in global memory.
+
+   while (cmp&swap (&lock_var, 0, 1))
+     continue;
+   T accum = *ptr;
+   accum = accum OP var;
+   *ptr = accum;
+   cmp&swap (&lock_var, 1, 0);
+   return accum;
+
+   A lock in global memory is necessary to force execution engine
+   descheduling and avoid resource starvation that can occur if the
+   lock is in shared memory.
+
+   Returns the SSA name holding the updated accumulator value.  */
+
+static tree
+gcn_lockfull_update (location_t loc, gimple_stmt_iterator *gsi,
+                     tree ptr, tree var, tree_code op)
+{
+  tree var_type = TREE_TYPE (var);
+  tree swap_fn = gcn_builtin_decl (GCN_BUILTIN_CMP_SWAP, true);
+  tree uns_unlocked = build_int_cst (unsigned_type_node, 0);
+  tree uns_locked = build_int_cst (unsigned_type_node, 1);
+
+  /* Split the block just before the gsi.  Insert a gimple nop to make
+     this easier.  */
+  gimple *nop = gimple_build_nop ();
+  gsi_insert_before (gsi, nop, GSI_SAME_STMT);
+  basic_block entry_bb = gsi_bb (*gsi);
+  edge entry_edge = split_block (entry_bb, nop);
+  basic_block lock_bb = entry_edge->dest;
+  /* Reset the iterator.  */
+  *gsi = gsi_for_stmt (gsi_stmt (*gsi));
+
+  /* Build and insert the locking sequence.  LOCK_VAR receives the result
+     of swapping the lock from unlocked (0) to locked (1) and is compared
+     for equality with the unlocked value.  */
+  gimple_seq lock_seq = NULL;
+  tree lock_var = make_ssa_name (unsigned_type_node);
+  tree lock_expr = gcn_global_lock_addr ();
+  lock_expr = build_call_expr_loc (loc, swap_fn, 3, lock_expr,
+                                   uns_unlocked, uns_locked);
+  gimplify_assign (lock_var, lock_expr, &lock_seq);
+  gcond *cond = gimple_build_cond (EQ_EXPR, lock_var, uns_unlocked,
+                                   NULL_TREE, NULL_TREE);
+  gimple_seq_add_stmt (&lock_seq, cond);
+  gimple *lock_end = gimple_seq_last (lock_seq);
+  gsi_insert_seq_before (gsi, lock_seq, GSI_SAME_STMT);
+
+  /* Split the block just after the lock sequence.  */
+  edge locked_edge = split_block (lock_bb, lock_end);
+  basic_block update_bb = locked_edge->dest;
+  lock_bb = locked_edge->src;
+  *gsi = gsi_for_stmt (gsi_stmt (*gsi));
+
+  /* Create the lock loop.  The "true" edge continues to UPDATE_BB; the
+     "false" edge retries LOCK_BB.  */
+  locked_edge->flags ^= EDGE_TRUE_VALUE | EDGE_FALLTHRU;
+  locked_edge->probability = profile_probability::even ();
+  edge loop_edge = make_edge (lock_bb, lock_bb, EDGE_FALSE_VALUE);
+  loop_edge->probability = profile_probability::even ();
+  set_immediate_dominator (CDI_DOMINATORS, lock_bb, entry_bb);
+  set_immediate_dominator (CDI_DOMINATORS, update_bb, lock_bb);
+
+  /* Create the loop structure.  The loop is the single block LOCK_BB
+     (header and latch) with an iteration estimate of 1.  */
+  loop *lock_loop = alloc_loop ();
+  lock_loop->header = lock_bb;
+  lock_loop->latch = lock_bb;
+  lock_loop->nb_iterations_estimate = 1;
+  lock_loop->any_estimate = true;
+  add_loop (lock_loop, entry_bb->loop_father);
+
+  /* Build and insert the reduction calculation.
+     NOTE(review): UPDATE_EXPR below is built from REF_IN (the volatile
+     memory reference) rather than from ACC_IN, so ACC_IN is assigned but
+     not otherwise used in this function; confirm whether ACC_IN was the
+     intended operand.  */
+  gimple_seq red_seq = NULL;
+  tree acc_in = make_ssa_name (var_type);
+  tree ref_in = build_simple_mem_ref (ptr);
+  TREE_THIS_VOLATILE (ref_in) = 1;
+  gimplify_assign (acc_in, ref_in, &red_seq);
+
+  tree acc_out = make_ssa_name (var_type);
+  tree update_expr = fold_build2 (op, var_type, ref_in, var);
+  gimplify_assign (acc_out, update_expr, &red_seq);
+
+  tree ref_out = build_simple_mem_ref (ptr);
+  TREE_THIS_VOLATILE (ref_out) = 1;
+  gimplify_assign (ref_out, acc_out, &red_seq);
+
+  gsi_insert_seq_before (gsi, red_seq, GSI_SAME_STMT);
+
+  /* Build & insert the unlock sequence: swap the lock from locked (1)
+     back to unlocked (0), discarding the result.  */
+  gimple_seq unlock_seq = NULL;
+  tree unlock_expr = gcn_global_lock_addr ();
+  unlock_expr = build_call_expr_loc (loc, swap_fn, 3, unlock_expr,
+                                     uns_locked, uns_unlocked);
+  gimplify_and_add (unlock_expr, &unlock_seq);
+  gsi_insert_seq_before (gsi, unlock_seq, GSI_SAME_STMT);
+
+  return acc_out;
+}
+
+/* Emit a sequence to update a reduction accumulator at *PTR with the
+   value held in VAR using operator OP.  Return the updated value.
+
+   Types whose TYPE_SIZE matches that of unsigned or unsigned long long
+   use gcn_lockless_update; every other size uses gcn_lockfull_update.
+
+   TODO: optimize for atomic ops and independent complex ops.  */
+
+static tree
+gcn_reduction_update (location_t loc, gimple_stmt_iterator *gsi,
+                      tree ptr, tree var, tree_code op)
+{
+  tree type = TREE_TYPE (var);
+  tree size = TYPE_SIZE (type);
+
+  if (size == TYPE_SIZE (unsigned_type_node)
+      || size == TYPE_SIZE (long_long_unsigned_type_node))
+    return gcn_lockless_update (loc, gsi, ptr, var, op);
+  else
+    return gcn_lockfull_update (loc, gsi, ptr, var, op);
+}
+
+/* Return a temporary variable decl to use for an OpenACC worker reduction.  
*/
+
+/* TYPE is the type of the reduction variable (a reference type is
+   replaced by the type it refers to) and OFFSET indexes the per-function
+   cache cfun->machine->reduc_decls.  If a decl is already cached at
+   OFFSET it is returned (its type must match); otherwise a new static
+   temporary in the LDS address space is created, cached and returned.  */
+
+static tree
+gcn_goacc_get_worker_red_decl (tree type, unsigned offset)
+{
+  machine_function *machfun = cfun->machine;
+  tree existing_decl;
+
+  if (TREE_CODE (type) == REFERENCE_TYPE)
+    type = TREE_TYPE (type);
+
+  tree var_type
+    = build_qualified_type (type,
+                            (TYPE_QUALS (type)
+                             | ENCODE_QUAL_ADDR_SPACE (ADDR_SPACE_LDS)));
+
+  if (machfun->reduc_decls
+      && offset < machfun->reduc_decls->length ()
+      && (existing_decl = (*machfun->reduc_decls)[offset]))
+    {
+      gcc_assert (TREE_TYPE (existing_decl) == var_type);
+      return existing_decl;
+    }
+
+  char name[50];
+  sprintf (name, ".oacc_reduction_%u", offset);
+  tree decl = create_tmp_var_raw (var_type, name);
+
+  DECL_CONTEXT (decl) = NULL_TREE;
+  TREE_STATIC (decl) = 1;
+
+  varpool_node::finalize_decl (decl);
+
+  /* Only grow the cache when OFFSET lies beyond its current end.  The
+     slot may also be missing because an earlier, larger OFFSET already
+     extended the vector and left this entry cleared; "growing" to a
+     smaller length in that case is invalid and would lose the decls
+     cached at higher offsets.  */
+  if (vec_safe_length (machfun->reduc_decls) <= offset)
+    vec_safe_grow_cleared (machfun->reduc_decls, offset + 1);
+  (*machfun->reduc_decls)[offset] = decl;
+
+  return decl;
+}
+
+/* Expand IFN_GOACC_REDUCTION_SETUP.  */
+
+static void
+gcn_goacc_reduction_setup (gcall *call)
+{
+  gimple_stmt_iterator gsi = gsi_for_stmt (call);
+  tree lhs = gimple_call_lhs (call);
+  tree var = gimple_call_arg (call, 2);
+  int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
+  gimple_seq seq = NULL;
+
+  push_gimplify_context (true);
+
+  if (level != GOMP_DIM_GANG)
+    {
+      /* Copy the receiver object.  */
+      tree ref_to_res = gimple_call_arg (call, 1);
+
+      if (!integer_zerop (ref_to_res))
+        var = build_simple_mem_ref (ref_to_res);
+    }
+
+  if (level == GOMP_DIM_WORKER)
+    {
+      tree var_type = TREE_TYPE (var);
+      /* Store incoming value to worker reduction buffer.  */
+      tree offset = gimple_call_arg (call, 5);
+      tree decl
+        = gcn_goacc_get_worker_red_decl (var_type, TREE_INT_CST_LOW (offset));
+
+      gimplify_assign (decl, var, &seq);
+    }
+
+  if (lhs)
+    gimplify_assign (lhs, var, &seq);
+
+  pop_gimplify_context (NULL);
+  gsi_replace_with_seq (&gsi, seq, true);
+}
+
+/* Expand IFN_GOACC_REDUCTION_INIT.  
*/
+
+static void
+gcn_goacc_reduction_init (gcall *call)
+{
+  gimple_stmt_iterator gsi = gsi_for_stmt (call);
+  tree lhs = gimple_call_lhs (call);
+  tree var = gimple_call_arg (call, 2);
+  int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
+  enum tree_code rcode
+    = (enum tree_code) TREE_INT_CST_LOW (gimple_call_arg (call, 4));
+  tree value = omp_reduction_init_op (gimple_location (call), rcode,
+                                      TREE_TYPE (var));
+  gimple_seq seq = NULL;
+
+  push_gimplify_context (true);
+
+  /* At gang level with no receiver object, propagate the incoming VAR
+     instead of the operator's initial value.  */
+  if (level == GOMP_DIM_GANG && integer_zerop (gimple_call_arg (call, 1)))
+    value = var;
+
+  if (lhs)
+    gimplify_assign (lhs, value, &seq);
+
+  pop_gimplify_context (NULL);
+  gsi_replace_with_seq (&gsi, seq, true);
+}
+
+/* Expand IFN_GOACC_REDUCTION_FINI.  */
+
+static void
+gcn_goacc_reduction_fini (gcall *call)
+{
+  gimple_stmt_iterator gsi = gsi_for_stmt (call);
+  tree lhs = gimple_call_lhs (call);
+  tree ref_to_res = gimple_call_arg (call, 1);
+  tree var = gimple_call_arg (call, 2);
+  int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
+  enum tree_code op
+    = (enum tree_code) TREE_INT_CST_LOW (gimple_call_arg (call, 4));
+  gimple_seq seq = NULL;
+  tree result = NULL_TREE;
+  tree accum = NULL_TREE;
+
+  push_gimplify_context (true);
+
+  /* Choose the accumulator: the worker reduction buffer at worker level,
+     otherwise the receiver object if there is one.  With neither, the
+     result is simply VAR.  */
+  if (level == GOMP_DIM_WORKER)
+    {
+      tree offset = gimple_call_arg (call, 5);
+      tree decl
+        = gcn_goacc_get_worker_red_decl (TREE_TYPE (var),
+                                         TREE_INT_CST_LOW (offset));
+
+      accum = build_fold_addr_expr (decl);
+    }
+  else if (!integer_zerop (ref_to_res))
+    accum = ref_to_res;
+  else
+    result = var;
+
+  if (accum)
+    {
+      /* UPDATE the accumulator.  */
+      gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
+      seq = NULL;
+      result = gcn_reduction_update (gimple_location (call), &gsi, accum,
+                                     var, op);
+    }
+
+  if (lhs)
+    gimplify_assign (lhs, result, &seq);
+  pop_gimplify_context (NULL);
+
+  gsi_replace_with_seq (&gsi, seq, true);
+}
+
+/* Expand IFN_GOACC_REDUCTION_TEARDOWN.  */
+
+static void
+gcn_goacc_reduction_teardown (gcall *call)
+{
+  gimple_stmt_iterator gsi = gsi_for_stmt (call);
+  tree lhs = gimple_call_lhs (call);
+  tree var = gimple_call_arg (call, 2);
+  int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
+  gimple_seq seq = NULL;
+
+  push_gimplify_context (true);
+
+  if (level == GOMP_DIM_WORKER)
+    {
+      /* Read the worker reduction buffer.  */
+      tree offset = gimple_call_arg (call, 5);
+      var = gcn_goacc_get_worker_red_decl (TREE_TYPE (var),
+                                           TREE_INT_CST_LOW (offset));
+    }
+
+  if (level != GOMP_DIM_GANG)
+    {
+      /* Write to the receiver object.  */
+      tree ref_to_res = gimple_call_arg (call, 1);
+
+      if (!integer_zerop (ref_to_res))
+        gimplify_assign (build_simple_mem_ref (ref_to_res), var, &seq);
+    }
+
+  if (lhs)
+    gimplify_assign (lhs, var, &seq);
+
+  pop_gimplify_context (NULL);
+
+  gsi_replace_with_seq (&gsi, seq, true);
+}
+
+/* Implement TARGET_GOACC_REDUCTION.
+
+   Expand calls to the GOACC REDUCTION internal function, into a sequence of
+   gimple instructions.  
*/ + +void +gcn_goacc_reduction (gcall *call) +{ + int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3)); + + if (level == GOMP_DIM_VECTOR) + { + default_goacc_reduction (call); + return; + } + + unsigned code = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 0)); + + switch (code) + { + case IFN_GOACC_REDUCTION_SETUP: + gcn_goacc_reduction_setup (call); + break; + + case IFN_GOACC_REDUCTION_INIT: + gcn_goacc_reduction_init (call); + break; + + case IFN_GOACC_REDUCTION_FINI: + gcn_goacc_reduction_fini (call); + break; + + case IFN_GOACC_REDUCTION_TEARDOWN: + gcn_goacc_reduction_teardown (call); + break; + + default: + gcc_unreachable (); + } +} + +/* Implement TARGET_GOACC_ADJUST_PROPAGATION_RECORD. + + Tweak (worker) propagation record, e.g. to put it in shared memory. */ + +tree +gcn_goacc_adjust_propagation_record (tree record_type, bool sender, + const char *name) +{ + tree type = record_type; + + TYPE_ADDR_SPACE (type) = ADDR_SPACE_LDS; + + if (!sender) + type = build_pointer_type (type); + + tree decl = create_tmp_var_raw (type, name); + + if (sender) + { + DECL_CONTEXT (decl) = NULL_TREE; + TREE_STATIC (decl) = 1; + } + + if (sender) + varpool_node::finalize_decl (decl); + + return decl; +} + +void +gcn_goacc_adjust_gangprivate_decl (tree var) +{ + tree type = TREE_TYPE (var); + tree lds_type = build_qualified_type (type, + TYPE_QUALS_NO_ADDR_SPACE (type) + | ENCODE_QUAL_ADDR_SPACE (ADDR_SPACE_LDS)); + machine_function *machfun = cfun->machine; + + TREE_TYPE (var) = lds_type; + TREE_STATIC (var) = 1; + + /* We're making VAR static. We have to mangle the name to avoid collisions + between different local variables that share the same names. 
*/ + lhd_set_decl_assembler_name (var); + + varpool_node::finalize_decl (var); + + if (machfun) + machfun->use_flat_addressing = true; +} + +/* }}} */ diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md new file mode 100644 index 0000000..0531c4f --- /dev/null +++ b/gcc/config/gcn/gcn-valu.md @@ -0,0 +1,3509 @@ +;; Copyright (C) 2016-2018 Free Software Foundation, Inc. + +;; This file is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3 of the License, or (at your option) +;; any later version. + +;; This file is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +;; for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
+ +;; {{{ Vector iterators + +; Vector modes for one vector register +(define_mode_iterator VEC_1REG_MODE + [V64QI V64HI V64SI V64HF V64SF]) +(define_mode_iterator VEC_1REG_ALT + [V64QI V64HI V64SI V64HF V64SF]) + +(define_mode_iterator VEC_1REG_INT_MODE + [V64QI V64HI V64SI]) +(define_mode_iterator VEC_1REG_INT_ALT + [V64QI V64HI V64SI]) + +(define_mode_iterator SCALAR_1REG_INT_MODE + [QI HI SI]) + +; Vector modes for two vector registers +(define_mode_iterator VEC_2REG_MODE + [V64DI V64DF]) + +; All of above +(define_mode_iterator VEC_REG_MODE + [V64QI V64HI V64SI V64HF V64SF ; Single reg + V64DI V64DF]) ; Double reg + +(define_mode_attr scalar_mode + [(V64QI "qi") (V64HI "hi") (V64SI "si") + (V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")]) + +(define_mode_attr SCALAR_MODE + [(V64QI "QI") (V64HI "HI") (V64SI "SI") + (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")]) + +;; }}} +;; {{{ Vector moves + +; This is the entry point for all vector register moves. Memory accesses can +; come this way also, but will more usually use the reload_in/out, +; gather/scatter, maskload/store, etc. + +(define_expand "mov" + [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand") + (match_operand:VEC_REG_MODE 1 "general_operand"))] + "" + { + /* Do not attempt to move unspec vectors. 
*/ + if (GET_CODE (operands[1]) == UNSPEC + && XINT (operands[1], 1) == UNSPEC_VECTOR) + FAIL; + + if (can_create_pseudo_p ()) + { + rtx exec = gcn_full_exec_reg (); + rtx undef = gcn_gen_undef (mode); + + if (MEM_P (operands[0])) + { + operands[1] = force_reg (mode, operands[1]); + rtx scratch = gen_rtx_SCRATCH (V64DImode); + rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0])); + rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0])); + rtx expr = gcn_expand_scalar_to_vector_address (mode, exec, + operands[0], + scratch); + emit_insn (gen_scatter_expr (expr, operands[1], a, v, exec)); + } + else if (MEM_P (operands[1])) + { + rtx scratch = gen_rtx_SCRATCH (V64DImode); + rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1])); + rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1])); + rtx expr = gcn_expand_scalar_to_vector_address (mode, exec, + operands[1], + scratch); + emit_insn (gen_gather_expr (operands[0], expr, a, v, undef, + exec)); + } + else + emit_insn (gen_mov_vector (operands[0], operands[1], exec, + undef)); + + DONE; + } + }) + +; A vector move that does not reference EXEC explicitly, and therefore is +; suitable for use during or after LRA. It uses the "exec" attribute instead. 
+ +(define_insn "mov_full" + [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand" "=v,v") + (match_operand:VEC_1REG_MODE 1 "general_operand" "vA,B"))] + "lra_in_progress || reload_completed" + "v_mov_b32\t%0, %1" + [(set_attr "type" "vop1,vop1") + (set_attr "length" "4,8") + (set_attr "exec" "full")]) + +(define_insn "mov_full" + [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "=v") + (match_operand:VEC_2REG_MODE 1 "general_operand" "vDB"))] + "lra_in_progress || reload_completed" + { + if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) + return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"; + else + return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1"; + } + [(set_attr "type" "vmult") + (set_attr "length" "16") + (set_attr "exec" "full")]) + +; A SGPR-base load looks like: +; v, Sg +; +; There's no hardware instruction that corresponds to this, but vector base +; addresses are placed in an SGPR because it is easier to add to a vector. +; We also have a temporary vT, and the vector v1 holding numbered lanes. 
+; +; Rewrite as: +; vT = v1 << log2(element-size) +; vT += Sg +; flat_load v, vT + +(define_insn "mov_sgprbase" + [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand" "= v, v, v, m") + (unspec:VEC_1REG_MODE + [(match_operand:VEC_1REG_MODE 1 "general_operand" " vA,vB, m, v")] + UNSPEC_SGPRBASE)) + (clobber (match_operand:V64DI 2 "register_operand" "=&v,&v,&v,&v"))] + "lra_in_progress || reload_completed" + "@ + v_mov_b32\t%0, %1 + v_mov_b32\t%0, %1 + # + #" + [(set_attr "type" "vop1,vop1,*,*") + (set_attr "length" "4,8,12,12") + (set_attr "exec" "full")]) + +(define_insn "mov_sgprbase" + [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "= v, v, m") + (unspec:VEC_2REG_MODE + [(match_operand:VEC_2REG_MODE 1 "general_operand" "vDB, m, v")] + UNSPEC_SGPRBASE)) + (clobber (match_operand:V64DI 2 "register_operand" "=&v,&v,&v"))] + "lra_in_progress || reload_completed" + "@ + * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \ + return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \ + else \ + return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\"; + # + #" + [(set_attr "type" "vmult,*,*") + (set_attr "length" "8,12,12") + (set_attr "exec" "full")]) + +; reload_in was once a standard name, but here it's only referenced by +; gcn_secondary_reload. It allows a reload with a scratch register. + +(define_expand "reload_in" + [(set (match_operand:VEC_REG_MODE 0 "register_operand" "= v") + (match_operand:VEC_REG_MODE 1 "memory_operand" " m")) + (clobber (match_operand:V64DI 2 "register_operand" "=&v"))] + "" + { + emit_insn (gen_mov_sgprbase (operands[0], operands[1], operands[2])); + DONE; + }) + +; reload_out is similar to reload_in, above. 
+ +(define_expand "reload_out" + [(set (match_operand:VEC_REG_MODE 0 "memory_operand" "= m") + (match_operand:VEC_REG_MODE 1 "register_operand" " v")) + (clobber (match_operand:V64DI 2 "register_operand" "=&v"))] + "" + { + emit_insn (gen_mov_sgprbase (operands[0], operands[1], operands[2])); + DONE; + }) + +; This is the 'normal' kind of vector move created before register allocation. + +(define_insn "mov_vector" + [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand" + "=v, v, v, v, v, m") + (vec_merge:VEC_1REG_MODE + (match_operand:VEC_1REG_MODE 1 "general_operand" + "vA, B, v,vA, m, v") + (match_operand:VEC_1REG_MODE 3 "gcn_alu_or_unspec_operand" + "U0,U0,vA,vA,U0,U0") + (match_operand:DI 2 "register_operand" " e, e,cV,Sg, e, e"))) + (clobber (match_scratch:V64DI 4 "=X, X, X, X,&v,&v"))] + "!MEM_P (operands[0]) || REG_P (operands[1])" + "@ + v_mov_b32\t%0, %1 + v_mov_b32\t%0, %1 + v_cndmask_b32\t%0, %3, %1, vcc + v_cndmask_b32\t%0, %3, %1, %2 + # + #" + [(set_attr "type" "vop1,vop1,vop2,vop3a,*,*") + (set_attr "length" "4,8,4,8,16,16") + (set_attr "exec" "*,*,full,full,*,*")]) + +; This variant does not accept an unspec, but does permit MEM +; read/modify/write which is necessary for maskstore. 
+ +(define_insn "*mov_vector_match" + [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand" "=v,v, v, m") + (vec_merge:VEC_1REG_MODE + (match_operand:VEC_1REG_MODE 1 "general_operand" "vA,B, m, v") + (match_dup 0) + (match_operand:DI 2 "gcn_exec_reg_operand" " e,e, e, e"))) + (clobber (match_scratch:V64DI 3 "=X,X,&v,&v"))] + "!MEM_P (operands[0]) || REG_P (operands[1])" + "@ + v_mov_b32\t%0, %1 + v_mov_b32\t%0, %1 + # + #" + [(set_attr "type" "vop1,vop1,*,*") + (set_attr "length" "4,8,16,16")]) + +(define_insn "mov_vector" + [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" + "= v, v, v, v, m") + (vec_merge:VEC_2REG_MODE + (match_operand:VEC_2REG_MODE 1 "general_operand" + "vDB, v0, v0, m, v") + (match_operand:VEC_2REG_MODE 3 "gcn_alu_or_unspec_operand" + " U0,vDA0,vDA0,U0,U0") + (match_operand:DI 2 "register_operand" " e, cV, Sg, e, e"))) + (clobber (match_scratch:V64DI 4 "= X, X, X,&v,&v"))] + "!MEM_P (operands[0]) || REG_P (operands[1])" + { + if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) + switch (which_alternative) + { + case 0: + return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"; + case 1: + return "v_cndmask_b32\t%L0, %L3, %L1, vcc\;" + "v_cndmask_b32\t%H0, %H3, %H1, vcc"; + case 2: + return "v_cndmask_b32\t%L0, %L3, %L1, %2\;" + "v_cndmask_b32\t%H0, %H3, %H1, %2"; + } + else + switch (which_alternative) + { + case 0: + return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1"; + case 1: + return "v_cndmask_b32\t%H0, %H3, %H1, vcc\;" + "v_cndmask_b32\t%L0, %L3, %L1, vcc"; + case 2: + return "v_cndmask_b32\t%H0, %H3, %H1, %2\;" + "v_cndmask_b32\t%L0, %L3, %L1, %2"; + } + + return "#"; + } + [(set_attr "type" "vmult,vmult,vmult,*,*") + (set_attr "length" "16,16,16,16,16") + (set_attr "exec" "*,full,full,*,*")]) + +; This variant does not accept an unspec, but does permit MEM +; read/modify/write which is necessary for maskstore. 
+
+; Two-register masked move whose inactive lanes keep the destination's
+; previous contents (match_dup 0).  Alternative 0 is emitted as two 32-bit
+; moves, ordered by register number; alternatives 1-2 (memory) output "#".
+
+(define_insn "*mov<mode>_vector_match"
+  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "=v, v, m")
+        (vec_merge:VEC_2REG_MODE
+          (match_operand:VEC_2REG_MODE 1 "general_operand" "vDB, m, v")
+          (match_dup 0)
+          (match_operand:DI 2 "gcn_exec_reg_operand" " e, e, e")))
+   (clobber (match_scratch:V64DI 3 "=X,&v,&v"))]
+  "!MEM_P (operands[0]) || REG_P (operands[1])"
+  "@
+   * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
+       return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
+     else \
+       return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
+   #
+   #"
+  [(set_attr "type" "vmult,*,*")
+   (set_attr "length" "16,16,16")])
+
+; Expand scalar addresses into gather/scatter patterns
+;
+; Each split below builds operand 5 with gcn_expand_scalar_to_vector_address
+; and copies the address space and volatile flag of the original MEM into
+; constant operands 6 and 7.  The helper takes the iterator's machine mode,
+; written <MODE>mode; a bare "mode" is not declared in these C fragments.
+
+; UNSPEC_SGPRBASE store -> scatter, with constant -1 in the exec position.
+(define_split
+  [(set (match_operand:VEC_REG_MODE 0 "memory_operand")
+        (unspec:VEC_REG_MODE
+          [(match_operand:VEC_REG_MODE 1 "general_operand")]
+          UNSPEC_SGPRBASE))
+   (clobber (match_scratch:V64DI 2))]
+  ""
+  [(set (mem:BLK (scratch))
+        (unspec:BLK [(match_dup 5) (match_dup 1)
+                     (match_dup 6) (match_dup 7) (match_dup 8)]
+                    UNSPEC_SCATTER))]
+  {
+    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
+                                                       operands[0],
+                                                       operands[2]);
+    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
+    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
+    operands[8] = gen_rtx_CONST_INT (VOIDmode, -1);
+  })
+
+; Masked (vec_merge) store -> scatter under exec operand 2.  The merge
+; source, operand 3, is matched but does not appear in the replacement.
+(define_split
+  [(set (match_operand:VEC_REG_MODE 0 "memory_operand")
+        (vec_merge:VEC_REG_MODE
+          (match_operand:VEC_REG_MODE 1 "general_operand")
+          (match_operand:VEC_REG_MODE 3 "")
+          (match_operand:DI 2 "gcn_exec_reg_operand")))
+   (clobber (match_scratch:V64DI 4))]
+  ""
+  [(set (mem:BLK (scratch))
+        (unspec:BLK [(match_dup 5) (match_dup 1)
+                     (match_dup 6) (match_dup 7) (match_dup 2)]
+                    UNSPEC_SCATTER))]
+  {
+    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
+                                                       operands[2],
+                                                       operands[0],
+                                                       operands[4]);
+    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
+    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
+  })
+
+; UNSPEC_SGPRBASE load -> gather merged with an undefined vector
+; (gcn_gen_undef), with constant -1 as the merge mask.
+(define_split
+  [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand")
+        (unspec:VEC_REG_MODE
+          [(match_operand:VEC_REG_MODE 1 "memory_operand")]
+          UNSPEC_SGPRBASE))
+   (clobber (match_scratch:V64DI 2))]
+  ""
+  [(set (match_dup 0)
+        (vec_merge:VEC_REG_MODE
+          (unspec:VEC_REG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
+                                (mem:BLK (scratch))]
+                               UNSPEC_GATHER)
+          (match_dup 8)
+          (match_dup 9)))]
+  {
+    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
+                                                       operands[1],
+                                                       operands[2]);
+    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
+    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
+    operands[8] = gcn_gen_undef (<MODE>mode);
+    operands[9] = gen_rtx_CONST_INT (VOIDmode, -1);
+  })
+
+; Masked (vec_merge) load -> gather, keeping merge source operand 3 and
+; exec operand 2.
+(define_split
+  [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand")
+        (vec_merge:VEC_REG_MODE
+          (match_operand:VEC_REG_MODE 1 "memory_operand")
+          (match_operand:VEC_REG_MODE 3 "")
+          (match_operand:DI 2 "gcn_exec_reg_operand")))
+   (clobber (match_scratch:V64DI 4))]
+  ""
+  [(set (match_dup 0)
+        (vec_merge:VEC_REG_MODE
+          (unspec:VEC_REG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
+                                (mem:BLK (scratch))]
+                               UNSPEC_GATHER)
+          (match_dup 3)
+          (match_dup 2)))]
+  {
+    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
+                                                       operands[2],
+                                                       operands[1],
+                                                       operands[4]);
+    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
+    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
+  })
+
+; TODO: Add zero/sign extending variants.
+
+;; }}}
+;; {{{ Lane moves
+
+; v_writelane and v_readlane work regardless of exec flags.
+; We allow source to be scratch.
+; +; FIXME these should take A immediates + +(define_insn "*vec_set" + [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "= v") + (vec_merge:VEC_1REG_MODE + (vec_duplicate:VEC_1REG_MODE + (match_operand: 1 "register_operand" " SS")) + (match_operand:VEC_1REG_MODE 3 "gcn_register_or_unspec_operand" + " U0") + (ashift (const_int 1) + (match_operand:SI 2 "gcn_alu_operand" "SSB"))))] + "" + "v_writelane_b32 %0, %1, %2" + [(set_attr "type" "vop3a") + (set_attr "length" "8") + (set_attr "laneselect" "yes")]) + +; FIXME: 64bit operations really should be splitters, but I am not sure how +; to represent vertical subregs. +(define_insn "*vec_set" + [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "= v") + (vec_merge:VEC_2REG_MODE + (vec_duplicate:VEC_2REG_MODE + (match_operand: 1 "register_operand" " SS")) + (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand" + " U0") + (ashift (const_int 1) + (match_operand:SI 2 "gcn_alu_operand" "SSB"))))] + "" + "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2" + [(set_attr "type" "vmult") + (set_attr "length" "16") + (set_attr "laneselect" "yes")]) + +(define_expand "vec_set" + [(set (match_operand:VEC_REG_MODE 0 "register_operand") + (vec_merge:VEC_REG_MODE + (vec_duplicate:VEC_REG_MODE + (match_operand: 1 "register_operand")) + (match_dup 0) + (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand"))))] + "") + +(define_insn "*vec_set_1" + [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "=v") + (vec_merge:VEC_1REG_MODE + (vec_duplicate:VEC_1REG_MODE + (match_operand: 1 "register_operand" "SS")) + (match_operand:VEC_1REG_MODE 3 "gcn_register_or_unspec_operand" + "U0") + (match_operand:SI 2 "const_int_operand" " i")))] + "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)" + { + operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2]))); + return "v_writelane_b32 %0, %1, %2"; + } + [(set_attr "type" "vop3a") + (set_attr "length" "8") + (set_attr "laneselect" "yes")]) + +(define_insn 
"*vec_set_1" + [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "=v") + (vec_merge:VEC_2REG_MODE + (vec_duplicate:VEC_2REG_MODE + (match_operand: 1 "register_operand" "SS")) + (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand" + "U0") + (match_operand:SI 2 "const_int_operand" " i")))] + "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)" + { + operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2]))); + return "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2"; + } + [(set_attr "type" "vmult") + (set_attr "length" "16") + (set_attr "laneselect" "yes")]) + +(define_insn "vec_duplicate" + [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "=v") + (vec_duplicate:VEC_1REG_MODE + (match_operand: 1 "gcn_alu_operand" "SgB")))] + "" + "v_mov_b32\t%0, %1" + [(set_attr "type" "vop3a") + (set_attr "exec" "full") + (set_attr "length" "8")]) + +(define_insn "vec_duplicate" + [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "= v") + (vec_duplicate:VEC_2REG_MODE + (match_operand: 1 "gcn_alu_operand" "SgDB")))] + "" + "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1" + [(set_attr "type" "vop3a") + (set_attr "exec" "full") + (set_attr "length" "16")]) + +(define_insn "vec_duplicate_exec" + [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "= v") + (vec_merge:VEC_1REG_MODE + (vec_duplicate:VEC_1REG_MODE + (match_operand: 1 "gcn_alu_operand" "SSB")) + (match_operand:VEC_1REG_MODE 3 "gcn_register_or_unspec_operand" + " U0") + (match_operand:DI 2 "gcn_exec_reg_operand" " e")))] + "" + "v_mov_b32\t%0, %1" + [(set_attr "type" "vop3a") + (set_attr "length" "8")]) + +(define_insn "vec_duplicate_exec" + [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "= v") + (vec_merge:VEC_2REG_MODE + (vec_duplicate:VEC_2REG_MODE + (match_operand: 1 "register_operand" "SgDB")) + (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand" + " U0") + (match_operand:DI 2 "gcn_exec_reg_operand" " e")))] + "" + "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1" + 
[(set_attr "type" "vmult") + (set_attr "length" "16")]) + +(define_insn "vec_extract" + [(set (match_operand: 0 "register_operand" "=Sg") + (vec_select: + (match_operand:VEC_1REG_MODE 1 "register_operand" " v") + (parallel [(match_operand:SI 2 "gcn_alu_operand" "SSB")])))] + "" + "v_readlane_b32 %0, %1, %2" + [(set_attr "type" "vop3a") + (set_attr "length" "8") + (set_attr "laneselect" "yes")]) + +(define_insn "vec_extract" + [(set (match_operand: 0 "register_operand" "=Sg") + (vec_select: + (match_operand:VEC_2REG_MODE 1 "register_operand" " v") + (parallel [(match_operand:SI 2 "gcn_alu_operand" "SSB")])))] + "" + "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2" + [(set_attr "type" "vmult") + (set_attr "length" "16") + (set_attr "laneselect" "yes")]) + +(define_expand "vec_init" + [(match_operand:VEC_REG_MODE 0 "register_operand") + (match_operand 1)] + "" + { + gcn_expand_vector_init (operands[0], operands[1]); + DONE; + }) + +;; }}} +;; {{{ Scatter / Gather + +;; GCN does not have an instruction for loading a vector from contiguous +;; memory so *all* loads and stores are eventually converted to scatter +;; or gather. +;; +;; GCC does not permit MEM to hold vectors of addresses, so we must use an +;; unspec. The unspec formats are as follows: +;; +;; (unspec:V64?? +;; [(
<address expression>) +;; (<addr_space_t>) +;; (<use_glc>) +;; (mem:BLK (scratch))] +;; UNSPEC_GATHER) +;; +;; (unspec:BLK +;; [(<address expression>
) +;; () +;; () +;; () +;; ()] +;; UNSPEC_SCATTER) +;; +;; - Loads are expected to be wrapped in a vec_merge, so do not need . +;; - The mem:BLK does not contain any real information, but indicates that an +;; unknown memory read is taking place. Stores are expected to use a similar +;; mem:BLK outside the unspec. +;; - The address space and glc (volatile) fields are there to replace the +;; fields normally found in a MEM. +;; - Multiple forms of address expression are supported, below. + +(define_expand "gather_load" + [(match_operand:VEC_REG_MODE 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand 2 "register_operand") + (match_operand 3 "immediate_operand") + (match_operand:SI 4 "gcn_alu_operand")] + "" + { + rtx exec = gcn_full_exec_reg (); + + /* TODO: more conversions will be needed when more types are vectorized. */ + if (GET_MODE (operands[2]) == V64DImode) + { + rtx tmp = gen_reg_rtx (V64SImode); + emit_insn (gen_vec_truncatev64div64si (tmp, operands[2], + gcn_gen_undef (V64SImode), + exec)); + operands[2] = tmp; + } + + emit_insn (gen_gather_exec (operands[0], operands[1], operands[2], + operands[3], operands[4], exec)); + DONE; + }) + +(define_expand "gather_exec" + [(match_operand:VEC_REG_MODE 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:V64SI 2 "register_operand") + (match_operand 3 "immediate_operand") + (match_operand:SI 4 "gcn_alu_operand") + (match_operand:DI 5 "gcn_exec_reg_operand")] + "" + { + rtx dest = operands[0]; + rtx base = operands[1]; + rtx offsets = operands[2]; + int unsignedp = INTVAL (operands[3]); + rtx scale = operands[4]; + rtx exec = operands[5]; + + rtx tmpsi = gen_reg_rtx (V64SImode); + rtx tmpdi = gen_reg_rtx (V64DImode); + rtx undefsi = gcn_gen_undef (V64SImode); + rtx undefdi = gcn_gen_undef (V64DImode); + rtx undefmode = gcn_gen_undef (mode); + + if (CONST_INT_P (scale) + && INTVAL (scale) > 0 + && exact_log2 (INTVAL (scale)) >= 0) + emit_insn (gen_ashlv64si3 
(tmpsi, offsets, + GEN_INT (exact_log2 (INTVAL (scale))))); + else + emit_insn (gen_mulv64si3_vector_dup (tmpsi, offsets, scale, exec, + undefsi)); + + if (DEFAULT_ADDR_SPACE == ADDR_SPACE_FLAT) + { + if (unsignedp) + emit_insn (gen_addv64di3_zext_dup2 (tmpdi, tmpsi, base, exec, + undefdi)); + else + emit_insn (gen_addv64di3_sext_dup2 (tmpdi, tmpsi, base, exec, + undefdi)); + emit_insn (gen_gather_insn_1offset (dest, tmpdi, const0_rtx, + const0_rtx, const0_rtx, + undefmode, exec)); + } + else if (DEFAULT_ADDR_SPACE == ADDR_SPACE_GLOBAL) + emit_insn (gen_gather_insn_2offsets (dest, base, tmpsi, const0_rtx, + const0_rtx, const0_rtx, + undefmode, exec)); + else + gcc_unreachable (); + DONE; + }) + +; Allow any address expression +(define_expand "gather_expr" + [(set (match_operand:VEC_REG_MODE 0 "register_operand") + (vec_merge:VEC_REG_MODE + (unspec:VEC_REG_MODE + [(match_operand 1 "") + (match_operand 2 "immediate_operand") + (match_operand 3 "immediate_operand") + (mem:BLK (scratch))] + UNSPEC_GATHER) + (match_operand:VEC_REG_MODE 4 "gcn_register_or_unspec_operand") + (match_operand:DI 5 "gcn_exec_operand")))] + "" + {}) + +(define_insn "gather_insn_1offset" + [(set (match_operand:VEC_REG_MODE 0 "register_operand" "=v, v") + (vec_merge:VEC_REG_MODE + (unspec:VEC_REG_MODE + [(plus:V64DI (match_operand:V64DI 1 "register_operand" " v, v") + (vec_duplicate:V64DI + (match_operand 2 "immediate_operand" " n, n"))) + (match_operand 3 "immediate_operand" " n, n") + (match_operand 4 "immediate_operand" " n, n") + (mem:BLK (scratch))] + UNSPEC_GATHER) + (match_operand:VEC_REG_MODE 5 "gcn_register_or_unspec_operand" + "U0, U0") + (match_operand:DI 6 "gcn_exec_operand" " e,*Kf")))] + "(AS_FLAT_P (INTVAL (operands[3])) + && ((TARGET_GCN3 && INTVAL(operands[2]) == 0) + || ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x1000))) + || (AS_GLOBAL_P (INTVAL (operands[3])) + && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))" + { + addr_space_t as = INTVAL 
(operands[3]); + const char *glc = INTVAL (operands[4]) ? " glc" : ""; + + static char buf[200]; + if (AS_FLAT_P (as)) + { + if (TARGET_GCN5_PLUS) + sprintf (buf, "flat_load%%s0\t%%0, %%1 offset:%%2%s\;s_waitcnt\t0", + glc); + else + sprintf (buf, "flat_load%%s0\t%%0, %%1%s\;s_waitcnt\t0", glc); + } + else if (AS_GLOBAL_P (as)) + sprintf (buf, "global_load%%s0\t%%0, %%1, off offset:%%2%s\;" + "s_waitcnt\tvmcnt(0)", glc); + else + gcc_unreachable (); + + return buf; + } + [(set_attr "type" "flat") + (set_attr "length" "12") + (set_attr "exec" "*,full")]) + +(define_insn "gather_insn_1offset_ds" + [(set (match_operand:VEC_REG_MODE 0 "register_operand" "=v, v") + (vec_merge:VEC_REG_MODE + (unspec:VEC_REG_MODE + [(plus:V64SI (match_operand:V64SI 1 "register_operand" " v, v") + (vec_duplicate:V64SI + (match_operand 2 "immediate_operand" " n, n"))) + (match_operand 3 "immediate_operand" " n, n") + (match_operand 4 "immediate_operand" " n, n") + (mem:BLK (scratch))] + UNSPEC_GATHER) + (match_operand:VEC_REG_MODE 5 "gcn_register_or_unspec_operand" + "U0, U0") + (match_operand:DI 6 "gcn_exec_operand" " e,*Kf")))] + "(AS_ANY_DS_P (INTVAL (operands[3])) + && ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x10000))" + { + addr_space_t as = INTVAL (operands[3]); + static char buf[200]; + sprintf (buf, "ds_read%%b0\t%%0, %%1 offset:%%2%s\;s_waitcnt\tlgkmcnt(0)", + (AS_GDS_P (as) ? 
" gds" : "")); + return buf; + } + [(set_attr "type" "ds") + (set_attr "length" "12") + (set_attr "exec" "*,full")]) + +(define_insn "gather_insn_2offsets" + [(set (match_operand:VEC_REG_MODE 0 "register_operand" "=v") + (vec_merge:VEC_REG_MODE + (unspec:VEC_REG_MODE + [(plus:V64DI + (plus:V64DI + (vec_duplicate:V64DI + (match_operand:DI 1 "register_operand" "SS")) + (sign_extend:V64DI + (match_operand:V64SI 2 "register_operand" " v"))) + (vec_duplicate:V64DI (match_operand 3 "immediate_operand" + " n"))) + (match_operand 4 "immediate_operand" " n") + (match_operand 5 "immediate_operand" " n") + (mem:BLK (scratch))] + UNSPEC_GATHER) + (match_operand:VEC_REG_MODE 6 "gcn_register_or_unspec_operand" + "U0") + (match_operand:DI 7 "gcn_exec_operand" " e")))] + "(AS_GLOBAL_P (INTVAL (operands[4])) + && (((unsigned HOST_WIDE_INT)INTVAL(operands[3]) + 0x1000) < 0x2000))" + { + addr_space_t as = INTVAL (operands[4]); + const char *glc = INTVAL (operands[5]) ? " glc" : ""; + + static char buf[200]; + if (AS_GLOBAL_P (as)) + { + /* Work around assembler bug in which a 64-bit register is expected, + but a 32-bit value would be correct. */ + int reg = REGNO (operands[2]) - FIRST_VGPR_REG; + sprintf (buf, "global_load%%s0\t%%0, v[%d:%d], %%1 offset:%%3%s\;" + "s_waitcnt\tvmcnt(0)", reg, reg + 1, glc); + } + else + gcc_unreachable (); + + return buf; + } + [(set_attr "type" "flat") + (set_attr "length" "12")]) + +(define_expand "scatter_store" + [(match_operand:DI 0 "register_operand") + (match_operand 1 "register_operand") + (match_operand 2 "immediate_operand") + (match_operand:SI 3 "gcn_alu_operand") + (match_operand:VEC_REG_MODE 4 "register_operand")] + "" + { + rtx exec = gcn_full_exec_reg (); + + /* TODO: more conversions will be needed when more types are vectorized. 
*/ + if (GET_MODE (operands[1]) == V64DImode) + { + rtx tmp = gen_reg_rtx (V64SImode); + emit_insn (gen_vec_truncatev64div64si (tmp, operands[1], + gcn_gen_undef (V64SImode), + exec)); + operands[1] = tmp; + } + + emit_insn (gen_scatter_exec (operands[0], operands[1], operands[2], + operands[3], operands[4], exec)); + DONE; + }) + +(define_expand "scatter_exec" + [(match_operand:DI 0 "register_operand") + (match_operand 1 "register_operand") + (match_operand 2 "immediate_operand") + (match_operand:SI 3 "gcn_alu_operand") + (match_operand:VEC_REG_MODE 4 "register_operand") + (match_operand:DI 5 "gcn_exec_reg_operand")] + "" + { + rtx base = operands[0]; + rtx offsets = operands[1]; + int unsignedp = INTVAL (operands[2]); + rtx scale = operands[3]; + rtx src = operands[4]; + rtx exec = operands[5]; + + rtx tmpsi = gen_reg_rtx (V64SImode); + rtx tmpdi = gen_reg_rtx (V64DImode); + rtx undefsi = gcn_gen_undef (V64SImode); + rtx undefdi = gcn_gen_undef (V64DImode); + + if (CONST_INT_P (scale) + && INTVAL (scale) > 0 + && exact_log2 (INTVAL (scale)) >= 0) + emit_insn (gen_ashlv64si3 (tmpsi, offsets, + GEN_INT (exact_log2 (INTVAL (scale))))); + else + emit_insn (gen_mulv64si3_vector_dup (tmpsi, offsets, scale, exec, + undefsi)); + + if (DEFAULT_ADDR_SPACE == ADDR_SPACE_FLAT) + { + if (unsignedp) + emit_insn (gen_addv64di3_zext_dup2 (tmpdi, tmpsi, base, exec, + undefdi)); + else + emit_insn (gen_addv64di3_sext_dup2 (tmpdi, tmpsi, base, exec, + undefdi)); + emit_insn (gen_scatter_insn_1offset (tmpdi, const0_rtx, src, + const0_rtx, const0_rtx, + exec)); + } + else if (DEFAULT_ADDR_SPACE == ADDR_SPACE_GLOBAL) + emit_insn (gen_scatter_insn_2offsets (base, tmpsi, const0_rtx, src, + const0_rtx, const0_rtx, + exec)); + else + gcc_unreachable (); + DONE; + }) + +; Allow any address expression +(define_expand "scatter_expr" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:V64DI 0 "") + (match_operand:VEC_REG_MODE 1 "register_operand") + (match_operand 2 
"immediate_operand") + (match_operand 3 "immediate_operand") + (match_operand:DI 4 "gcn_exec_operand")] + UNSPEC_SCATTER))] + "" + {}) + +(define_insn "scatter_insn_1offset" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(plus:V64DI (match_operand:V64DI 0 "register_operand" "v, v") + (vec_duplicate:V64DI + (match_operand 1 "immediate_operand" "n, n"))) + (match_operand:VEC_REG_MODE 2 "register_operand" "v, v") + (match_operand 3 "immediate_operand" "n, n") + (match_operand 4 "immediate_operand" "n, n") + (match_operand:DI 5 "gcn_exec_operand" "e,*Kf")] + UNSPEC_SCATTER))] + "(AS_FLAT_P (INTVAL (operands[3])) + && (INTVAL(operands[1]) == 0 + || (TARGET_GCN5_PLUS + && (unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x1000))) + || (AS_GLOBAL_P (INTVAL (operands[3])) + && (((unsigned HOST_WIDE_INT)INTVAL(operands[1]) + 0x1000) < 0x2000))" + { + addr_space_t as = INTVAL (operands[3]); + const char *glc = INTVAL (operands[4]) ? " glc" : ""; + + static char buf[200]; + if (AS_FLAT_P (as)) + { + if (TARGET_GCN5_PLUS) + sprintf (buf, "flat_store%%s2\t%%0, %%2 offset:%%1%s\;s_waitcnt\t0", + glc); + else + sprintf (buf, "flat_store%%s2\t%%0, %%2%s\;s_waitcnt\t0", glc); + } + else if (AS_GLOBAL_P (as)) + sprintf (buf, "global_store%%s2\t%%0, %%2, off offset:%%1%s\;" + "s_waitcnt\tvmcnt(0)", glc); + else + gcc_unreachable (); + + return buf; + } + [(set_attr "type" "flat") + (set_attr "length" "12") + (set_attr "exec" "*,full")]) + +(define_insn "scatter_insn_1offset_ds" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(plus:V64SI (match_operand:V64SI 0 "register_operand" "v, v") + (vec_duplicate:V64SI + (match_operand 1 "immediate_operand" "n, n"))) + (match_operand:VEC_REG_MODE 2 "register_operand" "v, v") + (match_operand 3 "immediate_operand" "n, n") + (match_operand 4 "immediate_operand" "n, n") + (match_operand:DI 5 "gcn_exec_operand" "e,*Kf")] + UNSPEC_SCATTER))] + "(AS_ANY_DS_P (INTVAL (operands[3])) + && ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x10000))" + { + 
addr_space_t as = INTVAL (operands[3]); + static char buf[200]; + sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s\;s_waitcnt\tlgkmcnt(0)", + (AS_GDS_P (as) ? " gds" : "")); + return buf; + } + [(set_attr "type" "ds") + (set_attr "length" "12") + (set_attr "exec" "*,full")]) + +(define_insn "scatter_insn_2offsets" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(plus:V64DI + (plus:V64DI + (vec_duplicate:V64DI + (match_operand:DI 0 "register_operand" "SS")) + (sign_extend:V64DI + (match_operand:V64SI 1 "register_operand" " v"))) + (vec_duplicate:V64DI (match_operand 2 "immediate_operand" " n"))) + (match_operand:VEC_REG_MODE 3 "register_operand" " v") + (match_operand 4 "immediate_operand" " n") + (match_operand 5 "immediate_operand" " n") + (match_operand:DI 6 "gcn_exec_operand" " e")] + UNSPEC_SCATTER))] + "(AS_GLOBAL_P (INTVAL (operands[4])) + && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))" + { + addr_space_t as = INTVAL (operands[4]); + const char *glc = INTVAL (operands[5]) ? " glc" : ""; + + static char buf[200]; + if (AS_GLOBAL_P (as)) + { + /* Work around assembler bug in which a 64-bit register is expected, + but a 32-bit value would be correct. 
*/ + int reg = REGNO (operands[1]) - FIRST_VGPR_REG; + sprintf (buf, "global_store%%s3\tv[%d:%d], %%3, %%0 offset:%%2%s\;" + "s_waitcnt\tvmcnt(0)", reg, reg + 1, glc); + } + else + gcc_unreachable (); + + return buf; + } + [(set_attr "type" "flat") + (set_attr "length" "12")]) + +;; }}} +;; {{{ Permutations + +(define_insn "ds_bpermute" + [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "=v") + (unspec:VEC_1REG_MODE + [(match_operand:VEC_1REG_MODE 2 "register_operand" " v") + (match_operand:V64SI 1 "register_operand" " v") + (match_operand:DI 3 "gcn_exec_reg_operand" " e")] + UNSPEC_BPERMUTE))] + "" + "ds_bpermute_b32\t%0, %1, %2\;s_waitcnt\tlgkmcnt(0)" + [(set_attr "type" "vop2") + (set_attr "length" "12")]) + +(define_insn_and_split "ds_bpermute" + [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "=&v") + (unspec:VEC_2REG_MODE + [(match_operand:VEC_2REG_MODE 2 "register_operand" " v0") + (match_operand:V64SI 1 "register_operand" " v") + (match_operand:DI 3 "gcn_exec_reg_operand" " e")] + UNSPEC_BPERMUTE))] + "" + "#" + "reload_completed" + [(set (match_dup 4) (unspec:V64SI [(match_dup 6) (match_dup 1) (match_dup 3)] + UNSPEC_BPERMUTE)) + (set (match_dup 5) (unspec:V64SI [(match_dup 7) (match_dup 1) (match_dup 3)] + UNSPEC_BPERMUTE))] + { + operands[4] = gcn_operand_part (mode, operands[0], 0); + operands[5] = gcn_operand_part (mode, operands[0], 1); + operands[6] = gcn_operand_part (mode, operands[2], 0); + operands[7] = gcn_operand_part (mode, operands[2], 1); + } + [(set_attr "type" "vmult") + (set_attr "length" "24")]) + +;; }}} +;; {{{ ALU special case: add/sub + +(define_mode_iterator V64SIDI [V64SI V64DI]) + +(define_expand "3" + [(parallel [(set (match_operand:V64SIDI 0 "register_operand") + (vec_merge:V64SIDI + (plus_minus:V64SIDI + (match_operand:V64SIDI 1 "register_operand") + (match_operand:V64SIDI 2 "gcn_alu_operand")) + (match_dup 4) + (match_dup 3))) + (clobber (reg:DI VCC_REG))])] + "" + { + operands[3] = gcn_full_exec_reg (); + 
operands[4] = gcn_gen_undef (mode); + }) + +(define_insn "addv64si3_vector" + [(set (match_operand:V64SI 0 "register_operand" "= v") + (vec_merge:V64SI + (plus:V64SI + (match_operand:V64SI 1 "register_operand" "% v") + (match_operand:V64SI 2 "gcn_alu_operand" "vSSB")) + (match_operand:V64SI 4 "gcn_register_or_unspec_operand" " U0") + (match_operand:DI 3 "gcn_exec_reg_operand" " e"))) + (clobber (reg:DI VCC_REG))] + "" + "v_add%^_u32\t%0, vcc, %2, %1" + [(set_attr "type" "vop2") + (set_attr "length" "8")]) + +(define_insn "addsi3_scalar" + [(set (match_operand:SI 0 "register_operand" "= v") + (plus:SI + (match_operand:SI 1 "register_operand" "% v") + (match_operand:SI 2 "gcn_alu_operand" "vSSB"))) + (use (match_operand:DI 3 "gcn_exec_operand" " e")) + (clobber (reg:DI VCC_REG))] + "" + "v_add%^_u32\t%0, vcc, %2, %1" + [(set_attr "type" "vop2") + (set_attr "length" "8")]) + +(define_insn "addv64si3_vector_dup" + [(set (match_operand:V64SI 0 "register_operand" "= v, v") + (vec_merge:V64SI + (plus:V64SI + (vec_duplicate:V64SI + (match_operand:SI 2 "gcn_alu_operand" "SSB,SSB")) + (match_operand:V64SI 1 "register_operand" " v, v")) + (match_operand:V64SI 4 "gcn_register_or_unspec_operand" " U0, U0") + (match_operand:DI 3 "gcn_exec_reg_operand" " e,*Kf"))) + (clobber (reg:DI VCC_REG))] + "" + "v_add%^_u32\t%0, vcc, %2, %1" + [(set_attr "type" "vop2") + (set_attr "length" "8") + (set_attr "exec" "*,full")]) + +(define_insn "addv64si3_vector_vcc" + [(set (match_operand:V64SI 0 "register_operand" "= v, v") + (vec_merge:V64SI + (plus:V64SI + (match_operand:V64SI 1 "register_operand" "% v, v") + (match_operand:V64SI 2 "gcn_alu_operand" "vSSB,vSSB")) + (match_operand:V64SI 4 "gcn_register_or_unspec_operand" + " U0, U0") + (match_operand:DI 3 "gcn_exec_reg_operand" " e, e"))) + (set (match_operand:DI 5 "register_operand" "= cV, Sg") + (ior:DI (and:DI (ltu:DI (plus:V64SI (match_dup 1) (match_dup 2)) + (match_dup 1)) + (match_dup 3)) + (and:DI (not:DI (match_dup 3)) + 
(match_operand:DI 6 "gcn_register_or_unspec_operand" + " U5, U5"))))] + "" + "v_add%^_u32\t%0, %5, %2, %1" + [(set_attr "type" "vop2,vop3b") + (set_attr "length" "8")]) + +; This pattern only changes the VCC bits when the corresponding lane is +; enabled, so the set must be described as an ior. + +(define_insn "addv64si3_vector_vcc_dup" + [(set (match_operand:V64SI 0 "register_operand" "= v, v") + (vec_merge:V64SI + (plus:V64SI + (vec_duplicate:V64SI (match_operand:SI 2 "gcn_alu_operand" + "SSB,SSB")) + (match_operand:V64SI 1 "register_operand" " v, v")) + (match_operand:V64SI 4 "gcn_register_or_unspec_operand" "U0, U0") + (match_operand:DI 3 "gcn_exec_reg_operand" " e, e"))) + (set (match_operand:DI 5 "register_operand" "=cV, Sg") + (ior:DI (and:DI (ltu:DI (plus:V64SI (vec_duplicate:V64SI (match_dup 2)) + (match_dup 1)) + (vec_duplicate:V64SI (match_dup 2))) + (match_dup 3)) + (and:DI (not:DI (match_dup 3)) + (match_operand:DI 6 "gcn_register_or_unspec_operand" + " 5U, 5U"))))] + "" + "v_add%^_u32\t%0, %5, %2, %1" + [(set_attr "type" "vop2,vop3b") + (set_attr "length" "8,8")]) + +; This pattern does not accept SGPR because VCC read already counts as an +; SGPR use and number of SGPR operands is limited to 1. 
+
+; V64SI add with carry-in and carry-out.
+; Carry-in: operand 5 selects per lane between operands 7 and 8 (the
+; predicate names suggest constant vectors of ones and zeros -- TODO confirm
+; against gcn_vec1_operand / gcn_vec0_operand).
+; Result lanes are merged with operand 4 under the mask in operand 3.
+; Carry-out: operand 6 gets the unsigned-overflow test for the lanes set in
+; operand 3; its other bits are taken from operand 9 (constraint "6U").
+(define_insn "addcv64si3_vec"
+  [(set (match_operand:V64SI 0 "register_operand" "=v,v")
+        (vec_merge:V64SI
+          (plus:V64SI
+            (plus:V64SI
+              (vec_merge:V64SI
+                (match_operand:V64SI 7 "gcn_vec1_operand" " A, A")
+                (match_operand:V64SI 8 "gcn_vec0_operand" " A, A")
+                (match_operand:DI 5 "register_operand" " cV,Sg"))
+              (match_operand:V64SI 1 "gcn_alu_operand" "%vA,vA"))
+            (match_operand:V64SI 2 "gcn_alu_operand" " vB,vB"))
+          (match_operand:V64SI 4 "gcn_register_or_unspec_operand" " U0,U0")
+          (match_operand:DI 3 "gcn_exec_reg_operand" " e, e")))
+   (set (match_operand:DI 6 "register_operand" "=cV,Sg")
+        (ior:DI (and:DI (ior:DI (ltu:DI (plus:V64SI (plus:V64SI
+                                                      (vec_merge:V64SI
+                                                        (match_dup 7)
+                                                        (match_dup 8)
+                                                        (match_dup 5))
+                                                      (match_dup 1))
+                                                    (match_dup 2))
+                                        (match_dup 2))
+                                (ltu:DI (plus:V64SI (vec_merge:V64SI
+                                                      (match_dup 7)
+                                                      (match_dup 8)
+                                                      (match_dup 5))
+                                                    (match_dup 1))
+                                        (match_dup 1)))
+                        (match_dup 3))
+                (and:DI (not:DI (match_dup 3))
+                        (match_operand:DI 9 "gcn_register_or_unspec_operand"
+                                                        " 6U,6U"))))]
+  ""
+  "v_addc%^_u32\t%0, %6, %1, %2, %5"
+  [(set_attr "type" "vop2,vop3b")
+   (set_attr "length" "4,8")])
+
+; Same as addcv64si3_vec, but the second addend is scalar operand 2
+; broadcast to every lane with vec_duplicate.
+(define_insn "addcv64si3_vec_dup"
+  [(set (match_operand:V64SI 0 "register_operand" "=v,v")
+        (vec_merge:V64SI
+          (plus:V64SI
+            (plus:V64SI
+              (vec_merge:V64SI
+                (match_operand:V64SI 7 "gcn_vec1_operand" " A, A")
+                (match_operand:V64SI 8 "gcn_vec0_operand" " A, A")
+                (match_operand:DI 5 "register_operand" " cV, Sg"))
+              (match_operand:V64SI 1 "gcn_alu_operand" "%vA, vA"))
+            (vec_duplicate:V64SI
+              (match_operand:SI 2 "gcn_alu_operand" "SSB,SSB")))
+          (match_operand:V64SI 4 "gcn_register_or_unspec_operand" " U0, U0")
+          (match_operand:DI 3 "gcn_exec_reg_operand" " e, e")))
+   (set (match_operand:DI 6 "register_operand" "=cV, Sg")
+        (ior:DI (and:DI (ior:DI (ltu:DI (plus:V64SI (plus:V64SI
+                                                      (vec_merge:V64SI
+                                                        (match_dup 7)
+                                                        (match_dup 8)
+                                                        (match_dup 5))
+                                                      (match_dup 1))
+                                                    (vec_duplicate:V64SI
+                                                      (match_dup 2)))
+                                        (vec_duplicate:V64SI
+                                          (match_dup 2)))
+                                (ltu:DI (plus:V64SI (vec_merge:V64SI
+                                                      (match_dup 7)
+                                                      (match_dup 8)
+                                                      (match_dup 5))
+                                                    (match_dup 1))
+                                        (match_dup 1)))
+                        (match_dup 3))
+                (and:DI (not:DI (match_dup 3))
+                        (match_operand:DI 9 "gcn_register_or_unspec_operand"
+                                                        " 6U,6U"))))]
+  ""
+  "v_addc%^_u32\t%0, %6, %1, %2, %5"
+  [(set_attr "type" "vop2,vop3b")
+   (set_attr "length" "4,8")])
+
+; V64SI subtract, merged with operand 4 under the mask in operand 3; VCC is
+; clobbered.  At least one input must be a register (insn condition).
+; Alternative 0 emits v_sub with the operands in order; alternative 1 emits
+; v_subrev with them swapped, so the operand that need not be a VGPR is
+; always printed first.
+(define_insn "subv64si3_vector"
+  [(set (match_operand:V64SI 0 "register_operand" "= v, v")
+        (vec_merge:V64SI
+          (minus:V64SI
+            (match_operand:V64SI 1 "gcn_alu_operand" "vSSB, v")
+            (match_operand:V64SI 2 "gcn_alu_operand" " v,vSSB"))
+          (match_operand:V64SI 4 "gcn_register_or_unspec_operand" " U0, U0")
+          (match_operand:DI 3 "gcn_exec_reg_operand" " e, e")))
+   (clobber (reg:DI VCC_REG))]
+  "register_operand (operands[1], VOIDmode)
+   || register_operand (operands[2], VOIDmode)"
+  "@
+   v_sub%^_u32\t%0, vcc, %1, %2
+   v_subrev%^_u32\t%0, vcc, %2, %1"
+  [(set_attr "type" "vop2")
+   (set_attr "length" "8,8")])
+
+; SImode subtract using the same two instructions as subv64si3_vector.
+; Operand 3 appears only in a (use ...); VCC is clobbered.
+(define_insn "subsi3_scalar"
+  [(set (match_operand:SI 0 "register_operand" "= v, v")
+        (minus:SI
+          (match_operand:SI 1 "gcn_alu_operand" "vSSB, v")
+          (match_operand:SI 2 "gcn_alu_operand" " v,vSSB")))
+   (use (match_operand:DI 3 "gcn_exec_operand" " e, e"))
+   (clobber (reg:DI VCC_REG))]
+  "register_operand (operands[1], VOIDmode)
+   || register_operand (operands[2], VOIDmode)"
+  "@
+   v_sub%^_u32\t%0, vcc, %1, %2
+   v_subrev%^_u32\t%0, vcc, %2, %1"
+  [(set_attr "type" "vop2")
+   (set_attr "length" "8,8")])
+
+; V64SI subtract that also produces a borrow mask in operand 5.  For the
+; lanes set in operand 3 the mask is the test (op1 - op2) >u op1; the other
+; bits are taken from operand 6 (constraint "5U").  Alternatives 0-1 emit
+; v_sub and 2-3 emit v_subrev; within each pair the output mask is in the
+; "cV" or the "Sg" register class.
+(define_insn "subv64si3_vector_vcc"
+  [(set (match_operand:V64SI 0 "register_operand" "= v, v, v, v")
+        (vec_merge:V64SI
+          (minus:V64SI
+            (match_operand:V64SI 1 "gcn_alu_operand" "vSSB,vSSB, v, v")
+            (match_operand:V64SI 2 "gcn_alu_operand" " v, v,vSSB,vSSB"))
+          (match_operand:V64SI 4 "gcn_register_or_unspec_operand"
+                                                        " U0, U0, U0, U0")
+          (match_operand:DI 3 "gcn_exec_reg_operand" " e, e, e, e")))
+   (set (match_operand:DI 5 "register_operand" "= cV, Sg, cV, Sg")
+        (ior:DI (and:DI (gtu:DI (minus:V64SI (match_dup 1)
+                                             (match_dup 2))
+                                (match_dup 1))
+                        (match_dup 3))
+                (and:DI (not:DI (match_dup 3))
+                        (match_operand:DI 6 "gcn_register_or_unspec_operand"
+                                                        " 5U, 5U, 5U, 5U"))))]
+  "register_operand (operands[1], VOIDmode)
+   || register_operand (operands[2], VOIDmode)"
+  "@
+   v_sub%^_u32\t%0, %5, %1, %2
+   v_sub%^_u32\t%0, %5, %1, %2
+   v_subrev%^_u32\t%0, %5, %2, %1
+   v_subrev%^_u32\t%0, %5, %2, %1"
+  [(set_attr "type" "vop2,vop3b,vop2,vop3b")
+   (set_attr "length" "8")])
+
+; This pattern does not accept SGPR because VCC read already counts
+; as a SGPR use and number of SGPR operands is limited to 1.
+;
+; V64SI subtract with borrow-in (operand 5) and borrow-out (operand 6);
+; alternatives 0-1 emit v_subb and 2-3 emit v_subbrev with operands 1 and 2
+; swapped.
+; NOTE(review): as written the RTL nests the terms as
+; (borrow-in - op1) - op2, not op1 - op2 - borrow-in -- confirm this is the
+; intended description of v_subb.
+
+(define_insn "subcv64si3_vec"
+  [(set (match_operand:V64SI 0 "register_operand" "= v, v, v, v")
+        (vec_merge:V64SI
+          (minus:V64SI
+            (minus:V64SI
+              (vec_merge:V64SI
+                (match_operand:V64SI 7 "gcn_vec1_operand" " A, A, A, A")
+                (match_operand:V64SI 8 "gcn_vec0_operand" " A, A, A, A")
+                (match_operand:DI 5 "gcn_alu_operand" " cV,Sg,cV,Sg"))
+              (match_operand:V64SI 1 "gcn_alu_operand" " vA,vA,vB,vB"))
+            (match_operand:V64SI 2 "gcn_alu_operand" " vB,vB,vA,vA"))
+          (match_operand:V64SI 4 "gcn_register_or_unspec_operand"
+                                                        " U0,U0,U0,U0")
+          (match_operand:DI 3 "gcn_exec_reg_operand" " e, e, e, e")))
+   (set (match_operand:DI 6 "register_operand" "=cV,Sg,cV,Sg")
+        (ior:DI (and:DI (ior:DI (gtu:DI (minus:V64SI (minus:V64SI
+                                                       (vec_merge:V64SI
+                                                         (match_dup 7)
+                                                         (match_dup 8)
+                                                         (match_dup 5))
+                                                       (match_dup 1))
+                                                     (match_dup 2))
+                                        (match_dup 2))
+                                (ltu:DI (minus:V64SI (vec_merge:V64SI
+                                                       (match_dup 7)
+                                                       (match_dup 8)
+                                                       (match_dup 5))
+                                                     (match_dup 1))
+                                        (match_dup 1)))
+                        (match_dup 3))
+                (and:DI (not:DI (match_dup 3))
+                        (match_operand:DI 9 "gcn_register_or_unspec_operand"
+                                                        " 6U,6U,6U,6U"))))]
+  "register_operand (operands[1], VOIDmode)
+   || register_operand (operands[2], VOIDmode)"
+  "@
+   v_subb%^_u32\t%0, %6, %1, %2, %5
+   v_subb%^_u32\t%0, %6, %1, %2, %5
+   v_subbrev%^_u32\t%0, %6, %2, %1, %5
+   v_subbrev%^_u32\t%0, %6, %2, %1, %5"
+  [(set_attr "type" "vop2,vop3b,vop2,vop3b")
+   (set_attr "length" "8")])
+ +(define_insn_and_split "addv64di3_vector" + [(set (match_operand:V64DI 0 "register_operand" "= &v") + (vec_merge:V64DI + (plus:V64DI + (match_operand:V64DI 1 "register_operand" "% v0") + (match_operand:V64DI 2 "gcn_alu_operand" "vSSB0")) + (match_operand:V64DI 4 "gcn_register_or_unspec_operand" " U0") + (match_operand:DI 3 "gcn_exec_reg_operand" " e"))) + (clobber (reg:DI VCC_REG))] + "" + "#" + "gcn_can_split_p (V64DImode, operands[0]) + && gcn_can_split_p (V64DImode, operands[1]) + && gcn_can_split_p (V64DImode, operands[2]) + && gcn_can_split_p (V64DImode, operands[4])" + [(const_int 0)] + { + rtx vcc = gen_rtx_REG (DImode, VCC_REG); + emit_insn (gen_addv64si3_vector_vcc + (gcn_operand_part (V64DImode, operands[0], 0), + gcn_operand_part (V64DImode, operands[1], 0), + gcn_operand_part (V64DImode, operands[2], 0), + operands[3], + gcn_operand_part (V64DImode, operands[4], 0), + vcc, gcn_gen_undef (DImode))); + emit_insn (gen_addcv64si3_vec + (gcn_operand_part (V64DImode, operands[0], 1), + gcn_operand_part (V64DImode, operands[1], 1), + gcn_operand_part (V64DImode, operands[2], 1), + operands[3], + gcn_operand_part (V64DImode, operands[4], 1), + vcc, vcc, gcn_vec_constant (V64SImode, 1), + gcn_vec_constant (V64SImode, 0), + gcn_gen_undef (DImode))); + DONE; + } + [(set_attr "type" "vmult") + (set_attr "length" "8")]) + +(define_insn_and_split "subv64di3_vector" + [(set (match_operand:V64DI 0 "register_operand" "= &v, &v") + (vec_merge:V64DI + (minus:V64DI + (match_operand:V64DI 1 "gcn_alu_operand" "vSSB0, v0") + (match_operand:V64DI 2 "gcn_alu_operand" " v0,vSSB0")) + (match_operand:V64DI 4 "gcn_register_or_unspec_operand" + " U0, U0") + (match_operand:DI 3 "gcn_exec_reg_operand" " e, e"))) + (clobber (reg:DI VCC_REG))] + "register_operand (operands[1], VOIDmode) + || register_operand (operands[2], VOIDmode)" + "#" + "gcn_can_split_p (V64DImode, operands[0]) + && gcn_can_split_p (V64DImode, operands[1]) + && gcn_can_split_p (V64DImode, operands[2]) + && 
gcn_can_split_p (V64DImode, operands[4])" + [(const_int 0)] + { + rtx vcc = gen_rtx_REG (DImode, VCC_REG); + emit_insn (gen_subv64si3_vector_vcc + (gcn_operand_part (V64DImode, operands[0], 0), + gcn_operand_part (V64DImode, operands[1], 0), + gcn_operand_part (V64DImode, operands[2], 0), + operands[3], + gcn_operand_part (V64DImode, operands[4], 0), + vcc, gcn_gen_undef (DImode))); + emit_insn (gen_subcv64si3_vec + (gcn_operand_part (V64DImode, operands[0], 1), + gcn_operand_part (V64DImode, operands[1], 1), + gcn_operand_part (V64DImode, operands[2], 1), + operands[3], + gcn_operand_part (V64DImode, operands[4], 1), + vcc, vcc, gcn_vec_constant (V64SImode, 1), + gcn_vec_constant (V64SImode, 0), + gcn_gen_undef (DImode))); + DONE; + } + [(set_attr "type" "vmult") + (set_attr "length" "8,8")]) + +(define_insn_and_split "addv64di3_vector_dup" + [(set (match_operand:V64DI 0 "register_operand" "= &v") + (vec_merge:V64DI + (plus:V64DI + (match_operand:V64DI 1 "register_operand" " v0") + (vec_duplicate:V64DI + (match_operand:DI 2 "gcn_alu_operand" "SSDB"))) + (match_operand:V64DI 4 "gcn_register_or_unspec_operand" " U0") + (match_operand:DI 3 "gcn_exec_reg_operand" " e"))) + (clobber (reg:DI VCC_REG))] + "" + "#" + "gcn_can_split_p (V64DImode, operands[0]) + && gcn_can_split_p (V64DImode, operands[1]) + && gcn_can_split_p (V64DImode, operands[2]) + && gcn_can_split_p (V64DImode, operands[4])" + [(const_int 0)] + { + rtx vcc = gen_rtx_REG (DImode, VCC_REG); + emit_insn (gen_addv64si3_vector_vcc_dup + (gcn_operand_part (V64DImode, operands[0], 0), + gcn_operand_part (V64DImode, operands[1], 0), + gcn_operand_part (DImode, operands[2], 0), + operands[3], + gcn_operand_part (V64DImode, operands[4], 0), + vcc, gcn_gen_undef (DImode))); + emit_insn (gen_addcv64si3_vec_dup + (gcn_operand_part (V64DImode, operands[0], 1), + gcn_operand_part (V64DImode, operands[1], 1), + gcn_operand_part (DImode, operands[2], 1), + operands[3], + gcn_operand_part (V64DImode, operands[4], 1), + 
vcc, vcc, gcn_vec_constant (V64SImode, 1), + gcn_vec_constant (V64SImode, 0), + gcn_gen_undef (DImode))); + DONE; + } + [(set_attr "type" "vmult") + (set_attr "length" "8")]) + +(define_insn_and_split "addv64di3_zext" + [(set (match_operand:V64DI 0 "register_operand" "=&v,&v") + (vec_merge:V64DI + (plus:V64DI + (zero_extend:V64DI + (match_operand:V64SI 1 "gcn_alu_operand" "0vA,0vB")) + (match_operand:V64DI 2 "gcn_alu_operand" "0vB,0vA")) + (match_operand:V64DI 4 "gcn_register_or_unspec_operand" " U0, U0") + (match_operand:DI 3 "gcn_exec_reg_operand" " e, e"))) + (clobber (reg:DI VCC_REG))] + "" + "#" + "gcn_can_split_p (V64DImode, operands[0]) + && gcn_can_split_p (V64DImode, operands[2]) + && gcn_can_split_p (V64DImode, operands[4])" + [(const_int 0)] + { + rtx vcc = gen_rtx_REG (DImode, VCC_REG); + emit_insn (gen_addv64si3_vector_vcc + (gcn_operand_part (V64DImode, operands[0], 0), + operands[1], + gcn_operand_part (V64DImode, operands[2], 0), + operands[3], + gcn_operand_part (V64DImode, operands[4], 0), + vcc, gcn_gen_undef (DImode))); + emit_insn (gen_addcv64si3_vec + (gcn_operand_part (V64DImode, operands[0], 1), + gcn_operand_part (V64DImode, operands[2], 1), + const0_rtx, + operands[3], + gcn_operand_part (V64DImode, operands[4], 1), + vcc, vcc, gcn_vec_constant (V64SImode, 1), + gcn_vec_constant (V64SImode, 0), + gcn_gen_undef (DImode))); + DONE; + } + [(set_attr "type" "vmult") + (set_attr "length" "8,8")]) + +(define_insn_and_split "addv64di3_zext_dup" + [(set (match_operand:V64DI 0 "register_operand" "=&v") + (vec_merge:V64DI + (plus:V64DI + (zero_extend:V64DI + (vec_duplicate:V64SI + (match_operand:SI 1 "gcn_alu_operand" "BSS"))) + (match_operand:V64DI 2 "gcn_alu_operand" "vA0")) + (match_operand:V64DI 4 "gcn_register_or_unspec_operand" " U0") + (match_operand:DI 3 "gcn_exec_reg_operand" " e"))) + (clobber (reg:DI VCC_REG))] + "" + "#" + "gcn_can_split_p (V64DImode, operands[0]) + && gcn_can_split_p (V64DImode, operands[2]) + && gcn_can_split_p 
(V64DImode, operands[4])" + [(const_int 0)] + { + rtx vcc = gen_rtx_REG (DImode, VCC_REG); + emit_insn (gen_addv64si3_vector_vcc_dup + (gcn_operand_part (V64DImode, operands[0], 0), + gcn_operand_part (DImode, operands[1], 0), + gcn_operand_part (V64DImode, operands[2], 0), + operands[3], + gcn_operand_part (V64DImode, operands[4], 0), + vcc, gcn_gen_undef (DImode))); + emit_insn (gen_addcv64si3_vec + (gcn_operand_part (V64DImode, operands[0], 1), + gcn_operand_part (V64DImode, operands[2], 1), + const0_rtx, operands[3], + gcn_operand_part (V64DImode, operands[4], 1), + vcc, vcc, gcn_vec_constant (V64SImode, 1), + gcn_vec_constant (V64SImode, 0), + gcn_gen_undef (DImode))); + DONE; + } + [(set_attr "type" "vmult") + (set_attr "length" "8")]) + +(define_insn_and_split "addv64di3_zext_dup2" + [(set (match_operand:V64DI 0 "register_operand" "= v") + (vec_merge:V64DI + (plus:V64DI + (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" + " vA")) + (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSS"))) + (match_operand:V64DI 4 "gcn_register_or_unspec_operand" " U0") + (match_operand:DI 3 "gcn_exec_reg_operand" " e"))) + (clobber (reg:DI VCC_REG))] + "" + "#" + "gcn_can_split_p (V64DImode, operands[0]) + && gcn_can_split_p (V64DImode, operands[4])" + [(const_int 0)] + { + rtx vcc = gen_rtx_REG (DImode, VCC_REG); + emit_insn (gen_addv64si3_vector_vcc_dup + (gcn_operand_part (V64DImode, operands[0], 0), + operands[1], + gcn_operand_part (DImode, operands[2], 0), + operands[3], + gcn_operand_part (V64DImode, operands[4], 0), + vcc, gcn_gen_undef (DImode))); + rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1); + emit_insn (gen_vec_duplicatev64si_exec + (dsthi, gcn_operand_part (DImode, operands[2], 1), + operands[3], gcn_gen_undef (V64SImode))); + emit_insn (gen_addcv64si3_vec + (dsthi, dsthi, const0_rtx, operands[3], + gcn_operand_part (V64DImode, operands[4], 1), + vcc, vcc, gcn_vec_constant (V64SImode, 1), + gcn_vec_constant (V64SImode, 0), + 
gcn_gen_undef (DImode))); + DONE; + } + [(set_attr "type" "vmult") + (set_attr "length" "8")]) + +(define_insn_and_split "addv64di3_sext_dup2" + [(set (match_operand:V64DI 0 "register_operand" "= v") + (vec_merge:V64DI + (plus:V64DI + (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" + " vA")) + (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSS"))) + (match_operand:V64DI 4 "gcn_register_or_unspec_operand" " U0") + (match_operand:DI 3 "gcn_exec_reg_operand" " e"))) + (clobber (match_scratch:V64SI 5 "=&v")) + (clobber (reg:DI VCC_REG))] + "" + "#" + "gcn_can_split_p (V64DImode, operands[0]) + && gcn_can_split_p (V64DImode, operands[4])" + [(const_int 0)] + { + rtx vcc = gen_rtx_REG (DImode, VCC_REG); + emit_insn (gen_ashrv64si3_vector (operands[5], operands[1], GEN_INT (31), + operands[3], gcn_gen_undef (V64SImode))); + emit_insn (gen_addv64si3_vector_vcc_dup + (gcn_operand_part (V64DImode, operands[0], 0), + operands[1], + gcn_operand_part (DImode, operands[2], 0), + operands[3], + gcn_operand_part (V64DImode, operands[4], 0), + vcc, gcn_gen_undef (DImode))); + rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1); + emit_insn (gen_vec_duplicatev64si_exec + (dsthi, gcn_operand_part (DImode, operands[2], 1), + operands[3], gcn_gen_undef (V64SImode))); + emit_insn (gen_addcv64si3_vec + (dsthi, dsthi, operands[5], operands[3], + gcn_operand_part (V64DImode, operands[4], 1), + vcc, vcc, gcn_vec_constant (V64SImode, 1), + gcn_vec_constant (V64SImode, 0), + gcn_gen_undef (DImode))); + DONE; + } + [(set_attr "type" "vmult") + (set_attr "length" "8")]) + +(define_insn "addv64di3_scalarsi" + [(set (match_operand:V64DI 0 "register_operand" "=&v, v") + (plus:V64DI (vec_duplicate:V64DI + (zero_extend:DI + (match_operand:SI 2 "register_operand" " Sg,Sg"))) + (match_operand:V64DI 1 "register_operand" " v, 0")))] + "" + "v_add%^_u32\t%L0, vcc, %2, %L1\;v_addc%^_u32\t%H0, vcc, 0, %H1, vcc" + [(set_attr "type" "vmult") + (set_attr "length" "8") + (set_attr 
"exec" "full")]) + +;; }}} +;; {{{ DS memory ALU: add/sub + +(define_mode_iterator DS_ARITH_MODE [V64SI V64SF V64DI]) +(define_mode_iterator DS_ARITH_SCALAR_MODE [SI SF DI]) + +;; FIXME: the vector patterns probably need RD expanded to a vector of +;; addresses. For now, the only way a vector can get into LDS is +;; if the user puts it there manually. +;; +;; FIXME: the scalar patterns are probably fine in themselves, but need to be +;; checked to see if anything can ever use them. + +(define_insn "add3_ds_vector" + [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD") + (vec_merge:DS_ARITH_MODE + (plus:DS_ARITH_MODE + (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "%RD") + (match_operand:DS_ARITH_MODE 2 "register_operand" " v")) + (match_operand:DS_ARITH_MODE 4 "gcn_register_ds_or_unspec_operand" + " U0") + (match_operand:DI 3 "gcn_exec_reg_operand" " e")))] + "rtx_equal_p (operands[0], operands[1])" + "ds_add%u0\t%A0, %2%O0" + [(set_attr "type" "ds") + (set_attr "length" "8")]) + +(define_insn "add3_ds_scalar" + [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD") + (plus:DS_ARITH_SCALAR_MODE + (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand" + "%RD") + (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v"))) + (use (match_operand:DI 3 "gcn_exec_operand" " e"))] + "rtx_equal_p (operands[0], operands[1])" + "ds_add%u0\t%A0, %2%O0" + [(set_attr "type" "ds") + (set_attr "length" "8")]) + +(define_insn "sub3_ds_vector" + [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD") + (vec_merge:DS_ARITH_MODE + (minus:DS_ARITH_MODE + (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD") + (match_operand:DS_ARITH_MODE 2 "register_operand" " v")) + (match_operand:DS_ARITH_MODE 4 "gcn_register_ds_or_unspec_operand" + " U0") + (match_operand:DI 3 "gcn_exec_reg_operand" " e")))] + "rtx_equal_p (operands[0], operands[1])" + "ds_sub%u0\t%A0, %2%O0" + [(set_attr "type" "ds") + (set_attr "length" 
"8")]) + +(define_insn "sub3_ds_scalar" + [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD") + (minus:DS_ARITH_SCALAR_MODE + (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand" + " RD") + (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v"))) + (use (match_operand:DI 3 "gcn_exec_operand" " e"))] + "rtx_equal_p (operands[0], operands[1])" + "ds_sub%u0\t%A0, %2%O0" + [(set_attr "type" "ds") + (set_attr "length" "8")]) + +(define_insn "subr3_ds_vector" + [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD") + (vec_merge:DS_ARITH_MODE + (minus:DS_ARITH_MODE + (match_operand:DS_ARITH_MODE 2 "register_operand" " v") + (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")) + (match_operand:DS_ARITH_MODE 4 "gcn_register_ds_or_unspec_operand" + " U0") + (match_operand:DI 3 "gcn_exec_reg_operand" " e")))] + "rtx_equal_p (operands[0], operands[1])" + "ds_rsub%u0\t%A0, %2%O0" + [(set_attr "type" "ds") + (set_attr "length" "8")]) + +(define_insn "subr3_ds_scalar" + [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD") + (minus:DS_ARITH_SCALAR_MODE + (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v") + (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand" + " RD"))) + (use (match_operand:DI 3 "gcn_exec_operand" " e"))] + "rtx_equal_p (operands[0], operands[1])" + "ds_rsub%u0\t%A0, %2%O0" + [(set_attr "type" "ds") + (set_attr "length" "8")]) + +;; }}} +;; {{{ ALU special case: mult + +(define_code_iterator any_extend [sign_extend zero_extend]) +(define_code_attr sgnsuffix [(sign_extend "%i") (zero_extend "%u")]) +(define_code_attr su [(sign_extend "s") (zero_extend "u")]) +(define_code_attr u [(sign_extend "") (zero_extend "u")]) +(define_code_attr iu [(sign_extend "i") (zero_extend "u")]) +(define_code_attr e [(sign_extend "e") (zero_extend "")]) + +(define_expand "mulsi3_highpart" + [(parallel [(set (match_operand:SI 0 "register_operand") + (truncate:SI + (lshiftrt:DI + 
(mult:DI + (any_extend:DI + (match_operand:SI 1 "register_operand")) + (any_extend:DI + (match_operand:SI 2 "gcn_vop3_operand"))) + (const_int 32)))) + (use (match_dup 3))])] + "" + { + operands[3] = gcn_scalar_exec_reg (); + + if (CONST_INT_P (operands[2])) + { + emit_insn (gen_const_mulsi3_highpart_scalar (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } + }) + +(define_insn "mulv64si3_highpart_vector" + [(set (match_operand:V64SI 0 "register_operand" "= v") + (vec_merge:V64SI + (truncate:V64SI + (lshiftrt:V64DI + (mult:V64DI + (any_extend:V64DI + (match_operand:V64SI 1 "gcn_alu_operand" " %v")) + (any_extend:V64DI + (match_operand:V64SI 2 "gcn_alu_operand" "vSSB"))) + (const_int 32))) + (match_operand:V64SI 4 "gcn_register_ds_or_unspec_operand" " U0") + (match_operand:DI 3 "gcn_exec_reg_operand" " e")))] + "" + "v_mul_hi0\t%0, %2, %1" + [(set_attr "type" "vop3a") + (set_attr "length" "8")]) + +(define_insn "mulsi3_highpart_scalar" + [(set (match_operand:SI 0 "register_operand" "= v") + (truncate:SI + (lshiftrt:DI + (mult:DI + (any_extend:DI + (match_operand:SI 1 "register_operand" "% v")) + (any_extend:DI + (match_operand:SI 2 "register_operand" "vSS"))) + (const_int 32)))) + (use (match_operand:DI 3 "gcn_exec_reg_operand" " e"))] + "" + "v_mul_hi0\t%0, %2, %1" + [(set_attr "type" "vop3a") + (set_attr "length" "8")]) + +(define_insn "const_mulsi3_highpart_scalar" + [(set (match_operand:SI 0 "register_operand" "=v") + (truncate:SI + (lshiftrt:DI + (mult:DI + (any_extend:DI + (match_operand:SI 1 "register_operand" "%v")) + (match_operand:SI 2 "gcn_vop3_operand" " A")) + (const_int 32)))) + (use (match_operand:DI 3 "gcn_exec_reg_operand" " e"))] + "" + "v_mul_hi0\t%0, %1, %2" + [(set_attr "type" "vop3a") + (set_attr "length" "8")]) + +(define_expand "mulhisi3" + [(parallel [(set (match_operand:SI 0 "register_operand") + (mult:SI + (any_extend:SI (match_operand:HI 1 "register_operand")) + (any_extend:SI (match_operand:HI 2 
"register_operand")))) + (use (match_dup 3))])] + "" + { + operands[3] = gcn_scalar_exec_reg (); + }) + +(define_insn "mulhisi3_scalar" + [(set (match_operand:SI 0 "register_operand" "=v") + (mult:SI + (any_extend:SI (match_operand:HI 1 "register_operand" "%v")) + (any_extend:SI (match_operand:HI 2 "register_operand" " v")))) + (use (match_operand:DI 3 "gcn_exec_reg_operand" " e"))] + "" + "v_mul_32_24_sdwa\t%0, %1, %2 src0_sel:WORD_0 src1_sel:WORD_0" + [(set_attr "type" "vop_sdwa") + (set_attr "length" "8")]) + +(define_expand "mulqihi3" + [(parallel [(set (match_operand:HI 0 "register_operand") + (mult:HI + (any_extend:HI (match_operand:QI 1 "register_operand")) + (any_extend:HI (match_operand:QI 2 "register_operand")))) + (use (match_dup 3))])] + "" + { + operands[3] = gcn_scalar_exec_reg (); + }) + +(define_insn "mulqihi3_scalar" + [(set (match_operand:HI 0 "register_operand" "=v") + (mult:HI + (any_extend:HI (match_operand:QI 1 "register_operand" "%v")) + (any_extend:HI (match_operand:QI 2 "register_operand" " v")))) + (use (match_operand:DI 3 "gcn_exec_reg_operand" " e"))] + "" + "v_mul_32_24_sdwa\t%0, %1, %2 src0_sel:BYTE_0 src1_sel:BYTE_0" + [(set_attr "type" "vop_sdwa") + (set_attr "length" "8")]) + +(define_expand "mulv64si3" + [(set (match_operand:V64SI 0 "register_operand") + (vec_merge:V64SI + (mult:V64SI + (match_operand:V64SI 1 "gcn_alu_operand") + (match_operand:V64SI 2 "gcn_alu_operand")) + (match_dup 4) + (match_dup 3)))] + "" + { + operands[3] = gcn_full_exec_reg (); + operands[4] = gcn_gen_undef (V64SImode); + }) + +(define_insn "mulv64si3_vector" + [(set (match_operand:V64SI 0 "register_operand" "= v") + (vec_merge:V64SI + (mult:V64SI + (match_operand:V64SI 1 "gcn_alu_operand" "%vSvA") + (match_operand:V64SI 2 "gcn_alu_operand" " vSvA")) + (match_operand:V64SI 4 "gcn_register_or_unspec_operand" " U0") + (match_operand:DI 3 "gcn_exec_reg_operand" " e")))] + "" + "v_mul_lo_u32\t%0, %1, %2" + [(set_attr "type" "vop3a") + (set_attr "length" "8")]) 
+
+;; Masked V64SI multiply of a vector (operand 1) by a scalar (operand 2)
+;; broadcast to every lane with vec_duplicate.  Lanes selected by the mask in
+;; operand 3 receive the product; the other lanes take operand 4, which is
+;; either tied to the destination or an unspec placeholder ("U0").
+
+(define_insn "mulv64si3_vector_dup"
+  [(set (match_operand:V64SI 0 "register_operand" "= v")
+        (vec_merge:V64SI
+          (mult:V64SI
+            (match_operand:V64SI 1 "gcn_alu_operand" "%vSvA")
+            (vec_duplicate:V64SI
+              (match_operand:SI 2 "gcn_alu_operand" " SvA")))
+          (match_operand:V64SI 4 "gcn_register_or_unspec_operand" " U0")
+          (match_operand:DI 3 "gcn_exec_reg_operand" " e")))]
+  ""
+  "v_mul_lo_u32\t%0, %1, %2"
+  [(set_attr "type" "vop3a")
+   (set_attr "length" "8")])
+
+;; Standard-name expander for the V64DI multiply.  It forwards to
+;; mulv64di3_vector, passing gcn_full_exec_reg () as the lane mask and an
+;; undefined value as the merge source.
+
+(define_expand "mulv64di3"
+  [(match_operand:V64DI 0 "register_operand")
+   (match_operand:V64DI 1 "gcn_alu_operand")
+   (match_operand:V64DI 2 "gcn_alu_operand")]
+  ""
+  {
+    emit_insn (gen_mulv64di3_vector (operands[0], operands[1], operands[2],
+                                     gcn_full_exec_reg (),
+                                     gcn_gen_undef (V64DImode)));
+    DONE;
+  })
+
+;; Masked V64DI multiply.  Emitted as "#" and split after reload into 32-bit
+;; operations on the low/high halves, using the V64SI scratch in operand 5.
+;;
+;; With a = a_hi*2^32 + a_lo and b = b_hi*2^32 + b_lo, the product modulo
+;; 2^64 is
+;;
+;;   lo32 = low32 (a_lo * b_lo)
+;;   hi32 = high32 (a_lo * b_lo) + low32 (a_hi * b_lo) + low32 (a_lo * b_hi)
+;;
+;; The a_hi * b_hi partial product has weight 2^64, so it lies entirely
+;; outside the 64-bit result and must not be added to hi32.
+
+(define_insn_and_split "mulv64di3_vector"
+  [(set (match_operand:V64DI 0 "register_operand" "=&v")
+        (vec_merge:V64DI
+          (mult:V64DI
+            (match_operand:V64DI 1 "gcn_alu_operand" "% v")
+            (match_operand:V64DI 2 "gcn_alu_operand" "vDA"))
+          (match_operand:V64DI 4 "gcn_register_or_unspec_operand" " U0")
+          (match_operand:DI 3 "gcn_exec_reg_operand" " e")))
+   (clobber (match_scratch:V64SI 5 "=&v"))]
+  ""
+  "#"
+  "reload_completed"
+  [(const_int 0)]
+  {
+    rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
+    rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
+    rtx left_lo = gcn_operand_part (V64DImode, operands[1], 0);
+    rtx left_hi = gcn_operand_part (V64DImode, operands[1], 1);
+    rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
+    rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
+    rtx exec = operands[3];
+    rtx tmp = operands[5];
+
+    /* Values for the lanes the mask leaves inactive.  An UNSPEC merge
+       source is replaced by an undefined V64SI for both halves rather than
+       being split.  */
+    rtx old_lo, old_hi;
+    if (GET_CODE (operands[4]) == UNSPEC)
+      {
+        old_lo = old_hi = gcn_gen_undef (V64SImode);
+      }
+    else
+      {
+        old_lo = gcn_operand_part (V64DImode, operands[4], 0);
+        old_hi = gcn_operand_part (V64DImode, operands[4], 1);
+      }
+
+    rtx undef = gcn_gen_undef (V64SImode);
+
+    /* lo * lo supplies the low half and the initial high half.  */
+    emit_insn (gen_mulv64si3_vector (out_lo, left_lo, right_lo, exec, old_lo));
+    emit_insn (gen_umulv64si3_highpart_vector (out_hi, left_lo, right_lo,
+                                               exec, old_hi));
+    /* Accumulate the two cross products into the high half.  Each add
+       merges with out_hi itself so inactive lanes keep the value written
+       by the highpart multiply above.  */
+    emit_insn (gen_mulv64si3_vector (tmp, left_hi, right_lo, exec, undef));
+    emit_insn (gen_addv64si3_vector (out_hi, out_hi, tmp, exec, out_hi));
+    emit_insn (gen_mulv64si3_vector (tmp, left_lo, right_hi, exec, undef));
+    emit_insn (gen_addv64si3_vector (out_hi, out_hi, tmp, exec, out_hi));
+    /* No hi * hi term: it only affects bits 64 and above.  */
+    DONE;
+  })
+
+;; Masked V64DI multiply whose left operand is a zero-extended V64SI.
+;; Split after reload like mulv64di3_vector; the left high half is zero, so
+;; only left * right_hi is added to the high half of the result.
+
+(define_insn_and_split "mulv64di3_vector_zext"
+  [(set (match_operand:V64DI 0 "register_operand" "=&v")
+        (vec_merge:V64DI
+          (mult:V64DI
+            (zero_extend:V64DI
+              (match_operand:V64SI 1 "gcn_alu_operand" " v"))
+            (match_operand:V64DI 2 "gcn_alu_operand" "vDA"))
+          (match_operand:V64DI 4 "gcn_register_or_unspec_operand" " U0")
+          (match_operand:DI 3 "gcn_exec_reg_operand" " e")))
+   (clobber (match_scratch:V64SI 5 "=&v"))]
+  ""
+  "#"
+  "reload_completed"
+  [(const_int 0)]
+  {
+    rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
+    rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
+    rtx left = operands[1];
+    rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
+    rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
+    rtx exec = operands[3];
+    rtx tmp = operands[5];
+
+    /* Values for the lanes the mask leaves inactive.  */
+    rtx old_lo, old_hi;
+    if (GET_CODE (operands[4]) == UNSPEC)
+      {
+        old_lo = old_hi = gcn_gen_undef (V64SImode);
+      }
+    else
+      {
+        old_lo = gcn_operand_part (V64DImode, operands[4], 0);
+        old_hi = gcn_operand_part (V64DImode, operands[4], 1);
+      }
+
+    rtx undef = gcn_gen_undef (V64SImode);
+
+    emit_insn (gen_mulv64si3_vector (out_lo, left, right_lo, exec, old_lo));
+    emit_insn (gen_umulv64si3_highpart_vector (out_hi, left, right_lo,
+                                               exec, old_hi));
+    emit_insn (gen_mulv64si3_vector (tmp, left, right_hi, exec, undef));
+    emit_insn (gen_addv64si3_vector (out_hi, out_hi, tmp, exec, out_hi));
+    DONE;
+  })
+
+(define_insn_and_split "mulv64di3_vector_zext_dup2" + [(set (match_operand:V64DI 0 "register_operand" "= &v") + (vec_merge:V64DI + (mult:V64DI + (zero_extend:V64DI + (match_operand:V64SI 1 "gcn_alu_operand" " v")) + (vec_duplicate:V64DI + (match_operand:DI 2 "gcn_alu_operand" "SSDA"))) + (match_operand:V64DI 4 "gcn_register_or_unspec_operand" " U0") + (match_operand:DI 3 "gcn_exec_reg_operand" " e"))) + (clobber (match_scratch:V64SI 5 "= &v"))] + "" + "#" + "reload_completed" + [(const_int 0)] + { + rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0); + rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1); + rtx left = operands[1]; + rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0); + rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1); + rtx exec = operands[3]; + rtx tmp = operands[5]; + + rtx old_lo, old_hi; + if (GET_CODE (operands[4]) == UNSPEC) + { + old_lo = old_hi = gcn_gen_undef (V64SImode); + } + else + { + old_lo = gcn_operand_part (V64DImode, operands[4], 0); + old_hi = gcn_operand_part (V64DImode, operands[4], 1); + } + + rtx undef = gcn_gen_undef (V64SImode); + + emit_insn (gen_mulv64si3_vector (out_lo, left, right_lo, exec, old_lo)); + emit_insn (gen_umulv64si3_highpart_vector (out_hi, left, right_lo, + exec, old_hi)); + emit_insn (gen_mulv64si3_vector (tmp, left, right_hi, exec, undef)); + emit_insn (gen_addv64si3_vector (out_hi, out_hi, tmp, exec, out_hi)); + DONE; + }) + +;; }}} +;; {{{ ALU generic case + +(define_mode_iterator VEC_INT_MODE [V64QI V64HI V64SI V64DI]) + +(define_code_iterator bitop [and ior xor]) +(define_code_iterator bitunop [not popcount]) +(define_code_iterator shiftop [ashift lshiftrt ashiftrt]) +(define_code_iterator minmaxop [smin smax umin umax]) + +(define_expand "3" + [(set (match_operand:VEC_INT_MODE 0 "gcn_valu_dst_operand") + (vec_merge:VEC_INT_MODE + (bitop:VEC_INT_MODE + (match_operand:VEC_INT_MODE 1 "gcn_valu_src0_operand") + (match_operand:VEC_INT_MODE 2 "gcn_valu_src1com_operand")) + 
(match_dup 4) + (match_dup 3)))] + "" + { + operands[3] = gcn_full_exec_reg (); + operands[4] = gcn_gen_undef (mode); + }) + +(define_expand "v64si3" + [(set (match_operand:V64SI 0 "register_operand") + (vec_merge:V64SI + (shiftop:V64SI + (match_operand:V64SI 1 "register_operand") + (match_operand:SI 2 "gcn_alu_operand")) + (match_dup 4) + (match_dup 3)))] + "" + { + operands[3] = gcn_full_exec_reg (); + operands[4] = gcn_gen_undef (V64SImode); + }) + +(define_expand "vv64si3" + [(set (match_operand:V64SI 0 "register_operand") + (vec_merge:V64SI + (shiftop:V64SI + (match_operand:V64SI 1 "register_operand") + (match_operand:V64SI 2 "gcn_alu_operand")) + (match_dup 4) + (match_dup 3)))] + "" + { + operands[3] = gcn_full_exec_reg (); + operands[4] = gcn_gen_undef (V64SImode); + }) + +(define_expand "3" + [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand") + (vec_merge:VEC_1REG_INT_MODE + (minmaxop:VEC_1REG_INT_MODE + (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand") + (match_operand:VEC_1REG_INT_MODE 2 "gcn_valu_src1_operand")) + (match_dup 4) + (match_dup 3)))] + "mode != V64QImode" + { + operands[3] = gcn_full_exec_reg (); + operands[4] = gcn_gen_undef (mode); + }) + +(define_insn "2_vector" + [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v") + (vec_merge:VEC_1REG_INT_MODE + (bitunop:VEC_1REG_INT_MODE + (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand" + "vSSB")) + (match_operand:VEC_1REG_INT_MODE 3 "gcn_register_or_unspec_operand" + " U0") + (match_operand:DI 2 "gcn_exec_reg_operand" " e")))] + "" + "v_0\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +(define_insn "3_vector" + [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v,RD") + (vec_merge:VEC_1REG_INT_MODE + (bitop:VEC_1REG_INT_MODE + (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand" + "% v, 0") + (match_operand:VEC_1REG_INT_MODE 2 "gcn_valu_src1com_operand" + "vSSB, v")) + (match_operand:VEC_1REG_INT_MODE 4 + 
"gcn_register_ds_or_unspec_operand" " U0,U0") + (match_operand:DI 3 "gcn_exec_reg_operand" " e, e")))] + "!memory_operand (operands[0], VOIDmode) + || (rtx_equal_p (operands[0], operands[1]) + && register_operand (operands[2], VOIDmode))" + "@ + v_0\t%0, %2, %1 + ds_0\t%A0, %2%O0" + [(set_attr "type" "vop2,ds") + (set_attr "length" "8,8")]) + +(define_insn "2_vscalar" + [(set (match_operand:SCALAR_1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v") + (bitunop:SCALAR_1REG_INT_MODE + (match_operand:SCALAR_1REG_INT_MODE 1 "gcn_valu_src0_operand" + "vSSB"))) + (use (match_operand:DI 2 "gcn_exec_operand" " e"))] + "" + "v_0\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +(define_insn "3_scalar" + [(set (match_operand:SCALAR_1REG_INT_MODE 0 "gcn_valu_dst_operand" + "= v,RD") + (vec_and_scalar_com:SCALAR_1REG_INT_MODE + (match_operand:SCALAR_1REG_INT_MODE 1 "gcn_valu_src0_operand" + "% v, 0") + (match_operand:SCALAR_1REG_INT_MODE 2 "gcn_valu_src1com_operand" + "vSSB, v"))) + (use (match_operand:DI 3 "gcn_exec_operand" " e, e"))] + "!memory_operand (operands[0], VOIDmode) + || (rtx_equal_p (operands[0], operands[1]) + && register_operand (operands[2], VOIDmode))" + "@ + v_0\t%0, %2, %1 + ds_0\t%A0, %2%O0" + [(set_attr "type" "vop2,ds") + (set_attr "length" "8,8")]) + +(define_insn_and_split "v64di3_vector" + [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD") + (vec_merge:V64DI + (bitop:V64DI + (match_operand:V64DI 1 "gcn_valu_src0_operand" "% v,RD") + (match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSSB, v")) + (match_operand:V64DI 4 "gcn_register_ds_or_unspec_operand" + " U0,U0") + (match_operand:DI 3 "gcn_exec_reg_operand" " e, e")))] + "!memory_operand (operands[0], VOIDmode) + || (rtx_equal_p (operands[0], operands[1]) + && register_operand (operands[2], VOIDmode))" + "@ + # + ds_0\t%A0, %2%O0" + "(reload_completed && !gcn_ds_memory_operand (operands[0], V64DImode))" + [(set (match_dup 5) + (vec_merge:V64SI + (bitop:V64SI (match_dup 7) 
(match_dup 9)) + (match_dup 11) + (match_dup 3))) + (set (match_dup 6) + (vec_merge:V64SI + (bitop:V64SI (match_dup 8) (match_dup 10)) + (match_dup 12) + (match_dup 3)))] + { + operands[5] = gcn_operand_part (V64DImode, operands[0], 0); + operands[6] = gcn_operand_part (V64DImode, operands[0], 1); + operands[7] = gcn_operand_part (V64DImode, operands[1], 0); + operands[8] = gcn_operand_part (V64DImode, operands[1], 1); + operands[9] = gcn_operand_part (V64DImode, operands[2], 0); + operands[10] = gcn_operand_part (V64DImode, operands[2], 1); + operands[11] = gcn_operand_part (V64DImode, operands[4], 0); + operands[12] = gcn_operand_part (V64DImode, operands[4], 1); + } + [(set_attr "type" "vmult,ds") + (set_attr "length" "16,8")]) + +(define_insn_and_split "di3_scalar" + [(set (match_operand:DI 0 "gcn_valu_dst_operand" "= &v,RD") + (bitop:DI + (match_operand:DI 1 "gcn_valu_src0_operand" "% v,RD") + (match_operand:DI 2 "gcn_valu_src1com_operand" "vSSB, v"))) + (use (match_operand:DI 3 "gcn_exec_operand" " e, e"))] + "!memory_operand (operands[0], VOIDmode) + || (rtx_equal_p (operands[0], operands[1]) + && register_operand (operands[2], VOIDmode))" + "@ + # + ds_0\t%A0, %2%O0" + "(reload_completed && !gcn_ds_memory_operand (operands[0], DImode))" + [(parallel [(set (match_dup 4) + (bitop:V64SI (match_dup 6) (match_dup 8))) + (use (match_dup 3))]) + (parallel [(set (match_dup 5) + (bitop:V64SI (match_dup 7) (match_dup 9))) + (use (match_dup 3))])] + { + operands[4] = gcn_operand_part (DImode, operands[0], 0); + operands[5] = gcn_operand_part (DImode, operands[0], 1); + operands[6] = gcn_operand_part (DImode, operands[1], 0); + operands[7] = gcn_operand_part (DImode, operands[1], 1); + operands[8] = gcn_operand_part (DImode, operands[2], 0); + operands[9] = gcn_operand_part (DImode, operands[2], 1); + } + [(set_attr "type" "vmult,ds") + (set_attr "length" "16,8")]) + +(define_insn "v64si3_vector" + [(set (match_operand:V64SI 0 "register_operand" "= v") + 
(vec_merge:V64SI + (shiftop:V64SI + (match_operand:V64SI 1 "gcn_alu_operand" " v") + (match_operand:SI 2 "gcn_alu_operand" "SSB")) + (match_operand:V64SI 4 "gcn_register_or_unspec_operand" " U0") + (match_operand:DI 3 "gcn_exec_reg_operand" " e")))] + "" + "v_0\t%0, %2, %1" + [(set_attr "type" "vop2") + (set_attr "length" "8")]) + +(define_insn "vv64si3_vector" + [(set (match_operand:V64SI 0 "register_operand" "=v") + (vec_merge:V64SI + (shiftop:V64SI + (match_operand:V64SI 1 "gcn_alu_operand" " v") + (match_operand:V64SI 2 "gcn_alu_operand" "vB")) + (match_operand:V64SI 4 "gcn_register_or_unspec_operand" "U0") + (match_operand:DI 3 "gcn_exec_reg_operand" " e")))] + "" + "v_0\t%0, %2, %1" + [(set_attr "type" "vop2") + (set_attr "length" "8")]) + +(define_insn "v64si3_full" + [(set (match_operand:V64SI 0 "register_operand" "=v,v") + (shiftop:V64SI (match_operand:V64SI 1 "register_operand" " v,v") + (match_operand:SI 2 "nonmemory_operand" "Sg,I")))] + "" + "@ + v_0\t%0, %2, %1 + v_0\t%0, %2, %1" + [(set_attr "type" "vop2") + (set_attr "length" "4") + (set_attr "exec" "full")]) + +(define_insn "*si3_scalar" + [(set (match_operand:SI 0 "register_operand" "= v") + (shiftop:SI + (match_operand:SI 1 "gcn_alu_operand" " v") + (match_operand:SI 2 "gcn_alu_operand" "vSSB"))) + (use (match_operand:DI 3 "gcn_exec_operand" " e"))] + "" + "v_0\t%0, %2, %1" + [(set_attr "type" "vop2") + (set_attr "length" "8")]) + +(define_insn "3_vector" + [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v,RD") + (vec_merge:VEC_1REG_INT_MODE + (minmaxop:VEC_1REG_INT_MODE + (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand" + "% v, 0") + (match_operand:VEC_1REG_INT_MODE 2 "gcn_valu_src1com_operand" + "vSSB, v")) + (match_operand:VEC_1REG_INT_MODE 4 + "gcn_register_ds_or_unspec_operand" " U0,U0") + (match_operand:DI 3 "gcn_exec_reg_operand" " e, e")))] + "mode != V64QImode + && (!memory_operand (operands[0], VOIDmode) + || (rtx_equal_p (operands[0], operands[1]) + && 
register_operand (operands[2], VOIDmode)))" + "@ + v_0\t%0, %2, %1 + ds_0\t%A0, %2%O0" + [(set_attr "type" "vop2,ds") + (set_attr "length" "8,8")]) + +;; }}} +;; {{{ FP binops - special cases + +; GCN does not directly provide a DFmode subtract instruction, so we do it by +; adding the negated second operand to the first. + +(define_insn "subv64df3_vector" + [(set (match_operand:V64DF 0 "register_operand" "= v, v") + (vec_merge:V64DF + (minus:V64DF + (match_operand:V64DF 1 "gcn_alu_operand" "vSSB, v") + (match_operand:V64DF 2 "gcn_alu_operand" " v,vSSB")) + (match_operand:V64DF 4 "gcn_register_or_unspec_operand" + " U0, U0") + (match_operand:DI 3 "gcn_exec_reg_operand" " e, e")))] + "" + "@ + v_add_f64\t%0, %1, -%2 + v_add_f64\t%0, -%2, %1" + [(set_attr "type" "vop3a") + (set_attr "length" "8,8")]) + +(define_insn "subdf_scalar" + [(set (match_operand:DF 0 "register_operand" "= v, v") + (minus:DF + (match_operand:DF 1 "gcn_alu_operand" "vSSB, v") + (match_operand:DF 2 "gcn_alu_operand" " v,vSSB"))) + (use (match_operand:DI 3 "gcn_exec_operand" " e, e"))] + "" + "@ + v_add_f64\t%0, %1, -%2 + v_add_f64\t%0, -%2, %1" + [(set_attr "type" "vop3a") + (set_attr "length" "8,8")]) + +;; }}} +;; {{{ FP binops - generic + +(define_mode_iterator VEC_FP_MODE [V64HF V64SF V64DF]) +(define_mode_iterator VEC_FP_1REG_MODE [V64HF V64SF]) +(define_mode_iterator FP_MODE [HF SF DF]) +(define_mode_iterator FP_1REG_MODE [HF SF]) + +(define_code_iterator comm_fp [plus mult smin smax]) +(define_code_iterator nocomm_fp [minus]) +(define_code_iterator all_fp [plus mult minus smin smax]) + +(define_insn "3_vector" + [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v") + (vec_merge:VEC_FP_MODE + (comm_fp:VEC_FP_MODE + (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "% v") + (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" "vSSB")) + (match_operand:VEC_FP_MODE 4 "gcn_register_or_unspec_operand" + " U0") + (match_operand:DI 3 "gcn_exec_reg_operand" " e")))] + "" + "v_0\t%0, %2, %1" + 
[(set_attr "type" "vop2") + (set_attr "length" "8")]) + +(define_insn "3_scalar" + [(set (match_operand:FP_MODE 0 "gcn_valu_dst_operand" "= v, RL") + (comm_fp:FP_MODE + (match_operand:FP_MODE 1 "gcn_valu_src0_operand" "% v, 0") + (match_operand:FP_MODE 2 "gcn_valu_src1_operand" "vSSB,vSSB"))) + (use (match_operand:DI 3 "gcn_exec_operand" " e, e"))] + "" + "@ + v_0\t%0, %2, %1 + v_0\t%0, %1%O0" + [(set_attr "type" "vop2,ds") + (set_attr "length" "8")]) + +(define_insn "3_vector" + [(set (match_operand:VEC_FP_1REG_MODE 0 "register_operand" "= v, v") + (vec_merge:VEC_FP_1REG_MODE + (nocomm_fp:VEC_FP_1REG_MODE + (match_operand:VEC_FP_1REG_MODE 1 "gcn_alu_operand" "vSSB, v") + (match_operand:VEC_FP_1REG_MODE 2 "gcn_alu_operand" " v,vSSB")) + (match_operand:VEC_FP_1REG_MODE 4 "gcn_register_or_unspec_operand" + " U0, U0") + (match_operand:DI 3 "gcn_exec_reg_operand" " e, e")))] + "" + "@ + v_0\t%0, %1, %2 + v_0\t%0, %2, %1" + [(set_attr "type" "vop2") + (set_attr "length" "8,8")]) + +(define_insn "3_scalar" + [(set (match_operand:FP_1REG_MODE 0 "register_operand" "= v, v") + (nocomm_fp:FP_1REG_MODE + (match_operand:FP_1REG_MODE 1 "gcn_alu_operand" "vSSB, v") + (match_operand:FP_1REG_MODE 2 "gcn_alu_operand" " v,vSSB"))) + (use (match_operand:DI 3 "gcn_exec_operand" " e, e"))] + "" + "@ + v_0\t%0, %1, %2 + v_0\t%0, %2, %1" + [(set_attr "type" "vop2") + (set_attr "length" "8,8")]) + +(define_expand "3" + [(set (match_operand:VEC_FP_MODE 0 "gcn_valu_dst_operand") + (vec_merge:VEC_FP_MODE + (all_fp:VEC_FP_MODE + (match_operand:VEC_FP_MODE 1 "gcn_valu_src0_operand") + (match_operand:VEC_FP_MODE 2 "gcn_valu_src1_operand")) + (match_dup 4) + (match_dup 3)))] + "" + { + operands[3] = gcn_full_exec_reg (); + operands[4] = gcn_gen_undef (mode); + }) + +(define_expand "3" + [(parallel [(set (match_operand:FP_MODE 0 "gcn_valu_dst_operand") + (all_fp:FP_MODE + (match_operand:FP_MODE 1 "gcn_valu_src0_operand") + (match_operand:FP_MODE 2 "gcn_valu_src1_operand"))) + (use (match_dup 
3))])] + "" + { + operands[3] = gcn_scalar_exec (); + }) + +;; }}} +;; {{{ FP unops + +(define_insn "abs2" + [(set (match_operand:FP_MODE 0 "register_operand" "=v") + (abs:FP_MODE (match_operand:FP_MODE 1 "register_operand" " v")))] + "" + "v_add%i0\t%0, 0, |%1|" + [(set_attr "type" "vop3a") + (set_attr "length" "8")]) + +(define_expand "abs2" + [(set (match_operand:VEC_FP_MODE 0 "register_operand") + (abs:VEC_FP_MODE (match_operand:VEC_FP_MODE 1 "register_operand")))] + "" + { + emit_insn (gen_abs2_vector (operands[0], operands[1], + gcn_full_exec_reg (), + gcn_gen_undef (mode))); + DONE; + }) + +(define_insn "abs2_vector" + [(set (match_operand:VEC_FP_MODE 0 "register_operand" "=v") + (vec_merge:VEC_FP_MODE + (abs:VEC_FP_MODE + (match_operand:VEC_FP_MODE 1 "register_operand" " v")) + (match_operand:VEC_FP_MODE 3 "gcn_register_or_unspec_operand" + "U0") + (match_operand:DI 2 "gcn_exec_reg_operand" " e")))] + "" + "v_add%i0\t%0, 0, |%1|" + [(set_attr "type" "vop3a") + (set_attr "length" "8")]) + +(define_expand "neg2" + [(set (match_operand:VEC_FP_MODE 0 "register_operand") + (neg:VEC_FP_MODE (match_operand:VEC_FP_MODE 1 "register_operand")))] + "" + { + emit_insn (gen_neg2_vector (operands[0], operands[1], + gcn_full_exec_reg (), + gcn_gen_undef (mode))); + DONE; + }) + +(define_insn "neg2_vector" + [(set (match_operand:VEC_FP_MODE 0 "register_operand" "=v") + (vec_merge:VEC_FP_MODE + (neg:VEC_FP_MODE + (match_operand:VEC_FP_MODE 1 "register_operand" " v")) + (match_operand:VEC_FP_MODE 3 "gcn_register_or_unspec_operand" + "U0") + (match_operand:DI 2 "gcn_exec_reg_operand" " e")))] + "" + "v_add%i0\t%0, 0, -%1" + [(set_attr "type" "vop3a") + (set_attr "length" "8")]) + +(define_insn "sqrt_vector" + [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v") + (vec_merge:VEC_FP_MODE + (sqrt:VEC_FP_MODE + (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "vSSB")) + (match_operand:VEC_FP_MODE 3 "gcn_register_or_unspec_operand" + " U0") + (match_operand:DI 2 
"gcn_exec_reg_operand" " e")))] + "flag_unsafe_math_optimizations" + "v_sqrt%i0\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +(define_insn "sqrt_scalar" + [(set (match_operand:FP_MODE 0 "register_operand" "= v") + (sqrt:FP_MODE + (match_operand:FP_MODE 1 "gcn_alu_operand" "vSSB"))) + (use (match_operand:DI 2 "gcn_exec_operand" " e"))] + "flag_unsafe_math_optimizations" + "v_sqrt%i0\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +(define_expand "sqrt2" + [(set (match_operand:VEC_FP_MODE 0 "register_operand") + (vec_merge:VEC_FP_MODE + (sqrt:VEC_FP_MODE + (match_operand:VEC_FP_MODE 1 "gcn_alu_operand")) + (match_dup 3) + (match_dup 2)))] + "flag_unsafe_math_optimizations" + { + operands[2] = gcn_full_exec_reg (); + operands[3] = gcn_gen_undef (mode); + }) + +(define_expand "sqrt2" + [(parallel [(set (match_operand:FP_MODE 0 "register_operand") + (sqrt:FP_MODE + (match_operand:FP_MODE 1 "gcn_alu_operand"))) + (use (match_dup 2))])] + "flag_unsafe_math_optimizations" + { + operands[2] = gcn_scalar_exec (); + }) + +;; }}} +;; {{{ FP fused multiply and add + +(define_insn "fma_vector" + [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v, v") + (vec_merge:VEC_FP_MODE + (fma:VEC_FP_MODE + (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "% vA, vA") + (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" " vA,vSSA") + (match_operand:VEC_FP_MODE 3 "gcn_alu_operand" "vSSA, vA")) + (match_operand:VEC_FP_MODE 5 "gcn_register_or_unspec_operand" + " U0, U0") + (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))] + "" + "v_fma%i0\t%0, %1, %2, %3" + [(set_attr "type" "vop3a") + (set_attr "length" "8")]) + +(define_insn "fma_vector_negop2" + [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v, v, v") + (vec_merge:VEC_FP_MODE + (fma:VEC_FP_MODE + (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" " vA, vA,vSSA") + (neg:VEC_FP_MODE + (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" + " vA,vSSA, vA")) + (match_operand:VEC_FP_MODE 3 
"gcn_alu_operand" "vSSA, vA, vA")) + (match_operand:VEC_FP_MODE 5 "gcn_register_or_unspec_operand" + " U0, U0, U0") + (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e")))] + "" + "v_fma%i0\t%0, %1, -%2, %3" + [(set_attr "type" "vop3a") + (set_attr "length" "8")]) + +(define_insn "fma_scalar" + [(set (match_operand:FP_MODE 0 "register_operand" "= v, v") + (fma:FP_MODE + (match_operand:FP_MODE 1 "gcn_alu_operand" "% vA, vA") + (match_operand:FP_MODE 2 "gcn_alu_operand" " vA,vSSA") + (match_operand:FP_MODE 3 "gcn_alu_operand" "vSSA, vA"))) + (use (match_operand:DI 4 "gcn_exec_operand" " e, e"))] + "" + "v_fma%i0\t%0, %1, %2, %3" + [(set_attr "type" "vop3a") + (set_attr "length" "8")]) + +(define_insn "fma_scalar_negop2" + [(set (match_operand:FP_MODE 0 "register_operand" "= v, v, v") + (fma:FP_MODE + (match_operand:FP_MODE 1 "gcn_alu_operand" " vA, vA,vSSA") + (neg:FP_MODE + (match_operand:FP_MODE 2 "gcn_alu_operand" " vA,vSSA, vA")) + (match_operand:FP_MODE 3 "gcn_alu_operand" "vSSA, vA, vA"))) + (use (match_operand:DI 4 "gcn_exec_operand" " e, e, e"))] + "" + "v_fma%i0\t%0, %1, -%2, %3" + [(set_attr "type" "vop3a") + (set_attr "length" "8")]) + +(define_expand "fma4" + [(set (match_operand:VEC_FP_MODE 0 "gcn_valu_dst_operand") + (vec_merge:VEC_FP_MODE + (fma:VEC_FP_MODE + (match_operand:VEC_FP_MODE 1 "gcn_valu_src1_operand") + (match_operand:VEC_FP_MODE 2 "gcn_valu_src1_operand") + (match_operand:VEC_FP_MODE 3 "gcn_valu_src1_operand")) + (match_dup 5) + (match_dup 4)))] + "" + { + operands[4] = gcn_full_exec_reg (); + operands[5] = gcn_gen_undef (mode); + }) + +(define_expand "fma4_negop2" + [(set (match_operand:VEC_FP_MODE 0 "gcn_valu_dst_operand") + (vec_merge:VEC_FP_MODE + (fma:VEC_FP_MODE + (match_operand:VEC_FP_MODE 1 "gcn_valu_src1_operand") + (neg:VEC_FP_MODE + (match_operand:VEC_FP_MODE 2 "gcn_valu_src1_operand")) + (match_operand:VEC_FP_MODE 3 "gcn_valu_src1_operand")) + (match_dup 5) + (match_dup 4)))] + "" + { + operands[4] = gcn_full_exec_reg (); + 
operands[5] = gcn_gen_undef (mode); + }) + +(define_expand "fma4" + [(parallel [(set (match_operand:FP_MODE 0 "gcn_valu_dst_operand") + (fma:FP_MODE + (match_operand:FP_MODE 1 "gcn_valu_src1_operand") + (match_operand:FP_MODE 2 "gcn_valu_src1_operand") + (match_operand:FP_MODE 3 "gcn_valu_src1_operand"))) + (use (match_dup 4))])] + "" + { + operands[4] = gcn_scalar_exec (); + }) + +(define_expand "fma4_negop2" + [(parallel [(set (match_operand:FP_MODE 0 "gcn_valu_dst_operand") + (fma:FP_MODE + (match_operand:FP_MODE 1 "gcn_valu_src1_operand") + (neg:FP_MODE + (match_operand:FP_MODE 2 "gcn_valu_src1_operand")) + (match_operand:FP_MODE 3 "gcn_valu_src1_operand"))) + (use (match_dup 4))])] + "" + { + operands[4] = gcn_scalar_exec (); + }) + +;; }}} +;; {{{ FP division + +(define_insn "recip_vector" + [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v") + (vec_merge:VEC_FP_MODE + (div:VEC_FP_MODE + (match_operand:VEC_FP_MODE 1 "gcn_vec1d_operand" " A") + (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" "vSSB")) + (match_operand:VEC_FP_MODE 4 "gcn_register_or_unspec_operand" + " U0") + (match_operand:DI 3 "gcn_exec_reg_operand" " e")))] + "" + "v_rcp%i0\t%0, %2" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +(define_insn "recip_scalar" + [(set (match_operand:FP_MODE 0 "register_operand" "= v") + (div:FP_MODE + (match_operand:FP_MODE 1 "gcn_const1d_operand" " A") + (match_operand:FP_MODE 2 "gcn_alu_operand" "vSSB"))) + (use (match_operand:DI 3 "gcn_exec_operand" " e"))] + "" + "v_rcp%i0\t%0, %2" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +;; Do division via a = b * 1/c +;; The v_rcp_* instructions are not sufficiently accurate on their own, +;; so we use 2 v_fma_* instructions to do one round of Newton-Raphson +;; which the ISA manual says is enough to improve the reciprocal accuracy. +;; +;; FIXME: This does not handle denormals, NaNs, division-by-zero etc. 
+
+;; NOTE(review): iterator substitutions (the angle-bracket mode/code
+;; attribute references) appear to have been stripped from this copy of the
+;; patch, which is why both expanders below are named "div3" and refer to a
+;; bare "mode"; confirm against the original posting.
+
+;; Vector FP division, enabled only under flag_reciprocal_math.
+;; Operand 0 is the destination, operand 1 the numerator and operand 2 the
+;; denominator.  The expansion emits:
+;;   initrcp = recip (operand 2)              (full EXEC, undefined merge value)
+;;   fma     = initrcp * -operand 2 + 2.0
+;;   rcp     = initrcp * fma
+;; and, unless the numerator is the constant 1.0, operand 0 = operand 1 * rcp.
+(define_expand "div3"
+  [(match_operand:VEC_FP_MODE 0 "gcn_valu_dst_operand")
+   (match_operand:VEC_FP_MODE 1 "gcn_valu_src0_operand")
+   (match_operand:VEC_FP_MODE 2 "gcn_valu_src0_operand")]
+  "flag_reciprocal_math"
+  {
+    rtx one = gcn_vec_constant (mode,
+                  const_double_from_real_value (dconst1, mode));
+    rtx two = gcn_vec_constant (mode,
+                  const_double_from_real_value (dconst2, mode));
+    rtx initrcp = gen_reg_rtx (mode);
+    rtx fma = gen_reg_rtx (mode);
+    rtx rcp;
+
+    /* The final multiply may only be skipped when the numerator is exactly
+       1.0 in every lane: the refined reciprocal is then already the
+       quotient.  Testing against -1.0, or looking at lane 0 only, would
+       return 1/x for numerators that are not 1.0.  */
+    rtx elt;
+    bool is_rcp = (const_vec_duplicate_p (operands[1], &elt)
+                   && CONST_DOUBLE_P (elt)
+                   && real_identical (CONST_DOUBLE_REAL_VALUE (elt),
+                                      &dconst1));
+
+    /* In the reciprocal-only case the refined value is written straight to
+       the destination; otherwise it goes to a temporary.  */
+    if (is_rcp)
+      rcp = operands[0];
+    else
+      rcp = gen_reg_rtx (mode);
+
+    emit_insn (gen_recip_vector (initrcp, one, operands[2],
+                                 gcn_full_exec_reg (),
+                                 gcn_gen_undef (mode)));
+    emit_insn (gen_fma4_negop2 (fma, initrcp, operands[2], two));
+    emit_insn (gen_mul3 (rcp, initrcp, fma));
+
+    if (!is_rcp)
+      emit_insn (gen_mul3 (operands[0], operands[1], rcp));
+
+    DONE;
+  })
+
+;; Scalar FP division, enabled only under flag_reciprocal_math.  Same
+;; sequence as the vector expander above, but the reciprocal estimate is
+;; emitted with the value returned by gcn_scalar_exec ().
+(define_expand "div3"
+  [(match_operand:FP_MODE 0 "gcn_valu_dst_operand")
+   (match_operand:FP_MODE 1 "gcn_valu_src0_operand")
+   (match_operand:FP_MODE 2 "gcn_valu_src0_operand")]
+  "flag_reciprocal_math"
+  {
+    rtx one = const_double_from_real_value (dconst1, mode);
+    rtx two = const_double_from_real_value (dconst2, mode);
+    rtx initrcp = gen_reg_rtx (mode);
+    rtx fma = gen_reg_rtx (mode);
+    rtx rcp;
+
+    /* Skip the final multiply only for a numerator of exactly 1.0; any
+       other constant (including -1.0) still needs it.  */
+    bool is_rcp = (GET_CODE (operands[1]) == CONST_DOUBLE
+                   && real_identical (CONST_DOUBLE_REAL_VALUE (operands[1]),
+                                      &dconst1));
+
+    if (is_rcp)
+      rcp = operands[0];
+    else
+      rcp = gen_reg_rtx (mode);
+
+    emit_insn (gen_recip_scalar (initrcp, one, operands[2],
+                                 gcn_scalar_exec ()));
+    emit_insn (gen_fma4_negop2 (fma, initrcp, operands[2], two));
+    emit_insn (gen_mul3 (rcp, initrcp, fma));
+
+    if (!is_rcp)
+      emit_insn (gen_mul3 (operands[0], operands[1], rcp));
+
+    DONE;
+  })
+
+;; }}}
+;; {{{ Int/FP conversions
+
+(define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF])
+(define_mode_iterator CVT_TO_MODE [HI SI HF SF DF])
+(define_mode_iterator CVT_F_MODE [HF SF DF])
+(define_mode_iterator CVT_I_MODE [HI SI])
+
+; The same four sets again, for the 64-lane vector modes.
+(define_mode_iterator VCVT_FROM_MODE [V64HI V64SI V64HF V64SF V64DF])
+(define_mode_iterator VCVT_TO_MODE [V64HI V64SI V64HF V64SF V64DF])
+(define_mode_iterator VCVT_F_MODE [V64HF V64SF V64DF])
+(define_mode_iterator VCVT_I_MODE [V64HI V64SI])
+
+; The conversion codes handled here, the pattern-name stem used for each,
+; and the operand type suffixes used for each in the output template.
+(define_code_iterator cvt_op [fix unsigned_fix
+                              float unsigned_float
+                              float_extend float_truncate])
+(define_code_attr cvt_name [(fix "fix_trunc") (unsigned_fix "fixuns_trunc")
+                            (float "float") (unsigned_float "floatuns")
+                            (float_extend "extend") (float_truncate "trunc")])
+(define_code_attr cvt_operands [(fix "%i0%i1") (unsigned_fix "%u0%i1")
+                                (float "%i0%i1") (unsigned_float "%i0%u1")
+                                (float_extend "%i0%i1")
+                                (float_truncate "%i0%i1")])
+
+; NOTE(review): the pattern names, the arguments of gcn_valid_cvt_p and the
+; suffix of "v_cvt" below look as though angle-bracket iterator references
+; were stripped from this copy of the patch; confirm against the original.
+
+; Scalar conversion producing a floating-point result.  The expander only
+; supplies operand 2 (the value of gcn_scalar_exec ()) for the USE.
+(define_expand "2"
+  [(parallel [(set (match_operand:CVT_F_MODE 0 "register_operand")
+                   (cvt_op:CVT_F_MODE
+                     (match_operand:CVT_FROM_MODE 1 "gcn_valu_src0_operand")))
+              (use (match_dup 2))])]
+  "gcn_valid_cvt_p (mode, mode,
+                    _cvt)"
+  {
+    operands[2] = gcn_scalar_exec ();
+  })
+
+; Vector conversion producing a floating-point result.  The converted value
+; is merged with an undefined vector (operand 3) under the full EXEC mask
+; (operand 2).
+(define_expand "2"
+  [(set (match_operand:VCVT_F_MODE 0 "register_operand")
+        (vec_merge:VCVT_F_MODE
+          (cvt_op:VCVT_F_MODE
+            (match_operand:VCVT_FROM_MODE 1 "gcn_valu_src0_operand"))
+          (match_dup 3)
+          (match_dup 2)))]
+  "gcn_valid_cvt_p (mode, mode,
+                    _cvt)"
+  {
+    operands[2] = gcn_full_exec_reg ();
+    operands[3] = gcn_gen_undef (mode);
+  })
+
+; Scalar conversion from a floating-point source to an integer result.
+(define_expand "2"
+  [(parallel [(set (match_operand:CVT_I_MODE 0 "register_operand")
+                   (cvt_op:CVT_I_MODE
+                     (match_operand:CVT_F_MODE 1 "gcn_valu_src0_operand")))
+              (use (match_dup 2))])]
+  "gcn_valid_cvt_p (mode, mode,
+                    _cvt)"
+  {
+    operands[2] = gcn_scalar_exec ();
+  })
+
+; Vector conversion from a floating-point source to an integer result,
+; merged with an undefined vector under the full EXEC mask.
+(define_expand "2"
+  [(set (match_operand:VCVT_I_MODE 0 "register_operand")
+        (vec_merge:VCVT_I_MODE
+          (cvt_op:VCVT_I_MODE
+            (match_operand:VCVT_F_MODE 1 "gcn_valu_src0_operand"))
+          (match_dup 3)
+          (match_dup 2)))]
+  "gcn_valid_cvt_p (mode, mode,
+                    _cvt)"
+  {
+    operands[2] = gcn_full_exec_reg ();
+    operands[3] = gcn_gen_undef (mode);
+  })
+
+; Scalar conversion instruction: operand 0 = cvt_op (operand 1), with a USE
+; of the EXEC operand (operand 2).
+(define_insn "2_insn"
+  [(set (match_operand:CVT_TO_MODE 0 "register_operand" "= v")
+        (cvt_op:CVT_TO_MODE
+          (match_operand:CVT_FROM_MODE 1 "gcn_alu_operand" "vSSB")))
+   (use (match_operand:DI 2 "gcn_exec_operand" " e"))]
+  "gcn_valid_cvt_p (mode, mode,
+                    _cvt)"
+  "v_cvt\t%0, %1"
+  [(set_attr "type" "vop1")
+   (set_attr "length" "8")])
+
+; Vector conversion instruction: the converted value is merged with
+; operand 2 under the mask in operand 3.
+(define_insn "2_insn"
+  [(set (match_operand:VCVT_TO_MODE 0 "register_operand" "= v")
+        (vec_merge:VCVT_TO_MODE
+          (cvt_op:VCVT_TO_MODE
+            (match_operand:VCVT_FROM_MODE 1 "gcn_alu_operand" "vSSB"))
+          (match_operand:VCVT_TO_MODE 2 "gcn_alu_or_unspec_operand" " U0")
+          (match_operand:DI 3 "gcn_exec_operand" " e")))]
+  "gcn_valid_cvt_p (mode, mode,
+                    _cvt)"
+  "v_cvt\t%0, %1"
+  [(set_attr "type" "vop1")
+   (set_attr "length" "8")])
+
+;; }}}
+;; {{{ Int/int conversions
+
+;; GCC can already do these for scalar types, but not for vector types.
+;; Unfortunately you can't just do SUBREG on a vector to select the low part,
+;; so there must be a few tricks here.
+
+;; NOTE(review): angle-bracket iterator references appear to have been
+;; stripped from this copy of the patch (empty name fragments such as
+;; "reduc__scal_", "(match_operand: 0 ...)", bare "mode"); the affected
+;; tokens are reproduced exactly as they appear here.
+
+;; Masked truncation of V64DI to V64SI.  Emitted as "#" and split after
+;; reload into a plain masked merge of part 0 of the source, as selected by
+;; gcn_operand_part.
+(define_insn_and_split "vec_truncatev64div64si"
+  [(set (match_operand:V64SI 0 "register_operand" "=v,&v")
+        (vec_merge:V64SI
+          (truncate:V64SI
+            (match_operand:V64DI 1 "register_operand" " 0, v"))
+          (match_operand:V64SI 2 "gcn_alu_or_unspec_operand" "U0,U0")
+          (match_operand:DI 3 "gcn_exec_operand" " e, e")))]
+  ""
+  "#"
+  "reload_completed"
+  [(parallel [(set (match_dup 0)
+                   (vec_merge:V64SI (match_dup 1) (match_dup 2) (match_dup 3)))
+              (clobber (scratch:V64DI))])]
+  {
+    operands[1] = gcn_operand_part (V64SImode, operands[1], 0);
+  }
+  [(set_attr "type" "vop2")
+   (set_attr "length" "0,4")])
+
+;; }}}
+;; {{{ Vector comparison/merge
+
+;; Vector compare producing a DImode lane mask: the comparison result is
+;; ANDed with the full EXEC mask (operand 4); operand 5 is a DImode scratch.
+(define_expand "vec_cmpdi"
+  [(parallel
+     [(set (match_operand:DI 0 "register_operand")
+           (and:DI
+             (match_operator 1 "comparison_operator"
+               [(match_operand:VEC_1REG_MODE 2 "gcn_alu_operand")
+                (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand")])
+             (match_dup 4)))
+      (clobber (match_scratch:DI 5))])]
+  ""
+  {
+    operands[4] = gcn_full_exec_reg ();
+  })
+
+;; As above, for the integer vector modes only.
+(define_expand "vec_cmpudi"
+  [(parallel
+     [(set (match_operand:DI 0 "register_operand")
+           (and:DI
+             (match_operator 1 "comparison_operator"
+               [(match_operand:VEC_1REG_INT_MODE 2 "gcn_alu_operand")
+                (match_operand:VEC_1REG_INT_MODE 3 "gcn_vop3_operand")])
+             (match_dup 4)))
+      (clobber (match_scratch:DI 5))])]
+  ""
+  {
+    operands[4] = gcn_full_exec_reg ();
+  })
+
+;; The compare instruction itself.  Alternatives 0-1 write "cV", 2-3 write
+;; "e" through v_cmpx (with a "cV" scratch), and 4-5 write an "Sg"
+;; destination.
+(define_insn "vec_cmpdi_insn"
+  [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
+        (and:DI
+          (match_operator 1 "comparison_operator"
+            [(match_operand:VEC_1REG_MODE 2 "gcn_alu_operand"
+               "vSS, B,vSS, B, v,vA")
+             (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
+               " v, v, v, v,vA, v")])
+          (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e, e, e")))
+   (clobber (match_scratch:DI 5 "= X, X, cV,cV, X, X"))]
+  ""
+  "@
+   v_cmp%E1\tvcc, %2, %3
+   v_cmp%E1\tvcc, %2, %3
+   v_cmpx%E1\tvcc, %2, %3
+   v_cmpx%E1\tvcc, %2, %3
+   v_cmp%E1\t%0, %2, %3
+   v_cmp%E1\t%0, %2, %3"
+  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
+   (set_attr "length" "4,8,4,8,8,8")])
+
+;; Compare where the first comparison operand is a vec_duplicate of
+;; operand 2.
+(define_insn "vec_cmpdi_dup"
+  [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
+        (and:DI
+          (match_operator 1 "comparison_operator"
+            [(vec_duplicate:VEC_1REG_MODE
+               (match_operand: 2 "gcn_alu_operand"
+                 " SS, B,SS,B, A"))
+             (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
+               " v, v, v,v, v")])
+          (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e,e, e")))
+   (clobber (match_scratch:DI 5 "= X,X,cV,cV, X"))]
+  ""
+  "@
+   v_cmp%E1\tvcc, %2, %3
+   v_cmp%E1\tvcc, %2, %3
+   v_cmpx%E1\tvcc, %2, %3
+   v_cmpx%E1\tvcc, %2, %3
+   v_cmp%E1\t%0, %2, %3"
+  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
+   (set_attr "length" "4,8,4,8,8")])
+
+;; Merge of operands 1 and 2 under the DImode mask in operand 3.
+(define_expand "vcond_mask_di"
+  [(parallel
+    [(set (match_operand:VEC_REG_MODE 0 "register_operand" "")
+          (vec_merge:VEC_REG_MODE
+            (match_operand:VEC_REG_MODE 1 "gcn_vop3_operand" "")
+            (match_operand:VEC_REG_MODE 2 "gcn_alu_operand" "")
+            (match_operand:DI 3 "register_operand" "")))
+     (clobber (scratch:V64DI))])]
+  ""
+  "")
+
+;; Conditional select: compare operands 4 and 5 with the code of operand 3
+;; into a DImode temporary (ANDed with the full EXEC mask), then merge
+;; operands 1 and 2 under that temporary with vcond_mask_di.
+(define_expand "vcond"
+  [(match_operand:VEC_1REG_MODE 0 "register_operand")
+   (match_operand:VEC_1REG_MODE 1 "gcn_vop3_operand")
+   (match_operand:VEC_1REG_MODE 2 "gcn_alu_operand")
+   (match_operator 3 "comparison_operator"
+     [(match_operand:VEC_1REG_ALT 4 "gcn_alu_operand")
+      (match_operand:VEC_1REG_ALT 5 "gcn_vop3_operand")])]
+  ""
+  {
+    rtx tmp = gen_reg_rtx (DImode);
+    rtx cmp_op = gen_rtx_fmt_ee (GET_CODE (operands[3]), DImode, operands[4],
+                                 operands[5]);
+    rtx set = gen_rtx_SET (tmp, gen_rtx_AND (DImode, cmp_op,
+                                             gcn_full_exec_reg ()));
+    rtx clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DImode));
+    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
+    emit_insn (gen_vcond_mask_di (operands[0], operands[1], operands[2],
+                                  tmp));
+    DONE;
+  })
+
+
+;; Same expansion as "vcond", restricted to the integer vector modes.
+(define_expand "vcondu"
+  [(match_operand:VEC_1REG_INT_MODE 0 "register_operand")
+   (match_operand:VEC_1REG_INT_MODE 1 "gcn_vop3_operand")
+   (match_operand:VEC_1REG_INT_MODE 2 "gcn_alu_operand")
+   (match_operator 3 "comparison_operator"
+     [(match_operand:VEC_1REG_INT_ALT 4 "gcn_alu_operand")
+      (match_operand:VEC_1REG_INT_ALT 5 "gcn_vop3_operand")])]
+  ""
+  {
+    rtx tmp = gen_reg_rtx (DImode);
+    rtx cmp_op = gen_rtx_fmt_ee (GET_CODE (operands[3]), DImode, operands[4],
+                                 operands[5]);
+    rtx set = gen_rtx_SET (tmp,
+                           gen_rtx_AND (DImode, cmp_op, gcn_full_exec_reg ()));
+    rtx clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DImode));
+    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
+    emit_insn (gen_vcond_mask_di (operands[0], operands[1], operands[2],
+                                  tmp));
+    DONE;
+  })
+
+;; }}}
+;; {{{ Fully masked loop support
+;;
+;; The autovectorizer requires the mask is a vector value (we use V64BImode),
+;; but the backend uses simple DImode for the same thing.
+;;
+;; There are two kinds of patterns here:
+;;
+;; 1) Expanders for masked vector operations (while_ult, maskload, etc.)
+;;
+;; 2) Expanders that convert general V64BImode operations to DImode
+;;    equivalents.
+;
+;; Build the loop mask for the range [operand 1, operand 2).
+;; When either bound is not a CONST_INT the mask is computed at run time:
+;; VGPR 1 (plus operand 1, unless that is constant zero) is compared
+;; against a duplicate of operand 2 with GT.  When both bounds are constant
+;; the mask is materialised directly with its low (operand 2 - operand 1)
+;; bits set.
+(define_expand "while_ultsiv64bi"
+  [(match_operand:V64BI 0 "register_operand")
+   (match_operand:SI 1 "")
+   (match_operand:SI 2 "")]
+  ""
+  {
+    operands[0] = gcn_convert_mask_mode (operands[0]);
+
+    if (GET_CODE (operands[1]) != CONST_INT
+        || GET_CODE (operands[2]) != CONST_INT)
+      {
+        rtx exec = gcn_full_exec_reg ();
+        rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
+        rtx tmp = _0_1_2_3;
+        if (GET_CODE (operands[1]) != CONST_INT
+            || INTVAL (operands[1]) != 0)
+          {
+            tmp = gen_reg_rtx (V64SImode);
+            emit_insn (gen_addv64si3_vector_dup (tmp, _0_1_2_3, operands[1],
+                                                 exec, tmp));
+          }
+        emit_insn (gen_vec_cmpv64sidi_dup (operands[0],
+                                           gen_rtx_GT (VOIDmode, 0, 0),
+                                           operands[2], tmp, exec));
+      }
+    else
+      {
+        HOST_WIDE_INT diff = INTVAL (operands[2]) - INTVAL (operands[1]);
+        HOST_WIDE_INT mask;
+
+        /* An empty or inverted range enables no lanes.  Handling it here
+           also keeps a negative count away from the shift below, which
+           would be undefined behaviour.  */
+        if (diff <= 0)
+          mask = 0;
+        else if (diff >= 64)
+          mask = -1;
+        else
+          /* Shift an unsigned all-ones value; left-shifting a negative
+             signed value is not well defined.  */
+          mask = ~((unsigned HOST_WIDE_INT) -1 << diff);
+
+        emit_move_insn (operands[0], gen_rtx_CONST_INT (VOIDmode, mask));
+      }
+    DONE;
+  })
+
+;; cstore on V64BI masks: convert both masks with gcn_convert_mask_mode and
+;; defer to cstoredi4.
+(define_expand "cstorev64bi4"
+  [(match_operand:BI 0 "gcn_conditional_register_operand")
+   (match_operator:BI 1 "gcn_compare_operator"
+     [(match_operand:V64BI 2 "gcn_alu_operand")
+      (match_operand:V64BI 3 "gcn_alu_operand")])]
+  ""
+  {
+    operands[2] = gcn_convert_mask_mode (operands[2]);
+    operands[3] = gcn_convert_mask_mode (operands[3]);
+
+    emit_insn (gen_cstoredi4 (operands[0], operands[1], operands[2],
+                              operands[3]));
+    DONE;
+  })
+
+;; cbranch on masks: convert both masks and defer to cbranchdi4.
+;; NOTE(review): operands 1 and 2 are declared SImode here although the
+;; pattern name says v64bi and both are passed to gcn_convert_mask_mode;
+;; confirm the intended mode.
+(define_expand "cbranchv64bi4"
+  [(match_operator 0 "gcn_compare_operator"
+     [(match_operand:SI 1 "")
+      (match_operand:SI 2 "")])
+   (match_operand 3)]
+  ""
+  {
+    operands[1] = gcn_convert_mask_mode (operands[1]);
+    operands[2] = gcn_convert_mask_mode (operands[2]);
+
+    emit_insn (gen_cbranchdi4 (operands[0], operands[1], operands[2],
+                               operands[3]));
+    DONE;
+  })
+
+;; V64BI move: both operands are converted with gcn_convert_mask_mode and
+;; the SET template is then emitted with the converted operands.
+(define_expand "movv64bi"
+  [(set (match_operand:V64BI 0 "nonimmediate_operand")
+        (match_operand:V64BI 1 "general_operand"))]
+  ""
+  {
+    operands[0] = gcn_convert_mask_mode (operands[0]);
+    operands[1] = gcn_convert_mask_mode (operands[1]);
+  })
+
+;; Vector select under a V64BI mask: convert the mask and use vcond_mask_di.
+(define_expand "vcond_mask_v64bi"
+  [(match_operand:VEC_REG_MODE 0 "register_operand")
+   (match_operand:VEC_REG_MODE 1 "register_operand")
+   (match_operand:VEC_REG_MODE 2 "register_operand")
+   (match_operand:V64BI 3 "register_operand")]
+  ""
+  {
+    operands[3] = gcn_convert_mask_mode (operands[3]);
+
+    emit_insn (gen_vcond_mask_di (operands[0], operands[1], operands[2],
+                                  operands[3]));
+    DONE;
+  })
+
+;; Masked load: expanded as a gather from the address vector built by
+;; gcn_expand_scalar_to_vector_address, with an undefined merge value and
+;; the converted mask (operand 2) as EXEC.
+(define_expand "maskloadv64bi"
+  [(match_operand:VEC_REG_MODE 0 "register_operand")
+   (match_operand:VEC_REG_MODE 1 "memory_operand")
+   (match_operand 2 "")]
+  ""
+  {
+    rtx exec = force_reg (DImode, gcn_convert_mask_mode (operands[2]));
+    rtx addr = gcn_expand_scalar_to_vector_address
+                (mode, exec, operands[1], gen_rtx_SCRATCH (V64DImode));
+    rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
+    rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
+    rtx undef = gcn_gen_undef (mode);
+    emit_insn (gen_gather_expr (operands[0], addr, as, v, undef, exec));
+    DONE;
+  })
+
+;; Masked store: expanded as a scatter to the address vector built by
+;; gcn_expand_scalar_to_vector_address, with the converted mask (operand 2)
+;; as EXEC.
+(define_expand "maskstorev64bi"
+  [(match_operand:VEC_REG_MODE 0 "memory_operand")
+   (match_operand:VEC_REG_MODE 1 "register_operand")
+   (match_operand 2 "")]
+  ""
+  {
+    rtx exec = force_reg (DImode, gcn_convert_mask_mode (operands[2]));
+    rtx addr = gcn_expand_scalar_to_vector_address
+                (mode, exec, operands[0], gen_rtx_SCRATCH (V64DImode));
+    rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
+    rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
+    emit_insn (gen_scatter_expr (addr, operands[1], as, v, exec));
+    DONE;
+  })
+
+;; Masked gather.  A V64DI offset vector (operand 2) is first truncated to
+;; V64SI under the same mask.
+(define_expand "mask_gather_load"
+  [(match_operand:VEC_REG_MODE 0 "register_operand")
+   (match_operand:DI 1 "register_operand")
+   (match_operand 2 "register_operand")
+   (match_operand 3 "immediate_operand")
+   (match_operand:SI 4 "gcn_alu_operand")
+   (match_operand:V64BI 5 "")]
+  ""
+  {
+    rtx exec = force_reg (DImode, gcn_convert_mask_mode (operands[5]));
+
+    /* TODO: more conversions will be needed when more types are vectorized. */
+    if (GET_MODE (operands[2]) == V64DImode)
+      {
+        rtx tmp = gen_reg_rtx (V64SImode);
+        emit_insn (gen_vec_truncatev64div64si (tmp, operands[2],
+                                               gcn_gen_undef (V64SImode),
+                                               exec));
+        operands[2] = tmp;
+      }
+
+    emit_insn (gen_gather_exec (operands[0], operands[1], operands[2],
+                                operands[3], operands[4], exec));
+    DONE;
+  })
+
+;; Masked scatter.  A V64DI offset vector (operand 1) is first truncated to
+;; V64SI under the same mask.
+(define_expand "mask_scatter_store"
+  [(match_operand:DI 0 "register_operand")
+   (match_operand 1 "register_operand")
+   (match_operand 2 "immediate_operand")
+   (match_operand:SI 3 "gcn_alu_operand")
+   (match_operand:VEC_REG_MODE 4 "register_operand")
+   (match_operand:V64BI 5 "")]
+  ""
+  {
+    rtx exec = force_reg (DImode, gcn_convert_mask_mode (operands[5]));
+
+    /* TODO: more conversions will be needed when more types are vectorized. */
+    if (GET_MODE (operands[1]) == V64DImode)
+      {
+        rtx tmp = gen_reg_rtx (V64SImode);
+        emit_insn (gen_vec_truncatev64div64si (tmp, operands[1],
+                                               gcn_gen_undef (V64SImode),
+                                               exec));
+        operands[1] = tmp;
+      }
+
+    emit_insn (gen_scatter_exec (operands[0], operands[1], operands[2],
+                                 operands[3], operands[4], exec));
+    DONE;
+  })
+
+; FIXME this should be VEC_REG_MODE, but not all dependencies are implemented.
+(define_mode_iterator COND_MODE [V64SI V64DI V64SF V64DF])
+(define_mode_iterator COND_INT_MODE [V64SI V64DI])
+
+(define_code_iterator cond_op [plus minus])
+
+;; Conditional plus/minus: the converted mask (operand 1) becomes the EXEC
+;; argument and operand 4 the merge value of the corresponding "_vector"
+;; pattern; operand 2 is forced into a register first.
+(define_expand "cond_"
+  [(match_operand:COND_MODE 0 "register_operand")
+   (match_operand:V64BI 1 "register_operand")
+   (cond_op:COND_MODE
+     (match_operand:COND_MODE 2 "gcn_alu_operand")
+     (match_operand:COND_MODE 3 "gcn_alu_operand"))
+   (match_operand:COND_MODE 4 "register_operand")]
+  ""
+  {
+    operands[1] = force_reg (DImode, gcn_convert_mask_mode (operands[1]));
+    operands[2] = force_reg (mode, operands[2]);
+
+    emit_insn (gen_3_vector (operands[0], operands[2],
+                             operands[3], operands[1],
+                             operands[4]));
+    DONE;
+  })
+
+(define_code_iterator cond_bitop [and ior xor])
+
+;; Conditional and/ior/xor for the integer modes; same expansion as above.
+(define_expand "cond_"
+  [(match_operand:COND_INT_MODE 0 "register_operand")
+   (match_operand:V64BI 1 "register_operand")
+   (cond_bitop:COND_INT_MODE
+     (match_operand:COND_INT_MODE 2 "gcn_alu_operand")
+     (match_operand:COND_INT_MODE 3 "gcn_alu_operand"))
+   (match_operand:COND_INT_MODE 4 "register_operand")]
+  ""
+  {
+    operands[1] = force_reg (DImode, gcn_convert_mask_mode (operands[1]));
+    operands[2] = force_reg (mode, operands[2]);
+
+    emit_insn (gen_3_vector (operands[0], operands[2],
+                             operands[3], operands[1],
+                             operands[4]));
+    DONE;
+  })
+
+;; Vector compare with a V64BI result: convert the destination and defer to
+;; the DImode compare expander.
+(define_expand "vec_cmpv64bi"
+  [(match_operand:V64BI 0 "register_operand")
+   (match_operator 1 "comparison_operator"
+     [(match_operand:VEC_1REG_MODE 2 "gcn_alu_operand")
+      (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand")])]
+  ""
+  {
+    operands[0] = gcn_convert_mask_mode (operands[0]);
+
+    emit_insn (gen_vec_cmpdi (operands[0], operands[1], operands[2],
+                              operands[3]));
+    DONE;
+  })
+
+;; As above, for the integer vector modes only.
+(define_expand "vec_cmpuv64bi"
+  [(match_operand:V64BI 0 "register_operand")
+   (match_operator 1 "comparison_operator"
+     [(match_operand:VEC_1REG_INT_MODE 2 "gcn_alu_operand")
+      (match_operand:VEC_1REG_INT_MODE 3 "gcn_vop3_operand")])]
+  ""
+  {
+    operands[0] = gcn_convert_mask_mode (operands[0]);
+
+    emit_insn (gen_vec_cmpudi (operands[0], operands[1], operands[2],
+                               operands[3]));
+    DONE;
+  })
+
+;; }}}
+;; {{{ Vector reductions
+
+(define_int_iterator REDUC_UNSPEC [UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR
+                                   UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR
+                                   UNSPEC_PLUS_DPP_SHR
+                                   UNSPEC_AND_DPP_SHR
+                                   UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
+
+; The subset of reductions provided for the V64DI patterns below.
+(define_int_iterator REDUC_2REG_UNSPEC [UNSPEC_PLUS_DPP_SHR
+                                        UNSPEC_AND_DPP_SHR
+                                        UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
+
+; FIXME: Isn't there a better way of doing this?
+(define_int_attr reduc_unspec [(UNSPEC_SMIN_DPP_SHR "UNSPEC_SMIN_DPP_SHR")
+                               (UNSPEC_SMAX_DPP_SHR "UNSPEC_SMAX_DPP_SHR")
+                               (UNSPEC_UMIN_DPP_SHR "UNSPEC_UMIN_DPP_SHR")
+                               (UNSPEC_UMAX_DPP_SHR "UNSPEC_UMAX_DPP_SHR")
+                               (UNSPEC_PLUS_DPP_SHR "UNSPEC_PLUS_DPP_SHR")
+                               (UNSPEC_AND_DPP_SHR "UNSPEC_AND_DPP_SHR")
+                               (UNSPEC_IOR_DPP_SHR "UNSPEC_IOR_DPP_SHR")
+                               (UNSPEC_XOR_DPP_SHR "UNSPEC_XOR_DPP_SHR")])
+
+(define_int_attr reduc_op [(UNSPEC_SMIN_DPP_SHR "smin")
+                           (UNSPEC_SMAX_DPP_SHR "smax")
+                           (UNSPEC_UMIN_DPP_SHR "umin")
+                           (UNSPEC_UMAX_DPP_SHR "umax")
+                           (UNSPEC_PLUS_DPP_SHR "plus")
+                           (UNSPEC_AND_DPP_SHR "and")
+                           (UNSPEC_IOR_DPP_SHR "ior")
+                           (UNSPEC_XOR_DPP_SHR "xor")])
+
+(define_int_attr reduc_insn [(UNSPEC_SMIN_DPP_SHR "v_min%i0")
+                             (UNSPEC_SMAX_DPP_SHR "v_max%i0")
+                             (UNSPEC_UMIN_DPP_SHR "v_min%u0")
+                             (UNSPEC_UMAX_DPP_SHR "v_max%u0")
+                             (UNSPEC_PLUS_DPP_SHR "v_add%u0")
+                             (UNSPEC_AND_DPP_SHR "v_and%b0")
+                             (UNSPEC_IOR_DPP_SHR "v_or%b0")
+                             (UNSPEC_XOR_DPP_SHR "v_xor%b0")])
+
+;; Reduce operand 1 to a scalar: gcn_expand_reduc_scalar produces a vector
+;; whose lane 63 holds the result, which is then moved to operand 0.
+(define_expand "reduc__scal_"
+  [(set (match_operand: 0 "register_operand")
+        (unspec:
+          [(match_operand:VEC_1REG_MODE 1 "register_operand")]
+          REDUC_UNSPEC))]
+  ""
+  {
+    rtx tmp = gcn_expand_reduc_scalar (mode, operands[1],
+                                       );
+
+    /* The result of the reduction is in lane 63 of tmp.  */
+    emit_insn (gen_mov_from_lane63_ (operands[0], tmp));
+
+    DONE;
+  })
+
+;; The V64DI form of the reduction expander above.
+(define_expand "reduc__scal_v64di"
+  [(set (match_operand:DI 0 "register_operand")
+        (unspec:DI
+          [(match_operand:V64DI 1 "register_operand")]
+          REDUC_2REG_UNSPEC))]
+  ""
+  {
+    rtx tmp = gcn_expand_reduc_scalar (V64DImode, operands[1],
+                                       );
+
+    /* The result of the reduction is in lane 63 of tmp.  */
+    emit_insn (gen_mov_from_lane63_v64di (operands[0], tmp));
+
+    DONE;
+  })
+
+;; One DPP reduction step; operand 3 is a constant passed on to
+;; gcn_expand_dpp_shr_insn, which produces the assembler text.
+(define_insn "*_dpp_shr_"
+  [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "=v")
+        (unspec:VEC_1REG_MODE
+          [(match_operand:VEC_1REG_MODE 1 "register_operand" "v")
+           (match_operand:VEC_1REG_MODE 2 "register_operand" "v")
+           (match_operand:SI 3 "const_int_operand" "n")]
+          REDUC_UNSPEC))]
+  "!(TARGET_GCN3 && SCALAR_INT_MODE_P (mode)
+     && == UNSPEC_PLUS_DPP_SHR)"
+  {
+    return gcn_expand_dpp_shr_insn (mode, "",
+                                    , INTVAL (operands[3]));
+  }
+  [(set_attr "type" "vop_dpp")
+   (set_attr "exec" "full")
+   (set_attr "length" "8")])
+
+;; V64DI reduction step, split after reload into two V64SI steps operating
+;; on parts 0 and 1 of each operand.
+(define_insn_and_split "*_dpp_shr_v64di"
+  [(set (match_operand:V64DI 0 "register_operand" "=&v")
+        (unspec:V64DI
+          [(match_operand:V64DI 1 "register_operand" "v0")
+           (match_operand:V64DI 2 "register_operand" "v0")
+           (match_operand:SI 3 "const_int_operand" "n")]
+          REDUC_2REG_UNSPEC))]
+  ""
+  "#"
+  "reload_completed"
+  [(set (match_dup 4)
+        (unspec:V64SI
+          [(match_dup 6) (match_dup 8) (match_dup 3)] REDUC_2REG_UNSPEC))
+   (set (match_dup 5)
+        (unspec:V64SI
+          [(match_dup 7) (match_dup 9) (match_dup 3)] REDUC_2REG_UNSPEC))]
+  {
+    operands[4] = gcn_operand_part (V64DImode, operands[0], 0);
+    operands[5] = gcn_operand_part (V64DImode, operands[0], 1);
+    operands[6] = gcn_operand_part (V64DImode, operands[1], 0);
+    operands[7] = gcn_operand_part (V64DImode, operands[1], 1);
+    operands[8] = gcn_operand_part (V64DImode, operands[2], 0);
+    operands[9] = gcn_operand_part (V64DImode, operands[2], 1);
+  }
+  [(set_attr "type" "vmult")
+   (set_attr "exec" "full")
+   (set_attr "length" "16")])
+
+; Special cases for addition.
+
+;; Addition step that clobbers VCC; the mnemonic depends on TARGET_GCN3.
+(define_insn "*plus_carry_dpp_shr_"
+  [(set (match_operand:VEC_1REG_INT_MODE 0 "register_operand" "=v")
+        (unspec:VEC_1REG_INT_MODE
+          [(match_operand:VEC_1REG_INT_MODE 1 "register_operand" "v")
+           (match_operand:VEC_1REG_INT_MODE 2 "register_operand" "v")
+           (match_operand:SI 3 "const_int_operand" "n")]
+          UNSPEC_PLUS_CARRY_DPP_SHR))
+   (clobber (reg:DI VCC_REG))]
+  ""
+  {
+    const char *insn = TARGET_GCN3 ? "v_add%u0" : "v_add_co%u0";
+    return gcn_expand_dpp_shr_insn (mode, insn,
+                                    UNSPEC_PLUS_CARRY_DPP_SHR,
+                                    INTVAL (operands[3]));
+  }
+  [(set_attr "type" "vop_dpp")
+   (set_attr "exec" "full")
+   (set_attr "length" "8")])
+
+;; Addition step that also takes a "cV" carry input (operand 4) and
+;; clobbers VCC.
+(define_insn "*plus_carry_in_dpp_shr_v64si"
+  [(set (match_operand:V64SI 0 "register_operand" "=v")
+        (unspec:V64SI
+          [(match_operand:V64SI 1 "register_operand" "v")
+           (match_operand:V64SI 2 "register_operand" "v")
+           (match_operand:SI 3 "const_int_operand" "n")
+           (match_operand:DI 4 "register_operand" "cV")]
+          UNSPEC_PLUS_CARRY_IN_DPP_SHR))
+   (clobber (reg:DI VCC_REG))]
+  ""
+  {
+    const char *insn = TARGET_GCN3 ? "v_addc%u0" : "v_addc_co%u0";
+    return gcn_expand_dpp_shr_insn (V64SImode, insn,
+                                    UNSPEC_PLUS_CARRY_IN_DPP_SHR,
+                                    INTVAL (operands[3]));
+  }
+  [(set_attr "type" "vop_dpp")
+   (set_attr "exec" "full")
+   (set_attr "length" "8")])
+
+;; V64DI addition step, split after reload into an add of part 0 followed
+;; by an add-with-carry-in of part 1 that reads VCC.
+(define_insn_and_split "*plus_carry_dpp_shr_v64di"
+  [(set (match_operand:V64DI 0 "register_operand" "=&v")
+        (unspec:V64DI
+          [(match_operand:V64DI 1 "register_operand" "v0")
+           (match_operand:V64DI 2 "register_operand" "v0")
+           (match_operand:SI 3 "const_int_operand" "n")]
+          UNSPEC_PLUS_CARRY_DPP_SHR))
+   (clobber (reg:DI VCC_REG))]
+  ""
+  "#"
+  "reload_completed"
+  [(parallel [(set (match_dup 4)
+                (unspec:V64SI
+                  [(match_dup 6) (match_dup 8) (match_dup 3)]
+                  UNSPEC_PLUS_CARRY_DPP_SHR))
+              (clobber (reg:DI VCC_REG))])
+   (parallel [(set (match_dup 5)
+                (unspec:V64SI
+                  [(match_dup 7) (match_dup 9) (match_dup 3) (reg:DI VCC_REG)]
+                  UNSPEC_PLUS_CARRY_IN_DPP_SHR))
+              (clobber (reg:DI VCC_REG))])]
+  {
+    operands[4] = gcn_operand_part (V64DImode, operands[0], 0);
+    operands[5] = gcn_operand_part (V64DImode, operands[0], 1);
+    operands[6] = gcn_operand_part (V64DImode, operands[1], 0);
+    operands[7] = gcn_operand_part (V64DImode, operands[1], 1);
+    operands[8] = gcn_operand_part (V64DImode, operands[2], 0);
+    operands[9] = gcn_operand_part (V64DImode, operands[2], 1);
+  }
+  [(set_attr "type" "vmult")
+   (set_attr "exec" "full")
+   (set_attr "length" "16")])
+
+; Instructions to move a scalar value from lane 63 of a vector register.
+(define_insn "mov_from_lane63_" + [(set (match_operand: 0 "register_operand" "=Sg,v") + (unspec: + [(match_operand:VEC_1REG_MODE 1 "register_operand" "v,v")] + UNSPEC_MOV_FROM_LANE63))] + "" + "@ + v_readlane_b32\t%0, %1, 63 + v_mov_b32\t%0, %1 wave_ror:1" + [(set_attr "type" "vop3a,vop_dpp") + (set_attr "exec" "*,full") + (set_attr "length" "8")]) + +(define_insn "mov_from_lane63_v64di" + [(set (match_operand:DI 0 "register_operand" "=Sg,v") + (unspec:DI + [(match_operand:V64DI 1 "register_operand" "v,v")] + UNSPEC_MOV_FROM_LANE63))] + "" + "@ + v_readlane_b32\t%L0, %L1, 63\;v_readlane_b32\t%H0, %H1, 63 + * if (REGNO (operands[0]) <= REGNO (operands[1])) \ + return \"v_mov_b32\t%L0, %L1 wave_ror:1\;\" \ + \"v_mov_b32\t%H0, %H1 wave_ror:1\"; \ + else \ + return \"v_mov_b32\t%H0, %H1 wave_ror:1\;\" \ + \"v_mov_b32\t%L0, %L1 wave_ror:1\";" + [(set_attr "type" "vop3a,vop_dpp") + (set_attr "exec" "*,full") + (set_attr "length" "8")]) + +;; }}} +;; {{{ Miscellaneous + +(define_expand "vec_seriesv64si" + [(match_operand:V64SI 0 "register_operand") + (match_operand:SI 1 "gcn_alu_operand") + (match_operand:SI 2 "gcn_alu_operand")] + "" + { + rtx tmp = gen_reg_rtx (V64SImode); + rtx v1 = gen_rtx_REG (V64SImode, VGPR_REGNO (1)); + rtx undef = gcn_gen_undef (V64SImode); + rtx exec = gcn_full_exec_reg (); + + emit_insn (gen_mulv64si3_vector_dup (tmp, v1, operands[2], exec, undef)); + emit_insn (gen_addv64si3_vector_dup (operands[0], tmp, operands[1], exec, + undef)); + DONE; + }) + +(define_expand "vec_seriesv64di" + [(match_operand:V64DI 0 "register_operand") + (match_operand:DI 1 "gcn_alu_operand") + (match_operand:DI 2 "gcn_alu_operand")] + "" + { + rtx tmp = gen_reg_rtx (V64DImode); + rtx v1 = gen_rtx_REG (V64SImode, VGPR_REGNO (1)); + rtx undef = gcn_gen_undef (V64DImode); + rtx exec = gcn_full_exec_reg (); + + emit_insn (gen_mulv64di3_vector_zext_dup2 (tmp, v1, operands[2], exec, + undef)); + emit_insn (gen_addv64di3_vector_dup (operands[0], tmp, operands[1], exec, + 
undef)); + DONE; + }) + +;; }}} From patchwork Wed Sep 5 11:52:10 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Stubbs X-Patchwork-Id: 966346 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-485182-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="XfxjcpmD"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 4252DQ1dn5z9s5c for ; Wed, 5 Sep 2018 21:54:14 +1000 (AEST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; q=dns; s=default; b=bGFdTVcp4jyPgdW7WEoTpwQLxWJjY Ej+TgIsfsERVPeYakyrFpP6Z71wCV1KOegxrOFeYqvKNkCxvb6JalODFWf327Woj EvH780cQz0n7MN6AgjUzlXjLpeEbJglXc0WgY2HE7yAIG/IxwzrQabFyzuqR8IuI D2emVJZ0xgstMc= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; s=default; bh=j9yYbwxdzWdPf2fElDbb1BK3S+E=; b=Xfx jcpmDqEX3wSDjV5umHb5Egynth/b4LDcmrohfRPJH/xnP+CRBEN8gg593UbhTLBm 3dohtI+LIT/gdUD6JIfNjsjRjeLRZaZSHyl/3JRjOTJltm6p6wOdmGTBXrT2+yPM ePKrF0epjgEnuaRtPw3IXfoTVaw8qFHD4O6L728Q= Received: (qmail 103938 invoked by alias); 5 Sep 2018 11:52:43 -0000 
Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 103832 invoked by uid 89); 5 Sep 2018 11:52:42 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-24.8 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3 autolearn=ham version=3.3.2 spammy=46, realtime, unwinder, Require X-HELO: eggs.gnu.org Received: from eggs.gnu.org (HELO eggs.gnu.org) (208.118.235.92) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Wed, 05 Sep 2018 11:52:38 +0000 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1fxWMM-00081X-3x for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:52:37 -0400 Received: from relay1.mentorg.com ([192.94.38.131]:45743) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1fxWML-00080d-Nd for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:52:34 -0400 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=svr-ies-mbx-01.mgc.mentorg.com) by relay1.mentorg.com with esmtps (TLSv1.2:ECDHE-RSA-AES256-SHA384:256) id 1fxWMK-00064O-T0 from Andrew_Stubbs@mentor.com for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 04:52:32 -0700 Received: from build6-trusty-cs.sje.mentorg.com (137.202.0.90) by svr-ies-mbx-01.mgc.mentorg.com (139.181.222.1) with Microsoft SMTP Server (TLS) id 15.0.1320.4; Wed, 5 Sep 2018 12:52:28 +0100 From: To: Subject: [PATCH 22/25] Add dg-require-effective-target exceptions Date: Wed, 5 Sep 2018 12:52:10 +0100 Message-ID: <0321f1ffad282da61507a7d4df2b9f1e26bbea68.1536144068.git.ams@codesourcery.com> In-Reply-To: References: MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: Windows NT kernel [generic] [fuzzy] X-Received-From: 192.94.38.131 There are a number of tests that fail because 
they assume that exceptions are available, but GCN does not support them, yet. This patch adds "dg-require-effective-target exceptions" in all the affected tests. There's probably an automatic way to test for exceptions, but the current implementation simply says that AMD GCN does not support them. This should ensure that no other targets are affected by the change. 2018-09-05 Andrew Stubbs Kwok Cheung Yeung Julian Brown Tom de Vries gcc/testsuite/ * c-c++-common/ubsan/pr71512-1.c: Require exceptions. * c-c++-common/ubsan/pr71512-2.c: Require exceptions. * gcc.c-torture/compile/pr34648.c: Require exceptions. * gcc.c-torture/compile/pr41469.c: Require exceptions. * gcc.dg/20111216-1.c: Require exceptions. * gcc.dg/cleanup-10.c: Require exceptions. * gcc.dg/cleanup-11.c: Require exceptions. * gcc.dg/cleanup-12.c: Require exceptions. * gcc.dg/cleanup-13.c: Require exceptions. * gcc.dg/cleanup-5.c: Require exceptions. * gcc.dg/cleanup-8.c: Require exceptions. * gcc.dg/cleanup-9.c: Require exceptions. * gcc.dg/gomp/pr29955.c: Require exceptions. * gcc.dg/lto/pr52097_0.c: Require exceptions. * gcc.dg/nested-func-5.c: Require exceptions. * gcc.dg/pch/except-1.c: Require exceptions. * gcc.dg/pch/valid-2.c: Require exceptions. * gcc.dg/pr41470.c: Require exceptions. * gcc.dg/pr42427.c: Require exceptions. * gcc.dg/pr44545.c: Require exceptions. * gcc.dg/pr47086.c: Require exceptions. * gcc.dg/pr51481.c: Require exceptions. * gcc.dg/pr51644.c: Require exceptions. * gcc.dg/pr52046.c: Require exceptions. * gcc.dg/pr54669.c: Require exceptions. * gcc.dg/pr56424.c: Require exceptions. * gcc.dg/pr64465.c: Require exceptions. * gcc.dg/pr65802.c: Require exceptions. * gcc.dg/pr67563.c: Require exceptions. * gcc.dg/tree-ssa/pr41469-1.c: Require exceptions. * gcc.dg/tree-ssa/ssa-dse-28.c: Require exceptions. * gcc.dg/vect/pr46663.c: Require exceptions. * lib/target-supports.exp (check_effective_target_exceptions): New. 
--- gcc/testsuite/c-c++-common/ubsan/pr71512-1.c | 1 + gcc/testsuite/c-c++-common/ubsan/pr71512-2.c | 1 + gcc/testsuite/gcc.c-torture/compile/pr34648.c | 1 + gcc/testsuite/gcc.c-torture/compile/pr41469.c | 1 + gcc/testsuite/gcc.dg/20111216-1.c | 1 + gcc/testsuite/gcc.dg/cleanup-10.c | 1 + gcc/testsuite/gcc.dg/cleanup-11.c | 1 + gcc/testsuite/gcc.dg/cleanup-12.c | 1 + gcc/testsuite/gcc.dg/cleanup-13.c | 1 + gcc/testsuite/gcc.dg/cleanup-5.c | 1 + gcc/testsuite/gcc.dg/cleanup-8.c | 1 + gcc/testsuite/gcc.dg/cleanup-9.c | 1 + gcc/testsuite/gcc.dg/gomp/pr29955.c | 1 + gcc/testsuite/gcc.dg/lto/pr52097_0.c | 1 + gcc/testsuite/gcc.dg/nested-func-5.c | 1 + gcc/testsuite/gcc.dg/pch/except-1.c | 1 + gcc/testsuite/gcc.dg/pch/valid-2.c | 2 +- gcc/testsuite/gcc.dg/pr41470.c | 1 + gcc/testsuite/gcc.dg/pr42427.c | 1 + gcc/testsuite/gcc.dg/pr44545.c | 1 + gcc/testsuite/gcc.dg/pr47086.c | 1 + gcc/testsuite/gcc.dg/pr51481.c | 1 + gcc/testsuite/gcc.dg/pr51644.c | 1 + gcc/testsuite/gcc.dg/pr52046.c | 1 + gcc/testsuite/gcc.dg/pr54669.c | 1 + gcc/testsuite/gcc.dg/pr56424.c | 1 + gcc/testsuite/gcc.dg/pr64465.c | 1 + gcc/testsuite/gcc.dg/pr65802.c | 1 + gcc/testsuite/gcc.dg/pr67563.c | 1 + gcc/testsuite/gcc.dg/tree-ssa/pr41469-1.c | 1 + gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-28.c | 1 + gcc/testsuite/gcc.dg/vect/pr46663.c | 1 + gcc/testsuite/lib/target-supports.exp | 10 ++++++++++ 33 files changed, 42 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/c-c++-common/ubsan/pr71512-1.c b/gcc/testsuite/c-c++-common/ubsan/pr71512-1.c index 2a90ab1..8af9365 100644 --- a/gcc/testsuite/c-c++-common/ubsan/pr71512-1.c +++ b/gcc/testsuite/c-c++-common/ubsan/pr71512-1.c @@ -1,5 +1,6 @@ /* PR c/71512 */ /* { dg-do compile } */ /* { dg-options "-O2 -fnon-call-exceptions -ftrapv -fexceptions -fsanitize=undefined" } */ +/* { dg-require-effective-target exceptions } */ #include "../../gcc.dg/pr44545.c" diff --git a/gcc/testsuite/c-c++-common/ubsan/pr71512-2.c b/gcc/testsuite/c-c++-common/ubsan/pr71512-2.c 
index 1c95593..0c16934 100644 --- a/gcc/testsuite/c-c++-common/ubsan/pr71512-2.c +++ b/gcc/testsuite/c-c++-common/ubsan/pr71512-2.c @@ -1,5 +1,6 @@ /* PR c/71512 */ /* { dg-do compile } */ /* { dg-options "-O -fexceptions -fnon-call-exceptions -ftrapv -fsanitize=undefined" } */ +/* { dg-require-effective-target exceptions } */ #include "../../gcc.dg/pr47086.c" diff --git a/gcc/testsuite/gcc.c-torture/compile/pr34648.c b/gcc/testsuite/gcc.c-torture/compile/pr34648.c index 8bcdae0..90a88b9 100644 --- a/gcc/testsuite/gcc.c-torture/compile/pr34648.c +++ b/gcc/testsuite/gcc.c-torture/compile/pr34648.c @@ -1,6 +1,7 @@ /* PR tree-optimization/34648 */ /* { dg-options "-fexceptions" } */ +/* { dg-require-effective-target exceptions } */ extern const unsigned short int **bar (void) __attribute__ ((const)); const char *a; diff --git a/gcc/testsuite/gcc.c-torture/compile/pr41469.c b/gcc/testsuite/gcc.c-torture/compile/pr41469.c index 5917794..923bca2 100644 --- a/gcc/testsuite/gcc.c-torture/compile/pr41469.c +++ b/gcc/testsuite/gcc.c-torture/compile/pr41469.c @@ -1,5 +1,6 @@ /* { dg-options "-fexceptions" } */ /* { dg-skip-if "requires alloca" { ! 
alloca } { "-O0" } { "" } } */ +/* { dg-require-effective-target exceptions } */ void af (void *a) diff --git a/gcc/testsuite/gcc.dg/20111216-1.c b/gcc/testsuite/gcc.dg/20111216-1.c index cd82cf9..7f9395e 100644 --- a/gcc/testsuite/gcc.dg/20111216-1.c +++ b/gcc/testsuite/gcc.dg/20111216-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O -fexceptions -fnon-call-exceptions" } */ +/* { dg-require-effective-target exceptions } */ extern void f2 () __attribute__ ((noreturn)); void diff --git a/gcc/testsuite/gcc.dg/cleanup-10.c b/gcc/testsuite/gcc.dg/cleanup-10.c index 16035b1..1af63ea 100644 --- a/gcc/testsuite/gcc.dg/cleanup-10.c +++ b/gcc/testsuite/gcc.dg/cleanup-10.c @@ -1,5 +1,6 @@ /* { dg-do run { target hppa*-*-hpux* *-*-linux* *-*-gnu* powerpc*-*-darwin* *-*-darwin[912]* } } */ /* { dg-options "-fexceptions -fnon-call-exceptions -O2" } */ +/* { dg-require-effective-target exceptions } */ /* Verify that cleanups work with exception handling through signal frames on alternate stack. */ diff --git a/gcc/testsuite/gcc.dg/cleanup-11.c b/gcc/testsuite/gcc.dg/cleanup-11.c index ccc61ed..c1f19fe 100644 --- a/gcc/testsuite/gcc.dg/cleanup-11.c +++ b/gcc/testsuite/gcc.dg/cleanup-11.c @@ -1,5 +1,6 @@ /* { dg-do run { target hppa*-*-hpux* *-*-linux* *-*-gnu* powerpc*-*-darwin* *-*-darwin[912]* } } */ /* { dg-options "-fexceptions -fnon-call-exceptions -O2" } */ +/* { dg-require-effective-target exceptions } */ /* Verify that cleanups work with exception handling through realtime signal frames on alternate stack. */ diff --git a/gcc/testsuite/gcc.dg/cleanup-12.c b/gcc/testsuite/gcc.dg/cleanup-12.c index efb9a58..2171e35 100644 --- a/gcc/testsuite/gcc.dg/cleanup-12.c +++ b/gcc/testsuite/gcc.dg/cleanup-12.c @@ -4,6 +4,7 @@ /* { dg-options "-O2 -fexceptions" } */ /* { dg-skip-if "" { "ia64-*-hpux11.*" } } */ /* { dg-skip-if "" { ! nonlocal_goto } } */ +/* { dg-require-effective-target exceptions } */ /* Verify unwind info in presence of alloca. 
*/ #include diff --git a/gcc/testsuite/gcc.dg/cleanup-13.c b/gcc/testsuite/gcc.dg/cleanup-13.c index 8a8db27..1b7ea5c 100644 --- a/gcc/testsuite/gcc.dg/cleanup-13.c +++ b/gcc/testsuite/gcc.dg/cleanup-13.c @@ -3,6 +3,7 @@ /* { dg-options "-fexceptions" } */ /* { dg-skip-if "" { "ia64-*-hpux11.*" } } */ /* { dg-skip-if "" { ! nonlocal_goto } } */ +/* { dg-require-effective-target exceptions } */ /* Verify DW_OP_* handling in the unwinder. */ #include diff --git a/gcc/testsuite/gcc.dg/cleanup-5.c b/gcc/testsuite/gcc.dg/cleanup-5.c index 4257f9e..9ed2a7c 100644 --- a/gcc/testsuite/gcc.dg/cleanup-5.c +++ b/gcc/testsuite/gcc.dg/cleanup-5.c @@ -3,6 +3,7 @@ /* { dg-options "-fexceptions" } */ /* { dg-skip-if "" { "ia64-*-hpux11.*" } } */ /* { dg-skip-if "" { ! nonlocal_goto } } */ +/* { dg-require-effective-target exceptions } */ /* Verify that cleanups work with exception handling. */ #include diff --git a/gcc/testsuite/gcc.dg/cleanup-8.c b/gcc/testsuite/gcc.dg/cleanup-8.c index 553c038..45abdb2 100644 --- a/gcc/testsuite/gcc.dg/cleanup-8.c +++ b/gcc/testsuite/gcc.dg/cleanup-8.c @@ -1,5 +1,6 @@ /* { dg-do run { target hppa*-*-hpux* *-*-linux* *-*-gnu* powerpc*-*-darwin* *-*-darwin[912]* } } */ /* { dg-options "-fexceptions -fnon-call-exceptions -O2" } */ +/* { dg-require-effective-target exceptions } */ /* Verify that cleanups work with exception handling through signal frames. */ diff --git a/gcc/testsuite/gcc.dg/cleanup-9.c b/gcc/testsuite/gcc.dg/cleanup-9.c index fe28072..98dc268 100644 --- a/gcc/testsuite/gcc.dg/cleanup-9.c +++ b/gcc/testsuite/gcc.dg/cleanup-9.c @@ -1,5 +1,6 @@ /* { dg-do run { target hppa*-*-hpux* *-*-linux* *-*-gnu* powerpc*-*-darwin* *-*-darwin[912]* } } */ /* { dg-options "-fexceptions -fnon-call-exceptions -O2" } */ +/* { dg-require-effective-target exceptions } */ /* Verify that cleanups work with exception handling through realtime signal frames. 
*/ diff --git a/gcc/testsuite/gcc.dg/gomp/pr29955.c b/gcc/testsuite/gcc.dg/gomp/pr29955.c index e49c11c..102898c 100644 --- a/gcc/testsuite/gcc.dg/gomp/pr29955.c +++ b/gcc/testsuite/gcc.dg/gomp/pr29955.c @@ -1,6 +1,7 @@ /* PR c/29955 */ /* { dg-do compile } */ /* { dg-options "-O2 -fopenmp -fexceptions" } */ +/* { dg-require-effective-target exceptions } */ extern void bar (int); diff --git a/gcc/testsuite/gcc.dg/lto/pr52097_0.c b/gcc/testsuite/gcc.dg/lto/pr52097_0.c index cd4af5d..1b3fda3 100644 --- a/gcc/testsuite/gcc.dg/lto/pr52097_0.c +++ b/gcc/testsuite/gcc.dg/lto/pr52097_0.c @@ -1,5 +1,6 @@ /* { dg-lto-do link } */ /* { dg-lto-options { { -O -flto -fexceptions -fnon-call-exceptions --param allow-store-data-races=0 } } } */ +/* { dg-require-effective-target exceptions } */ typedef struct { unsigned int e0 : 16; } s1; typedef struct { unsigned int e0 : 16; } s2; diff --git a/gcc/testsuite/gcc.dg/nested-func-5.c b/gcc/testsuite/gcc.dg/nested-func-5.c index 3545f37..591f8a2 100644 --- a/gcc/testsuite/gcc.dg/nested-func-5.c +++ b/gcc/testsuite/gcc.dg/nested-func-5.c @@ -2,6 +2,7 @@ /* { dg-options "-fexceptions" } */ /* PR28516: ICE generating ARM unwind directives for nested functions. */ /* { dg-require-effective-target trampolines } */ +/* { dg-require-effective-target exceptions } */ void ex(int (*)(void)); void foo(int i) diff --git a/gcc/testsuite/gcc.dg/pch/except-1.c b/gcc/testsuite/gcc.dg/pch/except-1.c index f81b098..30350ed 100644 --- a/gcc/testsuite/gcc.dg/pch/except-1.c +++ b/gcc/testsuite/gcc.dg/pch/except-1.c @@ -1,4 +1,5 @@ /* { dg-options "-fexceptions -I." } */ +/* { dg-require-effective-target exceptions } */ #include "except-1.h" int main(void) diff --git a/gcc/testsuite/gcc.dg/pch/valid-2.c b/gcc/testsuite/gcc.dg/pch/valid-2.c index 3d8cb14..15a57c9 100644 --- a/gcc/testsuite/gcc.dg/pch/valid-2.c +++ b/gcc/testsuite/gcc.dg/pch/valid-2.c @@ -1,5 +1,5 @@ /* { dg-options "-I. 
-Winvalid-pch -fexceptions" } */ - +/* { dg-require-effective-target exceptions } */ #include "valid-2.h" /* { dg-warning "settings for -fexceptions do not match" } */ /* { dg-error "No such file" "no such file" { target *-*-* } 0 } */ /* { dg-error "they were invalid" "invalid files" { target *-*-* } 0 } */ diff --git a/gcc/testsuite/gcc.dg/pr41470.c b/gcc/testsuite/gcc.dg/pr41470.c index 7ef0086..7374fac 100644 --- a/gcc/testsuite/gcc.dg/pr41470.c +++ b/gcc/testsuite/gcc.dg/pr41470.c @@ -1,6 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-fexceptions" } */ /* { dg-require-effective-target alloca } */ +/* { dg-require-effective-target exceptions } */ void cf (void *); diff --git a/gcc/testsuite/gcc.dg/pr42427.c b/gcc/testsuite/gcc.dg/pr42427.c index cb43dd2..cb290fe 100644 --- a/gcc/testsuite/gcc.dg/pr42427.c +++ b/gcc/testsuite/gcc.dg/pr42427.c @@ -2,6 +2,7 @@ /* { dg-options "-O2 -fexceptions -fnon-call-exceptions -fpeel-loops" } */ /* { dg-add-options c99_runtime } */ /* { dg-require-effective-target ilp32 } */ +/* { dg-require-effective-target exceptions } */ #include diff --git a/gcc/testsuite/gcc.dg/pr44545.c b/gcc/testsuite/gcc.dg/pr44545.c index 8058261..37f75f1 100644 --- a/gcc/testsuite/gcc.dg/pr44545.c +++ b/gcc/testsuite/gcc.dg/pr44545.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -fnon-call-exceptions -ftrapv -fexceptions" } */ +/* { dg-require-effective-target exceptions } */ void DrawChunk(int *tabSize, int x) { diff --git a/gcc/testsuite/gcc.dg/pr47086.c b/gcc/testsuite/gcc.dg/pr47086.c index 71743fe..473e802 100644 --- a/gcc/testsuite/gcc.dg/pr47086.c +++ b/gcc/testsuite/gcc.dg/pr47086.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O -fexceptions -fnon-call-exceptions -ftrapv" } */ +/* { dg-require-effective-target exceptions } */ void foo () diff --git a/gcc/testsuite/gcc.dg/pr51481.c b/gcc/testsuite/gcc.dg/pr51481.c index d883d47..a35f8f3 100644 --- a/gcc/testsuite/gcc.dg/pr51481.c +++ 
b/gcc/testsuite/gcc.dg/pr51481.c @@ -1,6 +1,7 @@ /* PR tree-optimization/51481 */ /* { dg-do compile } */ /* { dg-options "-O -fexceptions -fipa-cp -fipa-cp-clone" } */ +/* { dg-require-effective-target exceptions } */ extern const unsigned short int **foo (void) __attribute__ ((__nothrow__, __const__)); diff --git a/gcc/testsuite/gcc.dg/pr51644.c b/gcc/testsuite/gcc.dg/pr51644.c index 2038a0c..e23c02f 100644 --- a/gcc/testsuite/gcc.dg/pr51644.c +++ b/gcc/testsuite/gcc.dg/pr51644.c @@ -1,6 +1,7 @@ /* PR middle-end/51644 */ /* { dg-do compile } */ /* { dg-options "-Wall -fexceptions" } */ +/* { dg-require-effective-target exceptions } */ #include diff --git a/gcc/testsuite/gcc.dg/pr52046.c b/gcc/testsuite/gcc.dg/pr52046.c index e72061f..f0873e2 100644 --- a/gcc/testsuite/gcc.dg/pr52046.c +++ b/gcc/testsuite/gcc.dg/pr52046.c @@ -1,6 +1,7 @@ /* PR tree-optimization/52046 */ /* { dg-do compile } */ /* { dg-options "-O3 -fexceptions -fnon-call-exceptions" } */ +/* { dg-require-effective-target exceptions } */ extern float a[], b[], c[], d[]; extern int k[]; diff --git a/gcc/testsuite/gcc.dg/pr54669.c b/gcc/testsuite/gcc.dg/pr54669.c index b68c047..48967ed 100644 --- a/gcc/testsuite/gcc.dg/pr54669.c +++ b/gcc/testsuite/gcc.dg/pr54669.c @@ -3,6 +3,7 @@ /* { dg-do compile } */ /* { dg-options "-O2 -fexceptions -fnon-call-exceptions" } */ +/* { dg-require-effective-target exceptions } */ int a[10]; diff --git a/gcc/testsuite/gcc.dg/pr56424.c b/gcc/testsuite/gcc.dg/pr56424.c index a724c64..7f28f04 100644 --- a/gcc/testsuite/gcc.dg/pr56424.c +++ b/gcc/testsuite/gcc.dg/pr56424.c @@ -2,6 +2,7 @@ /* { dg-do compile } */ /* { dg-options "-O2 -fexceptions -fnon-call-exceptions" } */ +/* { dg-require-effective-target exceptions } */ extern long double cosl (long double); extern long double sinl (long double); diff --git a/gcc/testsuite/gcc.dg/pr64465.c b/gcc/testsuite/gcc.dg/pr64465.c index acfa952..d1d1749 100644 --- a/gcc/testsuite/gcc.dg/pr64465.c +++ 
b/gcc/testsuite/gcc.dg/pr64465.c @@ -1,6 +1,7 @@ /* PR tree-optimization/64465 */ /* { dg-do compile } */ /* { dg-options "-O2 -fexceptions" } */ +/* { dg-require-effective-target exceptions } */ extern int foo (int *); extern int bar (int, int); diff --git a/gcc/testsuite/gcc.dg/pr65802.c b/gcc/testsuite/gcc.dg/pr65802.c index fcec234..0721ca8 100644 --- a/gcc/testsuite/gcc.dg/pr65802.c +++ b/gcc/testsuite/gcc.dg/pr65802.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O0 -fexceptions" } */ +/* { dg-require-effective-target exceptions } */ #include diff --git a/gcc/testsuite/gcc.dg/pr67563.c b/gcc/testsuite/gcc.dg/pr67563.c index 34a78a2..5a727b8 100644 --- a/gcc/testsuite/gcc.dg/pr67563.c +++ b/gcc/testsuite/gcc.dg/pr67563.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -fexceptions" } */ +/* { dg-require-effective-target exceptions } */ static void emit_package (int p1) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr41469-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr41469-1.c index 6be7cd9..eb8e1f2 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr41469-1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr41469-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -fexceptions -fdump-tree-optimized" } */ +/* { dg-require-effective-target exceptions } */ void af (void *a); diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-28.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-28.c index d35377b..d3a1bbc 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-28.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-28.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -fdump-tree-dse-details -fexceptions -fnon-call-exceptions -fno-isolate-erroneous-paths-dereference" } */ +/* { dg-require-effective-target exceptions } */ int foo (int *p, int b) diff --git a/gcc/testsuite/gcc.dg/vect/pr46663.c b/gcc/testsuite/gcc.dg/vect/pr46663.c index 457ceae..c2e56bb 100644 --- a/gcc/testsuite/gcc.dg/vect/pr46663.c +++ b/gcc/testsuite/gcc.dg/vect/pr46663.c @@ -1,5 +1,6 @@ /* { 
dg-do compile } */ /* { dg-additional-options "-O -fexceptions" } */ +/* { dg-require-effective-target exceptions } */ typedef __attribute__ ((const)) int (*bart) (void); diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index b51e8f0..e27bed0 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -8826,6 +8826,16 @@ proc check_effective_target_fenv_exceptions {} { } [add_options_for_ieee "-std=gnu99"]] } +# Return 1 if -fexceptions is supported. + +proc check_effective_target_exceptions {} { + if { [istarget amdgcn*-*-*] } { + return 0 + } + return 1 +} + + proc check_effective_target_tiny {} { global et_target_tiny_saved From patchwork Wed Sep 5 11:52:11 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Stubbs X-Patchwork-Id: 966348 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-485184-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="lOwJPRRr"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 4252F36Sggz9s5c for ; Wed, 5 Sep 2018 21:54:46 +1000 (AEST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; q=dns; s=default; 
b=ffzheKuvm4FhY0ZBAuxrPOnudujYt Y/A0TP4l49p3jAPfpD1bUp5u7OuGiSzwYYSlCt2Lv1JUa8vqIf2xmmAOGy/atYIt L/mwVd4vfwUyXjepyGWglceSYCRhhp2uoIQxclqRmkcBAXJIEFEpxWIikO47ixR+ nttXVBrsW+KYEU= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; s=default; bh=Yn8zTOD8aZLjLtil2yFn4+Aq5nU=; b=lOw JPRRrjy55U9K+1uKOo8akFPsmyCVifZB3oYBazHze5qQrihN702OXQSWchQyEq/1 y63UiJeAsIgXx0ivgDwcAidWSwQUKlusJ7qTfQ8IMdEDsxNJff5hyh0TuneuGO92 CsZU+nIfCYjPnCrcPrukiRDqFARZ5AuSR8u8wXqM= Received: (qmail 105331 invoked by alias); 5 Sep 2018 11:52:54 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 105252 invoked by uid 89); 5 Sep 2018 11:52:53 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-24.8 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3 autolearn=ham version=3.3.2 spammy=p10 X-HELO: eggs.gnu.org Received: from eggs.gnu.org (HELO eggs.gnu.org) (208.118.235.92) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Wed, 05 Sep 2018 11:52:51 +0000 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1fxWMY-0000Au-Hu for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:52:49 -0400 Received: from relay1.mentorg.com ([192.94.38.131]:45748) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1fxWMY-00082n-9g for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:52:46 -0400 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=svr-ies-mbx-01.mgc.mentorg.com) by relay1.mentorg.com with esmtps (TLSv1.2:ECDHE-RSA-AES256-SHA384:256) id 1fxWMN-000659-8X from Andrew_Stubbs@mentor.com for 
gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 04:52:35 -0700 Received: from build6-trusty-cs.sje.mentorg.com (137.202.0.90) by svr-ies-mbx-01.mgc.mentorg.com (139.181.222.1) with Microsoft SMTP Server (TLS) id 15.0.1320.4; Wed, 5 Sep 2018 12:52:31 +0100 From: To: Subject: [PATCH 23/25] Testsuite: GCN is always PIE. Date: Wed, 5 Sep 2018 12:52:11 +0100 Message-ID: <5074d3478af3cbf567b9475f280b8f8571dcf095.1536144068.git.ams@codesourcery.com> In-Reply-To: References: MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: Windows NT kernel [generic] [fuzzy] X-Received-From: 192.94.38.131 The GCN/HSA loader ignores the load address and uses a random location, so we build all GCN binaries as PIE, by default. This patch makes the necessary testsuite adjustments to make this work correctly. 2018-09-05 Andrew Stubbs gcc/testsuite/ * gcc.dg/graphite/scop-19.c: Check pie_enabled. * gcc.dg/pic-1.c: Disable on amdgcn. * gcc.dg/pic-2.c: Disable on amdgcn. * gcc.dg/pic-3.c: Disable on amdgcn. * gcc.dg/pic-4.c: Disable on amdgcn. * gcc.dg/pie-3.c: Disable on amdgcn. * gcc.dg/pie-4.c: Disable on amdgcn. * gcc.dg/uninit-19.c: Check pie_enabled. * lib/target-supports.exp (check_effective_target_pie): Add amdgcn. 
--- gcc/testsuite/gcc.dg/graphite/scop-19.c | 4 ++-- gcc/testsuite/gcc.dg/pic-1.c | 2 +- gcc/testsuite/gcc.dg/pic-2.c | 1 + gcc/testsuite/gcc.dg/pic-3.c | 2 +- gcc/testsuite/gcc.dg/pic-4.c | 2 +- gcc/testsuite/gcc.dg/pie-3.c | 2 +- gcc/testsuite/gcc.dg/pie-4.c | 2 +- gcc/testsuite/gcc.dg/uninit-19.c | 4 ++-- gcc/testsuite/lib/target-supports.exp | 3 ++- 9 files changed, 12 insertions(+), 10 deletions(-) diff --git a/gcc/testsuite/gcc.dg/graphite/scop-19.c b/gcc/testsuite/gcc.dg/graphite/scop-19.c index c89717b..6028132 100644 --- a/gcc/testsuite/gcc.dg/graphite/scop-19.c +++ b/gcc/testsuite/gcc.dg/graphite/scop-19.c @@ -31,6 +31,6 @@ d_growable_string_append_buffer (struct d_growable_string *dgs, if (need > dgs->alc) d_growable_string_resize (dgs, need); } -/* { dg-final { scan-tree-dump-times "number of SCoPs: 0" 2 "graphite" { target nonpic } } } */ -/* { dg-final { scan-tree-dump-times "number of SCoPs: 0" 1 "graphite" { target { ! nonpic } } } } */ +/* { dg-final { scan-tree-dump-times "number of SCoPs: 0" 2 "graphite" { target { nonpic || pie_enabled } } } } */ +/* { dg-final { scan-tree-dump-times "number of SCoPs: 0" 1 "graphite" { target { ! { nonpic || pie_enabled } } } } } */ diff --git a/gcc/testsuite/gcc.dg/pic-1.c b/gcc/testsuite/gcc.dg/pic-1.c index 82ba43d..4bb332e 100644 --- a/gcc/testsuite/gcc.dg/pic-1.c +++ b/gcc/testsuite/gcc.dg/pic-1.c @@ -1,4 +1,4 @@ -/* { dg-do compile { target { ! { *-*-darwin* hppa*-*-* } } } } */ +/* { dg-do compile { target { ! 
{ *-*-darwin* hppa*-*-* amdgcn*-*-* } } } } */ /* { dg-require-effective-target fpic } */ /* { dg-options "-fpic" } */ diff --git a/gcc/testsuite/gcc.dg/pic-2.c b/gcc/testsuite/gcc.dg/pic-2.c index bccec13..3846ec4 100644 --- a/gcc/testsuite/gcc.dg/pic-2.c +++ b/gcc/testsuite/gcc.dg/pic-2.c @@ -2,6 +2,7 @@ /* { dg-require-effective-target fpic } */ /* { dg-options "-fPIC" } */ /* { dg-skip-if "__PIC__ is always 1 for MIPS" { mips*-*-* } } */ +/* { dg-skip-if "__PIE__ is always defined for GCN" { amdgcn*-*-* } } */ #if __PIC__ != 2 # error __PIC__ is not 2! diff --git a/gcc/testsuite/gcc.dg/pic-3.c b/gcc/testsuite/gcc.dg/pic-3.c index c56f06f..1397977 100644 --- a/gcc/testsuite/gcc.dg/pic-3.c +++ b/gcc/testsuite/gcc.dg/pic-3.c @@ -1,4 +1,4 @@ -/* { dg-do compile { target { ! { *-*-darwin* hppa*64*-*-* mips*-*-linux-* } } } } */ +/* { dg-do compile { target { ! { *-*-darwin* hppa*64*-*-* mips*-*-linux-* amdgcn*-*-* } } } } */ /* { dg-options "-fno-pic" } */ #ifdef __PIC__ diff --git a/gcc/testsuite/gcc.dg/pic-4.c b/gcc/testsuite/gcc.dg/pic-4.c index 2afdd99..d6d9dc9 100644 --- a/gcc/testsuite/gcc.dg/pic-4.c +++ b/gcc/testsuite/gcc.dg/pic-4.c @@ -1,4 +1,4 @@ -/* { dg-do compile { target { ! { *-*-darwin* hppa*64*-*-* mips*-*-linux-* } } } } */ +/* { dg-do compile { target { ! { *-*-darwin* hppa*64*-*-* mips*-*-linux-* amdgcn*-*-* } } } } */ /* { dg-options "-fno-PIC" } */ #ifdef __PIC__ diff --git a/gcc/testsuite/gcc.dg/pie-3.c b/gcc/testsuite/gcc.dg/pie-3.c index 5577437..fd4a48d 100644 --- a/gcc/testsuite/gcc.dg/pie-3.c +++ b/gcc/testsuite/gcc.dg/pie-3.c @@ -1,4 +1,4 @@ -/* { dg-do compile { target { ! { *-*-darwin* hppa*64*-*-* mips*-*-linux-* } } } } */ +/* { dg-do compile { target { ! 
{ *-*-darwin* hppa*64*-*-* mips*-*-linux-* amdgcn*-*-* } } } } */ /* { dg-options "-fno-pie" } */ #ifdef __PIC__ diff --git a/gcc/testsuite/gcc.dg/pie-4.c b/gcc/testsuite/gcc.dg/pie-4.c index 4134676..5523602 100644 --- a/gcc/testsuite/gcc.dg/pie-4.c +++ b/gcc/testsuite/gcc.dg/pie-4.c @@ -1,4 +1,4 @@ -/* { dg-do compile { target { ! { *-*-darwin* hppa*64*-*-* mips*-*-linux-* } } } } */ +/* { dg-do compile { target { ! { *-*-darwin* hppa*64*-*-* mips*-*-linux-* amdgcn*-*-* } } } } */ /* { dg-options "-fno-PIE" } */ #ifdef __PIC__ diff --git a/gcc/testsuite/gcc.dg/uninit-19.c b/gcc/testsuite/gcc.dg/uninit-19.c index 094dc0e..3f5f06a 100644 --- a/gcc/testsuite/gcc.dg/uninit-19.c +++ b/gcc/testsuite/gcc.dg/uninit-19.c @@ -12,7 +12,7 @@ fn1 (int p1, float *f1, float *f2, float *f3, unsigned char *c1, float *f4, { if (p1 & 8) b[3] = p10[a]; - /* { dg-warning "may be used uninitialized" "" { target { { nonpic } || { hppa*64*-*-* } } } .-1 } */ + /* { dg-warning "may be used uninitialized" "" { target { { nonpic || pie_enabled } || { hppa*64*-*-* } } } .-1 } */ } void @@ -22,5 +22,5 @@ fn2 () if (l & 6) n = &c + m; fn1 (l, &d, &e, &g, &i, &h, &k, n); - /* { dg-warning "may be used uninitialized" "" { target { ! { { nonpic } || { hppa*64*-*-* } } } } .-1 } */ + /* { dg-warning "may be used uninitialized" "" { target { ! 
{ { nonpic || pie_enabled } || { hppa*64*-*-* } } } } .-1 } */ } diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index e27bed0..61442bd 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -1185,7 +1185,8 @@ proc check_effective_target_pie { } { || [istarget *-*-dragonfly*] || [istarget *-*-freebsd*] || [istarget *-*-linux*] - || [istarget *-*-gnu*] } { + || [istarget *-*-gnu*] + || [istarget *-*-amdhsa]} { return 1; } if { [istarget *-*-solaris2.1\[1-9\]*] } { From patchwork Wed Sep 5 11:52:12 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Stubbs X-Patchwork-Id: 966347 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-485183-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="NQLyTwr+"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 4252Dl69mmz9sCn for ; Wed, 5 Sep 2018 21:54:31 +1000 (AEST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; q=dns; s=default; b=FtIFAgczRLN8VrQUf27PXv8QaX9gx B+ka3h4wtny9anhA7hIM41SIIbW2MXPuUYXAnLiuphx1m34yBAyL8L2HELEEQ/Pz rNz2Y8iD/cMuNLDvxI6D74jZNZBughXIkR+4XKWEKO3JiH1M+pB2F3PeGzdox8rY 
3MiyQoDIkFFNn0= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; s=default; bh=sZm0hCQQxyX/B8fye4fpHpeOh0Y=; b=NQL yTwr+aKTH+0gsl3teU5+aYuGm2MI1zovQ+wywHcmE0jm3n1vzfiwHYwLDhxMfn/O +5UMkZx5CmkNHQ+DjlszjZU73MosQNre00kWE5YDOuhCurHcZxX0ba7eUgsde5tQ o0a5GyqC5BvV3GErnbdUXLLED14jp1ABNv/FOqzo= Received: (qmail 105298 invoked by alias); 5 Sep 2018 11:52:53 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 105198 invoked by uid 89); 5 Sep 2018 11:52:53 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-24.8 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3 autolearn=ham version=3.3.2 spammy= X-HELO: eggs.gnu.org Received: from eggs.gnu.org (HELO eggs.gnu.org) (208.118.235.92) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Wed, 05 Sep 2018 11:52:51 +0000 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1fxWMa-0000Dn-RN for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:52:49 -0400 Received: from relay1.mentorg.com ([192.94.38.131]:45753) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1fxWMa-00085f-JR for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:52:48 -0400 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=svr-ies-mbx-01.mgc.mentorg.com) by relay1.mentorg.com with esmtps (TLSv1.2:ECDHE-RSA-AES256-SHA384:256) id 1fxWMP-00065N-NS from Andrew_Stubbs@mentor.com for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 04:52:37 -0700 Received: from build6-trusty-cs.sje.mentorg.com (137.202.0.90) by svr-ies-mbx-01.mgc.mentorg.com (139.181.222.1) 
with Microsoft SMTP Server (TLS) id 15.0.1320.4; Wed, 5 Sep 2018 12:52:33 +0100 From: To: Subject: [PATCH 24/25] Ignore LLVM's blank lines. Date: Wed, 5 Sep 2018 12:52:12 +0100 Message-ID: <58748b9db7927f39a5740f42e7c30386d61a4080.1536144068.git.ams@codesourcery.com> In-Reply-To: References: MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: Windows NT kernel [generic] [fuzzy] X-Received-From: 192.94.38.131 The GCN toolchain must use the LLVM assembler and linker because there's no binutils port. The LLVM tools do not have the same diagnostic style as binutils, so the "blank line(s) in output" tests are inappropriate (and very noisy). The LLVM tools also have different command line options, so it's not possible to autodetect object formats in the same way. This patch addresses both issues. 2018-09-05 Andrew Stubbs gcc/testsuite/ * lib/file-format.exp (gcc_target_object_format): Handle AMD GCN. * lib/gcc-dg.exp (gcc-dg-prune): Ignore blank lines from the LLVM linker. * lib/target-supports.exp (check_effective_target_offload_gcn): New. (check_effective_target_llvm_binutils): Likewise. --- gcc/testsuite/lib/file-format.exp | 3 +++ gcc/testsuite/lib/gcc-dg.exp | 2 +- gcc/testsuite/lib/target-supports.exp | 14 ++++++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/lib/file-format.exp b/gcc/testsuite/lib/file-format.exp index 5c47246..c595fe2 100644 --- a/gcc/testsuite/lib/file-format.exp +++ b/gcc/testsuite/lib/file-format.exp @@ -41,6 +41,9 @@ proc gcc_target_object_format { } { } elseif { [istarget *-*-aix*] } { # AIX doesn't necessarily have objdump, so hand-code it. 
set gcc_target_object_format_saved coff + } elseif { [istarget *-*-amdhsa*] } { + # AMD GCN uses LLVM objdump which is not CLI-compatible + set gcc_target_object_format_saved elf } else { set objdump_name [find_binutils_prog objdump] set open_file [open objfmtst.c w] diff --git a/gcc/testsuite/lib/gcc-dg.exp b/gcc/testsuite/lib/gcc-dg.exp index f5e6bef..7df348e 100644 --- a/gcc/testsuite/lib/gcc-dg.exp +++ b/gcc/testsuite/lib/gcc-dg.exp @@ -361,7 +361,7 @@ proc gcc-dg-prune { system text } { # Complain about blank lines in the output (PR other/69006) global allow_blank_lines - if { !$allow_blank_lines } { + if { !$allow_blank_lines && ![check_effective_target_llvm_binutils]} { set num_blank_lines [llength [regexp -all -inline "\n\n" $text]] if { $num_blank_lines } { global testname_with_flags diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 61442bd..1e627fa 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -9129,6 +9129,14 @@ proc check_effective_target_offload_hsa { } { } "-foffload=hsa" ] } +# Return 1 if the compiler has been configured with amdgcn offloading. 
+ +proc check_effective_target_offload_gcn { } { + return [check_no_compiler_messages offload_gcn assembly { + int main () {return 0;} + } "-foffload=amdgcn-unknown-amdhsa" ] +} + # Return 1 if the target support -fprofile-update=atomic proc check_effective_target_profile_update_atomic {} { return [check_no_compiler_messages profile_update_atomic assembly { @@ -9427,3 +9435,9 @@ proc check_effective_target_cet { } { } } "-O2" ] } + +# Return 1 if this target uses an LLVM assembler and/or linker +proc check_effective_target_llvm_binutils { } { + return [expr { [istarget amdgcn*-*-*] + || [check_effective_target_offload_gcn] } ] +} From patchwork Wed Sep 5 11:53:03 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Stubbs X-Patchwork-Id: 966349 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-485185-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="nQ4hclI0"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 4252FK3Z9qz9s5c for ; Wed, 5 Sep 2018 21:55:01 +1000 (AEST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; q=dns; s=default; b=ldRe10xyFH2e2WWe36V7Muljb8tGz 
hx0x9prqhab7+A53sOYGElRyGOWH5eA8o3IjjSrBJd43ny1x8hggY0i8Qp8budZM rB2JA9rel6xb5RQ1dP+rBzJaq0j94Lq4dAtx+yrk41sz0pxg3nWxpz5VF0Bi5wtc 64I6QIZkd8FDI8= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references:mime-version :content-type; s=default; bh=uX0tZB4oh1NRr4pFuHpJxRBgp/E=; b=nQ4 hclI0YAJi6LTfPTe2LYhgj7zX/Aq7NIMQKYtLUDXIzwdsnY3Y8FhayVtJB1uwsmz evi5Q2FwR0LM1bZvILdxUfeAq3g6alJemaW91mOktFubFfkaf0WtbI2eU5/HgxXB h3OkNXuX4ZK04YKFFAzlXKiiRgXcQ6tefhnoWP44= Received: (qmail 109988 invoked by alias); 5 Sep 2018 11:53:34 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 109834 invoked by uid 89); 5 Sep 2018 11:53:32 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-24.8 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3 autolearn=ham version=3.3.2 spammy=Listed X-HELO: eggs.gnu.org Received: from eggs.gnu.org (HELO eggs.gnu.org) (208.118.235.92) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Wed, 05 Sep 2018 11:53:27 +0000 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1fxWN6-0001vq-S0 for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:53:24 -0400 Received: from relay1.mentorg.com ([192.94.38.131]:45827) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1fxWN5-0001je-0q for gcc-patches@gcc.gnu.org; Wed, 05 Sep 2018 07:53:19 -0400 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=svr-ies-mbx-01.mgc.mentorg.com) by relay1.mentorg.com with esmtps (TLSv1.2:ECDHE-RSA-AES256-SHA384:256) id 1fxWN3-00069x-G4 from Andrew_Stubbs@mentor.com for gcc-patches@gcc.gnu.org; Wed, 05 
Sep 2018 04:53:17 -0700 Received: from build6-trusty-cs.sje.mentorg.com (137.202.0.90) by svr-ies-mbx-01.mgc.mentorg.com (139.181.222.1) with Microsoft SMTP Server (TLS) id 15.0.1320.4; Wed, 5 Sep 2018 12:53:12 +0100 From: To: Subject: [PATCH 25/25] Port testsuite to GCN Date: Wed, 5 Sep 2018 12:53:03 +0100 Message-ID: In-Reply-To: References: MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: Windows NT kernel [generic] [fuzzy] X-Received-From: 192.94.38.131 This collection of miscellaneous patches configures the testsuite to run on AMD GCN in a standalone (i.e. not offloading) configuration. It assumes you have your Dejagnu set up to run binaries via the gcn-run tool. 2018-09-05 Andrew Stubbs Kwok Cheung Yeung Julian Brown Tom de Vries gcc/testsuite/ * gcc.dg/20020312-2.c: Add amdgcn support. * gcc.dg/Wno-frame-address.c: Disable on amdgcn. * gcc.dg/builtin-apply2.c: Likewise. * gcc.dg/torture/stackalign/builtin-apply-2.c: Likewise. * gcc.dg/gimplefe-28.c: Force -ffast-math. * gcc.dg/intermod-1.c: Add -mlocal-symbol-id on amdgcn. * gcc.dg/memcmp-1.c: Increase timeout factor. * gcc.dg/pr59605-2.c: Add -DMAX_COPY=1025 on amdgcn. * gcc.dg/sibcall-10.c: xfail on amdgcn. * gcc.dg/sibcall-9.c: Likewise. * gcc.dg/tree-ssa/gen-vect-11c.c: Likewise. * gcc.dg/tree-ssa/pr84512.c: Likewise. * gcc.dg/tree-ssa/loop-1.c: Adjust expectations for amdgcn. * gfortran.dg/bind_c_array_params_2.f90: Likewise. * gcc.dg/vect/tree-vect.h: Avoid signal on amdgcn. * lib/target-supports.exp (check_effective_target_trampolines): Configure amdgcn. (check_profiling_available): Likewise. (check_effective_target_global_constructor): Likewise. (check_effective_target_return_address): Likewise. (check_effective_target_fopenacc): Likewise. (check_effective_target_fopenmp): Likewise. (check_effective_target_vect_int): Likewise. (check_effective_target_vect_intfloat_cvt): Likewise. (check_effective_target_vect_uintfloat_cvt): Likewise. 
(check_effective_target_vect_floatint_cvt): Likewise. (check_effective_target_vect_floatuint_cvt): Likewise. (check_effective_target_vect_simd_clones): Likewise. (check_effective_target_vect_shift): Likewise. (check_effective_target_whole_vector_shift): Likewise. (check_effective_target_vect_bswap): Likewise. (check_effective_target_vect_shift_char): Likewise. (check_effective_target_vect_long): Likewise. (check_effective_target_vect_float): Likewise. (check_effective_target_vect_double): Likewise. (check_effective_target_vect_perm): Likewise. (check_effective_target_vect_perm_byte): Likewise. (check_effective_target_vect_perm_short): Likewise. (check_effective_target_vect_widen_mult_qi_to_hi): Likewise. (check_effective_target_vect_widen_mult_hi_to_si): Likewise. (check_effective_target_vect_widen_mult_qi_to_hi_pattern): Likewise. (check_effective_target_vect_widen_mult_hi_to_si_pattern): Likewise. (check_effective_target_vect_natural_alignment): Likewise. (check_effective_target_vect_fully_masked): Likewise. (check_effective_target_vect_element_align): Likewise. (check_effective_target_vect_masked_store): Likewise. (check_effective_target_vect_scatter_store): Likewise. (check_effective_target_vect_condition): Likewise. (check_effective_target_vect_cond_mixed): Likewise. (check_effective_target_vect_char_mult): Likewise. (check_effective_target_vect_short_mult): Likewise. (check_effective_target_vect_int_mult): Likewise. (check_effective_target_sqrt_insn): Likewise. (check_effective_target_vect_call_sqrtf): Likewise. (check_effective_target_vect_call_btrunc): Likewise. (check_effective_target_vect_call_btruncf): Likewise. (check_effective_target_vect_call_ceil): Likewise. (check_effective_target_vect_call_floorf): Likewise. (check_effective_target_lto): Likewise. (check_vect_support_and_set_flags): Likewise. (check_effective_target_vect_stridedN): Enable when fully masked is available. 
--- gcc/testsuite/gcc.dg/20020312-2.c | 2 + gcc/testsuite/gcc.dg/Wno-frame-address.c | 2 +- gcc/testsuite/gcc.dg/builtin-apply2.c | 2 +- gcc/testsuite/gcc.dg/gimplefe-28.c | 2 +- gcc/testsuite/gcc.dg/intermod-1.c | 1 + gcc/testsuite/gcc.dg/memcmp-1.c | 1 + gcc/testsuite/gcc.dg/pr59605-2.c | 2 +- gcc/testsuite/gcc.dg/sibcall-10.c | 2 +- gcc/testsuite/gcc.dg/sibcall-9.c | 2 +- .../gcc.dg/torture/stackalign/builtin-apply-2.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11c.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/loop-1.c | 6 +- gcc/testsuite/gcc.dg/tree-ssa/pr84512.c | 2 +- gcc/testsuite/gcc.dg/vect/tree-vect.h | 4 + .../gfortran.dg/bind_c_array_params_2.f90 | 3 +- gcc/testsuite/lib/target-supports.exp | 126 +++++++++++++++------ 16 files changed, 113 insertions(+), 48 deletions(-) diff --git a/gcc/testsuite/gcc.dg/20020312-2.c b/gcc/testsuite/gcc.dg/20020312-2.c index f8be3ce..c88fdf3 100644 --- a/gcc/testsuite/gcc.dg/20020312-2.c +++ b/gcc/testsuite/gcc.dg/20020312-2.c @@ -116,6 +116,8 @@ extern void abort (void); # if defined (__CK807__) || defined (__CK810__) # define PIC_REG "r28" # endif +#elif defined (__AMDGCN__) +/* No pic register. */ #else # error "Modify the test for your target." 
#endif diff --git a/gcc/testsuite/gcc.dg/Wno-frame-address.c b/gcc/testsuite/gcc.dg/Wno-frame-address.c index 9fe4d07..5e3ef7a 100644 --- a/gcc/testsuite/gcc.dg/Wno-frame-address.c +++ b/gcc/testsuite/gcc.dg/Wno-frame-address.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-skip-if "Cannot access arbitrary stack frames" { arm*-*-* avr-*-* hppa*-*-* ia64-*-* visium-*-* csky-*-* } } */ +/* { dg-skip-if "Cannot access arbitrary stack frames" { arm*-*-* amdgcn-*-* avr-*-* hppa*-*-* ia64-*-* visium-*-* csky-*-* } } */ /* { dg-options "-Werror" } */ /* { dg-additional-options "-mbackchain" { target { s390*-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/builtin-apply2.c b/gcc/testsuite/gcc.dg/builtin-apply2.c index 3768caa..aca3b1f 100644 --- a/gcc/testsuite/gcc.dg/builtin-apply2.c +++ b/gcc/testsuite/gcc.dg/builtin-apply2.c @@ -1,6 +1,6 @@ /* { dg-do run } */ /* { dg-require-effective-target untyped_assembly } */ -/* { dg-skip-if "Variadic funcs have all args on stack. Normal funcs have args in registers." { "avr-*-* nds32*-*-*" } } */ +/* { dg-skip-if "Variadic funcs have all args on stack. Normal funcs have args in registers." { "avr-*-* nds32*-*-* amdgcn-*-*" } } */ /* { dg-skip-if "Variadic funcs use different argument passing from normal funcs." { "riscv*-*-*" } } */ /* { dg-skip-if "Variadic funcs use Base AAPCS. Normal funcs use VFP variant." 
{ arm*-*-* && arm_hf_eabi } } */ diff --git a/gcc/testsuite/gcc.dg/gimplefe-28.c b/gcc/testsuite/gcc.dg/gimplefe-28.c index 467172d..57b6e1f 100644 --- a/gcc/testsuite/gcc.dg/gimplefe-28.c +++ b/gcc/testsuite/gcc.dg/gimplefe-28.c @@ -1,5 +1,5 @@ /* { dg-do compile { target sqrt_insn } } */ -/* { dg-options "-fgimple -O2" } */ +/* { dg-options "-fgimple -O2 -ffast-math" } */ double __GIMPLE f1 (double x) diff --git a/gcc/testsuite/gcc.dg/intermod-1.c b/gcc/testsuite/gcc.dg/intermod-1.c index 9f8d19d..44a8ce0 100644 --- a/gcc/testsuite/gcc.dg/intermod-1.c +++ b/gcc/testsuite/gcc.dg/intermod-1.c @@ -1,4 +1,5 @@ /* { dg-do compile } */ +/* { dg-additional-options "-mlocal-symbol-id=" { target amdgcn-*-* } } */ /* { dg-final { scan-assembler-not {foo[1-9]\.[0-9]} } } */ /* Check that we don't get .0 suffixes on static variables when not using diff --git a/gcc/testsuite/gcc.dg/memcmp-1.c b/gcc/testsuite/gcc.dg/memcmp-1.c index 619cf9b..ea837ca 100644 --- a/gcc/testsuite/gcc.dg/memcmp-1.c +++ b/gcc/testsuite/gcc.dg/memcmp-1.c @@ -2,6 +2,7 @@ /* { dg-do run } */ /* { dg-options "-O2" } */ /* { dg-require-effective-target ptr32plus } */ +/* { dg-timeout-factor 2 } */ #include #include diff --git a/gcc/testsuite/gcc.dg/pr59605-2.c b/gcc/testsuite/gcc.dg/pr59605-2.c index 6d6ff23..9575481 100644 --- a/gcc/testsuite/gcc.dg/pr59605-2.c +++ b/gcc/testsuite/gcc.dg/pr59605-2.c @@ -1,6 +1,6 @@ /* { dg-do run } */ /* { dg-options "-O2" } */ -/* { dg-additional-options "-DMAX_COPY=1025" { target { { simulator } || { nvptx-*-* } } } } */ +/* { dg-additional-options "-DMAX_COPY=1025" { target { { simulator } || { nvptx-*-* amdgcn*-*-* } } } } */ /* { dg-additional-options "-minline-stringops-dynamically" { target { i?86-*-* x86_64-*-* } } } */ #include "pr59605.c" diff --git a/gcc/testsuite/gcc.dg/sibcall-10.c b/gcc/testsuite/gcc.dg/sibcall-10.c index 54cc604..f3e0a9b 100644 --- a/gcc/testsuite/gcc.dg/sibcall-10.c +++ b/gcc/testsuite/gcc.dg/sibcall-10.c @@ -5,7 +5,7 @@ Copyright (C) 
2002 Free Software Foundation Inc. Contributed by Hans-Peter Nilsson */ -/* { dg-do run { xfail { { cris-*-* crisv32-*-* csky-*-* h8300-*-* hppa*64*-*-* m32r-*-* mcore-*-* mn10300-*-* msp430*-*-* nds32*-*-* xstormy16-*-* v850*-*-* vax-*-* xtensa*-*-* } || { arm*-*-* && { ! arm32 } } } } } */ +/* { dg-do run { xfail { { amdgcn*-*-* cris-*-* crisv32-*-* csky-*-* h8300-*-* hppa*64*-*-* m32r-*-* mcore-*-* mn10300-*-* msp430*-*-* nds32*-*-* xstormy16-*-* v850*-*-* vax-*-* xtensa*-*-* } || { arm*-*-* && { ! arm32 } } } } } */ /* -mlongcall disables sibcall patterns. */ /* { dg-skip-if "" { powerpc*-*-* } { "-mlongcall" } { "" } } */ /* -msave-restore disables sibcall patterns. */ diff --git a/gcc/testsuite/gcc.dg/sibcall-9.c b/gcc/testsuite/gcc.dg/sibcall-9.c index fc3bd9d..adb2ca3 100644 --- a/gcc/testsuite/gcc.dg/sibcall-9.c +++ b/gcc/testsuite/gcc.dg/sibcall-9.c @@ -5,7 +5,7 @@ Copyright (C) 2002 Free Software Foundation Inc. Contributed by Hans-Peter Nilsson */ -/* { dg-do run { xfail { { cris-*-* crisv32-*-* csky-*-* h8300-*-* hppa*64*-*-* m32r-*-* mcore-*-* mn10300-*-* msp430*-*-* nds32*-*-* nvptx-*-* xstormy16-*-* v850*-*-* vax-*-* xtensa*-*-* } || { arm*-*-* && { ! arm32 } } } } } */ +/* { dg-do run { xfail { { amdgcn*-*-* cris-*-* crisv32-*-* csky-*-* h8300-*-* hppa*64*-*-* m32r-*-* mcore-*-* mn10300-*-* msp430*-*-* nds32*-*-* nvptx-*-* xstormy16-*-* v850*-*-* vax-*-* xtensa*-*-* } || { arm*-*-* && { ! arm32 } } } } } */ /* -mlongcall disables sibcall patterns. */ /* { dg-skip-if "" { powerpc*-*-* } { "-mlongcall" } { "" } } */ /* -msave-restore disables sibcall patterns. */ diff --git a/gcc/testsuite/gcc.dg/torture/stackalign/builtin-apply-2.c b/gcc/testsuite/gcc.dg/torture/stackalign/builtin-apply-2.c index d033010..669ab9a 100644 --- a/gcc/testsuite/gcc.dg/torture/stackalign/builtin-apply-2.c +++ b/gcc/testsuite/gcc.dg/torture/stackalign/builtin-apply-2.c @@ -9,7 +9,7 @@ /* arm_hf_eabi: Variadic funcs use Base AAPCS. Normal funcs use VFP variant. 
avr: Variadic funcs don't pass arguments in registers, while normal funcs do. */ -/* { dg-skip-if "Variadic funcs use different argument passing from normal funcs" { arm_hf_eabi || { avr-*-* riscv*-*-* } } } */ +/* { dg-skip-if "Variadic funcs use different argument passing from normal funcs" { arm_hf_eabi || { avr-*-* riscv*-*-* amdgcn-*-* } } } */ /* { dg-skip-if "Variadic funcs have all args on stack. Normal funcs have args in registers." { nds32*-*-* } } */ /* { dg-require-effective-target untyped_assembly } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11c.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11c.c index 236d3a5..22ff44c 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11c.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11c.c @@ -39,4 +39,4 @@ int main () } -/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail amdgcn*-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-1.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-1.c index 1862750..f422f39 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/loop-1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-1.c @@ -45,8 +45,10 @@ int xxx(void) relaxation. */ /* CRIS keeps the address in a register. */ /* m68k sometimes puts the address in a register, depending on CPU and PIC. */ +/* AMD GCN loads symbol addresses as hi/lo pairs, and then reuses that for + each jump. 
*/ -/* { dg-final { scan-assembler-times "foo" 5 { xfail hppa*-*-* ia64*-*-* sh*-*-* cris-*-* crisv32-*-* fido-*-* m68k-*-* i?86-*-mingw* i?86-*-cygwin* x86_64-*-mingw* visium-*-* nvptx*-*-* } } } */ +/* { dg-final { scan-assembler-times "foo" 5 { xfail hppa*-*-* ia64*-*-* sh*-*-* cris-*-* crisv32-*-* fido-*-* m68k-*-* i?86-*-mingw* i?86-*-cygwin* x86_64-*-mingw* visium-*-* nvptx*-*-* amdgcn*-*-* } } } */ /* { dg-final { scan-assembler-times "foo,%r" 5 { target hppa*-*-* } } } */ /* { dg-final { scan-assembler-times "= foo" 5 { target ia64*-*-* } } } */ /* { dg-final { scan-assembler-times "call\[ \t\]*_foo" 5 { target i?86-*-mingw* i?86-*-cygwin* } } } */ @@ -56,3 +58,5 @@ int xxx(void) /* { dg-final { scan-assembler-times "\[jb\]sr" 5 { target fido-*-* m68k-*-* } } } */ /* { dg-final { scan-assembler-times "bra *tr,r\[1-9\]*,r21" 5 { target visium-*-* } } } */ /* { dg-final { scan-assembler-times "(?n)\[ \t\]call\[ \t\].*\[ \t\]foo," 5 { target nvptx*-*-* } } } */ +/* { dg-final { scan-assembler-times "add_u32\t\[sv\]\[0-9\]*, \[sv\]\[0-9\]*, foo@rel32@lo" 1 { target { amdgcn*-*-* } } } } */ +/* { dg-final { scan-assembler-times "s_swappc_b64" 5 { target { amdgcn*-*-* } } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr84512.c b/gcc/testsuite/gcc.dg/tree-ssa/pr84512.c index 056d1c4..3975757 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr84512.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr84512.c @@ -13,4 +13,4 @@ int foo() } /* Listed targets xfailed due to PR84958. 
*/ -/* { dg-final { scan-tree-dump "return 285;" "optimized" { xfail { { alpha*-*-* nvptx*-*-* } || { sparc*-*-* && lp64 } } } } } */ +/* { dg-final { scan-tree-dump "return 285;" "optimized" { xfail { { alpha*-*-* amdgcn*-*-* nvptx*-*-* } || { sparc*-*-* && lp64 } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/tree-vect.h b/gcc/testsuite/gcc.dg/vect/tree-vect.h index 69c93ac..2ddfa5e 100644 --- a/gcc/testsuite/gcc.dg/vect/tree-vect.h +++ b/gcc/testsuite/gcc.dg/vect/tree-vect.h @@ -1,5 +1,9 @@ /* Check if system supports SIMD */ +#ifdef __AMDGCN__ +#define signal(A,B) +#else #include +#endif #if defined(__i386__) || defined(__x86_64__) # include "cpuid.h" diff --git a/gcc/testsuite/gfortran.dg/bind_c_array_params_2.f90 b/gcc/testsuite/gfortran.dg/bind_c_array_params_2.f90 index 25f5dda..34ed055 100644 --- a/gcc/testsuite/gfortran.dg/bind_c_array_params_2.f90 +++ b/gcc/testsuite/gfortran.dg/bind_c_array_params_2.f90 @@ -16,8 +16,9 @@ integer :: aa(4,4) call test(aa) end -! { dg-final { scan-assembler-times "\[ \t\]\[$,_0-9\]*myBindC" 1 { target { ! { hppa*-*-* s390*-*-* *-*-cygwin* } } } } } +! { dg-final { scan-assembler-times "\[ \t\]\[$,_0-9\]*myBindC" 1 { target { ! { hppa*-*-* s390*-*-* *-*-cygwin* amdgcn*-*-* } } } } } ! { dg-final { scan-assembler-times "myBindC,%r2" 1 { target { hppa*-*-* } } } } ! { dg-final { scan-assembler-times "call\tmyBindC" 1 { target { *-*-cygwin* } } } } ! { dg-final { scan-assembler-times "brasl\t%r\[0-9\]*,myBindC" 1 { target { s390*-*-* } } } } +! { dg-final { scan-assembler-times "add_u32\t\[sv\]\[0-9\]*, \[sv\]\[0-9\]*, myBindC@rel32@lo" 1 { target { amdgcn*-*-* } } } } ! { dg-final { scan-tree-dump-times "test \\\(&parm\\." 
1 "original" } } diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 1e627fa..bbb2e1f 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -662,6 +662,7 @@ proc check_profiling_available { test_what } { # missing other needed machinery. if {[istarget aarch64*-*-elf] || [istarget am3*-*-linux*] + || [istarget amdgcn-*-*] || [istarget arm*-*-eabi*] || [istarget arm*-*-elf] || [istarget arm*-*-symbianelf*] @@ -788,6 +789,9 @@ proc check_effective_target_global_constructor {} { if { [istarget nvptx-*-*] } { return 0 } + if { [istarget amdgcn-*-*] } { + return 0 + } return 1 } @@ -808,6 +812,10 @@ proc check_effective_target_return_address {} { if { [istarget nvptx-*-*] } { return 0 } + # It could be supported on amdgcn, but isn't yet. + if { [istarget amdgcn*-*-*] } { + return 0 + } return 1 } @@ -954,9 +962,10 @@ proc check_effective_target_fgraphite {} { # code, 0 otherwise. proc check_effective_target_fopenacc {} { - # nvptx can be built with the device-side bits of openacc, but it + # nvptx/amdgcn can be built with the device-side bits of openacc, but it # does not make sense to test it as an openacc host. if [istarget nvptx-*-*] { return 0 } + if [istarget amdgcn-*-*] { return 0 } return [check_no_compiler_messages fopenacc object { void foo (void) { } @@ -967,9 +976,10 @@ proc check_effective_target_fopenacc {} { # code, 0 otherwise. proc check_effective_target_fopenmp {} { - # nvptx can be built with the device-side bits of libgomp, but it + # nvptx/amdgcn can be built with the device-side bits of libgomp, but it # does not make sense to test it as an openmp host. 
if [istarget nvptx-*-*] { return 0 } + if [istarget amdgcn-*-*] { return 0 } return [check_no_compiler_messages fopenmp object { void foo (void) { } @@ -3107,6 +3117,7 @@ proc check_effective_target_vect_int { } { if { [istarget i?86-*-*] || [istarget x86_64-*-*] || ([istarget powerpc*-*-*] && ![istarget powerpc-*-linux*paired*]) + || [istarget amdgcn-*-*] || [istarget spu-*-*] || [istarget sparc*-*-*] || [istarget alpha*-*-*] @@ -3144,7 +3155,8 @@ proc check_effective_target_vect_intfloat_cvt { } { && ![istarget powerpc-*-linux*paired*]) || [is-effective-target arm_neon] || ([istarget mips*-*-*] - && [et-is-effective-target mips_msa]) } { + && [et-is-effective-target mips_msa]) + || [istarget amdgcn-*-*] } { set et_vect_intfloat_cvt_saved($et_index) 1 } } @@ -3248,7 +3260,8 @@ proc check_effective_target_vect_uintfloat_cvt { } { || [istarget aarch64*-*-*] || [is-effective-target arm_neon] || ([istarget mips*-*-*] - && [et-is-effective-target mips_msa]) } { + && [et-is-effective-target mips_msa]) + || [istarget amdgcn-*-*] } { set et_vect_uintfloat_cvt_saved($et_index) 1 } } @@ -3276,7 +3289,8 @@ proc check_effective_target_vect_floatint_cvt { } { && ![istarget powerpc-*-linux*paired*]) || [is-effective-target arm_neon] || ([istarget mips*-*-*] - && [et-is-effective-target mips_msa]) } { + && [et-is-effective-target mips_msa]) + || [istarget amdgcn-*-*] } { set et_vect_floatint_cvt_saved($et_index) 1 } } @@ -3302,7 +3316,8 @@ proc check_effective_target_vect_floatuint_cvt { } { && ![istarget powerpc-*-linux*paired*]) || [is-effective-target arm_neon] || ([istarget mips*-*-*] - && [et-is-effective-target mips_msa]) } { + && [et-is-effective-target mips_msa]) + || [istarget amdgcn-*-*] } { set et_vect_floatuint_cvt_saved($et_index) 1 } } @@ -3352,7 +3367,8 @@ proc check_effective_target_vect_simd_clones { } { # specified arch will be chosen, but still we need to at least # be able to assemble avx512f. 
if { (([istarget i?86-*-*] || [istarget x86_64-*-*]) - && [check_effective_target_avx512f]) } { + && [check_effective_target_avx512f]) + || [istarget amdgcn-*-*] } { set et_vect_simd_clones_saved($et_index) 1 } } @@ -5462,7 +5478,8 @@ proc check_effective_target_vect_shift { } { && ([et-is-effective-target mips_msa] || [et-is-effective-target mips_loongson])) || ([istarget s390*-*-*] - && [check_effective_target_s390_vx]) } { + && [check_effective_target_s390_vx]) + || [istarget amdgcn-*-*] } { set et_vect_shift_saved($et_index) 1 } } @@ -5482,7 +5499,8 @@ proc check_effective_target_whole_vector_shift { } { || ([istarget mips*-*-*] && [et-is-effective-target mips_loongson]) || ([istarget s390*-*-*] - && [check_effective_target_s390_vx]) } { + && [check_effective_target_s390_vx]) + || [istarget amdgcn-*-*] } { set answer 1 } else { set answer 0 @@ -5504,6 +5522,7 @@ proc check_effective_target_vect_bswap { } { set et_vect_bswap_saved($et_index) 0 if { [istarget aarch64*-*-*] || [is-effective-target arm_neon] + || [istarget amdgcn-*-*] } { set et_vect_bswap_saved($et_index) 1 } @@ -5530,7 +5549,8 @@ proc check_effective_target_vect_shift_char { } { || ([istarget mips*-*-*] && [et-is-effective-target mips_msa]) || ([istarget s390*-*-*] - && [check_effective_target_s390_vx]) } { + && [check_effective_target_s390_vx]) + || [istarget amdgcn-*-*] } { set et_vect_shift_char_saved($et_index) 1 } } @@ -5555,7 +5575,8 @@ proc check_effective_target_vect_long { } { || ([istarget mips*-*-*] && [et-is-effective-target mips_msa]) || ([istarget s390*-*-*] - && [check_effective_target_s390_vx]) } { + && [check_effective_target_s390_vx]) + || [istarget amdgcn-*-*] } { set answer 1 } else { set answer 0 @@ -5589,7 +5610,8 @@ proc check_effective_target_vect_float { } { && [et-is-effective-target mips_msa]) || [is-effective-target arm_neon] || ([istarget s390*-*-*] - && [check_effective_target_s390_vxe]) } { + && [check_effective_target_s390_vxe]) + || [istarget amdgcn-*-*] } { set 
et_vect_float_saved($et_index) 1 } } @@ -5631,7 +5653,8 @@ proc check_effective_target_vect_double { } { || ([istarget mips*-*-*] && [et-is-effective-target mips_msa]) || ([istarget s390*-*-*] - && [check_effective_target_s390_vx]) } { + && [check_effective_target_s390_vx]) + || [istarget amdgcn-*-*] } { set et_vect_double_saved($et_index) 1 } } @@ -5767,7 +5790,8 @@ proc check_effective_target_vect_perm { } { && ([et-is-effective-target mpaired_single] || [et-is-effective-target mips_msa])) || ([istarget s390*-*-*] - && [check_effective_target_s390_vx]) } { + && [check_effective_target_s390_vx]) + || [istarget amdgcn-*-*] } { set et_vect_perm_saved($et_index) 1 } } @@ -5872,7 +5896,8 @@ proc check_effective_target_vect_perm_byte { } { || ([istarget mips-*.*] && [et-is-effective-target mips_msa]) || ([istarget s390*-*-*] - && [check_effective_target_s390_vx]) } { + && [check_effective_target_s390_vx]) + || [istarget amdgcn-*-*] } { set et_vect_perm_byte_saved($et_index) 1 } } @@ -5913,7 +5938,8 @@ proc check_effective_target_vect_perm_short { } { || ([istarget mips*-*-*] && [et-is-effective-target mips_msa]) || ([istarget s390*-*-*] - && [check_effective_target_s390_vx]) } { + && [check_effective_target_s390_vx]) + || [istarget amdgcn-*-*] } { set et_vect_perm_short_saved($et_index) 1 } } @@ -6084,7 +6110,8 @@ proc check_effective_target_vect_widen_mult_qi_to_hi { } { && ![check_effective_target_aarch64_sve]) || [is-effective-target arm_neon] || ([istarget s390*-*-*] - && [check_effective_target_s390_vx]) } { + && [check_effective_target_s390_vx]) + || [istarget amdgcn-*-*] } { set et_vect_widen_mult_qi_to_hi_saved($et_index) 1 } } @@ -6124,7 +6151,8 @@ proc check_effective_target_vect_widen_mult_hi_to_si { } { || [istarget i?86-*-*] || [istarget x86_64-*-*] || [is-effective-target arm_neon] || ([istarget s390*-*-*] - && [check_effective_target_s390_vx]) } { + && [check_effective_target_s390_vx]) + || [istarget amdgcn-*-*] } { set 
et_vect_widen_mult_hi_to_si_saved($et_index) 1 } } @@ -6151,7 +6179,8 @@ proc check_effective_target_vect_widen_mult_qi_to_hi_pattern { } { || ([is-effective-target arm_neon] && [check_effective_target_arm_little_endian]) || ([istarget s390*-*-*] - && [check_effective_target_s390_vx]) } { + && [check_effective_target_s390_vx]) + || [istarget amdgcn-*-*] } { set et_vect_widen_mult_qi_to_hi_pattern_saved($et_index) 1 } } @@ -6181,7 +6210,8 @@ proc check_effective_target_vect_widen_mult_hi_to_si_pattern { } { || ([is-effective-target arm_neon] && [check_effective_target_arm_little_endian]) || ([istarget s390*-*-*] - && [check_effective_target_s390_vx]) } { + && [check_effective_target_s390_vx]) + || [istarget amdgcn-*-*] } { set et_vect_widen_mult_hi_to_si_pattern_saved($et_index) 1 } } @@ -6578,7 +6608,8 @@ proc check_effective_target_vect_natural_alignment { } { set et_vect_natural_alignment 1 if { [check_effective_target_arm_eabi] || [istarget nvptx-*-*] - || [istarget s390*-*-*] } { + || [istarget s390*-*-*] + || [istarget amdgcn-*-*] } { set et_vect_natural_alignment 0 } verbose "check_effective_target_vect_natural_alignment:\ @@ -6589,7 +6620,8 @@ proc check_effective_target_vect_natural_alignment { } { # Return true if fully-masked loops are supported. 
proc check_effective_target_vect_fully_masked { } { - return [check_effective_target_aarch64_sve] + return [expr { [check_effective_target_aarch64_sve] + || [istarget amdgcn*-*-*] }] } # Return 1 if the target doesn't prefer any alignment beyond element @@ -6648,7 +6680,8 @@ proc check_effective_target_vect_element_align { } { set et_vect_element_align($et_index) 0 if { ([istarget arm*-*-*] && ![check_effective_target_arm_vect_no_misalign]) - || [check_effective_target_vect_hw_misalign] } { + || [check_effective_target_vect_hw_misalign] + || [istarget amdgcn-*-*] } { set et_vect_element_align($et_index) 1 } } @@ -6690,13 +6723,15 @@ proc check_effective_target_vect_load_lanes { } { # Return 1 if the target supports vector masked stores. proc check_effective_target_vect_masked_store { } { - return [check_effective_target_aarch64_sve] + return [expr { [check_effective_target_aarch64_sve] + || [istarget amdgcn*-*-*] }] } # Return 1 if the target supports vector scatter stores. proc check_effective_target_vect_scatter_store { } { - return [check_effective_target_aarch64_sve] + return [expr { [check_effective_target_aarch64_sve] + || [istarget amdgcn*-*-*] }] } # Return 1 if the target supports vector conditional operations, 0 otherwise. 
@@ -6719,7 +6754,8 @@ proc check_effective_target_vect_condition { } { || ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) || ([istarget s390*-*-*] - && [check_effective_target_s390_vx]) } { + && [check_effective_target_s390_vx]) + || [istarget amdgcn-*-*] } { set et_vect_cond_saved($et_index) 1 } } @@ -6746,7 +6782,8 @@ proc check_effective_target_vect_cond_mixed { } { || ([istarget mips*-*-*] && [et-is-effective-target mips_msa]) || ([istarget s390*-*-*] - && [check_effective_target_s390_vx]) } { + && [check_effective_target_s390_vx]) + || [istarget amdgcn-*-*] } { set et_vect_cond_mixed_saved($et_index) 1 } } @@ -6774,7 +6811,8 @@ proc check_effective_target_vect_char_mult { } { || ([istarget mips*-*-*] && [et-is-effective-target mips_msa]) || ([istarget s390*-*-*] - && [check_effective_target_s390_vx]) } { + && [check_effective_target_s390_vx]) + || [istarget amdgcn-*-*] } { set et_vect_char_mult_saved($et_index) 1 } } @@ -6804,7 +6842,8 @@ proc check_effective_target_vect_short_mult { } { && ([et-is-effective-target mips_msa] || [et-is-effective-target mips_loongson])) || ([istarget s390*-*-*] - && [check_effective_target_s390_vx]) } { + && [check_effective_target_s390_vx]) + || [istarget amdgcn-*-*] } { set et_vect_short_mult_saved($et_index) 1 } } @@ -6833,7 +6872,8 @@ proc check_effective_target_vect_int_mult { } { && [et-is-effective-target mips_msa]) || [check_effective_target_arm32] || ([istarget s390*-*-*] - && [check_effective_target_s390_vx]) } { + && [check_effective_target_s390_vx]) + || [istarget amdgcn-*-*] } { set et_vect_int_mult_saved($et_index) 1 } } @@ -6949,6 +6989,9 @@ foreach N {2 3 4 8} { || [istarget aarch64*-*-*]) && N >= 2 && N <= 4 } { set et_vect_stridedN_saved($et_index) 1 } + if [check_effective_target_vect_fully_masked] { + set et_vect_stridedN_saved($et_index) 1 + } } verbose "check_effective_target_vect_stridedN:\ @@ -7038,7 +7081,8 @@ proc check_effective_target_sqrt_insn { } { || [istarget aarch64*-*-*] || 
([istarget arm*-*-*] && [check_effective_target_arm_vfp_ok]) || ([istarget s390*-*-*] - && [check_effective_target_s390_vx]) } { + && [check_effective_target_s390_vx]) + || [istarget amdgcn-*-*] } { set et_sqrt_insn_saved 1 } } @@ -7076,7 +7120,8 @@ proc check_effective_target_vect_call_sqrtf { } { proc check_effective_target_vect_call_lrint { } { set et_vect_call_lrint 0 if { (([istarget i?86-*-*] || [istarget x86_64-*-*]) - && [check_effective_target_ilp32]) } { + && [check_effective_target_ilp32]) + || [istarget amdgcn-*-*] } { set et_vect_call_lrint 1 } @@ -7095,7 +7140,8 @@ proc check_effective_target_vect_call_btrunc { } { using cached result" 2 } else { set et_vect_call_btrunc_saved($et_index) 0 - if { [istarget aarch64*-*-*] } { + if { [istarget aarch64*-*-*] + || [istarget amdgcn-*-*] } { set et_vect_call_btrunc_saved($et_index) 1 } } @@ -7116,7 +7162,8 @@ proc check_effective_target_vect_call_btruncf { } { using cached result" 2 } else { set et_vect_call_btruncf_saved($et_index) 0 - if { [istarget aarch64*-*-*] } { + if { [istarget aarch64*-*-*] + || [istarget amdgcn-*-*] } { set et_vect_call_btruncf_saved($et_index) 1 } } @@ -7136,7 +7183,8 @@ proc check_effective_target_vect_call_ceil { } { verbose "check_effective_target_vect_call_ceil: using cached result" 2 } else { set et_vect_call_ceil_saved($et_index) 0 - if { [istarget aarch64*-*-*] } { + if { [istarget aarch64*-*-*] + || [istarget amdgcn-*-*] } { set et_vect_call_ceil_saved($et_index) 1 } } @@ -7196,7 +7244,8 @@ proc check_effective_target_vect_call_floorf { } { verbose "check_effective_target_vect_call_floorf: using cached result" 2 } else { set et_vect_call_floorf_saved($et_index) 0 - if { [istarget aarch64*-*-*] } { + if { [istarget aarch64*-*-*] + || [istarget amdgcn-*-*] } { set et_vect_call_floorf_saved($et_index) 1 } } @@ -8360,7 +8409,8 @@ proc check_effective_target_gld { } { # (LTO) support. 
proc check_effective_target_lto { } { - if { [istarget nvptx-*-*] } { + if { [istarget nvptx-*-*] + || [istarget amdgcn-*-*] } { return 0; } return [check_no_compiler_messages lto object { @@ -8678,6 +8728,8 @@ proc check_vect_support_and_set_flags { } { lappend DEFAULT_VECTCFLAGS "-march=z14" "-mzarch" set dg-do-what-default compile } + } elseif [istarget amdgcn-*-*] { + set dg-do-what-default run } else { return 0 }