From patchwork Wed Sep 24 15:06:04 2014 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: James Greenhalgh X-Patchwork-Id: 392985 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 91FC914009C for ; Thu, 25 Sep 2014 01:06:26 +1000 (EST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:cc:subject:date:message-id:mime-version:content-type; q=dns; s=default; b=ym6aDnjSyxgbNuiF5BshGJKRksZMY+ve7qZVzRqPC7a51H3s87 IAxzNMl2lBkL5j2v/dgN9mdwm/aaRccedXPp8plBObM9t+oGJGB3vMO2JY8FeQDP yXTaUfuJZselW/OyrodfvJBjtlrYmAS6uu1Y8WGJJuBT5SXiUTx/1jCy8= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:cc:subject:date:message-id:mime-version:content-type; s= default; bh=GGX0msCSQYQJzfMYh+40urNAHUM=; b=aM6W7TjSdcjZGz2oGw0b Aqzq3u2kzVCS/nmc5WE5SJyIfX1KAyw1Z9w3bBlCNF7X2tKQuA703UBYL1JfoaaD Lda2wrwtVxQoM4FYwdRAVQXX8KTnsDWuJQ1aoVF84VGd/t7Bpktf6bnAWSTcoUD0 +QyLoqO+O1Rdt8BicwTnZJ0= Received: (qmail 2761 invoked by alias); 24 Sep 2014 15:06:19 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 2750 invoked by uid 89); 24 Sep 2014 15:06:17 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-2.1 required=5.0 tests=AWL, BAYES_00, SPF_PASS autolearn=ham version=3.3.2 X-HELO: service87.mimecast.com Received: from service87.mimecast.com (HELO 
service87.mimecast.com) (91.220.42.44) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Wed, 24 Sep 2014 15:06:16 +0000 Received: from cam-owa2.Emea.Arm.com (fw-tnat.cambridge.arm.com [217.140.96.21]) by service87.mimecast.com; Wed, 24 Sep 2014 16:06:13 +0100 Received: from e106375-lin.cambridge.arm.com ([10.1.255.212]) by cam-owa2.Emea.Arm.com with Microsoft SMTPSVC(6.0.3790.3959); Wed, 24 Sep 2014 16:06:11 +0100 From: James Greenhalgh To: gcc-patches@gcc.gnu.org Cc: marcus.shawcroft@arm.com Subject: [AArch64] Wire up vqdmullh_laneq_s16 and vqdmulls_laneq_s32 Date: Wed, 24 Sep 2014 16:06:04 +0100 Message-Id: <1411571164-10540-1-git-send-email-james.greenhalgh@arm.com> MIME-Version: 1.0 X-MC-Unique: 114092416061304001 X-IsSubscribed: yes Hi, As per the subject line, this patch adds support for two arm_neon.h intrinsics that we had missed. We also need to fix the signature of vqdmulls_lane_s32, which is an obvious extension to this patch while we are in the area. Tested for simd.exp and aarch64.exp with no issues. OK? Thanks, James --- gcc/ 2014-09-24 James Greenhalgh * config/aarch64/aarch64-simd-builtins.def (sqdmull_laneq): Expand iterator. * config/aarch64/aarch64-simd.md (aarch64_sqdmull_laneq): Expand iterator. * config/aarch64/arm_neon.h (vqdmullh_laneq_s16): New. (vqdmulls_lane_s32): Fix return type. (vqdmulls_laneq_s32): New. gcc/testsuite/ 2014-09-24 James Greenhalgh * gcc.target/aarch64/simd/vqdmullh_laneq_s16.c: New. * gcc.target/aarch64/simd/vqdmulls_laneq_s32.c: Likewise. * gcc.target/aarch64/simd/vqdmulls_lane_s32.c: Fix return type. * gcc.target/aarch64/scalar_intrinsics.c (test_vqdmulls_lane_s32): Fix return type. 
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index de264c4..2367436 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -155,7 +155,7 @@ BUILTIN_VSD_HSI (BINOP, sqdmull, 0) BUILTIN_VSD_HSI (TERNOP, sqdmull_lane, 0) - BUILTIN_VD_HSI (TERNOP, sqdmull_laneq, 0) + BUILTIN_VSD_HSI (TERNOP, sqdmull_laneq, 0) BUILTIN_VD_HSI (BINOP, sqdmull_n, 0) BUILTIN_VQ_HSI (BINOP, sqdmull2, 0) BUILTIN_VQ_HSI (TERNOP, sqdmull2_lane, 0) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 493e88628c2a7ef2c4f87031d86d1a5edcbca06b..45ea9d7895e93d4c4b137de1c01f6a1e93942d11 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -3398,7 +3398,7 @@ (define_expand "aarch64_sqdmull_lane" [(match_operand: 0 "register_operand" "=w") - (match_operand:VD_HSI 1 "register_operand" "w") + (match_operand:VSD_HSI 1 "register_operand" "w") (match_operand: 2 "register_operand" "") (match_operand:SI 3 "immediate_operand" "i")] "TARGET_SIMD" diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index feca00e..9b1873f 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -19420,16 +19420,28 @@ vqdmullh_lane_s16 (int16_t __a, int16x4_t __b, const int __c) return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c); } +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqdmullh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c) +{ + return __builtin_aarch64_sqdmull_laneqhi (__a, __b, __c); +} + __extension__ static __inline int64_t __attribute__ ((__always_inline__)) vqdmulls_s32 (int32_t __a, int32_t __b) { return __builtin_aarch64_sqdmullsi (__a, __b); } -__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) vqdmulls_lane_s32 (int32_t __a, int32x2_t 
__b, const int __c) { - return (int64x1_t) {__builtin_aarch64_sqdmull_lanesi (__a, __b, __c)}; + return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c); +} + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vqdmulls_laneq_s32 (int32_t __a, int32x4_t __b, const int __c) +{ + return __builtin_aarch64_sqdmull_laneqsi (__a, __b, __c); } /* vqmovn */ diff --git a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c index c07c94c..ea29066 100644 --- a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c +++ b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c @@ -501,7 +501,7 @@ test_vqdmulls_s32 (int32_t a, int32_t b) /* { dg-final { scan-assembler-times "\\tsqdmull\\td\[0-9\]+, s\[0-9\]+, v" 1 } } */ -int64x1_t +int64_t test_vqdmulls_lane_s32 (int32_t a, int32x2_t b) { return vqdmulls_lane_s32 (a, b, 1); diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vqdmullh_laneq_s16.c b/gcc/testsuite/gcc.target/aarch64/simd/vqdmullh_laneq_s16.c new file mode 100644 index 0000000..947ebf4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vqdmullh_laneq_s16.c @@ -0,0 +1,15 @@ +/* Test the vqdmullh_laneq_s16 AArch64 SIMD intrinsic. 
*/ + +/* { dg-do compile } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" + +int32_t +t_vqdmullh_laneq_s16 (int16_t a, int16x8_t b) +{ + return vqdmullh_laneq_s16 (a, b, 0); +} + +/* { dg-final { scan-assembler-times "sqdmull\[ \t\]+\[sS\]\[0-9\]+, ?\[hH\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[hH\]\\\[0\\\]\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vqdmulls_lane_s32.c b/gcc/testsuite/gcc.target/aarch64/simd/vqdmulls_lane_s32.c index 6ed8e3a..24daaab 100644 --- a/gcc/testsuite/gcc.target/aarch64/simd/vqdmulls_lane_s32.c +++ b/gcc/testsuite/gcc.target/aarch64/simd/vqdmulls_lane_s32.c @@ -5,7 +5,7 @@ #include "arm_neon.h" -int64x1_t +int64_t t_vqdmulls_lane_s32 (int32_t a, int32x2_t b) { return vqdmulls_lane_s32 (a, b, 0); diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vqdmulls_laneq_s32.c b/gcc/testsuite/gcc.target/aarch64/simd/vqdmulls_laneq_s32.c new file mode 100644 index 0000000..503f81e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vqdmulls_laneq_s32.c @@ -0,0 +1,15 @@ +/* Test the vqdmulls_laneq_s32 AArch64 SIMD intrinsic. */ + +/* { dg-do compile } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" + +int64_t +t_vqdmulls_laneq_s32 (int32_t a, int32x4_t b) +{ + return vqdmulls_laneq_s32 (a, b, 0); +} + +/* { dg-final { scan-assembler-times "sqdmull\[ \t\]+\[dD\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */