From patchwork Fri Jul 19 19:09:05 2013 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: James Greenhalgh X-Patchwork-Id: 260335 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (Client CN "localhost", Issuer "www.qmailtoaster.com" (not verified)) by ozlabs.org (Postfix) with ESMTPS id 723E62C0092 for ; Sat, 20 Jul 2013 05:09:29 +1000 (EST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:cc:subject:date:message-id:mime-version:content-type; q=dns; s=default; b=T4OKxyN2oU6qfTGJvzAYvgbWGKIzJn832hh2tt/SQvpR9126tC hq8lYupvswNNTHmBbgf6c796X77fcLLHS81WlTrufSG2fMpNIBLS3lpqBh7I03Ga 6SoEmuG+FMaIKwn19debkYtO9RWjb4Bz8pymlWNe7bSVTjR//k7TAmreg= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:cc:subject:date:message-id:mime-version:content-type; s= default; bh=DtYcmcj9L0lUP2eWcNU48nfXpZE=; b=QC5F5AqYo/iiqCrJHPbw Y5jXT0Bwf6VaFGII9rEIsnhkpJeMAs2/cp4DIgQcjlz2c26yglomc6LepUIfyyf+ 6DWcSMnrz47t7KbFmnvl3ltFUV8QZNMnNnrGLJG+wKqTH3yx7XEwGdqs27oU3jxI rxY3PCTPo2+pf5UtDc82K0c= Received: (qmail 22075 invoked by alias); 19 Jul 2013 19:09:23 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 22055 invoked by uid 89); 19 Jul 2013 19:09:22 -0000 X-Spam-SWARE-Status: No, score=-0.9 required=5.0 tests=AWL, BAYES_50, RCVD_IN_DNSWL_LOW, RDNS_NONE, SPF_PASS autolearn=no version=3.3.1 Received: from Unknown (HELO service87.mimecast.com) (91.220.42.44) by sourceware.org (qpsmtpd/0.84/v0.84-167-ge50287c) with ESMTP; Fri, 19 Jul 2013 19:09:21 +0000 Received: from cam-owa1.Emea.Arm.com (fw-tnat.cambridge.arm.com [217.140.96.21]) by service87.mimecast.com; Fri, 19 Jul 2013 20:09:12 +0100 Received: from e106375-lin.cambridge.arm.com ([10.1.255.212]) by cam-owa1.Emea.Arm.com with Microsoft SMTPSVC(6.0.3790.0); Fri, 19 Jul 2013 20:09:10 +0100 From: James Greenhalgh To: gcc-patches@gcc.gnu.org Cc: marcus.shawcroft@arm.com Subject: [AArch64] Rewrite vabs_s<8, 16, 32, 64> AdvSIMD intrinsics to fold to tree. Date: Fri, 19 Jul 2013 20:09:05 +0100 Message-Id: <1374260945-19614-1-git-send-email-james.greenhalgh@arm.com> MIME-Version: 1.0 X-MC-Unique: 113071920091200201 X-Virus-Found: No Hi, This patch uses aarch64_fold_builtin to fold all remaining variants of the vabs intrinsics to tree. Testcase added, full testsuite run for aarch64-none-elf with no issues. OK? Thanks, James --- gcc/ 2013-07-19 James Greenhalgh * config/aarch64/aarch64-builtins.c (aarch64_fold_builtin): Fold abs in all modes. * config/aarch64/aarch64-simd-builtins.def (abs): Enable for all modes. * config/aarch64/arm_neon.h (vabs_s<8,16,32,64): Rewrite using builtins. (vabs_f64): Add missing intrinsic. gcc/testsuite/ 2013-07-19 James Greenhalgh * gcc.target/aarch64/vabs_intrinsic_1.c: New file. diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index f49f06b..6816b9c 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -1325,7 +1325,7 @@ aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args, switch (fcode) { - BUILTIN_VDQF (UNOP, abs, 2) + BUILTIN_VALLDI (UNOP, abs, 2) return fold_build1 (ABS_EXPR, type, args[0]); break; BUILTIN_VALLDI (BINOP, cmge, 0) diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index af2dd6e..55dead6 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -347,7 +347,7 @@ BUILTIN_VDQF (UNOP, frecpe, 0) BUILTIN_VDQF (BINOP, frecps, 0) - BUILTIN_VDQF (UNOP, abs, 2) + BUILTIN_VALLDI (UNOP, abs, 2) VAR1 (UNOP, vec_unpacks_hi_, 10, v4sf) VAR1 (BINOP, float_truncate_hi_, 0, v4sf) diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 122fd7d..99cf123 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -4468,83 +4468,6 @@ vabds_f32 (float32_t a, float32_t b) return result; } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vabs_s8 (int8x8_t a) -{ - int8x8_t result; - __asm__ ("abs %0.8b,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vabs_s16 (int16x4_t a) -{ - int16x4_t result; - __asm__ ("abs %0.4h,%1.4h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vabs_s32 (int32x2_t a) -{ - int32x2_t result; - __asm__ ("abs %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vabsq_s8 (int8x16_t a) -{ - int8x16_t result; - __asm__ ("abs %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vabsq_s16 (int16x8_t a) -{ - int16x8_t result; - __asm__ ("abs %0.8h,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vabsq_s32 (int32x4_t a) -{ - int32x4_t result; - __asm__ ("abs %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vabsq_s64 (int64x2_t a) -{ - int64x2_t result; - __asm__ ("abs %0.2d,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - __extension__ static __inline int16_t __attribute__ ((__always_inline__)) vaddlv_s8 (int8x8_t a) { @@ -17395,6 +17318,30 @@ vabs_f32 (float32x2_t __a) return __builtin_aarch64_absv2sf (__a); } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vabs_f64 (float64x1_t __a) +{ + return __builtin_fabs (__a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vabs_s8 (int8x8_t __a) +{ + return __builtin_aarch64_absv8qi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vabs_s16 (int16x4_t __a) +{ + return __builtin_aarch64_absv4hi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vabs_s32 (int32x2_t __a) +{ + return __builtin_aarch64_absv2si (__a); +} + __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vabs_s64 (int64x1_t __a) { @@ -17413,6 +17360,30 @@ vabsq_f64 (float64x2_t __a) return __builtin_aarch64_absv2df (__a); } +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vabsq_s8 (int8x16_t __a) +{ + return __builtin_aarch64_absv16qi (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vabsq_s16 (int16x8_t __a) +{ + return __builtin_aarch64_absv8hi (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabsq_s32 (int32x4_t __a) +{ + return __builtin_aarch64_absv4si (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vabsq_s64 (int64x2_t __a) +{ + return __builtin_aarch64_absv2di (__a); +} + /* vadd */ __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) diff --git a/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_1.c b/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_1.c new file mode 100644 index 0000000..c9897e9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_1.c @@ -0,0 +1,101 @@ +/* { dg-do run } */ +/* { dg-options "-O3 --save-temps" } */ + +#include + +extern void abort (void); + +#define ETYPE(size) int##size##_t +#define VTYPE(size, lanes) int##size##x##lanes##_t + +#define TEST_VABS(q, size, lanes) \ +static void \ +test_vabs##q##_##size (ETYPE (size) * res, \ + const ETYPE (size) *in1) \ +{ \ + VTYPE (size, lanes) a = vld1##q##_s##size (res); \ + VTYPE (size, lanes) b = vld1##q##_s##size (in1); \ + a = vabs##q##_s##size (b); \ + vst1##q##_s##size (res, a); \ +} + +#define BUILD_VARS(width, n_lanes, n_half_lanes) \ +TEST_VABS (, width, n_half_lanes) \ +TEST_VABS (q, width, n_lanes) \ + +BUILD_VARS (64, 2, 1) +BUILD_VARS (32, 4, 2) +BUILD_VARS (16, 8, 4) +BUILD_VARS (8, 16, 8) + +#define POOL1 {-10} +#define POOL2 {2, -10} +#define POOL4 {0, -10, 2, -3} +#define POOL8 {0, -10, 2, -3, 4, -50, 6, -70} +#define POOL16 {0, -10, 2, -3, 4, -50, 6, -70, \ + -5, 10, -2, 3, -4, 50, -6, 70} + +#define EXPECTED1 {10} +#define EXPECTED2 {2, 10} +#define EXPECTED4 {0, 10, 2, 3} +#define EXPECTED8 {0, 10, 2, 3, 4, 50, 6, 70} +#define EXPECTED16 {0, 10, 2, 3, 4, 50, 6, 70, \ + 5, 10, 2, 3, 4, 50, 6, 70} + +#define BUILD_TEST(size, lanes_64, lanes_128) \ +static void \ +test_##size (void) \ +{ \ + int i; \ + ETYPE (size) pool1[lanes_64] = POOL##lanes_64; \ + ETYPE (size) res1[lanes_64] = {0}; \ + ETYPE (size) expected1[lanes_64] = EXPECTED##lanes_64; \ + ETYPE (size) pool2[lanes_128] = POOL##lanes_128; \ + ETYPE (size) res2[lanes_128] = {0}; \ + ETYPE (size) expected2[lanes_128] = EXPECTED##lanes_128; \ + \ + /* Forecfully avoid optimization. */ \ + asm volatile ("" : : : "memory"); \ + test_vabs_##size (res1, pool1); \ + for (i = 0; i < lanes_64; i++) \ + if (res1[i] != expected1[i]) \ + abort (); \ + \ + /* Forecfully avoid optimization. */ \ + asm volatile ("" : : : "memory"); \ + test_vabsq_##size (res2, pool2); \ + for (i = 0; i < lanes_128; i++) \ + if (res2[i] != expected2[i]) \ + abort (); \ +} + +/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.8b, v\[0-9\]+\.8b" 1 } } */ +/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */ +BUILD_TEST (8 , 8, 16) + +/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.4h, v\[0-9\]+\.4h" 1 } } */ +/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.8h, v\[0-9\]+\.8h" 1 } } */ +BUILD_TEST (16, 4, 8) + +/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" 1 } } */ +/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" 1 } } */ +BUILD_TEST (32, 2, 4) + +/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */ +BUILD_TEST (64, 1, 2) + +#undef BUILD_TEST + +#define BUILD_TEST(size) test_##size () + +int +main (int argc, char **argv) +{ + BUILD_TEST (8); + BUILD_TEST (16); + BUILD_TEST (32); + BUILD_TEST (64); + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */