From patchwork Fri Apr 26 12:45:40 2013 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: James Greenhalgh X-Patchwork-Id: 239857 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (Client CN "localhost", Issuer "www.qmailtoaster.com" (not verified)) by ozlabs.org (Postfix) with ESMTPS id 0A5272C010B for ; Fri, 26 Apr 2013 22:46:00 +1000 (EST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:cc:subject:date:message-id:mime-version:content-type; q=dns; s=default; b=BR9jjVeElYrGTlvhm0ld9nCHJsJYJPlK7KTPOSCBQLmuOrMroJ e8avMWqpYUJx9c7agADzPTYa2iEM/jHRoLqJD4QPWeeZp4amxBHSFHIXzOtybauT Rug1KkgAm+Ednvcb8M6XzA/WyB1vn9tGvbW7YbppoolVyA7mmEsl9CsCA= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:cc:subject:date:message-id:mime-version:content-type; s= default; bh=6ZGw5LE5OD0Yw8vrjvgFFqKsp5U=; b=i/5yIucCIa7iBMzTMMfw 0I+Y4t3EgqMk7ZAnBxCriM89f+Cu+owGm7U3l1sxC+fnTkL07WH/ubL9+7yHAxaf rWcpw9aFcdLJmkjKoV6ILoqq6ad2WfxHNZkFL+T1P7GGid8m0m2VljRiAI3nuRkb 77tq6Ih+XqWxXhbi+smUu0Q= Received: (qmail 27324 invoked by alias); 26 Apr 2013 12:45:54 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 27308 invoked by uid 89); 26 Apr 2013 12:45:54 -0000 X-Spam-SWARE-Status: No, score=-2.5 required=5.0 tests=AWL, BAYES_00, RCVD_IN_DNSWL_LOW, SPF_PASS, TW_CP, TW_DQ, TW_PX, TW_VR autolearn=ham version=3.3.1 Received: from service87.mimecast.com (HELO service87.mimecast.com) (91.220.42.44) by sourceware.org (qpsmtpd/0.84/v0.84-167-ge50287c) with ESMTP; Fri, 26 Apr 2013 12:45:53 +0000 Received: from cam-owa2.Emea.Arm.com (fw-tnat.cambridge.arm.com [217.140.96.21]) by service87.mimecast.com; Fri, 26 Apr 2013 13:45:49 +0100 Received: from e106375-lin.cambridge.arm.com ([10.1.255.212]) by cam-owa2.Emea.Arm.com with Microsoft SMTPSVC(6.0.3790.3959); Fri, 26 Apr 2013 13:45:47 +0100 From: James Greenhalgh To: gcc-patches@gcc.gnu.org Cc: marcus.shawcroft@arm.com Subject: [AArch64] Convert NEON frint implementations to use builtins. Date: Fri, 26 Apr 2013 13:45:40 +0100 Message-Id: <1366980340-2426-1-git-send-email-james.greenhalgh@arm.com> MIME-Version: 1.0 X-MC-Unique: 113042613454908801 X-Virus-Found: No Hi, This patch renames the vrnd intrinsics, which previously were vrnd At the same time, we move these intrinsics to an RTL-based intrinsic. Regression tested on aarch64-none-elf with no issues. Thanks, James --- gcc/ 2013-04-26 James Greenhalgh * config/aarch64/arm_neon.h (vrndq_f<32, 64>): Rename to... (vrndq_f<32, 64>): ...This, implement using builtin. (vrnd_f32): Implement using builtins. (vrnd_f<32, 64>): New. gcc/testsuite/ 2013-04-26 James Greenhalgh * gcc.target/aarch64/vect-vrnd.c: New. diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 6f5ca8e..c868a46 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -14941,171 +14941,6 @@ vrev64q_u32 (uint32x4_t a) return result; } -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vrnd_f32 (float32x2_t a) -{ - float32x2_t result; - __asm__ ("frintz %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vrnda_f32 (float32x2_t a) -{ - float32x2_t result; - __asm__ ("frinta %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vrndm_f32 (float32x2_t a) -{ - float32x2_t result; - __asm__ ("frintm %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vrndn_f32 (float32x2_t a) -{ - float32x2_t result; - __asm__ ("frintn %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vrndp_f32 (float32x2_t a) -{ - float32x2_t result; - __asm__ ("frintp %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vrndq_f32 (float32x4_t a) -{ - float32x4_t result; - __asm__ ("frintz %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vrndq_f64 (float64x2_t a) -{ - float64x2_t result; - __asm__ ("frintz %0.2d,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vrndqa_f32 (float32x4_t a) -{ - float32x4_t result; - __asm__ ("frinta %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vrndqa_f64 (float64x2_t a) -{ - float64x2_t result; - __asm__ ("frinta %0.2d,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vrndqm_f32 (float32x4_t a) -{ - float32x4_t result; - __asm__ ("frintm %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vrndqm_f64 (float64x2_t a) -{ - float64x2_t result; - __asm__ ("frintm %0.2d,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vrndqn_f32 (float32x4_t a) -{ - float32x4_t result; - __asm__ ("frintn %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vrndqn_f64 (float64x2_t a) -{ - float64x2_t result; - __asm__ ("frintn %0.2d,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vrndqp_f32 (float32x4_t a) -{ - float32x4_t result; - __asm__ ("frintp %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vrndqp_f64 (float64x2_t a) -{ - float64x2_t result; - __asm__ ("frintp %0.2d,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - #define vrshrn_high_n_s16(a, b, c) \ __extension__ \ ({ \ @@ -23069,6 +22904,145 @@ vrecpxd_f64 (float64_t __a) return __builtin_aarch64_frecpxdf (__a); } +/* vrnd */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrnd_f32 (float32x2_t __a) +{ + return __builtin_aarch64_btruncv2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_btruncv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_btruncv2df (__a); +} + +/* vrnda */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrnda_f32 (float32x2_t __a) +{ + return __builtin_aarch64_roundv2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndaq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_roundv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndaq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_roundv2df (__a); +} + +/* vrndi */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrndi_f32 (float32x2_t __a) +{ + return __builtin_aarch64_nearbyintv2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndiq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_nearbyintv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndiq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_nearbyintv2df (__a); +} + +/* vrndm */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrndm_f32 (float32x2_t __a) +{ + return __builtin_aarch64_floorv2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndmq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_floorv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndmq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_floorv2df (__a); +} + +/* vrndn */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrndn_f32 (float32x2_t __a) +{ + return __builtin_aarch64_frintnv2sf (__a); +} +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndnq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_frintnv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndnq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_frintnv2df (__a); +} + +/* vrndp */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrndp_f32 (float32x2_t __a) +{ + return __builtin_aarch64_ceilv2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndpq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_ceilv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndpq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_ceilv2df (__a); +} + +/* vrndx */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrndx_f32 (float32x2_t __a) +{ + return __builtin_aarch64_rintv2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndxq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_rintv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndxq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_rintv2df (__a); +} + /* vrshl */ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) diff --git a/gcc/testsuite/gcc.target/aarch64/vect-vrnd.c b/gcc/testsuite/gcc.target/aarch64/vect-vrnd.c new file mode 100644 index 0000000..aa3fd9b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vect-vrnd.c @@ -0,0 +1,117 @@ +/* { dg-do run } */ +/* { dg-options "-O3 --save-temps" } */ + +#include + +extern void abort (void); +extern float fabsf (float); +extern double fabs (double); + +extern double trunc (double); +extern double round (double); +extern double nearbyint (double); +extern double floor (double); +extern double ceil (double); +extern double rint (double); + +extern float truncf (float); +extern float roundf (float); +extern float nearbyintf (float); +extern float floorf (float); +extern float ceilf (float); +extern float rintf (float); + +#define NUM_TESTS 8 +#define DELTA 0.000001 + +float input_f32[] = {0.1f, -0.1f, 0.4f, 10.3f, + 200.0f, -800.0f, -13.0f, -0.5f}; +double input_f64[] = {0.1, -0.1, 0.4, 10.3, + 200.0, -800.0, -13.0, -0.5}; + +#define TEST(SUFFIX, Q, WIDTH, LANES, C_FN, F) \ +int \ +test_vrnd##SUFFIX##_float##WIDTH##x##LANES##_t (void) \ +{ \ + int ret = 1; \ + int i = 0; \ + int nlanes = LANES; \ + float##WIDTH##_t expected_out[NUM_TESTS]; \ + float##WIDTH##_t actual_out[NUM_TESTS]; \ + \ + for (i = 0; i < NUM_TESTS; i++) \ + { \ + expected_out[i] = C_FN##F (input_f##WIDTH[i]); \ + /* Don't vectorize this. */ \ + asm volatile ("" : : : "memory"); \ + } \ + \ + /* Prevent the compiler from noticing these two loops do the same \ + thing and optimizing away the comparison. */ \ + asm volatile ("" : : : "memory"); \ + \ + for (i = 0; i < NUM_TESTS; i+=nlanes) \ + { \ + float##WIDTH##x##LANES##_t out = \ + vrnd##SUFFIX##Q##_f##WIDTH \ + (vld1##Q##_f##WIDTH (input_f##WIDTH + i)); \ + vst1##Q##_f##WIDTH (actual_out + i, out); \ + } \ + \ + for (i = 0; i < NUM_TESTS; i++) \ + ret &= fabs##F (expected_out[i] - actual_out[i]) < DELTA; \ + \ + return ret; \ +} \ + + +#define BUILD_VARIANTS(SUFFIX, C_FN) \ +TEST (SUFFIX, , 32, 2, C_FN, f) \ +TEST (SUFFIX, q, 32, 4, C_FN, f) \ +TEST (SUFFIX, q, 64, 2, C_FN, ) \ + +BUILD_VARIANTS ( , trunc) +/* { dg-final { scan-assembler "frintz\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "frintz\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "frintz\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ +BUILD_VARIANTS (a, round) +/* { dg-final { scan-assembler "frinta\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "frinta\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "frinta\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ +BUILD_VARIANTS (i, nearbyint) +/* { dg-final { scan-assembler "frinti\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "frinti\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "frinti\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ +BUILD_VARIANTS (m, floor) +/* { dg-final { scan-assembler "frintm\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "frintm\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "frintm\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ +BUILD_VARIANTS (p, ceil) +/* { dg-final { scan-assembler "frintp\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "frintp\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "frintp\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ +BUILD_VARIANTS (x, rint) +/* { dg-final { scan-assembler "frintx\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "frintx\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "frintx\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ + +#undef TEST +#define TEST(SUFFIX, Q, WIDTH, LANES, C_FN, F) \ +{ \ + if (!test_vrnd##SUFFIX##_float##WIDTH##x##LANES##_t ()) \ + abort (); \ +} + +int +main (int argc, char **argv) +{ + BUILD_VARIANTS ( , trunc) + BUILD_VARIANTS (a, round) + BUILD_VARIANTS (i, nearbyint) + BUILD_VARIANTS (m, floor) + BUILD_VARIANTS (p, ceil) + BUILD_VARIANTS (x, rint) + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */