From patchwork Mon Oct 31 23:21:55 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Quentin Neill X-Patchwork-Id: 123000 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id EF19BB6F93 for ; Tue, 1 Nov 2011 10:22:16 +1100 (EST) Received: (qmail 9339 invoked by alias); 31 Oct 2011 23:22:13 -0000 Received: (qmail 9327 invoked by uid 22791); 31 Oct 2011 23:22:11 -0000 X-SWARE-Spam-Status: No, hits=-2.1 required=5.0 tests=AWL, BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, FREEMAIL_FROM, RCVD_IN_DNSWL_LOW X-Spam-Check-By: sourceware.org Received: from mail-gy0-f175.google.com (HELO mail-gy0-f175.google.com) (209.85.160.175) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Mon, 31 Oct 2011 23:21:55 +0000 Received: by gyd8 with SMTP id 8so2519077gyd.20 for ; Mon, 31 Oct 2011 16:21:55 -0700 (PDT) MIME-Version: 1.0 Received: by 10.150.100.18 with SMTP id x18mr12624347ybb.75.1320103315078; Mon, 31 Oct 2011 16:21:55 -0700 (PDT) Received: by 10.150.137.5 with HTTP; Mon, 31 Oct 2011 16:21:55 -0700 (PDT) In-Reply-To: <20111031223108.GI1052@tyan-ft48-01.lab.bos.redhat.com> References: <20111031223108.GI1052@tyan-ft48-01.lab.bos.redhat.com> Date: Mon, 31 Oct 2011 18:21:55 -0500 Message-ID: Subject: Re: PATCH: Move f16c intrinsics into f16cintrin.h From: Quentin Neill To: Jakub Jelinek Cc: gcc-patches@gcc.gnu.org X-IsSubscribed: yes Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org On Mon, Oct 31, 2011 at 5:31 PM, Jakub Jelinek wrote: > On Mon, Oct 31, 2011 at 05:23:58PM -0500, Quentin Neill wrote: >> Interested parties should view these threads from three years ago: 
>> http://gcc.gnu.org/ml/gcc-patches/2008-11/threads.html#00145 >> http://gcc.gnu.org/ml/gcc-patches/2008-12/threads.html#00174 >> >> Testing on x86_64, okay to commit if no regressions? > > You aren't installing the header, so it will cause regressions. > config.gcc needs to be adjusted for it. Arggh.  Thanks, my tests found that too. Reposting, okay to commit after testing on x86_64 if no regressions? --- Quentin Neill From c0379bf7dacbe457813893cdaf381ae7206566c7 Mon Sep 17 00:00:00 2001 From: Quentin Neill Date: Mon, 31 Oct 2011 16:54:18 -0500 Subject: [PATCH] 2011-10-31 Quentin Neill Piledriver f16cintrin.h fix. * config/i386/f16cintrin.h: Contents moved from immintrin.h. * config/config.gcc: Add f16cintrin.h. --- gcc/ChangeLog | 6 +++ gcc/config.gcc | 4 +- gcc/config/i386/f16cintrin.h | 94 ++++++++++++++++++++++++++++++++++++++++++ gcc/config/i386/immintrin.h | 63 ++-------------------------- 4 files changed, 106 insertions(+), 61 deletions(-) create mode 100644 gcc/config/i386/f16cintrin.h diff --git a/gcc/ChangeLog b/gcc/ChangeLog index caed12e..14a4392 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2011-10-31 Quentin Neill + + Piledriver f16cintrin.h fix. + * config/i386/f16cintrin.h: Contents moved from immintrin.h. + * config/config.gcc: Add f16cintrin.h. + 2011-10-31 Richard Henderson * config/i386/sse.md (floatv8siv8sf2): Rename from avx_cvtdq2ps256. 
diff --git a/gcc/config.gcc b/gcc/config.gcc index 2c18655..2b60e77 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -361,7 +361,7 @@ i[34567]86-*-*) immintrin.h x86intrin.h avxintrin.h xopintrin.h ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h lzcntintrin.h bmiintrin.h bmi2intrin.h tbmintrin.h - avx2intrin.h fmaintrin.h" + avx2intrin.h fmaintrin.h f16cintrin.h" ;; x86_64-*-*) cpu_type=i386 @@ -374,7 +374,7 @@ x86_64-*-*) immintrin.h x86intrin.h avxintrin.h xopintrin.h ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h lzcntintrin.h bmiintrin.h tbmintrin.h bmi2intrin.h - avx2intrin.h fmaintrin.h" + avx2intrin.h fmaintrin.h f16cintrin.h" need_64bit_hwint=yes ;; ia64-*-*) diff --git a/gcc/config/i386/f16cintrin.h b/gcc/config/i386/f16cintrin.h new file mode 100644 index 0000000..5ff836b --- /dev/null +++ b/gcc/config/i386/f16cintrin.h @@ -0,0 +1,94 @@ +/* Copyright (C) 2011 + Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _X86INTRIN_H_INCLUDED +#if (!defined(_X86INTRIN_H_INCLUDED) && !defined(_IMMINTRIN_H_INCLUDED)) +# error "Never use <f16cintrin.h> directly; include <x86intrin.h> or <immintrin.h> instead." 
+#endif + +#ifndef __F16C__ +# error "F16C instruction set not enabled" +#else + +#ifndef _F16CINTRIN_H_INCLUDED +#define _F16CINTRIN_H_INCLUDED + +extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_cvtsh_ss (unsigned short __S) +{ + __v8hi __H = __extension__ (__v8hi){ __S, 0, 0, 0, 0, 0, 0, 0 }; + __v4sf __A = __builtin_ia32_vcvtph2ps (__H); + return __builtin_ia32_vec_ext_v4sf (__A, 0); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtph_ps (__m128i __A) +{ + return (__m128) __builtin_ia32_vcvtph2ps ((__v8hi) __A); +} + +extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtph_ps (__m128i __A) +{ + return (__m256) __builtin_ia32_vcvtph2ps256 ((__v8hi) __A); +} + +#ifdef __OPTIMIZE__ +extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_cvtss_sh (float __F, const int __I) +{ + __v4sf __A = __extension__ (__v4sf){ __F, 0, 0, 0 }; + __v8hi __H = __builtin_ia32_vcvtps2ph (__A, __I); + return (unsigned short) __builtin_ia32_vec_ext_v8hi (__H, 0); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtps_ph (__m128 __A, const int __I) +{ + return (__m128i) __builtin_ia32_vcvtps2ph ((__v4sf) __A, __I); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtps_ph (__m256 __A, const int __I) +{ + return (__m128i) __builtin_ia32_vcvtps2ph256 ((__v8sf) __A, __I); +} +#else +#define _cvtss_sh(__F, __I) \ + (__extension__ \ + ({ \ + __v4sf __A = __extension__ (__v4sf){ __F, 0, 0, 0 }; \ + __v8hi __H = __builtin_ia32_vcvtps2ph (__A, __I); \ + (unsigned short) __builtin_ia32_vec_ext_v8hi (__H, 0); \ + })) + +#define _mm_cvtps_ph(A, I) \ + ((__m128i) __builtin_ia32_vcvtps2ph ((__v4sf)(__m128) A, (int) (I))) + +#define _mm256_cvtps_ph(A, I) \ + ((__m128i) __builtin_ia32_vcvtps2ph256 
((__v8sf)(__m256) A, (int) (I))) +#endif + +#endif /* __F16C__ */ +#endif diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h index 102814e..986a573 100644 --- a/gcc/config/i386/immintrin.h +++ b/gcc/config/i386/immintrin.h @@ -76,6 +76,10 @@ #include <fmaintrin.h> #endif +#ifdef __F16C__ +#include <f16cintrin.h> +#endif + #ifdef __RDRND__ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -161,63 +165,4 @@ _rdrand64_step (unsigned long long *__P) #endif /* __RDRND__ */ #endif /* __x86_64__ */ -#ifdef __F16C__ -extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_cvtsh_ss (unsigned short __S) -{ - __v8hi __H = __extension__ (__v8hi){ __S, 0, 0, 0, 0, 0, 0, 0 }; - __v4sf __A = __builtin_ia32_vcvtph2ps (__H); - return __builtin_ia32_vec_ext_v4sf (__A, 0); -} - -extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cvtph_ps (__m128i __A) -{ - return (__m128) __builtin_ia32_vcvtph2ps ((__v8hi) __A); -} - -extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_cvtph_ps (__m128i __A) -{ - return (__m256) __builtin_ia32_vcvtph2ps256 ((__v8hi) __A); -} - -#ifdef __OPTIMIZE__ -extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_cvtss_sh (float __F, const int __I) -{ - __v4sf __A = __extension__ (__v4sf){ __F, 0, 0, 0 }; - __v8hi __H = __builtin_ia32_vcvtps2ph (__A, __I); - return (unsigned short) __builtin_ia32_vec_ext_v8hi (__H, 0); -} - -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cvtps_ph (__m128 __A, const int __I) -{ - return (__m128i) __builtin_ia32_vcvtps2ph ((__v4sf) __A, __I); -} - -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_cvtps_ph (__m256 __A, const int __I) -{ - return (__m128i) __builtin_ia32_vcvtps2ph256 ((__v8sf) __A, __I); -} -#else -#define _cvtss_sh(__F, 
__I) \ - (__extension__ \ - ({ \ - __v4sf __A = __extension__ (__v4sf){ __F, 0, 0, 0 }; \ - __v8hi __H = __builtin_ia32_vcvtps2ph (__A, __I); \ - (unsigned short) __builtin_ia32_vec_ext_v8hi (__H, 0); \ - })) - -#define _mm_cvtps_ph(A, I) \ - ((__m128i) __builtin_ia32_vcvtps2ph ((__v4sf)(__m128) A, (int) (I))) - -#define _mm256_cvtps_ph(A, I) \ - ((__m128i) __builtin_ia32_vcvtps2ph256 ((__v8sf)(__m256) A, (int) (I))) -#endif - -#endif /* __F16C__ */ - #endif /* _IMMINTRIN_H_INCLUDED */