From patchwork Thu Oct 28 18:37:31 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Quentin Neill X-Patchwork-Id: 69486 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id 56D09B70CB for ; Fri, 29 Oct 2010 05:37:50 +1100 (EST) Received: (qmail 30243 invoked by alias); 28 Oct 2010 18:37:47 -0000 Received: (qmail 29668 invoked by uid 22791); 28 Oct 2010 18:37:43 -0000 X-SWARE-Spam-Status: No, hits=-1.3 required=5.0 tests=AWL, BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, FREEMAIL_FROM, RCVD_IN_DNSWL_NONE, TW_AV, TW_BD, TW_CL, TW_EB, TW_FS, TW_MX, TW_OV, TW_PX, TW_ZC, T_TO_NO_BRKTS_FREEMAIL X-Spam-Check-By: sourceware.org Received: from mail-ww0-f51.google.com (HELO mail-ww0-f51.google.com) (74.125.82.51) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Thu, 28 Oct 2010 18:37:34 +0000 Received: by wwi17 with SMTP id 17so199992wwi.8 for ; Thu, 28 Oct 2010 11:37:31 -0700 (PDT) MIME-Version: 1.0 Received: by 10.216.19.134 with SMTP id n6mr5492830wen.9.1288291051691; Thu, 28 Oct 2010 11:37:31 -0700 (PDT) Received: by 10.216.15.200 with HTTP; Thu, 28 Oct 2010 11:37:31 -0700 (PDT) In-Reply-To: References: Date: Thu, 28 Oct 2010 13:37:31 -0500 Message-ID: Subject: Re: [patch 1/2] AMD bdver2 processors - BMI From: Quentin Neill To: gcc-patches@gcc.gnu.org Cc: Richard Henderson X-IsSubscribed: yes Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org On Wed, Oct 27, 2010 at 11:18 PM, Quentin Neill wrote: > Followup to >     http://gcc.gnu.org/ml/gcc-patches/2010-10/msg01356.html > in response to Richard Henderson's feedback >     http://gcc.gnu.org/ml/gcc-patches/2010-10/msg01555.html > > Bootstrapped and tested with "make -k check" on x86-64. > > Okay to commit? > -- > Quentin Reposting BMI patch as text (why did gmail insist on a binary attachment) and copying Richard. diff --git a/gcc/config.gcc b/gcc/config.gcc index b353fa0..4034241 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -318,7 +318,7 @@ i[34567]86-*-*) nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h immintrin.h x86intrin.h avxintrin.h xopintrin.h ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h - abmintrin.h" + abmintrin.h bmiintrin.h" ;; x86_64-*-*) cpu_type=i386 @@ -329,7 +329,7 @@ x86_64-*-*) nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h immintrin.h x86intrin.h avxintrin.h xopintrin.h ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h - abmintrin.h" + abmintrin.h bmiintrin.h" need_64bit_hwint=yes ;; ia64-*-*) diff --git a/gcc/config/i386/bmiintrin.h b/gcc/config/i386/bmiintrin.h new file mode 100644 index 0000000..3814227 --- /dev/null +++ b/gcc/config/i386/bmiintrin.h @@ -0,0 +1,145 @@ +/* Copyright (C) 2010 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef _X86INTRIN_H_INCLUDED +# error "Never use directly; include instead." +#endif + +#ifndef __BMI__ +# error "BMI instruction set not enabled" +#endif /* __BMI__ */ + +#ifndef _BMIINTRIN_H_INCLUDED +#define _BMIINTRIN_H_INCLUDED + +extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__lzcnt_u16 (unsigned short __X) +{ + return __builtin_ia32_lzcnt_u16 (__X); +} + +extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__tzcnt_u16 (unsigned short __X) +{ + return __builtin_ia32_tzcnt_u16 (__X); +} + + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__andn_u32 (unsigned int __X, unsigned int __Y) +{ + unsigned int tmp = ~(__X) & (__Y); + return tmp; +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__bextr_u32 (unsigned int __X, unsigned int __Y) +{ + return __builtin_ia32_bextr_u32 (__X, __Y); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__blsi_u32 (unsigned int __X) +{ + unsigned int tmp = (__X) & (__X * -1); + return tmp; +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__blsmsk_u32 (unsigned int __X) +{ + unsigned int tmp = (__X) ^ (__X - 1); + return tmp; +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__blsr_u32 (unsigned int __X) +{ + unsigned int tmp = (__X) & (__X - 1); + return tmp; +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__lzcnt_u32 (unsigned int __X) +{ + return __builtin_ia32_lzcnt_u32 (__X); +} + + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__tzcnt_u32 (unsigned int __X) +{ + return __builtin_ia32_tzcnt_u32 (__X); +} + + +#ifdef __x86_64__ +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__andn_u64 (unsigned long long __X, unsigned long long __Y) +{ + unsigned long long tmp = ~(__X) & (__Y); + return tmp; +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__bextr_u64 (unsigned long long __X, unsigned long long __Y) +{ + return __builtin_ia32_bextr_u64 (__X, __Y); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__blsi_u64 (unsigned long long __X) +{ + unsigned long long tmp = (__X) & (__X * -1); + return tmp; +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__blsmsk_u64 (unsigned long long __X) +{ + unsigned long long tmp = (__X) ^ (__X - 1); + return tmp; +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__blsr_u64 (unsigned long long __X) +{ + unsigned long long tmp = (__X) & (__X - 1); + return tmp; +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__lzcnt_u64 (unsigned long long __X) +{ + return __builtin_ia32_lzcnt_u64 (__X); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__tzcnt_u64 (unsigned long long __X) +{ + return __builtin_ia32_tzcnt_u64 (__X); +} + +#endif /* __x86_64__ */ + +#endif /* _BMIINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index 11c2f1e..0f1af7f 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -62,6 +62,7 @@ /* Extended Features (%eax == 7) */ #define bit_FSGSBASE (1 << 0) +#define bit_BMI (1 << 3) #if defined(__i386__) && defined(__PIC__) /* %ebx may be the PIC register. */ diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c index 8a76857..bcc18b7 100644 --- a/gcc/config/i386/driver-i386.c +++ b/gcc/config/i386/driver-i386.c @@ -397,6 +397,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0; unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0; unsigned int has_fma4 = 0, has_xop = 0; + unsigned int has_bmi = 0; bool arch; @@ -467,6 +468,10 @@ const char *host_detect_local_cpu (int argc, const char **argv) has_longmode = edx & bit_LM; has_3dnowp = edx & bit_3DNOWP; has_3dnow = edx & bit_3DNOW; + + __cpuid (0x7, eax, ebx, ecx, edx); + + has_bmi = ebx & bit_BMI; } if (!arch) @@ -686,6 +691,8 @@ const char *host_detect_local_cpu (int argc, const char **argv) options = concat (options, " -mfma4", NULL); if (has_xop) options = concat (options, " -mxop", NULL); + if (has_bmi) + options = concat (options, " -mbmi", NULL); if (has_avx) options = concat (options, " -mavx", NULL); diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def index 09dd9eb..110b81d 100644 --- a/gcc/config/i386/i386-builtin-types.def +++ b/gcc/config/i386/i386-builtin-types.def @@ -145,7 +145,9 @@ DEF_FUNCTION_TYPE (INT64, INT64) DEF_FUNCTION_TYPE (INT64, V2DF) DEF_FUNCTION_TYPE (INT64, V4SF) DEF_FUNCTION_TYPE (UINT64, INT) +DEF_FUNCTION_TYPE (UINT, UINT) DEF_FUNCTION_TYPE (UINT16, UINT16) +DEF_FUNCTION_TYPE (UINT64, UINT64) DEF_FUNCTION_TYPE (UINT64, PUNSIGNED) DEF_FUNCTION_TYPE (V16QI, PCCHAR) DEF_FUNCTION_TYPE (V16QI, V16QI) diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index 1846efb..e84347c 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c @@ -244,6 +244,8 @@ ix86_target_macros_internal (int isa_flag, def_or_undef (parse_in, "__LWP__"); if (isa_flag & OPTION_MASK_ISA_ABM) def_or_undef (parse_in, "__ABM__"); + if (isa_flag & OPTION_MASK_ISA_BMI) + def_or_undef (parse_in, "__BMI__"); if (isa_flag & OPTION_MASK_ISA_POPCNT) def_or_undef (parse_in, "__POPCNT__"); if (isa_flag & OPTION_MASK_ISA_FSGSBASE) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 6668a62..0a13d1c 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -2079,6 +2079,8 @@ static int ix86_isa_flags_explicit; #define OPTION_MASK_ISA_ABM_SET \ (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT) +#define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI + #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF @@ -2133,6 +2135,7 @@ static int ix86_isa_flags_explicit; #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM +#define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF @@ -2430,6 +2433,19 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value) } return true; + case OPT_mbmi: + if (value) + { + ix86_isa_flags |= OPTION_MASK_ISA_BMI_SET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_SET; + } + else + { + ix86_isa_flags &= ~OPTION_MASK_ISA_BMI_UNSET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_UNSET; + } + return true; + case OPT_mpopcnt: if (value) { @@ -2598,6 +2614,7 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune, { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A }, { "-mmmx", OPTION_MASK_ISA_MMX }, { "-mabm", OPTION_MASK_ISA_ABM }, + { "-mbmi", OPTION_MASK_ISA_BMI }, { "-mpopcnt", OPTION_MASK_ISA_POPCNT }, { "-mmovbe", OPTION_MASK_ISA_MOVBE }, { "-mcrc32", OPTION_MASK_ISA_CRC32 }, @@ -2852,7 +2869,9 @@ ix86_option_override_internal (bool main_args_p) PTA_LWP = 1 << 23, PTA_FSGSBASE = 1 << 24, PTA_RDRND = 1 << 25, - PTA_F16C = 1 << 26 + PTA_F16C = 1 << 26, + PTA_BMI = 1 << 27 + /* if this reaches 32, need to widen struct pta flags below */ }; static struct pta @@ -3184,6 +3203,9 @@ ix86_option_override_internal (bool main_args_p) if (processor_alias_table[i].flags & PTA_ABM && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM)) ix86_isa_flags |= OPTION_MASK_ISA_ABM; + if (processor_alias_table[i].flags & PTA_BMI + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI)) + ix86_isa_flags |= OPTION_MASK_ISA_BMI; if (processor_alias_table[i].flags & PTA_CX16 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16)) ix86_isa_flags |= OPTION_MASK_ISA_CX16; @@ -3930,6 +3952,7 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[]) /* isa options */ IX86_ATTR_ISA ("3dnow", OPT_m3dnow), IX86_ATTR_ISA ("abm", OPT_mabm), + IX86_ATTR_ISA ("bmi", OPT_mbmi), IX86_ATTR_ISA ("aes", OPT_maes), IX86_ATTR_ISA ("avx", OPT_mavx), IX86_ATTR_ISA ("mmx", OPT_mmmx), @@ -22929,6 +22952,13 @@ enum ix86_builtins IX86_BUILTIN_CLZS, + /* BMI instructions. */ + IX86_BUILTIN_BEXTR32, + IX86_BUILTIN_BEXTR64, + IX86_BUILTIN_TZCNT16, + IX86_BUILTIN_TZCNT32, + IX86_BUILTIN_TZCNT64, + /* FSGSBASE instructions. */ IX86_BUILTIN_RDFSBASE32, IX86_BUILTIN_RDFSBASE64, @@ -23868,6 +23898,13 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 }, + /* BMI */ + { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT }, + { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 }, + { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_tzcnt_hi, "__builtin_ia32_tzcnt_u16", IX86_BUILTIN_TZCNT16, UNKNOWN, (int) UINT16_FTYPE_UINT16 }, + { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_tzcnt_si, "__builtin_ia32_tzcnt_u32", IX86_BUILTIN_TZCNT32, UNKNOWN, (int) UINT_FTYPE_UINT }, + { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_tzcnt_di, "__builtin_ia32_tzcnt_u64", IX86_BUILTIN_TZCNT64, UNKNOWN, (int) UINT64_FTYPE_UINT64 }, + /* F16C */ { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI }, { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI }, @@ -25088,9 +25125,11 @@ ix86_expand_args_builtin (const struct builtin_description *d, case FLOAT128_FTYPE_FLOAT128: case FLOAT_FTYPE_FLOAT: case INT_FTYPE_INT: + case UINT_FTYPE_UINT: case UINT64_FTYPE_INT: case UINT16_FTYPE_UINT16: case INT64_FTYPE_INT64: + case UINT64_FTYPE_UINT64: case INT64_FTYPE_V4SF: case INT64_FTYPE_V2DF: case INT_FTYPE_V16QI: diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 25463a5..ca1415c 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -59,6 +59,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define TARGET_LWP OPTION_ISA_LWP #define TARGET_ROUND OPTION_ISA_ROUND #define TARGET_ABM OPTION_ISA_ABM +#define TARGET_BMI OPTION_ISA_BMI #define TARGET_POPCNT OPTION_ISA_POPCNT #define TARGET_SAHF OPTION_ISA_SAHF #define TARGET_MOVBE OPTION_ISA_MOVBE diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index ae52746..100a5af 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -227,6 +227,10 @@ UNSPEC_VTESTP UNSPEC_VCVTPH2PS UNSPEC_VCVTPS2PH + + ;; For BMI support + UNSPEC_BEXTR + UNSPEC_TZCNT ]) (define_c_enum "unspecv" [ @@ -11849,6 +11853,78 @@ (set_attr "type" "bitmanip") (set_attr "mode" "")]) +;; BMI instructions. +(define_insn "*bmi_andn_" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (and:SWI48 + (not:SWI48 + (match_operand:SWI48 1 "register_operand" "r")) + (match_operand:SWI48 2 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI" + "andn\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "")]) + +(define_insn "bmi_bextr_" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm") + (match_operand:SWI48 2 "register_operand" "r")] + UNSPEC_BEXTR)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI" + "bextr\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "")]) + +(define_insn "*bmi_blsi_" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (and:SWI48 + (neg:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm")) + (match_dup 1))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI" + "blsi\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "")]) + +(define_insn "*bmi_blsmsk_" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (xor:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int -1)) + (match_dup 1))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI" + "blsmsk\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "")]) + +(define_insn "*bmi_blsr_" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (and:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int -1)) + (match_dup 1))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI" + "blsr\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "")]) + +(define_insn "bmi_tzcnt_" + [(set (match_operand:SWI248 0 "register_operand" "=r") + (unspec:SWI248 [(match_operand:SWI248 1 "nonimmediate_operand" "rm")] + UNSPEC_TZCNT)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI" + "tzcnt{}\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "")]) + (define_insn "bsr_rex64" [(set (match_operand:DI 0 "register_operand" "=r") (minus:DI (const_int 63) diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 9c1fe1f..d808804 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -353,6 +353,10 @@ mpopcnt Target Report Mask(ISA_POPCNT) Var(ix86_isa_flags) Save Support code generation of popcnt instruction. +mbmi +Target Report Mask(ISA_BMI) Var(ix86_isa_flags) Save +Support BMI built-in functions and code generation + mcx16 Target Report Mask(ISA_CX16) Var(ix86_isa_flags) Save Support code generation of cmpxchg16b instruction. diff --git a/gcc/config/i386/x86intrin.h b/gcc/config/i386/x86intrin.h index 29d44dc..9a7366b 100644 --- a/gcc/config/i386/x86intrin.h +++ b/gcc/config/i386/x86intrin.h @@ -81,6 +81,10 @@ #include #endif +#ifdef __BMI__ +#include +#endif + #ifdef __POPCNT__ #include #endif diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index d39ab48..0aa1839 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -9349,6 +9349,25 @@ unsigned char __builtin_ia32_lwpins32 (unsigned int, unsigned int, unsigned int) unsigned char __builtin_ia32_lwpins64 (unsigned __int64, unsigned int, unsigned int) @end smallexample +The following built-in functions are available when @option{-mbmi} is used. +All of them generate the machine instruction that is part of the name. +@smallexample +unsigned int __builtin_ia32_andn_u32(unsigned int, unsigned int); +unsigned long long __builtin_ia32_andn_u64 (unsigned long long, unsigned long long); +unsigned int __builtin_ia32_bextr_u32(unsigned int, unsigned int); +unsigned long long __builtin_ia32_bextr_u64 (unsigned long long, unsigned long long); +unsigned int __builtin_ia32_blsi_u32(unsigned int); +unsigned long long __builtin_ia32_blsi_u64 (unsigned long long); +unsigned int __builtin_ia32_blsmsk_u32(unsigned int); +unsigned long long __builtin_ia32_blsmsk_u64 (unsigned long long); +unsigned int __builtin_ia32_blsr_u32(unsigned int); +unsigned long long __builtin_ia32_blsr_u64 (unsigned long long); +unsigned int __builtin_ia32_lzcnt_u32(unsigned int); +unsigned long long __builtin_ia32_lzcnt_u64 (unsigned long long); +unsigned int __builtin_ia32_tzcnt_u32(unsigned int); +unsigned long long __builtin_ia32_tzcnt_u64 (unsigned long long); +@end smallexample + The following built-in functions are available when @option{-m3dnow} is used. All of them generate the machine instruction that is part of the name. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index ee68454..d47c397 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -597,7 +597,7 @@ Objective-C and Objective-C++ Dialects}. -mcld -mcx16 -msahf -mmovbe -mcrc32 -mrecip @gol -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol -maes -mpclmul -mfsgsbase -mrdrnd -mf16c -mfused-madd @gol --msse4a -m3dnow -mpopcnt -mabm -mfma4 -mxop -mlwp @gol +-msse4a -m3dnow -mpopcnt -mabm -mbmi -mfma4 -mxop -mlwp @gol -mthreads -mno-align-stringops -minline-all-stringops @gol -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol -mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol @@ -12418,6 +12418,8 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. @itemx -mno-popcnt @itemx -mabm @itemx -mno-abm +@itemx -mbmi +@itemx -mno-bmi @opindex mmmx @opindex mno-mmx @opindex msse @@ -12426,7 +12428,7 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. @opindex mno-3dnow These switches enable or disable the use of instructions in the MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, AVX, AES, PCLMUL, FSGSBASE, RDRND, -F16C, SSE4A, FMA4, XOP, LWP, ABM or 3DNow!@: extended instruction sets. +F16C, SSE4A, FMA4, XOP, LWP, ABM, BMI, or 3DNow!@: extended instruction sets. These extensions are also available as built-in functions: see @ref{X86 Built-in Functions}, for details of the functions enabled and disabled by these switches. diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C index 7297068..f0a382a 100644 --- a/gcc/testsuite/g++.dg/other/i386-2.C +++ b/gcc/testsuite/g++.dg/other/i386-2.C @@ -1,8 +1,8 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mavx -mxop -maes -mpclmul -mpopcnt -mabm -mlwp -mfsgsbase -mrdrnd -mf16c" } */ +/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mavx -mxop -maes -mpclmul -mpopcnt -mabm -mbmi -mlwp -mfsgsbase -mrdrnd -mf16c" } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h, abmintrin.h, - lwpintrin.h, popcntintrin.h and mm3dnow.h are usable with + bmiintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h are usable with -O -pedantic-errors. */ #include diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C index 75515ef..4b27372 100644 --- a/gcc/testsuite/g++.dg/other/i386-3.C +++ b/gcc/testsuite/g++.dg/other/i386-3.C @@ -1,8 +1,8 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -mavx -mxop -maes -mpclmul -mpopcnt -mabm -mlwp -mfsgsbase -mrdrnd -mf16c" } */ +/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -mavx -mxop -maes -mpclmul -mpopcnt -mabm -mbmi -mlwp -mfsgsbase -mrdrnd -mf16c" } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h, abmintrin.h, - lwpintrin.h, popcntintrin.h and mm3dnow.h are usable with + bmiintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h are usable with -O -fkeep-inline-functions. */ #include diff --git a/gcc/testsuite/gcc.target/i386/bmi-1.c b/gcc/testsuite/gcc.target/i386/bmi-1.c new file mode 100644 index 0000000..dc964ba --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi-1.c @@ -0,0 +1,46 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mbmi " } */ +/* { dg-final { scan-assembler "andn\[^\\n]*(%|)eax" } } */ +/* { dg-final { scan-assembler "bextr\[^\\n]*(%|)eax" } } */ +/* { dg-final { scan-assembler "blsi\[^\\n]*(%|)eax" } } */ +/* { dg-final { scan-assembler "blsmsk\[^\\n]*(%|)eax" } } */ +/* { dg-final { scan-assembler "blsr\[^\\n]*(%|)eax" } } */ +/* { dg-final { scan-assembler "tzcntl\[^\\n]*(%|)eax" } } */ + +#include + +unsigned int +func_andn32 (unsigned int X, unsigned int Y) +{ + return __andn_u32(X, Y); +} + +unsigned int +func_bextr32 (unsigned int X, unsigned int Y) +{ + return __bextr_u32(X, Y); +} + +unsigned int +func_blsi32 (unsigned int X) +{ + return __blsi_u32(X); +} + +unsigned int +func_blsmsk32 (unsigned int X) +{ + return __blsmsk_u32(X); +} + +unsigned int +func_blsr32 (unsigned int X) +{ + return __blsr_u32(X); +} + +unsigned int +func_tzcnt32 (unsigned int X) +{ + return __tzcnt_u32(X); +} diff --git a/gcc/testsuite/gcc.target/i386/bmi-2.c b/gcc/testsuite/gcc.target/i386/bmi-2.c new file mode 100644 index 0000000..4f8c14f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi-2.c @@ -0,0 +1,47 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -mbmi " } */ +/* { dg-final { scan-assembler "andn\[^\\n]*(%|)rax" } } */ +/* { dg-final { scan-assembler "bextr\[^\\n]*(%|)rax" } } */ +/* { dg-final { scan-assembler "blsi\[^\\n]*(%|)rax" } } */ +/* { dg-final { scan-assembler "blsmsk\[^\\n]*(%|)rax" } } */ +/* { dg-final { scan-assembler "blsr\[^\\n]*(%|)rax" } } */ +/* { dg-final { scan-assembler "tzcntq\[^\\n]*(%|)rax" } } */ + +#include + +unsigned long long +func_andn64 (unsigned long long X, unsigned long long Y) +{ + return __andn_u64 (X, Y); +} + +unsigned long long +func_bextr64 (unsigned long long X, unsigned long long Y) +{ + return __bextr_u64 (X, Y); +} + +unsigned long long +func_blsi64 (unsigned long long X) +{ + return __blsi_u64 (X); +} + +unsigned long long +func_blsmsk64 (unsigned long long X) +{ + return __blsmsk_u64 (X); +} + +unsigned long long +func_blsr64 (unsigned long long X) +{ + return __blsr_u64 (X); +} + +unsigned long long +func_tzcnt64 (unsigned long long X) +{ + return __tzcnt_u64 (X); +} diff --git a/gcc/testsuite/gcc.target/i386/bmi-3.c b/gcc/testsuite/gcc.target/i386/bmi-3.c new file mode 100644 index 0000000..ddc5e0f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi-3.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mbmi " } */ +/* { dg-final { scan-assembler "tzcntw\[^\\n]*(%|)ax" } } */ + +#include + +unsigned short +func_tzcnt16 (unsigned short X) +{ + return __tzcnt_u16(X); +} diff --git a/gcc/testsuite/gcc.target/i386/funcspec-5.c b/gcc/testsuite/gcc.target/i386/funcspec-5.c index 34da51c..5e07d85 100644 --- a/gcc/testsuite/gcc.target/i386/funcspec-5.c +++ b/gcc/testsuite/gcc.target/i386/funcspec-5.c @@ -5,6 +5,7 @@ extern void test_abm (void) __attribute__((__target__("abm"))); extern void test_aes (void) __attribute__((__target__("aes"))); +extern void test_bmi (void) __attribute__((__target__("bmi"))); extern void test_mmx (void) __attribute__((__target__("mmx"))); extern void test_pclmul (void) __attribute__((__target__("pclmul"))); extern void test_popcnt (void) __attribute__((__target__("popcnt"))); @@ -21,6 +22,7 @@ extern void test_ssse3 (void) __attribute__((__target__("ssse3"))); extern void test_no_abm (void) __attribute__((__target__("no-abm"))); extern void test_no_aes (void) __attribute__((__target__("no-aes"))); +extern void test_no_bmi (void) __attribute__((__target__("no-bmi"))); extern void test_no_mmx (void) __attribute__((__target__("no-mmx"))); extern void test_no_pclmul (void) __attribute__((__target__("no-pclmul"))); extern void test_no_popcnt (void) __attribute__((__target__("no-popcnt"))); diff --git a/gcc/testsuite/gcc.target/i386/funcspec-6.c b/gcc/testsuite/gcc.target/i386/funcspec-6.c index 575be9b..81c831c 100644 --- a/gcc/testsuite/gcc.target/i386/funcspec-6.c +++ b/gcc/testsuite/gcc.target/i386/funcspec-6.c @@ -5,6 +5,7 @@ extern void test_abm (void) __attribute__((__target__("abm"))); extern void test_aes (void) __attribute__((__target__("aes"))); +extern void test_bmi (void) __attribute__((__target__("bmi"))); extern void test_mmx (void) __attribute__((__target__("mmx"))); extern void test_pclmul (void) __attribute__((__target__("pclmul"))); extern void test_popcnt (void) __attribute__((__target__("popcnt"))); @@ -21,6 +22,7 @@ extern void test_ssse3 (void) __attribute__((__target__("ssse3"))); extern void test_no_abm (void) __attribute__((__target__("no-abm"))); extern void test_no_aes (void) __attribute__((__target__("no-aes"))); +extern void test_no_bmi (void) __attribute__((__target__("no-bmi"))); extern void test_no_mmx (void) __attribute__((__target__("no-mmx"))); extern void test_no_pclmul (void) __attribute__((__target__("no-pclmul"))); extern void test_no_popcnt (void) __attribute__((__target__("no-popcnt"))); diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c index 2d50f41..d59777b 100644 --- a/gcc/testsuite/gcc.target/i386/sse-12.c +++ b/gcc/testsuite/gcc.target/i386/sse-12.c @@ -1,8 +1,9 @@ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h, mm3dnow.h, - abmintrin.h, lwpintrin.h, popcntintrin.h and mm_malloc.h are usable + fma4intrin.h, abmintrin.h, bmiintrin.h, lwpintrin.h, + popcntintrin.h and mm_malloc.h are usable with -O -std=c89 -pedantic-errors. */ /* { dg-do compile } */ -/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlwp -mfsgsbase -mrdrnd -mf16c" } */ +/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mbmi -mlwp -mfsgsbase -mrdrnd -mf16c" } */ #include