Patchwork [1/2] AMD bdver2 processors - BMI

login
register
mail settings
Submitter Quentin Neill
Date Nov. 8, 2010, 4:13 p.m.
Message ID <AANLkTim9Z6ksdsBAAHwnC+6UOzyTR+jZm07dH3Rh8rxL@mail.gmail.com>
Download mbox | patch
Permalink /patch/70429/
State New
Headers show

Comments

Quentin Neill - Nov. 8, 2010, 4:13 p.m.
On Thu, Nov 4, 2010 at 6:31 PM, Richard Henderson <rth@redhat.com> wrote:
> On 11/04/2010 03:50 PM, Quentin Neill wrote:
>> And I had to add "-m32" to bmi-1.c to get tzcntl working.
>
> That should not be true.  You'll have to debug that too.
>
>> If so, I have three new test failures to debug.  Any good -fdump flags
>> to do that?
>
> Hmm.  Without looking I'd have expected this to be done
> somewhere in the first handful of gimple optimizations.
> An immediate 0 means we should pretty much fold it right away.
>
>
> r~

With fixes suggested by Richard: whitespace fixes, proper folding
tests, proper flags for the tests, and fix to intrinsics to call the
right builtin for the arguments.

Passes bootstrap and make -k check on x86-64.

Okay to commit?
Richard Henderson - Nov. 8, 2010, 4:31 p.m.
On 11/08/2010 08:13 AM, Quentin Neill wrote:
> +__tzcnt_u32 (unsigned int __X)
> +{
> +  return __builtin_ctzl(__X);

ctz, no l.

> +  UNSPEC_TZCNT

Leftover.

Otherwise ok.


r~
Quentin Neill - Nov. 9, 2010, 4:16 a.m.
On Mon, Nov 8, 2010 at 10:31 AM, Richard Henderson <rth@redhat.com> wrote:
> On 11/08/2010 08:13 AM, Quentin Neill wrote:
>> +__tzcnt_u32 (unsigned int __X)
>> +{
>> +  return __builtin_ctzl(__X);
>
> ctz, no l.
>
>> +  UNSPEC_TZCNT
>
> Leftover.
>
> Otherwise ok.
>
>
> r~
>

Attached with those two changes, passes bootstrap and make -k check.

Okay to commit?  (Or does the "Otherwise ok" count)
H.J. Lu - Nov. 11, 2010, 2:06 a.m.
On Mon, Nov 8, 2010 at 8:16 PM, Quentin Neill
<quentin.neill.gnu@gmail.com> wrote:
> On Mon, Nov 8, 2010 at 10:31 AM, Richard Henderson <rth@redhat.com> wrote:
>> On 11/08/2010 08:13 AM, Quentin Neill wrote:
>>> +__tzcnt_u32 (unsigned int __X)
>>> +{
>>> +  return __builtin_ctzl(__X);
>>
>> ctz, no l.
>>
>>> +  UNSPEC_TZCNT
>>
>> Leftover.
>>
>> Otherwise ok.
>>
>>
>> r~
>>
>
> Attached with those two changes, passes bootstrap and make -k check.
>
> Okay to commit?  (Or does the "Otherwise ok" count)

Your checkin caused:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=46422

Patch

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 64eaaef..f31711d 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -318,7 +318,7 @@  i[34567]86-*-*)
 		       nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
 		       immintrin.h x86intrin.h avxintrin.h xopintrin.h
 		       ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
-		       abmintrin.h"
+		       abmintrin.h bmiintrin.h"
 	;;
 x86_64-*-*)
 	cpu_type=i386
@@ -329,7 +329,7 @@  x86_64-*-*)
 		       nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
 		       immintrin.h x86intrin.h avxintrin.h xopintrin.h
 		       ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
-		       abmintrin.h"
+		       abmintrin.h bmiintrin.h"
 	need_64bit_hwint=yes
 	;;
 ia64-*-*)
diff --git a/gcc/config/i386/bmiintrin.h b/gcc/config/i386/bmiintrin.h
new file mode 100644
index 0000000..d3aa9fc
--- /dev/null
+++ b/gcc/config/i386/bmiintrin.h
@@ -0,0 +1,145 @@ 
+/* Copyright (C) 2010 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _X86INTRIN_H_INCLUDED
+# error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef __BMI__
+# error "BMI instruction set not enabled"
+#endif /* __BMI__ */
+
+#ifndef _BMIINTRIN_H_INCLUDED
+#define _BMIINTRIN_H_INCLUDED
+
+extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lzcnt_u16 (unsigned short __X)
+{
+  return __builtin_ia32_lzcnt_u16 (__X);
+}
+
+extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__tzcnt_u16 (unsigned short __X)
+{
+  return __builtin_ctzs(__X);
+}
+
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__andn_u32 (unsigned int __X, unsigned int __Y)
+{
+  unsigned int tmp = ~(__X) & (__Y);
+  return tmp;
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__bextr_u32 (unsigned int __X, unsigned int __Y)
+{
+  return __builtin_ia32_bextr_u32 (__X, __Y);
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsi_u32 (unsigned int __X)
+{
+  unsigned int tmp = (__X) & (-(__X));
+  return tmp;
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsmsk_u32 (unsigned int __X)
+{
+  unsigned int tmp = (__X) ^ (__X - 1);
+  return tmp;
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsr_u32 (unsigned int __X)
+{
+  unsigned int tmp = (__X) & (__X - 1);
+  return tmp;
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lzcnt_u32 (unsigned int __X)
+{
+  return __builtin_ia32_lzcnt_u32 (__X);
+}
+
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__tzcnt_u32 (unsigned int __X)
+{
+  return __builtin_ctzl(__X);
+}
+
+
+#ifdef  __x86_64__
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__andn_u64 (unsigned long long __X, unsigned long long __Y)
+{
+  unsigned long long tmp = ~(__X) & (__Y);
+  return tmp;
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__bextr_u64 (unsigned long long __X, unsigned long long __Y)
+{
+  return __builtin_ia32_bextr_u64 (__X, __Y);
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsi_u64 (unsigned long long __X)
+{
+  unsigned long long tmp = (__X) & (-(__X));
+  return tmp;
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsmsk_u64 (unsigned long long __X)
+{
+  unsigned long long tmp = (__X) ^ (__X - 1);
+  return tmp;
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsr_u64 (unsigned long long __X)
+{
+  unsigned long long tmp = (__X) & (__X - 1);
+  return tmp;
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lzcnt_u64 (unsigned long long __X)
+{
+  return __builtin_ia32_lzcnt_u64 (__X);
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__tzcnt_u64 (unsigned long long __X)
+{
+  return __builtin_ctzll(__X);
+}
+
+#endif /* __x86_64__  */
+
+#endif /* _BMIINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index 11c2f1e..0f1af7f 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -62,6 +62,7 @@ 
 
 /* Extended Features (%eax == 7) */
 #define bit_FSGSBASE	(1 << 0)
+#define bit_BMI		(1 << 3)
 
 #if defined(__i386__) && defined(__PIC__)
 /* %ebx may be the PIC register.  */
diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
index 8a76857..bcc18b7 100644
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -397,6 +397,7 @@  const char *host_detect_local_cpu (int argc, const char **argv)
   unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0;
   unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0;
   unsigned int has_fma4 = 0, has_xop = 0;
+  unsigned int has_bmi = 0;
 
   bool arch;
 
@@ -467,6 +468,10 @@  const char *host_detect_local_cpu (int argc, const char **argv)
       has_longmode = edx & bit_LM;
       has_3dnowp = edx & bit_3DNOWP;
       has_3dnow = edx & bit_3DNOW;
+
+      __cpuid (0x7, eax, ebx, ecx, edx);
+
+      has_bmi = ebx & bit_BMI;
     }
 
   if (!arch)
@@ -686,6 +691,8 @@  const char *host_detect_local_cpu (int argc, const char **argv)
 	options = concat (options, " -mfma4", NULL);
       if (has_xop)
 	options = concat (options, " -mxop", NULL);
+      if (has_bmi)
+	options = concat (options, " -mbmi", NULL);
 
       if (has_avx)
 	options = concat (options, " -mavx", NULL);
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 09dd9eb..110b81d 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -145,7 +145,9 @@  DEF_FUNCTION_TYPE (INT64, INT64)
 DEF_FUNCTION_TYPE (INT64, V2DF)
 DEF_FUNCTION_TYPE (INT64, V4SF)
 DEF_FUNCTION_TYPE (UINT64, INT)
+DEF_FUNCTION_TYPE (UINT, UINT)
 DEF_FUNCTION_TYPE (UINT16, UINT16)
+DEF_FUNCTION_TYPE (UINT64, UINT64)
 DEF_FUNCTION_TYPE (UINT64, PUNSIGNED)
 DEF_FUNCTION_TYPE (V16QI, PCCHAR)
 DEF_FUNCTION_TYPE (V16QI, V16QI)
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index 1846efb..e84347c 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -244,6 +244,8 @@  ix86_target_macros_internal (int isa_flag,
     def_or_undef (parse_in, "__LWP__");
   if (isa_flag & OPTION_MASK_ISA_ABM)
     def_or_undef (parse_in, "__ABM__");
+  if (isa_flag & OPTION_MASK_ISA_BMI)
+    def_or_undef (parse_in, "__BMI__");
   if (isa_flag & OPTION_MASK_ISA_POPCNT)
     def_or_undef (parse_in, "__POPCNT__");
   if (isa_flag & OPTION_MASK_ISA_FSGSBASE)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 32d6371..0887984 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2367,6 +2367,8 @@  static int ix86_isa_flags_explicit;
 #define OPTION_MASK_ISA_ABM_SET \
   (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
 
+#define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI
+
 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
@@ -2421,6 +2423,7 @@  static int ix86_isa_flags_explicit;
 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
+#define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI
 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
@@ -2718,6 +2721,19 @@  ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
 	}
       return true;
 
+    case OPT_mbmi:
+      if (value)
+	{
+	  ix86_isa_flags |= OPTION_MASK_ISA_BMI_SET;
+	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_SET;
+	}
+      else
+	{
+	  ix86_isa_flags &= ~OPTION_MASK_ISA_BMI_UNSET;
+	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_UNSET;
+	}
+      return true;
+
     case OPT_mpopcnt:
       if (value)
 	{
@@ -2886,6 +2902,7 @@  ix86_target_string (int isa, int flags, const char *arch, const char *tune,
     { "-m3dnowa",	OPTION_MASK_ISA_3DNOW_A },
     { "-mmmx",		OPTION_MASK_ISA_MMX },
     { "-mabm",		OPTION_MASK_ISA_ABM },
+    { "-mbmi",		OPTION_MASK_ISA_BMI },
     { "-mpopcnt",	OPTION_MASK_ISA_POPCNT },
     { "-mmovbe",	OPTION_MASK_ISA_MOVBE },
     { "-mcrc32",	OPTION_MASK_ISA_CRC32 },
@@ -3141,7 +3158,9 @@  ix86_option_override_internal (bool main_args_p)
       PTA_LWP = 1 << 23,
       PTA_FSGSBASE = 1 << 24,
       PTA_RDRND = 1 << 25,
-      PTA_F16C = 1 << 26
+      PTA_F16C = 1 << 26,
+      PTA_BMI = 1 << 27
+      /* if this reaches 32, need to widen struct pta flags below */
     };
 
   static struct pta
@@ -3473,6 +3492,9 @@  ix86_option_override_internal (bool main_args_p)
 	if (processor_alias_table[i].flags & PTA_ABM
 	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
 	  ix86_isa_flags |= OPTION_MASK_ISA_ABM;
+	if (processor_alias_table[i].flags & PTA_BMI
+	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
+	  ix86_isa_flags |= OPTION_MASK_ISA_BMI;
 	if (processor_alias_table[i].flags & PTA_CX16
 	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
 	  ix86_isa_flags |= OPTION_MASK_ISA_CX16;
@@ -4288,6 +4310,7 @@  ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
     /* isa options */
     IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
     IX86_ATTR_ISA ("abm",	OPT_mabm),
+    IX86_ATTR_ISA ("bmi",	OPT_mbmi),
     IX86_ATTR_ISA ("aes",	OPT_maes),
     IX86_ATTR_ISA ("avx",	OPT_mavx),
     IX86_ATTR_ISA ("mmx",	OPT_mmmx),
@@ -23632,6 +23655,11 @@  enum ix86_builtins
 
   IX86_BUILTIN_CLZS,
 
+  /* BMI instructions.  */
+  IX86_BUILTIN_BEXTR32,
+  IX86_BUILTIN_BEXTR64,
+  IX86_BUILTIN_CTZS,
+
   /* FSGSBASE instructions.  */
   IX86_BUILTIN_RDFSBASE32,
   IX86_BUILTIN_RDFSBASE64,
@@ -24571,6 +24599,11 @@  static const struct builtin_description bdesc_args[] =
 
   { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm,   "__builtin_clzs",   IX86_BUILTIN_CLZS,    UNKNOWN,     (int) UINT16_FTYPE_UINT16 },
 
+  /* BMI */
+  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
+  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
+  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2,       "__builtin_ctzs",           IX86_BUILTIN_CTZS,    UNKNOWN, (int) UINT16_FTYPE_UINT16 },
+
   /* F16C */
   { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
   { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
@@ -25795,9 +25828,11 @@  ix86_expand_args_builtin (const struct builtin_description *d,
     case FLOAT128_FTYPE_FLOAT128:
     case FLOAT_FTYPE_FLOAT:
     case INT_FTYPE_INT:
+    case UINT_FTYPE_UINT:
     case UINT64_FTYPE_INT:
     case UINT16_FTYPE_UINT16:
     case INT64_FTYPE_INT64:
+    case UINT64_FTYPE_UINT64:
     case INT64_FTYPE_V4SF:
     case INT64_FTYPE_V2DF:
     case INT_FTYPE_V16QI:
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 3c7f9f0..7e71e3b 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -59,6 +59,7 @@  see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define TARGET_LWP	OPTION_ISA_LWP
 #define TARGET_ROUND	OPTION_ISA_ROUND
 #define TARGET_ABM	OPTION_ISA_ABM
+#define TARGET_BMI	OPTION_ISA_BMI
 #define TARGET_POPCNT	OPTION_ISA_POPCNT
 #define TARGET_SAHF	OPTION_ISA_SAHF
 #define TARGET_MOVBE	OPTION_ISA_MOVBE
@@ -2362,6 +2363,14 @@  struct GTY(()) machine_function {
 extern void debug_ready_dispatch (void);
 extern void debug_dispatch_window (int);
 
+/* The value at zero is only defined for the BMI instructions
+   LZCNT and TZCNT, not the BSR/BSF insns in the original isa.  */
+#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
+	((VALUE) = GET_MODE_BITSIZE (MODE), TARGET_BMI)
+#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
+	((VALUE) = GET_MODE_BITSIZE (MODE), TARGET_BMI)
+
+
 /*
 Local variables:
 version-control: t
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index feaf781..829333b 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -227,6 +227,10 @@ 
   UNSPEC_VTESTP
   UNSPEC_VCVTPH2PS
   UNSPEC_VCVTPS2PH
+
+  ;; For BMI support
+  UNSPEC_BEXTR
+  UNSPEC_TZCNT
 ])
 
 (define_c_enum "unspecv" [
@@ -11839,13 +11843,19 @@ 
    (set_attr "mode" "<MODE>")])
 
 (define_insn "ctz<mode>2"
-  [(set (match_operand:SWI48 0 "register_operand" "=r")
-	(ctz:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
+  [(set (match_operand:SWI248 0 "register_operand" "=r")
+	(ctz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "rm")))
    (clobber (reg:CC FLAGS_REG))]
   ""
-  "bsf{<imodesuffix>}\t{%1, %0|%0, %1}"
+{
+  if (TARGET_BMI)
+    return "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
+  else
+    return "bsf{<imodesuffix>}\t{%1, %0|%0, %1}";
+}
   [(set_attr "type" "alu1")
    (set_attr "prefix_0f" "1")
+   (set (attr "prefix_rep") (symbol_ref "TARGET_BMI"))
    (set_attr "mode" "<MODE>")])
 
 (define_expand "clz<mode>2"
@@ -11872,12 +11882,74 @@ 
   [(set (match_operand:SWI248 0 "register_operand" "=r")
 	(clz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "rm")))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_ABM"
+  "TARGET_ABM || TARGET_BMI"
   "lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
   [(set_attr "prefix_rep" "1")
    (set_attr "type" "bitmanip")
    (set_attr "mode" "<MODE>")])
 
+;; BMI instructions.
+(define_insn "*bmi_andn_<mode>"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (and:SWI48
+          (not:SWI48
+            (match_operand:SWI48 1 "register_operand" "r"))
+            (match_operand:SWI48 2 "nonimmediate_operand" "rm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI"
+  "andn\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "bmi_bextr_<mode>"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")
+                       (match_operand:SWI48 2 "register_operand" "r")]
+                       UNSPEC_BEXTR))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI"
+  "bextr\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*bmi_blsi_<mode>"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (and:SWI48
+          (neg:SWI48
+            (match_operand:SWI48 1 "nonimmediate_operand" "rm"))
+          (match_dup 1)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI"
+  "blsi\t{%1, %0|%0, %1}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*bmi_blsmsk_<mode>"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (xor:SWI48
+          (plus:SWI48
+            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+            (const_int -1))
+          (match_dup 1)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI"
+  "blsmsk\t{%1, %0|%0, %1}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*bmi_blsr_<mode>"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (and:SWI48
+          (plus:SWI48
+            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+            (const_int -1))
+          (match_dup 1)))
+   (clobber (reg:CC FLAGS_REG))]
+   "TARGET_BMI"
+   "blsr\t{%1, %0|%0, %1}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "bsr_rex64"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(minus:DI (const_int 63)
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 28a921f..f4c3c58 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -358,6 +358,10 @@  mpopcnt
 Target Report Mask(ISA_POPCNT) Var(ix86_isa_flags) Save
 Support code generation of popcnt instruction.
 
+mbmi
+Target Report Mask(ISA_BMI) Var(ix86_isa_flags) Save
+Support BMI built-in functions and code generation
+
 mcx16
 Target Report Mask(ISA_CX16) Var(ix86_isa_flags) Save
 Support code generation of cmpxchg16b instruction.
diff --git a/gcc/config/i386/x86intrin.h b/gcc/config/i386/x86intrin.h
index 29d44dc..9a7366b 100644
--- a/gcc/config/i386/x86intrin.h
+++ b/gcc/config/i386/x86intrin.h
@@ -81,6 +81,10 @@ 
 #include <abmintrin.h>
 #endif
 
+#ifdef __BMI__
+#include <bmiintrin.h>
+#endif
+
 #ifdef __POPCNT__
 #include <popcntintrin.h>
 #endif
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index de2be88..a7bff14 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -9378,6 +9378,16 @@  unsigned char __builtin_ia32_lwpins32 (unsigned int, unsigned int, unsigned int)
 unsigned char __builtin_ia32_lwpins64 (unsigned __int64, unsigned int, unsigned int)
 @end smallexample
 
+The following built-in functions are available when @option{-mbmi} is used.
+All of them generate the machine instruction that is part of the name.
+@smallexample
+unsigned int __builtin_ia32_bextr_u32(unsigned int, unsigned int);
+unsigned long long __builtin_ia32_bextr_u64 (unsigned long long, unsigned long long);
+unsigned short __builtin_ia32_lzcnt_16(unsigned short);
+unsigned int __builtin_ia32_lzcnt_u32(unsigned int);
+unsigned long long __builtin_ia32_lzcnt_u64 (unsigned long long);
+@end smallexample
+
 The following built-in functions are available when @option{-m3dnow} is used.
 All of them generate the machine instruction that is part of the name.
 
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 4a605e3..6cde607 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -597,7 +597,7 @@  Objective-C and Objective-C++ Dialects}.
 -mcld -mcx16 -msahf -mmovbe -mcrc32 -mrecip -mvzeroupper @gol
 -mmmx  -msse  -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol
 -maes -mpclmul -mfsgsbase -mrdrnd -mf16c -mfused-madd @gol
--msse4a -m3dnow -mpopcnt -mabm -mfma4 -mxop -mlwp @gol
+-msse4a -m3dnow -mpopcnt -mabm -mbmi -mfma4 -mxop -mlwp @gol
 -mthreads  -mno-align-stringops  -minline-all-stringops @gol
 -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
 -mpush-args  -maccumulate-outgoing-args  -m128bit-long-double @gol
@@ -12418,6 +12418,8 @@  preferred alignment to @option{-mpreferred-stack-boundary=2}.
 @itemx -mno-popcnt
 @itemx -mabm
 @itemx -mno-abm
+@itemx -mbmi
+@itemx -mno-bmi
 @opindex mmmx
 @opindex mno-mmx
 @opindex msse
@@ -12426,7 +12428,7 @@  preferred alignment to @option{-mpreferred-stack-boundary=2}.
 @opindex mno-3dnow
 These switches enable or disable the use of instructions in the MMX,
 SSE, SSE2, SSE3, SSSE3, SSE4.1, AVX, AES, PCLMUL, FSGSBASE, RDRND,
-F16C, SSE4A, FMA4, XOP, LWP, ABM or 3DNow!@: extended instruction sets.
+F16C, SSE4A, FMA4, XOP, LWP, ABM, BMI, or 3DNow!@: extended instruction sets.
 These extensions are also available as built-in functions: see
 @ref{X86 Built-in Functions}, for details of the functions enabled and
 disabled by these switches.
diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C
index 7297068..f0a382a 100644
--- a/gcc/testsuite/g++.dg/other/i386-2.C
+++ b/gcc/testsuite/g++.dg/other/i386-2.C
@@ -1,8 +1,8 @@ 
 /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mavx -mxop -maes -mpclmul -mpopcnt -mabm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
+/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mavx -mxop -maes -mpclmul -mpopcnt -mabm -mbmi -mlwp -mfsgsbase -mrdrnd -mf16c" } */
 
 /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h, abmintrin.h,
-   lwpintrin.h, popcntintrin.h and mm3dnow.h are usable with
+   bmiintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h are usable with
    -O -pedantic-errors.  */
 
 #include <x86intrin.h>
diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C
index 75515ef..4b27372 100644
--- a/gcc/testsuite/g++.dg/other/i386-3.C
+++ b/gcc/testsuite/g++.dg/other/i386-3.C
@@ -1,8 +1,8 @@ 
 /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -mavx -mxop -maes -mpclmul -mpopcnt -mabm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
+/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -mavx -mxop -maes -mpclmul -mpopcnt -mabm -mbmi -mlwp -mfsgsbase -mrdrnd -mf16c" } */
 
 /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h, abmintrin.h,
-   lwpintrin.h, popcntintrin.h and mm3dnow.h are usable with
+   bmiintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h are usable with
    -O -fkeep-inline-functions.  */
 
 #include <x86intrin.h>
diff --git a/gcc/testsuite/gcc.target/i386/bmi-1.c b/gcc/testsuite/gcc.target/i386/bmi-1.c
new file mode 100644
index 0000000..dc964ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi-1.c
@@ -0,0 +1,46 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi " } */
+/* { dg-final { scan-assembler "andn\[^\\n]*(%|)eax" } } */
+/* { dg-final { scan-assembler "bextr\[^\\n]*(%|)eax" } } */
+/* { dg-final { scan-assembler "blsi\[^\\n]*(%|)eax" } } */
+/* { dg-final { scan-assembler "blsmsk\[^\\n]*(%|)eax" } } */
+/* { dg-final { scan-assembler "blsr\[^\\n]*(%|)eax" } } */
+/* { dg-final { scan-assembler "tzcntl\[^\\n]*(%|)eax" } } */
+
+#include <x86intrin.h>
+
+unsigned int
+func_andn32 (unsigned int X, unsigned int Y)
+{
+  return __andn_u32(X, Y);
+}
+
+unsigned int
+func_bextr32 (unsigned int X, unsigned int Y)
+{
+  return __bextr_u32(X, Y);
+}
+
+unsigned int
+func_blsi32 (unsigned int X)
+{
+  return __blsi_u32(X);
+}
+
+unsigned int
+func_blsmsk32 (unsigned int X)
+{
+  return __blsmsk_u32(X);
+}
+
+unsigned int
+func_blsr32 (unsigned int X)
+{
+  return __blsr_u32(X);
+}
+
+unsigned int
+func_tzcnt32 (unsigned int X)
+{
+  return __tzcnt_u32(X);
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi-2.c b/gcc/testsuite/gcc.target/i386/bmi-2.c
new file mode 100644
index 0000000..4f8c14f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi-2.c
@@ -0,0 +1,47 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -mbmi " } */
+/* { dg-final { scan-assembler "andn\[^\\n]*(%|)rax" } } */
+/* { dg-final { scan-assembler "bextr\[^\\n]*(%|)rax" } } */
+/* { dg-final { scan-assembler "blsi\[^\\n]*(%|)rax" } } */
+/* { dg-final { scan-assembler "blsmsk\[^\\n]*(%|)rax" } } */
+/* { dg-final { scan-assembler "blsr\[^\\n]*(%|)rax" } } */
+/* { dg-final { scan-assembler "tzcntq\[^\\n]*(%|)rax" } } */
+
+#include <x86intrin.h>
+
+unsigned long long
+func_andn64 (unsigned long long X, unsigned long long Y)
+{
+  return __andn_u64 (X, Y);
+}
+
+unsigned long long
+func_bextr64 (unsigned long long X, unsigned long long Y)
+{
+  return __bextr_u64 (X, Y);
+}
+
+unsigned long long
+func_blsi64 (unsigned long long X)
+{
+  return __blsi_u64 (X);
+}
+
+unsigned long long
+func_blsmsk64 (unsigned long long X)
+{
+  return __blsmsk_u64 (X);
+}
+
+unsigned long long
+func_blsr64 (unsigned long long X)
+{
+  return __blsr_u64 (X);
+}
+
+unsigned long long
+func_tzcnt64 (unsigned long long X)
+{
+  return __tzcnt_u64 (X);
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi-3.c b/gcc/testsuite/gcc.target/i386/bmi-3.c
new file mode 100644
index 0000000..ddc5e0f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi-3.c
@@ -0,0 +1,11 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi " } */
+/* { dg-final { scan-assembler "tzcntw\[^\\n]*(%|)ax" } } */
+
+#include <x86intrin.h>
+
+unsigned short
+func_tzcnt16 (unsigned short X)
+{
+  return __tzcnt_u16(X);
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi-4.c b/gcc/testsuite/gcc.target/i386/bmi-4.c
new file mode 100644
index 0000000..e0a1161
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi-4.c
@@ -0,0 +1,13 @@ 
+/* { dg-do link } */
+/* { dg-options "-O2 -mbmi" } */
+
+#include <x86intrin.h>
+
+/* Test that a constant operand 0 to tzcnt gets folded.  */
+extern void link_error(void);
+int main()
+{
+  if (__tzcnt_u32(0) != 32)
+    link_error();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi-5.c b/gcc/testsuite/gcc.target/i386/bmi-5.c
new file mode 100644
index 0000000..b29c453
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi-5.c
@@ -0,0 +1,14 @@ 
+/* { dg-do link } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O0 -mbmi" } */
+
+#include <x86intrin.h>
+
+/* Test that a constant operand 0 to tzcnt gets folded.  */
+extern void link_error(void);
+int main()
+{
+  if (__tzcnt_u64(0) != 64)
+    link_error();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi-6.c b/gcc/testsuite/gcc.target/i386/bmi-6.c
new file mode 100644
index 0000000..5d4bcf4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi-6.c
@@ -0,0 +1,13 @@ 
+/* { dg-do link } */
+/* { dg-options "-O0 -mbmi" } */
+
+#include <x86intrin.h>
+
+/* Test that a constant operand 0 to tzcnt gets folded.  */
+extern void link_error(void);
+int main()
+{
+  if (__tzcnt_u16(0) != 16)
+    link_error();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/funcspec-5.c b/gcc/testsuite/gcc.target/i386/funcspec-5.c
index 34da51c..5e07d85 100644
--- a/gcc/testsuite/gcc.target/i386/funcspec-5.c
+++ b/gcc/testsuite/gcc.target/i386/funcspec-5.c
@@ -5,6 +5,7 @@ 
 
 extern void test_abm (void)			__attribute__((__target__("abm")));
 extern void test_aes (void)			__attribute__((__target__("aes")));
+extern void test_bmi (void)			__attribute__((__target__("bmi")));
 extern void test_mmx (void)			__attribute__((__target__("mmx")));
 extern void test_pclmul (void)			__attribute__((__target__("pclmul")));
 extern void test_popcnt (void)			__attribute__((__target__("popcnt")));
@@ -21,6 +22,7 @@  extern void test_ssse3 (void)			__attribute__((__target__("ssse3")));
 
 extern void test_no_abm (void)			__attribute__((__target__("no-abm")));
 extern void test_no_aes (void)			__attribute__((__target__("no-aes")));
+extern void test_no_bmi (void)			__attribute__((__target__("no-bmi")));
 extern void test_no_mmx (void)			__attribute__((__target__("no-mmx")));
 extern void test_no_pclmul (void)		__attribute__((__target__("no-pclmul")));
 extern void test_no_popcnt (void)		__attribute__((__target__("no-popcnt")));
diff --git a/gcc/testsuite/gcc.target/i386/funcspec-6.c b/gcc/testsuite/gcc.target/i386/funcspec-6.c
index 575be9b..81c831c 100644
--- a/gcc/testsuite/gcc.target/i386/funcspec-6.c
+++ b/gcc/testsuite/gcc.target/i386/funcspec-6.c
@@ -5,6 +5,7 @@ 
 
 extern void test_abm (void)			__attribute__((__target__("abm")));
 extern void test_aes (void)			__attribute__((__target__("aes")));
+extern void test_bmi (void)			__attribute__((__target__("bmi")));
 extern void test_mmx (void)			__attribute__((__target__("mmx")));
 extern void test_pclmul (void)			__attribute__((__target__("pclmul")));
 extern void test_popcnt (void)			__attribute__((__target__("popcnt")));
@@ -21,6 +22,7 @@  extern void test_ssse3 (void)			__attribute__((__target__("ssse3")));
 
 extern void test_no_abm (void)			__attribute__((__target__("no-abm")));
 extern void test_no_aes (void)			__attribute__((__target__("no-aes")));
+extern void test_no_bmi (void)			__attribute__((__target__("no-bmi")));
 extern void test_no_mmx (void)			__attribute__((__target__("no-mmx")));
 extern void test_no_pclmul (void)		__attribute__((__target__("no-pclmul")));
 extern void test_no_popcnt (void)		__attribute__((__target__("no-popcnt")));
diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c
index 2d50f41..d59777b 100644
--- a/gcc/testsuite/gcc.target/i386/sse-12.c
+++ b/gcc/testsuite/gcc.target/i386/sse-12.c
@@ -1,8 +1,9 @@ 
 /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h, mm3dnow.h,
-   abmintrin.h, lwpintrin.h, popcntintrin.h and mm_malloc.h are usable
+   fma4intrin.h, abmintrin.h, bmiintrin.h, lwpintrin.h,
+   popcntintrin.h and mm_malloc.h are usable
    with -O -std=c89 -pedantic-errors.  */
 /* { dg-do compile } */
-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
+/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mbmi -mlwp -mfsgsbase -mrdrnd -mf16c" } */
 
 #include <x86intrin.h>