Patchwork GCC does not support *mmintrin.h with function specific opts

login
register
mail settings
Submitter Sriraman Tallam
Date May 16, 2013, 9:47 p.m.
Message ID <CAAs8HmzWkgmzyh0neX1UH8kydUejpTqUnLBjv25=A56jk3-DWw@mail.gmail.com>
Download mbox | patch
Permalink /patch/244428/
State New
Headers show

Comments

Sriraman Tallam - May 16, 2013, 9:47 p.m.
Hi Jakub,

   I have taken your proposed changes and made patch for this.  Please
let me know what you think. I have changed only the headers mmintrin.h
and x86intrin.h as that includes all the other headers.  The builtins
get enabled automatically when the pragma target is specified so need
to do any thing to def_builtin.  I have included 4 test case, where
intrinsics_4.c uses your example with __mm256_and_ps. I had to fix a
bug with lzcnt builtins in i386-common.c as that was not handled
there.

Thanks
Sri

On Wed, May 15, 2013 at 7:25 PM, Sriraman Tallam <tmsriram@google.com> wrote:
> On Tue, May 14, 2013 at 3:04 AM, Jakub Jelinek <jakub@redhat.com> wrote:
>> On Tue, May 14, 2013 at 10:39:13AM +0200, Jakub Jelinek wrote:
>>> When trying with -O2 -mno-avx:
>>> #ifndef __AVX__
>>> #pragma GCC push_options
>>> #pragma GCC target("avx")
>>> #define __DISABLE_AVX__
>>> #endif
>>> typedef float __v8sf __attribute__ ((__vector_size__ (32)));
>>> typedef float __m256 __attribute__ ((__vector_size__ (32), __may_alias__));
>>> extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>>> _mm256_and_ps (__m256 __A, __m256 __B) { return (__m256) __builtin_ia32_andps256 ((__v8sf)__A, (__v8sf)__B); }
>>> #ifdef __DISABLE_AVX__
>>> #pragma GCC pop_options
>>> #undef __DISABLE_AVX__
>>> #endif
>>> __m256 a, b, c;
>>> void __attribute__((target ("avx")))
>>> foo (void)
>>> {
>>>   a = _mm256_and_ps (b, c);
>>> }
>>> we get bogus errors and ICE:
>>> tty2.c: In function '_mm256_and_ps':
>>> tty2.c:9:1: note: The ABI for passing parameters with 32-byte alignment has changed in GCC 4.6
>>> tty2.c: In function 'foo':
>>> tty2.c:9:82: error: '__builtin_ia32_andps256' needs isa option -m32
>>> tty2.c:9:82: internal compiler error: in emit_move_insn, at expr.c:3486
>>> 0x77a3d2 emit_move_insn(rtx_def*, rtx_def*)
>>>       ../../gcc/expr.c:3485
>>> (I have added "1 ||" instead of your generate_builtins into i386.c
>>> (def_builtin)), that just shows that target attribute/pragma support still
>>> has very severe issues that need to be fixed, instead of papered around.
>>>
>>> Note, we ICE on:
>>> #pragma GCC target ("mavx")
>>> That should be fixed too.
>>
>> Ok, I had a brief look at the above two issues.
>>
>> The first testcase has the problem that the ix86_previous_fndecl cache
>> gets out of date.  When set_cfun is called on _mm256_and_ps (with the
>> implicit avx attribute), then ix86_previous_fndecl is set to _mm256_and_ps,
>> TARGET_AVX is set to true, target reinited.  Then set_cfun is called
>> with NULL, we don't do anything.  Later on #pragma GCC pop_options appears,
>> sets !TARGET_AVX (as that is the new target_option_current_node).
>> Next foo is being parsed, avx attribute is noticed, the same target node
>> is used for it, but when set_cfun is called for foo, ix86_previous_fndecl's
>> target node is the same as foo's and so we don't do cl_target_restore_option
>> at all, so !TARGET_AVX remains, while it should be set.  That is the reason
>> for the bogus inform etc.  Fixed by resetting the ix86_previous_fndecl cache
>> on any #pragma GCC target below.  The #pragma GCC target ("mavx") is also
>> fixed below.  The patch also includes the "1 ||" to enable building all
>> builtins.  We still ICE with:
>> #0  fancy_abort (file=0x11d8fad "../../gcc/expr.c", line=316, function=0x11dada3 "convert_move") at ../../gcc/diagnostic.c:1180
>> #1  0x0000000000771c39 in convert_move (to=0x7ffff1b2df00, from=0x7ffff1b314e0, unsignedp=0) at ../../gcc/expr.c:316
>> #2  0x000000000078009f in store_expr (exp=0x7ffff19ab390, target=0x7ffff1b2df00, call_param_p=0, nontemporal=false) at ../../gcc/expr.c:5300
>> #3  0x000000000077eba1 in expand_assignment (to=0x7ffff1b35090, from=0x7ffff19ab390, nontemporal=false) at ../../gcc/expr.c:5025
>> on the first testcase.  We don't ICE say on:
>> #ifndef __AVX__
>> #pragma GCC push_options
>> #pragma GCC target("avx")
>> #define __DISABLE_AVX__
>> #endif
>> typedef float __v8sf __attribute__ ((__vector_size__ (32)));
>> typedef float __m256 __attribute__ ((__vector_size__ (32), __may_alias__));
>> extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> _mm256_and_ps (__m256 __A, __m256 __B) { return (__m256) __builtin_ia32_andps256 ((__v8sf)__A, (__v8sf)__B); }
>> #ifdef __DISABLE_AVX__
>> #pragma GCC pop_options
>> #undef __DISABLE_AVX__
>> #endif
>> __m256 a[10], b[10], c[10];
>> void __attribute__((target ("avx")))
>> foo (void)
>> {
>>   a[0] = _mm256_and_ps (b[0], c[0]);
>> }
>> The problem is that in the first testcase, the VAR_DECL c (guess also b and
>> a) have TYPE_MODE (TREE_TYPE (c)) == V8SFmode (this is dynamic, for vector
>> types TYPE_MODE is a function call), but DECL_MODE (c) is BLKmode
>> (it has been laid out while -mno-avx has been the current) and also
>> DECL_RTL which is a mem:BLK.  Guess expr.c would need to special case
>> TREE_STATIC or DECL_EXTERNAL VAR_DECLs with vector type, if they have
>> DECL_MODE BLKmode, but TYPE_MODE some vector type, just adjust the MEM
>> to the desired mode?
>>
>> --- gcc/config/i386/i386-c.c.jj 2013-01-15 17:20:37.000000000 +0100
>> +++ gcc/config/i386/i386-c.c    2013-05-14 11:46:50.773806894 +0200
>> @@ -369,20 +369,23 @@ ix86_pragma_target_parse (tree args, tre
>>
>>    if (! args)
>>      {
>> -      cur_tree = ((pop_target)
>> -                 ? pop_target
>> -                 : target_option_default_node);
>> +      cur_tree = (pop_target ? pop_target : target_option_default_node);
>>        cl_target_option_restore (&global_options,
>>                                 TREE_TARGET_OPTION (cur_tree));
>>      }
>>    else
>>      {
>>        cur_tree = ix86_valid_target_attribute_tree (args);
>> -      if (!cur_tree)
>> -       return false;
>> +      if (!cur_tree || cur_tree == error_mark_node)
>> +       {
>> +         cl_target_option_restore (&global_options,
>> +                                   TREE_TARGET_OPTION (prev_tree));
>> +         return false;
>> +       }
>>      }
>>
>>    target_option_current_node = cur_tree;
>> +  ix86_reset_previous_fndecl ();
>>
>>    /* Figure out the previous/current isa, arch, tune and the differences.  */
>>    prev_opt  = TREE_TARGET_OPTION (prev_tree);
>> --- gcc/config/i386/i386-protos.h.jj    2013-04-03 08:28:50.000000000 +0200
>> +++ gcc/config/i386/i386-protos.h       2013-05-14 11:41:35.389638299 +0200
>> @@ -40,6 +40,8 @@ extern void ix86_output_addr_diff_elt (F
>>  extern enum calling_abi ix86_cfun_abi (void);
>>  extern enum calling_abi ix86_function_type_abi (const_tree);
>>
>> +extern void ix86_reset_previous_fndecl (void);
>> +
>>  #ifdef RTX_CODE
>>  extern int standard_80387_constant_p (rtx);
>>  extern const char *standard_80387_constant_opcode (rtx);
>> --- gcc/config/i386/i386.c.jj   2013-05-14 08:23:31.000000000 +0200
>> +++ gcc/config/i386/i386.c      2013-05-14 11:40:54.996867411 +0200
>> @@ -4559,6 +4559,13 @@ ix86_can_inline_p (tree caller, tree cal
>>  /* Remember the last target of ix86_set_current_function.  */
>>  static GTY(()) tree ix86_previous_fndecl;
>>
>> +/* Invalidate ix86_previous_fndecl cache.  */
>> +void
>> +ix86_reset_previous_fndecl (void)
>> +{
>> +  ix86_previous_fndecl = NULL_TREE;
>> +}
>> +
>>  /* Establish appropriate back-end context for processing the function
>>     FNDECL.  The argument might be NULL to indicate processing at top
>>     level, outside of any function scope.  */
>> @@ -26829,7 +26836,7 @@ def_builtin (HOST_WIDE_INT mask, const c
>>        ix86_builtins_isa[(int) code].isa = mask;
>>
>>        mask &= ~OPTION_MASK_ISA_64BIT;
>> -      if (mask == 0
>> +      if (/* HACK */ 1 || mask == 0
>>           || (mask & ix86_isa_flags) != 0
>>           || (lang_hooks.builtin_function
>>               == lang_hooks.builtin_function_ext_scope))
>
> This does not seem necessary now once the #pragma GCC target is used
> in the header. The target-specific builtins seem to be generated when
> the target is specified using pragmas.
>
> I will consolidate your patches and send one shortly.
>
> Thanks for clearing this,
> Sri
>
>
>>
>>
>>         Jakub
This patch allows *intrin.h headers to be used with function target attributes.
To do that, either include x86intrin.h or immintrin.h as they have guards to
enable other headers when the target options are not passed via the command
line.  

	* testsuite/gcc.target/i386/intrinsics_4.c: New test.
	* testsuite/gcc.target/i386/intrinsics_1.c: New test.
	* testsuite/gcc.target/i386/intrinsics_2.c: Ditto.
	* testsuite/gcc.target/i386/intrinsics_3.c: Ditto.
	* config/i386/x86intrin.h: Guard every header inclusion by pushing the right
	target attribute when the target is not defined.
	* config/i386/immintrin.h: Guard every header inclusion by pushing the right
	target attribute when the target is not defined.  Include
	popcntintrin.h also here.
	* config/i386/i386.c (ix86_pragma_target_parse):  Restore target
	when current target options does not apply.
	* config/i386/i386-protos.h (ix86_reset_previous_fndecl): New function.
	* config/i386/i386.c (ix86_reset_previous_fndecl): Ditto.
	* common/config/i386/i386-common.c: Handle LZCNT.
Marc Glisse - May 16, 2013, 10:55 p.m.
On Thu, 16 May 2013, Sriraman Tallam wrote:

> Hi Jakub,
>
>   I have taken your proposed changes and made patch for this.  Please
> let me know what you think. I have changed only the headers mmintrin.h
> and x86intrin.h as that includes all the other headers.

I don't really understand why you made the change to x86intrin.h instead 
of making it inside each *mmintrin.h header. The code would be the same 
size, it would let us include smmintrin.h directly if we wanted to, and 
x86intrin.h would also automatically work.
Sriraman Tallam - May 16, 2013, 11 p.m.
On Thu, May 16, 2013 at 3:55 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> On Thu, 16 May 2013, Sriraman Tallam wrote:
>
>> Hi Jakub,
>>
>>   I have taken your proposed changes and made patch for this.  Please
>> let me know what you think. I have changed only the headers mmintrin.h
>> and x86intrin.h as that includes all the other headers.
>
>
> I don't really understand why you made the change to x86intrin.h instead of
> making it inside each *mmintrin.h header. The code would be the same size,
> it would let us include smmintrin.h directly if we wanted to, and
> x86intrin.h would also automatically work.

Right, I should have done that instead!

Sri

>
> --
> Marc Glisse
Jakub Jelinek - May 17, 2013, 5:49 a.m.
On Thu, May 16, 2013 at 04:00:53PM -0700, Sriraman Tallam wrote:
> On Thu, May 16, 2013 at 3:55 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> > I don't really understand why you made the change to x86intrin.h instead of
> > making it inside each *mmintrin.h header. The code would be the same size,
> > it would let us include smmintrin.h directly if we wanted to, and
> > x86intrin.h would also automatically work.
> 
> Right, I should have done that instead!

Yeah, definitely.  For the standalone headers, which have currently
__<FEATURE>__ guards inside of it, please replace it by the larger snippets
involving #pragma, and in the x86intrin.h/immintrin.h headers include those
unconditionally, instead of just if __<FEATURE>__ is defined.
For the non-standalone headers (newer ones like avxintrin.h), replace
the #ifdef __<FEATURE>__ in immintrin.h/x86intrin.h with larger snippets.

	Jakub

Patch

Index: testsuite/gcc.target/i386/intrinsics_4.c
===================================================================
--- testsuite/gcc.target/i386/intrinsics_4.c	(revision 0)
+++ testsuite/gcc.target/i386/intrinsics_4.c	(revision 0)
@@ -0,0 +1,14 @@ 
+/* Test case to check if intrinsics and function specific target
+   optimizations work together.  Check by including immintrin.h  */
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-avx" } */
+
+#include <immintrin.h>
+
+__m256 a[10], b[10], c[10];
+void __attribute__((target ("avx")))
+foo (void)
+{
+  a[0] = _mm256_and_ps (b[0], c[0]);
+}
Index: testsuite/gcc.target/i386/intrinsics_1.c
===================================================================
--- testsuite/gcc.target/i386/intrinsics_1.c	(revision 0)
+++ testsuite/gcc.target/i386/intrinsics_1.c	(revision 0)
@@ -0,0 +1,13 @@ 
+/* Test case to check if intrinsics and function specific target
+   optimizations work together.  Check by including x86intrin.h  */
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4.1 -mno-sse4.2" } */
+
+#include <x86intrin.h>
+
+__attribute__((target("sse4.2")))
+__m128i foo(__m128i *V)
+{
+    return _mm_stream_load_si128(V);
+}
Index: testsuite/gcc.target/i386/intrinsics_2.c
===================================================================
--- testsuite/gcc.target/i386/intrinsics_2.c	(revision 0)
+++ testsuite/gcc.target/i386/intrinsics_2.c	(revision 0)
@@ -0,0 +1,13 @@ 
+/* Test case to check if intrinsics and function specific target
+   optimizations work together.  Check by including immintrin.h  */
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4.1" } */
+
+#include <immintrin.h>
+
+__attribute__((target("sse4.2")))
+__m128i foo(__m128i *V)
+{
+    return _mm_stream_load_si128(V);
+}
Index: testsuite/gcc.target/i386/intrinsics_3.c
===================================================================
--- testsuite/gcc.target/i386/intrinsics_3.c	(revision 0)
+++ testsuite/gcc.target/i386/intrinsics_3.c	(revision 0)
@@ -0,0 +1,14 @@ 
+/* Test case to check if intrinsics and function specific target
+   optimizations work together.  Check if the POPCNT specific intrinsics
+   in included with popcntintrin.h get enabled by including immintrin.h  */
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4.1 -mno-sse4.2 -mno-popcnt" } */
+
+#include <immintrin.h>
+
+__attribute__((target("popcnt")))
+long long foo(unsigned long long X)
+{
+    return _mm_popcnt_u64 (X);
+}
Index: config/i386/x86intrin.h
===================================================================
--- config/i386/x86intrin.h	(revision 198950)
+++ config/i386/x86intrin.h	(working copy)
@@ -26,95 +26,341 @@ 
 
 #include <ia32intrin.h>
 
-#ifdef __MMX__
+#ifndef __MMX__
+#pragma GCC push_options
+#pragma GCC target("mmx")
+#define __MMX__
+#define __DISABLE_MMX__
+#endif
+
 #include <mmintrin.h>
+
+#ifdef __DISABLE_MMX__
+#undef __DISABLE_MMX__
+#undef __MMX__
+#pragma GCC pop_options
 #endif
 
-#ifdef __SSE__
+#ifndef __SSE__
+#pragma GCC push_options
+#pragma GCC target("sse")
+#define __SSE__
+#define __DISABLE_SSE__
+#endif
+
 #include <xmmintrin.h>
+
+#ifdef __DISABLE_SSE__
+#undef __DISABLE_SSE__
+#undef __SSE__
+#pragma GCC pop_options
 #endif
 
-#ifdef __SSE2__
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __SSE2__
+#define __DISABLE_SSE2__
+#endif
+
 #include <emmintrin.h>
+
+#ifdef __DISABLE_SSE2__
+#undef __DISABLE_SSE2__
+#undef __SSE2__
+#pragma GCC pop_options
 #endif
 
-#ifdef __SSE3__
+#ifndef __SSE3__
+#pragma GCC push_options
+#pragma GCC target("sse3")
+#define __SSE3__
+#define __DISABLE_SSE3__
+#endif
+
 #include <pmmintrin.h>
+
+#ifdef __DISABLE_SSE3__
+#undef __DISABLE_SSE3__
+#undef __SSE3__
+#pragma GCC pop_options
 #endif
 
-#ifdef __SSSE3__
+#ifndef __SSSE3__
+#pragma GCC push_options
+#pragma GCC target("ssse3")
+#define __SSSE3__
+#define __DISABLE_SSSE3__
+#endif
+
 #include <tmmintrin.h>
+
+#ifdef __DISABLE_SSSE3__
+#undef __DISABLE_SSSE3__
+#undef __SSSE3__
+#pragma GCC pop_options
 #endif
 
-#ifdef __SSE4A__
+#ifndef __SSE4A__
+#pragma GCC push_options
+#pragma GCC target("sse4a")
+#define __SSE4A__
+#define __DISABLE_SSE4A__
+#endif
+
 #include <ammintrin.h>
+
+#ifdef __DISABLE_SSE4A__
+#undef __DISABLE_SSE4A__
+#undef __SSE4A__
+#pragma GCC pop_options
 #endif
 
-#if defined (__SSE4_2__) || defined (__SSE4_1__)
+#if !defined (__SSE4_2__) && !defined (__SSE4_1__)
+#pragma GCC push_options
+#pragma GCC target("sse4.2,sse4.1")
+#define __SSE4_2__
+#define __SSE4_1__
+#define __DISABLE_SSE4_2__
+#endif
+
 #include <smmintrin.h>
+
+#ifdef __DISABLE_SSE4_2__
+#undef __DISABLE_SSE4_2__
+#undef __SSE4_2__
+#undef __SSE4_1__
+#pragma GCC pop_options
 #endif
 
-#if defined (__AES__) || defined (__PCLMUL__)
+#if !defined (__AES__) && !defined (__PCLMUL__)
+#pragma GCC push_options
+#pragma GCC target("aes,pclmul")
+#define __AES__
+#define __PCLMUL__
+#define __DISABLE_AES_PCLMUL__
+#endif
+
 #include <wmmintrin.h>
+
+#ifdef __DISABLE_AES_PCLMUL__
+#undef __DISABLE_AES_PCLMUL__
+#undef __AES__
+#undef __PCLMUL__
+#pragma GCC pop_options
 #endif
 
 /* For including AVX instructions */
 #include <immintrin.h>
 
-#ifdef __3dNOW__
+#ifndef __3dNOW__
+#pragma GCC push_options
+#pragma GCC target("3dnow")
+#define __3dNOW__
+#define __DISABLE_3dNOW__
+#endif
+
 #include <mm3dnow.h>
+
+#ifdef __DISABLE_3dNOW__
+#undef __DISABLE_3dNOW__
+#undef __3dNOW__
+#pragma GCC pop_options
 #endif
 
-#ifdef __FMA4__
+#ifndef __FMA4__
+#pragma GCC push_options
+#pragma GCC target("fma4")
+#define __FMA4__
+#define __DISABLE_FMA4__
+#endif
+
 #include <fma4intrin.h>
+
+#ifdef __DISABLE_FMA4__
+#undef __DISABLE_FMA4__
+#undef __FMA4__
+#pragma GCC pop_options
 #endif
 
-#ifdef __XOP__
+#ifndef __XOP__
+#pragma GCC push_options
+#pragma GCC target("xop")
+#define __XOP__
+#define __DISABLE_XOP__
+#endif
+
 #include <xopintrin.h>
+
+#ifdef __DISABLE_XOP__
+#undef __DISABLE_XOP__
+#undef __XOP__
+#pragma GCC pop_options
 #endif
 
-#ifdef __LWP__
+#ifndef __LWP__
+#pragma GCC push_options
+#pragma GCC target("lwp")
+#define __LWP__
+#define __DISABLE_LWP__
+#endif
+
 #include <lwpintrin.h>
+
+#ifdef __DISABLE_LWP__
+#undef __DISABLE_LWP__
+#undef __LWP__
+#pragma GCC pop_options
 #endif
 
-#ifdef __BMI__
+#ifndef __BMI__
+#pragma GCC push_options
+#pragma GCC target("bmi")
+#define __BMI__
+#define __DISABLE_BMI__
+#endif
+
 #include <bmiintrin.h>
+
+#ifdef __DISABLE_BMI__
+#undef __DISABLE_BMI__
+#undef __BMI__
+#pragma GCC pop_options
 #endif
 
-#ifdef __BMI2__
+#ifndef __BMI2__
+#pragma GCC push_options
+#pragma GCC target("bmi2")
+#define __BMI2__
+#define __DISABLE_BMI2__
+#endif
+
 #include <bmi2intrin.h>
+
+#ifdef __DISABLE_BMI2__
+#undef __DISABLE_BMI2__
+#undef __BMI2__
+#pragma GCC pop_options
 #endif
 
-#ifdef __TBM__
+#ifndef __TBM__
+#pragma GCC push_options
+#pragma GCC target("tbm")
+#define __TBM__
+#define __DISABLE_TBM__
+#endif
+
 #include <tbmintrin.h>
+
+#ifdef __DISABLE_TBM__
+#undef __DISABLE_TBM__
+#undef __TBM__
+#pragma GCC pop_options
 #endif
 
-#ifdef __LZCNT__
+#ifndef __LZCNT__
+#pragma GCC push_options
+#pragma GCC target("lzcnt")
+#define __LZCNT__
+#define __DISABLE_LZCNT__
+#endif
+
 #include <lzcntintrin.h>
+
+#ifdef __DISABLE_LZCNT__
+#undef __DISABLE_LZCNT__
+#undef __LZCNT__
+#pragma GCC pop_options
 #endif
 
-#ifdef __POPCNT__
+#ifndef __POPCNT__
+#pragma GCC push_options
+#pragma GCC target("popcnt")
+#define __POPCNT__
+#define __DISABLE_POPCNT__
+#endif
+
 #include <popcntintrin.h>
+
+#ifdef __DISABLE_POPCNT__
+#undef __DISABLE_POPCNT__
+#undef __POPCNT__
+#pragma GCC pop_options
 #endif
 
-#ifdef __RDSEED__
+#ifndef __RDSEED__
+#pragma GCC push_options
+#pragma GCC target("rdseed")
+#define __RDSEED__
+#define __DISABLE_RDSEED__
+#endif
+
 #include <rdseedintrin.h>
+
+#ifdef __DISABLE_RDSEED__
+#undef __DISABLE_RDSEED__
+#undef __RDSEED__
+#pragma GCC pop_options
 #endif
 
-#ifdef __PRFCHW__
+#ifndef __PRFCHW__
+#pragma GCC push_options
+#pragma GCC target("prfchw")
+#define __PRFCHW__
+#define __DISABLE_PRFCHW__
+#endif
+
 #include <prfchwintrin.h>
+
+#ifdef __DISABLE_PRFCHW__
+#undef __DISABLE_PRFCHW__
+#undef __PRFCHW__
+#pragma GCC pop_options
 #endif
 
-#ifdef __FXSR__
+#ifndef __FXSR__
+#pragma GCC push_options
+#pragma GCC target("fxsr")
+#define __FXSR__
+#define __DISABLE_FXSR__
+#endif
+
 #include <fxsrintrin.h>
+
+#ifdef __DISABLE_FXSR__
+#undef __DISABLE_FXSR__
+#undef __FXSR__
+#pragma GCC pop_options
 #endif
 
-#ifdef __XSAVE__
+#ifndef __XSAVE__
+#pragma GCC push_options
+#pragma GCC target("xsave")
+#define __XSAVE__
+#define __DISABLE_XSAVE__
+#endif
+
 #include <xsaveintrin.h>
+
+#ifdef __DISABLE_XSAVE__
+#undef __DISABLE_XSAVE__
+#undef __XSAVE__
+#pragma GCC pop_options
 #endif
 
-#ifdef __XSAVEOPT__
+#ifndef __XSAVEOPT__
+#pragma GCC push_options
+#pragma GCC target("xsaveopt")
+#define __XSAVEOPT__
+#define __DISABLE_XSAVEOPT__
+#endif
+
 #include <xsaveoptintrin.h>
+
+#ifdef __DISABLE_XSAVEOPT__
+#undef __DISABLE_XSAVEOPT__
+#undef __XSAVEOPT__
+#pragma GCC pop_options
 #endif
 
 #include <adxintrin.h>
Index: config/i386/i386-c.c
===================================================================
--- config/i386/i386-c.c	(revision 198950)
+++ config/i386/i386-c.c	(working copy)
@@ -369,20 +369,23 @@  ix86_pragma_target_parse (tree args, tree pop_targ
 
   if (! args)
     {
-      cur_tree = ((pop_target)
-		  ? pop_target
-		  : target_option_default_node);
+      cur_tree = (pop_target ? pop_target : target_option_default_node);
       cl_target_option_restore (&global_options,
 				TREE_TARGET_OPTION (cur_tree));
     }
   else
     {
       cur_tree = ix86_valid_target_attribute_tree (args);
-      if (!cur_tree)
-	return false;
+      if (!cur_tree || cur_tree == error_mark_node)
+       {
+         cl_target_option_restore (&global_options,
+                                   TREE_TARGET_OPTION (prev_tree));
+         return false;
+       }
     }
 
   target_option_current_node = cur_tree;
+  ix86_reset_previous_fndecl ();
 
   /* Figure out the previous/current isa, arch, tune and the differences.  */
   prev_opt  = TREE_TARGET_OPTION (prev_tree);
Index: config/i386/immintrin.h
===================================================================
--- config/i386/immintrin.h	(revision 198950)
+++ config/i386/immintrin.h	(working copy)
@@ -24,71 +24,259 @@ 
 #ifndef _IMMINTRIN_H_INCLUDED
 #define _IMMINTRIN_H_INCLUDED
 
-#ifdef __MMX__
+#ifndef __MMX__
+#pragma GCC push_options
+#pragma GCC target("mmx")
+#define __MMX__
+#define __DISABLE_MMX__
+#endif
+
 #include <mmintrin.h>
+
+#ifdef __DISABLE_MMX__
+#undef __DISABLE_MMX__
+#undef __MMX__
+#pragma GCC pop_options
 #endif
 
-#ifdef __SSE__
+#ifndef __SSE__
+#pragma GCC push_options
+#pragma GCC target("sse")
+#define __SSE__
+#define __DISABLE_SSE__
+#endif
+
 #include <xmmintrin.h>
+
+#ifdef __DISABLE_SSE__
+#undef __DISABLE_SSE__
+#undef __SSE__
+#pragma GCC pop_options
 #endif
 
-#ifdef __SSE2__
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __SSE2__
+#define __DISABLE_SSE2__
+#endif
+
 #include <emmintrin.h>
+
+#ifdef __DISABLE_SSE2__
+#undef __DISABLE_SSE2__
+#undef __SSE2__
+#pragma GCC pop_options
 #endif
 
-#ifdef __SSE3__
+#ifndef __SSE3__
+#pragma GCC push_options
+#pragma GCC target("sse3")
+#define __SSE3__
+#define __DISABLE_SSE3__
+#endif
+
 #include <pmmintrin.h>
+
+#ifdef __DISABLE_SSE3__
+#undef __DISABLE_SSE3__
+#undef __SSE3__
+#pragma GCC pop_options
 #endif
 
-#ifdef __SSSE3__
+#ifndef __SSSE3__
+#pragma GCC push_options
+#pragma GCC target("ssse3")
+#define __SSSE3__
+#define __DISABLE_SSSE3__
+#endif
+
 #include <tmmintrin.h>
+
+#ifdef __DISABLE_SSSE3__
+#undef __DISABLE_SSSE3__
+#undef __SSSE3__
+#pragma GCC pop_options
 #endif
 
-#if defined (__SSE4_2__) || defined (__SSE4_1__)
+#if !defined (__SSE4_2__) && !defined (__SSE4_1__)
+#pragma GCC push_options
+#pragma GCC target("sse4.2,sse4.1")
+#define __SSE4_1__
+#define __SSE4_2__
+#define __DISABLE_SSE4_2__
+#endif
+
 #include <smmintrin.h>
+
+#ifdef __DISABLE_SSE4_2__
+#undef __DISABLE_SSE4_2__
+#undef __SSE4_1__
+#undef __SSE4_2__
+#pragma GCC pop_options
 #endif
 
-#if defined (__AES__) || defined (__PCLMUL__)
+#ifndef __POPCNT__
+#pragma GCC push_options
+#pragma GCC target("popcnt")
+#define __POPCNT__
+#define __DISABLE_POPCNT__
+#endif
+
+#include <popcntintrin.h>
+
+#ifdef __DISABLE_POPCNT__
+#undef __DISABLE_POPCNT__
+#undef __POPCNT__
+#pragma GCC pop_options
+#endif
+
+#if !defined (__AES__) && !defined (__PCLMUL__)
+#pragma GCC push_options
+#pragma GCC target("aes,pclmul")
+#define __AES__
+#define __PCLMUL__
+#define __DISABLE_AES_PCLMUL__
+#endif
+
 #include <wmmintrin.h>
+
+#ifdef __DISABLE_AES_PCLMUL__
+#undef __DISABLE_AES_PCLMUL__
+#undef __AES__
+#undef __PCLMUL__
+#pragma GCC pop_options
 #endif
 
-#ifdef __AVX__
+
+#ifndef __AVX__
+#pragma GCC push_options
+#pragma GCC target("avx")
+#define __AVX__
+#define __DISABLE_AVX__
+#endif
+
 #include <avxintrin.h>
+
+#ifdef __DISABLE_AVX__
+#undef __DISABLE_AVX__
+#undef __AVX__
+#pragma GCC pop_options
 #endif
 
-#ifdef __AVX2__
+#ifndef __AVX2__
+#pragma GCC push_options
+#pragma GCC target("avx2")
+#define __AVX2__
+#define __DISABLE_AVX2__
+#endif
+
 #include <avx2intrin.h>
+
+#ifdef __DISABLE_AVX2__
+#undef __DISABLE_AVX2__
+#undef __AVX2__
+#pragma GCC pop_options
 #endif
 
-#ifdef __LZCNT__
+#ifndef __LZCNT__
+#pragma GCC push_options
+#pragma GCC target("lzcnt")
+#define __LZCNT__
+#define __DISABLE_LZCNT__
+#endif
+
 #include <lzcntintrin.h>
+
+#ifdef __DISABLE_LZCNT__
+#undef __DISABLE_LZCNT__
+#undef __LZCNT__
+#pragma GCC pop_options
 #endif
 
-#ifdef __BMI__
+#ifndef __BMI__
+#pragma GCC push_options
+#pragma GCC target("bmi")
+#define __BMI__
+#define __DISABLE_BMI__
+#endif
+
 #include <bmiintrin.h>
+
+#ifdef __DISABLE_BMI__
+#undef __DISABLE_BMI__
+#undef __BMI__
+#pragma GCC pop_options
 #endif
 
-#ifdef __BMI2__
+#ifndef __BMI2__
+#pragma GCC push_options
+#pragma GCC target("bmi2")
+#define __BMI2__
+#define __DISABLE_BMI2__
+#endif
+
 #include <bmi2intrin.h>
+
+#ifdef __DISABLE_BMI2__
+#undef __DISABLE_BMI2__
+#undef __BMI2__
+#pragma GCC pop_options
 #endif
 
-#ifdef __FMA__
+#ifndef __FMA__
+#pragma GCC push_options
+#pragma GCC target("fma")
+#define __FMA__
+#define __DISABLE_FMA__
+#endif
+
 #include <fmaintrin.h>
+
+#ifdef __DISABLE_FMA__
+#undef __DISABLE_FMA__
+#undef __FMA__
+#pragma GCC pop_options
 #endif
 
-#ifdef __F16C__
+#ifndef __F16C__
+#pragma GCC push_options
+#pragma GCC target("f16c")
+#define __F16C__
+#define __DISABLE_F16C__
+#endif
+
 #include <f16cintrin.h>
+
+#ifdef __DISABLE_F16C__
+#undef __DISABLE_F16C__
+#undef __F16C__
+#pragma GCC pop_options
 #endif
 
-#ifdef __RTM__
-#include <rtmintrin.h>
+#ifndef __RTM__
+#pragma GCC push_options
+#pragma GCC target("rtm")
+#define __RTM__
+#define __DISABLE_RTM__
 #endif
 
-#ifdef __RTM__
+#include <rtmintrin.h>
 #include <xtestintrin.h>
+
+#ifdef __DISABLE_RTM__
+#undef __DISABLE_RTM__
+#undef __RTM__
+#pragma GCC pop_options
 #endif
 
-#ifdef __RDRND__
+
+#ifndef __RDRND__
+#pragma GCC push_options
+#pragma GCC target("rdrnd")
+#define __RDRND__
+#define __DISABLE_RDRND__
+#endif
 extern __inline int
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _rdrand16_step (unsigned short *__P)
@@ -102,10 +290,20 @@  _rdrand32_step (unsigned int *__P)
 {
   return __builtin_ia32_rdrand32_step (__P);
 }
-#endif /* __RDRND__ */
+#ifdef __DISABLE_RDRND__
+#undef __DISABLE_RDRND__
+#undef __RDRND__
+#pragma GCC pop_options
+#endif
 
 #ifdef  __x86_64__
-#ifdef __FSGSBASE__
+
+#ifndef __FSGSBASE__
+#pragma GCC push_options
+#pragma GCC target("fsgsbase")
+#define __FSGSBASE__
+#define __DISABLE_FSGSBASE__
+#endif
 extern __inline unsigned int
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _readfsbase_u32 (void)
@@ -161,16 +359,30 @@  _writegsbase_u64 (unsigned long long __B)
 {
   __builtin_ia32_wrgsbase64 (__B);
 }
-#endif /* __FSGSBASE__ */
+#ifdef __DISABLE_FSGSBASE__
+#undef __DISABLE_FSGSBASE__
+#undef __FSGSBASE__
+#pragma GCC pop_options
+#endif
 
-#ifdef __RDRND__
+#ifndef __RDRND__
+#pragma GCC push_options
+#pragma GCC target("rdrnd")
+#define __RDRND__
+#define __DISABLE_RDRND__
+#endif
 extern __inline int
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _rdrand64_step (unsigned long long *__P)
 {
   return __builtin_ia32_rdrand64_step (__P);
 }
-#endif /* __RDRND__ */
+#ifdef __DISABLE_RDRND__
+#undef __DISABLE_RDRND__
+#undef __RDRND__
+#pragma GCC pop_options
+#endif
+
 #endif /* __x86_64__  */
 
 #endif /* _IMMINTRIN_H_INCLUDED */
Index: config/i386/i386-protos.h
===================================================================
--- config/i386/i386-protos.h	(revision 198950)
+++ config/i386/i386-protos.h	(working copy)
@@ -40,6 +40,8 @@  extern void ix86_output_addr_diff_elt (FILE *, int
 extern enum calling_abi ix86_cfun_abi (void);
 extern enum calling_abi ix86_function_type_abi (const_tree);
 
+extern void ix86_reset_previous_fndecl (void);
+
 #ifdef RTX_CODE
 extern int standard_80387_constant_p (rtx);
 extern const char *standard_80387_constant_opcode (rtx);
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 198950)
+++ config/i386/i386.c	(working copy)
@@ -4564,6 +4564,13 @@  ix86_can_inline_p (tree caller, tree callee)
 /* Remember the last target of ix86_set_current_function.  */
 static GTY(()) tree ix86_previous_fndecl;
 
+/* Invalidate ix86_previous_fndecl cache.  */
+void
+ix86_reset_previous_fndecl (void)
+{
+  ix86_previous_fndecl = NULL_TREE;
+}
+
 /* Establish appropriate back-end context for processing the function
    FNDECL.  The argument might be NULL to indicate processing at top
    level, outside of any function scope.  */
Index: common/config/i386/i386-common.c
===================================================================
--- common/config/i386/i386-common.c	(revision 198950)
+++ common/config/i386/i386-common.c	(working copy)
@@ -87,6 +87,7 @@  along with GCC; see the file COPYING3.  If not see
 
 #define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI
 #define OPTION_MASK_ISA_BMI2_SET OPTION_MASK_ISA_BMI2
+#define OPTION_MASK_ISA_LZCNT_SET OPTION_MASK_ISA_LZCNT
 #define OPTION_MASK_ISA_TBM_SET OPTION_MASK_ISA_TBM
 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
@@ -154,6 +155,7 @@  along with GCC; see the file COPYING3.  If not see
 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
 #define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI
 #define OPTION_MASK_ISA_BMI2_UNSET OPTION_MASK_ISA_BMI2
+#define OPTION_MASK_ISA_LZCNT_UNSET OPTION_MASK_ISA_LZCNT
 #define OPTION_MASK_ISA_TBM_UNSET OPTION_MASK_ISA_TBM
 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
@@ -438,6 +440,18 @@  ix86_handle_option (struct gcc_options *opts,
 	}
       return true;
 
+    case OPT_mlzcnt:
+      if (value)
+	{
+	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT_SET;
+	  opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_LZCNT_SET;
+	}
+      else
+	{
+	  opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_LZCNT_UNSET;
+	  opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_LZCNT_UNSET;
+	}
+
     case OPT_mtbm:
       if (value)
 	{