Patchwork [i386] Cannot inline sse*.* functions into avx functions

login
register
mail settings
Submitter Sriraman Tallam
Date July 1, 2013, 6:01 p.m.
Message ID <CAAs8Hmx7-oKu4WhQzv4QcQBV2_mgc8ZyyWVbguB9G7SdCYBzAA@mail.gmail.com>
Download mbox | patch
Permalink /patch/256196/
State New
Headers show

Comments

Sriraman Tallam - July 1, 2013, 6:01 p.m.
Hi,

   So, something like the patch attached?

        * config/i386/i386.c (ix86_option_override_internal): Turn
        on all -mavx target flags by default as they are dependent
        on AVX anyway.

Thanks
Sri




On Sun, Jun 30, 2013 at 2:56 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Sun, Jun 30, 2013 at 11:47 AM, Jan Hubicka <hubicka@ucw.cz> wrote:
>>>
>>> What target flags are enabled by AVX? Assuming that all target flags are
>>> positive seems incorrect to me (like -mno-red-zone function can not be inlined
>>> into -mred-zone).  Do those conditionally enabled AVX codegen flags have any
>>
>> Actually -mred-zone seems right, but stuff like -msseregparm will probably break?
>>
>> Honza
>>
>>> effect when AVX is disabled?  Perhaps we can set them unconditionally?
>
> The issue is with (config/i386/i386.c, ix86_option_override_internal):
>
>   if (TARGET_AVX)
>     {
>       /* When not optimize for size, enable vzeroupper optimization for
>      TARGET_AVX with -fexpensive-optimizations and split 32-byte
>      AVX unaligned load/store.  */
>       if (!optimize_size)
>     {
>       if (flag_expensive_optimizations
>           && !(target_flags_explicit & MASK_VZEROUPPER))
>         target_flags |= MASK_VZEROUPPER;
>       if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
>           && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
>         target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
>       if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
>           && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
>         target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
>       /* Enable 128-bit AVX instruction generation
>          for the auto-vectorizer.  */
>       if (TARGET_AVX128_OPTIMAL
>           && !(target_flags_explicit & MASK_PREFER_AVX128))
>         target_flags |= MASK_PREFER_AVX128;
>     }
>
> These are all tuning flags that are applicable to AVX only. They
> depend on AVX, so they can probably be enabled unconditionally.
>
> Uros.
* config/i386/i386.c (ix86_option_override_internal): Turn
	on all -mavx target flags by default as they are dependent
	on AVX anyway.

Patch

Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 200374)
+++ config/i386/i386.c	(working copy)
@@ -3936,34 +3936,26 @@  ix86_option_override_internal (bool main_args_p)
 #endif
    }
 
-  if (TARGET_AVX)
-    {
-      /* When not optimize for size, enable vzeroupper optimization for
-	 TARGET_AVX with -fexpensive-optimizations and split 32-byte
-	 AVX unaligned load/store.  */
-      if (!optimize_size)
-	{
-	  if (flag_expensive_optimizations
-	      && !(target_flags_explicit & MASK_VZEROUPPER))
-	    target_flags |= MASK_VZEROUPPER;
-	  if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
-	      && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
-	    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
-	  if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
-	      && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
-	    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
-	  /* Enable 128-bit AVX instruction generation
-	     for the auto-vectorizer.  */
-	  if (TARGET_AVX128_OPTIMAL
-	      && !(target_flags_explicit & MASK_PREFER_AVX128))
-	    target_flags |= MASK_PREFER_AVX128;
-	}
-    }
-  else
-    {
-      /* Disable vzeroupper pass if TARGET_AVX is disabled.  */
-      target_flags &= ~MASK_VZEROUPPER;
-    }
+  /* When not optimize for size, enable vzeroupper optimization for
+     TARGET_AVX with -fexpensive-optimizations and split 32-byte
+     AVX unaligned load/store.  */
+  if (!optimize_size)
+  {
+     if (flag_expensive_optimizations
+	   && !(target_flags_explicit & MASK_VZEROUPPER))
+	target_flags |= MASK_VZEROUPPER;
+     if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
+	   && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
+	target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
+     if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
+	   && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
+	target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
+     /* Enable 128-bit AVX instruction generation
+	for the auto-vectorizer.  */
+     if (TARGET_AVX128_OPTIMAL
+	   && !(target_flags_explicit & MASK_PREFER_AVX128))
+	target_flags |= MASK_PREFER_AVX128;
+  }
 
   if (ix86_recip_name)
     {