diff mbox

[PR,target/69454] Disable TARGET_STV when stack is not properly aligned

Message ID 20160203201137.GG3017@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Feb. 3, 2016, 8:11 p.m. UTC
Hi!

On Tue, Feb 02, 2016 at 05:09:34PM +0300, Ilya Enkovich wrote:
> And it's too late to do it after STV pass and therefore we disable it
> when stack is not properly aligned. I think this argumentation goes in
> a loop.

This is a P1 that needs to be fixed, so that we don't defer this forever,
what about the following patch?  As neither stv nor preferred-stack-boundary
nor incoming-stack-boundary are allowed target attribute/GCC target pragma
switches, I can't find a problem with that.  We don't track at expansion
time whether the function is leaf or not, so the patch pessimistically
assumes that the function might be leaf and check both incoming and
preferred stack boundaries.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-02-03  Jakub Jelinek  <jakub@redhat.com>
	    Ilya Enkovich  <enkovich.gnu@gmail.com>
	    H.J. Lu  <hongjiu.lu@intel.com>

	PR target/69454
	* config/i386/i386.c (convert_scalars_to_vector): Remove
	stack alignment fixes.
	(ix86_option_override_internal): Disable TARGET_STV if stack
	might not be aligned enough.
	(ix86_minimum_alignment): Assert that TARGET_STV is false.

	* gcc.target/i386/pr69454-1.c: New test.
	* gcc.target/i386/pr69454-2.c: New test.



	Jakub

Comments

Uros Bizjak Feb. 4, 2016, 8:02 a.m. UTC | #1
On Wed, Feb 3, 2016 at 9:11 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> On Tue, Feb 02, 2016 at 05:09:34PM +0300, Ilya Enkovich wrote:
>> And it's too late to do it after STV pass and therefore we disable it
>> when stack is not properly aligned. I think this argumentation goes in
>> a loop.
>
> This is a P1 that needs to be fixed, so that we don't defer this forever,
> what about the following patch?  As neither stv nor preferred-stack-boundary
> nor incoming-stack-boundary are allowed target attribute/GCC target pragma
> switches, I can't find a problem with that.  We don't track at expansion
> time whether the function is leaf or not, so the patch pessimistically
> assumes that the function might be leaf and check both incoming and
> preferred stack boundaries.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2016-02-03  Jakub Jelinek  <jakub@redhat.com>
>             Ilya Enkovich  <enkovich.gnu@gmail.com>
>             H.J. Lu  <hongjiu.lu@intel.com>
>
>         PR target/69454
>         * config/i386/i386.c (convert_scalars_to_vector): Remove
>         stack alignment fixes.
>         (ix86_option_override_internal): Disable TARGET_STV if stack
>         might not be aligned enough.
>         (ix86_minimum_alignment): Assert that TARGET_STV is false.
>
>         * gcc.target/i386/pr69454-1.c: New test.
>         * gcc.target/i386/pr69454-2.c: New test.

OK.

We can eventually introduce realignment for STV for gcc-7.

Thanks,
Uros.

> --- gcc/config/i386/i386.c.jj   2016-02-02 20:42:01.024287587 +0100
> +++ gcc/config/i386/i386.c      2016-02-03 18:45:43.271997909 +0100
> @@ -3588,16 +3588,6 @@ convert_scalars_to_vector ()
>    bitmap_obstack_release (NULL);
>    df_process_deferred_rescans ();
>
> -  /* Conversion means we may have 128bit register spills/fills
> -     which require aligned stack.  */
> -  if (converted_insns)
> -    {
> -      if (crtl->stack_alignment_needed < 128)
> -       crtl->stack_alignment_needed = 128;
> -      if (crtl->stack_alignment_estimated < 128)
> -       crtl->stack_alignment_estimated = 128;
> -    }
> -
>    return 0;
>  }
>
> @@ -5453,6 +5443,13 @@ ix86_option_override_internal (bool main
>      opts->x_target_flags |= MASK_VZEROUPPER;
>    if (!(opts_set->x_target_flags & MASK_STV))
>      opts->x_target_flags |= MASK_STV;
> +  /* Disable STV if -mpreferred-stack-boundary=2 or
> +     -mincoming-stack-boundary=2 - the needed
> +     stack realignment will be extra cost the pass doesn't take into
> +     account and the pass can't realign the stack.  */
> +  if (ix86_preferred_stack_boundary < 64
> +      || ix86_incoming_stack_boundary < 64)
> +    opts->x_target_flags &= ~MASK_STV;
>    if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
>        && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
>      opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
> @@ -29323,7 +29320,10 @@ ix86_minimum_alignment (tree exp, machin
>    if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
>        && (!type || !TYPE_USER_ALIGN (type))
>        && (!decl || !DECL_USER_ALIGN (decl)))
> -    return 32;
> +    {
> +      gcc_checking_assert (!TARGET_STV);
> +      return 32;
> +    }
>
>    return align;
>  }
> --- gcc/testsuite/gcc.target/i386/pr69454-1.c.jj        2016-02-03 18:44:17.642175753 +0100
> +++ gcc/testsuite/gcc.target/i386/pr69454-1.c   2016-02-03 18:44:17.642175753 +0100
> @@ -0,0 +1,11 @@
> +/* { dg-do compile { target { ia32 } } } */
> +/* { dg-options "-O2 -msse2 -mno-accumulate-outgoing-args -mpreferred-stack-boundary=2" } */
> +
> +typedef struct { long long w64[2]; } V128;
> +extern V128* fn2(void);
> +long long a;
> +V128 b;
> +void fn1() {
> +  V128 *c = fn2();
> +  c->w64[0] = a ^ b.w64[0];
> +}
> --- gcc/testsuite/gcc.target/i386/pr69454-2.c.jj        2016-02-03 18:44:17.655175574 +0100
> +++ gcc/testsuite/gcc.target/i386/pr69454-2.c   2016-02-03 18:44:17.655175574 +0100
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ia32 } } } */
> +/* { dg-options "-O2 -mpreferred-stack-boundary=2" } */
> +
> +extern void fn2 ();
> +long long a, b;
> +
> +void fn1 ()
> +{
> +  long long c = a;
> +  a = b ^ a;
> +  fn2 ();
> +  a = c;
> +}
>
>
>         Jakub
H.J. Lu Feb. 4, 2016, 7:47 p.m. UTC | #2
On Thu, Feb 4, 2016 at 12:02 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Wed, Feb 3, 2016 at 9:11 PM, Jakub Jelinek <jakub@redhat.com> wrote:
>> Hi!
>>
>> On Tue, Feb 02, 2016 at 05:09:34PM +0300, Ilya Enkovich wrote:
>>> And it's too late to do it after STV pass and therefore we disable it
>>> when stack is not properly aligned. I think this argumentation goes in
>>> a loop.
>>
>> This is a P1 that needs to be fixed, so that we don't defer this forever,
>> what about the following patch?  As neither stv nor preferred-stack-boundary
>> nor incoming-stack-boundary are allowed target attribute/GCC target pragma
>> switches, I can't find a problem with that.  We don't track at expansion
>> time whether the function is leaf or not, so the patch pessimistically
>> assumes that the function might be leaf and check both incoming and
>> preferred stack boundaries.
>>
>> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>>
>> 2016-02-03  Jakub Jelinek  <jakub@redhat.com>
>>             Ilya Enkovich  <enkovich.gnu@gmail.com>
>>             H.J. Lu  <hongjiu.lu@intel.com>
>>
>>         PR target/69454
>>         * config/i386/i386.c (convert_scalars_to_vector): Remove
>>         stack alignment fixes.
>>         (ix86_option_override_internal): Disable TARGET_STV if stack
>>         might not be aligned enough.
>>         (ix86_minimum_alignment): Assert that TARGET_STV is false.
>>
>>         * gcc.target/i386/pr69454-1.c: New test.
>>         * gcc.target/i386/pr69454-2.c: New test.
>
> OK.
>
> We can eventually introduce realignment for STV for gcc-7.
>
> Thanks,
> Uros.
>
>> --- gcc/config/i386/i386.c.jj   2016-02-02 20:42:01.024287587 +0100
>> +++ gcc/config/i386/i386.c      2016-02-03 18:45:43.271997909 +0100
>> @@ -3588,16 +3588,6 @@ convert_scalars_to_vector ()
>>    bitmap_obstack_release (NULL);
>>    df_process_deferred_rescans ();
>>
>> -  /* Conversion means we may have 128bit register spills/fills
>> -     which require aligned stack.  */
>> -  if (converted_insns)
>> -    {
>> -      if (crtl->stack_alignment_needed < 128)
>> -       crtl->stack_alignment_needed = 128;
>> -      if (crtl->stack_alignment_estimated < 128)
>> -       crtl->stack_alignment_estimated = 128;
>> -    }
>> -
>>    return 0;
>>  }
>>
>> @@ -5453,6 +5443,13 @@ ix86_option_override_internal (bool main
>>      opts->x_target_flags |= MASK_VZEROUPPER;
>>    if (!(opts_set->x_target_flags & MASK_STV))
>>      opts->x_target_flags |= MASK_STV;
>> +  /* Disable STV if -mpreferred-stack-boundary=2 or
>> +     -mincoming-stack-boundary=2 - the needed
>> +     stack realignment will be extra cost the pass doesn't take into
>> +     account and the pass can't realign the stack.  */
>> +  if (ix86_preferred_stack_boundary < 64
>> +      || ix86_incoming_stack_boundary < 64)
>> +    opts->x_target_flags &= ~MASK_STV;
>>    if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
>>        && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
>>      opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
>> @@ -29323,7 +29320,10 @@ ix86_minimum_alignment (tree exp, machin
>>    if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
>>        && (!type || !TYPE_USER_ALIGN (type))
>>        && (!decl || !DECL_USER_ALIGN (decl)))
>> -    return 32;
>> +    {
>> +      gcc_checking_assert (!TARGET_STV);
>> +      return 32;
>> +    }
>>
>>    return align;
>>  }
>> --- gcc/testsuite/gcc.target/i386/pr69454-1.c.jj        2016-02-03 18:44:17.642175753 +0100
>> +++ gcc/testsuite/gcc.target/i386/pr69454-1.c   2016-02-03 18:44:17.642175753 +0100
>> @@ -0,0 +1,11 @@
>> +/* { dg-do compile { target { ia32 } } } */
>> +/* { dg-options "-O2 -msse2 -mno-accumulate-outgoing-args -mpreferred-stack-boundary=2" } */
>> +
>> +typedef struct { long long w64[2]; } V128;
>> +extern V128* fn2(void);
>> +long long a;
>> +V128 b;
>> +void fn1() {
>> +  V128 *c = fn2();
>> +  c->w64[0] = a ^ b.w64[0];
>> +}
>> --- gcc/testsuite/gcc.target/i386/pr69454-2.c.jj        2016-02-03 18:44:17.655175574 +0100
>> +++ gcc/testsuite/gcc.target/i386/pr69454-2.c   2016-02-03 18:44:17.655175574 +0100
>> @@ -0,0 +1,13 @@
>> +/* { dg-do compile { target { ia32 } } } */
>> +/* { dg-options "-O2 -mpreferred-stack-boundary=2" } */
>> +
>> +extern void fn2 ();
>> +long long a, b;
>> +
>> +void fn1 ()
>> +{
>> +  long long c = a;
>> +  a = b ^ a;
>> +  fn2 ();
>> +  a = c;
>> +}
>>
>>
>>         Jakub

This caused:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69677
diff mbox

Patch

--- gcc/config/i386/i386.c.jj	2016-02-02 20:42:01.024287587 +0100
+++ gcc/config/i386/i386.c	2016-02-03 18:45:43.271997909 +0100
@@ -3588,16 +3588,6 @@  convert_scalars_to_vector ()
   bitmap_obstack_release (NULL);
   df_process_deferred_rescans ();
 
-  /* Conversion means we may have 128bit register spills/fills
-     which require aligned stack.  */
-  if (converted_insns)
-    {
-      if (crtl->stack_alignment_needed < 128)
-	crtl->stack_alignment_needed = 128;
-      if (crtl->stack_alignment_estimated < 128)
-	crtl->stack_alignment_estimated = 128;
-    }
-
   return 0;
 }
 
@@ -5453,6 +5443,13 @@  ix86_option_override_internal (bool main
     opts->x_target_flags |= MASK_VZEROUPPER;
   if (!(opts_set->x_target_flags & MASK_STV))
     opts->x_target_flags |= MASK_STV;
+  /* Disable STV if -mpreferred-stack-boundary=2 or
+     -mincoming-stack-boundary=2 - the needed
+     stack realignment will be extra cost the pass doesn't take into
+     account and the pass can't realign the stack.  */
+  if (ix86_preferred_stack_boundary < 64
+      || ix86_incoming_stack_boundary < 64)
+    opts->x_target_flags &= ~MASK_STV;
   if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
       && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
     opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
@@ -29323,7 +29320,10 @@  ix86_minimum_alignment (tree exp, machin
   if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
       && (!type || !TYPE_USER_ALIGN (type))
       && (!decl || !DECL_USER_ALIGN (decl)))
-    return 32;
+    {
+      gcc_checking_assert (!TARGET_STV);
+      return 32;
+    }
 
   return align;
 }
--- gcc/testsuite/gcc.target/i386/pr69454-1.c.jj	2016-02-03 18:44:17.642175753 +0100
+++ gcc/testsuite/gcc.target/i386/pr69454-1.c	2016-02-03 18:44:17.642175753 +0100
@@ -0,0 +1,11 @@ 
+/* { dg-do compile { target { ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-accumulate-outgoing-args -mpreferred-stack-boundary=2" } */
+
+typedef struct { long long w64[2]; } V128;
+extern V128* fn2(void);
+long long a;
+V128 b;
+void fn1() {
+  V128 *c = fn2();
+  c->w64[0] = a ^ b.w64[0];
+}
--- gcc/testsuite/gcc.target/i386/pr69454-2.c.jj	2016-02-03 18:44:17.655175574 +0100
+++ gcc/testsuite/gcc.target/i386/pr69454-2.c	2016-02-03 18:44:17.655175574 +0100
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target { ia32 } } } */
+/* { dg-options "-O2 -mpreferred-stack-boundary=2" } */
+
+extern void fn2 ();
+long long a, b;
+
+void fn1 ()
+{
+  long long c = a;
+  a = b ^ a;
+  fn2 ();
+  a = c;
+}