diff mbox

PATCH: PR target/46195: r165965 regresses i386 darwin

Message ID AANLkTimK4soCPb5NQ8RnhfjCmX42rwTaJnB1BiGDGg-m@mail.gmail.com
State New
Headers show

Commit Message

H.J. Lu Oct. 29, 2010, 11:55 a.m. UTC
On Thu, Oct 28, 2010 at 11:56 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Thu, Oct 28, 2010 at 7:33 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>
>> On Darwin, although long double is aligned to 16byte, it is aligned to
>> 4byte when passed on stack.
>>
>> Jack, please try this on Darwin. OK for trunk if there are no
>> regressions on Darwin and Linux?
>>
>> Thanks.
>>
>>
>> H.J.
>> ---
>> 2010-10-28  H.J. Lu  <hongjiu.lu@intel.com>
>>
>>        PR target/46195
>>        * config/i386/i386.c (bigger_than_4byte_aligned_p): New.
>>        (ix86_function_arg_boundary): Align long double parameters on
>>        stack to 4byte in 32bit.
>>
>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>> index f2bd705..02650df 100644
>> --- a/gcc/config/i386/i386.c
>> +++ b/gcc/config/i386/i386.c
>> @@ -7032,6 +7032,59 @@ ix86_old_function_arg_boundary (enum machine_mode mode, const_tree type,
>>   return align;
>>  }
>>
>> +/* Return true when aggregate TYPE should be aligned at bigger than
>> +   4byte for 32bit argument passing ABI.  */
>> +
>> +static bool
>> +bigger_than_4byte_aligned_p (const_tree type)
>
> I propose to change the function prototype and name to:
>
> int ix86_alignment_needed (const_tree_type)
>
> and this function will return alignment (in bits), as is customary in gcc.

This function is a simplified version of contains_aligned_value_p,
which should return true/false.

>> +{
>> +  enum machine_mode mode = TYPE_MODE (type);
>> +
>> +  if (mode == XFmode || mode == XCmode)
>> +    return false;
>> +
>> +  if (TYPE_ALIGN (type) < 128)
>> +    return false;
>> +
>> +  if (!AGGREGATE_TYPE_P (type))
>> +    return TYPE_ALIGN (type) >= 128;
>> +  else
>> +    {
>> +      /* Walk the aggregates recursively.  */
>> +      switch (TREE_CODE (type))
>> +       {
>> +       case RECORD_TYPE:
>> +       case UNION_TYPE:
>> +       case QUAL_UNION_TYPE:
>> +         {
>> +           tree field;
>> +
>> +           /* Walk all the structure fields.  */
>> +           for (field = TYPE_FIELDS (type);
>> +                field;
>> +                field = DECL_CHAIN (field))
>> +             {
>> +               if (TREE_CODE (field) == FIELD_DECL
>> +                   && bigger_than_4byte_aligned_p (TREE_TYPE (field)))
>> +                 return true;
>> +             }
>> +           break;
>> +         }
>> +
>> +       case ARRAY_TYPE:
>> +         /* Just for use if some languages passes arrays by value.  */
>> +         if (bigger_than_4byte_aligned_p (TREE_TYPE (type)))
>> +           return true;
>> +         break;
>> +
>> +       default:
>> +         gcc_unreachable ();
>> +       }
>> +    }
>> +
>> +  return false;
>> +}
>> +
>>  /* Gives the alignment boundary, in bits, of an argument with the
>>    specified mode and type.  */
>>
>> @@ -7055,7 +7108,18 @@ ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
>>       static bool warned;
>>       int saved_align = align;
>>
>> -      if (!TARGET_64BIT && align < 128)
>> +      /* i386 ABI defines all arguments to be 4 byte aligned.  Even if
>> +        long double is aligned to 16 byte, we always align it at 4
>> +        byte, whether it is a scalar or the part of aggregate, when
>> +        passed as function argument.  */
>> +      if (!TARGET_64BIT
>> +         && (align < 128
>> +             || (align == 128
>> +                 && (mode == XFmode
>> +                     || mode == XCmode
>> +                     || (type
>> +                         && AGGREGATE_TYPE_P (type)
>> +                         && !bigger_than_4byte_aligned_p (type))))))
>>        align = PARM_BOUNDARY;
>
> There is something wrong with the usage of utility function if it has
> to be surrounded by soo much extra checks. I propose to change all
> this to:
>
> align = ix86_alignment_needed (type)
>
> where all the knowledge of type alignment will be moved into the
> ix86_alignment_needed function.

We have to check mode since type may be NULL. Long double
is a very special case for 32bit psABI where normal alignment of a
type/mode != its alignment when passed on stack.  So we have
to check when normal alignment is 128 with the alignment of mode/type
is determined by long double.

Here is the updated patch I changed the new function name to
function_arg_128bit_aligned_32_p.

Comments

Jack Howarth Oct. 29, 2010, 4:19 p.m. UTC | #1
On Fri, Oct 29, 2010 at 04:55:57AM -0700, H.J. Lu wrote:
> 
> We have to check mode since type may be NULL. Long double
> is a very special case for 32bit psABI where normal alignment of a
> type/mode != its alignment when passed on stack.  So we have
> to check when normal alignment is 128 with the alignment of mode/type
> is determined by long double.
> 
> Here is the updated patch I changed the new function name to
> function_arg_128bit_aligned_32_p.
> 

H.J.,
   I can confirm that this new version eliminates the regressions on
x86_64-apple-darwin10 as well.
              Jack

> 
> -- 
> H.J.
> ---
> 2010-10-29  H.J. Lu  <hongjiu.lu@intel.com>
> 
> 	PR target/46195
> 	* config/i386/i386.c (function_arg_128bit_aligned_32_p): New.
> 	(ix86_function_arg_boundary): Align long double parameters on
> 	stack to 4byte in 32bit.

> 2010-10-29  H.J. Lu  <hongjiu.lu@intel.com>
> 
> 	PR target/46195
> 	* config/i386/i386.c (function_arg_128bit_aligned_32_p): New.
> 	(ix86_function_arg_boundary): Align long double parameters on
> 	stack to 4byte in 32bit.
> 
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index f2bd705..d6bfd8e 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -7032,6 +7032,59 @@ ix86_old_function_arg_boundary (enum machine_mode mode, const_tree type,
>    return align;
>  }
>  
> +/* Return true when aggregate TYPE should be aligned at 128bit for
> +   32bit argument passing ABI.  */
> +
> +static bool
> +function_arg_128bit_aligned_32_p (const_tree type)
> +{
> +  enum machine_mode mode = TYPE_MODE (type);
> +
> +  if (mode == XFmode || mode == XCmode)
> +    return false;
> +
> +  if (TYPE_ALIGN (type) < 128)
> +    return false;
> +
> +  if (!AGGREGATE_TYPE_P (type))
> +    return TYPE_ALIGN (type) >= 128;
> +  else
> +    {
> +      /* Walk the aggregates recursively.  */
> +      switch (TREE_CODE (type))
> +	{
> +	case RECORD_TYPE:
> +	case UNION_TYPE:
> +	case QUAL_UNION_TYPE:
> +	  {
> +	    tree field;
> +
> +	    /* Walk all the structure fields.  */
> +	    for (field = TYPE_FIELDS (type);
> +		 field;
> +		 field = DECL_CHAIN (field))
> +	      {
> +		if (TREE_CODE (field) == FIELD_DECL
> +		    && function_arg_128bit_aligned_32_p (TREE_TYPE (field)))
> +		  return true;
> +	      }
> +	    break;
> +	  }
> +
> +	case ARRAY_TYPE:
> +	  /* Just for use if some languages passes arrays by value.  */
> +	  if (function_arg_128bit_aligned_32_p (TREE_TYPE (type)))
> +	    return true;
> +	  break;
> +
> +	default:
> +	  gcc_unreachable ();
> +	}
> +    }
> +
> +  return false;
> +}
> +
>  /* Gives the alignment boundary, in bits, of an argument with the
>     specified mode and type.  */
>  
> @@ -7055,7 +7108,18 @@ ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
>        static bool warned;
>        int saved_align = align;
>  
> -      if (!TARGET_64BIT && align < 128)
> +      /* i386 ABI defines all arguments to be 4 byte aligned.  Even if
> +	 long double is aligned to 16 byte, we always align it at 4
> +	 byte, whether it is a scalar or the part of aggregate, when
> +	 passed as function argument.  */
> +      if (!TARGET_64BIT
> +	  && (align < 128
> +	      || (align == 128
> +		  && (mode == XFmode 
> +		      || mode == XCmode
> +		      || (type
> +			  && AGGREGATE_TYPE_P (type)
> +			  && !function_arg_128bit_aligned_32_p (type))))))
>  	align = PARM_BOUNDARY;
>  
>        if (warn_psabi
Uros Bizjak Oct. 29, 2010, 4:54 p.m. UTC | #2
On Fri, Oct 29, 2010 at 1:55 PM, H.J. Lu <hjl.tools@gmail.com> wrote:

>>> On Darwin, although long double is aligned to 16byte, it is aligned to
>>> 4byte when passed on stack.
>>>
>>> Jack, please try this on Darwin. OK for trunk if there are no
>>> regressions on Darwin and Linux?
>>>
>>> Thanks.
>>>
>>>
>>> H.J.
>>> ---
>>> 2010-10-28  H.J. Lu  <hongjiu.lu@intel.com>
>>>
>>>        PR target/46195
>>>        * config/i386/i386.c (bigger_than_4byte_aligned_p): New.
>>>        (ix86_function_arg_boundary): Align long double parameters on
>>>        stack to 4byte in 32bit.
>>>
>>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>>> index f2bd705..02650df 100644
>>> --- a/gcc/config/i386/i386.c
>>> +++ b/gcc/config/i386/i386.c
>>> @@ -7032,6 +7032,59 @@ ix86_old_function_arg_boundary (enum machine_mode mode, const_tree type,
>>>   return align;
>>>  }
>>>
>>> +/* Return true when aggregate TYPE should be aligned at bigger than
>>> +   4byte for 32bit argument passing ABI.  */
>>> +
>>> +static bool
>>> +bigger_than_4byte_aligned_p (const_tree type)
>>
>> I propose to change the function prototype and name to:
>>
>> int ix86_alignment_needed (const_tree_type)
>>
>> and this function will return alignment (in bits), as is customary in gcc.
>
> This function is a simplified version of contains_aligned_value_p,
> which should return true/false.

It is not simplified, only the top condition is changed ...

-  if (((TARGET_SSE && SSE_REG_MODE_P (mode))
-       || mode == TDmode
-       || mode == TFmode
-       || mode == TCmode)
-      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
-    return true;
+  if (mode == XFmode || mode == XCmode)
+    return false;

... and else was added:

  if (AGGREGATE_TYPE_P (type))
    {
...
    }
+  else
+    return TYPE_ALIGN (type) >= 128;

Why can't this be part of existing contains_aligned_value_p?

> +/* Return true when aggregate TYPE should be aligned at 128bit for
> +   32bit argument passing ABI.  */

According to the comment, we already have contains_aligned_value_p for this.

>>> -      if (!TARGET_64BIT && align < 128)
>>> +      /* i386 ABI defines all arguments to be 4 byte aligned.  Even if
>>> +        long double is aligned to 16 byte, we always align it at 4
>>> +        byte, whether it is a scalar or the part of aggregate, when
>>> +        passed as function argument.  */
>>> +      if (!TARGET_64BIT
>>> +         && (align < 128
>>> +             || (align == 128
>>> +                 && (mode == XFmode
>>> +                     || mode == XCmode
>>> +                     || (type
>>> +                         && AGGREGATE_TYPE_P (type)
>>> +                         && !bigger_than_4byte_aligned_p (type))))))
>>>        align = PARM_BOUNDARY;
>>
>> There is something wrong with the usage of utility function if it has
>> to be surrounded by soo much extra checks. I propose to change all
>> this to:
>>
>> align = ix86_alignment_needed (type)
>>
>> where all the knowledge of type alignment will be moved into the
>> ix86_alignment_needed function.
>
> We have to check mode since type may be NULL. Long double
> is a very special case for 32bit psABI where normal alignment of a
> type/mode != its alignment when passed on stack.  So we have
> to check when normal alignment is 128 with the alignment of mode/type
> is determined by long double.

I think that following code is much more readable.

  if (!TARGET_64BIT)
    {
      /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
      if (!type)
	{
	  if (mode == XFmode || mode == XCmode)
	    align = PARM_BOUNDARY;
	}
      else
	{
	  if (!contains_aligned_value_p (type))
	    align = PARM_BOUNDARY;
	}

      if (align < 128)
        align = PARM_BOUNDARY;
    }

Uros.
H.J. Lu Oct. 29, 2010, 5:08 p.m. UTC | #3
On Fri, Oct 29, 2010 at 9:54 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Fri, Oct 29, 2010 at 1:55 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>
>>>> On Darwin, although long double is aligned to 16byte, it is aligned to
>>>> 4byte when passed on stack.
>>>>
>>>> Jack, please try this on Darwin. OK for trunk if there are no
>>>> regressions on Darwin and Linux?
>>>>
>>>> Thanks.
>>>>
>>>>
>>>> H.J.
>>>> ---
>>>> 2010-10-28  H.J. Lu  <hongjiu.lu@intel.com>
>>>>
>>>>        PR target/46195
>>>>        * config/i386/i386.c (bigger_than_4byte_aligned_p): New.
>>>>        (ix86_function_arg_boundary): Align long double parameters on
>>>>        stack to 4byte in 32bit.
>>>>
>>>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>>>> index f2bd705..02650df 100644
>>>> --- a/gcc/config/i386/i386.c
>>>> +++ b/gcc/config/i386/i386.c
>>>> @@ -7032,6 +7032,59 @@ ix86_old_function_arg_boundary (enum machine_mode mode, const_tree type,
>>>>   return align;
>>>>  }
>>>>
>>>> +/* Return true when aggregate TYPE should be aligned at bigger than
>>>> +   4byte for 32bit argument passing ABI.  */
>>>> +
>>>> +static bool
>>>> +bigger_than_4byte_aligned_p (const_tree type)
>>>
>>> I propose to change the function prototype and name to:
>>>
>>> int ix86_alignment_needed (const_tree_type)
>>>
>>> and this function will return alignment (in bits), as is customary in gcc.
>>
>> This function is a simplified version of contains_aligned_value_p,
>> which should return true/false.
>
> It is not simplified, only the top condition is changed ...
>
> -  if (((TARGET_SSE && SSE_REG_MODE_P (mode))
> -       || mode == TDmode
> -       || mode == TFmode
> -       || mode == TCmode)
> -      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
> -    return true;
> +  if (mode == XFmode || mode == XCmode)
> +    return false;
>
> ... and else was added:
>
>  if (AGGREGATE_TYPE_P (type))
>    {
> ...
>    }
> +  else
> +    return TYPE_ALIGN (type) >= 128;
>
> Why can't this be part of existing contains_aligned_value_p?

Because contains_aligned_value_p is wrong, which causes:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=44948

We use contains_aligned_value_p to compute the old/wrong
value only when we warn psABI changes.

>> +/* Return true when aggregate TYPE should be aligned at 128bit for
>> +   32bit argument passing ABI.  */
>
> According to the comment, we already have contains_aligned_value_p for this.
>
>>>> -      if (!TARGET_64BIT && align < 128)
>>>> +      /* i386 ABI defines all arguments to be 4 byte aligned.  Even if
>>>> +        long double is aligned to 16 byte, we always align it at 4
>>>> +        byte, whether it is a scalar or the part of aggregate, when
>>>> +        passed as function argument.  */
>>>> +      if (!TARGET_64BIT
>>>> +         && (align < 128
>>>> +             || (align == 128
>>>> +                 && (mode == XFmode
>>>> +                     || mode == XCmode
>>>> +                     || (type
>>>> +                         && AGGREGATE_TYPE_P (type)
>>>> +                         && !bigger_than_4byte_aligned_p (type))))))
>>>>        align = PARM_BOUNDARY;
>>>
>>> There is something wrong with the usage of utility function if it has
>>> to be surrounded by soo much extra checks. I propose to change all
>>> this to:
>>>
>>> align = ix86_alignment_needed (type)
>>>
>>> where all the knowledge of type alignment will be moved into the
>>> ix86_alignment_needed function.
>>
>> We have to check mode since type may be NULL. Long double
>> is a very special case for 32bit psABI where normal alignment of a
>> type/mode != its alignment when passed on stack.  So we have
>> to check when normal alignment is 128 with the alignment of mode/type
>> is determined by long double.
>
> I think that following code is much more readable.
>
>  if (!TARGET_64BIT)
>    {
>      /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
>      if (!type)
>        {
>          if (mode == XFmode || mode == XCmode)
>            align = PARM_BOUNDARY;
>        }
>      else
>        {
>          if (!contains_aligned_value_p (type))
>            align = PARM_BOUNDARY;


contains_aligned_value_p may give the wrong answer
and lead to:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=44948


>        }
>
>      if (align < 128)
>        align = PARM_BOUNDARY;
>    }
>
> Uros.
>
Uros Bizjak Oct. 29, 2010, 5:24 p.m. UTC | #4
On Fri, Oct 29, 2010 at 7:08 PM, H.J. Lu <hjl.tools@gmail.com> wrote:

>>> This function is a simplified version of contains_aligned_value_p,
>>> which should return true/false.
>>
>> It is not simplified, only the top condition is changed ...
>>
>> -  if (((TARGET_SSE && SSE_REG_MODE_P (mode))
>> -       || mode == TDmode
>> -       || mode == TFmode
>> -       || mode == TCmode)
>> -      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
>> -    return true;
>> +  if (mode == XFmode || mode == XCmode)
>> +    return false;
>>
>> ... and else was added:
>>
>>  if (AGGREGATE_TYPE_P (type))
>>    {
>> ...
>>    }
>> +  else
>> +    return TYPE_ALIGN (type) >= 128;
>>
>> Why can't this be part of existing contains_aligned_value_p?
>
> Because contains_aligned_value_p is wrong, which causes:
>
> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=44948
> We use contains_aligned_value_p to compute the old/wrong
> value only when we warn psABI changes.

I see. So, please rename existing contains_aligned_value_p to
ix86_compat_aligned_value_p, and please add a big comment explaining
that the function is obsolete and is only be used for compatibility
with previous versions. For consistency,
ix86_old_function_arg_boundary should also be renamed to
ix86_compat_function_arg_boundary and similar comment about obsolete
compat function should be added there.

New function can then be named ix86_contains_aligned_value_p, since it
substitutes existing obsolete function.

>>> We have to check mode since type may be NULL. Long double
>>> is a very special case for 32bit psABI where normal alignment of a
>>> type/mode != its alignment when passed on stack.  So we have
>>> to check when normal alignment is 128 with the alignment of mode/type
>>> is determined by long double.
>>
>> I think that following code is much more readable.
>>
>>  if (!TARGET_64BIT)
>>    {
>>      /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
>>      if (!type)
>>        {
>>          if (mode == XFmode || mode == XCmode)
>>            align = PARM_BOUNDARY;
>>        }
>>      else
>>        {
>>          if (!contains_aligned_value_p (type))
>>            align = PARM_BOUNDARY;
>
>
> contains_aligned_value_p may give the wrong answer
> and lead to:
>
> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=44948

Yeah, new function should be used here, but please keep the form above
for clarity (I hope it is correct, it is untested).

Thanks,
Uros.
diff mbox

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index f2bd705..d6bfd8e 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -7032,6 +7032,59 @@  ix86_old_function_arg_boundary (enum machine_mode mode, const_tree type,
   return align;
 }
 
+/* Return true when aggregate TYPE should be aligned at 128bit for
+   32bit argument passing ABI.  */
+
+static bool
+function_arg_128bit_aligned_32_p (const_tree type)
+{
+  enum machine_mode mode = TYPE_MODE (type);
+
+  if (mode == XFmode || mode == XCmode)
+    return false;
+
+  if (TYPE_ALIGN (type) < 128)
+    return false;
+
+  if (!AGGREGATE_TYPE_P (type))
+    return TYPE_ALIGN (type) >= 128;
+  else
+    {
+      /* Walk the aggregates recursively.  */
+      switch (TREE_CODE (type))
+	{
+	case RECORD_TYPE:
+	case UNION_TYPE:
+	case QUAL_UNION_TYPE:
+	  {
+	    tree field;
+
+	    /* Walk all the structure fields.  */
+	    for (field = TYPE_FIELDS (type);
+		 field;
+		 field = DECL_CHAIN (field))
+	      {
+		if (TREE_CODE (field) == FIELD_DECL
+		    && function_arg_128bit_aligned_32_p (TREE_TYPE (field)))
+		  return true;
+	      }
+	    break;
+	  }
+
+	case ARRAY_TYPE:
+	  /* Just for use if some languages passes arrays by value.  */
+	  if (function_arg_128bit_aligned_32_p (TREE_TYPE (type)))
+	    return true;
+	  break;
+
+	default:
+	  gcc_unreachable ();
+	}
+    }
+
+  return false;
+}
+
 /* Gives the alignment boundary, in bits, of an argument with the
    specified mode and type.  */
 
@@ -7055,7 +7108,18 @@  ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
       static bool warned;
       int saved_align = align;
 
-      if (!TARGET_64BIT && align < 128)
+      /* i386 ABI defines all arguments to be 4 byte aligned.  Even if
+	 long double is aligned to 16 byte, we always align it at 4
+	 byte, whether it is a scalar or the part of aggregate, when
+	 passed as function argument.  */
+      if (!TARGET_64BIT
+	  && (align < 128
+	      || (align == 128
+		  && (mode == XFmode 
+		      || mode == XCmode
+		      || (type
+			  && AGGREGATE_TYPE_P (type)
+			  && !function_arg_128bit_aligned_32_p (type))))))
 	align = PARM_BOUNDARY;
 
       if (warn_psabi