diff mbox

PATCH: PR middle-end/47383: ivopts miscompiles Pmode != ptr_mode

Message ID AANLkTinWsqi52+_T1Z4A=+LBTw=_ifiHbyXn=RPsYjwQ@mail.gmail.com
State New
Headers show

Commit Message

H.J. Lu Feb. 9, 2011, 5:17 p.m. UTC
On Wed, Feb 9, 2011 at 2:04 AM, Richard Guenther
<richard.guenther@gmail.com> wrote:
> On Wed, Feb 9, 2011 at 12:08 AM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>> Hi,
>>
>> tree-ssa-loop-ivopts.c has
>>
>> ---
>> /* Returns variant of TYPE that can be used as base for different uses.
>>   We return unsigned type with the same precision, which avoids problems
>>   with overflows.  */
>>
>> static tree
>> generic_type_for (tree type)
>> {
>>  if (POINTER_TYPE_P (type))
>>    return unsigned_type_for (type);
>>
>>  if (TYPE_UNSIGNED (type))
>>    return type;
>>
>>  return unsigned_type_for (type);
>> }
>> ---
>>
>> It converts negative step to unsigned type with the same precision.
>> It doesn't work when Pmode != ptr_mode.  This patch disables it.
>> OK for trunk in stage 1?
>
> This does not look correct.  It rather sounds you are expanding
> TARGET_MEM_REFs the wrong way - the address computed by
> it is supposed to be calculated in ptr_mode and only the final
> result extended to Pmode.
>


TARGET_MEM_REF is OK. The problem is ivopts passes
the negative offset to TARGET_MEM_REF as unsigned and
expects offset will wrap.  It only works when Pmode ==
ptr_mode.  I am checking in this patch into x32 branch to
avoid such cases.

Comments

Richard Biener Feb. 9, 2011, 8:17 p.m. UTC | #1
On Wed, Feb 9, 2011 at 6:17 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Wed, Feb 9, 2011 at 2:04 AM, Richard Guenther
> <richard.guenther@gmail.com> wrote:
>> On Wed, Feb 9, 2011 at 12:08 AM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>> Hi,
>>>
>>> tree-ssa-loop-ivopts.c has
>>>
>>> ---
>>> /* Returns variant of TYPE that can be used as base for different uses.
>>>   We return unsigned type with the same precision, which avoids problems
>>>   with overflows.  */
>>>
>>> static tree
>>> generic_type_for (tree type)
>>> {
>>>  if (POINTER_TYPE_P (type))
>>>    return unsigned_type_for (type);
>>>
>>>  if (TYPE_UNSIGNED (type))
>>>    return type;
>>>
>>>  return unsigned_type_for (type);
>>> }
>>> ---
>>>
>>> It converts negative step to unsigned type with the same precision.
>>> It doesn't work when Pmode != ptr_mode.  This patch disables it.
>>> OK for trunk in stage 1?
>>
>> This does not look correct.  It rather sounds you are expanding
>> TARGET_MEM_REFs the wrong way - the address computed by
>> it is supposed to be calculated in ptr_mode and only the final
>> result extended to Pmode.
>>
>
>
> TARGET_MEM_REF is OK. The problem is ivopts passes
> the negative offset to TARGET_MEM_REF as unsigned and
> expects offset will wrap.

But they will wrap, as arithmetic is carried out in a type with
the same precision (that of ptr_mode).

>  It only works when Pmode ==
> ptr_mode.  I am checking in this patch into x32 branch to
> avoid such cases.

I think you are wrong.

Richard.

>
> --
> H.J.
>
H.J. Lu Feb. 9, 2011, 8:45 p.m. UTC | #2
On Wed, Feb 9, 2011 at 12:17 PM, Richard Guenther
<richard.guenther@gmail.com> wrote:
> On Wed, Feb 9, 2011 at 6:17 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Wed, Feb 9, 2011 at 2:04 AM, Richard Guenther
>> <richard.guenther@gmail.com> wrote:
>>> On Wed, Feb 9, 2011 at 12:08 AM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>>> Hi,
>>>>
>>>> tree-ssa-loop-ivopts.c has
>>>>
>>>> ---
>>>> /* Returns variant of TYPE that can be used as base for different uses.
>>>>   We return unsigned type with the same precision, which avoids problems
>>>>   with overflows.  */
>>>>
>>>> static tree
>>>> generic_type_for (tree type)
>>>> {
>>>>  if (POINTER_TYPE_P (type))
>>>>    return unsigned_type_for (type);
>>>>
>>>>  if (TYPE_UNSIGNED (type))
>>>>    return type;
>>>>
>>>>  return unsigned_type_for (type);
>>>> }
>>>> ---
>>>>
>>>> It converts negative step to unsigned type with the same precision.
>>>> It doesn't work when Pmode != ptr_mode.  This patch disables it.
>>>> OK for trunk in stage 1?
>>>
>>> This does not look correct.  It rather sounds you are expanding
>>> TARGET_MEM_REFs the wrong way - the address computed by
>>> it is supposed to be calculated in ptr_mode and only the final
>>> result extended to Pmode.
>>>
>>
>>
>> TARGET_MEM_REF is OK. The problem is ivopts passes
>> the negative offset to TARGET_MEM_REF as unsigned and
>> expects offset will wrap.
>
> But they will wrap, as arithmetic is carried out in a type with
> the same precision (that of ptr_mode).
>
>>  It only works when Pmode ==
>> ptr_mode.  I am checking in this patch into x32 branch to
>> avoid such cases.
>
> I think you are wrong.
>

If you have a patch, I can give a try.
Richard Biener Feb. 9, 2011, 9:17 p.m. UTC | #3
On Wed, Feb 9, 2011 at 9:45 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Wed, Feb 9, 2011 at 12:17 PM, Richard Guenther
> <richard.guenther@gmail.com> wrote:
>> On Wed, Feb 9, 2011 at 6:17 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>> On Wed, Feb 9, 2011 at 2:04 AM, Richard Guenther
>>> <richard.guenther@gmail.com> wrote:
>>>> On Wed, Feb 9, 2011 at 12:08 AM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>>>> Hi,
>>>>>
>>>>> tree-ssa-loop-ivopts.c has
>>>>>
>>>>> ---
>>>>> /* Returns variant of TYPE that can be used as base for different uses.
>>>>>   We return unsigned type with the same precision, which avoids problems
>>>>>   with overflows.  */
>>>>>
>>>>> static tree
>>>>> generic_type_for (tree type)
>>>>> {
>>>>>  if (POINTER_TYPE_P (type))
>>>>>    return unsigned_type_for (type);
>>>>>
>>>>>  if (TYPE_UNSIGNED (type))
>>>>>    return type;
>>>>>
>>>>>  return unsigned_type_for (type);
>>>>> }
>>>>> ---
>>>>>
>>>>> It converts negative step to unsigned type with the same precision.
>>>>> It doesn't work when Pmode != ptr_mode.  This patch disables it.
>>>>> OK for trunk in stage 1?
>>>>
>>>> This does not look correct.  It rather sounds you are expanding
>>>> TARGET_MEM_REFs the wrong way - the address computed by
>>>> it is supposed to be calculated in ptr_mode and only the final
>>>> result extended to Pmode.
>>>>
>>>
>>>
>>> TARGET_MEM_REF is OK. The problem is ivopts passes
>>> the negative offset to TARGET_MEM_REF as unsigned and
>>> expects offset will wrap.
>>
>> But they will wrap, as arithmetic is carried out in a type with
>> the same precision (that of ptr_mode).
>>
>>>  It only works when Pmode ==
>>> ptr_mode.  I am checking in this patch into x32 branch to
>>> avoid such cases.
>>
>> I think you are wrong.
>>
>
> If you have a patch, I can give a try.

I'd have to debug it and I don't have a target that shows the failure, but
just looking around I assume that in

rtx
addr_for_mem_ref (struct mem_address *addr, addr_space_t as,
                  bool really_expand)
{
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);

address_mode will be Pmode - that would be already wrong.  We need to
use ptr_mode here and at the end of the function convert the
generated address to Pmode appropriately.  Can you verify that theory?

Richard.

>
> --
> H.J.
>
Richard Biener Feb. 9, 2011, 9:22 p.m. UTC | #4
On Wed, Feb 9, 2011 at 10:17 PM, Richard Guenther
<richard.guenther@gmail.com> wrote:
> On Wed, Feb 9, 2011 at 9:45 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Wed, Feb 9, 2011 at 12:17 PM, Richard Guenther
>> <richard.guenther@gmail.com> wrote:
>>> On Wed, Feb 9, 2011 at 6:17 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>> On Wed, Feb 9, 2011 at 2:04 AM, Richard Guenther
>>>> <richard.guenther@gmail.com> wrote:
>>>>> On Wed, Feb 9, 2011 at 12:08 AM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>>>>> Hi,
>>>>>>
>>>>>> tree-ssa-loop-ivopts.c has
>>>>>>
>>>>>> ---
>>>>>> /* Returns variant of TYPE that can be used as base for different uses.
>>>>>>   We return unsigned type with the same precision, which avoids problems
>>>>>>   with overflows.  */
>>>>>>
>>>>>> static tree
>>>>>> generic_type_for (tree type)
>>>>>> {
>>>>>>  if (POINTER_TYPE_P (type))
>>>>>>    return unsigned_type_for (type);
>>>>>>
>>>>>>  if (TYPE_UNSIGNED (type))
>>>>>>    return type;
>>>>>>
>>>>>>  return unsigned_type_for (type);
>>>>>> }
>>>>>> ---
>>>>>>
>>>>>> It converts negative step to unsigned type with the same precision.
>>>>>> It doesn't work when Pmode != ptr_mode.  This patch disables it.
>>>>>> OK for trunk in stage 1?
>>>>>
>>>>> This does not look correct.  It rather sounds you are expanding
>>>>> TARGET_MEM_REFs the wrong way - the address computed by
>>>>> it is supposed to be calculated in ptr_mode and only the final
>>>>> result extended to Pmode.
>>>>>
>>>>
>>>>
>>>> TARGET_MEM_REF is OK. The problem is ivopts passes
>>>> the negative offset to TARGET_MEM_REF as unsigned and
>>>> expects offset will wrap.
>>>
>>> But they will wrap, as arithmetic is carried out in a type with
>>> the same precision (that of ptr_mode).
>>>
>>>>  It only works when Pmode ==
>>>> ptr_mode.  I am checking in this patch into x32 branch to
>>>> avoid such cases.
>>>
>>> I think you are wrong.
>>>
>>
>> If you have a patch, I can give a try.
>
> I'd have to debug it and I don't have a target that shows the failure, but
> just looking around I assume that in
>
> rtx
> addr_for_mem_ref (struct mem_address *addr, addr_space_t as,
>                  bool really_expand)
> {
>  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
>
> address_mode will be Pmode - that would be already wrong.  We need to
> use ptr_mode here and at the end of the function convert the
> generated address to Pmode appropriately.  Can you verify that theory?

From the default hook implementation that indeed seems to be the case.
Using ptr_mode might be undesirable as then the generated RTL will
probably never match complex addressing modes.  Thus the component
and address types used for TARGET_MEM_REF are not suitable for
a Pmode != ptr_mode target and would have to use a pointer type
of Pmode mode (which we don't have, and frankly introducing such a
2nd pointer mode on trees might have non-trivial effects).  Eventually
just forcing the offset components of TARGET_MEM_REF to Pmode
size would also be a possibility (though you will run into the issue that
we loose any signedness of pointer offsets by casting them to sizetype).

That said - I'd go for the ptr_mode address RTX generation for
correctness and then start addressing the tree issue by doing step one,
getting rid of the sizetype usage for pointer offsets.

Richard.

> Richard.
>
>>
>> --
>> H.J.
>>
>
H.J. Lu Feb. 9, 2011, 9:28 p.m. UTC | #5
On Wed, Feb 9, 2011 at 1:22 PM, Richard Guenther
<richard.guenther@gmail.com> wrote:
> On Wed, Feb 9, 2011 at 10:17 PM, Richard Guenther
> <richard.guenther@gmail.com> wrote:
>> On Wed, Feb 9, 2011 at 9:45 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>> On Wed, Feb 9, 2011 at 12:17 PM, Richard Guenther
>>> <richard.guenther@gmail.com> wrote:
>>>> On Wed, Feb 9, 2011 at 6:17 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>> On Wed, Feb 9, 2011 at 2:04 AM, Richard Guenther
>>>>> <richard.guenther@gmail.com> wrote:
>>>>>> On Wed, Feb 9, 2011 at 12:08 AM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>>>>>> Hi,
>>>>>>>
>>>>>>> tree-ssa-loop-ivopts.c has
>>>>>>>
>>>>>>> ---
>>>>>>> /* Returns variant of TYPE that can be used as base for different uses.
>>>>>>>   We return unsigned type with the same precision, which avoids problems
>>>>>>>   with overflows.  */
>>>>>>>
>>>>>>> static tree
>>>>>>> generic_type_for (tree type)
>>>>>>> {
>>>>>>>  if (POINTER_TYPE_P (type))
>>>>>>>    return unsigned_type_for (type);
>>>>>>>
>>>>>>>  if (TYPE_UNSIGNED (type))
>>>>>>>    return type;
>>>>>>>
>>>>>>>  return unsigned_type_for (type);
>>>>>>> }
>>>>>>> ---
>>>>>>>
>>>>>>> It converts negative step to unsigned type with the same precision.
>>>>>>> It doesn't work when Pmode != ptr_mode.  This patch disables it.
>>>>>>> OK for trunk in stage 1?
>>>>>>
>>>>>> This does not look correct.  It rather sounds you are expanding
>>>>>> TARGET_MEM_REFs the wrong way - the address computed by
>>>>>> it is supposed to be calculated in ptr_mode and only the final
>>>>>> result extended to Pmode.
>>>>>>
>>>>>
>>>>>
>>>>> TARGET_MEM_REF is OK. The problem is ivopts passes
>>>>> the negative offset to TARGET_MEM_REF as unsigned and
>>>>> expects offset will wrap.
>>>>
>>>> But they will wrap, as arithmetic is carried out in a type with
>>>> the same precision (that of ptr_mode).
>>>>
>>>>>  It only works when Pmode ==
>>>>> ptr_mode.  I am checking in this patch into x32 branch to
>>>>> avoid such cases.
>>>>
>>>> I think you are wrong.
>>>>
>>>
>>> If you have a patch, I can give a try.
>>
>> I'd have to debug it and I don't have a target that shows the failure, but
>> just looking around I assume that in
>>
>> rtx
>> addr_for_mem_ref (struct mem_address *addr, addr_space_t as,
>>                  bool really_expand)
>> {
>>  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
>>
>> address_mode will be Pmode - that would be already wrong.  We need to
>> use ptr_mode here and at the end of the function convert the
>> generated address to Pmode appropriately.  Can you verify that theory?
>
> From the default hook implementation that indeed seems to be the case.
> Using ptr_mode might be undesirable as then the generated RTL will
> probably never match complex addressing modes.  Thus the component
> and address types used for TARGET_MEM_REF are not suitable for
> a Pmode != ptr_mode target and would have to use a pointer type
> of Pmode mode (which we don't have, and frankly introducing such a
> 2nd pointer mode on trees might have non-trivial effects).  Eventually
> just forcing the offset components of TARGET_MEM_REF to Pmode
> size would also be a possibility (though you will run into the issue that
> we loose any signedness of pointer offsets by casting them to sizetype).

The offset components have to be signed while ptr_mode to Pmode
may be zero extended.

> That said - I'd go for the ptr_mode address RTX generation for
> correctness and then start addressing the tree issue by doing step one,
> getting rid of the sizetype usage for pointer offsets.
>

I will try any patches.
Richard Biener Feb. 9, 2011, 9:35 p.m. UTC | #6
On Wed, Feb 9, 2011 at 10:28 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Wed, Feb 9, 2011 at 1:22 PM, Richard Guenther
> <richard.guenther@gmail.com> wrote:
>> On Wed, Feb 9, 2011 at 10:17 PM, Richard Guenther
>> <richard.guenther@gmail.com> wrote:
>>> On Wed, Feb 9, 2011 at 9:45 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>> On Wed, Feb 9, 2011 at 12:17 PM, Richard Guenther
>>>> <richard.guenther@gmail.com> wrote:
>>>>> On Wed, Feb 9, 2011 at 6:17 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>> On Wed, Feb 9, 2011 at 2:04 AM, Richard Guenther
>>>>>> <richard.guenther@gmail.com> wrote:
>>>>>>> On Wed, Feb 9, 2011 at 12:08 AM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>>>>>>> Hi,
>>>>>>>>
>>>>>>>> tree-ssa-loop-ivopts.c has
>>>>>>>>
>>>>>>>> ---
>>>>>>>> /* Returns variant of TYPE that can be used as base for different uses.
>>>>>>>>   We return unsigned type with the same precision, which avoids problems
>>>>>>>>   with overflows.  */
>>>>>>>>
>>>>>>>> static tree
>>>>>>>> generic_type_for (tree type)
>>>>>>>> {
>>>>>>>>  if (POINTER_TYPE_P (type))
>>>>>>>>    return unsigned_type_for (type);
>>>>>>>>
>>>>>>>>  if (TYPE_UNSIGNED (type))
>>>>>>>>    return type;
>>>>>>>>
>>>>>>>>  return unsigned_type_for (type);
>>>>>>>> }
>>>>>>>> ---
>>>>>>>>
>>>>>>>> It converts negative step to unsigned type with the same precision.
>>>>>>>> It doesn't work when Pmode != ptr_mode.  This patch disables it.
>>>>>>>> OK for trunk in stage 1?
>>>>>>>
>>>>>>> This does not look correct.  It rather sounds you are expanding
>>>>>>> TARGET_MEM_REFs the wrong way - the address computed by
>>>>>>> it is supposed to be calculated in ptr_mode and only the final
>>>>>>> result extended to Pmode.
>>>>>>>
>>>>>>
>>>>>>
>>>>>> TARGET_MEM_REF is OK. The problem is ivopts passes
>>>>>> the negative offset to TARGET_MEM_REF as unsigned and
>>>>>> expects offset will wrap.
>>>>>
>>>>> But they will wrap, as arithmetic is carried out in a type with
>>>>> the same precision (that of ptr_mode).
>>>>>
>>>>>>  It only works when Pmode ==
>>>>>> ptr_mode.  I am checking in this patch into x32 branch to
>>>>>> avoid such cases.
>>>>>
>>>>> I think you are wrong.
>>>>>
>>>>
>>>> If you have a patch, I can give a try.
>>>
>>> I'd have to debug it and I don't have a target that shows the failure, but
>>> just looking around I assume that in
>>>
>>> rtx
>>> addr_for_mem_ref (struct mem_address *addr, addr_space_t as,
>>>                  bool really_expand)
>>> {
>>>  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
>>>
>>> address_mode will be Pmode - that would be already wrong.  We need to
>>> use ptr_mode here and at the end of the function convert the
>>> generated address to Pmode appropriately.  Can you verify that theory?
>>
>> From the default hook implementation that indeed seems to be the case.
>> Using ptr_mode might be undesirable as then the generated RTL will
>> probably never match complex addressing modes.  Thus the component
>> and address types used for TARGET_MEM_REF are not suitable for
>> a Pmode != ptr_mode target and would have to use a pointer type
>> of Pmode mode (which we don't have, and frankly introducing such a
>> 2nd pointer mode on trees might have non-trivial effects).  Eventually
>> just forcing the offset components of TARGET_MEM_REF to Pmode
>> size would also be a possibility (though you will run into the issue that
>> we loose any signedness of pointer offsets by casting them to sizetype).
>
> The offset components have to be signed while ptr_mode to Pmode
> may be zero extended.

The sign of the offset components does not matter if they have the
same width as the resulting pointer.  Or do you say we have
a %eax + %ebx addressing mode that zero-extends eax and sign-extends
ebx?  Or that we can use a 32bit offset register that will be sign-extended?

>> That said - I'd go for the ptr_mode address RTX generation for
>> correctness and then start addressing the tree issue by doing step one,
>> getting rid of the sizetype usage for pointer offsets.
>>
>
> I will try any patches.

Not from me ;)  I pointed you at the problem, that must suffice.

Richard.

> --
> H.J.
>
H.J. Lu Feb. 9, 2011, 11:05 p.m. UTC | #7
On Wed, Feb 9, 2011 at 1:35 PM, Richard Guenther
<richard.guenther@gmail.com> wrote:
> On Wed, Feb 9, 2011 at 10:28 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Wed, Feb 9, 2011 at 1:22 PM, Richard Guenther
>> <richard.guenther@gmail.com> wrote:
>>> On Wed, Feb 9, 2011 at 10:17 PM, Richard Guenther
>>> <richard.guenther@gmail.com> wrote:
>>>> On Wed, Feb 9, 2011 at 9:45 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>> On Wed, Feb 9, 2011 at 12:17 PM, Richard Guenther
>>>>> <richard.guenther@gmail.com> wrote:
>>>>>> On Wed, Feb 9, 2011 at 6:17 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>>> On Wed, Feb 9, 2011 at 2:04 AM, Richard Guenther
>>>>>>> <richard.guenther@gmail.com> wrote:
>>>>>>>> On Wed, Feb 9, 2011 at 12:08 AM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>>>>>>>> Hi,
>>>>>>>>>
>>>>>>>>> tree-ssa-loop-ivopts.c has
>>>>>>>>>
>>>>>>>>> ---
>>>>>>>>> /* Returns variant of TYPE that can be used as base for different uses.
>>>>>>>>>   We return unsigned type with the same precision, which avoids problems
>>>>>>>>>   with overflows.  */
>>>>>>>>>
>>>>>>>>> static tree
>>>>>>>>> generic_type_for (tree type)
>>>>>>>>> {
>>>>>>>>>  if (POINTER_TYPE_P (type))
>>>>>>>>>    return unsigned_type_for (type);
>>>>>>>>>
>>>>>>>>>  if (TYPE_UNSIGNED (type))
>>>>>>>>>    return type;
>>>>>>>>>
>>>>>>>>>  return unsigned_type_for (type);
>>>>>>>>> }
>>>>>>>>> ---
>>>>>>>>>
>>>>>>>>> It converts negative step to unsigned type with the same precision.
>>>>>>>>> It doesn't work when Pmode != ptr_mode.  This patch disables it.
>>>>>>>>> OK for trunk in stage 1?
>>>>>>>>
>>>>>>>> This does not look correct.  It rather sounds you are expanding
>>>>>>>> TARGET_MEM_REFs the wrong way - the address computed by
>>>>>>>> it is supposed to be calculated in ptr_mode and only the final
>>>>>>>> result extended to Pmode.
>>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> TARGET_MEM_REF is OK. The problem is ivopts passes
>>>>>>> the negative offset to TARGET_MEM_REF as unsigned and
>>>>>>> expects offset will wrap.
>>>>>>
>>>>>> But they will wrap, as arithmetic is carried out in a type with
>>>>>> the same precision (that of ptr_mode).
>>>>>>
>>>>>>>  It only works when Pmode ==
>>>>>>> ptr_mode.  I am checking in this patch into x32 branch to
>>>>>>> avoid such cases.
>>>>>>
>>>>>> I think you are wrong.
>>>>>>
>>>>>
>>>>> If you have a patch, I can give a try.
>>>>
>>>> I'd have to debug it and I don't have a target that shows the failure, but
>>>> just looking around I assume that in
>>>>
>>>> rtx
>>>> addr_for_mem_ref (struct mem_address *addr, addr_space_t as,
>>>>                  bool really_expand)
>>>> {
>>>>  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
>>>>
>>>> address_mode will be Pmode - that would be already wrong.  We need to
>>>> use ptr_mode here and at the end of the function convert the
>>>> generated address to Pmode appropriately.  Can you verify that theory?
>>>
>>> From the default hook implementation that indeed seems to be the case.
>>> Using ptr_mode might be undesirable as then the generated RTL will
>>> probably never match complex addressing modes.  Thus the component
>>> and address types used for TARGET_MEM_REF are not suitable for
>>> a Pmode != ptr_mode target and would have to use a pointer type
>>> of Pmode mode (which we don't have, and frankly introducing such a
>>> 2nd pointer mode on trees might have non-trivial effects).  Eventually
>>> just forcing the offset components of TARGET_MEM_REF to Pmode
>>> size would also be a possibility (though you will run into the issue that
>>> we loose any signedness of pointer offsets by casting them to sizetype).
>>
>> The offset components have to be signed while ptr_mode to Pmode
>> may be zero extended.
>
> The sign of the offset components does not matter if they have the
> same width as the resulting pointer.  Or do you say we have
> a %eax + %ebx addressing mode that zero-extends eax and sign-extends
> ebx?  Or that we can use a 32bit offset register that will be sign-extended?

x32 uses %rax + %rbx * scale. RAX is zero-extended and
RBX is sign-extended.

>>> That said - I'd go for the ptr_mode address RTX generation for
>>> correctness and then start addressing the tree issue by doing step one,
>>> getting rid of the sizetype usage for pointer offsets.
>>>
>>
>> I will try any patches.
>
> Not from me ;)  I pointed you at the problem, that must suffice.
>

I am not sure if your suggestion will work since offset and pointer
are treated differently in x32.
Richard Biener Feb. 9, 2011, 11:30 p.m. UTC | #8
On Thu, Feb 10, 2011 at 12:05 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Wed, Feb 9, 2011 at 1:35 PM, Richard Guenther
> <richard.guenther@gmail.com> wrote:
>> On Wed, Feb 9, 2011 at 10:28 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>> On Wed, Feb 9, 2011 at 1:22 PM, Richard Guenther
>>> <richard.guenther@gmail.com> wrote:
>>>> On Wed, Feb 9, 2011 at 10:17 PM, Richard Guenther
>>>> <richard.guenther@gmail.com> wrote:
>>>>> On Wed, Feb 9, 2011 at 9:45 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>> On Wed, Feb 9, 2011 at 12:17 PM, Richard Guenther
>>>>>> <richard.guenther@gmail.com> wrote:
>>>>>>> On Wed, Feb 9, 2011 at 6:17 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>>>> On Wed, Feb 9, 2011 at 2:04 AM, Richard Guenther
>>>>>>>> <richard.guenther@gmail.com> wrote:
>>>>>>>>> On Wed, Feb 9, 2011 at 12:08 AM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>>>>>>>>> Hi,
>>>>>>>>>>
>>>>>>>>>> tree-ssa-loop-ivopts.c has
>>>>>>>>>>
>>>>>>>>>> ---
>>>>>>>>>> /* Returns variant of TYPE that can be used as base for different uses.
>>>>>>>>>>   We return unsigned type with the same precision, which avoids problems
>>>>>>>>>>   with overflows.  */
>>>>>>>>>>
>>>>>>>>>> static tree
>>>>>>>>>> generic_type_for (tree type)
>>>>>>>>>> {
>>>>>>>>>>  if (POINTER_TYPE_P (type))
>>>>>>>>>>    return unsigned_type_for (type);
>>>>>>>>>>
>>>>>>>>>>  if (TYPE_UNSIGNED (type))
>>>>>>>>>>    return type;
>>>>>>>>>>
>>>>>>>>>>  return unsigned_type_for (type);
>>>>>>>>>> }
>>>>>>>>>> ---
>>>>>>>>>>
>>>>>>>>>> It converts negative step to unsigned type with the same precision.
>>>>>>>>>> It doesn't work when Pmode != ptr_mode.  This patch disables it.
>>>>>>>>>> OK for trunk in stage 1?
>>>>>>>>>
>>>>>>>>> This does not look correct.  It rather sounds you are expanding
>>>>>>>>> TARGET_MEM_REFs the wrong way - the address computed by
>>>>>>>>> it is supposed to be calculated in ptr_mode and only the final
>>>>>>>>> result extended to Pmode.
>>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> TARGET_MEM_REF is OK. The problem is ivopts passes
>>>>>>>> the negative offset to TARGET_MEM_REF as unsigned and
>>>>>>>> expects offset will wrap.
>>>>>>>
>>>>>>> But they will wrap, as arithmetic is carried out in a type with
>>>>>>> the same precision (that of ptr_mode).
>>>>>>>
>>>>>>>>  It only works when Pmode ==
>>>>>>>> ptr_mode.  I am checking in this patch into x32 branch to
>>>>>>>> avoid such cases.
>>>>>>>
>>>>>>> I think you are wrong.
>>>>>>>
>>>>>>
>>>>>> If you have a patch, I can give a try.
>>>>>
>>>>> I'd have to debug it and I don't have a target that shows the failure, but
>>>>> just looking around I assume that in
>>>>>
>>>>> rtx
>>>>> addr_for_mem_ref (struct mem_address *addr, addr_space_t as,
>>>>>                  bool really_expand)
>>>>> {
>>>>>  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
>>>>>
>>>>> address_mode will be Pmode - that would be already wrong.  We need to
>>>>> use ptr_mode here and at the end of the function convert the
>>>>> generated address to Pmode appropriately.  Can you verify that theory?
>>>>
>>>> From the default hook implementation that indeed seems to be the case.
>>>> Using ptr_mode might be undesirable as then the generated RTL will
>>>> probably never match complex addressing modes.  Thus the component
>>>> and address types used for TARGET_MEM_REF are not suitable for
>>>> a Pmode != ptr_mode target and would have to use a pointer type
>>>> of Pmode mode (which we don't have, and frankly introducing such a
>>>> 2nd pointer mode on trees might have non-trivial effects).  Eventually
>>>> just forcing the offset components of TARGET_MEM_REF to Pmode
>>>> size would also be a possibility (though you will run into the issue that
>>>> we loose any signedness of pointer offsets by casting them to sizetype).
>>>
>>> The offset components have to be signed while ptr_mode to Pmode
>>> may be zero extended.
>>
>> The sign of the offset components does not matter if they have the
>> same width as the resulting pointer.  Or do you say we have
>> a %eax + %ebx addressing mode that zero-extends eax and sign-extends
>> ebx?  Or that we can use a 32bit offset register that will be sign-extended?
>
> x32 uses %rax + %rbx * scale. RAX is zero-extended and
> RBX is sign-extended.

how can %rax or %rbx be extended further?  Those are 64bit registers.

Richard.
H.J. Lu Feb. 9, 2011, 11:37 p.m. UTC | #9
On Wed, Feb 9, 2011 at 3:30 PM, Richard Guenther
<richard.guenther@gmail.com> wrote:
> On Thu, Feb 10, 2011 at 12:05 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Wed, Feb 9, 2011 at 1:35 PM, Richard Guenther
>> <richard.guenther@gmail.com> wrote:
>>> On Wed, Feb 9, 2011 at 10:28 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>> On Wed, Feb 9, 2011 at 1:22 PM, Richard Guenther
>>>> <richard.guenther@gmail.com> wrote:
>>>>> On Wed, Feb 9, 2011 at 10:17 PM, Richard Guenther
>>>>> <richard.guenther@gmail.com> wrote:
>>>>>> On Wed, Feb 9, 2011 at 9:45 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>>> On Wed, Feb 9, 2011 at 12:17 PM, Richard Guenther
>>>>>>> <richard.guenther@gmail.com> wrote:
>>>>>>>> On Wed, Feb 9, 2011 at 6:17 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>>>>> On Wed, Feb 9, 2011 at 2:04 AM, Richard Guenther
>>>>>>>>> <richard.guenther@gmail.com> wrote:
>>>>>>>>>> On Wed, Feb 9, 2011 at 12:08 AM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>>>>>>>>>> Hi,
>>>>>>>>>>>
>>>>>>>>>>> tree-ssa-loop-ivopts.c has
>>>>>>>>>>>
>>>>>>>>>>> ---
>>>>>>>>>>> /* Returns variant of TYPE that can be used as base for different uses.
>>>>>>>>>>>   We return unsigned type with the same precision, which avoids problems
>>>>>>>>>>>   with overflows.  */
>>>>>>>>>>>
>>>>>>>>>>> static tree
>>>>>>>>>>> generic_type_for (tree type)
>>>>>>>>>>> {
>>>>>>>>>>>  if (POINTER_TYPE_P (type))
>>>>>>>>>>>    return unsigned_type_for (type);
>>>>>>>>>>>
>>>>>>>>>>>  if (TYPE_UNSIGNED (type))
>>>>>>>>>>>    return type;
>>>>>>>>>>>
>>>>>>>>>>>  return unsigned_type_for (type);
>>>>>>>>>>> }
>>>>>>>>>>> ---
>>>>>>>>>>>
>>>>>>>>>>> It converts negative step to unsigned type with the same precision.
>>>>>>>>>>> It doesn't work when Pmode != ptr_mode.  This patch disables it.
>>>>>>>>>>> OK for trunk in stage 1?
>>>>>>>>>>
>>>>>>>>>> This does not look correct.  It rather sounds you are expanding
>>>>>>>>>> TARGET_MEM_REFs the wrong way - the address computed by
>>>>>>>>>> it is supposed to be calculated in ptr_mode and only the final
>>>>>>>>>> result extended to Pmode.
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> TARGET_MEM_REF is OK. The problem is ivopts passes
>>>>>>>>> the negative offset to TARGET_MEM_REF as unsigned and
>>>>>>>>> expects offset will wrap.
>>>>>>>>
>>>>>>>> But they will wrap, as arithmetic is carried out in a type with
>>>>>>>> the same precision (that of ptr_mode).
>>>>>>>>
>>>>>>>>>  It only works when Pmode ==
>>>>>>>>> ptr_mode.  I am checking in this patch into x32 branch to
>>>>>>>>> avoid such cases.
>>>>>>>>
>>>>>>>> I think you are wrong.
>>>>>>>>
>>>>>>>
>>>>>>> If you have a patch, I can give a try.
>>>>>>
>>>>>> I'd have to debug it and I don't have a target that shows the failure, but
>>>>>> just looking around I assume that in
>>>>>>
>>>>>> rtx
>>>>>> addr_for_mem_ref (struct mem_address *addr, addr_space_t as,
>>>>>>                  bool really_expand)
>>>>>> {
>>>>>>  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
>>>>>>
>>>>>> address_mode will be Pmode - that would be already wrong.  We need to
>>>>>> use ptr_mode here and at the end of the function convert the
>>>>>> generated address to Pmode appropriately.  Can you verify that theory?
>>>>>
>>>>> From the default hook implementation that indeed seems to be the case.
>>>>> Using ptr_mode might be undesirable as then the generated RTL will
>>>>> probably never match complex addressing modes.  Thus the component
>>>>> and address types used for TARGET_MEM_REF are not suitable for
>>>>> a Pmode != ptr_mode target and would have to use a pointer type
>>>>> of Pmode mode (which we don't have, and frankly introducing such a
>>>>> 2nd pointer mode on trees might have non-trivial effects).  Eventually
>>>>> just forcing the offset components of TARGET_MEM_REF to Pmode
>>>>> size would also be a possibility (though you will run into the issue that
>>>>> we loose any signedness of pointer offsets by casting them to sizetype).
>>>>
>>>> The offset components have to be signed while ptr_mode to Pmode
>>>> may be zero extended.
>>>
>>> The sign of the offset components does not matter if they have the
>>> same width as the resulting pointer.  Or do you say we have
>>> a %eax + %ebx addressing mode that zero-extends eax and sign-extends
>>> ebx?  Or that we can use a 32bit offset register that will be sign-extended?
>>
>> x32 uses %rax + %rbx * scale. RAX is zero-extended and
>> RBX is sign-extended.
>
> how can %rax or %rbx be extended further?  Those are 64bit registers.

RAX is zero-extended from EAX and RBX is sign-extended from EBX,
Richard Biener Feb. 9, 2011, 11:39 p.m. UTC | #10
On Thu, Feb 10, 2011 at 12:37 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Wed, Feb 9, 2011 at 3:30 PM, Richard Guenther
> <richard.guenther@gmail.com> wrote:
>> On Thu, Feb 10, 2011 at 12:05 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>> On Wed, Feb 9, 2011 at 1:35 PM, Richard Guenther
>>> <richard.guenther@gmail.com> wrote:
>>>> On Wed, Feb 9, 2011 at 10:28 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>> On Wed, Feb 9, 2011 at 1:22 PM, Richard Guenther
>>>>> <richard.guenther@gmail.com> wrote:
>>>>>> On Wed, Feb 9, 2011 at 10:17 PM, Richard Guenther
>>>>>> <richard.guenther@gmail.com> wrote:
>>>>>>> On Wed, Feb 9, 2011 at 9:45 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>>>> On Wed, Feb 9, 2011 at 12:17 PM, Richard Guenther
>>>>>>>> <richard.guenther@gmail.com> wrote:
>>>>>>>>> On Wed, Feb 9, 2011 at 6:17 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>>>>>> On Wed, Feb 9, 2011 at 2:04 AM, Richard Guenther
>>>>>>>>>> <richard.guenther@gmail.com> wrote:
>>>>>>>>>>> On Wed, Feb 9, 2011 at 12:08 AM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>>>>>>>>>>> Hi,
>>>>>>>>>>>>
>>>>>>>>>>>> tree-ssa-loop-ivopts.c has
>>>>>>>>>>>>
>>>>>>>>>>>> ---
>>>>>>>>>>>> /* Returns variant of TYPE that can be used as base for different uses.
>>>>>>>>>>>>   We return unsigned type with the same precision, which avoids problems
>>>>>>>>>>>>   with overflows.  */
>>>>>>>>>>>>
>>>>>>>>>>>> static tree
>>>>>>>>>>>> generic_type_for (tree type)
>>>>>>>>>>>> {
>>>>>>>>>>>>  if (POINTER_TYPE_P (type))
>>>>>>>>>>>>    return unsigned_type_for (type);
>>>>>>>>>>>>
>>>>>>>>>>>>  if (TYPE_UNSIGNED (type))
>>>>>>>>>>>>    return type;
>>>>>>>>>>>>
>>>>>>>>>>>>  return unsigned_type_for (type);
>>>>>>>>>>>> }
>>>>>>>>>>>> ---
>>>>>>>>>>>>
>>>>>>>>>>>> It converts negative step to unsigned type with the same precision.
>>>>>>>>>>>> It doesn't work when Pmode != ptr_mode.  This patch disables it.
>>>>>>>>>>>> OK for trunk in stage 1?
>>>>>>>>>>>
>>>>>>>>>>> This does not look correct.  It rather sounds you are expanding
>>>>>>>>>>> TARGET_MEM_REFs the wrong way - the address computed by
>>>>>>>>>>> it is supposed to be calculated in ptr_mode and only the final
>>>>>>>>>>> result extended to Pmode.
>>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> TARGET_MEM_REF is OK. The problem is ivopts passes
>>>>>>>>>> the negative offset to TARGET_MEM_REF as unsigned and
>>>>>>>>>> expects offset will wrap.
>>>>>>>>>
>>>>>>>>> But they will wrap, as arithmetic is carried out in a type with
>>>>>>>>> the same precision (that of ptr_mode).
>>>>>>>>>
>>>>>>>>>>  It only works when Pmode ==
>>>>>>>>>> ptr_mode.  I am checking in this patch into x32 branch to
>>>>>>>>>> avoid such cases.
>>>>>>>>>
>>>>>>>>> I think you are wrong.
>>>>>>>>>
>>>>>>>>
>>>>>>>> If you have a patch, I can give a try.
>>>>>>>
>>>>>>> I'd have to debug it and I don't have a target that shows the failure, but
>>>>>>> just looking around I assume that in
>>>>>>>
>>>>>>> rtx
>>>>>>> addr_for_mem_ref (struct mem_address *addr, addr_space_t as,
>>>>>>>                  bool really_expand)
>>>>>>> {
>>>>>>>  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
>>>>>>>
>>>>>>> address_mode will be Pmode - that would be already wrong.  We need to
>>>>>>> use ptr_mode here and at the end of the function convert the
>>>>>>> generated address to Pmode appropriately.  Can you verify that theory?
>>>>>>
>>>>>> From the default hook implementation that indeed seems to be the case.
>>>>>> Using ptr_mode might be undesirable as then the generated RTL will
>>>>>> probably never match complex addressing modes.  Thus the component
>>>>>> and address types used for TARGET_MEM_REF are not suitable for
>>>>>> a Pmode != ptr_mode target and would have to use a pointer type
>>>>>> of Pmode mode (which we don't have, and frankly introducing such a
>>>>>> 2nd pointer mode on trees might have non-trivial effects).  Eventually
>>>>>> just forcing the offset components of TARGET_MEM_REF to Pmode
>>>>>> size would also be a possibility (though you will run into the issue that
>>>>>> we loose any signedness of pointer offsets by casting them to sizetype).
>>>>>
>>>>> The offset components have to be signed while ptr_mode to Pmode
>>>>> may be zero extended.
>>>>
>>>> The sign of the offset components does not matter if they have the
>>>> same width as the resulting pointer.  Or do you say we have
>>>> a %eax + %ebx addressing mode that zero-extends eax and sign-extends
>>>> ebx?  Or that we can use a 32bit offset register that will be sign-extended?
>>>
>>> x32 uses %rax + %rbx * scale. RAX is zero-extended and
>>> RBX is sign-extended.
>>
>> how can %rax or %rbx be extended further?  Those are 64bit registers.
>
> RAX is zero-extended from EAX and RBX is sign-extended from EBX,

By separate instructions?

Richard.

> --
> H.J.
>
Richard Biener Feb. 10, 2011, 12:10 a.m. UTC | #11
On Thu, Feb 10, 2011 at 12:39 AM, Richard Guenther
<richard.guenther@gmail.com> wrote:
> On Thu, Feb 10, 2011 at 12:37 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Wed, Feb 9, 2011 at 3:30 PM, Richard Guenther
>> <richard.guenther@gmail.com> wrote:
>>> On Thu, Feb 10, 2011 at 12:05 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>> On Wed, Feb 9, 2011 at 1:35 PM, Richard Guenther
>>>> <richard.guenther@gmail.com> wrote:
>>>>> On Wed, Feb 9, 2011 at 10:28 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>> On Wed, Feb 9, 2011 at 1:22 PM, Richard Guenther
>>>>>> <richard.guenther@gmail.com> wrote:
>>>>>>> On Wed, Feb 9, 2011 at 10:17 PM, Richard Guenther
>>>>>>> <richard.guenther@gmail.com> wrote:
>>>>>>>> On Wed, Feb 9, 2011 at 9:45 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>>>>> On Wed, Feb 9, 2011 at 12:17 PM, Richard Guenther
>>>>>>>>> <richard.guenther@gmail.com> wrote:
>>>>>>>>>> On Wed, Feb 9, 2011 at 6:17 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>>>>>>> On Wed, Feb 9, 2011 at 2:04 AM, Richard Guenther
>>>>>>>>>>> <richard.guenther@gmail.com> wrote:
>>>>>>>>>>>> On Wed, Feb 9, 2011 at 12:08 AM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>>>>>>>>>>>> Hi,
>>>>>>>>>>>>>
>>>>>>>>>>>>> tree-ssa-loop-ivopts.c has
>>>>>>>>>>>>>
>>>>>>>>>>>>> ---
>>>>>>>>>>>>> /* Returns variant of TYPE that can be used as base for different uses.
>>>>>>>>>>>>>   We return unsigned type with the same precision, which avoids problems
>>>>>>>>>>>>>   with overflows.  */
>>>>>>>>>>>>>
>>>>>>>>>>>>> static tree
>>>>>>>>>>>>> generic_type_for (tree type)
>>>>>>>>>>>>> {
>>>>>>>>>>>>>  if (POINTER_TYPE_P (type))
>>>>>>>>>>>>>    return unsigned_type_for (type);
>>>>>>>>>>>>>
>>>>>>>>>>>>>  if (TYPE_UNSIGNED (type))
>>>>>>>>>>>>>    return type;
>>>>>>>>>>>>>
>>>>>>>>>>>>>  return unsigned_type_for (type);
>>>>>>>>>>>>> }
>>>>>>>>>>>>> ---
>>>>>>>>>>>>>
>>>>>>>>>>>>> It converts negative step to unsigned type with the same precision.
>>>>>>>>>>>>> It doesn't work when Pmode != ptr_mode.  This patch disables it.
>>>>>>>>>>>>> OK for trunk in stage 1?
>>>>>>>>>>>>
>>>>>>>>>>>> This does not look correct.  It rather sounds you are expanding
>>>>>>>>>>>> TARGET_MEM_REFs the wrong way - the address computed by
>>>>>>>>>>>> it is supposed to be calculated in ptr_mode and only the final
>>>>>>>>>>>> result extended to Pmode.
>>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> TARGET_MEM_REF is OK. The problem is ivopts passes
>>>>>>>>>>> the negative offset to TARGET_MEM_REF as unsigned and
>>>>>>>>>>> expects offset will wrap.
>>>>>>>>>>
>>>>>>>>>> But they will wrap, as arithmetic is carried out in a type with
>>>>>>>>>> the same precision (that of ptr_mode).
>>>>>>>>>>
>>>>>>>>>>>  It only works when Pmode ==
>>>>>>>>>>> ptr_mode.  I am checking in this patch into x32 branch to
>>>>>>>>>>> avoid such cases.
>>>>>>>>>>
>>>>>>>>>> I think you are wrong.
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> If you have a patch, I can give a try.
>>>>>>>>
>>>>>>>> I'd have to debug it and I don't have a target that shows the failure, but
>>>>>>>> just looking around I assume that in
>>>>>>>>
>>>>>>>> rtx
>>>>>>>> addr_for_mem_ref (struct mem_address *addr, addr_space_t as,
>>>>>>>>                  bool really_expand)
>>>>>>>> {
>>>>>>>>  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
>>>>>>>>
>>>>>>>> address_mode will be Pmode - that would be already wrong.  We need to
>>>>>>>> use ptr_mode here and at the end of the function convert the
>>>>>>>> generated address to Pmode appropriately.  Can you verify that theory?
>>>>>>>
>>>>>>> From the default hook implementation that indeed seems to be the case.
>>>>>>> Using ptr_mode might be undesirable as then the generated RTL will
>>>>>>> probably never match complex addressing modes.  Thus the component
>>>>>>> and address types used for TARGET_MEM_REF are not suitable for
>>>>>>> a Pmode != ptr_mode target and would have to use a pointer type
>>>>>>> of Pmode mode (which we don't have, and frankly introducing such a
>>>>>>> 2nd pointer mode on trees might have non-trivial effects).  Eventually
>>>>>>> just forcing the offset components of TARGET_MEM_REF to Pmode
>>>>>>> size would also be a possibility (though you will run into the issue that
>>>>>>> we loose any signedness of pointer offsets by casting them to sizetype).
>>>>>>
>>>>>> The offset components have to be signed while ptr_mode to Pmode
>>>>>> may be zero extended.
>>>>>
>>>>> The sign of the offset components does not matter if they have the
>>>>> same width as the resulting pointer.  Or do you say we have
>>>>> a %eax + %ebx addressing mode that zero-extends eax and sign-extends
>>>>> ebx?  Or that we can use a 32bit offset register that will be sign-extended?
>>>>
>>>> x32 uses %rax + %rbx * scale. RAX is zero-extended and
>>>> RBX is sign-extended.
>>>
>>> how can %rax or %rbx be extended further?  Those are 64bit registers.
>>
>> RAX is zero-extended from EAX and RBX is sign-extended from EBX,
>
> By separate instructions?

By combined brain-power we dug out the last sentence of 3.3.7
of the ia32 basic arch manual which says the address is zero-extended
from 32bit after computing it.  So there is no problem?

Richard.

> Richard.
>
>> --
>> H.J.
>>
>
H.J. Lu Feb. 10, 2011, 12:33 a.m. UTC | #12
On Wed, Feb 9, 2011 at 4:10 PM, Richard Guenther
<richard.guenther@gmail.com> wrote:
> On Thu, Feb 10, 2011 at 12:39 AM, Richard Guenther
> <richard.guenther@gmail.com> wrote:
>> On Thu, Feb 10, 2011 at 12:37 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>> On Wed, Feb 9, 2011 at 3:30 PM, Richard Guenther
>>> <richard.guenther@gmail.com> wrote:
>>>> On Thu, Feb 10, 2011 at 12:05 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>> On Wed, Feb 9, 2011 at 1:35 PM, Richard Guenther
>>>>> <richard.guenther@gmail.com> wrote:
>>>>>> On Wed, Feb 9, 2011 at 10:28 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>>> On Wed, Feb 9, 2011 at 1:22 PM, Richard Guenther
>>>>>>> <richard.guenther@gmail.com> wrote:
>>>>>>>> On Wed, Feb 9, 2011 at 10:17 PM, Richard Guenther
>>>>>>>> <richard.guenther@gmail.com> wrote:
>>>>>>>>> On Wed, Feb 9, 2011 at 9:45 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>>>>>> On Wed, Feb 9, 2011 at 12:17 PM, Richard Guenther
>>>>>>>>>> <richard.guenther@gmail.com> wrote:
>>>>>>>>>>> On Wed, Feb 9, 2011 at 6:17 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>>>>>>>> On Wed, Feb 9, 2011 at 2:04 AM, Richard Guenther
>>>>>>>>>>>> <richard.guenther@gmail.com> wrote:
>>>>>>>>>>>>> On Wed, Feb 9, 2011 at 12:08 AM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>>>>>>>>>>>>> Hi,
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> tree-ssa-loop-ivopts.c has
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> ---
>>>>>>>>>>>>>> /* Returns variant of TYPE that can be used as base for different uses.
>>>>>>>>>>>>>>   We return unsigned type with the same precision, which avoids problems
>>>>>>>>>>>>>>   with overflows.  */
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> static tree
>>>>>>>>>>>>>> generic_type_for (tree type)
>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>  if (POINTER_TYPE_P (type))
>>>>>>>>>>>>>>    return unsigned_type_for (type);
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>  if (TYPE_UNSIGNED (type))
>>>>>>>>>>>>>>    return type;
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>  return unsigned_type_for (type);
>>>>>>>>>>>>>> }
>>>>>>>>>>>>>> ---
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> It converts negative step to unsigned type with the same precision.
>>>>>>>>>>>>>> It doesn't work when Pmode != ptr_mode.  This patch disables it.
>>>>>>>>>>>>>> OK for trunk in stage 1?
>>>>>>>>>>>>>
>>>>>>>>>>>>> This does not look correct.  It rather sounds you are expanding
>>>>>>>>>>>>> TARGET_MEM_REFs the wrong way - the address computed by
>>>>>>>>>>>>> it is supposed to be calculated in ptr_mode and only the final
>>>>>>>>>>>>> result extended to Pmode.
>>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> TARGET_MEM_REF is OK. The problem is ivopts passes
>>>>>>>>>>>> the negative offset to TARGET_MEM_REF as unsigned and
>>>>>>>>>>>> expects offset will wrap.
>>>>>>>>>>>
>>>>>>>>>>> But they will wrap, as arithmetic is carried out in a type with
>>>>>>>>>>> the same precision (that of ptr_mode).
>>>>>>>>>>>
>>>>>>>>>>>>  It only works when Pmode ==
>>>>>>>>>>>> ptr_mode.  I am checking in this patch into x32 branch to
>>>>>>>>>>>> avoid such cases.
>>>>>>>>>>>
>>>>>>>>>>> I think you are wrong.
>>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> If you have a patch, I can give a try.
>>>>>>>>>
>>>>>>>>> I'd have to debug it and I don't have a target that shows the failure, but
>>>>>>>>> just looking around I assume that in
>>>>>>>>>
>>>>>>>>> rtx
>>>>>>>>> addr_for_mem_ref (struct mem_address *addr, addr_space_t as,
>>>>>>>>>                  bool really_expand)
>>>>>>>>> {
>>>>>>>>>  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
>>>>>>>>>
>>>>>>>>> address_mode will be Pmode - that would be already wrong.  We need to
>>>>>>>>> use ptr_mode here and at the end of the function convert the
>>>>>>>>> generated address to Pmode appropriately.  Can you verify that theory?
>>>>>>>>
>>>>>>>> From the default hook implementation that indeed seems to be the case.
>>>>>>>> Using ptr_mode might be undesirable as then the generated RTL will
>>>>>>>> probably never match complex addressing modes.  Thus the component
>>>>>>>> and address types used for TARGET_MEM_REF are not suitable for
>>>>>>>> a Pmode != ptr_mode target and would have to use a pointer type
>>>>>>>> of Pmode mode (which we don't have, and frankly introducing such a
>>>>>>>> 2nd pointer mode on trees might have non-trivial effects).  Eventually
>>>>>>>> just forcing the offset components of TARGET_MEM_REF to Pmode
>>>>>>>> size would also be a possibility (though you will run into the issue that
>>>>>>>> we loose any signedness of pointer offsets by casting them to sizetype).
>>>>>>>
>>>>>>> The offset components have to be signed while ptr_mode to Pmode
>>>>>>> may be zero extended.
>>>>>>
>>>>>> The sign of the offset components does not matter if they have the
>>>>>> same width as the resulting pointer.  Or do you say we have
>>>>>> a %eax + %ebx addressing mode that zero-extends eax and sign-extends
>>>>>> ebx?  Or that we can use a 32bit offset register that will be sign-extended?
>>>>>
>>>>> x32 uses %rax + %rbx * scale. RAX is zero-extended and
>>>>> RBX is sign-extended.
>>>>
>>>> how can %rax or %rbx be extended further?  Those are 64bit registers.
>>>
>>> RAX is zero-extended from EAX and RBX is sign-extended from EBX,
>>
>> By separate instructions?
>
> By combined brain-power we dug out the last sentence of 3.3.7
> of the ia32 basic arch manual which says the address is zero-extended
> from 32bit after computing it.  So there is no problem?
>

In x32 mode, we use (%rax,%rbx,4), not (%eax,%ebx,4).  When we generate
(%rax,%rbx,4) in x32 mode, we have to put sign-extended value in RBX.
Andrew Pinski Feb. 10, 2011, 12:35 a.m. UTC | #13
On Wed, Feb 9, 2011 at 4:33 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> In x32 mode, we use (%rax,%rbx,4), not (%eax,%ebx,4).  When we generate
> (%rax,%rbx,4) in x32 mode, we have to put sign-extended value in RBX.


Is there a reason why you can't use (%eax, %ebx, 4) addressing mode
for x32 mode?  I think the biggest question Richard is asking about.
Since x86_64 includes 32bit addressing modes already.

Thanks,
Andrew Pinski
Richard Biener Feb. 10, 2011, 10:18 a.m. UTC | #14
On Thu, Feb 10, 2011 at 1:33 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Wed, Feb 9, 2011 at 4:10 PM, Richard Guenther
> <richard.guenther@gmail.com> wrote:
>> On Thu, Feb 10, 2011 at 12:39 AM, Richard Guenther
>> <richard.guenther@gmail.com> wrote:
>>> On Thu, Feb 10, 2011 at 12:37 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>> On Wed, Feb 9, 2011 at 3:30 PM, Richard Guenther
>>>> <richard.guenther@gmail.com> wrote:
>>>>> On Thu, Feb 10, 2011 at 12:05 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>> On Wed, Feb 9, 2011 at 1:35 PM, Richard Guenther
>>>>>> <richard.guenther@gmail.com> wrote:
>>>>>>> On Wed, Feb 9, 2011 at 10:28 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>>>> On Wed, Feb 9, 2011 at 1:22 PM, Richard Guenther
>>>>>>>> <richard.guenther@gmail.com> wrote:
>>>>>>>>> On Wed, Feb 9, 2011 at 10:17 PM, Richard Guenther
>>>>>>>>> <richard.guenther@gmail.com> wrote:
>>>>>>>>>> On Wed, Feb 9, 2011 at 9:45 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>>>>>>> On Wed, Feb 9, 2011 at 12:17 PM, Richard Guenther
>>>>>>>>>>> <richard.guenther@gmail.com> wrote:
>>>>>>>>>>>> On Wed, Feb 9, 2011 at 6:17 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>>>>>>>>> On Wed, Feb 9, 2011 at 2:04 AM, Richard Guenther
>>>>>>>>>>>>> <richard.guenther@gmail.com> wrote:
>>>>>>>>>>>>>> On Wed, Feb 9, 2011 at 12:08 AM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>>>>>>>>>>>>>> Hi,
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> tree-ssa-loop-ivopts.c has
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> ---
>>>>>>>>>>>>>>> /* Returns variant of TYPE that can be used as base for different uses.
>>>>>>>>>>>>>>>   We return unsigned type with the same precision, which avoids problems
>>>>>>>>>>>>>>>   with overflows.  */
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> static tree
>>>>>>>>>>>>>>> generic_type_for (tree type)
>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>  if (POINTER_TYPE_P (type))
>>>>>>>>>>>>>>>    return unsigned_type_for (type);
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>  if (TYPE_UNSIGNED (type))
>>>>>>>>>>>>>>>    return type;
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>  return unsigned_type_for (type);
>>>>>>>>>>>>>>> }
>>>>>>>>>>>>>>> ---
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> It converts negative step to unsigned type with the same precision.
>>>>>>>>>>>>>>> It doesn't work when Pmode != ptr_mode.  This patch disables it.
>>>>>>>>>>>>>>> OK for trunk in stage 1?
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> This does not look correct.  It rather sounds you are expanding
>>>>>>>>>>>>>> TARGET_MEM_REFs the wrong way - the address computed by
>>>>>>>>>>>>>> it is supposed to be calculated in ptr_mode and only the final
>>>>>>>>>>>>>> result extended to Pmode.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> TARGET_MEM_REF is OK. The problem is ivopts passes
>>>>>>>>>>>>> the negative offset to TARGET_MEM_REF as unsigned and
>>>>>>>>>>>>> expects offset will wrap.
>>>>>>>>>>>>
>>>>>>>>>>>> But they will wrap, as arithmetic is carried out in a type with
>>>>>>>>>>>> the same precision (that of ptr_mode).
>>>>>>>>>>>>
>>>>>>>>>>>>>  It only works when Pmode ==
>>>>>>>>>>>>> ptr_mode.  I am checking in this patch into x32 branch to
>>>>>>>>>>>>> avoid such cases.
>>>>>>>>>>>>
>>>>>>>>>>>> I think you are wrong.
>>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> If you have a patch, I can give a try.
>>>>>>>>>>
>>>>>>>>>> I'd have to debug it and I don't have a target that shows the failure, but
>>>>>>>>>> just looking around I assume that in
>>>>>>>>>>
>>>>>>>>>> rtx
>>>>>>>>>> addr_for_mem_ref (struct mem_address *addr, addr_space_t as,
>>>>>>>>>>                  bool really_expand)
>>>>>>>>>> {
>>>>>>>>>>  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
>>>>>>>>>>
>>>>>>>>>> address_mode will be Pmode - that would be already wrong.  We need to
>>>>>>>>>> use ptr_mode here and at the end of the function convert the
>>>>>>>>>> generated address to Pmode appropriately.  Can you verify that theory?
>>>>>>>>>
>>>>>>>>> From the default hook implementation that indeed seems to be the case.
>>>>>>>>> Using ptr_mode might be undesirable as then the generated RTL will
>>>>>>>>> probably never match complex addressing modes.  Thus the component
>>>>>>>>> and address types used for TARGET_MEM_REF are not suitable for
>>>>>>>>> a Pmode != ptr_mode target and would have to use a pointer type
>>>>>>>>> of Pmode mode (which we don't have, and frankly introducing such a
>>>>>>>>> 2nd pointer mode on trees might have non-trivial effects).  Eventually
>>>>>>>>> just forcing the offset components of TARGET_MEM_REF to Pmode
>>>>>>>>> size would also be a possibility (though you will run into the issue that
>>>>>>>>> we loose any signedness of pointer offsets by casting them to sizetype).
>>>>>>>>
>>>>>>>> The offset components have to be signed while ptr_mode to Pmode
>>>>>>>> may be zero extended.
>>>>>>>
>>>>>>> The sign of the offset components does not matter if they have the
>>>>>>> same width as the resulting pointer.  Or do you say we have
>>>>>>> a %eax + %ebx addressing mode that zero-extends eax and sign-extends
>>>>>>> ebx?  Or that we can use a 32bit offset register that will be sign-extended?
>>>>>>
>>>>>> x32 uses %rax + %rbx * scale. RAX is zero-extended and
>>>>>> RBX is sign-extended.
>>>>>
>>>>> how can %rax or %rbx be extended further?  Those are 64bit registers.
>>>>
>>>> RAX is zero-extended from EAX and RBX is sign-extended from EBX,
>>>
>>> By separate instructions?
>>
>> By combined brain-power we dug out the last sentence of 3.3.7
>> of the ia32 basic arch manual which says the address is zero-extended
>> from 32bit after computing it.  So there is no problem?
>>
>
> In x32 mode, we use (%rax,%rbx,4), not (%eax,%ebx,4).  When we generate
> (%rax,%rbx,4) in x32 mode, we have to put sign-extended value in RBX.

TARGET_MEM_REF does not support this kind of addressing mode.  That's
one of the results of choosing ptr_mode != Pmode.  TARGET_MEM_REF
only supports (%eax,%ebx,4), or lea (%eax,%ebx,4), %ecx; mov %ecx, %rax
and (%rax).

Richard.
H.J. Lu Feb. 10, 2011, 2:41 p.m. UTC | #15
On Thu, Feb 10, 2011 at 2:18 AM, Richard Guenther
<richard.guenther@gmail.com> wrote:

>>>
>>> By combined brain-power we dug out the last sentence of 3.3.7
>>> of the ia32 basic arch manual which says the address is zero-extended
>>> from 32bit after computing it.  So there is no problem?
>>>
>>
>> In x32 mode, we use (%rax,%rbx,4), not (%eax,%ebx,4).  When we generate
>> (%rax,%rbx,4) in x32 mode, we have to put sign-extended value in RBX.
>
> TARGET_MEM_REF does not support this kind of addressing mode.  That's
> one of the results of choosing ptr_mode != Pmode.  TARGET_MEM_REF
> only supports (%eax,%ebx,4), or lea (%eax,%ebx,4), %ecx; mov %ecx, %rax
> and (%rax).
>

lea is also used for arithmetic. Is there a way to tell an RTL is
TARGET_MEM_REF?
Jakub Jelinek Feb. 10, 2011, 3:18 p.m. UTC | #16
On Thu, Feb 10, 2011 at 06:41:36AM -0800, H.J. Lu wrote:
> On Thu, Feb 10, 2011 at 2:18 AM, Richard Guenther
> >>> By combined brain-power we dug out the last sentence of 3.3.7
> >>> of the ia32 basic arch manual which says the address is zero-extended
> >>> from 32bit after computing it.  So there is no problem?
> >>>
> >>
> >> In x32 mode, we use (%rax,%rbx,4), not (%eax,%ebx,4).  When we generate
> >> (%rax,%rbx,4) in x32 mode, we have to put sign-extended value in RBX.
> >
> > TARGET_MEM_REF does not support this kind of addressing mode.  That's
> > one of the results of choosing ptr_mode != Pmode.  TARGET_MEM_REF
> > only supports (%eax,%ebx,4), or lea (%eax,%ebx,4), %ecx; mov %ecx, %rax
> > and (%rax).
> >
> 
> lea is also used for arithmetic. Is there a way to tell an RTL is
> TARGET_MEM_REF?

IMHO you really should reconsider the decision to use ptr_mode != Pmode for
your port.  IMNSHO you should use addr32 prefixes, and you can perhaps have
some machine reorg pass which will get rid of addr32 prefixes when they are
certainly not needed (most probably just for (%reg) addressing when %reg
is known to be zero-extended from 32 bits into 64 bits).

	Jakub
H.J. Lu Feb. 10, 2011, 3:27 p.m. UTC | #17
On Thu, Feb 10, 2011 at 7:18 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Thu, Feb 10, 2011 at 06:41:36AM -0800, H.J. Lu wrote:
>> On Thu, Feb 10, 2011 at 2:18 AM, Richard Guenther
>> >>> By combined brain-power we dug out the last sentence of 3.3.7
>> >>> of the ia32 basic arch manual which says the address is zero-extended
>> >>> from 32bit after computing it.  So there is no problem?
>> >>>
>> >>
>> >> In x32 mode, we use (%rax,%rbx,4), not (%eax,%ebx,4).  When we generate
>> >> (%rax,%rbx,4) in x32 mode, we have to put sign-extended value in RBX.
>> >
>> > TARGET_MEM_REF does not support this kind of addressing mode.  That's
>> > one of the results of choosing ptr_mode != Pmode.  TARGET_MEM_REF
>> > only supports (%eax,%ebx,4), or lea (%eax,%ebx,4), %ecx; mov %ecx, %rax
>> > and (%rax).
>> >
>>
>> lea is also used for arithmetic. Is there a way to tell an RTL is
>> TARGET_MEM_REF?
>
> IMHO you really should reconsider the decision to use ptr_mode != Pmode for
> your port.  IMNSHO you should use addr32 prefixes, and you can perhaps have
> some machine reorg pass which will get rid of addr32 prefixes when they are
> certainly not needed (most probably just for (%reg) addressing when %reg
> is known to be zero-extended from 32 bits into 64 bits).
>

Since x32 is just small model of x86-64 with 32bit pointers, if Pmode
isn't 64bit,
there will be stack and other issues.
Richard Biener Feb. 10, 2011, 4:31 p.m. UTC | #18
On Thu, Feb 10, 2011 at 3:41 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Thu, Feb 10, 2011 at 2:18 AM, Richard Guenther
> <richard.guenther@gmail.com> wrote:
>
>>>>
>>>> By combined brain-power we dug out the last sentence of 3.3.7
>>>> of the ia32 basic arch manual which says the address is zero-extended
>>>> from 32bit after computing it.  So there is no problem?
>>>>
>>>
>>> In x32 mode, we use (%rax,%rbx,4), not (%eax,%ebx,4).  When we generate
>>> (%rax,%rbx,4) in x32 mode, we have to put sign-extended value in RBX.
>>
>> TARGET_MEM_REF does not support this kind of addressing mode.  That's
>> one of the results of choosing ptr_mode != Pmode.  TARGET_MEM_REF
>> only supports (%eax,%ebx,4), or lea (%eax,%ebx,4), %ecx; mov %ecx, %rax
>> and (%rax).
>>
>
> lea is also used for arithmetic. Is there a way to tell an RTL is
> TARGET_MEM_REF?

?

>> TARGET_MEM_REF
>> only supports (%eax,%ebx,4), or lea (%eax,%ebx,4), %ecx; mov %ecx, %rax
>> and (%rax).

as in TARGET_MEM_REF <*ax, *bx, 4> needs to be expanded
as

  compute address in ptr_mode, for example with
    lea (%eax,%ebx,4), %ecx
  zero-extend the address
    mov %ecx, %rax
  do the memory access
    mov (%rax), ...

which is a very inefficient way of using addr32 prefix (basically manually
emulating it).

As Jakub says, don't use ptr_mode != Pmode.  Or disable IVOPTs
(it is useless for such port).  I bet you also will run into POINTER_PLUS_EXPR
issues and the fact it does not preserve signedness of the offset operand.

Richard.
Richard Biener Feb. 10, 2011, 4:33 p.m. UTC | #19
On Thu, Feb 10, 2011 at 5:31 PM, Richard Guenther
<richard.guenther@gmail.com> wrote:
> On Thu, Feb 10, 2011 at 3:41 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Thu, Feb 10, 2011 at 2:18 AM, Richard Guenther
>> <richard.guenther@gmail.com> wrote:
>>
>>>>>
>>>>> By combined brain-power we dug out the last sentence of 3.3.7
>>>>> of the ia32 basic arch manual which says the address is zero-extended
>>>>> from 32bit after computing it.  So there is no problem?
>>>>>
>>>>
>>>> In x32 mode, we use (%rax,%rbx,4), not (%eax,%ebx,4).  When we generate
>>>> (%rax,%rbx,4) in x32 mode, we have to put sign-extended value in RBX.
>>>
>>> TARGET_MEM_REF does not support this kind of addressing mode.  That's
>>> one of the results of choosing ptr_mode != Pmode.  TARGET_MEM_REF
>>> only supports (%eax,%ebx,4), or lea (%eax,%ebx,4), %ecx; mov %ecx, %rax
>>> and (%rax).
>>>
>>
>> lea is also used for arithmetic. Is there a way to tell an RTL is
>> TARGET_MEM_REF?
>
> ?
>
>>> TARGET_MEM_REF
>>> only supports (%eax,%ebx,4), or lea (%eax,%ebx,4), %ecx; mov %ecx, %rax
>>> and (%rax).
>
> as in TARGET_MEM_REF <*ax, *bx, 4> needs to be expanded
> as
>
>  compute address in ptr_mode, for example with
>    lea (%eax,%ebx,4), %ecx
>  zero-extend the address
>    mov %ecx, %rax
>  do the memory access
>    mov (%rax), ...
>
> which is a very inefficient way of using addr32 prefix (basically manually
> emulating it).
>
> As Jakub says, don't use ptr_mode != Pmode.  Or disable IVOPTs
> (it is useless for such port).  I bet you also will run into POINTER_PLUS_EXPR
> issues and the fact it does not preserve signedness of the offset operand.

Oh, POINTER_PLUS_EXPR might be not an issue because of

      /* Even though the sizetype mode and the pointer's mode can be different
         expand is able to handle this correctly and get the correct result out
         of the PLUS_EXPR code.  */
      /* Make sure to sign-extend the sizetype offset in a POINTER_PLUS_EXPR
         if sizetype precision is smaller than pointer precision.  */
      if (TYPE_PRECISION (sizetype) < TYPE_PRECISION (type))
        treeop1 = fold_convert_loc (loc, type,
                                    fold_convert_loc (loc, ssizetype,
                                                      treeop1));

so you can do something similar when expanding TARGET_MEM_REF.
Always sign_extend offset/index to Pmode instead of relying on
POINTERS_EXTEND_UNSIGNED.

Richard.

> Richard.
>
H.J. Lu Feb. 11, 2011, 12:09 a.m. UTC | #20
On Thu, Feb 10, 2011 at 8:31 AM, Richard Guenther
<richard.guenther@gmail.com> wrote:
> On Thu, Feb 10, 2011 at 3:41 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Thu, Feb 10, 2011 at 2:18 AM, Richard Guenther
>> <richard.guenther@gmail.com> wrote:
>>
>>>>>
>>>>> By combined brain-power we dug out the last sentence of 3.3.7
>>>>> of the ia32 basic arch manual which says the address is zero-extended
>>>>> from 32bit after computing it.  So there is no problem?
>>>>>
>>>>
>>>> In x32 mode, we use (%rax,%rbx,4), not (%eax,%ebx,4).  When we generate
>>>> (%rax,%rbx,4) in x32 mode, we have to put sign-extended value in RBX.
>>>
>>> TARGET_MEM_REF does not support this kind of addressing mode.  That's
>>> one of the results of choosing ptr_mode != Pmode.  TARGET_MEM_REF
>>> only supports (%eax,%ebx,4), or lea (%eax,%ebx,4), %ecx; mov %ecx, %rax
>>> and (%rax).
>>>
>>
>> lea is also used for arithmetic. Is there a way to tell an RTL is
>> TARGET_MEM_REF?
>
> ?
>
>>> TARGET_MEM_REF
>>> only supports (%eax,%ebx,4), or lea (%eax,%ebx,4), %ecx; mov %ecx, %rax
>>> and (%rax).
>
> as in TARGET_MEM_REF <*ax, *bx, 4> needs to be expanded
> as
>
>  compute address in ptr_mode, for example with
>    lea (%eax,%ebx,4), %ecx
>  zero-extend the address
>    mov %ecx, %rax
>  do the memory access
>    mov (%rax), ...
>
> which is a very inefficient way of using addr32 prefix (basically manually
> emulating it).
>
> As Jakub says, don't use ptr_mode != Pmode.  Or disable IVOPTs
> (it is useless for such port).  I bet you also will run into POINTER_PLUS_EXPR
> issues and the fact it does not preserve signedness of the offset operand.
>

ptr_mode != Pmode won't work too well. I will see what I can do.
H.J. Lu Feb. 12, 2011, 5:30 p.m. UTC | #21
On Wed, Feb 9, 2011 at 9:17 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Wed, Feb 9, 2011 at 2:04 AM, Richard Guenther
> <richard.guenther@gmail.com> wrote:
>> On Wed, Feb 9, 2011 at 12:08 AM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>> Hi,
>>>
>>> tree-ssa-loop-ivopts.c has
>>>
>>> ---
>>> /* Returns variant of TYPE that can be used as base for different uses.
>>>   We return unsigned type with the same precision, which avoids problems
>>>   with overflows.  */
>>>
>>> static tree
>>> generic_type_for (tree type)
>>> {
>>>  if (POINTER_TYPE_P (type))
>>>    return unsigned_type_for (type);
>>>
>>>  if (TYPE_UNSIGNED (type))
>>>    return type;
>>>
>>>  return unsigned_type_for (type);
>>> }
>>> ---
>>>
>>> It converts negative step to unsigned type with the same precision.
>>> It doesn't work when Pmode != ptr_mode.  This patch disables it.
>>> OK for trunk in stage 1?
>>
>> This does not look correct.  It rather sounds you are expanding
>> TARGET_MEM_REFs the wrong way - the address computed by
>> it is supposed to be calculated in ptr_mode and only the final
>> result extended to Pmode.
>>
>
>
> TARGET_MEM_REF is OK. The problem is ivopts passes
> the negative offset to TARGET_MEM_REF as unsigned and
> expects offset will wrap.  It only works when Pmode ==
> ptr_mode.  I am checking in this patch into x32 branch to
> avoid such cases.
>

I am reverting it now.
diff mbox

Patch

diff --git a/gcc/ChangeLog.x32 b/gcc/ChangeLog.x32
index 41bfe18..80e1eed 100644
--- a/gcc/ChangeLog.x32
+++ b/gcc/ChangeLog.x32
@@ -1,3 +1,13 @@ 
+2011-02-09  H.J. Lu  <hongjiu.lu@intel.com>
+
+	PR middle-end/47383
+	* tree-ssa-loop-ivopts.c (find_bivs): Disabled for non-constant
+	base with negative step and Pmode !== ptr_mode.
+	(find_givs_in_stmt): Return for non-constant base with negative
+	step and Pmode !== ptr_mode.
+	(generic_type_for): Change arguments to base and step.  Check
+	non-constant base with negative step and Pmode !== ptr_mode.
+
 2011-01-29  H.J. Lu  <hongjiu.lu@intel.com>
 
 	PR target/47537
diff --git a/gcc/testsuite/ChangeLog.x32 b/gcc/testsuite/ChangeLog.x32
index 8759881..f87c9f9 100644
--- a/gcc/testsuite/ChangeLog.x32
+++ b/gcc/testsuite/ChangeLog.x32
@@ -1,3 +1,8 @@ 
+2011-02-08  H.J. Lu  <hongjiu.lu@intel.com>
+
+	PR middle-end/47383
+	* gcc.dg/torture/pr47383.c: New.
+
 2011-01-27  H.J. Lu  <hongjiu.lu@intel.com>
 
 	PR rtl-optimization/47502
diff --git a/gcc/testsuite/gcc.dg/torture/pr47383.c b/gcc/testsuite/gcc.dg/torture/pr47383.c
new file mode 100644
index 0000000..ae8d670
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr47383.c
@@ -0,0 +1,30 @@ 
+/* { dg-do run } */
+
+static int heap[2*(256 +1+29)+1];
+static int heap_len;
+static int heap_max;
+void 
+__attribute__ ((noinline))
+foo (int elems)
+{
+  int n, m;
+  int max_code = -1;
+  int node = elems;
+  heap_len = 0, heap_max = (2*(256 +1+29)+1);
+  for (n = 0; n < elems; n++)
+    heap[++heap_len] = max_code = n;
+  do {
+    n = heap[1];
+    heap[1] = heap[heap_len--];
+    m = heap[1];
+    heap[--heap_max] = n;
+    heap[--heap_max] = m;
+  } while (heap_len >= 2);
+}
+
+int
+main ()
+{
+  foo (286);
+  return 0;
+}
diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
index 479b46f..c4ccbd8 100644
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -1029,7 +1029,20 @@  find_bivs (struct ivopts_data *data)
 	  if (POINTER_TYPE_P (type))
 	    step = fold_convert (sizetype, step);
 	  else
-	    step = fold_convert (type, step);
+	    {
+	      /* FIXME: add_candidate_1 calls generic_type_for to cast
+	         STEP to unsigned type with the same precision when BASE
+		 isn't NULL.  It leads to many problems when STEP is
+		 negative and Pmode != ptr_mode.  Disable it for now.  */
+	      if (Pmode != ptr_mode
+		  && base
+		  && TREE_CODE (base) != INTEGER_CST
+		  && TREE_CODE (step) == INTEGER_CST
+		  && tree_int_cst_sgn (step) < 0)
+		continue;
+
+	      step = fold_convert (type, step);
+	    }
 	}
 
       set_iv (data, PHI_RESULT (phi), base, step);
@@ -1121,6 +1134,18 @@  find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
   if (!find_givs_in_stmt_scev (data, stmt, &iv))
     return;
 
+  /* FIXME: add_candidate_1 calls generic_type_for to cast STEP to
+     unsigned type with the same precision when BASE isn't NULL.  It
+     leads to many problems when STEP is negative and Pmode != ptr_mode.
+     Disable it for now.  */
+  if (Pmode != ptr_mode
+      && iv.base
+      && TREE_CODE (iv.base) != INTEGER_CST
+      && iv.step
+      && TREE_CODE (iv.step) == INTEGER_CST
+      && tree_int_cst_sgn (iv.step) < 0)
+    return;
+
   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
 }
 
@@ -2154,13 +2179,23 @@  strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
   return strip_offset_1 (expr, false, false, offset);
 }
 
-/* Returns variant of TYPE that can be used as base for different uses.
-   We return unsigned type with the same precision, which avoids problems
-   with overflows.  */
+/* Returns variant of BASE type that can be used as base for different
+   uses.  We return unsigned type with the same precision, which avoids
+   problems with overflows.
+
+   FIXME: How to avoid non-constant BASE with negative STEP and
+   Pmode !== ptr_mode.  */
 
 static tree
-generic_type_for (tree type)
+generic_type_for (tree base, tree step)
 {
+  tree type = TREE_TYPE (base);
+  if (Pmode != ptr_mode
+      && TREE_CODE (base) != INTEGER_CST
+      && TREE_CODE (step) == INTEGER_CST
+      && tree_int_cst_sgn (step) < 0)
+    gcc_unreachable ();
+
   if (POINTER_TYPE_P (type))
     return unsigned_type_for (type);
 
@@ -2206,13 +2241,12 @@  add_candidate_1 (struct ivopts_data *data,
 {
   unsigned i;
   struct iv_cand *cand = NULL;
-  tree type, orig_type;
+  tree type;
 
   if (base)
     {
-      orig_type = TREE_TYPE (base);
-      type = generic_type_for (orig_type);
-      if (type != orig_type)
+      type = generic_type_for (base, step);
+      if (type != TREE_TYPE (base))
 	{
 	  base = fold_convert (type, base);
 	  step = fold_convert (type, step);