diff mbox

PING: PATCH [4/n]: Prepare x32: Permute the conversion and addition if one operand is a constant

Message ID CAMe9rOr9WpkWv_uW1sTN5XZyoJpGuR4B-K1uEFHDrFfVMUk5LQ@mail.gmail.com
State New
Headers show

Commit Message

H.J. Lu July 10, 2011, 9:04 p.m. UTC
On Sun, Jul 10, 2011 at 9:36 AM, Paolo Bonzini <bonzini@gnu.org> wrote:
> On Sat, Jul 9, 2011 at 23:31, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Sat, Jul 9, 2011 at 2:18 PM, Paolo Bonzini <bonzini@gnu.org> wrote:
>>> On 07/05/2011 04:27 PM, H.J. Lu wrote:
>>>>>
>>>>>  diff --git a/gcc/explow.c b/gcc/explow.c
>>>>>  index 7387dad..b343bf8 100644
>>>>>  --- a/gcc/explow.c
>>>>>  +++ b/gcc/explow.c
>>>>>  @@ -383,18 +383,13 @@ convert_memory_address_addr_space (enum
>>>>> machine_mode to_mode ATTRIBUTE_UNUSED,
>>>>>
>>>>>      case PLUS:
>>>>>      case MULT:
>>>>>  -      /* For addition we can safely permute the conversion and addition
>>>>>  -        operation if one operand is a constant and converting the
>>>>> constant
>>>>>  -        does not change it or if one operand is a constant and we are
>>>>>  -        using a ptr_extend instruction  (POINTERS_EXTEND_UNSIGNED<  0).
>>>>>  -        We can always safely permute them if we are making the address
>>>>>  -        narrower.  */
>>>>>  +      /* For addition we safely permute the conversion and addition
>>>>>  +        operation if one operand is a constant since we can't generate
>>>>>  +        new instructions.  We can always safely permute them if we are
>>>>>  +        making the address narrower.  */
>>>>>        if (GET_MODE_SIZE (to_mode)<  GET_MODE_SIZE (from_mode)
>>>>>           || (GET_CODE (x) == PLUS
>>>>>  -&&  CONST_INT_P (XEXP (x, 1))
>>>>>  -&&  (XEXP (x, 1) == convert_memory_address_addr_space
>>>>>  -                                  (to_mode, XEXP (x, 1), as)
>>>>>  -                 || POINTERS_EXTEND_UNSIGNED<  0)))
>>>>>  +&&  CONST_INT_P (XEXP (x, 1))))
>>>>>         return gen_rtx_fmt_ee (GET_CODE (x), to_mode,
>>>>>                                convert_memory_address_addr_space
>>>>>                                  (to_mode, XEXP (x, 0), as),
>>>
>>> This does not seem safe to me.
>>
>> The current code is broken for x32.  See:
>>
>> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=47727
>>
>> We can't generate any new instructions.  Do you have any suggestions.
>
> By "safe" I mean that the new condition might be too wide and generate
> wrong code.  Richard is definitely right in comment 6, generating new
> code in simplify-rtx is a no-no (see its usage of
> gen_lowpart_no_emit).

Here is a different approach.  I added convert_memory_address_addr_space_1
and convert_modes_1 so that simplify-rtx won't generate new insns.  OK
for trunk if there are no regressions on Linux/x86?

Thanks.

Comments

H.J. Lu July 10, 2011, 11:51 p.m. UTC | #1
On Sun, Jul 10, 2011 at 2:04 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Sun, Jul 10, 2011 at 9:36 AM, Paolo Bonzini <bonzini@gnu.org> wrote:
>> On Sat, Jul 9, 2011 at 23:31, H.J. Lu <hjl.tools@gmail.com> wrote:
>>> On Sat, Jul 9, 2011 at 2:18 PM, Paolo Bonzini <bonzini@gnu.org> wrote:
>>>> On 07/05/2011 04:27 PM, H.J. Lu wrote:
>>>>>>
>>>>>>  diff --git a/gcc/explow.c b/gcc/explow.c
>>>>>>  index 7387dad..b343bf8 100644
>>>>>>  --- a/gcc/explow.c
>>>>>>  +++ b/gcc/explow.c
>>>>>>  @@ -383,18 +383,13 @@ convert_memory_address_addr_space (enum
>>>>>> machine_mode to_mode ATTRIBUTE_UNUSED,
>>>>>>
>>>>>>      case PLUS:
>>>>>>      case MULT:
>>>>>>  -      /* For addition we can safely permute the conversion and addition
>>>>>>  -        operation if one operand is a constant and converting the
>>>>>> constant
>>>>>>  -        does not change it or if one operand is a constant and we are
>>>>>>  -        using a ptr_extend instruction  (POINTERS_EXTEND_UNSIGNED<  0).
>>>>>>  -        We can always safely permute them if we are making the address
>>>>>>  -        narrower.  */
>>>>>>  +      /* For addition we safely permute the conversion and addition
>>>>>>  +        operation if one operand is a constant since we can't generate
>>>>>>  +        new instructions.  We can always safely permute them if we are
>>>>>>  +        making the address narrower.  */
>>>>>>        if (GET_MODE_SIZE (to_mode)<  GET_MODE_SIZE (from_mode)
>>>>>>           || (GET_CODE (x) == PLUS
>>>>>>  -&&  CONST_INT_P (XEXP (x, 1))
>>>>>>  -&&  (XEXP (x, 1) == convert_memory_address_addr_space
>>>>>>  -                                  (to_mode, XEXP (x, 1), as)
>>>>>>  -                 || POINTERS_EXTEND_UNSIGNED<  0)))
>>>>>>  +&&  CONST_INT_P (XEXP (x, 1))))
>>>>>>         return gen_rtx_fmt_ee (GET_CODE (x), to_mode,
>>>>>>                                convert_memory_address_addr_space
>>>>>>                                  (to_mode, XEXP (x, 0), as),
>>>>
>>>> This does not seem safe to me.
>>>
>>> The current code is broken for x32.  See:
>>>
>>> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=47727
>>>
>>> We can't generate any new instructions.  Do you have any suggestions.
>>
>> By "safe" I mean that the new condition might be too wide and generate
>> wrong code.  Richard is definitely right in comment 6, generating new
>> code in simplify-rtx is a no-no (see its usage of
>> gen_lowpart_no_emit).
>
> Here is a different approach.  I added convert_memory_address_addr_space_1
> and convert_modes_1 so that simplify-rtx won't generate new insns.  OK
> for trunk if there are no regressions on Linux/x86?
>
> Thanks.
>
> --
> H.J.
> ---
> 2011-07-10  H.J. Lu  <hongjiu.lu@intel.com>
>
>        * explow.c (convert_memory_address_addr_space_1): New.
>        (convert_memory_address_addr_space): Use it.
>
>        * expr.c (convert_modes_1): New.
>        (convert_modes): Use it.
>
>        * expr.h (convert_modes_1): New.
>
>        * rtl.h (convert_memory_address_addr_space_1): New.
>        (convert_memory_address_1): Likewise.
>
>        * simplify-rtx.c (simplify_unary_operation_1): Call
>        convert_memory_address_1 instead of convert_memory_address.
>

It doesn't work.  I got

(gdb) f 2
#2  0x000000000078735a in convert_memory_address_addr_space_1 (to_mode=DImode,
    x=0x7ffff05ac4e0, as=0 '\000', no_emit=1 '\001')
    at /export/gnu/import/git/gcc-x32/gcc/explow.c:410
410	  return convert_modes_1 (to_mode, from_mode, x,
(gdb) call debug_rtx (x)
(plus:SI (symbol_ref:SI ("iplane.1577") [flags 0x2] <var_decl
0x7ffff0857960 iplane>)
    (const_int -4 [0xfffffffffffffffc]))
(gdb) bt
#0  fancy_abort (file=0x13531a8 "/export/gnu/import/git/gcc-x32/gcc/expr.c",
    line=798, function=0x1354a00 "convert_modes_1")
    at /export/gnu/import/git/gcc-x32/gcc/diagnostic.c:893
#1  0x000000000079c60c in convert_modes_1 (mode=DImode, oldmode=SImode,
    x=0x7ffff05ac4e0, unsignedp=1, no_emit=1 '\001')
    at /export/gnu/import/git/gcc-x32/gcc/expr.c:798
#2  0x000000000078735a in convert_memory_address_addr_space_1 (to_mode=DImode,
    x=0x7ffff05ac4e0, as=0 '\000', no_emit=1 '\001')
    at /export/gnu/import/git/gcc-x32/gcc/explow.c:410
#3  0x0000000000787281 in convert_memory_address_addr_space_1 (to_mode=DImode,
    x=0x7ffff05616d0, as=0 '\000', no_emit=1 '\001')
    at /export/gnu/import/git/gcc-x32/gcc/explow.c:381
#4  0x0000000000b0faf4 in simplify_unary_operation_1 (code=ZERO_EXTEND,
    mode=DImode, op=0x7ffff05616d0)
    at /export/gnu/import/git/gcc-x32/gcc/simplify-rtx.c:1246
#5  0x0000000000b0d851 in simplify_unary_operation (code=ZERO_EXTEND,
    mode=DImode, op=0x7ffff05616d0, op_mode=SImode)
    at /export/gnu/import/git/gcc-x32/gcc/simplify-rtx.c:582
#6  0x0000000000b0d035 in simplify_gen_unary (code=ZERO_EXTEND, mode=DImode,
    op=0x7ffff05616d0, op_mode=SImode)
    at /export/gnu/import/git/gcc-x32/gcc/simplify-rtx.c:370
#7  0x000000000117078a in if_then_else_cond (x=0x7ffff02ebb00,
    ptrue=0x7fffffffd720, pfalse=0x7fffffffd718)
---Type <return> to continue, or q <return> to quit---
    at /export/gnu/import/git/gcc-x32/gcc/combine.c:8649
#8  0x00000000011675b5 in combine_simplify_rtx (x=0x7ffff02ebb00,
    op0_mode=SImode, in_dest=0, in_cond=0)
    at /export/gnu/import/git/gcc-x32/gcc/combine.c:5316
#9  0x0000000001167315 in subst (x=0x7ffff02ebb00, from=0x7ffff02f8120,
    to=0x7ffff05ac4f8, in_dest=0, in_cond=0, unique_copy=0)
    at /export/gnu/import/git/gcc-x32/gcc/combine.c:5253
#10 0x0000000001167104 in subst (x=0x7ffff02f5558, from=0x7ffff02f8120,
    to=0x7ffff05ac4f8, in_dest=0, in_cond=0, unique_copy=0)
    at /export/gnu/import/git/gcc-x32/gcc/combine.c:5189
#11 0x00000000011611ae in try_combine (i3=0x7ffff02f4c60, i2=0x7ffff02f4c18,
    i1=0x0, i0=0x0, new_direct_jump_p=0x7fffffffde14,
    last_combined_insn=0x7ffff02f4c60)
    at /export/gnu/import/git/gcc-x32/gcc/combine.c:3178
#12 0x000000000115c487 in combine_instructions (f=0x7ffff07e7700, nregs=3344)
    at /export/gnu/import/git/gcc-x32/gcc/combine.c:1223
#13 0x000000000117c64e in rest_of_handle_combine ()
    at /export/gnu/import/git/gcc-x32/gcc/combine.c:13879
#14 0x0000000000a500e7 in execute_one_pass (pass=0x190d320)
    at /export/gnu/import/git/gcc-x32/gcc/passes.c:2062
#15 0x0000000000a502cd in execute_pass_list (pass=0x190d320)
    at /export/gnu/import/git/gcc-x32/gcc/passes.c:2117
#16 0x0000000000a502ee in execute_pass_list (pass=0x1908180)
---Type <return> to continue, or q <return> to quit---
    at /export/gnu/import/git/gcc-x32/gcc/passes.c:2118
#17 0x0000000000be93e0 in tree_rest_of_compilation (fndecl=0x7ffff074c200)
    at /export/gnu/import/git/gcc-x32/gcc/tree-optimize.c:416
#18 0x00000000006d3ff7 in cgraph_expand_function (node=0x7ffff0792000)
    at /export/gnu/import/git/gcc-x32/gcc/cgraphunit.c:1804
#19 0x00000000006d41b6 in cgraph_expand_all_functions ()
    at /export/gnu/import/git/gcc-x32/gcc/cgraphunit.c:1863
#20 0x00000000006d48b2 in cgraph_optimize ()
    at /export/gnu/import/git/gcc-x32/gcc/cgraphunit.c:2133
#21 0x00000000006d1b2a in cgraph_finalize_compilation_unit ()
    at /export/gnu/import/git/gcc-x32/gcc/cgraphunit.c:1304
#22 0x00000000009cb7b0 in write_global_declarations ()
    at /export/gnu/import/git/gcc-x32/gcc/langhooks.c:303
#23 0x0000000000559cac in gfc_write_global_declarations ()
    at /export/gnu/import/git/gcc-x32/gcc/fortran/f95-lang.c:322
#24 0x0000000000b4649c in compile_file ()
    at /export/gnu/import/git/gcc-x32/gcc/toplev.c:564
#25 0x0000000000b48686 in do_compile ()
    at /export/gnu/import/git/gcc-x32/gcc/toplev.c:1886
#26 0x0000000000b487ec in toplev_main (argc=19, argv=0x7fffffffe2a8)
    at /export/gnu/import/git/gcc-x32/gcc/toplev.c:1958
#27 0x000000000060be84 in main (argc=19, argv=0x7fffffffe2a8)
    at /export/gnu/import/git/gcc-x32/gcc/main.c:36
(gdb)


H.J.
H.J. Lu July 11, 2011, 12:04 a.m. UTC | #2
On Sun, Jul 10, 2011 at 4:51 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Sun, Jul 10, 2011 at 2:04 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Sun, Jul 10, 2011 at 9:36 AM, Paolo Bonzini <bonzini@gnu.org> wrote:
>>> On Sat, Jul 9, 2011 at 23:31, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>> On Sat, Jul 9, 2011 at 2:18 PM, Paolo Bonzini <bonzini@gnu.org> wrote:
>>>>> On 07/05/2011 04:27 PM, H.J. Lu wrote:
>>>>>>>
>>>>>>>  diff --git a/gcc/explow.c b/gcc/explow.c
>>>>>>>  index 7387dad..b343bf8 100644
>>>>>>>  --- a/gcc/explow.c
>>>>>>>  +++ b/gcc/explow.c
>>>>>>>  @@ -383,18 +383,13 @@ convert_memory_address_addr_space (enum
>>>>>>> machine_mode to_mode ATTRIBUTE_UNUSED,
>>>>>>>
>>>>>>>      case PLUS:
>>>>>>>      case MULT:
>>>>>>>  -      /* For addition we can safely permute the conversion and addition
>>>>>>>  -        operation if one operand is a constant and converting the
>>>>>>> constant
>>>>>>>  -        does not change it or if one operand is a constant and we are
>>>>>>>  -        using a ptr_extend instruction  (POINTERS_EXTEND_UNSIGNED<  0).
>>>>>>>  -        We can always safely permute them if we are making the address
>>>>>>>  -        narrower.  */
>>>>>>>  +      /* For addition we safely permute the conversion and addition
>>>>>>>  +        operation if one operand is a constant since we can't generate
>>>>>>>  +        new instructions.  We can always safely permute them if we are
>>>>>>>  +        making the address narrower.  */
>>>>>>>        if (GET_MODE_SIZE (to_mode)<  GET_MODE_SIZE (from_mode)
>>>>>>>           || (GET_CODE (x) == PLUS
>>>>>>>  -&&  CONST_INT_P (XEXP (x, 1))
>>>>>>>  -&&  (XEXP (x, 1) == convert_memory_address_addr_space
>>>>>>>  -                                  (to_mode, XEXP (x, 1), as)
>>>>>>>  -                 || POINTERS_EXTEND_UNSIGNED<  0)))
>>>>>>>  +&&  CONST_INT_P (XEXP (x, 1))))
>>>>>>>         return gen_rtx_fmt_ee (GET_CODE (x), to_mode,
>>>>>>>                                convert_memory_address_addr_space
>>>>>>>                                  (to_mode, XEXP (x, 0), as),
>>>>>
>>>>> This does not seem safe to me.
>>>>
>>>> The current code is broken for x32.  See:
>>>>
>>>> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=47727
>>>>
>>>> We can't generate any new instructions.  Do you have any suggestions.
>>>
>>> By "safe" I mean that the new condition might be too wide and generate
>>> wrong code.  Richard is definitely right in comment 6, generating new
>>> code in simplify-rtx is a no-no (see its usage of
>>> gen_lowpart_no_emit).
>>
>> Here is a different approach.  I added convert_memory_address_addr_space_1
>> and convert_modes_1 so that simplify-rtx won't generate new insns.  OK
>> for trunk if there are no regressions on Linux/x86?
>>
>> Thanks.
>>
>> --
>> H.J.
>> ---
>> 2011-07-10  H.J. Lu  <hongjiu.lu@intel.com>
>>
>>        * explow.c (convert_memory_address_addr_space_1): New.
>>        (convert_memory_address_addr_space): Use it.
>>
>>        * expr.c (convert_modes_1): New.
>>        (convert_modes): Use it.
>>
>>        * expr.h (convert_modes_1): New.
>>
>>        * rtl.h (convert_memory_address_addr_space_1): New.
>>        (convert_memory_address_1): Likewise.
>>
>>        * simplify-rtx.c (simplify_unary_operation_1): Call
>>        convert_memory_address_1 instead of convert_memory_address.
>>
>
> It doesn't work.  I got
>
> (gdb) f 2
> #2  0x000000000078735a in convert_memory_address_addr_space_1 (to_mode=DImode,
>    x=0x7ffff05ac4e0, as=0 '\000', no_emit=1 '\001')
>    at /export/gnu/import/git/gcc-x32/gcc/explow.c:410
> 410       return convert_modes_1 (to_mode, from_mode, x,
> (gdb) call debug_rtx (x)
> (plus:SI (symbol_ref:SI ("iplane.1577") [flags 0x2] <var_decl
> 0x7ffff0857960 iplane>)
>    (const_int -4 [0xfffffffffffffffc]))
> (gdb) bt
> #0  fancy_abort (file=0x13531a8 "/export/gnu/import/git/gcc-x32/gcc/expr.c",
>    line=798, function=0x1354a00 "convert_modes_1")
>    at /export/gnu/import/git/gcc-x32/gcc/diagnostic.c:893
> #1  0x000000000079c60c in convert_modes_1 (mode=DImode, oldmode=SImode,
>    x=0x7ffff05ac4e0, unsignedp=1, no_emit=1 '\001')
>    at /export/gnu/import/git/gcc-x32/gcc/expr.c:798
> #2  0x000000000078735a in convert_memory_address_addr_space_1 (to_mode=DImode,
>    x=0x7ffff05ac4e0, as=0 '\000', no_emit=1 '\001')
>    at /export/gnu/import/git/gcc-x32/gcc/explow.c:410
> #3  0x0000000000787281 in convert_memory_address_addr_space_1 (to_mode=DImode,
>    x=0x7ffff05616d0, as=0 '\000', no_emit=1 '\001')
>    at /export/gnu/import/git/gcc-x32/gcc/explow.c:381
> #4  0x0000000000b0faf4 in simplify_unary_operation_1 (code=ZERO_EXTEND,
>    mode=DImode, op=0x7ffff05616d0)
>    at /export/gnu/import/git/gcc-x32/gcc/simplify-rtx.c:1246
> #5  0x0000000000b0d851 in simplify_unary_operation (code=ZERO_EXTEND,
>    mode=DImode, op=0x7ffff05616d0, op_mode=SImode)
>    at /export/gnu/import/git/gcc-x32/gcc/simplify-rtx.c:582
> #6  0x0000000000b0d035 in simplify_gen_unary (code=ZERO_EXTEND, mode=DImode,
>    op=0x7ffff05616d0, op_mode=SImode)
>    at /export/gnu/import/git/gcc-x32/gcc/simplify-rtx.c:370
> #7  0x000000000117078a in if_then_else_cond (x=0x7ffff02ebb00,
>    ptrue=0x7fffffffd720, pfalse=0x7fffffffd718)
> ---Type <return> to continue, or q <return> to quit---
>    at /export/gnu/import/git/gcc-x32/gcc/combine.c:8649
> #8  0x00000000011675b5 in combine_simplify_rtx (x=0x7ffff02ebb00,
>    op0_mode=SImode, in_dest=0, in_cond=0)
>    at /export/gnu/import/git/gcc-x32/gcc/combine.c:5316
> #9  0x0000000001167315 in subst (x=0x7ffff02ebb00, from=0x7ffff02f8120,
>    to=0x7ffff05ac4f8, in_dest=0, in_cond=0, unique_copy=0)
>    at /export/gnu/import/git/gcc-x32/gcc/combine.c:5253
> #10 0x0000000001167104 in subst (x=0x7ffff02f5558, from=0x7ffff02f8120,
>    to=0x7ffff05ac4f8, in_dest=0, in_cond=0, unique_copy=0)
>    at /export/gnu/import/git/gcc-x32/gcc/combine.c:5189
> #11 0x00000000011611ae in try_combine (i3=0x7ffff02f4c60, i2=0x7ffff02f4c18,
>    i1=0x0, i0=0x0, new_direct_jump_p=0x7fffffffde14,
>    last_combined_insn=0x7ffff02f4c60)
>    at /export/gnu/import/git/gcc-x32/gcc/combine.c:3178
> #12 0x000000000115c487 in combine_instructions (f=0x7ffff07e7700, nregs=3344)
>    at /export/gnu/import/git/gcc-x32/gcc/combine.c:1223
> #13 0x000000000117c64e in rest_of_handle_combine ()
>    at /export/gnu/import/git/gcc-x32/gcc/combine.c:13879
> #14 0x0000000000a500e7 in execute_one_pass (pass=0x190d320)
>    at /export/gnu/import/git/gcc-x32/gcc/passes.c:2062
> #15 0x0000000000a502cd in execute_pass_list (pass=0x190d320)
>    at /export/gnu/import/git/gcc-x32/gcc/passes.c:2117
> #16 0x0000000000a502ee in execute_pass_list (pass=0x1908180)
> ---Type <return> to continue, or q <return> to quit---
>    at /export/gnu/import/git/gcc-x32/gcc/passes.c:2118
> #17 0x0000000000be93e0 in tree_rest_of_compilation (fndecl=0x7ffff074c200)
>    at /export/gnu/import/git/gcc-x32/gcc/tree-optimize.c:416
> #18 0x00000000006d3ff7 in cgraph_expand_function (node=0x7ffff0792000)
>    at /export/gnu/import/git/gcc-x32/gcc/cgraphunit.c:1804
> #19 0x00000000006d41b6 in cgraph_expand_all_functions ()
>    at /export/gnu/import/git/gcc-x32/gcc/cgraphunit.c:1863
> #20 0x00000000006d48b2 in cgraph_optimize ()
>    at /export/gnu/import/git/gcc-x32/gcc/cgraphunit.c:2133
> #21 0x00000000006d1b2a in cgraph_finalize_compilation_unit ()
>    at /export/gnu/import/git/gcc-x32/gcc/cgraphunit.c:1304
> #22 0x00000000009cb7b0 in write_global_declarations ()
>    at /export/gnu/import/git/gcc-x32/gcc/langhooks.c:303
> #23 0x0000000000559cac in gfc_write_global_declarations ()
>    at /export/gnu/import/git/gcc-x32/gcc/fortran/f95-lang.c:322
> #24 0x0000000000b4649c in compile_file ()
>    at /export/gnu/import/git/gcc-x32/gcc/toplev.c:564
> #25 0x0000000000b48686 in do_compile ()
>    at /export/gnu/import/git/gcc-x32/gcc/toplev.c:1886
> #26 0x0000000000b487ec in toplev_main (argc=19, argv=0x7fffffffe2a8)
>    at /export/gnu/import/git/gcc-x32/gcc/toplev.c:1958
> #27 0x000000000060be84 in main (argc=19, argv=0x7fffffffe2a8)
>    at /export/gnu/import/git/gcc-x32/gcc/main.c:36
> (gdb)
>
>
> H.J.
>

With my original change,  I got

(const:DI (plus:DI (symbol_ref:DI ("iplane.1577") [flags 0x2]
<var_decl 0x7ffff0857960 iplane>)
        (const_int -4 [0xfffffffffffffffc])))

I think it is safe to permute the conversion and addition operation
if one operand is a constant and we are zero-extending.  This is
how zero-extending works.
Paolo Bonzini July 11, 2011, 11:03 a.m. UTC | #3
On 07/11/2011 02:04 AM, H.J. Lu wrote:
> With my original change,  I got
>
> (const:DI (plus:DI (symbol_ref:DI ("iplane.1577") [flags 0x2]
> <var_decl 0x7ffff0857960 iplane>)
>          (const_int -4 [0xfffffffffffffffc])))
>
> I think it is safe to permute the conversion and addition operation
> if one operand is a constant and we are zero-extending.  This is
> how zero-extending works.

Ok, I think I understand what you mean.  The key is the

    XEXP (x, 1) == convert_memory_address_addr_space
                   (to_mode, XEXP (x, 1), as)

test.  It ensures basically that the constant has 31-bit precision, 
because otherwise the constant would change from e.g. (const_int 
-0x7ffffffc) to (const_int 0x80000004) when zero-extending it from 
SImode to DImode.

But I'm not sure it's safe.  You have,

   (zero_extend:DI (plus:SI FOO:SI) (const_int Y))

and you want to convert it to

   (plus:DI FOO:DI (zero_extend:DI (const_int Y)))

(where the zero_extend is folded).  Ignore that FOO is a SYMBOL_REF 
(this piece of code does not assume anything about its shape); if FOO == 
0xfffffffc and Y = 8, the result will be respectively 0x4 (valid) and 
0x100000004 (invalid).

If pointers extend as signed you also have a similar case.   If FOO == 
0x7ffffffc and Y = 8, the result of

   (sign_extend:DI (plus:SI FOO:SI) (const_int Y))

and

   (plus:DI FOO:DI (sign_extend:DI (const_int Y)))

will be respectively 0xffffffff80000004 (valid) and 0x80000004 (invalid).


What happens if you just return NULL instead of the assertion (good idea 
adding it!)?

Of course then you need to:

1) check the return values of convert_memory_address_addr_space_1, and 
propagate NULL up to simplify_unary_operation;

2) check in simplify-rtx.c whether the return value of 
convert_memory_address_1 is NULL, and only return if the return value is 
not NULL.  This is not yet necessary (convert_memory_address is the last 
transformation for both SIGN_EXTEND and ZERO_EXTEND) but it is better to 
keep code clean.

Thanks,

Paolo
H.J. Lu July 11, 2011, 3:54 p.m. UTC | #4
On Mon, Jul 11, 2011 at 4:03 AM, Paolo Bonzini <bonzini@gnu.org> wrote:
> On 07/11/2011 02:04 AM, H.J. Lu wrote:
>>
>> With my original change,  I got
>>
>> (const:DI (plus:DI (symbol_ref:DI ("iplane.1577") [flags 0x2]
>> <var_decl 0x7ffff0857960 iplane>)
>>         (const_int -4 [0xfffffffffffffffc])))
>>
>> I think it is safe to permute the conversion and addition operation
>> if one operand is a constant and we are zero-extending.  This is
>> how zero-extending works.
>
> Ok, I think I understand what you mean.  The key is the
>
>   XEXP (x, 1) == convert_memory_address_addr_space
>                  (to_mode, XEXP (x, 1), as)
>
> test.  It ensures basically that the constant has 31-bit precision, because
> otherwise the constant would change from e.g. (const_int -0x7ffffffc) to
> (const_int 0x80000004) when zero-extending it from SImode to DImode.
>
> But I'm not sure it's safe.  You have,
>
>  (zero_extend:DI (plus:SI FOO:SI) (const_int Y))
>
> and you want to convert it to
>
>  (plus:DI FOO:DI (zero_extend:DI (const_int Y)))
>
> (where the zero_extend is folded).  Ignore that FOO is a SYMBOL_REF (this
> piece of code does not assume anything about its shape); if FOO ==
> 0xfffffffc and Y = 8, the result will be respectively 0x4 (valid) and
> 0x100000004 (invalid).

This example contradicts what you said above "It ensures basically that the
constant has 31-bit precision".  For zero-extend, the issue is address-wrap.
As I understand, to support address-wrap, you need to use ptr_mode.

> If pointers extend as signed you also have a similar case.   If FOO ==
> 0x7ffffffc and Y = 8, the result of
>
>  (sign_extend:DI (plus:SI FOO:SI) (const_int Y))
>
> and
>
>  (plus:DI FOO:DI (sign_extend:DI (const_int Y)))
>
> will be respectively 0xffffffff80000004 (valid) and 0x80000004 (invalid).
>
>
> What happens if you just return NULL instead of the assertion (good idea
> adding it!)?
>
> Of course then you need to:
>
> 1) check the return values of convert_memory_address_addr_space_1, and
> propagate NULL up to simplify_unary_operation;
>
> 2) check in simplify-rtx.c whether the return value of
> convert_memory_address_1 is NULL, and only return if the return value is not
> NULL.  This is not yet necessary (convert_memory_address is the last
> transformation for both SIGN_EXTEND and ZERO_EXTEND) but it is better to
> keep code clean.

I will give it a try.

Thanks.
H.J. Lu July 11, 2011, 4:55 p.m. UTC | #5
On Mon, Jul 11, 2011 at 8:54 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Mon, Jul 11, 2011 at 4:03 AM, Paolo Bonzini <bonzini@gnu.org> wrote:
>> On 07/11/2011 02:04 AM, H.J. Lu wrote:
>>>
>>> With my original change,  I got
>>>
>>> (const:DI (plus:DI (symbol_ref:DI ("iplane.1577") [flags 0x2]
>>> <var_decl 0x7ffff0857960 iplane>)
>>>         (const_int -4 [0xfffffffffffffffc])))
>>>
>>> I think it is safe to permute the conversion and addition operation
>>> if one operand is a constant and we are zero-extending.  This is
>>> how zero-extending works.
>>
>> Ok, I think I understand what you mean.  The key is the
>>
>>   XEXP (x, 1) == convert_memory_address_addr_space
>>                  (to_mode, XEXP (x, 1), as)
>>
>> test.  It ensures basically that the constant has 31-bit precision, because
>> otherwise the constant would change from e.g. (const_int -0x7ffffffc) to
>> (const_int 0x80000004) when zero-extending it from SImode to DImode.
>>
>> But I'm not sure it's safe.  You have,
>>
>>  (zero_extend:DI (plus:SI FOO:SI) (const_int Y))
>>
>> and you want to convert it to
>>
>>  (plus:DI FOO:DI (zero_extend:DI (const_int Y)))
>>
>> (where the zero_extend is folded).  Ignore that FOO is a SYMBOL_REF (this
>> piece of code does not assume anything about its shape); if FOO ==
>> 0xfffffffc and Y = 8, the result will be respectively 0x4 (valid) and
>> 0x100000004 (invalid).
>
> This example contradicts what you said above "It ensures basically that the
> constant has 31-bit precision".  For zero-extend, the issue is address-wrap.
> As I understand, to support address-wrap, you need to use ptr_mode.
>

I am totally confused what the current code

     /* For addition we can safely permute the conversion and addition
         operation if one operand is a constant and converting the constant
         does not change it or if one operand is a constant and we are
         using a ptr_extend instruction  (POINTERS_EXTEND_UNSIGNED < 0).
         We can always safely permute them if we are making the address
         narrower.  */
      if (GET_MODE_SIZE (to_mode) < GET_MODE_SIZE (from_mode)
          || (GET_CODE (x) == PLUS
              && CONST_INT_P (XEXP (x, 1))
              && (XEXP (x, 1) == convert_memory_address_addr_space
                                   (to_mode, XEXP (x, 1), as)
                 || POINTERS_EXTEND_UNSIGNED < 0)))
        return gen_rtx_fmt_ee (GET_CODE (x), to_mode,
                               convert_memory_address_addr_space
                                 (to_mode, XEXP (x, 0), as),
                               XEXP (x, 1));

is trying to do.  It doesn't support address-wrap at all, regardless if
converting the constant changes the constant.  I think it should be
OK to permute if no instructions are allowed, like:

     if (GET_MODE_SIZE (to_mode) < GET_MODE_SIZE (from_mode)
          || (GET_CODE (x) == PLUS
              && CONST_INT_P (XEXP (x, 1))
              && POINTERS_EXTEND_UNSIGNED != 0
              && no_emit))
        return gen_rtx_fmt_ee (GET_CODE (x), to_mode,
                               convert_memory_address_addr_space_1
                                 (to_mode, XEXP (x, 0), as, no_emit),
                               XEXP (x, 1));
Paolo Bonzini July 13, 2011, 4:13 p.m. UTC | #6
On 07/11/2011 05:54 PM, H.J. Lu wrote:
> The key is the
> >
> >     XEXP (x, 1) == convert_memory_address_addr_space
> >                    (to_mode, XEXP (x, 1), as)
> >
> >  test.  It ensures basically that the constant has 31-bit precision, because
> >  otherwise the constant would change from e.g. (const_int -0x7ffffffc) to
> >  (const_int 0x80000004) when zero-extending it from SImode to DImode.
> >
> >  But I'm not sure it's safe.  You have,
> >
> >    (zero_extend:DI (plus:SI FOO:SI) (const_int Y))
> >
> >  and you want to convert it to
> >
> >    (plus:DI FOO:DI (zero_extend:DI (const_int Y)))
> >
> >  (where the zero_extend is folded).  Ignore that FOO is a SYMBOL_REF (this
> >  piece of code does not assume anything about its shape); if FOO ==
> >  0xfffffffc and Y = 8, the result will be respectively 0x4 (valid) and
> >  0x100000004 (invalid).
>
> This example contradicts what you said above "It ensures basically that the
> constant has 31-bit precision".

Why?  Certainly Y = 8 has 31-bit (or less) precision.  So it has the 
same representation in SImode and DImode, and the test above on XEXP (x, 
1) succeeds.

> >  What happens if you just return NULL instead of the assertion (good idea
> >  adding it!)?
> >
> >  Of course then you need to:
> >
> >  1) check the return values of convert_memory_address_addr_space_1, and
> >  propagate NULL up to simplify_unary_operation;
> >
> >  2) check in simplify-rtx.c whether the return value of
> >  convert_memory_address_1 is NULL, and only return if the return value is not
> >  NULL.  This is not yet necessary (convert_memory_address is the last
> >  transformation for both SIGN_EXTEND and ZERO_EXTEND) but it is better to
> >  keep code clean.
>
> I will give it a try.

Thanks, did you get any result?  There's no "I think" in this code.  So 
even if I cannot approve it, I'd really like to see a version that I 
understand and that is clearly conservative, if it works.

Paolo
H.J. Lu July 13, 2011, 4:39 p.m. UTC | #7
On Wed, Jul 13, 2011 at 9:13 AM, Paolo Bonzini <bonzini@gnu.org> wrote:
> On 07/11/2011 05:54 PM, H.J. Lu wrote:
>>
>> The key is the
>> >
>> >     XEXP (x, 1) == convert_memory_address_addr_space
>> >                    (to_mode, XEXP (x, 1), as)
>> >
>> >  test.  It ensures basically that the constant has 31-bit precision,
>> > because
>> >  otherwise the constant would change from e.g. (const_int -0x7ffffffc)
>> > to
>> >  (const_int 0x80000004) when zero-extending it from SImode to DImode.
>> >
>> >  But I'm not sure it's safe.  You have,
>> >
>> >    (zero_extend:DI (plus:SI FOO:SI) (const_int Y))
>> >
>> >  and you want to convert it to
>> >
>> >    (plus:DI FOO:DI (zero_extend:DI (const_int Y)))
>> >
>> >  (where the zero_extend is folded).  Ignore that FOO is a SYMBOL_REF
>> > (this
>> >  piece of code does not assume anything about its shape); if FOO ==
>> >  0xfffffffc and Y = 8, the result will be respectively 0x4 (valid) and
>> >  0x100000004 (invalid).
>>
>> This example contradicts what you said above "It ensures basically that
>> the
>> constant has 31-bit precision".
>
> Why?  Certainly Y = 8 has 31-bit (or less) precision.  So it has the same
> representation in SImode and DImode, and the test above on XEXP (x, 1)
> succeeds.

And then we permute conversion and addition, which leads to the issue you
raised above.  In another word, the current code permutes conversion
and addition.
It leads to different values in case of symbol (0xfffffffc) + 8.
Basically the current
test for 31-bit (or less) precision is bogus.  The real question is
for a address
computation, A + B, if address wrap-around is supported in
convert_memory_address_addr_space.

>> >  What happens if you just return NULL instead of the assertion (good
>> > idea
>> >  adding it!)?
>> >
>> >  Of course then you need to:
>> >
>> >  1) check the return values of convert_memory_address_addr_space_1, and
>> >  propagate NULL up to simplify_unary_operation;
>> >
>> >  2) check in simplify-rtx.c whether the return value of
>> >  convert_memory_address_1 is NULL, and only return if the return value
>> > is not
>> >  NULL.  This is not yet necessary (convert_memory_address is the last
>> >  transformation for both SIGN_EXTEND and ZERO_EXTEND) but it is better
>> > to
>> >  keep code clean.
>>
>> I will give it a try.
>
> Thanks, did you get any result?  There's no "I think" in this code.  So even
> if I cannot approve it, I'd really like to see a version that I understand
> and that is clearly conservative, if it works.
>

I opened a new bug:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49721

My current code looks like:

   case CONST:
      temp = convert_memory_address_addr_space_1 (to_mode, XEXP (x, 0),
                                                  as, no_emit,
                                                  ignore_address_wrap_around);
      return temp ? gen_rtx_CONST (to_mode, temp) : temp;
      break;

    case PLUS:
    case MULT:
      /* For addition we can safely permute the conversion and addition
         operation if one operand is a constant, address wrap-around
         is ignored and we are using a ptr_extend instruction or
         zero-extending (POINTERS_EXTEND_UNSIGNED != 0).  We can always
         safely permute them if we are making the address narrower.  */
      if (GET_MODE_SIZE (to_mode) < GET_MODE_SIZE (from_mode)
          || (GET_CODE (x) == PLUS
              && CONST_INT_P (XEXP (x, 1))
              && (POINTERS_EXTEND_UNSIGNED != 0
                  && ignore_address_wrap_around)))
        return gen_rtx_fmt_ee (GET_CODE (x), to_mode,
                               convert_memory_address_addr_space_1
                                 (to_mode, XEXP (x, 0), as, no_emit,
                                  ignore_address_wrap_around),
                               XEXP (x, 1));
      break;
Paolo Bonzini July 13, 2011, 4:51 p.m. UTC | #8
On Wed, Jul 13, 2011 at 18:39, H.J. Lu <hjl.tools@gmail.com> wrote:

>> Why?  Certainly Y = 8 has 31-bit (or less) precision.  So it has the same
>> representation in SImode and DImode, and the test above on XEXP (x, 1)
>> succeeds.
>
> And then we permute conversion and addition, which leads to the issue you
> raised above.  In another word, the current code permutes conversion
> and addition.

No, only if we have ptr_extend.  It may be buggy as well, but let's
make sure first that x32 is done right, then perhaps whoever cares can
fix ptr_extend if it has to be fixed.  I don't know the semantics of
ia64 addp4 so I cannot tell.

> I opened a new bug:
>
> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49721

Good, thanks.

> My current code looks like:
>
>   case CONST:
>      temp = convert_memory_address_addr_space_1 (to_mode, XEXP (x, 0),
>                                                  as, no_emit,
>                                                  ignore_address_wrap_around);

Here I stopped reading.  It's not what I asked for, so at least you
should say clearly _why_.

Paolo
Paolo Bonzini July 13, 2011, 4:54 p.m. UTC | #9
>>> Why?  Certainly Y = 8 has 31-bit (or less) precision.  So it has the same
>>> representation in SImode and DImode, and the test above on XEXP (x, 1)
>>> succeeds.
>>
>> And then we permute conversion and addition, which leads to the issue you
>> raised above.  In another word, the current code permutes conversion
>> and addition.
>
> No, only if we have ptr_extend.

Oops, hit send too early, I understand now what you mean.  But even
more so, let's make sure x32 is done right so that perhaps we can
remove the bogus test on XEXP (x, 1) for other Pmode != ptr_mode
targets, non-ptr_extend.  Then we can worry perhaps of
POINTERS_EXTEND_UNSIGNED < 0.

Paolo
Andrew Pinski May 29, 2014, 4:52 a.m. UTC | #10
On Wed, Jul 13, 2011 at 9:39 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Wed, Jul 13, 2011 at 9:13 AM, Paolo Bonzini <bonzini@gnu.org> wrote:
>> On 07/11/2011 05:54 PM, H.J. Lu wrote:
>>>
>>> The key is the
>>> >
>>> >     XEXP (x, 1) == convert_memory_address_addr_space
>>> >                    (to_mode, XEXP (x, 1), as)
>>> >
>>> >  test.  It ensures basically that the constant has 31-bit precision,
>>> > because
>>> >  otherwise the constant would change from e.g. (const_int -0x7ffffffc)
>>> > to
>>> >  (const_int 0x80000004) when zero-extending it from SImode to DImode.
>>> >
>>> >  But I'm not sure it's safe.  You have,
>>> >
>>> >    (zero_extend:DI (plus:SI FOO:SI) (const_int Y))
>>> >
>>> >  and you want to convert it to
>>> >
>>> >    (plus:DI FOO:DI (zero_extend:DI (const_int Y)))
>>> >
>>> >  (where the zero_extend is folded).  Ignore that FOO is a SYMBOL_REF
>>> > (this
>>> >  piece of code does not assume anything about its shape); if FOO ==
>>> >  0xfffffffc and Y = 8, the result will be respectively 0x4 (valid) and
>>> >  0x100000004 (invalid).
>>>
>>> This example contradicts what you said above "It ensures basically that
>>> the
>>> constant has 31-bit precision".
>>
>> Why?  Certainly Y = 8 has 31-bit (or less) precision.  So it has the same
>> representation in SImode and DImode, and the test above on XEXP (x, 1)
>> succeeds.
>
> And then we permute conversion and addition, which leads to the issue you
> raised above.  In another word, the current code permutes conversion
> and addition.
> It leads to different values in case of symbol (0xfffffffc) + 8.
> Basically the current
> test for 31-bit (or less) precision is bogus.  The real question is
> for a address
> computation, A + B, if address wrap-around is supported in
> convert_memory_address_addr_space.

Unless the code has already reassociated the additions already.
Like in the AARCH64 ILP32 case:

(plus:SI (plus:SI (mult:SI (reg/v:SI 80 [ b ])
            (const_int -4 [0xfffffffffffffffc]))
        (subreg/s/u:SI (reg/v/f:DI 79 [ a ]) 0))
    (const_int -1073742592 [0xffffffffbffffd00]))

The Tree level is correct in that it did not reassociate the addition
but the RTL level ignores that.

So this patch is invalid and incorrect unless you know the non
constant part of the addition is a pointer (which is not the case
here).

Thanks,
Andrew Pinski



>
>>> >  What happens if you just return NULL instead of the assertion (good
>>> > idea
>>> >  adding it!)?
>>> >
>>> >  Of course then you need to:
>>> >
>>> >  1) check the return values of convert_memory_address_addr_space_1, and
>>> >  propagate NULL up to simplify_unary_operation;
>>> >
>>> >  2) check in simplify-rtx.c whether the return value of
>>> >  convert_memory_address_1 is NULL, and only return if the return value
>>> > is not
>>> >  NULL.  This is not yet necessary (convert_memory_address is the last
>>> >  transformation for both SIGN_EXTEND and ZERO_EXTEND) but it is better
>>> > to
>>> >  keep code clean.
>>>
>>> I will give it a try.
>>
>> Thanks, did you get any result?  There's no "I think" in this code.  So even
>> if I cannot approve it, I'd really like to see a version that I understand
>> and that is clearly conservative, if it works.
>>
>
> I opened a new bug:
>
> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49721
>
> My current code looks like:
>
>    case CONST:
>       temp = convert_memory_address_addr_space_1 (to_mode, XEXP (x, 0),
>                                                   as, no_emit,
>                                                   ignore_address_wrap_around);
>       return temp ? gen_rtx_CONST (to_mode, temp) : temp;
>       break;
>
>     case PLUS:
>     case MULT:
>       /* For addition we can safely permute the conversion and addition
>          operation if one operand is a constant, address wrap-around
>          is ignored and we are using a ptr_extend instruction or
>          zero-extending (POINTERS_EXTEND_UNSIGNED != 0).  We can always
>          safely permute them if we are making the address narrower.  */
>       if (GET_MODE_SIZE (to_mode) < GET_MODE_SIZE (from_mode)
>           || (GET_CODE (x) == PLUS
>               && CONST_INT_P (XEXP (x, 1))
>               && (POINTERS_EXTEND_UNSIGNED != 0
>                   && ignore_address_wrap_around)))
>         return gen_rtx_fmt_ee (GET_CODE (x), to_mode,
>                                convert_memory_address_addr_space_1
>                                  (to_mode, XEXP (x, 0), as, no_emit,
>                                   ignore_address_wrap_around),
>                                XEXP (x, 1));
>       break;
>
> --
> H.J.
H.J. Lu May 29, 2014, 4:13 p.m. UTC | #11
On Wed, May 28, 2014 at 9:52 PM, Andrew Pinski <pinskia@gmail.com> wrote:
> On Wed, Jul 13, 2011 at 9:39 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Wed, Jul 13, 2011 at 9:13 AM, Paolo Bonzini <bonzini@gnu.org> wrote:
>>> On 07/11/2011 05:54 PM, H.J. Lu wrote:
>>>>
>>>> The key is the
>>>> >
>>>> >     XEXP (x, 1) == convert_memory_address_addr_space
>>>> >                    (to_mode, XEXP (x, 1), as)
>>>> >
>>>> >  test.  It ensures basically that the constant has 31-bit precision,
>>>> > because
>>>> >  otherwise the constant would change from e.g. (const_int -0x7ffffffc)
>>>> > to
>>>> >  (const_int 0x80000004) when zero-extending it from SImode to DImode.
>>>> >
>>>> >  But I'm not sure it's safe.  You have,
>>>> >
>>>> >    (zero_extend:DI (plus:SI FOO:SI) (const_int Y))
>>>> >
>>>> >  and you want to convert it to
>>>> >
>>>> >    (plus:DI FOO:DI (zero_extend:DI (const_int Y)))
>>>> >
>>>> >  (where the zero_extend is folded).  Ignore that FOO is a SYMBOL_REF
>>>> > (this
>>>> >  piece of code does not assume anything about its shape); if FOO ==
>>>> >  0xfffffffc and Y = 8, the result will be respectively 0x4 (valid) and
>>>> >  0x100000004 (invalid).
>>>>
>>>> This example contradicts what you said above "It ensures basically that
>>>> the
>>>> constant has 31-bit precision".
>>>
>>> Why?  Certainly Y = 8 has 31-bit (or less) precision.  So it has the same
>>> representation in SImode and DImode, and the test above on XEXP (x, 1)
>>> succeeds.
>>
>> And then we permute conversion and addition, which leads to the issue you
>> raised above.  In another word, the current code permutes conversion
>> and addition.
>> It leads to different values in case of symbol (0xfffffffc) + 8.
>> Basically the current
>> test for 31-bit (or less) precision is bogus.  The real question is
>> for a address
>> computation, A + B, if address wrap-around is supported in
>> convert_memory_address_addr_space.
>
> Unless the code has already reassociated the additions already.
> Like in the AARCH64 ILP32 case:
>
> (plus:SI (plus:SI (mult:SI (reg/v:SI 80 [ b ])
>             (const_int -4 [0xfffffffffffffffc]))
>         (subreg/s/u:SI (reg/v/f:DI 79 [ a ]) 0))
>     (const_int -1073742592 [0xffffffffbffffd00]))
>
> The Tree level is correct in that it did not reassociate the addition
> but the RTL level ignores that.
>
> So this patch is invalid and incorrect unless you know the non
> constant part of the addition is a pointer (which is not the case
> here).
>

There is an address overflow.  Is the address overflow behavior
defined here?
Andrew Pinski May 29, 2014, 4:23 p.m. UTC | #12
> On May 29, 2014, at 9:13 AM, "H.J. Lu" <hjl.tools@gmail.com> wrote:
> 
>> On Wed, May 28, 2014 at 9:52 PM, Andrew Pinski <pinskia@gmail.com> wrote:
>>> On Wed, Jul 13, 2011 at 9:39 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>> On Wed, Jul 13, 2011 at 9:13 AM, Paolo Bonzini <bonzini@gnu.org> wrote:
>>>>> On 07/11/2011 05:54 PM, H.J. Lu wrote:
>>>>> 
>>>>> The key is the
>>>>>> 
>>>>>>    XEXP (x, 1) == convert_memory_address_addr_space
>>>>>>                   (to_mode, XEXP (x, 1), as)
>>>>>> 
>>>>>> test.  It ensures basically that the constant has 31-bit precision,
>>>>>> because
>>>>>> otherwise the constant would change from e.g. (const_int -0x7ffffffc)
>>>>>> to
>>>>>> (const_int 0x80000004) when zero-extending it from SImode to DImode.
>>>>>> 
>>>>>> But I'm not sure it's safe.  You have,
>>>>>> 
>>>>>>   (zero_extend:DI (plus:SI FOO:SI) (const_int Y))
>>>>>> 
>>>>>> and you want to convert it to
>>>>>> 
>>>>>>   (plus:DI FOO:DI (zero_extend:DI (const_int Y)))
>>>>>> 
>>>>>> (where the zero_extend is folded).  Ignore that FOO is a SYMBOL_REF
>>>>>> (this
>>>>>> piece of code does not assume anything about its shape); if FOO ==
>>>>>> 0xfffffffc and Y = 8, the result will be respectively 0x4 (valid) and
>>>>>> 0x100000004 (invalid).
>>>>> 
>>>>> This example contradicts what you said above "It ensures basically that
>>>>> the
>>>>> constant has 31-bit precision".
>>>> 
>>>> Why?  Certainly Y = 8 has 31-bit (or less) precision.  So it has the same
>>>> representation in SImode and DImode, and the test above on XEXP (x, 1)
>>>> succeeds.
>>> 
>>> And then we permute conversion and addition, which leads to the issue you
>>> raised above.  In another word, the current code permutes conversion
>>> and addition.
>>> It leads to different values in case of symbol (0xfffffffc) + 8.
>>> Basically the current
>>> test for 31-bit (or less) precision is bogus.  The real question is
>>> for a address
>>> computation, A + B, if address wrap-around is supported in
>>> convert_memory_address_addr_space.
>> 
>> Unless the code has already reassociated the additions already.
>> Like in the AARCH64 ILP32 case:
>> 
>> (plus:SI (plus:SI (mult:SI (reg/v:SI 80 [ b ])
>>            (const_int -4 [0xfffffffffffffffc]))
>>        (subreg/s/u:SI (reg/v/f:DI 79 [ a ]) 0))
>>    (const_int -1073742592 [0xffffffffbffffd00]))
>> 
>> The Tree level is correct in that it did not reassociate the addition
>> but the RTL level ignores that.
>> 
>> So this patch is invalid and incorrect unless you know the non
>> constant part of the addition is a pointer (which is not the case
>> here).
> 
> There is an address overflow.  Is the address overflow behavior
> defined here?

There was no address overflow in the original code and there was no address overflow in the tree level. The rtl level does introduce an address overflow but the semantics of plus is defined to be wrapping so there is no overflow.   This is blocking me from testing ilp32 under gnu/Linux as ld.so gets miscompiled and stack addresses have the "sign" bit set. 

Thanks,
Andrew


> 
> 
> -- 
> H.J.
H.J. Lu May 29, 2014, 5:09 p.m. UTC | #13
On Thu, May 29, 2014 at 9:23 AM,  <pinskia@gmail.com> wrote:
>
>
>> On May 29, 2014, at 9:13 AM, "H.J. Lu" <hjl.tools@gmail.com> wrote:
>>
>>> On Wed, May 28, 2014 at 9:52 PM, Andrew Pinski <pinskia@gmail.com> wrote:
>>>> On Wed, Jul 13, 2011 at 9:39 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>> On Wed, Jul 13, 2011 at 9:13 AM, Paolo Bonzini <bonzini@gnu.org> wrote:
>>>>>> On 07/11/2011 05:54 PM, H.J. Lu wrote:
>>>>>>
>>>>>> The key is the
>>>>>>>
>>>>>>>    XEXP (x, 1) == convert_memory_address_addr_space
>>>>>>>                   (to_mode, XEXP (x, 1), as)
>>>>>>>
>>>>>>> test.  It ensures basically that the constant has 31-bit precision,
>>>>>>> because
>>>>>>> otherwise the constant would change from e.g. (const_int -0x7ffffffc)
>>>>>>> to
>>>>>>> (const_int 0x80000004) when zero-extending it from SImode to DImode.
>>>>>>>
>>>>>>> But I'm not sure it's safe.  You have,
>>>>>>>
>>>>>>>   (zero_extend:DI (plus:SI FOO:SI) (const_int Y))
>>>>>>>
>>>>>>> and you want to convert it to
>>>>>>>
>>>>>>>   (plus:DI FOO:DI (zero_extend:DI (const_int Y)))
>>>>>>>
>>>>>>> (where the zero_extend is folded).  Ignore that FOO is a SYMBOL_REF
>>>>>>> (this
>>>>>>> piece of code does not assume anything about its shape); if FOO ==
>>>>>>> 0xfffffffc and Y = 8, the result will be respectively 0x4 (valid) and
>>>>>>> 0x100000004 (invalid).
>>>>>>
>>>>>> This example contradicts what you said above "It ensures basically that
>>>>>> the
>>>>>> constant has 31-bit precision".
>>>>>
>>>>> Why?  Certainly Y = 8 has 31-bit (or less) precision.  So it has the same
>>>>> representation in SImode and DImode, and the test above on XEXP (x, 1)
>>>>> succeeds.
>>>>
>>>> And then we permute conversion and addition, which leads to the issue you
>>>> raised above.  In another word, the current code permutes conversion
>>>> and addition.
>>>> It leads to different values in case of symbol (0xfffffffc) + 8.
>>>> Basically the current
>>>> test for 31-bit (or less) precision is bogus.  The real question is
>>>> for a address
>>>> computation, A + B, if address wrap-around is supported in
>>>> convert_memory_address_addr_space.
>>>
>>> Unless the code has already reassociated the additions already.
>>> Like in the AARCH64 ILP32 case:
>>>
>>> (plus:SI (plus:SI (mult:SI (reg/v:SI 80 [ b ])
>>>            (const_int -4 [0xfffffffffffffffc]))
>>>        (subreg/s/u:SI (reg/v/f:DI 79 [ a ]) 0))
>>>    (const_int -1073742592 [0xffffffffbffffd00]))
>>>
>>> The Tree level is correct in that it did not reassociate the addition
>>> but the RTL level ignores that.
>>>
>>> So this patch is invalid and incorrect unless you know the non
>>> constant part of the addition is a pointer (which is not the case
>>> here).
>>
>> There is an address overflow.  Is the address overflow behavior
>> defined here?
>
> There was no address overflow in the original code and there was no address overflow in the tree level. The rtl level does introduce an address overflow but the semantics of plus is defined to be wrapping so there is no overflow.   This is blocking me from testing ilp32 under gnu/Linux as ld.so gets miscompiled and stack addresses have the "sign" bit set.
>

What is your Pmode?
Andrew Pinski May 29, 2014, 5:20 p.m. UTC | #14
> On May 29, 2014, at 10:09 AM, "H.J. Lu" <hjl.tools@gmail.com> wrote:
> 
>> On Thu, May 29, 2014 at 9:23 AM,  <pinskia@gmail.com> wrote:
>> 
>> 
>>>> On May 29, 2014, at 9:13 AM, "H.J. Lu" <hjl.tools@gmail.com> wrote:
>>>> 
>>>>> On Wed, May 28, 2014 at 9:52 PM, Andrew Pinski <pinskia@gmail.com> wrote:
>>>>>> On Wed, Jul 13, 2011 at 9:39 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>>> On Wed, Jul 13, 2011 at 9:13 AM, Paolo Bonzini <bonzini@gnu.org> wrote:
>>>>>>> On 07/11/2011 05:54 PM, H.J. Lu wrote:
>>>>>>> 
>>>>>>> The key is the
>>>>>>>> 
>>>>>>>>   XEXP (x, 1) == convert_memory_address_addr_space
>>>>>>>>                  (to_mode, XEXP (x, 1), as)
>>>>>>>> 
>>>>>>>> test.  It ensures basically that the constant has 31-bit precision,
>>>>>>>> because
>>>>>>>> otherwise the constant would change from e.g. (const_int -0x7ffffffc)
>>>>>>>> to
>>>>>>>> (const_int 0x80000004) when zero-extending it from SImode to DImode.
>>>>>>>> 
>>>>>>>> But I'm not sure it's safe.  You have,
>>>>>>>> 
>>>>>>>>  (zero_extend:DI (plus:SI FOO:SI) (const_int Y))
>>>>>>>> 
>>>>>>>> and you want to convert it to
>>>>>>>> 
>>>>>>>>  (plus:DI FOO:DI (zero_extend:DI (const_int Y)))
>>>>>>>> 
>>>>>>>> (where the zero_extend is folded).  Ignore that FOO is a SYMBOL_REF
>>>>>>>> (this
>>>>>>>> piece of code does not assume anything about its shape); if FOO ==
>>>>>>>> 0xfffffffc and Y = 8, the result will be respectively 0x4 (valid) and
>>>>>>>> 0x100000004 (invalid).
>>>>>>> 
>>>>>>> This example contradicts what you said above "It ensures basically that
>>>>>>> the
>>>>>>> constant has 31-bit precision".
>>>>>> 
>>>>>> Why?  Certainly Y = 8 has 31-bit (or less) precision.  So it has the same
>>>>>> representation in SImode and DImode, and the test above on XEXP (x, 1)
>>>>>> succeeds.
>>>>> 
>>>>> And then we permute conversion and addition, which leads to the issue you
>>>>> raised above.  In another word, the current code permutes conversion
>>>>> and addition.
>>>>> It leads to different values in case of symbol (0xfffffffc) + 8.
>>>>> Basically the current
>>>>> test for 31-bit (or less) precision is bogus.  The real question is
>>>>> for a address
>>>>> computation, A + B, if address wrap-around is supported in
>>>>> convert_memory_address_addr_space.
>>>> 
>>>> Unless the code has already reassociated the additions already.
>>>> Like in the AARCH64 ILP32 case:
>>>> 
>>>> (plus:SI (plus:SI (mult:SI (reg/v:SI 80 [ b ])
>>>>           (const_int -4 [0xfffffffffffffffc]))
>>>>       (subreg/s/u:SI (reg/v/f:DI 79 [ a ]) 0))
>>>>   (const_int -1073742592 [0xffffffffbffffd00]))
>>>> 
>>>> The Tree level is correct in that it did not reassociate the addition
>>>> but the RTL level ignores that.
>>>> 
>>>> So this patch is invalid and incorrect unless you know the non
>>>> constant part of the addition is a pointer (which is not the case
>>>> here).
>>> 
>>> There is an address overflow.  Is the address overflow behavior
>>> defined here?
>> 
>> There was no address overflow in the original code and there was no address overflow in the tree level. The rtl level does introduce an address overflow but the semantics of plus is defined to be wrapping so there is no overflow.   This is blocking me from testing ilp32 under gnu/Linux as ld.so gets miscompiled and stack addresses have the "sign" bit set.
>> 
> 
> What is your Pmode?

Pmode is dimode while ptr_mode is simode.  Pointers are zero extended when converting between si and di modes. 

Thanks,
Andrew


> 
> 
> -- 
> H.J.
Paolo Bonzini May 30, 2014, 7:18 a.m. UTC | #15
Il 29/05/2014 19:20, pinskia@gmail.com ha scritto:
>> What is your Pmode?
>
> Pmode is dimode while ptr_mode is simode.  Pointers are zero extended when converting between si and di modes.

As you noted, the fundamental difference between x32 and aarch64 is that 
aarch64 will still use 64-bit accesses instead of 32-bit.  Did you 
define the VALID_POINTER_MODE hook to rule out Pmode as a valid pointer 
mode?  Perhaps you can use it to make this transformation conditional on 
VALID_POINTER_MODE(Pmode).

Paolo
diff mbox

Patch

diff --git a/gcc/explow.c b/gcc/explow.c
index 3c692f4..ee52e92 100644
--- a/gcc/explow.c
+++ b/gcc/explow.c
@@ -320,8 +320,9 @@  break_out_memory_refs (rtx x)
    arithmetic insns can be used.  */
 
 rtx
-convert_memory_address_addr_space (enum machine_mode to_mode ATTRIBUTE_UNUSED,
-				   rtx x, addr_space_t as ATTRIBUTE_UNUSED)
+convert_memory_address_addr_space_1 (enum machine_mode to_mode ATTRIBUTE_UNUSED,
+				     rtx x, addr_space_t as ATTRIBUTE_UNUSED,
+				     bool no_emit ATTRIBUTE_UNUSED)
 {
 #ifndef POINTERS_EXTEND_UNSIGNED
   gcc_assert (GET_MODE (x) == to_mode || GET_MODE (x) == VOIDmode);
@@ -406,10 +407,17 @@  convert_memory_address_addr_space (enum machine_mode to_mode ATTRIBUTE_UNUSED,
       break;
     }
 
-  return convert_modes (to_mode, from_mode,
-			x, POINTERS_EXTEND_UNSIGNED);
+  return convert_modes_1 (to_mode, from_mode, x,
+			  POINTERS_EXTEND_UNSIGNED, no_emit);
 #endif /* defined(POINTERS_EXTEND_UNSIGNED) */
 }
+
+rtx
+convert_memory_address_addr_space (enum machine_mode to_mode,
+				   rtx x, addr_space_t as)
+{
+  return convert_memory_address_addr_space_1 (to_mode, x, as, false);
+}
 
 /* Return something equivalent to X but valid as a memory address for something
    of mode MODE in the named address space AS.  When X is not itself valid,
diff --git a/gcc/expr.c b/gcc/expr.c
index fb4379f..de7f150 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -693,13 +693,16 @@  convert_to_mode (enum machine_mode mode, rtx x, int unsignedp)
    Both modes may be floating, or both integer.
    UNSIGNEDP is nonzero if X is an unsigned value.
 
+   If NO_EMIT is true, don't emit any instructions.
+
    This can be done by referring to a part of X in place
    or by copying to a new temporary with conversion.
 
    You can give VOIDmode for OLDMODE, if you are sure X has a nonvoid mode.  */
 
 rtx
-convert_modes (enum machine_mode mode, enum machine_mode oldmode, rtx x, int unsignedp)
+convert_modes_1 (enum machine_mode mode, enum machine_mode oldmode,
+		 rtx x, int unsignedp, bool no_emit)
 {
   rtx temp;
 
@@ -709,7 +712,12 @@  convert_modes (enum machine_mode mode, enum machine_mode oldmode, rtx x, int uns
   if (GET_CODE (x) == SUBREG && SUBREG_PROMOTED_VAR_P (x)
       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))) >= GET_MODE_SIZE (mode)
       && SUBREG_PROMOTED_UNSIGNED_P (x) == unsignedp)
-    x = gen_lowpart (mode, x);
+    {
+      if (no_emit)
+	x = rtl_hooks.gen_lowpart_no_emit (mode, x);
+      else
+	x = gen_lowpart (mode, x);
+    }
 
   if (GET_MODE (x) != VOIDmode)
     oldmode = GET_MODE (x);
@@ -773,7 +781,10 @@  convert_modes (enum machine_mode mode, enum machine_mode oldmode, rtx x, int uns
 	  return gen_int_mode (val, mode);
 	}
 
-      return gen_lowpart (mode, x);
+      if (no_emit)
+	return rtl_hooks.gen_lowpart_no_emit (mode, x);
+      else
+	return gen_lowpart (mode, x);
     }
 
   /* Converting from integer constant into mode is always equivalent to an
@@ -784,10 +795,18 @@  convert_modes (enum machine_mode mode, enum machine_mode oldmode, rtx x, int uns
       return simplify_gen_subreg (mode, x, oldmode, 0);
     }
 
+  gcc_assert (!no_emit);
   temp = gen_reg_rtx (mode);
   convert_move (temp, x, unsignedp);
   return temp;
 }
+
+rtx
+convert_modes (enum machine_mode mode, enum machine_mode oldmode,
+	       rtx x, int unsignedp)
+{
+  return convert_modes_1 (mode, oldmode, x, unsignedp, false);
+}
 
 /* Return the largest alignment we can use for doing a move (or store)
    of MAX_PIECES.  ALIGN is the largest alignment we could use.  */
diff --git a/gcc/expr.h b/gcc/expr.h
index cb4050d..2ac9788 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -267,6 +267,8 @@  extern rtx convert_to_mode (enum machine_mode, rtx, int);
 
 /* Convert an rtx to MODE from OLDMODE and return the result.  */
 extern rtx convert_modes (enum machine_mode, enum machine_mode, rtx, int);
+extern rtx convert_modes_1 (enum machine_mode, enum machine_mode, rtx,
+			    int, bool);
 
 /* Emit code to move a block Y to a block X.  */
 
diff --git a/gcc/rtl.h b/gcc/rtl.h
index e3ceecd..b01eef8 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -1638,8 +1638,13 @@  extern int byte_lowpart_offset (enum machine_mode, enum machine_mode);
 extern rtx make_safe_from (rtx, rtx);
 extern rtx convert_memory_address_addr_space (enum machine_mode, rtx,
 					      addr_space_t);
+extern rtx convert_memory_address_addr_space_1 (enum machine_mode, rtx,
+						addr_space_t, bool);
 #define convert_memory_address(to_mode,x) \
 	convert_memory_address_addr_space ((to_mode), (x), ADDR_SPACE_GENERIC)
+#define convert_memory_address_1(to_mode,x,no_emit) \
+	convert_memory_address_addr_space_1 ((to_mode), (x), \
+					     ADDR_SPACE_GENERIC, (no_emit))
 extern const char *get_insn_name (int);
 extern rtx get_last_insn_anywhere (void);
 extern rtx get_first_nonnote_insn (void);
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 82b818b..189c201 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -1150,7 +1150,7 @@  simplify_unary_operation_1 (enum rtx_code code, enum machine_mode mode, rtx op)
 		  && REG_P (SUBREG_REG (op))
 		  && REG_POINTER (SUBREG_REG (op))
 		  && GET_MODE (SUBREG_REG (op)) == Pmode)))
-	return convert_memory_address (Pmode, op);
+	return convert_memory_address_1 (Pmode, op, true);
 #endif
       break;
 
@@ -1243,7 +1243,7 @@  simplify_unary_operation_1 (enum rtx_code code, enum machine_mode mode, rtx op)
 		  && REG_P (SUBREG_REG (op))
 		  && REG_POINTER (SUBREG_REG (op))
 		  && GET_MODE (SUBREG_REG (op)) == Pmode)))
-	return convert_memory_address (Pmode, op);
+	return convert_memory_address_1 (Pmode, op, true);
 #endif
       break;