diff mbox

PATCH: PR target/59379: [4.9 Regression] gomp_init_num_threads is compiled into an infinite loop with --with-arch=corei7 --with-cpu=slm

Message ID CAMe9rOo+6w74yPz6fuCF6-dUAz8Lm+F_mGPPZuedMiSykr92sA@mail.gmail.com
State New
Headers show

Commit Message

H.J. Lu Jan. 19, 2014, 2:20 p.m. UTC
On Sun, Jan 19, 2014 at 1:55 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Sat, Jan 18, 2014 at 9:15 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> For LEA operation with SImode_address_operand, which zero-extends SImode
>> to DImode, ix86_split_lea_for_addr turns
>>
>> (set (reg:DI) ...)
>>
>> into
>>
>> (set (reg:SI) ...)
>>
>> We need to do
>>
>> (set (reg:DI) (zero_extend:DI (reg:SI)))
>>
>> at the end. If the LEA operation is
>>
>> (set (reg:DI) (zero_extend:DI (reg:SI)))
>
> ree pass should remove these. However, we can just emit zero-extend
> insn at the end of sequence, and ree (which is located after
> post-reload split) should handle it:
>
> --cut here--
> Index: config/i386/i386.md
> ===================================================================
> --- config/i386/i386.md (revision 206753)
> +++ config/i386/i386.md (working copy)
> @@ -5428,12 +5428,17 @@
>    operands[0] = SET_DEST (pat);
>    operands[1] = SET_SRC (pat);
>
> -  /* Emit all operations in SImode for zero-extended addresses.  Recall
> -     that x86_64 inheretly zero-extends SImode operations to DImode.  */
> +  /* Emit all operations in SImode for zero-extended addresses.  */
>    if (SImode_address_operand (operands[1], VOIDmode))
>      mode = SImode;
>
>    ix86_split_lea_for_addr (curr_insn, operands, mode);
> +
> +  /* Zero-extend return register to DImode for zero-extended addresses.  */
> +  if (mode != <MODE>mode)
> +    emit_insn (gen_zero_extendsidi2
> +              (operands[0], gen_lowpart ((mode), operands[0])));
> +
>    DONE;
>  }
>    [(set_attr "type" "lea")
> --cut here--
>
> The patch was tested with a testcase from Comment #9 of the PR using
> "-O --march=corei7 -mtune=slm", and resulting binary runs without
> problems.
>

Yes, the resulting GCC works correctly.  However, we generate
extra

(set (reg:DI) (zero_extend:DI (reg:SI)))

It is because we generate

(set (reg:SI) (reg:SI)
(set (reg:DI) (zero_extend:DI (reg:SI)))

REE pass doesn't know

(set (reg:SI) (reg:SI)

has an implicit ZERO_EXTEND.  Here is a testcase:

---foo.c---
extern __thread unsigned int __bid_IDEC_glbflags;
typedef unsigned long long UINT64;
typedef __attribute__ ((aligned(16))) struct
{
  UINT64 w[2];
} UINT128;
extern UINT64 __bid64_from_uint64 (UINT64);
extern void __bid_round64_2_18 (int q,
int x,
UINT64 C,
UINT64 * ptr_Cstar,
int *delta_exp,
int *ptr_is_midpoint_lt_even,
int *ptr_is_midpoint_gt_even,
int *ptr_is_inexact_lt_midpoint,
int *ptr_is_inexact_gt_midpoint);
extern void __bid_round128_19_38 (int q,
 int x,
 UINT128 C,
 UINT128 * ptr_Cstar,
 int *delta_exp,
 int *ptr_is_midpoint_lt_even,
 int *ptr_is_midpoint_gt_even,
 int *ptr_is_inexact_lt_midpoint,
 int *ptr_is_inexact_gt_midpoint);
UINT64
__bid64_from_uint64 (UINT64 x)
{
  UINT64 res;
  UINT128 x128, res128;
  unsigned int q, ind;
  int incr_exp = 0;
  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0;
  int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
  if (x <= 0x002386F26FC0ffffull) {
    if (x < 0x0020000000000000ull) {
      res = 0x31c0000000000000ull | x;
    } else {
      res = 0x6c70000000000000ull | (x & 0x0007ffffffffffffull);
    }
  }
  else
    {
      if (x < 0x16345785d8a0000ull) {
q = 17;
ind = 1;
      } else if (x < 0xde0b6b3a7640000ull) {
q = 18;
ind = 2;
      } else if (x < 0x8ac7230489e80000ull) {
q = 19;
ind = 3;
      } else {
q = 20;
ind = 4;
      }
      if (q <= 19) {
__bid_round64_2_18 (
   q, ind, x, &res, &incr_exp,
   &is_midpoint_lt_even, &is_midpoint_gt_even,
   &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
      }
      else {
x128.w[1] = 0x0;
x128.w[0] = x;
__bid_round128_19_38 (q, ind, x128, &res128, &incr_exp,
     &is_midpoint_lt_even, &is_midpoint_gt_even,
     &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
res = res128.w[0];
      }
      if (incr_exp)
ind++;
      if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
 is_midpoint_lt_even || is_midpoint_gt_even)
*&__bid_IDEC_glbflags |= 0x00000020;
      if (res < 0x0020000000000000ull) {
res = (((UINT64) ind + 398) << 53) | res;
      } else
{
 res = 0x6000000000000000ull | (((UINT64) ind + 398) << 51) |
   (res & 0x0007ffffffffffffull);
}
    }
  return(res);;
}
-----------

Compiling with -fPIC -O2, the differences between your patch and mine
are


My patch removes 2 extra

(set (reg:DI) (zero_extend:DI (reg:SI)))

Comments

Uros Bizjak Jan. 19, 2014, 2:24 p.m. UTC | #1
On Sun, Jan 19, 2014 at 3:20 PM, H.J. Lu <hjl.tools@gmail.com> wrote:

>>> For LEA operation with SImode_address_operand, which zero-extends SImode
>>> to DImode, ix86_split_lea_for_addr turns
>>>
>>> (set (reg:DI) ...)
>>>
>>> into
>>>
>>> (set (reg:SI) ...)
>>>
>>> We need to do
>>>
>>> (set (reg:DI) (zero_extend:DI (reg:SI)))
>>>
>>> at the end. If the LEA operation is
>>>
>>> (set (reg:DI) (zero_extend:DI (reg:SI)))
>>
>> ree pass should remove these. However, we can just emit zero-extend
>> insn at the end of sequence, and ree (which is located after
>> post-reload split) should handle it:
>>
>> --cut here--
>> Index: config/i386/i386.md
>> ===================================================================
>> --- config/i386/i386.md (revision 206753)
>> +++ config/i386/i386.md (working copy)
>> @@ -5428,12 +5428,17 @@
>>    operands[0] = SET_DEST (pat);
>>    operands[1] = SET_SRC (pat);
>>
>> -  /* Emit all operations in SImode for zero-extended addresses.  Recall
>> -     that x86_64 inheretly zero-extends SImode operations to DImode.  */
>> +  /* Emit all operations in SImode for zero-extended addresses.  */
>>    if (SImode_address_operand (operands[1], VOIDmode))
>>      mode = SImode;
>>
>>    ix86_split_lea_for_addr (curr_insn, operands, mode);
>> +
>> +  /* Zero-extend return register to DImode for zero-extended addresses.  */
>> +  if (mode != <MODE>mode)
>> +    emit_insn (gen_zero_extendsidi2
>> +              (operands[0], gen_lowpart ((mode), operands[0])));
>> +
>>    DONE;
>>  }
>>    [(set_attr "type" "lea")
>> --cut here--
>>
>> The patch was tested with a testcase from Comment #9 of the PR using
>> "-O --march=corei7 -mtune=slm", and resulting binary runs without
>> problems.
>>
>
> Yes, the resulting GCC works correctly.  However, we generate
> extra
>
> (set (reg:DI) (zero_extend:DI (reg:SI)))
>
> It is because we generate
>
> (set (reg:SI) (reg:SI)
> (set (reg:DI) (zero_extend:DI (reg:SI)))
>
> REE pass doesn't know
>
> (set (reg:SI) (reg:SI)
>
> has an implicit ZERO_EXTEND.  Here is a testcase:

This is the correct sequence,and REE pass should be improved to handle
this situation.

Note, that the problem was that we assumed SImode operations
(including move) have implicit DImode zero-extend, but in fact we
haven't communicate this to the compiler in a proper way.

So, I propose we go with my patch and file an enhancement PR for the REE pass.

Uros.
H.J. Lu Jan. 19, 2014, 2:30 p.m. UTC | #2
On Sun, Jan 19, 2014 at 6:24 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Sun, Jan 19, 2014 at 3:20 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>
>>>> For LEA operation with SImode_address_operand, which zero-extends SImode
>>>> to DImode, ix86_split_lea_for_addr turns
>>>>
>>>> (set (reg:DI) ...)
>>>>
>>>> into
>>>>
>>>> (set (reg:SI) ...)
>>>>
>>>> We need to do
>>>>
>>>> (set (reg:DI) (zero_extend:DI (reg:SI)))
>>>>
>>>> at the end. If the LEA operation is
>>>>
>>>> (set (reg:DI) (zero_extend:DI (reg:SI)))
>>>
>>> ree pass should remove these. However, we can just emit zero-extend
>>> insn at the end of sequence, and ree (which is located after
>>> post-reload split) should handle it:
>>>
>>> --cut here--
>>> Index: config/i386/i386.md
>>> ===================================================================
>>> --- config/i386/i386.md (revision 206753)
>>> +++ config/i386/i386.md (working copy)
>>> @@ -5428,12 +5428,17 @@
>>>    operands[0] = SET_DEST (pat);
>>>    operands[1] = SET_SRC (pat);
>>>
>>> -  /* Emit all operations in SImode for zero-extended addresses.  Recall
>>> -     that x86_64 inheretly zero-extends SImode operations to DImode.  */
>>> +  /* Emit all operations in SImode for zero-extended addresses.  */
>>>    if (SImode_address_operand (operands[1], VOIDmode))
>>>      mode = SImode;
>>>
>>>    ix86_split_lea_for_addr (curr_insn, operands, mode);
>>> +
>>> +  /* Zero-extend return register to DImode for zero-extended addresses.  */
>>> +  if (mode != <MODE>mode)
>>> +    emit_insn (gen_zero_extendsidi2
>>> +              (operands[0], gen_lowpart ((mode), operands[0])));
>>> +
>>>    DONE;
>>>  }
>>>    [(set_attr "type" "lea")
>>> --cut here--
>>>
>>> The patch was tested with a testcase from Comment #9 of the PR using
>>> "-O --march=corei7 -mtune=slm", and resulting binary runs without
>>> problems.
>>>
>>
>> Yes, the resulting GCC works correctly.  However, we generate
>> extra
>>
>> (set (reg:DI) (zero_extend:DI (reg:SI)))
>>
>> It is because we generate
>>
>> (set (reg:SI) (reg:SI)
>> (set (reg:DI) (zero_extend:DI (reg:SI)))
>>
>> REE pass doesn't know
>>
>> (set (reg:SI) (reg:SI)
>>
>> has an implicit ZERO_EXTEND.  Here is a testcase:
>
> This is the correct sequence,and REE pass should be improved to handle
> this situation.
>
> Note, that the problem was that we assumed SImode operations
> (including move) have implicit DImode zero-extend, but in fact we
> haven't communicate this to the compiler in a proper way.
>
> So, I propose we go with my patch and file an enhancement PR for the REE pass.
>

That is fine with me.  Please install it on all affected branches
and close the PR.  I will open a new PR for REE pass.

Thanks.
Uros Bizjak Jan. 19, 2014, 3:48 p.m. UTC | #3
On Sun, Jan 19, 2014 at 3:30 PM, H.J. Lu <hjl.tools@gmail.com> wrote:

>>>>> For LEA operation with SImode_address_operand, which zero-extends SImode
>>>>> to DImode, ix86_split_lea_for_addr turns
>>>>>
>>>>> (set (reg:DI) ...)
>>>>>
>>>>> into
>>>>>
>>>>> (set (reg:SI) ...)
>>>>>
>>>>> We need to do
>>>>>
>>>>> (set (reg:DI) (zero_extend:DI (reg:SI)))
>>>>>
>>>>> at the end. If the LEA operation is
>>>>>
>>>>> (set (reg:DI) (zero_extend:DI (reg:SI)))
>>>>
>>>> ree pass should remove these. However, we can just emit zero-extend
>>>> insn at the end of sequence, and ree (which is located after
>>>> post-reload split) should handle it:
>>>>
>>>> --cut here--
>>>> Index: config/i386/i386.md
>>>> ===================================================================
>>>> --- config/i386/i386.md (revision 206753)
>>>> +++ config/i386/i386.md (working copy)
>>>> @@ -5428,12 +5428,17 @@
>>>>    operands[0] = SET_DEST (pat);
>>>>    operands[1] = SET_SRC (pat);
>>>>
>>>> -  /* Emit all operations in SImode for zero-extended addresses.  Recall
>>>> -     that x86_64 inheretly zero-extends SImode operations to DImode.  */
>>>> +  /* Emit all operations in SImode for zero-extended addresses.  */
>>>>    if (SImode_address_operand (operands[1], VOIDmode))
>>>>      mode = SImode;
>>>>
>>>>    ix86_split_lea_for_addr (curr_insn, operands, mode);
>>>> +
>>>> +  /* Zero-extend return register to DImode for zero-extended addresses.  */
>>>> +  if (mode != <MODE>mode)
>>>> +    emit_insn (gen_zero_extendsidi2
>>>> +              (operands[0], gen_lowpart ((mode), operands[0])));
>>>> +
>>>>    DONE;
>>>>  }
>>>>    [(set_attr "type" "lea")
>>>> --cut here--
>>>>
>>>> The patch was tested with a testcase from Comment #9 of the PR using
>>>> "-O --march=corei7 -mtune=slm", and resulting binary runs without
>>>> problems.
>>>>
>>>
>>> Yes, the resulting GCC works correctly.  However, we generate
>>> extra
>>>
>>> (set (reg:DI) (zero_extend:DI (reg:SI)))
>>>
>>> It is because we generate
>>>
>>> (set (reg:SI) (reg:SI)
>>> (set (reg:DI) (zero_extend:DI (reg:SI)))
>>>
>>> REE pass doesn't know
>>>
>>> (set (reg:SI) (reg:SI)
>>>
>>> has an implicit ZERO_EXTEND.  Here is a testcase:
>>
>> This is the correct sequence,and REE pass should be improved to handle
>> this situation.
>>
>> Note, that the problem was that we assumed SImode operations
>> (including move) have implicit DImode zero-extend, but in fact we
>> haven't communicate this to the compiler in a proper way.
>>
>> So, I propose we go with my patch and file an enhancement PR for the REE pass.
>>
>
> That is fine with me.  Please install it on all affected branches
> and close the PR.  I will open a new PR for REE pass.

Installed with following ChangeLog:

2014-01-18  Uros Bizjak  <ubizjak@gmail.com>
        H.J. Lu  <hongjiu.lu@intel.com>

    PR target/59379
    * config/i386/i386.md (*lea<mode>): Zero-extend return register
    to DImode for zero-extended addresses.

Bootstrapped and regression tested on x86_64-pc-linux-gnu, will
backport in a couple of days.

Uros.
diff mbox

Patch

--- bad.s 2014-01-19 06:10:28.006570325 -0800
+++ foo.s 2014-01-19 06:11:46.117754696 -0800
@@ -84,19 +84,18 @@  __bid64_from_uint64:
  movabsq $9007199254740991, %rax
  cmpq %rax, %rbx
  jbe .L23
- movl %ebp, %edx
  leaq 88(%rsp), %rsp
  .cfi_remember_state
  .cfi_def_cfa_offset 24
  movabsq $2251799813685247, %rax
- movl %edx, %edx
+ movl %ebp, %edx
  andq %rbx, %rax
- movabsq $6917529027641081856, %rcx
  popq %rbx
  .cfi_def_cfa_offset 16
+ movabsq $6917529027641081856, %rcx
  addq $398, %rdx
- orq %rcx, %rax
  salq $51, %rdx
+ orq %rcx, %rax
  popq %rbp
  .cfi_def_cfa_offset 8
  orq %rdx, %rax
@@ -154,7 +153,6 @@  __bid64_from_uint64:
  leaq 88(%rsp), %rsp
  .cfi_remember_state
  .cfi_def_cfa_offset 24
- movl %eax, %eax
  addq $398, %rax
  salq $53, %rax
  orq %rbx, %rax