diff mbox

[committed] Use target-insns.def for atomic_test_and_set

Message ID 87r3nsm32u.fsf@e105548-lin.cambridge.arm.com
State New
Headers show

Commit Message

Richard Sandiford July 28, 2015, 8:36 p.m. UTC
Bootstrapped & regression-tested on x86_64-linux-gnu and aarch64-linux-gnu.
Also tested via config-list.mk.  Committed as preapproved.

Thanks,
Richard


gcc/
	* target-insns.def (atomic_test_and_set): New targetm instruction
	pattern.
	* optabs.c (maybe_emit_atomic_test_and_set): Use it instead of
	HAVE_*/gen_* interface.

Comments

Andrew Pinski July 28, 2015, 9:35 p.m. UTC | #1
On Tue, Jul 28, 2015 at 1:36 PM, Richard Sandiford
<richard.sandiford@arm.com> wrote:
> Bootstrapped & regression-tested on x86_64-linux-gnu and aarch64-linux-gnu.
> Also tested via config-list.mk.  Committed as preapproved.
>
> Thanks,
> Richard
>
>
> gcc/
>         * target-insns.def (atomic_test_and_set): New targetm instruction
>         pattern.
>         * optabs.c (maybe_emit_atomic_test_and_set): Use it instead of
>         HAVE_*/gen_* interface.
>
> Index: gcc/target-insns.def
> ===================================================================
> --- gcc/target-insns.def        2015-07-28 21:00:09.815019853 +0100
> +++ gcc/target-insns.def        2015-07-28 21:00:09.811019905 +0100
> @@ -31,6 +31,7 @@
>
>     Instructions should be documented in md.texi rather than here.  */
>  DEF_TARGET_INSN (allocate_stack, (rtx x0, rtx x1))
> +DEF_TARGET_INSN (atomic_test_and_set, (rtx x0, rtx x1, rtx x2))
>  DEF_TARGET_INSN (builtin_longjmp, (rtx x0))
>  DEF_TARGET_INSN (builtin_setjmp_receiver, (rtx x0))
>  DEF_TARGET_INSN (builtin_setjmp_setup, (rtx x0))
> Index: gcc/optabs.c
> ===================================================================
> --- gcc/optabs.c        2015-07-28 21:00:09.815019853 +0100
> +++ gcc/optabs.c        2015-07-28 21:00:09.811019905 +0100
> @@ -7258,35 +7258,30 @@ maybe_emit_compare_and_swap_exchange_loo
>     using the atomic_test_and_set instruction pattern.  A boolean value
>     is returned from the operation, using TARGET if possible.  */
>
> -#ifndef HAVE_atomic_test_and_set
> -#define HAVE_atomic_test_and_set 0
> -#define CODE_FOR_atomic_test_and_set CODE_FOR_nothing
> -#endif
> -
>  static rtx
>  maybe_emit_atomic_test_and_set (rtx target, rtx mem, enum memmodel model)
>  {
>    machine_mode pat_bool_mode;
>    struct expand_operand ops[3];
>
> -  if (!HAVE_atomic_test_and_set)
> +  if (!targetm.have_atomic_test_and_set ())
>      return NULL_RTX;

I know this was not there before but this if should be marked as
unlikely as most targets where someone is using __atomic_*/__sync_*
will have those patterns.

Thanks,
Andrew Pinski


>
>    /* While we always get QImode from __atomic_test_and_set, we get
>       other memory modes from __sync_lock_test_and_set.  Note that we
>       use no endian adjustment here.  This matches the 4.6 behavior
>       in the Sparc backend.  */
> -  gcc_checking_assert
> -    (insn_data[CODE_FOR_atomic_test_and_set].operand[1].mode == QImode);
> +  enum insn_code icode = targetm.code_for_atomic_test_and_set;
> +  gcc_checking_assert (insn_data[icode].operand[1].mode == QImode);
>    if (GET_MODE (mem) != QImode)
>      mem = adjust_address_nv (mem, QImode, 0);
>
> -  pat_bool_mode = insn_data[CODE_FOR_atomic_test_and_set].operand[0].mode;
> +  pat_bool_mode = insn_data[icode].operand[0].mode;
>    create_output_operand (&ops[0], target, pat_bool_mode);
>    create_fixed_operand (&ops[1], mem);
>    create_integer_operand (&ops[2], model);
>
> -  if (maybe_expand_insn (CODE_FOR_atomic_test_and_set, 3, ops))
> +  if (maybe_expand_insn (icode, 3, ops))
>      return ops[0].value;
>    return NULL_RTX;
>  }
>
Richard Sandiford July 28, 2015, 10:10 p.m. UTC | #2
Andrew Pinski <pinskia@gmail.com> writes:
> On Tue, Jul 28, 2015 at 1:36 PM, Richard Sandiford
> <richard.sandiford@arm.com> wrote:
>> Bootstrapped & regression-tested on x86_64-linux-gnu and aarch64-linux-gnu.
>> Also tested via config-list.mk.  Committed as preapproved.
>>
>> Thanks,
>> Richard
>>
>>
>> gcc/
>>         * target-insns.def (atomic_test_and_set): New targetm instruction
>>         pattern.
>>         * optabs.c (maybe_emit_atomic_test_and_set): Use it instead of
>>         HAVE_*/gen_* interface.
>>
>> Index: gcc/target-insns.def
>> ===================================================================
>> --- gcc/target-insns.def        2015-07-28 21:00:09.815019853 +0100
>> +++ gcc/target-insns.def        2015-07-28 21:00:09.811019905 +0100
>> @@ -31,6 +31,7 @@
>>
>>     Instructions should be documented in md.texi rather than here.  */
>>  DEF_TARGET_INSN (allocate_stack, (rtx x0, rtx x1))
>> +DEF_TARGET_INSN (atomic_test_and_set, (rtx x0, rtx x1, rtx x2))
>>  DEF_TARGET_INSN (builtin_longjmp, (rtx x0))
>>  DEF_TARGET_INSN (builtin_setjmp_receiver, (rtx x0))
>>  DEF_TARGET_INSN (builtin_setjmp_setup, (rtx x0))
>> Index: gcc/optabs.c
>> ===================================================================
>> --- gcc/optabs.c        2015-07-28 21:00:09.815019853 +0100
>> +++ gcc/optabs.c        2015-07-28 21:00:09.811019905 +0100
>> @@ -7258,35 +7258,30 @@ maybe_emit_compare_and_swap_exchange_loo
>>     using the atomic_test_and_set instruction pattern.  A boolean value
>>     is returned from the operation, using TARGET if possible.  */
>>
>> -#ifndef HAVE_atomic_test_and_set
>> -#define HAVE_atomic_test_and_set 0
>> -#define CODE_FOR_atomic_test_and_set CODE_FOR_nothing
>> -#endif
>> -
>>  static rtx
>>  maybe_emit_atomic_test_and_set (rtx target, rtx mem, enum memmodel model)
>>  {
>>    machine_mode pat_bool_mode;
>>    struct expand_operand ops[3];
>>
>> -  if (!HAVE_atomic_test_and_set)
>> +  if (!targetm.have_atomic_test_and_set ())
>>      return NULL_RTX;
>
> I know this was not there before but this if should be marked as
> unlikely as most targets where someone is using __atomic_*/__sync_*
> will have those patterns.

I think that'd be premature optimisation.  The path being guarded here
generates new rtl instructions, which is a much more expensive operation
than a mispredicted branch.

Thanks,
Richard
Andrew Pinski July 28, 2015, 10:14 p.m. UTC | #3
On Tue, Jul 28, 2015 at 3:10 PM, Richard Sandiford
<richard.sandiford@arm.com> wrote:
> Andrew Pinski <pinskia@gmail.com> writes:
>> On Tue, Jul 28, 2015 at 1:36 PM, Richard Sandiford
>> <richard.sandiford@arm.com> wrote:
>>> Bootstrapped & regression-tested on x86_64-linux-gnu and aarch64-linux-gnu.
>>> Also tested via config-list.mk.  Committed as preapproved.
>>>
>>> Thanks,
>>> Richard
>>>
>>>
>>> gcc/
>>>         * target-insns.def (atomic_test_and_set): New targetm instruction
>>>         pattern.
>>>         * optabs.c (maybe_emit_atomic_test_and_set): Use it instead of
>>>         HAVE_*/gen_* interface.
>>>
>>> Index: gcc/target-insns.def
>>> ===================================================================
>>> --- gcc/target-insns.def        2015-07-28 21:00:09.815019853 +0100
>>> +++ gcc/target-insns.def        2015-07-28 21:00:09.811019905 +0100
>>> @@ -31,6 +31,7 @@
>>>
>>>     Instructions should be documented in md.texi rather than here.  */
>>>  DEF_TARGET_INSN (allocate_stack, (rtx x0, rtx x1))
>>> +DEF_TARGET_INSN (atomic_test_and_set, (rtx x0, rtx x1, rtx x2))
>>>  DEF_TARGET_INSN (builtin_longjmp, (rtx x0))
>>>  DEF_TARGET_INSN (builtin_setjmp_receiver, (rtx x0))
>>>  DEF_TARGET_INSN (builtin_setjmp_setup, (rtx x0))
>>> Index: gcc/optabs.c
>>> ===================================================================
>>> --- gcc/optabs.c        2015-07-28 21:00:09.815019853 +0100
>>> +++ gcc/optabs.c        2015-07-28 21:00:09.811019905 +0100
>>> @@ -7258,35 +7258,30 @@ maybe_emit_compare_and_swap_exchange_loo
>>>     using the atomic_test_and_set instruction pattern.  A boolean value
>>>     is returned from the operation, using TARGET if possible.  */
>>>
>>> -#ifndef HAVE_atomic_test_and_set
>>> -#define HAVE_atomic_test_and_set 0
>>> -#define CODE_FOR_atomic_test_and_set CODE_FOR_nothing
>>> -#endif
>>> -
>>>  static rtx
>>>  maybe_emit_atomic_test_and_set (rtx target, rtx mem, enum memmodel model)
>>>  {
>>>    machine_mode pat_bool_mode;
>>>    struct expand_operand ops[3];
>>>
>>> -  if (!HAVE_atomic_test_and_set)
>>> +  if (!targetm.have_atomic_test_and_set ())
>>>      return NULL_RTX;
>>
>> I know this was not there before but this if should be marked as
>> unlikely as most targets where someone is using __atomic_*/__sync_*
>> will have those patterns.
>
> I think that'd be premature optimisation.  The path being guarded here
> generates new rtl instructions, which is a much more expensive operation
> than a mispredicated branch.

That might be true that the rest is more expensive but the common path
would be through there.
It is not just about mispredicted branch but more about icache miss.

Thanks,
Andrew


>
> Thanks,
> Richard
>
Richard Sandiford July 29, 2015, 8:37 a.m. UTC | #4
Andrew Pinski <pinskia@gmail.com> writes:
> On Tue, Jul 28, 2015 at 3:10 PM, Richard Sandiford
> <richard.sandiford@arm.com> wrote:
>> Andrew Pinski <pinskia@gmail.com> writes:
>>> On Tue, Jul 28, 2015 at 1:36 PM, Richard Sandiford
>>> <richard.sandiford@arm.com> wrote:
>>>> Bootstrapped & regression-tested on x86_64-linux-gnu and aarch64-linux-gnu.
>>>> Also tested via config-list.mk.  Committed as preapproved.
>>>>
>>>> Thanks,
>>>> Richard
>>>>
>>>>
>>>> gcc/
>>>>         * target-insns.def (atomic_test_and_set): New targetm instruction
>>>>         pattern.
>>>>         * optabs.c (maybe_emit_atomic_test_and_set): Use it instead of
>>>>         HAVE_*/gen_* interface.
>>>>
>>>> Index: gcc/target-insns.def
>>>> ===================================================================
>>>> --- gcc/target-insns.def        2015-07-28 21:00:09.815019853 +0100
>>>> +++ gcc/target-insns.def        2015-07-28 21:00:09.811019905 +0100
>>>> @@ -31,6 +31,7 @@
>>>>
>>>>     Instructions should be documented in md.texi rather than here.  */
>>>>  DEF_TARGET_INSN (allocate_stack, (rtx x0, rtx x1))
>>>> +DEF_TARGET_INSN (atomic_test_and_set, (rtx x0, rtx x1, rtx x2))
>>>>  DEF_TARGET_INSN (builtin_longjmp, (rtx x0))
>>>>  DEF_TARGET_INSN (builtin_setjmp_receiver, (rtx x0))
>>>>  DEF_TARGET_INSN (builtin_setjmp_setup, (rtx x0))
>>>> Index: gcc/optabs.c
>>>> ===================================================================
>>>> --- gcc/optabs.c        2015-07-28 21:00:09.815019853 +0100
>>>> +++ gcc/optabs.c        2015-07-28 21:00:09.811019905 +0100
>>>> @@ -7258,35 +7258,30 @@ maybe_emit_compare_and_swap_exchange_loo
>>>>     using the atomic_test_and_set instruction pattern.  A boolean value
>>>>     is returned from the operation, using TARGET if possible.  */
>>>>
>>>> -#ifndef HAVE_atomic_test_and_set
>>>> -#define HAVE_atomic_test_and_set 0
>>>> -#define CODE_FOR_atomic_test_and_set CODE_FOR_nothing
>>>> -#endif
>>>> -
>>>>  static rtx
>>>>  maybe_emit_atomic_test_and_set (rtx target, rtx mem, enum memmodel model)
>>>>  {
>>>>    machine_mode pat_bool_mode;
>>>>    struct expand_operand ops[3];
>>>>
>>>> -  if (!HAVE_atomic_test_and_set)
>>>> +  if (!targetm.have_atomic_test_and_set ())
>>>>      return NULL_RTX;
>>>
>>> I know this was not there before but this if should be marked as
>>> unlikely as most targets where someone is using __atomic_*/__sync_*
>>> will have those patterns.
>>
>> I think that'd be premature optimisation.  The path being guarded here
>> generates new rtl instructions, which is a much more expensive operation
>> than a mispredicted branch.
>
> That might be true that the rest is more expensive but the common path
> would be through there.
> It is not just about mispredicted branch but more about icache miss.

Do you mean an icache miss from having to fetch a new branch
target after a mispredicted branch?  Or one from calling a targetm
function that isn't already cached?  The latter wouldn't be fixed
by marking the condition as unlikely: we'd still need to call the function.

But calls to maybe_emit_atomic_test_and_set are concentrated
in expand.  If the code is hot enough for __builtin_expect or
devirtualisation to be useful, I'd have expected the code to be
available in at least L2.

maybe_emit_atomic_test_and_set is only used to expand calls to
__atomic_test_and_set and __sync_lock_test_and_set (only as a last
resort).  Each call to one of those functions will require a call tree
to be built, will require a gimple statement to be built, will require
trees and gimple statements for the memory address to be built, will
require expand_expr to be called on the argument and a MEM to be
created for the result, will require the instruction pattern rtx to be
generated (itself an indirect call), will require the rtx_insn to be
created and inserted into the rtl stream, will require the instruction
to be reloaded, and will require the instruction to be written out as
text, even at -O0.  An extra indirect function call or missing
__builtin_expect opportunity per built-in call is going to be in the
noise, even if the input code was dominated by calls to these built-in
functions (which seems unlikely).

Also, I think we usually predict null returns to be unlikely anyway,
in the absence of actual profiling information.  In my build the code
already branches on !have and falls through on have.

Thanks,
Richard
diff mbox

Patch

Index: gcc/target-insns.def
===================================================================
--- gcc/target-insns.def	2015-07-28 21:00:09.815019853 +0100
+++ gcc/target-insns.def	2015-07-28 21:00:09.811019905 +0100
@@ -31,6 +31,7 @@ 
 
    Instructions should be documented in md.texi rather than here.  */
 DEF_TARGET_INSN (allocate_stack, (rtx x0, rtx x1))
+DEF_TARGET_INSN (atomic_test_and_set, (rtx x0, rtx x1, rtx x2))
 DEF_TARGET_INSN (builtin_longjmp, (rtx x0))
 DEF_TARGET_INSN (builtin_setjmp_receiver, (rtx x0))
 DEF_TARGET_INSN (builtin_setjmp_setup, (rtx x0))
Index: gcc/optabs.c
===================================================================
--- gcc/optabs.c	2015-07-28 21:00:09.815019853 +0100
+++ gcc/optabs.c	2015-07-28 21:00:09.811019905 +0100
@@ -7258,35 +7258,30 @@  maybe_emit_compare_and_swap_exchange_loo
    using the atomic_test_and_set instruction pattern.  A boolean value
    is returned from the operation, using TARGET if possible.  */
 
-#ifndef HAVE_atomic_test_and_set
-#define HAVE_atomic_test_and_set 0
-#define CODE_FOR_atomic_test_and_set CODE_FOR_nothing
-#endif
-
 static rtx
 maybe_emit_atomic_test_and_set (rtx target, rtx mem, enum memmodel model)
 {
   machine_mode pat_bool_mode;
   struct expand_operand ops[3];
 
-  if (!HAVE_atomic_test_and_set)
+  if (!targetm.have_atomic_test_and_set ())
     return NULL_RTX;
 
   /* While we always get QImode from __atomic_test_and_set, we get
      other memory modes from __sync_lock_test_and_set.  Note that we
      use no endian adjustment here.  This matches the 4.6 behavior
      in the Sparc backend.  */
-  gcc_checking_assert
-    (insn_data[CODE_FOR_atomic_test_and_set].operand[1].mode == QImode);
+  enum insn_code icode = targetm.code_for_atomic_test_and_set;
+  gcc_checking_assert (insn_data[icode].operand[1].mode == QImode);
   if (GET_MODE (mem) != QImode)
     mem = adjust_address_nv (mem, QImode, 0);
 
-  pat_bool_mode = insn_data[CODE_FOR_atomic_test_and_set].operand[0].mode;
+  pat_bool_mode = insn_data[icode].operand[0].mode;
   create_output_operand (&ops[0], target, pat_bool_mode);
   create_fixed_operand (&ops[1], mem);
   create_integer_operand (&ops[2], model);
 
-  if (maybe_expand_insn (CODE_FOR_atomic_test_and_set, 3, ops))
+  if (maybe_expand_insn (icode, 3, ops))
     return ops[0].value;
   return NULL_RTX;
 }