Patchwork rs6000: Add a builtin to read the time base register on PowerPC

login
register
mail settings
Submitter Tulio Magno Quites Machado Filho
Date Aug. 29, 2012, 1:56 p.m.
Message ID <1346248585-10972-1-git-send-email-tuliom@linux.vnet.ibm.com>
Download mbox | patch
Permalink /patch/180707/
State New
Headers show

Comments

Tulio Magno Quites Machado Filho - Aug. 29, 2012, 1:56 p.m.
Add __builtin_ppc_get_timebase to read the time base register on PowerPC.
This is required for applications that measure time at high frequencies
with high precision that can't afford a syscall.

[gcc]
2012-08-29 Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com>

	* config/rs6000/rs6000-builtin.def: Add __builtin_ppc_get_timebase.
	* config/rs6000/rs6000.c (rs6000_expand_noop_builtin): New
	function to expand an expression that calls a builtin without
	arguments.
	(rs6000_expand_builtin): Add __builtin_ppc_get_timebase.
	(rs6000_init_builtins): Likewise.
	* config/rs6000/rs6000.md: Likewise.

[gcc/testsuite]
2012-08-29 Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com>

	* gcc.target/powerpc/ppc-get-timebase.c: New file.
---
 gcc/config/rs6000/rs6000-builtin.def               |    3 ++
 gcc/config/rs6000/rs6000.c                         |   31 +++++++++++++++++
 gcc/config/rs6000/rs6000.md                        |   36 ++++++++++++++++++++
 .../gcc.target/powerpc/ppc-get-timebase.c          |   22 ++++++++++++
 4 files changed, 92 insertions(+), 0 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/ppc-get-timebase.c
Segher Boessenkool - Aug. 29, 2012, 4:58 p.m.
Hi Tulio,

> Add __builtin_ppc_get_timebase to read the time base register on  
> PowerPC.
> This is required for applications that measure time at high  
> frequencies
> with high precision that can't afford a syscall.

For things that do mftb with high frequency, maybe you should also add a
builtin that does just an mftb, i.e. returns a 32-bit result on 32-bit
implementations.

Please add documentation for the new builtin(s).

> --- a/gcc/config/rs6000/rs6000-builtin.def
> +++ b/gcc/config/rs6000/rs6000-builtin.def
> @@ -1429,6 +1429,9 @@ BU_SPECIAL_X (RS6000_BUILTIN_RSQRT,  
> "__builtin_rsqrt", RS6000_BTM_FRSQRTE,
>  BU_SPECIAL_X (RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf",  
> RS6000_BTM_FRSQRTES,
>  	      RS6000_BTC_FP)
>
> +BU_SPECIAL_X (RS6000_BUILTIN_GET_TB, "__builtin_ppc_get_timebase",
> +	     RS6000_BTM_POWERPC, RS6000_BTC_MISC)

RS6000_BTM_POWERPC does not exist anymore.  RS6000_BTM_ALWAYS?

> +/* Expand an expression EXP that calls a builtin without  
> arguments.  */
> +static rtx
> +rs6000_expand_noop_builtin (enum insn_code icode, rtx target)

"noop" gives the wrong idea, "zeroop" perhaps?

> +(define_expand "get_timebase"

You should probably prefix this with powerpc_ or rs6000_ as well.
The existing code is not very consistent in this.

> +  [(use (match_operand:DI 0 "gpc_reg_operand" ""))]
> +  ""
> +  "
> +{
> +  if (TARGET_POWERPC64)
> +    emit_insn (gen_get_timebase_ppc64 (operands[0]));
> +  else if (TARGET_POWERPC)
> +    emit_insn (gen_get_timebase_ppc32 (operands[0]));
> +  else
> +    FAIL;
> +  DONE;
> +}")

TARGET_POWERPC is always true.

> +(define_insn "get_timebase_ppc32"
> +  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
> +        (unspec_volatile:DI [(const_int 0)] UNSPECV_GETTB))
> +   (clobber (match_scratch:SI 1 "=r"))]
> +  "TARGET_POWERPC && !TARGET_POWERPC64"
> +{
> +    return "mftbu %0\;"
> +	   "mftb %L0\;"
> +	   "mftbu %1\;"
> +	   "cmpw %0,%1\;"
> +	   "bne- $-16";
> +})

This only works for WORDS_BIG_ENDIAN.

You should say you clobber CR0 here I think; actually, allow any CRn
instead.

Does mftb work on all supported assemblers?  The machine instruction
is phased out, but some assemblers translate it to mfspr.

> +(define_insn "get_timebase_ppc64"
> +  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
> +        (unspec_volatile:DI [(const_int 0)] UNSPECV_GETTB))]
> +  "TARGET_POWERPC64"
> +{
> +    return "mfspr %0, 268";
> +})

POWER3 needs mftb.

> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/ppc-get-timebase.c
> @@ -0,0 +1,22 @@
> +/* { dg-do run { target { powerpc*-*-* } } } */
> +
> +/* Test if __builtin_ppc_get_timebase() is compatible with the  
> current
> +   processor and if it's changing between reads.  A read failure  
> might indicate
> +   a Power ISA or binutils change.  */
> +
> +#include <inttypes.h>
> +
> +int
> +main(void)
> +{
> +  uint64_t t1, t2, t3;
> +
> +  t1 = __builtin_ppc_get_timebase ();
> +  t2 = __builtin_ppc_get_timebase ();
> +  t3 = __builtin_ppc_get_timebase ();
> +
> +  if (t1 != t2 && t1 != t3 && t2 != t3)
> +    return 0;
> +
> +  return 1;
> +}

On some systems the timebase runs at a rather low frequency, say 20MHz.
This test will spuriously fail there.  Waste a million CPU cycles before
reading TB the second time?


Segher
Andrew Pinski - Aug. 29, 2012, 5:46 p.m.
On Wed, Aug 29, 2012 at 6:56 AM, Tulio Magno Quites Machado Filho
<tuliom@linux.vnet.ibm.com> wrote:
> Add __builtin_ppc_get_timebase to read the time base register on PowerPC.
> This is required for applications that measure time at high frequencies
> with high precision that can't afford a syscall.
>
> [gcc]
> 2012-08-29 Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com>
>
>         * config/rs6000/rs6000-builtin.def: Add __builtin_ppc_get_timebase.
>         * config/rs6000/rs6000.c (rs6000_expand_noop_builtin): New
>         function to expand an expression that calls a builtin without
>         arguments.
>         (rs6000_expand_builtin): Add __builtin_ppc_get_timebase.
>         (rs6000_init_builtins): Likewise.
>         * config/rs6000/rs6000.md: Likewise.
>
> [gcc/testsuite]
> 2012-08-29 Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com>
>
>         * gcc.target/powerpc/ppc-get-timebase.c: New file.
> ---
>  gcc/config/rs6000/rs6000-builtin.def               |    3 ++
>  gcc/config/rs6000/rs6000.c                         |   31 +++++++++++++++++
>  gcc/config/rs6000/rs6000.md                        |   36 ++++++++++++++++++++
>  .../gcc.target/powerpc/ppc-get-timebase.c          |   22 ++++++++++++
>  4 files changed, 92 insertions(+), 0 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/ppc-get-timebase.c
>
> diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
> index c8f8f86..75ad184 100644
> --- a/gcc/config/rs6000/rs6000-builtin.def
> +++ b/gcc/config/rs6000/rs6000-builtin.def
> @@ -1429,6 +1429,9 @@ BU_SPECIAL_X (RS6000_BUILTIN_RSQRT, "__builtin_rsqrt", RS6000_BTM_FRSQRTE,
>  BU_SPECIAL_X (RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf", RS6000_BTM_FRSQRTES,
>               RS6000_BTC_FP)
>
> +BU_SPECIAL_X (RS6000_BUILTIN_GET_TB, "__builtin_ppc_get_timebase",
> +            RS6000_BTM_POWERPC, RS6000_BTC_MISC)
> +
>  /* Darwin CfString builtin.  */
>  BU_SPECIAL_X (RS6000_BUILTIN_CFSTRING, "__builtin_cfstring", RS6000_BTM_ALWAYS,
>               RS6000_BTC_MISC)
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 6c58307..24e274d 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -9747,6 +9747,30 @@ rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
>    return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
>  }
>
> +/* Expand an expression EXP that calls a builtin without arguments.  */
> +static rtx
> +rs6000_expand_noop_builtin (enum insn_code icode, rtx target)
> +{
> +  rtx pat;
> +  enum machine_mode tmode = insn_data[icode].operand[0].mode;
> +
> +  if (icode == CODE_FOR_nothing)
> +    /* Builtin not supported on this processor.  */
> +    return 0;
> +
> +  if (target == 0
> +      || GET_MODE (target) != tmode
> +      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
> +    target = gen_reg_rtx (tmode);
> +
> +  pat = GEN_FCN (icode) (target);
> +  if (! pat)
> +    return 0;
> +  emit_insn (pat);
> +
> +  return target;
> +}
> +
>
>  static rtx
>  rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
> @@ -11336,6 +11360,9 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
>                                            ? CODE_FOR_bpermd_di
>                                            : CODE_FOR_bpermd_si), exp, target);
>
> +    case RS6000_BUILTIN_GET_TB:
> +      return rs6000_expand_noop_builtin (CODE_FOR_get_timebase, target);
> +
>      case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
>      case ALTIVEC_BUILTIN_MASK_FOR_STORE:
>        {
> @@ -11620,6 +11647,10 @@ rs6000_init_builtins (void)
>                                  POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
>    def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
>
> +  ftype = build_function_type_list (unsigned_intDI_type_node,
> +                                   NULL_TREE);
> +  def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
> +
>  #if TARGET_XCOFF
>    /* AIX libm provides clog as __clog.  */
>    if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index d5ffd81..09bdd80 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -136,6 +136,7 @@
>     UNSPECV_PROBE_STACK_RANGE   ; probe range of stack addresses
>     UNSPECV_EH_RR               ; eh_reg_restore
>     UNSPECV_ISYNC               ; isync instruction
> +   UNSPECV_GETTB               ; get timebase built-in
>    ])
>
>
> @@ -14101,6 +14102,41 @@
>    ""
>    "")
>
> +(define_expand "get_timebase"
> +  [(use (match_operand:DI 0 "gpc_reg_operand" ""))]
> +  ""
> +  "
> +{
> +  if (TARGET_POWERPC64)
> +    emit_insn (gen_get_timebase_ppc64 (operands[0]));
> +  else if (TARGET_POWERPC)
> +    emit_insn (gen_get_timebase_ppc32 (operands[0]));
> +  else
> +    FAIL;
> +  DONE;
> +}")
> +
> +(define_insn "get_timebase_ppc32"
> +  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
> +        (unspec_volatile:DI [(const_int 0)] UNSPECV_GETTB))
> +   (clobber (match_scratch:SI 1 "=r"))]
> +  "TARGET_POWERPC && !TARGET_POWERPC64"
> +{
> +    return "mftbu %0\;"
> +          "mftb %L0\;"
> +          "mftbu %1\;"
> +          "cmpw %0,%1\;"
> +          "bne- $-16";
> +})
> +
> +(define_insn "get_timebase_ppc64"
> +  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
> +        (unspec_volatile:DI [(const_int 0)] UNSPECV_GETTB))]
> +  "TARGET_POWERPC64"
> +{
> +    return "mfspr %0, 268";
> +})

The Cell needs to do the 32bit way all the time to work aound the chip
errata/limitation.

Thanks,
Andrew Pinski



> +
>
>
>  (include "sync.md")
> diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-get-timebase.c b/gcc/testsuite/gcc.target/powerpc/ppc-get-timebase.c
> new file mode 100644
> index 0000000..2f40974
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/ppc-get-timebase.c
> @@ -0,0 +1,22 @@
> +/* { dg-do run { target { powerpc*-*-* } } } */
> +
> +/* Test if __builtin_ppc_get_timebase() is compatible with the current
> +   processor and if it's changing between reads.  A read failure might indicate
> +   a Power ISA or binutils change.  */
> +
> +#include <inttypes.h>
> +
> +int
> +main(void)
> +{
> +  uint64_t t1, t2, t3;
> +
> +  t1 = __builtin_ppc_get_timebase ();
> +  t2 = __builtin_ppc_get_timebase ();
> +  t3 = __builtin_ppc_get_timebase ();
> +
> +  if (t1 != t2 && t1 != t3 && t2 != t3)
> +    return 0;
> +
> +  return 1;
> +}
> --
> 1.7.7.6
>
Hans-Peter Nilsson - Aug. 29, 2012, 5:56 p.m.
On Wed, 29 Aug 2012, Segher Boessenkool wrote:
> > +++ b/gcc/testsuite/gcc.target/powerpc/ppc-get-timebase.c
> > @@ -0,0 +1,22 @@
> > +/* { dg-do run { target { powerpc*-*-* } } } */
> > +
> > +/* Test if __builtin_ppc_get_timebase() is compatible with the current
> > +   processor and if it's changing between reads.  A read failure might
> > indicate
> > +   a Power ISA or binutils change.  */
> > +
> > +#include <inttypes.h>
> > +
> > +int
> > +main(void)
> > +{
> > +  uint64_t t1, t2, t3;
> > +
> > +  t1 = __builtin_ppc_get_timebase ();
> > +  t2 = __builtin_ppc_get_timebase ();
> > +  t3 = __builtin_ppc_get_timebase ();
> > +
> > +  if (t1 != t2 && t1 != t3 && t2 != t3)
> > +    return 0;
> > +
> > +  return 1;
> > +}
>
> On some systems the timebase runs at a rather low frequency, say 20MHz.
> This test will spuriously fail there.  Waste a million CPU cycles before
> reading TB the second time?

Waste said million cycles portably by calling sched_yield()?
(Available only on POSIX systems. :)

brgds, H-P
Michael Meissner - Aug. 29, 2012, 6:21 p.m.
On Wed, Aug 29, 2012 at 01:56:05PM -0400, Hans-Peter Nilsson wrote:
> On Wed, 29 Aug 2012, Segher Boessenkool wrote:
> > On some systems the timebase runs at a rather low frequency, say 20MHz.
> > This test will spuriously fail there.  Waste a million CPU cycles before
> > reading TB the second time?
> 
> Waste said million cycles portably by calling sched_yield()?
> (Available only on POSIX systems. :)

Well only for a test environment.  You don't want to call sched_yield in the
normal case, since the apps that do this many millions of times need this to be
as a fast as possible.
Tulio Magno Quites Machado Filho - Aug. 29, 2012, 6:31 p.m.
Hi Segher,

Segher Boessenkool <segher@kernel.crashing.org> writes:
>> Add __builtin_ppc_get_timebase to read the time base register 
>> on PowerPC.
>> This is required for applications that measure time at high 
>> frequencies
>> with high precision that can't afford a syscall.
>
> For things that do mftb with high frequency, maybe you should 
> also add a
> builtin that does just an mftb, i.e. returns a 32-bit result on 
> 32-bit
> implementations.

Are you thinking in a function that returns only the TBL?
I don't think such a builtin would make sense on a 64-bit 
environment, right?
Do you have a suggestion for its name?

> Please add documentation for the new builtin(s).

Sure!

>> --- a/gcc/config/rs6000/rs6000-builtin.def
>> +++ b/gcc/config/rs6000/rs6000-builtin.def
>> @@ -1429,6 +1429,9 @@ BU_SPECIAL_X (RS6000_BUILTIN_RSQRT, 
>> "__builtin_rsqrt", RS6000_BTM_FRSQRTE,
>>  BU_SPECIAL_X (RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf", 
>>  RS6000_BTM_FRSQRTES,
>>  	      RS6000_BTC_FP)
>>
>> +BU_SPECIAL_X (RS6000_BUILTIN_GET_TB, 
>> "__builtin_ppc_get_timebase",
>> +	     RS6000_BTM_POWERPC, RS6000_BTC_MISC)
>
> RS6000_BTM_POWERPC does not exist anymore.  RS6000_BTM_ALWAYS?

I'm replacing.

>> +/* Expand an expression EXP that calls a builtin without 
>> arguments.  */
>> +static rtx
>> +rs6000_expand_noop_builtin (enum insn_code icode, rtx target)
>
> "noop" gives the wrong idea, "zeroop" perhaps?

zeroop is much better.

>
>> +(define_expand "get_timebase"
>
> You should probably prefix this with powerpc_ or rs6000_ as 
> well.
> The existing code is not very consistent in this.

OK.

>> +  [(use (match_operand:DI 0 "gpc_reg_operand" ""))]
>> +  ""
>> +  "
>> +{
>> +  if (TARGET_POWERPC64)
>> +    emit_insn (gen_get_timebase_ppc64 (operands[0]));
>> +  else if (TARGET_POWERPC)
>> +    emit_insn (gen_get_timebase_ppc32 (operands[0]));
>> +  else
>> +    FAIL;
>> +  DONE;
>> +}")
>
> TARGET_POWERPC is always true.

OK.

>> +(define_insn "get_timebase_ppc32"
>> +  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
>> +        (unspec_volatile:DI [(const_int 0)] UNSPECV_GETTB))
>> +   (clobber (match_scratch:SI 1 "=r"))]
>> +  "TARGET_POWERPC && !TARGET_POWERPC64"
>> +{
>> +    return "mftbu %0\;"
>> +	   "mftb %L0\;"
>> +	   "mftbu %1\;"
>> +	   "cmpw %0,%1\;"
>> +	   "bne- $-16";
>> +})
>
> This only works for WORDS_BIG_ENDIAN.

Yes.

> You should say you clobber CR0 here I think; actually, allow any 
> CRn
> instead.

Yes.

> Does mftb work on all supported assemblers?  The machine 
> instruction
> is phased out, but some assemblers translate it to mfspr.

According to the Power ISA 2.06 they should translate it to mfspr.

>> +(define_insn "get_timebase_ppc64"
>> +  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
>> +        (unspec_volatile:DI [(const_int 0)] UNSPECV_GETTB))]
>> +  "TARGET_POWERPC64"
>> +{
>> +    return "mfspr %0, 268";
>> +})
>
> POWER3 needs mftb.

Nice catch!

>> +int
>> +main(void)
>> +{
>> +  uint64_t t1, t2, t3;
>> +
>> +  t1 = __builtin_ppc_get_timebase ();
>> +  t2 = __builtin_ppc_get_timebase ();
>> +  t3 = __builtin_ppc_get_timebase ();
>> +
>> +  if (t1 != t2 && t1 != t3 && t2 != t3)
>> +    return 0;
>> +
>> +  return 1;
>> +}
>
> On some systems the timebase runs at a rather low frequency, say 
> 20MHz.
> This test will spuriously fail there.  Waste a million CPU 
> cycles before
> reading TB the second time?

Yes.

Thank you,
Segher Boessenkool - Aug. 29, 2012, 7:02 p.m.
>> On some systems the timebase runs at a rather low frequency, say  
>> 20MHz.
>> This test will spuriously fail there.  Waste a million CPU cycles  
>> before
>> reading TB the second time?
>
> Waste said million cycles portably by calling sched_yield()?
> (Available only on POSIX systems. :)

I was thinking more along the lines of

	int j;
	for (j = 0; j < 1000000; j++)
		asm("" : : "r"(j));

which is more portable (and a lot more predictable).


Segher
Segher Boessenkool - Aug. 29, 2012, 7:09 p.m.
>> For things that do mftb with high frequency, maybe you should also  
>> add a
>> builtin that does just an mftb, i.e. returns a 32-bit result on 32- 
>> bit
>> implementations.
>
> Are you thinking in a function that returns only the TBL?

On 32-bit, just TBL; on 64-bit, the whole TB (there is no machine
instruction to read just TBL on 64-bit, so it doesn't make much
sense to have it return a 32-bit number).

>>> +(define_insn "get_timebase_ppc32"
>>> +  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
>>> +        (unspec_volatile:DI [(const_int 0)] UNSPECV_GETTB))
>>> +   (clobber (match_scratch:SI 1 "=r"))]
>>> +  "TARGET_POWERPC && !TARGET_POWERPC64"
>>> +{
>>> +    return "mftbu %0\;"
>>> +	   "mftb %L0\;"
>>> +	   "mftbu %1\;"
>>> +	   "cmpw %0,%1\;"
>>> +	   "bne- $-16";
>>> +})
>>
>> This only works for WORDS_BIG_ENDIAN.
>
> Yes.

Do you mean you are fixing it?  :-)

>> Does mftb work on all supported assemblers?  The machine instruction
>> is phased out, but some assemblers translate it to mfspr.
>
> According to the Power ISA 2.06 they should translate it to mfspr.

Yes, I realised that later.

But then a binary compiled with an assembler that emits mfspr for mftb
will not run on POWER3 or 601.  I don't know what to do about that;
maybe just document it.


Segher
Hans-Peter Nilsson - Aug. 29, 2012, 7:14 p.m.
On Wed, 29 Aug 2012, Michael Meissner wrote:
> On Wed, Aug 29, 2012 at 01:56:05PM -0400, Hans-Peter Nilsson wrote:
> > On Wed, 29 Aug 2012, Segher Boessenkool wrote:
> > > On some systems the timebase runs at a rather low frequency, say 20MHz.
> > > This test will spuriously fail there.  Waste a million CPU cycles before
> > > reading TB the second time?
> >
> > Waste said million cycles portably by calling sched_yield()?
> > (Available only on POSIX systems. :)
>
> Well only for a test environment.  You don't want to call sched_yield in the
> normal case, since the apps that do this many millions of times need this to be
> as a fast as possible.

Surely, but IMHO what goes for the normal case is not a valid
reading of "waste"..."millions of cycles". ;)

Point being, for simulator environments, you may not want the
loop that was suggested later.  On the other hand, that might
not be an observable period, either.

brgds, H-P
Tulio Magno Quites Machado Filho - Aug. 29, 2012, 7:40 p.m.
Segher Boessenkool <segher@kernel.crashing.org> writes:

>>> For things that do mftb with high frequency, maybe you should 
>>> also add a
>>> builtin that does just an mftb, i.e. returns a 32-bit result 
>>> on 32- 
>>> bit
>>> implementations.
>>
>> Are you thinking in a function that returns only the TBL?
>
> On 32-bit, just TBL; on 64-bit, the whole TB (there is no 
> machine
> instruction to read just TBL on 64-bit, so it doesn't make much
> sense to have it return a 32-bit number).

OK.

>>>> +(define_insn "get_timebase_ppc32"
>>>> +  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
>>>> +        (unspec_volatile:DI [(const_int 0)] UNSPECV_GETTB))
>>>> +   (clobber (match_scratch:SI 1 "=r"))]
>>>> +  "TARGET_POWERPC && !TARGET_POWERPC64"
>>>> +{
>>>> +    return "mftbu %0\;"
>>>> +	   "mftb %L0\;"
>>>> +	   "mftbu %1\;"
>>>> +	   "cmpw %0,%1\;"
>>>> +	   "bne- $-16";
>>>> +})
>>>
>>> This only works for WORDS_BIG_ENDIAN.
>>
>> Yes.
>
> Do you mean you are fixing it?  :-)

Yes. At least I'll try to.  :-)

>>> Does mftb work on all supported assemblers?  The machine 
>>> instruction
>>> is phased out, but some assemblers translate it to mfspr.
>>
>> According to the Power ISA 2.06 they should translate it to 
>> mfspr.
>
> Yes, I realised that later.
>
> But then a binary compiled with an assembler that emits mfspr 
> for mftb
> will not run on POWER3 or 601.  I don't know what to do about 
> that;
> maybe just document it.

We can easily fix this at runtime, which isn't the case here.

Thanks again,
Segher Boessenkool - Aug. 29, 2012, 9:18 p.m.
> Point being, for simulator environments, you may not want the
> loop that was suggested later.  On the other hand, that might
> not be an observable period, either.

I don't think looping a million times would be too slow for the
testsuite: there are many tests that do a lot more work than that,
already.

The worst case for hardware that I know of can take about 100
clock cycles for one timebase tick.

But how about this then, which only iterates much if the test
fails:


int
main (void)
{
	uint64_t t = __builtin_ppc_get_timebase ();
	int j;

	for (j = 0; j < 1000000; j++)
		if (t != __builtin_ppc_get_timebase ())
			break;

	return (j == 1000000);
}


Segher
David Edelsohn - Sept. 4, 2012, 8:03 p.m.
On Wed, Aug 29, 2012 at 3:09 PM, Segher Boessenkool
<segher@kernel.crashing.org> wrote:
>>> For things that do mftb with high frequency, maybe you should also add a
>>> builtin that does just an mftb, i.e. returns a 32-bit result on 32-bit
>>> implementations.
>>
>> Are you thinking in a function that returns only the TBL?
>
> On 32-bit, just TBL; on 64-bit, the whole TB (there is no machine
> instruction to read just TBL on 64-bit, so it doesn't make much
> sense to have it return a 32-bit number).

It sounds like you are asking for an additional interface for
high-frequency events that only reads one register on both PPC32 and
PPC64.  I do not believe that interface currently exists for PPC in
GLibc and that seems out of the scope of this patch.  It could be a
nice feature, but it's a new feature request that is not necessary for
this round of patches.

Thanks, David
Segher Boessenkool - Sept. 4, 2012, 10:50 p.m.
>>>> For things that do mftb with high frequency, maybe you should  
>>>> also add a
>>>> builtin that does just an mftb, i.e. returns a 32-bit result on  
>>>> 32-bit
>>>> implementations.
>>>
>>> Are you thinking in a function that returns only the TBL?
>>
>> On 32-bit, just TBL; on 64-bit, the whole TB (there is no machine
>> instruction to read just TBL on 64-bit, so it doesn't make much
>> sense to have it return a 32-bit number).
>
> It sounds like you are asking for an additional interface for
> high-frequency events that only reads one register on both PPC32 and
> PPC64.

Yes.  A builtin only makes sense for measuring very short intervals;
the builtin is quite a hassle (the timebase is not part of the UISA,
and as we see it actually differs a lot between implementations), and
there is no advantage over having it in some library if you're
measuring big intervals.

>   I do not believe that interface currently exists for PPC in
> GLibc

Does glibc implement the timebase thing at all?  I lost track of
those patches.

> and that seems out of the scope of this patch.  It could be a
> nice feature, but it's a new feature request that is not necessary for
> this round of patches.

Sure; on the other hand, it seems simple enough to implement.  It
was just a request.


Segher

Patch

diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index c8f8f86..75ad184 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1429,6 +1429,9 @@  BU_SPECIAL_X (RS6000_BUILTIN_RSQRT, "__builtin_rsqrt", RS6000_BTM_FRSQRTE,
 BU_SPECIAL_X (RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf", RS6000_BTM_FRSQRTES,
 	      RS6000_BTC_FP)
 
+BU_SPECIAL_X (RS6000_BUILTIN_GET_TB, "__builtin_ppc_get_timebase",
+	     RS6000_BTM_POWERPC, RS6000_BTC_MISC)
+
 /* Darwin CfString builtin.  */
 BU_SPECIAL_X (RS6000_BUILTIN_CFSTRING, "__builtin_cfstring", RS6000_BTM_ALWAYS,
 	      RS6000_BTC_MISC)
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 6c58307..24e274d 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -9747,6 +9747,30 @@  rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
   return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
 }
 
+/* Expand an expression EXP that calls a builtin without arguments.  */
+static rtx
+rs6000_expand_noop_builtin (enum insn_code icode, rtx target)
+{
+  rtx pat;
+  enum machine_mode tmode = insn_data[icode].operand[0].mode;
+
+  if (icode == CODE_FOR_nothing)
+    /* Builtin not supported on this processor.  */
+    return 0;
+
+  if (target == 0
+      || GET_MODE (target) != tmode
+      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+    target = gen_reg_rtx (tmode);
+
+  pat = GEN_FCN (icode) (target);
+  if (! pat)
+    return 0;
+  emit_insn (pat);
+
+  return target;
+}
+
 
 static rtx
 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
@@ -11336,6 +11360,9 @@  rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
 					   ? CODE_FOR_bpermd_di
 					   : CODE_FOR_bpermd_si), exp, target);
 
+    case RS6000_BUILTIN_GET_TB:
+      return rs6000_expand_noop_builtin (CODE_FOR_get_timebase, target);
+
     case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
     case ALTIVEC_BUILTIN_MASK_FOR_STORE:
       {
@@ -11620,6 +11647,10 @@  rs6000_init_builtins (void)
 				 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
   def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
 
+  ftype = build_function_type_list (unsigned_intDI_type_node,
+				    NULL_TREE);
+  def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
+
 #if TARGET_XCOFF
   /* AIX libm provides clog as __clog.  */
   if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index d5ffd81..09bdd80 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -136,6 +136,7 @@ 
    UNSPECV_PROBE_STACK_RANGE	; probe range of stack addresses
    UNSPECV_EH_RR		; eh_reg_restore
    UNSPECV_ISYNC		; isync instruction
+   UNSPECV_GETTB		; get timebase built-in
   ])
 
 
@@ -14101,6 +14102,41 @@ 
   ""
   "")
 
+(define_expand "get_timebase"
+  [(use (match_operand:DI 0 "gpc_reg_operand" ""))]
+  ""
+  "
+{
+  if (TARGET_POWERPC64)
+    emit_insn (gen_get_timebase_ppc64 (operands[0]));
+  else if (TARGET_POWERPC)
+    emit_insn (gen_get_timebase_ppc32 (operands[0]));
+  else
+    FAIL;
+  DONE;
+}")
+
+(define_insn "get_timebase_ppc32"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+        (unspec_volatile:DI [(const_int 0)] UNSPECV_GETTB))
+   (clobber (match_scratch:SI 1 "=r"))]
+  "TARGET_POWERPC && !TARGET_POWERPC64"
+{
+    return "mftbu %0\;"
+	   "mftb %L0\;"
+	   "mftbu %1\;"
+	   "cmpw %0,%1\;"
+	   "bne- $-16";
+})
+
+(define_insn "get_timebase_ppc64"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+        (unspec_volatile:DI [(const_int 0)] UNSPECV_GETTB))]
+  "TARGET_POWERPC64"
+{
+    return "mfspr %0, 268";
+})
+
 
 
 (include "sync.md")
diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-get-timebase.c b/gcc/testsuite/gcc.target/powerpc/ppc-get-timebase.c
new file mode 100644
index 0000000..2f40974
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/ppc-get-timebase.c
@@ -0,0 +1,22 @@ 
+/* { dg-do run { target { powerpc*-*-* } } } */
+
+/* Test if __builtin_ppc_get_timebase() is compatible with the current
+   processor and if it's changing between reads.  A read failure might indicate
+   a Power ISA or binutils change.  */
+
+#include <inttypes.h>
+
+int
+main(void)
+{
+  uint64_t t1, t2, t3;
+
+  t1 = __builtin_ppc_get_timebase ();
+  t2 = __builtin_ppc_get_timebase ();
+  t3 = __builtin_ppc_get_timebase ();
+
+  if (t1 != t2 && t1 != t3 && t2 != t3)
+    return 0;
+
+  return 1;
+}