Message ID | 1346248585-10972-1-git-send-email-tuliom@linux.vnet.ibm.com |
---|---|
State | New |
Headers | show |
Hi Tulio, > Add __builtin_ppc_get_timebase to read the time base register on > PowerPC. > This is required for applications that measure time at high > frequencies > with high precision that can't afford a syscall. For things that do mftb with high frequency, maybe you should also add a builtin that does just an mftb, i.e. returns a 32-bit result on 32-bit implementations. Please add documentation for the new builtin(s). > --- a/gcc/config/rs6000/rs6000-builtin.def > +++ b/gcc/config/rs6000/rs6000-builtin.def > @@ -1429,6 +1429,9 @@ BU_SPECIAL_X (RS6000_BUILTIN_RSQRT, > "__builtin_rsqrt", RS6000_BTM_FRSQRTE, > BU_SPECIAL_X (RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf", > RS6000_BTM_FRSQRTES, > RS6000_BTC_FP) > > +BU_SPECIAL_X (RS6000_BUILTIN_GET_TB, "__builtin_ppc_get_timebase", > + RS6000_BTM_POWERPC, RS6000_BTC_MISC) RS6000_BTM_POWERPC does not exist anymore. RS6000_BTM_ALWAYS? > +/* Expand an expression EXP that calls a builtin without > arguments. */ > +static rtx > +rs6000_expand_noop_builtin (enum insn_code icode, rtx target) "noop" gives the wrong idea, "zeroop" perhaps? > +(define_expand "get_timebase" You should probably prefix this with powerpc_ or rs6000_ as well. The existing code is not very consistent in this. > + [(use (match_operand:DI 0 "gpc_reg_operand" ""))] > + "" > + " > +{ > + if (TARGET_POWERPC64) > + emit_insn (gen_get_timebase_ppc64 (operands[0])); > + else if (TARGET_POWERPC) > + emit_insn (gen_get_timebase_ppc32 (operands[0])); > + else > + FAIL; > + DONE; > +}") TARGET_POWERPC is always true. > +(define_insn "get_timebase_ppc32" > + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") > + (unspec_volatile:DI [(const_int 0)] UNSPECV_GETTB)) > + (clobber (match_scratch:SI 1 "=r"))] > + "TARGET_POWERPC && !TARGET_POWERPC64" > +{ > + return "mftbu %0\;" > + "mftb %L0\;" > + "mftbu %1\;" > + "cmpw %0,%1\;" > + "bne- $-16"; > +}) This only works for WORDS_BIG_ENDIAN. You should say you clobber CR0 here I think; actually, allow any CRn instead. Does mftb work on all supported assemblers? The machine instruction is phased out, but some assemblers translate it to mfspr. > +(define_insn "get_timebase_ppc64" > + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") > + (unspec_volatile:DI [(const_int 0)] UNSPECV_GETTB))] > + "TARGET_POWERPC64" > +{ > + return "mfspr %0, 268"; > +}) POWER3 needs mftb. > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/ppc-get-timebase.c > @@ -0,0 +1,22 @@ > +/* { dg-do run { target { powerpc*-*-* } } } */ > + > +/* Test if __builtin_ppc_get_timebase() is compatible with the > current > + processor and if it's changing between reads. A read failure > might indicate > + a Power ISA or binutils change. */ > + > +#include <inttypes.h> > + > +int > +main(void) > +{ > + uint64_t t1, t2, t3; > + > + t1 = __builtin_ppc_get_timebase (); > + t2 = __builtin_ppc_get_timebase (); > + t3 = __builtin_ppc_get_timebase (); > + > + if (t1 != t2 && t1 != t3 && t2 != t3) > + return 0; > + > + return 1; > +} On some systems the timebase runs at a rather low frequency, say 20MHz. This test will spuriously fail there. Waste a million CPU cycles before reading TB the second time? Segher
On Wed, Aug 29, 2012 at 6:56 AM, Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com> wrote: > Add __builtin_ppc_get_timebase to read the time base register on PowerPC. > This is required for applications that measure time at high frequencies > with high precision that can't afford a syscall. > > [gcc] > 2012-08-29 Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com> > > * config/rs6000/rs6000-builtin.def: Add __builtin_ppc_get_timebase. > * config/rs6000/rs6000.c (rs6000_expand_noop_builtin): New > function to expand an expression that calls a builtin without > arguments. > (rs6000_expand_builtin): Add __builtin_ppc_get_timebase. > (rs6000_init_builtins): Likewise. > * config/rs6000/rs6000.md: Likewise. > > [gcc/testsuite] > 2012-08-29 Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com> > > * gcc.target/powerpc/ppc-get-timebase.c: New file. > --- > gcc/config/rs6000/rs6000-builtin.def | 3 ++ > gcc/config/rs6000/rs6000.c | 31 +++++++++++++++++ > gcc/config/rs6000/rs6000.md | 36 ++++++++++++++++++++ > .../gcc.target/powerpc/ppc-get-timebase.c | 22 ++++++++++++ > 4 files changed, 92 insertions(+), 0 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/powerpc/ppc-get-timebase.c > > diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def > index c8f8f86..75ad184 100644 > --- a/gcc/config/rs6000/rs6000-builtin.def > +++ b/gcc/config/rs6000/rs6000-builtin.def > @@ -1429,6 +1429,9 @@ BU_SPECIAL_X (RS6000_BUILTIN_RSQRT, "__builtin_rsqrt", RS6000_BTM_FRSQRTE, > BU_SPECIAL_X (RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf", RS6000_BTM_FRSQRTES, > RS6000_BTC_FP) > > +BU_SPECIAL_X (RS6000_BUILTIN_GET_TB, "__builtin_ppc_get_timebase", > + RS6000_BTM_POWERPC, RS6000_BTC_MISC) > + > /* Darwin CfString builtin. */ > BU_SPECIAL_X (RS6000_BUILTIN_CFSTRING, "__builtin_cfstring", RS6000_BTM_ALWAYS, > RS6000_BTC_MISC) > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c > index 6c58307..24e274d 100644 > --- a/gcc/config/rs6000/rs6000.c > +++ b/gcc/config/rs6000/rs6000.c > @@ -9747,6 +9747,30 @@ rs6000_overloaded_builtin_p (enum rs6000_builtins fncode) > return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0; > } > > +/* Expand an expression EXP that calls a builtin without arguments. */ > +static rtx > +rs6000_expand_noop_builtin (enum insn_code icode, rtx target) > +{ > + rtx pat; > + enum machine_mode tmode = insn_data[icode].operand[0].mode; > + > + if (icode == CODE_FOR_nothing) > + /* Builtin not supported on this processor. */ > + return 0; > + > + if (target == 0 > + || GET_MODE (target) != tmode > + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) > + target = gen_reg_rtx (tmode); > + > + pat = GEN_FCN (icode) (target); > + if (! pat) > + return 0; > + emit_insn (pat); > + > + return target; > +} > + > > static rtx > rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target) > @@ -11336,6 +11360,9 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, > ? CODE_FOR_bpermd_di > : CODE_FOR_bpermd_si), exp, target); > > + case RS6000_BUILTIN_GET_TB: > + return rs6000_expand_noop_builtin (CODE_FOR_get_timebase, target); > + > case ALTIVEC_BUILTIN_MASK_FOR_LOAD: > case ALTIVEC_BUILTIN_MASK_FOR_STORE: > { > @@ -11620,6 +11647,10 @@ rs6000_init_builtins (void) > POWER7_BUILTIN_BPERMD, "__builtin_bpermd"); > def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD); > > + ftype = build_function_type_list (unsigned_intDI_type_node, > + NULL_TREE); > + def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB); > + > #if TARGET_XCOFF > /* AIX libm provides clog as __clog. */ > if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE) > diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md > index d5ffd81..09bdd80 100644 > --- a/gcc/config/rs6000/rs6000.md > +++ b/gcc/config/rs6000/rs6000.md > @@ -136,6 +136,7 @@ > UNSPECV_PROBE_STACK_RANGE ; probe range of stack addresses > UNSPECV_EH_RR ; eh_reg_restore > UNSPECV_ISYNC ; isync instruction > + UNSPECV_GETTB ; get timebase built-in > ]) > > > @@ -14101,6 +14102,41 @@ > "" > "") > > +(define_expand "get_timebase" > + [(use (match_operand:DI 0 "gpc_reg_operand" ""))] > + "" > + " > +{ > + if (TARGET_POWERPC64) > + emit_insn (gen_get_timebase_ppc64 (operands[0])); > + else if (TARGET_POWERPC) > + emit_insn (gen_get_timebase_ppc32 (operands[0])); > + else > + FAIL; > + DONE; > +}") > + > +(define_insn "get_timebase_ppc32" > + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") > + (unspec_volatile:DI [(const_int 0)] UNSPECV_GETTB)) > + (clobber (match_scratch:SI 1 "=r"))] > + "TARGET_POWERPC && !TARGET_POWERPC64" > +{ > + return "mftbu %0\;" > + "mftb %L0\;" > + "mftbu %1\;" > + "cmpw %0,%1\;" > + "bne- $-16"; > +}) > + > +(define_insn "get_timebase_ppc64" > + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") > + (unspec_volatile:DI [(const_int 0)] UNSPECV_GETTB))] > + "TARGET_POWERPC64" > +{ > + return "mfspr %0, 268"; > +}) The Cell needs to do the 32bit way all the time to work aound the chip errata/limitation. Thanks, Andrew Pinski > + > > > (include "sync.md") > diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-get-timebase.c b/gcc/testsuite/gcc.target/powerpc/ppc-get-timebase.c > new file mode 100644 > index 0000000..2f40974 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/ppc-get-timebase.c > @@ -0,0 +1,22 @@ > +/* { dg-do run { target { powerpc*-*-* } } } */ > + > +/* Test if __builtin_ppc_get_timebase() is compatible with the current > + processor and if it's changing between reads. A read failure might indicate > + a Power ISA or binutils change. */ > + > +#include <inttypes.h> > + > +int > +main(void) > +{ > + uint64_t t1, t2, t3; > + > + t1 = __builtin_ppc_get_timebase (); > + t2 = __builtin_ppc_get_timebase (); > + t3 = __builtin_ppc_get_timebase (); > + > + if (t1 != t2 && t1 != t3 && t2 != t3) > + return 0; > + > + return 1; > +} > -- > 1.7.7.6 >
On Wed, 29 Aug 2012, Segher Boessenkool wrote: > > +++ b/gcc/testsuite/gcc.target/powerpc/ppc-get-timebase.c > > @@ -0,0 +1,22 @@ > > +/* { dg-do run { target { powerpc*-*-* } } } */ > > + > > +/* Test if __builtin_ppc_get_timebase() is compatible with the current > > + processor and if it's changing between reads. A read failure might > > indicate > > + a Power ISA or binutils change. */ > > + > > +#include <inttypes.h> > > + > > +int > > +main(void) > > +{ > > + uint64_t t1, t2, t3; > > + > > + t1 = __builtin_ppc_get_timebase (); > > + t2 = __builtin_ppc_get_timebase (); > > + t3 = __builtin_ppc_get_timebase (); > > + > > + if (t1 != t2 && t1 != t3 && t2 != t3) > > + return 0; > > + > > + return 1; > > +} > > On some systems the timebase runs at a rather low frequency, say 20MHz. > This test will spuriously fail there. Waste a million CPU cycles before > reading TB the second time? Waste said million cycles portably by calling sched_yield()? (Available only on POSIX systems. :) brgds, H-P
On Wed, Aug 29, 2012 at 01:56:05PM -0400, Hans-Peter Nilsson wrote: > On Wed, 29 Aug 2012, Segher Boessenkool wrote: > > On some systems the timebase runs at a rather low frequency, say 20MHz. > > This test will spuriously fail there. Waste a million CPU cycles before > > reading TB the second time? > > Waste said million cycles portably by calling sched_yield()? > (Available only on POSIX systems. :) Well only for a test environment. You don't want to call sched_yield in the normal case, since the apps that do this many millions of times need this to be as a fast as possible.
Hi Segher, Segher Boessenkool <segher@kernel.crashing.org> writes: >> Add __builtin_ppc_get_timebase to read the time base register >> on PowerPC. >> This is required for applications that measure time at high >> frequencies >> with high precision that can't afford a syscall. > > For things that do mftb with high frequency, maybe you should > also add a > builtin that does just an mftb, i.e. returns a 32-bit result on > 32-bit > implementations. Are you thinking in a function that returns only the TBL? I don't think such a builtin would make sense on a 64-bit environment, right? Do you have a suggestion for its name? > Please add documentation for the new builtin(s). Sure! >> --- a/gcc/config/rs6000/rs6000-builtin.def >> +++ b/gcc/config/rs6000/rs6000-builtin.def >> @@ -1429,6 +1429,9 @@ BU_SPECIAL_X (RS6000_BUILTIN_RSQRT, >> "__builtin_rsqrt", RS6000_BTM_FRSQRTE, >> BU_SPECIAL_X (RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf", >> RS6000_BTM_FRSQRTES, >> RS6000_BTC_FP) >> >> +BU_SPECIAL_X (RS6000_BUILTIN_GET_TB, >> "__builtin_ppc_get_timebase", >> + RS6000_BTM_POWERPC, RS6000_BTC_MISC) > > RS6000_BTM_POWERPC does not exist anymore. RS6000_BTM_ALWAYS? I'm replacing. >> +/* Expand an expression EXP that calls a builtin without >> arguments. */ >> +static rtx >> +rs6000_expand_noop_builtin (enum insn_code icode, rtx target) > > "noop" gives the wrong idea, "zeroop" perhaps? zeroop is much better. > >> +(define_expand "get_timebase" > > You should probably prefix this with powerpc_ or rs6000_ as > well. > The existing code is not very consistent in this. OK. >> + [(use (match_operand:DI 0 "gpc_reg_operand" ""))] >> + "" >> + " >> +{ >> + if (TARGET_POWERPC64) >> + emit_insn (gen_get_timebase_ppc64 (operands[0])); >> + else if (TARGET_POWERPC) >> + emit_insn (gen_get_timebase_ppc32 (operands[0])); >> + else >> + FAIL; >> + DONE; >> +}") > > TARGET_POWERPC is always true. OK. >> +(define_insn "get_timebase_ppc32" >> + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") >> + (unspec_volatile:DI [(const_int 0)] UNSPECV_GETTB)) >> + (clobber (match_scratch:SI 1 "=r"))] >> + "TARGET_POWERPC && !TARGET_POWERPC64" >> +{ >> + return "mftbu %0\;" >> + "mftb %L0\;" >> + "mftbu %1\;" >> + "cmpw %0,%1\;" >> + "bne- $-16"; >> +}) > > This only works for WORDS_BIG_ENDIAN. Yes. > You should say you clobber CR0 here I think; actually, allow any > CRn > instead. Yes. > Does mftb work on all supported assemblers? The machine > instruction > is phased out, but some assemblers translate it to mfspr. According to the Power ISA 2.06 they should translate it to mfspr. >> +(define_insn "get_timebase_ppc64" >> + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") >> + (unspec_volatile:DI [(const_int 0)] UNSPECV_GETTB))] >> + "TARGET_POWERPC64" >> +{ >> + return "mfspr %0, 268"; >> +}) > > POWER3 needs mftb. Nice catch! >> +int >> +main(void) >> +{ >> + uint64_t t1, t2, t3; >> + >> + t1 = __builtin_ppc_get_timebase (); >> + t2 = __builtin_ppc_get_timebase (); >> + t3 = __builtin_ppc_get_timebase (); >> + >> + if (t1 != t2 && t1 != t3 && t2 != t3) >> + return 0; >> + >> + return 1; >> +} > > On some systems the timebase runs at a rather low frequency, say > 20MHz. > This test will spuriously fail there. Waste a million CPU > cycles before > reading TB the second time? Yes. Thank you,
>> On some systems the timebase runs at a rather low frequency, say >> 20MHz. >> This test will spuriously fail there. Waste a million CPU cycles >> before >> reading TB the second time? > > Waste said million cycles portably by calling sched_yield()? > (Available only on POSIX systems. :) I was thinking more along the lines of int j; for (j = 0; j < 1000000; j++) asm("" : : "r"(j)); which is more portable (and a lot more predictable). Segher
>> For things that do mftb with high frequency, maybe you should also >> add a >> builtin that does just an mftb, i.e. returns a 32-bit result on 32- >> bit >> implementations. > > Are you thinking in a function that returns only the TBL? On 32-bit, just TBL; on 64-bit, the whole TB (there is no machine instruction to read just TBL on 64-bit, so it doesn't make much sense to have it return a 32-bit number). >>> +(define_insn "get_timebase_ppc32" >>> + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") >>> + (unspec_volatile:DI [(const_int 0)] UNSPECV_GETTB)) >>> + (clobber (match_scratch:SI 1 "=r"))] >>> + "TARGET_POWERPC && !TARGET_POWERPC64" >>> +{ >>> + return "mftbu %0\;" >>> + "mftb %L0\;" >>> + "mftbu %1\;" >>> + "cmpw %0,%1\;" >>> + "bne- $-16"; >>> +}) >> >> This only works for WORDS_BIG_ENDIAN. > > Yes. Do you mean you are fixing it? :-) >> Does mftb work on all supported assemblers? The machine instruction >> is phased out, but some assemblers translate it to mfspr. > > According to the Power ISA 2.06 they should translate it to mfspr. Yes, I realised that later. But then a binary compiled with an assembler that emits mfspr for mftb will not run on POWER3 or 601. I don't know what to do about that; maybe just document it. Segher
On Wed, 29 Aug 2012, Michael Meissner wrote: > On Wed, Aug 29, 2012 at 01:56:05PM -0400, Hans-Peter Nilsson wrote: > > On Wed, 29 Aug 2012, Segher Boessenkool wrote: > > > On some systems the timebase runs at a rather low frequency, say 20MHz. > > > This test will spuriously fail there. Waste a million CPU cycles before > > > reading TB the second time? > > > > Waste said million cycles portably by calling sched_yield()? > > (Available only on POSIX systems. :) > > Well only for a test environment. You don't want to call sched_yield in the > normal case, since the apps that do this many millions of times need this to be > as a fast as possible. Surely, but IMHO what goes for the normal case is not a valid reading of "waste"..."millions of cycles". ;) Point being, for simulator environments, you may not want the loop that was suggested later. On the other hand, that might not be an observable period, either. brgds, H-P
Segher Boessenkool <segher@kernel.crashing.org> writes: >>> For things that do mftb with high frequency, maybe you should >>> also add a >>> builtin that does just an mftb, i.e. returns a 32-bit result >>> on 32- >>> bit >>> implementations. >> >> Are you thinking in a function that returns only the TBL? > > On 32-bit, just TBL; on 64-bit, the whole TB (there is no > machine > instruction to read just TBL on 64-bit, so it doesn't make much > sense to have it return a 32-bit number). OK. >>>> +(define_insn "get_timebase_ppc32" >>>> + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") >>>> + (unspec_volatile:DI [(const_int 0)] UNSPECV_GETTB)) >>>> + (clobber (match_scratch:SI 1 "=r"))] >>>> + "TARGET_POWERPC && !TARGET_POWERPC64" >>>> +{ >>>> + return "mftbu %0\;" >>>> + "mftb %L0\;" >>>> + "mftbu %1\;" >>>> + "cmpw %0,%1\;" >>>> + "bne- $-16"; >>>> +}) >>> >>> This only works for WORDS_BIG_ENDIAN. >> >> Yes. > > Do you mean you are fixing it? :-) Yes. At least I'll try to. :-) >>> Does mftb work on all supported assemblers? The machine >>> instruction >>> is phased out, but some assemblers translate it to mfspr. >> >> According to the Power ISA 2.06 they should translate it to >> mfspr. > > Yes, I realised that later. > > But then a binary compiled with an assembler that emits mfspr > for mftb > will not run on POWER3 or 601. I don't know what to do about > that; > maybe just document it. We can easily fix this at runtime, which isn't the case here. Thanks again,
> Point being, for simulator environments, you may not want the > loop that was suggested later. On the other hand, that might > not be an observable period, either. I don't think looping a million times would be too slow for the testsuite: there are many tests that do a lot more work than that, already. The worst case for hardware that I know of can take about 100 clock cycles for one timebase tick. But how about this then, which only iterates much if the test fails: int main (void) { uint64_t t = __builtin_ppc_get_timebase (); int j; for (j = 0; j < 1000000; j++) if (t != __builtin_ppc_get_timebase ()) break; return (j == 1000000); } Segher
On Wed, Aug 29, 2012 at 3:09 PM, Segher Boessenkool <segher@kernel.crashing.org> wrote: >>> For things that do mftb with high frequency, maybe you should also add a >>> builtin that does just an mftb, i.e. returns a 32-bit result on 32-bit >>> implementations. >> >> Are you thinking in a function that returns only the TBL? > > On 32-bit, just TBL; on 64-bit, the whole TB (there is no machine > instruction to read just TBL on 64-bit, so it doesn't make much > sense to have it return a 32-bit number). It sounds like you are asking for an additional interface for high-frequency events that only reads one register on both PPC32 and PPC64. I do not believe that interface currently exists for PPC in GLibc and that seems out of the scope of this patch. It could be a nice feature, but it's a new feature request that is not necessary for this round of patches. Thanks, David
>>>> For things that do mftb with high frequency, maybe you should >>>> also add a >>>> builtin that does just an mftb, i.e. returns a 32-bit result on >>>> 32-bit >>>> implementations. >>> >>> Are you thinking in a function that returns only the TBL? >> >> On 32-bit, just TBL; on 64-bit, the whole TB (there is no machine >> instruction to read just TBL on 64-bit, so it doesn't make much >> sense to have it return a 32-bit number). > > It sounds like you are asking for an additional interface for > high-frequency events that only reads one register on both PPC32 and > PPC64. Yes. A builtin only makes sense for measuring very short intervals; the builtin is quite a hassle (the timebase is not part of the UISA, and as we see it actually differs a lot between implementations), and there is no advantage over having it in some library if you're measuring big intervals. > I do not believe that interface currently exists for PPC in > GLibc Does glibc implement the timebase thing at all? I lost track of those patches. > and that seems out of the scope of this patch. It could be a > nice feature, but it's a new feature request that is not necessary for > this round of patches. Sure; on the other hand, it seems simple enough to implement. It was just a request. Segher
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index c8f8f86..75ad184 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -1429,6 +1429,9 @@ BU_SPECIAL_X (RS6000_BUILTIN_RSQRT, "__builtin_rsqrt", RS6000_BTM_FRSQRTE, BU_SPECIAL_X (RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf", RS6000_BTM_FRSQRTES, RS6000_BTC_FP) +BU_SPECIAL_X (RS6000_BUILTIN_GET_TB, "__builtin_ppc_get_timebase", + RS6000_BTM_POWERPC, RS6000_BTC_MISC) + /* Darwin CfString builtin. */ BU_SPECIAL_X (RS6000_BUILTIN_CFSTRING, "__builtin_cfstring", RS6000_BTM_ALWAYS, RS6000_BTC_MISC) diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 6c58307..24e274d 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -9747,6 +9747,30 @@ rs6000_overloaded_builtin_p (enum rs6000_builtins fncode) return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0; } +/* Expand an expression EXP that calls a builtin without arguments. */ +static rtx +rs6000_expand_noop_builtin (enum insn_code icode, rtx target) +{ + rtx pat; + enum machine_mode tmode = insn_data[icode].operand[0].mode; + + if (icode == CODE_FOR_nothing) + /* Builtin not supported on this processor. */ + return 0; + + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + pat = GEN_FCN (icode) (target); + if (! pat) + return 0; + emit_insn (pat); + + return target; +} + static rtx rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target) @@ -11336,6 +11360,9 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, ? CODE_FOR_bpermd_di : CODE_FOR_bpermd_si), exp, target); + case RS6000_BUILTIN_GET_TB: + return rs6000_expand_noop_builtin (CODE_FOR_get_timebase, target); + case ALTIVEC_BUILTIN_MASK_FOR_LOAD: case ALTIVEC_BUILTIN_MASK_FOR_STORE: { @@ -11620,6 +11647,10 @@ rs6000_init_builtins (void) POWER7_BUILTIN_BPERMD, "__builtin_bpermd"); def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD); + ftype = build_function_type_list (unsigned_intDI_type_node, + NULL_TREE); + def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB); + #if TARGET_XCOFF /* AIX libm provides clog as __clog. */ if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index d5ffd81..09bdd80 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -136,6 +136,7 @@ UNSPECV_PROBE_STACK_RANGE ; probe range of stack addresses UNSPECV_EH_RR ; eh_reg_restore UNSPECV_ISYNC ; isync instruction + UNSPECV_GETTB ; get timebase built-in ]) @@ -14101,6 +14102,41 @@ "" "") +(define_expand "get_timebase" + [(use (match_operand:DI 0 "gpc_reg_operand" ""))] + "" + " +{ + if (TARGET_POWERPC64) + emit_insn (gen_get_timebase_ppc64 (operands[0])); + else if (TARGET_POWERPC) + emit_insn (gen_get_timebase_ppc32 (operands[0])); + else + FAIL; + DONE; +}") + +(define_insn "get_timebase_ppc32" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (unspec_volatile:DI [(const_int 0)] UNSPECV_GETTB)) + (clobber (match_scratch:SI 1 "=r"))] + "TARGET_POWERPC && !TARGET_POWERPC64" +{ + return "mftbu %0\;" + "mftb %L0\;" + "mftbu %1\;" + "cmpw %0,%1\;" + "bne- $-16"; +}) + +(define_insn "get_timebase_ppc64" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (unspec_volatile:DI [(const_int 0)] UNSPECV_GETTB))] + "TARGET_POWERPC64" +{ + return "mfspr %0, 268"; +}) + (include "sync.md") diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-get-timebase.c b/gcc/testsuite/gcc.target/powerpc/ppc-get-timebase.c new file mode 100644 index 0000000..2f40974 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/ppc-get-timebase.c @@ -0,0 +1,22 @@ +/* { dg-do run { target { powerpc*-*-* } } } */ + +/* Test if __builtin_ppc_get_timebase() is compatible with the current + processor and if it's changing between reads. A read failure might indicate + a Power ISA or binutils change. */ + +#include <inttypes.h> + +int +main(void) +{ + uint64_t t1, t2, t3; + + t1 = __builtin_ppc_get_timebase (); + t2 = __builtin_ppc_get_timebase (); + t3 = __builtin_ppc_get_timebase (); + + if (t1 != t2 && t1 != t3 && t2 != t3) + return 0; + + return 1; +}