diff mbox

builtin fe[gs]etround

Message ID alpine.DEB.2.02.1402231133510.29647@stedding.saclay.inria.fr
State New
Headers show

Commit Message

Marc Glisse Feb. 23, 2014, 11:09 a.m. UTC
Hello,

a natural first step to optimize changes of rounding modes seems to be 
making these 2 functions builtins. I don't know exactly how far 
optimizations will be able to go (the fact that fesetround can fail 
complicates things a lot). What is included here:

1) fegetround is pure.

2) Neither function aliases (use or clobber) any memory. I expect this is 
likely not true on all platforms, some probably store the rounding mode in 
a global variable that is accessible through other means (though mixing 
direct accesses with calls to fe*etround seems a questionable style). Any 
opinion or advice here?

Regtested on x86_64-linux-gnu, certainly not for 4.9.

2014-02-23  Marc Glisse  <marc.glisse@inria.fr>

gcc/
 	* builtins.def (BUILT_IN_FEGETROUND, BUILT_IN_FESETROUND): Add.
 	* tree-ssa-alias.c (ref_maybe_used_by_call_p_1,
 	call_may_clobber_ref_p_1): Handle them.

gcc/testsuite/
 	* gcc.dg/tree-ssa/fegsetround.c: New file.

Comments

Richard Biener Feb. 24, 2014, 9:02 a.m. UTC | #1
On Sun, Feb 23, 2014 at 12:09 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> Hello,
>
> a natural first step to optimize changes of rounding modes seems to be
> making these 2 functions builtins. I don't know exactly how far
> optimizations will be able to go (the fact that fesetround can fail
> complicates things a lot). What is included here:
>
> 1) fegetround is pure.
>
> 2) Neither function aliases (use or clobber) any memory. I expect this is
> likely not true on all platforms, some probably store the rounding mode in a
> global variable that is accessible through other means (though mixing direct
> accesses with calls to fe*etround seems a questionable style). Any opinion
> or advice here?
>
> Regtested on x86_64-linux-gnu, certainly not for 4.9.

Hohumm ... before making any of these functions less of a barrier than they
are (at least for loads and stores), shouldn't we think of, and fix, the lack of
any dependences between FP status word changes and actual arithmetic
instructions?

In fact, using 'pure' or 'not use/clobber memory' here is exactly walking
on shaking grounds.  Simply because we lack of a way to say that
this stmt uses/clobbers the FP state (fegetround would be 'const' when
following your logic in 2)).

Otherwise, what is it worth optimizing^breaking things even more than
we do now?

[not that I have an answer for the FP state dependency that I like]

Thanks,
Richard.

> 2014-02-23  Marc Glisse  <marc.glisse@inria.fr>
>
> gcc/
>         * builtins.def (BUILT_IN_FEGETROUND, BUILT_IN_FESETROUND): Add.
>         * tree-ssa-alias.c (ref_maybe_used_by_call_p_1,
>         call_may_clobber_ref_p_1): Handle them.
>
> gcc/testsuite/
>         * gcc.dg/tree-ssa/fegsetround.c: New file.
>
> --
> Marc Glisse
> Index: gcc/builtins.def
> ===================================================================
> --- gcc/builtins.def    (revision 208045)
> +++ gcc/builtins.def    (working copy)
> @@ -276,20 +276,22 @@ DEF_C99_BUILTIN        (BUILT_IN_EXPM1F,
>  DEF_C99_BUILTIN        (BUILT_IN_EXPM1L, "expm1l",
> BT_FN_LONGDOUBLE_LONGDOUBLE, ATTR_MATHFN_FPROUNDING_ERRNO)
>  DEF_LIB_BUILTIN        (BUILT_IN_FABS, "fabs", BT_FN_DOUBLE_DOUBLE,
> ATTR_CONST_NOTHROW_LEAF_LIST)
>  DEF_C99_C90RES_BUILTIN (BUILT_IN_FABSF, "fabsf", BT_FN_FLOAT_FLOAT,
> ATTR_CONST_NOTHROW_LEAF_LIST)
>  DEF_C99_C90RES_BUILTIN (BUILT_IN_FABSL, "fabsl",
> BT_FN_LONGDOUBLE_LONGDOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST)
>  DEF_GCC_BUILTIN        (BUILT_IN_FABSD32, "fabsd32",
> BT_FN_DFLOAT32_DFLOAT32, ATTR_CONST_NOTHROW_LEAF_LIST)
>  DEF_GCC_BUILTIN        (BUILT_IN_FABSD64, "fabsd64",
> BT_FN_DFLOAT64_DFLOAT64, ATTR_CONST_NOTHROW_LEAF_LIST)
>  DEF_GCC_BUILTIN        (BUILT_IN_FABSD128, "fabsd128",
> BT_FN_DFLOAT128_DFLOAT128, ATTR_CONST_NOTHROW_LEAF_LIST)
>  DEF_C99_BUILTIN        (BUILT_IN_FDIM, "fdim", BT_FN_DOUBLE_DOUBLE_DOUBLE,
> ATTR_MATHFN_FPROUNDING_ERRNO)
>  DEF_C99_BUILTIN        (BUILT_IN_FDIMF, "fdimf", BT_FN_FLOAT_FLOAT_FLOAT,
> ATTR_MATHFN_FPROUNDING_ERRNO)
>  DEF_C99_BUILTIN        (BUILT_IN_FDIML, "fdiml",
> BT_FN_LONGDOUBLE_LONGDOUBLE_LONGDOUBLE, ATTR_MATHFN_FPROUNDING_ERRNO)
> +DEF_C99_BUILTIN        (BUILT_IN_FEGETROUND, "fegetround", BT_FN_INT,
> ATTR_PURE_NOTHROW_LEAF_LIST)
> +DEF_C99_BUILTIN        (BUILT_IN_FESETROUND, "fesetround", BT_FN_INT_INT,
> ATTR_NOTHROW_LEAF_LIST)
>  DEF_LIB_BUILTIN        (BUILT_IN_FLOOR, "floor", BT_FN_DOUBLE_DOUBLE,
> ATTR_CONST_NOTHROW_LEAF_LIST)
>  DEF_C99_C90RES_BUILTIN (BUILT_IN_FLOORF, "floorf", BT_FN_FLOAT_FLOAT,
> ATTR_CONST_NOTHROW_LEAF_LIST)
>  DEF_C99_C90RES_BUILTIN (BUILT_IN_FLOORL, "floorl",
> BT_FN_LONGDOUBLE_LONGDOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST)
>  DEF_C99_BUILTIN        (BUILT_IN_FMA, "fma",
> BT_FN_DOUBLE_DOUBLE_DOUBLE_DOUBLE, ATTR_MATHFN_FPROUNDING)
>  DEF_C99_BUILTIN        (BUILT_IN_FMAF, "fmaf",
> BT_FN_FLOAT_FLOAT_FLOAT_FLOAT, ATTR_MATHFN_FPROUNDING)
>  DEF_C99_BUILTIN        (BUILT_IN_FMAL, "fmal",
> BT_FN_LONGDOUBLE_LONGDOUBLE_LONGDOUBLE_LONGDOUBLE, ATTR_MATHFN_FPROUNDING)
>  DEF_C99_BUILTIN        (BUILT_IN_FMAX, "fmax", BT_FN_DOUBLE_DOUBLE_DOUBLE,
> ATTR_CONST_NOTHROW_LEAF_LIST)
>  DEF_C99_BUILTIN        (BUILT_IN_FMAXF, "fmaxf", BT_FN_FLOAT_FLOAT_FLOAT,
> ATTR_CONST_NOTHROW_LEAF_LIST)
>  DEF_C99_BUILTIN        (BUILT_IN_FMAXL, "fmaxl",
> BT_FN_LONGDOUBLE_LONGDOUBLE_LONGDOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST)
>  DEF_C99_BUILTIN        (BUILT_IN_FMIN, "fmin", BT_FN_DOUBLE_DOUBLE_DOUBLE,
> ATTR_CONST_NOTHROW_LEAF_LIST)
> Index: gcc/testsuite/gcc.dg/tree-ssa/fegsetround.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/fegsetround.c (revision 0)
> +++ gcc/testsuite/gcc.dg/tree-ssa/fegsetround.c (working copy)
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-std=c99 -O -fdump-tree-optimized" } */
> +
> +#include <fenv.h>
> +
> +int a;
> +int f ()
> +{
> +  a = 42;
> +  // don't read a
> +  int x = fegetround ();
> +  fesetround (x + 1);
> +  a = 0;
> +  return a;
> +}
> +int g ()
> +{
> +  a = 0;
> +  // don't write a
> +  int x = fegetround ();
> +  fesetround (x + 1);
> +  return a;
> +}
> +int h ()
> +{
> +  // pure
> +  return fegetround () - fegetround ();
> +}
> +
> +/* { dg-final { scan-tree-dump-times "return 0" 3 "optimized" } } */
> +/* { dg-final { scan-tree-dump-not "a = 42" "optimized" } } */
> +/* { dg-final { cleanup-tree-dump "optimized" } } */
>
> Property changes on: gcc/testsuite/gcc.dg/tree-ssa/fegsetround.c
> ___________________________________________________________________
> Added: svn:keywords
> ## -0,0 +1 ##
> +Author Date Id Revision URL
> \ No newline at end of property
> Added: svn:eol-style
> ## -0,0 +1 ##
> +native
> \ No newline at end of property
> Index: gcc/tree-ssa-alias.c
> ===================================================================
> --- gcc/tree-ssa-alias.c        (revision 208045)
> +++ gcc/tree-ssa-alias.c        (working copy)
> @@ -1537,20 +1537,22 @@ ref_maybe_used_by_call_p_1 (gimple call,
>         case BUILT_IN_MODFF:
>         case BUILT_IN_MODFL:
>         case BUILT_IN_REMQUO:
>         case BUILT_IN_REMQUOF:
>         case BUILT_IN_REMQUOL:
>         case BUILT_IN_SINCOS:
>         case BUILT_IN_SINCOSF:
>         case BUILT_IN_SINCOSL:
>         case BUILT_IN_ASSUME_ALIGNED:
>         case BUILT_IN_VA_END:
> +       case BUILT_IN_FEGETROUND:
> +       case BUILT_IN_FESETROUND:
>           return false;
>         /* __sync_* builtins and some OpenMP builtins act as threading
>            barriers.  */
>  #undef DEF_SYNC_BUILTIN
>  #define DEF_SYNC_BUILTIN(ENUM, NAME, TYPE, ATTRS) case ENUM:
>  #include "sync-builtins.def"
>  #undef DEF_SYNC_BUILTIN
>         case BUILT_IN_GOMP_ATOMIC_START:
>         case BUILT_IN_GOMP_ATOMIC_END:
>         case BUILT_IN_GOMP_BARRIER:
> @@ -1831,20 +1833,21 @@ call_may_clobber_ref_p_1 (gimple call, a
>         case BUILT_IN_STRNDUP:
>           /* Unix98 specifies that errno is set on allocation failure.  */
>           if (flag_errno_math
>               && targetm.ref_may_alias_errno (ref))
>             return true;
>           return false;
>         case BUILT_IN_STACK_SAVE:
>         case BUILT_IN_ALLOCA:
>         case BUILT_IN_ALLOCA_WITH_ALIGN:
>         case BUILT_IN_ASSUME_ALIGNED:
> +       case BUILT_IN_FESETROUND:
>           return false;
>         /* But posix_memalign stores a pointer into the memory pointed to
>            by its first argument.  */
>         case BUILT_IN_POSIX_MEMALIGN:
>           {
>             tree ptrptr = gimple_call_arg (call, 0);
>             ao_ref dref;
>             ao_ref_init_from_ptr_and_size (&dref, ptrptr,
>                                            TYPE_SIZE_UNIT (ptr_type_node));
>             return (refs_may_alias_p_1 (&dref, ref, false)
>
Richard Biener Feb. 24, 2014, 9:13 a.m. UTC | #2
On Mon, Feb 24, 2014 at 10:02 AM, Richard Biener
<richard.guenther@gmail.com> wrote:
> On Sun, Feb 23, 2014 at 12:09 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
>> Hello,
>>
>> a natural first step to optimize changes of rounding modes seems to be
>> making these 2 functions builtins. I don't know exactly how far
>> optimizations will be able to go (the fact that fesetround can fail
>> complicates things a lot). What is included here:
>>
>> 1) fegetround is pure.
>>
>> 2) Neither function aliases (use or clobber) any memory. I expect this is
>> likely not true on all platforms, some probably store the rounding mode in a
>> global variable that is accessible through other means (though mixing direct
>> accesses with calls to fe*etround seems a questionable style). Any opinion
>> or advice here?
>>
>> Regtested on x86_64-linux-gnu, certainly not for 4.9.
>
> Hohumm ... before making any of these functions less of a barrier than they
> are (at least for loads and stores), shouldn't we think of, and fix, the lack of
> any dependences between FP status word changes and actual arithmetic
> instructions?
>
> In fact, using 'pure' or 'not use/clobber memory' here is exactly walking
> on shaking grounds.  Simply because we lack of a way to say that
> this stmt uses/clobbers the FP state (fegetround would be 'const' when
> following your logic in 2)).
>
> Otherwise, what is it worth optimizing^breaking things even more than
> we do now?
>
> [not that I have an answer for the FP state dependency that I like]

Just to elaborate on the two obvious options:

1) represent all arithmetic with builtins, using an extra explicit FP
state argument, and set / query that with the FP manipulation / query
functions (also with every call)

2) use something similar to virtual operands - conveniently the vuse/vdef
members are present even for unary, binary and ternary assigns (you'd only
use the vuse field here).  Issues with calls (might consume/clobber FP
state) - there the vop fields are already used, so you'd need to add an
extra use (easy) and a def (ugh)

eventually people wanted to get multiple defs for the simple stmts (assigns
and calls) back for stuff like modeling CPU flags explicitly (the overflow flag
for example).  And FP ISAs now have support for per-stmt rounding mode
flags (and element masks for vector instructions).  Thus eventually this
may be a good reason to support extra (but less efficient to get at / modify?)
SSA(!) uses and defs to these stmt kinds.  But it needs to be well-designed
to not throw away the speedups and simplicity we gained when removing
general support for multiple defs.

(should be obvious that I lean towards 2) but am not very happy with the
consequences for gimple data structures)

Richard.

> Thanks,
> Richard.
Marc Glisse Feb. 24, 2014, 12:43 p.m. UTC | #3
On Mon, 24 Feb 2014, Richard Biener wrote:

> On Sun, Feb 23, 2014 at 12:09 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
>> Hello,
>>
>> a natural first step to optimize changes of rounding modes seems to be
>> making these 2 functions builtins. I don't know exactly how far
>> optimizations will be able to go (the fact that fesetround can fail
>> complicates things a lot). What is included here:
>>
>> 1) fegetround is pure.
>>
>> 2) Neither function aliases (use or clobber) any memory. I expect this is
>> likely not true on all platforms, some probably store the rounding mode in a
>> global variable that is accessible through other means (though mixing direct
>> accesses with calls to fe*etround seems a questionable style). Any opinion
>> or advice here?
>>
>> Regtested on x86_64-linux-gnu, certainly not for 4.9.
>
> Hohumm ... before making any of these functions less of a barrier than they
> are (at least for loads and stores), shouldn't we think of, and fix, the lack of
> any dependences between FP status word changes and actual arithmetic
> instructions?

I'd welcome such change, but it is beyond my gcc-foo (and my free time) 
for now.

> In fact, using 'pure' or 'not use/clobber memory' here is exactly walking
> on shaking grounds.

I have a hard time seeing how making fegetround pure can break anything 
that accidentally works now. fegetround really is pure, it is fine to move 
it across float operations.

> Simply because we lack of a way to say that this stmt uses/clobbers the 
> FP state (fegetround would be 'const' when following your logic in 2)).

Not exactly, the logic in 2 is to say that the FP rounding mode is still a 
global variable, but not one that is accessible directly, so the alias 
oracle can never be called on a ref to it.

(note that we probably don't want a single FP state but separate rounding 
mode on one side and exception flags on the other, since they are 
preserved/modified very differently)

> Otherwise, what is it worth optimizing^breaking things even more than
> we do now?

With just my patch, probably not much. For someone interested, the kind of 
thing that I would like:

#include <fenv.h>
/* Pass x through an empty volatile asm statement that names it as a
   read-write operand (constraint "+mx"), then return it.  Presumably meant
   as an optimization barrier so the addition in add() is neither folded nor
   moved across the fesetround() calls -- confirm.  */
double protect(double x){asm volatile("":"+mx"(x));return x;}
/* Return x + y, computed after requesting the FE_UPWARD rounding mode.
   The mode reported by fegetround() on entry is handed back to fesetround()
   before returning.  The return values of both fesetround() calls are
   ignored, so a failed mode change goes unnoticed here.  */
double add(double x,double y){
   int old=fegetround();
   fesetround(FE_UPWARD);
   /* Both operands and the sum each go through protect().  */
   double res = protect(protect(x)+protect(y));
   fesetround(old);
   return res;
}
/* Sum x, y and z through two nested add() calls.  Each call does its own
   fegetround()/fesetround() save, set and restore, so the rounding mode is
   switched twice in a row.  */
double f(double x,double y,double z){
   return add(add(x,y),z);
}

(in practice I might add: if(old!=FE_UPWARD) in front of both fesetround)
becomes:

   old_9 = fegetround ();
   fesetround (2048);
   __asm__ __volatile__("" : "=mx" x_10 : "0" x_2(D));
   __asm__ __volatile__("" : "=mx" x_11 : "0" y_3(D));
   _12 = x_10 + x_11;
   __asm__ __volatile__("" : "=mx" res_13 : "0" _12);
   fesetround (old_9);
   old_14 = fegetround ();
   fesetround (2048);
   __asm__ __volatile__("" : "=mx" x_15 : "0" res_13);
   __asm__ __volatile__("" : "=mx" x_16 : "0" z_6(D));
   _17 = x_15 + x_16;
   __asm__ __volatile__("" : "=mx" res_18 : "0" _17);
   fesetround (old_14);
   return res_18;

The interesting part is the 3 instructions in the middle. It is "easy" to 
replace old_14 with old_9: the vuse has a def_stmt which is an fesetround, 
and that fesetround must have succeeded because its argument has a 
def_stmt which is an fegetround. We are left with:

   fesetround (old_9);
   fesetround (2048);

If we know somehow that the second fesetround can't fail (hardcode a list 
of safe values per platform?), we can remove fesetround (old_9). If we 
also assume that only fesetround can modify the rounding mode, we can 
prove that the second fesetround is redundant and remove it. We could also 
imagine saying that in both blocks the rounding mode is what you get when 
it was old_9 and you try to set it to 2048, and thus remove both 
middle fesetround at once. In any case, that brings the desired state of 
both additions sharing a single pre/post fesetround.

Obviously that's wrong since the inline asm can modify the rounding mode 
(though why would you mix that with calls to fe*etround?), so that would 
probably require a nicer "protect", or even the special additions you 
mention in the next email.
Richard Biener Feb. 24, 2014, 1:27 p.m. UTC | #4
On Mon, Feb 24, 2014 at 1:43 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> On Mon, 24 Feb 2014, Richard Biener wrote:
>
>> On Sun, Feb 23, 2014 at 12:09 PM, Marc Glisse <marc.glisse@inria.fr>
>> wrote:
>>>
>>> Hello,
>>>
>>> a natural first step to optimize changes of rounding modes seems to be
>>> making these 2 functions builtins. I don't know exactly how far
>>> optimizations will be able to go (the fact that fesetround can fail
>>> complicates things a lot). What is included here:
>>>
>>> 1) fegetround is pure.
>>>
>>> 2) Neither function aliases (use or clobber) any memory. I expect this is
>>> likely not true on all platforms, some probably store the rounding mode
>>> in a
>>> global variable that is accessible through other means (though mixing
>>> direct
>>> accesses with calls to fe*etround seems a questionable style). Any
>>> opinion
>>> or advice here?
>>>
>>> Regtested on x86_64-linux-gnu, certainly not for 4.9.
>>
>>
>> Hohumm ... before making any of these functions less of a barrier than
>> they
>> are (at least for loads and stores), shouldn't we think of, and fix, the
>> lack of
>> any dependences between FP status word changes and actual arithmetic
>> instructions?
>
>
> I'd welcome such change, but it is beyond my gcc-foo (and my free time) for
> now.
>
>
>> In fact, using 'pure' or 'not use/clobber memory' here is exactly walking
>> on shaking grounds.
>
>
> I have a hard time seeing how making fegetround pure can break anything that
> accidentally works now. fegetround really is pure, it is fine to move it
> across float operations.

You mean "const" ;)  But yes, if you declare FP state to be "global memory"
then pure works (and is needed - you have to retain the dependency on a
fesetround).

Then if you assume that the "global memory" the FP state is in cannot be
addressed directly but has to go through fe* routines then declaring all
of them not clobbering/using a ref you can "name" would work.  Luckily
we don't have any predicates that disambiguate calls against each other
(yet).

What would break that works accidentally now is memory CSE across
fesetround/fegetround calls that exposes non-memory dependence chains
in arithmetic.  That's probably what makes most cases work that are
isolated into separate functions and that work on memory.

>> Simply because we lack of a way to say that this stmt uses/clobbers the FP
>> state (fegetround would be 'const' when following your logic in 2)).
>
>
> Not exactly, the logic in 2 is to say that the FP rounding mode is still a
> global variable, but not one that is accessible directly, so the alias
> oracle can never be called on a ref to it.
>
> (note that we probably don't want a single FP state but separate rounding
> mode on one side and exception flags on the other, since they are
> preserved/modified very differently)
>
>
>> Otherwise, what is it worth optimizing^breaking things even more than
>> we do now?
>
>
> With just my patch, probably not much. For someone interested, the kind of
> thing that I would like:
>
> #include <fenv.h>
> double protect(double x){asm volatile("":"+mx"(x));return x;}
> double add(double x,double y){
>   int old=fegetround();
>   fesetround(FE_UPWARD);
>   double res = protect(protect(x)+protect(y));
>   fesetround(old);
>   return res;
> }
> double f(double x,double y,double z){
>   return add(add(x,y),z);
> }
>
> (in practice I might add: if(old!=FE_UPWARD) in front of both fesetround)
> becomes:
>
>   old_9 = fegetround ();
>   fesetround (2048);
>   __asm__ __volatile__("" : "=mx" x_10 : "0" x_2(D));
>   __asm__ __volatile__("" : "=mx" x_11 : "0" y_3(D));
>   _12 = x_10 + x_11;
>   __asm__ __volatile__("" : "=mx" res_13 : "0" _12);
>   fesetround (old_9);
>   old_14 = fegetround ();
>   fesetround (2048);
>   __asm__ __volatile__("" : "=mx" x_15 : "0" res_13);
>   __asm__ __volatile__("" : "=mx" x_16 : "0" z_6(D));
>   _17 = x_15 + x_16;
>   __asm__ __volatile__("" : "=mx" res_18 : "0" _17);
>   fesetround (old_14);
>   return res_18;
>
> The interesting part is the 3 instructions in the middle. It is "easy" to
> replace old_14 with old_9: the vuse has a def_stmt which is an fesetround,
> and that fesetround must have succeeded because its argument has a def_stmt
> which is an fegetround. We are left with:
>
>   fesetround (old_9);
>   fesetround (2048);
>
> If we know somehow that the second fesetround can't fail (hardcode a list of
> safe values per platform?), we can remove fesetround (old_9). If we also
> assume that only fesetround can modify the rounding mode, we can prove that
> the second fesetround is redundant and remove it. We could also imagine
> saying that in both blocks the rounding mode is what you get when it was
> old_9 and you try to set it to 2048, and thus remove both middle fesetround
> at once. In any case, that brings the desired state of both additions
> sharing a single pre/post fesetround.
>
> Obviously that's wrong since the inline asm can modify the rounding mode
> (though why would you mix that with calls to fe*etround?), so that would
> probably require a nicer "protect", or even the special additions you
> mention in the next email.

Well, your asm cannot modify it as you don't have a use or clobber for FP
state (but there isn't any...).  Also I think the 'volatile' in the
asms isn't needed.

I see what you are after though.  I'm still not decided if we want to start
optimizing FP state modification when we can't even honor dependence on it ...
(optimizing FP state inspection might be another thing and somewhat more
obvious).

Let's revisit this during stage1.  I'd like to see the full set of C99 FP state
handling as builtins though, not a piecemeal addition.

Thanks,
Richard.

> --
> Marc Glisse
Marc Glisse Feb. 24, 2014, 1:47 p.m. UTC | #5
On Mon, 24 Feb 2014, Richard Biener wrote:

> Well, your asm cannot modify it as you don't have a use or clobber for FP
> state (but there isn't any...).

Ah, right, I was focusing on "volatile" and forgot there is no "memory"
clobber in those asms, cool, that may simplify things a lot.

> Also I think the 'volatile' in the asms isn't needed.

Right, I had it because of PR 56027 where we don't use fesetround but
the SSE intrinsics. But according to your comment #3 there, volatile is
needed even with fesetround.
diff mbox

Patch

Index: gcc/builtins.def
===================================================================
--- gcc/builtins.def	(revision 208045)
+++ gcc/builtins.def	(working copy)
@@ -276,20 +276,22 @@  DEF_C99_BUILTIN        (BUILT_IN_EXPM1F,
 DEF_C99_BUILTIN        (BUILT_IN_EXPM1L, "expm1l", BT_FN_LONGDOUBLE_LONGDOUBLE, ATTR_MATHFN_FPROUNDING_ERRNO)
 DEF_LIB_BUILTIN        (BUILT_IN_FABS, "fabs", BT_FN_DOUBLE_DOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_C99_C90RES_BUILTIN (BUILT_IN_FABSF, "fabsf", BT_FN_FLOAT_FLOAT, ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_C99_C90RES_BUILTIN (BUILT_IN_FABSL, "fabsl", BT_FN_LONGDOUBLE_LONGDOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_GCC_BUILTIN        (BUILT_IN_FABSD32, "fabsd32", BT_FN_DFLOAT32_DFLOAT32, ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_GCC_BUILTIN        (BUILT_IN_FABSD64, "fabsd64", BT_FN_DFLOAT64_DFLOAT64, ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_GCC_BUILTIN        (BUILT_IN_FABSD128, "fabsd128", BT_FN_DFLOAT128_DFLOAT128, ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_C99_BUILTIN        (BUILT_IN_FDIM, "fdim", BT_FN_DOUBLE_DOUBLE_DOUBLE, ATTR_MATHFN_FPROUNDING_ERRNO)
 DEF_C99_BUILTIN        (BUILT_IN_FDIMF, "fdimf", BT_FN_FLOAT_FLOAT_FLOAT, ATTR_MATHFN_FPROUNDING_ERRNO)
 DEF_C99_BUILTIN        (BUILT_IN_FDIML, "fdiml", BT_FN_LONGDOUBLE_LONGDOUBLE_LONGDOUBLE, ATTR_MATHFN_FPROUNDING_ERRNO)
+DEF_C99_BUILTIN        (BUILT_IN_FEGETROUND, "fegetround", BT_FN_INT, ATTR_PURE_NOTHROW_LEAF_LIST)
+DEF_C99_BUILTIN        (BUILT_IN_FESETROUND, "fesetround", BT_FN_INT_INT, ATTR_NOTHROW_LEAF_LIST)
 DEF_LIB_BUILTIN        (BUILT_IN_FLOOR, "floor", BT_FN_DOUBLE_DOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_C99_C90RES_BUILTIN (BUILT_IN_FLOORF, "floorf", BT_FN_FLOAT_FLOAT, ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_C99_C90RES_BUILTIN (BUILT_IN_FLOORL, "floorl", BT_FN_LONGDOUBLE_LONGDOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_C99_BUILTIN        (BUILT_IN_FMA, "fma", BT_FN_DOUBLE_DOUBLE_DOUBLE_DOUBLE, ATTR_MATHFN_FPROUNDING)
 DEF_C99_BUILTIN        (BUILT_IN_FMAF, "fmaf", BT_FN_FLOAT_FLOAT_FLOAT_FLOAT, ATTR_MATHFN_FPROUNDING)
 DEF_C99_BUILTIN        (BUILT_IN_FMAL, "fmal", BT_FN_LONGDOUBLE_LONGDOUBLE_LONGDOUBLE_LONGDOUBLE, ATTR_MATHFN_FPROUNDING)
 DEF_C99_BUILTIN        (BUILT_IN_FMAX, "fmax", BT_FN_DOUBLE_DOUBLE_DOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_C99_BUILTIN        (BUILT_IN_FMAXF, "fmaxf", BT_FN_FLOAT_FLOAT_FLOAT, ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_C99_BUILTIN        (BUILT_IN_FMAXL, "fmaxl", BT_FN_LONGDOUBLE_LONGDOUBLE_LONGDOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_C99_BUILTIN        (BUILT_IN_FMIN, "fmin", BT_FN_DOUBLE_DOUBLE_DOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST)
Index: gcc/testsuite/gcc.dg/tree-ssa/fegsetround.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/fegsetround.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/fegsetround.c	(working copy)
@@ -0,0 +1,32 @@ 
+/* { dg-do compile } */
+/* { dg-options "-std=c99 -O -fdump-tree-optimized" } */
+
+#include <fenv.h>
+
+int a;
+int f ()
+{
+  a = 42;
+  // don't read a
+  int x = fegetround ();
+  fesetround (x + 1);
+  a = 0;
+  return a;
+}
+int g ()
+{
+  a = 0;
+  // don't write a
+  int x = fegetround ();
+  fesetround (x + 1);
+  return a;
+}
+int h ()
+{
+  // pure
+  return fegetround () - fegetround ();
+}
+
+/* { dg-final { scan-tree-dump-times "return 0" 3 "optimized" } } */
+/* { dg-final { scan-tree-dump-not "a = 42" "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */

Property changes on: gcc/testsuite/gcc.dg/tree-ssa/fegsetround.c
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Revision URL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: gcc/tree-ssa-alias.c
===================================================================
--- gcc/tree-ssa-alias.c	(revision 208045)
+++ gcc/tree-ssa-alias.c	(working copy)
@@ -1537,20 +1537,22 @@  ref_maybe_used_by_call_p_1 (gimple call,
 	case BUILT_IN_MODFF:
 	case BUILT_IN_MODFL:
 	case BUILT_IN_REMQUO:
 	case BUILT_IN_REMQUOF:
 	case BUILT_IN_REMQUOL:
 	case BUILT_IN_SINCOS:
 	case BUILT_IN_SINCOSF:
 	case BUILT_IN_SINCOSL:
 	case BUILT_IN_ASSUME_ALIGNED:
 	case BUILT_IN_VA_END:
+	case BUILT_IN_FEGETROUND:
+	case BUILT_IN_FESETROUND:
 	  return false;
 	/* __sync_* builtins and some OpenMP builtins act as threading
 	   barriers.  */
 #undef DEF_SYNC_BUILTIN
 #define DEF_SYNC_BUILTIN(ENUM, NAME, TYPE, ATTRS) case ENUM:
 #include "sync-builtins.def"
 #undef DEF_SYNC_BUILTIN
 	case BUILT_IN_GOMP_ATOMIC_START:
 	case BUILT_IN_GOMP_ATOMIC_END:
 	case BUILT_IN_GOMP_BARRIER:
@@ -1831,20 +1833,21 @@  call_may_clobber_ref_p_1 (gimple call, a
 	case BUILT_IN_STRNDUP:
 	  /* Unix98 specifies that errno is set on allocation failure.  */
 	  if (flag_errno_math
 	      && targetm.ref_may_alias_errno (ref))
 	    return true;
 	  return false;
 	case BUILT_IN_STACK_SAVE:
 	case BUILT_IN_ALLOCA:
 	case BUILT_IN_ALLOCA_WITH_ALIGN:
 	case BUILT_IN_ASSUME_ALIGNED:
+	case BUILT_IN_FESETROUND:
 	  return false;
 	/* But posix_memalign stores a pointer into the memory pointed to
 	   by its first argument.  */
 	case BUILT_IN_POSIX_MEMALIGN:
 	  {
 	    tree ptrptr = gimple_call_arg (call, 0);
 	    ao_ref dref;
 	    ao_ref_init_from_ptr_and_size (&dref, ptrptr,
 					   TYPE_SIZE_UNIT (ptr_type_node));
 	    return (refs_may_alias_p_1 (&dref, ref, false)