Message ID | 20150512221234.GA54352@kam.mff.cuni.cz |
---|---|
State | New |
Headers | show |
On Wed, May 13, 2015 at 12:12 AM, Jan Hubicka <hubicka@ucw.cz> wrote: > > Hi, > this patch works around PR where we refuse to inline always_inline memcpy > into function with explicit Ofast optimization attribute. This is because > we think we cannot promote -fno-fast-math code to -ffast-math code. > This is of course safe for memcpy because it contains no fast-math code, > but we don't really do the analysis for each of the flags we check. > > Earlier compilers were happily producing wrong code here and it seems practical > to do that on GCC 5 to deal with common fallout. I will implement the more > fine-grained check incrementally. > > Bootstrapped/regtested x86_64-linux. Will commit it to mainline shortly and > to release branch later this week. Hmm, the changelog or the description above doesn't cover the @@ -481,6 +490,17 @@ can_inline_edge_p (struct cgraph_edge *e e->inline_failed = CIF_OPTIMIZATION_MISMATCH; inlinable = false; } + /* If explicit optimize attribute are not used, the mismatch is caused + by different command line options used to build different units. + Do not care about COMDAT functions - those are intended to be + optimized with the optimization flags of module they are used in. + Also do not care about mixing up size/speed optimization when + DECL_DISREGARD_INLINE_LIMITS is set. */ + else if ((callee->merged + && !lookup_attribute ("optimize", + DECL_ATTRIBUTES (caller->decl))) + || DECL_DISREGARD_INLINE_LIMITS (callee->decl)) + ; /* If mismatch is caused by merging two LTO units with different optimizationflags we want to be bit nicer. However never inline if one of functions is not optimized at all. */ hunk. > PR ipa/65873 > * ipa-inline.c (can_inline_edge_p): Allow early inlining of always > inlines across optimization boundary. > * testsuite/gcc.c-torture/compile/pr65873.c: New testcase. 
> Index: ipa-inline.c > =================================================================== > --- ipa-inline.c (revision 223093) > +++ ipa-inline.c (working copy) > @@ -427,46 +427,55 @@ can_inline_edge_p (struct cgraph_edge *e > && lookup_attribute ("always_inline", > DECL_ATTRIBUTES (callee->decl))); > > + /* Until GCC 4.9 we did not check the semantics alterning flags > + bellow and inline across optimization boundry. > + Enabling checks bellow breaks several packages by refusing > + to inline library always_inline functions. See PR65873. > + Disable the check for early inlining for now until better solution > + is found. */ > + if (always_inline && early) > + ; > /* There are some options that change IL semantics which means > we cannot inline in these cases for correctness reason. > Not even for always_inline declared functions. */ > /* Strictly speaking only when the callee contains signed integer > math where overflow is undefined. */ > - if ((check_maybe_up (flag_strict_overflow) > - /* this flag is set by optimize. Allow inlining across > - optimize boundary. */ > - && (!opt_for_fn (caller->decl, optimize) > - == !opt_for_fn (callee->decl, optimize) || !always_inline)) > - || check_match (flag_wrapv) > - || check_match (flag_trapv) > - /* Strictly speaking only when the callee uses FP math. */ > - || check_maybe_up (flag_rounding_math) > - || check_maybe_up (flag_trapping_math) > - || check_maybe_down (flag_unsafe_math_optimizations) > - || check_maybe_down (flag_finite_math_only) > - || check_maybe_up (flag_signaling_nans) > - || check_maybe_down (flag_cx_limited_range) > - || check_maybe_up (flag_signed_zeros) > - || check_maybe_down (flag_associative_math) > - || check_maybe_down (flag_reciprocal_math) > - /* We do not want to make code compiled with exceptions to be brought > - into a non-EH function unless we know that the callee does not > - throw. This is tracked by DECL_FUNCTION_PERSONALITY. 
*/ > - || (check_match (flag_non_call_exceptions) > - /* TODO: We also may allow bringing !flag_non_call_exceptions > - to flag_non_call_exceptions function, but that may need > - extra work in tree-inline to add the extra EH edges. */ > - && (!opt_for_fn (callee->decl, flag_non_call_exceptions) > - || DECL_FUNCTION_PERSONALITY (callee->decl))) > - || (check_maybe_up (flag_exceptions) > - && DECL_FUNCTION_PERSONALITY (callee->decl)) > - /* Strictly speaking only when the callee contains function > - calls that may end up setting errno. */ > - || check_maybe_up (flag_errno_math) > - /* When devirtualization is diabled for callee, it is not safe > - to inline it as we possibly mangled the type info. > - Allow early inlining of always inlines. */ > - || (!early && check_maybe_down (flag_devirtualize))) > + else if ((check_maybe_up (flag_strict_overflow) > + /* this flag is set by optimize. Allow inlining across > + optimize boundary. */ > + && (!opt_for_fn (caller->decl, optimize) > + == !opt_for_fn (callee->decl, optimize) || !always_inline)) > + || check_match (flag_wrapv) > + || check_match (flag_trapv) > + /* Strictly speaking only when the callee uses FP math. */ > + || check_maybe_up (flag_rounding_math) > + || check_maybe_up (flag_trapping_math) > + || check_maybe_down (flag_unsafe_math_optimizations) > + || check_maybe_down (flag_finite_math_only) > + || check_maybe_up (flag_signaling_nans) > + || check_maybe_down (flag_cx_limited_range) > + || check_maybe_up (flag_signed_zeros) > + || check_maybe_down (flag_associative_math) > + || check_maybe_down (flag_reciprocal_math) > + /* We do not want to make code compiled with exceptions to be > + brought into a non-EH function unless we know that the callee > + does not throw. > + This is tracked by DECL_FUNCTION_PERSONALITY. 
*/ > + || (check_match (flag_non_call_exceptions) > + /* TODO: We also may allow bringing !flag_non_call_exceptions > + to flag_non_call_exceptions function, but that may need > + extra work in tree-inline to add the extra EH edges. */ > + && (!opt_for_fn (callee->decl, flag_non_call_exceptions) > + || DECL_FUNCTION_PERSONALITY (callee->decl))) > + || (check_maybe_up (flag_exceptions) > + && DECL_FUNCTION_PERSONALITY (callee->decl)) > + /* Strictly speaking only when the callee contains function > + calls that may end up setting errno. */ > + || check_maybe_up (flag_errno_math) > + /* When devirtualization is diabled for callee, it is not safe > + to inline it as we possibly mangled the type info. > + Allow early inlining of always inlines. */ > + || (!early && check_maybe_down (flag_devirtualize))) > { > e->inline_failed = CIF_OPTIMIZATION_MISMATCH; > inlinable = false; > @@ -481,6 +490,17 @@ can_inline_edge_p (struct cgraph_edge *e > e->inline_failed = CIF_OPTIMIZATION_MISMATCH; > inlinable = false; > } > + /* If explicit optimize attribute are not used, the mismatch is caused > + by different command line options used to build different units. > + Do not care about COMDAT functions - those are intended to be > + optimized with the optimization flags of module they are used in. > + Also do not care about mixing up size/speed optimization when > + DECL_DISREGARD_INLINE_LIMITS is set. */ > + else if ((callee->merged > + && !lookup_attribute ("optimize", > + DECL_ATTRIBUTES (caller->decl))) > + || DECL_DISREGARD_INLINE_LIMITS (callee->decl)) > + ; > /* If mismatch is caused by merging two LTO units with different > optimizationflags we want to be bit nicer. However never inline > if one of functions is not optimized at all. 
*/ > Index: testsuite/gcc.c-torture/compile/pr65873.c > =================================================================== > --- testsuite/gcc.c-torture/compile/pr65873.c (revision 0) > +++ testsuite/gcc.c-torture/compile/pr65873.c (revision 0) > @@ -0,0 +1,14 @@ > +typedef __SIZE_TYPE__ size_t; > + > +extern inline __attribute__ ((__always_inline__, __gnu_inline__, __artificial__, __nothrow__, __leaf__)) void * > +memcpy (void *__restrict __dest, const void *__restrict __src, size_t __len) > +{ > + return __builtin___memcpy_chk (__dest, __src, __len, __builtin_object_size (__dest, 0)); > +} > + > +__attribute__((optimize ("Ofast"))) void > +bar (void *d, void *s, size_t l) > +{ > + memcpy (d, s, l); > +} > +
> On Wed, May 13, 2015 at 12:12 AM, Jan Hubicka <hubicka@ucw.cz> wrote: > > > > Hi, > > this patch works around PR where we refuse to inline always_inline memcpy > > into function with explicit Ofast optimization attribute. This is because > > we think we can not promote -fno-fast-math code to -fast-math code. > > This is of course safe for memcpy because it contains to fast-math code, > > but we don't really do the analysis for each of the flags we check. > > > > Earlier compilers was happily producing wrong code here and it seems practical > > to do that on GCC 5 to deal with common falout. I will implement the more > > fine grained check incrementally. > > > > Bootstrapped/regtested x86_64-linux. Will commit it to mainline shortly and > > to release branch later this week. > > Hmm, the changelog or the description above doesn't cover the > > @@ -481,6 +490,17 @@ can_inline_edge_p (struct cgraph_edge *e > e->inline_failed = CIF_OPTIMIZATION_MISMATCH; > inlinable = false; > } > + /* If explicit optimize attribute are not used, the mismatch is caused > + by different command line options used to build different units. > + Do not care about COMDAT functions - those are intended to be > + optimized with the optimization flags of module they are used in. > + Also do not care about mixing up size/speed optimization when > + DECL_DISREGARD_INLINE_LIMITS is set. */ > + else if ((callee->merged > + && !lookup_attribute ("optimize", > + DECL_ATTRIBUTES (caller->decl))) > + || DECL_DISREGARD_INLINE_LIMITS (callee->decl)) > + ; > /* If mismatch is caused by merging two LTO units with different > optimizationflags we want to be bit nicer. However never inline > if one of functions is not optimized at all. */ > > hunk. Ah, sorry. The change was intended, but I forgot to write about it. Yep, as comment says this makes us a bit more relaxed about inlining COMDATs when the prevailing variant is -Os and caller body is -O3 or vice versa. 
Honza > > > PR ipa/65873 > > * ipa-inline.c (can_inline_edge_p): Allow early inlining of always > > inlines across optimization boundary. > > * testsuite/gcc.c-torture/compile/pr65873.c: New testcase. > > Index: ipa-inline.c > > =================================================================== > > --- ipa-inline.c (revision 223093) > > +++ ipa-inline.c (working copy) > > @@ -427,46 +427,55 @@ can_inline_edge_p (struct cgraph_edge *e > > && lookup_attribute ("always_inline", > > DECL_ATTRIBUTES (callee->decl))); > > > > + /* Until GCC 4.9 we did not check the semantics alterning flags > > + bellow and inline across optimization boundry. > > + Enabling checks bellow breaks several packages by refusing > > + to inline library always_inline functions. See PR65873. > > + Disable the check for early inlining for now until better solution > > + is found. */ > > + if (always_inline && early) > > + ; > > /* There are some options that change IL semantics which means > > we cannot inline in these cases for correctness reason. > > Not even for always_inline declared functions. */ > > /* Strictly speaking only when the callee contains signed integer > > math where overflow is undefined. */ > > - if ((check_maybe_up (flag_strict_overflow) > > - /* this flag is set by optimize. Allow inlining across > > - optimize boundary. */ > > - && (!opt_for_fn (caller->decl, optimize) > > - == !opt_for_fn (callee->decl, optimize) || !always_inline)) > > - || check_match (flag_wrapv) > > - || check_match (flag_trapv) > > - /* Strictly speaking only when the callee uses FP math. 
*/ > > - || check_maybe_up (flag_rounding_math) > > - || check_maybe_up (flag_trapping_math) > > - || check_maybe_down (flag_unsafe_math_optimizations) > > - || check_maybe_down (flag_finite_math_only) > > - || check_maybe_up (flag_signaling_nans) > > - || check_maybe_down (flag_cx_limited_range) > > - || check_maybe_up (flag_signed_zeros) > > - || check_maybe_down (flag_associative_math) > > - || check_maybe_down (flag_reciprocal_math) > > - /* We do not want to make code compiled with exceptions to be brought > > - into a non-EH function unless we know that the callee does not > > - throw. This is tracked by DECL_FUNCTION_PERSONALITY. */ > > - || (check_match (flag_non_call_exceptions) > > - /* TODO: We also may allow bringing !flag_non_call_exceptions > > - to flag_non_call_exceptions function, but that may need > > - extra work in tree-inline to add the extra EH edges. */ > > - && (!opt_for_fn (callee->decl, flag_non_call_exceptions) > > - || DECL_FUNCTION_PERSONALITY (callee->decl))) > > - || (check_maybe_up (flag_exceptions) > > - && DECL_FUNCTION_PERSONALITY (callee->decl)) > > - /* Strictly speaking only when the callee contains function > > - calls that may end up setting errno. */ > > - || check_maybe_up (flag_errno_math) > > - /* When devirtualization is diabled for callee, it is not safe > > - to inline it as we possibly mangled the type info. > > - Allow early inlining of always inlines. */ > > - || (!early && check_maybe_down (flag_devirtualize))) > > + else if ((check_maybe_up (flag_strict_overflow) > > + /* this flag is set by optimize. Allow inlining across > > + optimize boundary. */ > > + && (!opt_for_fn (caller->decl, optimize) > > + == !opt_for_fn (callee->decl, optimize) || !always_inline)) > > + || check_match (flag_wrapv) > > + || check_match (flag_trapv) > > + /* Strictly speaking only when the callee uses FP math. 
*/ > > + || check_maybe_up (flag_rounding_math) > > + || check_maybe_up (flag_trapping_math) > > + || check_maybe_down (flag_unsafe_math_optimizations) > > + || check_maybe_down (flag_finite_math_only) > > + || check_maybe_up (flag_signaling_nans) > > + || check_maybe_down (flag_cx_limited_range) > > + || check_maybe_up (flag_signed_zeros) > > + || check_maybe_down (flag_associative_math) > > + || check_maybe_down (flag_reciprocal_math) > > + /* We do not want to make code compiled with exceptions to be > > + brought into a non-EH function unless we know that the callee > > + does not throw. > > + This is tracked by DECL_FUNCTION_PERSONALITY. */ > > + || (check_match (flag_non_call_exceptions) > > + /* TODO: We also may allow bringing !flag_non_call_exceptions > > + to flag_non_call_exceptions function, but that may need > > + extra work in tree-inline to add the extra EH edges. */ > > + && (!opt_for_fn (callee->decl, flag_non_call_exceptions) > > + || DECL_FUNCTION_PERSONALITY (callee->decl))) > > + || (check_maybe_up (flag_exceptions) > > + && DECL_FUNCTION_PERSONALITY (callee->decl)) > > + /* Strictly speaking only when the callee contains function > > + calls that may end up setting errno. */ > > + || check_maybe_up (flag_errno_math) > > + /* When devirtualization is diabled for callee, it is not safe > > + to inline it as we possibly mangled the type info. > > + Allow early inlining of always inlines. */ > > + || (!early && check_maybe_down (flag_devirtualize))) > > { > > e->inline_failed = CIF_OPTIMIZATION_MISMATCH; > > inlinable = false; > > @@ -481,6 +490,17 @@ can_inline_edge_p (struct cgraph_edge *e > > e->inline_failed = CIF_OPTIMIZATION_MISMATCH; > > inlinable = false; > > } > > + /* If explicit optimize attribute are not used, the mismatch is caused > > + by different command line options used to build different units. 
> > + Do not care about COMDAT functions - those are intended to be > > + optimized with the optimization flags of module they are used in. > > + Also do not care about mixing up size/speed optimization when > > + DECL_DISREGARD_INLINE_LIMITS is set. */ > > + else if ((callee->merged > > + && !lookup_attribute ("optimize", > > + DECL_ATTRIBUTES (caller->decl))) > > + || DECL_DISREGARD_INLINE_LIMITS (callee->decl)) > > + ; > > /* If mismatch is caused by merging two LTO units with different > > optimizationflags we want to be bit nicer. However never inline > > if one of functions is not optimized at all. */ > > Index: testsuite/gcc.c-torture/compile/pr65873.c > > =================================================================== > > --- testsuite/gcc.c-torture/compile/pr65873.c (revision 0) > > +++ testsuite/gcc.c-torture/compile/pr65873.c (revision 0) > > @@ -0,0 +1,14 @@ > > +typedef __SIZE_TYPE__ size_t; > > + > > +extern inline __attribute__ ((__always_inline__, __gnu_inline__, __artificial__, __nothrow__, __leaf__)) void * > > +memcpy (void *__restrict __dest, const void *__restrict __src, size_t __len) > > +{ > > + return __builtin___memcpy_chk (__dest, __src, __len, __builtin_object_size (__dest, 0)); > > +} > > + > > +__attribute__((optimize ("Ofast"))) void > > +bar (void *d, void *s, size_t l) > > +{ > > + memcpy (d, s, l); > > +} > > +
On 05/12/2015 04:12 PM, Jan Hubicka wrote: > > Hi, > this patch works around PR where we refuse to inline always_inline memcpy > into function with explicit Ofast optimization attribute. This is because > we think we cannot promote -fno-fast-math code to -ffast-math code. > This is of course safe for memcpy because it contains no fast-math code, > but we don't really do the analysis for each of the flags we check. > > Earlier compilers were happily producing wrong code here and it seems practical > to do that on GCC 5 to deal with common fallout. I will implement the more > fine-grained check incrementally. > > Bootstrapped/regtested x86_64-linux. Will commit it to mainline shortly and > to release branch later this week. > > PR ipa/65873 > * ipa-inline.c (can_inline_edge_p): Allow early inlining of always > inlines across optimization boundary. > * testsuite/gcc.c-torture/compile/pr65873.c: New testcase. > Index: ipa-inline.c > =================================================================== > --- ipa-inline.c (revision 223093) > +++ ipa-inline.c (working copy) > @@ -427,46 +427,55 @@ can_inline_edge_p (struct cgraph_edge *e > && lookup_attribute ("always_inline", > DECL_ATTRIBUTES (callee->decl))); > I'm having problems parsing the following comment block: > + /* Until GCC 4.9 we did not check the semantics alterning flags "alterning"? > + bellow and inline across optimization boundry. s/bellow/below/g s/boundry/boundary/ > + Enabling checks bellow breaks several packages by refusing > + to inline library always_inline functions. See PR65873. > + Disable the check for early inlining for now until better solution > + is found. */ > + if (always_inline && early) > + ; -Sandra
Index: ipa-inline.c =================================================================== --- ipa-inline.c (revision 223093) +++ ipa-inline.c (working copy) @@ -427,46 +427,55 @@ can_inline_edge_p (struct cgraph_edge *e && lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee->decl))); + /* Until GCC 4.9 we did not check the semantics alterning flags + bellow and inline across optimization boundry. + Enabling checks bellow breaks several packages by refusing + to inline library always_inline functions. See PR65873. + Disable the check for early inlining for now until better solution + is found. */ + if (always_inline && early) + ; /* There are some options that change IL semantics which means we cannot inline in these cases for correctness reason. Not even for always_inline declared functions. */ /* Strictly speaking only when the callee contains signed integer math where overflow is undefined. */ - if ((check_maybe_up (flag_strict_overflow) - /* this flag is set by optimize. Allow inlining across - optimize boundary. */ - && (!opt_for_fn (caller->decl, optimize) - == !opt_for_fn (callee->decl, optimize) || !always_inline)) - || check_match (flag_wrapv) - || check_match (flag_trapv) - /* Strictly speaking only when the callee uses FP math. */ - || check_maybe_up (flag_rounding_math) - || check_maybe_up (flag_trapping_math) - || check_maybe_down (flag_unsafe_math_optimizations) - || check_maybe_down (flag_finite_math_only) - || check_maybe_up (flag_signaling_nans) - || check_maybe_down (flag_cx_limited_range) - || check_maybe_up (flag_signed_zeros) - || check_maybe_down (flag_associative_math) - || check_maybe_down (flag_reciprocal_math) - /* We do not want to make code compiled with exceptions to be brought - into a non-EH function unless we know that the callee does not - throw. This is tracked by DECL_FUNCTION_PERSONALITY. 
*/ - || (check_match (flag_non_call_exceptions) - /* TODO: We also may allow bringing !flag_non_call_exceptions - to flag_non_call_exceptions function, but that may need - extra work in tree-inline to add the extra EH edges. */ - && (!opt_for_fn (callee->decl, flag_non_call_exceptions) - || DECL_FUNCTION_PERSONALITY (callee->decl))) - || (check_maybe_up (flag_exceptions) - && DECL_FUNCTION_PERSONALITY (callee->decl)) - /* Strictly speaking only when the callee contains function - calls that may end up setting errno. */ - || check_maybe_up (flag_errno_math) - /* When devirtualization is diabled for callee, it is not safe - to inline it as we possibly mangled the type info. - Allow early inlining of always inlines. */ - || (!early && check_maybe_down (flag_devirtualize))) + else if ((check_maybe_up (flag_strict_overflow) + /* this flag is set by optimize. Allow inlining across + optimize boundary. */ + && (!opt_for_fn (caller->decl, optimize) + == !opt_for_fn (callee->decl, optimize) || !always_inline)) + || check_match (flag_wrapv) + || check_match (flag_trapv) + /* Strictly speaking only when the callee uses FP math. */ + || check_maybe_up (flag_rounding_math) + || check_maybe_up (flag_trapping_math) + || check_maybe_down (flag_unsafe_math_optimizations) + || check_maybe_down (flag_finite_math_only) + || check_maybe_up (flag_signaling_nans) + || check_maybe_down (flag_cx_limited_range) + || check_maybe_up (flag_signed_zeros) + || check_maybe_down (flag_associative_math) + || check_maybe_down (flag_reciprocal_math) + /* We do not want to make code compiled with exceptions to be + brought into a non-EH function unless we know that the callee + does not throw. + This is tracked by DECL_FUNCTION_PERSONALITY. */ + || (check_match (flag_non_call_exceptions) + /* TODO: We also may allow bringing !flag_non_call_exceptions + to flag_non_call_exceptions function, but that may need + extra work in tree-inline to add the extra EH edges. 
*/ + && (!opt_for_fn (callee->decl, flag_non_call_exceptions) + || DECL_FUNCTION_PERSONALITY (callee->decl))) + || (check_maybe_up (flag_exceptions) + && DECL_FUNCTION_PERSONALITY (callee->decl)) + /* Strictly speaking only when the callee contains function + calls that may end up setting errno. */ + || check_maybe_up (flag_errno_math) + /* When devirtualization is diabled for callee, it is not safe + to inline it as we possibly mangled the type info. + Allow early inlining of always inlines. */ + || (!early && check_maybe_down (flag_devirtualize))) { e->inline_failed = CIF_OPTIMIZATION_MISMATCH; inlinable = false; @@ -481,6 +490,17 @@ can_inline_edge_p (struct cgraph_edge *e e->inline_failed = CIF_OPTIMIZATION_MISMATCH; inlinable = false; } + /* If explicit optimize attribute are not used, the mismatch is caused + by different command line options used to build different units. + Do not care about COMDAT functions - those are intended to be + optimized with the optimization flags of module they are used in. + Also do not care about mixing up size/speed optimization when + DECL_DISREGARD_INLINE_LIMITS is set. */ + else if ((callee->merged + && !lookup_attribute ("optimize", + DECL_ATTRIBUTES (caller->decl))) + || DECL_DISREGARD_INLINE_LIMITS (callee->decl)) + ; /* If mismatch is caused by merging two LTO units with different optimizationflags we want to be bit nicer. However never inline if one of functions is not optimized at all. 
*/ Index: testsuite/gcc.c-torture/compile/pr65873.c =================================================================== --- testsuite/gcc.c-torture/compile/pr65873.c (revision 0) +++ testsuite/gcc.c-torture/compile/pr65873.c (revision 0) @@ -0,0 +1,14 @@ +typedef __SIZE_TYPE__ size_t; + +extern inline __attribute__ ((__always_inline__, __gnu_inline__, __artificial__, __nothrow__, __leaf__)) void * +memcpy (void *__restrict __dest, const void *__restrict __src, size_t __len) +{ + return __builtin___memcpy_chk (__dest, __src, __len, __builtin_object_size (__dest, 0)); +} + +__attribute__((optimize ("Ofast"))) void +bar (void *d, void *s, size_t l) +{ + memcpy (d, s, l); +} +