Message ID | 53B5C658.9020700@linux.vnet.ibm.com |
---|---|
State | New |
Headers | show |
Ping. On 03-07-2014 18:08, Adhemerval Zanella wrote: > This patch implements the TARGET_ATOMIC_ASSIGN_EXPAND_FENV for > powerpc-fpu. I have to adjust current c11-atomic-exec-5 testcase > because for IBM long double 0 += LDBL_MAX might generate > overflow/underflow in internal __gcc_qadd calculations. > > The c11-atomic-exec-5 now passes for linux/powerpc, checked on > powerpc32-linux-fpu, powerpc64-linux, and powerpc64le-linux. > > -- > > 2014-07-03 Adhemerval Zanella <azanella@linux.vnet.ibm.com> > > gcc: > * config/rs6000/rs6000.c (rs6000_atomic_assign_expand_fenv): New > function. > > gcc/testsuite: > * gcc.dg/atomic/c11-atomic-exec-5.c > (test_main_long_double_add_overflow): Define and run only for > LDBL_MANT_DIG != 106. > (test_main_complex_long_double_add_overflow): Likewise. > (test_main_long_double_sub_overflow): Likewise. > (test_main_complex_long_double_sub_overflow): Likewise. > > --- > > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c > index bf67e72..75a2a45 100644 > --- a/gcc/config/rs6000/rs6000.c > +++ b/gcc/config/rs6000/rs6000.c > @@ -1621,6 +1621,9 @@ static const struct attribute_spec rs6000_attribute_table[] = > > #undef TARGET_CAN_USE_DOLOOP_P > #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost > + > +#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV > +#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv > > > /* Processor table. */ > @@ -32991,6 +32994,105 @@ emit_fusion_gpr_load (rtx *operands) > return ""; > } > > +/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. 
*/ > + > +static void > +rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) > +{ > + if (!TARGET_HARD_FLOAT || !TARGET_FPRS) > + return; > + > + tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS]; > + tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF]; > + tree call_mffs = build_call_expr (mffs, 0); > + > + /* Generates the equivalent of feholdexcept (&fenv_var) > + > + *fenv_var = __builtin_mffs (); > + double fenv_hold; > + *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL; > + __builtin_mtfsf (0xff, fenv_hold); */ > + > + /* Mask to clear everything except for the rounding modes and non-IEEE > + arithmetic flag. */ > + const unsigned HOST_WIDE_INT hold_exception_mask = > + HOST_WIDE_INT_C (0xffffffff00000007); > + > + tree fenv_var = create_tmp_var (double_type_node, NULL); > + > + tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs); > + > + tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var); > + tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu, > + build_int_cst (uint64_type_node, hold_exception_mask)); > + > + tree fenv_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node, fenv_llu_and); > + > + tree hold_mtfsf = build_call_expr (mtfsf, 2, > + build_int_cst (unsigned_type_node, 0xff), fenv_mtfsf); > + > + *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf); > + > + /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT): > + > + double fenv_clear = __builtin_mffs (); > + *(uint64_t)&fenv_clear &= 0xffffffff00000000LL; > + __builtin_mtfsf (0xff, fenv_clear); */ > + > + /* Mask to clear everything except for the rounding modes and non-IEEE > + arithmetic flag. 
*/ > + const unsigned HOST_WIDE_INT clear_exception_mask = > + HOST_WIDE_INT_UC (0xffffffff00000000); > + > + tree fenv_clear = create_tmp_var (double_type_node, NULL); > + > + tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs); > + > + tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var); > + tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, > + fenv_clean_llu, build_int_cst (uint64_type_node, clear_exception_mask)); > + > + tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node, > + fenv_clear_llu_and); > + > + tree clear_mtfsf = build_call_expr (mtfsf, 2, > + build_int_cst (unsigned_type_node, 0xff), fenv_clear_mtfsf); > + > + *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf); > + > + /* Generates the equivalent of feupdateenv (&fenv_var) > + > + double old_fenv = __builtin_mffs (); > + double fenv_update; > + *(uint64_t*)&fenv_update = (*(uint64_t*)&old & 0xffffffff1fffff00LL) | > + (*(uint64_t*)fenv_var 0x1ff80fff); > + __builtin_mtfsf (0xff, fenv_update); */ > + > + const unsigned HOST_WIDE_INT update_exception_mask = > + HOST_WIDE_INT_UC (0xffffffff1fffff00); > + const unsigned HOST_WIDE_INT new_exception_mask = > + HOST_WIDE_INT_UC (0x1ff80fff); > + > + tree old_fenv = create_tmp_var (double_type_node, NULL); > + tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs); > + > + tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, update_mffs); > + tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, > + old_llu, build_int_cst (uint64_type_node, update_exception_mask)); > + > + tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu, > + build_int_cst (uint64_type_node, new_exception_mask)); > + > + tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node, > + old_llu_and, new_llu_and); > + > + tree fenv_mtfsf_update = build1 (VIEW_CONVERT_EXPR, double_type_node, > + new_llu_mask); > + > + *update = 
build_call_expr (mtfsf, 2, > + build_int_cst (unsigned_type_node, 0xff), fenv_mtfsf_update); > +} > + > > struct gcc_target targetm = TARGET_INITIALIZER; > > diff --git a/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c b/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c > index bc87de4..0a2c9c4 100644 > --- a/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c > +++ b/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c > @@ -325,11 +325,11 @@ TEST_FUNCS (complex_double_div_overflow, _Complex double, , /= DBL_MIN, 0, > TEST_FUNCS (long_double_add_invalid, long double, , += __builtin_infl (), 0, > 0, __builtin_isinf, 0, > -__builtin_infl (), FE_INVALID) > +#if LDBL_MANT_DIG != 106 > TEST_FUNCS (long_double_add_overflow, long double, , += LDBL_MAX, 0, > LDBL_MAX, __builtin_isinf, FE_OVERFLOW | FE_INEXACT, > 0, 0) > #define NOT_LDBL_EPSILON_2(X) ((X) != LDBL_EPSILON / 2) > -#if LDBL_MANT_DIG != 106 > TEST_FUNCS (long_double_add_inexact, long double, , += LDBL_EPSILON / 2, 0, > 1.0L, NOT_LDBL_EPSILON_2, FE_INEXACT, > 0, 0) > @@ -342,18 +342,18 @@ TEST_FUNCS (long_double_preinc_inexact, long double, ++, , 0, > TEST_FUNCS (long_double_postinc_inexact, long double, , ++, 0, > LDBL_EPSILON / 2, NOT_MINUS_1, FE_INEXACT, > -1, 0) > -#endif > TEST_FUNCS (complex_long_double_add_overflow, _Complex long double, , += LDBL_MAX, 0, > LDBL_MAX, REAL_ISINF, FE_OVERFLOW | FE_INEXACT, > 0, 0) > +#endif > TEST_FUNCS (long_double_sub_invalid, long double, , -= __builtin_infl (), 0, > 0, __builtin_isinf, 0, > __builtin_infl (), FE_INVALID) > +#if LDBL_MANT_DIG != 106 > TEST_FUNCS (long_double_sub_overflow, long double, , -= LDBL_MAX, 0, > -LDBL_MAX, __builtin_isinf, FE_OVERFLOW | FE_INEXACT, > 0, 0) > #define NOT_MINUS_LDBL_EPSILON_2(X) ((X) != -LDBL_EPSILON / 2) > -#if LDBL_MANT_DIG != 106 > TEST_FUNCS (long_double_sub_inexact, long double, , -= LDBL_EPSILON / 2, 0, > -1.0L, NOT_MINUS_LDBL_EPSILON_2, FE_INEXACT, > 0, 0) > @@ -366,10 +366,10 @@ TEST_FUNCS (long_double_predec_inexact, long double, 
--, , 0, > TEST_FUNCS (long_double_postdec_inexact, long double, , --, 0, > -LDBL_EPSILON / 2, NOT_1, FE_INEXACT, > 1, 0) > -#endif > TEST_FUNCS (complex_long_double_sub_overflow, _Complex long double, , -= LDBL_MAX, 0, > -LDBL_MAX, REAL_ISINF, FE_OVERFLOW | FE_INEXACT, > 0, 0) > +#endif > TEST_FUNCS (long_double_mul_invalid, long double, , *= __builtin_infl (), 0, > __builtin_infl (), __builtin_isinf, 0, > 0, FE_INVALID) > @@ -501,23 +501,23 @@ main (void) > ret |= test_main_int_div_double_inexact (); > ret |= test_main_complex_double_div_overflow (); > ret |= test_main_long_double_add_invalid (); > - ret |= test_main_long_double_add_overflow (); > #if LDBL_MANT_DIG != 106 > + ret |= test_main_long_double_add_overflow (); > ret |= test_main_long_double_add_inexact (); > ret |= test_main_long_double_add_inexact_int (); > ret |= test_main_long_double_preinc_inexact (); > ret |= test_main_long_double_postinc_inexact (); > -#endif > ret |= test_main_complex_long_double_add_overflow (); > +#endif > ret |= test_main_long_double_sub_invalid (); > - ret |= test_main_long_double_sub_overflow (); > #if LDBL_MANT_DIG != 106 > + ret |= test_main_long_double_sub_overflow (); > ret |= test_main_long_double_sub_inexact (); > ret |= test_main_long_double_sub_inexact_int (); > ret |= test_main_long_double_predec_inexact (); > ret |= test_main_long_double_postdec_inexact (); > -#endif > ret |= test_main_complex_long_double_sub_overflow (); > +#endif > ret |= test_main_long_double_mul_invalid (); > ret |= test_main_long_double_mul_overflow (); > ret |= test_main_long_double_mul_overflow_float (); >
On Thu, 3 Jul 2014, Adhemerval Zanella wrote:

> + /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
> +
> + double fenv_clear = __builtin_mffs ();
> + *(uint64_t)&fenv_clear &= 0xffffffff00000000LL;
> + __builtin_mtfsf (0xff, fenv_clear); */
> +
> + /* Mask to clear everything except for the rounding modes and non-IEEE
> + arithmetic flag. */
> + const unsigned HOST_WIDE_INT clear_exception_mask =
> + HOST_WIDE_INT_UC (0xffffffff00000000);

This mask is different from the one before, and it looks like it's clearing the rounding mode bits.

You probably don't need to do this masking here. In general, for the feclearexcept operation it's sufficient to reuse the same status/control register settings as you used in the feholdexcept operation - nothing (visible at C level) should have changed since that call except for the exception flags, and anyway when the feclearexcept operation is executed, the logical idea is to make things as if the floating-point operation preceding the failed compare-and-exchange never happened, so reusing the register setting makes logical sense in that way as well. (On x86, that reuse is what's done for SSE floating point; for 387 we use fnclex in both operations, and never explicitly compute a control word setting with exceptions cleared and masked.)

Other than that I don't see any issues with the changes (this is not an approval of the patch, however). The testsuite changes are OK.
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index bf67e72..75a2a45 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -1621,6 +1621,9 @@ static const struct attribute_spec rs6000_attribute_table[] = #undef TARGET_CAN_USE_DOLOOP_P #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost + +#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV +#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv /* Processor table. */ @@ -32991,6 +32994,105 @@ emit_fusion_gpr_load (rtx *operands) return ""; } +/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */ + +static void +rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) +{ + if (!TARGET_HARD_FLOAT || !TARGET_FPRS) + return; + + tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS]; + tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF]; + tree call_mffs = build_call_expr (mffs, 0); + + /* Generates the equivalent of feholdexcept (&fenv_var) + + *fenv_var = __builtin_mffs (); + double fenv_hold; + *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL; + __builtin_mtfsf (0xff, fenv_hold); */ + + /* Mask to clear everything except for the rounding modes and non-IEEE + arithmetic flag. 
*/ + const unsigned HOST_WIDE_INT hold_exception_mask = + HOST_WIDE_INT_C (0xffffffff00000007); + + tree fenv_var = create_tmp_var (double_type_node, NULL); + + tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs); + + tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var); + tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu, + build_int_cst (uint64_type_node, hold_exception_mask)); + + tree fenv_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node, fenv_llu_and); + + tree hold_mtfsf = build_call_expr (mtfsf, 2, + build_int_cst (unsigned_type_node, 0xff), fenv_mtfsf); + + *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf); + + /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT): + + double fenv_clear = __builtin_mffs (); + *(uint64_t)&fenv_clear &= 0xffffffff00000000LL; + __builtin_mtfsf (0xff, fenv_clear); */ + + /* Mask to clear everything except for the rounding modes and non-IEEE + arithmetic flag. */ + const unsigned HOST_WIDE_INT clear_exception_mask = + HOST_WIDE_INT_UC (0xffffffff00000000); + + tree fenv_clear = create_tmp_var (double_type_node, NULL); + + tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs); + + tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var); + tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, + fenv_clean_llu, build_int_cst (uint64_type_node, clear_exception_mask)); + + tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node, + fenv_clear_llu_and); + + tree clear_mtfsf = build_call_expr (mtfsf, 2, + build_int_cst (unsigned_type_node, 0xff), fenv_clear_mtfsf); + + *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf); + + /* Generates the equivalent of feupdateenv (&fenv_var) + + double old_fenv = __builtin_mffs (); + double fenv_update; + *(uint64_t*)&fenv_update = (*(uint64_t*)&old & 0xffffffff1fffff00LL) | + (*(uint64_t*)fenv_var 0x1ff80fff); + 
__builtin_mtfsf (0xff, fenv_update); */ + + const unsigned HOST_WIDE_INT update_exception_mask = + HOST_WIDE_INT_UC (0xffffffff1fffff00); + const unsigned HOST_WIDE_INT new_exception_mask = + HOST_WIDE_INT_UC (0x1ff80fff); + + tree old_fenv = create_tmp_var (double_type_node, NULL); + tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs); + + tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, update_mffs); + tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, + old_llu, build_int_cst (uint64_type_node, update_exception_mask)); + + tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu, + build_int_cst (uint64_type_node, new_exception_mask)); + + tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node, + old_llu_and, new_llu_and); + + tree fenv_mtfsf_update = build1 (VIEW_CONVERT_EXPR, double_type_node, + new_llu_mask); + + *update = build_call_expr (mtfsf, 2, + build_int_cst (unsigned_type_node, 0xff), fenv_mtfsf_update); +} + struct gcc_target targetm = TARGET_INITIALIZER; diff --git a/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c b/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c index bc87de4..0a2c9c4 100644 --- a/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c +++ b/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c @@ -325,11 +325,11 @@ TEST_FUNCS (complex_double_div_overflow, _Complex double, , /= DBL_MIN, 0, TEST_FUNCS (long_double_add_invalid, long double, , += __builtin_infl (), 0, 0, __builtin_isinf, 0, -__builtin_infl (), FE_INVALID) +#if LDBL_MANT_DIG != 106 TEST_FUNCS (long_double_add_overflow, long double, , += LDBL_MAX, 0, LDBL_MAX, __builtin_isinf, FE_OVERFLOW | FE_INEXACT, 0, 0) #define NOT_LDBL_EPSILON_2(X) ((X) != LDBL_EPSILON / 2) -#if LDBL_MANT_DIG != 106 TEST_FUNCS (long_double_add_inexact, long double, , += LDBL_EPSILON / 2, 0, 1.0L, NOT_LDBL_EPSILON_2, FE_INEXACT, 0, 0) @@ -342,18 +342,18 @@ TEST_FUNCS (long_double_preinc_inexact, long double, ++, , 0, TEST_FUNCS 
(long_double_postinc_inexact, long double, , ++, 0, LDBL_EPSILON / 2, NOT_MINUS_1, FE_INEXACT, -1, 0) -#endif TEST_FUNCS (complex_long_double_add_overflow, _Complex long double, , += LDBL_MAX, 0, LDBL_MAX, REAL_ISINF, FE_OVERFLOW | FE_INEXACT, 0, 0) +#endif TEST_FUNCS (long_double_sub_invalid, long double, , -= __builtin_infl (), 0, 0, __builtin_isinf, 0, __builtin_infl (), FE_INVALID) +#if LDBL_MANT_DIG != 106 TEST_FUNCS (long_double_sub_overflow, long double, , -= LDBL_MAX, 0, -LDBL_MAX, __builtin_isinf, FE_OVERFLOW | FE_INEXACT, 0, 0) #define NOT_MINUS_LDBL_EPSILON_2(X) ((X) != -LDBL_EPSILON / 2) -#if LDBL_MANT_DIG != 106 TEST_FUNCS (long_double_sub_inexact, long double, , -= LDBL_EPSILON / 2, 0, -1.0L, NOT_MINUS_LDBL_EPSILON_2, FE_INEXACT, 0, 0) @@ -366,10 +366,10 @@ TEST_FUNCS (long_double_predec_inexact, long double, --, , 0, TEST_FUNCS (long_double_postdec_inexact, long double, , --, 0, -LDBL_EPSILON / 2, NOT_1, FE_INEXACT, 1, 0) -#endif TEST_FUNCS (complex_long_double_sub_overflow, _Complex long double, , -= LDBL_MAX, 0, -LDBL_MAX, REAL_ISINF, FE_OVERFLOW | FE_INEXACT, 0, 0) +#endif TEST_FUNCS (long_double_mul_invalid, long double, , *= __builtin_infl (), 0, __builtin_infl (), __builtin_isinf, 0, 0, FE_INVALID) @@ -501,23 +501,23 @@ main (void) ret |= test_main_int_div_double_inexact (); ret |= test_main_complex_double_div_overflow (); ret |= test_main_long_double_add_invalid (); - ret |= test_main_long_double_add_overflow (); #if LDBL_MANT_DIG != 106 + ret |= test_main_long_double_add_overflow (); ret |= test_main_long_double_add_inexact (); ret |= test_main_long_double_add_inexact_int (); ret |= test_main_long_double_preinc_inexact (); ret |= test_main_long_double_postinc_inexact (); -#endif ret |= test_main_complex_long_double_add_overflow (); +#endif ret |= test_main_long_double_sub_invalid (); - ret |= test_main_long_double_sub_overflow (); #if LDBL_MANT_DIG != 106 + ret |= test_main_long_double_sub_overflow (); ret |= test_main_long_double_sub_inexact 
(); ret |= test_main_long_double_sub_inexact_int (); ret |= test_main_long_double_predec_inexact (); ret |= test_main_long_double_postdec_inexact (); -#endif ret |= test_main_complex_long_double_sub_overflow (); +#endif ret |= test_main_long_double_mul_invalid (); ret |= test_main_long_double_mul_overflow (); ret |= test_main_long_double_mul_overflow_float ();