| Message ID | 20160624133521.GC7387@tucnak.redhat.com |
| --- | --- |
| State | New |
On Fri, 24 Jun 2016, Jakub Jelinek wrote:

> On Thu, Jun 23, 2016 at 05:23:21PM +0200, Jakub Jelinek wrote:
> > Thinking about this again, there could be another option - keep
> > __atomic_compare_exchange_N in the IL, but under certain conditions (similar
> > to what the patch uses in fold_builtin_atomic_compare_exchange) for these
> > builtins ignore &var on the second argument, and if we actually turn var
> > into non-addressable, convert the builtin call similarly to what
> > fold_builtin_atomic_compare_exchange does in the patch (except the store
> > would be non-conditional then; the gimple-fold.c part wouldn't be needed
> > then).
>
> Here is this second approach implemented.  Generally it gives slight code
> size savings over the other patch on the testcases I've posted (and even the
> other patch used to produce generally smaller code than vanilla).
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok.

Thanks,
Richard.

> 2016-06-24  Jakub Jelinek  <jakub@redhat.com>
>
> 	PR middle-end/66867
> 	* builtins.c (expand_ifn_atomic_compare_exchange_into_call,
> 	expand_ifn_atomic_compare_exchange): New functions.
> 	* internal-fn.c (expand_ATOMIC_COMPARE_EXCHANGE): New function.
> 	* tree.h (build_call_expr_internal_loc): Rename to ...
> 	(build_call_expr_internal_loc_array): ... this.  Fix up type of
> 	last argument.
> 	* internal-fn.def (ATOMIC_COMPARE_EXCHANGE): New internal fn.
> 	* predict.c (expr_expected_value_1): Handle IMAGPART_EXPR of
> 	ATOMIC_COMPARE_EXCHANGE result.
> 	* builtins.h (expand_ifn_atomic_compare_exchange): New prototype.
> 	* gimple-fold.h (optimize_atomic_compare_exchange_p,
> 	fold_builtin_atomic_compare_exchange): New prototypes.
> 	* gimple-fold.c (optimize_atomic_compare_exchange_p,
> 	fold_builtin_atomic_compare_exchange): New functions.
> 	* tree-ssa.c (execute_update_addresses_taken): If
> 	optimize_atomic_compare_exchange_p, ignore &var in 2nd argument
> 	of call when finding addressable vars, and if such var becomes
> 	non-addressable, call fold_builtin_atomic_compare_exchange.
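For readers skimming the thread, the pattern this targets is the ordinary compare-and-swap retry loop, where the address of a function-local "expected" value is passed to the builtin. A minimal sketch (hypothetical code, not taken from the patch or its testsuite):

```c
#include <stdint.h>

/* Atomically increments *p unless it has reached LIMIT; returns the
   previous value.  Taking &expected for the builtin used to make the
   local addressable, forcing it into a stack slot.  With this patch,
   execute_update_addresses_taken can ignore that &expected, rewrite
   the builtin into the ATOMIC_COMPARE_EXCHANGE internal function, and
   keep expected in a register; the refreshed value on CAS failure then
   comes from the REALPART_EXPR of the internal call's _Complex result
   instead of a store through the pointer.  */
static uint32_t
saturating_increment (uint32_t *p, uint32_t limit)
{
  uint32_t expected = __atomic_load_n (p, __ATOMIC_RELAXED);
  while (expected < limit
	 && !__atomic_compare_exchange_n (p, &expected, expected + 1,
					  /*weak=*/1, __ATOMIC_RELAXED,
					  __ATOMIC_RELAXED))
    ;  /* On failure, expected has been refreshed from *p; retry.  */
  return expected;
}
```

The predict.c hunk keeps the existing PRED_COMPARE_AND_SWAP heuristic working on the rewritten form: the IMAGPART_EXPR of the internal call's result (the success flag) is predicted to be 1, so a loop like the one above stays laid out for the uncontended path.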
On Fri, Jun 24, 2016 at 6:35 AM, Jakub Jelinek <jakub@redhat.com> wrote:

> Here is this second approach implemented.  Generally it gives slight code
> size savings over the other patch on the testcases I've posted (and even the
> other patch used to produce generally smaller code than vanilla).
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2016-06-24  Jakub Jelinek  <jakub@redhat.com>
>
> 	PR middle-end/66867
> 	* builtins.c (expand_ifn_atomic_compare_exchange_into_call,
> 	expand_ifn_atomic_compare_exchange): New functions.

This caused:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71716

H.J.
--- gcc/builtins.c.jj	2016-06-08 21:01:25.000000000 +0200
+++ gcc/builtins.c	2016-06-24 11:18:03.839096035 +0200
@@ -5158,6 +5158,123 @@ expand_builtin_atomic_compare_exchange (
   return target;
 }
 
+/* Helper function for expand_ifn_atomic_compare_exchange - expand
+   internal ATOMIC_COMPARE_EXCHANGE call into __atomic_compare_exchange_N
+   call.  The weak parameter must be dropped to match the expected parameter
+   list and the expected argument changed from value to pointer to memory
+   slot.  */
+
+static void
+expand_ifn_atomic_compare_exchange_into_call (gcall *call, machine_mode mode)
+{
+  unsigned int z;
+  vec<tree, va_gc> *vec;
+
+  vec_alloc (vec, 5);
+  vec->quick_push (gimple_call_arg (call, 0));
+  tree expected = gimple_call_arg (call, 1);
+  rtx x = assign_stack_temp_for_type (mode, GET_MODE_SIZE (mode),
+				      TREE_TYPE (expected));
+  rtx expd = expand_expr (expected, x, mode, EXPAND_NORMAL);
+  if (expd != x)
+    emit_move_insn (x, expd);
+  tree v = make_tree (TREE_TYPE (expected), x);
+  vec->quick_push (build1 (ADDR_EXPR,
+			   build_pointer_type (TREE_TYPE (expected)), v));
+  vec->quick_push (gimple_call_arg (call, 2));
+  /* Skip the boolean weak parameter.  */
+  for (z = 4; z < 6; z++)
+    vec->quick_push (gimple_call_arg (call, z));
+  built_in_function fncode
+    = (built_in_function) ((int) BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1
+			   + exact_log2 (GET_MODE_SIZE (mode)));
+  tree fndecl = builtin_decl_explicit (fncode);
+  tree fn = build1 (ADDR_EXPR, build_pointer_type (TREE_TYPE (fndecl)),
+		    fndecl);
+  tree exp = build_call_vec (boolean_type_node, fn, vec);
+  tree lhs = gimple_call_lhs (call);
+  rtx boolret = expand_call (exp, NULL_RTX, lhs == NULL_TREE);
+  if (lhs)
+    {
+      rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+      if (GET_MODE (boolret) != mode)
+	boolret = convert_modes (mode, GET_MODE (boolret), boolret, 1);
+      x = force_reg (mode, x);
+      write_complex_part (target, boolret, true);
+      write_complex_part (target, x, false);
+    }
+}
+
+/* Expand IFN_ATOMIC_COMPARE_EXCHANGE internal function.  */
+
+void
+expand_ifn_atomic_compare_exchange (gcall *call)
+{
+  int size = tree_to_shwi (gimple_call_arg (call, 3)) & 255;
+  gcc_assert (size == 1 || size == 2 || size == 4 || size == 8 || size == 16);
+  machine_mode mode = mode_for_size (BITS_PER_UNIT * size, MODE_INT, 0);
+  rtx expect, desired, mem, oldval, boolret;
+  enum memmodel success, failure;
+  tree lhs;
+  bool is_weak;
+  source_location loc
+    = expansion_point_location_if_in_system_header (gimple_location (call));
+
+  success = get_memmodel (gimple_call_arg (call, 4));
+  failure = get_memmodel (gimple_call_arg (call, 5));
+
+  if (failure > success)
+    {
+      warning_at (loc, OPT_Winvalid_memory_model,
+		  "failure memory model cannot be stronger than success "
+		  "memory model for %<__atomic_compare_exchange%>");
+      success = MEMMODEL_SEQ_CST;
+    }
+
+  if (is_mm_release (failure) || is_mm_acq_rel (failure))
+    {
+      warning_at (loc, OPT_Winvalid_memory_model,
+		  "invalid failure memory model for "
+		  "%<__atomic_compare_exchange%>");
+      failure = MEMMODEL_SEQ_CST;
+      success = MEMMODEL_SEQ_CST;
+    }
+
+  if (!flag_inline_atomics)
+    {
+      expand_ifn_atomic_compare_exchange_into_call (call, mode);
+      return;
+    }
+
+  /* Expand the operands.  */
+  mem = get_builtin_sync_mem (gimple_call_arg (call, 0), mode);
+
+  expect = expand_expr_force_mode (gimple_call_arg (call, 1), mode);
+  desired = expand_expr_force_mode (gimple_call_arg (call, 2), mode);
+
+  is_weak = (tree_to_shwi (gimple_call_arg (call, 3)) & 256) != 0;
+
+  boolret = NULL;
+  oldval = NULL;
+
+  if (!expand_atomic_compare_and_swap (&boolret, &oldval, mem, expect, desired,
+				       is_weak, success, failure))
+    {
+      expand_ifn_atomic_compare_exchange_into_call (call, mode);
+      return;
+    }
+
+  lhs = gimple_call_lhs (call);
+  if (lhs)
+    {
+      rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+      if (GET_MODE (boolret) != mode)
+	boolret = convert_modes (mode, GET_MODE (boolret), boolret, 1);
+      write_complex_part (target, boolret, true);
+      write_complex_part (target, oldval, false);
+    }
+}
+
 /* Expand the __atomic_load intrinsic:
    	TYPE __atomic_load (TYPE *object, enum memmodel)
    EXP is the CALL_EXPR.
--- gcc/internal-fn.c.jj	2016-06-15 19:09:09.000000000 +0200
+++ gcc/internal-fn.c	2016-06-22 15:30:06.838951934 +0200
@@ -2143,6 +2143,14 @@ expand_ATOMIC_BIT_TEST_AND_RESET (intern
   expand_ifn_atomic_bit_test_and (call);
 }
 
+/* Expand atomic compare and exchange.  */
+
+static void
+expand_ATOMIC_COMPARE_EXCHANGE (internal_fn, gcall *call)
+{
+  expand_ifn_atomic_compare_exchange (call);
+}
+
 /* Expand a call to FN using the operands in STMT.  FN has a single
    output operand and NARGS input operands.  */
 
--- gcc/tree.h.jj	2016-06-20 21:16:07.000000000 +0200
+++ gcc/tree.h	2016-06-21 17:35:19.806362408 +0200
@@ -3985,8 +3985,8 @@ extern tree build_call_expr_loc (locatio
 extern tree build_call_expr (tree, int, ...);
 extern tree build_call_expr_internal_loc (location_t, enum internal_fn,
 					  tree, int, ...);
-extern tree build_call_expr_internal_loc (location_t, enum internal_fn,
-					  tree, int, tree *);
+extern tree build_call_expr_internal_loc_array (location_t, enum internal_fn,
+						tree, int, const tree *);
 extern tree maybe_build_call_expr_loc (location_t, combined_fn, tree,
 				       int, ...);
 extern tree build_string_literal (int, const char *);
--- gcc/gimple-fold.h.jj	2016-01-08 07:31:08.000000000 +0100
+++ gcc/gimple-fold.h	2016-06-24 11:19:34.002040898 +0200
@@ -32,6 +32,8 @@ extern tree maybe_fold_and_comparisons (
 					enum tree_code, tree, tree);
 extern tree maybe_fold_or_comparisons (enum tree_code, tree, tree,
 				       enum tree_code, tree, tree);
+extern bool optimize_atomic_compare_exchange_p (gimple *);
+extern void fold_builtin_atomic_compare_exchange (gimple_stmt_iterator *);
 extern bool arith_overflowed_p (enum tree_code, const_tree, const_tree,
 				const_tree);
 extern tree no_follow_ssa_edges (tree);
--- gcc/internal-fn.def.jj	2016-05-03 13:36:50.000000000 +0200
+++ gcc/internal-fn.def	2016-06-21 17:10:23.516879436 +0200
@@ -193,6 +193,7 @@ DEF_INTERNAL_FN (SET_EDOM, ECF_LEAF | EC
 DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_SET, ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_COMPLEMENT, ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_RESET, ECF_LEAF | ECF_NOTHROW, NULL)
+DEF_INTERNAL_FN (ATOMIC_COMPARE_EXCHANGE, ECF_LEAF | ECF_NOTHROW, NULL)
 
 #undef DEF_INTERNAL_INT_FN
 #undef DEF_INTERNAL_FLT_FN
--- gcc/predict.c.jj	2016-06-22 11:17:44.763444374 +0200
+++ gcc/predict.c	2016-06-22 14:26:08.894724088 +0200
@@ -1978,6 +1978,25 @@ expr_expected_value_1 (tree type, tree o
       if (TREE_CONSTANT (op0))
 	return op0;
 
+      if (code == IMAGPART_EXPR)
+	{
+	  if (TREE_CODE (TREE_OPERAND (op0, 0)) == SSA_NAME)
+	    {
+	      def = SSA_NAME_DEF_STMT (TREE_OPERAND (op0, 0));
+	      if (is_gimple_call (def)
+		  && gimple_call_internal_p (def)
+		  && (gimple_call_internal_fn (def)
+		      == IFN_ATOMIC_COMPARE_EXCHANGE))
+		{
+		  /* Assume that any given atomic operation has low contention,
+		     and thus the compare-and-swap operation succeeds.  */
+		  if (predictor)
+		    *predictor = PRED_COMPARE_AND_SWAP;
+		  return build_one_cst (TREE_TYPE (op0));
+		}
+	    }
+	}
+
       if (code != SSA_NAME)
 	return NULL_TREE;
 
--- gcc/builtins.h.jj	2016-05-03 13:36:50.000000000 +0200
+++ gcc/builtins.h	2016-06-21 18:05:11.678635858 +0200
@@ -72,6 +72,7 @@ extern tree std_canonical_va_list_type (
 extern void std_expand_builtin_va_start (tree, rtx);
 extern void expand_builtin_trap (void);
 extern void expand_ifn_atomic_bit_test_and (gcall *);
+extern void expand_ifn_atomic_compare_exchange (gcall *);
 extern rtx expand_builtin (tree, rtx, rtx, machine_mode, int);
 extern rtx expand_builtin_with_bounds (tree, rtx, rtx, machine_mode, int);
 extern enum built_in_function builtin_mathfn_code (const_tree);
--- gcc/gimple-fold.c.jj	2016-06-16 21:00:08.000000000 +0200
+++ gcc/gimple-fold.c	2016-06-24 11:36:12.222916831 +0200
@@ -2953,6 +2953,133 @@ fold_internal_goacc_dim (const gimple *c
   return result;
 }
 
+/* Return true if stmt is __atomic_compare_exchange_N call which is suitable
+   for conversion into ATOMIC_COMPARE_EXCHANGE if the second argument is
+   &var where var is only addressable because of such calls.  */
+
+bool
+optimize_atomic_compare_exchange_p (gimple *stmt)
+{
+  if (gimple_call_num_args (stmt) != 6
+      || !flag_inline_atomics
+      || !optimize
+      || (flag_sanitize & (SANITIZE_THREAD | SANITIZE_ADDRESS)) != 0
+      || !gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)
+      || !gimple_vdef (stmt)
+      || !gimple_vuse (stmt))
+    return false;
+
+  tree fndecl = gimple_call_fndecl (stmt);
+  switch (DECL_FUNCTION_CODE (fndecl))
+    {
+    case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
+    case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
+    case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
+    case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
+    case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
+      break;
+    default:
+      return false;
+    }
+
+  tree expected = gimple_call_arg (stmt, 1);
+  if (TREE_CODE (expected) != ADDR_EXPR
+      || !SSA_VAR_P (TREE_OPERAND (expected, 0))
+      || !is_gimple_reg_type (TREE_TYPE (TREE_OPERAND (expected, 0)))
+      || !auto_var_in_fn_p (TREE_OPERAND (expected, 0), current_function_decl)
+      || TREE_THIS_VOLATILE (TREE_TYPE (TREE_OPERAND (expected, 0)))
+      || TREE_CODE (TREE_TYPE (TREE_OPERAND (expected, 0))) == VECTOR_TYPE
+      || TREE_CODE (TREE_TYPE (TREE_OPERAND (expected, 0))) == COMPLEX_TYPE)
+    return false;
+
+  tree weak = gimple_call_arg (stmt, 3);
+  if (!integer_zerop (weak) && !integer_onep (weak))
+    return false;
+
+  tree parmt = TYPE_ARG_TYPES (TREE_TYPE (fndecl));
+  tree itype = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (parmt)));
+  machine_mode mode = TYPE_MODE (itype);
+
+  if (direct_optab_handler (atomic_compare_and_swap_optab, mode)
+      == CODE_FOR_nothing
+      && optab_handler (sync_compare_and_swap_optab, mode) == CODE_FOR_nothing)
+    return false;
+
+  if (int_size_in_bytes (TREE_TYPE (TREE_OPERAND (expected, 0)))
+      != GET_MODE_SIZE (mode))
+    return false;
+
+  return true;
+}
+
+/* Fold
+     r = __atomic_compare_exchange_N (p, &e, d, w, s, f);
+   into
+     _Complex uintN_t t = ATOMIC_COMPARE_EXCHANGE (p, e, d, w * 256 + N, s, f);
+     i = IMAGPART_EXPR <t>;
+     r = (_Bool) i;
+     e = REALPART_EXPR <t>;  */
+
+void
+fold_builtin_atomic_compare_exchange (gimple_stmt_iterator *gsi)
+{
+  gimple *stmt = gsi_stmt (*gsi);
+  tree fndecl = gimple_call_fndecl (stmt);
+  tree parmt = TYPE_ARG_TYPES (TREE_TYPE (fndecl));
+  tree itype = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (parmt)));
+  tree ctype = build_complex_type (itype);
+  tree expected = TREE_OPERAND (gimple_call_arg (stmt, 1), 0);
+  gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (expected)),
+				   expected);
+  gsi_insert_before (gsi, g, GSI_SAME_STMT);
+  gimple_stmt_iterator gsiret = gsi_for_stmt (g);
+  if (!useless_type_conversion_p (itype, TREE_TYPE (expected)))
+    {
+      g = gimple_build_assign (make_ssa_name (itype), VIEW_CONVERT_EXPR,
+			       build1 (VIEW_CONVERT_EXPR, itype,
+				       gimple_assign_lhs (g)));
+      gsi_insert_before (gsi, g, GSI_SAME_STMT);
+    }
+  int flag = (integer_onep (gimple_call_arg (stmt, 3)) ? 256 : 0)
+	     + int_size_in_bytes (itype);
+  g = gimple_build_call_internal (IFN_ATOMIC_COMPARE_EXCHANGE, 6,
+				  gimple_call_arg (stmt, 0),
+				  gimple_assign_lhs (g),
+				  gimple_call_arg (stmt, 2),
+				  build_int_cst (integer_type_node, flag),
+				  gimple_call_arg (stmt, 4),
+				  gimple_call_arg (stmt, 5));
+  tree lhs = make_ssa_name (ctype);
+  gimple_call_set_lhs (g, lhs);
+  gimple_set_vdef (g, gimple_vdef (stmt));
+  gimple_set_vuse (g, gimple_vuse (stmt));
+  SSA_NAME_DEF_STMT (gimple_vdef (g)) = g;
+  if (gimple_call_lhs (stmt))
+    {
+      gsi_insert_before (gsi, g, GSI_SAME_STMT);
+      g = gimple_build_assign (make_ssa_name (itype), IMAGPART_EXPR,
+			       build1 (IMAGPART_EXPR, itype, lhs));
+      gsi_insert_before (gsi, g, GSI_SAME_STMT);
+      g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR,
+			       gimple_assign_lhs (g));
+    }
+  gsi_replace (gsi, g, true);
+  g = gimple_build_assign (make_ssa_name (itype), REALPART_EXPR,
+			   build1 (REALPART_EXPR, itype, lhs));
+  gsi_insert_after (gsi, g, GSI_NEW_STMT);
+  if (!useless_type_conversion_p (TREE_TYPE (expected), itype))
+    {
+      g = gimple_build_assign (make_ssa_name (TREE_TYPE (expected)),
+			       VIEW_CONVERT_EXPR,
+			       build1 (VIEW_CONVERT_EXPR, TREE_TYPE (expected),
+				       gimple_assign_lhs (g)));
+      gsi_insert_after (gsi, g, GSI_NEW_STMT);
+    }
+  g = gimple_build_assign (expected, SSA_NAME, gimple_assign_lhs (g));
+  gsi_insert_after (gsi, g, GSI_NEW_STMT);
+  *gsi = gsiret;
+}
+
 /* Return true if ARG0 CODE ARG1 in infinite signed precision operation
    doesn't fit into TYPE.  The test for overflow should be regardless of
    -fwrapv, and even for unsigned types.  */
--- gcc/tree-ssa.c.jj	2016-06-14 16:29:49.000000000 +0200
+++ gcc/tree-ssa.c	2016-06-24 10:38:54.929029421 +0200
@@ -1414,8 +1414,21 @@ execute_update_addresses_taken (void)
 	  enum gimple_code code = gimple_code (stmt);
 	  tree decl;
 
-	  /* Note all addresses taken by the stmt.  */
-	  gimple_ior_addresses_taken (addresses_taken, stmt);
+	  if (code == GIMPLE_CALL
+	      && optimize_atomic_compare_exchange_p (stmt))
+	    {
+	      /* For __atomic_compare_exchange_N if the second argument
+		 is &var, don't mark var addressable;
+		 if it becomes non-addressable, we'll rewrite it into
+		 ATOMIC_COMPARE_EXCHANGE call.  */
+	      tree arg = gimple_call_arg (stmt, 1);
+	      gimple_call_set_arg (stmt, 1, null_pointer_node);
+	      gimple_ior_addresses_taken (addresses_taken, stmt);
+	      gimple_call_set_arg (stmt, 1, arg);
+	    }
+	  else
+	    /* Note all addresses taken by the stmt.  */
+	    gimple_ior_addresses_taken (addresses_taken, stmt);
 
 	  /* If we have a call or an assignment, see if the lhs contains
 	     a local decl that requires not to be a gimple register.  */
@@ -1657,6 +1670,16 @@ execute_update_addresses_taken (void)
 	    else if (gimple_code (stmt) == GIMPLE_CALL)
 	      {
 		unsigned i;
+		if (optimize_atomic_compare_exchange_p (stmt))
+		  {
+		    tree expected = gimple_call_arg (stmt, 1);
+		    if (bitmap_bit_p (suitable_for_renaming,
+				      DECL_UID (TREE_OPERAND (expected, 0))))
+		      {
+			fold_builtin_atomic_compare_exchange (&gsi);
+			continue;
+		      }
+		  }
 		for (i = 0; i < gimple_call_num_args (stmt); ++i)
 		  {
 		    tree *argp = gimple_call_arg_ptr (stmt, i);
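A note on the encoding used by the new internal function, for anyone reading the expander: the fourth argument packs the weak flag and the access size into a single constant (w * 256 + N, per the comment on fold_builtin_atomic_compare_exchange), and expand_ifn_atomic_compare_exchange recovers them with "& 255" and "& 256". A self-contained sketch of that decoding (the helper and struct names are invented for illustration):

```c
#include <assert.h>
#include <stdbool.h>

/* Illustrative only: mirrors how expand_ifn_atomic_compare_exchange
   decodes the combined fourth argument of IFN_ATOMIC_COMPARE_EXCHANGE.  */
struct cas_arg
{
  int size;   /* Access size in bytes: 1, 2, 4, 8 or 16.  */
  bool weak;  /* True if the source used a weak CAS.  */
};

static struct cas_arg
decode_cas_arg (int arg)
{
  struct cas_arg a;
  a.size = arg & 255;         /* Low byte holds N.  */
  a.weak = (arg & 256) != 0;  /* Bit 8 holds the weak flag.  */
  assert (a.size == 1 || a.size == 2 || a.size == 4
	  || a.size == 8 || a.size == 16);
  return a;
}

int
main (void)
{
  /* A weak 4-byte CAS is encoded as 1 * 256 + 4 == 260.  */
  struct cas_arg a = decode_cas_arg (260);
  assert (a.size == 4 && a.weak);
  return 0;
}
```

The result side of the convention is the mirror image: the internal call produces a _Complex integer whose real part is the value read from memory and whose imaginary part is the success flag, which is why the expander emits paired write_complex_part calls and the folder extracts REALPART_EXPR and IMAGPART_EXPR from the call's lhs.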