| Field | Value |
|---|---|
| Message ID | 20171106184939.GV14653@tucnak |
| State | New |
| Headers | show |
| Series | Replace has_single_use guards in store-merging \| expand |
On Mon, 6 Nov 2017, Jakub Jelinek wrote: > Hi! > > As mentioned earlier, the !has_single_use checks disable store merging > in many cases, it is enough to have a single multiple-use somewhere and > all of sudden we break the group. > > The following patch replaces it by heuristics, it is GIMPLE statement count > based, but I think it should work pretty fine in general. > The code counts how many statements there are for the group without > store-merging (i.e. stores, if not storing constant, then associated loads > as well, and bitwise stmts), and then counts how many are needed > if store merging is performed and expected DCE gets rid of dead stmts > - i.e. we count what we'll emit in the store merging sequences for the group > (without the masking for bitfields with padding bits in between, as that > is even normal stores to bitfields have that implicitly and only expansion > makes it explicit into the IL) and whatever original statements had multiple > uses (and stmts they use if any). > So, e.g. if we have _1 = t.a; s.a = _1; _2 = t.b; s.b = _2; use (_1); we > can still store merge it, as there are 4 original stmts and we'd replace it > with 3 new stmts: _1 = t.a; _3 = [t.a;t.b]; [s.a;s.b] = _3; use (_1); > while if we have _1 = t.a; s.a = _1; _2 = t.b; s.b = _2; use (_1, _2); > we don't replace it anymore, as that would be trading 4 original for 4 new > ones. > > During the combined {x86_64,i686}-linux bootstraps/regtests, without this > and the today posted patch the counts were: > integer_cst 215621 442752 > mem_ref 12115 24919 > bit_and_expr 37 88 > bit_ior_expr 19 46 > bit_xor_expr 27 58 > and with the two patches: > integer_cst 215605 442817 > mem_ref 17320 36133 > bit_and_expr 93 224 > bit_ior_expr 66 153 > bit_xor_expr 56 153 > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2017-11-06 Jakub Jelinek <jakub@redhat.com> > > * gimple-ssa-store-merging.c (count_multiple_uses): New function. 
> (split_group): Add total_orig and total_new arguments, estimate the > number of statements related to the store group without store merging > and with store merging. > (imm_store_chain_info::output_merged_store): Adjust split_group > callers, punt if estimated number of statements with store merging > is not smaller than estimated number of statements without it. > Formatting fix. > (handled_load): Remove has_single_use checks. > (pass_store_merging::process_store): Likewise. > > --- gcc/gimple-ssa-store-merging.c.jj 2017-11-06 08:57:07.000000000 +0100 > +++ gcc/gimple-ssa-store-merging.c 2017-11-06 16:35:38.122437951 +0100 > @@ -1370,6 +1370,65 @@ find_constituent_stores (struct merged_s > return ret; > } > > +/* Return how many SSA_NAMEs used to compute value to store in the INFO > + store have multiple uses. If any SSA_NAME has multiple uses, also > + count statements needed to compute it. */ > + > +static unsigned > +count_multiple_uses (store_immediate_info *info) > +{ > + gimple *stmt = info->stmt; > + unsigned ret = 0; > + switch (info->rhs_code) > + { > + case INTEGER_CST: > + return 0; > + case BIT_AND_EXPR: > + case BIT_IOR_EXPR: > + case BIT_XOR_EXPR: > + if (!has_single_use (gimple_assign_rhs1 (stmt))) > + { > + ret += 1 + info->ops[0].bit_not_p; > + if (info->ops[1].base_addr) > + ret += 1 + info->ops[1].bit_not_p; > + return ret + 1; > + } > + stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); > + /* stmt is now the BIT_*_EXPR. 
*/ > + if (!has_single_use (gimple_assign_rhs1 (stmt))) > + ret += 1 + info->ops[0].bit_not_p; > + else if (info->ops[0].bit_not_p) > + { > + gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); > + if (!has_single_use (gimple_assign_rhs1 (stmt2))) > + ++ret; > + } > + if (info->ops[1].base_addr == NULL_TREE) > + return ret; > + if (!has_single_use (gimple_assign_rhs2 (stmt))) > + ret += 1 + info->ops[1].bit_not_p; > + else if (info->ops[1].bit_not_p) > + { > + gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); > + if (!has_single_use (gimple_assign_rhs1 (stmt2))) > + ++ret; > + } > + return ret; > + case MEM_REF: > + if (!has_single_use (gimple_assign_rhs1 (stmt))) > + return 1 + info->ops[0].bit_not_p; > + else if (info->ops[0].bit_not_p) > + { > + stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); > + if (!has_single_use (gimple_assign_rhs1 (stmt))) > + return 1; > + } > + return 0; > + default: > + gcc_unreachable (); > + } Can't you simply use unsigned ret = 0; FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE) if (!has_single_use (op)) ++ret; return ret; ? Not sure if the bit_not_p handling is required. It doesn't seem you handle multi-uses of the BIT_*_EXPR results itself? Or does it only handle multi-uses of the BIT_*_EXPR but not the feeding loads? That is, it looks like an approximation already - can we simplify it a bit? Thanks, Richard. 
> +} > + > /* Split a merged store described by GROUP by populating the SPLIT_STORES > vector (if non-NULL) with split_store structs describing the byte offset > (from the base), the bit size and alignment of each store as well as the > @@ -1385,7 +1444,9 @@ find_constituent_stores (struct merged_s > static unsigned int > split_group (merged_store_group *group, bool allow_unaligned_store, > bool allow_unaligned_load, > - vec<struct split_store *> *split_stores) > + vec<struct split_store *> *split_stores, > + unsigned *total_orig, > + unsigned *total_new) > { > unsigned HOST_WIDE_INT pos = group->bitregion_start; > unsigned HOST_WIDE_INT size = group->bitregion_end - pos; > @@ -1393,6 +1454,7 @@ split_group (merged_store_group *group, > unsigned HOST_WIDE_INT group_align = group->align; > unsigned HOST_WIDE_INT align_base = group->align_base; > unsigned HOST_WIDE_INT group_load_align = group_align; > + bool any_orig = false; > > gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0)); > > @@ -1400,6 +1462,34 @@ split_group (merged_store_group *group, > unsigned HOST_WIDE_INT try_pos = bytepos; > group->stores.qsort (sort_by_bitpos); > > + if (total_orig) > + { > + unsigned int i; > + store_immediate_info *info = group->stores[0]; > + > + total_new[0] = 0; > + total_orig[0] = 1; /* The orig store. */ > + info = group->stores[0]; > + if (info->ops[0].base_addr) > + total_orig[0] += 1 + info->ops[0].bit_not_p; > + if (info->ops[1].base_addr) > + total_orig[0] += 1 + info->ops[1].bit_not_p; > + switch (info->rhs_code) > + { > + case BIT_AND_EXPR: > + case BIT_IOR_EXPR: > + case BIT_XOR_EXPR: > + total_orig[0]++; /* The orig BIT_*_EXPR stmt. 
*/ > + break; > + default: > + break; > + } > + total_orig[0] *= group->stores.length (); > + > + FOR_EACH_VEC_ELT (group->stores, i, info) > + total_new[0] += count_multiple_uses (info); > + } > + > if (!allow_unaligned_load) > for (int i = 0; i < 2; ++i) > if (group->load_align[i]) > @@ -1524,7 +1614,10 @@ split_group (merged_store_group *group, > if (info > && info->bitpos >= try_bitpos > && info->bitpos + info->bitsize <= try_bitpos + try_size) > - store->orig = true; > + { > + store->orig = true; > + any_orig = true; > + } > split_stores->safe_push (store); > } > > @@ -1532,6 +1625,37 @@ split_group (merged_store_group *group, > size -= try_size; > } > > + if (total_orig) > + { > + /* If we are reusing some original stores and any of the > + original SSA_NAMEs had multiple uses, we need to subtract > + those now before we add the new ones. */ > + if (total_new[0] && any_orig) > + { > + unsigned int i; > + struct split_store *store; > + FOR_EACH_VEC_ELT (*split_stores, i, store) > + if (store->orig) > + total_new[0] -= count_multiple_uses (store->orig_stores[0]); > + } > + total_new[0] += ret; /* The new store. */ > + store_immediate_info *info = group->stores[0]; > + if (info->ops[0].base_addr) > + total_new[0] += ret * (1 + info->ops[0].bit_not_p); > + if (info->ops[1].base_addr) > + total_new[0] += ret * (1 + info->ops[1].bit_not_p); > + switch (info->rhs_code) > + { > + case BIT_AND_EXPR: > + case BIT_IOR_EXPR: > + case BIT_XOR_EXPR: > + total_new[0] += ret; /* The new BIT_*_EXPR stmt. */ > + break; > + default: > + break; > + } > + } > + > return ret; > } > > @@ -1564,26 +1688,35 @@ imm_store_chain_info::output_merged_stor > for unaligned and how many stores we'd emit for aligned stores. > Only use unaligned stores if it allows fewer stores than aligned. 
*/ > unsigned aligned_cnt > - = split_group (group, false, allow_unaligned_load, NULL); > + = split_group (group, false, allow_unaligned_load, NULL, NULL, NULL); > unsigned unaligned_cnt > - = split_group (group, true, allow_unaligned_load, NULL); > + = split_group (group, true, allow_unaligned_load, NULL, NULL, NULL); > if (aligned_cnt <= unaligned_cnt) > allow_unaligned_store = false; > } > + unsigned total_orig, total_new; > split_group (group, allow_unaligned_store, allow_unaligned_load, > - &split_stores); > + &split_stores, &total_orig, &total_new); > > if (split_stores.length () >= orig_num_stmts) > { > /* We didn't manage to reduce the number of statements. Bail out. */ > if (dump_file && (dump_flags & TDF_DETAILS)) > - { > - fprintf (dump_file, "Exceeded original number of stmts (%u)." > - " Not profitable to emit new sequence.\n", > - orig_num_stmts); > - } > + fprintf (dump_file, "Exceeded original number of stmts (%u)." > + " Not profitable to emit new sequence.\n", > + orig_num_stmts); > return false; > } > + if (total_orig <= total_new) > + { > + /* If number of estimated new statements is above estimated original > + statements, bail out too. 
*/ > + if (dump_file && (dump_flags & TDF_DETAILS)) > + fprintf (dump_file, "Estimated number of original stmts (%u)" > + " not larger than estimated number of new" > + " stmts (%u).\n", > + total_orig, total_new); > + } > > gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt); > gimple_seq seq = NULL; > @@ -2096,7 +2229,6 @@ handled_load (gimple *stmt, store_operan > { > tree rhs1 = gimple_assign_rhs1 (stmt); > if (TREE_CODE (rhs1) == SSA_NAME > - && has_single_use (rhs1) > && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos, > bitregion_start, bitregion_end)) > { > @@ -2159,7 +2291,7 @@ pass_store_merging::process_store (gimpl > rhs_code = INTEGER_CST; > ops[0].val = rhs; > } > - else if (TREE_CODE (rhs) != SSA_NAME || !has_single_use (rhs)) > + else if (TREE_CODE (rhs) != SSA_NAME) > invalid = true; > else > { > @@ -2179,7 +2311,7 @@ pass_store_merging::process_store (gimpl > rhs1 = gimple_assign_rhs1 (def_stmt); > rhs2 = gimple_assign_rhs2 (def_stmt); > invalid = true; > - if (TREE_CODE (rhs1) != SSA_NAME || !has_single_use (rhs1)) > + if (TREE_CODE (rhs1) != SSA_NAME) > break; > def_stmt1 = SSA_NAME_DEF_STMT (rhs1); > if (!is_gimple_assign (def_stmt1) > @@ -2188,7 +2320,7 @@ pass_store_merging::process_store (gimpl > break; > if (rhs_valid_for_store_merging_p (rhs2)) > ops[1].val = rhs2; > - else if (TREE_CODE (rhs2) != SSA_NAME || !has_single_use (rhs2)) > + else if (TREE_CODE (rhs2) != SSA_NAME) > break; > else > { > > Jakub > >
On Wed, Nov 08, 2017 at 04:20:15PM +0100, Richard Biener wrote: > Can't you simply use > > unsigned ret = 0; > FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE) > if (!has_single_use (op)) > ++ret; > return ret; > > ? Not sure if the bit_not_p handling is required. Consider the case with most possible statements: _1 = load1; _2 = load2; _3 = ~_1; _4 = ~_2; _5 = _3 ^ _4; store0 = _5; All these statements construct the value for the store, and if we remove the old stores and add new stores we'll need similar statements for each of the new stores. What the function is attempting to compute how many of these original statements will be not DCEd. If _5 has multiple uses, then we'll need all of them, so 5 stmts not being DCEd. If _5 has single use, but _3 (and/or _4) has multiple uses, we'll need the corresponding loads in addition to the BIT_NOT_EXPR statement(s). If only _1 (and/or _2) has multiple uses, we'll need the load(s) but nothing else. So, there isn't a single stmt I could FOR_EACH_SSA_TREE_OPERAND on. For BIT_{AND,IOR,XOR}_EXPR doing it just on that stmt would be too rough approximation and would miss the case when the bitwise binary op result is used. > It doesn't seem you handle multi-uses of the BIT_*_EXPR results > itself? Or does it only handle multi-uses of the BIT_*_EXPR > but not the feeding loads? I believe I handle all those precisely above (the only reason I've talked about aproximation is that bit field loads/stores are counted as one stmt and the masking added for handling multiple semi-adjacent bitfield loads/stores aren't counted either). Given the above example: if (!has_single_use (gimple_assign_rhs1 (stmt))) { ret += 1 + info->ops[0].bit_not_p; if (info->ops[1].base_addr) ret += 1 + info->ops[1].bit_not_p; return ret + 1; } Above should handle the _5 multiple uses case (the first operand is guaranteed by the discovery code to be a possibly negated load). 
stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); /* stmt is now the BIT_*_EXPR. */ if (!has_single_use (gimple_assign_rhs1 (stmt))) ret += 1 + info->ops[0].bit_not_p; Above should handle the _3 multiple uses. else if (info->ops[0].bit_not_p) { gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); if (!has_single_use (gimple_assign_rhs1 (stmt2))) ++ret; } Above should handle multiple uses of _1. if (info->ops[1].base_addr == NULL_TREE) return ret; is an early return for the case when second argument to BIT_*_EXPR is constant. if (!has_single_use (gimple_assign_rhs2 (stmt))) ret += 1 + info->ops[1].bit_not_p; Above should handle the _4 multiple uses. else if (info->ops[1].bit_not_p) { gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); if (!has_single_use (gimple_assign_rhs1 (stmt2))) ++ret; } Above should handle the _2 multiple uses. And for another example like: _6 = load1; _7 = ~_6; store0 = _7; if (!has_single_use (gimple_assign_rhs1 (stmt))) return 1 + info->ops[0].bit_not_p; Above should handle _7 multiple uses else if (info->ops[0].bit_not_p) { stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); if (!has_single_use (gimple_assign_rhs1 (stmt))) return 1; } Above should handle _6 multiple uses. Jakub
On Wed, 8 Nov 2017, Jakub Jelinek wrote: > On Wed, Nov 08, 2017 at 04:20:15PM +0100, Richard Biener wrote: > > Can't you simply use > > > > unsigned ret = 0; > > FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE) > > if (!has_single_use (op)) > > ++ret; > > return ret; > > > > ? Not sure if the bit_not_p handling is required. > > Consider the case with most possible statements: > _1 = load1; > _2 = load2; > _3 = ~_1; > _4 = ~_2; > _5 = _3 ^ _4; > store0 = _5; > All these statements construct the value for the store, and if we remove > the old stores and add new stores we'll need similar statements for each > of the new stores. What the function is attempting to compute how many > of these original statements will be not DCEd. > If _5 has multiple uses, then we'll need all of them, so 5 stmts not > being DCEd. If _5 has single use, but _3 (and/or _4) has multiple uses, > we'll need the corresponding loads in addition to the BIT_NOT_EXPR > statement(s). If only _1 (and/or _2) has multiple uses, we'll need > the load(s) but nothing else. > So, there isn't a single stmt I could FOR_EACH_SSA_TREE_OPERAND on. > For BIT_{AND,IOR,XOR}_EXPR doing it just on that stmt would be too rough > approximation and would miss the case when the bitwise binary op result > is used. Hmm, I see. > > It doesn't seem you handle multi-uses of the BIT_*_EXPR results > > itself? Or does it only handle multi-uses of the BIT_*_EXPR > > but not the feeding loads? > > I believe I handle all those precisely above (the only reason I've talked > about aproximation is that bit field loads/stores are counted as one stmt > and the masking added for handling multiple semi-adjacent bitfield > loads/stores aren't counted either). 
> > Given the above example: > if (!has_single_use (gimple_assign_rhs1 (stmt))) > { > ret += 1 + info->ops[0].bit_not_p; > if (info->ops[1].base_addr) > ret += 1 + info->ops[1].bit_not_p; > return ret + 1; > } > Above should handle the _5 multiple uses case (the first operand is guaranteed > by the discovery code to be a possibly negated load). > stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); > /* stmt is now the BIT_*_EXPR. */ > if (!has_single_use (gimple_assign_rhs1 (stmt))) > ret += 1 + info->ops[0].bit_not_p; > Above should handle the _3 multiple uses. > else if (info->ops[0].bit_not_p) > { > gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); > if (!has_single_use (gimple_assign_rhs1 (stmt2))) > ++ret; > } > Above should handle multiple uses of _1. > if (info->ops[1].base_addr == NULL_TREE) > return ret; > is an early return for the case when second argument to BIT_*_EXPR is > constant. > if (!has_single_use (gimple_assign_rhs2 (stmt))) > ret += 1 + info->ops[1].bit_not_p; > Above should handle the _4 multiple uses. > else if (info->ops[1].bit_not_p) > { > gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); > if (!has_single_use (gimple_assign_rhs1 (stmt2))) > ++ret; > } > Above should handle the _2 multiple uses. > > And for another example like: > _6 = load1; > _7 = ~_6; > store0 = _7; > if (!has_single_use (gimple_assign_rhs1 (stmt))) > return 1 + info->ops[0].bit_not_p; > Above should handle _7 multiple uses > else if (info->ops[0].bit_not_p) > { > stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); > if (!has_single_use (gimple_assign_rhs1 (stmt))) > return 1; > } > Above should handle _6 multiple uses. Thanks for the explanation, it's now more clear what the code does. The patch is ok. (and hopefully makes the PGO bootstrap issue latent again - heh) Thanks, Richard.
Hi Jakub, On 9 November 2017 at 13:58, Richard Biener <rguenther@suse.de> wrote: > On Wed, 8 Nov 2017, Jakub Jelinek wrote: > >> On Wed, Nov 08, 2017 at 04:20:15PM +0100, Richard Biener wrote: >> > Can't you simply use >> > >> > unsigned ret = 0; >> > FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE) >> > if (!has_single_use (op)) >> > ++ret; >> > return ret; >> > >> > ? Not sure if the bit_not_p handling is required. >> >> Consider the case with most possible statements: >> _1 = load1; >> _2 = load2; >> _3 = ~_1; >> _4 = ~_2; >> _5 = _3 ^ _4; >> store0 = _5; >> All these statements construct the value for the store, and if we remove >> the old stores and add new stores we'll need similar statements for each >> of the new stores. What the function is attempting to compute how many >> of these original statements will be not DCEd. >> If _5 has multiple uses, then we'll need all of them, so 5 stmts not >> being DCEd. If _5 has single use, but _3 (and/or _4) has multiple uses, >> we'll need the corresponding loads in addition to the BIT_NOT_EXPR >> statement(s). If only _1 (and/or _2) has multiple uses, we'll need >> the load(s) but nothing else. >> So, there isn't a single stmt I could FOR_EACH_SSA_TREE_OPERAND on. >> For BIT_{AND,IOR,XOR}_EXPR doing it just on that stmt would be too rough >> approximation and would miss the case when the bitwise binary op result >> is used. > > Hmm, I see. > >> > It doesn't seem you handle multi-uses of the BIT_*_EXPR results >> > itself? Or does it only handle multi-uses of the BIT_*_EXPR >> > but not the feeding loads? >> >> I believe I handle all those precisely above (the only reason I've talked >> about aproximation is that bit field loads/stores are counted as one stmt >> and the masking added for handling multiple semi-adjacent bitfield >> loads/stores aren't counted either). 
>> >> Given the above example: >> if (!has_single_use (gimple_assign_rhs1 (stmt))) >> { >> ret += 1 + info->ops[0].bit_not_p; >> if (info->ops[1].base_addr) >> ret += 1 + info->ops[1].bit_not_p; >> return ret + 1; >> } >> Above should handle the _5 multiple uses case (the first operand is guaranteed >> by the discovery code to be a possibly negated load). >> stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); >> /* stmt is now the BIT_*_EXPR. */ >> if (!has_single_use (gimple_assign_rhs1 (stmt))) >> ret += 1 + info->ops[0].bit_not_p; >> Above should handle the _3 multiple uses. >> else if (info->ops[0].bit_not_p) >> { >> gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); >> if (!has_single_use (gimple_assign_rhs1 (stmt2))) >> ++ret; >> } >> Above should handle multiple uses of _1. >> if (info->ops[1].base_addr == NULL_TREE) >> return ret; >> is an early return for the case when second argument to BIT_*_EXPR is >> constant. >> if (!has_single_use (gimple_assign_rhs2 (stmt))) >> ret += 1 + info->ops[1].bit_not_p; >> Above should handle the _4 multiple uses. >> else if (info->ops[1].bit_not_p) >> { >> gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); >> if (!has_single_use (gimple_assign_rhs1 (stmt2))) >> ++ret; >> } >> Above should handle the _2 multiple uses. >> >> And for another example like: >> _6 = load1; >> _7 = ~_6; >> store0 = _7; >> if (!has_single_use (gimple_assign_rhs1 (stmt))) >> return 1 + info->ops[0].bit_not_p; >> Above should handle _7 multiple uses >> else if (info->ops[0].bit_not_p) >> { >> stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); >> if (!has_single_use (gimple_assign_rhs1 (stmt))) >> return 1; >> } >> Above should handle _6 multiple uses. > > Thanks for the explanation, it's now more clear what the code does. > > The patch is ok. 
(and hopefully makes the PGO bootstrap issue latent > again - heh) > I've noticed that this patch (r254579) introduces an ICE on aarch64: gcc.target/aarch64/vect-compile.c (internal compiler error) gcc.target/aarch64/vect.c (internal compiler error) during GIMPLE pass: store-merging In file included from /gcc/testsuite/gcc.target/aarch64/vect.c:5:0: /gcc/testsuite/gcc.target/aarch64/vect.x: In function 'test_orn': /gcc/testsuite/gcc.target/aarch64/vect.x:7:6: internal compiler error: tree check: expected ssa_name, have mem_ref in has_single_use, at ssa-iterators.h:400 0x547c93 tree_check_failed(tree_node const*, char const*, int, char const*, ...) /gcc/tree.c:9098 0x120a79d tree_check /gcc/tree.h:3344 0x120a79d has_single_use /gcc/ssa-iterators.h:400 0x120bb67 count_multiple_uses /gcc/gimple-ssa-store-merging.c:1413 0x120bd81 split_group /gcc/gimple-ssa-store-merging.c:1490 0x120c890 output_merged_store /gcc/gimple-ssa-store-merging.c:1699 0x120f07f output_merged_stores /gcc/gimple-ssa-store-merging.c:2023 0x120f07f terminate_and_process_chain /gcc/gimple-ssa-store-merging.c:2061 0x120f07f terminate_and_release_chain /gcc/gimple-ssa-store-merging.c:986 0x120f8b7 terminate_all_aliasing_chains /gcc/gimple-ssa-store-merging.c:969 0x12107d6 execute /gcc/gimple-ssa-store-merging.c:2454 Thanks, Christophe > Thanks, > Richard.
On Thu, Nov 16, 2017 at 11:00:01AM +0100, Christophe Lyon wrote: > I've noticed that this patch (r254579) introduces an ICE on aarch64: > gcc.target/aarch64/vect-compile.c (internal compiler error) > gcc.target/aarch64/vect.c (internal compiler error) That should have been fixed in r254628. Jakub
On 16 November 2017 at 11:04, Jakub Jelinek <jakub@redhat.com> wrote: > On Thu, Nov 16, 2017 at 11:00:01AM +0100, Christophe Lyon wrote: >> I've noticed that this patch (r254579) introduces an ICE on aarch64: >> gcc.target/aarch64/vect-compile.c (internal compiler error) >> gcc.target/aarch64/vect.c (internal compiler error) > > That should have been fixed in r254628. > Great, thanks. Validations are still catching-up on my side, I still have a few days of delay. > Jakub
--- gcc/gimple-ssa-store-merging.c.jj 2017-11-06 08:57:07.000000000 +0100 +++ gcc/gimple-ssa-store-merging.c 2017-11-06 16:35:38.122437951 +0100 @@ -1370,6 +1370,65 @@ find_constituent_stores (struct merged_s return ret; } +/* Return how many SSA_NAMEs used to compute value to store in the INFO + store have multiple uses. If any SSA_NAME has multiple uses, also + count statements needed to compute it. */ + +static unsigned +count_multiple_uses (store_immediate_info *info) +{ + gimple *stmt = info->stmt; + unsigned ret = 0; + switch (info->rhs_code) + { + case INTEGER_CST: + return 0; + case BIT_AND_EXPR: + case BIT_IOR_EXPR: + case BIT_XOR_EXPR: + if (!has_single_use (gimple_assign_rhs1 (stmt))) + { + ret += 1 + info->ops[0].bit_not_p; + if (info->ops[1].base_addr) + ret += 1 + info->ops[1].bit_not_p; + return ret + 1; + } + stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); + /* stmt is now the BIT_*_EXPR. */ + if (!has_single_use (gimple_assign_rhs1 (stmt))) + ret += 1 + info->ops[0].bit_not_p; + else if (info->ops[0].bit_not_p) + { + gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); + if (!has_single_use (gimple_assign_rhs1 (stmt2))) + ++ret; + } + if (info->ops[1].base_addr == NULL_TREE) + return ret; + if (!has_single_use (gimple_assign_rhs2 (stmt))) + ret += 1 + info->ops[1].bit_not_p; + else if (info->ops[1].bit_not_p) + { + gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); + if (!has_single_use (gimple_assign_rhs1 (stmt2))) + ++ret; + } + return ret; + case MEM_REF: + if (!has_single_use (gimple_assign_rhs1 (stmt))) + return 1 + info->ops[0].bit_not_p; + else if (info->ops[0].bit_not_p) + { + stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); + if (!has_single_use (gimple_assign_rhs1 (stmt))) + return 1; + } + return 0; + default: + gcc_unreachable (); + } +} + /* Split a merged store described by GROUP by populating the SPLIT_STORES vector (if non-NULL) with split_store structs describing the byte offset (from the base), the 
bit size and alignment of each store as well as the @@ -1385,7 +1444,9 @@ find_constituent_stores (struct merged_s static unsigned int split_group (merged_store_group *group, bool allow_unaligned_store, bool allow_unaligned_load, - vec<struct split_store *> *split_stores) + vec<struct split_store *> *split_stores, + unsigned *total_orig, + unsigned *total_new) { unsigned HOST_WIDE_INT pos = group->bitregion_start; unsigned HOST_WIDE_INT size = group->bitregion_end - pos; @@ -1393,6 +1454,7 @@ split_group (merged_store_group *group, unsigned HOST_WIDE_INT group_align = group->align; unsigned HOST_WIDE_INT align_base = group->align_base; unsigned HOST_WIDE_INT group_load_align = group_align; + bool any_orig = false; gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0)); @@ -1400,6 +1462,34 @@ split_group (merged_store_group *group, unsigned HOST_WIDE_INT try_pos = bytepos; group->stores.qsort (sort_by_bitpos); + if (total_orig) + { + unsigned int i; + store_immediate_info *info = group->stores[0]; + + total_new[0] = 0; + total_orig[0] = 1; /* The orig store. */ + info = group->stores[0]; + if (info->ops[0].base_addr) + total_orig[0] += 1 + info->ops[0].bit_not_p; + if (info->ops[1].base_addr) + total_orig[0] += 1 + info->ops[1].bit_not_p; + switch (info->rhs_code) + { + case BIT_AND_EXPR: + case BIT_IOR_EXPR: + case BIT_XOR_EXPR: + total_orig[0]++; /* The orig BIT_*_EXPR stmt. 
*/ + break; + default: + break; + } + total_orig[0] *= group->stores.length (); + + FOR_EACH_VEC_ELT (group->stores, i, info) + total_new[0] += count_multiple_uses (info); + } + if (!allow_unaligned_load) for (int i = 0; i < 2; ++i) if (group->load_align[i]) @@ -1524,7 +1614,10 @@ split_group (merged_store_group *group, if (info && info->bitpos >= try_bitpos && info->bitpos + info->bitsize <= try_bitpos + try_size) - store->orig = true; + { + store->orig = true; + any_orig = true; + } split_stores->safe_push (store); } @@ -1532,6 +1625,37 @@ split_group (merged_store_group *group, size -= try_size; } + if (total_orig) + { + /* If we are reusing some original stores and any of the + original SSA_NAMEs had multiple uses, we need to subtract + those now before we add the new ones. */ + if (total_new[0] && any_orig) + { + unsigned int i; + struct split_store *store; + FOR_EACH_VEC_ELT (*split_stores, i, store) + if (store->orig) + total_new[0] -= count_multiple_uses (store->orig_stores[0]); + } + total_new[0] += ret; /* The new store. */ + store_immediate_info *info = group->stores[0]; + if (info->ops[0].base_addr) + total_new[0] += ret * (1 + info->ops[0].bit_not_p); + if (info->ops[1].base_addr) + total_new[0] += ret * (1 + info->ops[1].bit_not_p); + switch (info->rhs_code) + { + case BIT_AND_EXPR: + case BIT_IOR_EXPR: + case BIT_XOR_EXPR: + total_new[0] += ret; /* The new BIT_*_EXPR stmt. */ + break; + default: + break; + } + } + return ret; } @@ -1564,26 +1688,35 @@ imm_store_chain_info::output_merged_stor for unaligned and how many stores we'd emit for aligned stores. Only use unaligned stores if it allows fewer stores than aligned. 
*/ unsigned aligned_cnt - = split_group (group, false, allow_unaligned_load, NULL); + = split_group (group, false, allow_unaligned_load, NULL, NULL, NULL); unsigned unaligned_cnt - = split_group (group, true, allow_unaligned_load, NULL); + = split_group (group, true, allow_unaligned_load, NULL, NULL, NULL); if (aligned_cnt <= unaligned_cnt) allow_unaligned_store = false; } + unsigned total_orig, total_new; split_group (group, allow_unaligned_store, allow_unaligned_load, - &split_stores); + &split_stores, &total_orig, &total_new); if (split_stores.length () >= orig_num_stmts) { /* We didn't manage to reduce the number of statements. Bail out. */ if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Exceeded original number of stmts (%u)." - " Not profitable to emit new sequence.\n", - orig_num_stmts); - } + fprintf (dump_file, "Exceeded original number of stmts (%u)." + " Not profitable to emit new sequence.\n", + orig_num_stmts); return false; } + if (total_orig <= total_new) + { + /* If number of estimated new statements is above estimated original + statements, bail out too. 
*/ + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Estimated number of original stmts (%u)" + " not larger than estimated number of new" + " stmts (%u).\n", + total_orig, total_new); + } gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt); gimple_seq seq = NULL; @@ -2096,7 +2229,6 @@ handled_load (gimple *stmt, store_operan { tree rhs1 = gimple_assign_rhs1 (stmt); if (TREE_CODE (rhs1) == SSA_NAME - && has_single_use (rhs1) && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos, bitregion_start, bitregion_end)) { @@ -2159,7 +2291,7 @@ pass_store_merging::process_store (gimpl rhs_code = INTEGER_CST; ops[0].val = rhs; } - else if (TREE_CODE (rhs) != SSA_NAME || !has_single_use (rhs)) + else if (TREE_CODE (rhs) != SSA_NAME) invalid = true; else { @@ -2179,7 +2311,7 @@ pass_store_merging::process_store (gimpl rhs1 = gimple_assign_rhs1 (def_stmt); rhs2 = gimple_assign_rhs2 (def_stmt); invalid = true; - if (TREE_CODE (rhs1) != SSA_NAME || !has_single_use (rhs1)) + if (TREE_CODE (rhs1) != SSA_NAME) break; def_stmt1 = SSA_NAME_DEF_STMT (rhs1); if (!is_gimple_assign (def_stmt1) @@ -2188,7 +2320,7 @@ pass_store_merging::process_store (gimpl break; if (rhs_valid_for_store_merging_p (rhs2)) ops[1].val = rhs2; - else if (TREE_CODE (rhs2) != SSA_NAME || !has_single_use (rhs2)) + else if (TREE_CODE (rhs2) != SSA_NAME) break; else {