Message ID | 20171110135936.GM14653@tucnak
---|---
State | New
Series | Handle different bit_not_p in store merging (PR tree-optimization/78821)

Hi Jakub,

On 10/11/17 13:59, Jakub Jelinek wrote:
> Hi!
>
> This is something Uros requested in the PR; at least with BIT_NOT_EXPRs
> it is easy.  Previous store merging changes required that bit_not_p
> be equal on all stores in the group (in all 3 spots, i.e. on the result
> of BIT_{AND,IOR,XOR}_EXPR and on both of the operands).
>
> This patch handles mixed values of that flag.  If none of the
> orig_stores have a particular bit_not_p set, then as previously nothing
> is inverted; if all of them have it set, then as previously we
> BIT_NOT_EXPR the particular SSA_NAME; and newly, if there is a mix of
> false and true in a particular bit_not_p, we compute a mask and
> BIT_XOR_EXPR it, which inverts only the bits that were bit_not_p and
> not the others.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Might be worth doing a sanity check test run on a big-endian target
since there is a bit of BYTES_BIG_ENDIAN logic in the patch (though it
looks correct to me).

Thanks,
Kyrill

> 2017-11-10  Jakub Jelinek  <jakub@redhat.com>
>
> 	PR tree-optimization/78821
> 	* gimple-ssa-store-merging.c (compatible_load_p): Don't require
> 	that bit_not_p is the same.
> 	(imm_store_chain_info::coalesce_immediate_stores): Likewise.
> 	(split_group): Count precisely bit_not_p bits in each statement.
> 	(invert_op): New function.
> 	(imm_store_chain_info::output_merged_store): Use invert_op to
> 	emit BIT_XOR_EXPR with a xor_mask instead of BIT_NOT_EXPR if some
> 	but not all orig_stores have BIT_NOT_EXPR in the corresponding
> 	spots.
>
> 	* gcc.dg/store_merging_15.c: New test.

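For concreteness, here is what the new BIT_XOR_EXPR path buys on the new
testcase: on a little-endian target, f1 from gcc.dg/store_merging_15.c
(quoted in full in the patch at the bottom of this page) can be merged into
a single 8-byte access, with a constant XOR mask holding 0xff in the byte
lanes that were stored inverted (a, c, d, f, g) and 0x00 in the lanes
copied through unchanged (b, e).  A hand-written C sketch of the merged
form (an illustration of the idea only, not compiler output; the mask
constant assumes little-endian byte order):

#include <string.h>
#include <stdint.h>

struct S { unsigned char a, b; unsigned short c;
	   unsigned char d, e, f, g; unsigned long long h; };

/* Rough little-endian equivalent of what the pass can now emit for f1:
   one 8-byte load, one XOR with a constant mask, one store.  The mask
   flips only the lanes of a, c, d, f and g.  */
void
f1_merged (struct S *__restrict p, struct S *__restrict q)
{
  uint64_t v;
  memcpy (&v, q, 8);		/* bytes a, b, c (2 bytes), d, e, f, g */
  v ^= 0xffff00ffffff00ffULL;	/* invert a, c, d, f, g; keep b, e */
  memcpy (p, &v, 8);
}

The single XOR replaces what would otherwise be a per-field mix of
BIT_NOT_EXPR inversions and plain copies.
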
On Fri, Nov 10, 2017 at 04:51:19PM +0000, Kyrill Tkachov wrote:
> Might be worth doing a sanity check test run on a big-endian target
> since there is a bit of BYTES_BIG_ENDIAN logic in the patch (though it
> looks correct to me).

Indeed.  Successfully bootstrapped/regtested also on powerpc64{,le}-linux
now.

	Jakub

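The BYTES_BIG_ENDIAN logic in question is the index flip invert_op performs
before clearing a region of the mask buffer: within a byte, bit offsets are
counted up from the least significant bit on little-endian targets but from
the most significant bit on big-endian ones.  A standalone C sketch of the
flip (clear_bits_le and clear_bits_be are hypothetical single-byte
stand-ins for GCC's clear_bit_region and clear_bit_region_be helpers, which
also handle ranges spanning multiple bytes):

#include <stdio.h>

#define BITS_PER_UNIT 8

/* Hypothetical single-byte stand-in for clear_bit_region:
   START counts up from the least significant bit.  */
static void
clear_bits_le (unsigned char *p, unsigned start, unsigned len)
{
  *p &= (unsigned char) ~(((1u << len) - 1) << start);
}

/* Hypothetical single-byte stand-in for clear_bit_region_be:
   START is the most-significant bit of the region, and the range
   extends downwards from it.  */
static void
clear_bits_be (unsigned char *p, unsigned start, unsigned len)
{
  *p &= (unsigned char) ~(((1u << len) - 1) << (start + 1 - len));
}

int
main (void)
{
  unsigned char le = 0xff, be = 0xff;
  unsigned pos_in_buffer = 2, bitsize = 3;

  clear_bits_le (&le, pos_in_buffer % BITS_PER_UNIT, bitsize);
  /* The index flip the patch applies on big-endian targets.  */
  clear_bits_be (&be, BITS_PER_UNIT - 1 - (pos_in_buffer % BITS_PER_UNIT),
		 bitsize);
  printf ("%02x %02x\n", le, be);	/* e3 c7 */
  return 0;
}
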
On Fri, 10 Nov 2017, Jakub Jelinek wrote:
> This is something Uros requested in the PR; at least with BIT_NOT_EXPRs
> it is easy.  Previous store merging changes required that bit_not_p
> be equal on all stores in the group (in all 3 spots, i.e. on the result
> of BIT_{AND,IOR,XOR}_EXPR and on both of the operands).
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok.

Thanks,
Richard.

--- gcc/gimple-ssa-store-merging.c.jj	2017-11-10 08:04:49.000000000 +0100
+++ gcc/gimple-ssa-store-merging.c	2017-11-10 10:53:13.152731543 +0100
@@ -1036,7 +1036,6 @@ compatible_load_p (merged_store_group *m
     {
       store_immediate_info *infof = merged_store->stores[0];
       if (!info->ops[idx].base_addr
-	  || info->ops[idx].bit_not_p != infof->ops[idx].bit_not_p
 	  || (info->ops[idx].bitpos - infof->ops[idx].bitpos
 	      != info->bitpos - infof->bitpos)
 	  || !operand_equal_p (info->ops[idx].base_addr,
@@ -1176,8 +1175,7 @@ imm_store_chain_info::coalesce_immediate
 	 Merge it into the current store group.  There can be gaps in between
 	 the stores, but there can't be gaps in between bitregions.  */
       else if (info->bitregion_start <= merged_store->bitregion_end
-	       && info->rhs_code == merged_store->stores[0]->rhs_code
-	       && info->bit_not_p == merged_store->stores[0]->bit_not_p)
+	       && info->rhs_code == merged_store->stores[0]->rhs_code)
 	{
 	  store_immediate_info *infof = merged_store->stores[0];
 
@@ -1496,16 +1494,14 @@ split_group (merged_store_group *group,
       total_orig[0] = 1; /* The orig store.  */
       info = group->stores[0];
       if (info->ops[0].base_addr)
-	total_orig[0] += 1 + info->ops[0].bit_not_p;
+	total_orig[0]++;
       if (info->ops[1].base_addr)
-	total_orig[0] += 1 + info->ops[1].bit_not_p;
+	total_orig[0]++;
       switch (info->rhs_code)
 	{
 	case BIT_AND_EXPR:
 	case BIT_IOR_EXPR:
 	case BIT_XOR_EXPR:
-	  if (info->bit_not_p)
-	    total_orig[0]++; /* The orig BIT_NOT_EXPR stmt.  */
 	  total_orig[0]++; /* The orig BIT_*_EXPR stmt.  */
 	  break;
 	default:
@@ -1514,7 +1510,12 @@ split_group (merged_store_group *group,
       total_orig[0] *= group->stores.length ();
 
       FOR_EACH_VEC_ELT (group->stores, i, info)
-	total_new[0] += count_multiple_uses (info);
+	{
+	  total_new[0] += count_multiple_uses (info);
+	  total_orig[0] += (info->bit_not_p
+			    + info->ops[0].bit_not_p
+			    + info->ops[1].bit_not_p);
+	}
     }
 
   if (!allow_unaligned_load)
@@ -1654,13 +1655,13 @@ split_group (merged_store_group *group,
 
   if (total_orig)
     {
+      unsigned int i;
+      struct split_store *store;
       /* If we are reusing some original stores and any of the
 	 original SSA_NAMEs had multiple uses, we need to subtract
 	 those now before we add the new ones.  */
       if (total_new[0] && any_orig)
 	{
-	  unsigned int i;
-	  struct split_store *store;
 	  FOR_EACH_VEC_ELT (*split_stores, i, store)
 	    if (store->orig)
 	      total_new[0] -= count_multiple_uses (store->orig_stores[0]);
@@ -1668,26 +1669,105 @@ split_group (merged_store_group *group,
       total_new[0] += ret; /* The new store.  */
       store_immediate_info *info = group->stores[0];
       if (info->ops[0].base_addr)
-	total_new[0] += ret * (1 + info->ops[0].bit_not_p);
+	total_new[0] += ret;
       if (info->ops[1].base_addr)
-	total_new[0] += ret * (1 + info->ops[1].bit_not_p);
+	total_new[0] += ret;
       switch (info->rhs_code)
 	{
 	case BIT_AND_EXPR:
 	case BIT_IOR_EXPR:
 	case BIT_XOR_EXPR:
-	  if (info->bit_not_p)
-	    total_new[0] += ret; /* The new BIT_NOT_EXPR stmt.  */
 	  total_new[0] += ret; /* The new BIT_*_EXPR stmt.  */
 	  break;
 	default:
 	  break;
 	}
+      FOR_EACH_VEC_ELT (*split_stores, i, store)
+	{
+	  unsigned int j;
+	  bool bit_not_p[3] = { false, false, false };
+	  /* If all orig_stores have certain bit_not_p set, then
+	     we'd use a BIT_NOT_EXPR stmt and need to account for it.
+	     If some orig_stores have certain bit_not_p set, then
+	     we'd use a BIT_XOR_EXPR with a mask and need to account for
+	     it.  */
+	  FOR_EACH_VEC_ELT (store->orig_stores, j, info)
+	    {
+	      if (info->ops[0].bit_not_p)
+		bit_not_p[0] = true;
+	      if (info->ops[1].bit_not_p)
+		bit_not_p[1] = true;
+	      if (info->bit_not_p)
+		bit_not_p[2] = true;
+	    }
+	  total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
+	}
+
     }
 
   return ret;
 }
 
+/* Return the operation through which the operand IDX (if < 2) or
+   result (IDX == 2) should be inverted.  If NOP_EXPR, no inversion
+   is done, if BIT_NOT_EXPR, all bits are inverted, if BIT_XOR_EXPR,
+   the bits should be xored with mask.  */
+
+static enum tree_code
+invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
+{
+  unsigned int i;
+  store_immediate_info *info;
+  unsigned int cnt = 0;
+  FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
+    {
+      bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
+      if (bit_not_p)
+	++cnt;
+    }
+  mask = NULL_TREE;
+  if (cnt == 0)
+    return NOP_EXPR;
+  if (cnt == split_store->orig_stores.length ())
+    return BIT_NOT_EXPR;
+
+  unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
+  unsigned buf_size = split_store->size / BITS_PER_UNIT;
+  unsigned char *buf
+    = XALLOCAVEC (unsigned char, buf_size);
+  memset (buf, ~0U, buf_size);
+  FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
+    {
+      bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
+      if (!bit_not_p)
+	continue;
+      /* Clear regions with bit_not_p and invert afterwards, rather than
+	 clear regions with !bit_not_p, so that gaps in between stores aren't
+	 set in the mask.  */
+      unsigned HOST_WIDE_INT bitsize = info->bitsize;
+      unsigned int pos_in_buffer = 0;
+      if (info->bitpos < try_bitpos)
+	{
+	  gcc_assert (info->bitpos + bitsize > try_bitpos);
+	  bitsize -= (try_bitpos - info->bitpos);
+	}
+      else
+	pos_in_buffer = info->bitpos - try_bitpos;
+      if (pos_in_buffer + bitsize > split_store->size)
+	bitsize = split_store->size - pos_in_buffer;
+      unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
+      if (BYTES_BIG_ENDIAN)
+	clear_bit_region_be (p, (BITS_PER_UNIT - 1
+				 - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
+      else
+	clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
+    }
+  for (unsigned int i = 0; i < buf_size; ++i)
+    buf[i] = ~buf[i];
+  mask = native_interpret_expr (int_type, buf, buf_size);
+  return BIT_XOR_EXPR;
+}
+
 /* Given a merged store group GROUP output the widened version of it.
    The store chain is against the base object BASE.
    Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
@@ -1894,10 +1974,13 @@ imm_store_chain_info::output_merged_stor
 		    gimple_seq_add_stmt_without_update (&seq, stmt);
 		}
 	      ops[j] = gimple_assign_lhs (stmt);
-	      if (op.bit_not_p)
+	      tree xor_mask;
+	      enum tree_code inv_op
+		= invert_op (split_store, j, int_type, xor_mask);
+	      if (inv_op != NOP_EXPR)
 		{
 		  stmt = gimple_build_assign (make_ssa_name (int_type),
-					      BIT_NOT_EXPR, ops[j]);
+					      inv_op, ops[j], xor_mask);
 		  gimple_set_location (stmt, load_loc);
 		  ops[j] = gimple_assign_lhs (stmt);
 
@@ -1947,10 +2030,13 @@ imm_store_chain_info::output_merged_stor
 	  else
 	    gimple_seq_add_stmt_without_update (&seq, stmt);
 	  src = gimple_assign_lhs (stmt);
-	  if (split_store->orig_stores[0]->bit_not_p)
+	  tree xor_mask;
+	  enum tree_code inv_op;
+	  inv_op = invert_op (split_store, 2, int_type, xor_mask);
+	  if (inv_op != NOP_EXPR)
 	    {
 	      stmt = gimple_build_assign (make_ssa_name (int_type),
-					  BIT_NOT_EXPR, src);
+					  inv_op, src, xor_mask);
 	      gimple_set_location (stmt, bit_loc);
 	      if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
 		gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
--- gcc/testsuite/gcc.dg/store_merging_15.c.jj	2017-11-10 10:57:54.481369637 +0100
+++ gcc/testsuite/gcc.dg/store_merging_15.c	2017-11-10 11:14:12.762646259 +0100
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target store_merge } */
+/* { dg-options "-O2 -fdump-tree-store-merging" } */
+
+struct S { unsigned char a, b; unsigned short c; unsigned char d, e, f, g; unsigned long long h; };
+
+__attribute__((noipa)) void
+f1 (struct S *__restrict p, struct S *__restrict q)
+{
+  p->a = ~q->a;
+  p->b = q->b;
+  p->c = ~q->c;
+  p->d = ~q->d;
+  p->e = q->e;
+  p->f = ~q->f;
+  p->g = ~q->g;
+}
+
+__attribute__((noipa)) void
+f2 (struct S *__restrict p, struct S *__restrict q)
+{
+  p->a = ~(unsigned char) (p->a & q->a);
+  p->b = ((unsigned char) ~p->b) & q->b;
+  p->c = p->c & (unsigned short) ~q->c;
+  p->d = p->d & q->d;
+  p->e = p->e & (unsigned char) ~q->e;
+  p->f = p->f & (unsigned char) ~q->f;
+  p->g = ~(unsigned char) (p->g & q->g);
+}
+
+struct S s = { 20, 21, 22, 23, 24, 25, 26, 27 };
+struct S u = { 28, 29, 30, 31, 32, 33, 34, 35 };
+struct S v = { 36, 37, 38, 39, 40, 41, 42, 43 };
+
+int
+main ()
+{
+  asm volatile ("" : : : "memory");
+  f1 (&s, &u);
+  asm volatile ("" : : : "memory");
+  if (s.a != (unsigned char) ~28 || s.b != 29
+      || s.c != (unsigned short) ~30 || s.d != (unsigned char) ~31
+      || s.e != 32 || s.f != (unsigned char) ~33 || s.g != (unsigned char) ~34
+      || s.h != 27)
+    __builtin_abort ();
+  f2 (&u, &v);
+  asm volatile ("" : : : "memory");
+  if (u.a != (unsigned char) ~(28 & 36) || u.b != (((unsigned char) ~29) & 37)
+      || u.c != (30 & (unsigned short) ~38) || u.d != (31 & 39)
+      || u.e != (32 & (unsigned char) ~40) || u.f != (33 & (unsigned char) ~41)
+      || u.g != (unsigned char) ~(34 & 42) || u.h != 35)
+    __builtin_abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Merging successful" 2 "store-merging" } } */
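
To make the mask construction in invert_op above easier to follow, here is
a byte-granular standalone sketch of the same clear-then-invert idea
(build_xor_mask and its parameters are illustrative, not GCC API; the real
code works at bit granularity through clear_bit_region and
clear_bit_region_be).  Starting from all-ones, clearing the regions whose
stores were inverted, and complementing at the end guarantees that bits
falling in gaps between the original stores come out zero, so the XOR
leaves them untouched.

#include <stdio.h>
#include <string.h>

/* Illustrative, byte-granular analogue of invert_op's mask building
   (not GCC API).  Start from all-ones, clear the regions whose stores
   were inverted, then complement everything.  Clearing the bit_not_p
   regions and inverting at the end -- instead of clearing the
   !bit_not_p regions directly -- keeps gap bits at 0 in the final
   mask.  */
static void
build_xor_mask (unsigned char *mask, unsigned len,
		const unsigned *pos, const unsigned *size,
		const int *bit_not_p, unsigned nstores)
{
  memset (mask, 0xff, len);
  for (unsigned i = 0; i < nstores; i++)
    if (bit_not_p[i])
      memset (mask + pos[i], 0, size[i]);
  for (unsigned i = 0; i < len; i++)
    mask[i] = (unsigned char) ~mask[i];
}

int
main (void)
{
  /* Two one-byte stores at offsets 0 and 2; offset 1 is a gap.
     Only the store at offset 2 was inverted.  */
  unsigned pos[] = { 0, 2 }, size[] = { 1, 1 };
  int notp[] = { 0, 1 };
  unsigned char mask[3];

  build_xor_mask (mask, sizeof mask, pos, size, notp, 2);
  printf ("%02x %02x %02x\n", mask[0], mask[1], mask[2]); /* 00 00 ff */
  return 0;
}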