Message ID | 20170713205155.GU2123@tucnak |
---|---|
State | New |
Headers | show |
Hi Jakub, On 13/07/17 21:51, Jakub Jelinek wrote: > Hi! > > As mentioned in the PR, the following testcase started using recently > BIT_FIELD_REFs instead of MEM_REFs and thus the bswap pass, while it > properly determines the very long sequence of stmts is a nop transformation, > throws that away and doesn't optimize it, and no other optimizations > are able to optimize it away. > > The patch attempts to not do anything if there is a simple identity > copy, but if the nop reshuffle needs more than one operation, it will > try to replace the final SSA_NAME BIT_IOR_EXPR assignment with assignment > from the src value (typically SSA_NAME). > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2017-07-13 Jakub Jelinek <jakub@redhat.com> > > PR tree-optimization/81396 > * tree-ssa-math-opts.c (struct symbolic_number): Add n_ops field. > (init_symbolic_number): Initialize it to 1. > (perform_symbolic_merge): Add n_ops from both operands into the new > n_ops. > (find_bswap_or_nop): Don't consider n->n == cmpnop computations > without base_addr as useless if they need more than one operation. > (bswap_replace): Handle !bswap case for NULL base_addr. > > * gcc.dg/tree-ssa/pr81396.c: New test. > > --- gcc/tree-ssa-math-opts.c.jj 2017-07-06 20:31:43.000000000 +0200 > +++ gcc/tree-ssa-math-opts.c 2017-07-13 19:27:02.985354778 +0200 > @@ -1968,6 +1968,7 @@ struct symbolic_number { > tree alias_set; > tree vuse; > unsigned HOST_WIDE_INT range; > + int n_ops; > }; > > #define BITS_PER_MARKER 8 > @@ -2083,6 +2084,7 @@ init_symbolic_number (struct symbolic_nu > return false; > n->range = size; > n->n = CMPNOP; > + n->n_ops = 1; > > if (size < 64 / BITS_PER_MARKER) > n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1; > @@ -2293,6 +2295,7 @@ perform_symbolic_merge (gimple *source_s > return NULL; > } > n->n = n1->n | n2->n; > + n->n_ops = n1->n_ops + n2->n_ops; > > return source_stmt; > } > @@ -2588,7 +2591,7 @@ find_bswap_or_nop (gimple *stmt, struct > return NULL; > > /* Useless bit manipulation performed by code. */ > - if (!n->base_addr && n->n == cmpnop) > + if (!n->base_addr && n->n == cmpnop && n->n_ops == 1) > return NULL; > > n->range *= BITS_PER_UNIT; > @@ -2747,6 +2750,36 @@ bswap_replace (gimple *cur_stmt, gimple > } > src = val_tmp; > } > + else if (!bswap) > + { Would it make sense to add an assert right here checking that this is a cmpnop operation? Looks good to me otherwise. Best regards, Thomas > + gimple *g; > + if (!useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src))) > + { > + if (!is_gimple_val (src)) > + return false; > + g = gimple_build_assign (tgt, NOP_EXPR, src); > + } > + else > + g = gimple_build_assign (tgt, src); > + if (n->range == 16) > + nop_stats.found_16bit++; > + else if (n->range == 32) > + nop_stats.found_32bit++; > + else > + { > + gcc_assert (n->range == 64); > + nop_stats.found_64bit++; > + } > + if (dump_file) > + { > + fprintf (dump_file, > + "%d bit reshuffle in target endianness found at: ", > + (int) n->range); > + print_gimple_stmt (dump_file, cur_stmt, 0); > + } > + gsi_replace (&gsi, g, true); > + return true; > + } > else if (TREE_CODE (src) == BIT_FIELD_REF) > src = TREE_OPERAND (src, 0); > > --- gcc/testsuite/gcc.dg/tree-ssa/pr81396.c.jj 2017-07-13 19:22:10.191954620 +0200 > +++ gcc/testsuite/gcc.dg/tree-ssa/pr81396.c 2017-07-13 19:24:16.638399984 +0200 > @@ -0,0 +1,25 @@ > +/* PR tree-optimization/81396 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-optimized" } */ > + > +typedef unsigned long long uint64_t; > + > +uint64_t > +foo (uint64_t word) > +{ > +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __SIZEOF_LONG_LONG__ == 8 > + const unsigned char *const ptr = (const unsigned char *) &word; > + return ((uint64_t) ptr[0] > + | ((uint64_t) ptr[1] << 8) > + | ((uint64_t) ptr[2] << (8 * 2)) > + | ((uint64_t) ptr[3] << (8 * 3)) > + | ((uint64_t) ptr[4] << (8 * 4)) > + | ((uint64_t) ptr[5] << (8 * 5)) > + | ((uint64_t) ptr[6] << (8 * 6)) > + | ((uint64_t) ptr[7] << (8 * 7))); > +#else > + return word; > +#endif > +} > + > +/* { dg-final { scan-tree-dump "return word_\[0-9]*\\(D\\);" "optimized" } } */ > > Jakub >
On Fri, Jul 14, 2017 at 09:45:39AM +0100, Thomas Preudhomme wrote: > > + else if (!bswap) > > + { > > Would it make sense to add an assert right here checking that this is a > cmpnop operation? The earlier bswap_replace code to handle n->base_addr && !bswap doesn't have anything like that either. And the assert would be certainly non-trivial, like: #if ENABLE_ASSERT_CHECKING uint64_t cmpnop = CMPNOP; /* Note, n->range has been multiplied by BITS_PER_UNIT in between, so it needs to be divided by it again. */ if (n->range / BITS_PER_UNIT < (int) sizeof (int64_t)) { mask = ((uint64_t) 1 << (n->range / BITS_PER_UNIT * BITS_PER_MARKER)) - 1; cmpnop &= mask; } gcc_assert (n->n == cmpnop); #endif I think trusting find_bswap_or_nop did the job right is enough. Jakub
On 14/07/17 09:56, Jakub Jelinek wrote: > On Fri, Jul 14, 2017 at 09:45:39AM +0100, Thomas Preudhomme wrote: >>> + else if (!bswap) >>> + { >> >> Would it make sense to add an assert right here checking that this is a >> cmpnop operation? > > The earlier bswap_replace code to handle n->base_addr && !bswap doesn't > have anything like that either. > > And the assert would be certainly non-trivial, like: > #if ENABLE_ASSERT_CHECKING > uint64_t cmpnop = CMPNOP; > /* Note, n->range has been multiplied by BITS_PER_UNIT in between, so it > needs to be divided by it again. */ > if (n->range / BITS_PER_UNIT < (int) sizeof (int64_t)) > { > mask = ((uint64_t) 1 << (n->range / BITS_PER_UNIT * BITS_PER_MARKER)) - 1; > cmpnop &= mask; > } > gcc_assert (n->n == cmpnop); > #endif > > I think trusting find_bswap_or_nop did the job right is enough. Ooops I meant check that if !base_addr then !bswap but that's already guaranteed by the structure of the if. All good then. Best regards, Thomas
On Thu, 13 Jul 2017, Jakub Jelinek wrote: > Hi! > > As mentioned in the PR, the following testcase started using recently > BIT_FIELD_REFs instead of MEM_REFs and thus the bswap pass, while it > properly determines the very long sequence of stmts is a nop transformation, > throws that away and doesn't optimize it, and no other optimizations > are able to optimize it away. > > The patch attempts to not do anything if there is a simple identity > copy, but if the nop reshuffle needs more than one operation, it will > try to replace the final SSA_NAME BIT_IOR_EXPR assignment with assignment > from the src value (typically SSA_NAME). > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2017-07-13 Jakub Jelinek <jakub@redhat.com> > > PR tree-optimization/81396 > * tree-ssa-math-opts.c (struct symbolic_number): Add n_ops field. > (init_symbolic_number): Initialize it to 1. > (perform_symbolic_merge): Add n_ops from both operands into the new > n_ops. > (find_bswap_or_nop): Don't consider n->n == cmpnop computations > without base_addr as useless if they need more than one operation. > (bswap_replace): Handle !bswap case for NULL base_addr. > > * gcc.dg/tree-ssa/pr81396.c: New test. > > --- gcc/tree-ssa-math-opts.c.jj 2017-07-06 20:31:43.000000000 +0200 > +++ gcc/tree-ssa-math-opts.c 2017-07-13 19:27:02.985354778 +0200 > @@ -1968,6 +1968,7 @@ struct symbolic_number { > tree alias_set; > tree vuse; > unsigned HOST_WIDE_INT range; > + int n_ops; > }; Ok if you document n_ops a bit in the comment above the structure. Thanks, Richard. > #define BITS_PER_MARKER 8 > @@ -2083,6 +2084,7 @@ init_symbolic_number (struct symbolic_nu > return false; > n->range = size; > n->n = CMPNOP; > + n->n_ops = 1; > > if (size < 64 / BITS_PER_MARKER) > n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1; > @@ -2293,6 +2295,7 @@ perform_symbolic_merge (gimple *source_s > return NULL; > } > n->n = n1->n | n2->n; > + n->n_ops = n1->n_ops + n2->n_ops; > > return source_stmt; > } > @@ -2588,7 +2591,7 @@ find_bswap_or_nop (gimple *stmt, struct > return NULL; > > /* Useless bit manipulation performed by code. */ > - if (!n->base_addr && n->n == cmpnop) > + if (!n->base_addr && n->n == cmpnop && n->n_ops == 1) > return NULL; > > n->range *= BITS_PER_UNIT; > @@ -2747,6 +2750,36 @@ bswap_replace (gimple *cur_stmt, gimple > } > src = val_tmp; > } > + else if (!bswap) > + { > + gimple *g; > + if (!useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src))) > + { > + if (!is_gimple_val (src)) > + return false; > + g = gimple_build_assign (tgt, NOP_EXPR, src); > + } > + else > + g = gimple_build_assign (tgt, src); > + if (n->range == 16) > + nop_stats.found_16bit++; > + else if (n->range == 32) > + nop_stats.found_32bit++; > + else > + { > + gcc_assert (n->range == 64); > + nop_stats.found_64bit++; > + } > + if (dump_file) > + { > + fprintf (dump_file, > + "%d bit reshuffle in target endianness found at: ", > + (int) n->range); > + print_gimple_stmt (dump_file, cur_stmt, 0); > + } > + gsi_replace (&gsi, g, true); > + return true; > + } > else if (TREE_CODE (src) == BIT_FIELD_REF) > src = TREE_OPERAND (src, 0); > > --- gcc/testsuite/gcc.dg/tree-ssa/pr81396.c.jj 2017-07-13 19:22:10.191954620 +0200 > +++ gcc/testsuite/gcc.dg/tree-ssa/pr81396.c 2017-07-13 19:24:16.638399984 +0200 > @@ -0,0 +1,25 @@ > +/* PR tree-optimization/81396 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-optimized" } */ > + > +typedef unsigned long long uint64_t; > + > +uint64_t > +foo (uint64_t word) > +{ > +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __SIZEOF_LONG_LONG__ == 8 > + const unsigned char *const ptr = (const unsigned char *) &word; > + return ((uint64_t) ptr[0] > + | ((uint64_t) ptr[1] << 8) > + | ((uint64_t) ptr[2] << (8 * 2)) > + | ((uint64_t) ptr[3] << (8 * 3)) > + | ((uint64_t) ptr[4] << (8 * 4)) > + | ((uint64_t) ptr[5] << (8 * 5)) > + | ((uint64_t) ptr[6] << (8 * 6)) > + | ((uint64_t) ptr[7] << (8 * 7))); > +#else > + return word; > +#endif > +} > + > +/* { dg-final { scan-tree-dump "return word_\[0-9]*\\(D\\);" "optimized" } } */ > > Jakub > >
--- gcc/tree-ssa-math-opts.c.jj 2017-07-06 20:31:43.000000000 +0200 +++ gcc/tree-ssa-math-opts.c 2017-07-13 19:27:02.985354778 +0200 @@ -1968,6 +1968,7 @@ struct symbolic_number { tree alias_set; tree vuse; unsigned HOST_WIDE_INT range; + int n_ops; }; #define BITS_PER_MARKER 8 @@ -2083,6 +2084,7 @@ init_symbolic_number (struct symbolic_nu return false; n->range = size; n->n = CMPNOP; + n->n_ops = 1; if (size < 64 / BITS_PER_MARKER) n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1; @@ -2293,6 +2295,7 @@ perform_symbolic_merge (gimple *source_s return NULL; } n->n = n1->n | n2->n; + n->n_ops = n1->n_ops + n2->n_ops; return source_stmt; } @@ -2588,7 +2591,7 @@ find_bswap_or_nop (gimple *stmt, struct return NULL; /* Useless bit manipulation performed by code. */ - if (!n->base_addr && n->n == cmpnop) + if (!n->base_addr && n->n == cmpnop && n->n_ops == 1) return NULL; n->range *= BITS_PER_UNIT; @@ -2747,6 +2750,36 @@ bswap_replace (gimple *cur_stmt, gimple } src = val_tmp; } + else if (!bswap) + { + gimple *g; + if (!useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src))) + { + if (!is_gimple_val (src)) + return false; + g = gimple_build_assign (tgt, NOP_EXPR, src); + } + else + g = gimple_build_assign (tgt, src); + if (n->range == 16) + nop_stats.found_16bit++; + else if (n->range == 32) + nop_stats.found_32bit++; + else + { + gcc_assert (n->range == 64); + nop_stats.found_64bit++; + } + if (dump_file) + { + fprintf (dump_file, + "%d bit reshuffle in target endianness found at: ", + (int) n->range); + print_gimple_stmt (dump_file, cur_stmt, 0); + } + gsi_replace (&gsi, g, true); + return true; + } else if (TREE_CODE (src) == BIT_FIELD_REF) src = TREE_OPERAND (src, 0); --- gcc/testsuite/gcc.dg/tree-ssa/pr81396.c.jj 2017-07-13 19:22:10.191954620 +0200 +++ gcc/testsuite/gcc.dg/tree-ssa/pr81396.c 2017-07-13 19:24:16.638399984 +0200 @@ -0,0 +1,25 @@ +/* PR tree-optimization/81396 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +typedef unsigned long long uint64_t; + +uint64_t +foo (uint64_t word) +{ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __SIZEOF_LONG_LONG__ == 8 + const unsigned char *const ptr = (const unsigned char *) &word; + return ((uint64_t) ptr[0] + | ((uint64_t) ptr[1] << 8) + | ((uint64_t) ptr[2] << (8 * 2)) + | ((uint64_t) ptr[3] << (8 * 3)) + | ((uint64_t) ptr[4] << (8 * 4)) + | ((uint64_t) ptr[5] << (8 * 5)) + | ((uint64_t) ptr[6] << (8 * 6)) + | ((uint64_t) ptr[7] << (8 * 7))); +#else + return word; +#endif +} + +/* { dg-final { scan-tree-dump "return word_\[0-9]*\\(D\\);" "optimized" } } */