Message ID | alpine.LSU.2.11.1502161520120.27763@zhemvz.fhfr.qr |
---|---|
State | New |
Headers | show |
On Mon, 16 Feb 2015, Richard Biener wrote: > > Predictive commoning happens to re-use SSA names it released while > there are still uses of them (oops), confusing the hell out of > other code (expected). Fixed thus. > > Bootstrap and regtest running on x86_64-unknown-linux-gnu. So I was wrong in that this doesn't fix PR65063, but it pointed at a similar issue. The loop transform code doesn't handle the case where we replace looparound PHIs and need an epilogue loop (thus we use unrolling). The following patch disables unrolling in that case. Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. Richard. 2015-02-17 Richard Biener <rguenther@suse.de> PR tree-optimization/65063 * tree-predcom.c (determine_unroll_factor): Return 1 if we have replaced looparound PHIs. * gcc.dg/pr65063.c: New testcase. Index: gcc/tree-predcom.c =================================================================== *** gcc/tree-predcom.c (revision 220755) --- gcc/tree-predcom.c (working copy) *************** determine_unroll_factor (vec<chain_p> ch *** 1775,1783 **** FOR_EACH_VEC_ELT (chains, i, chain) { ! if (chain->type == CT_INVARIANT || chain->combined) continue; /* The best unroll factor for this chain is equal to the number of temporary variables that we create for it. */ af = chain->length; --- 1775,1794 ---- FOR_EACH_VEC_ELT (chains, i, chain) { ! if (chain->type == CT_INVARIANT) continue; + if (chain->combined) + { + /* For combined chains, we can't handle unrolling if we replace + looparound PHIs. */ + dref a; + unsigned j; + for (j = 1; chain->refs.iterate (j, &a); j++) + if (gimple_code (a->stmt) == GIMPLE_PHI) + return 1; + } + /* The best unroll factor for this chain is equal to the number of temporary variables that we create for it. 
*/ af = chain->length; Index: gcc/testsuite/gcc.dg/pr65063.c =================================================================== *** gcc/testsuite/gcc.dg/pr65063.c (revision 0) --- gcc/testsuite/gcc.dg/pr65063.c (working copy) *************** *** 0 **** --- 1,33 ---- + /* { dg-do run } */ + /* { dg-options "-O3 -fno-tree-loop-ivcanon -fno-tree-vectorize" } */ + + static int in[8][4]; + static int out[4]; + static const int check_result[] = {0, 16, 256, 4096}; + + static inline void foo () + { + int sum; + int i, j, k; + for (k = 0; k < 4; k++) + { + sum = 1; + for (j = 0; j < 4; j++) + for (i = 0; i < 4; i++) + sum *= in[i + k][j]; + out[k] = sum; + } + } + + int main () + { + int i, j, k; + for (i = 0; i < 8; i++) + for (j = 0; j < 4; j++) + in[i][j] = (i + 2) / 3; + foo (); + for (k = 0; k < 4; k++) + if (out[k] != check_result[k]) + __builtin_abort (); + return 0; + }
Index: gcc/tree-predcom.c =================================================================== --- gcc/tree-predcom.c (revision 220731) +++ gcc/tree-predcom.c (working copy) @@ -1745,9 +1745,8 @@ execute_pred_commoning_chain (struct loo if (chain->combined) { /* For combined chains, just remove the statements that are used to - compute the values of the expression (except for the root one). */ - for (i = 1; chain->refs.iterate (i, &a); i++) - remove_stmt (a->stmt); + compute the values of the expression (except for the root one). + We delay this until after all chains are processed. */ } else { @@ -1811,6 +1810,21 @@ execute_pred_commoning (struct loop *loo execute_pred_commoning_chain (loop, chain, tmp_vars); } + FOR_EACH_VEC_ELT (chains, i, chain) + { + if (chain->type == CT_INVARIANT) + ; + else if (chain->combined) + { + /* For combined chains, just remove the statements that are used to + compute the values of the expression (except for the root one). */ + dref a; + unsigned j; + for (j = 1; chain->refs.iterate (j, &a); j++) + remove_stmt (a->stmt); + } + } + update_ssa (TODO_update_ssa_only_virtuals); } Index: gcc/testsuite/gcc.dg/pr63593.c =================================================================== --- gcc/testsuite/gcc.dg/pr63593.c (revision 0) +++ gcc/testsuite/gcc.dg/pr63593.c (working copy) @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-tree-vectorize" } */ + +int in[2 * 4][4]; +int out[4]; + +void +foo (void) +{ + int sum; + int i, j, k; + for (k = 0; k < 4; k++) + { + sum = 1; + for (j = 0; j < 4; j++) + for (i = 0; i < 4; i++) + sum *= in[i + k][j]; + out[k] = sum; + } +} Index: gcc/testsuite/gcc.dg/pr65063.c =================================================================== --- gcc/testsuite/gcc.dg/pr65063.c (revision 0) +++ gcc/testsuite/gcc.dg/pr65063.c (working copy) @@ -0,0 +1,33 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -fno-tree-loop-ivcanon -fno-tree-vectorize" } */ + +static int in[8][4]; +static int out[4]; 
+static const int check_result[] = {0, 16, 256, 4096}; + +static inline void foo () +{ + int sum; + int i, j, k; + for (k = 0; k < 4; k++) + { + sum = 1; + for (j = 0; j < 4; j++) + for (i = 0; i < 4; i++) + sum *= in[i + k][j]; + out[k] = sum; + } +} + +int main () +{ + int i, j, k; + for (i = 0; i < 8; i++) + for (j = 0; j < 4; j++) + in[i][j] = (i + 2) / 3; + foo (); + for (k = 0; k < 4; k++) + if (out[k] != check_result[k]) + __builtin_abort (); + return 0; +}