| Field | Value |
| --- | --- |
| Message ID | 20171013194310.GM14653@tucnak |
| State | New |
| Headers | show |
| Series | Slightly improve phiopt value_replacement optimization (PR middle-end/62263, PR middle-end/82498) | expand |
On October 13, 2017 9:43:10 PM GMT+02:00, Jakub Jelinek <jakub@redhat.com> wrote: >Hi! > >First of all, there was a typo, we are optimizing >(x != 0) ? x + y : y >into x + y rather than y. > >And, as the comment mentions, the condition that there is just a single >stmt is too restrictive and in the various patterns posted in the >various >rotate related PRs there are cases where in addition to the last >assign stmt there are some preparation statements (sometimes >conversion, >sometimes bit masking with constant, sometimes both). >I think it is undesirable to turn the conditional code into always >executed >one if it is too large, so this patch allows just very few very cheap >preparation statements which feed each other and it is easily possible >to >propagate the cond_rhs constant through those statements to compute >what >the result from those would be (and make sure no UB). > >With this patch on top of the previous one, on rotate-8.c testcase >on x86_64-linux and i686-linux we get the most efficient code in all >cases >- just a rol/ror instruction with perhaps some moves into registers >needed >to perform that, but without any conditionals, masking etc. > >Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? OK. Richard. >2017-10-13 Jakub Jelinek <jakub@redhat.com> > > PR middle-end/62263 > PR middle-end/82498 > * tree-ssa-phiopt.c (value_replacement): Comment fix. Handle > up to 2 preparation statements for ASSIGN in MIDDLE_BB. > > * c-c++-common/rotate-8.c: Expect no PHIs in optimized dump. > >--- gcc/tree-ssa-phiopt.c.jj 2017-10-10 22:04:08.000000000 +0200 >+++ gcc/tree-ssa-phiopt.c 2017-10-13 17:55:01.578450763 +0200 >@@ -995,11 +995,13 @@ value_replacement (basic_block cond_bb, > > } > >- /* Now optimize (x != 0) ? x + y : y to just y. >- The following condition is too restrictive, there can easily be >another >- stmt in middle_bb, for instance a CONVERT_EXPR for the second >argument. 
*/ >- gimple *assign = last_and_only_stmt (middle_bb); >- if (!assign || gimple_code (assign) != GIMPLE_ASSIGN >+ /* Now optimize (x != 0) ? x + y : y to just x + y. */ >+ gsi = gsi_last_nondebug_bb (middle_bb); >+ if (gsi_end_p (gsi)) >+ return 0; >+ >+ gimple *assign = gsi_stmt (gsi); >+ if (!is_gimple_assign (assign) > || gimple_assign_rhs_class (assign) != GIMPLE_BINARY_RHS > || (!INTEGRAL_TYPE_P (TREE_TYPE (arg0)) > && !POINTER_TYPE_P (TREE_TYPE (arg0)))) >@@ -1009,6 +1011,71 @@ value_replacement (basic_block cond_bb, > if (!gimple_seq_empty_p (phi_nodes (middle_bb))) > return 0; > >+ /* Allow up to 2 cheap preparation statements that prepare argument >+ for assign, e.g.: >+ if (y_4 != 0) >+ goto <bb 3>; >+ else >+ goto <bb 4>; >+ <bb 3>: >+ _1 = (int) y_4; >+ iftmp.0_6 = x_5(D) r<< _1; >+ <bb 4>: >+ # iftmp.0_2 = PHI <iftmp.0_6(3), x_5(D)(2)> >+ or: >+ if (y_3(D) == 0) >+ goto <bb 4>; >+ else >+ goto <bb 3>; >+ <bb 3>: >+ y_4 = y_3(D) & 31; >+ _1 = (int) y_4; >+ _6 = x_5(D) r<< _1; >+ <bb 4>: >+ # _2 = PHI <x_5(D)(2), _6(3)> */ >+ gimple *prep_stmt[2] = { NULL, NULL }; >+ int prep_cnt; >+ for (prep_cnt = 0; ; prep_cnt++) >+ { >+ gsi_prev_nondebug (&gsi); >+ if (gsi_end_p (gsi)) >+ break; >+ >+ gimple *g = gsi_stmt (gsi); >+ if (gimple_code (g) == GIMPLE_LABEL) >+ break; >+ >+ if (prep_cnt == 2 || !is_gimple_assign (g)) >+ return 0; >+ >+ tree lhs = gimple_assign_lhs (g); >+ tree rhs1 = gimple_assign_rhs1 (g); >+ use_operand_p use_p; >+ gimple *use_stmt; >+ if (TREE_CODE (lhs) != SSA_NAME >+ || TREE_CODE (rhs1) != SSA_NAME >+ || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)) >+ || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1)) >+ || !single_imm_use (lhs, &use_p, &use_stmt) >+ || use_stmt != (prep_cnt ? 
prep_stmt[prep_cnt - 1] : assign)) >+ return 0; >+ switch (gimple_assign_rhs_code (g)) >+ { >+ CASE_CONVERT: >+ break; >+ case PLUS_EXPR: >+ case BIT_AND_EXPR: >+ case BIT_IOR_EXPR: >+ case BIT_XOR_EXPR: >+ if (TREE_CODE (gimple_assign_rhs2 (g)) != INTEGER_CST) >+ return 0; >+ break; >+ default: >+ return 0; >+ } >+ prep_stmt[prep_cnt] = g; >+ } >+ > /* Only transform if it removes the condition. */ >if (!single_non_singleton_phi_for_edges (phi_nodes (gimple_bb (phi)), >e0, e1)) > return 0; >@@ -1019,7 +1086,7 @@ value_replacement (basic_block cond_bb, > && profile_status_for_fn (cfun) != PROFILE_ABSENT >&& EDGE_PRED (middle_bb, 0)->probability < profile_probability::even () > /* If assign is cheap, there is no point avoiding it. */ >- && estimate_num_insns (assign, &eni_time_weights) >+ && estimate_num_insns (bb_seq (middle_bb), &eni_time_weights) > >= 3 * estimate_num_insns (cond, &eni_time_weights)) > return 0; > >@@ -1030,6 +1097,32 @@ value_replacement (basic_block cond_bb, > tree cond_lhs = gimple_cond_lhs (cond); > tree cond_rhs = gimple_cond_rhs (cond); > >+ /* Propagate the cond_rhs constant through preparation stmts, >+ make sure UB isn't invoked while doing that. 
*/ >+ for (int i = prep_cnt - 1; i >= 0; --i) >+ { >+ gimple *g = prep_stmt[i]; >+ tree grhs1 = gimple_assign_rhs1 (g); >+ if (!operand_equal_for_phi_arg_p (cond_lhs, grhs1)) >+ return 0; >+ cond_lhs = gimple_assign_lhs (g); >+ cond_rhs = fold_convert (TREE_TYPE (grhs1), cond_rhs); >+ if (TREE_CODE (cond_rhs) != INTEGER_CST >+ || TREE_OVERFLOW (cond_rhs)) >+ return 0; >+ if (gimple_assign_rhs_class (g) == GIMPLE_BINARY_RHS) >+ { >+ cond_rhs = int_const_binop (gimple_assign_rhs_code (g), cond_rhs, >+ gimple_assign_rhs2 (g)); >+ if (TREE_OVERFLOW (cond_rhs)) >+ return 0; >+ } >+ cond_rhs = fold_convert (TREE_TYPE (cond_lhs), cond_rhs); >+ if (TREE_CODE (cond_rhs) != INTEGER_CST >+ || TREE_OVERFLOW (cond_rhs)) >+ return 0; >+ } >+ > if (((code == NE_EXPR && e1 == false_edge) > || (code == EQ_EXPR && e1 == true_edge)) > && arg0 == lhs >@@ -1071,7 +1164,15 @@ value_replacement (basic_block cond_bb, > duplicate_ssa_name_range_info (lhs, SSA_NAME_RANGE_TYPE (phires), > phires_range_info); > } >- gimple_stmt_iterator gsi_from = gsi_for_stmt (assign); >+ gimple_stmt_iterator gsi_from; >+ for (int i = prep_cnt - 1; i >= 0; --i) >+ { >+ tree plhs = gimple_assign_lhs (prep_stmt[i]); >+ SSA_NAME_RANGE_INFO (plhs) = NULL; >+ gsi_from = gsi_for_stmt (prep_stmt[i]); >+ gsi_move_before (&gsi_from, &gsi); >+ } >+ gsi_from = gsi_for_stmt (assign); > gsi_move_before (&gsi_from, &gsi); > replace_phi_edge_with_variable (cond_bb, e1, phi, lhs); > return 2; >--- gcc/testsuite/c-c++-common/rotate-8.c.jj 2017-10-13 >15:55:32.000000000 +0200 >+++ gcc/testsuite/c-c++-common/rotate-8.c 2017-10-13 17:57:32.861627651 >+0200 >@@ -3,6 +3,7 @@ > /* { dg-do compile } */ > /* { dg-options "-O2 -fno-ipa-icf -fdump-tree-optimized" } */ >/* { dg-final { scan-tree-dump-times "r\[<>]\[<>]" 23 "optimized" } } >*/ >+/* { dg-final { scan-tree-dump-not "PHI <" "optimized" } } */ > > unsigned int > f1 (unsigned int x, unsigned char y) > > Jakub
--- gcc/tree-ssa-phiopt.c.jj 2017-10-10 22:04:08.000000000 +0200 +++ gcc/tree-ssa-phiopt.c 2017-10-13 17:55:01.578450763 +0200 @@ -995,11 +995,13 @@ value_replacement (basic_block cond_bb, } - /* Now optimize (x != 0) ? x + y : y to just y. - The following condition is too restrictive, there can easily be another - stmt in middle_bb, for instance a CONVERT_EXPR for the second argument. */ - gimple *assign = last_and_only_stmt (middle_bb); - if (!assign || gimple_code (assign) != GIMPLE_ASSIGN + /* Now optimize (x != 0) ? x + y : y to just x + y. */ + gsi = gsi_last_nondebug_bb (middle_bb); + if (gsi_end_p (gsi)) + return 0; + + gimple *assign = gsi_stmt (gsi); + if (!is_gimple_assign (assign) || gimple_assign_rhs_class (assign) != GIMPLE_BINARY_RHS || (!INTEGRAL_TYPE_P (TREE_TYPE (arg0)) && !POINTER_TYPE_P (TREE_TYPE (arg0)))) @@ -1009,6 +1011,71 @@ value_replacement (basic_block cond_bb, if (!gimple_seq_empty_p (phi_nodes (middle_bb))) return 0; + /* Allow up to 2 cheap preparation statements that prepare argument + for assign, e.g.: + if (y_4 != 0) + goto <bb 3>; + else + goto <bb 4>; + <bb 3>: + _1 = (int) y_4; + iftmp.0_6 = x_5(D) r<< _1; + <bb 4>: + # iftmp.0_2 = PHI <iftmp.0_6(3), x_5(D)(2)> + or: + if (y_3(D) == 0) + goto <bb 4>; + else + goto <bb 3>; + <bb 3>: + y_4 = y_3(D) & 31; + _1 = (int) y_4; + _6 = x_5(D) r<< _1; + <bb 4>: + # _2 = PHI <x_5(D)(2), _6(3)> */ + gimple *prep_stmt[2] = { NULL, NULL }; + int prep_cnt; + for (prep_cnt = 0; ; prep_cnt++) + { + gsi_prev_nondebug (&gsi); + if (gsi_end_p (gsi)) + break; + + gimple *g = gsi_stmt (gsi); + if (gimple_code (g) == GIMPLE_LABEL) + break; + + if (prep_cnt == 2 || !is_gimple_assign (g)) + return 0; + + tree lhs = gimple_assign_lhs (g); + tree rhs1 = gimple_assign_rhs1 (g); + use_operand_p use_p; + gimple *use_stmt; + if (TREE_CODE (lhs) != SSA_NAME + || TREE_CODE (rhs1) != SSA_NAME + || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)) + || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1)) + || !single_imm_use (lhs, &use_p, 
&use_stmt) + || use_stmt != (prep_cnt ? prep_stmt[prep_cnt - 1] : assign)) + return 0; + switch (gimple_assign_rhs_code (g)) + { + CASE_CONVERT: + break; + case PLUS_EXPR: + case BIT_AND_EXPR: + case BIT_IOR_EXPR: + case BIT_XOR_EXPR: + if (TREE_CODE (gimple_assign_rhs2 (g)) != INTEGER_CST) + return 0; + break; + default: + return 0; + } + prep_stmt[prep_cnt] = g; + } + /* Only transform if it removes the condition. */ if (!single_non_singleton_phi_for_edges (phi_nodes (gimple_bb (phi)), e0, e1)) return 0; @@ -1019,7 +1086,7 @@ value_replacement (basic_block cond_bb, && profile_status_for_fn (cfun) != PROFILE_ABSENT && EDGE_PRED (middle_bb, 0)->probability < profile_probability::even () /* If assign is cheap, there is no point avoiding it. */ - && estimate_num_insns (assign, &eni_time_weights) + && estimate_num_insns (bb_seq (middle_bb), &eni_time_weights) >= 3 * estimate_num_insns (cond, &eni_time_weights)) return 0; @@ -1030,6 +1097,32 @@ value_replacement (basic_block cond_bb, tree cond_lhs = gimple_cond_lhs (cond); tree cond_rhs = gimple_cond_rhs (cond); + /* Propagate the cond_rhs constant through preparation stmts, + make sure UB isn't invoked while doing that. 
*/ + for (int i = prep_cnt - 1; i >= 0; --i) + { + gimple *g = prep_stmt[i]; + tree grhs1 = gimple_assign_rhs1 (g); + if (!operand_equal_for_phi_arg_p (cond_lhs, grhs1)) + return 0; + cond_lhs = gimple_assign_lhs (g); + cond_rhs = fold_convert (TREE_TYPE (grhs1), cond_rhs); + if (TREE_CODE (cond_rhs) != INTEGER_CST + || TREE_OVERFLOW (cond_rhs)) + return 0; + if (gimple_assign_rhs_class (g) == GIMPLE_BINARY_RHS) + { + cond_rhs = int_const_binop (gimple_assign_rhs_code (g), cond_rhs, + gimple_assign_rhs2 (g)); + if (TREE_OVERFLOW (cond_rhs)) + return 0; + } + cond_rhs = fold_convert (TREE_TYPE (cond_lhs), cond_rhs); + if (TREE_CODE (cond_rhs) != INTEGER_CST + || TREE_OVERFLOW (cond_rhs)) + return 0; + } + if (((code == NE_EXPR && e1 == false_edge) || (code == EQ_EXPR && e1 == true_edge)) && arg0 == lhs @@ -1071,7 +1164,15 @@ value_replacement (basic_block cond_bb, duplicate_ssa_name_range_info (lhs, SSA_NAME_RANGE_TYPE (phires), phires_range_info); } - gimple_stmt_iterator gsi_from = gsi_for_stmt (assign); + gimple_stmt_iterator gsi_from; + for (int i = prep_cnt - 1; i >= 0; --i) + { + tree plhs = gimple_assign_lhs (prep_stmt[i]); + SSA_NAME_RANGE_INFO (plhs) = NULL; + gsi_from = gsi_for_stmt (prep_stmt[i]); + gsi_move_before (&gsi_from, &gsi); + } + gsi_from = gsi_for_stmt (assign); gsi_move_before (&gsi_from, &gsi); replace_phi_edge_with_variable (cond_bb, e1, phi, lhs); return 2; --- gcc/testsuite/c-c++-common/rotate-8.c.jj 2017-10-13 15:55:32.000000000 +0200 +++ gcc/testsuite/c-c++-common/rotate-8.c 2017-10-13 17:57:32.861627651 +0200 @@ -3,6 +3,7 @@ /* { dg-do compile } */ /* { dg-options "-O2 -fno-ipa-icf -fdump-tree-optimized" } */ /* { dg-final { scan-tree-dump-times "r\[<>]\[<>]" 23 "optimized" } } */ +/* { dg-final { scan-tree-dump-not "PHI <" "optimized" } } */ unsigned int f1 (unsigned int x, unsigned char y)