Message ID | 20131025221933.GN30970@tucnak.zalov.cz |
---|---|
State | New |
Headers | show |
On Sat, 26 Oct 2013, Jakub Jelinek wrote: > Hi! > > And here is a patch that allows vectorization without peeling for alignment > and scalar loop for bound even for fn2, fn3 and fn4 in the following > testcase, though as with the range __builtin_unreachable () notes, it is > quite fragile, because it only works if there are no immediate uses of the > tested SSA_NAME before the assertion. Perhaps a more reliable way would be to > convert those assertions into __builtin_assume_aligned, but that has the > disadvantage that its first argument is a pointer and it returns a pointer, > so we'd need to cast integers to pointers and back, or add ASSUME_ALIGNED > internal function. > > Bootstrapped/regtested on x86_64-linux and i686-linux. Ok. Thanks, Richard. > int a[1024]; > > void > fn1 (int x, int y) > { > int i; > x &= -32; > y &= -32; > for (i = x + 32; i < y; i++) > a[i]++; > } > > void > fn2 (int x, int y) > { > int i; > if (x & 31) > __builtin_unreachable (); > if (y & 31) > __builtin_unreachable (); > for (i = x + 32; i < x + y; i++) > a[i]++; > } > > void > fn3 (int x, int y) > { > int i; > if (x % 32) > __builtin_unreachable (); > if (y % 32) > __builtin_unreachable (); > for (i = x + 32; i < x + y; i++) > a[i]++; > } > > void > fn4 (int x, int y) > { > int i; > if ((x % 32) != 0) > __builtin_unreachable (); > if ((y % 32) != 0) > __builtin_unreachable (); > for (i = x + 32; i < x + y; i++) > a[i]++; > } > > 2013-10-25 Jakub Jelinek <jakub@redhat.com> > > * tree-vrp.c (maybe_set_nonzero_bits): New function. > (remove_range_assertions): Call it. 
> > --- gcc/tree-vrp.c.jj 2013-10-24 14:32:29.000000000 +0200 > +++ gcc/tree-vrp.c 2013-10-25 21:21:35.183092937 +0200 > @@ -6459,6 +6459,60 @@ check_all_array_refs (void) > } > } > > +/* Handle > + _4 = x_3 & 31; > + if (_4 != 0) > + goto <bb 6>; > + else > + goto <bb 7>; > + <bb 6>: > + __builtin_unreachable (); > + <bb 7>: > + x_5 = ASSERT_EXPR <x_3, ...>; > + If x_3 has no other immediate uses (checked by caller), > + var is the x_3 var from ASSERT_EXPR, we can clear low 5 bits > + from the non-zero bitmask. */ > + > +static void > +maybe_set_nonzero_bits (basic_block bb, tree var) > +{ > + edge e = single_pred_edge (bb); > + basic_block cond_bb = e->src; > + gimple stmt = last_stmt (cond_bb); > + tree cst; > + > + if (stmt == NULL > + || gimple_code (stmt) != GIMPLE_COND > + || gimple_cond_code (stmt) != ((e->flags & EDGE_TRUE_VALUE) > + ? EQ_EXPR : NE_EXPR) > + || TREE_CODE (gimple_cond_lhs (stmt)) != SSA_NAME > + || !integer_zerop (gimple_cond_rhs (stmt))) > + return; > + > + stmt = SSA_NAME_DEF_STMT (gimple_cond_lhs (stmt)); > + if (!is_gimple_assign (stmt) > + || gimple_assign_rhs_code (stmt) != BIT_AND_EXPR > + || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST) > + return; > + if (gimple_assign_rhs1 (stmt) != var) > + { > + gimple stmt2; > + > + if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME) > + return; > + stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); > + if (!gimple_assign_cast_p (stmt2) > + || gimple_assign_rhs1 (stmt2) != var > + || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt2)) > + || (TYPE_PRECISION (TREE_TYPE (gimple_assign_rhs1 (stmt))) > + != TYPE_PRECISION (TREE_TYPE (var)))) > + return; > + } > + cst = gimple_assign_rhs2 (stmt); > + set_nonzero_bits (var, (get_nonzero_bits (var) > + & ~tree_to_double_int (cst))); > +} > + > /* Convert range assertion expressions into the implied copies and > copy propagate away the copies. 
Doing the trivial copy propagation > here avoids the need to run the full copy propagation pass after > @@ -6576,8 +6630,11 @@ remove_range_assertions (void) > } > } > if (ok) > - set_range_info (var, SSA_NAME_RANGE_INFO (lhs)->min, > - SSA_NAME_RANGE_INFO (lhs)->max); > + { > + set_range_info (var, SSA_NAME_RANGE_INFO (lhs)->min, > + SSA_NAME_RANGE_INFO (lhs)->max); > + maybe_set_nonzero_bits (bb, var); > + } > } > } > > > Jakub > >
--- gcc/tree-vrp.c.jj 2013-10-24 14:32:29.000000000 +0200 +++ gcc/tree-vrp.c 2013-10-25 21:21:35.183092937 +0200 @@ -6459,6 +6459,60 @@ check_all_array_refs (void) } } +/* Handle + _4 = x_3 & 31; + if (_4 != 0) + goto <bb 6>; + else + goto <bb 7>; + <bb 6>: + __builtin_unreachable (); + <bb 7>: + x_5 = ASSERT_EXPR <x_3, ...>; + If x_3 has no other immediate uses (checked by caller), + var is the x_3 var from ASSERT_EXPR, we can clear low 5 bits + from the non-zero bitmask. */ + +static void +maybe_set_nonzero_bits (basic_block bb, tree var) +{ + edge e = single_pred_edge (bb); + basic_block cond_bb = e->src; + gimple stmt = last_stmt (cond_bb); + tree cst; + + if (stmt == NULL + || gimple_code (stmt) != GIMPLE_COND + || gimple_cond_code (stmt) != ((e->flags & EDGE_TRUE_VALUE) + ? EQ_EXPR : NE_EXPR) + || TREE_CODE (gimple_cond_lhs (stmt)) != SSA_NAME + || !integer_zerop (gimple_cond_rhs (stmt))) + return; + + stmt = SSA_NAME_DEF_STMT (gimple_cond_lhs (stmt)); + if (!is_gimple_assign (stmt) + || gimple_assign_rhs_code (stmt) != BIT_AND_EXPR + || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST) + return; + if (gimple_assign_rhs1 (stmt) != var) + { + gimple stmt2; + + if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME) + return; + stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); + if (!gimple_assign_cast_p (stmt2) + || gimple_assign_rhs1 (stmt2) != var + || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt2)) + || (TYPE_PRECISION (TREE_TYPE (gimple_assign_rhs1 (stmt))) + != TYPE_PRECISION (TREE_TYPE (var)))) + return; + } + cst = gimple_assign_rhs2 (stmt); + set_nonzero_bits (var, (get_nonzero_bits (var) + & ~tree_to_double_int (cst))); +} + /* Convert range assertion expressions into the implied copies and copy propagate away the copies. 
Doing the trivial copy propagation here avoids the need to run the full copy propagation pass after @@ -6576,8 +6630,11 @@ remove_range_assertions (void) } } if (ok) - set_range_info (var, SSA_NAME_RANGE_INFO (lhs)->min, - SSA_NAME_RANGE_INFO (lhs)->max); + { + set_range_info (var, SSA_NAME_RANGE_INFO (lhs)->min, + SSA_NAME_RANGE_INFO (lhs)->max); + maybe_set_nonzero_bits (bb, var); + } } }