| Message ID | 877ego74h1.fsf@arm.com |
|---|---|
| State | New |
| Series | Use unsigned arithmetic for demoted vector plus/minus/mult (PR 88064) |
On Wed, Dec 5, 2018 at 9:39 AM Richard Sandiford <richard.sandiford@arm.com> wrote:
>
> As Jakub pointed out, if we narrow a plus, minus or mult operation based
> on the number of bits that consumers need, we have to convert a signed
> operation to an unsigned one in order to avoid new undefined behaviour.
> This patch does that and generalises vect_convert_input and
> vect_recog_over_widening_pattern to cope with the extra casts.
> (The changes to both functions are covered by existing tests.)
>
> Tested on x86_64-linux-gnu, aarch64-linux-gnu and aarch64_be-elf.
> OK to install?

OK.

Richard.

> Richard
>
>
> 2018-12-05  Richard Sandiford  <richard.sandiford@arm.com>
>
> gcc/
>         PR tree-optimization/88064
>         * tree-vect-patterns.c (vect_convert_input): Convert the result of
>         an existing cast if it has the right width but the wrong sign.
>         Do not test the signedness of the required result when
>         considering whether to split an existing cast; instead split to
>         a type with the same signedness as the source of the cast, then
>         convert it to the opposite signedness where necessary.
>         (vect_recog_over_widening_pattern): Handle sign changes between
>         the final PLUS_EXPR and the RSHIFT_EXPR.
>         (vect_recog_average_pattern): Use an unsigned operation when
>         truncating an addition, subtraction or multiplication.  Cast the
>         result back to the "real" signedness before promoting.
>
> gcc/testsuite/
>         PR tree-optimization/88064
>         * gcc.dg/vect/vect-over-widen-23.c: New test.
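To make the quoted description concrete, here is a minimal sketch of the kind of loop involved, modelled on the new test case in the patch below; the comment spells out the signedness reasoning and is an editorial illustration, not part of the patch itself:

    /* The additions are written in 64-bit signed arithmetic, but only the
       low 32 bits reach c[i], so the vectorizer is entitled to demote the
       addition to 32 bits.  A 32-bit *signed* addition could overflow where
       the original 64-bit one could not, which would be new undefined
       behaviour; doing the demoted addition as
       (int) ((unsigned int) a[i] + (unsigned int) b[i]) is always well
       defined and produces the same truncated result.  */
    int a[64], b[64], c[64];

    void
    foo (void)
    {
      for (int i = 0; i < 64; i++)
        {
          long long d = a[i];
          long long e = b[i];
          d += e;
          c[i] = d;
        }
    }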
Index: gcc/tree-vect-patterns.c
===================================================================
--- gcc/tree-vect-patterns.c	2018-12-05 08:33:40.658922796 +0000
+++ gcc/tree-vect-patterns.c	2018-12-05 08:33:55.130797055 +0000
@@ -720,34 +720,60 @@ vect_convert_input (stmt_vec_info stmt_i
   if (TREE_CODE (unprom->op) == INTEGER_CST)
     return wide_int_to_tree (type, wi::to_widest (unprom->op));
 
-  /* See if we can reuse an existing result.  */
+  tree input = unprom->op;
   if (unprom->caster)
     {
       tree lhs = gimple_get_lhs (unprom->caster->stmt);
-      if (types_compatible_p (TREE_TYPE (lhs), type))
-        return lhs;
+      tree lhs_type = TREE_TYPE (lhs);
+
+      /* If the result of the existing cast is the right width, use it
+         instead of the source of the cast.  */
+      if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
+        input = lhs;
+      /* If the precision we want is between the source and result
+         precisions of the existing cast, try splitting the cast into
+         two and tapping into a mid-way point.  */
+      else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
+               && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
+        {
+          /* In order to preserve the semantics of the original cast,
+             give the mid-way point the same signedness as the input value.
+
+             It would be possible to use a signed type here instead if
+             TYPE is signed and UNPROM->TYPE is unsigned, but that would
+             make the sign of the midtype sensitive to the order in
+             which we process the statements, since the signedness of
+             TYPE is the signedness required by just one of possibly
+             many users.  Also, unsigned promotions are usually as cheap
+             as or cheaper than signed ones, so it's better to keep an
+             unsigned promotion.  */
+          tree midtype = build_nonstandard_integer_type
+            (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
+          tree vec_midtype = get_vectype_for_scalar_type (midtype);
+          if (vec_midtype)
+            {
+              input = vect_recog_temp_ssa_var (midtype, NULL);
+              gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
+                                                       unprom->op);
+              if (!vect_split_statement (unprom->caster, input, new_stmt,
+                                         vec_midtype))
+                append_pattern_def_seq (stmt_info, new_stmt, vec_midtype);
+            }
+        }
+
+      /* See if we can reuse an existing result.  */
+      if (types_compatible_p (type, TREE_TYPE (input)))
+        return input;
     }
 
   /* We need a new conversion statement.  */
   tree new_op = vect_recog_temp_ssa_var (type, NULL);
-  gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, unprom->op);
-
-  /* If the operation is the input to a vectorizable cast, try splitting
-     that cast into two, taking the required result as a mid-way point.  */
-  if (unprom->caster)
-    {
-      tree lhs = gimple_get_lhs (unprom->caster->stmt);
-      if (TYPE_PRECISION (TREE_TYPE (lhs)) > TYPE_PRECISION (type)
-          && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type)
-          && (TYPE_UNSIGNED (unprom->type) || !TYPE_UNSIGNED (type))
-          && vect_split_statement (unprom->caster, new_op, new_stmt, vectype))
-        return new_op;
-    }
+  gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
 
   /* If OP is an external value, see if we can insert the new statement
      on an incoming edge.  */
-  if (unprom->dt == vect_external_def)
-    if (edge e = vect_get_external_def_edge (stmt_info->vinfo, unprom->op))
+  if (input == unprom->op && unprom->dt == vect_external_def)
+    if (edge e = vect_get_external_def_edge (stmt_info->vinfo, input))
       {
        basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
        gcc_assert (!new_bb);
@@ -1644,28 +1670,37 @@ vect_recog_over_widening_pattern (stmt_v
   bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
   tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
 
+  /* If we're truncating an operation, we need to make sure that we
+     don't introduce new undefined overflow.  The codes tested here are
+     a subset of those accepted by vect_truncatable_operation_p.  */
+  tree op_type = new_type;
+  if (TYPE_OVERFLOW_UNDEFINED (new_type)
+      && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
+    op_type = build_nonstandard_integer_type (new_precision, true);
+
   /* We specifically don't check here whether the target supports the
      new operation, since it might be something that a later pattern
      wants to rewrite anyway.  If targets have a minimum element size
      for some optabs, we should pattern-match smaller ops to larger ops
      where beneficial.  */
   tree new_vectype = get_vectype_for_scalar_type (new_type);
-  if (!new_vectype)
+  tree op_vectype = get_vectype_for_scalar_type (op_type);
+  if (!new_vectype || !op_vectype)
     return NULL;
 
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
                     type, new_type);
 
-  /* Calculate the rhs operands for an operation on NEW_TYPE.  */
+  /* Calculate the rhs operands for an operation on OP_TYPE.  */
   tree ops[3] = {};
   for (unsigned int i = 1; i < first_op; ++i)
     ops[i - 1] = gimple_op (last_stmt, i);
   vect_convert_inputs (last_stmt_info, nops, &ops[first_op - 1],
-                      new_type, &unprom[0], new_vectype);
+                      op_type, &unprom[0], op_vectype);
 
-  /* Use the operation to produce a result of type NEW_TYPE.  */
-  tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
+  /* Use the operation to produce a result of type OP_TYPE.  */
+  tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
   gimple *pattern_stmt = gimple_build_assign (new_var, code,
                                              ops[0], ops[1], ops[2]);
   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
@@ -1674,6 +1709,13 @@ vect_recog_over_widening_pattern (stmt_v
     dump_printf_loc (MSG_NOTE, vect_location,
                     "created pattern stmt: %G", pattern_stmt);
 
+  /* Convert back to the original signedness, if OP_TYPE is different
+     from NEW_TYPE.  */
+  if (op_type != new_type)
+    pattern_stmt = vect_convert_output (last_stmt_info, new_type,
+                                        pattern_stmt, op_vectype);
+
+  /* Promote the result to the original type.  */
   pattern_stmt = vect_convert_output (last_stmt_info, type,
                                      pattern_stmt, new_vectype);
 
@@ -1719,8 +1761,16 @@ vect_recog_average_pattern (stmt_vec_inf
   if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
     return NULL;
 
-  /* Get the definition of the shift input.  */
+  /* Look through any change in sign on the shift input.  */
   tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
+  vect_unpromoted_value unprom_plus;
+  rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
+                                                     &unprom_plus);
+  if (!rshift_rhs
+      || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
+    return NULL;
+
+  /* Get the definition of the shift input.  */
   stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
   if (!plus_stmt_info)
     return NULL;
Index: gcc/testsuite/gcc.dg/vect/vect-over-widen-23.c
===================================================================
--- /dev/null	2018-11-29 13:15:04.463550658 +0000
+++ gcc/testsuite/gcc.dg/vect/vect-over-widen-23.c	2018-12-05 08:33:55.130797055 +0000
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fno-tree-forwprop -fno-tree-vrp" } */
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+#if VECTOR_BITS > 128
+#define N (VECTOR_BITS / 2)
+#else
+#define N 64
+#endif
+
+int a[N], b[N], c[N];
+
+void
+foo ()
+{
+  int i;
+  for (i = 0; i < N; i++)
+    {
+      long long d = a[i];
+      long long e = b[i];
+      d += e;
+      c[i] = d;
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 2 "vect" { target vect_widen_shift } } } */
+/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* \+} "vect" } } */
+/* { dg-final { scan-tree-dump {VIEW_CONVERT_EXPR<vector[^ ]* unsigned} "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
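The vect_recog_average_pattern part of the change can be illustrated in the same spirit; the following averaging loop is an editorial example of the classic shape that pattern matches, not taken from the patch. The addition and shift happen in int after the usual promotions, but only the low 8 bits are stored, so the pattern truncates the addition; per the ChangeLog, the truncated addition is now performed in an unsigned type, and a sign change between the final PLUS_EXPR and the RSHIFT_EXPR is looked through:

    /* Rounding average: a[i] + b[i] + 1 is computed in int, shifted right
       by one and truncated back to unsigned char.  When the computation is
       narrowed, the truncating addition must not introduce signed overflow,
       hence the switch to an unsigned operation.  */
    void
    avg_round (unsigned char *restrict r, const unsigned char *restrict a,
               const unsigned char *restrict b, int n)
    {
      for (int i = 0; i < n; i++)
        r[i] = (a[i] + b[i] + 1) >> 1;
    }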