| Field | Value |
| --- | --- |
| Message ID | 20210831120229.GV920497@tucnak |
| State | New |
| Headers | show |
| Series | vectorizer: Fix up vectorization using WIDEN_MINUS_EXPR [PR102124] (expand) |
Jakub Jelinek <jakub@redhat.com> writes: > Hi! > > The following testcase is miscompiled on aarch64-linux at -O3 since the > introduction of WIDEN_MINUS_EXPR. > The problem is if the inner type (half_type) is unsigned and the result > type in which the subtraction is performed (type) has precision more than > twice as large as the inner type's precision. > For other widening operations like WIDEN_{PLUS,MULT}_EXPR, if half_type > is unsigned, the addition/multiplication result in itype is also unsigned > and needs to be zero-extended to type. > But subtraction is special, even when half_type is unsigned, the subtraction > behaves as signed (also regardless of whether the result type is signed or > unsigned), 0xfeU - 0xffU is -1 or 0xffffffffU, not 0x0000ffff. > > I think it is better not to use mixed signedness of types in > WIDEN_MINUS_EXPR (have unsigned vector of operands and signed result > vector), so this patch instead adds another cast to make sure we always > sign-extend the result from itype to type if type is wider than itype. > > Bootstrapped/regtested on aarch64-linux, x86_64-linux and i686-linux, ok > for trunk/11.3? > > 2021-08-31 Jakub Jelinek <jakub@redhat.com> > > PR tree-optimization/102124 > * tree-vect-patterns.c (vect_recog_widen_op_pattern): For ORIG_CODE > MINUS_EXPR, if itype is unsigned with smaller precision than type, > add an extra cast to signed variant of itype to ensure sign-extension. > > * gcc.dg/torture/pr102124.c: New test. LGTM. I was wondering whether it would be better to add a new non-default parameter to select this behaviour, so that any new callers have to think about it too. It would also be more consistent with the shift_p parameter. Either way's fine though, so the patch is OK as-is. 
Thanks, Richard > --- gcc/tree-vect-patterns.c.jj 2021-08-17 21:05:07.000000000 +0200 > +++ gcc/tree-vect-patterns.c 2021-08-30 11:54:03.651474845 +0200 > @@ -1268,11 +1268,31 @@ vect_recog_widen_op_pattern (vec_info *v > /* Check target support */ > tree vectype = get_vectype_for_scalar_type (vinfo, half_type); > tree vecitype = get_vectype_for_scalar_type (vinfo, itype); > + tree ctype = itype; > + tree vecctype = vecitype; > + if (orig_code == MINUS_EXPR > + && TYPE_UNSIGNED (itype) > + && TYPE_PRECISION (type) > TYPE_PRECISION (itype)) > + { > + /* Subtraction is special, even if half_type is unsigned and no matter > + whether type is signed or unsigned, if type is wider than itype, > + we need to sign-extend from the widening operation result to the > + result type. > + Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff, > + itype unsigned short and type either int or unsigned int. > + Widened (unsigned short) 0xfe - (unsigned short) 0xff is > + (unsigned short) 0xffff, but for type int we want the result -1 > + and for type unsigned int 0xffffffff rather than 0xffff. 
*/ > + ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0); > + vecctype = get_vectype_for_scalar_type (vinfo, ctype); > + } > + > enum tree_code dummy_code; > int dummy_int; > auto_vec<tree> dummy_vec; > if (!vectype > || !vecitype > + || !vecctype > || !supportable_widening_operation (vinfo, wide_code, last_stmt_info, > vecitype, vectype, > &dummy_code, &dummy_code, > @@ -1291,8 +1311,12 @@ vect_recog_widen_op_pattern (vec_info *v > gimple *pattern_stmt = gimple_build_assign (var, wide_code, > oprnd[0], oprnd[1]); > > + if (vecctype != vecitype) > + pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype, > + pattern_stmt, vecitype); > + > return vect_convert_output (vinfo, last_stmt_info, > - type, pattern_stmt, vecitype); > + type, pattern_stmt, vecctype); > } > > /* Try to detect multiplication on widened inputs, converting MULT_EXPR > --- gcc/testsuite/gcc.dg/torture/pr102124.c.jj 2021-08-30 12:08:05.838649133 +0200 > +++ gcc/testsuite/gcc.dg/torture/pr102124.c 2021-08-30 12:07:52.669834031 +0200 > @@ -0,0 +1,27 @@ > +/* PR tree-optimization/102124 */ > + > +int > +foo (const unsigned char *a, const unsigned char *b, unsigned long len) > +{ > + int ab, ba; > + unsigned long i; > + for (i = 0, ab = 0, ba = 0; i < len; i++) > + { > + ab |= a[i] - b[i]; > + ba |= b[i] - a[i]; > + } > + return (ab | ba) >= 0; > +} > + > +int > +main () > +{ > + unsigned char a[32] = { 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a' }; > + unsigned char b[32] = { 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a' }; > + unsigned char c[32] = { 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b' }; > + if (!foo (a, b, 16)) > + __builtin_abort (); > + if (foo (a, c, 16)) > + __builtin_abort (); > + return 0; > +} > > Jakub
--- gcc/tree-vect-patterns.c.jj 2021-08-17 21:05:07.000000000 +0200 +++ gcc/tree-vect-patterns.c 2021-08-30 11:54:03.651474845 +0200 @@ -1268,11 +1268,31 @@ vect_recog_widen_op_pattern (vec_info *v /* Check target support */ tree vectype = get_vectype_for_scalar_type (vinfo, half_type); tree vecitype = get_vectype_for_scalar_type (vinfo, itype); + tree ctype = itype; + tree vecctype = vecitype; + if (orig_code == MINUS_EXPR + && TYPE_UNSIGNED (itype) + && TYPE_PRECISION (type) > TYPE_PRECISION (itype)) + { + /* Subtraction is special, even if half_type is unsigned and no matter + whether type is signed or unsigned, if type is wider than itype, + we need to sign-extend from the widening operation result to the + result type. + Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff, + itype unsigned short and type either int or unsigned int. + Widened (unsigned short) 0xfe - (unsigned short) 0xff is + (unsigned short) 0xffff, but for type int we want the result -1 + and for type unsigned int 0xffffffff rather than 0xffff. 
*/ + ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0); + vecctype = get_vectype_for_scalar_type (vinfo, ctype); + } + enum tree_code dummy_code; int dummy_int; auto_vec<tree> dummy_vec; if (!vectype || !vecitype + || !vecctype || !supportable_widening_operation (vinfo, wide_code, last_stmt_info, vecitype, vectype, &dummy_code, &dummy_code, @@ -1291,8 +1311,12 @@ vect_recog_widen_op_pattern (vec_info *v gimple *pattern_stmt = gimple_build_assign (var, wide_code, oprnd[0], oprnd[1]); + if (vecctype != vecitype) + pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype, + pattern_stmt, vecitype); + return vect_convert_output (vinfo, last_stmt_info, - type, pattern_stmt, vecitype); + type, pattern_stmt, vecctype); } /* Try to detect multiplication on widened inputs, converting MULT_EXPR --- gcc/testsuite/gcc.dg/torture/pr102124.c.jj 2021-08-30 12:08:05.838649133 +0200 +++ gcc/testsuite/gcc.dg/torture/pr102124.c 2021-08-30 12:07:52.669834031 +0200 @@ -0,0 +1,27 @@ +/* PR tree-optimization/102124 */ + +int +foo (const unsigned char *a, const unsigned char *b, unsigned long len) +{ + int ab, ba; + unsigned long i; + for (i = 0, ab = 0, ba = 0; i < len; i++) + { + ab |= a[i] - b[i]; + ba |= b[i] - a[i]; + } + return (ab | ba) >= 0; +} + +int +main () +{ + unsigned char a[32] = { 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a' }; + unsigned char b[32] = { 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a' }; + unsigned char c[32] = { 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b' }; + if (!foo (a, b, 16)) + __builtin_abort (); + if (foo (a, c, 16)) + __builtin_abort (); + return 0; +}