Message ID | 20111019171440.GE2210@tyan-ft48-01.lab.bos.redhat.com |
---|---|
State | New |
Headers | show |
On Wed, 19 Oct 2011, Jakub Jelinek wrote: > Hi! > > Similarly to casts of bool to integer, even stores into bool arrays > can be handled similarly. Just we need to ensure tree-vect-data-refs.c > doesn't reject vectorization before tree-vect-patterns.c has a chance > to optimize it. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? Ok with ... > 2011-10-19 Jakub Jelinek <jakub@redhat.com> > > PR tree-optimization/50596 > * tree-vect-stmts.c (vect_mark_relevant): Only use > FOR_EACH_IMM_USE_FAST if lhs is SSA_NAME. > (vectorizable_store): If is_pattern_stmt_p look through > VIEW_CONVERT_EXPR on lhs. > * tree-vect-patterns.c (vect_recog_bool_pattern): Optimize > also stores into bool memory in addition to casts from bool > to integral types. > (vect_mark_pattern_stmts): If pattern_stmt already has vinfo > created, don't create it again. > * tree-vect-data-refs.c (vect_analyze_data_refs): For stores > into bool memory use vectype for integral type corresponding > to bool's mode. > * tree-vect-loop.c (vect_determine_vectorization_factor): Give up > if a store into bool memory hasn't been replaced by the pattern > recognizer. > > * gcc.dg/vect/vect-cond-10.c: New test. > > --- gcc/tree-vect-stmts.c.jj 2011-10-18 23:52:07.000000000 +0200 > +++ gcc/tree-vect-stmts.c 2011-10-19 14:19:00.000000000 +0200 > @@ -159,19 +159,20 @@ vect_mark_relevant (VEC(gimple,heap) **w > /* This use is out of pattern use, if LHS has other uses that are > pattern uses, we should mark the stmt itself, and not the pattern > stmt. 
*/ > - FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) > - { > - if (is_gimple_debug (USE_STMT (use_p))) > - continue; > - use_stmt = USE_STMT (use_p); > + if (TREE_CODE (lhs) == SSA_NAME) > + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) > + { > + if (is_gimple_debug (USE_STMT (use_p))) > + continue; > + use_stmt = USE_STMT (use_p); > > - if (vinfo_for_stmt (use_stmt) > - && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt))) > - { > - found = true; > - break; > - } > - } > + if (vinfo_for_stmt (use_stmt) > + && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt))) > + { > + found = true; > + break; > + } > + } > } > > if (!found) > @@ -3656,6 +3657,9 @@ vectorizable_store (gimple stmt, gimple_ > return false; > > scalar_dest = gimple_assign_lhs (stmt); > + if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR > + && is_pattern_stmt_p (stmt_info)) > + scalar_dest = TREE_OPERAND (scalar_dest, 0); > if (TREE_CODE (scalar_dest) != ARRAY_REF > && TREE_CODE (scalar_dest) != INDIRECT_REF > && TREE_CODE (scalar_dest) != COMPONENT_REF Just change the if () stmt to if (!handled_component_p (scalar_dest) && TREE_CODE (scalar_dest) != MEM_REF) return false; > --- gcc/tree-vect-patterns.c.jj 2011-10-18 23:52:05.000000000 +0200 > +++ gcc/tree-vect-patterns.c 2011-10-19 13:55:27.000000000 +0200 > @@ -1933,6 +1933,50 @@ vect_recog_bool_pattern (VEC (gimple, he > VEC_safe_push (gimple, heap, *stmts, last_stmt); > return pattern_stmt; > } > + else if (rhs_code == SSA_NAME > + && STMT_VINFO_DATA_REF (stmt_vinfo)) > + { > + stmt_vec_info pattern_stmt_info; > + vectype = STMT_VINFO_VECTYPE (stmt_vinfo); > + gcc_assert (vectype != NULL_TREE); > + if (!check_bool_pattern (var, loop_vinfo)) > + return NULL; > + > + rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts); > + if (TREE_CODE (lhs) == MEM_REF || TREE_CODE (lhs) == TARGET_MEM_REF) > + { > + lhs = copy_node (lhs); We don't handle TARGET_MEM_REF in vectorizable_store, so no need to do it here. 
In fact, just unconditionally do ... > + TREE_TYPE (lhs) = TREE_TYPE (vectype); > + } > + else > + lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs); ... this (wrap it in a V_C_E). No need to special-case any MEM_REFs. > + if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs))) This should never be false, so you can as well unconditionally build the conversion stmt. > + { > + tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); > + gimple cast_stmt > + = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE); > + STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = cast_stmt; > + rhs = rhs2; > + } > + pattern_stmt > + = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE); > + pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL); > + set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info); > + STMT_VINFO_DATA_REF (pattern_stmt_info) > + = STMT_VINFO_DATA_REF (stmt_vinfo); > + STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info) > + = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo); > + STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo); > + STMT_VINFO_DR_OFFSET (pattern_stmt_info) > + = STMT_VINFO_DR_OFFSET (stmt_vinfo); > + STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo); > + STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info) > + = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo); > + *type_out = vectype; > + *type_in = vectype; > + VEC_safe_push (gimple, heap, *stmts, last_stmt); > + return pattern_stmt; > + } > else > return NULL; > } > @@ -1949,19 +1993,22 @@ vect_mark_pattern_stmts (gimple orig_stm > loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info); > gimple def_stmt; > > - set_vinfo_for_stmt (pattern_stmt, > - new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL)); > - gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt)); > pattern_stmt_info = vinfo_for_stmt (pattern_stmt); > + if (pattern_stmt_info == NULL) > + { > + pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, 
NULL); > + set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info); > + } > + gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt)); > > STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt; > STMT_VINFO_DEF_TYPE (pattern_stmt_info) > - = STMT_VINFO_DEF_TYPE (orig_stmt_info); > + = STMT_VINFO_DEF_TYPE (orig_stmt_info); > STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype; > STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true; > STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt; > STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info) > - = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info); > + = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info); > if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)) > { > def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info); > --- gcc/tree-vect-data-refs.c.jj 2011-09-20 21:43:07.000000000 +0200 > +++ gcc/tree-vect-data-refs.c 2011-10-19 14:37:44.000000000 +0200 > @@ -2752,8 +2752,23 @@ vect_analyze_data_refs (loop_vec_info lo > > /* Set vectype for STMT. */ > scalar_type = TREE_TYPE (DR_REF (dr)); > - STMT_VINFO_VECTYPE (stmt_info) = > - get_vectype_for_scalar_type (scalar_type); > + STMT_VINFO_VECTYPE (stmt_info) > + = get_vectype_for_scalar_type (scalar_type); > + if (!STMT_VINFO_VECTYPE (stmt_info) > + && ((TYPE_PRECISION (scalar_type) == 1 > + && TYPE_UNSIGNED (scalar_type)) > + || TREE_CODE (scalar_type) == BOOLEAN_TYPE) > + && DR_IS_WRITE (dr) > + && loop_vinfo) > + { > + /* For bool stores use integral type with the same > + TYPE_MODE, but bigger precision. vect_recog_bool_pattern > + can transform those into something vectorizable. 
*/ > + unsigned int modesize = GET_MODE_BITSIZE (TYPE_MODE (scalar_type)); > + scalar_type = build_nonstandard_integer_type (modesize, 1); > + STMT_VINFO_VECTYPE (stmt_info) > + = get_vectype_for_scalar_type (scalar_type); > + } > if (!STMT_VINFO_VECTYPE (stmt_info)) > { > if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) > --- gcc/tree-vect-loop.c.jj 2011-09-26 14:06:52.000000000 +0200 > +++ gcc/tree-vect-loop.c 2011-10-19 14:49:18.000000000 +0200 > @@ -1,5 +1,5 @@ > /* Loop Vectorization > - Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 > + Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 > Free Software Foundation, Inc. > Contributed by Dorit Naishlos <dorit@il.ibm.com> and > Ira Rosen <irar@il.ibm.com> > @@ -347,6 +347,28 @@ vect_determine_vectorization_factor (loo > gcc_assert (STMT_VINFO_DATA_REF (stmt_info) > || is_pattern_stmt_p (stmt_info)); > vectype = STMT_VINFO_VECTYPE (stmt_info); > + if (STMT_VINFO_DATA_REF (stmt_info)) > + { > + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); > + tree scalar_type = TREE_TYPE (DR_REF (dr)); > + /* vect_analyze_data_refs will allow bool writes through, > + in order to allow vect_recog_bool_pattern to transform > + those. If they couldn't be transformed, give up now. */ > + if (((TYPE_PRECISION (scalar_type) == 1 > + && TYPE_UNSIGNED (scalar_type)) > + || TREE_CODE (scalar_type) == BOOLEAN_TYPE) Shouldn't it be always possible to vectorize those? For loads we can assume the memory contains only 1 or 0 (we assume that for scalar loads), for stores we can mask out all other bits explicitly if you add support for truncating conversions to non-mode precision (in fact, we could support non-mode precision vectorization that way, if not support bitfield loads or extending conversions). So maybe that obsoletes my conditional approval ;) Can you investigate whether the above would work? Thanks, Richard. 
> + && DR_IS_WRITE (dr) > + && !is_pattern_stmt_p (stmt_info)) > + { > + if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) > + { > + fprintf (vect_dump, > + "not vectorized: unsupported data-type "); > + print_generic_expr (vect_dump, scalar_type, TDF_SLIM); > + } > + return false; > + } > + } > } > else > { > --- gcc/testsuite/gcc.dg/vect/vect-cond-10.c.jj 2011-10-19 15:54:42.000000000 +0200 > +++ gcc/testsuite/gcc.dg/vect/vect-cond-10.c 2011-10-19 16:00:22.000000000 +0200 > @@ -0,0 +1,165 @@ > +/* { dg-require-effective-target vect_cond_mixed } */ > + > +#include "tree-vect.h" > + > +#define N 1024 > +float a[N], b[N], c[N], d[N]; > +_Bool k[N]; > + > +__attribute__((noinline, noclone)) void > +f1 (void) > +{ > + int i; > + for (i = 0; i < N; ++i) > + { > + int x = a[i] < b[i]; > + int y = c[i] < d[i]; > + k[i] = x & y; > + } > +} > + > +__attribute__((noinline, noclone)) void > +f2 (void) > +{ > + int i; > + for (i = 0; i < N; ++i) > + k[i] = (a[i] < b[i]) & (c[i] < d[i]); > +} > + > +__attribute__((noinline, noclone)) void > +f3 (void) > +{ > + int i; > + for (i = 0; i < N; ++i) > + { > + int x = a[i] < b[i]; > + int y = c[i] < d[i]; > + k[i] = x | y; > + } > +} > + > +__attribute__((noinline, noclone)) void > +f4 (void) > +{ > + int i; > + for (i = 0; i < N; ++i) > + k[i] = (a[i] < b[i]) | (c[i] < d[i]); > +} > + > +__attribute__((noinline, noclone)) void > +f5 (_Bool *p) > +{ > + int i; > + for (i = 0; i < N; ++i) > + { > + int x = a[i] < b[i]; > + int y = c[i] < d[i]; > + p[i] = x & y; > + } > +} > + > +__attribute__((noinline, noclone)) void > +f6 (_Bool *p) > +{ > + int i; > + for (i = 0; i < N; ++i) > + p[i] = (a[i] < b[i]) & (c[i] < d[i]); > +} > + > +__attribute__((noinline, noclone)) void > +f7 (_Bool *p) > +{ > + int i; > + for (i = 0; i < N; ++i) > + { > + int x = a[i] < b[i]; > + int y = c[i] < d[i]; > + p[i] = x | y; > + } > +} > + > +__attribute__((noinline, noclone)) void > +f8 (_Bool *p) > +{ > + int i; > + for (i = 0; i < N; ++i) > + 
p[i] = (a[i] < b[i]) | (c[i] < d[i]); > +} > + > +int > +main () > +{ > + int i; > + > + check_vect (); > + > + for (i = 0; i < N; i++) > + { > + switch (i % 9) > + { > + case 0: asm (""); a[i] = - i - 1; b[i] = i + 1; break; > + case 1: a[i] = 0; b[i] = 0; break; > + case 2: a[i] = i + 1; b[i] = - i - 1; break; > + case 3: a[i] = i; b[i] = i + 7; break; > + case 4: a[i] = i; b[i] = i; break; > + case 5: a[i] = i + 16; b[i] = i + 3; break; > + case 6: a[i] = - i - 5; b[i] = - i; break; > + case 7: a[i] = - i; b[i] = - i; break; > + case 8: a[i] = - i; b[i] = - i - 7; break; > + } > + } > + for (i = 0; i < N; i++) > + { > + switch ((i / 9) % 3) > + { > + case 0: c[i] = a[i / 9]; d[i] = b[i / 9]; break; > + case 1: c[i] = a[i / 9 + 3]; d[i] = b[i / 9 + 3]; break; > + case 2: c[i] = a[i / 9 + 6]; d[i] = b[i / 9 + 6]; break; > + } > + } > + f1 (); > + for (i = 0; i < N; i++) > + if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0)) > + abort (); > + __builtin_memset (k, 0, sizeof (k)); > + f2 (); > + for (i = 0; i < N; i++) > + if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0)) > + abort (); > + __builtin_memset (k, 0, sizeof (k)); > + f3 (); > + for (i = 0; i < N; i++) > + if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0)) > + abort (); > + __builtin_memset (k, 0, sizeof (k)); > + f4 (); > + for (i = 0; i < N; i++) > + if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0)) > + abort (); > + __builtin_memset (k, 0, sizeof (k)); > + f5 (k); > + for (i = 0; i < N; i++) > + if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0)) > + abort (); > + __builtin_memset (k, 0, sizeof (k)); > + f6 (k); > + for (i = 0; i < N; i++) > + if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0)) > + abort (); > + __builtin_memset (k, 0, sizeof (k)); > + f7 (k); > + for (i = 0; i < N; i++) > + if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0)) > + abort (); > + __builtin_memset (k, 0, sizeof (k)); > + f8 (k); > + for (i = 0; i < N; i++) > + if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0)) > + abort (); > + 
__builtin_memset (k, 0, sizeof (k)); > + > + return 0; > +} > + > +/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops" 8 "vect" } } */ > +/* { dg-final { cleanup-tree-dump "vect" } } */ > > Jakub > >
On Thu, Oct 20, 2011 at 11:42:01AM +0200, Richard Guenther wrote: > > + if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR > > + && is_pattern_stmt_p (stmt_info)) > > + scalar_dest = TREE_OPERAND (scalar_dest, 0); > > if (TREE_CODE (scalar_dest) != ARRAY_REF > > && TREE_CODE (scalar_dest) != INDIRECT_REF > > && TREE_CODE (scalar_dest) != COMPONENT_REF > > Just change the if () stmt to > > if (!handled_component_p (scalar_dest) > && TREE_CODE (scalar_dest) != MEM_REF) > return false; That will accept BIT_FIELD_REF and ARRAY_RANGE_REF (as well as VCE outside of pattern stmts). The VCEs I hope don't appear, but the first two might, and I'm not sure we are prepared to handle them. Certainly not BIT_FIELD_REFs. > > + rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts); > > + if (TREE_CODE (lhs) == MEM_REF || TREE_CODE (lhs) == TARGET_MEM_REF) > > + { > > + lhs = copy_node (lhs); > > We don't handle TARGET_MEM_REF in vectorizable_store, so no need to > do it here. In fact, just unconditionally do ... > > > + TREE_TYPE (lhs) = TREE_TYPE (vectype); > > + } > > + else > > + lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs); > > ... this (wrap it in a V_C_E). No need to special-case any > MEM_REFs. Ok. After all it seems vectorizable_store pretty much ignores it (except for the scalar_dest check above). For aliasing it uses the type from DR_REF and otherwise it uses the vectorized type. > > + if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs))) > > This should never be false, so you can as well unconditionally build > the conversion stmt. You mean because currently adjust_bool_pattern will prefer signed types over unsigned while here lhs will be unsigned? I guess I should change it to use signed type for the memory store too to avoid the extra cast instead. Both types can be certainly the same precision, e.g. for: unsigned char a[N], b[N]; unsigned int d[N], e[N]; bool c[N]; ... 
for (i = 0; i < N; ++i) c[i] = a[i] < b[i]; or different precision, e.g. for: for (i = 0; i < N; ++i) c[i] = d[i] < e[i]; > > @@ -347,6 +347,28 @@ vect_determine_vectorization_factor (loo > > gcc_assert (STMT_VINFO_DATA_REF (stmt_info) > > || is_pattern_stmt_p (stmt_info)); > > vectype = STMT_VINFO_VECTYPE (stmt_info); > > + if (STMT_VINFO_DATA_REF (stmt_info)) > > + { > > + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); > > + tree scalar_type = TREE_TYPE (DR_REF (dr)); > > + /* vect_analyze_data_refs will allow bool writes through, > > + in order to allow vect_recog_bool_pattern to transform > > + those. If they couldn't be transformed, give up now. */ > > + if (((TYPE_PRECISION (scalar_type) == 1 > > + && TYPE_UNSIGNED (scalar_type)) > > + || TREE_CODE (scalar_type) == BOOLEAN_TYPE) > > Shouldn't it be always possible to vectorize those? For loads > we can assume the memory contains only 1 or 0 (we assume that for > scalar loads), for stores we can mask out all other bits explicitly > if you add support for truncating conversions to non-mode precision > (in fact, we could support non-mode precision vectorization that way, > if not support bitfield loads or extending conversions). Not without the pattern recognizer transforming it into something. That is something we've discussed on IRC before I started working on the first vect_recog_bool_pattern patch, we'd need to special case bool and one-bit precision types in way too many places all around the vectorizer. Another reason for that was that what vect_recog_bool_pattern does currently is certainly way faster than what would we end up with if we just handled bool as unsigned (or signed?) char with masking on casts and stores - the ability to use any integer type for the bools rather than char as appropriate means we can avoid many VEC_PACK_TRUNK_EXPRs and corresponding VEC_UNPACK_{LO,HI}_EXPRs. 
So the chosen solution was attempt to transform some of bool patterns into something the vectorizer can handle easily. And that can be extended over time what it handles. The above just reflects it, probably just me trying to be too cautious, the vectorization would likely fail on the stmt feeding the store, because get_vectype_for_scalar_type would fail on it. If we wanted to support general TYPE_PRECISION != GET_MODE_BITSIZE (TYPE_MODE) vectorization (hopefully with still preserving the pattern bool recognizer for the above stated reasons), we'd start with changing get_vectype_for_scalar_type to handle those types (then the tree-vect-data-refs.c and tree-vect-loop.c changes from this patch would be unnecessary), but then we'd need to handle it in other places too (I guess loads would be fine (unless BIT_FIELD_REF loads), but then casts and stores need extra code). Jakub
On Thu, Oct 20, 2011 at 12:31 PM, Jakub Jelinek <jakub@redhat.com> wrote: > On Thu, Oct 20, 2011 at 11:42:01AM +0200, Richard Guenther wrote: >> > + if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR >> > + && is_pattern_stmt_p (stmt_info)) >> > + scalar_dest = TREE_OPERAND (scalar_dest, 0); >> > if (TREE_CODE (scalar_dest) != ARRAY_REF >> > && TREE_CODE (scalar_dest) != INDIRECT_REF >> > && TREE_CODE (scalar_dest) != COMPONENT_REF >> >> Just change the if () stmt to >> >> if (!handled_component_p (scalar_dest) >> && TREE_CODE (scalar_dest) != MEM_REF) >> return false; > > That will accept BIT_FIELD_REF and ARRAY_RANGE_REF (as well as VCE outside of pattern stmts). > The VCEs I hope don't appear, but the first two might, and I'm not sure > we are prepared to handle them. Certainly not BIT_FIELD_REFs. > >> > + rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts); >> > + if (TREE_CODE (lhs) == MEM_REF || TREE_CODE (lhs) == TARGET_MEM_REF) >> > + { >> > + lhs = copy_node (lhs); >> >> We don't handle TARGET_MEM_REF in vectorizable_store, so no need to >> do it here. In fact, just unconditionally do ... >> >> > + TREE_TYPE (lhs) = TREE_TYPE (vectype); >> > + } >> > + else >> > + lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs); >> >> ... this (wrap it in a V_C_E). No need to special-case any >> MEM_REFs. > > Ok. After all it seems vectorizable_store pretty much ignores it > (except for the scalar_dest check above). For aliasing it uses the type > from DR_REF and otherwise it uses the vectorized type. > >> > + if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs))) >> >> This should never be false, so you can as well unconditionally build >> the conversion stmt. > > You mean because currently adjust_bool_pattern will prefer signed types > over unsigned while here lhs will be unsigned? I guess I should > change it to use signed type for the memory store too to avoid the extra > cast instead. 
Both types can be certainly the same precision, e.g. for: > unsigned char a[N], b[N]; > unsigned int d[N], e[N]; > bool c[N]; > ... > for (i = 0; i < N; ++i) > c[i] = a[i] < b[i]; > or different precision, e.g. for: > for (i = 0; i < N; ++i) > c[i] = d[i] < e[i]; > >> > @@ -347,6 +347,28 @@ vect_determine_vectorization_factor (loo >> > gcc_assert (STMT_VINFO_DATA_REF (stmt_info) >> > || is_pattern_stmt_p (stmt_info)); >> > vectype = STMT_VINFO_VECTYPE (stmt_info); >> > + if (STMT_VINFO_DATA_REF (stmt_info)) >> > + { >> > + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); >> > + tree scalar_type = TREE_TYPE (DR_REF (dr)); >> > + /* vect_analyze_data_refs will allow bool writes through, >> > + in order to allow vect_recog_bool_pattern to transform >> > + those. If they couldn't be transformed, give up now. */ >> > + if (((TYPE_PRECISION (scalar_type) == 1 >> > + && TYPE_UNSIGNED (scalar_type)) >> > + || TREE_CODE (scalar_type) == BOOLEAN_TYPE) >> >> Shouldn't it be always possible to vectorize those? For loads >> we can assume the memory contains only 1 or 0 (we assume that for >> scalar loads), for stores we can mask out all other bits explicitly >> if you add support for truncating conversions to non-mode precision >> (in fact, we could support non-mode precision vectorization that way, >> if not support bitfield loads or extending conversions). > > Not without the pattern recognizer transforming it into something. > That is something we've discussed on IRC before I started working on the > first vect_recog_bool_pattern patch, we'd need to special case bool and > one-bit precision types in way too many places all around the vectorizer. > Another reason for that was that what vect_recog_bool_pattern does currently > is certainly way faster than what would we end up with if we just handled > bool as unsigned (or signed?) 
char with masking on casts and stores > - the ability to use any integer type for the bools rather than char > as appropriate means we can avoid many VEC_PACK_TRUNK_EXPRs and > corresponding VEC_UNPACK_{LO,HI}_EXPRs. > So the chosen solution was attempt to transform some of bool patterns > into something the vectorizer can handle easily. > And that can be extended over time what it handles. > > The above just reflects it, probably just me trying to be too cautious, > the vectorization would likely fail on the stmt feeding the store, because > get_vectype_for_scalar_type would fail on it. > > If we wanted to support general TYPE_PRECISION != GET_MODE_BITSIZE (TYPE_MODE) > vectorization (hopefully with still preserving the pattern bool recognizer > for the above stated reasons), we'd start with changing > get_vectype_for_scalar_type to handle those types (then the > tree-vect-data-refs.c and tree-vect-loop.c changes from this patch would > be unnecessary), but then we'd need to handle it in other places too > (I guess loads would be fine (unless BIT_FIELD_REF loads), but then > casts and stores need extra code). I'll try to poke at that a bit, thus support general bit-precision types for loads and stores and the few operations that are safe on them. If you have a store to a bool like int *a, *b; _Bool *c; for (;;) c[i] = a[i] < b[i]; will the compare choose an int vector type and then demote it to char for the store? I suppose trying to generally handle loads/stores for these types shouldn't interfere too much with this. But I'll see ... Richard. > Jakub >
On Fri, Oct 21, 2011 at 11:19:32AM +0200, Richard Guenther wrote: > I'll try to poke at that a bit, thus support general bit-precision types for > loads and stores and the few operations that are safe on them. If you > have a store to a bool like > > int *a, *b; > _Bool *c; > > for (;;) > c[i] = a[i] < b[i]; > > will the compare choose an int vector type and then demote it to > char for the store? Yes. The pattern recognizer would turn this into: int *a, *b; for (;;) { int tmp = a[i] < b[i] ? 1 : 0; ((char *)c)[i] = (char) tmp; // Still using _Bool for TBAA purposes } > I suppose trying to generally handle loads/stores > for these types shouldn't interfere too much with this. But I'll see ... If you manage to get the generic stuff working (remove the condition from get_vectype_for_scalar_type about TYPE_PRECISION and handle what is needed), then vect_recog_bool_pattern would need to be adjusted slightly (to not start on a cast from some kind of bool to another kind of bool, which now results in return NULL because get_vectype_for_scalar_type returns NULL_TREE) and from the patch I've posted we'd need just the tree-vect-patterns.c bits (adjusted as you say to unconditionally create VCE instead of special casing MEM_REF, and additionally attempting to use signed instead of unsigned type to avoid unnecessary casts) and something in vectorizable_store so that it doesn't fail on VCEs, at least not in pattern stmts. Jakub
On Thu, 20 Oct 2011, Jakub Jelinek wrote: > On Thu, Oct 20, 2011 at 11:42:01AM +0200, Richard Guenther wrote: > > > + if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR > > > + && is_pattern_stmt_p (stmt_info)) > > > + scalar_dest = TREE_OPERAND (scalar_dest, 0); > > > if (TREE_CODE (scalar_dest) != ARRAY_REF > > > && TREE_CODE (scalar_dest) != INDIRECT_REF > > > && TREE_CODE (scalar_dest) != COMPONENT_REF > > > > Just change the if () stmt to > > > > if (!handled_component_p (scalar_dest) > > && TREE_CODE (scalar_dest) != MEM_REF) > > return false; > > That will accept BIT_FIELD_REF and ARRAY_RANGE_REF (as well as VCE outside of pattern stmts). > The VCEs I hope don't appear, but the first two might, and I'm not sure > we are prepared to handle them. Certainly not BIT_FIELD_REFs. > > > > + rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts); > > > + if (TREE_CODE (lhs) == MEM_REF || TREE_CODE (lhs) == TARGET_MEM_REF) > > > + { > > > + lhs = copy_node (lhs); > > > > We don't handle TARGET_MEM_REF in vectorizable_store, so no need to > > do it here. In fact, just unconditionally do ... > > > > > + TREE_TYPE (lhs) = TREE_TYPE (vectype); > > > + } > > > + else > > > + lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs); > > > > ... this (wrap it in a V_C_E). No need to special-case any > > MEM_REFs. > > Ok. After all it seems vectorizable_store pretty much ignores it > (except for the scalar_dest check above). For aliasing it uses the type > from DR_REF and otherwise it uses the vectorized type. > > > > + if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs))) > > > > This should never be false, so you can as well unconditionally build > > the conversion stmt. > > You mean because currently adjust_bool_pattern will prefer signed types > over unsigned while here lhs will be unsigned? I guess I should > change it to use signed type for the memory store too to avoid the extra > cast instead. 
Both types can be certainly the same precision, e.g. for: > unsigned char a[N], b[N]; > unsigned int d[N], e[N]; > bool c[N]; > ... > for (i = 0; i < N; ++i) > c[i] = a[i] < b[i]; > or different precision, e.g. for: > for (i = 0; i < N; ++i) > c[i] = d[i] < e[i]; > > > > @@ -347,6 +347,28 @@ vect_determine_vectorization_factor (loo > > > gcc_assert (STMT_VINFO_DATA_REF (stmt_info) > > > || is_pattern_stmt_p (stmt_info)); > > > vectype = STMT_VINFO_VECTYPE (stmt_info); > > > + if (STMT_VINFO_DATA_REF (stmt_info)) > > > + { > > > + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); > > > + tree scalar_type = TREE_TYPE (DR_REF (dr)); > > > + /* vect_analyze_data_refs will allow bool writes through, > > > + in order to allow vect_recog_bool_pattern to transform > > > + those. If they couldn't be transformed, give up now. */ > > > + if (((TYPE_PRECISION (scalar_type) == 1 > > > + && TYPE_UNSIGNED (scalar_type)) > > > + || TREE_CODE (scalar_type) == BOOLEAN_TYPE) > > > > Shouldn't it be always possible to vectorize those? For loads > > we can assume the memory contains only 1 or 0 (we assume that for > > scalar loads), for stores we can mask out all other bits explicitly > > if you add support for truncating conversions to non-mode precision > > (in fact, we could support non-mode precision vectorization that way, > > if not support bitfield loads or extending conversions). > > Not without the pattern recognizer transforming it into something. > That is something we've discussed on IRC before I started working on the > first vect_recog_bool_pattern patch, we'd need to special case bool and > one-bit precision types in way too many places all around the vectorizer. > Another reason for that was that what vect_recog_bool_pattern does currently > is certainly way faster than what would we end up with if we just handled > bool as unsigned (or signed?) 
char with masking on casts and stores > - the ability to use any integer type for the bools rather than char > as appropriate means we can avoid many VEC_PACK_TRUNC_EXPRs and > corresponding VEC_UNPACK_{LO,HI}_EXPRs. > So the chosen solution was attempt to transform some of bool patterns > into something the vectorizer can handle easily. > And that can be extended over time what it handles. > > The above just reflects it, probably just me trying to be too cautious, > the vectorization would likely fail on the stmt feeding the store, because > get_vectype_for_scalar_type would fail on it. > > If we wanted to support general TYPE_PRECISION != GET_MODE_BITSIZE (TYPE_MODE) > vectorization (hopefully with still preserving the pattern bool recognizer > for the above stated reasons), we'd start with changing > get_vectype_for_scalar_type to handle those types (then the > tree-vect-data-refs.c and tree-vect-loop.c changes from this patch would > be unnecessary), but then we'd need to handle it in other places too > (I guess loads would be fine (unless BIT_FIELD_REF loads), but then > casts and stores need extra code). This is what I have right now, bootstrapped and tested on x86_64-unknown-linux-gnu. I do see FAIL: gfortran.dg/logical_dot_product.f90 -O3 -fomit-frame-pointer (internal compiler error) FAIL: gfortran.dg/mapping_1.f90 -O3 -fomit-frame-pointer (internal compiler error) FAIL: gfortran.fortran-torture/execute/pr43390.f90, -O3 -g (internal compiler error) so there is some fallout, but somebody broke dejagnu enough that I can't easily debug this right now, so I'm postponing it until that is fixed. It doesn't seem to break any testcases for Bool vectorization. I probably should factor out the precision test. Thanks, Richard. 2011-10-24 Richard Guenther <rguenther@suse.de> * tree-vect-stmts.c (vectorizable_assignment): Bail out for non-mode-precision operations. (vectorizable_shift): Likewise. (vectorizable_operation): Likewise. 
(vectorizable_type_demotion): Likewise. (vectorizable_type_promotion): Likewise. (vectorizable_store): Handle non-mode-precision stores. (vectorizable_load): Handle non-mode-precision loads. (get_vectype_for_scalar_type_and_size): Return a vector type for non-mode-precision integers. * gcc.dg/vect/vect-bool-1.c: New testcase. Index: gcc/tree-vect-stmts.c =================================================================== *** gcc/tree-vect-stmts.c (revision 180380) --- gcc/tree-vect-stmts.c (working copy) *************** vectorizable_assignment (gimple stmt, gi *** 2173,2178 **** --- 2173,2197 ---- != GET_MODE_SIZE (TYPE_MODE (vectype_in))))) return false; + /* We do not handle bit-precision changes. */ + if ((CONVERT_EXPR_CODE_P (code) + || code == VIEW_CONVERT_EXPR) + && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) + && ((TYPE_PRECISION (TREE_TYPE (scalar_dest)) + != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest)))) + || ((TYPE_PRECISION (TREE_TYPE (op)) + != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op)))))) + /* But a conversion that does not change the bit-pattern is ok. */ + && !((TYPE_PRECISION (TREE_TYPE (scalar_dest)) + > TYPE_PRECISION (TREE_TYPE (op))) + && TYPE_UNSIGNED (TREE_TYPE (op)))) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "type conversion to/from bit-precision " + "unsupported."); + return false; + } + if (!vec_stmt) /* transformation not required. 
*/ { STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type; *************** vectorizable_shift (gimple stmt, gimple_ *** 2326,2331 **** --- 2345,2357 ---- scalar_dest = gimple_assign_lhs (stmt); vectype_out = STMT_VINFO_VECTYPE (stmt_info); + if (TYPE_PRECISION (TREE_TYPE (scalar_dest)) + != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest)))) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "bit-precision shifts not supported."); + return false; + } op0 = gimple_assign_rhs1 (stmt); if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo, *************** vectorizable_operation (gimple stmt, gim *** 2660,2665 **** --- 2686,2706 ---- scalar_dest = gimple_assign_lhs (stmt); vectype_out = STMT_VINFO_VECTYPE (stmt_info); + /* Most operations cannot handle bit-precision types without extra + truncations. */ + if ((TYPE_PRECISION (TREE_TYPE (scalar_dest)) + != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest)))) + /* Exception are bitwise operations. */ + && code != BIT_IOR_EXPR + && code != BIT_XOR_EXPR + && code != BIT_AND_EXPR + && code != BIT_NOT_EXPR) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "bit-precision arithmetic not supported."); + return false; + } + op0 = gimple_assign_rhs1 (stmt); if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[0], &vectype)) *************** vectorizable_type_demotion (gimple stmt, *** 3082,3090 **** if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) && INTEGRAL_TYPE_P (TREE_TYPE (op0))) || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest)) ! && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0)) ! && CONVERT_EXPR_CODE_P (code)))) return false; if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[0], &vectype_in)) { --- 3123,3142 ---- if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) && INTEGRAL_TYPE_P (TREE_TYPE (op0))) || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest)) ! 
&& SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))))) return false; + + if (INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) + && ((TYPE_PRECISION (TREE_TYPE (scalar_dest)) + != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest)))) + || ((TYPE_PRECISION (TREE_TYPE (op0)) + != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op0))))))) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "type demotion to/from bit-precision unsupported."); + return false; + } + if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[0], &vectype_in)) { *************** vectorizable_type_promotion (gimple stmt *** 3365,3370 **** --- 3417,3435 ---- && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0)) && CONVERT_EXPR_CODE_P (code)))) return false; + + if (INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) + && ((TYPE_PRECISION (TREE_TYPE (scalar_dest)) + != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest)))) + || ((TYPE_PRECISION (TREE_TYPE (op0)) + != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op0))))))) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "type promotion to/from bit-precision " + "unsupported."); + return false; + } + if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[0], &vectype_in)) { *************** vectorizable_store (gimple stmt, gimple_ *** 3673,3689 **** return false; } - /* The scalar rhs type needs to be trivially convertible to the vector - component type. This should always be the case. */ elem_type = TREE_TYPE (vectype); - if (!useless_type_conversion_p (elem_type, TREE_TYPE (op))) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "??? operands of different types"); - return false; - } - vec_mode = TYPE_MODE (vectype); /* FORNOW. In some cases can vectorize even if data-type not supported (e.g. - array initialization with 0). 
*/ if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing) --- 3738,3746 ---- return false; } elem_type = TREE_TYPE (vectype); vec_mode = TYPE_MODE (vectype); + /* FORNOW. In some cases can vectorize even if data-type not supported (e.g. - array initialization with 0). */ if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing) *************** vectorizable_load (gimple stmt, gimple_s *** 4117,4123 **** bool strided_load = false; bool load_lanes_p = false; gimple first_stmt; - tree scalar_type; bool inv_p; bool negative; bool compute_in_loop = false; --- 4174,4179 ---- *************** vectorizable_load (gimple stmt, gimple_s *** 4192,4198 **** return false; } ! scalar_type = TREE_TYPE (DR_REF (dr)); mode = TYPE_MODE (vectype); /* FORNOW. In some cases can vectorize even if data-type not supported --- 4248,4254 ---- return false; } ! elem_type = TREE_TYPE (vectype); mode = TYPE_MODE (vectype); /* FORNOW. In some cases can vectorize even if data-type not supported *************** vectorizable_load (gimple stmt, gimple_s *** 4204,4219 **** return false; } - /* The vector component type needs to be trivially convertible to the - scalar lhs. This should always be the case. */ - elem_type = TREE_TYPE (vectype); - if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), elem_type)) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "??? operands of different types"); - return false; - } - /* Check if the load is a part of an interleaving chain. */ if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) { --- 4260,4265 ---- *************** vectorizable_load (gimple stmt, gimple_s *** 4560,4566 **** msq = new_temp; bump = size_binop (MULT_EXPR, vs_minus_1, ! TYPE_SIZE_UNIT (scalar_type)); ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump); new_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, NULL_TREE, ptr, --- 4606,4612 ---- msq = new_temp; bump = size_binop (MULT_EXPR, vs_minus_1, ! 
TYPE_SIZE_UNIT (elem_type)); ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump); new_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, NULL_TREE, ptr, *************** get_vectype_for_scalar_type_and_size (tr *** 5441,5453 **** if (nbytes < TYPE_ALIGN_UNIT (scalar_type)) return NULL_TREE; ! /* If we'd build a vector type of elements whose mode precision doesn't ! match their types precision we'll get mismatched types on vector ! extracts via BIT_FIELD_REFs. This effectively means we disable ! vectorization of bool and/or enum types in some languages. */ if (INTEGRAL_TYPE_P (scalar_type) && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)) ! return NULL_TREE; if (GET_MODE_CLASS (inner_mode) != MODE_INT && GET_MODE_CLASS (inner_mode) != MODE_FLOAT) --- 5487,5500 ---- if (nbytes < TYPE_ALIGN_UNIT (scalar_type)) return NULL_TREE; ! /* For vector types of elements whose mode precision doesn't ! match their types precision we use a element type of mode ! precision. The vectorization routines will have to make sure ! they support the proper result truncation/extension. */ if (INTEGRAL_TYPE_P (scalar_type) && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)) ! scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode), ! 
TYPE_UNSIGNED (scalar_type)); if (GET_MODE_CLASS (inner_mode) != MODE_INT && GET_MODE_CLASS (inner_mode) != MODE_FLOAT) Index: gcc/testsuite/gcc.dg/vect/vect-bool-1.c =================================================================== *** gcc/testsuite/gcc.dg/vect/vect-bool-1.c (revision 0) --- gcc/testsuite/gcc.dg/vect/vect-bool-1.c (revision 0) *************** *** 0 **** --- 1,15 ---- + /* { dg-do compile } */ + /* { dg-require-effective-target vect_int } */ + + _Bool a[1024]; + _Bool b[1024]; + _Bool c[1024]; + void foo (void) + { + unsigned i; + for (i = 0; i < 1024; ++i) + a[i] = b[i] | c[i]; + } + + /* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */
On Mon, 24 Oct 2011, Richard Guenther wrote: > On Thu, 20 Oct 2011, Jakub Jelinek wrote: > > > On Thu, Oct 20, 2011 at 11:42:01AM +0200, Richard Guenther wrote: > > > > + if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR > > > > + && is_pattern_stmt_p (stmt_info)) > > > > + scalar_dest = TREE_OPERAND (scalar_dest, 0); > > > > if (TREE_CODE (scalar_dest) != ARRAY_REF > > > > && TREE_CODE (scalar_dest) != INDIRECT_REF > > > > && TREE_CODE (scalar_dest) != COMPONENT_REF > > > > > > Just change the if () stmt to > > > > > > if (!handled_component_p (scalar_dest) > > > && TREE_CODE (scalar_dest) != MEM_REF) > > > return false; > > > > That will accept BIT_FIELD_REF and ARRAY_RANGE_REF (as well as VCE outside of pattern stmts). > > The VCEs I hope don't appear, but the first two might, and I'm not sure > > we are prepared to handle them. Certainly not BIT_FIELD_REFs. > > > > > > + rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts); > > > > + if (TREE_CODE (lhs) == MEM_REF || TREE_CODE (lhs) == TARGET_MEM_REF) > > > > + { > > > > + lhs = copy_node (lhs); > > > > > > We don't handle TARGET_MEM_REF in vectorizable_store, so no need to > > > do it here. In fact, just unconditionally do ... > > > > > > > + TREE_TYPE (lhs) = TREE_TYPE (vectype); > > > > + } > > > > + else > > > > + lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs); > > > > > > ... this (wrap it in a V_C_E). No need to special-case any > > > MEM_REFs. > > > > Ok. After all it seems vectorizable_store pretty much ignores it > > (except for the scalar_dest check above). For aliasing it uses the type > > from DR_REF and otherwise it uses the vectorized type. > > > > > > + if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs))) > > > > > > This should never be false, so you can as well unconditionally build > > > the conversion stmt. > > > > You mean because currently adjust_bool_pattern will prefer signed types > > over unsigned while here lhs will be unsigned? 
I guess I should > > change it to use signed type for the memory store too to avoid the extra > > cast instead. Both types can be certainly the same precision, e.g. for: > > unsigned char a[N], b[N]; > > unsigned int d[N], e[N]; > > bool c[N]; > > ... > > for (i = 0; i < N; ++i) > > c[i] = a[i] < b[i]; > > or different precision, e.g. for: > > for (i = 0; i < N; ++i) > > c[i] = d[i] < e[i]; > > > > > > @@ -347,6 +347,28 @@ vect_determine_vectorization_factor (loo > > > > gcc_assert (STMT_VINFO_DATA_REF (stmt_info) > > > > || is_pattern_stmt_p (stmt_info)); > > > > vectype = STMT_VINFO_VECTYPE (stmt_info); > > > > + if (STMT_VINFO_DATA_REF (stmt_info)) > > > > + { > > > > + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); > > > > + tree scalar_type = TREE_TYPE (DR_REF (dr)); > > > > + /* vect_analyze_data_refs will allow bool writes through, > > > > + in order to allow vect_recog_bool_pattern to transform > > > > + those. If they couldn't be transformed, give up now. */ > > > > + if (((TYPE_PRECISION (scalar_type) == 1 > > > > + && TYPE_UNSIGNED (scalar_type)) > > > > + || TREE_CODE (scalar_type) == BOOLEAN_TYPE) > > > > > > Shouldn't it be always possible to vectorize those? For loads > > > we can assume the memory contains only 1 or 0 (we assume that for > > > scalar loads), for stores we can mask out all other bits explicitly > > > if you add support for truncating conversions to non-mode precision > > > (in fact, we could support non-mode precision vectorization that way, > > > if not support bitfield loads or extending conversions). > > > > Not without the pattern recognizer transforming it into something. > > That is something we've discussed on IRC before I started working on the > > first vect_recog_bool_pattern patch, we'd need to special case bool and > > one-bit precision types in way too many places all around the vectorizer. 
> > Another reason for that was that what vect_recog_bool_pattern does currently > > is certainly way faster than what would we end up with if we just handled > > bool as unsigned (or signed?) char with masking on casts and stores > > - the ability to use any integer type for the bools rather than char > > as appropriate means we can avoid many VEC_PACK_TRUNC_EXPRs and > > corresponding VEC_UNPACK_{LO,HI}_EXPRs. > > So the chosen solution was attempt to transform some of bool patterns > > into something the vectorizer can handle easily. > > And that can be extended over time what it handles. > > > > The above just reflects it, probably just me trying to be too cautious, > > the vectorization would likely fail on the stmt feeding the store, because > > get_vectype_for_scalar_type would fail on it. > > > > If we wanted to support general TYPE_PRECISION != GET_MODE_BITSIZE (TYPE_MODE) > > vectorization (hopefully with still preserving the pattern bool recognizer > > for the above stated reasons), we'd start with changing > > get_vectype_for_scalar_type to handle those types (then the > > tree-vect-data-refs.c and tree-vect-loop.c changes from this patch would > > be unnecessary), but then we'd need to handle it in other places too > > (I guess loads would be fine (unless BIT_FIELD_REF loads), but then > > casts and stores need extra code). > > This is what I have right now, bootstrapped and tested on > x86_64-unknown-linux-gnu. I do see > > FAIL: gfortran.dg/logical_dot_product.f90 -O3 -fomit-frame-pointer > (internal compiler error) > FAIL: gfortran.dg/mapping_1.f90 -O3 -fomit-frame-pointer (internal compiler error) > FAIL: gfortran.fortran-torture/execute/pr43390.f90, -O3 -g (internal compiler error) > > so there is some fallout, but somebody broke dejagnu enough that > I can't easily debug this right now, so I'm postponing it until > that is fixed. > > It doesn't seem to break any testcases for Bool vectorization. 
This one bootstraps and regtests fine on x86_64-unknown-linux-gnu. I didn't find a good pattern to split out, eventually how we call the vectorizable_* routines should be re-factored a bit. Does this look ok to you? Thanks, Richard. 2011-10-24 Richard Guenther <rguenther@suse.de> * tree-vect-stmts.c (vect_get_vec_def_for_operand): Convert constants to vector element type. (vectorizable_assignment): Bail out for non-mode-precision operations. (vectorizable_shift): Likewise. (vectorizable_operation): Likewise. (vectorizable_type_demotion): Likewise. (vectorizable_type_promotion): Likewise. (vectorizable_store): Handle non-mode-precision stores. (vectorizable_load): Handle non-mode-precision loads. (get_vectype_for_scalar_type_and_size): Return a vector type for non-mode-precision integers. * tree-vect-loop.c (vectorizable_reduction): Bail out for non-mode-precision reductions. * gcc.dg/vect/vect-bool-1.c: New testcase. Index: gcc/tree-vect-stmts.c =================================================================== *** gcc/tree-vect-stmts.c (revision 180380) --- gcc/tree-vect-stmts.c (working copy) *************** vect_get_vec_def_for_operand (tree op, g *** 1204,1210 **** if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits); ! vec_cst = build_vector_from_val (vector_type, op); return vect_init_vector (stmt, vec_cst, vector_type, NULL); } --- 1204,1212 ---- if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits); ! vec_cst = build_vector_from_val (vector_type, ! fold_convert (TREE_TYPE (vector_type), ! op)); return vect_init_vector (stmt, vec_cst, vector_type, NULL); } *************** vectorizable_assignment (gimple stmt, gi *** 2173,2178 **** --- 2175,2199 ---- != GET_MODE_SIZE (TYPE_MODE (vectype_in))))) return false; + /* We do not handle bit-precision changes. 
*/ + if ((CONVERT_EXPR_CODE_P (code) + || code == VIEW_CONVERT_EXPR) + && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) + && ((TYPE_PRECISION (TREE_TYPE (scalar_dest)) + != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest)))) + || ((TYPE_PRECISION (TREE_TYPE (op)) + != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op)))))) + /* But a conversion that does not change the bit-pattern is ok. */ + && !((TYPE_PRECISION (TREE_TYPE (scalar_dest)) + > TYPE_PRECISION (TREE_TYPE (op))) + && TYPE_UNSIGNED (TREE_TYPE (op)))) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "type conversion to/from bit-precision " + "unsupported."); + return false; + } + if (!vec_stmt) /* transformation not required. */ { STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type; *************** vectorizable_shift (gimple stmt, gimple_ *** 2326,2331 **** --- 2347,2359 ---- scalar_dest = gimple_assign_lhs (stmt); vectype_out = STMT_VINFO_VECTYPE (stmt_info); + if (TYPE_PRECISION (TREE_TYPE (scalar_dest)) + != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest)))) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "bit-precision shifts not supported."); + return false; + } op0 = gimple_assign_rhs1 (stmt); if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo, *************** vectorizable_operation (gimple stmt, gim *** 2660,2665 **** --- 2688,2708 ---- scalar_dest = gimple_assign_lhs (stmt); vectype_out = STMT_VINFO_VECTYPE (stmt_info); + /* Most operations cannot handle bit-precision types without extra + truncations. */ + if ((TYPE_PRECISION (TREE_TYPE (scalar_dest)) + != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest)))) + /* Exception are bitwise operations. 
*/ + && code != BIT_IOR_EXPR + && code != BIT_XOR_EXPR + && code != BIT_AND_EXPR + && code != BIT_NOT_EXPR) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "bit-precision arithmetic not supported."); + return false; + } + op0 = gimple_assign_rhs1 (stmt); if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[0], &vectype)) *************** vectorizable_type_demotion (gimple stmt, *** 3082,3090 **** if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) && INTEGRAL_TYPE_P (TREE_TYPE (op0))) || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest)) ! && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0)) ! && CONVERT_EXPR_CODE_P (code)))) return false; if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[0], &vectype_in)) { --- 3125,3144 ---- if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) && INTEGRAL_TYPE_P (TREE_TYPE (op0))) || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest)) ! && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))))) return false; + + if (INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) + && ((TYPE_PRECISION (TREE_TYPE (scalar_dest)) + != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest)))) + || ((TYPE_PRECISION (TREE_TYPE (op0)) + != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op0))))))) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "type demotion to/from bit-precision unsupported."); + return false; + } + if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[0], &vectype_in)) { *************** vectorizable_type_promotion (gimple stmt *** 3365,3370 **** --- 3419,3437 ---- && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0)) && CONVERT_EXPR_CODE_P (code)))) return false; + + if (INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) + && ((TYPE_PRECISION (TREE_TYPE (scalar_dest)) + != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest)))) + || ((TYPE_PRECISION (TREE_TYPE (op0)) + != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op0))))))) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, 
"type promotion to/from bit-precision " + "unsupported."); + return false; + } + if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[0], &vectype_in)) { *************** vectorizable_store (gimple stmt, gimple_ *** 3673,3689 **** return false; } - /* The scalar rhs type needs to be trivially convertible to the vector - component type. This should always be the case. */ elem_type = TREE_TYPE (vectype); - if (!useless_type_conversion_p (elem_type, TREE_TYPE (op))) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "??? operands of different types"); - return false; - } - vec_mode = TYPE_MODE (vectype); /* FORNOW. In some cases can vectorize even if data-type not supported (e.g. - array initialization with 0). */ if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing) --- 3740,3748 ---- return false; } elem_type = TREE_TYPE (vectype); vec_mode = TYPE_MODE (vectype); + /* FORNOW. In some cases can vectorize even if data-type not supported (e.g. - array initialization with 0). */ if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing) *************** vectorizable_load (gimple stmt, gimple_s *** 4117,4123 **** bool strided_load = false; bool load_lanes_p = false; gimple first_stmt; - tree scalar_type; bool inv_p; bool negative; bool compute_in_loop = false; --- 4176,4181 ---- *************** vectorizable_load (gimple stmt, gimple_s *** 4192,4198 **** return false; } ! scalar_type = TREE_TYPE (DR_REF (dr)); mode = TYPE_MODE (vectype); /* FORNOW. In some cases can vectorize even if data-type not supported --- 4250,4256 ---- return false; } ! elem_type = TREE_TYPE (vectype); mode = TYPE_MODE (vectype); /* FORNOW. In some cases can vectorize even if data-type not supported *************** vectorizable_load (gimple stmt, gimple_s *** 4204,4219 **** return false; } - /* The vector component type needs to be trivially convertible to the - scalar lhs. This should always be the case. 
*/ - elem_type = TREE_TYPE (vectype); - if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), elem_type)) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "??? operands of different types"); - return false; - } - /* Check if the load is a part of an interleaving chain. */ if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) { --- 4262,4267 ---- *************** vectorizable_load (gimple stmt, gimple_s *** 4560,4566 **** msq = new_temp; bump = size_binop (MULT_EXPR, vs_minus_1, ! TYPE_SIZE_UNIT (scalar_type)); ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump); new_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, NULL_TREE, ptr, --- 4608,4614 ---- msq = new_temp; bump = size_binop (MULT_EXPR, vs_minus_1, ! TYPE_SIZE_UNIT (elem_type)); ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump); new_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, NULL_TREE, ptr, *************** get_vectype_for_scalar_type_and_size (tr *** 5441,5453 **** if (nbytes < TYPE_ALIGN_UNIT (scalar_type)) return NULL_TREE; ! /* If we'd build a vector type of elements whose mode precision doesn't ! match their types precision we'll get mismatched types on vector ! extracts via BIT_FIELD_REFs. This effectively means we disable ! vectorization of bool and/or enum types in some languages. */ if (INTEGRAL_TYPE_P (scalar_type) && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)) ! return NULL_TREE; if (GET_MODE_CLASS (inner_mode) != MODE_INT && GET_MODE_CLASS (inner_mode) != MODE_FLOAT) --- 5489,5502 ---- if (nbytes < TYPE_ALIGN_UNIT (scalar_type)) return NULL_TREE; ! /* For vector types of elements whose mode precision doesn't ! match their types precision we use a element type of mode ! precision. The vectorization routines will have to make sure ! they support the proper result truncation/extension. */ if (INTEGRAL_TYPE_P (scalar_type) && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)) ! 
scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode), ! TYPE_UNSIGNED (scalar_type)); if (GET_MODE_CLASS (inner_mode) != MODE_INT && GET_MODE_CLASS (inner_mode) != MODE_FLOAT) Index: gcc/tree-vect-loop.c =================================================================== *** gcc/tree-vect-loop.c (revision 180380) --- gcc/tree-vect-loop.c (working copy) *************** vectorizable_reduction (gimple stmt, gim *** 4422,4427 **** --- 4422,4432 ---- && !SCALAR_FLOAT_TYPE_P (scalar_type)) return false; + /* Do not try to vectorize bit-precision reductions. */ + if ((TYPE_PRECISION (scalar_type) + != GET_MODE_PRECISION (TYPE_MODE (scalar_type)))) + return false; + /* All uses but the last are expected to be defined in the loop. The last use is the reduction variable. In case of nested cycle this assumption is not true: we use reduc_index to record the index of the Index: gcc/testsuite/gcc.dg/vect/vect-bool-1.c =================================================================== *** gcc/testsuite/gcc.dg/vect/vect-bool-1.c (revision 0) --- gcc/testsuite/gcc.dg/vect/vect-bool-1.c (revision 0) *************** *** 0 **** --- 1,15 ---- + /* { dg-do compile } */ + /* { dg-require-effective-target vect_int } */ + + _Bool a[1024]; + _Bool b[1024]; + _Bool c[1024]; + void foo (void) + { + unsigned i; + for (i = 0; i < 1024; ++i) + a[i] = b[i] | c[i]; + } + + /* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */
--- gcc/tree-vect-stmts.c.jj 2011-10-18 23:52:07.000000000 +0200 +++ gcc/tree-vect-stmts.c 2011-10-19 14:19:00.000000000 +0200 @@ -159,19 +159,20 @@ vect_mark_relevant (VEC(gimple,heap) **w /* This use is out of pattern use, if LHS has other uses that are pattern uses, we should mark the stmt itself, and not the pattern stmt. */ - FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) - { - if (is_gimple_debug (USE_STMT (use_p))) - continue; - use_stmt = USE_STMT (use_p); + if (TREE_CODE (lhs) == SSA_NAME) + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) + { + if (is_gimple_debug (USE_STMT (use_p))) + continue; + use_stmt = USE_STMT (use_p); - if (vinfo_for_stmt (use_stmt) - && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt))) - { - found = true; - break; - } - } + if (vinfo_for_stmt (use_stmt) + && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt))) + { + found = true; + break; + } + } } if (!found) @@ -3656,6 +3657,9 @@ vectorizable_store (gimple stmt, gimple_ return false; scalar_dest = gimple_assign_lhs (stmt); + if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR + && is_pattern_stmt_p (stmt_info)) + scalar_dest = TREE_OPERAND (scalar_dest, 0); if (TREE_CODE (scalar_dest) != ARRAY_REF && TREE_CODE (scalar_dest) != INDIRECT_REF && TREE_CODE (scalar_dest) != COMPONENT_REF --- gcc/tree-vect-patterns.c.jj 2011-10-18 23:52:05.000000000 +0200 +++ gcc/tree-vect-patterns.c 2011-10-19 13:55:27.000000000 +0200 @@ -1933,6 +1933,50 @@ vect_recog_bool_pattern (VEC (gimple, he VEC_safe_push (gimple, heap, *stmts, last_stmt); return pattern_stmt; } + else if (rhs_code == SSA_NAME + && STMT_VINFO_DATA_REF (stmt_vinfo)) + { + stmt_vec_info pattern_stmt_info; + vectype = STMT_VINFO_VECTYPE (stmt_vinfo); + gcc_assert (vectype != NULL_TREE); + if (!check_bool_pattern (var, loop_vinfo)) + return NULL; + + rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts); + if (TREE_CODE (lhs) == MEM_REF || TREE_CODE (lhs) == TARGET_MEM_REF) + { + lhs = copy_node (lhs); + TREE_TYPE (lhs) 
= TREE_TYPE (vectype); + } + else + lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs); + if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs))) + { + tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); + gimple cast_stmt + = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE); + STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = cast_stmt; + rhs = rhs2; + } + pattern_stmt + = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE); + pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL); + set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info); + STMT_VINFO_DATA_REF (pattern_stmt_info) + = STMT_VINFO_DATA_REF (stmt_vinfo); + STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info) + = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo); + STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo); + STMT_VINFO_DR_OFFSET (pattern_stmt_info) + = STMT_VINFO_DR_OFFSET (stmt_vinfo); + STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo); + STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info) + = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo); + *type_out = vectype; + *type_in = vectype; + VEC_safe_push (gimple, heap, *stmts, last_stmt); + return pattern_stmt; + } else return NULL; } @@ -1949,19 +1993,22 @@ vect_mark_pattern_stmts (gimple orig_stm loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info); gimple def_stmt; - set_vinfo_for_stmt (pattern_stmt, - new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL)); - gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt)); pattern_stmt_info = vinfo_for_stmt (pattern_stmt); + if (pattern_stmt_info == NULL) + { + pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL); + set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info); + } + gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt)); STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt; STMT_VINFO_DEF_TYPE (pattern_stmt_info) - = STMT_VINFO_DEF_TYPE (orig_stmt_info); + = STMT_VINFO_DEF_TYPE (orig_stmt_info); 
STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype; STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true; STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt; STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info) - = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info); + = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info); if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)) { def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info); --- gcc/tree-vect-data-refs.c.jj 2011-09-20 21:43:07.000000000 +0200 +++ gcc/tree-vect-data-refs.c 2011-10-19 14:37:44.000000000 +0200 @@ -2752,8 +2752,23 @@ vect_analyze_data_refs (loop_vec_info lo /* Set vectype for STMT. */ scalar_type = TREE_TYPE (DR_REF (dr)); - STMT_VINFO_VECTYPE (stmt_info) = - get_vectype_for_scalar_type (scalar_type); + STMT_VINFO_VECTYPE (stmt_info) + = get_vectype_for_scalar_type (scalar_type); + if (!STMT_VINFO_VECTYPE (stmt_info) + && ((TYPE_PRECISION (scalar_type) == 1 + && TYPE_UNSIGNED (scalar_type)) + || TREE_CODE (scalar_type) == BOOLEAN_TYPE) + && DR_IS_WRITE (dr) + && loop_vinfo) + { + /* For bool stores use integral type with the same + TYPE_MODE, but bigger precision. vect_recog_bool_pattern + can transform those into something vectorizable. */ + unsigned int modesize = GET_MODE_BITSIZE (TYPE_MODE (scalar_type)); + scalar_type = build_nonstandard_integer_type (modesize, 1); + STMT_VINFO_VECTYPE (stmt_info) + = get_vectype_for_scalar_type (scalar_type); + } if (!STMT_VINFO_VECTYPE (stmt_info)) { if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) --- gcc/tree-vect-loop.c.jj 2011-09-26 14:06:52.000000000 +0200 +++ gcc/tree-vect-loop.c 2011-10-19 14:49:18.000000000 +0200 @@ -1,5 +1,5 @@ /* Loop Vectorization - Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 + Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. 
Contributed by Dorit Naishlos <dorit@il.ibm.com> and Ira Rosen <irar@il.ibm.com> @@ -347,6 +347,28 @@ vect_determine_vectorization_factor (loo gcc_assert (STMT_VINFO_DATA_REF (stmt_info) || is_pattern_stmt_p (stmt_info)); vectype = STMT_VINFO_VECTYPE (stmt_info); + if (STMT_VINFO_DATA_REF (stmt_info)) + { + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); + tree scalar_type = TREE_TYPE (DR_REF (dr)); + /* vect_analyze_data_refs will allow bool writes through, + in order to allow vect_recog_bool_pattern to transform + those. If they couldn't be transformed, give up now. */ + if (((TYPE_PRECISION (scalar_type) == 1 + && TYPE_UNSIGNED (scalar_type)) + || TREE_CODE (scalar_type) == BOOLEAN_TYPE) + && DR_IS_WRITE (dr) + && !is_pattern_stmt_p (stmt_info)) + { + if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) + { + fprintf (vect_dump, + "not vectorized: unsupported data-type "); + print_generic_expr (vect_dump, scalar_type, TDF_SLIM); + } + return false; + } + } } else { --- gcc/testsuite/gcc.dg/vect/vect-cond-10.c.jj 2011-10-19 15:54:42.000000000 +0200 +++ gcc/testsuite/gcc.dg/vect/vect-cond-10.c 2011-10-19 16:00:22.000000000 +0200 @@ -0,0 +1,165 @@ +/* { dg-require-effective-target vect_cond_mixed } */ + +#include "tree-vect.h" + +#define N 1024 +float a[N], b[N], c[N], d[N]; +_Bool k[N]; + +__attribute__((noinline, noclone)) void +f1 (void) +{ + int i; + for (i = 0; i < N; ++i) + { + int x = a[i] < b[i]; + int y = c[i] < d[i]; + k[i] = x & y; + } +} + +__attribute__((noinline, noclone)) void +f2 (void) +{ + int i; + for (i = 0; i < N; ++i) + k[i] = (a[i] < b[i]) & (c[i] < d[i]); +} + +__attribute__((noinline, noclone)) void +f3 (void) +{ + int i; + for (i = 0; i < N; ++i) + { + int x = a[i] < b[i]; + int y = c[i] < d[i]; + k[i] = x | y; + } +} + +__attribute__((noinline, noclone)) void +f4 (void) +{ + int i; + for (i = 0; i < N; ++i) + k[i] = (a[i] < b[i]) | (c[i] < d[i]); +} + +__attribute__((noinline, noclone)) void +f5 (_Bool *p) +{ + int i; + 
for (i = 0; i < N; ++i) + { + int x = a[i] < b[i]; + int y = c[i] < d[i]; + p[i] = x & y; + } +} + +__attribute__((noinline, noclone)) void +f6 (_Bool *p) +{ + int i; + for (i = 0; i < N; ++i) + p[i] = (a[i] < b[i]) & (c[i] < d[i]); +} + +__attribute__((noinline, noclone)) void +f7 (_Bool *p) +{ + int i; + for (i = 0; i < N; ++i) + { + int x = a[i] < b[i]; + int y = c[i] < d[i]; + p[i] = x | y; + } +} + +__attribute__((noinline, noclone)) void +f8 (_Bool *p) +{ + int i; + for (i = 0; i < N; ++i) + p[i] = (a[i] < b[i]) | (c[i] < d[i]); +} + +int +main () +{ + int i; + + check_vect (); + + for (i = 0; i < N; i++) + { + switch (i % 9) + { + case 0: asm (""); a[i] = - i - 1; b[i] = i + 1; break; + case 1: a[i] = 0; b[i] = 0; break; + case 2: a[i] = i + 1; b[i] = - i - 1; break; + case 3: a[i] = i; b[i] = i + 7; break; + case 4: a[i] = i; b[i] = i; break; + case 5: a[i] = i + 16; b[i] = i + 3; break; + case 6: a[i] = - i - 5; b[i] = - i; break; + case 7: a[i] = - i; b[i] = - i; break; + case 8: a[i] = - i; b[i] = - i - 7; break; + } + } + for (i = 0; i < N; i++) + { + switch ((i / 9) % 3) + { + case 0: c[i] = a[i / 9]; d[i] = b[i / 9]; break; + case 1: c[i] = a[i / 9 + 3]; d[i] = b[i / 9 + 3]; break; + case 2: c[i] = a[i / 9 + 6]; d[i] = b[i / 9 + 6]; break; + } + } + f1 (); + for (i = 0; i < N; i++) + if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0)) + abort (); + __builtin_memset (k, 0, sizeof (k)); + f2 (); + for (i = 0; i < N; i++) + if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0)) + abort (); + __builtin_memset (k, 0, sizeof (k)); + f3 (); + for (i = 0; i < N; i++) + if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0)) + abort (); + __builtin_memset (k, 0, sizeof (k)); + f4 (); + for (i = 0; i < N; i++) + if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0)) + abort (); + __builtin_memset (k, 0, sizeof (k)); + f5 (k); + for (i = 0; i < N; i++) + if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0)) + abort (); + __builtin_memset (k, 0, sizeof (k)); + f6 (k); + for (i = 0; i 
< N; i++) + if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0)) + abort (); + __builtin_memset (k, 0, sizeof (k)); + f7 (k); + for (i = 0; i < N; i++) + if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0)) + abort (); + __builtin_memset (k, 0, sizeof (k)); + f8 (k); + for (i = 0; i < N; i++) + if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0)) + abort (); + __builtin_memset (k, 0, sizeof (k)); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops" 8 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */