
Extend vect_recog_bool_pattern also to stores into bool memory (PR tree-optimization/50596)

Message ID 20111019171440.GE2210@tyan-ft48-01.lab.bos.redhat.com

Commit Message

Jakub Jelinek Oct. 19, 2011, 5:14 p.m. UTC
Hi!

Similarly to casts of bool to integer, stores into bool arrays
can be handled as well.  We just need to ensure tree-vect-data-refs.c
doesn't reject vectorization before tree-vect-patterns.c has a chance
to optimize it.
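
For a concrete picture of the kind of loop this enables, here is a
minimal sketch (in the spirit of the new testcase, not taken from it
verbatim):

  #define N 1024
  float a[N], b[N];
  _Bool k[N];

  void
  f (void)
  {
    int i;
    for (i = 0; i < N; ++i)
      k[i] = a[i] < b[i];  /* store into bool memory */
  }

Before this patch vect_analyze_data_refs gave up on the store because
get_vectype_for_scalar_type has no vector type for the 1-bit-precision
bool; now the store is left for the pattern recognizer to rewrite.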

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2011-10-19  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/50596
	* tree-vect-stmts.c (vect_mark_relevant): Only use
	FOR_EACH_IMM_USE_FAST if lhs is SSA_NAME.
	(vectorizable_store): If is_pattern_stmt_p look through
	VIEW_CONVERT_EXPR on lhs.
	* tree-vect-patterns.c (vect_recog_bool_pattern): Optimize
	also stores into bool memory in addition to casts from bool
	to integral types.
	(vect_mark_pattern_stmts): If pattern_stmt already has vinfo
	created, don't create it again.
	* tree-vect-data-refs.c (vect_analyze_data_refs): For stores
	into bool memory use vectype for integral type corresponding
	to bool's mode.
	* tree-vect-loop.c (vect_determine_vectorization_factor): Give up
	if a store into bool memory hasn't been replaced by the pattern
	recognizer.

	* gcc.dg/vect/vect-cond-10.c: New test.


	Jakub

Comments

Richard Biener Oct. 20, 2011, 9:42 a.m. UTC | #1
On Wed, 19 Oct 2011, Jakub Jelinek wrote:

> Hi!
> 
> Similarly to casts of bool to integer, stores into bool arrays
> can be handled as well.  We just need to ensure tree-vect-data-refs.c
> doesn't reject vectorization before tree-vect-patterns.c has a chance
> to optimize it.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok with ...

> 2011-10-19  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR tree-optimization/50596
> 	* tree-vect-stmts.c (vect_mark_relevant): Only use
> 	FOR_EACH_IMM_USE_FAST if lhs is SSA_NAME.
> 	(vectorizable_store): If is_pattern_stmt_p look through
> 	VIEW_CONVERT_EXPR on lhs.
> 	* tree-vect-patterns.c (vect_recog_bool_pattern): Optimize
> 	also stores into bool memory in addition to casts from bool
> 	to integral types.
> 	(vect_mark_pattern_stmts): If pattern_stmt already has vinfo
> 	created, don't create it again.
> 	* tree-vect-data-refs.c (vect_analyze_data_refs): For stores
> 	into bool memory use vectype for integral type corresponding
> 	to bool's mode.
> 	* tree-vect-loop.c (vect_determine_vectorization_factor): Give up
> 	if a store into bool memory hasn't been replaced by the pattern
> 	recognizer.
> 
> 	* gcc.dg/vect/vect-cond-10.c: New test.
> 
> --- gcc/tree-vect-stmts.c.jj	2011-10-18 23:52:07.000000000 +0200
> +++ gcc/tree-vect-stmts.c	2011-10-19 14:19:00.000000000 +0200
> @@ -159,19 +159,20 @@ vect_mark_relevant (VEC(gimple,heap) **w
>            /* This use is out of pattern use, if LHS has other uses that are
>               pattern uses, we should mark the stmt itself, and not the pattern
>               stmt.  */
> -          FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
> -            {
> -              if (is_gimple_debug (USE_STMT (use_p)))
> -                continue;
> -              use_stmt = USE_STMT (use_p);
> +	  if (TREE_CODE (lhs) == SSA_NAME)
> +	    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
> +	      {
> +		if (is_gimple_debug (USE_STMT (use_p)))
> +		  continue;
> +		use_stmt = USE_STMT (use_p);
>  
> -              if (vinfo_for_stmt (use_stmt)
> -                  && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
> -                {
> -                  found = true;
> -                  break;
> -                }
> -            }
> +		if (vinfo_for_stmt (use_stmt)
> +		    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
> +		  {
> +		    found = true;
> +		    break;
> +		  }
> +	      }
>          }
>  
>        if (!found)
> @@ -3656,6 +3657,9 @@ vectorizable_store (gimple stmt, gimple_
>      return false;
>  
>    scalar_dest = gimple_assign_lhs (stmt);
> +  if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
> +      && is_pattern_stmt_p (stmt_info))
> +    scalar_dest = TREE_OPERAND (scalar_dest, 0);
>    if (TREE_CODE (scalar_dest) != ARRAY_REF
>        && TREE_CODE (scalar_dest) != INDIRECT_REF
>        && TREE_CODE (scalar_dest) != COMPONENT_REF

Just change the if () stmt to

 if (!handled_component_p (scalar_dest)
     && TREE_CODE (scalar_dest) != MEM_REF)
   return false;

> --- gcc/tree-vect-patterns.c.jj	2011-10-18 23:52:05.000000000 +0200
> +++ gcc/tree-vect-patterns.c	2011-10-19 13:55:27.000000000 +0200
> @@ -1933,6 +1933,50 @@ vect_recog_bool_pattern (VEC (gimple, he
>        VEC_safe_push (gimple, heap, *stmts, last_stmt);
>        return pattern_stmt;
>      }
> +  else if (rhs_code == SSA_NAME
> +	   && STMT_VINFO_DATA_REF (stmt_vinfo))
> +    {
> +      stmt_vec_info pattern_stmt_info;
> +      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
> +      gcc_assert (vectype != NULL_TREE);
> +      if (!check_bool_pattern (var, loop_vinfo))
> +	return NULL;
> +
> +      rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
> +      if (TREE_CODE (lhs) == MEM_REF || TREE_CODE (lhs) == TARGET_MEM_REF)
> +	{
> +	  lhs = copy_node (lhs);

We don't handle TARGET_MEM_REF in vectorizable_store, so no need to
do it here.  In fact, just unconditionally do ...

> +	  TREE_TYPE (lhs) = TREE_TYPE (vectype);
> +	}
> +      else
> +	lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);

... this (wrap it in a V_C_E).  No need to special-case any
MEM_REFs.

> +      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))

This should never be false, so you can as well unconditionally build
the conversion stmt.

> +	{
> +	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
> +	  gimple cast_stmt
> +	    = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
> +	  STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = cast_stmt;
> +	  rhs = rhs2;
> +	}
> +      pattern_stmt
> +	= gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
> +      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
> +      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
> +      STMT_VINFO_DATA_REF (pattern_stmt_info)
> +	= STMT_VINFO_DATA_REF (stmt_vinfo);
> +      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
> +	= STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
> +      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
> +      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
> +	= STMT_VINFO_DR_OFFSET (stmt_vinfo);
> +      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
> +      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
> +	= STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
> +      *type_out = vectype;
> +      *type_in = vectype;
> +      VEC_safe_push (gimple, heap, *stmts, last_stmt);
> +      return pattern_stmt;
> +    }
>    else
>      return NULL;
>  }
> @@ -1949,19 +1993,22 @@ vect_mark_pattern_stmts (gimple orig_stm
>    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
>    gimple def_stmt;
>  
> -  set_vinfo_for_stmt (pattern_stmt,
> -                      new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
> -  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
>    pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
> +  if (pattern_stmt_info == NULL)
> +    {
> +      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
> +      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
> +    }
> +  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
>  
>    STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
>    STMT_VINFO_DEF_TYPE (pattern_stmt_info)
> -	= STMT_VINFO_DEF_TYPE (orig_stmt_info);
> +    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
>    STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
>    STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
>    STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
>    STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)
> -	= STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
> +    = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
>    if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info))
>      {
>        def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info);
> --- gcc/tree-vect-data-refs.c.jj	2011-09-20 21:43:07.000000000 +0200
> +++ gcc/tree-vect-data-refs.c	2011-10-19 14:37:44.000000000 +0200
> @@ -2752,8 +2752,23 @@ vect_analyze_data_refs (loop_vec_info lo
>  
>        /* Set vectype for STMT.  */
>        scalar_type = TREE_TYPE (DR_REF (dr));
> -      STMT_VINFO_VECTYPE (stmt_info) =
> -                get_vectype_for_scalar_type (scalar_type);
> +      STMT_VINFO_VECTYPE (stmt_info)
> +	= get_vectype_for_scalar_type (scalar_type);
> +      if (!STMT_VINFO_VECTYPE (stmt_info)
> +	  && ((TYPE_PRECISION (scalar_type) == 1
> +	       && TYPE_UNSIGNED (scalar_type))
> +	      || TREE_CODE (scalar_type) == BOOLEAN_TYPE)
> +	  && DR_IS_WRITE (dr)
> +	  && loop_vinfo)
> +	{
> +	  /* For bool stores use integral type with the same
> +	     TYPE_MODE, but bigger precision.  vect_recog_bool_pattern
> +	     can transform those into something vectorizable.  */
> +	  unsigned int modesize = GET_MODE_BITSIZE (TYPE_MODE (scalar_type));
> +	  scalar_type = build_nonstandard_integer_type (modesize, 1);
> +	  STMT_VINFO_VECTYPE (stmt_info)
> +	    = get_vectype_for_scalar_type (scalar_type);
> +	}
>        if (!STMT_VINFO_VECTYPE (stmt_info))
>          {
>            if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
> --- gcc/tree-vect-loop.c.jj	2011-09-26 14:06:52.000000000 +0200
> +++ gcc/tree-vect-loop.c	2011-10-19 14:49:18.000000000 +0200
> @@ -1,5 +1,5 @@
>  /* Loop Vectorization
> -   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
> +   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
>     Free Software Foundation, Inc.
>     Contributed by Dorit Naishlos <dorit@il.ibm.com> and
>     Ira Rosen <irar@il.ibm.com>
> @@ -347,6 +347,28 @@ vect_determine_vectorization_factor (loo
>  	      gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
>  			  || is_pattern_stmt_p (stmt_info));
>  	      vectype = STMT_VINFO_VECTYPE (stmt_info);
> +	      if (STMT_VINFO_DATA_REF (stmt_info))
> +		{
> +		  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
> +		  tree scalar_type = TREE_TYPE (DR_REF (dr));
> +		  /* vect_analyze_data_refs will allow bool writes through,
> +		     in order to allow vect_recog_bool_pattern to transform
> +		     those.  If they couldn't be transformed, give up now.  */
> +		  if (((TYPE_PRECISION (scalar_type) == 1
> +			&& TYPE_UNSIGNED (scalar_type))
> +		       || TREE_CODE (scalar_type) == BOOLEAN_TYPE)

Shouldn't it be always possible to vectorize those?  For loads
we can assume the memory contains only 1 or 0 (we assume that for
scalar loads), for stores we can mask out all other bits explicitly
if you add support for truncating conversions to non-mode precision
(in fact, we could support non-mode precision vectorization that way,
if not support bitfield loads or extending conversions).

So maybe that obsoletes my conditional approval ;)  Can you
investigate whether the above would work?
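
As a scalar illustration of the masking idea (a hand-written sketch,
not something the vectorizer emits today): a truncating conversion to
a non-mode-precision type would have to keep only the bits within the
type's precision before the store, i.e. per element something like

  void
  store_flags (_Bool *c, const int *x, int n)
  {
    int i;
    for (i = 0; i < n; ++i)
      c[i] = x[i] & 1;  /* explicit mask down to the 1-bit precision */
  }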

Thanks,
Richard.

> +		      && DR_IS_WRITE (dr)
> +		      && !is_pattern_stmt_p (stmt_info))
> +		    {
> +		      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
> +			{
> +			  fprintf (vect_dump,
> +				   "not vectorized: unsupported data-type ");
> +			  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
> +			}
> +		      return false;
> +		    }
> +		}
>  	    }
>  	  else
>  	    {
> --- gcc/testsuite/gcc.dg/vect/vect-cond-10.c.jj	2011-10-19 15:54:42.000000000 +0200
> +++ gcc/testsuite/gcc.dg/vect/vect-cond-10.c	2011-10-19 16:00:22.000000000 +0200
> @@ -0,0 +1,165 @@
> +/* { dg-require-effective-target vect_cond_mixed } */
> +
> +#include "tree-vect.h"
> +
> +#define N 1024
> +float a[N], b[N], c[N], d[N];
> +_Bool k[N];
> +
> +__attribute__((noinline, noclone)) void
> +f1 (void)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    {
> +      int x = a[i] < b[i];
> +      int y = c[i] < d[i];
> +      k[i] = x & y;
> +    }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f2 (void)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    k[i] = (a[i] < b[i]) & (c[i] < d[i]);
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f3 (void)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    {
> +      int x = a[i] < b[i];
> +      int y = c[i] < d[i];
> +      k[i] = x | y;
> +    }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f4 (void)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    k[i] = (a[i] < b[i]) | (c[i] < d[i]);
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f5 (_Bool *p)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    {
> +      int x = a[i] < b[i];
> +      int y = c[i] < d[i];
> +      p[i] = x & y;
> +    }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f6 (_Bool *p)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    p[i] = (a[i] < b[i]) & (c[i] < d[i]);
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f7 (_Bool *p)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    {
> +      int x = a[i] < b[i];
> +      int y = c[i] < d[i];
> +      p[i] = x | y;
> +    }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f8 (_Bool *p)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    p[i] = (a[i] < b[i]) | (c[i] < d[i]);
> +}
> +
> +int
> +main ()
> +{
> +  int i;
> +
> +  check_vect ();
> +
> +  for (i = 0; i < N; i++)
> +    {
> +      switch (i % 9)
> +	{
> +	case 0: asm (""); a[i] = - i - 1; b[i] = i + 1; break;
> +	case 1: a[i] = 0; b[i] = 0; break;
> +	case 2: a[i] = i + 1; b[i] = - i - 1; break;
> +	case 3: a[i] = i; b[i] = i + 7; break;
> +	case 4: a[i] = i; b[i] = i; break;
> +	case 5: a[i] = i + 16; b[i] = i + 3; break;
> +	case 6: a[i] = - i - 5; b[i] = - i; break;
> +	case 7: a[i] = - i; b[i] = - i; break;
> +	case 8: a[i] = - i; b[i] = - i - 7; break;
> +	}
> +    }
> +  for (i = 0; i < N; i++)
> +    {
> +      switch ((i / 9) % 3)
> +	{
> +	case 0: c[i] = a[i / 9]; d[i] = b[i / 9]; break;
> +	case 1: c[i] = a[i / 9 + 3]; d[i] = b[i / 9 + 3]; break;
> +	case 2: c[i] = a[i / 9 + 6]; d[i] = b[i / 9 + 6]; break;
> +	}
> +    }
> +  f1 ();
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +  f2 ();
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +  f3 ();
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +  f4 ();
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +  f5 (k);
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +  f6 (k);
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +  f7 (k);
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +  f8 (k);
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops" 8 "vect" } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> 
> 	Jakub
> 
>
Jakub Jelinek Oct. 20, 2011, 10:31 a.m. UTC | #2
On Thu, Oct 20, 2011 at 11:42:01AM +0200, Richard Guenther wrote:
> > +  if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
> > +      && is_pattern_stmt_p (stmt_info))
> > +    scalar_dest = TREE_OPERAND (scalar_dest, 0);
> >    if (TREE_CODE (scalar_dest) != ARRAY_REF
> >        && TREE_CODE (scalar_dest) != INDIRECT_REF
> >        && TREE_CODE (scalar_dest) != COMPONENT_REF
> 
> Just change the if () stmt to
> 
>  if (!handled_component_p (scalar_dest)
>      && TREE_CODE (scalar_dest) != MEM_REF)
>    return false;

That will accept BIT_FIELD_REF and ARRAY_RANGE_REF (as well as VCE outside of pattern stmts).
The VCEs I hope don't appear, but the first two might, and I'm not sure
we are prepared to handle them.  Certainly not BIT_FIELD_REFs.

> > +      rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
> > +      if (TREE_CODE (lhs) == MEM_REF || TREE_CODE (lhs) == TARGET_MEM_REF)
> > +	{
> > +	  lhs = copy_node (lhs);
> 
> We don't handle TARGET_MEM_REF in vectorizable_store, so no need to
> do it here.  In fact, just unconditionally do ...
> 
> > +	  TREE_TYPE (lhs) = TREE_TYPE (vectype);
> > +	}
> > +      else
> > +	lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
> 
> ... this (wrap it in a V_C_E).  No need to special-case any
> MEM_REFs.

Ok.  After all it seems vectorizable_store pretty much ignores it
(except for the scalar_dest check above).  For aliasing it uses the type
from DR_REF and otherwise it uses the vectorized type.

> > +      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
> 
> This should never be false, so you can as well unconditionally build
> the conversion stmt.

You mean because currently adjust_bool_pattern will prefer signed types
over unsigned while here lhs will be unsigned?  I guess I should
change it to use signed type for the memory store too to avoid the extra
cast instead.  Both types can certainly be the same precision, e.g. for:
unsigned char a[N], b[N];
unsigned int d[N], e[N];
bool c[N];
...
  for (i = 0; i < N; ++i)
    c[i] = a[i] < b[i];
or different precision, e.g. for:
  for (i = 0; i < N; ++i)
    c[i] = d[i] < e[i];
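
For that second loop the recognizer would then produce something along
the lines of (a sketch of the resulting shape, signedness choice aside):

  for (i = 0; i < N; ++i)
    {
      int tmp = d[i] < e[i] ? 1 : 0;  /* kept in the comparison's width */
      ((char *) c)[i] = (char) tmp;   /* single narrowing at the store */
    }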

> > @@ -347,6 +347,28 @@ vect_determine_vectorization_factor (loo
> >  	      gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
> >  			  || is_pattern_stmt_p (stmt_info));
> >  	      vectype = STMT_VINFO_VECTYPE (stmt_info);
> > +	      if (STMT_VINFO_DATA_REF (stmt_info))
> > +		{
> > +		  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
> > +		  tree scalar_type = TREE_TYPE (DR_REF (dr));
> > +		  /* vect_analyze_data_refs will allow bool writes through,
> > +		     in order to allow vect_recog_bool_pattern to transform
> > +		     those.  If they couldn't be transformed, give up now.  */
> > +		  if (((TYPE_PRECISION (scalar_type) == 1
> > +			&& TYPE_UNSIGNED (scalar_type))
> > +		       || TREE_CODE (scalar_type) == BOOLEAN_TYPE)
> 
> Shouldn't it be always possible to vectorize those?  For loads
> we can assume the memory contains only 1 or 0 (we assume that for
> scalar loads), for stores we can mask out all other bits explicitly
> if you add support for truncating conversions to non-mode precision
> (in fact, we could support non-mode precision vectorization that way,
> if not support bitfield loads or extending conversions).

Not without the pattern recognizer transforming it into something.
That is something we discussed on IRC before I started working on the
first vect_recog_bool_pattern patch: we'd need to special-case bool and
one-bit precision types in way too many places all around the vectorizer.
Another reason was that what vect_recog_bool_pattern does currently
is certainly way faster than what we would end up with if we just handled
bool as unsigned (or signed?) char with masking on casts and stores
- the ability to use any integer type for the bools rather than char
as appropriate means we can avoid many VEC_PACK_TRUNC_EXPRs and
corresponding VEC_UNPACK_{LO,HI}_EXPRs.
So the chosen solution was to attempt to transform some of the bool patterns
into something the vectorizer can handle easily,
and what it handles can be extended over time.

The above just reflects that; probably it is just me being too cautious -
the vectorization would likely fail on the stmt feeding the store anyway,
because get_vectype_for_scalar_type would fail on it.

If we wanted to support general TYPE_PRECISION != GET_MODE_BITSIZE (TYPE_MODE)
vectorization (hopefully while still preserving the bool pattern recognizer
for the reasons stated above), we'd start with changing
get_vectype_for_scalar_type to handle those types (then the
tree-vect-data-refs.c and tree-vect-loop.c changes from this patch would
be unnecessary), but then we'd need to handle it in other places too
(I guess loads would be fine (unless BIT_FIELD_REF loads), but then
casts and stores need extra code).

	Jakub
Richard Biener Oct. 21, 2011, 9:19 a.m. UTC | #3
On Thu, Oct 20, 2011 at 12:31 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Thu, Oct 20, 2011 at 11:42:01AM +0200, Richard Guenther wrote:
>> > +  if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
>> > +      && is_pattern_stmt_p (stmt_info))
>> > +    scalar_dest = TREE_OPERAND (scalar_dest, 0);
>> >    if (TREE_CODE (scalar_dest) != ARRAY_REF
>> >        && TREE_CODE (scalar_dest) != INDIRECT_REF
>> >        && TREE_CODE (scalar_dest) != COMPONENT_REF
>>
>> Just change the if () stmt to
>>
>>  if (!handled_component_p (scalar_dest)
>>      && TREE_CODE (scalar_dest) != MEM_REF)
>>    return false;
>
> That will accept BIT_FIELD_REF and ARRAY_RANGE_REF (as well as VCE outside of pattern stmts).
> The VCEs I hope don't appear, but the first two might, and I'm not sure
> we are prepared to handle them.  Certainly not BIT_FIELD_REFs.
>
>> > +      rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
>> > +      if (TREE_CODE (lhs) == MEM_REF || TREE_CODE (lhs) == TARGET_MEM_REF)
>> > +   {
>> > +     lhs = copy_node (lhs);
>>
>> We don't handle TARGET_MEM_REF in vectorizable_store, so no need to
>> do it here.  In fact, just unconditionally do ...
>>
>> > +     TREE_TYPE (lhs) = TREE_TYPE (vectype);
>> > +   }
>> > +      else
>> > +   lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
>>
>> ... this (wrap it in a V_C_E).  No need to special-case any
>> MEM_REFs.
>
> Ok.  After all it seems vectorizable_store pretty much ignores it
> (except for the scalar_dest check above).  For aliasing it uses the type
> from DR_REF and otherwise it uses the vectorized type.
>
>> > +      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
>>
>> This should never be false, so you can as well unconditionally build
>> the conversion stmt.
>
> You mean because currently adjust_bool_pattern will prefer signed types
> over unsigned while here lhs will be unsigned?  I guess I should
> change it to use signed type for the memory store too to avoid the extra
> cast instead.  Both types can certainly be the same precision, e.g. for:
> unsigned char a[N], b[N];
> unsigned int d[N], e[N];
> bool c[N];
> ...
>  for (i = 0; i < N; ++i)
>    c[i] = a[i] < b[i];
> or different precision, e.g. for:
>  for (i = 0; i < N; ++i)
>    c[i] = d[i] < e[i];
>
>> > @@ -347,6 +347,28 @@ vect_determine_vectorization_factor (loo
>> >           gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
>> >                       || is_pattern_stmt_p (stmt_info));
>> >           vectype = STMT_VINFO_VECTYPE (stmt_info);
>> > +         if (STMT_VINFO_DATA_REF (stmt_info))
>> > +           {
>> > +             struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
>> > +             tree scalar_type = TREE_TYPE (DR_REF (dr));
>> > +             /* vect_analyze_data_refs will allow bool writes through,
>> > +                in order to allow vect_recog_bool_pattern to transform
>> > +                those.  If they couldn't be transformed, give up now.  */
>> > +             if (((TYPE_PRECISION (scalar_type) == 1
>> > +                   && TYPE_UNSIGNED (scalar_type))
>> > +                  || TREE_CODE (scalar_type) == BOOLEAN_TYPE)
>>
>> Shouldn't it be always possible to vectorize those?  For loads
>> we can assume the memory contains only 1 or 0 (we assume that for
>> scalar loads), for stores we can mask out all other bits explicitly
>> if you add support for truncating conversions to non-mode precision
>> (in fact, we could support non-mode precision vectorization that way,
>> if not support bitfield loads or extending conversions).
>
> Not without the pattern recognizer transforming it into something.
> That is something we discussed on IRC before I started working on the
> first vect_recog_bool_pattern patch: we'd need to special-case bool and
> one-bit precision types in way too many places all around the vectorizer.
> Another reason was that what vect_recog_bool_pattern does currently
> is certainly way faster than what we would end up with if we just handled
> bool as unsigned (or signed?) char with masking on casts and stores
> - the ability to use any integer type for the bools rather than char
> as appropriate means we can avoid many VEC_PACK_TRUNC_EXPRs and
> corresponding VEC_UNPACK_{LO,HI}_EXPRs.
> So the chosen solution was to attempt to transform some of the bool patterns
> into something the vectorizer can handle easily,
> and what it handles can be extended over time.
>
> The above just reflects it, probably just me trying to be too cautious,
> the vectorization would likely fail on the stmt feeding the store, because
> get_vectype_for_scalar_type would fail on it.
>
> If we wanted to support general TYPE_PRECISION != GET_MODE_BITSIZE (TYPE_MODE)
> vectorization (hopefully with still preserving the pattern bool recognizer
> for the above stated reasons), we'd start with changing
> get_vectype_for_scalar_type to handle those types (then the
> tree-vect-data-refs.c and tree-vect-loop.c changes from this patch would
> be unnecessary), but then we'd need to handle it in other places too
> (I guess loads would be fine (unless BIT_FIELD_REF loads), but then
> casts and stores need extra code).

I'll try to poke at that a bit, i.e. support general bit-precision types for
loads and stores and the few operations that are safe on them.  If you
have a store to a bool like

int *a, *b;
_Bool *c;

for (;;)
  c[i] = a[i] < b[i];

will the compare choose an int vector type and then demote it to
char for the store?  I suppose trying to generally handle loads/stores
for these types shouldn't interfere too much with this.  But I'll see ...

Richard.

>        Jakub
>
Jakub Jelinek Oct. 21, 2011, 9:26 a.m. UTC | #4
On Fri, Oct 21, 2011 at 11:19:32AM +0200, Richard Guenther wrote:
> I'll try to poke at that a bit, i.e. support general bit-precision types for
> loads and stores and the few operations that are safe on them.  If you
> have a store to a bool like
> 
> int *a, *b;
> _Bool *c;
> 
> for (;;)
>   c[i] = a[i] < b[i];
> 
> will the compare choose an int vector type and then demote it to
> char for the store?

Yes.  The pattern recognizer would turn this into:
int *a, *b;
for (;;)
  {
    int tmp = a[i] < b[i] ? 1 : 0;
    ((char *)c)[i] = (char) tmp;  // Still using _Bool for TBAA purposes
  }

>  I suppose trying to generally handle loads/stores
> for these types shouldn't interfere too much with this.  But I'll see ...

If you manage to get the generic stuff working (remove the condition from
get_vectype_for_scalar_type about TYPE_PRECISION and handle what is
needed), then vect_recog_bool_pattern would need to be adjusted slightly
(to not start on a cast from some kind of bool to another kind of bool,
which now results in return NULL because get_vectype_for_scalar_type
returns NULL_TREE), and from the patch I've posted we'd need just the
tree-vect-patterns.c bits (adjusted as you say to unconditionally create
a VCE instead of special-casing MEM_REF, and additionally attempting to use
a signed instead of an unsigned type to avoid unnecessary casts) and something
in vectorizable_store so that it doesn't fail on VCEs, at least not
in pattern stmts.

	Jakub
Richard Biener Oct. 24, 2011, 12:35 p.m. UTC | #5
On Thu, 20 Oct 2011, Jakub Jelinek wrote:

> On Thu, Oct 20, 2011 at 11:42:01AM +0200, Richard Guenther wrote:
> > > +  if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
> > > +      && is_pattern_stmt_p (stmt_info))
> > > +    scalar_dest = TREE_OPERAND (scalar_dest, 0);
> > >    if (TREE_CODE (scalar_dest) != ARRAY_REF
> > >        && TREE_CODE (scalar_dest) != INDIRECT_REF
> > >        && TREE_CODE (scalar_dest) != COMPONENT_REF
> > 
> > Just change the if () stmt to
> > 
> >  if (!handled_component_p (scalar_dest)
> >      && TREE_CODE (scalar_dest) != MEM_REF)
> >    return false;
> 
> That will accept BIT_FIELD_REF and ARRAY_RANGE_REF (as well as VCE outside of pattern stmts).
> The VCEs I hope don't appear, but the first two might, and I'm not sure
> we are prepared to handle them.  Certainly not BIT_FIELD_REFs.
> 
> > > +      rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
> > > +      if (TREE_CODE (lhs) == MEM_REF || TREE_CODE (lhs) == TARGET_MEM_REF)
> > > +	{
> > > +	  lhs = copy_node (lhs);
> > 
> > We don't handle TARGET_MEM_REF in vectorizable_store, so no need to
> > do it here.  In fact, just unconditionally do ...
> > 
> > > +	  TREE_TYPE (lhs) = TREE_TYPE (vectype);
> > > +	}
> > > +      else
> > > +	lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
> > 
> > ... this (wrap it in a V_C_E).  No need to special-case any
> > MEM_REFs.
> 
> Ok.  After all it seems vectorizable_store pretty much ignores it
> (except for the scalar_dest check above).  For aliasing it uses the type
> from DR_REF and otherwise it uses the vectorized type.
> 
> > > +      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
> > 
> > This should never be false, so you can as well unconditionally build
> > the conversion stmt.
> 
> You mean because currently adjust_bool_pattern will prefer signed types
> over unsigned while here lhs will be unsigned?  I guess I should
> change it to use signed type for the memory store too to avoid the extra
> cast instead.  Both types can certainly be the same precision, e.g. for:
> unsigned char a[N], b[N];
> unsigned int d[N], e[N];
> bool c[N];
> ...
>   for (i = 0; i < N; ++i)
>     c[i] = a[i] < b[i];
> or different precision, e.g. for:
>   for (i = 0; i < N; ++i)
>     c[i] = d[i] < e[i];
> 
> > > @@ -347,6 +347,28 @@ vect_determine_vectorization_factor (loo
> > >  	      gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
> > >  			  || is_pattern_stmt_p (stmt_info));
> > >  	      vectype = STMT_VINFO_VECTYPE (stmt_info);
> > > +	      if (STMT_VINFO_DATA_REF (stmt_info))
> > > +		{
> > > +		  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
> > > +		  tree scalar_type = TREE_TYPE (DR_REF (dr));
> > > +		  /* vect_analyze_data_refs will allow bool writes through,
> > > +		     in order to allow vect_recog_bool_pattern to transform
> > > +		     those.  If they couldn't be transformed, give up now.  */
> > > +		  if (((TYPE_PRECISION (scalar_type) == 1
> > > +			&& TYPE_UNSIGNED (scalar_type))
> > > +		       || TREE_CODE (scalar_type) == BOOLEAN_TYPE)
> > 
> > Shouldn't it be always possible to vectorize those?  For loads
> > we can assume the memory contains only 1 or 0 (we assume that for
> > scalar loads), for stores we can mask out all other bits explicitly
> > if you add support for truncating conversions to non-mode precision
> > (in fact, we could support non-mode precision vectorization that way,
> > if not support bitfield loads or extending conversions).
> 
> Not without the pattern recognizer transforming it into something.
> That is something we discussed on IRC before I started working on the
> first vect_recog_bool_pattern patch: we'd need to special-case bool and
> one-bit precision types in way too many places all around the vectorizer.
> Another reason was that what vect_recog_bool_pattern does currently
> is certainly way faster than what we would end up with if we just handled
> bool as unsigned (or signed?) char with masking on casts and stores
> - the ability to use any integer type for the bools rather than char
> as appropriate means we can avoid many VEC_PACK_TRUNC_EXPRs and
> corresponding VEC_UNPACK_{LO,HI}_EXPRs.
> So the chosen solution was to attempt to transform some of the bool patterns
> into something the vectorizer can handle easily,
> and what it handles can be extended over time.
> 
> The above just reflects it, probably just me trying to be too cautious,
> the vectorization would likely fail on the stmt feeding the store, because
> get_vectype_for_scalar_type would fail on it.
> 
> If we wanted to support general TYPE_PRECISION != GET_MODE_BITSIZE (TYPE_MODE)
> vectorization (hopefully with still preserving the pattern bool recognizer
> for the above stated reasons), we'd start with changing
> get_vectype_for_scalar_type to handle those types (then the
> tree-vect-data-refs.c and tree-vect-loop.c changes from this patch would
> be unnecessary), but then we'd need to handle it in other places too
> (I guess loads would be fine (unless BIT_FIELD_REF loads), but then
> casts and stores need extra code).

This is what I have right now, bootstrapped and tested on 
x86_64-unknown-linux-gnu.  I do see

FAIL: gfortran.dg/logical_dot_product.f90  -O3 -fomit-frame-pointer  (internal compiler error)
FAIL: gfortran.dg/mapping_1.f90  -O3 -fomit-frame-pointer  (internal compiler error)
FAIL: gfortran.fortran-torture/execute/pr43390.f90,  -O3 -g  (internal compiler error)

so there is some fallout, but somebody broke dejagnu enough that
I can't easily debug this right now, so I'm postponing it until
that is fixed.

It doesn't seem to break any testcases for Bool vectorization.

I probably should factor out the precision test.
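
Something like this (name and final location hypothetical) would cover
the TYPE_PRECISION == GET_MODE_PRECISION (TYPE_MODE (...)) test that
the patch repeats in each of the vectorizable_* routines:

  /* Return true iff TYPE's precision matches the precision of its mode.  */
  static bool
  type_has_mode_precision_p (const_tree type)
  {
    return TYPE_PRECISION (type)
	   == GET_MODE_PRECISION (TYPE_MODE (type));
  }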

Thanks,
Richard.

2011-10-24  Richard Guenther  <rguenther@suse.de>

	* tree-vect-stmts.c (vectorizable_assignment): Bail out for
	non-mode-precision operations.
	(vectorizable_shift): Likewise.
	(vectorizable_operation): Likewise.
	(vectorizable_type_demotion): Likewise.
	(vectorizable_type_promotion): Likewise.
	(vectorizable_store): Handle non-mode-precision stores.
	(vectorizable_load): Handle non-mode-precision loads.
	(get_vectype_for_scalar_type_and_size): Return a vector type
	for non-mode-precision integers.

	* gcc.dg/vect/vect-bool-1.c: New testcase.

Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c	(revision 180380)
--- gcc/tree-vect-stmts.c	(working copy)
*************** vectorizable_assignment (gimple stmt, gi
*** 2173,2178 ****
--- 2173,2197 ----
  	      != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
      return false;
  
+   /* We do not handle bit-precision changes.  */
+   if ((CONVERT_EXPR_CODE_P (code)
+        || code == VIEW_CONVERT_EXPR)
+       && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
+       && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
+ 	   != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
+ 	  || ((TYPE_PRECISION (TREE_TYPE (op))
+ 	       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
+       /* But a conversion that does not change the bit-pattern is ok.  */
+       && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
+ 	    > TYPE_PRECISION (TREE_TYPE (op)))
+ 	   && TYPE_UNSIGNED (TREE_TYPE (op))))
+     {
+       if (vect_print_dump_info (REPORT_DETAILS))
+         fprintf (vect_dump, "type conversion to/from bit-precision "
+ 		 "unsupported.");
+       return false;
+     }
+ 
    if (!vec_stmt) /* transformation not required.  */
      {
        STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
*************** vectorizable_shift (gimple stmt, gimple_
*** 2326,2331 ****
--- 2345,2357 ----
  
    scalar_dest = gimple_assign_lhs (stmt);
    vectype_out = STMT_VINFO_VECTYPE (stmt_info);
+   if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
+       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
+     {
+       if (vect_print_dump_info (REPORT_DETAILS))
+         fprintf (vect_dump, "bit-precision shifts not supported.");
+       return false;
+     }
  
    op0 = gimple_assign_rhs1 (stmt);
    if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
*************** vectorizable_operation (gimple stmt, gim
*** 2660,2665 ****
--- 2686,2706 ----
    scalar_dest = gimple_assign_lhs (stmt);
    vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  
+   /* Most operations cannot handle bit-precision types without extra
+      truncations.  */
+   if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
+        != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
+       /* Exception are bitwise operations.  */
+       && code != BIT_IOR_EXPR
+       && code != BIT_XOR_EXPR
+       && code != BIT_AND_EXPR
+       && code != BIT_NOT_EXPR)
+     {
+       if (vect_print_dump_info (REPORT_DETAILS))
+         fprintf (vect_dump, "bit-precision arithmetic not supported.");
+       return false;
+     }
+ 
    op0 = gimple_assign_rhs1 (stmt);
    if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
  			     &def_stmt, &def, &dt[0], &vectype))
*************** vectorizable_type_demotion (gimple stmt,
*** 3082,3090 ****
    if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
  	  && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
  	 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
! 	     && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
! 	     && CONVERT_EXPR_CODE_P (code))))
      return false;
    if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
  			     &def_stmt, &def, &dt[0], &vectype_in))
      {
--- 3123,3142 ----
    if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
  	  && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
  	 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
! 	     && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0)))))
      return false;
+ 
+   if (INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
+       && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
+ 	   != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
+ 	  || ((TYPE_PRECISION (TREE_TYPE (op0))
+ 	       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op0)))))))
+     {
+       if (vect_print_dump_info (REPORT_DETAILS))
+         fprintf (vect_dump, "type demotion to/from bit-precision unsupported.");
+       return false;
+     }
+ 
    if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
  			     &def_stmt, &def, &dt[0], &vectype_in))
      {
*************** vectorizable_type_promotion (gimple stmt
*** 3365,3370 ****
--- 3417,3435 ----
  	     && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
  	     && CONVERT_EXPR_CODE_P (code))))
      return false;
+ 
+   if (INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
+       && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
+ 	   != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
+ 	  || ((TYPE_PRECISION (TREE_TYPE (op0))
+ 	       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op0)))))))
+     {
+       if (vect_print_dump_info (REPORT_DETAILS))
+         fprintf (vect_dump, "type promotion to/from bit-precision "
+ 		 "unsupported.");
+       return false;
+     }
+ 
    if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
  			     &def_stmt, &def, &dt[0], &vectype_in))
      {
*************** vectorizable_store (gimple stmt, gimple_
*** 3673,3689 ****
        return false;
      }
  
-   /* The scalar rhs type needs to be trivially convertible to the vector
-      component type.  This should always be the case.  */
    elem_type = TREE_TYPE (vectype);
-   if (!useless_type_conversion_p (elem_type, TREE_TYPE (op)))
-     {
-       if (vect_print_dump_info (REPORT_DETAILS))
-         fprintf (vect_dump, "???  operands of different types");
-       return false;
-     }
- 
    vec_mode = TYPE_MODE (vectype);
    /* FORNOW. In some cases can vectorize even if data-type not supported
       (e.g. - array initialization with 0).  */
    if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
--- 3738,3746 ----
        return false;
      }
  
    elem_type = TREE_TYPE (vectype);
    vec_mode = TYPE_MODE (vectype);
+ 
    /* FORNOW. In some cases can vectorize even if data-type not supported
       (e.g. - array initialization with 0).  */
    if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
*************** vectorizable_load (gimple stmt, gimple_s
*** 4117,4123 ****
    bool strided_load = false;
    bool load_lanes_p = false;
    gimple first_stmt;
-   tree scalar_type;
    bool inv_p;
    bool negative;
    bool compute_in_loop = false;
--- 4174,4179 ----
*************** vectorizable_load (gimple stmt, gimple_s
*** 4192,4198 ****
        return false;
      }
  
!   scalar_type = TREE_TYPE (DR_REF (dr));
    mode = TYPE_MODE (vectype);
  
    /* FORNOW. In some cases can vectorize even if data-type not supported
--- 4248,4254 ----
        return false;
      }
  
!   elem_type = TREE_TYPE (vectype);
    mode = TYPE_MODE (vectype);
  
    /* FORNOW. In some cases can vectorize even if data-type not supported
*************** vectorizable_load (gimple stmt, gimple_s
*** 4204,4219 ****
        return false;
      }
  
-   /* The vector component type needs to be trivially convertible to the
-      scalar lhs.  This should always be the case.  */
-   elem_type = TREE_TYPE (vectype);
-   if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), elem_type))
-     {
-       if (vect_print_dump_info (REPORT_DETAILS))
-         fprintf (vect_dump, "???  operands of different types");
-       return false;
-     }
- 
    /* Check if the load is a part of an interleaving chain.  */
    if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
      {
--- 4260,4265 ----
*************** vectorizable_load (gimple stmt, gimple_s
*** 4560,4566 ****
  		    msq = new_temp;
  
  		    bump = size_binop (MULT_EXPR, vs_minus_1,
! 				       TYPE_SIZE_UNIT (scalar_type));
  		    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
  		    new_stmt = gimple_build_assign_with_ops
  				 (BIT_AND_EXPR, NULL_TREE, ptr,
--- 4606,4612 ----
  		    msq = new_temp;
  
  		    bump = size_binop (MULT_EXPR, vs_minus_1,
! 				       TYPE_SIZE_UNIT (elem_type));
  		    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
  		    new_stmt = gimple_build_assign_with_ops
  				 (BIT_AND_EXPR, NULL_TREE, ptr,
*************** get_vectype_for_scalar_type_and_size (tr
*** 5441,5453 ****
    if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
      return NULL_TREE;
  
!   /* If we'd build a vector type of elements whose mode precision doesn't
!      match their types precision we'll get mismatched types on vector
!      extracts via BIT_FIELD_REFs.  This effectively means we disable
!      vectorization of bool and/or enum types in some languages.  */
    if (INTEGRAL_TYPE_P (scalar_type)
        && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type))
!     return NULL_TREE;
  
    if (GET_MODE_CLASS (inner_mode) != MODE_INT
        && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
--- 5487,5500 ----
    if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
      return NULL_TREE;
  
!   /* For vector types of elements whose mode precision doesn't
!      match their types precision we use a element type of mode
!      precision.  The vectorization routines will have to make sure
!      they support the proper result truncation/extension.  */
    if (INTEGRAL_TYPE_P (scalar_type)
        && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type))
!     scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
! 						  TYPE_UNSIGNED (scalar_type));
  
    if (GET_MODE_CLASS (inner_mode) != MODE_INT
        && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
Index: gcc/testsuite/gcc.dg/vect/vect-bool-1.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/vect-bool-1.c	(revision 0)
--- gcc/testsuite/gcc.dg/vect/vect-bool-1.c	(revision 0)
***************
*** 0 ****
--- 1,15 ----
+ /* { dg-do compile } */
+ /* { dg-require-effective-target vect_int } */
+ 
+ _Bool a[1024];
+ _Bool b[1024];
+ _Bool c[1024];
+ void foo (void)
+ {
+   unsigned i;
+   for (i = 0; i < 1024; ++i)
+     a[i] = b[i] | c[i];
+ }
+ 
+ /* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
+ /* { dg-final { cleanup-tree-dump "vect" } } */
Richard Biener Oct. 24, 2011, 2:09 p.m. UTC | #6
On Mon, 24 Oct 2011, Richard Guenther wrote:

> On Thu, 20 Oct 2011, Jakub Jelinek wrote:
> 
> > On Thu, Oct 20, 2011 at 11:42:01AM +0200, Richard Guenther wrote:
> > > > +  if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
> > > > +      && is_pattern_stmt_p (stmt_info))
> > > > +    scalar_dest = TREE_OPERAND (scalar_dest, 0);
> > > >    if (TREE_CODE (scalar_dest) != ARRAY_REF
> > > >        && TREE_CODE (scalar_dest) != INDIRECT_REF
> > > >        && TREE_CODE (scalar_dest) != COMPONENT_REF
> > > 
> > > Just change the if () stmt to
> > > 
> > >  if (!handled_component_p (scalar_dest)
> > >      && TREE_CODE (scalar_dest) != MEM_REF)
> > >    return false;
> > 
> > That will accept BIT_FIELD_REF and ARRAY_RANGE_REF (as well as VCE outside of pattern stmts).
> > The VCEs I hope don't appear, but the first two might, and I'm not sure
> > we are prepared to handle them.  Certainly not BIT_FIELD_REFs.
> > 
> > > > +      rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
> > > > +      if (TREE_CODE (lhs) == MEM_REF || TREE_CODE (lhs) == TARGET_MEM_REF)
> > > > +	{
> > > > +	  lhs = copy_node (lhs);
> > > 
> > > We don't handle TARGET_MEM_REF in vectorizable_store, so no need to
> > > do it here.  In fact, just unconditionally do ...
> > > 
> > > > +	  TREE_TYPE (lhs) = TREE_TYPE (vectype);
> > > > +	}
> > > > +      else
> > > > +	lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
> > > 
> > > ... this (wrap it in a V_C_E).  No need to special-case any
> > > MEM_REFs.
> > 
> > Ok.  After all it seems vectorizable_store pretty much ignores it
> > (except for the scalar_dest check above).  For aliasing it uses the type
> > from DR_REF and otherwise it uses the vectorized type.
> > 
> > > > +      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
> > > 
> > > This should never be false, so you can as well unconditionally build
> > > the conversion stmt.
> > 
> > You mean because currently adjust_bool_pattern will prefer signed types
> > over unsigned while here lhs will be unsigned?  I guess I should
> > change it to use signed type for the memory store too to avoid the extra
> > cast instead.  Both types can certainly be the same precision, e.g. for:
> > unsigned char a[N], b[N];
> > unsigned int d[N], e[N];
> > bool c[N];
> > ...
> >   for (i = 0; i < N; ++i)
> >     c[i] = a[i] < b[i];
> > or different precision, e.g. for:
> >   for (i = 0; i < N; ++i)
> >     c[i] = d[i] < e[i];
> > 
> > > > @@ -347,6 +347,28 @@ vect_determine_vectorization_factor (loo
> > > >  	      gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
> > > >  			  || is_pattern_stmt_p (stmt_info));
> > > >  	      vectype = STMT_VINFO_VECTYPE (stmt_info);
> > > > +	      if (STMT_VINFO_DATA_REF (stmt_info))
> > > > +		{
> > > > +		  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
> > > > +		  tree scalar_type = TREE_TYPE (DR_REF (dr));
> > > > +		  /* vect_analyze_data_refs will allow bool writes through,
> > > > +		     in order to allow vect_recog_bool_pattern to transform
> > > > +		     those.  If they couldn't be transformed, give up now.  */
> > > > +		  if (((TYPE_PRECISION (scalar_type) == 1
> > > > +			&& TYPE_UNSIGNED (scalar_type))
> > > > +		       || TREE_CODE (scalar_type) == BOOLEAN_TYPE)
> > > 
> > > Shouldn't it be always possible to vectorize those?  For loads
> > > we can assume the memory contains only 1 or 0 (we assume that for
> > > scalar loads), for stores we can mask out all other bits explicitly
> > > if you add support for truncating conversions to non-mode precision
> > > (in fact, we could support non-mode precision vectorization that way,
> > > if not support bitfield loads or extending conversions).
> > 
> > Not without the pattern recognizer transforming it into something.
> > That is something we discussed on IRC before I started working on the
> > first vect_recog_bool_pattern patch: we'd need to special-case bool and
> > one-bit precision types in way too many places all around the vectorizer.
> > Another reason was that what vect_recog_bool_pattern does currently
> > is certainly way faster than what we would end up with if we just handled
> > bool as unsigned (or signed?) char with masking on casts and stores
> > - the ability to use any integer type for the bools rather than char
> > as appropriate means we can avoid many VEC_PACK_TRUNC_EXPRs and
> > corresponding VEC_UNPACK_{LO,HI}_EXPRs.
> > So the chosen solution was to attempt to transform some of the bool patterns
> > into something the vectorizer can handle easily,
> > and what it handles can be extended over time.
> > 
> > The above just reflects it, probably just me trying to be too cautious,
> > the vectorization would likely fail on the stmt feeding the store, because
> > get_vectype_for_scalar_type would fail on it.
> > 
> > If we wanted to support general TYPE_PRECISION != GET_MODE_BITSIZE (TYPE_MODE)
> > vectorization (hopefully with still preserving the pattern bool recognizer
> > for the above stated reasons), we'd start with changing
> > get_vectype_for_scalar_type to handle those types (then the
> > tree-vect-data-refs.c and tree-vect-loop.c changes from this patch would
> > be unnecessary), but then we'd need to handle it in other places too
> > (I guess loads would be fine (unless BIT_FIELD_REF loads), but then
> > casts and stores need extra code).
> 
> This is what I have right now, bootstrapped and tested on 
> x86_64-unknown-linux-gnu.  I do see
> 
> FAIL: gfortran.dg/logical_dot_product.f90  -O3 -fomit-frame-pointer  (internal compiler error)
> FAIL: gfortran.dg/mapping_1.f90  -O3 -fomit-frame-pointer  (internal compiler error)
> FAIL: gfortran.fortran-torture/execute/pr43390.f90,  -O3 -g  (internal compiler error)
> 
> so there is some fallout, but somebody broke dejagnu enough that
> I can't easily debug this right now, so I'm postponing it until
> that is fixed.
> 
> It doesn't seem to break any testcases for Bool vectorization.

This one bootstraps and regtests fine on x86_64-unknown-linux-gnu.
I didn't find a good pattern to split out; at some point how we call
the vectorizable_* routines should be refactored a bit.

Does this look ok to you?

Thanks,
Richard.

2011-10-24  Richard Guenther  <rguenther@suse.de>

	* tree-vect-stmts.c (vect_get_vec_def_for_operand): Convert constants
	to vector element type.
	(vectorizable_assignment): Bail out for non-mode-precision operations.
	(vectorizable_shift): Likewise.
	(vectorizable_operation): Likewise.
	(vectorizable_type_demotion): Likewise.
	(vectorizable_type_promotion): Likewise.
	(vectorizable_store): Handle non-mode-precision stores.
	(vectorizable_load): Handle non-mode-precision loads.
	(get_vectype_for_scalar_type_and_size): Return a vector type
	for non-mode-precision integers.
	* tree-vect-loop.c (vectorizable_reduction): Bail out for
	non-mode-precision reductions.

	* gcc.dg/vect/vect-bool-1.c: New testcase.

Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c	(revision 180380)
--- gcc/tree-vect-stmts.c	(working copy)
*************** vect_get_vec_def_for_operand (tree op, g
*** 1204,1210 ****
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
  
!         vec_cst = build_vector_from_val (vector_type, op);
          return vect_init_vector (stmt, vec_cst, vector_type, NULL);
        }
  
--- 1204,1212 ----
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
  
!         vec_cst = build_vector_from_val (vector_type,
! 					 fold_convert (TREE_TYPE (vector_type),
! 						       op));
          return vect_init_vector (stmt, vec_cst, vector_type, NULL);
        }
  
*************** vectorizable_assignment (gimple stmt, gi
*** 2173,2178 ****
--- 2175,2199 ----
  	      != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
      return false;
  
+   /* We do not handle bit-precision changes.  */
+   if ((CONVERT_EXPR_CODE_P (code)
+        || code == VIEW_CONVERT_EXPR)
+       && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
+       && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
+ 	   != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
+ 	  || ((TYPE_PRECISION (TREE_TYPE (op))
+ 	       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
+       /* But a conversion that does not change the bit-pattern is ok.  */
+       && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
+ 	    > TYPE_PRECISION (TREE_TYPE (op)))
+ 	   && TYPE_UNSIGNED (TREE_TYPE (op))))
+     {
+       if (vect_print_dump_info (REPORT_DETAILS))
+         fprintf (vect_dump, "type conversion to/from bit-precision "
+ 		 "unsupported.");
+       return false;
+     }
+ 
    if (!vec_stmt) /* transformation not required.  */
      {
        STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
*************** vectorizable_shift (gimple stmt, gimple_
*** 2326,2331 ****
--- 2347,2359 ----
  
    scalar_dest = gimple_assign_lhs (stmt);
    vectype_out = STMT_VINFO_VECTYPE (stmt_info);
+   if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
+       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
+     {
+       if (vect_print_dump_info (REPORT_DETAILS))
+         fprintf (vect_dump, "bit-precision shifts not supported.");
+       return false;
+     }
  
    op0 = gimple_assign_rhs1 (stmt);
    if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
*************** vectorizable_operation (gimple stmt, gim
*** 2660,2665 ****
--- 2688,2708 ----
    scalar_dest = gimple_assign_lhs (stmt);
    vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  
+   /* Most operations cannot handle bit-precision types without extra
+      truncations.  */
+   if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
+        != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
+       /* Exception are bitwise operations.  */
+       && code != BIT_IOR_EXPR
+       && code != BIT_XOR_EXPR
+       && code != BIT_AND_EXPR
+       && code != BIT_NOT_EXPR)
+     {
+       if (vect_print_dump_info (REPORT_DETAILS))
+         fprintf (vect_dump, "bit-precision arithmetic not supported.");
+       return false;
+     }
+ 
    op0 = gimple_assign_rhs1 (stmt);
    if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
  			     &def_stmt, &def, &dt[0], &vectype))
*************** vectorizable_type_demotion (gimple stmt,
*** 3082,3090 ****
    if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
  	  && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
  	 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
! 	     && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
! 	     && CONVERT_EXPR_CODE_P (code))))
      return false;
    if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
  			     &def_stmt, &def, &dt[0], &vectype_in))
      {
--- 3125,3144 ----
    if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
  	  && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
  	 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
! 	     && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0)))))
      return false;
+ 
+   if (INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
+       && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
+ 	   != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
+ 	  || ((TYPE_PRECISION (TREE_TYPE (op0))
+ 	       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op0)))))))
+     {
+       if (vect_print_dump_info (REPORT_DETAILS))
+         fprintf (vect_dump, "type demotion to/from bit-precision unsupported.");
+       return false;
+     }
+ 
    if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
  			     &def_stmt, &def, &dt[0], &vectype_in))
      {
*************** vectorizable_type_promotion (gimple stmt
*** 3365,3370 ****
--- 3419,3437 ----
  	     && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
  	     && CONVERT_EXPR_CODE_P (code))))
      return false;
+ 
+   if (INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
+       && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
+ 	   != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
+ 	  || ((TYPE_PRECISION (TREE_TYPE (op0))
+ 	       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op0)))))))
+     {
+       if (vect_print_dump_info (REPORT_DETAILS))
+         fprintf (vect_dump, "type promotion to/from bit-precision "
+ 		 "unsupported.");
+       return false;
+     }
+ 
    if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
  			     &def_stmt, &def, &dt[0], &vectype_in))
      {
*************** vectorizable_store (gimple stmt, gimple_
*** 3673,3689 ****
        return false;
      }
  
-   /* The scalar rhs type needs to be trivially convertible to the vector
-      component type.  This should always be the case.  */
    elem_type = TREE_TYPE (vectype);
-   if (!useless_type_conversion_p (elem_type, TREE_TYPE (op)))
-     {
-       if (vect_print_dump_info (REPORT_DETAILS))
-         fprintf (vect_dump, "???  operands of different types");
-       return false;
-     }
- 
    vec_mode = TYPE_MODE (vectype);
    /* FORNOW. In some cases can vectorize even if data-type not supported
       (e.g. - array initialization with 0).  */
    if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
--- 3740,3748 ----
        return false;
      }
  
    elem_type = TREE_TYPE (vectype);
    vec_mode = TYPE_MODE (vectype);
+ 
    /* FORNOW. In some cases can vectorize even if data-type not supported
       (e.g. - array initialization with 0).  */
    if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
*************** vectorizable_load (gimple stmt, gimple_s
*** 4117,4123 ****
    bool strided_load = false;
    bool load_lanes_p = false;
    gimple first_stmt;
-   tree scalar_type;
    bool inv_p;
    bool negative;
    bool compute_in_loop = false;
--- 4176,4181 ----
*************** vectorizable_load (gimple stmt, gimple_s
*** 4192,4198 ****
        return false;
      }
  
!   scalar_type = TREE_TYPE (DR_REF (dr));
    mode = TYPE_MODE (vectype);
  
    /* FORNOW. In some cases can vectorize even if data-type not supported
--- 4250,4256 ----
        return false;
      }
  
!   elem_type = TREE_TYPE (vectype);
    mode = TYPE_MODE (vectype);
  
    /* FORNOW. In some cases can vectorize even if data-type not supported
*************** vectorizable_load (gimple stmt, gimple_s
*** 4204,4219 ****
        return false;
      }
  
-   /* The vector component type needs to be trivially convertible to the
-      scalar lhs.  This should always be the case.  */
-   elem_type = TREE_TYPE (vectype);
-   if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), elem_type))
-     {
-       if (vect_print_dump_info (REPORT_DETAILS))
-         fprintf (vect_dump, "???  operands of different types");
-       return false;
-     }
- 
    /* Check if the load is a part of an interleaving chain.  */
    if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
      {
--- 4262,4267 ----
*************** vectorizable_load (gimple stmt, gimple_s
*** 4560,4566 ****
  		    msq = new_temp;
  
  		    bump = size_binop (MULT_EXPR, vs_minus_1,
! 				       TYPE_SIZE_UNIT (scalar_type));
  		    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
  		    new_stmt = gimple_build_assign_with_ops
  				 (BIT_AND_EXPR, NULL_TREE, ptr,
--- 4608,4614 ----
  		    msq = new_temp;
  
  		    bump = size_binop (MULT_EXPR, vs_minus_1,
! 				       TYPE_SIZE_UNIT (elem_type));
  		    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
  		    new_stmt = gimple_build_assign_with_ops
  				 (BIT_AND_EXPR, NULL_TREE, ptr,
*************** get_vectype_for_scalar_type_and_size (tr
*** 5441,5453 ****
    if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
      return NULL_TREE;
  
!   /* If we'd build a vector type of elements whose mode precision doesn't
!      match their types precision we'll get mismatched types on vector
!      extracts via BIT_FIELD_REFs.  This effectively means we disable
!      vectorization of bool and/or enum types in some languages.  */
    if (INTEGRAL_TYPE_P (scalar_type)
        && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type))
!     return NULL_TREE;
  
    if (GET_MODE_CLASS (inner_mode) != MODE_INT
        && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
--- 5489,5502 ----
    if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
      return NULL_TREE;
  
!   /* For vector types of elements whose mode precision doesn't
!      match their type's precision we use an element type of mode
!      precision.  The vectorization routines will have to make sure
!      they support the proper result truncation/extension.  */
    if (INTEGRAL_TYPE_P (scalar_type)
        && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type))
!     scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
! 						  TYPE_UNSIGNED (scalar_type));
  
    if (GET_MODE_CLASS (inner_mode) != MODE_INT
        && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
Index: gcc/tree-vect-loop.c
===================================================================
*** gcc/tree-vect-loop.c	(revision 180380)
--- gcc/tree-vect-loop.c	(working copy)
*************** vectorizable_reduction (gimple stmt, gim
*** 4422,4427 ****
--- 4422,4432 ----
        && !SCALAR_FLOAT_TYPE_P (scalar_type))
      return false;
  
+   /* Do not try to vectorize bit-precision reductions.  */
+   if ((TYPE_PRECISION (scalar_type)
+        != GET_MODE_PRECISION (TYPE_MODE (scalar_type))))
+     return false;
+ 
    /* All uses but the last are expected to be defined in the loop.
       The last use is the reduction variable.  In case of nested cycle this
       assumption is not true: we use reduc_index to record the index of the
Index: gcc/testsuite/gcc.dg/vect/vect-bool-1.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/vect-bool-1.c	(revision 0)
--- gcc/testsuite/gcc.dg/vect/vect-bool-1.c	(revision 0)
***************
*** 0 ****
--- 1,15 ----
+ /* { dg-do compile } */
+ /* { dg-require-effective-target vect_int } */
+ 
+ _Bool a[1024];
+ _Bool b[1024];
+ _Bool c[1024];
+ void foo (void)
+ {
+   unsigned i;
+   for (i = 0; i < 1024; ++i)
+     a[i] = b[i] | c[i];
+ }
+ 
+ /* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
+ /* { dg-final { cleanup-tree-dump "vect" } } */
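
As a concrete illustration of the vectorizable_reduction change above --
a hand-written sketch, under the usual assumption that _Bool has QImode
(mode precision 8 vs. TYPE_PRECISION 1), not actual dump output:

_Bool any;
_Bool a[1024];

void
foo (void)
{
  int i;
  /* If this loop is recognized as an OR-reduction, the reduction
     variable's scalar type is _Bool: TYPE_PRECISION == 1 but
     GET_MODE_PRECISION (QImode) == 8, so vectorizable_reduction now
     bails out early.  This check is needed precisely because
     get_vectype_for_scalar_type no longer refuses such types.  */
  for (i = 0; i < 1024; ++i)
    any |= a[i];
}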

Patch

--- gcc/tree-vect-stmts.c.jj	2011-10-18 23:52:07.000000000 +0200
+++ gcc/tree-vect-stmts.c	2011-10-19 14:19:00.000000000 +0200
@@ -159,19 +159,20 @@  vect_mark_relevant (VEC(gimple,heap) **w
           /* This use is out of pattern use, if LHS has other uses that are
              pattern uses, we should mark the stmt itself, and not the pattern
              stmt.  */
-          FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
-            {
-              if (is_gimple_debug (USE_STMT (use_p)))
-                continue;
-              use_stmt = USE_STMT (use_p);
+	  if (TREE_CODE (lhs) == SSA_NAME)
+	    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
+	      {
+		if (is_gimple_debug (USE_STMT (use_p)))
+		  continue;
+		use_stmt = USE_STMT (use_p);
 
-              if (vinfo_for_stmt (use_stmt)
-                  && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
-                {
-                  found = true;
-                  break;
-                }
-            }
+		if (vinfo_for_stmt (use_stmt)
+		    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
+		  {
+		    found = true;
+		    break;
+		  }
+	      }
         }
 
       if (!found)
@@ -3656,6 +3657,9 @@  vectorizable_store (gimple stmt, gimple_
     return false;
 
   scalar_dest = gimple_assign_lhs (stmt);
+  if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
+      && is_pattern_stmt_p (stmt_info))
+    scalar_dest = TREE_OPERAND (scalar_dest, 0);
   if (TREE_CODE (scalar_dest) != ARRAY_REF
       && TREE_CODE (scalar_dest) != INDIRECT_REF
       && TREE_CODE (scalar_dest) != COMPONENT_REF
--- gcc/tree-vect-patterns.c.jj	2011-10-18 23:52:05.000000000 +0200
+++ gcc/tree-vect-patterns.c	2011-10-19 13:55:27.000000000 +0200
@@ -1933,6 +1933,50 @@  vect_recog_bool_pattern (VEC (gimple, he
       VEC_safe_push (gimple, heap, *stmts, last_stmt);
       return pattern_stmt;
     }
+  else if (rhs_code == SSA_NAME
+	   && STMT_VINFO_DATA_REF (stmt_vinfo))
+    {
+      stmt_vec_info pattern_stmt_info;
+      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
+      gcc_assert (vectype != NULL_TREE);
+      if (!check_bool_pattern (var, loop_vinfo))
+	return NULL;
+
+      rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
+      if (TREE_CODE (lhs) == MEM_REF || TREE_CODE (lhs) == TARGET_MEM_REF)
+	{
+	  lhs = copy_node (lhs);
+	  TREE_TYPE (lhs) = TREE_TYPE (vectype);
+	}
+      else
+	lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
+      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
+	{
+	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
+	  gimple cast_stmt
+	    = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
+	  STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = cast_stmt;
+	  rhs = rhs2;
+	}
+      pattern_stmt
+	= gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
+      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
+      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
+      STMT_VINFO_DATA_REF (pattern_stmt_info)
+	= STMT_VINFO_DATA_REF (stmt_vinfo);
+      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
+	= STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
+      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
+      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
+	= STMT_VINFO_DR_OFFSET (stmt_vinfo);
+      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
+      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
+	= STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
+      *type_out = vectype;
+      *type_in = vectype;
+      VEC_safe_push (gimple, heap, *stmts, last_stmt);
+      return pattern_stmt;
+    }
   else
     return NULL;
 }
@@ -1949,19 +1993,22 @@  vect_mark_pattern_stmts (gimple orig_stm
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
   gimple def_stmt;
 
-  set_vinfo_for_stmt (pattern_stmt,
-                      new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
-  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
   pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
+  if (pattern_stmt_info == NULL)
+    {
+      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
+      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
+    }
+  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
 
   STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
   STMT_VINFO_DEF_TYPE (pattern_stmt_info)
-	= STMT_VINFO_DEF_TYPE (orig_stmt_info);
+    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
   STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
   STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
   STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
   STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)
-	= STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
+    = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
   if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info))
     {
       def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info);
--- gcc/tree-vect-data-refs.c.jj	2011-09-20 21:43:07.000000000 +0200
+++ gcc/tree-vect-data-refs.c	2011-10-19 14:37:44.000000000 +0200
@@ -2752,8 +2752,23 @@  vect_analyze_data_refs (loop_vec_info lo
 
       /* Set vectype for STMT.  */
       scalar_type = TREE_TYPE (DR_REF (dr));
-      STMT_VINFO_VECTYPE (stmt_info) =
-                get_vectype_for_scalar_type (scalar_type);
+      STMT_VINFO_VECTYPE (stmt_info)
+	= get_vectype_for_scalar_type (scalar_type);
+      if (!STMT_VINFO_VECTYPE (stmt_info)
+	  && ((TYPE_PRECISION (scalar_type) == 1
+	       && TYPE_UNSIGNED (scalar_type))
+	      || TREE_CODE (scalar_type) == BOOLEAN_TYPE)
+	  && DR_IS_WRITE (dr)
+	  && loop_vinfo)
+	{
+	  /* For bool stores use an integral type with the same
+	     TYPE_MODE, but bigger precision.  vect_recog_bool_pattern
+	     can transform those into something vectorizable.  */
+	  unsigned int modesize = GET_MODE_BITSIZE (TYPE_MODE (scalar_type));
+	  scalar_type = build_nonstandard_integer_type (modesize, 1);
+	  STMT_VINFO_VECTYPE (stmt_info)
+	    = get_vectype_for_scalar_type (scalar_type);
+	}
       if (!STMT_VINFO_VECTYPE (stmt_info))
         {
           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
--- gcc/tree-vect-loop.c.jj	2011-09-26 14:06:52.000000000 +0200
+++ gcc/tree-vect-loop.c	2011-10-19 14:49:18.000000000 +0200
@@ -1,5 +1,5 @@ 
 /* Loop Vectorization
-   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
    Free Software Foundation, Inc.
    Contributed by Dorit Naishlos <dorit@il.ibm.com> and
    Ira Rosen <irar@il.ibm.com>
@@ -347,6 +347,28 @@  vect_determine_vectorization_factor (loo
 	      gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
 			  || is_pattern_stmt_p (stmt_info));
 	      vectype = STMT_VINFO_VECTYPE (stmt_info);
+	      if (STMT_VINFO_DATA_REF (stmt_info))
+		{
+		  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
+		  tree scalar_type = TREE_TYPE (DR_REF (dr));
+		  /* vect_analyze_data_refs will allow bool writes through,
+		     in order to allow vect_recog_bool_pattern to transform
+		     those.  If they couldn't be transformed, give up now.  */
+		  if (((TYPE_PRECISION (scalar_type) == 1
+			&& TYPE_UNSIGNED (scalar_type))
+		       || TREE_CODE (scalar_type) == BOOLEAN_TYPE)
+		      && DR_IS_WRITE (dr)
+		      && !is_pattern_stmt_p (stmt_info))
+		    {
+		      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+			{
+			  fprintf (vect_dump,
+				   "not vectorized: unsupported data-type ");
+			  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
+			}
+		      return false;
+		    }
+		}
 	    }
 	  else
 	    {
--- gcc/testsuite/gcc.dg/vect/vect-cond-10.c.jj	2011-10-19 15:54:42.000000000 +0200
+++ gcc/testsuite/gcc.dg/vect/vect-cond-10.c	2011-10-19 16:00:22.000000000 +0200
@@ -0,0 +1,165 @@ 
+/* { dg-require-effective-target vect_cond_mixed } */
+
+#include "tree-vect.h"
+
+#define N 1024
+float a[N], b[N], c[N], d[N];
+_Bool k[N];
+
+__attribute__((noinline, noclone)) void
+f1 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    {
+      int x = a[i] < b[i];
+      int y = c[i] < d[i];
+      k[i] = x & y;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f2 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    k[i] = (a[i] < b[i]) & (c[i] < d[i]);
+}
+
+__attribute__((noinline, noclone)) void
+f3 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    {
+      int x = a[i] < b[i];
+      int y = c[i] < d[i];
+      k[i] = x | y;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f4 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    k[i] = (a[i] < b[i]) | (c[i] < d[i]);
+}
+
+__attribute__((noinline, noclone)) void
+f5 (_Bool *p)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    {
+      int x = a[i] < b[i];
+      int y = c[i] < d[i];
+      p[i] = x & y;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f6 (_Bool *p)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    p[i] = (a[i] < b[i]) & (c[i] < d[i]);
+}
+
+__attribute__((noinline, noclone)) void
+f7 (_Bool *p)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    {
+      int x = a[i] < b[i];
+      int y = c[i] < d[i];
+      p[i] = x | y;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f8 (_Bool *p)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    p[i] = (a[i] < b[i]) | (c[i] < d[i]);
+}
+
+int
+main ()
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+      switch (i % 9)
+	{
+	case 0: asm (""); a[i] = - i - 1; b[i] = i + 1; break;
+	case 1: a[i] = 0; b[i] = 0; break;
+	case 2: a[i] = i + 1; b[i] = - i - 1; break;
+	case 3: a[i] = i; b[i] = i + 7; break;
+	case 4: a[i] = i; b[i] = i; break;
+	case 5: a[i] = i + 16; b[i] = i + 3; break;
+	case 6: a[i] = - i - 5; b[i] = - i; break;
+	case 7: a[i] = - i; b[i] = - i; break;
+	case 8: a[i] = - i; b[i] = - i - 7; break;
+	}
+    }
+  for (i = 0; i < N; i++)
+    {
+      switch ((i / 9) % 3)
+	{
+	case 0: c[i] = a[i / 9]; d[i] = b[i / 9]; break;
+	case 1: c[i] = a[i / 9 + 3]; d[i] = b[i / 9 + 3]; break;
+	case 2: c[i] = a[i / 9 + 6]; d[i] = b[i / 9 + 6]; break;
+	}
+    }
+  f1 ();
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (k, 0, sizeof (k));
+  f2 ();
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (k, 0, sizeof (k));
+  f3 ();
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (k, 0, sizeof (k));
+  f4 ();
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (k, 0, sizeof (k));
+  f5 (k);
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (k, 0, sizeof (k));
+  f6 (k);
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (k, 0, sizeof (k));
+  f7 (k);
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (k, 0, sizeof (k));
+  f8 (k);
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (k, 0, sizeof (k));
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops" 8 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
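
To make the bool-store transformation concrete, here is a sketch of what
vect_recog_bool_pattern now does for a store into bool memory (the GIMPLE
spelling below is hand-written and the temporary names are made up, not
taken from an actual dump):

_Bool k[1024];
float a[1024], b[1024];

void
foo (void)
{
  int i;
  for (i = 0; i < 1024; ++i)
    k[i] = a[i] < b[i];	/* store into bool memory */
}

/* Conceptually, the pattern stmt replaces the bool store with an
   integral store of the same mode, roughly

     patt_x = a[i] < b[i] ? 1 : 0;			(unsigned char)
     VIEW_CONVERT_EXPR<unsigned char>(k[i]) = patt_x;

   which is why vectorizable_store above learns to look through
   VIEW_CONVERT_EXPR on the lhs of pattern stmts, and why
   vect_analyze_data_refs hands such stores the vectype for the
   integral type corresponding to bool's mode.  */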