diff mbox

[vec-cmp,3/6] Vectorize comparison

Message ID 20151008150348.GD63757@msticlxl57.ims.intel.com
State New
Headers show

Commit Message

Ilya Enkovich Oct. 8, 2015, 3:03 p.m. UTC
Hi,

This patch supports comparison statements vectrization basing on introduced optabs.

Thanks,
Ilya
--
gcc/

2015-10-08  Ilya Enkovich  <enkovich.gnu@gmail.com>

	* tree-vect-data-refs.c (vect_get_new_vect_var): Support vect_mask_var.
	(vect_create_destination_var): Likewise.
	* tree-vect-stmts.c (vectorizable_comparison): New.
	(vect_analyze_stmt): Add vectorizable_comparison.
	(vect_transform_stmt): Likewise.
	* tree-vectorizer.h (enum vect_var_kind): Add vect_mask_var.
	(enum stmt_vec_info_type): Add comparison_vec_info_type.
	(vectorizable_comparison): New.

Comments

Richard Biener Oct. 13, 2015, 1:45 p.m. UTC | #1
On Thu, Oct 8, 2015 at 5:03 PM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
> Hi,
>
> This patch supports comparison statements vectrization basing on introduced optabs.
>
> Thanks,
> Ilya
> --
> gcc/
>
> 2015-10-08  Ilya Enkovich  <enkovich.gnu@gmail.com>
>
>         * tree-vect-data-refs.c (vect_get_new_vect_var): Support vect_mask_var.
>         (vect_create_destination_var): Likewise.
>         * tree-vect-stmts.c (vectorizable_comparison): New.
>         (vect_analyze_stmt): Add vectorizable_comparison.
>         (vect_transform_stmt): Likewise.
>         * tree-vectorizer.h (enum vect_var_kind): Add vect_mask_var.
>         (enum stmt_vec_info_type): Add comparison_vec_info_type.
>         (vectorizable_comparison): New.
>
>
> diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
> index 3befa38..9edc663 100644
> --- a/gcc/tree-vect-data-refs.c
> +++ b/gcc/tree-vect-data-refs.c
> @@ -3849,6 +3849,9 @@ vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
>    case vect_scalar_var:
>      prefix = "stmp";
>      break;
> +  case vect_mask_var:
> +    prefix = "mask";
> +    break;
>    case vect_pointer_var:
>      prefix = "vectp";
>      break;
> @@ -4403,7 +4406,11 @@ vect_create_destination_var (tree scalar_dest, tree vectype)
>    tree type;
>    enum vect_var_kind kind;
>
> -  kind = vectype ? vect_simple_var : vect_scalar_var;
> +  kind = vectype
> +    ? VECTOR_BOOLEAN_TYPE_P (vectype)
> +    ? vect_mask_var
> +    : vect_simple_var
> +    : vect_scalar_var;
>    type = vectype ? vectype : TREE_TYPE (scalar_dest);
>
>    gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index 8eda8e9..6949c71 100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -7525,6 +7525,211 @@ vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
>    return true;
>  }
>
> +/* vectorizable_comparison.
> +
> +   Check if STMT is comparison expression that can be vectorized.
> +   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
> +   comparison, put it in VEC_STMT, and insert it at GSI.
> +
> +   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
> +
> +bool
> +vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
> +                        gimple **vec_stmt, tree reduc_def,
> +                        slp_tree slp_node)
> +{
> +  tree lhs, rhs1, rhs2;
> +  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
> +  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
> +  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
> +  tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
> +  tree vec_compare;
> +  tree new_temp;
> +  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
> +  tree def;
> +  enum vect_def_type dt, dts[4];
> +  unsigned nunits;
> +  int ncopies;
> +  enum tree_code code;
> +  stmt_vec_info prev_stmt_info = NULL;
> +  int i, j;
> +  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
> +  vec<tree> vec_oprnds0 = vNULL;
> +  vec<tree> vec_oprnds1 = vNULL;
> +  tree mask_type;
> +  tree mask;
> +
> +  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
> +    return false;
> +
> +  mask_type = vectype;
> +  nunits = TYPE_VECTOR_SUBPARTS (vectype);
> +
> +  if (slp_node || PURE_SLP_STMT (stmt_info))
> +    ncopies = 1;
> +  else
> +    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
> +
> +  gcc_assert (ncopies >= 1);
> +  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
> +    return false;
> +
> +  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
> +      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
> +          && reduc_def))
> +    return false;
> +
> +  if (STMT_VINFO_LIVE_P (stmt_info))
> +    {
> +      if (dump_enabled_p ())
> +       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                        "value used after loop.\n");
> +      return false;
> +    }
> +
> +  if (!is_gimple_assign (stmt))
> +    return false;
> +
> +  code = gimple_assign_rhs_code (stmt);
> +
> +  if (TREE_CODE_CLASS (code) != tcc_comparison)
> +    return false;
> +
> +  rhs1 = gimple_assign_rhs1 (stmt);
> +  rhs2 = gimple_assign_rhs2 (stmt);
> +
> +  if (TREE_CODE (rhs1) == SSA_NAME)
> +    {
> +      gimple *rhs1_def_stmt = SSA_NAME_DEF_STMT (rhs1);
> +      if (!vect_is_simple_use_1 (rhs1, stmt, loop_vinfo, bb_vinfo,
> +                                &rhs1_def_stmt, &def, &dt, &vectype1))
> +       return false;
> +    }
> +  else if (TREE_CODE (rhs1) != INTEGER_CST && TREE_CODE (rhs1) != REAL_CST
> +          && TREE_CODE (rhs1) != FIXED_CST)
> +    return false;

I think vect_is_simple_use_1 handles constants just fine an def_stmt
is an output,
you don't need to initialize it.

> +
> +  if (TREE_CODE (rhs2) == SSA_NAME)
> +    {
> +      gimple *rhs2_def_stmt = SSA_NAME_DEF_STMT (rhs2);
> +      if (!vect_is_simple_use_1 (rhs2, stmt, loop_vinfo, bb_vinfo,
> +                                &rhs2_def_stmt, &def, &dt, &vectype2))
> +       return false;
> +    }
> +  else if (TREE_CODE (rhs2) != INTEGER_CST && TREE_CODE (rhs2) != REAL_CST
> +          && TREE_CODE (rhs2) != FIXED_CST)
> +    return false;
> +
> +  if (vectype1 && vectype2
> +      && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
> +    return false;
> +
> +  vectype = vectype1 ? vectype1 : vectype2;
> +
> +  /* Invariant comparison.  */
> +  if (!vectype)

you should detect invariantness of operands by looking at 'dt' as computed by
vect_is_simple_use above.

> +    {
> +      vectype = build_vector_type (TREE_TYPE (rhs1), nunits);
> +      if (tree_to_shwi (TYPE_SIZE_UNIT (vectype)) != current_vector_size)
> +       return false;
> +    }
> +  else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
> +    return false;
> +
> +  if (!vec_stmt)
> +    {
> +      STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
> +      return expand_vec_cmp_expr_p (vectype, mask_type);

you are missing cost computation here.   That would be usually
vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL).

> +    }
> +
> +  /* Transform.  */
> +  if (!slp_node)
> +    {
> +      vec_oprnds0.create (1);
> +      vec_oprnds1.create (1);
> +    }
> +
> +  /* Handle def.  */
> +  lhs = gimple_assign_lhs (stmt);
> +  mask = vect_create_destination_var (lhs, mask_type);
> +
> +  /* Handle cmp expr.  */
> +  for (j = 0; j < ncopies; j++)
> +    {
> +      gassign *new_stmt = NULL;
> +      if (j == 0)
> +       {
> +         if (slp_node)
> +           {
> +             auto_vec<tree, 2> ops;
> +             auto_vec<vec<tree>, 2> vec_defs;
> +
> +             ops.safe_push (rhs1);
> +             ops.safe_push (rhs2);
> +             vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
> +             vec_oprnds1 = vec_defs.pop ();
> +             vec_oprnds0 = vec_defs.pop ();
> +
> +             ops.release ();
> +             vec_defs.release ();
> +           }
> +         else
> +           {
> +             gimple *gtemp;
> +             vec_rhs1
> +               = vect_get_vec_def_for_operand (rhs1, stmt, NULL);
> +             vect_is_simple_use (rhs1, stmt, loop_vinfo, NULL,
> +                                 &gtemp, &def, &dts[0]);
> +             vec_rhs2 =
> +               vect_get_vec_def_for_operand (rhs2, stmt, NULL);
> +             vect_is_simple_use (rhs2, stmt, loop_vinfo, NULL,
> +                                 &gtemp, &def, &dts[1]);
> +           }
> +       }
> +      else
> +       {
> +         vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
> +                                                    vec_oprnds0.pop ());
> +         vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
> +                                                    vec_oprnds1.pop ());

dts is uninitialized for the slp_node case.  You should have initialized
them by the vect_is_simple_use_1 call during analysis.

Otherwise this looks ok to me.  I suppose we have plenty of testcases
exercising this
from the original bool pattern support?  Esp. cases triggering SLP and
SLP with unrolling.

Thanks,
Richard.

> +       }
> +
> +      if (!slp_node)
> +       {
> +         vec_oprnds0.quick_push (vec_rhs1);
> +         vec_oprnds1.quick_push (vec_rhs2);
> +       }
> +
> +      /* Arguments are ready.  Create the new vector stmt.  */
> +      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
> +       {
> +         vec_rhs2 = vec_oprnds1[i];
> +
> +         vec_compare = build2 (code, mask_type, vec_rhs1, vec_rhs2);
> +         new_stmt = gimple_build_assign (mask, vec_compare);
> +         new_temp = make_ssa_name (mask, new_stmt);
> +         gimple_assign_set_lhs (new_stmt, new_temp);
> +         vect_finish_stmt_generation (stmt, new_stmt, gsi);
> +         if (slp_node)
> +           SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
> +       }
> +
> +      if (slp_node)
> +       continue;
> +
> +      if (j == 0)
> +       STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
> +      else
> +       STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
> +
> +      prev_stmt_info = vinfo_for_stmt (new_stmt);
> +    }
> +
> +  vec_oprnds0.release ();
> +  vec_oprnds1.release ();
> +
> +  return true;
> +}
>
>  /* Make sure the statement is vectorizable.  */
>
> @@ -7728,7 +7933,8 @@ vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
>           || vectorizable_call (stmt, NULL, NULL, node)
>           || vectorizable_store (stmt, NULL, NULL, node)
>           || vectorizable_reduction (stmt, NULL, NULL, node)
> -         || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
> +         || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
> +         || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
>    else
>      {
>        if (bb_vinfo)
> @@ -7740,7 +7946,8 @@ vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
>               || vectorizable_load (stmt, NULL, NULL, node, NULL)
>               || vectorizable_call (stmt, NULL, NULL, node)
>               || vectorizable_store (stmt, NULL, NULL, node)
> -             || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
> +             || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
> +             || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
>      }
>
>    if (!ok)
> @@ -7856,6 +8063,11 @@ vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
>        gcc_assert (done);
>        break;
>
> +    case comparison_vec_info_type:
> +      done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
> +      gcc_assert (done);
> +      break;
> +
>      case call_vec_info_type:
>        done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
>        stmt = gsi_stmt (*gsi);
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 5d8e945..23a82ee 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -28,7 +28,8 @@ along with GCC; see the file COPYING3.  If not see
>  enum vect_var_kind {
>    vect_simple_var,
>    vect_pointer_var,
> -  vect_scalar_var
> +  vect_scalar_var,
> +  vect_mask_var
>  };
>
>  /* Defines type of operation.  */
> @@ -482,6 +483,7 @@ enum stmt_vec_info_type {
>    call_simd_clone_vec_info_type,
>    assignment_vec_info_type,
>    condition_vec_info_type,
> +  comparison_vec_info_type,
>    reduc_vec_info_type,
>    induc_vec_info_type,
>    type_promotion_vec_info_type,
> @@ -1040,6 +1042,8 @@ extern void vect_remove_stores (gimple *);
>  extern bool vect_analyze_stmt (gimple *, bool *, slp_tree);
>  extern bool vectorizable_condition (gimple *, gimple_stmt_iterator *,
>                                     gimple **, tree, int, slp_tree);
> +extern bool vectorizable_comparison (gimple *, gimple_stmt_iterator *,
> +                                    gimple **, tree, int, slp_tree);
>  extern void vect_get_load_cost (struct data_reference *, int, bool,
>                                 unsigned int *, unsigned int *,
>                                 stmt_vector_for_cost *,
Ilya Enkovich Oct. 14, 2015, 12:06 p.m. UTC | #2
2015-10-13 16:45 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
> On Thu, Oct 8, 2015 at 5:03 PM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
>> Hi,
>>
>> This patch supports comparison statements vectrization basing on introduced optabs.
>>
>> Thanks,
>> Ilya
>> --
>> gcc/
>>
>> 2015-10-08  Ilya Enkovich  <enkovich.gnu@gmail.com>
>>
>>         * tree-vect-data-refs.c (vect_get_new_vect_var): Support vect_mask_var.
>>         (vect_create_destination_var): Likewise.
>>         * tree-vect-stmts.c (vectorizable_comparison): New.
>>         (vect_analyze_stmt): Add vectorizable_comparison.
>>         (vect_transform_stmt): Likewise.
>>         * tree-vectorizer.h (enum vect_var_kind): Add vect_mask_var.
>>         (enum stmt_vec_info_type): Add comparison_vec_info_type.
>>         (vectorizable_comparison): New.
>>
>>
>> diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
>> index 3befa38..9edc663 100644
>> --- a/gcc/tree-vect-data-refs.c
>> +++ b/gcc/tree-vect-data-refs.c
>> @@ -3849,6 +3849,9 @@ vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
>>    case vect_scalar_var:
>>      prefix = "stmp";
>>      break;
>> +  case vect_mask_var:
>> +    prefix = "mask";
>> +    break;
>>    case vect_pointer_var:
>>      prefix = "vectp";
>>      break;
>> @@ -4403,7 +4406,11 @@ vect_create_destination_var (tree scalar_dest, tree vectype)
>>    tree type;
>>    enum vect_var_kind kind;
>>
>> -  kind = vectype ? vect_simple_var : vect_scalar_var;
>> +  kind = vectype
>> +    ? VECTOR_BOOLEAN_TYPE_P (vectype)
>> +    ? vect_mask_var
>> +    : vect_simple_var
>> +    : vect_scalar_var;
>>    type = vectype ? vectype : TREE_TYPE (scalar_dest);
>>
>>    gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
>> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
>> index 8eda8e9..6949c71 100644
>> --- a/gcc/tree-vect-stmts.c
>> +++ b/gcc/tree-vect-stmts.c
>> @@ -7525,6 +7525,211 @@ vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
>>    return true;
>>  }
>>
>> +/* vectorizable_comparison.
>> +
>> +   Check if STMT is comparison expression that can be vectorized.
>> +   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
>> +   comparison, put it in VEC_STMT, and insert it at GSI.
>> +
>> +   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
>> +
>> +bool
>> +vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
>> +                        gimple **vec_stmt, tree reduc_def,
>> +                        slp_tree slp_node)
>> +{
>> +  tree lhs, rhs1, rhs2;
>> +  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
>> +  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
>> +  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
>> +  tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
>> +  tree vec_compare;
>> +  tree new_temp;
>> +  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
>> +  tree def;
>> +  enum vect_def_type dt, dts[4];
>> +  unsigned nunits;
>> +  int ncopies;
>> +  enum tree_code code;
>> +  stmt_vec_info prev_stmt_info = NULL;
>> +  int i, j;
>> +  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
>> +  vec<tree> vec_oprnds0 = vNULL;
>> +  vec<tree> vec_oprnds1 = vNULL;
>> +  tree mask_type;
>> +  tree mask;
>> +
>> +  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
>> +    return false;
>> +
>> +  mask_type = vectype;
>> +  nunits = TYPE_VECTOR_SUBPARTS (vectype);
>> +
>> +  if (slp_node || PURE_SLP_STMT (stmt_info))
>> +    ncopies = 1;
>> +  else
>> +    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
>> +
>> +  gcc_assert (ncopies >= 1);
>> +  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
>> +    return false;
>> +
>> +  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
>> +      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
>> +          && reduc_def))
>> +    return false;
>> +
>> +  if (STMT_VINFO_LIVE_P (stmt_info))
>> +    {
>> +      if (dump_enabled_p ())
>> +       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>> +                        "value used after loop.\n");
>> +      return false;
>> +    }
>> +
>> +  if (!is_gimple_assign (stmt))
>> +    return false;
>> +
>> +  code = gimple_assign_rhs_code (stmt);
>> +
>> +  if (TREE_CODE_CLASS (code) != tcc_comparison)
>> +    return false;
>> +
>> +  rhs1 = gimple_assign_rhs1 (stmt);
>> +  rhs2 = gimple_assign_rhs2 (stmt);
>> +
>> +  if (TREE_CODE (rhs1) == SSA_NAME)
>> +    {
>> +      gimple *rhs1_def_stmt = SSA_NAME_DEF_STMT (rhs1);
>> +      if (!vect_is_simple_use_1 (rhs1, stmt, loop_vinfo, bb_vinfo,
>> +                                &rhs1_def_stmt, &def, &dt, &vectype1))
>> +       return false;
>> +    }
>> +  else if (TREE_CODE (rhs1) != INTEGER_CST && TREE_CODE (rhs1) != REAL_CST
>> +          && TREE_CODE (rhs1) != FIXED_CST)
>> +    return false;
>
> I think vect_is_simple_use_1 handles constants just fine an def_stmt
> is an output,
> you don't need to initialize it.

OK

>
>> +
>> +  if (TREE_CODE (rhs2) == SSA_NAME)
>> +    {
>> +      gimple *rhs2_def_stmt = SSA_NAME_DEF_STMT (rhs2);
>> +      if (!vect_is_simple_use_1 (rhs2, stmt, loop_vinfo, bb_vinfo,
>> +                                &rhs2_def_stmt, &def, &dt, &vectype2))
>> +       return false;
>> +    }
>> +  else if (TREE_CODE (rhs2) != INTEGER_CST && TREE_CODE (rhs2) != REAL_CST
>> +          && TREE_CODE (rhs2) != FIXED_CST)
>> +    return false;
>> +
>> +  if (vectype1 && vectype2
>> +      && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
>> +    return false;
>> +
>> +  vectype = vectype1 ? vectype1 : vectype2;
>> +
>> +  /* Invariant comparison.  */
>> +  if (!vectype)
>
> you should detect invariantness of operands by looking at 'dt' as computed by
> vect_is_simple_use above.

vect_is_simple_use_1 guarantees returned vectype is NULL when dt is
vect_uninitialized_def, vect_constant_def or vect_external_def.  So why to
check both dts against these values instead of just checking vectype for NULL?

>
>> +    {
>> +      vectype = build_vector_type (TREE_TYPE (rhs1), nunits);
>> +      if (tree_to_shwi (TYPE_SIZE_UNIT (vectype)) != current_vector_size)
>> +       return false;
>> +    }
>> +  else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
>> +    return false;
>> +
>> +  if (!vec_stmt)
>> +    {
>> +      STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
>> +      return expand_vec_cmp_expr_p (vectype, mask_type);
>
> you are missing cost computation here.   That would be usually
> vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL).

OK, will add.

>
>> +    }
>> +
>> +  /* Transform.  */
>> +  if (!slp_node)
>> +    {
>> +      vec_oprnds0.create (1);
>> +      vec_oprnds1.create (1);
>> +    }
>> +
>> +  /* Handle def.  */
>> +  lhs = gimple_assign_lhs (stmt);
>> +  mask = vect_create_destination_var (lhs, mask_type);
>> +
>> +  /* Handle cmp expr.  */
>> +  for (j = 0; j < ncopies; j++)
>> +    {
>> +      gassign *new_stmt = NULL;
>> +      if (j == 0)
>> +       {
>> +         if (slp_node)
>> +           {
>> +             auto_vec<tree, 2> ops;
>> +             auto_vec<vec<tree>, 2> vec_defs;
>> +
>> +             ops.safe_push (rhs1);
>> +             ops.safe_push (rhs2);
>> +             vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
>> +             vec_oprnds1 = vec_defs.pop ();
>> +             vec_oprnds0 = vec_defs.pop ();
>> +
>> +             ops.release ();
>> +             vec_defs.release ();
>> +           }
>> +         else
>> +           {
>> +             gimple *gtemp;
>> +             vec_rhs1
>> +               = vect_get_vec_def_for_operand (rhs1, stmt, NULL);
>> +             vect_is_simple_use (rhs1, stmt, loop_vinfo, NULL,
>> +                                 &gtemp, &def, &dts[0]);
>> +             vec_rhs2 =
>> +               vect_get_vec_def_for_operand (rhs2, stmt, NULL);
>> +             vect_is_simple_use (rhs2, stmt, loop_vinfo, NULL,
>> +                                 &gtemp, &def, &dts[1]);
>> +           }
>> +       }
>> +      else
>> +       {
>> +         vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
>> +                                                    vec_oprnds0.pop ());
>> +         vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
>> +                                                    vec_oprnds1.pop ());
>
> dts is uninitialized for the slp_node case.  You should have initialized
> them by the vect_is_simple_use_1 call during analysis.

OK

>
> Otherwise this looks ok to me.  I suppose we have plenty of testcases
> exercising this
> from the original bool pattern support?  Esp. cases triggering SLP and
> SLP with unrolling.

Right. Many existing tests will use vector comparison on i386 target.

Will send an updated version after testing.

Thanks,
Ilya

>
> Thanks,
> Richard.
>
diff mbox

Patch

diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 3befa38..9edc663 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -3849,6 +3849,9 @@  vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
   case vect_scalar_var:
     prefix = "stmp";
     break;
+  case vect_mask_var:
+    prefix = "mask";
+    break;
   case vect_pointer_var:
     prefix = "vectp";
     break;
@@ -4403,7 +4406,11 @@  vect_create_destination_var (tree scalar_dest, tree vectype)
   tree type;
   enum vect_var_kind kind;
 
-  kind = vectype ? vect_simple_var : vect_scalar_var;
+  kind = vectype
+    ? VECTOR_BOOLEAN_TYPE_P (vectype)
+    ? vect_mask_var
+    : vect_simple_var
+    : vect_scalar_var;
   type = vectype ? vectype : TREE_TYPE (scalar_dest);
 
   gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 8eda8e9..6949c71 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -7525,6 +7525,211 @@  vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
   return true;
 }
 
+/* vectorizable_comparison.
+
+   Check if STMT is comparison expression that can be vectorized.
+   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
+   comparison, put it in VEC_STMT, and insert it at GSI.
+
+   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
+
+bool
+vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
+			 gimple **vec_stmt, tree reduc_def,
+			 slp_tree slp_node)
+{
+  tree lhs, rhs1, rhs2;
+  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
+  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+  tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
+  tree vec_compare;
+  tree new_temp;
+  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  tree def;
+  enum vect_def_type dt, dts[4];
+  unsigned nunits;
+  int ncopies;
+  enum tree_code code;
+  stmt_vec_info prev_stmt_info = NULL;
+  int i, j;
+  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+  vec<tree> vec_oprnds0 = vNULL;
+  vec<tree> vec_oprnds1 = vNULL;
+  tree mask_type;
+  tree mask;
+
+  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
+    return false;
+
+  mask_type = vectype;
+  nunits = TYPE_VECTOR_SUBPARTS (vectype);
+
+  if (slp_node || PURE_SLP_STMT (stmt_info))
+    ncopies = 1;
+  else
+    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
+
+  gcc_assert (ncopies >= 1);
+  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
+    return false;
+
+  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
+      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
+	   && reduc_def))
+    return false;
+
+  if (STMT_VINFO_LIVE_P (stmt_info))
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			 "value used after loop.\n");
+      return false;
+    }
+
+  if (!is_gimple_assign (stmt))
+    return false;
+
+  code = gimple_assign_rhs_code (stmt);
+
+  if (TREE_CODE_CLASS (code) != tcc_comparison)
+    return false;
+
+  rhs1 = gimple_assign_rhs1 (stmt);
+  rhs2 = gimple_assign_rhs2 (stmt);
+
+  if (TREE_CODE (rhs1) == SSA_NAME)
+    {
+      gimple *rhs1_def_stmt = SSA_NAME_DEF_STMT (rhs1);
+      if (!vect_is_simple_use_1 (rhs1, stmt, loop_vinfo, bb_vinfo,
+				 &rhs1_def_stmt, &def, &dt, &vectype1))
+	return false;
+    }
+  else if (TREE_CODE (rhs1) != INTEGER_CST && TREE_CODE (rhs1) != REAL_CST
+	   && TREE_CODE (rhs1) != FIXED_CST)
+    return false;
+
+  if (TREE_CODE (rhs2) == SSA_NAME)
+    {
+      gimple *rhs2_def_stmt = SSA_NAME_DEF_STMT (rhs2);
+      if (!vect_is_simple_use_1 (rhs2, stmt, loop_vinfo, bb_vinfo,
+				 &rhs2_def_stmt, &def, &dt, &vectype2))
+	return false;
+    }
+  else if (TREE_CODE (rhs2) != INTEGER_CST && TREE_CODE (rhs2) != REAL_CST
+	   && TREE_CODE (rhs2) != FIXED_CST)
+    return false;
+
+  if (vectype1 && vectype2
+      && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
+    return false;
+
+  vectype = vectype1 ? vectype1 : vectype2;
+
+  /* Invariant comparison.  */
+  if (!vectype)
+    {
+      vectype = build_vector_type (TREE_TYPE (rhs1), nunits);
+      if (tree_to_shwi (TYPE_SIZE_UNIT (vectype)) != current_vector_size)
+	return false;
+    }
+  else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
+    return false;
+
+  if (!vec_stmt)
+    {
+      STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
+      return expand_vec_cmp_expr_p (vectype, mask_type);
+    }
+
+  /* Transform.  */
+  if (!slp_node)
+    {
+      vec_oprnds0.create (1);
+      vec_oprnds1.create (1);
+    }
+
+  /* Handle def.  */
+  lhs = gimple_assign_lhs (stmt);
+  mask = vect_create_destination_var (lhs, mask_type);
+
+  /* Handle cmp expr.  */
+  for (j = 0; j < ncopies; j++)
+    {
+      gassign *new_stmt = NULL;
+      if (j == 0)
+	{
+	  if (slp_node)
+	    {
+	      auto_vec<tree, 2> ops;
+	      auto_vec<vec<tree>, 2> vec_defs;
+
+	      ops.safe_push (rhs1);
+	      ops.safe_push (rhs2);
+	      vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
+	      vec_oprnds1 = vec_defs.pop ();
+	      vec_oprnds0 = vec_defs.pop ();
+
+	      ops.release ();
+	      vec_defs.release ();
+	    }
+	  else
+	    {
+	      gimple *gtemp;
+	      vec_rhs1
+		= vect_get_vec_def_for_operand (rhs1, stmt, NULL);
+	      vect_is_simple_use (rhs1, stmt, loop_vinfo, NULL,
+				  &gtemp, &def, &dts[0]);
+	      vec_rhs2 =
+		vect_get_vec_def_for_operand (rhs2, stmt, NULL);
+	      vect_is_simple_use (rhs2, stmt, loop_vinfo, NULL,
+				  &gtemp, &def, &dts[1]);
+	    }
+	}
+      else
+	{
+	  vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
+						     vec_oprnds0.pop ());
+	  vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
+						     vec_oprnds1.pop ());
+	}
+
+      if (!slp_node)
+	{
+	  vec_oprnds0.quick_push (vec_rhs1);
+	  vec_oprnds1.quick_push (vec_rhs2);
+	}
+
+      /* Arguments are ready.  Create the new vector stmt.  */
+      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
+	{
+	  vec_rhs2 = vec_oprnds1[i];
+
+	  vec_compare = build2 (code, mask_type, vec_rhs1, vec_rhs2);
+	  new_stmt = gimple_build_assign (mask, vec_compare);
+	  new_temp = make_ssa_name (mask, new_stmt);
+	  gimple_assign_set_lhs (new_stmt, new_temp);
+	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
+	  if (slp_node)
+	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
+	}
+
+      if (slp_node)
+	continue;
+
+      if (j == 0)
+	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
+      else
+	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
+
+      prev_stmt_info = vinfo_for_stmt (new_stmt);
+    }
+
+  vec_oprnds0.release ();
+  vec_oprnds1.release ();
+
+  return true;
+}
 
 /* Make sure the statement is vectorizable.  */
 
@@ -7728,7 +7933,8 @@  vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
 	  || vectorizable_call (stmt, NULL, NULL, node)
 	  || vectorizable_store (stmt, NULL, NULL, node)
 	  || vectorizable_reduction (stmt, NULL, NULL, node)
-	  || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
+	  || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
+	  || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
   else
     {
       if (bb_vinfo)
@@ -7740,7 +7946,8 @@  vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
 	      || vectorizable_load (stmt, NULL, NULL, node, NULL)
 	      || vectorizable_call (stmt, NULL, NULL, node)
 	      || vectorizable_store (stmt, NULL, NULL, node)
-	      || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
+	      || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
+	      || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
     }
 
   if (!ok)
@@ -7856,6 +8063,11 @@  vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
       gcc_assert (done);
       break;
 
+    case comparison_vec_info_type:
+      done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
+      gcc_assert (done);
+      break;
+
     case call_vec_info_type:
       done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
       stmt = gsi_stmt (*gsi);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 5d8e945..23a82ee 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -28,7 +28,8 @@  along with GCC; see the file COPYING3.  If not see
 enum vect_var_kind {
   vect_simple_var,
   vect_pointer_var,
-  vect_scalar_var
+  vect_scalar_var,
+  vect_mask_var
 };
 
 /* Defines type of operation.  */
@@ -482,6 +483,7 @@  enum stmt_vec_info_type {
   call_simd_clone_vec_info_type,
   assignment_vec_info_type,
   condition_vec_info_type,
+  comparison_vec_info_type,
   reduc_vec_info_type,
   induc_vec_info_type,
   type_promotion_vec_info_type,
@@ -1040,6 +1042,8 @@  extern void vect_remove_stores (gimple *);
 extern bool vect_analyze_stmt (gimple *, bool *, slp_tree);
 extern bool vectorizable_condition (gimple *, gimple_stmt_iterator *,
 				    gimple **, tree, int, slp_tree);
+extern bool vectorizable_comparison (gimple *, gimple_stmt_iterator *,
+				     gimple **, tree, int, slp_tree);
 extern void vect_get_load_cost (struct data_reference *, int, bool,
 				unsigned int *, unsigned int *,
 				stmt_vector_for_cost *,