diff mbox

[mask-load,1/2] Use boolean predicate for masked loads and store

Message ID 20151008154029.GH63757@msticlxl57.ims.intel.com
State New
Headers show

Commit Message

Ilya Enkovich Oct. 8, 2015, 3:40 p.m. UTC
Hi,

This patch replaces integer mask argument for MASK_LOAD ans MASK_STORE calls with a boolean one.  To allow various boolean vector modes assigned by a target maskload and maskstore optabs were transformed into convert_optab to get mask mode as a second operand.  Patch applies on top of boolean vector patch series.

Thanks,
Ilya
--
gcc/

2015-10-08  Ilya Enkovich  <enkovich.gnu@gmail.com>

	* internal-fn.c (expand_MASK_LOAD): Adjust to maskload optab changes.
	(expand_MASK_STORE): Adjust to maskstore optab changes.
	* optabs-query.c (can_vec_mask_load_store_p): Add MASK_MODE arg.
	 Adjust to maskload, maskstore optab changes.
	* optabs-query.h (can_vec_mask_load_store_p): Add MASK_MODE arg.
	* optabs.def (maskload_optab): Transform into convert optab.
	(maskstore_optab): Likewise.
	* tree-if-conv.c (ifcvt_can_use_mask_load_store): Adjust to
	can_vec_mask_load_store_p signature change.
	(predicate_mem_writes): Use boolean mask.
	* tree-vect-stmts.c (vectorizable_mask_load_store): Adjust to
	can_vec_mask_load_store_p signature change.  Allow invariant masks.

Comments

Richard Biener Oct. 23, 2015, 9:59 a.m. UTC | #1
On Thu, Oct 8, 2015 at 5:40 PM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
> Hi,
>
> This patch replaces integer mask argument for MASK_LOAD ans MASK_STORE calls with a boolean one.  To allow various boolean vector modes assigned by a target maskload and maskstore optabs were transformed into convert_optab to get mask mode as a second operand.  Patch applies on top of boolean vector patch series.
>
> Thanks,
> Ilya
> --
> gcc/
>
> 2015-10-08  Ilya Enkovich  <enkovich.gnu@gmail.com>
>
>         * internal-fn.c (expand_MASK_LOAD): Adjust to maskload optab changes.
>         (expand_MASK_STORE): Adjust to maskstore optab changes.
>         * optabs-query.c (can_vec_mask_load_store_p): Add MASK_MODE arg.
>          Adjust to maskload, maskstore optab changes.
>         * optabs-query.h (can_vec_mask_load_store_p): Add MASK_MODE arg.
>         * optabs.def (maskload_optab): Transform into convert optab.
>         (maskstore_optab): Likewise.
>         * tree-if-conv.c (ifcvt_can_use_mask_load_store): Adjust to
>         can_vec_mask_load_store_p signature change.
>         (predicate_mem_writes): Use boolean mask.
>         * tree-vect-stmts.c (vectorizable_mask_load_store): Adjust to
>         can_vec_mask_load_store_p signature change.  Allow invariant masks.
>
>
> diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> index 71f811c..5ea3c0d 100644
> --- a/gcc/internal-fn.c
> +++ b/gcc/internal-fn.c
> @@ -1885,7 +1885,9 @@ expand_MASK_LOAD (gcall *stmt)
>    create_output_operand (&ops[0], target, TYPE_MODE (type));
>    create_fixed_operand (&ops[1], mem);
>    create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
> -  expand_insn (optab_handler (maskload_optab, TYPE_MODE (type)), 3, ops);
> +  expand_insn (convert_optab_handler (maskload_optab, TYPE_MODE (type),
> +                                     TYPE_MODE (TREE_TYPE (maskt))),
> +              3, ops);
>  }
>
>  static void
> @@ -1908,7 +1910,9 @@ expand_MASK_STORE (gcall *stmt)
>    create_fixed_operand (&ops[0], mem);
>    create_input_operand (&ops[1], reg, TYPE_MODE (type));
>    create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
> -  expand_insn (optab_handler (maskstore_optab, TYPE_MODE (type)), 3, ops);
> +  expand_insn (convert_optab_handler (maskstore_optab, TYPE_MODE (type),
> +                                     TYPE_MODE (TREE_TYPE (maskt))),
> +              3, ops);
>  }
>
>  static void
> diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c
> index 254089f..c20597c 100644
> --- a/gcc/optabs-query.c
> +++ b/gcc/optabs-query.c
> @@ -466,7 +466,9 @@ can_mult_highpart_p (machine_mode mode, bool uns_p)
>  /* Return true if target supports vector masked load/store for mode.  */
>
>  bool
> -can_vec_mask_load_store_p (machine_mode mode, bool is_load)
> +can_vec_mask_load_store_p (machine_mode mode,
> +                          machine_mode mask_mode,
> +                          bool is_load)
>  {
>    optab op = is_load ? maskload_optab : maskstore_optab;
>    machine_mode vmode;
> @@ -474,7 +476,7 @@ can_vec_mask_load_store_p (machine_mode mode, bool is_load)
>
>    /* If mode is vector mode, check it directly.  */
>    if (VECTOR_MODE_P (mode))
> -    return optab_handler (op, mode) != CODE_FOR_nothing;
> +    return convert_optab_handler (op, mode, mask_mode) != CODE_FOR_nothing;
>
>    /* Otherwise, return true if there is some vector mode with
>       the mask load/store supported.  */
> @@ -485,7 +487,12 @@ can_vec_mask_load_store_p (machine_mode mode, bool is_load)
>    if (!VECTOR_MODE_P (vmode))
>      return false;
>
> -  if (optab_handler (op, vmode) != CODE_FOR_nothing)
> +  mask_mode = targetm.vectorize.get_mask_mode (GET_MODE_NUNITS (vmode),
> +                                              GET_MODE_SIZE (vmode));
> +  if (mask_mode == VOIDmode)
> +    return false;
> +
> +  if (convert_optab_handler (op, vmode, mask_mode) != CODE_FOR_nothing)
>      return true;
>
>    vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
> @@ -496,8 +503,10 @@ can_vec_mask_load_store_p (machine_mode mode, bool is_load)
>        if (cur <= GET_MODE_SIZE (mode))
>         continue;
>        vmode = mode_for_vector (mode, cur / GET_MODE_SIZE (mode));
> +      mask_mode = targetm.vectorize.get_mask_mode (GET_MODE_NUNITS (vmode),
> +                                                  cur);
>        if (VECTOR_MODE_P (vmode)
> -         && optab_handler (op, vmode) != CODE_FOR_nothing)
> +         && convert_optab_handler (op, vmode, mask_mode) != CODE_FOR_nothing)
>         return true;
>      }
>    return false;
> diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h
> index 81ac362..162d2e9 100644
> --- a/gcc/optabs-query.h
> +++ b/gcc/optabs-query.h
> @@ -140,7 +140,7 @@ enum insn_code find_widening_optab_handler_and_mode (optab, machine_mode,
>                                                      machine_mode, int,
>                                                      machine_mode *);
>  int can_mult_highpart_p (machine_mode, bool);
> -bool can_vec_mask_load_store_p (machine_mode, bool);
> +bool can_vec_mask_load_store_p (machine_mode, machine_mode, bool);
>  bool can_compare_and_swap_p (machine_mode, bool);
>  bool can_atomic_exchange_p (machine_mode, bool);
>  bool lshift_cheap_p (bool);
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index 1f9c1cf..9804378 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -63,6 +63,8 @@ OPTAB_CD(vcond_optab, "vcond$a$b")
>  OPTAB_CD(vcondu_optab, "vcondu$a$b")
>  OPTAB_CD(vec_cmp_optab, "vec_cmp$a$b")
>  OPTAB_CD(vec_cmpu_optab, "vec_cmpu$a$b")
> +OPTAB_CD(maskload_optab, "maskload$a$b")
> +OPTAB_CD(maskstore_optab, "maskstore$a$b")
>
>  OPTAB_NL(add_optab, "add$P$a3", PLUS, "add", '3', gen_int_fp_fixed_libfunc)
>  OPTAB_NX(add_optab, "add$F$a3")
> @@ -266,8 +268,6 @@ OPTAB_D (udot_prod_optab, "udot_prod$I$a")
>  OPTAB_D (usum_widen_optab, "widen_usum$I$a3")
>  OPTAB_D (usad_optab, "usad$I$a")
>  OPTAB_D (ssad_optab, "ssad$I$a")
> -OPTAB_D (maskload_optab, "maskload$a")
> -OPTAB_D (maskstore_optab, "maskstore$a")
>  OPTAB_D (vec_extract_optab, "vec_extract$a")
>  OPTAB_D (vec_init_optab, "vec_init$a")
>  OPTAB_D (vec_pack_sfix_trunc_optab, "vec_pack_sfix_trunc_$a")
> diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c
> index 25c9599..5d1c8ea 100644
> --- a/gcc/tree-if-conv.c
> +++ b/gcc/tree-if-conv.c
> @@ -811,7 +811,7 @@ ifcvt_can_use_mask_load_store (gimple *stmt)
>        || VECTOR_MODE_P (mode))
>      return false;
>
> -  if (can_vec_mask_load_store_p (mode, is_load))
> +  if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
>      return true;
>
>    return false;
> @@ -2068,7 +2068,7 @@ predicate_mem_writes (loop_p loop)
>           {
>             tree lhs = gimple_assign_lhs (stmt);
>             tree rhs = gimple_assign_rhs1 (stmt);
> -           tree ref, addr, ptr, masktype, mask_op0, mask_op1, mask;
> +           tree ref, addr, ptr, mask;
>             gimple *new_stmt;
>             int bitsize = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (lhs)));
>             ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
> @@ -2082,16 +2082,51 @@ predicate_mem_writes (loop_p loop)
>               mask = vect_masks[index];
>             else
>               {
> -               masktype = build_nonstandard_integer_type (bitsize, 1);
> -               mask_op0 = build_int_cst (masktype, swap ? 0 : -1);
> -               mask_op1 = build_int_cst (masktype, swap ? -1 : 0);
> -               cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
> -                                                  is_gimple_condexpr,
> -                                                  NULL_TREE,
> -                                                  true, GSI_SAME_STMT);
> -               mask = fold_build_cond_expr (masktype, unshare_expr (cond),
> -                                            mask_op0, mask_op1);
> -               mask = ifc_temp_var (masktype, mask, &gsi);
> +               if ((TREE_CODE (cond) == NE_EXPR
> +                    || TREE_CODE (cond) == EQ_EXPR)
> +                   && (integer_zerop (TREE_OPERAND (cond, 1))
> +                       || integer_onep (TREE_OPERAND (cond, 1)))
> +                   && TREE_CODE (TREE_TYPE (TREE_OPERAND (cond, 0)))
> +                      == BOOLEAN_TYPE)
> +                 {
> +                   bool negate = (TREE_CODE (cond) == EQ_EXPR);
> +                   if (integer_onep (TREE_OPERAND (cond, 1)))
> +                     negate = !negate;
> +                   if (swap)
> +                     negate = !negate;
> +                   mask = TREE_OPERAND (cond, 0);
> +                   if (negate)
> +                     {
> +                       mask = ifc_temp_var (TREE_TYPE (mask),
> +                                            unshare_expr (cond),
> +                                            &gsi);
> +                       mask = build2 (EQ_EXPR, TREE_TYPE (mask), mask,
> +                                      build_zero_cst (TREE_TYPE (mask)));
> +                     }
> +                 }
> +               else if (swap &&
> +                        TREE_CODE_CLASS (TREE_CODE (cond)) == tcc_comparison)
> +                 {
> +                   tree op_type = TREE_TYPE (TREE_OPERAND (cond, 0));
> +                   tree_code code
> +                     = invert_tree_comparison (TREE_CODE (cond),
> +                                               HONOR_NANS (op_type));
> +                   if (code != ERROR_MARK)
> +                       mask = build2 (code, TREE_TYPE (cond),
> +                                      TREE_OPERAND (cond, 0),
> +                                      TREE_OPERAND (cond, 1));
> +                   else
> +                     {
> +                       mask = ifc_temp_var (TREE_TYPE (cond),
> +                                            unshare_expr (cond),
> +                                            &gsi);
> +                       mask = build2 (EQ_EXPR, TREE_TYPE (mask), mask,
> +                                      build_zero_cst (TREE_TYPE (mask)));
> +                     }
> +                 }
> +               else
> +                 mask = unshare_expr (cond);
> +               mask = ifc_temp_var (TREE_TYPE (mask), mask, &gsi);

ICK.  So what does the above do?  It basically preserves the boolean condition
as "mask" unless ... we ought to swap it (formerly easy, just swap arguments
of the cond_expr, now a bit harder, we need to invert the condition).  But I
don't understand the 'negate' dance.  It looks like you want to have mask
not be bool != 0 or bool == 1 but just bool in this case.  I suggest you
rework this to do sth like

   gimple_seq stmts = NULL;
   gcc_assert (types_compatible_p (TREE_TYPE (cond), boolean_type_node));
   if (TREE_CODE (cond) == SSA_NAME)
     ;
   else if (COMPARISON_CLASS_P (cond))
     mask = gimple_build (&stmts, TREE_CODE (cond), boolean_type_node,
TREE_OPERAND (cond, 0), TREE_OPERAND (cond, 1));
   else
      gcc_unreachable ();
   if (swap)
     mask = gimple_build (&stmts, BIT_XOR_EXPR, boolean_type_node,
mask, boolean_true_node);
   gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);

which should do all of the above.

The rest of the changes look good to me.

Thanks,
Richard.

>                 /* Save mask and its size for further use.  */
>                 vect_sizes.safe_push (bitsize);
>                 vect_masks.safe_push (mask);
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index 337ea7b..d6819ec 100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -1800,6 +1800,7 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
>    bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
>    struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
>    tree vectype = STMT_VINFO_VECTYPE (stmt_info);
> +  tree mask_vectype;
>    tree elem_type;
>    gimple *new_stmt;
>    tree dummy;
> @@ -1827,8 +1828,8 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
>
>    is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
>    mask = gimple_call_arg (stmt, 2);
> -  if (TYPE_PRECISION (TREE_TYPE (mask))
> -      != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
> +
> +  if (TREE_CODE (TREE_TYPE (mask)) != BOOLEAN_TYPE)
>      return false;
>
>    /* FORNOW. This restriction should be relaxed.  */
> @@ -1857,6 +1858,19 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
>    if (STMT_VINFO_STRIDED_P (stmt_info))
>      return false;
>
> +  if (TREE_CODE (mask) != SSA_NAME)
> +    return false;
> +
> +  if (!vect_is_simple_use_1 (mask, stmt, loop_vinfo, NULL,
> +                            &def_stmt, &def, &dt, &mask_vectype))
> +    return false;
> +
> +  if (!mask_vectype)
> +    mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
> +
> +  if (!mask_vectype)
> +    return false;
> +
>    if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
>      {
>        gimple *def_stmt;
> @@ -1890,14 +1904,9 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
>                                  : DR_STEP (dr), size_zero_node) <= 0)
>      return false;
>    else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
> -          || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
> -    return false;
> -
> -  if (TREE_CODE (mask) != SSA_NAME)
> -    return false;
> -
> -  if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
> -                          &def_stmt, &def, &dt))
> +          || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
> +                                         TYPE_MODE (mask_vectype),
> +                                         !is_store))
>      return false;
>
>    if (is_store)
Ilya Enkovich Oct. 23, 2015, 10:23 a.m. UTC | #2
2015-10-23 12:59 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>
> ICK.  So what does the above do?  It basically preserves the boolean condition
> as "mask" unless ... we ought to swap it (formerly easy, just swap arguments
> of the cond_expr, now a bit harder, we need to invert the condition).  But I
> don't understand the 'negate' dance.  It looks like you want to have mask
> not be bool != 0 or bool == 1 but just bool in this case.  I suggest you
> rework this to do sth like

That's right, I want to avoid ==,!= comparisons with 0 and 1 by either
using compared SSA_NAME
or SSA_NAME != 0 (negate case).

>
>    gimple_seq stmts = NULL;
>    gcc_assert (types_compatible_p (TREE_TYPE (cond), boolean_type_node));

Is it really valid assert? Compiling fortran test with LTO I may have
logical(kind=4) [aka 32bit boolean]
type for cond and single bit _Bool for boolean_type_node.

>    if (TREE_CODE (cond) == SSA_NAME)
>      ;
>    else if (COMPARISON_CLASS_P (cond))
>      mask = gimple_build (&stmts, TREE_CODE (cond), boolean_type_node,
> TREE_OPERAND (cond, 0), TREE_OPERAND (cond, 1));
>    else
>       gcc_unreachable ();
>    if (swap)
>      mask = gimple_build (&stmts, BIT_XOR_EXPR, boolean_type_node,
> mask, boolean_true_node);
>    gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
>
> which should do all of the above.

Thus we would get smth like

mask_1 = bool != 1
mask_2 = mask_1 XOR 1
_ifc_ = mask_2

instead of

_ifc_ = bool

Note that cond is built to be used as a condition in COND_EXPR
prepared for vectorization, i.e. it is always a comparison, thus
comparison with 0 and 1 is quite a common case.

Thanks,
Ilya

>
> The rest of the changes look good to me.
>
> Thanks,
> Richard.
>
>>                 /* Save mask and its size for further use.  */
>>                 vect_sizes.safe_push (bitsize);
>>                 vect_masks.safe_push (mask);
>> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
>> index 337ea7b..d6819ec 100644
>> --- a/gcc/tree-vect-stmts.c
>> +++ b/gcc/tree-vect-stmts.c
>> @@ -1800,6 +1800,7 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
>>    bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
>>    struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
>>    tree vectype = STMT_VINFO_VECTYPE (stmt_info);
>> +  tree mask_vectype;
>>    tree elem_type;
>>    gimple *new_stmt;
>>    tree dummy;
>> @@ -1827,8 +1828,8 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
>>
>>    is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
>>    mask = gimple_call_arg (stmt, 2);
>> -  if (TYPE_PRECISION (TREE_TYPE (mask))
>> -      != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
>> +
>> +  if (TREE_CODE (TREE_TYPE (mask)) != BOOLEAN_TYPE)
>>      return false;
>>
>>    /* FORNOW. This restriction should be relaxed.  */
>> @@ -1857,6 +1858,19 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
>>    if (STMT_VINFO_STRIDED_P (stmt_info))
>>      return false;
>>
>> +  if (TREE_CODE (mask) != SSA_NAME)
>> +    return false;
>> +
>> +  if (!vect_is_simple_use_1 (mask, stmt, loop_vinfo, NULL,
>> +                            &def_stmt, &def, &dt, &mask_vectype))
>> +    return false;
>> +
>> +  if (!mask_vectype)
>> +    mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
>> +
>> +  if (!mask_vectype)
>> +    return false;
>> +
>>    if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
>>      {
>>        gimple *def_stmt;
>> @@ -1890,14 +1904,9 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
>>                                  : DR_STEP (dr), size_zero_node) <= 0)
>>      return false;
>>    else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
>> -          || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
>> -    return false;
>> -
>> -  if (TREE_CODE (mask) != SSA_NAME)
>> -    return false;
>> -
>> -  if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
>> -                          &def_stmt, &def, &dt))
>> +          || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
>> +                                         TYPE_MODE (mask_vectype),
>> +                                         !is_store))
>>      return false;
>>
>>    if (is_store)
Richard Biener Oct. 23, 2015, 10:32 a.m. UTC | #3
On Fri, Oct 23, 2015 at 12:23 PM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
> 2015-10-23 12:59 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>
>> ICK.  So what does the above do?  It basically preserves the boolean condition
>> as "mask" unless ... we ought to swap it (formerly easy, just swap arguments
>> of the cond_expr, now a bit harder, we need to invert the condition).  But I
>> don't understand the 'negate' dance.  It looks like you want to have mask
>> not be bool != 0 or bool == 1 but just bool in this case.  I suggest you
>> rework this to do sth like
>
> That's right, I want to avoid ==,!= comparisons with 0 and 1 by either
> using compared SSA_NAME
> or SSA_NAME != 0 (negate case).
>
>>
>>    gimple_seq stmts = NULL;
>>    gcc_assert (types_compatible_p (TREE_TYPE (cond), boolean_type_node));
>
> Is it really valid assert? Compiling fortran test with LTO I may have
> logical(kind=4) [aka 32bit boolean]
> type for cond and single bit _Bool for boolean_type_node.

I put it there to make sure it is because otherwise the use of boolean_type_node
below needs adjustment (boolean_true_node as well).  TREE_TYPE (cond)
would work and constant_boolean_node (true, TREE_TYPE (cond)) for
boolean_type_node.

Yes, you are right.

>>    if (TREE_CODE (cond) == SSA_NAME)
>>      ;
>>    else if (COMPARISON_CLASS_P (cond))
>>      mask = gimple_build (&stmts, TREE_CODE (cond), boolean_type_node,
>> TREE_OPERAND (cond, 0), TREE_OPERAND (cond, 1));
>>    else
>>       gcc_unreachable ();
>>    if (swap)
>>      mask = gimple_build (&stmts, BIT_XOR_EXPR, boolean_type_node,
>> mask, boolean_true_node);
>>    gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
>>
>> which should do all of the above.
>
> Thus we would get smth like
>
> mask_1 = bool != 1
> mask_2 = mask_1 XOR 1
> _ifc_ = mask_2

No, we'd get

  mask_1 = bool != 1;

and the 'mask' variable should have been simplified to 'bool'
(yes, we'd insert a dead stmt).  gimple_build simplifies
stmts via the match-and-simplify machinery and match.pd
knows how to invert conditions.

> instead of
>
> _ifc_ = bool
>
> Note that cond is built to be used as a condition in COND_EXPR
> prepared for vectorization, i.e. it is always a comparison, thus
> comparison with 0 and 1 is quite a common case.
>
> Thanks,
> Ilya
>
>>
>> The rest of the changes look good to me.
>>
>> Thanks,
>> Richard.
>>
>>>                 /* Save mask and its size for further use.  */
>>>                 vect_sizes.safe_push (bitsize);
>>>                 vect_masks.safe_push (mask);
>>> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
>>> index 337ea7b..d6819ec 100644
>>> --- a/gcc/tree-vect-stmts.c
>>> +++ b/gcc/tree-vect-stmts.c
>>> @@ -1800,6 +1800,7 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
>>>    bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
>>>    struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
>>>    tree vectype = STMT_VINFO_VECTYPE (stmt_info);
>>> +  tree mask_vectype;
>>>    tree elem_type;
>>>    gimple *new_stmt;
>>>    tree dummy;
>>> @@ -1827,8 +1828,8 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
>>>
>>>    is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
>>>    mask = gimple_call_arg (stmt, 2);
>>> -  if (TYPE_PRECISION (TREE_TYPE (mask))
>>> -      != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
>>> +
>>> +  if (TREE_CODE (TREE_TYPE (mask)) != BOOLEAN_TYPE)
>>>      return false;
>>>
>>>    /* FORNOW. This restriction should be relaxed.  */
>>> @@ -1857,6 +1858,19 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
>>>    if (STMT_VINFO_STRIDED_P (stmt_info))
>>>      return false;
>>>
>>> +  if (TREE_CODE (mask) != SSA_NAME)
>>> +    return false;
>>> +
>>> +  if (!vect_is_simple_use_1 (mask, stmt, loop_vinfo, NULL,
>>> +                            &def_stmt, &def, &dt, &mask_vectype))
>>> +    return false;
>>> +
>>> +  if (!mask_vectype)
>>> +    mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
>>> +
>>> +  if (!mask_vectype)
>>> +    return false;
>>> +
>>>    if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
>>>      {
>>>        gimple *def_stmt;
>>> @@ -1890,14 +1904,9 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
>>>                                  : DR_STEP (dr), size_zero_node) <= 0)
>>>      return false;
>>>    else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
>>> -          || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
>>> -    return false;
>>> -
>>> -  if (TREE_CODE (mask) != SSA_NAME)
>>> -    return false;
>>> -
>>> -  if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
>>> -                          &def_stmt, &def, &dt))
>>> +          || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
>>> +                                         TYPE_MODE (mask_vectype),
>>> +                                         !is_store))
>>>      return false;
>>>
>>>    if (is_store)
Ilya Enkovich Oct. 23, 2015, 10:36 a.m. UTC | #4
2015-10-23 13:32 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
> On Fri, Oct 23, 2015 at 12:23 PM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
>> 2015-10-23 12:59 GMT+03:00 Richard Biener <richard.guenther@gmail.com>:
>>>
>>> ICK.  So what does the above do?  It basically preserves the boolean condition
>>> as "mask" unless ... we ought to swap it (formerly easy, just swap arguments
>>> of the cond_expr, now a bit harder, we need to invert the condition).  But I
>>> don't understand the 'negate' dance.  It looks like you want to have mask
>>> not be bool != 0 or bool == 1 but just bool in this case.  I suggest you
>>> rework this to do sth like
>>
>> That's right, I want to avoid ==,!= comparisons with 0 and 1 by either
>> using compared SSA_NAME
>> or SSA_NAME != 0 (negate case).
>>
>>>
>>>    gimple_seq stmts = NULL;
>>>    gcc_assert (types_compatible_p (TREE_TYPE (cond), boolean_type_node));
>>
>> Is it really valid assert? Compiling fortran test with LTO I may have
>> logical(kind=4) [aka 32bit boolean]
>> type for cond and single bit _Bool for boolean_type_node.
>
> I put it there to make sure it is because otherwise the use of boolean_type_node
> below needs adjustment (boolean_true_node as well).  TREE_TYPE (cond)
> would work and constant_boolean_node (true, TREE_TYPE (cond)) for
> boolean_type_node.
>
> Yes, you are right.
>
>>>    if (TREE_CODE (cond) == SSA_NAME)
>>>      ;
>>>    else if (COMPARISON_CLASS_P (cond))
>>>      mask = gimple_build (&stmts, TREE_CODE (cond), boolean_type_node,
>>> TREE_OPERAND (cond, 0), TREE_OPERAND (cond, 1));
>>>    else
>>>       gcc_unreachable ();
>>>    if (swap)
>>>      mask = gimple_build (&stmts, BIT_XOR_EXPR, boolean_type_node,
>>> mask, boolean_true_node);
>>>    gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
>>>
>>> which should do all of the above.
>>
>> Thus we would get smth like
>>
>> mask_1 = bool != 1
>> mask_2 = mask_1 XOR 1
>> _ifc_ = mask_2
>
> No, we'd get
>
>   mask_1 = bool != 1;
>
> and the 'mask' variable should have been simplified to 'bool'
> (yes, we'd insert a dead stmt).  gimple_build simplifies
> stmts via the match-and-simplify machinery and match.pd
> knows how to invert conditions.
>

Thanks! I'll try it.

Ilya
diff mbox

Patch

diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 71f811c..5ea3c0d 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -1885,7 +1885,9 @@  expand_MASK_LOAD (gcall *stmt)
   create_output_operand (&ops[0], target, TYPE_MODE (type));
   create_fixed_operand (&ops[1], mem);
   create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
-  expand_insn (optab_handler (maskload_optab, TYPE_MODE (type)), 3, ops);
+  expand_insn (convert_optab_handler (maskload_optab, TYPE_MODE (type),
+				      TYPE_MODE (TREE_TYPE (maskt))),
+	       3, ops);
 }
 
 static void
@@ -1908,7 +1910,9 @@  expand_MASK_STORE (gcall *stmt)
   create_fixed_operand (&ops[0], mem);
   create_input_operand (&ops[1], reg, TYPE_MODE (type));
   create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
-  expand_insn (optab_handler (maskstore_optab, TYPE_MODE (type)), 3, ops);
+  expand_insn (convert_optab_handler (maskstore_optab, TYPE_MODE (type),
+				      TYPE_MODE (TREE_TYPE (maskt))),
+	       3, ops);
 }
 
 static void
diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c
index 254089f..c20597c 100644
--- a/gcc/optabs-query.c
+++ b/gcc/optabs-query.c
@@ -466,7 +466,9 @@  can_mult_highpart_p (machine_mode mode, bool uns_p)
 /* Return true if target supports vector masked load/store for mode.  */
 
 bool
-can_vec_mask_load_store_p (machine_mode mode, bool is_load)
+can_vec_mask_load_store_p (machine_mode mode,
+			   machine_mode mask_mode,
+			   bool is_load)
 {
   optab op = is_load ? maskload_optab : maskstore_optab;
   machine_mode vmode;
@@ -474,7 +476,7 @@  can_vec_mask_load_store_p (machine_mode mode, bool is_load)
 
   /* If mode is vector mode, check it directly.  */
   if (VECTOR_MODE_P (mode))
-    return optab_handler (op, mode) != CODE_FOR_nothing;
+    return convert_optab_handler (op, mode, mask_mode) != CODE_FOR_nothing;
 
   /* Otherwise, return true if there is some vector mode with
      the mask load/store supported.  */
@@ -485,7 +487,12 @@  can_vec_mask_load_store_p (machine_mode mode, bool is_load)
   if (!VECTOR_MODE_P (vmode))
     return false;
 
-  if (optab_handler (op, vmode) != CODE_FOR_nothing)
+  mask_mode = targetm.vectorize.get_mask_mode (GET_MODE_NUNITS (vmode),
+					       GET_MODE_SIZE (vmode));
+  if (mask_mode == VOIDmode)
+    return false;
+
+  if (convert_optab_handler (op, vmode, mask_mode) != CODE_FOR_nothing)
     return true;
 
   vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
@@ -496,8 +503,10 @@  can_vec_mask_load_store_p (machine_mode mode, bool is_load)
       if (cur <= GET_MODE_SIZE (mode))
 	continue;
       vmode = mode_for_vector (mode, cur / GET_MODE_SIZE (mode));
+      mask_mode = targetm.vectorize.get_mask_mode (GET_MODE_NUNITS (vmode),
+						   cur);
       if (VECTOR_MODE_P (vmode)
-	  && optab_handler (op, vmode) != CODE_FOR_nothing)
+	  && convert_optab_handler (op, vmode, mask_mode) != CODE_FOR_nothing)
 	return true;
     }
   return false;
diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h
index 81ac362..162d2e9 100644
--- a/gcc/optabs-query.h
+++ b/gcc/optabs-query.h
@@ -140,7 +140,7 @@  enum insn_code find_widening_optab_handler_and_mode (optab, machine_mode,
 						     machine_mode, int,
 						     machine_mode *);
 int can_mult_highpart_p (machine_mode, bool);
-bool can_vec_mask_load_store_p (machine_mode, bool);
+bool can_vec_mask_load_store_p (machine_mode, machine_mode, bool);
 bool can_compare_and_swap_p (machine_mode, bool);
 bool can_atomic_exchange_p (machine_mode, bool);
 bool lshift_cheap_p (bool);
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 1f9c1cf..9804378 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -63,6 +63,8 @@  OPTAB_CD(vcond_optab, "vcond$a$b")
 OPTAB_CD(vcondu_optab, "vcondu$a$b")
 OPTAB_CD(vec_cmp_optab, "vec_cmp$a$b")
 OPTAB_CD(vec_cmpu_optab, "vec_cmpu$a$b")
+OPTAB_CD(maskload_optab, "maskload$a$b")
+OPTAB_CD(maskstore_optab, "maskstore$a$b")
 
 OPTAB_NL(add_optab, "add$P$a3", PLUS, "add", '3', gen_int_fp_fixed_libfunc)
 OPTAB_NX(add_optab, "add$F$a3")
@@ -266,8 +268,6 @@  OPTAB_D (udot_prod_optab, "udot_prod$I$a")
 OPTAB_D (usum_widen_optab, "widen_usum$I$a3")
 OPTAB_D (usad_optab, "usad$I$a")
 OPTAB_D (ssad_optab, "ssad$I$a")
-OPTAB_D (maskload_optab, "maskload$a")
-OPTAB_D (maskstore_optab, "maskstore$a")
 OPTAB_D (vec_extract_optab, "vec_extract$a")
 OPTAB_D (vec_init_optab, "vec_init$a")
 OPTAB_D (vec_pack_sfix_trunc_optab, "vec_pack_sfix_trunc_$a")
diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c
index 25c9599..5d1c8ea 100644
--- a/gcc/tree-if-conv.c
+++ b/gcc/tree-if-conv.c
@@ -811,7 +811,7 @@  ifcvt_can_use_mask_load_store (gimple *stmt)
       || VECTOR_MODE_P (mode))
     return false;
 
-  if (can_vec_mask_load_store_p (mode, is_load))
+  if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
     return true;
 
   return false;
@@ -2068,7 +2068,7 @@  predicate_mem_writes (loop_p loop)
 	  {
 	    tree lhs = gimple_assign_lhs (stmt);
 	    tree rhs = gimple_assign_rhs1 (stmt);
-	    tree ref, addr, ptr, masktype, mask_op0, mask_op1, mask;
+	    tree ref, addr, ptr, mask;
 	    gimple *new_stmt;
 	    int bitsize = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (lhs)));
 	    ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
@@ -2082,16 +2082,51 @@  predicate_mem_writes (loop_p loop)
 	      mask = vect_masks[index];
 	    else
 	      {
-		masktype = build_nonstandard_integer_type (bitsize, 1);
-		mask_op0 = build_int_cst (masktype, swap ? 0 : -1);
-		mask_op1 = build_int_cst (masktype, swap ? -1 : 0);
-		cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
-						   is_gimple_condexpr,
-						   NULL_TREE,
-						   true, GSI_SAME_STMT);
-		mask = fold_build_cond_expr (masktype, unshare_expr (cond),
-					     mask_op0, mask_op1);
-		mask = ifc_temp_var (masktype, mask, &gsi);
+		if ((TREE_CODE (cond) == NE_EXPR
+		     || TREE_CODE (cond) == EQ_EXPR)
+		    && (integer_zerop (TREE_OPERAND (cond, 1))
+			|| integer_onep (TREE_OPERAND (cond, 1)))
+		    && TREE_CODE (TREE_TYPE (TREE_OPERAND (cond, 0)))
+		       == BOOLEAN_TYPE)
+		  {
+		    bool negate = (TREE_CODE (cond) == EQ_EXPR);
+		    if (integer_onep (TREE_OPERAND (cond, 1)))
+		      negate = !negate;
+		    if (swap)
+		      negate = !negate;
+		    mask = TREE_OPERAND (cond, 0);
+		    if (negate)
+		      {
+			mask = ifc_temp_var (TREE_TYPE (mask),
+					     unshare_expr (cond),
+					     &gsi);
+			mask = build2 (EQ_EXPR, TREE_TYPE (mask), mask,
+				       build_zero_cst (TREE_TYPE (mask)));
+		      }
+		  }
+		else if (swap &&
+			 TREE_CODE_CLASS (TREE_CODE (cond)) == tcc_comparison)
+		  {
+		    tree op_type = TREE_TYPE (TREE_OPERAND (cond, 0));
+		    tree_code code
+		      = invert_tree_comparison (TREE_CODE (cond),
+						HONOR_NANS (op_type));
+		    if (code != ERROR_MARK)
+			mask = build2 (code, TREE_TYPE (cond),
+				       TREE_OPERAND (cond, 0),
+				       TREE_OPERAND (cond, 1));
+		    else
+		      {
+			mask = ifc_temp_var (TREE_TYPE (cond),
+					     unshare_expr (cond),
+					     &gsi);
+			mask = build2 (EQ_EXPR, TREE_TYPE (mask), mask,
+				       build_zero_cst (TREE_TYPE (mask)));
+		      }
+		  }
+		else
+		  mask = unshare_expr (cond);
+		mask = ifc_temp_var (TREE_TYPE (mask), mask, &gsi);
 		/* Save mask and its size for further use.  */
 	        vect_sizes.safe_push (bitsize);
 		vect_masks.safe_push (mask);
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 337ea7b..d6819ec 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1800,6 +1800,7 @@  vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
   bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+  tree mask_vectype;
   tree elem_type;
   gimple *new_stmt;
   tree dummy;
@@ -1827,8 +1828,8 @@  vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
 
   is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
   mask = gimple_call_arg (stmt, 2);
-  if (TYPE_PRECISION (TREE_TYPE (mask))
-      != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
+
+  if (TREE_CODE (TREE_TYPE (mask)) != BOOLEAN_TYPE)
     return false;
 
   /* FORNOW. This restriction should be relaxed.  */
@@ -1857,6 +1858,19 @@  vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
   if (STMT_VINFO_STRIDED_P (stmt_info))
     return false;
 
+  if (TREE_CODE (mask) != SSA_NAME)
+    return false;
+
+  if (!vect_is_simple_use_1 (mask, stmt, loop_vinfo, NULL,
+			     &def_stmt, &def, &dt, &mask_vectype))
+    return false;
+
+  if (!mask_vectype)
+    mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
+
+  if (!mask_vectype)
+    return false;
+
   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
     {
       gimple *def_stmt;
@@ -1890,14 +1904,9 @@  vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
 				 : DR_STEP (dr), size_zero_node) <= 0)
     return false;
   else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
-	   || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
-    return false;
-
-  if (TREE_CODE (mask) != SSA_NAME)
-    return false;
-
-  if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
-			   &def_stmt, &def, &dt))
+	   || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
+					  TYPE_MODE (mask_vectype),
+					  !is_store))
     return false;
 
   if (is_store)