
[1/2] Mid engine setup [SU]ABDL

Message ID 20230626153423.42763-1-oluwatamilore.adebayo@arm.com
State New
Series [1/2] Mid engine setup [SU]ABDL

Commit Message

Oluwatamilore Adebayo June 26, 2023, 3:34 p.m. UTC
From: oluade01 <oluwatamilore.adebayo@arm.com>

This updates vect_recog_abd_pattern to recognize the widening
variant of absolute difference (ABDL, ABDL2).
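
As a rough illustration (the element types here are just an example, not
taken from the patch or its testsuite), the widening variant targets loops
of this shape, where the absolute difference of narrow inputs is stored
into a wider element:

  #include <stdint.h>

  /* 8-bit inputs, 32-bit output: previously vectorized as an ABD followed
     by separate widening conversions; with this change a single widening
     ABD pattern statement can be used instead.  */
  void
  widen_abd (uint32_t *restrict out, const uint8_t *restrict x,
             const uint8_t *restrict y, int n)
  {
    for (int i = 0; i < n; i++)
      {
        int diff = x[i] - y[i];
        out[i] = diff < 0 ? -diff : diff;
      }
  }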

gcc/ChangeLog:

	* internal-fn.cc (widening_fn_p, decomposes_to_hilo_fn_p):
	Add IFN_VEC_WIDEN_ABD to the switch statement.
	* internal-fn.def (VEC_WIDEN_ABD): New internal hilo optab.
	* optabs.def (vec_widen_sabd_optab,
	vec_widen_sabd_hi_optab, vec_widen_sabd_lo_optab,
	vec_widen_sabd_odd_optab, vec_widen_sabd_even_optab,
	vec_widen_uabd_optab,
	vec_widen_uabd_hi_optab, vec_widen_uabd_lo_optab,
	vec_widen_uabd_odd_optab, vec_widen_uabd_even_optab):
	New optabs.
	* tree-vect-patterns.cc (vect_recog_abd_pattern): Update to
	build a VEC_WIDEN_ABD call if the input precision is smaller
	than the precision of the output.
	(vect_recog_widen_abd_pattern): Should an ABD expression be
	found preceding an extension, replace the two with a
	VEC_WIDEN_ABD.
---
 gcc/internal-fn.def       |   5 ++
 gcc/optabs.def            |  10 +++
 gcc/tree-vect-patterns.cc | 174 ++++++++++++++++++++++++++++++++------
 3 files changed, 161 insertions(+), 28 deletions(-)

Comments

Richard Sandiford June 26, 2023, 8:50 p.m. UTC | #1
Thanks for doing this.  Generally looks good, but some comments below.

Oluwatamilore Adebayo <oluwatamilore.adebayo@arm.com> writes:
> From: oluade01 <oluwatamilore.adebayo@arm.com>
>
> This updates vect_recog_abd_pattern to recognize the widening
> variant of absolute difference (ABDL, ABDL2).
>
> gcc/ChangeLog:
>
> 	* internal-fn.cc (widening_fn_p, decomposes_to_hilo_fn_p):
> 	Add IFN_VEC_WIDEN_ABD to the switch statement.
> 	* internal-fn.def (VEC_WIDEN_ABD): New internal hilo optab.
> 	* optabs.def (vec_widen_sabd_optab,
> 	vec_widen_sabd_hi_optab, vec_widen_sabd_lo_optab,
> 	vec_widen_sabd_odd_even, vec_widen_sabd_even_optab,
> 	vec_widen_uabd_optab,
> 	vec_widen_uabd_hi_optab, vec_widen_uabd_lo_optab,
> 	vec_widen_uabd_odd_even, vec_widen_uabd_even_optab):
> 	New optabs.
> 	* tree-vect-patterns.cc (vect_recog_abd_pattern): Update to
> 	to build a VEC_WIDEN_ABD call if the input precision is smaller
> 	than the precision of the output.
> 	(vect_recog_widen_abd_pattern): Should an ABD expression be
> 	found preceeding an extension, replace the two with a
> 	VEC_WIDEN_ABD.
> ---
>  gcc/internal-fn.def       |   5 ++
>  gcc/optabs.def            |  10 +++
>  gcc/tree-vect-patterns.cc | 174 ++++++++++++++++++++++++++++++++------
>  3 files changed, 161 insertions(+), 28 deletions(-)
>
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 116965f4830cec8f60642ff011a86b6562e2c509..d67274d68b49943a88c531e903fd03b42343ab97 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -352,6 +352,11 @@ DEF_INTERNAL_WIDENING_OPTAB_FN (VEC_WIDEN_MINUS,
>  				first,
>  				vec_widen_ssub, vec_widen_usub,
>  				binary)
> +DEF_INTERNAL_WIDENING_OPTAB_FN (VEC_WIDEN_ABD,
> +				ECF_CONST | ECF_NOTHROW,
> +				first,
> +				vec_widen_sabd, vec_widen_uabd,
> +				binary)
>  DEF_INTERNAL_OPTAB_FN (VEC_FMADDSUB, ECF_CONST, vec_fmaddsub, ternary)
>  DEF_INTERNAL_OPTAB_FN (VEC_FMSUBADD, ECF_CONST, vec_fmsubadd, ternary)
>  
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index 35b835a6ac56d72417dac8ddfd77a8a7e2475e65..68dfa1550f791a2fe833012157601ecfa68f1e09 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -418,6 +418,11 @@ OPTAB_D (vec_widen_sadd_hi_optab, "vec_widen_sadd_hi_$a")
>  OPTAB_D (vec_widen_sadd_lo_optab, "vec_widen_sadd_lo_$a")
>  OPTAB_D (vec_widen_sadd_odd_optab, "vec_widen_sadd_odd_$a")
>  OPTAB_D (vec_widen_sadd_even_optab, "vec_widen_sadd_even_$a")
> +OPTAB_D (vec_widen_sabd_optab, "vec_widen_sabd_$a")
> +OPTAB_D (vec_widen_sabd_hi_optab, "vec_widen_sabd_hi_$a")
> +OPTAB_D (vec_widen_sabd_lo_optab, "vec_widen_sabd_lo_$a")
> +OPTAB_D (vec_widen_sabd_odd_optab, "vec_widen_sabd_odd_$a")
> +OPTAB_D (vec_widen_sabd_even_optab, "vec_widen_sabd_even_$a")
>  OPTAB_D (vec_widen_sshiftl_hi_optab, "vec_widen_sshiftl_hi_$a")
>  OPTAB_D (vec_widen_sshiftl_lo_optab, "vec_widen_sshiftl_lo_$a")
>  OPTAB_D (vec_widen_umult_even_optab, "vec_widen_umult_even_$a")
> @@ -436,6 +441,11 @@ OPTAB_D (vec_widen_uadd_hi_optab, "vec_widen_uadd_hi_$a")
>  OPTAB_D (vec_widen_uadd_lo_optab, "vec_widen_uadd_lo_$a")
>  OPTAB_D (vec_widen_uadd_odd_optab, "vec_widen_uadd_odd_$a")
>  OPTAB_D (vec_widen_uadd_even_optab, "vec_widen_uadd_even_$a")
> +OPTAB_D (vec_widen_uabd_optab, "vec_widen_uabd_$a")
> +OPTAB_D (vec_widen_uabd_hi_optab, "vec_widen_uabd_hi_$a")
> +OPTAB_D (vec_widen_uabd_lo_optab, "vec_widen_uabd_lo_$a")
> +OPTAB_D (vec_widen_uabd_odd_optab, "vec_widen_uabd_odd_$a")
> +OPTAB_D (vec_widen_uabd_even_optab, "vec_widen_uabd_even_$a")
>  OPTAB_D (vec_addsub_optab, "vec_addsub$a3")
>  OPTAB_D (vec_fmaddsub_optab, "vec_fmaddsub$a4")
>  OPTAB_D (vec_fmsubadd_optab, "vec_fmsubadd$a4")

The new optabs need to be documented in doc/md.texi.

> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index e2392113bff4065c909aefc760b4c48978b73a5a..852c4e99edb19d215c354b666b991c87c48620b4 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -1404,15 +1404,28 @@ vect_recog_sad_pattern (vec_info *vinfo,
>        gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
>        if (!abd_stmt
>  	  || !gimple_call_internal_p (abd_stmt)
> -	  || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
> +	  || gimple_call_num_args (abd_stmt) != 2)
>  	return NULL;
>  
>        tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
>        tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
>  
> -      if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0, &unprom[0])
> -	  || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
> -						    &unprom[1]))
> +      if (gimple_call_internal_fn (abd_stmt) == IFN_ABD)
> +	{
> +	  if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0,
> +						     &unprom[0])
> +	      || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
> +							&unprom[1]))
> +	    return NULL;
> +	}
> +      else if (gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
> +	{
> +	  unprom[0].op = abd_oprnd0;
> +	  unprom[0].type = TREE_TYPE (abd_oprnd0);
> +	  unprom[1].op = abd_oprnd1;
> +	  unprom[1].type = TREE_TYPE (abd_oprnd1);
> +	}
> +      else
>  	return NULL;
>  
>        half_type = unprom[0].type;
> @@ -1442,16 +1455,25 @@ vect_recog_sad_pattern (vec_info *vinfo,
>  
>  /* Function vect_recog_abd_pattern
>  
> -   Try to find the following ABsolute Difference (ABD) pattern:
> +   Try to find the following ABsolute Difference (ABD) or
> +   ABsolute Difference Long (ABDL) pattern:

“Long” is a bit of an architecture-specific term.  Maybe just:

   Try to find the following ABsolute Difference (ABD) or
   widening ABD (WIDEN_ABD) pattern:
  
> -     VTYPE x, y, out;
> +     VTYPE x, y;
> +     WTYPE out;
>       type diff;
>     loop i in range:
>       S1 diff = x[i] - y[i]
>       S2 out[i] = ABS_EXPR <diff>;
>  
> -   where 'type' is a integer and 'VTYPE' is a vector of integers
> -   the same size as 'type'
> +   where 'VTYPE' and 'WTYPE' are vectors of integers.
> +	 'WTYPE' may be wider than 'VTYPE'.
> +	 'type' is as wide as 'WTYPE'.

I don't think the existing comment is right about the types.  What we're
matching is scalar code, so VTYPE and (now) WTYPE are integers rather
than vectors of integers.

I think it would be clearer to write:

       S1 diff = (type) x[i] - (type) y[i]
       S2 out[i] = ABS_EXPR <(WTYPE) diff>;

since the promotions happen on the operands.

It'd be good to keep the part about 'type' being an integer.

Rather than:

	 'WTYPE' may be wider than 'VTYPE'.
	 'type' is as wide as 'WTYPE'.

maybe:

	 'type' is no narrower than 'VTYPE' (but may be wider)
	 'WTYPE' is no narrower than 'type' (but may be wider)

> +
> +   In the case of ABD, should the precision of 'WTYPE' be wider than
> +   'VTYPE' or narrower, conversion operations will succeed the ABD or
> +   ABDL expression.
> +   In the case of ABDL, the same occurs if the precision of 'WTYPE' is
> +   more than twice as wide or narrower.

WTYPE can't be narrower than VTYPE though.  I think with the changes
suggested above, the text before this block describes the conditions
in enough detail, and so we can just say:

   WIDEN_ABD exists to optimize the case where WTYPE is at least twice as
   wide as VTYPE.
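
Roughly speaking (a hand-written sketch, not a real dump), the point is
whether the widening happens inside the pattern statement or as a separate
conversion afterwards:

  _1 = .ABD (x_2, y_3);                 /* narrow result */
  out_4 = (unsigned int) _1;            /* separate widening cast */

versus

  out_4 = .VEC_WIDEN_ABD (x_2, y_3);    /* wide result directly */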

> @@ -1459,12 +1481,14 @@ vect_recog_sad_pattern (vec_info *vinfo,
>  
>     Output:
>  
> -   * TYPE_out: The type of the output of this pattern
> +   * TYPE_OUT: The type of the output of this pattern
>  
>     * Return value: A new stmt that will be used to replace the sequence of
> -     stmts that constitute the pattern; either SABD or UABD:
> +     stmts that constitute the pattern; either SABD, UABD, SABDL or UABDL:
>  	SABD_EXPR<x, y, out>
>  	UABD_EXPR<x, y, out>
> +	SABDL_EXPR<x, y, out>
> +	UABDL_EXPR<x, y, out>

SABD_EXPR/UABD_EXPR should be IFN_ABD
SABDL_EXPR/UABDL_EXPR should be IFN_WIDEN_ABD

>   */
>  
>  static gimple *
> @@ -1479,8 +1503,9 @@ vect_recog_abd_pattern (vec_info *vinfo,
>  	out[i] = DAD
>  
>       In which
> -      - X, Y, DIFF, DAD all have the same type
> -      - x, y, out are all vectors of the same type
> +      - X, Y have the same type
> +      - x, y are all vectors of the same type
> +      - DIFF, DAD, out may be wider than X, Y
>    */

Maybe it would be easier to remove this comment, since I think the
comment above the function says enough.

> @@ -1496,37 +1521,74 @@ vect_recog_abd_pattern (vec_info *vinfo,
>  				       unprom, &diff_stmt))
>      return NULL;
>  
> -  tree abd_type = out_type, vectype;
> -  tree abd_oprnds[2];
> +  internal_fn ifn = IFN_ABD;
> +  tree abd_in_type, abd_out_type;
>    bool extend = false;
> +
>    if (half_type)
>      {
> -      vectype = get_vectype_for_scalar_type (vinfo, half_type);
> -      abd_type = half_type;
> -      extend = TYPE_PRECISION (abd_type) < TYPE_PRECISION (out_type);
> +      abd_in_type = half_type;
> +      abd_out_type = abd_in_type;
> +
> +      if (stmt_vinfo->min_output_precision == TYPE_PRECISION (abd_out_type))
> +	extend = TYPE_PRECISION (abd_in_type) < TYPE_PRECISION (out_type);
> +      else
> +	{
> +	  ifn = IFN_VEC_WIDEN_ABD;
> +	  abd_out_type
> +	    = build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
> +					      TYPE_UNSIGNED (abd_in_type));
> +	  extend = TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type);
> +	}
>      }
>    else
>      {
>        unprom[0].op = gimple_assign_rhs1 (diff_stmt);
>        unprom[1].op = gimple_assign_rhs2 (diff_stmt);
> -      tree signed_out = signed_type_for (out_type);
> -      vectype = get_vectype_for_scalar_type (vinfo, signed_out);
> +      abd_in_type = signed_type_for (out_type);
> +      abd_out_type = abd_in_type;
>      }

Rather than have the "extend" variable, how about:

>  
> -  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
> +  tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
> +  tree vectype_out = get_vectype_for_scalar_type (vinfo, abd_out_type);
> +  if (!vectype_in || !vectype_out)
> +    return NULL;
>  
> -  if (!vectype
> -      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
> +  if (ifn == IFN_VEC_WIDEN_ABD)
> +    {
> +      code_helper dummy_code;
> +      int dummy_int;
> +      auto_vec<tree> dummy_vec;
> +      if (!supportable_widening_operation (vinfo, ifn, stmt_vinfo,
> +					   vectype_out, vectype_in,
> +					   &dummy_code, &dummy_code,
> +					   &dummy_int, &dummy_vec))
> +	{
> +	  /* There are architectures that have the ABD instructions
> +	     but not the ABDL instructions.  If we just return NULL here
> +	     we will miss an occasion where we should have used ABD.
> +	     So we change back to ABD and try again.  */
> +	  ifn = IFN_ABD;
> +	  abd_out_type = abd_in_type;
> +	  extend = true;
> +	}
> +    }

making this:

  if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2)
    {
      tree mid_type
	= build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
					  TYPE_UNSIGNED (abd_in_type));
      tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_type);
      code_helper dummy_code;
      int dummy_int;
      auto_vec<tree> dummy_vec;
      if (mid_vectype
	  && supportable_widening_operation (vinfo, IFN_WIDEN_ABD, stmt_vinfo,
					     mid_vectype, vectype_in,
					     &dummy_code, &dummy_code,
					     &dummy_int, &dummy_vec))
	{
	  ifn = IFN_WIDEN_ABD;
	  abd_out_type = mid_type;
	  vectype_out = mid_vectype;
	}
    }

The idea is to keep the assumption that we're using IFN_ABD
until we've proven conclusively otherwise.

I think the later:

  if (!extend)
    return abd_stmt;

should then be deleted, since we should still use vect_convert_output
if abd_out_type has a different sign from out_type.

> +
> +  if (ifn == IFN_ABD
> +      && !direct_internal_fn_supported_p (ifn, vectype_in,
>  					  OPTIMIZE_FOR_SPEED))
>      return NULL;
>  
> +  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
> +
> +  tree abd_oprnds[2];
>    vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
> -		       TREE_TYPE (vectype), unprom, vectype);
> +		       abd_in_type, unprom, vectype_in);
>  
>    *type_out = get_vectype_for_scalar_type (vinfo, out_type);
>  
> -  tree abd_result = vect_recog_temp_ssa_var (abd_type, NULL);
> -  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
> +  tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL);
> +  gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
>  						abd_oprnds[0], abd_oprnds[1]);
>    gimple_call_set_lhs (abd_stmt, abd_result);
>    gimple_set_location (abd_stmt, gimple_location (last_stmt));
> @@ -1535,15 +1597,15 @@ vect_recog_abd_pattern (vec_info *vinfo,
>      return abd_stmt;
>  
>    gimple *stmt = abd_stmt;
> -  if (!TYPE_UNSIGNED (abd_type))
> +  if (!TYPE_UNSIGNED (abd_out_type))

And this condition would then be:

  if (TYPE_PRECISION (abd_out_type) == TYPE_PRECISION (abd_in_type)
      && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
      && !TYPE_UNSIGNED (abd_out_type))

since:

(a) we don't need to force the type to unsigned if the final
    vect_convert_output is just a sign change

(b) we don't need to force the type to unsigned if abd_out_type
    is large enough to hold the results without loss of precision
    (or loss of sign correctness)
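
As a made-up illustration of (b): with 8-bit inputs the absolute difference
can be as large as 255, which among 8-bit types only an unsigned one can
hold, but any type of 16 bits or more holds it with the sign bit to spare:

  #include <stdint.h>
  #include <stdio.h>

  int
  main (void)
  {
    int abd = 255;               /* |127 - (-128)| for 8-bit inputs */
    int8_t  s8 = (int8_t) abd;   /* wraps to -1 on the usual targets */
    int32_t bad = s8;            /* sign-extends to -1: wrong */
    uint8_t u8 = (uint8_t) abd;  /* stays 255 */
    int32_t ok = u8;             /* zero-extends to 255: right */
    int16_t wide = abd;          /* already fits: no unsigned fixup needed */
    printf ("%d %d %d\n", bad, ok, (int) wide);
    return 0;
  }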

>      {
> -      tree unsign = unsigned_type_for (abd_type);
> +      tree unsign = unsigned_type_for (abd_out_type);
>        tree unsign_vectype = get_vectype_for_scalar_type (vinfo, unsign);
>        stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt,
>  				  unsign_vectype);
>      }
>  
> -  return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype);
> +  return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype_out);
>  }
>  
>  /* Recognize an operation that performs ORIG_CODE on widened inputs,
> @@ -1703,6 +1765,61 @@ vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
>  				      &subtype);
>  }
>  
> +/* Try to detect abd on widened inputs, converting IFN_ABD
> +   to IFN_VEC_WIDEN_ABD.  */
> +static gimple *
> +vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
> +			      tree *type_out)
> +{
> +  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
> +  if (!last_stmt || !gimple_assign_cast_p (last_stmt))
> +    return NULL;
> +
> +  tree last_rhs = gimple_assign_rhs1 (last_stmt);
> +  stmt_vec_info abs_vinfo = vect_get_internal_def (vinfo, last_rhs);
> +  if (!abs_vinfo)
> +    return NULL;
> +
> +  stmt_vec_info abd_pattern_vinfo = STMT_VINFO_RELATED_STMT (abs_vinfo);
> +  if (!abd_pattern_vinfo)
> +    return NULL;
> +
> +  gcall *abd_stmt = dyn_cast <gcall *> (STMT_VINFO_STMT (abd_pattern_vinfo));
> +  if (!abd_stmt || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
> +    return NULL;
> +
> +  tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
> +  tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
> +
> +  tree in_type = TREE_TYPE (abd_oprnd0);
> +  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
> +  if (TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type))
> +    return NULL;

I think it'd be more natural to test this on TREE_TYPE (last_rhs)
rather than TREE_TYPE (abd_oprnd0), and do it after the assignment
to last_rhs above.

> +
> +  tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
> +  tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);
> +
> +  code_helper dummy_code;
> +  int dummy_int;
> +  auto_vec<tree> dummy_vec;
> +  if (!supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD, stmt_vinfo,
> +				       vectype_out, vectype_in,
> +				       &dummy_code, &dummy_code,
> +				       &dummy_int, &dummy_vec))
> +    return NULL;
> +
> +  vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt);
> +
> +  *type_out = vectype_out;
> +
> +  tree abdl_result = vect_recog_temp_ssa_var (out_type, NULL);
> +  gcall *abdl_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
> +						 abd_oprnd0, abd_oprnd1);
> +  gimple_call_set_lhs (abdl_stmt, abdl_result);
> +  gimple_set_location (abdl_stmt, gimple_location (last_stmt));
> +  return abdl_stmt;

I think “widen_abd” would be better than “abdl” here.

Richard

> +}
> +
>  /* Function vect_recog_ctz_ffs_pattern
>  
>     Try to find the following pattern:
> @@ -6670,6 +6787,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
>    { vect_recog_mask_conversion_pattern, "mask_conversion" },
>    { vect_recog_widen_plus_pattern, "widen_plus" },
>    { vect_recog_widen_minus_pattern, "widen_minus" },
> +  { vect_recog_widen_abd_pattern, "widen_abd" },
>    /* These must come after the double widening ones.  */
>  };

Richard Sandiford June 27, 2023, 7:46 a.m. UTC | #2
Richard Sandiford <richard.sandiford@arm.com> writes:
>> -     VTYPE x, y, out;
>> +     VTYPE x, y;
>> +     WTYPE out;
>>       type diff;
>>     loop i in range:
>>       S1 diff = x[i] - y[i]
>>       S2 out[i] = ABS_EXPR <diff>;
>>  
>> -   where 'type' is a integer and 'VTYPE' is a vector of integers
>> -   the same size as 'type'
>> +   where 'VTYPE' and 'WTYPE' are vectors of integers.
>> +	 'WTYPE' may be wider than 'VTYPE'.
>> +	 'type' is as wide as 'WTYPE'.
>
> I don't think the existing comment is right about the types.  What we're
> matching is scalar code, so VTYPE and (now) WTYPE are integers rather
> than vectors of integers.

Gah, sorry, I realise now that the point was that VTYPE and WTYPE
are sequences rather than scalars.  But patterns are used for SLP
as well as loops, and the inputs and outputs might not be memory
objects.  So:

> I think it would be clearer to write:
>
>        S1 diff = (type) x[i] - (type) y[i]
>        S2 out[i] = ABS_EXPR <(WTYPE) diff>;
>
> since the promotions happen on the operands.
>
> It'd be good to keep the part about 'type' being an integer.
>
> Rather than:
>
> 	 'WTYPE' may be wider than 'VTYPE'.
> 	 'type' is as wide as 'WTYPE'.
>
> maybe:
>
> 	 'type' is no narrower than 'VTYPE' (but may be wider)
> 	 'WTYPE' is no narrower than 'type' (but may be wider)

...how about:

  TYPE1 x;
  TYPE2 y;
  TYPE3 x_cast = (TYPE3) x;              // widening or no-op
  TYPE3 y_cast = (TYPE3) y;              // widening or no-op
  TYPE3 diff = x_cast - y_cast;
  TYPE4 diff_cast = (TYPE4) diff;        // widening or no-op
  TYPE5 abs = ABS(U)_EXPR <diff_cast>;

(based on the comment above vect_recog_widen_op_pattern).
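
For instance (one possible made-up instantiation, with widening only at
the inputs):

  TYPE1 = TYPE2 = unsigned char    /* narrow inputs */
  TYPE3 = int                      /* both input casts are widening */
  TYPE4 = TYPE5 = int              /* the cast before the ABS is a no-op */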

Thanks,
Richard

Oluwatamilore Adebayo June 28, 2023, 3:07 p.m. UTC | #3
> The new optabs need to be documented in doc/md.texi.

Done.

> “Long” is a bit of an architecture-specific term.  Maybe just:
> 
>    Try to find the following ABsolute Difference (ABD) or
>    widening ABD (WIDEN_ABD) pattern:

Change made.

> >> -     VTYPE x, y, out;
> >> +     VTYPE x, y;
> >> +     WTYPE out;
> >>       type diff;
> >>     loop i in range:
> >>       S1 diff = x[i] - y[i]
> >>       S2 out[i] = ABS_EXPR <diff>;
> >>  
> >> -   where 'type' is a integer and 'VTYPE' is a vector of integers
> >> -   the same size as 'type'
> >> +   where 'VTYPE' and 'WTYPE' are vectors of integers.
> >> +	 'WTYPE' may be wider than 'VTYPE'.
> >> +	 'type' is as wide as 'WTYPE'.
> >
> > I don't think the existing comment is right about the types.  What we're
> > matching is scalar code, so VTYPE and (now) WTYPE are integers rather
> > than vectors of integers.
> 
> Gah, sorry, I realise now that the point was that VTYPE and WTYPE
> are sequences rather than scalars.  But patterns are used for SLP
> as well as loops, and the inputs and outputs might not be memory
> objects.  So:
> 
> > I think it would be clearer to write:
> >
> >        S1 diff = (type) x[i] - (type) y[i]
> >        S2 out[i] = ABS_EXPR <(WTYPE) diff>;
> >
> > since the promotions happen on the operands.
> >
> > It'd be good to keep the part about 'type' being an integer.
> >
> > Rather than:
> >
> > 	 'WTYPE' may be wider than 'VTYPE'.
> > 	 'type' is as wide as 'WTYPE'.
> >
> > maybe:
> >
> > 	 'type' is no narrower than 'VTYPE' (but may be wider)
> > 	 'WTYPE' is no narrower than 'type' (but may be wider)
> 
> ...how about:
> 
>   TYPE1 x;
>   TYPE2 y;
>   TYPE3 x_cast = (TYPE3) x;              // widening or no-op
>   TYPE3 y_cast = (TYPE3) y;              // widening or no-op
>   TYPE3 diff = x_cast - y_cast;
>   TYPE4 diff_cast = (TYPE4) diff;        // widening or no-op
>   TYPE5 abs = ABS(U)_EXPR <diff_cast>;
> 
> (based on the comment above vect_recog_widen_op_pattern).

Done.

> WTYPE can't be narrower than VTYPE though.  I think with the changes
> suggested above, the text before this block describes the conditions
> in enough detail, and so we can just say:
> 
>    WIDEN_ABD exists to optimize the case where WTYPE is at least twice as
>    wide as VTYPE.

Change made.

> SABD_EXPR/UABD_EXPR should be IFN_ABD
> SABDL_EXPR/UABDL_EXPR should be IFN_WIDEN_ABD

Change made.

> Maybe it would be easier to remove this comment, since I think the
> comment above the function says enough.

Done.

> Rather than have the "extend" variable, how about:
> 
> >  
> > -  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
> > +  tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
> > +  tree vectype_out = get_vectype_for_scalar_type (vinfo, abd_out_type);
> > +  if (!vectype_in || !vectype_out)
> > +    return NULL;
> >  
> > -  if (!vectype
> > -      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
> > +  if (ifn == IFN_VEC_WIDEN_ABD)
> > +    {
> > +      code_helper dummy_code;
> > +      int dummy_int;
> > +      auto_vec<tree> dummy_vec;
> > +      if (!supportable_widening_operation (vinfo, ifn, stmt_vinfo,
> > +					   vectype_out, vectype_in,
> > +					   &dummy_code, &dummy_code,
> > +					   &dummy_int, &dummy_vec))
> > +	{
> > +	  /* There are architectures that have the ABD instructions
> > +	     but not the ABDL instructions.  If we just return NULL here
> > +	     we will miss an occasion where we should have used ABD.
> > +	     So we change back to ABD and try again.  */
> > +	  ifn = IFN_ABD;
> > +	  abd_out_type = abd_in_type;
> > +	  extend = true;
> > +	}
> > +    }
> 
> making this:
> 
>   if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2)
>     {
>       tree mid_type
> 	= build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
> 					  TYPE_UNSIGNED (abd_in_type));
>       tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_vectype);
>       code_helper dummy_code;
>       int dummy_int;
>       auto_vec<tree> dummy_vec;
>       if (mid_vectype
> 	  && supportable_widening_operation (vinfo, IFN_WIDEN_ABD, stmt_vinfo,
> 					     mid_vectype, vectype_in,
> 					     &dummy_code, &dummy_code,
> 					     &dummy_int, &dummy_vec))
> 	{
> 	  ifn = IFN_WIDEN_ABD;
> 	  abd_out_type = mid_type;
> 	  vectype_out = mid_vectype;
> 	}
>     }
> 
> The idea is to keep the assumption that we're using IFN_ABD
> until we've proven conclusively otherwise.
> 
> I think the later:
> 
>   if (!extend)
>     return abd_stmt;
> 
> should then be deleted, since we should still use vect_convert_output
> if abd_out_type has a different sign from out_type.
> 
> .....
> 
> And this condition would then be:
> 
>   if (TYPE_PRECISION (abd_out_type) == TYPE_PRECISION (abd_in_type)
>       && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
>       && !TYPE_UNSIGNED (abd_out_type))
> 
> since:
> 
> (a) we don't need to force the type to unsigned if the final
>     vect_convert_output is just a sign change
> 
> (b) we don't need to force the type to unsigned if abd_out_type
>     is large enough to hold the results without loss of precision
>     (or loss of sign correctness)

Done.

> I think it'd be more natural to test this on TREE_TYPE (last_rhs)
> rather than TREE_TYPE (abd_oprnd0), and do it after the assignment
> to last_rhs above.

Done.

> > +  tree abdl_result = vect_recog_temp_ssa_var (out_type, NULL);
> > +  gcall *abdl_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
> > +						 abd_oprnd0, abd_oprnd1);
> > +  gimple_call_set_lhs (abdl_stmt, abdl_result);
> > +  gimple_set_location (abdl_stmt, gimple_location (last_stmt));
> > +  return abdl_stmt;
> 
> I think “widen_abd” would be better than “abdl” here

Done.

Patch

diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 116965f4830cec8f60642ff011a86b6562e2c509..d67274d68b49943a88c531e903fd03b42343ab97 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -352,6 +352,11 @@  DEF_INTERNAL_WIDENING_OPTAB_FN (VEC_WIDEN_MINUS,
 				first,
 				vec_widen_ssub, vec_widen_usub,
 				binary)
+DEF_INTERNAL_WIDENING_OPTAB_FN (VEC_WIDEN_ABD,
+				ECF_CONST | ECF_NOTHROW,
+				first,
+				vec_widen_sabd, vec_widen_uabd,
+				binary)
 DEF_INTERNAL_OPTAB_FN (VEC_FMADDSUB, ECF_CONST, vec_fmaddsub, ternary)
 DEF_INTERNAL_OPTAB_FN (VEC_FMSUBADD, ECF_CONST, vec_fmsubadd, ternary)
 
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 35b835a6ac56d72417dac8ddfd77a8a7e2475e65..68dfa1550f791a2fe833012157601ecfa68f1e09 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -418,6 +418,11 @@  OPTAB_D (vec_widen_sadd_hi_optab, "vec_widen_sadd_hi_$a")
 OPTAB_D (vec_widen_sadd_lo_optab, "vec_widen_sadd_lo_$a")
 OPTAB_D (vec_widen_sadd_odd_optab, "vec_widen_sadd_odd_$a")
 OPTAB_D (vec_widen_sadd_even_optab, "vec_widen_sadd_even_$a")
+OPTAB_D (vec_widen_sabd_optab, "vec_widen_sabd_$a")
+OPTAB_D (vec_widen_sabd_hi_optab, "vec_widen_sabd_hi_$a")
+OPTAB_D (vec_widen_sabd_lo_optab, "vec_widen_sabd_lo_$a")
+OPTAB_D (vec_widen_sabd_odd_optab, "vec_widen_sabd_odd_$a")
+OPTAB_D (vec_widen_sabd_even_optab, "vec_widen_sabd_even_$a")
 OPTAB_D (vec_widen_sshiftl_hi_optab, "vec_widen_sshiftl_hi_$a")
 OPTAB_D (vec_widen_sshiftl_lo_optab, "vec_widen_sshiftl_lo_$a")
 OPTAB_D (vec_widen_umult_even_optab, "vec_widen_umult_even_$a")
@@ -436,6 +441,11 @@  OPTAB_D (vec_widen_uadd_hi_optab, "vec_widen_uadd_hi_$a")
 OPTAB_D (vec_widen_uadd_lo_optab, "vec_widen_uadd_lo_$a")
 OPTAB_D (vec_widen_uadd_odd_optab, "vec_widen_uadd_odd_$a")
 OPTAB_D (vec_widen_uadd_even_optab, "vec_widen_uadd_even_$a")
+OPTAB_D (vec_widen_uabd_optab, "vec_widen_uabd_$a")
+OPTAB_D (vec_widen_uabd_hi_optab, "vec_widen_uabd_hi_$a")
+OPTAB_D (vec_widen_uabd_lo_optab, "vec_widen_uabd_lo_$a")
+OPTAB_D (vec_widen_uabd_odd_optab, "vec_widen_uabd_odd_$a")
+OPTAB_D (vec_widen_uabd_even_optab, "vec_widen_uabd_even_$a")
 OPTAB_D (vec_addsub_optab, "vec_addsub$a3")
 OPTAB_D (vec_fmaddsub_optab, "vec_fmaddsub$a4")
 OPTAB_D (vec_fmsubadd_optab, "vec_fmsubadd$a4")
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index e2392113bff4065c909aefc760b4c48978b73a5a..852c4e99edb19d215c354b666b991c87c48620b4 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -1404,15 +1404,28 @@  vect_recog_sad_pattern (vec_info *vinfo,
       gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
       if (!abd_stmt
 	  || !gimple_call_internal_p (abd_stmt)
-	  || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
+	  || gimple_call_num_args (abd_stmt) != 2)
 	return NULL;
 
       tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
       tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
 
-      if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0, &unprom[0])
-	  || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
-						    &unprom[1]))
+      if (gimple_call_internal_fn (abd_stmt) == IFN_ABD)
+	{
+	  if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0,
+						     &unprom[0])
+	      || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
+							&unprom[1]))
+	    return NULL;
+	}
+      else if (gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
+	{
+	  unprom[0].op = abd_oprnd0;
+	  unprom[0].type = TREE_TYPE (abd_oprnd0);
+	  unprom[1].op = abd_oprnd1;
+	  unprom[1].type = TREE_TYPE (abd_oprnd1);
+	}
+      else
 	return NULL;
 
       half_type = unprom[0].type;
@@ -1442,16 +1455,25 @@  vect_recog_sad_pattern (vec_info *vinfo,
 
 /* Function vect_recog_abd_pattern
 
-   Try to find the following ABsolute Difference (ABD) pattern:
+   Try to find the following ABsolute Difference (ABD) or
+   ABsolute Difference Long (ABDL) pattern:
 
-     VTYPE x, y, out;
+     VTYPE x, y;
+     WTYPE out;
      type diff;
    loop i in range:
      S1 diff = x[i] - y[i]
      S2 out[i] = ABS_EXPR <diff>;
 
-   where 'type' is a integer and 'VTYPE' is a vector of integers
-   the same size as 'type'
+   where 'VTYPE' and 'WTYPE' are vectors of integers.
+	 'WTYPE' may be wider than 'VTYPE'.
+	 'type' is as wide as 'WTYPE'.
+
+   In the case of ABD, should the precision of 'WTYPE' be wider than
+   'VTYPE' or narrower, conversion operations will succeed the ABD or
+   ABDL expression.
+   In the case of ABDL, the same occurs if the precision of 'WTYPE' is
+   more than twice as wide or narrower.
 
    Input:
 
@@ -1459,12 +1481,14 @@  vect_recog_sad_pattern (vec_info *vinfo,
 
    Output:
 
-   * TYPE_out: The type of the output of this pattern
+   * TYPE_OUT: The type of the output of this pattern
 
    * Return value: A new stmt that will be used to replace the sequence of
-     stmts that constitute the pattern; either SABD or UABD:
+     stmts that constitute the pattern; either SABD, UABD, SABDL or UABDL:
 	SABD_EXPR<x, y, out>
 	UABD_EXPR<x, y, out>
+	SABDL_EXPR<x, y, out>
+	UABDL_EXPR<x, y, out>
  */
 
 static gimple *
@@ -1479,8 +1503,9 @@  vect_recog_abd_pattern (vec_info *vinfo,
 	out[i] = DAD
 
      In which
-      - X, Y, DIFF, DAD all have the same type
-      - x, y, out are all vectors of the same type
+      - X, Y have the same type
+      - x, y are all vectors of the same type
+      - DIFF, DAD, out may be wider than X, Y
   */
 
   gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
@@ -1496,37 +1521,74 @@  vect_recog_abd_pattern (vec_info *vinfo,
 				       unprom, &diff_stmt))
     return NULL;
 
-  tree abd_type = out_type, vectype;
-  tree abd_oprnds[2];
+  internal_fn ifn = IFN_ABD;
+  tree abd_in_type, abd_out_type;
   bool extend = false;
+
   if (half_type)
     {
-      vectype = get_vectype_for_scalar_type (vinfo, half_type);
-      abd_type = half_type;
-      extend = TYPE_PRECISION (abd_type) < TYPE_PRECISION (out_type);
+      abd_in_type = half_type;
+      abd_out_type = abd_in_type;
+
+      if (stmt_vinfo->min_output_precision == TYPE_PRECISION (abd_out_type))
+	extend = TYPE_PRECISION (abd_in_type) < TYPE_PRECISION (out_type);
+      else
+	{
+	  ifn = IFN_VEC_WIDEN_ABD;
+	  abd_out_type
+	    = build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
+					      TYPE_UNSIGNED (abd_in_type));
+	  extend = TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type);
+	}
     }
   else
     {
       unprom[0].op = gimple_assign_rhs1 (diff_stmt);
       unprom[1].op = gimple_assign_rhs2 (diff_stmt);
-      tree signed_out = signed_type_for (out_type);
-      vectype = get_vectype_for_scalar_type (vinfo, signed_out);
+      abd_in_type = signed_type_for (out_type);
+      abd_out_type = abd_in_type;
     }
 
-  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
+  tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
+  tree vectype_out = get_vectype_for_scalar_type (vinfo, abd_out_type);
+  if (!vectype_in || !vectype_out)
+    return NULL;
 
-  if (!vectype
-      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
+  if (ifn == IFN_VEC_WIDEN_ABD)
+    {
+      code_helper dummy_code;
+      int dummy_int;
+      auto_vec<tree> dummy_vec;
+      if (!supportable_widening_operation (vinfo, ifn, stmt_vinfo,
+					   vectype_out, vectype_in,
+					   &dummy_code, &dummy_code,
+					   &dummy_int, &dummy_vec))
+	{
+	  /* There are architectures that have the ABD instructions
+	     but not the ABDL instructions.  If we just return NULL here
+	     we will miss an occasion where we should have used ABD.
+	     So we change back to ABD and try again.  */
+	  ifn = IFN_ABD;
+	  abd_out_type = abd_in_type;
+	  extend = true;
+	}
+    }
+
+  if (ifn == IFN_ABD
+      && !direct_internal_fn_supported_p (ifn, vectype_in,
 					  OPTIMIZE_FOR_SPEED))
     return NULL;
 
+  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
+
+  tree abd_oprnds[2];
   vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
-		       TREE_TYPE (vectype), unprom, vectype);
+		       abd_in_type, unprom, vectype_in);
 
   *type_out = get_vectype_for_scalar_type (vinfo, out_type);
 
-  tree abd_result = vect_recog_temp_ssa_var (abd_type, NULL);
-  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
+  tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL);
+  gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
 						abd_oprnds[0], abd_oprnds[1]);
   gimple_call_set_lhs (abd_stmt, abd_result);
   gimple_set_location (abd_stmt, gimple_location (last_stmt));
@@ -1535,15 +1597,15 @@  vect_recog_abd_pattern (vec_info *vinfo,
     return abd_stmt;
 
   gimple *stmt = abd_stmt;
-  if (!TYPE_UNSIGNED (abd_type))
+  if (!TYPE_UNSIGNED (abd_out_type))
     {
-      tree unsign = unsigned_type_for (abd_type);
+      tree unsign = unsigned_type_for (abd_out_type);
       tree unsign_vectype = get_vectype_for_scalar_type (vinfo, unsign);
       stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt,
 				  unsign_vectype);
     }
 
-  return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype);
+  return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype_out);
 }
 
 /* Recognize an operation that performs ORIG_CODE on widened inputs,
@@ -1703,6 +1765,61 @@  vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
 				      &subtype);
 }
 
+/* Try to detect abd on widened inputs, converting IFN_ABD
+   to IFN_VEC_WIDEN_ABD.  */
+static gimple *
+vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
+			      tree *type_out)
+{
+  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
+  if (!last_stmt || !gimple_assign_cast_p (last_stmt))
+    return NULL;
+
+  tree last_rhs = gimple_assign_rhs1 (last_stmt);
+  stmt_vec_info abs_vinfo = vect_get_internal_def (vinfo, last_rhs);
+  if (!abs_vinfo)
+    return NULL;
+
+  stmt_vec_info abd_pattern_vinfo = STMT_VINFO_RELATED_STMT (abs_vinfo);
+  if (!abd_pattern_vinfo)
+    return NULL;
+
+  gcall *abd_stmt = dyn_cast <gcall *> (STMT_VINFO_STMT (abd_pattern_vinfo));
+  if (!abd_stmt || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
+    return NULL;
+
+  tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
+  tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
+
+  tree in_type = TREE_TYPE (abd_oprnd0);
+  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
+  if (TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type))
+    return NULL;
+
+  tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
+  tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);
+
+  code_helper dummy_code;
+  int dummy_int;
+  auto_vec<tree> dummy_vec;
+  if (!supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD, stmt_vinfo,
+				       vectype_out, vectype_in,
+				       &dummy_code, &dummy_code,
+				       &dummy_int, &dummy_vec))
+    return NULL;
+
+  vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt);
+
+  *type_out = vectype_out;
+
+  tree abdl_result = vect_recog_temp_ssa_var (out_type, NULL);
+  gcall *abdl_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
+						 abd_oprnd0, abd_oprnd1);
+  gimple_call_set_lhs (abdl_stmt, abdl_result);
+  gimple_set_location (abdl_stmt, gimple_location (last_stmt));
+  return abdl_stmt;
+}
+
 /* Function vect_recog_ctz_ffs_pattern
 
    Try to find the following pattern:
@@ -6670,6 +6787,7 @@  static vect_recog_func vect_vect_recog_func_ptrs[] = {
   { vect_recog_mask_conversion_pattern, "mask_conversion" },
   { vect_recog_widen_plus_pattern, "widen_plus" },
   { vect_recog_widen_minus_pattern, "widen_minus" },
+  { vect_recog_widen_abd_pattern, "widen_abd" },
   /* These must come after the double widening ones.  */
 };