diff mbox series

VECT: Add LEN_FOLD_EXTRACT_LAST pattern

Message ID 20230822105137.1308817-1-juzhe.zhong@rivai.ai
State New
Headers show
Series VECT: Add LEN_FOLD_EXTRACT_LAST pattern | expand

Commit Message

juzhe.zhong@rivai.ai Aug. 22, 2023, 10:51 a.m. UTC
Hi, Richard and Richi.

This is the last autovec pattern I want to add for RVV (length loop control).

This patch is supposed to handle the following case:

int __attribute__ ((noinline, noclone))
condition_reduction (int *a, int min_v, int n)
{
  int last = 66; /* High start value.  */

  for (int i = 0; i < n; i++)
    if (a[i] < min_v)
      last = i;

  return last;
}

ARM SVE IR:

  ...
  mask__7.11_39 = vect__4.10_37 < vect_cst__38;
  _40 = loop_mask_36 & mask__7.11_39;
  last_5 = .FOLD_EXTRACT_LAST (last_15, _40, vect_vec_iv_.7_32);
  ...

RVV IR, we want to see:
 ...
 loop_len = SELECT_VL
 mask__7.11_39 = vect__4.10_37 < vect_cst__38;
 last_5 = .LEN_FOLD_EXTRACT_LAST (last_15, mask__7.11_39, vect_vec_iv_.7_32, loop_len, bias);
 ...

gcc/ChangeLog:

	* doc/md.texi: Add LEN_FOLD_EXTRACT_LAST pattern.
	* internal-fn.cc (fold_len_extract_direct): Ditto.
	(expand_fold_len_extract_optab_fn): Ditto.
	(direct_fold_len_extract_optab_supported_p): Ditto.
	* internal-fn.def (LEN_FOLD_EXTRACT_LAST): Ditto.

---
 gcc/doc/md.texi     | 6 ++++++
 gcc/internal-fn.cc  | 5 +++++
 gcc/internal-fn.def | 3 +++
 3 files changed, 14 insertions(+)

Comments

Richard Biener Aug. 22, 2023, 11:07 a.m. UTC | #1
On Tue, 22 Aug 2023, Juzhe-Zhong wrote:

> Hi, Richard and Richi.
> 
> This is the last autovec pattern I want to add for RVV (length loop control).
> 
> This patch is supposed to handled this following case:
> 
> int __attribute__ ((noinline, noclone))
> condition_reduction (int *a, int min_v, int n)
> {
>   int last = 66; /* High start value.  */
> 
>   for (int i = 0; i < n; i++)
>     if (a[i] < min_v)
>       last = i;
> 
>   return last;
> }
> 
> ARM SVE IR:
> 
>   ...
>   mask__7.11_39 = vect__4.10_37 < vect_cst__38;
>   _40 = loop_mask_36 & mask__7.11_39;
>   last_5 = .FOLD_EXTRACT_LAST (last_15, _40, vect_vec_iv_.7_32);
>   ...
> 
> RVV IR, we want to see:
>  ...
>  loop_len = SELECT_VL
>  mask__7.11_39 = vect__4.10_37 < vect_cst__38;
>  last_5 = .LEN_FOLD_EXTRACT_LAST (last_15, _40, vect_vec_iv_.7_32, loop_len, bias);
>  ...

OK.

Richard.

> gcc/ChangeLog:
> 
> 	* doc/md.texi: Add LEN_FOLD_EXTRACT_LAST pattern.
> 	* internal-fn.cc (fold_len_extract_direct): Ditto.
> 	(expand_fold_len_extract_optab_fn): Ditto.
> 	(direct_fold_len_extract_optab_supported_p): Ditto.
> 	* internal-fn.def (LEN_FOLD_EXTRACT_LAST): Ditto.
> 
> ---
>  gcc/doc/md.texi     | 6 ++++++
>  gcc/internal-fn.cc  | 5 +++++
>  gcc/internal-fn.def | 3 +++
>  3 files changed, 14 insertions(+)
> 
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index 89562fdb43c..24453693d89 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -5636,6 +5636,12 @@ has mode @var{m} and operands 0 and 1 have the mode appropriate for
>  one element of @var{m}.  Operand 2 has the usual mask mode for vectors
>  of mode @var{m}; see @code{TARGET_VECTORIZE_GET_MASK_MODE}.
>  
> +@cindex @code{len_fold_extract_last_@var{m}} instruction pattern
> +@item @code{len_fold_extract_last_@var{m}}
> +Like @samp{fold_extract_last_@var{m}}, but takes an extra length operand as
> +operand 4 and an extra bias operand as operand 5.  The last associated element
> +is extracted should have the index i < len (operand 4) + bias (operand 5).
> +
>  @cindex @code{fold_left_plus_@var{m}} instruction pattern
>  @item @code{fold_left_plus_@var{m}}
>  Take scalar operand 1 and successively add each element from vector
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index 314f63b614b..4138cc31d7e 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -188,6 +188,7 @@ init_internal_fns ()
>  #define cond_len_ternary_direct { 1, 1, true }
>  #define while_direct { 0, 2, false }
>  #define fold_extract_direct { 2, 2, false }
> +#define fold_len_extract_direct { 2, 2, false }
>  #define fold_left_direct { 1, 1, false }
>  #define mask_fold_left_direct { 1, 1, false }
>  #define mask_len_fold_left_direct { 1, 1, false }
> @@ -3863,6 +3864,9 @@ expand_convert_optab_fn (internal_fn fn, gcall *stmt, convert_optab optab,
>  #define expand_fold_extract_optab_fn(FN, STMT, OPTAB) \
>    expand_direct_optab_fn (FN, STMT, OPTAB, 3)
>  
> +#define expand_fold_len_extract_optab_fn(FN, STMT, OPTAB) \
> +  expand_direct_optab_fn (FN, STMT, OPTAB, 5)
> +
>  #define expand_fold_left_optab_fn(FN, STMT, OPTAB) \
>    expand_direct_optab_fn (FN, STMT, OPTAB, 2)
>  
> @@ -3980,6 +3984,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
>  #define direct_mask_len_store_optab_supported_p convert_optab_supported_p
>  #define direct_while_optab_supported_p convert_optab_supported_p
>  #define direct_fold_extract_optab_supported_p direct_optab_supported_p
> +#define direct_fold_len_extract_optab_supported_p direct_optab_supported_p
>  #define direct_fold_left_optab_supported_p direct_optab_supported_p
>  #define direct_mask_fold_left_optab_supported_p direct_optab_supported_p
>  #define direct_mask_len_fold_left_optab_supported_p direct_optab_supported_p
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 594f7881511..d09403c0a91 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -312,6 +312,9 @@ DEF_INTERNAL_OPTAB_FN (EXTRACT_LAST, ECF_CONST | ECF_NOTHROW,
>  DEF_INTERNAL_OPTAB_FN (FOLD_EXTRACT_LAST, ECF_CONST | ECF_NOTHROW,
>  		       fold_extract_last, fold_extract)
>  
> +DEF_INTERNAL_OPTAB_FN (LEN_FOLD_EXTRACT_LAST, ECF_CONST | ECF_NOTHROW,
> +		       len_fold_extract_last, fold_len_extract)
> +
>  DEF_INTERNAL_OPTAB_FN (FOLD_LEFT_PLUS, ECF_CONST | ECF_NOTHROW,
>  		       fold_left_plus, fold_left)
>  
>
Li, Pan2 via Gcc-patches Aug. 22, 2023, 2:13 p.m. UTC | #2
Committed, as it passed both the regression and bootstrap tests on x86. Thanks, Richard.

Pan

-----Original Message-----
From: Gcc-patches <gcc-patches-bounces+pan2.li=intel.com@gcc.gnu.org> On Behalf Of Richard Biener via Gcc-patches
Sent: Tuesday, August 22, 2023 7:08 PM
To: Juzhe-Zhong <juzhe.zhong@rivai.ai>
Cc: gcc-patches@gcc.gnu.org; richard.sandiford@arm.com
Subject: Re: [PATCH] VECT: Add LEN_FOLD_EXTRACT_LAST pattern

On Tue, 22 Aug 2023, Juzhe-Zhong wrote:

> Hi, Richard and Richi.
> 
> This is the last autovec pattern I want to add for RVV (length loop control).
> 
> This patch is supposed to handled this following case:
> 
> int __attribute__ ((noinline, noclone))
> condition_reduction (int *a, int min_v, int n)
> {
>   int last = 66; /* High start value.  */
> 
>   for (int i = 0; i < n; i++)
>     if (a[i] < min_v)
>       last = i;
> 
>   return last;
> }
> 
> ARM SVE IR:
> 
>   ...
>   mask__7.11_39 = vect__4.10_37 < vect_cst__38;
>   _40 = loop_mask_36 & mask__7.11_39;
>   last_5 = .FOLD_EXTRACT_LAST (last_15, _40, vect_vec_iv_.7_32);
>   ...
> 
> RVV IR, we want to see:
>  ...
>  loop_len = SELECT_VL
>  mask__7.11_39 = vect__4.10_37 < vect_cst__38;
>  last_5 = .LEN_FOLD_EXTRACT_LAST (last_15, _40, vect_vec_iv_.7_32, loop_len, bias);
>  ...

OK.

Richard.

> gcc/ChangeLog:
> 
> 	* doc/md.texi: Add LEN_FOLD_EXTRACT_LAST pattern.
> 	* internal-fn.cc (fold_len_extract_direct): Ditto.
> 	(expand_fold_len_extract_optab_fn): Ditto.
> 	(direct_fold_len_extract_optab_supported_p): Ditto.
> 	* internal-fn.def (LEN_FOLD_EXTRACT_LAST): Ditto.
> 
> ---
>  gcc/doc/md.texi     | 6 ++++++
>  gcc/internal-fn.cc  | 5 +++++
>  gcc/internal-fn.def | 3 +++
>  3 files changed, 14 insertions(+)
> 
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index 89562fdb43c..24453693d89 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -5636,6 +5636,12 @@ has mode @var{m} and operands 0 and 1 have the mode appropriate for
>  one element of @var{m}.  Operand 2 has the usual mask mode for vectors
>  of mode @var{m}; see @code{TARGET_VECTORIZE_GET_MASK_MODE}.
>  
> +@cindex @code{len_fold_extract_last_@var{m}} instruction pattern
> +@item @code{len_fold_extract_last_@var{m}}
> +Like @samp{fold_extract_last_@var{m}}, but takes an extra length operand as
> +operand 4 and an extra bias operand as operand 5.  The last associated element
> +is extracted should have the index i < len (operand 4) + bias (operand 5).
> +
>  @cindex @code{fold_left_plus_@var{m}} instruction pattern
>  @item @code{fold_left_plus_@var{m}}
>  Take scalar operand 1 and successively add each element from vector
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index 314f63b614b..4138cc31d7e 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -188,6 +188,7 @@ init_internal_fns ()
>  #define cond_len_ternary_direct { 1, 1, true }
>  #define while_direct { 0, 2, false }
>  #define fold_extract_direct { 2, 2, false }
> +#define fold_len_extract_direct { 2, 2, false }
>  #define fold_left_direct { 1, 1, false }
>  #define mask_fold_left_direct { 1, 1, false }
>  #define mask_len_fold_left_direct { 1, 1, false }
> @@ -3863,6 +3864,9 @@ expand_convert_optab_fn (internal_fn fn, gcall *stmt, convert_optab optab,
>  #define expand_fold_extract_optab_fn(FN, STMT, OPTAB) \
>    expand_direct_optab_fn (FN, STMT, OPTAB, 3)
>  
> +#define expand_fold_len_extract_optab_fn(FN, STMT, OPTAB) \
> +  expand_direct_optab_fn (FN, STMT, OPTAB, 5)
> +
>  #define expand_fold_left_optab_fn(FN, STMT, OPTAB) \
>    expand_direct_optab_fn (FN, STMT, OPTAB, 2)
>  
> @@ -3980,6 +3984,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
>  #define direct_mask_len_store_optab_supported_p convert_optab_supported_p
>  #define direct_while_optab_supported_p convert_optab_supported_p
>  #define direct_fold_extract_optab_supported_p direct_optab_supported_p
> +#define direct_fold_len_extract_optab_supported_p direct_optab_supported_p
>  #define direct_fold_left_optab_supported_p direct_optab_supported_p
>  #define direct_mask_fold_left_optab_supported_p direct_optab_supported_p
>  #define direct_mask_len_fold_left_optab_supported_p direct_optab_supported_p
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 594f7881511..d09403c0a91 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -312,6 +312,9 @@ DEF_INTERNAL_OPTAB_FN (EXTRACT_LAST, ECF_CONST | ECF_NOTHROW,
>  DEF_INTERNAL_OPTAB_FN (FOLD_EXTRACT_LAST, ECF_CONST | ECF_NOTHROW,
>  		       fold_extract_last, fold_extract)
>  
> +DEF_INTERNAL_OPTAB_FN (LEN_FOLD_EXTRACT_LAST, ECF_CONST | ECF_NOTHROW,
> +		       len_fold_extract_last, fold_len_extract)
> +
>  DEF_INTERNAL_OPTAB_FN (FOLD_LEFT_PLUS, ECF_CONST | ECF_NOTHROW,
>  		       fold_left_plus, fold_left)
>  
>
diff mbox series

Patch

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 89562fdb43c..24453693d89 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5636,6 +5636,12 @@  has mode @var{m} and operands 0 and 1 have the mode appropriate for
 one element of @var{m}.  Operand 2 has the usual mask mode for vectors
 of mode @var{m}; see @code{TARGET_VECTORIZE_GET_MASK_MODE}.
 
+@cindex @code{len_fold_extract_last_@var{m}} instruction pattern
+@item @code{len_fold_extract_last_@var{m}}
+Like @samp{fold_extract_last_@var{m}}, but takes an extra length operand as
+operand 4 and an extra bias operand as operand 5.  The extracted element must
+have an index i that satisfies i < len (operand 4) + bias (operand 5).
+
 @cindex @code{fold_left_plus_@var{m}} instruction pattern
 @item @code{fold_left_plus_@var{m}}
 Take scalar operand 1 and successively add each element from vector
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 314f63b614b..4138cc31d7e 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -188,6 +188,7 @@  init_internal_fns ()
 #define cond_len_ternary_direct { 1, 1, true }
 #define while_direct { 0, 2, false }
 #define fold_extract_direct { 2, 2, false }
+#define fold_len_extract_direct { 2, 2, false }
 #define fold_left_direct { 1, 1, false }
 #define mask_fold_left_direct { 1, 1, false }
 #define mask_len_fold_left_direct { 1, 1, false }
@@ -3863,6 +3864,9 @@  expand_convert_optab_fn (internal_fn fn, gcall *stmt, convert_optab optab,
 #define expand_fold_extract_optab_fn(FN, STMT, OPTAB) \
   expand_direct_optab_fn (FN, STMT, OPTAB, 3)
 
+#define expand_fold_len_extract_optab_fn(FN, STMT, OPTAB) \
+  expand_direct_optab_fn (FN, STMT, OPTAB, 5)
+
 #define expand_fold_left_optab_fn(FN, STMT, OPTAB) \
   expand_direct_optab_fn (FN, STMT, OPTAB, 2)
 
@@ -3980,6 +3984,7 @@  multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
 #define direct_mask_len_store_optab_supported_p convert_optab_supported_p
 #define direct_while_optab_supported_p convert_optab_supported_p
 #define direct_fold_extract_optab_supported_p direct_optab_supported_p
+#define direct_fold_len_extract_optab_supported_p direct_optab_supported_p
 #define direct_fold_left_optab_supported_p direct_optab_supported_p
 #define direct_mask_fold_left_optab_supported_p direct_optab_supported_p
 #define direct_mask_len_fold_left_optab_supported_p direct_optab_supported_p
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 594f7881511..d09403c0a91 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -312,6 +312,9 @@  DEF_INTERNAL_OPTAB_FN (EXTRACT_LAST, ECF_CONST | ECF_NOTHROW,
 DEF_INTERNAL_OPTAB_FN (FOLD_EXTRACT_LAST, ECF_CONST | ECF_NOTHROW,
 		       fold_extract_last, fold_extract)
 
+DEF_INTERNAL_OPTAB_FN (LEN_FOLD_EXTRACT_LAST, ECF_CONST | ECF_NOTHROW,
+		       len_fold_extract_last, fold_len_extract)
+
 DEF_INTERNAL_OPTAB_FN (FOLD_LEFT_PLUS, ECF_CONST | ECF_NOTHROW,
 		       fold_left_plus, fold_left)