Message ID | 20230822105137.1308817-1-juzhe.zhong@rivai.ai |
---|---|
State | New |
Headers | show |
Series | VECT: Add LEN_FOLD_EXTRACT_LAST pattern | expand |
On Tue, 22 Aug 2023, Juzhe-Zhong wrote: > Hi, Richard and Richi. > > This is the last autovec pattern I want to add for RVV (length loop control). > > This patch is supposed to handled this following case: > > int __attribute__ ((noinline, noclone)) > condition_reduction (int *a, int min_v, int n) > { > int last = 66; /* High start value. */ > > for (int i = 0; i < n; i++) > if (a[i] < min_v) > last = i; > > return last; > } > > ARM SVE IR: > > ... > mask__7.11_39 = vect__4.10_37 < vect_cst__38; > _40 = loop_mask_36 & mask__7.11_39; > last_5 = .FOLD_EXTRACT_LAST (last_15, _40, vect_vec_iv_.7_32); > ... > > RVV IR, we want to see: > ... > loop_len = SELECT_VL > mask__7.11_39 = vect__4.10_37 < vect_cst__38; > last_5 = .LEN_FOLD_EXTRACT_LAST (last_15, _40, vect_vec_iv_.7_32, loop_len, bias); > ... OK. Richard. > gcc/ChangeLog: > > * doc/md.texi: Add LEN_FOLD_EXTRACT_LAST pattern. > * internal-fn.cc (fold_len_extract_direct): Ditto. > (expand_fold_len_extract_optab_fn): Ditto. > (direct_fold_len_extract_optab_supported_p): Ditto. > * internal-fn.def (LEN_FOLD_EXTRACT_LAST): Ditto. > > --- > gcc/doc/md.texi | 6 ++++++ > gcc/internal-fn.cc | 5 +++++ > gcc/internal-fn.def | 3 +++ > 3 files changed, 14 insertions(+) > > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi > index 89562fdb43c..24453693d89 100644 > --- a/gcc/doc/md.texi > +++ b/gcc/doc/md.texi > @@ -5636,6 +5636,12 @@ has mode @var{m} and operands 0 and 1 have the mode appropriate for > one element of @var{m}. Operand 2 has the usual mask mode for vectors > of mode @var{m}; see @code{TARGET_VECTORIZE_GET_MASK_MODE}. > > +@cindex @code{len_fold_extract_last_@var{m}} instruction pattern > +@item @code{len_fold_extract_last_@var{m}} > +Like @samp{fold_extract_last_@var{m}}, but takes an extra length operand as > +operand 4 and an extra bias operand as operand 5. The last associated element > +is extracted should have the index i < len (operand 4) + bias (operand 5). 
> + > @cindex @code{fold_left_plus_@var{m}} instruction pattern > @item @code{fold_left_plus_@var{m}} > Take scalar operand 1 and successively add each element from vector > diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc > index 314f63b614b..4138cc31d7e 100644 > --- a/gcc/internal-fn.cc > +++ b/gcc/internal-fn.cc > @@ -188,6 +188,7 @@ init_internal_fns () > #define cond_len_ternary_direct { 1, 1, true } > #define while_direct { 0, 2, false } > #define fold_extract_direct { 2, 2, false } > +#define fold_len_extract_direct { 2, 2, false } > #define fold_left_direct { 1, 1, false } > #define mask_fold_left_direct { 1, 1, false } > #define mask_len_fold_left_direct { 1, 1, false } > @@ -3863,6 +3864,9 @@ expand_convert_optab_fn (internal_fn fn, gcall *stmt, convert_optab optab, > #define expand_fold_extract_optab_fn(FN, STMT, OPTAB) \ > expand_direct_optab_fn (FN, STMT, OPTAB, 3) > > +#define expand_fold_len_extract_optab_fn(FN, STMT, OPTAB) \ > + expand_direct_optab_fn (FN, STMT, OPTAB, 5) > + > #define expand_fold_left_optab_fn(FN, STMT, OPTAB) \ > expand_direct_optab_fn (FN, STMT, OPTAB, 2) > > @@ -3980,6 +3984,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types, > #define direct_mask_len_store_optab_supported_p convert_optab_supported_p > #define direct_while_optab_supported_p convert_optab_supported_p > #define direct_fold_extract_optab_supported_p direct_optab_supported_p > +#define direct_fold_len_extract_optab_supported_p direct_optab_supported_p > #define direct_fold_left_optab_supported_p direct_optab_supported_p > #define direct_mask_fold_left_optab_supported_p direct_optab_supported_p > #define direct_mask_len_fold_left_optab_supported_p direct_optab_supported_p > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def > index 594f7881511..d09403c0a91 100644 > --- a/gcc/internal-fn.def > +++ b/gcc/internal-fn.def > @@ -312,6 +312,9 @@ DEF_INTERNAL_OPTAB_FN (EXTRACT_LAST, ECF_CONST | ECF_NOTHROW, > DEF_INTERNAL_OPTAB_FN 
(FOLD_EXTRACT_LAST, ECF_CONST | ECF_NOTHROW, > fold_extract_last, fold_extract) > > +DEF_INTERNAL_OPTAB_FN (LEN_FOLD_EXTRACT_LAST, ECF_CONST | ECF_NOTHROW, > + len_fold_extract_last, fold_len_extract) > + > DEF_INTERNAL_OPTAB_FN (FOLD_LEFT_PLUS, ECF_CONST | ECF_NOTHROW, > fold_left_plus, fold_left) > >
Committed, as it passed both the regression and bootstrap tests on x86; thanks, Richard. Pan -----Original Message----- From: Gcc-patches <gcc-patches-bounces+pan2.li=intel.com@gcc.gnu.org> On Behalf Of Richard Biener via Gcc-patches Sent: Tuesday, August 22, 2023 7:08 PM To: Juzhe-Zhong <juzhe.zhong@rivai.ai> Cc: gcc-patches@gcc.gnu.org; richard.sandiford@arm.com Subject: Re: [PATCH] VECT: Add LEN_FOLD_EXTRACT_LAST pattern On Tue, 22 Aug 2023, Juzhe-Zhong wrote: > Hi, Richard and Richi. > > This is the last autovec pattern I want to add for RVV (length loop control). > > This patch is supposed to handled this following case: > > int __attribute__ ((noinline, noclone)) > condition_reduction (int *a, int min_v, int n) > { > int last = 66; /* High start value. */ > > for (int i = 0; i < n; i++) > if (a[i] < min_v) > last = i; > > return last; > } > > ARM SVE IR: > > ... > mask__7.11_39 = vect__4.10_37 < vect_cst__38; > _40 = loop_mask_36 & mask__7.11_39; > last_5 = .FOLD_EXTRACT_LAST (last_15, _40, vect_vec_iv_.7_32); > ... > > RVV IR, we want to see: > ... > loop_len = SELECT_VL > mask__7.11_39 = vect__4.10_37 < vect_cst__38; > last_5 = .LEN_FOLD_EXTRACT_LAST (last_15, _40, vect_vec_iv_.7_32, loop_len, bias); > ... OK. Richard. > gcc/ChangeLog: > > * doc/md.texi: Add LEN_FOLD_EXTRACT_LAST pattern. > * internal-fn.cc (fold_len_extract_direct): Ditto. > (expand_fold_len_extract_optab_fn): Ditto. > (direct_fold_len_extract_optab_supported_p): Ditto. > * internal-fn.def (LEN_FOLD_EXTRACT_LAST): Ditto. > > --- > gcc/doc/md.texi | 6 ++++++ > gcc/internal-fn.cc | 5 +++++ > gcc/internal-fn.def | 3 +++ > 3 files changed, 14 insertions(+) > > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi > index 89562fdb43c..24453693d89 100644 > --- a/gcc/doc/md.texi > +++ b/gcc/doc/md.texi > @@ -5636,6 +5636,12 @@ has mode @var{m} and operands 0 and 1 have the mode appropriate for > one element of @var{m}. 
Operand 2 has the usual mask mode for vectors > of mode @var{m}; see @code{TARGET_VECTORIZE_GET_MASK_MODE}. > > +@cindex @code{len_fold_extract_last_@var{m}} instruction pattern > +@item @code{len_fold_extract_last_@var{m}} > +Like @samp{fold_extract_last_@var{m}}, but takes an extra length operand as > +operand 4 and an extra bias operand as operand 5. The last associated element > +is extracted should have the index i < len (operand 4) + bias (operand 5). > + > @cindex @code{fold_left_plus_@var{m}} instruction pattern > @item @code{fold_left_plus_@var{m}} > Take scalar operand 1 and successively add each element from vector > diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc > index 314f63b614b..4138cc31d7e 100644 > --- a/gcc/internal-fn.cc > +++ b/gcc/internal-fn.cc > @@ -188,6 +188,7 @@ init_internal_fns () > #define cond_len_ternary_direct { 1, 1, true } > #define while_direct { 0, 2, false } > #define fold_extract_direct { 2, 2, false } > +#define fold_len_extract_direct { 2, 2, false } > #define fold_left_direct { 1, 1, false } > #define mask_fold_left_direct { 1, 1, false } > #define mask_len_fold_left_direct { 1, 1, false } > @@ -3863,6 +3864,9 @@ expand_convert_optab_fn (internal_fn fn, gcall *stmt, convert_optab optab, > #define expand_fold_extract_optab_fn(FN, STMT, OPTAB) \ > expand_direct_optab_fn (FN, STMT, OPTAB, 3) > > +#define expand_fold_len_extract_optab_fn(FN, STMT, OPTAB) \ > + expand_direct_optab_fn (FN, STMT, OPTAB, 5) > + > #define expand_fold_left_optab_fn(FN, STMT, OPTAB) \ > expand_direct_optab_fn (FN, STMT, OPTAB, 2) > > @@ -3980,6 +3984,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types, > #define direct_mask_len_store_optab_supported_p convert_optab_supported_p > #define direct_while_optab_supported_p convert_optab_supported_p > #define direct_fold_extract_optab_supported_p direct_optab_supported_p > +#define direct_fold_len_extract_optab_supported_p direct_optab_supported_p > #define 
direct_fold_left_optab_supported_p direct_optab_supported_p > #define direct_mask_fold_left_optab_supported_p direct_optab_supported_p > #define direct_mask_len_fold_left_optab_supported_p direct_optab_supported_p > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def > index 594f7881511..d09403c0a91 100644 > --- a/gcc/internal-fn.def > +++ b/gcc/internal-fn.def > @@ -312,6 +312,9 @@ DEF_INTERNAL_OPTAB_FN (EXTRACT_LAST, ECF_CONST | ECF_NOTHROW, > DEF_INTERNAL_OPTAB_FN (FOLD_EXTRACT_LAST, ECF_CONST | ECF_NOTHROW, > fold_extract_last, fold_extract) > > +DEF_INTERNAL_OPTAB_FN (LEN_FOLD_EXTRACT_LAST, ECF_CONST | ECF_NOTHROW, > + len_fold_extract_last, fold_len_extract) > + > DEF_INTERNAL_OPTAB_FN (FOLD_LEFT_PLUS, ECF_CONST | ECF_NOTHROW, > fold_left_plus, fold_left) > >
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 89562fdb43c..24453693d89 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -5636,6 +5636,12 @@ has mode @var{m} and operands 0 and 1 have the mode appropriate for one element of @var{m}. Operand 2 has the usual mask mode for vectors of mode @var{m}; see @code{TARGET_VECTORIZE_GET_MASK_MODE}. +@cindex @code{len_fold_extract_last_@var{m}} instruction pattern +@item @code{len_fold_extract_last_@var{m}} +Like @samp{fold_extract_last_@var{m}}, but takes an extra length operand as +operand 4 and an extra bias operand as operand 5. The last associated element +is extracted should have the index i < len (operand 4) + bias (operand 5). + @cindex @code{fold_left_plus_@var{m}} instruction pattern @item @code{fold_left_plus_@var{m}} Take scalar operand 1 and successively add each element from vector diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index 314f63b614b..4138cc31d7e 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -188,6 +188,7 @@ init_internal_fns () #define cond_len_ternary_direct { 1, 1, true } #define while_direct { 0, 2, false } #define fold_extract_direct { 2, 2, false } +#define fold_len_extract_direct { 2, 2, false } #define fold_left_direct { 1, 1, false } #define mask_fold_left_direct { 1, 1, false } #define mask_len_fold_left_direct { 1, 1, false } @@ -3863,6 +3864,9 @@ expand_convert_optab_fn (internal_fn fn, gcall *stmt, convert_optab optab, #define expand_fold_extract_optab_fn(FN, STMT, OPTAB) \ expand_direct_optab_fn (FN, STMT, OPTAB, 3) +#define expand_fold_len_extract_optab_fn(FN, STMT, OPTAB) \ + expand_direct_optab_fn (FN, STMT, OPTAB, 5) + #define expand_fold_left_optab_fn(FN, STMT, OPTAB) \ expand_direct_optab_fn (FN, STMT, OPTAB, 2) @@ -3980,6 +3984,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types, #define direct_mask_len_store_optab_supported_p convert_optab_supported_p #define direct_while_optab_supported_p convert_optab_supported_p #define 
direct_fold_extract_optab_supported_p direct_optab_supported_p +#define direct_fold_len_extract_optab_supported_p direct_optab_supported_p #define direct_fold_left_optab_supported_p direct_optab_supported_p #define direct_mask_fold_left_optab_supported_p direct_optab_supported_p #define direct_mask_len_fold_left_optab_supported_p direct_optab_supported_p diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index 594f7881511..d09403c0a91 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -312,6 +312,9 @@ DEF_INTERNAL_OPTAB_FN (EXTRACT_LAST, ECF_CONST | ECF_NOTHROW, DEF_INTERNAL_OPTAB_FN (FOLD_EXTRACT_LAST, ECF_CONST | ECF_NOTHROW, fold_extract_last, fold_extract) +DEF_INTERNAL_OPTAB_FN (LEN_FOLD_EXTRACT_LAST, ECF_CONST | ECF_NOTHROW, + len_fold_extract_last, fold_len_extract) + DEF_INTERNAL_OPTAB_FN (FOLD_LEFT_PLUS, ECF_CONST | ECF_NOTHROW, fold_left_plus, fold_left)