Message ID | 20240516040542.2734412-1-pan2.li@intel.com |
---|---|
State | New |
Headers | show |
Series | [v2,1/3] Vect: Support loop len in vectorizable early exit | expand |
> -----Original Message----- > From: pan2.li@intel.com <pan2.li@intel.com> > Sent: Thursday, May 16, 2024 5:06 AM > To: gcc-patches@gcc.gnu.org > Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; Tamar Christina > <Tamar.Christina@arm.com>; richard.guenther@gmail.com; Richard Sandiford > <Richard.Sandiford@arm.com>; Pan Li <pan2.li@intel.com> > Subject: [PATCH v2 1/3] Vect: Support loop len in vectorizable early exit > > From: Pan Li <pan2.li@intel.com> > > This patch adds early break auto-vectorization support for targets which > use length for partial vectorization. Consider the following example: > > unsigned vect_a[802]; > unsigned vect_b[802]; > > void test (unsigned x, int n) > { > for (int i = 0; i < n; i++) > { > vect_b[i] = x + i; > > if (vect_a[i] > x) > break; > > vect_a[i] = x; > } > } > > We use VCOND_MASK_LEN to simulate the generation of (mask && i < len + bias). > The IR for RVV then looks like below: > > ... > _87 = .SELECT_VL (ivtmp_85, POLY_INT_CST [32, 32]); > _55 = (int) _87; > ... > mask_patt_6.13_69 = vect_cst__62 < vect__3.12_67; > vec_len_mask_72 = .VCOND_MASK_LEN (mask_patt_6.13_69, { -1, ... }, \ > {0, ... }, _87, 0); > if (vec_len_mask_72 != { 0, ... }) > goto <bb 6>; [5.50%] > else > goto <bb 7>; [94.50%] > > The tests below passed for this patch: > 1. The riscv full regression tests. > 2. The x86 bootstrap tests. > 3. The x86 full regression tests. > > gcc/ChangeLog: > > * tree-vect-stmts.cc (vectorizable_early_exit): Add loop len > handling for one or multiple stmt. > > gcc/ChangeLog: > > * tree-vect-loop.cc (vect_gen_loop_len_mask): New func to gen > the loop len mask. > * tree-vect-stmts.cc (vectorizable_early_exit): Invoke the > vect_gen_loop_len_mask for 1 or more stmt(s). > * tree-vectorizer.h (vect_gen_loop_len_mask): New func decl > for vect_gen_loop_len_mask. > Thanks, this version looks good to me! You'll need Richi's review still. 
Cheers, Tamar > Signed-off-by: Pan Li <pan2.li@intel.com> > --- > gcc/tree-vect-loop.cc | 27 +++++++++++++++++++++++++++ > gcc/tree-vect-stmts.cc | 17 +++++++++++++++-- > gcc/tree-vectorizer.h | 4 ++++ > 3 files changed, 46 insertions(+), 2 deletions(-) > > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc > index 361aec06488..83c0544b6aa 100644 > --- a/gcc/tree-vect-loop.cc > +++ b/gcc/tree-vect-loop.cc > @@ -11416,6 +11416,33 @@ vect_get_loop_len (loop_vec_info loop_vinfo, > gimple_stmt_iterator *gsi, > return loop_len; > } > > +/* Generate the tree for the loop len mask and return it. Given the lens, > + nvectors, vectype, index and factor to gen the len mask as below. > + > + tree len_mask = VCOND_MASK_LEN (compare_mask, ones, zero, len, bias) > +*/ > +tree > +vect_gen_loop_len_mask (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi, > + gimple_stmt_iterator *cond_gsi, vec_loop_lens *lens, > + unsigned int nvectors, tree vectype, tree stmt, > + unsigned int index, unsigned int factor) > +{ > + tree all_one_mask = build_all_ones_cst (vectype); > + tree all_zero_mask = build_zero_cst (vectype); > + tree len = vect_get_loop_len (loop_vinfo, gsi, lens, nvectors, vectype, index, > + factor); > + tree bias = build_int_cst (intQI_type_node, > + LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS > (loop_vinfo)); > + tree len_mask = make_temp_ssa_name (TREE_TYPE (stmt), NULL, > "vec_len_mask"); > + gcall *call = gimple_build_call_internal (IFN_VCOND_MASK_LEN, 5, stmt, > + all_one_mask, all_zero_mask, len, > + bias); > + gimple_call_set_lhs (call, len_mask); > + gsi_insert_before (cond_gsi, call, GSI_SAME_STMT); > + > + return len_mask; > +} > + > /* Scale profiling counters by estimation for LOOP which is vectorized > by factor VF. 
> If FLAT is true, the loop we started with had unrealistically flat > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc > index b8a71605f1b..672959501bb 100644 > --- a/gcc/tree-vect-stmts.cc > +++ b/gcc/tree-vect-stmts.cc > @@ -12895,7 +12895,9 @@ vectorizable_early_exit (vec_info *vinfo, > stmt_vec_info stmt_info, > ncopies = vect_get_num_copies (loop_vinfo, vectype); > > vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); > + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo); > bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); > + bool len_loop_p = LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo); > > /* Now build the new conditional. Pattern gimple_conds get dropped during > codegen so we must replace the original insn. */ > @@ -12959,12 +12961,11 @@ vectorizable_early_exit (vec_info *vinfo, > stmt_vec_info stmt_info, > { > if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype, > OPTIMIZE_FOR_SPEED)) > - return false; > + vect_record_loop_len (loop_vinfo, lens, ncopies, vectype, 1); > else > vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, NULL); > } > > - > return true; > } > > @@ -13017,6 +13018,15 @@ vectorizable_early_exit (vec_info *vinfo, > stmt_vec_info stmt_info, > stmts[i], &cond_gsi); > workset.quick_push (stmt_mask); > } > + else if (len_loop_p) > + for (unsigned i = 0; i < stmts.length (); i++) > + { > + tree len_mask = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi, > + lens, ncopies, vectype, > + stmts[i], i, 1); > + > + workset.quick_push (len_mask); > + } > else > workset.splice (stmts); > > @@ -13041,6 +13051,9 @@ vectorizable_early_exit (vec_info *vinfo, > stmt_vec_info stmt_info, > new_temp = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask, > new_temp, &cond_gsi); > } > + else if (len_loop_p) > + new_temp = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi, lens, > + ncopies, vectype, new_temp, 0, 1); > } > > gcc_assert (new_temp); > diff --git a/gcc/tree-vectorizer.h 
b/gcc/tree-vectorizer.h > index db44d730b70..93bc30ef660 100644 > --- a/gcc/tree-vectorizer.h > +++ b/gcc/tree-vectorizer.h > @@ -2408,6 +2408,10 @@ extern void vect_record_loop_len (loop_vec_info, > vec_loop_lens *, unsigned int, > extern tree vect_get_loop_len (loop_vec_info, gimple_stmt_iterator *, > vec_loop_lens *, unsigned int, tree, > unsigned int, unsigned int); > +extern tree vect_gen_loop_len_mask (loop_vec_info, gimple_stmt_iterator *, > + gimple_stmt_iterator *, vec_loop_lens *, > + unsigned int, tree, tree, unsigned int, > + unsigned int); > extern gimple_seq vect_gen_len (tree, tree, tree, tree); > extern stmt_vec_info info_for_reduction (vec_info *, stmt_vec_info); > extern bool reduction_fn_for_scalar_code (code_helper, internal_fn *); > -- > 2.34.1
On Thu, May 16, 2024 at 8:50 AM Tamar Christina <Tamar.Christina@arm.com> wrote: > > > -----Original Message----- > > From: pan2.li@intel.com <pan2.li@intel.com> > > Sent: Thursday, May 16, 2024 5:06 AM > > To: gcc-patches@gcc.gnu.org > > Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; Tamar Christina > > <Tamar.Christina@arm.com>; richard.guenther@gmail.com; Richard Sandiford > > <Richard.Sandiford@arm.com>; Pan Li <pan2.li@intel.com> > > Subject: [PATCH v2 1/3] Vect: Support loop len in vectorizable early exit > > > > From: Pan Li <pan2.li@intel.com> > > > > This patch adds early break auto-vectorization support for target which > > use length on partial vectorization. Consider this following example: > > > > unsigned vect_a[802]; > > unsigned vect_b[802]; > > > > void test (unsigned x, int n) > > { > > for (int i = 0; i < n; i++) > > { > > vect_b[i] = x + i; > > > > if (vect_a[i] > x) > > break; > > > > vect_a[i] = x; > > } > > } > > > > We use VCOND_MASK_LEN to simulate the generate (mask && i < len + bias). > > And then the IR of RVV looks like below: > > > > ... > > _87 = .SELECT_VL (ivtmp_85, POLY_INT_CST [32, 32]); > > _55 = (int) _87; > > ... > > mask_patt_6.13_69 = vect_cst__62 < vect__3.12_67; > > vec_len_mask_72 = .VCOND_MASK_LEN (mask_patt_6.13_69, { -1, ... }, \ > > {0, ... }, _87, 0); > > if (vec_len_mask_72 != { 0, ... }) > > goto <bb 6>; [5.50%] > > else > > goto <bb 7>; [94.50%] > > > > The below tests are passed for this patch: > > 1. The riscv fully regression tests. > > 2. The x86 bootstrap tests. > > 3. The x86 fully regression tests. > > > > gcc/ChangeLog: > > > > * tree-vect-stmts.cc (vectorizable_early_exit): Add loop len > > handling for one or multiple stmt. > > > > gcc/ChangeLog: > > > > * tree-vect-loop.cc (vect_gen_loop_len_mask): New func to gen > > the loop len mask. > > * tree-vect-stmts.cc (vectorizable_early_exit): Invoke the > > vect_gen_loop_len_mask for 1 or more stmt(s). 
> > * tree-vectorizer.h (vect_gen_loop_len_mask): New func decl > > for vect_gen_loop_len_mask. > > > > Thanks, this version looks good to me! > > You'll need Richi's review still. OK. Thanks, Richard. > Cheers, > Tamar > > > Signed-off-by: Pan Li <pan2.li@intel.com> > > --- > > gcc/tree-vect-loop.cc | 27 +++++++++++++++++++++++++++ > > gcc/tree-vect-stmts.cc | 17 +++++++++++++++-- > > gcc/tree-vectorizer.h | 4 ++++ > > 3 files changed, 46 insertions(+), 2 deletions(-) > > > > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc > > index 361aec06488..83c0544b6aa 100644 > > --- a/gcc/tree-vect-loop.cc > > +++ b/gcc/tree-vect-loop.cc > > @@ -11416,6 +11416,33 @@ vect_get_loop_len (loop_vec_info loop_vinfo, > > gimple_stmt_iterator *gsi, > > return loop_len; > > } > > > > +/* Generate the tree for the loop len mask and return it. Given the lens, > > + nvectors, vectype, index and factor to gen the len mask as below. > > + > > + tree len_mask = VCOND_MASK_LEN (compare_mask, ones, zero, len, bias) > > +*/ > > +tree > > +vect_gen_loop_len_mask (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi, > > + gimple_stmt_iterator *cond_gsi, vec_loop_lens *lens, > > + unsigned int nvectors, tree vectype, tree stmt, > > + unsigned int index, unsigned int factor) > > +{ > > + tree all_one_mask = build_all_ones_cst (vectype); > > + tree all_zero_mask = build_zero_cst (vectype); > > + tree len = vect_get_loop_len (loop_vinfo, gsi, lens, nvectors, vectype, index, > > + factor); > > + tree bias = build_int_cst (intQI_type_node, > > + LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS > > (loop_vinfo)); > > + tree len_mask = make_temp_ssa_name (TREE_TYPE (stmt), NULL, > > "vec_len_mask"); > > + gcall *call = gimple_build_call_internal (IFN_VCOND_MASK_LEN, 5, stmt, > > + all_one_mask, all_zero_mask, len, > > + bias); > > + gimple_call_set_lhs (call, len_mask); > > + gsi_insert_before (cond_gsi, call, GSI_SAME_STMT); > > + > > + return len_mask; > > +} > > + > > /* Scale profiling counters by 
estimation for LOOP which is vectorized > > by factor VF. > > If FLAT is true, the loop we started with had unrealistically flat > > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc > > index b8a71605f1b..672959501bb 100644 > > --- a/gcc/tree-vect-stmts.cc > > +++ b/gcc/tree-vect-stmts.cc > > @@ -12895,7 +12895,9 @@ vectorizable_early_exit (vec_info *vinfo, > > stmt_vec_info stmt_info, > > ncopies = vect_get_num_copies (loop_vinfo, vectype); > > > > vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); > > + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo); > > bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); > > + bool len_loop_p = LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo); > > > > /* Now build the new conditional. Pattern gimple_conds get dropped during > > codegen so we must replace the original insn. */ > > @@ -12959,12 +12961,11 @@ vectorizable_early_exit (vec_info *vinfo, > > stmt_vec_info stmt_info, > > { > > if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype, > > OPTIMIZE_FOR_SPEED)) > > - return false; > > + vect_record_loop_len (loop_vinfo, lens, ncopies, vectype, 1); > > else > > vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, NULL); > > } > > > > - > > return true; > > } > > > > @@ -13017,6 +13018,15 @@ vectorizable_early_exit (vec_info *vinfo, > > stmt_vec_info stmt_info, > > stmts[i], &cond_gsi); > > workset.quick_push (stmt_mask); > > } > > + else if (len_loop_p) > > + for (unsigned i = 0; i < stmts.length (); i++) > > + { > > + tree len_mask = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi, > > + lens, ncopies, vectype, > > + stmts[i], i, 1); > > + > > + workset.quick_push (len_mask); > > + } > > else > > workset.splice (stmts); > > > > @@ -13041,6 +13051,9 @@ vectorizable_early_exit (vec_info *vinfo, > > stmt_vec_info stmt_info, > > new_temp = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask, > > new_temp, &cond_gsi); > > } > > + else if (len_loop_p) > > + new_temp = 
vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi, lens, > > + ncopies, vectype, new_temp, 0, 1); > > } > > > > gcc_assert (new_temp); > > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h > > index db44d730b70..93bc30ef660 100644 > > --- a/gcc/tree-vectorizer.h > > +++ b/gcc/tree-vectorizer.h > > @@ -2408,6 +2408,10 @@ extern void vect_record_loop_len (loop_vec_info, > > vec_loop_lens *, unsigned int, > > extern tree vect_get_loop_len (loop_vec_info, gimple_stmt_iterator *, > > vec_loop_lens *, unsigned int, tree, > > unsigned int, unsigned int); > > +extern tree vect_gen_loop_len_mask (loop_vec_info, gimple_stmt_iterator *, > > + gimple_stmt_iterator *, vec_loop_lens *, > > + unsigned int, tree, tree, unsigned int, > > + unsigned int); > > extern gimple_seq vect_gen_len (tree, tree, tree, tree); > > extern stmt_vec_info info_for_reduction (vec_info *, stmt_vec_info); > > extern bool reduction_fn_for_scalar_code (code_helper, internal_fn *); > > -- > > 2.34.1 >
Committed, thanks Richard. Pan -----Original Message----- From: Richard Biener <richard.guenther@gmail.com> Sent: Thursday, May 16, 2024 8:13 PM To: Tamar Christina <Tamar.Christina@arm.com> Cc: Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; Richard Sandiford <Richard.Sandiford@arm.com> Subject: Re: [PATCH v2 1/3] Vect: Support loop len in vectorizable early exit On Thu, May 16, 2024 at 8:50 AM Tamar Christina <Tamar.Christina@arm.com> wrote: > > > -----Original Message----- > > From: pan2.li@intel.com <pan2.li@intel.com> > > Sent: Thursday, May 16, 2024 5:06 AM > > To: gcc-patches@gcc.gnu.org > > Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; Tamar Christina > > <Tamar.Christina@arm.com>; richard.guenther@gmail.com; Richard Sandiford > > <Richard.Sandiford@arm.com>; Pan Li <pan2.li@intel.com> > > Subject: [PATCH v2 1/3] Vect: Support loop len in vectorizable early exit > > > > From: Pan Li <pan2.li@intel.com> > > > > This patch adds early break auto-vectorization support for target which > > use length on partial vectorization. Consider this following example: > > > > unsigned vect_a[802]; > > unsigned vect_b[802]; > > > > void test (unsigned x, int n) > > { > > for (int i = 0; i < n; i++) > > { > > vect_b[i] = x + i; > > > > if (vect_a[i] > x) > > break; > > > > vect_a[i] = x; > > } > > } > > > > We use VCOND_MASK_LEN to simulate the generate (mask && i < len + bias). > > And then the IR of RVV looks like below: > > > > ... > > _87 = .SELECT_VL (ivtmp_85, POLY_INT_CST [32, 32]); > > _55 = (int) _87; > > ... > > mask_patt_6.13_69 = vect_cst__62 < vect__3.12_67; > > vec_len_mask_72 = .VCOND_MASK_LEN (mask_patt_6.13_69, { -1, ... }, \ > > {0, ... }, _87, 0); > > if (vec_len_mask_72 != { 0, ... }) > > goto <bb 6>; [5.50%] > > else > > goto <bb 7>; [94.50%] > > > > The below tests are passed for this patch: > > 1. The riscv fully regression tests. > > 2. The x86 bootstrap tests. > > 3. 
The x86 fully regression tests. > > > > gcc/ChangeLog: > > > > * tree-vect-stmts.cc (vectorizable_early_exit): Add loop len > > handling for one or multiple stmt. > > > > gcc/ChangeLog: > > > > * tree-vect-loop.cc (vect_gen_loop_len_mask): New func to gen > > the loop len mask. > > * tree-vect-stmts.cc (vectorizable_early_exit): Invoke the > > vect_gen_loop_len_mask for 1 or more stmt(s). > > * tree-vectorizer.h (vect_gen_loop_len_mask): New func decl > > for vect_gen_loop_len_mask. > > > > Thanks, this version looks good to me! > > You'll need Richi's review still. OK. Thanks, Richard. > Cheers, > Tamar > > > Signed-off-by: Pan Li <pan2.li@intel.com> > > --- > > gcc/tree-vect-loop.cc | 27 +++++++++++++++++++++++++++ > > gcc/tree-vect-stmts.cc | 17 +++++++++++++++-- > > gcc/tree-vectorizer.h | 4 ++++ > > 3 files changed, 46 insertions(+), 2 deletions(-) > > > > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc > > index 361aec06488..83c0544b6aa 100644 > > --- a/gcc/tree-vect-loop.cc > > +++ b/gcc/tree-vect-loop.cc > > @@ -11416,6 +11416,33 @@ vect_get_loop_len (loop_vec_info loop_vinfo, > > gimple_stmt_iterator *gsi, > > return loop_len; > > } > > > > +/* Generate the tree for the loop len mask and return it. Given the lens, > > + nvectors, vectype, index and factor to gen the len mask as below. 
> > + > > + tree len_mask = VCOND_MASK_LEN (compare_mask, ones, zero, len, bias) > > +*/ > > +tree > > +vect_gen_loop_len_mask (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi, > > + gimple_stmt_iterator *cond_gsi, vec_loop_lens *lens, > > + unsigned int nvectors, tree vectype, tree stmt, > > + unsigned int index, unsigned int factor) > > +{ > > + tree all_one_mask = build_all_ones_cst (vectype); > > + tree all_zero_mask = build_zero_cst (vectype); > > + tree len = vect_get_loop_len (loop_vinfo, gsi, lens, nvectors, vectype, index, > > + factor); > > + tree bias = build_int_cst (intQI_type_node, > > + LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS > > (loop_vinfo)); > > + tree len_mask = make_temp_ssa_name (TREE_TYPE (stmt), NULL, > > "vec_len_mask"); > > + gcall *call = gimple_build_call_internal (IFN_VCOND_MASK_LEN, 5, stmt, > > + all_one_mask, all_zero_mask, len, > > + bias); > > + gimple_call_set_lhs (call, len_mask); > > + gsi_insert_before (cond_gsi, call, GSI_SAME_STMT); > > + > > + return len_mask; > > +} > > + > > /* Scale profiling counters by estimation for LOOP which is vectorized > > by factor VF. > > If FLAT is true, the loop we started with had unrealistically flat > > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc > > index b8a71605f1b..672959501bb 100644 > > --- a/gcc/tree-vect-stmts.cc > > +++ b/gcc/tree-vect-stmts.cc > > @@ -12895,7 +12895,9 @@ vectorizable_early_exit (vec_info *vinfo, > > stmt_vec_info stmt_info, > > ncopies = vect_get_num_copies (loop_vinfo, vectype); > > > > vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); > > + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo); > > bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); > > + bool len_loop_p = LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo); > > > > /* Now build the new conditional. Pattern gimple_conds get dropped during > > codegen so we must replace the original insn. 
*/ > > @@ -12959,12 +12961,11 @@ vectorizable_early_exit (vec_info *vinfo, > > stmt_vec_info stmt_info, > > { > > if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype, > > OPTIMIZE_FOR_SPEED)) > > - return false; > > + vect_record_loop_len (loop_vinfo, lens, ncopies, vectype, 1); > > else > > vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, NULL); > > } > > > > - > > return true; > > } > > > > @@ -13017,6 +13018,15 @@ vectorizable_early_exit (vec_info *vinfo, > > stmt_vec_info stmt_info, > > stmts[i], &cond_gsi); > > workset.quick_push (stmt_mask); > > } > > + else if (len_loop_p) > > + for (unsigned i = 0; i < stmts.length (); i++) > > + { > > + tree len_mask = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi, > > + lens, ncopies, vectype, > > + stmts[i], i, 1); > > + > > + workset.quick_push (len_mask); > > + } > > else > > workset.splice (stmts); > > > > @@ -13041,6 +13051,9 @@ vectorizable_early_exit (vec_info *vinfo, > > stmt_vec_info stmt_info, > > new_temp = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask, > > new_temp, &cond_gsi); > > } > > + else if (len_loop_p) > > + new_temp = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi, lens, > > + ncopies, vectype, new_temp, 0, 1); > > } > > > > gcc_assert (new_temp); > > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h > > index db44d730b70..93bc30ef660 100644 > > --- a/gcc/tree-vectorizer.h > > +++ b/gcc/tree-vectorizer.h > > @@ -2408,6 +2408,10 @@ extern void vect_record_loop_len (loop_vec_info, > > vec_loop_lens *, unsigned int, > > extern tree vect_get_loop_len (loop_vec_info, gimple_stmt_iterator *, > > vec_loop_lens *, unsigned int, tree, > > unsigned int, unsigned int); > > +extern tree vect_gen_loop_len_mask (loop_vec_info, gimple_stmt_iterator *, > > + gimple_stmt_iterator *, vec_loop_lens *, > > + unsigned int, tree, tree, unsigned int, > > + unsigned int); > > extern gimple_seq vect_gen_len (tree, tree, tree, tree); > > extern stmt_vec_info info_for_reduction 
(vec_info *, stmt_vec_info); > > extern bool reduction_fn_for_scalar_code (code_helper, internal_fn *); > > -- > > 2.34.1 >
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 361aec06488..83c0544b6aa 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -11416,6 +11416,33 @@ vect_get_loop_len (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi, return loop_len; } +/* Generate the tree for the loop len mask and return it. Given the lens, + nvectors, vectype, index and factor to gen the len mask as below. + + tree len_mask = VCOND_MASK_LEN (compare_mask, ones, zero, len, bias) +*/ +tree +vect_gen_loop_len_mask (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi, + gimple_stmt_iterator *cond_gsi, vec_loop_lens *lens, + unsigned int nvectors, tree vectype, tree stmt, + unsigned int index, unsigned int factor) +{ + tree all_one_mask = build_all_ones_cst (vectype); + tree all_zero_mask = build_zero_cst (vectype); + tree len = vect_get_loop_len (loop_vinfo, gsi, lens, nvectors, vectype, index, + factor); + tree bias = build_int_cst (intQI_type_node, + LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo)); + tree len_mask = make_temp_ssa_name (TREE_TYPE (stmt), NULL, "vec_len_mask"); + gcall *call = gimple_build_call_internal (IFN_VCOND_MASK_LEN, 5, stmt, + all_one_mask, all_zero_mask, len, + bias); + gimple_call_set_lhs (call, len_mask); + gsi_insert_before (cond_gsi, call, GSI_SAME_STMT); + + return len_mask; +} + /* Scale profiling counters by estimation for LOOP which is vectorized by factor VF. 
If FLAT is true, the loop we started with had unrealistically flat diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index b8a71605f1b..672959501bb 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -12895,7 +12895,9 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info, ncopies = vect_get_num_copies (loop_vinfo, vectype); vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo); bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); + bool len_loop_p = LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo); /* Now build the new conditional. Pattern gimple_conds get dropped during codegen so we must replace the original insn. */ @@ -12959,12 +12961,11 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info, { if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype, OPTIMIZE_FOR_SPEED)) - return false; + vect_record_loop_len (loop_vinfo, lens, ncopies, vectype, 1); else vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, NULL); } - return true; } @@ -13017,6 +13018,15 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info, stmts[i], &cond_gsi); workset.quick_push (stmt_mask); } + else if (len_loop_p) + for (unsigned i = 0; i < stmts.length (); i++) + { + tree len_mask = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi, + lens, ncopies, vectype, + stmts[i], i, 1); + + workset.quick_push (len_mask); + } else workset.splice (stmts); @@ -13041,6 +13051,9 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info, new_temp = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask, new_temp, &cond_gsi); } + else if (len_loop_p) + new_temp = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi, lens, + ncopies, vectype, new_temp, 0, 1); } gcc_assert (new_temp); diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index db44d730b70..93bc30ef660 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -2408,6 +2408,10 @@ 
extern void vect_record_loop_len (loop_vec_info, vec_loop_lens *, unsigned int, extern tree vect_get_loop_len (loop_vec_info, gimple_stmt_iterator *, vec_loop_lens *, unsigned int, tree, unsigned int, unsigned int); +extern tree vect_gen_loop_len_mask (loop_vec_info, gimple_stmt_iterator *, + gimple_stmt_iterator *, vec_loop_lens *, + unsigned int, tree, tree, unsigned int, + unsigned int); extern gimple_seq vect_gen_len (tree, tree, tree, tree); extern stmt_vec_info info_for_reduction (vec_info *, stmt_vec_info); extern bool reduction_fn_for_scalar_code (code_helper, internal_fn *);