Message ID | ecec76ce-2aca-547c-a70d-e1b9838fae2c@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | [v3,1/2] IFN: Implement IFN_VEC_SET for ARRAY_REF with VIEW_CONVERT_EXPR | expand |
On Tue, Sep 22, 2020 at 5:55 AM xionghu luo <luoxhu@linux.ibm.com> wrote: > > Thanks for the review, > > > On 2020/9/21 16:31, Richard Biener wrote: > >> + > >> +static gimple * > >> +gimple_expand_vec_set_expr (gimple_stmt_iterator *gsi) > >> +{ > >> + enum tree_code code; > >> + gcall *new_stmt = NULL; > >> + gassign *ass_stmt = NULL; > >> + > >> + /* Only consider code == GIMPLE_ASSIGN. */ > >> + gassign *stmt = dyn_cast<gassign *> (gsi_stmt (*gsi)); > >> + if (!stmt) > >> + return NULL; > >> + > >> + code = TREE_CODE (gimple_assign_lhs (stmt)); > > > > do the lhs = gimple_assign_lhs (stmt) before and elide cond, > > putting the TREE_CODE into the if below. > > Done. > > > > >> + if (code != ARRAY_REF) > >> + return NULL; > >> + > >> + tree lhs = gimple_assign_lhs (stmt); > >> + tree val = gimple_assign_rhs1 (stmt); > >> + > >> + tree type = TREE_TYPE (lhs); > >> + tree op0 = TREE_OPERAND (lhs, 0); > >> + if (TREE_CODE (op0) == VIEW_CONVERT_EXPR > > > > So I think we want to have an exact structural match first here, so > > > > if (TREE_CODE (op0) == VIEW_CONVERT_EXPR > > && DECL_P (TREE_OPERAND (op0, 0)) > > && VECTOR_TYPE_P (TREE_TYPE (TREE_OPERAND (op0, 0))) > > && TYPE_MODE (TREE_TYPE (lhs)) == TYPE_MODE (TREE_TYPE > > (TREE_TYPE (TREE_OPERAND (op0, 0)))) > > > > which means we're sure to do an element extract from a vector type > > (and we know all vector types have sane element types). > > Done. > > > > > > >> + && tree_fits_uhwi_p (TYPE_SIZE (type))) > >> + { > >> + tree pos = TREE_OPERAND (lhs, 1); > >> + tree view_op0 = TREE_OPERAND (op0, 0); > >> + machine_mode outermode = TYPE_MODE (TREE_TYPE (view_op0)); > >> + scalar_mode innermode = GET_MODE_INNER (outermode); > >> + tree_code code = TREE_CODE (TREE_TYPE(view_op0)); > >> + if (!is_global_var (view_op0) && code == VECTOR_TYPE > >> + && tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (view_op0))) > > > > why did you need those TYPE_SIZE checks? 
As said earlier > > you want !TREE_ADDRESSABLE (view_op0) and eventually > > the stronger auto_var_in_fn_p (view_op0, cfun) rather than !is_global_var. > > Done. > > > > >> + && can_vec_set_var_idx_p (code, outermode, innermode, > >> + TYPE_MODE (TREE_TYPE (pos)))) > >> + { > >> + location_t loc = gimple_location (stmt); > >> + tree var_src = make_ssa_name (TREE_TYPE (view_op0)); > >> + tree var_dst = make_ssa_name (TREE_TYPE (view_op0)); > >> + > >> + ass_stmt = gimple_build_assign (var_src, view_op0); > >> + gimple_set_vuse (ass_stmt, gimple_vuse (stmt)); > >> + gimple_set_location (ass_stmt, loc); > >> + gsi_insert_before (gsi, ass_stmt, GSI_SAME_STMT); > >> + > >> + new_stmt > >> + = gimple_build_call_internal (IFN_VEC_SET, 3, var_src, val, pos); > >> + gimple_call_set_lhs (new_stmt, var_dst); > >> + gimple_set_location (new_stmt, loc); > >> + gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); > >> + > >> + ass_stmt = gimple_build_assign (view_op0, var_dst); > >> + gimple_set_location (ass_stmt, loc); > >> + gsi_insert_before (gsi, ass_stmt, GSI_SAME_STMT); > >> + > >> + gimple_move_vops (ass_stmt, stmt); > >> + gsi_remove (gsi, true); > >> + } > >> + } > >> + > >> + return ass_stmt; > >> +} > >> > >> /* Expand all VEC_COND_EXPR gimple assignments into calls to internal > >> function based on type of selected expansion. 
*/ > >> @@ -187,8 +261,25 @@ gimple_expand_vec_cond_exprs (void) > >> { > >> for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > >> { > >> - gimple *g = gimple_expand_vec_cond_expr (&gsi, > >> - &vec_cond_ssa_name_uses); > >> + gassign *stmt = dyn_cast<gassign *> (gsi_stmt (gsi)); > >> + if (!stmt) > >> + continue; > >> + > >> + enum tree_code code; > >> + gimple *g = NULL; > >> + code = gimple_assign_rhs_code (stmt); > >> + switch (code) > >> + { > >> + case VEC_COND_EXPR: > >> + g = gimple_expand_vec_cond_expr (&gsi, &vec_cond_ssa_name_uses); > >> + break; > >> + case ARRAY_REF: > >> + /* TODO: generate IFN for vec_extract with variable index. */ > > > > so why not do this here? > > > >> + break; > >> + default: > >> + break; > >> + } > >> + > >> if (g != NULL) > >> { > >> tree lhs = gimple_assign_lhs (gsi_stmt (gsi)); > >> @@ -204,6 +295,27 @@ gimple_expand_vec_cond_exprs (void) > >> > >> simple_dce_from_worklist (dce_ssa_names); > >> > >> + FOR_EACH_BB_FN (bb, cfun) > > > > but in a separate loop? > > The first loop is for rhs stmt process, this loop is for lhs stmt process. > I thought vec_extract also need to generate IFN before, but seems not > necessary now? And that the first loop needs to update the lhs stmt while > then second doesn't. That's not good reasons to separate them, please move all the processing into one loop. + gassign *stmt = dyn_cast<gassign *> (gsi_stmt (gsi)); + if (!stmt) + continue; + + enum tree_code code; + code = TREE_CODE (gimple_assign_lhs (stmt)); + switch (code) + { + case ARRAY_REF: + gimple_expand_vec_set_expr (&gsi); you also do the assign and ARRAY_REF checking duplicate. The patch likely wasn't bootstrapped because I've seen unused and set-but-not-used variables. Otherwise the patch looks good to me - I guess you want to add the vec_extract bits as well so you can overall assess the affect of the patch on altivec code? 
That said, the patch misses a testcase where we verify we properly expand the vector to a pseudo now. Richard. > > > >> + { > >> + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > >> + { > >> + gassign *stmt = dyn_cast<gassign *> (gsi_stmt (gsi)); > >> + if (!stmt) > >> + continue; > >> + > >> + enum tree_code code; > >> + code = TREE_CODE (gimple_assign_lhs (stmt)); > >> + switch (code) > >> + { > >> + case ARRAY_REF: > >> + gimple_expand_vec_set_expr (&gsi); > >> + break; > >> + default: > >> + break; > >> + } > >> + } > >> + } > >> + > >> return 0; > >> } > >> > >> diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c > >> index 8efc77d986b..36837381c04 100644 > >> --- a/gcc/internal-fn.c > >> +++ b/gcc/internal-fn.c > >> @@ -115,6 +115,7 @@ init_internal_fns () > >> #define vec_condeq_direct { 0, 0, false } > >> #define scatter_store_direct { 3, 1, false } > >> #define len_store_direct { 3, 3, false } > >> +#define vec_set_direct { 3, 3, false } > >> #define unary_direct { 0, 0, true } > >> #define binary_direct { 0, 0, true } > >> #define ternary_direct { 0, 0, true } > >> @@ -2658,6 +2659,40 @@ expand_vect_cond_mask_optab_fn (internal_fn, gcall *stmt, convert_optab optab) > >> > >> #define expand_vec_cond_mask_optab_fn expand_vect_cond_mask_optab_fn > >> > >> +static void > >> +expand_vec_set_optab_fn (internal_fn, gcall *stmt, convert_optab optab) > > > > all new functions require a function level comment > > Done. 
> > > > >> +{ > >> + tree lhs = gimple_call_lhs (stmt); > >> + tree op0 = gimple_call_arg (stmt, 0); > >> + tree op1 = gimple_call_arg (stmt, 1); > >> + tree op2 = gimple_call_arg (stmt, 2); > >> + rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); > >> + rtx src = expand_expr (op0, NULL_RTX, VOIDmode, EXPAND_WRITE); > >> + > >> + machine_mode outermode = TYPE_MODE (TREE_TYPE (op0)); > >> + scalar_mode innermode = GET_MODE_INNER (outermode); > >> + > >> + rtx value = expand_expr (op1, NULL_RTX, VOIDmode, EXPAND_NORMAL); > >> + rtx pos = expand_expr (op2, NULL_RTX, VOIDmode, EXPAND_NORMAL); > >> + > >> + class expand_operand ops[3]; > >> + enum insn_code icode = optab_handler (optab, outermode); > >> + > >> + if (icode != CODE_FOR_nothing) > >> + { > >> + pos = convert_to_mode (E_SImode, pos, 0); > >> + > >> + create_fixed_operand (&ops[0], src); > >> + create_input_operand (&ops[1], value, innermode); > >> + create_input_operand (&ops[2], pos, GET_MODE (pos)); > >> + if (maybe_expand_insn (icode, 3, ops)) > >> + { > >> + emit_move_insn (target, src); > > > > I think you need to assert that we end up here. > > Added gcc_unreachable at the end of this function. > > > > >> + return; > >> + } > >> + } > >> +} > >> + > >> static void > >> expand_ABNORMAL_DISPATCHER (internal_fn, gcall *) > >> { > >> @@ -3253,6 +3288,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types, > >> #define direct_fold_left_optab_supported_p direct_optab_supported_p > >> #define direct_mask_fold_left_optab_supported_p direct_optab_supported_p > >> #define direct_check_ptrs_optab_supported_p direct_optab_supported_p > >> +#define direct_vec_set_optab_supported_p direct_optab_supported_p > >> > >> /* Return the optab used by internal function FN. 
*/ > >> > >> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def > >> index 13e60828fcf..e6cfe1b6159 100644 > >> --- a/gcc/internal-fn.def > >> +++ b/gcc/internal-fn.def > >> @@ -145,6 +145,8 @@ DEF_INTERNAL_OPTAB_FN (VCONDU, 0, vcondu, vec_condu) > >> DEF_INTERNAL_OPTAB_FN (VCONDEQ, 0, vcondeq, vec_condeq) > >> DEF_INTERNAL_OPTAB_FN (VCOND_MASK, 0, vcond_mask, vec_cond_mask) > >> > >> +DEF_INTERNAL_OPTAB_FN (VEC_SET, 0, vec_set, vec_set) > >> + > >> DEF_INTERNAL_OPTAB_FN (LEN_STORE, 0, len_store, len_store) > >> > >> DEF_INTERNAL_OPTAB_FN (WHILE_ULT, ECF_CONST | ECF_NOTHROW, while_ult, while) > >> diff --git a/gcc/optabs.c b/gcc/optabs.c > >> index 184827fdf4e..c8125670d2d 100644 > >> --- a/gcc/optabs.c > >> +++ b/gcc/optabs.c > >> @@ -3841,6 +3841,23 @@ can_vcond_compare_p (enum rtx_code code, machine_mode value_mode, > >> && insn_operand_matches (icode, 3, test); > >> } > >> > >> +bool > >> +can_vec_set_var_idx_p (enum tree_code code, machine_mode vec_mode, > >> + machine_mode value_mode, machine_mode idx_mode) > > > > toplevel comment missing > > > >> +{ > >> + gcc_assert (code == VECTOR_TYPE); > > > > what's the point of pasing 'code' here then? Since the optab only has a single > > mode, the vector mode, the value_mode is redundant as well. And I guess > > we might want to handle "arbitrary" index modes? That is, the .md expanders > > should not restrict its mode - I guess it simply uses VOIDmode at the moment > > (for integer constants). Not sure how to best do this without an explicit mode > > in the optab ... > > Yes, removed 'code' and value_mode by checking VECTOR_MODE_P and use GET_MODE_INNER > for value_mode. ".md expanders" shall support for integer constants index mode, but > I guess they shouldn't be expanded by IFN as this function is for variable index > insert only? Anyway, the v3 patch used VOIDmode check... > > > Thanks, > Xionghu >
Hi, On 2020/9/23 19:33, Richard Biener wrote: >> The first loop is for rhs stmt process, this loop is for lhs stmt process. >> I thought vec_extract also need to generate IFN before, but seems not >> necessary now? And that the first loop needs to update the lhs stmt while >> then second doesn't. > That's not good reasons to separate them, please move all the processing > into one loop. > > + gassign *stmt = dyn_cast<gassign *> (gsi_stmt (gsi)); > + if (!stmt) > + continue; > + > + enum tree_code code; > + code = TREE_CODE (gimple_assign_lhs (stmt)); > + switch (code) > + { > + case ARRAY_REF: > + gimple_expand_vec_set_expr (&gsi); > > you also do the assign and ARRAY_REF checking duplicate. > > The patch likely wasn't bootstrapped because I've seen unused and > set-but-not-used > variables. > > Otherwise the patch looks good to me - I guess you want to add the > vec_extract bits as well so you can overall assess the affect of the patch > on altivec code? That said, the patch misses a testcase where we verify > we properly expand the vector to a pseudo now. Thanks, I fixed the bootstrap error. Actually, "[PATCH v2 2/2] rs6000: Expand vec_insert in expander instead of gimple [PR79251]" already includes typed vec_insert tests for V4SI/V4SF/V8HI/V16QI/V2DI/V2DF that exercise the IFN VEC_SET expansion and check the instruction counts, but I am still discussing and refining it based on Segher's comments, and will split it out and send it later once we have reached agreement. I am not sure whether this is the testcase you mentioned? (You said *vec_extract*, but this patch series targets vec_insert only.) FYI, we are aiming for the code generation below, or even better: rlwinm 6,6,2,28,29 mtvsrwz 0,5 lvsr 1,0,6 lvsl 0,0,6 xxperm 34,34,33 xxinsertw 34,0,12 xxperm 34,34,32 The second thing is that I removed the second loop and moved the "gimple_expand_vec_set_expr (&gsi);" call up, as per your comments. Thanks again.
IFN: Implement IFN_VEC_SET for ARRAY_REF with VIEW_CONVERT_EXPR This patch enables transformation from ARRAY_REF(VIEW_CONVERT_EXPR) to VEC_SET internal function in gimple-isel pass if target supports vec_set with variable index by checking can_vec_set_var_idx_p. gcc/ChangeLog: 2020-09-24 Xionghu Luo <luoxhu@linux.ibm.com> * gimple-isel.cc (gimple_expand_vec_set_expr): New function. (gimple_expand_vec_cond_exprs): Rename to ... (gimple_expand_vec_exprs): ... this and call gimple_expand_vec_set_expr. * internal-fn.c (vec_set_direct): New define. (expand_vec_set_optab_fn): New function. (direct_vec_set_optab_supported_p): New define. * internal-fn.def (VEC_SET): New DEF_INTERNAL_OPTAB_FN. * optabs.c (can_vec_set_var_idx_p): New function. * optabs.h (can_vec_set_var_idx_p): New declaration. --- gcc/gimple-isel.cc | 75 +++++++++++++++++++++++++++++++++++++++++++-- gcc/internal-fn.c | 39 +++++++++++++++++++++++ gcc/internal-fn.def | 2 ++ gcc/optabs.c | 21 +++++++++++++ gcc/optabs.h | 4 +++ 5 files changed, 139 insertions(+), 2 deletions(-) diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc index b330cf4c20e..02513e04900 100644 --- a/gcc/gimple-isel.cc +++ b/gcc/gimple-isel.cc @@ -35,6 +35,74 @@ along with GCC; see the file COPYING3. If not see #include "tree-cfg.h" #include "bitmap.h" #include "tree-ssa-dce.h" +#include "memmodel.h" +#include "optabs.h" + +/* Expand all ARRAY_REF(VIEW_CONVERT_EXPR) gimple assignments into calls to + internal function based on vector type of selected expansion. + i.e.: + VIEW_CONVERT_EXPR<int[4]>(u)[_1] = = i_4(D); + => + _7 = u; + _8 = .VEC_SET (_7, i_4(D), _1); + u = _8; */ + +static gimple * +gimple_expand_vec_set_expr (gimple_stmt_iterator *gsi) +{ + enum tree_code code; + gcall *new_stmt = NULL; + gassign *ass_stmt = NULL; + + /* Only consider code == GIMPLE_ASSIGN. 
*/ + gassign *stmt = dyn_cast<gassign *> (gsi_stmt (*gsi)); + if (!stmt) + return NULL; + + tree lhs = gimple_assign_lhs (stmt); + code = TREE_CODE (lhs); + if (code != ARRAY_REF) + return NULL; + + tree val = gimple_assign_rhs1 (stmt); + tree op0 = TREE_OPERAND (lhs, 0); + if (TREE_CODE (op0) == VIEW_CONVERT_EXPR && DECL_P (TREE_OPERAND (op0, 0)) + && VECTOR_TYPE_P (TREE_TYPE (TREE_OPERAND (op0, 0))) + && TYPE_MODE (TREE_TYPE (lhs)) + == TYPE_MODE (TREE_TYPE (TREE_TYPE (TREE_OPERAND (op0, 0))))) + { + tree pos = TREE_OPERAND (lhs, 1); + tree view_op0 = TREE_OPERAND (op0, 0); + machine_mode outermode = TYPE_MODE (TREE_TYPE (view_op0)); + if (auto_var_in_fn_p (view_op0, cfun->decl) + && !TREE_ADDRESSABLE (view_op0) && can_vec_set_var_idx_p (outermode)) + { + location_t loc = gimple_location (stmt); + tree var_src = make_ssa_name (TREE_TYPE (view_op0)); + tree var_dst = make_ssa_name (TREE_TYPE (view_op0)); + + ass_stmt = gimple_build_assign (var_src, view_op0); + gimple_set_vuse (ass_stmt, gimple_vuse (stmt)); + gimple_set_location (ass_stmt, loc); + gsi_insert_before (gsi, ass_stmt, GSI_SAME_STMT); + + new_stmt + = gimple_build_call_internal (IFN_VEC_SET, 3, var_src, val, pos); + gimple_call_set_lhs (new_stmt, var_dst); + gimple_set_location (new_stmt, loc); + gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); + + ass_stmt = gimple_build_assign (view_op0, var_dst); + gimple_set_location (ass_stmt, loc); + gsi_insert_before (gsi, ass_stmt, GSI_SAME_STMT); + + gimple_move_vops (ass_stmt, stmt); + gsi_remove (gsi, true); + } + } + + return ass_stmt; +} /* Expand all VEC_COND_EXPR gimple assignments into calls to internal function based on type of selected expansion. */ @@ -176,7 +244,7 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi, VEC_COND_EXPR assignments. 
*/ static unsigned int -gimple_expand_vec_cond_exprs (void) +gimple_expand_vec_exprs (void) { gimple_stmt_iterator gsi; basic_block bb; @@ -189,12 +257,15 @@ gimple_expand_vec_cond_exprs (void) { gimple *g = gimple_expand_vec_cond_expr (&gsi, &vec_cond_ssa_name_uses); + if (g != NULL) { tree lhs = gimple_assign_lhs (gsi_stmt (gsi)); gimple_set_lhs (g, lhs); gsi_replace (&gsi, g, false); } + + gimple_expand_vec_set_expr (&gsi); } } @@ -237,7 +308,7 @@ public: virtual unsigned int execute (function *) { - return gimple_expand_vec_cond_exprs (); + return gimple_expand_vec_exprs (); } }; // class pass_gimple_isel diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c index 8efc77d986b..f97aea44253 100644 --- a/gcc/internal-fn.c +++ b/gcc/internal-fn.c @@ -115,6 +115,7 @@ init_internal_fns () #define vec_condeq_direct { 0, 0, false } #define scatter_store_direct { 3, 1, false } #define len_store_direct { 3, 3, false } +#define vec_set_direct { 3, 3, false } #define unary_direct { 0, 0, true } #define binary_direct { 0, 0, true } #define ternary_direct { 0, 0, true } @@ -2658,6 +2659,43 @@ expand_vect_cond_mask_optab_fn (internal_fn, gcall *stmt, convert_optab optab) #define expand_vec_cond_mask_optab_fn expand_vect_cond_mask_optab_fn +/* Expand VEC_SET internal functions. 
*/ + +static void +expand_vec_set_optab_fn (internal_fn, gcall *stmt, convert_optab optab) +{ + tree lhs = gimple_call_lhs (stmt); + tree op0 = gimple_call_arg (stmt, 0); + tree op1 = gimple_call_arg (stmt, 1); + tree op2 = gimple_call_arg (stmt, 2); + rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); + rtx src = expand_expr (op0, NULL_RTX, VOIDmode, EXPAND_WRITE); + + machine_mode outermode = TYPE_MODE (TREE_TYPE (op0)); + scalar_mode innermode = GET_MODE_INNER (outermode); + + rtx value = expand_expr (op1, NULL_RTX, VOIDmode, EXPAND_NORMAL); + rtx pos = expand_expr (op2, NULL_RTX, VOIDmode, EXPAND_NORMAL); + + class expand_operand ops[3]; + enum insn_code icode = optab_handler (optab, outermode); + + if (icode != CODE_FOR_nothing) + { + pos = convert_to_mode (E_SImode, pos, 0); + + create_fixed_operand (&ops[0], src); + create_input_operand (&ops[1], value, innermode); + create_input_operand (&ops[2], pos, GET_MODE (pos)); + if (maybe_expand_insn (icode, 3, ops)) + { + emit_move_insn (target, src); + return; + } + } + gcc_unreachable (); +} + static void expand_ABNORMAL_DISPATCHER (internal_fn, gcall *) { @@ -3253,6 +3291,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types, #define direct_fold_left_optab_supported_p direct_optab_supported_p #define direct_mask_fold_left_optab_supported_p direct_optab_supported_p #define direct_check_ptrs_optab_supported_p direct_optab_supported_p +#define direct_vec_set_optab_supported_p direct_optab_supported_p /* Return the optab used by internal function FN. 
*/ diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index 13e60828fcf..e6cfe1b6159 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -145,6 +145,8 @@ DEF_INTERNAL_OPTAB_FN (VCONDU, 0, vcondu, vec_condu) DEF_INTERNAL_OPTAB_FN (VCONDEQ, 0, vcondeq, vec_condeq) DEF_INTERNAL_OPTAB_FN (VCOND_MASK, 0, vcond_mask, vec_cond_mask) +DEF_INTERNAL_OPTAB_FN (VEC_SET, 0, vec_set, vec_set) + DEF_INTERNAL_OPTAB_FN (LEN_STORE, 0, len_store, len_store) DEF_INTERNAL_OPTAB_FN (WHILE_ULT, ECF_CONST | ECF_NOTHROW, while_ult, while) diff --git a/gcc/optabs.c b/gcc/optabs.c index 184827fdf4e..8e844028d92 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -3841,6 +3841,27 @@ can_vcond_compare_p (enum rtx_code code, machine_mode value_mode, && insn_operand_matches (icode, 3, test); } +/* Return whether the backend can emit vector set instructions for inserting + element into vector at variable index position. */ + +bool +can_vec_set_var_idx_p (machine_mode vec_mode) +{ + if (!VECTOR_MODE_P (vec_mode)) + return false; + + machine_mode inner_mode = GET_MODE_INNER (vec_mode); + rtx reg1 = alloca_raw_REG (vec_mode, LAST_VIRTUAL_REGISTER + 1); + rtx reg2 = alloca_raw_REG (inner_mode, LAST_VIRTUAL_REGISTER + 2); + rtx reg3 = alloca_raw_REG (VOIDmode, LAST_VIRTUAL_REGISTER + 3); + + enum insn_code icode = optab_handler (vec_set_optab, vec_mode); + + return icode != CODE_FOR_nothing && insn_operand_matches (icode, 0, reg1) + && insn_operand_matches (icode, 1, reg2) + && insn_operand_matches (icode, 2, reg3); +} + /* This function is called when we are going to emit a compare instruction that compares the values found in X and Y, using the rtl operator COMPARISON. diff --git a/gcc/optabs.h b/gcc/optabs.h index 7c2ec257cb0..0b14700ab3d 100644 --- a/gcc/optabs.h +++ b/gcc/optabs.h @@ -249,6 +249,10 @@ extern int can_compare_p (enum rtx_code, machine_mode, VALUE_MODE. 
*/ extern bool can_vcond_compare_p (enum rtx_code, machine_mode, machine_mode); +/* Return whether the backend can emit vector set instructions for inserting + element into vector at variable index position. */ +extern bool can_vec_set_var_idx_p (machine_mode); + extern rtx prepare_operand (enum insn_code, rtx, int, machine_mode, machine_mode, int); /* Emit a pair of rtl insns to compare two rtx's and to jump
xionghu luo <luoxhu@linux.ibm.com> writes: > @@ -2658,6 +2659,43 @@ expand_vect_cond_mask_optab_fn (internal_fn, gcall *stmt, convert_optab optab) > > #define expand_vec_cond_mask_optab_fn expand_vect_cond_mask_optab_fn > > +/* Expand VEC_SET internal functions. */ > + > +static void > +expand_vec_set_optab_fn (internal_fn, gcall *stmt, convert_optab optab) > +{ > + tree lhs = gimple_call_lhs (stmt); > + tree op0 = gimple_call_arg (stmt, 0); > + tree op1 = gimple_call_arg (stmt, 1); > + tree op2 = gimple_call_arg (stmt, 2); > + rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); > + rtx src = expand_expr (op0, NULL_RTX, VOIDmode, EXPAND_WRITE); I'm not sure about the expand_expr here. ISTM that op0 is a normal input and so should be expanded by expand_normal rather than EXPAND_WRITE. Also: > + > + machine_mode outermode = TYPE_MODE (TREE_TYPE (op0)); > + scalar_mode innermode = GET_MODE_INNER (outermode); > + > + rtx value = expand_expr (op1, NULL_RTX, VOIDmode, EXPAND_NORMAL); > + rtx pos = expand_expr (op2, NULL_RTX, VOIDmode, EXPAND_NORMAL); > + > + class expand_operand ops[3]; > + enum insn_code icode = optab_handler (optab, outermode); > + > + if (icode != CODE_FOR_nothing) > + { > + pos = convert_to_mode (E_SImode, pos, 0); > + > + create_fixed_operand (&ops[0], src); ...this would mean that if SRC happens to be a MEM, the pattern must also accept a MEM. ISTM that we're making more work for ourselves by not “fixing” the optab to have a natural pure-input + pure-output interface. 
:-) But if we stick with the current optab interface, I think we need to: - create a temporary register - move SRC into the temporary register before the insn - use create_fixed_operand with the temporary register for operand 0 - move the temporary register into TARGET after the insn > + create_input_operand (&ops[1], value, innermode); > + create_input_operand (&ops[2], pos, GET_MODE (pos)); For this I think we should use create_convert_operand_from on the original “pos”, so that the target gets to choose what the mode of the operand is. Thanks, Richard
diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc index b330cf4c20e..d16d67b1a21 100644 --- a/gcc/gimple-isel.cc +++ b/gcc/gimple-isel.cc @@ -35,6 +35,78 @@ along with GCC; see the file COPYING3. If not see #include "tree-cfg.h" #include "bitmap.h" #include "tree-ssa-dce.h" +#include "fold-const.h" +#include "gimple-fold.h" +#include "memmodel.h" +#include "optabs.h" + +/* Expand all ARRAY_REF(VIEW_CONVERT_EXPR) gimple assignments into calls to + internal function based on vector type of selected expansion. + i.e.: + VIEW_CONVERT_EXPR<int[4]>(u)[_1] = = i_4(D); + => + _7 = u; + _8 = .VEC_SET (_7, i_4(D), _1); + u = _8; */ + +static gimple * +gimple_expand_vec_set_expr (gimple_stmt_iterator *gsi) +{ + enum tree_code code; + gcall *new_stmt = NULL; + gassign *ass_stmt = NULL; + + /* Only consider code == GIMPLE_ASSIGN. */ + gassign *stmt = dyn_cast<gassign *> (gsi_stmt (*gsi)); + if (!stmt) + return NULL; + + tree lhs = gimple_assign_lhs (stmt); + code = TREE_CODE (lhs); + if (code != ARRAY_REF) + return NULL; + + tree val = gimple_assign_rhs1 (stmt); + tree type = TREE_TYPE (lhs); + tree op0 = TREE_OPERAND (lhs, 0); + if (TREE_CODE (op0) == VIEW_CONVERT_EXPR && DECL_P (TREE_OPERAND (op0, 0)) + && VECTOR_TYPE_P (TREE_TYPE (TREE_OPERAND (op0, 0))) + && TYPE_MODE (TREE_TYPE (lhs)) + == TYPE_MODE (TREE_TYPE (TREE_TYPE (TREE_OPERAND (op0, 0))))) + { + tree pos = TREE_OPERAND (lhs, 1); + tree view_op0 = TREE_OPERAND (op0, 0); + machine_mode outermode = TYPE_MODE (TREE_TYPE (view_op0)); + scalar_mode innermode = GET_MODE_INNER (outermode); + if (auto_var_in_fn_p (view_op0, cfun->decl) + && !TREE_ADDRESSABLE (view_op0) && can_vec_set_var_idx_p (outermode)) + { + location_t loc = gimple_location (stmt); + tree var_src = make_ssa_name (TREE_TYPE (view_op0)); + tree var_dst = make_ssa_name (TREE_TYPE (view_op0)); + + ass_stmt = gimple_build_assign (var_src, view_op0); + gimple_set_vuse (ass_stmt, gimple_vuse (stmt)); + gimple_set_location (ass_stmt, loc); + 
gsi_insert_before (gsi, ass_stmt, GSI_SAME_STMT); + + new_stmt + = gimple_build_call_internal (IFN_VEC_SET, 3, var_src, val, pos); + gimple_call_set_lhs (new_stmt, var_dst); + gimple_set_location (new_stmt, loc); + gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); + + ass_stmt = gimple_build_assign (view_op0, var_dst); + gimple_set_location (ass_stmt, loc); + gsi_insert_before (gsi, ass_stmt, GSI_SAME_STMT); + + gimple_move_vops (ass_stmt, stmt); + gsi_remove (gsi, true); + } + } + + return ass_stmt; +} /* Expand all VEC_COND_EXPR gimple assignments into calls to internal function based on type of selected expansion. */ @@ -176,7 +248,7 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi, VEC_COND_EXPR assignments. */ static unsigned int -gimple_expand_vec_cond_exprs (void) +gimple_expand_vec_exprs (void) { gimple_stmt_iterator gsi; basic_block bb; @@ -187,8 +259,22 @@ gimple_expand_vec_cond_exprs (void) { for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) { - gimple *g = gimple_expand_vec_cond_expr (&gsi, - &vec_cond_ssa_name_uses); + gassign *stmt = dyn_cast<gassign *> (gsi_stmt (gsi)); + if (!stmt) + continue; + + enum tree_code code; + gimple *g = NULL; + code = gimple_assign_rhs_code (stmt); + switch (code) + { + case VEC_COND_EXPR: + g = gimple_expand_vec_cond_expr (&gsi, &vec_cond_ssa_name_uses); + break; + default: + break; + } + if (g != NULL) { tree lhs = gimple_assign_lhs (gsi_stmt (gsi)); @@ -204,6 +290,27 @@ gimple_expand_vec_cond_exprs (void) simple_dce_from_worklist (dce_ssa_names); + FOR_EACH_BB_FN (bb, cfun) + { + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gassign *stmt = dyn_cast<gassign *> (gsi_stmt (gsi)); + if (!stmt) + continue; + + enum tree_code code; + code = TREE_CODE (gimple_assign_lhs (stmt)); + switch (code) + { + case ARRAY_REF: + gimple_expand_vec_set_expr (&gsi); + break; + default: + break; + } + } + } + return 0; } @@ -237,7 +344,7 @@ public: virtual unsigned int execute (function *) 
{ - return gimple_expand_vec_cond_exprs (); + return gimple_expand_vec_exprs (); } }; // class pass_gimple_isel diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c index 8efc77d986b..f97aea44253 100644 --- a/gcc/internal-fn.c +++ b/gcc/internal-fn.c @@ -115,6 +115,7 @@ init_internal_fns () #define vec_condeq_direct { 0, 0, false } #define scatter_store_direct { 3, 1, false } #define len_store_direct { 3, 3, false } +#define vec_set_direct { 3, 3, false } #define unary_direct { 0, 0, true } #define binary_direct { 0, 0, true } #define ternary_direct { 0, 0, true } @@ -2658,6 +2659,43 @@ expand_vect_cond_mask_optab_fn (internal_fn, gcall *stmt, convert_optab optab) #define expand_vec_cond_mask_optab_fn expand_vect_cond_mask_optab_fn +/* Expand VEC_SET internal functions. */ + +static void +expand_vec_set_optab_fn (internal_fn, gcall *stmt, convert_optab optab) +{ + tree lhs = gimple_call_lhs (stmt); + tree op0 = gimple_call_arg (stmt, 0); + tree op1 = gimple_call_arg (stmt, 1); + tree op2 = gimple_call_arg (stmt, 2); + rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); + rtx src = expand_expr (op0, NULL_RTX, VOIDmode, EXPAND_WRITE); + + machine_mode outermode = TYPE_MODE (TREE_TYPE (op0)); + scalar_mode innermode = GET_MODE_INNER (outermode); + + rtx value = expand_expr (op1, NULL_RTX, VOIDmode, EXPAND_NORMAL); + rtx pos = expand_expr (op2, NULL_RTX, VOIDmode, EXPAND_NORMAL); + + class expand_operand ops[3]; + enum insn_code icode = optab_handler (optab, outermode); + + if (icode != CODE_FOR_nothing) + { + pos = convert_to_mode (E_SImode, pos, 0); + + create_fixed_operand (&ops[0], src); + create_input_operand (&ops[1], value, innermode); + create_input_operand (&ops[2], pos, GET_MODE (pos)); + if (maybe_expand_insn (icode, 3, ops)) + { + emit_move_insn (target, src); + return; + } + } + gcc_unreachable (); +} + static void expand_ABNORMAL_DISPATCHER (internal_fn, gcall *) { @@ -3253,6 +3291,7 @@ multi_vector_optab_supported_p (convert_optab optab, 
tree_pair types, #define direct_fold_left_optab_supported_p direct_optab_supported_p #define direct_mask_fold_left_optab_supported_p direct_optab_supported_p #define direct_check_ptrs_optab_supported_p direct_optab_supported_p +#define direct_vec_set_optab_supported_p direct_optab_supported_p /* Return the optab used by internal function FN. */ diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index 13e60828fcf..e6cfe1b6159 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -145,6 +145,8 @@ DEF_INTERNAL_OPTAB_FN (VCONDU, 0, vcondu, vec_condu) DEF_INTERNAL_OPTAB_FN (VCONDEQ, 0, vcondeq, vec_condeq) DEF_INTERNAL_OPTAB_FN (VCOND_MASK, 0, vcond_mask, vec_cond_mask) +DEF_INTERNAL_OPTAB_FN (VEC_SET, 0, vec_set, vec_set) + DEF_INTERNAL_OPTAB_FN (LEN_STORE, 0, len_store, len_store) DEF_INTERNAL_OPTAB_FN (WHILE_ULT, ECF_CONST | ECF_NOTHROW, while_ult, while) diff --git a/gcc/optabs.c b/gcc/optabs.c index 184827fdf4e..8e844028d92 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -3841,6 +3841,27 @@ can_vcond_compare_p (enum rtx_code code, machine_mode value_mode, && insn_operand_matches (icode, 3, test); } +/* Return whether the backend can emit vector set instructions for inserting + element into vector at variable index position. 
*/ + +bool +can_vec_set_var_idx_p (machine_mode vec_mode) +{ + if (!VECTOR_MODE_P (vec_mode)) + return false; + + machine_mode inner_mode = GET_MODE_INNER (vec_mode); + rtx reg1 = alloca_raw_REG (vec_mode, LAST_VIRTUAL_REGISTER + 1); + rtx reg2 = alloca_raw_REG (inner_mode, LAST_VIRTUAL_REGISTER + 2); + rtx reg3 = alloca_raw_REG (VOIDmode, LAST_VIRTUAL_REGISTER + 3); + + enum insn_code icode = optab_handler (vec_set_optab, vec_mode); + + return icode != CODE_FOR_nothing && insn_operand_matches (icode, 0, reg1) + && insn_operand_matches (icode, 1, reg2) + && insn_operand_matches (icode, 2, reg3); +} + /* This function is called when we are going to emit a compare instruction that compares the values found in X and Y, using the rtl operator COMPARISON. diff --git a/gcc/optabs.h b/gcc/optabs.h index 7c2ec257cb0..0b14700ab3d 100644 --- a/gcc/optabs.h +++ b/gcc/optabs.h @@ -249,6 +249,10 @@ extern int can_compare_p (enum rtx_code, machine_mode, VALUE_MODE. */ extern bool can_vcond_compare_p (enum rtx_code, machine_mode, machine_mode); +/* Return whether the backend can emit vector set instructions for inserting + element into vector at variable index position. */ +extern bool can_vec_set_var_idx_p (machine_mode); + extern rtx prepare_operand (enum insn_code, rtx, int, machine_mode, machine_mode, int); /* Emit a pair of rtl insns to compare two rtx's and to jump