Message ID | 20201010080825.3599892-4-luoxhu@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | rs6000: Enable variable vec_insert with IFN VEC_SET | expand |
Hi Segher, thanks for the approval of [PATCH 1/4] and [PATCH 2/4]. What is your opinion of this [PATCH 3/4] for P8, please? xxinsertw only exists since v3.0, so for P8 we had to implement it another way. Xionghu On 2020/10/10 16:08, Xionghu Luo wrote: > gcc/ChangeLog: > > 2020-10-10 Xionghu Luo <luoxhu@linux.ibm.com> > > * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin): > Generate ARRAY_REF(VIEW_CONVERT_EXPR) for P8 and later > platforms. > * config/rs6000/rs6000.c (rs6000_expand_vector_set_var): Update > to call different path for P8 and P9. > (rs6000_expand_vector_set_var_p9): New function. > (rs6000_expand_vector_set_var_p8): New function. > > gcc/testsuite/ChangeLog: > > 2020-10-10 Xionghu Luo <luoxhu@linux.ibm.com> > > * gcc.target/powerpc/pr79251.p8.c: New test. > --- > gcc/config/rs6000/rs6000-c.c | 27 +++- > gcc/config/rs6000/rs6000.c | 117 +++++++++++++++++- > gcc/testsuite/gcc.target/powerpc/pr79251.p8.c | 17 +++ > 3 files changed, 155 insertions(+), 6 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/powerpc/pr79251.p8.c > > diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c > index 5551a21d738..4bea8001ec6 100644 > --- a/gcc/config/rs6000/rs6000-c.c > +++ b/gcc/config/rs6000/rs6000-c.c > @@ -1599,10 +1599,29 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, > SET_EXPR_LOCATION (stmt, loc); > stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt); > } > - stmt = build_array_ref (loc, stmt, arg2); > - stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt, > - convert (TREE_TYPE (stmt), arg0)); > - stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl); > + > + if (TARGET_P8_VECTOR) > + { > + stmt = build_array_ref (loc, stmt, arg2); > + stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt, > + convert (TREE_TYPE (stmt), arg0)); > + stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl); > + } > + else > + { > + tree arg1_inner_type; > + tree innerptrtype; > + arg1_inner_type = TREE_TYPE (arg1_type); 
> + innerptrtype = build_pointer_type (arg1_inner_type); > + > + stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0); > + stmt = convert (innerptrtype, stmt); > + stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1); > + stmt = build_indirect_ref (loc, stmt, RO_NULL); > + stmt = build2 (MODIFY_EXPR, TREE_TYPE (stmt), stmt, > + convert (TREE_TYPE (stmt), arg0)); > + stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl); > + } > return stmt; > } > > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c > index 96f76c7a74c..33ca839cb28 100644 > --- a/gcc/config/rs6000/rs6000.c > +++ b/gcc/config/rs6000/rs6000.c > @@ -6806,10 +6806,10 @@ rs6000_expand_vector_set (rtx target, rtx val, rtx elt_rtx) > } > > /* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX > - is variable and also counts by vector element size. */ > + is variable and also counts by vector element size for p9 and above. */ > > void > -rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx) > +rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx) > { > machine_mode mode = GET_MODE (target); > > @@ -6852,6 +6852,119 @@ rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx) > emit_insn (perml); > } > > +/* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX > + is variable and also counts by vector element size for p8. */ > + > +void > +rs6000_expand_vector_set_var_p8 (rtx target, rtx val, rtx idx) > +{ > + machine_mode mode = GET_MODE (target); > + > + gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx)); > + > + gcc_assert (GET_MODE (idx) == E_SImode); > + > + machine_mode inner_mode = GET_MODE (val); > + HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode); > + > + rtx tmp = gen_reg_rtx (GET_MODE (idx)); > + int width = GET_MODE_SIZE (inner_mode); > + > + gcc_assert (width >= 1 && width <= 4); > + > + if (!BYTES_BIG_ENDIAN) > + { > + /* idx = idx * width. 
*/ > + emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width))); > + /* idx = idx + 8. */ > + emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (8))); > + } > + else > + { > + emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width))); > + emit_insn (gen_subsi3 (tmp, GEN_INT (24 - width), tmp)); > + } > + > + /* lxv vs33, mask. > + DImode: 0xffffffffffffffff0000000000000000 > + SImode: 0x00000000ffffffff0000000000000000 > + HImode: 0x000000000000ffff0000000000000000. > + QImode: 0x00000000000000ff0000000000000000. */ > + rtx mask = gen_reg_rtx (V16QImode); > + rtx mask_v2di = gen_reg_rtx (V2DImode); > + rtvec v = rtvec_alloc (2); > + if (!BYTES_BIG_ENDIAN) > + { > + RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0); > + RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask); > + } > + else > + { > + RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask); > + RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0); > + } > + emit_insn (gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL (V2DImode, v))); > + rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, V2DImode, 0); > + emit_insn (gen_rtx_SET (mask, sub_mask)); > + > + /* mtvsrd[wz] f0,tmp_val. */ > + rtx tmp_val = gen_reg_rtx (SImode); > + if (inner_mode == E_SFmode) > + emit_insn (gen_movsi_from_sf (tmp_val, val)); > + else > + tmp_val = force_reg (SImode, val); > + > + rtx val_v16qi = gen_reg_rtx (V16QImode); > + rtx val_v2di = gen_reg_rtx (V2DImode); > + rtvec vec_val = rtvec_alloc (2); > + if (!BYTES_BIG_ENDIAN) > + { > + RTVEC_ELT (vec_val, 0) = gen_rtx_CONST_INT (DImode, 0); > + RTVEC_ELT (vec_val, 1) = tmp_val; > + } > + else > + { > + RTVEC_ELT (vec_val, 0) = tmp_val; > + RTVEC_ELT (vec_val, 1) = gen_rtx_CONST_INT (DImode, 0); > + } > + emit_insn ( > + gen_vec_initv2didi (val_v2di, gen_rtx_PARALLEL (V2DImode, vec_val))); > + rtx sub_val = simplify_gen_subreg (V16QImode, val_v2di, V2DImode, 0); > + emit_insn (gen_rtx_SET (val_v16qi, sub_val)); > + > + /* lvsl 13,0,idx. 
*/ > + tmp = convert_modes (DImode, SImode, tmp, 1); > + rtx pcv = gen_reg_rtx (V16QImode); > + emit_insn (gen_altivec_lvsl_reg (pcv, tmp)); > + > + /* vperm 1,1,1,13. */ > + /* vperm 0,0,0,13. */ > + rtx val_perm = gen_reg_rtx (V16QImode); > + rtx mask_perm = gen_reg_rtx (V16QImode); > + emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, val_v16qi, pcv)); > + emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv)); > + > + rtx target_v16qi = simplify_gen_subreg (V16QImode, target, mode, 0); > + > + /* xxsel 34,34,32,33. */ > + emit_insn ( > + gen_vector_select_v16qi (target_v16qi, target_v16qi, val_perm, mask_perm)); > +} > + > +/* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX > + is variable and also counts by vector element size. */ > + > +void > +rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx) > +{ > + machine_mode mode = GET_MODE (target); > + machine_mode inner_mode = GET_MODE_INNER (mode); > + if (TARGET_P9_VECTOR || GET_MODE_SIZE (inner_mode) == 8) > + rs6000_expand_vector_set_var_p9 (target, val, idx); > + else > + rs6000_expand_vector_set_var_p8 (target, val, idx); > +} > + > /* Extract field ELT from VEC into TARGET. 
*/ > > void > diff --git a/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c b/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c > new file mode 100644 > index 00000000000..06da47b7758 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c > @@ -0,0 +1,17 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_p8vector_ok } */ > +/* { dg-options "-O2 -mdejagnu-cpu=power8 -maltivec" } */ > + > +#include <stddef.h> > +#include <altivec.h> > +#include "pr79251.h" > + > +TEST_VEC_INSERT_ALL (test) > + > +/* { dg-final { scan-assembler-not {\mstxw\M} } } */ > +/* { dg-final { scan-assembler-times {\mlvsl\M} 10 } } */ > +/* { dg-final { scan-assembler-times {\mlvsr\M} 3 } } */ > +/* { dg-final { scan-assembler-times {\mvperm\M} 20 } } */ > +/* { dg-final { scan-assembler-times {\mxxpermdi\M} 10 } } */ > +/* { dg-final { scan-assembler-times {\mxxsel\M} 7 } } */ > + >
Ping. Thanks. On 2020/11/27 09:04, Xionghu Luo via Gcc-patches wrote: > Hi Segher, > Thanks for the approval of [PATCH 1/4] and [PATCH 2/4], what's your > opinion of this [PATCH 3/4] for P8, please? xxinsertw only exists since > v3.0, so we had to implement by another way. > > > Xionghu > > > On 2020/10/10 16:08, Xionghu Luo wrote: >> gcc/ChangeLog: >> >> 2020-10-10 Xionghu Luo <luoxhu@linux.ibm.com> >> >> * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin): >> Generate ARRAY_REF(VIEW_CONVERT_EXPR) for P8 and later >> platforms. >> * config/rs6000/rs6000.c (rs6000_expand_vector_set_var): Update >> to call different path for P8 and P9. >> (rs6000_expand_vector_set_var_p9): New function. >> (rs6000_expand_vector_set_var_p8): New function. >> >> gcc/testsuite/ChangeLog: >> >> 2020-10-10 Xionghu Luo <luoxhu@linux.ibm.com> >> >> * gcc.target/powerpc/pr79251.p8.c: New test. >> --- >> gcc/config/rs6000/rs6000-c.c | 27 +++- >> gcc/config/rs6000/rs6000.c | 117 +++++++++++++++++- >> gcc/testsuite/gcc.target/powerpc/pr79251.p8.c | 17 +++ >> 3 files changed, 155 insertions(+), 6 deletions(-) >> create mode 100644 gcc/testsuite/gcc.target/powerpc/pr79251.p8.c >> >> diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c >> index 5551a21d738..4bea8001ec6 100644 >> --- a/gcc/config/rs6000/rs6000-c.c >> +++ b/gcc/config/rs6000/rs6000-c.c >> @@ -1599,10 +1599,29 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, >> SET_EXPR_LOCATION (stmt, loc); >> stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt); >> } >> - stmt = build_array_ref (loc, stmt, arg2); >> - stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt, >> - convert (TREE_TYPE (stmt), arg0)); >> - stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl); >> + >> + if (TARGET_P8_VECTOR) >> + { >> + stmt = build_array_ref (loc, stmt, arg2); >> + stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt, >> + convert (TREE_TYPE (stmt), arg0)); >> + stmt = build2 (COMPOUND_EXPR, 
arg1_type, stmt, decl); >> + } >> + else >> + { >> + tree arg1_inner_type; >> + tree innerptrtype; >> + arg1_inner_type = TREE_TYPE (arg1_type); >> + innerptrtype = build_pointer_type (arg1_inner_type); >> + >> + stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0); >> + stmt = convert (innerptrtype, stmt); >> + stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1); >> + stmt = build_indirect_ref (loc, stmt, RO_NULL); >> + stmt = build2 (MODIFY_EXPR, TREE_TYPE (stmt), stmt, >> + convert (TREE_TYPE (stmt), arg0)); >> + stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl); >> + } >> return stmt; >> } >> >> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c >> index 96f76c7a74c..33ca839cb28 100644 >> --- a/gcc/config/rs6000/rs6000.c >> +++ b/gcc/config/rs6000/rs6000.c >> @@ -6806,10 +6806,10 @@ rs6000_expand_vector_set (rtx target, rtx val, rtx elt_rtx) >> } >> >> /* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX >> - is variable and also counts by vector element size. */ >> + is variable and also counts by vector element size for p9 and above. */ >> >> void >> -rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx) >> +rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx) >> { >> machine_mode mode = GET_MODE (target); >> >> @@ -6852,6 +6852,119 @@ rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx) >> emit_insn (perml); >> } >> >> +/* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX >> + is variable and also counts by vector element size for p8. 
*/ >> + >> +void >> +rs6000_expand_vector_set_var_p8 (rtx target, rtx val, rtx idx) >> +{ >> + machine_mode mode = GET_MODE (target); >> + >> + gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx)); >> + >> + gcc_assert (GET_MODE (idx) == E_SImode); >> + >> + machine_mode inner_mode = GET_MODE (val); >> + HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode); >> + >> + rtx tmp = gen_reg_rtx (GET_MODE (idx)); >> + int width = GET_MODE_SIZE (inner_mode); >> + >> + gcc_assert (width >= 1 && width <= 4); >> + >> + if (!BYTES_BIG_ENDIAN) >> + { >> + /* idx = idx * width. */ >> + emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width))); >> + /* idx = idx + 8. */ >> + emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (8))); >> + } >> + else >> + { >> + emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width))); >> + emit_insn (gen_subsi3 (tmp, GEN_INT (24 - width), tmp)); >> + } >> + >> + /* lxv vs33, mask. >> + DImode: 0xffffffffffffffff0000000000000000 >> + SImode: 0x00000000ffffffff0000000000000000 >> + HImode: 0x000000000000ffff0000000000000000. >> + QImode: 0x00000000000000ff0000000000000000. */ >> + rtx mask = gen_reg_rtx (V16QImode); >> + rtx mask_v2di = gen_reg_rtx (V2DImode); >> + rtvec v = rtvec_alloc (2); >> + if (!BYTES_BIG_ENDIAN) >> + { >> + RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0); >> + RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask); >> + } >> + else >> + { >> + RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask); >> + RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0); >> + } >> + emit_insn (gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL (V2DImode, v))); >> + rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, V2DImode, 0); >> + emit_insn (gen_rtx_SET (mask, sub_mask)); >> + >> + /* mtvsrd[wz] f0,tmp_val. 
*/ >> + rtx tmp_val = gen_reg_rtx (SImode); >> + if (inner_mode == E_SFmode) >> + emit_insn (gen_movsi_from_sf (tmp_val, val)); >> + else >> + tmp_val = force_reg (SImode, val); >> + >> + rtx val_v16qi = gen_reg_rtx (V16QImode); >> + rtx val_v2di = gen_reg_rtx (V2DImode); >> + rtvec vec_val = rtvec_alloc (2); >> + if (!BYTES_BIG_ENDIAN) >> + { >> + RTVEC_ELT (vec_val, 0) = gen_rtx_CONST_INT (DImode, 0); >> + RTVEC_ELT (vec_val, 1) = tmp_val; >> + } >> + else >> + { >> + RTVEC_ELT (vec_val, 0) = tmp_val; >> + RTVEC_ELT (vec_val, 1) = gen_rtx_CONST_INT (DImode, 0); >> + } >> + emit_insn ( >> + gen_vec_initv2didi (val_v2di, gen_rtx_PARALLEL (V2DImode, vec_val))); >> + rtx sub_val = simplify_gen_subreg (V16QImode, val_v2di, V2DImode, 0); >> + emit_insn (gen_rtx_SET (val_v16qi, sub_val)); >> + >> + /* lvsl 13,0,idx. */ >> + tmp = convert_modes (DImode, SImode, tmp, 1); >> + rtx pcv = gen_reg_rtx (V16QImode); >> + emit_insn (gen_altivec_lvsl_reg (pcv, tmp)); >> + >> + /* vperm 1,1,1,13. */ >> + /* vperm 0,0,0,13. */ >> + rtx val_perm = gen_reg_rtx (V16QImode); >> + rtx mask_perm = gen_reg_rtx (V16QImode); >> + emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, val_v16qi, pcv)); >> + emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv)); >> + >> + rtx target_v16qi = simplify_gen_subreg (V16QImode, target, mode, 0); >> + >> + /* xxsel 34,34,32,33. */ >> + emit_insn ( >> + gen_vector_select_v16qi (target_v16qi, target_v16qi, val_perm, mask_perm)); >> +} >> + >> +/* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX >> + is variable and also counts by vector element size. 
*/ >> + >> +void >> +rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx) >> +{ >> + machine_mode mode = GET_MODE (target); >> + machine_mode inner_mode = GET_MODE_INNER (mode); >> + if (TARGET_P9_VECTOR || GET_MODE_SIZE (inner_mode) == 8) >> + rs6000_expand_vector_set_var_p9 (target, val, idx); >> + else >> + rs6000_expand_vector_set_var_p8 (target, val, idx); >> +} >> + >> /* Extract field ELT from VEC into TARGET. */ >> >> void >> diff --git a/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c b/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c >> new file mode 100644 >> index 00000000000..06da47b7758 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c >> @@ -0,0 +1,17 @@ >> +/* { dg-do compile } */ >> +/* { dg-require-effective-target powerpc_p8vector_ok } */ >> +/* { dg-options "-O2 -mdejagnu-cpu=power8 -maltivec" } */ >> + >> +#include <stddef.h> >> +#include <altivec.h> >> +#include "pr79251.h" >> + >> +TEST_VEC_INSERT_ALL (test) >> + >> +/* { dg-final { scan-assembler-not {\mstxw\M} } } */ >> +/* { dg-final { scan-assembler-times {\mlvsl\M} 10 } } */ >> +/* { dg-final { scan-assembler-times {\mlvsr\M} 3 } } */ >> +/* { dg-final { scan-assembler-times {\mvperm\M} 20 } } */ >> +/* { dg-final { scan-assembler-times {\mxxpermdi\M} 10 } } */ >> +/* { dg-final { scan-assembler-times {\mxxsel\M} 7 } } */ >> + >> >
Ping^2. Thanks. On 2020/12/3 22:16, Xionghu Luo via Gcc-patches wrote: > Ping. Thanks. > > > On 2020/11/27 09:04, Xionghu Luo via Gcc-patches wrote: >> Hi Segher, >> Thanks for the approval of [PATCH 1/4] and [PATCH 2/4], what's your >> opinion of this [PATCH 3/4] for P8, please? xxinsertw only exists since >> v3.0, so we had to implement by another way. >> >> >> Xionghu >> >> >> On 2020/10/10 16:08, Xionghu Luo wrote: >>> gcc/ChangeLog: >>> >>> 2020-10-10 Xionghu Luo <luoxhu@linux.ibm.com> >>> >>> * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin): >>> Generate ARRAY_REF(VIEW_CONVERT_EXPR) for P8 and later >>> platforms. >>> * config/rs6000/rs6000.c (rs6000_expand_vector_set_var): Update >>> to call different path for P8 and P9. >>> (rs6000_expand_vector_set_var_p9): New function. >>> (rs6000_expand_vector_set_var_p8): New function. >>> >>> gcc/testsuite/ChangeLog: >>> >>> 2020-10-10 Xionghu Luo <luoxhu@linux.ibm.com> >>> >>> * gcc.target/powerpc/pr79251.p8.c: New test. >>> --- >>> gcc/config/rs6000/rs6000-c.c | 27 +++- >>> gcc/config/rs6000/rs6000.c | 117 >>> +++++++++++++++++- >>> gcc/testsuite/gcc.target/powerpc/pr79251.p8.c | 17 +++ >>> 3 files changed, 155 insertions(+), 6 deletions(-) >>> create mode 100644 gcc/testsuite/gcc.target/powerpc/pr79251.p8.c >>> >>> diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c >>> index 5551a21d738..4bea8001ec6 100644 >>> --- a/gcc/config/rs6000/rs6000-c.c >>> +++ b/gcc/config/rs6000/rs6000-c.c >>> @@ -1599,10 +1599,29 @@ altivec_resolve_overloaded_builtin >>> (location_t loc, tree fndecl, >>> SET_EXPR_LOCATION (stmt, loc); >>> stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt); >>> } >>> - stmt = build_array_ref (loc, stmt, arg2); >>> - stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt, >>> - convert (TREE_TYPE (stmt), arg0)); >>> - stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl); >>> + >>> + if (TARGET_P8_VECTOR) >>> + { >>> + stmt = build_array_ref (loc, stmt, arg2); >>> + 
stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt, >>> + convert (TREE_TYPE (stmt), arg0)); >>> + stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl); >>> + } >>> + else >>> + { >>> + tree arg1_inner_type; >>> + tree innerptrtype; >>> + arg1_inner_type = TREE_TYPE (arg1_type); >>> + innerptrtype = build_pointer_type (arg1_inner_type); >>> + >>> + stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0); >>> + stmt = convert (innerptrtype, stmt); >>> + stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1); >>> + stmt = build_indirect_ref (loc, stmt, RO_NULL); >>> + stmt = build2 (MODIFY_EXPR, TREE_TYPE (stmt), stmt, >>> + convert (TREE_TYPE (stmt), arg0)); >>> + stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl); >>> + } >>> return stmt; >>> } >>> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c >>> index 96f76c7a74c..33ca839cb28 100644 >>> --- a/gcc/config/rs6000/rs6000.c >>> +++ b/gcc/config/rs6000/rs6000.c >>> @@ -6806,10 +6806,10 @@ rs6000_expand_vector_set (rtx target, rtx >>> val, rtx elt_rtx) >>> } >>> /* Insert VAL into IDX of TARGET, VAL size is same of the vector >>> element, IDX >>> - is variable and also counts by vector element size. */ >>> + is variable and also counts by vector element size for p9 and >>> above. */ >>> void >>> -rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx) >>> +rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx) >>> { >>> machine_mode mode = GET_MODE (target); >>> @@ -6852,6 +6852,119 @@ rs6000_expand_vector_set_var (rtx target, rtx >>> val, rtx idx) >>> emit_insn (perml); >>> } >>> +/* Insert VAL into IDX of TARGET, VAL size is same of the vector >>> element, IDX >>> + is variable and also counts by vector element size for p8. 
*/ >>> + >>> +void >>> +rs6000_expand_vector_set_var_p8 (rtx target, rtx val, rtx idx) >>> +{ >>> + machine_mode mode = GET_MODE (target); >>> + >>> + gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx)); >>> + >>> + gcc_assert (GET_MODE (idx) == E_SImode); >>> + >>> + machine_mode inner_mode = GET_MODE (val); >>> + HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode); >>> + >>> + rtx tmp = gen_reg_rtx (GET_MODE (idx)); >>> + int width = GET_MODE_SIZE (inner_mode); >>> + >>> + gcc_assert (width >= 1 && width <= 4); >>> + >>> + if (!BYTES_BIG_ENDIAN) >>> + { >>> + /* idx = idx * width. */ >>> + emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width))); >>> + /* idx = idx + 8. */ >>> + emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (8))); >>> + } >>> + else >>> + { >>> + emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width))); >>> + emit_insn (gen_subsi3 (tmp, GEN_INT (24 - width), tmp)); >>> + } >>> + >>> + /* lxv vs33, mask. >>> + DImode: 0xffffffffffffffff0000000000000000 >>> + SImode: 0x00000000ffffffff0000000000000000 >>> + HImode: 0x000000000000ffff0000000000000000. >>> + QImode: 0x00000000000000ff0000000000000000. */ >>> + rtx mask = gen_reg_rtx (V16QImode); >>> + rtx mask_v2di = gen_reg_rtx (V2DImode); >>> + rtvec v = rtvec_alloc (2); >>> + if (!BYTES_BIG_ENDIAN) >>> + { >>> + RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0); >>> + RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask); >>> + } >>> + else >>> + { >>> + RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask); >>> + RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0); >>> + } >>> + emit_insn (gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL >>> (V2DImode, v))); >>> + rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, >>> V2DImode, 0); >>> + emit_insn (gen_rtx_SET (mask, sub_mask)); >>> + >>> + /* mtvsrd[wz] f0,tmp_val. 
*/ >>> + rtx tmp_val = gen_reg_rtx (SImode); >>> + if (inner_mode == E_SFmode) >>> + emit_insn (gen_movsi_from_sf (tmp_val, val)); >>> + else >>> + tmp_val = force_reg (SImode, val); >>> + >>> + rtx val_v16qi = gen_reg_rtx (V16QImode); >>> + rtx val_v2di = gen_reg_rtx (V2DImode); >>> + rtvec vec_val = rtvec_alloc (2); >>> + if (!BYTES_BIG_ENDIAN) >>> + { >>> + RTVEC_ELT (vec_val, 0) = gen_rtx_CONST_INT (DImode, 0); >>> + RTVEC_ELT (vec_val, 1) = tmp_val; >>> + } >>> + else >>> + { >>> + RTVEC_ELT (vec_val, 0) = tmp_val; >>> + RTVEC_ELT (vec_val, 1) = gen_rtx_CONST_INT (DImode, 0); >>> + } >>> + emit_insn ( >>> + gen_vec_initv2didi (val_v2di, gen_rtx_PARALLEL (V2DImode, >>> vec_val))); >>> + rtx sub_val = simplify_gen_subreg (V16QImode, val_v2di, V2DImode, 0); >>> + emit_insn (gen_rtx_SET (val_v16qi, sub_val)); >>> + >>> + /* lvsl 13,0,idx. */ >>> + tmp = convert_modes (DImode, SImode, tmp, 1); >>> + rtx pcv = gen_reg_rtx (V16QImode); >>> + emit_insn (gen_altivec_lvsl_reg (pcv, tmp)); >>> + >>> + /* vperm 1,1,1,13. */ >>> + /* vperm 0,0,0,13. */ >>> + rtx val_perm = gen_reg_rtx (V16QImode); >>> + rtx mask_perm = gen_reg_rtx (V16QImode); >>> + emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, >>> val_v16qi, pcv)); >>> + emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv)); >>> + >>> + rtx target_v16qi = simplify_gen_subreg (V16QImode, target, mode, 0); >>> + >>> + /* xxsel 34,34,32,33. */ >>> + emit_insn ( >>> + gen_vector_select_v16qi (target_v16qi, target_v16qi, val_perm, >>> mask_perm)); >>> +} >>> + >>> +/* Insert VAL into IDX of TARGET, VAL size is same of the vector >>> element, IDX >>> + is variable and also counts by vector element size. 
*/ >>> + >>> +void >>> +rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx) >>> +{ >>> + machine_mode mode = GET_MODE (target); >>> + machine_mode inner_mode = GET_MODE_INNER (mode); >>> + if (TARGET_P9_VECTOR || GET_MODE_SIZE (inner_mode) == 8) >>> + rs6000_expand_vector_set_var_p9 (target, val, idx); >>> + else >>> + rs6000_expand_vector_set_var_p8 (target, val, idx); >>> +} >>> + >>> /* Extract field ELT from VEC into TARGET. */ >>> void >>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c >>> b/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c >>> new file mode 100644 >>> index 00000000000..06da47b7758 >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c >>> @@ -0,0 +1,17 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-require-effective-target powerpc_p8vector_ok } */ >>> +/* { dg-options "-O2 -mdejagnu-cpu=power8 -maltivec" } */ >>> + >>> +#include <stddef.h> >>> +#include <altivec.h> >>> +#include "pr79251.h" >>> + >>> +TEST_VEC_INSERT_ALL (test) >>> + >>> +/* { dg-final { scan-assembler-not {\mstxw\M} } } */ >>> +/* { dg-final { scan-assembler-times {\mlvsl\M} 10 } } */ >>> +/* { dg-final { scan-assembler-times {\mlvsr\M} 3 } } */ >>> +/* { dg-final { scan-assembler-times {\mvperm\M} 20 } } */ >>> +/* { dg-final { scan-assembler-times {\mxxpermdi\M} 10 } } */ >>> +/* { dg-final { scan-assembler-times {\mxxsel\M} 7 } } */ >>> + >>> >> >
Ping^3 for stage 3, and also for the follow-up patch: [PATCH 4/4] rs6000: Update testcases' instruction count. Thanks :) On 2020/12/3 22:16, Xionghu Luo via Gcc-patches wrote: > Ping. Thanks. > > > On 2020/11/27 09:04, Xionghu Luo via Gcc-patches wrote: >> Hi Segher, >> Thanks for the approval of [PATCH 1/4] and [PATCH 2/4], what's your >> opinion of this [PATCH 3/4] for P8, please? xxinsertw only exists since >> v3.0, so we had to implement by another way. >> >> >> Xionghu >> >> >> On 2020/10/10 16:08, Xionghu Luo wrote: >>> gcc/ChangeLog: >>> >>> 2020-10-10 Xionghu Luo <luoxhu@linux.ibm.com> >>> >>> * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin): >>> Generate ARRAY_REF(VIEW_CONVERT_EXPR) for P8 and later >>> platforms. >>> * config/rs6000/rs6000.c (rs6000_expand_vector_set_var): Update >>> to call different path for P8 and P9. >>> (rs6000_expand_vector_set_var_p9): New function. >>> (rs6000_expand_vector_set_var_p8): New function. >>> >>> gcc/testsuite/ChangeLog: >>> >>> 2020-10-10 Xionghu Luo <luoxhu@linux.ibm.com> >>> >>> * gcc.target/powerpc/pr79251.p8.c: New test. 
>>> --- >>> gcc/config/rs6000/rs6000-c.c | 27 +++- >>> gcc/config/rs6000/rs6000.c | 117 >>> +++++++++++++++++- >>> gcc/testsuite/gcc.target/powerpc/pr79251.p8.c | 17 +++ >>> 3 files changed, 155 insertions(+), 6 deletions(-) >>> create mode 100644 gcc/testsuite/gcc.target/powerpc/pr79251.p8.c >>> >>> diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c >>> index 5551a21d738..4bea8001ec6 100644 >>> --- a/gcc/config/rs6000/rs6000-c.c >>> +++ b/gcc/config/rs6000/rs6000-c.c >>> @@ -1599,10 +1599,29 @@ altivec_resolve_overloaded_builtin >>> (location_t loc, tree fndecl, >>> SET_EXPR_LOCATION (stmt, loc); >>> stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt); >>> } >>> - stmt = build_array_ref (loc, stmt, arg2); >>> - stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt, >>> - convert (TREE_TYPE (stmt), arg0)); >>> - stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl); >>> + >>> + if (TARGET_P8_VECTOR) >>> + { >>> + stmt = build_array_ref (loc, stmt, arg2); >>> + stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt, >>> + convert (TREE_TYPE (stmt), arg0)); >>> + stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl); >>> + } >>> + else >>> + { >>> + tree arg1_inner_type; >>> + tree innerptrtype; >>> + arg1_inner_type = TREE_TYPE (arg1_type); >>> + innerptrtype = build_pointer_type (arg1_inner_type); >>> + >>> + stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0); >>> + stmt = convert (innerptrtype, stmt); >>> + stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1); >>> + stmt = build_indirect_ref (loc, stmt, RO_NULL); >>> + stmt = build2 (MODIFY_EXPR, TREE_TYPE (stmt), stmt, >>> + convert (TREE_TYPE (stmt), arg0)); >>> + stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl); >>> + } >>> return stmt; >>> } >>> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c >>> index 96f76c7a74c..33ca839cb28 100644 >>> --- a/gcc/config/rs6000/rs6000.c >>> +++ b/gcc/config/rs6000/rs6000.c >>> @@ -6806,10 +6806,10 @@ rs6000_expand_vector_set 
(rtx target, rtx >>> val, rtx elt_rtx) >>> } >>> /* Insert VAL into IDX of TARGET, VAL size is same of the vector >>> element, IDX >>> - is variable and also counts by vector element size. */ >>> + is variable and also counts by vector element size for p9 and >>> above. */ >>> void >>> -rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx) >>> +rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx) >>> { >>> machine_mode mode = GET_MODE (target); >>> @@ -6852,6 +6852,119 @@ rs6000_expand_vector_set_var (rtx target, rtx >>> val, rtx idx) >>> emit_insn (perml); >>> } >>> +/* Insert VAL into IDX of TARGET, VAL size is same of the vector >>> element, IDX >>> + is variable and also counts by vector element size for p8. */ >>> + >>> +void >>> +rs6000_expand_vector_set_var_p8 (rtx target, rtx val, rtx idx) >>> +{ >>> + machine_mode mode = GET_MODE (target); >>> + >>> + gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx)); >>> + >>> + gcc_assert (GET_MODE (idx) == E_SImode); >>> + >>> + machine_mode inner_mode = GET_MODE (val); >>> + HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode); >>> + >>> + rtx tmp = gen_reg_rtx (GET_MODE (idx)); >>> + int width = GET_MODE_SIZE (inner_mode); >>> + >>> + gcc_assert (width >= 1 && width <= 4); >>> + >>> + if (!BYTES_BIG_ENDIAN) >>> + { >>> + /* idx = idx * width. */ >>> + emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width))); >>> + /* idx = idx + 8. */ >>> + emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (8))); >>> + } >>> + else >>> + { >>> + emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width))); >>> + emit_insn (gen_subsi3 (tmp, GEN_INT (24 - width), tmp)); >>> + } >>> + >>> + /* lxv vs33, mask. >>> + DImode: 0xffffffffffffffff0000000000000000 >>> + SImode: 0x00000000ffffffff0000000000000000 >>> + HImode: 0x000000000000ffff0000000000000000. >>> + QImode: 0x00000000000000ff0000000000000000. 
*/ >>> + rtx mask = gen_reg_rtx (V16QImode); >>> + rtx mask_v2di = gen_reg_rtx (V2DImode); >>> + rtvec v = rtvec_alloc (2); >>> + if (!BYTES_BIG_ENDIAN) >>> + { >>> + RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0); >>> + RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask); >>> + } >>> + else >>> + { >>> + RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask); >>> + RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0); >>> + } >>> + emit_insn (gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL >>> (V2DImode, v))); >>> + rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, >>> V2DImode, 0); >>> + emit_insn (gen_rtx_SET (mask, sub_mask)); >>> + >>> + /* mtvsrd[wz] f0,tmp_val. */ >>> + rtx tmp_val = gen_reg_rtx (SImode); >>> + if (inner_mode == E_SFmode) >>> + emit_insn (gen_movsi_from_sf (tmp_val, val)); >>> + else >>> + tmp_val = force_reg (SImode, val); >>> + >>> + rtx val_v16qi = gen_reg_rtx (V16QImode); >>> + rtx val_v2di = gen_reg_rtx (V2DImode); >>> + rtvec vec_val = rtvec_alloc (2); >>> + if (!BYTES_BIG_ENDIAN) >>> + { >>> + RTVEC_ELT (vec_val, 0) = gen_rtx_CONST_INT (DImode, 0); >>> + RTVEC_ELT (vec_val, 1) = tmp_val; >>> + } >>> + else >>> + { >>> + RTVEC_ELT (vec_val, 0) = tmp_val; >>> + RTVEC_ELT (vec_val, 1) = gen_rtx_CONST_INT (DImode, 0); >>> + } >>> + emit_insn ( >>> + gen_vec_initv2didi (val_v2di, gen_rtx_PARALLEL (V2DImode, >>> vec_val))); >>> + rtx sub_val = simplify_gen_subreg (V16QImode, val_v2di, V2DImode, 0); >>> + emit_insn (gen_rtx_SET (val_v16qi, sub_val)); >>> + >>> + /* lvsl 13,0,idx. */ >>> + tmp = convert_modes (DImode, SImode, tmp, 1); >>> + rtx pcv = gen_reg_rtx (V16QImode); >>> + emit_insn (gen_altivec_lvsl_reg (pcv, tmp)); >>> + >>> + /* vperm 1,1,1,13. */ >>> + /* vperm 0,0,0,13. 
*/ >>> + rtx val_perm = gen_reg_rtx (V16QImode); >>> + rtx mask_perm = gen_reg_rtx (V16QImode); >>> + emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, >>> val_v16qi, pcv)); >>> + emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv)); >>> + >>> + rtx target_v16qi = simplify_gen_subreg (V16QImode, target, mode, 0); >>> + >>> + /* xxsel 34,34,32,33. */ >>> + emit_insn ( >>> + gen_vector_select_v16qi (target_v16qi, target_v16qi, val_perm, >>> mask_perm)); >>> +} >>> + >>> +/* Insert VAL into IDX of TARGET, VAL size is same of the vector >>> element, IDX >>> + is variable and also counts by vector element size. */ >>> + >>> +void >>> +rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx) >>> +{ >>> + machine_mode mode = GET_MODE (target); >>> + machine_mode inner_mode = GET_MODE_INNER (mode); >>> + if (TARGET_P9_VECTOR || GET_MODE_SIZE (inner_mode) == 8) >>> + rs6000_expand_vector_set_var_p9 (target, val, idx); >>> + else >>> + rs6000_expand_vector_set_var_p8 (target, val, idx); >>> +} >>> + >>> /* Extract field ELT from VEC into TARGET. 
*/ >>> void >>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c >>> b/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c >>> new file mode 100644 >>> index 00000000000..06da47b7758 >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c >>> @@ -0,0 +1,17 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-require-effective-target powerpc_p8vector_ok } */ >>> +/* { dg-options "-O2 -mdejagnu-cpu=power8 -maltivec" } */ >>> + >>> +#include <stddef.h> >>> +#include <altivec.h> >>> +#include "pr79251.h" >>> + >>> +TEST_VEC_INSERT_ALL (test) >>> + >>> +/* { dg-final { scan-assembler-not {\mstxw\M} } } */ >>> +/* { dg-final { scan-assembler-times {\mlvsl\M} 10 } } */ >>> +/* { dg-final { scan-assembler-times {\mlvsr\M} 3 } } */ >>> +/* { dg-final { scan-assembler-times {\mvperm\M} 20 } } */ >>> +/* { dg-final { scan-assembler-times {\mxxpermdi\M} 10 } } */ >>> +/* { dg-final { scan-assembler-times {\mxxsel\M} 7 } } */ >>> + >>> >> >
Ping^4, thanks. On 2020/12/23 10:18, Xionghu Luo via Gcc-patches wrote: > Ping^3 for stage 3. > > And this followed patch: > [PATCH 4/4] rs6000: Update testcases' instruction count. > > Thanks:) > > > On 2020/12/3 22:16, Xionghu Luo via Gcc-patches wrote: >> Ping. Thanks. >> >> >> On 2020/11/27 09:04, Xionghu Luo via Gcc-patches wrote: >>> Hi Segher, >>> Thanks for the approval of [PATCH 1/4] and [PATCH 2/4], what's your >>> opinion of this [PATCH 3/4] for P8, please? xxinsertw only exists since >>> v3.0, so we had to implement by another way. >>> >>> >>> Xionghu >>> >>> >>> On 2020/10/10 16:08, Xionghu Luo wrote: >>>> gcc/ChangeLog: >>>> >>>> 2020-10-10 Xionghu Luo <luoxhu@linux.ibm.com> >>>> >>>> * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin): >>>> Generate ARRAY_REF(VIEW_CONVERT_EXPR) for P8 and later >>>> platforms. >>>> * config/rs6000/rs6000.c (rs6000_expand_vector_set_var): Update >>>> to call different path for P8 and P9. >>>> (rs6000_expand_vector_set_var_p9): New function. >>>> (rs6000_expand_vector_set_var_p8): New function. >>>> >>>> gcc/testsuite/ChangeLog: >>>> >>>> 2020-10-10 Xionghu Luo <luoxhu@linux.ibm.com> >>>> >>>> * gcc.target/powerpc/pr79251.p8.c: New test. 
>>>> --- >>>> gcc/config/rs6000/rs6000-c.c | 27 +++- >>>> gcc/config/rs6000/rs6000.c | 117 >>>> +++++++++++++++++- >>>> gcc/testsuite/gcc.target/powerpc/pr79251.p8.c | 17 +++ >>>> 3 files changed, 155 insertions(+), 6 deletions(-) >>>> create mode 100644 gcc/testsuite/gcc.target/powerpc/pr79251.p8.c >>>> >>>> diff --git a/gcc/config/rs6000/rs6000-c.c >>>> b/gcc/config/rs6000/rs6000-c.c >>>> index 5551a21d738..4bea8001ec6 100644 >>>> --- a/gcc/config/rs6000/rs6000-c.c >>>> +++ b/gcc/config/rs6000/rs6000-c.c >>>> @@ -1599,10 +1599,29 @@ altivec_resolve_overloaded_builtin >>>> (location_t loc, tree fndecl, >>>> SET_EXPR_LOCATION (stmt, loc); >>>> stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt); >>>> } >>>> - stmt = build_array_ref (loc, stmt, arg2); >>>> - stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt, >>>> - convert (TREE_TYPE (stmt), arg0)); >>>> - stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl); >>>> + >>>> + if (TARGET_P8_VECTOR) >>>> + { >>>> + stmt = build_array_ref (loc, stmt, arg2); >>>> + stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt, >>>> + convert (TREE_TYPE (stmt), arg0)); >>>> + stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl); >>>> + } >>>> + else >>>> + { >>>> + tree arg1_inner_type; >>>> + tree innerptrtype; >>>> + arg1_inner_type = TREE_TYPE (arg1_type); >>>> + innerptrtype = build_pointer_type (arg1_inner_type); >>>> + >>>> + stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0); >>>> + stmt = convert (innerptrtype, stmt); >>>> + stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1); >>>> + stmt = build_indirect_ref (loc, stmt, RO_NULL); >>>> + stmt = build2 (MODIFY_EXPR, TREE_TYPE (stmt), stmt, >>>> + convert (TREE_TYPE (stmt), arg0)); >>>> + stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl); >>>> + } >>>> return stmt; >>>> } >>>> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c >>>> index 96f76c7a74c..33ca839cb28 100644 >>>> --- a/gcc/config/rs6000/rs6000.c >>>> +++ b/gcc/config/rs6000/rs6000.c 
>>>> @@ -6806,10 +6806,10 @@ rs6000_expand_vector_set (rtx target, rtx >>>> val, rtx elt_rtx) >>>> } >>>> /* Insert VAL into IDX of TARGET, VAL size is same of the vector >>>> element, IDX >>>> - is variable and also counts by vector element size. */ >>>> + is variable and also counts by vector element size for p9 and >>>> above. */ >>>> void >>>> -rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx) >>>> +rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx) >>>> { >>>> machine_mode mode = GET_MODE (target); >>>> @@ -6852,6 +6852,119 @@ rs6000_expand_vector_set_var (rtx target, >>>> rtx val, rtx idx) >>>> emit_insn (perml); >>>> } >>>> +/* Insert VAL into IDX of TARGET, VAL size is same of the vector >>>> element, IDX >>>> + is variable and also counts by vector element size for p8. */ >>>> + >>>> +void >>>> +rs6000_expand_vector_set_var_p8 (rtx target, rtx val, rtx idx) >>>> +{ >>>> + machine_mode mode = GET_MODE (target); >>>> + >>>> + gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx)); >>>> + >>>> + gcc_assert (GET_MODE (idx) == E_SImode); >>>> + >>>> + machine_mode inner_mode = GET_MODE (val); >>>> + HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode); >>>> + >>>> + rtx tmp = gen_reg_rtx (GET_MODE (idx)); >>>> + int width = GET_MODE_SIZE (inner_mode); >>>> + >>>> + gcc_assert (width >= 1 && width <= 4); >>>> + >>>> + if (!BYTES_BIG_ENDIAN) >>>> + { >>>> + /* idx = idx * width. */ >>>> + emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width))); >>>> + /* idx = idx + 8. */ >>>> + emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (8))); >>>> + } >>>> + else >>>> + { >>>> + emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width))); >>>> + emit_insn (gen_subsi3 (tmp, GEN_INT (24 - width), tmp)); >>>> + } >>>> + >>>> + /* lxv vs33, mask. >>>> + DImode: 0xffffffffffffffff0000000000000000 >>>> + SImode: 0x00000000ffffffff0000000000000000 >>>> + HImode: 0x000000000000ffff0000000000000000. >>>> + QImode: 0x00000000000000ff0000000000000000. 
*/ >>>> + rtx mask = gen_reg_rtx (V16QImode); >>>> + rtx mask_v2di = gen_reg_rtx (V2DImode); >>>> + rtvec v = rtvec_alloc (2); >>>> + if (!BYTES_BIG_ENDIAN) >>>> + { >>>> + RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0); >>>> + RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask); >>>> + } >>>> + else >>>> + { >>>> + RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask); >>>> + RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0); >>>> + } >>>> + emit_insn (gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL >>>> (V2DImode, v))); >>>> + rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, >>>> V2DImode, 0); >>>> + emit_insn (gen_rtx_SET (mask, sub_mask)); >>>> + >>>> + /* mtvsrd[wz] f0,tmp_val. */ >>>> + rtx tmp_val = gen_reg_rtx (SImode); >>>> + if (inner_mode == E_SFmode) >>>> + emit_insn (gen_movsi_from_sf (tmp_val, val)); >>>> + else >>>> + tmp_val = force_reg (SImode, val); >>>> + >>>> + rtx val_v16qi = gen_reg_rtx (V16QImode); >>>> + rtx val_v2di = gen_reg_rtx (V2DImode); >>>> + rtvec vec_val = rtvec_alloc (2); >>>> + if (!BYTES_BIG_ENDIAN) >>>> + { >>>> + RTVEC_ELT (vec_val, 0) = gen_rtx_CONST_INT (DImode, 0); >>>> + RTVEC_ELT (vec_val, 1) = tmp_val; >>>> + } >>>> + else >>>> + { >>>> + RTVEC_ELT (vec_val, 0) = tmp_val; >>>> + RTVEC_ELT (vec_val, 1) = gen_rtx_CONST_INT (DImode, 0); >>>> + } >>>> + emit_insn ( >>>> + gen_vec_initv2didi (val_v2di, gen_rtx_PARALLEL (V2DImode, >>>> vec_val))); >>>> + rtx sub_val = simplify_gen_subreg (V16QImode, val_v2di, V2DImode, >>>> 0); >>>> + emit_insn (gen_rtx_SET (val_v16qi, sub_val)); >>>> + >>>> + /* lvsl 13,0,idx. */ >>>> + tmp = convert_modes (DImode, SImode, tmp, 1); >>>> + rtx pcv = gen_reg_rtx (V16QImode); >>>> + emit_insn (gen_altivec_lvsl_reg (pcv, tmp)); >>>> + >>>> + /* vperm 1,1,1,13. */ >>>> + /* vperm 0,0,0,13. 
*/ >>>> + rtx val_perm = gen_reg_rtx (V16QImode); >>>> + rtx mask_perm = gen_reg_rtx (V16QImode); >>>> + emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, >>>> val_v16qi, pcv)); >>>> + emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, >>>> pcv)); >>>> + >>>> + rtx target_v16qi = simplify_gen_subreg (V16QImode, target, mode, 0); >>>> + >>>> + /* xxsel 34,34,32,33. */ >>>> + emit_insn ( >>>> + gen_vector_select_v16qi (target_v16qi, target_v16qi, val_perm, >>>> mask_perm)); >>>> +} >>>> + >>>> +/* Insert VAL into IDX of TARGET, VAL size is same of the vector >>>> element, IDX >>>> + is variable and also counts by vector element size. */ >>>> + >>>> +void >>>> +rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx) >>>> +{ >>>> + machine_mode mode = GET_MODE (target); >>>> + machine_mode inner_mode = GET_MODE_INNER (mode); >>>> + if (TARGET_P9_VECTOR || GET_MODE_SIZE (inner_mode) == 8) >>>> + rs6000_expand_vector_set_var_p9 (target, val, idx); >>>> + else >>>> + rs6000_expand_vector_set_var_p8 (target, val, idx); >>>> +} >>>> + >>>> /* Extract field ELT from VEC into TARGET. 
*/ >>>> void >>>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c >>>> b/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c >>>> new file mode 100644 >>>> index 00000000000..06da47b7758 >>>> --- /dev/null >>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c >>>> @@ -0,0 +1,17 @@ >>>> +/* { dg-do compile } */ >>>> +/* { dg-require-effective-target powerpc_p8vector_ok } */ >>>> +/* { dg-options "-O2 -mdejagnu-cpu=power8 -maltivec" } */ >>>> + >>>> +#include <stddef.h> >>>> +#include <altivec.h> >>>> +#include "pr79251.h" >>>> + >>>> +TEST_VEC_INSERT_ALL (test) >>>> + >>>> +/* { dg-final { scan-assembler-not {\mstxw\M} } } */ >>>> +/* { dg-final { scan-assembler-times {\mlvsl\M} 10 } } */ >>>> +/* { dg-final { scan-assembler-times {\mlvsr\M} 3 } } */ >>>> +/* { dg-final { scan-assembler-times {\mvperm\M} 20 } } */ >>>> +/* { dg-final { scan-assembler-times {\mxxpermdi\M} 10 } } */ >>>> +/* { dg-final { scan-assembler-times {\mxxsel\M} 7 } } */ >>>> + >>>> >>> >> >
Hi! You never committed 2/4? That makes it harder to review this one :-) On Sat, Oct 10, 2020 at 03:08:24AM -0500, Xionghu Luo wrote: > gcc/ChangeLog: > > 2020-10-10 Xionghu Luo <luoxhu@linux.ibm.com> > > * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin): > Generate ARRAY_REF(VIEW_CONVERT_EXPR) for P8 and later > platforms. > * config/rs6000/rs6000.c (rs6000_expand_vector_set_var): Update > to call different path for P8 and P9. > (rs6000_expand_vector_set_var_p9): New function. > (rs6000_expand_vector_set_var_p8): New function. > > gcc/testsuite/ChangeLog: > > 2020-10-10 Xionghu Luo <luoxhu@linux.ibm.com> > > * gcc.target/powerpc/pr79251.p8.c: New test. If testing on P9 LE and P7 BE (32-bit and 64-bit) worked, this is okay for trunk. Thanks! (Let me know if you need help testing.) Segher
On Thu, Jan 21, 2021 at 6:51 PM Segher Boessenkool <segher@kernel.crashing.org> wrote: > > Hi! > > You never committed 2/4? That makes it harder to review this one :-) > > On Sat, Oct 10, 2020 at 03:08:24AM -0500, Xionghu Luo wrote: > > gcc/ChangeLog: > > > > 2020-10-10 Xionghu Luo <luoxhu@linux.ibm.com> > > > > * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin): > > Generate ARRAY_REF(VIEW_CONVERT_EXPR) for P8 and later > > platforms. > > * config/rs6000/rs6000.c (rs6000_expand_vector_set_var): Update > > to call different path for P8 and P9. > > (rs6000_expand_vector_set_var_p9): New function. > > (rs6000_expand_vector_set_var_p8): New function. > > > > gcc/testsuite/ChangeLog: > > > > 2020-10-10 Xionghu Luo <luoxhu@linux.ibm.com> > > > > * gcc.target/powerpc/pr79251.p8.c: New test. > > If testing on P9 LE and P7 BE (32-bit and 64-bit) worked, this is okay > for trunk. Thanks! This testcase ICEs on AIX. Please fix. This was not tested properly. The new pattern does not have matching target conditions for patterns on which it relies. Thanks, David
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index 5551a21d738..4bea8001ec6 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -1599,10 +1599,29 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, SET_EXPR_LOCATION (stmt, loc); stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt); } - stmt = build_array_ref (loc, stmt, arg2); - stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt, - convert (TREE_TYPE (stmt), arg0)); - stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl); + + if (TARGET_P8_VECTOR) + { + stmt = build_array_ref (loc, stmt, arg2); + stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt, + convert (TREE_TYPE (stmt), arg0)); + stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl); + } + else + { + tree arg1_inner_type; + tree innerptrtype; + arg1_inner_type = TREE_TYPE (arg1_type); + innerptrtype = build_pointer_type (arg1_inner_type); + + stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0); + stmt = convert (innerptrtype, stmt); + stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1); + stmt = build_indirect_ref (loc, stmt, RO_NULL); + stmt = build2 (MODIFY_EXPR, TREE_TYPE (stmt), stmt, + convert (TREE_TYPE (stmt), arg0)); + stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl); + } return stmt; } diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 96f76c7a74c..33ca839cb28 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -6806,10 +6806,10 @@ rs6000_expand_vector_set (rtx target, rtx val, rtx elt_rtx) } /* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX - is variable and also counts by vector element size. */ + is variable and also counts by vector element size for p9 and above. 
*/ void -rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx) +rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx) { machine_mode mode = GET_MODE (target); @@ -6852,6 +6852,119 @@ rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx) emit_insn (perml); } +/* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX + is variable and also counts by vector element size for p8. */ + +void +rs6000_expand_vector_set_var_p8 (rtx target, rtx val, rtx idx) +{ + machine_mode mode = GET_MODE (target); + + gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx)); + + gcc_assert (GET_MODE (idx) == E_SImode); + + machine_mode inner_mode = GET_MODE (val); + HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode); + + rtx tmp = gen_reg_rtx (GET_MODE (idx)); + int width = GET_MODE_SIZE (inner_mode); + + gcc_assert (width >= 1 && width <= 4); + + if (!BYTES_BIG_ENDIAN) + { + /* idx = idx * width. */ + emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width))); + /* idx = idx + 8. */ + emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (8))); + } + else + { + emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width))); + emit_insn (gen_subsi3 (tmp, GEN_INT (24 - width), tmp)); + } + + /* lxv vs33, mask. + DImode: 0xffffffffffffffff0000000000000000 + SImode: 0x00000000ffffffff0000000000000000 + HImode: 0x000000000000ffff0000000000000000. + QImode: 0x00000000000000ff0000000000000000. 
*/ + rtx mask = gen_reg_rtx (V16QImode); + rtx mask_v2di = gen_reg_rtx (V2DImode); + rtvec v = rtvec_alloc (2); + if (!BYTES_BIG_ENDIAN) + { + RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0); + RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask); + } + else + { + RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask); + RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0); + } + emit_insn (gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL (V2DImode, v))); + rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, V2DImode, 0); + emit_insn (gen_rtx_SET (mask, sub_mask)); + + /* mtvsrd[wz] f0,tmp_val. */ + rtx tmp_val = gen_reg_rtx (SImode); + if (inner_mode == E_SFmode) + emit_insn (gen_movsi_from_sf (tmp_val, val)); + else + tmp_val = force_reg (SImode, val); + + rtx val_v16qi = gen_reg_rtx (V16QImode); + rtx val_v2di = gen_reg_rtx (V2DImode); + rtvec vec_val = rtvec_alloc (2); + if (!BYTES_BIG_ENDIAN) + { + RTVEC_ELT (vec_val, 0) = gen_rtx_CONST_INT (DImode, 0); + RTVEC_ELT (vec_val, 1) = tmp_val; + } + else + { + RTVEC_ELT (vec_val, 0) = tmp_val; + RTVEC_ELT (vec_val, 1) = gen_rtx_CONST_INT (DImode, 0); + } + emit_insn ( + gen_vec_initv2didi (val_v2di, gen_rtx_PARALLEL (V2DImode, vec_val))); + rtx sub_val = simplify_gen_subreg (V16QImode, val_v2di, V2DImode, 0); + emit_insn (gen_rtx_SET (val_v16qi, sub_val)); + + /* lvsl 13,0,idx. */ + tmp = convert_modes (DImode, SImode, tmp, 1); + rtx pcv = gen_reg_rtx (V16QImode); + emit_insn (gen_altivec_lvsl_reg (pcv, tmp)); + + /* vperm 1,1,1,13. */ + /* vperm 0,0,0,13. */ + rtx val_perm = gen_reg_rtx (V16QImode); + rtx mask_perm = gen_reg_rtx (V16QImode); + emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, val_v16qi, pcv)); + emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv)); + + rtx target_v16qi = simplify_gen_subreg (V16QImode, target, mode, 0); + + /* xxsel 34,34,32,33. 
*/ + emit_insn ( + gen_vector_select_v16qi (target_v16qi, target_v16qi, val_perm, mask_perm)); +} + +/* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX + is variable and also counts by vector element size. */ + +void +rs6000_expand_vector_set_var (rtx target, rtx val, rtx idx) +{ + machine_mode mode = GET_MODE (target); + machine_mode inner_mode = GET_MODE_INNER (mode); + if (TARGET_P9_VECTOR || GET_MODE_SIZE (inner_mode) == 8) + rs6000_expand_vector_set_var_p9 (target, val, idx); + else + rs6000_expand_vector_set_var_p8 (target, val, idx); +} + /* Extract field ELT from VEC into TARGET. */ void diff --git a/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c b/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c new file mode 100644 index 00000000000..06da47b7758 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr79251.p8.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-O2 -mdejagnu-cpu=power8 -maltivec" } */ + +#include <stddef.h> +#include <altivec.h> +#include "pr79251.h" + +TEST_VEC_INSERT_ALL (test) + +/* { dg-final { scan-assembler-not {\mstxw\M} } } */ +/* { dg-final { scan-assembler-times {\mlvsl\M} 10 } } */ +/* { dg-final { scan-assembler-times {\mlvsr\M} 3 } } */ +/* { dg-final { scan-assembler-times {\mvperm\M} 20 } } */ +/* { dg-final { scan-assembler-times {\mxxpermdi\M} 10 } } */ +/* { dg-final { scan-assembler-times {\mxxsel\M} 7 } } */ +