Message ID | 1504711323.18797.5.camel@us.ibm.com |
---|---|
State | New |
Headers | show |
Series | [rs6000] Add support for vec_xst_len_r() and vec_xl_len_r() builtins | expand |
Hi Carl, On Wed, Sep 06, 2017 at 08:22:03AM -0700, Carl Love wrote: > (define_insn "*stxvl"): add missing argument to the sldi instruction. s/add/Add/ . This one-liner fix is approved right now, please commit it as a separate patch. > +(define_insn "addi_neg16" > + [(set (match_operand:DI 0 "vsx_register_operand" "=r") > + (unspec:DI > + [(match_operand:DI 1 "gpc_reg_operand" "r")] > + UNSPEC_ADDI_NEG16))] > + "" > + "addi %0,%1,-16" > +) You don't need a separate insn (or unspec) for this at all afaics... Where you do emit_insn (gen_addi_neg16 (tmp, operands[2])); you could just do emit_insn (gen_adddi3 (tmp, operands[2], GEN_INT (-16))); > +;; Load VSX Vector with Length, right justified > +(define_expand "lxvll" > + [(set (match_dup 3) > + (match_operand:DI 2 "register_operand")) > + (set (match_operand:V16QI 0 "vsx_register_operand") > + (unspec:V16QI > + [(match_operand:DI 1 "gpc_reg_operand") > + (match_dup 3)] > + UNSPEC_LXVLL))] > + "TARGET_P9_VECTOR && TARGET_64BIT" > +{ > + operands[3] = gen_reg_rtx (DImode); > +}) Hrm, so you make a reg 3 only because the lxvll pattern will clobber it? > +(define_insn "*lxvll" > + [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") > + (unspec:V16QI > + [(match_operand:DI 1 "gpc_reg_operand" "b") > + (match_operand:DI 2 "register_operand" "+r")] > + UNSPEC_LXVLL))] > + "TARGET_P9_VECTOR && TARGET_64BIT" > +;; "lxvll %x0,%1,%2;" > + "sldi %2,%2, 56\; lxvll %x0,%1,%2;" > + [(set_attr "length" "8") > + (set_attr "type" "vecload")]) It is nicer to just have a match_scratch in here then, like (define_insn "*lxvll" [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b") (match_operand:DI 2 "register_operand" "r")] UNSPEC_LXVLL)) (clobber (match_scratch:DI 3 "=&r"))] "TARGET_P9_VECTOR && TARGET_64BIT" "sldi %3,%2,56\;lxvll %x0,%1,%3" [(set_attr "length" "8") (set_attr "type" "vecload")]) (Note spacing, comment, ";" stuff, and the earlyclobber). 
Ideally you split the sldi off in the expand though, so that the *lxvll pattern is really just that single insn. > +(define_insn "altivec_lvsl_reg" > + [(set (match_operand:V16QI 0 "vsx_register_operand" "=v") > + (unspec:V16QI > + [(match_operand:DI 1 "gpc_reg_operand" "b")] > + UNSPEC_LVSL_REG))] > + "TARGET_ALTIVEC" > + "lvsl %0,0,%1" > + [(set_attr "type" "vecload")]) vecload isn't really the correct type for this, but I see we have the same on the existing lvsl patterns (it's permute unit on p9; I expect the same on p8 and older, but please check). Please move this next to the existing lvsl pattern. > +;; Expand for builtin xl_len_r > +(define_expand "xl_len_r" > + [(match_operand:V16QI 0 "vsx_register_operand" "=v") > + (match_operand:DI 1 "register_operand" "r") > + (match_operand:DI 2 "register_operand" "r")] > + "UNSPEC_XL_LEN_R" > +{ > + rtx shift_mask = gen_reg_rtx (V16QImode); > + rtx rtx_vtmp = gen_reg_rtx (V16QImode); > + rtx tmp = gen_reg_rtx (DImode); > + > +/* Setup permute vector to shift right by operands[2] bytes. > + Note: addi operands[2], -16 is negative so we actually need to > + shift left to get a right shift. */ Indent the comment with the code, so that's 2 spaces more here. The comment isn't clear to me... Neither is the code though: lvsl looks at just the low 4 bits of its arg, so the addi does nothing useful? Maybe I am missing something. > + emit_insn (gen_addi_neg16 (tmp, operands[2])); > + emit_insn (gen_altivec_lvsl_reg (shift_mask, tmp)); > + emit_insn (gen_lxvll (rtx_vtmp, operands[1], operands[2])); > + emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, > + rtx_vtmp, shift_mask)); > +;; Store VSX Vector with Length, right justified _left_ justified? 
> +(define_expand "stxvll" > + [(set (match_dup 3) > + (match_operand:DI 2 "register_operand")) > + (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand")) > + (unspec:V16QI > + [(match_operand:V16QI 0 "vsx_register_operand") > + (match_dup 3)] > + UNSPEC_STXVLL))] > + "TARGET_P9_VECTOR && TARGET_64BIT" > +{ > + operands[3] = gen_reg_rtx (DImode); > +}) > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/builtins-5-p9-runnable.c > @@ -0,0 +1,309 @@ > +/* { dg-do run { target { powerpc64*-*-* && { p9vector_hw } } } } */ This should be powerpc*-*-* I think? Does it need braces around p9vector_hw? Segher
GCC maintainers: Here is an updated patch to address the comment from Segher. The one comment that was not addressed was: >> +(define_insn "altivec_lvsl_reg" >> + [(set (match_operand:V16QI 0 "vsx_register_operand" "=v") >> + (unspec:V16QI >> + [(match_operand:DI 1 "gpc_reg_operand" "b")] >> + UNSPEC_LVSL_REG))] >> + "TARGET_ALTIVEC" >> + "lvsl %0,0,%1" >> + [(set_attr "type" "vecload")]) vecload isn't really the correct type for this, but I see we have the same on the existing lvsl patterns (it's permute unit on p9; I expect the same on p8 and older, but please check). Per our additional discussions Segher said: > You can leave it as vecload just like the other lvsl's we have, leave > the cleanup for a later date. I believe everything else has been addressed. The patch was retested on powerpc64le-unknown-linux-gnu (Power 9 LE) and powerpc64le-unknown-linux-gnu (Power 8 LE) without regressions. Let me know if there are additional issues that need addressing. Thanks. Carl Love ------------------------------------------------------------------------------ gcc/ChangeLog: 2017-09-14 Carl Love <cel@us.ibm.com> * config/rs6000/rs6000-c.c (P9V_BUILTIN_VEC_XL_LEN_R, P9V_BUILTIN_VEC_XST_LEN_R): Add support for builtins vector unsigned char vec_xl_len_r (unsigned char *, size_t); void vec_xst_len_r (vector unsigned char, unsigned char *, size_t); * config/rs6000/altivec.h (vec_xl_len_r, vec_xst_len_r): Add defines. * config/rs6000/rs6000-builtin.def (XL_LEN_R, XST_LEN_R): Add definitions and overloading. * config/rs6000/rs6000.c (altivec_expand_builtin): Add case statement for P9V_BUILTIN_XST_LEN_R. (altivec_init_builtins): Add def_builtin for P9V_BUILTIN_STXVLL. * config/rs6000/vsx.md (lxvll, stxvll, xl_len_r, xst_len_r): Add define_expand and define_insn for the instructions and builtins. * doc/extend.texi: Update the built-in documenation file for the new built-in functions. 
* config/rs6000/altivec.md (altivec_lvsl_reg, altivec_lvsr_reg): Add define_insn for the instructions gcc/testsuite/ChangeLog: 2017-09-14 Carl Love <cel@us.ibm.com> * gcc.target/powerpc/builtins-5-p9-runnable.c: Add new runable test file for the new built-ins and the existing built-ins. --- gcc/config/rs6000/altivec.h | 2 + gcc/config/rs6000/altivec.md | 18 ++ gcc/config/rs6000/rs6000-builtin.def | 4 + gcc/config/rs6000/rs6000-c.c | 8 + gcc/config/rs6000/rs6000.c | 11 +- gcc/config/rs6000/vsx.md | 114 ++++++++ gcc/doc/extend.texi | 4 + .../gcc.target/powerpc/builtins-5-p9-runnable.c | 309 +++++++++++++++++++++ 8 files changed, 468 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/builtins-5-p9-runnable.c diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index c8e508cf0..94a4db24a 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -467,6 +467,8 @@ #ifdef _ARCH_PPC64 #define vec_xl_len __builtin_vec_lxvl #define vec_xst_len __builtin_vec_stxvl +#define vec_xl_len_r __builtin_vec_xl_len_r +#define vec_xst_len_r __builtin_vec_xst_len_r #endif #define vec_cmpnez __builtin_vec_vcmpnez diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 0aa1e3016..3436c0dfd 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -2542,6 +2542,15 @@ DONE; }) +(define_insn "altivec_lvsl_reg" + [(set (match_operand:V16QI 0 "vsx_register_operand" "=v") + (unspec:V16QI + [(match_operand:DI 1 "gpc_reg_operand" "b")] + UNSPEC_LVSL_REG))] + "TARGET_ALTIVEC" + "lvsl %0,0,%1" + [(set_attr "type" "vecload")]) + (define_insn "altivec_lvsl_direct" [(set (match_operand:V16QI 0 "register_operand" "=v") (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "Z")] @@ -2574,6 +2583,15 @@ DONE; }) +(define_insn "altivec_lvsr_reg" + [(set (match_operand:V16QI 0 "vsx_register_operand" "=v") + (unspec:V16QI + [(match_operand:DI 1 "gpc_reg_operand" "b")] + UNSPEC_LVSR_REG))] + 
"TARGET_ALTIVEC" + "lvsr %0,0,%1" + [(set_attr "type" "vecload")]) + (define_insn "altivec_lvsr_direct" [(set (match_operand:V16QI 0 "register_operand" "=v") (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "Z")] diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index 850164a09..8f87ccea4 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -2125,6 +2125,7 @@ BU_P9V_OVERLOAD_2 (VIESP, "insert_exp_sp") /* 2 argument vector functions added in ISA 3.0 (power9). */ BU_P9V_64BIT_VSX_2 (LXVL, "lxvl", CONST, lxvl) +BU_P9V_64BIT_VSX_2 (XL_LEN_R, "xl_len_r", CONST, xl_len_r) BU_P9V_AV_2 (VEXTUBLX, "vextublx", CONST, vextublx) BU_P9V_AV_2 (VEXTUBRX, "vextubrx", CONST, vextubrx) @@ -2141,6 +2142,7 @@ BU_P9V_VSX_3 (VINSERT4B_DI, "vinsert4b_di", CONST, vinsert4b_di) /* 3 argument vector functions returning void, treated as SPECIAL, added in ISA 3.0 (power9). */ BU_P9V_64BIT_AV_X (STXVL, "stxvl", MISC) +BU_P9V_64BIT_AV_X (XST_LEN_R, "xst_len_r", MISC) /* 1 argument vector functions added in ISA 3.0 (power9). 
*/ BU_P9V_AV_1 (VCLZLSBB, "vclzlsbb", CONST, vclzlsbb) @@ -2182,12 +2184,14 @@ BU_P9V_AV_P (VCMPNEZW_P, "vcmpnezw_p", CONST, vector_nez_v4si_p) /* ISA 3.0 Vector scalar overloaded 2 argument functions */ BU_P9V_OVERLOAD_2 (LXVL, "lxvl") +BU_P9V_OVERLOAD_2 (XL_LEN_R, "xl_len_r") BU_P9V_OVERLOAD_2 (VEXTULX, "vextulx") BU_P9V_OVERLOAD_2 (VEXTURX, "vexturx") BU_P9V_OVERLOAD_2 (VEXTRACT4B, "vextract4b") /* ISA 3.0 Vector scalar overloaded 3 argument functions */ BU_P9V_OVERLOAD_3 (STXVL, "stxvl") +BU_P9V_OVERLOAD_3 (XST_LEN_R, "xst_len_r") BU_P9V_OVERLOAD_3 (VINSERT4B, "vinsert4b") /* Overloaded CMPNE support was implemented prior to Power 9, diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index b2df850e8..2388260be 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -4789,6 +4789,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P9V_BUILTIN_VEC_VSCEDPUO, P9V_BUILTIN_VSCEDPUO, RS6000_BTI_INTSI, RS6000_BTI_double, RS6000_BTI_double, 0 }, + { P9V_BUILTIN_VEC_XL_LEN_R, P9V_BUILTIN_XL_LEN_R, + RS6000_BTI_unsigned_V16QI, ~RS6000_BTI_UINTQI, + RS6000_BTI_unsigned_long_long, 0 }, + { P9V_BUILTIN_VEC_LXVL, P9V_BUILTIN_LXVL, RS6000_BTI_V16QI, ~RS6000_BTI_INTQI, RS6000_BTI_unsigned_long_long, 0 }, @@ -4833,6 +4837,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { /* At an appropriate future time, add support for the RS6000_BTI_Float16 (exact name to be determined) type here. 
*/ + { P9V_BUILTIN_VEC_XST_LEN_R, P9V_BUILTIN_XST_LEN_R, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, + ~RS6000_BTI_UINTQI, RS6000_BTI_unsigned_long_long}, + { P9V_BUILTIN_VEC_STXVL, P9V_BUILTIN_STXVL, RS6000_BTI_void, RS6000_BTI_V16QI, ~RS6000_BTI_INTQI, RS6000_BTI_unsigned_long_long }, diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index ecdf776b9..10919edc7 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -15546,6 +15546,9 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp) case P9V_BUILTIN_STXVL: return altivec_expand_stxvl_builtin (CODE_FOR_stxvl, exp); + case P9V_BUILTIN_XST_LEN_R: + return altivec_expand_stxvl_builtin (CODE_FOR_xst_len_r, exp); + case VSX_BUILTIN_STXVD2X_V1TI: return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp); case VSX_BUILTIN_STXVD2X_V2DF: @@ -17488,8 +17491,12 @@ altivec_init_builtins (void) def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL); if (TARGET_P9_VECTOR) - def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long, - P9V_BUILTIN_STXVL); + { + def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long, + P9V_BUILTIN_STXVL); + def_builtin ("__builtin_xst_len_r", void_ftype_v16qi_pvoid_long, + P9V_BUILTIN_XST_LEN_R); + } /* Add the DST variants. 
*/ d = bdesc_dst; diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 9b24c7b72..0af8e5a77 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -382,8 +382,17 @@ UNSPEC_VSX_VTSTDC UNSPEC_VSX_VEC_INIT UNSPEC_VSX_VSIGNED2 + UNSPEC_LXVL + UNSPEC_LXVLL + UNSPEC_LVSL_REG + UNSPEC_LVSR_REG + UNSPEC_SLDI UNSPEC_STXVL + UNSPEC_STXVLL + UNSPEC_XL_LEN_R + UNSPEC_XST_LEN_R + UNSPEC_VCLZLSBB UNSPEC_VCTZLSBB UNSPEC_VEXTUBLX @@ -4352,6 +4361,87 @@ [(set_attr "length" "8") (set_attr "type" "vecload")]) +;; Load VSX Vector with Length, right justified +(define_expand "lxvll" + [(set (match_dup 3) + (match_operand:DI 2 "register_operand")) + (set (match_operand:V16QI 0 "vsx_register_operand") + (unspec:V16QI + [(match_operand:DI 1 "gpc_reg_operand") + (match_dup 3)] + UNSPEC_LXVLL))] + "TARGET_P9_VECTOR && TARGET_64BIT" +{ + operands[3] = gen_reg_rtx (DImode); +}) + +(define_insn "sldi" + [(set (match_operand:DI 0 "vsx_register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "gpc_reg_operand" "r") + (match_operand:DI 2 "u6bit_cint_operand" "")] + UNSPEC_SLDI))] + "" + "sldi %0,%1,%2" +) + +(define_insn "*lxvll" + [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") + (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b") + (match_operand:DI 2 "register_operand" "+r")] + UNSPEC_LXVLL))] + "TARGET_P9_VECTOR && TARGET_64BIT" + "lxvll %x0,%1,%2;" + [(set_attr "length" "4") + (set_attr "type" "vecload")]) + +;; Expand for builtin xl_len_r +(define_expand "xl_len_r" + [(match_operand:V16QI 0 "vsx_register_operand" "=v") + (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "register_operand" "r")] + "UNSPEC_XL_LEN_R" +{ + rtx shift_mask = gen_reg_rtx (V16QImode); + rtx rtx_vtmp = gen_reg_rtx (V16QImode); + rtx tmp = gen_reg_rtx (DImode); + + /* Setup permute vector to shift right by operands[2] bytes. + Note: operands[2] is between 0 and 15, adding -16 to it results + in a negative value. 
Shifting left by a negative value results in + the value being shifted right by the desired amount. */ + emit_insn (gen_adddi3 (tmp, operands[2], GEN_INT (-16))); + emit_insn (gen_altivec_lvsl_reg (shift_mask, tmp)); + emit_insn (gen_sldi (operands[2], operands[2], GEN_INT (56))); + emit_insn (gen_lxvll (rtx_vtmp, operands[1], operands[2])); + emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, + rtx_vtmp, shift_mask)); + DONE; +}) + +;; Store VSX Vector with Length, left justified +(define_expand "stxvll" + [(set (match_dup 3) + (match_operand:DI 2 "register_operand")) + (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand")) + (unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand") + (match_dup 3)] + UNSPEC_STXVLL))] + "TARGET_P9_VECTOR && TARGET_64BIT" +{ + operands[3] = gen_reg_rtx (DImode); +}) + +(define_insn "*stxvll" + [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b")) + (unspec:V16QI + [(match_operand:V16QI 0 "vsx_register_operand" "wa") + (match_operand:DI 2 "register_operand" "+r")] + UNSPEC_STXVLL))] + "TARGET_P9_VECTOR && TARGET_64BIT" + "stxvll %x0,%1,%2" + [(set_attr "length" "8") + (set_attr "type" "vecstore")]) + ;; Store VSX Vector with Length (define_expand "stxvl" [(set (match_dup 3) @@ -4377,6 +4467,30 @@ [(set_attr "length" "8") (set_attr "type" "vecstore")]) +;; Expand for builtin xst_len_r +(define_expand "xst_len_r" + [(match_operand:V16QI 0 "vsx_register_operand" "=v") + (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "register_operand" "r")] + "UNSPEC_XST_LEN_R" +{ + rtx shift_mask = gen_reg_rtx (V16QImode); + rtx rtx_vtmp = gen_reg_rtx (V16QImode); + rtx tmp = gen_reg_rtx (DImode); + + /* Setup permute vector to shift right by operands[2] bytes. + Note: operands[2] is between 0 and 15, adding -16 to it results + in a negative value. Shifting right by a negative value results in + the value being shifted left by the desired amount. 
*/ + emit_insn (gen_adddi3 (tmp, operands[2], GEN_INT (-16))); + emit_insn (gen_altivec_lvsr_reg (shift_mask, tmp)); + emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], + operands[0], shift_mask)); + emit_insn (gen_sldi (operands[2], operands[2], GEN_INT (56))); + emit_insn (gen_stxvll (rtx_vtmp, operands[1], operands[2])); + DONE; +}) + ;; Vector Compare Not Equal Byte (define_insn "vcmpneb" [(set (match_operand:V16QI 0 "altivec_register_operand" "=v") diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 649be015d..37fd769df 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -15631,6 +15631,8 @@ vector unsigned short vec_xl_len (unsigned short *addr, size_t len); vector double vec_xl_len (double *addr, size_t len); vector float vec_xl_len (float *addr, size_t len); +vector unsigned char vec_xl_len_r (unsigned char *addr, size_t len); + void vec_xst_len (vector signed char data, signed char *addr, size_t len); void vec_xst_len (vector unsigned char data, unsigned char *addr, size_t len); void vec_xst_len (vector signed int data, signed int *addr, size_t len); @@ -15644,6 +15646,8 @@ void vec_xst_len (vector signed __int128 data, signed __int128 *addr, size_t len void vec_xst_len (vector double data, double *addr, size_t len); void vec_xst_len (vector float data, float *addr, size_t len); +void vec_xst_len_r (vector unsigned char data, unsigned char *addr, size_t len); + signed char vec_xlx (unsigned int index, vector signed char data); unsigned char vec_xlx (unsigned int index, vector unsigned char data); signed short vec_xlx (unsigned int index, vector signed short data); diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-5-p9-runnable.c b/gcc/testsuite/gcc.target/powerpc/builtins-5-p9-runnable.c new file mode 100644 index 000000000..ad3947196 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/builtins-5-p9-runnable.c @@ -0,0 +1,309 @@ +/* { dg-do run { target { powerpc*-*-* && p9vector_hw } } } */ +/* { dg-skip-if "do not 
override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-options "-mcpu=power9 -O2" } */ + +#include <stdint.h> +#include <stdio.h> +#include <inttypes.h> +#include <altivec.h> // vector + +#define TRUE 1 +#define FALSE 0 + +#ifdef DEBUG +#include <stdio.h> +#endif + +void abort (void); + +int result_wrong(vector unsigned char vec_expected, + vector unsigned char vec_actual) +{ + int i; + + for (i=0; i<16; i++) + if (vec_expected[i] != vec_actual[i]) + return TRUE; + + return FALSE; +} + +int main() { + int i, j; + size_t size; + unsigned char data_uc[100]; + vector unsigned char store_data_uc; + unsigned char *address; + vector unsigned char *datap; + + vector unsigned char vec_uc_expected1, vec_uc_expected2, + vec_uc_result1, vec_uc_result2; + vector int data_int; + + for (i=0; i<100; i++) + data_uc[i] = i+1; + + + /* VEC_XL_LEN */ + + size = 8; + vec_uc_result1 = vec_xl_len (data_uc, size); + + vec_uc_expected1 = (vector unsigned char){ 1, 2, 3, 4, 5, 6, 7, 8, + 0, 0, 0, 0, 0, 0, 0, 0}; + + if (result_wrong (vec_uc_expected1, vec_uc_result1)) + { +#ifdef DEBUG + printf("Error: result does not match expected result\n"); + printf("vec_xl_len (%d): vec_uc_expected1[0] to vec_uc_expected1[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,",vec_uc_expected1[i]); + + printf("\nvec_xl_len (%d): vec_uc_result1[0] to vec_uc_result1[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_result1[i]); + + printf("\n\n"); +#else + abort(); +#endif + } + + + /* VEC_XL_LEN_R */ + size = 8; + vec_uc_result2 = vec_xl_len_r(data_uc, size); + + vec_uc_expected2 = (vector unsigned char){8, 7, 6, 5, 4, 3, 2, 1, + 0, 0, 0, 0, 0, 0, 0, 0,}; + + if (result_wrong (vec_uc_expected2, vec_uc_result2)) + { +#ifdef DEBUG + printf("Error: result does not match expected result\n"); + printf("vec_xl_len_r(%d): vec_uc_expected2[0] to vec_uc_expected2[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_expected2[i]); + + 
printf("\nvec_xl_len_r(%d): vec_uc_result2[0] to vec_uc_result2[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_result2[i]); + + printf("\n\n"); +#else + abort(); +#endif + } + + + size = 4; + vec_uc_result2 = vec_xl_len_r(data_uc, size); + + vec_uc_expected2 = (vector unsigned char){ 4, 3, 2, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; + + if (result_wrong (vec_uc_expected2, vec_uc_result2)) + { +#ifdef DEBUG + printf("Error: result does not match expected result\n"); + printf("vec_xl_len_r(%d): vec_uc_expected2[0] to vec_uc_expected2[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_expected2[i]); + + printf("\nvec_xl_len_r(%d): vec_uc_result2[0] to vec_uc_result2[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_result2[i]); + + printf("\n\n"); +#else + abort(); +#endif + } + + size = 2; + vec_uc_result2 = vec_xl_len_r(data_uc, size); + + vec_uc_expected2 = (vector unsigned char){ 2, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; + + if (result_wrong (vec_uc_expected2, vec_uc_result2)) + { +#ifdef DEBUG + printf("Error: result does not match expected result\n"); + printf("vec_xl_len_r(%d): vec_uc_expected2[0] to vec_uc_expected2[15]\n", + size); + for (i=0; i<16; i++) + printf(" %d,", vec_uc_expected2[i]); + + printf("\nvec_xl_len_r(%d) vec_uc_result2[0] to vec_uc_result2[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_result2[i]); + + printf("\n\n"); +#else + abort(); +#endif + } + + + /* VEC_XST_LEN */ + vec_uc_expected2 = (vector unsigned char){ 1, 2, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; + store_data_uc = (vector unsigned char){ 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16 }; + size = 2; + + for (i=0; i<16; i++) + vec_uc_result2[i] = 0; + + address = &vec_uc_result2[0]; + vec_xst_len (store_data_uc, address, size); + + if (result_wrong (vec_uc_expected2, vec_uc_result2)) + { +#ifdef DEBUG + printf("Error: result does not match expected result\n"); + printf("vec_xst_len 
(%d) vec_uc_result2[0] to vec_uc_result2[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_expected2[i]); + + printf("\nvec_xst_len (%d) store_data_uc[0] to store_data_uc[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_result2[i]); + + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uc_expected2 = (vector unsigned char){ 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 0, 0 }; + store_data_uc = (vector unsigned char){ 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16 }; + size = 14; + + for (i=0; i<16; i++) + vec_uc_result2[i] = 0; + + address = &vec_uc_result2[0]; + + vec_xst_len (store_data_uc, address, size); + + if (result_wrong (vec_uc_expected2, vec_uc_result2)) + { +#ifdef DEBUG + printf("Error: result does not match expected result\n"); + printf("vec_xst_len (%d) vec_uc_result2[0] to vec_uc_result2[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_expected2[i]); + + printf("\nvec_xst_len (%d) store_data_uc[0] to store_data_uc[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_result2[i]); + + printf("\n\n"); +#else + abort(); +#endif + } + + /* VEC_XST_LEN_R */ + vec_uc_expected1 = (vector unsigned char){ 2, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; + store_data_uc = (vector unsigned char){ 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16 }; + vec_uc_result1 = (vector unsigned char){ 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; + + size = 2; + + for (i=0; i<16; i++) + vec_uc_result1[i] = 0; + + address = &vec_uc_result1[0]; + + vec_xst_len_r(store_data_uc, address, size); + + if (result_wrong (vec_uc_expected1, vec_uc_result1)) + { +#ifdef DEBUG + printf("Error: result does not match expected result\n"); + printf("vec_xst_len_r(%d) vec_uc_expected1[0] to vec_uc_expected1[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_expected1[i]); + + printf("\nvec_xst_len_r(%d) result[0] to result[15]\n", size); + + for (i=0; i<16; i++) + printf(" %d,", 
vec_uc_result1[i]); + + printf("\n\n"); +#else + abort(); +#endif + } + + + vec_uc_expected1 = (vector unsigned char){ 14, 13, 12, 11, 10, 9, 8, 7, + 6, 5, 4, 3, 2, 1, 0, 0 }; + store_data_uc = (vector unsigned char){ 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16 }; + vec_uc_result1 = (vector unsigned char){ 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; + + size = 14; + + for (i=0; i<16; i++) + vec_uc_result1[i] = 0; + + address = &vec_uc_result1[0]; + + vec_xst_len_r(store_data_uc, address, size); + + if (result_wrong (vec_uc_expected1, vec_uc_result1)) + { +#ifdef DEBUG + printf("Error: result does not match expected result\n"); + printf("vec_xst_len_r(%d) vec_uc_expected2[0] to vec_uc_expected2[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_expected2[i]); + + printf("\nvec_xst_len_r(%d) result[0] to result[15]\n", size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_result1[i]); + + printf("\n\n"); +#else + abort(); +#endif + } +}
Hi Carl, On Thu, Sep 14, 2017 at 02:23:47PM -0700, Carl Love wrote: > vecload isn't really the correct type for this, but I see we have the > same on the existing lvsl patterns (it's permute unit on p9; I expect > the same on p8 and older, but please check). It is a bit more complicated on older cores I think; but we'll deal with all at once, there is nothing special about your added one. > * doc/extend.texi: Update the built-in documenation file for the new > built-in functions. (Typo, "documentation"). > +(define_insn "altivec_lvsl_reg" > + [(set (match_operand:V16QI 0 "vsx_register_operand" "=v") altivec_register_operand instead? lvsl can target only the VR regs, not all VSR regs. > +;; Load VSX Vector with Length, right justified > +(define_expand "lxvll" > + [(set (match_dup 3) > + (match_operand:DI 2 "register_operand")) > + (set (match_operand:V16QI 0 "vsx_register_operand") > + (unspec:V16QI > + [(match_operand:DI 1 "gpc_reg_operand") > + (match_dup 3)] > + UNSPEC_LXVLL))] > + "TARGET_P9_VECTOR && TARGET_64BIT" > +{ > + operands[3] = gen_reg_rtx (DImode); > +}) I don't think you need to copy operands[2] to a temporary here, see below. Why does this require TARGET_64BIT? > +(define_insn "sldi" > + [(set (match_operand:DI 0 "vsx_register_operand" "=r") > + (unspec:DI [(match_operand:DI 1 "gpc_reg_operand" "r") > + (match_operand:DI 2 "u6bit_cint_operand" "")] > + UNSPEC_SLDI))] > + "" > + "sldi %0,%1,%2" > +) As we discussed, you can just use ashldi3. > +(define_insn "*lxvll" > + [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") > + (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b") > + (match_operand:DI 2 "register_operand" "+r")] > + UNSPEC_LXVLL))] > + "TARGET_P9_VECTOR && TARGET_64BIT" > + "lxvll %x0,%1,%2;" > + [(set_attr "length" "4") > + (set_attr "type" "vecload")]) Why "+r"? The instruction doesn't write to that reg. A leftover from an earlier version of the patch, I guess. No ";" at the end of pattern strings please. 
Length 4 is the default, just leave it out. > +;; Expand for builtin xl_len_r > +(define_expand "xl_len_r" > + [(match_operand:V16QI 0 "vsx_register_operand" "=v") > + (match_operand:DI 1 "register_operand" "r") > + (match_operand:DI 2 "register_operand" "r")] > + "UNSPEC_XL_LEN_R" Expanders don't need constraints; just leave them out :-) > +{ > + rtx shift_mask = gen_reg_rtx (V16QImode); > + rtx rtx_vtmp = gen_reg_rtx (V16QImode); > + rtx tmp = gen_reg_rtx (DImode); > + > + /* Setup permute vector to shift right by operands[2] bytes. > + Note: operands[2] is between 0 and 15, adding -16 to it results > + in a negative value. Shifting left by a negative value results in > + the value being shifted right by the desired amount. */ > + emit_insn (gen_adddi3 (tmp, operands[2], GEN_INT (-16))); > + emit_insn (gen_altivec_lvsl_reg (shift_mask, tmp)); Since lvsl looks only at the low four bits, adding -16 does nothing for it. > + emit_insn (gen_sldi (operands[2], operands[2], GEN_INT (56))); Please use a new temporary instead of reusing operands[2]; this gives the register allocator more freedom. > +(define_insn "*stxvll" > + [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b")) > + (unspec:V16QI > + [(match_operand:V16QI 0 "vsx_register_operand" "wa") > + (match_operand:DI 2 "register_operand" "+r")] > + UNSPEC_STXVLL))] > + "TARGET_P9_VECTOR && TARGET_64BIT" > + "stxvll %x0,%1,%2" > + [(set_attr "length" "8") > + (set_attr "type" "vecstore")]) That's the wrong length now (just a single insn; doesn't need a length attribute). Many of these comments apply to multiple places, please check all. Thanks, Segher
GCC maintainers: Addressed the comments from Segher about copying operands in define_expand lxvll and stxvll. Added new temp for the output of the sldi instructions to give the allocator the freedom to select the registers. Removed constraints in the expanders. Cleaned up issues left over from the previous patch version. Removed length attributes that are now 4 rather than 8. Tested on powerpc64le-unknown-linux-gnu (Power 9 LE), powerpc64le-unknown-linux-gnu (Power 8 LE) and powerpc64le-unknown-linux-gnu (Power 8 BE) without regressions. Please let me know if there are any additional issues to address. ---------------------------------------------------------------------------- 2017-09-18 Carl Love <cel@us.ibm.com> * config/rs6000/rs6000-c.c (P9V_BUILTIN_VEC_XL_LEN_R, P9V_BUILTIN_VEC_XST_LEN_R): Add support for builtins vector unsigned char vec_xl_len_r (unsigned char *, size_t); void vec_xst_len_r (vector unsigned char, unsigned char *, size_t); * config/rs6000/altivec.h (vec_xl_len_r, vec_xst_len_r): Add defines. * config/rs6000/rs6000-builtin.def (XL_LEN_R, XST_LEN_R): Add definitions and overloading. * config/rs6000/rs6000.c (altivec_expand_builtin): Add case statement for P9V_BUILTIN_XST_LEN_R. (altivec_init_builtins): Add def_builtin for P9V_BUILTIN_STXVLL. * config/rs6000/vsx.md (lxvll, stxvll, xl_len_r, xst_len_r): Add define_expand and define_insn for the instructions and builtins. * doc/extend.texi: Update the built-in documentation file for the new built-in functions. * config/rs6000/altivec.md (altivec_lvsl_reg, altivec_lvsr_reg): Add define_insn for the instructions. gcc/testsuite/ChangeLog: 2017-09-18 Carl Love <cel@us.ibm.com> * gcc.target/powerpc/builtins-5-p9-runnable.c: Add new runnable test file for the new built-ins and the existing built-ins. 
--- gcc/config/rs6000/altivec.h | 2 + gcc/config/rs6000/altivec.md | 20 +- gcc/config/rs6000/rs6000-builtin.def | 4 + gcc/config/rs6000/rs6000-c.c | 8 + gcc/config/rs6000/rs6000.c | 11 +- gcc/config/rs6000/vsx.md | 64 +++++ gcc/doc/extend.texi | 4 + .../gcc.target/powerpc/builtins-5-p9-runnable.c | 309 +++++++++++++++++++++ 8 files changed, 419 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/builtins-5-p9-runnable.c diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index c8e508cf0..94a4db24a 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -467,6 +467,8 @@ #ifdef _ARCH_PPC64 #define vec_xl_len __builtin_vec_lxvl #define vec_xst_len __builtin_vec_stxvl +#define vec_xl_len_r __builtin_vec_xl_len_r +#define vec_xst_len_r __builtin_vec_xst_len_r #endif #define vec_cmpnez __builtin_vec_vcmpnez diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 0aa1e3016..a01720545 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -2542,6 +2542,15 @@ DONE; }) +(define_insn "altivec_lvsl_reg" + [(set (match_operand:V16QI 0 "altivec_register_operand" "=v") + (unspec:V16QI + [(match_operand:DI 1 "gpc_reg_operand" "b")] + UNSPEC_LVSL_REG))] + "TARGET_ALTIVEC" + "lvsl %0,0,%1" + [(set_attr "type" "vecload")]) + (define_insn "altivec_lvsl_direct" [(set (match_operand:V16QI 0 "register_operand" "=v") (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "Z")] @@ -2551,7 +2560,7 @@ [(set_attr "type" "vecload")]) (define_expand "altivec_lvsr" - [(use (match_operand:V16QI 0 "register_operand" "")) + [(use (match_operand:V16QI 0 "altivec_register_operand" "")) (use (match_operand:V16QI 1 "memory_operand" ""))] "TARGET_ALTIVEC" { @@ -2574,6 +2583,15 @@ DONE; }) +(define_insn "altivec_lvsr_reg" + [(set (match_operand:V16QI 0 "altivec_register_operand" "=v") + (unspec:V16QI + [(match_operand:DI 1 "gpc_reg_operand" "b")] + UNSPEC_LVSR_REG))] + "TARGET_ALTIVEC" + 
"lvsr %0,0,%1" + [(set_attr "type" "vecload")]) + (define_insn "altivec_lvsr_direct" [(set (match_operand:V16QI 0 "register_operand" "=v") (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "Z")] diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index 850164a09..8f87ccea4 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -2125,6 +2125,7 @@ BU_P9V_OVERLOAD_2 (VIESP, "insert_exp_sp") /* 2 argument vector functions added in ISA 3.0 (power9). */ BU_P9V_64BIT_VSX_2 (LXVL, "lxvl", CONST, lxvl) +BU_P9V_64BIT_VSX_2 (XL_LEN_R, "xl_len_r", CONST, xl_len_r) BU_P9V_AV_2 (VEXTUBLX, "vextublx", CONST, vextublx) BU_P9V_AV_2 (VEXTUBRX, "vextubrx", CONST, vextubrx) @@ -2141,6 +2142,7 @@ BU_P9V_VSX_3 (VINSERT4B_DI, "vinsert4b_di", CONST, vinsert4b_di) /* 3 argument vector functions returning void, treated as SPECIAL, added in ISA 3.0 (power9). */ BU_P9V_64BIT_AV_X (STXVL, "stxvl", MISC) +BU_P9V_64BIT_AV_X (XST_LEN_R, "xst_len_r", MISC) /* 1 argument vector functions added in ISA 3.0 (power9). 
*/ BU_P9V_AV_1 (VCLZLSBB, "vclzlsbb", CONST, vclzlsbb) @@ -2182,12 +2184,14 @@ BU_P9V_AV_P (VCMPNEZW_P, "vcmpnezw_p", CONST, vector_nez_v4si_p) /* ISA 3.0 Vector scalar overloaded 2 argument functions */ BU_P9V_OVERLOAD_2 (LXVL, "lxvl") +BU_P9V_OVERLOAD_2 (XL_LEN_R, "xl_len_r") BU_P9V_OVERLOAD_2 (VEXTULX, "vextulx") BU_P9V_OVERLOAD_2 (VEXTURX, "vexturx") BU_P9V_OVERLOAD_2 (VEXTRACT4B, "vextract4b") /* ISA 3.0 Vector scalar overloaded 3 argument functions */ BU_P9V_OVERLOAD_3 (STXVL, "stxvl") +BU_P9V_OVERLOAD_3 (XST_LEN_R, "xst_len_r") BU_P9V_OVERLOAD_3 (VINSERT4B, "vinsert4b") /* Overloaded CMPNE support was implemented prior to Power 9, diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index b2df850e8..2388260be 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -4789,6 +4789,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P9V_BUILTIN_VEC_VSCEDPUO, P9V_BUILTIN_VSCEDPUO, RS6000_BTI_INTSI, RS6000_BTI_double, RS6000_BTI_double, 0 }, + { P9V_BUILTIN_VEC_XL_LEN_R, P9V_BUILTIN_XL_LEN_R, + RS6000_BTI_unsigned_V16QI, ~RS6000_BTI_UINTQI, + RS6000_BTI_unsigned_long_long, 0 }, + { P9V_BUILTIN_VEC_LXVL, P9V_BUILTIN_LXVL, RS6000_BTI_V16QI, ~RS6000_BTI_INTQI, RS6000_BTI_unsigned_long_long, 0 }, @@ -4833,6 +4837,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { /* At an appropriate future time, add support for the RS6000_BTI_Float16 (exact name to be determined) type here. 
*/ + { P9V_BUILTIN_VEC_XST_LEN_R, P9V_BUILTIN_XST_LEN_R, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, + ~RS6000_BTI_UINTQI, RS6000_BTI_unsigned_long_long}, + { P9V_BUILTIN_VEC_STXVL, P9V_BUILTIN_STXVL, RS6000_BTI_void, RS6000_BTI_V16QI, ~RS6000_BTI_INTQI, RS6000_BTI_unsigned_long_long }, diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index ecdf776b9..10919edc7 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -15546,6 +15546,9 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp) case P9V_BUILTIN_STXVL: return altivec_expand_stxvl_builtin (CODE_FOR_stxvl, exp); + case P9V_BUILTIN_XST_LEN_R: + return altivec_expand_stxvl_builtin (CODE_FOR_xst_len_r, exp); + case VSX_BUILTIN_STXVD2X_V1TI: return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp); case VSX_BUILTIN_STXVD2X_V2DF: @@ -17488,8 +17491,12 @@ altivec_init_builtins (void) def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL); if (TARGET_P9_VECTOR) - def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long, - P9V_BUILTIN_STXVL); + { + def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long, + P9V_BUILTIN_STXVL); + def_builtin ("__builtin_xst_len_r", void_ftype_v16qi_pvoid_long, + P9V_BUILTIN_XST_LEN_R); + } /* Add the DST variants. 
*/ d = bdesc_dst; diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 9b24c7b72..c5e56e96e 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -382,8 +382,16 @@ UNSPEC_VSX_VTSTDC UNSPEC_VSX_VEC_INIT UNSPEC_VSX_VSIGNED2 + UNSPEC_LXVL + UNSPEC_LXVLL + UNSPEC_LVSL_REG + UNSPEC_LVSR_REG UNSPEC_STXVL + UNSPEC_STXVLL + UNSPEC_XL_LEN_R + UNSPEC_XST_LEN_R + UNSPEC_VCLZLSBB UNSPEC_VCTZLSBB UNSPEC_VEXTUBLX @@ -4352,6 +4360,43 @@ [(set_attr "length" "8") (set_attr "type" "vecload")]) +(define_insn "lxvll" + [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") + (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b") + (match_operand:DI 2 "register_operand" "r")] + UNSPEC_LXVLL))] + "TARGET_P9_VECTOR" + "lxvll %x0,%1,%2" + [(set_attr "type" "vecload")]) + +;; Expand for builtin xl_len_r +(define_expand "xl_len_r" + [(match_operand:V16QI 0 "vsx_register_operand" "=wa") + (match_operand:DI 1 "register_operand" "b") + (match_operand:DI 2 "register_operand" "r")] + "" +{ + rtx shift_mask = gen_reg_rtx (V16QImode); + rtx rtx_vtmp = gen_reg_rtx (V16QImode); + rtx tmp = gen_reg_rtx (DImode); + + emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2])); + emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56))); + emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp)); + emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp, + shift_mask)); + DONE; +}) + +(define_insn "stxvll" + [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b")) + (unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa") + (match_operand:DI 2 "register_operand" "r")] + UNSPEC_STXVLL))] + "TARGET_P9_VECTOR" + "stxvll %x0,%1,%2" + [(set_attr "type" "vecstore")]) + ;; Store VSX Vector with Length (define_expand "stxvl" [(set (match_dup 3) @@ -4377,6 +4422,25 @@ [(set_attr "length" "8") (set_attr "type" "vecstore")]) +;; Expand for builtin xst_len_r +(define_expand "xst_len_r" + [(match_operand:V16QI 0 "vsx_register_operand" "=wa") + 
(match_operand:DI 1 "register_operand" "b") + (match_operand:DI 2 "register_operand" "r")] + "UNSPEC_XST_LEN_R" +{ + rtx shift_mask = gen_reg_rtx (V16QImode); + rtx rtx_vtmp = gen_reg_rtx (V16QImode); + rtx tmp = gen_reg_rtx (DImode); + + emit_insn (gen_altivec_lvsr_reg (shift_mask, operands[2])); + emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0], + shift_mask)); + emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56))); + emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp)); + DONE; +}) + ;; Vector Compare Not Equal Byte (define_insn "vcmpneb" [(set (match_operand:V16QI 0 "altivec_register_operand" "=v") diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 649be015d..37fd769df 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -15631,6 +15631,8 @@ vector unsigned short vec_xl_len (unsigned short *addr, size_t len); vector double vec_xl_len (double *addr, size_t len); vector float vec_xl_len (float *addr, size_t len); +vector unsigned char vec_xl_len_r (unsigned char *addr, size_t len); + void vec_xst_len (vector signed char data, signed char *addr, size_t len); void vec_xst_len (vector unsigned char data, unsigned char *addr, size_t len); void vec_xst_len (vector signed int data, signed int *addr, size_t len); @@ -15644,6 +15646,8 @@ void vec_xst_len (vector signed __int128 data, signed __int128 *addr, size_t len void vec_xst_len (vector double data, double *addr, size_t len); void vec_xst_len (vector float data, float *addr, size_t len); +void vec_xst_len_r (vector unsigned char data, unsigned char *addr, size_t len); + signed char vec_xlx (unsigned int index, vector signed char data); unsigned char vec_xlx (unsigned int index, vector unsigned char data); signed short vec_xlx (unsigned int index, vector signed short data); diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-5-p9-runnable.c b/gcc/testsuite/gcc.target/powerpc/builtins-5-p9-runnable.c new file mode 100644 index 000000000..ad3947196 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/powerpc/builtins-5-p9-runnable.c @@ -0,0 +1,309 @@ +/* { dg-do run { target { powerpc*-*-* && p9vector_hw } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-options "-mcpu=power9 -O2" } */ + +#include <stdint.h> +#include <stdio.h> +#include <inttypes.h> +#include <altivec.h> // vector + +#define TRUE 1 +#define FALSE 0 + +#ifdef DEBUG +#include <stdio.h> +#endif + +void abort (void); + +int result_wrong(vector unsigned char vec_expected, + vector unsigned char vec_actual) +{ + int i; + + for (i=0; i<16; i++) + if (vec_expected[i] != vec_actual[i]) + return TRUE; + + return FALSE; +} + +int main() { + int i, j; + size_t size; + unsigned char data_uc[100]; + vector unsigned char store_data_uc; + unsigned char *address; + vector unsigned char *datap; + + vector unsigned char vec_uc_expected1, vec_uc_expected2, + vec_uc_result1, vec_uc_result2; + vector int data_int; + + for (i=0; i<100; i++) + data_uc[i] = i+1; + + + /* VEC_XL_LEN */ + + size = 8; + vec_uc_result1 = vec_xl_len (data_uc, size); + + vec_uc_expected1 = (vector unsigned char){ 1, 2, 3, 4, 5, 6, 7, 8, + 0, 0, 0, 0, 0, 0, 0, 0}; + + if (result_wrong (vec_uc_expected1, vec_uc_result1)) + { +#ifdef DEBUG + printf("Error: result does not match expected result\n"); + printf("vec_xl_len (%d): vec_uc_expected1[0] to vec_uc_expected1[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,",vec_uc_expected1[i]); + + printf("\nvec_xl_len (%d): vec_uc_result1[0] to vec_uc_result1[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_result1[i]); + + printf("\n\n"); +#else + abort(); +#endif + } + + + /* VEC_XL_LEN_R */ + size = 8; + vec_uc_result2 = vec_xl_len_r(data_uc, size); + + vec_uc_expected2 = (vector unsigned char){8, 7, 6, 5, 4, 3, 2, 1, + 0, 0, 0, 0, 0, 0, 0, 0,}; + + if (result_wrong (vec_uc_expected2, vec_uc_result2)) + { +#ifdef DEBUG + printf("Error: result does not match expected result\n"); + 
printf("vec_xl_len_r(%d): vec_uc_expected2[0] to vec_uc_expected2[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_expected2[i]); + + printf("\nvec_xl_len_r(%d): vec_uc_result2[0] to vec_uc_result2[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_result2[i]); + + printf("\n\n"); +#else + abort(); +#endif + } + + + size = 4; + vec_uc_result2 = vec_xl_len_r(data_uc, size); + + vec_uc_expected2 = (vector unsigned char){ 4, 3, 2, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; + + if (result_wrong (vec_uc_expected2, vec_uc_result2)) + { +#ifdef DEBUG + printf("Error: result does not match expected result\n"); + printf("vec_xl_len_r(%d): vec_uc_expected2[0] to vec_uc_expected2[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_expected2[i]); + + printf("\nvec_xl_len_r(%d): vec_uc_result2[0] to vec_uc_result2[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_result2[i]); + + printf("\n\n"); +#else + abort(); +#endif + } + + size = 2; + vec_uc_result2 = vec_xl_len_r(data_uc, size); + + vec_uc_expected2 = (vector unsigned char){ 2, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; + + if (result_wrong (vec_uc_expected2, vec_uc_result2)) + { +#ifdef DEBUG + printf("Error: result does not match expected result\n"); + printf("vec_xl_len_r(%d): vec_uc_expected2[0] to vec_uc_expected2[15]\n", + size); + for (i=0; i<16; i++) + printf(" %d,", vec_uc_expected2[i]); + + printf("\nvec_xl_len_r(%d) vec_uc_result2[0] to vec_uc_result2[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_result2[i]); + + printf("\n\n"); +#else + abort(); +#endif + } + + + /* VEC_XST_LEN */ + vec_uc_expected2 = (vector unsigned char){ 1, 2, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; + store_data_uc = (vector unsigned char){ 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16 }; + size = 2; + + for (i=0; i<16; i++) + vec_uc_result2[i] = 0; + + address = &vec_uc_result2[0]; + vec_xst_len (store_data_uc, address, size); + + if 
(result_wrong (vec_uc_expected2, vec_uc_result2)) + { +#ifdef DEBUG + printf("Error: result does not match expected result\n"); + printf("vec_xst_len (%d) vec_uc_result2[0] to vec_uc_result2[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_expected2[i]); + + printf("\nvec_xst_len (%d) store_data_uc[0] to store_data_uc[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_result2[i]); + + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uc_expected2 = (vector unsigned char){ 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 0, 0 }; + store_data_uc = (vector unsigned char){ 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16 }; + size = 14; + + for (i=0; i<16; i++) + vec_uc_result2[i] = 0; + + address = &vec_uc_result2[0]; + + vec_xst_len (store_data_uc, address, size); + + if (result_wrong (vec_uc_expected2, vec_uc_result2)) + { +#ifdef DEBUG + printf("Error: result does not match expected result\n"); + printf("vec_xst_len (%d) vec_uc_result2[0] to vec_uc_result2[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_expected2[i]); + + printf("\nvec_xst_len (%d) store_data_uc[0] to store_data_uc[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_result2[i]); + + printf("\n\n"); +#else + abort(); +#endif + } + + /* VEC_XST_LEN_R */ + vec_uc_expected1 = (vector unsigned char){ 2, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; + store_data_uc = (vector unsigned char){ 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16 }; + vec_uc_result1 = (vector unsigned char){ 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; + + size = 2; + + for (i=0; i<16; i++) + vec_uc_result1[i] = 0; + + address = &vec_uc_result1[0]; + + vec_xst_len_r(store_data_uc, address, size); + + if (result_wrong (vec_uc_expected1, vec_uc_result1)) + { +#ifdef DEBUG + printf("Error: result does not match expected result\n"); + printf("vec_xst_len_r(%d) vec_uc_expected1[0] to vec_uc_expected1[15]\n", + size); + + for (i=0; i<16; i++) + 
printf(" %d,", vec_uc_expected1[i]); + + printf("\nvec_xst_len_r(%d) result[0] to result[15]\n", size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_result1[i]); + + printf("\n\n"); +#else + abort(); +#endif + } + + + vec_uc_expected1 = (vector unsigned char){ 14, 13, 12, 11, 10, 9, 8, 7, + 6, 5, 4, 3, 2, 1, 0, 0 }; + store_data_uc = (vector unsigned char){ 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16 }; + vec_uc_result1 = (vector unsigned char){ 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; + + size = 14; + + for (i=0; i<16; i++) + vec_uc_result1[i] = 0; + + address = &vec_uc_result1[0]; + + vec_xst_len_r(store_data_uc, address, size); + + if (result_wrong (vec_uc_expected1, vec_uc_result1)) + { +#ifdef DEBUG + printf("Error: result does not match expected result\n"); + printf("vec_xst_len_r(%d) vec_uc_expected2[0] to vec_uc_expected2[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_expected2[i]); + + printf("\nvec_xst_len_r(%d) result[0] to result[15]\n", size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_result1[i]); + + printf("\n\n"); +#else + abort(); +#endif + } +}
Hi Carl, On Mon, Sep 18, 2017 at 11:31:07AM -0700, Carl Love wrote: > * gcc.target/powerpc/builtins-5-p9-runnable.c: Add new runable test file > for the new built-ins and the existing built-ins. Typo ("runable"). > (define_expand "altivec_lvsr" > - [(use (match_operand:V16QI 0 "register_operand" "")) > + [(use (match_operand:V16QI 0 "altivec_register_operand" "")) > (use (match_operand:V16QI 1 "memory_operand" ""))] Empty constraint strings in define_expand are the default; just leave them out. > +;; Expand for builtin xl_len_r > +(define_expand "xl_len_r" > + [(match_operand:V16QI 0 "vsx_register_operand" "=wa") > + (match_operand:DI 1 "register_operand" "b") > + (match_operand:DI 2 "register_operand" "r")] > + "" Non-empty constraints in an expander do not really make sense either :-) All the rest looks fine. Please fix up the expanders and commit. Thanks! Segher
diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index c8e508c..94a4db2 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -467,6 +467,8 @@ #ifdef _ARCH_PPC64 #define vec_xl_len __builtin_vec_lxvl #define vec_xst_len __builtin_vec_stxvl +#define vec_xl_len_r __builtin_vec_xl_len_r +#define vec_xst_len_r __builtin_vec_xst_len_r #endif #define vec_cmpnez __builtin_vec_vcmpnez diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index 850164a..8f87cce 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -2125,6 +2125,7 @@ BU_P9V_OVERLOAD_2 (VIESP, "insert_exp_sp") /* 2 argument vector functions added in ISA 3.0 (power9). */ BU_P9V_64BIT_VSX_2 (LXVL, "lxvl", CONST, lxvl) +BU_P9V_64BIT_VSX_2 (XL_LEN_R, "xl_len_r", CONST, xl_len_r) BU_P9V_AV_2 (VEXTUBLX, "vextublx", CONST, vextublx) BU_P9V_AV_2 (VEXTUBRX, "vextubrx", CONST, vextubrx) @@ -2141,6 +2142,7 @@ BU_P9V_VSX_3 (VINSERT4B_DI, "vinsert4b_di", CONST, vinsert4b_di) /* 3 argument vector functions returning void, treated as SPECIAL, added in ISA 3.0 (power9). */ BU_P9V_64BIT_AV_X (STXVL, "stxvl", MISC) +BU_P9V_64BIT_AV_X (XST_LEN_R, "xst_len_r", MISC) /* 1 argument vector functions added in ISA 3.0 (power9). 
*/ BU_P9V_AV_1 (VCLZLSBB, "vclzlsbb", CONST, vclzlsbb) @@ -2182,12 +2184,14 @@ BU_P9V_AV_P (VCMPNEZW_P, "vcmpnezw_p", CONST, vector_nez_v4si_p) /* ISA 3.0 Vector scalar overloaded 2 argument functions */ BU_P9V_OVERLOAD_2 (LXVL, "lxvl") +BU_P9V_OVERLOAD_2 (XL_LEN_R, "xl_len_r") BU_P9V_OVERLOAD_2 (VEXTULX, "vextulx") BU_P9V_OVERLOAD_2 (VEXTURX, "vexturx") BU_P9V_OVERLOAD_2 (VEXTRACT4B, "vextract4b") /* ISA 3.0 Vector scalar overloaded 3 argument functions */ BU_P9V_OVERLOAD_3 (STXVL, "stxvl") +BU_P9V_OVERLOAD_3 (XST_LEN_R, "xst_len_r") BU_P9V_OVERLOAD_3 (VINSERT4B, "vinsert4b") /* Overloaded CMPNE support was implemented prior to Power 9, diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index 897306c..15f0406 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -4787,6 +4787,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P9V_BUILTIN_VEC_VSCEDPUO, P9V_BUILTIN_VSCEDPUO, RS6000_BTI_INTSI, RS6000_BTI_double, RS6000_BTI_double, 0 }, + { P9V_BUILTIN_VEC_XL_LEN_R, P9V_BUILTIN_XL_LEN_R, + RS6000_BTI_unsigned_V16QI, ~RS6000_BTI_UINTQI, + RS6000_BTI_unsigned_long_long, 0 }, + { P9V_BUILTIN_VEC_LXVL, P9V_BUILTIN_LXVL, RS6000_BTI_V16QI, ~RS6000_BTI_INTQI, RS6000_BTI_unsigned_long_long, 0 }, @@ -4831,6 +4835,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { /* At an appropriate future time, add support for the RS6000_BTI_Float16 (exact name to be determined) type here. 
*/ + { P9V_BUILTIN_VEC_XST_LEN_R, P9V_BUILTIN_XST_LEN_R, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, + ~RS6000_BTI_UINTQI, RS6000_BTI_unsigned_long_long}, + { P9V_BUILTIN_VEC_STXVL, P9V_BUILTIN_STXVL, RS6000_BTI_void, RS6000_BTI_V16QI, ~RS6000_BTI_INTQI, RS6000_BTI_unsigned_long_long }, diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 6d613c3..6df2d79 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -15580,6 +15580,8 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp) case P9V_BUILTIN_STXVL: return altivec_expand_stxvl_builtin (CODE_FOR_stxvl, exp); + case P9V_BUILTIN_XST_LEN_R: + return altivec_expand_stxvl_builtin (CODE_FOR_xst_len_r, exp); case VSX_BUILTIN_STXVD2X_V1TI: return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp); case VSX_BUILTIN_STXVD2X_V2DF: @@ -17534,9 +17536,12 @@ altivec_init_builtins (void) def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX); def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL); - if (TARGET_P9_VECTOR) + if (TARGET_P9_VECTOR) { def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long, P9V_BUILTIN_STXVL); + def_builtin ("__builtin_xst_len_r", void_ftype_v16qi_pvoid_long, + P9V_BUILTIN_XST_LEN_R); + } /* Add the DST variants. 
*/ d = bdesc_dst; diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index b47eeac..b9c7343 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -382,8 +382,17 @@ UNSPEC_VSX_VTSTDC UNSPEC_VSX_VEC_INIT UNSPEC_VSX_VSIGNED2 + + UNSPEC_ADDI_NEG16 UNSPEC_LXVL + UNSPEC_LXVLL + UNSPEC_LVSL_REG + UNSPEC_LVSR_REG UNSPEC_STXVL + UNSPEC_STXVLL + UNSPEC_XL_LEN_R + UNSPEC_XST_LEN_R + UNSPEC_VCLZLSBB UNSPEC_VCTZLSBB UNSPEC_VEXTUBLX @@ -4352,6 +4361,106 @@ [(set_attr "length" "8") (set_attr "type" "vecload")]) +(define_insn "addi_neg16" + [(set (match_operand:DI 0 "vsx_register_operand" "=r") + (unspec:DI + [(match_operand:DI 1 "gpc_reg_operand" "r")] + UNSPEC_ADDI_NEG16))] + "" + "addi %0,%1,-16" +) + +;; Load VSX Vector with Length, right justified +(define_expand "lxvll" + [(set (match_dup 3) + (match_operand:DI 2 "register_operand")) + (set (match_operand:V16QI 0 "vsx_register_operand") + (unspec:V16QI + [(match_operand:DI 1 "gpc_reg_operand") + (match_dup 3)] + UNSPEC_LXVLL))] + "TARGET_P9_VECTOR && TARGET_64BIT" +{ + operands[3] = gen_reg_rtx (DImode); +}) + +(define_insn "*lxvll" + [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") + (unspec:V16QI + [(match_operand:DI 1 "gpc_reg_operand" "b") + (match_operand:DI 2 "register_operand" "+r")] + UNSPEC_LXVLL))] + "TARGET_P9_VECTOR && TARGET_64BIT" +;; "lxvll %x0,%1,%2;" + "sldi %2,%2, 56\; lxvll %x0,%1,%2;" + [(set_attr "length" "8") + (set_attr "type" "vecload")]) + +(define_insn "altivec_lvsl_reg" + [(set (match_operand:V16QI 0 "vsx_register_operand" "=v") + (unspec:V16QI + [(match_operand:DI 1 "gpc_reg_operand" "b")] + UNSPEC_LVSL_REG))] + "TARGET_ALTIVEC" + "lvsl %0,0,%1" + [(set_attr "type" "vecload")]) + +(define_insn "altivec_lvsr_reg" + [(set (match_operand:V16QI 0 "vsx_register_operand" "=v") + (unspec:V16QI + [(match_operand:DI 1 "gpc_reg_operand" "b")] + UNSPEC_LVSR_REG))] + "TARGET_ALTIVEC" + "lvsr %0,0,%1" + [(set_attr "type" "vecload")]) + +;; Expand for builtin xl_len_r 
+(define_expand "xl_len_r" + [(match_operand:V16QI 0 "vsx_register_operand" "=v") + (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "register_operand" "r")] + "UNSPEC_XL_LEN_R" +{ + rtx shift_mask = gen_reg_rtx (V16QImode); + rtx rtx_vtmp = gen_reg_rtx (V16QImode); + rtx tmp = gen_reg_rtx (DImode); + +/* Setup permute vector to shift right by operands[2] bytes. + Note: addi operands[2], -16 is negative so we actually need to + shift left to get a right shift. */ + emit_insn (gen_addi_neg16 (tmp, operands[2])); + emit_insn (gen_altivec_lvsl_reg (shift_mask, tmp)); + emit_insn (gen_lxvll (rtx_vtmp, operands[1], operands[2])); + emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, + rtx_vtmp, shift_mask)); + DONE; +}) + +;; Store VSX Vector with Length, right justified +(define_expand "stxvll" + [(set (match_dup 3) + (match_operand:DI 2 "register_operand")) + (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand")) + (unspec:V16QI + [(match_operand:V16QI 0 "vsx_register_operand") + (match_dup 3)] + UNSPEC_STXVLL))] + "TARGET_P9_VECTOR && TARGET_64BIT" +{ + operands[3] = gen_reg_rtx (DImode); +}) + +(define_insn "*stxvll" + [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b")) + (unspec:V16QI + [(match_operand:V16QI 0 "vsx_register_operand" "wa") + (match_operand:DI 2 "register_operand" "+r")] + UNSPEC_STXVLL))] + "TARGET_P9_VECTOR && TARGET_64BIT" + "sldi %2,%2,56\;stxvll %x0,%1,%2" + [(set_attr "length" "8") + (set_attr "type" "vecstore")]) + ;; Store VSX Vector with Length (define_expand "stxvl" [(set (match_dup 3) @@ -4373,10 +4482,32 @@ (match_operand:DI 2 "register_operand" "+r")] UNSPEC_STXVL))] "TARGET_P9_VECTOR && TARGET_64BIT" - "sldi %2,%2\;stxvl %x0,%1,%2" + "sldi %2,%2,56\;stxvl %x0,%1,%2" [(set_attr "length" "8") (set_attr "type" "vecstore")]) +;; Expand for builtin xst_len_r +(define_expand "xst_len_r" + [(match_operand:V16QI 0 "vsx_register_operand" "=v") + (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 
2 "register_operand" "r")] + "UNSPEC_XST_LEN_R" +{ + rtx shift_mask = gen_reg_rtx (V16QImode); + rtx rtx_vtmp = gen_reg_rtx (V16QImode); + rtx tmp = gen_reg_rtx (DImode); + +/* Setup permute vector to shift left by operands[2] bytes. + Note: addi operands[2], -16 is negative so we actually need to + shift right to get a left shift. */ + emit_insn (gen_addi_neg16 (tmp, operands[2])); + emit_insn (gen_altivec_lvsr_reg (shift_mask, tmp)); + emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], + operands[0], shift_mask)); + emit_insn (gen_stxvll (rtx_vtmp, operands[1], operands[2])); + DONE; +}) + ;; Vector Compare Not Equal Byte (define_insn "vcmpneb" [(set (match_operand:V16QI 0 "altivec_register_operand" "=v") diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 649be01..37fd769 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -15631,6 +15631,8 @@ vector unsigned short vec_xl_len (unsigned short *addr, size_t len); vector double vec_xl_len (double *addr, size_t len); vector float vec_xl_len (float *addr, size_t len); +vector unsigned char vec_xl_len_r (unsigned char *addr, size_t len); + void vec_xst_len (vector signed char data, signed char *addr, size_t len); void vec_xst_len (vector unsigned char data, unsigned char *addr, size_t len); void vec_xst_len (vector signed int data, signed int *addr, size_t len); @@ -15644,6 +15646,8 @@ void vec_xst_len (vector signed __int128 data, signed __int128 *addr, size_t len void vec_xst_len (vector double data, double *addr, size_t len); void vec_xst_len (vector float data, float *addr, size_t len); +void vec_xst_len_r (vector unsigned char data, unsigned char *addr, size_t len); + signed char vec_xlx (unsigned int index, vector signed char data); unsigned char vec_xlx (unsigned int index, vector unsigned char data); signed short vec_xlx (unsigned int index, vector signed short data); diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-5-p9-runnable.c 
b/gcc/testsuite/gcc.target/powerpc/builtins-5-p9-runnable.c new file mode 100644 index 0000000..448e974 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/builtins-5-p9-runnable.c @@ -0,0 +1,309 @@ +/* { dg-do run { target { powerpc64*-*-* && { p9vector_hw } } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-options "-mcpu=power9 -O2" } */ + +#include <stdint.h> +#include <stdio.h> +#include <inttypes.h> +#include <altivec.h> // vector + +#define TRUE 1 +#define FALSE 0 + +#ifdef DEBUG +#include <stdio.h> +#endif + +void abort (void); + +int result_wrong(vector unsigned char vec_expected, + vector unsigned char vec_actual) +{ + int i; + + for (i=0; i<16; i++) + if (vec_expected[i] != vec_actual[i]) + return TRUE; + + return FALSE; +} + +int main() { + int i, j; + size_t size; + unsigned char data_uc[100]; + vector unsigned char store_data_uc; + unsigned char *address; + vector unsigned char *datap; + + vector unsigned char vec_uc_expected1, vec_uc_expected2, + vec_uc_result1, vec_uc_result2; + vector int data_int; + + for (i=0; i<100; i++) + data_uc[i] = i+1; + + + /* VEC_XL_LEN */ + + size = 8; + vec_uc_result1 = vec_xl_len (data_uc, size); + + vec_uc_expected1 = (vector unsigned char){ 1, 2, 3, 4, 5, 6, 7, 8, + 0, 0, 0, 0, 0, 0, 0, 0}; + + if (result_wrong (vec_uc_expected1, vec_uc_result1)) + { +#ifdef DEBUG + printf("Error: result does not match expected result\n"); + printf("vec_xl_len (%d): vec_uc_expected1[0] to vec_uc_expected1[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,",vec_uc_expected1[i]); + + printf("\nvec_xl_len (%d): vec_uc_result1[0] to vec_uc_result1[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_result1[i]); + + printf("\n\n"); +#else + abort(); +#endif + } + + + /* VEC_XL_LEN_R */ + size = 8; + vec_uc_result2 = vec_xl_len_r(data_uc, size); + + vec_uc_expected2 = (vector unsigned char){8, 7, 6, 5, 4, 3, 2, 1, + 0, 0, 0, 0, 0, 0, 0, 0,}; + + if 
(result_wrong (vec_uc_expected2, vec_uc_result2)) + { +#ifdef DEBUG + printf("Error: result does not match expected result\n"); + printf("vec_xl_len_r(%d): vec_uc_expected2[0] to vec_uc_expected2[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_expected2[i]); + + printf("\nvec_xl_len_r(%d): vec_uc_result2[0] to vec_uc_result2[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_result2[i]); + + printf("\n\n"); +#else + abort(); +#endif + } + + + size = 4; + vec_uc_result2 = vec_xl_len_r(data_uc, size); + + vec_uc_expected2 = (vector unsigned char){ 4, 3, 2, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; + + if (result_wrong (vec_uc_expected2, vec_uc_result2)) + { +#ifdef DEBUG + printf("Error: result does not match expected result\n"); + printf("vec_xl_len_r(%d): vec_uc_expected2[0] to vec_uc_expected2[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_expected2[i]); + + printf("\nvec_xl_len_r(%d): vec_uc_result2[0] to vec_uc_result2[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_result2[i]); + + printf("\n\n"); +#else + abort(); +#endif + } + + size = 2; + vec_uc_result2 = vec_xl_len_r(data_uc, size); + + vec_uc_expected2 = (vector unsigned char){ 2, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; + + if (result_wrong (vec_uc_expected2, vec_uc_result2)) + { +#ifdef DEBUG + printf("Error: result does not match expected result\n"); + printf("vec_xl_len_r(%d): vec_uc_expected2[0] to vec_uc_expected2[15]\n", + size); + for (i=0; i<16; i++) + printf(" %d,", vec_uc_expected2[i]); + + printf("\nvec_xl_len_r(%d) vec_uc_result2[0] to vec_uc_result2[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_result2[i]); + + printf("\n\n"); +#else + abort(); +#endif + } + + + /* VEC_XST_LEN */ + vec_uc_expected2 = (vector unsigned char){ 1, 2, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; + store_data_uc = (vector unsigned char){ 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16 }; + size = 2; + + for 
(i=0; i<16; i++) + vec_uc_result2[i] = 0; + + address = &vec_uc_result2[0]; + vec_xst_len (store_data_uc, address, size); + + if (result_wrong (vec_uc_expected2, vec_uc_result2)) + { +#ifdef DEBUG + printf("Error: result does not match expected result\n"); + printf("vec_xst_len (%d) vec_uc_result2[0] to vec_uc_result2[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_expected2[i]); + + printf("\nvec_xst_len (%d) store_data_uc[0] to store_data_uc[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_result2[i]); + + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uc_expected2 = (vector unsigned char){ 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 0, 0 }; + store_data_uc = (vector unsigned char){ 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16 }; + size = 14; + + for (i=0; i<16; i++) + vec_uc_result2[i] = 0; + + address = &vec_uc_result2[0]; + + vec_xst_len (store_data_uc, address, size); + + if (result_wrong (vec_uc_expected2, vec_uc_result2)) + { +#ifdef DEBUG + printf("Error: result does not match expected result\n"); + printf("vec_xst_len (%d) vec_uc_result2[0] to vec_uc_result2[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_expected2[i]); + + printf("\nvec_xst_len (%d) store_data_uc[0] to store_data_uc[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_result2[i]); + + printf("\n\n"); +#else + abort(); +#endif + } + + /* VEC_XST_LEN_R */ + vec_uc_expected1 = (vector unsigned char){ 2, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; + store_data_uc = (vector unsigned char){ 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16 }; + vec_uc_result1 = (vector unsigned char){ 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; + + size = 2; + + for (i=0; i<16; i++) + vec_uc_result1[i] = 0; + + address = &vec_uc_result1[0]; + + vec_xst_len_r(store_data_uc, address, size); + + if (result_wrong (vec_uc_expected1, vec_uc_result1)) + { +#ifdef DEBUG + printf("Error: result does not match 
expected result\n"); + printf("vec_xst_len_r(%d) vec_uc_expected1[0] to vec_uc_expected1[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_expected1[i]); + + printf("\nvec_xst_len_r(%d) result[0] to result[15]\n", size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_result1[i]); + + printf("\n\n"); +#else + abort(); +#endif + } + + + vec_uc_expected1 = (vector unsigned char){ 14, 13, 12, 11, 10, 9, 8, 7, + 6, 5, 4, 3, 2, 1, 0, 0 }; + store_data_uc = (vector unsigned char){ 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16 }; + vec_uc_result1 = (vector unsigned char){ 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; + + size = 14; + + for (i=0; i<16; i++) + vec_uc_result1[i] = 0; + + address = &vec_uc_result1[0]; + + vec_xst_len_r(store_data_uc, address, size); + + if (result_wrong (vec_uc_expected1, vec_uc_result1)) + { +#ifdef DEBUG + printf("Error: result does not match expected result\n"); + printf("vec_xst_len_r(%d) vec_uc_expected2[0] to vec_uc_expected2[15]\n", + size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_expected2[i]); + + printf("\nvec_xst_len_r(%d) result[0] to result[15]\n", size); + + for (i=0; i<16; i++) + printf(" %d,", vec_uc_result1[i]); + + printf("\n\n"); +#else + abort(); +#endif + } +}