diff mbox series

[3/6,ver,2] rs6000, Add vector replace builtin support

Message ID edf8a29bd6cfd85439a4a863d24ab8f2ea5c2c50.camel@us.ibm.com
State New
Headers show
Series ] Permute Class Operations | expand

Commit Message

Carl Love June 15, 2020, 11:37 p.m. UTC
v2 fixes:

change log entries config/rs6000/vsx.md, config/rs6000/rs6000-builtin.def,
config/rs6000/rs6000-call.c.

gcc/config/rs6000/rs6000-call.c: fixed if check for 3rd arg between 0 and 3
                                 fixed if check for 3rd arg between 0 and 12

gcc/config/rs6000/vsx.md: removed REPLACE_ELT_atr definition and used
                          VS_scalar instead.
                          removed REPLACE_ELT_inst definition and used <mode>
                          instead.
                          fixed spelling mistake on Endianness.
                          fixed indenting for vreplace_elt_<mode>

-----------------------------------

GCC maintainers:

The following patch adds support for builtins vec_replace_elt and
vec_replace_unaligned.

The patch has been compiled and tested on

  powerpc64le-unknown-linux-gnu (Power 9 LE)

and mambo with no regression errors.

Please let me know if this patch is acceptable for the pu
branch.  Thanks.

                         Carl Love

-------------------------------------------------------

gcc/ChangeLog

2020-06-15 Carl Love  <cel@us.ibm.com>

        * config/rs6000/altivec.h: Add define for vec_replace_elt and
        vec_replace_unaligned.
        * config/rs6000/vsx.md (UNSPEC_REPLACE_ELT, UNSPEC_REPLACE_UN): New.
        (REPLACE_ELT): New mode iterator.
        (REPLACE_ELT_char, REPLACE_ELT_sh, REPLACE_ELT_max): New mode
        attributes.
        (vreplace_elt_<mode>, vreplace_un_<mode>,
        vreplace_elt_<mode>_inst): New.
        * config/rs6000/rs6000-builtin.def (VREPLACE_ELT_V4SI,
        VREPLACE_ELT_UV4SI, VREPLACE_ELT_V4SF, VREPLACE_ELT_V2DI,
        VREPLACE_ELT_UV2DI, VREPLACE_ELT_V2DF,
        VREPLACE_UN_V4SI, VREPLACE_UN_UV4SI, VREPLACE_UN_V4SF,
        VREPLACE_UN_V2DI, VREPLACE_UN_UV2DI, VREPLACE_UN_V2DF): New.
        (REPLACE_ELT, REPLACE_UN): New.
        * config/rs6000/rs6000-call.c (FUTURE_BUILTIN_VEC_REPLACE_ELT,
        FUTURE_BUILTIN_VEC_REPLACE_UN): New.
        (rs6000_expand_ternop_builtin): Add 3rd argument checks for
        CODE_FOR_vreplace_elt_v4si, CODE_FOR_vreplace_elt_v4sf,
        CODE_FOR_vreplace_un_v4si, CODE_FOR_vreplace_un_v4sf.
        (builtin_function_type) [FUTURE_BUILTIN_VREPLACE_ELT_UV4SI,
        FUTURE_BUILTIN_VREPLACE_ELT_UV2DI, FUTURE_BUILTIN_VREPLACE_UN_UV4SI,
        FUTURE_BUILTIN_VREPLACE_UN_UV2DI]: New cases.
        * doc/extend.texi: Add description for vec_replace_elt and
        vec_replace_unaligned builtins.


gcc/testsuite/ChangeLog

2020-06-15 Carl Love  <cel@us.ibm.com>
        * gcc.target/powerpc/vec-replace-word-runnable.c: Add new test.
---
 gcc/config/rs6000/altivec.h                   |   2 +
 gcc/config/rs6000/rs6000-builtin.def          |  16 +
 gcc/config/rs6000/rs6000-call.c               |  61 ++++
 gcc/config/rs6000/vsx.md                      |  60 ++++
 gcc/doc/extend.texi                           |  50 +++
 .../powerpc/vec-replace-word-runnable.c       | 289 ++++++++++++++++++
 6 files changed, 478 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-replace-word-runnable.c

Comments

will schmidt June 16, 2020, 5:56 p.m. UTC | #1
On Mon, 2020-06-15 at 16:37 -0700, Carl Love via Gcc-patches wrote:
> v2 fixes:
> 
> change log entries config/rs6000/vsx.md, config/rs6000/rs6000-builtin.def,
> config/rs6000/rs6000-call.c.
> 
> gcc/config/rs6000/rs6000-call.c: fixed if check for 3rd arg between 0 and 3
>                                  fixed if check for 3rd arg between 0 and 12
> 
> gcc/config/rs6000/vsx.md: removed REPLACE_ELT_atr definition and used
>                           VS_scalar instead.
>                           removed REPLACE_ELT_inst definition and used <mode> i\
> nstead

bad word break.


>                           fixed spelling mistake on Endianness.
>                           fixed indenting for vreplace_elt_<mode>
> 
> -----------------------------------
> 
> GCC maintainers:
> 
> The following patch adds support for builtins vec_replace_elt and
> vec_replace_unaligned.
> 
> The patch has been compiled and tested on
> 
>   powerpc64le-unknown-linux-gnu (Power 9 LE)
> 
> and mambo with no regression errors.
> 
> Please let me know if this patch is acceptable for the pu
> branch.  Thanks.

What branch?



> 
>                          Carl Love
> 
> -------------------------------------------------------
> 
> gcc/ChangeLog
> 
> 2020-06-15 Carl Love  <cel@us.ibm.com>
> 
>         * config/rs6000/altivec.h: Add define for vec_replace_elt and
>         vec_replace_unaligned.
>         * config/rs6000/vsx.md (UNSPEC_REPLACE_ELT, UNSPEC_REPLACE_UN): New.
>         (REPLACE_ELT): New mode iterator.
>         (REPLACE_ELT_atr, REPLACE_ELT_inst, REPLACE_ELT_char,
>         REPLACE_ELT_sh, REPLACE_ELT_max): New mode attributes.
>         (vreplace_un_<mode>, vreplace_elt_<mode>_inst): New.
>         * config/rs6000/rs6000-builtin.def (VREPLACE_ELT_V4SI, VREPLACE_ELT_UV4\
> SI,
>         VREPLACE_ELT_V4SF, VREPLACE_ELT_UV2DI, VREPLACE_ELT_V2DF,
>         VREPLACE_UN_V4SI, VREPLACE_UN_UV4SI, VREPLACE_UN_V4SF,
>         VREPLACE_UN_V2DI, VREPLACE_UN_UV2DI, VREPLACE_UN_V2DF): New.
>         (REPLACE_ELT, REPLACE_UN): New.
>         * config/rs6000/rs6000-call.c (FUTURE_BUILTIN_VEC_REPLACE_ELT,
>         FUTURE_BUILTIN_VEC_REPLACE_UN): New.
>         (rs6000_expand_ternop_builtin): Add 3rd argument checks for
>         CODE_FOR_vreplace_elt_v4si, CODE_FOR_vreplace_elt_v4sf,
>         CODE_FOR_vreplace_un_v4si, CODE_FOR_vreplace_un_v4sf.
>         (builtin_function_type) [FUTURE_BUILTIN_VREPLACE_ELT_UV4SI, FUTURE_BUIL\
> TIN_VREPLACE_ELT_UV2DI,
>         FUTURE_BUILTIN_VREPLACE_UN_UV4SI, FUTURE_BUILTIN_VREPLACE_UN_UV2DI]: Ne\
> w cases.


Multiple bad wordbreaks.


>         * doc/extend.texi: Add description for vec_replace_elt and
>         vec_replace_unaligned builtins.
> 
> 
> gcc/testsuite/ChangeLog
> 
> 2020-06-15 Carl Love  <cel@us.ibm.com>
>         * gcc.target/powerpc/vec-replace-word.c: Add new test.
> ---
>  gcc/config/rs6000/altivec.h                   |   2 +
>  gcc/config/rs6000/rs6000-builtin.def          |  16 +
>  gcc/config/rs6000/rs6000-call.c               |  61 ++++
>  gcc/config/rs6000/vsx.md                      |  60 ++++
>  gcc/doc/extend.texi                           |  50 +++
>  .../powerpc/vec-replace-word-runnable.c       | 289 ++++++++++++++++++
>  6 files changed, 478 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-replace-word-runnable.c
> 
> diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
> index 936aeb1ee09..435ffb8158f 100644
> --- a/gcc/config/rs6000/altivec.h
> +++ b/gcc/config/rs6000/altivec.h
> @@ -701,6 +701,8 @@ __altivec_scalar_pred(vec_any_nle,
>  #define vec_extracth(a, b, c)	__builtin_vec_extracth (a, b, c)
>  #define vec_insertl(a, b, c)   __builtin_vec_insertl (a, b, c)
>  #define vec_inserth(a, b, c)   __builtin_vec_inserth (a, b, c)
> +#define vec_replace_elt(a, b, c)       __builtin_vec_replace_elt (a, b, c)
> +#define vec_replace_unaligned(a, b, c) __builtin_vec_replace_un (a, b, c)
> 

I don't think the parameters are necessary unless there is remapping going
on.  But existing nearby content has them, so match the existing style,
I guess.  :-)


>  #define vec_gnb(a, b)	__builtin_vec_gnb (a, b)
>  #define vec_clrl(a, b)	__builtin_vec_clrl (a, b)
> diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
> index c5bd4f86555..91821f29a6f 100644
> --- a/gcc/config/rs6000/rs6000-builtin.def
> +++ b/gcc/config/rs6000/rs6000-builtin.def
> @@ -2643,6 +2643,20 @@ BU_FUTURE_V_3 (VINSERTVPRBR, "vinsvubvrx", CONST, vinsertvr_v16qi)
>  BU_FUTURE_V_3 (VINSERTVPRHR, "vinsvuhvrx", CONST, vinsertvr_v8hi)
>  BU_FUTURE_V_3 (VINSERTVPRWR, "vinsvuwvrx", CONST, vinsertvr_v4si)
> 
> +BU_FUTURE_V_3 (VREPLACE_ELT_V4SI, "vreplace_v4si", CONST, vreplace_elt_v4si)
> +BU_FUTURE_V_3 (VREPLACE_ELT_UV4SI, "vreplace_uv4si", CONST, vreplace_elt_v4si)
> +BU_FUTURE_V_3 (VREPLACE_ELT_V4SF, "vreplace_v4sf", CONST, vreplace_elt_v4sf)
> +BU_FUTURE_V_3 (VREPLACE_ELT_V2DI, "vreplace_v2di", CONST, vreplace_elt_v2di)
> +BU_FUTURE_V_3 (VREPLACE_ELT_UV2DI, "vreplace_uv2di", CONST, vreplace_elt_v2di)
> +BU_FUTURE_V_3 (VREPLACE_ELT_V2DF, "vreplace_v2df", CONST, vreplace_elt_v2df)
> +
> +BU_FUTURE_V_3 (VREPLACE_UN_V4SI, "vreplace_un_v4si", CONST, vreplace_un_v4si)
> +BU_FUTURE_V_3 (VREPLACE_UN_UV4SI, "vreplace_un_uv4si", CONST, vreplace_un_v4si)
> +BU_FUTURE_V_3 (VREPLACE_UN_V4SF, "vreplace_un_v4sf", CONST, vreplace_un_v4sf)
> +BU_FUTURE_V_3 (VREPLACE_UN_V2DI, "vreplace_un_v2di", CONST, vreplace_un_v2di)
> +BU_FUTURE_V_3 (VREPLACE_UN_UV2DI, "vreplace_un_uv2di", CONST, vreplace_un_v2di)
> +BU_FUTURE_V_3 (VREPLACE_UN_V2DF, "vreplace_un_v2df", CONST, vreplace_un_v2df)
> +
>  BU_FUTURE_V_1 (VSTRIBR, "vstribr", CONST, vstrir_v16qi)
>  BU_FUTURE_V_1 (VSTRIHR, "vstrihr", CONST, vstrir_v8hi)
>  BU_FUTURE_V_1 (VSTRIBL, "vstribl", CONST, vstril_v16qi)
> @@ -2664,6 +2678,8 @@ BU_FUTURE_OVERLOAD_3 (EXTRACTL, "extractl")
>  BU_FUTURE_OVERLOAD_3 (EXTRACTH, "extracth")
>  BU_FUTURE_OVERLOAD_3 (INSERTL, "insertl")
>  BU_FUTURE_OVERLOAD_3 (INSERTH, "inserth")
> +BU_FUTURE_OVERLOAD_3 (REPLACE_ELT, "replace_elt")
> +BU_FUTURE_OVERLOAD_3 (REPLACE_UN, "replace_un")
> 
>  BU_FUTURE_OVERLOAD_1 (VSTRIR, "strir")
>  BU_FUTURE_OVERLOAD_1 (VSTRIL, "stril")
> diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
> index abbe00030ea..2653222ced0 100644
> --- a/gcc/config/rs6000/rs6000-call.c
> +++ b/gcc/config/rs6000/rs6000-call.c
> @@ -5624,6 +5624,36 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
>      RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
>      RS6000_BTI_unsigned_V4SI, RS6000_BTI_UINTQI },
> 
> +  { FUTURE_BUILTIN_VEC_REPLACE_ELT, FUTURE_BUILTIN_VREPLACE_ELT_UV4SI,
> +    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
> +    RS6000_BTI_UINTSI, RS6000_BTI_UINTQI },
> +  { FUTURE_BUILTIN_VEC_REPLACE_ELT, FUTURE_BUILTIN_VREPLACE_ELT_V4SI,
> +    RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTQI },
> +  { FUTURE_BUILTIN_VEC_REPLACE_ELT, FUTURE_BUILTIN_VREPLACE_ELT_V4SF,
> +    RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_float, RS6000_BTI_INTQI },
> +  { FUTURE_BUILTIN_VEC_REPLACE_ELT, FUTURE_BUILTIN_VREPLACE_ELT_UV2DI,
> +    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
> +    RS6000_BTI_UINTDI, RS6000_BTI_UINTQI },
> +  { FUTURE_BUILTIN_VEC_REPLACE_ELT, FUTURE_BUILTIN_VREPLACE_ELT_V2DI,
> +    RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_INTDI, RS6000_BTI_INTQI },
> +  { FUTURE_BUILTIN_VEC_REPLACE_ELT, FUTURE_BUILTIN_VREPLACE_ELT_V2DF,
> +    RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_double, RS6000_BTI_INTQI },
> +
> +  { FUTURE_BUILTIN_VEC_REPLACE_UN, FUTURE_BUILTIN_VREPLACE_UN_UV4SI,
> +    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
> +    RS6000_BTI_UINTSI, RS6000_BTI_UINTQI },
> +  { FUTURE_BUILTIN_VEC_REPLACE_UN, FUTURE_BUILTIN_VREPLACE_UN_V4SI,
> +    RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTQI },
> +  { FUTURE_BUILTIN_VEC_REPLACE_UN, FUTURE_BUILTIN_VREPLACE_UN_V4SF,
> +    RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_float, RS6000_BTI_INTQI },
> +  { FUTURE_BUILTIN_VEC_REPLACE_UN, FUTURE_BUILTIN_VREPLACE_UN_UV2DI,
> +    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
> +    RS6000_BTI_UINTDI, RS6000_BTI_UINTQI },
> +  { FUTURE_BUILTIN_VEC_REPLACE_UN, FUTURE_BUILTIN_VREPLACE_UN_V2DI,
> +    RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_INTDI, RS6000_BTI_INTQI },
> +  { FUTURE_BUILTIN_VEC_REPLACE_UN, FUTURE_BUILTIN_VREPLACE_UN_V2DF,
> +    RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_double, RS6000_BTI_INTQI },
> +
>    { FUTURE_BUILTIN_VEC_VSTRIL, FUTURE_BUILTIN_VSTRIBL,
>      RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
>    { FUTURE_BUILTIN_VEC_VSTRIL, FUTURE_BUILTIN_VSTRIBL,
> @@ -9987,6 +10017,33 @@ rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
>  	  return CONST0_RTX (tmode);
>  	}
>      }
> +  else if (icode == CODE_FOR_vreplace_elt_v4si
> +	   || icode == CODE_FOR_vreplace_elt_v4sf)
> +   {
> +     /* Check whether the 3rd argument is an integer constant in the range
> +	0 to 3 inclusive.  */
> +     STRIP_NOPS (arg2);
> +     if (TREE_CODE (arg2) != INTEGER_CST
> +	 || !IN_RANGE (TREE_INT_CST_LOW (arg2), 0, 3))
> +	{
> +	  error ("argument 3 must be in the range 0 to 3");
> +	  return CONST0_RTX (tmode);
> +	}
> +   }
> +
> +  else if (icode == CODE_FOR_vreplace_un_v4si
> +	   || icode == CODE_FOR_vreplace_un_v4sf)
> +   {
> +     /* Check whether the 3rd argument is an integer constant in the range
> +	0 to 12 inclusive.  */
> +     STRIP_NOPS (arg2);
> +     if (TREE_CODE (arg2) != INTEGER_CST
> +	 || !IN_RANGE(TREE_INT_CST_LOW (arg2), 0, 12))
> +	{
> +	  error ("argument 3 must be in the range 0 to 12");
> +	  return CONST0_RTX (tmode);
> +	}
> +   }
> 
>    if (target == 0
>        || GET_MODE (target) != tmode
> @@ -13342,6 +13399,10 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
>      case FUTURE_BUILTIN_VINSERTVPRBL:
>      case FUTURE_BUILTIN_VINSERTVPRHL:
>      case FUTURE_BUILTIN_VINSERTVPRWL:
> +    case FUTURE_BUILTIN_VREPLACE_ELT_UV4SI:
> +    case FUTURE_BUILTIN_VREPLACE_ELT_UV2DI:
> +    case FUTURE_BUILTIN_VREPLACE_UN_UV4SI:
> +    case FUTURE_BUILTIN_VREPLACE_UN_UV2DI:
>        h.uns_p[0] = 1;
>        h.uns_p[1] = 1;
>        h.uns_p[2] = 1;
> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
> index 6ce93f14dec..57607998c42 100644
> --- a/gcc/config/rs6000/vsx.md
> +++ b/gcc/config/rs6000/vsx.md
> @@ -348,11 +348,22 @@
>     UNSPEC_EXTRACTR
>     UNSPEC_INSERTL
>     UNSPEC_INSERTR
> +   UNSPEC_REPLACE_ELT
> +   UNSPEC_REPLACE_UN
>    ])
> 
>  ;; Like VI, defined in vector.md, but add ISA 2.07 integer vector ops
>  (define_mode_iterator VI2 [V4SI V8HI V16QI V2DI])
> 
> +;; Vector extract_elt iterator/attr for 32-bit and 64-bit elements
> +(define_mode_iterator REPLACE_ELT [V4SI V4SF V2DI V2DF])
> +(define_mode_attr REPLACE_ELT_char [(V4SI "w") (V4SF "w")
> +				    (V2DI  "d") (V2DF "d")])
> +(define_mode_attr REPLACE_ELT_sh [(V4SI "2") (V4SF "2")
> +				  (V2DI  "3") (V2DF "3")])
> +(define_mode_attr REPLACE_ELT_max [(V4SI "12") (V4SF "12")
> +				   (V2DI  "8") (V2DF "8")])
> +
>  ;; VSX moves
> 
>  ;; The patterns for LE permuted loads and stores come before the general
> @@ -3957,6 +3968,55 @@
>   "vins<wd>rx %0,%1,%2"
>   [(set_attr "type" "vecsimple")])
> 
> +(define_expand "vreplace_elt_<mode>"
> +  [(set (match_operand:REPLACE_ELT 0 "register_operand")
> +  (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand")
> +		       (match_operand:<VS_scalar> 2 "register_operand")
> +		       (match_operand:QI 3 "const_0_to_3_operand")]
> +		      UNSPEC_REPLACE_ELT))]
> + "TARGET_FUTURE"
> +{
> +   int index;
> +   /* Immediate value is the word index, convert to byte index and adjust for
> +      Endianness if needed.  */
> +   if (BYTES_BIG_ENDIAN)
> +     index = INTVAL (operands[3]) << <REPLACE_ELT_sh>;
> +
> +   else
> +     index = <REPLACE_ELT_max> - (INTVAL (operands[3]) << <REPLACE_ELT_sh>);
> +
> +   emit_insn (gen_vreplace_elt_<mode>_inst (operands[0], operands[1],
> +					    operands[2],
> +					    GEN_INT (index)));
> +   DONE;
> + }
> +[(set_attr "type" "vecsimple")])
> +
> +(define_expand "vreplace_un_<mode>"
> + [(set (match_operand:REPLACE_ELT 0 "register_operand")
> + (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand")
> +		      (match_operand:<VS_scalar> 2 "register_operand")
> +		      (match_operand:QI 3 "const_0_to_12_operand")]
> +		     UNSPEC_REPLACE_UN))]
> + "TARGET_FUTURE"
> +{
> +   /* Immediate value is the byte index Big Endian numbering.  */
> +   emit_insn (gen_vreplace_elt_<mode>_inst (operands[0], operands[1],
> +					    operands[2], operands[3]));
> +   DONE;
> + }
> +[(set_attr "type" "vecsimple")])
> +
> +(define_insn "vreplace_elt_<mode>_inst"
> + [(set (match_operand:REPLACE_ELT 0 "register_operand" "=v")
> +  (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand" "0")
> +		       (match_operand:<VS_scalar> 2 "register_operand" "r")
> +		       (match_operand:QI 3 "const_0_to_12_operand" "n")]
> +		      UNSPEC_REPLACE_ELT))]
> + "TARGET_FUTURE"
> + "vins<REPLACE_ELT_char> %0,%2,%3"
> + [(set_attr "type" "vecsimple")])
> +
>  ;; VSX_EXTRACT optimizations
>  ;; Optimize double d = (double) vec_extract (vi, <n>)
>  ;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index 8931c7950f6..00c17be1851 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -21045,6 +21045,56 @@ This is a limitation of the bi-endian vector programming model consistent with
>  the limitation on vec_perm, for example.
>  @findex vec_inserth
> 
> +Vector Replace Element
> +@smallexample
> +@exdent vector signed int vec_replace_elt (vector signed int, signed int,
> +const int);
> +@exdent vector unsigned int vec_replace_elt (vector unsigned int,
> +unsigned int, const int);
> +@exdent vector float vec_replace_elt (vector float, float, const int);
> +@exdent vector signed long long vec_replace_elt (vector signed long long,
> +signed long long, const int);
> +@exdent vector unsigned long long vec_replace_elt (vector unsigned long long,
> +unsigned long long, const int);
> +@exdent vector double vec_replace_elt (vector double, double, const int);
> +@end smallexample
> +The third argument (constrained to [0,3]) identifies the natural-endian
> +element number of the first argument that will be replaced by the second
> +argument to produce the result.  The other elements of the first argument will
> +remain unchanged in the result.
> +
> +If it's desirable to insert a word at an unaligned position, use
> +vec_replace_unaligned instead.
> +
> +@findex vec_replace_elt
> +
> +Vector Replace Unaligned
> +@smallexample
> +@exdent vector unsigned char vec_replace_unaligned (vector unsigned char,
> +signed int, const int);
> +@exdent vector unsigned char vec_replace_unaligned (vector unsigned char,
> +unsigned int, const int);
> +@exdent vector unsigned char vec_replace_unaligned (vector unsigned char,
> +float, const int);
> +@exdent vector unsigned char vec_replace_unaligned (vector unsigned char,
> +signed long long, const int);
> +@exdent vector unsigned char vec_replace_unaligned (vector unsigned char,
> +unsigned long long, const int);
> +@exdent vector unsigned char vec_replace_unaligned (vector unsigned char,
> +double, const int);
> +@end smallexample
> +
> +The second argument replaces a portion of the first argument to produce the
> +result, with the rest of the first argument unchanged in the result.  The
> +third argument identifies the byte index (using left-to-right, or big-endian
> +order) where the high-order byte of the second argument will be placed, with
> +the remaining bytes of the second argument placed naturally "to the right"
> +of the high-order byte.
> +
> +The programmer is responsible for understanding the endianness issues involved
> +with the first argument and the result.
> +@findex vec_replace_unaligned
> +
>  @smallexample
>  @exdent vector unsigned long long int
>  @exdent vec_pdep (vector unsigned long long int, vector unsigned long long int)
> diff --git a/gcc/testsuite/gcc.target/powerpc/vec-replace-word-runnable.c b/gcc/testsuite/gcc.target/powerpc/vec-replace-word-runnable.c
> new file mode 100644
> index 00000000000..1fe23d5f912
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/vec-replace-word-runnable.c
> @@ -0,0 +1,289 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target powerpc_future_hw } */
> +/* { dg-options "-mdejagnu-cpu=future" } */
> +
> +#include <altivec.h>
> +
> +#define DEBUG 1
> +
> +#ifdef DEBUG
> +#include <stdio.h>
> +#endif
> +
> +extern void abort (void);
> +
> +int
> +main (int argc, char *argv [])
> +{
> +  int i;
> +  unsigned char ch;
> +  unsigned int index;
> +
> +  vector unsigned int vresult_uint;
> +  vector unsigned int expected_vresult_uint;
> +  vector unsigned int src_va_uint;
> +  vector unsigned int src_vb_uint;
> +  unsigned int src_a_uint;
> +
> +  vector int vresult_int;
> +  vector int expected_vresult_int;
> +  vector int src_va_int;
> +  vector int src_vb_int;
> +  int src_a_int;
> +
> +  vector unsigned long long int vresult_ullint;
> +  vector unsigned long long int expected_vresult_ullint;
> +  vector unsigned long long int src_va_ullint;
> +  vector unsigned long long int src_vb_ullint;
> +  unsigned int long long src_a_ullint;
> +
> +  vector long long int vresult_llint;
> +  vector long long int expected_vresult_llint;
> +  vector long long int src_va_llint;
> +  vector long long int src_vb_llint;
> +  long long int src_a_llint;
> +
> +  vector float vresult_float;
> +  vector float expected_vresult_float;
> +  vector float src_va_float;
> +  float src_a_float;
> +
> +  vector double vresult_double;
> +  vector double expected_vresult_double;
> +  vector double src_va_double;
> +  double src_a_double;
> +
> +  /* Vector replace 32-bit element */
> +  src_a_uint = 345;
> +  src_va_uint = (vector unsigned int) { 0, 1, 2, 3 };
> +  vresult_uint = (vector unsigned int) { 0, 0, 0, 0 };
> +  expected_vresult_uint = (vector unsigned int) { 0, 1, 345, 3 };
> +						 
> +  vresult_uint = vec_replace_elt (src_va_uint, src_a_uint, 2);
> +
> +  if (!vec_all_eq (vresult_uint,  expected_vresult_uint)) {
> +#if DEBUG
> +    printf("ERROR, vec_replace_elt (src_vb_uint, src_va_uint, index)\n");
> +    for(i = 0; i < 4; i++)
> +      printf(" vresult_uint[%d] = %d, expected_vresult_uint[%d] = %d\n",
> +	     i, vresult_uint[i], i, expected_vresult_uint[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +
> +  src_a_int = 234;
> +  src_va_int = (vector int) { 0, 1, 2, 3 };
> +  vresult_int = (vector int) { 0, 0, 0, 0 };
> +  expected_vresult_int = (vector int) { 0, 234, 2, 3 };
> +						 
> +  vresult_int = vec_replace_elt (src_va_int, src_a_int, 1);
> +
> +  if (!vec_all_eq (vresult_int,  expected_vresult_int)) {
> +#if DEBUG
> +    printf("ERROR, vec_replace_elt (src_vb_int, src_va_int, index)\n");
> +    for(i = 0; i < 4; i++)
> +      printf(" vresult_int[%d] = %d, expected_vresult_int[%d] = %d\n",
> +	     i, vresult_int[i], i, expected_vresult_int[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +  
> +  src_a_float = 34.0;
> +  src_va_float = (vector float) { 0.0, 10.0, 20.0, 30.0 };
> +  vresult_float = (vector float) { 0.0, 0.0, 0.0, 0.0 };
> +  expected_vresult_float = (vector float) { 0.0, 34.0, 20.0, 30.0 };
> +						 
> +  vresult_float = vec_replace_elt (src_va_float, src_a_float, 1);
> +
> +  if (!vec_all_eq (vresult_float,  expected_vresult_float)) {
> +#if DEBUG
> +    printf("ERROR, vec_replace_elt (src_vb_float, src_va_float, index)\n");
> +    for(i = 0; i < 4; i++)
> +      printf(" vresult_float[%d] = %f, expected_vresult_float[%d] = %f\n",
> +	     i, vresult_float[i], i, expected_vresult_float[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +
> +  /* Vector replace 64-bit element */
> +  src_a_ullint = 456;
> +  src_va_ullint = (vector unsigned long long int) { 0, 1 };
> +  vresult_ullint = (vector unsigned long long int) { 0, 0 };
> +  expected_vresult_ullint = (vector unsigned long long int) { 0, 456 };
> +						 
> +  vresult_ullint = vec_replace_elt (src_va_ullint, src_a_ullint, 1);
> +
> +  if (!vec_all_eq (vresult_ullint,  expected_vresult_ullint)) {
> +#if DEBUG
> +    printf("ERROR, vec_replace_elt (src_vb_ullint, src_va_ullint, index)\n");
> +    for(i = 0; i < 2; i++)
> +      printf(" vresult_ullint[%d] = %d, expected_vresult_ullint[%d] = %d\n",
> +	     i, vresult_ullint[i], i, expected_vresult_ullint[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +
> +  src_a_llint = 678;
> +  src_va_llint = (vector long long int) { 0, 1 };
> +  vresult_llint = (vector long long int) { 0, 0 };
> +  expected_vresult_llint = (vector long long int) { 0, 678 };
> +						 
> +  vresult_llint = vec_replace_elt (src_va_llint, src_a_llint, 1);
> +
> +  if (!vec_all_eq (vresult_llint,  expected_vresult_llint)) {
> +#if DEBUG
> +    printf("ERROR, vec_replace_elt (src_vb_llint, src_va_llint, index)\n");
> +    for(i = 0; i < 2; i++)
> +      printf(" vresult_llint[%d] = %d, expected_vresult_llint[%d] = %d\n",
> +	     i, vresult_llint[i], i, expected_vresult_llint[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +  
> +  src_a_double = 678.0;
> +  src_va_double = (vector double) { 0.0, 50.0 };
> +  vresult_double = (vector double) { 0.0, 0.0 };
> +  expected_vresult_double = (vector double) { 0.0, 678.0 };
> +						 
> +  vresult_double = vec_replace_elt (src_va_double, src_a_double, 1);
> +
> +  if (!vec_all_eq (vresult_double,  expected_vresult_double)) {
> +#if DEBUG
> +    printf("ERROR, vec_replace_elt (src_vb_double, src_va_double, index)\n");
> +    for(i = 0; i < 2; i++)
> +      printf(" vresult_double[%d] = %f, expected_vresult_double[%d] = %f\n",
> +	     i, vresult_double[i], i, expected_vresult_double[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +
> +
> +  /* Vector replace 32-bit element, unaligned */
> +  src_a_uint = 345;
> +  src_va_uint = (vector unsigned int) { 1, 2, 0, 0 };
> +  vresult_uint = (vector unsigned int) { 0, 0, 0, 0 };
> +  /* Byte index 7 will overwrite part of elements 2 and 3 */
> +  expected_vresult_uint = (vector unsigned int) { 1, 2, 345*256, 0 };
> +						 
> +  vresult_uint = vec_replace_unaligned (src_va_uint, src_a_uint, 3);
> +
> +  if (!vec_all_eq (vresult_uint,  expected_vresult_uint)) {
> +#if DEBUG
> +    printf("ERROR, vec_replace_unaligned (src_vb_uint, src_va_uint, index)\n");
> +    for(i = 0; i < 4; i++)
> +      printf(" vresult_uint[%d] = %d, expected_vresult_uint[%d] = %d\n",
> +	     i, vresult_uint[i], i, expected_vresult_uint[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +
> +  src_a_int = 234;
> +  src_va_int = (vector int) { 1, 0, 3, 4 };
> +  vresult_int = (vector int) { 0, 0, 0, 0 };
> +  /* Byte index 7 will over write part of elements 1 and 2 */
> +  expected_vresult_int = (vector int) { 1, 234*256, 0, 4 };
> +						 
> +  vresult_int = vec_replace_unaligned (src_va_int, src_a_int, 7);
> +
> +  if (!vec_all_eq (vresult_int,  expected_vresult_int)) {
> +#if DEBUG
> +    printf("ERROR, vec_replace_unaligned (src_vb_int, src_va_int, index)\n");
> +    for(i = 0; i < 4; i++)
> +      printf(" vresult_int[%d] = %d, expected_vresult_int[%d] = %d\n",
> +	     i, vresult_int[i], i, expected_vresult_int[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +
> +  src_a_float = 34.0;
> +  src_va_float = (vector float) { 0.0, 10.0, 20.0, 30.0 };
> +  vresult_float = (vector float) { 0.0, 0.0, 0.0, 0.0 };
> +  expected_vresult_float = (vector float) { 0.0, 34.0, 20.0, 30.0 };
> +						 
> +  vresult_float = vec_replace_unaligned (src_va_float, src_a_float, 8);
> +
> +  if (!vec_all_eq (vresult_float,  expected_vresult_float)) {
> +#if DEBUG
> +    printf("ERROR, vec_replace_unaligned (src_vb_float, src_va_float, index)\n");
> +    for(i = 0; i < 4; i++)
> +      printf(" vresult_float[%d] = %f, expected_vresult_float[%d] = %f\n",
> +	     i, vresult_float[i], i, expected_vresult_float[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +
> +  /* Vector replace 64-bit element, unaligned  */
> +  src_a_ullint = 456;
> +  src_va_ullint = (vector unsigned long long int) { 0, 0x222 };
> +  vresult_ullint = (vector unsigned long long int) { 0, 0 };
> +  expected_vresult_ullint = (vector unsigned long long int) { 456*256,
> +							      0x200 };
> +						 
> +  /* Byte index 7 will over write least significant byte of  element 0  */
> +  vresult_ullint = vec_replace_unaligned (src_va_ullint, src_a_ullint, 7);
> +
> +  if (!vec_all_eq (vresult_ullint,  expected_vresult_ullint)) {
> +#if DEBUG
> +    printf("ERROR, vec_replace_unaligned (src_vb_ullint, src_va_ullint, index)\n");
> +    for(i = 0; i < 2; i++)
> +      printf(" vresult_ullint[%d] = %d, expected_vresult_ullint[%d] = %d\n",
> +	     i, vresult_ullint[i], i, expected_vresult_ullint[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +
> +  src_a_llint = 678;
> +  src_va_llint = (vector long long int) { 0, 0x101 };
> +  vresult_llint = (vector long long int) { 0, 0 };
> +  /* Byte index 7 will over write least significant byte of  element 0  */
> +  expected_vresult_llint = (vector long long int) { 678*256, 0x100 };
> +						 
> +  vresult_llint = vec_replace_unaligned (src_va_llint, src_a_llint, 7);
> +
> +  if (!vec_all_eq (vresult_llint,  expected_vresult_llint)) {
> +#if DEBUG
> +    printf("ERROR, vec_replace_unaligned (src_vb_llint, src_va_llint, index)\n");
> +    for(i = 0; i < 2; i++)
> +      printf(" vresult_llint[%d] = %d, expected_vresult_llint[%d] = %d\n",
> +	     i, vresult_llint[i], i, expected_vresult_llint[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +  
> +  src_a_double = 678.0;
> +  src_va_double = (vector double) { 0.0, 50.0 };
> +  vresult_double = (vector double) { 0.0, 0.0 };
> +  expected_vresult_double = (vector double) { 0.0, 678.0 };
> +						 
> +  vresult_double = vec_replace_unaligned (src_va_double, src_a_double, 0);
> +
> +  if (!vec_all_eq (vresult_double,  expected_vresult_double)) {
> +#if DEBUG
> +    printf("ERROR, vec_replace_unaligned (src_vb_double, src_va_double, index)\
> +n");
> +    for(i = 0; i < 2; i++)
> +      printf(" vresult_double[%d] = %f, expected_vresult_double[%d] = %f\n",
> +	     i, vresult_double[i], i, expected_vresult_double[i]);
> +#else
> +    abort();
> +#endif
> +  }
> +    
> +  return 0;
> +}
> +
> +/* { dg-final { scan-assembler-times {\mvinsw\M} 6 } } */
> +/* { dg-final { scan-assembler-times {\mvinsd\M} 6 } } */
> +
> +
diff mbox series

Patch

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 936aeb1ee09..435ffb8158f 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -701,6 +701,8 @@  __altivec_scalar_pred(vec_any_nle,
 #define vec_extracth(a, b, c)	__builtin_vec_extracth (a, b, c)
 #define vec_insertl(a, b, c)   __builtin_vec_insertl (a, b, c)
 #define vec_inserth(a, b, c)   __builtin_vec_inserth (a, b, c)
+#define vec_replace_elt(a, b, c)       __builtin_vec_replace_elt (a, b, c)
+#define vec_replace_unaligned(a, b, c) __builtin_vec_replace_un (a, b, c)
 
 #define vec_gnb(a, b)	__builtin_vec_gnb (a, b)
 #define vec_clrl(a, b)	__builtin_vec_clrl (a, b)
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index c5bd4f86555..91821f29a6f 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2643,6 +2643,20 @@  BU_FUTURE_V_3 (VINSERTVPRBR, "vinsvubvrx", CONST, vinsertvr_v16qi)
 BU_FUTURE_V_3 (VINSERTVPRHR, "vinsvuhvrx", CONST, vinsertvr_v8hi)
 BU_FUTURE_V_3 (VINSERTVPRWR, "vinsvuwvrx", CONST, vinsertvr_v4si)
 
+BU_FUTURE_V_3 (VREPLACE_ELT_V4SI, "vreplace_v4si", CONST, vreplace_elt_v4si)
+BU_FUTURE_V_3 (VREPLACE_ELT_UV4SI, "vreplace_uv4si", CONST, vreplace_elt_v4si)
+BU_FUTURE_V_3 (VREPLACE_ELT_V4SF, "vreplace_v4sf", CONST, vreplace_elt_v4sf)
+BU_FUTURE_V_3 (VREPLACE_ELT_V2DI, "vreplace_v2di", CONST, vreplace_elt_v2di)
+BU_FUTURE_V_3 (VREPLACE_ELT_UV2DI, "vreplace_uv2di", CONST, vreplace_elt_v2di)
+BU_FUTURE_V_3 (VREPLACE_ELT_V2DF, "vreplace_v2df", CONST, vreplace_elt_v2df)
+
+BU_FUTURE_V_3 (VREPLACE_UN_V4SI, "vreplace_un_v4si", CONST, vreplace_un_v4si)
+BU_FUTURE_V_3 (VREPLACE_UN_UV4SI, "vreplace_un_uv4si", CONST, vreplace_un_v4si)
+BU_FUTURE_V_3 (VREPLACE_UN_V4SF, "vreplace_un_v4sf", CONST, vreplace_un_v4sf)
+BU_FUTURE_V_3 (VREPLACE_UN_V2DI, "vreplace_un_v2di", CONST, vreplace_un_v2di)
+BU_FUTURE_V_3 (VREPLACE_UN_UV2DI, "vreplace_un_uv2di", CONST, vreplace_un_v2di)
+BU_FUTURE_V_3 (VREPLACE_UN_V2DF, "vreplace_un_v2df", CONST, vreplace_un_v2df)
+
 BU_FUTURE_V_1 (VSTRIBR, "vstribr", CONST, vstrir_v16qi)
 BU_FUTURE_V_1 (VSTRIHR, "vstrihr", CONST, vstrir_v8hi)
 BU_FUTURE_V_1 (VSTRIBL, "vstribl", CONST, vstril_v16qi)
@@ -2664,6 +2678,8 @@  BU_FUTURE_OVERLOAD_3 (EXTRACTL, "extractl")
 BU_FUTURE_OVERLOAD_3 (EXTRACTH, "extracth")
 BU_FUTURE_OVERLOAD_3 (INSERTL, "insertl")
 BU_FUTURE_OVERLOAD_3 (INSERTH, "inserth")
+BU_FUTURE_OVERLOAD_3 (REPLACE_ELT, "replace_elt")
+BU_FUTURE_OVERLOAD_3 (REPLACE_UN, "replace_un")
 
 BU_FUTURE_OVERLOAD_1 (VSTRIR, "strir")
 BU_FUTURE_OVERLOAD_1 (VSTRIL, "stril")
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index abbe00030ea..2653222ced0 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -5624,6 +5624,36 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
     RS6000_BTI_unsigned_V4SI, RS6000_BTI_UINTQI },
 
+  { FUTURE_BUILTIN_VEC_REPLACE_ELT, FUTURE_BUILTIN_VREPLACE_ELT_UV4SI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+    RS6000_BTI_UINTSI, RS6000_BTI_UINTQI },
+  { FUTURE_BUILTIN_VEC_REPLACE_ELT, FUTURE_BUILTIN_VREPLACE_ELT_V4SI,
+    RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTQI },
+  { FUTURE_BUILTIN_VEC_REPLACE_ELT, FUTURE_BUILTIN_VREPLACE_ELT_V4SF,
+    RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_float, RS6000_BTI_INTQI },
+  { FUTURE_BUILTIN_VEC_REPLACE_ELT, FUTURE_BUILTIN_VREPLACE_ELT_UV2DI,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+    RS6000_BTI_UINTDI, RS6000_BTI_UINTQI },
+  { FUTURE_BUILTIN_VEC_REPLACE_ELT, FUTURE_BUILTIN_VREPLACE_ELT_V2DI,
+    RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_INTDI, RS6000_BTI_INTQI },
+  { FUTURE_BUILTIN_VEC_REPLACE_ELT, FUTURE_BUILTIN_VREPLACE_ELT_V2DF,
+    RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_double, RS6000_BTI_INTQI },
+
+  { FUTURE_BUILTIN_VEC_REPLACE_UN, FUTURE_BUILTIN_VREPLACE_UN_UV4SI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+    RS6000_BTI_UINTSI, RS6000_BTI_UINTQI },
+  { FUTURE_BUILTIN_VEC_REPLACE_UN, FUTURE_BUILTIN_VREPLACE_UN_V4SI,
+    RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTQI },
+  { FUTURE_BUILTIN_VEC_REPLACE_UN, FUTURE_BUILTIN_VREPLACE_UN_V4SF,
+    RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_float, RS6000_BTI_INTQI },
+  { FUTURE_BUILTIN_VEC_REPLACE_UN, FUTURE_BUILTIN_VREPLACE_UN_UV2DI,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+    RS6000_BTI_UINTDI, RS6000_BTI_UINTQI },
+  { FUTURE_BUILTIN_VEC_REPLACE_UN, FUTURE_BUILTIN_VREPLACE_UN_V2DI,
+    RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_INTDI, RS6000_BTI_INTQI },
+  { FUTURE_BUILTIN_VEC_REPLACE_UN, FUTURE_BUILTIN_VREPLACE_UN_V2DF,
+    RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_double, RS6000_BTI_INTQI },
+
   { FUTURE_BUILTIN_VEC_VSTRIL, FUTURE_BUILTIN_VSTRIBL,
     RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
   { FUTURE_BUILTIN_VEC_VSTRIL, FUTURE_BUILTIN_VSTRIBL,
@@ -9987,6 +10017,33 @@  rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
 	  return CONST0_RTX (tmode);
 	}
     }
+  else if (icode == CODE_FOR_vreplace_elt_v4si
+	   || icode == CODE_FOR_vreplace_elt_v4sf)
+   {
+     /* Check whether the 3rd argument is an integer constant in the range
+	0 to 3 inclusive.  */
+     STRIP_NOPS (arg2);
+     if (TREE_CODE (arg2) != INTEGER_CST
+	 || !IN_RANGE (TREE_INT_CST_LOW (arg2), 0, 3))
+	{
+	  error ("argument 3 must be in the range 0 to 3");
+	  return CONST0_RTX (tmode);
+	}
+   }
+
+  else if (icode == CODE_FOR_vreplace_un_v4si
+	   || icode == CODE_FOR_vreplace_un_v4sf)
+   {
+     /* Check whether the 3rd argument is an integer constant in the range
+	0 to 12 inclusive.  */
+     STRIP_NOPS (arg2);
+     if (TREE_CODE (arg2) != INTEGER_CST
+	 || !IN_RANGE(TREE_INT_CST_LOW (arg2), 0, 12))
+	{
+	  error ("argument 3 must be in the range 0 to 12");
+	  return CONST0_RTX (tmode);
+	}
+   }
 
   if (target == 0
       || GET_MODE (target) != tmode
@@ -13342,6 +13399,10 @@  builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
     case FUTURE_BUILTIN_VINSERTVPRBL:
     case FUTURE_BUILTIN_VINSERTVPRHL:
     case FUTURE_BUILTIN_VINSERTVPRWL:
+    case FUTURE_BUILTIN_VREPLACE_ELT_UV4SI:
+    case FUTURE_BUILTIN_VREPLACE_ELT_UV2DI:
+    case FUTURE_BUILTIN_VREPLACE_UN_UV4SI:
+    case FUTURE_BUILTIN_VREPLACE_UN_UV2DI:
       h.uns_p[0] = 1;
       h.uns_p[1] = 1;
       h.uns_p[2] = 1;
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 6ce93f14dec..57607998c42 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -348,11 +348,22 @@ 
    UNSPEC_EXTRACTR
    UNSPEC_INSERTL
    UNSPEC_INSERTR
+   UNSPEC_REPLACE_ELT
+   UNSPEC_REPLACE_UN
   ])
 
 ;; Like VI, defined in vector.md, but add ISA 2.07 integer vector ops
 (define_mode_iterator VI2 [V4SI V8HI V16QI V2DI])
 
+;; Vector replace_elt iterator/attrs for 32-bit and 64-bit elements
+(define_mode_iterator REPLACE_ELT [V4SI V4SF V2DI V2DF])
+(define_mode_attr REPLACE_ELT_char [(V4SI "w") (V4SF "w")
+				    (V2DI  "d") (V2DF "d")])
+(define_mode_attr REPLACE_ELT_sh [(V4SI "2") (V4SF "2")
+				  (V2DI  "3") (V2DF "3")])
+(define_mode_attr REPLACE_ELT_max [(V4SI "12") (V4SF "12")
+				   (V2DI  "8") (V2DF "8")])
+
 ;; VSX moves
 
 ;; The patterns for LE permuted loads and stores come before the general
@@ -3957,6 +3968,55 @@ 
  "vins<wd>rx %0,%1,%2"
  [(set_attr "type" "vecsimple")])
 
+(define_expand "vreplace_elt_<mode>"
+  [(set (match_operand:REPLACE_ELT 0 "register_operand")
+  (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand")
+		       (match_operand:<VS_scalar> 2 "register_operand")
+		       (match_operand:QI 3 "const_0_to_3_operand")]
+		      UNSPEC_REPLACE_ELT))]
+ "TARGET_FUTURE"
+{
+   int index;
+   /* Immediate value is the element index; scale it to a byte index and
+      mirror it (max byte offset minus index) when not big-endian.  */
+   if (BYTES_BIG_ENDIAN)
+     index = INTVAL (operands[3]) << <REPLACE_ELT_sh>;
+
+   else
+     index = <REPLACE_ELT_max> - (INTVAL (operands[3]) << <REPLACE_ELT_sh>);
+
+   emit_insn (gen_vreplace_elt_<mode>_inst (operands[0], operands[1],
+					    operands[2],
+					    GEN_INT (index)));
+   DONE;
+ }
+[(set_attr "type" "vecsimple")])
+
+(define_expand "vreplace_un_<mode>"
+ [(set (match_operand:REPLACE_ELT 0 "register_operand")
+ (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand")
+		      (match_operand:<VS_scalar> 2 "register_operand")
+		      (match_operand:QI 3 "const_0_to_12_operand")]
+		     UNSPEC_REPLACE_UN))]
+ "TARGET_FUTURE"
+{
+   /* Immediate value is already a byte index; pass it through unchanged.  */
+   emit_insn (gen_vreplace_elt_<mode>_inst (operands[0], operands[1],
+					    operands[2], operands[3]));
+   DONE;
+ }
+[(set_attr "type" "vecsimple")])
+
+(define_insn "vreplace_elt_<mode>_inst"  ;; emitted by the elt and un expanders
+ [(set (match_operand:REPLACE_ELT 0 "register_operand" "=v")
+  (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand" "0")
+		       (match_operand:<VS_scalar> 2 "register_operand" "r")
+		       (match_operand:QI 3 "const_0_to_12_operand" "n")]
+		      UNSPEC_REPLACE_ELT))]
+ "TARGET_FUTURE"
+ "vins<REPLACE_ELT_char> %0,%2,%3"
+ [(set_attr "type" "vecsimple")])
+
 ;; VSX_EXTRACT optimizations
 ;; Optimize double d = (double) vec_extract (vi, <n>)
 ;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 8931c7950f6..00c17be1851 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -21045,6 +21045,56 @@  This is a limitation of the bi-endian vector programming model consistent with
 the limitation on vec_perm, for example.
 @findex vec_inserth
 
+Vector Replace Element
+@smallexample
+@exdent vector signed int vec_replace_elt (vector signed int, signed int,
+const int);
+@exdent vector unsigned int vec_replace_elt (vector unsigned int,
+unsigned int, const int);
+@exdent vector float vec_replace_elt (vector float, float, const int);
+@exdent vector signed long long vec_replace_elt (vector signed long long,
+signed long long, const int);
+@exdent vector unsigned long long vec_replace_elt (vector unsigned long long,
+unsigned long long, const int);
+@exdent vector double vec_replace_elt (vector double, double, const int);
+@end smallexample
+The third argument (constrained to [0,3]) identifies the natural-endian
+element number of the first argument that will be replaced by the second
+argument to produce the result.  The other elements of the first argument will
+remain unchanged in the result.
+
+If it's desirable to insert a word at an unaligned position, use
+vec_replace_unaligned instead.
+
+@findex vec_replace_elt
+
+Vector Replace Unaligned
+@smallexample
+@exdent vector unsigned char vec_replace_unaligned (vector unsigned char,
+signed int, const int);
+@exdent vector unsigned char vec_replace_unaligned (vector unsigned char,
+unsigned int, const int);
+@exdent vector unsigned char vec_replace_unaligned (vector unsigned char,
+float, const int);
+@exdent vector unsigned char vec_replace_unaligned (vector unsigned char,
+signed long long, const int);
+@exdent vector unsigned char vec_replace_unaligned (vector unsigned char,
+unsigned long long, const int);
+@exdent vector unsigned char vec_replace_unaligned (vector unsigned char,
+double, const int);
+@end smallexample
+
+The second argument replaces a portion of the first argument to produce the
+result, with the rest of the first argument unchanged in the result.  The
+third argument identifies the byte index (using left-to-right, or big-endian
+order) where the high-order byte of the second argument will be placed, with
+the remaining bytes of the second argument placed naturally "to the right"
+of the high-order byte.
+
+The programmer is responsible for understanding the endianness issues involved
+with the first argument and the result.
+@findex vec_replace_unaligned
+
 @smallexample
 @exdent vector unsigned long long int
 @exdent vec_pdep (vector unsigned long long int, vector unsigned long long int)
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-replace-word-runnable.c b/gcc/testsuite/gcc.target/powerpc/vec-replace-word-runnable.c
new file mode 100644
index 00000000000..1fe23d5f912
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-replace-word-runnable.c
@@ -0,0 +1,289 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target powerpc_future_hw } */
+/* { dg-options "-mdejagnu-cpu=future -save-temps" } */
+
+#include <altivec.h>
+
+#define DEBUG 0	/* Nonzero: print mismatches instead of aborting.  */
+
+#if DEBUG
+#include <stdio.h>
+#endif
+
+extern void abort (void);
+
+int
+main (int argc, char *argv [])
+{
+  int i;
+  unsigned char ch;
+  unsigned int index;
+  /* A failed check aborts, or is printed instead when DEBUG is nonzero.  */
+  vector unsigned int vresult_uint;
+  vector unsigned int expected_vresult_uint;
+  vector unsigned int src_va_uint;
+  vector unsigned int src_vb_uint;
+  unsigned int src_a_uint;
+
+  vector int vresult_int;
+  vector int expected_vresult_int;
+  vector int src_va_int;
+  vector int src_vb_int;
+  int src_a_int;
+
+  vector unsigned long long int vresult_ullint;
+  vector unsigned long long int expected_vresult_ullint;
+  vector unsigned long long int src_va_ullint;
+  vector unsigned long long int src_vb_ullint;
+  unsigned int long long src_a_ullint;
+
+  vector long long int vresult_llint;
+  vector long long int expected_vresult_llint;
+  vector long long int src_va_llint;
+  vector long long int src_vb_llint;
+  long long int src_a_llint;
+
+  vector float vresult_float;
+  vector float expected_vresult_float;
+  vector float src_va_float;
+  float src_a_float;
+
+  vector double vresult_double;
+  vector double expected_vresult_double;
+  vector double src_va_double;
+  double src_a_double;
+
+  /* Vector replace 32-bit element (index is the natural element number).  */
+  src_a_uint = 345;
+  src_va_uint = (vector unsigned int) { 0, 1, 2, 3 };
+  vresult_uint = (vector unsigned int) { 0, 0, 0, 0 };
+  expected_vresult_uint = (vector unsigned int) { 0, 1, 345, 3 };
+
+  vresult_uint = vec_replace_elt (src_va_uint, src_a_uint, 2);
+
+  if (!vec_all_eq (vresult_uint, expected_vresult_uint)) {
+#if DEBUG
+    printf ("ERROR, vec_replace_elt (src_vb_uint, src_va_uint, index)\n");
+    for (i = 0; i < 4; i++)
+      printf (" vresult_uint[%d] = %u, expected_vresult_uint[%d] = %u\n",
+	      i, vresult_uint[i], i, expected_vresult_uint[i]);
+#else
+    abort ();
+#endif
+  }
+
+  src_a_int = 234;
+  src_va_int = (vector int) { 0, 1, 2, 3 };
+  vresult_int = (vector int) { 0, 0, 0, 0 };
+  expected_vresult_int = (vector int) { 0, 234, 2, 3 };
+
+  vresult_int = vec_replace_elt (src_va_int, src_a_int, 1);
+
+  if (!vec_all_eq (vresult_int, expected_vresult_int)) {
+#if DEBUG
+    printf ("ERROR, vec_replace_elt (src_vb_int, src_va_int, index)\n");
+    for (i = 0; i < 4; i++)
+      printf (" vresult_int[%d] = %d, expected_vresult_int[%d] = %d\n",
+	      i, vresult_int[i], i, expected_vresult_int[i]);
+#else
+    abort ();
+#endif
+  }
+
+  src_a_float = 34.0;
+  src_va_float = (vector float) { 0.0, 10.0, 20.0, 30.0 };
+  vresult_float = (vector float) { 0.0, 0.0, 0.0, 0.0 };
+  expected_vresult_float = (vector float) { 0.0, 34.0, 20.0, 30.0 };
+
+  vresult_float = vec_replace_elt (src_va_float, src_a_float, 1);
+
+  if (!vec_all_eq (vresult_float, expected_vresult_float)) {
+#if DEBUG
+    printf ("ERROR, vec_replace_elt (src_vb_float, src_va_float, index)\n");
+    for (i = 0; i < 4; i++)
+      printf (" vresult_float[%d] = %f, expected_vresult_float[%d] = %f\n",
+	      i, vresult_float[i], i, expected_vresult_float[i]);
+#else
+    abort ();
+#endif
+  }
+
+  /* Vector replace 64-bit element (index is the natural element number).  */
+  src_a_ullint = 456;
+  src_va_ullint = (vector unsigned long long int) { 0, 1 };
+  vresult_ullint = (vector unsigned long long int) { 0, 0 };
+  expected_vresult_ullint = (vector unsigned long long int) { 0, 456 };
+
+  vresult_ullint = vec_replace_elt (src_va_ullint, src_a_ullint, 1);
+
+  if (!vec_all_eq (vresult_ullint, expected_vresult_ullint)) {
+#if DEBUG
+    printf ("ERROR, vec_replace_elt (src_vb_ullint, src_va_ullint, index)\n");
+    for (i = 0; i < 2; i++)
+      printf (" vresult_ullint[%d] = %llu, expected_vresult_ullint[%d] = %llu\n",
+	      i, vresult_ullint[i], i, expected_vresult_ullint[i]);
+#else
+    abort ();
+#endif
+  }
+
+  src_a_llint = 678;
+  src_va_llint = (vector long long int) { 0, 1 };
+  vresult_llint = (vector long long int) { 0, 0 };
+  expected_vresult_llint = (vector long long int) { 0, 678 };
+
+  vresult_llint = vec_replace_elt (src_va_llint, src_a_llint, 1);
+
+  if (!vec_all_eq (vresult_llint, expected_vresult_llint)) {
+#if DEBUG
+    printf ("ERROR, vec_replace_elt (src_vb_llint, src_va_llint, index)\n");
+    for (i = 0; i < 2; i++)
+      printf (" vresult_llint[%d] = %lld, expected_vresult_llint[%d] = %lld\n",
+	      i, vresult_llint[i], i, expected_vresult_llint[i]);
+#else
+    abort ();
+#endif
+  }
+
+  src_a_double = 678.0;
+  src_va_double = (vector double) { 0.0, 50.0 };
+  vresult_double = (vector double) { 0.0, 0.0 };
+  expected_vresult_double = (vector double) { 0.0, 678.0 };
+
+  vresult_double = vec_replace_elt (src_va_double, src_a_double, 1);
+
+  if (!vec_all_eq (vresult_double, expected_vresult_double)) {
+#if DEBUG
+    printf ("ERROR, vec_replace_elt (src_vb_double, src_va_double, index)\n");
+    for (i = 0; i < 2; i++)
+      printf (" vresult_double[%d] = %f, expected_vresult_double[%d] = %f\n",
+	      i, vresult_double[i], i, expected_vresult_double[i]);
+#else
+    abort ();
+#endif
+  }
+
+
+  /* Vector replace 32-bit, unaligned; expected values assume LE layout.  */
+  src_a_uint = 345;
+  src_va_uint = (vector unsigned int) { 1, 2, 0, 0 };
+  vresult_uint = (vector unsigned int) { 0, 0, 0, 0 };
+  /* Byte index 3 will overwrite part of elements 2 and 3.  */
+  expected_vresult_uint = (vector unsigned int) { 1, 2, 345*256, 0 };
+
+  vresult_uint = vec_replace_unaligned (src_va_uint, src_a_uint, 3);
+
+  if (!vec_all_eq (vresult_uint, expected_vresult_uint)) {
+#if DEBUG
+    printf ("ERROR, vec_replace_unaligned (src_vb_uint, src_va_uint, index)\n");
+    for (i = 0; i < 4; i++)
+      printf (" vresult_uint[%d] = %u, expected_vresult_uint[%d] = %u\n",
+	      i, vresult_uint[i], i, expected_vresult_uint[i]);
+#else
+    abort ();
+#endif
+  }
+
+  src_a_int = 234;
+  src_va_int = (vector int) { 1, 0, 3, 4 };
+  vresult_int = (vector int) { 0, 0, 0, 0 };
+  /* Byte index 7 will overwrite part of elements 1 and 2.  */
+  expected_vresult_int = (vector int) { 1, 234*256, 0, 4 };
+
+  vresult_int = vec_replace_unaligned (src_va_int, src_a_int, 7);
+
+  if (!vec_all_eq (vresult_int, expected_vresult_int)) {
+#if DEBUG
+    printf ("ERROR, vec_replace_unaligned (src_vb_int, src_va_int, index)\n");
+    for (i = 0; i < 4; i++)
+      printf (" vresult_int[%d] = %d, expected_vresult_int[%d] = %d\n",
+	      i, vresult_int[i], i, expected_vresult_int[i]);
+#else
+    abort ();
+#endif
+  }
+
+  src_a_float = 34.0;
+  src_va_float = (vector float) { 0.0, 10.0, 20.0, 30.0 };
+  vresult_float = (vector float) { 0.0, 0.0, 0.0, 0.0 };
+  expected_vresult_float = (vector float) { 0.0, 34.0, 20.0, 30.0 };
+
+  vresult_float = vec_replace_unaligned (src_va_float, src_a_float, 8);
+
+  if (!vec_all_eq (vresult_float, expected_vresult_float)) {
+#if DEBUG
+    printf ("ERROR, vec_replace_unaligned (src_vb_float, src_va_float, index)\n");
+    for (i = 0; i < 4; i++)
+      printf (" vresult_float[%d] = %f, expected_vresult_float[%d] = %f\n",
+	      i, vresult_float[i], i, expected_vresult_float[i]);
+#else
+    abort ();
+#endif
+  }
+
+  /* Vector replace 64-bit, unaligned; expected values assume LE layout.  */
+  src_a_ullint = 456;
+  src_va_ullint = (vector unsigned long long int) { 0, 0x222 };
+  vresult_ullint = (vector unsigned long long int) { 0, 0 };
+  expected_vresult_ullint = (vector unsigned long long int) { 456*256,
+							      0x200 };
+
+  /* Byte index 7 overwrites the low byte of element 1 and most of element 0.  */
+  vresult_ullint = vec_replace_unaligned (src_va_ullint, src_a_ullint, 7);
+
+  if (!vec_all_eq (vresult_ullint, expected_vresult_ullint)) {
+#if DEBUG
+    printf ("ERROR, vec_replace_unaligned (src_vb_ullint, src_va_ullint, index)\n");
+    for (i = 0; i < 2; i++)
+      printf (" vresult_ullint[%d] = %llu, expected_vresult_ullint[%d] = %llu\n",
+	      i, vresult_ullint[i], i, expected_vresult_ullint[i]);
+#else
+    abort ();
+#endif
+  }
+
+  src_a_llint = 678;
+  src_va_llint = (vector long long int) { 0, 0x101 };
+  vresult_llint = (vector long long int) { 0, 0 };
+  /* Byte index 7 overwrites the low byte of element 1 and most of element 0.  */
+  expected_vresult_llint = (vector long long int) { 678*256, 0x100 };
+
+  vresult_llint = vec_replace_unaligned (src_va_llint, src_a_llint, 7);
+
+  if (!vec_all_eq (vresult_llint, expected_vresult_llint)) {
+#if DEBUG
+    printf ("ERROR, vec_replace_unaligned (src_vb_llint, src_va_llint, index)\n");
+    for (i = 0; i < 2; i++)
+      printf (" vresult_llint[%d] = %lld, expected_vresult_llint[%d] = %lld\n",
+	      i, vresult_llint[i], i, expected_vresult_llint[i]);
+#else
+    abort ();
+#endif
+  }
+
+  src_a_double = 678.0;
+  src_va_double = (vector double) { 0.0, 50.0 };
+  vresult_double = (vector double) { 0.0, 0.0 };
+  expected_vresult_double = (vector double) { 0.0, 678.0 };
+
+  vresult_double = vec_replace_unaligned (src_va_double, src_a_double, 0);
+
+  if (!vec_all_eq (vresult_double, expected_vresult_double)) {
+#if DEBUG
+    printf ("ERROR, vec_replace_unaligned (src_vb_double, src_va_double, index)\n");
+    for (i = 0; i < 2; i++)
+      printf (" vresult_double[%d] = %f, expected_vresult_double[%d] = %f\n",
+	      i, vresult_double[i], i, expected_vresult_double[i]);
+#else
+    abort ();
+#endif
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times {\mvinsw\M} 6 } } */
+/* { dg-final { scan-assembler-times {\mvinsd\M} 6 } } */
+
+