diff mbox series

s390: Try to emit vlbr/vstbr instead of vperm et al.

Message ID 20230803065059.951867-2-stefansf@linux.ibm.com
State New
Headers show
Series s390: Try to emit vlbr/vstbr instead of vperm et al. | expand

Commit Message

Stefan Schulze Frielinghaus Aug. 3, 2023, 6:51 a.m. UTC
Bootstrapped and regtested on s390x.  Ok for mainline?

gcc/ChangeLog:

	* config/s390/s390.cc (expand_perm_as_a_vlbr_vstbr_candidate):
	New function which handles bswap patterns for vec_perm_const.
	(vectorize_vec_perm_const_1): Call new function.
	* config/s390/vector.md (*bswap<mode>): Fix operands in output
	template.
	(*vstbr<mode>): New insn.

gcc/testsuite/ChangeLog:

	* gcc.target/s390/s390.exp: Add subdirectory vxe2.
	* gcc.target/s390/vxe2/vlbr-1.c: New test.
	* gcc.target/s390/vxe2/vstbr-1.c: New test.
	* gcc.target/s390/vxe2/vstbr-2.c: New test.
---
 gcc/config/s390/s390.cc                      | 55 ++++++++++++++++++++
 gcc/config/s390/vector.md                    | 16 ++++--
 gcc/testsuite/gcc.target/s390/s390.exp       |  3 ++
 gcc/testsuite/gcc.target/s390/vxe2/vlbr-1.c  | 29 +++++++++++
 gcc/testsuite/gcc.target/s390/vxe2/vstbr-1.c | 29 +++++++++++
 gcc/testsuite/gcc.target/s390/vxe2/vstbr-2.c | 42 +++++++++++++++
 6 files changed, 170 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/vxe2/vlbr-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vxe2/vstbr-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vxe2/vstbr-2.c

Comments

Andreas Krebbel Aug. 3, 2023, 7:17 a.m. UTC | #1
On 8/3/23 08:51, Stefan Schulze Frielinghaus wrote:
> Bootstrapped and regtested on s390x.  Ok for mainline?
> 
> gcc/ChangeLog:
> 
> 	* config/s390/s390.cc (expand_perm_as_a_vlbr_vstbr_candidate):
> 	New function which handles bswap patterns for vec_perm_const.
> 	(vectorize_vec_perm_const_1): Call new function.
> 	* config/s390/vector.md (*bswap<mode>): Fix operands in output
> 	template.
> 	(*vstbr<mode>): New insn.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/s390/s390.exp: Add subdirectory vxe2.
> 	* gcc.target/s390/vxe2/vlbr-1.c: New test.
> 	* gcc.target/s390/vxe2/vstbr-1.c: New test.
> 	* gcc.target/s390/vxe2/vstbr-2.c: New test.

Ok. Thanks!

Andreas


> ---
>  gcc/config/s390/s390.cc                      | 55 ++++++++++++++++++++
>  gcc/config/s390/vector.md                    | 16 ++++--
>  gcc/testsuite/gcc.target/s390/s390.exp       |  3 ++
>  gcc/testsuite/gcc.target/s390/vxe2/vlbr-1.c  | 29 +++++++++++
>  gcc/testsuite/gcc.target/s390/vxe2/vstbr-1.c | 29 +++++++++++
>  gcc/testsuite/gcc.target/s390/vxe2/vstbr-2.c | 42 +++++++++++++++
>  6 files changed, 170 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/s390/vxe2/vlbr-1.c
>  create mode 100644 gcc/testsuite/gcc.target/s390/vxe2/vstbr-1.c
>  create mode 100644 gcc/testsuite/gcc.target/s390/vxe2/vstbr-2.c
> 
> diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
> index d9f10542473..91eb9232b10 100644
> --- a/gcc/config/s390/s390.cc
> +++ b/gcc/config/s390/s390.cc
> @@ -17698,6 +17698,58 @@ expand_perm_with_vstbrq (const struct expand_vec_perm_d &d)
>    return false;
>  }
>  
> +/* Try to emit vlbr/vstbr.  Note, this is only a candidate insn since
> +   TARGET_VECTORIZE_VEC_PERM_CONST operates on vector registers only.  Thus,
> +   either a later pass (fwprop, combine et al.) turns one of the input/output
> +   operands into a memory operand, or a splitter has to turn this back into a
> +   general vperm operation.  */
> +
> +static bool
> +expand_perm_as_a_vlbr_vstbr_candidate (const struct expand_vec_perm_d &d)
> +{
> +  static const char perm[4][MAX_VECT_LEN]
> +    = { { 1,  0,  3,  2,  5,  4,  7, 6, 9,  8,  11, 10, 13, 12, 15, 14 },
> +	{ 3,  2,  1,  0,  7,  6,  5, 4, 11, 10, 9,  8,  15, 14, 13, 12 },
> +	{ 7,  6,  5,  4,  3,  2,  1, 0, 15, 14, 13, 12, 11, 10, 9,  8  },
> +	{ 15, 14, 13, 12, 11, 10, 9, 8, 7,  6,  5,  4,  3,  2,  1,  0  } };
> +
> +  if (!TARGET_VXE2 || d.vmode != V16QImode || d.op0 != d.op1)
> +    return false;
> +
> +  if (memcmp (d.perm, perm[0], MAX_VECT_LEN) == 0)
> +    {
> +      rtx target = gen_rtx_SUBREG (V8HImode, d.target, 0);
> +      rtx op0 = gen_rtx_SUBREG (V8HImode, d.op0, 0);
> +      emit_insn (gen_bswapv8hi (target, op0));
> +      return true;
> +    }
> +
> +  if (memcmp (d.perm, perm[1], MAX_VECT_LEN) == 0)
> +    {
> +      rtx target = gen_rtx_SUBREG (V4SImode, d.target, 0);
> +      rtx op0 = gen_rtx_SUBREG (V4SImode, d.op0, 0);
> +      emit_insn (gen_bswapv4si (target, op0));
> +      return true;
> +    }
> +
> +  if (memcmp (d.perm, perm[2], MAX_VECT_LEN) == 0)
> +    {
> +      rtx target = gen_rtx_SUBREG (V2DImode, d.target, 0);
> +      rtx op0 = gen_rtx_SUBREG (V2DImode, d.op0, 0);
> +      emit_insn (gen_bswapv2di (target, op0));
> +      return true;
> +    }
> +
> +  if (memcmp (d.perm, perm[3], MAX_VECT_LEN) == 0)
> +    {
> +      rtx target = gen_rtx_SUBREG (V1TImode, d.target, 0);
> +      rtx op0 = gen_rtx_SUBREG (V1TImode, d.op0, 0);
> +      emit_insn (gen_bswapv1ti (target, op0));
> +      return true;
> +    }
> +
> +  return false;
> +}
>  
>  /* Try to find the best sequence for the vector permute operation
>     described by D.  Return true if the operation could be
> @@ -17720,6 +17772,9 @@ vectorize_vec_perm_const_1 (const struct expand_vec_perm_d &d)
>    if (expand_perm_with_rot (d))
>      return true;
>  
> +  if (expand_perm_as_a_vlbr_vstbr_candidate (d))
> +    return true;
> +
>    return false;
>  }
>  
> diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
> index 21bec729efa..f0e9ed3d263 100644
> --- a/gcc/config/s390/vector.md
> +++ b/gcc/config/s390/vector.md
> @@ -47,6 +47,7 @@
>  (define_mode_iterator VI_HW     [V16QI V8HI V4SI V2DI])
>  (define_mode_iterator VI_HW_QHS [V16QI V8HI V4SI])
>  (define_mode_iterator VI_HW_HSD [V8HI  V4SI V2DI])
> +(define_mode_iterator VI_HW_HSDT [V8HI V4SI V2DI V1TI TI])
>  (define_mode_iterator VI_HW_HS  [V8HI  V4SI])
>  (define_mode_iterator VI_HW_QH  [V16QI V8HI])
>  
> @@ -2876,12 +2877,12 @@
>       (use (match_dup 2))])]
>    "TARGET_VX"
>  {
> -  static char p[4][16] =
> +  static const char p[4][16] =
>      { { 1,  0,  3,  2,  5,  4,  7, 6, 9,  8,  11, 10, 13, 12, 15, 14 },   /* H */
>        { 3,  2,  1,  0,  7,  6,  5, 4, 11, 10, 9,  8,  15, 14, 13, 12 },   /* S */
>        { 7,  6,  5,  4,  3,  2,  1, 0, 15, 14, 13, 12, 11, 10, 9,  8  },   /* D */
>        { 15, 14, 13, 12, 11, 10, 9, 8, 7,  6,  5,  4,  3,  2,  1,  0  } }; /* T */
> -  char *perm;
> +  const char *perm;
>    rtx perm_rtx[16];
>  
>    switch (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)))
> @@ -2933,8 +2934,8 @@
>    "TARGET_VXE2"
>    "@
>     #
> -   vlbr<bhfgq>\t%v0,%v1
> -   vstbr<bhfgq>\t%v1,%v0"
> +   vlbr<bhfgq>\t%v0,%1
> +   vstbr<bhfgq>\t%v1,%0"
>    "&& reload_completed
>     && !memory_operand (operands[0], <MODE>mode)
>     && !memory_operand (operands[1], <MODE>mode)"
> @@ -2947,6 +2948,13 @@
>    ""
>    [(set_attr "op_type"      "*,VRX,VRX")])
>  
> +(define_insn "*vstbr<mode>"
> +  [(set (match_operand:VI_HW_HSDT                   0 "memory_operand"  "=R")
> +	(bswap:VI_HW_HSDT (match_operand:VI_HW_HSDT 1 "register_operand" "v")))]
> +  "TARGET_VXE2"
> +  "vstbr<bhfgq>\t%v1,%0"
> +  [(set_attr "op_type" "VRX")])
> +
>  ;
>  ; Implement len_load/len_store optabs with vll/vstl.
>  (define_expand "len_load_v16qi"
> diff --git a/gcc/testsuite/gcc.target/s390/s390.exp b/gcc/testsuite/gcc.target/s390/s390.exp
> index 58258492f83..a2b48eed5f2 100644
> --- a/gcc/testsuite/gcc.target/s390/s390.exp
> +++ b/gcc/testsuite/gcc.target/s390/s390.exp
> @@ -254,6 +254,9 @@ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/arch13/*.{c,S}]] \
>  dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/vxe/*.{c,S}]] \
>  	"" "-O3 -march=arch12 -mzarch"
>  
> +dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/vxe2/*.{c,S}]] \
> +	"" "-O3 -march=arch13 -mzarch"
> +
>  # Some md tests require libatomic
>  atomic_init
>  dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/md/*.{c,S}]] \
> diff --git a/gcc/testsuite/gcc.target/s390/vxe2/vlbr-1.c b/gcc/testsuite/gcc.target/s390/vxe2/vlbr-1.c
> new file mode 100644
> index 00000000000..34fd1db23e3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/vxe2/vlbr-1.c
> @@ -0,0 +1,29 @@
> +/* { dg-do compile } */
> +/* { dg-final { scan-assembler {\tvlbrh\t} } } */
> +/* { dg-final { scan-assembler {\tvlbrf\t} } } */
> +/* { dg-final { scan-assembler {\tvlbrg\t} } } */
> +/* { dg-final { scan-assembler-not {\tvperm\t} } } */
> +
> +/* The addend X ensures that a LOAD REVERSE and not a STORE REVERSE is
> +   emitted.  */
> +
> +void
> +vlbrh (unsigned short *a, unsigned short x)
> +{
> +  for (int i = 0; i < 128; ++i)
> +    a[i] = __builtin_bswap16 (a[i]) + x;
> +}
> +
> +void
> +vlbrf (unsigned int *a, unsigned int x)
> +{
> +  for (int i = 0; i < 128; ++i)
> +    a[i] = __builtin_bswap32 (a[i]) + x;
> +}
> +
> +void
> +vlbrg (unsigned long long *a, unsigned long long x)
> +{
> +  for (int i = 0; i < 128; ++i)
> +    a[i] = __builtin_bswap64 (a[i]) + x;
> +}
> diff --git a/gcc/testsuite/gcc.target/s390/vxe2/vstbr-1.c b/gcc/testsuite/gcc.target/s390/vxe2/vstbr-1.c
> new file mode 100644
> index 00000000000..38947d12380
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/vxe2/vstbr-1.c
> @@ -0,0 +1,29 @@
> +/* { dg-do compile } */
> +/* { dg-final { scan-assembler {\tvstbrh\t} } } */
> +/* { dg-final { scan-assembler {\tvstbrf\t} } } */
> +/* { dg-final { scan-assembler {\tvstbrg\t} } } */
> +/* { dg-final { scan-assembler-not {\tvperm\t} } } */
> +
> +/* The addend X ensures that a STORE REVERSE and not a LOAD REVERSE is
> +   emitted.  */
> +
> +void
> +vlbrh (unsigned short *a, unsigned short x)
> +{
> +  for (int i = 0; i < 128; ++i)
> +    a[i] = __builtin_bswap16 (a[i] + x);
> +}
> +
> +void
> +vlbrf (unsigned int *a, unsigned int x)
> +{
> +  for (int i = 0; i < 128; ++i)
> +    a[i] = __builtin_bswap32 (a[i] + x);
> +}
> +
> +void
> +vlbrg (unsigned long long *a, unsigned long long x)
> +{
> +  for (int i = 0; i < 128; ++i)
> +    a[i] = __builtin_bswap64 (a[i] + x);
> +}
> diff --git a/gcc/testsuite/gcc.target/s390/vxe2/vstbr-2.c b/gcc/testsuite/gcc.target/s390/vxe2/vstbr-2.c
> new file mode 100644
> index 00000000000..65d2e45381c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/vxe2/vstbr-2.c
> @@ -0,0 +1,42 @@
> +/* { dg-do compile } */
> +/* { dg-final { scan-assembler {\tvstbrh\t} } } */
> +/* { dg-final { scan-assembler {\tvstbrf\t} } } */
> +/* { dg-final { scan-assembler {\tvstbrg\t} } } */
> +/* { dg-final { scan-assembler-not {\tvperm\t} } } */
> +
> +typedef unsigned short __attribute__ ((vector_size (16))) V8HI;
> +typedef unsigned int __attribute__ ((vector_size (16))) V4SI;
> +typedef unsigned long long __attribute__ ((vector_size (16))) V2DI;
> +
> +void
> +vstbrh (V8HI *p, V8HI x)
> +{
> +  V8HI y;
> +
> +  for (int i = 0; i < 8; ++i)
> +    y[i] = __builtin_bswap16 (x[i]);
> +
> +  *p = y;
> +}
> +
> +void
> +vstbrf (V4SI *p, V4SI x)
> +{
> +  V4SI y;
> +
> +  for (int i = 0; i < 4; ++i)
> +    y[i] = __builtin_bswap32 (x[i]);
> +
> +  *p = y;
> +}
> +
> +void
> +vstbrg (V2DI *p, V2DI x)
> +{
> +  V2DI y;
> +
> +  for (int i = 0; i < 2; ++i)
> +    y[i] = __builtin_bswap64 (x[i]);
> +
> +  *p = y;
> +}
diff mbox series

Patch

diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index d9f10542473..91eb9232b10 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -17698,6 +17698,58 @@  expand_perm_with_vstbrq (const struct expand_vec_perm_d &d)
   return false;
 }
 
+/* Try to emit vlbr/vstbr.  Note, this is only a candidate insn since
+   TARGET_VECTORIZE_VEC_PERM_CONST operates on vector registers only.  Thus,
+   either a later pass (fwprop, combine et al.) turns one of the input/output
+   operands into a memory operand, or a splitter has to turn this back into a
+   general vperm operation.  */
+
+static bool
+expand_perm_as_a_vlbr_vstbr_candidate (const struct expand_vec_perm_d &d)
+{
+  static const char perm[4][MAX_VECT_LEN]
+    = { { 1,  0,  3,  2,  5,  4,  7, 6, 9,  8,  11, 10, 13, 12, 15, 14 },
+	{ 3,  2,  1,  0,  7,  6,  5, 4, 11, 10, 9,  8,  15, 14, 13, 12 },
+	{ 7,  6,  5,  4,  3,  2,  1, 0, 15, 14, 13, 12, 11, 10, 9,  8  },
+	{ 15, 14, 13, 12, 11, 10, 9, 8, 7,  6,  5,  4,  3,  2,  1,  0  } };
+
+  if (!TARGET_VXE2 || d.vmode != V16QImode || d.op0 != d.op1)
+    return false;
+
+  if (memcmp (d.perm, perm[0], MAX_VECT_LEN) == 0)
+    {
+      rtx target = gen_rtx_SUBREG (V8HImode, d.target, 0);
+      rtx op0 = gen_rtx_SUBREG (V8HImode, d.op0, 0);
+      emit_insn (gen_bswapv8hi (target, op0));
+      return true;
+    }
+
+  if (memcmp (d.perm, perm[1], MAX_VECT_LEN) == 0)
+    {
+      rtx target = gen_rtx_SUBREG (V4SImode, d.target, 0);
+      rtx op0 = gen_rtx_SUBREG (V4SImode, d.op0, 0);
+      emit_insn (gen_bswapv4si (target, op0));
+      return true;
+    }
+
+  if (memcmp (d.perm, perm[2], MAX_VECT_LEN) == 0)
+    {
+      rtx target = gen_rtx_SUBREG (V2DImode, d.target, 0);
+      rtx op0 = gen_rtx_SUBREG (V2DImode, d.op0, 0);
+      emit_insn (gen_bswapv2di (target, op0));
+      return true;
+    }
+
+  if (memcmp (d.perm, perm[3], MAX_VECT_LEN) == 0)
+    {
+      rtx target = gen_rtx_SUBREG (V1TImode, d.target, 0);
+      rtx op0 = gen_rtx_SUBREG (V1TImode, d.op0, 0);
+      emit_insn (gen_bswapv1ti (target, op0));
+      return true;
+    }
+
+  return false;
+}
 
 /* Try to find the best sequence for the vector permute operation
    described by D.  Return true if the operation could be
@@ -17720,6 +17772,9 @@  vectorize_vec_perm_const_1 (const struct expand_vec_perm_d &d)
   if (expand_perm_with_rot (d))
     return true;
 
+  if (expand_perm_as_a_vlbr_vstbr_candidate (d))
+    return true;
+
   return false;
 }
 
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 21bec729efa..f0e9ed3d263 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -47,6 +47,7 @@ 
 (define_mode_iterator VI_HW     [V16QI V8HI V4SI V2DI])
 (define_mode_iterator VI_HW_QHS [V16QI V8HI V4SI])
 (define_mode_iterator VI_HW_HSD [V8HI  V4SI V2DI])
+(define_mode_iterator VI_HW_HSDT [V8HI V4SI V2DI V1TI TI])
 (define_mode_iterator VI_HW_HS  [V8HI  V4SI])
 (define_mode_iterator VI_HW_QH  [V16QI V8HI])
 
@@ -2876,12 +2877,12 @@ 
      (use (match_dup 2))])]
   "TARGET_VX"
 {
-  static char p[4][16] =
+  static const char p[4][16] =
     { { 1,  0,  3,  2,  5,  4,  7, 6, 9,  8,  11, 10, 13, 12, 15, 14 },   /* H */
       { 3,  2,  1,  0,  7,  6,  5, 4, 11, 10, 9,  8,  15, 14, 13, 12 },   /* S */
       { 7,  6,  5,  4,  3,  2,  1, 0, 15, 14, 13, 12, 11, 10, 9,  8  },   /* D */
       { 15, 14, 13, 12, 11, 10, 9, 8, 7,  6,  5,  4,  3,  2,  1,  0  } }; /* T */
-  char *perm;
+  const char *perm;
   rtx perm_rtx[16];
 
   switch (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)))
@@ -2933,8 +2934,8 @@ 
   "TARGET_VXE2"
   "@
    #
-   vlbr<bhfgq>\t%v0,%v1
-   vstbr<bhfgq>\t%v1,%v0"
+   vlbr<bhfgq>\t%v0,%1
+   vstbr<bhfgq>\t%v1,%0"
   "&& reload_completed
    && !memory_operand (operands[0], <MODE>mode)
    && !memory_operand (operands[1], <MODE>mode)"
@@ -2947,6 +2948,13 @@ 
   ""
   [(set_attr "op_type"      "*,VRX,VRX")])
 
+(define_insn "*vstbr<mode>"
+  [(set (match_operand:VI_HW_HSDT                   0 "memory_operand"  "=R")
+	(bswap:VI_HW_HSDT (match_operand:VI_HW_HSDT 1 "register_operand" "v")))]
+  "TARGET_VXE2"
+  "vstbr<bhfgq>\t%v1,%0"
+  [(set_attr "op_type" "VRX")])
+
 ;
 ; Implement len_load/len_store optabs with vll/vstl.
 (define_expand "len_load_v16qi"
diff --git a/gcc/testsuite/gcc.target/s390/s390.exp b/gcc/testsuite/gcc.target/s390/s390.exp
index 58258492f83..a2b48eed5f2 100644
--- a/gcc/testsuite/gcc.target/s390/s390.exp
+++ b/gcc/testsuite/gcc.target/s390/s390.exp
@@ -254,6 +254,9 @@  dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/arch13/*.{c,S}]] \
 dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/vxe/*.{c,S}]] \
 	"" "-O3 -march=arch12 -mzarch"
 
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/vxe2/*.{c,S}]] \
+	"" "-O3 -march=arch13 -mzarch"
+
 # Some md tests require libatomic
 atomic_init
 dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/md/*.{c,S}]] \
diff --git a/gcc/testsuite/gcc.target/s390/vxe2/vlbr-1.c b/gcc/testsuite/gcc.target/s390/vxe2/vlbr-1.c
new file mode 100644
index 00000000000..34fd1db23e3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vxe2/vlbr-1.c
@@ -0,0 +1,29 @@ 
+/* { dg-do compile } */
+/* { dg-final { scan-assembler {\tvlbrh\t} } } */
+/* { dg-final { scan-assembler {\tvlbrf\t} } } */
+/* { dg-final { scan-assembler {\tvlbrg\t} } } */
+/* { dg-final { scan-assembler-not {\tvperm\t} } } */
+
+/* The addend X ensures that a LOAD REVERSE and not a STORE REVERSE is
+   emitted.  */
+
+void
+vlbrh (unsigned short *a, unsigned short x)
+{
+  for (int i = 0; i < 128; ++i)
+    a[i] = __builtin_bswap16 (a[i]) + x;
+}
+
+void
+vlbrf (unsigned int *a, unsigned int x)
+{
+  for (int i = 0; i < 128; ++i)
+    a[i] = __builtin_bswap32 (a[i]) + x;
+}
+
+void
+vlbrg (unsigned long long *a, unsigned long long x)
+{
+  for (int i = 0; i < 128; ++i)
+    a[i] = __builtin_bswap64 (a[i]) + x;
+}
diff --git a/gcc/testsuite/gcc.target/s390/vxe2/vstbr-1.c b/gcc/testsuite/gcc.target/s390/vxe2/vstbr-1.c
new file mode 100644
index 00000000000..38947d12380
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vxe2/vstbr-1.c
@@ -0,0 +1,29 @@ 
+/* { dg-do compile } */
+/* { dg-final { scan-assembler {\tvstbrh\t} } } */
+/* { dg-final { scan-assembler {\tvstbrf\t} } } */
+/* { dg-final { scan-assembler {\tvstbrg\t} } } */
+/* { dg-final { scan-assembler-not {\tvperm\t} } } */
+
+/* The addend X ensures that a STORE REVERSE and not a LOAD REVERSE is
+   emitted.  */
+
+void
+vlbrh (unsigned short *a, unsigned short x)
+{
+  for (int i = 0; i < 128; ++i)
+    a[i] = __builtin_bswap16 (a[i] + x);
+}
+
+void
+vlbrf (unsigned int *a, unsigned int x)
+{
+  for (int i = 0; i < 128; ++i)
+    a[i] = __builtin_bswap32 (a[i] + x);
+}
+
+void
+vlbrg (unsigned long long *a, unsigned long long x)
+{
+  for (int i = 0; i < 128; ++i)
+    a[i] = __builtin_bswap64 (a[i] + x);
+}
diff --git a/gcc/testsuite/gcc.target/s390/vxe2/vstbr-2.c b/gcc/testsuite/gcc.target/s390/vxe2/vstbr-2.c
new file mode 100644
index 00000000000..65d2e45381c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vxe2/vstbr-2.c
@@ -0,0 +1,42 @@ 
+/* { dg-do compile } */
+/* { dg-final { scan-assembler {\tvstbrh\t} } } */
+/* { dg-final { scan-assembler {\tvstbrf\t} } } */
+/* { dg-final { scan-assembler {\tvstbrg\t} } } */
+/* { dg-final { scan-assembler-not {\tvperm\t} } } */
+
+typedef unsigned short __attribute__ ((vector_size (16))) V8HI;
+typedef unsigned int __attribute__ ((vector_size (16))) V4SI;
+typedef unsigned long long __attribute__ ((vector_size (16))) V2DI;
+
+void
+vstbrh (V8HI *p, V8HI x)
+{
+  V8HI y;
+
+  for (int i = 0; i < 8; ++i)
+    y[i] = __builtin_bswap16 (x[i]);
+
+  *p = y;
+}
+
+void
+vstbrf (V4SI *p, V4SI x)
+{
+  V4SI y;
+
+  for (int i = 0; i < 4; ++i)
+    y[i] = __builtin_bswap32 (x[i]);
+
+  *p = y;
+}
+
+void
+vstbrg (V2DI *p, V2DI x)
+{
+  V2DI y;
+
+  for (int i = 0; i < 2; ++i)
+    y[i] = __builtin_bswap64 (x[i]);
+
+  *p = y;
+}