[rs6000] Add support for the vec_xl_be builtin

Message ID 1501188521.4760.33.camel@us.ibm.com
State New

Commit Message

Carl Love July 27, 2017, 8:48 p.m. UTC
GCC Maintainers:

The following patch adds support for the vec_xl_be builtins.  The
builtins always load the data in big endian (BE) element order,
regardless of the endianness of the target.
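
For illustration, a minimal (hypothetical) use of the builtin, not
part of the patch:

    #include <altivec.h>

    vector signed char
    load_be (signed char *buf)
    {
      /* Load 16 bytes starting at buf.  On a big endian target the
         result is { buf[0], ..., buf[15] }; on a little endian target
         the element order is reversed.  */
      return vec_xl_be (0, buf);
    }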

The patch has been tested on powerpc64le-unknown-linux-gnu (Power 8 LE)
and on powerpc64-unknown-linux-gnu (Power 7 BE).


Please let me know if the following patch is acceptable.  Thanks.

                        Carl Love

--------------------------------------------------------------------------

gcc/ChangeLog:

2017-07-27  Carl Love  <cel@us.ibm.com>

	* config/rs6000/rs6000-c.c: Add support for built-in functions
	vector signed char vec_xl_be (signed long long, signed char *);
	vector unsigned char vec_xl_be (signed long long, unsigned char *);
	vector signed int vec_xl_be (signed long long, signed int *);
	vector unsigned int vec_xl_be (signed long long, unsigned int *);
	vector signed long long vec_xl_be (signed long long, signed long long *);
	vector unsigned long long vec_xl_be (signed long long, unsigned long long *);
	vector signed short vec_xl_be (signed long long, signed short *);
	vector unsigned short vec_xl_be (signed long long, unsigned short *);
	vector double vec_xl_be (signed long long, double *);
	vector float vec_xl_be (signed long long, float *);
	* config/rs6000/altivec.h (vec_xl_be): Add #define.
	* config/rs6000/rs6000-builtin.def (XL_BE_V16QI, XL_BE_V8HI, XL_BE_V4SI,
	XL_BE_V2DI, XL_BE_V4SF, XL_BE_V2DF, XL_BE): Add definitions for the builtins.
	* config/rs6000/rs6000.c (altivec_expand_xl_be_builtin): Add function.
	(altivec_expand_builtin): Add switch statement to call
	altivec_expand_xl_be_builtin for each new builtin.
	(altivec_init_builtins): Add def_builtin for __builtin_vsx_le_be_v8hi,
	__builtin_vsx_le_be_v4si, __builtin_vsx_le_be_v2di, __builtin_vsx_le_be_v4sf,
	__builtin_vsx_le_be_v2df, __builtin_vsx_le_be_v16qi.
	* doc/extend.texi: Update the built-in documentation file for the
	new built-in functions.

gcc/testsuite/ChangeLog:

2017-07-27  Carl Love  <cel@us.ibm.com>

	* gcc.target/powerpc/builtins-4-runnable.c: New test file for the
	new builtins.
---
 gcc/config/rs6000/altivec.h                        |   1 +
 gcc/config/rs6000/rs6000-builtin.def               |   9 +
 gcc/config/rs6000/rs6000-c.c                       |  20 ++
 gcc/config/rs6000/rs6000.c                         | 111 +++++++
 gcc/doc/extend.texi                                |  13 +
 .../gcc.target/powerpc/builtins-4-runnable.c       | 321 +++++++++++++++++++++
 6 files changed, 475 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/builtins-4-runnable.c

Comments

Segher Boessenkool July 27, 2017, 9:24 p.m. UTC | #1
Hi Carl,

On Thu, Jul 27, 2017 at 01:48:41PM -0700, Carl Love wrote:
> +  pat = GEN_FCN (icode) (target, addr);
> +  if (! pat)
> +    return 0;

No space after "!".
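
That is:

    if (!pat)
      return 0;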

> +  /*  Reverse element order of elements if in LE mode */

Single space after "/*"; sentences end with dot space space.
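
I.e. the comment should read:

    /* Reverse element order of elements if in LE mode.  */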

> +  /* LX_BE  We initialized them to always load in big endian order.  */

XL_BE.
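
I.e.:

    /* XL_BE.  We initialized them to always load in big endian order.  */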

> +    default:
> +       break;
> +      /* Fall through.  */
> +    }

"break" is indented one space too many I think?

> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/builtins-4-runnable.c
> @@ -0,0 +1,321 @@
> +/* { dg-do run { target { powerpc64*-*-* } } } */

powerpc64*-*-* is pretty much never correct (you can use
powerpc64-linux-gcc -m32 as well as powerpc-linux-gcc -m64).  If you
need to restrict a testcase to 64-bit, use an lp64 test.  But this
test works everywhere I think?  So just powerpc*-*-*, or you can
leave out even that, since this is in gcc.target/powerpc.
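
E.g.:

    /* { dg-do run { target { powerpc*-*-* } } } */

or simply:

    /* { dg-do run } */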

Looks fine otherwise.  Please fix those trivialities and then
commit, thanks!


Segher

Patch

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 4d34a97..c8e508c 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -355,6 +355,7 @@ 
 #define vec_vsx_ld __builtin_vec_vsx_ld
 #define vec_vsx_st __builtin_vec_vsx_st
 #define vec_xl __builtin_vec_vsx_ld
+#define vec_xl_be __builtin_vec_xl_be
 #define vec_xst __builtin_vec_vsx_st
 
 /* Note, xxsldi and xxpermdi were added as __builtin_vsx_<xxx> functions
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index a043e70..850164a 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1735,6 +1735,14 @@  BU_VSX_X (LXVW4X_V4SF,	      "lxvw4x_v4sf",	MEM)
 BU_VSX_X (LXVW4X_V4SI,        "lxvw4x_v4si",	MEM)
 BU_VSX_X (LXVW4X_V8HI,        "lxvw4x_v8hi",	MEM)
 BU_VSX_X (LXVW4X_V16QI,	      "lxvw4x_v16qi",	MEM)
+
+BU_VSX_X (XL_BE_V16QI, "xl_be_v16qi", MEM)
+BU_VSX_X (XL_BE_V8HI, "xl_be_v8hi", MEM)
+BU_VSX_X (XL_BE_V4SI, "xl_be_v4si", MEM)
+BU_VSX_X (XL_BE_V2DI, "xl_be_v2di", MEM)
+BU_VSX_X (XL_BE_V4SF, "xl_be_v4sf", MEM)
+BU_VSX_X (XL_BE_V2DF, "xl_be_v2df", MEM)
+
 BU_VSX_X (STXSDX,	      "stxsdx",		MEM)
 BU_VSX_X (STXVD2X_V1TI,	      "stxvd2x_v1ti",	MEM)
 BU_VSX_X (STXVD2X_V2DF,	      "stxvd2x_v2df",	MEM)
@@ -1835,6 +1843,7 @@  BU_VSX_OVERLOAD_1 (VUNSIGNEDO,  "vunsignedo")
 BU_VSX_OVERLOAD_X (LD,	     "ld")
 BU_VSX_OVERLOAD_X (ST,	     "st")
 BU_VSX_OVERLOAD_X (XL,	     "xl")
+BU_VSX_OVERLOAD_X (XL_BE,    "xl_be")
 BU_VSX_OVERLOAD_X (XST,	     "xst")
 
 /* 2 argument CMPB instructions added in ISA 2.05. */
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index 1359099..7ffb3fd 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -3077,6 +3077,26 @@  const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     ~RS6000_BTI_unsigned_V16QI, 0 },
   { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V16QI,
     RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
+  { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V16QI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
+  { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V16QI,
+    RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
+  { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V8HI,
+    RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 },
+  { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V8HI,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 },
+  { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V4SI,
+    RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 },
+  { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V4SI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 },
+  { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V2DI,
+    RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long, 0 },
+  { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V2DI,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long_long, 0 },
+  { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V4SF,
+    RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 },
+  { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V2DF,
+    RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 },
   { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
     RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
   { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 7d8bf63..c510f2e 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -14597,6 +14597,58 @@  altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
 }
 
 static rtx
+altivec_expand_xl_be_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
+{
+  rtx pat, addr;
+  tree arg0 = CALL_EXPR_ARG (exp, 0);
+  tree arg1 = CALL_EXPR_ARG (exp, 1);
+  machine_mode tmode = insn_data[icode].operand[0].mode;
+  machine_mode mode0 = Pmode;
+  machine_mode mode1 = Pmode;
+  rtx op0 = expand_normal (arg0);
+  rtx op1 = expand_normal (arg1);
+
+  if (icode == CODE_FOR_nothing)
+    /* Builtin not supported on this processor.  */
+    return 0;
+
+  /* If we got invalid arguments bail out before generating bad rtl.  */
+  if (arg0 == error_mark_node || arg1 == error_mark_node)
+    return const0_rtx;
+
+  if (target == 0
+      || GET_MODE (target) != tmode
+      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+	  target = gen_reg_rtx (tmode);
+
+  op1 = copy_to_mode_reg (mode1, op1);
+
+  if (op0 == const0_rtx)
+    addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
+  else
+    {
+      op0 = copy_to_mode_reg (mode0, op0);
+      addr = gen_rtx_MEM (blk ? BLKmode : tmode,
+                          gen_rtx_PLUS (Pmode, op1, op0));
+    }
+
+  pat = GEN_FCN (icode) (target, addr);
+  if (! pat)
+    return 0;
+
+  emit_insn (pat);
+  /*  Reverse element order of elements if in LE mode */
+  if (!VECTOR_ELT_ORDER_BIG)
+    {
+      rtx sel = swap_selector_for_mode (tmode);
+      rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, target, target, sel),
+				  UNSPEC_VPERM);
+      emit_insn (gen_rtx_SET (target, vperm));
+    }
+  return target;
+}
+
+static rtx
 paired_expand_stv_builtin (enum insn_code icode, tree exp)
 {
   tree arg0 = CALL_EXPR_ARG (exp, 0);
@@ -15988,6 +16040,50 @@  altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
       /* Fall through.  */
     }
 
+  /* LX_BE  We initialized them to always load in big endian order.  */
+  switch (fcode)
+    {
+    case VSX_BUILTIN_XL_BE_V2DI:
+       {
+          enum insn_code code = CODE_FOR_vsx_load_v2di;
+          return altivec_expand_xl_be_builtin (code, exp, target, false);
+       }
+       break;
+    case VSX_BUILTIN_XL_BE_V4SI:
+       {
+          enum insn_code code = CODE_FOR_vsx_load_v4si;
+          return altivec_expand_xl_be_builtin (code, exp, target, false);
+       }
+       break;
+    case VSX_BUILTIN_XL_BE_V8HI:
+       {
+          enum insn_code code = CODE_FOR_vsx_load_v8hi;
+          return altivec_expand_xl_be_builtin (code, exp, target, false);
+       }
+       break;
+    case VSX_BUILTIN_XL_BE_V16QI:
+       {
+          enum insn_code code = CODE_FOR_vsx_load_v16qi;
+          return altivec_expand_xl_be_builtin (code, exp, target, false);
+       }
+       break;
+    case VSX_BUILTIN_XL_BE_V2DF:
+		 {
+          enum insn_code code = CODE_FOR_vsx_load_v2df;
+          return altivec_expand_xl_be_builtin (code, exp, target, false);
+       }
+       break;
+    case VSX_BUILTIN_XL_BE_V4SF:
+       {
+          enum insn_code code = CODE_FOR_vsx_load_v4sf;
+          return altivec_expand_xl_be_builtin (code, exp, target, false);
+       }
+       break;
+    default:
+       break;
+      /* Fall through.  */
+    }
+
   *expandedp = false;
   return NULL_RTX;
 }
@@ -17448,6 +17544,19 @@  altivec_init_builtins (void)
   def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
 	       VSX_BUILTIN_ST_ELEMREV_V4SI);
 
+  def_builtin ("__builtin_vsx_le_be_v8hi", v8hi_ftype_long_pcvoid,
+		   VSX_BUILTIN_XL_BE_V8HI);
+  def_builtin ("__builtin_vsx_le_be_v4si", v4si_ftype_long_pcvoid,
+		   VSX_BUILTIN_XL_BE_V4SI);
+  def_builtin ("__builtin_vsx_le_be_v2di", v2di_ftype_long_pcvoid,
+		   VSX_BUILTIN_XL_BE_V2DI);
+  def_builtin ("__builtin_vsx_le_be_v4sf", v4sf_ftype_long_pcvoid,
+		   VSX_BUILTIN_XL_BE_V4SF);
+  def_builtin ("__builtin_vsx_le_be_v2df", v2df_ftype_long_pcvoid,
+		   VSX_BUILTIN_XL_BE_V2DF);
+  def_builtin ("__builtin_vsx_le_be_v16qi", v16qi_ftype_long_pcvoid,
+		   VSX_BUILTIN_XL_BE_V16QI);
+
   if (TARGET_P9_VECTOR)
     {
       def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
@@ -17477,6 +17586,8 @@  altivec_init_builtins (void)
 	       VSX_BUILTIN_VEC_ST);
   def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
 	       VSX_BUILTIN_VEC_XL);
+  def_builtin ("__builtin_vec_xl_be", opaque_ftype_long_pcvoid,
+	       VSX_BUILTIN_VEC_XL_BE);
   def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
 	       VSX_BUILTIN_VEC_XST);
 
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 562666e..4fd5beb 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -15461,6 +15461,19 @@  signed int vec_cnttz_lsbb (vector unsigned char);
 
 vector unsigned short vec_pack_to_short_fp32 (vector float, vector float);
 
+vector signed char vec_xl_be (signed long long, signed char *);
+vector unsigned char vec_xl_be (signed long long, unsigned char *);
+vector signed int vec_xl_be (signed long long, signed int *);
+vector unsigned int vec_xl_be (signed long long, unsigned int *);
+vector signed __int128 vec_xl_be (signed long long, signed __int128 *);
+vector unsigned __int128 vec_xl_be (signed long long, unsigned __int128 *);
+vector signed long long vec_xl_be (signed long long, signed long long *);
+vector unsigned long long vec_xl_be (signed long long, unsigned long long *);
+vector signed short vec_xl_be (signed long long, signed short *);
+vector unsigned short vec_xl_be (signed long long, unsigned short *);
+vector double vec_xl_be (signed long long, double *);
+vector float vec_xl_be (signed long long, float *);
+
 vector signed char vec_xl_len (signed char *addr, size_t len);
 vector unsigned char vec_xl_len (unsigned char *addr, size_t len);
 vector signed int vec_xl_len (signed int *addr, size_t len);
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-4-runnable.c b/gcc/testsuite/gcc.target/powerpc/builtins-4-runnable.c
new file mode 100644
index 0000000..def5352
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-4-runnable.c
@@ -0,0 +1,321 @@ 
+/* { dg-do run { target { powerpc64*-*-* } } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-maltivec -mvsx" } */
+
+#include <inttypes.h>
+#include <altivec.h> // vector
+#include <stdio.h>
+
+void abort (void);
+
+int main() {
+  int i;
+  signed char data_c[100];
+  unsigned char data_uc[100];
+
+  signed short int data_ssi[100];
+  unsigned short int data_usi[100];
+
+  signed int data_si[100];
+  unsigned int data_ui[100];
+
+  signed long long data_sll[100];
+  unsigned long long data_ull[100];
+
+  float data_f[100];
+  double data_d[100];
+   
+  signed long long disp;
+   
+  vector signed char vec_c_expected1, vec_c_expected2, vec_c_result1, vec_c_result2;
+  vector unsigned char vec_uc_expected1, vec_uc_expected2,
+    vec_uc_result1, vec_uc_result2;
+  vector signed short int vec_ssi_expected1, vec_ssi_expected2,
+    vec_ssi_result1, vec_ssi_result2;
+  vector unsigned short int  vec_usi_expected1, vec_usi_expected2,
+    vec_usi_result1, vec_usi_result2;
+  vector signed int vec_si_expected1, vec_si_expected2, vec_si_result1,
+    vec_si_result2;
+  vector unsigned int vec_ui_expected1, vec_ui_expected2, vec_ui_result1,
+    vec_ui_result2;
+  vector signed long long vec_sll_expected1, vec_sll_expected2,
+    vec_sll_result1, vec_sll_result2;
+  vector unsigned long long vec_ull_expected1, vec_ull_expected2,
+    vec_ull_result1, vec_ull_result2;
+  vector float vec_f_expected1, vec_f_expected2, vec_f_result1, vec_f_result2;
+  vector double vec_d_expected1, vec_d_expected2, vec_d_result1, vec_d_result2;
+  char buf[20];
+  signed long long zero = (signed long long) 0;
+  
+  for (i = 0; i < 100; i++)
+    {
+      data_c[i] = i;
+      data_uc[i] = i+1;
+      data_ssi[i] = i+10;
+      data_usi[i] = i+11;
+      data_si[i] = i+100;
+      data_ui[i] = i+101;
+      data_sll[i] = i+1000;
+      data_ull[i] = i+1001;
+      data_f[i] = i+100000.0;
+      data_d[i] = i+1000000.0;
+    }
+  
+  disp = 0;
+#ifdef __BIG_ENDIAN__
+  printf("BIG ENDIAN\n");
+  vec_c_expected1 = (vector signed char){0, 1, 2, 3, 4, 5, 6, 7,
+					 8, 9, 10, 11, 12, 13, 14, 15};
+#else
+  printf("LITTLE ENDIAN\n");
+  vec_c_expected1 = (vector signed char){15, 14, 13, 12, 11, 10, 9, 8,
+					 7, 6, 5, 4, 3, 2, 1, 0};
+#endif
+  vec_c_result1 = vec_xl_be (0, data_c);
+
+  disp = 1;
+
+#ifdef __BIG_ENDIAN__
+  vec_c_expected2 = (vector signed char){1, 2, 3, 4, 5, 6, 7, 8,
+					 9, 10, 11, 12, 13, 14, 15, 16};
+#else
+  vec_c_expected2 = (vector signed char){16, 15, 14, 13, 12, 11, 10, 9,
+					 8, 7, 6, 5, 4, 3, 2, 1};
+#endif
+
+  vec_c_result2 = vec_xl_be (disp, data_c);
+
+#ifdef __BIG_ENDIAN__
+  vec_uc_expected1 = (vector unsigned char){1, 2, 3, 4, 5, 6, 7, 8,
+					    9, 10, 11, 12, 13, 14, 15, 16};
+#else
+  vec_uc_expected1 = (vector unsigned char){16, 15, 14, 13, 12, 11, 10, 9,
+					 8, 7, 6, 5, 4, 3, 2, 1};
+#endif
+
+  vec_uc_result1 = vec_xl_be (0, data_uc);
+
+#ifdef __BIG_ENDIAN__
+  vec_uc_expected2 = (vector unsigned char){2, 3, 4, 5, 6, 7, 8, 9,
+					    10, 11, 12, 13, 14, 15, 16, 17};
+#else
+  vec_uc_expected2 = (vector unsigned char){17, 16, 15, 14, 13, 12, 11, 10,
+					    9, 8, 7, 6, 5, 4, 3, 2};
+#endif
+
+  vec_uc_result2 = vec_xl_be (disp, data_uc);
+
+  for (i = 0; i < 16; i++)
+    {
+      if (vec_c_result1[i] != vec_c_expected1[i])
+        abort ();
+
+      if (vec_c_result2[i] != vec_c_expected2[i])
+        abort ();
+
+      if (vec_uc_result1[i] != vec_uc_expected1[i])
+        abort ();
+
+      if (vec_uc_result2[i] != vec_uc_expected2[i])
+        abort ();
+    }
+
+  vec_ssi_result1 = vec_xl_be (zero, data_ssi);
+
+#ifdef __BIG_ENDIAN__
+  vec_ssi_expected1 = (vector signed short){10, 11, 12, 13, 14, 15, 16, 17};
+#else
+  vec_ssi_expected1 = (vector signed short){17, 16, 15, 14, 13, 12, 11, 10};
+#endif
+
+  disp = 2;
+  vec_ssi_result2 = vec_xl_be (disp, data_ssi);
+
+#ifdef __BIG_ENDIAN__
+  vec_ssi_expected2 = (vector signed short){11, 12, 13, 14, 15, 16, 17, 18};
+#else
+  vec_ssi_expected2 = (vector signed short){18, 17, 16, 15, 14, 13, 12, 11};
+#endif
+
+  vec_usi_result1 = vec_xl_be (zero, data_usi);
+
+#ifdef __BIG_ENDIAN__
+  vec_usi_expected1 = (vector unsigned short){11, 12, 13, 14, 15, 16, 17, 18};
+#else
+  vec_usi_expected1 = (vector unsigned short){18, 17, 16, 15, 14, 13, 12, 11};
+#endif
+   
+  disp = 2;
+  vec_usi_result2 = vec_xl_be (disp, data_usi);
+
+#ifdef __BIG_ENDIAN__
+  vec_usi_expected2 = (vector unsigned short){12, 13, 14, 15, 16, 17, 18, 19};
+#else
+  vec_usi_expected2 = (vector unsigned short){19, 18, 17, 16, 15, 14, 13, 12};
+#endif
+
+  for (i = 0; i < 8; i++)
+    {
+      if (vec_ssi_result1[i] != vec_ssi_expected1[i])
+        abort ();
+
+      if (vec_ssi_result2[i] != vec_ssi_expected2[i])
+        abort ();
+
+      if (vec_usi_result1[i] != vec_usi_expected1[i])
+        abort ();
+
+      if (vec_usi_result2[i] != vec_usi_expected2[i])
+        abort ();
+    }
+
+  vec_si_result1 = vec_xl_be (zero, data_si);
+
+#ifdef __BIG_ENDIAN__
+  vec_si_expected1 = (vector int){100, 101, 102, 103};
+#else
+  vec_si_expected1 = (vector int){103, 102, 101, 100};
+#endif
+
+  disp = 4;
+  vec_si_result2 = vec_xl_be (disp, data_si);
+
+#ifdef __BIG_ENDIAN__
+  vec_si_expected2 = (vector int){101, 102, 103, 104};
+#else
+  vec_si_expected2 = (vector int){104, 103, 102, 101};
+#endif
+
+  vec_ui_result1 = vec_xl_be (zero, data_ui);
+
+#ifdef __BIG_ENDIAN__
+  vec_ui_expected1 = (vector unsigned int){101, 102, 103, 104};
+#else
+  vec_ui_expected1 = (vector unsigned int){104, 103, 102, 101};
+#endif
+
+  disp = 4;
+  vec_ui_result2 = vec_xl_be (disp, data_ui);
+
+#ifdef __BIG_ENDIAN__
+  vec_ui_expected2 = (vector unsigned int){102, 103, 104, 105};
+#else
+  vec_ui_expected2 = (vector unsigned int){105, 104, 103, 102};
+#endif
+
+
+  for (i = 0; i < 4; i++)
+    {
+      if (vec_si_result1[i] != vec_si_expected1[i])
+        abort ();
+
+      if (vec_si_result2[i] != vec_si_expected2[i])
+        abort ();
+
+      if (vec_ui_result1[i] != vec_ui_expected1[i])
+        abort ();
+
+      if (vec_ui_result2[i] != vec_ui_expected2[i])
+        abort ();
+    }
+
+  vec_sll_result1 = vec_xl_be (zero, data_sll);
+
+#ifdef __BIG_ENDIAN__
+  vec_sll_expected1 = (vector signed long long){1000, 1001};
+#else
+  vec_sll_expected1 = (vector signed long long){1001, 1000};
+#endif
+
+  disp = 8;
+  vec_sll_result2 = vec_xl_be (disp, data_sll);
+
+#ifdef __BIG_ENDIAN__
+  vec_sll_expected2 = (vector signed long long){1001, 1002};
+#else
+  vec_sll_expected2 = (vector signed long long){1002, 1001};
+#endif
+
+  vec_ull_result1 = vec_xl_be (zero, data_ull);
+
+#ifdef __BIG_ENDIAN__
+  vec_ull_expected1 = (vector unsigned long long){1001, 1002};
+#else
+  vec_ull_expected1 = (vector unsigned long long){1002, 1001};
+#endif
+
+  disp = 8;
+  vec_ull_result2 = vec_xl_be (disp, data_ull);
+
+#ifdef __BIG_ENDIAN__
+  vec_ull_expected2 = (vector unsigned long long){1002, 1003};
+#else
+  vec_ull_expected2 = (vector unsigned long long){1003, 1002};
+#endif
+
+
+  for (i = 0; i < 2; i++)
+    {
+      if (vec_sll_result1[i] != vec_sll_expected1[i])
+        abort ();
+
+      if (vec_sll_result2[i] != vec_sll_expected2[i])
+	abort ();
+
+      if (vec_ull_result1[i] != vec_ull_expected1[i])
+        abort ();
+
+      if (vec_ull_result2[i] != vec_ull_expected2[i])
+        abort ();
+    }
+
+  vec_f_result1 = vec_xl_be (zero, data_f);
+
+#ifdef __BIG_ENDIAN__
+  vec_f_expected1 = (vector float){100000.0, 100001.0, 100002.0, 100003.0};
+#else
+  vec_f_expected1 = (vector float){100003.0, 100002.0, 100001.0, 100000.0};
+#endif
+
+  disp = 4;
+  vec_f_result2 = vec_xl_be (disp, data_f);
+
+#ifdef __BIG_ENDIAN__
+  vec_f_expected2 = (vector float){100001.0, 100002.0, 100003.0, 100004.0};
+#else
+  vec_f_expected2 = (vector float){100004.0, 100003.0, 100002.0, 100001.0};
+#endif
+
+  for (i = 0; i < 4; i++)
+    {
+      if (vec_f_result1[i] != vec_f_expected1[i])
+        abort ();
+      if (vec_f_result2[i] != vec_f_expected2[i])
+        abort ();
+    }
+
+  vec_d_result1 = vec_xl_be (zero, data_d);
+
+#ifdef __BIG_ENDIAN__
+  vec_d_expected1 = (vector double){1000000.0, 1000001.0};
+#else
+  vec_d_expected1 = (vector double){1000001.0, 1000000.0};
+#endif
+
+  disp = 8;
+  vec_d_result2 = vec_xl_be (disp, data_d);
+
+#ifdef __BIG_ENDIAN__
+  vec_d_expected2 = (vector double){1000001.0, 1000002.0};
+#else
+  vec_d_expected2 = (vector double){1000002.0, 1000001.0};
+#endif
+
+  for (i = 0; i < 2; i++)
+    {
+      if (vec_d_result1[i] != vec_d_expected1[i])
+        abort ();
+      if (vec_d_result2[i] != vec_d_expected2[i])
+        abort ();
+    }
+}