
[AArch64,NEON] Improve vmulX intrinsics

Message ID 5486F084.6010202@huawei.com
State New

Commit Message

Jiangjiji Dec. 9, 2014, 12:52 p.m. UTC
Hi,
      This patch converts more intrinsics to use builtin functions instead of the
previous inline assembly syntax.
      Passed Christophe Lyon's AdvSIMD intrinsics testsuite.

      Three test cases are added for intrinsics not covered by the testsuite:
      gcc.target/aarch64/advsimd-intrinsics/vmull_high.c
      gcc.target/aarch64/advsimd-intrinsics/vmull_high_lane.c
      gcc.target/aarch64/advsimd-intrinsics/vmull_high_n.c

      Regtested with aarch64-linux-gnu on QEMU.
      No regressions on the aarch64_be-linux-gnu big-endian target either.
      OK for the trunk?
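
      To illustrate the conversion, the pattern for vmull_s16 is sketched
below.  The "before" form is the inline-assembly version this patch removes
from arm_neon.h; the "after" form is illustrative only, with the builtin name
inferred from the new "smull" entry in aarch64-simd-builtins.def and the
usual __builtin_aarch64_<name><mode> naming scheme:

  /* Before: an opaque inline-assembly block the optimizers cannot see into.  */
  __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
  vmull_s16 (int16x4_t a, int16x4_t b)
  {
    int32x4_t result;
    __asm__ ("smull %0.4s, %1.4h, %2.4h"
             : "=w"(result)
             : "w"(a), "w"(b)
             : /* No clobbers */);
    return result;
  }

  /* After: a builtin expanded through the corresponding RTL pattern
     (here aarch64_smull<mode>), which the compiler can analyse.  */
  __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
  vmull_s16 (int16x4_t __a, int16x4_t __b)
  {
    return __builtin_aarch64_smullv4hi (__a, __b);
  }
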
Index: gcc/ChangeLog
===================================================================
--- gcc/ChangeLog	(revision 218464)
+++ gcc/ChangeLog	(working copy)
@@ -1,3 +1,38 @@
+2014-12-09  Felix Yang  <felix.yang@huawei.com>
+            Jiji Jiang  <jiangjiji@huawei.com>
+
+	* config/aarch64/aarch64-simd.md (aarch64_mul_n<mode>,
+	aarch64_<su>mull_n<mode>, aarch64_<su>mull<mode>,
+	aarch64_simd_<su>mull2_n<mode>, aarch64_<su>mull2_n<mode>,
+	aarch64_<su>mull_lane<mode>, aarch64_<su>mull2_lane<mode>_internal,
+	aarch64_<su>mull_laneq<mode>, aarch64_<su>mull2_laneq<mode>_internal,
+	aarch64_smull2_lane<mode>, aarch64_umull2_lane<mode>,
+	aarch64_smull2_laneq<mode>, aarch64_umull2_laneq<mode>,
+	aarch64_fmulx<mode>, aarch64_fmulx<mode>, aarch64_fmulx_lane<mode>,
+	aarch64_pmull2v16qi, aarch64_pmullv8qi): New patterns.
+	* config/aarch64/aarch64-simd-builtins.def (vec_widen_smult_hi_,
+	vec_widen_umult_hi_, umull, smull, smull_n, umull_n, mul_n, smull2_n,
+	umull2_n, smull_lane, umull_lane, smull_laneq, umull_laneq, pmull,
+	umull2_lane, smull2_laneq, umull2_laneq, fmulx, fmulx_lane, pmull2,
+	smull2_lane): New builtins.
+	* config/aarch64/arm_neon.h (vmul_n_f32, vmul_n_s16, vmul_n_s32,
+	vmul_n_u16, vmul_n_u32, vmulq_n_f32, vmulq_n_f64, vmulq_n_s16,
+	vmulq_n_s32, vmulq_n_u16, vmulq_n_u32, vmull_high_lane_s16,
+	vmull_high_lane_s32, vmull_high_lane_u16, vmull_high_lane_u32,
+	vmull_high_laneq_s16, vmull_high_laneq_s32, vmull_high_laneq_u16,
+	vmull_high_laneq_u32, vmull_high_n_s16, vmull_high_n_s32,
+	vmull_high_n_u16, vmull_high_n_u32, vmull_high_p8, vmull_high_s8,
+	vmull_high_s16, vmull_high_s32, vmull_high_u8, vmull_high_u16,
+	vmull_high_u32, vmull_lane_s16, vmull_lane_s32, vmull_lane_u16,
+	vmull_lane_u32, vmull_laneq_s16, vmull_laneq_s32, vmull_laneq_u16,
+	vmull_laneq_u32, vmull_n_s16, vmull_n_s32, vmull_n_u16, vmull_n_u32,
+	vmull_p8, vmull_s8, vmull_s16, vmull_s32, vmull_u8, vmull_u16,
+	vmull_u32, vmulx_f32, vmulx_lane_f32, vmulxd_f64, vmulxq_f32,
+	vmulxq_f64, vmulxq_lane_f32, vmulxq_lane_f64, vmulxs_f32): Rewrite
+	using builtin functions.
+	* config/aarch64/iterators.md (UNSPEC_FMULX, UNSPEC_FMULX_LANE,
+	VDQF_Q): New unspecs and mode iterator.
+
 2014-12-07  Felix Yang  <felix.yang@huawei.com>
 	    Shanyao Chen  <chenshanyao@huawei.com>
 
Index: gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high.c
===================================================================
--- gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high.c	(revision 0)
+++ gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high.c	(revision 0)
@@ -0,0 +1,111 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfc48, 0xfcbf, 0xfd36, 0xfdad,
+                                        0xfe24, 0xfe9b, 0xff12, 0xff89 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffff9a0, 0xfffffa28,
+                                        0xfffffab0, 0xfffffb38 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffff7a2,
+                                        0xfffffffffffff83b };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xa4b0, 0xa55a, 0xa604, 0xa6ae,
+                                         0xa758, 0xa802, 0xa8ac, 0xa956 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xbaf73c, 0xbaf7f7,
+                                         0xbaf8b2, 0xbaf96d };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xcbfffff4d8,
+                                         0xcbfffff5a4};
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x6530, 0x659a, 0x6464, 0x64ce,
+                                         0x6798, 0x6732, 0x66cc, 0x6666 };
+
+#ifndef INSN_NAME
+#define INSN_NAME vmull_high
+#define TEST_MSG "VMUL_HIGH"
+#endif
+
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+#define DECL_VMUL(T, W, N)                      \
+  DECL_VARIABLE(vector1, T, W, N);              \
+  DECL_VARIABLE(vector2, T, W, N);		
+
+  /* vector_res = OP(vector1, vector2), then store the result.  */
+#define TEST_VMULL_HIGH1(INSN, Q, T1, T2, W, N, W1, N1)                 \
+  VECT_VAR(vector_res, T1, W1, N1) =                                    \
+    INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N),                      \
+                               VECT_VAR(vector2, T1, W, N));            \
+  vst1q##_##T2##W1(VECT_VAR(result, T1, W1, N1),                        \
+		    VECT_VAR(vector_res, T1, W1, N1))
+
+#define TEST_VMULL_HIGH(INSN, Q, T1, T2, W, N, W1, N1)	                \
+  TEST_VMULL_HIGH1(INSN, Q, T1, T2, W, N, W1, N1)
+
+#define CHECK_VMULL_HIGH_RESULTS(test_name,comment)                     \
+  {                                                                     \
+    CHECK(test_name, int, 16, 8, PRIx16, expected, comment);            \
+    CHECK(test_name, int, 32, 4, PRIx32, expected, comment);            \
+    CHECK(test_name, int, 64, 2, PRIx64, expected, comment);            \
+    CHECK(test_name, uint, 16, 8, PRIx16,  expected, comment);          \
+    CHECK(test_name, uint, 32, 4, PRIx32, expected, comment);           \
+    CHECK(test_name, uint, 64, 2, PRIx64, expected, comment);           \
+    CHECK(test_name, poly, 16, 8, PRIx16, expected, comment);           \
+  }  
+
+  DECL_VMUL(int, 8, 16);
+  DECL_VMUL(int, 16, 8);
+  DECL_VMUL(int, 32, 4);
+  DECL_VMUL(uint, 8, 16);
+  DECL_VMUL(uint, 16, 8);
+  DECL_VMUL(uint, 32, 4);
+  DECL_VMUL(poly, 8, 16);
+
+  DECL_VARIABLE(vector_res, int, 16, 8);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+  DECL_VARIABLE(vector_res, int, 64, 2);
+  DECL_VARIABLE(vector_res, uint, 16, 8);
+  DECL_VARIABLE(vector_res, uint, 32, 4);
+  DECL_VARIABLE(vector_res, uint, 64, 2);
+  DECL_VARIABLE(vector_res, poly, 16, 8);
+
+  clean_results ();
+
+  /* Initialize input "vector1" from "buffer".  */
+  VLOAD(vector1, buffer, q, int, s, 8, 16);
+  VLOAD(vector1, buffer, q, int, s, 16, 8);
+  VLOAD(vector1, buffer, q, int, s, 32, 4);
+  VLOAD(vector1, buffer, q, uint, u, 8, 16);
+  VLOAD(vector1, buffer, q, uint, u, 16, 8);
+  VLOAD(vector1, buffer, q, uint, u, 32, 4);
+  VLOAD(vector1, buffer, q, poly, p, 8, 16);
+
+  /* Choose init value arbitrarily.  */
+  VDUP(vector2, q, int, s, 8, 16, 0x77);
+  VDUP(vector2, q, int, s, 16, 8, 0x88);
+  VDUP(vector2, q, int, s, 32, 4, 0x99);
+  VDUP(vector2, q, uint, u, 8, 16, 0xAA);
+  VDUP(vector2, q, uint, u, 16, 8, 0xBB);
+  VDUP(vector2, q, uint, u, 32, 4, 0xCC);
+  VDUP(vector2, q, poly, p, 8, 16, 0xAA);
+
+  /* Execute the tests.  */
+  TEST_VMULL_HIGH(INSN_NAME, , int, s, 8, 16, 16, 8);
+  TEST_VMULL_HIGH(INSN_NAME, , int, s, 16, 8, 32, 4);
+  TEST_VMULL_HIGH(INSN_NAME, , int, s, 32, 4, 64, 2);
+  TEST_VMULL_HIGH(INSN_NAME, , uint, u, 8, 16, 16, 8);
+  TEST_VMULL_HIGH(INSN_NAME, , uint, u, 16, 8, 32, 4);
+  TEST_VMULL_HIGH(INSN_NAME, , uint, u, 32, 4, 64, 2);
+  TEST_VMULL_HIGH(INSN_NAME, , poly, p, 8, 16, 16, 8);
+
+  CHECK_VMULL_HIGH_RESULTS (TEST_MSG, "");
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+
+  return 0;
+}

Property changes on: gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high.c
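
For reference, the token pasting in TEST_VMULL_HIGH above resolves to an
ordinary intrinsic call followed by a store.  Assuming the usual VECT_VAR
name mangling from arm-neon-ref.h (VECT_VAR(v, int, 16, 8) becomes
v_int16x8), the invocation TEST_VMULL_HIGH(vmull_high, , int, s, 8, 16, 16, 8)
expands along the lines of:

  /* INSN##Q##_##T2##W pastes to vmull_high_s8, vst1q##_##T2##W1 to vst1q_s16.  */
  vector_res_int16x8 = vmull_high_s8 (vector1_int8x16, vector2_int8x16);
  vst1q_s16 (result_int16x8, vector_res_int16x8);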

Comments

Christophe Lyon Dec. 9, 2014, 1:42 p.m. UTC | #1
On 9 December 2014 at 13:52, Jiangjiji <jiangjiji@huawei.com> wrote:
> Hi,
>      This patch converts more intrinsics to use builtin functions instead
> of the previous inline assembly syntax.
>      Passed Christophe Lyon's AdvSIMD intrinsics testsuite.
>
>      Three test cases are added for intrinsics not covered by the testsuite:
>      gcc.target/aarch64/advsimd-intrinsics/vmull_high.c
>      gcc.target/aarch64/advsimd-intrinsics/vmull_high_lane.c
>      gcc.target/aarch64/advsimd-intrinsics/vmull_high_n.c
>

As I said here:
https://gcc.gnu.org/ml/gcc-patches/2014-10/msg01934.html
I am in the process of converting my existing testsuite to GCC/DejaGnu.
Please do not duplicate work.


> [snip: quoted ChangeLog entry and vmull_high.c test, identical to the patch
> above]
>
> Property changes on: gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high.c
> ___________________________________________________________________
> Added: svn:executable
>    + *
>
> Index: gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high_lane.c
> ===================================================================
> --- gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high_lane.c	(revision 0)
> +++ gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high_lane.c	(revision 0)
> @@ -0,0 +1,136 @@
> +#include <arm_neon.h>
> +#include "arm-neon-ref.h"
> +#include "compute-ref-data.h"
> +
> +VECT_VAR_DECL(expected, int, 32, 4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 };
> +VECT_VAR_DECL(expected, int, 64, 2) [] = { 0x2000, 0x2000 };
> +VECT_VAR_DECL(expected, uint, 32, 4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 };
> +VECT_VAR_DECL(expected, uint, 64, 2) [] = { 0x2000, 0x2000 };
> +
> +#define TEST_MSG "VMULL_HIGH_LANE/VMULL_HIGH_LANEQ"
> +void exec_vmull_high_lane (void)
> +{
> +  /* vector_res = vmull_high_lane(vector, vector2, lane), then store the result.  */
> +#define TEST_VMULL_HIGH_LANE(T1, T2, W, W2, N1, N2, L)                  \
> +  VECT_VAR(vector_res, T1, W2, N2) =                                    \
> +    vmull##_high_lane_##T2##W(VECT_VAR(vector, T1, W, N1 ),             \
> +                         VECT_VAR(vector2, T1, W, N2),                  \
> +                         L);                                            \
> +  vst1q_##T2##W2(VECT_VAR(result, T1, W2, N2), VECT_VAR(vector_res, T1, W2, N2))
> +
> +#define CHECK_VMULL_HIGH_LANE_RESULTS(test_name,comment)                \
> +  {                                                                     \
> +    CHECK(test_name, int, 32, 4, PRIx32, expected, comment);            \
> +    CHECK(test_name, int, 64, 2, PRIx64, expected, comment);            \
> +    CHECK(test_name, uint, 32, 4, PRIx32,  expected, comment);          \
> +    CHECK(test_name, uint, 64, 2, PRIx64, expected, comment);           \
> +  }
> +
> +
> +  /* With ARM RVCT, we need to declare variables before any executable
> +     statement */
> +  DECL_VARIABLE(vector, int, 16, 8);
> +  DECL_VARIABLE(vector, int, 32, 4);
> +  DECL_VARIABLE(vector, uint, 16, 8);
> +  DECL_VARIABLE(vector, uint, 32, 4);
> +  DECL_VARIABLE(vector2, int, 16, 4);
> +  DECL_VARIABLE(vector2, int, 32, 2);
> +  DECL_VARIABLE(vector2, uint, 16, 4);
> +  DECL_VARIABLE(vector2, uint, 32, 2);
> +
> +  DECL_VARIABLE(vector_res, int, 32, 4);
> +  DECL_VARIABLE(vector_res, int, 64, 2);
> +  DECL_VARIABLE(vector_res, uint, 32, 4);
> +  DECL_VARIABLE(vector_res, uint, 64, 2);
> +
> +  clean_results ();
> +
> +  /* Initialize vector2 */
> +  VDUP(vector2, , int, s, 16, 4, 0x1000);
> +  VDUP(vector2, , int, s, 32, 2, 0x1000);
> +  VDUP(vector2, , uint, u, 16, 4, 0x1000);
> +  VDUP(vector2, , uint, u, 32, 2, 0x1000);
> +
> +  /* Initialize vector */
> +  VDUP(vector, q, int, s, 16, 8, 0x4);
> +  VDUP(vector, q, int, s, 32, 4, 0x2);
> +  VDUP(vector, q, uint, u, 16, 8, 0x4);
> +  VDUP(vector, q, uint, u, 32, 4, 0x2);
> +
> +  /* Choose lane arbitrarily */
> +  TEST_VMULL_HIGH_LANE(int, s, 16, 32, 8, 4, 2);
> +  TEST_VMULL_HIGH_LANE(int, s, 32, 64, 4, 2, 1);
> +  TEST_VMULL_HIGH_LANE(uint, u, 16, 32, 8, 4, 2);
> +  TEST_VMULL_HIGH_LANE(uint, u, 32, 64, 4, 2, 1);
> +
> +  CHECK_VMULL_HIGH_LANE_RESULTS (TEST_MSG, "");
> +}
> +
> +
> +void exec_vmull_high_laneq (void)
> +{
> +  /* vector_res = vmull_high_laneq(vector, vector2, lane), then store the result.  */
> +#define TEST_VMULL_HIGH_LANEQ(T1, T2, W, W2, N2, N1, L)                 \
> +  VECT_VAR(vector_res, T1, W2, N1) =                                    \
> +    vmull##_high_laneq_##T2##W(VECT_VAR(vector, T1, W, N2 ),            \
> +                         VECT_VAR(vector2, T1, W, N2),                  \
> +                         L);                                            \
> +  vst1q_##T2##W2(VECT_VAR(result, T1, W2, N1), VECT_VAR(vector_res, T1, W2, N1))
> +
> +#define CHECK_VMULL_HIGH_LANEQ_RESULTS(test_name,comment)               \
> +  {                                                                     \
> +    CHECK(test_name, int, 32, 4, PRIx32, expected, comment);            \
> +    CHECK(test_name, int, 64, 2, PRIx64, expected, comment);            \
> +    CHECK(test_name, uint, 32, 4, PRIx32,  expected, comment);          \
> +    CHECK(test_name, uint, 64, 2, PRIx64, expected, comment);           \
> +  }
> +
> +
> +  /* With ARM RVCT, we need to declare variables before any executable
> +     statement */
> +  DECL_VARIABLE(vector, int, 16, 8);
> +  DECL_VARIABLE(vector, int, 32, 4);
> +  DECL_VARIABLE(vector, uint, 16, 8);
> +  DECL_VARIABLE(vector, uint, 32, 4);
> +  DECL_VARIABLE(vector2, int, 16, 8);
> +  DECL_VARIABLE(vector2, int, 32, 4);
> +  DECL_VARIABLE(vector2, uint, 16, 8);
> +  DECL_VARIABLE(vector2, uint, 32, 4);
> +
> +  DECL_VARIABLE(vector_res, int, 32, 4);
> +  DECL_VARIABLE(vector_res, int, 64, 2);
> +  DECL_VARIABLE(vector_res, uint, 32, 4);
> +  DECL_VARIABLE(vector_res, uint, 64, 2);
> +
> +  clean_results ();
> +
> +  /* Initialize vector2 */
> +  VDUP(vector2, q, int, s, 16, 8, 0x1000);
> +  VDUP(vector2, q, int, s, 32, 4, 0x1000);
> +  VDUP(vector2, q, uint, u, 16, 8, 0x1000);
> +  VDUP(vector2, q, uint, u, 32, 4, 0x1000);
> +
> +  /* Initialize vector */
> +  VDUP(vector, q, int, s, 16, 8, 0x4);
> +  VDUP(vector, q, int, s, 32, 4, 0x2);
> +  VDUP(vector, q, uint, u, 16, 8, 0x4);
> +  VDUP(vector, q, uint, u, 32, 4, 0x2);
> +
> +  /* Choose lane arbitrarily */
> +  TEST_VMULL_HIGH_LANEQ(int, s, 16, 32, 8, 4, 2);
> +  TEST_VMULL_HIGH_LANEQ(int, s, 32, 64, 4, 2, 1);
> +  TEST_VMULL_HIGH_LANEQ(uint, u, 16, 32, 8, 4, 2);
> +  TEST_VMULL_HIGH_LANEQ(uint, u, 32, 64, 4, 2, 1);
> +
> +  CHECK_VMULL_HIGH_LANEQ_RESULTS (TEST_MSG, "");
> +}
> +
> +
> +
> +
> +int main (void)
> +{
> +  exec_vmull_high_lane();
> +  exec_vmull_high_laneq();
> +  return 0;
> +}
>
> Property changes on: gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high_lane.c
> ___________________________________________________________________
> Added: svn:executable
>    + *
>
> Index: gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high_n.c
> ===================================================================
> --- gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high_n.c	(revision 0)
> +++ gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high_n.c	(revision 0)
> @@ -0,0 +1,81 @@
> +#include <arm_neon.h>
> +#include "arm-neon-ref.h"
> +#include "compute-ref-data.h"
> +
> +
> +/* Expected results.  */
> +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffff73c, 0xfffff7f7,
> +                                        0xfffff8b2, 0xfffff96d };
> +VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffff4d8,
> +                                        0xfffffffffffff5a4 };
> +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xedf4d8, 0xedf5c6,
> +                                         0xedf6b4, 0xedf7a2 };
> +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfefffff20e,
> +                                         0xfefffff30d};
> +
> +#ifndef INSN_NAME
> +#define INSN_NAME vmull_high_n
> +#define TEST_MSG "VMULL_HIGH_N"
> +#endif
> +
> +#define FNNAME1(NAME) exec_ ## NAME
> +#define FNNAME(NAME) FNNAME1(NAME)
> +
> +void FNNAME (INSN_NAME) (void)
> +{
> +#define DECL_VMUL(T, W, N)                      \
> +  DECL_VARIABLE(vector1, T, W, N);
> +
> +  /* vector_res = OP(vector1, vector2), then store the result.  */
> +#define TEST_VMULL_HIGH1(INSN, Q, T1, T2, W, N, W1, N1, C)              \
> +  VECT_VAR(vector_res, T1, W1, N1) =                                    \
> +    INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N),                      \
> +                      C);                                               \
> +  vst1q##_##T2##W1(VECT_VAR(result, T1, W1, N1),                        \
> +                      VECT_VAR(vector_res, T1, W1, N1))
> +
> +#define TEST_VMULL_HIGH(INSN, Q, T1, T2, W, N, W1, N1, C)               \
> +  TEST_VMULL_HIGH1(INSN, Q, T1, T2, W, N, W1, N1, C)
> +
> +#define CHECK_VMULL_HIGH_N_RESULTS(test_name,comment)                   \
> +  {                                                                     \
> +    CHECK(test_name, int, 32, 4, PRIx32, expected, comment);            \
> +    CHECK(test_name, int, 64, 2, PRIx64, expected, comment);            \
> +    CHECK(test_name, uint, 32, 4, PRIx32, expected, comment);           \
> +    CHECK(test_name, uint, 64, 2, PRIx64, expected, comment);           \
> +  }
> +
> +  DECL_VMUL(int, 16, 8);
> +  DECL_VMUL(int, 32, 4);
> +  DECL_VMUL(uint, 16, 8);
> +  DECL_VMUL(uint, 32, 4);
> +
> +  DECL_VARIABLE(vector_res, int, 32, 4);
> +  DECL_VARIABLE(vector_res, int, 64, 2);
> +  DECL_VARIABLE(vector_res, uint, 32, 4);
> +  DECL_VARIABLE(vector_res, uint, 64, 2);
> +
> +  clean_results ();
> +
> +  /* Initialize input "vector1" from "buffer".  */
> +  VLOAD(vector1, buffer, q, int, s, 16, 8);
> +  VLOAD(vector1, buffer, q, int, s, 32, 4);
> +  VLOAD(vector1, buffer, q, uint, u, 16, 8);
> +  VLOAD(vector1, buffer, q, uint, u, 32, 4);
> +
> +
> +  /* Execute the tests.  */
> +  TEST_VMULL_HIGH(INSN_NAME, , int, s, 16, 8, 32, 4, 0xBB);
> +  TEST_VMULL_HIGH(INSN_NAME, , int, s, 32, 4, 64, 2, 0xCC);
> +  TEST_VMULL_HIGH(INSN_NAME, , uint, u, 16, 8, 32, 4, 0xEE);
> +  TEST_VMULL_HIGH(INSN_NAME, , uint, u, 32, 4, 64, 2, 0xFF);
> +
> +  CHECK_VMULL_HIGH_N_RESULTS (TEST_MSG, "");
> +}
> +
> +int main (void)
> +{
> +  FNNAME (INSN_NAME) ();
> +
> +  return 0;
> +}
>
> Property changes on: gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high_n.c
> ___________________________________________________________________
> Added: svn:executable
>    + *
>
> Index: gcc/testsuite/ChangeLog
> ===================================================================
> --- gcc/testsuite/ChangeLog     (revision 218464)
> +++ gcc/testsuite/ChangeLog     (working copy)
> @@ -1,3 +1,13 @@
> +2014-12-09  Felix Yang  <felix.yang@huawei.com>
> +            Jiji Jiang  <jiangjiji@huawei.com>
> +
> +	* gcc.target/aarch64/advsimd-intrinsics/vmull_high.c: New test.
> +	* gcc.target/aarch64/advsimd-intrinsics/vmull_high_lane.c: New test.
> +	* gcc.target/aarch64/advsimd-intrinsics/vmull_high_n.c: New test.
> +
>  2014-12-07  Christophe Lyon  <christophe.lyon@linaro.org>
>
>         * gcc.target/aarch64/advsimd-intrinsics/vaddhn.c: Actually execute
> Index: gcc/config/aarch64/arm_neon.h
> ===================================================================
> --- gcc/config/aarch64/arm_neon.h       (revision 218464)
> +++ gcc/config/aarch64/arm_neon.h       (working copy)
> @@ -7627,671 +7627,6 @@ vmovn_u64 (uint64x2_t a)
>    return result;
>  }
>
> -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
> -vmul_n_f32 (float32x2_t a, float32_t b)
> -{
> -  float32x2_t result;
> -  __asm__ ("fmul %0.2s,%1.2s,%2.s[0]"
> -           : "=w"(result)
> -           : "w"(a), "w"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
> -vmul_n_s16 (int16x4_t a, int16_t b)
> -{
> -  int16x4_t result;
> -  __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
> -           : "=w"(result)
> -           : "w"(a), "x"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
> -vmul_n_s32 (int32x2_t a, int32_t b)
> -{
> -  int32x2_t result;
> -  __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
> -           : "=w"(result)
> -           : "w"(a), "w"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
> -vmul_n_u16 (uint16x4_t a, uint16_t b)
> -{
> -  uint16x4_t result;
> -  __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
> -           : "=w"(result)
> -           : "w"(a), "x"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
> -vmul_n_u32 (uint32x2_t a, uint32_t b)
> -{
> -  uint32x2_t result;
> -  __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
> -           : "=w"(result)
> -           : "w"(a), "w"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -#define vmull_high_lane_s16(a, b, c)                                    \
> -  __extension__                                                         \
> -    ({                                                                  \
> -       int16x4_t b_ = (b);                                              \
> -       int16x8_t a_ = (a);                                              \
> -       int32x4_t result;                                                \
> -       __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]"                         \
> -                : "=w"(result)                                          \
> -                : "w"(a_), "x"(b_), "i"(c)                              \
> -                : /* No clobbers */);                                   \
> -       result;                                                          \
> -     })
> -
> -#define vmull_high_lane_s32(a, b, c)                                    \
> -  __extension__                                                         \
> -    ({                                                                  \
> -       int32x2_t b_ = (b);                                              \
> -       int32x4_t a_ = (a);                                              \
> -       int64x2_t result;                                                \
> -       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]"                         \
> -                : "=w"(result)                                          \
> -                : "w"(a_), "w"(b_), "i"(c)                              \
> -                : /* No clobbers */);                                   \
> -       result;                                                          \
> -     })
> -
> -#define vmull_high_lane_u16(a, b, c)                                    \
> -  __extension__                                                         \
> -    ({                                                                  \
> -       uint16x4_t b_ = (b);                                             \
> -       uint16x8_t a_ = (a);                                             \
> -       uint32x4_t result;                                               \
> -       __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]"                         \
> -                : "=w"(result)                                          \
> -                : "w"(a_), "x"(b_), "i"(c)                              \
> -                : /* No clobbers */);                                   \
> -       result;                                                          \
> -     })
> -
> -#define vmull_high_lane_u32(a, b, c)                                    \
> -  __extension__                                                         \
> -    ({                                                                  \
> -       uint32x2_t b_ = (b);                                             \
> -       uint32x4_t a_ = (a);                                             \
> -       uint64x2_t result;                                               \
> -       __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]"                         \
> -                : "=w"(result)                                          \
> -                : "w"(a_), "w"(b_), "i"(c)                              \
> -                : /* No clobbers */);                                   \
> -       result;                                                          \
> -     })
> -
> -#define vmull_high_laneq_s16(a, b, c)                                   \
> -  __extension__                                                         \
> -    ({                                                                  \
> -       int16x8_t b_ = (b);                                              \
> -       int16x8_t a_ = (a);                                              \
> -       int32x4_t result;                                                \
> -       __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]"                         \
> -                : "=w"(result)                                          \
> -                : "w"(a_), "x"(b_), "i"(c)                              \
> -                : /* No clobbers */);                                   \
> -       result;                                                          \
> -     })
> -
> -#define vmull_high_laneq_s32(a, b, c)                                   \
> -  __extension__                                                         \
> -    ({                                                                  \
> -       int32x4_t b_ = (b);                                              \
> -       int32x4_t a_ = (a);                                              \
> -       int64x2_t result;                                                \
> -       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]"                         \
> -                : "=w"(result)                                          \
> -                : "w"(a_), "w"(b_), "i"(c)                              \
> -                : /* No clobbers */);                                   \
> -       result;                                                          \
> -     })
> -
> -#define vmull_high_laneq_u16(a, b, c)                                   \
> -  __extension__                                                         \
> -    ({                                                                  \
> -       uint16x8_t b_ = (b);                                             \
> -       uint16x8_t a_ = (a);                                             \
> -       uint32x4_t result;                                               \
> -       __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]"                         \
> -                : "=w"(result)                                          \
> -                : "w"(a_), "x"(b_), "i"(c)                              \
> -                : /* No clobbers */);                                   \
> -       result;                                                          \
> -     })
> -
> -#define vmull_high_laneq_u32(a, b, c)                                   \
> -  __extension__                                                         \
> -    ({                                                                  \
> -       uint32x4_t b_ = (b);                                             \
> -       uint32x4_t a_ = (a);                                             \
> -       uint64x2_t result;                                               \
> -       __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]"                         \
> -                : "=w"(result)                                          \
> -                : "w"(a_), "w"(b_), "i"(c)                              \
> -                : /* No clobbers */);                                   \
> -       result;                                                          \
> -     })
> -
> -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
> -vmull_high_n_s16 (int16x8_t a, int16_t b)
> -{
> -  int32x4_t result;
> -  __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]"
> -           : "=w"(result)
> -           : "w"(a), "x"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
> -vmull_high_n_s32 (int32x4_t a, int32_t b)
> -{
> -  int64x2_t result;
> -  __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]"
> -           : "=w"(result)
> -           : "w"(a), "w"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
> -vmull_high_n_u16 (uint16x8_t a, uint16_t b)
> -{
> -  uint32x4_t result;
> -  __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]"
> -           : "=w"(result)
> -           : "w"(a), "x"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
> -vmull_high_n_u32 (uint32x4_t a, uint32_t b)
> -{
> -  uint64x2_t result;
> -  __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]"
> -           : "=w"(result)
> -           : "w"(a), "w"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
> -vmull_high_p8 (poly8x16_t a, poly8x16_t b)
> -{
> -  poly16x8_t result;
> -  __asm__ ("pmull2 %0.8h,%1.16b,%2.16b"
> -           : "=w"(result)
> -           : "w"(a), "w"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
> -vmull_high_s8 (int8x16_t a, int8x16_t b)
> -{
> -  int16x8_t result;
> -  __asm__ ("smull2 %0.8h,%1.16b,%2.16b"
> -           : "=w"(result)
> -           : "w"(a), "w"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
> -vmull_high_s16 (int16x8_t a, int16x8_t b)
> -{
> -  int32x4_t result;
> -  __asm__ ("smull2 %0.4s,%1.8h,%2.8h"
> -           : "=w"(result)
> -           : "w"(a), "w"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
> -vmull_high_s32 (int32x4_t a, int32x4_t b)
> -{
> -  int64x2_t result;
> -  __asm__ ("smull2 %0.2d,%1.4s,%2.4s"
> -           : "=w"(result)
> -           : "w"(a), "w"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
> -vmull_high_u8 (uint8x16_t a, uint8x16_t b)
> -{
> -  uint16x8_t result;
> -  __asm__ ("umull2 %0.8h,%1.16b,%2.16b"
> -           : "=w"(result)
> -           : "w"(a), "w"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
> -vmull_high_u16 (uint16x8_t a, uint16x8_t b)
> -{
> -  uint32x4_t result;
> -  __asm__ ("umull2 %0.4s,%1.8h,%2.8h"
> -           : "=w"(result)
> -           : "w"(a), "w"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
> -vmull_high_u32 (uint32x4_t a, uint32x4_t b)
> -{
> -  uint64x2_t result;
> -  __asm__ ("umull2 %0.2d,%1.4s,%2.4s"
> -           : "=w"(result)
> -           : "w"(a), "w"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -#define vmull_lane_s16(a, b, c)                                         \
> -  __extension__                                                         \
> -    ({                                                                  \
> -       int16x4_t b_ = (b);                                              \
> -       int16x4_t a_ = (a);                                              \
> -       int32x4_t result;                                                \
> -       __asm__ ("smull %0.4s,%1.4h,%2.h[%3]"                            \
> -                : "=w"(result)                                          \
> -                : "w"(a_), "x"(b_), "i"(c)                              \
> -                : /* No clobbers */);                                   \
> -       result;                                                          \
> -     })
> -
> -#define vmull_lane_s32(a, b, c)                                         \
> -  __extension__                                                         \
> -    ({                                                                  \
> -       int32x2_t b_ = (b);                                              \
> -       int32x2_t a_ = (a);                                              \
> -       int64x2_t result;                                                \
> -       __asm__ ("smull %0.2d,%1.2s,%2.s[%3]"                            \
> -                : "=w"(result)                                          \
> -                : "w"(a_), "w"(b_), "i"(c)                              \
> -                : /* No clobbers */);                                   \
> -       result;                                                          \
> -     })
> -
> -#define vmull_lane_u16(a, b, c)                                         \
> -  __extension__                                                         \
> -    ({                                                                  \
> -       uint16x4_t b_ = (b);                                             \
> -       uint16x4_t a_ = (a);                                             \
> -       uint32x4_t result;                                               \
> -       __asm__ ("umull %0.4s,%1.4h,%2.h[%3]"                            \
> -                : "=w"(result)                                          \
> -                : "w"(a_), "x"(b_), "i"(c)                              \
> -                : /* No clobbers */);                                   \
> -       result;                                                          \
> -     })
> -
> -#define vmull_lane_u32(a, b, c)                                         \
> -  __extension__                                                         \
> -    ({                                                                  \
> -       uint32x2_t b_ = (b);                                             \
> -       uint32x2_t a_ = (a);                                             \
> -       uint64x2_t result;                                               \
> -       __asm__ ("umull %0.2d, %1.2s, %2.s[%3]"                          \
> -                : "=w"(result)                                          \
> -                : "w"(a_), "w"(b_), "i"(c)                              \
> -                : /* No clobbers */);                                   \
> -       result;                                                          \
> -     })
> -
> -#define vmull_laneq_s16(a, b, c)                                        \
> -  __extension__                                                         \
> -    ({                                                                  \
> -       int16x8_t b_ = (b);                                              \
> -       int16x4_t a_ = (a);                                              \
> -       int32x4_t result;                                                \
> -       __asm__ ("smull %0.4s, %1.4h, %2.h[%3]"                          \
> -                : "=w"(result)                                          \
> -                : "w"(a_), "x"(b_), "i"(c)                              \
> -                : /* No clobbers */);                                   \
> -       result;                                                          \
> -     })
> -
> -#define vmull_laneq_s32(a, b, c)                                        \
> -  __extension__                                                         \
> -    ({                                                                  \
> -       int32x4_t b_ = (b);                                              \
> -       int32x2_t a_ = (a);                                              \
> -       int64x2_t result;                                                \
> -       __asm__ ("smull %0.2d, %1.2s, %2.s[%3]"                          \
> -                : "=w"(result)                                          \
> -                : "w"(a_), "w"(b_), "i"(c)                              \
> -                : /* No clobbers */);                                   \
> -       result;                                                          \
> -     })
> -
> -#define vmull_laneq_u16(a, b, c)                                        \
> -  __extension__                                                         \
> -    ({                                                                  \
> -       uint16x8_t b_ = (b);                                             \
> -       uint16x4_t a_ = (a);                                             \
> -       uint32x4_t result;                                               \
> -       __asm__ ("umull %0.4s, %1.4h, %2.h[%3]"                          \
> -                : "=w"(result)                                          \
> -                : "w"(a_), "x"(b_), "i"(c)                              \
> -                : /* No clobbers */);                                   \
> -       result;                                                          \
> -     })
> -
> -#define vmull_laneq_u32(a, b, c)                                        \
> -  __extension__                                                         \
> -    ({                                                                  \
> -       uint32x4_t b_ = (b);                                             \
> -       uint32x2_t a_ = (a);                                             \
> -       uint64x2_t result;                                               \
> -       __asm__ ("umull %0.2d, %1.2s, %2.s[%3]"                          \
> -                : "=w"(result)                                          \
> -                : "w"(a_), "w"(b_), "i"(c)                              \
> -                : /* No clobbers */);                                   \
> -       result;                                                          \
> -     })
> -
> -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
> -vmull_n_s16 (int16x4_t a, int16_t b)
> -{
> -  int32x4_t result;
> -  __asm__ ("smull %0.4s,%1.4h,%2.h[0]"
> -           : "=w"(result)
> -           : "w"(a), "x"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
> -vmull_n_s32 (int32x2_t a, int32_t b)
> -{
> -  int64x2_t result;
> -  __asm__ ("smull %0.2d,%1.2s,%2.s[0]"
> -           : "=w"(result)
> -           : "w"(a), "w"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
> -vmull_n_u16 (uint16x4_t a, uint16_t b)
> -{
> -  uint32x4_t result;
> -  __asm__ ("umull %0.4s,%1.4h,%2.h[0]"
> -           : "=w"(result)
> -           : "w"(a), "x"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
> -vmull_n_u32 (uint32x2_t a, uint32_t b)
> -{
> -  uint64x2_t result;
> -  __asm__ ("umull %0.2d,%1.2s,%2.s[0]"
> -           : "=w"(result)
> -           : "w"(a), "w"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
> -vmull_p8 (poly8x8_t a, poly8x8_t b)
> -{
> -  poly16x8_t result;
> -  __asm__ ("pmull %0.8h, %1.8b, %2.8b"
> -           : "=w"(result)
> -           : "w"(a), "w"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
> -vmull_s8 (int8x8_t a, int8x8_t b)
> -{
> -  int16x8_t result;
> -  __asm__ ("smull %0.8h, %1.8b, %2.8b"
> -           : "=w"(result)
> -           : "w"(a), "w"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
> -vmull_s16 (int16x4_t a, int16x4_t b)
> -{
> -  int32x4_t result;
> -  __asm__ ("smull %0.4s, %1.4h, %2.4h"
> -           : "=w"(result)
> -           : "w"(a), "w"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
> -vmull_s32 (int32x2_t a, int32x2_t b)
> -{
> -  int64x2_t result;
> -  __asm__ ("smull %0.2d, %1.2s, %2.2s"
> -           : "=w"(result)
> -           : "w"(a), "w"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
> -vmull_u8 (uint8x8_t a, uint8x8_t b)
> -{
> -  uint16x8_t result;
> -  __asm__ ("umull %0.8h, %1.8b, %2.8b"
> -           : "=w"(result)
> -           : "w"(a), "w"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
> -vmull_u16 (uint16x4_t a, uint16x4_t b)
> -{
> -  uint32x4_t result;
> -  __asm__ ("umull %0.4s, %1.4h, %2.4h"
> -           : "=w"(result)
> -           : "w"(a), "w"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
> -vmull_u32 (uint32x2_t a, uint32x2_t b)
> -{
> -  uint64x2_t result;
> -  __asm__ ("umull %0.2d, %1.2s, %2.2s"
> -           : "=w"(result)
> -           : "w"(a), "w"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
> -vmulq_n_f32 (float32x4_t a, float32_t b)
> -{
> -  float32x4_t result;
> -  __asm__ ("fmul %0.4s,%1.4s,%2.s[0]"
> -           : "=w"(result)
> -           : "w"(a), "w"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
> -vmulq_n_f64 (float64x2_t a, float64_t b)
> -{
> -  float64x2_t result;
> -  __asm__ ("fmul %0.2d,%1.2d,%2.d[0]"
> -           : "=w"(result)
> -           : "w"(a), "w"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
> -vmulq_n_s16 (int16x8_t a, int16_t b)
> -{
> -  int16x8_t result;
> -  __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
> -           : "=w"(result)
> -           : "w"(a), "x"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
> -vmulq_n_s32 (int32x4_t a, int32_t b)
> -{
> -  int32x4_t result;
> -  __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
> -           : "=w"(result)
> -           : "w"(a), "w"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
> -vmulq_n_u16 (uint16x8_t a, uint16_t b)
> -{
> -  uint16x8_t result;
> -  __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
> -           : "=w"(result)
> -           : "w"(a), "x"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
> -vmulq_n_u32 (uint32x4_t a, uint32_t b)
> -{
> -  uint32x4_t result;
> -  __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
> -           : "=w"(result)
> -           : "w"(a), "w"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
> -vmulx_f32 (float32x2_t a, float32x2_t b)
> -{
> -  float32x2_t result;
> -  __asm__ ("fmulx %0.2s,%1.2s,%2.2s"
> -           : "=w"(result)
> -           : "w"(a), "w"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -#define vmulx_lane_f32(a, b, c)                                         \
> -  __extension__                                                         \
> -    ({                                                                  \
> -       float32x4_t b_ = (b);                                            \
> -       float32x2_t a_ = (a);                                            \
> -       float32x2_t result;                                              \
> -       __asm__ ("fmulx %0.2s,%1.2s,%2.s[%3]"                            \
> -                : "=w"(result)                                          \
> -                : "w"(a_), "w"(b_), "i"(c)                              \
> -                : /* No clobbers */);                                   \
> -       result;                                                          \
> -     })
> -
> -__extension__ static __inline float64_t __attribute__ ((__always_inline__))
> -vmulxd_f64 (float64_t a, float64_t b)
> -{
> -  float64_t result;
> -  __asm__ ("fmulx %d0, %d1, %d2"
> -           : "=w"(result)
> -           : "w"(a), "w"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
> -vmulxq_f32 (float32x4_t a, float32x4_t b)
> -{
> -  float32x4_t result;
> -  __asm__ ("fmulx %0.4s,%1.4s,%2.4s"
> -           : "=w"(result)
> -           : "w"(a), "w"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
> -vmulxq_f64 (float64x2_t a, float64x2_t b)
> -{
> -  float64x2_t result;
> -  __asm__ ("fmulx %0.2d,%1.2d,%2.2d"
> -           : "=w"(result)
> -           : "w"(a), "w"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
> -#define vmulxq_lane_f32(a, b, c)                                        \
> -  __extension__                                                         \
> -    ({                                                                  \
> -       float32x4_t b_ = (b);                                            \
> -       float32x4_t a_ = (a);                                            \
> -       float32x4_t result;                                              \
> -       __asm__ ("fmulx %0.4s,%1.4s,%2.s[%3]"                            \
> -                : "=w"(result)                                          \
> -                : "w"(a_), "w"(b_), "i"(c)                              \
> -                : /* No clobbers */);                                   \
> -       result;                                                          \
> -     })
> -
> -#define vmulxq_lane_f64(a, b, c)                                        \
> -  __extension__                                                         \
> -    ({                                                                  \
> -       float64x2_t b_ = (b);                                            \
> -       float64x2_t a_ = (a);                                            \
> -       float64x2_t result;                                              \
> -       __asm__ ("fmulx %0.2d,%1.2d,%2.d[%3]"                            \
> -                : "=w"(result)                                          \
> -                : "w"(a_), "w"(b_), "i"(c)                              \
> -                : /* No clobbers */);                                   \
> -       result;                                                          \
> -     })
> -
> -__extension__ static __inline float32_t __attribute__ ((__always_inline__))
> -vmulxs_f32 (float32_t a, float32_t b)
> -{
> -  float32_t result;
> -  __asm__ ("fmulx %s0, %s1, %s2"
> -           : "=w"(result)
> -           : "w"(a), "w"(b)
> -           : /* No clobbers */);
> -  return result;
> -}
> -
>  __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
>  vmvn_p8 (poly8x8_t a)
>  {
> @@ -19172,6 +18507,78 @@ vmul_n_f64  (float64x1_t __a, float64_t __b)
>    return (float64x1_t) { vget_lane_f64 (__a, 0) * __b };
>  }
>
> +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
> +vmul_n_f32 (float32x2_t __a, float32_t __b)
> +{
> +  return __builtin_aarch64_mul_nv2sf (__a, __b);
> +}
> +
> +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
> +vmul_n_s16 (int16x4_t __a, int16_t __b)
> +{
> +  return __builtin_aarch64_mul_nv4hi (__a, __b);
> +}
> +
> +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
> +vmul_n_s32 (int32x2_t __a, int32_t __b)
> +{
> +  return __builtin_aarch64_mul_nv2si (__a, __b);
> +}
> +
> +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
> +vmul_n_u16 (uint16x4_t __a, uint16_t __b)
> +{
> +  return (uint16x4_t) __builtin_aarch64_mul_nv4hi ((int16x4_t)__a,
> +                                                   (int16_t)__b);
> +}
> +
> +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
> +vmul_n_u32 (uint32x2_t __a, uint32_t __b)
> +{
> +  return (uint32x2_t) __builtin_aarch64_mul_nv2si ((int32x2_t)__a,
> +                                                   (int32_t)__b);
> +}
> +
> +/* vmulq_n  */
> +
> +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
> +vmulq_n_f32 (float32x4_t __a, float32_t __b)
> +{
> +  return __builtin_aarch64_mul_nv4sf (__a, __b);
> +}
> +
> +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
> +vmulq_n_f64 (float64x2_t __a, float64_t __b)
> +{
> +  return __builtin_aarch64_mul_nv2df (__a, __b);
> +}
> +
> +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
> +vmulq_n_s16 (int16x8_t __a, int16_t __b)
> +{
> +  return __builtin_aarch64_mul_nv8hi (__a, __b);
> +}
> +
> +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
> +vmulq_n_s32 (int32x4_t __a, int32_t __b)
> +{
> +  return __builtin_aarch64_mul_nv4si (__a, __b);
> +}
> +
> +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
> +vmulq_n_u16 (uint16x8_t __a, uint16_t __b)
> +{
> +  return (uint16x8_t) __builtin_aarch64_mul_nv8hi ((int16x8_t)__a,
> +                                                   (int16_t)__b);
> +}
> +
> +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
> +vmulq_n_u32 (uint32x4_t __a, uint32_t __b)
> +{
> +  return (uint32x4_t) __builtin_aarch64_mul_nv4si ((int32x4_t)__a,
> +                                                   (int32_t)__b);
> +}
> +
>  /* vmulq_lane  */
>   __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
> @@ -19249,6 +18656,308 @@ vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, c
>    return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
>  }
>  +/* vmull_high_lane  */
> +
> +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
> +vmull_high_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
> +{
> +  return __builtin_aarch64_smull2_lanev8hi (__a, __b, __c);
> +}
> +
> +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
> +vmull_high_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
> +{
> +  return __builtin_aarch64_smull2_lanev4si (__a, __b, __c);
> +}
> +
> +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
> +vmull_high_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __c)
> +{
> +  return (uint32x4_t) __builtin_aarch64_umull2_lanev8hi ((int16x8_t) __a,
> +                                                         (int16x4_t) __b,
> +                                                         __c);
> +}
> +
> +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
> +vmull_high_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __c)
> +{
> +  return (uint64x2_t) __builtin_aarch64_umull2_lanev4si ((int32x4_t) __a,
> +                                                         (int32x2_t) __b,
> +                                                          __c);
> +}
> +
> +/* vmull_high_laneq  */
> +
> +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
> +vmull_high_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
> +{
> +  return __builtin_aarch64_smull2_laneqv8hi (__a, __b, __c);
> +}
> +
> +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
> +vmull_high_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
> +{
> +  return __builtin_aarch64_smull2_laneqv4si (__a, __b, __c);
> +}
> +
> +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
> +vmull_high_laneq_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
> +{
> +  return (uint32x4_t) __builtin_aarch64_umull2_laneqv8hi ((int16x8_t)__a,
> +                                                          (int16x8_t)__b,
> +                                                          __c);
> +}
> +
> +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
> +vmull_high_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
> +{
> +  return (uint64x2_t) __builtin_aarch64_umull2_laneqv4si ((int32x4_t) __a,
> +                                                          (int32x4_t) __b,
> +                                                           __c);
> +}
> +
> +/* vmull_high_n  */
> +
> +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
> +vmull_high_n_s16 (int16x8_t __a, int16_t __b)
> +{
> +  return __builtin_aarch64_smull2_nv8hi (__a, __b);
> +}
> +
> +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
> +vmull_high_n_s32 (int32x4_t __a, int32_t __b)
> +{
> +  return __builtin_aarch64_smull2_nv4si (__a, __b);
> +}
> +
> +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
> +vmull_high_n_u16 (uint16x8_t __a, uint16_t __b)
> +{
> +  return __builtin_aarch64_umull2_nv8hi_uuu (__a, __b);
> +}
> +
> +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
> +vmull_high_n_u32 (uint32x4_t __a, uint32_t __b)
> +{
> +  return __builtin_aarch64_umull2_nv4si_uuu (__a, __b);
> +}
> +
> +/* vmull_high  */
> +
> +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
> +vmull_high_p8 (poly8x16_t __a, poly8x16_t __b)
> +{
> +  return __builtin_aarch64_pmull2v16qi_ppp (__a, __b);
> +}
> +
> +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
> +vmull_high_s8 (int8x16_t __a, int8x16_t __b)
> +{
> +  return __builtin_aarch64_vec_widen_smult_hi_v16qi (__a, __b);
> +}
> +
> +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
> +vmull_high_s16 (int16x8_t __a, int16x8_t __b)
> +{
> +  return __builtin_aarch64_vec_widen_smult_hi_v8hi (__a, __b);
Jiangjiji Dec. 11, 2014, 2:02 a.m. UTC | #2
Hi Christophe,
These testcases cover intrinsics which are not in the glorious testsuite.
If these cases are already on your todo list, I will exclude them.

Thanks.
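
For readers skimming the thread, here is a minimal standalone sketch of the
semantics the new vmull_high tests exercise: vmull_high_s16 widens the upper
four lanes of each int16x8_t operand and multiplies them, so lane i of the
result is (int32_t) a[i + 4] * b[i + 4].  The input values below are
arbitrary illustrations, not taken from the patch's testsuite.

#include <arm_neon.h>
#include <stdio.h>

int
main (void)
{
  int16_t ina[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
  int16_t inb[8] = { 9, 9, 9, 9, -1000, -1000, -1000, -1000 };
  int32_t out[4];

  int16x8_t a = vld1q_s16 (ina);
  int16x8_t b = vld1q_s16 (inb);
  /* Multiplies lanes 4..7 of a and b, widening to 32 bits.  */
  int32x4_t r = vmull_high_s16 (a, b);  /* { -5000, -6000, -7000, -8000 }  */

  vst1q_s32 (out, r);
  for (int i = 0; i < 4; i++)
    printf ("%d\n", out[i]);
  return 0;
}

With the patched arm_neon.h, this call expands to
__builtin_aarch64_vec_widen_smult_hi_v8hi rather than inline assembly, which
should let the compiler schedule and optimize around the smull2 instruction.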

-----Original Message-----
From: Christophe Lyon [mailto:christophe.lyon@linaro.org]
Sent: December 9, 2014 21:43
To: Jiangjiji
Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw; Yangfei (Felix); Marcus Shawcroft
Subject: Re: [AArch64, NEON] Improve vmulX intrinsics

On 9 December 2014 at 13:52, Jiangjiji <jiangjiji@huawei.com> wrote:
> Hi,
>      This patch converts more intrinsics to use builtin functions instead of
> the previous inline assembly syntax.
>      Passed the glorious testsuite of Christophe Lyon.
>
>      Three testcases are added for the testing of intrinsics which are not
> covered by the testsuite:
>      gcc.target/aarch64/vmull_high.c
>      gcc.target/aarch64/vmull_high_lane.c
>      gcc.target/aarch64/vmull_high_n.c
>

As I said here:
https://gcc.gnu.org/ml/gcc-patches/2014-10/msg01934.html
I am in the process of converting my existing testsuite to GCC/Dejagnu.
Please do not duplicate work.

>      Regtested with aarch64-linux-gnu on QEMU.
>      This patch has no regressions for aarch64_be-linux-gnu big-endian
> target too.
>      OK for the trunk?


> ((__always_inline__))

> +vmull_high_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)

> +{

> +  return (uint64x2_t) __builtin_aarch64_umull2_laneqv4si ((int32x4_t) __a,

> +                                                          (int32x4_t) __b,

> +                                                           __c);

> +}

> +

> +/* vmull_high_n  */

> +

> +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))

> +vmull_high_n_s16 (int16x8_t __a, int16_t __b)

> +{

> +  return __builtin_aarch64_smull2_nv8hi (__a, __b);

> +}

> +

> +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))

> +vmull_high_n_s32 (int32x4_t __a, int32_t __b)

> +{

> +  return __builtin_aarch64_smull2_nv4si (__a, __b);

> +}

> +

> +__extension__ static __inline uint32x4_t __attribute__

> ((__always_inline__))

> +vmull_high_n_u16 (uint16x8_t __a, uint16_t __b)

> +{

> +  return __builtin_aarch64_umull2_nv8hi_uuu (__a, __b);

> +}

> +

> +__extension__ static __inline uint64x2_t __attribute__

> ((__always_inline__))

> +vmull_high_n_u32 (uint32x4_t __a, uint32_t __b)

> +{

> +  return __builtin_aarch64_umull2_nv4si_uuu (__a, __b);

> +}

> +

> +/* vmull_high  */

> +

> +__extension__ static __inline poly16x8_t __attribute__

> ((__always_inline__))

> +vmull_high_p8 (poly8x16_t __a, poly8x16_t __b)

> +{

> +    return  __builtin_aarch64_pmull2v16qi_ppp (__a, __b);

> +}

> +

> +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))

> +vmull_high_s8 (int8x16_t __a, int8x16_t __b)

> +{

> +  return __builtin_aarch64_vec_widen_smult_hi_v16qi (__a, __b);

> +}

> +

> +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))

> +vmull_high_s16 (int16x8_t __a, int16x8_t __b)

> +{

> +  return __builtin_aarch64_vec_widen_smult_hi_v8hi (__a, __b);
diff mbox

Patch

Index: gcc/ChangeLog
===================================================================
--- gcc/ChangeLog	(revision 218464)
+++ gcc/ChangeLog	(working copy)
@@ -1,3 +1,38 @@ 
+2014-12-09  Felix Yang  <felix.yang@huawei.com>
+            Jiji Jiang  <jiangjiji@huawei.com>
+
+	* config/aarch64/aarch64-simd.md (aarch64_mul_n<mode>,
+	aarch64_<su>mull_n<mode>, aarch64_<su>mull<mode>,
+	aarch64_simd_<su>mull2_n<mode>, aarch64_<su>mull2_n<mode>,
+	aarch64_<su>mull_lane<mode>, aarch64_<su>mull2_lane<mode>_internal,
+	aarch64_<su>mull_laneq<mode>, aarch64_<su>mull2_laneq<mode>_internal,
+	aarch64_smull2_lane<mode>, aarch64_umull2_lane<mode>,
+	aarch64_smull2_laneq<mode>, aarch64_umull2_laneq<mode>,
+	aarch64_fmulx<mode>, aarch64_fmulx_lane<mode>,
+	aarch64_pmull2v16qi, aarch64_pmullv8qi): New patterns.
+	* config/aarch64/aarch64-simd-builtins.def (vec_widen_smult_hi_,
+	vec_widen_umult_hi_, umull, smull, smull_n, umull_n, mul_n, smull2_n,
+	umull2_n, smull_lane, umull_lane, smull_laneq, umull_laneq, pmull,
+	umull2_lane, smull2_laneq, umull2_laneq, fmulx, fmulx_lane, pmull2,
+	smull2_lane): New builtins.
+	* config/aarch64/arm_neon.h (vmul_n_f32, vmul_n_s16, vmul_n_s32,
+	vmul_n_u16, vmul_n_u32, vmulq_n_f32, vmulq_n_f64, vmulq_n_s16,
+	vmulq_n_s32, vmulq_n_u16, vmulq_n_u32, vmull_high_lane_s16,
+	vmull_high_lane_s32, vmull_high_lane_u16, vmull_high_lane_u32,
+	vmull_high_laneq_s16, vmull_high_laneq_s32, vmull_high_laneq_u16,
+	vmull_high_laneq_u32, vmull_high_n_s16, vmull_high_n_s32,
+	vmull_high_n_u16, vmull_high_n_u32, vmull_high_p8, vmull_high_s8,
+	vmull_high_s16, vmull_high_s32, vmull_high_u8, vmull_high_u16,
+	vmull_high_u32, vmull_lane_s16, vmull_lane_s32, vmull_lane_u16,
+	vmull_lane_u32, vmull_laneq_s16, vmull_laneq_s32, vmull_laneq_u16,
+	vmull_laneq_u32, vmull_n_s16, vmull_n_s32, vmull_n_u16, vmull_n_u32,
+	vmull_p8, vmull_s8, vmull_s16, vmull_s32, vmull_u8, vmull_u16,
+	vmull_u32, vmulx_f32, vmulx_lane_f32, vmulxd_f64, vmulxq_f32,
+	vmulxq_f64, vmulxq_lane_f32, vmulxq_lane_f64, vmulxs_f32): Rewrite
+	using builtin functions.
+	* config/aarch64/iterators.md (UNSPEC_FMULX, UNSPEC_FMULX_LANE,
+	VDQF_Q): New unspecs and mode iterator.
+
  2014-12-07  Felix Yang  <felix.yang@huawei.com>
  	    Shanyao Chen  <chenshanyao@huawei.com>
  
Index: gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high.c
===================================================================
--- gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high.c	(revision 0)
+++ gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high.c	(revision 0)
@@ -0,0 +1,111 @@ 
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfc48, 0xfcbf, 0xfd36, 0xfdad,
+                                        0xfe24, 0xfe9b, 0xff12, 0xff89 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffff9a0, 0xfffffa28,
+                                        0xfffffab0, 0xfffffb38 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffff7a2,
+                                        0xfffffffffffff83b };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xa4b0, 0xa55a, 0xa604, 0xa6ae,
+                                         0xa758, 0xa802, 0xa8ac, 0xa956 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xbaf73c, 0xbaf7f7,
+                                         0xbaf8b2, 0xbaf96d };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xcbfffff4d8,
+                                         0xcbfffff5a4 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x6530, 0x659a, 0x6464, 0x64ce,
+                                         0x6798, 0x6732, 0x66cc, 0x6666 };
+
+#ifndef INSN_NAME
+#define INSN_NAME vmull_high
+#define TEST_MSG "VMUL_HIGH"
+#endif
+
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+#define DECL_VMUL(T, W, N)                      \
+  DECL_VARIABLE(vector1, T, W, N);              \
+  DECL_VARIABLE(vector2, T, W, N);
+
+  /* vector_res = OP(vector1, vector2), then store the result.  */
+#define TEST_VMULL_HIGH1(INSN, Q, T1, T2, W, N, W1, N1)                 \
+  VECT_VAR(vector_res, T1, W1, N1) =                                    \
+    INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N),                      \
+                      VECT_VAR(vector2, T1, W, N));                     \
+  vst1q##_##T2##W1(VECT_VAR(result, T1, W1, N1),                        \
+                   VECT_VAR(vector_res, T1, W1, N1))
+
+#define TEST_VMULL_HIGH(INSN, Q, T1, T2, W, N, W1, N1)	                \
+  TEST_VMULL_HIGH1(INSN, Q, T1, T2, W, N, W1, N1)
+
+#define CHECK_VMULL_HIGH_RESULTS(test_name,comment)                     \
+  {                                                                     \
+    CHECK(test_name, int, 16, 8, PRIx16, expected, comment);            \
+    CHECK(test_name, int, 32, 4, PRIx32, expected, comment);            \
+    CHECK(test_name, int, 64, 2, PRIx64, expected, comment);            \
+    CHECK(test_name, uint, 16, 8, PRIx16, expected, comment);           \
+    CHECK(test_name, uint, 32, 4, PRIx32, expected, comment);           \
+    CHECK(test_name, uint, 64, 2, PRIx64, expected, comment);           \
+    CHECK(test_name, poly, 16, 8, PRIx16, expected, comment);           \
+  }
+
+  DECL_VMUL(int, 8, 16);
+  DECL_VMUL(int, 16, 8);
+  DECL_VMUL(int, 32, 4);
+  DECL_VMUL(uint, 8, 16);
+  DECL_VMUL(uint, 16, 8);
+  DECL_VMUL(uint, 32, 4);
+  DECL_VMUL(poly, 8, 16);
+
+  DECL_VARIABLE(vector_res, int, 16, 8);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+  DECL_VARIABLE(vector_res, int, 64, 2);
+  DECL_VARIABLE(vector_res, uint, 16, 8);
+  DECL_VARIABLE(vector_res, uint, 32, 4);
+  DECL_VARIABLE(vector_res, uint, 64, 2);
+  DECL_VARIABLE(vector_res, poly, 16, 8);
+
+  clean_results ();
+
+  /* Initialize input "vector1" from "buffer".  */
+  VLOAD(vector1, buffer, q, int, s, 8, 16);
+  VLOAD(vector1, buffer, q, int, s, 16, 8);
+  VLOAD(vector1, buffer, q, int, s, 32, 4);
+  VLOAD(vector1, buffer, q, uint, u, 8, 16);
+  VLOAD(vector1, buffer, q, uint, u, 16, 8);
+  VLOAD(vector1, buffer, q, uint, u, 32, 4);
+  VLOAD(vector1, buffer, q, poly, p, 8, 16);
+
+  /* Choose init value arbitrarily.  */
+  VDUP(vector2, q, int, s, 8, 16, 0x77);
+  VDUP(vector2, q, int, s, 16, 8, 0x88);
+  VDUP(vector2, q, int, s, 32, 4, 0x99);
+  VDUP(vector2, q, uint, u, 8, 16, 0xAA);
+  VDUP(vector2, q, uint, u, 16, 8, 0xBB);
+  VDUP(vector2, q, uint, u, 32, 4, 0xCC);
+  VDUP(vector2, q, poly, p, 8, 16, 0xAA);
+
+  /* Execute the tests.  */
+  TEST_VMULL_HIGH(INSN_NAME, , int, s, 8, 16, 16, 8);
+  TEST_VMULL_HIGH(INSN_NAME, , int, s, 16, 8, 32, 4);
+  TEST_VMULL_HIGH(INSN_NAME, , int, s, 32, 4, 64, 2);
+  TEST_VMULL_HIGH(INSN_NAME, , uint, u, 8, 16, 16, 8);
+  TEST_VMULL_HIGH(INSN_NAME, , uint, u, 16, 8, 32, 4);
+  TEST_VMULL_HIGH(INSN_NAME, , uint, u, 32, 4, 64, 2);
+  TEST_VMULL_HIGH(INSN_NAME, , poly, p, 8, 16, 16, 8);
+
+  CHECK_VMULL_HIGH_RESULTS (TEST_MSG, "");
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+
+  return 0;
+}

Property changes on: gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high.c
___________________________________________________________________
Added: svn:executable
    + *

Index: gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high_lane.c
===================================================================
--- gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high_lane.c	(revision 0)
+++ gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high_lane.c	(revision 0)
@@ -0,0 +1,134 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+VECT_VAR_DECL(expected, int, 32, 4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 };
+VECT_VAR_DECL(expected, int, 64, 2) [] = { 0x2000, 0x2000 };
+VECT_VAR_DECL(expected, uint, 32, 4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 };
+VECT_VAR_DECL(expected, uint, 64, 2) [] = { 0x2000, 0x2000 };
+
+#define TEST_MSG "VMULL_HIGH_LANE/VMULL_HIGH_LANEQ"
+void exec_vmull_high_lane (void)
+{
+  /* vector_res = vmull_high_lane(vector, vector2, lane), then store it.  */
+#define TEST_VMULL_HIGH_LANE(T1, T2, W, W2, N1, N2, L)                  \
+  VECT_VAR(vector_res, T1, W2, N2) =                                    \
+    vmull##_high_lane_##T2##W(VECT_VAR(vector, T1, W, N1),              \
+                              VECT_VAR(vector2, T1, W, N2),             \
+                              L);                                       \
+  vst1q_##T2##W2(VECT_VAR(result, T1, W2, N2), VECT_VAR(vector_res, T1, W2, N2))
+
+#define CHECK_VMULL_HIGH_LANE_RESULTS(test_name,comment)                \
+  {                                                                     \
+    CHECK(test_name, int, 32, 4, PRIx32, expected, comment);            \
+    CHECK(test_name, int, 64, 2, PRIx64, expected, comment);            \
+    CHECK(test_name, uint, 32, 4, PRIx32, expected, comment);           \
+    CHECK(test_name, uint, 64, 2, PRIx64, expected, comment);           \
+  }
+
+
+  /* With ARM RVCT, we need to declare variables before any executable
+     statement */
+  DECL_VARIABLE(vector, int, 16, 8);
+  DECL_VARIABLE(vector, int, 32, 4);
+  DECL_VARIABLE(vector, uint, 16, 8);
+  DECL_VARIABLE(vector, uint, 32, 4);
+  DECL_VARIABLE(vector2, int, 16, 4);
+  DECL_VARIABLE(vector2, int, 32, 2);
+  DECL_VARIABLE(vector2, uint, 16, 4);
+  DECL_VARIABLE(vector2, uint, 32, 2);
+
+  DECL_VARIABLE(vector_res, int, 32, 4);
+  DECL_VARIABLE(vector_res, int, 64, 2);
+  DECL_VARIABLE(vector_res, uint, 32, 4);
+  DECL_VARIABLE(vector_res, uint, 64, 2);
+
+  clean_results ();
+
+  /* Initialize vector2.  */
+  VDUP(vector2, , int, s, 16, 4, 0x1000);
+  VDUP(vector2, , int, s, 32, 2, 0x1000);
+  VDUP(vector2, , uint, u, 16, 4, 0x1000);
+  VDUP(vector2, , uint, u, 32, 2, 0x1000);
+
+  /* Initialize vector.  */
+  VDUP(vector, q, int, s, 16, 8, 0x4);
+  VDUP(vector, q, int, s, 32, 4, 0x2);
+  VDUP(vector, q, uint, u, 16, 8, 0x4);
+  VDUP(vector, q, uint, u, 32, 4, 0x2);
+
+  /* Choose lane arbitrarily */
+  TEST_VMULL_HIGH_LANE(int, s, 16, 32, 8, 4, 2);
+  TEST_VMULL_HIGH_LANE(int, s, 32, 64, 4, 2, 1);
+  TEST_VMULL_HIGH_LANE(uint, u, 16, 32, 8, 4, 2);
+  TEST_VMULL_HIGH_LANE(uint, u, 32, 64, 4, 2, 1);
+
+  CHECK_VMULL_HIGH_LANE_RESULTS (TEST_MSG, "");
+}
+
+
+void exec_vmull_high_laneq (void)
+{
+  /* vector_res = vmull_high_laneq(vector, vector2, lane), then store it.  */
+#define TEST_VMULL_HIGH_LANEQ(T1, T2, W, W2, N1, N2, L)                 \
+  VECT_VAR(vector_res, T1, W2, N2) =                                    \
+    vmull##_high_laneq_##T2##W(VECT_VAR(vector, T1, W, N1),             \
+                               VECT_VAR(vector2, T1, W, N1),            \
+                               L);                                      \
+  vst1q_##T2##W2(VECT_VAR(result, T1, W2, N2), VECT_VAR(vector_res, T1, W2, N2))
+
+#define CHECK_VMULL_HIGH_LANEQ_RESULTS(test_name,comment)               \
+  {                                                                     \
+    CHECK(test_name, int, 32, 4, PRIx32, expected, comment);            \
+    CHECK(test_name, int, 64, 2, PRIx64, expected, comment);            \
+    CHECK(test_name, uint, 32, 4, PRIx32, expected, comment);           \
+    CHECK(test_name, uint, 64, 2, PRIx64, expected, comment);           \
+  }
+
+
+  /* With ARM RVCT, we need to declare variables before any executable
+     statement */
+  DECL_VARIABLE(vector, int, 16, 8);
+  DECL_VARIABLE(vector, int, 32, 4);
+  DECL_VARIABLE(vector, uint, 16, 8);
+  DECL_VARIABLE(vector, uint, 32, 4);
+  DECL_VARIABLE(vector2, int, 16, 8);
+  DECL_VARIABLE(vector2, int, 32, 4);
+  DECL_VARIABLE(vector2, uint, 16, 8);
+  DECL_VARIABLE(vector2, uint, 32, 4);
+
+  DECL_VARIABLE(vector_res, int, 32, 4);
+  DECL_VARIABLE(vector_res, int, 64, 2);
+  DECL_VARIABLE(vector_res, uint, 32, 4);
+  DECL_VARIABLE(vector_res, uint, 64, 2);
+
+  clean_results ();
+
+  /* Initialize vector2.  */
+  VDUP(vector2, q, int, s, 16, 8, 0x1000);
+  VDUP(vector2, q, int, s, 32, 4, 0x1000);
+  VDUP(vector2, q, uint, u, 16, 8, 0x1000);
+  VDUP(vector2, q, uint, u, 32, 4, 0x1000);
+
+  /* Initialize vector.  */
+  VDUP(vector, q, int, s, 16, 8, 0x4);
+  VDUP(vector, q, int, s, 32, 4, 0x2);
+  VDUP(vector, q, uint, u, 16, 8, 0x4);
+  VDUP(vector, q, uint, u, 32, 4, 0x2);
+
+  /* Choose lane arbitrarily */
+  TEST_VMULL_HIGH_LANEQ(int, s, 16, 32, 8, 4, 2);
+  TEST_VMULL_HIGH_LANEQ(int, s, 32, 64, 4, 2, 1);
+  TEST_VMULL_HIGH_LANEQ(uint, u, 16, 32, 8, 4, 2);
+  TEST_VMULL_HIGH_LANEQ(uint, u, 32, 64, 4, 2, 1);
+
+  CHECK_VMULL_HIGH_LANEQ_RESULTS (TEST_MSG, "");
+}
+
+int main (void)
+{
+  exec_vmull_high_lane ();
+  exec_vmull_high_laneq ();
+
+  return 0;
+}

Property changes on: gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high_lane.c
___________________________________________________________________
Added: svn:executable
    + *

Index: gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high_n.c
===================================================================
--- gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high_n.c	(revision 0)
+++ gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high_n.c	(revision 0)
@@ -0,0 +1,81 @@ 
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffff73c, 0xfffff7f7,
+                                        0xfffff8b2, 0xfffff96d };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffff4d8,
+                                        0xfffffffffffff5a4 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xedf4d8, 0xedf5c6,
+                                         0xedf6b4, 0xedf7a2 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfefffff20e,
+                                         0xfefffff30d };
+
+#ifndef INSN_NAME
+#define INSN_NAME vmull_high_n
+#define TEST_MSG "VMULL_HIGH_N"
+#endif
+
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+#define DECL_VMUL(T, W, N)                      \
+  DECL_VARIABLE(vector1, T, W, N);
+
+  /* vector_res = OP(vector1, vector2), then store the result.  */
+#define TEST_VMULL_HIGH1(INSN, Q, T1, T2, W, N, W1, N1, C)              \
+  VECT_VAR(vector_res, T1, W1, N1) =                                    \
+    INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N),                      \
+                      C);                                               \
+  vst1q##_##T2##W1(VECT_VAR(result, T1, W1, N1),                        \
+                   VECT_VAR(vector_res, T1, W1, N1))
+
+#define TEST_VMULL_HIGH(INSN, Q, T1, T2, W, N, W1, N1, C)               \
+  TEST_VMULL_HIGH1(INSN, Q, T1, T2, W, N, W1, N1, C)
+
+#define CHECK_VMULL_HIGH_N_RESULTS(test_name,comment)                   \
+  {                                                                     \
+    CHECK(test_name, int, 32, 4, PRIx32, expected, comment);            \
+    CHECK(test_name, int, 64, 2, PRIx64, expected, comment);            \
+    CHECK(test_name, uint, 32, 4, PRIx32, expected, comment);           \
+    CHECK(test_name, uint, 64, 2, PRIx64, expected, comment);           \
+  }
+
+  DECL_VMUL(int, 16, 8);
+  DECL_VMUL(int, 32, 4);
+  DECL_VMUL(uint, 16, 8);
+  DECL_VMUL(uint, 32, 4);
+
+  DECL_VARIABLE(vector_res, int, 32, 4);
+  DECL_VARIABLE(vector_res, int, 64, 2);
+  DECL_VARIABLE(vector_res, uint, 32, 4);
+  DECL_VARIABLE(vector_res, uint, 64, 2);
+
+  clean_results ();
+
+  /* Initialize input "vector1" from "buffer".  */
+  VLOAD(vector1, buffer, q, int, s, 16, 8);
+  VLOAD(vector1, buffer, q, int, s, 32, 4);
+  VLOAD(vector1, buffer, q, uint, u, 16, 8);
+  VLOAD(vector1, buffer, q, uint, u, 32, 4);
+
+
+  /* Execute the tests.  */
+  TEST_VMULL_HIGH(INSN_NAME, , int, s, 16, 8, 32, 4, 0xBB);
+  TEST_VMULL_HIGH(INSN_NAME, , int, s, 32, 4, 64, 2, 0xCC);
+  TEST_VMULL_HIGH(INSN_NAME, , uint, u, 16, 8, 32, 4, 0xEE);
+  TEST_VMULL_HIGH(INSN_NAME, , uint, u, 32, 4, 64, 2, 0xFF);
+
+  CHECK_VMULL_HIGH_N_RESULTS (TEST_MSG, "");
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+
+  return 0;
+}

Property changes on: gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_high_n.c
___________________________________________________________________
Added: svn:executable
    + *

Index: gcc/testsuite/ChangeLog
===================================================================
--- gcc/testsuite/ChangeLog	(revision 218464)
+++ gcc/testsuite/ChangeLog	(working copy)
@@ -1,3 +1,10 @@
+2014-12-09  Felix Yang  <felix.yang@huawei.com>
+            Jiji Jiang  <jiangjiji@huawei.com>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vmull_high.c: New test.
+	* gcc.target/aarch64/advsimd-intrinsics/vmull_high_lane.c: New test.
+	* gcc.target/aarch64/advsimd-intrinsics/vmull_high_n.c: New test.
+
  2014-12-07  Christophe Lyon  <christophe.lyon@linaro.org>
  
  	* gcc.target/aarch64/advsimd-intrinsics/vaddhn.c: Actually execute
Index: gcc/config/aarch64/arm_neon.h
===================================================================
--- gcc/config/aarch64/arm_neon.h	(revision 218464)
+++ gcc/config/aarch64/arm_neon.h	(working copy)
@@ -7627,671 +7627,6 @@  vmovn_u64 (uint64x2_t a)
    return result;
  }
  
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-vmul_n_f32 (float32x2_t a, float32_t b)
-{
-  float32x2_t result;
-  __asm__ ("fmul %0.2s,%1.2s,%2.s[0]"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vmul_n_s16 (int16x4_t a, int16_t b)
-{
-  int16x4_t result;
-  __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
-           : "=w"(result)
-           : "w"(a), "x"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vmul_n_s32 (int32x2_t a, int32_t b)
-{
-  int32x2_t result;
-  __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vmul_n_u16 (uint16x4_t a, uint16_t b)
-{
-  uint16x4_t result;
-  __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
-           : "=w"(result)
-           : "w"(a), "x"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vmul_n_u32 (uint32x2_t a, uint32_t b)
-{
-  uint32x2_t result;
-  __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-#define vmull_high_lane_s16(a, b, c)                                    \
-  __extension__                                                         \
-    ({                                                                  \
-       int16x4_t b_ = (b);                                              \
-       int16x8_t a_ = (a);                                              \
-       int32x4_t result;                                                \
-       __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]"                         \
-                : "=w"(result)                                          \
-                : "w"(a_), "x"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vmull_high_lane_s32(a, b, c)                                    \
-  __extension__                                                         \
-    ({                                                                  \
-       int32x2_t b_ = (b);                                              \
-       int32x4_t a_ = (a);                                              \
-       int64x2_t result;                                                \
-       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]"                         \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vmull_high_lane_u16(a, b, c)                                    \
-  __extension__                                                         \
-    ({                                                                  \
-       uint16x4_t b_ = (b);                                             \
-       uint16x8_t a_ = (a);                                             \
-       uint32x4_t result;                                               \
-       __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]"                         \
-                : "=w"(result)                                          \
-                : "w"(a_), "x"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vmull_high_lane_u32(a, b, c)                                    \
-  __extension__                                                         \
-    ({                                                                  \
-       uint32x2_t b_ = (b);                                             \
-       uint32x4_t a_ = (a);                                             \
-       uint64x2_t result;                                               \
-       __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]"                         \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vmull_high_laneq_s16(a, b, c)                                   \
-  __extension__                                                         \
-    ({                                                                  \
-       int16x8_t b_ = (b);                                              \
-       int16x8_t a_ = (a);                                              \
-       int32x4_t result;                                                \
-       __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]"                         \
-                : "=w"(result)                                          \
-                : "w"(a_), "x"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vmull_high_laneq_s32(a, b, c)                                   \
-  __extension__                                                         \
-    ({                                                                  \
-       int32x4_t b_ = (b);                                              \
-       int32x4_t a_ = (a);                                              \
-       int64x2_t result;                                                \
-       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]"                         \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vmull_high_laneq_u16(a, b, c)                                   \
-  __extension__                                                         \
-    ({                                                                  \
-       uint16x8_t b_ = (b);                                             \
-       uint16x8_t a_ = (a);                                             \
-       uint32x4_t result;                                               \
-       __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]"                         \
-                : "=w"(result)                                          \
-                : "w"(a_), "x"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vmull_high_laneq_u32(a, b, c)                                   \
-  __extension__                                                         \
-    ({                                                                  \
-       uint32x4_t b_ = (b);                                             \
-       uint32x4_t a_ = (a);                                             \
-       uint64x2_t result;                                               \
-       __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]"                         \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vmull_high_n_s16 (int16x8_t a, int16_t b)
-{
-  int32x4_t result;
-  __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]"
-           : "=w"(result)
-           : "w"(a), "x"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vmull_high_n_s32 (int32x4_t a, int32_t b)
-{
-  int64x2_t result;
-  __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vmull_high_n_u16 (uint16x8_t a, uint16_t b)
-{
-  uint32x4_t result;
-  __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]"
-           : "=w"(result)
-           : "w"(a), "x"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vmull_high_n_u32 (uint32x4_t a, uint32_t b)
-{
-  uint64x2_t result;
-  __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-vmull_high_p8 (poly8x16_t a, poly8x16_t b)
-{
-  poly16x8_t result;
-  __asm__ ("pmull2 %0.8h,%1.16b,%2.16b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vmull_high_s8 (int8x16_t a, int8x16_t b)
-{
-  int16x8_t result;
-  __asm__ ("smull2 %0.8h,%1.16b,%2.16b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vmull_high_s16 (int16x8_t a, int16x8_t b)
-{
-  int32x4_t result;
-  __asm__ ("smull2 %0.4s,%1.8h,%2.8h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vmull_high_s32 (int32x4_t a, int32x4_t b)
-{
-  int64x2_t result;
-  __asm__ ("smull2 %0.2d,%1.4s,%2.4s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-vmull_high_u8 (uint8x16_t a, uint8x16_t b)
-{
-  uint16x8_t result;
-  __asm__ ("umull2 %0.8h,%1.16b,%2.16b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vmull_high_u16 (uint16x8_t a, uint16x8_t b)
-{
-  uint32x4_t result;
-  __asm__ ("umull2 %0.4s,%1.8h,%2.8h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vmull_high_u32 (uint32x4_t a, uint32x4_t b)
-{
-  uint64x2_t result;
-  __asm__ ("umull2 %0.2d,%1.4s,%2.4s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-#define vmull_lane_s16(a, b, c)                                         \
-  __extension__                                                         \
-    ({                                                                  \
-       int16x4_t b_ = (b);                                              \
-       int16x4_t a_ = (a);                                              \
-       int32x4_t result;                                                \
-       __asm__ ("smull %0.4s,%1.4h,%2.h[%3]"                            \
-                : "=w"(result)                                          \
-                : "w"(a_), "x"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vmull_lane_s32(a, b, c)                                         \
-  __extension__                                                         \
-    ({                                                                  \
-       int32x2_t b_ = (b);                                              \
-       int32x2_t a_ = (a);                                              \
-       int64x2_t result;                                                \
-       __asm__ ("smull %0.2d,%1.2s,%2.s[%3]"                            \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vmull_lane_u16(a, b, c)                                         \
-  __extension__                                                         \
-    ({                                                                  \
-       uint16x4_t b_ = (b);                                             \
-       uint16x4_t a_ = (a);                                             \
-       uint32x4_t result;                                               \
-       __asm__ ("umull %0.4s,%1.4h,%2.h[%3]"                            \
-                : "=w"(result)                                          \
-                : "w"(a_), "x"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vmull_lane_u32(a, b, c)                                         \
-  __extension__                                                         \
-    ({                                                                  \
-       uint32x2_t b_ = (b);                                             \
-       uint32x2_t a_ = (a);                                             \
-       uint64x2_t result;                                               \
-       __asm__ ("umull %0.2d, %1.2s, %2.s[%3]"                          \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vmull_laneq_s16(a, b, c)                                        \
-  __extension__                                                         \
-    ({                                                                  \
-       int16x8_t b_ = (b);                                              \
-       int16x4_t a_ = (a);                                              \
-       int32x4_t result;                                                \
-       __asm__ ("smull %0.4s, %1.4h, %2.h[%3]"                          \
-                : "=w"(result)                                          \
-                : "w"(a_), "x"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vmull_laneq_s32(a, b, c)                                        \
-  __extension__                                                         \
-    ({                                                                  \
-       int32x4_t b_ = (b);                                              \
-       int32x2_t a_ = (a);                                              \
-       int64x2_t result;                                                \
-       __asm__ ("smull %0.2d, %1.2s, %2.s[%3]"                          \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vmull_laneq_u16(a, b, c)                                        \
-  __extension__                                                         \
-    ({                                                                  \
-       uint16x8_t b_ = (b);                                             \
-       uint16x4_t a_ = (a);                                             \
-       uint32x4_t result;                                               \
-       __asm__ ("umull %0.4s, %1.4h, %2.h[%3]"                          \
-                : "=w"(result)                                          \
-                : "w"(a_), "x"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vmull_laneq_u32(a, b, c)                                        \
-  __extension__                                                         \
-    ({                                                                  \
-       uint32x4_t b_ = (b);                                             \
-       uint32x2_t a_ = (a);                                             \
-       uint64x2_t result;                                               \
-       __asm__ ("umull %0.2d, %1.2s, %2.s[%3]"                          \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vmull_n_s16 (int16x4_t a, int16_t b)
-{
-  int32x4_t result;
-  __asm__ ("smull %0.4s,%1.4h,%2.h[0]"
-           : "=w"(result)
-           : "w"(a), "x"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vmull_n_s32 (int32x2_t a, int32_t b)
-{
-  int64x2_t result;
-  __asm__ ("smull %0.2d,%1.2s,%2.s[0]"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vmull_n_u16 (uint16x4_t a, uint16_t b)
-{
-  uint32x4_t result;
-  __asm__ ("umull %0.4s,%1.4h,%2.h[0]"
-           : "=w"(result)
-           : "w"(a), "x"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vmull_n_u32 (uint32x2_t a, uint32_t b)
-{
-  uint64x2_t result;
-  __asm__ ("umull %0.2d,%1.2s,%2.s[0]"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-vmull_p8 (poly8x8_t a, poly8x8_t b)
-{
-  poly16x8_t result;
-  __asm__ ("pmull %0.8h, %1.8b, %2.8b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vmull_s8 (int8x8_t a, int8x8_t b)
-{
-  int16x8_t result;
-  __asm__ ("smull %0.8h, %1.8b, %2.8b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vmull_s16 (int16x4_t a, int16x4_t b)
-{
-  int32x4_t result;
-  __asm__ ("smull %0.4s, %1.4h, %2.4h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vmull_s32 (int32x2_t a, int32x2_t b)
-{
-  int64x2_t result;
-  __asm__ ("smull %0.2d, %1.2s, %2.2s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-vmull_u8 (uint8x8_t a, uint8x8_t b)
-{
-  uint16x8_t result;
-  __asm__ ("umull %0.8h, %1.8b, %2.8b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vmull_u16 (uint16x4_t a, uint16x4_t b)
-{
-  uint32x4_t result;
-  __asm__ ("umull %0.4s, %1.4h, %2.4h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vmull_u32 (uint32x2_t a, uint32x2_t b)
-{
-  uint64x2_t result;
-  __asm__ ("umull %0.2d, %1.2s, %2.2s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-vmulq_n_f32 (float32x4_t a, float32_t b)
-{
-  float32x4_t result;
-  __asm__ ("fmul %0.4s,%1.4s,%2.s[0]"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-vmulq_n_f64 (float64x2_t a, float64_t b)
-{
-  float64x2_t result;
-  __asm__ ("fmul %0.2d,%1.2d,%2.d[0]"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vmulq_n_s16 (int16x8_t a, int16_t b)
-{
-  int16x8_t result;
-  __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
-           : "=w"(result)
-           : "w"(a), "x"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vmulq_n_s32 (int32x4_t a, int32_t b)
-{
-  int32x4_t result;
-  __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-vmulq_n_u16 (uint16x8_t a, uint16_t b)
-{
-  uint16x8_t result;
-  __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
-           : "=w"(result)
-           : "w"(a), "x"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vmulq_n_u32 (uint32x4_t a, uint32_t b)
-{
-  uint32x4_t result;
-  __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-vmulx_f32 (float32x2_t a, float32x2_t b)
-{
-  float32x2_t result;
-  __asm__ ("fmulx %0.2s,%1.2s,%2.2s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-#define vmulx_lane_f32(a, b, c)                                         \
-  __extension__                                                         \
-    ({                                                                  \
-       float32x4_t b_ = (b);                                            \
-       float32x2_t a_ = (a);                                            \
-       float32x2_t result;                                              \
-       __asm__ ("fmulx %0.2s,%1.2s,%2.s[%3]"                            \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
-vmulxd_f64 (float64_t a, float64_t b)
-{
-  float64_t result;
-  __asm__ ("fmulx %d0, %d1, %d2"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-vmulxq_f32 (float32x4_t a, float32x4_t b)
-{
-  float32x4_t result;
-  __asm__ ("fmulx %0.4s,%1.4s,%2.4s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-vmulxq_f64 (float64x2_t a, float64x2_t b)
-{
-  float64x2_t result;
-  __asm__ ("fmulx %0.2d,%1.2d,%2.2d"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-#define vmulxq_lane_f32(a, b, c)                                        \
-  __extension__                                                         \
-    ({                                                                  \
-       float32x4_t b_ = (b);                                            \
-       float32x4_t a_ = (a);                                            \
-       float32x4_t result;                                              \
-       __asm__ ("fmulx %0.4s,%1.4s,%2.s[%3]"                            \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vmulxq_lane_f64(a, b, c)                                        \
-  __extension__                                                         \
-    ({                                                                  \
-       float64x2_t b_ = (b);                                            \
-       float64x2_t a_ = (a);                                            \
-       float64x2_t result;                                              \
-       __asm__ ("fmulx %0.2d,%1.2d,%2.d[%3]"                            \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
-vmulxs_f32 (float32_t a, float32_t b)
-{
-  float32_t result;
-  __asm__ ("fmulx %s0, %s1, %s2"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
  __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
  vmvn_p8 (poly8x8_t a)
  {
@@ -19172,6 +18507,78 @@  vmul_n_f64  (float64x1_t __a, float64_t __b)
    return (float64x1_t) { vget_lane_f64 (__a, 0) * __b };
  }
  
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vmul_n_f32 (float32x2_t __a, float32_t __b)
+{
+  return __builtin_aarch64_mul_nv2sf (__a, __b);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vmul_n_s16 (int16x4_t __a, int16_t __b)
+{
+  return __builtin_aarch64_mul_nv4hi (__a, __b);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vmul_n_s32 (int32x2_t __a, int32_t __b)
+{
+  return __builtin_aarch64_mul_nv2si (__a, __b);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vmul_n_u16 (uint16x4_t __a, uint16_t __b)
+{
+  return (uint16x4_t) __builtin_aarch64_mul_nv4hi ((int16x4_t) __a,
+                                                   (int16_t) __b);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vmul_n_u32 (uint32x2_t __a, uint32_t __b)
+{
+  return (uint32x2_t) __builtin_aarch64_mul_nv2si ((int32x2_t) __a,
+                                                   (int32_t) __b);
+}
+
+/* vmulq_n  */
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vmulq_n_f32 (float32x4_t __a, float32_t __b)
+{
+  return __builtin_aarch64_mul_nv4sf (__a, __b);
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vmulq_n_f64 (float64x2_t __a, float64_t __b)
+{
+  return __builtin_aarch64_mul_nv2df (__a, __b);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vmulq_n_s16 (int16x8_t __a, int16_t __b)
+{
+  return __builtin_aarch64_mul_nv8hi (__a, __b);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmulq_n_s32 (int32x4_t __a, int32_t __b)
+{
+  return __builtin_aarch64_mul_nv4si (__a, __b);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vmulq_n_u16 (uint16x8_t __a, uint16_t __b)
+{
+  return (uint16x8_t) __builtin_aarch64_mul_nv8hi ((int16x8_t) __a,
+                                                   (int16_t) __b);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmulq_n_u32 (uint32x4_t __a, uint32_t __b)
+{
+  return (uint32x4_t) __builtin_aarch64_mul_nv4si ((int32x4_t) __a,
+                                                   (int32_t) __b);
+}
+
  /* vmulq_lane  */
  
  __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
@@ -19249,6 +18656,309 @@  vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, c
    return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
  }
  
+/* vmull_high_lane  */
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmull_high_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
+{
+  return __builtin_aarch64_smull2_lanev8hi (__a, __b, __c);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vmull_high_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
+{
+  return __builtin_aarch64_smull2_lanev4si (__a, __b, __c);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmull_high_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __c)
+{
+  return (uint32x4_t) __builtin_aarch64_umull2_lanev8hi ((int16x8_t) __a,
+                                                         (int16x4_t) __b,
+                                                         __c);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vmull_high_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __c)
+{
+  return (uint64x2_t) __builtin_aarch64_umull2_lanev4si ((int32x4_t) __a,
+                                                         (int32x2_t) __b,
+                                                         __c);
+}
+
+/* vmull_high_laneq  */
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmull_high_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
+{
+  return __builtin_aarch64_smull2_laneqv8hi (__a, __b, __c);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vmull_high_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
+{
+  return __builtin_aarch64_smull2_laneqv4si (__a, __b, __c);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmull_high_laneq_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
+{
+  return (uint32x4_t) __builtin_aarch64_umull2_laneqv8hi ((int16x8_t) __a,
+                                                          (int16x8_t) __b,
+                                                          __c);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vmull_high_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
+{
+  return (uint64x2_t) __builtin_aarch64_umull2_laneqv4si ((int32x4_t) __a,
+                                                          (int32x4_t) __b,
+                                                          __c);
+}
+
+/* vmull_high_n  */
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmull_high_n_s16 (int16x8_t __a, int16_t __b)
+{
+  return __builtin_aarch64_smull2_nv8hi (__a, __b);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vmull_high_n_s32 (int32x4_t __a, int32_t __b)
+{
+  return __builtin_aarch64_smull2_nv4si (__a, __b);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmull_high_n_u16 (uint16x8_t __a, uint16_t __b)
+{
+  return __builtin_aarch64_umull2_nv8hi_uuu (__a, __b);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vmull_high_n_u32 (uint32x4_t __a, uint32_t __b)
+{
+  return __builtin_aarch64_umull2_nv4si_uuu (__a, __b);
+}
+
+/* vmull_high  */
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vmull_high_p8 (poly8x16_t __a, poly8x16_t __b)
+{
+  return __builtin_aarch64_pmull2v16qi_ppp (__a, __b);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vmull_high_s8 (int8x16_t __a, int8x16_t __b)
+{
+  return __builtin_aarch64_vec_widen_smult_hi_v16qi (__a, __b);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmull_high_s16 (int16x8_t __a, int16x8_t __b)
+{
+  return __builtin_aarch64_vec_widen_smult_hi_v8hi (__a, __b);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vmull_high_s32 (int32x4_t __a, int32x4_t __b)
+{
+  return __builtin_aarch64_vec_widen_smult_hi_v4si (__a, __b);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vmull_high_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+  return __builtin_aarch64_vec_widen_umult_hi_v16qi_uuu (__a, __b);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmull_high_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+  return __builtin_aarch64_vec_widen_umult_hi_v8hi_uuu (__a, __b);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vmull_high_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+  return __builtin_aarch64_vec_widen_umult_hi_v4si_uuu (__a, __b);
+}
+
+/* vmull_lane  */
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmull_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
+{
+  return __builtin_aarch64_smull_lanev4hi (__a, __b, __c);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vmull_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
+{
+  return __builtin_aarch64_smull_lanev2si (__a, __b, __c);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmull_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
+{
+  return __builtin_aarch64_umull_lanev4hi_uuuu (__a, __b, __c);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vmull_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
+{
+  return __builtin_aarch64_umull_lanev2si_uuuu (__a, __b, __c);
+}
+
+/* vmull_laneq  */
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmull_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
+{
+  return __builtin_aarch64_smull_laneqv4hi (__a, __b, __c);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vmull_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
+{
+  return __builtin_aarch64_smull_laneqv2si (__a, __b, __c);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmull_laneq_u16 (uint16x4_t __a, uint16x8_t __b, const int __c)
+{
+  return __builtin_aarch64_umull_laneqv4hi_uuuu (__a, __b, __c);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vmull_laneq_u32 (uint32x2_t __a, uint32x4_t __b, const int __c)
+{
+  return __builtin_aarch64_umull_laneqv2si_uuuu (__a, __b, __c);
+}
+
+/* vmull_n  */
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmull_n_s16 (int16x4_t __a, int16_t __b)
+{
+  return __builtin_aarch64_smull_nv4hi (__a, __b);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vmull_n_s32 (int32x2_t __a, int32_t __b)
+{
+  return __builtin_aarch64_smull_nv2si (__a, __b);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmull_n_u16 (uint16x4_t __a, uint16_t __b)
+{
+  return __builtin_aarch64_umull_nv4hi_uuu (__a, __b);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vmull_n_u32 (uint32x2_t __a, uint32_t __b)
+{
+  return __builtin_aarch64_umull_nv2si_uuu (__a, __b);
+}
+
+/* vmull  */
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vmull_p8 (poly8x8_t __a, poly8x8_t __b)
+{
+  return __builtin_aarch64_pmullv8qi_ppp (__a, __b);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vmull_s8 (int8x8_t __a, int8x8_t __b)
+{
+  return __builtin_aarch64_smullv8qi (__a, __b);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmull_s16 (int16x4_t __a, int16x4_t __b)
+{
+  return __builtin_aarch64_smullv4hi (__a, __b);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vmull_s32 (int32x2_t __a, int32x2_t __b)
+{
+  return __builtin_aarch64_smullv2si (__a, __b);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vmull_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+  return __builtin_aarch64_umullv8qi_uuu (__a, __b);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmull_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+  return __builtin_aarch64_umullv4hi_uuu (__a, __b);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vmull_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+  return __builtin_aarch64_umullv2si_uuu (__a, __b);
+}
+
+/* vmulx  */
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vmulx_f32 (float32x2_t __a, float32x2_t __b)
+{
+  return __builtin_aarch64_fmulxv2sf (__a, __b);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vmulx_lane_f32 (float32x2_t __a, float32x4_t __b, const int __c)
+{
+  return __builtin_aarch64_fmulx_lanev2sf (__a, __b, __c);
+}
+
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
+vmulxd_f64 (float64_t __a, float64_t __b)
+{
+  return __builtin_aarch64_fmulxdf (__a, __b);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vmulxq_f32 (float32x4_t __a, float32x4_t __b)
+{
+  return __builtin_aarch64_fmulxv4sf (__a, __b);
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vmulxq_f64 (float64x2_t __a, float64x2_t __b)
+{
+  return __builtin_aarch64_fmulxv2df (__a, __b);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vmulxq_lane_f32 (float32x4_t __a, float32x4_t __b, const int __c)
+{
+  return __builtin_aarch64_fmulx_lanev4sf (__a, __b, __c);
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vmulxq_lane_f64 (float64x2_t __a, float64x2_t __b, const int __c)
+{
+  return __builtin_aarch64_fmulx_lanev2df (__a, __b, __c);
+}
+
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
+vmulxs_f32 (float32_t __a, float32_t __b)
+{
+  return __builtin_aarch64_fmulxsf (__a, __b);
+}
+
  /* vneg  */
  
  __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
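
[Illustrative commentary, not part of the patch.]  A minimal
self-contained check of the semantics the rewritten intrinsics are
expected to keep, assuming an aarch64 target where <arm_neon.h> is
available: vmull_high_* widens and multiplies the upper halves of its
128-bit sources, and vmull_n_* multiplies by a duplicated scalar.

#include <arm_neon.h>
#include <assert.h>

int
main (void)
{
  int16_t a[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
  int16_t b[8] = { 8, 7, 6, 5, 4, 3, 2, 1 };
  int32_t out[4];

  /* vmull_high_s16 reads elements 4..7 of each 128-bit source.  */
  vst1q_s32 (out, vmull_high_s16 (vld1q_s16 (a), vld1q_s16 (b)));
  for (int i = 0; i < 4; i++)
    assert (out[i] == (int32_t) a[i + 4] * b[i + 4]);

  /* vmull_n_s16 widens a 64-bit vector and multiplies each element
     by the scalar, matching SMULL Vd.4S, Vn.4H, Vm.H[0].  */
  vst1q_s32 (out, vmull_n_s16 (vld1_s16 (a), 3));
  for (int i = 0; i < 4; i++)
    assert (out[i] == a[i] * 3);

  return 0;
}
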
Index: gcc/config/aarch64/iterators.md
===================================================================
--- gcc/config/aarch64/iterators.md	(revision 218464)
+++ gcc/config/aarch64/iterators.md	(working copy)
@@ -273,6 +273,8 @@ 
      UNSPEC_SHA256SU1    ; Used in aarch64-simd.md.
      UNSPEC_PMULL        ; Used in aarch64-simd.md.
      UNSPEC_PMULL2       ; Used in aarch64-simd.md.
+    UNSPEC_FMULX        ; Used in aarch64-simd.md.
+    UNSPEC_FMULX_LANE   ; Used in aarch64-simd.md.
  ])
  
  ;; -------------------------------------------------------------------
@@ -462,6 +464,10 @@ 
  
  )
  
+;; 128-bit modes from which the "lane" forms of fmulx select one element.
+(define_mode_attr VDQF_Q [(V2SF "V4SF") (V4SF "V4SF")
+                          (V2DF "V2DF")])
+
  ;; Widened mode register suffixes for VD_BHSI/VQW.
  (define_mode_attr Vwtype [(V8QI "8h") (V4HI "4s")
  			  (V2SI "2d") (V16QI "8h")
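
[Illustrative commentary, not part of the patch.]  VDQF_Q maps each
float vector mode to the 128-bit mode used for the lane operand of
aarch64_fmulx_lane<mode>, so the _lane intrinsics in this patch take
their lane from a full quad register.  A hypothetical caller (the
function name is made up), assuming <arm_neon.h> on aarch64:

#include <arm_neon.h>

float32x2_t
mulx_by_lane3 (float32x2_t a, float32x4_t v)
{
  /* Multiply each element of A by lane 3 of V using FMULX, which
     returns +/-2.0 for the 0 * infinity cases instead of NaN.  */
  return vmulx_lane_f32 (a, v, 3);
}
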
Index: gcc/config/aarch64/aarch64-simd.md
===================================================================
--- gcc/config/aarch64/aarch64-simd.md	(revision 218464)
+++ gcc/config/aarch64/aarch64-simd.md	(working copy)
@@ -1394,6 +1394,254 @@ 
   }
  )
  
+(define_insn "aarch64_mul_n<mode>"
+  [(set (match_operand:VMUL 0 "register_operand" "=w")
+        (mult:VMUL
+          (match_operand:VMUL 1 "register_operand" "w")
+          (vec_duplicate:VMUL
+            (match_operand:<VEL> 2 "register_operand" "<h_con>"))))]
+  "TARGET_SIMD"
+  "<f>mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"
+  [(set_attr "type" "neon_mul_<Vetype>_long")]
+)
+
+(define_insn "aarch64_<su>mull_n<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+        (mult:<VWIDE>
+          (ANY_EXTEND:<VWIDE>
+            (match_operand:VD_HSI 1 "register_operand" "w"))
+          (ANY_EXTEND:<VWIDE>
+            (vec_duplicate:VD_HSI
+              (match_operand:<VEL> 2 "register_operand" "<vwx>")))))]
+  "TARGET_SIMD"
+  "<su>mull\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
+  [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_<su>mull<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+        (mult:<VWIDE>
+          (ANY_EXTEND:<VWIDE>
+            (match_operand:VD_BHSI 1 "register_operand" "w"))
+          (ANY_EXTEND:<VWIDE>
+            (match_operand:VD_BHSI 2 "register_operand" "w"))))]
+ "TARGET_SIMD"
+ "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
+  [(set_attr "type" "neon_mul_<Vetype>_long")]
+)
+
+(define_insn "aarch64_simd_<su>mull2_n<mode>"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
+                            (match_operand:VQ_HSI 1 "register_operand" "w")
+                            (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
+                     (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF>
+                            (match_operand:<VEL> 2 "register_operand" "<vwx>")))))]
+  "TARGET_SIMD"
+  "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
+  [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
+)
+
+(define_expand "aarch64_<su>mull2_n<mode>"
+  [(match_operand:<VWIDE> 0 "register_operand" "")
+   (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 1 "register_operand" ""))
+   (match_operand:<VEL> 2 "register_operand" "")]
+ "TARGET_SIMD"
+ {
+   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
+   emit_insn (gen_aarch64_simd_<su>mull2_n<mode> (operands[0],
+                                                  operands[1],
+                                                  operands[2], p));
+   DONE;
+ }
+)
+
+(define_insn "aarch64_<su>mull_lane<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+        (mult:<VWIDE>
+          (ANY_EXTEND:<VWIDE>
+            (match_operand:VD_HSI 1 "register_operand" "w"))
+          (ANY_EXTEND:<VWIDE>
+            (vec_duplicate:VD_HSI
+              (vec_select:<VEL>
+                (match_operand:<VCOND> 2 "register_operand" "<vwx>")
+                  (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
+  "TARGET_SIMD"
+  {
+    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
+    return "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[%3]";
+  }
+  [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_<su>mull_laneq<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+        (mult:<VWIDE>
+          (ANY_EXTEND:<VWIDE>
+            (match_operand:VD_HSI 1 "register_operand" "w"))
+          (ANY_EXTEND:<VWIDE>
+            (vec_duplicate:VD_HSI
+              (vec_select:<VEL>
+                (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
+                  (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
+  "TARGET_SIMD"
+  {
+    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
+    return "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[%3]";
+  }
+  [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_<su>mull2_lane<mode>_internal"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+        (mult:<VWIDE>
+          (ANY_EXTEND:<VWIDE>
+            (vec_select:<VHALF>
+              (match_operand:VQ_HSI 1 "register_operand" "w")
+              (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
+          (ANY_EXTEND:<VWIDE>
+            (vec_duplicate:<VHALF>
+              (vec_select:<VEL>
+                (match_operand:<VCOND> 2 "register_operand" "<vwx>")
+                  (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
+  "TARGET_SIMD"
+  {
+    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
+    return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[%3]";
+  }
+  [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_<su>mull2_laneq<mode>_internal"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+        (mult:<VWIDE>
+          (ANY_EXTEND:<VWIDE>
+            (vec_select:<VHALF>
+              (match_operand:VQ_HSI 1 "register_operand" "w")
+              (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
+          (ANY_EXTEND:<VWIDE>
+            (vec_duplicate:<VHALF>
+              (vec_select:<VEL>
+                (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
+                  (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
+  "TARGET_SIMD"
+  {
+    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
+    return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[%3]";
+  }
+  [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
+)
+
+(define_expand "aarch64_smull2_lane<mode>"
+  [(match_operand:<VWIDE> 0 "register_operand" "=w")
+   (match_operand:VQ_HSI 1 "register_operand" "w")
+   (match_operand:<VCOND> 2 "register_operand" "<vwx>")
+   (match_operand:SI 3 "immediate_operand" "i")]
+  "TARGET_SIMD"
+{
+  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
+  emit_insn (gen_aarch64_smull2_lane<mode>_internal (operands[0], operands[1],
+                                                     operands[2], operands[3],
+                                                     p));
+  DONE;
+})
+
+(define_expand "aarch64_umull2_lane<mode>"
+  [(match_operand:<VWIDE> 0 "register_operand" "=w")
+   (match_operand:VQ_HSI 1 "register_operand" "w")
+   (match_operand:<VCOND> 2 "register_operand" "<vwx>")
+   (match_operand:SI 3 "immediate_operand" "i")]
+  "TARGET_SIMD"
+{
+  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
+  emit_insn (gen_aarch64_umull2_lane<mode>_internal (operands[0], operands[1],
+                                                     operands[2], operands[3],
+                                                     p));
+  DONE;
+})
+
+(define_expand "aarch64_smull2_laneq<mode>"
+  [(match_operand:<VWIDE> 0 "register_operand" "=w")
+   (match_operand:VQ_HSI 1 "register_operand" "w")
+   (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
+   (match_operand:SI 3 "immediate_operand" "i")]
+  "TARGET_SIMD"
+{
+  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
+  emit_insn (gen_aarch64_smull2_laneq<mode>_internal (operands[0], operands[1],
+                                                      operands[2], operands[3],
+                                                      p));
+  DONE;
+})
+
+(define_expand "aarch64_umull2_laneq<mode>"
+  [(match_operand:<VWIDE> 0 "register_operand" "=w")
+   (match_operand:VQ_HSI 1 "register_operand" "w")
+   (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
+   (match_operand:SI 3 "immediate_operand" "i")]
+  "TARGET_SIMD"
+{
+  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
+  emit_insn (gen_aarch64_umull2_laneq<mode>_internal (operands[0], operands[1],
+                                                      operands[2], operands[3],
+                                                      p));
+  DONE;
+})
+
+(define_insn "aarch64_fmulx<mode>"
+  [(set (match_operand:VDQF 0 "register_operand" "=w")
+        (unspec:VDQF  [(match_operand:VDQF 1 "register_operand" "w")
+                       (match_operand:VDQF 2 "register_operand" "w")]
+                      UNSPEC_FMULX))]
+ "TARGET_SIMD"
+ "fmulx\\t%0.<vtype>, %1.<vtype>, %2.<vtype>"
+  [(set_attr "type" "neon_mul_s")]
+)
+
+(define_insn "aarch64_fmulx<mode>"
+  [(set (match_operand:GPF 0 "register_operand" "=w")
+        (unspec:GPF  [(match_operand:GPF 1 "register_operand" "w")
+                      (match_operand:GPF 2 "register_operand" "w")]
+                     UNSPEC_FMULX))]
+ "TARGET_SIMD"
+ "fmulx\\t%<s>0, %<s>1, %<s>2"
+  [(set_attr "type" "neon_mul_s")]
+)
+
+(define_insn "aarch64_fmulx_lane<mode>"
+  [(set (match_operand:VDQF 0 "register_operand" "=w")
+        (unspec:VDQF  [(match_operand:VDQF 1 "register_operand" "w")
+                       (match_operand:<VDQF_Q> 2 "register_operand" "w")
+                       (match_operand:SI 3 "immediate_operand" "i")]
+                      UNSPEC_FMULX_LANE))]
+ "TARGET_SIMD"
+ "fmulx\\t%0.<vtype>, %1.<vtype>, %2.<vetype>"
+  [(set_attr "type" "neon_mul_s")]
+)
+
+(define_insn "aarch64_pmull2v16qi"
+ [(set (match_operand:V8HI 0 "register_operand" "=w")
+       (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "w")
+                     (match_operand:V16QI 2 "register_operand" "w")]
+                    UNSPEC_PMULL2))]
+  "TARGET_SIMD"
+  "pmull2\\t%0.8h, %1.16b, %2.16b"
+  [(set_attr "type" "neon_mul_b_long")]
+)
+
+(define_insn "aarch64_pmullv8qi"
+  [(set (match_operand:V8HI 0 "register_operand" "=w")
+        (unspec:V8HI  [(match_operand:V8QI 1 "register_operand" "w")
+                       (match_operand:V8QI 2 "register_operand" "w")]
+                      UNSPEC_PMULL))]
+ "TARGET_SIMD"
+ "pmull\\t%0.8h, %1.8b, %2.8b"
+  [(set_attr "type" "neon_mul_b_long")]
+)
+
  ;; FP vector operations.
  ;; AArch64 AdvSIMD supports single-precision (32-bit) and
  ;; double-precision (64-bit) floating-point data types and arithmetic as
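
[Illustrative commentary, not part of the patch.]  The <su>mull2_lane
expanders above hand a vect_par_cnst_hi_half parallel to the
_internal insns, i.e. the "2"/"high" forms read the upper half of
operand 1.  A hypothetical scalar reference model (names made up) of
vmull_high_lane_s16, assuming the lane numbering the intrinsics see:

#include <stdint.h>

static void
ref_vmull_high_lane_s16 (int32_t out[4], const int16_t a[8],
                         const int16_t b[4], int lane)
{
  /* Widen the upper four elements of A and multiply each one by the
     selected element of B.  */
  for (int i = 0; i < 4; i++)
    out[i] = (int32_t) a[i + 4] * (int32_t) b[lane];
}
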
Index: gcc/config/aarch64/aarch64-simd-builtins.def
===================================================================
--- gcc/config/aarch64/aarch64-simd-builtins.def	(revision 218464)
+++ gcc/config/aarch64/aarch64-simd-builtins.def	(working copy)
@@ -184,6 +184,39 @@ 
    BUILTIN_VSDQ_HSI (TERNOP_LANE, sqrdmulh_lane, 0)
    BUILTIN_VSDQ_HSI (TERNOP_LANE, sqrdmulh_laneq, 0)
  
+  /* Implemented by vec_widen_<su>mult_hi_<mode>.  */
+  BUILTIN_VQW (BINOP, vec_widen_smult_hi_, 10)
+  BUILTIN_VQW (BINOPU, vec_widen_umult_hi_, 10)
+  /* Implemented by aarch64_<su>mull<mode>.  */
+  BUILTIN_VD_BHSI (BINOPU, umull, 0)
+  BUILTIN_VD_BHSI (BINOP, smull, 0)
+  /* Implemented by aarch64_<su>mull_n<mode>.  */
+  BUILTIN_VD_HSI (BINOP, smull_n, 0)
+  BUILTIN_VD_HSI (BINOPU, umull_n, 0)
+  /* Implemented by aarch64_mul_n<mode>.  */
+  BUILTIN_VMUL (BINOP, mul_n, 0)
+  /* Implemented by aarch64_<su>mull2_n<mode>.  */
+  BUILTIN_VQ_HSI (BINOP, smull2_n, 0)
+  BUILTIN_VQ_HSI (BINOPU, umull2_n, 0)
+  /* Implemented by aarch64_<su>mull_lane<q><mode>.  */
+  BUILTIN_VD_HSI (TERNOP, smull_lane, 0)
+  BUILTIN_VD_HSI (TERNOPU, umull_lane, 0)
+  BUILTIN_VD_HSI (TERNOP, smull_laneq, 0)
+  BUILTIN_VD_HSI (TERNOPU, umull_laneq, 0)
+  /* Implemented by aarch64_<su>mull2_lane<q><mode>.  */
+  BUILTIN_VQ_HSI (TERNOP, smull2_lane, 0)
+  BUILTIN_VQ_HSI (TERNOP_LANE, umull2_lane, 0)
+  BUILTIN_VQ_HSI (TERNOP, smull2_laneq, 0)
+  BUILTIN_VQ_HSI (TERNOP_LANE, umull2_laneq, 0)
+  /* Implemented by aarch64_fmulx<mode>.  */
+  BUILTIN_VDQF (BINOP, fmulx, 0)
+  BUILTIN_GPF (BINOP, fmulx, 0)
+  BUILTIN_VDQF (BINOP, fmulx_lane, 0)
+
+  /* Implemented by aarch64_pmull<2><mode>.  */
+  VAR1 (BINOPP, pmull, 0, v8qi)
+  VAR1 (BINOPP, pmull2, 0, v16qi)
+
    BUILTIN_VSDQ_I_DI (BINOP, ashl, 3)
    /* Implemented by aarch64_<sur>shl<mode>.  */
    BUILTIN_VSDQ_I_DI (BINOP, sshl, 0)
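
[Illustrative commentary, not part of the patch.]  Each BUILTIN_*
line instantiates one builtin per mode in its iterator; these are the
names the arm_neon.h hunk calls directly.  For example,
BUILTIN_VD_HSI (BINOP, smull_n, 0) yields __builtin_aarch64_smull_nv4hi
and __builtin_aarch64_smull_nv2si.  A hypothetical direct use
(function name made up), equivalent to vmull_n_s16:

#include <arm_neon.h>

int32x4_t
demo_smull_n (int16x4_t a, int16_t b)
{
  /* Same operation vmull_n_s16 performs after this patch:
     SMULL Vd.4S, Vn.4H, Vm.H[0].  */
  return __builtin_aarch64_smull_nv4hi (a, b);
}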