From patchwork Wed May 11 13:24:00 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Christophe Lyon X-Patchwork-Id: 621051 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 3r4cLW71LGz9sf6 for ; Wed, 11 May 2016 23:25:23 +1000 (AEST) Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b=lcYlUZls; dkim-atps=neutral DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references; q=dns; s= default; b=sUx1R2LniBN7hFD0MOP0pzrPpd1EGd34+9zk4/jHBO7E5JpzbdbUh Vt6fuY8/KF4bFrj0TwQSmbsTYWLZm6AOYcWveATOZkYjjLodY+WucQqc7pkUmREX A0crPKQI+1JcMYWKZMTUzMKUrV8DQYn4nDwtETnPaTR8FFEJGrteh0= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references; s=default; bh=/0tp9gl3KF7xZwubgQ7NsKLIGro=; b=lcYlUZlsSEgsGwWEww+y6jm+v/8n pmJmTDeu2qLGWGWvv3VXJfkWXdp0sg4LSSDfswQyvT4yx3l/QN7QKuVH2i/7yNmW qUaIG40s0AYx1M4BH1dUjaVn7djMHrSJM6ID0MJ2xoBFVVh6+KDA974s/ZgTErEM Z/VtJ/eonPsZ3R8= Received: (qmail 83663 invoked by alias); 11 May 2016 13:24:21 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 83485 invoked by uid 89); 11 May 2016 13:24:20 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-2.6 required=5.0 tests=BAYES_00, RCVD_IN_DNSWL_LOW, SPF_PASS autolearn=ham version=3.3.2 spammy=prix, EXPECTED, PRIx64, prix64 X-HELO: mail-wm0-f45.google.com Received: from mail-wm0-f45.google.com (HELO mail-wm0-f45.google.com) (74.125.82.45) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-GCM-SHA256 encrypted) ESMTPS; Wed, 11 May 2016 13:24:07 +0000 Received: by mail-wm0-f45.google.com with SMTP id e201so218875115wme.0 for ; Wed, 11 May 2016 06:24:06 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20130820; h=x-gm-message-state:from:to:subject:date:message-id:in-reply-to :references; bh=3ZlzheF7mNSioksWCyAGyW8rsgEJNGdOoYit2cX5+T0=; b=JMhtkph7MGz53g+WflBnJ1L2WkCPsq7gEX0CG3SQJbvEVTsdU6WH3pq/mS3Nai0Hr8 wY/V5YYtXl1VXEDynUbvbnThWh2wfbMy+d6DPmGNF7FBbTVKgdBenOxRE5B3acV/izQc lGAlrsCsdbPn9VkYO83OJWVX6tG3o17GbyribX3x9IQbDHIIHkuwIsJ5NzPbx05YQWsJ VqkdjdyN3zDSue3Emeq8SEtRn1sFFuFTbiNdmtraItmDYnC+riYQcrT7PQIukDLbV16x O4O2iMTq0CUyunDqAZU05+CfMfC+1rYsI4Y6eDIjcdAulpITx5bf8cIEAW/mUFR1H6r0 0mXA== X-Gm-Message-State: AOPr4FU483iVZiHr87XIOpZSlaJMPgi6YNVH3oWaavYPPy4ZWvmaDE/qf43ZoVqByDgpJwlv X-Received: by 10.194.147.178 with SMTP id tl18mr688662wjb.142.1462973043949; Wed, 11 May 2016 06:24:03 -0700 (PDT) Received: from gnx2647.guests.st.com (29-39.80-90.static-ip.oleane.fr. [90.80.39.29]) by smtp.gmail.com with ESMTPSA id u6sm35876719wmd.21.2016.05.11.06.24.03 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-SHA bits=128/128); Wed, 11 May 2016 06:24:03 -0700 (PDT) From: Christophe Lyon To: gcc-patches@gcc.gnu.org Subject: [Patch ARM/AArch64 10/11] Add missing tests for intrinsics operating on poly64 and poly128 types. Date: Wed, 11 May 2016 15:24:00 +0200 Message-Id: <1462973041-7911-11-git-send-email-christophe.lyon@linaro.org> In-Reply-To: <1462973041-7911-1-git-send-email-christophe.lyon@linaro.org> References: <1462973041-7911-1-git-send-email-christophe.lyon@linaro.org> X-IsSubscribed: yes 2016-05-02 Christophe Lyon * gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h (result): Add poly64x1_t and poly64x2_t cases if supported. * gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h (buffer, buffer_pad, buffer_dup, buffer_dup_pad): Likewise. * gcc.target/aarch64/advsimd-intrinsics/p64_p128.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c: New file. Change-Id: Ie9bb0c4fd0b8f04fb37668cdb315eaafd06e55c4 diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h index a2c160c..8664dfc 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h @@ -133,6 +133,9 @@ static ARRAY(result, uint, 32, 2); static ARRAY(result, uint, 64, 1); static ARRAY(result, poly, 8, 8); static ARRAY(result, poly, 16, 4); +#if defined (__ARM_FEATURE_CRYPTO) +static ARRAY(result, poly, 64, 1); +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) static ARRAY(result, float, 16, 4); #endif @@ -147,6 +150,9 @@ static ARRAY(result, uint, 32, 4); static ARRAY(result, uint, 64, 2); static ARRAY(result, poly, 8, 16); static ARRAY(result, poly, 16, 8); +#if defined (__ARM_FEATURE_CRYPTO) +static ARRAY(result, poly, 64, 2); +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) static ARRAY(result, float, 16, 8); #endif diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h index c8d4336..f8c4aef 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h @@ -118,6 +118,10 @@ VECT_VAR_DECL_INIT(buffer, uint, 32, 2); PAD(buffer_pad, uint, 32, 2); VECT_VAR_DECL_INIT(buffer, uint, 64, 1); PAD(buffer_pad, uint, 64, 1); +#if defined (__ARM_FEATURE_CRYPTO) +VECT_VAR_DECL_INIT(buffer, poly, 64, 1); +PAD(buffer_pad, poly, 64, 1); +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) VECT_VAR_DECL_INIT(buffer, float, 16, 4); PAD(buffer_pad, float, 16, 4); @@ -144,6 +148,10 @@ VECT_VAR_DECL_INIT(buffer, poly, 8, 16); PAD(buffer_pad, poly, 8, 16); VECT_VAR_DECL_INIT(buffer, poly, 16, 8); PAD(buffer_pad, poly, 16, 8); +#if defined (__ARM_FEATURE_CRYPTO) +VECT_VAR_DECL_INIT(buffer, poly, 64, 2); +PAD(buffer_pad, poly, 64, 2); +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) VECT_VAR_DECL_INIT(buffer, float, 16, 8); PAD(buffer_pad, float, 16, 8); @@ -178,6 +186,10 @@ VECT_VAR_DECL_INIT(buffer_dup, poly, 8, 8); VECT_VAR_DECL(buffer_dup_pad, poly, 8, 8); VECT_VAR_DECL_INIT(buffer_dup, poly, 16, 4); VECT_VAR_DECL(buffer_dup_pad, poly, 16, 4); +#if defined (__ARM_FEATURE_CRYPTO) +VECT_VAR_DECL_INIT4(buffer_dup, poly, 64, 1); +VECT_VAR_DECL(buffer_dup_pad, poly, 64, 1); +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) VECT_VAR_DECL_INIT4(buffer_dup, float, 16, 4); VECT_VAR_DECL(buffer_dup_pad, float, 16, 4); @@ -205,6 +217,10 @@ VECT_VAR_DECL_INIT(buffer_dup, poly, 8, 16); VECT_VAR_DECL(buffer_dup_pad, poly, 8, 16); VECT_VAR_DECL_INIT(buffer_dup, poly, 16, 8); VECT_VAR_DECL(buffer_dup_pad, poly, 16, 8); +#if defined (__ARM_FEATURE_CRYPTO) +VECT_VAR_DECL_INIT4(buffer_dup, poly, 64, 2); +VECT_VAR_DECL(buffer_dup_pad, poly, 64, 2); +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) VECT_VAR_DECL_INIT(buffer_dup, float, 16, 8); VECT_VAR_DECL(buffer_dup_pad, float, 16, 8); diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c new file mode 100644 index 0000000..ced3884 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c @@ -0,0 +1,665 @@ +/* This file contains tests for all the *p64 intrinsics, except for + vreinterpret which have their own testcase. */ + +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-add-options arm_crypto } */ + +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected results: vbsl. */ +VECT_VAR_DECL(vbsl_expected,poly,64,1) [] = { 0xfffffff1 }; +VECT_VAR_DECL(vbsl_expected,poly,64,2) [] = { 0xfffffff1, + 0xfffffff1 }; + +/* Expected results: vceq. */ +VECT_VAR_DECL(vceq_expected,uint,64,1) [] = { 0x0 }; + +/* Expected results: vcombine. */ +VECT_VAR_DECL(vcombine_expected,poly,64,2) [] = { 0xfffffffffffffff0, 0x88 }; + +/* Expected results: vcreate. */ +VECT_VAR_DECL(vcreate_expected,poly,64,1) [] = { 0x123456789abcdef0 }; + +/* Expected results: vdup_lane. */ +VECT_VAR_DECL(vdup_lane_expected,poly,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(vdup_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0, + 0xfffffffffffffff0 }; + +/* Expected results: vdup_n. */ +VECT_VAR_DECL(vdup_n_expected0,poly,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(vdup_n_expected0,poly,64,2) [] = { 0xfffffffffffffff0, + 0xfffffffffffffff0 }; +VECT_VAR_DECL(vdup_n_expected1,poly,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(vdup_n_expected1,poly,64,2) [] = { 0xfffffffffffffff1, + 0xfffffffffffffff1 }; +VECT_VAR_DECL(vdup_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(vdup_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2, + 0xfffffffffffffff2 }; + +/* Expected results: vext. */ +VECT_VAR_DECL(vext_expected,poly,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(vext_expected,poly,64,2) [] = { 0xfffffffffffffff1, 0x88 }; + +/* Expected results: vget_low. */ +VECT_VAR_DECL(vget_low_expected,poly,64,1) [] = { 0xfffffffffffffff0 }; + +/* Expected results: vld1. */ +VECT_VAR_DECL(vld1_expected,poly,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(vld1_expected,poly,64,2) [] = { 0xfffffffffffffff0, + 0xfffffffffffffff1 }; + +/* Expected results: vld1_dup. */ +VECT_VAR_DECL(vld1_dup_expected0,poly,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(vld1_dup_expected0,poly,64,2) [] = { 0xfffffffffffffff0, + 0xfffffffffffffff0 }; +VECT_VAR_DECL(vld1_dup_expected1,poly,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(vld1_dup_expected1,poly,64,2) [] = { 0xfffffffffffffff1, + 0xfffffffffffffff1 }; +VECT_VAR_DECL(vld1_dup_expected2,poly,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(vld1_dup_expected2,poly,64,2) [] = { 0xfffffffffffffff2, + 0xfffffffffffffff2 }; + +/* Expected results: vld1_lane. */ +VECT_VAR_DECL(vld1_lane_expected,poly,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(vld1_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0, + 0xaaaaaaaaaaaaaaaa }; + +/* Expected results: vldX. */ +VECT_VAR_DECL(vld2_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(vld2_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(vld3_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(vld3_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(vld3_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(vld4_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(vld4_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(vld4_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(vld4_expected_3,poly,64,1) [] = { 0xfffffffffffffff3 }; + +/* Expected results: vldX_dup. */ +VECT_VAR_DECL(vld2_dup_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(vld2_dup_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(vld3_dup_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(vld3_dup_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(vld3_dup_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(vld4_dup_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(vld4_dup_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 }; +VECT_VAR_DECL(vld4_dup_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 }; +VECT_VAR_DECL(vld4_dup_expected_3,poly,64,1) [] = { 0xfffffffffffffff3 }; + +/* Expected results: vsli. */ +VECT_VAR_DECL(vsli_expected,poly,64,1) [] = { 0x10 }; +VECT_VAR_DECL(vsli_expected,poly,64,2) [] = { 0x7ffffffffffff0, + 0x7ffffffffffff1 }; +VECT_VAR_DECL(vsli_expected_max_shift,poly,64,1) [] = { 0x7ffffffffffffff0 }; +VECT_VAR_DECL(vsli_expected_max_shift,poly,64,2) [] = { 0xfffffffffffffff0, + 0xfffffffffffffff1 }; + +/* Expected results: vsri. */ +VECT_VAR_DECL(vsri_expected,poly,64,1) [] = { 0xe000000000000000 }; +VECT_VAR_DECL(vsri_expected,poly,64,2) [] = { 0xfffffffffffff800, + 0xfffffffffffff800 }; +VECT_VAR_DECL(vsri_expected_max_shift,poly,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(vsri_expected_max_shift,poly,64,2) [] = { 0xfffffffffffffff0, + 0xfffffffffffffff1 }; + +/* Expected results: vst1_lane. */ +VECT_VAR_DECL(vst1_lane_expected,poly,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(vst1_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0, + 0x3333333333333333 }; + +int main (void) +{ + int i; + + /* vbsl_p64 tests. */ +#define TEST_MSG "VBSL/VBSLQ" + +#define TEST_VBSL(T3, Q, T1, T2, W, N) \ + VECT_VAR(vbsl_vector_res, T1, W, N) = \ + vbsl##Q##_##T2##W(VECT_VAR(vbsl_vector_first, T3, W, N), \ + VECT_VAR(vbsl_vector, T1, W, N), \ + VECT_VAR(vbsl_vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vbsl_vector_res, T1, W, N)) + + DECL_VARIABLE(vbsl_vector, poly, 64, 1); + DECL_VARIABLE(vbsl_vector, poly, 64, 2); + DECL_VARIABLE(vbsl_vector2, poly, 64, 1); + DECL_VARIABLE(vbsl_vector2, poly, 64, 2); + DECL_VARIABLE(vbsl_vector_res, poly, 64, 1); + DECL_VARIABLE(vbsl_vector_res, poly, 64, 2); + + DECL_VARIABLE(vbsl_vector_first, uint, 64, 1); + DECL_VARIABLE(vbsl_vector_first, uint, 64, 2); + + CLEAN(result, poly, 64, 1); + CLEAN(result, poly, 64, 2); + + VLOAD(vbsl_vector, buffer, , poly, p, 64, 1); + VLOAD(vbsl_vector, buffer, q, poly, p, 64, 2); + + VDUP(vbsl_vector2, , poly, p, 64, 1, 0xFFFFFFF3); + VDUP(vbsl_vector2, q, poly, p, 64, 2, 0xFFFFFFF3); + + VDUP(vbsl_vector_first, , uint, u, 64, 1, 0xFFFFFFF2); + VDUP(vbsl_vector_first, q, uint, u, 64, 2, 0xFFFFFFF2); + + TEST_VBSL(uint, , poly, p, 64, 1); + TEST_VBSL(uint, q, poly, p, 64, 2); + + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vbsl_expected, ""); + CHECK(TEST_MSG, poly, 64, 2, PRIx64, vbsl_expected, ""); + + /* vceq_p64 tests. */ +#undef TEST_MSG +#define TEST_MSG "VCEQ" + +#define TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N) \ + VECT_VAR(vceq_vector_res, T3, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vceq_vector, T1, W, N), \ + VECT_VAR(vceq_vector2, T1, W, N)); \ + vst1##Q##_u##W(VECT_VAR(result, T3, W, N), VECT_VAR(vceq_vector_res, T3, W, N)) + +#define TEST_VCOMP(INSN, Q, T1, T2, T3, W, N) \ + TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N) + + DECL_VARIABLE(vceq_vector, poly, 64, 1); + DECL_VARIABLE(vceq_vector2, poly, 64, 1); + DECL_VARIABLE(vceq_vector_res, uint, 64, 1); + + CLEAN(result, uint, 64, 1); + + VLOAD(vceq_vector, buffer, , poly, p, 64, 1); + + VDUP(vceq_vector2, , poly, p, 64, 1, 0x88); + + fprintf(stderr, "toto\n"); + TEST_VCOMP(vceq, , poly, p, uint, 64, 1); + + CHECK(TEST_MSG, uint, 64, 1, PRIx64, vceq_expected, ""); + fprintf(stderr, "toto\n"); + + /* vcombine_p64 tests. */ +#undef TEST_MSG +#define TEST_MSG "VCOMBINE" + +#define TEST_VCOMBINE(T1, T2, W, N, N2) \ + VECT_VAR(vcombine_vector128, T1, W, N2) = \ + vcombine_##T2##W(VECT_VAR(vcombine_vector64_a, T1, W, N), \ + VECT_VAR(vcombine_vector64_b, T1, W, N)); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N2), VECT_VAR(vcombine_vector128, T1, W, N2)) + + DECL_VARIABLE(vcombine_vector64_a, poly, 64, 1); + DECL_VARIABLE(vcombine_vector64_b, poly, 64, 1); + DECL_VARIABLE(vcombine_vector128, poly, 64, 2); + + CLEAN(result, poly, 64, 2); + + VLOAD(vcombine_vector64_a, buffer, , poly, p, 64, 1); + + VDUP(vcombine_vector64_b, , poly, p, 64, 1, 0x88); + + TEST_VCOMBINE(poly, p, 64, 1, 2); + + CHECK(TEST_MSG, poly, 64, 2, PRIx16, vcombine_expected, ""); + + /* vcreate_p64 tests. */ +#undef TEST_MSG +#define TEST_MSG "VCREATE" + +#define TEST_VCREATE(T1, T2, W, N) \ + VECT_VAR(vcreate_vector_res, T1, W, N) = \ + vcreate_##T2##W(VECT_VAR(vcreate_val, T1, W, N)); \ + vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vcreate_vector_res, T1, W, N)) + +#define DECL_VAL(VAR, T1, W, N) \ + uint64_t VECT_VAR(VAR, T1, W, N) + + DECL_VAL(vcreate_val, poly, 64, 1); + DECL_VARIABLE(vcreate_vector_res, poly, 64, 1); + + CLEAN(result, poly, 64, 2); + + VECT_VAR(vcreate_val, poly, 64, 1) = 0x123456789abcdef0ULL; + + TEST_VCREATE(poly, p, 64, 1); + + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vcreate_expected, ""); + + /* vdup_lane_p64 tests. */ +#undef TEST_MSG +#define TEST_MSG "VDUP_LANE/VDUP_LANEQ" + +#define TEST_VDUP_LANE(Q, T1, T2, W, N, N2, L) \ + VECT_VAR(vdup_lane_vector_res, T1, W, N) = \ + vdup##Q##_lane_##T2##W(VECT_VAR(vdup_lane_vector, T1, W, N2), L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vdup_lane_vector_res, T1, W, N)) + + DECL_VARIABLE(vdup_lane_vector, poly, 64, 1); + DECL_VARIABLE(vdup_lane_vector, poly, 64, 2); + DECL_VARIABLE(vdup_lane_vector_res, poly, 64, 1); + DECL_VARIABLE(vdup_lane_vector_res, poly, 64, 2); + + CLEAN(result, poly, 64, 1); + CLEAN(result, poly, 64, 2); + + VLOAD(vdup_lane_vector, buffer, , poly, p, 64, 1); + + TEST_VDUP_LANE(, poly, p, 64, 1, 1, 0); + TEST_VDUP_LANE(q, poly, p, 64, 2, 1, 0); + + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vdup_lane_expected, ""); + CHECK(TEST_MSG, poly, 64, 2, PRIx64, vdup_lane_expected, ""); + + /* vdup_n_p64 tests. */ +#undef TEST_MSG +#define TEST_MSG "VDUP/VDUPQ" + +#define TEST_VDUP(Q, T1, T2, W, N) \ + VECT_VAR(vdup_n_vector, T1, W, N) = \ + vdup##Q##_n_##T2##W(VECT_VAR(buffer_dup, T1, W, N)[i]); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vdup_n_vector, T1, W, N)) + + DECL_VARIABLE(vdup_n_vector, poly, 64, 1); + DECL_VARIABLE(vdup_n_vector, poly, 64, 2); + + /* Try to read different places from the input buffer. */ + for (i=0; i< 3; i++) { + CLEAN(result, poly, 64, 1); + CLEAN(result, poly, 64, 2); + + TEST_VDUP(, poly, p, 64, 1); + TEST_VDUP(q, poly, p, 64, 2); + + switch (i) { + case 0: + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vdup_n_expected0, ""); + CHECK(TEST_MSG, poly, 64, 2, PRIx64, vdup_n_expected0, ""); + break; + case 1: + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vdup_n_expected1, ""); + CHECK(TEST_MSG, poly, 64, 2, PRIx64, vdup_n_expected1, ""); + break; + case 2: + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vdup_n_expected2, ""); + CHECK(TEST_MSG, poly, 64, 2, PRIx64, vdup_n_expected2, ""); + break; + default: + abort(); + } + } + + /* vexit_p64 tests. */ +#undef TEST_MSG +#define TEST_MSG "VEXT/VEXTQ" + +#define TEST_VEXT(Q, T1, T2, W, N, V) \ + VECT_VAR(vext_vector_res, T1, W, N) = \ + vext##Q##_##T2##W(VECT_VAR(vext_vector1, T1, W, N), \ + VECT_VAR(vext_vector2, T1, W, N), \ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vext_vector_res, T1, W, N)) + + DECL_VARIABLE(vext_vector1, poly, 64, 1); + DECL_VARIABLE(vext_vector1, poly, 64, 2); + DECL_VARIABLE(vext_vector2, poly, 64, 1); + DECL_VARIABLE(vext_vector2, poly, 64, 2); + DECL_VARIABLE(vext_vector_res, poly, 64, 1); + DECL_VARIABLE(vext_vector_res, poly, 64, 2); + + CLEAN(result, poly, 64, 1); + CLEAN(result, poly, 64, 2); + + VLOAD(vext_vector1, buffer, , poly, p, 64, 1); + VLOAD(vext_vector1, buffer, q, poly, p, 64, 2); + + VDUP(vext_vector2, , poly, p, 64, 1, 0x88); + VDUP(vext_vector2, q, poly, p, 64, 2, 0x88); + + TEST_VEXT(, poly, p, 64, 1, 0); + TEST_VEXT(q, poly, p, 64, 2, 1); + + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vext_expected, ""); + CHECK(TEST_MSG, poly, 64, 2, PRIx64, vext_expected, ""); + + /* vget_low_p64 tests. */ +#undef TEST_MSG +#define TEST_MSG "VGET_LOW" + +#define TEST_VGET_LOW(T1, T2, W, N, N2) \ + VECT_VAR(vget_low_vector64, T1, W, N) = \ + vget_low_##T2##W(VECT_VAR(vget_low_vector128, T1, W, N2)); \ + vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vget_low_vector64, T1, W, N)) + + DECL_VARIABLE(vget_low_vector64, poly, 64, 1); + DECL_VARIABLE(vget_low_vector128, poly, 64, 2); + + CLEAN(result, poly, 64, 1); + + VLOAD(vget_low_vector128, buffer, q, poly, p, 64, 2); + + TEST_VGET_LOW(poly, p, 64, 1, 2); + + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vget_low_expected, ""); + + /* vld1_p64 tests. */ +#undef TEST_MSG +#define TEST_MSG "VLD1/VLD1Q" + +#define TEST_VLD1(VAR, BUF, Q, T1, T2, W, N) \ + VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(VAR, T1, W, N)) + + DECL_VARIABLE(vld1_vector, poly, 64, 1); + DECL_VARIABLE(vld1_vector, poly, 64, 2); + + CLEAN(result, poly, 64, 1); + CLEAN(result, poly, 64, 2); + + VLOAD(vld1_vector, buffer, , poly, p, 64, 1); + VLOAD(vld1_vector, buffer, q, poly, p, 64, 2); + + TEST_VLD1(vld1_vector, buffer, , poly, p, 64, 1); + TEST_VLD1(vld1_vector, buffer, q, poly, p, 64, 2); + + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld1_expected, ""); + CHECK(TEST_MSG, poly, 64, 2, PRIx64, vld1_expected, ""); + + /* vld1_dup_p64 tests. */ +#undef TEST_MSG +#define TEST_MSG "VLD1_DUP/VLD1_DUPQ" + +#define TEST_VLD1_DUP(VAR, BUF, Q, T1, T2, W, N) \ + VECT_VAR(VAR, T1, W, N) = \ + vld1##Q##_dup_##T2##W(&VECT_VAR(BUF, T1, W, N)[i]); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(VAR, T1, W, N)) + + DECL_VARIABLE(vld1_dup_vector, poly, 64, 1); + DECL_VARIABLE(vld1_dup_vector, poly, 64, 2); + + /* Try to read different places from the input buffer. */ + for (i=0; i<3; i++) { + CLEAN(result, poly, 64, 1); + CLEAN(result, poly, 64, 2); + + TEST_VLD1_DUP(vld1_dup_vector, buffer_dup, , poly, p, 64, 1); + TEST_VLD1_DUP(vld1_dup_vector, buffer_dup, q, poly, p, 64, 2); + + switch (i) { + case 0: + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld1_dup_expected0, ""); + CHECK(TEST_MSG, poly, 64, 2, PRIx64, vld1_dup_expected0, ""); + break; + case 1: + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld1_dup_expected1, ""); + CHECK(TEST_MSG, poly, 64, 2, PRIx64, vld1_dup_expected1, ""); + break; + case 2: + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld1_dup_expected2, ""); + CHECK(TEST_MSG, poly, 64, 2, PRIx64, vld1_dup_expected2, ""); + break; + default: + abort(); + } + } + + /* vld1_lane_p64 tests. */ +#undef TEST_MSG +#define TEST_MSG "VLD1_LANE/VLD1_LANEQ" + +#define TEST_VLD1_LANE(Q, T1, T2, W, N, L) \ + memset (VECT_VAR(vld1_lane_buffer_src, T1, W, N), 0xAA, W/8*N); \ + VECT_VAR(vld1_lane_vector_src, T1, W, N) = \ + vld1##Q##_##T2##W(VECT_VAR(vld1_lane_buffer_src, T1, W, N)); \ + VECT_VAR(vld1_lane_vector, T1, W, N) = \ + vld1##Q##_lane_##T2##W(VECT_VAR(buffer, T1, W, N), \ + VECT_VAR(vld1_lane_vector_src, T1, W, N), L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vld1_lane_vector, T1, W, N)) + + DECL_VARIABLE(vld1_lane_vector, poly, 64, 1); + DECL_VARIABLE(vld1_lane_vector, poly, 64, 2); + DECL_VARIABLE(vld1_lane_vector_src, poly, 64, 1); + DECL_VARIABLE(vld1_lane_vector_src, poly, 64, 2); + + ARRAY(vld1_lane_buffer_src, poly, 64, 1); + ARRAY(vld1_lane_buffer_src, poly, 64, 2); + + CLEAN(result, poly, 64, 1); + CLEAN(result, poly, 64, 2); + + TEST_VLD1_LANE(, poly, p, 64, 1, 0); + TEST_VLD1_LANE(q, poly, p, 64, 2, 0); + + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld1_lane_expected, ""); + CHECK(TEST_MSG, poly, 64, 2, PRIx64, vld1_lane_expected, ""); + + /* vldX_p64 tests. */ +#define DECL_VLDX(T1, W, N, X) \ + VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vldX_vector, T1, W, N, X); \ + VECT_VAR_DECL(vldX_result_bis_##X, T1, W, N)[X * N] + +#define TEST_VLDX(Q, T1, T2, W, N, X) \ + VECT_ARRAY_VAR(vldX_vector, T1, W, N, X) = \ + /* Use dedicated init buffer, of size X */ \ + vld##X##Q##_##T2##W(VECT_ARRAY_VAR(buffer_vld##X, T1, W, N, X)); \ + vst##X##Q##_##T2##W(VECT_VAR(vldX_result_bis_##X, T1, W, N), \ + VECT_ARRAY_VAR(vldX_vector, T1, W, N, X)); \ + memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(vldX_result_bis_##X, T1, W, N), \ + sizeof(VECT_VAR(result, T1, W, N))); + + /* Overwrite "result" with the contents of "result_bis"[Y]. */ +#define TEST_EXTRA_CHUNK(T1, W, N, X,Y) \ + memcpy(VECT_VAR(result, T1, W, N), \ + &(VECT_VAR(vldX_result_bis_##X, T1, W, N)[Y*N]), \ + sizeof(VECT_VAR(result, T1, W, N))); + + DECL_VLDX(poly, 64, 1, 2); + DECL_VLDX(poly, 64, 1, 3); + DECL_VLDX(poly, 64, 1, 4); + + VECT_ARRAY_INIT2(buffer_vld2, poly, 64, 1); + PAD(buffer_vld2_pad, poly, 64, 1); + VECT_ARRAY_INIT3(buffer_vld3, poly, 64, 1); + PAD(buffer_vld3_pad, poly, 64, 1); + VECT_ARRAY_INIT4(buffer_vld4, poly, 64, 1); + PAD(buffer_vld4_pad, poly, 64, 1); + +#undef TEST_MSG +#define TEST_MSG "VLD2/VLD2Q" + CLEAN(result, poly, 64, 1); + TEST_VLDX(, poly, p, 64, 1, 2); + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld2_expected_0, "chunk 0"); + CLEAN(result, poly, 64, 1); + TEST_EXTRA_CHUNK(poly, 64, 1, 2, 1); + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld2_expected_1, "chunk 1"); + +#undef TEST_MSG +#define TEST_MSG "VLD3/VLD3Q" + CLEAN(result, poly, 64, 1); + TEST_VLDX(, poly, p, 64, 1, 3); + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld3_expected_0, "chunk 0"); + CLEAN(result, poly, 64, 1); + TEST_EXTRA_CHUNK(poly, 64, 1, 3, 1); + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld3_expected_1, "chunk 1"); + CLEAN(result, poly, 64, 1); + TEST_EXTRA_CHUNK(poly, 64, 1, 3, 2); + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld3_expected_2, "chunk 2"); + +#undef TEST_MSG +#define TEST_MSG "VLD4/VLD4Q" + CLEAN(result, poly, 64, 1); + TEST_VLDX(, poly, p, 64, 1, 4); + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_0, "chunk 0"); + CLEAN(result, poly, 64, 1); + TEST_EXTRA_CHUNK(poly, 64, 1, 4, 1); + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_1, "chunk 1"); + CLEAN(result, poly, 64, 1); + TEST_EXTRA_CHUNK(poly, 64, 1, 4, 2); + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_2, "chunk 2"); + CLEAN(result, poly, 64, 1); + TEST_EXTRA_CHUNK(poly, 64, 1, 4, 3); + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_3, "chunk 3"); + + /* vldX_dup_p64 tests. */ +#define DECL_VLDX_DUP(T1, W, N, X) \ + VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vldX_dup_vector, T1, W, N, X); \ + VECT_VAR_DECL(vldX_dup_result_bis_##X, T1, W, N)[X * N] + +#define TEST_VLDX_DUP(Q, T1, T2, W, N, X) \ + VECT_ARRAY_VAR(vldX_dup_vector, T1, W, N, X) = \ + vld##X##Q##_dup_##T2##W(&VECT_VAR(buffer_dup, T1, W, N)[0]); \ + \ + vst##X##Q##_##T2##W(VECT_VAR(vldX_dup_result_bis_##X, T1, W, N), \ + VECT_ARRAY_VAR(vldX_dup_vector, T1, W, N, X)); \ + memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(vldX_dup_result_bis_##X, T1, W, N), \ + sizeof(VECT_VAR(result, T1, W, N))); + + /* Overwrite "result" with the contents of "result_bis"[Y]. */ +#define TEST_VLDX_DUP_EXTRA_CHUNK(T1, W, N, X,Y) \ + memcpy(VECT_VAR(result, T1, W, N), \ + &(VECT_VAR(vldX_dup_result_bis_##X, T1, W, N)[Y*N]), \ + sizeof(VECT_VAR(result, T1, W, N))); + + DECL_VLDX_DUP(poly, 64, 1, 2); + DECL_VLDX_DUP(poly, 64, 1, 3); + DECL_VLDX_DUP(poly, 64, 1, 4); + + +#undef TEST_MSG +#define TEST_MSG "VLD2_DUP/VLD2Q_DUP" + CLEAN(result, poly, 64, 1); + TEST_VLDX_DUP(, poly, p, 64, 1, 2); + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld2_dup_expected_0, "chunk 0"); + CLEAN(result, poly, 64, 1); + TEST_VLDX_DUP_EXTRA_CHUNK(poly, 64, 1, 2, 1); + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld2_dup_expected_1, "chunk 1"); + +#undef TEST_MSG +#define TEST_MSG "VLD3_DUP/VLD3Q_DUP" + CLEAN(result, poly, 64, 1); + TEST_VLDX_DUP(, poly, p, 64, 1, 3); + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld3_dup_expected_0, "chunk 0"); + CLEAN(result, poly, 64, 1); + TEST_VLDX_DUP_EXTRA_CHUNK(poly, 64, 1, 3, 1); + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld3_dup_expected_1, "chunk 1"); + CLEAN(result, poly, 64, 1); + TEST_VLDX_DUP_EXTRA_CHUNK(poly, 64, 1, 3, 2); + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld3_dup_expected_2, "chunk 2"); + +#undef TEST_MSG +#define TEST_MSG "VLD4_DUP/VLD4Q_DUP" + CLEAN(result, poly, 64, 1); + TEST_VLDX_DUP(, poly, p, 64, 1, 4); + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_0, "chunk 0"); + CLEAN(result, poly, 64, 1); + TEST_VLDX_DUP_EXTRA_CHUNK(poly, 64, 1, 4, 1); + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_1, "chunk 1"); + CLEAN(result, poly, 64, 1); + TEST_VLDX_DUP_EXTRA_CHUNK(poly, 64, 1, 4, 2); + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_2, "chunk 2"); + CLEAN(result, poly, 64, 1); + TEST_VLDX_DUP_EXTRA_CHUNK(poly, 64, 1, 4, 3); + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_3, "chunk 3"); + + /* vsli_p64 tests. */ +#undef TEST_MSG +#define TEST_MSG "VSLI" + +#define TEST_VSXI1(INSN, Q, T1, T2, W, N, V) \ + VECT_VAR(vsXi_vector_res, T1, W, N) = \ + INSN##Q##_n_##T2##W(VECT_VAR(vsXi_vector, T1, W, N), \ + VECT_VAR(vsXi_vector2, T1, W, N), \ + V); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vsXi_vector_res, T1, W, N)) + +#define TEST_VSXI(INSN, Q, T1, T2, W, N, V) \ + TEST_VSXI1(INSN, Q, T1, T2, W, N, V) + + DECL_VARIABLE(vsXi_vector, poly, 64, 1); + DECL_VARIABLE(vsXi_vector, poly, 64, 2); + DECL_VARIABLE(vsXi_vector2, poly, 64, 1); + DECL_VARIABLE(vsXi_vector2, poly, 64, 2); + DECL_VARIABLE(vsXi_vector_res, poly, 64, 1); + DECL_VARIABLE(vsXi_vector_res, poly, 64, 2); + + CLEAN(result, poly, 64, 1); + CLEAN(result, poly, 64, 2); + + VLOAD(vsXi_vector, buffer, , poly, p, 64, 1); + VLOAD(vsXi_vector, buffer, q, poly, p, 64, 2); + + VDUP(vsXi_vector2, , poly, p, 64, 1, 2); + VDUP(vsXi_vector2, q, poly, p, 64, 2, 3); + + TEST_VSXI(vsli, , poly, p, 64, 1, 3); + TEST_VSXI(vsli, q, poly, p, 64, 2, 53); + + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vsli_expected, ""); + CHECK(TEST_MSG, poly, 64, 2, PRIx64, vsli_expected, ""); + + /* Test cases with maximum shift amount. */ + CLEAN(result, poly, 64, 1); + CLEAN(result, poly, 64, 2); + + TEST_VSXI(vsli, , poly, p, 64, 1, 63); + TEST_VSXI(vsli, q, poly, p, 64, 2, 63); + +#define COMMENT "(max shift amount)" + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vsli_expected_max_shift, COMMENT); + CHECK(TEST_MSG, poly, 64, 2, PRIx64, vsli_expected_max_shift, COMMENT); + + /* vsri_p64 tests. */ +#undef TEST_MSG +#define TEST_MSG "VSRI" + + CLEAN(result, poly, 64, 1); + CLEAN(result, poly, 64, 2); + + VLOAD(vsXi_vector, buffer, , poly, p, 64, 1); + VLOAD(vsXi_vector, buffer, q, poly, p, 64, 2); + + VDUP(vsXi_vector2, , poly, p, 64, 1, 2); + VDUP(vsXi_vector2, q, poly, p, 64, 2, 3); + + TEST_VSXI(vsri, , poly, p, 64, 1, 3); + TEST_VSXI(vsri, q, poly, p, 64, 2, 53); + + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vsri_expected, ""); + CHECK(TEST_MSG, poly, 64, 2, PRIx64, vsri_expected, ""); + + /* Test cases with maximum shift amount. */ + CLEAN(result, poly, 64, 1); + CLEAN(result, poly, 64, 2); + + TEST_VSXI(vsri, , poly, p, 64, 1, 64); + TEST_VSXI(vsri, q, poly, p, 64, 2, 64); + +#define COMMENT "(max shift amount)" + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vsri_expected_max_shift, COMMENT); + CHECK(TEST_MSG, poly, 64, 2, PRIx64, vsri_expected_max_shift, COMMENT); + + /* vst1_lane_p64 tests. */ +#undef TEST_MSG +#define TEST_MSG "VST1_LANE/VST1_LANEQ" + +#define TEST_VST1_LANE(Q, T1, T2, W, N, L) \ + VECT_VAR(vst1_lane_vector, T1, W, N) = \ + vld1##Q##_##T2##W(VECT_VAR(buffer, T1, W, N)); \ + vst1##Q##_lane_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vst1_lane_vector, T1, W, N), L) + + DECL_VARIABLE(vst1_lane_vector, poly, 64, 1); + DECL_VARIABLE(vst1_lane_vector, poly, 64, 2); + + CLEAN(result, poly, 64, 1); + CLEAN(result, poly, 64, 2); + + TEST_VST1_LANE(, poly, p, 64, 1, 0); + TEST_VST1_LANE(q, poly, p, 64, 2, 0); + + CHECK(TEST_MSG, poly, 64, 1, PRIx64, vst1_lane_expected, ""); + CHECK(TEST_MSG, poly, 64, 2, PRIx64, vst1_lane_expected, ""); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c new file mode 100644 index 0000000..a049cb3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c @@ -0,0 +1,151 @@ +/* This file contains tests for the vreinterpret *p128 intrinsics. */ + +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-add-options arm_crypto } */ + +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected results: vreinterpretq_p128_*. */ +VECT_VAR_DECL(vreint_expected_q_p128_s8,poly,64,2) [] = { 0xf7f6f5f4f3f2f1f0, + 0xfffefdfcfbfaf9f8 }; +VECT_VAR_DECL(vreint_expected_q_p128_s16,poly,64,2) [] = { 0xfff3fff2fff1fff0, + 0xfff7fff6fff5fff4 }; +VECT_VAR_DECL(vreint_expected_q_p128_s32,poly,64,2) [] = { 0xfffffff1fffffff0, + 0xfffffff3fffffff2 }; +VECT_VAR_DECL(vreint_expected_q_p128_s64,poly,64,2) [] = { 0xfffffffffffffff0, + 0xfffffffffffffff1 }; +VECT_VAR_DECL(vreint_expected_q_p128_u8,poly,64,2) [] = { 0xf7f6f5f4f3f2f1f0, + 0xfffefdfcfbfaf9f8 }; +VECT_VAR_DECL(vreint_expected_q_p128_u16,poly,64,2) [] = { 0xfff3fff2fff1fff0, + 0xfff7fff6fff5fff4 }; +VECT_VAR_DECL(vreint_expected_q_p128_u32,poly,64,2) [] = { 0xfffffff1fffffff0, + 0xfffffff3fffffff2 }; +VECT_VAR_DECL(vreint_expected_q_p128_u64,poly,64,2) [] = { 0xfffffffffffffff0, + 0xfffffffffffffff1 }; +VECT_VAR_DECL(vreint_expected_q_p128_p8,poly,64,2) [] = { 0xf7f6f5f4f3f2f1f0, + 0xfffefdfcfbfaf9f8 }; +VECT_VAR_DECL(vreint_expected_q_p128_p16,poly,64,2) [] = { 0xfff3fff2fff1fff0, + 0xfff7fff6fff5fff4 }; +VECT_VAR_DECL(vreint_expected_q_p128_f32,poly,64,2) [] = { 0xc1700000c1800000, + 0xc1500000c1600000 }; + +/* Expected results: vreinterpretq_*_p128. */ +VECT_VAR_DECL(vreint_expected_q_s8_p128,int,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xf1, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(vreint_expected_q_s16_p128,int,16,8) [] = { 0xfff0, 0xffff, + 0xffff, 0xffff, + 0xfff1, 0xffff, + 0xffff, 0xffff }; +VECT_VAR_DECL(vreint_expected_q_s32_p128,int,32,4) [] = { 0xfffffff0, 0xffffffff, + 0xfffffff1, 0xffffffff }; +VECT_VAR_DECL(vreint_expected_q_s64_p128,int,64,2) [] = { 0xfffffffffffffff0, + 0xfffffffffffffff1 }; +VECT_VAR_DECL(vreint_expected_q_u8_p128,uint,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xf1, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(vreint_expected_q_u16_p128,uint,16,8) [] = { 0xfff0, 0xffff, + 0xffff, 0xffff, + 0xfff1, 0xffff, + 0xffff, 0xffff }; +VECT_VAR_DECL(vreint_expected_q_u32_p128,uint,32,4) [] = { 0xfffffff0, 0xffffffff, + 0xfffffff1, 0xffffffff }; +VECT_VAR_DECL(vreint_expected_q_u64_p128,uint,64,2) [] = { 0xfffffffffffffff0, + 0xfffffffffffffff1 }; +VECT_VAR_DECL(vreint_expected_q_p8_p128,poly,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xf1, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(vreint_expected_q_p16_p128,poly,16,8) [] = { 0xfff0, 0xffff, + 0xffff, 0xffff, + 0xfff1, 0xffff, + 0xffff, 0xffff }; +VECT_VAR_DECL(vreint_expected_q_p64_p128,uint,64,2) [] = { 0xfffffffffffffff0, + 0xfffffffffffffff1 }; +VECT_VAR_DECL(vreint_expected_q_f32_p128,hfloat,32,4) [] = { 0xfffffff0, 0xffffffff, + 0xfffffff1, 0xffffffff }; + +int main (void) +{ + DECL_VARIABLE_128BITS_VARIANTS(vreint_vector); + DECL_VARIABLE(vreint_vector, poly, 64, 2); + DECL_VARIABLE_128BITS_VARIANTS(vreint_vector_res); + DECL_VARIABLE(vreint_vector_res, poly, 64, 2); + + clean_results (); + + TEST_MACRO_128BITS_VARIANTS_2_5(VLOAD, vreint_vector, buffer); + VLOAD(vreint_vector, buffer, q, poly, p, 64, 2); + VLOAD(vreint_vector, buffer, q, float, f, 32, 4); + + /* vreinterpretq_p128_* tests. */ +#undef TEST_MSG +#define TEST_MSG "VREINTERPRETQ_P128_*" + + /* Since there is no way to store a poly128_t value, convert to + poly64x2_t before storing. This means that we are not able to + test vreinterpretq_p128* alone, and that errors in + vreinterpretq_p64_p128 could compensate for errors in + vreinterpretq_p128*. */ +#define TEST_VREINTERPRET128(Q, T1, T2, W, N, TS1, TS2, WS, NS, EXPECTED) \ + VECT_VAR(vreint_vector_res, poly, 64, 2) = vreinterpretq_p64_p128( \ + vreinterpret##Q##_##T2##W##_##TS2##WS(VECT_VAR(vreint_vector, TS1, WS, NS))); \ + vst1##Q##_##T2##64(VECT_VAR(result, poly, 64, 2), \ + VECT_VAR(vreint_vector_res, poly, 64, 2)); \ + CHECK(TEST_MSG, T1, 64, 2, PRIx##64, EXPECTED, ""); + + TEST_VREINTERPRET128(q, poly, p, 128, 1, int, s, 8, 16, vreint_expected_q_p128_s8); + TEST_VREINTERPRET128(q, poly, p, 128, 1, int, s, 16, 8, vreint_expected_q_p128_s16); + TEST_VREINTERPRET128(q, poly, p, 128, 1, int, s, 32, 4, vreint_expected_q_p128_s32); + TEST_VREINTERPRET128(q, poly, p, 128, 1, int, s, 64, 2, vreint_expected_q_p128_s64); + TEST_VREINTERPRET128(q, poly, p, 128, 1, uint, u, 8, 16, vreint_expected_q_p128_u8); + TEST_VREINTERPRET128(q, poly, p, 128, 1, uint, u, 16, 8, vreint_expected_q_p128_u16); + TEST_VREINTERPRET128(q, poly, p, 128, 1, uint, u, 32, 4, vreint_expected_q_p128_u32); + TEST_VREINTERPRET128(q, poly, p, 128, 1, uint, u, 64, 2, vreint_expected_q_p128_u64); + TEST_VREINTERPRET128(q, poly, p, 128, 1, poly, p, 8, 16, vreint_expected_q_p128_p8); + TEST_VREINTERPRET128(q, poly, p, 128, 1, poly, p, 16, 8, vreint_expected_q_p128_p16); + TEST_VREINTERPRET128(q, poly, p, 128, 1, float, f, 32, 4, vreint_expected_q_p128_f32); + + /* vreinterpretq_*_p128 tests. */ +#undef TEST_MSG +#define TEST_MSG "VREINTERPRETQ_*_P128" + + /* Since there is no way to load a poly128_t value, load a + poly64x2_t and convert it to poly128_t. This means that we are + not able to test vreinterpretq_*_p128 alone, and that errors in + vreinterpretq_p128_p64 could compensate for errors in + vreinterpretq_*_p128*. */ +#define TEST_VREINTERPRET_FROM_P128(Q, T1, T2, W, N, TS1, TS2, WS, NS, EXPECTED) \ + VECT_VAR(vreint_vector_res, T1, W, N) = \ + vreinterpret##Q##_##T2##W##_##TS2##WS( \ + vreinterpretq_p128_p64(VECT_VAR(vreint_vector, TS1, 64, 2))); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vreint_vector_res, T1, W, N)); \ + CHECK(TEST_MSG, T1, W, N, PRIx##W, EXPECTED, ""); + +#define TEST_VREINTERPRET_FP_FROM_P128(Q, T1, T2, W, N, TS1, TS2, WS, NS, EXPECTED) \ + VECT_VAR(vreint_vector_res, T1, W, N) = \ + vreinterpret##Q##_##T2##W##_##TS2##WS( \ + vreinterpretq_p128_p64(VECT_VAR(vreint_vector, TS1, 64, 2))); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vreint_vector_res, T1, W, N)); \ + CHECK_FP(TEST_MSG, T1, W, N, PRIx##W, EXPECTED, ""); + + TEST_VREINTERPRET_FROM_P128(q, int, s, 8, 16, poly, p, 128, 1, vreint_expected_q_s8_p128); + TEST_VREINTERPRET_FROM_P128(q, int, s, 16, 8, poly, p, 128, 1, vreint_expected_q_s16_p128); + TEST_VREINTERPRET_FROM_P128(q, int, s, 32, 4, poly, p, 128, 1, vreint_expected_q_s32_p128); + TEST_VREINTERPRET_FROM_P128(q, int, s, 64, 2, poly, p, 128, 1, vreint_expected_q_s64_p128); + TEST_VREINTERPRET_FROM_P128(q, uint, u, 8, 16, poly, p, 128, 1, vreint_expected_q_u8_p128); + TEST_VREINTERPRET_FROM_P128(q, uint, u, 16, 8, poly, p, 128, 1, vreint_expected_q_u16_p128); + TEST_VREINTERPRET_FROM_P128(q, uint, u, 32, 4, poly, p, 128, 1, vreint_expected_q_u32_p128); + TEST_VREINTERPRET_FROM_P128(q, uint, u, 64, 2, poly, p, 128, 1, vreint_expected_q_u64_p128); + TEST_VREINTERPRET_FROM_P128(q, poly, p, 8, 16, poly, p, 128, 1, vreint_expected_q_p8_p128); + TEST_VREINTERPRET_FROM_P128(q, poly, p, 16, 8, poly, p, 128, 1, vreint_expected_q_p16_p128); + TEST_VREINTERPRET_FP_FROM_P128(q, float, f, 32, 4, poly, p, 128, 1, vreint_expected_q_f32_p128); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c new file mode 100644 index 0000000..79f3cd6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c @@ -0,0 +1,188 @@ +/* This file contains tests for the vreinterpret *p64 intrinsics. */ + +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-add-options arm_crypto } */ + +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected results: vreinterpret_p64_*. */ +VECT_VAR_DECL(vreint_expected_p64_s8,poly,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; +VECT_VAR_DECL(vreint_expected_p64_s16,poly,64,1) [] = { 0xfff3fff2fff1fff0 }; +VECT_VAR_DECL(vreint_expected_p64_s32,poly,64,1) [] = { 0xfffffff1fffffff0 }; +VECT_VAR_DECL(vreint_expected_p64_s64,poly,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(vreint_expected_p64_u8,poly,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; +VECT_VAR_DECL(vreint_expected_p64_u16,poly,64,1) [] = { 0xfff3fff2fff1fff0 }; +VECT_VAR_DECL(vreint_expected_p64_u32,poly,64,1) [] = { 0xfffffff1fffffff0 }; +VECT_VAR_DECL(vreint_expected_p64_u64,poly,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(vreint_expected_p64_p8,poly,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; +VECT_VAR_DECL(vreint_expected_p64_p16,poly,64,1) [] = { 0xfff3fff2fff1fff0 }; +VECT_VAR_DECL(vreint_expected_p64_f32,poly,64,1) [] = { 0xc1700000c1800000 }; + +/* Expected results: vreinterpretq_p64_*. */ +VECT_VAR_DECL(vreint_expected_q_p64_s8,poly,64,2) [] = { 0xf7f6f5f4f3f2f1f0, + 0xfffefdfcfbfaf9f8 }; +VECT_VAR_DECL(vreint_expected_q_p64_s16,poly,64,2) [] = { 0xfff3fff2fff1fff0, + 0xfff7fff6fff5fff4 }; +VECT_VAR_DECL(vreint_expected_q_p64_s32,poly,64,2) [] = { 0xfffffff1fffffff0, + 0xfffffff3fffffff2 }; +VECT_VAR_DECL(vreint_expected_q_p64_s64,poly,64,2) [] = { 0xfffffffffffffff0, + 0xfffffffffffffff1 }; +VECT_VAR_DECL(vreint_expected_q_p64_u8,poly,64,2) [] = { 0xf7f6f5f4f3f2f1f0, + 0xfffefdfcfbfaf9f8 }; +VECT_VAR_DECL(vreint_expected_q_p64_u16,poly,64,2) [] = { 0xfff3fff2fff1fff0, + 0xfff7fff6fff5fff4 }; +VECT_VAR_DECL(vreint_expected_q_p64_u32,poly,64,2) [] = { 0xfffffff1fffffff0, + 0xfffffff3fffffff2 }; +VECT_VAR_DECL(vreint_expected_q_p64_u64,poly,64,2) [] = { 0xfffffffffffffff0, + 0xfffffffffffffff1 }; +VECT_VAR_DECL(vreint_expected_q_p64_p8,poly,64,2) [] = { 0xf7f6f5f4f3f2f1f0, + 0xfffefdfcfbfaf9f8 }; +VECT_VAR_DECL(vreint_expected_q_p64_p16,poly,64,2) [] = { 0xfff3fff2fff1fff0, + 0xfff7fff6fff5fff4 }; +VECT_VAR_DECL(vreint_expected_q_p64_f32,poly,64,2) [] = { 0xc1700000c1800000, + 0xc1500000c1600000 }; + +/* Expected results: vreinterpret_*_p64. */ +VECT_VAR_DECL(vreint_expected_s8_p64,int,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(vreint_expected_s16_p64,int,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(vreint_expected_s32_p64,int,32,2) [] = { 0xfffffff0, 0xffffffff }; +VECT_VAR_DECL(vreint_expected_s64_p64,int,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(vreint_expected_u8_p64,uint,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(vreint_expected_u16_p64,uint,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(vreint_expected_u32_p64,uint,32,2) [] = { 0xfffffff0, 0xffffffff }; +VECT_VAR_DECL(vreint_expected_u64_p64,uint,64,1) [] = { 0xfffffffffffffff0 }; +VECT_VAR_DECL(vreint_expected_p8_p64,poly,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(vreint_expected_p16_p64,poly,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(vreint_expected_f32_p64,hfloat,32,2) [] = { 0xfffffff0, 0xffffffff }; + +/* Expected results: vreinterpretq_*_p64. */ +VECT_VAR_DECL(vreint_expected_q_s8_p64,int,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xf1, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(vreint_expected_q_s16_p64,int,16,8) [] = { 0xfff0, 0xffff, + 0xffff, 0xffff, + 0xfff1, 0xffff, + 0xffff, 0xffff }; +VECT_VAR_DECL(vreint_expected_q_s32_p64,int,32,4) [] = { 0xfffffff0, 0xffffffff, + 0xfffffff1, 0xffffffff }; +VECT_VAR_DECL(vreint_expected_q_s64_p64,int,64,2) [] = { 0xfffffffffffffff0, + 0xfffffffffffffff1 }; +VECT_VAR_DECL(vreint_expected_q_u8_p64,uint,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xf1, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(vreint_expected_q_u16_p64,uint,16,8) [] = { 0xfff0, 0xffff, + 0xffff, 0xffff, + 0xfff1, 0xffff, + 0xffff, 0xffff }; +VECT_VAR_DECL(vreint_expected_q_u32_p64,uint,32,4) [] = { 0xfffffff0, 0xffffffff, + 0xfffffff1, 0xffffffff }; +VECT_VAR_DECL(vreint_expected_q_u64_p64,uint,64,2) [] = { 0xfffffffffffffff0, + 0xfffffffffffffff1 }; +VECT_VAR_DECL(vreint_expected_q_p8_p64,poly,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xf1, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(vreint_expected_q_p16_p64,poly,16,8) [] = { 0xfff0, 0xffff, + 0xffff, 0xffff, + 0xfff1, 0xffff, + 0xffff, 0xffff }; +VECT_VAR_DECL(vreint_expected_q_f32_p64,hfloat,32,4) [] = { 0xfffffff0, 0xffffffff, + 0xfffffff1, 0xffffffff }; + +int main (void) +{ +#define TEST_VREINTERPRET(Q, T1, T2, W, N, TS1, TS2, WS, NS, EXPECTED) \ + VECT_VAR(vreint_vector_res, T1, W, N) = \ + vreinterpret##Q##_##T2##W##_##TS2##WS(VECT_VAR(vreint_vector, TS1, WS, NS)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vreint_vector_res, T1, W, N)); \ + CHECK(TEST_MSG, T1, W, N, PRIx##W, EXPECTED, ""); + +#define TEST_VREINTERPRET_FP(Q, T1, T2, W, N, TS1, TS2, WS, NS, EXPECTED) \ + VECT_VAR(vreint_vector_res, T1, W, N) = \ + vreinterpret##Q##_##T2##W##_##TS2##WS(VECT_VAR(vreint_vector, TS1, WS, NS)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vreint_vector_res, T1, W, N)); \ + CHECK_FP(TEST_MSG, T1, W, N, PRIx##W, EXPECTED, ""); + + DECL_VARIABLE_ALL_VARIANTS(vreint_vector); + DECL_VARIABLE(vreint_vector, poly, 64, 1); + DECL_VARIABLE(vreint_vector, poly, 64, 2); + DECL_VARIABLE_ALL_VARIANTS(vreint_vector_res); + DECL_VARIABLE(vreint_vector_res, poly, 64, 1); + DECL_VARIABLE(vreint_vector_res, poly, 64, 2); + + clean_results (); + + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vreint_vector, buffer); + VLOAD(vreint_vector, buffer, , poly, p, 64, 1); + VLOAD(vreint_vector, buffer, q, poly, p, 64, 2); + VLOAD(vreint_vector, buffer, , float, f, 32, 2); + VLOAD(vreint_vector, buffer, q, float, f, 32, 4); + + /* vreinterpret_p64_* tests. */ +#undef TEST_MSG +#define TEST_MSG "VREINTERPRET_P64_*" + TEST_VREINTERPRET(, poly, p, 64, 1, int, s, 8, 8, vreint_expected_p64_s8); + TEST_VREINTERPRET(, poly, p, 64, 1, int, s, 16, 4, vreint_expected_p64_s16); + TEST_VREINTERPRET(, poly, p, 64, 1, int, s, 32, 2, vreint_expected_p64_s32); + TEST_VREINTERPRET(, poly, p, 64, 1, int, s, 64, 1, vreint_expected_p64_s64); + TEST_VREINTERPRET(, poly, p, 64, 1, uint, u, 8, 8, vreint_expected_p64_u8); + TEST_VREINTERPRET(, poly, p, 64, 1, uint, u, 16, 4, vreint_expected_p64_u16); + TEST_VREINTERPRET(, poly, p, 64, 1, uint, u, 32, 2, vreint_expected_p64_u32); + TEST_VREINTERPRET(, poly, p, 64, 1, uint, u, 64, 1, vreint_expected_p64_u64); + TEST_VREINTERPRET(, poly, p, 64, 1, poly, p, 8, 8, vreint_expected_p64_p8); + TEST_VREINTERPRET(, poly, p, 64, 1, poly, p, 16, 4, vreint_expected_p64_p16); + TEST_VREINTERPRET(, poly, p, 64, 1, float, f, 32, 2, vreint_expected_p64_f32); + + /* vreinterpretq_p64_* tests. */ +#undef TEST_MSG +#define TEST_MSG "VREINTERPRETQ_P64_*" + TEST_VREINTERPRET(q, poly, p, 64, 2, int, s, 8, 16, vreint_expected_q_p64_s8); + TEST_VREINTERPRET(q, poly, p, 64, 2, int, s, 16, 8, vreint_expected_q_p64_s16); + TEST_VREINTERPRET(q, poly, p, 64, 2, int, s, 32, 4, vreint_expected_q_p64_s32); + TEST_VREINTERPRET(q, poly, p, 64, 2, int, s, 64, 2, vreint_expected_q_p64_s64); + TEST_VREINTERPRET(q, poly, p, 64, 2, uint, u, 8, 16, vreint_expected_q_p64_u8); + TEST_VREINTERPRET(q, poly, p, 64, 2, uint, u, 16, 8, vreint_expected_q_p64_u16); + TEST_VREINTERPRET(q, poly, p, 64, 2, uint, u, 32, 4, vreint_expected_q_p64_u32); + TEST_VREINTERPRET(q, poly, p, 64, 2, uint, u, 64, 2, vreint_expected_q_p64_u64); + TEST_VREINTERPRET(q, poly, p, 64, 2, poly, p, 8, 16, vreint_expected_q_p64_p8); + TEST_VREINTERPRET(q, poly, p, 64, 2, poly, p, 16, 8, vreint_expected_q_p64_p16); + TEST_VREINTERPRET(q, poly, p, 64, 2, float, f, 32, 4, vreint_expected_q_p64_f32); + + /* vreinterpret_*_p64 tests. */ +#undef TEST_MSG +#define TEST_MSG "VREINTERPRET_*_P64" + + TEST_VREINTERPRET(, int, s, 8, 8, poly, p, 64, 1, vreint_expected_s8_p64); + TEST_VREINTERPRET(, int, s, 16, 4, poly, p, 64, 1, vreint_expected_s16_p64); + TEST_VREINTERPRET(, int, s, 32, 2, poly, p, 64, 1, vreint_expected_s32_p64); + TEST_VREINTERPRET(, int, s, 64, 1, poly, p, 64, 1, vreint_expected_s64_p64); + TEST_VREINTERPRET(, uint, u, 8, 8, poly, p, 64, 1, vreint_expected_u8_p64); + TEST_VREINTERPRET(, uint, u, 16, 4, poly, p, 64, 1, vreint_expected_u16_p64); + TEST_VREINTERPRET(, uint, u, 32, 2, poly, p, 64, 1, vreint_expected_u32_p64); + TEST_VREINTERPRET(, uint, u, 64, 1, poly, p, 64, 1, vreint_expected_u64_p64); + TEST_VREINTERPRET(, poly, p, 8, 8, poly, p, 64, 1, vreint_expected_p8_p64); + TEST_VREINTERPRET(, poly, p, 16, 4, poly, p, 64, 1, vreint_expected_p16_p64); + TEST_VREINTERPRET_FP(, float, f, 32, 2, poly, p, 64, 1, vreint_expected_f32_p64); + TEST_VREINTERPRET(q, int, s, 8, 16, poly, p, 64, 2, vreint_expected_q_s8_p64); + TEST_VREINTERPRET(q, int, s, 16, 8, poly, p, 64, 2, vreint_expected_q_s16_p64); + TEST_VREINTERPRET(q, int, s, 32, 4, poly, p, 64, 2, vreint_expected_q_s32_p64); + TEST_VREINTERPRET(q, int, s, 64, 2, poly, p, 64, 2, vreint_expected_q_s64_p64); + TEST_VREINTERPRET(q, uint, u, 8, 16, poly, p, 64, 2, vreint_expected_q_u8_p64); + TEST_VREINTERPRET(q, uint, u, 16, 8, poly, p, 64, 2, vreint_expected_q_u16_p64); + TEST_VREINTERPRET(q, uint, u, 32, 4, poly, p, 64, 2, vreint_expected_q_u32_p64); + TEST_VREINTERPRET(q, uint, u, 64, 2, poly, p, 64, 2, vreint_expected_q_u64_p64); + TEST_VREINTERPRET(q, poly, p, 8, 16, poly, p, 64, 2, vreint_expected_q_p8_p64); + TEST_VREINTERPRET(q, poly, p, 16, 8, poly, p, 64, 2, vreint_expected_q_p16_p64); + TEST_VREINTERPRET_FP(q, float, f, 32, 4, poly, p, 64, 2, vreint_expected_q_f32_p64); + + return 0; +}