
[[ARM/AArch64,testsuite] 03/36] Add vmax, vmin, vhadd, vhsub and vrhadd tests.

Message ID 1421162314-25779-4-git-send-email-christophe.lyon@linaro.org
State New

Commit Message

Christophe Lyon Jan. 13, 2015, 3:18 p.m. UTC
* gcc.target/aarch64/advsimd-intrinsics/binary_op_no64.inc: New file.
	* gcc.target/aarch64/advsimd-intrinsics/vhadd.c: New file.
	* gcc.target/aarch64/advsimd-intrinsics/vhsub.c: New file.
	* gcc.target/aarch64/advsimd-intrinsics/vmax.c: New file.
	* gcc.target/aarch64/advsimd-intrinsics/vmin.c: New file.
	* gcc.target/aarch64/advsimd-intrinsics/vrhadd.c: New file.

Comments

Tejas Belagod Jan. 16, 2015, 1:56 p.m. UTC | #1
> +#ifndef NO_FLOAT_VARIANT
> +  VLOAD(vector, buffer, , float, f, 32, 2);
> +  VLOAD(vector, buffer, q, float, f, 32, 4);
> +#endif
>
....
> +#ifndef NO_FLOAT_VARIANT
> +  VDUP(vector2, , float, f, 32, 2, -15.5f);
> +  VDUP(vector2, q, float, f, 32, 4, -14.5f);
> +#endif
> +
> +#ifndef NO_FLOAT_VARIANT
> +#define FLOAT_VARIANT(MACRO, VAR)                      \
> +  MACRO(VAR, , float, f, 32, 2);                       \
> +  MACRO(VAR, q, float, f, 32, 4)
> +#else
> +#define FLOAT_VARIANT(MACRO, VAR)
> +#endif

Double negative! :-) Probably easier on the reader to avoid it, but your 
call.
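
For instance (just a sketch, using a hypothetical HAS_FLOAT_VARIANT guard
that is not in the patch), the positive form would look like:

  #ifdef HAS_FLOAT_VARIANT
  #define FLOAT_VARIANT(MACRO, VAR)                      \
    MACRO(VAR, , float, f, 32, 2);                       \
    MACRO(VAR, q, float, f, 32, 4)
  #else
  #define FLOAT_VARIANT(MACRO, VAR)
  #endif

with the tests that do have FP variants defining HAS_FLOAT_VARIANT, instead
of the others defining NO_FLOAT_VARIANT.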

> diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmax.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmax.c
> new file mode 100644
> index 0000000..2591b16
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmax.c
> @@ -0,0 +1,64 @@
> +#include <arm_neon.h>
> +#include "arm-neon-ref.h"
> +#include "compute-ref-data.h"
> +
> +#define INSN_NAME vmax
> +#define TEST_MSG "VMAX/VMAXQ"
> +
> +/* Expected results.  */
> +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
> +                                      0xf4, 0xf5, 0xf6, 0xf7 };
> +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff3 };
> +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
> +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
> +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
> +                                       0xf4, 0xf5, 0xf6, 0xf7 };
> +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff2, 0xfff3 };
> +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
> +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
> +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
> +                                       0x33, 0x33, 0x33, 0x33 };
> +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
> +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1780000, 0xc1700000 };
> +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf4, 0xf4, 0xf4, 0xf4,
> +                                       0xf4, 0xf5, 0xf6, 0xf7,
> +                                       0xf8, 0xf9, 0xfa, 0xfb,
> +                                       0xfc, 0xfd, 0xfe, 0xff };
> +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3,
> +                                       0xfff4, 0xfff5, 0xfff6, 0xfff7 };
> +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff1,
> +                                       0xfffffff2, 0xfffffff3 };
> +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
> +                                       0x3333333333333333 };
> +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf9, 0xf9, 0xf9, 0xf9,
> +                                        0xf9, 0xf9, 0xf9, 0xf9,
> +                                        0xf9, 0xf9, 0xfa, 0xfb,
> +                                        0xfc, 0xfd, 0xfe, 0xff };
> +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff3,
> +                                        0xfff4, 0xfff5, 0xfff6, 0xfff7 };
> +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff1, 0xfffffff1,
> +                                        0xfffffff2, 0xfffffff3 };
> +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
> +                                        0x3333333333333333 };
> +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
> +                                        0x33, 0x33, 0x33, 0x33,
> +                                        0x33, 0x33, 0x33, 0x33,
> +                                        0x33, 0x33, 0x33, 0x33 };
> +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
> +                                        0x3333, 0x3333, 0x3333, 0x3333 };
> +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1680000, 0xc1680000,
> +                                          0xc1600000, 0xc1500000 };
> +
> +/* Expected results with special FP values.  */
> +VECT_VAR_DECL(expected_nan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
> +                                              0x7fc00000, 0x7fc00000 };
> +VECT_VAR_DECL(expected_mnan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
> +                                               0x7fc00000, 0x7fc00000 };
> +VECT_VAR_DECL(expected_inf,hfloat,32,4) [] = { 0x7f800000, 0x7f800000,
> +                                              0x7f800000, 0x7f800000 };
> +VECT_VAR_DECL(expected_minf,hfloat,32,4) [] = { 0x3f800000, 0x3f800000,
> +                                               0x3f800000, 0x3f800000 };
> +VECT_VAR_DECL(expected_zero1,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
> +VECT_VAR_DECL(expected_zero2,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
> +
> +#include "binary_op_no64.inc"
> diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmin.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmin.c
> new file mode 100644
> index 0000000..2b5e87c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmin.c
> @@ -0,0 +1,66 @@
> +#include <arm_neon.h>
> +#include "arm-neon-ref.h"
> +#include "compute-ref-data.h"
> +
> +#define INSN_NAME vmin
> +#define TEST_MSG "VMIN/VMINQ"
> +
> +/* Expected results.  */
> +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
> +                                      0xf3, 0xf3, 0xf3, 0xf3 };
> +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff2 };
> +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff0 };
> +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
> +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
> +                                       0xf3, 0xf3, 0xf3, 0xf3 };
> +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff1, 0xfff1 };
> +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 };
> +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
> +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
> +                                       0x33, 0x33, 0x33, 0x33 };
> +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
> +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1780000 };
> +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
> +                                       0xf4, 0xf4, 0xf4, 0xf4,
> +                                       0xf4, 0xf4, 0xf4, 0xf4,
> +                                       0xf4, 0xf4, 0xf4, 0xf4 };
> +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
> +                                       0xfff3, 0xfff3, 0xfff3, 0xfff3 };
> +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
> +                                       0xfffffff1, 0xfffffff1 };
> +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
> +                                       0x3333333333333333 };
> +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
> +                                        0xf4, 0xf5, 0xf6, 0xf7,
> +                                        0xf8, 0xf9, 0xf9, 0xf9,
> +                                        0xf9, 0xf9, 0xf9, 0xf9 };
> +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff2,
> +                                        0xfff2, 0xfff2, 0xfff2, 0xfff2 };
> +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
> +                                        0xfffffff1, 0xfffffff1 };
> +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
> +                                        0x3333333333333333 };
> +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
> +                                        0x33, 0x33, 0x33, 0x33,
> +                                        0x33, 0x33, 0x33, 0x33,
> +                                        0x33, 0x33, 0x33, 0x33 };
> +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
> +                                        0x3333, 0x3333, 0x3333, 0x3333 };
> +
> +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
> +                                          0xc1680000, 0xc1680000 };
> +/* Expected results with special FP values.  */
> +VECT_VAR_DECL(expected_nan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
> +                                              0x7fc00000, 0x7fc00000 };
> +VECT_VAR_DECL(expected_mnan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
> +                                               0x7fc00000, 0x7fc00000 };
> +VECT_VAR_DECL(expected_inf,hfloat,32,4) [] = { 0x3f800000, 0x3f800000,
> +                                              0x3f800000, 0x3f800000 };
> +VECT_VAR_DECL(expected_minf,hfloat,32,4) [] = { 0xff800000, 0xff800000,
> +                                               0xff800000, 0xff800000 };
> +VECT_VAR_DECL(expected_zero1,hfloat,32,4) [] = { 0x80000000, 0x80000000,
> +                                                0x80000000, 0x80000000 };
> +VECT_VAR_DECL(expected_zero2,hfloat,32,4) [] = { 0x80000000, 0x80000000,
> +                                                0x80000000, 0x80000000 };
> +
> +#include "binary_op_no64.inc"

vmax and vmin do have v<maxmin>_f64 and v<maxmin>q_f64 variants.

Otherwise, they look good to me (but I can't approve it).

Tejas.
Christophe Lyon Jan. 16, 2015, 4:21 p.m. UTC | #2
On 16 January 2015 at 14:56, Tejas Belagod <tejas.belagod@arm.com> wrote:
>> +#ifndef NO_FLOAT_VARIANT
>> +  VLOAD(vector, buffer, , float, f, 32, 2);
>> +  VLOAD(vector, buffer, q, float, f, 32, 4);
>> +#endif
>>
> ....
>>
>> +#ifndef NO_FLOAT_VARIANT
>> +  VDUP(vector2, , float, f, 32, 2, -15.5f);
>> +  VDUP(vector2, q, float, f, 32, 4, -14.5f);
>> +#endif
>> +
>> +#ifndef NO_FLOAT_VARIANT
>> +#define FLOAT_VARIANT(MACRO, VAR)                      \
>> +  MACRO(VAR, , float, f, 32, 2);                       \
>> +  MACRO(VAR, q, float, f, 32, 4)
>> +#else
>> +#define FLOAT_VARIANT(MACRO, VAR)
>> +#endif
>
>
> Double negative! :-) Probably easier on the reader to avoid it, but your
> call.

Oh yes... I am importing my existing code, so I try to minimize changes.

>> diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmax.c
>> b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmax.c
>> new file mode 100644
>> index 0000000..2591b16
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmax.c
>> @@ -0,0 +1,64 @@
>> +#include <arm_neon.h>
>> +#include "arm-neon-ref.h"
>> +#include "compute-ref-data.h"
>> +
>> +#define INSN_NAME vmax
>> +#define TEST_MSG "VMAX/VMAXQ"
>> +
>> +/* Expected results.  */
>> +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
>> +                                      0xf4, 0xf5, 0xf6, 0xf7 };
>> +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff3 };
>> +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
>> +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
>> +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
>> +                                       0xf4, 0xf5, 0xf6, 0xf7 };
>> +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff2, 0xfff3
>> };
>> +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
>> +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
>> +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
>> +                                       0x33, 0x33, 0x33, 0x33 };
>> +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333
>> };
>> +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1780000, 0xc1700000 };
>> +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf4, 0xf4, 0xf4, 0xf4,
>> +                                       0xf4, 0xf5, 0xf6, 0xf7,
>> +                                       0xf8, 0xf9, 0xfa, 0xfb,
>> +                                       0xfc, 0xfd, 0xfe, 0xff };
>> +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3,
>> +                                       0xfff4, 0xfff5, 0xfff6, 0xfff7 };
>> +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff1,
>> +                                       0xfffffff2, 0xfffffff3 };
>> +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
>> +                                       0x3333333333333333 };
>> +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf9, 0xf9, 0xf9, 0xf9,
>> +                                        0xf9, 0xf9, 0xf9, 0xf9,
>> +                                        0xf9, 0xf9, 0xfa, 0xfb,
>> +                                        0xfc, 0xfd, 0xfe, 0xff };
>> +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff3,
>> +                                        0xfff4, 0xfff5, 0xfff6, 0xfff7 };
>> +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff1, 0xfffffff1,
>> +                                        0xfffffff2, 0xfffffff3 };
>> +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
>> +                                        0x3333333333333333 };
>> +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
>> +                                        0x33, 0x33, 0x33, 0x33,
>> +                                        0x33, 0x33, 0x33, 0x33,
>> +                                        0x33, 0x33, 0x33, 0x33 };
>> +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
>> +                                        0x3333, 0x3333, 0x3333, 0x3333 };
>> +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1680000, 0xc1680000,
>> +                                          0xc1600000, 0xc1500000 };
>> +
>> +/* Expected results with special FP values.  */
>> +VECT_VAR_DECL(expected_nan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
>> +                                              0x7fc00000, 0x7fc00000 };
>> +VECT_VAR_DECL(expected_mnan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
>> +                                               0x7fc00000, 0x7fc00000 };
>> +VECT_VAR_DECL(expected_inf,hfloat,32,4) [] = { 0x7f800000, 0x7f800000,
>> +                                              0x7f800000, 0x7f800000 };
>> +VECT_VAR_DECL(expected_minf,hfloat,32,4) [] = { 0x3f800000, 0x3f800000,
>> +                                               0x3f800000, 0x3f800000 };
>> +VECT_VAR_DECL(expected_zero1,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
>> +VECT_VAR_DECL(expected_zero2,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
>> +
>> +#include "binary_op_no64.inc"
>> diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmin.c
>> b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmin.c
>> new file mode 100644
>> index 0000000..2b5e87c
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmin.c
>> @@ -0,0 +1,66 @@
>> +#include <arm_neon.h>
>> +#include "arm-neon-ref.h"
>> +#include "compute-ref-data.h"
>> +
>> +#define INSN_NAME vmin
>> +#define TEST_MSG "VMIN/VMINQ"
>> +
>> +/* Expected results.  */
>> +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
>> +                                      0xf3, 0xf3, 0xf3, 0xf3 };
>> +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff2 };
>> +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff0 };
>> +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
>> +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
>> +                                       0xf3, 0xf3, 0xf3, 0xf3 };
>> +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff1, 0xfff1
>> };
>> +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 };
>> +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
>> +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
>> +                                       0x33, 0x33, 0x33, 0x33 };
>> +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333
>> };
>> +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1780000 };
>> +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
>> +                                       0xf4, 0xf4, 0xf4, 0xf4,
>> +                                       0xf4, 0xf4, 0xf4, 0xf4,
>> +                                       0xf4, 0xf4, 0xf4, 0xf4 };
>> +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
>> +                                       0xfff3, 0xfff3, 0xfff3, 0xfff3 };
>> +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
>> +                                       0xfffffff1, 0xfffffff1 };
>> +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
>> +                                       0x3333333333333333 };
>> +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
>> +                                        0xf4, 0xf5, 0xf6, 0xf7,
>> +                                        0xf8, 0xf9, 0xf9, 0xf9,
>> +                                        0xf9, 0xf9, 0xf9, 0xf9 };
>> +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff2,
>> +                                        0xfff2, 0xfff2, 0xfff2, 0xfff2 };
>> +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
>> +                                        0xfffffff1, 0xfffffff1 };
>> +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
>> +                                        0x3333333333333333 };
>> +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
>> +                                        0x33, 0x33, 0x33, 0x33,
>> +                                        0x33, 0x33, 0x33, 0x33,
>> +                                        0x33, 0x33, 0x33, 0x33 };
>> +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
>> +                                        0x3333, 0x3333, 0x3333, 0x3333 };
>> +
>> +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
>> +                                          0xc1680000, 0xc1680000 };
>> +/* Expected results with special FP values.  */
>> +VECT_VAR_DECL(expected_nan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
>> +                                              0x7fc00000, 0x7fc00000 };
>> +VECT_VAR_DECL(expected_mnan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
>> +                                               0x7fc00000, 0x7fc00000 };
>> +VECT_VAR_DECL(expected_inf,hfloat,32,4) [] = { 0x3f800000, 0x3f800000,
>> +                                              0x3f800000, 0x3f800000 };
>> +VECT_VAR_DECL(expected_minf,hfloat,32,4) [] = { 0xff800000, 0xff800000,
>> +                                               0xff800000, 0xff800000 };
>> +VECT_VAR_DECL(expected_zero1,hfloat,32,4) [] = { 0x80000000, 0x80000000,
>> +                                                0x80000000, 0x80000000 };
>> +VECT_VAR_DECL(expected_zero2,hfloat,32,4) [] = { 0x80000000, 0x80000000,
>> +                                                0x80000000, 0x80000000 };
>> +
>> +#include "binary_op_no64.inc"
>
>
> vmax and vmin do have v<maxmin>_f64 and v<maxmin>q_f64 variants.

My existing tests only cover armv7 so far.
I do plan to expand them once they are all in GCC.
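For reference, the AArch64-only variants in question should be (assuming the
usual arm_neon.h prototypes):

  float64x1_t vmax_f64 (float64x1_t a, float64x1_t b);
  float64x2_t vmaxq_f64 (float64x2_t a, float64x2_t b);
  float64x1_t vmin_f64 (float64x1_t a, float64x1_t b);
  float64x2_t vminq_f64 (float64x2_t a, float64x2_t b);

plus the corresponding expected-results entries.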

> Otherwise, they look good to me(but I can't approve it).
>
> Tejas.
>
Marcus Shawcroft Jan. 16, 2015, 5:14 p.m. UTC | #3
On 16 January 2015 at 16:21, Christophe Lyon <christophe.lyon@linaro.org> wrote:

> My existing tests only cover armv7 so far.
> I do plan to expand them once they are all in GCC.
>
>> Otherwise, they look good to me(but I can't approve it).
>>
>> Tejas.
>>

OK provided, as per the previous couple, that we don't regress or
introduce new fails on aarch64[_be] or aarch32.
/Marcus
Christophe Lyon Jan. 16, 2015, 5:52 p.m. UTC | #4
On 16 January 2015 at 18:14, Marcus Shawcroft
<marcus.shawcroft@gmail.com> wrote:
> On 16 January 2015 at 16:21, Christophe Lyon <christophe.lyon@linaro.org> wrote:
>
>> My existing tests only cover armv7 so far.
>> I do plan to expand them once they are all in GCC.
>>
>>> Otherwise, they look good to me(but I can't approve it).
>>>
>>> Tejas.
>>>
>
> OK provided, as per the previous couple, that we don;t regression or
> introduce new fails on aarch64[_be] or aarch32.

This patch shows failures on aarch64 and aarch64_be for vmax and vmin
when the input is -NaN.
It's a corner case, and my reading of the ARM ARM is that the result
should be the same as on aarch32.
I haven't had time to look at it in more details though.
So, not OK?

> /Marcus
Marcus Shawcroft Jan. 19, 2015, 1:29 p.m. UTC | #5
On 16 January 2015 at 17:52, Christophe Lyon <christophe.lyon@linaro.org> wrote:

>> OK provided, as per the previous couple, that we don;t regression or
>> introduce new fails on aarch64[_be] or aarch32.
>
> This patch shows failures on aarch64 and aarch64_be for vmax and vmin
> when the input is -NaN.
> It's a corner case, and my reading of the ARM ARM is that the result
> should the same as on aarch32.
> I haven't had time to look at it in more details though.
> So, not OK?

They should have the same behaviour in aarch32 and aarch64. Did you
test on HW or a model?

/Marcus
Christophe Lyon Jan. 19, 2015, 3:43 p.m. UTC | #6
On 19 January 2015 at 14:29, Marcus Shawcroft
<marcus.shawcroft@gmail.com> wrote:
> On 16 January 2015 at 17:52, Christophe Lyon <christophe.lyon@linaro.org> wrote:
>
>>> OK provided, as per the previous couple, that we don;t regression or
>>> introduce new fails on aarch64[_be] or aarch32.
>>
>> This patch shows failures on aarch64 and aarch64_be for vmax and vmin
>> when the input is -NaN.
>> It's a corner case, and my reading of the ARM ARM is that the result
>> should the same as on aarch32.
>> I haven't had time to look at it in more details though.
>> So, not OK?
>
> They should have the same behaviour in aarch32 and aarch64. Did you
> test on HW or a model?
>
I ran the tests on qemu for aarch32 and aarch64-linux, and on the
foundation model for aarch64*-elf.

> /Marcus
Marcus Shawcroft Jan. 19, 2015, 4:54 p.m. UTC | #7
On 19 January 2015 at 15:43, Christophe Lyon <christophe.lyon@linaro.org> wrote:
> On 19 January 2015 at 14:29, Marcus Shawcroft
> <marcus.shawcroft@gmail.com> wrote:
>> On 16 January 2015 at 17:52, Christophe Lyon <christophe.lyon@linaro.org> wrote:
>>
>>>> OK provided, as per the previous couple, that we don;t regression or
>>>> introduce new fails on aarch64[_be] or aarch32.
>>>
>>> This patch shows failures on aarch64 and aarch64_be for vmax and vmin
>>> when the input is -NaN.
>>> It's a corner case, and my reading of the ARM ARM is that the result
>>> should the same as on aarch32.
>>> I haven't had time to look at it in more details though.
>>> So, not OK?
>>
>> They should have the same behaviour in aarch32 and aarch64. Did you
>> test on HW or a model?
>>
> I ran the tests on qemu for aarch32 and aarch64-linux, and on the
> foundation model for aarch64*-elf.

Leave this one out until we understand why it fails. /Marcus
Christophe Lyon Jan. 21, 2015, 3:07 p.m. UTC | #8
On 19 January 2015 at 17:54, Marcus Shawcroft
<marcus.shawcroft@gmail.com> wrote:
> On 19 January 2015 at 15:43, Christophe Lyon <christophe.lyon@linaro.org> wrote:
>> On 19 January 2015 at 14:29, Marcus Shawcroft
>> <marcus.shawcroft@gmail.com> wrote:
>>> On 16 January 2015 at 17:52, Christophe Lyon <christophe.lyon@linaro.org> wrote:
>>>
>>>>> OK provided, as per the previous couple, that we don;t regression or
>>>>> introduce new fails on aarch64[_be] or aarch32.
>>>>
>>>> This patch shows failures on aarch64 and aarch64_be for vmax and vmin
>>>> when the input is -NaN.
>>>> It's a corner case, and my reading of the ARM ARM is that the result
>>>> should the same as on aarch32.
>>>> I haven't had time to look at it in more details though.
>>>> So, not OK?
>>>
>>> They should have the same behaviour in aarch32 and aarch64. Did you
>>> test on HW or a model?
>>>
>> I ran the tests on qemu for aarch32 and aarch64-linux, and on the
>> foundation model for aarch64*-elf.
>
> Leave this one out until we understand why it fails. /Marcus

I've looked at this a bit more.
We have
fmax    v0.4s, v0.4s, v1.4s
where v0 is a vector of -NaN (0xffc00000) and v1 is a vector of 1.

The output is still -NaN (0xffc00000), while the test expects
defaultNaN (0x7fc00000).
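
For reference, a minimal standalone reproducer (a sketch only, outside the
testsuite framework) is:

  #include <arm_neon.h>
  #include <inttypes.h>
  #include <math.h>
  #include <stdio.h>

  int main (void)
  {
    float32x4_t a = vdupq_n_f32 (-NAN);  /* 0xffc00000 in each lane.  */
    float32x4_t b = vdupq_n_f32 (1.0f);
    uint32x4_t r = vreinterpretq_u32_f32 (vmaxq_f32 (a, b));
    /* 0x7fc00000 = default NaN, 0xffc00000 = propagated -NaN.  */
    printf ("0x%08" PRIx32 "\n", vgetq_lane_u32 (r, 0));
    return 0;
  }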

I have executed the test under GDB on AArch64 HW, and noticed that fpcr was 0.
I forced it to have DN==1:
set $fpcr=0x1000000
but this didn't change the result.

Does setting fpcr.dn under gdb actually work?

Christophe.
Tejas Belagod Jan. 22, 2015, 11:19 a.m. UTC | #9
On 21/01/15 15:07, Christophe Lyon wrote:
> On 19 January 2015 at 17:54, Marcus Shawcroft
> <marcus.shawcroft@gmail.com> wrote:
>> On 19 January 2015 at 15:43, Christophe Lyon <christophe.lyon@linaro.org> wrote:
>>> On 19 January 2015 at 14:29, Marcus Shawcroft
>>> <marcus.shawcroft@gmail.com> wrote:
>>>> On 16 January 2015 at 17:52, Christophe Lyon <christophe.lyon@linaro.org> wrote:
>>>>
>>>>>> OK provided, as per the previous couple, that we don;t regression or
>>>>>> introduce new fails on aarch64[_be] or aarch32.
>>>>>
>>>>> This patch shows failures on aarch64 and aarch64_be for vmax and vmin
>>>>> when the input is -NaN.
>>>>> It's a corner case, and my reading of the ARM ARM is that the result
>>>>> should the same as on aarch32.
>>>>> I haven't had time to look at it in more details though.
>>>>> So, not OK?
>>>>
>>>> They should have the same behaviour in aarch32 and aarch64. Did you
>>>> test on HW or a model?
>>>>
>>> I ran the tests on qemu for aarch32 and aarch64-linux, and on the
>>> foundation model for aarch64*-elf.
>>
>> Leave this one out until we understand why it fails. /Marcus
>
> I've looked at this a bit more.
> We have
> fmax    v0.4s, v0.4s, v1.4s
> where v0 is a vector of -NaN (0xffc00000) and v1 is a vector of 1.
>
> The output is still -NaN (0xffc00000), while the test expects
> defaultNaN (0x7fc00000).
>

In the AArch32 execution state, Advanced SIMD FP arithmetic always uses 
the DefaultNaN setting regardless of the DN-bit value in the FPSCR. In 
the AArch64 execution state, the result of Advanced SIMD FP arithmetic 
operations depends on the value of the DN bit, i.e. it either propagates the 
input NaN or generates DefaultNaN.

If you're running your test in the AArch64 execution state, you'd want 
to define the DN bit and modify the expected results accordingly or have 
the test poll at runtime what the DN-bit is set to and check expected 
results dynamically.

I think the test already has the expected behaviour for the AArch32 execution 
state, since it expects DefaultNaN regardless.

> I have executed the test under GDB on AArch64 HW, and noticed that fpcr was 0.
> I forced it to have DN==1:
> set $fpcr=0x1000000
> but this didn't change the result.
>
> Does setting fpcr.dn under gdb actually work?
>

It should. Possibly a bug, patches welcome :-).

Tejas.
Christophe Lyon Jan. 22, 2015, 2:28 p.m. UTC | #10
On 22 January 2015 at 12:19, Tejas Belagod <tejas.belagod@arm.com> wrote:
> On 21/01/15 15:07, Christophe Lyon wrote:
>>
>> On 19 January 2015 at 17:54, Marcus Shawcroft
>> <marcus.shawcroft@gmail.com> wrote:
>>>
>>> On 19 January 2015 at 15:43, Christophe Lyon <christophe.lyon@linaro.org>
>>> wrote:
>>>>
>>>> On 19 January 2015 at 14:29, Marcus Shawcroft
>>>> <marcus.shawcroft@gmail.com> wrote:
>>>>>
>>>>> On 16 January 2015 at 17:52, Christophe Lyon
>>>>> <christophe.lyon@linaro.org> wrote:
>>>>>
>>>>>>> OK provided, as per the previous couple, that we don;t regression or
>>>>>>> introduce new fails on aarch64[_be] or aarch32.
>>>>>>
>>>>>>
>>>>>> This patch shows failures on aarch64 and aarch64_be for vmax and vmin
>>>>>> when the input is -NaN.
>>>>>> It's a corner case, and my reading of the ARM ARM is that the result
>>>>>> should the same as on aarch32.
>>>>>> I haven't had time to look at it in more details though.
>>>>>> So, not OK?
>>>>>
>>>>>
>>>>> They should have the same behaviour in aarch32 and aarch64. Did you
>>>>> test on HW or a model?
>>>>>
>>>> I ran the tests on qemu for aarch32 and aarch64-linux, and on the
>>>> foundation model for aarch64*-elf.
>>>
>>>
>>> Leave this one out until we understand why it fails. /Marcus
>>
>>
>> I've looked at this a bit more.
>> We have
>> fmax    v0.4s, v0.4s, v1.4s
>> where v0 is a vector of -NaN (0xffc00000) and v1 is a vector of 1.
>>
>> The output is still -NaN (0xffc00000), while the test expects
>> defaultNaN (0x7fc00000).
>>
>
> In the AArch32 execution state, Advanced SIMD FP arithmetic always uses the
> DefaultNaN setting regardless of the DN-bit value in the FPSCR. In AArch64
> execution state, result of Advanced SIMD FP arithmetic operations depend on
> the value of the DN-bit i.e. either propagate the input NaN or generate
> DefaultNaN depending on the value of DN.

Maybe I'm using an outdated doc. On page 2282 of ARMv8 ARM rev C, I
can see only the latter (no diff between aarch32 and aarch64 in
FPProcessNan pseudo-code).

> If you're running your test in the AArch64 execution state, you'd want to
> define the DN bit and modify the expected results accordingly or have the
> test poll at runtime what the DN-bit is set to and check expected results
> dynamically.
Makes sense, I hadn't noticed the different aarch64 spec here.

> I think the test already has expected behaviour for AArch32 execution state
> by expecting DefaultNaN regardless.
Yes.

>> I have executed the test under GDB on AArch64 HW, and noticed that fpcr
>> was 0.
>> I forced it to have DN==1:
>> set $fpcr=0x1000000
>> but this didn't change the result.
>>
>> Does setting fpcr.dn under gdb actually work?
>>
>
> It should. Possibly a bug, patches welcome :-).
>
:-)
Tejas Belagod Jan. 22, 2015, 3:22 p.m. UTC | #11
On 22/01/15 14:28, Christophe Lyon wrote:
> On 22 January 2015 at 12:19, Tejas Belagod <tejas.belagod@arm.com> wrote:
>> On 21/01/15 15:07, Christophe Lyon wrote:
>>>
>>> On 19 January 2015 at 17:54, Marcus Shawcroft
>>> <marcus.shawcroft@gmail.com> wrote:
>>>>
>>>> On 19 January 2015 at 15:43, Christophe Lyon <christophe.lyon@linaro.org>
>>>> wrote:
>>>>>
>>>>> On 19 January 2015 at 14:29, Marcus Shawcroft
>>>>> <marcus.shawcroft@gmail.com> wrote:
>>>>>>
>>>>>> On 16 January 2015 at 17:52, Christophe Lyon
>>>>>> <christophe.lyon@linaro.org> wrote:
>>>>>>
>>>>>>>> OK provided, as per the previous couple, that we don;t regression or
>>>>>>>> introduce new fails on aarch64[_be] or aarch32.
>>>>>>>
>>>>>>>
>>>>>>> This patch shows failures on aarch64 and aarch64_be for vmax and vmin
>>>>>>> when the input is -NaN.
>>>>>>> It's a corner case, and my reading of the ARM ARM is that the result
>>>>>>> should the same as on aarch32.
>>>>>>> I haven't had time to look at it in more details though.
>>>>>>> So, not OK?
>>>>>>
>>>>>>
>>>>>> They should have the same behaviour in aarch32 and aarch64. Did you
>>>>>> test on HW or a model?
>>>>>>
>>>>> I ran the tests on qemu for aarch32 and aarch64-linux, and on the
>>>>> foundation model for aarch64*-elf.
>>>>
>>>>
>>>> Leave this one out until we understand why it fails. /Marcus
>>>
>>>
>>> I've looked at this a bit more.
>>> We have
>>> fmax    v0.4s, v0.4s, v1.4s
>>> where v0 is a vector of -NaN (0xffc00000) and v1 is a vector of 1.
>>>
>>> The output is still -NaN (0xffc00000), while the test expects
>>> defaultNaN (0x7fc00000).
>>>
>>
>> In the AArch32 execution state, Advanced SIMD FP arithmetic always uses the
>> DefaultNaN setting regardless of the DN-bit value in the FPSCR. In AArch64
>> execution state, result of Advanced SIMD FP arithmetic operations depend on
>> the value of the DN-bit i.e. either propagate the input NaN or generate
>> DefaultNaN depending on the value of DN.
>
> Maybe I'm using an outdated doc. On page 2282 of ARMv8 ARM rev C, I
> can see only the latter (no diff between aarch32 and aarch64 in
> FPProcessNan pseudo-code)
>

If you look at pg. 4005 in the same doc (rev C), you'll see the FPSCR spec - 
under DN:

"The value of this bit only controls scalar floating-point arithmetic. 
Advanced SIMD arithmetic always uses the Default NaN setting, regardless 
of the value of the DN bit."

Also on page 3180 for the description of VMAX(vector FP), it says:
"
*  max(+0.0, -0.0) = +0.0
* If any input is a NaN, the corresponding result element is the default 
NaN.
"

The VMAX pseudocode on pg. 3180 passes StandardFPSCRValue() to 
FPMax(), which is on pg. 2285:

// StandardFPSCRValue()
// ====================
FPCRType StandardFPSCRValue()
return ‘00000’ : FPSCR.AHP : ‘11000000000000000000000000’

Here bit-25(FPSCR.DN) is set to 1.

Thanks,
Tejas.

>> If you're running your test in the AArch64 execution state, you'd want to
>> define the DN bit and modify the expected results accordingly or have the
>> test poll at runtime what the DN-bit is set to and check expected results
>> dynamically.
> Makes sense, I hadn't noticed the different aarch64 spec here.
>
>> I think the test already has expected behaviour for AArch32 execution state
>> by expecting DefaultNaN regardless.
> Yes.
>
>>> I have executed the test under GDB on AArch64 HW, and noticed that fpcr
>>> was 0.
>>> I forced it to have DN==1:
>>> set $fpcr=0x1000000
>>> but this didn't change the result.
>>>
>>> Does setting fpcr.dn under gdb actually work?
>>>
>>
>> It should. Possibly a bug, patches welcome :-).
>>
> :-)
>
Christophe Lyon Jan. 22, 2015, 9:31 p.m. UTC | #12
On 22 January 2015 at 16:22, Tejas Belagod <tejas.belagod@arm.com> wrote:
> On 22/01/15 14:28, Christophe Lyon wrote:
>>
>> On 22 January 2015 at 12:19, Tejas Belagod <tejas.belagod@arm.com> wrote:
>>>
>>> On 21/01/15 15:07, Christophe Lyon wrote:
>>>>
>>>>
>>>> On 19 January 2015 at 17:54, Marcus Shawcroft
>>>> <marcus.shawcroft@gmail.com> wrote:
>>>>>
>>>>>
>>>>> On 19 January 2015 at 15:43, Christophe Lyon
>>>>> <christophe.lyon@linaro.org>
>>>>> wrote:
>>>>>>
>>>>>>
>>>>>> On 19 January 2015 at 14:29, Marcus Shawcroft
>>>>>> <marcus.shawcroft@gmail.com> wrote:
>>>>>>>
>>>>>>>
>>>>>>> On 16 January 2015 at 17:52, Christophe Lyon
>>>>>>> <christophe.lyon@linaro.org> wrote:
>>>>>>>
>>>>>>>>> OK provided, as per the previous couple, that we don;t regression
>>>>>>>>> or
>>>>>>>>> introduce new fails on aarch64[_be] or aarch32.
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> This patch shows failures on aarch64 and aarch64_be for vmax and
>>>>>>>> vmin
>>>>>>>> when the input is -NaN.
>>>>>>>> It's a corner case, and my reading of the ARM ARM is that the result
>>>>>>>> should the same as on aarch32.
>>>>>>>> I haven't had time to look at it in more details though.
>>>>>>>> So, not OK?
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> They should have the same behaviour in aarch32 and aarch64. Did you
>>>>>>> test on HW or a model?
>>>>>>>
>>>>>> I ran the tests on qemu for aarch32 and aarch64-linux, and on the
>>>>>> foundation model for aarch64*-elf.
>>>>>
>>>>>
>>>>>
>>>>> Leave this one out until we understand why it fails. /Marcus
>>>>
>>>>
>>>>
>>>> I've looked at this a bit more.
>>>> We have
>>>> fmax    v0.4s, v0.4s, v1.4s
>>>> where v0 is a vector of -NaN (0xffc00000) and v1 is a vector of 1.
>>>>
>>>> The output is still -NaN (0xffc00000), while the test expects
>>>> defaultNaN (0x7fc00000).
>>>>
>>>
>>> In the AArch32 execution state, Advanced SIMD FP arithmetic always uses
>>> the
>>> DefaultNaN setting regardless of the DN-bit value in the FPSCR. In
>>> AArch64
>>> execution state, result of Advanced SIMD FP arithmetic operations depend
>>> on
>>> the value of the DN-bit i.e. either propagate the input NaN or generate
>>> DefaultNaN depending on the value of DN.
>>
>>
>> Maybe I'm using an outdated doc. On page 2282 of ARMv8 ARM rev C, I
>> can see only the latter (no diff between aarch32 and aarch64 in
>> FPProcessNan pseudo-code)
>>
>
> If you see pg. 4005 in the same doc(rev C), you'll see the FPSCR spec -
> under DN:
>
> "The value of this bit only controls scalar floating-point arithmetic.
> Advanced SIMD arithmetic always uses the Default NaN setting, regardless of
> the value of the DN bit."
>
> Also on page 3180 for the description of VMAX(vector FP), it says:
> "
> *  max(+0.0, -0.0) = +0.0
> * If any input is a NaN, the corresponding result element is the default
> NaN.
> "
>
Oops I was looking at FMAX (vector) pg 936.

> The pseudocode for FPMax () on pg. 3180 passes StandardFPSCRValue() to
> FPMax() which is on pg. 2285
>
> // StandardFPSCRValue()
> // ====================
> FPCRType StandardFPSCRValue()
> return ‘00000’ : FPSCR.AHP : ‘11000000000000000000000000’
>
> Here bit-25(FPSCR.DN) is set to 1.
>

So, we should get defaultNaN too on aarch64, and no need to try to
force DN to 1 in gdb?

What can be wrong?

> Thanks,
> Tejas.
>
>
>>> If you're running your test in the AArch64 execution state, you'd want to
>>> define the DN bit and modify the expected results accordingly or have the
>>> test poll at runtime what the DN-bit is set to and check expected results
>>> dynamically.
>>
>> Makes sense, I hadn't noticed the different aarch64 spec here.
>>
>>> I think the test already has expected behaviour for AArch32 execution
>>> state
>>> by expecting DefaultNaN regardless.
>>
>> Yes.
>>
>>>> I have executed the test under GDB on AArch64 HW, and noticed that fpcr
>>>> was 0.
>>>> I forced it to have DN==1:
>>>> set $fpcr=0x1000000
>>>> but this didn't change the result.
>>>>
>>>> Does setting fpcr.dn under gdb actually work?
>>>>
>>>
>>> It should. Possibly a bug, patches welcome :-).
>>>
>> :-)
>>
>
>
Tejas Belagod Jan. 23, 2015, 10:18 a.m. UTC | #13
On 22/01/15 21:31, Christophe Lyon wrote:
> On 22 January 2015 at 16:22, Tejas Belagod <tejas.belagod@arm.com> wrote:
>> On 22/01/15 14:28, Christophe Lyon wrote:
>>>
>>> On 22 January 2015 at 12:19, Tejas Belagod <tejas.belagod@arm.com> wrote:
>>>>
>>>> On 21/01/15 15:07, Christophe Lyon wrote:
>>>>>
>>>>>
>>>>> On 19 January 2015 at 17:54, Marcus Shawcroft
>>>>> <marcus.shawcroft@gmail.com> wrote:
>>>>>>
>>>>>>
>>>>>> On 19 January 2015 at 15:43, Christophe Lyon
>>>>>> <christophe.lyon@linaro.org>
>>>>>> wrote:
>>>>>>>
>>>>>>>
>>>>>>> On 19 January 2015 at 14:29, Marcus Shawcroft
>>>>>>> <marcus.shawcroft@gmail.com> wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>> On 16 January 2015 at 17:52, Christophe Lyon
>>>>>>>> <christophe.lyon@linaro.org> wrote:
>>>>>>>>
>>>>>>>>>> OK provided, as per the previous couple, that we don;t regression
>>>>>>>>>> or
>>>>>>>>>> introduce new fails on aarch64[_be] or aarch32.
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> This patch shows failures on aarch64 and aarch64_be for vmax and
>>>>>>>>> vmin
>>>>>>>>> when the input is -NaN.
>>>>>>>>> It's a corner case, and my reading of the ARM ARM is that the result
>>>>>>>>> should the same as on aarch32.
>>>>>>>>> I haven't had time to look at it in more details though.
>>>>>>>>> So, not OK?
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> They should have the same behaviour in aarch32 and aarch64. Did you
>>>>>>>> test on HW or a model?
>>>>>>>>
>>>>>>> I ran the tests on qemu for aarch32 and aarch64-linux, and on the
>>>>>>> foundation model for aarch64*-elf.
>>>>>>
>>>>>>
>>>>>>
>>>>>> Leave this one out until we understand why it fails. /Marcus
>>>>>
>>>>>
>>>>>
>>>>> I've looked at this a bit more.
>>>>> We have
>>>>> fmax    v0.4s, v0.4s, v1.4s
>>>>> where v0 is a vector of -NaN (0xffc00000) and v1 is a vector of 1.
>>>>>
>>>>> The output is still -NaN (0xffc00000), while the test expects
>>>>> defaultNaN (0x7fc00000).
>>>>>
>>>>
>>>> In the AArch32 execution state, Advanced SIMD FP arithmetic always uses
>>>> the
>>>> DefaultNaN setting regardless of the DN-bit value in the FPSCR. In
>>>> AArch64
>>>> execution state, result of Advanced SIMD FP arithmetic operations depend
>>>> on
>>>> the value of the DN-bit i.e. either propagate the input NaN or generate
>>>> DefaultNaN depending on the value of DN.
>>>
>>>
>>> Maybe I'm using an outdated doc. On page 2282 of ARMv8 ARM rev C, I
>>> can see only the latter (no diff between aarch32 and aarch64 in
>>> FPProcessNan pseudo-code)
>>>
>>
>> If you see pg. 4005 in the same doc(rev C), you'll see the FPSCR spec -
>> under DN:
>>
>> "The value of this bit only controls scalar floating-point arithmetic.
>> Advanced SIMD arithmetic always uses the Default NaN setting, regardless of
>> the value of the DN bit."
>>
>> Also on page 3180 for the description of VMAX(vector FP), it says:
>> "
>> *  max(+0.0, -0.0) = +0.0
>> * If any input is a NaN, the corresponding result element is the default
>> NaN.
>> "
>>
> Oops I was looking at FMAX (vector) pg 936.
>
>> The pseudocode for FPMax () on pg. 3180 passes StandardFPSCRValue() to
>> FPMax() which is on pg. 2285
>>
>> // StandardFPSCRValue()
>> // ====================
>> FPCRType StandardFPSCRValue()
>> return ‘00000’ : FPSCR.AHP : ‘11000000000000000000000000’
>>
>> Here bit-25(FPSCR.DN) is set to 1.
>>
>
> So, we should get defaultNaN too on aarch64, and no need to try to
> force DN to 1 in gdb?
>
> What can be wrong?
>

On pg 3180, I see VMAX(FPSIMD) for A32/T32, not A64. I hope we're 
reading the same document.

Regardless of the page number, if you see the pseudocode for 
VMAX(FPSIMD) for AArch32, StandardFPSCRValue() (i.e. DN = 1) is passed 
to FPMax(), which means DefaultNaN() is generated regardless.

OTOH, on pg 936, you have FMAX(vector) for A64 where FPMax() in the 
pseudocode gets just FPCR.


Thanks,
Tejas.

>> Thanks,
>> Tejas.
>>
>>
>>>> If you're running your test in the AArch64 execution state, you'd want to
>>>> define the DN bit and modify the expected results accordingly or have the
>>>> test poll at runtime what the DN-bit is set to and check expected results
>>>> dynamically.
>>>
>>> Makes sense, I hadn't noticed the different aarch64 spec here.
>>>
>>>> I think the test already has expected behaviour for AArch32 execution
>>>> state
>>>> by expecting DefaultNaN regardless.
>>>
>>> Yes.
>>>
>>>>> I have executed the test under GDB on AArch64 HW, and noticed that fpcr
>>>>> was 0.
>>>>> I forced it to have DN==1:
>>>>> set $fpcr=0x1000000
>>>>> but this didn't change the result.
>>>>>
>>>>> Does setting fpcr.dn under gdb actually work?
>>>>>
>>>>
>>>> It should. Possibly a bug, patches welcome :-).
>>>>
>>> :-)
>>>
>>
>>
>
Christophe Lyon Jan. 23, 2015, 11:42 a.m. UTC | #14
On 23 January 2015 at 11:18, Tejas Belagod <tejas.belagod@arm.com> wrote:
> On 22/01/15 21:31, Christophe Lyon wrote:
>>
>> On 22 January 2015 at 16:22, Tejas Belagod <tejas.belagod@arm.com> wrote:
>>>
>>> On 22/01/15 14:28, Christophe Lyon wrote:
>>>>
>>>>
>>>> On 22 January 2015 at 12:19, Tejas Belagod <tejas.belagod@arm.com>
>>>> wrote:
>>>>>
>>>>>
>>>>> On 21/01/15 15:07, Christophe Lyon wrote:
>>>>>>
>>>>>>
>>>>>>
>>>>>> On 19 January 2015 at 17:54, Marcus Shawcroft
>>>>>> <marcus.shawcroft@gmail.com> wrote:
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> On 19 January 2015 at 15:43, Christophe Lyon
>>>>>>> <christophe.lyon@linaro.org>
>>>>>>> wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> On 19 January 2015 at 14:29, Marcus Shawcroft
>>>>>>>> <marcus.shawcroft@gmail.com> wrote:
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> On 16 January 2015 at 17:52, Christophe Lyon
>>>>>>>>> <christophe.lyon@linaro.org> wrote:
>>>>>>>>>
>>>>>>>>>>> OK provided, as per the previous couple, that we don;t regression
>>>>>>>>>>> or
>>>>>>>>>>> introduce new fails on aarch64[_be] or aarch32.
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> This patch shows failures on aarch64 and aarch64_be for vmax and
>>>>>>>>>> vmin
>>>>>>>>>> when the input is -NaN.
>>>>>>>>>> It's a corner case, and my reading of the ARM ARM is that the
>>>>>>>>>> result
>>>>>>>>>> should the same as on aarch32.
>>>>>>>>>> I haven't had time to look at it in more details though.
>>>>>>>>>> So, not OK?
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> They should have the same behaviour in aarch32 and aarch64. Did you
>>>>>>>>> test on HW or a model?
>>>>>>>>>
>>>>>>>> I ran the tests on qemu for aarch32 and aarch64-linux, and on the
>>>>>>>> foundation model for aarch64*-elf.
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> Leave this one out until we understand why it fails. /Marcus
>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>> I've looked at this a bit more.
>>>>>> We have
>>>>>> fmax    v0.4s, v0.4s, v1.4s
>>>>>> where v0 is a vector of -NaN (0xffc00000) and v1 is a vector of 1.
>>>>>>
>>>>>> The output is still -NaN (0xffc00000), while the test expects
>>>>>> defaultNaN (0x7fc00000).
>>>>>>
>>>>>
>>>>> In the AArch32 execution state, Advanced SIMD FP arithmetic always uses
>>>>> the
>>>>> DefaultNaN setting regardless of the DN-bit value in the FPSCR. In
>>>>> AArch64
>>>>> execution state, result of Advanced SIMD FP arithmetic operations
>>>>> depend
>>>>> on
>>>>> the value of the DN-bit i.e. either propagate the input NaN or generate
>>>>> DefaultNaN depending on the value of DN.
>>>>
>>>>
>>>>
>>>> Maybe I'm using an outdated doc. On page 2282 of ARMv8 ARM rev C, I
>>>> can see only the latter (no diff between aarch32 and aarch64 in
>>>> FPProcessNan pseudo-code)
>>>>
>>>
>>> If you see pg. 4005 in the same doc(rev C), you'll see the FPSCR spec -
>>> under DN:
>>>
>>> "The value of this bit only controls scalar floating-point arithmetic.
>>> Advanced SIMD arithmetic always uses the Default NaN setting, regardless
>>> of
>>> the value of the DN bit."
>>>
>>> Also on page 3180 for the description of VMAX(vector FP), it says:
>>> "
>>> *  max(+0.0, -0.0) = +0.0
>>> * If any input is a NaN, the corresponding result element is the default
>>> NaN.
>>> "
>>>
>> Oops I was looking at FMAX (vector) pg 936.
>>
>>> The pseudocode for FPMax () on pg. 3180 passes StandardFPSCRValue() to
>>> FPMax() which is on pg. 2285
>>>
>>> // StandardFPSCRValue()
>>> // ====================
>>> FPCRType StandardFPSCRValue()
>>> return ‘00000’ : FPSCR.AHP : ‘11000000000000000000000000’
>>>
>>> Here bit-25(FPSCR.DN) is set to 1.
>>>
>>
>> So, we should get defaultNaN too on aarch64, and no need to try to
>> force DN to 1 in gdb?
>>
>> What can be wrong?
>>
>
> On pg 3180, I see VMAX(FPSIMD) for A32/T32, not A64. I hope we're reading
> the same document.
>
> Regardless of the page number, if you see the pseudocode for VMAX(FPSIMD)
> for AArch32, StandardFPSCRValue() (i.e. DN = 1) is passed to FPMax() which
> means generate DefaultNaN() regardless.
>
> OTOH, on pg 936, you have FMAX(vector) for A64 where FPMax() in the
> pseudocode gets just FPCR.
>
>
Ok, that was my initial understanding but our discussion confused me.

And that's why I tried to force DN = 1 in gdb before single-stepping over
fmax    v0.4s, v0.4s, v1.4s

but it changed nothing :-(
Hence my question about a possible gdb bug or misuse.

I'll try modifying the test to have it force DN=1.

> Thanks,
> Tejas.
>
>
>>> Thanks,
>>> Tejas.
>>>
>>>
>>>>> If you're running your test in the AArch64 execution state, you'd want
>>>>> to
>>>>> define the DN bit and modify the expected results accordingly or have
>>>>> the
>>>>> test poll at runtime what the DN-bit is set to and check expected
>>>>> results
>>>>> dynamically.
>>>>
>>>>
>>>> Makes sense, I hadn't noticed the different aarch64 spec here.
>>>>
>>>>> I think the test already has expected behaviour for AArch32 execution
>>>>> state
>>>>> by expecting DefaultNaN regardless.
>>>>
>>>>
>>>> Yes.
>>>>
>>>>>> I have executed the test under GDB on AArch64 HW, and noticed that
>>>>>> fpcr
>>>>>> was 0.
>>>>>> I forced it to have DN==1:
>>>>>> set $fpcr=0x1000000
>>>>>> but this didn't change the result.
>>>>>>
>>>>>> Does setting fpcr.dn under gdb actually work?
>>>>>>
>>>>>
>>>>> It should. Possibly a bug, patches welcome :-).
>>>>>
>>>> :-)
>>>>
>>>
>>>
>>
>
>
Christophe Lyon Jan. 23, 2015, 1:44 p.m. UTC | #15
On 23 January 2015 at 12:42, Christophe Lyon <christophe.lyon@linaro.org> wrote:
> On 23 January 2015 at 11:18, Tejas Belagod <tejas.belagod@arm.com> wrote:
>> On 22/01/15 21:31, Christophe Lyon wrote:
>>>
>>> On 22 January 2015 at 16:22, Tejas Belagod <tejas.belagod@arm.com> wrote:
>>>>
>>>> On 22/01/15 14:28, Christophe Lyon wrote:
>>>>>
>>>>>
>>>>> On 22 January 2015 at 12:19, Tejas Belagod <tejas.belagod@arm.com>
>>>>> wrote:
>>>>>>
>>>>>>
>>>>>> On 21/01/15 15:07, Christophe Lyon wrote:
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> On 19 January 2015 at 17:54, Marcus Shawcroft
>>>>>>> <marcus.shawcroft@gmail.com> wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> On 19 January 2015 at 15:43, Christophe Lyon
>>>>>>>> <christophe.lyon@linaro.org>
>>>>>>>> wrote:
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> On 19 January 2015 at 14:29, Marcus Shawcroft
>>>>>>>>> <marcus.shawcroft@gmail.com> wrote:
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> On 16 January 2015 at 17:52, Christophe Lyon
>>>>>>>>>> <christophe.lyon@linaro.org> wrote:
>>>>>>>>>>
>>>>>>>>>>>> OK provided, as per the previous couple, that we don;t regression
>>>>>>>>>>>> or
>>>>>>>>>>>> introduce new fails on aarch64[_be] or aarch32.
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> This patch shows failures on aarch64 and aarch64_be for vmax and
>>>>>>>>>>> vmin
>>>>>>>>>>> when the input is -NaN.
>>>>>>>>>>> It's a corner case, and my reading of the ARM ARM is that the
>>>>>>>>>>> result
>>>>>>>>>>> should the same as on aarch32.
>>>>>>>>>>> I haven't had time to look at it in more details though.
>>>>>>>>>>> So, not OK?
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> They should have the same behaviour in aarch32 and aarch64. Did you
>>>>>>>>>> test on HW or a model?
>>>>>>>>>>
>>>>>>>>> I ran the tests on qemu for aarch32 and aarch64-linux, and on the
>>>>>>>>> foundation model for aarch64*-elf.
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> Leave this one out until we understand why it fails. /Marcus
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> I've looked at this a bit more.
>>>>>>> We have
>>>>>>> fmax    v0.4s, v0.4s, v1.4s
>>>>>>> where v0 is a vector of -NaN (0xffc00000) and v1 is a vector of 1.
>>>>>>>
>>>>>>> The output is still -NaN (0xffc00000), while the test expects
>>>>>>> defaultNaN (0x7fc00000).
>>>>>>>
>>>>>>
>>>>>> In the AArch32 execution state, Advanced SIMD FP arithmetic always uses
>>>>>> the
>>>>>> DefaultNaN setting regardless of the DN-bit value in the FPSCR. In
>>>>>> AArch64
>>>>>> execution state, result of Advanced SIMD FP arithmetic operations
>>>>>> depend
>>>>>> on
>>>>>> the value of the DN-bit i.e. either propagate the input NaN or generate
>>>>>> DefaultNaN depending on the value of DN.
>>>>>
>>>>>
>>>>>
>>>>> Maybe I'm using an outdated doc. On page 2282 of ARMv8 ARM rev C, I
>>>>> can see only the latter (no diff between aarch32 and aarch64 in
>>>>> FPProcessNan pseudo-code)
>>>>>
>>>>
>>>> If you see pg. 4005 in the same doc(rev C), you'll see the FPSCR spec -
>>>> under DN:
>>>>
>>>> "The value of this bit only controls scalar floating-point arithmetic.
>>>> Advanced SIMD arithmetic always uses the Default NaN setting, regardless
>>>> of
>>>> the value of the DN bit."
>>>>
>>>> Also on page 3180 for the description of VMAX(vector FP), it says:
>>>> "
>>>> *  max(+0.0, -0.0) = +0.0
>>>> * If any input is a NaN, the corresponding result element is the default
>>>> NaN.
>>>> "
>>>>
>>> Oops I was looking at FMAX (vector) pg 936.
>>>
>>>> The pseudocode for FPMax () on pg. 3180 passes StandardFPSCRValue() to
>>>> FPMax() which is on pg. 2285
>>>>
>>>> // StandardFPSCRValue()
>>>> // ====================
>>>> FPCRType StandardFPSCRValue()
>>>> return ‘00000’ : FPSCR.AHP : ‘11000000000000000000000000’
>>>>
>>>> Here bit-25(FPSCR.DN) is set to 1.
>>>>
>>>
>>> So, we should get defaultNaN too on aarch64, and no need to try to
>>> force DN to 1 in gdb?
>>>
>>> What can be wrong?
>>>
>>
>> On pg 3180, I see VMAX(FPSIMD) for A32/T32, not A64. I hope we're reading
>> the same document.
>>
>> Regardless of the page number, if you see the pseudocode for VMAX(FPSIMD)
>> for AArch32, StandardFPSCRValue() (i.e. DN = 1) is passed to FPMax() which
>> means generate DefaultNaN() regardless.
>>
>> OTOH, on pg 936, you have FMAX(vector) for A64 where FPMax() in the
>> pseudocode gets just FPCR.
>>
>>
> Ok, that was my initial understanding but our discussion confused me.
>
> And that's why I tried to force DN = 1 in gdb before single-stepping over
> fmax    v0.4s, v0.4s, v1.4s
>
> but it changed nothing :-(
> Hence my question about a gdb possible bug or misuse.

Hmm... user error, I missed one bit
set $fpcr=0x2000000
works under gdb.
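
(For reference, assuming the usual ARMv8 FPCR layout:

  0x1000000 == 1 << 24  ->  FPCR.FZ (flush-to-zero)
  0x2000000 == 1 << 25  ->  FPCR.DN (default NaN)

so my first attempt had only set FZ.)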

> I'll try modifying the test to have it force DN=1.
>
Forcing DN=1 in the test makes it pass.

I am going to look at adding that cleanly to my test, and resubmit it.
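Something like this is what I have in mind (untested sketch, helper name
made up):

  #include <stdint.h>

  /* Set FPCR.DN so AArch64 Advanced SIMD arithmetic returns the default
     NaN, matching what the test already expects on AArch32.  */
  static void set_default_nan_mode (void)
  {
  #ifdef __aarch64__
    uint64_t fpcr;
    __asm__ volatile ("mrs %0, fpcr" : "=r" (fpcr));
    fpcr |= (uint64_t) 1 << 25;  /* FPCR.DN is bit 25.  */
    __asm__ volatile ("msr fpcr, %0" : : "r" (fpcr));
  #endif
  }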

Thanks, and sorry for the noise.

>> Thanks,
>> Tejas.
>>
>>
>>>> Thanks,
>>>> Tejas.
>>>>
>>>>
>>>>>> If you're running your test in the AArch64 execution state, you'd want
>>>>>> to
>>>>>> define the DN bit and modify the expected results accordingly or have
>>>>>> the
>>>>>> test poll at runtime what the DN-bit is set to and check expected
>>>>>> results
>>>>>> dynamically.
>>>>>
>>>>>
>>>>> Makes sense, I hadn't noticed the different aarch64 spec here.
>>>>>
>>>>>> I think the test already has expected behaviour for AArch32 execution
>>>>>> state
>>>>>> by expecting DefaultNaN regardless.
>>>>>
>>>>>
>>>>> Yes.
>>>>>
>>>>>>> I have executed the test under GDB on AArch64 HW, and noticed that
>>>>>>> fpcr
>>>>>>> was 0.
>>>>>>> I forced it to have DN==1:
>>>>>>> set $fpcr=0x1000000
>>>>>>> but this didn't change the result.
>>>>>>>
>>>>>>> Does setting fpcr.dn under gdb actually work?
>>>>>>>
>>>>>>
>>>>>> It should. Possibly a bug, patches welcome :-).
>>>>>>
>>>>> :-)
>>>>>
>>>>
>>>>
>>>
>>
>>

Patch

diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op_no64.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op_no64.inc
new file mode 100644
index 0000000..36efe3a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op_no64.inc
@@ -0,0 +1,120 @@ 
+/* Can't use the standard binary_op.inc template because vmax has no
+   64 bits variant.  */
+
+#include <math.h>
+
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  int i;
+
+  /* Basic test: y=vmax(x,x), then store the result.  */
+#define TEST_BINARY_OP1(INSN, Q, T1, T2, W, N)				\
+  VECT_VAR(vector_res, T1, W, N) =                                      \
+    INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N),                       \
+                      VECT_VAR(vector2, T1, W, N));                     \
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+#define TEST_BINARY_OP(INSN, Q, T1, T2, W, N)   \
+  TEST_BINARY_OP1(INSN, Q, T1, T2, W, N)        \
+
+  DECL_VARIABLE_ALL_VARIANTS(vector);
+  DECL_VARIABLE_ALL_VARIANTS(vector2);
+  DECL_VARIABLE_ALL_VARIANTS(vector_res);
+
+  clean_results ();
+
+  /* Initialize input "vector" from "buffer".  */
+  TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
+#ifndef NO_FLOAT_VARIANT
+  VLOAD(vector, buffer, , float, f, 32, 2);
+  VLOAD(vector, buffer, q, float, f, 32, 4);
+#endif
+
+  /* Choose init value arbitrarily, will be used as comparison value.  */
+  VDUP(vector2, , int, s, 8, 8, -13);
+  VDUP(vector2, , int, s, 16, 4, -14);
+  VDUP(vector2, , int, s, 32, 2, -16);
+  VDUP(vector2, , uint, u, 8, 8, 0xf3);
+  VDUP(vector2, , uint, u, 16, 4, 0xfff1);
+  VDUP(vector2, , uint, u, 32, 2, 0xfffffff0);
+  VDUP(vector2, q, int, s, 8, 16, -12);
+  VDUP(vector2, q, int, s, 16, 8, -13);
+  VDUP(vector2, q, int, s, 32, 4, -15);
+  VDUP(vector2, q, uint, u, 8, 16, 0xf9);
+  VDUP(vector2, q, uint, u, 16, 8, 0xfff2);
+  VDUP(vector2, q, uint, u, 32, 4, 0xfffffff1);
+#ifndef NO_FLOAT_VARIANT
+  VDUP(vector2, , float, f, 32, 2, -15.5f);
+  VDUP(vector2, q, float, f, 32, 4, -14.5f);
+#endif
+
+#ifndef NO_FLOAT_VARIANT
+#define FLOAT_VARIANT(MACRO, VAR)			\
+  MACRO(VAR, , float, f, 32, 2);			\
+  MACRO(VAR, q, float, f, 32, 4)
+#else
+#define FLOAT_VARIANT(MACRO, VAR)
+#endif
+
+#define TEST_MACRO_NO64BIT_VARIANT_1_5(MACRO, VAR)	\
+  MACRO(VAR, , int, s, 8, 8);				\
+  MACRO(VAR, , int, s, 16, 4);				\
+  MACRO(VAR, , int, s, 32, 2);				\
+  MACRO(VAR, , uint, u, 8, 8);				\
+  MACRO(VAR, , uint, u, 16, 4);				\
+  MACRO(VAR, , uint, u, 32, 2);				\
+  MACRO(VAR, q, int, s, 8, 16);				\
+  MACRO(VAR, q, int, s, 16, 8);				\
+  MACRO(VAR, q, int, s, 32, 4);				\
+  MACRO(VAR, q, uint, u, 8, 16);			\
+  MACRO(VAR, q, uint, u, 16, 8);			\
+  MACRO(VAR, q, uint, u, 32, 4);			\
+  FLOAT_VARIANT(MACRO, VAR)
+
+  /* Apply a binary operator named INSN_NAME.  */
+  TEST_MACRO_NO64BIT_VARIANT_1_5(TEST_BINARY_OP, INSN_NAME);
+
+  CHECK_RESULTS (TEST_MSG, "");
+
+#ifndef NO_FLOAT_VARIANT
+  /* Extra FP tests with special values (NaN, ...).  */
+  VDUP(vector, q, float, f, 32, 4, 1.0f);
+  VDUP(vector2, q, float, f, 32, 4, NAN);
+  TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4);
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_nan, " FP special (NaN)");
+
+  VDUP(vector, q, float, f, 32, 4, -NAN);
+  VDUP(vector2, q, float, f, 32, 4, 1.0f);
+  TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4);
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_mnan, " FP special (-NaN)");
+
+  VDUP(vector, q, float, f, 32, 4, 1.0f);
+  VDUP(vector2, q, float, f, 32, 4, HUGE_VALF);
+  TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4);
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_inf, " FP special (inf)");
+
+  VDUP(vector, q, float, f, 32, 4, -HUGE_VALF);
+  VDUP(vector2, q, float, f, 32, 4, 1.0f);
+  TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4);
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_minf, " FP special (-inf)");
+
+  VDUP(vector, q, float, f, 32, 4, 0.0f);
+  VDUP(vector2, q, float, f, 32, 4, -0.0f);
+  TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4);
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_zero1, " FP special (-0.0)");
+
+  VDUP(vector, q, float, f, 32, 4, -0.0f);
+  VDUP(vector2, q, float, f, 32, 4, 0.0f);
+  TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4);
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_zero2, " FP special (-0.0)");
+#endif
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+  return 0;
+}
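To make the macro layering above concrete, one instantiation of TEST_BINARY_OP for the int8x8 variant of vmax boils down to the following (an illustrative sketch with the VECT_VAR name mangling elided, not generated output):

#include <arm_neon.h>

/* Sketch of what TEST_BINARY_OP(vmax, , int, s, 8, 8) does after the
   VLOAD/VDUP initialization: call the intrinsic, then store the
   result so CHECK can compare it against the expected array.  */
void
example_vmax_int8x8 (int8_t *result, const int8_t *buffer)
{
  int8x8_t vector = vld1_s8 (buffer);       /* VLOAD from buffer.  */
  int8x8_t vector2 = vdup_n_s8 (-13);       /* VDUP comparison value.  */
  int8x8_t vector_res = vmax_s8 (vector, vector2);
  vst1_s8 (result, vector_res);             /* vst1 for the later CHECK.  */
}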
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vhadd.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vhadd.c
new file mode 100644
index 0000000..0c67df9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vhadd.c
@@ -0,0 +1,54 @@ 
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vhadd
+#define TEST_MSG "VHADD/VHADDQ"
+
+#define NO_FLOAT_VARIANT
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf1, 0xf2, 0xf2, 0xf3,
+				       0xf3, 0xf4, 0xf4, 0xf5 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff1, 0xfff1, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf1, 0xf2, 0xf2, 0xf3,
+					0xf3, 0xf4, 0xf4, 0xf5 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff1, 0xfff2 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf3, 0xf3,
+					0xf4, 0xf4, 0xf5, 0xf5,
+					0xf6, 0xf6, 0xf7, 0xf7,
+					0xf8, 0xf8, 0xf9, 0xf9 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff1, 0xfff2, 0xfff2, 0xfff3,
+					0xfff3, 0xfff4, 0xfff4, 0xfff5 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
+					0xfffffff1, 0xfffffff2 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+					0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf4, 0xf5, 0xf5, 0xf6,
+					 0xf6, 0xf7, 0xf7, 0xf8,
+					 0xf8, 0xf9, 0xf9, 0xfa,
+					 0xfa, 0xfb, 0xfb, 0xfc };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff1, 0xfff1, 0xfff2, 0xfff2,
+					 0xfff3, 0xfff3, 0xfff4, 0xfff4 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
+					 0xfffffff1, 0xfffffff2 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+#include "binary_op_no64.inc"
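As a sanity check on the first lane above (assuming the usual compute-ref-data buffer, whose int8 elements start at -16): vhadd computes the truncating halving add (a + b) >> 1, so lane 0 is (-16 + -13) >> 1 = -15, i.e. 0xf1, matching the first expected int8x8 entry.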
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vhsub.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vhsub.c
new file mode 100644
index 0000000..2431288
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vhsub.c
@@ -0,0 +1,52 @@ 
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vhsub
+#define TEST_MSG "VHSUB/VHSUBQ"
+
+#define NO_FLOAT_VARIANT
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xfe, 0xff, 0xff, 0x0,
+				       0x0, 0x1, 0x1, 0x2 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xffff, 0xffff, 0x0, 0x0 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xfe, 0xff, 0xff, 0x0,
+					0x0, 0x1, 0x1, 0x2 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0x0, 0x0, 0x1 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xfe, 0xfe, 0xff, 0xff,
+					0x0, 0x0, 0x1, 0x1,
+					0x2, 0x2, 0x3, 0x3,
+					0x4, 0x4, 0x5, 0x5 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfffe, 0xffff, 0xffff, 0x0,
+					0x0, 0x1, 0x1, 0x2 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffff, 0x0, 0x0, 0x1 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+					0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xfb, 0xfc, 0xfc, 0xfd,
+					 0xfd, 0xfe, 0xfe, 0xff,
+					 0xff, 0x0, 0x0, 0x1,
+					 0x1, 0x2, 0x2, 0x3 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0x0, 0x0,
+					 0x1, 0x1, 0x2, 0x2 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0x0, 0x0, 0x1 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+#include "binary_op_no64.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmax.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmax.c
new file mode 100644
index 0000000..2591b16
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmax.c
@@ -0,0 +1,64 @@ 
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vmax
+#define TEST_MSG "VMAX/VMAXQ"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
+				       0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
+					0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1780000, 0xc1700000 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf4, 0xf4, 0xf4, 0xf4,
+					0xf4, 0xf5, 0xf6, 0xf7,
+					0xf8, 0xf9, 0xfa, 0xfb,
+					0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3,
+					0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff1,
+					0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+					0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf9, 0xf9, 0xf9, 0xf9,
+					 0xf9, 0xf9, 0xf9, 0xf9,
+					 0xf9, 0xf9, 0xfa, 0xfb,
+					 0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff3,
+					 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff1, 0xfffffff1,
+					 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1680000, 0xc1680000,
+					   0xc1600000, 0xc1500000 };
+
+/* Expected results with special FP values.  */
+VECT_VAR_DECL(expected_nan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
+					       0x7fc00000, 0x7fc00000 };
+VECT_VAR_DECL(expected_mnan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
+						0x7fc00000, 0x7fc00000 };
+VECT_VAR_DECL(expected_inf,hfloat,32,4) [] = { 0x7f800000, 0x7f800000,
+					       0x7f800000, 0x7f800000 };
+VECT_VAR_DECL(expected_minf,hfloat,32,4) [] = { 0x3f800000, 0x3f800000,
+						0x3f800000, 0x3f800000 };
+VECT_VAR_DECL(expected_zero1,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_zero2,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+
+#include "binary_op_no64.inc"
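For reference, the special-value expectations above reflect how the test expects VMAX to behave: a NaN in either operand produces the default NaN (0x7fc00000, consistent with the DN discussion earlier in the thread), vmax(1.0f, +Inf) is +Inf (0x7f800000), vmax(-Inf, 1.0f) is 1.0f (0x3f800000), and both signed-zero combinations yield +0.0 (0x0).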
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmin.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmin.c
new file mode 100644
index 0000000..2b5e87c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmin.c
@@ -0,0 +1,66 @@ 
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vmin
+#define TEST_MSG "VMIN/VMINQ"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+				       0xf3, 0xf3, 0xf3, 0xf3 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					0xf3, 0xf3, 0xf3, 0xf3 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1780000 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					0xf4, 0xf4, 0xf4, 0xf4,
+					0xf4, 0xf4, 0xf4, 0xf4,
+					0xf4, 0xf4, 0xf4, 0xf4 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+					0xfff3, 0xfff3, 0xfff3, 0xfff3 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
+					0xfffffff1, 0xfffffff1 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+					0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					 0xf4, 0xf5, 0xf6, 0xf7,
+					 0xf8, 0xf9, 0xf9, 0xf9,
+					 0xf9, 0xf9, 0xf9, 0xf9 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff2,
+					 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
+					 0xfffffff1, 0xfffffff1 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
+					   0xc1680000, 0xc1680000 };
+/* Expected results with special FP values.  */
+VECT_VAR_DECL(expected_nan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
+					       0x7fc00000, 0x7fc00000 };
+VECT_VAR_DECL(expected_mnan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
+						0x7fc00000, 0x7fc00000 };
+VECT_VAR_DECL(expected_inf,hfloat,32,4) [] = { 0x3f800000, 0x3f800000,
+					       0x3f800000, 0x3f800000 };
+VECT_VAR_DECL(expected_minf,hfloat,32,4) [] = { 0xff800000, 0xff800000,
+						0xff800000, 0xff800000 };
+VECT_VAR_DECL(expected_zero1,hfloat,32,4) [] = { 0x80000000, 0x80000000,
+						 0x80000000, 0x80000000 };
+VECT_VAR_DECL(expected_zero2,hfloat,32,4) [] = { 0x80000000, 0x80000000,
+						 0x80000000, 0x80000000 };
+
+#include "binary_op_no64.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrhadd.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrhadd.c
new file mode 100644
index 0000000..8629beb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrhadd.c
@@ -0,0 +1,54 @@ 
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vrhadd
+#define TEST_MSG "VRHADD/VRHADDQ"
+
+#define NO_FLOAT_VARIANT
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf2, 0xf3, 0xf3,
+				       0xf4, 0xf4, 0xf5, 0xf5 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff1, 0xfff2, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf2, 0xf2, 0xf3, 0xf3,
+					0xf4, 0xf4, 0xf5, 0xf5 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf3, 0xf3, 0xf4,
+					0xf4, 0xf5, 0xf5, 0xf6,
+					0xf6, 0xf7, 0xf7, 0xf8,
+					0xf8, 0xf9, 0xf9, 0xfa };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff2, 0xfff2, 0xfff3, 0xfff3,
+					0xfff4, 0xfff4, 0xfff5, 0xfff5 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff1,
+					0xfffffff2, 0xfffffff2 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+					0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf5, 0xf5, 0xf6, 0xf6,
+					 0xf7, 0xf7, 0xf8, 0xf8,
+					 0xf9, 0xf9, 0xfa, 0xfa,
+					 0xfb, 0xfb, 0xfc, 0xfc };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff1, 0xfff2, 0xfff2, 0xfff3,
+					 0xfff3, 0xfff4, 0xfff4, 0xfff5 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff1, 0xfffffff1,
+					 0xfffffff2, 0xfffffff2 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+#include "binary_op_no64.inc"
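Unlike vhadd, vrhadd rounds the halving add: (a + b + 1) >> 1. With the same assumed buffer start of -16 and a comparison value of -13, lane 0 is (-16 + -13 + 1) >> 1 = -14, i.e. 0xf2, matching the first expected int8x8 entry above.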