@@ -2544,6 +2548,27 @@ (define_insn "avx_cvtpd2dq256"
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
+(define_expand "avx_cvtpd2dq256_2" ;; Named expander: V4DF source converted to V4SI and widened to a V8SI result.
+ [(set (match_operand:V8SI 0 "register_operand" "")
+ (vec_concat:V8SI ;; Result is the converted V4SI concatenated with operand 2.
+ (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "")]
+ UNSPEC_FIX_NOTRUNC) ;; Same unspec as the insn "*avx_cvtpd2dq256_2", which emits vcvtpd2dq.
+ (match_dup 2)))] ;; Operand 2 is not supplied by callers; the preparation statement sets it.
+ "TARGET_AVX"
+ "operands[2] = CONST0_RTX (V4SImode);") ;; Second vec_concat operand is the V4SI zero vector.
+
+(define_insn "*avx_cvtpd2dq256_2" ;; Insn matching the RTL shape produced by the "avx_cvtpd2dq256_2" expander.
+ [(set (match_operand:V8SI 0 "register_operand" "=x")
+ (vec_concat:V8SI
+ (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")] ;; Source may be a register (x) or memory (m).
+ UNSPEC_FIX_NOTRUNC)
+ (match_operand:V4SI 2 "const0_operand" "")))] ;; Second half must be constant zero for this insn to match.
+ "TARGET_AVX"
+ "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}" ;; Braces give AT&T|Intel alternatives; %x0 presumably prints operand 0 as an xmm register -- confirm.
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_expand "sse2_cvtpd2dq"
[(set (match_operand:V4SI 0 "register_operand" "")
(vec_concat:V4SI
@@ -2584,6 +2609,25 @@ (define_insn "avx_cvttpd2dq256"
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
+(define_expand "avx_cvttpd2dq256_2" ;; Named expander: V4DF source converted with fix:V4SI and widened to a V8SI result.
+ [(set (match_operand:V8SI 0 "register_operand" "")
+ (vec_concat:V8SI ;; Result is the converted V4SI concatenated with operand 2.
+ (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "")) ;; Matched by "*avx_cvttpd2dq256_2", which emits vcvttpd2dq.
+ (match_dup 2)))] ;; Operand 2 is not supplied by callers; the preparation statement sets it.
+ "TARGET_AVX"
+ "operands[2] = CONST0_RTX (V4SImode);") ;; Second vec_concat operand is the V4SI zero vector.
+
+(define_insn "*avx_cvttpd2dq256_2" ;; Insn matching the RTL shape produced by the "avx_cvttpd2dq256_2" expander.
+ [(set (match_operand:V8SI 0 "register_operand" "=x")
+ (vec_concat:V8SI
+ (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")) ;; Source may be a register (x) or memory (m).
+ (match_operand:V4SI 2 "const0_operand" "")))] ;; Second half must be constant zero for this insn to match.
+ "TARGET_AVX"
+ "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}" ;; Braces give AT&T|Intel alternatives; %x0 presumably prints operand 0 as an xmm register -- confirm.
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_expand "sse2_cvttpd2dq"
[(set (match_operand:V4SI 0 "register_operand" "")
(vec_concat:V4SI
@@ -3027,6 +3075,23 @@ (define_expand "vec_pack_trunc_v2df"
DONE;
})
+(define_expand "vec_pack_sfix_trunc_v4df" ;; Packs two V4DF inputs into one V8SI using the vcvttpd2dq-based expander.
+ [(match_operand:V8SI 0 "register_operand" "") ;; Destination of the final permute.
+ (match_operand:V4DF 1 "nonimmediate_operand" "") ;; First source, converted into r1.
+ (match_operand:V4DF 2 "nonimmediate_operand" "")] ;; Second source, converted into r2.
+ "TARGET_AVX"
+{
+ rtx r1, r2;
+
+ r1 = gen_reg_rtx (V8SImode);
+ r2 = gen_reg_rtx (V8SImode);
+
+ emit_insn (gen_avx_cvttpd2dq256_2 (r1, operands[1])); /* r1 = converted operand 1 concatenated with a zero V4SI.  */
+ emit_insn (gen_avx_cvttpd2dq256_2 (r2, operands[2])); /* r2 = converted operand 2 concatenated with a zero V4SI.  */
+ emit_insn (gen_avx_vperm2f128v8si3 (operands[0], r1, r2, GEN_INT (0x20))); /* Selector 0x20 presumably joins the first 128-bit halves of r1 and r2 -- confirm against VPERM2F128.  */
+ DONE;
+})
+
(define_expand "vec_pack_sfix_trunc_v2df"
[(match_operand:V4SI 0 "register_operand" "")
(match_operand:V2DF 1 "nonimmediate_operand" "")
@@ -3046,6 +3111,23 @@ (define_expand "vec_pack_sfix_trunc_v2df
DONE;
})
+(define_expand "vec_pack_sfix_v4df" ;; Packs two V4DF inputs into one V8SI using the vcvtpd2dq-based expander.
+ [(match_operand:V8SI 0 "register_operand" "") ;; Destination of the final permute.
+ (match_operand:V4DF 1 "nonimmediate_operand" "") ;; First source, converted into r1.
+ (match_operand:V4DF 2 "nonimmediate_operand" "")] ;; Second source, converted into r2.
+ "TARGET_AVX"
+{
+ rtx r1, r2;
+
+ r1 = gen_reg_rtx (V8SImode);
+ r2 = gen_reg_rtx (V8SImode);
+
+ emit_insn (gen_avx_cvtpd2dq256_2 (r1, operands[1])); /* r1 = converted operand 1 concatenated with a zero V4SI.  */
+ emit_insn (gen_avx_cvtpd2dq256_2 (r2, operands[2])); /* r2 = converted operand 2 concatenated with a zero V4SI.  */
+ emit_insn (gen_avx_vperm2f128v8si3 (operands[0], r1, r2, GEN_INT (0x20))); /* Selector 0x20 presumably joins the first 128-bit halves of r1 and r2 -- confirm against VPERM2F128.  */
+ DONE;
+})
+
(define_expand "vec_pack_sfix_v2df"
[(match_operand:V4SI 0 "register_operand" "")
(match_operand:V2DF 1 "nonimmediate_operand" "")
@@ -0,0 +1,111 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -msse2 -mno-avx" } */
+/* { dg-require-effective-target sse2 } */
+
+#ifndef CHECK_H /* Wrapper tests may define CHECK_H before including this file.  */
+#define CHECK_H "sse2-check.h"
+#endif
+
+#ifndef TEST /* Likewise for the name of the test entry point.  */
+#define TEST sse2_test
+#endif
+
+#include CHECK_H
+
+#define N 16 /* Element count of every array and trip count of every loop below.  */
+float f[N]; /* Shared float buffer read or written by f2, f3, f4 and f6.  */
+double d[N]; /* Shared double buffer read or written by f1, f3, f5 and f6.  */
+int n[N]; /* Shared int buffer read or written by f1, f2, f4 and f5.  */
+
+__attribute__((noinline)) void /* noinline keeps this conversion loop in its own function.  */
+f1 (void) /* Converts d[] (double) into n[] (int), element by element.  */
+{
+ int i;
+ for (i = 0; i < N; i++)
+ n[i] = d[i]; /* Implicit double -> int conversion.  */
+}
+
+__attribute__((noinline)) void /* noinline keeps this conversion loop in its own function.  */
+f2 (void) /* Converts n[] (int) into f[] (float), element by element.  */
+{
+ int i;
+ for (i = 0; i < N; i++)
+ f[i] = n[i]; /* Implicit int -> float conversion.  */
+}
+
+__attribute__((noinline)) void /* noinline keeps this conversion loop in its own function.  */
+f3 (void) /* Converts f[] (float) into d[] (double), element by element.  */
+{
+ int i;
+ for (i = 0; i < N; i++)
+ d[i] = f[i]; /* Implicit float -> double conversion.  */
+}
+
+__attribute__((noinline)) void /* noinline keeps this conversion loop in its own function.  */
+f4 (void) /* Converts f[] (float) into n[] (int), element by element.  */
+{
+ int i;
+ for (i = 0; i < N; i++)
+ n[i] = f[i]; /* Implicit float -> int conversion.  */
+}
+
+__attribute__((noinline)) void /* noinline keeps this conversion loop in its own function.  */
+f5 (void) /* Converts n[] (int) into d[] (double), element by element.  */
+{
+ int i;
+ for (i = 0; i < N; i++)
+ d[i] = n[i]; /* Implicit int -> double conversion.  */
+}
+
+__attribute__((noinline)) void /* noinline keeps this conversion loop in its own function.  */
+f6 (void) /* Converts d[] (double) into f[] (float), element by element.  */
+{
+ int i;
+ for (i = 0; i < N; i++)
+ f[i] = d[i]; /* Implicit double -> float conversion.  */
+}
+
+static void /* Test driver: seeds inputs, runs f1..f6 in order and aborts on any mismatch.  */
+TEST () /* TEST expands to sse2_test unless the including file defined it first.  */
+{
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ asm (""); /* Empty asm inside the init loop; presumably keeps it from being vectorized or folded -- confirm.  */
+ d[i] = i + 2.5;
+ }
+ f1 (); /* double -> int: i + 2.5 must come out as i + 2.  */
+ for (i = 0; i < N; i++)
+ if (n[i] != i + 2)
+ abort ();
+ else
+ n[i] = i + 7; /* Seed the input of f2 while checking.  */
+ f2 (); /* int -> float: values must be preserved.  */
+ for (i = 0; i < N; i++)
+ if (f[i] != i + 7)
+ abort ();
+ else
+ f[i] = i - 2.25f; /* Seed the input of f3.  */
+ f3 (); /* float -> double: values must be preserved.  */
+ for (i = 0; i < N; i++)
+ if (d[i] != i - 2.25)
+ abort ();
+ else
+ f[i] = i + 3.5; /* Seed the input of f4.  */
+ f4 (); /* float -> int: i + 3.5 must come out as i + 3.  */
+ for (i = 0; i < N; i++)
+ if (n[i] != i + 3)
+ abort ();
+ else
+ n[i] = i + 9; /* Seed the input of f5.  */
+ f5 (); /* int -> double: values must be preserved.  */
+ for (i = 0; i < N; i++)
+ if (d[i] != i + 9)
+ abort ();
+ else
+ d[i] = i - 7.25; /* Seed the input of f6.  */
+ f6 (); /* double -> float: values must be preserved.  */
+ for (i = 0; i < N; i++)
+ if (f[i] != i - 7.25)
+ abort ();
+}
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -msse2 -mno-sse3 -fdump-tree-vect-details" } */
+
+#include "sse2-cvt-1.c"
+
+/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops in function" 6 "vect" } } */
+/* { dg-final { scan-assembler "cvttpd2dq" } } */
+/* { dg-final { scan-assembler "cvtdq2ps" } } */
+/* { dg-final { scan-assembler "cvtps2pd" } } */
+/* { dg-final { scan-assembler "cvttps2dq" } } */
+/* { dg-final { scan-assembler "cvtdq2pd" } } */
+/* { dg-final { scan-assembler "cvtpd2ps" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
@@ -0,0 +1,13 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -mavx -mno-avx2" } */
+/* { dg-require-effective-target avx_runtime } */
+
+#ifndef CHECK_H
+#define CHECK_H "avx-check.h" /* Header pulled in by the #include CHECK_H line of sse2-cvt-1.c.  */
+#endif
+
+#ifndef TEST
+#define TEST avx_test /* Name given to the static test driver defined in sse2-cvt-1.c.  */
+#endif
+
+#include "sse2-cvt-1.c" /* Reuses the SSE2 test body; its #ifndef guards keep the definitions above.  */
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx -mno-avx2 -fdump-tree-vect-details" } */
+
+#include "avx-cvt-1.c"
+
+/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops in function" 6 "vect" } } */
+/* { dg-final { scan-assembler "vcvttpd2dq(y\[^\n\r\]*%xmm|\[^\n\r\]*xmm\[^\n\r\]*YMMWORD PTR)" } } */
+/* { dg-final { scan-assembler "vcvtdq2ps\[^\n\r\]*xmm" } } */
+/* { dg-final { scan-assembler "vcvtps2pd\[^\n\r\]*(%xmm\[^\n\r\]*%ymm|ymm\[^\n\r\]*xmm)" } } */
+/* { dg-final { scan-assembler "vcvttps2dq\[^\n\r\]*ymm" } } */
+/* { dg-final { scan-assembler "vcvtdq2pd\[^\n\r\]*xmm\[^\n\r\]*xmm" } } */
+/* { dg-final { scan-assembler "vcvtpd2ps(y\[^\n\r\]*%xmm|\[^\n\r\]*xmm\[^\n\r\]*YMMWORD PTR)" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
@@ -0,0 +1,13 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -mavx2" } */
+/* { dg-require-effective-target avx2 } */
+
+#ifndef CHECK_H
+#define CHECK_H "avx2-check.h" /* Header pulled in by the #include CHECK_H line of sse2-cvt-1.c.  */
+#endif
+
+#ifndef TEST
+#define TEST avx2_test /* Name given to the static test driver defined in sse2-cvt-1.c.  */
+#endif
+
+#include "sse2-cvt-1.c" /* Reuses the SSE2 test body; its #ifndef guards keep the definitions above.  */
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx2 -fdump-tree-vect-details" } */
+
+#include "avx2-cvt-1.c"
+
+/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops in function" 6 "vect" } } */
+/* { dg-final { scan-assembler "vcvttpd2dq(y\[^\n\r\]*%xmm|\[^\n\r\]*xmm\[^\n\r\]*YMMWORD PTR)" } } */
+/* { dg-final { scan-assembler "vcvtdq2ps\[^\n\r\]*ymm" } } */
+/* { dg-final { scan-assembler "vcvtps2pd\[^\n\r\]*(%xmm\[^\n\r\]*%ymm|ymm\[^\n\r\]*xmm)" } } */
+/* { dg-final { scan-assembler "vcvttps2dq\[^\n\r\]*ymm" } } */
+/* { dg-final { scan-assembler "vcvtdq2pd\[^\n\r\]*(%xmm\[^\n\r\]*%ymm|ymm\[^\n\r\]*xmm)" } } */
+/* { dg-final { scan-assembler "vcvtpd2ps(y\[^\n\r\]*%xmm|\[^\n\r\]*xmm\[^\n\r\]*YMMWORD PTR)" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */