@@ -3863,13 +3863,23 @@ (define_insn "vec_extract_hi_<mode>"
(match_operand:VI8F_256 1 "register_operand" "x,x")
(parallel [(const_int 2) (const_int 3)])))]
"TARGET_AVX"
- "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
+{
+ if (get_attr_mode (insn) == MODE_OI)
+ return "vextracti128\t{$0x1, %1, %0|%0, %1, 0x1}";
+ else
+ return "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}";
+}
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set_attr "memory" "none,store")
(set_attr "prefix" "vex")
- (set_attr "mode" "V8SF")])
+ (set (attr "mode")
+ (if_then_else
+ (and (match_test "TARGET_AVX2")
+ (eq (const_string "<MODE>mode") (const_string "V4DImode")))
+ (const_string "OI")
+ (const_string "V4DF")))])
(define_insn_and_split "vec_extract_lo_<mode>"
[(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
@@ -3898,13 +3908,23 @@ (define_insn "vec_extract_hi_<mode>"
(parallel [(const_int 4) (const_int 5)
(const_int 6) (const_int 7)])))]
"TARGET_AVX"
- "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
+{
+ if (get_attr_mode (insn) == MODE_OI)
+ return "vextracti128\t{$0x1, %1, %0|%0, %1, 0x1}";
+ else
+ return "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}";
+}
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set_attr "memory" "none,store")
(set_attr "prefix" "vex")
- (set_attr "mode" "V8SF")])
+ (set (attr "mode")
+ (if_then_else
+ (and (match_test "TARGET_AVX2")
+ (eq (const_string "<MODE>mode") (const_string "V8SImode")))
+ (const_string "OI")
+ (const_string "V8SF")))])
(define_insn_and_split "vec_extract_lo_v16hi"
[(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
@@ -3937,13 +3957,21 @@ (define_insn "vec_extract_hi_v16hi"
(const_int 12) (const_int 13)
(const_int 14) (const_int 15)])))]
"TARGET_AVX"
- "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
+{
+ if (get_attr_mode (insn) == MODE_OI)
+ return "vextracti128\t{$0x1, %1, %0|%0, %1, 0x1}";
+ else
+ return "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}";
+}
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set_attr "memory" "none,store")
(set_attr "prefix" "vex")
- (set_attr "mode" "V8SF")])
+ (set (attr "mode")
+ (if_then_else (match_test "TARGET_AVX2")
+ (const_string "OI")
+ (const_string "V8SF")))])
(define_insn_and_split "vec_extract_lo_v32qi"
[(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
@@ -3984,13 +4012,21 @@ (define_insn "vec_extract_hi_v32qi"
(const_int 28) (const_int 29)
(const_int 30) (const_int 31)])))]
"TARGET_AVX"
- "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
+{
+ if (get_attr_mode (insn) == MODE_OI)
+ return "vextracti128\t{$0x1, %1, %0|%0, %1, 0x1}";
+ else
+ return "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}";
+}
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set_attr "memory" "none,store")
(set_attr "prefix" "vex")
- (set_attr "mode" "V8SF")])
+ (set (attr "mode")
+ (if_then_else (match_test "TARGET_AVX2")
+ (const_string "OI")
+ (const_string "V8SF")))])
(define_insn "*sse4_1_extractps"
[(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
@@ -4024,7 +4060,10 @@ (define_insn_and_split "*vec_extract_v4s
;; Modes handled by vec_extract patterns.
(define_mode_iterator VEC_EXTRACT_MODE
- [V16QI V8HI V4SI V2DI
+ [(V32QI "TARGET_AVX") V16QI
+ (V16HI "TARGET_AVX") V8HI
+ (V8SI "TARGET_AVX") V4SI
+ (V4DI "TARGET_AVX") V2DI
(V8SF "TARGET_AVX") V4SF
(V4DF "TARGET_AVX") V2DF])
@@ -11952,7 +11991,7 @@ (define_insn "vec_set_lo_<mode>"
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set_attr "prefix" "vex")
- (set_attr "mode" "V8SF")])
+ (set_attr "mode" "V4DF")])
(define_insn "vec_set_hi_<mode>"
[(set (match_operand:VI8F_256 0 "register_operand" "=x")
@@ -11967,7 +12006,7 @@ (define_insn "vec_set_hi_<mode>"
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set_attr "prefix" "vex")
- (set_attr "mode" "V8SF")])
+ (set_attr "mode" "V4DF")])
(define_insn "vec_set_lo_<mode>"
[(set (match_operand:VI4F_256 0 "register_operand" "=x")
@@ -12158,17 +12197,29 @@ (define_expand "vec_init<mode>"
DONE;
})
-(define_insn "avx2_extracti128"
- [(set (match_operand:V2DI 0 "register_operand" "=x")
- (vec_select:V2DI
- (match_operand:V4DI 1 "nonimmediate_operand" "xm")
- (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
+(define_expand "avx2_extracti128"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4DI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_0_to_1_operand" "")]
"TARGET_AVX2"
- "vextracti128\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemov")
- (set_attr "prefix_extra" "1")
- (set_attr "prefix" "vex")
- (set_attr "mode" "OI")])
+{
+  rtx (*insn)(rtx, rtx);
+
+  /* Operand 2 is the 0/1 selector: 0 extracts the low half, 1 the high.  */
+  switch (INTVAL (operands[2]))
+    {
+    case 0:
+      insn = gen_vec_extract_lo_v4di;
+      break;
+    case 1:
+      insn = gen_vec_extract_hi_v4di;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+  emit_insn (insn (operands[0], operands[1]));
+  DONE;
+})
(define_expand "avx2_inserti128"
[(match_operand:V4DI 0 "register_operand" "")
@@ -32592,6 +32592,84 @@ ix86_expand_vector_extract (bool mmx_ok,
use_vec_extr = TARGET_SSE4_1;
break;
+ case V8SFmode:
+ if (TARGET_AVX)
+ {
+ tmp = gen_reg_rtx (V4SFmode);
+ if (elt < 4)
+ emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
+ else
+ emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
+ ix86_expand_vector_extract (false, target, tmp, elt & 3);
+ return;
+ }
+ break;
+
+ case V4DFmode:
+ if (TARGET_AVX)
+ {
+ tmp = gen_reg_rtx (V2DFmode);
+ if (elt < 2)
+ emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
+ else
+ emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
+ ix86_expand_vector_extract (false, target, tmp, elt & 1);
+ return;
+ }
+ break;
+
+ case V32QImode:
+ if (TARGET_AVX)
+ {
+ tmp = gen_reg_rtx (V16QImode);
+ if (elt < 16)
+ emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
+ else
+ emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
+ ix86_expand_vector_extract (false, target, tmp, elt & 15);
+ return;
+ }
+ break;
+
+ case V16HImode:
+ if (TARGET_AVX)
+ {
+ tmp = gen_reg_rtx (V8HImode);
+ if (elt < 8)
+ emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
+ else
+ emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
+ ix86_expand_vector_extract (false, target, tmp, elt & 7);
+ return;
+ }
+ break;
+
+ case V8SImode:
+ if (TARGET_AVX)
+ {
+ tmp = gen_reg_rtx (V4SImode);
+ if (elt < 4)
+ emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
+ else
+ emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
+ ix86_expand_vector_extract (false, target, tmp, elt & 3);
+ return;
+ }
+ break;
+
+ case V4DImode:
+ if (TARGET_AVX)
+ {
+ tmp = gen_reg_rtx (V2DImode);
+ if (elt < 2)
+ emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
+ else
+ emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
+ ix86_expand_vector_extract (false, target, tmp, elt & 1);
+ return;
+ }
+ break;
+
case V8QImode:
/* ??? Could extract the appropriate HImode element and shift. */
default:
@@ -0,0 +1,102 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+/* { dg-require-effective-target sse2_runtime } */
+
+extern void abort (void);
+typedef unsigned long long uint64_t;
+
+#define vector(elcount, type) \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+
+#define FN(elcount, type, idx) \
+__attribute__((noinline, noclone)) \
+type f##type##elcount##_##idx (vector (elcount, type) x) { return x[idx] + 1; }
+#define T2(elcount, type) \
+ H (elcount, type) \
+ F (elcount, type, 0) \
+ F (elcount, type, 1)
+#define T4(elcount, type) \
+ T2 (elcount, type) \
+ F (elcount, type, 2) \
+ F (elcount, type, 3)
+#define T8(elcount, type) \
+ T4 (elcount, type) \
+ F (elcount, type, 4) \
+ F (elcount, type, 5) \
+ F (elcount, type, 6) \
+ F (elcount, type, 7)
+#define T16(elcount, type) \
+ T8 (elcount, type) \
+ F (elcount, type, 8) \
+ F (elcount, type, 9) \
+ F (elcount, type, 10) \
+ F (elcount, type, 11) \
+ F (elcount, type, 12) \
+ F (elcount, type, 13) \
+ F (elcount, type, 14) \
+ F (elcount, type, 15)
+#define T32(elcount, type) \
+ T16 (elcount, type) \
+ F (elcount, type, 16) \
+ F (elcount, type, 17) \
+ F (elcount, type, 18) \
+ F (elcount, type, 19) \
+ F (elcount, type, 20) \
+ F (elcount, type, 21) \
+ F (elcount, type, 22) \
+ F (elcount, type, 23) \
+ F (elcount, type, 24) \
+ F (elcount, type, 25) \
+ F (elcount, type, 26) \
+ F (elcount, type, 27) \
+ F (elcount, type, 28) \
+ F (elcount, type, 29) \
+ F (elcount, type, 30) \
+ F (elcount, type, 31)
+#define TESTS_SSE2 \
+T2 (2, double) E \
+T2 (2, uint64_t) E \
+T4 (4, float) E \
+T4 (4, int) E \
+T8 (8, short) E \
+T16 (16, char) E
+#define TESTS_AVX \
+T4 (4, double) E \
+T4 (4, uint64_t) E \
+T8 (8, float) E \
+T8 (8, int) E \
+T16 (16, short) E \
+T32 (32, char) E
+#ifdef __AVX__
+#define TESTS TESTS_SSE2 TESTS_AVX
+#else
+#define TESTS TESTS_SSE2
+#endif
+
+#define F FN
+#define H(elcount, type)
+#define E
+TESTS
+
+int
+main ()
+{
+#undef F
+#undef H
+#undef E
+#define H(elcount, type) \
+ vector (elcount, type) v##type##elcount = {
+#define E };
+#define F(elcount, type, idx) idx + 1,
+ TESTS
+#undef F
+#undef H
+#undef E
+#define H(elcount, type)
+#define E
+#define F(elcount, type, idx) \
+ if (f##type##elcount##_##idx (v##type##elcount) != idx + 2) \
+ abort ();
+ TESTS
+ return 0;
+}
@@ -0,0 +1,5 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx" } */
+/* { dg-require-effective-target avx_runtime } */
+
+#include "sse2-extract-1.c"