===================================================================
@@ -9596,7 +9596,7 @@
[(set (match_operand:V8HI 0 "register_operand" "=x")
(sign_extend:V8HI
(vec_select:V8QI
- (match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
(parallel [(const_int 0)
(const_int 1)
(const_int 2)
@@ -9612,32 +9612,11 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
-(define_insn "*sse4_1_extendv8qiv8hi2"
- [(set (match_operand:V8HI 0 "register_operand" "=x")
- (sign_extend:V8HI
- (vec_select:V8QI
- (vec_duplicate:V16QI
- (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
- (parallel [(const_int 0)
- (const_int 1)
- (const_int 2)
- (const_int 3)
- (const_int 4)
- (const_int 5)
- (const_int 6)
- (const_int 7)]))))]
- "TARGET_SSE4_1"
- "%vpmovsxbw\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssemov")
- (set_attr "prefix_extra" "1")
- (set_attr "prefix" "maybe_vex")
- (set_attr "mode" "TI")])
-
(define_insn "sse4_1_extendv4qiv4si2"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(sign_extend:V4SI
(vec_select:V4QI
- (match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
(parallel [(const_int 0)
(const_int 1)
(const_int 2)
@@ -9649,28 +9628,11 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
-(define_insn "*sse4_1_extendv4qiv4si2"
- [(set (match_operand:V4SI 0 "register_operand" "=x")
- (sign_extend:V4SI
- (vec_select:V4QI
- (vec_duplicate:V16QI
- (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
- (parallel [(const_int 0)
- (const_int 1)
- (const_int 2)
- (const_int 3)]))))]
- "TARGET_SSE4_1"
- "%vpmovsxbd\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssemov")
- (set_attr "prefix_extra" "1")
- (set_attr "prefix" "maybe_vex")
- (set_attr "mode" "TI")])
-
(define_insn "sse4_1_extendv2qiv2di2"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(sign_extend:V2DI
(vec_select:V2QI
- (match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
(parallel [(const_int 0)
(const_int 1)]))))]
"TARGET_SSE4_1"
@@ -9680,26 +9642,11 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
-(define_insn "*sse4_1_extendv2qiv2di2"
- [(set (match_operand:V2DI 0 "register_operand" "=x")
- (sign_extend:V2DI
- (vec_select:V2QI
- (vec_duplicate:V16QI
- (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
- (parallel [(const_int 0)
- (const_int 1)]))))]
- "TARGET_SSE4_1"
- "%vpmovsxbq\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssemov")
- (set_attr "prefix_extra" "1")
- (set_attr "prefix" "maybe_vex")
- (set_attr "mode" "TI")])
-
(define_insn "sse4_1_extendv4hiv4si2"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(sign_extend:V4SI
(vec_select:V4HI
- (match_operand:V8HI 1 "register_operand" "x")
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
(parallel [(const_int 0)
(const_int 1)
(const_int 2)
@@ -9711,28 +9658,11 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
-(define_insn "*sse4_1_extendv4hiv4si2"
- [(set (match_operand:V4SI 0 "register_operand" "=x")
- (sign_extend:V4SI
- (vec_select:V4HI
- (vec_duplicate:V8HI
- (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
- (parallel [(const_int 0)
- (const_int 1)
- (const_int 2)
- (const_int 3)]))))]
- "TARGET_SSE4_1"
- "%vpmovsxwd\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssemov")
- (set_attr "prefix_extra" "1")
- (set_attr "prefix" "maybe_vex")
- (set_attr "mode" "TI")])
-
(define_insn "sse4_1_extendv2hiv2di2"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(sign_extend:V2DI
(vec_select:V2HI
- (match_operand:V8HI 1 "register_operand" "x")
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
(parallel [(const_int 0)
(const_int 1)]))))]
"TARGET_SSE4_1"
@@ -9742,26 +9672,11 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
-(define_insn "*sse4_1_extendv2hiv2di2"
- [(set (match_operand:V2DI 0 "register_operand" "=x")
- (sign_extend:V2DI
- (vec_select:V2HI
- (vec_duplicate:V8HI
- (match_operand:V8HI 1 "nonimmediate_operand" "xm"))
- (parallel [(const_int 0)
- (const_int 1)]))))]
- "TARGET_SSE4_1"
- "%vpmovsxwq\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssemov")
- (set_attr "prefix_extra" "1")
- (set_attr "prefix" "maybe_vex")
- (set_attr "mode" "TI")])
-
(define_insn "sse4_1_extendv2siv2di2"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(sign_extend:V2DI
(vec_select:V2SI
- (match_operand:V4SI 1 "register_operand" "x")
+ (match_operand:V4SI 1 "nonimmediate_operand" "xm")
(parallel [(const_int 0)
(const_int 1)]))))]
"TARGET_SSE4_1"
@@ -9771,26 +9686,11 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
-(define_insn "*sse4_1_extendv2siv2di2"
- [(set (match_operand:V2DI 0 "register_operand" "=x")
- (sign_extend:V2DI
- (vec_select:V2SI
- (vec_duplicate:V4SI
- (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
- (parallel [(const_int 0)
- (const_int 1)]))))]
- "TARGET_SSE4_1"
- "%vpmovsxdq\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssemov")
- (set_attr "prefix_extra" "1")
- (set_attr "prefix" "maybe_vex")
- (set_attr "mode" "TI")])
-
(define_insn "sse4_1_zero_extendv8qiv8hi2"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(zero_extend:V8HI
(vec_select:V8QI
- (match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
(parallel [(const_int 0)
(const_int 1)
(const_int 2)
@@ -9806,32 +9706,11 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
-(define_insn "*sse4_1_zero_extendv8qiv8hi2"
- [(set (match_operand:V8HI 0 "register_operand" "=x")
- (zero_extend:V8HI
- (vec_select:V8QI
- (vec_duplicate:V16QI
- (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
- (parallel [(const_int 0)
- (const_int 1)
- (const_int 2)
- (const_int 3)
- (const_int 4)
- (const_int 5)
- (const_int 6)
- (const_int 7)]))))]
- "TARGET_SSE4_1"
- "%vpmovzxbw\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssemov")
- (set_attr "prefix_extra" "1")
- (set_attr "prefix" "maybe_vex")
- (set_attr "mode" "TI")])
-
(define_insn "sse4_1_zero_extendv4qiv4si2"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(zero_extend:V4SI
(vec_select:V4QI
- (match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
(parallel [(const_int 0)
(const_int 1)
(const_int 2)
@@ -9843,28 +9722,11 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
-(define_insn "*sse4_1_zero_extendv4qiv4si2"
- [(set (match_operand:V4SI 0 "register_operand" "=x")
- (zero_extend:V4SI
- (vec_select:V4QI
- (vec_duplicate:V16QI
- (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
- (parallel [(const_int 0)
- (const_int 1)
- (const_int 2)
- (const_int 3)]))))]
- "TARGET_SSE4_1"
- "%vpmovzxbd\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssemov")
- (set_attr "prefix_extra" "1")
- (set_attr "prefix" "maybe_vex")
- (set_attr "mode" "TI")])
-
(define_insn "sse4_1_zero_extendv2qiv2di2"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(zero_extend:V2DI
(vec_select:V2QI
- (match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
(parallel [(const_int 0)
(const_int 1)]))))]
"TARGET_SSE4_1"
@@ -9874,26 +9736,11 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
-(define_insn "*sse4_1_zero_extendv2qiv2di2"
- [(set (match_operand:V2DI 0 "register_operand" "=x")
- (zero_extend:V2DI
- (vec_select:V2QI
- (vec_duplicate:V16QI
- (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
- (parallel [(const_int 0)
- (const_int 1)]))))]
- "TARGET_SSE4_1"
- "%vpmovzxbq\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssemov")
- (set_attr "prefix_extra" "1")
- (set_attr "prefix" "maybe_vex")
- (set_attr "mode" "TI")])
-
(define_insn "sse4_1_zero_extendv4hiv4si2"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(zero_extend:V4SI
(vec_select:V4HI
- (match_operand:V8HI 1 "register_operand" "x")
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
(parallel [(const_int 0)
(const_int 1)
(const_int 2)
@@ -9905,28 +9752,11 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
-(define_insn "*sse4_1_zero_extendv4hiv4si2"
- [(set (match_operand:V4SI 0 "register_operand" "=x")
- (zero_extend:V4SI
- (vec_select:V4HI
- (vec_duplicate:V8HI
- (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
- (parallel [(const_int 0)
- (const_int 1)
- (const_int 2)
- (const_int 3)]))))]
- "TARGET_SSE4_1"
- "%vpmovzxwd\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssemov")
- (set_attr "prefix_extra" "1")
- (set_attr "prefix" "maybe_vex")
- (set_attr "mode" "TI")])
-
(define_insn "sse4_1_zero_extendv2hiv2di2"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(zero_extend:V2DI
(vec_select:V2HI
- (match_operand:V8HI 1 "register_operand" "x")
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
(parallel [(const_int 0)
(const_int 1)]))))]
"TARGET_SSE4_1"
@@ -9936,26 +9766,11 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
-(define_insn "*sse4_1_zero_extendv2hiv2di2"
- [(set (match_operand:V2DI 0 "register_operand" "=x")
- (zero_extend:V2DI
- (vec_select:V2HI
- (vec_duplicate:V8HI
- (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
- (parallel [(const_int 0)
- (const_int 1)]))))]
- "TARGET_SSE4_1"
- "%vpmovzxwq\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssemov")
- (set_attr "prefix_extra" "1")
- (set_attr "prefix" "maybe_vex")
- (set_attr "mode" "TI")])
-
(define_insn "sse4_1_zero_extendv2siv2di2"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(zero_extend:V2DI
(vec_select:V2SI
- (match_operand:V4SI 1 "register_operand" "x")
+ (match_operand:V4SI 1 "nonimmediate_operand" "xm")
(parallel [(const_int 0)
(const_int 1)]))))]
"TARGET_SSE4_1"
@@ -9965,21 +9780,6 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
-(define_insn "*sse4_1_zero_extendv2siv2di2"
- [(set (match_operand:V2DI 0 "register_operand" "=x")
- (zero_extend:V2DI
- (vec_select:V2SI
- (vec_duplicate:V4SI
- (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
- (parallel [(const_int 0)
- (const_int 1)]))))]
- "TARGET_SSE4_1"
- "%vpmovzxdq\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssemov")
- (set_attr "prefix_extra" "1")
- (set_attr "prefix" "maybe_vex")
- (set_attr "mode" "TI")])
-
;; ptestps/ptestpd are very similar to comiss and ucomiss when
;; setting FLAGS_REG. But it is not a really compare instruction.
(define_insn "avx_vtest<ssemodesuffix><avxmodesuffix>"
Hello! As said in the PR: This patch removes special (vec_duplicate ...) forms of zero/sign extension instructions. This is similar to existing sse2_cvtps2pd pattern that access full 128bit memory even if only low 64bits are used. Also, current gcc generates: movdqa (%rdi), %xmm0 # 6 *movv16qi_internal/2 [length = 4] pmovzxbd %xmm0, %xmm0 # 7 sse4_1_zero_extendv4qiv4si2 which also access full 128bit 16byte aligned value. This is no better than: pmovzxbd (%rdi), %xmm0 # 7 sse4_1_zero_extendv4qiv4si2 2010-08-27 Uros Bizjak <ubizjak@gmail.com> PR target/41484 * config/i386/sse.md (sse4_1_extendv8qiv8hi2): Also accept memory operands for operand 1. (sse4_1_extendv4qiv4si2): Ditto. (sse4_1_extendv2qiv2di2): Ditto. (sse4_1_extendv4hiv4si2): Ditto. (sse4_1_extendv2hiv2di2): Ditto. (sse4_1_extendv2siv2di2): Ditto. (sse4_1_zero_extendv8qiv8hi2): Ditto. (sse4_1_zero_extendv4qiv4si2): Ditto. (sse4_1_zero_extendv2qiv2di2): Ditto. (sse4_1_zero_extendv4hiv4si2): Ditto. (sse4_1_zero_extendv2hiv2di2): Ditto. (sse4_1_zero_extendv2siv2di2): Ditto. (*sse4_1_extendv8qiv8hi2): Remove insn pattern. (*sse4_1_extendv4qiv4si2): Ditto. (*sse4_1_extendv2qiv2di2): Ditto. (*sse4_1_extendv4hiv4si2): Ditto. (*sse4_1_extendv2hiv2di2): Ditto. (*sse4_1_extendv2siv2di2): Ditto. (*sse4_1_zero_extendv8qiv8hi2): Ditto. (*sse4_1_zero_extendv4qiv4si2): Ditto. (*sse4_1_zero_extendv2qiv2di2): Ditto. (*sse4_1_zero_extendv4hiv4si2): Ditto. (*sse4_1_zero_extendv2hiv2di2): Ditto. (*sse4_1_zero_extendv2siv2di2): Ditto. Patch was bootstrapped on x86_64-pc-linux-gnu and regression tested by H.J. on sse4.1 capable target. Patch was committed to mainline, will be backported to 4.4 and 4.5 branch. Uros.