Patchwork [i386] : Macroize horizontal add/sub and integer mac patterns

login
register
mail settings
Submitter Uros Bizjak
Date April 14, 2012, 1:33 p.m.
Message ID <CAFULd4Zo8B6Qy08Ev4ReRGartBo+5fPV1TWMXDxbgXL-9Z+8Ng@mail.gmail.com>
Download mbox | patch
Permalink /patch/152506/
State New
Headers show

Comments

Uros Bizjak - April 14, 2012, 1:33 p.m.
Hello!

Mechanical patch that removes nearly 900 lines of code from sse.md.

No functional changes.

2012-04-14  Uros Bizjak  <ubizjak@gmail.com>

	* config/i386/sse.md (ssse3_plusminus): New code iterator.
	(avx2_ph<plusminus_mnemonic>wv16hi3): Macroize insn from
	avx2_ph{add,adds,sub,subs}wv16hi3 using ssse3_plusminus code iterator.
	(ssse3_ph<plusminus_mnemonic>wv8hi3): Macroize insn from
	ssse3_ph{add,adds,sub,subs}wv8hi3 using ssse3_plusminus code iterator.
	(ssse3_ph<plusminus_mnemonic>wv4hi3): Macroize insn from
	ssse3_ph{add,adds,sub,subs}wv4hi3 using ssse3_plusminus code iterator.

	(avx2_ph<plusminus_mnemonic>dv8si3): Macroize insn from
	avx2_ph{add,sub}dv8si3 using plusminus code iterator.
	(ssse3_ph<plusminus_mnemonic>dv4si3): Macroize insn from
	ssse3_ph{add,sub}dv4si3 using plusminus code iterator.
	(ssse3_ph<plusminus_mnemonic>dv2si3): Macroize insn from
	ssse3_ph{add,sub}dv2si3 using plusminus code iterator.

	(xop_plus): New code iterator.
	(macs): New code attribute.
	(madcs): Ditto.
	(xop_p<macs><ssemodesuffix><ssemodesuffix>): Macroize insn from
	xop_pmacs{,s}{ww,dd} using xop_plus code iterator and VI24_128 mode
	iterator.
	(xop_p<macs>dql): Macroize insn from xop_pmacs{,s}dql using
	xop_plus code iterator.
	(xop_p<macs>dqh): Macroize insn from xop_pmacs{,s}dqh using
	xop_plus code iterator.
	(xop_p<macs>wd): Macroize insn from xop_pmacs{,s}wd using
	xop_plus code iterator.
	(xop_p<madcs>wd): Macroize insn from xop_pmadcs{,s}wd using
	xop_plus code iterator.

	(xop_phadd<u>bw): Macroize insn from xop_phadd{,u}bw using
	any_extend code iterator.
	(xop_phadd<u>bd): Macroize insn from xop_phadd{,u}bd using
	any_extend code iterator.
	(xop_phadd<u>bq): Macroize insn from xop_phadd{,u}bq using
	any_extend code iterator.
	(xop_phadd<u>wd): Macroize insn from xop_phadd{,u}wd using
	any_extend code iterator.
	(xop_phadd<u>wq): Macroize insn from xop_phadd{,u}wq using
	any_extend code iterator.
	(xop_phadd<u>dq): Macroize insn from xop_phadd{,u}dq using
	any_extend code iterator.

Tested on x86_64-pc-linux-gnu {,-m32}, committed to mainline SVN.

Uros.

Patch

Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md	(revision 186448)
+++ config/i386/sse.md	(working copy)
@@ -8037,9 +8037,10 @@ 
 ;; surely not generally useful.
 (define_insn "<sse2_avx2>_psadbw"
   [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
-	(unspec:VI8_AVX2 [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
-			  (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
-			  UNSPEC_PSADBW))]
+	(unspec:VI8_AVX2
+	  [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
+	   (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
+	  UNSPEC_PSADBW))]
   "TARGET_SSE2"
   "@
    psadbw\t{%2, %0|%0, %2}
@@ -8175,375 +8176,125 @@ 
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(define_insn "avx2_phaddwv16hi3"
-  [(set (match_operand:V16HI 0 "register_operand" "=x")
-	(vec_concat:V16HI
-	  (vec_concat:V8HI
-	    (vec_concat:V4HI
-	      (vec_concat:V2HI
-		(plus:HI
-		  (vec_select:HI
-		    (match_operand:V16HI 1 "register_operand" "x")
-		    (parallel [(const_int 0)]))
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
-		(plus:HI
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
-	      (vec_concat:V2HI
-		(plus:HI
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
-		(plus:HI
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
-	    (vec_concat:V4HI
-	      (vec_concat:V2HI
-		(plus:HI
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
-		(plus:HI
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
-	      (vec_concat:V2HI
-		(plus:HI
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
-		(plus:HI
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
-	  (vec_concat:V8HI
-	    (vec_concat:V4HI
-	      (vec_concat:V2HI
-		(plus:HI
-		  (vec_select:HI
-		    (match_operand:V16HI 2 "nonimmediate_operand" "xm")
-		    (parallel [(const_int 0)]))
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
-		(plus:HI
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
-	      (vec_concat:V2HI
-		(plus:HI
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
-		(plus:HI
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
-	    (vec_concat:V4HI
-	      (vec_concat:V2HI
-		(plus:HI
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
-		(plus:HI
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
-	      (vec_concat:V2HI
-		(plus:HI
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
-		(plus:HI
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
-  "TARGET_AVX2"
-  "vphaddw\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "prefix_extra" "1")
-   (set_attr "prefix" "vex")
-   (set_attr "mode" "OI")])
+(define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
 
-(define_insn "ssse3_phaddwv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
-	(vec_concat:V8HI
-	  (vec_concat:V4HI
-	    (vec_concat:V2HI
-	      (plus:HI
-		(vec_select:HI
-		  (match_operand:V8HI 1 "register_operand" "0,x")
-		  (parallel [(const_int 0)]))
-		(vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
-	      (plus:HI
-		(vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
-		(vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
-	    (vec_concat:V2HI
-	      (plus:HI
-		(vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
-		(vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
-	      (plus:HI
-		(vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
-		(vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
-	  (vec_concat:V4HI
-	    (vec_concat:V2HI
-	      (plus:HI
-		(vec_select:HI
-		  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
-		  (parallel [(const_int 0)]))
-		(vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
-	      (plus:HI
-		(vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
-		(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
-	    (vec_concat:V2HI
-	      (plus:HI
-		(vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
-		(vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
-	      (plus:HI
-		(vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
-		(vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
-  "TARGET_SSSE3"
-  "@
-   phaddw\t{%2, %0|%0, %2}
-   vphaddw\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "isa" "noavx,avx")
-   (set_attr "type" "sseiadd")
-   (set_attr "atom_unit" "complex")
-   (set_attr "prefix_data16" "1,*")
-   (set_attr "prefix_extra" "1")
-   (set_attr "prefix" "orig,vex")
-   (set_attr "mode" "TI")])
-
-(define_insn "ssse3_phaddwv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-	(vec_concat:V4HI
-	  (vec_concat:V2HI
-	    (plus:HI
-	      (vec_select:HI
-		(match_operand:V4HI 1 "register_operand" "0")
-		(parallel [(const_int 0)]))
-	      (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
-	    (plus:HI
-	      (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
-	      (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
-	  (vec_concat:V2HI
-	    (plus:HI
-	      (vec_select:HI
-		(match_operand:V4HI 2 "nonimmediate_operand" "ym")
-		(parallel [(const_int 0)]))
-	      (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
-	    (plus:HI
-	      (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
-	      (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
-  "TARGET_SSSE3"
-  "phaddw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "atom_unit" "complex")
-   (set_attr "prefix_extra" "1")
-   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
-
-(define_insn "avx2_phadddv8si3"
-  [(set (match_operand:V8SI 0 "register_operand" "=x")
-	(vec_concat:V8SI
-	  (vec_concat:V4SI
-	    (vec_concat:V2SI
-	      (plus:SI
-		(vec_select:SI
-		  (match_operand:V8SI 1 "register_operand" "x")
-		  (parallel [(const_int 0)]))
-		(vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
-	      (plus:SI
-		(vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
-		(vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
-	    (vec_concat:V2SI
-	      (plus:SI
-		(vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
-		(vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
-	      (plus:SI
-		(vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
-		(vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
-	  (vec_concat:V4SI
-	    (vec_concat:V2SI
-	      (plus:SI
-		(vec_select:SI
-		  (match_operand:V8SI 2 "nonimmediate_operand" "xm")
-		  (parallel [(const_int 0)]))
-		(vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
-	      (plus:SI
-		(vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
-		(vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
-	    (vec_concat:V2SI
-	      (plus:SI
-		(vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
-		(vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
-	      (plus:SI
-		(vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
-		(vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
-  "TARGET_AVX2"
-  "vphaddd\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "prefix_extra" "1")
-   (set_attr "prefix" "vex")
-   (set_attr "mode" "OI")])
-
-(define_insn "ssse3_phadddv4si3"
-  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
-	(vec_concat:V4SI
-	  (vec_concat:V2SI
-	    (plus:SI
-	      (vec_select:SI
-		(match_operand:V4SI 1 "register_operand" "0,x")
-		(parallel [(const_int 0)]))
-	      (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
-	    (plus:SI
-	      (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
-	      (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
-	  (vec_concat:V2SI
-	    (plus:SI
-	      (vec_select:SI
-		(match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
-		(parallel [(const_int 0)]))
-	      (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
-	    (plus:SI
-	      (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
-	      (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
-  "TARGET_SSSE3"
-  "@
-   phaddd\t{%2, %0|%0, %2}
-   vphaddd\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "isa" "noavx,avx")
-   (set_attr "type" "sseiadd")
-   (set_attr "atom_unit" "complex")
-   (set_attr "prefix_data16" "1,*")
-   (set_attr "prefix_extra" "1")
-   (set_attr "prefix" "orig,vex")
-   (set_attr "mode" "TI")])
-
-(define_insn "ssse3_phadddv2si3"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-	(vec_concat:V2SI
-	  (plus:SI
-	    (vec_select:SI
-	      (match_operand:V2SI 1 "register_operand" "0")
-	      (parallel [(const_int 0)]))
-	    (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
-	  (plus:SI
-	    (vec_select:SI
-	      (match_operand:V2SI 2 "nonimmediate_operand" "ym")
-	      (parallel [(const_int 0)]))
-	    (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
-  "TARGET_SSSE3"
-  "phaddd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "atom_unit" "complex")
-   (set_attr "prefix_extra" "1")
-   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
-
-(define_insn "avx2_phaddswv16hi3"
+(define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
   [(set (match_operand:V16HI 0 "register_operand" "=x")
 	(vec_concat:V16HI
 	  (vec_concat:V8HI
 	    (vec_concat:V4HI
 	      (vec_concat:V2HI
-		(ss_plus:HI
+		(ssse3_plusminus:HI
 		  (vec_select:HI
 		    (match_operand:V16HI 1 "register_operand" "x")
 		    (parallel [(const_int 0)]))
 		  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
-		(ss_plus:HI
+		(ssse3_plusminus:HI
 		  (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
 		  (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
 	      (vec_concat:V2HI
-		(ss_plus:HI
+		(ssse3_plusminus:HI
 		  (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
 		  (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
-		(ss_plus:HI
+		(ssse3_plusminus:HI
 		  (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
 		  (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
 	    (vec_concat:V4HI
 	      (vec_concat:V2HI
-		(ss_plus:HI
+		(ssse3_plusminus:HI
 		  (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
 		  (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
-		(ss_plus:HI
+		(ssse3_plusminus:HI
 		  (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
 		  (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
 	      (vec_concat:V2HI
-		(ss_plus:HI
+		(ssse3_plusminus:HI
 		  (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
 		  (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
-		(ss_plus:HI
+		(ssse3_plusminus:HI
 		  (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
 		  (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
 	  (vec_concat:V8HI
 	    (vec_concat:V4HI
 	      (vec_concat:V2HI
-		(ss_plus:HI
+		(ssse3_plusminus:HI
 		  (vec_select:HI
 		    (match_operand:V16HI 2 "nonimmediate_operand" "xm")
 		    (parallel [(const_int 0)]))
 		  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
-		(ss_plus:HI
+		(ssse3_plusminus:HI
 		  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
 		  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
 	      (vec_concat:V2HI
-		(ss_plus:HI
+		(ssse3_plusminus:HI
 		  (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
 		  (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
-		(ss_plus:HI
+		(ssse3_plusminus:HI
 		  (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
 		  (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
 	    (vec_concat:V4HI
 	      (vec_concat:V2HI
-		(ss_plus:HI
+		(ssse3_plusminus:HI
 		  (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
 		  (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
-		(ss_plus:HI
+		(ssse3_plusminus:HI
 		  (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
 		  (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
 	      (vec_concat:V2HI
-		(ss_plus:HI
+		(ssse3_plusminus:HI
 		  (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
 		  (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
-		(ss_plus:HI
+		(ssse3_plusminus:HI
 		  (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
 		  (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
   "TARGET_AVX2"
-  "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
+  "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sseiadd")
    (set_attr "prefix_extra" "1")
    (set_attr "prefix" "vex")
    (set_attr "mode" "OI")])
 
-(define_insn "ssse3_phaddswv8hi3"
+(define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
   [(set (match_operand:V8HI 0 "register_operand" "=x,x")
 	(vec_concat:V8HI
 	  (vec_concat:V4HI
 	    (vec_concat:V2HI
-	      (ss_plus:HI
+	      (ssse3_plusminus:HI
 		(vec_select:HI
 		  (match_operand:V8HI 1 "register_operand" "0,x")
 		  (parallel [(const_int 0)]))
 		(vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
-	      (ss_plus:HI
+	      (ssse3_plusminus:HI
 		(vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
 		(vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
 	    (vec_concat:V2HI
-	      (ss_plus:HI
+	      (ssse3_plusminus:HI
 		(vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
 		(vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
-	      (ss_plus:HI
+	      (ssse3_plusminus:HI
 		(vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
 		(vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
 	  (vec_concat:V4HI
 	    (vec_concat:V2HI
-	      (ss_plus:HI
+	      (ssse3_plusminus:HI
 		(vec_select:HI
 		  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
 		  (parallel [(const_int 0)]))
 		(vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
-	      (ss_plus:HI
+	      (ssse3_plusminus:HI
 		(vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
 		(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
 	    (vec_concat:V2HI
-	      (ss_plus:HI
+	      (ssse3_plusminus:HI
 		(vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
 		(vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
-	      (ss_plus:HI
+	      (ssse3_plusminus:HI
 		(vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
 		(vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
   "TARGET_SSSE3"
   "@
-   phaddsw\t{%2, %0|%0, %2}
-   vphaddsw\t{%2, %1, %0|%0, %1, %2}"
+   ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
+   vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sseiadd")
    (set_attr "atom_unit" "complex")
@@ -8552,259 +8303,104 @@ 
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "TI")])
 
-(define_insn "ssse3_phaddswv4hi3"
+(define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
   [(set (match_operand:V4HI 0 "register_operand" "=y")
 	(vec_concat:V4HI
 	  (vec_concat:V2HI
-	    (ss_plus:HI
+	    (ssse3_plusminus:HI
 	      (vec_select:HI
 		(match_operand:V4HI 1 "register_operand" "0")
 		(parallel [(const_int 0)]))
 	      (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
-	    (ss_plus:HI
+	    (ssse3_plusminus:HI
 	      (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
 	      (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
 	  (vec_concat:V2HI
-	    (ss_plus:HI
+	    (ssse3_plusminus:HI
 	      (vec_select:HI
 		(match_operand:V4HI 2 "nonimmediate_operand" "ym")
 		(parallel [(const_int 0)]))
 	      (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
-	    (ss_plus:HI
+	    (ssse3_plusminus:HI
 	      (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
 	      (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
   "TARGET_SSSE3"
-  "phaddsw\t{%2, %0|%0, %2}"
+  "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseiadd")
    (set_attr "atom_unit" "complex")
    (set_attr "prefix_extra" "1")
    (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
    (set_attr "mode" "DI")])
 
-(define_insn "avx2_phsubwv16hi3"
-  [(set (match_operand:V16HI 0 "register_operand" "=x")
-	(vec_concat:V16HI
-	  (vec_concat:V8HI
-	    (vec_concat:V4HI
-	      (vec_concat:V2HI
-		(minus:HI
-		  (vec_select:HI
-		    (match_operand:V16HI 1 "register_operand" "x")
-		    (parallel [(const_int 0)]))
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
-		(minus:HI
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
-	      (vec_concat:V2HI
-		(minus:HI
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
-		(minus:HI
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
-	    (vec_concat:V4HI
-	      (vec_concat:V2HI
-		(minus:HI
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
-		(minus:HI
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
-	      (vec_concat:V2HI
-		(minus:HI
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
-		(minus:HI
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
-	  (vec_concat:V8HI
-	    (vec_concat:V4HI
-	      (vec_concat:V2HI
-		(minus:HI
-		  (vec_select:HI
-		    (match_operand:V16HI 2 "nonimmediate_operand" "xm")
-		    (parallel [(const_int 0)]))
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
-		(minus:HI
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
-	      (vec_concat:V2HI
-		(minus:HI
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
-		(minus:HI
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
-	    (vec_concat:V4HI
-	      (vec_concat:V2HI
-		(minus:HI
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
-		(minus:HI
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
-	      (vec_concat:V2HI
-		(minus:HI
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
-		(minus:HI
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
-  "TARGET_AVX2"
-  "vphsubw\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "prefix_extra" "1")
-   (set_attr "prefix" "vex")
-   (set_attr "mode" "OI")])
-
-(define_insn "ssse3_phsubwv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
-	(vec_concat:V8HI
-	  (vec_concat:V4HI
-	    (vec_concat:V2HI
-	      (minus:HI
-		(vec_select:HI
-		  (match_operand:V8HI 1 "register_operand" "0,x")
-		  (parallel [(const_int 0)]))
-		(vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
-	      (minus:HI
-		(vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
-		(vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
-	    (vec_concat:V2HI
-	      (minus:HI
-		(vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
-		(vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
-	      (minus:HI
-		(vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
-		(vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
-	  (vec_concat:V4HI
-	    (vec_concat:V2HI
-	      (minus:HI
-		(vec_select:HI
-		  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
-		  (parallel [(const_int 0)]))
-		(vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
-	      (minus:HI
-		(vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
-		(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
-	    (vec_concat:V2HI
-	      (minus:HI
-		(vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
-		(vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
-	      (minus:HI
-		(vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
-		(vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
-  "TARGET_SSSE3"
-  "@
-   phsubw\t{%2, %0|%0, %2}
-   vphsubw\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "isa" "noavx,avx")
-   (set_attr "type" "sseiadd")
-   (set_attr "atom_unit" "complex")
-   (set_attr "prefix_data16" "1,*")
-   (set_attr "prefix_extra" "1")
-   (set_attr "prefix" "orig,vex")
-   (set_attr "mode" "TI")])
-
-(define_insn "ssse3_phsubwv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-	(vec_concat:V4HI
-	  (vec_concat:V2HI
-	    (minus:HI
-	      (vec_select:HI
-		(match_operand:V4HI 1 "register_operand" "0")
-		(parallel [(const_int 0)]))
-	      (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
-	    (minus:HI
-	      (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
-	      (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
-	  (vec_concat:V2HI
-	    (minus:HI
-	      (vec_select:HI
-		(match_operand:V4HI 2 "nonimmediate_operand" "ym")
-		(parallel [(const_int 0)]))
-	      (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
-	    (minus:HI
-	      (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
-	      (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
-  "TARGET_SSSE3"
-  "phsubw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "atom_unit" "complex")
-   (set_attr "prefix_extra" "1")
-   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
-
-(define_insn "avx2_phsubdv8si3"
+(define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
   [(set (match_operand:V8SI 0 "register_operand" "=x")
 	(vec_concat:V8SI
 	  (vec_concat:V4SI
 	    (vec_concat:V2SI
-	      (minus:SI
+	      (plusminus:SI
 		(vec_select:SI
 		  (match_operand:V8SI 1 "register_operand" "x")
 		  (parallel [(const_int 0)]))
 		(vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
-	      (minus:SI
+	      (plusminus:SI
 		(vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
 		(vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
 	    (vec_concat:V2SI
-	      (minus:SI
+	      (plusminus:SI
 		(vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
 		(vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
-	      (minus:SI
+	      (plusminus:SI
 		(vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
 		(vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
 	  (vec_concat:V4SI
 	    (vec_concat:V2SI
-	      (minus:SI
+	      (plusminus:SI
 		(vec_select:SI
 		  (match_operand:V8SI 2 "nonimmediate_operand" "xm")
 		  (parallel [(const_int 0)]))
 		(vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
-	      (minus:SI
+	      (plusminus:SI
 		(vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
 		(vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
 	    (vec_concat:V2SI
-	      (minus:SI
+	      (plusminus:SI
 		(vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
 		(vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
-	      (minus:SI
+	      (plusminus:SI
 		(vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
 		(vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
   "TARGET_AVX2"
-  "vphsubd\t{%2, %1, %0|%0, %1, %2}"
+  "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sseiadd")
    (set_attr "prefix_extra" "1")
    (set_attr "prefix" "vex")
    (set_attr "mode" "OI")])
 
-(define_insn "ssse3_phsubdv4si3"
+(define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
   [(set (match_operand:V4SI 0 "register_operand" "=x,x")
 	(vec_concat:V4SI
 	  (vec_concat:V2SI
-	    (minus:SI
+	    (plusminus:SI
 	      (vec_select:SI
 		(match_operand:V4SI 1 "register_operand" "0,x")
 		(parallel [(const_int 0)]))
 	      (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
-	    (minus:SI
+	    (plusminus:SI
 	      (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
 	      (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
 	  (vec_concat:V2SI
-	    (minus:SI
+	    (plusminus:SI
 	      (vec_select:SI
 		(match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
 		(parallel [(const_int 0)]))
 	      (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
-	    (minus:SI
+	    (plusminus:SI
 	      (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
 	      (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
   "TARGET_SSSE3"
   "@
-   phsubd\t{%2, %0|%0, %2}
-   vphsubd\t{%2, %1, %0|%0, %1, %2}"
-
+   ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
+   vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sseiadd")
    (set_attr "atom_unit" "complex")
@@ -8813,181 +8409,27 @@ 
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "TI")])
 
-(define_insn "ssse3_phsubdv2si3"
+(define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
   [(set (match_operand:V2SI 0 "register_operand" "=y")
 	(vec_concat:V2SI
-	  (minus:SI
+	  (plusminus:SI
 	    (vec_select:SI
 	      (match_operand:V2SI 1 "register_operand" "0")
 	      (parallel [(const_int 0)]))
 	    (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
-	  (minus:SI
+	  (plusminus:SI
 	    (vec_select:SI
 	      (match_operand:V2SI 2 "nonimmediate_operand" "ym")
 	      (parallel [(const_int 0)]))
 	    (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
   "TARGET_SSSE3"
-  "phsubd\t{%2, %0|%0, %2}"
+  "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseiadd")
    (set_attr "atom_unit" "complex")
    (set_attr "prefix_extra" "1")
    (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
    (set_attr "mode" "DI")])
 
-(define_insn "avx2_phsubswv16hi3"
-  [(set (match_operand:V16HI 0 "register_operand" "=x")
-	(vec_concat:V16HI
-	  (vec_concat:V8HI
-	    (vec_concat:V4HI
-	      (vec_concat:V2HI
-		(ss_minus:HI
-		  (vec_select:HI
-		    (match_operand:V16HI 1 "register_operand" "x")
-		    (parallel [(const_int 0)]))
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
-		(ss_minus:HI
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
-	      (vec_concat:V2HI
-		(ss_minus:HI
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
-		(ss_minus:HI
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
-	    (vec_concat:V4HI
-	      (vec_concat:V2HI
-		(ss_minus:HI
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
-		(ss_minus:HI
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
-	      (vec_concat:V2HI
-		(ss_minus:HI
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
-		(ss_minus:HI
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
-		  (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
-	  (vec_concat:V8HI
-	    (vec_concat:V4HI
-	      (vec_concat:V2HI
-		(ss_minus:HI
-		  (vec_select:HI
-		    (match_operand:V16HI 2 "nonimmediate_operand" "xm")
-		    (parallel [(const_int 0)]))
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
-		(ss_minus:HI
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
-	      (vec_concat:V2HI
-		(ss_minus:HI
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
-		(ss_minus:HI
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
-	    (vec_concat:V4HI
-	      (vec_concat:V2HI
-		(ss_minus:HI
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
-		(ss_minus:HI
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
-	      (vec_concat:V2HI
-		(ss_minus:HI
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
-		(ss_minus:HI
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
-		  (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
-  "TARGET_AVX2"
-  "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "prefix_extra" "1")
-   (set_attr "prefix" "vex")
-   (set_attr "mode" "OI")])
-
-(define_insn "ssse3_phsubswv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
-	(vec_concat:V8HI
-	  (vec_concat:V4HI
-	    (vec_concat:V2HI
-	      (ss_minus:HI
-		(vec_select:HI
-		  (match_operand:V8HI 1 "register_operand" "0,x")
-		  (parallel [(const_int 0)]))
-		(vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
-	      (ss_minus:HI
-		(vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
-		(vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
-	    (vec_concat:V2HI
-	      (ss_minus:HI
-		(vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
-		(vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
-	      (ss_minus:HI
-		(vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
-		(vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
-	  (vec_concat:V4HI
-	    (vec_concat:V2HI
-	      (ss_minus:HI
-		(vec_select:HI
-		  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
-		  (parallel [(const_int 0)]))
-		(vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
-	      (ss_minus:HI
-		(vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
-		(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
-	    (vec_concat:V2HI
-	      (ss_minus:HI
-		(vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
-		(vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
-	      (ss_minus:HI
-		(vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
-		(vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
-  "TARGET_SSSE3"
-  "@
-   phsubsw\t{%2, %0|%0, %2}
-   vphsubsw\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "isa" "noavx,avx")
-   (set_attr "type" "sseiadd")
-   (set_attr "atom_unit" "complex")
-   (set_attr "prefix_data16" "1,*")
-   (set_attr "prefix_extra" "1")
-   (set_attr "prefix" "orig,vex")
-   (set_attr "mode" "TI")])
-
-(define_insn "ssse3_phsubswv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-	(vec_concat:V4HI
-	  (vec_concat:V2HI
-	    (ss_minus:HI
-	      (vec_select:HI
-		(match_operand:V4HI 1 "register_operand" "0")
-		(parallel [(const_int 0)]))
-	      (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
-	    (ss_minus:HI
-	      (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
-	      (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
-	  (vec_concat:V2HI
-	    (ss_minus:HI
-	      (vec_select:HI
-		(match_operand:V4HI 2 "nonimmediate_operand" "ym")
-		(parallel [(const_int 0)]))
-	      (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
-	    (ss_minus:HI
-	      (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
-	      (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
-  "TARGET_SSSE3"
-  "phsubsw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "atom_unit" "complex")
-   (set_attr "prefix_extra" "1")
-   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
-
 (define_insn "avx2_pmaddubsw256"
   [(set (match_operand:V16HI 0 "register_operand" "=x")
 	(ss_plus:V16HI
@@ -9314,9 +8756,10 @@ 
 
 (define_insn "<ssse3_avx2>_pshufb<mode>3"
   [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
-	(unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
-			  (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
-			 UNSPEC_PSHUFB))]
+	(unspec:VI1_AVX2
+	  [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
+	   (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
+	  UNSPEC_PSHUFB))]
   "TARGET_SSSE3"
   "@
    pshufb\t{%2, %0|%0, %2}
@@ -9372,10 +8815,11 @@ 
 
 (define_insn "<ssse3_avx2>_palignr<mode>"
   [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
-	(unspec:SSESCALARMODE [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
-			       (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
-			       (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
-			      UNSPEC_PALIGNR))]
+	(unspec:SSESCALARMODE
+	  [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
+	   (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
+	   (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
+	  UNSPEC_PALIGNR))]
   "TARGET_SSSE3"
 {
   operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
@@ -9595,10 +9039,11 @@ 
 
 (define_insn "<sse4_1_avx2>_mpsadbw"
   [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
-	(unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
-			  (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
-			  (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
-			 UNSPEC_MPSADBW))]
+	(unspec:VI1_AVX2
+	  [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
+	   (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
+	   (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
+	  UNSPEC_MPSADBW))]
   "TARGET_SSE4_1"
   "@
    mpsadbw\t{%3, %2, %0|%0, %2, %3}
@@ -10396,78 +9841,51 @@ 
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
+(define_code_iterator xop_plus [plus ss_plus])
+
+(define_code_attr macs [(plus "macs") (ss_plus "macss")])
+(define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
+
 ;; XOP parallel integer multiply/add instructions.
 ;; Note the XOP multiply/add instructions
 ;;     a[i] = b[i] * c[i] + d[i];
 ;; do not allow the value being added to be a memory operation.
-(define_insn "xop_pmacsww"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-	(plus:V8HI
-	 (mult:V8HI
-	  (match_operand:V8HI 1 "nonimmediate_operand" "%x")
-	  (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
-	 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
-  "TARGET_XOP"
-  "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "TI")])
 
-(define_insn "xop_pmacssww"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-	(ss_plus:V8HI
-	 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
-		    (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
-	 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
+(define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
+  [(set (match_operand:VI24_128 0 "register_operand" "=x")
+	(xop_plus:VI24_128
+	 (mult:VI24_128
+	  (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
+	  (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
+	 (match_operand:VI24_128 3 "nonimmediate_operand" "x")))]
   "TARGET_XOP"
-  "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "TI")])
 
-(define_insn "xop_pmacsdd"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-	(plus:V4SI
-	 (mult:V4SI
-	  (match_operand:V4SI 1 "nonimmediate_operand" "%x")
-	  (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
-	 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
-  "TARGET_XOP"
-  "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "TI")])
-
-(define_insn "xop_pmacssdd"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-	(ss_plus:V4SI
-	 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
-		    (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
-	 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
-  "TARGET_XOP"
-  "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "TI")])
-
-(define_insn "xop_pmacssdql"
+(define_insn "xop_p<macs>dql"
   [(set (match_operand:V2DI 0 "register_operand" "=x")
-	(ss_plus:V2DI
+	(xop_plus:V2DI
 	 (mult:V2DI
 	  (sign_extend:V2DI
 	   (vec_select:V2SI
 	    (match_operand:V4SI 1 "nonimmediate_operand" "%x")
 	    (parallel [(const_int 1)
 		       (const_int 3)])))
-	  (vec_select:V2SI
-	   (match_operand:V4SI 2 "nonimmediate_operand" "xm")
-	   (parallel [(const_int 1)
-		      (const_int 3)])))
+	  (sign_extend:V2DI
+	   (vec_select:V2SI
+	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+	    (parallel [(const_int 1)
+		       (const_int 3)]))))
 	 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
   "TARGET_XOP"
-  "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "TI")])
 
-(define_insn "xop_pmacssdqh"
+(define_insn "xop_p<macs>dqh"
   [(set (match_operand:V2DI 0 "register_operand" "=x")
-	(ss_plus:V2DI
+	(xop_plus:V2DI
 	 (mult:V2DI
 	  (sign_extend:V2DI
 	   (vec_select:V2SI
@@ -10481,30 +9899,10 @@ 
 		       (const_int 2)]))))
 	 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
   "TARGET_XOP"
-  "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "TI")])
 
-(define_insn "xop_pmacsdql"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-	(plus:V2DI
-	 (mult:V2DI
-	  (sign_extend:V2DI
-	   (vec_select:V2SI
-	    (match_operand:V4SI 1 "nonimmediate_operand" "%x")
-	    (parallel [(const_int 1)
-		       (const_int 3)])))
-	  (sign_extend:V2DI
-	   (vec_select:V2SI
-	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
-	    (parallel [(const_int 1)
-		       (const_int 3)]))))
-	 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
-  "TARGET_XOP"
-  "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "TI")])
-
 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
 ;; fake it with a multiply/add.  In general, we expect the define_split to
 ;; occur before register allocation, so we have to handle the corner case where
@@ -10547,26 +9945,6 @@ 
   [(set_attr "type" "ssemul")
    (set_attr "mode" "TI")])
 
-(define_insn "xop_pmacsdqh"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-	(plus:V2DI
-	 (mult:V2DI
-	  (sign_extend:V2DI
-	   (vec_select:V2SI
-	    (match_operand:V4SI 1 "nonimmediate_operand" "%x")
-	    (parallel [(const_int 0)
-		       (const_int 2)])))
-	  (sign_extend:V2DI
-	   (vec_select:V2SI
-	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
-	    (parallel [(const_int 0)
-		       (const_int 2)]))))
-	 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
-  "TARGET_XOP"
-  "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "TI")])
-
 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
 ;; fake it with a multiply/add.  In general, we expect the define_split to
 ;; occur before register allocation, so we have to handle the corner case where
@@ -10610,9 +9988,9 @@ 
    (set_attr "mode" "TI")])
 
 ;; XOP parallel integer multiply/add instructions for the intrinisics
-(define_insn "xop_pmacsswd"
+(define_insn "xop_p<macs>wd"
   [(set (match_operand:V4SI 0 "register_operand" "=x")
-	(ss_plus:V4SI
+	(xop_plus:V4SI
 	 (mult:V4SI
 	  (sign_extend:V4SI
 	   (vec_select:V4HI
@@ -10630,37 +10008,13 @@ 
 		       (const_int 7)]))))
 	 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
   "TARGET_XOP"
-  "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "TI")])
 
-(define_insn "xop_pmacswd"
+(define_insn "xop_p<madcs>wd"
   [(set (match_operand:V4SI 0 "register_operand" "=x")
-	(plus:V4SI
-	 (mult:V4SI
-	  (sign_extend:V4SI
-	   (vec_select:V4HI
-	    (match_operand:V8HI 1 "nonimmediate_operand" "%x")
-	    (parallel [(const_int 1)
-		       (const_int 3)
-		       (const_int 5)
-		       (const_int 7)])))
-	  (sign_extend:V4SI
-	   (vec_select:V4HI
-	    (match_operand:V8HI 2 "nonimmediate_operand" "xm")
-	    (parallel [(const_int 1)
-		       (const_int 3)
-		       (const_int 5)
-		       (const_int 7)]))))
-	 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
-  "TARGET_XOP"
-  "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "TI")])
-
-(define_insn "xop_pmadcsswd"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-	(ss_plus:V4SI
+	(xop_plus:V4SI
 	 (plus:V4SI
 	  (mult:V4SI
 	   (sign_extend:V4SI
@@ -10694,50 +10048,10 @@ 
 			(const_int 7)])))))
 	 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
   "TARGET_XOP"
-  "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "TI")])
 
-(define_insn "xop_pmadcswd"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-	(plus:V4SI
-	 (plus:V4SI
-	  (mult:V4SI
-	   (sign_extend:V4SI
-	    (vec_select:V4HI
-	     (match_operand:V8HI 1 "nonimmediate_operand" "%x")
-	     (parallel [(const_int 0)
-			(const_int 2)
-			(const_int 4)
-			(const_int 6)])))
-	   (sign_extend:V4SI
-	    (vec_select:V4HI
-	     (match_operand:V8HI 2 "nonimmediate_operand" "xm")
-	     (parallel [(const_int 0)
-			(const_int 2)
-			(const_int 4)
-			(const_int 6)]))))
-	  (mult:V4SI
-	   (sign_extend:V4SI
-	    (vec_select:V4HI
-	     (match_dup 1)
-	     (parallel [(const_int 1)
-			(const_int 3)
-			(const_int 5)
-			(const_int 7)])))
-	   (sign_extend:V4SI
-	    (vec_select:V4HI
-	     (match_dup 2)
-	     (parallel [(const_int 1)
-			(const_int 3)
-			(const_int 5)
-			(const_int 7)])))))
-	 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
-  "TARGET_XOP"
-  "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "TI")])
-
 ;; XOP parallel XMM conditional moves
 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
   [(set (match_operand:V 0 "register_operand" "=x,x")
@@ -10750,10 +10064,10 @@ 
   [(set_attr "type" "sse4arg")])
 
 ;; XOP horizontal add/subtract instructions
-(define_insn "xop_phaddbw"
+(define_insn "xop_phadd<u>bw"
   [(set (match_operand:V8HI 0 "register_operand" "=x")
 	(plus:V8HI
-	 (sign_extend:V8HI
+	 (any_extend:V8HI
 	  (vec_select:V8QI
 	   (match_operand:V16QI 1 "nonimmediate_operand" "xm")
 	   (parallel [(const_int 0)
@@ -10764,7 +10078,7 @@ 
 		      (const_int 10)
 		      (const_int 12)
 		      (const_int 14)])))
-	 (sign_extend:V8HI
+	 (any_extend:V8HI
 	  (vec_select:V8QI
 	   (match_dup 1)
 	   (parallel [(const_int 1)
@@ -10776,21 +10090,21 @@ 
 		      (const_int 13)
 		      (const_int 15)])))))]
   "TARGET_XOP"
-  "vphaddbw\t{%1, %0|%0, %1}"
+  "vphadd<u>bw\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseiadd1")])
 
-(define_insn "xop_phaddbd"
+(define_insn "xop_phadd<u>bd"
   [(set (match_operand:V4SI 0 "register_operand" "=x")
 	(plus:V4SI
 	 (plus:V4SI
-	  (sign_extend:V4SI
+	  (any_extend:V4SI
 	   (vec_select:V4QI
 	    (match_operand:V16QI 1 "nonimmediate_operand" "xm")
 	    (parallel [(const_int 0)
 		       (const_int 4)
 		       (const_int 8)
 		       (const_int 12)])))
-	  (sign_extend:V4SI
+	  (any_extend:V4SI
 	   (vec_select:V4QI
 	    (match_dup 1)
 	    (parallel [(const_int 1)
@@ -10798,14 +10112,14 @@ 
 		       (const_int 9)
 		       (const_int 13)]))))
 	 (plus:V4SI
-	  (sign_extend:V4SI
+	  (any_extend:V4SI
 	   (vec_select:V4QI
 	    (match_dup 1)
 	    (parallel [(const_int 2)
 		       (const_int 6)
 		       (const_int 10)
 		       (const_int 14)])))
-	  (sign_extend:V4SI
+	  (any_extend:V4SI
 	   (vec_select:V4QI
 	    (match_dup 1)
 	    (parallel [(const_int 3)
@@ -10813,73 +10127,73 @@ 
 		       (const_int 11)
 		       (const_int 15)]))))))]
   "TARGET_XOP"
-  "vphaddbd\t{%1, %0|%0, %1}"
+  "vphadd<u>bd\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseiadd1")])
 
-(define_insn "xop_phaddbq"
+(define_insn "xop_phadd<u>bq"
   [(set (match_operand:V2DI 0 "register_operand" "=x")
 	(plus:V2DI
 	 (plus:V2DI
 	  (plus:V2DI
-	   (sign_extend:V2DI
+	   (any_extend:V2DI
 	    (vec_select:V2QI
 	     (match_operand:V16QI 1 "nonimmediate_operand" "xm")
 	     (parallel [(const_int 0)
 			(const_int 4)])))
-	   (sign_extend:V2DI
+	   (any_extend:V2DI
 	    (vec_select:V2QI
 	     (match_dup 1)
 	     (parallel [(const_int 1)
 			(const_int 5)]))))
 	  (plus:V2DI
-	   (sign_extend:V2DI
+	   (any_extend:V2DI
 	    (vec_select:V2QI
 	     (match_dup 1)
 	     (parallel [(const_int 2)
 			(const_int 6)])))
-	   (sign_extend:V2DI
+	   (any_extend:V2DI
 	    (vec_select:V2QI
 	     (match_dup 1)
 	     (parallel [(const_int 3)
 			(const_int 7)])))))
 	 (plus:V2DI
 	  (plus:V2DI
-	   (sign_extend:V2DI
+	   (any_extend:V2DI
 	    (vec_select:V2QI
 	     (match_dup 1)
 	     (parallel [(const_int 8)
 			(const_int 12)])))
-	   (sign_extend:V2DI
+	   (any_extend:V2DI
 	    (vec_select:V2QI
 	     (match_dup 1)
 	     (parallel [(const_int 9)
 			(const_int 13)]))))
 	  (plus:V2DI
-	   (sign_extend:V2DI
+	   (any_extend:V2DI
 	    (vec_select:V2QI
 	     (match_dup 1)
 	     (parallel [(const_int 10)
 			(const_int 14)])))
-	   (sign_extend:V2DI
+	   (any_extend:V2DI
 	    (vec_select:V2QI
 	     (match_dup 1)
 	     (parallel [(const_int 11)
 			(const_int 15)])))))))]
   "TARGET_XOP"
-  "vphaddbq\t{%1, %0|%0, %1}"
+  "vphadd<u>bq\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseiadd1")])
 
-(define_insn "xop_phaddwd"
+(define_insn "xop_phadd<u>wd"
   [(set (match_operand:V4SI 0 "register_operand" "=x")
 	(plus:V4SI
-	 (sign_extend:V4SI
+	 (any_extend:V4SI
 	  (vec_select:V4HI
 	   (match_operand:V8HI 1 "nonimmediate_operand" "xm")
 	   (parallel [(const_int 0)
 		      (const_int 2)
 		      (const_int 4)
 		      (const_int 6)])))
-	 (sign_extend:V4SI
+	 (any_extend:V4SI
 	  (vec_select:V4HI
 	   (match_dup 1)
 	   (parallel [(const_int 1)
@@ -10887,241 +10201,55 @@ 
 		      (const_int 5)
 		      (const_int 7)])))))]
   "TARGET_XOP"
-  "vphaddwd\t{%1, %0|%0, %1}"
+  "vphadd<u>wd\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseiadd1")])
 
-(define_insn "xop_phaddwq"
+(define_insn "xop_phadd<u>wq"
   [(set (match_operand:V2DI 0 "register_operand" "=x")
 	(plus:V2DI
 	 (plus:V2DI
-	  (sign_extend:V2DI
+	  (any_extend:V2DI
 	   (vec_select:V2HI
 	    (match_operand:V8HI 1 "nonimmediate_operand" "xm")
 	    (parallel [(const_int 0)
 		       (const_int 4)])))
-	  (sign_extend:V2DI
+	  (any_extend:V2DI
 	   (vec_select:V2HI
 	    (match_dup 1)
 	    (parallel [(const_int 1)
 		       (const_int 5)]))))
 	 (plus:V2DI
-	  (sign_extend:V2DI
+	  (any_extend:V2DI
 	   (vec_select:V2HI
 	    (match_dup 1)
 	    (parallel [(const_int 2)
 		       (const_int 6)])))
-	  (sign_extend:V2DI
+	  (any_extend:V2DI
 	   (vec_select:V2HI
 	    (match_dup 1)
 	    (parallel [(const_int 3)
 		       (const_int 7)]))))))]
   "TARGET_XOP"
-  "vphaddwq\t{%1, %0|%0, %1}"
+  "vphadd<u>wq\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseiadd1")])
 
-(define_insn "xop_phadddq"
+(define_insn "xop_phadd<u>dq"
   [(set (match_operand:V2DI 0 "register_operand" "=x")
 	(plus:V2DI
-	 (sign_extend:V2DI
+	 (any_extend:V2DI
 	  (vec_select:V2SI
 	   (match_operand:V4SI 1 "nonimmediate_operand" "xm")
 	   (parallel [(const_int 0)
 		      (const_int 2)])))
-	 (sign_extend:V2DI
+	 (any_extend:V2DI
 	  (vec_select:V2SI
 	   (match_dup 1)
 	   (parallel [(const_int 1)
 		      (const_int 3)])))))]
   "TARGET_XOP"
-  "vphadddq\t{%1, %0|%0, %1}"
+  "vphadd<u>dq\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseiadd1")])
 
-(define_insn "xop_phaddubw"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-	(plus:V8HI
-	 (zero_extend:V8HI
-	  (vec_select:V8QI
-	   (match_operand:V16QI 1 "nonimmediate_operand" "xm")
-	   (parallel [(const_int 0)
-		      (const_int 2)
-		      (const_int 4)
-		      (const_int 6)
-		      (const_int 8)
-		      (const_int 10)
-		      (const_int 12)
-		      (const_int 14)])))
-	 (zero_extend:V8HI
-	  (vec_select:V8QI
-	   (match_dup 1)
-	   (parallel [(const_int 1)
-		      (const_int 3)
-		      (const_int 5)
-		      (const_int 7)
-		      (const_int 9)
-		      (const_int 11)
-		      (const_int 13)
-		      (const_int 15)])))))]
-  "TARGET_XOP"
-  "vphaddubw\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseiadd1")])
-
-(define_insn "xop_phaddubd"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-	(plus:V4SI
-	 (plus:V4SI
-	  (zero_extend:V4SI
-	   (vec_select:V4QI
-	    (match_operand:V16QI 1 "nonimmediate_operand" "xm")
-	    (parallel [(const_int 0)
-		       (const_int 4)
-		       (const_int 8)
-		       (const_int 12)])))
-	  (zero_extend:V4SI
-	   (vec_select:V4QI
-	    (match_dup 1)
-	    (parallel [(const_int 1)
-		       (const_int 5)
-		       (const_int 9)
-		       (const_int 13)]))))
-	 (plus:V4SI
-	  (zero_extend:V4SI
-	   (vec_select:V4QI
-	    (match_dup 1)
-	    (parallel [(const_int 2)
-		       (const_int 6)
-		       (const_int 10)
-		       (const_int 14)])))
-	  (zero_extend:V4SI
-	   (vec_select:V4QI
-	    (match_dup 1)
-	    (parallel [(const_int 3)
-		       (const_int 7)
-		       (const_int 11)
-		       (const_int 15)]))))))]
-  "TARGET_XOP"
-  "vphaddubd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseiadd1")])
-
-(define_insn "xop_phaddubq"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-	(plus:V2DI
-	 (plus:V2DI
-	  (plus:V2DI
-	   (zero_extend:V2DI
-	    (vec_select:V2QI
-	     (match_operand:V16QI 1 "nonimmediate_operand" "xm")
-	     (parallel [(const_int 0)
-			(const_int 4)])))
-	   (sign_extend:V2DI
-	    (vec_select:V2QI
-	     (match_dup 1)
-	     (parallel [(const_int 1)
-			(const_int 5)]))))
-	  (plus:V2DI
-	   (zero_extend:V2DI
-	    (vec_select:V2QI
-	     (match_dup 1)
-	     (parallel [(const_int 2)
-			(const_int 6)])))
-	   (zero_extend:V2DI
-	    (vec_select:V2QI
-	     (match_dup 1)
-	     (parallel [(const_int 3)
-			(const_int 7)])))))
-	 (plus:V2DI
-	  (plus:V2DI
-	   (zero_extend:V2DI
-	    (vec_select:V2QI
-	     (match_dup 1)
-	     (parallel [(const_int 8)
-			(const_int 12)])))
-	   (sign_extend:V2DI
-	    (vec_select:V2QI
-	     (match_dup 1)
-	     (parallel [(const_int 9)
-			(const_int 13)]))))
-	  (plus:V2DI
-	   (zero_extend:V2DI
-	    (vec_select:V2QI
-	     (match_dup 1)
-	     (parallel [(const_int 10)
-			(const_int 14)])))
-	   (zero_extend:V2DI
-	    (vec_select:V2QI
-	     (match_dup 1)
-	     (parallel [(const_int 11)
-			(const_int 15)])))))))]
-  "TARGET_XOP"
-  "vphaddubq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseiadd1")])
-
-(define_insn "xop_phadduwd"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-	(plus:V4SI
-	 (zero_extend:V4SI
-	  (vec_select:V4HI
-	   (match_operand:V8HI 1 "nonimmediate_operand" "xm")
-	   (parallel [(const_int 0)
-		      (const_int 2)
-		      (const_int 4)
-		      (const_int 6)])))
-	 (zero_extend:V4SI
-	  (vec_select:V4HI
-	   (match_dup 1)
-	   (parallel [(const_int 1)
-		      (const_int 3)
-		      (const_int 5)
-		      (const_int 7)])))))]
-  "TARGET_XOP"
-  "vphadduwd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseiadd1")])
-
-(define_insn "xop_phadduwq"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-	(plus:V2DI
-	 (plus:V2DI
-	  (zero_extend:V2DI
-	   (vec_select:V2HI
-	    (match_operand:V8HI 1 "nonimmediate_operand" "xm")
-	    (parallel [(const_int 0)
-		       (const_int 4)])))
-	  (zero_extend:V2DI
-	   (vec_select:V2HI
-	    (match_dup 1)
-	    (parallel [(const_int 1)
-		       (const_int 5)]))))
-	 (plus:V2DI
-	  (zero_extend:V2DI
-	   (vec_select:V2HI
-	    (match_dup 1)
-	    (parallel [(const_int 2)
-		       (const_int 6)])))
-	  (zero_extend:V2DI
-	   (vec_select:V2HI
-	    (match_dup 1)
-	    (parallel [(const_int 3)
-		       (const_int 7)]))))))]
-  "TARGET_XOP"
-  "vphadduwq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseiadd1")])
-
-(define_insn "xop_phaddudq"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-	(plus:V2DI
-	 (zero_extend:V2DI
-	  (vec_select:V2SI
-	   (match_operand:V4SI 1 "nonimmediate_operand" "xm")
-	   (parallel [(const_int 0)
-		      (const_int 2)])))
-	 (zero_extend:V2DI
-	  (vec_select:V2SI
-	   (match_dup 1)
-	   (parallel [(const_int 1)
-		      (const_int 3)])))))]
-  "TARGET_XOP"
-  "vphaddudq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseiadd1")])
-
 (define_insn "xop_phsubbw"
   [(set (match_operand:V8HI 0 "register_operand" "=x")
 	(minus:V8HI