diff mbox series

[11/13,APX,EGPR] Handle legacy insns that only support GPR16 (3/5)

Message ID 20230922105631.2298849-12-hongyu.wang@intel.com
State New
Headers show
Series Support Intel APX EGPR | expand

Commit Message

Hongyu Wang Sept. 22, 2023, 10:56 a.m. UTC
From: Kong Lingling <lingling.kong@intel.com>

Disable EGPR usage for below legacy insns in opcode map2/3 that have vex
but no evex counterpart.

insn list:
1. phminposuw/vphminposuw
2. ptest/vptest
3. roundps/vroundps, roundpd/vroundpd,
   roundss/vroundss, roundsd/vroundsd
4. pcmpestri/vpcmpestri, pcmpestrm/vpcmpestrm
5. pcmpistri/vpcmpistri, pcmpistrm/vpcmpistrm
6. aesimc/vaesimc, aeskeygenassist/vaeskeygenassist

gcc/ChangeLog:

	* config/i386/i386-protos.h (x86_evex_reg_mentioned_p): New
	prototype.
	* config/i386/i386.cc (x86_evex_reg_mentioned_p): New
	function.
	* config/i386/i386.md (sse4_1_round<mode>2): Set attr gpr32 0
	and constraint jm to all non-evex alternatives, adjust
	alternative outputs if evex reg is mentioned.
	* config/i386/sse.md (<sse4_1>_ptest<mode>): Set attr gpr32 0
	and constraint jm/ja to all non-evex alternatives.
	(ptesttf2): Likewise.
	(<sse4_1>_round<ssemodesuffix><avxsizesuffix): Likewise.
	(sse4_1_round<ssescalarmodesuffix>): Likewise.
	(sse4_2_pcmpestri): Likewise.
	(sse4_2_pcmpestrm): Likewise.
	(sse4_2_pcmpestr_cconly): Likewise.
	(sse4_2_pcmpistr): Likewise.
	(sse4_2_pcmpistri): Likewise.
	(sse4_2_pcmpistrm): Likewise.
	(sse4_2_pcmpistr_cconly): Likewise.
	(aesimc): Likewise.
	(aeskeygenassist): Likewise.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/apx-legacy-insn-check-norex2.c: Add intrinsic
	tests.

Co-authored-by: Hongyu Wang <hongyu.wang@intel.com>
Co-authored-by: Hongtao Liu <hongtao.liu@intel.com>
---
 gcc/config/i386/i386-protos.h                 |  1 +
 gcc/config/i386/i386.cc                       | 13 +++
 gcc/config/i386/i386.md                       |  3 +-
 gcc/config/i386/sse.md                        | 93 +++++++++++++------
 .../i386/apx-legacy-insn-check-norex2.c       | 55 ++++++++++-
 5 files changed, 132 insertions(+), 33 deletions(-)
diff mbox series

Patch

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index a54e3f6b1dc..28d0eab11d5 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -65,6 +65,7 @@  extern bool extended_reg_mentioned_p (rtx);
 extern bool x86_extended_QIreg_mentioned_p (rtx_insn *);
 extern bool x86_extended_reg_mentioned_p (rtx);
 extern bool x86_extended_rex2reg_mentioned_p (rtx);
+extern bool x86_evex_reg_mentioned_p (rtx [], int);
 extern bool x86_maybe_negate_const_int (rtx *, machine_mode);
 extern machine_mode ix86_cc_mode (enum rtx_code, rtx, rtx);
 
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 5d47c2af25e..58fa054635a 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -22937,6 +22937,19 @@  x86_extended_rex2reg_mentioned_p (rtx insn)
   return false;
 }
 
+/* Return true when rtx operands mentions register that must be encoded using
+   evex prefix.  */
+bool
+x86_evex_reg_mentioned_p (rtx operands[], int nops)
+{
+  int i;
+  for (i = 0; i < nops; i++)
+    if (EXT_REX_SSE_REG_P (operands[i])
+	|| x86_extended_rex2reg_mentioned_p (operands[i]))
+      return true;
+  return false;
+}
+
 /* If profitable, negate (without causing overflow) integer constant
    of mode MODE at location LOC.  Return true in this case.  */
 bool
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 6cf86b798a8..271d417146c 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -21603,7 +21603,7 @@  (define_expand "significand<mode>2"
 (define_insn "sse4_1_round<mode>2"
   [(set (match_operand:MODEFH 0 "register_operand" "=x,x,x,v,v")
 	(unspec:MODEFH
-	  [(match_operand:MODEFH 1 "nonimmediate_operand" "0,x,m,v,m")
+	  [(match_operand:MODEFH 1 "nonimmediate_operand" "0,x,jm,v,m")
 	   (match_operand:SI 2 "const_0_to_15_operand")]
 	  UNSPEC_ROUND))]
   "TARGET_SSE4_1"
@@ -21616,6 +21616,7 @@  (define_insn "sse4_1_round<mode>2"
   [(set_attr "type" "ssecvt")
    (set_attr "prefix_extra" "1,1,1,*,*")
    (set_attr "length_immediate" "1")
+   (set_attr "gpr32" "1,1,0,1,1")
    (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,evex,evex")
    (set_attr "isa" "noavx512f,noavx512f,noavx512f,avx512f,avx512f")
    (set_attr "avx_partial_xmm_update" "false,false,true,false,true")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index a7858a7f8cf..4db3940e422 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -22610,11 +22610,12 @@  (define_insn "avx2_pblendd<mode>"
 
 (define_insn "sse4_1_phminposuw"
   [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
-	(unspec:V8HI [(match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,xm")]
+	(unspec:V8HI [(match_operand:V8HI 1 "vector_operand" "Yrja,*xja,xjm")]
 		     UNSPEC_PHMINPOSUW))]
   "TARGET_SSE4_1"
   "%vphminposuw\t{%1, %0|%0, %1}"
   [(set_attr "isa" "noavx,noavx,avx")
+   (set_attr "gpr32" "0")
    (set_attr "type" "sselog1")
    (set_attr "prefix_extra" "1")
    (set_attr "prefix" "orig,orig,vex")
@@ -23803,12 +23804,13 @@  (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
 (define_insn "*<sse4_1>_ptest<mode>"
   [(set (reg FLAGS_REG)
 	(unspec [(match_operand:V_AVX 0 "register_operand" "Yr, *x, x")
-		 (match_operand:V_AVX 1 "vector_operand" "YrBm, *xBm, xm")]
+		 (match_operand:V_AVX 1 "vector_operand" "Yrja, *xja, xjm")]
 		UNSPEC_PTEST))]
   "TARGET_SSE4_1 && ix86_match_ptest_ccmode (insn)"
   "%vptest\t{%1, %0|%0, %1}"
   [(set_attr "isa" "noavx,noavx,avx")
    (set_attr "type" "ssecomi")
+   (set_attr "gpr32" "0")
    (set_attr "prefix_extra" "1")
    (set_attr "prefix" "orig,orig,vex")
    (set (attr "btver2_decode")
@@ -23845,12 +23847,13 @@  (define_expand "<sse4_1>_ptest<mode>"
 (define_insn "ptesttf2"
   [(set (reg:CC FLAGS_REG)
 	(unspec:CC [(match_operand:TF 0 "register_operand" "Yr, *x, x")
-		    (match_operand:TF 1 "vector_operand" "YrBm, *xBm, xm")]
+		    (match_operand:TF 1 "vector_operand" "Yrja, *xja, xjm")]
 		   UNSPEC_PTEST))]
   "TARGET_SSE4_1"
   "%vptest\t{%1, %0|%0, %1}"
   [(set_attr "isa" "noavx,noavx,avx")
    (set_attr "type" "ssecomi")
+   (set_attr "gpr32" "0")
    (set_attr "prefix_extra" "1")
    (set_attr "prefix" "orig,orig,vex")
    (set_attr "mode" "TI")])
@@ -23961,13 +23964,14 @@  (define_expand "lrint<mode><sseintvecmodelower>2"
 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
   [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
 	(unspec:VF_128_256
-	  [(match_operand:VF_128_256 1 "vector_operand" "YrBm,*xBm,xm")
+	  [(match_operand:VF_128_256 1 "vector_operand" "Yrja,*xja,xjm")
 	   (match_operand:SI 2 "const_0_to_15_operand")]
 	  UNSPEC_ROUND))]
   "TARGET_SSE4_1"
   "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "isa" "noavx,noavx,avx")
    (set_attr "type" "ssecvt")
+   (set_attr "gpr32" "0")
    (set_attr "prefix_data16" "1,1,*")
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
@@ -24054,19 +24058,32 @@  (define_insn "sse4_1_round<ssescalarmodesuffix>"
   [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
 	(vec_merge:VF_128
 	  (unspec:VF_128
-	    [(match_operand:VF_128 2 "nonimmediate_operand" "Yrm,*xm,xm,vm")
+	    [(match_operand:VF_128 2 "nonimmediate_operand" "Yrjm,*xjm,xjm,vm")
 	     (match_operand:SI 3 "const_0_to_15_operand")]
 	    UNSPEC_ROUND)
 	  (match_operand:VF_128 1 "register_operand" "0,0,x,v")
 	  (const_int 1)))]
   "TARGET_SSE4_1"
-  "@
-   round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3}
-   round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3}
-   vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}
-   vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
-  [(set_attr "isa" "noavx,noavx,avx,avx512f")
+{
+  switch (which_alternative)
+    {
+      case 0:
+      case 1:
+	return "round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3}";
+      case 2:
+	return "vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}";
+      case 3:
+	if (x86_evex_reg_mentioned_p (operands, 3))
+	  return "vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}";
+	else
+	  return "vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}";
+      default:
+	gcc_unreachable ();
+    }
+}
+  [(set_attr "isa" "noavx,noavx,noavx512f,avx512f")
    (set_attr "type" "ssecvt")
+   (set_attr "gpr32" "0,0,0,1")
    (set_attr "length_immediate" "1")
    (set_attr "prefix_data16" "1,1,*,*")
    (set_attr "prefix_extra" "1")
@@ -24078,19 +24095,32 @@  (define_insn "*sse4_1_round<ssescalarmodesuffix>"
 	(vec_merge:VFH_128
 	  (vec_duplicate:VFH_128
 	    (unspec:<ssescalarmode>
-	      [(match_operand:<ssescalarmode> 2 "nonimmediate_operand" "Yrm,*xm,xm,vm")
+	      [(match_operand:<ssescalarmode> 2 "nonimmediate_operand" "Yrjm,*xjm,xjm,vm")
 	       (match_operand:SI 3 "const_0_to_15_operand")]
 	      UNSPEC_ROUND))
 	  (match_operand:VFH_128 1 "register_operand" "0,0,x,v")
 	  (const_int 1)))]
   "TARGET_SSE4_1"
-  "@
-   round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
-   round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
-   vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
-   vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "isa" "noavx,noavx,avx,avx512f")
+{
+  switch (which_alternative)
+    {
+      case 0:
+      case 1:
+	return "round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}";
+      case 2:
+	return "vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+      case 3:
+	if (x86_evex_reg_mentioned_p (operands, 3) || <MODE>mode == V8HFmode)
+	  return "vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+	else
+	  return "vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+      default:
+	gcc_unreachable ();
+    }
+}
+  [(set_attr "isa" "noavx,noavx,noavx512f,avx512f")
    (set_attr "type" "ssecvt")
+   (set_attr "gpr32" "0,0,0,1")
    (set_attr "length_immediate" "1")
    (set_attr "prefix_data16" "1,1,*,*")
    (set_attr "prefix_extra" "1")
@@ -24311,7 +24341,7 @@  (define_insn "sse4_2_pcmpestri"
 	(unspec:SI
 	  [(match_operand:V16QI 1 "register_operand" "x,x")
 	   (match_operand:SI 2 "register_operand" "a,a")
-	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
+	   (match_operand:V16QI 3 "nonimmediate_operand" "x,jm")
 	   (match_operand:SI 4 "register_operand" "d,d")
 	   (match_operand:SI 5 "const_0_to_255_operand")]
 	  UNSPEC_PCMPESTR))
@@ -24326,6 +24356,7 @@  (define_insn "sse4_2_pcmpestri"
   "TARGET_SSE4_2"
   "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
   [(set_attr "type" "sselog")
+   (set_attr "gpr32" "0")
    (set_attr "prefix_extra" "1")
    (set_attr "prefix" "maybe_vex")
    (set_attr "length_immediate" "1")
@@ -24338,7 +24369,7 @@  (define_insn "sse4_2_pcmpestrm"
 	(unspec:V16QI
 	  [(match_operand:V16QI 1 "register_operand" "x,x")
 	   (match_operand:SI 2 "register_operand" "a,a")
-	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
+	   (match_operand:V16QI 3 "nonimmediate_operand" "x,jm")
 	   (match_operand:SI 4 "register_operand" "d,d")
 	   (match_operand:SI 5 "const_0_to_255_operand")]
 	  UNSPEC_PCMPESTR))
@@ -24353,6 +24384,7 @@  (define_insn "sse4_2_pcmpestrm"
   "TARGET_SSE4_2"
   "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
   [(set_attr "type" "sselog")
+   (set_attr "gpr32" "0")
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
    (set_attr "prefix" "maybe_vex")
@@ -24365,7 +24397,7 @@  (define_insn "sse4_2_pcmpestr_cconly"
 	(unspec:CC
 	  [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
 	   (match_operand:SI 3 "register_operand" "a,a,a,a")
-	   (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
+	   (match_operand:V16QI 4 "nonimmediate_operand" "x,jm,x,jm")
 	   (match_operand:SI 5 "register_operand" "d,d,d,d")
 	   (match_operand:SI 6 "const_0_to_255_operand")]
 	  UNSPEC_PCMPESTR))
@@ -24378,6 +24410,7 @@  (define_insn "sse4_2_pcmpestr_cconly"
    %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
    %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
   [(set_attr "type" "sselog")
+   (set_attr "gpr32" "0")
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
    (set_attr "memory" "none,load,none,load")
@@ -24389,7 +24422,7 @@  (define_insn_and_split "sse4_2_pcmpistr"
   [(set (match_operand:SI 0 "register_operand" "=c,c")
 	(unspec:SI
 	  [(match_operand:V16QI 2 "register_operand" "x,x")
-	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
+	   (match_operand:V16QI 3 "nonimmediate_operand" "x,jm")
 	   (match_operand:SI 4 "const_0_to_255_operand")]
 	  UNSPEC_PCMPISTR))
    (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
@@ -24432,6 +24465,7 @@  (define_insn_and_split "sse4_2_pcmpistr"
   DONE;
 }
   [(set_attr "type" "sselog")
+   (set_attr "gpr32" "0")
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
    (set_attr "memory" "none,load")
@@ -24441,7 +24475,7 @@  (define_insn "sse4_2_pcmpistri"
   [(set (match_operand:SI 0 "register_operand" "=c,c")
 	(unspec:SI
 	  [(match_operand:V16QI 1 "register_operand" "x,x")
-	   (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
+	   (match_operand:V16QI 2 "nonimmediate_operand" "x,jm")
 	   (match_operand:SI 3 "const_0_to_255_operand")]
 	  UNSPEC_PCMPISTR))
    (set (reg:CC FLAGS_REG)
@@ -24453,6 +24487,7 @@  (define_insn "sse4_2_pcmpistri"
   "TARGET_SSE4_2"
   "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
   [(set_attr "type" "sselog")
+   (set_attr "gpr32" "0")
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
    (set_attr "prefix" "maybe_vex")
@@ -24464,7 +24499,7 @@  (define_insn "sse4_2_pcmpistrm"
   [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
 	(unspec:V16QI
 	  [(match_operand:V16QI 1 "register_operand" "x,x")
-	   (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
+	   (match_operand:V16QI 2 "nonimmediate_operand" "x,jm")
 	   (match_operand:SI 3 "const_0_to_255_operand")]
 	  UNSPEC_PCMPISTR))
    (set (reg:CC FLAGS_REG)
@@ -24476,6 +24511,7 @@  (define_insn "sse4_2_pcmpistrm"
   "TARGET_SSE4_2"
   "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
   [(set_attr "type" "sselog")
+   (set_attr "gpr32" "0")
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
    (set_attr "prefix" "maybe_vex")
@@ -24487,7 +24523,7 @@  (define_insn "sse4_2_pcmpistr_cconly"
   [(set (reg:CC FLAGS_REG)
 	(unspec:CC
 	  [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
-	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
+	   (match_operand:V16QI 3 "nonimmediate_operand" "x,jm,x,jm")
 	   (match_operand:SI 4 "const_0_to_255_operand")]
 	  UNSPEC_PCMPISTR))
    (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
@@ -24499,6 +24535,7 @@  (define_insn "sse4_2_pcmpistr_cconly"
    %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
    %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
   [(set_attr "type" "sselog")
+   (set_attr "gpr32" "0")
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
    (set_attr "memory" "none,load,none,load")
@@ -25983,23 +26020,25 @@  (define_insn "aesdeclast"
 
 (define_insn "aesimc"
   [(set (match_operand:V2DI 0 "register_operand" "=x")
-	(unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")]
+	(unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xja")]
 		      UNSPEC_AESIMC))]
   "TARGET_AES"
   "%vaesimc\t{%1, %0|%0, %1}"
   [(set_attr "type" "sselog1")
+   (set_attr "gpr32" "0")
    (set_attr "prefix_extra" "1")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "TI")])
 
 (define_insn "aeskeygenassist"
   [(set (match_operand:V2DI 0 "register_operand" "=x")
-	(unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")
+	(unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xja")
 		      (match_operand:SI 2 "const_0_to_255_operand")]
 		     UNSPEC_AESKEYGENASSIST))]
   "TARGET_AES"
   "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sselog1")
+   (set_attr "gpr32" "0")
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
    (set_attr "prefix" "maybe_vex")
diff --git a/gcc/testsuite/gcc.target/i386/apx-legacy-insn-check-norex2.c b/gcc/testsuite/gcc.target/i386/apx-legacy-insn-check-norex2.c
index 510213a6ca7..771bcb078e1 100644
--- a/gcc/testsuite/gcc.target/i386/apx-legacy-insn-check-norex2.c
+++ b/gcc/testsuite/gcc.target/i386/apx-legacy-insn-check-norex2.c
@@ -45,13 +45,22 @@  typedef union
   DTYPE a[16];
 } tmp_u;
 
-__attribute__((target("sse4.2")))
+__attribute__((target("sse4.2,aes")))
 void sse_test ()
 {
   register tmp_u *tdst __asm__("%r16");
   register tmp_u *src1 __asm__("%r17");
   register tmp_u *src2 __asm__("%r18");
- 
+
+  src1->xi[0] = _mm_minpos_epu16 (src1->xi[1]);
+  src1->a[2] = _mm_testc_si128 (src1->xi[3], src2->xi[4]);
+  src1->xf[3] = _mm_round_ss (src1->xf[5], src2->xf[6],
+			      _MM_FROUND_CUR_DIRECTION);
+  src1->xf[4] = _mm_round_ps (src1->xf[7], _MM_FROUND_CUR_DIRECTION);
+  src1->xd[0] = _mm_round_sd (src1->xd[2], src2->xd[3],
+			      _MM_FROUND_CUR_DIRECTION);
+  src1->xd[1] = _mm_round_pd (src1->xd[4], _MM_FROUND_CUR_DIRECTION);
+
   src1->xi[0] = _mm_hadd_epi16 (tdst->xi[2], src2->xi[3]);
   src1->xi[1] = _mm_hadd_epi32 (tdst->xi[0], src2->xi[1]);
   tdst->xi[2] = _mm_hadds_epi16 (src1->xi[4], src2->xi[5]);
@@ -77,16 +86,33 @@  void sse_test ()
   tdst->xi[1] = _mm_sign_epi8 (src1->xi[5], src2->xi[6]);
   tdst->xi[2] = _mm_sign_epi16 (src1->xi[7], src2->xi[0]);
   tdst->xi[3] = _mm_sign_epi32 (src1->xi[1], src2->xi[2]);
+
+  tdst->a[2] = _mm_cmpestri (src1->xi[3], 16, src2->xi[4], 16, 0x0c);
+  tdst->xi[4] = _mm_cmpestrm (src1->xi[3], 16, src2->xi[4], 16, 0x20);
+  tdst->a[5] = _mm_cmpistri (src1->xi[5], src2->xi[6], 0x30);
+  tdst->xi[6] = _mm_cmpistrm (src1->xi[5], src2->xi[6], 0x40);
+
+  tdst->xi[7] = _mm_aesimc_si128 (src1->xi[7]);
+  tdst->xi[0] = _mm_aeskeygenassist_si128 (src1->xi[1], 0x1b);
 }
 
-__attribute__((target("avx2")))
+__attribute__((target("avx2,aes")))
 void vex_test ()
 {
 
   register tmp_u *tdst __asm__("%r16");
   register tmp_u *src1 __asm__("%r17");
   register tmp_u *src2 __asm__("%r18");
-  
+ 
+  src1->xi[0] = _mm_minpos_epu16 (src1->xi[1]);
+  src1->a[2] = _mm256_testc_si256 (src1->yi[2], src2->yi[3]);
+  src1->xf[3] = _mm_round_ss (src1->xf[5], src2->xf[6],
+			      _MM_FROUND_CUR_DIRECTION);
+  src1->yf[4] = _mm256_round_ps (src1->yf[2], _MM_FROUND_CUR_DIRECTION);
+  src1->xd[0] = _mm_round_sd (src1->xd[2], src2->xd[3],
+			      _MM_FROUND_CUR_DIRECTION);
+  src1->yd[1] = _mm256_round_pd (src1->yd[3], _MM_FROUND_CUR_DIRECTION);
+ 
   src1->yi[1] = _mm256_hadd_epi16 (tdst->yi[2], src2->yi[3]);
   src1->yi[2] = _mm256_hadd_epi32 (tdst->yi[0], src2->yi[1]);
   tdst->yi[3] = _mm256_hadds_epi16 (src1->yi[1], src2->yi[2]);
@@ -98,7 +124,6 @@  void vex_test ()
   src1->yi[1] = _mm256_cmpgt_epi64 (tdst->yi[3], src2->yi[0]);
 
   tdst->yf[2] = _mm256_dp_ps (src1->yf[0], src2->yf[1], 0xbf);
-  tdst->xd[3] = _mm_dp_pd (src1->xd[0], src2->xd[1], 0xbf);
 
   tdst->yi[3] = _mm256_mpsadbw_epu8 (src1->yi[1], src2->yi[1], 0xc1);
 
@@ -112,6 +137,14 @@  void vex_test ()
   tdst->yi[2] = _mm256_sign_epi8 (src1->yi[0], src2->yi[1]);
   tdst->yi[3] = _mm256_sign_epi16 (src1->yi[2], src2->yi[3]);
   tdst->yi[0] = _mm256_sign_epi32 (src1->yi[0], src2->yi[1]);
+
+  tdst->a[2] = _mm_cmpestri (src1->xi[3], 16, src2->xi[4], 16, 0x0c);
+  tdst->xi[4] = _mm_cmpestrm (src1->xi[3], 16, src2->xi[4], 16, 0x20);
+  tdst->a[5] = _mm_cmpistri (src1->xi[5], src2->xi[6], 0x30);
+  tdst->xi[6] = _mm_cmpistrm (src1->xi[5], src2->xi[6], 0x40);
+
+  tdst->xi[7] = _mm_aesimc_si128 (src1->xi[7]);
+  tdst->xi[0] = _mm_aeskeygenassist_si128 (src1->xi[1], 0x1b);
 }
 
 /* { dg-final { scan-assembler-not "v?pcmpeqq\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */
@@ -134,3 +167,15 @@  void vex_test ()
 /* { dg-final { scan-assembler-not "v?psignb\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */
 /* { dg-final { scan-assembler-not "v?psignw\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */
 /* { dg-final { scan-assembler-not "v?psignd\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */
+/* { dg-final { scan-assembler-not "v?phminposuw\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */
+/* { dg-final { scan-assembler-not "v?ptest\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */
+/* { dg-final { scan-assembler-not "v?roundss\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */
+/* { dg-final { scan-assembler-not "v?roundsd\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */
+/* { dg-final { scan-assembler-not "v?roundps\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */
+/* { dg-final { scan-assembler-not "v?roundpd\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */
+/* { dg-final { scan-assembler-not "v?pcmpestri\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */
+/* { dg-final { scan-assembler-not "v?pcmpistri\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */
+/* { dg-final { scan-assembler-not "v?pcmpestrm\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */
+/* { dg-final { scan-assembler-not "v?pcmpistrm\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */
+/* { dg-final { scan-assembler-not "v?aesimc\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */
+/* { dg-final { scan-assembler-not "v?aeskeygenassist\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */