diff mbox

[AArch64] Fix some saturating math NEON intrinsics types

Message ID 53B130E6.7020501@arm.com
State New
Headers show

Commit Message

Kyrylo Tkachov June 30, 2014, 9:41 a.m. UTC
On 20/06/14 09:41, Marcus Shawcroft wrote:
> On 16 June 2014 15:26, Kyrill Tkachov <kyrylo.tkachov@arm.com> wrote:
>> Hi all,
>>
>> I noticed that a few saturating math intrinsics in arm_neon.h for aarch64
>> have the wrong types, i.e. not what's mandated by the ACLE spec.
>>
>> This patch fixes that by adjusting the types of the builtin functions that
>> those intrinsics map to (and in the process cleaning up the VCON iterator)
>> and adding tests for the affected intrinsics.
>>
>> I realise it's quite big, but the changes are mostly uniform.
>>
>> Bootstrapped and tested aarch64-none-linux-gnu.
>>
>> Ok for trunk?
> OK, can you prepare a 4.9 backport?
> Cheers
> /Marcus
>

Here it is.
Now it applies cleanly to 4.9 and the tests are in gcc.target/aarch64 
instead of gcc.target/aarch64/simd

Tested on aarch64-none-elf and aarch64_be-none-elf.

Ok to apply?

Thanks,
Kyrill

Comments

Marcus Shawcroft June 30, 2014, 10:44 a.m. UTC | #1
On 30 June 2014 10:41, Kyrill Tkachov <kyrylo.tkachov@arm.com> wrote:
>

> Here it is.
> Now it applies cleanly to 4.9 and the tests are in gcc.target/aarch64
> instead of gcc.target/aarch64/simd
>
> Tested on aarch64-none-elf and aarch64_be-none-elf.
>
> Ok to apply?


OK, thanks.
/Marcus
Kyrylo Tkachov June 30, 2014, 10:51 a.m. UTC | #2
On 30/06/14 11:44, Marcus Shawcroft wrote:
> On 30 June 2014 10:41, Kyrill Tkachov <kyrylo.tkachov@arm.com> wrote:
>> Here it is.
>> Now it applies cleanly to 4.9 and the tests are in gcc.target/aarch64
>> instead of gcc.target/aarch64/simd
>>
>> Tested on aarch64-none-elf and aarch64_be-none-elf.
>>
>> Ok to apply?
>
> OK, thanks.
> /Marcus

Thanks, committed with r212141.

Kyrill
diff mbox

Patch

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 7fed485..a48d0b5 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2735,12 +2735,12 @@ 
 (define_expand "aarch64_sqdmulh_lane<mode>"
   [(match_operand:SD_HSI 0 "register_operand" "")
    (match_operand:SD_HSI 1 "register_operand" "")
-   (match_operand:<VCONQ> 2 "register_operand" "")
+   (match_operand:<VCOND> 2 "register_operand" "")
    (match_operand:SI 3 "immediate_operand" "")]
   "TARGET_SIMD"
   {
-    aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
-    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
+    aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode));
+    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
     emit_insn (gen_aarch64_sqdmulh_lane<mode>_internal (operands[0],
                                                         operands[1],
                                                         operands[2],
@@ -2752,12 +2752,12 @@ 
 (define_expand "aarch64_sqrdmulh_lane<mode>"
   [(match_operand:SD_HSI 0 "register_operand" "")
    (match_operand:SD_HSI 1 "register_operand" "")
-   (match_operand:<VCONQ> 2 "register_operand" "")
+   (match_operand:<VCOND> 2 "register_operand" "")
    (match_operand:SI 3 "immediate_operand" "")]
   "TARGET_SIMD"
   {
-    aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
-    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
+    aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode));
+    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
     emit_insn (gen_aarch64_sqrdmulh_lane<mode>_internal (operands[0],
                                                          operands[1],
                                                          operands[2],
@@ -2771,12 +2771,12 @@ 
         (unspec:SD_HSI
 	  [(match_operand:SD_HSI 1 "register_operand" "w")
            (vec_select:<VEL>
-             (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
+             (match_operand:<VCOND> 2 "register_operand" "<vwx>")
              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
 	 VQDMULH))]
   "TARGET_SIMD"
   "*
-   operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
+   operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
 )
@@ -2812,7 +2812,31 @@ 
 	      (sign_extend:<VWIDE>
 		(vec_duplicate:VD_HSI
 		  (vec_select:<VEL>
-		    (match_operand:<VCON> 3 "register_operand" "<vwx>")
+		    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
+		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
+              ))
+	    (const_int 1))))]
+  "TARGET_SIMD"
+  {
+    operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
+    return
+      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
+  }
+  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>_internal"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+        (SBINQOPS:<VWIDE>
+	  (match_operand:<VWIDE> 1 "register_operand" "0")
+	  (ss_ashift:<VWIDE>
+	    (mult:<VWIDE>
+	      (sign_extend:<VWIDE>
+		(match_operand:VD_HSI 2 "register_operand" "w"))
+	      (sign_extend:<VWIDE>
+		(vec_duplicate:VD_HSI
+		  (vec_select:<VEL>
+		    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
 		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
               ))
 	    (const_int 1))))]
@@ -2835,7 +2859,30 @@ 
 		(match_operand:SD_HSI 2 "register_operand" "w"))
 	      (sign_extend:<VWIDE>
 		(vec_select:<VEL>
-		  (match_operand:<VCON> 3 "register_operand" "<vwx>")
+		  (match_operand:<VCOND> 3 "register_operand" "<vwx>")
+		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
+              )
+	    (const_int 1))))]
+  "TARGET_SIMD"
+  {
+    operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
+    return
+      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
+  }
+  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>_internal"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+        (SBINQOPS:<VWIDE>
+	  (match_operand:<VWIDE> 1 "register_operand" "0")
+	  (ss_ashift:<VWIDE>
+	    (mult:<VWIDE>
+	      (sign_extend:<VWIDE>
+		(match_operand:SD_HSI 2 "register_operand" "w"))
+	      (sign_extend:<VWIDE>
+		(vec_select:<VEL>
+		  (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
 		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
               )
 	    (const_int 1))))]
@@ -2852,12 +2899,12 @@ 
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:<VWIDE> 1 "register_operand" "0")
    (match_operand:VSD_HSI 2 "register_operand" "w")
-   (match_operand:<VCON> 3 "register_operand" "<vwx>")
+   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
    (match_operand:SI 4 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
-  aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode) / 2);
-  operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
+  aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCOND>mode));
+  operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
   emit_insn (gen_aarch64_sqdmlal_lane<mode>_internal (operands[0], operands[1],
 						      operands[2], operands[3],
 						      operands[4]));
@@ -2868,13 +2915,13 @@ 
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:<VWIDE> 1 "register_operand" "0")
    (match_operand:VSD_HSI 2 "register_operand" "w")
-   (match_operand:<VCON> 3 "register_operand" "<vwx>")
+   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
    (match_operand:SI 4 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
-  aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode));
-  operands[4] = GEN_INT (ENDIAN_LANE_N (<VCON>mode, INTVAL (operands[4])));
-  emit_insn (gen_aarch64_sqdmlal_lane<mode>_internal (operands[0], operands[1],
+  aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCONQ>mode));
+  operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
+  emit_insn (gen_aarch64_sqdmlal_laneq<mode>_internal (operands[0], operands[1],
 						      operands[2], operands[3],
 						      operands[4]));
   DONE;
@@ -2884,12 +2931,12 @@ 
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:<VWIDE> 1 "register_operand" "0")
    (match_operand:VSD_HSI 2 "register_operand" "w")
-   (match_operand:<VCON> 3 "register_operand" "<vwx>")
+   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
    (match_operand:SI 4 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
-  aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode) / 2);
-  operands[4] = GEN_INT (ENDIAN_LANE_N (<VCON>mode, INTVAL (operands[4])));
+  aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCOND>mode));
+  operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
   emit_insn (gen_aarch64_sqdmlsl_lane<mode>_internal (operands[0], operands[1],
 						      operands[2], operands[3],
 						      operands[4]));
@@ -2900,13 +2947,13 @@ 
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:<VWIDE> 1 "register_operand" "0")
    (match_operand:VSD_HSI 2 "register_operand" "w")
-   (match_operand:<VCON> 3 "register_operand" "<vwx>")
+   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
    (match_operand:SI 4 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
-  aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode));
-  operands[4] = GEN_INT (ENDIAN_LANE_N (<VCON>mode, INTVAL (operands[4])));
-  emit_insn (gen_aarch64_sqdmlsl_lane<mode>_internal (operands[0], operands[1],
+  aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCONQ>mode));
+  operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
+  emit_insn (gen_aarch64_sqdmlsl_laneq<mode>_internal (operands[0], operands[1],
 						      operands[2], operands[3],
 						      operands[4]));
   DONE;
@@ -2994,7 +3041,33 @@ 
 		(sign_extend:<VWIDE>
                   (vec_duplicate:<VHALF>
 		    (vec_select:<VEL>
-		      (match_operand:<VCON> 3 "register_operand" "<vwx>")
+		      (match_operand:<VCOND> 3 "register_operand" "<vwx>")
+		      (parallel [(match_operand:SI 4 "immediate_operand" "i")])
+		    ))))
+	      (const_int 1))))]
+  "TARGET_SIMD"
+  {
+    operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
+    return
+     "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
+  }
+  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+        (SBINQOPS:<VWIDE>
+	  (match_operand:<VWIDE> 1 "register_operand" "0")
+	  (ss_ashift:<VWIDE>
+	      (mult:<VWIDE>
+		(sign_extend:<VWIDE>
+                  (vec_select:<VHALF>
+                    (match_operand:VQ_HSI 2 "register_operand" "w")
+                    (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
+		(sign_extend:<VWIDE>
+                  (vec_duplicate:<VHALF>
+		    (vec_select:<VEL>
+		      (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
 		      (parallel [(match_operand:SI 4 "immediate_operand" "i")])
 		    ))))
 	      (const_int 1))))]
@@ -3011,13 +3084,13 @@ 
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:<VWIDE> 1 "register_operand" "w")
    (match_operand:VQ_HSI 2 "register_operand" "w")
-   (match_operand:<VCON> 3 "register_operand" "<vwx>")
+   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
    (match_operand:SI 4 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
-  aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode) / 2);
-  operands[4] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[4])));
+  aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCOND>mode));
+  operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
   emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
 						       operands[2], operands[3],
 						       operands[4], p));
@@ -3028,14 +3101,14 @@ 
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:<VWIDE> 1 "register_operand" "w")
    (match_operand:VQ_HSI 2 "register_operand" "w")
-   (match_operand:<VCON> 3 "register_operand" "<vwx>")
+   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
    (match_operand:SI 4 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
-  aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
-  operands[4] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[4])));
-  emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
+  aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCONQ>mode));
+  operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
+  emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
 						       operands[2], operands[3],
 						       operands[4], p));
   DONE;
@@ -3045,13 +3118,13 @@ 
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:<VWIDE> 1 "register_operand" "w")
    (match_operand:VQ_HSI 2 "register_operand" "w")
-   (match_operand:<VCON> 3 "register_operand" "<vwx>")
+   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
    (match_operand:SI 4 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
-  aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode) / 2);
-  operands[4] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[4])));
+  aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCOND>mode));
+  operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
   emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
 						       operands[2], operands[3],
 						       operands[4], p));
@@ -3062,14 +3135,14 @@ 
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:<VWIDE> 1 "register_operand" "w")
    (match_operand:VQ_HSI 2 "register_operand" "w")
-   (match_operand:<VCON> 3 "register_operand" "<vwx>")
+   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
    (match_operand:SI 4 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
-  aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
-  operands[4] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[4])));
-  emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
+  aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCONQ>mode));
+  operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
+  emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
 						       operands[2], operands[3],
 						       operands[4], p));
   DONE;
@@ -3149,7 +3222,28 @@ 
 	       (sign_extend:<VWIDE>
                  (vec_duplicate:VD_HSI
                    (vec_select:<VEL>
-		     (match_operand:<VCON> 2 "register_operand" "<vwx>")
+		     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
+		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
+	       ))
+	     (const_int 1)))]
+  "TARGET_SIMD"
+  {
+    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
+    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
+  }
+  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_sqdmull_laneq<mode>_internal"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+        (ss_ashift:<VWIDE>
+	     (mult:<VWIDE>
+	       (sign_extend:<VWIDE>
+		 (match_operand:VD_HSI 1 "register_operand" "w"))
+	       (sign_extend:<VWIDE>
+                 (vec_duplicate:VD_HSI
+                   (vec_select:<VEL>
+		     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
 		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
 	       ))
 	     (const_int 1)))]
@@ -3169,7 +3263,27 @@ 
 		 (match_operand:SD_HSI 1 "register_operand" "w"))
 	       (sign_extend:<VWIDE>
                  (vec_select:<VEL>
-		   (match_operand:<VCON> 2 "register_operand" "<vwx>")
+		   (match_operand:<VCOND> 2 "register_operand" "<vwx>")
+		   (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
+	       ))
+	     (const_int 1)))]
+  "TARGET_SIMD"
+  {
+    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
+    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
+  }
+  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_sqdmull_laneq<mode>_internal"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+        (ss_ashift:<VWIDE>
+	     (mult:<VWIDE>
+	       (sign_extend:<VWIDE>
+		 (match_operand:SD_HSI 1 "register_operand" "w"))
+	       (sign_extend:<VWIDE>
+                 (vec_select:<VEL>
+		   (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
 		   (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
 	       ))
 	     (const_int 1)))]
@@ -3184,12 +3298,12 @@ 
 (define_expand "aarch64_sqdmull_lane<mode>"
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:VSD_HSI 1 "register_operand" "w")
-   (match_operand:<VCON> 2 "register_operand" "<vwx>")
+   (match_operand:<VCOND> 2 "register_operand" "<vwx>")
    (match_operand:SI 3 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
-  aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCON>mode) / 2);
-  operands[3] = GEN_INT (ENDIAN_LANE_N (<VCON>mode, INTVAL (operands[3])));
+  aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode));
+  operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
   emit_insn (gen_aarch64_sqdmull_lane<mode>_internal (operands[0], operands[1],
 						      operands[2], operands[3]));
   DONE;
@@ -3198,13 +3312,13 @@ 
 (define_expand "aarch64_sqdmull_laneq<mode>"
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:VD_HSI 1 "register_operand" "w")
-   (match_operand:<VCON> 2 "register_operand" "<vwx>")
+   (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
    (match_operand:SI 3 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
-  aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCON>mode));
+  aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
   operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
-  emit_insn (gen_aarch64_sqdmull_lane<mode>_internal
+  emit_insn (gen_aarch64_sqdmull_laneq<mode>_internal
 	       (operands[0], operands[1], operands[2], operands[3]));
   DONE;
 })
@@ -3253,7 +3367,7 @@ 
 (define_expand "aarch64_sqdmull2<mode>"
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:VQ_HSI 1 "register_operand" "w")
-   (match_operand:<VCON> 2 "register_operand" "w")]
+   (match_operand:VQ_HSI 2 "register_operand" "w")]
   "TARGET_SIMD"
 {
   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
@@ -3275,7 +3389,30 @@ 
 	       (sign_extend:<VWIDE>
                  (vec_duplicate:<VHALF>
                    (vec_select:<VEL>
-		     (match_operand:<VCON> 2 "register_operand" "<vwx>")
+		     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
+		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
+	       ))
+	     (const_int 1)))]
+  "TARGET_SIMD"
+  {
+    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
+    return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
+  }
+  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_sqdmull2_laneq<mode>_internal"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+        (ss_ashift:<VWIDE>
+	     (mult:<VWIDE>
+	       (sign_extend:<VWIDE>
+		 (vec_select:<VHALF>
+                   (match_operand:VQ_HSI 1 "register_operand" "w")
+                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
+	       (sign_extend:<VWIDE>
+                 (vec_duplicate:<VHALF>
+                   (vec_select:<VEL>
+		     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
 		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
 	       ))
 	     (const_int 1)))]
@@ -3290,13 +3427,13 @@ 
 (define_expand "aarch64_sqdmull2_lane<mode>"
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:VQ_HSI 1 "register_operand" "w")
-   (match_operand:<VCON> 2 "register_operand" "<vwx>")
+   (match_operand:<VCOND> 2 "register_operand" "<vwx>")
    (match_operand:SI 3 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
-  aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode) / 2);
-  operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
+  aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode));
+  operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
   emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
 						       operands[2], operands[3],
 						       p));
@@ -3306,14 +3443,14 @@ 
 (define_expand "aarch64_sqdmull2_laneq<mode>"
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:VQ_HSI 1 "register_operand" "w")
-   (match_operand:<VCON> 2 "register_operand" "<vwx>")
+   (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
    (match_operand:SI 3 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
-  aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
-  operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
-  emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
+  aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
+  operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
+  emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
 						       operands[2], operands[3],
 						       p));
   DONE;
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index b03d114..c01669b 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -21008,7 +21008,7 @@  vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
 }
 
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
+vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c,
 		       int const __d)
 {
   return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d);
@@ -21030,8 +21030,7 @@  vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
 {
-  int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0)));
-  return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __tmp, __d);
+  return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __c, __d);
 }
 
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
@@ -21059,7 +21058,7 @@  vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
 }
 
 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
+vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c,
 		       int const __d)
 {
   return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d);
@@ -21081,8 +21080,7 @@  vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
 vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
 {
-  int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0)));
-  return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __tmp, __d);
+  return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __c, __d);
 }
 
 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
@@ -21104,7 +21102,7 @@  vqdmlalh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
 }
 
 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d)
+vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d)
 {
   return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d);
 }
@@ -21116,7 +21114,7 @@  vqdmlals_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d)
+vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d)
 {
   return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d);
 }
@@ -21136,7 +21134,7 @@  vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
 }
 
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
+vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c,
 		       int const __d)
 {
   return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d);
@@ -21158,8 +21156,7 @@  vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
 {
-  int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0)));
-  return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __tmp, __d);
+  return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __c, __d);
 }
 
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
@@ -21187,7 +21184,7 @@  vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
 }
 
 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
+vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c,
 		       int const __d)
 {
   return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d);
@@ -21209,8 +21206,7 @@  vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
 vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
 {
-  int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0)));
-  return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __tmp, __d);
+  return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __c, __d);
 }
 
 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
@@ -21232,7 +21228,7 @@  vqdmlslh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
 }
 
 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d)
+vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d)
 {
   return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d);
 }
@@ -21244,7 +21240,7 @@  vqdmlsls_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d)
+vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d)
 {
   return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d);
 }
@@ -21282,7 +21278,7 @@  vqdmulhh_s16 (int16x1_t __a, int16x1_t __b)
 }
 
 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
-vqdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
+vqdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c)
 {
   return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c);
 }
@@ -21294,7 +21290,7 @@  vqdmulhs_s32 (int32x1_t __a, int32x1_t __b)
 }
 
 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
+vqdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c)
 {
   return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c);
 }
@@ -21314,7 +21310,7 @@  vqdmull_high_s16 (int16x8_t __a, int16x8_t __b)
 }
 
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vqdmull_high_lane_s16 (int16x8_t __a, int16x8_t __b, int const __c)
+vqdmull_high_lane_s16 (int16x8_t __a, int16x4_t __b, int const __c)
 {
   return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b,__c);
 }
@@ -21334,8 +21330,7 @@  vqdmull_high_n_s16 (int16x8_t __a, int16_t __b)
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c)
 {
-  int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (__AARCH64_INT64_C (0)));
-  return __builtin_aarch64_sqdmull_lanev4hi (__a, __tmp, __c);
+  return __builtin_aarch64_sqdmull_lanev4hi (__a, __b, __c);
 }
 
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
@@ -21363,7 +21358,7 @@  vqdmull_high_s32 (int32x4_t __a, int32x4_t __b)
 }
 
 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vqdmull_high_lane_s32 (int32x4_t __a, int32x4_t __b, int const __c)
+vqdmull_high_lane_s32 (int32x4_t __a, int32x2_t __b, int const __c)
 {
   return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c);
 }
@@ -21383,8 +21378,7 @@  vqdmull_high_n_s32 (int32x4_t __a, int32_t __b)
 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
 vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c)
 {
-  int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (__AARCH64_INT64_C (0)));
-  return __builtin_aarch64_sqdmull_lanev2si (__a, __tmp, __c);
+  return __builtin_aarch64_sqdmull_lanev2si (__a, __b, __c);
 }
 
 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
@@ -21406,7 +21400,7 @@  vqdmullh_s16 (int16x1_t __a, int16x1_t __b)
 }
 
 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqdmullh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
+vqdmullh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c)
 {
   return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c);
 }
@@ -21418,7 +21412,7 @@  vqdmulls_s32 (int32x1_t __a, int32x1_t __b)
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vqdmulls_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
+vqdmulls_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c)
 {
   return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c);
 }
@@ -21594,7 +21588,7 @@  vqrdmulhh_s16 (int16x1_t __a, int16x1_t __b)
 }
 
 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
-vqrdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
+vqrdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c)
 {
   return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c);
 }
@@ -21606,7 +21600,7 @@  vqrdmulhs_s32 (int32x1_t __a, int32x1_t __b)
 }
 
 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqrdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
+vqrdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c)
 {
   return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c);
 }
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index f1339b8..e76e3ef 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -396,14 +396,15 @@ 
 			(SI   "SI") (HI   "HI")
 			(QI   "QI")])
 
-;; Define container mode for lane selection.
-(define_mode_attr VCOND [(V4HI "V4HI") (V8HI "V4HI")
+;; 64-bit container modes the inner or scalar source mode.
+(define_mode_attr VCOND [(HI "V4HI") (SI "V2SI")
+			 (V4HI "V4HI") (V8HI "V4HI")
 			 (V2SI "V2SI") (V4SI "V2SI")
 			 (DI   "DI") (V2DI "DI")
 			 (V2SF "V2SF") (V4SF "V2SF")
 			 (V2DF "DF")])
 
-;; Define container mode for lane selection.
+;; 128-bit container modes the inner or scalar source mode.
 (define_mode_attr VCONQ [(V8QI "V16QI") (V16QI "V16QI")
 			 (V4HI "V8HI") (V8HI "V8HI")
 			 (V2SI "V4SI") (V4SI "V4SI")
@@ -412,15 +413,6 @@ 
 			 (V2DF "V2DF") (SI   "V4SI")
 			 (HI   "V8HI") (QI   "V16QI")])
 
-;; Define container mode for lane selection.
-(define_mode_attr VCON [(V8QI "V16QI") (V16QI "V16QI")
-			(V4HI "V8HI") (V8HI "V8HI")
-			(V2SI "V4SI") (V4SI "V4SI")
-			(DI   "V2DI") (V2DI "V2DI")
-			(V2SF "V4SF") (V4SF "V4SF")
-			(V2DF "V2DF") (SI   "V4SI")
-			(HI   "V8HI") (QI   "V16QI")])
-
 ;; Half modes of all vector modes.
 (define_mode_attr VHALF [(V8QI "V4QI")  (V16QI "V8QI")
 			 (V4HI "V2HI")  (V8HI  "V4HI")
diff --git a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
index aa041cc..782f6d1 100644
--- a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
+++ b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
@@ -387,7 +387,7 @@  test_vqdmlalh_s16 (int32x1_t a, int16x1_t b, int16x1_t c)
 /* { dg-final { scan-assembler-times "\\tsqdmlal\\ts\[0-9\]+, h\[0-9\]+, v" 1 } } */
 
 int32x1_t
-test_vqdmlalh_lane_s16 (int32x1_t a, int16x1_t b, int16x8_t c)
+test_vqdmlalh_lane_s16 (int32x1_t a, int16x1_t b, int16x4_t c)
 {
   return vqdmlalh_lane_s16 (a, b, c, 3);
 }
@@ -403,7 +403,7 @@  test_vqdmlals_s32 (int64x1_t a, int32x1_t b, int32x1_t c)
 /* { dg-final { scan-assembler-times "\\tsqdmlal\\td\[0-9\]+, s\[0-9\]+, v" 1 } } */
 
 int64x1_t
-test_vqdmlals_lane_s32 (int64x1_t a, int32x1_t b, int32x4_t c)
+test_vqdmlals_lane_s32 (int64x1_t a, int32x1_t b, int32x2_t c)
 {
   return vqdmlals_lane_s32 (a, b, c, 1);
 }
@@ -419,7 +419,7 @@  test_vqdmlslh_s16 (int32x1_t a, int16x1_t b, int16x1_t c)
 /* { dg-final { scan-assembler-times "\\tsqdmlsl\\ts\[0-9\]+, h\[0-9\]+, v" 1 } } */
 
 int32x1_t
-test_vqdmlslh_lane_s16 (int32x1_t a, int16x1_t b, int16x8_t c)
+test_vqdmlslh_lane_s16 (int32x1_t a, int16x1_t b, int16x4_t c)
 {
   return vqdmlslh_lane_s16 (a, b, c, 3);
 }
@@ -435,7 +435,7 @@  test_vqdmlsls_s32 (int64x1_t a, int32x1_t b, int32x1_t c)
 /* { dg-final { scan-assembler-times "\\tsqdmlsl\\td\[0-9\]+, s\[0-9\]+, v" 1 } } */
 
 int64x1_t
-test_vqdmlsls_lane_s32 (int64x1_t a, int32x1_t b, int32x4_t c)
+test_vqdmlsls_lane_s32 (int64x1_t a, int32x1_t b, int32x2_t c)
 {
   return vqdmlsls_lane_s32 (a, b, c, 1);
 }
@@ -451,7 +451,7 @@  test_vqdmulhh_s16 (int16x1_t a, int16x1_t b)
 /* { dg-final { scan-assembler-times "\\tsqdmulh\\th\[0-9\]+, h\[0-9\]+, v" 1 } } */
 
 int16x1_t
-test_vqdmulhh_lane_s16 (int16x1_t a, int16x8_t b)
+test_vqdmulhh_lane_s16 (int16x1_t a, int16x4_t b)
 {
   return vqdmulhh_lane_s16 (a, b, 3);
 }
@@ -467,9 +467,9 @@  test_vqdmulhs_s32 (int32x1_t a, int32x1_t b)
 /* { dg-final { scan-assembler-times "\\tsqdmulh\\ts\[0-9\]+, s\[0-9\]+, v" 1 } } */
 
 int32x1_t
-test_vqdmulhs_lane_s32 (int32x1_t a, int32x4_t b)
+test_vqdmulhs_lane_s32 (int32x1_t a, int32x2_t b)
 {
-  return vqdmulhs_lane_s32 (a, b, 3);
+  return vqdmulhs_lane_s32 (a, b, 1);
 }
 
 /* { dg-final { scan-assembler-times "\\tsqdmull\\ts\[0-9\]+, h\[0-9\]+, h\[0-9\]+" 1 } } */
@@ -483,7 +483,7 @@  test_vqdmullh_s16 (int16x1_t a, int16x1_t b)
 /* { dg-final { scan-assembler-times "\\tsqdmull\\ts\[0-9\]+, h\[0-9\]+, v" 1 } } */
 
 int32x1_t
-test_vqdmullh_lane_s16 (int16x1_t a, int16x8_t b)
+test_vqdmullh_lane_s16 (int16x1_t a, int16x4_t b)
 {
   return vqdmullh_lane_s16 (a, b, 3);
 }
@@ -499,7 +499,7 @@  test_vqdmulls_s32 (int32x1_t a, int32x1_t b)
 /* { dg-final { scan-assembler-times "\\tsqdmull\\td\[0-9\]+, s\[0-9\]+, v" 1 } } */
 
 int64x1_t
-test_vqdmulls_lane_s32 (int32x1_t a, int32x4_t b)
+test_vqdmulls_lane_s32 (int32x1_t a, int32x2_t b)
 {
   return vqdmulls_lane_s32 (a, b, 1);
 }
@@ -515,9 +515,9 @@  test_vqrdmulhh_s16 (int16x1_t a, int16x1_t b)
 /* { dg-final { scan-assembler-times "\\tsqrdmulh\\th\[0-9\]+, h\[0-9\]+, v" 1 } } */
 
 int16x1_t
-test_vqrdmulhh_lane_s16 (int16x1_t a, int16x8_t b)
+test_vqrdmulhh_lane_s16 (int16x1_t a, int16x4_t b)
 {
-  return vqrdmulhh_lane_s16 (a, b, 6);
+  return vqrdmulhh_lane_s16 (a, b, 3);
 }
 
 /* { dg-final { scan-assembler-times "\\tsqrdmulh\\ts\[0-9\]+, s\[0-9\]+, s\[0-9\]+" 1 } } */
@@ -531,9 +531,9 @@  test_vqrdmulhs_s32 (int32x1_t a, int32x1_t b)
 /* { dg-final { scan-assembler-times "\\tsqrdmulh\\ts\[0-9\]+, s\[0-9\]+, v" 1 } } */
 
 int32x1_t
-test_vqrdmulhs_lane_s32 (int32x1_t a, int32x4_t b)
+test_vqrdmulhs_lane_s32 (int32x1_t a, int32x2_t b)
 {
-  return vqrdmulhs_lane_s32 (a, b, 2);
+  return vqrdmulhs_lane_s32 (a, b, 1);
 }
 
 /* { dg-final { scan-assembler-times "\\tsuqadd\\tb\[0-9\]+" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vector_intrinsics.c b/gcc/testsuite/gcc.target/aarch64/vector_intrinsics.c
index affb8a8..52b0496 100644
--- a/gcc/testsuite/gcc.target/aarch64/vector_intrinsics.c
+++ b/gcc/testsuite/gcc.target/aarch64/vector_intrinsics.c
@@ -1,7 +1,7 @@ 
 /* { dg-do compile } */
 /* { dg-options "-O2" } */
 
-#include "../../../config/aarch64/arm_neon.h"
+#include "arm_neon.h"
 
 
 /* { dg-final { scan-assembler-times "\\tfmax\\tv\[0-9\]+\.2s, v\[0-9\].2s, v\[0-9\].2s" 1 } } */
@@ -305,7 +305,7 @@  test_vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
 /* { dg-final { scan-assembler-times "\\tsqdmlal2\\tv\[0-9\]+\.4s, v\[0-9\]+\.8h, v\[0-9\]+\.h" 3 } } */
 
 int32x4_t
-test_vqdmlal_high_lane_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
+test_vqdmlal_high_lane_s16 (int32x4_t a, int16x8_t b, int16x4_t c)
 {
   return vqdmlal_high_lane_s16 (a, b, c, 3);
 }
@@ -361,7 +361,7 @@  test_vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
 /* { dg-final { scan-assembler-times "\\tsqdmlal2\\tv\[0-9\]+\.2d, v\[0-9\]+\.4s, v\[0-9\]+\.s" 3 } } */
 
 int64x2_t
-test_vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
+test_vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c)
 {
   return vqdmlal_high_lane_s32 (__a, __b, __c, 1);
 }
@@ -417,7 +417,7 @@  test_vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
 /* { dg-final { scan-assembler-times "\\tsqdmlsl2\\tv\[0-9\]+\.4s, v\[0-9\]+\.8h, v\[0-9\]+\.h" 3 } } */
 
 int32x4_t
-test_vqdmlsl_high_lane_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
+test_vqdmlsl_high_lane_s16 (int32x4_t a, int16x8_t b, int16x4_t c)
 {
   return vqdmlsl_high_lane_s16 (a, b, c, 3);
 }
@@ -473,7 +473,7 @@  test_vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
 /* { dg-final { scan-assembler-times "\\tsqdmlsl2\\tv\[0-9\]+\.2d, v\[0-9\]+\.4s, v\[0-9\]+\.s" 3 } } */
 
 int64x2_t
-test_vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
+test_vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c)
 {
   return vqdmlsl_high_lane_s32 (__a, __b, __c, 1);
 }
@@ -529,7 +529,7 @@  test_vqdmull_high_s16 (int16x8_t __a, int16x8_t __b)
 /* { dg-final { scan-assembler-times "\\tsqdmull2\\tv\[0-9\]+\.4s, v\[0-9\]+\.8h, v\[0-9\]+\.h" 3 } } */
 
 int32x4_t
-test_vqdmull_high_lane_s16 (int16x8_t a, int16x8_t b)
+test_vqdmull_high_lane_s16 (int16x8_t a, int16x4_t b)
 {
   return vqdmull_high_lane_s16 (a, b, 3);
 }
@@ -585,7 +585,7 @@  test_vqdmull_high_s32 (int32x4_t __a, int32x4_t __b)
 /* { dg-final { scan-assembler-times "\\tsqdmull2\\tv\[0-9\]+\.2d, v\[0-9\]+\.4s, v\[0-9\]+\.s" 3 } } */
 
 int64x2_t
-test_vqdmull_high_lane_s32 (int32x4_t __a, int32x4_t __b)
+test_vqdmull_high_lane_s32 (int32x4_t __a, int32x2_t __b)
 {
   return vqdmull_high_lane_s32 (__a, __b, 1);
 }
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmlal_high_lane_s16.c b/gcc/testsuite/gcc.target/aarch64/vqdmlal_high_lane_s16.c
new file mode 100644
index 0000000..1388c3b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmlal_high_lane_s16.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmlal_high_lane_s16 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int32x4_t
+t_vqdmlal_high_lane_s16 (int32x4_t a, int16x8_t b, int16x4_t c)
+{
+  return vqdmlal_high_lane_s16 (a, b, c, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmlal2\[ \t\]+\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.8\[hH\], ?\[vV\]\[0-9\]+\.\[hH\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmlal_high_lane_s32.c b/gcc/testsuite/gcc.target/aarch64/vqdmlal_high_lane_s32.c
new file mode 100644
index 0000000..f90387d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmlal_high_lane_s32.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmlal_high_lane_s32 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int64x2_t
+t_vqdmlal_high_lane_s32 (int64x2_t a, int32x4_t b, int32x2_t c)
+{
+  return vqdmlal_high_lane_s32 (a, b, c, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmlal2\[ \t\]+\[vV\]\[0-9\]+\.2\[dD\], ?\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmlal_high_laneq_s16.c b/gcc/testsuite/gcc.target/aarch64/vqdmlal_high_laneq_s16.c
new file mode 100644
index 0000000..5399ce9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmlal_high_laneq_s16.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmlal_high_laneq_s16 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int32x4_t
+t_vqdmlal_high_laneq_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
+{
+  return vqdmlal_high_laneq_s16 (a, b, c, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmlal2\[ \t\]+\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.8\[hH\], ?\[vV\]\[0-9\]+\.\[hH\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmlal_high_laneq_s32.c b/gcc/testsuite/gcc.target/aarch64/vqdmlal_high_laneq_s32.c
new file mode 100644
index 0000000..e4b5558
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmlal_high_laneq_s32.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmlal_high_laneq_s32 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int64x2_t
+t_vqdmlal_high_laneq_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
+{
+  return vqdmlal_high_laneq_s32 (a, b, c, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmlal2\[ \t\]+\[vV\]\[0-9\]+\.2\[dD\], ?\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmlal_lane_s16.c b/gcc/testsuite/gcc.target/aarch64/vqdmlal_lane_s16.c
new file mode 100644
index 0000000..7e60c82
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmlal_lane_s16.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmlal_lane_s16 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int32x4_t
+t_vqdmlal_lane_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
+{
+  return vqdmlal_lane_s16 (a, b, c, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmlal\[ \t\]+\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.4\[hH\], ?\[vV\]\[0-9\]+\.\[hH\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmlal_lane_s32.c b/gcc/testsuite/gcc.target/aarch64/vqdmlal_lane_s32.c
new file mode 100644
index 0000000..c0f508d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmlal_lane_s32.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmlal_lane_s32 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int64x2_t
+t_vqdmlal_lane_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
+{
+  return vqdmlal_lane_s32 (a, b, c, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmlal\[ \t\]+\[vV\]\[0-9\]+\.2\[dD\], ?\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmlal_laneq_s16.c b/gcc/testsuite/gcc.target/aarch64/vqdmlal_laneq_s16.c
new file mode 100644
index 0000000..9bf1304
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmlal_laneq_s16.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmlal_laneq_s16 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int32x4_t
+t_vqdmlal_laneq_s16 (int32x4_t a, int16x4_t b, int16x8_t c)
+{
+  return vqdmlal_laneq_s16 (a, b, c, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmlal\[ \t\]+\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.4\[hH\], ?\[vV\]\[0-9\]+\.\[hH\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmlal_laneq_s32.c b/gcc/testsuite/gcc.target/aarch64/vqdmlal_laneq_s32.c
new file mode 100644
index 0000000..5fd9c56
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmlal_laneq_s32.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmlal_laneq_s32 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int64x2_t
+t_vqdmlal_laneq_s32 (int64x2_t a, int32x2_t b, int32x4_t c)
+{
+  return vqdmlal_laneq_s32 (a, b, c, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmlal\[ \t\]+\[vV\]\[0-9\]+\.2\[dD\], ?\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmlalh_lane_s16.c b/gcc/testsuite/gcc.target/aarch64/vqdmlalh_lane_s16.c
new file mode 100644
index 0000000..83f5af5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmlalh_lane_s16.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmlalh_lane_s16 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int32x1_t
+t_vqdmlalh_lane_s16 (int32x1_t a, int16x1_t b, int16x4_t c)
+{
+  return vqdmlalh_lane_s16 (a, b, c, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmlal\[ \t\]+\[sS\]\[0-9\]+, ?\[hH\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[hH\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmlals_lane_s32.c b/gcc/testsuite/gcc.target/aarch64/vqdmlals_lane_s32.c
new file mode 100644
index 0000000..ef94e95
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmlals_lane_s32.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmlals_lane_s32 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int64x1_t
+t_vqdmlals_lane_s32 (int64x1_t a, int32x1_t b, int32x2_t c)
+{
+  return vqdmlals_lane_s32 (a, b, c, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmlal\[ \t\]+\[dD\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmlsl_high_lane_s16.c b/gcc/testsuite/gcc.target/aarch64/vqdmlsl_high_lane_s16.c
new file mode 100644
index 0000000..276a1a2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmlsl_high_lane_s16.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmlsl_high_lane_s16 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int32x4_t
+t_vqdmlsl_high_lane_s16 (int32x4_t a, int16x8_t b, int16x4_t c)
+{
+  return vqdmlsl_high_lane_s16 (a, b, c, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmlsl2\[ \t\]+\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.8\[hH\], ?\[vV\]\[0-9\]+\.\[hH\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmlsl_high_lane_s32.c b/gcc/testsuite/gcc.target/aarch64/vqdmlsl_high_lane_s32.c
new file mode 100644
index 0000000..2ae58ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmlsl_high_lane_s32.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmlsl_high_lane_s32 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int64x2_t
+t_vqdmlsl_high_lane_s32 (int64x2_t a, int32x4_t b, int32x2_t c)
+{
+  return vqdmlsl_high_lane_s32 (a, b, c, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmlsl2\[ \t\]+\[vV\]\[0-9\]+\.2\[dD\], ?\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmlsl_high_laneq_s16.c b/gcc/testsuite/gcc.target/aarch64/vqdmlsl_high_laneq_s16.c
new file mode 100644
index 0000000..1db5db4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmlsl_high_laneq_s16.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmlsl_high_laneq_s16 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int32x4_t
+t_vqdmlsl_high_laneq_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
+{
+  return vqdmlsl_high_laneq_s16 (a, b, c, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmlsl2\[ \t\]+\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.8\[hH\], ?\[vV\]\[0-9\]+\.\[hH\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmlsl_high_laneq_s32.c b/gcc/testsuite/gcc.target/aarch64/vqdmlsl_high_laneq_s32.c
new file mode 100644
index 0000000..3a72a7b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmlsl_high_laneq_s32.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmlsl_high_laneq_s32 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int64x2_t
+t_vqdmlsl_high_laneq_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
+{
+  return vqdmlsl_high_laneq_s32 (a, b, c, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmlsl2\[ \t\]+\[vV\]\[0-9\]+\.2\[dD\], ?\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmlsl_lane_s16.c b/gcc/testsuite/gcc.target/aarch64/vqdmlsl_lane_s16.c
new file mode 100644
index 0000000..0535378
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmlsl_lane_s16.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmlsl_lane_s16 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int32x4_t
+t_vqdmlsl_lane_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
+{
+  return vqdmlsl_lane_s16 (a, b, c, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmlsl\[ \t\]+\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.4\[hH\], ?\[vV\]\[0-9\]+\.\[hH\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmlsl_lane_s32.c b/gcc/testsuite/gcc.target/aarch64/vqdmlsl_lane_s32.c
new file mode 100644
index 0000000..b52e51e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmlsl_lane_s32.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmlsl_lane_s32 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int64x2_t
+t_vqdmlsl_lane_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
+{
+  return vqdmlsl_lane_s32 (a, b, c, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmlsl\[ \t\]+\[vV\]\[0-9\]+\.2\[dD\], ?\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmlsl_laneq_s32.c b/gcc/testsuite/gcc.target/aarch64/vqdmlsl_laneq_s32.c
new file mode 100644
index 0000000..7009a35
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmlsl_laneq_s32.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmlsl_laneq_s32 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int64x2_t
+t_vqdmlsl_lane_s32 (int64x2_t a, int32x2_t b, int32x4_t c)
+{
+  return vqdmlsl_laneq_s32 (a, b, c, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmlsl\[ \t\]+\[vV\]\[0-9\]+\.2\[dD\], ?\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmlslh_lane_s16.c b/gcc/testsuite/gcc.target/aarch64/vqdmlslh_lane_s16.c
new file mode 100644
index 0000000..056dfbb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmlslh_lane_s16.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmlslh_lane_s16 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int32x1_t
+t_vqdmlslh_lane_s16 (int32x1_t a, int16x1_t b, int16x4_t c)
+{
+  return vqdmlslh_lane_s16 (a, b, c, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmlsl\[ \t\]+\[sS\]\[0-9\]+, ?\[hH\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[hH\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmlsls_lane_s32.c b/gcc/testsuite/gcc.target/aarch64/vqdmlsls_lane_s32.c
new file mode 100644
index 0000000..9e351bc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmlsls_lane_s32.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmlsls_lane_s32 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int64x1_t
+t_vqdmlsls_lane_s32 (int64x1_t a, int32x1_t b, int32x2_t c)
+{
+  return vqdmlsls_lane_s32 (a, b, c, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmlsl\[ \t\]+\[dD\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmulh_laneq_s16.c b/gcc/testsuite/gcc.target/aarch64/vqdmulh_laneq_s16.c
new file mode 100644
index 0000000..d3c699b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmulh_laneq_s16.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmulh_laneq_s16 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int16x4_t
+t_vqdmulh_laneq_s16 (int16x4_t a, int16x8_t b)
+{
+  return vqdmulh_laneq_s16 (a, b, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmulh\[ \t\]+\[vV\]\[0-9\]+\.4\[hH\], ?\[vV\]\[0-9\]+\.4\[hH\], ?\[vV\]\[0-9\]+\.\[hH\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmulh_laneq_s32.c b/gcc/testsuite/gcc.target/aarch64/vqdmulh_laneq_s32.c
new file mode 100644
index 0000000..c6202ce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmulh_laneq_s32.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmulh_laneq_s32 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int32x2_t
+t_vqdmulh_laneq_s32 (int32x2_t a, int32x4_t b)
+{
+  return vqdmulh_laneq_s32 (a, b, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmulh\[ \t\]+\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmulhh_lane_s16.c b/gcc/testsuite/gcc.target/aarch64/vqdmulhh_lane_s16.c
new file mode 100644
index 0000000..7635851
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmulhh_lane_s16.c
@@ -0,0 +1,36 @@ 
+/* Test the vqdmulhh_lane_s16 AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+#include <stdio.h>
+
+extern void abort (void);
+
+int
+main (void)
+{
+  int16_t arg1;
+  int16x4_t arg2;
+  int16_t result;
+  int16_t actual;
+  int16_t expected;
+
+  arg1 = -32768;
+  arg2 = vcreate_s16 (0x0000ffff2489e398ULL);
+  actual = vqdmulhh_lane_s16 (arg1, arg2, 2);
+  expected = 1;
+
+  if (expected != actual)
+    {
+      fprintf (stderr, "Expected: %xd, got %xd\n", expected, actual);
+      abort ();
+    }
+
+  return 0;
+}
+
+
+/* { dg-final { scan-assembler-times "sqdmulh\[ \t\]+\[hH\]\[0-9\]+, ?\[hH\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[hH\]\\\[2\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmulhq_laneq_s16.c b/gcc/testsuite/gcc.target/aarch64/vqdmulhq_laneq_s16.c
new file mode 100644
index 0000000..809c85a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmulhq_laneq_s16.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmulhq_laneq_s16 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int16x8_t
+t_vqdmulhq_laneq_s16 (int16x8_t a, int16x8_t b)
+{
+  return vqdmulhq_laneq_s16 (a, b, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmulh\[ \t\]+\[vV\]\[0-9\]+\.8\[hH\], ?\[vV\]\[0-9\]+\.8\[hH\], ?\[vV\]\[0-9\]+\.\[hH\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmulhq_laneq_s32.c b/gcc/testsuite/gcc.target/aarch64/vqdmulhq_laneq_s32.c
new file mode 100644
index 0000000..d375fe8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmulhq_laneq_s32.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmulhq_laneq_s32 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int32x4_t
+t_vqdmulhq_laneq_s32 (int32x4_t a, int32x4_t b)
+{
+  return vqdmulhq_laneq_s32 (a, b, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmulh\[ \t\]+\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmulhs_lane_s32.c b/gcc/testsuite/gcc.target/aarch64/vqdmulhs_lane_s32.c
new file mode 100644
index 0000000..9c27f5f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmulhs_lane_s32.c
@@ -0,0 +1,34 @@ 
+/* Test the vqdmulhs_lane_s32 AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+#include <stdio.h>
+
+extern void abort (void);
+
+int
+main (void)
+{
+  int32_t arg1;
+  int32x2_t arg2;
+  int32_t result;
+  int32_t actual;
+  int32_t expected;
+
+  arg1 = 57336;
+  arg2 = vcreate_s32 (0x55897fff7fff0000ULL);
+  actual = vqdmulhs_lane_s32 (arg1, arg2, 0);
+  expected = 57334;
+
+  if (expected != actual)
+    {
+      fprintf (stderr, "Expected: %xd, got %xd\n", expected, actual);
+      abort ();
+    }
+
+  return 0;
+}
+/* { dg-final { scan-assembler-times "sqdmulh\[ \t\]+\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmull_high_lane_s16.c b/gcc/testsuite/gcc.target/aarch64/vqdmull_high_lane_s16.c
new file mode 100644
index 0000000..0af320e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmull_high_lane_s16.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmull_high_lane_s16 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int32x4_t
+t_vqdmull_high_lane_s16 (int16x8_t a, int16x4_t b)
+{
+  return vqdmull_high_lane_s16 (a, b, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmull2\[ \t\]+\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.8\[hH\], ?\[vV\]\[0-9\]+\.\[hH\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmull_high_lane_s32.c b/gcc/testsuite/gcc.target/aarch64/vqdmull_high_lane_s32.c
new file mode 100644
index 0000000..583e8a1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmull_high_lane_s32.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmull_high_lane_s32 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int64x2_t
+t_vqdmull_high_lane_s32 (int32x4_t a, int32x2_t b)
+{
+  return vqdmull_high_lane_s32 (a, b, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmull2\[ \t\]+\[vV\]\[0-9\]+\.2\[dD\], ?\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmull_high_laneq_s16.c b/gcc/testsuite/gcc.target/aarch64/vqdmull_high_laneq_s16.c
new file mode 100644
index 0000000..dcfd14c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmull_high_laneq_s16.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmull_high_laneq_s16 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int32x4_t
+t_vqdmull_high_laneq_s16 (int16x8_t a, int16x8_t b)
+{
+  return vqdmull_high_laneq_s16 (a, b, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmull2\[ \t\]+\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.8\[hH\], ?\[vV\]\[0-9\]+\.\[hH\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmull_high_laneq_s32.c b/gcc/testsuite/gcc.target/aarch64/vqdmull_high_laneq_s32.c
new file mode 100644
index 0000000..3e8b652
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmull_high_laneq_s32.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmull_high_laneq_s32 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int64x2_t
+t_vqdmull_high_laneq_s32 (int32x4_t a, int32x4_t b)
+{
+  return vqdmull_high_laneq_s32 (a, b, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmull2\[ \t\]+\[vV\]\[0-9\]+\.2\[dD\], ?\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmull_lane_s16.c b/gcc/testsuite/gcc.target/aarch64/vqdmull_lane_s16.c
new file mode 100644
index 0000000..695d4e3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmull_lane_s16.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmull_lane_s16 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int32x4_t
+t_vqdmull_lane_s16 (int16x4_t a, int16x4_t b)
+{
+  return vqdmull_lane_s16 (a, b, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmull\[ \t\]+\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.4\[hH\], ?\[vV\]\[0-9\]+\.\[hH\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmull_lane_s32.c b/gcc/testsuite/gcc.target/aarch64/vqdmull_lane_s32.c
new file mode 100644
index 0000000..e6a02b5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmull_lane_s32.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmull_lane_s32 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int64x2_t
+t_vqdmull_lane_s32 (int32x2_t a, int32x2_t b)
+{
+  return vqdmull_lane_s32 (a, b, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmull\[ \t\]+\[vV\]\[0-9\]+\.2\[dD\], ?\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmull_laneq_s16.c b/gcc/testsuite/gcc.target/aarch64/vqdmull_laneq_s16.c
new file mode 100644
index 0000000..ba761b2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmull_laneq_s16.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmull_laneq_s16 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int32x4_t
+t_vqdmull_laneq_s16 (int16x4_t a, int16x8_t b)
+{
+  return vqdmull_laneq_s16 (a, b, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmull\[ \t\]+\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.4\[hH\], ?\[vV\]\[0-9\]+\.\[hH\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmull_laneq_s32.c b/gcc/testsuite/gcc.target/aarch64/vqdmull_laneq_s32.c
new file mode 100644
index 0000000..82b8e19
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmull_laneq_s32.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmull_laneq_s32 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int64x2_t
+t_vqdmull_laneq_s32 (int32x2_t a, int32x4_t b)
+{
+  return vqdmull_laneq_s32 (a, b, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmull\[ \t\]+\[vV\]\[0-9\]+\.2\[dD\], ?\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmullh_lane_s16.c b/gcc/testsuite/gcc.target/aarch64/vqdmullh_lane_s16.c
new file mode 100644
index 0000000..fd271e0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmullh_lane_s16.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmullh_lane_s16 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int32x1_t
+t_vqdmullh_lane_s16 (int16x1_t a, int16x4_t b)
+{
+  return vqdmullh_lane_s16 (a, b, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmull\[ \t\]+\[sS\]\[0-9\]+, ?\[hH\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[hH\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqdmulls_lane_s32.c b/gcc/testsuite/gcc.target/aarch64/vqdmulls_lane_s32.c
new file mode 100644
index 0000000..1103333
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqdmulls_lane_s32.c
@@ -0,0 +1,15 @@ 
+/* Test the vqdmulls_lane_s32 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int64x1_t
+t_vqdmulls_lane_s32 (int32x1_t a, int32x2_t b)
+{
+  return vqdmulls_lane_s32 (a, b, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmull\[ \t\]+\[dD\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqrdmulh_laneq_s16.c b/gcc/testsuite/gcc.target/aarch64/vqrdmulh_laneq_s16.c
new file mode 100644
index 0000000..0313f1c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqrdmulh_laneq_s16.c
@@ -0,0 +1,15 @@ 
+/* Test the vqrdmulh_laneq_s16 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int16x4_t
+t_vqrdmulh_laneq_s16 (int16x4_t a, int16x8_t b)
+{
+  return vqrdmulh_laneq_s16 (a, b, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqrdmulh\[ \t\]+\[vV\]\[0-9\]+\.4\[hH\], ?\[vV\]\[0-9\]+\.4\[hH\], ?\[vV\]\[0-9\]+\.\[hH\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqrdmulh_laneq_s32.c b/gcc/testsuite/gcc.target/aarch64/vqrdmulh_laneq_s32.c
new file mode 100644
index 0000000..a9124ee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqrdmulh_laneq_s32.c
@@ -0,0 +1,15 @@ 
+/* Test the vqrdmulh_laneq_s32 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int32x2_t
+t_vqrdmulh_laneq_s32 (int32x2_t a, int32x4_t b)
+{
+  return vqrdmulh_laneq_s32 (a, b, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqrdmulh\[ \t\]+\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqrdmulhh_lane_s16.c b/gcc/testsuite/gcc.target/aarch64/vqrdmulhh_lane_s16.c
new file mode 100644
index 0000000..f21863a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqrdmulhh_lane_s16.c
@@ -0,0 +1,35 @@ 
+/* Test the vqrdmulhh_lane_s16 AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+#include <stdio.h>
+
+extern void abort (void);
+
+int
+main (void)
+{
+  int16_t arg1;
+  int16x4_t arg2;
+  int16_t result;
+  int16_t actual;
+  int16_t expected;
+
+  arg1 = -32768;
+  arg2 = vcreate_s16 (0xd78e000005d78000ULL);
+  actual = vqrdmulhh_lane_s16 (arg1, arg2, 3);
+  expected = 10354;
+
+  if (expected != actual)
+    {
+      fprintf (stderr, "Expected: %xd, got %xd\n", expected, actual);
+      abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "sqrdmulh\[ \t\]+\[hH\]\[0-9\]+, ?\[hH\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[hH\]\\\[3\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqrdmulhq_laneq_s16.c b/gcc/testsuite/gcc.target/aarch64/vqrdmulhq_laneq_s16.c
new file mode 100644
index 0000000..488e694
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqrdmulhq_laneq_s16.c
@@ -0,0 +1,15 @@ 
+/* Test the vqrdmulhq_laneq_s16 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int16x8_t
+t_vqrdmulhq_laneq_s16 (int16x8_t a, int16x8_t b)
+{
+  return vqrdmulhq_laneq_s16 (a, b, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqrdmulh\[ \t\]+\[vV\]\[0-9\]+\.8\[hH\], ?\[vV\]\[0-9\]+\.8\[hH\], ?\[vV\]\[0-9\]+\.\[hH\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqrdmulhq_laneq_s32.c b/gcc/testsuite/gcc.target/aarch64/vqrdmulhq_laneq_s32.c
new file mode 100644
index 0000000..42519f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqrdmulhq_laneq_s32.c
@@ -0,0 +1,15 @@ 
+/* Test the vqrdmulhq_laneq_s32 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int32x4_t
+t_vqrdmulhq_laneq_s32 (int32x4_t a, int32x4_t b)
+{
+  return vqrdmulhq_laneq_s32 (a, b, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqrdmulh\[ \t\]+\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vqrdmulhs_lane_s32.c b/gcc/testsuite/gcc.target/aarch64/vqrdmulhs_lane_s32.c
new file mode 100644
index 0000000..83d2ba2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqrdmulhs_lane_s32.c
@@ -0,0 +1,35 @@ 
+/* Test the vqrdmulhs_lane_s32 AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+#include <stdio.h>
+
+extern void abort (void);
+
+int
+main (void)
+{
+  int32_t arg1;
+  int32x2_t arg2;
+  int32_t result;
+  int32_t actual;
+  int32_t expected;
+
+  arg1 = -2099281921;
+  arg2 = vcreate_s32 (0x000080007fff0000ULL);
+  actual = vqrdmulhs_lane_s32 (arg1, arg2, 1);
+  expected = -32033;
+
+  if (expected != actual)
+    {
+      fprintf (stderr, "Expected: %xd, got %xd\n", expected, actual);
+      abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "sqrdmulh\[ \t\]+\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[1\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */