Patchwork [AArch64,AArch64-4.7] Fix sq<r>dmulh<q>_lane<q>_* intrinsics.

login
register
mail settings
Submitter Tejas Belagod
Date Jan. 14, 2013, 6 p.m.
Message ID <50F447D2.4070902@arm.com>
Download mbox | patch
Permalink /patch/211854/
State New
Headers show

Comments

Tejas Belagod - Jan. 14, 2013, 6 p.m.
Hi,

Attached is a patch that fixes sq<r>dmulh<q>_lane_* intrinsics. Previously they,
used to accept 128-bit lane index range. This fixes this bug to accept 64-bit 
lane index range. sq<r>dmulh<q>_laneq_* and AdvSIMD scalar ones still accept 
128-bit lane index range as before.

It has passed regressions on aarch64-none-elf. OK for trunk and aarch64-4.7-branch?

Thanks,
Tejas Belagod
ARM.

Changelog

2013-01-14  Tejas Belagod  <tejas.belagod@arm.com>

gcc/
	* config/aarch64/aarch64-simd-builtins.def: Separate sq<r>dmulh_lane
	entries into lane and laneq entries.
	* config/aarch64/aarch64-simd.md (aarch64_sq<r>dmulh_lane<mode>): Remove
	AdvSIMD scalar modes.
	(aarch64_sq<r>dmulh_laneq<mode>): New.
	(aarch64_sq<r>dmulh_lane<mode>): New RTL pattern for Scalar AdvSIMD
	modes.
	* config/aarch64/arm_neon.h: Fix all the vq<r>dmulh_lane* intrinsics'
	builtin implementations to relfect changes in RTL in aarch64-simd.md.
	* config/aarch64/iterators.md (VCOND): New.
	(VCONQ): New.
Marcus Shawcroft - Jan. 25, 2013, 10:30 a.m.
On 14/01/13 18:00, Tejas Belagod wrote:
>
> Hi,
>
> Attached is a patch that fixes sq<r>dmulh<q>_lane_* intrinsics. Previously they,
> used to accept 128-bit lane index range. This fixes this bug to accept 64-bit
> lane index range. sq<r>dmulh<q>_laneq_* and AdvSIMD scalar ones still accept
> 128-bit lane index range as before.
>
> It has passed regressions on aarch64-none-elf. OK for trunk and aarch64-4.7-branch?

OK

Patch

diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index d441417..af27079 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -142,9 +142,13 @@ 
   /* Implemented by aarch64_sq<r>dmulh<mode>.  */
   BUILTIN_VSDQ_HSI (BINOP, sqdmulh)
   BUILTIN_VSDQ_HSI (BINOP, sqrdmulh)
-  /* Implemented by aarch64_sq<r>dmulh_lane<mode>.  */
-  BUILTIN_VSDQ_HSI (TERNOP, sqdmulh_lane)
-  BUILTIN_VSDQ_HSI (TERNOP, sqrdmulh_lane)
+  /* Implemented by aarch64_sq<r>dmulh_lane<q><mode>.  */
+  BUILTIN_VDQHS (TERNOP, sqdmulh_lane)
+  BUILTIN_VDQHS (TERNOP, sqdmulh_laneq)
+  BUILTIN_VDQHS (TERNOP, sqrdmulh_lane)
+  BUILTIN_VDQHS (TERNOP, sqrdmulh_laneq)
+  BUILTIN_SD_HSI (TERNOP, sqdmulh_lane)
+  BUILTIN_SD_HSI (TERNOP, sqrdmulh_lane)
 
   BUILTIN_VSDQ_I_DI (BINOP, sshl_n)
   BUILTIN_VSDQ_I_DI (BINOP, ushl_n)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 63716c3..1fc912c 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2068,17 +2068,49 @@ 
 ;; sq<r>dmulh_lane
 
 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
-  [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
-        (unspec:VSDQ_HSI
-	  [(match_operand:VSDQ_HSI 1 "register_operand" "w")
+  [(set (match_operand:VDQHS 0 "register_operand" "=w")
+        (unspec:VDQHS
+	  [(match_operand:VDQHS 1 "register_operand" "w")
+           (vec_select:<VEL>
+             (match_operand:<VCOND> 2 "register_operand" "<vwx>")
+             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
+	 VQDMULH))]
+  "TARGET_SIMD"
+  "*
+   aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode));
+   return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
+  [(set_attr "simd_type" "simd_sat_mul")
+   (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
+  [(set (match_operand:VDQHS 0 "register_operand" "=w")
+        (unspec:VDQHS
+	  [(match_operand:VDQHS 1 "register_operand" "w")
+           (vec_select:<VEL>
+             (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
+             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
+	 VQDMULH))]
+  "TARGET_SIMD"
+  "*
+   aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
+   return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
+  [(set_attr "simd_type" "simd_sat_mul")
+   (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn "aarch64_sq<r>dmulh_lane<mode>"
+  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
+        (unspec:SD_HSI
+	  [(match_operand:SD_HSI 1 "register_operand" "w")
            (vec_select:<VEL>
-             (match_operand:<VCON> 2 "register_operand" "<vwx>")
+             (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
 	 VQDMULH))]
   "TARGET_SIMD"
   "*
-   aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCON>mode));
-   return \"sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]\";"
+   aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
+   return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
   [(set_attr "simd_type" "simd_sat_mul")
    (set_attr "simd_mode" "<MODE>")]
 )
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 5a72029..c455cf0 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -18877,49 +18877,49 @@  vpaddd_s64 (int64x2_t __a)
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
 {
-  return __builtin_aarch64_sqdmulh_lanev4hi (__a, __b, __c);
+  return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c);
 }
 
 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
 {
-  return __builtin_aarch64_sqdmulh_lanev2si (__a, __b, __c);
+  return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c);
 }
 
 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
 {
-  return __builtin_aarch64_sqdmulh_lanev8hi (__a, __b, __c);
+  return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c);
 }
 
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
 {
-  return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c);
+  return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c);
 }
 
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
 {
-  return  __builtin_aarch64_sqrdmulh_lanev4hi (__a, __b, __c);
+  return  __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c);
 }
 
 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
 {
-  return __builtin_aarch64_sqrdmulh_lanev2si (__a, __b, __c);
+  return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c);
 }
 
 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
 {
-  return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __b, __c);
+  return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c);
 }
 
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
 {
-  return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c);
+  return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c);
 }
 
 /* Table intrinsics.  */
@@ -21974,29 +21974,25 @@  vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d)
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
 {
-  int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (INT64_C (0)));
-  return __builtin_aarch64_sqdmulh_lanev4hi (__a, __tmp, __c);
+  return __builtin_aarch64_sqdmulh_lanev4hi (__a, __b, __c);
 }
 
 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
 {
-  int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (INT64_C (0)));
-  return __builtin_aarch64_sqdmulh_lanev2si (__a, __tmp, __c);
+  return __builtin_aarch64_sqdmulh_lanev2si (__a, __b, __c);
 }
 
 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
 {
-  int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (INT64_C (0)));
-  return __builtin_aarch64_sqdmulh_lanev8hi (__a, __tmp, __c);
+  return __builtin_aarch64_sqdmulh_lanev8hi (__a, __b, __c);
 }
 
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
 {
-  int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (INT64_C (0)));
-  return __builtin_aarch64_sqdmulh_lanev4si (__a, __tmp, __c);
+  return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c);
 }
 
 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
@@ -22290,29 +22286,25 @@  vqnegs_s32 (int32x1_t __a)
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
 {
-  int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (INT64_C (0)));
-  return  __builtin_aarch64_sqrdmulh_lanev4hi (__a, __tmp, __c);
+  return  __builtin_aarch64_sqrdmulh_lanev4hi (__a, __b, __c);
 }
 
 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
 {
-  int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (INT64_C (0)));
-  return __builtin_aarch64_sqrdmulh_lanev2si (__a, __tmp, __c);
+  return __builtin_aarch64_sqrdmulh_lanev2si (__a, __b, __c);
 }
 
 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
 {
-  int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (INT64_C (0)));
-  return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __tmp, __c);
+  return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __b, __c);
 }
 
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
 {
-  int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (INT64_C (0)));
-  return __builtin_aarch64_sqrdmulh_lanev4si (__a, __tmp, __c);
+  return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c);
 }
 
 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 0eb30f0..f193214 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -337,6 +337,22 @@ 
 			(QI   "QI")])
 
 ;; Define container mode for lane selection.
+(define_mode_attr VCOND [(V4HI "V4HI") (V8HI "V4HI")
+			 (V2SI "V2SI") (V4SI "V2SI")
+			 (DI   "DI") (V2DI "DI")
+			 (V2SF "V2SF") (V4SF "V2SF")
+			 (V2DF "DF")])
+
+;; Define container mode for lane selection.
+(define_mode_attr VCONQ [(V8QI "V16QI") (V16QI "V16QI")
+			 (V4HI "V8HI") (V8HI "V8HI")
+			 (V2SI "V4SI") (V4SI "V4SI")
+			 (DI   "V2DI") (V2DI "V2DI")
+			 (V2SF "V2SF") (V4SF "V4SF")
+			 (V2DF "V2DF") (SI   "V4SI")
+			 (HI   "V8HI") (QI   "V16QI")])
+
+;; Define container mode for lane selection.
 (define_mode_attr VCON [(V8QI "V16QI") (V16QI "V16QI")
 			(V4HI "V8HI") (V8HI "V8HI")
 			(V2SI "V4SI") (V4SI "V4SI")