diff mbox

[AArch64,4/4] Reimplement vmvn* intrinscis, remove inline assembly

Message ID 57398E56.4040200@foss.arm.com
State New
Headers show

Commit Message

Jiong Wang May 16, 2016, 9:09 a.m. UTC
This patch remove inline assembly and reimplement all mvn/mvnq vector
integer intrinsics through the standard "one_cmpl<mode>2" pattern was
introduced later after the initial implementation of those intrinsics.
that's why inline assembly was used historically.

OK for trunk?

no regression on the exist advsimd-intrinsics/vmvn.c.

2016-05-16  Jiong Wang<jiong.wang@arm.com>

gcc/
   * config/aarch64/arm_neon.h (vmvn_s8): Reimplement using C operator.
   Remove inline assembly.
   (vmvn_s16): Likewise.
   (vmvn_s32): Likewise.
   (vmvn_u8): Likewise.
   (vmvn_u16): Likewise.
   (vmvn_u32): Likewise.
   (vmvnq_s8): Likewise.
   (vmvnq_s16): Likewise.
   (vmvnq_s32): Likewise.
   (vmvnq_u8): Likewise.
   (vmvnq_u16): Likewise.
   (vmvnq_u32): Likewise.
   (vmvn_p8): Likewise.
   (vmvnq_p16): Likewise.

Comments

James Greenhalgh May 17, 2016, 12:38 p.m. UTC | #1
On Mon, May 16, 2016 at 10:09:42AM +0100, Jiong Wang wrote:
> This patch remove inline assembly and reimplement all mvn/mvnq vector
> integer intrinsics through the standard "one_cmpl<mode>2" pattern was
> introduced later after the initial implementation of those intrinsics.
> that's why inline assembly was used historically.
> 
> OK for trunk?
> 
> no regression on the exist advsimd-intrinsics/vmvn.c.
> 
> 2016-05-16  Jiong Wang<jiong.wang@arm.com>
> 
> gcc/
>   * config/aarch64/arm_neon.h (vmvn_s8): Reimplement using C operator.
>   Remove inline assembly.
>   (vmvn_s16): Likewise.
>   (vmvn_s32): Likewise.
>   (vmvn_u8): Likewise.
>   (vmvn_u16): Likewise.
>   (vmvn_u32): Likewise.
>   (vmvnq_s8): Likewise.
>   (vmvnq_s16): Likewise.
>   (vmvnq_s32): Likewise.
>   (vmvnq_u8): Likewise.
>   (vmvnq_u16): Likewise.
>   (vmvnq_u32): Likewise.
>   (vmvn_p8): Likewise.
>   (vmvnq_p16): Likewise.

ChangeLog formatting is incorrect.

Otherwise, this is OK.

Thanks,
James
diff mbox

Patch

diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 432a1fad9a6df6fef844896df5e8ad29cc31f548..ae4c429a87822a8807f2d2ec054d3194b39ef6ac 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -8093,161 +8093,6 @@  vmull_u32 (uint32x2_t a, uint32x2_t b)
   return result;
 }
 
-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-vmvn_p8 (poly8x8_t a)
-{
-  poly8x8_t result;
-  __asm__ ("mvn %0.8b,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vmvn_s8 (int8x8_t a)
-{
-  int8x8_t result;
-  __asm__ ("mvn %0.8b,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vmvn_s16 (int16x4_t a)
-{
-  int16x4_t result;
-  __asm__ ("mvn %0.8b,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vmvn_s32 (int32x2_t a)
-{
-  int32x2_t result;
-  __asm__ ("mvn %0.8b,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vmvn_u8 (uint8x8_t a)
-{
-  uint8x8_t result;
-  __asm__ ("mvn %0.8b,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vmvn_u16 (uint16x4_t a)
-{
-  uint16x4_t result;
-  __asm__ ("mvn %0.8b,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vmvn_u32 (uint32x2_t a)
-{
-  uint32x2_t result;
-  __asm__ ("mvn %0.8b,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-vmvnq_p8 (poly8x16_t a)
-{
-  poly8x16_t result;
-  __asm__ ("mvn %0.16b,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vmvnq_s8 (int8x16_t a)
-{
-  int8x16_t result;
-  __asm__ ("mvn %0.16b,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vmvnq_s16 (int16x8_t a)
-{
-  int16x8_t result;
-  __asm__ ("mvn %0.16b,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vmvnq_s32 (int32x4_t a)
-{
-  int32x4_t result;
-  __asm__ ("mvn %0.16b,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vmvnq_u8 (uint8x16_t a)
-{
-  uint8x16_t result;
-  __asm__ ("mvn %0.16b,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-vmvnq_u16 (uint16x8_t a)
-{
-  uint16x8_t result;
-  __asm__ ("mvn %0.16b,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vmvnq_u32 (uint32x4_t a)
-{
-  uint32x4_t result;
-  __asm__ ("mvn %0.16b,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vpadal_s8 (int16x4_t a, int8x8_t b)
 {
@@ -18622,6 +18467,92 @@  vmulq_n_u32 (uint32x4_t __a, uint32_t __b)
   return __a * __b;
 }
 
+/* vmvn  */
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vmvn_p8 (poly8x8_t __a)
+{
+  return (poly8x8_t) ~((int8x8_t) __a);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vmvn_s8 (int8x8_t __a)
+{
+  return ~__a;
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vmvn_s16 (int16x4_t __a)
+{
+  return ~__a;
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vmvn_s32 (int32x2_t __a)
+{
+  return ~__a;
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vmvn_u8 (uint8x8_t __a)
+{
+  return ~__a;
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vmvn_u16 (uint16x4_t __a)
+{
+  return ~__a;
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vmvn_u32 (uint32x2_t __a)
+{
+  return ~__a;
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vmvnq_p8 (poly8x16_t __a)
+{
+  return (poly8x16_t) ~((int8x16_t) __a);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vmvnq_s8 (int8x16_t __a)
+{
+  return ~__a;
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vmvnq_s16 (int16x8_t __a)
+{
+  return ~__a;
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmvnq_s32 (int32x4_t __a)
+{
+  return ~__a;
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vmvnq_u8 (uint8x16_t __a)
+{
+  return ~__a;
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vmvnq_u16 (uint16x8_t __a)
+{
+  return ~__a;
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmvnq_u32 (uint32x4_t __a)
+{
+  return ~__a;
+}
+
 /* vneg  */
 
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))