diff mbox

[AArch64] Fix argument types for some high_lane* intrinsics implemented in assembly

Message ID 53BD539B.3070200@arm.com
State New
Headers show

Commit Message

Kyrylo Tkachov July 9, 2014, 2:37 p.m. UTC
Hi all,

These intrinsics are implemented as macros that map down to inline asm, but the
type they accept for the lane vector argument (c) is inconsistent with the
ACLE spec: it should be a 64-bit vector, not a 128-bit one. This patch fixes
them, although they should be reimplemented properly in C in the future.

This is a bugfix and it applies cleanly to trunk, 4.9 and 4.8.
I know we're close to the 4.9.1 release, but this is not an ABI-breaking 
change so it's the aarch64 maintainers' call on whether it should be 
backported.

Tested on aarch64-none-elf.

Ok?

Thanks,
Kyrill

2014-07-09  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     * config/aarch64/arm_neon.h (vmlal_high_lane_s16): Fix type.
     (vmlal_high_lane_s32): Likewise.
     (vmlal_high_lane_u16): Likewise.
     (vmlal_high_lane_u32): Likewise.
     (vmlsl_high_lane_s16): Likewise.
     (vmlsl_high_lane_s32): Likewise.
     (vmlsl_high_lane_u16): Likewise.
     (vmlsl_high_lane_u32): Likewise.

Comments

Marcus Shawcroft July 17, 2014, 9:53 a.m. UTC | #1
On 9 July 2014 15:37, Kyrill Tkachov <kyrylo.tkachov@arm.com> wrote:
> Hi all,
>
> These intrinsics are implemented as macros that map down to asms but the
> types they accept are inconsistent with the ACLE spec. This patch fixes
> them, although they should be reimplemented properly in C in the future.
>
> This is a bugfix and it applies cleanly to trunk, 4.9 and 4.8.
> I know we're close to the 4.9.1 release, but this is not an ABI-breaking
> change so it's the aarch64 maintainers' call on whether it should be
> backported.
>
> Tested aarch64-none-elf
>
> Ok?
>
> Thanks,
> Kyrill
>
> 2014-07-09  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
>
>     * config/aarch64/arm_neon.h (vmlal_high_lane_s16): Fix type.
>     (vmlal_high_lane_s32): Likewise.
>     (vmlal_high_lane_u16): Likewise.
>     (vmlal_high_lane_u32): Likewise.
>     (vmlsl_high_lane_s16): Likewise.
>     (vmlsl_high_lane_s32): Likewise.
>     (vmlsl_high_lane_u16): Likewise.
>     (vmlsl_high_lane_u32): Likewise.

OK thanks.

/Marcus
diff mbox

Patch

commit 991893519ceea282bfaf696b88d5c9291ce2e3a0
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date:   Thu Jun 26 13:59:19 2014 +0100

    [AArch64] Fix types for some assembly intrinsics

diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 7807181..9e8d15a 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -6735,7 +6735,7 @@  vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
 #define vmlal_high_lane_s16(a, b, c, d)                                 \
   __extension__                                                         \
     ({                                                                  \
-       int16x8_t c_ = (c);                                              \
+       int16x4_t c_ = (c);                                              \
        int16x8_t b_ = (b);                                              \
        int32x4_t a_ = (a);                                              \
        int32x4_t result;                                                \
@@ -6749,7 +6749,7 @@  vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
 #define vmlal_high_lane_s32(a, b, c, d)                                 \
   __extension__                                                         \
     ({                                                                  \
-       int32x4_t c_ = (c);                                              \
+       int32x2_t c_ = (c);                                              \
        int32x4_t b_ = (b);                                              \
        int64x2_t a_ = (a);                                              \
        int64x2_t result;                                                \
@@ -6763,7 +6763,7 @@  vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
 #define vmlal_high_lane_u16(a, b, c, d)                                 \
   __extension__                                                         \
     ({                                                                  \
-       uint16x8_t c_ = (c);                                             \
+       uint16x4_t c_ = (c);                                             \
        uint16x8_t b_ = (b);                                             \
        uint32x4_t a_ = (a);                                             \
        uint32x4_t result;                                               \
@@ -6777,7 +6777,7 @@  vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
 #define vmlal_high_lane_u32(a, b, c, d)                                 \
   __extension__                                                         \
     ({                                                                  \
-       uint32x4_t c_ = (c);                                             \
+       uint32x2_t c_ = (c);                                             \
        uint32x4_t b_ = (b);                                             \
        uint64x2_t a_ = (a);                                             \
        uint64x2_t result;                                               \
@@ -7423,7 +7423,7 @@  vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
 #define vmlsl_high_lane_s16(a, b, c, d)                                 \
   __extension__                                                         \
     ({                                                                  \
-       int16x8_t c_ = (c);                                              \
+       int16x4_t c_ = (c);                                              \
        int16x8_t b_ = (b);                                              \
        int32x4_t a_ = (a);                                              \
        int32x4_t result;                                                \
@@ -7437,7 +7437,7 @@  vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
 #define vmlsl_high_lane_s32(a, b, c, d)                                 \
   __extension__                                                         \
     ({                                                                  \
-       int32x4_t c_ = (c);                                              \
+       int32x2_t c_ = (c);                                              \
        int32x4_t b_ = (b);                                              \
        int64x2_t a_ = (a);                                              \
        int64x2_t result;                                                \
@@ -7451,7 +7451,7 @@  vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
 #define vmlsl_high_lane_u16(a, b, c, d)                                 \
   __extension__                                                         \
     ({                                                                  \
-       uint16x8_t c_ = (c);                                             \
+       uint16x4_t c_ = (c);                                             \
        uint16x8_t b_ = (b);                                             \
        uint32x4_t a_ = (a);                                             \
        uint32x4_t result;                                               \
@@ -7465,7 +7465,7 @@  vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
 #define vmlsl_high_lane_u32(a, b, c, d)                                 \
   __extension__                                                         \
     ({                                                                  \
-       uint32x4_t c_ = (c);                                             \
+       uint32x2_t c_ = (c);                                             \
        uint32x4_t b_ = (b);                                             \
        uint64x2_t a_ = (a);                                             \
        uint64x2_t result;                                               \