Patchwork [AArch64] Fix Narrowing high shifts.

login
register
mail settings
Submitter Tejas Belagod
Date Sept. 10, 2012, 3:22 p.m.
Message ID <504E05BB.20101@arm.com>
Download mbox | patch
Permalink /patch/182910/
State New
Headers show

Comments

Tejas Belagod - Sept. 10, 2012, 3:22 p.m.
Hi,

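The attached patch fixes the inline-assembler templates for rshrn2 and shrn2 in the vrshrn_high_n_* and vshrn_high_n_* intrinsics: the templates referenced operands %2 and %3, but the operand lists only define %0 (result), %1 (b_) and %2 (c). OK?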

Thanks,
Tejas Belagod.
ARM.

Changelog:

2012-09-10  Tejas Belagod  <tejas.belagod@arm.com>

gcc/
	* config/aarch64/arm_neon.h (vrshrn_high_n_s16, vrshrn_high_n_s32,
	vrshrn_high_n_s64, vrshrn_high_n_u16, vrshrn_high_n_u32,
	vrshrn_high_n_u64, vshrn_high_n_s16, vshrn_high_n_s32,
	vshrn_high_n_s64, vshrn_high_n_u16, vshrn_high_n_u32, vshrn_high_n_u64):
	Fix template to reference correct operands.
Marcus Shawcroft - Sept. 25, 2012, 6:13 p.m.
On 10/09/12 16:22, Tejas Belagod wrote:
> 2012-09-10  Tejas Belagod  <tejas.belagod@arm.com>
>
> gcc/
> 	* config/aarch64/arm_neon.h (vrshrn_high_n_s16, vrshrn_high_n_s32,
> 	vrshrn_high_n_s64, vrshrn_high_n_u16, vrshrn_high_n_u32,
> 	vrshrn_high_n_u64, vshrn_high_n_s16, vshrn_high_n_s32,
> 	vshrn_high_n_s64, vshrn_high_n_u16, vshrn_high_n_u32, vshrn_high_n_u64):
> 	Fix template to reference correct operands.

Committed to aarch64-branch and aarch64-4.7-branch.
/Marcus

Patch

diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 46abaf6..a4b2e78 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -15334,7 +15334,7 @@  vrndqp_f64 (float64x2_t a)
        int8x8_t a_ = (a);                                               \
        int8x16_t result = vcombine_s8                                   \
                             (a_, vcreate_s8 (UINT64_C (0x0)));          \
-       __asm__ ("rshrn2 %0.16b,%2.8h,#%3"                               \
+       __asm__ ("rshrn2 %0.16b,%1.8h,#%2"                               \
                 : "+w"(result)                                          \
                 : "w"(b_), "i"(c)                                       \
                 : /* No clobbers */);                                   \
@@ -15348,7 +15348,7 @@  vrndqp_f64 (float64x2_t a)
        int16x4_t a_ = (a);                                              \
        int16x8_t result = vcombine_s16                                  \
                             (a_, vcreate_s16 (UINT64_C (0x0)));         \
-       __asm__ ("rshrn2 %0.8h,%2.4s,#%3"                                \
+       __asm__ ("rshrn2 %0.8h,%1.4s,#%2"                                \
                 : "+w"(result)                                          \
                 : "w"(b_), "i"(c)                                       \
                 : /* No clobbers */);                                   \
@@ -15362,7 +15362,7 @@  vrndqp_f64 (float64x2_t a)
        int32x2_t a_ = (a);                                              \
        int32x4_t result = vcombine_s32                                  \
                             (a_, vcreate_s32 (UINT64_C (0x0)));         \
-       __asm__ ("rshrn2 %0.4s,%2.2d,#%3"                                \
+       __asm__ ("rshrn2 %0.4s,%1.2d,#%2"                                \
                 : "+w"(result)                                          \
                 : "w"(b_), "i"(c)                                       \
                 : /* No clobbers */);                                   \
@@ -15376,7 +15376,7 @@  vrndqp_f64 (float64x2_t a)
        uint8x8_t a_ = (a);                                              \
        uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8 (UINT64_C (0x0)));          \
-       __asm__ ("rshrn2 %0.16b,%2.8h,#%3"                               \
+       __asm__ ("rshrn2 %0.16b,%1.8h,#%2"                               \
                 : "+w"(result)                                          \
                 : "w"(b_), "i"(c)                                       \
                 : /* No clobbers */);                                   \
@@ -15390,7 +15390,7 @@  vrndqp_f64 (float64x2_t a)
        uint16x4_t a_ = (a);                                             \
        uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16 (UINT64_C (0x0)));         \
-       __asm__ ("rshrn2 %0.8h,%2.4s,#%3"                                \
+       __asm__ ("rshrn2 %0.8h,%1.4s,#%2"                                \
                 : "+w"(result)                                          \
                 : "w"(b_), "i"(c)                                       \
                 : /* No clobbers */);                                   \
@@ -15404,7 +15404,7 @@  vrndqp_f64 (float64x2_t a)
        uint32x2_t a_ = (a);                                             \
        uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32 (UINT64_C (0x0)));         \
-       __asm__ ("rshrn2 %0.4s,%2.2d,#%3"                                \
+       __asm__ ("rshrn2 %0.4s,%1.2d,#%2"                                \
                 : "+w"(result)                                          \
                 : "w"(b_), "i"(c)                                       \
                 : /* No clobbers */);                                   \
@@ -16088,7 +16088,7 @@  vrsubhn_u64 (uint64x2_t a, uint64x2_t b)
        int8x8_t a_ = (a);                                               \
        int8x16_t result = vcombine_s8                                   \
                             (a_, vcreate_s8 (UINT64_C (0x0)));          \
-       __asm__ ("shrn2 %0.16b,%2.8h,#%3"                                \
+       __asm__ ("shrn2 %0.16b,%1.8h,#%2"                                \
                 : "+w"(result)                                          \
                 : "w"(b_), "i"(c)                                       \
                 : /* No clobbers */);                                   \
@@ -16102,7 +16102,7 @@  vrsubhn_u64 (uint64x2_t a, uint64x2_t b)
        int16x4_t a_ = (a);                                              \
        int16x8_t result = vcombine_s16                                  \
                             (a_, vcreate_s16 (UINT64_C (0x0)));         \
-       __asm__ ("shrn2 %0.8h,%2.4s,#%3"                                 \
+       __asm__ ("shrn2 %0.8h,%1.4s,#%2"                                 \
                 : "+w"(result)                                          \
                 : "w"(b_), "i"(c)                                       \
                 : /* No clobbers */);                                   \
@@ -16116,7 +16116,7 @@  vrsubhn_u64 (uint64x2_t a, uint64x2_t b)
        int32x2_t a_ = (a);                                              \
        int32x4_t result = vcombine_s32                                  \
                             (a_, vcreate_s32 (UINT64_C (0x0)));         \
-       __asm__ ("shrn2 %0.4s,%2.2d,#%3"                                 \
+       __asm__ ("shrn2 %0.4s,%1.2d,#%2"                                 \
                 : "+w"(result)                                          \
                 : "w"(b_), "i"(c)                                       \
                 : /* No clobbers */);                                   \
@@ -16130,7 +16130,7 @@  vrsubhn_u64 (uint64x2_t a, uint64x2_t b)
        uint8x8_t a_ = (a);                                              \
        uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8 (UINT64_C (0x0)));          \
-       __asm__ ("shrn2 %0.16b,%2.8h,#%3"                                \
+       __asm__ ("shrn2 %0.16b,%1.8h,#%2"                                \
                 : "+w"(result)                                          \
                 : "w"(b_), "i"(c)                                       \
                 : /* No clobbers */);                                   \
@@ -16144,7 +16144,7 @@  vrsubhn_u64 (uint64x2_t a, uint64x2_t b)
        uint16x4_t a_ = (a);                                             \
        uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16 (UINT64_C (0x0)));         \
-       __asm__ ("shrn2 %0.8h,%2.4s,#%3"                                 \
+       __asm__ ("shrn2 %0.8h,%1.4s,#%2"                                 \
                 : "+w"(result)                                          \
                 : "w"(b_), "i"(c)                                       \
                 : /* No clobbers */);                                   \
@@ -16158,7 +16158,7 @@  vrsubhn_u64 (uint64x2_t a, uint64x2_t b)
        uint32x2_t a_ = (a);                                             \
        uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32 (UINT64_C (0x0)));         \
-       __asm__ ("shrn2 %0.4s,%2.2d,#%3"                                 \
+       __asm__ ("shrn2 %0.4s,%1.2d,#%2"                                 \
                 : "+w"(result)                                          \
                 : "w"(b_), "i"(c)                                       \
                 : /* No clobbers */);                                   \