diff mbox series

[v5,30/35] target/arm: Pass index to AdvSIMD FCMLA (indexed)

Message ID 20180621015359.12018-31-richard.henderson@linaro.org
State New
Headers show
Series target/arm SVE patches | expand

Commit Message

Richard Henderson June 21, 2018, 1:53 a.m. UTC
The original commit failed to pass, or use, the index.

Fixes: d17b7cdcf4ea
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-a64.c | 21 ++++++++++++---------
 target/arm/vec_helper.c    | 10 ++++++----
 2 files changed, 18 insertions(+), 13 deletions(-)

Comments

Peter Maydell June 26, 2018, 1:38 p.m. UTC | #1
On 21 June 2018 at 02:53, Richard Henderson
<richard.henderson@linaro.org> wrote:
> The original commit failed to pass, or use, the index.
>
> Fixes: d17b7cdcf4ea
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/translate-a64.c | 21 ++++++++++++---------
>  target/arm/vec_helper.c    | 10 ++++++----
>  2 files changed, 18 insertions(+), 13 deletions(-)
>
> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
> index 8d8a4cecb0..038e48278f 100644
> --- a/target/arm/translate-a64.c
> +++ b/target/arm/translate-a64.c
> @@ -12669,15 +12669,18 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
>      case 0x13: /* FCMLA #90 */
>      case 0x15: /* FCMLA #180 */
>      case 0x17: /* FCMLA #270 */
> -        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
> -                           vec_full_reg_offset(s, rn),
> -                           vec_reg_offset(s, rm, index, size), fpst,
> -                           is_q ? 16 : 8, vec_full_reg_size(s),
> -                           extract32(insn, 13, 2), /* rot */
> -                           size == MO_64
> -                           ? gen_helper_gvec_fcmlas_idx
> -                           : gen_helper_gvec_fcmlah_idx);
> -        tcg_temp_free_ptr(fpst);
> +        {
> +            int rot = extract32(insn, 13, 2);
> +            int data = index * 4 + rot;

Using arithmetic to do bit operations again.

> +            tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
> +                               vec_full_reg_offset(s, rn),
> +                               vec_reg_offset(s, rm, index, size), fpst,
> +                               is_q ? 16 : 8, vec_full_reg_size(s), data,
> +                               size == MO_64
> +                               ? gen_helper_gvec_fcmlas_idx
> +                               : gen_helper_gvec_fcmlah_idx);

We're already using vec_reg_offset() to pass the helper the address
of the index'th element in Vm -- why do we need to also add
2*index when we use that pointer as an array base in the helper?

thanks
-- PMM
Richard Henderson June 26, 2018, 3:07 p.m. UTC | #2
On 06/26/2018 06:38 AM, Peter Maydell wrote:
> On 21 June 2018 at 02:53, Richard Henderson
> <richard.henderson@linaro.org> wrote:
>> The original commit failed to pass, or use, the index.
>>
>> Fixes: d17b7cdcf4ea
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>> ---
>>  target/arm/translate-a64.c | 21 ++++++++++++---------
>>  target/arm/vec_helper.c    | 10 ++++++----
>>  2 files changed, 18 insertions(+), 13 deletions(-)
>>
>> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
>> index 8d8a4cecb0..038e48278f 100644
>> --- a/target/arm/translate-a64.c
>> +++ b/target/arm/translate-a64.c
>> @@ -12669,15 +12669,18 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
>>      case 0x13: /* FCMLA #90 */
>>      case 0x15: /* FCMLA #180 */
>>      case 0x17: /* FCMLA #270 */
>> -        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
>> -                           vec_full_reg_offset(s, rn),
>> -                           vec_reg_offset(s, rm, index, size), fpst,
>> -                           is_q ? 16 : 8, vec_full_reg_size(s),
>> -                           extract32(insn, 13, 2), /* rot */
>> -                           size == MO_64
>> -                           ? gen_helper_gvec_fcmlas_idx
>> -                           : gen_helper_gvec_fcmlah_idx);
>> -        tcg_temp_free_ptr(fpst);
>> +        {
>> +            int rot = extract32(insn, 13, 2);
>> +            int data = index * 4 + rot;
> 
> Using arithmetic to do bit operations again.
> 
>> +            tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
>> +                               vec_full_reg_offset(s, rn),
>> +                               vec_reg_offset(s, rm, index, size), fpst,
>> +                               is_q ? 16 : 8, vec_full_reg_size(s), data,
>> +                               size == MO_64
>> +                               ? gen_helper_gvec_fcmlas_idx
>> +                               : gen_helper_gvec_fcmlah_idx);
> 
> We're already using vec_reg_offset() to pass the helper the address
> of the index'th element in Vm -- why do we need to also add
> 2*index when we use that pointer as an array base in the helper?

Ah, bug.  Because of SVE, we must pass index (since it applies per 128-bit
segment), and we should pass the full_reg as the pointer.


r~
diff mbox series

Patch

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 8d8a4cecb0..038e48278f 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -12669,15 +12669,18 @@  static void disas_simd_indexed(DisasContext *s, uint32_t insn)
     case 0x13: /* FCMLA #90 */
     case 0x15: /* FCMLA #180 */
     case 0x17: /* FCMLA #270 */
-        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
-                           vec_full_reg_offset(s, rn),
-                           vec_reg_offset(s, rm, index, size), fpst,
-                           is_q ? 16 : 8, vec_full_reg_size(s),
-                           extract32(insn, 13, 2), /* rot */
-                           size == MO_64
-                           ? gen_helper_gvec_fcmlas_idx
-                           : gen_helper_gvec_fcmlah_idx);
-        tcg_temp_free_ptr(fpst);
+        {
+            int rot = extract32(insn, 13, 2);
+            int data = index * 4 + rot;
+            tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
+                               vec_full_reg_offset(s, rn),
+                               vec_reg_offset(s, rm, index, size), fpst,
+                               is_q ? 16 : 8, vec_full_reg_size(s), data,
+                               size == MO_64
+                               ? gen_helper_gvec_fcmlas_idx
+                               : gen_helper_gvec_fcmlah_idx);
+            tcg_temp_free_ptr(fpst);
+        }
         return;
     }
 
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index 073e5c58e7..8f2dc4b989 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -317,10 +317,11 @@  void HELPER(gvec_fcmlah_idx)(void *vd, void *vn, void *vm,
     float_status *fpst = vfpst;
     intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
     uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+    intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2);
     uint32_t neg_real = flip ^ neg_imag;
     uintptr_t i;
-    float16 e1 = m[H2(flip)];
-    float16 e3 = m[H2(1 - flip)];
+    float16 e1 = m[H2(2 * index + flip)];
+    float16 e3 = m[H2(2 * index + 1 - flip)];
 
     /* Shift boolean to the sign bit so we can xor to negate.  */
     neg_real <<= 15;
@@ -377,10 +378,11 @@  void HELPER(gvec_fcmlas_idx)(void *vd, void *vn, void *vm,
     float_status *fpst = vfpst;
     intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
     uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+    intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2);
     uint32_t neg_real = flip ^ neg_imag;
     uintptr_t i;
-    float32 e1 = m[H4(flip)];
-    float32 e3 = m[H4(1 - flip)];
+    float32 e1 = m[H4(2 * index + flip)];
+    float32 e3 = m[H4(2 * index + 1 - flip)];
 
     /* Shift boolean to the sign bit so we can xor to negate.  */
     neg_real <<= 31;