diff mbox series

[v2,32/32] arm/translate-a64: add all single op FP16 to handle_fp_1src_half

Message ID 20180208173157.24705-33-alex.bennee@linaro.org
State New
Headers show
Series Add ARMv8.2 half-precision functions | expand

Commit Message

Alex Bennée Feb. 8, 2018, 5:31 p.m. UTC
This includes FMOV, FABS, FNEG, FSQRT and FRINT[NPMZAXI]. We re-use
existing helpers to achieve this.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
---
 target/arm/translate-a64.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

Comments

Richard Henderson Feb. 9, 2018, 6:37 p.m. UTC | #1
On 02/08/2018 09:31 AM, Alex Bennée wrote:
> This includes FMOV, FABS, FNEG, FSQRT and  FRINT[NPMZAXI]. We re-use
> existing helpers to achieve this.
> 
> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
> ---
>  target/arm/translate-a64.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 72 insertions(+)
> 
> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
> index 92adf43a89..265bfb14d0 100644
> --- a/target/arm/translate-a64.c
> +++ b/target/arm/translate-a64.c
> @@ -4508,6 +4508,66 @@ static void disas_fp_csel(DisasContext *s, uint32_t insn)
>      tcg_temp_free_i64(t_true);
>  }
>  
> +/* Floating-point data-processing (1 source) - half precision */
> +static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
> +{
> +    TCGv_ptr fpst = NULL;
> +    TCGv_i32 tcg_op;
> +    TCGv_i32 tcg_res;
> +
> +    tcg_op = read_fp_sreg(s, rn);
> +    tcg_res = tcg_temp_new_i32();
> +
> +    switch (opcode) {
> +    case 0x0: /* FMOV */
> +        tcg_gen_mov_i32(tcg_res, tcg_op);
> +        break;
> +    case 0x1: /* FABS */
> +        gen_helper_advsimd_absh(tcg_res, tcg_op);
> +        break;
> +    case 0x2: /* FNEG */
> +        tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
> +        break;
> +    case 0x3: /* FSQRT */
> +        gen_helper_sqrt_f16(tcg_res, tcg_op, cpu_env);
> +        break;
> +    case 0x8: /* FRINTN */
> +    case 0x9: /* FRINTP */
> +    case 0xa: /* FRINTM */
> +    case 0xb: /* FRINTZ */
> +    case 0xc: /* FRINTA */
> +    {
> +        TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
> +        fpst = get_fpstatus_ptr(true);
> +
> +        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
> +        gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
> +
> +        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
> +        tcg_temp_free_i32(tcg_rmode);
> +        break;
> +    }
> +    case 0xe: /* FRINTX */
> +        fpst = get_fpstatus_ptr(true);
> +        gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
> +        break;
> +    case 0xf: /* FRINTI */
> +        fpst = get_fpstatus_ptr(true);
> +        gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
> +        break;
> +    default:
> +        abort();
> +    }
> +
> +    write_fp_sreg(s, rd, tcg_res);

Some of these helpers will zero-extend from 16 bits, but at least a few won't
-- notably fmov and fneg.  I wonder if it wouldn't be best to have a write_fp_hreg.


r~
Alex Bennée Feb. 23, 2018, 9:45 a.m. UTC | #2
Richard Henderson <richard.henderson@linaro.org> writes:

> On 02/08/2018 09:31 AM, Alex Bennée wrote:
>> This includes FMOV, FABS, FNEG, FSQRT and  FRINT[NPMZAXI]. We re-use
>> existing helpers to achieve this.
>>
>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
>> ---
>>  target/arm/translate-a64.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 72 insertions(+)
>>
>> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
>> index 92adf43a89..265bfb14d0 100644
>> --- a/target/arm/translate-a64.c
>> +++ b/target/arm/translate-a64.c
>> @@ -4508,6 +4508,66 @@ static void disas_fp_csel(DisasContext *s, uint32_t insn)
>>      tcg_temp_free_i64(t_true);
>>  }
>>
>> +/* Floating-point data-processing (1 source) - half precision */
>> +static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
>> +{
>> +    TCGv_ptr fpst = NULL;
>> +    TCGv_i32 tcg_op;
>> +    TCGv_i32 tcg_res;
>> +
>> +    tcg_op = read_fp_sreg(s, rn);
>> +    tcg_res = tcg_temp_new_i32();
>> +
>> +    switch (opcode) {
>> +    case 0x0: /* FMOV */
>> +        tcg_gen_mov_i32(tcg_res, tcg_op);
>> +        break;
>> +    case 0x1: /* FABS */
>> +        gen_helper_advsimd_absh(tcg_res, tcg_op);
>> +        break;
>> +    case 0x2: /* FNEG */
>> +        tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
>> +        break;
>> +    case 0x3: /* FSQRT */
>> +        gen_helper_sqrt_f16(tcg_res, tcg_op, cpu_env);
>> +        break;
>> +    case 0x8: /* FRINTN */
>> +    case 0x9: /* FRINTP */
>> +    case 0xa: /* FRINTM */
>> +    case 0xb: /* FRINTZ */
>> +    case 0xc: /* FRINTA */
>> +    {
>> +        TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
>> +        fpst = get_fpstatus_ptr(true);
>> +
>> +        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
>> +        gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
>> +
>> +        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
>> +        tcg_temp_free_i32(tcg_rmode);
>> +        break;
>> +    }
>> +    case 0xe: /* FRINTX */
>> +        fpst = get_fpstatus_ptr(true);
>> +        gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
>> +        break;
>> +    case 0xf: /* FRINTI */
>> +        fpst = get_fpstatus_ptr(true);
>> +        gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
>> +        break;
>> +    default:
>> +        abort();
>> +    }
>> +
>> +    write_fp_sreg(s, rd, tcg_res);
>
> Some of these helpers will zero-extend from 16 bits, but at least a few won't
> -- notably fmov and fneg.  I wonder if it wouldn't be best to have a
> write_fp_hreg.

I fixed this up by using read_vec_element to load the value.

--
Alex Bennée
diff mbox series

Patch

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 92adf43a89..265bfb14d0 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -4508,6 +4508,66 @@  static void disas_fp_csel(DisasContext *s, uint32_t insn)
     tcg_temp_free_i64(t_true);
 }
 
+/* Floating-point data-processing (1 source) - half precision.
+ * Emits TCG for FP16 op @opcode: reads register @rn, writes register @rd.
+ */
+static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
+{
+    TCGv_ptr fpst = NULL;
+    TCGv_i32 tcg_op = tcg_temp_new_i32();
+    TCGv_i32 tcg_res = tcg_temp_new_i32();
+
+    /* Zero-extending 16-bit load: FMOV/FNEG copy bits 31:16 through as-is */
+    read_vec_element_i32(s, tcg_op, rn, 0, MO_16);
+
+    switch (opcode) {
+    case 0x0: /* FMOV */
+        tcg_gen_mov_i32(tcg_res, tcg_op);
+        break;
+    case 0x1: /* FABS */
+        gen_helper_advsimd_absh(tcg_res, tcg_op);
+        break;
+    case 0x2: /* FNEG */
+        tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
+        break;
+    case 0x3: /* FSQRT */
+        gen_helper_sqrt_f16(tcg_res, tcg_op, cpu_env);
+        break;
+    case 0x8: /* FRINTN */
+    case 0x9: /* FRINTP */
+    case 0xa: /* FRINTM */
+    case 0xb: /* FRINTZ */
+    case 0xc: /* FRINTA */
+    {
+        TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
+        fpst = get_fpstatus_ptr(true);
+        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); /* set rmode */
+        gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
+        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); /* swap it back */
+        tcg_temp_free_i32(tcg_rmode);
+        break;
+    }
+    case 0xe: /* FRINTX */
+        fpst = get_fpstatus_ptr(true);
+        gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
+        break;
+    case 0xf: /* FRINTI */
+        fpst = get_fpstatus_ptr(true);
+        gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
+        break;
+    default:
+        abort();
+    }
+
+    write_fp_sreg(s, rd, tcg_res);
+
+    if (fpst) {
+        tcg_temp_free_ptr(fpst);
+    }
+    tcg_temp_free_i32(tcg_op);
+    tcg_temp_free_i32(tcg_res);
+}
+
 /* Floating-point data-processing (1 source) - single precision */
 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
 {
@@ -4734,6 +4794,18 @@  static void disas_fp_1src(DisasContext *s, uint32_t insn)
 
             handle_fp_1src_double(s, opcode, rd, rn);
             break;
+        case 2:
+            if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+                unallocated_encoding(s);
+                return;
+            }
+
+            if (!fp_access_check(s)) {
+                return;
+            }
+
+            handle_fp_1src_half(s, opcode, rd, rn);
+            break;
         default:
             unallocated_encoding(s);
         }