[PULL,05/20] target/riscv: Check nanboxed inputs in trans_rvf.inc.c

Message ID 20200812223045.96803-6-alistair.francis@wdc.com
State New
Series [PULL,01/20] target/riscv: Generate nanboxed results from fp helpers

Commit Message

Alistair Francis Aug. 12, 2020, 10:30 p.m. UTC
From: Richard Henderson <richard.henderson@linaro.org>

If a 32-bit input is not properly nanboxed, then the input is replaced
with the default qnan.  The only inline expansion is for the sign-changing
set of instructions: FSGNJ.S, FSGNJX.S, FSGNJN.S.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
Message-Id: <20200724002807.441147-6-richard.henderson@linaro.org>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/insn_trans/trans_rvf.inc.c | 71 +++++++++++++++++++------
 target/riscv/translate.c                | 18 +++++++
 2 files changed, 73 insertions(+), 16 deletions(-)
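
For reference, the behaviour described above (a 32-bit input that is not
properly nanboxed is replaced with the default qNaN) is equivalent to the
following standalone C sketch; the helper name is made up here, but it mirrors
the gen_check_nanbox_s logic the patch adds to translate.c:

#include <stdint.h>

/*
 * Illustrative sketch only, not QEMU code.  A single-precision value held
 * in a 64-bit FP register is valid only if the upper 32 bits are all ones
 * (nanboxed).  Otherwise the value is treated as the default qNaN, itself
 * returned in nanboxed form.
 */
static uint64_t check_nanbox_s(uint64_t reg)
{
    const uint64_t boxed_min = 0xffffffff00000000ull; /* smallest valid boxed value */
    const uint64_t dqnan     = 0xffffffff7fc00000ull; /* nanboxed default qNaN */

    return reg >= boxed_min ? reg : dqnan;
}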

Comments

LIU Zhiwei Aug. 13, 2020, 2:14 a.m. UTC | #1
On 2020/8/13 6:30, Alistair Francis wrote:
> From: Richard Henderson <richard.henderson@linaro.org>
>
> If a 32-bit input is not properly nanboxed, then the input is replaced
> with the default qnan.  The only inline expansion is for the sign-changing
> set of instructions: FSGNJ.S, FSGNJX.S, FSGNJN.S.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> Reviewed-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
> Message-Id: <20200724002807.441147-6-richard.henderson@linaro.org>
> Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
> ---
>   target/riscv/insn_trans/trans_rvf.inc.c | 71 +++++++++++++++++++------
>   target/riscv/translate.c                | 18 +++++++
>   2 files changed, 73 insertions(+), 16 deletions(-)
>
> diff --git a/target/riscv/insn_trans/trans_rvf.inc.c b/target/riscv/insn_trans/trans_rvf.inc.c
> index 264d3139f1..f9a9e0643a 100644
> --- a/target/riscv/insn_trans/trans_rvf.inc.c
> +++ b/target/riscv/insn_trans/trans_rvf.inc.c
> @@ -161,47 +161,86 @@ static bool trans_fsgnj_s(DisasContext *ctx, arg_fsgnj_s *a)
>   {
>       REQUIRE_FPU;
>       REQUIRE_EXT(ctx, RVF);
> +
>       if (a->rs1 == a->rs2) { /* FMOV */
> -        tcg_gen_mov_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
> +        gen_check_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
>       } else { /* FSGNJ */
> -        tcg_gen_deposit_i64(cpu_fpr[a->rd], cpu_fpr[a->rs2], cpu_fpr[a->rs1],
> -                            0, 31);
> +        TCGv_i64 rs1 = tcg_temp_new_i64();
> +        TCGv_i64 rs2 = tcg_temp_new_i64();
> +
> +        gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
> +        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
> +
> +        /* This formulation retains the nanboxing of rs2. */
> +        tcg_gen_deposit_i64(cpu_fpr[a->rd], rs2, rs1, 0, 31);
> +        tcg_temp_free_i64(rs1);
> +        tcg_temp_free_i64(rs2);
>       }
> -    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);
>       mark_fs_dirty(ctx);
>       return true;
>   }
>   
>   static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a)
>   {
> +    TCGv_i64 rs1, rs2, mask;
> +
>       REQUIRE_FPU;
>       REQUIRE_EXT(ctx, RVF);
> +
> +    rs1 = tcg_temp_new_i64();
> +    gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
> +
>       if (a->rs1 == a->rs2) { /* FNEG */
> -        tcg_gen_xori_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], INT32_MIN);
> +        tcg_gen_xori_i64(cpu_fpr[a->rd], rs1, MAKE_64BIT_MASK(31, 1));
>       } else {
> -        TCGv_i64 t0 = tcg_temp_new_i64();
> -        tcg_gen_not_i64(t0, cpu_fpr[a->rs2]);
> -        tcg_gen_deposit_i64(cpu_fpr[a->rd], t0, cpu_fpr[a->rs1], 0, 31);
> -        tcg_temp_free_i64(t0);
> +        rs2 = tcg_temp_new_i64();
> +        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
> +
> +        /*
> +         * Replace bit 31 in rs1 with inverse in rs2.
> +         * This formulation retains the nanboxing of rs1.
> +         */
> +        mask = tcg_const_i64(~MAKE_64BIT_MASK(31, 1));
> +        tcg_gen_andc_i64(rs2, mask, rs2);
Hi Alistair,

As Chih-Min said, it's wrong here. He has given the correct patch code:
https://www.mail-archive.com/qemu-devel@nongnu.org/msg728540.html

We can either squash that code into this patch or add a separate patch
later. I prefer the former.
Thanks very much.

Zhiwei
Alistair Francis Aug. 13, 2020, 2:46 p.m. UTC | #2
On Wed, Aug 12, 2020 at 7:14 PM LIU Zhiwei <zhiwei_liu@c-sky.com> wrote:
>
>
>
> On 2020/8/13 6:30, Alistair Francis wrote:
> > From: Richard Henderson <richard.henderson@linaro.org>
> >
> > If a 32-bit input is not properly nanboxed, then the input is replaced
> > with the default qnan.  The only inline expansion is for the sign-changing
> > set of instructions: FSGNJ.S, FSGNJX.S, FSGNJN.S.
> >
> > Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> > Reviewed-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
> > Message-Id: <20200724002807.441147-6-richard.henderson@linaro.org>
> > Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
> > ---
> >   target/riscv/insn_trans/trans_rvf.inc.c | 71 +++++++++++++++++++------
> >   target/riscv/translate.c                | 18 +++++++
> >   2 files changed, 73 insertions(+), 16 deletions(-)
> >
> > diff --git a/target/riscv/insn_trans/trans_rvf.inc.c b/target/riscv/insn_trans/trans_rvf.inc.c
> > index 264d3139f1..f9a9e0643a 100644
> > --- a/target/riscv/insn_trans/trans_rvf.inc.c
> > +++ b/target/riscv/insn_trans/trans_rvf.inc.c
> > @@ -161,47 +161,86 @@ static bool trans_fsgnj_s(DisasContext *ctx, arg_fsgnj_s *a)
> >   {
> >       REQUIRE_FPU;
> >       REQUIRE_EXT(ctx, RVF);
> > +
> >       if (a->rs1 == a->rs2) { /* FMOV */
> > -        tcg_gen_mov_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
> > +        gen_check_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
> >       } else { /* FSGNJ */
> > -        tcg_gen_deposit_i64(cpu_fpr[a->rd], cpu_fpr[a->rs2], cpu_fpr[a->rs1],
> > -                            0, 31);
> > +        TCGv_i64 rs1 = tcg_temp_new_i64();
> > +        TCGv_i64 rs2 = tcg_temp_new_i64();
> > +
> > +        gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
> > +        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
> > +
> > +        /* This formulation retains the nanboxing of rs2. */
> > +        tcg_gen_deposit_i64(cpu_fpr[a->rd], rs2, rs1, 0, 31);
> > +        tcg_temp_free_i64(rs1);
> > +        tcg_temp_free_i64(rs2);
> >       }
> > -    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);
> >       mark_fs_dirty(ctx);
> >       return true;
> >   }
> >
> >   static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a)
> >   {
> > +    TCGv_i64 rs1, rs2, mask;
> > +
> >       REQUIRE_FPU;
> >       REQUIRE_EXT(ctx, RVF);
> > +
> > +    rs1 = tcg_temp_new_i64();
> > +    gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
> > +
> >       if (a->rs1 == a->rs2) { /* FNEG */
> > -        tcg_gen_xori_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], INT32_MIN);
> > +        tcg_gen_xori_i64(cpu_fpr[a->rd], rs1, MAKE_64BIT_MASK(31, 1));
> >       } else {
> > -        TCGv_i64 t0 = tcg_temp_new_i64();
> > -        tcg_gen_not_i64(t0, cpu_fpr[a->rs2]);
> > -        tcg_gen_deposit_i64(cpu_fpr[a->rd], t0, cpu_fpr[a->rs1], 0, 31);
> > -        tcg_temp_free_i64(t0);
> > +        rs2 = tcg_temp_new_i64();
> > +        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
> > +
> > +        /*
> > +         * Replace bit 31 in rs1 with inverse in rs2.
> > +         * This formulation retains the nanboxing of rs1.
> > +         */
> > +        mask = tcg_const_i64(~MAKE_64BIT_MASK(31, 1));
> > +        tcg_gen_andc_i64(rs2, mask, rs2);
> Hi Alistair,
>
> As Chih-Min said, it's wrong here.  He has given the correct patch code
> https://www.mail-archive.com/qemu-devel@nongnu.org/msg728540.html
>
> We can either  squash the code to this patch or add an separate patch
> later. I prefer the former.
> Thanks very much.

Richard are you ok if I squash this diff into the patch and send a PR v2?

diff --git a/target/riscv/insn_trans/trans_rvf.inc.c b/target/riscv/insn_trans/trans_rvf.inc.c
index f9a9e0643a..76f281d275 100644
--- a/target/riscv/insn_trans/trans_rvf.inc.c
+++ b/target/riscv/insn_trans/trans_rvf.inc.c
@@ -201,7 +201,8 @@ static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a)
          * This formulation retains the nanboxing of rs1.
          */
         mask = tcg_const_i64(~MAKE_64BIT_MASK(31, 1));
-        tcg_gen_andc_i64(rs2, mask, rs2);
+        tcg_gen_not_i64(rs2, rs2);         // forget to inverse rs2
+        tcg_gen_andc_i64(rs2, rs2, mask);  //mask needs to be
         tcg_gen_and_i64(rs1, mask, rs1);
         tcg_gen_or_i64(cpu_fpr[a->rd], rs1, rs2);


Alistair

Richard Henderson Aug. 13, 2020, 4:48 p.m. UTC | #3
On 8/13/20 7:46 AM, Alistair Francis wrote:
>> Hi Alistair,
>>
>> As Chih-Min said, it's wrong here.  He has given the correct patch code
>> https://www.mail-archive.com/qemu-devel@nongnu.org/msg728540.html
>>
>> We can either  squash the code to this patch or add an separate patch
>> later. I prefer the former.
>> Thanks very much.
> 
> Richard are you ok if I squash this diff into the patch and send a PR v2?
> 
> diff --git a/target/riscv/insn_trans/trans_rvf.inc.c b/target/riscv/insn_trans/trans_rvf.inc.c
> index f9a9e0643a..76f281d275 100644
> --- a/target/riscv/insn_trans/trans_rvf.inc.c
> +++ b/target/riscv/insn_trans/trans_rvf.inc.c
> @@ -201,7 +201,8 @@ static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a)
>           * This formulation retains the nanboxing of rs1.
>           */
>          mask = tcg_const_i64(~MAKE_64BIT_MASK(31, 1));
> -        tcg_gen_andc_i64(rs2, mask, rs2);
> +        tcg_gen_not_i64(rs2, rs2);         // forget to inverse rs2
> +        tcg_gen_andc_i64(rs2, rs2, mask);  //mask needs to be

Ah, well.  Yes, it's a bug.  However,

   ~rs2 & ~mask
= ~(rs2 | mask)

so a better fix could be

-    tcg_gen_andc_i64(rs2, mask, rs2);
+    tcg_gen_nor_i64(rs2, rs2, mask);


As an aside, I think perhaps I should have added a ppc-style rotate-and-insert
primitive to handle this sort of bitfield insert, since the best set of host
insns to perform this operation, when the start of the field is not bit 0, is
difficult to predict from the translator.


r~
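
For a concrete check of the identity behind that suggestion (a standalone
sketch, not part of the patch): by De Morgan, ~rs2 & ~mask == ~(rs2 | mask),
so a single nor produces the inverted bit 31 of rs2 that FSGNJN.S needs,
whereas the original andc(mask, rs2) keeps the wrong bits:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    /* Standalone illustration only. */
    const uint64_t mask = ~(UINT64_C(1) << 31);   /* ~MAKE_64BIT_MASK(31, 1) */
    const uint64_t rs2  = 0xffffffffc0000000ull;  /* nanboxed, sign bit set */

    uint64_t want  = ~rs2 & ~mask;                /* inverted bit 31 of rs2, isolated */
    uint64_t fixed = ~(rs2 | mask);               /* the suggested nor formulation */
    uint64_t buggy = mask & ~rs2;                 /* what andc(mask, rs2) computed */

    assert(fixed == want);   /* sign bit was set, so its inverse is 0 */
    assert(buggy != want);   /* keeps bits 0..30 and 32..63 of ~rs2 instead */
    return 0;
}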
Alistair Francis Aug. 13, 2020, 9:19 p.m. UTC | #4
On Thu, Aug 13, 2020 at 9:48 AM Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> On 8/13/20 7:46 AM, Alistair Francis wrote:
> >> Hi Alistair,
> >>
> >> As Chih-Min said, it's wrong here.  He has given the correct patch code
> >> https://www.mail-archive.com/qemu-devel@nongnu.org/msg728540.html
> >>
> >> We can either  squash the code to this patch or add an separate patch
> >> later. I prefer the former.
> >> Thanks very much.
> >
> > Richard are you ok if I squash this diff into the patch and send a PR v2?
> >
> > diff --git a/target/riscv/insn_trans/trans_rvf.inc.c b/target/riscv/insn_trans/trans_rvf.inc.c
> > index f9a9e0643a..76f281d275 100644
> > --- a/target/riscv/insn_trans/trans_rvf.inc.c
> > +++ b/target/riscv/insn_trans/trans_rvf.inc.c
> > @@ -201,7 +201,8 @@ static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a)
> >           * This formulation retains the nanboxing of rs1.
> >           */
> >          mask = tcg_const_i64(~MAKE_64BIT_MASK(31, 1));
> > -        tcg_gen_andc_i64(rs2, mask, rs2);
> > +        tcg_gen_not_i64(rs2, rs2);         // forget to inverse rs2
> > +        tcg_gen_andc_i64(rs2, rs2, mask);  //mask needs to be
>
> Ah, well.  Yes, it's a bug.  However,
>
>    ~rs2 & ~mask
> = ~(rs2 | mask)
>
> so a better fix could be
>
> -    tcg_gen_andc_i64(rs2, mask, rs2);
> +    tcg_gen_nor_i64(rs2, rs2, mask);

Fixed.

Alistair


Patch

diff --git a/target/riscv/insn_trans/trans_rvf.inc.c b/target/riscv/insn_trans/trans_rvf.inc.c
index 264d3139f1..f9a9e0643a 100644
--- a/target/riscv/insn_trans/trans_rvf.inc.c
+++ b/target/riscv/insn_trans/trans_rvf.inc.c
@@ -161,47 +161,86 @@ static bool trans_fsgnj_s(DisasContext *ctx, arg_fsgnj_s *a)
 {
     REQUIRE_FPU;
     REQUIRE_EXT(ctx, RVF);
+
     if (a->rs1 == a->rs2) { /* FMOV */
-        tcg_gen_mov_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
+        gen_check_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
     } else { /* FSGNJ */
-        tcg_gen_deposit_i64(cpu_fpr[a->rd], cpu_fpr[a->rs2], cpu_fpr[a->rs1],
-                            0, 31);
+        TCGv_i64 rs1 = tcg_temp_new_i64();
+        TCGv_i64 rs2 = tcg_temp_new_i64();
+
+        gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
+        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
+
+        /* This formulation retains the nanboxing of rs2. */
+        tcg_gen_deposit_i64(cpu_fpr[a->rd], rs2, rs1, 0, 31);
+        tcg_temp_free_i64(rs1);
+        tcg_temp_free_i64(rs2);
     }
-    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);
     mark_fs_dirty(ctx);
     return true;
 }
 
 static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a)
 {
+    TCGv_i64 rs1, rs2, mask;
+
     REQUIRE_FPU;
     REQUIRE_EXT(ctx, RVF);
+
+    rs1 = tcg_temp_new_i64();
+    gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
+
     if (a->rs1 == a->rs2) { /* FNEG */
-        tcg_gen_xori_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], INT32_MIN);
+        tcg_gen_xori_i64(cpu_fpr[a->rd], rs1, MAKE_64BIT_MASK(31, 1));
     } else {
-        TCGv_i64 t0 = tcg_temp_new_i64();
-        tcg_gen_not_i64(t0, cpu_fpr[a->rs2]);
-        tcg_gen_deposit_i64(cpu_fpr[a->rd], t0, cpu_fpr[a->rs1], 0, 31);
-        tcg_temp_free_i64(t0);
+        rs2 = tcg_temp_new_i64();
+        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
+
+        /*
+         * Replace bit 31 in rs1 with inverse in rs2.
+         * This formulation retains the nanboxing of rs1.
+         */
+        mask = tcg_const_i64(~MAKE_64BIT_MASK(31, 1));
+        tcg_gen_andc_i64(rs2, mask, rs2);
+        tcg_gen_and_i64(rs1, mask, rs1);
+        tcg_gen_or_i64(cpu_fpr[a->rd], rs1, rs2);
+
+        tcg_temp_free_i64(mask);
+        tcg_temp_free_i64(rs2);
     }
-    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);
+    tcg_temp_free_i64(rs1);
+
     mark_fs_dirty(ctx);
     return true;
 }
 
 static bool trans_fsgnjx_s(DisasContext *ctx, arg_fsgnjx_s *a)
 {
+    TCGv_i64 rs1, rs2;
+
     REQUIRE_FPU;
     REQUIRE_EXT(ctx, RVF);
+
+    rs1 = tcg_temp_new_i64();
+    gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
+
     if (a->rs1 == a->rs2) { /* FABS */
-        tcg_gen_andi_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], ~INT32_MIN);
+        tcg_gen_andi_i64(cpu_fpr[a->rd], rs1, ~MAKE_64BIT_MASK(31, 1));
     } else {
-        TCGv_i64 t0 = tcg_temp_new_i64();
-        tcg_gen_andi_i64(t0, cpu_fpr[a->rs2], INT32_MIN);
-        tcg_gen_xor_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], t0);
-        tcg_temp_free_i64(t0);
+        rs2 = tcg_temp_new_i64();
+        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
+
+        /*
+         * Xor bit 31 in rs1 with that in rs2.
+         * This formulation retains the nanboxing of rs1.
+         */
+        tcg_gen_andi_i64(rs2, rs2, MAKE_64BIT_MASK(31, 1));
+        tcg_gen_xor_i64(cpu_fpr[a->rd], rs1, rs2);
+
+        tcg_temp_free_i64(rs2);
     }
-    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);
+    tcg_temp_free_i64(rs1);
+
     mark_fs_dirty(ctx);
     return true;
 }
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 12a746da97..bf35182776 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -101,6 +101,24 @@ static void gen_nanbox_s(TCGv_i64 out, TCGv_i64 in)
     tcg_gen_ori_i64(out, in, MAKE_64BIT_MASK(32, 32));
 }
 
+/*
+ * A narrow n-bit operation, where n < FLEN, checks that input operands
+ * are correctly Nan-boxed, i.e., all upper FLEN - n bits are 1.
+ * If so, the least-significant bits of the input are used, otherwise the
+ * input value is treated as an n-bit canonical NaN (v2.2 section 9.2).
+ *
+ * Here, the result is always nan-boxed, even the canonical nan.
+ */
+static void gen_check_nanbox_s(TCGv_i64 out, TCGv_i64 in)
+{
+    TCGv_i64 t_max = tcg_const_i64(0xffffffff00000000ull);
+    TCGv_i64 t_nan = tcg_const_i64(0xffffffff7fc00000ull);
+
+    tcg_gen_movcond_i64(TCG_COND_GEU, out, in, t_max, in, t_nan);
+    tcg_temp_free_i64(t_max);
+    tcg_temp_free_i64(t_nan);
+}
+
 static void generate_exception(DisasContext *ctx, int excp)
 {
     tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next);
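
To summarise the end result, here is a hedged, standalone C model (made-up
helper names, not QEMU code) of what the corrected translations compute once
both inputs have passed the nanbox check above:

#include <stdint.h>

/* Illustrative model only; mirrors the fixed sign-injection translations. */

#define SIGN_BIT (UINT64_C(1) << 31)

/* FSGNJ.S: magnitude of rs1, sign of rs2; keeps the nanboxing of rs2. */
static uint64_t fsgnj_s(uint64_t rs1, uint64_t rs2)
{
    return (rs2 & ~(SIGN_BIT - 1)) | (rs1 & (SIGN_BIT - 1));
}

/* FSGNJN.S: magnitude of rs1, inverted sign of rs2; keeps the nanboxing of rs1. */
static uint64_t fsgnjn_s(uint64_t rs1, uint64_t rs2)
{
    const uint64_t mask = ~SIGN_BIT;          /* everything except bit 31 */
    return (rs1 & mask) | ~(rs2 | mask);      /* the corrected nor form */
}

/* FSGNJX.S: flip the sign of rs1 by the sign of rs2; keeps the nanboxing of rs1. */
static uint64_t fsgnjx_s(uint64_t rs1, uint64_t rs2)
{
    return rs1 ^ (rs2 & SIGN_BIT);
}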