
[4/8] target/riscv: 128-bit arithmetic and logic instructions

Message ID: 20210830171638.126325-4-frederic.petrot@univ-grenoble-alpes.fr
State: New
Series: [1/8] target/riscv: Settings for 128-bit extension support

Commit Message

Frédéric Pétrot Aug. 30, 2021, 5:16 p.m. UTC
Add support for the 128-bit arithmetic and logic instructions.
Note that all (i) instructions now act on 128-bit registers, that a few
instructions are added to cope with values held on 64 bits within the
128-bit registers, and that the ones operating on 32-bit values must
also be modified for proper sign extension.
Most algorithms are taken from Hacker's Delight.

Signed-off-by: Frédéric Pétrot <frederic.petrot@univ-grenoble-alpes.fr>
Co-authored-by: Fabien Portas <fabien.portas@grenoble-inp.org>
---
 target/riscv/insn32.decode              |  13 +
 target/riscv/insn_trans/trans_rvi.c.inc | 955 +++++++++++++++++++++++-
 target/riscv/translate.c                |  25 +
 3 files changed, 976 insertions(+), 17 deletions(-)

Comments

Philippe Mathieu-Daudé Aug. 30, 2021, 9:38 p.m. UTC | #1
On 8/30/21 7:16 PM, Frédéric Pétrot wrote:
> Add support for the 128-bit arithmetic and logic instructions.
> Note that all (i) instructions now act on 128-bit registers, that a few
> instructions are added to cope with values held on 64 bits within the
> 128-bit registers, and that the ones operating on 32-bit values must
> also be modified for proper sign extension.
> Most algorithms are taken from Hacker's Delight.
> 
> Signed-off-by: Frédéric Pétrot <frederic.petrot@univ-grenoble-alpes.fr>
> Co-authored-by: Fabien Portas <fabien.portas@grenoble-inp.org>
> ---
>  target/riscv/insn32.decode              |  13 +
>  target/riscv/insn_trans/trans_rvi.c.inc | 955 +++++++++++++++++++++++-
>  target/riscv/translate.c                |  25 +
>  3 files changed, 976 insertions(+), 17 deletions(-)

> diff --git a/target/riscv/insn_trans/trans_rvi.c.inc b/target/riscv/insn_trans/trans_rvi.c.inc
> index 772330a766..0401ba3d69 100644
> --- a/target/riscv/insn_trans/trans_rvi.c.inc
> +++ b/target/riscv/insn_trans/trans_rvi.c.inc
> @@ -26,14 +26,20 @@ static bool trans_illegal(DisasContext *ctx, arg_empty *a)
>  
>  static bool trans_c64_illegal(DisasContext *ctx, arg_empty *a)
>  {
> -     REQUIRE_64BIT(ctx);
> -     return trans_illegal(ctx, a);
> +    REQUIRE_64_OR_128BIT(ctx);
> +    return trans_illegal(ctx, a);
>  }
>  
>  static bool trans_lui(DisasContext *ctx, arg_lui *a)
>  {
>      if (a->rd != 0) {
>          tcg_gen_movi_tl(cpu_gpr[a->rd], a->imm);
> +#if defined(TARGET_RISCV128)
> +        if (is_128bit(ctx)) {

Maybe this could allow the compiler to eventually elide the
code and avoid superfluous #ifdef'ry:

           if (TARGET_LONG_BITS >= 128) {

> +            tcg_gen_ext_i64_i128(cpu_gpr[a->rd], cpu_gprh[a->rd],
> +                                 cpu_gpr[a->rd]);
> +        }
> +#endif
>      }
>      return true;
>  }
Philippe Mathieu-Daudé Aug. 30, 2021, 9:40 p.m. UTC | #2
On 8/30/21 11:38 PM, Philippe Mathieu-Daudé wrote:
> On 8/30/21 7:16 PM, Frédéric Pétrot wrote:
>> Add support for the 128-bit arithmetic and logic instructions.
>> Note that all (i) instructions now act on 128-bit registers, that a few
>> instructions are added to cope with values held on 64 bits within the
>> 128-bit registers, and that the ones operating on 32-bit values must
>> also be modified for proper sign extension.
>> Most algorithms are taken from Hacker's Delight.
>>
>> Signed-off-by: Frédéric Pétrot <frederic.petrot@univ-grenoble-alpes.fr>
>> Co-authored-by: Fabien Portas <fabien.portas@grenoble-inp.org>
>> ---
>>  target/riscv/insn32.decode              |  13 +
>>  target/riscv/insn_trans/trans_rvi.c.inc | 955 +++++++++++++++++++++++-
>>  target/riscv/translate.c                |  25 +
>>  3 files changed, 976 insertions(+), 17 deletions(-)
> 
>> diff --git a/target/riscv/insn_trans/trans_rvi.c.inc b/target/riscv/insn_trans/trans_rvi.c.inc
>> index 772330a766..0401ba3d69 100644
>> --- a/target/riscv/insn_trans/trans_rvi.c.inc
>> +++ b/target/riscv/insn_trans/trans_rvi.c.inc
>> @@ -26,14 +26,20 @@ static bool trans_illegal(DisasContext *ctx, arg_empty *a)
>>  
>>  static bool trans_c64_illegal(DisasContext *ctx, arg_empty *a)
>>  {
>> -     REQUIRE_64BIT(ctx);
>> -     return trans_illegal(ctx, a);
>> +    REQUIRE_64_OR_128BIT(ctx);
>> +    return trans_illegal(ctx, a);
>>  }
>>  
>>  static bool trans_lui(DisasContext *ctx, arg_lui *a)
>>  {
>>      if (a->rd != 0) {
>>          tcg_gen_movi_tl(cpu_gpr[a->rd], a->imm);
>> +#if defined(TARGET_RISCV128)
>> +        if (is_128bit(ctx)) {
> 
> Maybe this could allow the compiler to eventually elide the
> code and avoid superfluous #ifdef'ry:
> 
>            if (TARGET_LONG_BITS >= 128) {

Actually:

             if (TARGET_LONG_BITS >= 128 && is_128bit(ctx)) {

> 
>> +            tcg_gen_ext_i64_i128(cpu_gpr[a->rd], cpu_gprh[a->rd],
>> +                                 cpu_gpr[a->rd]);
>> +        }
>> +#endif
>>      }
>>      return true;
>>  }
>
Richard Henderson Aug. 31, 2021, 3:30 a.m. UTC | #3
On 8/30/21 10:16 AM, Frédéric Pétrot wrote:
> Add support for the 128-bit arithmetic and logic instructions.
> Note that all (i) instructions now act on 128-bit registers, that a few
> instructions are added to cope with values held on 64 bits within the
> 128-bit registers, and that the ones operating on 32-bit values must
> also be modified for proper sign extension.
> Most algorithms are taken from Hacker's Delight.
> 
> Signed-off-by: Frédéric Pétrot <frederic.petrot@univ-grenoble-alpes.fr>
> Co-authored-by: Fabien Portas <fabien.portas@grenoble-inp.org>
> ---
>   target/riscv/insn32.decode              |  13 +
>   target/riscv/insn_trans/trans_rvi.c.inc | 955 +++++++++++++++++++++++-
>   target/riscv/translate.c                |  25 +
>   3 files changed, 976 insertions(+), 17 deletions(-)
> 
> diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
> index 225669e277..2fe7e1dd36 100644
> --- a/target/riscv/insn32.decode
> +++ b/target/riscv/insn32.decode
> @@ -22,6 +22,7 @@
>   %rs1       15:5
>   %rd        7:5
>   %sh5       20:5
> +%sh6       20:6
>   
>   %sh7    20:7
>   %csr    20:12
> @@ -91,6 +92,9 @@
>   # Formats 64:
>   @sh5     .......  ..... .....  ... ..... ....... &shift  shamt=%sh5      %rs1 %rd
>   
> +# Formats 128:
> +@sh6       ...... ...... ..... ... ..... ....... &shift shamt=%sh6 %rs1 %rd
> +
>   # *** Privileged Instructions ***
>   ecall       000000000000     00000 000 00000 1110011
>   ebreak      000000000001     00000 000 00000 1110011
> @@ -166,6 +170,15 @@ sraw     0100000 .....  ..... 101 ..... 0111011 @r
>   ldu      ............   ..... 111 ..... 0000011 @i
>   lq       ............   ..... 010 ..... 0001111 @i
>   sq       ............   ..... 100 ..... 0100011 @s
> +addid    ............  .....  000 ..... 1011011 @i
> +sllid    000000 ......  ..... 001 ..... 1011011 @sh6
> +srlid    000000 ......  ..... 101 ..... 1011011 @sh6
> +sraid    010000 ......  ..... 101 ..... 1011011 @sh6
> +addd     0000000 ..... .....  000 ..... 1111011 @r
> +subd     0100000 ..... .....  000 ..... 1111011 @r
> +slld     0000000 ..... .....  001 ..... 1111011 @r
> +srld     0000000 ..... .....  101 ..... 1111011 @r
> +srad     0100000 ..... .....  101 ..... 1111011 @r
>   
>   # *** RV32M Standard Extension ***
>   mul      0000001 .....  ..... 000 ..... 0110011 @r
> diff --git a/target/riscv/insn_trans/trans_rvi.c.inc b/target/riscv/insn_trans/trans_rvi.c.inc
> index 772330a766..0401ba3d69 100644
> --- a/target/riscv/insn_trans/trans_rvi.c.inc
> +++ b/target/riscv/insn_trans/trans_rvi.c.inc
> @@ -26,14 +26,20 @@ static bool trans_illegal(DisasContext *ctx, arg_empty *a)
>   
>   static bool trans_c64_illegal(DisasContext *ctx, arg_empty *a)
>   {
> -     REQUIRE_64BIT(ctx);
> -     return trans_illegal(ctx, a);
> +    REQUIRE_64_OR_128BIT(ctx);
> +    return trans_illegal(ctx, a);
>   }
>   
>   static bool trans_lui(DisasContext *ctx, arg_lui *a)
>   {
>       if (a->rd != 0) {
>           tcg_gen_movi_tl(cpu_gpr[a->rd], a->imm);
> +#if defined(TARGET_RISCV128)
> +        if (is_128bit(ctx)) {
> +            tcg_gen_ext_i64_i128(cpu_gpr[a->rd], cpu_gprh[a->rd],
> +                                 cpu_gpr[a->rd]);
> +        }
> +#endif
>       }
>       return true;
>   }

I think it is a mistake to introduce all of these ifdefs.

If 128-bit is not enabled, then is_128bit should evaluate to false, and all should be 
well.  As for cpu_gprh[], that should be hidden behind some function/macro so that if 
128-bit is not enabled we get qemu_build_not_reached().

Finally, you can compute this immediate value directly.  Don't leave it to the optimizer 
to provide the extension.
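
A minimal sketch of that idea (the helper name and the TARGET_RISCV128
guard are only illustrative here, not part of this series):

    /* Hypothetical accessor hiding cpu_gprh[] behind a build-time check. */
    static inline TCGv gpr_hi(int reg)
    {
    #ifdef TARGET_RISCV128
        return cpu_gprh[reg];
    #else
        /* Fails the build if a 128-bit path is not eliminated as dead code. */
        qemu_build_not_reached();
    #endif
    }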

>   static bool trans_auipc(DisasContext *ctx, arg_auipc *a)
>   {
>       if (a->rd != 0) {
> +#if defined(TARGET_RISCV128)
> +        if (is_128bit(ctx)) {
> +            /* TODO : when pc is 128 bits, use all its bits */
> +            TCGv pc = tcg_const_tl(ctx->base.pc_next),
> +                 imm = tcg_const_tl(a->imm),
> +                 immh = tcg_const_tl((a->imm & 0x80000)
> +                         ? 0xffffffffffffffff : 0),

No need to test bits here: -(a->imm < 0) will do fine.

> +                 cnst_zero = tcg_const_tl(0);
> +            tcg_gen_add2_tl(cpu_gpr[a->rd], cpu_gprh[a->rd], pc, cnst_zero,
> +                            imm, immh);
> +            tcg_temp_free(pc);
> +            tcg_temp_free(imm);
> +            tcg_temp_free(immh);
> +            tcg_temp_free(cnst_zero);
> +            return true;

tcg_constant_tl, not tcg_const_tl and no tcg_temp_free.
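
Together with the previous remark about the immediate, a sketch of what
the 128-bit auipc addition could look like (not the final code):

    TCGv pc   = tcg_constant_tl(ctx->base.pc_next);
    TCGv zero = tcg_constant_tl(0);
    TCGv imml = tcg_constant_tl(a->imm);
    TCGv immh = tcg_constant_tl(-(target_long)(a->imm < 0));

    tcg_gen_add2_tl(cpu_gpr[a->rd], cpu_gprh[a->rd], pc, zero, imml, immh);
    /* Constants from tcg_constant_tl() must not be freed. */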

> +    case TCG_COND_LT:
> +    {
> +        TCGv tmp1 = tcg_temp_new(),
> +             tmp2 = tcg_temp_new();
> +
> +        tcg_gen_xor_tl(tmp1, rh, ah);
> +        tcg_gen_xor_tl(tmp2, ah, bh);
> +        tcg_gen_and_tl(tmp1, tmp1, tmp2);
> +        tcg_gen_xor_tl(tmp1, rh, tmp1);
> +        tcg_gen_setcondi_tl(TCG_COND_LT, rl, tmp1, 0); /* Check sign bit */
> +
> +        tcg_temp_free(tmp1);
> +        tcg_temp_free(tmp2);
> +        break;
> +    }

Incorrect, as you're not examining the low parts at all.
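
One correct formulation folds the low halves in, e.g. (sketch only, with
t0/t1/t2 fresh temporaries; the LTU case below needs the same treatment
with TCG_COND_LTU on the high parts):

    /* signed 128-bit a < b:  (ah < bh) || (ah == bh && al <u bl) */
    tcg_gen_setcond_tl(TCG_COND_LT, t0, ah, bh);
    tcg_gen_setcond_tl(TCG_COND_EQ, t1, ah, bh);
    tcg_gen_setcond_tl(TCG_COND_LTU, t2, al, bl);
    tcg_gen_and_tl(t1, t1, t2);
    tcg_gen_or_tl(rl, t0, t1);
    tcg_gen_movi_tl(rh, 0);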


> +
> +    case TCG_COND_GE:
> +        /* We invert the result of TCG_COND_LT */
> +        gen_setcond_128(rl, rh, al, ah, bl, bh, TCG_COND_LT);
> +        tcg_gen_setcondi_tl(TCG_COND_EQ, rl, rl, 0);

Inversion of a boolean is better as xor with 1.
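
For instance, after gen_setcond_128() has produced the LT boolean in rl:

    tcg_gen_xori_tl(rl, rl, 1);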

> +    case TCG_COND_LTU:
> +    {
> +        TCGv tmp1 = tcg_temp_new(),
> +             tmp2 = tcg_temp_new();
> +
> +        tcg_gen_eqv_tl(tmp1, ah, bh);
> +        tcg_gen_and_tl(tmp1, tmp1, rh);
> +        tcg_gen_not_tl(tmp2, ah);
> +        tcg_gen_and_tl(tmp2, tmp2, bh);
> +        tcg_gen_or_tl(tmp1, tmp1, tmp2);
> +
> +        tcg_gen_setcondi_tl(TCG_COND_LT, rl, tmp1, 0); /* Check sign bit */
> +
> +        tcg_temp_free(tmp1);
> +        tcg_temp_free(tmp2);
> +        break;
> +    }

Again, missing comparison of low parts.

> @@ -93,7 +205,28 @@ static bool gen_branch(DisasContext *ctx, arg_b *a, TCGCond cond)
>       gen_get_gpr(source1, a->rs1);
>       gen_get_gpr(source2, a->rs2);
>   
> +#if defined(TARGET_RISCV128)
> +    if (is_128bit(ctx)) {
> +        TCGv source1h, source2h, tmph, tmpl;
> +        source1h = tcg_temp_new();
> +        source2h = tcg_temp_new();
> +        tmph = tcg_temp_new();
> +        tmpl = tcg_temp_new();
> +        gen_get_gprh(source1h, a->rs1);
> +        gen_get_gprh(source2h, a->rs2);
> +
> +        gen_setcond_128(tmpl, tmph, source1, source1h, source2, source2h, cond);
> +        tcg_gen_brcondi_tl(TCG_COND_NE, tmpl, 0, l);
> +        tcg_temp_free(source1h);
> +        tcg_temp_free(source2h);
> +        tcg_temp_free(tmph);
> +        tcg_temp_free(tmpl);

setcond feeding brcond results in too many comparisons, usually.

In this instance it may be just as easy to generate multiple branches, in general.  But 
EQ/NE should not be

	setcond	t1, al, bl, eq
	setcond t2, ah, bh, eq
	and	t3, t1, t1
	brcond	t3, 0, ne

but

	xor	t1, al, bl
	xor	t2, ah, bh
	or	t3, t1, t2
	brcond	t3, 0, eq
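
In actual TCG calls that is roughly (sketch; t0 and t1 are fresh
temporaries, cond is TCG_COND_EQ or TCG_COND_NE as decoded):

    tcg_gen_xor_tl(t0, al, bl);
    tcg_gen_xor_tl(t1, ah, bh);
    tcg_gen_or_tl(t0, t0, t1);
    tcg_gen_brcondi_tl(cond, t0, 0, l);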

>   static bool trans_srli(DisasContext *ctx, arg_srli *a)
>   {
> +#if defined(TARGET_RISCV128)
> +    if (is_128bit(ctx)) {
> +        if (a->shamt >= 128) {
> +            return false;
> +        }
> +
> +        if (a->rd != 0 && a->shamt != 0) {
> +            TCGv rs = tcg_temp_new(),
> +                 rsh = tcg_temp_new(),
> +                 res = tcg_temp_new(),
> +                 resh = tcg_temp_new(),
> +                 tmp = tcg_temp_new();
> +            gen_get_gpr(rs, a->rs1);
> +            gen_get_gprh(rsh, a->rs1);
> +
> +            /*
> +             * Computation of double-length right logical shift,
> +             * adapted for immediates from section 2.17 of Hacker's Delight
> +             */
> +            if (a->shamt >= 64) {
> +                tcg_gen_movi_tl(res, 0);
> +            } else {
> +                tcg_gen_shri_tl(res, rs, a->shamt);
> +            }
> +            if (64 - a->shamt < 0) {
> +                tcg_gen_movi_tl(tmp, 0);
> +            } else {
> +                tcg_gen_shli_tl(tmp, rsh, 64 - a->shamt);
> +            }
> +            tcg_gen_or_tl(res, res, tmp);
> +            if (a->shamt - 64 < 0) {
> +                tcg_gen_movi_tl(tmp, 0);
> +            } else {
> +                tcg_gen_shri_tl(tmp, rsh, a->shamt - 64);
> +            }
> +            tcg_gen_or_tl(res, res, tmp);
> +
> +            if (a->shamt >= 64) {
> +                tcg_gen_movi_tl(resh, 0);
> +            } else {
> +                tcg_gen_shri_tl(resh, rsh, a->shamt);
> +            }

We have tcg_gen_extract2_i64 for the purpose of double-word immediate shifts.  You should 
be doing

     if (shamt >= 64) {
         tcg_gen_shri_tl(resl, srch, shamt - 64);
         tcg_gen_movi_tl(resh, 0);
     } else {
         tcg_gen_extract2_tl(resl, srcl, srch, shamt);
         tcg_gen_shri_tl(resh, srch, shamt);
     }

>   static bool trans_srai(DisasContext *ctx, arg_srai *a)

     if (shamt >= 64) {
         tcg_gen_sari_tl(resl, srch, shamt - 64);
         tcg_gen_sari_tl(resh, srch, 63);
     } else {
         tcg_gen_extract2_tl(resl, srcl, srch, shamt);
         tcg_gen_sari_tl(resh, srch, shamt);
     }

>  static bool trans_slli(DisasContext *ctx, arg_slli *a)

     if (shamt >= 64) {
         tcg_gen_shli_tl(resh, srcl, shamt - 64);
         tcg_gen_movi_tl(resl, 0);
     } else {
         tcg_gen_extract2_tl(resh, srcl, srch, 64 - shamt);
         tcg_gen_shli_tl(resl, srcl, shamt);
     }

C.f. tcg/tcg-op.c, tcg_gen_shifti_i64, which is doing the same thing for i32.

> +#if defined(TARGET_RISCV128)
> +enum M128_DIR { M128_LEFT, M128_RIGHT, M128_RIGHT_ARITH };
> +static void gen_shift_mod128(TCGv ret, TCGv arg1, TCGv arg2, enum M128_DIR dir)
> +{
> +    TCGv tmp1 = tcg_temp_new(),
> +         tmp2 = tcg_temp_new(),
> +         cnst_zero = tcg_const_tl(0),
> +         sgn = tcg_temp_new();
> +
> +    tcg_gen_setcondi_tl(TCG_COND_GE, tmp1, arg2, 64);
> +    tcg_gen_setcondi_tl(TCG_COND_LT, tmp2, arg2, 0);
> +    tcg_gen_or_tl(tmp1, tmp1, tmp2);

What in the world are you doing with signed comparisons?

> +    tcg_gen_andi_tl(tmp2, arg2, 0x3f);

You should have one test with 0x3f and one with 0x40.
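
i.e. with the amount reduced to its low 7 bits, something along these
lines (a sketch for the logical-shift case; ge64/amt/res are fresh
temporaries):

    TCGv zero = tcg_constant_tl(0);
    tcg_gen_andi_tl(ge64, arg2, 0x40);     /* non-zero iff shamt >= 64 */
    tcg_gen_andi_tl(amt, arg2, 0x3f);      /* shift amount modulo 64 */
    tcg_gen_shl_tl(res, arg1, amt);        /* shr for the right shift */
    tcg_gen_movcond_tl(TCG_COND_NE, ret, ge64, zero, zero, res);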



r~
Richard Henderson Aug. 31, 2021, 3:32 a.m. UTC | #4
On 8/30/21 2:38 PM, Philippe Mathieu-Daudé wrote:
>> +#if defined(TARGET_RISCV128)
>> +        if (is_128bit(ctx)) {
> 
> Maybe this could allow the compiler to eventually elide the
> code and avoid superfluous #ifdef'ry:
> 
>             if (TARGET_LONG_BITS >= 128) {

TCG does not support TARGET_LONG_BITS != {32,64}.
This will not work.

But is_128bit() should be sufficient in each opcode, because that itself should evaluate 
to false if unsupported.
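
In other words, a predicate along these lines is enough for the compiler
to drop the 128-bit paths when the support is compiled out (the body of
the #ifdef branch is a placeholder, not the series' actual test):

    static inline bool is_128bit(DisasContext *ctx)
    {
    #ifdef TARGET_RISCV128
        return ctx_is_rv128(ctx);  /* placeholder for the real MISA/MXL check */
    #else
        return false;              /* compile-time constant: blocks get elided */
    #endif
    }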


r~
Frédéric Pétrot Aug. 31, 2021, 3:57 p.m. UTC | #5
Hello Philippe,

On 8/30/21 11:40 PM, Philippe Mathieu-Daudé wrote:
> On 8/30/21 11:38 PM, Philippe Mathieu-Daudé wrote:
>> On 8/30/21 7:16 PM, Frédéric Pétrot wrote:
>>> Add support for the 128-bit arithmetic and logic instructions.
>>> Note that all (i) instructions now act on 128-bit registers, that a few
>>> instructions are added to cope with values held on 64 bits within the
>>> 128-bit registers, and that the ones operating on 32-bit values must
>>> also be modified for proper sign extension.
>>> Most algorithms are taken from Hacker's Delight.
>>>
>>> Signed-off-by: Frédéric Pétrot <frederic.petrot@univ-grenoble-alpes.fr>
>>> Co-authored-by: Fabien Portas <fabien.portas@grenoble-inp.org>
>>> ---
>>>  target/riscv/insn32.decode              |  13 +
>>>  target/riscv/insn_trans/trans_rvi.c.inc | 955 +++++++++++++++++++++++-
>>>  target/riscv/translate.c                |  25 +
>>>  3 files changed, 976 insertions(+), 17 deletions(-)
>>
>>> diff --git a/target/riscv/insn_trans/trans_rvi.c.inc b/target/riscv/insn_trans/trans_rvi.c.inc
>>> index 772330a766..0401ba3d69 100644
>>> --- a/target/riscv/insn_trans/trans_rvi.c.inc
>>> +++ b/target/riscv/insn_trans/trans_rvi.c.inc
>>> @@ -26,14 +26,20 @@ static bool trans_illegal(DisasContext *ctx, arg_empty *a)
>>>  
>>>  static bool trans_c64_illegal(DisasContext *ctx, arg_empty *a)
>>>  {
>>> -     REQUIRE_64BIT(ctx);
>>> -     return trans_illegal(ctx, a);
>>> +    REQUIRE_64_OR_128BIT(ctx);
>>> +    return trans_illegal(ctx, a);
>>>  }
>>>  
>>>  static bool trans_lui(DisasContext *ctx, arg_lui *a)
>>>  {
>>>      if (a->rd != 0) {
>>>          tcg_gen_movi_tl(cpu_gpr[a->rd], a->imm);
>>> +#if defined(TARGET_RISCV128)
>>> +        if (is_128bit(ctx)) {
>>
>> Maybe this could allow the compiler to eventually elide the
>> code and avoid superfluous #ifdef'ry:
>>
>>            if (TARGET_LONG_BITS >= 128) {
> 
> Actually:
> 
>              if (TARGET_LONG_BITS >= 128 && is_128bit(ctx)) {

  We may have taken a wrong path then, because we have kept
  TARGET_LONG_BITS == 64 for the 128-bit case (as we use the 64-bit
  tcg_xxx_tl functions to generate our micro-ops, which I admit might be
  a mistake).

  Frédéric

> 
>>
>>> +            tcg_gen_ext_i64_i128(cpu_gpr[a->rd], cpu_gprh[a->rd],
>>> +                                 cpu_gpr[a->rd]);
>>> +        }
>>> +#endif
>>>      }
>>>      return true;
>>>  }
>>

Patch

diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index 225669e277..2fe7e1dd36 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -22,6 +22,7 @@ 
 %rs1       15:5
 %rd        7:5
 %sh5       20:5
+%sh6       20:6
 
 %sh7    20:7
 %csr    20:12
@@ -91,6 +92,9 @@ 
 # Formats 64:
 @sh5     .......  ..... .....  ... ..... ....... &shift  shamt=%sh5      %rs1 %rd
 
+# Formats 128:
+@sh6       ...... ...... ..... ... ..... ....... &shift shamt=%sh6 %rs1 %rd
+
 # *** Privileged Instructions ***
 ecall       000000000000     00000 000 00000 1110011
 ebreak      000000000001     00000 000 00000 1110011
@@ -166,6 +170,15 @@  sraw     0100000 .....  ..... 101 ..... 0111011 @r
 ldu      ............   ..... 111 ..... 0000011 @i
 lq       ............   ..... 010 ..... 0001111 @i
 sq       ............   ..... 100 ..... 0100011 @s
+addid    ............  .....  000 ..... 1011011 @i
+sllid    000000 ......  ..... 001 ..... 1011011 @sh6
+srlid    000000 ......  ..... 101 ..... 1011011 @sh6
+sraid    010000 ......  ..... 101 ..... 1011011 @sh6
+addd     0000000 ..... .....  000 ..... 1111011 @r
+subd     0100000 ..... .....  000 ..... 1111011 @r
+slld     0000000 ..... .....  001 ..... 1111011 @r
+srld     0000000 ..... .....  101 ..... 1111011 @r
+srad     0100000 ..... .....  101 ..... 1111011 @r
 
 # *** RV32M Standard Extension ***
 mul      0000001 .....  ..... 000 ..... 0110011 @r
diff --git a/target/riscv/insn_trans/trans_rvi.c.inc b/target/riscv/insn_trans/trans_rvi.c.inc
index 772330a766..0401ba3d69 100644
--- a/target/riscv/insn_trans/trans_rvi.c.inc
+++ b/target/riscv/insn_trans/trans_rvi.c.inc
@@ -26,14 +26,20 @@  static bool trans_illegal(DisasContext *ctx, arg_empty *a)
 
 static bool trans_c64_illegal(DisasContext *ctx, arg_empty *a)
 {
-     REQUIRE_64BIT(ctx);
-     return trans_illegal(ctx, a);
+    REQUIRE_64_OR_128BIT(ctx);
+    return trans_illegal(ctx, a);
 }
 
 static bool trans_lui(DisasContext *ctx, arg_lui *a)
 {
     if (a->rd != 0) {
         tcg_gen_movi_tl(cpu_gpr[a->rd], a->imm);
+#if defined(TARGET_RISCV128)
+        if (is_128bit(ctx)) {
+            tcg_gen_ext_i64_i128(cpu_gpr[a->rd], cpu_gprh[a->rd],
+                                 cpu_gpr[a->rd]);
+        }
+#endif
     }
     return true;
 }
@@ -41,7 +47,25 @@  static bool trans_lui(DisasContext *ctx, arg_lui *a)
 static bool trans_auipc(DisasContext *ctx, arg_auipc *a)
 {
     if (a->rd != 0) {
+#if defined(TARGET_RISCV128)
+        if (is_128bit(ctx)) {
+            /* TODO : when pc is 128 bits, use all its bits */
+            TCGv pc = tcg_const_tl(ctx->base.pc_next),
+                 imm = tcg_const_tl(a->imm),
+                 immh = tcg_const_tl((a->imm & 0x80000)
+                         ? 0xffffffffffffffff : 0),
+                 cnst_zero = tcg_const_tl(0);
+            tcg_gen_add2_tl(cpu_gpr[a->rd], cpu_gprh[a->rd], pc, cnst_zero,
+                            imm, immh);
+            tcg_temp_free(pc);
+            tcg_temp_free(imm);
+            tcg_temp_free(immh);
+            tcg_temp_free(cnst_zero);
+            return true;
+        }
+#endif
         tcg_gen_movi_tl(cpu_gpr[a->rd], a->imm + ctx->base.pc_next);
+        return true;
     }
     return true;
 }
@@ -84,6 +108,94 @@  static bool trans_jalr(DisasContext *ctx, arg_jalr *a)
     return true;
 }
 
+#if defined(TARGET_RISCV128)
+static bool gen_setcond_128(TCGv rl, TCGv rh,
+                            TCGv al, TCGv ah,
+                            TCGv bl, TCGv bh,
+                            TCGCond cond) {
+    tcg_gen_sub2_tl(rl, rh, al, ah, bl, bh);
+    switch (cond) {
+    case TCG_COND_EQ:
+    {
+        TCGv tmp1 = tcg_temp_new(),
+             tmp2 = tcg_temp_new();
+        tcg_gen_setcondi_tl(TCG_COND_EQ, tmp1, rl, 0);
+        tcg_gen_setcondi_tl(TCG_COND_EQ, tmp2, rh, 0);
+        tcg_gen_and_tl(rl, tmp1, tmp2);
+
+        tcg_temp_free(tmp1);
+        tcg_temp_free(tmp2);
+        break;
+    }
+
+    case TCG_COND_NE:
+    {
+        TCGv tmp1 = tcg_temp_new(),
+             tmp2 = tcg_temp_new();
+        tcg_gen_setcondi_tl(TCG_COND_NE, tmp1, rl, 0);
+        tcg_gen_setcondi_tl(TCG_COND_NE, tmp2, rh, 0);
+        tcg_gen_or_tl(rl, tmp1, tmp2);
+        tcg_gen_movi_tl(rh, 0);
+
+        tcg_temp_free(tmp1);
+        tcg_temp_free(tmp2);
+        break;
+    }
+
+    case TCG_COND_LT:
+    {
+        TCGv tmp1 = tcg_temp_new(),
+             tmp2 = tcg_temp_new();
+
+        tcg_gen_xor_tl(tmp1, rh, ah);
+        tcg_gen_xor_tl(tmp2, ah, bh);
+        tcg_gen_and_tl(tmp1, tmp1, tmp2);
+        tcg_gen_xor_tl(tmp1, rh, tmp1);
+        tcg_gen_setcondi_tl(TCG_COND_LT, rl, tmp1, 0); /* Check sign bit */
+
+        tcg_temp_free(tmp1);
+        tcg_temp_free(tmp2);
+        break;
+    }
+
+    case TCG_COND_GE:
+        /* We invert the result of TCG_COND_LT */
+        gen_setcond_128(rl, rh, al, ah, bl, bh, TCG_COND_LT);
+        tcg_gen_setcondi_tl(TCG_COND_EQ, rl, rl, 0);
+        break;
+
+    case TCG_COND_LTU:
+    {
+        TCGv tmp1 = tcg_temp_new(),
+             tmp2 = tcg_temp_new();
+
+        tcg_gen_eqv_tl(tmp1, ah, bh);
+        tcg_gen_and_tl(tmp1, tmp1, rh);
+        tcg_gen_not_tl(tmp2, ah);
+        tcg_gen_and_tl(tmp2, tmp2, bh);
+        tcg_gen_or_tl(tmp1, tmp1, tmp2);
+
+        tcg_gen_setcondi_tl(TCG_COND_LT, rl, tmp1, 0); /* Check sign bit */
+
+        tcg_temp_free(tmp1);
+        tcg_temp_free(tmp2);
+        break;
+    }
+
+    case TCG_COND_GEU:
+        /* We invert the result of TCG_COND_LTU */
+        gen_setcond_128(rl, rh, al, ah, bl, bh, TCG_COND_LTU);
+        tcg_gen_setcondi_tl(TCG_COND_EQ, rl, rl, 0);
+        break;
+
+    default:
+        return false;
+    }
+    tcg_gen_movi_tl(rh, 0);
+    return true;
+}
+#endif
+
 static bool gen_branch(DisasContext *ctx, arg_b *a, TCGCond cond)
 {
     TCGLabel *l = gen_new_label();
@@ -93,7 +205,28 @@  static bool gen_branch(DisasContext *ctx, arg_b *a, TCGCond cond)
     gen_get_gpr(source1, a->rs1);
     gen_get_gpr(source2, a->rs2);
 
+#if defined(TARGET_RISCV128)
+    if (is_128bit(ctx)) {
+        TCGv source1h, source2h, tmph, tmpl;
+        source1h = tcg_temp_new();
+        source2h = tcg_temp_new();
+        tmph = tcg_temp_new();
+        tmpl = tcg_temp_new();
+        gen_get_gprh(source1h, a->rs1);
+        gen_get_gprh(source2h, a->rs2);
+
+        gen_setcond_128(tmpl, tmph, source1, source1h, source2, source2h, cond);
+        tcg_gen_brcondi_tl(TCG_COND_NE, tmpl, 0, l);
+        tcg_temp_free(source1h);
+        tcg_temp_free(source2h);
+        tcg_temp_free(tmph);
+        tcg_temp_free(tmpl);
+    } else {
+        tcg_gen_brcond_tl(cond, source1, source2, l);
+    }
+#else
     tcg_gen_brcond_tl(cond, source1, source2, l);
+#endif
     gen_goto_tb(ctx, 1, ctx->pc_succ_insn);
     gen_set_label(l); /* branch taken */
 
@@ -166,7 +299,7 @@  static bool gen_load_tl(DisasContext *ctx, arg_lb *a, MemOp memop)
  * Changing MemOp values is too involved given our understanding, we
  * therefore use our own way to deal locally with zero or sign extended
  * 64-bit values, and 128-bit values.
- * Doing this implies adding a preprocessor conditional in all memory access
+ * Doing this implies adding a preprocessor conditional for all memory access
  * functions to avoid penalizing 32 and 64-bit accesses.
  */
 typedef enum XMemOp {
@@ -275,7 +408,7 @@  static bool trans_lwu(DisasContext *ctx, arg_lwu *a)
     return gen_load(ctx, a, MO_TEUL, XMO_NOP);
 }
 
-static bool trans_ldu(DisasContext *ctx, arg_ldu* a)
+static bool trans_ldu(DisasContext *ctx, arg_ldu *a)
 {
     REQUIRE_128BIT(ctx);
     return gen_load(ctx, a, MO_TEQ, XMO_TEUQ);
@@ -372,8 +505,98 @@  static bool trans_sq(DisasContext *ctx, arg_sq *a)
     return gen_store(ctx, a, MO_TEQ, XMO_TET);
 }
 
+static bool trans_addd(DisasContext *ctx, arg_addd *a)
+{
+    REQUIRE_128BIT(ctx);
+#if defined(TARGET_RISCV128)
+    TCGv src1 = tcg_temp_new(),
+         src2 = tcg_temp_new();
+
+    gen_get_gpr(src1, a->rs1);
+    gen_get_gpr(src2, a->rs2);
+
+    tcg_gen_add_tl(src1, src1, src2);
+    tcg_gen_ext_i64_i128(src1, src2, src1);
+
+    gen_set_gpr(a->rd, src1);
+    gen_set_gprh(a->rd, src2);
+
+    tcg_temp_free(src1);
+    tcg_temp_free(src2);
+#endif
+    return true;
+}
+
+static bool trans_addid(DisasContext *ctx, arg_addid *a)
+{
+    REQUIRE_128BIT(ctx);
+#if defined(TARGET_RISCV128)
+    TCGv src1 = tcg_temp_new(),
+         resh    = tcg_temp_new();
+
+    gen_get_gpr(src1, a->rs1);
+
+    tcg_gen_addi_tl(src1, src1, a->imm);
+    tcg_gen_ext_i64_i128(src1, resh, src1);
+
+    gen_set_gpr(a->rd, src1);
+    gen_set_gprh(a->rd, resh);
+
+    tcg_temp_free(src1);
+    tcg_temp_free(resh);
+#endif
+    return true;
+}
+
+static bool trans_subd(DisasContext *ctx, arg_subd *a)
+{
+    REQUIRE_128BIT(ctx);
+#if defined(TARGET_RISCV128)
+    TCGv src1 = tcg_temp_new(),
+         src2 = tcg_temp_new();
+
+    gen_get_gpr(src1, a->rs1);
+    gen_get_gpr(src2, a->rs2);
+
+    tcg_gen_sub_tl(src1, src1, src2);
+    tcg_gen_ext_i64_i128(src1, src2, src1);
+
+    gen_set_gpr(a->rd, src1);
+    gen_set_gprh(a->rd, src2);
+
+    tcg_temp_free(src1);
+    tcg_temp_free(src2);
+#endif
+    return true;
+}
+
 static bool trans_addi(DisasContext *ctx, arg_addi *a)
 {
+#if defined(TARGET_RISCV128)
+    if (is_128bit(ctx)) {
+        TCGv rs1vl = tcg_temp_new(),
+             rs1vh = tcg_temp_new(),
+             imml  = tcg_const_tl(a->imm),
+             immh  = tcg_const_tl((a->imm & 0b100000000000)
+                                   ? 0xffffffffffffffff
+                                   : 0);
+
+        gen_get_gpr(rs1vl, a->rs1);
+        gen_get_gprh(rs1vh, a->rs1);
+
+        tcg_gen_add2_tl(rs1vl, rs1vh, rs1vl, rs1vh, imml, immh);
+
+        gen_set_gpr(a->rd, rs1vl);
+        gen_set_gprh(a->rd, rs1vh);
+
+        tcg_temp_free(rs1vl);
+        tcg_temp_free(rs1vh);
+        tcg_temp_free(imml);
+        tcg_temp_free(immh);
+
+        return true;
+    }
+#endif
     return gen_arith_imm_fn(ctx, a, &tcg_gen_addi_tl);
 }
 
@@ -390,11 +613,69 @@  static void gen_sltu(TCGv ret, TCGv s1, TCGv s2)
 
 static bool trans_slti(DisasContext *ctx, arg_slti *a)
 {
+#if defined(TARGET_RISCV128)
+    if (is_128bit(ctx)) {
+        TCGv r1h  = tcg_temp_new(),
+             r1l  = tcg_temp_new(),
+             immh = tcg_const_tl((a->imm & 0b100000000000)
+                                  ? 0xffffffffffffffff
+                                  : 0),
+             imml = tcg_const_tl(a->imm),
+             resh = tcg_temp_new(),
+             resl = tcg_temp_new();
+
+        gen_get_gpr(r1l, a->rs1);
+        gen_get_gprh(r1h, a->rs1);
+
+        gen_setcond_128(resl, resh, r1l, r1h, imml, immh, TCG_COND_LT);
+
+        gen_set_gpr(a->rd, resl);
+        gen_set_gprh(a->rd, resh);
+
+        tcg_temp_free(r1h);
+        tcg_temp_free(r1l);
+        tcg_temp_free(immh);
+        tcg_temp_free(imml);
+        tcg_temp_free(resh);
+        tcg_temp_free(resl);
+
+        return true;
+    }
+#endif
     return gen_arith_imm_tl(ctx, a, &gen_slt);
 }
 
 static bool trans_sltiu(DisasContext *ctx, arg_sltiu *a)
 {
+#if defined(TARGET_RISCV128)
+    if (is_128bit(ctx)) {
+        TCGv resh = tcg_temp_new(),
+             resl = tcg_temp_new(),
+             r1h = tcg_temp_new(),
+             r1l = tcg_temp_new(),
+             immh = tcg_const_tl((a->imm & 0b100000000000)
+                                  ? 0xffffffffffffffff
+                                  : 0),
+             imml = tcg_const_tl(a->imm);
+
+        gen_get_gprh(r1h, a->rs1);
+        gen_get_gpr(r1l, a->rs1);
+
+        gen_setcond_128(resl, resh, r1l, r1h, imml, immh, TCG_COND_LTU);
+
+        gen_set_gpr(a->rd, resl);
+        gen_set_gprh(a->rd, resh);
+
+        tcg_temp_free(resl);
+        tcg_temp_free(resh);
+        tcg_temp_free(r1h);
+        tcg_temp_free(r1l);
+        tcg_temp_free(immh);
+        tcg_temp_free(imml);
+
+        return true;
+    }
+#endif
     return gen_arith_imm_tl(ctx, a, &gen_sltu);
 }
 
@@ -412,41 +693,406 @@  static bool trans_andi(DisasContext *ctx, arg_andi *a)
 }
 static bool trans_slli(DisasContext *ctx, arg_slli *a)
 {
+#if defined(TARGET_RISCV128)
+    if (is_128bit(ctx)) {
+        if (a->shamt >= 128) {
+            return false;
+        }
+
+        if (a->rd != 0 && a->shamt != 0) {
+            TCGv rs = tcg_temp_new(),
+                 rsh = tcg_temp_new();
+            TCGv res = tcg_temp_new(),
+                 resh = tcg_temp_new(),
+                 tmp = tcg_temp_new();
+            gen_get_gpr(rs, a->rs1);
+            gen_get_gprh(rsh, a->rs1);
+
+            /*
+             * Computation of double-length left shift,
+             * adapted for immediates from section 2.17 of Hacker's Delight
+             */
+            if (a->shamt >= 64) {
+                tcg_gen_movi_tl(resh, 0);
+            } else {
+                tcg_gen_shli_tl(resh, rsh, a->shamt);
+            }
+
+            if (64 - a->shamt < 0) {
+                tcg_gen_movi_tl(tmp, 0);
+            } else {
+                tcg_gen_shri_tl(tmp, rs, 64 - a->shamt);
+            }
+            tcg_gen_or_tl(resh, resh, tmp);
+            if (a->shamt - 64 < 0) {
+                tcg_gen_movi_tl(tmp, 0);
+            } else {
+                tcg_gen_shli_tl(tmp, rs, a->shamt - 64);
+            }
+            tcg_gen_or_tl(resh, resh, tmp);
+
+            if (a->shamt >= 64) {
+                tcg_gen_movi_tl(res, 0);
+            } else {
+                tcg_gen_shli_tl(res, rs, a->shamt);
+            }
+
+            gen_set_gpr(a->rd, res);
+            gen_set_gprh(a->rd, resh);
+
+            tcg_temp_free(rs);
+            tcg_temp_free(rsh);
+            tcg_temp_free(res);
+            tcg_temp_free(resh);
+            tcg_temp_free(tmp);
+        } /* NOP otherwise */
+        return true;
+    }
+#endif
     return gen_shifti(ctx, a, tcg_gen_shl_tl);
 }
 
 static bool trans_srli(DisasContext *ctx, arg_srli *a)
 {
+#if defined(TARGET_RISCV128)
+    if (is_128bit(ctx)) {
+        if (a->shamt >= 128) {
+            return false;
+        }
+
+        if (a->rd != 0 && a->shamt != 0) {
+            TCGv rs = tcg_temp_new(),
+                 rsh = tcg_temp_new(),
+                 res = tcg_temp_new(),
+                 resh = tcg_temp_new(),
+                 tmp = tcg_temp_new();
+            gen_get_gpr(rs, a->rs1);
+            gen_get_gprh(rsh, a->rs1);
+
+            /*
+             * Computation of double-length right logical shift,
+             * adapted for immediates from section 2.17 of Hacker's Delight
+             */
+            if (a->shamt >= 64) {
+                tcg_gen_movi_tl(res, 0);
+            } else {
+                tcg_gen_shri_tl(res, rs, a->shamt);
+            }
+            if (64 - a->shamt < 0) {
+                tcg_gen_movi_tl(tmp, 0);
+            } else {
+                tcg_gen_shli_tl(tmp, rsh, 64 - a->shamt);
+            }
+            tcg_gen_or_tl(res, res, tmp);
+            if (a->shamt - 64 < 0) {
+                tcg_gen_movi_tl(tmp, 0);
+            } else {
+                tcg_gen_shri_tl(tmp, rsh, a->shamt - 64);
+            }
+            tcg_gen_or_tl(res, res, tmp);
+
+            if (a->shamt >= 64) {
+                tcg_gen_movi_tl(resh, 0);
+            } else {
+                tcg_gen_shri_tl(resh, rsh, a->shamt);
+            }
+
+            gen_set_gpr(a->rd, res);
+            gen_set_gprh(a->rd, resh);
+
+            tcg_temp_free(rs);
+            tcg_temp_free(rsh);
+            tcg_temp_free(res);
+            tcg_temp_free(resh);
+            tcg_temp_free(tmp);
+        } /* NOP otherwise */
+        return true;
+    }
+#endif
     return gen_shifti(ctx, a, tcg_gen_shr_tl);
 }
 
 static bool trans_srai(DisasContext *ctx, arg_srai *a)
 {
+#if defined(TARGET_RISCV128)
+    if (is_128bit(ctx)) {
+        if (a->shamt >= 128) {
+            return false;
+        }
+
+        if (a->rd != 0 && a->shamt != 0) {
+            TCGv rs = tcg_temp_new(),
+                 rsh = tcg_temp_new(),
+                 res = tcg_temp_new(),
+                 resh = tcg_temp_new(),
+                 tmp = tcg_temp_new();
+            gen_get_gpr(rs, a->rs1);
+            gen_get_gprh(rsh, a->rs1);
+
+            /*
+             * Computation of double-length right arith shift,
+             * adapted for immediates from section 2.17 of Hacker's Delight
+             */
+            if (a->shamt < 64) {
+                tcg_gen_shri_tl(res, rs, a->shamt);
+                tcg_gen_shli_tl(tmp, rsh, 64 - a->shamt);
+                tcg_gen_or_tl(res, res, tmp);
+            } else {
+                tcg_gen_sari_tl(res, rsh, a->shamt - 64);
+            }
+
+            /* Arithmetic shift of upper bits by shamt */
+            if (a->shamt == 127) {
+                tcg_gen_sari_tl(resh, rsh, 63);
+                tcg_gen_sari_tl(resh, resh, 63);
+                tcg_gen_sari_tl(resh, resh, 1);
+            } else if (a->shamt >= 64) {
+                tcg_gen_sari_tl(resh, rsh, 63);
+                tcg_gen_sari_tl(resh, resh, a->shamt - 63);
+            } else {
+                tcg_gen_sari_tl(resh, rsh, a->shamt);
+            }
+
+            gen_set_gpr(a->rd, res);
+            gen_set_gprh(a->rd, resh);
+
+            tcg_temp_free(rs);
+            tcg_temp_free(rsh);
+            tcg_temp_free(res);
+            tcg_temp_free(resh);
+            tcg_temp_free(tmp);
+        } /* NOP otherwise */
+        return true;
+    }
+#endif
     return gen_shifti(ctx, a, tcg_gen_sar_tl);
 }
 
 static bool trans_add(DisasContext *ctx, arg_add *a)
 {
+#if defined(TARGET_RISCV128)
+    if (is_128bit(ctx)) {
+        TCGv rs1vl = tcg_temp_new(),
+             rs1vh = tcg_temp_new(),
+             rs2vl = tcg_temp_new(),
+             rs2vh = tcg_temp_new(),
+             resl  = tcg_temp_new(),
+             resh  = tcg_temp_new();
+
+        gen_get_gpr(rs1vl, a->rs1);
+        gen_get_gprh(rs1vh, a->rs1);
+        gen_get_gpr(rs2vl, a->rs2);
+        gen_get_gprh(rs2vh, a->rs2);
+
+        tcg_gen_add2_tl(resl, resh, rs1vl, rs1vh, rs2vl, rs2vh);
+
+        gen_set_gpr(a->rd, resl);
+        gen_set_gprh(a->rd, resh);
+
+        tcg_temp_free(rs1vl);
+        tcg_temp_free(rs1vh);
+        tcg_temp_free(rs2vl);
+        tcg_temp_free(rs2vh);
+        tcg_temp_free(resl);
+        tcg_temp_free(resh);
+
+        return true;
+    }
+#endif
     return gen_arith(ctx, a, &tcg_gen_add_tl);
 }
 
 static bool trans_sub(DisasContext *ctx, arg_sub *a)
 {
+#if defined(TARGET_RISCV128)
+    if (is_128bit(ctx)) {
+        TCGv rs1vl = tcg_temp_new(),
+             rs1vh = tcg_temp_new(),
+             rs2vl = tcg_temp_new(),
+             rs2vh = tcg_temp_new(),
+             resl  = tcg_temp_new(),
+             resh  = tcg_temp_new();
+
+        gen_get_gpr(rs1vl, a->rs1);
+        gen_get_gprh(rs1vh, a->rs1);
+        gen_get_gpr(rs2vl, a->rs2);
+        gen_get_gprh(rs2vh, a->rs2);
+
+        tcg_gen_sub2_tl(resl, resh, rs1vl, rs1vh, rs2vl, rs2vh);
+
+        gen_set_gpr(a->rd, resl);
+        gen_set_gprh(a->rd, resh);
+
+        tcg_temp_free(rs1vl);
+        tcg_temp_free(rs1vh);
+        tcg_temp_free(rs2vl);
+        tcg_temp_free(rs2vh);
+        tcg_temp_free(resl);
+        tcg_temp_free(resh);
+
+        return true;
+    }
+#endif
     return gen_arith(ctx, a, &tcg_gen_sub_tl);
 }
 
+#if defined(TARGET_RISCV128)
+enum M128_DIR { M128_LEFT, M128_RIGHT, M128_RIGHT_ARITH };
+static void gen_shift_mod128(TCGv ret, TCGv arg1, TCGv arg2, enum M128_DIR dir)
+{
+    TCGv tmp1 = tcg_temp_new(),
+         tmp2 = tcg_temp_new(),
+         cnst_zero = tcg_const_tl(0),
+         sgn = tcg_temp_new();
+
+    tcg_gen_setcondi_tl(TCG_COND_GE, tmp1, arg2, 64);
+    tcg_gen_setcondi_tl(TCG_COND_LT, tmp2, arg2, 0);
+    tcg_gen_or_tl(tmp1, tmp1, tmp2);
+
+    tcg_gen_andi_tl(tmp2, arg2, 0x3f);
+    switch (dir) {
+    case M128_LEFT:
+        tcg_gen_shl_tl(tmp2, arg1, tmp2);
+        break;
+    case M128_RIGHT:
+        tcg_gen_shr_tl(tmp2, arg1, tmp2);
+        break;
+    case M128_RIGHT_ARITH:
+        tcg_gen_sar_tl(tmp2, arg1, tmp2);
+        break;
+    }
+
+    if (dir == M128_RIGHT_ARITH) {
+        tcg_gen_sari_tl(sgn, arg1, 63);
+        tcg_gen_movcond_tl(TCG_COND_NE, ret, tmp1, cnst_zero, sgn, tmp2);
+    } else {
+        tcg_gen_movcond_tl(TCG_COND_NE, ret, tmp1, cnst_zero, cnst_zero, tmp2);
+    }
+
+    tcg_temp_free(tmp1);
+    tcg_temp_free(tmp2);
+    tcg_temp_free(cnst_zero);
+    tcg_temp_free(sgn);
+    return;
+}
+#endif
+
 static bool trans_sll(DisasContext *ctx, arg_sll *a)
 {
+#if defined(TARGET_RISCV128)
+    if (is_128bit(ctx)) {
+        TCGv src1l = tcg_temp_new(),
+             src1h = tcg_temp_new(),
+             src2l = tcg_temp_new(),
+             tmp   = tcg_temp_new(),
+             resl  = tcg_temp_new(),
+             resh  = tcg_temp_new();
+
+        gen_get_gpr(src1l, a->rs1);
+        gen_get_gprh(src1h, a->rs1);
+        gen_get_gpr(src2l, a->rs2);
+
+        tcg_gen_andi_tl(src2l, src2l, 0x7f); /* Use 7 lower bits for shift */
+
+        /*
+         * From Hacker's Delight 2.17:
+         *  y1 = x1 << n | x0 u>> (64 - n) | x0 << (n - 64)
+         */
+        gen_shift_mod128(resh, src1h, src2l, M128_LEFT);
+
+        tcg_gen_movi_tl(tmp, 64);
+        tcg_gen_sub_tl(tmp, tmp, src2l);
+        gen_shift_mod128(tmp, src1l, tmp, M128_RIGHT);
+        tcg_gen_or_tl(resh, resh, tmp);
+
+        tcg_gen_subi_tl(tmp, src2l, 64);
+        gen_shift_mod128(tmp, src1l, tmp, M128_LEFT);
+        tcg_gen_or_tl(resh, resh, tmp);
+
+        /* From Hacker's Delight 2.17: y0 = x0 << n */
+        gen_shift_mod128(resl, src1l, src2l, M128_LEFT);
+
+        gen_set_gpr(a->rd, resl);
+        gen_set_gprh(a->rd, resh);
+
+        tcg_temp_free(src1l);
+        tcg_temp_free(src1h);
+        tcg_temp_free(src2l);
+        tcg_temp_free(tmp);
+        tcg_temp_free(resl);
+        tcg_temp_free(resh);
+
+        return true;
+    }
+#endif
     return gen_shift(ctx, a, &tcg_gen_shl_tl);
 }
 
 static bool trans_slt(DisasContext *ctx, arg_slt *a)
 {
+#if defined(TARGET_RISCV128)
+    if (is_128bit(ctx)) {
+        TCGv r1h  = tcg_temp_new(),
+             r1l  = tcg_temp_new(),
+             r2h  = tcg_temp_new(),
+             r2l  = tcg_temp_new(),
+             tmph = tcg_temp_new(),
+             tmpl = tcg_temp_new();
+
+        gen_get_gprh(r1h, a->rs1);
+        gen_get_gpr(r1l, a->rs1);
+        gen_get_gprh(r2h, a->rs2);
+        gen_get_gpr(r2l, a->rs2);
+
+        gen_setcond_128(tmpl, tmph, r1l, r1h, r2l, r2h, TCG_COND_LT);
+
+        gen_set_gpr(a->rd, tmpl);
+        gen_set_gprh(a->rd, tmph);
+
+        tcg_temp_free(r1h);
+        tcg_temp_free(r1l);
+        tcg_temp_free(r2h);
+        tcg_temp_free(r2l);
+        tcg_temp_free(tmph);
+        tcg_temp_free(tmpl);
+
+        return true;
+    }
+#endif
     return gen_arith(ctx, a, &gen_slt);
 }
 
 static bool trans_sltu(DisasContext *ctx, arg_sltu *a)
 {
+#if defined(TARGET_RISCV128)
+    if (is_128bit(ctx)) {
+        TCGv resh = tcg_temp_new(),
+             resl = tcg_temp_new(),
+             r1h = tcg_temp_new(),
+             r1l = tcg_temp_new(),
+             r2h = tcg_temp_new(),
+             r2l = tcg_temp_new();
+
+        gen_get_gprh(r1h, a->rs1);
+        gen_get_gpr(r1l, a->rs1);
+        gen_get_gprh(r2h, a->rs2);
+        gen_get_gpr(r2l, a->rs2);
+
+        gen_setcond_128(resl, resh, r1l, r1h, r2l, r2h, TCG_COND_LTU);
+
+        gen_set_gpr(a->rd, resl);
+        gen_set_gprh(a->rd, resh);
+
+        tcg_temp_free(resl);
+        tcg_temp_free(resh);
+        tcg_temp_free(r1h);
+        tcg_temp_free(r1l);
+        tcg_temp_free(r2h);
+        tcg_temp_free(r2l);
+
+        return true;
+    }
+#endif
     return gen_arith(ctx, a, &gen_sltu);
 }
 
@@ -457,11 +1103,106 @@  static bool trans_xor(DisasContext *ctx, arg_xor *a)
 
 static bool trans_srl(DisasContext *ctx, arg_srl *a)
 {
+#if defined(TARGET_RISCV128)
+    if (is_128bit(ctx)) {
+        TCGv src1l = tcg_temp_new(),
+             src1h = tcg_temp_new(),
+             src2l = tcg_temp_new(),
+             tmp   = tcg_temp_new(),
+             resl  = tcg_temp_new(),
+             resh  = tcg_temp_new();
+
+        gen_get_gpr(src1l, a->rs1);
+        gen_get_gprh(src1h, a->rs1);
+        gen_get_gpr(src2l, a->rs2);
+
+        tcg_gen_andi_tl(src2l, src2l, 0x7f); /* Use 7 lower bits for shift */
+
+        /*
+         * From Hacker's Delight 2.17:
+         * y0 = x0 u>> n | x1 << (64 - n) | x1 u>> (n - 64)
+         */
+        gen_shift_mod128(resl, src1l, src2l, M128_RIGHT);
+
+        tcg_gen_movi_tl(tmp, 64);
+        tcg_gen_sub_tl(tmp, tmp, src2l);
+        gen_shift_mod128(tmp, src1h, tmp, M128_LEFT);
+        tcg_gen_or_tl(resl, resl, tmp);
+
+        tcg_gen_subi_tl(tmp, src2l, 64);
+        gen_shift_mod128(tmp, src1h, tmp, M128_RIGHT);
+        tcg_gen_or_tl(resl, resl, tmp);
+
+        /* From Hacker's Delight 2.17 : y1 = x1 u>> n */
+        gen_shift_mod128(resh, src1h, src2l, M128_RIGHT);
+
+        gen_set_gpr(a->rd, resl);
+        gen_set_gprh(a->rd, resh);
+
+        tcg_temp_free(src1l);
+        tcg_temp_free(src1h);
+        tcg_temp_free(src2l);
+        tcg_temp_free(tmp);
+        tcg_temp_free(resl);
+        tcg_temp_free(resh);
+
+        return true;
+    }
+#endif
     return gen_shift(ctx, a, &tcg_gen_shr_tl);
 }
 
 static bool trans_sra(DisasContext *ctx, arg_sra *a)
 {
+#if defined(TARGET_RISCV128)
+    if (is_128bit(ctx)) {
+        TCGv src1l   = tcg_temp_new(),
+             src1h   = tcg_temp_new(),
+             src2l   = tcg_temp_new(),
+             tmp1    = tcg_temp_new(),
+             tmp2    = tcg_temp_new(),
+             const64 = tcg_const_tl(64),
+             resl    = tcg_temp_new(),
+             resh    = tcg_temp_new();
+
+        gen_get_gpr(src1l, a->rs1);
+        gen_get_gprh(src1h, a->rs1);
+        gen_get_gpr(src2l, a->rs2);
+
+        tcg_gen_andi_tl(src2l, src2l, 0x7f); /* Use 7 lower bits for shift */
+
+        /* Compute y0 value if n < 64: x0 u>> n | x1 << (64 - n) */
+        gen_shift_mod128(tmp1, src1l, src2l, M128_RIGHT);
+        tcg_gen_movi_tl(tmp2, 64);
+        tcg_gen_sub_tl(tmp2, tmp2, src2l);
+        gen_shift_mod128(tmp2, src1h, tmp2, M128_LEFT);
+        tcg_gen_or_tl(tmp1, tmp1, tmp2);
+
+        /* Compute y0 value if n >= 64: x1 s>> (n - 64) */
+        tcg_gen_subi_tl(tmp2, src2l, 64);
+        gen_shift_mod128(tmp2, src1h, tmp2, M128_RIGHT_ARITH);
+
+        /* Conditionally move one value or the other */
+        tcg_gen_movcond_tl(TCG_COND_LT, resl, src2l, const64, tmp1, tmp2);
+
+        /* y1 = x1 s>> n */
+        gen_shift_mod128(resh, src1h, src2l, M128_RIGHT_ARITH);
+
+        gen_set_gpr(a->rd, resl);
+        gen_set_gprh(a->rd, resh);
+
+        tcg_temp_free(src1l);
+        tcg_temp_free(src1h);
+        tcg_temp_free(src2l);
+        tcg_temp_free(tmp1);
+        tcg_temp_free(tmp2);
+        tcg_temp_free(const64);
+        tcg_temp_free(resl);
+        tcg_temp_free(resh);
+
+        return true;
+    }
+#endif
     return gen_shift(ctx, a, &tcg_gen_sar_tl);
 }
 
@@ -477,24 +1218,95 @@  static bool trans_and(DisasContext *ctx, arg_and *a)
 
 static bool trans_addiw(DisasContext *ctx, arg_addiw *a)
 {
-    REQUIRE_64BIT(ctx);
-    return gen_arith_imm_tl(ctx, a, &gen_addw);
+    REQUIRE_64_OR_128BIT(ctx);
+    const bool rv = gen_arith_imm_tl(ctx, a, &gen_addw);
+#if defined(TARGET_RISCV128)
+    if (is_128bit(ctx) && a->rd != 0) {
+        tcg_gen_ext_i64_i128(cpu_gpr[a->rd], cpu_gprh[a->rd], cpu_gpr[a->rd]);
+    }
+#endif
+    return rv;
 }
 
 static bool trans_slliw(DisasContext *ctx, arg_slliw *a)
 {
-    REQUIRE_64BIT(ctx);
-    return gen_shiftiw(ctx, a, tcg_gen_shl_tl);
+    REQUIRE_64_OR_128BIT(ctx);
+    const bool rv = gen_shiftiw(ctx, a, tcg_gen_shl_tl);
+#if defined(TARGET_RISCV128)
+    if (is_128bit(ctx) && a->rd != 0) {
+        tcg_gen_ext_i64_i128(cpu_gpr[a->rd], cpu_gprh[a->rd], cpu_gpr[a->rd]);
+    }
+#endif
+    return rv;
+}
+
+static bool trans_sllid(DisasContext *ctx, arg_slliw *a)
+{
+    REQUIRE_128BIT(ctx);
+#if defined(TARGET_RISCV128)
+    TCGv source1 = tcg_temp_new();
+    gen_get_gpr(source1, a->rs1);
+
+    tcg_gen_shli_tl(source1, source1, a->shamt);
+    if (a->rd != 0) {
+        tcg_gen_ext_i64_i128(source1, cpu_gprh[a->rd], source1);
+    }
+    gen_set_gpr(a->rd, source1);
+
+    tcg_temp_free(source1);
+#endif
+    return true;
+}
+
+static bool trans_srlid(DisasContext *ctx, arg_srlid *a)
+{
+    REQUIRE_128BIT(ctx);
+#if defined(TARGET_RISCV128)
+    TCGv source1 = tcg_temp_new();
+    gen_get_gpr(source1, a->rs1);
+
+    tcg_gen_shri_tl(source1, source1, a->shamt);
+    if (a->rd != 0) {
+        tcg_gen_ext_i64_i128(source1, cpu_gprh[a->rd], source1);
+    }
+    gen_set_gpr(a->rd, source1);
+
+    tcg_temp_free(source1);
+#endif
+    return true;
+}
+
+static bool trans_sraid(DisasContext *ctx, arg_sraid *a)
+{
+    REQUIRE_128BIT(ctx);
+#if defined(TARGET_RISCV128)
+    TCGv source1 = tcg_temp_new();
+    gen_get_gpr(source1, a->rs1);
+
+    tcg_gen_sari_tl(source1, source1, a->shamt);
+    if (a->rd != 0) {
+        tcg_gen_ext_i64_i128(source1, cpu_gprh[a->rd], source1);
+    }
+    gen_set_gpr(a->rd, source1);
+
+    tcg_temp_free(source1);
+#endif
+    return true;
 }
 
 static bool trans_srliw(DisasContext *ctx, arg_srliw *a)
 {
-    REQUIRE_64BIT(ctx);
+    REQUIRE_64_OR_128BIT(ctx);
     TCGv t = tcg_temp_new();
     gen_get_gpr(t, a->rs1);
     tcg_gen_extract_tl(t, t, a->shamt, 32 - a->shamt);
     /* sign-extend for W instructions */
     tcg_gen_ext32s_tl(t, t);
+#if defined(TARGET_RISCV128)
+    if (is_128bit(ctx) && a->rd != 0) {
+        tcg_gen_ext_i64_i128(t, cpu_gprh[a->rd], t);
+    }
+#endif
     gen_set_gpr(a->rd, t);
     tcg_temp_free(t);
     return true;
@@ -502,10 +1314,15 @@  static bool trans_srliw(DisasContext *ctx, arg_srliw *a)
 
 static bool trans_sraiw(DisasContext *ctx, arg_sraiw *a)
 {
-    REQUIRE_64BIT(ctx);
+    REQUIRE_64_OR_128BIT(ctx);
     TCGv t = tcg_temp_new();
     gen_get_gpr(t, a->rs1);
     tcg_gen_sextract_tl(t, t, a->shamt, 32 - a->shamt);
+#if defined(TARGET_RISCV128)
+    if (is_128bit(ctx) && a->rd != 0) {
+        tcg_gen_ext_i64_i128(t, cpu_gprh[a->rd], t);
+    }
+#endif
     gen_set_gpr(a->rd, t);
     tcg_temp_free(t);
     return true;
@@ -513,19 +1330,31 @@  static bool trans_sraiw(DisasContext *ctx, arg_sraiw *a)
 
 static bool trans_addw(DisasContext *ctx, arg_addw *a)
 {
-    REQUIRE_64BIT(ctx);
-    return gen_arith(ctx, a, &gen_addw);
+    REQUIRE_64_OR_128BIT(ctx);
+    const bool ok = gen_arith(ctx, a, &gen_addw);
+#if defined(TARGET_RISCV128)
+    if (is_128bit(ctx) && a->rd != 0) {
+        tcg_gen_ext_i64_i128(cpu_gpr[a->rd], cpu_gprh[a->rd], cpu_gpr[a->rd]);
+    }
+#endif
+    return ok;
 }
 
 static bool trans_subw(DisasContext *ctx, arg_subw *a)
 {
-    REQUIRE_64BIT(ctx);
-    return gen_arith(ctx, a, &gen_subw);
+    REQUIRE_64_OR_128BIT(ctx);
+    const bool rv = gen_arith(ctx, a, &gen_subw);
+#if defined(TARGET_RISCV128)
+    if (is_128bit(ctx) && a->rd != 0) {
+        tcg_gen_ext_i64_i128(cpu_gpr[a->rd], cpu_gprh[a->rd], cpu_gpr[a->rd]);
+    }
+#endif
+    return rv;
 }
 
 static bool trans_sllw(DisasContext *ctx, arg_sllw *a)
 {
-    REQUIRE_64BIT(ctx);
+    REQUIRE_64_OR_128BIT(ctx);
     TCGv source1 = tcg_temp_new();
     TCGv source2 = tcg_temp_new();
 
@@ -536,6 +1365,14 @@  static bool trans_sllw(DisasContext *ctx, arg_sllw *a)
     tcg_gen_shl_tl(source1, source1, source2);
 
     tcg_gen_ext32s_tl(source1, source1);
+#if defined(TARGET_RISCV128)
+    if (is_128bit(ctx) && a->rd != 0) {
+        TCGv resh = tcg_temp_new();
+        tcg_gen_ext_i64_i128(source1, resh, source1);
+        gen_set_gprh(a->rd, resh);
+        tcg_temp_free(resh);
+    }
+#endif
     gen_set_gpr(a->rd, source1);
     tcg_temp_free(source1);
     tcg_temp_free(source2);
@@ -544,7 +1381,7 @@  static bool trans_sllw(DisasContext *ctx, arg_sllw *a)
 
 static bool trans_srlw(DisasContext *ctx, arg_srlw *a)
 {
-    REQUIRE_64BIT(ctx);
+    REQUIRE_64_OR_128BIT(ctx);
     TCGv source1 = tcg_temp_new();
     TCGv source2 = tcg_temp_new();
 
@@ -557,6 +1394,14 @@  static bool trans_srlw(DisasContext *ctx, arg_srlw *a)
     tcg_gen_shr_tl(source1, source1, source2);
 
     tcg_gen_ext32s_tl(source1, source1);
+#if defined(TARGET_RISCV128)
+    if (is_128bit(ctx) && a->rd != 0) {
+        TCGv resh = tcg_temp_new();
+        tcg_gen_ext_i64_i128(source1, resh, source1);
+        gen_set_gprh(a->rd, resh);
+        tcg_temp_free(resh);
+    }
+#endif
     gen_set_gpr(a->rd, source1);
     tcg_temp_free(source1);
     tcg_temp_free(source2);
@@ -565,7 +1410,7 @@  static bool trans_srlw(DisasContext *ctx, arg_srlw *a)
 
 static bool trans_sraw(DisasContext *ctx, arg_sraw *a)
 {
-    REQUIRE_64BIT(ctx);
+    REQUIRE_64_OR_128BIT(ctx);
     TCGv source1 = tcg_temp_new();
     TCGv source2 = tcg_temp_new();
 
@@ -580,6 +1425,15 @@  static bool trans_sraw(DisasContext *ctx, arg_sraw *a)
     tcg_gen_andi_tl(source2, source2, 0x1F);
     tcg_gen_sar_tl(source1, source1, source2);
 
+#if defined(TARGET_RISCV128)
+    if (is_128bit(ctx) && a->rd != 0) {
+        TCGv resh = tcg_temp_new();
+        tcg_gen_ext_i64_i128(source1, resh, source1);
+        gen_set_gprh(a->rd, resh);
+        tcg_temp_free(resh);
+    }
+#endif
+
     gen_set_gpr(a->rd, source1);
     tcg_temp_free(source1);
     tcg_temp_free(source2);
@@ -587,6 +1441,73 @@  static bool trans_sraw(DisasContext *ctx, arg_sraw *a)
     return true;
 }
 
+/* Translation functions for 64-bit operations specific to RV128 */
+static bool trans_slld(DisasContext *ctx, arg_slld *a)
+{
+    REQUIRE_128BIT(ctx);
+#if defined(TARGET_RISCV128)
+    TCGv src1 = tcg_temp_new(),
+         src2 = tcg_temp_new();
+
+    gen_get_gpr(src1, a->rs1);
+    gen_get_gpr(src2, a->rs2);
+
+    tcg_gen_shl_tl(src1, src1, src2);
+    tcg_gen_ext_i64_i128(src1, src2, src1);
+
+    gen_set_gpr(a->rd, src1);
+    gen_set_gprh(a->rd, src2);
+
+    tcg_temp_free(src1);
+    tcg_temp_free(src2);
+#endif
+    return true;
+}
+
+static bool trans_srld(DisasContext *ctx, arg_srld *a)
+{
+    REQUIRE_128BIT(ctx);
+#if defined(TARGET_RISCV128)
+    TCGv src1 = tcg_temp_new(),
+         src2 = tcg_temp_new();
+
+    gen_get_gpr(src1, a->rs1);
+    gen_get_gpr(src2, a->rs2);
+
+    tcg_gen_shr_tl(src1, src1, src2);
+    tcg_gen_ext_i64_i128(src1, src2, src1);
+
+    gen_set_gpr(a->rd, src1);
+    gen_set_gprh(a->rd, src2);
+
+    tcg_temp_free(src1);
+    tcg_temp_free(src2);
+    return true;
+#endif
+}
+
+static bool trans_srad(DisasContext *ctx, arg_srad *a)
+{
+    REQUIRE_128BIT(ctx);
+#if defined(TARGET_RISCV128)
+    TCGv src1 = tcg_temp_new(),
+         src2 = tcg_temp_new();
+
+    gen_get_gpr(src1, a->rs1);
+    gen_get_gpr(src2, a->rs2);
+
+    tcg_gen_sar_tl(src1, src1, src2);
+    tcg_gen_ext_i64_i128(src1, src2, src1);
+
+    gen_set_gpr(a->rd, src1);
+    gen_set_gprh(a->rd, src2);
+
+    tcg_temp_free(src1);
+    tcg_temp_free(src2);
+#endif
+    return true;
+}
+
 static bool trans_fence(DisasContext *ctx, arg_fence *a)
 {
     /* FENCE is a full memory barrier. */
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index be9c64f3e4..7d447bd225 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -509,6 +509,19 @@  static bool gen_arith_imm_fn(DisasContext *ctx, arg_i *a,
     (*func)(source1, source1, a->imm);
 
     gen_set_gpr(a->rd, source1);
+
+#if defined(TARGET_RISCV128)
+    if (is_128bit(ctx)) {
+        uint64_t immh = (a->imm & 0b100000000000) ? 0xffffffffffffffff : 0;
+
+        gen_get_gprh(source1, a->rs1);
+
+        (*func)(source1, source1, immh);
+
+        gen_set_gprh(a->rd, source1);
+    }
+#endif
+
     tcg_temp_free(source1);
     return true;
 }
@@ -827,6 +840,18 @@  static bool gen_arith(DisasContext *ctx, arg_r *a,
     (*func)(source1, source1, source2);
 
     gen_set_gpr(a->rd, source1);
+
+#if defined(TARGET_RISCV128)
+    if (is_128bit(ctx)) {
+        gen_get_gprh(source1, a->rs1);
+        gen_get_gprh(source2, a->rs2);
+
+        (*func)(source1, source1, source2);
+
+        gen_set_gprh(a->rd, source1);
+    }
+#endif
+
     tcg_temp_free(source1);
     tcg_temp_free(source2);
     return true;