diff mbox series

[RFC,v1,2/2] Enable custom instruction suport for Andes A25 and AX25 CPU model

Message ID 20211021151136.721746-2-ruinland@andestech.com
State New
Headers show
Series riscv: Add preliminary custom instruction support | expand

Commit Message

Ruinland ChuanTzu Tsai Oct. 21, 2021, 3:11 p.m. UTC
In this patch, we demonstrate how Andes Performance Extension(c) insn :
bfos and bfoz could be used with Andes CoDense : exec.it.

By doing so, an Andes vendor designed CSR : uitb must be used.

Signed-off-by: Ruinland Chuan-Tzu Tsai <ruinland@andestech.com>
---
 target/riscv/andes_codense.decode         |  23 +++++
 target/riscv/andes_custom_rv32.decode     |  27 +++++
 target/riscv/andes_custom_rv64.decode     |  27 +++++
 target/riscv/andes_helper.c               |  49 +++++++++
 target/riscv/andes_helper.h               |   1 +
 target/riscv/cpu.c                        |   8 ++
 target/riscv/helper.h                     |   2 +
 target/riscv/insn_trans/trans_andes.c.inc | 115 ++++++++++++++++++++++
 target/riscv/meson.build                  |  13 +++
 target/riscv/translate.c                  |  12 +++
 10 files changed, 277 insertions(+)
 create mode 100644 target/riscv/andes_codense.decode
 create mode 100644 target/riscv/andes_custom_rv32.decode
 create mode 100644 target/riscv/andes_custom_rv64.decode
 create mode 100644 target/riscv/andes_helper.c
 create mode 100644 target/riscv/andes_helper.h
 create mode 100644 target/riscv/insn_trans/trans_andes.c.inc

Comments

Richard Henderson Oct. 21, 2021, 7:17 p.m. UTC | #1
On 10/21/21 8:11 AM, Ruinland Chuan-Tzu Tsai wrote:
> In this patch, we demonstrate how Andes Performance Extension(c) insn :
> bfos and bfoz could be used with Andes CoDense : exec.it.
> 
> By doing so, an Andes vendor designed CSR : uitb must be used.
> 
> Signed-off-by: Ruinland Chuan-Tzu Tsai <ruinland@andestech.com>
> ---
>   target/riscv/andes_codense.decode         |  23 +++++
>   target/riscv/andes_custom_rv32.decode     |  27 +++++
>   target/riscv/andes_custom_rv64.decode     |  27 +++++
>   target/riscv/andes_helper.c               |  49 +++++++++
>   target/riscv/andes_helper.h               |   1 +
>   target/riscv/cpu.c                        |   8 ++
>   target/riscv/helper.h                     |   2 +
>   target/riscv/insn_trans/trans_andes.c.inc | 115 ++++++++++++++++++++++
>   target/riscv/meson.build                  |  13 +++
>   target/riscv/translate.c                  |  12 +++
>   10 files changed, 277 insertions(+)
>   create mode 100644 target/riscv/andes_codense.decode
>   create mode 100644 target/riscv/andes_custom_rv32.decode
>   create mode 100644 target/riscv/andes_custom_rv64.decode
>   create mode 100644 target/riscv/andes_helper.c
>   create mode 100644 target/riscv/andes_helper.h
>   create mode 100644 target/riscv/insn_trans/trans_andes.c.inc
> 
> diff --git a/target/riscv/andes_codense.decode b/target/riscv/andes_codense.decode
> new file mode 100644
> index 0000000000..639d22ac67
> --- /dev/null
> +++ b/target/riscv/andes_codense.decode
> @@ -0,0 +1,23 @@
> +#
> +# RISC-V translation routines for the RVXI Base Integer Instruction Set.
> +#
> +# Copyright (c) 2018 Peer Adelt, peer.adelt@hni.uni-paderborn.de
> +#                    Bastian Koppelmann, kbastian@mail.uni-paderborn.de
> +#
> +# This program is free software; you can redistribute it and/or modify it
> +# under the terms and conditions of the GNU General Public License,
> +# version 2 or later, as published by the Free Software Foundation.
> +#
> +# This program is distributed in the hope it will be useful, but WITHOUT
> +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> +# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> +# more details.
> +#
> +# You should have received a copy of the GNU General Public License along with
> +# this program.  If not, see <http://www.gnu.org/licenses/>.
> +
> +
> +%imm_ex10 8:s1 12:1 3:1 9:1 5:2 2:1 10:2 4:1
> +&codense imm_codense
> +@ex10     ... .... . . ..... .. &codense imm_codense=%imm_ex10
> +execit    100 .... . 0 ..... 00 @ex10

It would probably be a bit clearer without the argument set and format, which in this case 
are used exactly once, so there's no actual savings.

execit    100 ..... 0 ..... 00   imm=%imm_ex10


> +++ b/target/riscv/andes_custom_rv32.decode
...
> +++ b/target/riscv/andes_custom_rv64.decode

Note that we're moving toward a single riscv64 executable, and so these two files should 
be combined.  In this case, just taking the rv64 file will work for RV32, with an extra 
check during translate.

> +# Fields:
> +%rs1       15:5
> +%rd        7:5
> +## Similar to I-Type
> +%mbfob    26:6
> +%lbfob    20:6
> +&i_b mbfob lbfob rs1 rd
> +@i_bfo    . ..... . ..... ..... ... ..... .......  &i_b %mbfob %lbfob %rs1 %rd

Better as

@i_bfo       msb:6 lsb:6 rs1:5 ... rd:5 .......   &i_bfo

There's quite a lot of riscv code that could be cleaned up like this.

> +++ b/target/riscv/andes_helper.c
> @@ -0,0 +1,49 @@
> +#include "qemu/osdep.h"

All new files must have copyright boilerplate.
That said...

> +#include "cpu.h"
> +#include "qemu/host-utils.h"
> +#include "exec/exec-all.h"
> +#include "exec/helper-proto.h"
> +#include "fpu/softfloat.h"
> +#include "internals.h"
> +typedef int (*test_function)(uint8_t a, uint8_t b);
> +target_ulong helper_andes_v5_bfo_x(target_ulong rd, target_ulong rs1,
> +                                   target_ulong insn)

Never pass the instruction to a helper. This is an indication that you should have done 
more work during translate.

> --- /dev/null
> +++ b/target/riscv/insn_trans/trans_andes.c.inc
> @@ -0,0 +1,115 @@
> +#define GP 3

Again, boilerplate.

> +#define ANDES_V5_CODE_DENSE_MASK (0xe083)
> +#define ANDES_V5_CODE_DENSE_ID(x) ((x)&ANDES_V5_CODE_DENSE_MASK)
> +#define ANDES_V5_CODE_DENSE_IMM11(x) (     \
> +    (extract32(x, 4, 1) << 2) |   \
> +    (extract32(x, 10, 2) << 3) |  \
> +    (extract32(x, 2, 1) << 5) |   \
> +    (extract32(x, 5, 2) << 6) |   \
> +    (extract32(x, 9, 1) << 8) |   \
> +    (extract32(x, 3, 1) << 9) |   \
> +    (extract32(x, 12, 1) << 10) | \
> +    (extract64(x, 8, 1) << 11) | \
> +    0)
> +
> +#define ANDES_V5_GET_JAL_UIMM(inst) ((extract32(inst, 21, 10) << 1) \
> +                           | (extract32(inst, 20, 1) << 11) \
> +                           | (extract32(inst, 12, 8) << 12) \
> +                           | (extract32(inst, 31, 1) << 20))
> +
> +
> +enum andes_v5_inst_id
> +{
> +    /* Code Dense Extension */
> +    EXEC_IT = (0x8000),
> +    /* custom 2 */
> +    BFOS = 0x03,
> +    BFOZ = 0x02,
> +};

Left over from something else?
This all looks like something that should be handled by decodetree.

> +static bool trans_bfos(DisasContext *ctx, arg_bfos *a)
> +{
> +    TCGv v0, v1, v2;
> +    v2 = tcg_const_tl(ctx->opcode);
> +    v0 = get_gpr(ctx, a->rs1, EXT_NONE);
> +    v1 = get_gpr(ctx, a->rd, EXT_NONE);
> +    gen_helper_andes_v5_bfo_x(v1, v1, v0, v2);
> +    gen_set_gpr(ctx, a->rd, v1);
> +    tcg_temp_free(v2);
> +    return true;
> +}
> +
> +static bool trans_bfoz(DisasContext *ctx, arg_bfoz *a)
> +{
> +    TCGv v0, v1, v2;
> +    v2 = tcg_const_tl(ctx->opcode);
> +    v0 = get_gpr(ctx, a->rs1, EXT_NONE);
> +    v1 = get_gpr(ctx, a->rd,  EXT_NONE);
> +    gen_helper_andes_v5_bfo_x(v1, v1, v0, v2);
> +    gen_set_gpr(ctx, a->rd, v1);
> +    tcg_temp_free(v2);
> +    return true;
> +}

So, you've just passed off everything to the helper.  Not good.

First, these two instructions are so close we add a common helper.

static bool do_bfox(DisasContext *ctx, arg_i_bfo *a, bool is_signed)
{
     ...
}

static bool trans_bfos(DisasContext *ctx, arg_i_bfo *a)
{
     return do_bfox(ctx, a, true);
}

static bool trans_bfoz(DisasContext *ctx, arg_i_bfo *a)
{
     return do_bfox(ctx, a, false);
}

Second, re the RV32/RV64 merge, range check the bits:

     if (a->msb >= get_xlen(ctx) || a->lsb >= get_xlen(ctx)) {
         return false;
     }

Third, TCG can easily handle extract/deposit inline:

     dest = dest_gpr(ctx, a->rd);
     src1 = get_gpr(ctx, a->rs1, EXT_NONE);
     if (a->msb >= a->lsb) {
         len = a->msb - a->lsb + 1;
         if (is_signed) {
             tcg_gen_sextract_tl(dest, src1, a->lsb, len);
         } else {
             tcg_gen_extract_tl(dest, src1, a->lsb, len);
         }
     } else {
         len = a->lsb - a->msb + 1;
         if (is_signed) {
             tcg_gen_sextract_tl(dest, src1, 0, len);
         } else {
             tcg_gen_extract_tl(dest, src1, 0, len);
         }
         tcg_gen_shli_tl(dest, dest, a->msb);
     }
     gen_set_gpr(ctx, a->rd, dest);


> +static int andes_v5_gen_codense_exec_it(CPURISCVState *env, DisasContext *ctx, arg_execit *a)
> +{
> +    uint32_t insn;
> +    uint32_t imm_ex10 = a->imm_codense;
> +    target_ulong uitb_val = 0;
> +    riscv_csrrw(env, 0x800, &uitb_val, 0, 0);

This won't work -- you can't access env during translation.  So all this faff around 
passing env through HartState is for naught.

Having a look through the rest of this:

> +
> +    if (extract32(uitb_val, 0, 1)) { /* UTIB.HW == 1 */
> +        qemu_log_mask(LOG_GUEST_ERROR, "exec.it: UITB.HW == 1 is not supported by now!\n");
> +        gen_exception_illegal(ctx);
> +
> +        uint32_t instruction_table[0];
> +        insn = instruction_table[imm_ex10 >> 2];
> +
> +        return false;
> +    } else { /* UTIB.HW == 0 */
> +        target_ulong vaddr = (uitb_val & ~0x3) + (imm_ex10<<2);
> +        insn = cpu_ldl_code(env, vaddr);
> +    }
> +
> +/*  Execute(insn)
> + *  do as the replaced instruction, even exceptions,
> + *  except ctx->pc_succ_insn value (2).
> + */
> +        uint32_t op = MASK_OP_MAJOR(insn);
> +        if (op == OPC_RISC_JAL) {
> +            /* implement this by hack imm */
> +            int rd = GET_RD(ctx->opcode);
> +            target_long imm = ANDES_V5_GET_JAL_UIMM(ctx->opcode);
> +            target_ulong next_pc = (ctx->base.pc_next >> 21 << 21) | imm;
> +            imm = next_pc - ctx->base.pc_next;
> +            gen_jal(ctx, rd, imm);
> +        } else {
> +            /* JARL done as SPEC already */
> +            /* presume ctx->pc_succ_insn not changed in any ISA extension */
> +            decode_opc_andes(env, ctx, insn);
> +        }
> +
> +    return true;
> +}

This looks quite a lot like the S390X EXECUTE instruction.  It's hard to suggest exactly 
how to structure this, because I can't find documentation, and thus the edge cases are 
unknown.

Because of the .HW check, I would expect all of this do be in a helper function.  The 
qemu_log_mask would be followed by riscv_raise_exception.

As for the actual execute, I'd suggest following the lead of s390x and utilize the cs_base 
field of cpu_get_tb_cpu_state to hold the opcode to be executed.  You'd load the opcode in 
the helper, install the proper state into env, and end the current TranslationBlock.  The 
next TranslationBlock will find the opcode and decode it in the normal way.

Have a look at target/s390x/tcg/mem_helper.c, HELPER(ex), where we load the opcode and 
store state.  Then translate.c, extract_insn, where we consume the state.


r~
Ruinland ChuanTzu Tsai Oct. 22, 2021, 8:48 a.m. UTC | #2
On Thu, Oct 21, 2021 at 12:17:47PM -0700, Richard Henderson wrote:
> On 10/21/21 8:11 AM, Ruinland Chuan-Tzu Tsai wrote:
> > In this patch, we demonstrate how Andes Performance Extension(c) insn :
> > bfos and bfoz could be used with Andes CoDense : exec.it.
> > 
> > By doing so, an Andes vendor designed CSR : uitb must be used.
> > 
> > Signed-off-by: Ruinland Chuan-Tzu Tsai <ruinland@andestech.com>
> > ---
> >   target/riscv/andes_codense.decode         |  23 +++++
> >   target/riscv/andes_custom_rv32.decode     |  27 +++++
> >   target/riscv/andes_custom_rv64.decode     |  27 +++++
> >   target/riscv/andes_helper.c               |  49 +++++++++
> >   target/riscv/andes_helper.h               |   1 +
> >   target/riscv/cpu.c                        |   8 ++
> >   target/riscv/helper.h                     |   2 +
> >   target/riscv/insn_trans/trans_andes.c.inc | 115 ++++++++++++++++++++++
> >   target/riscv/meson.build                  |  13 +++
> >   target/riscv/translate.c                  |  12 +++
> >   10 files changed, 277 insertions(+)
> >   create mode 100644 target/riscv/andes_codense.decode
> >   create mode 100644 target/riscv/andes_custom_rv32.decode
> >   create mode 100644 target/riscv/andes_custom_rv64.decode
> >   create mode 100644 target/riscv/andes_helper.c
> >   create mode 100644 target/riscv/andes_helper.h
> >   create mode 100644 target/riscv/insn_trans/trans_andes.c.inc
> > 
> > diff --git a/target/riscv/andes_codense.decode b/target/riscv/andes_codense.decode
> > new file mode 100644
> > index 0000000000..639d22ac67
> > --- /dev/null
> > +++ b/target/riscv/andes_codense.decode
> > @@ -0,0 +1,23 @@
> > +#
> > +# RISC-V translation routines for the RVXI Base Integer Instruction Set.
> > +#
> > +# Copyright (c) 2018 Peer Adelt, peer.adelt@hni.uni-paderborn.de
> > +#                    Bastian Koppelmann, kbastian@mail.uni-paderborn.de
> > +#
> > +# This program is free software; you can redistribute it and/or modify it
> > +# under the terms and conditions of the GNU General Public License,
> > +# version 2 or later, as published by the Free Software Foundation.
> > +#
> > +# This program is distributed in the hope it will be useful, but WITHOUT
> > +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> > +# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> > +# more details.
> > +#
> > +# You should have received a copy of the GNU General Public License along with
> > +# this program.  If not, see <http://www.gnu.org/licenses/>.
> > +
> > +
> > +%imm_ex10 8:s1 12:1 3:1 9:1 5:2 2:1 10:2 4:1
> > +&codense imm_codense
> > +@ex10     ... .... . . ..... .. &codense imm_codense=%imm_ex10
> > +execit    100 .... . 0 ..... 00 @ex10
> 
> It would probably be a bit clearer without the argument set and format,
> which in this case are used exactly once, so there's no actual savings.
> 
> execit    100 ..... 0 ..... 00   imm=%imm_ex10
> 
> 
> > +++ b/target/riscv/andes_custom_rv32.decode
> ...
> > +++ b/target/riscv/andes_custom_rv64.decode
> 
> Note that we're moving toward a single riscv64 executable, and so these two
> files should be combined.  In this case, just taking the rv64 file will work
> for RV32, with an extra check during translate.
> 
> > +# Fields:
> > +%rs1       15:5
> > +%rd        7:5
> > +## Similar to I-Type
> > +%mbfob    26:6
> > +%lbfob    20:6
> > +&i_b mbfob lbfob rs1 rd
> > +@i_bfo    . ..... . ..... ..... ... ..... .......  &i_b %mbfob %lbfob %rs1 %rd
> 
> Better as
> 
> @i_bfo       msb:6 lsb:6 rs1:5 ... rd:5 .......   &i_bfo
> 
> There's quite a lot of riscv code that could be cleaned up like this.
> 
> > +++ b/target/riscv/andes_helper.c
> > @@ -0,0 +1,49 @@
> > +#include "qemu/osdep.h"
> 
> All new files must have copyright boilerplate.
> That said...

I deeply apologize for that.
Was way too fatigued last night.

> 
> > +#include "cpu.h"
> > +#include "qemu/host-utils.h"
> > +#include "exec/exec-all.h"
> > +#include "exec/helper-proto.h"
> > +#include "fpu/softfloat.h"
> > +#include "internals.h"
> > +typedef int (*test_function)(uint8_t a, uint8_t b);
> > +target_ulong helper_andes_v5_bfo_x(target_ulong rd, target_ulong rs1,
> > +                                   target_ulong insn)
> 
> Never pass the instruction to a helper. This is an indication that you
> should have done more work during translate.
> 
> > --- /dev/null
> > +++ b/target/riscv/insn_trans/trans_andes.c.inc
> > @@ -0,0 +1,115 @@
> > +#define GP 3
> 
> Again, boilerplate.

Wilco.

> 
> > +#define ANDES_V5_CODE_DENSE_MASK (0xe083)
> > +#define ANDES_V5_CODE_DENSE_ID(x) ((x)&ANDES_V5_CODE_DENSE_MASK)
> > +#define ANDES_V5_CODE_DENSE_IMM11(x) (     \
> > +    (extract32(x, 4, 1) << 2) |   \
> > +    (extract32(x, 10, 2) << 3) |  \
> > +    (extract32(x, 2, 1) << 5) |   \
> > +    (extract32(x, 5, 2) << 6) |   \
> > +    (extract32(x, 9, 1) << 8) |   \
> > +    (extract32(x, 3, 1) << 9) |   \
> > +    (extract32(x, 12, 1) << 10) | \
> > +    (extract64(x, 8, 1) << 11) | \
> > +    0)
> > +
> > +#define ANDES_V5_GET_JAL_UIMM(inst) ((extract32(inst, 21, 10) << 1) \
> > +                           | (extract32(inst, 20, 1) << 11) \
> > +                           | (extract32(inst, 12, 8) << 12) \
> > +                           | (extract32(inst, 31, 1) << 20))
> > +
> > +
> > +enum andes_v5_inst_id
> > +{
> > +    /* Code Dense Extension */
> > +    EXEC_IT = (0x8000),
> > +    /* custom 2 */
> > +    BFOS = 0x03,
> > +    BFOZ = 0x02,
> > +};
> 
> Left over from something else?
> This all looks like something that should be handled by decodetree.

Oops, sorry for that. This part was cutting out from our inhouse code.

> 
> > +static bool trans_bfos(DisasContext *ctx, arg_bfos *a)
> > +{
> > +    TCGv v0, v1, v2;
> > +    v2 = tcg_const_tl(ctx->opcode);
> > +    v0 = get_gpr(ctx, a->rs1, EXT_NONE);
> > +    v1 = get_gpr(ctx, a->rd, EXT_NONE);
> > +    gen_helper_andes_v5_bfo_x(v1, v1, v0, v2);
> > +    gen_set_gpr(ctx, a->rd, v1);
> > +    tcg_temp_free(v2);
> > +    return true;
> > +}
> > +
> > +static bool trans_bfoz(DisasContext *ctx, arg_bfoz *a)
> > +{
> > +    TCGv v0, v1, v2;
> > +    v2 = tcg_const_tl(ctx->opcode);
> > +    v0 = get_gpr(ctx, a->rs1, EXT_NONE);
> > +    v1 = get_gpr(ctx, a->rd,  EXT_NONE);
> > +    gen_helper_andes_v5_bfo_x(v1, v1, v0, v2);
> > +    gen_set_gpr(ctx, a->rd, v1);
> > +    tcg_temp_free(v2);
> > +    return true;
> > +}
> 
> So, you've just passed off everything to the helper.  Not good.
> 
> First, these two instructions are so close we add a common helper.
> 
> static bool do_bfox(DisasContext *ctx, arg_i_bfo *a, bool is_signed)
> {
>     ...
> }
> 
> static bool trans_bfos(DisasContext *ctx, arg_i_bfo *a)
> {
>     return do_bfox(ctx, a, true);
> }
> 
> static bool trans_bfoz(DisasContext *ctx, arg_i_bfo *a)
> {
>     return do_bfox(ctx, a, false);
> }
> 
> Second, re the RV32/RV64 merge, range check the bits:
> 
>     if (a->msb >= get_xlen(ctx) || a->lsb >= get_xlen(ctx)) {
>         return false;
>     }

Okay, that's new to me.

> 
> Third, TCG can easily handle extract/deposit inline:
> 
>     dest = dest_gpr(ctx, a->rd);
>     src1 = get_gpr(ctx, a->rs1, EXT_NONE);
>     if (a->msb >= a->lsb) {
>         len = a->msb - a->lsb + 1;
>         if (is_signed) {
>             tcg_gen_sextract_tl(dest, src1, a->lsb, len);
>         } else {
>             tcg_gen_extract_tl(dest, src1, a->lsb, len);
>         }
>     } else {
>         len = a->lsb - a->msb + 1;
>         if (is_signed) {
>             tcg_gen_sextract_tl(dest, src1, 0, len);
>         } else {
>             tcg_gen_extract_tl(dest, src1, 0, len);
>         }
>         tcg_gen_shli_tl(dest, dest, a->msb);
>     }
>     gen_set_gpr(ctx, a->rd, dest);
> 
> 
> > +static int andes_v5_gen_codense_exec_it(CPURISCVState *env, DisasContext *ctx, arg_execit *a)
> > +{
> > +    uint32_t insn;
> > +    uint32_t imm_ex10 = a->imm_codense;
> > +    target_ulong uitb_val = 0;
> > +    riscv_csrrw(env, 0x800, &uitb_val, 0, 0);
> 
> This won't work -- you can't access env during translation.  So all this
> faff around passing env through HartState is for naught.

Sorry, please elaborate me a little bit more. 

> 
> Having a look through the rest of this:
> 
> > +
> > +    if (extract32(uitb_val, 0, 1)) { /* UTIB.HW == 1 */
> > +        qemu_log_mask(LOG_GUEST_ERROR, "exec.it: UITB.HW == 1 is not supported by now!\n");
> > +        gen_exception_illegal(ctx);
> > +
> > +        uint32_t instruction_table[0];
> > +        insn = instruction_table[imm_ex10 >> 2];
> > +
> > +        return false;
> > +    } else { /* UTIB.HW == 0 */
> > +        target_ulong vaddr = (uitb_val & ~0x3) + (imm_ex10<<2);
> > +        insn = cpu_ldl_code(env, vaddr);
> > +    }
> > +
> > +/*  Execute(insn)
> > + *  do as the replaced instruction, even exceptions,
> > + *  except ctx->pc_succ_insn value (2).
> > + */
> > +        uint32_t op = MASK_OP_MAJOR(insn);
> > +        if (op == OPC_RISC_JAL) {
> > +            /* implement this by hack imm */
> > +            int rd = GET_RD(ctx->opcode);
> > +            target_long imm = ANDES_V5_GET_JAL_UIMM(ctx->opcode);
> > +            target_ulong next_pc = (ctx->base.pc_next >> 21 << 21) | imm;
> > +            imm = next_pc - ctx->base.pc_next;
> > +            gen_jal(ctx, rd, imm);
> > +        } else {
> > +            /* JARL done as SPEC already */
> > +            /* presume ctx->pc_succ_insn not changed in any ISA extension */
> > +            decode_opc_andes(env, ctx, insn);
> > +        }
> > +
> > +    return true;
> > +}
> 
> This looks quite a lot like the S390X EXECUTE instruction.  It's hard to
> suggest exactly how to structure this, because I can't find documentation,
> and thus the edge cases are unknown.

Sorry for the inconvience, I'll try harder to make the spec to be publicily
accessible.

> 
> Because of the .HW check, I would expect all of this do be in a helper
> function.  The qemu_log_mask would be followed by riscv_raise_exception.

The HW instruction table is a feature we still have no idea how to implement
in QEMU, it was meant to be a security extension that rogue users couldn't
copy out vendor kept code - - the only permission is execution, no read
access on that region.

> 
> As for the actual execute, I'd suggest following the lead of s390x and
> utilize the cs_base field of cpu_get_tb_cpu_state to hold the opcode to be
> executed.  You'd load the opcode in the helper, install the proper state
> into env, and end the current TranslationBlock.  The next TranslationBlock
> will find the opcode and decode it in the normal way.
> 
> Have a look at target/s390x/tcg/mem_helper.c, HELPER(ex), where we load the
> opcode and store state.  Then translate.c, extract_insn, where we consume
> the state.
> 
> 
> r~
Alex Bennée Oct. 22, 2021, 11:52 a.m. UTC | #3
Ruinland ChuanTzu Tsai <ruinland@andestech.com> writes:

> On Thu, Oct 21, 2021 at 12:17:47PM -0700, Richard Henderson wrote:
>> On 10/21/21 8:11 AM, Ruinland Chuan-Tzu Tsai wrote:
>> > In this patch, we demonstrate how Andes Performance Extension(c) insn :
>> > bfos and bfoz could be used with Andes CoDense : exec.it.
>> > 
<snip>
>> > +static int andes_v5_gen_codense_exec_it(CPURISCVState *env, DisasContext *ctx, arg_execit *a)
>> > +{
>> > +    uint32_t insn;
>> > +    uint32_t imm_ex10 = a->imm_codense;
>> > +    target_ulong uitb_val = 0;
>> > +    riscv_csrrw(env, 0x800, &uitb_val, 0, 0);
>> 
>> This won't work -- you can't access env during translation.  So all this
>> faff around passing env through HartState is for naught.
>
> Sorry, please elaborate me a little bit more.

When dealing with translation you need to separate the translation phase
from the execution phase. CPUEnv is the current run time state of the
vCPU so the value that might be stored in it when you do translation
could very well be different when the translation runs (or runs again).

The correct way to deal with this is by the use of translation flags. If
for example the translation is only valid for a particular execution
state you represent that in a translation flag which you compute in
cpu_get_tb_cpu_state. This ensures that the translated block will only
get looked up when you are in that translation state - if it's not you
will generate a new block for the current state. See the section:

 https://qemu.readthedocs.io/en/latest/devel/tcg.html#cpu-state-optimisations

of the developer guide.
Richard Henderson Oct. 22, 2021, 5:24 p.m. UTC | #4
On 10/22/21 4:52 AM, Alex Bennée wrote:
> 
> Ruinland ChuanTzu Tsai <ruinland@andestech.com> writes:
> 
>> On Thu, Oct 21, 2021 at 12:17:47PM -0700, Richard Henderson wrote:
>>> On 10/21/21 8:11 AM, Ruinland Chuan-Tzu Tsai wrote:
>>>> In this patch, we demonstrate how Andes Performance Extension(c) insn :
>>>> bfos and bfoz could be used with Andes CoDense : exec.it.
>>>>
> <snip>
>>>> +static int andes_v5_gen_codense_exec_it(CPURISCVState *env, DisasContext *ctx, arg_execit *a)
>>>> +{
>>>> +    uint32_t insn;
>>>> +    uint32_t imm_ex10 = a->imm_codense;
>>>> +    target_ulong uitb_val = 0;
>>>> +    riscv_csrrw(env, 0x800, &uitb_val, 0, 0);
>>>
>>> This won't work -- you can't access env during translation.  So all this
>>> faff around passing env through HartState is for naught.
>>
>> Sorry, please elaborate me a little bit more.
> 
> When dealing with translation you need to separate the translation phase
> from the execution phase. CPUEnv is the current run time state of the
> vCPU so the value that might be stored in it when you do translation
> could very well be different when the translation runs (or runs again).
> 
> The correct way to deal with this is by the use of translation flags...

Yes translation flags will be important for this, but first, the cpu state which you are 
reading here at translation time, which you are baking into this TranslationBlock 
forevermore, is (1) the uitb csr value and (2) the contents of the memory at the final 
address.

Both of these things might vary in between translation of the exec_it instruction and 
execution of the TranslationBlock.  At which point the translation you did earlier is wrong.

Avoiding these kinds of mistakes is exactly why we have *not* provided env to you at this 
point in translation.  Which is why all of the changes you did to recover env are also wrong.


r~
diff mbox series

Patch

diff --git a/target/riscv/andes_codense.decode b/target/riscv/andes_codense.decode
new file mode 100644
index 0000000000..639d22ac67
--- /dev/null
+++ b/target/riscv/andes_codense.decode
@@ -0,0 +1,23 @@ 
+#
+# RISC-V translation routines for the RVXI Base Integer Instruction Set.
+#
+# Copyright (c) 2018 Peer Adelt, peer.adelt@hni.uni-paderborn.de
+#                    Bastian Koppelmann, kbastian@mail.uni-paderborn.de
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2 or later, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program.  If not, see <http://www.gnu.org/licenses/>.
+
+
+%imm_ex10 8:s1 12:1 3:1 9:1 5:2 2:1 10:2 4:1
+&codense imm_codense
+@ex10     ... .... . . ..... .. &codense imm_codense=%imm_ex10
+execit    100 .... . 0 ..... 00 @ex10
diff --git a/target/riscv/andes_custom_rv32.decode b/target/riscv/andes_custom_rv32.decode
new file mode 100644
index 0000000000..f69d75018d
--- /dev/null
+++ b/target/riscv/andes_custom_rv32.decode
@@ -0,0 +1,27 @@ 
+#
+# Andes Technology V5 Performance Extension
+#
+# Copyright (c) 2021 Andes Technology, ruinland@andestech.com
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2 or later, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# Fields:
+%rs1       15:5
+%rd        7:5
+## Similar to I-Type
+%mbfob    26:5
+%lbfob    20:5
+&i_b mbfob lbfob rs1 rd
+@i_bfo    . ..... . ..... ..... ... ..... .......  &i_b %mbfob %lbfob %rs1 %rd
+bfos      0 ..... 0 ..... ..... 011 .....  1011011 @i_bfo #0x03
+bfoz      0 ..... 0 ..... ..... 010 .....  1011011 @i_bfo #0x02
diff --git a/target/riscv/andes_custom_rv64.decode b/target/riscv/andes_custom_rv64.decode
new file mode 100644
index 0000000000..61f1fb208b
--- /dev/null
+++ b/target/riscv/andes_custom_rv64.decode
@@ -0,0 +1,27 @@ 
+#
+# Andes Technology V5 Performance Extension
+#
+# Copyright (c) 2021 Andes Technology, ruinland@andestech.com
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2 or later, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# Fields:
+%rs1       15:5
+%rd        7:5
+## Similar to I-Type
+%mbfob    26:6
+%lbfob    20:6
+&i_b mbfob lbfob rs1 rd
+@i_bfo    . ..... . ..... ..... ... ..... .......  &i_b %mbfob %lbfob %rs1 %rd
+bfos      . ..... . ..... ..... 011 .....  1011011 @i_bfo #0x03
+bfoz      . ..... . ..... ..... 010 .....  1011011 @i_bfo #0x02
diff --git a/target/riscv/andes_helper.c b/target/riscv/andes_helper.c
new file mode 100644
index 0000000000..5e1e52fccb
--- /dev/null
+++ b/target/riscv/andes_helper.c
@@ -0,0 +1,49 @@ 
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "qemu/host-utils.h"
+#include "exec/exec-all.h"
+#include "exec/helper-proto.h"
+#include "fpu/softfloat.h"
+#include "internals.h"
+typedef int (*test_function)(uint8_t a, uint8_t b);
+target_ulong helper_andes_v5_bfo_x(target_ulong rd, target_ulong rs1,
+                                   target_ulong insn)
+{
+    int msb, lsb, is_se;
+    int lsbp1, msbm1, lsbm1, lenm1;
+    uint64_t se;
+    uint64_t nxrd = rd; /* for safety sake */
+
+    msb = extract64(insn, 26, 6);
+    lsb = extract64(insn, 20, 6);
+    is_se = 0x3 == (0x7 & (insn >> 12)); /* BFOS */
+    lsbp1 = lsb + 1;
+    msbm1 = msb - 1;
+    lsbm1 = lsb - 1;
+
+    if (msb == 0) {
+        nxrd = deposit64(nxrd, lsb, 1, 1 & rs1);
+        if (lsb > 0) {
+            nxrd = deposit64(nxrd, 0, lsbm1 + 1, 0);
+        }
+        if (lsb < 63) {
+            se = (is_se && (1 & rs1)) ? -1LL : 0;
+            nxrd = deposit64(nxrd, lsbp1, 64 - lsbp1, se);
+        }
+    } else if (msb < lsb) {
+        lenm1 = lsb - msb;
+        nxrd = deposit64(nxrd, msb, lenm1 + 1, rs1 >> 0);
+        if (lsb < 63) {
+            se = (is_se && (1 & (rs1 >> lenm1))) ? -1LL : 0;
+            nxrd = deposit64(nxrd, lsbp1, 64 - lsbp1, se);
+        }
+        nxrd = deposit64(nxrd, 0, msbm1 + 1, 0);
+    } else { /* msb >= lsb */
+        lenm1 = msb - lsb;
+        nxrd = deposit64(nxrd, 0, lenm1 + 1, rs1 >> lsb);
+        se = (is_se && (1 & (rs1 >> msb))) ? -1LL : 0;
+        nxrd = deposit64(nxrd, lenm1 + 1, 63 - lenm1, se);
+    }
+
+    return (target_long)nxrd;
+}
diff --git a/target/riscv/andes_helper.h b/target/riscv/andes_helper.h
new file mode 100644
index 0000000000..dfe15d8841
--- /dev/null
+++ b/target/riscv/andes_helper.h
@@ -0,0 +1 @@ 
+DEF_HELPER_FLAGS_3(andes_v5_bfo_x, TCG_CALL_NO_RWG, tl, tl, tl, tl)
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index e8ab5734c2..fc20673c1a 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -207,8 +207,12 @@  static void rv64_base_cpu_init(Object *obj)
 static void ax25_cpu_init(Object *obj)
 {
     CPURISCVState *env = &RISCV_CPU(obj)->env;
+    HartState = env;
     set_misa(env, RV64 | RVI | RVM | RVA | RVF | RVD | RVC | RVS | RVU);
     set_priv_version(env, PRIV_VERSION_1_10_0);
+    cpu_has_custom_insns = true;
+    custom_insn_handler    = decode_andes;
+    custom_c_insn_handler  = decode_codense;
     setup_custom_csr(env, andes_custom_csr_table);
     env->custom_csr_val = g_malloc(andes_custom_csr_size);
 }
@@ -271,8 +275,12 @@  static void rv32_imafcu_nommu_cpu_init(Object *obj)
 static void a25_cpu_init(Object *obj)
 {
     CPURISCVState *env = &RISCV_CPU(obj)->env;
+    HartState = env;
     set_misa(env, RV32 | RVI | RVM | RVA | RVF | RVD | RVC | RVS | RVU);
     set_priv_version(env, PRIV_VERSION_1_10_0);
+    cpu_has_custom_insns = true;
+    custom_insn_handler    = decode_andes;
+    custom_c_insn_handler  = decode_codense;
     setup_custom_csr(env, andes_custom_csr_table);
     env->custom_csr_val = g_malloc(andes_custom_csr_size);
 }
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 460eee9988..ca60825e80 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -1149,3 +1149,5 @@  DEF_HELPER_6(vcompress_vm_b, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vcompress_vm_h, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vcompress_vm_w, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vcompress_vm_d, void, ptr, ptr, ptr, ptr, env, i32)
+
+#include "andes_helper.h"
diff --git a/target/riscv/insn_trans/trans_andes.c.inc b/target/riscv/insn_trans/trans_andes.c.inc
new file mode 100644
index 0000000000..384745fceb
--- /dev/null
+++ b/target/riscv/insn_trans/trans_andes.c.inc
@@ -0,0 +1,115 @@ 
+#define GP 3
+#define ANDES_V5_CODE_DENSE_MASK (0xe083)
+#define ANDES_V5_CODE_DENSE_ID(x) ((x)&ANDES_V5_CODE_DENSE_MASK)
+#define ANDES_V5_CODE_DENSE_IMM11(x) (     \
+    (extract32(x, 4, 1) << 2) |   \
+    (extract32(x, 10, 2) << 3) |  \
+    (extract32(x, 2, 1) << 5) |   \
+    (extract32(x, 5, 2) << 6) |   \
+    (extract32(x, 9, 1) << 8) |   \
+    (extract32(x, 3, 1) << 9) |   \
+    (extract32(x, 12, 1) << 10) | \
+    (extract64(x, 8, 1) << 11) | \
+    0)
+
+#define ANDES_V5_GET_JAL_UIMM(inst) ((extract32(inst, 21, 10) << 1) \
+                           | (extract32(inst, 20, 1) << 11) \
+                           | (extract32(inst, 12, 8) << 12) \
+                           | (extract32(inst, 31, 1) << 20))
+
+
+enum andes_v5_inst_id
+{
+    /* Code Dense Extension */
+    EXEC_IT = (0x8000),
+    /* custom 2 */
+    BFOS = 0x03,
+    BFOZ = 0x02,
+};
+
+static bool trans_bfos(DisasContext *ctx, arg_bfos *a)
+{
+    TCGv v0, v1, v2;
+    v2 = tcg_const_tl(ctx->opcode);
+    v0 = get_gpr(ctx, a->rs1, EXT_NONE);
+    v1 = get_gpr(ctx, a->rd, EXT_NONE);
+    gen_helper_andes_v5_bfo_x(v1, v1, v0, v2);
+    gen_set_gpr(ctx, a->rd, v1);
+    tcg_temp_free(v2);
+    return true;
+}
+
+static bool trans_bfoz(DisasContext *ctx, arg_bfoz *a)
+{
+    TCGv v0, v1, v2;
+    v2 = tcg_const_tl(ctx->opcode);
+    v0 = get_gpr(ctx, a->rs1, EXT_NONE);
+    v1 = get_gpr(ctx, a->rd,  EXT_NONE);
+    gen_helper_andes_v5_bfo_x(v1, v1, v0, v2);
+    gen_set_gpr(ctx, a->rd, v1);
+    tcg_temp_free(v2);
+    return true;
+}
+
+static void decode_opc_andes(CPURISCVState *env, DisasContext *ctx, uint32_t opcode32)
+{
+    uint32_t old_opcode = ctx->opcode;
+    ctx->opcode = opcode32;
+    custom_insn_decoded = decode_andes(ctx, opcode32);
+    if(!custom_insn_decoded) {
+        if (!decode_insn32(ctx, opcode32)) {
+            gen_exception_illegal(ctx);
+        }
+    }
+    ctx->opcode = old_opcode;
+}
+
+static int andes_v5_gen_codense_exec_it(CPURISCVState *env, DisasContext *ctx, arg_execit *a)
+{
+    uint32_t insn;
+    uint32_t imm_ex10 = a->imm_codense;
+    target_ulong uitb_val = 0;
+    riscv_csrrw(env, 0x800, &uitb_val, 0, 0);
+
+    if (extract32(uitb_val, 0, 1)) { /* UTIB.HW == 1 */
+        qemu_log_mask(LOG_GUEST_ERROR, "exec.it: UITB.HW == 1 is not supported by now!\n");
+        gen_exception_illegal(ctx);
+
+        uint32_t instruction_table[0];
+        insn = instruction_table[imm_ex10 >> 2];
+
+        return false;
+    } else { /* UTIB.HW == 0 */
+        target_ulong vaddr = (uitb_val & ~0x3) + (imm_ex10<<2);
+        insn = cpu_ldl_code(env, vaddr);
+    }
+
+/*  Execute(insn) 
+ *  do as the replaced instruction, even exceptions,
+ *  except ctx->pc_succ_insn value (2).
+ */
+        uint32_t op = MASK_OP_MAJOR(insn);
+        if (op == OPC_RISC_JAL) {
+            /* implement this by hack imm */
+            int rd = GET_RD(ctx->opcode);
+            target_long imm = ANDES_V5_GET_JAL_UIMM(ctx->opcode);
+            target_ulong next_pc = (ctx->base.pc_next >> 21 << 21) | imm;
+            imm = next_pc - ctx->base.pc_next;
+            gen_jal(ctx, rd, imm);
+        } else {
+            /* JARL done as SPEC already */
+            /* presume ctx->pc_succ_insn not changed in any ISA extension */
+            decode_opc_andes(env, ctx, insn);
+        }
+
+    return true;
+}
+extern __thread CPURISCVState *HartState;
+
+static bool trans_execit(DisasContext *ctx, arg_execit *a)
+{
+    /* This is merely something to make things happen */
+    CPURISCVState *env = HartState;
+    andes_v5_gen_codense_exec_it(env ,ctx, a);
+    return true;
+}
diff --git a/target/riscv/meson.build b/target/riscv/meson.build
index 5c87672ff9..002aec2746 100644
--- a/target/riscv/meson.build
+++ b/target/riscv/meson.build
@@ -6,10 +6,23 @@  gen = [
   decodetree.process('insn32.decode', extra_args: '--static-decode=decode_insn32'),
 ]
 
+gen_custom_RV32 = [
+  decodetree.process('andes_codense.decode', extra_args: ['--decode=decode_codense', '--insnwidth=16']),
+  decodetree.process('andes_custom_rv32.decode', extra_args: '--decode=decode_andes'),
+]
+
+gen_custom_RV64 = [
+  decodetree.process('andes_codense.decode', extra_args: ['--decode=decode_codense', '--insnwidth=16']),
+  decodetree.process('andes_custom_rv64.decode', extra_args: '--decode=decode_andes'),
+]
+
 riscv_ss = ss.source_set()
 riscv_ss.add(gen)
+riscv_ss.add(when: 'TARGET_RISCV32', if_true: gen_custom_RV32)
+riscv_ss.add(when: 'TARGET_RISCV64', if_true: gen_custom_RV64)
 riscv_ss.add(files(
   'csr_andes.c',
+  'andes_helper.c',
   'cpu.c',
   'cpu_helper.c',
   'csr.c',
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index f755749380..4c0085dde5 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -498,6 +498,18 @@  __thread bool custom_insn_decoded;
 extern bool (*custom_c_insn_handler)(DisasContext *, uint16_t);
 extern bool (*custom_insn_handler)(DisasContext *, uint32_t);
 
+/* Include the auto-generated decoder for andes custom insn */
+bool decode_andes(DisasContext *, uint32_t);
+bool decode_codense(DisasContext *, uint16_t);
+#include "decode-andes_codense.c.inc"
+#ifdef TARGET_RISCV32
+#include "decode-andes_custom_rv32.c.inc"
+#endif
+#ifdef TARGET_RISCV64
+#include "decode-andes_custom_rv64.c.inc"
+#endif
+#include "insn_trans/trans_andes.c.inc"
+
 static void try_decode_custom_insn(CPURISCVState *env, DisasContext *ctx,
                                    uint16_t opcode)
 {