diff mbox series

[v5,41/48] target/nios2: Introduce shadow register sets

Message ID 20220310112725.570053-42-richard.henderson@linaro.org
State New
Headers show
Series target/nios2: Shadow register set, EIC and VIC | expand

Commit Message

Richard Henderson March 10, 2022, 11:27 a.m. UTC
Do not actually enable them so far, but add all of the
plumbing to address them.  Do not enable them for user-only.

Add an env->regs pointer that handles the indirection to
the current register set.  The naming of the pointer hides
the difference between old and new, user-only and sysemu.

From the notes on wrprs, which states that r0 must be initialized
before use in shadow register sets, infer that R_ZERO is *not*
hardwired to zero in shadow register sets.  Adjust load_gpr and
dest_gpr to reflect this.  At the same time we might as well
special case crs == 0 to avoid the indirection through env->regs
during translation as well.  Given that this is intended to be
the most common case for non-interrupt handlers.

Init env->regs at reset.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/nios2/cpu.h       | 24 +++++++++++++++++
 target/nios2/cpu.c       |  1 +
 target/nios2/translate.c | 58 +++++++++++++++++++++++++++++++---------
 3 files changed, 70 insertions(+), 13 deletions(-)

Comments

Amir Gonnen March 13, 2022, 11:55 a.m. UTC | #1
(Continue discussion from Re: [PATCH v4 24/33] target/nios2: Introduce shadow register sets)

> > How does "cpu_crs_R" work?
> > ... Otherwise, each gpr access would be indirect. I'm probably missing something here.

> They are indirect, but with some optimization.

Why not always access directly?  
With an EIC each interrupt handler is associated with a specific shadow register set, so we can expect that (on a sane use case) each block always executes on the same register set.  
If we update cpu_get_tb_cpu_state to translate differently based on STATUS.CRS we would still end up with a single translation for each block.  
This way the translator could emit direct registers access for shadow registers, and we won't need to rely on optimizations to lower indirect access.

Thanks,
Amir
Richard Henderson March 13, 2022, 4:53 p.m. UTC | #2
On 3/13/22 04:55, Amir Gonnen wrote:
> (Continue discussion from Re: [PATCH v4 24/33] target/nios2: Introduce shadow register sets)
> 
>>> How does "cpu_crs_R" work?
>>> ... Otherwise, each gpr access would be indirect. I'm probably missing something here.
> 
>> They are indirect, but with some optimization.
> 
> Why not always access directly?
> With an EIC each interrupt handler is associated with a specific shadow register set, so we can expect that (on a sane use case) each block always executes on the same register set.
> If we update cpu_get_tb_cpu_state to translate differently based on STATUS.CRS we would still end up with a single translation for each block.
> This way the translator could emit direct registers access for shadow registers, and we won't need to rely on optimizations to lower indirect access.

We could do that if we support fewer than 64 shadow register sets:

#define TCG_MAX_TEMPS 512

Global temps (e.g. cpu_R[]) count against that limit, and 64 * 32 == 2048.
The maximum number of shadow reg sets you could support like this would be about 8.


r~
diff mbox series

Patch

diff --git a/target/nios2/cpu.h b/target/nios2/cpu.h
index d5255e9e76..e32bebe9b7 100644
--- a/target/nios2/cpu.h
+++ b/target/nios2/cpu.h
@@ -60,6 +60,11 @@  struct Nios2CPUClass {
 #define NUM_GP_REGS 32
 #define NUM_CR_REGS 32
 
+#ifndef CONFIG_USER_ONLY
+/* 63 shadow register sets; index 0 is the primary register set. */
+#define NUM_REG_SETS 64
+#endif
+
 /* General purpose register aliases */
 enum {
     R_ZERO   = 0,
@@ -178,7 +183,13 @@  FIELD(CR_TLBMISC, EE, 24, 1)
 #define EXCP_MPUD     17
 
 struct CPUArchState {
+#ifdef CONFIG_USER_ONLY
     uint32_t regs[NUM_GP_REGS];
+#else
+    uint32_t shadow_regs[NUM_REG_SETS][NUM_GP_REGS];
+    /* Pointer into shadow_regs for the current register set. */
+    uint32_t *regs;
+#endif
     uint32_t ctrl[NUM_CR_REGS];
     uint32_t pc;
 
@@ -229,6 +240,14 @@  static inline bool nios2_cr_reserved(const ControlRegState *s)
     return (s->writable | s->readonly) == 0;
 }
 
+static inline void nios2_update_crs(CPUNios2State *env)
+{
+#ifndef CONFIG_USER_ONLY
+    unsigned crs = FIELD_EX32(env->ctrl[CR_STATUS], CR_STATUS, CRS);
+    env->regs = env->shadow_regs[crs];
+#endif
+}
+
 void nios2_tcg_init(void);
 void nios2_cpu_do_interrupt(CPUState *cs);
 void dump_mmu(CPUNios2State *env);
@@ -271,12 +290,17 @@  typedef Nios2CPU ArchCPU;
 
 #include "exec/cpu-all.h"
 
+FIELD(TBFLAGS, CRS0, 0, 1)  /* Set if CRS == 0. */
+FIELD(TBFLAGS, U, 1, 1)     /* Overlaps CR_STATUS_U */
+
 static inline void cpu_get_tb_cpu_state(CPUNios2State *env, target_ulong *pc,
                                         target_ulong *cs_base, uint32_t *flags)
 {
     *pc = env->pc;
     *cs_base = 0;
     *flags = env->ctrl[CR_STATUS] & CR_STATUS_U;
+    *flags |= (env->ctrl[CR_STATUS] & R_CR_STATUS_CRS_MASK
+               ? 0 : R_TBFLAGS_CRS0_MASK);
 }
 
 #endif /* NIOS2_CPU_H */
diff --git a/target/nios2/cpu.c b/target/nios2/cpu.c
index b8410d8447..efd6a21a8e 100644
--- a/target/nios2/cpu.c
+++ b/target/nios2/cpu.c
@@ -59,6 +59,7 @@  static void nios2_cpu_reset(DeviceState *dev)
     env->ctrl[CR_STATUS] = CR_STATUS_RSIE | CR_STATUS_U | CR_STATUS_PIE;
 #else
     env->ctrl[CR_STATUS] = CR_STATUS_RSIE;
+    nios2_update_crs(env);
 #endif
 
     env->regs[R_ZERO] = 0;
diff --git a/target/nios2/translate.c b/target/nios2/translate.c
index 1e784c8a37..525df7b023 100644
--- a/target/nios2/translate.c
+++ b/target/nios2/translate.c
@@ -100,12 +100,16 @@  typedef struct DisasContext {
     DisasContextBase  base;
     target_ulong      pc;
     int               mem_idx;
+    bool              crs0;
     TCGv              sink;
     const ControlRegState *cr_state;
 } DisasContext;
 
 static TCGv cpu_R[NUM_GP_REGS];
 static TCGv cpu_pc;
+#ifndef CONFIG_USER_ONLY
+static TCGv cpu_crs_R[NUM_GP_REGS];
+#endif
 
 typedef struct Nios2Instruction {
     void     (*handler)(DisasContext *dc, uint32_t code, uint32_t flags);
@@ -127,22 +131,36 @@  static uint8_t get_opxcode(uint32_t code)
 static TCGv load_gpr(DisasContext *dc, unsigned reg)
 {
     assert(reg < NUM_GP_REGS);
-    if (unlikely(reg == R_ZERO)) {
-        return tcg_constant_tl(0);
+    if (dc->crs0) {
+        if (unlikely(reg == R_ZERO)) {
+            return tcg_constant_tl(0);
+        }
+        return cpu_R[reg];
     }
-    return cpu_R[reg];
+#ifdef CONFIG_USER_ONLY
+    g_assert_not_reached();
+#else
+    return cpu_crs_R[reg];
+#endif
 }
 
 static TCGv dest_gpr(DisasContext *dc, unsigned reg)
 {
     assert(reg < NUM_GP_REGS);
-    if (unlikely(reg == R_ZERO)) {
-        if (dc->sink == NULL) {
-            dc->sink = tcg_temp_new();
+    if (dc->crs0) {
+        if (unlikely(reg == R_ZERO)) {
+            if (dc->sink == NULL) {
+                dc->sink = tcg_temp_new();
+            }
+            return dc->sink;
         }
-        return dc->sink;
+        return cpu_R[reg];
     }
-    return cpu_R[reg];
+#ifdef CONFIG_USER_ONLY
+    g_assert_not_reached();
+#else
+    return cpu_crs_R[reg];
+#endif
 }
 
 static void t_gen_helper_raise_exception(DisasContext *dc,
@@ -198,7 +216,7 @@  static void gen_excp(DisasContext *dc, uint32_t code, uint32_t flags)
 
 static bool gen_check_supervisor(DisasContext *dc)
 {
-    if (dc->base.tb->flags & CR_STATUS_U) {
+    if (dc->base.tb->flags & R_TBFLAGS_U_MASK) {
         /* CPU in user mode, privileged instruction called, stop. */
         t_gen_helper_raise_exception(dc, EXCP_SUPERI);
         return false;
@@ -794,6 +812,7 @@  static void nios2_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
 
     dc->mem_idx = cpu_mmu_index(env, false);
     dc->cr_state = cpu->cr_state;
+    dc->crs0 = FIELD_EX32(dc->base.tb->flags, TBFLAGS, CRS0);
 
     /* Bound the number of insns to execute to those left on the page.  */
     page_insns = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
@@ -927,13 +946,26 @@  void nios2_cpu_dump_state(CPUState *cs, FILE *f, int flags)
 
 void nios2_tcg_init(void)
 {
-    int i;
+#ifndef CONFIG_USER_ONLY
+    TCGv_ptr crs = tcg_global_mem_new_ptr(cpu_env,
+                                          offsetof(CPUNios2State, regs), "crs");
 
-    for (i = 0; i < NUM_GP_REGS; i++) {
-        cpu_R[i] = tcg_global_mem_new(cpu_env,
-                                      offsetof(CPUNios2State, regs[i]),
+    for (int i = 0; i < NUM_GP_REGS; i++) {
+        cpu_crs_R[i] = tcg_global_mem_new(crs, 4 * i, gr_regnames[i]);
+    }
+
+#define offsetof_regs0(N)  offsetof(CPUNios2State, shadow_regs[0][N])
+#else
+#define offsetof_regs0(N)  offsetof(CPUNios2State, regs[N])
+#endif
+
+    for (int i = 0; i < NUM_GP_REGS; i++) {
+        cpu_R[i] = tcg_global_mem_new(cpu_env, offsetof_regs0(i),
                                       gr_regnames[i]);
     }
+
+#undef offsetof_regs0
+
     cpu_pc = tcg_global_mem_new(cpu_env,
                                 offsetof(CPUNios2State, pc), "pc");
 }