diff mbox

[RFC,20/28] target-xtensa: implement windowed registers

Message ID 1304470768-16924-20-git-send-email-jcmvbkbc@gmail.com
State New
Headers show

Commit Message

Max Filippov May 4, 2011, 12:59 a.m. UTC
See ISA, 4.7.1 for details.

The physical registers and the currently visible window are separate
fields in CPUEnv. Only the current window is accessible to TCG. On
operations that change the window base, helpers copy the current window
to and from the physical registers.

The window overflow check described in 4.7.1.3 is in a separate patch.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 target-xtensa/cpu.h       |    6 ++
 target-xtensa/helper.c    |    1 +
 target-xtensa/helpers.h   |    8 ++
 target-xtensa/op_helper.c |  172 +++++++++++++++++++++++++++++++++++++++++++++
 target-xtensa/translate.c |  134 ++++++++++++++++++++++++++++++++---
 5 files changed, 312 insertions(+), 9 deletions(-)

Comments

Blue Swirl May 4, 2011, 7:35 p.m. UTC | #1
On Wed, May 4, 2011 at 3:59 AM, Max Filippov <jcmvbkbc@gmail.com> wrote:
> See ISA, 4.7.1 for details.
>
> Physical registers and currently visible window are separate fields in
> CPUEnv. Only current window is accessible to TCG. On operations that
> change window base helpers copy current window to and from physical
> registers.

I'm not sure how the register windows work, but maybe you could use
the same trick used for Sparc. There is a pool of registers
(env->regbase[]), a register window pointer (env->regwptr,
cpu_regwptr) tracks which are the currently accessible ones. The
advantage is to avoid copying (not entirely for Sparc due to the
window overlap).
Richard Henderson May 4, 2011, 8:07 p.m. UTC | #2
On 05/04/2011 12:35 PM, Blue Swirl wrote:
> On Wed, May 4, 2011 at 3:59 AM, Max Filippov <jcmvbkbc@gmail.com> wrote:
>> See ISA, 4.7.1 for details.
>>
>> Physical registers and currently visible window are separate fields in
>> CPUEnv. Only current window is accessible to TCG. On operations that
>> change window base helpers copy current window to and from physical
>> registers.
> 
> I'm not sure how the register windows work, but maybe you could use
> the same trick used for Sparc. There is a pool of registers
> (env->regbase[]), a register window pointer (env->regwptr,
> cpu_regwptr) tracks which are the currently accessible ones. The
> advantage is to avoid copying (not entirely for Sparc due to the
> window overlap).

Sparc loses out on some TCG optimizations because of that, although
to be fair the most effective of these are still in Aurélien's trees.

That said, I still would not recommend a new port to follow suit.


r~
Blue Swirl May 4, 2011, 8:13 p.m. UTC | #3
On Wed, May 4, 2011 at 11:07 PM, Richard Henderson <rth@twiddle.net> wrote:
> On 05/04/2011 12:35 PM, Blue Swirl wrote:
>> On Wed, May 4, 2011 at 3:59 AM, Max Filippov <jcmvbkbc@gmail.com> wrote:
>>> See ISA, 4.7.1 for details.
>>>
>>> Physical registers and currently visible window are separate fields in
>>> CPUEnv. Only current window is accessible to TCG. On operations that
>>> change window base helpers copy current window to and from physical
>>> registers.
>>
>> I'm not sure how the register windows work, but maybe you could use
>> the same trick used for Sparc. There is a pool of registers
>> (env->regbase[]), a register window pointer (env->regwptr,
>> cpu_regwptr) tracks which are the currently accessible ones. The
>> advantage is to avoid copying (not entirely for Sparc due to the
>> window overlap).
>
> Sparc loses out on some TCG optimizations because of that, although
> to be fair the most effective of these are still in Aurélien's trees.

Interesting. Which optimizations? What trees? How would you implement
the register windows then?
Richard Henderson May 4, 2011, 8:30 p.m. UTC | #4
On 05/04/2011 01:13 PM, Blue Swirl wrote:
>> Sparc loses out on some TCG optimizations because of that, although
>> to be fair the most effective of these are still in Aurélien's trees.
> 
> Interesting. Which optimizations? What trees? How would you implement
> the register windows then?

Constant propagation for one.  This one in particular would allow the
tcg backend to see full constants, rather than seeing the constant 
being built up from risc instructions.

  git://git.aurel32.net/qemu.git tcg-optimizations

I have previously built on this to streamline the code generated for
target load/store operations.  We currently always force the address
into a register and do the arithmetic on that.  If we have the full
constant for the address, we can push the parts of that constant into
the TLB load etc.

  git://repo.or.cz/qemu/rth.git tcg-const-addr-1

As for how to implement the register windows...

I'm not 100% sure.  The easiest way is indeed to copy values to and
from a consolidated register file, as Max is doing here.  I've also
experimented briefly with extending TCG to handle "indirect" registers.
Where the register values are consistent as far as the TCG optimizers
are concerned, but when it comes time to expand the code, we perform
the indirect read, just as you currently expand by hand ahead of time.
I never got either solution totally working for sparc.


r~
diff mbox

Patch

diff --git a/target-xtensa/cpu.h b/target-xtensa/cpu.h
index 25041be..3ebccd1 100644
--- a/target-xtensa/cpu.h
+++ b/target-xtensa/cpu.h
@@ -108,6 +108,8 @@  enum {
 enum {
     SAR = 3,
     SCOMPARE1 = 12,
+    WINDOW_BASE = 72,
+    WINDOW_START = 73,
     EPC1 = 177,
     DEPC = 192,
     EXCSAVE1 = 209,
@@ -134,6 +136,8 @@  enum {
 
 #define PS_WOE 0x40000
 
+#define MAX_NAREG 64
+
 enum {
     /* Static vectors */
     EXC_RESET,
@@ -185,6 +189,7 @@  enum {
 typedef struct XtensaConfig {
     const char *name;
     uint64_t options;
+    unsigned nareg;
     int excm_level;
     int ndepc;
     uint32_t exception_vector[EXC_MAX];
@@ -196,6 +201,7 @@  typedef struct CPUXtensaState {
     uint32_t pc;
     uint32_t sregs[256];
     uint32_t uregs[256];
+    uint32_t phys_regs[MAX_NAREG];
 
     int exception_taken;
 
diff --git a/target-xtensa/helper.c b/target-xtensa/helper.c
index e641360..61d1ab3 100644
--- a/target-xtensa/helper.c
+++ b/target-xtensa/helper.c
@@ -45,6 +45,7 @@  static const XtensaConfig core_config[] = {
     {
         .name = "sample-xtensa-core",
         .options = -1,
+        .nareg = 64,
         .ndepc = 1,
         .excm_level = 16,
         .exception_vector = {
diff --git a/target-xtensa/helpers.h b/target-xtensa/helpers.h
index f13e005..4a50280 100644
--- a/target-xtensa/helpers.h
+++ b/target-xtensa/helpers.h
@@ -3,5 +3,13 @@ 
 DEF_HELPER_1(exception, void, i32)
 DEF_HELPER_2(exception_cause, void, i32, i32)
 DEF_HELPER_3(exception_cause_vaddr, void, i32, i32, i32)
+DEF_HELPER_1(wsr_windowbase, void, i32)
+DEF_HELPER_3(entry, void, i32, i32, i32)
+DEF_HELPER_1(retw, i32, i32)
+DEF_HELPER_1(rotw, void, i32)
+DEF_HELPER_2(window_check, void, i32, i32)
+DEF_HELPER_0(restore_owb, void)
+DEF_HELPER_1(movsp, void, i32)
+DEF_HELPER_0(dump_state, void)
 
 #include "def-helper.h"
diff --git a/target-xtensa/op_helper.c b/target-xtensa/op_helper.c
index 9bfe493..b5925dd 100644
--- a/target-xtensa/op_helper.c
+++ b/target-xtensa/op_helper.c
@@ -85,3 +85,175 @@  void HELPER(exception_cause_vaddr)(uint32_t pc, uint32_t cause, uint32_t vaddr)
     env->sregs[EXCVADDR] = vaddr;
     HELPER(exception_cause)(pc, cause);
 }
+
+static void copy_window_from_phys(uint32_t window, uint32_t phys, uint32_t n)
+{
+    assert(phys < env->config->nareg);
+    if (phys + n <= env->config->nareg) {
+        memcpy(env->regs + window, env->phys_regs + phys,
+                n * sizeof(uint32_t));
+    } else {
+        uint32_t n1 = env->config->nareg - phys;
+        memcpy(env->regs + window, env->phys_regs + phys,
+                n1 * sizeof(uint32_t));
+        memcpy(env->regs + window + n1, env->phys_regs,
+                (n - n1) * sizeof(uint32_t));
+    }
+}
+
+static void copy_phys_from_window(uint32_t phys, uint32_t window, uint32_t n)
+{
+    assert(phys < env->config->nareg);
+    if (phys + n <= env->config->nareg) {
+        memcpy(env->phys_regs + phys, env->regs + window,
+                n * sizeof(uint32_t));
+    } else {
+        uint32_t n1 = env->config->nareg - phys;
+        memcpy(env->phys_regs + phys, env->regs + window,
+                n1 * sizeof(uint32_t));
+        memcpy(env->phys_regs, env->regs + window + n1,
+                (n - n1) * sizeof(uint32_t));
+    }
+}
+
+
+#define WINDOWBASE_BOUND(a) ((a) & (env->config->nareg / 4 - 1))
+#define WINDOW_BOUND(a) ((a) & (env->config->nareg - 1))
+#define WINDOWSTART_BIT(a) (1 << WINDOWBASE_BOUND(a))
+
+static void rotate_window_abs(uint32_t position)
+{
+    copy_phys_from_window(env->sregs[WINDOW_BASE] * 4, 0, 16);
+    env->sregs[WINDOW_BASE] = WINDOWBASE_BOUND(position);
+    copy_window_from_phys(0, env->sregs[WINDOW_BASE] * 4, 16);
+}
+
+static void rotate_window(uint32_t delta)
+{
+    rotate_window_abs(env->sregs[WINDOW_BASE] + delta);
+}
+
+void HELPER(wsr_windowbase)(uint32_t v)
+{
+    rotate_window_abs(v);
+}
+
+void HELPER(entry)(uint32_t pc, uint32_t s, uint32_t imm)
+{
+    int callinc = (env->sregs[PS] & PS_CALLINC) >> PS_CALLINC_SHIFT;
+    if (s > 3 || ((env->sregs[PS] & (PS_WOE | PS_EXCM)) ^ PS_WOE) != 0) {
+        printf("Illegal entry instruction (%08x), PS = %08x\n",
+                pc, env->sregs[PS]);
+        HELPER(exception_cause)(pc, ILLEGAL_INSTRUCTION_CAUSE);
+    } else {
+        env->regs[(callinc << 2) | (s & 3)] = env->regs[s] - (imm << 3);
+        rotate_window(callinc);
+        env->sregs[WINDOW_START] |= WINDOWSTART_BIT(env->sregs[WINDOW_BASE]);
+    }
+}
+
+void HELPER(window_check)(uint32_t pc, uint32_t w)
+{
+    uint32_t windowbase = WINDOWBASE_BOUND(env->sregs[WINDOW_BASE]);
+    uint32_t windowstart = env->sregs[WINDOW_START];
+    uint32_t m, n;
+
+    if ((env->sregs[PS] & (PS_WOE | PS_EXCM)) ^ PS_WOE) {
+        return;
+    }
+
+    for (n = 1; n <= w; ++n)
+        if (windowstart & WINDOWSTART_BIT(windowbase + n)) {
+            break;
+        }
+
+    if (n > w) {
+        return;
+    }
+
+    m = WINDOWBASE_BOUND(windowbase + n);
+    rotate_window(n);
+    env->sregs[PS] = (env->sregs[PS] & ~PS_OWB) |
+        (windowbase << PS_OWB_SHIFT) | PS_EXCM;
+    env->sregs[EPC1] = env->pc = pc;
+
+    if (windowstart & WINDOWSTART_BIT(m + 1)) {
+        HELPER(exception)(EXC_WINDOW_OVERFLOW4);
+    } else if (windowstart & WINDOWSTART_BIT(m + 2)) {
+        HELPER(exception)(EXC_WINDOW_OVERFLOW8);
+    } else {
+        HELPER(exception)(EXC_WINDOW_OVERFLOW12);
+    }
+}
+
+uint32_t HELPER(retw)(uint32_t pc)
+{
+    int n = (env->regs[0] >> 30) & 0x3;
+    int m = 0;
+    uint32_t windowbase = WINDOWBASE_BOUND(env->sregs[WINDOW_BASE]);
+    uint32_t windowstart = env->sregs[WINDOW_START];
+    uint32_t ret_pc = 0;
+
+    if (windowstart & WINDOWSTART_BIT(windowbase - 1)) {
+        m = 1;
+    } else if (windowstart & WINDOWSTART_BIT(windowbase - 2)) {
+        m = 2;
+    } else if (windowstart & WINDOWSTART_BIT(windowbase - 3)) {
+        m = 3;
+    }
+
+    if (n == 0 || (m != 0 && m != n) ||
+            ((env->sregs[PS] & (PS_WOE | PS_EXCM)) ^ PS_WOE) != 0) {
+        printf("Illegal retw instruction (%08x), PS = %08x, m = %d, n = %d\n",
+                pc, env->sregs[PS], m, n);
+        HELPER(exception_cause)(pc, ILLEGAL_INSTRUCTION_CAUSE);
+    } else {
+        int owb = windowbase;
+
+        ret_pc = (pc & 0xc0000000) | (env->regs[0] & 0x3fffffff);
+
+        rotate_window(-n);
+        if (windowstart & WINDOWSTART_BIT(env->sregs[WINDOW_BASE])) {
+            env->sregs[WINDOW_START] &= ~WINDOWSTART_BIT(owb);
+        } else {
+            /* window underflow */
+            env->sregs[PS] = (env->sregs[PS] & ~PS_OWB) |
+                (windowbase << PS_OWB_SHIFT) | PS_EXCM;
+            env->sregs[EPC1] = env->pc = pc;
+
+            if (n == 1) {
+                HELPER(exception)(EXC_WINDOW_UNDERFLOW4);
+            } else if (n == 2) {
+                HELPER(exception)(EXC_WINDOW_UNDERFLOW8);
+            } else if (n == 3) {
+                HELPER(exception)(EXC_WINDOW_UNDERFLOW12);
+            }
+        }
+    }
+    return ret_pc;
+}
+
+void HELPER(rotw)(uint32_t imm4)
+{
+    rotate_window(imm4);
+}
+
+void HELPER(restore_owb)(void)
+{
+    rotate_window_abs((env->sregs[PS] & PS_OWB) >> PS_OWB_SHIFT);
+}
+
+void HELPER(movsp)(uint32_t pc)
+{
+    if ((env->sregs[WINDOW_START] &
+            (WINDOWSTART_BIT(env->sregs[WINDOW_BASE] - 3) |
+             WINDOWSTART_BIT(env->sregs[WINDOW_BASE] - 2) |
+             WINDOWSTART_BIT(env->sregs[WINDOW_BASE] - 1))) == 0) {
+        HELPER(exception_cause)(pc, ALLOCA_CAUSE);
+    }
+}
+
+void HELPER(dump_state)(void)
+{
+    cpu_dump_state(env, stderr, fprintf, 0);
+}
diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index d635229..f3aecaa 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c
@@ -59,6 +59,8 @@  static TCGv_i32 cpu_UR[256];
 static const char * const sregnames[256] = {
     [SAR] = "SAR",
     [SCOMPARE1] = "SCOMPARE1",
+    [WINDOW_BASE] = "WINDOW_BASE",
+    [WINDOW_START] = "WINDOW_START",
     [EPC1] = "EPC1",
     [DEPC] = "DEPC",
     [EXCSAVE1] = "EXCSAVE1",
@@ -124,10 +126,16 @@  static void gen_rsr(TCGv_i32 d, int sr)
     }
 }
 
+static void gen_wsr_windowbase(DisasContext *dc, uint32_t sr, TCGv_i32 v)
+{
+    gen_helper_wsr_windowbase(v);
+}
+
 static void gen_wsr(DisasContext *dc, uint32_t sr, TCGv_i32 s)
 {
     static void (* const wsr_handler[256])(DisasContext *dc,
             uint32_t sr, TCGv_i32 v) = {
+        [WINDOW_BASE] = gen_wsr_windowbase,
     };
 
     if (sregnames[sr]) {
@@ -192,6 +200,18 @@  static void gen_jumpi(DisasContext *dc, uint32_t dest)
     tcg_temp_free(tmp);
 }
 
+static void gen_callw(DisasContext *dc, int _callinc, TCGv_i32 target)
+{
+    TCGv_i32 callinc = tcg_const_i32(_callinc);
+
+    tcg_gen_deposit_i32(cpu_SR[PS], cpu_SR[PS],
+            callinc, PS_CALLINC_SHIFT, PS_CALLINC_LEN);
+    tcg_temp_free(callinc);
+    tcg_gen_movi_i32(cpu_R[_callinc << 2],
+            (_callinc << 30) | ((dc->pc + 3) & 0x3fffffff));
+    gen_jump(dc, target);
+}
+
 static void disas_xtensa_insn(DisasContext *dc)
 {
 #define HAS_OPTION(opt) do { \
@@ -317,7 +337,12 @@  static void disas_xtensa_insn(DisasContext *dc)
 
                         case 1: /*RETWw*/
                             HAS_OPTION(XTENSA_OPTION_WINDOWED_REGISTER);
-                            TBD();
+                            {
+                                TCGv_i32 tmp = tcg_const_i32(dc->pc);
+                                gen_helper_retw(tmp, tmp);
+                                gen_jump(dc, tmp);
+                                tcg_temp_free(tmp);
+                            }
                             break;
 
                         case 3: /*reserved*/
@@ -342,7 +367,13 @@  static void disas_xtensa_insn(DisasContext *dc)
                         case 2: /*CALLX8w*/
                         case 3: /*CALLX12w*/
                             HAS_OPTION(XTENSA_OPTION_WINDOWED_REGISTER);
-                            TBD();
+                            {
+                                TCGv_i32 tmp = tcg_temp_new_i32();
+
+                                tcg_gen_mov_i32(tmp, cpu_R[CALLX_S]);
+                                gen_callw(dc, CALLX_N, tmp);
+                                tcg_temp_free(tmp);
+                            }
                             break;
                         }
                         break;
@@ -351,7 +382,12 @@  static void disas_xtensa_insn(DisasContext *dc)
 
                 case 1: /*MOVSPw*/
                     HAS_OPTION(XTENSA_OPTION_WINDOWED_REGISTER);
-                    TBD();
+                    {
+                        TCGv_i32 pc = tcg_const_i32(dc->pc);
+                        gen_helper_movsp(pc);
+                        tcg_gen_mov_i32(cpu_R[RRR_T], cpu_R[RRR_S]);
+                        tcg_temp_free(pc);
+                    }
                     break;
 
                 case 2: /*SYNC*/
@@ -411,7 +447,27 @@  static void disas_xtensa_insn(DisasContext *dc)
                         case 4: /*RFWOw*/
                         case 5: /*RFWUw*/
                             HAS_OPTION(XTENSA_OPTION_WINDOWED_REGISTER);
-                            TBD();
+                            gen_check_privilege(dc);
+                            {
+                                TCGv_i32 tmp = tcg_const_i32(1);
+
+                                tcg_gen_andi_i32(
+                                        cpu_SR[PS], cpu_SR[PS], ~PS_EXCM);
+                                tcg_gen_shl_i32(tmp, tmp, cpu_SR[WINDOW_BASE]);
+
+                                if (RRR_S == 4) {
+                                    tcg_gen_andc_i32(cpu_SR[WINDOW_START],
+                                            cpu_SR[WINDOW_START], tmp);
+                                } else {
+                                    tcg_gen_or_i32(cpu_SR[WINDOW_START],
+                                            cpu_SR[WINDOW_START], tmp);
+                                }
+
+                                gen_helper_restore_owb();
+                                gen_jump(dc, cpu_SR[EPC1]);
+
+                                tcg_temp_free(tmp);
+                            }
                             break;
 
                         default: /*reserved*/
@@ -563,7 +619,13 @@  static void disas_xtensa_insn(DisasContext *dc)
 
                 case 8: /*ROTWw*/
                     HAS_OPTION(XTENSA_OPTION_WINDOWED_REGISTER);
-                    TBD();
+                    gen_check_privilege(dc);
+                    {
+                        TCGv_i32 tmp = tcg_const_i32(
+                                RRR_T | ((RRR_T & 8) ? 0xfffffff0 : 0));
+                        gen_helper_rotw(tmp);
+                        tcg_temp_free(tmp);
+                    }
                     break;
 
                 case 14: /*NSAu*/
@@ -1019,7 +1081,37 @@  static void disas_xtensa_insn(DisasContext *dc)
             break;
 
         case 9: /*LSC4*/
-            TBD();
+            switch (_OP2) {
+            case 0: /*L32E*/
+                HAS_OPTION(XTENSA_OPTION_WINDOWED_REGISTER);
+                gen_check_privilege(dc);
+                {
+                    TCGv_i32 addr = tcg_temp_new_i32();
+                    tcg_gen_addi_i32(addr, cpu_R[RRR_S],
+                            (0xffffffc0 | (RRR_R << 2)));
+                    /*TODO protection control*/
+                    tcg_gen_qemu_ld32u(cpu_R[RRR_T], addr, 0);
+                    tcg_temp_free(addr);
+                }
+                break;
+
+            case 4: /*S32E*/
+                HAS_OPTION(XTENSA_OPTION_WINDOWED_REGISTER);
+                gen_check_privilege(dc);
+                {
+                    TCGv_i32 addr = tcg_temp_new_i32();
+                    tcg_gen_addi_i32(addr, cpu_R[RRR_S],
+                            (0xffffffc0 | (RRR_R << 2)));
+                    /*TODO protection control*/
+                    tcg_gen_qemu_st32(cpu_R[RRR_T], addr, 0);
+                    tcg_temp_free(addr);
+                }
+                break;
+
+            default:
+                RESERVED();
+                break;
+            }
             break;
 
         case 10: /*FP0*/
@@ -1257,7 +1349,12 @@  static void disas_xtensa_insn(DisasContext *dc)
         case 2: /*CALL8w*/
         case 3: /*CALL12w*/
             HAS_OPTION(XTENSA_OPTION_WINDOWED_REGISTER);
-            TBD();
+            {
+                TCGv_i32 tmp = tcg_const_i32(
+                        (dc->pc & ~3) + (CALL_OFFSET_SE << 2) + 4);
+                gen_callw(dc, CALL_N, tmp);
+                tcg_temp_free(tmp);
+            }
             break;
         }
         break;
@@ -1316,7 +1413,15 @@  static void disas_xtensa_insn(DisasContext *dc)
             switch (BRI8_M) {
             case 0: /*ENTRYw*/
                 HAS_OPTION(XTENSA_OPTION_WINDOWED_REGISTER);
-                TBD();
+                {
+                    TCGv_i32 pc = tcg_const_i32(dc->pc);
+                    TCGv_i32 s = tcg_const_i32(BRI12_S);
+                    TCGv_i32 imm = tcg_const_i32(BRI12_IMM12);
+                    gen_helper_entry(pc, s, imm);
+                    tcg_temp_free(imm);
+                    tcg_temp_free(s);
+                    tcg_temp_free(pc);
+                }
                 break;
 
             case 1: /*B1*/
@@ -1499,7 +1604,12 @@  static void disas_xtensa_insn(DisasContext *dc)
 
             case 1: /*RETW.Nn*/
                 HAS_OPTION(XTENSA_OPTION_WINDOWED_REGISTER);
-                TBD();
+                {
+                    TCGv_i32 tmp = tcg_const_i32(dc->pc);
+                    gen_helper_retw(tmp, tmp);
+                    gen_jump(dc, tmp);
+                    tcg_temp_free(tmp);
+                }
                 break;
 
             case 2: /*BREAK.Nn*/
@@ -1662,6 +1772,12 @@  void cpu_dump_state(CPUState *env, FILE *f, fprintf_function cpu_fprintf,
     for (i = 0; i < 16; ++i)
         cpu_fprintf(f, "A%02d=%08x%c", i, env->regs[i],
                 (i % 4) == 3 ? '\n' : ' ');
+
+    cpu_fprintf(f, "\n");
+
+    for (i = 0; i < env->config->nareg; ++i)
+        cpu_fprintf(f, "AR%02d=%08x%c", i, env->phys_regs[i],
+                (i % 4) == 3 ? '\n' : ' ');
 }
 
 void restore_state_to_opc(CPUState *env, TranslationBlock *tb, int pc_pos)