Patchwork [23/26] target-xtensa: implement interrupt option

login
register
mail settings
Submitter Max Filippov
Date May 17, 2011, 10:32 p.m.
Message ID <1305671572-5899-24-git-send-email-jcmvbkbc@gmail.com>
Download mbox | patch
Permalink /patch/96072/
State New
Headers show

Comments

Max Filippov - May 17, 2011, 10:32 p.m.
See ISA, 4.4.6 (interrupt option), 4.4.7 (high priority interrupt
option) and 4.4.8 (timer interrupt option) for details.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
RFC -> PATCH changes:
- halt CPU on waiti, use qemu timer to wake up on CCOMPARE match;
- advance CCOUNT on TB exit, on exception and on RSR.CCOUNT;
---
 hw/xtensa_pic.c           |   83 +++++++++++++++++++++++++++++++
 target-xtensa/cpu.h       |   39 ++++++++++++++
 target-xtensa/exec.h      |    2 +-
 target-xtensa/helper.c    |   80 +++++++++++++++++++++++++++++-
 target-xtensa/helpers.h   |    5 ++
 target-xtensa/op_helper.c |   44 ++++++++++++++++
 target-xtensa/translate.c |  121 ++++++++++++++++++++++++++++++++++++++++++---
 7 files changed, 365 insertions(+), 9 deletions(-)
Richard Henderson - May 20, 2011, 3:44 p.m.
On 05/17/2011 03:32 PM, Max Filippov wrote:
> +    if (xtensa_option_enabled(env->config, XTENSA_OPTION_TIMER_INTERRUPT)) {
> +        int i;
> +        for (i = 0; i < env->config->nccompare; ++i) {
> +            if (env->sregs[CCOMPARE + i] - old_ccount <= d) {
> +                env->halted = 0;
> +                xtensa_timer_irq(env, i, 1);

I don't think you should be writing to halted here; this is done by
the code in cpu-exec.c when it notices that cpu_has_work is true, which
it will be as a function of env->interrupt_request and the interrupt mask.


> +            if (env->halted) {
> +                xtensa_advance_ccount(env,
> +                        muldiv64(qemu_get_clock_ns(vm_clock) - env->halt_clock,
> +                            env->config->clock_freq_khz, 1000000));
> +            }

Why are you polling the vm_clock rather than setting up a timer?

> +        env->ccompare_timer =
> +            qemu_new_timer_ns(vm_clock, &xtensa_ccompare_cb, env);

... er, actually you are setting up a timer.  So why aren't you using it?

>  void do_interrupt(CPUState *env)
>  {
>      switch (env->exception_index) {
> +    case EXC_IRQ:
> +        if (handle_interrupt(env)) {
> +            break;
> +        }
> +        /* not handled interrupt falls through,
> +         * env->exception_index is updated
> +         */

Do you really want to fall through, rather than restart the switch?

> @@ -124,12 +198,16 @@ void do_interrupt(CPUState *env)
>          if (env->config->exception_vector[env->exception_index]) {
>              env->pc = env->config->exception_vector[env->exception_index];
>              env->exception_taken = 1;
> +            env->interrupt_request |= CPU_INTERRUPT_EXITTB;

Huh?  What are you trying to accomplish here?
EXITTB is supposed to be used when a device external to the cpu
changes the memory mapping of the system.  E.g. the x86 a20 line.

> +DEF_HELPER_0(check_interrupts, void)
> +DEF_HELPER_2(waiti, void, i32, i32)
> +DEF_HELPER_2(timer_irq, void, i32, i32)
> +DEF_HELPER_1(advance_ccount, void, i32)

You shouldn't have to manage any of this from within the translator.


r~
Max Filippov - May 20, 2011, 8:05 p.m.
> > +    if (xtensa_option_enabled(env->config, XTENSA_OPTION_TIMER_INTERRUPT)) {
> > +        int i;
> > +        for (i = 0; i < env->config->nccompare; ++i) {
> > +            if (env->sregs[CCOMPARE + i] - old_ccount <= d) {
> > +                env->halted = 0;
> > +                xtensa_timer_irq(env, i, 1);
> 
> I don't think you should be writing to halted here; this is done by
> the code in cpu-exec.c when it notices that cpu_has_work is true, which
> it will be as a function of env->interrupt_request and the interrupt mask.

I do it here to distinguish an interrupt caused by a CCOMPARE match, for which I want the exact CCOUNT value, from other interrupt sources, for which CCOUNT may be advanced approximately.

> > +            if (env->halted) {
> > +                xtensa_advance_ccount(env,
> > +                        muldiv64(qemu_get_clock_ns(vm_clock) - env->halt_clock,
> > +                            env->config->clock_freq_khz, 1000000));
> > +            }
> 
> Why are you polling the vm_clock rather than setting up a timer?

I'm not polling, I'm adjusting ccount according to vm_clock time passed since we were halted.

> > +        env->ccompare_timer =
> > +            qemu_new_timer_ns(vm_clock, &xtensa_ccompare_cb, env);
> 
> ... er, actually you are setting up a timer.  So why aren't you using it?

I'm using it: it is wound up by HELPER(waiti) when there are no currently pending interrupts, and xtensa_ccompare_cb calls xtensa_advance_ccount, which in turn calls xtensa_timer_irq.

> >  void do_interrupt(CPUState *env)
> >  {
> >      switch (env->exception_index) {
> > +    case EXC_IRQ:
> > +        if (handle_interrupt(env)) {
> > +            break;
> > +        }
> > +        /* not handled interrupt falls through,
> > +         * env->exception_index is updated
> > +         */
> 
> Do you really want to fall through, rather than restart the switch?

handle_interrupt will handle high-priority interrupt requests, leaving only level-1 interrupts, which it converts into EXC_USER or EXC_KERNEL. If only for the sake of readability...

> > @@ -124,12 +198,16 @@ void do_interrupt(CPUState *env)
> >          if (env->config->exception_vector[env->exception_index]) {
> >              env->pc = env->config->exception_vector[env->exception_index];
> >              env->exception_taken = 1;
> > +            env->interrupt_request |= CPU_INTERRUPT_EXITTB;
> 
> Huh?  What are you trying to accomplish here?
> EXITTB is supposed to be used when a device external to the cpu
> changes the memory mapping of the system.  E.g. the x86 a20 line.

I used it to have next_tb = 0 in cpu_exec, after return from do_interrupt, but now it is done unconditionally, so there's no need for CPU_INTERRUPT_EXITTB.
By the way, do I understand it right that if I chain TBs then I need to periodically check for pending interrupts myself, otherwise e.g. "j $" will create an uninterruptible infinite loop?

> > +DEF_HELPER_0(check_interrupts, void)
> > +DEF_HELPER_2(waiti, void, i32, i32)
> > +DEF_HELPER_2(timer_irq, void, i32, i32)
> > +DEF_HELPER_1(advance_ccount, void, i32)
> 
> You shouldn't have to manage any of this from within the translator.

Please explain.

Thanks.
-- Max
Richard Henderson - May 20, 2011, 8:49 p.m.
On 05/20/2011 01:05 PM, Max Filippov wrote:
> By the way, do I understand it right that if I chain TBs then I need
> to periodically check for pending interrupts myself, otherwise e.g.
> "j $" will create an uninterruptible infinite loop?

No, it won't.  It'll create a loop, but it'll be broken by the host
signal handler.  Notice no other target is checking for this.

>>> +DEF_HELPER_0(check_interrupts, void)
>>> +DEF_HELPER_2(waiti, void, i32, i32)
>>> +DEF_HELPER_2(timer_irq, void, i32, i32)
>>> +DEF_HELPER_1(advance_ccount, void, i32)
>>
>> You shouldn't have to manage any of this from within the translator.

You should *never* have to check for interrupts, or advance cycle counters,
from within the translated code.  Interrupt processing, and thus timers,
are handled in between TBs as necessary by generic code.


r~
Max Filippov - May 20, 2011, 9:30 p.m.
> > By the way, do I understand it right that if I chain TBs then I need
> > to periodically check for pending interrupts myself, otherwise e.g.
> > "j $" will create an uninterruptible infinite loop?
> 
> No, it won't.  It'll create a loop, but it'll be broken by the host
> signal handler.  Notice no other target is checking for this.
> 
> >>> +DEF_HELPER_0(check_interrupts, void)
> >>> +DEF_HELPER_2(waiti, void, i32, i32)
> >>> +DEF_HELPER_2(timer_irq, void, i32, i32)
> >>> +DEF_HELPER_1(advance_ccount, void, i32)
> >>
> >> You shouldn't have to manage any of this from within the translator.
> 
> You should *never* have to check for interrupts, or advance cycle counters,
> from within the translated code.  Interrupt processing, and thus timers,
> are handled in between TBs as necessary by generic code.

Well,
- cycles fed into advance_ccount may (and on real hardware actually do) depend on executed commands/pipeline/cache hits. Most of this stuff may be counted at the translation time;
- timer_irq is a helper that raises IRQ generated by CCOMPARE match;
- waiti is a helper for the instruction of the same name;
- check_interrupts converts IRQs on enabled interrupt sources into current irq level.

Thanks.
-- Max
Richard Henderson - May 20, 2011, 10:19 p.m.
On 05/20/2011 02:30 PM, Max Filippov wrote:
> - cycles fed into advance_ccount may (and on real hardware actually
> do) depend on executed commands/pipeline/cache hits. Most of this
> stuff may be counted at the translation time;

Since CCOUNT, as seen by any one thread of execution, on real hw depends
on cache hits, interrupts, and other asynchronous stuff, then don't bother
trying to account for it on a per-instruction basis.  Just define a clock
that runs at a given rate and be done.  No need to advance it manually.

> - timer_irq is a helper that raises IRQ generated by CCOMPARE match;

That's what the callback from qemu_new_timer_ns is for.

You've got N CCOMPARE registers, you can create N timers all of which
are setup to raise the IRQ.

cpu_xtensa_init(...)
{
    ...
    for (i = 0; i < N; ++i)
        env->ccompare_timer[i] = qemu_new_timer(vm_clock, HZ, xtensa_ccompare_cb, env);
}

void xtensa_ccompare_cb(CPUState *env)
{
    /* Set something in the processor state.  */
    cpu_interrupt(env, CPU_INTERRUPT_HARD);
}

void HELPER(wsr_ccompare)(int which, uint32_t match)
{
    qemu_mod_timer(env->ccompare_timer[which], match);
}

> - waiti is a helper for the instruction of the same name;

Sure, this one should exist for the translator.  But it should do nothing
except change PS.INTLEVEL, set env->halted, and do cpu_loop_exit.  All of
the actual waiting should be in the main QEMU loop.

Because it exits to the main loop, it should also end the TB.

> - check_interrupts converts IRQs on enabled interrupt sources into current irq level.

This is needed during do_interrupt, but should not be used by the translator itself.

You may find some of this easier if you utilize the IRQ cleanup patch series that
was recently committed to HEAD.  See CPU_INTERRUPT_TGT_EXT_[0-4] in cpu-all.h and
its uses in the various targets.


r~
Max Filippov - May 24, 2011, 10:28 a.m.
>> - cycles fed into advance_ccount may (and on real hardware actually
>> do) depend on executed commands/pipeline/cache hits. Most of this
>> stuff may be counted at the translation time;
>
> Since CCOUNT, as seen by any one thread of execution, on real hw depends
> on cache hits, interrupts, and other asynchronous stuff, then don't bother
> trying to account for it on a per-instruction basis.  Just define a clock
> that runs at a given rate and be done.  No need to advance it manually.

It means no cycle-accurate emulation. This was one of my goals, maybe
not the closest one. Is it acceptable to have two simulation modes -- fast
functional and slower cycle-accurate?

>> - check_interrupts converts IRQs on enabled interrupt sources into current irq level.
> This is needed during do_interrupt, but should not be used by the translator itself.

Not only during do_interrupt, but also in cpu_has_work.

Thanks.
-- Max
Richard Henderson - May 24, 2011, 2:59 p.m.
On 05/24/2011 03:28 AM, Max Filippov wrote:
>>> - cycles fed into advance_ccount may (and on real hardware actually
>>> do) depend on executed commands/pipeline/cache hits. Most of this
>>> stuff may be counted at the translation time;
>>
>> Since CCOUNT, as seen by any one thread of execution, on real hw depends
>> on cache hits, interrupts, and other asynchronous stuff, then don't bother
>> trying to account for it on a per-instruction basis.  Just define a clock
>> that runs at a given rate and be done.  No need to advance it manually.
> 
> It means no cycle-accurate emulation. This was one of my goals, maybe
> not the closest one. Is it acceptable to have two simulation modes -- fast
> functional and slower cycle-accurate?

Huh.  Given that you're not modeling the caches, I don't see how you could
hope for true cycle accuracy.  As for whether a mostly-cycle-accurate mode
should be a goal...  I'll have to defer to other QEMU maintainers.


r~
Max Filippov - May 24, 2011, 3:11 p.m.
>>>> - cycles fed into advance_ccount may (and on real hardware actually
>>>> do) depend on executed commands/pipeline/cache hits. Most of this
>>>> stuff may be counted at the translation time;
>>>
>>> Since CCOUNT, as seen by any one thread of execution, on real hw depends
>>> on cache hits, interrupts, and other asynchronous stuff, then don't bother
>>> trying to account for it on a per-instruction basis.  Just define a clock
>>> that runs at a given rate and be done.  No need to advance it manually.
>>
>> It means no cycle-accurate emulation. This was one of my goals, maybe
>> not the closest one. Is it acceptable to have two simulation modes -- fast
>> functional and slower cycle-accurate?
>
> Huh.  Given that you're not modeling the caches, I don't see how you could
> hope for true cycle accuracy.  As for whether a mostly-cycle-accurate mode
> should be a goal...  I'll have to defer to other QEMU maintainers.

I'm going to model them eventually, as well as the pipeline and TCM/external
memory. Just need a stable basis to start with (:

Patch

diff --git a/hw/xtensa_pic.c b/hw/xtensa_pic.c
index 91a5445..de0cdc4 100644
--- a/hw/xtensa_pic.c
+++ b/hw/xtensa_pic.c
@@ -27,6 +27,8 @@ 
 
 #include "hw.h"
 #include "pc.h"
+#include "qemu-log.h"
+#include "qemu-timer.h"
 
 /* Stub functions for hardware that doesn't exist.  */
 void pic_info(Monitor *mon)
@@ -36,3 +38,84 @@  void pic_info(Monitor *mon)
 void irq_info(Monitor *mon)
 {
 }
+
+void xtensa_advance_ccount(CPUState *env, uint32_t d)
+{
+    uint32_t old_ccount = env->sregs[CCOUNT];
+
+    env->sregs[CCOUNT] += d;
+
+    if (xtensa_option_enabled(env->config, XTENSA_OPTION_TIMER_INTERRUPT)) {
+        int i;
+        for (i = 0; i < env->config->nccompare; ++i) {
+            if (env->sregs[CCOMPARE + i] - old_ccount <= d) {
+                env->halted = 0;
+                xtensa_timer_irq(env, i, 1);
+            }
+        }
+    }
+}
+
+void check_interrupts(CPUState *env)
+{
+    int minlevel = xtensa_get_cintlevel(env);
+    int level;
+
+    for (level = env->config->nlevel; level > minlevel; --level) {
+        if (env->config->level_mask[level] &
+                env->sregs[INTSET] &
+                env->sregs[INTENABLE]) {
+            if (env->halted) {
+                xtensa_advance_ccount(env,
+                        muldiv64(qemu_get_clock_ns(vm_clock) - env->halt_clock,
+                            env->config->clock_freq_khz, 1000000));
+            }
+            env->pending_irq_level = level;
+            cpu_interrupt(env, CPU_INTERRUPT_HARD);
+            return;
+        }
+    }
+    env->pending_irq_level = 0;
+    cpu_reset_interrupt(env, CPU_INTERRUPT_HARD);
+}
+
+static void xtensa_set_irq(void *opaque, int irq, int active)
+{
+    CPUState *env = opaque;
+
+    if (irq >= env->config->ninterrupt) {
+        qemu_log("%s: bad IRQ %d\n", __func__, irq);
+    } else {
+        uint32_t irq_bit = 1 << irq;
+
+        if (active) {
+            env->sregs[INTSET] |= irq_bit;
+        } else {
+            env->sregs[INTSET] &= ~irq_bit;
+        }
+
+        check_interrupts(env);
+    }
+}
+
+void xtensa_timer_irq(CPUState *env, uint32_t id, uint32_t active)
+{
+    qemu_set_irq(env->irq_inputs[env->config->timerint[id]], active);
+}
+
+static void xtensa_ccompare_cb(void *opaque)
+{
+    CPUState *env = opaque;
+    xtensa_advance_ccount(env, env->wake_ccount - env->sregs[CCOUNT]);
+}
+
+void xtensa_irq_init(CPUState *env)
+{
+    env->irq_inputs = (void **)qemu_allocate_irqs(
+            xtensa_set_irq, env, env->config->ninterrupt);
+    if (xtensa_option_enabled(env->config, XTENSA_OPTION_TIMER_INTERRUPT) &&
+            env->config->nccompare > 0) {
+        env->ccompare_timer =
+            qemu_new_timer_ns(vm_clock, &xtensa_ccompare_cb, env);
+    }
+}
diff --git a/target-xtensa/cpu.h b/target-xtensa/cpu.h
index 55d81e9..31d3e7e 100644
--- a/target-xtensa/cpu.h
+++ b/target-xtensa/cpu.h
@@ -116,10 +116,16 @@  enum {
     WINDOW_START = 73,
     EPC1 = 177,
     DEPC = 192,
+    EPS2 = 194,
     EXCSAVE1 = 209,
+    INTSET = 226,
+    INTCLEAR = 227,
+    INTENABLE = 228,
     PS = 230,
     EXCCAUSE = 232,
+    CCOUNT = 234,
     EXCVADDR = 238,
+    CCOMPARE = 240,
 };
 
 #define PS_INTLEVEL 0xf
@@ -141,6 +147,10 @@  enum {
 #define PS_WOE 0x40000
 
 #define MAX_NAREG 64
+#define MAX_NINTERRUPT 32
+#define MAX_NLEVEL 6
+#define MAX_NNMI 1
+#define MAX_NCCOMPARE 3
 
 enum {
     /* Static vectors */
@@ -190,6 +200,16 @@  enum {
     COPROCESSOR0_DISABLED = 32,
 };
 
+typedef enum {
+    INTTYPE_LEVEL,
+    INTTYPE_EDGE,
+    INTTYPE_NMI,
+    INTTYPE_SOFTWARE,
+    INTTYPE_TIMER,
+    INTTYPE_DEBUG,
+    INTTYPE_WRITE_ERR,
+} interrupt_type_t;
+
 typedef struct XtensaConfig {
     const char *name;
     uint64_t options;
@@ -197,6 +217,15 @@  typedef struct XtensaConfig {
     int excm_level;
     int ndepc;
     uint32_t exception_vector[EXC_MAX];
+    unsigned ninterrupt;
+    unsigned nlevel;
+    uint32_t interrupt_vector[MAX_NLEVEL + MAX_NNMI + 1];
+    uint32_t level[MAX_NINTERRUPT];
+    uint32_t level_mask[MAX_NLEVEL + MAX_NNMI + 1];
+    interrupt_type_t inttype[MAX_NINTERRUPT];
+    unsigned nccompare;
+    uint32_t timerint[MAX_NCCOMPARE];
+    uint32_t clock_freq_khz;
 } XtensaConfig;
 
 typedef struct CPUXtensaState {
@@ -207,6 +236,12 @@  typedef struct CPUXtensaState {
     uint32_t uregs[256];
     uint32_t phys_regs[MAX_NAREG];
 
+    int pending_irq_level; /* level of last raised IRQ */
+    void **irq_inputs;
+    QEMUTimer *ccompare_timer;
+    uint32_t wake_ccount;
+    int64_t halt_clock;
+
     int exception_taken;
 
     CPU_COMMON
@@ -222,6 +257,10 @@  CPUXtensaState *cpu_xtensa_init(const char *cpu_model);
 void xtensa_translate_init(void);
 int cpu_xtensa_exec(CPUXtensaState *s);
 void do_interrupt(CPUXtensaState *s);
+void check_interrupts(CPUXtensaState *s);
+void xtensa_irq_init(CPUState *env);
+void xtensa_advance_ccount(CPUState *env, uint32_t d);
+void xtensa_timer_irq(CPUState *env, uint32_t id, uint32_t active);
 int cpu_xtensa_signal_handler(int host_signum, void *pinfo, void *puc);
 void xtensa_cpu_list(FILE *f, fprintf_function cpu_fprintf);
 void xtensa_sync_window_from_phys(CPUState *env);
diff --git a/target-xtensa/exec.h b/target-xtensa/exec.h
index 9c114ef..4f02a85 100644
--- a/target-xtensa/exec.h
+++ b/target-xtensa/exec.h
@@ -35,7 +35,7 @@  register struct CPUXtensaState *env asm(AREG0);
 
 static inline int cpu_has_work(CPUState *env)
 {
-    return 1;
+    return env->pending_irq_level;
 }
 
 #if !defined(CONFIG_USER_ONLY)
diff --git a/target-xtensa/helper.c b/target-xtensa/helper.c
index ffa5590..4c3069f 100644
--- a/target-xtensa/helper.c
+++ b/target-xtensa/helper.c
@@ -40,6 +40,8 @@  void cpu_reset(CPUXtensaState *env)
     env->pc = env->config->exception_vector[EXC_RESET];
     env->sregs[LITBASE] &= ~1;
     env->sregs[PS] = 0x1f;
+
+    env->pending_irq_level = 0;
 }
 
 static const XtensaConfig core_config[] = {
@@ -61,6 +63,31 @@  static const XtensaConfig core_config[] = {
             [EXC_USER] = 0x5fff863c,
             [EXC_DOUBLE] = 0x5fff865c,
         },
+        .ninterrupt = 13,
+        .nlevel = 6,
+        .interrupt_vector = {
+            0,
+            0,
+            0x5fff857c,
+            0x5fff859c,
+            0x5fff85bc,
+            0x5fff85dc,
+            0x5fff85fc,
+        },
+        .level = {
+            [0] = 4,
+        },
+        .level_mask = {
+            [4] = 1,
+        },
+        .inttype = {
+            [0] = INTTYPE_TIMER,
+        },
+        .nccompare = 1,
+        .timerint = {
+            [0] = 0,
+        },
+        .clock_freq_khz = 912000,
     },
 };
 
@@ -90,6 +117,7 @@  CPUXtensaState *cpu_xtensa_init(const char *cpu_model)
         xtensa_translate_init();
     }
 
+    xtensa_irq_init(env);
     qemu_init_vcpu(env);
     return env;
 }
@@ -109,9 +137,55 @@  target_phys_addr_t cpu_get_phys_page_debug(CPUState *env, target_ulong addr)
     return addr;
 }
 
+static int handle_interrupt(CPUState *env)
+{
+    int handled = 1;
+    int level = env->pending_irq_level;
+
+    if (level > xtensa_get_cintlevel(env) &&
+            level <= env->config->nlevel &&
+            (env->config->level_mask[level] &
+             env->sregs[INTSET] &
+             env->sregs[INTENABLE])) {
+        if (level > 1) {
+            env->sregs[EPC1 + level - 1] = env->pc;
+            env->sregs[EPS2 + level - 2] = env->sregs[PS];
+            env->pc = env->config->interrupt_vector[level];
+        } else {
+            handled = 0;
+            env->sregs[EXCCAUSE] = LEVEL1_INTERRUPT_CAUSE;
+
+            if (env->sregs[PS] & PS_EXCM) {
+                if (env->config->ndepc) {
+                    env->sregs[DEPC] = env->pc;
+                } else {
+                    env->sregs[EPC1] = env->pc;
+                }
+                env->exception_index = EXC_DOUBLE;
+            } else {
+                env->sregs[EPC1] = env->pc;
+                env->exception_index =
+                    (env->sregs[PS] & PS_UM) ? EXC_USER : EXC_KERNEL;
+            }
+        }
+        env->sregs[PS] = (env->sregs[PS] & ~PS_INTLEVEL) | PS_EXCM | level;
+        env->exception_taken = 1;
+        env->interrupt_request |= CPU_INTERRUPT_EXITTB;
+    }
+    return handled;
+}
+
 void do_interrupt(CPUState *env)
 {
     switch (env->exception_index) {
+    case EXC_IRQ:
+        if (handle_interrupt(env)) {
+            break;
+        }
+        /* not handled interrupt falls through,
+         * env->exception_index is updated
+         */
+
     case EXC_WINDOW_OVERFLOW4:
     case EXC_WINDOW_UNDERFLOW4:
     case EXC_WINDOW_OVERFLOW8:
@@ -124,12 +198,16 @@  void do_interrupt(CPUState *env)
         if (env->config->exception_vector[env->exception_index]) {
             env->pc = env->config->exception_vector[env->exception_index];
             env->exception_taken = 1;
+            env->interrupt_request |= CPU_INTERRUPT_EXITTB;
         } else {
             qemu_log("%s(pc = %08x) bad exception_index: %d\n",
                     __func__, env->pc, env->exception_index);
         }
         break;
 
+    default:
+        qemu_log("%s(pc = %08x) unknown exception_index: %d\n",
+                __func__, env->pc, env->exception_index);
+        break;
     }
-    env->interrupt_request |= CPU_INTERRUPT_EXITTB;
 }
diff --git a/target-xtensa/helpers.h b/target-xtensa/helpers.h
index 87c7cc5..bbc0ebc 100644
--- a/target-xtensa/helpers.h
+++ b/target-xtensa/helpers.h
@@ -16,4 +16,9 @@  DEF_HELPER_1(wsr_lend, void, i32)
 DEF_HELPER_0(simcall, void)
 DEF_HELPER_0(dump_state, void)
 
+DEF_HELPER_0(check_interrupts, void)
+DEF_HELPER_2(waiti, void, i32, i32)
+DEF_HELPER_2(timer_irq, void, i32, i32)
+DEF_HELPER_1(advance_ccount, void, i32)
+
 #include "def-helper.h"
diff --git a/target-xtensa/op_helper.c b/target-xtensa/op_helper.c
index b170dbe..d742f1b 100644
--- a/target-xtensa/op_helper.c
+++ b/target-xtensa/op_helper.c
@@ -300,3 +300,47 @@  void HELPER(dump_state)(void)
 {
     cpu_dump_state(env, stderr, fprintf, 0);
 }
+
+void HELPER(check_interrupts)(void)
+{
+    check_interrupts(env);
+}
+
+void HELPER(waiti)(uint32_t pc, uint32_t intlevel)
+{
+    env->sregs[PS] = (env->sregs[PS] & ~PS_INTLEVEL) |
+        (intlevel << PS_INTLEVEL_SHIFT);
+    check_interrupts(env);
+    if (env->pending_irq_level) {
+        return;
+    }
+
+    if (xtensa_option_enabled(env->config, XTENSA_OPTION_TIMER_INTERRUPT)) {
+        int i;
+        uint32_t wake_ccount = env->sregs[CCOUNT] - 1;
+
+        for (i = 0; i < env->config->nccompare; ++i) {
+            if (env->sregs[CCOMPARE + i] - env->sregs[CCOUNT] <
+                    wake_ccount - env->sregs[CCOUNT]) {
+                wake_ccount = env->sregs[CCOMPARE + i];
+            }
+        }
+        env->wake_ccount = wake_ccount;
+        qemu_mod_timer(env->ccompare_timer, qemu_get_clock_ns(vm_clock) +
+                muldiv64(wake_ccount - env->sregs[CCOUNT],
+                    1000000, env->config->clock_freq_khz));
+    }
+    env->halt_clock = qemu_get_clock_ns(vm_clock);
+    env->halted = 1;
+    HELPER(exception)(EXCP_HLT);
+}
+
+void HELPER(timer_irq)(uint32_t id, uint32_t active)
+{
+    xtensa_timer_irq(env, id, active);
+}
+
+void HELPER(advance_ccount)(uint32_t d)
+{
+    xtensa_advance_ccount(env, d);
+}
diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index b40218d..450c302 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c
@@ -56,6 +56,8 @@  typedef struct DisasContext {
     bool sar_m32_5bit;
     bool sar_m32_allocated;
     TCGv_i32 sar_m32;
+
+    uint32_t ccount_delta;
 } DisasContext;
 
 static TCGv_ptr cpu_env;
@@ -76,11 +78,36 @@  static const char * const sregnames[256] = {
     [WINDOW_BASE] = "WINDOW_BASE",
     [WINDOW_START] = "WINDOW_START",
     [EPC1] = "EPC1",
+    [EPC1 + 1] = "EPC2",
+    [EPC1 + 2] = "EPC3",
+    [EPC1 + 3] = "EPC4",
+    [EPC1 + 4] = "EPC5",
+    [EPC1 + 5] = "EPC6",
+    [EPC1 + 6] = "EPC7",
     [DEPC] = "DEPC",
+    [EPS2] = "EPS2",
+    [EPS2 + 1] = "EPS3",
+    [EPS2 + 2] = "EPS4",
+    [EPS2 + 3] = "EPS5",
+    [EPS2 + 4] = "EPS6",
+    [EPS2 + 5] = "EPS7",
     [EXCSAVE1] = "EXCSAVE1",
+    [EXCSAVE1 + 1] = "EXCSAVE2",
+    [EXCSAVE1 + 2] = "EXCSAVE3",
+    [EXCSAVE1 + 3] = "EXCSAVE4",
+    [EXCSAVE1 + 4] = "EXCSAVE5",
+    [EXCSAVE1 + 5] = "EXCSAVE6",
+    [EXCSAVE1 + 6] = "EXCSAVE7",
+    [INTSET] = "INTSET",
+    [INTCLEAR] = "INTCLEAR",
+    [INTENABLE] = "INTENABLE",
     [PS] = "PS",
     [EXCCAUSE] = "EXCCAUSE",
+    [CCOUNT] = "CCOUNT",
     [EXCVADDR] = "EXCVADDR",
+    [CCOMPARE] = "CCOMPARE0",
+    [CCOMPARE + 1] = "CCOMPARE1",
+    [CCOMPARE + 2] = "CCOMPARE2",
 };
 
 static const char * const uregnames[256] = {
@@ -162,9 +189,27 @@  static void gen_left_shift_sar(DisasContext *dc, TCGv_i32 sa)
     tcg_temp_free(tmp);
 }
 
-static void gen_exception(int excp)
+static void gen_advance_ccount(DisasContext *dc)
+{
+    if (dc->ccount_delta > 0) {
+        TCGv_i32 tmp = tcg_const_i32(dc->ccount_delta);
+        dc->ccount_delta = 0;
+        gen_helper_advance_ccount(tmp);
+        tcg_temp_free(tmp);
+    }
+}
+
+static void gen_check_interrupts(DisasContext *dc)
+{
+    gen_advance_ccount(dc);
+    tcg_gen_movi_i32(cpu_pc, dc->pc);
+    gen_helper_check_interrupts();
+}
+
+static void gen_exception(DisasContext *dc, int excp)
 {
     TCGv_i32 tmp = tcg_const_i32(excp);
+    gen_advance_ccount(dc);
     gen_helper_exception(tmp);
     tcg_temp_free(tmp);
 }
@@ -173,6 +218,7 @@  static void gen_exception_cause(DisasContext *dc, uint32_t cause)
 {
     TCGv_i32 _pc = tcg_const_i32(dc->pc);
     TCGv_i32 _cause = tcg_const_i32(cause);
+    gen_advance_ccount(dc);
     gen_helper_exception_cause(_pc, _cause);
     tcg_temp_free(_pc);
     tcg_temp_free(_cause);
@@ -183,6 +229,7 @@  static void gen_exception_cause_vaddr(DisasContext *dc, uint32_t cause,
 {
     TCGv_i32 _pc = tcg_const_i32(dc->pc);
     TCGv_i32 _cause = tcg_const_i32(cause);
+    gen_advance_ccount(dc);
     gen_helper_exception_cause_vaddr(_pc, _cause, vaddr);
     tcg_temp_free(_pc);
     tcg_temp_free(_cause);
@@ -199,8 +246,9 @@  static void gen_jump_slot(DisasContext *dc, TCGv dest, int slot)
 {
     tcg_gen_mov_i32(cpu_pc, dest);
     if (dc->singlestep_enabled) {
-        gen_exception(EXCP_DEBUG);
+        gen_exception(dc, EXCP_DEBUG);
     } else {
+        gen_advance_ccount(dc);
         if (slot >= 0) {
             tcg_gen_goto_tb(slot);
             tcg_gen_exit_tb((tcg_target_long)dc->tb + slot);
@@ -295,10 +343,17 @@  static void gen_brcondi(DisasContext *dc, TCGCond cond,
     tcg_temp_free(tmp);
 }
 
+static void gen_rsr_ccount(DisasContext *dc, TCGv_i32 d, uint32_t sr)
+{
+    gen_advance_ccount(dc);
+    tcg_gen_mov_i32(d, cpu_SR[sr]);
+}
+
 static void gen_rsr(DisasContext *dc, TCGv_i32 d, uint32_t sr)
 {
     static void (* const rsr_handler[256])(DisasContext *dc,
             TCGv_i32 d, uint32_t sr) = {
+        [CCOUNT] = gen_rsr_ccount,
     };
 
     if (sregnames[sr]) {
@@ -339,6 +394,25 @@  static void gen_wsr_windowbase(DisasContext *dc, uint32_t sr, TCGv_i32 v)
     gen_helper_wsr_windowbase(v);
 }
 
+static void gen_wsr_ps(DisasContext *dc, uint32_t sr, TCGv_i32 v)
+{
+    tcg_gen_mov_i32(cpu_SR[sr], v);
+    gen_check_interrupts(dc);
+    /* This can change tb->flags, so exit tb */
+    gen_jumpi_check_loop_end(dc, -1);
+}
+
+static void gen_wsr_ccompare(DisasContext *dc, uint32_t sr, TCGv_i32 v)
+{
+    TCGv_i32 id = tcg_const_i32(sr - CCOMPARE);
+    TCGv_i32 active = tcg_const_i32(0);
+    gen_advance_ccount(dc);
+    tcg_gen_mov_i32(cpu_SR[sr], v);
+    gen_helper_timer_irq(id, active);
+    tcg_temp_free(id);
+    tcg_temp_free(active);
+}
+
 static void gen_wsr(DisasContext *dc, uint32_t sr, TCGv_i32 s)
 {
     static void (* const wsr_handler[256])(DisasContext *dc,
@@ -347,6 +421,10 @@  static void gen_wsr(DisasContext *dc, uint32_t sr, TCGv_i32 s)
         [SAR] = gen_wsr_sar,
         [LITBASE] = gen_wsr_litbase,
         [WINDOW_BASE] = gen_wsr_windowbase,
+        [PS] = gen_wsr_ps,
+        [CCOMPARE] = gen_wsr_ccompare,
+        [CCOMPARE + 1] = gen_wsr_ccompare,
+        [CCOMPARE + 2] = gen_wsr_ccompare,
     };
 
     if (sregnames[sr]) {
@@ -374,6 +452,16 @@  static void gen_load_store_alignment(DisasContext *dc, int shift, TCGv_i32 addr)
     tcg_temp_free(tmp);
 }
 
+static void gen_waiti(DisasContext *dc, uint32_t imm4)
+{
+    TCGv_i32 pc = tcg_const_i32(dc->pc);
+    TCGv_i32 intlevel = tcg_const_i32(imm4);
+    gen_advance_ccount(dc);
+    gen_helper_waiti(pc, intlevel);
+    tcg_temp_free(pc);
+    tcg_temp_free(intlevel);
+}
+
 static void disas_xtensa_insn(DisasContext *dc)
 {
 #define HAS_OPTION(opt) do { \
@@ -510,6 +598,7 @@  static void disas_xtensa_insn(DisasContext *dc)
                             HAS_OPTION(XTENSA_OPTION_WINDOWED_REGISTER);
                             {
                                 TCGv_i32 tmp = tcg_const_i32(dc->pc);
+                                gen_advance_ccount(dc);
                                 gen_helper_retw(tmp, tmp);
                                 gen_jump(dc, tmp);
                                 tcg_temp_free(tmp);
@@ -555,6 +644,7 @@  static void disas_xtensa_insn(DisasContext *dc)
                     HAS_OPTION(XTENSA_OPTION_WINDOWED_REGISTER);
                     {
                         TCGv_i32 pc = tcg_const_i32(dc->pc);
+                        gen_advance_ccount(dc);
                         gen_helper_movsp(pc);
                         tcg_gen_mov_i32(cpu_R[RRR_T], cpu_R[RRR_S]);
                         tcg_temp_free(pc);
@@ -649,7 +739,9 @@  static void disas_xtensa_insn(DisasContext *dc)
 
                     case 1: /*RFIx*/
                         HAS_OPTION(XTENSA_OPTION_HIGH_PRIORITY_INTERRUPT);
-                        TBD();
+                        gen_check_privilege(dc);
+                        tcg_gen_mov_i32(cpu_SR[PS], cpu_SR[EPS2 + RRR_S - 2]);
+                        gen_jump(dc, cpu_SR[EPC1 + RRR_S - 1]);
                         break;
 
                     case 2: /*RFME*/
@@ -697,11 +789,13 @@  static void disas_xtensa_insn(DisasContext *dc)
                     tcg_gen_ori_i32(cpu_SR[PS], cpu_SR[PS], RRR_S);
                     tcg_gen_andi_i32(cpu_SR[PS], cpu_SR[PS],
                             RRR_S | ~PS_INTLEVEL);
+                    gen_check_interrupts(dc);
                     break;
 
                 case 7: /*WAITIx*/
                     HAS_OPTION(XTENSA_OPTION_INTERRUPT);
-                    TBD();
+                    gen_check_privilege(dc);
+                    gen_waiti(dc, RRR_S);
                     break;
 
                 case 8: /*ANY4p*/
@@ -1558,6 +1652,7 @@  static void disas_xtensa_insn(DisasContext *dc)
                     TCGv_i32 pc = tcg_const_i32(dc->pc);
                     TCGv_i32 s = tcg_const_i32(BRI12_S);
                     TCGv_i32 imm = tcg_const_i32(BRI12_IMM12);
+                    gen_advance_ccount(dc);
                     gen_helper_entry(pc, s, imm);
                     tcg_temp_free(imm);
                     tcg_temp_free(s);
@@ -1744,6 +1839,7 @@  static void disas_xtensa_insn(DisasContext *dc)
                 HAS_OPTION(XTENSA_OPTION_WINDOWED_REGISTER);
                 {
                     TCGv_i32 tmp = tcg_const_i32(dc->pc);
+                    gen_advance_ccount(dc);
                     gen_helper_retw(tmp, tmp);
                     gen_jump(dc, tmp);
                     tcg_temp_free(tmp);
@@ -1797,7 +1893,7 @@  static void check_breakpoint(CPUState *env, DisasContext *dc)
         QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
             if (bp->pc == dc->pc) {
                 tcg_gen_movi_i32(cpu_pc, dc->pc);
-                gen_exception(EXCP_DEBUG);
+                gen_exception(dc, EXCP_DEBUG);
                 dc->is_jmp = DISAS_UPDATE;
              }
         }
@@ -1827,6 +1923,7 @@  static void gen_intermediate_code_internal(
     dc.lend = env->sregs[LEND];
     dc.litbase = env->sregs[LITBASE] & 0xfffff000;
     dc.is_jmp = DISAS_NEXT;
+    dc.ccount_delta = 0;
 
     reset_sar_tracker(&dc);
 
@@ -1835,7 +1932,7 @@  static void gen_intermediate_code_internal(
     if (env->singlestep_enabled && env->exception_taken) {
         env->exception_taken = 0;
         tcg_gen_movi_i32(cpu_pc, dc.pc);
-        gen_exception(EXCP_DEBUG);
+        gen_exception(&dc, EXCP_DEBUG);
     }
 
     do {
@@ -1858,11 +1955,17 @@  static void gen_intermediate_code_internal(
             tcg_gen_debug_insn_start(dc.pc);
         }
 
+        ++dc.ccount_delta;
+
+        if (insn_count + 1 == max_insns && (tb->cflags & CF_LAST_IO)) {
+            gen_io_start();
+        }
+
         disas_xtensa_insn(&dc);
         ++insn_count;
         if (env->singlestep_enabled) {
             tcg_gen_movi_i32(cpu_pc, dc.pc);
-            gen_exception(EXCP_DEBUG);
+            gen_exception(&dc, EXCP_DEBUG);
             break;
         }
     } while (dc.is_jmp == DISAS_NEXT &&
@@ -1874,6 +1977,10 @@  static void gen_intermediate_code_internal(
         tcg_temp_free(dc.sar_m32);
     }
 
+    if (tb->cflags & CF_LAST_IO) {
+        gen_io_end();
+    }
+
     if (dc.is_jmp == DISAS_NEXT) {
         gen_jumpi(&dc, dc.pc, 0);
     }