[v2] target-mips: fix broken MIPS16 and microMIPS
diff mbox

Message ID 1404232985-63404-1-git-send-email-yongbok.kim@imgtec.com
State New
Headers show

Commit Message

Yongbok Kim July 1, 2014, 4:43 p.m. UTC
Commit 240ce26a broke MIPS16 and microMIPS support because it did not
account for the branches and jumps in MIPS16 and microMIPS that have
no delay slot.

This patch introduces a new argument, delayslot_size, to
gen_compute_branch() indicating the size of the delay slot {0, 2, 4}.
This information is used to force a call to handle_delay_slot()
when no delay slot is required.

Some microMIPS branch and jump instructions require an instruction of
an exact size in the delay slot. To mark these instructions, the
MIPS_HFLAG_BDS_STRICT flag is introduced.

The fictional branch opcodes that were defined to support MIPS16 and
microMIPS are no longer needed.

Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
---
v2:
* correct MIPS_HFLAG_TMASK
---
 target-mips/cpu.h       |   13 +-
 target-mips/translate.c |  284 ++++++++++++++++++-----------------------------
 2 files changed, 117 insertions(+), 180 deletions(-)

Comments

James Hogan Sept. 3, 2014, 1:33 p.m. UTC | #1
On Tue, Jul 01, 2014 at 05:43:05PM +0100, Yongbok Kim wrote:
> Commit 240ce26a broke MIPS16 and microMIPS support as it didn't
> care those branches and jumps don't have delay slot in
> MIPS16 and microMIPS.
> 
> This patch introduces a new argument delayslot_size to the
> gen_compute_branch() indicating size of delay slot {0, 2, 4}.
> And the information is used to call handle_delay_slot() forcingly
> when no delay slot is required.
> 
> There are some microMIPS branch and jump instructions that requires
> exact size of instruction in the delay slot. For indicating
> these instructions, MIPS_HFLAG_BDS_STRICT flag is introduced.
> 
> Those fictional branch opcodes defined to support MIPS16 and
> microMIPS are no longer needed.
> 
> Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>

Jonas hit this problem too with a mips16 enabled userland, and has
confirmed that this patch fixes it:

Tested-by: Jonas Gorski <jogo@openwrt.org>

Cheers
James

> ---
> v2:
> * correct MIPS_HFLAG_TMASK
> ---
>  target-mips/cpu.h       |   13 +-
>  target-mips/translate.c |  284 ++++++++++++++++++-----------------------------
>  2 files changed, 117 insertions(+), 180 deletions(-)
> 
> diff --git a/target-mips/cpu.h b/target-mips/cpu.h
> index 8b9a92e..c81dfac 100644
> --- a/target-mips/cpu.h
> +++ b/target-mips/cpu.h
> @@ -431,7 +431,7 @@ struct CPUMIPSState {
>      int error_code;
>      uint32_t hflags;    /* CPU State */
>      /* TMASK defines different execution modes */
> -#define MIPS_HFLAG_TMASK  0xC07FF
> +#define MIPS_HFLAG_TMASK  0x1807FF
>  #define MIPS_HFLAG_MODE   0x00007 /* execution modes                    */
>      /* The KSU flags must be the lowest bits in hflags. The flag order
>         must be the same as defined for CP0 Status. This allows to use
> @@ -463,17 +463,18 @@ struct CPUMIPSState {
>  #define MIPS_HFLAG_BL     0x01800 /* Likely branch                      */
>  #define MIPS_HFLAG_BR     0x02000 /* branch to register (can't link TB) */
>      /* Extra flags about the current pending branch.  */
> -#define MIPS_HFLAG_BMASK_EXT 0x3C000
> +#define MIPS_HFLAG_BMASK_EXT 0x7C000
>  #define MIPS_HFLAG_B16    0x04000 /* branch instruction was 16 bits     */
>  #define MIPS_HFLAG_BDS16  0x08000 /* branch requires 16-bit delay slot  */
>  #define MIPS_HFLAG_BDS32  0x10000 /* branch requires 32-bit delay slot  */
> -#define MIPS_HFLAG_BX     0x20000 /* branch exchanges execution mode    */
> +#define MIPS_HFLAG_BDS_STRICT  0x20000 /* Strict delay slot size */
> +#define MIPS_HFLAG_BX     0x40000 /* branch exchanges execution mode    */
>  #define MIPS_HFLAG_BMASK  (MIPS_HFLAG_BMASK_BASE | MIPS_HFLAG_BMASK_EXT)
>      /* MIPS DSP resources access. */
> -#define MIPS_HFLAG_DSP   0x40000  /* Enable access to MIPS DSP resources. */
> -#define MIPS_HFLAG_DSPR2 0x80000  /* Enable access to MIPS DSPR2 resources. */
> +#define MIPS_HFLAG_DSP   0x080000  /* Enable access to MIPS DSP resources. */
> +#define MIPS_HFLAG_DSPR2 0x100000  /* Enable access to MIPS DSPR2 resources. */
>      /* Extra flag about HWREna register. */
> -#define MIPS_HFLAG_HWRENA_ULR 0x100000 /* ULR bit from HWREna is set. */
> +#define MIPS_HFLAG_HWRENA_ULR 0x200000 /* ULR bit from HWREna is set. */
>      target_ulong btarget;        /* Jump / branch target               */
>      target_ulong bcond;          /* Branch condition (if needed)       */
>  
> diff --git a/target-mips/translate.c b/target-mips/translate.c
> index 2f91959..a654ae8 100644
> --- a/target-mips/translate.c
> +++ b/target-mips/translate.c
> @@ -61,7 +61,6 @@ enum {
>      /* Jump and branches */
>      OPC_J        = (0x02 << 26),
>      OPC_JAL      = (0x03 << 26),
> -    OPC_JALS     = OPC_JAL | 0x5,
>      OPC_BEQ      = (0x04 << 26),  /* Unconditional if rs = rt = 0 (B) */
>      OPC_BEQL     = (0x14 << 26),
>      OPC_BNE      = (0x05 << 26),
> @@ -70,8 +69,7 @@ enum {
>      OPC_BLEZL    = (0x16 << 26),
>      OPC_BGTZ     = (0x07 << 26),
>      OPC_BGTZL    = (0x17 << 26),
> -    OPC_JALX     = (0x1D << 26),  /* MIPS 16 only */
> -    OPC_JALXS    = OPC_JALX | 0x5,
> +    OPC_JALX     = (0x1D << 26),
>      /* Load and stores */
>      OPC_LDL      = (0x1A << 26),
>      OPC_LDR      = (0x1B << 26),
> @@ -171,8 +169,6 @@ enum {
>      /* Jumps */
>      OPC_JR       = 0x08 | OPC_SPECIAL, /* Also JR.HB */
>      OPC_JALR     = 0x09 | OPC_SPECIAL, /* Also JALR.HB */
> -    OPC_JALRC    = OPC_JALR | (0x5 << 6),
> -    OPC_JALRS    = 0x10 | OPC_SPECIAL | (0x5 << 6),
>      /* Traps */
>      OPC_TGE      = 0x30 | OPC_SPECIAL,
>      OPC_TGEU     = 0x31 | OPC_SPECIAL,
> @@ -236,10 +232,8 @@ enum {
>      OPC_BGEZ     = (0x01 << 16) | OPC_REGIMM,
>      OPC_BGEZL    = (0x03 << 16) | OPC_REGIMM,
>      OPC_BLTZAL   = (0x10 << 16) | OPC_REGIMM,
> -    OPC_BLTZALS  = OPC_BLTZAL | 0x5, /* microMIPS */
>      OPC_BLTZALL  = (0x12 << 16) | OPC_REGIMM,
>      OPC_BGEZAL   = (0x11 << 16) | OPC_REGIMM,
> -    OPC_BGEZALS  = OPC_BGEZAL | 0x5, /* microMIPS */
>      OPC_BGEZALL  = (0x13 << 16) | OPC_REGIMM,
>      OPC_TGEI     = (0x08 << 16) | OPC_REGIMM,
>      OPC_TGEIU    = (0x09 << 16) | OPC_REGIMM,
> @@ -3597,7 +3591,8 @@ static inline void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
>  /* Branches (before delay slot) */
>  static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
>                                  int insn_bytes,
> -                                int rs, int rt, int32_t offset)
> +                                int rs, int rt, int32_t offset,
> +                                int delayslot_size)
>  {
>      target_ulong btgt = -1;
>      int blink = 0;
> @@ -3629,7 +3624,6 @@ static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
>          break;
>      case OPC_BGEZ:
>      case OPC_BGEZAL:
> -    case OPC_BGEZALS:
>      case OPC_BGEZALL:
>      case OPC_BGEZL:
>      case OPC_BGTZ:
> @@ -3638,7 +3632,6 @@ static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
>      case OPC_BLEZL:
>      case OPC_BLTZ:
>      case OPC_BLTZAL:
> -    case OPC_BLTZALS:
>      case OPC_BLTZALL:
>      case OPC_BLTZL:
>          /* Compare to zero */
> @@ -3661,15 +3654,11 @@ static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
>      case OPC_J:
>      case OPC_JAL:
>      case OPC_JALX:
> -    case OPC_JALS:
> -    case OPC_JALXS:
>          /* Jump to immediate */
>          btgt = ((ctx->pc + insn_bytes) & (int32_t)0xF0000000) | (uint32_t)offset;
>          break;
>      case OPC_JR:
>      case OPC_JALR:
> -    case OPC_JALRC:
> -    case OPC_JALRS:
>          /* Jump to register */
>          if (offset != 0 && offset != 16) {
>              /* Hint = 0 is JR/JALR, hint 16 is JR.HB/JALR.HB, the
> @@ -3698,12 +3687,8 @@ static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
>              ctx->hflags |= MIPS_HFLAG_B;
>              MIPS_DEBUG("balways");
>              break;
> -        case OPC_BGEZALS:
>          case OPC_BGEZAL:  /* 0 >= 0          */
>          case OPC_BGEZALL: /* 0 >= 0 likely   */
> -            ctx->hflags |= (opc == OPC_BGEZALS
> -                            ? MIPS_HFLAG_BDS16
> -                            : MIPS_HFLAG_BDS32);
>              /* Always take and link */
>              blink = 31;
>              ctx->hflags |= MIPS_HFLAG_B;
> @@ -3715,15 +3700,11 @@ static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
>              /* Treat as NOP. */
>              MIPS_DEBUG("bnever (NOP)");
>              goto out;
> -        case OPC_BLTZALS:
>          case OPC_BLTZAL:  /* 0 < 0           */
> -            ctx->hflags |= (opc == OPC_BLTZALS
> -                            ? MIPS_HFLAG_BDS16
> -                            : MIPS_HFLAG_BDS32);
>              /* Handle as an unconditional branch to get correct delay
>                 slot checking.  */
>              blink = 31;
> -            btgt = ctx->pc + (opc == OPC_BLTZALS ? 6 : 8);
> +            btgt = ctx->pc + insn_bytes + delayslot_size;
>              ctx->hflags |= MIPS_HFLAG_B;
>              MIPS_DEBUG("bnever and link");
>              break;
> @@ -3744,33 +3725,21 @@ static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
>              ctx->hflags |= MIPS_HFLAG_B;
>              MIPS_DEBUG("j " TARGET_FMT_lx, btgt);
>              break;
> -        case OPC_JALXS:
>          case OPC_JALX:
>              ctx->hflags |= MIPS_HFLAG_BX;
>              /* Fallthrough */
> -        case OPC_JALS:
>          case OPC_JAL:
>              blink = 31;
>              ctx->hflags |= MIPS_HFLAG_B;
> -            ctx->hflags |= ((opc == OPC_JALS || opc == OPC_JALXS)
> -                            ? MIPS_HFLAG_BDS16
> -                            : MIPS_HFLAG_BDS32);
>              MIPS_DEBUG("jal " TARGET_FMT_lx, btgt);
>              break;
>          case OPC_JR:
>              ctx->hflags |= MIPS_HFLAG_BR;
> -            if (insn_bytes == 4)
> -                ctx->hflags |= MIPS_HFLAG_BDS32;
>              MIPS_DEBUG("jr %s", regnames[rs]);
>              break;
> -        case OPC_JALRS:
>          case OPC_JALR:
> -        case OPC_JALRC:
>              blink = rt;
>              ctx->hflags |= MIPS_HFLAG_BR;
> -            ctx->hflags |= (opc == OPC_JALRS
> -                            ? MIPS_HFLAG_BDS16
> -                            : MIPS_HFLAG_BDS32);
>              MIPS_DEBUG("jalr %s, %s", regnames[rt], regnames[rs]);
>              break;
>          default:
> @@ -3808,11 +3777,7 @@ static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
>              tcg_gen_setcondi_tl(TCG_COND_GE, bcond, t0, 0);
>              MIPS_DEBUG("bgezl %s, " TARGET_FMT_lx, regnames[rs], btgt);
>              goto likely;
> -        case OPC_BGEZALS:
>          case OPC_BGEZAL:
> -            ctx->hflags |= (opc == OPC_BGEZALS
> -                            ? MIPS_HFLAG_BDS16
> -                            : MIPS_HFLAG_BDS32);
>              tcg_gen_setcondi_tl(TCG_COND_GE, bcond, t0, 0);
>              MIPS_DEBUG("bgezal %s, " TARGET_FMT_lx, regnames[rs], btgt);
>              blink = 31;
> @@ -3856,11 +3821,7 @@ static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
>              MIPS_DEBUG("bposge64 " TARGET_FMT_lx, btgt);
>              goto not_likely;
>  #endif
> -        case OPC_BLTZALS:
>          case OPC_BLTZAL:
> -            ctx->hflags |= (opc == OPC_BLTZALS
> -                            ? MIPS_HFLAG_BDS16
> -                            : MIPS_HFLAG_BDS32);
>              tcg_gen_setcondi_tl(TCG_COND_LT, bcond, t0, 0);
>              blink = 31;
>              MIPS_DEBUG("bltzal %s, " TARGET_FMT_lx, regnames[rs], btgt);
> @@ -3884,13 +3845,21 @@ static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
>                 blink, ctx->hflags, btgt);
>  
>      ctx->btarget = btgt;
> +
> +    switch (delayslot_size) {
> +    case 2:
> +        ctx->hflags |= MIPS_HFLAG_BDS16;
> +        break;
> +    case 4:
> +        ctx->hflags |= MIPS_HFLAG_BDS32;
> +        break;
> +    }
> +
>      if (blink > 0) {
>          int post_delay = insn_bytes;
>          int lowbit = !!(ctx->hflags & MIPS_HFLAG_M16);
>  
> -        if (opc != OPC_JALRC)
> -            post_delay += ((ctx->hflags & MIPS_HFLAG_BDS16) ? 2 : 4);
> -
> +        post_delay += delayslot_size;
>          tcg_gen_movi_tl(cpu_gpr[blink], ctx->pc + post_delay + lowbit);
>      }
>  
> @@ -7124,7 +7093,7 @@ static void gen_compute_branch1(DisasContext *ctx, uint32_t op,
>      MIPS_DEBUG("%s: cond %02x target " TARGET_FMT_lx, opn,
>                 ctx->hflags, btarget);
>      ctx->btarget = btarget;
> -
> +    ctx->hflags |= MIPS_HFLAG_BDS32;
>   out:
>      tcg_temp_free_i32(t0);
>  }
> @@ -9667,15 +9636,15 @@ static int decode_extended_mips16_opc (CPUMIPSState *env, DisasContext *ctx)
>          gen_addiupc(ctx, rx, imm, 0, 1);
>          break;
>      case M16_OPC_B:
> -        gen_compute_branch(ctx, OPC_BEQ, 4, 0, 0, offset << 1);
> +        gen_compute_branch(ctx, OPC_BEQ, 4, 0, 0, offset << 1, 0);
>          /* No delay slot, so just process as a normal instruction */
>          break;
>      case M16_OPC_BEQZ:
> -        gen_compute_branch(ctx, OPC_BEQ, 4, rx, 0, offset << 1);
> +        gen_compute_branch(ctx, OPC_BEQ, 4, rx, 0, offset << 1, 0);
>          /* No delay slot, so just process as a normal instruction */
>          break;
>      case M16_OPC_BNEQZ:
> -        gen_compute_branch(ctx, OPC_BNE, 4, rx, 0, offset << 1);
> +        gen_compute_branch(ctx, OPC_BNE, 4, rx, 0, offset << 1, 0);
>          /* No delay slot, so just process as a normal instruction */
>          break;
>      case M16_OPC_SHIFT:
> @@ -9733,10 +9702,10 @@ static int decode_extended_mips16_opc (CPUMIPSState *env, DisasContext *ctx)
>      case M16_OPC_I8:
>          switch (funct) {
>          case I8_BTEQZ:
> -            gen_compute_branch(ctx, OPC_BEQ, 4, 24, 0, offset << 1);
> +            gen_compute_branch(ctx, OPC_BEQ, 4, 24, 0, offset << 1, 0);
>              break;
>          case I8_BTNEZ:
> -            gen_compute_branch(ctx, OPC_BNE, 4, 24, 0, offset << 1);
> +            gen_compute_branch(ctx, OPC_BNE, 4, 24, 0, offset << 1, 0);
>              break;
>          case I8_SWRASP:
>              gen_st(ctx, OPC_SW, 31, 29, imm);
> @@ -9864,7 +9833,7 @@ static int decode_mips16_opc (CPUMIPSState *env, DisasContext *ctx)
>      case M16_OPC_B:
>          offset = (ctx->opcode & 0x7ff) << 1;
>          offset = (int16_t)(offset << 4) >> 4;
> -        gen_compute_branch(ctx, OPC_BEQ, 2, 0, 0, offset);
> +        gen_compute_branch(ctx, OPC_BEQ, 2, 0, 0, offset, 0);
>          /* No delay slot, so just process as a normal instruction */
>          break;
>      case M16_OPC_JAL:
> @@ -9872,16 +9841,18 @@ static int decode_mips16_opc (CPUMIPSState *env, DisasContext *ctx)
>          offset = (((ctx->opcode & 0x1f) << 21)
>                    | ((ctx->opcode >> 5) & 0x1f) << 16
>                    | offset) << 2;
> -        op = ((ctx->opcode >> 10) & 0x1) ? OPC_JALXS : OPC_JALS;
> -        gen_compute_branch(ctx, op, 4, rx, ry, offset);
> +        op = ((ctx->opcode >> 10) & 0x1) ? OPC_JALX : OPC_JAL;
> +        gen_compute_branch(ctx, op, 4, rx, ry, offset, 2);
>          n_bytes = 4;
>          break;
>      case M16_OPC_BEQZ:
> -        gen_compute_branch(ctx, OPC_BEQ, 2, rx, 0, ((int8_t)ctx->opcode) << 1);
> +        gen_compute_branch(ctx, OPC_BEQ, 2, rx, 0,
> +                           ((int8_t)ctx->opcode) << 1, 0);
>          /* No delay slot, so just process as a normal instruction */
>          break;
>      case M16_OPC_BNEQZ:
> -        gen_compute_branch(ctx, OPC_BNE, 2, rx, 0, ((int8_t)ctx->opcode) << 1);
> +        gen_compute_branch(ctx, OPC_BNE, 2, rx, 0,
> +                           ((int8_t)ctx->opcode) << 1, 0);
>          /* No delay slot, so just process as a normal instruction */
>          break;
>      case M16_OPC_SHIFT:
> @@ -9954,11 +9925,11 @@ static int decode_mips16_opc (CPUMIPSState *env, DisasContext *ctx)
>              switch (funct) {
>              case I8_BTEQZ:
>                  gen_compute_branch(ctx, OPC_BEQ, 2, 24, 0,
> -                                   ((int8_t)ctx->opcode) << 1);
> +                                   ((int8_t)ctx->opcode) << 1, 0);
>                  break;
>              case I8_BTNEZ:
>                  gen_compute_branch(ctx, OPC_BNE, 2, 24, 0,
> -                                   ((int8_t)ctx->opcode) << 1);
> +                                   ((int8_t)ctx->opcode) << 1, 0);
>                  break;
>              case I8_SWRASP:
>                  gen_st(ctx, OPC_SW, 31, 29, (ctx->opcode & 0xff) << 2);
> @@ -10107,12 +10078,13 @@ static int decode_mips16_opc (CPUMIPSState *env, DisasContext *ctx)
>                  int ra = (ctx->opcode >> 5) & 0x1;
>  
>                  if (link) {
> -                    op = nd ? OPC_JALRC : OPC_JALRS;
> +                    op = OPC_JALR;
>                  } else {
>                      op = OPC_JR;
>                  }
>  
> -                gen_compute_branch(ctx, op, 2, ra ? 31 : rx, 31, 0);
> +                gen_compute_branch(ctx, op, 2, ra ? 31 : rx, 31, 0,
> +                                   (nd ? 0 : 2));
>              }
>              break;
>          case RR_SDBBP:
> @@ -10870,7 +10842,6 @@ static void gen_pool16c_insn(DisasContext *ctx)
>  {
>      int rd = mmreg((ctx->opcode >> 3) & 0x7);
>      int rs = mmreg(ctx->opcode & 0x7);
> -    int opc;
>  
>      switch (((ctx->opcode) >> 4) & 0x3f) {
>      case NOT16 + 0:
> @@ -10926,32 +10897,27 @@ static void gen_pool16c_insn(DisasContext *ctx)
>          {
>              int reg = ctx->opcode & 0x1f;
>  
> -            gen_compute_branch(ctx, OPC_JR, 2, reg, 0, 0);
> +            gen_compute_branch(ctx, OPC_JR, 2, reg, 0, 0, 4);
>          }
>          break;
>      case JRC16 + 0:
>      case JRC16 + 1:
>          {
>              int reg = ctx->opcode & 0x1f;
> -
> -            gen_compute_branch(ctx, OPC_JR, 2, reg, 0, 0);
> +            gen_compute_branch(ctx, OPC_JR, 2, reg, 0, 0, 0);
>              /* Let normal delay slot handling in our caller take us
>                 to the branch target.  */
>          }
>          break;
>      case JALR16 + 0:
>      case JALR16 + 1:
> -        opc = OPC_JALR;
> -        goto do_jalr;
> +        gen_compute_branch(ctx, OPC_JALR, 2, ctx->opcode & 0x1f, 31, 0, 4);
> +        ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
> +        break;
>      case JALR16S + 0:
>      case JALR16S + 1:
> -        opc = OPC_JALRS;
> -    do_jalr:
> -        {
> -            int reg = ctx->opcode & 0x1f;
> -
> -            gen_compute_branch(ctx, opc, 2, reg, 31, 0);
> -        }
> +        gen_compute_branch(ctx, OPC_JALR, 2, ctx->opcode & 0x1f, 31, 0, 2);
> +        ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
>          break;
>      case MFHI16 + 0:
>      case MFHI16 + 1:
> @@ -10979,8 +10945,7 @@ static void gen_pool16c_insn(DisasContext *ctx)
>      case JRADDIUSP + 1:
>          {
>              int imm = ZIMM(ctx->opcode, 0, 5);
> -
> -            gen_compute_branch(ctx, OPC_JR, 2, 31, 0, 0);
> +            gen_compute_branch(ctx, OPC_JR, 2, 31, 0, 0, 0);
>              gen_arith_imm(ctx, OPC_ADDIU, 29, 29, imm << 2);
>              /* Let normal delay slot handling in our caller take us
>                 to the branch target.  */
> @@ -11237,11 +11202,13 @@ static void gen_pool32axf (CPUMIPSState *env, DisasContext *ctx, int rt, int rs)
>          switch (minor) {
>          case JALR:
>          case JALR_HB:
> -            gen_compute_branch (ctx, OPC_JALR, 4, rs, rt, 0);
> +            gen_compute_branch(ctx, OPC_JALR, 4, rs, rt, 0, 4);
> +            ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
>              break;
>          case JALRS:
>          case JALRS_HB:
> -            gen_compute_branch (ctx, OPC_JALRS, 4, rs, rt, 0);
> +            gen_compute_branch(ctx, OPC_JALR, 4, rs, rt, 0, 2);
> +            ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
>              break;
>          default:
>              goto pool32axf_invalid;
> @@ -12131,30 +12098,32 @@ static void decode_micromips32_opc (CPUMIPSState *env, DisasContext *ctx,
>          minor = (ctx->opcode >> 21) & 0x1f;
>          switch (minor) {
>          case BLTZ:
> -            mips32_op = OPC_BLTZ;
> -            goto do_branch;
> +            gen_compute_branch(ctx, OPC_BLTZ, 4, rs, -1, imm << 1, 4);
> +            break;
>          case BLTZAL:
> -            mips32_op = OPC_BLTZAL;
> -            goto do_branch;
> +            gen_compute_branch(ctx, OPC_BLTZAL, 4, rs, -1, imm << 1, 4);
> +            ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
> +            break;
>          case BLTZALS:
> -            mips32_op = OPC_BLTZALS;
> -            goto do_branch;
> +            gen_compute_branch(ctx, OPC_BLTZAL, 4, rs, -1, imm << 1, 2);
> +            ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
> +            break;
>          case BGEZ:
> -            mips32_op = OPC_BGEZ;
> -            goto do_branch;
> +            gen_compute_branch(ctx, OPC_BGEZ, 4, rs, -1, imm << 1, 4);
> +            break;
>          case BGEZAL:
> -            mips32_op = OPC_BGEZAL;
> -            goto do_branch;
> +            gen_compute_branch(ctx, OPC_BGEZAL, 4, rs, -1, imm << 1, 4);
> +            ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
> +            break;
>          case BGEZALS:
> -            mips32_op = OPC_BGEZALS;
> -            goto do_branch;
> +            gen_compute_branch(ctx, OPC_BGEZAL, 4, rs, -1, imm << 1, 2);
> +            ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
> +            break;
>          case BLEZ:
> -            mips32_op = OPC_BLEZ;
> -            goto do_branch;
> +            gen_compute_branch(ctx, OPC_BLEZ, 4, rs, -1, imm << 1, 4);
> +            break;
>          case BGTZ:
> -            mips32_op = OPC_BGTZ;
> -        do_branch:
> -            gen_compute_branch(ctx, mips32_op, 4, rs, -1, imm << 1);
> +            gen_compute_branch(ctx, OPC_BGTZ, 4, rs, -1, imm << 1, 4);
>              break;
>  
>              /* Traps */
> @@ -12182,7 +12151,7 @@ static void decode_micromips32_opc (CPUMIPSState *env, DisasContext *ctx,
>          case BNEZC:
>          case BEQZC:
>              gen_compute_branch(ctx, minor == BNEZC ? OPC_BNE : OPC_BEQ,
> -                               4, rs, 0, imm << 1);
> +                               4, rs, 0, imm << 1, 0);
>              /* Compact branches don't have a delay slot, so just let
>                 the normal delay slot handling take us to the branch
>                 target. */
> @@ -12320,25 +12289,28 @@ static void decode_micromips32_opc (CPUMIPSState *env, DisasContext *ctx,
>          break;
>      case JALX32:
>          offset = (int32_t)(ctx->opcode & 0x3FFFFFF) << 2;
> -        gen_compute_branch(ctx, OPC_JALX, 4, rt, rs, offset);
> +        gen_compute_branch(ctx, OPC_JALX, 4, rt, rs, offset, 4);
> +        ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
>          break;
>      case JALS32:
>          offset = (int32_t)(ctx->opcode & 0x3FFFFFF) << 1;
> -        gen_compute_branch(ctx, OPC_JALS, 4, rt, rs, offset);
> +        gen_compute_branch(ctx, OPC_JAL, 4, rt, rs, offset, 2);
> +        ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
>          break;
>      case BEQ32:
> -        gen_compute_branch(ctx, OPC_BEQ, 4, rt, rs, imm << 1);
> +        gen_compute_branch(ctx, OPC_BEQ, 4, rt, rs, imm << 1, 4);
>          break;
>      case BNE32:
> -        gen_compute_branch(ctx, OPC_BNE, 4, rt, rs, imm << 1);
> +        gen_compute_branch(ctx, OPC_BNE, 4, rt, rs, imm << 1, 4);
>          break;
>      case J32:
>          gen_compute_branch(ctx, OPC_J, 4, rt, rs,
> -                           (int32_t)(ctx->opcode & 0x3FFFFFF) << 1);
> +                           (int32_t)(ctx->opcode & 0x3FFFFFF) << 1, 4);
>          break;
>      case JAL32:
>          gen_compute_branch(ctx, OPC_JAL, 4, rt, rs,
> -                           (int32_t)(ctx->opcode & 0x3FFFFFF) << 1);
> +                           (int32_t)(ctx->opcode & 0x3FFFFFF) << 1, 4);
> +        ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
>          break;
>          /* Floating point (COP1) */
>      case LWC132:
> @@ -12422,84 +12394,41 @@ static int decode_micromips_opc (CPUMIPSState *env, DisasContext *ctx)
>  
>      op = (ctx->opcode >> 10) & 0x3f;
>      /* Enforce properly-sized instructions in a delay slot */
> -    if (ctx->hflags & MIPS_HFLAG_BMASK) {
> -        int bits = ctx->hflags & MIPS_HFLAG_BMASK_EXT;
> -
> -        switch (op) {
> -        case POOL32A:
> -        case POOL32B:
> -        case POOL32I:
> -        case POOL32C:
> -        case ADDI32:
> -        case ADDIU32:
> -        case ORI32:
> -        case XORI32:
> -        case SLTI32:
> -        case SLTIU32:
> -        case ANDI32:
> -        case JALX32:
> -        case LBU32:
> -        case LHU32:
> -        case POOL32F:
> -        case JALS32:
> -        case BEQ32:
> -        case BNE32:
> -        case J32:
> -        case JAL32:
> -        case SB32:
> -        case SH32:
> -        case POOL32S:
> -        case ADDIUPC:
> -        case SWC132:
> -        case SDC132:
> -        case SD32:
> -        case SW32:
> -        case LB32:
> -        case LH32:
> -        case DADDIU32:
> -        case LWC132:
> -        case LDC132:
> -        case LD32:
> -        case LW32:
> -            if (bits & MIPS_HFLAG_BDS16) {
> +    if (ctx->hflags & MIPS_HFLAG_BDS_STRICT) {
> +        switch (op & 0x7) { /* MSB-3..MSB-5 */
> +        case 0:
> +        /* POOL32A, POOL32B, POOL32I, POOL32C */
> +        case 4:
> +        /* ADDI32, ADDIU32, ORI32, XORI32, SLTI32, SLTIU32, ANDI32, JALX32 */
> +        case 5:
> +        /* LBU32, LHU32, POOL32F, JALS32, BEQ32, BNE32, J32, JAL32 */
> +        case 6:
> +        /* SB32, SH32, ADDIUPC, SWC132, SDC132, SW32 */
> +        case 7:
> +        /* LB32, LH32, LWC132, LDC132, LW32 */
> +            if (ctx->hflags & MIPS_HFLAG_BDS16) {
>                  generate_exception(ctx, EXCP_RI);
>                  /* Just stop translation; the user is confused.  */
>                  ctx->bstate = BS_STOP;
>                  return 2;
>              }
>              break;
> -        case POOL16A:
> -        case POOL16B:
> -        case POOL16C:
> -        case LWGP16:
> -        case POOL16F:
> -        case LBU16:
> -        case LHU16:
> -        case LWSP16:
> -        case LW16:
> -        case SB16:
> -        case SH16:
> -        case SWSP16:
> -        case SW16:
> -        case MOVE16:
> -        case ANDI16:
> -        case POOL16D:
> -        case POOL16E:
> -        case BEQZ16:
> -        case BNEZ16:
> -        case B16:
> -        case LI16:
> -            if (bits & MIPS_HFLAG_BDS32) {
> +        case 1:
> +        /* POOL16A, POOL16B, POOL16C, LWGP16, POOL16F */
> +        case 2:
> +        /* LBU16, LHU16, LWSP16, LW16, SB16, SH16, SWSP16, SW16 */
> +        case 3:
> +        /* MOVE16, ANDI16, POOL16D, POOL16E, BEQZ16, BNEZ16, B16, LI16 */
> +            if (ctx->hflags & MIPS_HFLAG_BDS32) {
>                  generate_exception(ctx, EXCP_RI);
>                  /* Just stop translation; the user is confused.  */
>                  ctx->bstate = BS_STOP;
>                  return 2;
>              }
>              break;
> -        default:
> -            break;
>          }
>      }
> +
>      switch (op) {
>      case POOL16A:
>          {
> @@ -12680,13 +12609,13 @@ static int decode_micromips_opc (CPUMIPSState *env, DisasContext *ctx)
>          break;
>      case B16:
>          gen_compute_branch(ctx, OPC_BEQ, 2, 0, 0,
> -                           SIMM(ctx->opcode, 0, 10) << 1);
> +                           SIMM(ctx->opcode, 0, 10) << 1, 4);
>          break;
>      case BNEZ16:
>      case BEQZ16:
>          gen_compute_branch(ctx, op == BNEZ16 ? OPC_BNE : OPC_BEQ, 2,
>                             mmreg(uMIPS_RD(ctx->opcode)),
> -                           0, SIMM(ctx->opcode, 0, 7) << 1);
> +                           0, SIMM(ctx->opcode, 0, 7) << 1, 4);
>          break;
>      case LI16:
>          {
> @@ -14556,7 +14485,7 @@ static void decode_opc (CPUMIPSState *env, DisasContext *ctx)
>              gen_muldiv(ctx, op1, 0, rs, rt);
>              break;
>          case OPC_JR ... OPC_JALR:
> -            gen_compute_branch(ctx, op1, 4, rs, rd, sa);
> +            gen_compute_branch(ctx, op1, 4, rs, rd, sa, 4);
>              break;
>          case OPC_TGE ... OPC_TEQ: /* Traps */
>          case OPC_TNE:
> @@ -15322,7 +15251,7 @@ static void decode_opc (CPUMIPSState *env, DisasContext *ctx)
>          switch (op1) {
>          case OPC_BLTZ ... OPC_BGEZL: /* REGIMM branches */
>          case OPC_BLTZAL ... OPC_BGEZALL:
> -            gen_compute_branch(ctx, op1, 4, rs, -1, imm << 2);
> +            gen_compute_branch(ctx, op1, 4, rs, -1, imm << 2, 4);
>              break;
>          case OPC_TGEI ... OPC_TEQI: /* REGIMM traps */
>          case OPC_TNEI:
> @@ -15337,7 +15266,7 @@ static void decode_opc (CPUMIPSState *env, DisasContext *ctx)
>          case OPC_BPOSGE64:
>  #endif
>              check_dsp(ctx);
> -            gen_compute_branch(ctx, op1, 4, -1, -2, (int32_t)imm << 2);
> +            gen_compute_branch(ctx, op1, 4, -1, -2, (int32_t)imm << 2, 4);
>              break;
>          default:            /* Invalid */
>              MIPS_INVAL("regimm");
> @@ -15448,11 +15377,11 @@ static void decode_opc (CPUMIPSState *env, DisasContext *ctx)
>           break;
>      case OPC_J ... OPC_JAL: /* Jump */
>           offset = (int32_t)(ctx->opcode & 0x3FFFFFF) << 2;
> -         gen_compute_branch(ctx, op, 4, rs, rt, offset);
> +         gen_compute_branch(ctx, op, 4, rs, rt, offset, 4);
>           break;
>      case OPC_BEQ ... OPC_BGTZ: /* Branch */
>      case OPC_BEQL ... OPC_BGTZL:
> -         gen_compute_branch(ctx, op, 4, rs, rt, imm << 2);
> +            gen_compute_branch(ctx, op, 4, rs, rt, imm << 2, 4);
>           break;
>      case OPC_LB ... OPC_LWR: /* Load and stores */
>      case OPC_LL:
> @@ -15617,7 +15546,7 @@ static void decode_opc (CPUMIPSState *env, DisasContext *ctx)
>      case OPC_JALX:
>          check_insn(ctx, ASE_MIPS16 | ASE_MICROMIPS);
>          offset = (int32_t)(ctx->opcode & 0x3FFFFFF) << 2;
> -        gen_compute_branch(ctx, op, 4, rs, rt, offset);
> +        gen_compute_branch(ctx, op, 4, rs, rt, offset, 4);
>          break;
>      case OPC_MDMX:
>          check_insn(ctx, ASE_MDMX);
> @@ -15719,6 +15648,13 @@ gen_intermediate_code_internal(MIPSCPU *cpu, TranslationBlock *tb,
>              ctx.bstate = BS_STOP;
>              break;
>          }
> +        if (ctx.hflags & MIPS_HFLAG_BMASK) {
> +            if (!(ctx.hflags & MIPS_HFLAG_BDS16) &&
> +                    !(ctx.hflags & MIPS_HFLAG_BDS32)) {
> +                is_delay = 1;
> +                /* force to generate branch as no delay slot is required */
> +            }
> +        }
>          if (is_delay) {
>              handle_delay_slot(&ctx, insn_bytes);
>          }
> -- 
> 1.7.4
> 
>

Patch
diff mbox

diff --git a/target-mips/cpu.h b/target-mips/cpu.h
index 8b9a92e..c81dfac 100644
--- a/target-mips/cpu.h
+++ b/target-mips/cpu.h
@@ -431,7 +431,7 @@  struct CPUMIPSState {
     int error_code;
     uint32_t hflags;    /* CPU State */
     /* TMASK defines different execution modes */
-#define MIPS_HFLAG_TMASK  0xC07FF
+#define MIPS_HFLAG_TMASK  0x1807FF
 #define MIPS_HFLAG_MODE   0x00007 /* execution modes                    */
     /* The KSU flags must be the lowest bits in hflags. The flag order
        must be the same as defined for CP0 Status. This allows to use
@@ -463,17 +463,18 @@  struct CPUMIPSState {
 #define MIPS_HFLAG_BL     0x01800 /* Likely branch                      */
 #define MIPS_HFLAG_BR     0x02000 /* branch to register (can't link TB) */
     /* Extra flags about the current pending branch.  */
-#define MIPS_HFLAG_BMASK_EXT 0x3C000
+#define MIPS_HFLAG_BMASK_EXT 0x7C000
 #define MIPS_HFLAG_B16    0x04000 /* branch instruction was 16 bits     */
 #define MIPS_HFLAG_BDS16  0x08000 /* branch requires 16-bit delay slot  */
 #define MIPS_HFLAG_BDS32  0x10000 /* branch requires 32-bit delay slot  */
-#define MIPS_HFLAG_BX     0x20000 /* branch exchanges execution mode    */
+#define MIPS_HFLAG_BDS_STRICT  0x20000 /* Strict delay slot size */
+#define MIPS_HFLAG_BX     0x40000 /* branch exchanges execution mode    */
 #define MIPS_HFLAG_BMASK  (MIPS_HFLAG_BMASK_BASE | MIPS_HFLAG_BMASK_EXT)
     /* MIPS DSP resources access. */
-#define MIPS_HFLAG_DSP   0x40000  /* Enable access to MIPS DSP resources. */
-#define MIPS_HFLAG_DSPR2 0x80000  /* Enable access to MIPS DSPR2 resources. */
+#define MIPS_HFLAG_DSP   0x080000  /* Enable access to MIPS DSP resources. */
+#define MIPS_HFLAG_DSPR2 0x100000  /* Enable access to MIPS DSPR2 resources. */
     /* Extra flag about HWREna register. */
-#define MIPS_HFLAG_HWRENA_ULR 0x100000 /* ULR bit from HWREna is set. */
+#define MIPS_HFLAG_HWRENA_ULR 0x200000 /* ULR bit from HWREna is set. */
     target_ulong btarget;        /* Jump / branch target               */
     target_ulong bcond;          /* Branch condition (if needed)       */
 
diff --git a/target-mips/translate.c b/target-mips/translate.c
index 2f91959..a654ae8 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -61,7 +61,6 @@  enum {
     /* Jump and branches */
     OPC_J        = (0x02 << 26),
     OPC_JAL      = (0x03 << 26),
-    OPC_JALS     = OPC_JAL | 0x5,
     OPC_BEQ      = (0x04 << 26),  /* Unconditional if rs = rt = 0 (B) */
     OPC_BEQL     = (0x14 << 26),
     OPC_BNE      = (0x05 << 26),
@@ -70,8 +69,7 @@  enum {
     OPC_BLEZL    = (0x16 << 26),
     OPC_BGTZ     = (0x07 << 26),
     OPC_BGTZL    = (0x17 << 26),
-    OPC_JALX     = (0x1D << 26),  /* MIPS 16 only */
-    OPC_JALXS    = OPC_JALX | 0x5,
+    OPC_JALX     = (0x1D << 26),
     /* Load and stores */
     OPC_LDL      = (0x1A << 26),
     OPC_LDR      = (0x1B << 26),
@@ -171,8 +169,6 @@  enum {
     /* Jumps */
     OPC_JR       = 0x08 | OPC_SPECIAL, /* Also JR.HB */
     OPC_JALR     = 0x09 | OPC_SPECIAL, /* Also JALR.HB */
-    OPC_JALRC    = OPC_JALR | (0x5 << 6),
-    OPC_JALRS    = 0x10 | OPC_SPECIAL | (0x5 << 6),
     /* Traps */
     OPC_TGE      = 0x30 | OPC_SPECIAL,
     OPC_TGEU     = 0x31 | OPC_SPECIAL,
@@ -236,10 +232,8 @@  enum {
     OPC_BGEZ     = (0x01 << 16) | OPC_REGIMM,
     OPC_BGEZL    = (0x03 << 16) | OPC_REGIMM,
     OPC_BLTZAL   = (0x10 << 16) | OPC_REGIMM,
-    OPC_BLTZALS  = OPC_BLTZAL | 0x5, /* microMIPS */
     OPC_BLTZALL  = (0x12 << 16) | OPC_REGIMM,
     OPC_BGEZAL   = (0x11 << 16) | OPC_REGIMM,
-    OPC_BGEZALS  = OPC_BGEZAL | 0x5, /* microMIPS */
     OPC_BGEZALL  = (0x13 << 16) | OPC_REGIMM,
     OPC_TGEI     = (0x08 << 16) | OPC_REGIMM,
     OPC_TGEIU    = (0x09 << 16) | OPC_REGIMM,
@@ -3597,7 +3591,8 @@  static inline void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
 /* Branches (before delay slot) */
 static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
                                 int insn_bytes,
-                                int rs, int rt, int32_t offset)
+                                int rs, int rt, int32_t offset,
+                                int delayslot_size)
 {
     target_ulong btgt = -1;
     int blink = 0;
@@ -3629,7 +3624,6 @@  static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
         break;
     case OPC_BGEZ:
     case OPC_BGEZAL:
-    case OPC_BGEZALS:
     case OPC_BGEZALL:
     case OPC_BGEZL:
     case OPC_BGTZ:
@@ -3638,7 +3632,6 @@  static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
     case OPC_BLEZL:
     case OPC_BLTZ:
     case OPC_BLTZAL:
-    case OPC_BLTZALS:
     case OPC_BLTZALL:
     case OPC_BLTZL:
         /* Compare to zero */
@@ -3661,15 +3654,11 @@  static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
     case OPC_J:
     case OPC_JAL:
     case OPC_JALX:
-    case OPC_JALS:
-    case OPC_JALXS:
         /* Jump to immediate */
         btgt = ((ctx->pc + insn_bytes) & (int32_t)0xF0000000) | (uint32_t)offset;
         break;
     case OPC_JR:
     case OPC_JALR:
-    case OPC_JALRC:
-    case OPC_JALRS:
         /* Jump to register */
         if (offset != 0 && offset != 16) {
             /* Hint = 0 is JR/JALR, hint 16 is JR.HB/JALR.HB, the
@@ -3698,12 +3687,8 @@  static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
             ctx->hflags |= MIPS_HFLAG_B;
             MIPS_DEBUG("balways");
             break;
-        case OPC_BGEZALS:
         case OPC_BGEZAL:  /* 0 >= 0          */
         case OPC_BGEZALL: /* 0 >= 0 likely   */
-            ctx->hflags |= (opc == OPC_BGEZALS
-                            ? MIPS_HFLAG_BDS16
-                            : MIPS_HFLAG_BDS32);
             /* Always take and link */
             blink = 31;
             ctx->hflags |= MIPS_HFLAG_B;
@@ -3715,15 +3700,11 @@  static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
             /* Treat as NOP. */
             MIPS_DEBUG("bnever (NOP)");
             goto out;
-        case OPC_BLTZALS:
         case OPC_BLTZAL:  /* 0 < 0           */
-            ctx->hflags |= (opc == OPC_BLTZALS
-                            ? MIPS_HFLAG_BDS16
-                            : MIPS_HFLAG_BDS32);
             /* Handle as an unconditional branch to get correct delay
                slot checking.  */
             blink = 31;
-            btgt = ctx->pc + (opc == OPC_BLTZALS ? 6 : 8);
+            btgt = ctx->pc + insn_bytes + delayslot_size;
             ctx->hflags |= MIPS_HFLAG_B;
             MIPS_DEBUG("bnever and link");
             break;
@@ -3744,33 +3725,21 @@  static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
             ctx->hflags |= MIPS_HFLAG_B;
             MIPS_DEBUG("j " TARGET_FMT_lx, btgt);
             break;
-        case OPC_JALXS:
         case OPC_JALX:
             ctx->hflags |= MIPS_HFLAG_BX;
             /* Fallthrough */
-        case OPC_JALS:
         case OPC_JAL:
             blink = 31;
             ctx->hflags |= MIPS_HFLAG_B;
-            ctx->hflags |= ((opc == OPC_JALS || opc == OPC_JALXS)
-                            ? MIPS_HFLAG_BDS16
-                            : MIPS_HFLAG_BDS32);
             MIPS_DEBUG("jal " TARGET_FMT_lx, btgt);
             break;
         case OPC_JR:
             ctx->hflags |= MIPS_HFLAG_BR;
-            if (insn_bytes == 4)
-                ctx->hflags |= MIPS_HFLAG_BDS32;
             MIPS_DEBUG("jr %s", regnames[rs]);
             break;
-        case OPC_JALRS:
         case OPC_JALR:
-        case OPC_JALRC:
             blink = rt;
             ctx->hflags |= MIPS_HFLAG_BR;
-            ctx->hflags |= (opc == OPC_JALRS
-                            ? MIPS_HFLAG_BDS16
-                            : MIPS_HFLAG_BDS32);
             MIPS_DEBUG("jalr %s, %s", regnames[rt], regnames[rs]);
             break;
         default:
@@ -3808,11 +3777,7 @@  static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
             tcg_gen_setcondi_tl(TCG_COND_GE, bcond, t0, 0);
             MIPS_DEBUG("bgezl %s, " TARGET_FMT_lx, regnames[rs], btgt);
             goto likely;
-        case OPC_BGEZALS:
         case OPC_BGEZAL:
-            ctx->hflags |= (opc == OPC_BGEZALS
-                            ? MIPS_HFLAG_BDS16
-                            : MIPS_HFLAG_BDS32);
             tcg_gen_setcondi_tl(TCG_COND_GE, bcond, t0, 0);
             MIPS_DEBUG("bgezal %s, " TARGET_FMT_lx, regnames[rs], btgt);
             blink = 31;
@@ -3856,11 +3821,7 @@  static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
             MIPS_DEBUG("bposge64 " TARGET_FMT_lx, btgt);
             goto not_likely;
 #endif
-        case OPC_BLTZALS:
         case OPC_BLTZAL:
-            ctx->hflags |= (opc == OPC_BLTZALS
-                            ? MIPS_HFLAG_BDS16
-                            : MIPS_HFLAG_BDS32);
             tcg_gen_setcondi_tl(TCG_COND_LT, bcond, t0, 0);
             blink = 31;
             MIPS_DEBUG("bltzal %s, " TARGET_FMT_lx, regnames[rs], btgt);
@@ -3884,13 +3845,21 @@  static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
                blink, ctx->hflags, btgt);
 
     ctx->btarget = btgt;
+
+    switch (delayslot_size) {
+    case 2:
+        ctx->hflags |= MIPS_HFLAG_BDS16;
+        break;
+    case 4:
+        ctx->hflags |= MIPS_HFLAG_BDS32;
+        break;
+    }
+
     if (blink > 0) {
         int post_delay = insn_bytes;
         int lowbit = !!(ctx->hflags & MIPS_HFLAG_M16);
 
-        if (opc != OPC_JALRC)
-            post_delay += ((ctx->hflags & MIPS_HFLAG_BDS16) ? 2 : 4);
-
+        post_delay += delayslot_size;
         tcg_gen_movi_tl(cpu_gpr[blink], ctx->pc + post_delay + lowbit);
     }
 
@@ -7124,7 +7093,7 @@  static void gen_compute_branch1(DisasContext *ctx, uint32_t op,
     MIPS_DEBUG("%s: cond %02x target " TARGET_FMT_lx, opn,
                ctx->hflags, btarget);
     ctx->btarget = btarget;
-
+    ctx->hflags |= MIPS_HFLAG_BDS32;
  out:
     tcg_temp_free_i32(t0);
 }
@@ -9667,15 +9636,15 @@  static int decode_extended_mips16_opc (CPUMIPSState *env, DisasContext *ctx)
         gen_addiupc(ctx, rx, imm, 0, 1);
         break;
     case M16_OPC_B:
-        gen_compute_branch(ctx, OPC_BEQ, 4, 0, 0, offset << 1);
+        gen_compute_branch(ctx, OPC_BEQ, 4, 0, 0, offset << 1, 0);
         /* No delay slot, so just process as a normal instruction */
         break;
     case M16_OPC_BEQZ:
-        gen_compute_branch(ctx, OPC_BEQ, 4, rx, 0, offset << 1);
+        gen_compute_branch(ctx, OPC_BEQ, 4, rx, 0, offset << 1, 0);
         /* No delay slot, so just process as a normal instruction */
         break;
     case M16_OPC_BNEQZ:
-        gen_compute_branch(ctx, OPC_BNE, 4, rx, 0, offset << 1);
+        gen_compute_branch(ctx, OPC_BNE, 4, rx, 0, offset << 1, 0);
         /* No delay slot, so just process as a normal instruction */
         break;
     case M16_OPC_SHIFT:
@@ -9733,10 +9702,10 @@  static int decode_extended_mips16_opc (CPUMIPSState *env, DisasContext *ctx)
     case M16_OPC_I8:
         switch (funct) {
         case I8_BTEQZ:
-            gen_compute_branch(ctx, OPC_BEQ, 4, 24, 0, offset << 1);
+            gen_compute_branch(ctx, OPC_BEQ, 4, 24, 0, offset << 1, 0);
             break;
         case I8_BTNEZ:
-            gen_compute_branch(ctx, OPC_BNE, 4, 24, 0, offset << 1);
+            gen_compute_branch(ctx, OPC_BNE, 4, 24, 0, offset << 1, 0);
             break;
         case I8_SWRASP:
             gen_st(ctx, OPC_SW, 31, 29, imm);
@@ -9864,7 +9833,7 @@  static int decode_mips16_opc (CPUMIPSState *env, DisasContext *ctx)
     case M16_OPC_B:
         offset = (ctx->opcode & 0x7ff) << 1;
         offset = (int16_t)(offset << 4) >> 4;
-        gen_compute_branch(ctx, OPC_BEQ, 2, 0, 0, offset);
+        gen_compute_branch(ctx, OPC_BEQ, 2, 0, 0, offset, 0);
         /* No delay slot, so just process as a normal instruction */
         break;
     case M16_OPC_JAL:
@@ -9872,16 +9841,18 @@  static int decode_mips16_opc (CPUMIPSState *env, DisasContext *ctx)
         offset = (((ctx->opcode & 0x1f) << 21)
                   | ((ctx->opcode >> 5) & 0x1f) << 16
                   | offset) << 2;
-        op = ((ctx->opcode >> 10) & 0x1) ? OPC_JALXS : OPC_JALS;
-        gen_compute_branch(ctx, op, 4, rx, ry, offset);
+        op = ((ctx->opcode >> 10) & 0x1) ? OPC_JALX : OPC_JAL;
+        gen_compute_branch(ctx, op, 4, rx, ry, offset, 2);
         n_bytes = 4;
         break;
     case M16_OPC_BEQZ:
-        gen_compute_branch(ctx, OPC_BEQ, 2, rx, 0, ((int8_t)ctx->opcode) << 1);
+        gen_compute_branch(ctx, OPC_BEQ, 2, rx, 0,
+                           ((int8_t)ctx->opcode) << 1, 0);
         /* No delay slot, so just process as a normal instruction */
         break;
     case M16_OPC_BNEQZ:
-        gen_compute_branch(ctx, OPC_BNE, 2, rx, 0, ((int8_t)ctx->opcode) << 1);
+        gen_compute_branch(ctx, OPC_BNE, 2, rx, 0,
+                           ((int8_t)ctx->opcode) << 1, 0);
         /* No delay slot, so just process as a normal instruction */
         break;
     case M16_OPC_SHIFT:
@@ -9954,11 +9925,11 @@  static int decode_mips16_opc (CPUMIPSState *env, DisasContext *ctx)
             switch (funct) {
             case I8_BTEQZ:
                 gen_compute_branch(ctx, OPC_BEQ, 2, 24, 0,
-                                   ((int8_t)ctx->opcode) << 1);
+                                   ((int8_t)ctx->opcode) << 1, 0);
                 break;
             case I8_BTNEZ:
                 gen_compute_branch(ctx, OPC_BNE, 2, 24, 0,
-                                   ((int8_t)ctx->opcode) << 1);
+                                   ((int8_t)ctx->opcode) << 1, 0);
                 break;
             case I8_SWRASP:
                 gen_st(ctx, OPC_SW, 31, 29, (ctx->opcode & 0xff) << 2);
@@ -10107,12 +10078,13 @@  static int decode_mips16_opc (CPUMIPSState *env, DisasContext *ctx)
                 int ra = (ctx->opcode >> 5) & 0x1;
 
                 if (link) {
-                    op = nd ? OPC_JALRC : OPC_JALRS;
+                    op = OPC_JALR;
                 } else {
                     op = OPC_JR;
                 }
 
-                gen_compute_branch(ctx, op, 2, ra ? 31 : rx, 31, 0);
+                gen_compute_branch(ctx, op, 2, ra ? 31 : rx, 31, 0,
+                                   (nd ? 0 : 2));
             }
             break;
         case RR_SDBBP:
@@ -10870,7 +10842,6 @@  static void gen_pool16c_insn(DisasContext *ctx)
 {
     int rd = mmreg((ctx->opcode >> 3) & 0x7);
     int rs = mmreg(ctx->opcode & 0x7);
-    int opc;
 
     switch (((ctx->opcode) >> 4) & 0x3f) {
     case NOT16 + 0:
@@ -10926,32 +10897,27 @@  static void gen_pool16c_insn(DisasContext *ctx)
         {
             int reg = ctx->opcode & 0x1f;
 
-            gen_compute_branch(ctx, OPC_JR, 2, reg, 0, 0);
+            gen_compute_branch(ctx, OPC_JR, 2, reg, 0, 0, 4);
         }
         break;
     case JRC16 + 0:
     case JRC16 + 1:
         {
             int reg = ctx->opcode & 0x1f;
-
-            gen_compute_branch(ctx, OPC_JR, 2, reg, 0, 0);
+            gen_compute_branch(ctx, OPC_JR, 2, reg, 0, 0, 0);
             /* Let normal delay slot handling in our caller take us
                to the branch target.  */
         }
         break;
     case JALR16 + 0:
     case JALR16 + 1:
-        opc = OPC_JALR;
-        goto do_jalr;
+        gen_compute_branch(ctx, OPC_JALR, 2, ctx->opcode & 0x1f, 31, 0, 4);
+        ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
+        break;
     case JALR16S + 0:
     case JALR16S + 1:
-        opc = OPC_JALRS;
-    do_jalr:
-        {
-            int reg = ctx->opcode & 0x1f;
-
-            gen_compute_branch(ctx, opc, 2, reg, 31, 0);
-        }
+        gen_compute_branch(ctx, OPC_JALR, 2, ctx->opcode & 0x1f, 31, 0, 2);
+        ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
         break;
     case MFHI16 + 0:
     case MFHI16 + 1:
@@ -10979,8 +10945,7 @@  static void gen_pool16c_insn(DisasContext *ctx)
     case JRADDIUSP + 1:
         {
             int imm = ZIMM(ctx->opcode, 0, 5);
-
-            gen_compute_branch(ctx, OPC_JR, 2, 31, 0, 0);
+            gen_compute_branch(ctx, OPC_JR, 2, 31, 0, 0, 0);
             gen_arith_imm(ctx, OPC_ADDIU, 29, 29, imm << 2);
             /* Let normal delay slot handling in our caller take us
                to the branch target.  */
@@ -11237,11 +11202,13 @@  static void gen_pool32axf (CPUMIPSState *env, DisasContext *ctx, int rt, int rs)
         switch (minor) {
         case JALR:
         case JALR_HB:
-            gen_compute_branch (ctx, OPC_JALR, 4, rs, rt, 0);
+            gen_compute_branch(ctx, OPC_JALR, 4, rs, rt, 0, 4);
+            ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
             break;
         case JALRS:
         case JALRS_HB:
-            gen_compute_branch (ctx, OPC_JALRS, 4, rs, rt, 0);
+            gen_compute_branch(ctx, OPC_JALR, 4, rs, rt, 0, 2);
+            ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
             break;
         default:
             goto pool32axf_invalid;
@@ -12131,30 +12098,32 @@  static void decode_micromips32_opc (CPUMIPSState *env, DisasContext *ctx,
         minor = (ctx->opcode >> 21) & 0x1f;
         switch (minor) {
         case BLTZ:
-            mips32_op = OPC_BLTZ;
-            goto do_branch;
+            gen_compute_branch(ctx, OPC_BLTZ, 4, rs, -1, imm << 1, 4);
+            break;
         case BLTZAL:
-            mips32_op = OPC_BLTZAL;
-            goto do_branch;
+            gen_compute_branch(ctx, OPC_BLTZAL, 4, rs, -1, imm << 1, 4);
+            ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
+            break;
         case BLTZALS:
-            mips32_op = OPC_BLTZALS;
-            goto do_branch;
+            gen_compute_branch(ctx, OPC_BLTZAL, 4, rs, -1, imm << 1, 2);
+            ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
+            break;
         case BGEZ:
-            mips32_op = OPC_BGEZ;
-            goto do_branch;
+            gen_compute_branch(ctx, OPC_BGEZ, 4, rs, -1, imm << 1, 4);
+            break;
         case BGEZAL:
-            mips32_op = OPC_BGEZAL;
-            goto do_branch;
+            gen_compute_branch(ctx, OPC_BGEZAL, 4, rs, -1, imm << 1, 4);
+            ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
+            break;
         case BGEZALS:
-            mips32_op = OPC_BGEZALS;
-            goto do_branch;
+            gen_compute_branch(ctx, OPC_BGEZAL, 4, rs, -1, imm << 1, 2);
+            ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
+            break;
         case BLEZ:
-            mips32_op = OPC_BLEZ;
-            goto do_branch;
+            gen_compute_branch(ctx, OPC_BLEZ, 4, rs, -1, imm << 1, 4);
+            break;
         case BGTZ:
-            mips32_op = OPC_BGTZ;
-        do_branch:
-            gen_compute_branch(ctx, mips32_op, 4, rs, -1, imm << 1);
+            gen_compute_branch(ctx, OPC_BGTZ, 4, rs, -1, imm << 1, 4);
             break;
 
             /* Traps */
@@ -12182,7 +12151,7 @@  static void decode_micromips32_opc (CPUMIPSState *env, DisasContext *ctx,
         case BNEZC:
         case BEQZC:
             gen_compute_branch(ctx, minor == BNEZC ? OPC_BNE : OPC_BEQ,
-                               4, rs, 0, imm << 1);
+                               4, rs, 0, imm << 1, 0);
             /* Compact branches don't have a delay slot, so just let
                the normal delay slot handling take us to the branch
                target. */
@@ -12320,25 +12289,28 @@  static void decode_micromips32_opc (CPUMIPSState *env, DisasContext *ctx,
         break;
     case JALX32:
         offset = (int32_t)(ctx->opcode & 0x3FFFFFF) << 2;
-        gen_compute_branch(ctx, OPC_JALX, 4, rt, rs, offset);
+        gen_compute_branch(ctx, OPC_JALX, 4, rt, rs, offset, 4);
+        ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
         break;
     case JALS32:
         offset = (int32_t)(ctx->opcode & 0x3FFFFFF) << 1;
-        gen_compute_branch(ctx, OPC_JALS, 4, rt, rs, offset);
+        gen_compute_branch(ctx, OPC_JAL, 4, rt, rs, offset, 2);
+        ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
         break;
     case BEQ32:
-        gen_compute_branch(ctx, OPC_BEQ, 4, rt, rs, imm << 1);
+        gen_compute_branch(ctx, OPC_BEQ, 4, rt, rs, imm << 1, 4);
         break;
     case BNE32:
-        gen_compute_branch(ctx, OPC_BNE, 4, rt, rs, imm << 1);
+        gen_compute_branch(ctx, OPC_BNE, 4, rt, rs, imm << 1, 4);
         break;
     case J32:
         gen_compute_branch(ctx, OPC_J, 4, rt, rs,
-                           (int32_t)(ctx->opcode & 0x3FFFFFF) << 1);
+                           (int32_t)(ctx->opcode & 0x3FFFFFF) << 1, 4);
         break;
     case JAL32:
         gen_compute_branch(ctx, OPC_JAL, 4, rt, rs,
-                           (int32_t)(ctx->opcode & 0x3FFFFFF) << 1);
+                           (int32_t)(ctx->opcode & 0x3FFFFFF) << 1, 4);
+        ctx->hflags |= MIPS_HFLAG_BDS_STRICT;
         break;
         /* Floating point (COP1) */
     case LWC132:
@@ -12422,84 +12394,41 @@  static int decode_micromips_opc (CPUMIPSState *env, DisasContext *ctx)
 
     op = (ctx->opcode >> 10) & 0x3f;
     /* Enforce properly-sized instructions in a delay slot */
-    if (ctx->hflags & MIPS_HFLAG_BMASK) {
-        int bits = ctx->hflags & MIPS_HFLAG_BMASK_EXT;
-
-        switch (op) {
-        case POOL32A:
-        case POOL32B:
-        case POOL32I:
-        case POOL32C:
-        case ADDI32:
-        case ADDIU32:
-        case ORI32:
-        case XORI32:
-        case SLTI32:
-        case SLTIU32:
-        case ANDI32:
-        case JALX32:
-        case LBU32:
-        case LHU32:
-        case POOL32F:
-        case JALS32:
-        case BEQ32:
-        case BNE32:
-        case J32:
-        case JAL32:
-        case SB32:
-        case SH32:
-        case POOL32S:
-        case ADDIUPC:
-        case SWC132:
-        case SDC132:
-        case SD32:
-        case SW32:
-        case LB32:
-        case LH32:
-        case DADDIU32:
-        case LWC132:
-        case LDC132:
-        case LD32:
-        case LW32:
-            if (bits & MIPS_HFLAG_BDS16) {
+    if (ctx->hflags & MIPS_HFLAG_BDS_STRICT) {
+        switch (op & 0x7) { /* MSB-3..MSB-5 */
+        case 0:
+        /* POOL32A, POOL32B, POOL32I, POOL32C */
+        case 4:
+        /* ADDI32, ADDIU32, ORI32, XORI32, SLTI32, SLTIU32, ANDI32, JALX32 */
+        case 5:
+        /* LBU32, LHU32, POOL32F, JALS32, BEQ32, BNE32, J32, JAL32 */
+        case 6:
+        /* SB32, SH32, POOL32S, ADDIUPC, SWC132, SDC132, SD32, SW32 */
+        case 7:
+        /* LB32, LH32, DADDIU32, LWC132, LDC132, LD32, LW32 */
+            if (ctx->hflags & MIPS_HFLAG_BDS16) {
                 generate_exception(ctx, EXCP_RI);
                 /* Just stop translation; the user is confused.  */
                 ctx->bstate = BS_STOP;
                 return 2;
             }
             break;
-        case POOL16A:
-        case POOL16B:
-        case POOL16C:
-        case LWGP16:
-        case POOL16F:
-        case LBU16:
-        case LHU16:
-        case LWSP16:
-        case LW16:
-        case SB16:
-        case SH16:
-        case SWSP16:
-        case SW16:
-        case MOVE16:
-        case ANDI16:
-        case POOL16D:
-        case POOL16E:
-        case BEQZ16:
-        case BNEZ16:
-        case B16:
-        case LI16:
-            if (bits & MIPS_HFLAG_BDS32) {
+        case 1:
+        /* POOL16A, POOL16B, POOL16C, LWGP16, POOL16F */
+        case 2:
+        /* LBU16, LHU16, LWSP16, LW16, SB16, SH16, SWSP16, SW16 */
+        case 3:
+        /* MOVE16, ANDI16, POOL16D, POOL16E, BEQZ16, BNEZ16, B16, LI16 */
+            if (ctx->hflags & MIPS_HFLAG_BDS32) {
                 generate_exception(ctx, EXCP_RI);
                 /* Just stop translation; the user is confused.  */
                 ctx->bstate = BS_STOP;
                 return 2;
             }
             break;
-        default:
-            break;
         }
     }
+
     switch (op) {
     case POOL16A:
         {
@@ -12680,13 +12609,13 @@  static int decode_micromips_opc (CPUMIPSState *env, DisasContext *ctx)
         break;
     case B16:
         gen_compute_branch(ctx, OPC_BEQ, 2, 0, 0,
-                           SIMM(ctx->opcode, 0, 10) << 1);
+                           SIMM(ctx->opcode, 0, 10) << 1, 4);
         break;
     case BNEZ16:
     case BEQZ16:
         gen_compute_branch(ctx, op == BNEZ16 ? OPC_BNE : OPC_BEQ, 2,
                            mmreg(uMIPS_RD(ctx->opcode)),
-                           0, SIMM(ctx->opcode, 0, 7) << 1);
+                           0, SIMM(ctx->opcode, 0, 7) << 1, 4);
         break;
     case LI16:
         {
@@ -14556,7 +14485,7 @@  static void decode_opc (CPUMIPSState *env, DisasContext *ctx)
             gen_muldiv(ctx, op1, 0, rs, rt);
             break;
         case OPC_JR ... OPC_JALR:
-            gen_compute_branch(ctx, op1, 4, rs, rd, sa);
+            gen_compute_branch(ctx, op1, 4, rs, rd, sa, 4);
             break;
         case OPC_TGE ... OPC_TEQ: /* Traps */
         case OPC_TNE:
@@ -15322,7 +15251,7 @@  static void decode_opc (CPUMIPSState *env, DisasContext *ctx)
         switch (op1) {
         case OPC_BLTZ ... OPC_BGEZL: /* REGIMM branches */
         case OPC_BLTZAL ... OPC_BGEZALL:
-            gen_compute_branch(ctx, op1, 4, rs, -1, imm << 2);
+            gen_compute_branch(ctx, op1, 4, rs, -1, imm << 2, 4);
             break;
         case OPC_TGEI ... OPC_TEQI: /* REGIMM traps */
         case OPC_TNEI:
@@ -15337,7 +15266,7 @@  static void decode_opc (CPUMIPSState *env, DisasContext *ctx)
         case OPC_BPOSGE64:
 #endif
             check_dsp(ctx);
-            gen_compute_branch(ctx, op1, 4, -1, -2, (int32_t)imm << 2);
+            gen_compute_branch(ctx, op1, 4, -1, -2, (int32_t)imm << 2, 4);
             break;
         default:            /* Invalid */
             MIPS_INVAL("regimm");
@@ -15448,11 +15377,11 @@  static void decode_opc (CPUMIPSState *env, DisasContext *ctx)
          break;
     case OPC_J ... OPC_JAL: /* Jump */
          offset = (int32_t)(ctx->opcode & 0x3FFFFFF) << 2;
-         gen_compute_branch(ctx, op, 4, rs, rt, offset);
+         gen_compute_branch(ctx, op, 4, rs, rt, offset, 4);
          break;
     case OPC_BEQ ... OPC_BGTZ: /* Branch */
     case OPC_BEQL ... OPC_BGTZL:
-         gen_compute_branch(ctx, op, 4, rs, rt, imm << 2);
+            gen_compute_branch(ctx, op, 4, rs, rt, imm << 2, 4);
          break;
     case OPC_LB ... OPC_LWR: /* Load and stores */
     case OPC_LL:
@@ -15617,7 +15546,7 @@  static void decode_opc (CPUMIPSState *env, DisasContext *ctx)
     case OPC_JALX:
         check_insn(ctx, ASE_MIPS16 | ASE_MICROMIPS);
         offset = (int32_t)(ctx->opcode & 0x3FFFFFF) << 2;
-        gen_compute_branch(ctx, op, 4, rs, rt, offset);
+        gen_compute_branch(ctx, op, 4, rs, rt, offset, 4);
         break;
     case OPC_MDMX:
         check_insn(ctx, ASE_MDMX);
@@ -15719,6 +15648,13 @@  gen_intermediate_code_internal(MIPSCPU *cpu, TranslationBlock *tb,
             ctx.bstate = BS_STOP;
             break;
         }
+        if (ctx.hflags & MIPS_HFLAG_BMASK) {
+            if (!(ctx.hflags & MIPS_HFLAG_BDS16) &&
+                    !(ctx.hflags & MIPS_HFLAG_BDS32)) {
+                is_delay = 1;
+                /* No delay slot is required, so generate the branch now */
+            }
+        }
         if (is_delay) {
             handle_delay_slot(&ctx, insn_bytes);
         }