diff mbox

[09/12] tcg-sparc: Drop use of Bicc in favor of BPcc

Message ID 1348710942-3040-10-git-send-email-rth@twiddle.net
State New
Headers show

Commit Message

Richard Henderson Sept. 27, 2012, 1:55 a.m. UTC
Now that we're always sparcv9, we need not bother using Bicc for
32-bit branches and BPcc for 64-bit branches, and can instead always
use BPcc.

New interfaces allow less direct use of tcg_out32 and raw numbers
inside the qemu_ld/st routines.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/sparc/tcg-target.c | 305 +++++++++++++++++++++++--------------------------
 1 file changed, 145 insertions(+), 160 deletions(-)

Comments

Blue Swirl Sept. 29, 2012, 12:07 p.m. UTC | #1
On Thu, Sep 27, 2012 at 1:55 AM, Richard Henderson <rth@twiddle.net> wrote:
> Now that we're always sparcv9, we can not bother using Bicc for
> 32-bit branches and BPcc for 64-bit branches and instead always
> use BPcc.

But then the branch range is smaller (19 bits). The code buffer is
32M; wouldn't we use BPcc for TB linking too, so that the range could
be exceeded?

>
> New interfaces allow less direct use of tcg_out32 and raw numbers
> inside the qemu_ld/st routines.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/sparc/tcg-target.c | 305 +++++++++++++++++++++++--------------------------
>  1 file changed, 145 insertions(+), 160 deletions(-)
>
> diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
> index 1618996..f3eb246 100644
> --- a/tcg/sparc/tcg-target.c
> +++ b/tcg/sparc/tcg-target.c
> @@ -115,101 +115,6 @@ static const int tcg_target_call_oarg_regs[] = {
>      TCG_REG_O3,
>  };
>
> -static inline int check_fit_tl(tcg_target_long val, unsigned int bits)
> -{
> -    return (val << ((sizeof(tcg_target_long) * 8 - bits))
> -            >> (sizeof(tcg_target_long) * 8 - bits)) == val;
> -}
> -
> -static inline int check_fit_i32(uint32_t val, unsigned int bits)
> -{
> -    return ((val << (32 - bits)) >> (32 - bits)) == val;
> -}
> -
> -static void patch_reloc(uint8_t *code_ptr, int type,
> -                        tcg_target_long value, tcg_target_long addend)
> -{
> -    value += addend;
> -    switch (type) {
> -    case R_SPARC_32:
> -        if (value != (uint32_t)value)
> -            tcg_abort();
> -        *(uint32_t *)code_ptr = value;
> -        break;
> -    case R_SPARC_WDISP22:
> -        value -= (long)code_ptr;
> -        value >>= 2;
> -        if (!check_fit_tl(value, 22))
> -            tcg_abort();
> -        *(uint32_t *)code_ptr = ((*(uint32_t *)code_ptr) & ~0x3fffff) | value;
> -        break;
> -    case R_SPARC_WDISP19:
> -        value -= (long)code_ptr;
> -        value >>= 2;
> -        if (!check_fit_tl(value, 19))
> -            tcg_abort();
> -        *(uint32_t *)code_ptr = ((*(uint32_t *)code_ptr) & ~0x7ffff) | value;
> -        break;
> -    default:
> -        tcg_abort();
> -    }
> -}
> -
> -/* parse target specific constraints */
> -static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
> -{
> -    const char *ct_str;
> -
> -    ct_str = *pct_str;
> -    switch (ct_str[0]) {
> -    case 'r':
> -        ct->ct |= TCG_CT_REG;
> -        tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
> -        break;
> -    case 'L': /* qemu_ld/st constraint */
> -        ct->ct |= TCG_CT_REG;
> -        tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
> -        // Helper args
> -        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0);
> -        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1);
> -        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2);
> -        break;
> -    case 'I':
> -        ct->ct |= TCG_CT_CONST_S11;
> -        break;
> -    case 'J':
> -        ct->ct |= TCG_CT_CONST_S13;
> -        break;
> -    case 'Z':
> -        ct->ct |= TCG_CT_CONST_ZERO;
> -        break;
> -    default:
> -        return -1;
> -    }
> -    ct_str++;
> -    *pct_str = ct_str;
> -    return 0;
> -}
> -
> -/* test if a constant matches the constraint */
> -static inline int tcg_target_const_match(tcg_target_long val,
> -                                         const TCGArgConstraint *arg_ct)
> -{
> -    int ct;
> -
> -    ct = arg_ct->ct;
> -    if (ct & TCG_CT_CONST)
> -        return 1;
> -    else if ((ct & TCG_CT_CONST_ZERO) && val == 0)
> -        return 1;
> -    else if ((ct & TCG_CT_CONST_S11) && check_fit_tl(val, 11))
> -        return 1;
> -    else if ((ct & TCG_CT_CONST_S13) && check_fit_tl(val, 13))
> -        return 1;
> -    else
> -        return 0;
> -}
> -
>  #define INSN_OP(x)  ((x) << 30)
>  #define INSN_OP2(x) ((x) << 22)
>  #define INSN_OP3(x) ((x) << 19)
> @@ -222,9 +127,8 @@ static inline int tcg_target_const_match(tcg_target_long val,
>  #define INSN_IMM11(x) ((1 << 13) | ((x) & 0x7ff))
>  #define INSN_IMM13(x) ((1 << 13) | ((x) & 0x1fff))
>  #define INSN_OFF19(x) (((x) >> 2) & 0x07ffff)
> -#define INSN_OFF22(x) (((x) >> 2) & 0x3fffff)
> +#define INSN_COND(x) ((x) << 25)
>
> -#define INSN_COND(x, a) (((x) << 25) | ((a) << 29))
>  #define COND_N     0x0
>  #define COND_E     0x1
>  #define COND_LE    0x2
> @@ -241,11 +145,17 @@ static inline int tcg_target_const_match(tcg_target_long val,
>  #define COND_CC    0xd
>  #define COND_POS   0xe
>  #define COND_VC    0xf
> -#define BA         (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x2))
> +#define BA         (INSN_OP(0) | INSN_COND(COND_A) | INSN_OP2(0x2))
>
>  #define MOVCC_ICC  (1 << 18)
>  #define MOVCC_XCC  (1 << 18 | 1 << 12)
>
> +#define BPCC_ICC   0
> +#define BPCC_XCC   (2 << 20)
> +#define BPCC_PT    (1 << 19)
> +#define BPCC_PN    0
> +#define BPCC_A     (1 << 29)
> +
>  #define ARITH_ADD  (INSN_OP(2) | INSN_OP3(0x00))
>  #define ARITH_ADDCC (INSN_OP(2) | INSN_OP3(0x10))
>  #define ARITH_AND  (INSN_OP(2) | INSN_OP3(0x01))
> @@ -318,6 +228,99 @@ static inline int tcg_target_const_match(tcg_target_long val,
>  #define STW_LE     (STWA  | INSN_ASI(ASI_PRIMARY_LITTLE))
>  #define STX_LE     (STXA  | INSN_ASI(ASI_PRIMARY_LITTLE))
>
> +static inline int check_fit_tl(tcg_target_long val, unsigned int bits)
> +{
> +    return (val << ((sizeof(tcg_target_long) * 8 - bits))
> +            >> (sizeof(tcg_target_long) * 8 - bits)) == val;
> +}
> +
> +static inline int check_fit_i32(uint32_t val, unsigned int bits)
> +{
> +    return ((val << (32 - bits)) >> (32 - bits)) == val;
> +}
> +
> +static void patch_reloc(uint8_t *code_ptr, int type,
> +                        tcg_target_long value, tcg_target_long addend)
> +{
> +    uint32_t insn;
> +    value += addend;
> +    switch (type) {
> +    case R_SPARC_32:
> +        if (value != (uint32_t)value) {
> +            tcg_abort();
> +        }
> +        *(uint32_t *)code_ptr = value;
> +        break;
> +    case R_SPARC_WDISP19:
> +        value -= (long)code_ptr;
> +        if (!check_fit_tl(value >> 2, 19)) {
> +            tcg_abort();
> +        }
> +        insn = *(uint32_t *)code_ptr;
> +        insn &= ~INSN_OFF19(-1);
> +        insn |= INSN_OFF19(value);
> +        *(uint32_t *)code_ptr = insn;
> +        break;
> +    default:
> +        tcg_abort();
> +    }
> +}
> +
> +/* parse target specific constraints */
> +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
> +{
> +    const char *ct_str;
> +
> +    ct_str = *pct_str;
> +    switch (ct_str[0]) {
> +    case 'r':
> +        ct->ct |= TCG_CT_REG;
> +        tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
> +        break;
> +    case 'L': /* qemu_ld/st constraint */
> +        ct->ct |= TCG_CT_REG;
> +        tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
> +        // Helper args

Please use C89 comments.

> +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0);
> +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1);
> +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2);
> +        break;
> +    case 'I':
> +        ct->ct |= TCG_CT_CONST_S11;
> +        break;
> +    case 'J':
> +        ct->ct |= TCG_CT_CONST_S13;
> +        break;
> +    case 'Z':
> +        ct->ct |= TCG_CT_CONST_ZERO;
> +        break;
> +    default:
> +        return -1;
> +    }
> +    ct_str++;
> +    *pct_str = ct_str;
> +    return 0;
> +}
> +
> +/* test if a constant matches the constraint */
> +static inline int tcg_target_const_match(tcg_target_long val,
> +                                         const TCGArgConstraint *arg_ct)
> +{
> +    int ct;
> +
> +    ct = arg_ct->ct;
> +    if (ct & TCG_CT_CONST)

Please add braces.

> +        return 1;
> +    else if ((ct & TCG_CT_CONST_ZERO) && val == 0)
> +        return 1;
> +    else if ((ct & TCG_CT_CONST_S11) && check_fit_tl(val, 11))
> +        return 1;
> +    else if ((ct & TCG_CT_CONST_S13) && check_fit_tl(val, 13))
> +        return 1;
> +    else
> +        return 0;
> +}
> +
>  static inline void tcg_out_arith(TCGContext *s, int rd, int rs1, int rs2,
>                                   int op)
>  {
> @@ -486,39 +489,6 @@ static inline void tcg_out_nop(TCGContext *s)
>      tcg_out_sethi(s, TCG_REG_G0, 0);
>  }
>
> -static void tcg_out_branch_i32(TCGContext *s, int opc, int label_index)
> -{
> -    TCGLabel *l = &s->labels[label_index];
> -    uint32_t off22;
> -
> -    if (l->has_value) {
> -        off22 = INSN_OFF22(l->u.value - (unsigned long)s->code_ptr);
> -    } else {
> -        /* Make sure to preserve destinations during retranslation.  */
> -        off22 = *(uint32_t *)s->code_ptr & INSN_OFF22(-1);
> -        tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP22, label_index, 0);
> -    }
> -    tcg_out32(s, INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x2) | off22);
> -}
> -
> -#if TCG_TARGET_REG_BITS == 64
> -static void tcg_out_branch_i64(TCGContext *s, int opc, int label_index)
> -{
> -    TCGLabel *l = &s->labels[label_index];
> -    uint32_t off19;
> -
> -    if (l->has_value) {
> -        off19 = INSN_OFF19(l->u.value - (unsigned long)s->code_ptr);
> -    } else {
> -        /* Make sure to preserve destinations during retranslation.  */
> -        off19 = *(uint32_t *)s->code_ptr & INSN_OFF19(-1);
> -        tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, label_index, 0);
> -    }
> -    tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x1) |
> -                  (0x5 << 19) | off19));
> -}
> -#endif
> -
>  static const uint8_t tcg_cond_to_bcond[] = {
>      [TCG_COND_EQ] = COND_E,
>      [TCG_COND_NE] = COND_NE,
> @@ -532,17 +502,36 @@ static const uint8_t tcg_cond_to_bcond[] = {
>      [TCG_COND_GTU] = COND_GU,
>  };
>
> +static void tcg_out_bpcc0(TCGContext *s, int scond, int flags, int off19)
> +{
> +    tcg_out32(s, INSN_OP(0) | INSN_OP2(1) | INSN_COND(scond) | flags | off19);
> +}
> +
> +static void tcg_out_bpcc(TCGContext *s, int scond, int flags, int label)
> +{
> +    TCGLabel *l = &s->labels[label];
> +    int off19;
> +
> +    if (l->has_value) {
> +        off19 = INSN_OFF19(l->u.value - (unsigned long)s->code_ptr);
> +    } else {
> +        /* Make sure to preserve destinations during retranslation.  */
> +        off19 = *(uint32_t *)s->code_ptr & INSN_OFF19(-1);
> +        tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, label, 0);
> +    }
> +    tcg_out_bpcc0(s, scond, flags, off19);
> +}
> +
>  static void tcg_out_cmp(TCGContext *s, TCGArg c1, TCGArg c2, int c2const)
>  {
>      tcg_out_arithc(s, TCG_REG_G0, c1, c2, c2const, ARITH_SUBCC);
>  }
>
> -static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond,
> -                               TCGArg arg1, TCGArg arg2, int const_arg2,
> -                               int label_index)
> +static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, TCGArg arg1,
> +                               TCGArg arg2, int const_arg2, int label)
>  {
>      tcg_out_cmp(s, arg1, arg2, const_arg2);
> -    tcg_out_branch_i32(s, tcg_cond_to_bcond[cond], label_index);
> +    tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_ICC | BPCC_PT, label);
>      tcg_out_nop(s);
>  }
>
> @@ -563,12 +552,11 @@ static void tcg_out_movcond_i32(TCGContext *s, TCGCond cond, TCGArg ret,
>  }
>
>  #if TCG_TARGET_REG_BITS == 64
> -static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond,
> -                               TCGArg arg1, TCGArg arg2, int const_arg2,
> -                               int label_index)
> +static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGArg arg1,
> +                               TCGArg arg2, int const_arg2, int label)
>  {
>      tcg_out_cmp(s, arg1, arg2, const_arg2);
> -    tcg_out_branch_i64(s, tcg_cond_to_bcond[cond], label_index);
> +    tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_XCC | BPCC_PT, label);
>      tcg_out_nop(s);
>  }
>
> @@ -585,32 +573,32 @@ static void tcg_out_brcond2_i32(TCGContext *s, TCGCond cond,
>                                  TCGArg bl, int blconst,
>                                  TCGArg bh, int bhconst, int label_dest)
>  {
> -    int cc, label_next = gen_new_label();
> +    int scond, label_next = gen_new_label();
>
>      tcg_out_cmp(s, ah, bh, bhconst);
>
>      /* Note that we fill one of the delay slots with the second compare.  */
>      switch (cond) {
>      case TCG_COND_EQ:
> -        tcg_out_branch_i32(s, COND_NE, label_next);
> +        tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_next);
>          tcg_out_cmp(s, al, bl, blconst);
> -        tcg_out_branch_i32(s, COND_E, label_dest);
> +        tcg_out_bpcc(s, COND_E, BPCC_ICC | BPCC_PT, label_dest);
>          break;
>
>      case TCG_COND_NE:
> -        tcg_out_branch_i32(s, COND_NE, label_dest);
> +        tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_dest);
>          tcg_out_cmp(s, al, bl, blconst);
> -        tcg_out_branch_i32(s, COND_NE, label_dest);
> +        tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_dest);
>          break;
>
>      default:
> -        cc = tcg_cond_to_bcond[tcg_high_cond(cond)];
> -        tcg_out_branch_i32(s, cc, label_dest);
> +        scond = tcg_cond_to_bcond[tcg_high_cond(cond)];
> +        tcg_out_bpcc(s, scond, BPCC_ICC | BPCC_PT, label_dest);
>          tcg_out_nop(s);
> -        tcg_out_branch_i32(s, COND_NE, label_next);
> +        tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_next);
>          tcg_out_cmp(s, al, bl, blconst);
> -        cc = tcg_cond_to_bcond[tcg_unsigned_cond(cond)];
> -        tcg_out_branch_i32(s, cc, label_dest);
> +        scond = tcg_cond_to_bcond[tcg_unsigned_cond(cond)];
> +        tcg_out_bpcc(s, scond, BPCC_ICC | BPCC_PT, label_dest);
>          break;
>      }
>      tcg_out_nop(s);
> @@ -903,8 +891,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop)
>
>          /* bne,pn %[xi]cc, label0 */
>          label_ptr[0] = (uint32_t *)s->code_ptr;
> -        tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_NE, 0) | INSN_OP2(0x1)
> -                      | ((TARGET_LONG_BITS == 64) << 21)));
> +        tcg_out_bpcc0(s, COND_NE, BPCC_PN
> +                      | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
>
>          /* TLB Hit.  */
>          /* Load all 64-bits into an O/G register.  */
> @@ -919,8 +907,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop)
>
>          /* b,a,pt label1 */
>          label_ptr[1] = (uint32_t *)s->code_ptr;
> -        tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x1)
> -                      | (1 << 29) | (1 << 19)));
> +        tcg_out_bpcc0(s, COND_A, BPCC_A | BPCC_PT, 0);
>      } else {
>          /* The fast path is exactly one insn.  Thus we can perform the
>             entire TLB Hit in the (annulled) delay slot of the branch
> @@ -929,9 +916,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop)
>          /* beq,a,pt %[xi]cc, label0 */
>          label_ptr[0] = NULL;
>          label_ptr[1] = (uint32_t *)s->code_ptr;
> -        tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1)
> -                      | ((TARGET_LONG_BITS == 64) << 21)
> -                      | (1 << 29) | (1 << 19)));
> +        tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
> +                      | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
>          /* delay slot */
>          tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_O1, qemu_ld_opc[sizeop]);
>      }
> @@ -1050,9 +1036,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int sizeop)
>         TLB Hit in the (annulled) delay slot of the branch over TLB Miss.  */
>      /* beq,a,pt %[xi]cc, label0 */
>      label_ptr = (uint32_t *)s->code_ptr;
> -    tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1)
> -                  | ((TARGET_LONG_BITS == 64) << 21)
> -                  | (1 << 29) | (1 << 19)));
> +    tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
> +                  | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
>      /* delay slot */
>      tcg_out_ldst_rr(s, datafull, addr_reg, TCG_REG_O1, qemu_st_opc[sizeop]);
>
> @@ -1144,7 +1129,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
>          break;
>      case INDEX_op_jmp:
>      case INDEX_op_br:
> -        tcg_out_branch_i32(s, COND_A, args[0]);
> +        tcg_out_bpcc(s, COND_A, BPCC_PT, args[0]);

Here we could check if the range is OK and use Bicc otherwise.

>          tcg_out_nop(s);
>          break;
>      case INDEX_op_movi_i32:
> --
> 1.7.11.4
>
Richard Henderson Sept. 29, 2012, 5:21 p.m. UTC | #2
On 2012-09-29 05:07, Blue Swirl wrote:
> On Thu, Sep 27, 2012 at 1:55 AM, Richard Henderson <rth@twiddle.net> wrote:
>> Now that we're always sparcv9, we can not bother using Bicc for
>> 32-bit branches and BPcc for 64-bit branches and instead always
>> use BPcc.
> 
> But then the branch range is smaller (19 bits). The code buffer is
> 32M; wouldn't we use BPcc for TB linking too, so that the range could
> be exceeded?

We use CALL for TB linking, with its 30 bit range.

BPcc is only needed within a TB, which should never come close to 2MB.


r~
diff mbox

Patch

diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index 1618996..f3eb246 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -115,101 +115,6 @@  static const int tcg_target_call_oarg_regs[] = {
     TCG_REG_O3,
 };
 
-static inline int check_fit_tl(tcg_target_long val, unsigned int bits)
-{
-    return (val << ((sizeof(tcg_target_long) * 8 - bits))
-            >> (sizeof(tcg_target_long) * 8 - bits)) == val;
-}
-
-static inline int check_fit_i32(uint32_t val, unsigned int bits)
-{
-    return ((val << (32 - bits)) >> (32 - bits)) == val;
-}
-
-static void patch_reloc(uint8_t *code_ptr, int type,
-                        tcg_target_long value, tcg_target_long addend)
-{
-    value += addend;
-    switch (type) {
-    case R_SPARC_32:
-        if (value != (uint32_t)value)
-            tcg_abort();
-        *(uint32_t *)code_ptr = value;
-        break;
-    case R_SPARC_WDISP22:
-        value -= (long)code_ptr;
-        value >>= 2;
-        if (!check_fit_tl(value, 22))
-            tcg_abort();
-        *(uint32_t *)code_ptr = ((*(uint32_t *)code_ptr) & ~0x3fffff) | value;
-        break;
-    case R_SPARC_WDISP19:
-        value -= (long)code_ptr;
-        value >>= 2;
-        if (!check_fit_tl(value, 19))
-            tcg_abort();
-        *(uint32_t *)code_ptr = ((*(uint32_t *)code_ptr) & ~0x7ffff) | value;
-        break;
-    default:
-        tcg_abort();
-    }
-}
-
-/* parse target specific constraints */
-static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
-{
-    const char *ct_str;
-
-    ct_str = *pct_str;
-    switch (ct_str[0]) {
-    case 'r':
-        ct->ct |= TCG_CT_REG;
-        tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
-        break;
-    case 'L': /* qemu_ld/st constraint */
-        ct->ct |= TCG_CT_REG;
-        tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
-        // Helper args
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2);
-        break;
-    case 'I':
-        ct->ct |= TCG_CT_CONST_S11;
-        break;
-    case 'J':
-        ct->ct |= TCG_CT_CONST_S13;
-        break;
-    case 'Z':
-        ct->ct |= TCG_CT_CONST_ZERO;
-        break;
-    default:
-        return -1;
-    }
-    ct_str++;
-    *pct_str = ct_str;
-    return 0;
-}
-
-/* test if a constant matches the constraint */
-static inline int tcg_target_const_match(tcg_target_long val,
-                                         const TCGArgConstraint *arg_ct)
-{
-    int ct;
-
-    ct = arg_ct->ct;
-    if (ct & TCG_CT_CONST)
-        return 1;
-    else if ((ct & TCG_CT_CONST_ZERO) && val == 0)
-        return 1;
-    else if ((ct & TCG_CT_CONST_S11) && check_fit_tl(val, 11))
-        return 1;
-    else if ((ct & TCG_CT_CONST_S13) && check_fit_tl(val, 13))
-        return 1;
-    else
-        return 0;
-}
-
 #define INSN_OP(x)  ((x) << 30)
 #define INSN_OP2(x) ((x) << 22)
 #define INSN_OP3(x) ((x) << 19)
@@ -222,9 +127,8 @@  static inline int tcg_target_const_match(tcg_target_long val,
 #define INSN_IMM11(x) ((1 << 13) | ((x) & 0x7ff))
 #define INSN_IMM13(x) ((1 << 13) | ((x) & 0x1fff))
 #define INSN_OFF19(x) (((x) >> 2) & 0x07ffff)
-#define INSN_OFF22(x) (((x) >> 2) & 0x3fffff)
+#define INSN_COND(x) ((x) << 25)
 
-#define INSN_COND(x, a) (((x) << 25) | ((a) << 29))
 #define COND_N     0x0
 #define COND_E     0x1
 #define COND_LE    0x2
@@ -241,11 +145,17 @@  static inline int tcg_target_const_match(tcg_target_long val,
 #define COND_CC    0xd
 #define COND_POS   0xe
 #define COND_VC    0xf
-#define BA         (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x2))
+#define BA         (INSN_OP(0) | INSN_COND(COND_A) | INSN_OP2(0x2))
 
 #define MOVCC_ICC  (1 << 18)
 #define MOVCC_XCC  (1 << 18 | 1 << 12)
 
+#define BPCC_ICC   0
+#define BPCC_XCC   (2 << 20)
+#define BPCC_PT    (1 << 19)
+#define BPCC_PN    0
+#define BPCC_A     (1 << 29)
+
 #define ARITH_ADD  (INSN_OP(2) | INSN_OP3(0x00))
 #define ARITH_ADDCC (INSN_OP(2) | INSN_OP3(0x10))
 #define ARITH_AND  (INSN_OP(2) | INSN_OP3(0x01))
@@ -318,6 +228,99 @@  static inline int tcg_target_const_match(tcg_target_long val,
 #define STW_LE     (STWA  | INSN_ASI(ASI_PRIMARY_LITTLE))
 #define STX_LE     (STXA  | INSN_ASI(ASI_PRIMARY_LITTLE))
 
+static inline int check_fit_tl(tcg_target_long val, unsigned int bits)
+{
+    return (val << ((sizeof(tcg_target_long) * 8 - bits))
+            >> (sizeof(tcg_target_long) * 8 - bits)) == val;
+}
+
+static inline int check_fit_i32(uint32_t val, unsigned int bits)
+{
+    return ((val << (32 - bits)) >> (32 - bits)) == val;
+}
+
+static void patch_reloc(uint8_t *code_ptr, int type,
+                        tcg_target_long value, tcg_target_long addend)
+{
+    uint32_t insn;
+    value += addend;
+    switch (type) {
+    case R_SPARC_32:
+        if (value != (uint32_t)value) {
+            tcg_abort();
+        }
+        *(uint32_t *)code_ptr = value;
+        break;
+    case R_SPARC_WDISP19:
+        value -= (long)code_ptr;
+        if (!check_fit_tl(value >> 2, 19)) {
+            tcg_abort();
+        }
+        insn = *(uint32_t *)code_ptr;
+        insn &= ~INSN_OFF19(-1);
+        insn |= INSN_OFF19(value);
+        *(uint32_t *)code_ptr = insn;
+        break;
+    default:
+        tcg_abort();
+    }
+}
+
+/* parse target specific constraints */
+static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
+{
+    const char *ct_str;
+
+    ct_str = *pct_str;
+    switch (ct_str[0]) {
+    case 'r':
+        ct->ct |= TCG_CT_REG;
+        tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
+        break;
+    case 'L': /* qemu_ld/st constraint */
+        ct->ct |= TCG_CT_REG;
+        tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
+        // Helper args
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2);
+        break;
+    case 'I':
+        ct->ct |= TCG_CT_CONST_S11;
+        break;
+    case 'J':
+        ct->ct |= TCG_CT_CONST_S13;
+        break;
+    case 'Z':
+        ct->ct |= TCG_CT_CONST_ZERO;
+        break;
+    default:
+        return -1;
+    }
+    ct_str++;
+    *pct_str = ct_str;
+    return 0;
+}
+
+/* test if a constant matches the constraint */
+static inline int tcg_target_const_match(tcg_target_long val,
+                                         const TCGArgConstraint *arg_ct)
+{
+    int ct;
+
+    ct = arg_ct->ct;
+    if (ct & TCG_CT_CONST)
+        return 1;
+    else if ((ct & TCG_CT_CONST_ZERO) && val == 0)
+        return 1;
+    else if ((ct & TCG_CT_CONST_S11) && check_fit_tl(val, 11))
+        return 1;
+    else if ((ct & TCG_CT_CONST_S13) && check_fit_tl(val, 13))
+        return 1;
+    else
+        return 0;
+}
+
 static inline void tcg_out_arith(TCGContext *s, int rd, int rs1, int rs2,
                                  int op)
 {
@@ -486,39 +489,6 @@  static inline void tcg_out_nop(TCGContext *s)
     tcg_out_sethi(s, TCG_REG_G0, 0);
 }
 
-static void tcg_out_branch_i32(TCGContext *s, int opc, int label_index)
-{
-    TCGLabel *l = &s->labels[label_index];
-    uint32_t off22;
-
-    if (l->has_value) {
-        off22 = INSN_OFF22(l->u.value - (unsigned long)s->code_ptr);
-    } else {
-        /* Make sure to preserve destinations during retranslation.  */
-        off22 = *(uint32_t *)s->code_ptr & INSN_OFF22(-1);
-        tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP22, label_index, 0);
-    }
-    tcg_out32(s, INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x2) | off22);
-}
-
-#if TCG_TARGET_REG_BITS == 64
-static void tcg_out_branch_i64(TCGContext *s, int opc, int label_index)
-{
-    TCGLabel *l = &s->labels[label_index];
-    uint32_t off19;
-
-    if (l->has_value) {
-        off19 = INSN_OFF19(l->u.value - (unsigned long)s->code_ptr);
-    } else {
-        /* Make sure to preserve destinations during retranslation.  */
-        off19 = *(uint32_t *)s->code_ptr & INSN_OFF19(-1);
-        tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, label_index, 0);
-    }
-    tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x1) |
-                  (0x5 << 19) | off19));
-}
-#endif
-
 static const uint8_t tcg_cond_to_bcond[] = {
     [TCG_COND_EQ] = COND_E,
     [TCG_COND_NE] = COND_NE,
@@ -532,17 +502,36 @@  static const uint8_t tcg_cond_to_bcond[] = {
     [TCG_COND_GTU] = COND_GU,
 };
 
+static void tcg_out_bpcc0(TCGContext *s, int scond, int flags, int off19)
+{
+    tcg_out32(s, INSN_OP(0) | INSN_OP2(1) | INSN_COND(scond) | flags | off19);
+}
+
+static void tcg_out_bpcc(TCGContext *s, int scond, int flags, int label)
+{
+    TCGLabel *l = &s->labels[label];
+    int off19;
+
+    if (l->has_value) {
+        off19 = INSN_OFF19(l->u.value - (unsigned long)s->code_ptr);
+    } else {
+        /* Make sure to preserve destinations during retranslation.  */
+        off19 = *(uint32_t *)s->code_ptr & INSN_OFF19(-1);
+        tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, label, 0);
+    }
+    tcg_out_bpcc0(s, scond, flags, off19);
+}
+
 static void tcg_out_cmp(TCGContext *s, TCGArg c1, TCGArg c2, int c2const)
 {
     tcg_out_arithc(s, TCG_REG_G0, c1, c2, c2const, ARITH_SUBCC);
 }
 
-static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond,
-                               TCGArg arg1, TCGArg arg2, int const_arg2,
-                               int label_index)
+static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, TCGArg arg1,
+                               TCGArg arg2, int const_arg2, int label)
 {
     tcg_out_cmp(s, arg1, arg2, const_arg2);
-    tcg_out_branch_i32(s, tcg_cond_to_bcond[cond], label_index);
+    tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_ICC | BPCC_PT, label);
     tcg_out_nop(s);
 }
 
@@ -563,12 +552,11 @@  static void tcg_out_movcond_i32(TCGContext *s, TCGCond cond, TCGArg ret,
 }
 
 #if TCG_TARGET_REG_BITS == 64
-static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond,
-                               TCGArg arg1, TCGArg arg2, int const_arg2,
-                               int label_index)
+static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGArg arg1,
+                               TCGArg arg2, int const_arg2, int label)
 {
     tcg_out_cmp(s, arg1, arg2, const_arg2);
-    tcg_out_branch_i64(s, tcg_cond_to_bcond[cond], label_index);
+    tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_XCC | BPCC_PT, label);
     tcg_out_nop(s);
 }
 
@@ -585,32 +573,32 @@  static void tcg_out_brcond2_i32(TCGContext *s, TCGCond cond,
                                 TCGArg bl, int blconst,
                                 TCGArg bh, int bhconst, int label_dest)
 {
-    int cc, label_next = gen_new_label();
+    int scond, label_next = gen_new_label();
 
     tcg_out_cmp(s, ah, bh, bhconst);
 
     /* Note that we fill one of the delay slots with the second compare.  */
     switch (cond) {
     case TCG_COND_EQ:
-        tcg_out_branch_i32(s, COND_NE, label_next);
+        tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_next);
         tcg_out_cmp(s, al, bl, blconst);
-        tcg_out_branch_i32(s, COND_E, label_dest);
+        tcg_out_bpcc(s, COND_E, BPCC_ICC | BPCC_PT, label_dest);
         break;
 
     case TCG_COND_NE:
-        tcg_out_branch_i32(s, COND_NE, label_dest);
+        tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_dest);
         tcg_out_cmp(s, al, bl, blconst);
-        tcg_out_branch_i32(s, COND_NE, label_dest);
+        tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_dest);
         break;
 
     default:
-        cc = tcg_cond_to_bcond[tcg_high_cond(cond)];
-        tcg_out_branch_i32(s, cc, label_dest);
+        scond = tcg_cond_to_bcond[tcg_high_cond(cond)];
+        tcg_out_bpcc(s, scond, BPCC_ICC | BPCC_PT, label_dest);
         tcg_out_nop(s);
-        tcg_out_branch_i32(s, COND_NE, label_next);
+        tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_next);
         tcg_out_cmp(s, al, bl, blconst);
-        cc = tcg_cond_to_bcond[tcg_unsigned_cond(cond)];
-        tcg_out_branch_i32(s, cc, label_dest);
+        scond = tcg_cond_to_bcond[tcg_unsigned_cond(cond)];
+        tcg_out_bpcc(s, scond, BPCC_ICC | BPCC_PT, label_dest);
         break;
     }
     tcg_out_nop(s);
@@ -903,8 +891,8 @@  static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop)
 
         /* bne,pn %[xi]cc, label0 */
         label_ptr[0] = (uint32_t *)s->code_ptr;
-        tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_NE, 0) | INSN_OP2(0x1)
-                      | ((TARGET_LONG_BITS == 64) << 21)));
+        tcg_out_bpcc0(s, COND_NE, BPCC_PN
+                      | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
 
         /* TLB Hit.  */
         /* Load all 64-bits into an O/G register.  */
@@ -919,8 +907,7 @@  static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop)
 
         /* b,a,pt label1 */
         label_ptr[1] = (uint32_t *)s->code_ptr;
-        tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x1)
-                      | (1 << 29) | (1 << 19)));
+        tcg_out_bpcc0(s, COND_A, BPCC_A | BPCC_PT, 0);
     } else {
         /* The fast path is exactly one insn.  Thus we can perform the
            entire TLB Hit in the (annulled) delay slot of the branch
@@ -929,9 +916,8 @@  static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop)
         /* beq,a,pt %[xi]cc, label0 */
         label_ptr[0] = NULL;
         label_ptr[1] = (uint32_t *)s->code_ptr;
-        tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1)
-                      | ((TARGET_LONG_BITS == 64) << 21)
-                      | (1 << 29) | (1 << 19)));
+        tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
+                      | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
         /* delay slot */
         tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_O1, qemu_ld_opc[sizeop]);
     }
@@ -1050,9 +1036,8 @@  static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int sizeop)
        TLB Hit in the (annulled) delay slot of the branch over TLB Miss.  */
     /* beq,a,pt %[xi]cc, label0 */
     label_ptr = (uint32_t *)s->code_ptr;
-    tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1)
-                  | ((TARGET_LONG_BITS == 64) << 21)
-                  | (1 << 29) | (1 << 19)));
+    tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
+                  | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
     /* delay slot */
     tcg_out_ldst_rr(s, datafull, addr_reg, TCG_REG_O1, qemu_st_opc[sizeop]);
 
@@ -1144,7 +1129,7 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         break;
     case INDEX_op_jmp:
     case INDEX_op_br:
-        tcg_out_branch_i32(s, COND_A, args[0]);
+        tcg_out_bpcc(s, COND_A, BPCC_PT, args[0]);
         tcg_out_nop(s);
         break;
     case INDEX_op_movi_i32: