Message ID | 1414546928-54642-12-git-send-email-yongbok.kim@imgtec.com |
---|---|
State | New |
Headers | show |
Hi Yongbok, On Wed, Oct 29, 2014 at 01:41:59AM +0000, Yongbok Kim wrote: > +DEF_HELPER_5(msa_addvi_df, void, env, i32, i32, i32, s64) > +DEF_HELPER_5(msa_ceqi_df, void, env, i32, i32, i32, s64) > +DEF_HELPER_5(msa_clei_s_df, void, env, i32, i32, i32, s64) > +DEF_HELPER_5(msa_clei_u_df, void, env, i32, i32, i32, s64) > +DEF_HELPER_5(msa_clti_s_df, void, env, i32, i32, i32, s64) > +DEF_HELPER_5(msa_clti_u_df, void, env, i32, i32, i32, s64) > +DEF_HELPER_4(msa_ldi_df, void, env, i32, i32, i32) > +DEF_HELPER_5(msa_maxi_s_df, void, env, i32, i32, i32, s64) > +DEF_HELPER_5(msa_maxi_u_df, void, env, i32, i32, i32, s64) > +DEF_HELPER_5(msa_mini_s_df, void, env, i32, i32, i32, s64) > +DEF_HELPER_5(msa_mini_u_df, void, env, i32, i32, i32, s64) > +DEF_HELPER_5(msa_subvi_df, void, env, i32, i32, i32, s64) s64 seems like overkill for a 5bit field. > diff --git a/target-mips/msa_helper.c b/target-mips/msa_helper.c > index 46ffaa5..ffdde07 100644 > --- a/target-mips/msa_helper.c > +++ b/target-mips/msa_helper.c > @@ -114,3 +114,145 @@ void helper_msa_shf_df(CPUMIPSState *env, uint32_t df, uint32_t wd, > msa_move_v(pwd, pwx); > } > > +static inline int64_t msa_addv_df(uint32_t df, int64_t arg1, int64_t arg2) > +{ > + return arg1 + arg2; > +} > + > +static inline int64_t msa_subv_df(uint32_t df, int64_t arg1, int64_t arg2) > +{ > + return arg1 - arg2; > +} > + > +static inline int64_t msa_ceq_df(uint32_t df, int64_t arg1, int64_t arg2) > +{ > + return arg1 == arg2 ? -1 : 0; > +} > + > +static inline int64_t msa_cle_s_df(uint32_t df, int64_t arg1, int64_t arg2) > +{ > + return arg1 <= arg2 ? -1 : 0; > +} > + > +static inline int64_t msa_cle_u_df(uint32_t df, int64_t arg1, int64_t arg2) > +{ > + uint64_t u_arg1 = UNSIGNED(arg1, df); > + uint64_t u_arg2 = UNSIGNED(arg2, df); > + return u_arg1 <= u_arg2 ? -1 : 0; > +} > + > +static inline int64_t msa_clt_s_df(uint32_t df, int64_t arg1, int64_t arg2) > +{ > + return arg1 < arg2 ? -1 : 0; > +} > + > +static inline int64_t msa_clt_u_df(uint32_t df, int64_t arg1, int64_t arg2) > +{ > + uint64_t u_arg1 = UNSIGNED(arg1, df); > + uint64_t u_arg2 = UNSIGNED(arg2, df); > + return u_arg1 < u_arg2 ? -1 : 0; > +} > + > +static inline int64_t msa_max_s_df(uint32_t df, int64_t arg1, int64_t arg2) > +{ > + return arg1 > arg2 ? arg1 : arg2; > +} > + > +static inline int64_t msa_max_u_df(uint32_t df, int64_t arg1, int64_t arg2) > +{ > + uint64_t u_arg1 = UNSIGNED(arg1, df); > + uint64_t u_arg2 = UNSIGNED(arg2, df); > + return u_arg1 > u_arg2 ? arg1 : arg2; > +} > + > +static inline int64_t msa_min_s_df(uint32_t df, int64_t arg1, int64_t arg2) > +{ > + return arg1 < arg2 ? arg1 : arg2; > +} > + > +static inline int64_t msa_min_u_df(uint32_t df, int64_t arg1, int64_t arg2) > +{ > + uint64_t u_arg1 = UNSIGNED(arg1, df); > + uint64_t u_arg2 = UNSIGNED(arg2, df); > + return u_arg1 < u_arg2 ? arg1 : arg2; > +} > + > +#define MSA_BINOP_IMM_DF(helper, func) \ > +void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df, \ > + uint32_t wd, uint32_t ws, int64_t u5) \ > +{ \ > + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); \ > + wr_t *pws = &(env->active_fpu.fpr[ws].wr); \ > + uint32_t i; \ > + \ > + switch (df) { \ > + case DF_BYTE: \ > + for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) { \ > + pwd->b[i] = msa_ ## func ## _df(df, pws->b[i], u5); \ > + } \ > + break; \ > + case DF_HALF: \ > + for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) { \ > + pwd->h[i] = msa_ ## func ## _df(df, pws->h[i], u5); \ > + } \ > + break; \ > + case DF_WORD: \ > + for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) { \ > + pwd->w[i] = msa_ ## func ## _df(df, pws->w[i], u5); \ > + } \ > + break; \ > + case DF_DOUBLE: \ > + for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) { \ > + pwd->d[i] = msa_ ## func ## _df(df, pws->d[i], u5); \ > + } \ Have you checked whether the compiler is actually able to effectively optimise this lot? If not, I bet it'd have a much better chance if the inline functions above were defined as macros, e.g.: #define msa_min_s_df(TYPE_S, TYPE_U, ARG1, ARG2) \ ((TYPE_S)(ARG1) < (TYPE_S)(ARG2) ? (ARG1) : (ARG2)) #define msa_min_u_df(TYPE_S, TYPE_U, ARG1, ARG2) \ ((TYPE_U)(ARG1) < (TYPE_U)(ARG2) ? (ARG1) : (ARG2)) and called like this: pwd->b[i] = msa_##func##_df(int8_t, uint8_t, pws->b[i], u5); Having said that, I don't think it should block this patchset, but certainly something it'd be nice to improve. > +void helper_msa_ldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd, > + uint32_t s10) > +{ > + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); > + int64_t s64 = ((int64_t)s10 << 54) >> 54; Extending to 32-bits (int32_t) would work just as well. > + uint32_t i; > + > + switch (df) { > + case DF_BYTE: > + for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) { > + pwd->b[i] = (int8_t)s10; > + } > + break; > + case DF_HALF: > + for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) { > + pwd->h[i] = (int16_t)s64; > + } > + break; > + case DF_WORD: > + for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) { > + pwd->w[i] = (int32_t)s64; > + } > + break; > + case DF_DOUBLE: > + for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) { > + pwd->d[i] = s64; > + } > + break; > + default: > + assert(0); > + } > +} > diff --git a/target-mips/translate.c b/target-mips/translate.c > index b2934d7..62dd0b9 100644 > --- a/target-mips/translate.c > +++ b/target-mips/translate.c > @@ -17395,6 +17395,81 @@ static void gen_msa_i8(CPUMIPSState *env, DisasContext *ctx) > tcg_temp_free_i32(ti8); > } > > +static void gen_msa_i5(CPUMIPSState *env, DisasContext *ctx) > +{ > +#define MASK_MSA_I5(op) (MASK_MSA_MINOR(op) | (op & (0x7 << 23))) > + uint32_t opcode = ctx->opcode; > + > + uint8_t df = (ctx->opcode >> 21) & 0x3; > + int64_t s5 = (ctx->opcode >> 16) & 0x1f; > + s5 = (s5 << 59) >> 59; /* sign extend s5 to 64 bits*/ > + uint8_t u5 = (ctx->opcode >> 16) & 0x1f; > + uint8_t ws = (ctx->opcode >> 11) & 0x1f; > + uint8_t wd = (ctx->opcode >> 6) & 0x1f; > + > + TCGv_i32 tdf = tcg_const_i32(df); > + TCGv_i32 twd = tcg_const_i32(wd); > + TCGv_i32 tws = tcg_const_i32(ws); > + TCGv_i64 tu5 = tcg_const_i64(u5); > + TCGv_i64 ts5 = tcg_const_i64(s5); these (and the in64_t s5 above) don't really need to be 64bit since the field is only 5 bits wide. > + > + switch (MASK_MSA_I5(opcode)) { > + case OPC_ADDVI_df: > + gen_helper_msa_addvi_df(cpu_env, tdf, twd, tws, tu5); Since you know df at translation time, this could avoid the whole switch in each helper every time one is executed by having a helper for each df. Again, for another patchset perhaps. > + break; > + case OPC_LDI_df: > + { > + int64_t s10 = (ctx->opcode >> 11) & 0x3ff; > + s10 = (s10 << 54) >> 54; /* sign extend s10 to 64 bits*/ why use int64_t when you then put it in a const_i32? > + > + TCGv_i32 ts10 = tcg_const_i32(s10); > + gen_helper_msa_ldi_df(cpu_env, tdf, twd, ts10); > + tcg_temp_free_i32(ts10); > + } Cheers James
On 29/10/14 01:41, Yongbok Kim wrote: > + uint8_t df = (ctx->opcode >> 21) & 0x3; > + int64_t s5 = (ctx->opcode >> 16) & 0x1f; > + s5 = (s5 << 59) >> 59; /* sign extend s5 to 64 bits*/ Mixed declarations and code are not allowed. This issue occurs also in subsequent patches (12, 15, 17, 18) in this series. You may also consider using sextract() for s5. > + uint8_t u5 = (ctx->opcode >> 16) & 0x1f; > + uint8_t ws = (ctx->opcode >> 11) & 0x1f; > + uint8_t wd = (ctx->opcode >> 6) & 0x1f; > + > + TCGv_i32 tdf = tcg_const_i32(df); > + TCGv_i32 twd = tcg_const_i32(wd); > + TCGv_i32 tws = tcg_const_i32(ws); > + TCGv_i64 tu5 = tcg_const_i64(u5); > + TCGv_i64 ts5 = tcg_const_i64(s5); One of above tcg variable is redundant as tu5 and ts5 are never used together. Have you considered to have just one tcg variable initialized later - in case blocks - with appropriate value? You already did this for ts10 in case OPC_LDI_df. > + > + switch (MASK_MSA_I5(opcode)) { > + case OPC_ADDVI_df: > + gen_helper_msa_addvi_df(cpu_env, tdf, twd, tws, tu5); > + break; > + case OPC_SUBVI_df: > + gen_helper_msa_subvi_df(cpu_env, tdf, twd, tws, tu5); > + break; > + case OPC_MAXI_S_df: > + gen_helper_msa_maxi_s_df(cpu_env, tdf, twd, tws, ts5); > + break; > + case OPC_MAXI_U_df: > + gen_helper_msa_maxi_u_df(cpu_env, tdf, twd, tws, tu5); > + break; > + case OPC_MINI_S_df: > + gen_helper_msa_mini_s_df(cpu_env, tdf, twd, tws, ts5); > + break; > + case OPC_MINI_U_df: > + gen_helper_msa_mini_u_df(cpu_env, tdf, twd, tws, tu5); > + break; > + case OPC_CEQI_df: > + gen_helper_msa_ceqi_df(cpu_env, tdf, twd, tws, ts5); > + break; > + case OPC_CLTI_S_df: > + gen_helper_msa_clti_s_df(cpu_env, tdf, twd, tws, ts5); > + break; > + case OPC_CLTI_U_df: > + gen_helper_msa_clti_u_df(cpu_env, tdf, twd, tws, tu5); > + break; > + case OPC_CLEI_S_df: > + gen_helper_msa_clei_s_df(cpu_env, tdf, twd, tws, ts5); > + break; > + case OPC_CLEI_U_df: > + gen_helper_msa_clei_u_df(cpu_env, tdf, twd, tws, tu5); > + break; > + case OPC_LDI_df: > + { > + int64_t s10 = (ctx->opcode >> 11) & 0x3ff; > + s10 = (s10 << 54) >> 54; /* sign extend s10 to 64 bits*/ Mixed declarations and code > + > + TCGv_i32 ts10 = tcg_const_i32(s10); > + gen_helper_msa_ldi_df(cpu_env, tdf, twd, ts10); > + tcg_temp_free_i32(ts10);
diff --git a/target-mips/helper.h b/target-mips/helper.h index ec1c0e5..585eaa9 100644 --- a/target-mips/helper.h +++ b/target-mips/helper.h @@ -758,3 +758,16 @@ DEF_HELPER_4(msa_nori_b, void, env, i32, i32, i32) DEF_HELPER_4(msa_ori_b, void, env, i32, i32, i32) DEF_HELPER_5(msa_shf_df, void, env, i32, i32, i32, i32) DEF_HELPER_4(msa_xori_b, void, env, i32, i32, i32) + +DEF_HELPER_5(msa_addvi_df, void, env, i32, i32, i32, s64) +DEF_HELPER_5(msa_ceqi_df, void, env, i32, i32, i32, s64) +DEF_HELPER_5(msa_clei_s_df, void, env, i32, i32, i32, s64) +DEF_HELPER_5(msa_clei_u_df, void, env, i32, i32, i32, s64) +DEF_HELPER_5(msa_clti_s_df, void, env, i32, i32, i32, s64) +DEF_HELPER_5(msa_clti_u_df, void, env, i32, i32, i32, s64) +DEF_HELPER_4(msa_ldi_df, void, env, i32, i32, i32) +DEF_HELPER_5(msa_maxi_s_df, void, env, i32, i32, i32, s64) +DEF_HELPER_5(msa_maxi_u_df, void, env, i32, i32, i32, s64) +DEF_HELPER_5(msa_mini_s_df, void, env, i32, i32, i32, s64) +DEF_HELPER_5(msa_mini_u_df, void, env, i32, i32, i32, s64) +DEF_HELPER_5(msa_subvi_df, void, env, i32, i32, i32, s64) diff --git a/target-mips/msa_helper.c b/target-mips/msa_helper.c index 46ffaa5..ffdde07 100644 --- a/target-mips/msa_helper.c +++ b/target-mips/msa_helper.c @@ -114,3 +114,145 @@ void helper_msa_shf_df(CPUMIPSState *env, uint32_t df, uint32_t wd, msa_move_v(pwd, pwx); } +static inline int64_t msa_addv_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + return arg1 + arg2; +} + +static inline int64_t msa_subv_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + return arg1 - arg2; +} + +static inline int64_t msa_ceq_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + return arg1 == arg2 ? -1 : 0; +} + +static inline int64_t msa_cle_s_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + return arg1 <= arg2 ? -1 : 0; +} + +static inline int64_t msa_cle_u_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + uint64_t u_arg1 = UNSIGNED(arg1, df); + uint64_t u_arg2 = UNSIGNED(arg2, df); + return u_arg1 <= u_arg2 ? -1 : 0; +} + +static inline int64_t msa_clt_s_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + return arg1 < arg2 ? -1 : 0; +} + +static inline int64_t msa_clt_u_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + uint64_t u_arg1 = UNSIGNED(arg1, df); + uint64_t u_arg2 = UNSIGNED(arg2, df); + return u_arg1 < u_arg2 ? -1 : 0; +} + +static inline int64_t msa_max_s_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + return arg1 > arg2 ? arg1 : arg2; +} + +static inline int64_t msa_max_u_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + uint64_t u_arg1 = UNSIGNED(arg1, df); + uint64_t u_arg2 = UNSIGNED(arg2, df); + return u_arg1 > u_arg2 ? arg1 : arg2; +} + +static inline int64_t msa_min_s_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + return arg1 < arg2 ? arg1 : arg2; +} + +static inline int64_t msa_min_u_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + uint64_t u_arg1 = UNSIGNED(arg1, df); + uint64_t u_arg2 = UNSIGNED(arg2, df); + return u_arg1 < u_arg2 ? arg1 : arg2; +} + +#define MSA_BINOP_IMM_DF(helper, func) \ +void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df, \ + uint32_t wd, uint32_t ws, int64_t u5) \ +{ \ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); \ + wr_t *pws = &(env->active_fpu.fpr[ws].wr); \ + uint32_t i; \ + \ + switch (df) { \ + case DF_BYTE: \ + for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) { \ + pwd->b[i] = msa_ ## func ## _df(df, pws->b[i], u5); \ + } \ + break; \ + case DF_HALF: \ + for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) { \ + pwd->h[i] = msa_ ## func ## _df(df, pws->h[i], u5); \ + } \ + break; \ + case DF_WORD: \ + for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) { \ + pwd->w[i] = msa_ ## func ## _df(df, pws->w[i], u5); \ + } \ + break; \ + case DF_DOUBLE: \ + for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) { \ + pwd->d[i] = msa_ ## func ## _df(df, pws->d[i], u5); \ + } \ + break; \ + default: \ + assert(0); \ + } \ +} + +MSA_BINOP_IMM_DF(addvi, addv) +MSA_BINOP_IMM_DF(subvi, subv) +MSA_BINOP_IMM_DF(ceqi, ceq) +MSA_BINOP_IMM_DF(clei_s, cle_s) +MSA_BINOP_IMM_DF(clei_u, cle_u) +MSA_BINOP_IMM_DF(clti_s, clt_s) +MSA_BINOP_IMM_DF(clti_u, clt_u) +MSA_BINOP_IMM_DF(maxi_s, max_s) +MSA_BINOP_IMM_DF(maxi_u, max_u) +MSA_BINOP_IMM_DF(mini_s, min_s) +MSA_BINOP_IMM_DF(mini_u, min_u) +#undef MSA_BINOP_IMM_DF + +void helper_msa_ldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd, + uint32_t s10) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + int64_t s64 = ((int64_t)s10 << 54) >> 54; + uint32_t i; + + switch (df) { + case DF_BYTE: + for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) { + pwd->b[i] = (int8_t)s10; + } + break; + case DF_HALF: + for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) { + pwd->h[i] = (int16_t)s64; + } + break; + case DF_WORD: + for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) { + pwd->w[i] = (int32_t)s64; + } + break; + case DF_DOUBLE: + for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) { + pwd->d[i] = s64; + } + break; + default: + assert(0); + } +} diff --git a/target-mips/translate.c b/target-mips/translate.c index b2934d7..62dd0b9 100644 --- a/target-mips/translate.c +++ b/target-mips/translate.c @@ -17395,6 +17395,81 @@ static void gen_msa_i8(CPUMIPSState *env, DisasContext *ctx) tcg_temp_free_i32(ti8); } +static void gen_msa_i5(CPUMIPSState *env, DisasContext *ctx) +{ +#define MASK_MSA_I5(op) (MASK_MSA_MINOR(op) | (op & (0x7 << 23))) + uint32_t opcode = ctx->opcode; + + uint8_t df = (ctx->opcode >> 21) & 0x3; + int64_t s5 = (ctx->opcode >> 16) & 0x1f; + s5 = (s5 << 59) >> 59; /* sign extend s5 to 64 bits*/ + uint8_t u5 = (ctx->opcode >> 16) & 0x1f; + uint8_t ws = (ctx->opcode >> 11) & 0x1f; + uint8_t wd = (ctx->opcode >> 6) & 0x1f; + + TCGv_i32 tdf = tcg_const_i32(df); + TCGv_i32 twd = tcg_const_i32(wd); + TCGv_i32 tws = tcg_const_i32(ws); + TCGv_i64 tu5 = tcg_const_i64(u5); + TCGv_i64 ts5 = tcg_const_i64(s5); + + switch (MASK_MSA_I5(opcode)) { + case OPC_ADDVI_df: + gen_helper_msa_addvi_df(cpu_env, tdf, twd, tws, tu5); + break; + case OPC_SUBVI_df: + gen_helper_msa_subvi_df(cpu_env, tdf, twd, tws, tu5); + break; + case OPC_MAXI_S_df: + gen_helper_msa_maxi_s_df(cpu_env, tdf, twd, tws, ts5); + break; + case OPC_MAXI_U_df: + gen_helper_msa_maxi_u_df(cpu_env, tdf, twd, tws, tu5); + break; + case OPC_MINI_S_df: + gen_helper_msa_mini_s_df(cpu_env, tdf, twd, tws, ts5); + break; + case OPC_MINI_U_df: + gen_helper_msa_mini_u_df(cpu_env, tdf, twd, tws, tu5); + break; + case OPC_CEQI_df: + gen_helper_msa_ceqi_df(cpu_env, tdf, twd, tws, ts5); + break; + case OPC_CLTI_S_df: + gen_helper_msa_clti_s_df(cpu_env, tdf, twd, tws, ts5); + break; + case OPC_CLTI_U_df: + gen_helper_msa_clti_u_df(cpu_env, tdf, twd, tws, tu5); + break; + case OPC_CLEI_S_df: + gen_helper_msa_clei_s_df(cpu_env, tdf, twd, tws, ts5); + break; + case OPC_CLEI_U_df: + gen_helper_msa_clei_u_df(cpu_env, tdf, twd, tws, tu5); + break; + case OPC_LDI_df: + { + int64_t s10 = (ctx->opcode >> 11) & 0x3ff; + s10 = (s10 << 54) >> 54; /* sign extend s10 to 64 bits*/ + + TCGv_i32 ts10 = tcg_const_i32(s10); + gen_helper_msa_ldi_df(cpu_env, tdf, twd, ts10); + tcg_temp_free_i32(ts10); + } + break; + default: + MIPS_INVAL("MSA instruction"); + generate_exception(ctx, EXCP_RI); + break; + } + + tcg_temp_free_i32(tdf); + tcg_temp_free_i32(twd); + tcg_temp_free_i32(tws); + tcg_temp_free_i64(tu5); + tcg_temp_free_i64(ts5); +} + static void gen_msa(CPUMIPSState *env, DisasContext *ctx) { uint32_t opcode = ctx->opcode; @@ -17407,6 +17482,10 @@ static void gen_msa(CPUMIPSState *env, DisasContext *ctx) case OPC_MSA_I8_02: gen_msa_i8(env, ctx); break; + case OPC_MSA_I5_06: + case OPC_MSA_I5_07: + gen_msa_i5(env, ctx); + break; default: MIPS_INVAL("MSA instruction"); generate_exception(ctx, EXCP_RI);
add MSA I5 format instructions Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com> --- target-mips/helper.h | 13 ++++ target-mips/msa_helper.c | 142 ++++++++++++++++++++++++++++++++++++++++++++++ target-mips/translate.c | 79 +++++++++++++++++++++++++ 3 files changed, 234 insertions(+), 0 deletions(-)