diff mbox

[v2,10/20] target-mips: add MSA I8 format instructions

Message ID 1414546928-54642-11-git-send-email-yongbok.kim@imgtec.com
State New
Headers show

Commit Message

Yongbok Kim Oct. 29, 2014, 1:41 a.m. UTC
add MSA I8 format instructions

Reviewed-by: James Hogan <james.hogan@imgtec.com>
Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
---
 target-mips/helper.h     |   10 +++++
 target-mips/msa_helper.c |   67 +++++++++++++++++++++++++++++++++++
 target-mips/translate.c  |   86 ++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 161 insertions(+), 2 deletions(-)

Comments

James Hogan Oct. 29, 2014, 11:38 a.m. UTC | #1
On Wed, Oct 29, 2014 at 01:41:58AM +0000, Yongbok Kim wrote:
> add MSA I8 format instructions
> 
> Reviewed-by: James Hogan <james.hogan@imgtec.com>

The patch has changed quite a lot, so probably worth dropping
Reviewed-by in those cases in future.

> Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>


> +#define MSA_FN_IMM8(FUNC, DEST, OPERATION)                              \
> +void helper_msa_ ## FUNC(CPUMIPSState *env, uint32_t wd, uint32_t ws,   \
> +        uint32_t i8)                                                    \
> +{                                                                       \
> +    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
> +    wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
> +    uint32_t i;                                                         \
> +    for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                        \
> +        DEST = OPERATION;                                               \
> +    }                                                                   \

I presume register partitioning isn't going to be supported in this
round of patches?

Okay.

> +}
> +
> +MSA_FN_IMM8(andi_b, pwd->b[i], pws->b[i] & i8)
> +MSA_FN_IMM8(ori_b, pwd->b[i], pws->b[i] | i8)
> +MSA_FN_IMM8(nori_b, pwd->b[i], ~(pws->b[i] | i8))
> +MSA_FN_IMM8(xori_b, pwd->b[i], pws->b[i] ^ i8)
> +
> +#define BIT_MOVE_IF_NOT_ZERO(dest, arg1, arg2, df) \
> +            UNSIGNED(((dest & (~arg2)) | (arg1 & arg2)), df)
> +MSA_FN_IMM8(bmnzi_b, pwd->b[i], \

no need to escape the newline

> +        BIT_MOVE_IF_NOT_ZERO(pwd->b[i], pws->b[i], i8, DF_BYTE))
> +
> +#define BIT_MOVE_IF_ZERO(dest, arg1, arg2, df) \
> +            UNSIGNED((dest & arg2) | (arg1 & (~arg2)), df)
> +MSA_FN_IMM8(bmzi_b, pwd->b[i], \

same

> +        BIT_MOVE_IF_ZERO(pwd->b[i], pws->b[i], i8, DF_BYTE))
> +
> +#define BIT_SELECT(dest, arg1, arg2, df) \
> +            UNSIGNED((arg1 & (~dest)) | (arg2 & dest), df)
> +MSA_FN_IMM8(bseli_b, pwd->b[i], \

same

> +        BIT_SELECT(pwd->b[i], pws->b[i], i8, DF_BYTE))
> +
> +#undef MSA_FN_IMM8
> +
> +#define SHF_POS(i, imm) ((i & 0xfc) + ((imm >> (2 * (i & 0x03))) & 0x03))

Should probably put brackets around macro arguments here, just for the
sake of robustness.

Otherwise
Reviewed-by: James Hogan <james.hogan@imgtec.com>

Cheers
James
Richard Henderson Nov. 5, 2014, 5:43 p.m. UTC | #2
On 10/29/2014 02:41 AM, Yongbok Kim wrote:
> +void helper_msa_shf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
> +                       uint32_t ws, uint32_t imm)
> +{
> +    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
> +    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
> +    wr_t wx, *pwx = &wx;
> +    uint32_t i;
> +
> +    switch (df) {
> +    case DF_BYTE:
> +        for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
> +            pwx->b[i] = pws->b[SHF_POS(i, imm)];
> +        }
> +        break;
> +    case DF_HALF:
> +        for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
> +            pwx->h[i] = pws->h[SHF_POS(i, imm)];
> +        }
> +        break;

Why pass DF to decode at runtime?  It's better to fully decode this at
translate time and call the correct function.


r~
Yongbok Kim Nov. 6, 2014, 11:49 a.m. UTC | #3
On 05/11/2014 17:43, Richard Henderson wrote:
> On 10/29/2014 02:41 AM, Yongbok Kim wrote:
>> +void helper_msa_shf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
>> +                       uint32_t ws, uint32_t imm)
>> +{
>> +    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
>> +    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
>> +    wr_t wx, *pwx = &wx;
>> +    uint32_t i;
>> +
>> +    switch (df) {
>> +    case DF_BYTE:
>> +        for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
>> +            pwx->b[i] = pws->b[SHF_POS(i, imm)];
>> +        }
>> +        break;
>> +    case DF_HALF:
>> +        for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
>> +            pwx->h[i] = pws->h[SHF_POS(i, imm)];
>> +        }
>> +        break;
> Why pass DF to decode at runtime?  It's better to fully decode this at
> translate time and call the correct function.
>
>
> r~
Hi Richard,

Agreed. DF is already known at translation time.
I do have a plan to improve the efficiency of the MSA implementation.

Regards,
Yongbok
diff mbox

Patch

diff --git a/target-mips/helper.h b/target-mips/helper.h
index aea12a9..ec1c0e5 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -748,3 +748,13 @@  DEF_HELPER_FLAGS_3(dmthlip, 0, void, tl, tl, env)
 #endif
 DEF_HELPER_FLAGS_3(wrdsp, 0, void, tl, tl, env)
 DEF_HELPER_FLAGS_2(rddsp, 0, tl, tl, env)
+
+/* MIPS SIMD Architecture: 4-argument helpers take (env, wd, ws, i8) */
+DEF_HELPER_4(msa_andi_b, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_bmnzi_b, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_bmzi_b, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_bseli_b, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_nori_b, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_ori_b, void, env, i32, i32, i32)
+DEF_HELPER_5(msa_shf_df, void, env, i32, i32, i32, i32) /* df, wd, ws, imm */
+DEF_HELPER_4(msa_xori_b, void, env, i32, i32, i32)
diff --git a/target-mips/msa_helper.c b/target-mips/msa_helper.c
index b65fb27..46ffaa5 100644
--- a/target-mips/msa_helper.c
+++ b/target-mips/msa_helper.c
@@ -47,3 +47,70 @@  static inline void msa_move_v(wr_t *pwd, wr_t *pws)
         pwd->d[i] = pws->d[i];
     }
 }
+/* Generate helper_msa_FUNC(): run DEST = OPERATION once per byte element. */
+#define MSA_FN_IMM8(FUNC, DEST, OPERATION)                              \
+void helper_msa_ ## FUNC(CPUMIPSState *env, uint32_t wd, uint32_t ws,   \
+        uint32_t i8)                                                    \
+{                                                                       \
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr); /* destination vector */ \
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr); /* source vector */      \
+    uint32_t i;                                                         \
+    for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                        \
+        DEST = OPERATION; /* expressions may use pwd, pws, i8, i */     \
+    }                                                                   \
+}
+/* Bitwise logic between each byte of ws and the immediate i8. */
+MSA_FN_IMM8(andi_b, pwd->b[i], pws->b[i] & i8) /* wd = ws & i8 */
+MSA_FN_IMM8(ori_b, pwd->b[i], pws->b[i] | i8) /* wd = ws | i8 */
+MSA_FN_IMM8(nori_b, pwd->b[i], ~(pws->b[i] | i8)) /* wd = ~(ws | i8) */
+MSA_FN_IMM8(xori_b, pwd->b[i], pws->b[i] ^ i8) /* wd = ws ^ i8 */
+/* Result: arg1 bits where arg2 is 1, dest bits where arg2 is 0. */
+#define BIT_MOVE_IF_NOT_ZERO(dest, arg1, arg2, df) \
+            UNSIGNED((((dest) & ~(arg2)) | ((arg1) & (arg2))), df)
+MSA_FN_IMM8(bmnzi_b, pwd->b[i], /* wd keeps its bits where i8 is 0 */
+        BIT_MOVE_IF_NOT_ZERO(pwd->b[i], pws->b[i], i8, DF_BYTE))
+/* Result: arg1 bits where arg2 is 0, dest bits where arg2 is 1. */
+#define BIT_MOVE_IF_ZERO(dest, arg1, arg2, df) \
+            UNSIGNED(((dest) & (arg2)) | ((arg1) & ~(arg2)), df)
+MSA_FN_IMM8(bmzi_b, pwd->b[i], /* wd keeps its bits where i8 is 1 */
+        BIT_MOVE_IF_ZERO(pwd->b[i], pws->b[i], i8, DF_BYTE))
+/* Result: arg1 bits where dest is 0, arg2 bits where dest is 1. */
+#define BIT_SELECT(dest, arg1, arg2, df) \
+            UNSIGNED(((arg1) & ~(dest)) | ((arg2) & (dest)), df)
+MSA_FN_IMM8(bseli_b, pwd->b[i], /* old wd value is the selection mask */
+        BIT_SELECT(pwd->b[i], pws->b[i], i8, DF_BYTE))
+
+#undef MSA_FN_IMM8
+/* Index within i's group of four, chosen by the 2-bit field (i & 3) of imm. */
+#define SHF_POS(i, imm) (((i) & 0xfc) + (((imm) >> (2 * ((i) & 0x03))) & 0x03))
+/* Shuffle ws elements of format df within groups of four (per imm) into wd. */
+void helper_msa_shf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+                       uint32_t ws, uint32_t imm)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t wx, *pwx = &wx; /* result is built here first, then copied to wd */
+    uint32_t i;
+
+    switch (df) {
+    case DF_BYTE:
+        for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
+            pwx->b[i] = pws->b[SHF_POS(i, imm)];
+        }
+        break;
+    case DF_HALF:
+        for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
+            pwx->h[i] = pws->h[SHF_POS(i, imm)];
+        }
+        break;
+    case DF_WORD:
+        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
+            pwx->w[i] = pws->w[SHF_POS(i, imm)];
+        }
+        break;
+    default:
+        assert(0); /* any other df value is not handled by this helper */
+    }
+    msa_move_v(pwd, pwx); /* presumably keeps wd == ws safe - TODO confirm */
+}
+
diff --git a/target-mips/translate.c b/target-mips/translate.c
index 0df86cc..b2934d7 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -17333,6 +17333,88 @@  static void gen_msa_branch(CPUMIPSState *env, DisasContext *ctx, uint32_t op1)
     ctx->hflags |= MIPS_HFLAG_BC;
     ctx->hflags |= MIPS_HFLAG_BDS32;
 }
+/* Translate one MSA I8-format instruction into a call to its helper. */
+static void gen_msa_i8(CPUMIPSState *env, DisasContext *ctx)
+{
+#define MASK_MSA_I8(op)    (MASK_MSA_MINOR(op) | (op & (0x03 << 24)))
+
+    uint32_t opcode = ctx->opcode;
+
+    uint8_t i8 = (opcode >> 16) & 0xff; /* bits 23..16: 8-bit immediate */
+    uint8_t ws = (opcode >> 11) & 0x1f; /* bits 15..11: source register */
+    uint8_t wd = (opcode >> 6) & 0x1f;  /* bits 10..6: destination register */
+
+    TCGv_i32 twd = tcg_const_i32(wd);
+    TCGv_i32 tws = tcg_const_i32(ws);
+    TCGv_i32 ti8 = tcg_const_i32(i8);
+
+    switch (MASK_MSA_I8(opcode)) { /* MASK_MSA_MINOR bits plus bits 25..24 */
+    case OPC_ANDI_B:
+        gen_helper_msa_andi_b(cpu_env, twd, tws, ti8);
+        break;
+    case OPC_ORI_B:
+        gen_helper_msa_ori_b(cpu_env, twd, tws, ti8);
+        break;
+    case OPC_NORI_B:
+        gen_helper_msa_nori_b(cpu_env, twd, tws, ti8);
+        break;
+    case OPC_XORI_B:
+        gen_helper_msa_xori_b(cpu_env, twd, tws, ti8);
+        break;
+    case OPC_BMNZI_B:
+        gen_helper_msa_bmnzi_b(cpu_env, twd, tws, ti8);
+        break;
+    case OPC_BMZI_B:
+        gen_helper_msa_bmzi_b(cpu_env, twd, tws, ti8);
+        break;
+    case OPC_BSELI_B:
+        gen_helper_msa_bseli_b(cpu_env, twd, tws, ti8);
+        break;
+    case OPC_SHF_B:
+    case OPC_SHF_H:
+    case OPC_SHF_W:
+        {
+            uint8_t df = (opcode >> 24) & 0x3; /* bits 25..24 */
+            if (df == DF_DOUBLE) { /* shf helper has no double case */
+                generate_exception(ctx, EXCP_RI);
+            } else {
+                TCGv_i32 tdf = tcg_const_i32(df);
+                gen_helper_msa_shf_df(cpu_env, tdf, twd, tws, ti8);
+                tcg_temp_free_i32(tdf);
+            }
+        }
+        break;
+    default:
+        MIPS_INVAL("MSA instruction");
+        generate_exception(ctx, EXCP_RI);
+        break;
+    }
+
+    tcg_temp_free_i32(twd); /* every switch path falls through to these */
+    tcg_temp_free_i32(tws);
+    tcg_temp_free_i32(ti8);
+}
+/* MSA entry point: run ASE/access checks, then dispatch by MASK_MSA_MINOR. */
+static void gen_msa(CPUMIPSState *env, DisasContext *ctx)
+{
+    uint32_t opcode = ctx->opcode;
+    check_insn(ctx, ASE_MSA);
+    check_msa_access(env, ctx);
+
+    switch (MASK_MSA_MINOR(opcode)) {
+    case OPC_MSA_I8_00:
+    case OPC_MSA_I8_01:
+    case OPC_MSA_I8_02:
+        gen_msa_i8(env, ctx); /* all three I8 minors share one decoder */
+        break;
+    default: /* every other minor value raises Reserved Instruction */
+        MIPS_INVAL("MSA instruction");
+        generate_exception(ctx, EXCP_RI);
+        break;
+    }
+
+}
+
 static void decode_opc (CPUMIPSState *env, DisasContext *ctx)
 {
     int32_t offset;
@@ -17954,9 +18036,9 @@  static void decode_opc (CPUMIPSState *env, DisasContext *ctx)
             gen_compute_branch(ctx, op, 4, rs, rt, offset, 4);
         }
         break;
-    case OPC_MDMX:
-        check_insn(ctx, ASE_MDMX);
+    case OPC_MSA: /* OPC_MDMX */
         /* MDMX: Not implemented. */
+        gen_msa(env, ctx);
         break;
     case OPC_PCREL:
         check_insn(ctx, ISA_MIPS32R6);