diff mbox series

[RFC,v1,06/22] target/i386: introduce gen_gvec_ld_modrm_* helpers

Message ID 20190731175702.4916-7-jan.bobek@gmail.com
State New
Headers show
Series reimplement (some) x86 vector instructions using tcg-gvec | expand

Commit Message

Jan Bobek July 31, 2019, 5:56 p.m. UTC
gen_gvec_ld_modrm_* helpers tie together a gen_ld_modrm_* helper and a
particular gvec operation, effectively handling a single instruction.

Signed-off-by: Jan Bobek <jan.bobek@gmail.com>
---
 target/i386/translate.c | 77 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)

Comments

Richard Henderson July 31, 2019, 10:47 p.m. UTC | #1
On 7/31/19 10:56 AM, Jan Bobek wrote:
> +static inline void gen_gvec_ld_modrm_2(CPUX86State *env, DisasContext *s,
> +                                       int modrm, unsigned vece,
> +                                       uint32_t oprsz, uint32_t maxsz,
> +                                       gen_ld_modrm_2_fp_t gen_ld_modrm_2_fp,
> +                                       gen_gvec_2_fp_t gen_gvec_2_fp,
> +                                       int opctl)
> +{
> +    uint32_t ofss[2];
> +
> +    const int opd = ((opctl >> 6) & 7) - 1;
> +    const int opa = ((opctl >> 3) & 7) - 1;
> +    const int opb = ((opctl >> 0) & 7) - 1;
> +
> +    assert(0 <= opd && opd < 2);
> +    assert(0 <= opa && opa < 2);
> +    assert(0 <= opb && opb < 2);
> +
> +    (*gen_ld_modrm_2_fp)(env, s, modrm, &ofss[0], &ofss[1]);
> +    (*gen_gvec_2_fp)(vece, ofss[opd], ofss[opa], ofss[opb], oprsz, maxsz);
> +}
> +
> +static inline void gen_gvec_ld_modrm_3(CPUX86State *env, DisasContext *s,
> +                                       int modrm, unsigned vece,
> +                                       uint32_t oprsz, uint32_t maxsz,
> +                                       gen_ld_modrm_3_fp_t gen_ld_modrm_3_fp,
> +                                       gen_gvec_2_fp_t gen_gvec_2_fp,
> +                                       int opctl)
> +{
> +    uint32_t ofss[3];
> +
> +    const int opd = ((opctl >> 6) & 7) - 1;
> +    const int opa = ((opctl >> 3) & 7) - 1;
> +    const int opb = ((opctl >> 0) & 7) - 1;
> +
> +    assert(0 <= opd && opd < 3);
> +    assert(0 <= opa && opa < 3);
> +    assert(0 <= opb && opb < 3);
> +
> +    (*gen_ld_modrm_3_fp)(env, s, modrm, &ofss[0], &ofss[1], &ofss[2]);
> +    (*gen_gvec_2_fp)(vece, ofss[opd], ofss[opa], ofss[opb], oprsz, maxsz);
> +}
> +> +#define gen_gvec_ld_modrm_mm(env, s, modrm, vece,                       \> +
                            gen_gvec_2_fp, opctl)                      \> +
gen_gvec_ld_modrm_2((env), (s), (modrm), (vece),                    \> +
                 sizeof(MMXReg), sizeof(MMXReg),                 \> +
              gen_ld_modrm_PqQq,                              \> +
           gen_gvec_2_fp, (opctl))> +> +#define gen_gvec_ld_modrm_xmm(env, s,
modrm, vece,                      \> +
gen_gvec_2_fp, opctl)                     \> +    gen_gvec_ld_modrm_2((env),
(s), (modrm), (vece),                    \> +
sizeof(XMMReg), sizeof(XMMReg),                 \> +
gen_ld_modrm_VxWx,                              \> +
gen_gvec_2_fp, (opctl))> +> +#define gen_gvec_ld_modrm_vxmm(env, s, modrm,
vece,                     \> +                               gen_gvec_2_fp,
opctl)                    \> +    gen_gvec_ld_modrm_3((env), (s), (modrm),
(vece),                    \> +                        sizeof(XMMReg),
sizeof(ZMMReg),                 \> +
gen_ld_modrm_VxHxWx,                            \> +
gen_gvec_2_fp, (opctl))> +> +#define gen_gvec_ld_modrm_vymm(env, s, modrm,
vece,                     \> +                               gen_gvec_2_fp,
opctl)                    \> +    gen_gvec_ld_modrm_3((env), (s), (modrm),
(vece),                    \> +                        sizeof(YMMReg),
sizeof(ZMMReg),                 \> +
gen_ld_modrm_VxHxWx,                            \> +
gen_gvec_2_fp, (opctl))

I suppose there aren't so many different combinations, but did you consider
separate callbacks per operand?  If you have

typedef unsigned (*gen_offset)(CPUX86State *, DisasContext *, int);

static unsigned offset_Pq(CPUX86State *env, DisasContext *s, int modrm)
{
    int reg = (modrm >> 3) & 7; /* Ignore REX_R */
    return offsetof(CPUX86State, fpregs[reg].mmx);
}

static unsigned offset_Qq(CPUX86State *env, DisasContext *s, int modrm)
{
    int mod = (modrm >> 6) & 3;
    unsigned ret;

    if (mod == 3) {
        int rm = modrm & 7; /* Ignore REX_B */
        ret = offsetof(CPUX86State, fpregs[rm].mmx);
    } else {
        ret = offsetof(CPUX86State, mmx_t0);
        gen_lea_modrm(env, s, modrm);
        gen_ldq_env_A0(s, ret);
    }
    return ret;
}

static unsigned offset_Vx(CPUX86State *env, DisasContext *s, int modrm)
{
    int reg = ((modrm >> 3) & 7) | REX_R(s);
    return offsetof(CPUX86State, xmm_regs[reg]);
}

static unsigned offset_Wx(CPUX86State *env, DisasContext *s, int modrm)
{
    int mod = (modrm >> 6) & 3;
    unsigned ret;

    if (mod == 3) {
        int rm = (modrm & 7) | REX_B(s);
        ret = offsetof(CPUX86State, xmm_regs[rm]);
    } else {
        ret = offsetof(CPUX86State, xmm_t0);
        gen_lea_modrm(env, s, modrm);
        gen_ldo_env_A0(s, ret);
    }
    return ret;
}

static unsigned offset_Hx(CPUX86State *env, DisasContext *s, int modrm)
{
    return offsetof(CPUX86State, xmm_regs[s->vex_v]);
}

Then you can have

#define GEN_GVEC_3(OP0, OP1, OP2, OPRSZ, MAXSZ)
static void gen_gvec_ld_modrm_##OP0##OP1##OP2(CPUX86State *env,      \
    DisasContext *s, int modrm, unsigned vece,  gen_gvec_2_fp_t gen) \
{                                               \
    int ofd = offset_##OP0(env, s, modrm);      \
    int of1 = offset_##OP1(env, s, modrm);      \
    int of2 = offset_##OP2(env, s, modrm);      \
    gen(vece, opd, opa, opb, OPRSZ, MAXSZ);     \
}

GEN_GVEC_3(Pq, Pq, Qq, sizeof(MMXReg), sizeof(MMXReg))
GEN_GVEC_3(Vx, Vx, Wx, sizeof(XMMReg), max_vec_size(s))
GEN_GVEC_3(Vx, Hx, Wx, sizeof(XMMReg), max_vec_size(s))

The PqPqQq and VxVxWx sub-strings aren't quite canonical, but imo a better fit
to the actual format of the instruction, with 2 inputs and 1 output.

You can also do

GEN_GVEC_3(Pq, Qq, Pq, sizeof(MMXReg), sizeof(MMXReg))

for those rare "reversed" operations like PANDN.  Now you don't need to carry
around the OPCTL argument, which I initially found non-obvious.

I initially thought you'd be able to infer maxsz from the set of arguments, but
since there are vex encoded operations that do not use vex.vvvv that is not
always the case.  Thus I suggest

static size_t max_vec_size(DisasContext *s)
{
    if (s->prefixes & PREFIX_VEX) {
        /*
         * TODO: When avx512 is supported and enabled, sizeof(ZMMReg).
         * In the meantime don't waste time zeroing data that is not
         * architecturally present.
         */
        return sizeof(YMMReg);
    } else {
        /* Without vex encoding, only the low 128 bits are modified. */
        return sizeof(XMMReg);
    }
}


r~
Jan Bobek Aug. 2, 2019, 1:34 p.m. UTC | #2
On 7/31/19 6:47 PM, Richard Henderson wrote:
> I suppose there aren't so many different combinations, but did you consider
> separate callbacks per operand?  If you have
> 
> typedef unsigned (*gen_offset)(CPUX86State *, DisasContext *, int);
> 
> static unsigned offset_Pq(CPUX86State *env, DisasContext *s, int modrm)
> {
>     int reg = (modrm >> 3) & 7; /* Ignore REX_R */
>     return offsetof(CPUX86State, fpregs[reg].mmx);
> }
> 
> static unsigned offset_Qq(CPUX86State *env, DisasContext *s, int modrm)
> {
>     int mod = (modrm >> 6) & 3;
>     unsigned ret;
> 
>     if (mod == 3) {
>         int rm = modrm & 7; /* Ignore REX_B */
>         ret = offsetof(CPUX86State, fpregs[rm].mmx);
>     } else {
>         ret = offsetof(CPUX86State, mmx_t0);
>         gen_lea_modrm(env, s, modrm);
>         gen_ldq_env_A0(s, ret);
>     }
>     return ret;
> }
> 
> static unsigned offset_Vx(CPUX86State *env, DisasContext *s, int modrm)
> {
>     int reg = ((modrm >> 3) & 7) | REX_R(s);
>     return offsetof(CPUX86State, xmm_regs[reg]);
> }
> 
> static unsigned offset_Wx(CPUX86State *env, DisasContext *s, int modrm)
> {
>     int mod = (modrm >> 6) & 3;
>     unsigned ret;
> 
>     if (mod == 3) {
>         int rm = (modrm & 7) | REX_B(s);
>         ret = offsetof(CPUX86State, xmm_regs[rm]);
>     } else {
>         ret = offsetof(CPUX86State, xmm_t0);
>         gen_lea_modrm(env, s, modrm);
>         gen_ldo_env_A0(s, ret);
>     }
>     return ret;
> }
> 
> static unsigned offset_Hx(CPUX86State *env, DisasContext *s, int modrm)
> {
>     return offsetof(CPUX86State, xmm_regs[s->vex_v]);
> }
> 
> Then you can have
> 
> #define GEN_GVEC_3(OP0, OP1, OP2, OPRSZ, MAXSZ)
> static void gen_gvec_ld_modrm_##OP0##OP1##OP2(CPUX86State *env,      \
>     DisasContext *s, int modrm, unsigned vece,  gen_gvec_2_fp_t gen) \
> {                                               \
>     int ofd = offset_##OP0(env, s, modrm);      \
>     int of1 = offset_##OP1(env, s, modrm);      \
>     int of2 = offset_##OP2(env, s, modrm);      \
>     gen(vece, opd, opa, opb, OPRSZ, MAXSZ);     \
> }
> 
> GEN_GVEC_3(Pq, Pq, Qq, sizeof(MMXReg), sizeof(MMXReg))
> GEN_GVEC_3(Vx, Vx, Wx, sizeof(XMMReg), max_vec_size(s))
> GEN_GVEC_3(Vx, Hx, Wx, sizeof(XMMReg), max_vec_size(s))
> 
> The PqPqQq and VxVxWx sub-strings aren't quite canonical, but imo a better fit
> to the actual format of the instruction, with 2 inputs and 1 output.

Funny, I had a similar idea and converged to almost identical
solution. This will be part of v2.

> You can also do
> 
> GEN_GVEC_3(Pq, Qq, Pq, sizeof(MMXReg), sizeof(MMXReg))
> 
> for those rare "reversed" operations like PANDN.  Now you don't need to carry
> around the OPCTL argument, which I initially found non-obvious.

Yup, solves the problem nicely and more clearly.

> I initially thought you'd be able to infer maxsz from the set of arguments, but
> since there are vex encoded operations that do not use vex.vvvv that is not
> always the case.  Thus I suggest
> 
> static size_t max_vec_size(DisasContext *s)
> {
>     if (s->prefixes & PREFIX_VEX) {
>         /*
>          * TODO: When avx512 is supported and enabled, sizeof(ZMMReg).
>          * In the meantime don't waste time zeroing data that is not
>          * architecturally present.
>          */
>         return sizeof(YMMReg);
>     } else {
>         /* Without vex encoding, only the low 128 bits are modified. */
>         return sizeof(XMMReg);
>     }
> }

Looks good.

-Jan
diff mbox series

Patch

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 7548677e1f..d576b3345c 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -3087,6 +3087,83 @@  static inline void gen_ld_modrm_VxHxWx(CPUX86State *env, DisasContext *s, int mo
     *aofs = offsetof(CPUX86State, xmm_regs[s->vex_v]);
 }
 
+typedef void (*gen_ld_modrm_2_fp_t)(CPUX86State *env, DisasContext *s, int modrm,
+                                    uint32_t *dofs, uint32_t *aofs);
+typedef void (*gen_ld_modrm_3_fp_t)(CPUX86State *env, DisasContext *s, int modrm,
+                                    uint32_t *dofs, uint32_t *aofs, uint32_t *bofs);
+typedef void (*gen_gvec_2_fp_t)(unsigned vece, uint32_t dofs, uint32_t aofs,
+                                uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+
+static inline void gen_gvec_ld_modrm_2(CPUX86State *env, DisasContext *s,
+                                       int modrm, unsigned vece,
+                                       uint32_t oprsz, uint32_t maxsz,
+                                       gen_ld_modrm_2_fp_t gen_ld_modrm_2_fp,
+                                       gen_gvec_2_fp_t gen_gvec_2_fp,
+                                       int opctl)
+{
+    uint32_t ofss[2];
+
+    const int opd = ((opctl >> 6) & 7) - 1;
+    const int opa = ((opctl >> 3) & 7) - 1;
+    const int opb = ((opctl >> 0) & 7) - 1;
+
+    assert(0 <= opd && opd < 2);
+    assert(0 <= opa && opa < 2);
+    assert(0 <= opb && opb < 2);
+
+    (*gen_ld_modrm_2_fp)(env, s, modrm, &ofss[0], &ofss[1]);
+    (*gen_gvec_2_fp)(vece, ofss[opd], ofss[opa], ofss[opb], oprsz, maxsz);
+}
+
+static inline void gen_gvec_ld_modrm_3(CPUX86State *env, DisasContext *s,
+                                       int modrm, unsigned vece,
+                                       uint32_t oprsz, uint32_t maxsz,
+                                       gen_ld_modrm_3_fp_t gen_ld_modrm_3_fp,
+                                       gen_gvec_2_fp_t gen_gvec_2_fp,
+                                       int opctl)
+{
+    uint32_t ofss[3];
+
+    const int opd = ((opctl >> 6) & 7) - 1;
+    const int opa = ((opctl >> 3) & 7) - 1;
+    const int opb = ((opctl >> 0) & 7) - 1;
+
+    assert(0 <= opd && opd < 3);
+    assert(0 <= opa && opa < 3);
+    assert(0 <= opb && opb < 3);
+
+    (*gen_ld_modrm_3_fp)(env, s, modrm, &ofss[0], &ofss[1], &ofss[2]);
+    (*gen_gvec_2_fp)(vece, ofss[opd], ofss[opa], ofss[opb], oprsz, maxsz);
+}
+
+#define gen_gvec_ld_modrm_mm(env, s, modrm, vece,                       \
+                             gen_gvec_2_fp, opctl)                      \
+    gen_gvec_ld_modrm_2((env), (s), (modrm), (vece),                    \
+                        sizeof(MMXReg), sizeof(MMXReg),                 \
+                        gen_ld_modrm_PqQq,                              \
+                        gen_gvec_2_fp, (opctl))
+
+#define gen_gvec_ld_modrm_xmm(env, s, modrm, vece,                      \
+                              gen_gvec_2_fp, opctl)                     \
+    gen_gvec_ld_modrm_2((env), (s), (modrm), (vece),                    \
+                        sizeof(XMMReg), sizeof(XMMReg),                 \
+                        gen_ld_modrm_VxWx,                              \
+                        gen_gvec_2_fp, (opctl))
+
+#define gen_gvec_ld_modrm_vxmm(env, s, modrm, vece,                     \
+                               gen_gvec_2_fp, opctl)                    \
+    gen_gvec_ld_modrm_3((env), (s), (modrm), (vece),                    \
+                        sizeof(XMMReg), sizeof(ZMMReg),                 \
+                        gen_ld_modrm_VxHxWx,                            \
+                        gen_gvec_2_fp, (opctl))
+
+#define gen_gvec_ld_modrm_vymm(env, s, modrm, vece,                     \
+                               gen_gvec_2_fp, opctl)                    \
+    gen_gvec_ld_modrm_3((env), (s), (modrm), (vece),                    \
+                        sizeof(YMMReg), sizeof(ZMMReg),                 \
+                        gen_ld_modrm_VxHxWx,                            \
+                        gen_gvec_2_fp, (opctl))
+
 static void gen_sse(CPUX86State *env, DisasContext *s, int b)
 {
     int b1, op1_offset, op2_offset, is_xmm, val;