diff mbox

[v14,16/33] target-tilegx: Handle most bit manipulation instructions

Message ID 1440433079-14458-17-git-send-email-rth@twiddle.net
State New
Headers show

Commit Message

Richard Henderson Aug. 24, 2015, 4:17 p.m. UTC
Implement cntlz, cnttz, pcnt, revbits, dblalign, dblalign2/4/6 and shufflebytes; the crc instructions are omitted.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-tilegx/helper.c    | 23 ++++++++++++++++++
 target-tilegx/helper.h    |  2 ++
 target-tilegx/translate.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 84 insertions(+), 1 deletion(-)

Comments

Peter Maydell Aug. 29, 2015, 3:26 p.m. UTC | #1
On 24 August 2015 at 17:17, Richard Henderson <rth@twiddle.net> wrote:
> Omitting crc instructions.

I'm not a fan of commit message bodies that rely on reading
the subject line to make sense (partly because my mail
client doesn't put the subject line very prominently
when reading the email...)

> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  target-tilegx/helper.c    | 23 ++++++++++++++++++
>  target-tilegx/helper.h    |  2 ++
>  target-tilegx/translate.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++-
>  3 files changed, 84 insertions(+), 1 deletion(-)
>
> diff --git a/target-tilegx/helper.c b/target-tilegx/helper.c
> index ea66da0..6aba681 100644
> --- a/target-tilegx/helper.c
> +++ b/target-tilegx/helper.c
> @@ -40,6 +40,29 @@ uint64_t helper_cnttz(uint64_t arg)
>      return ctz64(arg);
>  }
>
> +uint64_t helper_pcnt(uint64_t arg)
> +{
> +    return ctpop64(arg);
> +}
> +
> +uint64_t helper_revbits(uint64_t arg)
> +{
> +    /* Assign the correct byte position.  */
> +    arg = bswap64(arg);
> +
> +    /* Assign the correct nibble position.  */
> +    arg = ((arg & 0xf0f0f0f0f0f0f0f0ULL) >> 4)
> +        | ((arg & 0x0f0f0f0f0f0f0f0fULL) << 4);
> +
> +    /* Assign the correct bit position.  */
> +    arg = ((arg & 0x8888888888888888ULL) >> 3)
> +        | ((arg & 0x4444444444444444ULL) >> 1)
> +        | ((arg & 0x2222222222222222ULL) << 1)
> +        | ((arg & 0x1111111111111111ULL) << 3);
> +
> +    return arg;
> +}

AArch64 has this exact same operation; maybe we should
factor it out into bitops.h ?

> +
>  /*
>   * Functional Description
>   *     uint64_t a = rf[SrcA];
> diff --git a/target-tilegx/helper.h b/target-tilegx/helper.h
> index fd5517e..644d313 100644
> --- a/target-tilegx/helper.h
> +++ b/target-tilegx/helper.h
> @@ -1,4 +1,6 @@
>  DEF_HELPER_2(exception, noreturn, env, i32)
>  DEF_HELPER_FLAGS_1(cntlz, TCG_CALL_NO_RWG_SE, i64, i64)
>  DEF_HELPER_FLAGS_1(cnttz, TCG_CALL_NO_RWG_SE, i64, i64)
> +DEF_HELPER_FLAGS_1(pcnt, TCG_CALL_NO_RWG_SE, i64, i64)
> +DEF_HELPER_FLAGS_1(revbits, TCG_CALL_NO_RWG_SE, i64, i64)
>  DEF_HELPER_FLAGS_3(shufflebytes, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64)
> diff --git a/target-tilegx/translate.c b/target-tilegx/translate.c
> index 090c006..82a34e5 100644
> --- a/target-tilegx/translate.c
> +++ b/target-tilegx/translate.c
> @@ -177,6 +177,35 @@ static void gen_saturate_op(TCGv tdest, TCGv tsrca, TCGv tsrcb,
>      tcg_temp_free(t0);
>  }
>
> +static void gen_dblaligni(TCGv tdest, TCGv tsrca, TCGv tsrcb, int shr)
> +{
> +    TCGv t0 = tcg_temp_new();
> +
> +    tcg_gen_shri_tl(t0, tsrcb, shr);
> +    tcg_gen_shli_tl(tdest, tsrca, 64 - shr);
> +    tcg_gen_or_tl(tdest, tdest, t0);
> +
> +    tcg_temp_free(t0);
> +}
> +
> +static void gen_dblalign(TCGv tdest, TCGv tsrcd, TCGv tsrca, TCGv tsrcb)
> +{
> +    TCGv t0 = tcg_temp_new();

This operation seems sufficiently obscure that I think it would be
helpful to have a comment that explains what it is:
    /* Shift the 128 bit value tsrca:tsrcd right by the number
     * of bytes specified by the bottom 3 bits of tsrcb, and
     * set tdest to the low 64 bits of the resulting value.
     */

(I think I've read the ISA correctly...)

Incidentally I deduce that we're not planning to support bigendian
mode?

> +
> +    tcg_gen_andi_tl(t0, tsrcb, 7);
> +    tcg_gen_shli_tl(t0, t0, 3);
> +    tcg_gen_shr_tl(tdest, tsrcd, t0);
> +
> +    /* Rather than creating and invalid shift, 64 - 0, perform the
> +       left shift in two steps via the one's compliment.  */

typos: "an", "complement".

It might be helpful to say what the maths going on here is
more explicitly:
    We want to do "t0 = tsrca << (64 - t0)".
    Two's complement arithmetic on a 6-bit field tells us
    that 64 - t0 == (t0 ^ 63) + 1. So we can do the shift in
    two parts, neither of which will be 64 (since t0 can't be
    63 here).

Otherwise

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

thanks
-- PMM
Richard Henderson Sept. 1, 2015, 2:26 a.m. UTC | #2
On 08/29/2015 08:26 AM, Peter Maydell wrote:
> Incidentally I deduce that we're not planning to support bigendian
> mode?

*shrug* I hadn't thought about it.  Little-endian seems to be the default for 
the toolchain.  The pre-built userland available for download only contains 
little-endian code.


r~
diff mbox

Patch

diff --git a/target-tilegx/helper.c b/target-tilegx/helper.c
index ea66da0..6aba681 100644
--- a/target-tilegx/helper.c
+++ b/target-tilegx/helper.c
@@ -40,6 +40,29 @@  uint64_t helper_cnttz(uint64_t arg)
     return ctz64(arg);
 }
 
+uint64_t helper_pcnt(uint64_t arg)
+{
+    return ctpop64(arg); /* presumably the number of set bits in arg */
+}
+
+uint64_t helper_revbits(uint64_t arg)
+{
+    /* Reverse all 64 bits in three steps.  First, mirror the byte order.  */
+    arg = bswap64(arg);
+
+    /* Second, swap the two nibbles within each byte.  */
+    arg = ((arg & 0xf0f0f0f0f0f0f0f0ULL) >> 4)
+        | ((arg & 0x0f0f0f0f0f0f0f0fULL) << 4);
+
+    /* Third, reverse the four bits within each nibble.  */
+    arg = ((arg & 0x8888888888888888ULL) >> 3)
+        | ((arg & 0x4444444444444444ULL) >> 1)
+        | ((arg & 0x2222222222222222ULL) << 1)
+        | ((arg & 0x1111111111111111ULL) << 3);
+
+    return arg;
+}
+
 /*
  * Functional Description
  *     uint64_t a = rf[SrcA];
diff --git a/target-tilegx/helper.h b/target-tilegx/helper.h
index fd5517e..644d313 100644
--- a/target-tilegx/helper.h
+++ b/target-tilegx/helper.h
@@ -1,4 +1,6 @@ 
 DEF_HELPER_2(exception, noreturn, env, i32)
 DEF_HELPER_FLAGS_1(cntlz, TCG_CALL_NO_RWG_SE, i64, i64)
 DEF_HELPER_FLAGS_1(cnttz, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_FLAGS_1(pcnt, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_FLAGS_1(revbits, TCG_CALL_NO_RWG_SE, i64, i64)
 DEF_HELPER_FLAGS_3(shufflebytes, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64)
diff --git a/target-tilegx/translate.c b/target-tilegx/translate.c
index 090c006..82a34e5 100644
--- a/target-tilegx/translate.c
+++ b/target-tilegx/translate.c
@@ -177,6 +177,35 @@  static void gen_saturate_op(TCGv tdest, TCGv tsrca, TCGv tsrcb,
     tcg_temp_free(t0);
 }
 
+static void gen_dblaligni(TCGv tdest, TCGv tsrca, TCGv tsrcb, int shr)
+{
+    TCGv t0 = tcg_temp_new();
+    /* Emit tdest = (tsrca << (64 - shr)) | (tsrcb >> shr).  */
+    tcg_gen_shri_tl(t0, tsrcb, shr);
+    tcg_gen_shli_tl(tdest, tsrca, 64 - shr);
+    tcg_gen_or_tl(tdest, tdest, t0);
+
+    tcg_temp_free(t0);
+}
+
+static void gen_dblalign(TCGv tdest, TCGv tsrcd, TCGv tsrca, TCGv tsrcb)
+{
+    TCGv t0 = tcg_temp_new();
+    /* tdest = (tsrcd >> s) | (tsrca << (64 - s)), with s = 8 * (tsrcb & 7).  */
+    tcg_gen_andi_tl(t0, tsrcb, 7);
+    tcg_gen_shli_tl(t0, t0, 3);
+    tcg_gen_shr_tl(tdest, tsrcd, t0);
+
+    /* Rather than creating an invalid shift, 64 - 0, perform the
+       left shift in two steps via the one's complement.  */
+    tcg_gen_xori_tl(t0, t0, 63);
+    tcg_gen_shl_tl(t0, tsrca, t0);
+    tcg_gen_shli_tl(t0, t0, 1);
+    tcg_gen_or_tl(tdest, tdest, t0);
+
+    tcg_temp_free(t0);
+}
+
 static TileExcp gen_rr_opcode(DisasContext *dc, unsigned opext,
                               unsigned dest, unsigned srca)
 {
@@ -210,8 +239,14 @@  static TileExcp gen_rr_opcode(DisasContext *dc, unsigned opext,
     switch (opext) {
     case OE_RR_X0(CNTLZ):
     case OE_RR_Y0(CNTLZ):
+        gen_helper_cntlz(tdest, tsrca);
+        mnemonic = "cntlz";
+        break;
     case OE_RR_X0(CNTTZ):
     case OE_RR_Y0(CNTTZ):
+        gen_helper_cnttz(tdest, tsrca);
+        mnemonic = "cnttz";
+        break;
     case OE_RR_X1(DRAIN):
     case OE_RR_X1(DTLBPR):
     case OE_RR_X1(FINV):
@@ -251,11 +286,17 @@  static TileExcp gen_rr_opcode(DisasContext *dc, unsigned opext,
     case OE_RR_Y1(LNK):
     case OE_RR_X1(MF):
     case OE_RR_X1(NAP):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RR_X0(PCNT):
     case OE_RR_Y0(PCNT):
+        gen_helper_pcnt(tdest, tsrca);
+        mnemonic = "pcnt";
+        break;
     case OE_RR_X0(REVBITS):
     case OE_RR_Y0(REVBITS):
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        gen_helper_revbits(tdest, tsrca);
+        mnemonic = "revbits";
+        break;
     case OE_RR_X0(REVBYTES):
     case OE_RR_Y0(REVBYTES):
         tcg_gen_bswap64_tl(tdest, tsrca);
@@ -358,13 +399,26 @@  static TileExcp gen_rrr_opcode(DisasContext *dc, unsigned opext,
     case OE_RRR(CMUL, 0, X0):
     case OE_RRR(CRC32_32, 0, X0):
     case OE_RRR(CRC32_8, 0, X0):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(DBLALIGN2, 0, X0):
     case OE_RRR(DBLALIGN2, 0, X1):
+        gen_dblaligni(tdest, tsrca, tsrcb, 16);
+        mnemonic = "dblalign2";
+        break;
     case OE_RRR(DBLALIGN4, 0, X0):
     case OE_RRR(DBLALIGN4, 0, X1):
+        gen_dblaligni(tdest, tsrca, tsrcb, 32);
+        mnemonic = "dblalign4";
+        break;
     case OE_RRR(DBLALIGN6, 0, X0):
     case OE_RRR(DBLALIGN6, 0, X1):
+        gen_dblaligni(tdest, tsrca, tsrcb, 48);
+        mnemonic = "dblalign6";
+        break;
     case OE_RRR(DBLALIGN, 0, X0):
+        gen_dblalign(tdest, load_gr(dc, dest), tsrca, tsrcb);
+        mnemonic = "dblalign";
+        break;
     case OE_RRR(EXCH4, 0, X1):
     case OE_RRR(EXCH, 0, X1):
     case OE_RRR(FDOUBLE_ADDSUB, 0, X0):
@@ -516,7 +570,11 @@  static TileExcp gen_rrr_opcode(DisasContext *dc, unsigned opext,
     case OE_RRR(SHRU, 0, X1):
     case OE_RRR(SHRU, 6, Y0):
     case OE_RRR(SHRU, 6, Y1):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(SHUFFLEBYTES, 0, X0):
+        gen_helper_shufflebytes(tdest, load_gr(dc, dest), tsrca, tsrcb);
+        mnemonic = "shufflebytes";
+        break;
     case OE_RRR(ST1, 0, X1):
     case OE_RRR(ST2, 0, X1):
     case OE_RRR(ST4, 0, X1):