Patchwork [17/21] target-sparc: Implement BMASK/BSHUFFLE.

login
register
mail settings
Submitter Richard Henderson
Date Oct. 18, 2011, 6:50 p.m.
Message ID <1318963843-25100-18-git-send-email-rth@twiddle.net>
Download mbox | patch
Permalink /patch/120505/
State New
Headers show

Comments

Richard Henderson - Oct. 18, 2011, 6:50 p.m.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-sparc/helper.h     |    1 +
 target-sparc/translate.c  |   28 ++++++++++++++++++++++++----
 target-sparc/vis_helper.c |   29 +++++++++++++++++++++++++++++
 3 files changed, 54 insertions(+), 4 deletions(-)
Blue Swirl - Oct. 18, 2011, 8:36 p.m.
On Tue, Oct 18, 2011 at 6:50 PM, Richard Henderson <rth@twiddle.net> wrote:
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  target-sparc/helper.h     |    1 +
>  target-sparc/translate.c  |   28 ++++++++++++++++++++++++----
>  target-sparc/vis_helper.c |   29 +++++++++++++++++++++++++++++
>  3 files changed, 54 insertions(+), 4 deletions(-)
>
> diff --git a/target-sparc/helper.h b/target-sparc/helper.h
> index 4a61b77..ec00436 100644
> --- a/target-sparc/helper.h
> +++ b/target-sparc/helper.h
> @@ -140,6 +140,7 @@ DEF_HELPER_FLAGS_3(pdist, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64, i64)
>  DEF_HELPER_FLAGS_2(fpack16, TCG_CALL_CONST | TCG_CALL_PURE, i32, i64, i64)
>  DEF_HELPER_FLAGS_3(fpack32, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64, i64)
>  DEF_HELPER_FLAGS_2(fpackfix, TCG_CALL_CONST | TCG_CALL_PURE, i32, i64, i64)
> +DEF_HELPER_FLAGS_3(bshuffle, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64, i64)
>  #define VIS_HELPER(name)                                                 \
>     DEF_HELPER_FLAGS_2(f ## name ## 16, TCG_CALL_CONST | TCG_CALL_PURE,  \
>                        i64, i64, i64)                                    \
> diff --git a/target-sparc/translate.c b/target-sparc/translate.c
> index e955bf3..66107ee 100644
> --- a/target-sparc/translate.c
> +++ b/target-sparc/translate.c
> @@ -1744,6 +1744,20 @@ static void gen_ne_fop_DDD(DisasContext *dc, int rd, int rs1, int rs2,
>     gen_store_fpr_D(dc, rd, dst);
>  }
>
> +static void gen_gsr_fop_DDD(DisasContext *dc, int rd, int rs1, int rs2,
> +                            void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
> +{
> +    TCGv_i64 dst, src1, src2;
> +
> +    src1 = gen_load_fpr_D(dc, rs1);
> +    src2 = gen_load_fpr_D(dc, rs2);
> +    dst = gen_dest_fpr_D();
> +
> +    gen(dst, cpu_gsr, src1, src2);
> +
> +    gen_store_fpr_D(dc, rd, dst);
> +}

This could be introduced with fpack functions, so the next patch could
be squashed into that one.

> +
>  static void gen_ne_fop_DDDD(DisasContext *dc, int rd, int rs1, int rs2,
>                             void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
>  {
> @@ -4183,8 +4197,13 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
>                     gen_movl_TN_reg(rd, cpu_dst);
>                     break;
>                 case 0x019: /* VIS II bmask */
> -                    // XXX
> -                    goto illegal_insn;
> +                    CHECK_FPU_FEATURE(dc, VIS2);
> +                    cpu_src1 = get_src1(insn, cpu_src1);
> +                    cpu_src2 = get_src1(insn, cpu_src2);
> +                    tcg_gen_add_tl(cpu_dst, cpu_src1, cpu_src2);
> +                    tcg_gen_deposit_tl(cpu_gsr, cpu_gsr, cpu_dst, 32, 32);
> +                    gen_movl_TN_reg(rd, cpu_dst);
> +                    break;
>                 case 0x020: /* VIS I fcmple16 */
>                     CHECK_FPU_FEATURE(dc, VIS1);
>                     cpu_src1_64 = gen_load_fpr_D(dc, rs1);
> @@ -4310,8 +4329,9 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
>                     gen_ne_fop_DDD(dc, rd, rs1, rs2, gen_helper_fpmerge);
>                     break;
>                 case 0x04c: /* VIS II bshuffle */
> -                    // XXX
> -                    goto illegal_insn;
> +                    CHECK_FPU_FEATURE(dc, VIS2);
> +                    gen_gsr_fop_DDD(dc, rd, rs1, rs2, gen_helper_bshuffle);
> +                    break;
>                 case 0x04d: /* VIS I fexpand */
>                     CHECK_FPU_FEATURE(dc, VIS1);
>                     gen_ne_fop_DDD(dc, rd, rs1, rs2, gen_helper_fexpand);
> diff --git a/target-sparc/vis_helper.c b/target-sparc/vis_helper.c
> index 40adb47..7830120 100644
> --- a/target-sparc/vis_helper.c
> +++ b/target-sparc/vis_helper.c
> @@ -470,3 +470,32 @@ uint32_t helper_fpackfix(uint64_t gsr, uint64_t rs2)
>
>     return ret;
>  }
> +
> +uint64 helper_bshuffle(uint64_t gsr, uint64_t src1, uint64_t src2)
> +{
> +    union {
> +        uint64_t ll[2];
> +        uint8_t b[16];
> +    } s;
> +    VIS64 r;
> +    uint32_t i, mask, host;
> +
> +    /* Set up S such that we can index across all of the bytes.  */
> +#ifdef HOST_WORDS_BIGENDIAN
> +    s.ll[0] = src1;
> +    s.ll[1] = src2;
> +    host = 0;
> +#else
> +    s.ll[1] = src1;
> +    s.ll[0] = src2;
> +    host = 15;
> +#endif
> +    mask = gsr >> 32;
> +
> +    for (i = 0; i < 8; ++i) {
> +        unsigned e = (mask >> (28 - i*4)) & 0xf;
> +        r.VIS_B64(i) = s.b[e ^ host];
> +    }
> +
> +    return r.ll;
> +}
> --
> 1.7.6.4
>
>

Patch

diff --git a/target-sparc/helper.h b/target-sparc/helper.h
index 4a61b77..ec00436 100644
--- a/target-sparc/helper.h
+++ b/target-sparc/helper.h
@@ -140,6 +140,7 @@  DEF_HELPER_FLAGS_3(pdist, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64, i64)
 DEF_HELPER_FLAGS_2(fpack16, TCG_CALL_CONST | TCG_CALL_PURE, i32, i64, i64)
 DEF_HELPER_FLAGS_3(fpack32, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64, i64)
 DEF_HELPER_FLAGS_2(fpackfix, TCG_CALL_CONST | TCG_CALL_PURE, i32, i64, i64)
+DEF_HELPER_FLAGS_3(bshuffle, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64, i64)
 #define VIS_HELPER(name)                                                 \
     DEF_HELPER_FLAGS_2(f ## name ## 16, TCG_CALL_CONST | TCG_CALL_PURE,  \
                        i64, i64, i64)                                    \
diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index e955bf3..66107ee 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -1744,6 +1744,20 @@  static void gen_ne_fop_DDD(DisasContext *dc, int rd, int rs1, int rs2,
     gen_store_fpr_D(dc, rd, dst);
 }
 
+/* Load the TCGv_i64 operands for rs1 and rs2, let GEN combine them with
+   cpu_gsr into a fresh destination temp, then store that temp to rd.  */
+static void gen_gsr_fop_DDD(DisasContext *dc, int rd, int rs1, int rs2,
+                            void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
+{
+    TCGv_i64 lhs = gen_load_fpr_D(dc, rs1);
+    TCGv_i64 rhs = gen_load_fpr_D(dc, rs2);
+    TCGv_i64 result = gen_dest_fpr_D();
+
+    /* cpu_gsr is handed to the generator as its first source operand.  */
+    gen(result, cpu_gsr, lhs, rhs);
+    gen_store_fpr_D(dc, rd, result);
+}
+
 static void gen_ne_fop_DDDD(DisasContext *dc, int rd, int rs1, int rs2,
                             void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
 {
@@ -4183,8 +4197,13 @@  static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     gen_movl_TN_reg(rd, cpu_dst);
                     break;
                 case 0x019: /* VIS II bmask */
-                    // XXX
-                    goto illegal_insn;
+                    CHECK_FPU_FEATURE(dc, VIS2);
+                    cpu_src1 = get_src1(insn, cpu_src1);
+                    cpu_src2 = get_src1(insn, cpu_src2);
+                    tcg_gen_add_tl(cpu_dst, cpu_src1, cpu_src2);
+                    tcg_gen_deposit_tl(cpu_gsr, cpu_gsr, cpu_dst, 32, 32);
+                    gen_movl_TN_reg(rd, cpu_dst);
+                    break;
                 case 0x020: /* VIS I fcmple16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
                     cpu_src1_64 = gen_load_fpr_D(dc, rs1);
@@ -4310,8 +4329,9 @@  static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     gen_ne_fop_DDD(dc, rd, rs1, rs2, gen_helper_fpmerge);
                     break;
                 case 0x04c: /* VIS II bshuffle */
-                    // XXX
-                    goto illegal_insn;
+                    CHECK_FPU_FEATURE(dc, VIS2);
+                    gen_gsr_fop_DDD(dc, rd, rs1, rs2, gen_helper_bshuffle);
+                    break;
                 case 0x04d: /* VIS I fexpand */
                     CHECK_FPU_FEATURE(dc, VIS1);
                     gen_ne_fop_DDD(dc, rd, rs1, rs2, gen_helper_fexpand);
diff --git a/target-sparc/vis_helper.c b/target-sparc/vis_helper.c
index 40adb47..7830120 100644
--- a/target-sparc/vis_helper.c
+++ b/target-sparc/vis_helper.c
@@ -470,3 +470,32 @@  uint32_t helper_fpackfix(uint64_t gsr, uint64_t rs2)
 
     return ret;
 }
+
+/* BSHUFFLE: pick 8 bytes out of the 16-byte value src1:src2, each selected
+   by one 4-bit index taken from the upper 32 bits of GSR.  */
+uint64_t helper_bshuffle(uint64_t gsr, uint64_t src1, uint64_t src2)
+{
+    union {
+        uint64_t ll[2];
+        uint8_t b[16];
+    } s;
+    VIS64 r;
+    uint32_t i, host, mask = gsr >> 32;
+
+    /* Lay out src1:src2 so (e ^ host) indexes byte e, MSB of src1 first. */
+#ifdef HOST_WORDS_BIGENDIAN
+    s.ll[0] = src1;
+    s.ll[1] = src2;
+    host = 0;
+#else
+    s.ll[1] = src1;
+    s.ll[0] = src2;
+    host = 15;
+#endif
+    /* NOTE(review): mask bits 31:28 select r.VIS_B64(0); confirm ordering. */
+    for (i = 0; i < 8; ++i) {
+        unsigned e = (mask >> (28 - i*4)) & 0xf;
+        r.VIS_B64(i) = s.b[e ^ host];
+    }
+    return r.ll;
+}