diff mbox series

[RFC,v2] target/arm: Implement SVE2 TBL, TBX

Message ID 20200428144352.9275-1-steplong@quicinc.com
State New
Headers show
Series [RFC,v2] target/arm: Implement SVE2 TBL, TBX | expand

Commit Message

Stephen Long April 28, 2020, 2:43 p.m. UTC
Signed-off-by: Stephen Long <steplong@quicinc.com>
---
 target/arm/helper-sve.h    | 10 ++++++
 target/arm/internals.h     | 12 +++++++
 target/arm/sve.decode      |  5 +++
 target/arm/sve_helper.c    | 71 ++++++++++++++++++++++++++++++++++----
 target/arm/translate-sve.c | 20 +++++++++++
 5 files changed, 112 insertions(+), 6 deletions(-)

Pulled out the common functionality for SVE TBL and SVE2 TBL, TBX. I
haven't reimplemented AdvSIMD TBL, TBX yet, but I would like to know if
I'm on the right track.

Comments

Richard Henderson June 16, 2020, 6:18 p.m. UTC | #1
On 4/28/20 7:43 AM, Stephen Long wrote:
> Signed-off-by: Stephen Long <steplong@quicinc.com>
> ---
>  target/arm/helper-sve.h    | 10 ++++++
>  target/arm/internals.h     | 12 +++++++
>  target/arm/sve.decode      |  5 +++
>  target/arm/sve_helper.c    | 71 ++++++++++++++++++++++++++++++++++----
>  target/arm/translate-sve.c | 20 +++++++++++
>  5 files changed, 112 insertions(+), 6 deletions(-)

I've rearranged the macros in sve_helper.c a little and queued.


r~
diff mbox series

Patch

diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index 51ad60e5c3..ed8b9223ee 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -2690,3 +2690,13 @@  DEF_HELPER_FLAGS_5(sve2_sqrdcmlah_zzzz_d, TCG_CALL_NO_RWG,
 
 DEF_HELPER_FLAGS_6(fmmla_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_6(fmmla_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_tbl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_tbl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_tbl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_tbl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_tbx_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_tbx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_tbx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_tbx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/arm/internals.h b/target/arm/internals.h
index bae4f36426..286ef3c4c6 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -1381,4 +1381,16 @@  static inline uint64_t useronly_maybe_clean_ptr(uint32_t desc, uint64_t ptr)
     return ptr;
 }
 
+#define DECLARE_DO_TB(TYPE)  /* prototype for do_tb_<TYPE>() */  \
+void do_tb_##TYPE(TYPE *vd, TYPE **tables, intptr_t ntables,     \
+                  intptr_t table_sz, TYPE *indices,              \
+                  intptr_t nindices, bool is_tbl);  /* defined in sve_helper.c */
+
+DECLARE_DO_TB(uint8_t)   /* declares do_tb_uint8_t() */
+DECLARE_DO_TB(uint16_t)  /* declares do_tb_uint16_t() */
+DECLARE_DO_TB(uint32_t)  /* declares do_tb_uint32_t() */
+DECLARE_DO_TB(uint64_t)  /* declares do_tb_uint64_t() */
+
+#undef DECLARE_DO_TB
+
 #endif
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index de3768c24a..624c12faf3 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -1419,3 +1419,8 @@  STNT1_zprz      1110010 .. 00 ..... 001 ... ..... ..... \
 # SVE2 32-bit scatter non-temporal store (vector plus scalar)
 STNT1_zprz      1110010 .. 10 ..... 001 ... ..... ..... \
                 @rprr_scatter_store xs=0 esz=2 scale=0
+
+### SVE2 Table Lookup (three sources)
+
+TBL_zzz         00000101 .. 1 ..... 00101 0 ..... .....  @rd_rn_rm
+TBX_zzz         00000101 .. 1 ..... 00101 1 ..... .....  @rd_rn_rm
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index cd5c6f7fb0..2b8de6adb8 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -2944,20 +2944,37 @@  void HELPER(sve_rev_d)(void *vd, void *vn, uint32_t desc)
     }
 }
 
+#define DO_TB(TYPE, H)  /* shared TBL/TBX table-lookup core */          \
+void do_tb_##TYPE(TYPE *vd, TYPE **tables, intptr_t ntables,            \
+                  intptr_t table_sz, TYPE *indices,                     \
+                  intptr_t nindices, bool is_tbl)                       \
+{   /* tables[k] covers indices [k * table_sz, (k + 1) * table_sz) */   \
+    for (intptr_t i = 0; i < nindices; ++i) {                           \
+        TYPE index = indices[H(i)];                                     \
+        if (index < table_sz * ntables) {                               \
+            vd[H(i)] = tables[index / table_sz][H(index % table_sz)];   \
+        } else if (is_tbl) {  /* TBX (!is_tbl) leaves vd[i] as is */    \
+            vd[H(i)] = 0;                                               \
+        }                                                               \
+    }                                                                   \
+}
+
+DO_TB(uint8_t, H1)
+DO_TB(uint16_t, H2)
+DO_TB(uint32_t, H4)
+DO_TB(uint64_t, )
+
 #define DO_TBL(NAME, TYPE, H) \
 void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
 {                                                              \
-    intptr_t i, opr_sz = simd_oprsz(desc);                     \
+    intptr_t opr_sz = simd_oprsz(desc);                        \
     uintptr_t elem = opr_sz / sizeof(TYPE);                    \
-    TYPE *d = vd, *n = vn, *m = vm;                            \
+    TYPE *n = vn;   /* table; copied below if vd == vn */      \
     ARMVectorReg tmp;                                          \
     if (unlikely(vd == vn)) {                                  \
         n = memcpy(&tmp, vn, opr_sz);                          \
     }                                                          \
-    for (i = 0; i < elem; i++) {                               \
-        TYPE j = m[H(i)];                                      \
-        d[H(i)] = j < elem ? n[H(j)] : 0;                      \
-    }                                                          \
+    do_tb_##TYPE(vd, &n, 1, elem, vm, elem, true);             \
 }
 
 DO_TBL(sve_tbl_b, uint8_t, H1)
@@ -2967,6 +2984,48 @@  DO_TBL(sve_tbl_d, uint64_t, )
 
 #undef TBL
 
+#define DO_SVE2_TBL(NAME, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn1, void *vm, void *vn2, uint32_t desc)  \
+{  /* two-table lookup; is_tbl=true zeroes out-of-range elements */         \
+    intptr_t opr_sz = simd_oprsz(desc);                                     \
+    intptr_t elem = opr_sz / sizeof(TYPE);                                  \
+    TYPE *n1 = vn1, *n2 = vn2;                                              \
+    ARMVectorReg tmp1, tmp2;                                                \
+    if (unlikely(vd == vn1)) {  /* vd aliases a table: read a copy */       \
+        n1 = memcpy(&tmp1, vn1, opr_sz);                                    \
+    } else if (unlikely(vd == vn2)) {                                       \
+        n2 = memcpy(&tmp2, vn2, opr_sz);                                    \
+    }                                                                       \
+    TYPE *tables[] = {n1, n2};  /* n1 is table 0, n2 is table 1 */          \
+    do_tb_##TYPE(vd, tables, 2, elem, vm, elem, true);                      \
+}
+
+DO_SVE2_TBL(sve2_tbl_b, uint8_t, H1)
+DO_SVE2_TBL(sve2_tbl_h, uint16_t, H2)
+DO_SVE2_TBL(sve2_tbl_s, uint32_t, H4)
+DO_SVE2_TBL(sve2_tbl_d, uint64_t, )
+
+#define DO_SVE2_TBX(NAME, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc)  \
+{   /* is_tbl=false: out-of-range indices keep vd[i] */         \
+    intptr_t opr_sz = simd_oprsz(desc);                         \
+    uintptr_t elem = opr_sz / sizeof(TYPE);                     \
+    TYPE *n = vn;                                               \
+    ARMVectorReg tmp;                                           \
+    if (unlikely(vd == vn)) {  /* vd aliases the table */       \
+        n = memcpy(&tmp, vn, opr_sz);                           \
+    }                                                           \
+    do_tb_##TYPE(vd, &n, 1, elem, vm, elem, false);             \
+}
+
+DO_SVE2_TBX(sve2_tbx_b, uint8_t, H1)
+DO_SVE2_TBX(sve2_tbx_h, uint16_t, H2)
+DO_SVE2_TBX(sve2_tbx_s, uint32_t, H4)
+DO_SVE2_TBX(sve2_tbx_d, uint64_t, )
+
+#undef DO_SVE2_TBX
+#undef DO_SVE2_TBL
+
 #define DO_UNPK(NAME, TYPED, TYPES, HD, HS) \
 void HELPER(NAME)(void *vd, void *vn, uint32_t desc)           \
 {                                                              \
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 86c3d0ed11..11b78f49b4 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -7956,3 +7956,23 @@  static bool trans_SQRDCMLAH_zzzz(DisasContext *s, arg_CMLA_zzzz *a)
     };
     return do_sve2_zzzz_fn(s, a->rd, a->rn, a->rm, a->ra, fns[a->esz], a->rot);
 }
+
+static bool trans_TBL_zzz(DisasContext *s, arg_rrr_esz *a)
+{
+    static gen_helper_gvec_4 * const fns[] = {  /* indexed by a->esz */
+        gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
+        gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d,
+    };
+    int rn1 = a->rn;             /* first of two consecutive registers */
+    int rn2 = (a->rn + 1) % 32;  /* next register, wrapping 31 -> 0 */
+    return do_sve2_zzzz_fn(s, a->rd, rn1, a->rm, rn2, fns[a->esz], 0);
+}
+
+static bool trans_TBX_zzz(DisasContext *s, arg_rrr_esz *a)
+{
+    static gen_helper_gvec_3 * const fns[] = {  /* indexed by a->esz */
+        gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
+        gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d,
+    };
+    return do_sve2_zzz_ool(s, a, fns[a->esz]);  /* NOTE(review): rd/rn/rm presumably taken from a */
+}