diff mbox series

[v4,04/13] Hexagon (target/hexagon) Add overrides for dealloc-return instructions

Message ID 20230125024215.10430-5-tsimpson@quicinc.com
State New
Headers show
Series Hexagon: COF overrides, new generator, test/bug update | expand

Commit Message

Taylor Simpson Jan. 25, 2023, 2:42 a.m. UTC
These instructions perform a deallocframe+return (jumpr r31)

Add overrides for
    L4_return
    SL2_return
    L4_return_t
    L4_return_f
    L4_return_tnew_pt
    L4_return_fnew_pt
    L4_return_tnew_pnt
    L4_return_fnew_pnt
    SL2_return_t
    SL2_return_f
    SL2_return_tnew
    SL2_return_fnew

This patch eliminates the last helper that uses write_new_pc, so we
remove it from op_helper.c

This patch also eliminates the last helper for load instructions, so we
remove the pkt_has_store_s1 runtime field as well as the mem_load[1248]
functions.

Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
---
 target/hexagon/cpu.h       |  3 +-
 target/hexagon/gen_tcg.h   | 54 ++++++++++++++++++++++++
 target/hexagon/genptr.c    | 86 ++++++++++++++++++++++++++++++++++++++
 target/hexagon/op_helper.c | 69 ------------------------------
 target/hexagon/translate.c |  6 +--
 5 files changed, 142 insertions(+), 76 deletions(-)
diff mbox series

Patch

diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h
index 34c0ae0a67..8df5b5a236 100644
--- a/target/hexagon/cpu.h
+++ b/target/hexagon/cpu.h
@@ -1,5 +1,5 @@ 
 /*
- *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -98,7 +98,6 @@  typedef struct CPUArchState {
     target_ulong pred_written;
 
     MemLog mem_log_stores[STORES_MAX];
-    target_ulong pkt_has_store_s1;
     target_ulong dczero_addr;
 
     float_status fp_status;
diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h
index 6267f51ccc..8282ff3fc5 100644
--- a/target/hexagon/gen_tcg.h
+++ b/target/hexagon/gen_tcg.h
@@ -508,6 +508,60 @@ 
 #define fGEN_TCG_S2_storerinew_pcr(SHORTCODE) \
     fGEN_TCG_STORE_pcr(2, fSTORE(1, 4, EA, NtN))
 
+/*
+ * dealloc_return
+ * Assembler mapped to
+ * r31:30 = dealloc_return(r30):raw
+ */
+#define fGEN_TCG_L4_return(SHORTCODE) \
+    gen_return(ctx, RddV, RsV)
+
+/*
+ * sub-instruction version (no RddV, so handle it manually)
+ */
+#define fGEN_TCG_SL2_return(SHORTCODE) \
+    do { \
+        TCGv_i64 RddV = tcg_temp_new_i64(); \
+        gen_return(ctx, RddV, hex_gpr[HEX_REG_FP]); \
+        gen_log_reg_write_pair(HEX_REG_FP, RddV); \
+        tcg_temp_free_i64(RddV); \
+    } while (0)
+
+/*
+ * Conditional returns follow this naming convention
+ *     _t                 predicate true
+ *     _f                 predicate false
+ *     _tnew_pt           predicate.new true predict taken
+ *     _fnew_pt           predicate.new false predict taken
+ *     _tnew_pnt          predicate.new true predict not taken
+ *     _fnew_pnt          predicate.new false predict not taken
+ * Predictions are not modelled in QEMU
+ *
+ * Example:
+ *     if (p1) r31:30 = dealloc_return(r30):raw
+ */
+#define fGEN_TCG_L4_return_t(SHORTCODE) \
+    gen_cond_return(ctx, RddV, RsV, PvV, TCG_COND_EQ);
+#define fGEN_TCG_L4_return_f(SHORTCODE) \
+    gen_cond_return(ctx, RddV, RsV, PvV, TCG_COND_NE)
+#define fGEN_TCG_L4_return_tnew_pt(SHORTCODE) \
+    gen_cond_return(ctx, RddV, RsV, PvN, TCG_COND_EQ)
+#define fGEN_TCG_L4_return_fnew_pt(SHORTCODE) \
+    gen_cond_return(ctx, RddV, RsV, PvN, TCG_COND_NE)
+#define fGEN_TCG_L4_return_tnew_pnt(SHORTCODE) \
+    gen_cond_return(ctx, RddV, RsV, PvN, TCG_COND_EQ)
+#define fGEN_TCG_L4_return_fnew_pnt(SHORTCODE) \
+    gen_cond_return(ctx, RddV, RsV, PvN, TCG_COND_NE)
+
+#define fGEN_TCG_SL2_return_t(SHORTCODE) \
+    gen_cond_return_subinsn(ctx, TCG_COND_EQ, hex_pred[0])
+#define fGEN_TCG_SL2_return_f(SHORTCODE) \
+    gen_cond_return_subinsn(ctx, TCG_COND_NE, hex_pred[0])
+#define fGEN_TCG_SL2_return_tnew(SHORTCODE) \
+    gen_cond_return_subinsn(ctx, TCG_COND_EQ, hex_new_pred_value[0])
+#define fGEN_TCG_SL2_return_fnew(SHORTCODE) \
+    gen_cond_return_subinsn(ctx, TCG_COND_NE, hex_new_pred_value[0])
+
 /*
  * Mathematical operations with more than one definition require
  * special handling
diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
index e17ac93a59..efd36f760f 100644
--- a/target/hexagon/genptr.c
+++ b/target/hexagon/genptr.c
@@ -746,6 +746,92 @@  static void gen_cond_callr(DisasContext *ctx,
     gen_set_label(skip);
 }
 
+/* frame ^= (int64_t)FRAMEKEY << 32 */
+static void gen_frame_unscramble(TCGv_i64 frame)
+{
+    TCGv_i64 framekey = tcg_temp_new_i64();
+    tcg_gen_extu_i32_i64(framekey, hex_gpr[HEX_REG_FRAMEKEY]);
+    tcg_gen_shli_i64(framekey, framekey, 32);
+    tcg_gen_xor_i64(frame, frame, framekey);
+    tcg_temp_free_i64(framekey);
+}
+
+static void gen_load_frame(DisasContext *ctx, TCGv_i64 frame, TCGv EA)
+{
+    Insn *insn = ctx->insn;  /* Needed for CHECK_NOSHUF */
+    CHECK_NOSHUF(EA, 8);
+    tcg_gen_qemu_ld64(frame, EA, ctx->mem_idx);
+}
+
+static void gen_return_base(DisasContext *ctx, TCGv_i64 dst, TCGv src,
+                            TCGv r29)
+{
+    /*
+     * frame = *src
+     * dst = frame_unscramble(frame)
+     * SP = src + 8
+     * PC = dst.w[1]
+     */
+    TCGv_i64 frame = tcg_temp_new_i64();
+    TCGv r31 = tcg_temp_new();
+
+    gen_load_frame(ctx, frame, src);
+    gen_frame_unscramble(frame);
+    tcg_gen_mov_i64(dst, frame);
+    tcg_gen_addi_tl(r29, src, 8);
+    tcg_gen_extrh_i64_i32(r31, dst);
+    gen_jumpr(ctx, r31);
+
+    tcg_temp_free_i64(frame);
+    tcg_temp_free(r31);
+}
+
+static void gen_return(DisasContext *ctx, TCGv_i64 dst, TCGv src)
+{
+    TCGv r29 = tcg_temp_new();
+    gen_return_base(ctx, dst, src, r29);
+    gen_log_reg_write(HEX_REG_SP, r29);
+    tcg_temp_free(r29);
+}
+
+/* if (pred) dst = dealloc_return(src):raw */
+static void gen_cond_return(DisasContext *ctx, TCGv_i64 dst, TCGv src,
+                            TCGv pred, TCGCond cond)
+{
+    TCGv LSB = tcg_temp_new();
+    TCGv mask = tcg_temp_new();
+    TCGv r29 = tcg_temp_local_new();
+    TCGLabel *skip = gen_new_label();
+    tcg_gen_andi_tl(LSB, pred, 1);
+
+    /* Initialize the results in case the predicate is false */
+    tcg_gen_movi_i64(dst, 0);
+    tcg_gen_movi_tl(r29, 0);
+
+    /* Set the bit in hex_slot_cancelled if the predicate is flase */
+    tcg_gen_movi_tl(mask, 1 << ctx->insn->slot);
+    tcg_gen_or_tl(mask, hex_slot_cancelled, mask);
+    tcg_gen_movcond_tl(cond, hex_slot_cancelled, LSB, tcg_constant_tl(0),
+                       mask, hex_slot_cancelled);
+    tcg_temp_free(mask);
+
+    tcg_gen_brcondi_tl(cond, LSB, 0, skip);
+    tcg_temp_free(LSB);
+    gen_return_base(ctx, dst, src, r29);
+    gen_set_label(skip);
+    gen_log_predicated_reg_write(HEX_REG_SP, r29, ctx->insn->slot);
+    tcg_temp_free(r29);
+}
+
+/* sub-instruction version (no RddV, so handle it manually) */
+static void gen_cond_return_subinsn(DisasContext *ctx, TCGCond cond, TCGv pred)
+{
+    TCGv_i64 RddV = tcg_temp_local_new_i64();
+    gen_cond_return(ctx, RddV, hex_gpr[HEX_REG_FP], pred, cond);
+    gen_log_predicated_reg_write_pair(HEX_REG_FP, RddV, ctx->insn->slot);
+    tcg_temp_free_i64(RddV);
+}
+
 static void gen_endloop0(DisasContext *ctx)
 {
     TCGv lpcfg = tcg_temp_local_new();
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
index 35449ef524..cb43519edf 100644
--- a/target/hexagon/op_helper.c
+++ b/target/hexagon/op_helper.c
@@ -105,30 +105,6 @@  void log_store64(CPUHexagonState *env, target_ulong addr,
     env->mem_log_stores[slot].data64 = val;
 }
 
-void write_new_pc(CPUHexagonState *env, bool pkt_has_multi_cof,
-                         target_ulong addr)
-{
-    HEX_DEBUG_LOG("write_new_pc(0x" TARGET_FMT_lx ")\n", addr);
-
-    if (pkt_has_multi_cof) {
-        /*
-         * If more than one branch is taken in a packet, only the first one
-         * is actually done.
-         */
-        if (env->branch_taken) {
-            HEX_DEBUG_LOG("INFO: multiple branches taken in same packet, "
-                          "ignoring the second one\n");
-        } else {
-            fCHECK_PCALIGN(addr);
-            env->gpr[HEX_REG_PC] = addr;
-            env->branch_taken = 1;
-        }
-    } else {
-        fCHECK_PCALIGN(addr);
-        env->gpr[HEX_REG_PC] = addr;
-    }
-}
-
 /* Handy place to set a breakpoint */
 void HELPER(debug_start_packet)(CPUHexagonState *env)
 {
@@ -525,51 +501,6 @@  void HELPER(probe_pkt_scalar_hvx_stores)(CPUHexagonState *env, int mask,
     }
 }
 
-/*
- * mem_noshuf
- * Section 5.5 of the Hexagon V67 Programmer's Reference Manual
- *
- * If the load is in slot 0 and there is a store in slot1 (that
- * wasn't cancelled), we have to do the store first.
- */
-static void check_noshuf(CPUHexagonState *env, uint32_t slot,
-                         target_ulong vaddr, int size)
-{
-    if (slot == 0 && env->pkt_has_store_s1 &&
-        ((env->slot_cancelled & (1 << 1)) == 0)) {
-        HELPER(probe_noshuf_load)(env, vaddr, size, MMU_USER_IDX);
-        HELPER(commit_store)(env, 1);
-    }
-}
-
-uint8_t mem_load1(CPUHexagonState *env, uint32_t slot, target_ulong vaddr)
-{
-    uintptr_t ra = GETPC();
-    check_noshuf(env, slot, vaddr, 1);
-    return cpu_ldub_data_ra(env, vaddr, ra);
-}
-
-uint16_t mem_load2(CPUHexagonState *env, uint32_t slot, target_ulong vaddr)
-{
-    uintptr_t ra = GETPC();
-    check_noshuf(env, slot, vaddr, 2);
-    return cpu_lduw_data_ra(env, vaddr, ra);
-}
-
-uint32_t mem_load4(CPUHexagonState *env, uint32_t slot, target_ulong vaddr)
-{
-    uintptr_t ra = GETPC();
-    check_noshuf(env, slot, vaddr, 4);
-    return cpu_ldl_data_ra(env, vaddr, ra);
-}
-
-uint64_t mem_load8(CPUHexagonState *env, uint32_t slot, target_ulong vaddr)
-{
-    uintptr_t ra = GETPC();
-    check_noshuf(env, slot, vaddr, 8);
-    return cpu_ldq_data_ra(env, vaddr, ra);
-}
-
 /* Floating point */
 float64 HELPER(conv_sf2df)(CPUHexagonState *env, float32 RsV)
 {
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index 75f28e08ad..5be24102aa 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -1,5 +1,5 @@ 
 /*
- *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -42,7 +42,6 @@  TCGv hex_store_addr[STORES_MAX];
 TCGv hex_store_width[STORES_MAX];
 TCGv hex_store_val32[STORES_MAX];
 TCGv_i64 hex_store_val64[STORES_MAX];
-TCGv hex_pkt_has_store_s1;
 TCGv hex_dczero_addr;
 TCGv hex_llsc_addr;
 TCGv hex_llsc_val;
@@ -287,7 +286,6 @@  static void gen_start_packet(DisasContext *ctx)
     for (i = 0; i < STORES_MAX; i++) {
         ctx->store_width[i] = 0;
     }
-    tcg_gen_movi_tl(hex_pkt_has_store_s1, pkt->pkt_has_store_s1);
     ctx->s1_store_processed = false;
     ctx->pre_commit = true;
 
@@ -1026,8 +1024,6 @@  void hexagon_translate_init(void)
         offsetof(CPUHexagonState, slot_cancelled), "slot_cancelled");
     hex_branch_taken = tcg_global_mem_new(cpu_env,
         offsetof(CPUHexagonState, branch_taken), "branch_taken");
-    hex_pkt_has_store_s1 = tcg_global_mem_new(cpu_env,
-        offsetof(CPUHexagonState, pkt_has_store_s1), "pkt_has_store_s1");
     hex_dczero_addr = tcg_global_mem_new(cpu_env,
         offsetof(CPUHexagonState, dczero_addr), "dczero_addr");
     hex_llsc_addr = tcg_global_mem_new(cpu_env,