diff mbox series

[v5,12/14] Hexagon (target/hexagon) Remove gen_log_predicated_reg_write[_pair]

Message ID 20230131225647.25274-13-tsimpson@quicinc.com
State New
Headers show
Series Hexagon: COF overrides, new generator, test/bug update | expand

Commit Message

Taylor Simpson Jan. 31, 2023, 10:56 p.m. UTC
We assign the instruction destination register to hex_new_value[num]
instead of a TCG temp that gets copied back to hex_new_value[num].

Since we preload hex_new_value for predicated instructions, we don't
need the check for slot_cancelled.  So, we call gen_log_reg_write instead.

We update the helper function generation and gen_tcg.h to maintain the
disable-hexagon-idef-parser configuration.

Here is a simple example of the differences in the TCG code generated:

IN:
0x00400094:  0xf900c102 {       if (P0) R2 = and(R0,R1) }

BEFORE
 ---- 00400094
 mov_i32 slot_cancelled,$0x0
 mov_i32 new_r2,r2
 mov_i32 loc2,$0x0
 and_i32 tmp0,p0,$0x1
 brcond_i32 tmp0,$0x0,eq,$L1
 and_i32 tmp0,r0,r1
 mov_i32 loc2,tmp0
 br $L2
 set_label $L1
 or_i32 slot_cancelled,slot_cancelled,$0x8
 set_label $L2
 and_i32 tmp0,slot_cancelled,$0x8
 movcond_i32 new_r2,tmp0,$0x0,loc2,new_r2,eq
 mov_i32 r2,new_r2

AFTER
 ---- 00400094
 mov_i32 slot_cancelled,$0x0
 mov_i32 new_r2,r2
 and_i32 tmp0,p0,$0x1
 brcond_i32 tmp0,$0x0,eq,$L1
 and_i32 tmp0,r0,r1
 mov_i32 new_r2,tmp0
 br $L2
 set_label $L1
 or_i32 slot_cancelled,slot_cancelled,$0x8
 set_label $L2
 mov_i32 r2,new_r2

We'll remove the unnecessary manipulation of slot_cancelled in a
subsequent patch.

Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
---
 target/hexagon/gen_tcg.h                    |   7 +-
 target/hexagon/macros.h                     |  17 ---
 target/hexagon/genptr.c                     | 120 ++++----------------
 target/hexagon/idef-parser/parser-helpers.c |   4 +-
 target/hexagon/gen_helper_funcs.py          |  19 +++-
 target/hexagon/gen_helper_protos.py         |  12 +-
 target/hexagon/gen_tcg_funcs.py             |  90 ++++++++-------
 target/hexagon/hex_common.py                |   9 +-
 8 files changed, 110 insertions(+), 168 deletions(-)

Comments

Taylor Simpson Feb. 27, 2023, 11:40 p.m. UTC | #1
> -----Original Message-----
> From: Anton Johansson <anjo@rev.ng>
> Sent: Friday, February 24, 2023 6:06 AM
> To: Taylor Simpson <tsimpson@quicinc.com>; qemu-devel@nongnu.org
> Cc: richard.henderson@linaro.org; philmd@linaro.org; ale@rev.ng; Brian Cain
> <bcain@quicinc.com>; Matheus Bernardino (QUIC)
> <quic_mathbern@quicinc.com>
> Subject: Re: [PATCH v5 12/14] Hexagon (target/hexagon) Remove
> gen_log_predicated_reg_write[_pair]
> 
> On 1/31/23 23:56, Taylor Simpson wrote:
> >   static void gen_log_reg_write_pair(int rnum, TCGv_i64 val)
> >   {
> >       const target_ulong reg_mask_low = reg_immut_masks[rnum]; @@
> > -167,6 +120,7 @@ static void gen_log_reg_write_pair(int rnum, TCGv_i64
> val)
> >       }
> >
> >       tcg_temp_free(val32);
> > +    tcg_temp_free_i64(val);
> >   }
> >
> >   void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val) @@
> > -306,12 +260,14 @@ static inline void
> gen_write_ctrl_reg_pair(DisasContext *ctx, int reg_num,
> >                                              TCGv_i64 val)
> >   {
> >       if (reg_num == HEX_REG_P3_0_ALIASED) {
> > +        TCGv result = get_result_gpr(ctx, reg_num + 1);
> >           TCGv val32 = tcg_temp_new();
> >           tcg_gen_extrl_i64_i32(val32, val);
> >           gen_write_p3_0(ctx, val32);
> >           tcg_gen_extrh_i64_i32(val32, val);
> > -        gen_log_reg_write(reg_num + 1, val32);
> > +        tcg_gen_mov_tl(result, val32);
> >           tcg_temp_free(val32);
> > +        tcg_temp_free_i64(val);
> >       } else {
> >           gen_log_reg_write_pair(reg_num, val);
> >           if (reg_num == HEX_REG_QEMU_PKT_CNT) {
> 
> Hiding the free of a TCGv argument scares me a bit, it's bound to cause
> annoying bugs down the line, I feel.
> 
> Any reason we can't keep the frees in the same scope as allocation?

The only ones that need to be free'd are the pairs.  Those are created by get_result_gpr_pair, so I'm using the gen_log_reg_write_pair function as the central place to free them.

Also, Richard Henderson is working on a patch series that will eliminate the need for all free's 😊

> 
> Otherwise, the patch looks good,
> 
> Reviewed-by: Anton Johansson <anjo@rev.ng>
diff mbox series

Patch

diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h
index 8282ff3fc5..5f2ffaf780 100644
--- a/target/hexagon/gen_tcg.h
+++ b/target/hexagon/gen_tcg.h
@@ -342,8 +342,6 @@ 
         tcg_gen_movi_tl(EA, 0); \
         PRED;  \
         CHECK_NOSHUF_PRED(GET_EA, SIZE, LSB); \
-        PRED_LOAD_CANCEL(LSB, EA); \
-        tcg_gen_movi_tl(RdV, 0); \
         tcg_gen_brcondi_tl(TCG_COND_EQ, LSB, 0, label); \
         fLOAD(1, SIZE, SIGN, EA, RdV); \
         gen_set_label(label); \
@@ -402,8 +400,6 @@ 
         tcg_gen_movi_tl(EA, 0); \
         PRED;  \
         CHECK_NOSHUF_PRED(GET_EA, 8, LSB); \
-        PRED_LOAD_CANCEL(LSB, EA); \
-        tcg_gen_movi_i64(RddV, 0); \
         tcg_gen_brcondi_tl(TCG_COND_EQ, LSB, 0, label); \
         fLOAD(1, 8, u, EA, RddV); \
         gen_set_label(label); \
@@ -521,10 +517,9 @@ 
  */
 #define fGEN_TCG_SL2_return(SHORTCODE) \
     do { \
-        TCGv_i64 RddV = tcg_temp_new_i64(); \
+        TCGv_i64 RddV = get_result_gpr_pair(ctx, HEX_REG_FP); \
         gen_return(ctx, RddV, hex_gpr[HEX_REG_FP]); \
         gen_log_reg_write_pair(HEX_REG_FP, RddV); \
-        tcg_temp_free_i64(RddV); \
     } while (0)
 
 /*
diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h
index 8f1f82f8da..f5ff923bbd 100644
--- a/target/hexagon/macros.h
+++ b/target/hexagon/macros.h
@@ -210,23 +210,6 @@  static inline void gen_cancel(uint32_t slot)
 
 #define LOAD_CANCEL(EA) do { CANCEL; } while (0)
 
-#ifdef QEMU_GENERATE
-static inline void gen_pred_cancel(TCGv pred, uint32_t slot_num)
- {
-    TCGv slot_mask = tcg_temp_new();
-    TCGv tmp = tcg_temp_new();
-    TCGv zero = tcg_constant_tl(0);
-    tcg_gen_ori_tl(slot_mask, hex_slot_cancelled, 1 << slot_num);
-    tcg_gen_andi_tl(tmp, pred, 1);
-    tcg_gen_movcond_tl(TCG_COND_EQ, hex_slot_cancelled, tmp, zero,
-                       slot_mask, hex_slot_cancelled);
-    tcg_temp_free(slot_mask);
-    tcg_temp_free(tmp);
-}
-#define PRED_LOAD_CANCEL(PRED, EA) \
-    gen_pred_cancel(PRED, insn->is_endloop ? 4 : insn->slot)
-#endif
-
 #define STORE_CANCEL(EA) { env->slot_cancelled |= (1 << slot); }
 
 #define fMAX(A, B) (((A) > (B)) ? (A) : (B))
diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
index 67ec3ac551..f937a17b24 100644
--- a/target/hexagon/genptr.c
+++ b/target/hexagon/genptr.c
@@ -70,28 +70,17 @@  static inline void gen_masked_reg_write(TCGv new_val, TCGv cur_val,
     }
 }
 
-static inline void gen_log_predicated_reg_write(int rnum, TCGv val,
-                                                uint32_t slot)
+static TCGv get_result_gpr(DisasContext *ctx, int rnum)
 {
-    TCGv zero = tcg_constant_tl(0);
-    TCGv slot_mask = tcg_temp_new();
-
-    tcg_gen_andi_tl(slot_mask, hex_slot_cancelled, 1 << slot);
-    tcg_gen_movcond_tl(TCG_COND_EQ, hex_new_value[rnum], slot_mask, zero,
-                           val, hex_new_value[rnum]);
-    if (HEX_DEBUG) {
-        /*
-         * Do this so HELPER(debug_commit_end) will know
-         *
-         * Note that slot_mask indicates the value is not written
-         * (i.e., slot was cancelled), so we create a true/false value before
-         * or'ing with hex_reg_written[rnum].
-         */
-        tcg_gen_setcond_tl(TCG_COND_EQ, slot_mask, slot_mask, zero);
-        tcg_gen_or_tl(hex_reg_written[rnum], hex_reg_written[rnum], slot_mask);
-    }
+    return hex_new_value[rnum];
+}
 
-    tcg_temp_free(slot_mask);
+static TCGv_i64 get_result_gpr_pair(DisasContext *ctx, int rnum)
+{
+    TCGv_i64 result = tcg_temp_local_new_i64();
+    tcg_gen_concat_i32_i64(result, hex_new_value[rnum],
+                                   hex_new_value[rnum + 1]);
+    return result;
 }
 
 void gen_log_reg_write(int rnum, TCGv val)
@@ -106,42 +95,6 @@  void gen_log_reg_write(int rnum, TCGv val)
     }
 }
 
-static void gen_log_predicated_reg_write_pair(int rnum, TCGv_i64 val,
-                                              uint32_t slot)
-{
-    TCGv val32 = tcg_temp_new();
-    TCGv zero = tcg_constant_tl(0);
-    TCGv slot_mask = tcg_temp_new();
-
-    tcg_gen_andi_tl(slot_mask, hex_slot_cancelled, 1 << slot);
-    /* Low word */
-    tcg_gen_extrl_i64_i32(val32, val);
-    tcg_gen_movcond_tl(TCG_COND_EQ, hex_new_value[rnum],
-                       slot_mask, zero,
-                       val32, hex_new_value[rnum]);
-    /* High word */
-    tcg_gen_extrh_i64_i32(val32, val);
-    tcg_gen_movcond_tl(TCG_COND_EQ, hex_new_value[rnum + 1],
-                       slot_mask, zero,
-                       val32, hex_new_value[rnum + 1]);
-    if (HEX_DEBUG) {
-        /*
-         * Do this so HELPER(debug_commit_end) will know
-         *
-         * Note that slot_mask indicates the value is not written
-         * (i.e., slot was cancelled), so we create a true/false value before
-         * or'ing with hex_reg_written[rnum].
-         */
-        tcg_gen_setcond_tl(TCG_COND_EQ, slot_mask, slot_mask, zero);
-        tcg_gen_or_tl(hex_reg_written[rnum], hex_reg_written[rnum], slot_mask);
-        tcg_gen_or_tl(hex_reg_written[rnum + 1], hex_reg_written[rnum + 1],
-                      slot_mask);
-    }
-
-    tcg_temp_free(val32);
-    tcg_temp_free(slot_mask);
-}
-
 static void gen_log_reg_write_pair(int rnum, TCGv_i64 val)
 {
     const target_ulong reg_mask_low = reg_immut_masks[rnum];
@@ -167,6 +120,7 @@  static void gen_log_reg_write_pair(int rnum, TCGv_i64 val)
     }
 
     tcg_temp_free(val32);
+    tcg_temp_free_i64(val);
 }
 
 void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val)
@@ -306,12 +260,14 @@  static inline void gen_write_ctrl_reg_pair(DisasContext *ctx, int reg_num,
                                            TCGv_i64 val)
 {
     if (reg_num == HEX_REG_P3_0_ALIASED) {
+        TCGv result = get_result_gpr(ctx, reg_num + 1);
         TCGv val32 = tcg_temp_new();
         tcg_gen_extrl_i64_i32(val32, val);
         gen_write_p3_0(ctx, val32);
         tcg_gen_extrh_i64_i32(val32, val);
-        gen_log_reg_write(reg_num + 1, val32);
+        tcg_gen_mov_tl(result, val32);
         tcg_temp_free(val32);
+        tcg_temp_free_i64(val);
     } else {
         gen_log_reg_write_pair(reg_num, val);
         if (reg_num == HEX_REG_QEMU_PKT_CNT) {
@@ -701,33 +657,29 @@  static void gen_jumpr(DisasContext *ctx, TCGv new_pc)
 
 static void gen_call(DisasContext *ctx, int pc_off)
 {
-    TCGv next_PC =
-        tcg_constant_tl(ctx->pkt->pc + ctx->pkt->encod_pkt_size_in_bytes);
-    gen_log_reg_write(HEX_REG_LR, next_PC);
+    TCGv lr = get_result_gpr(ctx, HEX_REG_LR);
+    tcg_gen_movi_tl(lr, ctx->next_PC);
     gen_write_new_pc_pcrel(ctx, pc_off, TCG_COND_ALWAYS, NULL);
 }
 
 static void gen_callr(DisasContext *ctx, TCGv new_pc)
 {
-    TCGv next_PC =
-        tcg_constant_tl(ctx->pkt->pc + ctx->pkt->encod_pkt_size_in_bytes);
-    gen_log_reg_write(HEX_REG_LR, next_PC);
+    TCGv lr = get_result_gpr(ctx, HEX_REG_LR);
+    tcg_gen_movi_tl(lr, ctx->next_PC);
     gen_write_new_pc_addr(ctx, new_pc, TCG_COND_ALWAYS, NULL);
 }
 
 static void gen_cond_call(DisasContext *ctx, TCGv pred,
                           TCGCond cond, int pc_off)
 {
-    TCGv next_PC;
+    TCGv lr = get_result_gpr(ctx, HEX_REG_LR);
     TCGv lsb = tcg_temp_local_new();
     TCGLabel *skip = gen_new_label();
     tcg_gen_andi_tl(lsb, pred, 1);
     gen_write_new_pc_pcrel(ctx, pc_off, cond, lsb);
     tcg_gen_brcondi_tl(cond, lsb, 0, skip);
     tcg_temp_free(lsb);
-    next_PC =
-        tcg_constant_tl(ctx->pkt->pc + ctx->pkt->encod_pkt_size_in_bytes);
-    gen_log_reg_write(HEX_REG_LR, next_PC);
+    tcg_gen_movi_tl(lr, ctx->next_PC);
     gen_set_label(skip);
 }
 
@@ -760,8 +712,7 @@  static void gen_load_frame(DisasContext *ctx, TCGv_i64 frame, TCGv EA)
     tcg_gen_qemu_ld64(frame, EA, ctx->mem_idx);
 }
 
-static void gen_return_base(DisasContext *ctx, TCGv_i64 dst, TCGv src,
-                            TCGv r29)
+static void gen_return(DisasContext *ctx, TCGv_i64 dst, TCGv src)
 {
     /*
      * frame = *src
@@ -771,6 +722,7 @@  static void gen_return_base(DisasContext *ctx, TCGv_i64 dst, TCGv src,
      */
     TCGv_i64 frame = tcg_temp_new_i64();
     TCGv r31 = tcg_temp_new();
+    TCGv r29 = get_result_gpr(ctx, HEX_REG_SP);
 
     gen_load_frame(ctx, frame, src);
     gen_frame_unscramble(frame);
@@ -783,50 +735,26 @@  static void gen_return_base(DisasContext *ctx, TCGv_i64 dst, TCGv src,
     tcg_temp_free(r31);
 }
 
-static void gen_return(DisasContext *ctx, TCGv_i64 dst, TCGv src)
-{
-    TCGv r29 = tcg_temp_new();
-    gen_return_base(ctx, dst, src, r29);
-    gen_log_reg_write(HEX_REG_SP, r29);
-    tcg_temp_free(r29);
-}
-
 /* if (pred) dst = dealloc_return(src):raw */
 static void gen_cond_return(DisasContext *ctx, TCGv_i64 dst, TCGv src,
                             TCGv pred, TCGCond cond)
 {
     TCGv LSB = tcg_temp_new();
-    TCGv mask = tcg_temp_new();
-    TCGv r29 = tcg_temp_local_new();
     TCGLabel *skip = gen_new_label();
     tcg_gen_andi_tl(LSB, pred, 1);
 
-    /* Initialize the results in case the predicate is false */
-    tcg_gen_movi_i64(dst, 0);
-    tcg_gen_movi_tl(r29, 0);
-
-    /* Set the bit in hex_slot_cancelled if the predicate is flase */
-    tcg_gen_movi_tl(mask, 1 << ctx->insn->slot);
-    tcg_gen_or_tl(mask, hex_slot_cancelled, mask);
-    tcg_gen_movcond_tl(cond, hex_slot_cancelled, LSB, tcg_constant_tl(0),
-                       mask, hex_slot_cancelled);
-    tcg_temp_free(mask);
-
     tcg_gen_brcondi_tl(cond, LSB, 0, skip);
     tcg_temp_free(LSB);
-    gen_return_base(ctx, dst, src, r29);
+    gen_return(ctx, dst, src);
     gen_set_label(skip);
-    gen_log_predicated_reg_write(HEX_REG_SP, r29, ctx->insn->slot);
-    tcg_temp_free(r29);
 }
 
 /* sub-instruction version (no RddV, so handle it manually) */
 static void gen_cond_return_subinsn(DisasContext *ctx, TCGCond cond, TCGv pred)
 {
-    TCGv_i64 RddV = tcg_temp_local_new_i64();
+    TCGv_i64 RddV = get_result_gpr_pair(ctx, HEX_REG_FP);
     gen_cond_return(ctx, RddV, hex_gpr[HEX_REG_FP], pred, cond);
-    gen_log_predicated_reg_write_pair(HEX_REG_FP, RddV, ctx->insn->slot);
-    tcg_temp_free_i64(RddV);
+    gen_log_reg_write_pair(HEX_REG_FP, RddV);
 }
 
 static void gen_endloop0(DisasContext *ctx)
diff --git a/target/hexagon/idef-parser/parser-helpers.c b/target/hexagon/idef-parser/parser-helpers.c
index de9838806b..eb652d6a7a 100644
--- a/target/hexagon/idef-parser/parser-helpers.c
+++ b/target/hexagon/idef-parser/parser-helpers.c
@@ -1836,9 +1836,7 @@  void gen_inst_init_args(Context *c, YYLTYPE *locp)
     for (unsigned i = 0; i < c->inst.init_list->len; i++) {
         HexValue *val = &g_array_index(c->inst.init_list, HexValue, i);
         if (val->type == REGISTER_ARG) {
-            char reg_id[5];
-            reg_compose(c, locp, &val->reg, reg_id);
-            EMIT_HEAD(c, "tcg_gen_movi_i%u(%s, 0);\n", val->bit_width, reg_id);
+            /* Nothing to do here */
         } else if (val->type == PREDICATE) {
             char suffix = val->is_dotnew ? 'N' : 'V';
             EMIT_HEAD(c, "tcg_gen_movi_i%u(P%c%c, 0);\n", val->bit_width,
diff --git a/target/hexagon/gen_helper_funcs.py b/target/hexagon/gen_helper_funcs.py
index 19e9883f4c..7a224b66e6 100755
--- a/target/hexagon/gen_helper_funcs.py
+++ b/target/hexagon/gen_helper_funcs.py
@@ -1,7 +1,7 @@ 
 #!/usr/bin/env python3
 
 ##
-##  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
+##  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
 ##
 ##  This program is free software; you can redistribute it and/or modify
 ##  it under the terms of the GNU General Public License as published by
@@ -226,6 +226,14 @@  def gen_helper_function(f, tag, tagregs, tagimms):
                     print("Bad register parse: ",regtype,regid,toss,numregs)
                 i += 1
 
+        ## For conditional instructions, we pass in the destination register
+        if 'A_CONDEXEC' in hex_common.attribdict[tag]:
+            for regtype, regid, toss, numregs in regs:
+                if (hex_common.is_writeonly(regid) and
+                    not hex_common.is_hvx_reg(regtype)):
+                    gen_helper_arg_opn(f, regtype, regid, i, tag)
+                    i += 1
+
         ## Arguments to the helper function are the source regs and immediates
         for regtype,regid,toss,numregs in regs:
             if (hex_common.is_read(regid)):
@@ -262,10 +270,11 @@  def gen_helper_function(f, tag, tagregs, tagimms):
         if hex_common.need_ea(tag): gen_decl_ea(f)
         ## Declare the return variable
         i=0
-        for regtype,regid,toss,numregs in regs:
-            if (hex_common.is_writeonly(regid)):
-                gen_helper_dest_decl_opn(f,regtype,regid,i)
-            i += 1
+        if 'A_CONDEXEC' not in hex_common.attribdict[tag]:
+            for regtype,regid,toss,numregs in regs:
+                if (hex_common.is_writeonly(regid)):
+                    gen_helper_dest_decl_opn(f,regtype,regid,i)
+                i += 1
 
         for regtype,regid,toss,numregs in regs:
             if (hex_common.is_read(regid)):
diff --git a/target/hexagon/gen_helper_protos.py b/target/hexagon/gen_helper_protos.py
index 674bf370fa..ddddc9e4f0 100755
--- a/target/hexagon/gen_helper_protos.py
+++ b/target/hexagon/gen_helper_protos.py
@@ -1,7 +1,7 @@ 
 #!/usr/bin/env python3
 
 ##
-##  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
+##  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
 ##
 ##  This program is free software; you can redistribute it and/or modify
 ##  it under the terms of the GNU General Public License as published by
@@ -87,6 +87,7 @@  def gen_helper_prototype(f, tag, tagregs, tagimms):
             if hex_common.need_slot(tag): def_helper_size += 1
             if hex_common.need_PC(tag): def_helper_size += 1
             if hex_common.helper_needs_next_PC(tag): def_helper_size += 1
+            if hex_common.need_condexec_reg(tag, regs): def_helper_size += 1
             f.write('DEF_HELPER_%s(%s' % (def_helper_size, tag))
             ## The return type is void
             f.write(', void' )
@@ -96,6 +97,7 @@  def gen_helper_prototype(f, tag, tagregs, tagimms):
             if hex_common.need_part1(tag): def_helper_size += 1
             if hex_common.need_slot(tag): def_helper_size += 1
             if hex_common.need_PC(tag): def_helper_size += 1
+            if hex_common.need_condexec_reg(tag, regs): def_helper_size += 1
             if hex_common.helper_needs_next_PC(tag): def_helper_size += 1
             f.write('DEF_HELPER_%s(%s' % (def_helper_size, tag))
 
@@ -121,6 +123,14 @@  def gen_helper_prototype(f, tag, tagregs, tagimms):
                     gen_def_helper_opn(f, tag, regtype, regid, toss, numregs, i)
                     i += 1
 
+        ## For conditional instructions, we pass in the destination register
+        if 'A_CONDEXEC' in hex_common.attribdict[tag]:
+            for regtype, regid, toss, numregs in regs:
+                if (hex_common.is_writeonly(regid) and
+                    not hex_common.is_hvx_reg(regtype)):
+                    gen_def_helper_opn(f, tag, regtype, regid, toss, numregs, i)
+                    i += 1
+
         ## Generate the qemu type for each input operand (regs and immediates)
         for regtype,regid,toss,numregs in regs:
             if (hex_common.is_read(regid)):
diff --git a/target/hexagon/gen_tcg_funcs.py b/target/hexagon/gen_tcg_funcs.py
index 3786ede44c..e2fbb50949 100755
--- a/target/hexagon/gen_tcg_funcs.py
+++ b/target/hexagon/gen_tcg_funcs.py
@@ -37,23 +37,33 @@  def gen_free_ea_tcg(f):
 
 def genptr_decl_pair_writable(f, tag, regtype, regid, regno):
     regN="%s%sN" % (regtype,regid)
-    f.write("    TCGv_i64 %s%sV = tcg_temp_local_new_i64();\n" % \
-        (regtype, regid))
-    if (regtype == "C"):
+    if (regtype == "R"):
+        f.write("    const int %s = insn->regno[%d];\n" % (regN, regno))
+    elif (regtype == "C"):
         f.write("    const int %s = insn->regno[%d] + HEX_REG_SA0;\n" % \
             (regN, regno))
     else:
-        f.write("    const int %s = insn->regno[%d];\n" % (regN, regno))
+        print("Bad register parse: ", regtype, regid)
+    f.write("    TCGv_i64 %s%sV = get_result_gpr_pair(ctx, %s);\n" % \
+        (regtype, regid, regN))
 
 def genptr_decl_writable(f, tag, regtype, regid, regno):
     regN="%s%sN" % (regtype,regid)
-    f.write("    TCGv %s%sV = tcg_temp_local_new();\n" % \
-        (regtype, regid))
-    if (regtype == "C"):
+    if (regtype == "R"):
+        f.write("    const int %s = insn->regno[%d];\n" % (regN, regno))
+        f.write("    TCGv %s%sV = get_result_gpr(ctx, %s);\n" % \
+            (regtype, regid, regN))
+    elif (regtype == "C"):
         f.write("    const int %s = insn->regno[%d] + HEX_REG_SA0;\n" % \
             (regN, regno))
-    else:
+        f.write("    TCGv %s%sV = get_result_gpr(ctx, %s);\n" % \
+            (regtype, regid, regN))
+    elif (regtype == "P"):
         f.write("    const int %s = insn->regno[%d];\n" % (regN, regno))
+        f.write("    TCGv %s%sV = tcg_temp_local_new();\n" % \
+            (regtype, regid))
+    else:
+        print("Bad register parse: ", regtype, regid)
 
 def genptr_decl(f, tag, regtype, regid, regno):
     regN="%s%sN" % (regtype,regid)
@@ -250,11 +260,10 @@  def genptr_decl_imm(f,immlett):
 
 def genptr_free(f, tag, regtype, regid, regno):
     if (regtype == "R"):
-        if (regid in {"dd", "ss", "tt", "xx", "yy"}):
+        if (regid in {"ss", "tt"}):
             f.write("    tcg_temp_free_i64(%s%sV);\n" % (regtype, regid))
-        elif (regid in {"d", "e", "x", "y"}):
-            f.write("    tcg_temp_free(%s%sV);\n" % (regtype, regid))
-        elif (regid not in {"s", "t", "u", "v"}):
+        elif (regid not in {"dd", "xx", "yy", "d", "e", "x", "y",
+                            "s", "t", "u", "v"}):
             print("Bad register parse: ",regtype,regid)
     elif (regtype == "P"):
         if (regid in {"d", "e", "x"}):
@@ -262,11 +271,11 @@  def genptr_free(f, tag, regtype, regid, regno):
         elif (regid not in {"s", "t", "u", "v"}):
             print("Bad register parse: ",regtype,regid)
     elif (regtype == "C"):
-        if (regid in {"dd", "ss"}):
+        if (regid in {"ss"}):
             f.write("    tcg_temp_free_i64(%s%sV);\n" % (regtype, regid))
-        elif (regid in {"d", "s"}):
+        elif (regid in {"s"}):
             f.write("    tcg_temp_free(%s%sV);\n" % (regtype, regid))
-        else:
+        elif (regid not in {"dd", "d"}):
             print("Bad register parse: ",regtype,regid)
     elif (regtype == "M"):
         if (regid != "u"):
@@ -323,8 +332,12 @@  def genptr_src_read(f, tag, regtype, regid):
             f.write("                                 hex_gpr[%s%sN + 1]);\n" % \
                 (regtype, regid))
         elif (regid in {"x", "y"}):
-            f.write("    tcg_gen_mov_tl(%s%sV, hex_gpr[%s%sN]);\n" % \
-                (regtype,regid,regtype,regid))
+            ## For read/write registers, we need to get the original value into
+            ## the result TCGv.  For conditional instructions, this is done in
+            ## gen_start_packet.  For unconditional instructions, we do it here.
+            if ('A_CONDEXEC' not in hex_common.attribdict[tag]):
+                f.write("    tcg_gen_mov_tl(%s%sV, hex_gpr[%s%sN]);\n" % \
+                    (regtype, regid, regtype, regid))
         elif (regid not in {"s", "t", "u", "v"}):
             print("Bad register parse: ", regtype, regid)
     elif (regtype == "P"):
@@ -434,25 +447,16 @@  def gen_helper_call_imm(f,immlett):
     f.write(", tcgv_%s" % hex_common.imm_name(immlett))
 
 def genptr_dst_write_pair(f, tag, regtype, regid):
-    if ('A_CONDEXEC' in hex_common.attribdict[tag]):
-        f.write("    gen_log_predicated_reg_write_pair(%s%sN, %s%sV, insn->slot);\n" % \
-            (regtype, regid, regtype, regid))
-    else:
-        f.write("    gen_log_reg_write_pair(%s%sN, %s%sV);\n" % \
-            (regtype, regid, regtype, regid))
+    f.write("    gen_log_reg_write_pair(%s%sN, %s%sV);\n" % \
+        (regtype, regid, regtype, regid))
 
 def genptr_dst_write(f, tag, regtype, regid):
     if (regtype == "R"):
         if (regid in {"dd", "xx", "yy"}):
             genptr_dst_write_pair(f, tag, regtype, regid)
         elif (regid in {"d", "e", "x", "y"}):
-            if ('A_CONDEXEC' in hex_common.attribdict[tag]):
-                f.write("    gen_log_predicated_reg_write(%s%sN, %s%sV,\n" % \
-                    (regtype, regid, regtype, regid))
-                f.write("                                 insn->slot);\n")
-            else:
-                f.write("    gen_log_reg_write(%s%sN, %s%sV);\n" % \
-                    (regtype, regid, regtype, regid))
+            f.write("    gen_log_reg_write(%s%sN, %s%sV);\n" % \
+                (regtype, regid, regtype, regid))
         else:
             print("Bad register parse: ", regtype, regid)
     elif (regtype == "P"):
@@ -536,15 +540,15 @@  def genptr_dst_write_opn(f,regtype, regid, tag):
 ##     For A2_add: Rd32=add(Rs32,Rt32), { RdV=RsV+RtV;}
 ##     We produce:
 ##    static void generate_A2_add(DisasContext *ctx)
-##       {
-##           TCGv RdV = tcg_temp_local_new();
-##           const int RdN = insn->regno[0];
-##           TCGv RsV = hex_gpr[insn->regno[1]];
-##           TCGv RtV = hex_gpr[insn->regno[2]];
-##           <GEN>
-##           gen_log_reg_write(RdN, RdV);
-##           tcg_temp_free(RdV);
-##       }
+##    {
+##        Insn *insn __attribute__((unused)) = ctx->insn;
+##        const int RdN = insn->regno[0];
+##        TCGv RdV = get_result_gpr(ctx, RdN);
+##        TCGv RsV = hex_gpr[insn->regno[1]];
+##        TCGv RtV = hex_gpr[insn->regno[2]];
+##        <GEN>
+##        gen_log_reg_write(RdN, RdV);
+##    }
 ##
 ##       where <GEN> depends on hex_common.skip_qemu_helper(tag)
 ##       if hex_common.skip_qemu_helper(tag) is True
@@ -628,6 +632,14 @@  def gen_tcg_func(f, tag, regs, imms):
         if (i > 0): f.write(", ")
         f.write("cpu_env")
         i=1
+        ## For conditional instructions, we pass in the destination register
+        if 'A_CONDEXEC' in hex_common.attribdict[tag]:
+            for regtype, regid, toss, numregs in regs:
+                if (hex_common.is_writeonly(regid) and
+                    not hex_common.is_hvx_reg(regtype)):
+                    gen_helper_call_opn(f, tag, regtype, regid, toss, \
+                                        numregs, i)
+                    i += 1
         for regtype,regid,toss,numregs in regs:
             if (hex_common.is_written(regid)):
                 if (not hex_common.is_hvx_reg(regtype)):
diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py
index 76da362c11..0200a66cb6 100755
--- a/target/hexagon/hex_common.py
+++ b/target/hexagon/hex_common.py
@@ -1,7 +1,7 @@ 
 #!/usr/bin/env python3
 
 ##
-##  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
+##  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
 ##
 ##  This program is free software; you can redistribute it and/or modify
 ##  it under the terms of the GNU General Public License as published by
@@ -237,6 +237,13 @@  def helper_needs_next_PC(tag):
 def need_pkt_has_multi_cof(tag):
     return 'A_COF' in attribdict[tag]
 
+def need_condexec_reg(tag, regs):
+    if 'A_CONDEXEC' in attribdict[tag]:
+        for regtype, regid, toss, numregs in regs:
+            if is_writeonly(regid) and not is_hvx_reg(regtype):
+                return True
+    return False
+
 def skip_qemu_helper(tag):
     return tag in overrides.keys()