diff mbox

[v3,7/9] tcg: Compress dead_temps and mem_temps into a single array

Message ID 1466740107-15042-8-git-send-email-rth@twiddle.net
State New
Headers show

Commit Message

Richard Henderson June 24, 2016, 3:48 a.m. UTC
We only need two bits per temporary.  Fold the two bytes into one,
and reduce the memory and cachelines required during compilation.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/tcg.c | 118 +++++++++++++++++++++++++++++++-------------------------------
 1 file changed, 59 insertions(+), 59 deletions(-)

Comments

Aurelien Jarno July 25, 2016, 3:15 p.m. UTC | #1
On 2016-06-23 20:48, Richard Henderson wrote:
> We only need two bits per temporary.  Fold the two bytes into one,
> and reduce the memory and cachelines required during compilation.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/tcg.c | 118 +++++++++++++++++++++++++++++++-------------------------------
>  1 file changed, 59 insertions(+), 59 deletions(-)
> 
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index c41640f..fd92b06 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -332,7 +332,7 @@ void tcg_context_init(TCGContext *s)
>  
>      memset(s, 0, sizeof(*s));
>      s->nb_globals = 0;
> -    
> +
>      /* Count total number of arguments and allocate the corresponding
>         space */
>      total_args = 0;
> @@ -824,16 +824,16 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
>                  real_args++;
>              }
>  #endif
> -	    /* If stack grows up, then we will be placing successive
> -	       arguments at lower addresses, which means we need to
> -	       reverse the order compared to how we would normally
> -	       treat either big or little-endian.  For those arguments
> -	       that will wind up in registers, this still works for
> -	       HPPA (the only current STACK_GROWSUP target) since the
> -	       argument registers are *also* allocated in decreasing
> -	       order.  If another such target is added, this logic may
> -	       have to get more complicated to differentiate between
> -	       stack arguments and register arguments.  */
> +           /* If stack grows up, then we will be placing successive
> +              arguments at lower addresses, which means we need to
> +              reverse the order compared to how we would normally
> +              treat either big or little-endian.  For those arguments
> +              that will wind up in registers, this still works for
> +              HPPA (the only current STACK_GROWSUP target) since the
> +              argument registers are *also* allocated in decreasing
> +              order.  If another such target is added, this logic may
> +              have to get more complicated to differentiate between
> +              stack arguments and register arguments.  */
>  #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
>              s->gen_opparam_buf[pi++] = args[i] + 1;
>              s->gen_opparam_buf[pi++] = args[i];
> @@ -1297,27 +1297,28 @@ void tcg_op_remove(TCGContext *s, TCGOp *op)
>  #endif
>  }
>  
> +#define TS_DEAD  1
> +#define TS_SYNC  2

I am not sure that TS_SYNC is the best name for that. There we really
want to tell that at this moment in the TCG instruction stream the
operand is in memory. It doesn't implied it is synced. Maybe TS_MEM?

The rest looks fine to me. The other alternative would have been to use
the TCGTempSet with the bitmap functions like in optimize.c, and use
only 2 bits per temp. That something that can be done later though.

All that said and provided you change the name:

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Richard Henderson Aug. 3, 2016, 6:22 p.m. UTC | #2
On 07/25/2016 08:45 PM, Aurelien Jarno wrote:
>> > +#define TS_DEAD  1
>> > +#define TS_SYNC  2
> I am not sure that TS_SYNC is the best name for that. There we really
> want to tell that at this moment in the TCG instruction stream the
> operand is in memory. It doesn't implied it is synced. Maybe TS_MEM?
>
> The rest looks fine to me. The other alternative would have been to use
> the TCGTempSet with the bitmap functions like in optimize.c, and use
> only 2 bits per temp. That something that can be done later though.

I changed the name to TS_MEM as suggested.


r~
diff mbox

Patch

diff --git a/tcg/tcg.c b/tcg/tcg.c
index c41640f..fd92b06 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -332,7 +332,7 @@  void tcg_context_init(TCGContext *s)
 
     memset(s, 0, sizeof(*s));
     s->nb_globals = 0;
-    
+
     /* Count total number of arguments and allocate the corresponding
        space */
     total_args = 0;
@@ -824,16 +824,16 @@  void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
                 real_args++;
             }
 #endif
-	    /* If stack grows up, then we will be placing successive
-	       arguments at lower addresses, which means we need to
-	       reverse the order compared to how we would normally
-	       treat either big or little-endian.  For those arguments
-	       that will wind up in registers, this still works for
-	       HPPA (the only current STACK_GROWSUP target) since the
-	       argument registers are *also* allocated in decreasing
-	       order.  If another such target is added, this logic may
-	       have to get more complicated to differentiate between
-	       stack arguments and register arguments.  */
+           /* If stack grows up, then we will be placing successive
+              arguments at lower addresses, which means we need to
+              reverse the order compared to how we would normally
+              treat either big or little-endian.  For those arguments
+              that will wind up in registers, this still works for
+              HPPA (the only current STACK_GROWSUP target) since the
+              argument registers are *also* allocated in decreasing
+              order.  If another such target is added, this logic may
+              have to get more complicated to differentiate between
+              stack arguments and register arguments.  */
 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
             s->gen_opparam_buf[pi++] = args[i] + 1;
             s->gen_opparam_buf[pi++] = args[i];
@@ -1297,27 +1297,28 @@  void tcg_op_remove(TCGContext *s, TCGOp *op)
 #endif
 }
 
+#define TS_DEAD  1
+#define TS_SYNC  2
+
 /* liveness analysis: end of function: all temps are dead, and globals
    should be in memory. */
-static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps,
-                                   uint8_t *mem_temps)
+static inline void tcg_la_func_end(TCGContext *s, uint8_t *temp_state)
 {
-    memset(dead_temps, 1, s->nb_temps);
-    memset(mem_temps, 1, s->nb_globals);
-    memset(mem_temps + s->nb_globals, 0, s->nb_temps - s->nb_globals);
+    memset(temp_state, TS_DEAD | TS_SYNC, s->nb_globals);
+    memset(temp_state + s->nb_globals, TS_DEAD, s->nb_temps - s->nb_globals);
 }
 
 /* liveness analysis: end of basic block: all temps are dead, globals
    and local temps should be in memory. */
-static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
-                                 uint8_t *mem_temps)
+static inline void tcg_la_bb_end(TCGContext *s, uint8_t *temp_state)
 {
-    int i;
+    int i, n;
 
-    memset(dead_temps, 1, s->nb_temps);
-    memset(mem_temps, 1, s->nb_globals);
-    for(i = s->nb_globals; i < s->nb_temps; i++) {
-        mem_temps[i] = s->temps[i].temp_local;
+    tcg_la_func_end(s, temp_state);
+    for (i = s->nb_globals, n = s->nb_temps; i < n; i++) {
+        if (s->temps[i].temp_local) {
+            temp_state[i] |= TS_SYNC;
+        }
     }
 }
 
@@ -1326,12 +1327,12 @@  static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
    temporaries are removed. */
 static void tcg_liveness_analysis(TCGContext *s)
 {
-    uint8_t *dead_temps, *mem_temps;
+    uint8_t *temp_state;
     int oi, oi_prev;
+    int nb_globals = s->nb_globals;
 
-    dead_temps = tcg_malloc(s->nb_temps);
-    mem_temps = tcg_malloc(s->nb_temps);
-    tcg_la_func_end(s, dead_temps, mem_temps);
+    temp_state = tcg_malloc(s->nb_temps);
+    tcg_la_func_end(s, temp_state);
 
     for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) {
         int i, nb_iargs, nb_oargs;
@@ -1360,7 +1361,7 @@  static void tcg_liveness_analysis(TCGContext *s)
                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                     for (i = 0; i < nb_oargs; i++) {
                         arg = args[i];
-                        if (!dead_temps[arg] || mem_temps[arg]) {
+                        if (temp_state[arg] != TS_DEAD) {
                             goto do_not_remove_call;
                         }
                     }
@@ -1371,39 +1372,41 @@  static void tcg_liveness_analysis(TCGContext *s)
                     /* output args are dead */
                     for (i = 0; i < nb_oargs; i++) {
                         arg = args[i];
-                        if (dead_temps[arg]) {
+                        if (temp_state[arg] & TS_DEAD) {
                             arg_life |= DEAD_ARG << i;
                         }
-                        if (mem_temps[arg]) {
+                        if (temp_state[arg] & TS_SYNC) {
                             arg_life |= SYNC_ARG << i;
                         }
-                        dead_temps[arg] = 1;
-                        mem_temps[arg] = 0;
+                        temp_state[arg] = TS_DEAD;
                     }
 
-                    if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
-                        /* globals should be synced to memory */
-                        memset(mem_temps, 1, s->nb_globals);
-                    }
                     if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                         TCG_CALL_NO_READ_GLOBALS))) {
                         /* globals should go back to memory */
-                        memset(dead_temps, 1, s->nb_globals);
+                        memset(temp_state, TS_DEAD | TS_SYNC, nb_globals);
+                    } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
+                        /* globals should be synced to memory */
+                        for (i = 0; i < nb_globals; i++) {
+                            temp_state[i] |= TS_SYNC;
+                        }
                     }
 
                     /* record arguments that die in this helper */
                     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                         arg = args[i];
                         if (arg != TCG_CALL_DUMMY_ARG) {
-                            if (dead_temps[arg]) {
+                            if (temp_state[arg] & TS_DEAD) {
                                 arg_life |= DEAD_ARG << i;
                             }
                         }
                     }
                     /* input arguments are live for preceding opcodes */
-                    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
+                    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                         arg = args[i];
-                        dead_temps[arg] = 0;
+                        if (arg != TCG_CALL_DUMMY_ARG) {
+                            temp_state[arg] &= ~TS_DEAD;
+                        }
                     }
                 }
             }
@@ -1412,8 +1415,7 @@  static void tcg_liveness_analysis(TCGContext *s)
             break;
         case INDEX_op_discard:
             /* mark the temporary as dead */
-            dead_temps[args[0]] = 1;
-            mem_temps[args[0]] = 0;
+            temp_state[args[0]] = TS_DEAD;
             break;
 
         case INDEX_op_add2_i32:
@@ -1434,8 +1436,8 @@  static void tcg_liveness_analysis(TCGContext *s)
                the low part.  The result can be optimized to a simple
                add or sub.  This happens often for x86_64 guest when the
                cpu mode is set to 32 bit.  */
-            if (dead_temps[args[1]] && !mem_temps[args[1]]) {
-                if (dead_temps[args[0]] && !mem_temps[args[0]]) {
+            if (temp_state[args[1]] == TS_DEAD) {
+                if (temp_state[args[0]] == TS_DEAD) {
                     goto do_remove;
                 }
                 /* Replace the opcode and adjust the args in place,
@@ -1472,8 +1474,8 @@  static void tcg_liveness_analysis(TCGContext *s)
         do_mul2:
             nb_iargs = 2;
             nb_oargs = 2;
-            if (dead_temps[args[1]] && !mem_temps[args[1]]) {
-                if (dead_temps[args[0]] && !mem_temps[args[0]]) {
+            if (temp_state[args[1]] == TS_DEAD) {
+                if (temp_state[args[0]] == TS_DEAD) {
                     /* Both parts of the operation are dead.  */
                     goto do_remove;
                 }
@@ -1481,8 +1483,7 @@  static void tcg_liveness_analysis(TCGContext *s)
                 op->opc = opc = opc_new;
                 args[1] = args[2];
                 args[2] = args[3];
-            } else if (have_opc_new2 && dead_temps[args[0]]
-                       && !mem_temps[args[0]]) {
+            } else if (temp_state[args[0]] == TS_DEAD && have_opc_new2) {
                 /* The low part of the operation is dead; generate the high. */
                 op->opc = opc = opc_new2;
                 args[0] = args[1];
@@ -1505,8 +1506,7 @@  static void tcg_liveness_analysis(TCGContext *s)
                implies side effects */
             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                 for (i = 0; i < nb_oargs; i++) {
-                    arg = args[i];
-                    if (!dead_temps[arg] || mem_temps[arg]) {
+                    if (temp_state[args[i]] != TS_DEAD) {
                         goto do_not_remove;
                     }
                 }
@@ -1517,35 +1517,35 @@  static void tcg_liveness_analysis(TCGContext *s)
                 /* output args are dead */
                 for (i = 0; i < nb_oargs; i++) {
                     arg = args[i];
-                    if (dead_temps[arg]) {
+                    if (temp_state[arg] & TS_DEAD) {
                         arg_life |= DEAD_ARG << i;
                     }
-                    if (mem_temps[arg]) {
+                    if (temp_state[arg] & TS_SYNC) {
                         arg_life |= SYNC_ARG << i;
                     }
-                    dead_temps[arg] = 1;
-                    mem_temps[arg] = 0;
+                    temp_state[arg] = TS_DEAD;
                 }
 
                 /* if end of basic block, update */
                 if (def->flags & TCG_OPF_BB_END) {
-                    tcg_la_bb_end(s, dead_temps, mem_temps);
+                    tcg_la_bb_end(s, temp_state);
                 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                     /* globals should be synced to memory */
-                    memset(mem_temps, 1, s->nb_globals);
+                    for (i = 0; i < nb_globals; i++) {
+                        temp_state[i] |= TS_SYNC;
+                    }
                 }
 
                 /* record arguments that die in this opcode */
                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                     arg = args[i];
-                    if (dead_temps[arg]) {
+                    if (temp_state[arg] & TS_DEAD) {
                         arg_life |= DEAD_ARG << i;
                     }
                 }
                 /* input arguments are live for preceding opcodes */
                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
-                    arg = args[i];
-                    dead_temps[arg] = 0;
+                    temp_state[args[i]] &= ~TS_DEAD;
                 }
             }
             break;