Patchwork [3/9] tcg/optimizer: rework copy progagation

login
register
mail settings
Submitter Aurelien Jarno
Date Sept. 19, 2012, 8 p.m.
Message ID <1348084823-18277-4-git-send-email-aurelien@aurel32.net>
Download mbox | patch
Permalink /patch/185215/
State New
Headers show

Comments

Aurelien Jarno - Sept. 19, 2012, 8 p.m.
The copy propagation pass tries to keep track of what is a copy of what
and what has copy of what, and in addition it keep a circular list of
of all the copies. Unfortunately this doesn't fully work: a mov from
a temp which has a state "COPY" changes it into a state "HAS_COPY".
Later when this temp is used again, it is considered has not having
copy and thus no propagation is done.

This patch fixes that by removing the hiearchy between copies, and thus
only keeping a "COPY" state both meaning "is a copy" and "has a copy".
The decision of which copy to use is deferred to the actual temp
replacement. At this stage there is not one best choice to do, but only
better choices than others. For doing the best choice the operation
would have to be parsed in reversed to know if a temp is going to be
used later or not. That what is done by the liveness analysis. At this
stage it is known that globals will always be live, that local temps
will be dead at the end of the translation block, and that the temps
will be dead at the end of the basic block. This means that this stage
should try to replace temps by local temps or globals and local temps
by globals.

It also brings the advantage of knowing if a temp is a copy of another,
which can improve the various optimizations.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c |  155 ++++++++++++++++++++++++++++++++------------------------
 1 file changed, 88 insertions(+), 67 deletions(-)
Richard Henderson - Sept. 19, 2012, 9:41 p.m.
On 09/19/2012 01:00 PM, Aurelien Jarno wrote:
> The copy propagation pass tries to keep track of what is a copy of what
> and what has copy of what, and in addition it keep a circular list of
> of all the copies. Unfortunately this doesn't fully work: a mov from
> a temp which has a state "COPY" changes it into a state "HAS_COPY".
> Later when this temp is used again, it is considered has not having
> copy and thus no propagation is done.
> 
> This patch fixes that by removing the hiearchy between copies, and thus
> only keeping a "COPY" state both meaning "is a copy" and "has a copy".
> The decision of which copy to use is deferred to the actual temp
> replacement. At this stage there is not one best choice to do, but only
> better choices than others. For doing the best choice the operation
> would have to be parsed in reversed to know if a temp is going to be
> used later or not. That what is done by the liveness analysis. At this
> stage it is known that globals will always be live, that local temps
> will be dead at the end of the translation block, and that the temps
> will be dead at the end of the basic block. This means that this stage
> should try to replace temps by local temps or globals and local temps
> by globals.
> 
> It also brings the advantage of knowing if a temp is a copy of another,
> which can improve the various optimizations.
> 
> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>

Reviewed-by: Richard Henderson <rth@twiddle.net>


r~

Patch

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 13b5b15..244eb02 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -39,7 +39,6 @@  typedef enum {
     TCG_TEMP_UNDEF = 0,
     TCG_TEMP_CONST,
     TCG_TEMP_COPY,
-    TCG_TEMP_HAS_COPY
 } tcg_temp_state;
 
 struct tcg_temp_info {
@@ -51,39 +50,19 @@  struct tcg_temp_info {
 
 static struct tcg_temp_info temps[TCG_MAX_TEMPS];
 
-/* Reset TEMP's state to TCG_TEMP_UNDEF.  If TEMP was a representative of some
-   class of equivalent temp's, a new representative should be chosen in this
-   class. */
-static void reset_temp(TCGArg temp, int nb_temps, int nb_globals)
+/* Reset TEMP's state to TCG_TEMP_UNDEF.  If TEMP only had one copy, remove
+   the copy flag from the remaining temp.  */
+static void reset_temp(TCGArg temp)
 {
-    int i;
-    TCGArg new_base = (TCGArg)-1;
-    if (temps[temp].state == TCG_TEMP_HAS_COPY) {
-        for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
-            if (i >= nb_globals) {
-                temps[i].state = TCG_TEMP_HAS_COPY;
-                new_base = i;
-                break;
-            }
-        }
-        for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
-            if (new_base == (TCGArg)-1) {
-                temps[i].state = TCG_TEMP_UNDEF;
-            } else {
-                temps[i].val = new_base;
-            }
+    if (temps[temp].state == TCG_TEMP_COPY) {
+        if (temps[temp].prev_copy == temps[temp].next_copy) {
+            temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF;
+        } else {
+            temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
+            temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
         }
-        temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
-        temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
-    } else if (temps[temp].state == TCG_TEMP_COPY) {
-        temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
-        temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
-        new_base = temps[temp].val;
     }
     temps[temp].state = TCG_TEMP_UNDEF;
-    if (new_base != (TCGArg)-1 && temps[new_base].next_copy == new_base) {
-        temps[new_base].state = TCG_TEMP_UNDEF;
-    }
 }
 
 static int op_bits(TCGOpcode op)
@@ -106,34 +85,83 @@  static TCGOpcode op_to_movi(TCGOpcode op)
     }
 }
 
+static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
+{
+    TCGArg i;
+
+    /* If this is already a global, we can't do better. */
+    if (temp < s->nb_globals) {
+        return temp;
+    }
+
+    /* Search for a global first. */
+    for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
+        if (i < s->nb_globals) {
+            return i;
+        }
+    }
+
+    /* If it is a temp, search for a temp local. */
+    if (!s->temps[temp].temp_local) {
+        for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
+            if (s->temps[i].temp_local) {
+                return i;
+            }
+        }
+    }
+
+    /* Failure to find a better representation, return the same temp. */
+    return temp;
+}
+
+static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
+{
+    TCGArg i;
+
+    if (arg1 == arg2) {
+        return true;
+    }
+
+    if (temps[arg1].state != TCG_TEMP_COPY
+        || temps[arg2].state != TCG_TEMP_COPY) {
+        return false;
+    }
+
+    for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
+        if (i == arg2) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
 static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args,
                             TCGArg dst, TCGArg src)
 {
-        reset_temp(dst, s->nb_temps, s->nb_globals);
-        assert(temps[src].state != TCG_TEMP_COPY);
-        /* Only consider temps with the same type (width) as copies. */
-        if (src >= s->nb_globals && s->temps[dst].type == s->temps[src].type) {
-            assert(temps[src].state != TCG_TEMP_CONST);
-            if (temps[src].state != TCG_TEMP_HAS_COPY) {
-                temps[src].state = TCG_TEMP_HAS_COPY;
+        reset_temp(dst);
+        assert(temps[src].state != TCG_TEMP_CONST);
+
+        if (s->temps[src].type == s->temps[dst].type) {
+            if (temps[src].state != TCG_TEMP_COPY) {
+                temps[src].state = TCG_TEMP_COPY;
                 temps[src].next_copy = src;
                 temps[src].prev_copy = src;
             }
             temps[dst].state = TCG_TEMP_COPY;
-            temps[dst].val = src;
             temps[dst].next_copy = temps[src].next_copy;
             temps[dst].prev_copy = src;
             temps[temps[dst].next_copy].prev_copy = dst;
             temps[src].next_copy = dst;
         }
+
         gen_args[0] = dst;
         gen_args[1] = src;
 }
 
-static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val,
-                             int nb_temps, int nb_globals)
+static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val)
 {
-        reset_temp(dst, nb_temps, nb_globals);
+        reset_temp(dst);
         temps[dst].state = TCG_TEMP_CONST;
         temps[dst].val = val;
         gen_args[0] = dst;
@@ -324,7 +352,6 @@  static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
     tcg_abort();
 }
 
-
 /* Propagate constants and copies, fold constant expressions. */
 static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                     TCGArg *args, TCGOpDef *tcg_op_defs)
@@ -336,10 +363,8 @@  static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
     TCGArg tmp;
     /* Array VALS has an element for each temp.
        If this temp holds a constant then its value is kept in VALS' element.
-       If this temp is a copy of other ones then this equivalence class'
-       representative is kept in VALS' element.
-       If this temp is neither copy nor constant then corresponding VALS'
-       element is unused. */
+       If this temp is a copy of other ones then the other copies are
+       available through the doubly linked circular list. */
 
     nb_temps = s->nb_temps;
     nb_globals = s->nb_globals;
@@ -355,7 +380,7 @@  static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             assert(op != INDEX_op_call);
             for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
                 if (temps[args[i]].state == TCG_TEMP_COPY) {
-                    args[i] = temps[args[i]].val;
+                    args[i] = find_better_copy(s, args[i]);
                 }
             }
         }
@@ -408,7 +433,7 @@  static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             if (temps[args[1]].state == TCG_TEMP_CONST
                 && temps[args[1]].val == 0) {
                 gen_opc_buf[op_index] = op_to_movi(op);
-                tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals);
+                tcg_opt_gen_movi(gen_args, args[0], 0);
                 args += 3;
                 gen_args += 2;
                 continue;
@@ -435,9 +460,7 @@  static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             }
             if (temps[args[2]].state == TCG_TEMP_CONST
                 && temps[args[2]].val == 0) {
-                if ((temps[args[0]].state == TCG_TEMP_COPY
-                    && temps[args[0]].val == args[1])
-                    || args[0] == args[1]) {
+                if (temps_are_copies(args[0], args[1])) {
                     gen_opc_buf[op_index] = INDEX_op_nop;
                 } else {
                     gen_opc_buf[op_index] = op_to_mov(op);
@@ -459,7 +482,7 @@  static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             if ((temps[args[2]].state == TCG_TEMP_CONST
                 && temps[args[2]].val == 0)) {
                 gen_opc_buf[op_index] = op_to_movi(op);
-                tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals);
+                tcg_opt_gen_movi(gen_args, args[0], 0);
                 args += 3;
                 gen_args += 2;
                 continue;
@@ -474,7 +497,7 @@  static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         CASE_OP_32_64(or):
         CASE_OP_32_64(and):
             if (args[1] == args[2]) {
-                if (args[1] == args[0]) {
+                if (temps_are_copies(args[0], args[1])) {
                     gen_opc_buf[op_index] = INDEX_op_nop;
                 } else {
                     gen_opc_buf[op_index] = op_to_mov(op);
@@ -494,9 +517,7 @@  static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
            allocator where needed and possible.  Also detect copies. */
         switch (op) {
         CASE_OP_32_64(mov):
-            if ((temps[args[1]].state == TCG_TEMP_COPY
-                && temps[args[1]].val == args[0])
-                || args[0] == args[1]) {
+            if (temps_are_copies(args[0], args[1])) {
                 args += 2;
                 gen_opc_buf[op_index] = INDEX_op_nop;
                 break;
@@ -514,7 +535,7 @@  static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             args[1] = temps[args[1]].val;
             /* fallthrough */
         CASE_OP_32_64(movi):
-            tcg_opt_gen_movi(gen_args, args[0], args[1], nb_temps, nb_globals);
+            tcg_opt_gen_movi(gen_args, args[0], args[1]);
             gen_args += 2;
             args += 2;
             break;
@@ -529,9 +550,9 @@  static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             if (temps[args[1]].state == TCG_TEMP_CONST) {
                 gen_opc_buf[op_index] = op_to_movi(op);
                 tmp = do_constant_folding(op, temps[args[1]].val, 0);
-                tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
+                tcg_opt_gen_movi(gen_args, args[0], tmp);
             } else {
-                reset_temp(args[0], nb_temps, nb_globals);
+                reset_temp(args[0]);
                 gen_args[0] = args[0];
                 gen_args[1] = args[1];
             }
@@ -559,10 +580,10 @@  static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 gen_opc_buf[op_index] = op_to_movi(op);
                 tmp = do_constant_folding(op, temps[args[1]].val,
                                           temps[args[2]].val);
-                tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
+                tcg_opt_gen_movi(gen_args, args[0], tmp);
                 gen_args += 2;
             } else {
-                reset_temp(args[0], nb_temps, nb_globals);
+                reset_temp(args[0]);
                 gen_args[0] = args[0];
                 gen_args[1] = args[1];
                 gen_args[2] = args[2];
@@ -576,10 +597,10 @@  static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 gen_opc_buf[op_index] = op_to_movi(op);
                 tmp = do_constant_folding_cond(op, temps[args[1]].val,
                                                temps[args[2]].val, args[3]);
-                tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
+                tcg_opt_gen_movi(gen_args, args[0], tmp);
                 gen_args += 2;
             } else {
-                reset_temp(args[0], nb_temps, nb_globals);
+                reset_temp(args[0]);
                 gen_args[0] = args[0];
                 gen_args[1] = args[1];
                 gen_args[2] = args[2];
@@ -602,7 +623,7 @@  static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 }
             } else {
                 memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
-                reset_temp(args[0], nb_temps, nb_globals);
+                reset_temp(args[0]);
                 gen_args[0] = args[0];
                 gen_args[1] = args[1];
                 gen_args[2] = args[2];
@@ -615,11 +636,11 @@  static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
             if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
                 for (i = 0; i < nb_globals; i++) {
-                    reset_temp(i, nb_temps, nb_globals);
+                    reset_temp(i);
                 }
             }
             for (i = 0; i < (args[0] >> 16); i++) {
-                reset_temp(args[i + 1], nb_temps, nb_globals);
+                reset_temp(args[i + 1]);
             }
             i = nb_call_args + 3;
             while (i) {
@@ -638,7 +659,7 @@  static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
             } else {
                 for (i = 0; i < def->nb_oargs; i++) {
-                    reset_temp(args[i], nb_temps, nb_globals);
+                    reset_temp(args[i]);
                 }
             }
             for (i = 0; i < def->nb_args; i++) {