diff mbox

[2/6] Add copy and constant propagation.

Message ID 4763ae5461ae14adbb6aca5c925fa0fe81f4f214.1305889001.git.batuzovk@ispras.ru
State New
Headers show

Commit Message

Kirill Batuzov May 20, 2011, 12:39 p.m. UTC
Make tcg_constant_folding do copy and constant propagation. It is a
preparational work before actual constant folding.

Signed-off-by: Kirill Batuzov <batuzovk@ispras.ru>
---
 tcg/optimize.c |  123 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 123 insertions(+), 0 deletions(-)

Comments

Richard Henderson May 20, 2011, 6:22 p.m. UTC | #1
On 05/20/2011 05:39 AM, Kirill Batuzov wrote:
> +    static tcg_target_ulong vals[TCG_MAX_TEMPS];
> +    static tcg_temp_state state[TCG_MAX_TEMPS];

Any particular reason to make these static?  It's 4-6k on the host
stack depending on sizeof tcg_target_ulong.  Large, but not excessive.
I think it's just confusing to have them static, myself.

I think it might be clearer to have these two in a structure, rather
than two separate arrays.  That does waste a bit of memory in padding
for 64-bit target, but I think it might clean up the code a bit.

>      nb_temps = s->nb_temps;
>      nb_globals = s->nb_globals;
> +    memset(state, 0, nb_temps * sizeof(tcg_temp_state));

Of course, instead of allocating static structures, you could alloca
the memory in the appropriate size...

> +        case INDEX_op_mov_i32:
> +#if TCG_TARGET_REG_BITS == 64
> +        case INDEX_op_mov_i64:
> +#endif
> +            if ((state[args[1]] == TCG_TEMP_COPY
> +                && vals[args[1]] == args[0])
> +                || args[0] == args[1]) {
> +                args += 2;
> +                gen_opc_buf[op_index] = INDEX_op_nop;
> +                break;

Here, for example, INDEX_op_nop2 would be more appropriate,
obviating the need for the arg copy loop from patch 1.

> +            if (state[args[1]] != TCG_TEMP_CONST) {
> +            } else {
> +                /* Source argument is constant.  Rewrite the operation and
> +                   let movi case handle it. */
> +            }

FWIW, I think writing positive tests is clearer than writing
negative tests.  I.e. reverse the condition and the if/else bodies.

>          case INDEX_op_brcond_i64:
>  #endif
> +            memset(state, 0, nb_temps * sizeof(tcg_temp_state));

Why are you resetting at the branch, rather than at the label?
Seems reasonable enough to handle the extended basic block when
possible...


r~
Paolo Bonzini May 20, 2011, 6:46 p.m. UTC | #2
On 05/20/2011 08:22 PM, Richard Henderson wrote:
> I think it might be clearer to have these two in a structure, rather
> than two separate arrays.  That does waste a bit of memory in padding
> for 64-bit target, but I think it might clean up the code a bit.

You can use the padding to implement a generation count.  O(1) clearing 
of the array might help performance somewhat.  At this point making the 
arrays static makes sense again, because it allows to bump the 
generation count at the beginning of tcg_constant_folding (if you put it 
on the stack you have to zero the state).

It could also help to add a num_copies flag tracking whether a register 
is a copy of this one.  If not, you can avoid processing the output 
registers in the default case.

That would be something like

struct {
     uint16_t state;
     uint16_t num_copies;
     uint32_t gen;
     tcg_target_ulong val;
};

Paolo
Aurelien Jarno May 20, 2011, 7:41 p.m. UTC | #3
On Fri, May 20, 2011 at 04:39:29PM +0400, Kirill Batuzov wrote:
> Make tcg_constant_folding do copy and constant propagation. It is a
> preparational work before actual constant folding.
> 
> Signed-off-by: Kirill Batuzov <batuzovk@ispras.ru>
> ---
>  tcg/optimize.c |  123 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 123 insertions(+), 0 deletions(-)
> 
> diff --git a/tcg/optimize.c b/tcg/optimize.c
> index cf31d18..a761c51 100644
> --- a/tcg/optimize.c
> +++ b/tcg/optimize.c
> @@ -31,22 +31,139 @@
>  #include "qemu-common.h"
>  #include "tcg-op.h"
>  
> +typedef enum {
> +    TCG_TEMP_UNDEF = 0,
> +    TCG_TEMP_CONST,
> +    TCG_TEMP_COPY,
> +    TCG_TEMP_ANY
> +} tcg_temp_state;
> +
> +static int mov_to_movi(int op)
> +{
> +    switch (op) {
> +    case INDEX_op_mov_i32: return INDEX_op_movi_i32;
> +#if TCG_TARGET_REG_BITS == 64
> +    case INDEX_op_mov_i64: return INDEX_op_movi_i64;
> +#endif
> +    default:
> +        fprintf(stderr, "Unrecognized operation %d in mov_to_movi.\n", op);
> +        tcg_abort();
> +    }
> +}
> +
> +/* Reset TEMP's state to TCG_TEMP_ANY.  If TEMP was a representative of some
> +   class of equivalent temp's, a new representative should be chosen in this
> +   class. */
> +static void reset_temp(tcg_temp_state *state, tcg_target_ulong *vals,
> +                       TCGArg temp, int nb_temps, int nb_globals)
> +{
> +    int i;
> +    TCGArg new_base;
> +    new_base = (TCGArg)-1;
> +    for (i = nb_globals; i < nb_temps; i++) {
> +        if (state[i] == TCG_TEMP_COPY && vals[i] == temp) {
> +            if (new_base == ((TCGArg)-1)) {
> +                new_base = (TCGArg)i;
> +                state[i] = TCG_TEMP_ANY;
> +            } else {
> +                vals[i] = new_base;
> +            }
> +        }
> +    }
> +    for (i = 0; i < nb_globals; i++) {
> +        if (state[i] == TCG_TEMP_COPY && vals[i] == temp) {
> +            if (new_base == ((TCGArg)-1)) {
> +                state[i] = TCG_TEMP_ANY;
> +            } else {
> +                vals[i] = new_base;
> +            }
> +        }
> +    }
> +    state[temp] = TCG_TEMP_ANY;
> +}
> +
> +/* Propagate constants and copies, fold constant expressions. */
>  static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
>                                      TCGArg *args, TCGOpDef *tcg_op_defs)
>  {
>      int i, nb_ops, op_index, op, nb_temps, nb_globals;
>      const TCGOpDef *def;
>      TCGArg *gen_args;
> +    /* Array VALS has an element for each temp.
> +       If this temp holds a constant then its value is kept in VALS' element.
> +       If this temp is a copy of other ones then this equivalence class'
> +       representative is kept in VALS' element.
> +       If this temp is neither copy nor constant then corresponding VALS'
> +       element is unused. */
> +    static tcg_target_ulong vals[TCG_MAX_TEMPS];
> +    static tcg_temp_state state[TCG_MAX_TEMPS];
>  
>      nb_temps = s->nb_temps;
>      nb_globals = s->nb_globals;
> +    memset(state, 0, nb_temps * sizeof(tcg_temp_state));
>  
>      nb_ops = tcg_opc_ptr - gen_opc_buf;
>      gen_args = args;
>      for (op_index = 0; op_index < nb_ops; op_index++) {
>          op = gen_opc_buf[op_index];
>          def = &tcg_op_defs[op];
> +        /* Do copy propagation */
> +        if (op != INDEX_op_call) {
> +            for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
> +                if (state[args[i]] == TCG_TEMP_COPY
> +                    && !(def->args_ct[i].ct & TCG_CT_IALIAS)
> +                    && (def->args_ct[i].ct & TCG_CT_REG)) {
> +                    args[i] = vals[args[i]];
> +                }
> +            }
> +        }
> +
> +        /* Propagate constants through copy operations and do constant
> +           folding.  Constants will be substituted to arguments by register
> +           allocator where needed and possible.  Also detect copies. */
>          switch (op) {
> +        case INDEX_op_mov_i32:
> +#if TCG_TARGET_REG_BITS == 64
> +        case INDEX_op_mov_i64:
> +#endif
> +            if ((state[args[1]] == TCG_TEMP_COPY
> +                && vals[args[1]] == args[0])
> +                || args[0] == args[1]) {
> +                args += 2;
> +                gen_opc_buf[op_index] = INDEX_op_nop;
> +                break;
> +            }
> +            if (state[args[1]] != TCG_TEMP_CONST) {
> +                reset_temp(state, vals, args[0], nb_temps, nb_globals);
> +                if (args[1] >= s->nb_globals) {
> +                    state[args[0]] = TCG_TEMP_COPY;
> +                    vals[args[0]] = args[1];
> +                }
> +                gen_args[0] = args[0];
> +                gen_args[1] = args[1];
> +                gen_args += 2;
> +                args += 2;
> +                break;
> +            } else {
> +                /* Source argument is constant.  Rewrite the operation and
> +                   let movi case handle it. */
> +                op = mov_to_movi(op);
> +                gen_opc_buf[op_index] = op;
> +                args[1] = vals[args[1]];
> +                /* fallthrough */
> +            }
> +        case INDEX_op_movi_i32:
> +#if TCG_TARGET_REG_BITS == 64
> +        case INDEX_op_movi_i64:
> +#endif
> +            reset_temp(state, vals, args[0], nb_temps, nb_globals);
> +            state[args[0]] = TCG_TEMP_CONST;
> +            vals[args[0]] = args[1];
> +            gen_args[0] = args[0];
> +            gen_args[1] = args[1];
> +            gen_args += 2;
> +            args += 2;
> +            break;
>          case INDEX_op_call:
>          case INDEX_op_jmp:
>          case INDEX_op_br:
> @@ -55,6 +172,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
>  #if TCG_TARGET_REG_BITS == 64
>          case INDEX_op_brcond_i64:
>  #endif
> +            memset(state, 0, nb_temps * sizeof(tcg_temp_state));
>              i = (op == INDEX_op_call) ?
>                  (args[0] >> 16) + (args[0] & 0xffff) + 3 :
>                  def->nb_args;
> @@ -66,6 +184,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
>              }
>              break;
>          default:
> +            /* Default case: we do know nothing about operation so no
> +               propagation is done.  We only trash output args.  */
> +            for (i = 0; i < def->nb_oargs; i++) {
> +                reset_temp(state, vals, args[i], nb_temps, nb_globals);
> +            }
>              for (i = 0; i < def->nb_args; i++) {
>                  gen_args[i] = args[i];
>              }

It's not possible to do any optimization across certain ops that have
side effect or helper which access globals directly. For helpers this
can be easily detected looking at the TCG_CALL_PURE and TCG_CALL_CONST
flags. For ops, you should look at TCG_OPF_BB_END, TCG_OPF_CALL_CLOBBER
and TCG_OPF_SIDE_EFFECTS flags.
diff mbox

Patch

diff --git a/tcg/optimize.c b/tcg/optimize.c
index cf31d18..a761c51 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -31,22 +31,139 @@ 
 #include "qemu-common.h"
 #include "tcg-op.h"
 
+typedef enum {
+    TCG_TEMP_UNDEF = 0,
+    TCG_TEMP_CONST,
+    TCG_TEMP_COPY,
+    TCG_TEMP_ANY
+} tcg_temp_state;
+
+static int mov_to_movi(int op)
+{
+    switch (op) {
+    case INDEX_op_mov_i32: return INDEX_op_movi_i32;
+#if TCG_TARGET_REG_BITS == 64
+    case INDEX_op_mov_i64: return INDEX_op_movi_i64;
+#endif
+    default:
+        fprintf(stderr, "Unrecognized operation %d in mov_to_movi.\n", op);
+        tcg_abort();
+    }
+}
+
+/* Reset TEMP's state to TCG_TEMP_ANY.  If TEMP was a representative of some
+   class of equivalent temp's, a new representative should be chosen in this
+   class. */
+static void reset_temp(tcg_temp_state *state, tcg_target_ulong *vals,
+                       TCGArg temp, int nb_temps, int nb_globals)
+{
+    int i;
+    TCGArg new_base;
+    new_base = (TCGArg)-1;
+    for (i = nb_globals; i < nb_temps; i++) {
+        if (state[i] == TCG_TEMP_COPY && vals[i] == temp) {
+            if (new_base == ((TCGArg)-1)) {
+                new_base = (TCGArg)i;
+                state[i] = TCG_TEMP_ANY;
+            } else {
+                vals[i] = new_base;
+            }
+        }
+    }
+    for (i = 0; i < nb_globals; i++) {
+        if (state[i] == TCG_TEMP_COPY && vals[i] == temp) {
+            if (new_base == ((TCGArg)-1)) {
+                state[i] = TCG_TEMP_ANY;
+            } else {
+                vals[i] = new_base;
+            }
+        }
+    }
+    state[temp] = TCG_TEMP_ANY;
+}
+
+/* Propagate constants and copies, fold constant expressions. */
 static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                     TCGArg *args, TCGOpDef *tcg_op_defs)
 {
     int i, nb_ops, op_index, op, nb_temps, nb_globals;
     const TCGOpDef *def;
     TCGArg *gen_args;
+    /* Array VALS has an element for each temp.
+       If this temp holds a constant then its value is kept in VALS' element.
+       If this temp is a copy of other ones then this equivalence class'
+       representative is kept in VALS' element.
+       If this temp is neither copy nor constant then corresponding VALS'
+       element is unused. */
+    static tcg_target_ulong vals[TCG_MAX_TEMPS];
+    static tcg_temp_state state[TCG_MAX_TEMPS];
 
     nb_temps = s->nb_temps;
     nb_globals = s->nb_globals;
+    memset(state, 0, nb_temps * sizeof(tcg_temp_state));
 
     nb_ops = tcg_opc_ptr - gen_opc_buf;
     gen_args = args;
     for (op_index = 0; op_index < nb_ops; op_index++) {
         op = gen_opc_buf[op_index];
         def = &tcg_op_defs[op];
+        /* Do copy propagation */
+        if (op != INDEX_op_call) {
+            for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
+                if (state[args[i]] == TCG_TEMP_COPY
+                    && !(def->args_ct[i].ct & TCG_CT_IALIAS)
+                    && (def->args_ct[i].ct & TCG_CT_REG)) {
+                    args[i] = vals[args[i]];
+                }
+            }
+        }
+
+        /* Propagate constants through copy operations and do constant
+           folding.  Constants will be substituted to arguments by register
+           allocator where needed and possible.  Also detect copies. */
         switch (op) {
+        case INDEX_op_mov_i32:
+#if TCG_TARGET_REG_BITS == 64
+        case INDEX_op_mov_i64:
+#endif
+            if ((state[args[1]] == TCG_TEMP_COPY
+                && vals[args[1]] == args[0])
+                || args[0] == args[1]) {
+                args += 2;
+                gen_opc_buf[op_index] = INDEX_op_nop;
+                break;
+            }
+            if (state[args[1]] != TCG_TEMP_CONST) {
+                reset_temp(state, vals, args[0], nb_temps, nb_globals);
+                if (args[1] >= s->nb_globals) {
+                    state[args[0]] = TCG_TEMP_COPY;
+                    vals[args[0]] = args[1];
+                }
+                gen_args[0] = args[0];
+                gen_args[1] = args[1];
+                gen_args += 2;
+                args += 2;
+                break;
+            } else {
+                /* Source argument is constant.  Rewrite the operation and
+                   let movi case handle it. */
+                op = mov_to_movi(op);
+                gen_opc_buf[op_index] = op;
+                args[1] = vals[args[1]];
+                /* fallthrough */
+            }
+        case INDEX_op_movi_i32:
+#if TCG_TARGET_REG_BITS == 64
+        case INDEX_op_movi_i64:
+#endif
+            reset_temp(state, vals, args[0], nb_temps, nb_globals);
+            state[args[0]] = TCG_TEMP_CONST;
+            vals[args[0]] = args[1];
+            gen_args[0] = args[0];
+            gen_args[1] = args[1];
+            gen_args += 2;
+            args += 2;
+            break;
         case INDEX_op_call:
         case INDEX_op_jmp:
         case INDEX_op_br:
@@ -55,6 +172,7 @@  static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
 #if TCG_TARGET_REG_BITS == 64
         case INDEX_op_brcond_i64:
 #endif
+            memset(state, 0, nb_temps * sizeof(tcg_temp_state));
             i = (op == INDEX_op_call) ?
                 (args[0] >> 16) + (args[0] & 0xffff) + 3 :
                 def->nb_args;
@@ -66,6 +184,11 @@  static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             }
             break;
         default:
+            /* Default case: we do know nothing about operation so no
+               propagation is done.  We only trash output args.  */
+            for (i = 0; i < def->nb_oargs; i++) {
+                reset_temp(state, vals, args[i], nb_temps, nb_globals);
+            }
             for (i = 0; i < def->nb_args; i++) {
                 gen_args[i] = args[i];
             }