Patchwork [v2,2/6] Add copy and constant propagation.

login
register
mail settings
Submitter Kirill Batuzov
Date June 9, 2011, 10:45 a.m.
Message ID <1307616344-27161-3-git-send-email-batuzovk@ispras.ru>
Download mbox | patch
Permalink /patch/99722/
State New
Headers show

Comments

Kirill Batuzov - June 9, 2011, 10:45 a.m.
Make tcg_constant_folding do copy and constant propagation. It is a
preparational work before actual constant folding.

Signed-off-by: Kirill Batuzov <batuzovk@ispras.ru>
---
 tcg/optimize.c |  161 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 161 insertions(+), 0 deletions(-)
Richard Henderson - June 10, 2011, 5:44 p.m.
On 06/09/2011 03:45 AM, Kirill Batuzov wrote:
> Make tcg_constant_folding do copy and constant propagation. It is a
> preparational work before actual constant folding.
> 
> Signed-off-by: Kirill Batuzov <batuzovk@ispras.ru>
> ---
>  tcg/optimize.c |  161 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 161 insertions(+), 0 deletions(-)
> 
> diff --git a/tcg/optimize.c b/tcg/optimize.c
> index 5daf131..7996798 100644
> --- a/tcg/optimize.c
> +++ b/tcg/optimize.c
> @@ -31,23 +31,178 @@
>  #include "qemu-common.h"
>  #include "tcg-op.h"
>  
> +typedef enum {
> +    TCG_TEMP_UNDEF = 0,
> +    TCG_TEMP_CONST,
> +    TCG_TEMP_COPY,
> +    TCG_TEMP_ANY
> +} tcg_temp_state;

Coding conventions request StudlyCaps, fwiw.

> +
> +struct tcg_temp_info {
> +    tcg_temp_state state;
> +    uint16_t num_copies;
> +    tcg_target_ulong val;
> +};
> +
> +/* Reset TEMP's state to TCG_TEMP_ANY.  If TEMP was a representative of some
> +   class of equivalent temp's, a new representative should be chosen in this
> +   class. */
> +static void reset_temp(struct tcg_temp_info *temps, TCGArg temp, int nb_temps,
> +                       int nb_globals)
> +{
> +    int i;
> +    TCGArg new_base;
> +    if (temps[temp].state == TCG_TEMP_COPY) {
> +        temps[temps[temp].val].num_copies--;
> +    }
> +    if (temps[temp].num_copies > 0) {
> +        new_base = (TCGArg)-1;
> +        for (i = nb_globals; i < nb_temps; i++) {

I think it's probably better to place the elements of the equiv class into
a double-linked circular list, rather than a mere num_copies that forces
you to iterate over nb_temps to remove an element.  E.g.

  struct tcg_temp_info {
    tcg_temp_state state;
    uint16_t prev_copy;
    uint16_t next_copy;
    tcg_target_ulong val;
  };

This makes elements easy to remove, and easy to choose a new representative.

> +static int op_bits(int op)
> +{
> +    switch (op) {
> +    case INDEX_op_mov_i32:
> +        return 32;
> +#if TCG_TARGET_REG_BITS == 64
> +    case INDEX_op_mov_i64:
> +        return 64;
> +#endif
> +    default:
> +        fprintf(stderr, "Unrecognized operation %d in op_bits.\n", op);
> +        tcg_abort();
> +    }
> +}

I think we would be well-served to move this to a property of the opcode,
much the same way as TCG_OPF_CALL_CLOBBER et al.  I would not, of course,
suggest to block this patch series on that cleanup.

> +static int op_to_movi(int op)
> +{
> +    if (op_bits(op) == 32) {
> +        return INDEX_op_movi_i32;
> +    }
> +#if TCG_TARGET_REG_BITS == 64
> +    if (op_bits(op) == 64) {
> +        return INDEX_op_movi_i64;
> +    }
> +#endif
> +    tcg_abort();
> +}

This should use a switch, not two calls to a function.

> +    struct tcg_temp_info temps[TCG_MAX_TEMPS];

Given that gen_opc_buf is static, I see no reason not to make this a
static variable as well.  Put it at file scope so you don't have to
pass it as arguments to subroutines.

> +        /* Do copy propagation */
> +        if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS))) {

Why are you suppressing copy propagation in this way?  I see no reason for it.

> +            assert(op != INDEX_op_call);
> +            for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
> +                if (temps[args[i]].state == TCG_TEMP_COPY
> +                    && !(def->args_ct[i].ct & TCG_CT_IALIAS)

From looking at only ARM translator output, you might suppose that we've
already performed matching constraint substitution.  But you'd be wrong.

I realize that at present you get a smidge smaller code by suppressing
substitution around matching constraints, but that's only because our
copy propagation is incomplete.  From a pure theoretic stance, I think
this line is wrong.  E.g.

 With IALIAS suppression                 Without
 ---- 0x40802e50                         ---- 0x40802e50
 mov_i32 tmp5,r10                    |   nopn $0x2,$0x2
 movi_i32 tmp6,$0x40802e58               movi_i32 tmp6,$0x40802e58
 add_i32 tmp6,tmp5,tmp6              |   add_i32 tmp6,r10,tmp6
 mov_i32 r10,tmp6                        mov_i32 r10,tmp6

I'll claim that that the right-hand column is better, even though it
currently forces the generation of an LEA insn instead of an ADD.

What's needed to fix this is either (1) a much better register allocator
or (2) a reverse copy-propagation pass that pushes global variables up
into outputs containing temporaries.

> +                    && (def->args_ct[i].ct & TCG_CT_REG)) {

This line looks redundant.  Don't we assert this property
in tcg_add_target_add_op_defs?  Certainly elsewhere we expect all
arguments to be able to accept a register...

> +        case INDEX_op_mov_i32:
> +#if TCG_TARGET_REG_BITS == 64
> +        case INDEX_op_mov_i64:
> +#endif

I suggest we take a page from tcg/sparc/tcg-target.c:

#if TCG_TARGET_REG_BITS == 64
#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)
#else
#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32)
#endif

Used as

        CASE_OP_32_64(mov):

in order to avoid rampant ifdefs like this.

Unfortunately that also raises the spectre of the fact that almost
all of the logical operations are optional.

Again, probably not something to tackle within this patch series,
but I suggest that we no longer conditionally define the opcodes.
Unconditionally define them, but mark them with TCG_OPF_UNSUPPORTED,
do not generate them, and abort in final code generation if they're
seen.  I think that would clean up a lot of if-deffery in and 
around tcg/*.c.

>          case INDEX_op_call:
> +            for (i = 0; i < nb_globals; i++) {
> +                reset_temp(temps, i, nb_temps, nb_globals);
> +            }

No need to reset globals if the call is PURE or CONST.

>          case INDEX_op_brcond_i64:
>  #endif
> +            memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));

Perhaps to pedantic, but we can do better in extended basic blocks.
A full flush of all temps is only needed at labels.  



r~
Kirill Batuzov - July 7, 2011, 12:36 p.m.
On Fri, 10 Jun 2011, Richard Henderson wrote:

> > +        /* Do copy propagation */
> > +        if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS))) {
> 
> Why are you suppressing copy propagation in this way?  I see no reason for it.
>

This was suggested by Aurelien Jarno. I do not know how safe it is to
propagate copies through such operations so I decided to be
conservative.

> >          case INDEX_op_brcond_i64:
> >  #endif
> > +            memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
> 
> Perhaps to pedantic, but we can do better in extended basic blocks.
> A full flush of all temps is only needed at labels.  
>

Not much better unfortunately. Globals got spilled at basic block end,
temps just die. The only things we can keep are locals but I have not
seen much of them in the intermediate representation.

----
  Kirill Batuzov

Patch

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 5daf131..7996798 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -31,23 +31,178 @@ 
 #include "qemu-common.h"
 #include "tcg-op.h"
 
+typedef enum {
+    TCG_TEMP_UNDEF = 0,
+    TCG_TEMP_CONST,
+    TCG_TEMP_COPY,
+    TCG_TEMP_ANY
+} tcg_temp_state;
+
+struct tcg_temp_info {
+    tcg_temp_state state;
+    uint16_t num_copies;
+    tcg_target_ulong val;
+};
+
+/* Reset TEMP's state to TCG_TEMP_ANY.  If TEMP was a representative of some
+   class of equivalent temp's, a new representative should be chosen in this
+   class. */
+static void reset_temp(struct tcg_temp_info *temps, TCGArg temp, int nb_temps,
+                       int nb_globals)
+{
+    int i;
+    TCGArg new_base;
+    if (temps[temp].state == TCG_TEMP_COPY) {
+        temps[temps[temp].val].num_copies--;
+    }
+    if (temps[temp].num_copies > 0) {
+        new_base = (TCGArg)-1;
+        for (i = nb_globals; i < nb_temps; i++) {
+            if (temps[i].state == TCG_TEMP_COPY && temps[i].val == temp) {
+                if (new_base == ((TCGArg)-1)) {
+                    new_base = (TCGArg)i;
+                    temps[i].state = TCG_TEMP_ANY;
+                    temps[i].num_copies = 0;
+                } else {
+                    temps[i].val = new_base;
+                    temps[new_base].num_copies++;
+                }
+            }
+        }
+        for (i = 0; i < nb_globals; i++) {
+            if (temps[i].state == TCG_TEMP_COPY && temps[i].val == temp) {
+                if (new_base == ((TCGArg)-1)) {
+                    temps[i].state = TCG_TEMP_ANY;
+                    temps[i].num_copies = 0;
+                } else {
+                    temps[i].val = new_base;
+                    temps[new_base].num_copies++;
+                }
+            }
+        }
+    }
+    temps[temp].state = TCG_TEMP_ANY;
+    temps[temp].num_copies = 0;
+}
+
+static int op_bits(int op)
+{
+    switch (op) {
+    case INDEX_op_mov_i32:
+        return 32;
+#if TCG_TARGET_REG_BITS == 64
+    case INDEX_op_mov_i64:
+        return 64;
+#endif
+    default:
+        fprintf(stderr, "Unrecognized operation %d in op_bits.\n", op);
+        tcg_abort();
+    }
+}
+
+static int op_to_movi(int op)
+{
+    if (op_bits(op) == 32) {
+        return INDEX_op_movi_i32;
+    }
+#if TCG_TARGET_REG_BITS == 64
+    if (op_bits(op) == 64) {
+        return INDEX_op_movi_i64;
+    }
+#endif
+    tcg_abort();
+}
+
+/* Propagate constants and copies, fold constant expressions. */
 static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                     TCGArg *args, TCGOpDef *tcg_op_defs)
 {
     int i, nb_ops, op_index, op, nb_temps, nb_globals;
     const TCGOpDef *def;
     TCGArg *gen_args;
+    /* Array VALS has an element for each temp.
+       If this temp holds a constant then its value is kept in VALS' element.
+       If this temp is a copy of other ones then this equivalence class'
+       representative is kept in VALS' element.
+       If this temp is neither copy nor constant then corresponding VALS'
+       element is unused. */
+    struct tcg_temp_info temps[TCG_MAX_TEMPS];
 
     nb_temps = s->nb_temps;
     nb_globals = s->nb_globals;
+    memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
 
     nb_ops = tcg_opc_ptr - gen_opc_buf;
     gen_args = args;
     for (op_index = 0; op_index < nb_ops; op_index++) {
         op = gen_opc_buf[op_index];
         def = &tcg_op_defs[op];
+        /* Do copy propagation */
+        if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS))) {
+            assert(op != INDEX_op_call);
+            for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
+                if (temps[args[i]].state == TCG_TEMP_COPY
+                    && !(def->args_ct[i].ct & TCG_CT_IALIAS)
+                    && (def->args_ct[i].ct & TCG_CT_REG)) {
+                    args[i] = temps[args[i]].val;
+                }
+            }
+        }
+
+        /* Propagate constants through copy operations and do constant
+           folding.  Constants will be substituted to arguments by register
+           allocator where needed and possible.  Also detect copies. */
         switch (op) {
+        case INDEX_op_mov_i32:
+#if TCG_TARGET_REG_BITS == 64
+        case INDEX_op_mov_i64:
+#endif
+            if ((temps[args[1]].state == TCG_TEMP_COPY
+                && temps[args[1]].val == args[0])
+                || args[0] == args[1]) {
+                args += 2;
+                gen_opc_buf[op_index] = INDEX_op_nop;
+                break;
+            }
+            if (temps[args[1]].state != TCG_TEMP_CONST) {
+                reset_temp(temps, args[0], nb_temps, nb_globals);
+                if (args[1] >= s->nb_globals) {
+                    temps[args[0]].state = TCG_TEMP_COPY;
+                    temps[args[0]].val = args[1];
+                    temps[args[1]].num_copies++;
+                }
+                gen_args[0] = args[0];
+                gen_args[1] = args[1];
+                gen_args += 2;
+                args += 2;
+                break;
+            }
+            /* Source argument is constant.  Rewrite the operation and
+               let movi case handle it. */
+            op = op_to_movi(op);
+            gen_opc_buf[op_index] = op;
+            args[1] = temps[args[1]].val;
+            /* fallthrough */
+        case INDEX_op_movi_i32:
+#if TCG_TARGET_REG_BITS == 64
+        case INDEX_op_movi_i64:
+#endif
+            reset_temp(temps, args[0], nb_temps, nb_globals);
+            temps[args[0]].state = TCG_TEMP_CONST;
+            temps[args[0]].val = args[1];
+            assert(temps[args[0]].num_copies == 0);
+            gen_args[0] = args[0];
+            gen_args[1] = args[1];
+            gen_args += 2;
+            args += 2;
+            break;
         case INDEX_op_call:
+            for (i = 0; i < nb_globals; i++) {
+                reset_temp(temps, i, nb_temps, nb_globals);
+            }
+            for (i = 0; i < (args[0] >> 16); i++) {
+                reset_temp(temps, args[i + 1], nb_temps, nb_globals);
+            }
             i = (args[0] >> 16) + (args[0] & 0xffff) + 3;
             while (i) {
                 *gen_args = *args;
@@ -63,6 +218,7 @@  static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
 #if TCG_TARGET_REG_BITS == 64
         case INDEX_op_brcond_i64:
 #endif
+            memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
             for (i = 0; i < def->nb_args; i++) {
                 *gen_args = *args;
                 args++;
@@ -70,6 +226,11 @@  static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             }
             break;
         default:
+            /* Default case: we do know nothing about operation so no
+               propagation is done.  We only trash output args.  */
+            for (i = 0; i < def->nb_oargs; i++) {
+                reset_temp(temps, args[i], nb_temps, nb_globals);
+            }
             for (i = 0; i < def->nb_args; i++) {
                 gen_args[i] = args[i];
             }