Patchwork [v2,3/6] Do constant folding for basic arithmetic operations.

login
register
mail settings
Submitter Kirill Batuzov
Date June 9, 2011, 10:45 a.m.
Message ID <1307616344-27161-4-git-send-email-batuzovk@ispras.ru>
Download mbox | patch
Permalink /patch/99720/
State New
Headers show

Comments

Kirill Batuzov - June 9, 2011, 10:45 a.m.
Perform actual constant folding for ADD, SUB and MUL operations.

Signed-off-by: Kirill Batuzov <batuzovk@ispras.ru>
---
 tcg/optimize.c |  156 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 156 insertions(+), 0 deletions(-)
Richard Henderson - June 10, 2011, 5:57 p.m.
On 06/09/2011 03:45 AM, Kirill Batuzov wrote:
> +static int op_to_mov(int op)
> +{
> +    if (op_bits(op) == 32) {
> +        return INDEX_op_mov_i32;
> +    }
> +#if TCG_TARGET_REG_BITS == 64
> +    if (op_bits(op) == 64) {
> +        return INDEX_op_mov_i64;
> +    }
> +#endif
> +    tcg_abort();
> +}

Again, use a switch statement, not two separate calls.

> +static TCGArg do_constant_folding(int op, TCGArg x, TCGArg y)
> +{
> +    TCGArg res = do_constant_folding_2(op, x, y);
> +#if TCG_TARGET_REG_BITS == 64
> +    if (op_bits(op) == 32) {
> +        res &= 0xffffffff;

Strictly speaking, this isn't required.  The top bits of any
constant are considered garbage to the code generators.  Cf.
the code in tcg_out_movi for any 64-bit host.

That said, only x86_64 and s390 get this right for the constraints.
x86_64 by being able to use 'i' to accept all constants for 32-bit
operations, and s390 by using 'W' as a modifier to force 32-bit
comparison in tcg_target_const_match.

So it's probably best to keep this for now.

> +        /* Simplify expression if possible. */
> +        switch (op) {
> +        case INDEX_op_add_i32:
> +#if TCG_TARGET_REG_BITS == 64
> +        case INDEX_op_add_i64:
> +#endif
> +            if (temps[args[1]].state == TCG_TEMP_CONST) {
> +                /* 0 + x == x + 0 */
> +                tmp = args[1];
> +                args[1] = args[2];
> +                args[2] = tmp;
> +            }

Probably best to break this out into another switch so that
you can handle all of the commutative operations.

> +            /* Fallthrough */
> +        case INDEX_op_sub_i32:
> +#if TCG_TARGET_REG_BITS == 64
> +        case INDEX_op_sub_i64:
> +#endif
> +            if (temps[args[1]].state == TCG_TEMP_CONST) {
> +                /* Proceed with possible constant folding. */
> +                break;
> +            }
> +            if (temps[args[2]].state == TCG_TEMP_CONST
> +                && temps[args[2]].val == 0) {
> +                if ((temps[args[0]].state == TCG_TEMP_COPY
> +                    && temps[args[0]].val == args[1])
> +                    || args[0] == args[1]) {
> +                    args += 3;
> +                    gen_opc_buf[op_index] = INDEX_op_nop;
> +                } else {
> +                    reset_temp(temps, args[0], nb_temps, nb_globals);
> +                    if (args[1] >= s->nb_globals) {
> +                        temps[args[0]].state = TCG_TEMP_COPY;
> +                        temps[args[0]].val = args[1];
> +                        temps[args[1]].num_copies++;
> +                    }
> +                    gen_opc_buf[op_index] = op_to_mov(op);
> +                    gen_args[0] = args[0];
> +                    gen_args[1] = args[1];
> +                    gen_args += 2;
> +                    args += 3;
> +                }
> +                continue;
> +            }
> +            break;
> +        case INDEX_op_mul_i32:
> +#if TCG_TARGET_REG_BITS == 64
> +        case INDEX_op_mul_i64:
> +#endif
> +            if ((temps[args[1]].state == TCG_TEMP_CONST
> +                 && temps[args[1]].val == 0)
> +                || (temps[args[2]].state == TCG_TEMP_CONST
> +                    && temps[args[2]].val == 0)) {
> +                reset_temp(temps, args[0], nb_temps, nb_globals);
> +                temps[args[0]].state = TCG_TEMP_CONST;
> +                temps[args[0]].val = 0;
> +                assert(temps[args[0]].num_copies == 0);
> +                gen_opc_buf[op_index] = op_to_movi(op);
> +                gen_args[0] = args[0];
> +                gen_args[1] = 0;
> +                args += 3;
> +                gen_args += 2;
> +                continue;
> +            }

This is *way* too much code duplication, particularly with the
same code sequences already existing for mov and movi and more
to come with the logicals.


r~

Patch

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 7996798..29da6fa 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -89,9 +89,15 @@  static int op_bits(int op)
 {
     switch (op) {
     case INDEX_op_mov_i32:
+    case INDEX_op_add_i32:
+    case INDEX_op_sub_i32:
+    case INDEX_op_mul_i32:
         return 32;
 #if TCG_TARGET_REG_BITS == 64
     case INDEX_op_mov_i64:
+    case INDEX_op_add_i64:
+    case INDEX_op_sub_i64:
+    case INDEX_op_mul_i64:
         return 64;
 #endif
     default:
@@ -113,6 +119,58 @@  static int op_to_movi(int op)
     tcg_abort();
 }
 
+static int op_to_mov(int op)
+{
+    if (op_bits(op) == 32) {
+        return INDEX_op_mov_i32;
+    }
+#if TCG_TARGET_REG_BITS == 64
+    if (op_bits(op) == 64) {
+        return INDEX_op_mov_i64;
+    }
+#endif
+    tcg_abort();
+}
+
+static TCGArg do_constant_folding_2(int op, TCGArg x, TCGArg y)
+{
+    switch (op) {
+    case INDEX_op_add_i32:
+#if TCG_TARGET_REG_BITS == 64
+    case INDEX_op_add_i64:
+#endif
+        return x + y;
+
+    case INDEX_op_sub_i32:
+#if TCG_TARGET_REG_BITS == 64
+    case INDEX_op_sub_i64:
+#endif
+        return x - y;
+
+    case INDEX_op_mul_i32:
+#if TCG_TARGET_REG_BITS == 64
+    case INDEX_op_mul_i64:
+#endif
+        return x * y;
+
+    default:
+        fprintf(stderr,
+                "Unrecognized operation %d in do_constant_folding.\n", op);
+        tcg_abort();
+    }
+}
+
+static TCGArg do_constant_folding(int op, TCGArg x, TCGArg y)
+{
+    TCGArg res = do_constant_folding_2(op, x, y);
+#if TCG_TARGET_REG_BITS == 64
+    if (op_bits(op) == 32) {
+        res &= 0xffffffff;
+    }
+#endif
+    return res;
+}
+
 /* Propagate constants and copies, fold constant expressions. */
 static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                     TCGArg *args, TCGOpDef *tcg_op_defs)
@@ -120,6 +178,7 @@  static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
     int i, nb_ops, op_index, op, nb_temps, nb_globals;
     const TCGOpDef *def;
     TCGArg *gen_args;
+    TCGArg tmp;
     /* Array VALS has an element for each temp.
        If this temp holds a constant then its value is kept in VALS' element.
        If this temp is a copy of other ones then this equivalence class'
@@ -149,6 +208,72 @@  static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             }
         }
 
+        /* Simplify expression if possible. */
+        switch (op) {
+        case INDEX_op_add_i32:
+#if TCG_TARGET_REG_BITS == 64
+        case INDEX_op_add_i64:
+#endif
+            if (temps[args[1]].state == TCG_TEMP_CONST) {
+                /* 0 + x == x + 0 */
+                tmp = args[1];
+                args[1] = args[2];
+                args[2] = tmp;
+            }
+            /* Fallthrough */
+        case INDEX_op_sub_i32:
+#if TCG_TARGET_REG_BITS == 64
+        case INDEX_op_sub_i64:
+#endif
+            if (temps[args[1]].state == TCG_TEMP_CONST) {
+                /* Proceed with possible constant folding. */
+                break;
+            }
+            if (temps[args[2]].state == TCG_TEMP_CONST
+                && temps[args[2]].val == 0) {
+                if ((temps[args[0]].state == TCG_TEMP_COPY
+                    && temps[args[0]].val == args[1])
+                    || args[0] == args[1]) {
+                    args += 3;
+                    gen_opc_buf[op_index] = INDEX_op_nop;
+                } else {
+                    reset_temp(temps, args[0], nb_temps, nb_globals);
+                    if (args[1] >= s->nb_globals) {
+                        temps[args[0]].state = TCG_TEMP_COPY;
+                        temps[args[0]].val = args[1];
+                        temps[args[1]].num_copies++;
+                    }
+                    gen_opc_buf[op_index] = op_to_mov(op);
+                    gen_args[0] = args[0];
+                    gen_args[1] = args[1];
+                    gen_args += 2;
+                    args += 3;
+                }
+                continue;
+            }
+            break;
+        case INDEX_op_mul_i32:
+#if TCG_TARGET_REG_BITS == 64
+        case INDEX_op_mul_i64:
+#endif
+            if ((temps[args[1]].state == TCG_TEMP_CONST
+                 && temps[args[1]].val == 0)
+                || (temps[args[2]].state == TCG_TEMP_CONST
+                    && temps[args[2]].val == 0)) {
+                reset_temp(temps, args[0], nb_temps, nb_globals);
+                temps[args[0]].state = TCG_TEMP_CONST;
+                temps[args[0]].val = 0;
+                assert(temps[args[0]].num_copies == 0);
+                gen_opc_buf[op_index] = op_to_movi(op);
+                gen_args[0] = args[0];
+                gen_args[1] = 0;
+                args += 3;
+                gen_args += 2;
+                continue;
+            }
+            break;
+        }
+
         /* Propagate constants through copy operations and do constant
            folding.  Constants will be substituted to arguments by register
            allocator where needed and possible.  Also detect copies. */
@@ -196,6 +321,37 @@  static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             gen_args += 2;
             args += 2;
             break;
+        case INDEX_op_add_i32:
+        case INDEX_op_sub_i32:
+        case INDEX_op_mul_i32:
+#if TCG_TARGET_REG_BITS == 64
+        case INDEX_op_add_i64:
+        case INDEX_op_sub_i64:
+        case INDEX_op_mul_i64:
+#endif
+            if (temps[args[1]].state == TCG_TEMP_CONST
+                && temps[args[2]].state == TCG_TEMP_CONST) {
+                gen_opc_buf[op_index] = op_to_movi(op);
+                gen_args[0] = args[0];
+                gen_args[1] =
+                    do_constant_folding(op, temps[args[1]].val,
+                                        temps[args[2]].val);
+                reset_temp(temps, gen_args[0], nb_temps, nb_globals);
+                temps[gen_args[0]].state = TCG_TEMP_CONST;
+                temps[gen_args[0]].val = gen_args[1];
+                assert(temps[gen_args[0]].num_copies == 0);
+                gen_args += 2;
+                args += 3;
+                break;
+            } else {
+                reset_temp(temps, args[0], nb_temps, nb_globals);
+                gen_args[0] = args[0];
+                gen_args[1] = args[1];
+                gen_args[2] = args[2];
+                gen_args += 3;
+                args += 3;
+                break;
+            }
         case INDEX_op_call:
             for (i = 0; i < nb_globals; i++) {
                 reset_temp(temps, i, nb_temps, nb_globals);