Patchwork RFC: TCG constant propagation.

login
register
mail settings
Submitter Pablo Virolainen
Date Aug. 5, 2009, 8:13 a.m.
Message ID <4A793F32.4090207@nomovok.com>
Download mbox | patch
Permalink /patch/30783/
State Superseded
Headers show

Comments

Pablo Virolainen - Aug. 5, 2009, 8:13 a.m.
Filip Navara kirjoitti:
> Add support for constant propagation to TCG. This has to be paired with the liveness
> analysis to remove the dead code. Not all possible operations are covered, but the
> most common ones are. This improves the code generation for several ARM instructions,
> like MVN (immediate), and it may help other targets as well.

On my small benchmark, qemu-system-sh4 was about 3% slower with this patch
on an Intel Xeon E5405 @ 2.00GHz, running in 64-bit mode. My mini benchmark
is a build of zlib 1.2.3, so it is a real-world workload. I ran the
benchmark several times and the results seem to be pretty consistent.

P.S. I added INDEX_op_*_i64 cases to the evaluation part. I'm not
completely sure whether those "& mask" operations should be there.

Pablo Virolainen
Filip Navara - Aug. 5, 2009, 8:48 a.m.
On Wed, Aug 5, 2009 at 10:13 AM, Pablo
Virolainen<Pablo.Virolainen@nomovok.com> wrote:
> Filip Navara kirjoitti:
>>
>> Add support for constant propagation to TCG. This has to be paired with
>> the liveness
>> analysis to remove the dead code. Not all possible operations are covered,
>> but the
>> most common ones are. This improves the code generation for several ARM
>> instructions,
>> like MVN (immediate), and it may help other targets as well.
>
> On my small benchmark, qemu-system-sh4 was about 3% slower with this patch
> on an Intel Xeon E5405 @ 2.00GHz, running in 64-bit mode. My mini benchmark
> is a build of zlib 1.2.3, so it is a real-world workload. I ran the
> benchmark several times and the results seem to be pretty consistent.

Thanks for testing and reporting the results. I'll see if I can reduce
the overhead or if I should ditch the patch.

> P.S. I added INDEX_op_*_i64 cases to the evaluation part. I'm not
> completely sure whether those "& mask" operations should be there.

I've rewritten the patch and fixed the 64-bit mess that was there. It
is available at
http://repo.or.cz/w/qemu/navara.git?a=commit;h=5df3a524fc0b923cf2e5e1883ff550d055d36eb5

Best regards,
Filip Navara

Patch

--- qemu-0.11.0-rc1_orig/tcg/tcg.c	2009-07-30 03:38:26.000000000 +0300
+++ qemu-0.11.0-rc1/tcg/tcg.c	2009-08-05 10:43:48.000000000 +0300
@@ -1021,7 +1021,228 @@ 
 #endif
         tdefs++;
     }
+}
 
+/* Folding helpers.  Like the #if-guarded case labels in the function below,
+   they only name the *_i64 opcodes when TCG_TARGET_REG_BITS == 64. */
+#if TCG_TARGET_REG_BITS == 64
+#define CASE_OP_32_64(x) case INDEX_op_##x##_i32: case INDEX_op_##x##_i64
+#define CONST_MOVI_OP(bits) \
+    ((bits) == 64 ? INDEX_op_movi_i64 : INDEX_op_movi_i32)
+#else
+#define CASE_OP_32_64(x) case INDEX_op_##x##_i32
+#define CONST_MOVI_OP(bits) INDEX_op_movi_i32
+#endif
+
+/*
+ * Forward constant-propagation pass over the generated TCG op stream.
+ *
+ * Walks gen_opc_buf/gen_opparam_buf once, tracking which temps currently
+ * hold a known constant.  A not/add/sub/mul/and/or/xor/shl/shr whose
+ * inputs are all known constants is rewritten in place into a movi of
+ * the folded value (the freed operand slot of a binary op is squeezed
+ * out of the parameter buffer).  Ops that only fed the folded one are
+ * not removed here; that is left to the liveness analysis.
+ *
+ * All knowledge is dropped at set_label and at ops flagged
+ * TCG_OPF_BB_END; any other unhandled op invalidates only its outputs.
+ */
+static void tcg_const_analysis(TCGContext *s)
+{
+    int nb_cargs, nb_iargs, nb_oargs, dest, src, src2, del_args, i;
+    int bits, foldable;
+    TCGArg *args;
+    uint16_t op;
+    uint16_t *opc_ptr;
+    const TCGOpDef *def;
+    uint8_t *const_temps;           /* 1 while the temp is a known constant */
+    tcg_target_ulong *temp_values;  /* its value; valid only when flagged */
+    tcg_target_ulong mask, dest_val, src_val, src2_val;
+
+    const_temps = tcg_malloc(s->nb_temps);
+    memset(const_temps, 0, s->nb_temps);
+    /* Size by element type: the array holds tcg_target_ulong, so the old
+       sizeof(uint32_t) under-allocated wherever that type is wider. */
+    temp_values = tcg_malloc(s->nb_temps * sizeof(*temp_values));
+
+    opc_ptr = gen_opc_buf;
+    args = gen_opparam_buf;
+    while (opc_ptr < gen_opc_ptr) {
+        op = *opc_ptr;
+        def = &tcg_op_defs[op];
+        nb_oargs = def->nb_oargs;
+        nb_iargs = def->nb_iargs;
+        nb_cargs = def->nb_cargs;
+        del_args = 0;
+        /* Assume a host-word-sized op; the 32-bit cases narrow this on
+           64-bit hosts before falling into the shared code. */
+        mask = ~((tcg_target_ulong)0);
+        bits = TCG_TARGET_REG_BITS;
+
+        switch (op) {
+        case INDEX_op_movi_i32:
+#if TCG_TARGET_REG_BITS == 64
+        case INDEX_op_movi_i64:
+#endif
+            dest = args[0];
+            const_temps[dest] = 1;
+            temp_values[dest] = args[1];
+            break;
+        case INDEX_op_mov_i32:
+#if TCG_TARGET_REG_BITS == 64
+        case INDEX_op_mov_i64:
+#endif
+            dest = args[0];
+            src = args[1];
+            const_temps[dest] = const_temps[src];
+            temp_values[dest] = temp_values[src];
+            break;
+        case INDEX_op_not_i32:
+#if TCG_TARGET_REG_BITS == 64
+            mask = 0xffffffff;
+            bits = 32;
+            /* fall through */
+        case INDEX_op_not_i64:
+#endif
+            dest = args[0];
+            src = args[1];
+            if (const_temps[src]) {
+                const_temps[dest] = 1;
+                dest_val = ~temp_values[src];
+                /* Keep the operand width: not_i64 folds to movi_i64. */
+                *opc_ptr = CONST_MOVI_OP(bits);
+                args[1] = temp_values[dest] = dest_val & mask;
+            } else {
+                const_temps[dest] = 0;
+            }
+            break;
+        case INDEX_op_add_i32:
+        case INDEX_op_sub_i32:
+        case INDEX_op_mul_i32:
+        case INDEX_op_and_i32:
+        case INDEX_op_or_i32:
+        case INDEX_op_xor_i32:
+        case INDEX_op_shl_i32:
+        case INDEX_op_shr_i32:
+#if TCG_TARGET_REG_BITS == 64
+            mask = 0xffffffff;
+            bits = 32;
+            /* fall through */
+        case INDEX_op_add_i64:
+        case INDEX_op_sub_i64:
+        case INDEX_op_mul_i64:
+        case INDEX_op_and_i64:
+        case INDEX_op_or_i64:
+        case INDEX_op_xor_i64:
+        case INDEX_op_shl_i64:
+        case INDEX_op_shr_i64:
+#endif
+            dest = args[0];
+            src = args[1];
+            src2 = args[2];
+            if (!const_temps[src] || !const_temps[src2]) {
+                const_temps[dest] = 0;
+                break;
+            }
+            /* movi stores its immediate unmasked, so clear anything above
+               the operand width before it can leak into shr's result. */
+            src_val = temp_values[src] & mask;
+            src2_val = temp_values[src2] & mask;
+            dest_val = 0;
+            foldable = 1;
+            switch (op) {
+            CASE_OP_32_64(add):
+                dest_val = src_val + src2_val;
+                break;
+            CASE_OP_32_64(sub):
+                dest_val = src_val - src2_val;
+                break;
+            CASE_OP_32_64(mul):
+                dest_val = src_val * src2_val;
+                break;
+            CASE_OP_32_64(and):
+                dest_val = src_val & src2_val;
+                break;
+            CASE_OP_32_64(or):
+                dest_val = src_val | src2_val;
+                break;
+            CASE_OP_32_64(xor):
+                dest_val = src_val ^ src2_val;
+                break;
+            /* A count >= the operand width cannot be folded (it is undefined
+               in C at host width), so such a shift stays in the op stream. */
+            CASE_OP_32_64(shl):
+                foldable = src2_val < (tcg_target_ulong)bits;
+                if (foldable) {
+                    dest_val = src_val << src2_val;
+                }
+                break;
+            CASE_OP_32_64(shr):
+                foldable = src2_val < (tcg_target_ulong)bits;
+                if (foldable) {
+                    dest_val = src_val >> src2_val;
+                }
+                break;
+            default:
+                fprintf(stderr,"index op %i\n",op);
+                tcg_abort();
+                return;
+            }
+            if (!foldable) {
+                const_temps[dest] = 0;
+                break;
+            }
+            const_temps[dest] = 1;
+            /* Keep the operand width: 64-bit ops fold to movi_i64. */
+            *opc_ptr = CONST_MOVI_OP(bits);
+            args[1] = temp_values[dest] = dest_val & mask;
+            /* movi takes one operand fewer than the op it replaces. */
+            del_args = 1;
+            break;
+        case INDEX_op_call:
+            /* variable number of arguments; counts are packed in args[0] */
+            nb_oargs = args[0] >> 16;
+            nb_iargs = args[0] & 0xffff;
+            nb_cargs = def->nb_cargs;
+            args++;
+            for (i = 0; i < nb_oargs; i++) {
+                const_temps[args[i]] = 0;
+            }
+            /* Be conservative: assume the helper may modify any global, taken
+               here to be the first s->nb_globals temps (TODO confirm). */
+            memset(const_temps, 0, s->nb_globals);
+            break;
+        case INDEX_op_nopn:
+            /* variable number of arguments */
+            nb_cargs = args[0];
+            break;
+        case INDEX_op_set_label:
+            memset(const_temps, 0, s->nb_temps);
+            break;
+        default:
+            if (def->flags & TCG_OPF_BB_END) {
+                memset(const_temps, 0, s->nb_temps);
+            } else {
+                for (i = 0; i < nb_oargs; i++) {
+                    const_temps[args[i]] = 0;
+                }
+            }
+            break;
+        }
+        opc_ptr++;
+        args += nb_iargs + nb_oargs + nb_cargs - del_args;
+        if (del_args > 0) {
+            /* Close the gap left by the dropped operand slot. */
+            gen_opparam_ptr -= del_args;
+            memmove(args, args + del_args,
+                    (gen_opparam_ptr - args) * sizeof(*args));
+        }
+    }
+
+    /* The walk must end exactly at the end of the parameter buffer. */
+    if (args != gen_opparam_ptr) {
+        tcg_abort();
+    }
 }
 
 #ifdef USE_LIVENESS_ANALYSIS
@@ -1891,6 +2112,8 @@ 
     }
 #endif
 
+    tcg_const_analysis(s);
+
 #ifdef CONFIG_PROFILER
     s->la_time -= profile_getclock();
 #endif