Patchwork [04/22] tcg-i386: Tidy ext8s and ext16s operations.

login
register
mail settings
Submitter Richard Henderson
Date April 13, 2010, 11:13 p.m.
Message ID <e9a62702566d91f79dedc02e5b283b2dc54bee21.1272479073.git.rth@twiddle.net>
Download mbox | patch
Permalink /patch/51190/
State New
Headers show

Comments

Richard Henderson - April 13, 2010, 11:13 p.m.
Define OPC_MOVSBL and OPC_MOVSWL.  Factor opcode emission to
separate functions.  Don't restrict the input register to the
low 4 "q" registers; emit shifts instead if needed.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/i386/tcg-target.c |   49 +++++++++++++++++++++++++++++++++++++------------
 1 files changed, 37 insertions(+), 12 deletions(-)
Aurelien Jarno - May 20, 2010, 6:52 p.m.
On Tue, Apr 13, 2010 at 04:13:49PM -0700, Richard Henderson wrote:
> Define OPC_MOVSBL and OPC_MOVSWL.  Factor opcode emission to
> separate functions.  Don't restrict the input register to the
> low 4 "q" registers; emit shifts instead if needed.
>

Given that this patch is of the same type as the previous one, I have
also benchmarked it. Here are the results:

        |  instr |  size  |
        +--------+--------+
 before | 101258 | 344829 |
 after  | 101258 | 344833 |
 
This time the patch clearly doesn't bring an improvement, so I think it
should also be rewritten without the constraints change.

Patch

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 2cc1191..75b9915 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -165,6 +165,8 @@  static inline int tcg_target_const_match(tcg_target_long val,
 
 #define OPC_MOVZBL	(0xb6 | P_EXT)
 #define OPC_MOVZWL	(0xb7 | P_EXT)
+#define OPC_MOVSBL	(0xbe | P_EXT)
+#define OPC_MOVSWL	(0xbf | P_EXT)
 
 #define ARITH_ADD 0
 #define ARITH_OR  1
@@ -306,12 +308,37 @@  static void tcg_out_ext8u(TCGContext *s, int dest, int src)
     tcg_out_modrm(s, OPC_MOVZBL, dest, src);
 }
 
+static void tcg_out_ext8s(TCGContext *s, int dest, int src)
+{
+    if (src >= 4) {
+        tcg_out_mov(s, dest, src);
+        if (dest >= 4) {
+            /* shl $24, dest */
+            tcg_out_modrm(s, 0xc1, SHIFT_SHL, dest);
+            tcg_out8(s, 24);
+            /* sar $24, dest */
+            tcg_out_modrm(s, 0xc1, SHIFT_SAR, dest);
+            tcg_out8(s, 24);
+            return;
+        }
+        src = dest;
+    }
+    /* movsbl */
+    tcg_out_modrm(s, OPC_MOVSBL, dest, src);
+}
+
 static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
 {
     /* movzwl */
     tcg_out_modrm(s, OPC_MOVZWL, dest, src);
 }
 
+static inline void tcg_out_ext16s(TCGContext *s, int dest, int src)
+{
+    /* movswl */
+    tcg_out_modrm(s, OPC_MOVSWL, dest, src);
+}
+
 static inline void tgen_arithi(TCGContext *s, int c, int r0, int32_t val, int cf)
 {
     if (!cf && ((c == ARITH_ADD && val == 1) || (c == ARITH_SUB && val == -1))) {
@@ -659,12 +686,10 @@  static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
 
     switch(opc) {
     case 0 | 4:
-        /* movsbl */
-        tcg_out_modrm(s, 0xbe | P_EXT, data_reg, TCG_REG_EAX);
+        tcg_out_ext8s(s, data_reg, TCG_REG_EAX);
         break;
     case 1 | 4:
-        /* movswl */
-        tcg_out_modrm(s, 0xbf | P_EXT, data_reg, TCG_REG_EAX);
+        tcg_out_ext16s(s, data_reg, TCG_REG_EAX);
         break;
     case 0:
         tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
@@ -714,7 +739,7 @@  static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
         break;
     case 0 | 4:
         /* movsbl */
-        tcg_out_modrm_offset(s, 0xbe | P_EXT, data_reg, r0, GUEST_BASE);
+        tcg_out_modrm_offset(s, OPC_MOVSBL, data_reg, r0, GUEST_BASE);
         break;
     case 1:
         /* movzwl */
@@ -728,7 +753,7 @@  static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
         break;
     case 1 | 4:
         /* movswl */
-        tcg_out_modrm_offset(s, 0xbf | P_EXT, data_reg, r0, GUEST_BASE);
+        tcg_out_modrm_offset(s, OPC_MOVSWL, data_reg, r0, GUEST_BASE);
         if (bswap) {
             /* rolw $8, data_reg */
             tcg_out8(s, 0x66); 
@@ -736,7 +761,7 @@  static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
             tcg_out8(s, 8);
 
             /* movswl data_reg, data_reg */
-            tcg_out_modrm(s, 0xbf | P_EXT, data_reg, data_reg);
+            tcg_out_modrm(s, OPC_MOVSWL, data_reg, data_reg);
         }
         break;
     case 2:
@@ -1042,7 +1067,7 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
     case INDEX_op_ld8s_i32:
         /* movsbl */
-        tcg_out_modrm_offset(s, 0xbe | P_EXT, args[0], args[1], args[2]);
+        tcg_out_modrm_offset(s, OPC_MOVSBL, args[0], args[1], args[2]);
         break;
     case INDEX_op_ld16u_i32:
         /* movzwl */
@@ -1050,7 +1075,7 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
     case INDEX_op_ld16s_i32:
         /* movswl */
-        tcg_out_modrm_offset(s, 0xbf | P_EXT, args[0], args[1], args[2]);
+        tcg_out_modrm_offset(s, OPC_MOVSWL, args[0], args[1], args[2]);
         break;
     case INDEX_op_ld_i32:
         /* movl */
@@ -1187,10 +1212,10 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_ext8s_i32:
-        tcg_out_modrm(s, 0xbe | P_EXT, args[0], args[1]);
+        tcg_out_ext8s(s, args[0], args[1]);
         break;
     case INDEX_op_ext16s_i32:
-        tcg_out_modrm(s, 0xbf | P_EXT, args[0], args[1]);
+        tcg_out_ext16s(s, args[0], args[1]);
         break;
     case INDEX_op_ext8u_i32:
         tcg_out_ext8u(s, args[0], args[1]);
@@ -1289,7 +1314,7 @@  static const TCGTargetOpDef x86_op_defs[] = {
 
     { INDEX_op_not_i32, { "r", "0" } },
 
-    { INDEX_op_ext8s_i32, { "r", "q" } },
+    { INDEX_op_ext8s_i32, { "r", "r" } },
     { INDEX_op_ext16s_i32, { "r", "r" } },
     { INDEX_op_ext8u_i32, { "r", "r" } },
     { INDEX_op_ext16u_i32, { "r", "r" } },