Patchwork [v4] tcg-i386: Introduce limited deposit support

login
register
mail settings
Submitter Jan Kiszka
Date Sept. 29, 2011, 4:52 p.m.
Message ID <4E84A23B.2030405@siemens.com>
Download mbox | patch
Permalink /patch/116981/
State New
Headers show

Comments

Jan Kiszka - Sept. 29, 2011, 4:52 p.m.
On 2011-09-29 18:11, Richard Henderson wrote:
> On 09/29/2011 08:23 AM, Jan Kiszka wrote:
>> +#ifndef TCG_TARGET_deposit_i32_valid
>> +#define TCG_TARGET_deposit_i32_valid(ofs, len) 0
>> +#endif
>> +#ifndef TCG_TARGET_deposit_i64_valid
>> +#define TCG_TARGET_deposit_i64_valid(ofs, len) 0
>> +#endif
> 
> Err, no.  The default is true.  The targets that currently
> implement deposit at present can handle arbitrary inputs.

Grr, of course.

---

x86 cannot provide an optimized generic deposit implementation. However,
for a few special cases - namely writing bits 0..7, 8..15, and 0..15 -
versions using only a single instruction are feasible. Introducing such
limited support improves the emulation of 16-bit x86 code on x86, and
also helps in the rarer cases where 32-bit or 64-bit code accesses bytes
or words.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---

Changes in v4:
 - provide correct default (true) for TCG_TARGET_deposit_i32/64_valid

Changes in v3:
 - provide default TCG_TARGET_deposit_i32_valid - just in case

Changes in v2:
 - introduce restricting predicates TCG_TARGET_deposit_i32/64_valid
   to decide if deposit support can be used
 - express register constraints via new 'Q' symbol

 tcg/i386/tcg-target.c |   24 ++++++++++++++++++++++++
 tcg/i386/tcg-target.h |    9 +++++++--
 tcg/tcg-op.h          |    4 ++--
 tcg/tcg.h             |    7 +++++++
 4 files changed, 40 insertions(+), 4 deletions(-)
Richard Henderson - Sept. 29, 2011, 7:50 p.m.
On 09/29/2011 09:52 AM, Jan Kiszka wrote:
> x86 cannot provide an optimized generic deposit implementation. But at
> least for a few special cases, namely for writing bits 0..7, 8..15, and
> 0..15, versions using only a single instruction are feasible.
> Introducing such limited support improves emulating 16-bit x86 code on
> x86, but also rarer cases where 32-bit or 64-bit code accesses bytes or
> words.
> 
> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> ---
> 
> Changes in v4:
>  - provide correct default TCG_TARGET_deposit_i32_valid
> 
> Changes in v3:
>  - provide default TCG_TARGET_deposit_i32_valid - just in case
> 
> Changes in v2:
>  - introduce restricting predicates TCG_TARGET_deposit_i32/64_valid
>    to decide if deposit support can be used
>  - express register constraints via new 'Q' symbol
> 
>  tcg/i386/tcg-target.c |   24 ++++++++++++++++++++++++
>  tcg/i386/tcg-target.h |    9 +++++++--
>  tcg/tcg-op.h          |    4 ++--
>  tcg/tcg.h             |    7 +++++++
>  4 files changed, 40 insertions(+), 4 deletions(-)

Reviewed-by: Richard Henderson <rth@twiddle.net>


r~
Blue Swirl - Oct. 1, 2011, 12:06 p.m.
Thanks, applied.

On Thu, Sep 29, 2011 at 4:52 PM, Jan Kiszka <jan.kiszka@siemens.com> wrote:
> On 2011-09-29 18:11, Richard Henderson wrote:
>> On 09/29/2011 08:23 AM, Jan Kiszka wrote:
>>> +#ifndef TCG_TARGET_deposit_i32_valid
>>> +#define TCG_TARGET_deposit_i32_valid(ofs, len) 0
>>> +#endif
>>> +#ifndef TCG_TARGET_deposit_i64_valid
>>> +#define TCG_TARGET_deposit_i64_valid(ofs, len) 0
>>> +#endif
>>
>> Err, no.  The default is true.  The targets that currently
>> implement deposit at present can handle arbitrary inputs.
>
> Grr, of course.
>
> ---
>
> x86 cannot provide an optimized generic deposit implementation. But at
> least for a few special cases, namely for writing bits 0..7, 8..15, and
> 0..15, versions using only a single instruction are feasible.
> Introducing such limited support improves emulating 16-bit x86 code on
> x86, but also rarer cases where 32-bit or 64-bit code accesses bytes or
> words.
>
> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> ---
>
> Changes in v4:
>  - provide correct default TCG_TARGET_deposit_i32_valid
>
> Changes in v3:
>  - provide default TCG_TARGET_deposit_i32_valid - just in case
>
> Changes in v2:
>  - introduce restricting predicates TCG_TARGET_deposit_i32/64_valid
>   to decide if deposit support can be used
>  - express register constraints via new 'Q' symbol
>
>  tcg/i386/tcg-target.c |   24 ++++++++++++++++++++++++
>  tcg/i386/tcg-target.h |    9 +++++++--
>  tcg/tcg-op.h          |    4 ++--
>  tcg/tcg.h             |    7 +++++++
>  4 files changed, 40 insertions(+), 4 deletions(-)
>
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index 281f87d..3069e53 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -168,6 +168,10 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
>             tcg_regset_set32(ct->u.regs, 0, 0xf);
>         }
>         break;
> +    case 'Q':
> +        ct->ct |= TCG_CT_REG;
> +        tcg_regset_set32(ct->u.regs, 0, 0xf);
> +        break;
>     case 'r':
>         ct->ct |= TCG_CT_REG;
>         if (TCG_TARGET_REG_BITS == 64) {
> @@ -1747,6 +1751,22 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>         break;
>  #endif
>
> +    OP_32_64(deposit):
> +        if (args[3] == 0 && args[4] == 8) {
> +            /* load bits 0..7 */
> +            tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
> +                          args[2], args[0]);
> +        } else if (args[3] == 8 && args[4] == 8) {
> +            /* load bits 8..15 */
> +            tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
> +        } else if (args[3] == 0 && args[4] == 16) {
> +            /* load bits 0..15 */
> +            tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
> +        } else {
> +            tcg_abort();
> +        }
> +        break;
> +
>     default:
>         tcg_abort();
>     }
> @@ -1802,6 +1822,8 @@ static const TCGTargetOpDef x86_op_defs[] = {
>
>     { INDEX_op_setcond_i32, { "q", "r", "ri" } },
>
> +    { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
> +
>  #if TCG_TARGET_REG_BITS == 32
>     { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
>     { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
> @@ -1853,6 +1875,8 @@ static const TCGTargetOpDef x86_op_defs[] = {
>     { INDEX_op_ext8u_i64, { "r", "r" } },
>     { INDEX_op_ext16u_i64, { "r", "r" } },
>     { INDEX_op_ext32u_i64, { "r", "r" } },
> +
> +    { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
>  #endif
>
>  #if TCG_TARGET_REG_BITS == 64
> diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
> index 5088e47..b9c9d4e 100644
> --- a/tcg/i386/tcg-target.h
> +++ b/tcg/i386/tcg-target.h
> @@ -90,7 +90,7 @@ enum {
>  #define TCG_TARGET_HAS_eqv_i32          0
>  #define TCG_TARGET_HAS_nand_i32         0
>  #define TCG_TARGET_HAS_nor_i32          0
> -#define TCG_TARGET_HAS_deposit_i32      0
> +#define TCG_TARGET_HAS_deposit_i32      1
>
>  #if TCG_TARGET_REG_BITS == 64
>  #define TCG_TARGET_HAS_div2_i64         1
> @@ -111,9 +111,14 @@ enum {
>  #define TCG_TARGET_HAS_eqv_i64          0
>  #define TCG_TARGET_HAS_nand_i64         0
>  #define TCG_TARGET_HAS_nor_i64          0
> -#define TCG_TARGET_HAS_deposit_i64      0
> +#define TCG_TARGET_HAS_deposit_i64      1
>  #endif
>
> +#define TCG_TARGET_deposit_i32_valid(ofs, len) \
> +    (((ofs) == 0 && (len) == 8) || ((ofs) == 8 && (len) == 8) || \
> +     ((ofs) == 0 && (len) == 16))
> +#define TCG_TARGET_deposit_i64_valid    TCG_TARGET_deposit_i32_valid
> +
>  #define TCG_TARGET_HAS_GUEST_BASE
>
>  /* Note: must be synced with dyngen-exec.h */
> diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
> index 404b637..fea5983 100644
> --- a/tcg/tcg-op.h
> +++ b/tcg/tcg-op.h
> @@ -2045,7 +2045,7 @@ static inline void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1,
>                                       TCGv_i32 arg2, unsigned int ofs,
>                                       unsigned int len)
>  {
> -    if (TCG_TARGET_HAS_deposit_i32) {
> +    if (TCG_TARGET_HAS_deposit_i32 && TCG_TARGET_deposit_i32_valid(ofs, len)) {
>         tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, arg1, arg2, ofs, len);
>     } else {
>         uint32_t mask = (1u << len) - 1;
> @@ -2064,7 +2064,7 @@ static inline void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1,
>                                       TCGv_i64 arg2, unsigned int ofs,
>                                       unsigned int len)
>  {
> -    if (TCG_TARGET_HAS_deposit_i64) {
> +    if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(ofs, len)) {
>         tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, arg1, arg2, ofs, len);
>     } else {
>         uint64_t mask = (1ull << len) - 1;
> diff --git a/tcg/tcg.h b/tcg/tcg.h
> index dc5e9c9..c7197f5 100644
> --- a/tcg/tcg.h
> +++ b/tcg/tcg.h
> @@ -71,6 +71,13 @@ typedef uint64_t TCGRegSet;
>  #define TCG_TARGET_HAS_deposit_i64      0
>  #endif
>
> +#ifndef TCG_TARGET_deposit_i32_valid
> +#define TCG_TARGET_deposit_i32_valid(ofs, len) 1
> +#endif
> +#ifndef TCG_TARGET_deposit_i64_valid
> +#define TCG_TARGET_deposit_i64_valid(ofs, len) 1
> +#endif
> +
>  /* Only one of DIV or DIV2 should be defined.  */
>  #if defined(TCG_TARGET_HAS_div_i32)
>  #define TCG_TARGET_HAS_div2_i32         0
> --
> 1.7.3.4
>

Patch

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 281f87d..3069e53 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -168,6 +168,10 @@  static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
             tcg_regset_set32(ct->u.regs, 0, 0xf);
         }
         break;
+    case 'Q':
+        ct->ct |= TCG_CT_REG;
+        tcg_regset_set32(ct->u.regs, 0, 0xf);
+        break;
     case 'r':
         ct->ct |= TCG_CT_REG;
         if (TCG_TARGET_REG_BITS == 64) {
@@ -1747,6 +1751,22 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 #endif
 
+    OP_32_64(deposit):
+        if (args[3] == 0 && args[4] == 8) {
+            /* load bits 0..7 */
+            tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
+                          args[2], args[0]);
+        } else if (args[3] == 8 && args[4] == 8) {
+            /* load bits 8..15 */
+            tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
+        } else if (args[3] == 0 && args[4] == 16) {
+            /* load bits 0..15 */
+            tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
+        } else {
+            tcg_abort();
+        }
+        break;
+
     default:
         tcg_abort();
     }
@@ -1802,6 +1822,8 @@  static const TCGTargetOpDef x86_op_defs[] = {
 
     { INDEX_op_setcond_i32, { "q", "r", "ri" } },
 
+    { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
+
 #if TCG_TARGET_REG_BITS == 32
     { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
     { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
@@ -1853,6 +1875,8 @@  static const TCGTargetOpDef x86_op_defs[] = {
     { INDEX_op_ext8u_i64, { "r", "r" } },
     { INDEX_op_ext16u_i64, { "r", "r" } },
     { INDEX_op_ext32u_i64, { "r", "r" } },
+
+    { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
 #endif
 
 #if TCG_TARGET_REG_BITS == 64
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 5088e47..b9c9d4e 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -90,7 +90,7 @@  enum {
 #define TCG_TARGET_HAS_eqv_i32          0
 #define TCG_TARGET_HAS_nand_i32         0
 #define TCG_TARGET_HAS_nor_i32          0
-#define TCG_TARGET_HAS_deposit_i32      0
+#define TCG_TARGET_HAS_deposit_i32      1
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_div2_i64         1
@@ -111,9 +111,14 @@  enum {
 #define TCG_TARGET_HAS_eqv_i64          0
 #define TCG_TARGET_HAS_nand_i64         0
 #define TCG_TARGET_HAS_nor_i64          0
-#define TCG_TARGET_HAS_deposit_i64      0
+#define TCG_TARGET_HAS_deposit_i64      1
 #endif
 
+#define TCG_TARGET_deposit_i32_valid(ofs, len) \
+    (((ofs) == 0 && (len) == 8) || ((ofs) == 8 && (len) == 8) || \
+     ((ofs) == 0 && (len) == 16))
+#define TCG_TARGET_deposit_i64_valid    TCG_TARGET_deposit_i32_valid
+
 #define TCG_TARGET_HAS_GUEST_BASE
 
 /* Note: must be synced with dyngen-exec.h */
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 404b637..fea5983 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -2045,7 +2045,7 @@  static inline void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1,
 				       TCGv_i32 arg2, unsigned int ofs,
 				       unsigned int len)
 {
-    if (TCG_TARGET_HAS_deposit_i32) {
+    if (TCG_TARGET_HAS_deposit_i32 && TCG_TARGET_deposit_i32_valid(ofs, len)) {
         tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, arg1, arg2, ofs, len);
     } else {
         uint32_t mask = (1u << len) - 1;
@@ -2064,7 +2064,7 @@  static inline void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1,
 				       TCGv_i64 arg2, unsigned int ofs,
 				       unsigned int len)
 {
-    if (TCG_TARGET_HAS_deposit_i64) {
+    if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(ofs, len)) {
         tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, arg1, arg2, ofs, len);
     } else {
         uint64_t mask = (1ull << len) - 1;
diff --git a/tcg/tcg.h b/tcg/tcg.h
index dc5e9c9..c7197f5 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -71,6 +71,13 @@  typedef uint64_t TCGRegSet;
 #define TCG_TARGET_HAS_deposit_i64      0
 #endif
 
+#ifndef TCG_TARGET_deposit_i32_valid
+#define TCG_TARGET_deposit_i32_valid(ofs, len) 1
+#endif
+#ifndef TCG_TARGET_deposit_i64_valid
+#define TCG_TARGET_deposit_i64_valid(ofs, len) 1
+#endif
+
 /* Only one of DIV or DIV2 should be defined.  */
 #if defined(TCG_TARGET_HAS_div_i32)
 #define TCG_TARGET_HAS_div2_i32         0