Patchwork [1/2] tcg-x86_64: Special-case all 32-bit AND operands.

login
register
mail settings
Submitter Richard Henderson
Date Jan. 6, 2010, 12:03 a.m.
Message ID <20100106010536.9092BCBA@are.twiddle.net>
Download mbox | patch
Permalink /patch/42246/
State New
Headers show

Comments

Richard Henderson - Jan. 6, 2010, 12:03 a.m.
This avoids an unnecessary REX.W prefix when dealing with AND
operands that fit into a 32-bit quantity.  The most common change
actually seen is movz[wb]q -> movz[wb]l.

Similarly, avoid REXW in ext{8,16}u_i64 tcg opcodes.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/x86_64/tcg-target.c |   26 ++++++++------------------
 1 files changed, 8 insertions(+), 18 deletions(-)
Aurelien Jarno - Jan. 14, 2010, 3:57 p.m.
On Tue, Jan 05, 2010 at 04:03:00PM -0800, Richard Henderson wrote:
> This avoids an unnecessary REX.W prefix when dealing with AND
> operands that fit into a 32-bit quantity.  The most common change
> actually seen is movz[wb]q -> movz[wb]l.
> 
> Similarly, avoid REXW in ext{8,16}u_i64 tcg opcodes.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/x86_64/tcg-target.c |   26 ++++++++------------------
>  1 files changed, 8 insertions(+), 18 deletions(-)
> 
> diff --git a/tcg/x86_64/tcg-target.c b/tcg/x86_64/tcg-target.c
> index 2339091..f584c94 100644
> --- a/tcg/x86_64/tcg-target.c
> +++ b/tcg/x86_64/tcg-target.c
> @@ -426,24 +426,18 @@ static inline void tgen_arithi64(TCGContext *s, int c, int r0, int64_t val)
>      } else if ((c == ARITH_ADD && val == -1) || (c == ARITH_SUB && val == 1)) {
>          /* dec */
>          tcg_out_modrm(s, 0xff | P_REXW, 1, r0);
> -    } else if (val == (int8_t)val) {
> -        tcg_out_modrm(s, 0x83 | P_REXW, c, r0);
> -        tcg_out8(s, val);
> -    } else if (c == ARITH_AND && val == 0xffu) {
> -        /* movzbl */
> -        tcg_out_modrm(s, 0xb6 | P_EXT | P_REXW, r0, r0);
> -    } else if (c == ARITH_AND && val == 0xffffu) {
> -        /* movzwl */
> -        tcg_out_modrm(s, 0xb7 | P_EXT | P_REXW, r0, r0);
>      } else if (c == ARITH_AND && val == 0xffffffffu) {
>          /* 32-bit mov zero extends */
>          tcg_out_modrm(s, 0x8b, r0, r0);
> +    } else if (c == ARITH_AND && (uint64_t)val <= 0xffffffffu) {
> +        /* AND with no high bits set can use a 32-bit operation.  */
> +        tgen_arithi32(s, c, r0, val);

Do we really want to call tgen_arithi32() here, that will redo part of
the above tests again? It might be better to simply remove the REX.W
prefix above instead.

> +    } else if (val == (int8_t)val) {
> +        tcg_out_modrm(s, 0x83 | P_REXW, c, r0);
> +        tcg_out8(s, val);
>      } else if (val == (int32_t)val) {
>          tcg_out_modrm(s, 0x81 | P_REXW, c, r0);
>          tcg_out32(s, val);
> -    } else if (c == ARITH_AND && val == (uint32_t)val) {
> -        tcg_out_modrm(s, 0x81, c, r0);
> -        tcg_out32(s, val);
>      } else {
>          tcg_abort();
>      }
> @@ -1182,16 +1176,12 @@ static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
>          tcg_out_modrm(s, 0x63 | P_REXW, args[0], args[1]);
>          break;
>      case INDEX_op_ext8u_i32:
> +    case INDEX_op_ext8u_i64:
>          tcg_out_modrm(s, 0xb6 | P_EXT | P_REXB, args[0], args[1]);
>          break;
>      case INDEX_op_ext16u_i32:
> -        tcg_out_modrm(s, 0xb7 | P_EXT, args[0], args[1]);
> -        break;
> -    case INDEX_op_ext8u_i64:
> -        tcg_out_modrm(s, 0xb6 | P_EXT | P_REXW, args[0], args[1]);
> -        break;
>      case INDEX_op_ext16u_i64:
> -        tcg_out_modrm(s, 0xb7 | P_EXT | P_REXW, args[0], args[1]);
> +        tcg_out_modrm(s, 0xb7 | P_EXT, args[0], args[1]);
>          break;
>      case INDEX_op_ext32u_i64:
>          tcg_out_modrm(s, 0x8b, args[0], args[1]);

This part looks fine.
Richard Henderson - Jan. 14, 2010, 4:05 p.m.
On 01/14/2010 07:57 AM, Aurelien Jarno wrote:
> On Tue, Jan 05, 2010 at 04:03:00PM -0800, Richard Henderson wrote:
>> This avoids an unnecessary REX.W prefix when dealing with AND
>> operands that fit into a 32-bit quantity.  The most common change
>> actually seen is movz[wb]q ->  movz[wb]l.
>>
>> Similarly, avoid REXW in ext{8,16}u_i64 tcg opcodes.
>>
>> Signed-off-by: Richard Henderson<rth@twiddle.net>
>> ---
>>   tcg/x86_64/tcg-target.c |   26 ++++++++------------------
>>   1 files changed, 8 insertions(+), 18 deletions(-)
>>
>> diff --git a/tcg/x86_64/tcg-target.c b/tcg/x86_64/tcg-target.c
>> index 2339091..f584c94 100644
>> --- a/tcg/x86_64/tcg-target.c
>> +++ b/tcg/x86_64/tcg-target.c
>> @@ -426,24 +426,18 @@ static inline void tgen_arithi64(TCGContext *s, int c, int r0, int64_t val)
>>       } else if ((c == ARITH_ADD&&  val == -1) || (c == ARITH_SUB&&  val == 1)) {
>>           /* dec */
>>           tcg_out_modrm(s, 0xff | P_REXW, 1, r0);
>> -    } else if (val == (int8_t)val) {
>> -        tcg_out_modrm(s, 0x83 | P_REXW, c, r0);
>> -        tcg_out8(s, val);
>> -    } else if (c == ARITH_AND&&  val == 0xffu) {
>> -        /* movzbl */
>> -        tcg_out_modrm(s, 0xb6 | P_EXT | P_REXW, r0, r0);
>> -    } else if (c == ARITH_AND&&  val == 0xffffu) {
>> -        /* movzwl */
>> -        tcg_out_modrm(s, 0xb7 | P_EXT | P_REXW, r0, r0);
>>       } else if (c == ARITH_AND&&  val == 0xffffffffu) {
>>           /* 32-bit mov zero extends */
>>           tcg_out_modrm(s, 0x8b, r0, r0);
>> +    } else if (c == ARITH_AND&&  (uint64_t)val<= 0xffffffffu) {
>> +        /* AND with no high bits set can use a 32-bit operation.  */
>> +        tgen_arithi32(s, c, r0, val);
>
> Do we really want to call tgen_arithi32() here, that will redo part of
> the above tests again? It might be better to simply remove the REX.W
> prefix above instead.

Pardon?  Do you mean the inc/dec tests?  Otherwise I don't see what 
"above tests again" you're talking about.

I am looking to handle more than just 0xff and 0xffff with the new test in 
tgen_arithi64 -- 0x1234 is an appropriate mask to shorten to 32-bit AND 
as well.  I have no idea if that answers your question.


r~
Aurelien Jarno - Jan. 14, 2010, 6:58 p.m.
On Thu, Jan 14, 2010 at 08:05:58AM -0800, Richard Henderson wrote:
> On 01/14/2010 07:57 AM, Aurelien Jarno wrote:
> >On Tue, Jan 05, 2010 at 04:03:00PM -0800, Richard Henderson wrote:
> >>This avoids an unnecessary REX.W prefix when dealing with AND
> >>operands that fit into a 32-bit quantity.  The most common change
> >>actually seen is movz[wb]q ->  movz[wb]l.
> >>
> >>Similarly, avoid REXW in ext{8,16}u_i64 tcg opcodes.
> >>
> >>Signed-off-by: Richard Henderson<rth@twiddle.net>
> >>---
> >>  tcg/x86_64/tcg-target.c |   26 ++++++++------------------
> >>  1 files changed, 8 insertions(+), 18 deletions(-)
> >>
> >>diff --git a/tcg/x86_64/tcg-target.c b/tcg/x86_64/tcg-target.c
> >>index 2339091..f584c94 100644
> >>--- a/tcg/x86_64/tcg-target.c
> >>+++ b/tcg/x86_64/tcg-target.c
> >>@@ -426,24 +426,18 @@ static inline void tgen_arithi64(TCGContext *s, int c, int r0, int64_t val)
> >>      } else if ((c == ARITH_ADD&&  val == -1) || (c == ARITH_SUB&&  val == 1)) {
> >>          /* dec */
> >>          tcg_out_modrm(s, 0xff | P_REXW, 1, r0);
> >>-    } else if (val == (int8_t)val) {
> >>-        tcg_out_modrm(s, 0x83 | P_REXW, c, r0);
> >>-        tcg_out8(s, val);
> >>-    } else if (c == ARITH_AND&&  val == 0xffu) {
> >>-        /* movzbl */
> >>-        tcg_out_modrm(s, 0xb6 | P_EXT | P_REXW, r0, r0);
> >>-    } else if (c == ARITH_AND&&  val == 0xffffu) {
> >>-        /* movzwl */
> >>-        tcg_out_modrm(s, 0xb7 | P_EXT | P_REXW, r0, r0);
> >>      } else if (c == ARITH_AND&&  val == 0xffffffffu) {
> >>          /* 32-bit mov zero extends */
> >>          tcg_out_modrm(s, 0x8b, r0, r0);
> >>+    } else if (c == ARITH_AND&&  (uint64_t)val<= 0xffffffffu) {
> >>+        /* AND with no high bits set can use a 32-bit operation.  */
> >>+        tgen_arithi32(s, c, r0, val);
> >
> >Do we really want to call tgen_arithi32() here, that will redo part of
> >the above tests again? It might be better to simply remove the REX.W
> >prefix above instead.
> 
> Pardon?  Do you mean the inc/dec tests?  Otherwise I don't see what
> "above tests again" you're talking about.
> 
> I am looking to handle more than just 0xff and 0xffff with the new test in
> tgen_arithi64 -- 0x1234 is an appropriate mask to shorten to 32-bit
> AND as well.  I have no idea if that answers your question.
> 

Yes, it does; I missed that fact. Viewed from this angle, your patch makes
more sense. I have applied it.

Patch

diff --git a/tcg/x86_64/tcg-target.c b/tcg/x86_64/tcg-target.c
index 2339091..f584c94 100644
--- a/tcg/x86_64/tcg-target.c
+++ b/tcg/x86_64/tcg-target.c
@@ -426,24 +426,18 @@  static inline void tgen_arithi64(TCGContext *s, int c, int r0, int64_t val)
     } else if ((c == ARITH_ADD && val == -1) || (c == ARITH_SUB && val == 1)) {
         /* dec */
         tcg_out_modrm(s, 0xff | P_REXW, 1, r0);
-    } else if (val == (int8_t)val) {
-        tcg_out_modrm(s, 0x83 | P_REXW, c, r0);
-        tcg_out8(s, val);
-    } else if (c == ARITH_AND && val == 0xffu) {
-        /* movzbl */
-        tcg_out_modrm(s, 0xb6 | P_EXT | P_REXW, r0, r0);
-    } else if (c == ARITH_AND && val == 0xffffu) {
-        /* movzwl */
-        tcg_out_modrm(s, 0xb7 | P_EXT | P_REXW, r0, r0);
     } else if (c == ARITH_AND && val == 0xffffffffu) {
         /* 32-bit mov zero extends */
         tcg_out_modrm(s, 0x8b, r0, r0);
+    } else if (c == ARITH_AND && (uint64_t)val <= 0xffffffffu) {
+        /* AND with no high bits set can use a 32-bit operation.  */
+        tgen_arithi32(s, c, r0, val);
+    } else if (val == (int8_t)val) {
+        tcg_out_modrm(s, 0x83 | P_REXW, c, r0);
+        tcg_out8(s, val);
     } else if (val == (int32_t)val) {
         tcg_out_modrm(s, 0x81 | P_REXW, c, r0);
         tcg_out32(s, val);
-    } else if (c == ARITH_AND && val == (uint32_t)val) {
-        tcg_out_modrm(s, 0x81, c, r0);
-        tcg_out32(s, val);
     } else {
         tcg_abort();
     }
@@ -1182,16 +1176,12 @@  static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
         tcg_out_modrm(s, 0x63 | P_REXW, args[0], args[1]);
         break;
     case INDEX_op_ext8u_i32:
+    case INDEX_op_ext8u_i64:
         tcg_out_modrm(s, 0xb6 | P_EXT | P_REXB, args[0], args[1]);
         break;
     case INDEX_op_ext16u_i32:
-        tcg_out_modrm(s, 0xb7 | P_EXT, args[0], args[1]);
-        break;
-    case INDEX_op_ext8u_i64:
-        tcg_out_modrm(s, 0xb6 | P_EXT | P_REXW, args[0], args[1]);
-        break;
     case INDEX_op_ext16u_i64:
-        tcg_out_modrm(s, 0xb7 | P_EXT | P_REXW, args[0], args[1]);
+        tcg_out_modrm(s, 0xb7 | P_EXT, args[0], args[1]);
         break;
     case INDEX_op_ext32u_i64:
         tcg_out_modrm(s, 0x8b, args[0], args[1]);