diff mbox series

[11/31] tcg: Allocate TCGTemp pairs in host memory order

Message ID 20221020115242.2301066-12-richard.henderson@linaro.org
State New
Headers show
Series tcg: Support for Int128 with helpers | expand

Commit Message

Richard Henderson Oct. 20, 2022, 11:52 a.m. UTC
Allocate the first of a pair at the lower address, and the
second of a pair at the higher address.  This will make it
easier to find the beginning of the larger memory block.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg-internal.h |  4 ++--
 tcg/tcg.c          | 60 ++++++++++++++++++++--------------------------
 2 files changed, 28 insertions(+), 36 deletions(-)

Comments

Philippe Mathieu-Daudé Oct. 20, 2022, 3:01 p.m. UTC | #1
Hi Richard,

On 20/10/22 13:52, Richard Henderson wrote:
> Allocate the first of a pair at the lower address, and the
> second of a pair at the higher address.  This will make it
> easier to find the beginning of the larger memory block.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   tcg/tcg-internal.h |  4 ++--
>   tcg/tcg.c          | 60 ++++++++++++++++++++--------------------------
>   2 files changed, 28 insertions(+), 36 deletions(-)

> @@ -4116,18 +4114,12 @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
>       }
>   
>       /* If the two inputs form one 64-bit value, try dupm_vec. */
> -    if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
> -        if (!itsl->mem_coherent) {
> -            temp_sync(s, itsl, s->reserved_regs, 0, 0);
> -        }
> -        if (!itsh->mem_coherent) {
> -            temp_sync(s, itsh, s->reserved_regs, 0, 0);
> -        }
> -#if HOST_BIG_ENDIAN
> -        TCGTemp *its = itsh;
> -#else
> -        TCGTemp *its = itsl;
> -#endif
> +    if (itsl - itsl->temp_subindex == itsh - itsh->temp_subindex) {
> +        TCGTemp *its = itsl - itsl->temp_subindex;
> +
> +        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
> +        temp_sync(s, its + 1, s->reserved_regs, 0, 0);

No mention of dropping the 'mem_coherent' checks?

>           if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
>                                its->mem_base->reg, its->mem_offset)) {
>               goto done;
Richard Henderson Oct. 20, 2022, 9:33 p.m. UTC | #2
On 10/21/22 01:01, Philippe Mathieu-Daudé wrote:
> Hi Richard,
> 
> On 20/10/22 13:52, Richard Henderson wrote:
>> Allocate the first of a pair at the lower address, and the
>> second of a pair at the higher address.  This will make it
>> easier to find the beginning of the larger memory block.
>>
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>> ---
>>   tcg/tcg-internal.h |  4 ++--
>>   tcg/tcg.c          | 60 ++++++++++++++++++++--------------------------
>>   2 files changed, 28 insertions(+), 36 deletions(-)
> 
>> @@ -4116,18 +4114,12 @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
>>       }
>>       /* If the two inputs form one 64-bit value, try dupm_vec. */
>> -    if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
>> -        if (!itsl->mem_coherent) {
>> -            temp_sync(s, itsl, s->reserved_regs, 0, 0);
>> -        }
>> -        if (!itsh->mem_coherent) {
>> -            temp_sync(s, itsh, s->reserved_regs, 0, 0);
>> -        }
>> -#if HOST_BIG_ENDIAN
>> -        TCGTemp *its = itsh;
>> -#else
>> -        TCGTemp *its = itsl;
>> -#endif
>> +    if (itsl - itsl->temp_subindex == itsh - itsh->temp_subindex) {
>> +        TCGTemp *its = itsl - itsl->temp_subindex;
>> +
>> +        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
>> +        temp_sync(s, its + 1, s->reserved_regs, 0, 0);
> 
> No mention of dropping the 'mem_coherent' checks?

Hmm.  I guess I should split that out as a separate patch.
The first thing that temp_sync does is check mem_coherent, so the
explicit checks in the caller were redundant...


r~

> 
>>           if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
>>                                its->mem_base->reg, its->mem_offset)) {
>>               goto done;
>
diff mbox series

Patch

diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h
index a9ea27f67a..2c06b5116a 100644
--- a/tcg/tcg-internal.h
+++ b/tcg/tcg-internal.h
@@ -62,11 +62,11 @@  static inline unsigned tcg_call_flags(TCGOp *op)
 #if TCG_TARGET_REG_BITS == 32
 static inline TCGv_i32 TCGV_LOW(TCGv_i64 t)
 {
-    return temp_tcgv_i32(tcgv_i64_temp(t));
+    return temp_tcgv_i32(tcgv_i64_temp(t) + HOST_BIG_ENDIAN);
 }
 static inline TCGv_i32 TCGV_HIGH(TCGv_i64 t)
 {
-    return temp_tcgv_i32(tcgv_i64_temp(t) + 1);
+    return temp_tcgv_i32(tcgv_i64_temp(t) + !HOST_BIG_ENDIAN);
 }
 #else
 extern TCGv_i32 TCGV_LOW(TCGv_i64) QEMU_ERROR("32-bit code path is reachable");
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 135d9a9a0a..71afa4b749 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -886,10 +886,7 @@  TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
     TCGContext *s = tcg_ctx;
     TCGTemp *base_ts = tcgv_ptr_temp(base);
     TCGTemp *ts = tcg_global_alloc(s);
-    int indirect_reg = 0, bigendian = 0;
-#if HOST_BIG_ENDIAN
-    bigendian = 1;
-#endif
+    int indirect_reg = 0;
 
     switch (base_ts->kind) {
     case TEMP_FIXED:
@@ -915,7 +912,7 @@  TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
         ts->indirect_reg = indirect_reg;
         ts->mem_allocated = 1;
         ts->mem_base = base_ts;
-        ts->mem_offset = offset + bigendian * 4;
+        ts->mem_offset = offset;
         pstrcpy(buf, sizeof(buf), name);
         pstrcat(buf, sizeof(buf), "_0");
         ts->name = strdup(buf);
@@ -926,7 +923,7 @@  TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
         ts2->indirect_reg = indirect_reg;
         ts2->mem_allocated = 1;
         ts2->mem_base = base_ts;
-        ts2->mem_offset = offset + (1 - bigendian) * 4;
+        ts2->mem_offset = offset + 4;
         ts2->temp_subindex = 1;
         pstrcpy(buf, sizeof(buf), name);
         pstrcat(buf, sizeof(buf), "_1");
@@ -1072,37 +1069,43 @@  TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
 
     ts = g_hash_table_lookup(h, &val);
     if (ts == NULL) {
+        int64_t *val_ptr;
+
         ts = tcg_temp_alloc(s);
 
         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
             TCGTemp *ts2 = tcg_temp_alloc(s);
 
+            tcg_debug_assert(ts2 == ts + 1);
+
             ts->base_type = TCG_TYPE_I64;
             ts->type = TCG_TYPE_I32;
             ts->kind = TEMP_CONST;
             ts->temp_allocated = 1;
-            /*
-             * Retain the full value of the 64-bit constant in the low
-             * part, so that the hash table works.  Actual uses will
-             * truncate the value to the low part.
-             */
-            ts->val = val;
 
-            tcg_debug_assert(ts2 == ts + 1);
             ts2->base_type = TCG_TYPE_I64;
             ts2->type = TCG_TYPE_I32;
             ts2->kind = TEMP_CONST;
             ts2->temp_allocated = 1;
             ts2->temp_subindex = 1;
-            ts2->val = val >> 32;
+
+            /*
+             * Retain the full value of the 64-bit constant in the low
+             * part, so that the hash table works.  Actual uses will
+             * truncate the value to the low part.
+             */
+            ts[HOST_BIG_ENDIAN].val = val;
+            ts[!HOST_BIG_ENDIAN].val = val >> 32;
+            val_ptr = &ts[HOST_BIG_ENDIAN].val;
         } else {
             ts->base_type = type;
             ts->type = type;
             ts->kind = TEMP_CONST;
             ts->temp_allocated = 1;
             ts->val = val;
+            val_ptr = &ts->val;
         }
-        g_hash_table_insert(h, &ts->val, ts);
+        g_hash_table_insert(h, val_ptr, ts);
     }
 
     return ts;
@@ -1514,13 +1517,8 @@  void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
     pi = 0;
     if (ret != NULL) {
         if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) {
-#if HOST_BIG_ENDIAN
-            op->args[pi++] = temp_arg(ret + 1);
-            op->args[pi++] = temp_arg(ret);
-#else
             op->args[pi++] = temp_arg(ret);
             op->args[pi++] = temp_arg(ret + 1);
-#endif
             nb_rets = 2;
         } else {
             op->args[pi++] = temp_arg(ret);
@@ -1554,8 +1552,8 @@  void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
         }
 
         if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
-            op->args[pi++] = temp_arg(args[i] + HOST_BIG_ENDIAN);
-            op->args[pi++] = temp_arg(args[i] + !HOST_BIG_ENDIAN);
+            op->args[pi++] = temp_arg(args[i]);
+            op->args[pi++] = temp_arg(args[i] + 1);
             real_args += 2;
             continue;
         }
@@ -4116,18 +4114,12 @@  static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
     }
 
     /* If the two inputs form one 64-bit value, try dupm_vec. */
-    if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
-        if (!itsl->mem_coherent) {
-            temp_sync(s, itsl, s->reserved_regs, 0, 0);
-        }
-        if (!itsh->mem_coherent) {
-            temp_sync(s, itsh, s->reserved_regs, 0, 0);
-        }
-#if HOST_BIG_ENDIAN
-        TCGTemp *its = itsh;
-#else
-        TCGTemp *its = itsl;
-#endif
+    if (itsl - itsl->temp_subindex == itsh - itsh->temp_subindex) {
+        TCGTemp *its = itsl - itsl->temp_subindex;
+
+        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
+        temp_sync(s, its + 1, s->reserved_regs, 0, 0);
+
         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                              its->mem_base->reg, its->mem_offset)) {
             goto done;