diff mbox series

[v2,40/48] tcg/optimize: Expand fold_addsub2_i32 to 64-bit ops

Message ID 20211007195456.1168070-41-richard.henderson@linaro.org
State New
Headers show
Series tcg: optimize redundant sign extensions | expand

Commit Message

Richard Henderson Oct. 7, 2021, 7:54 p.m. UTC
Rename to fold_addsub2.
Use Int128 to implement the wider operation.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 64 +++++++++++++++++++++++++++++++++-----------------
 1 file changed, 43 insertions(+), 21 deletions(-)

Comments

Alex Bennée Oct. 19, 2021, 3:34 p.m. UTC | #1
Richard Henderson <richard.henderson@linaro.org> writes:

> Rename to fold_addsub2.
> Use Int128 to implement the wider operation.
>
> Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  tcg/optimize.c | 64 +++++++++++++++++++++++++++++++++-----------------
>  1 file changed, 43 insertions(+), 21 deletions(-)
>
> diff --git a/tcg/optimize.c b/tcg/optimize.c
> index 0011ac31ec..5e662ad8f7 100644
> --- a/tcg/optimize.c
> +++ b/tcg/optimize.c
> @@ -838,37 +838,59 @@ static bool fold_add(OptContext *ctx, TCGOp *op)
>      return false;
>  }
>  
> -static bool fold_addsub2_i32(OptContext *ctx, TCGOp *op, bool add)
> +static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
>  {
>      if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
>          arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
> -        uint32_t al = arg_info(op->args[2])->val;
> -        uint32_t ah = arg_info(op->args[3])->val;
> -        uint32_t bl = arg_info(op->args[4])->val;
> -        uint32_t bh = arg_info(op->args[5])->val;
> -        uint64_t a = ((uint64_t)ah << 32) | al;
> -        uint64_t b = ((uint64_t)bh << 32) | bl;
> +        uint64_t al = arg_info(op->args[2])->val;
> +        uint64_t ah = arg_info(op->args[3])->val;
> +        uint64_t bl = arg_info(op->args[4])->val;
> +        uint64_t bh = arg_info(op->args[5])->val;
>          TCGArg rl, rh;
> -        TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
> +        TCGOp *op2;
>  
> -        if (add) {
> -            a += b;
> +        if (ctx->type == TCG_TYPE_I32) {
> +            uint64_t a = deposit64(al, 32, 32, ah);
> +            uint64_t b = deposit64(bl, 32, 32, bh);
> +
> +            if (add) {
> +                a += b;
> +            } else {
> +                a -= b;
> +            }
> +
> +            al = sextract64(a, 0, 32);
> +            ah = sextract64(a, 32, 32);
>          } else {
> -            a -= b;
> +            Int128 a = int128_make128(al, ah);
> +            Int128 b = int128_make128(bl, bh);

This fails to build here because the Int128 type and its helpers are not declared:

  FAILED: libqemu-arm-linux-user.fa.p/tcg_optimize.c.o 
  cc -m64 -mcx16 -Ilibqemu-arm-linux-user.fa.p -I. -I../.. -Itarget/arm -I../../target/arm -I../../linux-user/host/x86_64 -Ilinux-user -I../../linux-user -Ilinux-user/arm -I../../linux-user/arm -Itrace -Iqapi -Iui -Iui/shader -I/usr/include/capstone -I/usr/include/glib-2.0 -I/usr/lib/x86_64-linux-gnu/glib-2.0/include -fdiagnostics-color=auto -Wall -Winvalid-pch -Werror -std=gnu11 -O2 -g -isystem /home/alex/lsrc/qemu.git/linux-headers -isystem linux-headers -iquote . -iquote /home/alex/lsrc/qemu.git -iquote /home/alex/lsrc/qemu.git/include -iquote /home/alex/lsrc/qemu.git/disas/libvixl -iquote /home/alex/lsrc/qemu.git/tcg/i386 -pthread -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -Wstrict-prototypes -Wredundant-decls -Wundef -Wwrite-strings -Wmissing-prototypes -fno-strict-aliasing -fno-common -fwrapv -Wold-style-declaration -Wold-style-definition -Wtype-limits -Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers -Wempty-body -Wnested-externs -Wendif-labels -Wexpansion-to-defined -Wimplicit-fallthrough=2 -Wno-missing-include-dirs -Wno-shift-negative-value -Wno-psabi -fstack-protector-strong -fPIE -isystem../../linux-headers -isystemlinux-headers -DNEED_CPU_H '-DCONFIG_TARGET="arm-linux-user-config-target.h"' '-DCONFIG_DEVICES="arm-linux-user-config-devices.h"' -MD -MQ libqemu-arm-linux-user.fa.p/tcg_optimize.c.o -MF libqemu-arm-linux-user.fa.p/tcg_optimize.c.o.d -o libqemu-arm-linux-user.fa.p/tcg_optimize.c.o -c ../../tcg/optimize.c
  ../../tcg/optimize.c: In function ‘fold_addsub2’:
  ../../tcg/optimize.c:865:13: error: unknown type name ‘Int128’
    865 |             Int128 a = int128_make128(al, ah);
        |             ^~~~~~
  ../../tcg/optimize.c:865:24: error: implicit declaration of function ‘int128_make128’ [-Werror=implicit-function-declaration]
    865 |             Int128 a = int128_make128(al, ah);


Possibly we are just missing this include?

#include "qemu/int128.h"
Richard Henderson Oct. 19, 2021, 4:01 p.m. UTC | #2
On 10/19/21 8:34 AM, Alex Bennée wrote:
>    ../../tcg/optimize.c: In function ‘fold_addsub2’:
>    ../../tcg/optimize.c:865:13: error: unknown type name ‘Int128’
>      865 |             Int128 a = int128_make128(al, ah);
>          |             ^~~~~~
>    ../../tcg/optimize.c:865:24: error: implicit declaration of function ‘int128_make128’ [-Werror=implicit-function-declaration]
>      865 |             Int128 a = int128_make128(al, ah);
> 
> 
> possibly we are just missing:
> 
> #include "qemu/int128.h"

Ah, this patch set predates a cleanup to tcg_ldst.h.
Previously we included int128.h in tcg.h, so optimize.c presumably
picked up Int128 indirectly; after the cleanup it needs its own include.

Will fix.


r~
diff mbox series

Patch

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 0011ac31ec..5e662ad8f7 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -838,37 +838,59 @@  static bool fold_add(OptContext *ctx, TCGOp *op)
     return false;
 }
 
-static bool fold_addsub2_i32(OptContext *ctx, TCGOp *op, bool add)
+static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
 {
     if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
         arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
-        uint32_t al = arg_info(op->args[2])->val;
-        uint32_t ah = arg_info(op->args[3])->val;
-        uint32_t bl = arg_info(op->args[4])->val;
-        uint32_t bh = arg_info(op->args[5])->val;
-        uint64_t a = ((uint64_t)ah << 32) | al;
-        uint64_t b = ((uint64_t)bh << 32) | bl;
+        uint64_t al = arg_info(op->args[2])->val;
+        uint64_t ah = arg_info(op->args[3])->val;
+        uint64_t bl = arg_info(op->args[4])->val;
+        uint64_t bh = arg_info(op->args[5])->val;
         TCGArg rl, rh;
-        TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
+        TCGOp *op2;
 
-        if (add) {
-            a += b;
+        if (ctx->type == TCG_TYPE_I32) {
+            uint64_t a = deposit64(al, 32, 32, ah);
+            uint64_t b = deposit64(bl, 32, 32, bh);
+
+            if (add) {
+                a += b;
+            } else {
+                a -= b;
+            }
+
+            al = sextract64(a, 0, 32);
+            ah = sextract64(a, 32, 32);
         } else {
-            a -= b;
+            Int128 a = int128_make128(al, ah);
+            Int128 b = int128_make128(bl, bh);
+
+            if (add) {
+                a = int128_add(a, b);
+            } else {
+                a = int128_sub(a, b);
+            }
+
+            al = int128_getlo(a);
+            ah = int128_gethi(a);
         }
 
         rl = op->args[0];
         rh = op->args[1];
-        tcg_opt_gen_movi(ctx, op, rl, (int32_t)a);
-        tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(a >> 32));
+
+        /* The proper opcode is supplied by tcg_opt_gen_mov. */
+        op2 = tcg_op_insert_before(ctx->tcg, op, 0);
+
+        tcg_opt_gen_movi(ctx, op, rl, al);
+        tcg_opt_gen_movi(ctx, op2, rh, ah);
         return true;
     }
     return false;
 }
 
-static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
+static bool fold_add2(OptContext *ctx, TCGOp *op)
 {
-    return fold_addsub2_i32(ctx, op, true);
+    return fold_addsub2(ctx, op, true);
 }
 
 static bool fold_and(OptContext *ctx, TCGOp *op)
@@ -1707,9 +1729,9 @@  static bool fold_sub(OptContext *ctx, TCGOp *op)
     return false;
 }
 
-static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
+static bool fold_sub2(OptContext *ctx, TCGOp *op)
 {
-    return fold_addsub2_i32(ctx, op, false);
+    return fold_addsub2(ctx, op, false);
 }
 
 static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
@@ -1855,8 +1877,8 @@  void tcg_optimize(TCGContext *s)
         CASE_OP_32_64_VEC(add):
             done = fold_add(&ctx, op);
             break;
-        case INDEX_op_add2_i32:
-            done = fold_add2_i32(&ctx, op);
+        CASE_OP_32_64(add2):
+            done = fold_add2(&ctx, op);
             break;
         CASE_OP_32_64_VEC(and):
             done = fold_and(&ctx, op);
@@ -1991,8 +2013,8 @@  void tcg_optimize(TCGContext *s)
         CASE_OP_32_64_VEC(sub):
             done = fold_sub(&ctx, op);
             break;
-        case INDEX_op_sub2_i32:
-            done = fold_sub2_i32(&ctx, op);
+        CASE_OP_32_64(sub2):
+            done = fold_sub2(&ctx, op);
             break;
         CASE_OP_32_64_VEC(xor):
             done = fold_xor(&ctx, op);