Message ID | 20211007195456.1168070-41-richard.henderson@linaro.org |
---|---|
State | New |
Headers | show |
Series | tcg: optimize redundant sign extensions | expand |
Richard Henderson <richard.henderson@linaro.org> writes:

> Rename to fold_addsub2.
> Use Int128 to implement the wider operation.
>
> Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  tcg/optimize.c | 64 +++++++++++++++++++++++++++++++++-----------------
>  1 file changed, 43 insertions(+), 21 deletions(-)
>
> diff --git a/tcg/optimize.c b/tcg/optimize.c
> index 0011ac31ec..5e662ad8f7 100644
> --- a/tcg/optimize.c
> +++ b/tcg/optimize.c
> @@ -838,37 +838,59 @@ static bool fold_add(OptContext *ctx, TCGOp *op)
>      return false;
>  }
>
> -static bool fold_addsub2_i32(OptContext *ctx, TCGOp *op, bool add)
> +static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
>  {
>      if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
>          arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
> -        uint32_t al = arg_info(op->args[2])->val;
> -        uint32_t ah = arg_info(op->args[3])->val;
> -        uint32_t bl = arg_info(op->args[4])->val;
> -        uint32_t bh = arg_info(op->args[5])->val;
> -        uint64_t a = ((uint64_t)ah << 32) | al;
> -        uint64_t b = ((uint64_t)bh << 32) | bl;
> +        uint64_t al = arg_info(op->args[2])->val;
> +        uint64_t ah = arg_info(op->args[3])->val;
> +        uint64_t bl = arg_info(op->args[4])->val;
> +        uint64_t bh = arg_info(op->args[5])->val;
>          TCGArg rl, rh;
> -        TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
> +        TCGOp *op2;
>
> -        if (add) {
> -            a += b;
> +        if (ctx->type == TCG_TYPE_I32) {
> +            uint64_t a = deposit64(al, 32, 32, ah);
> +            uint64_t b = deposit64(bl, 32, 32, bh);
> +
> +            if (add) {
> +                a += b;
> +            } else {
> +                a -= b;
> +            }
> +
> +            al = sextract64(a, 0, 32);
> +            ah = sextract64(a, 32, 32);
>          } else {
> -            a -= b;
> +            Int128 a = int128_make128(al, ah);
> +            Int128 b = int128_make128(bl, bh);

This didn't find the Int128 support:

```
FAILED: libqemu-arm-linux-user.fa.p/tcg_optimize.c.o
cc -m64 -mcx16 -Ilibqemu-arm-linux-user.fa.p -I. -I../.. -Itarget/arm -I../../target/arm -I../../linux-user/host/x86_64 -Ilinux-user -I../../linux-user -Ilinux-user/arm -I../../linux-user/arm -Itrace -Iqapi -Iui -Iui/shader -I/usr/include/capstone -I/usr/include/glib-2.0 -I/usr/lib/x86_64-linux-gnu/glib-2.0/include -fdiagnostics-color=auto -Wall -Winvalid-pch -Werror -std=gnu11 -O2 -g -isystem /home/alex/lsrc/qemu.git/linux-headers -isystem linux-headers -iquote . -iquote /home/alex/lsrc/qemu.git -iquote /home/alex/lsrc/qemu.git/include -iquote /home/alex/lsrc/qemu.git/disas/libvixl -iquote /home/alex/lsrc/qemu.git/tcg/i386 -pthread -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -Wstrict-prototypes -Wredundant-decls -Wundef -Wwrite-strings -Wmissing-prototypes -fno-strict-aliasing -fno-common -fwrapv -Wold-style-declaration -Wold-style-definition -Wtype-limits -Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers -Wempty-body -Wnested-externs -Wendif-labels -Wexpansion-to-defined -Wimplicit-fallthrough=2 -Wno-missing-include-dirs -Wno-shift-negative-value -Wno-psabi -fstack-protector-strong -fPIE -isystem../../linux-headers -isystemlinux-headers -DNEED_CPU_H '-DCONFIG_TARGET="arm-linux-user-config-target.h"' '-DCONFIG_DEVICES="arm-linux-user-config-devices.h"' -MD -MQ libqemu-arm-linux-user.fa.p/tcg_optimize.c.o -MF libqemu-arm-linux-user.fa.p/tcg_optimize.c.o.d -o libqemu-arm-linux-user.fa.p/tcg_optimize.c.o -c ../../tcg/optimize.c
../../tcg/optimize.c: In function ‘fold_addsub2’:
../../tcg/optimize.c:865:13: error: unknown type name ‘Int128’
  865 |             Int128 a = int128_make128(al, ah);
      |             ^~~~~~
../../tcg/optimize.c:865:24: error: implicit declaration of function ‘int128_make128’ [-Werror=implicit-function-declaration]
  865 |             Int128 a = int128_make128(al, ah);
```

possibly we are just missing:

  #include "qemu/int128.h"

?
On 10/19/21 8:34 AM, Alex Bennée wrote:
> ../../tcg/optimize.c: In function ‘fold_addsub2’:
> ../../tcg/optimize.c:865:13: error: unknown type name ‘Int128’
>   865 |             Int128 a = int128_make128(al, ah);
>       |             ^~~~~~
> ../../tcg/optimize.c:865:24: error: implicit declaration of function ‘int128_make128’ [-Werror=implicit-function-declaration]
>   865 |             Int128 a = int128_make128(al, ah);
>
>
> possibly we are just missing:
>
>   #include "qemu/int128.h"

Ah, this patch set predates a cleanup to tcg_ldst.h. Previously we included int128.h in tcg.h. Will fix.

r~
```diff
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 0011ac31ec..5e662ad8f7 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -838,37 +838,59 @@ static bool fold_add(OptContext *ctx, TCGOp *op)
     return false;
 }
 
-static bool fold_addsub2_i32(OptContext *ctx, TCGOp *op, bool add)
+static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
 {
     if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
         arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
-        uint32_t al = arg_info(op->args[2])->val;
-        uint32_t ah = arg_info(op->args[3])->val;
-        uint32_t bl = arg_info(op->args[4])->val;
-        uint32_t bh = arg_info(op->args[5])->val;
-        uint64_t a = ((uint64_t)ah << 32) | al;
-        uint64_t b = ((uint64_t)bh << 32) | bl;
+        uint64_t al = arg_info(op->args[2])->val;
+        uint64_t ah = arg_info(op->args[3])->val;
+        uint64_t bl = arg_info(op->args[4])->val;
+        uint64_t bh = arg_info(op->args[5])->val;
         TCGArg rl, rh;
-        TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
+        TCGOp *op2;
 
-        if (add) {
-            a += b;
+        if (ctx->type == TCG_TYPE_I32) {
+            uint64_t a = deposit64(al, 32, 32, ah);
+            uint64_t b = deposit64(bl, 32, 32, bh);
+
+            if (add) {
+                a += b;
+            } else {
+                a -= b;
+            }
+
+            al = sextract64(a, 0, 32);
+            ah = sextract64(a, 32, 32);
         } else {
-            a -= b;
+            Int128 a = int128_make128(al, ah);
+            Int128 b = int128_make128(bl, bh);
+
+            if (add) {
+                a = int128_add(a, b);
+            } else {
+                a = int128_sub(a, b);
+            }
+
+            al = int128_getlo(a);
+            ah = int128_gethi(a);
         }
 
         rl = op->args[0];
         rh = op->args[1];
-        tcg_opt_gen_movi(ctx, op, rl, (int32_t)a);
-        tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(a >> 32));
+
+        /* The proper opcode is supplied by tcg_opt_gen_mov. */
+        op2 = tcg_op_insert_before(ctx->tcg, op, 0);
+
+        tcg_opt_gen_movi(ctx, op, rl, al);
+        tcg_opt_gen_movi(ctx, op2, rh, ah);
         return true;
     }
     return false;
 }
 
-static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
+static bool fold_add2(OptContext *ctx, TCGOp *op)
 {
-    return fold_addsub2_i32(ctx, op, true);
+    return fold_addsub2(ctx, op, true);
 }
 
 static bool fold_and(OptContext *ctx, TCGOp *op)
@@ -1707,9 +1729,9 @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
     return false;
 }
 
-static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
+static bool fold_sub2(OptContext *ctx, TCGOp *op)
 {
-    return fold_addsub2_i32(ctx, op, false);
+    return fold_addsub2(ctx, op, false);
 }
 
 static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
@@ -1855,8 +1877,8 @@ void tcg_optimize(TCGContext *s)
         CASE_OP_32_64_VEC(add):
             done = fold_add(&ctx, op);
             break;
-        case INDEX_op_add2_i32:
-            done = fold_add2_i32(&ctx, op);
+        CASE_OP_32_64(add2):
+            done = fold_add2(&ctx, op);
             break;
         CASE_OP_32_64_VEC(and):
             done = fold_and(&ctx, op);
@@ -1991,8 +2013,8 @@ void tcg_optimize(TCGContext *s)
         CASE_OP_32_64_VEC(sub):
             done = fold_sub(&ctx, op);
             break;
-        case INDEX_op_sub2_i32:
-            done = fold_sub2_i32(&ctx, op);
+        CASE_OP_32_64(sub2):
+            done = fold_sub2(&ctx, op);
             break;
         CASE_OP_32_64_VEC(xor):
             done = fold_xor(&ctx, op);
```