@@ -511,32 +511,79 @@ static inline void tcg_out_movr(TCGContext *s, AArch64Ext ext,
tcg_out_aimm(s, INSN_ADDI, ext, dest, src, 0);
}
+static inline void tcg_out_movwi(TCGContext *s, AArch64Insn insn, /* Emit one 32-bit insn word carrying a 16-bit immediate; insn is the base opcode (callers pass INSN_MOVZ, INSN_MOVN or INSN_MOVK). */
+ AArch64Ext ext, TCGReg rd, /* ext is OR-ed in unshifted (callers pass E32, E64 or EXT(...)); rd is OR-ed into the lowest bits. */
+ uint16_t value, int shift) /* value: the 16-bit immediate; shift: bit position of that halfword (callers pass 0, 16, 32 or 48). */
+{
+ tcg_out32(s, insn | ext | shift << 17 | value << 5 | rd); /* shift << 17 places shift/16 at bit 21; value << 5 occupies bits 5..20. */
+}
+
static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
tcg_target_long value)
{
- AArch64Insn insn = INSN_MOVZ;
+ tcg_target_long valid = (type == TCG_TYPE_I32 ? 0xffffffffull : -1ull); /* mask of the bits that are significant for this type */
+ AArch64Insn insn;
+ AArch64Ext ext;
+ int i, wantinv, shift;
+
+ value &= valid; /* for TCG_TYPE_I32, discard anything above bit 31 */
+
+ /* Check small positive values: anything that fits in the low 16 bits is emitted as a single MOVZ. */
+ if ((value & ~0xffff) == 0) {
+ tcg_out_movwi(s, INSN_MOVZ, E32, rd, value, 0); /* NOTE(review): E32 is used for both types; presumably relies on 32-bit writes zero-extending the register - confirm. */
+ return;
+ }
+
+ /* Check small negative values: every significant bit above the low 16 is set, so a single MOVN of the inverted low halfword is emitted. */
+ if ((~value & valid & ~0xffff) == 0) {
+ tcg_out_movwi(s, INSN_MOVN, EXT(type == TCG_TYPE_I64), rd, ~value, 0); /* ~value is truncated to uint16_t by the callee's parameter type */
+ return;
+ }
+
+ /* Check for bitfield immediates: if the lookup returns a non-negative index, one ORR-immediate with XZR as the source register is emitted. */
+ if ((value & ~0xffffffffull) == 0) { /* upper 32 bits clear: use the 32-bit lookup and E32 */
+ i = find_bitmask32(value);
+ ext = E32;
+ } else {
+ i = find_bitmask64(value);
+ ext = E64;
+ }
+ if (i >= 0) { /* negative i means no encoding was found; ext chosen above is also reused by the MOVZ/MOVN/MOVK sequence below */
+ tcg_out32(s, INSN_ORRI | TCG_REG_XZR << 5 | ext
+ | bitmask_enc[i] << 10 | rd);
+ return;
+ }
- if (type == TCG_TYPE_I32) {
- value = (uint32_t)value;
+ /* Would it take fewer insns to load the inverse?  Score -1 per all-zero halfword and +1 per all-ones halfword; a positive total selects the inverted (MOVN) form. */
+ wantinv = 0;
+ for (i = 0; i < 64; i += 16) { /* visit the four 16-bit halfwords of value */
+ if (((value >> i) & 0xffff) == 0) {
+ wantinv--;
+ }
+ if (((~value >> i) & 0xffff) == 0) {
+ wantinv++;
+ }
}
- /* Construct halfwords of the immediate with MOVZ/MOVK with LSL.
- Count trailing zeros in 16 bit steps, mapping 64 to 0. Emit the
- first MOVZ with the half-word immediate skipping the zeros, with
- a shift (LSL) equal to this number. Then all other insns are MOVKs.
- Zero the processed half-word in the value, continue until empty.
- We build the final result 16bits at a time with up to 4 instructions,
- but do not emit instructions for 16bit zero holes. */
- do {
- unsigned shift = ctz64(value) & (63 & -16);
- unsigned half = (value >> shift) & 0xffff;
- AArch64Ext ext = EXT(shift >= 32);
-
- tcg_out32(s, insn | ext | shift << 17 | half << 5 | rd);
-
- insn = INSN_MOVK;
+ if (wantinv > 0) {
+ value = ~value; /* from here on, value holds the inverted constant */
+ insn = INSN_MOVN;
+ valid = -1; /* valid is reused below as an XOR mask that undoes the inversion for MOVK immediates */
+ } else {
+ insn = INSN_MOVZ;
+ valid = 0; /* XOR mask of 0: MOVK immediates are taken from value unchanged */
+ }
+
+ /* Perform the first round specially, to handle the inverse: the first insn is MOVN or MOVZ on the lowest non-zero halfword of the (possibly inverted) value. */
+ shift = ctz64(value) & (63 & -16); /* 63 & -16 == 48: rounds the trailing-zero count down to 0, 16, 32 or 48 */
+ tcg_out_movwi(s, insn, ext, rd, value >> shift, shift);
+ value &= ~(0xffffUL << shift); /* clear the halfword just emitted; NOTE(review): 0xffffUL needs a 64-bit unsigned long when shift >= 32 */
+
+ while (value) { /* one MOVK per remaining non-zero halfword */
+ shift = ctz64(value) & (63 & -16);
+ tcg_out_movwi(s, INSN_MOVK, ext, rd, (value ^ valid) >> shift, shift); /* XOR with valid restores the true bits when value was inverted */
value &= ~(0xffffUL << shift);
- } while (value);
+ }
}
static inline void tcg_out_ldst_r(TCGContext *s,
Handle small positive and negative numbers early. Check for logical immediates. Check if using MOVN for the first set helps.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/aarch64/tcg-target.c | 85 +++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 66 insertions(+), 19 deletions(-)