@@ -76,6 +76,7 @@
#include "sched-int.h"
#include "cortex-a57-fma-steering.h"
#include "target-globals.h"
+#include "genimm-hash.h"
/* This file should be included last. */
#include "target-def.h"
@@ -1317,54 +1318,144 @@ aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
return plus_constant (mode, reg, offset);
}
-static int
-aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
- machine_mode mode)
+namespace {
+
+/* In order to simplify the decoding below, assert that none of
+   the rtx codes we use falls in the set {0, 16, 32, 48}.  */
+STATIC_ASSERT (((int)PLUS & ~48) != 0);
+STATIC_ASSERT (((int)IOR & ~48) != 0);
+STATIC_ASSERT (((int)AND & ~48) != 0);
+
+enum aa_gi_code
{
- unsigned HOST_WIDE_INT mask;
- int i;
- bool first;
- unsigned HOST_WIDE_INT val;
- bool subtargets;
- rtx subtarget;
- int one_match, zero_match, first_not_ffff_match;
- int num_insns = 0;
+ AA_GI_NIL = -2,
+ AA_GI_SET = -1,
+
+ AA_GI_INS0 = 0,
+ AA_GI_INS1 = 16,
+ AA_GI_INS2 = 32,
+ AA_GI_INS3 = 48,
+
+ AA_GI_PLUS = PLUS,
+ AA_GI_IOR = IOR,
+ AA_GI_AND = AND
+};
+
+struct genimm_aa64 : genimm_base<aa_gi_code, 4>
+{
+ static const int max_simple = 2;
+
+ static rtx_code aa_gi_binop(aa_gi_code c)
+ {
+ return (c == AA_GI_PLUS || c == AA_GI_IOR || c == AA_GI_AND
+ ? (rtx_code)c : UNKNOWN);
+ }
+
+ genimm_aa64 (HOST_WIDE_INT c);
+
+ void set0 (HOST_WIDE_INT v);
+ void opN (aa_gi_code o, HOST_WIDE_INT v);
+ void insN (int b, unsigned HOST_WIDE_INT v);
+
+ /* The search algorithm that we use for aarch64 is non-recursive.
+ Thus we do not require the iteration provided by genimm_hash.
+ Produce an empty loop and go straight to exam_full. */
+ bool exam_search (unsigned HOST_WIDE_INT, int) { return false; }
+
+ bool exam_simple (HOST_WIDE_INT val, machine_mode mode, int);
+ bool exam_plus (unsigned HOST_WIDE_INT val, unsigned HOST_WIDE_INT base);
+ void exam_full (unsigned HOST_WIDE_INT val);
+ void generate (rtx dest, machine_mode mode) const;
+};
- if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
+genimm_aa64::genimm_aa64 (HOST_WIDE_INT c)
+ : genimm_base (c)
+{
+#ifdef ENABLE_CHECKING
+ code[0] = code[1] = code[2] = code[3] = AA_GI_NIL;
+ op[0] = op[1] = op[2] = op[3] = 0;
+#endif
+}
+
+void
+genimm_aa64::set0 (HOST_WIDE_INT v)
+{
+ cost = 1;
+ code[0] = AA_GI_SET;
+ op[0] = v;
+}
+
+void
+genimm_aa64::opN (aa_gi_code c, HOST_WIDE_INT v)
+{
+ int n = cost++;
+ gcc_checking_assert (n > 0 && n < max_cost);
+ code[n] = c;
+ op[n] = v;
+}
+
+void
+genimm_aa64::insN (int b, unsigned HOST_WIDE_INT v)
+{
+ int n = cost++;
+ gcc_checking_assert (n > 0 && n < max_cost);
+ gcc_checking_assert ((b & ~48) == 0);
+ code[n] = (aa_gi_code)b;
+ op[n] = (v >> b) & 0xffff;
+}
+
+/* Look for simple constants that aren't worth hashing.  */
+
+bool
+genimm_aa64::exam_simple (HOST_WIDE_INT val, machine_mode mode, int)
+{
+ if (aarch64_move_imm (val, mode))
{
- if (generate)
- emit_insn (gen_rtx_SET (dest, imm));
- num_insns++;
- return num_insns;
+ set0 (val);
+ return true;
}
-
if (mode == SImode)
{
/* We know we can't do this in 1 insn, and we must be able to do it
in two; so don't mess around looking for sequences that don't buy
us anything. */
- if (generate)
- {
- emit_insn (gen_rtx_SET (dest, GEN_INT (INTVAL (imm) & 0xffff)));
- emit_insn (gen_insv_immsi (dest, GEN_INT (16),
- GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
- }
- num_insns += 2;
- return num_insns;
+ set0 (val & 0xffff);
+ insN (16, val);
+ return true;
}
+ return false;
+}
- /* Remaining cases are all for DImode. */
+/* A subroutine of genimm_aa64::exam_full. If VAL can be created from BASE
+ via the addition of a constant, construct the recipe as appropriate and
+ return true. Otherwise return false. */
- val = INTVAL (imm);
- subtargets = optimize && can_create_pseudo_p ();
+bool
+genimm_aa64::exam_plus (unsigned HOST_WIDE_INT val, unsigned HOST_WIDE_INT base)
+{
+ HOST_WIDE_INT diff = val - base;
+ if (aarch64_uimm12_shift (diff < 0 ? -diff : diff))
+ {
+ set0 (base);
+ opN (AA_GI_PLUS, diff);
+ return true;
+ }
+ return false;
+}
- one_match = 0;
- zero_match = 0;
- mask = 0xffff;
- first_not_ffff_match = -1;
+/* Examine the DImode quantity VAL, and store a recipe for its creation. */
- for (i = 0; i < 64; i += 16, mask <<= 16)
+void
+genimm_aa64::exam_full (unsigned HOST_WIDE_INT val)
+{
+ unsigned HOST_WIDE_INT mask;
+ int one_match = 0;
+ int zero_match = 0;
+ int first_not_ffff_match = -1;
+
+ for (int i = 0; i < 64; i += 16)
{
+ mask = HOST_WIDE_INT_UC (0xffff) << i;
if ((val & mask) == mask)
one_match++;
else
@@ -1379,211 +1470,186 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
if (one_match == 2)
{
/* Set one of the quarters and then insert back into result. */
- mask = 0xffffll << first_not_ffff_match;
- if (generate)
- {
- emit_insn (gen_rtx_SET (dest, GEN_INT (val | mask)));
- emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
- GEN_INT ((val >> first_not_ffff_match)
- & 0xffff)));
- }
- num_insns += 2;
- return num_insns;
+ mask = HOST_WIDE_INT_UC (0xffff) << first_not_ffff_match;
+ set0 (val | mask);
+ insN (first_not_ffff_match, val);
+ return;
}
if (zero_match == 2)
goto simple_sequence;
- mask = 0x0ffff0000UL;
- for (i = 16; i < 64; i += 16, mask <<= 16)
+ for (int i = 16; i < 64; i += 16, mask <<= 16)
{
- HOST_WIDE_INT comp = mask & ~(mask - 1);
+ unsigned HOST_WIDE_INT comp = HOST_WIDE_INT_1U << i;
+ mask = HOST_WIDE_INT_UC (0xffff) << i;
- if (aarch64_uimm12_shift (val - (val & mask)))
- {
- if (generate)
- {
- subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
- emit_insn (gen_rtx_SET (subtarget, GEN_INT (val & mask)));
- emit_insn (gen_adddi3 (dest, subtarget,
- GEN_INT (val - (val & mask))));
- }
- num_insns += 2;
- return num_insns;
- }
- else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
- {
- if (generate)
- {
- subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
- emit_insn (gen_rtx_SET (subtarget,
- GEN_INT ((val + comp) & mask)));
- emit_insn (gen_adddi3 (dest, subtarget,
- GEN_INT (val - ((val + comp) & mask))));
- }
- num_insns += 2;
- return num_insns;
- }
- else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
- {
- if (generate)
- {
- subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
- emit_insn (gen_rtx_SET (subtarget,
- GEN_INT ((val - comp) | ~mask)));
- emit_insn (gen_adddi3 (dest, subtarget,
- GEN_INT (val - ((val - comp) | ~mask))));
- }
- num_insns += 2;
- return num_insns;
- }
- else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
- {
- if (generate)
- {
- subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
- emit_insn (gen_rtx_SET (subtarget, GEN_INT (val | ~mask)));
- emit_insn (gen_adddi3 (dest, subtarget,
- GEN_INT (val - (val | ~mask))));
- }
- num_insns += 2;
- return num_insns;
- }
+ if (exam_plus (val, val & mask))
+ return;
+ if (exam_plus (val, (val + comp) & mask))
+ return;
+ if (exam_plus (val, (val - comp) | ~mask))
+ return;
+ if (exam_plus (val, val | ~mask))
+ return;
}
- /* See if we can do it by arithmetically combining two
- immediates. */
- for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
+ /* See if we can do it by arithmetically combining two immediates. */
+ for (int i = 0; i < AARCH64_NUM_BITMASKS; i++)
{
- int j;
- mask = 0xffff;
+ unsigned HOST_WIDE_INT bmi = aarch64_bitmasks[i];
- if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
- || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
- {
- if (generate)
- {
- subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
- emit_insn (gen_rtx_SET (subtarget,
- GEN_INT (aarch64_bitmasks[i])));
- emit_insn (gen_adddi3 (dest, subtarget,
- GEN_INT (val - aarch64_bitmasks[i])));
- }
- num_insns += 2;
- return num_insns;
- }
+ if (exam_plus (val, bmi))
+ return;
- for (j = 0; j < 64; j += 16, mask <<= 16)
+ for (int j = 0; j < 64; j += 16)
{
- if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
+ mask = HOST_WIDE_INT_UC (0xffff) << j;
+ if ((bmi & ~mask) == (val & ~mask))
{
- if (generate)
- {
- emit_insn (gen_rtx_SET (dest,
- GEN_INT (aarch64_bitmasks[i])));
- emit_insn (gen_insv_immdi (dest, GEN_INT (j),
- GEN_INT ((val >> j) & 0xffff)));
- }
- num_insns += 2;
- return num_insns;
+ set0 (bmi);
+ insN (j, val);
+ return;
}
}
}
/* See if we can do it by logically combining two immediates. */
- for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
+ for (int i = 0; i < AARCH64_NUM_BITMASKS; i++)
{
- if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
+ unsigned HOST_WIDE_INT bmi = aarch64_bitmasks[i];
+
+ if ((bmi & val) == bmi)
{
- int j;
+ for (int j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
+ {
+ unsigned HOST_WIDE_INT bmj = aarch64_bitmasks[j];
- for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
- if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
- {
- if (generate)
- {
- subtarget = subtargets ? gen_reg_rtx (mode) : dest;
- emit_insn (gen_rtx_SET (subtarget,
- GEN_INT (aarch64_bitmasks[i])));
- emit_insn (gen_iordi3 (dest, subtarget,
- GEN_INT (aarch64_bitmasks[j])));
- }
- num_insns += 2;
- return num_insns;
- }
+ if (val == (bmi | bmj))
+ {
+ set0 (bmi);
+ opN (AA_GI_IOR, bmj);
+ return;
+ }
+ }
}
- else if ((val & aarch64_bitmasks[i]) == val)
+ else if ((val & bmi) == val)
{
- int j;
+ for (int j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
+ {
+ unsigned HOST_WIDE_INT bmj = aarch64_bitmasks[j];
- for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
- if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
- {
- if (generate)
- {
- subtarget = subtargets ? gen_reg_rtx (mode) : dest;
- emit_insn (gen_rtx_SET (subtarget,
- GEN_INT (aarch64_bitmasks[j])));
- emit_insn (gen_anddi3 (dest, subtarget,
- GEN_INT (aarch64_bitmasks[i])));
- }
- num_insns += 2;
- return num_insns;
- }
+ if (val == (bmi & bmj))
+ {
+ set0 (bmi);
+ opN (AA_GI_AND, bmj);
+ return;
+ }
+ }
}
}
if (one_match > zero_match)
{
/* Set either first three quarters or all but the third. */
- mask = 0xffffll << (16 - first_not_ffff_match);
- if (generate)
- emit_insn (gen_rtx_SET (dest,
- GEN_INT (val | mask | 0xffffffff00000000ull)));
- num_insns ++;
+ mask = HOST_WIDE_INT_UC (0xffff) << (16 - first_not_ffff_match);
+ set0 (val | mask | HOST_WIDE_INT_UC (0xffffffff00000000));
/* Now insert other two quarters. */
- for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
- i < 64; i += 16, mask <<= 16)
+ for (int i = first_not_ffff_match + 16; i < 64; i += 16)
{
+ mask = HOST_WIDE_INT_UC (0xffff) << i;
if ((val & mask) != mask)
- {
- if (generate)
- emit_insn (gen_insv_immdi (dest, GEN_INT (i),
- GEN_INT ((val >> i) & 0xffff)));
- num_insns ++;
- }
+ insN (i, val);
}
- return num_insns;
+ return;
}
simple_sequence:
- first = true;
- mask = 0xffff;
- for (i = 0; i < 64; i += 16, mask <<= 16)
+ cost = 0;
+ for (int i = 0; i < 64; i += 16)
{
+ mask = HOST_WIDE_INT_UC (0xffff) << i;
if ((val & mask) != 0)
{
- if (first)
- {
- if (generate)
- emit_insn (gen_rtx_SET (dest, GEN_INT (val & mask)));
- num_insns ++;
- first = false;
- }
+ if (cost == 0)
+ set0 (val & mask);
else
- {
- if (generate)
- emit_insn (gen_insv_immdi (dest, GEN_INT (i),
- GEN_INT ((val >> i) & 0xffff)));
- num_insns ++;
- }
+ insN (i, val);
}
}
+}
+
+/* Follow the recipe to construct a value in MODE,
+   placing the result in DEST.  */
+
+void
+genimm_aa64::generate (rtx dest, machine_mode mode) const
+{
+ int n = cost;
- return num_insns;
+ gcc_checking_assert (n >= 1 && n <= max_cost);
+ gcc_checking_assert (code[0] == AA_GI_SET);
+
+ /* If possible, put the original SET into its own pseudo, so that
+ it might be CSE'd. We can't do this if we use INSV, and we only
+ ever use arithmetic with N == 2. */
+ if (n == 2 && optimize && can_create_pseudo_p ())
+ {
+ rtx_code rc = aa_gi_binop (code[1]);
+ if (rc != UNKNOWN)
+ {
+ rtx sub = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (sub, GEN_INT (op[0])));
+ sub = gen_rtx_fmt_ee (rc, mode, sub, GEN_INT (op[1]));
+ emit_insn (gen_rtx_SET (dest, sub));
+ return;
+ }
+ }
+
+ emit_insn (gen_rtx_SET (dest, GEN_INT (op[0])));
+
+ for (int i = 1; i < n; ++i)
+ {
+ rtx x = GEN_INT (op[i]);
+ switch (code[i])
+ {
+ case AA_GI_PLUS:
+ case AA_GI_IOR:
+ case AA_GI_AND:
+ x = gen_rtx_fmt_ee (aa_gi_binop (code[i]), mode, dest, x);
+ x = gen_rtx_SET (dest, x);
+ break;
+ case AA_GI_INS0:
+ case AA_GI_INS1:
+ case AA_GI_INS2:
+ case AA_GI_INS3:
+ if (mode == SImode)
+ x = gen_insv_immsi (dest, GEN_INT ((int)code[i]), x);
+ else
+ x = gen_insv_immdi (dest, GEN_INT ((int)code[i]), x);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ emit_insn (x);
+ }
}
+} // anon namespace
+
+/* Examine IMM in MODE and return the number of insns required to construct
+   it.  If GENERATE is true, emit instructions to compute IMM into DEST.  */
+
+static inline int
+aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
+ machine_mode mode)
+{
+ genimm_aa64 data = genimm_hash<genimm_aa64>::hash (INTVAL (imm), mode);
+ if (generate)
+ data.generate (dest, mode);
+ return data.cost;
+}
void
aarch64_expand_mov_immediate (rtx dest, rtx imm)