[11/15] aarch64: Use hashing infrastructure for generating constants

Message ID 1439341904-9345-12-git-send-email-rth@redhat.com
State: New

Commit Message

Richard Henderson Aug. 12, 2015, 1:11 a.m. UTC
Aside from the hashing and the splitting of insn generation from recipe
creation, there's no algorithmic change.
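
A sketch of the shape of the split, for reviewers who have not read the
earlier patches in the series (the cost/code/op members below are taken
from this patch; the genimm_base/genimm_hash interface itself is only
assumed from context, not quoted):

  /* Examination records a recipe: one (code, operand) pair per insn,
     at most max_cost entries.  No RTL is created at this stage.  */
  struct recipe_sketch
  {
    int cost;			/* number of insns recorded  */
    aa_gi_code code[4];		/* what each insn does       */
    HOST_WIDE_INT op[4];	/* its immediate operand     */
  };

  /* Generation replays the recipe, emitting one insn per entry;
     a pure cost query never calls it.  */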

Cc: Marcus Shawcroft <marcus.shawcroft@arm.com>
Cc: Richard Earnshaw <richard.earnshaw@arm.com>
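
One encoding note that may help when reading the new code: MOVK-style
insertions are recorded with the insertion bit offset itself (0, 16, 32
or 48) as the recipe code, while PLUS/IOR/AND store the rtx code
directly; the STATIC_ASSERTs up front guarantee the two ranges cannot
collide.  Decoding a recorded step (c, v) after the initial SET thus
reduces to roughly the following (a sketch only; emit_insert and
emit_binop are illustrative names, not functions in the patch):

  if (((int) c & ~48) == 0)
    emit_insert (dest, (int) c, v);	/* insv at bit offset c */
  else
    emit_binop ((rtx_code) c, dest, v);	/* c is PLUS, IOR or AND */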
---
	* config/aarch64/aarch64.c: Include genimm-hash.h.
	(aa_gi_code): New enum.
	(genimm_aa64): New class.
	(genimm_aa64::genimm_aa64): New.
	(genimm_aa64::set0, genimm_aa64::opN, genimm_aa64::insN): New.
	(genimm_aa64::exam_simple): New.
	(genimm_aa64::exam_plus): New.
	(genimm_aa64::generate): New.
	(genimm_aa64::exam_full): Extract from the body of the
	old aarch64_internal_mov_immediate.
	(aarch64_internal_mov_immediate): Rewrite using genimm_hash.
---
 gcc/config/aarch64/aarch64.c | 446 +++++++++++++++++++++++++------------------
 1 file changed, 256 insertions(+), 190 deletions(-)
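
Caller-visible behaviour is unchanged: the same entry point serves both
costing and expansion, with the recipe now computed once and memoized
by the hash.  For example (assuming the existing convention that DEST
is ignored when GENERATE is false):

  /* Cost query: how many insns would this constant take?  */
  int n = aarch64_internal_mov_immediate (NULL_RTX, imm, false, DImode);

  /* Expansion: actually emit the sequence into DEST.  */
  aarch64_internal_mov_immediate (dest, imm, true, DImode);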

Patch

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 1394ed7..6b12a07 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -76,6 +76,7 @@ 
 #include "sched-int.h"
 #include "cortex-a57-fma-steering.h"
 #include "target-globals.h"
+#include "genimm-hash.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -1317,54 +1318,144 @@  aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
   return plus_constant (mode, reg, offset);
 }
 
-static int
-aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
-				machine_mode mode)
+namespace {
+
+/* In order to simplify the below, make sure none of the
+   given rtx codes are in {0,16,32,48}.  */
+STATIC_ASSERT (((int)PLUS & ~48) != 0);
+STATIC_ASSERT (((int)IOR & ~48) != 0);
+STATIC_ASSERT (((int)AND & ~48) != 0);
+
+enum aa_gi_code
 {
-  unsigned HOST_WIDE_INT mask;
-  int i;
-  bool first;
-  unsigned HOST_WIDE_INT val;
-  bool subtargets;
-  rtx subtarget;
-  int one_match, zero_match, first_not_ffff_match;
-  int num_insns = 0;
+  AA_GI_NIL = -2,
+  AA_GI_SET = -1,
+
+  AA_GI_INS0 = 0,
+  AA_GI_INS1 = 16,
+  AA_GI_INS2 = 32,
+  AA_GI_INS3 = 48,
+
+  AA_GI_PLUS = PLUS,
+  AA_GI_IOR = IOR,
+  AA_GI_AND = AND
+};
+
+struct genimm_aa64 : genimm_base<aa_gi_code, 4>
+{
+  static const int max_simple = 2;
+
+  static rtx_code aa_gi_binop (aa_gi_code c)
+  {
+    return (c == AA_GI_PLUS || c == AA_GI_IOR || c == AA_GI_AND
+	    ? (rtx_code)c : UNKNOWN);
+  }
+
+  genimm_aa64 (HOST_WIDE_INT c);
+
+  void set0 (HOST_WIDE_INT v);
+  void opN (aa_gi_code o, HOST_WIDE_INT v);
+  void insN (int b, unsigned HOST_WIDE_INT v);
+
+  /* The search algorithm that we use for aarch64 is non-recursive.
+     Thus we do not require the iteration provided by genimm_hash.
+     Produce an empty loop and go straight to exam_full.  */
+  bool exam_search (unsigned HOST_WIDE_INT, int) { return false; }
+
+  bool exam_simple (HOST_WIDE_INT val, machine_mode mode, int);
+  bool exam_plus (unsigned HOST_WIDE_INT val, unsigned HOST_WIDE_INT base);
+  void exam_full (unsigned HOST_WIDE_INT val);
+  void generate (rtx dest, machine_mode mode) const;
+};
 
-  if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
+genimm_aa64::genimm_aa64 (HOST_WIDE_INT c)
+  : genimm_base (c)
+{
+#ifdef ENABLE_CHECKING
+  code[0] = code[1] = code[2] = code[3] = AA_GI_NIL;
+  op[0] = op[1] = op[2] = op[3] = 0;
+#endif
+}
+
+void
+genimm_aa64::set0 (HOST_WIDE_INT v)
+{
+  cost = 1;
+  code[0] = AA_GI_SET;
+  op[0] = v;
+}
+
+void
+genimm_aa64::opN (aa_gi_code c, HOST_WIDE_INT v)
+{
+  int n = cost++;
+  gcc_checking_assert (n > 0 && n < max_cost);
+  code[n] = c;
+  op[n] = v;
+}
+
+void
+genimm_aa64::insN (int b, unsigned HOST_WIDE_INT v)
+{
+  int n = cost++;
+  gcc_checking_assert (n > 0 && n < max_cost);
+  gcc_checking_assert ((b & ~48) == 0);
+  code[n] = (aa_gi_code)b;
+  op[n] = (v >> b) & 0xffff;
+}
+
+/* Look for simple constants that aren't worth hashing.  */
+
+bool
+genimm_aa64::exam_simple (HOST_WIDE_INT val, machine_mode mode, int)
+{
+  if (aarch64_move_imm (val, mode))
     {
-      if (generate)
-	emit_insn (gen_rtx_SET (dest, imm));
-      num_insns++;
-      return num_insns;
+      set0 (val);
+      return true;
     }
-
   if (mode == SImode)
     {
       /* We know we can't do this in 1 insn, and we must be able to do it
 	 in two; so don't mess around looking for sequences that don't buy
 	 us anything.  */
-      if (generate)
-	{
-	  emit_insn (gen_rtx_SET (dest, GEN_INT (INTVAL (imm) & 0xffff)));
-	  emit_insn (gen_insv_immsi (dest, GEN_INT (16),
-				     GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
-	}
-      num_insns += 2;
-      return num_insns;
+      set0 (val & 0xffff);
+      insN (16, val);
+      return true;
     }
+  return false;
+}
 
-  /* Remaining cases are all for DImode.  */
+/* A subroutine of genimm_aa64::exam_full.  If VAL can be created from BASE
+   via the addition of a constant, construct the recipe as appropriate and
+   return true.  Otherwise return false.  */
 
-  val = INTVAL (imm);
-  subtargets = optimize && can_create_pseudo_p ();
+bool
+genimm_aa64::exam_plus (unsigned HOST_WIDE_INT val, unsigned HOST_WIDE_INT base)
+{
+  HOST_WIDE_INT diff = val - base;
+  if (aarch64_uimm12_shift (diff < 0 ? -diff : diff))
+    {
+      set0 (base);
+      opN (AA_GI_PLUS, diff);
+      return true;
+    }
+  return false;
+}
 
-  one_match = 0;
-  zero_match = 0;
-  mask = 0xffff;
-  first_not_ffff_match = -1;
+/* Examine the DImode quantity VAL, and store a recipe for its creation.  */
 
-  for (i = 0; i < 64; i += 16, mask <<= 16)
+void
+genimm_aa64::exam_full (unsigned HOST_WIDE_INT val)
+{
+  unsigned HOST_WIDE_INT mask;
+  int one_match = 0;
+  int zero_match = 0;
+  int first_not_ffff_match = -1;
+
+  for (int i = 0; i < 64; i += 16)
     {
+      mask = HOST_WIDE_INT_UC (0xffff) << i;
       if ((val & mask) == mask)
 	one_match++;
       else
@@ -1379,211 +1470,186 @@  aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
   if (one_match == 2)
     {
       /* Set one of the quarters and then insert back into result.  */
-      mask = 0xffffll << first_not_ffff_match;
-      if (generate)
-	{
-	  emit_insn (gen_rtx_SET (dest, GEN_INT (val | mask)));
-	  emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
-				     GEN_INT ((val >> first_not_ffff_match)
-					      & 0xffff)));
-	}
-      num_insns += 2;
-      return num_insns;
+      mask = HOST_WIDE_INT_UC (0xffff) << first_not_ffff_match;
+      set0 (val | mask);
+      insN (first_not_ffff_match, val);
+      return;
     }
 
   if (zero_match == 2)
     goto simple_sequence;
 
-  mask = 0x0ffff0000UL;
-  for (i = 16; i < 64; i += 16, mask <<= 16)
+  for (int i = 16; i < 64; i += 16)
     {
-      HOST_WIDE_INT comp = mask & ~(mask - 1);
+      unsigned HOST_WIDE_INT comp = HOST_WIDE_INT_1U << i;
+      mask = HOST_WIDE_INT_UC (0xffff) << i;
 
-      if (aarch64_uimm12_shift (val - (val & mask)))
-	{
-	  if (generate)
-	    {
-	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-	      emit_insn (gen_rtx_SET (subtarget, GEN_INT (val & mask)));
-	      emit_insn (gen_adddi3 (dest, subtarget,
-				     GEN_INT (val - (val & mask))));
-	    }
-	  num_insns += 2;
-	  return num_insns;
-	}
-      else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
-	{
-	  if (generate)
-	    {
-	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-	      emit_insn (gen_rtx_SET (subtarget,
-				      GEN_INT ((val + comp) & mask)));
-	      emit_insn (gen_adddi3 (dest, subtarget,
-				     GEN_INT (val - ((val + comp) & mask))));
-	    }
-	  num_insns += 2;
-	  return num_insns;
-	}
-      else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
-	{
-	  if (generate)
-	    {
-	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-	      emit_insn (gen_rtx_SET (subtarget,
-				      GEN_INT ((val - comp) | ~mask)));
-	      emit_insn (gen_adddi3 (dest, subtarget,
-				     GEN_INT (val - ((val - comp) | ~mask))));
-	    }
-	  num_insns += 2;
-	  return num_insns;
-	}
-      else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
-	{
-	  if (generate)
-	    {
-	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-	      emit_insn (gen_rtx_SET (subtarget, GEN_INT (val | ~mask)));
-	      emit_insn (gen_adddi3 (dest, subtarget,
-				     GEN_INT (val - (val | ~mask))));
-	    }
-	  num_insns += 2;
-	  return num_insns;
-	}
+      if (exam_plus (val, val & mask))
+	return;
+      if (exam_plus (val, (val + comp) & mask))
+	return;
+      if (exam_plus (val, (val - comp) | ~mask))
+	return;
+      if (exam_plus (val, val | ~mask))
+	return;
     }
 
-  /* See if we can do it by arithmetically combining two
-     immediates.  */
-  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
+  /* See if we can do it by arithmetically combining two immediates.  */
+  for (int i = 0; i < AARCH64_NUM_BITMASKS; i++)
     {
-      int j;
-      mask = 0xffff;
+      unsigned HOST_WIDE_INT bmi = aarch64_bitmasks[i];
 
-      if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
-	  || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
-	{
-	  if (generate)
-	    {
-	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-	      emit_insn (gen_rtx_SET (subtarget,
-				      GEN_INT (aarch64_bitmasks[i])));
-	      emit_insn (gen_adddi3 (dest, subtarget,
-				     GEN_INT (val - aarch64_bitmasks[i])));
-	    }
-	  num_insns += 2;
-	  return num_insns;
-	}
+      if (exam_plus (val, bmi))
+	return;
 
-      for (j = 0; j < 64; j += 16, mask <<= 16)
+      for (int j = 0; j < 64; j += 16)
 	{
-	  if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
+	  mask = HOST_WIDE_INT_UC (0xffff) << j;
+	  if ((bmi & ~mask) == (val & ~mask))
 	    {
-	      if (generate)
-		{
-		  emit_insn (gen_rtx_SET (dest,
-					  GEN_INT (aarch64_bitmasks[i])));
-		  emit_insn (gen_insv_immdi (dest, GEN_INT (j),
-					     GEN_INT ((val >> j) & 0xffff)));
-		}
-	      num_insns += 2;
-	      return num_insns;
+	      set0 (bmi);
+	      insN (j, val);
+	      return;
 	    }
 	}
     }
 
   /* See if we can do it by logically combining two immediates.  */
-  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
+  for (int i = 0; i < AARCH64_NUM_BITMASKS; i++)
     {
-      if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
+      unsigned HOST_WIDE_INT bmi = aarch64_bitmasks[i];
+
+      if ((bmi & val) == bmi)
 	{
-	  int j;
+	  for (int j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
+	    {
+	      unsigned HOST_WIDE_INT bmj = aarch64_bitmasks[j];
 
-	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
-	    if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
-	      {
-		if (generate)
-		  {
-		    subtarget = subtargets ? gen_reg_rtx (mode) : dest;
-		    emit_insn (gen_rtx_SET (subtarget,
-					    GEN_INT (aarch64_bitmasks[i])));
-		    emit_insn (gen_iordi3 (dest, subtarget,
-					   GEN_INT (aarch64_bitmasks[j])));
-		  }
-		num_insns += 2;
-		return num_insns;
-	      }
+	      if (val == (bmi | bmj))
+		{
+		  set0 (bmi);
+		  opN (AA_GI_IOR, bmj);
+		  return;
+		}
+	    }
 	}
-      else if ((val & aarch64_bitmasks[i]) == val)
+      else if ((val & bmi) == val)
 	{
-	  int j;
+	  for (int j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
+	    {
+	      unsigned HOST_WIDE_INT bmj = aarch64_bitmasks[j];
 
-	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
-	    if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
-	      {
-		if (generate)
-		  {
-		    subtarget = subtargets ? gen_reg_rtx (mode) : dest;
-		    emit_insn (gen_rtx_SET (subtarget,
-					    GEN_INT (aarch64_bitmasks[j])));
-		    emit_insn (gen_anddi3 (dest, subtarget,
-					   GEN_INT (aarch64_bitmasks[i])));
-		  }
-		num_insns += 2;
-		return num_insns;
-	      }
+	      if (val == (bmi & bmj))
+		{
+		  set0 (bmi);
+		  opN (AA_GI_AND, bmj);
+		  return;
+		}
+	    }
 	}
     }
 
   if (one_match > zero_match)
     {
       /* Set either first three quarters or all but the third.	 */
-      mask = 0xffffll << (16 - first_not_ffff_match);
-      if (generate)
-	emit_insn (gen_rtx_SET (dest,
-				GEN_INT (val | mask | 0xffffffff00000000ull)));
-      num_insns ++;
+      mask = HOST_WIDE_INT_UC (0xffff) << (16 - first_not_ffff_match);
+      set0 (val | mask | HOST_WIDE_INT_UC (0xffffffff00000000));
 
       /* Now insert other two quarters.	 */
-      for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
-	   i < 64; i += 16, mask <<= 16)
+      for (int i = first_not_ffff_match + 16; i < 64; i += 16)
 	{
+	  mask = HOST_WIDE_INT_UC (0xffff) << i;
 	  if ((val & mask) != mask)
-	    {
-	      if (generate)
-		emit_insn (gen_insv_immdi (dest, GEN_INT (i),
-					   GEN_INT ((val >> i) & 0xffff)));
-	      num_insns ++;
-	    }
+	    insN (i, val);
 	}
-      return num_insns;
+      return;
     }
 
  simple_sequence:
-  first = true;
-  mask = 0xffff;
-  for (i = 0; i < 64; i += 16, mask <<= 16)
+  cost = 0;
+  for (int i = 0; i < 64; i += 16)
     {
+      mask = HOST_WIDE_INT_UC (0xffff) << i;
       if ((val & mask) != 0)
 	{
-	  if (first)
-	    {
-	      if (generate)
-		emit_insn (gen_rtx_SET (dest, GEN_INT (val & mask)));
-	      num_insns ++;
-	      first = false;
-	    }
+	  if (cost == 0)
+	    set0 (val & mask);
 	  else
-	    {
-	      if (generate)
-		emit_insn (gen_insv_immdi (dest, GEN_INT (i),
-					   GEN_INT ((val >> i) & 0xffff)));
-	      num_insns ++;
-	    }
+	    insN (i, val);
 	}
     }
+}
+
+/* Follow the recipe to construct a value in MODE,
+   placing the result in DEST.  */
+
+void
+genimm_aa64::generate (rtx dest, machine_mode mode) const
+{
+  int n = cost;
 
-  return num_insns;
+  gcc_checking_assert (n >= 1 && n <= max_cost);
+  gcc_checking_assert (code[0] == AA_GI_SET);
+
+  /* If possible, put the original SET into its own pseudo, so that
+     it might be CSE'd.  We can't do this if we use INSV, and we only
+     ever use arithmetic with N == 2.  */
+  if (n == 2 && optimize && can_create_pseudo_p ())
+    {
+      rtx_code rc = aa_gi_binop (code[1]);
+      if (rc != UNKNOWN)
+	{
+	  rtx sub = gen_reg_rtx (mode);
+	  emit_insn (gen_rtx_SET (sub, GEN_INT (op[0])));
+	  sub = gen_rtx_fmt_ee (rc, mode, sub, GEN_INT (op[1]));
+	  emit_insn (gen_rtx_SET (dest, sub));
+	  return;
+	}
+    }
+
+  emit_insn (gen_rtx_SET (dest, GEN_INT (op[0])));
+
+  for (int i = 1; i < n; ++i)
+    {
+      rtx x = GEN_INT (op[i]);
+      switch (code[i])
+	{
+	case AA_GI_PLUS:
+	case AA_GI_IOR:
+	case AA_GI_AND:
+	  x = gen_rtx_fmt_ee (aa_gi_binop (code[i]), mode, dest, x);
+	  x = gen_rtx_SET (dest, x);
+	  break;
+	case AA_GI_INS0:
+	case AA_GI_INS1:
+	case AA_GI_INS2:
+	case AA_GI_INS3:
+	  if (mode == SImode)
+	    x = gen_insv_immsi (dest, GEN_INT ((int)code[i]), x);
+	  else
+	    x = gen_insv_immdi (dest, GEN_INT ((int)code[i]), x);
+	  break;
+	default:
+	  gcc_unreachable ();
+	}
+      emit_insn (x);
+    }
 }
 
+} // anon namespace
+
+/* Examine IMM in MODE and return the number of insns needed to construct it.
+   If GENERATE is true, emit instructions to compute IMM into DEST.  */
+
+static inline int
+aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
+				machine_mode mode)
+{
+  genimm_aa64 data = genimm_hash<genimm_aa64>::hash (INTVAL (imm), mode);
+  if (generate)
+    data.generate (dest, mode);
+  return data.cost;
+}
 
 void
 aarch64_expand_mov_immediate (rtx dest, rtx imm)