diff mbox

Speed up insn-attrtab.c compilation

Message ID Pine.LNX.4.64.1205071428440.25409@wotan.suse.de
State New
Headers show

Commit Message

Michael Matz May 7, 2012, 1:11 p.m. UTC
Hi,

neverending story.  Maybe this time something gets in :)

This patch changes generation of insn-attrtab.c to:
* order attributes topologically (so that the "inlining" genattrtab does
  is as effective as possible, and doesn't hit the size limits too much)
* adjusts the rtx_cost for the attribute tests to correctly account for
  all tests (even the cheap ones have an impact after all)
* lowers the limits for inlining
* only uses an "optimized" new value if it actually is cheaper than
  the old value (and overall not too expensive)
* limits the number of predicates we remember from if/elseif/elseif 
  cascades to optimize further elseif tests (doesn't change code on x86,
  but reduces generation time)

The effect is that insn-attrtab.c on x86_64 is 2.9 MB instead of 6.0 MB, 
and that compilation of insn-attrtab.c with an unoptimized (i.e. stage1) 
cc1 is done in 45 seconds instead of 269 seconds.  The genattrtab call 
itself (unoptimized genattrtab) takes 9.5 seconds instead of 9.1.

This was before Steven implemented the three file split, with that split 
take the above as cumulative time, the overall speedup transfers to the 
split version.

Compilation time on x86_64-linux, with an unoptimized cc1(plus), with -g 
-O2:
            unpatched        patched
big-code.c  157.1s           157.4s
kdecore.cc  304.3s           301.9s

(i.e. noise).

In particular this patch doesn't contain my controversial variant of 
caching get_attr_xxx calls (which dynamically calls those functions more 
often than Jakubs variant).  Still a good speedup, though.

Regstrapping on x86_64-linux in progress.  Okay if that passes?  (I'd like 
to retain the #if 0 code therein, it's just a generator and it helps 
easily debugging the topsort thingy if something breaks)


Ciao,
Michael.
-----------------------
	* genattrtab.c (attr_rtx_cost): Move earlier, start with cost being 1.
	(simplify_test_exp): Handle one more case of distributive law,
	decrease cost threshold.
	(tests_attr_p, get_attr_order): New functions.
	(optimize_attrs): Use topological order, inline only cheap values.
	(write_attr_set): Reset our_known_true after some time.

Comments

Mike Stump May 7, 2012, 7:10 p.m. UTC | #1
On May 7, 2012, at 6:11 AM, Michael Matz wrote:
> I'd like to retain the #if 0 code therein,

Can you structure this code as

#define DEBUG 0

  if (DEBUG) ...

?

If so, that would be a preferable way to structure the code.

Nice work.
Michael Matz May 8, 2012, 10:17 a.m. UTC | #2
Hi,

On Mon, 7 May 2012, Mike Stump wrote:

> On May 7, 2012, at 6:11 AM, Michael Matz wrote:
> > I'd like to retain the #if 0 code therein,
> 
> Can you structure this code as
> 
> #define DEBUG 0
> 
>   if (DEBUG) ...
> 
> ?
> 
> If so, that would be a preferable way to structure the code.

Sure, consider the patch so amended.


Ciao,
Michael.
Michael Matz May 17, 2012, 2:40 p.m. UTC | #3
Ping.

On Tue, 8 May 2012, Michael Matz wrote:

> Hi,
> 
> On Mon, 7 May 2012, Mike Stump wrote:
> 
> > On May 7, 2012, at 6:11 AM, Michael Matz wrote:
> > > I'd like to retain the #if 0 code therein,
> > 
> > Can you structure this code as
> > 
> > #define DEBUG 0
> > 
> >   if (DEBUG) ...
> > 
> > ?
> > 
> > If so, that would be a preferable way to structure the code.
> 
> Sure, consider the patch so amended.
> 
> 
> Ciao,
> Michael.
>
Richard Biener May 18, 2012, 10:02 a.m. UTC | #4
On Thu, May 17, 2012 at 4:40 PM, Michael Matz <matz@suse.de> wrote:
> Ping.

Ok.

Thanks,
Richard.

> On Tue, 8 May 2012, Michael Matz wrote:
>
>> Hi,
>>
>> On Mon, 7 May 2012, Mike Stump wrote:
>>
>> > On May 7, 2012, at 6:11 AM, Michael Matz wrote:
>> > > I'd like to retain the #if 0 code therein,
>> >
>> > Can you structure this code as
>> >
>> > #define DEBUG 0
>> >
>> >   if (DEBUG) ...
>> >
>> > ?
>> >
>> > If so, that would be a preferable way to structure the code.
>>
>> Sure, consider the patch so amended.
>>
>>
>> Ciao,
>> Michael.
>>
diff mbox

Patch

Index: genattrtab.c
===================================================================
--- genattrtab.c	(revision 187098)
+++ genattrtab.c	(working copy)
@@ -1636,6 +1636,57 @@  write_length_unit_log (void)
   printf ("EXPORTED_CONST int length_unit_log = %u;\n", length_unit_log);
 }
 
+/* Compute approximate cost of the expression.  Used to decide whether
+   expression is cheap enough for inline.  */
+static int
+attr_rtx_cost (rtx x)
+{
+  int cost = 1;
+  enum rtx_code code;
+  if (!x)
+    return 0;
+  code = GET_CODE (x);
+  switch (code)
+    {
+    case MATCH_OPERAND:
+      if (XSTR (x, 1)[0])
+	return 10;
+      else
+	return 1;
+
+    case EQ_ATTR_ALT:
+      return 1;
+
+    case EQ_ATTR:
+      /* Alternatives don't result into function call.  */
+      if (!strcmp_check (XSTR (x, 0), alternative_name))
+	return 1;
+      else
+	return 5;
+    default:
+      {
+	int i, j;
+	const char *fmt = GET_RTX_FORMAT (code);
+	for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+	  {
+	    switch (fmt[i])
+	      {
+	      case 'V':
+	      case 'E':
+		for (j = 0; j < XVECLEN (x, i); j++)
+		  cost += attr_rtx_cost (XVECEXP (x, i, j));
+		break;
+	      case 'e':
+		cost += attr_rtx_cost (XEXP (x, i));
+		break;
+	      }
+	  }
+      }
+      break;
+    }
+  return cost;
+}
+
 /* Take a COND expression and see if any of the conditions in it can be
    simplified.  If any are known true or known false for the particular insn
    code, the COND can be further simplified.
@@ -2262,57 +2313,6 @@  simplify_or_tree (rtx exp, rtx *pterm, i
   return exp;
 }
 
-/* Compute approximate cost of the expression.  Used to decide whether
-   expression is cheap enough for inline.  */
-static int
-attr_rtx_cost (rtx x)
-{
-  int cost = 0;
-  enum rtx_code code;
-  if (!x)
-    return 0;
-  code = GET_CODE (x);
-  switch (code)
-    {
-    case MATCH_OPERAND:
-      if (XSTR (x, 1)[0])
-	return 10;
-      else
-	return 0;
-
-    case EQ_ATTR_ALT:
-      return 0;
-
-    case EQ_ATTR:
-      /* Alternatives don't result into function call.  */
-      if (!strcmp_check (XSTR (x, 0), alternative_name))
-	return 0;
-      else
-	return 5;
-    default:
-      {
-	int i, j;
-	const char *fmt = GET_RTX_FORMAT (code);
-	for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
-	  {
-	    switch (fmt[i])
-	      {
-	      case 'V':
-	      case 'E':
-		for (j = 0; j < XVECLEN (x, i); j++)
-		  cost += attr_rtx_cost (XVECEXP (x, i, j));
-		break;
-	      case 'e':
-		cost += attr_rtx_cost (XEXP (x, i));
-		break;
-	      }
-	  }
-      }
-      break;
-    }
-  return cost;
-}
-
 /* Simplify test expression and use temporary obstack in order to avoid
    memory bloat.  Use ATTR_IND_SIMPLIFIED to avoid unnecessary simplifications
    and avoid unnecessary copying if possible.  */
@@ -2645,6 +2645,25 @@  simplify_test_exp (rtx exp, int insn_cod
 	  return SIMPLIFY_TEST_EXP (newexp, insn_code, insn_index);
 	}
 
+      /* Similarly,
+	    convert (ior (and (y) (x))
+			 (and (z) (x)))
+	    to      (and (ior (y) (z))
+			 (x))
+         Note that we want the common term to stay at the end.
+       */
+
+      else if (GET_CODE (left) == AND && GET_CODE (right) == AND
+	       && attr_equal_p (XEXP (left, 1), XEXP (right, 1)))
+	{
+	  newexp = attr_rtx (IOR, XEXP (left, 0), XEXP (right, 0));
+
+	  left = newexp;
+	  right = XEXP (right, 1);
+	  newexp = attr_rtx (AND, left, right);
+	  return SIMPLIFY_TEST_EXP (newexp, insn_code, insn_index);
+	}
+
       /* See if all or all but one of the insn's alternatives are specified
 	 in this tree.  Optimize if so.  */
 
@@ -2780,7 +2799,7 @@  simplify_test_exp (rtx exp, int insn_cod
 	      x = evaluate_eq_attr (exp, attr, av->value,
 				    insn_code, insn_index);
 	      x = SIMPLIFY_TEST_EXP (x, insn_code, insn_index);
-	      if (attr_rtx_cost(x) < 20)
+	      if (attr_rtx_cost(x) < 7)
 		return x;
 	    }
 	}
@@ -2800,6 +2819,133 @@  simplify_test_exp (rtx exp, int insn_cod
   return newexp;
 }
 
+/* Return 1 if any EQ_ATTR subexpression of P refers to ATTR,
+   otherwise return 0.  */
+
+static int
+tests_attr_p (rtx p, struct attr_desc *attr)
+{
+  const char *fmt;
+  int i, ie, j, je;
+
+  if (GET_CODE (p) == EQ_ATTR)
+    {
+      if (XSTR (p, 0) != attr->name)
+	return 0;
+      return 1;
+    }
+
+  fmt = GET_RTX_FORMAT (GET_CODE (p));
+  ie = GET_RTX_LENGTH (GET_CODE (p));
+  for (i = 0; i < ie; i++)
+    {
+      switch (*fmt++)
+	{
+	case 'e':
+	  if (tests_attr_p (XEXP (p, i), attr))
+	    return 1;
+	  break;
+
+	case 'E':
+	  je = XVECLEN (p, i);
+	  for (j = 0; j < je; ++j)
+	    if (tests_attr_p (XVECEXP (p, i, j), attr))
+	      return 1;
+	  break;
+	}
+    }
+
+  return 0;
+}
+
+/* Calculate a topological sorting of all attributes so that
+   all attributes only depend on attributes in front of it.
+   Place the result in *RET (which is a pointer to an array of
+   attr_desc pointers), and return the size of that array.  */
+
+static int
+get_attr_order (struct attr_desc ***ret)
+{
+  int i, j;
+  int num = 0;
+  struct attr_desc *attr;
+  struct attr_desc **all, **sorted;
+  char *handled;
+  for (i = 0; i < MAX_ATTRS_INDEX; i++)
+    for (attr = attrs[i]; attr; attr = attr->next)
+      num++;
+  all = XNEWVEC (struct attr_desc *, num);
+  sorted = XNEWVEC (struct attr_desc *, num);
+  handled = XCNEWVEC (char, num);
+  num = 0;
+  for (i = 0; i < MAX_ATTRS_INDEX; i++)
+    for (attr = attrs[i]; attr; attr = attr->next)
+      all[num++] = attr;
+
+  j = 0;
+  for (i = 0; i < num; i++)
+    if (all[i]->is_const)
+      handled[i] = 1, sorted[j++] = all[i];
+
+  /* We have only few attributes hence we can live with the inner
+     loop being O(n^2), unlike the normal fast variants of topological
+     sorting.  */
+  while (j < num)
+    {
+      for (i = 0; i < num; i++)
+	if (!handled[i])
+	  {
+	    /* Let's see if I depends on anything interesting.  */
+	    int k;
+	    for (k = 0; k < num; k++)
+	      if (!handled[k])
+		{
+		  struct attr_value *av;
+		  for (av = all[i]->first_value; av; av = av->next)
+		    if (av->num_insns != 0)
+		      if (tests_attr_p (av->value, all[k]))
+			break;
+
+		  if (av)
+		    /* Something in I depends on K.  */
+		    break;
+		}
+	    if (k == num)
+	      {
+		/* Nothing in I depended on anything intersting, so
+		   it's done.  */
+		handled[i] = 1;
+		sorted[j++] = all[i];
+	      }
+	  }
+    }
+
+#if 0
+  for (j = 0; j < num; j++)
+    {
+      struct attr_desc *attr2;
+      struct attr_value *av;
+
+      attr = sorted[j];
+      fprintf (stderr, "%s depends on: ", attr->name);
+      for (i = 0; i < MAX_ATTRS_INDEX; ++i)
+	for (attr2 = attrs[i]; attr2; attr2 = attr2->next)
+	  if (!attr2->is_const)
+	    for (av = attr->first_value; av; av = av->next)
+	      if (av->num_insns != 0)
+		if (tests_attr_p (av->value, attr2))
+		  {
+		    fprintf (stderr, "%s, ", attr2->name);
+		    break;
+		  }
+      fprintf (stderr, "\n");
+    }
+#endif
+  free (all);
+  *ret = sorted;
+  return num;
+}
+
 /* Optimize the attribute lists by seeing if we can determine conditional
    values from the known values of other attributes.  This will save subroutine
    calls during the compilation.  */
@@ -2814,6 +2960,8 @@  optimize_attrs (void)
   int i;
   struct attr_value_list *ivbuf;
   struct attr_value_list *iv;
+  struct attr_desc **topsort;
+  int topnum;
 
   /* For each insn code, make a list of all the insn_ent's for it,
      for all values for all attributes.  */
@@ -2829,18 +2977,22 @@  optimize_attrs (void)
 
   iv = ivbuf = XNEWVEC (struct attr_value_list, num_insn_ents);
 
-  for (i = 0; i < MAX_ATTRS_INDEX; i++)
-    for (attr = attrs[i]; attr; attr = attr->next)
-      for (av = attr->first_value; av; av = av->next)
-	for (ie = av->first_insn; ie; ie = ie->next)
-	  {
-	    iv->attr = attr;
-	    iv->av = av;
-	    iv->ie = ie;
-	    iv->next = insn_code_values[ie->def->insn_code];
-	    insn_code_values[ie->def->insn_code] = iv;
-	    iv++;
-	  }
+  /* Create the chain of insn*attr values such that we see dependend
+     attributes after their dependencies.  As we use a stack via the
+     next pointers start from the end of the topological order.  */
+  topnum = get_attr_order (&topsort);
+  for (i = topnum - 1; i >= 0; i--)
+    for (av = topsort[i]->first_value; av; av = av->next)
+      for (ie = av->first_insn; ie; ie = ie->next)
+	{
+	  iv->attr = topsort[i];
+	  iv->av = av;
+	  iv->ie = ie;
+	  iv->next = insn_code_values[ie->def->insn_code];
+	  insn_code_values[ie->def->insn_code] = iv;
+	  iv++;
+	}
+  free (topsort);
 
   /* Sanity check on num_insn_ents.  */
   gcc_assert (iv == ivbuf + num_insn_ents);
@@ -2875,7 +3027,15 @@  optimize_attrs (void)
 	    }
 
 	  rtl_obstack = old;
-	  if (newexp != av->value)
+	  /* If we created a new value for this instruction, and it's
+	     cheaper than the old value, and overall cheap, use that
+	     one as specific value for the current instruction.
+	     The last test is to avoid exploding the get_attr_ function
+	     sizes for no much gain.  */
+	  if (newexp != av->value
+	      && attr_rtx_cost (newexp) < attr_rtx_cost (av->value)
+	      && attr_rtx_cost (newexp) < 26
+	     )
 	    {
 	      newexp = attr_copy_rtx (newexp);
 	      remove_insn_ent (av, ie);
@@ -3975,6 +4135,10 @@  write_attr_set (struct attr_desc *attr,
 	  rtx testexp;
 	  rtx inner_true;
 
+	  /* Reset our_known_true after some time to not accumulate
+	     too much cruft (slowing down genattrtab).  */
+	  if ((i & 31) == 0)
+	    our_known_true = known_true;
 	  testexp = eliminate_known_true (our_known_true,
 					  XVECEXP (value, 0, i),
 					  insn_code, insn_index);