Patchwork Further insn-attrtab.c speedup

login
register
mail settings
Submitter Jakub Jelinek
Date June 18, 2010, 9:39 p.m.
Message ID <20100618213952.GG7811@tyan-ft48-01.lab.bos.redhat.com>
Download mbox | patch
Permalink /patch/56225/
State New
Headers show

Comments

Jakub Jelinek - June 18, 2010, 9:39 p.m.
Hi!

This patch implements get_attr_* (insn) caching in insn-attrtab.c.
insn-attrtab.c contains stuff like:
    case 1685:  /* *vec_extractv2di_1_rex64 */
    case 1684:  /* *vec_extractv2di_1_rex64_avx */
      extract_constrain_insn_cached (insn);
      if (!((1 << which_alternative) & 0x7))
        {
          return 1;
        }
      else if (get_attr_memory (insn) == MEMORY_BOTH)
        {
          return 3;
        }
      else if (get_attr_memory (insn) == MEMORY_LOAD)
        {
          return 2;
        }
      else if (get_attr_memory (insn) == MEMORY_NONE)
        {
          return 1;
        }
      else
        {
          return 0;
        }
and get_attr_memory isn't pure (as it can call extract_insn_cached
or extract_constrain_insn_cached, recog_memoized, etc.).
This patch instead emits
  enum attr_memory cached_memory ATTRIBUTE_UNUSED;
...
    case 1685:  /* *vec_extractv2di_1_rex64 */
    case 1684:  /* *vec_extractv2di_1_rex64_avx */
      extract_constrain_insn_cached (insn);
      if (!((1 << which_alternative) & 0x7))
        {
          return 1;
        }
      else if ((cached_memory = get_attr_memory (insn)) == MEMORY_BOTH)
        {
          return 3;
        }
      else if (cached_memory == MEMORY_LOAD)
        {
          return 2;
        }
      else if (cached_memory == MEMORY_NONE)
        {
          return 1;
        }
      else
        {
          return 0;
        }
Similar for conditions like if (something && (((get_attr_memory (insn) == MEMORY_BOTH) || (get_attr_memory (insn) == MEMORY_LOAD))))
it emits if (something && ((((cached_memory = get_attr_memory (insn)) == MEMORY_BOTH || (cached_memory == MEMORY_LOAD))))
The resulting insn-attrtab.c compiled with -Wuninitialized, so I hope I've done
all the conditions when the cached attribute can be used finally right.

The patch initially scans all the conditions in a function and checks
which if..else if..else blocks check the same attribute more than once,
for such attributes emits cached_<attributename> vars at the start of
the function.  Then for each block figures again which attributes would
be good to cache there, and for those when not proven to be already cached
emits the (cached_XXX = get_attr_XXX (insn)) == YYY form, otherwise
cached_XXX == YYY form.  write_test_expr tracks 3 different sets of attributes
that are known to be cached: one set is what can be cached during the current
write_test_expr call - anything that is known to be cached before the if (resp.
else if) and in || (resp. &&) series operands after it has been set.
The second set which it tracks is which attributes are known to be set
for when the whole if/else if condition evaluates to 1 (i.e. what
is known to be set inside of the then block) and another set
what is known to be set in else if/else block after it.
Say (cached_XXX = get_attr_XXX (insn)) == YYY right after if (
will known to be set in both places, for
if (something && (cached_XXX = get_attr_XXX (insn)) == YYY)
only in the then block and
if (something || (cached_XXX = get_attr_XXX (insn)) == YYY)
only in the else block.
if ((something || (cached_XXX = get_attr_XXX (insn)) == YYY) && something2)
in neither.

The patch is on top of the latest genattrtab speedup patch.

Bootstrapped/regtested on x86_64-linux and i686-linux.
Compared to vanilla trunk, in gcc/*.o only
bt-load.o cc1-checksum.o cc1obj-checksum.o cc1objplus-checksum.o
cc1plus-checksum.o cfgexpand.o haifa-sched.o insn-attrtab.o
insn-automata.o sel-sched-ir.o
(i.e. checksums, insn-attrtab itself and what includes changed insn-attr.h)
changed. insn-attrtab.o .text sizes:
	vanilla		genattrtab_speedup_only	both_patches
x86_64	1003956		635929			573453
i686	864342		535461			475104

Ok to commit (assuming the speedup patch is approved; except for the
make_automaton_attrs hunk it would apply even to vanilla genattrtab.c though)?

2010-06-18  Jakub Jelinek  <jakub@redhat.com>

	* Makefile.in (build/genattrtab.o): Depend on vecprim.h.
	* genattrtab.c: Include vecprim.h.
	(cached_attrs, cached_attr_count, attrs_seen_once,
	attrs_seen_more_than_once, attrs_to_cache, attrs_cached_inside,
	attrs_cached_after): New variables.
	(find_attrs_to_cache): New function.
	(FLG_BITWISE, FLG_AFTER, FLG_INSIDE, FLG_OUTSIDE_AND): Define.
	(write_test_expr): Add attrs_cached argument, return it too,
	attempt to cache non-const attributes used more than once in
	a single case handling.
	(write_attr_get): Use find_attrs_to_cache, for caching candidates
	emit cached_* variables.  Adjust write_attr_set callers.
	(write_attr_set): Add attrs_cached attribute, use find_attrs_to_cache
	to find attributes that should be cached in this block.  Adjust
	write_test_expr callers.
	(write_attr_case): Clear attrs_to_cache.  Adjust write_attr_set
	callers.
	(make_automaton_attrs): Adjust write_test_expr caller.


	Jakub
Jeff Law - June 18, 2010, 10:41 p.m.
On 06/18/10 15:39, Jakub Jelinek wrote:
> Hi!
>
> This patch implements get_attr_* (insn) caching in insn-attrtab.c.
> insn-attrtab.c contains stuff like:
>      case 1685:  /* *vec_extractv2di_1_rex64 */
>      case 1684:  /* *vec_extractv2di_1_rex64_avx */
>        extract_constrain_insn_cached (insn);
>        if (!((1<<  which_alternative)&  0x7))
>          {
>            return 1;
>          }
>        else if (get_attr_memory (insn) == MEMORY_BOTH)
>          {
>            return 3;
>          }
>        else if (get_attr_memory (insn) == MEMORY_LOAD)
>          {
>            return 2;
>          }
>        else if (get_attr_memory (insn) == MEMORY_NONE)
>          {
>            return 1;
>          }
>        else
>          {
>            return 0;
>          }
> and get_attr_memory isn't pure (as it can call extract_insn_cached
> or extract_constrain_insn_cached, recog_memoized, etc.).
> This patch instead emits
>    enum attr_memory cached_memory ATTRIBUTE_UNUSED;
> ...
>      case 1685:  /* *vec_extractv2di_1_rex64 */
>      case 1684:  /* *vec_extractv2di_1_rex64_avx */
>        extract_constrain_insn_cached (insn);
>        if (!((1<<  which_alternative)&  0x7))
>          {
>            return 1;
>          }
>        else if ((cached_memory = get_attr_memory (insn)) == MEMORY_BOTH)
>          {
>            return 3;
>          }
>        else if (cached_memory == MEMORY_LOAD)
>          {
>            return 2;
>          }
>        else if (cached_memory == MEMORY_NONE)
>          {
>            return 1;
>          }
>        else
>          {
>            return 0;
>          }
> Similar for conditions like if (something&&  (((get_attr_memory (insn) == MEMORY_BOTH) || (get_attr_memory (insn) == MEMORY_LOAD))))
> it emits if (something&&  ((((cached_memory = get_attr_memory (insn)) == MEMORY_BOTH || (cached_memory == MEMORY_LOAD))))
> The resulting insn-attrtab.c compiled with -Wuninitialized, so I hope I've done
> all the conditions when the cached attribute can be used finally right.
>    
I'd been wondering about this class of insn-attrtab optimizations for a 
long time, probably back to the initial days of the SSA work.   
Obviously what I was worried about was the potential side effects of the 
get_attr_XXX functions since they can execute arbitrary code, which 
could include changing the form of the insn (thus changing the return 
value of subsequent calls) or something awful like setting global state, 
then querying the global state on a subsequent call.

I'm more than happy to consider any such port fundamentally broken.  We 
should probably document the assumptions we make about the cachability 
of the get_attr_XXX calls.

Unfortunately, I'm not qualified to comment on the changes themselves.  
I've tried hard through the years to not dive into the genfoo internals.

jeff
Jakub Jelinek - June 19, 2010, 6:32 a.m.
On Fri, Jun 18, 2010 at 04:41:26PM -0600, Jeff Law wrote:
> I'd been wondering about this class of insn-attrtab optimizations
> for a long time, probably back to the initial days of the SSA work.
> Obviously what I was worried about was the potential side effects of
> the get_attr_XXX functions since they can execute arbitrary code,
> which could include changing the form of the insn (thus changing the
> return value of subsequent calls) or something awful like setting
> global state, then querying the global state on a subsequent call.
> 
> I'm more than happy to consider any such port fundamentally broken.
> We should probably document the assumptions we make about the
> cachability of the get_attr_XXX calls.

Yeah, I'd consider any such port very much broken.  The port
doesn't have control over when exactly and how many times such get_attr_*
functions are called and e.g. genattrtab already has been doing
optimizations that assume it.  Say instead of emitting
if ((get_attr_memory (insn) == MEMORY_BOTH))
  {
    if (something && (get_attr_memory (insn) == MEMORY_BOTH))
      return 3;
    else
      return 2;
  }
else
  return 1;
vanilla genattrtab.c will emit
if ((get_attr_memory (insn) == MEMORY_BOTH))
  {
    if (something)
      return 3;
    else
      return 2;
  }
else
  return 1;
etc. (see e.g. eliminate_known_true).  So certainly
all those functions must be "pure with possible caching", definitely not
modify the the insn itself, when called second time with no code in between
must return the same value and ideally shouldn't change the global
variables at all in that case (I guess exception could be some statistics
computation, or say temporary memory allocation/freeing or GC allocation
that isn't referenced anywhere in global vars afterwards).

	Jakub
Michael Matz - June 21, 2010, 12:08 p.m.
Hi,

On Fri, 18 Jun 2010, Jakub Jelinek wrote:

> This patch instead emits
>   enum attr_memory cached_memory ATTRIBUTE_UNUSED;
> ...
>     case 1685:  /* *vec_extractv2di_1_rex64 */
>     case 1684:  /* *vec_extractv2di_1_rex64_avx */
>       extract_constrain_insn_cached (insn);
>       if (!((1 << which_alternative) & 0x7))
>         {
>           return 1;
>         }
>       else if ((cached_memory = get_attr_memory (insn)) == MEMORY_BOTH)
>         {
>           return 3;
>         }
>       else if (cached_memory == MEMORY_LOAD)
> 
> Similar for conditions like if (something && (((get_attr_memory (insn) 
> == MEMORY_BOTH) || (get_attr_memory (insn) == MEMORY_LOAD)))) it emits 
> if (something && ((((cached_memory = get_attr_memory (insn)) == 
> MEMORY_BOTH || (cached_memory == MEMORY_LOAD)))) The resulting 
> insn-attrtab.c compiled with -Wuninitialized, so I hope I've done all 
> the conditions when the cached attribute can be used finally right.

There's quite some complexity you had to add to handle such situations 
correctly:

  if (cond1 && get_attr_x () ...)
    ...
  else if (cond2 && get_attr_x () ...)

Here the first get_attr_x call doesn't dominate the second one, hence you 
can't transform this sequence into just one call using the cached value.  
But backends must be able to determine all attributes for all 
instructions, so we _can_ transform this into

  cached_x = get_attr_x ();
  if (cond1 && cached_x ...)
    ...
  else if (cond2 && cached_x ...)

Which is what my patches were doing, getting rid of all get_attr_... calls 
inside conditions.  I'm especially worried about a sequence of get_attr() 
calls in conditions where you can remove only the first one, if at all.

This would mean to evaluate some get_attr calls more often than in an 
unmodified insn-attrtab, but in past measurements this didn't change 
performance (and reduced code size even more).

> Ok to commit (assuming the speedup patch is approved;

Mark approved your first speedup patch.


Ciao,
Michael.
Jakub Jelinek - June 21, 2010, 12:20 p.m.
On Mon, Jun 21, 2010 at 02:08:36PM +0200, Michael Matz wrote:
> There's quite some complexity you had to add to handle such situations 
> correctly:
> 
>   if (cond1 && get_attr_x () ...)
>     ...
>   else if (cond2 && get_attr_x () ...)
> 
> Here the first get_attr_x call doesn't dominate the second one, hence you 
> can't transform this sequence into just one call using the cached value.  

Yeah, the patch figures that out and doesn't use the cached_x ==
variant in the second if.
But e.g. for
if (cond1 || get_attr_x () == A)
...
else if (cond2 || get_attr_x () == B)
...
it would.  Or for
if ((cond1 && get_attr_x () == A) || cond2)
{
if (get_attr_x () == B)
...
}
it would.

> But backends must be able to determine all attributes for all 
> instructions, so we _can_ transform this into
> 
>   cached_x = get_attr_x ();
>   if (cond1 && cached_x ...)
>     ...
>   else if (cond2 && cached_x ...)
> 
> Which is what my patches were doing, getting rid of all get_attr_... calls 
> inside conditions.

The get_attr_x () functions are often quite expensive, so I didn't want to
introduce calls that would return values that wouldn't be used.
Yeah, the generated code could be tiny bit smaller, but I'd be worried that
it would be slower too.

>   I'm especially worried about a sequence of get_attr() 
> calls in conditions where you can remove only the first one, if at all.
> 
> This would mean to evaluate some get_attr calls more often than in an 
> unmodified insn-attrtab, but in past measurements this didn't change 
> performance (and reduced code size even more).

My patch doesn't add any extra get_attr_* calls.  All it does is transform
some (get_attr_X (insn) == something) expressions to
((cached_X = get_attr_X (insn)) == something)
and some into
(cached_X == something).

> 
> > Ok to commit (assuming the speedup patch is approved;
> 
> Mark approved your first speedup patch.

I wasn't sure if Mark's mail was meant as an ack, anyway, Richard acked it
today.

	Jakub
Michael Matz - June 21, 2010, 1:15 p.m.
Hi,

On Mon, 21 Jun 2010, Jakub Jelinek wrote:

> >   I'm especially worried about a sequence of get_attr() 
> > calls in conditions where you can remove only the first one, if at all.
> > 
> > This would mean to evaluate some get_attr calls more often than in an 
> > unmodified insn-attrtab, but in past measurements this didn't change 
> > performance (and reduced code size even more).
> 
> My patch doesn't add any extra get_attr_* calls.

I know, I was talking about my variant of attacking this problem and that 
it didn't cause slowdowns.  For optimized insn-attrtab it shouldn't matter 
too much (x86 for instance mostly has get_attr_type and get_attr_mode 
calls already in the first CONDition of the non-DFA attributes, which all 
will be removed also by your patch), but when removing the optimize_attrs 
call it mattered.

I mostly was concerned with the complexity of handling the attribute reuse 
correctly, but I wrote that before I saw the approval, so please ignore me 
on that one.


Ciao,
Michael.

Patch

--- gcc/Makefile.in.jj	2010-06-17 10:59:05.000000000 +0200
+++ gcc/Makefile.in	2010-06-18 20:38:31.000000000 +0200
@@ -3802,7 +3802,7 @@  build/genattr.o : genattr.c $(RTL_BASE_H
   coretypes.h $(GTM_H) errors.h $(READ_MD_H) gensupport.h
 build/genattrtab.o : genattrtab.c $(RTL_BASE_H) $(OBSTACK_H)		\
   $(BCONFIG_H) $(SYSTEM_H) coretypes.h $(GTM_H) errors.h $(GGC_H)	\
-  $(READ_MD_H) gensupport.h
+  $(READ_MD_H) gensupport.h vecprim.h
 build/genautomata.o : genautomata.c $(RTL_BASE_H) $(OBSTACK_H)		\
   $(BCONFIG_H) $(SYSTEM_H) coretypes.h $(GTM_H) errors.h $(VEC_H)	\
   $(HASHTAB_H) gensupport.h
--- gcc/genattrtab.c.jj	2010-06-17 13:00:34.000000000 +0200
+++ gcc/genattrtab.c	2010-06-18 20:36:52.000000000 +0200
@@ -113,6 +113,7 @@  along with GCC; see the file COPYING3.  
 #include "errors.h"
 #include "read-md.h"
 #include "gensupport.h"
+#include "vecprim.h"
 
 /* Flags for make_internal_attr's `special' parameter.  */
 #define ATTR_NONE		0
@@ -277,7 +278,7 @@  static void write_attr_valueq	   (struct
 static struct attr_value *find_most_used  (struct attr_desc *);
 static void write_attr_set	   (struct attr_desc *, int, rtx,
 				    const char *, const char *, rtx,
-				    int, int);
+				    int, int, unsigned int);
 static void write_attr_case	   (struct attr_desc *, struct attr_value *,
 				    int, const char *, const char *, int, rtx);
 static void write_attr_value	   (struct attr_desc *, rtx);
@@ -3126,16 +3127,98 @@  gen_delay (rtx def, int lineno)
   delays = delay;
 }
 
+/* Names of attributes that could be possibly cached.  */
+static const char *cached_attrs[32];
+/* Number of such attributes.  */
+static int cached_attr_count;
+/* Bitmasks of possibly cached attributes.  */
+static unsigned int attrs_seen_once, attrs_seen_more_than_once;
+static unsigned int attrs_to_cache;
+static unsigned int attrs_cached_inside, attrs_cached_after;
+
+/* Finds non-const attributes that could be possibly cached.
+   When create is TRUE, fills in cached_attrs array.
+   Computes ATTRS_SEEN_ONCE and ATTRS_SEEN_MORE_THAN_ONCE
+   bitmasks.  */
+
+static void
+find_attrs_to_cache (rtx exp, bool create)
+{
+  int i;
+  const char *name;
+  struct attr_desc *attr;
+
+  if (exp == NULL)
+    return;
+
+  switch (GET_CODE (exp))
+    {
+    case NOT:
+      if (GET_CODE (XEXP (exp, 0)) == EQ_ATTR)
+	find_attrs_to_cache (XEXP (exp, 0), create);
+      return;
+
+    case EQ_ATTR:
+      name = XSTR (exp, 0);
+      if (name == alternative_name)
+	return;
+      for (i = 0; i < cached_attr_count; i++)
+	if (name == cached_attrs[i])
+	  {
+	    if ((attrs_seen_once & (1U << i)) != 0)
+	      attrs_seen_more_than_once |= (1U << i);
+	    else
+	      attrs_seen_once |= (1U << i);
+	    return;
+	  }
+      if (!create)
+	return;
+      attr = find_attr (&name, 0);
+      gcc_assert (attr);
+      if (attr->is_const)
+	return;
+      if (cached_attr_count == 32)
+	return;
+      cached_attrs[cached_attr_count] = XSTR (exp, 0);
+      attrs_seen_once |= (1U << cached_attr_count);
+      cached_attr_count++;
+      return;
+
+    case AND:
+    case IOR:
+      find_attrs_to_cache (XEXP (exp, 0), create);
+      find_attrs_to_cache (XEXP (exp, 1), create);
+      return;
+
+    case COND:
+      for (i = 0; i < XVECLEN (exp, 0); i += 2)
+	find_attrs_to_cache (XVECEXP (exp, 0, i), create);
+      return;
+
+    default:
+      return;
+    }
+}
+
 /* Given a piece of RTX, print a C expression to test its truth value.
    We use AND and IOR both for logical and bit-wise operations, so
-   interpret them as logical unless they are inside a comparison expression.
-   The first bit of FLAGS will be nonzero in that case.
+   interpret them as logical unless they are inside a comparison expression.  */
 
-   Set the second bit of FLAGS to make references to attribute values use
-   a cached local variable instead of calling a function.  */
+/* Interpret AND/IOR as bit-wise operations instead of logical.  */
+#define FLG_BITWISE		1
+/* Set if cached attribute will be known initialized in else block after
+   this condition.  This is true for LHS of toplevel && and || and
+   even for RHS of ||, but not for RHS of &&.  */
+#define FLG_AFTER		2
+/* Set if cached attribute will be known initialized in then block after
+   this condition.  This is true for LHS of toplevel && and || and
+   even for RHS of &&, but not for RHS of ||.  */
+#define FLG_INSIDE		4
+/* Cleared when an operand of &&.  */
+#define FLG_OUTSIDE_AND		8
 
-static void
-write_test_expr (rtx exp, int flags)
+static unsigned int
+write_test_expr (rtx exp, unsigned int attrs_cached, int flags)
 {
   int comparison_operator = 0;
   RTX_CODE code;
@@ -3157,12 +3240,30 @@  write_test_expr (rtx exp, int flags)
     case EQ: case NE:
     case GE: case GT:
     case LE: case LT:
-      comparison_operator = 1;
+      comparison_operator = FLG_BITWISE;
 
     case PLUS:   case MINUS:  case MULT:     case DIV:      case MOD:
     case AND:    case IOR:    case XOR:
     case ASHIFT: case LSHIFTRT: case ASHIFTRT:
-      write_test_expr (XEXP (exp, 0), flags | comparison_operator);
+      if ((code != AND && code != IOR) || (flags & FLG_BITWISE))
+	{
+	  flags &= ~(FLG_AFTER | FLG_INSIDE | FLG_OUTSIDE_AND);
+	  write_test_expr (XEXP (exp, 0), attrs_cached,
+			   flags | comparison_operator);
+	}
+      else
+	{
+	  if (code == AND)
+	    flags &= ~FLG_OUTSIDE_AND;
+	  if (GET_CODE (XEXP (exp, 0)) == code
+	      || GET_CODE (XEXP (exp, 0)) == EQ_ATTR
+	      || (GET_CODE (XEXP (exp, 0)) == NOT
+		  && GET_CODE (XEXP (XEXP (exp, 0), 0)) == EQ_ATTR))
+	    attrs_cached
+	      = write_test_expr (XEXP (exp, 0), attrs_cached, flags);
+	  else
+	    write_test_expr (XEXP (exp, 0), attrs_cached, flags);
+	}
       switch (code)
 	{
 	case EQ:
@@ -3211,13 +3312,13 @@  write_test_expr (rtx exp, int flags)
 	  printf (" %% ");
 	  break;
 	case AND:
-	  if (flags & 1)
+	  if (flags & FLG_BITWISE)
 	    printf (" & ");
 	  else
 	    printf (" && ");
 	  break;
 	case IOR:
-	  if (flags & 1)
+	  if (flags & FLG_BITWISE)
 	    printf (" | ");
 	  else
 	    printf (" || ");
@@ -3236,15 +3337,49 @@  write_test_expr (rtx exp, int flags)
 	  gcc_unreachable ();
 	}
 
-      write_test_expr (XEXP (exp, 1), flags | comparison_operator);
+      if (code == AND)
+	{
+	  /* For if (something && (cached_x = get_attr_x (insn)) == X)
+	     cached_x is only known to be initialized in then block.  */
+	  flags &= ~FLG_AFTER;
+	}
+      else if (code == IOR)
+	{
+	  if (flags & FLG_OUTSIDE_AND)
+	    /* For if (something || (cached_x = get_attr_x (insn)) == X)
+	       cached_x is only known to be initialized in else block
+	       and else if conditions.  */
+	    flags &= ~FLG_INSIDE;
+	  else
+	    /* For if ((something || (cached_x = get_attr_x (insn)) == X)
+		       && something_else)
+	       cached_x is not know to be initialized anywhere.  */
+	    flags &= ~(FLG_AFTER | FLG_INSIDE);
+	}
+      if ((code == AND || code == IOR)
+	  && (GET_CODE (XEXP (exp, 1)) == code
+	      || GET_CODE (XEXP (exp, 1)) == EQ_ATTR
+	      || (GET_CODE (XEXP (exp, 1)) == NOT
+		  && GET_CODE (XEXP (XEXP (exp, 1), 0)) == EQ_ATTR)))
+	attrs_cached
+	  = write_test_expr (XEXP (exp, 1), attrs_cached, flags);
+      else
+	write_test_expr (XEXP (exp, 1), attrs_cached,
+			 flags | comparison_operator);
       break;
 
     case NOT:
       /* Special-case (not (eq_attrq "alternative" "x")) */
-      if (! (flags & 1) && GET_CODE (XEXP (exp, 0)) == EQ_ATTR
-	  && XSTR (XEXP (exp, 0), 0) == alternative_name)
+      if (! (flags & FLG_BITWISE) && GET_CODE (XEXP (exp, 0)) == EQ_ATTR)
 	{
-	  printf ("which_alternative != %s", XSTR (XEXP (exp, 0), 1));
+	  if (XSTR (XEXP (exp, 0), 0) == alternative_name)
+	    {
+	      printf ("which_alternative != %s", XSTR (XEXP (exp, 0), 1));
+	      break;
+	    }
+
+	  printf ("! ");
+	  attrs_cached = write_test_expr (XEXP (exp, 0), attrs_cached, flags);
 	  break;
 	}
 
@@ -3255,7 +3390,7 @@  write_test_expr (rtx exp, int flags)
       switch (code)
 	{
 	case NOT:
-	  if (flags & 1)
+	  if (flags & FLG_BITWISE)
 	    printf ("~ ");
 	  else
 	    printf ("! ");
@@ -3270,14 +3405,15 @@  write_test_expr (rtx exp, int flags)
 	  gcc_unreachable ();
 	}
 
-      write_test_expr (XEXP (exp, 0), flags);
+      flags &= ~(FLG_AFTER | FLG_INSIDE | FLG_OUTSIDE_AND);
+      write_test_expr (XEXP (exp, 0), attrs_cached, flags);
       break;
 
     case EQ_ATTR_ALT:
 	{
 	  int set = XINT (exp, 0), bit = 0;
 
-	  if (flags & 1)
+	  if (flags & FLG_BITWISE)
 	    fatal ("EQ_ATTR_ALT not valid inside comparison");
 
 	  if (!set)
@@ -3323,7 +3459,7 @@  write_test_expr (rtx exp, int flags)
        have been removed by optimization.   Handle "alternative"
        specially and give error if EQ_ATTR present inside a comparison.  */
     case EQ_ATTR:
-      if (flags & 1)
+      if (flags & FLG_BITWISE)
 	fatal ("EQ_ATTR not valid inside comparison");
 
       if (XSTR (exp, 0) == alternative_name)
@@ -3340,12 +3476,26 @@  write_test_expr (rtx exp, int flags)
 	{
 	  write_test_expr (evaluate_eq_attr (exp, attr,
 					     attr->default_val->value, -2, -2),
-			   flags);
+			   attrs_cached, 0);
 	}
       else
 	{
-	  if (flags & 2)
-	    printf ("attr_%s", attr->name);
+	  int i;
+	  for (i = 0; i < cached_attr_count; i++)
+	    if (attr->name == cached_attrs[i])
+	      break;
+	  if (i < cached_attr_count && (attrs_cached & (1U << i)) != 0)
+	    printf ("cached_%s", attr->name);
+	  else if (i < cached_attr_count && (attrs_to_cache & (1U << i)) != 0)
+	    {
+	      printf ("(cached_%s = get_attr_%s (insn))",
+		      attr->name, attr->name);
+	      if (flags & FLG_AFTER)
+		attrs_cached_after |= (1U << i);
+	      if (flags & FLG_INSIDE)
+		attrs_cached_inside |= (1U << i);
+	      attrs_cached |= (1U << i);
+	    }
 	  else
 	    printf ("get_attr_%s (insn)", attr->name);
 	  printf (" == ");
@@ -3355,7 +3505,7 @@  write_test_expr (rtx exp, int flags)
 
     /* Comparison test of flags for define_delays.  */
     case ATTR_FLAG:
-      if (flags & 1)
+      if (flags & FLG_BITWISE)
 	fatal ("ATTR_FLAG not valid inside comparison");
       printf ("(flags & ATTR_FLAG_%s) != 0", XSTR (exp, 0));
       break;
@@ -3407,11 +3557,11 @@  write_test_expr (rtx exp, int flags)
       break;
 
     case IF_THEN_ELSE:
-      write_test_expr (XEXP (exp, 0), flags & 2);
+      write_test_expr (XEXP (exp, 0), attrs_cached, 0);
       printf (" ? ");
-      write_test_expr (XEXP (exp, 1), flags | 1);
+      write_test_expr (XEXP (exp, 1), attrs_cached, FLG_BITWISE);
       printf (" : ");
-      write_test_expr (XEXP (exp, 2), flags | 1);
+      write_test_expr (XEXP (exp, 2), attrs_cached, FLG_BITWISE);
       break;
 
     default:
@@ -3420,6 +3570,7 @@  write_test_expr (rtx exp, int flags)
     }
 
   printf (")");
+  return attrs_cached;
 }
 
 /* Given an attribute value, return the maximum CONST_STRING argument
@@ -3624,6 +3775,7 @@  static void
 write_attr_get (struct attr_desc *attr)
 {
   struct attr_value *av, *common_av;
+  int i, j;
 
   /* Find the most used attribute value.  Handle that as the `default' of the
      switch we will generate.  */
@@ -3653,16 +3805,48 @@  write_attr_get (struct attr_desc *attr)
 	if (av->num_insns == 1)
 	  write_attr_set (attr, 2, av->value, "return", ";",
 			  true_rtx, av->first_insn->def->insn_code,
-			  av->first_insn->def->insn_index);
+			  av->first_insn->def->insn_index, 0);
 	else if (av->num_insns != 0)
 	  write_attr_set (attr, 2, av->value, "return", ";",
-			  true_rtx, -2, 0);
+			  true_rtx, -2, 0, 0);
 
       printf ("}\n\n");
       return;
     }
 
   printf ("{\n");
+
+  /* Find attributes that are worth caching in the conditions.  */
+  cached_attr_count = 0;
+  attrs_seen_more_than_once = 0;
+  for (av = attr->first_value; av; av = av->next)
+    {
+      attrs_seen_once = 0;
+      find_attrs_to_cache (av->value, true);
+    }
+  /* Remove those that aren't worth caching from the array.  */
+  for (i = 0, j = 0; i < cached_attr_count; i++)
+    if ((attrs_seen_more_than_once & (1U << i)) != 0)
+      {
+	const char *name = cached_attrs[i];
+	struct attr_desc *cached_attr;
+	if (i != j)
+	  cached_attrs[j] = name;
+	cached_attr = find_attr (&name, 0);
+	gcc_assert (cached_attr && cached_attr->is_const == 0);
+	if (cached_attr->enum_name)
+	  printf ("  enum %s", cached_attr->enum_name);
+	else if (!cached_attr->is_numeric)
+	  printf ("  enum attr_%s", cached_attr->name);
+	else
+	  printf ("  int");
+	printf (" cached_%s ATTRIBUTE_UNUSED;\n", name);
+	j++;
+      }
+  cached_attr_count = j;
+  if (cached_attr_count)
+    printf ("\n");
+
   printf ("  switch (recog_memoized (insn))\n");
   printf ("    {\n");
 
@@ -3672,6 +3856,7 @@  write_attr_get (struct attr_desc *attr)
 
   write_attr_case (attr, common_av, 0, "return", ";", 4, true_rtx);
   printf ("    }\n}\n\n");
+  cached_attr_count = 0;
 }
 
 /* Given an AND tree of known true terms (because we are inside an `if' with
@@ -3710,7 +3895,7 @@  eliminate_known_true (rtx known_true, rt
 static void
 write_attr_set (struct attr_desc *attr, int indent, rtx value,
 		const char *prefix, const char *suffix, rtx known_true,
-		int insn_code, int insn_index)
+		int insn_code, int insn_index, unsigned int attrs_cached)
 {
   if (GET_CODE (value) == COND)
     {
@@ -3722,6 +3907,15 @@  write_attr_set (struct attr_desc *attr, 
       int first_if = 1;
       int i;
 
+      if (cached_attr_count)
+	{
+	  attrs_seen_once = 0;
+	  attrs_seen_more_than_once = 0;
+	  for (i = 0; i < XVECLEN (value, 0); i += 2)
+	    find_attrs_to_cache (XVECEXP (value, 0, i), false);
+	  attrs_to_cache |= attrs_seen_more_than_once;
+	}
+
       for (i = 0; i < XVECLEN (value, 0); i += 2)
 	{
 	  rtx testexp;
@@ -3752,17 +3946,22 @@  write_attr_set (struct attr_desc *attr, 
 	  if (inner_true == false_rtx)
 	    continue;
 
+	  attrs_cached_inside = attrs_cached;
+	  attrs_cached_after = attrs_cached;
 	  write_indent (indent);
 	  printf ("%sif ", first_if ? "" : "else ");
 	  first_if = 0;
-	  write_test_expr (testexp, 0);
+	  write_test_expr (testexp, attrs_cached,
+			   (FLG_AFTER | FLG_INSIDE | FLG_OUTSIDE_AND));
+	  attrs_cached = attrs_cached_after;
 	  printf ("\n");
 	  write_indent (indent + 2);
 	  printf ("{\n");
 
 	  write_attr_set (attr, indent + 4,
 			  XVECEXP (value, 0, i + 1), prefix, suffix,
-			  inner_true, insn_code, insn_index);
+			  inner_true, insn_code, insn_index,
+			  attrs_cached_inside);
 	  write_indent (indent + 2);
 	  printf ("}\n");
 	  our_known_true = newexp;
@@ -3777,7 +3976,8 @@  write_attr_set (struct attr_desc *attr, 
 	}
 
       write_attr_set (attr, first_if ? indent : indent + 4, default_val,
-		      prefix, suffix, our_known_true, insn_code, insn_index);
+		      prefix, suffix, our_known_true, insn_code, insn_index,
+		      attrs_cached);
 
       if (! first_if)
 	{
@@ -3858,13 +4058,14 @@  write_attr_case (struct attr_desc *attr,
       printf ("extract_insn_cached (insn);\n");
     }
 
+  attrs_to_cache = 0;
   if (av->num_insns == 1)
     write_attr_set (attr, indent + 2, av->value, prefix, suffix,
 		    known_true, av->first_insn->def->insn_code,
-		    av->first_insn->def->insn_index);
+		    av->first_insn->def->insn_index, 0);
   else
     write_attr_set (attr, indent + 2, av->value, prefix, suffix,
-		    known_true, -2, 0);
+		    known_true, -2, 0, 0);
 
   if (strncmp (prefix, "return", 6))
     {
@@ -4547,7 +4748,7 @@  make_automaton_attrs (void)
 	    }
 	  else
 	    printf ("  else if (");
-	  write_test_expr (test, 0);
+	  write_test_expr (test, 0, 0);
 	  printf (")\n");
 	  printf ("    {\n");
 	  printf ("      internal_dfa_insn_code\n");