Ping: RFA: add lock_length attribute to break branch-shortening cycles

Message ID 20121019152353.58plpczcgso4og8o-nzlynne@webmail.spamcop.net
State New

Commit Message

Joern Rennecke Oct. 19, 2012, 7:23 p.m. UTC
Quoting Richard Sandiford <rdsandiford@googlemail.com>:

> Joern Rennecke <joern.rennecke@embecosm.com> writes:

>> When the condition is not fulfilled, we want to keep the length from the
>> previous iteration.
>
> Right, that's what I mean.  So we need to make sure that the difference
> between the address of the current instruction and the address of the
> next instruction also doesn't change.  The code as posted was:
>
> 	  else
> 	    {
> 	      new_length = insn_current_length (insn);
> 	      insn_current_address += new_length;
> 	    }
>
> #ifdef ADJUST_INSN_LENGTH
> 	  /* If needed, do any adjustment.  */
> 	  tmp_length = new_length;
> 	  ADJUST_INSN_LENGTH (insn, new_length);
> 	  insn_current_address += (new_length - tmp_length);
> #endif
>
> 	  if (new_length != insn_lengths[uid]
> 	      && (new_length > insn_lengths[uid] || first_pass))
> 	    {
> 	      insn_lengths[uid] = new_length;
> 	      something_changed = 1;
> 	    }
>
> which always leaves insn_current_address based off new_length,
> even if new_length is out of kilter with insn_lengths[uid].

Indeed.

I have attached a patch which I believe addresses the issues you raised.

Because making the length sticky is problematic when UIDs aren't unique,
I have tested this patch on top of the following patch:

http://gcc.gnu.org/ml/gcc-patches/2012-10/msg01806.html

Regression tested on i686-pc-linux-gnu X cris-elf and
i686-pc-linux-gnu X mipsel-elf.

2012-10-19  Jörn Rennecke  <joern.rennecke@arc.com>

	* final.c (shorten_branches): When optimizing, start with small
	length and increase from there, and don't decrease lengths.

Comments

Richard Sandiford Oct. 20, 2012, 9:26 a.m. UTC | #1
Joern Rennecke <joern.rennecke@embecosm.com> writes:
> @@ -1165,6 +1175,7 @@ shorten_branches (rtx first ATTRIBUTE_UN
>       get the current insn length.  If it has changed, reflect the change.
>       When nothing changes for a full pass, we are done.  */
>  
> +  bool first_pass ATTRIBUTE_UNUSED = true;
>    while (something_changed)
>      {
>        something_changed = 0;
> @@ -1220,6 +1231,7 @@ shorten_branches (rtx first ATTRIBUTE_UN
>  	      rtx prev;
>  	      int rel_align = 0;
>  	      addr_diff_vec_flags flags;
> +	      enum machine_mode vec_mode;
>  
>  	      /* Avoid automatic aggregate initialization.  */
>  	      flags = ADDR_DIFF_VEC_FLAGS (body);
> @@ -1298,9 +1310,12 @@ shorten_branches (rtx first ATTRIBUTE_UN
>  		  else
>  		    max_addr += align_fuzz (max_lab, rel_lab, 0, 0);
>  		}
> -	      PUT_MODE (body, CASE_VECTOR_SHORTEN_MODE (min_addr - rel_addr,
> -							max_addr - rel_addr,
> -							body));
> +	      vec_mode = CASE_VECTOR_SHORTEN_MODE (min_addr - rel_addr,
> +						   max_addr - rel_addr, body);
> +	      if (first_pass
> +		  || (GET_MODE_SIZE (vec_mode)
> +		      >= GET_MODE_SIZE (GET_MODE (body))))
> +		PUT_MODE (body, vec_mode);
>  	      if (JUMP_TABLES_IN_TEXT_SECTION
>  		  || readonly_data_section == text_section)
>  		{

I think instead the set-up loop should have:

      if (GET_CODE (body) == ADDR_VEC || GET_CODE (body) == ADDR_DIFF_VEC)
	{
#ifdef CASE_VECTOR_SHORTEN_MODE
	  if (increasing && GET_CODE (body) == ADDR_DIFF_VEC)
	    PUT_MODE (body, CASE_VECTOR_SHORTEN_MODE (0, 0, body));
#endif
	  /* This only takes room if read-only data goes into the text
	     section.  */
	  if (JUMP_TABLES_IN_TEXT_SECTION
	      || readonly_data_section == text_section)
	    insn_lengths[uid] = (XVECLEN (body,
					  GET_CODE (body) == ADDR_DIFF_VEC)
				 * GET_MODE_SIZE (GET_MODE (body)));
	  /* Alignment is handled by ADDR_VEC_ALIGN.  */
	}

(with just the CASE_VECTOR_SHORTEN_MODE part being new).
We then start with the most optimistic length possible,
as with everything else.
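
To make the termination argument concrete, here is a minimal standalone
sketch of the idea (illustrative C, not GCC code; the insn layout, the
lengths and the 8-byte threshold are all invented):

#include <stdio.h>

#define N 6

int main (void)
{
  /* Insn i is a branch to insn target[i] when target[i] >= 0,
     otherwise a fixed-length insn of fixed[i] bytes.  A branch takes
     2 bytes if its target is within 8 bytes, else 4.  Lengths start
     minimal, never shrink, and are bounded above, so the loop cannot
     oscillate and must terminate.  */
  static const int target[N] = { 4, -1, -1, -1, -1, 0 };
  static const int fixed[N]  = { 0,  2,  2,  2,  2, 0 };
  int length[N], addr[N + 1], changed = 1, passes = 0;

  for (int i = 0; i < N; i++)           /* optimistic initial lengths */
    length[i] = target[i] >= 0 ? 2 : fixed[i];

  while (changed)
    {
      changed = 0;
      passes++;
      addr[0] = 0;                      /* recompute all addresses */
      for (int i = 0; i < N; i++)
        addr[i + 1] = addr[i] + length[i];
      for (int i = 0; i < N; i++)
        if (target[i] >= 0)
          {
            int dist = addr[target[i]] - addr[i];
            int needed = dist >= -8 && dist <= 8 ? 2 : 4;
            if (needed > length[i])     /* key rule: never decrease */
              {
                length[i] = needed;
                changed = 1;
              }
          }
    }
  printf ("converged after %d passes\n", passes);
  return 0;
}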

The main shortening if statement should then be conditional on:

#ifdef CASE_VECTOR_SHORTEN_MODE
	  if (increasing
	      && JUMP_P (insn)
	      && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
...

(testing "increasing" rather than "optimize").  The code for changing
the mode should simply be:

	      if (GET_MODE_SIZE (vec_mode)
		  >= GET_MODE_SIZE (GET_MODE (body)))
		PUT_MODE (body, vec_mode);

with first_pass no longer being necessary.

OK with that change, if you agree.

Richard
Joern Rennecke Oct. 20, 2012, 11:16 a.m. UTC | #2
Quoting Richard Sandiford <rdsandiford@googlemail.com>:

> I think instead the set-up loop should have:
>
>       if (GET_CODE (body) == ADDR_VEC || GET_CODE (body) == ADDR_DIFF_VEC)
> 	{
> #ifdef CASE_VECTOR_SHORTEN_MODE
> 	  if (increasing && GET_CODE (body) == ADDR_DIFF_VEC)
> 	    PUT_MODE (body, CASE_VECTOR_SHORTEN_MODE (0, 0, body));
> #endif
> 	  /* This only takes room if read-only data goes into the text
> 	     section.  */
> 	  if (JUMP_TABLES_IN_TEXT_SECTION
> 	      || readonly_data_section == text_section)
> 	    insn_lengths[uid] = (XVECLEN (body,
> 					  GET_CODE (body) == ADDR_DIFF_VEC)
> 				 * GET_MODE_SIZE (GET_MODE (body)));
> 	  /* Alignment is handled by ADDR_VEC_ALIGN.  */
> 	}
>
> (with just the CASE_VECTOR_SHORTEN_MODE part being new).
> We then start with the most optimistic length possible,
> as with everything else.

Well, ports could always tailor the initial mode with CASE_VECTOR_MODE,
but it is indeed simpler when the branch shortening pass provides a
sensible initialization.

How about putting this at the end of this block:
#ifdef CASE_VECTOR_SHORTEN_MODE
   if (optimize)
     {
       /* Look for ADDR_DIFF_VECs, and initialize their minimum and maximum
          label fields.  */

Of course increasing would have to be set earlier.

As for when it would make sense for increasing to be something other than
optimize: I think we could have a flag to control this, turned on by
default at -O1; it could then be turned off if people want only some other
(quicker?) optimizations, or if they want to work around a
machine-specific bug triggered by a single source file.
I.e. optimize might be set when increasing is not.  The other way around
makes little sense: iterating branch shortening is certainly an optimization.
So is using CASE_VECTOR_SHORTEN_MODE, but it does not require iterating,
as we've already calculated the required addresses in the preliminary pass
before the main loop.
So it makes sense to keep the condition for CASE_VECTOR_SHORTEN_MODE
as 'optimize' (or use a different flag, e.g. case_vector_shorten_p),
and have this at the end of this initial CASE_VECTOR_SHORTEN_MODE block:

           flags.min_after_base = min > rel;
           flags.max_after_base = max > rel;
           ADDR_DIFF_VEC_FLAGS (pat) = flags;

+         if (increasing)
+           PUT_MODE (body, CASE_VECTOR_SHORTEN_MODE (0, 0, body));
         }
     }
#endif /* CASE_VECTOR_SHORTEN_MODE */

               continue;
             }
#endif /* CASE_VECTOR_SHORTEN_MODE */
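
To make the intended relationship concrete, a minimal sketch (the flag
name below is invented for illustration; it is not part of the posted
patch):

  /* Hypothetical wiring: increasing implies optimize, but optimize may
     be set while increasing is not.  The invented flag would default
     to on at -O1.  */
  bool case_vector_shorten_p = optimize != 0;
  bool increasing = optimize && flag_branch_shorten_increase;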



> The main shortening if statement should then be conditional on:
>
> #ifdef CASE_VECTOR_SHORTEN_MODE
> 	  if (increasing
> 	      && JUMP_P (insn)
> 	      && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)

As explained above, CASE_VECTOR_SHORTEN_MODE can make sense in the
decreasing/non-iterating branch shortening mode.

So I think the code for changing the mode should be:

                if (!increasing
                    || (GET_MODE_SIZE (vec_mode)
                        >= GET_MODE_SIZE (GET_MODE (body))))
                  PUT_MODE (body, vec_mode);

Richard Sandiford Oct. 20, 2012, 12:14 p.m. UTC | #3
Joern Rennecke <joern.rennecke@embecosm.com> writes:
> Quoting Richard Sandiford <rdsandiford@googlemail.com>:
>> I think instead the set-up loop should have:
>>
>>       if (GET_CODE (body) == ADDR_VEC || GET_CODE (body) == ADDR_DIFF_VEC)
>> 	{
>> #ifdef CASE_VECTOR_SHORTEN_MODE
>> 	  if (increasing && GET_CODE (body) == ADDR_DIFF_VEC)
>> 	    PUT_MODE (body, CASE_VECTOR_SHORTEN_MODE (0, 0, body));
>> #endif
>> 	  /* This only takes room if read-only data goes into the text
>> 	     section.  */
>> 	  if (JUMP_TABLES_IN_TEXT_SECTION
>> 	      || readonly_data_section == text_section)
>> 	    insn_lengths[uid] = (XVECLEN (body,
>> 					  GET_CODE (body) == ADDR_DIFF_VEC)
>> 				 * GET_MODE_SIZE (GET_MODE (body)));
>> 	  /* Alignment is handled by ADDR_VEC_ALIGN.  */
>> 	}
>>
>> (with just the CASE_VECTOR_SHORTEN_MODE part being new).
>> We then start with the most optimistic length possible,
>> as with everything else.
>
> Well, ports could always tailor the initial mode with CASE_VECTOR_MODE,
> but it is indeed simpler when the branch shortening pass provides a
> sensible initialization.

That, plus I think CASE_VECTOR_MODE should always be the conservatively
correct mode.
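
For illustration, a port could define the pair along these lines (the
ranges and modes are made up, not taken from any real port):

  /* Conservatively correct default, used before addresses are known.  */
  #define CASE_VECTOR_MODE SImode

  /* Once the min/max offsets from the base label are known, a smaller
     mode may be chosen.  */
  #define CASE_VECTOR_SHORTEN_MODE(MIN_OFFSET, MAX_OFFSET, BODY) \
    ((MIN_OFFSET) >= -128 && (MAX_OFFSET) <= 127 ? QImode \
     : (MIN_OFFSET) >= -32768 && (MAX_OFFSET) <= 32767 ? HImode \
     : SImode)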

> How about putting this at the end of this block:
> #ifdef CASE_VECTOR_SHORTEN_MODE
>    if (optimize)
>      {
>        /* Look for ADDR_DIFF_VECs, and initialize their minimum and maximum
>           label fields.  */

OK, that does sound better.

> As for when it would make sense for increasing to be something other than
> optimize: I think we could have a flag to control this, turned on by
> default at -O1; it could then be turned off if people want only some other
> (quicker?) optimizations, or if they want to work around a
> machine-specific bug triggered by a single source file.
> I.e. optimize might be set when increasing is not.  The other way around
> makes little sense: iterating branch shortening is certainly an optimization.
> So is using CASE_VECTOR_SHORTEN_MODE, but it does not require iterating,
> as we've already calculated the required addresses in the preliminary pass
> before the main loop.
> So it makes sense to keep the condition for CASE_VECTOR_SHORTEN_MODE
> as 'optimize' (or use a different flag, e.g. case_vector_shorten_p),
> and have this at the end of this initial CASE_VECTOR_SHORTEN_MODE block:
>
>            flags.min_after_base = min > rel;
>            flags.max_after_base = max > rel;
>            ADDR_DIFF_VEC_FLAGS (pat) = flags;
>
> +         if (increasing)
> +           PUT_MODE (body, CASE_VECTOR_SHORTEN_MODE (0, 0, body));
>          }
>      }
> #endif /* CASE_VECTOR_SHORTEN_MODE */
>
>                continue;
>              }
> #endif /* CASE_VECTOR_SHORTEN_MODE */
>
>
>
>> The main shortening if statement should then be conditional on:
>>
>> #ifdef CASE_VECTOR_SHORTEN_MODE
>> 	  if (increasing
>> 	      && JUMP_P (insn)
>> 	      && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
>
> As explained above, CASE_VECTOR_SHORTEN_MODE can make sense in the
> decreasing/non-iterating branch shortening mode.
>
> So I think the code for changing the mode should be:
>
>                 if (!increasing
>                     || (GET_MODE_SIZE (vec_mode)
>                         >= GET_MODE_SIZE (GET_MODE (body))))
>                   PUT_MODE (body, vec_mode);

OK, sounds good.

Richard

Patch

Index: final.c
===================================================================
--- final.c	(revision 192573)
+++ final.c	(working copy)
@@ -1066,7 +1066,15 @@  shorten_branches (rtx first ATTRIBUTE_UN
     }
 #endif /* CASE_VECTOR_SHORTEN_MODE */
 
+  /* When optimizing, we start assuming minimum length, and keep increasing
+     lengths as we find the need for this, till nothing changes.
+     When not optimizing, we start assuming maximum lengths, and
+     do a single pass to update the lengths.  */
+  bool increasing = optimize != 0;
+
   /* Compute initial lengths, addresses, and varying flags for each insn.  */
+  int (*length_fun) (rtx) = increasing ? insn_min_length : insn_default_length;
+
   for (insn_current_address = 0, insn = first;
        insn != 0;
        insn_current_address += insn_lengths[uid], insn = NEXT_INSN (insn))
@@ -1117,6 +1125,8 @@  shorten_branches (rtx first ATTRIBUTE_UN
 #else
 	  const_delay_slots = 0;
 #endif
+	  int (*inner_length_fun) (rtx)
+	    = const_delay_slots ? length_fun : insn_default_length;
 	  /* Inside a delay slot sequence, we do not do any branch shortening
 	     if the shortening could change the number of delay slots
 	     of the branch.  */
@@ -1131,7 +1141,7 @@  shorten_branches (rtx first ATTRIBUTE_UN
 		inner_length = (asm_insn_count (PATTERN (inner_insn))
 				* insn_default_length (inner_insn));
 	      else
-		inner_length = insn_default_length (inner_insn);
+		inner_length = inner_length_fun (inner_insn);
 
 	      insn_lengths[inner_uid] = inner_length;
 	      if (const_delay_slots)
@@ -1149,7 +1159,7 @@  shorten_branches (rtx first ATTRIBUTE_UN
 	}
       else if (GET_CODE (body) != USE && GET_CODE (body) != CLOBBER)
 	{
-	  insn_lengths[uid] = insn_default_length (insn);
+	  insn_lengths[uid] = length_fun (insn);
 	  varying_length[uid] = insn_variable_length_p (insn);
 	}
 
@@ -1165,6 +1175,7 @@  shorten_branches (rtx first ATTRIBUTE_UN
      get the current insn length.  If it has changed, reflect the change.
      When nothing changes for a full pass, we are done.  */
 
+  bool first_pass ATTRIBUTE_UNUSED = true;
   while (something_changed)
     {
       something_changed = 0;
@@ -1220,6 +1231,7 @@  shorten_branches (rtx first ATTRIBUTE_UN
 	      rtx prev;
 	      int rel_align = 0;
 	      addr_diff_vec_flags flags;
+	      enum machine_mode vec_mode;
 
 	      /* Avoid automatic aggregate initialization.  */
 	      flags = ADDR_DIFF_VEC_FLAGS (body);
@@ -1298,9 +1310,12 @@  shorten_branches (rtx first ATTRIBUTE_UN
 		  else
 		    max_addr += align_fuzz (max_lab, rel_lab, 0, 0);
 		}
-	      PUT_MODE (body, CASE_VECTOR_SHORTEN_MODE (min_addr - rel_addr,
-							max_addr - rel_addr,
-							body));
+	      vec_mode = CASE_VECTOR_SHORTEN_MODE (min_addr - rel_addr,
+						   max_addr - rel_addr, body);
+	      if (first_pass
+		  || (GET_MODE_SIZE (vec_mode)
+		      >= GET_MODE_SIZE (GET_MODE (body))))
+		PUT_MODE (body, vec_mode);
 	      if (JUMP_TABLES_IN_TEXT_SECTION
 		  || readonly_data_section == text_section)
 		{
@@ -1362,10 +1377,15 @@  shorten_branches (rtx first ATTRIBUTE_UN
 
 		  if (inner_length != insn_lengths[inner_uid])
 		    {
-		      insn_lengths[inner_uid] = inner_length;
-		      something_changed = 1;
+		      if (!increasing || inner_length > insn_lengths[inner_uid])
+			{
+			  insn_lengths[inner_uid] = inner_length;
+			  something_changed = 1;
+			}
+		      else
+			inner_length = insn_lengths[inner_uid];
 		    }
-		  insn_current_address += insn_lengths[inner_uid];
+		  insn_current_address += inner_length;
 		  new_length += inner_length;
 		}
 	    }
@@ -1382,15 +1402,19 @@  shorten_branches (rtx first ATTRIBUTE_UN
 	  insn_current_address += (new_length - tmp_length);
 #endif
 
-	  if (new_length != insn_lengths[uid])
+	  if (new_length != insn_lengths[uid]
+	      && (!increasing || new_length > insn_lengths[uid]))
 	    {
 	      insn_lengths[uid] = new_length;
 	      something_changed = 1;
 	    }
+	  else
+	    insn_current_address += insn_lengths[uid] - new_length;
 	}
       /* For a non-optimizing compile, do only a single pass.  */
-      if (!optimize)
+      if (!increasing)
 	break;
+      first_pass = false;
     }
 
   free (varying_length);