Message ID: 20121019152353.58plpczcgso4og8o-nzlynne@webmail.spamcop.net
State: New
Headers: show
Joern Rennecke <joern.rennecke@embecosm.com> writes: > @@ -1165,6 +1175,7 @@ shorten_branches (rtx first ATTRIBUTE_UN > get the current insn length. If it has changed, reflect the change. > When nothing changes for a full pass, we are done. */ > > + bool first_pass ATTRIBUTE_UNUSED = true; > while (something_changed) > { > something_changed = 0; > @@ -1220,6 +1231,7 @@ shorten_branches (rtx first ATTRIBUTE_UN > rtx prev; > int rel_align = 0; > addr_diff_vec_flags flags; > + enum machine_mode vec_mode; > > /* Avoid automatic aggregate initialization. */ > flags = ADDR_DIFF_VEC_FLAGS (body); > @@ -1298,9 +1310,12 @@ shorten_branches (rtx first ATTRIBUTE_UN > else > max_addr += align_fuzz (max_lab, rel_lab, 0, 0); > } > - PUT_MODE (body, CASE_VECTOR_SHORTEN_MODE (min_addr - rel_addr, > - max_addr - rel_addr, > - body)); > + vec_mode = CASE_VECTOR_SHORTEN_MODE (min_addr - rel_addr, > + max_addr - rel_addr, body); > + if (first_pass > + || (GET_MODE_SIZE (vec_mode) > + >= GET_MODE_SIZE (GET_MODE (body)))) > + PUT_MODE (body, vec_mode); > if (JUMP_TABLES_IN_TEXT_SECTION > || readonly_data_section == text_section) > { I think instead the set-up loop should have: if (GET_CODE (body) == ADDR_VEC || GET_CODE (body) == ADDR_DIFF_VEC) { #ifdef CASE_VECTOR_SHORTEN_MODE if (increasing && GET_CODE (body) == ADDR_DIFF_VEC) PUT_MODE (body, CASE_VECTOR_SHORTEN_MODE (0, 0, body)); #endif /* This only takes room if read-only data goes into the text section. */ if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section) insn_lengths[uid] = (XVECLEN (body, GET_CODE (body) == ADDR_DIFF_VEC) * GET_MODE_SIZE (GET_MODE (body))); /* Alignment is handled by ADDR_VEC_ALIGN. */ } (with just the CASE_VECTOR_SHORTEN_MODE part being new). We then start with the most optimistic length possible, as with everything else. 
The main shortening if statement should then be conditional on: #ifdef CASE_VECTOR_SHORTEN_MODE if (increasing && JUMP_P (insn) && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC) ... (testing "increasing" rather than "optimize"). The code for changing the mode should simply be: if (GET_MODE_SIZE (vec_mode) >= GET_MODE_SIZE (GET_MODE (body))) PUT_MODE (body, vec_mode); with first_pass no longer being necessary. OK with that change, if you agree. Richard
Quoting Richard Sandiford <rdsandiford@googlemail.com>: > I think instead the set-up loop should have: > > if (GET_CODE (body) == ADDR_VEC || GET_CODE (body) == ADDR_DIFF_VEC) > { > #ifdef CASE_VECTOR_SHORTEN_MODE > if (increasing && GET_CODE (body) == ADDR_DIFF_VEC) > PUT_MODE (body, CASE_VECTOR_SHORTEN_MODE (0, 0, body)); > #endif > /* This only takes room if read-only data goes into the text > section. */ > if (JUMP_TABLES_IN_TEXT_SECTION > || readonly_data_section == text_section) > insn_lengths[uid] = (XVECLEN (body, > GET_CODE (body) == ADDR_DIFF_VEC) > * GET_MODE_SIZE (GET_MODE (body))); > /* Alignment is handled by ADDR_VEC_ALIGN. */ > } > > (with just the CASE_VECTOR_SHORTEN_MODE part being new). > We then start with the most optimistic length possible, > as with everything else. Well, ports could always tailor the initial mode with CASE_VECTOR_MODE, but it is indeed simpler when the branch shortening pass provides a sensible initialization. How about putting this at the end of this block: #ifdef CASE_VECTOR_SHORTEN_MODE if (optimize) { /* Look for ADDR_DIFF_VECs, and initialize their minimum and maximum label fields. */ Of course increasing would have to be set earlier. With regards to in what way it would make sense to change increasing to something other than optimize, I think we could have a flag to control this, which is turned on by default at -O1; it could then be turned off if people want only some other (quicker?) optimizations, or if they want to work around a machine-specific bug triggered by a single source file. I.e. optimize might be set when increasing is not. Vice versa it makes little sense - iterating branch shortening is certainly an optimization. So is using CASE_VECTOR_SHORTEN_MODE, but it does not require iterating, as we've already calculated the required addresses in the preliminary pass before the main loop. So it makes sense to keep the condition for CASE_VECTOR_SHORTEN_MODE as 'optimize' (or use a different flag, e.g. 
case_vector_shorten_p), and have this at the end of this initial CASE_VECTOR_SHORTEN_MODE block: flags.min_after_base = min > rel; flags.max_after_base = max > rel; ADDR_DIFF_VEC_FLAGS (pat) = flags; + if (increasing) + PUT_MODE (body, CASE_VECTOR_SHORTEN_MODE (0, 0, body)); } } #endif /* CASE_VECTOR_SHORTEN_MODE */ continue; } #endif /* CASE_VECTOR_SHORTEN_MODE */ > The main shortening if statement should then be conditional on: > > #ifdef CASE_VECTOR_SHORTEN_MODE > if (increasing > && JUMP_P (insn) > && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC) As explained above, CASE_VECTOR_SHORTEN_MODE can make sense in the decreasing/non-iterating branch shortening mode. So I think the code for changing the mode should be if (!increasing || (GET_MODE_SIZE (vec_mode) >= GET_MODE_SIZE (GET_MODE (body)))) PUT_MODE (body, vec_mode);
Joern Rennecke <joern.rennecke@embecosm.com> writes: > Quoting Richard Sandiford <rdsandiford@googlemail.com>: >> I think instead the set-up loop should have: >> >> if (GET_CODE (body) == ADDR_VEC || GET_CODE (body) == ADDR_DIFF_VEC) >> { >> #ifdef CASE_VECTOR_SHORTEN_MODE >> if (increasing && GET_CODE (body) == ADDR_DIFF_VEC) >> PUT_MODE (body, CASE_VECTOR_SHORTEN_MODE (0, 0, body)); >> #endif >> /* This only takes room if read-only data goes into the text >> section. */ >> if (JUMP_TABLES_IN_TEXT_SECTION >> || readonly_data_section == text_section) >> insn_lengths[uid] = (XVECLEN (body, >> GET_CODE (body) == ADDR_DIFF_VEC) >> * GET_MODE_SIZE (GET_MODE (body))); >> /* Alignment is handled by ADDR_VEC_ALIGN. */ >> } >> >> (with just the CASE_VECTOR_SHORTEN_MODE part being new). >> We then start with the most optimistic length possible, >> as with everything else. > > Well, ports could always tailor the initial mode with CASE_VECTOR_MODE, > but it is indeed simpler when the branch shortening pass provide a > sensible initialization. That, plus I think CASE_VECTOR_MODE should always be the conservatively correct mode. > How about putting this at the end of this block: > #ifdef CASE_VECTOR_SHORTEN_MODE > if (optimize) > { > /* Look for ADDR_DIFF_VECs, and initialize their minimum and maximum > label fields. */ OK, that does sound better. > With regards to in what way it would make sense to change increasing to > something other than optimize, I think we could have a flag to control > this, which is turned on by default at -O1; it could then be turned off > if people want only some other (quicker?) optimizations, or if they want > to work around a machine-specific bug triggered by a single source file. > I.e. optimize might be set when increasing is not. Vice versa it makes > little sense - iterating branch shortening is certainly an optimization. 
> So is using CASE_VECTOR_SHORTEN_MODE, but it does not require iterating, > as we've already calculated the required addresses in the preliminary pass > before the main loop. > So it makes sense to keep the condition for CASE_VECTOR_SHORTEN_MODE > as 'optimize' (or use a different flag, e.g. case_vector_shorten_p), > and have this at the end of this initial CASE_VECTOR_SHORTEN_MODE block: > > flags.min_after_base = min > rel; > flags.max_after_base = max > rel; > ADDR_DIFF_VEC_FLAGS (pat) = flags; > > + if (increasing) > + PUT_MODE (body, CASE_VECTOR_SHORTEN_MODE (0, 0, body)); > } > } > #endif /* CASE_VECTOR_SHORTEN_MODE */ > > continue; > } > #endif /* CASE_VECTOR_SHORTEN_MODE */ > > > >> The main shortening if statement should then be conditional on: >> >> #ifdef CASE_VECTOR_SHORTEN_MODE >> if (increasing >> && JUMP_P (insn) >> && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC) > > As explained above, CASE_VECTOR_SHORTEN_MODE can make in the > decreasing/non-iterating branch shortening mode. > > so I think the code for changing the mode should be > if (!increasing > || (GET_MODE_SIZE (vec_mode) > >= GET_MODE_SIZE (GET_MODE (body)))) > PUT_MODE (body, vec_mode); OK, sounds good. Richard
Index: final.c =================================================================== --- final.c (revision 192573) +++ final.c (working copy) @@ -1066,7 +1066,15 @@ shorten_branches (rtx first ATTRIBUTE_UN } #endif /* CASE_VECTOR_SHORTEN_MODE */ + /* When optimizing, we start assuming minimum length, and keep increasing + lengths as we find the need for this, till nothing changes. + When not optimizing, we start assuming maximum lengths, and + do a single pass to update the lengths. */ + bool increasing = optimize != 0; + /* Compute initial lengths, addresses, and varying flags for each insn. */ + int (*length_fun) (rtx) = increasing ? insn_min_length : insn_default_length; + for (insn_current_address = 0, insn = first; insn != 0; insn_current_address += insn_lengths[uid], insn = NEXT_INSN (insn)) @@ -1117,6 +1125,8 @@ shorten_branches (rtx first ATTRIBUTE_UN #else const_delay_slots = 0; #endif + int (*inner_length_fun) (rtx) + = const_delay_slots ? length_fun : insn_default_length; /* Inside a delay slot sequence, we do not do any branch shortening if the shortening could change the number of delay slots of the branch. */ @@ -1131,7 +1141,7 @@ shorten_branches (rtx first ATTRIBUTE_UN inner_length = (asm_insn_count (PATTERN (inner_insn)) * insn_default_length (inner_insn)); else - inner_length = insn_default_length (inner_insn); + inner_length = inner_length_fun (inner_insn); insn_lengths[inner_uid] = inner_length; if (const_delay_slots) @@ -1149,7 +1159,7 @@ shorten_branches (rtx first ATTRIBUTE_UN } else if (GET_CODE (body) != USE && GET_CODE (body) != CLOBBER) { - insn_lengths[uid] = insn_default_length (insn); + insn_lengths[uid] = length_fun (insn); varying_length[uid] = insn_variable_length_p (insn); } @@ -1165,6 +1175,7 @@ shorten_branches (rtx first ATTRIBUTE_UN get the current insn length. If it has changed, reflect the change. When nothing changes for a full pass, we are done. 
*/ + bool first_pass ATTRIBUTE_UNUSED = true; while (something_changed) { something_changed = 0; @@ -1220,6 +1231,7 @@ shorten_branches (rtx first ATTRIBUTE_UN rtx prev; int rel_align = 0; addr_diff_vec_flags flags; + enum machine_mode vec_mode; /* Avoid automatic aggregate initialization. */ flags = ADDR_DIFF_VEC_FLAGS (body); @@ -1298,9 +1310,12 @@ shorten_branches (rtx first ATTRIBUTE_UN else max_addr += align_fuzz (max_lab, rel_lab, 0, 0); } - PUT_MODE (body, CASE_VECTOR_SHORTEN_MODE (min_addr - rel_addr, - max_addr - rel_addr, - body)); + vec_mode = CASE_VECTOR_SHORTEN_MODE (min_addr - rel_addr, + max_addr - rel_addr, body); + if (first_pass + || (GET_MODE_SIZE (vec_mode) + >= GET_MODE_SIZE (GET_MODE (body)))) + PUT_MODE (body, vec_mode); if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section) { @@ -1362,10 +1377,15 @@ shorten_branches (rtx first ATTRIBUTE_UN if (inner_length != insn_lengths[inner_uid]) { - insn_lengths[inner_uid] = inner_length; - something_changed = 1; + if (!increasing || inner_length > insn_lengths[inner_uid]) + { + insn_lengths[inner_uid] = inner_length; + something_changed = 1; + } + else + inner_length = insn_lengths[inner_uid]; } - insn_current_address += insn_lengths[inner_uid]; + insn_current_address += inner_length; new_length += inner_length; } } @@ -1382,15 +1402,19 @@ shorten_branches (rtx first ATTRIBUTE_UN insn_current_address += (new_length - tmp_length); #endif - if (new_length != insn_lengths[uid]) + if (new_length != insn_lengths[uid] + && (!increasing || new_length > insn_lengths[uid])) { insn_lengths[uid] = new_length; something_changed = 1; } + else + insn_current_address += insn_lengths[uid] - new_length; } /* For a non-optimizing compile, do only a single pass. */ - if (!optimize) + if (!increasing) break; + first_pass = false; } free (varying_length);