===================================================================
@@ -5862,6 +5862,127 @@ iv_ca_prune (struct ivopts_data *data, struct iv_c
return best_cost;
}
+/* Check if CAND_IDX is a candidate other than OLD_CAND and has
+ cheaper local cost for USE than BEST_CP. Return pointer to
+ the corresponding cost_pair, otherwise just return BEST_CP. */
+
+static struct cost_pair*
+cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
+ unsigned int cand_idx, struct iv_cand *old_cand,
+ struct cost_pair *best_cp)
+{
+ struct iv_cand *cand;
+ struct cost_pair *cp;
+
+ gcc_assert (old_cand != NULL);
+ if (cand_idx == old_cand->id)
+ return best_cp;
+
+ cand = iv_cand (data, cand_idx);
+ cp = get_use_iv_cost (data, use, cand);
+ if (cp != NULL
+ && (best_cp == NULL || cheaper_cost_pair (cp, best_cp)))
+ return cp;
+
+ return best_cp;
+}
+
+/* Try replacing candidates in IVS which are used more than once. In the
+ first step, this function replaces candidates in IVS with CAND if it
+ has lower local cost. In the second step, it replaces candidate for
+ any iv use if the use is represented by a candidate has already been
+ replaced in the first step. In the last step, this function tries to
+ prune the new IVS. Replacement of candidate is recorded in DELTA. */
+
+static comp_cost
+iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
+ struct iv_cand *cand, struct iv_ca_delta **delta)
+{
+ comp_cost cost;
+ unsigned int i, j;
+ bitmap_iterator bi;
+ struct iv_use *use;
+ bool should_replace;
+ struct iv_ca_delta *act, *tmp_delta;
+ struct cost_pair *old_cp, *best_cp = NULL, *cp;
+
+ *delta = NULL;
+ /* First step, extend current IVS. */
+ for (i = 0; i < ivs->upto; i++)
+ {
+ use = iv_use (data, i);
+ old_cp = iv_ca_cand_for_use (ivs, use);
+
+ if (old_cp && old_cp->cand == cand)
+ continue;
+
+ /* Skip if the candidate is only used once in IVS. */
+ if (ivs->n_cand_uses[old_cp->cand->id] == 1)
+ continue;
+
+ cp = get_use_iv_cost (data, use, cand);
+ if (!cp || !cheaper_cost_pair (cp, old_cp))
+ continue;
+
+ *delta = iv_ca_delta_add (use, old_cp, cp, *delta);
+ }
+
+ /* No need for further replacement. */
+ if (*delta == NULL)
+ return infinite_cost;
+
+ iv_ca_delta_commit (data, ivs, *delta, true);
+ tmp_delta = NULL;
+ /* Second step, replace candidate for any iv use if the use is
+ represented by a candidate has already been replaced in the
+ first step. */
+ for (i = 0; i < ivs->upto; i++)
+ {
+ use = iv_use (data, i);
+
+ old_cp = iv_ca_cand_for_use (ivs, use);
+ if (old_cp->cand == cand)
+ continue;
+
+ should_replace = false;
+ for (act = *delta; act; act = act->next_change)
+ if (old_cp->cand == act->old_cp->cand)
+ {
+ should_replace = true;
+ break;
+ }
+ if (!should_replace)
+ continue;
+
+ best_cp = NULL;
+ if (data->consider_all_candidates)
+ for (j = 0; j < n_iv_cands (data); j++)
+ best_cp = cheaper_cost_with_cand (data, use, j,
+ old_cp->cand, best_cp);
+ else
+ EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
+ best_cp = cheaper_cost_with_cand (data, use, j,
+ old_cp->cand, best_cp);
+
+ if (!best_cp)
+ continue;
+
+ tmp_delta = iv_ca_delta_add (use, old_cp, best_cp, tmp_delta);
+ }
+
+ cost = iv_ca_cost (ivs);
+ /* Last step, prune IVS if necessary. */
+ if (tmp_delta)
+ {
+ iv_ca_delta_commit (data, ivs, tmp_delta, true);
+ *delta = iv_ca_delta_join (*delta, tmp_delta);
+ cost = iv_ca_prune (data, ivs, cand, &tmp_delta);
+ }
+ iv_ca_delta_commit (data, ivs, *delta, false);
+ *delta = iv_ca_delta_join (*delta, tmp_delta);
+ return cost;
+}
+
/* Tries to extend the sets IVS in the best possible way in order
to express the USE. If ORIGINALP is true, prefer candidates from
the original set of IVs, otherwise favor important candidates not
@@ -5995,10 +6116,13 @@ get_initial_solution (struct ivopts_data *data, bo
return ivs;
}
-/* Tries to improve set of induction variables IVS. */
+/* Tries to improve set of induction variables IVS. TRY_REPLACE_P
+ points to a bool variable, this function tries to replace IVS at
+ fixed point if it's true. */
static bool
-try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
+try_improve_iv_set (struct ivopts_data *data,
+ struct iv_ca *ivs, bool *try_replace_p)
{
unsigned i, n_ivs;
comp_cost acost, best_cost = iv_ca_cost (ivs);
@@ -6042,7 +6166,35 @@ static bool
/* Try removing the candidates from the set instead. */
best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
- /* Nothing more we can do. */
+ if (!best_delta && *try_replace_p)
+ {
+ *try_replace_p = false;
+ /* So far candidate selecting algorithm tends to choose fewer IVs
+ so that it can handle cases in which loops have many variables
+ but the best choice is often to use only one general biv. One
+ weakness is it can't handle opposite cases, in which different
+ candidates should be chosen with respect to each use. To solve
+ the problem, we replace candidates in a manner described by the
+ comments of iv_ca_replace, thus give general algorithm a chance
+ to break local optimal fixed-point in these cases. */
+ for (i = 0; i < n_iv_cands (data); i++)
+ {
+ cand = iv_cand (data, i);
+ if (iv_ca_cand_used_p (ivs, cand))
+ continue;
+
+ acost = iv_ca_replace (data, ivs, cand, &act_delta);
+ if (compare_costs (acost, best_cost) < 0)
+ {
+ best_cost = acost;
+ iv_ca_delta_free (&best_delta);
+ best_delta = act_delta;
+ }
+ else
+ iv_ca_delta_free (&act_delta);
+ }
+ }
+
if (!best_delta)
return false;
}
@@ -6061,6 +6213,7 @@ static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
struct iv_ca *set;
+ bool try_replace_p = true;
/* Get the initial solution. */
set = get_initial_solution (data, originalp);
@@ -6077,7 +6230,7 @@ find_optimal_iv_set_1 (struct ivopts_data *data, b
iv_ca_dump (data, dump_file, set);
}
- while (try_improve_iv_set (data, set))
+ while (try_improve_iv_set (data, set, &try_replace_p))
{
if (dump_file && (dump_flags & TDF_DETAILS))
{
===================================================================
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+int a[30 +1][30 +1], b[30 +1][30 +1], r[30 +1][30 +1];
+
+void foo (void) {
+ int i, j, k;
+
+ for ( i = 1; i <= 30; i++ )
+ for ( j = 1; j <= 30; j++ ) {
+ r[i][j] = 0;
+ for(k = 1; k <= 30; k++ )
+ r[i][j] += a[i][k]*b[k][j];
+ }
+}
+
+/* { dg-final { scan-assembler "ld1r\\t\{v\[0-9\]+\."} } */