Message ID | alpine.LSU.2.20.1712011325250.12252@zhemvz.fhfr.qr |
---|---|
State | New |
Headers | show |
Series | [gimple-linterchange] Rewrite compute_access_stride | expand |
On Fri, Dec 1, 2017 at 12:31 PM, Richard Biener <rguenther@suse.de> wrote: > > This is the access stride computation change. Apart from the > stride extraction I adjusted the cost model to handle non-constant > strides by checking if either is a multiple of the other and > simply fail interchanging if it's the wrong way around for one > ref or if the simple method using multiple_of_p fails to determine > either case. > > This still handles the bwaves case. > > I think we want additional testcases with variable strides for each > case we add - I believe this is the most conservative way to treat > variable strides. > > It may be inconsistent with the constant stride handling where you > allow for many OK DRs to outweight a few not OK DRs, but as it > worked for bwaves it must be good enough ;) > > Tested on x86_64-unknown-linux-gnu (just the interchange testcases). > > Currently running a bootstrap with -O3 -g -floop-interchange. > > Ok for the branch? Ok. This actually is closer the motivation: simple/conservative cost model that only transforms code when it's known to be good. I will check the impact on the number of interchange in spec. Thanks, bin > > Richard. > > 2017-12-01 Richard Biener <rguenther@suse.de> > > * gimple-loop-interchange.cc (estimate_val_by_simplify_replace): > Remove. > (compute_access_stride): Rewrite using instantiate_scev, > remove constant substitution. > (should_interchange_loops): Adjust for non-constant strides. > > Index: gcc/gimple-loop-interchange.cc > =================================================================== > --- gcc/gimple-loop-interchange.cc (revision 255303) > +++ gcc/gimple-loop-interchange.cc (working copy) > @@ -1325,42 +1325,6 @@ tree_loop_interchange::move_code_to_inne > } > } > > -/* Estimate and return the value of EXPR by replacing variables in EXPR > - with CST_TREE and simplifying. 
*/ > - > -static tree > -estimate_val_by_simplify_replace (tree expr, tree cst_tree) > -{ > - unsigned i, n; > - tree ret = NULL_TREE, e, se; > - > - if (!expr) > - return NULL_TREE; > - > - /* Do not bother to replace constants. */ > - if (CONSTANT_CLASS_P (expr)) > - return expr; > - > - if (!EXPR_P (expr)) > - return cst_tree; > - > - n = TREE_OPERAND_LENGTH (expr); > - for (i = 0; i < n; i++) > - { > - e = TREE_OPERAND (expr, i); > - se = estimate_val_by_simplify_replace (e, cst_tree); > - if (e == se) > - continue; > - > - if (!ret) > - ret = copy_node (expr); > - > - TREE_OPERAND (ret, i) = se; > - } > - > - return (ret ? fold (ret) : expr); > -} > - > /* Given data reference DR in LOOP_NEST, the function computes DR's access > stride at each level of loop from innermost LOOP to outer. On success, > it saves access stride at each level loop in a vector which is pointed > @@ -1388,44 +1352,31 @@ compute_access_stride (struct loop *loop > > tree ref = DR_REF (dr); > tree scev_base = build_fold_addr_expr (ref); > - tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (ref)); > - tree niters = build_int_cst (sizetype, AVG_LOOP_NITER); > - access_size = fold_build2 (MULT_EXPR, sizetype, niters, access_size); > - > - do { > - tree scev_fn = analyze_scalar_evolution (loop, scev_base); > - if (chrec_contains_undetermined (scev_fn) > - || chrec_contains_symbols_defined_in_loop (scev_fn, loop->num)) > - break; > - > - if (TREE_CODE (scev_fn) != POLYNOMIAL_CHREC) > - { > - scev_base = scev_fn; > - strides->safe_push (build_int_cst (sizetype, 0)); > - continue; > - } > - > - scev_base = CHREC_LEFT (scev_fn); > - if (tree_contains_chrecs (scev_base, NULL)) > - break; > - > - tree scev_step = fold_convert (sizetype, CHREC_RIGHT (scev_fn)); > - > - enum ev_direction scev_dir = scev_direction (scev_fn); > - /* Estimate if step isn't constant. 
*/ > - if (scev_dir == EV_DIR_UNKNOWN) > - { > - scev_step = estimate_val_by_simplify_replace (scev_step, niters); > - if (TREE_CODE (scev_step) != INTEGER_CST > - || tree_int_cst_lt (scev_step, access_size)) > - scev_step = access_size; > - } > - /* Compute absolute value of scev step. */ > - else if (scev_dir == EV_DIR_DECREASES) > - scev_step = fold_build1 (NEGATE_EXPR, sizetype, scev_step); > - > - strides->safe_push (scev_step); > - } while (loop != loop_nest && (loop = loop_outer (loop)) != NULL); > + tree scev = analyze_scalar_evolution (loop, scev_base); > + scev = instantiate_scev (loop_preheader_edge (loop_nest), loop, scev); > + if (! chrec_contains_undetermined (scev)) > + { > + tree sl = scev; > + struct loop *expected = loop; > + while (TREE_CODE (sl) == POLYNOMIAL_CHREC) > + { > + struct loop *sl_loop = get_chrec_loop (sl); > + while (sl_loop != expected) > + { > + strides->safe_push (size_int (0)); > + expected = loop_outer (expected); > + } > + strides->safe_push (CHREC_RIGHT (sl)); > + sl = CHREC_LEFT (sl); > + expected = loop_outer (expected); > + } > + if (! 
tree_contains_chrecs (sl, NULL)) > + while (expected != loop_outer (loop_nest)) > + { > + strides->safe_push (size_int (0)); > + expected = loop_outer (expected); > + } > + } > > dr->aux = strides; > } > @@ -1538,6 +1489,9 @@ should_interchange_loops (unsigned i_idx > struct data_reference *dr; > bool all_seq_dr_before_p = true, all_seq_dr_after_p = true; > widest_int iloop_strides = 0, oloop_strides = 0; > + unsigned num_unresolved_drs = 0; > + unsigned num_resolved_ok_drs = 0; > + unsigned num_resolved_not_ok_drs = 0; > > if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS)) > fprintf (dump_file, "\nData ref strides:\n\tmem_ref:\t\tiloop\toloop\n"); > @@ -1546,28 +1500,42 @@ should_interchange_loops (unsigned i_idx > { > vec<tree> *stride = DR_ACCESS_STRIDE (dr); > tree iloop_stride = (*stride)[i_idx], oloop_stride = (*stride)[o_idx]; > - gcc_assert (TREE_CODE (iloop_stride) == INTEGER_CST); > - gcc_assert (TREE_CODE (oloop_stride) == INTEGER_CST); > > bool subloop_stride_p = false; > /* Data ref can't be invariant or sequential access at current loop if > its address changes with respect to any subloops. 
*/ > for (j = i_idx + 1; j < stride->length (); ++j) > - if (integer_nonzerop ((*stride)[j])) > + if (!integer_zerop ((*stride)[j])) > { > subloop_stride_p = true; > break; > } > > - if (integer_nonzerop (iloop_stride)) > - iloop_strides = wi::add (iloop_strides, wi::to_widest (iloop_stride)); > - else if (!subloop_stride_p) > - num_old_inv_drs++; > - > - if (integer_nonzerop (oloop_stride)) > - oloop_strides = wi::add (oloop_strides, wi::to_widest (oloop_stride)); > - else if (!subloop_stride_p) > - num_new_inv_drs++; > + if (integer_zerop (iloop_stride)) > + { > + if (!subloop_stride_p) > + num_old_inv_drs++; > + } > + if (integer_zerop (oloop_stride)) > + { > + if (!subloop_stride_p) > + num_new_inv_drs++; > + } > + > + if (TREE_CODE (iloop_stride) == INTEGER_CST > + && TREE_CODE (oloop_stride) == INTEGER_CST) > + { > + iloop_strides = wi::add (iloop_strides, wi::to_widest (iloop_stride)); > + oloop_strides = wi::add (oloop_strides, wi::to_widest (oloop_stride)); > + } > + else if (multiple_of_p (TREE_TYPE (iloop_stride), > + iloop_stride, oloop_stride)) > + num_resolved_ok_drs++; > + else if (multiple_of_p (TREE_TYPE (iloop_stride), > + oloop_stride, iloop_stride)) > + num_resolved_not_ok_drs++; > + else > + num_unresolved_drs++; > > /* Data ref can't be sequential access if its address changes in sub > loop. */ > @@ -1581,10 +1549,12 @@ should_interchange_loops (unsigned i_idx > interchange. Note invariant is considered sequential here. */ > tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))); > if (all_seq_dr_before_p > - && wi::gtu_p (wi::to_wide (iloop_stride), wi::to_wide (access_size))) > + && ! (integer_zerop (iloop_stride) > + || operand_equal_p (access_size, iloop_stride, 0))) > all_seq_dr_before_p = false; > if (all_seq_dr_after_p > - && wi::gtu_p (wi::to_wide (oloop_stride), wi::to_wide (access_size))) > + && ! 
(integer_zerop (oloop_stride) > + || operand_equal_p (access_size, oloop_stride, 0))) > all_seq_dr_after_p = false; > } > > @@ -1601,8 +1571,17 @@ should_interchange_loops (unsigned i_idx > fprintf (dump_file, "All consecutive stride: before(%s), after(%s)\n", > all_seq_dr_before_p ? "true" : "false", > all_seq_dr_after_p ? "true" : "false"); > + fprintf (dump_file, "OK to interchage with variable strides: %d\n", > + num_resolved_ok_drs); > + fprintf (dump_file, "Not OK to interchage with variable strides: %d\n", > + num_resolved_not_ok_drs); > + fprintf (dump_file, "Variable strides we cannot decide: %d\n", > + num_unresolved_drs); > } > > + if (num_unresolved_drs != 0 || num_resolved_not_ok_drs != 0) > + return false; > + > /* We use different stride comparison ratio for interchanging innermost > two loops or not. The idea is to be conservative in interchange for > the innermost loops. */
On Fri, 1 Dec 2017, Bin.Cheng wrote: > On Fri, Dec 1, 2017 at 12:31 PM, Richard Biener <rguenther@suse.de> wrote: > > > > This is the access stride computation change. Apart from the > > stride extraction I adjusted the cost model to handle non-constant > > strides by checking if either is a multiple of the other and > > simply fail interchanging if it's the wrong way around for one > > ref or if the simple method using multiple_of_p fails to determine > > either case. > > > > This still handles the bwaves case. > > > > I think we want additional testcases with variable strides for each > > case we add - I believe this is the most conservative way to treat > > variable strides. > > > > It may be inconsistent with the constant stride handling where you > > allow for many OK DRs to outweight a few not OK DRs, but as it > > worked for bwaves it must be good enough ;) > > > > Tested on x86_64-unknown-linux-gnu (just the interchange testcases). > > > > Currently running a bootstrap with -O3 -g -floop-interchange. > > > > Ok for the branch? > Ok. This actually is closer the motivation: simple/conservative cost > model that only transforms code when it's known to be good. > I will check the impact on the number of interchange in spec. Few high-level observations. In tree_loop_interchange::interchange we try interchanging adjacent loops, starting from innermost with outer of innermost. This way the innermost loop will bubble up as much as possible. But we don't seem to handle bubbling multiple loops like for for (i=0; i<n; ++i) for (j=0; j<n; ++j) for (k=0; k<n; ++k) a[j][k][i] = 1.; because the innermost two loops are correctly ordered so we then try interchanging the k and the i loop which succeeds but then we stop. So there's something wrong in the iteration scheme. I would have expected it to be quadratic, basically iterating the ::interchange loop until we didn't perform any interchange (or doing sth more clever). 
loop_cand::can_interchange_p seems to perform per BB checks (supported_operations, num_stmts) that with the way we interchange should disallow any such BB in a loop that we interchange or interchange across. That means it looks like sth we should perform early, like during data dependence gathering by for example inlining find_data_references_in_bb and doing those per-stmt checks there? In prepare_perfect_loop_nest we seem to be somewhat quadratic in the way we re-compute dependences if doing so failed (we also always just strip the outermost loop while the failing DDR could involve a DR that is in an inner loop). I think it should be possible to re-structure this computing dependences from inner loop body to outer loop bodies (the ddrs vector is, as opposed to the dr vector, unsorted I think). I haven't fully thought this out yet though - a similar iteration scheme could improve DR gathering though that's not so costly. Overall we should try improving on function names, we have valid_data_dependences, can_interchange_loops, should_interchange_loops, can_interchange_p which all are related but do slightly different things. My usual approach is to inline all single-use functions to improve things (and make program flow more visible). But I guess that's too much implementation detail. Didn't get to the IV re-mapping stuff yet but you (of course) end up with quite some dead IVs when iterating the interchange. You seem to add a new canonical IV just to avoid rewriting the existing exit test, right? Deferring that to the "final" interchange on a nest should avoid those dead IVs. Will now leave for the weekend. Thanks, Richard.
On Fri, Dec 1, 2017 at 2:26 PM, Richard Biener <rguenther@suse.de> wrote: > On Fri, 1 Dec 2017, Bin.Cheng wrote: > >> On Fri, Dec 1, 2017 at 12:31 PM, Richard Biener <rguenther@suse.de> wrote: >> > >> > This is the access stride computation change. Apart from the >> > stride extraction I adjusted the cost model to handle non-constant >> > strides by checking if either is a multiple of the other and >> > simply fail interchanging if it's the wrong way around for one >> > ref or if the simple method using multiple_of_p fails to determine >> > either case. >> > >> > This still handles the bwaves case. >> > >> > I think we want additional testcases with variable strides for each >> > case we add - I believe this is the most conservative way to treat >> > variable strides. >> > >> > It may be inconsistent with the constant stride handling where you >> > allow for many OK DRs to outweight a few not OK DRs, but as it >> > worked for bwaves it must be good enough ;) >> > >> > Tested on x86_64-unknown-linux-gnu (just the interchange testcases). >> > >> > Currently running a bootstrap with -O3 -g -floop-interchange. >> > >> > Ok for the branch? >> Ok. This actually is closer the motivation: simple/conservative cost >> model that only transforms code when it's known to be good. >> I will check the impact on the number of interchange in spec. > > Few high-level observations. > > In tree_loop_interchange::interchange we try interchanging adjacent > loops, starting from innermost with outer of innermost. This way > the innermost loop will bubble up as much as possible. But we > don't seem to handle bulling multiple loops like for > > for (i=0; i<n; ++i) > for (j=0; j<n; ++j) > for (k=0; k<n ++k) > a[j][k][i] = 1.; > > because the innermost two loops are correctly ordered so we then > try interchanging the k and the i loop which succeeds but then > we stop. So there's something wrong in the iteration scheme. 
> I would have expected it to be quadratic, basically iterating > the ::interchange loop until we didn't perform any interchange > (or doing sth more clever). Yes, I restricted it to be a single pass process in loop nest. Ideally we could create a vector of loop_cand for the whole loop nest, then sort/permute loops with respect to computed strides. > > loop_cand::can_interchange_p seems to perform per BB checks > (supported_operations, num_stmts) that with the way we interchange > should disallow any such BB in a loop that we interchange or > interchange across. That means it looks like sth we should > perform early, like during data dependence gathering by for > example inlining find_data_references_in_bb and doing those > per-stmt checks there? Yes. The only problem is the check on the reduction. We can build up all loop_cand earlier, or simply move non-reduction checks earlier. > > In prepare_perfect_loop_nest we seem to be somewhat quadratic > in the way we re-compute dependences if doing so failed > (we also always just strip the outermost loop while the failing > DDR could involve a DR that is in an inner loop). I think it > should be possible to re-structure this computing dependences > from inner loop body to outer loop bodies (the ddrs vector > is, as opposed to the dr vector, unsorted I think). Even more, we would want an interface in tree-data-ref.c so that data dependence can be computed/assembled level by level in loop nest. Loop distribution can benefit as well. > I haven't fully thought this out yet though - a similar > iteration scheme could improve DR gathering though that's not > so costly. I can change this one now. > > Overall we should try improving on function names, we have > valid_data_dependences, can_interchange_loops, should_interchange_loops, > can_interchange_p which all are related but do slightly different > things. My usual approach is to inline all single-use functions > to improve things (and make program flow more visible). 
But > I guess that's too much implementation detail. > > Didn't get to the IV re-mapping stuff yet but you (of course) > end up with quite some dead IVs when iterating the interchange. > You seem to add a new canonical IV just to avoid rewriting the > existing exit test, right? Deferring that to the "final" > interchange on a nest should avoid those dead IVs. Hmm, with the help of the newly created dce interface, all dead IV/RE will be deleted after this pass. Note for current implementation, dead code can be generated from mapped IV, canonical IV and the reduction. Deferring adding canonical IV does not look practical with respect to the current level by level interchange, because inner loop's IV is needed for interchange? > > Will now leave for the weekend. Have a nice WE! Thanks, bin > > Thanks, > Richard.
Index: gcc/gimple-loop-interchange.cc =================================================================== --- gcc/gimple-loop-interchange.cc (revision 255303) +++ gcc/gimple-loop-interchange.cc (working copy) @@ -1325,42 +1325,6 @@ tree_loop_interchange::move_code_to_inne } } -/* Estimate and return the value of EXPR by replacing variables in EXPR - with CST_TREE and simplifying. */ - -static tree -estimate_val_by_simplify_replace (tree expr, tree cst_tree) -{ - unsigned i, n; - tree ret = NULL_TREE, e, se; - - if (!expr) - return NULL_TREE; - - /* Do not bother to replace constants. */ - if (CONSTANT_CLASS_P (expr)) - return expr; - - if (!EXPR_P (expr)) - return cst_tree; - - n = TREE_OPERAND_LENGTH (expr); - for (i = 0; i < n; i++) - { - e = TREE_OPERAND (expr, i); - se = estimate_val_by_simplify_replace (e, cst_tree); - if (e == se) - continue; - - if (!ret) - ret = copy_node (expr); - - TREE_OPERAND (ret, i) = se; - } - - return (ret ? fold (ret) : expr); -} - /* Given data reference DR in LOOP_NEST, the function computes DR's access stride at each level of loop from innermost LOOP to outer. 
On success, it saves access stride at each level loop in a vector which is pointed @@ -1388,44 +1352,31 @@ compute_access_stride (struct loop *loop tree ref = DR_REF (dr); tree scev_base = build_fold_addr_expr (ref); - tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (ref)); - tree niters = build_int_cst (sizetype, AVG_LOOP_NITER); - access_size = fold_build2 (MULT_EXPR, sizetype, niters, access_size); - - do { - tree scev_fn = analyze_scalar_evolution (loop, scev_base); - if (chrec_contains_undetermined (scev_fn) - || chrec_contains_symbols_defined_in_loop (scev_fn, loop->num)) - break; - - if (TREE_CODE (scev_fn) != POLYNOMIAL_CHREC) - { - scev_base = scev_fn; - strides->safe_push (build_int_cst (sizetype, 0)); - continue; - } - - scev_base = CHREC_LEFT (scev_fn); - if (tree_contains_chrecs (scev_base, NULL)) - break; - - tree scev_step = fold_convert (sizetype, CHREC_RIGHT (scev_fn)); - - enum ev_direction scev_dir = scev_direction (scev_fn); - /* Estimate if step isn't constant. */ - if (scev_dir == EV_DIR_UNKNOWN) - { - scev_step = estimate_val_by_simplify_replace (scev_step, niters); - if (TREE_CODE (scev_step) != INTEGER_CST - || tree_int_cst_lt (scev_step, access_size)) - scev_step = access_size; - } - /* Compute absolute value of scev step. */ - else if (scev_dir == EV_DIR_DECREASES) - scev_step = fold_build1 (NEGATE_EXPR, sizetype, scev_step); - - strides->safe_push (scev_step); - } while (loop != loop_nest && (loop = loop_outer (loop)) != NULL); + tree scev = analyze_scalar_evolution (loop, scev_base); + scev = instantiate_scev (loop_preheader_edge (loop_nest), loop, scev); + if (! 
chrec_contains_undetermined (scev)) + { + tree sl = scev; + struct loop *expected = loop; + while (TREE_CODE (sl) == POLYNOMIAL_CHREC) + { + struct loop *sl_loop = get_chrec_loop (sl); + while (sl_loop != expected) + { + strides->safe_push (size_int (0)); + expected = loop_outer (expected); + } + strides->safe_push (CHREC_RIGHT (sl)); + sl = CHREC_LEFT (sl); + expected = loop_outer (expected); + } + if (! tree_contains_chrecs (sl, NULL)) + while (expected != loop_outer (loop_nest)) + { + strides->safe_push (size_int (0)); + expected = loop_outer (expected); + } + } dr->aux = strides; } @@ -1538,6 +1489,9 @@ should_interchange_loops (unsigned i_idx struct data_reference *dr; bool all_seq_dr_before_p = true, all_seq_dr_after_p = true; widest_int iloop_strides = 0, oloop_strides = 0; + unsigned num_unresolved_drs = 0; + unsigned num_resolved_ok_drs = 0; + unsigned num_resolved_not_ok_drs = 0; if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "\nData ref strides:\n\tmem_ref:\t\tiloop\toloop\n"); @@ -1546,28 +1500,42 @@ should_interchange_loops (unsigned i_idx { vec<tree> *stride = DR_ACCESS_STRIDE (dr); tree iloop_stride = (*stride)[i_idx], oloop_stride = (*stride)[o_idx]; - gcc_assert (TREE_CODE (iloop_stride) == INTEGER_CST); - gcc_assert (TREE_CODE (oloop_stride) == INTEGER_CST); bool subloop_stride_p = false; /* Data ref can't be invariant or sequential access at current loop if its address changes with respect to any subloops. 
*/ for (j = i_idx + 1; j < stride->length (); ++j) - if (integer_nonzerop ((*stride)[j])) + if (!integer_zerop ((*stride)[j])) { subloop_stride_p = true; break; } - if (integer_nonzerop (iloop_stride)) - iloop_strides = wi::add (iloop_strides, wi::to_widest (iloop_stride)); - else if (!subloop_stride_p) - num_old_inv_drs++; - - if (integer_nonzerop (oloop_stride)) - oloop_strides = wi::add (oloop_strides, wi::to_widest (oloop_stride)); - else if (!subloop_stride_p) - num_new_inv_drs++; + if (integer_zerop (iloop_stride)) + { + if (!subloop_stride_p) + num_old_inv_drs++; + } + if (integer_zerop (oloop_stride)) + { + if (!subloop_stride_p) + num_new_inv_drs++; + } + + if (TREE_CODE (iloop_stride) == INTEGER_CST + && TREE_CODE (oloop_stride) == INTEGER_CST) + { + iloop_strides = wi::add (iloop_strides, wi::to_widest (iloop_stride)); + oloop_strides = wi::add (oloop_strides, wi::to_widest (oloop_stride)); + } + else if (multiple_of_p (TREE_TYPE (iloop_stride), + iloop_stride, oloop_stride)) + num_resolved_ok_drs++; + else if (multiple_of_p (TREE_TYPE (iloop_stride), + oloop_stride, iloop_stride)) + num_resolved_not_ok_drs++; + else + num_unresolved_drs++; /* Data ref can't be sequential access if its address changes in sub loop. */ @@ -1581,10 +1549,12 @@ should_interchange_loops (unsigned i_idx interchange. Note invariant is considered sequential here. */ tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))); if (all_seq_dr_before_p - && wi::gtu_p (wi::to_wide (iloop_stride), wi::to_wide (access_size))) + && ! (integer_zerop (iloop_stride) + || operand_equal_p (access_size, iloop_stride, 0))) all_seq_dr_before_p = false; if (all_seq_dr_after_p - && wi::gtu_p (wi::to_wide (oloop_stride), wi::to_wide (access_size))) + && ! 
(integer_zerop (oloop_stride) + || operand_equal_p (access_size, oloop_stride, 0))) all_seq_dr_after_p = false; } @@ -1601,8 +1571,17 @@ should_interchange_loops (unsigned i_idx fprintf (dump_file, "All consecutive stride: before(%s), after(%s)\n", all_seq_dr_before_p ? "true" : "false", all_seq_dr_after_p ? "true" : "false"); + fprintf (dump_file, "OK to interchage with variable strides: %d\n", + num_resolved_ok_drs); + fprintf (dump_file, "Not OK to interchage with variable strides: %d\n", + num_resolved_not_ok_drs); + fprintf (dump_file, "Variable strides we cannot decide: %d\n", + num_unresolved_drs); } + if (num_unresolved_drs != 0 || num_resolved_not_ok_drs != 0) + return false; + /* We use different stride comparison ratio for interchanging innermost two loops or not. The idea is to be conservative in interchange for the innermost loops. */