===================================================================
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "--param allow-store-data-races=0 -O2 -fdump-tree-lim1" } */
+
+/* Test that g_2 is not written to unless !g_1. */
+
+int g_1 = 1;
+int g_2 = 0;
+
+int func_1(void)
+{
+ int l;
+ for (l = 0; l < 1234; l++)
+ {
+ if (g_1)
+ return l;
+ else
+ g_2 = 0;
+ }
+ return 999;
+}
+
+/* { dg-final { scan-tree-dump-times "MEM.*g_2_lsm_flag" 1 "lim1" } } */
+/* { dg-final { cleanup-tree-dump "lim1" } } */
===================================================================
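For reference, here is roughly what store motion does to func_1 above, written as C rather than GIMPLE. This is a hand-written sketch, not compiler output: the function names are invented, and `lsm'/`lsm_flag' follow the temporary-naming scheme described in the execute_sm_if_changed comment further down.

/* What unconditional store motion would generate: g_2 is written on
   every execution, including those where the original function never
   stored to it -- a data race with any concurrent reader or writer of
   g_2, hence the --param allow-store-data-races=0 above.  */
int func_1_unconditional (void)
{
  int l, lsm = g_2;
  for (l = 0; l < 1234; l++)
    if (g_1)
      {
        g_2 = lsm;      /* store sunk onto this exit edge */
        return l;
      }
    else
      lsm = 0;
  g_2 = lsm;            /* store sunk onto the fall-through exit */
  return 999;
}

/* The flag-guarded form this patch generates instead: the store
   happens only on paths that stored originally, which is what the
   scan-tree-dump pattern checks for.  (The pass would additionally
   elide the initial load of g_2, since the loop never reads it, and
   feed a dummy zero PHI instead; the load is kept here for
   readability.)  */
int func_1_guarded (void)
{
  int l, lsm = g_2, lsm_flag = 0;
  for (l = 0; l < 1234; l++)
    if (g_1)
      goto out;
    else
      {
        lsm = 0;
        lsm_flag = 1;
      }
  l = 999;
out:
  if (lsm_flag)
    g_2 = lsm;
  return l;
}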
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "--param allow-store-data-races=0 -O2 -fdump-tree-lim1" } */
+
+/* Test that `count' is not written to unless p->data > 0. */
+
+int count;
+
+struct obj {
+ int data;
+ struct obj *next;
+} *q;
+
+void func()
+{
+ struct obj *p;
+ for (p = q; p; p = p->next)
+ if (p->data > 0)
+ count++;
+}
+
+/* { dg-final { scan-tree-dump-times "MEM count_lsm.. count_lsm_flag" 1 "lim1" } } */
+/* { dg-final { cleanup-tree-dump "lim1" } } */
===================================================================
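Because `count++' both reads and writes `count', the new ref->loaded bitmap (see the tree-ssa-loop-im.c changes below) is set for this loop, so execute_sm keeps the initial load and only the store becomes conditional. A hand-written C approximation of the expected result; the names are illustrative, not actual dump output:

void func_transformed (void)
{
  struct obj *p;
  int lsm = count;    /* load survives: the loop reads count */
  int lsm_flag = 0;
  for (p = q; p; p = p->next)
    if (p->data > 0)
      {
        lsm = lsm + 1;
        lsm_flag = 1;
      }
  if (lsm_flag)
    count = lsm;      /* store only if the loop actually stored */
}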
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-fgnu-tm -O2 -fdump-tree-lim1" } */
+
+/* Test that `count' is not written to unless p->data > 0. */
+
+int count;
+
+struct obj {
+ int data;
+ struct obj *next;
+} *q;
+
+void func()
+{
+ struct obj *p;
+ __transaction_atomic {
+ for (p = q; p; p = p->next)
+ if (p->data > 0)
+ count++;
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "MEM count_lsm.. count_lsm_flag" 1 "lim1" } } */
+/* { dg-final { cleanup-tree-dump "lim1" } } */
===================================================================
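The transactional test exercises the other trigger for the flag-guarded model: inside a __transaction_atomic block, an unconditionally hoisted store would add `count' to the transaction's write set even on executions that never stored to it. execute_sm therefore forces the model whenever the loop is in a transaction, independent of --param allow-store-data-races. Condensed from the patch below (parentheses added for clarity; all identifiers are from the patch itself):

  if ((flag_tm
       && (loop_preheader_edge (loop)->src->flags & BB_IN_TRANSACTION))
      || !PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES))
    multi_threaded_model_p = true;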
@@ -52,7 +52,7 @@ along with GCC; see the file COPYING3.
}
}
- Where COND and INV are is invariants, but evaluating INV may trap or be
+ Where COND and INV are invariants, but evaluating INV may trap or be
invalid from some other reason if !COND. This may be transformed to
if (cond)
@@ -132,6 +132,10 @@ typedef struct mem_ref
hashval_t hash; /* Its hash value. */
bitmap stored; /* The set of loops in that this memory location
is stored to. */
+ bitmap loaded; /* The set of loops in which this
+ memory location may be loaded.
+ This includes aliases of the memory
+ location. */
VEC (mem_ref_locs_p, heap) *accesses_in_loop;
/* The locations of the accesses. Vector
indexed by the loop number. */
@@ -1487,6 +1491,7 @@ memref_free (void *obj)
mem_ref_locs_p accs;
BITMAP_FREE (mem->stored);
+ BITMAP_FREE (mem->loaded);
BITMAP_FREE (mem->indep_loop);
BITMAP_FREE (mem->dep_loop);
BITMAP_FREE (mem->indep_ref);
@@ -1510,6 +1515,7 @@ mem_ref_alloc (tree mem, unsigned hash,
ref->id = id;
ref->hash = hash;
ref->stored = BITMAP_ALLOC (NULL);
+ ref->loaded = BITMAP_ALLOC (NULL);
ref->indep_loop = BITMAP_ALLOC (NULL);
ref->dep_loop = BITMAP_ALLOC (NULL);
ref->indep_ref = BITMAP_ALLOC (NULL);
@@ -1569,6 +1575,18 @@ mark_ref_stored (mem_ref_p ref, struct l
bitmap_set_bit (ref->stored, loop->num);
}
+/* Marks reference REF as loaded in LOOP and in all outer loops. */
+
+static void
+mark_ref_loaded (mem_ref_p ref, struct loop *loop)
+{
+ for (;
+ loop != current_loops->tree_root
+ && !bitmap_bit_p (ref->loaded, loop->num);
+ loop = loop_outer (loop))
+ bitmap_set_bit (ref->loaded, loop->num);
+}
+
/* Gathers memory references in statement STMT in LOOP, storing the
information about them in the memory_accesses structure. Marks
the vops accessed through unrecognized statements there as
@@ -1626,6 +1644,22 @@ gather_mem_refs_stmt (struct loop *loop,
fprintf (dump_file, "\n");
}
}
+
+ if (gimple_assign_single_p (stmt))
+ {
+ tree t = gimple_assign_rhs1 (stmt);
+ /* Two structurally identical COMPONENT_REFs are not pointer-equal,
+ since reference trees are not shared, so ask the alias oracle. */
+ if (ref->mem == t
+ || ((TREE_CODE (t) == SSA_NAME
+ || DECL_P (t)
+ || handled_component_p (t)
+ || TREE_CODE (t) == MEM_REF
+ || TREE_CODE (t) == TARGET_MEM_REF)
+ && refs_may_alias_p (t, ref->mem)))
+ mark_ref_loaded (ref, loop);
+ }
+
if (is_stored)
mark_ref_stored (ref, loop);
@@ -1956,6 +1990,175 @@ get_lsm_tmp_name (tree ref, unsigned n)
return lsm_tmp_name;
}
+struct prev_flag_edges {
+ /* Edge to insert new flag comparison code. */
+ edge append_cond_position;
+
+ /* Edge for fall through from previous flag comparison. */
+ edge last_cond_fallthru;
+};
+
+/* Helper function for execute_sm. Emit code to store TMP_VAR into
+ MEM along edge EX.
+
+ The store is only done if MEM has changed. We do this so no
+ changes to MEM occur on code paths that did not originally store
+ into it.
+
+ The common case for execute_sm will transform:
+
+ for (...) {
+ if (foo)
+ stuff;
+ else
+ MEM = TMP_VAR;
+ }
+
+ into:
+
+ lsm = MEM;
+ for (...) {
+ if (foo)
+ stuff;
+ else
+ lsm = TMP_VAR;
+ }
+ MEM = lsm;
+
+ This function will generate:
+
+ // Note: this load can be avoided unless there is a load already
+ // present in the loop.
+ lsm = MEM;
+
+ lsm_flag = false;
+ ...
+ for (...) {
+ if (foo)
+ stuff;
+ else {
+ lsm = TMP_VAR;
+ lsm_flag = true;
+ }
+ }
+ if (lsm_flag) <--
+ MEM = lsm; <--
+*/
+
+static void
+execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag)
+{
+ basic_block new_bb, then_bb, old_dest;
+ bool loop_has_only_one_exit;
+ edge then_old_edge, orig_ex = ex;
+ gimple_stmt_iterator gsi;
+ gimple stmt;
+ struct prev_flag_edges *prev_edges = (struct prev_flag_edges *) ex->aux;
+
+ /* ?? Insert store after previous store if applicable. See note
+ below. */
+ if (prev_edges)
+ ex = prev_edges->append_cond_position;
+
+ loop_has_only_one_exit = single_pred_p (ex->dest);
+
+ if (loop_has_only_one_exit)
+ ex = split_block_after_labels (ex->dest);
+
+ old_dest = ex->dest;
+ new_bb = split_edge (ex);
+ then_bb = create_empty_bb (new_bb);
+ if (current_loops && new_bb->loop_father)
+ add_bb_to_loop (then_bb, new_bb->loop_father);
+
+ gsi = gsi_start_bb (new_bb);
+ stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
+ NULL_TREE, NULL_TREE);
+ gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+
+ gsi = gsi_start_bb (then_bb);
+ /* Insert actual store. */
+ stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
+ gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+
+ make_edge (new_bb, then_bb, EDGE_TRUE_VALUE);
+ make_edge (new_bb, old_dest, EDGE_FALSE_VALUE);
+ then_old_edge = make_edge (then_bb, old_dest, EDGE_FALLTHRU);
+
+ set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
+
+ if (prev_edges)
+ {
+ basic_block prevbb = prev_edges->last_cond_fallthru->src;
+ redirect_edge_succ (prev_edges->last_cond_fallthru, new_bb);
+ set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
+ set_immediate_dominator (CDI_DOMINATORS, old_dest,
+ recompute_dominator (CDI_DOMINATORS, old_dest));
+ }
+
+ /* ?? Because stores may alias, they must happen in the exact
+ order in which they originally happened. Save the position right after
+ the (_lsm) store we just created so we can continue appending after
+ it and maintain the original order. */
+ {
+ struct prev_flag_edges *p;
+
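+ /* alloc_aux_for_edge asserts that the aux field is clear, so
+ drop any state left over from a previous call on this edge. */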
+ if (orig_ex->aux)
+ orig_ex->aux = NULL;
+ alloc_aux_for_edge (orig_ex, sizeof (struct prev_flag_edges));
+ p = (struct prev_flag_edges *) orig_ex->aux;
+ p->append_cond_position = then_old_edge;
+ p->last_cond_fallthru = find_edge (new_bb, old_dest);
+ orig_ex->aux = (void *) p;
+ }
+
+ if (!loop_has_only_one_exit)
+ for (gsi = gsi_start_phis (old_dest); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple phi = gsi_stmt (gsi);
+ unsigned i;
+
+ for (i = 0; i < gimple_phi_num_args (phi); i++)
+ if (gimple_phi_arg_edge (phi, i)->src == new_bb)
+ {
+ tree arg = gimple_phi_arg_def (phi, i);
+ add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
+ update_stmt (phi);
+ }
+ }
+ /* Remove the original fall through edge. This was the
+ single_succ_edge (new_bb). */
+ EDGE_SUCC (new_bb, 0)->flags &= ~EDGE_FALLTHRU;
+}
+
+/* Helper function for execute_sm. At every location in LOOP where
+ REF is stored, insert code setting the flag that records the store. */
+
+static tree
+execute_sm_if_changed_flag_set (struct loop *loop, mem_ref_p ref)
+{
+ unsigned i;
+ mem_ref_loc_p loc;
+ tree flag;
+ VEC (mem_ref_loc_p, heap) *locs = NULL;
+ char *str = get_lsm_tmp_name (ref->mem, ~0);
+
+ lsm_tmp_name_add ("_flag");
+ flag = make_rename_temp (boolean_type_node, str);
+ get_all_locs_in_loop (loop, ref, &locs);
+ FOR_EACH_VEC_ELT (mem_ref_loc_p, locs, i, loc)
+ {
+ gimple_stmt_iterator gsi;
+ gimple stmt;
+
+ gsi = gsi_for_stmt (loc->stmt);
+ stmt = gimple_build_assign (flag, boolean_true_node);
+ gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+ }
+ VEC_free (mem_ref_loc_p, heap, locs);
+ return flag;
+}
+
/* Executes store motion of memory reference REF from LOOP.
Exits from the LOOP are stored in EXITS. The initialization of the
temporary variable is put to the preheader of the loop, and assignments
@@ -1964,12 +2167,16 @@ get_lsm_tmp_name (tree ref, unsigned n)
static void
execute_sm (struct loop *loop, VEC (edge, heap) *exits, mem_ref_p ref)
{
- tree tmp_var;
+ tree tmp_var, store_flag;
unsigned i;
- gimple load, store;
+ gimple load;
struct fmt_data fmt_data;
- edge ex;
+ edge ex, latch_edge;
struct lim_aux_data *lim_data;
+ bool multi_threaded_model_p = false;
if (dump_file && (dump_flags & TDF_DETAILS))
{
@@ -1985,23 +2192,69 @@ execute_sm (struct loop *loop, VEC (edge
fmt_data.orig_loop = loop;
for_each_index (&ref->mem, force_move_till, &fmt_data);
+ if ((flag_tm && loop_preheader_edge (loop)->src->flags & BB_IN_TRANSACTION)
+ || !PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES))
+ multi_threaded_model_p = true;
+
+ if (multi_threaded_model_p)
+ store_flag = execute_sm_if_changed_flag_set (loop, ref);
+
rewrite_mem_refs (loop, ref, tmp_var);
- /* Emit the load & stores. */
- load = gimple_build_assign (tmp_var, unshare_expr (ref->mem));
- lim_data = init_lim_data (load);
- lim_data->max_loop = loop;
- lim_data->tgt_loop = loop;
-
- /* Put this into the latch, so that we are sure it will be processed after
- all dependencies. */
- gsi_insert_on_edge (loop_latch_edge (loop), load);
- FOR_EACH_VEC_ELT (edge, exits, i, ex)
- {
- store = gimple_build_assign (unshare_expr (ref->mem), tmp_var);
- gsi_insert_on_edge (ex, store);
+ /* Emit the load code into the latch, so that we are sure it will
+ be processed after all dependencies. */
+ latch_edge = loop_latch_edge (loop);
+ /* For the multi-threaded variant, we can avoid the load altogether,
+ since the store is predicated by a flag. However, do the load if
+ it was originally in the loop. */
+ if (!multi_threaded_model_p || bitmap_bit_p (ref->loaded, loop->num))
+ {
+ load = gimple_build_assign (tmp_var, unshare_expr (ref->mem));
+ lim_data = init_lim_data (load);
+ lim_data->max_loop = loop;
+ lim_data->tgt_loop = loop;
+ gsi_insert_on_edge (latch_edge, load);
+ }
+ else if (multi_threaded_model_p)
+ {
+ /* We know that all uses of tmp_var will be gated by lsm_flag,
+ but subsequent passes can be confused and think tmp_var is
+ used before it is defined. If we avoided the load, create a
+ PHI node of 0 for the data flow path we know to be
+ unreachable.
+
+ ?? Ideally what we'd want is a flow sensitive data flow
+ analysis pass that can discover the above, and both remove
+ the load and add the dummy PHI nodes for unreachable
+ blocks. */
+ gimple phi;
+ edge e = loop_preheader_edge (loop);
+ tree t = make_ssa_name (tmp_var, NULL);
+
+ phi = create_phi_node (t, e->dest);
+ SSA_NAME_DEF_STMT (t) = phi;
+ add_phi_arg (phi, build_zero_cst (TREE_TYPE (t)), e, UNKNOWN_LOCATION);
+ }
+ if (multi_threaded_model_p)
+ {
+ load = gimple_build_assign (store_flag, boolean_false_node);
+ lim_data = init_lim_data (load);
+ lim_data->max_loop = loop;
+ lim_data->tgt_loop = loop;
+ gsi_insert_on_edge (latch_edge, load);
}
+
+ /* Sink the store to every exit from the loop. */
+ FOR_EACH_VEC_ELT (edge, exits, i, ex)
+ if (!multi_threaded_model_p)
+ {
+ gimple store;
+ store = gimple_build_assign (unshare_expr (ref->mem), tmp_var);
+ gsi_insert_on_edge (ex, store);
+ }
+ else
+ execute_sm_if_changed (ex, ref->mem, tmp_var, store_flag);
}
/* Hoists memory references MEM_REFS out of LOOP. EXITS is the list of exit
@@ -2410,6 +2663,8 @@ tree_ssa_lim_initialize (void)
if (flag_tm)
compute_transaction_bits ();
+
+ alloc_aux_for_edges (0);
}
/* Cleans up after the invariant motion pass. */
@@ -2421,6 +2676,8 @@ tree_ssa_lim_finalize (void)
unsigned i;
bitmap b;
+ free_aux_for_edges ();
+
FOR_EACH_BB (bb)
SET_ALWAYS_EXECUTED_IN (bb, NULL);
===================================================================
@@ -2449,13 +2449,15 @@ compute_transaction_bits (void)
struct tm_region *region;
VEC (basic_block, heap) *queue;
unsigned int i;
- gimple_stmt_iterator gsi;
basic_block bb;
/* ?? Perhaps we need to abstract gate_tm_init further, because we
certainly don't need it to calculate CDI_DOMINATOR info. */
gate_tm_init ();
+ FOR_EACH_BB (bb)
+ bb->flags &= ~BB_IN_TRANSACTION;
+
for (region = all_tm_regions; region; region = region->next)
{
queue = get_tm_region_blocks (region->entry_block,
@@ -2464,11 +2466,7 @@ compute_transaction_bits (void)
NULL,
/*stop_at_irr_p=*/true);
for (i = 0; VEC_iterate (basic_block, queue, i, bb); ++i)
- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
- {
- gimple stmt = gsi_stmt (gsi);
- gimple_set_in_transaction (stmt, true);
- }
+ bb->flags |= BB_IN_TRANSACTION;
VEC_free (basic_block, heap, queue);
}
===================================================================
@@ -305,11 +305,6 @@ struct GTY(()) gimple_statement_base {
/* Nonzero if this statement contains volatile operands. */
unsigned has_volatile_ops : 1;
- /* Nonzero if this statement appears inside a transaction. This bit
- is calculated on demand and has relevant information only after
- it has been calculated with compute_transaction_bits. */
- unsigned in_transaction : 1;
-
/* The SUBCODE field can be used for tuple-specific flags for tuples
that do not require subcodes. Note that SUBCODE should be at
least as wide as tree codes, as several tuples store tree codes
@@ -1585,15 +1580,7 @@ gimple_set_has_volatile_ops (gimple stmt
static inline bool
gimple_in_transaction (gimple stmt)
{
- return stmt->gsbase.in_transaction;
-}
-
-/* Set the IN_TRANSACTION flag to TRANSACTIONP. */
-
-static inline void
-gimple_set_in_transaction (gimple stmt, bool transactionp)
-{
- stmt->gsbase.in_transaction = (unsigned) transactionp;
+ return gimple_bb (stmt)->flags & BB_IN_TRANSACTION;
}
/* Return true if statement STMT may access memory. */
===================================================================
@@ -814,10 +814,10 @@ free_aux_for_blocks (void)
clear_aux_for_blocks ();
}
-/* Allocate a memory edge of SIZE as BB->aux. The obstack must
+/* Allocate a memory edge of SIZE as E->aux. The obstack must
be first initialized by alloc_aux_for_edges. */
-static void
+void
alloc_aux_for_edge (edge e, int size)
{
/* Verify that aux field is clear. */
===================================================================
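alloc_aux_for_edge is exported so that tree-ssa-loop-im.c can attach its prev_flag_edges state lazily, to just the exit edges it rewrites, instead of preallocating aux data for every edge in the CFG. A minimal usage sketch of this (GCC-internal) API; the struct and function names here are illustrative only:

struct my_edge_info { int visits; };

static void
tag_edge (edge e)
{
  /* alloc_aux_for_edge asserts !e->aux, so allocate only once;
     the aux memory starts out zeroed.  */
  if (!e->aux)
    alloc_aux_for_edge (e, sizeof (struct my_edge_info));
  ((struct my_edge_info *) e->aux)->visits++;
}

static void
my_pass (void)
{
  alloc_aux_for_edges (0);  /* initialize the obstack; 0 preallocates nothing */
  /* ... call tag_edge on the edges of interest ... */
  free_aux_for_edges ();    /* releases every edge's aux at once */
}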
@@ -256,7 +256,12 @@ enum bb_flags
df_set_bb_dirty, but not cleared by df_analyze, so it can be used
to test whether a block has been modified prior to a df_analyze
call. */
- BB_MODIFIED = 1 << 12
+ BB_MODIFIED = 1 << 12,
+
+ /* Set on blocks that are in a transaction. This is calculated on
+ demand, and is available after calling
+ compute_transaction_bits(). */
+ BB_IN_TRANSACTION = 1 << 13
};
/* Dummy flag for convenience in the hot/cold partitioning code. */
@@ -788,6 +793,7 @@ extern basic_block alloc_block (void);
extern void alloc_aux_for_blocks (int);
extern void clear_aux_for_blocks (void);
extern void free_aux_for_blocks (void);
+extern void alloc_aux_for_edge (edge, int);
extern void alloc_aux_for_edges (int);
extern void clear_aux_for_edges (void);
extern void free_aux_for_edges (void);