@@ -2226,7 +2226,8 @@ stor-layout.o : stor-layout.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
asan.o : asan.c asan.h $(CONFIG_H) $(SYSTEM_H) $(GIMPLE_H) \
output.h coretypes.h $(GIMPLE_PRETTY_PRINT_H) \
tree-iterator.h $(TREE_FLOW_H) $(TREE_PASS_H) \
- $(TARGET_H) $(EXPR_H) $(OPTABS_H) $(TM_P_H) langhooks.h
+ $(TARGET_H) $(EXPR_H) $(OPTABS_H) $(TM_P_H) langhooks.h \
+ $(HASH_TABLE_H)
tsan.o : $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(TREE_INLINE_H) \
$(GIMPLE_H) $(DIAGNOSTIC_H) langhooks.h \
$(TM_H) coretypes.h $(TREE_DUMP_H) $(TREE_PASS_H) $(CGRAPH_H) $(GGC_H) \
@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see
#include "output.h"
#include "tm_p.h"
#include "langhooks.h"
+#include "hash-table.h"
/* AddressSanitizer finds out-of-bounds and use-after-free bugs
with <2x slowdown on average.
@@ -212,6 +213,159 @@ alias_set_type asan_shadow_set = -1;
alias set is used for all shadow memory accesses. */
static GTY(()) tree shadow_ptr_types[2];
+/* Hashtable support for memory references used by gimple
+ statements. */
+
+/* This type represents a reference to a memory region. */
+struct __attribute__ ((visibility ("hidden"))) mem_ref
+{
+ /* The expression of the begining of the memory region. */
+ tree start;
+ /* The expression representing the length of the region. */
+ tree len;
+ /* This is true iff the memory reference is a store. */
+ bool is_store;
+
+ /* Constructors. */
+ mem_ref () : start (NULL_TREE), len (NULL_TREE), is_store (false) {}
+ mem_ref (tree s, bool p) : start (s), len (NULL_TREE), is_store (p) {}
+ mem_ref (tree s, tree l, bool p) : start (s), len (l), is_store (p) {}
+};
+
+struct __attribute__((visibility ("hidden"))) mem_ref_hasher
+ : typed_delete_remove <mem_ref>
+{
+ typedef mem_ref value_type;
+ typedef mem_ref compare_type;
+
+ static inline hashval_t hash (const value_type *);
+ static inline bool equal (const value_type *, const compare_type *);
+};
+
+static bool get_mem_ref_of_assignment (const gimple, mem_ref &);
+static bool get_mem_refs_of_builtin_call (const gimple, mem_ref&,
+ mem_ref&, mem_ref&,
+ bool &);
+/* Hash a memory reference. */
+
+inline hashval_t
+mem_ref_hasher::hash (const mem_ref *mem_ref)
+{
+ hashval_t h = iterative_hash_hashval_t (0, mem_ref->is_store);
+ h = iterative_hash_expr (mem_ref->start, h);
+ h = iterative_hash_expr (mem_ref->len, h);
+ return h;
+}
+
+/* Compare two memory references. We accept the length of either
+ memory references to be NULL_TREE. */
+
+inline bool
+mem_ref_hasher::equal (const mem_ref *m1,
+ const mem_ref *m2)
+{
+ if (m1->is_store != m2->is_store
+ || (m1->len != NULL_TREE) != (m2->len != NULL_TREE)
+ || !operand_equal_p (m1->start, m2->start, 0))
+ return false;
+
+ if (m1->len != NULL_TREE)
+ return operand_equal_p (m1->len, m2->len, 0);
+
+ return true;
+}
+
+/* Returns a reference to the hash table containing memory references.
+ This function ensures that the hash table is created. Note that
+ this hash table is updated by the function
+ update_mem_ref_hash_table. */
+
+static hash_table <mem_ref_hasher>&
+get_mem_ref_hash_table ()
+{
+ static hash_table <mem_ref_hasher> ht;
+
+ if (!ht.is_created ())
+ ht.create (10);
+
+ return ht;
+}
+
+/* Return true iff a given gimple statement has been instrumented.
+ Note that the statement is "defined" by the memory references it
+ contains. */
+
+static bool
+has_stmt_been_instrumented_p (gimple stmt)
+{
+ if (gimple_assign_single_p (stmt))
+ {
+ mem_ref r;
+ if (get_mem_ref_of_assignment (stmt, r))
+ return (get_mem_ref_hash_table ().find (&r) != NULL);
+ }
+ else if (is_gimple_builtin_call (stmt))
+ {
+ mem_ref src0, src1, dest;
+ bool dest_is_deref = false;
+ if (get_mem_refs_of_builtin_call (stmt, src0, src1, dest,
+ dest_is_deref))
+ {
+ if (src0.start != NULL_TREE
+ && get_mem_ref_hash_table ().find (&src0) == NULL)
+ return false;
+
+ if (src1.start != NULL_TREE
+ && get_mem_ref_hash_table ().find (&src1) == NULL)
+ return false;
+
+ if (dest.start != NULL_TREE
+ && get_mem_ref_hash_table ().find (&dest) == NULL)
+ return false;
+
+ return true;
+ }
+ }
+ return false;
+}
+
+/* Insert a memory reference into the hash table. */
+
+static void
+update_mem_ref_hash_table (const mem_ref &ref)
+{
+ hash_table <mem_ref_hasher> ht = get_mem_ref_hash_table ();
+ mem_ref **slot = ht.find_slot (&ref, INSERT);
+ gcc_assert (*slot == NULL);
+ *slot = new mem_ref (ref);
+}
+
+/* Set REF to the memory reference present in a gimple assignment
+ ASSIGNMENT. Return true upon successful completion, false
+ otherwise. */
+
+static bool
+get_mem_ref_of_assignment (const gimple assignment,
+ mem_ref &ref)
+{
+ gcc_assert (gimple_assign_single_p (assignment));
+
+ if (gimple_store_p (assignment))
+ {
+ ref.start = gimple_assign_lhs (assignment);
+ ref.is_store = true;
+ }
+ else if (gimple_assign_load_p (assignment))
+ {
+ ref.start = gimple_assign_rhs1 (assignment);
+ ref.is_store = false;
+ }
+ else
+ return false;
+
+ return true;
+}
+
/* Initialize shadow_ptr_types array. */
static void
@@ -1092,24 +1246,22 @@ instrument_strlen_call (gimple_stmt_iterator *iter)
return true;
}
-/* Instrument the call to a built-in memory access function that is
- pointed to by the iterator ITER.
-
- Upon completion, return TRUE iff *ITER has been advanced to the
- statement following the one it was originally pointing to. */
+/* Return the memory references contained in a gimple statement
+ representing a builtin call that has to do with memory access. */
static bool
-instrument_builtin_call (gimple_stmt_iterator *iter)
+get_mem_refs_of_builtin_call (const gimple call,
+ mem_ref &src0,
+ mem_ref &src1,
+ mem_ref &dst,
+ bool &dest_is_deref)
{
- gimple call = gsi_stmt (*iter);
-
gcc_checking_assert (is_gimple_builtin_call (call));
tree callee = gimple_call_fndecl (call);
- location_t loc = gimple_location (call);
tree source0 = NULL_TREE, source1 = NULL_TREE,
dest = NULL_TREE, len = NULL_TREE;
- bool is_store = true;
+ bool is_store = true, got_reference_p = false;
switch (DECL_FUNCTION_CODE (callee))
{
@@ -1154,7 +1306,9 @@ instrument_builtin_call (gimple_stmt_iterator *iter)
break;
case BUILT_IN_STRLEN:
- return instrument_strlen_call (iter);
+ source0 = gimple_call_arg (call, 0);
+ len = gimple_call_lhs (call);
+ break ;
/* And now the __atomic* and __sync builtins.
These are handled differently from the classical memory memory
@@ -1364,9 +1518,6 @@ instrument_builtin_call (gimple_stmt_iterator *iter)
dest, build_int_cst (TREE_TYPE (dest), 0));
else
gcc_unreachable ();
-
- instrument_derefs (iter, dest, loc, is_store);
- return false;
}
default:
@@ -1379,37 +1530,149 @@ instrument_builtin_call (gimple_stmt_iterator *iter)
if (len != NULL_TREE)
{
if (source0 != NULL_TREE)
- instrument_mem_region_access (source0, len, iter,
- loc, /*is_store=*/false);
+ {
+ src0.start = source0;
+ src0.len = len;
+ src0.is_store = false;
+ }
+
if (source1 != NULL_TREE)
- instrument_mem_region_access (source1, len, iter,
- loc, /*is_store=*/false);
- else if (dest != NULL_TREE)
- instrument_mem_region_access (dest, len, iter,
- loc, /*is_store=*/true);
-
- *iter = gsi_for_stmt (call);
- return false;
+ {
+ src1.start = source1;
+ src1.len = len;
+ src1.is_store = false;
+ }
+
+ if (dest != NULL_TREE)
+ {
+ dst.start = dest;
+ dst.len = len;
+ dst.is_store = true;
+ }
+
+ got_reference_p = true;
}
- return false;
+ else
+ {
+ if (dest)
+ {
+ dst.start = dest;
+ dst.len = NULL_TREE;
+ dst.is_store = is_store;
+ dest_is_deref = true;
+ got_reference_p = true;
+ }
+ }
+
+ return got_reference_p;
+}
+
+/* Instrument the call to a built-in memory access function that is
+ pointed to by the iterator ITER.
+
+ Upon completion, return TRUE iff *ITER has been advanced to the
+ statement following the one it was originally pointing to. */
+
+static bool
+instrument_builtin_call (gimple_stmt_iterator *iter)
+{
+ bool iter_advanced_p = false;
+ gimple call = gsi_stmt (*iter);
+
+ gcc_checking_assert (is_gimple_builtin_call (call));
+
+ tree callee = gimple_call_fndecl (call);
+ location_t loc = gimple_location (call);
+
+ if (DECL_FUNCTION_CODE (callee) == BUILT_IN_STRLEN)
+ iter_advanced_p = instrument_strlen_call (iter);
+ else
+ {
+ mem_ref src0, src1, dest;
+ bool dest_is_deref = false;
+
+ if (get_mem_refs_of_builtin_call (call, src0, src1, dest,
+ dest_is_deref))
+ {
+ if (dest_is_deref)
+ {
+ instrument_derefs (iter, dest.start, loc, dest.is_store);
+ gsi_next (iter);
+ iter_advanced_p = true;
+ update_mem_ref_hash_table (dest);
+ }
+ else if (src0.len || src1.len || dest.len)
+ {
+ if (src0.start)
+ {
+ instrument_mem_region_access (src0.start, src0.len,
+ iter, loc, /*is_store=*/false);
+ update_mem_ref_hash_table (src0);
+ }
+ if (src1.start != NULL_TREE)
+ {
+ instrument_mem_region_access (src1.start, src1.len,
+ iter, loc, /*is_store=*/false);
+ update_mem_ref_hash_table (src1);
+ }
+ if (dest.start != NULL_TREE)
+ {
+ instrument_mem_region_access (dest.start, dest.len,
+ iter, loc, /*is_store=*/true);
+ update_mem_ref_hash_table (dest);
+ }
+ *iter = gsi_for_stmt (call);
+ gsi_next (iter);
+ iter_advanced_p = true;
+ }
+ }
+ }
+ return iter_advanced_p;
}
/* Instrument the assignment statement ITER if it is subject to
- instrumentation. */
+ instrumentation. Return TRUE iff instrumentation actually
+ happened. In that case, the iterator ITER is advanced to the next
+ logical expression following the one initially pointed to by ITER,
+ and the relevant memory reference that which access has been
+ instrumented is added to the memory references hash table. */
-static void
-instrument_assignment (gimple_stmt_iterator *iter)
+static bool
+maybe_instrument_assignment (gimple_stmt_iterator *iter)
{
gimple s = gsi_stmt (*iter);
gcc_assert (gimple_assign_single_p (s));
+ tree ref_expr = NULL_TREE;
+ bool is_store, is_instrumented = false;
+
if (gimple_store_p (s))
- instrument_derefs (iter, gimple_assign_lhs (s),
- gimple_location (s), true);
+ {
+ ref_expr = gimple_assign_lhs (s);
+ is_store = true;
+ instrument_derefs (iter, ref_expr,
+ gimple_location (s),
+ is_store);
+ update_mem_ref_hash_table (mem_ref (ref_expr, is_store));
+ is_instrumented = true;
+ }
+
if (gimple_assign_load_p (s))
- instrument_derefs (iter, gimple_assign_rhs1 (s),
- gimple_location (s), false);
+ {
+ ref_expr = gimple_assign_rhs1 (s);
+ is_store = false;
+ instrument_derefs (iter, ref_expr,
+ gimple_location (s),
+ is_store);
+ update_mem_ref_hash_table (mem_ref (ref_expr, is_store));
+ is_instrumented = true;
+ }
+
+ if (is_instrumented)
+ gsi_next (iter);
+
+ return is_instrumented;
}
/* Instrument the function call pointed to by the iterator ITER, if it
@@ -1449,11 +1712,10 @@ maybe_instrument_call (gimple_stmt_iterator *iter)
return false;
}
-/* asan: this looks too complex. Can this be done simpler? */
-/* Transform
- 1) Memory references.
- 2) BUILTIN_ALLOCA calls.
-*/
+/* Walk each instruction of all basic block and instrument those that
+ represent memory references: loads, stores, or function calls.
+ In a given basic block, this function avoids instrumenting memory
+ references that have already been instrumented. */
static void
transform_statements (void)
@@ -1464,23 +1726,39 @@ transform_statements (void)
FOR_EACH_BB (bb)
{
+ get_mem_ref_hash_table ().dispose ();
+
if (bb->index >= saved_last_basic_block) continue;
for (i = gsi_start_bb (bb); !gsi_end_p (i);)
{
gimple s = gsi_stmt (i);
- if (gimple_assign_single_p (s))
- instrument_assignment (&i);
- else if (is_gimple_call (s))
+ if (has_stmt_been_instrumented_p (s))
+ gsi_next (&i);
+ else if (gimple_assign_single_p (s)
+ && maybe_instrument_assignment (&i))
+ /* Nothing to do as maybe_instrument_assignment advanced
+ the iterator I. */;
+ else if (is_gimple_call (s)
+ && maybe_instrument_call (&i))
+ /* Nothing to do as maybe_instrument_call
+ advanced the iterator I. */;
+ else
{
- if (maybe_instrument_call (&i))
- /* Avoid gsi_next (&i), because maybe_instrument_call
- advanced the I iterator already. */
- continue;
+ /* No instrumentation happened.
+
+ If the current instruction is a function call, let's
+ forget about the memory references that got
+ instrumented. Otherwise we might miss some
+ instrumentation opportunities. */
+ if (is_gimple_call (s))
+ get_mem_ref_hash_table ().dispose ();
+
+ gsi_next (&i);
}
- gsi_next (&i);
}
}
+ get_mem_ref_hash_table ().dispose ();
}
/* Build
new file mode 100644
@@ -0,0 +1,70 @@
+/*
+ This tests that when faced with two references to the same memory
+ location in the same basic block, the second reference should not
+ be instrumented by the Address Sanitizer
+
+ { dg-options "-fdump-tree-asan0" }
+ { dg-do compile }
+ */
+
+static char tab[4] = {0};
+
+static int
+test0 ()
+{
+ /* __builtin___asan_report_store1 called 2 times for the two stores
+ below. */
+ tab[0] = 1;
+ tab[1] = 2;
+
+ /* __builtin___asan_report_load1 called 1 time for the store
+ below. */
+ char t0 = tab[1];
+
+ /* This load should not be instrumented because it is to the same
+ memory location as above. */
+ char t1 = tab[1];
+
+ return t0 + t1;
+}
+
+static int
+test1 ()
+{
+ /*__builtin___asan_report_store1 called 1 time here to instrument
+ the initialization. */
+ char foo[4] = {1};
+
+ /*__builtin___asan_report_store1 called 2 times here to instrument
+ the store to the memory region of tab. */
+ __builtin_memset (tab, 3, sizeof (tab));
+
+ /* There is no instrumentation for the two memset calls below. */
+ __builtin_memset (tab, 4, sizeof (tab));
+ __builtin_memset (tab, 5, sizeof (tab));
+
+ /* There are 2 calls to __builtin___asan_report_store1 and 2 calls
+ to __builtin___asan_report_load1 to instrument the store to
+ (subset of) the memory region of tab. */
+ __builtin_memcpy (&tab[1], foo, sizeof (tab) - 1);
+
+ /* There is 1 call to __builtin___asan_report_load1 to represent
+ the load from tab[1]. */
+ return tab[1];
+
+ /* So for these function, there should be 5 calls to
+ __builtin___asan_report_store1. */
+}
+
+int
+main ()
+{
+ return test0 () && test1 ();
+}
+
+/*
+ { dg-final { scan-tree-dump-times "__builtin___asan_report_store1" 7 "instrumented stores"} }
+
+ { dg-final { scan-tree-dump-times "__builtin___asan_report_load" 4 "instrumented loads"} }
+
+ */