From patchwork Thu Nov 1 19:52:34 2012 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [01/13] Initial import of asan from the Google branch Date: Thu, 01 Nov 2012 09:52:34 -0000 From: Dodji Seketeli X-Patchwork-Id: 196357 Message-Id: <1351799566-31447-2-git-send-email-dodji@redhat.com> To: gcc-patches@gcc.gnu.org Cc: dnovillo@google.com, jakub@redhat.com, wmi@google.com, davidxl@google.com, konstantin.s.serebryany@gmail.com From: dnovillo This patch merely imports the initial state of asan as it was in the Google branch. It provides basic infrastructure for asan to instrument memory accesses on the heap, at -O3. Note that it supports neither stack nor global variable protection. The rest of the patches of the set is intended to further improve this base. * Makefile.in: Add tree-asan.c. * common.opt: Add -fasan option. * invoke.texi: Document the new flag. * passes.c: Add the asan pass. * toplev.c (compile_file): Call asan_finish_file. * tree-asan.c: New file. * tree-asan.h: New file. * tree-pass.h: Declare pass_asan. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/asan@192328 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog.asan | 10 ++ gcc/Makefile.in | 5 + gcc/common.opt | 4 + gcc/doc/invoke.texi | 8 +- gcc/passes.c | 1 + gcc/toplev.c | 5 + gcc/tree-asan.c | 403 ++++++++++++++++++++++++++++++++++++++++++++++++++++ gcc/tree-asan.h | 26 ++++ gcc/tree-pass.h | 1 + 9 files changed, 462 insertions(+), 1 deletion(-) create mode 100644 gcc/ChangeLog.asan create mode 100644 gcc/tree-asan.c create mode 100644 gcc/tree-asan.h diff --git a/gcc/ChangeLog.asan b/gcc/ChangeLog.asan new file mode 100644 index 0000000..40299e2 --- /dev/null +++ b/gcc/ChangeLog.asan @@ -0,0 +1,10 @@ +2012-10-10 Wei Mi + + * Makefile.in: Add tree-asan.c. + * common.opt: Add -fasan option. + * invoke.texi: Document the new flag. + * passes.c: Add the asan pass. + * toplev.c (compile_file): Call asan_finish_file. + * tree-asan.c: New file. + * tree-asan.h: New file. + * tree-pass.h: Declare pass_asan. diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 3a8ffbe..e8c4a19 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1350,6 +1350,7 @@ OBJS = \ tracer.o \ trans-mem.o \ tree-affine.o \ + tree-asan.o \ tree-call-cdce.o \ tree-cfg.o \ tree-cfgcleanup.o \ @@ -2209,6 +2210,10 @@ stor-layout.o : stor-layout.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(TREE_H) $(PARAMS_H) $(FLAGS_H) $(FUNCTION_H) $(EXPR_H) $(RTL_H) \ $(GGC_H) $(TM_P_H) $(TARGET_H) langhooks.h $(REGS_H) gt-stor-layout.h \ $(DIAGNOSTIC_CORE_H) $(CGRAPH_H) $(TREE_INLINE_H) $(TREE_DUMP_H) $(GIMPLE_H) +tree-asan.o : tree-asan.c tree-asan.h $(CONFIG_H) pointer-set.h \ + $(SYSTEM_H) $(TREE_H) $(GIMPLE_H) \ + output.h $(DIAGNOSTIC_H) coretypes.h $(TREE_DUMP_H) $(FLAGS_H) \ + tree-pretty-print.h tree-ssa-tail-merge.o: tree-ssa-tail-merge.c \ $(SYSTEM_H) $(CONFIG_H) coretypes.h $(TM_H) $(BITMAP_H) \ $(FLAGS_H) $(TM_P_H) $(BASIC_BLOCK_H) \ diff --git a/gcc/common.opt b/gcc/common.opt index 5b69aff..eca0740 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -849,6 +849,10 @@ fargument-noalias-anything Common Ignore Does nothing. Preserved for backward compatibility. +fasan +Common RejectNegative Report Var(flag_asan) +Enable AddressSanitizer, a memory error detector + fasynchronous-unwind-tables Common Report Var(flag_asynchronous_unwind_tables) Optimization Generate unwind tables that are exact at each instruction boundary diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index ff0c87d..8292ef1 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -353,7 +353,7 @@ Objective-C and Objective-C++ Dialects}. @item Optimization Options @xref{Optimize Options,,Options that Control Optimization}. @gccoptlist{-falign-functions[=@var{n}] -falign-jumps[=@var{n}] @gol --falign-labels[=@var{n}] -falign-loops[=@var{n}] -fassociative-math @gol +-falign-labels[=@var{n}] -falign-loops[=@var{n}] -fasan -fassociative-math @gol -fauto-inc-dec -fbranch-probabilities -fbranch-target-load-optimize @gol -fbranch-target-load-optimize2 -fbtr-bb-exclusive -fcaller-saves @gol -fcheck-data-deps -fcombine-stack-adjustments -fconserve-stack @gol @@ -6822,6 +6822,12 @@ assumptions based on that. The default is @option{-fzero-initialized-in-bss}. +@item -fasan +Enable AddressSanitizer, a fast memory error detector. +Memory access instructions will be instrumented to detect +out-of-bounds and use-after-free bugs. So far only heap bugs will be detected. +See @uref{http://code.google.com/p/address-sanitizer/} for more details. + @item -fmudflap -fmudflapth -fmudflapir @opindex fmudflap @opindex fmudflapth diff --git a/gcc/passes.c b/gcc/passes.c index 67aae52..66a2f74 100644 --- a/gcc/passes.c +++ b/gcc/passes.c @@ -1456,6 +1456,7 @@ init_optimization_passes (void) NEXT_PASS (pass_split_crit_edges); NEXT_PASS (pass_pre); NEXT_PASS (pass_sink_code); + NEXT_PASS (pass_asan); NEXT_PASS (pass_tree_loop); { struct opt_pass **p = &pass_tree_loop.pass.sub; diff --git a/gcc/toplev.c b/gcc/toplev.c index 5cbb364..b1aff0c 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -72,6 +72,7 @@ along with GCC; see the file COPYING3. If not see #include "value-prof.h" #include "alloc-pool.h" #include "tree-mudflap.h" +#include "tree-asan.h" #include "gimple.h" #include "tree-ssa-alias.h" #include "plugin.h" @@ -570,6 +571,10 @@ compile_file (void) if (flag_mudflap) mudflap_finish_file (); + /* File-scope initialization for AddressSanitizer. */ + if (flag_asan) + asan_finish_file (); + output_shared_constant_pool (); output_object_blocks (); finish_tm_clone_pairs (); diff --git a/gcc/tree-asan.c b/gcc/tree-asan.c new file mode 100644 index 0000000..a8841d6 --- /dev/null +++ b/gcc/tree-asan.c @@ -0,0 +1,403 @@ +/* AddressSanitizer, a fast memory error detector. + Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Kostya Serebryany + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "tm_p.h" +#include "basic-block.h" +#include "flags.h" +#include "function.h" +#include "tree-inline.h" +#include "gimple.h" +#include "tree-iterator.h" +#include "tree-flow.h" +#include "tree-dump.h" +#include "tree-pass.h" +#include "diagnostic.h" +#include "demangle.h" +#include "langhooks.h" +#include "ggc.h" +#include "cgraph.h" +#include "gimple.h" +#include "tree-asan.h" +#include "gimple-pretty-print.h" + +/* + AddressSanitizer finds out-of-bounds and use-after-free bugs + with <2x slowdown on average. + + The tool consists of two parts: + instrumentation module (this file) and a run-time library. + The instrumentation module adds a run-time check before every memory insn. + For a 8- or 16- byte load accessing address X: + ShadowAddr = (X >> 3) + Offset + ShadowValue = *(char*)ShadowAddr; // *(short*) for 16-byte access. + if (ShadowValue) + __asan_report_load8(X); + For a load of N bytes (N=1, 2 or 4) from address X: + ShadowAddr = (X >> 3) + Offset + ShadowValue = *(char*)ShadowAddr; + if (ShadowValue) + if ((X & 7) + N - 1 > ShadowValue) + __asan_report_loadN(X); + Stores are instrumented similarly, but using __asan_report_storeN functions. + A call too __asan_init() is inserted to the list of module CTORs. + + The run-time library redefines malloc (so that redzone are inserted around + the allocated memory) and free (so that reuse of free-ed memory is delayed), + provides __asan_report* and __asan_init functions. + + Read more: + http://code.google.com/p/address-sanitizer/wiki/AddressSanitizerAlgorithm + + Future work: + The current implementation supports only detection of out-of-bounds and + use-after-free bugs in heap. + In order to support out-of-bounds for stack and globals we will need + to create redzones for stack and global object and poison them. +*/ + +/* The shadow address is computed as (X>>asan_scale) + (1<src; + join_bb = e->dest; + + /* A recap at this point: join_bb is the basic block at whose head + is the gimple statement for which this check expression is being + built. cond_bb is the (possibly new, synthetic) basic block the + end of which will contain the cache-lookup code, and a + conditional that jumps to the cache-miss code or, much more + likely, over to join_bb. */ + + /* Create the bb that contains the crash block. */ + then_bb = create_empty_bb (cond_bb); + make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE); + make_single_succ_edge (then_bb, join_bb, EDGE_FALLTHRU); + + /* Mark the pseudo-fallthrough edge from cond_bb to join_bb. */ + e = find_edge (cond_bb, join_bb); + e->flags = EDGE_FALSE_VALUE; + e->count = cond_bb->count; + e->probability = REG_BR_PROB_BASE; + + /* Update dominance info. Note that bb_join's data was + updated by split_block. */ + if (dom_info_available_p (CDI_DOMINATORS)) + { + set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb); + set_immediate_dominator (CDI_DOMINATORS, join_bb, cond_bb); + } + + base_addr = create_tmp_reg (uintptr_type, "__asan_base_addr"); + + seq = NULL; + t = fold_convert_loc (location, uintptr_type, + unshare_expr (base)); + t = force_gimple_operand (t, &stmts, false, NULL_TREE); + gimple_seq_add_seq (&seq, stmts); + g = gimple_build_assign (base_addr, t); + gimple_set_location (g, location); + gimple_seq_add_stmt (&seq, g); + + /* Build (base_addr >> asan_scale) + (1 << asan_offset_log). */ + + t = build2 (RSHIFT_EXPR, uintptr_type, base_addr, + build_int_cst (uintptr_type, asan_scale)); + t = build2 (PLUS_EXPR, uintptr_type, t, + build2 (LSHIFT_EXPR, uintptr_type, + build_int_cst (uintptr_type, 1), + build_int_cst (uintptr_type, asan_offset_log) + )); + t = build1 (INDIRECT_REF, shadow_type, + build1 (VIEW_CONVERT_EXPR, shadow_ptr_type, t)); + t = force_gimple_operand (t, &stmts, false, NULL_TREE); + gimple_seq_add_seq (&seq, stmts); + shadow_value = create_tmp_reg (shadow_type, "__asan_shadow"); + g = gimple_build_assign (shadow_value, t); + gimple_set_location (g, location); + gimple_seq_add_stmt (&seq, g); + t = build2 (NE_EXPR, boolean_type_node, shadow_value, + build_int_cst (shadow_type, 0)); + if (size_in_bytes < 8) + { + + /* Slow path for 1-, 2- and 4- byte accesses. + Build ((base_addr & 7) + (size_in_bytes - 1)) >= shadow_value. */ + + u = build2 (BIT_AND_EXPR, uintptr_type, + base_addr, + build_int_cst (uintptr_type, 7)); + u = build1 (CONVERT_EXPR, shadow_type, u); + u = build2 (PLUS_EXPR, shadow_type, u, + build_int_cst (shadow_type, size_in_bytes - 1)); + u = build2 (GE_EXPR, uintptr_type, u, shadow_value); + } + else + u = build_int_cst (boolean_type_node, 1); + t = build2 (TRUTH_AND_EXPR, boolean_type_node, t, u); + t = force_gimple_operand (t, &stmts, false, NULL_TREE); + gimple_seq_add_seq (&seq, stmts); + cond = create_tmp_reg (boolean_type_node, "__asan_crash_cond"); + g = gimple_build_assign (cond, t); + gimple_set_location (g, location); + gimple_seq_add_stmt (&seq, g); + g = gimple_build_cond (NE_EXPR, cond, boolean_false_node, NULL_TREE, + NULL_TREE); + gimple_set_location (g, location); + gimple_seq_add_stmt (&seq, g); + + /* Generate call to the run-time library (e.g. __asan_report_load8). */ + + gsi = gsi_last_bb (cond_bb); + gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING); + seq = NULL; + g = gimple_build_call (report_error_func (is_store, size_in_bytes), + 1, base_addr); + gimple_seq_add_stmt (&seq, g); + + /* Insert the check code in the THEN block. */ + + gsi = gsi_start_bb (then_bb); + gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING); + + *iter = gsi_start_bb (join_bb); +} + +/* If T represents a memory access, add instrumentation code before ITER. + LOCATION is source code location. + IS_STORE is either 1 (for a store) or 0 (for a load). */ + +static void +instrument_derefs (gimple_stmt_iterator *iter, tree t, + location_t location, int is_store) +{ + tree type, base; + int size_in_bytes; + + type = TREE_TYPE (t); + if (type == error_mark_node) + return; + switch (TREE_CODE (t)) + { + case ARRAY_REF: + case COMPONENT_REF: + case INDIRECT_REF: + case MEM_REF: + break; + default: + return; + } + size_in_bytes = tree_low_cst (TYPE_SIZE (type), 0) / BITS_PER_UNIT; + if (size_in_bytes != 1 && size_in_bytes != 2 && + size_in_bytes != 4 && size_in_bytes != 8 && size_in_bytes != 16) + return; + { + /* For now just avoid instrumenting bit field acceses. + Fixing it is doable, but expected to be messy. */ + + HOST_WIDE_INT bitsize, bitpos; + tree offset; + enum machine_mode mode; + int volatilep = 0, unsignedp = 0; + get_inner_reference (t, &bitsize, &bitpos, &offset, + &mode, &unsignedp, &volatilep, false); + if (bitpos != 0 || bitsize != size_in_bytes * BITS_PER_UNIT) + return; + } + + base = build_addr (t, current_function_decl); + build_check_stmt (base, iter, location, is_store, size_in_bytes); +} + +/* asan: this looks too complex. Can this be done simpler? */ +/* Transform + 1) Memory references. + 2) BUILTIN_ALLOCA calls. +*/ + +static void +transform_statements (void) +{ + basic_block bb; + gimple_stmt_iterator i; + int saved_last_basic_block = last_basic_block; + enum gimple_rhs_class grhs_class; + + FOR_EACH_BB (bb) + { + if (bb->index >= saved_last_basic_block) continue; + for (i = gsi_start_bb (bb); !gsi_end_p (i); gsi_next (&i)) + { + gimple s = gsi_stmt (i); + if (gimple_code (s) != GIMPLE_ASSIGN) + continue; + instrument_derefs (&i, gimple_assign_lhs (s), + gimple_location (s), 1); + instrument_derefs (&i, gimple_assign_rhs1 (s), + gimple_location (s), 0); + grhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (s)); + if (grhs_class == GIMPLE_BINARY_RHS) + instrument_derefs (&i, gimple_assign_rhs2 (s), + gimple_location (s), 0); + } + } +} + +/* Module-level instrumentation. + - Insert __asan_init() into the list of CTORs. + - TODO: insert redzones around globals. + */ + +void +asan_finish_file (void) +{ + tree ctor_statements = NULL_TREE; + append_to_statement_list (build_call_expr (asan_init_func (), 0), + &ctor_statements); + cgraph_build_static_cdtor ('I', ctor_statements, + MAX_RESERVED_INIT_PRIORITY - 1); +} + +/* Instrument the current function. */ + +static unsigned int +asan_instrument (void) +{ + struct gimplify_ctx gctx; + tree uintptr_type = lang_hooks.types.type_for_mode (ptr_mode, true); + int is_64 = tree_low_cst (TYPE_SIZE (uintptr_type), 0) == 64; + asan_offset_log = is_64 ? asan_offset_log_64 : asan_offset_log_32; + push_gimplify_context (&gctx); + transform_statements (); + pop_gimplify_context (NULL); + return 0; +} + +static bool +gate_asan (void) +{ + return flag_asan != 0; +} + +struct gimple_opt_pass pass_asan = +{ + { + GIMPLE_PASS, + "asan", /* name */ + gate_asan, /* gate */ + asan_instrument, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + TV_NONE, /* tv_id */ + PROP_ssa | PROP_cfg | PROP_gimple_leh,/* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_verify_flow | TODO_verify_stmts + | TODO_update_ssa /* todo_flags_finish */ + } +}; diff --git a/gcc/tree-asan.h b/gcc/tree-asan.h new file mode 100644 index 0000000..590cf35 --- /dev/null +++ b/gcc/tree-asan.h @@ -0,0 +1,26 @@ +/* AddressSanitizer, a fast memory error detector. + Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Kostya Serebryany + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifndef TREE_ASAN +#define TREE_ASAN + +extern void asan_finish_file(void); + +#endif /* TREE_ASAN */ diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 8ed2d98..73c5886 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -259,6 +259,7 @@ struct register_pass_info extern struct gimple_opt_pass pass_mudflap_1; extern struct gimple_opt_pass pass_mudflap_2; +extern struct gimple_opt_pass pass_asan; extern struct gimple_opt_pass pass_lower_cf; extern struct gimple_opt_pass pass_refactor_eh; extern struct gimple_opt_pass pass_lower_eh;