Patchwork [asan] WIP protection of globals

login
register
mail settings
Submitter Jakub Jelinek
Date Oct. 16, 2012, 2:58 p.m.
Message ID <20121016145848.GH584@tucnak.redhat.com>
Download mbox | patch
Permalink /patch/191821/
State New
Headers show

Comments

Jakub Jelinek - Oct. 16, 2012, 2:58 p.m.
Hi!

This is a WIP patch for globals protection.
I'm not filling names yet and has_dynamic_init is always
false (wonder how to figure it has_dynamic_init out, especially
with LTO, TYPE_ADDRESSABLE (TREE_TYPE (decl)) probably isn't it,
and for more I'm afraid we need a langhook).


	Jakub
Marek Polacek - Oct. 16, 2012, 8:24 p.m.
On Tue, Oct 16, 2012 at 04:58:48PM +0200, Jakub Jelinek wrote:
> @@ -2022,6 +2036,12 @@ assemble_variable (tree decl, int top_le
>        if (DECL_ALIGN (decl) > BITS_PER_UNIT)
>  	ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (DECL_ALIGN_UNIT (decl)));
>        assemble_variable_contents (decl, name, dont_output_data);
> +      if (asan_protected)
> +	{
> +	  unsigned HOST_WIDE_INT int size
> +	    = tree_low_cst (DECL_SIZE_UNIT (decl), 1);

Shouldn't this be only HOST_WIDE_INT, without following int?

	Marek
Xinliang David Li - Oct. 16, 2012, 9:41 p.m.
On Tue, Oct 16, 2012 at 7:58 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> This is a WIP patch for globals protection.
> I'm not filling names yet and has_dynamic_init is always
> false (wonder how to figure it has_dynamic_init out, especially
> with LTO, TYPE_ADDRESSABLE (TREE_TYPE (decl)) probably isn't it,
> and for more I'm afraid we need a langhook).
>
> --- gcc/varasm.c.jj     2012-10-11 19:10:39.000000000 +0200
> +++ gcc/varasm.c        2012-10-16 15:40:37.075662625 +0200
> @@ -51,6 +51,7 @@ along with GCC; see the file COPYING3.
>  #include "tree-mudflap.h"
>  #include "cgraph.h"
>  #include "pointer-set.h"
> +#include "asan.h"
>
>  #ifdef XCOFF_DEBUGGING_INFO
>  #include "xcoffout.h"          /* Needed for external data
> @@ -1831,6 +1832,9 @@ assemble_noswitch_variable (tree decl, c
>    size = tree_low_cst (DECL_SIZE_UNIT (decl), 1);
>    rounded = size;
>
> +  if (flag_asan && asan_protect_global (decl))
> +    size += asan_red_zone_size (size);
> +
>    /* Don't allocate zero bytes of common,
>       since that means "undefined external" in the linker.  */
>    if (size == 0)
> @@ -1897,6 +1901,7 @@ assemble_variable (tree decl, int top_le
>    const char *name;
>    rtx decl_rtl, symbol;
>    section *sect;
> +  bool asan_protected = false;
>
>    /* This function is supposed to handle VARIABLES.  Ensure we have one.  */
>    gcc_assert (TREE_CODE (decl) == VAR_DECL);
> @@ -1984,6 +1989,15 @@ assemble_variable (tree decl, int top_le
>    /* Compute the alignment of this data.  */
>
>    align_variable (decl, dont_output_data);
> +
> +  if (flag_asan
> +      && asan_protect_global (decl)
> +      && DECL_ALIGN (decl) < ASAN_RED_ZONE_SIZE * BITS_PER_UNIT)
> +    {
> +      asan_protected = true;
> +      DECL_ALIGN (decl) = ASAN_RED_ZONE_SIZE * BITS_PER_UNIT;
> +    }
> +
>    set_mem_align (decl_rtl, DECL_ALIGN (decl));
>
>    if (TREE_PUBLIC (decl))
> @@ -2022,6 +2036,12 @@ assemble_variable (tree decl, int top_le
>        if (DECL_ALIGN (decl) > BITS_PER_UNIT)
>         ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (DECL_ALIGN_UNIT (decl)));
>        assemble_variable_contents (decl, name, dont_output_data);
> +      if (asan_protected)
> +       {
> +         unsigned HOST_WIDE_INT int size
> +           = tree_low_cst (DECL_SIZE_UNIT (decl), 1);
> +         assemble_zeros (asan_red_zone_size (size));
> +       }
>      }
>  }
>
> @@ -6926,6 +6946,8 @@ place_block_symbol (rtx symbol)
>        decl = SYMBOL_REF_DECL (symbol);
>        alignment = DECL_ALIGN (decl);
>        size = tree_low_cst (DECL_SIZE_UNIT (decl), 1);
> +      if (flag_asan && asan_protect_global (decl))
> +       size += asan_red_zone_size (size);
>      }
>
>    /* Calculate the object's offset from the start of the block.  */
> --- gcc/Makefile.in.jj  2012-10-15 09:40:40.000000000 +0200
> +++ gcc/Makefile.in     2012-10-16 16:54:12.463712014 +0200
> @@ -2712,7 +2712,7 @@ varasm.o : varasm.c $(CONFIG_H) $(SYSTEM
>     output.h $(DIAGNOSTIC_CORE_H) xcoffout.h debug.h $(GGC_H) $(TM_P_H) \
>     $(HASHTAB_H) $(TARGET_H) langhooks.h gt-varasm.h $(BASIC_BLOCK_H) \
>     $(CGRAPH_H) $(TARGET_DEF_H) tree-mudflap.h \
> -   pointer-set.h $(COMMON_TARGET_H)
> +   pointer-set.h $(COMMON_TARGET_H) asan.h
>  function.o : function.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_ERROR_H) \
>     $(TREE_H) $(GIMPLE_H) $(FLAGS_H) $(FUNCTION_H) $(EXPR_H) \
>     $(OPTABS_H) $(LIBFUNCS_H) $(REGS_H) hard-reg-set.h insn-config.h $(RECOG_H) \
> --- gcc/asan.c.jj       2012-10-16 12:18:41.000000000 +0200
> +++ gcc/asan.c  2012-10-16 16:52:24.266434151 +0200
> @@ -45,6 +45,7 @@ along with GCC; see the file COPYING3.
>  #include "target.h"
>  #include "expr.h"
>  #include "optabs.h"
> +#include "output.h"
>
>  /*
>   AddressSanitizer finds out-of-bounds and use-after-free bugs
> @@ -270,6 +271,48 @@ asan_emit_stack_protection (rtx base, HO
>    return ret;
>  }
>
> +/* Return true if DECL is a VAR_DECL that should be protected
> +   by Address Sanitizer, by appending a red zone with protected
> +   shadow memory after it and aligning it to at least
> +   ASAN_RED_ZONE_SIZE bytes.  */
> +
> +bool
> +asan_protect_global (tree decl)
> +{
> +  rtx rtl, symbol;
> +  section *sect;
> +
> +  if (TREE_CODE (decl) != VAR_DECL
> +      || DECL_THREAD_LOCAL_P (decl)
> +      || DECL_EXTERNAL (decl)
> +      || !TREE_ASM_WRITTEN (decl)
> +      || !DECL_RTL_SET_P (decl)
> +      || DECL_ONE_ONLY (decl)
> +      || DECL_COMMON (decl)

Why the above two condition? If the linker picks the larger size one,
it is ok to do the instrumentation.


> +      || (DECL_SECTION_NAME (decl) != NULL_TREE
> +         && !DECL_HAS_IMPLICIT_SECTION_NAME_P (decl))

Why is this condition? Is it related to -fdata-sections ?

> +      || DECL_SIZE (decl) == 0
> +      || ASAN_RED_ZONE_SIZE * BITS_PER_UNIT > MAX_OFILE_ALIGNMENT
> +      || !valid_constant_size_p (DECL_SIZE_UNIT (decl))
> +      || DECL_ALIGN_UNIT (decl) > 2 * ASAN_RED_ZONE_SIZE)
> +    return false;
> +
> +  rtl = DECL_RTL (decl);
> +  if (!MEM_P (rtl) || GET_CODE (XEXP (rtl, 0)) != SYMBOL_REF)
> +    return false;
> +  symbol = XEXP (rtl, 0);
> +
> +  if (CONSTANT_POOL_ADDRESS_P (symbol)
> +      || TREE_CONSTANT_POOL_ADDRESS_P (symbol))
> +    return false;
> +
> +  sect = get_variable_section (decl, false);
> +  if (sect->common.flags & SECTION_COMMON)
> +    return false;
> +
> +  return true;
> +}
> +
>  /* Construct a function tree for __asan_report_{load,store}{1,2,4,8,16}.
>     IS_STORE is either 1 (for a store) or 0 (for a load).
>     SIZE_IN_BYTES is one of 1, 2, 4, 8, 16.  */
> @@ -568,6 +611,55 @@ transform_statements (void)
>      }
>  }
>
> +/* Build __asan_global type.  */
> +

More description of the global type.

> +static tree
> +asan_global_struct (void)
> +{
> +  static const char *field_names[5]
> +    = { "__beg", "__size", "__size_with_redzone",
> +        "__name", "__has_dynamic_init" };
> +  tree fields[5], ret;
> +  int i;
> +
> +  ret = make_node (RECORD_TYPE);
> +  for (i = 0; i < 5; i++)
> +    {
> +      fields[i]
> +       = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
> +                     get_identifier (field_names[i]),
> +                     (i == 0 || i == 3) ? const_ptr_type_node
> +                     : build_nonstandard_integer_type (POINTER_SIZE, 1));
> +      DECL_CONTEXT (fields[i]) = ret;
> +      if (i)
> +       DECL_CHAIN (fields[i - 1]) = fields[i];
> +    }
> +  TYPE_FIELDS (ret) = fields[0];
> +  TYPE_NAME (ret) = get_identifier ("__asan_global");
> +  layout_type (ret);
> +  return ret;
> +}
> +
> +static void
> +asan_add_global (tree decl, tree type, VEC(constructor_elt, gc) *v)
> +{

Missing comments.

> +  tree init, uptr = TREE_TYPE (DECL_CHAIN (TYPE_FIELDS (type)));
> +  unsigned HOST_WIDE_INT size;
> +  VEC(constructor_elt, gc) *vinner = NULL;
> +  CONSTRUCTOR_APPEND_ELT (vinner, NULL_TREE,
> +                         fold_convert (const_ptr_type_node,
> +                                       build_fold_addr_expr (decl)));
> +  size = tree_low_cst (DECL_SIZE_UNIT (decl), 1);
> +  CONSTRUCTOR_APPEND_ELT (vinner, NULL_TREE, build_int_cst (uptr, size));
> +  size += asan_red_zone_size (size);
> +  CONSTRUCTOR_APPEND_ELT (vinner, NULL_TREE, build_int_cst (uptr, size));
> +  CONSTRUCTOR_APPEND_ELT (vinner, NULL_TREE,
> +                         build_int_cst (const_ptr_type_node, 0));
> +  CONSTRUCTOR_APPEND_ELT (vinner, NULL_TREE, build_int_cst (uptr, 0));
> +  init = build_constructor (type, vinner);
> +  CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, init);
> +}
> +



David

>  /* Module-level instrumentation.
>     - Insert __asan_init() into the list of CTORs.
>     - TODO: insert redzones around globals.
> @@ -577,8 +669,59 @@ void
>  asan_finish_file (void)
>  {
>    tree ctor_statements = NULL_TREE;
> +  struct varpool_node *vnode;
> +  unsigned HOST_WIDE_INT gcount = 0;
> +
>    append_to_statement_list (build_call_expr (asan_init_func (), 0),
>                              &ctor_statements);
> +  FOR_EACH_DEFINED_VARIABLE (vnode)
> +    if (asan_protect_global (vnode->symbol.decl))
> +      ++gcount;
> +  if (gcount)
> +    {
> +      tree type = asan_global_struct (), var, ctor, decl;
> +      tree uptr = build_nonstandard_integer_type (POINTER_SIZE, 1);
> +      tree dtor_statements = NULL_TREE;
> +      VEC(constructor_elt, gc) *v;
> +      char buf[20];
> +
> +      type = build_array_type_nelts (type, gcount);
> +      ASM_GENERATE_INTERNAL_LABEL (buf, "LASAN", 0);
> +      var = build_decl (UNKNOWN_LOCATION, VAR_DECL, get_identifier (buf),
> +                       type);
> +      TREE_STATIC (var) = 1;
> +      TREE_PUBLIC (var) = 0;
> +      DECL_ARTIFICIAL (var) = 1;
> +      DECL_IGNORED_P (var) = 1;
> +      v = VEC_alloc (constructor_elt, gc, gcount);
> +      FOR_EACH_DEFINED_VARIABLE (vnode)
> +       if (asan_protect_global (vnode->symbol.decl))
> +          asan_add_global (vnode->symbol.decl, TREE_TYPE (type), v);
> +      ctor = build_constructor (type, v);
> +      TREE_CONSTANT (ctor) = 1;
> +      TREE_STATIC (ctor) = 1;
> +      DECL_INITIAL (var) = ctor;
> +      varpool_assemble_decl (varpool_node (var));
> +
> +      type = build_function_type_list (void_type_node,
> +                                      build_pointer_type (TREE_TYPE (type)),
> +                                      uptr, NULL_TREE);
> +      decl = build_fn_decl ("__asan_register_globals", type);
> +      TREE_NOTHROW (decl) = 1;
> +      append_to_statement_list (build_call_expr (decl, 2,
> +                                                build_fold_addr_expr (var),
> +                                                build_int_cst (uptr, gcount)),
> +                               &ctor_statements);
> +
> +      decl = build_fn_decl ("__asan_unregister_globals", type);
> +      TREE_NOTHROW (decl) = 1;
> +      append_to_statement_list (build_call_expr (decl, 2,
> +                                                build_fold_addr_expr (var),
> +                                                build_int_cst (uptr, gcount)),
> +                               &dtor_statements);
> +      cgraph_build_static_cdtor ('D', dtor_statements,
> +                                MAX_RESERVED_INIT_PRIORITY - 1);
> +    }
>    cgraph_build_static_cdtor ('I', ctor_statements,
>                               MAX_RESERVED_INIT_PRIORITY - 1);
>  }
> --- gcc/asan.h.jj       2012-10-15 09:40:03.000000000 +0200
> +++ gcc/asan.h  2012-10-16 15:38:30.850358396 +0200
> @@ -23,6 +23,7 @@ along with GCC; see the file COPYING3.
>
>  extern void asan_finish_file (void);
>  extern rtx asan_emit_stack_protection (rtx, HOST_WIDE_INT *, tree *, int);
> +extern bool asan_protect_global (tree);
>
>  /* Alias set for accessing the shadow memory.  */
>  extern alias_set_type asan_shadow_set;
> @@ -48,4 +49,11 @@ extern alias_set_type asan_shadow_set;
>
>  #define ASAN_STACK_FRAME_MAGIC 0x41b58ab3
>
> +static inline unsigned int
> +asan_red_zone_size (unsigned int size)
> +{
> +  unsigned int c = size & (ASAN_RED_ZONE_SIZE - 1);
> +  return c ? 2 * ASAN_RED_ZONE_SIZE - c : ASAN_RED_ZONE_SIZE;
> +}
> +
>  #endif /* TREE_ASAN */
>
>         Jakub
Jakub Jelinek - Oct. 16, 2012, 10:03 p.m.
On Tue, Oct 16, 2012 at 02:41:42PM -0700, Xinliang David Li wrote:
> > +bool
> > +asan_protect_global (tree decl)
> > +{
> > +  rtx rtl, symbol;
> > +  section *sect;
> > +
> > +  if (TREE_CODE (decl) != VAR_DECL
> > +      || DECL_THREAD_LOCAL_P (decl)
> > +      || DECL_EXTERNAL (decl)
> > +      || !TREE_ASM_WRITTEN (decl)
> > +      || !DECL_RTL_SET_P (decl)
> > +      || DECL_ONE_ONLY (decl)
> > +      || DECL_COMMON (decl)
> 
> Why the above two condition? If the linker picks the larger size one,
> it is ok to do the instrumentation.

For DECL_ONE_ONLY, what LLVM does is plain wrong, it puts address of
even vars exported from shared libraries (which can be overridden) into
the array passed to __asan_*register_globals.  That can't work, you don't
know if the var that is found first was compiled with -fasan or not.
We need to use a local alias in that case (yeah, my WIP patch doesn't do
that yet, but I wanted to post at least something), and I believe local
aliases might be an issue with DECL_ONE_ONLY (and anyway, if a -fno-asan
DECL_ONE_ONLY var wins over -fasan one, there is no padding after it).

LLVM does other things wrong, it increases the size of the vars which is
IMHO a big no no, say for copy relocs etc., and last I'd say the relocations
in the description variable are unnecessary, would be better if it e.g. used
PC relative relocations and could made the array passed to
__asan_*register_globals read-only.

And for DECL_COMMON, you can't put any padding after a common variable
without making the size of the common var larger (and increasing its
alignment), both are undesirable for -fasan/-fno-asan mixing.

> > +      || (DECL_SECTION_NAME (decl) != NULL_TREE
> > +         && !DECL_HAS_IMPLICIT_SECTION_NAME_P (decl))
> 
> Why is this condition? Is it related to -fdata-sections ?

-fdata-sections will have non-NULL DECL_SECTION_NAME, but still
DECL_HAS_IMPLICIT_SECTION_NAME_P.  The above condition is not to break
various packages that put say a struct into some user section and expect
the section then to contain an array of those structs.
E.g. Linux kernel does this, systemtap probes, prelink, ...
If padding is inserted, all those would break.

	Jakub
Xinliang David Li - Oct. 16, 2012, 10:50 p.m.
On Tue, Oct 16, 2012 at 3:03 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Tue, Oct 16, 2012 at 02:41:42PM -0700, Xinliang David Li wrote:
>> > +bool
>> > +asan_protect_global (tree decl)
>> > +{
>> > +  rtx rtl, symbol;
>> > +  section *sect;
>> > +
>> > +  if (TREE_CODE (decl) != VAR_DECL
>> > +      || DECL_THREAD_LOCAL_P (decl)
>> > +      || DECL_EXTERNAL (decl)
>> > +      || !TREE_ASM_WRITTEN (decl)
>> > +      || !DECL_RTL_SET_P (decl)
>> > +      || DECL_ONE_ONLY (decl)
>> > +      || DECL_COMMON (decl)
>>
>> Why the above two condition? If the linker picks the larger size one,
>> it is ok to do the instrumentation.
>
> For DECL_ONE_ONLY, what LLVM does is plain wrong, it puts address of
> even vars exported from shared libraries (which can be overridden) into
> the array passed to __asan_*register_globals.  That can't work, you don't
> know if the var that is found first was compiled with -fasan or not.
> We need to use a local alias in that case (yeah, my WIP patch doesn't do
> that yet, but I wanted to post at least something), and I believe local

Does that mean that all globals defined in shared libraries can not be
protected as long as they are not protected or hidden? This sounds
like a big limitation.  We need to answer the following two questions:

1) How often are exported variables get preempted?
2) Is it a common use case to mix -fasan and -fno-asan ?

If the answer is no for either of the questions, we should allow the
above at the risk of some possible false positives -- as the goal is
to find as many bugs as possible (without too much noise). In short,
false negatives are worse.


> aliases might be an issue with DECL_ONE_ONLY (and anyway, if a -fno-asan
> DECL_ONE_ONLY var wins over -fasan one, there is no padding after it).
>
> LLVM does other things wrong, it increases the size of the vars which is
> IMHO a big no no, say for copy relocs etc., and last I'd say the relocations
> in the description variable are unnecessary, would be better if it e.g. used
> PC relative relocations and could made the array passed to
> __asan_*register_globals read-only.
>

I like the GCC way better too.


> And for DECL_COMMON, you can't put any padding after a common variable
> without making the size of the common var larger (and increasing its
> alignment), both are undesirable for -fasan/-fno-asan mixing.

If the linker picks the large one (which I believe it does), is that
still a problem?

>
>> > +      || (DECL_SECTION_NAME (decl) != NULL_TREE
>> > +         && !DECL_HAS_IMPLICIT_SECTION_NAME_P (decl))
>>
>> Why is this condition? Is it related to -fdata-sections ?
>
> -fdata-sections will have non-NULL DECL_SECTION_NAME, but still
> DECL_HAS_IMPLICIT_SECTION_NAME_P.  The above condition is not to break
> various packages that put say a struct into some user section and expect
> the section then to contain an array of those structs.
> E.g. Linux kernel does this, systemtap probes, prelink, ...
> If padding is inserted, all those would break.
>

Ok -- not common scenarios to be of a concern.

thanks,

David


>         Jakub
Jakub Jelinek - Oct. 16, 2012, 11:02 p.m.
On Tue, Oct 16, 2012 at 03:50:27PM -0700, Xinliang David Li wrote:
> Does that mean that all globals defined in shared libraries can not be
> protected as long as they are not protected or hidden? This sounds
> like a big limitation.  We need to answer the following two questions:

For !DECL_ONE_ONLY !DECL_COMMON vars you can protect them just fine,
just do:
	.globl	i
	.data
	.align 32
	.type	i, @object
	.size	i, 4
i:
	.long	7
	.skip	60
	.set	.LASAN.i,i
and refer to .LASAN.i (i.e. a local alias) instead of i (or, as I said
earlier, with ABI change of __asan_*register_globals or some alternative
entrypoint for that it can be .LASAN.i-. and thus a PC-relative, not
dynamic, relocation).  If i is preempted by a different i in another
library, each shared library simply protects the red zone after its own var,
and the fact that only one i is actually used by the program doesn't matter
much.

> 1) How often are exported variables get preempted?

It is not uncommon, and DECL_ONE_ONLY is preempted very often.

> 2) Is it a common use case to mix -fasan and -fno-asan ?

-fasan shouldn't be an ABI option IMHO, and changing the size of globals is
ABI changing.

> > And for DECL_COMMON, you can't put any padding after a common variable
> > without making the size of the common var larger (and increasing its
> > alignment), both are undesirable for -fasan/-fno-asan mixing.
> 
> If the linker picks the large one (which I believe it does), is that
> still a problem?

Yes.  For copy relocations at least.

	Jakub
Xinliang David Li - Oct. 16, 2012, 11:19 p.m.
On Tue, Oct 16, 2012 at 4:02 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Tue, Oct 16, 2012 at 03:50:27PM -0700, Xinliang David Li wrote:
>> Does that mean that all globals defined in shared libraries can not be
>> protected as long as they are not protected or hidden? This sounds
>> like a big limitation.  We need to answer the following two questions:
>
> For !DECL_ONE_ONLY !DECL_COMMON vars you can protect them just fine,
> just do:
>         .globl  i
>         .data
>         .align 32
>         .type   i, @object
>         .size   i, 4
> i:
>         .long   7
>         .skip   60
>         .set    .LASAN.i,i
> and refer to .LASAN.i (i.e. a local alias) instead of i (or, as I said
> earlier, with ABI change of __asan_*register_globals or some alternative
> entrypoint for that it can be .LASAN.i-. and thus a PC-relative, not
> dynamic, relocation).  If i is preempted by a different i in another
> library, each shared library simply protects the red zone after its own var,
> and the fact that only one i is actually used by the program doesn't matter
> much.
>

Ok, using local aliases for those cases sound good.

>> 1) How often are exported variables get preempted?
>
> It is not uncommon, and DECL_ONE_ONLY is preempted very often.
>
>> 2) Is it a common use case to mix -fasan and -fno-asan ?
>
> -fasan shouldn't be an ABI option IMHO, and changing the size of globals is
> ABI changing.
>

I am not sure -- fasan is an error detecting feature -- the goal is to
find bugs -- missing handling of commons etc. are not desirable.
Besides  if ABI changes consistently for all objects, why does it
matter?

Or making common/decl_one_only protected under an additional option.

thanks,

David


>> > And for DECL_COMMON, you can't put any padding after a common variable
>> > without making the size of the common var larger (and increasing its
>> > alignment), both are undesirable for -fasan/-fno-asan mixing.
>>
>> If the linker picks the large one (which I believe it does), is that
>> still a problem?
>
> Yes.  For copy relocations at least.
>
>         Jakub
Jakub Jelinek - Oct. 17, 2012, 6:51 a.m.
On Tue, Oct 16, 2012 at 04:19:09PM -0700, Xinliang David Li wrote:
> I am not sure -- fasan is an error detecting feature -- the goal is to
> find bugs -- missing handling of commons etc. are not desirable.
> Besides  if ABI changes consistently for all objects, why does it
> matter?
> 
> Or making common/decl_one_only protected under an additional option.

Note that LLVM doesn't protect common vars nor comdat linkage vars either
(at least 3.1 release), as can be seen on

struct A { int a; char b[64]; };
inline A *foo ()
{
  static A a;
  return &a;
}
A *(*p) () = foo;

C++ testcase and

int i, j, k;
int l = 26;
struct S { char buf[32]; } m, n = { { 1 } };

C testcase.  Only p, l and n vars are protected.  For common, there is a
possibility to just use -fno-common, unless your sources rely on common
vars.

	Jakub
Xinliang David Li - Oct. 17, 2012, 7:11 a.m.
On Tue, Oct 16, 2012 at 11:51 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Tue, Oct 16, 2012 at 04:19:09PM -0700, Xinliang David Li wrote:
>> I am not sure -- fasan is an error detecting feature -- the goal is to
>> find bugs -- missing handling of commons etc. are not desirable.
>> Besides  if ABI changes consistently for all objects, why does it
>> matter?
>>
>> Or making common/decl_one_only protected under an additional option.
>
> Note that LLVM doesn't protect common vars nor comdat linkage vars either
> (at least 3.1 release), as can be seen on
>
> struct A { int a; char b[64]; };
> inline A *foo ()
> {
>   static A a;
>   return &a;
> }
> A *(*p) () = foo;
>
> C++ testcase and
>
> int i, j, k;
> int l = 26;
> struct S { char buf[32]; } m, n = { { 1 } };
>
> C testcase.  Only p, l and n vars are protected.  For common, there is a
> possibility to just use -fno-common, unless your sources rely on common
> vars.

Ok -- but I doubt it is due to the concern of ABI breakage.

Note that the debug version of libstdc++ (for error checking) is ABI
breaking too.

David

>
>         Jakub

Patch

--- gcc/varasm.c.jj	2012-10-11 19:10:39.000000000 +0200
+++ gcc/varasm.c	2012-10-16 15:40:37.075662625 +0200
@@ -51,6 +51,7 @@  along with GCC; see the file COPYING3.
 #include "tree-mudflap.h"
 #include "cgraph.h"
 #include "pointer-set.h"
+#include "asan.h"
 
 #ifdef XCOFF_DEBUGGING_INFO
 #include "xcoffout.h"		/* Needed for external data
@@ -1831,6 +1832,9 @@  assemble_noswitch_variable (tree decl, c
   size = tree_low_cst (DECL_SIZE_UNIT (decl), 1);
   rounded = size;
 
+  if (flag_asan && asan_protect_global (decl))
+    size += asan_red_zone_size (size);
+
   /* Don't allocate zero bytes of common,
      since that means "undefined external" in the linker.  */
   if (size == 0)
@@ -1897,6 +1901,7 @@  assemble_variable (tree decl, int top_le
   const char *name;
   rtx decl_rtl, symbol;
   section *sect;
+  bool asan_protected = false;
 
   /* This function is supposed to handle VARIABLES.  Ensure we have one.  */
   gcc_assert (TREE_CODE (decl) == VAR_DECL);
@@ -1984,6 +1989,15 @@  assemble_variable (tree decl, int top_le
   /* Compute the alignment of this data.  */
 
   align_variable (decl, dont_output_data);
+
+  if (flag_asan
+      && asan_protect_global (decl)
+      && DECL_ALIGN (decl) < ASAN_RED_ZONE_SIZE * BITS_PER_UNIT)
+    {
+      asan_protected = true;
+      DECL_ALIGN (decl) = ASAN_RED_ZONE_SIZE * BITS_PER_UNIT;
+    }
+
   set_mem_align (decl_rtl, DECL_ALIGN (decl));
 
   if (TREE_PUBLIC (decl))
@@ -2022,6 +2036,12 @@  assemble_variable (tree decl, int top_le
       if (DECL_ALIGN (decl) > BITS_PER_UNIT)
 	ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (DECL_ALIGN_UNIT (decl)));
       assemble_variable_contents (decl, name, dont_output_data);
+      if (asan_protected)
+	{
+	  unsigned HOST_WIDE_INT int size
+	    = tree_low_cst (DECL_SIZE_UNIT (decl), 1);
+	  assemble_zeros (asan_red_zone_size (size));
+	}
     }
 }
 
@@ -6926,6 +6946,8 @@  place_block_symbol (rtx symbol)
       decl = SYMBOL_REF_DECL (symbol);
       alignment = DECL_ALIGN (decl);
       size = tree_low_cst (DECL_SIZE_UNIT (decl), 1);
+      if (flag_asan && asan_protect_global (decl))
+	size += asan_red_zone_size (size);
     }
 
   /* Calculate the object's offset from the start of the block.  */
--- gcc/Makefile.in.jj	2012-10-15 09:40:40.000000000 +0200
+++ gcc/Makefile.in	2012-10-16 16:54:12.463712014 +0200
@@ -2712,7 +2712,7 @@  varasm.o : varasm.c $(CONFIG_H) $(SYSTEM
    output.h $(DIAGNOSTIC_CORE_H) xcoffout.h debug.h $(GGC_H) $(TM_P_H) \
    $(HASHTAB_H) $(TARGET_H) langhooks.h gt-varasm.h $(BASIC_BLOCK_H) \
    $(CGRAPH_H) $(TARGET_DEF_H) tree-mudflap.h \
-   pointer-set.h $(COMMON_TARGET_H)
+   pointer-set.h $(COMMON_TARGET_H) asan.h
 function.o : function.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_ERROR_H) \
    $(TREE_H) $(GIMPLE_H) $(FLAGS_H) $(FUNCTION_H) $(EXPR_H) \
    $(OPTABS_H) $(LIBFUNCS_H) $(REGS_H) hard-reg-set.h insn-config.h $(RECOG_H) \
--- gcc/asan.c.jj	2012-10-16 12:18:41.000000000 +0200
+++ gcc/asan.c	2012-10-16 16:52:24.266434151 +0200
@@ -45,6 +45,7 @@  along with GCC; see the file COPYING3.
 #include "target.h"
 #include "expr.h"
 #include "optabs.h"
+#include "output.h"
 
 /*
  AddressSanitizer finds out-of-bounds and use-after-free bugs 
@@ -270,6 +271,48 @@  asan_emit_stack_protection (rtx base, HO
   return ret;
 }
 
+/* Return true if DECL is a VAR_DECL that should be protected
+   by Address Sanitizer, by appending a red zone with protected
+   shadow memory after it and aligning it to at least
+   ASAN_RED_ZONE_SIZE bytes.  */
+
+bool
+asan_protect_global (tree decl)
+{
+  rtx rtl, symbol;
+  section *sect;
+
+  if (TREE_CODE (decl) != VAR_DECL
+      || DECL_THREAD_LOCAL_P (decl)
+      || DECL_EXTERNAL (decl)
+      || !TREE_ASM_WRITTEN (decl)
+      || !DECL_RTL_SET_P (decl)
+      || DECL_ONE_ONLY (decl)
+      || DECL_COMMON (decl)
+      || (DECL_SECTION_NAME (decl) != NULL_TREE
+	  && !DECL_HAS_IMPLICIT_SECTION_NAME_P (decl))
+      || DECL_SIZE (decl) == 0
+      || ASAN_RED_ZONE_SIZE * BITS_PER_UNIT > MAX_OFILE_ALIGNMENT
+      || !valid_constant_size_p (DECL_SIZE_UNIT (decl))
+      || DECL_ALIGN_UNIT (decl) > 2 * ASAN_RED_ZONE_SIZE)
+    return false;
+
+  rtl = DECL_RTL (decl);
+  if (!MEM_P (rtl) || GET_CODE (XEXP (rtl, 0)) != SYMBOL_REF)
+    return false;
+  symbol = XEXP (rtl, 0);
+
+  if (CONSTANT_POOL_ADDRESS_P (symbol)
+      || TREE_CONSTANT_POOL_ADDRESS_P (symbol))
+    return false;
+
+  sect = get_variable_section (decl, false);
+  if (sect->common.flags & SECTION_COMMON)
+    return false;
+
+  return true;    
+}
+
 /* Construct a function tree for __asan_report_{load,store}{1,2,4,8,16}.
    IS_STORE is either 1 (for a store) or 0 (for a load).
    SIZE_IN_BYTES is one of 1, 2, 4, 8, 16.  */
@@ -568,6 +611,55 @@  transform_statements (void)
     }
 }
 
+/* Build __asan_global type.  */
+
+static tree
+asan_global_struct (void)
+{
+  static const char *field_names[5]
+    = { "__beg", "__size", "__size_with_redzone",
+        "__name", "__has_dynamic_init" };
+  tree fields[5], ret;
+  int i;
+
+  ret = make_node (RECORD_TYPE);
+  for (i = 0; i < 5; i++)
+    {
+      fields[i]
+	= build_decl (UNKNOWN_LOCATION, FIELD_DECL,
+		      get_identifier (field_names[i]),
+		      (i == 0 || i == 3) ? const_ptr_type_node
+		      : build_nonstandard_integer_type (POINTER_SIZE, 1));
+      DECL_CONTEXT (fields[i]) = ret;
+      if (i)
+	DECL_CHAIN (fields[i - 1]) = fields[i];
+    }
+  TYPE_FIELDS (ret) = fields[0];
+  TYPE_NAME (ret) = get_identifier ("__asan_global");
+  layout_type (ret);
+  return ret;
+}
+
+static void
+asan_add_global (tree decl, tree type, VEC(constructor_elt, gc) *v)
+{
+  tree init, uptr = TREE_TYPE (DECL_CHAIN (TYPE_FIELDS (type)));
+  unsigned HOST_WIDE_INT size;
+  VEC(constructor_elt, gc) *vinner = NULL;
+  CONSTRUCTOR_APPEND_ELT (vinner, NULL_TREE,
+			  fold_convert (const_ptr_type_node,
+					build_fold_addr_expr (decl)));
+  size = tree_low_cst (DECL_SIZE_UNIT (decl), 1);
+  CONSTRUCTOR_APPEND_ELT (vinner, NULL_TREE, build_int_cst (uptr, size));
+  size += asan_red_zone_size (size);
+  CONSTRUCTOR_APPEND_ELT (vinner, NULL_TREE, build_int_cst (uptr, size));
+  CONSTRUCTOR_APPEND_ELT (vinner, NULL_TREE,
+			  build_int_cst (const_ptr_type_node, 0));
+  CONSTRUCTOR_APPEND_ELT (vinner, NULL_TREE, build_int_cst (uptr, 0));
+  init = build_constructor (type, vinner);
+  CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, init);
+}
+
 /* Module-level instrumentation.
    - Insert __asan_init() into the list of CTORs.
    - TODO: insert redzones around globals.
@@ -577,8 +669,59 @@  void
 asan_finish_file (void)
 {
   tree ctor_statements = NULL_TREE;
+  struct varpool_node *vnode;
+  unsigned HOST_WIDE_INT gcount = 0;
+
   append_to_statement_list (build_call_expr (asan_init_func (), 0),
                             &ctor_statements);
+  FOR_EACH_DEFINED_VARIABLE (vnode)
+    if (asan_protect_global (vnode->symbol.decl))
+      ++gcount;
+  if (gcount)
+    {
+      tree type = asan_global_struct (), var, ctor, decl;
+      tree uptr = build_nonstandard_integer_type (POINTER_SIZE, 1);
+      tree dtor_statements = NULL_TREE;
+      VEC(constructor_elt, gc) *v;
+      char buf[20];
+
+      type = build_array_type_nelts (type, gcount);
+      ASM_GENERATE_INTERNAL_LABEL (buf, "LASAN", 0);
+      var = build_decl (UNKNOWN_LOCATION, VAR_DECL, get_identifier (buf),
+			type);
+      TREE_STATIC (var) = 1;
+      TREE_PUBLIC (var) = 0;
+      DECL_ARTIFICIAL (var) = 1;
+      DECL_IGNORED_P (var) = 1;
+      v = VEC_alloc (constructor_elt, gc, gcount);
+      FOR_EACH_DEFINED_VARIABLE (vnode)
+	if (asan_protect_global (vnode->symbol.decl))
+          asan_add_global (vnode->symbol.decl, TREE_TYPE (type), v);
+      ctor = build_constructor (type, v);
+      TREE_CONSTANT (ctor) = 1;
+      TREE_STATIC (ctor) = 1;
+      DECL_INITIAL (var) = ctor;
+      varpool_assemble_decl (varpool_node (var));
+
+      type = build_function_type_list (void_type_node,
+				       build_pointer_type (TREE_TYPE (type)),
+				       uptr, NULL_TREE);
+      decl = build_fn_decl ("__asan_register_globals", type);
+      TREE_NOTHROW (decl) = 1;
+      append_to_statement_list (build_call_expr (decl, 2,
+						 build_fold_addr_expr (var),
+						 build_int_cst (uptr, gcount)),
+				&ctor_statements);
+
+      decl = build_fn_decl ("__asan_unregister_globals", type);
+      TREE_NOTHROW (decl) = 1;
+      append_to_statement_list (build_call_expr (decl, 2,
+						 build_fold_addr_expr (var),
+						 build_int_cst (uptr, gcount)),
+				&dtor_statements);
+      cgraph_build_static_cdtor ('D', dtor_statements,
+				 MAX_RESERVED_INIT_PRIORITY - 1);
+    }
   cgraph_build_static_cdtor ('I', ctor_statements,
                              MAX_RESERVED_INIT_PRIORITY - 1);
 }
--- gcc/asan.h.jj	2012-10-15 09:40:03.000000000 +0200
+++ gcc/asan.h	2012-10-16 15:38:30.850358396 +0200
@@ -23,6 +23,7 @@  along with GCC; see the file COPYING3.
 
 extern void asan_finish_file (void);
 extern rtx asan_emit_stack_protection (rtx, HOST_WIDE_INT *, tree *, int);
+extern bool asan_protect_global (tree);
 
 /* Alias set for accessing the shadow memory.  */
 extern alias_set_type asan_shadow_set;
@@ -48,4 +49,11 @@  extern alias_set_type asan_shadow_set;
 
 #define ASAN_STACK_FRAME_MAGIC	0x41b58ab3
 
+static inline unsigned int
+asan_red_zone_size (unsigned int size)
+{
+  unsigned int c = size & (ASAN_RED_ZONE_SIZE - 1);
+  return c ? 2 * ASAN_RED_ZONE_SIZE - c : ASAN_RED_ZONE_SIZE;
+}
+
 #endif /* TREE_ASAN */