diff mbox

[3/3,asan] Instrument built-in memory access function calls

Message ID 8762618ha6.fsf@redhat.com
State New
Headers show

Commit Message

Dodji Seketeli Oct. 23, 2012, 1:11 p.m. UTC
This patch instruments many memory access patterns through builtins.

Basically, for a call like:

     __builtin_memset (from, 0, n_bytes);

the patch would only instrument the accesses at the beginning and at
the end of the memory region [from, from + n_bytes].  This is the
strategy used by the llvm implementation of asan.

This instrumentation for all the memory access builtin functions that
expose a well specified memory region -- one that explicitly states
the number of bytes accessed in the region.

Tested by running cc1 -fasan on variations of simple programs like:

    void
    foo ()
    {
      char foo[1] = {0};

      __builtin_memset (foo, 0, 2);
    }

and by starring at the gimple output.

gcc/
	* asan.c (insert_if_then_before_iter)
	(instrument_mem_region_access)
	(maybe_instrument_builtin_call, maybe_instrument_call): New static
	functions.
	(instrument_assignment): Factorize from ...
	(transform_statements): ... here.  Use maybe_instrument_call to
	instrument (builtin) function calls as well.
---
 gcc/asan.c | 328 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 320 insertions(+), 8 deletions(-)

Comments

Jakub Jelinek Oct. 23, 2012, 1:39 p.m. UTC | #1
On Tue, Oct 23, 2012 at 03:11:29PM +0200, Dodji Seketeli wrote:
> 	* asan.c (insert_if_then_before_iter)
> 	(instrument_mem_region_access)
> 	(maybe_instrument_builtin_call, maybe_instrument_call): New static

Why not just write it:
	* asan.c (insert_if_then_before_iter, instrument_mem_region_access,
	maybe_instrument_builtin_call, maybe_instrument_call): New static
?

> 	functions.

+  tree pointed_to_type = TREE_TYPE (TREE_TYPE (base));                                                                                            

Shouldn't pointed_to_type be always char_type_node?
I mean it shouldn't be VOID_TYPE, even when the argument is (void *),
etc.

> +      /* The 'then block' of the 'if (len != 0) condition is where
> +	 we'll generate the asan instrumentation code now.  */
> +      gsi = gsi_start_bb (then_bb);
> +
> +      /* Instrument the beginning of the memory region to be accessed,
> +	 and arrange for the rest of the intrumentation code to be
> +	 inserted in the then block *after* the current gsi.  */
> +      build_check_stmt (base, &gsi, location, is_store,
> +			int_size_in_bytes (pointed_to_type));
> +      gsi = gsi_last_bb (then_bb);
> +    }
> +  else
> +    {
> +      /* Instrument the beginning of the memory region to be
> +	 accessed.  */
> +      build_check_stmt (base, iter, location, is_store,
> +			int_size_in_bytes (pointed_to_type));
> +      gsi = *iter;
> +    }

Is there any reason why you can't call build_check_stmt just once, after
the conditional?  I.e. do
      ...
      gsi = gsi_start_bb (then_bb);
    }
  else
    gsi = *iter;

  build_check_stmt (base, &gsi, location, is_store,
		    int_size_in_bytes (pointed_to_type));

> +  /* instrument access at _2;  */
> +  gsi_next (&gsi);
> +  tree end = gimple_assign_lhs (region_end);
> +  build_check_stmt (end, &gsi, location, is_store,

Can't you just pass gimple_assign_lhs (region_end) as first
argument to build_check_stmt?  And again, I think you want
to test a single byte there, not more.

> +		    int_size_in_bytes (TREE_TYPE (end)));

> +  switch (DECL_FUNCTION_CODE (callee))
> +    {
> +      /* (s, s, n) style memops.  */
> +    case BUILT_IN_BCMP:
> +    case BUILT_IN_MEMCMP:
> +      /* These cannot be safely instrumented as their length parameter
> +         is just a mere limit.
> +
> +    case BUILT_IN_STRNCASECMP:
> +    case BUILT_IN_STRNCMP:  */

I think these comments make the code less readable instead of more readable,
I'd move the comments why something can't be instrumented to the default:
case.

On the other side, you IMHO want to handle here also __atomic_* and __sync_*
builtins (not by using instrument_mem_region_access, but
just instrument_derefs (if the argument is ADDR_EXPR, on what it points to,
otherwise if it is SSA_NAME, on MEM_REF created for it).

	Jakub
Xinliang David Li Oct. 23, 2012, 3:47 p.m. UTC | #2
On Tue, Oct 23, 2012 at 6:11 AM, Dodji Seketeli <dodji@redhat.com> wrote:
> This patch instruments many memory access patterns through builtins.
>
> Basically, for a call like:
>
>      __builtin_memset (from, 0, n_bytes);
>
> the patch would only instrument the accesses at the beginning and at
> the end of the memory region [from, from + n_bytes].  This is the
> strategy used by the llvm implementation of asan.
>
> This instrumentation for all the memory access builtin functions that
> expose a well specified memory region -- one that explicitly states
> the number of bytes accessed in the region.
>
> Tested by running cc1 -fasan on variations of simple programs like:
>
>     void
>     foo ()
>     {
>       char foo[1] = {0};
>
>       __builtin_memset (foo, 0, 2);
>     }
>
> and by starring at the gimple output.
>
> gcc/
>         * asan.c (insert_if_then_before_iter)
>         (instrument_mem_region_access)
>         (maybe_instrument_builtin_call, maybe_instrument_call): New static
>         functions.
>         (instrument_assignment): Factorize from ...
>         (transform_statements): ... here.  Use maybe_instrument_call to
>         instrument (builtin) function calls as well.
> ---
>  gcc/asan.c | 328 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 320 insertions(+), 8 deletions(-)
>
> diff --git a/gcc/asan.c b/gcc/asan.c
> index aed1a60..a8e3827 100644
> --- a/gcc/asan.c
> +++ b/gcc/asan.c
> @@ -468,6 +468,40 @@ create_cond_insert_point_before_iter (gimple_stmt_iterator *iter,
>    return gsi_last_bb (cond_bb);
>  }
>
> +/* Insert an if condition followed by a 'then block' right before the
> +   statement pointed to by ITER.  The fallthrough block -- which is the
> +   else block of the condition as well as the destination of the
> +   outcoming edge of the 'then block' -- starts with the statement
> +   pointed to by ITER.
> +
> +   COND is the condition of the if.
> +
> +   If THEN_MORE_LIKELY_P is true,
> +   the the probability of the edge to the 'then block' is higher than
> +   the probability of the edge to the fallthrough block.
> +
> +   Upon completion of the function, *THEN_BB is set to the newly
> +   inserted 'then block' and similarly, *FALLTHROUGH_BB is set to the
> +   fallthrough block.
> +
> +   *ITER is adjusted to still point to the same statement it was
> +   pointing to initially.  */
> +
> +static void
> +insert_if_then_before_iter (gimple cond,
> +                           gimple_stmt_iterator *iter,
> +                           bool then_more_likely_p,
> +                           basic_block *then_bb,
> +                           basic_block *fallthrough_bb)
> +{
> +  gimple_stmt_iterator cond_insert_point =
> +    create_cond_insert_point_before_iter (iter,
> +                                         then_more_likely_p,
> +                                         then_bb,
> +                                         fallthrough_bb);
> +  gsi_insert_after (&cond_insert_point, cond, GSI_NEW_STMT);
> +}
> +
>  /* Instrument the memory access instruction BASE.  Insert new
>     statements before ITER.
>
> @@ -628,7 +662,7 @@ build_check_stmt (tree base, gimple_stmt_iterator *iter,
>
>  /* If T represents a memory access, add instrumentation code before ITER.
>     LOCATION is source code location.
> -   IS_STORE is either 1 (for a store) or 0 (for a load).  */
> +   IS_STORE is either TRUE (for a store) or FALSE (for a load).  */
>
>  static void
>  instrument_derefs (gimple_stmt_iterator *iter, tree t,
> @@ -670,6 +704,285 @@ instrument_derefs (gimple_stmt_iterator *iter, tree t,
>    build_check_stmt (base, iter, location, is_store, size_in_bytes);
>  }
>
> +/* Instrument an access to a contiguous memory region that starts at
> +   the address pointed to by BASE, over a length of LEN (expressed in
> +   the sizeof (*BASE) bytes).  ITER points to the the instruction
> +   before which the instrumentation instructions must be inserted.
> +   LOCATION  is the source location that the instrumentation
> +   instructions must have.  If IS_STORE is true, then the memory
> +   access is a store; otherwise, it's a load.  */
> +
> +static void
> +instrument_mem_region_access (tree base, tree len,
> +                             gimple_stmt_iterator *iter,
> +                             location_t location, bool is_store)
> +{
> +  if (integer_zerop (len))
> +    return;
> +
> +  gimple_stmt_iterator gsi = *iter;
> +  tree pointed_to_type = TREE_TYPE (TREE_TYPE (base));
> +
> +  if (!is_gimple_constant (len))
> +    {
> +      /* So, the length of the memory area to asan-protect is
> +        non-constant.  Let's guard the generated instrumentation code
> +        like:
> +
> +        if (len != 0)
> +          {
> +            //asan instrumentation code goes here.
> +           }
> +          // falltrough instructions, starting with *ITER.  */
> +
> +      basic_block fallthrough_bb, then_bb;
> +      gimple g = gimple_build_cond (NE_EXPR,
> +                                   len,
> +                                   build_int_cst (TREE_TYPE (len), 0),
> +                                   NULL_TREE, NULL_TREE);
> +      gimple_set_location (g, location);
> +      insert_if_then_before_iter (g, iter, /*then_more_likely_p=*/true,
> +                                 &then_bb, &fallthrough_bb);
> +      /* Note that fallthrough_bb starts with the statement that was
> +        pointed to by ITER.  */
> +
> +      /* The 'then block' of the 'if (len != 0) condition is where
> +        we'll generate the asan instrumentation code now.  */
> +      gsi = gsi_start_bb (then_bb);
> +
> +      /* Instrument the beginning of the memory region to be accessed,
> +        and arrange for the rest of the intrumentation code to be
> +        inserted in the then block *after* the current gsi.  */
> +      build_check_stmt (base, &gsi, location, is_store,
> +                       int_size_in_bytes (pointed_to_type));
> +      gsi = gsi_last_bb (then_bb);
> +    }
> +  else
> +    {
> +      /* Instrument the beginning of the memory region to be
> +        accessed.  */
> +      build_check_stmt (base, iter, location, is_store,
> +                       int_size_in_bytes (pointed_to_type));
> +      gsi = *iter;
> +    }
> +
> +  /* We want to instrument the access at the end of the memory region,
> +     which is at (base + len - 1).  */
> +
> +  /* offset = len - 1;  */
> +  len = unshare_expr (len);
> +  gimple offset =
> +    gimple_build_assign_with_ops (TREE_CODE (len),
> +                                 make_ssa_name (TREE_TYPE (len), NULL),
> +                                 len, NULL);
> +  gimple_set_location (offset, location);
> +  gsi_insert_before (&gsi, offset, GSI_NEW_STMT);
> +
> +  offset =
> +    gimple_build_assign_with_ops (MINUS_EXPR,
> +                                 make_ssa_name (size_type_node, NULL),
> +                                 gimple_assign_lhs (offset),
> +                                 build_int_cst (size_type_node, 1));
> +  gimple_set_location (offset, location);
> +  gsi_insert_after (&gsi, offset, GSI_NEW_STMT);
> +
> +  /* _1 = base;  */
> +  base = unshare_expr (base);
> +  gimple region_end =
> +    gimple_build_assign_with_ops (TREE_CODE (base),
> +                                 make_ssa_name (TREE_TYPE (base), NULL),
> +                                 base, NULL);
> +  gimple_set_location (region_end, location);
> +  gsi_insert_after (&gsi, region_end, GSI_NEW_STMT);
> +
> +  /* _2 = _1 + offset;  */
> +  region_end =
> +    gimple_build_assign_with_ops (POINTER_PLUS_EXPR,
> +                                 make_ssa_name (TREE_TYPE (base), NULL),
> +                                 gimple_assign_lhs (region_end),
> +                                 gimple_assign_lhs (offset));
> +  gimple_set_location (region_end, location);
> +  gsi_insert_after (&gsi, region_end, GSI_NEW_STMT);
> +
> +  /* instrument access at _2;  */
> +  gsi_next (&gsi);
> +  tree end = gimple_assign_lhs (region_end);
> +  build_check_stmt (end, &gsi, location, is_store,
> +                   int_size_in_bytes (TREE_TYPE (end)));
> +}
> +
> +/* If the statement pointed to by the iterator ITER is a call to a
> +   builtin memory access function, instrumented it and return TRUE.
> +   Otherwise, return false.  */
> +
> +static bool
> +maybe_instrument_builtin_call (gimple_stmt_iterator *iter)
> +{
> +  gimple call = gsi_stmt (*iter);
> +
> +  if (!is_gimple_call (call))
> +    return false;
> +
> +  tree callee = gimple_call_fndecl (call);
> +
> +  if (!is_builtin_fn (callee)
> +      || DECL_BUILT_IN_CLASS (callee) != BUILT_IN_NORMAL)
> +    return false;
> +
> +  tree source0 = NULL_TREE, source1 = NULL_TREE,
> +    dest = NULL_TREE, len = NULL_TREE;
> +
> +  switch (DECL_FUNCTION_CODE (callee))
> +    {
> +      /* (s, s, n) style memops.  */
> +    case BUILT_IN_BCMP:
> +    case BUILT_IN_MEMCMP:
> +      /* These cannot be safely instrumented as their length parameter
> +         is just a mere limit.
> +
> +    case BUILT_IN_STRNCASECMP:
> +    case BUILT_IN_STRNCMP:  */
> +      len = gimple_call_arg (call, 2);
> +      source0 = gimple_call_arg (call, 0);
> +      source1 = gimple_call_arg (call, 1);
> +      break;
> +
> +      /* (src, dest, n) style memops.  */
> +    case BUILT_IN_BCOPY:
> +      len = gimple_call_arg (call, 2);
> +      source0 = gimple_call_arg (call, 0);
> +      dest = gimple_call_arg (call, 2);
> +      break;
> +
> +      /* (dest, src, n) style memops.  */
> +    case BUILT_IN_MEMCPY:
> +    case BUILT_IN_MEMCPY_CHK:
> +    case BUILT_IN_MEMMOVE:
> +    case BUILT_IN_MEMMOVE_CHK:
> +    case BUILT_IN_MEMPCPY:
> +    case BUILT_IN_MEMPCPY_CHK:
> +
> +      /* The builtin below cannot be safely instrumented as their
> +         length parameter is just a mere limit.
> +

Why can't the following be instrumented? The length is min (n, strlen (str)).


> +    case BUILT_IN_STPNCPY:
> +    case BUILT_IN_STPNCPY_CHK:
> +    case BUILT_IN_STRNCPY:
> +    case BUILT_IN_STRNCPY_CHK: */
> +      dest = gimple_call_arg (call, 0);
> +      source0 = gimple_call_arg (call, 1);
> +      len = gimple_call_arg (call, 2);
> +      break;
> +
> +      /* (dest, src, n) where src is appended to the end of dest.
> +        These cannot be instrumented either.
> +    case BUILT_IN_STRNCAT:
> +    case BUILT_IN_STRNCAT_CHK:
> +      break;  */
> +
> +      /* (dest, n) style memops.  */
> +    case BUILT_IN_BZERO:
> +      dest = gimple_call_arg (call, 0);
> +      len = gimple_call_arg (call, 1);
> +      break;
> +
> +      /* (dest, x, n) style memops*/
> +    case BUILT_IN_MEMSET:
> +    case BUILT_IN_MEMSET_CHK:
> +      dest = gimple_call_arg (call, 0);
> +      len = gimple_call_arg (call, 2);
> +      break;
> +
> +      /* (src, n) style memops.  */
> +    case BUILT_IN_STRNDUP:
> +      source0 = gimple_call_arg (call, 0);
> +      len = gimple_call_arg (call, 1);
> +      break;
> +
> +      /* (src, x, n) style memops.  */
> +    case BUILT_IN_MEMCHR:
> +      source0 = gimple_call_arg (call, 0);
> +      len = gimple_call_arg (call, 2);
> +    break;
> +
> +      /* memops that have no length information.
> +    case BUILT_IN_INDEX:
> +    case BUILT_IN_RINDEX:
> +    case BUILT_IN_STRCHR:
> +    case BUILT_IN_STRDUP:
> +    case BUILT_IN_STRLEN:
> +    case BUILT_IN_STRRCHR:
> +      break;  */


For 'strlen', can the memory check be done at the end of the string
using the returned length?

For strchr, the instrumentation can be done at the returned pointer, right?

thanks,

David


> +
> +      /* (dest, src) and no length info.
> +    case BUILT_IN_STPCPY:
> +    case BUILT_IN_STPCPY_CHK:
> +    case BUILT_IN_STRCASECMP:
> +    case BUILT_IN_STRCAT:
> +    case BUILT_IN_STRCAT_CHK:
> +    case BUILT_IN_STRCPY:
> +    case BUILT_IN_STRCPY_CHK:
> +      break;  */
> +
> +      /* (s, s) and no length info.
> +    case BUILT_IN_STRCMP:
> +    case BUILT_IN_STRCSPN:
> +    case BUILT_IN_STRPBRK:
> +    case BUILT_IN_STRSPN:
> +    case BUILT_IN_STRSTR:
> +      break;  */
> +
> +    default:
> +      break;
> +    }
> +
> +  if (len != NULL_TREE)
> +    {
> +      if (source0 != NULL_TREE)
> +       instrument_mem_region_access (source0, len, iter,
> +                                     gimple_location (call),
> +                                     /*is_store=*/false);
> +      if (source1 != NULL_TREE)
> +       instrument_mem_region_access (source1, len, iter,
> +                                     gimple_location (call),
> +                                     /*is_store=*/false);
> +      else if (dest != NULL_TREE)
> +       instrument_mem_region_access (dest, len, iter,
> +                                     gimple_location (call),
> +                                     /*is_store=*/true);
> +      return true;
> +    }
> +
> +  return false;
> +}
> +
> +/*  Instrument the assignment statement ITER if it is subject to
> +    instrumentation.  */
> +
> +static void
> +instrument_assignment (gimple_stmt_iterator *iter)
> +{
> +  gimple s = gsi_stmt (*iter);
> +
> +  gcc_assert (gimple_assign_single_p (s));
> +
> +  instrument_derefs (iter, gimple_assign_lhs (s),
> +                    gimple_location (s), true);
> +  instrument_derefs (iter, gimple_assign_rhs1 (s),
> +                    gimple_location (s), false);
> +}
> +
> +/* Instrument the function call pointed to by the iterator ITER, if it
> +   is subject to instrumentation.  At the moment, the only function
> +   calls that are instrumented are some built-in functions that access
> +   memory.  Look at maybe_instrument_builtin_call to learn more.  */
> +
> +static void
> +maybe_instrument_call (gimple_stmt_iterator *iter)
> +{
> +  maybe_instrument_builtin_call (iter);
> +}
> +
>  /* asan: this looks too complex. Can this be done simpler? */
>  /* Transform
>     1) Memory references.
> @@ -688,13 +1001,12 @@ transform_statements (void)
>        if (bb->index >= saved_last_basic_block) continue;
>        for (i = gsi_start_bb (bb); !gsi_end_p (i); gsi_next (&i))
>          {
> -          gimple s = gsi_stmt (i);
> -          if (!gimple_assign_single_p (s))
> -           continue;
> -          instrument_derefs (&i, gimple_assign_lhs (s),
> -                             gimple_location (s), true);
> -          instrument_derefs (&i, gimple_assign_rhs1 (s),
> -                             gimple_location (s), false);
> +         gimple s = gsi_stmt (i);
> +
> +         if (gimple_assign_single_p (s))
> +           instrument_assignment (&i);
> +         else if (is_gimple_call (s))
> +           maybe_instrument_call (&i);
>          }
>      }
>  }
> --
>                 Dodji
Jakub Jelinek Oct. 23, 2012, 3:58 p.m. UTC | #3
On Tue, Oct 23, 2012 at 08:47:48AM -0700, Xinliang David Li wrote:
> > +      /* The builtin below cannot be safely instrumented as their
> > +         length parameter is just a mere limit.
> > +
> 
> Why can't the following be instrumented? The length is min (n, strlen (str)).

Because that would be too expensive, and libasan intercepts (most of the)
str* functions anyway, both so that it can check this and test argument
overlap.  The memory builtin instrumentation is done primary for the cases
where the builtins are expanded inline, without calling library routine,
because then nothing is verified in libasan.

> For 'strlen', can the memory check be done at the end of the string
> using the returned length?

Guess strlen is commonly expanded inline, so it would be worthwhile to check
the shadow memory after the call (well, we could check the first byte
before the call and the last one after the call).

	Jakub
Jakub Jelinek Oct. 23, 2012, 4:03 p.m. UTC | #4
On Tue, Oct 23, 2012 at 03:11:29PM +0200, Dodji Seketeli wrote:
> +      /* (src, n) style memops.  */
> +    case BUILT_IN_STRNDUP:
> +      source0 = gimple_call_arg (call, 0);
> +      len = gimple_call_arg (call, 1);
> +      break;

I think you can't instrument strndup either, the length is just a limit
there, it can copy fewer characters than that if strlen (source0) is
shorter.  libasan intercepts strndup I think.

> +      /* (src, x, n) style memops.  */      
> +    case BUILT_IN_MEMCHR:
> +      source0 = gimple_call_arg (call, 0);
> +      len = gimple_call_arg (call, 2);

And similarly for memchr, you could call
p = malloc (4096);
p[4095] = 1;
x = memchr (p, 1, 8192);
and it shouldn't read anything past the end of the
allocated area.

	Jakub
Xinliang David Li Oct. 23, 2012, 4:06 p.m. UTC | #5
On Tue, Oct 23, 2012 at 8:58 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Tue, Oct 23, 2012 at 08:47:48AM -0700, Xinliang David Li wrote:
>> > +      /* The builtin below cannot be safely instrumented as their
>> > +         length parameter is just a mere limit.
>> > +
>>
>> Why can't the following be instrumented? The length is min (n, strlen (str)).
>
> Because that would be too expensive, and libasan intercepts (most of the)
> str* functions anyway, both so that it can check this and test argument
> overlap.  The memory builtin instrumentation is done primary for the cases
> where the builtins are expanded inline, without calling library routine,
> because then nothing is verified in libasan.
>

Ok that makes sense.

thanks,

David


>> For 'strlen', can the memory check be done at the end of the string
>> using the returned length?
>
> Guess strlen is commonly expanded inline, so it would be worthwhile to check
> the shadow memory after the call (well, we could check the first byte
> before the call and the last one after the call).
>
>         Jakub
Dodji Seketeli Oct. 24, 2012, 3:16 p.m. UTC | #6
Jakub Jelinek <jakub@redhat.com> writes:

>> For 'strlen', can the memory check be done at the end of the string
>> using the returned length?
>
> Guess strlen is commonly expanded inline, so it would be worthwhile to check
> the shadow memory after the call (well, we could check the first byte
> before the call and the last one after the call).

How do I get the result of the (strlen) call in gimple?
Jakub Jelinek Oct. 24, 2012, 3:44 p.m. UTC | #7
On Wed, Oct 24, 2012 at 05:16:26PM +0200, Dodji Seketeli wrote:
> Jakub Jelinek <jakub@redhat.com> writes:
> 
> >> For 'strlen', can the memory check be done at the end of the string
> >> using the returned length?
> >
> > Guess strlen is commonly expanded inline, so it would be worthwhile to check
> > the shadow memory after the call (well, we could check the first byte
> > before the call and the last one after the call).
> 
> How do I get the result of the (strlen) call in gimple?

That is gimple_call_lhs (call_stmt).
So for insturmenting strlen, you want to ammend:
  tmp = strlen (ptr);
as
  asan_addr_check (ptr);
  tmp = strlen (ptr);
  asan_addr_check (ptr + tmp);

	Jakub
Dodji Seketeli Oct. 24, 2012, 3:54 p.m. UTC | #8
Jakub Jelinek <jakub@redhat.com> writes:

> On Wed, Oct 24, 2012 at 05:16:26PM +0200, Dodji Seketeli wrote:
>> Jakub Jelinek <jakub@redhat.com> writes:
>> 
>> >> For 'strlen', can the memory check be done at the end of the string
>> >> using the returned length?
>> >
>> > Guess strlen is commonly expanded inline, so it would be worthwhile to check
>> > the shadow memory after the call (well, we could check the first byte
>> > before the call and the last one after the call).
>> 
>> How do I get the result of the (strlen) call in gimple?
>
> That is gimple_call_lhs (call_stmt).

OK, thank you.

> So for insturmenting strlen, you want to ammend:
>   tmp = strlen (ptr);
> as
>   asan_addr_check (ptr);
>   tmp = strlen (ptr);
>   asan_addr_check (ptr + tmp);

I see, thanks.
diff mbox

Patch

diff --git a/gcc/asan.c b/gcc/asan.c
index aed1a60..a8e3827 100644
--- a/gcc/asan.c
+++ b/gcc/asan.c
@@ -468,6 +468,40 @@  create_cond_insert_point_before_iter (gimple_stmt_iterator *iter,
   return gsi_last_bb (cond_bb);
 }
 
+/* Insert an if condition followed by a 'then block' right before the
+   statement pointed to by ITER.  The fallthrough block -- which is the
+   else block of the condition as well as the destination of the
+   outcoming edge of the 'then block' -- starts with the statement
+   pointed to by ITER.
+
+   COND is the condition of the if.  
+
+   If THEN_MORE_LIKELY_P is true,
+   the the probability of the edge to the 'then block' is higher than
+   the probability of the edge to the fallthrough block.
+
+   Upon completion of the function, *THEN_BB is set to the newly
+   inserted 'then block' and similarly, *FALLTHROUGH_BB is set to the
+   fallthrough block.
+
+   *ITER is adjusted to still point to the same statement it was
+   pointing to initially.  */
+
+static void
+insert_if_then_before_iter (gimple cond,
+			    gimple_stmt_iterator *iter,
+			    bool then_more_likely_p,
+			    basic_block *then_bb,
+			    basic_block *fallthrough_bb)
+{
+  gimple_stmt_iterator cond_insert_point =
+    create_cond_insert_point_before_iter (iter,
+					  then_more_likely_p,
+					  then_bb,
+					  fallthrough_bb);
+  gsi_insert_after (&cond_insert_point, cond, GSI_NEW_STMT);
+}
+
 /* Instrument the memory access instruction BASE.  Insert new
    statements before ITER.
 
@@ -628,7 +662,7 @@  build_check_stmt (tree base, gimple_stmt_iterator *iter,
 
 /* If T represents a memory access, add instrumentation code before ITER.
    LOCATION is source code location.
-   IS_STORE is either 1 (for a store) or 0 (for a load).  */
+   IS_STORE is either TRUE (for a store) or FALSE (for a load).  */
 
 static void
 instrument_derefs (gimple_stmt_iterator *iter, tree t,
@@ -670,6 +704,285 @@  instrument_derefs (gimple_stmt_iterator *iter, tree t,
   build_check_stmt (base, iter, location, is_store, size_in_bytes);
 }
 
+/* Instrument an access to a contiguous memory region that starts at
+   the address pointed to by BASE, over a length of LEN (expressed in
+   the sizeof (*BASE) bytes).  ITER points to the the instruction
+   before which the instrumentation instructions must be inserted.
+   LOCATION  is the source location that the instrumentation
+   instructions must have.  If IS_STORE is true, then the memory
+   access is a store; otherwise, it's a load.  */
+
+static void
+instrument_mem_region_access (tree base, tree len,
+			      gimple_stmt_iterator *iter,
+			      location_t location, bool is_store)
+{
+  if (integer_zerop (len))
+    return;
+
+  gimple_stmt_iterator gsi = *iter;
+  tree pointed_to_type = TREE_TYPE (TREE_TYPE (base));
+
+  if (!is_gimple_constant (len))
+    {
+      /* So, the length of the memory area to asan-protect is
+	 non-constant.  Let's guard the generated instrumentation code
+	 like:
+
+	 if (len != 0)
+	   {
+	     //asan instrumentation code goes here.
+           }
+	   // falltrough instructions, starting with *ITER.  */
+
+      basic_block fallthrough_bb, then_bb;
+      gimple g = gimple_build_cond (NE_EXPR,
+				    len,
+				    build_int_cst (TREE_TYPE (len), 0),
+				    NULL_TREE, NULL_TREE);
+      gimple_set_location (g, location);
+      insert_if_then_before_iter (g, iter, /*then_more_likely_p=*/true,
+				  &then_bb, &fallthrough_bb);
+      /* Note that fallthrough_bb starts with the statement that was
+	 pointed to by ITER.  */
+
+      /* The 'then block' of the 'if (len != 0) condition is where
+	 we'll generate the asan instrumentation code now.  */
+      gsi = gsi_start_bb (then_bb);
+
+      /* Instrument the beginning of the memory region to be accessed,
+	 and arrange for the rest of the intrumentation code to be
+	 inserted in the then block *after* the current gsi.  */
+      build_check_stmt (base, &gsi, location, is_store,
+			int_size_in_bytes (pointed_to_type));
+      gsi = gsi_last_bb (then_bb);
+    }
+  else
+    {
+      /* Instrument the beginning of the memory region to be
+	 accessed.  */
+      build_check_stmt (base, iter, location, is_store,
+			int_size_in_bytes (pointed_to_type));
+      gsi = *iter;
+    }
+
+  /* We want to instrument the access at the end of the memory region,
+     which is at (base + len - 1).  */
+
+  /* offset = len - 1;  */
+  len = unshare_expr (len);
+  gimple offset =
+    gimple_build_assign_with_ops (TREE_CODE (len),
+				  make_ssa_name (TREE_TYPE (len), NULL),
+				  len, NULL);
+  gimple_set_location (offset, location);
+  gsi_insert_before (&gsi, offset, GSI_NEW_STMT);
+
+  offset =
+    gimple_build_assign_with_ops (MINUS_EXPR,
+				  make_ssa_name (size_type_node, NULL),
+				  gimple_assign_lhs (offset),
+				  build_int_cst (size_type_node, 1));
+  gimple_set_location (offset, location);
+  gsi_insert_after (&gsi, offset, GSI_NEW_STMT);
+
+  /* _1 = base;  */
+  base = unshare_expr (base);
+  gimple region_end =
+    gimple_build_assign_with_ops (TREE_CODE (base),
+				  make_ssa_name (TREE_TYPE (base), NULL),
+				  base, NULL);
+  gimple_set_location (region_end, location);
+  gsi_insert_after (&gsi, region_end, GSI_NEW_STMT);
+
+  /* _2 = _1 + offset;  */
+  region_end =
+    gimple_build_assign_with_ops (POINTER_PLUS_EXPR,
+				  make_ssa_name (TREE_TYPE (base), NULL),
+				  gimple_assign_lhs (region_end), 
+				  gimple_assign_lhs (offset));
+  gimple_set_location (region_end, location);
+  gsi_insert_after (&gsi, region_end, GSI_NEW_STMT);
+
+  /* instrument access at _2;  */
+  gsi_next (&gsi);
+  tree end = gimple_assign_lhs (region_end);
+  build_check_stmt (end, &gsi, location, is_store,
+		    int_size_in_bytes (TREE_TYPE (end)));
+}
+
+/* If the statement pointed to by the iterator ITER is a call to a
+   builtin memory access function, instrumented it and return TRUE.
+   Otherwise, return false.  */
+
+static bool
+maybe_instrument_builtin_call (gimple_stmt_iterator *iter)
+{
+  gimple call = gsi_stmt (*iter);
+
+  if (!is_gimple_call (call))
+    return false;
+
+  tree callee = gimple_call_fndecl (call);
+
+  if (!is_builtin_fn (callee)
+      || DECL_BUILT_IN_CLASS (callee) != BUILT_IN_NORMAL)
+    return false;
+
+  tree source0 = NULL_TREE, source1 = NULL_TREE,
+    dest = NULL_TREE, len = NULL_TREE;
+
+  switch (DECL_FUNCTION_CODE (callee))
+    {
+      /* (s, s, n) style memops.  */
+    case BUILT_IN_BCMP:
+    case BUILT_IN_MEMCMP:
+      /* These cannot be safely instrumented as their length parameter
+         is just a mere limit.
+
+    case BUILT_IN_STRNCASECMP:
+    case BUILT_IN_STRNCMP:  */
+      len = gimple_call_arg (call, 2);
+      source0 = gimple_call_arg (call, 0);
+      source1 = gimple_call_arg (call, 1);
+      break;
+
+      /* (src, dest, n) style memops.  */
+    case BUILT_IN_BCOPY:
+      len = gimple_call_arg (call, 2);
+      source0 = gimple_call_arg (call, 0);
+      dest = gimple_call_arg (call, 2);
+      break;
+
+      /* (dest, src, n) style memops.  */
+    case BUILT_IN_MEMCPY:
+    case BUILT_IN_MEMCPY_CHK:
+    case BUILT_IN_MEMMOVE:
+    case BUILT_IN_MEMMOVE_CHK:
+    case BUILT_IN_MEMPCPY:
+    case BUILT_IN_MEMPCPY_CHK:
+
+      /* The builtin below cannot be safely instrumented as their
+         length parameter is just a mere limit.
+
+    case BUILT_IN_STPNCPY:
+    case BUILT_IN_STPNCPY_CHK:
+    case BUILT_IN_STRNCPY:
+    case BUILT_IN_STRNCPY_CHK: */
+      dest = gimple_call_arg (call, 0);
+      source0 = gimple_call_arg (call, 1);
+      len = gimple_call_arg (call, 2);
+      break;
+
+      /* (dest, src, n) where src is appended to the end of dest.
+	 These cannot be instrumented either.
+    case BUILT_IN_STRNCAT:
+    case BUILT_IN_STRNCAT_CHK:
+      break;  */
+
+      /* (dest, n) style memops.  */
+    case BUILT_IN_BZERO:
+      dest = gimple_call_arg (call, 0);
+      len = gimple_call_arg (call, 1);
+      break;
+
+      /* (dest, x, n) style memops*/
+    case BUILT_IN_MEMSET:
+    case BUILT_IN_MEMSET_CHK:
+      dest = gimple_call_arg (call, 0);
+      len = gimple_call_arg (call, 2);
+      break;
+
+      /* (src, n) style memops.  */
+    case BUILT_IN_STRNDUP:
+      source0 = gimple_call_arg (call, 0);
+      len = gimple_call_arg (call, 1);
+      break;
+
+      /* (src, x, n) style memops.  */      
+    case BUILT_IN_MEMCHR:
+      source0 = gimple_call_arg (call, 0);
+      len = gimple_call_arg (call, 2);
+    break;
+
+      /* memops that have no length information.
+    case BUILT_IN_INDEX:
+    case BUILT_IN_RINDEX:
+    case BUILT_IN_STRCHR:
+    case BUILT_IN_STRDUP:
+    case BUILT_IN_STRLEN:
+    case BUILT_IN_STRRCHR:
+      break;  */
+
+      /* (dest, src) and no length info.
+    case BUILT_IN_STPCPY:
+    case BUILT_IN_STPCPY_CHK:
+    case BUILT_IN_STRCASECMP:
+    case BUILT_IN_STRCAT:
+    case BUILT_IN_STRCAT_CHK:
+    case BUILT_IN_STRCPY:
+    case BUILT_IN_STRCPY_CHK:
+      break;  */
+
+      /* (s, s) and no length info.
+    case BUILT_IN_STRCMP:
+    case BUILT_IN_STRCSPN:
+    case BUILT_IN_STRPBRK:
+    case BUILT_IN_STRSPN:
+    case BUILT_IN_STRSTR:
+      break;  */
+
+    default:
+      break;
+    }
+
+  if (len != NULL_TREE)
+    {
+      if (source0 != NULL_TREE)
+	instrument_mem_region_access (source0, len, iter,
+				      gimple_location (call),
+				      /*is_store=*/false);
+      if (source1 != NULL_TREE)
+	instrument_mem_region_access (source1, len, iter,
+				      gimple_location (call),
+				      /*is_store=*/false);
+      else if (dest != NULL_TREE)
+	instrument_mem_region_access (dest, len, iter,
+				      gimple_location (call),
+				      /*is_store=*/true);
+      return true;
+    }
+
+  return false;
+}
+
+/*  Instrument the assignment statement ITER if it is subject to
+    instrumentation.  */
+
+static void
+instrument_assignment (gimple_stmt_iterator *iter)
+{
+  gimple s = gsi_stmt (*iter);
+
+  gcc_assert (gimple_assign_single_p (s));
+
+  instrument_derefs (iter, gimple_assign_lhs (s),
+		     gimple_location (s), true);
+  instrument_derefs (iter, gimple_assign_rhs1 (s),
+		     gimple_location (s), false);
+}
+
+/* Instrument the function call pointed to by the iterator ITER, if it
+   is subject to instrumentation.  At the moment, the only function
+   calls that are instrumented are some built-in functions that access
+   memory.  Look at maybe_instrument_builtin_call to learn more.  */
+
+static void
+maybe_instrument_call (gimple_stmt_iterator *iter)
+{
+  maybe_instrument_builtin_call (iter);
+}
+
 /* asan: this looks too complex. Can this be done simpler? */
 /* Transform
    1) Memory references.
@@ -688,13 +1001,12 @@  transform_statements (void)
       if (bb->index >= saved_last_basic_block) continue;
       for (i = gsi_start_bb (bb); !gsi_end_p (i); gsi_next (&i))
         {
-          gimple s = gsi_stmt (i);
-          if (!gimple_assign_single_p (s))
-	    continue;
-          instrument_derefs (&i, gimple_assign_lhs (s),
-                             gimple_location (s), true);
-          instrument_derefs (&i, gimple_assign_rhs1 (s),
-                             gimple_location (s), false);
+	  gimple s = gsi_stmt (i);
+
+	  if (gimple_assign_single_p (s))
+	    instrument_assignment (&i);
+	  else if (is_gimple_call (s))
+	    maybe_instrument_call (&i);
         }
     }
 }