GCC Middle and Back End API Reference
asan.c File Reference
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "gimple.h"
#include "tree-iterator.h"
#include "cgraph.h"
#include "tree-ssanames.h"
#include "tree-pass.h"
#include "asan.h"
#include "gimple-pretty-print.h"
#include "target.h"
#include "expr.h"
#include "optabs.h"
#include "output.h"
#include "tm_p.h"
#include "langhooks.h"
#include "hash-table.h"
#include "alloc-pool.h"
#include "cfgloop.h"
#include "gimple-builder.h"
#include "sanitizer.def"
Include dependency graph for asan.c:

Data Structures

struct  asan_mem_ref
struct  asan_mem_ref_hasher
struct  asan_add_string_csts_data

Macros

#define PROB_VERY_UNLIKELY   (REG_BR_PROB_BASE / 2000 - 1)
#define PROB_ALWAYS   (REG_BR_PROB_BASE)
#define BT_FN_BOOL_VPTR_PTR_I1_INT_INT   BT_FN_BOOL_VPTR_PTR_IX_INT_INT[0]
#define BT_FN_I1_CONST_VPTR_INT   BT_FN_IX_CONST_VPTR_INT[0]
#define BT_FN_I1_VPTR_I1_INT   BT_FN_IX_VPTR_IX_INT[0]
#define BT_FN_VOID_VPTR_I1_INT   BT_FN_VOID_VPTR_IX_INT[0]
#define BT_FN_BOOL_VPTR_PTR_I2_INT_INT   BT_FN_BOOL_VPTR_PTR_IX_INT_INT[1]
#define BT_FN_I2_CONST_VPTR_INT   BT_FN_IX_CONST_VPTR_INT[1]
#define BT_FN_I2_VPTR_I2_INT   BT_FN_IX_VPTR_IX_INT[1]
#define BT_FN_VOID_VPTR_I2_INT   BT_FN_VOID_VPTR_IX_INT[1]
#define BT_FN_BOOL_VPTR_PTR_I4_INT_INT   BT_FN_BOOL_VPTR_PTR_IX_INT_INT[2]
#define BT_FN_I4_CONST_VPTR_INT   BT_FN_IX_CONST_VPTR_INT[2]
#define BT_FN_I4_VPTR_I4_INT   BT_FN_IX_VPTR_IX_INT[2]
#define BT_FN_VOID_VPTR_I4_INT   BT_FN_VOID_VPTR_IX_INT[2]
#define BT_FN_BOOL_VPTR_PTR_I8_INT_INT   BT_FN_BOOL_VPTR_PTR_IX_INT_INT[3]
#define BT_FN_I8_CONST_VPTR_INT   BT_FN_IX_CONST_VPTR_INT[3]
#define BT_FN_I8_VPTR_I8_INT   BT_FN_IX_VPTR_IX_INT[3]
#define BT_FN_VOID_VPTR_I8_INT   BT_FN_VOID_VPTR_IX_INT[3]
#define BT_FN_BOOL_VPTR_PTR_I16_INT_INT   BT_FN_BOOL_VPTR_PTR_IX_INT_INT[4]
#define BT_FN_I16_CONST_VPTR_INT   BT_FN_IX_CONST_VPTR_INT[4]
#define BT_FN_I16_VPTR_I16_INT   BT_FN_IX_VPTR_IX_INT[4]
#define BT_FN_VOID_VPTR_I16_INT   BT_FN_VOID_VPTR_IX_INT[4]
#define ATTR_NOTHROW_LEAF_LIST   ECF_NOTHROW | ECF_LEAF
#define ATTR_TMPURE_NOTHROW_LEAF_LIST   ECF_TM_PURE | ATTR_NOTHROW_LEAF_LIST
#define ATTR_NORETURN_NOTHROW_LEAF_LIST   ECF_NORETURN | ATTR_NOTHROW_LEAF_LIST
#define ATTR_TMPURE_NORETURN_NOTHROW_LEAF_LIST   ECF_TM_PURE | ATTR_NORETURN_NOTHROW_LEAF_LIST
#define ATTR_COLD_NOTHROW_LEAF_LIST
#define ATTR_COLD_NORETURN_NOTHROW_LEAF_LIST
#define DEF_SANITIZER_BUILTIN(ENUM, NAME, TYPE, ATTRS)

Functions

static alloc_pool asan_mem_ref_get_alloc_pool ()
static void asan_mem_ref_init ()
static asan_mem_ref* asan_mem_ref_new ()
tree asan_mem_ref_get_end ()
static hash_table
< asan_mem_ref_hasher > & 
get_mem_ref_hash_table ()
static void empty_mem_ref_hash_table ()
static void free_mem_ref_resources ()
static bool has_mem_ref_been_instrumented ()
static bool get_mem_ref_of_assignment (const gimple assignment, asan_mem_ref *ref, bool *ref_is_store)
static bool get_mem_refs_of_builtin_call (const gimple call, asan_mem_ref *src0, tree *src0_len, bool *src0_is_store, asan_mem_ref *src1, tree *src1_len, bool *src1_is_store, asan_mem_ref *dst, tree *dst_len, bool *dst_is_store, bool *dest_is_deref)
static bool has_stmt_been_instrumented_p ()
static void update_mem_ref_hash_table ()
static void asan_init_shadow_ptr_types ()
static tree asan_pp_string ()
static rtx asan_shadow_cst ()
static void asan_clear_shadow ()
rtx asan_emit_stack_protection (rtx base, HOST_WIDE_INT *offsets, tree *decls, int length)
static bool asan_needs_local_alias ()
bool asan_protect_global ()
static tree report_error_func ()
static gimple_stmt_iterator create_cond_insert_point (gimple_stmt_iterator *iter, bool before_p, bool then_more_likely_p, bool create_then_fallthru_edge, basic_block *then_block, basic_block *fallthrough_block)
static void insert_if_then_before_iter (gimple cond, gimple_stmt_iterator *iter, bool then_more_likely_p, basic_block *then_bb, basic_block *fallthrough_bb)
static void build_check_stmt (location_t location, tree base, gimple_stmt_iterator *iter, bool before_p, bool is_store, int size_in_bytes)
static void instrument_derefs (gimple_stmt_iterator *iter, tree t, location_t location, bool is_store)
static void instrument_mem_region_access (tree base, tree len, gimple_stmt_iterator *iter, location_t location, bool is_store)
static bool instrument_strlen_call ()
static bool instrument_builtin_call ()
static bool maybe_instrument_assignment ()
static bool maybe_instrument_call ()
static void transform_statements ()
static tree asan_global_struct ()
static void asan_add_global ()
void initialize_sanitizer_builtins ()
static int count_string_csts ()
static int add_string_csts ()
void asan_finish_file ()
static unsigned int asan_instrument ()
static bool gate_asan ()
gimple_opt_pass* make_pass_asan ()
static bool gate_asan_O0 ()
gimple_opt_pass* make_pass_asan_O0 ()

Variables

alias_set_type asan_shadow_set = -1
static tree shadow_ptr_types [2]
static alloc_pool asan_mem_ref_alloc_pool
static hash_table
< asan_mem_ref_hasher > 
asan_mem_ref_ht

Macro Definition Documentation

#define ATTR_COLD_NORETURN_NOTHROW_LEAF_LIST
#define ATTR_COLD_NOTHROW_LEAF_LIST
#define ATTR_NORETURN_NOTHROW_LEAF_LIST   ECF_NORETURN | ATTR_NOTHROW_LEAF_LIST
#define ATTR_NOTHROW_LEAF_LIST   ECF_NOTHROW | ECF_LEAF
#define ATTR_TMPURE_NORETURN_NOTHROW_LEAF_LIST   ECF_TM_PURE | ATTR_NORETURN_NOTHROW_LEAF_LIST
#define ATTR_TMPURE_NOTHROW_LEAF_LIST   ECF_TM_PURE | ATTR_NOTHROW_LEAF_LIST
#define BT_FN_BOOL_VPTR_PTR_I16_INT_INT   BT_FN_BOOL_VPTR_PTR_IX_INT_INT[4]
#define BT_FN_BOOL_VPTR_PTR_I1_INT_INT   BT_FN_BOOL_VPTR_PTR_IX_INT_INT[0]
#define BT_FN_BOOL_VPTR_PTR_I2_INT_INT   BT_FN_BOOL_VPTR_PTR_IX_INT_INT[1]
#define BT_FN_BOOL_VPTR_PTR_I4_INT_INT   BT_FN_BOOL_VPTR_PTR_IX_INT_INT[2]
#define BT_FN_BOOL_VPTR_PTR_I8_INT_INT   BT_FN_BOOL_VPTR_PTR_IX_INT_INT[3]
#define BT_FN_I16_CONST_VPTR_INT   BT_FN_IX_CONST_VPTR_INT[4]
#define BT_FN_I16_VPTR_I16_INT   BT_FN_IX_VPTR_IX_INT[4]
#define BT_FN_I1_CONST_VPTR_INT   BT_FN_IX_CONST_VPTR_INT[0]
#define BT_FN_I1_VPTR_I1_INT   BT_FN_IX_VPTR_IX_INT[0]
#define BT_FN_I2_CONST_VPTR_INT   BT_FN_IX_CONST_VPTR_INT[1]
#define BT_FN_I2_VPTR_I2_INT   BT_FN_IX_VPTR_IX_INT[1]
#define BT_FN_I4_CONST_VPTR_INT   BT_FN_IX_CONST_VPTR_INT[2]
#define BT_FN_I4_VPTR_I4_INT   BT_FN_IX_VPTR_IX_INT[2]
#define BT_FN_I8_CONST_VPTR_INT   BT_FN_IX_CONST_VPTR_INT[3]
#define BT_FN_I8_VPTR_I8_INT   BT_FN_IX_VPTR_IX_INT[3]
#define BT_FN_VOID_VPTR_I16_INT   BT_FN_VOID_VPTR_IX_INT[4]
#define BT_FN_VOID_VPTR_I1_INT   BT_FN_VOID_VPTR_IX_INT[0]
#define BT_FN_VOID_VPTR_I2_INT   BT_FN_VOID_VPTR_IX_INT[1]
#define BT_FN_VOID_VPTR_I4_INT   BT_FN_VOID_VPTR_IX_INT[2]
#define BT_FN_VOID_VPTR_I8_INT   BT_FN_VOID_VPTR_IX_INT[3]
#define DEF_SANITIZER_BUILTIN (   ENUM,
  NAME,
  TYPE,
  ATTRS 
)
Value:
decl = add_builtin_function ("__builtin_" NAME, TYPE, ENUM, \
BUILT_IN_NORMAL, NAME, NULL_TREE); \
set_call_expr_flags (decl, ATTRS); \
set_builtin_decl (ENUM, decl, true);
#define PROB_ALWAYS   (REG_BR_PROB_BASE)
#define PROB_VERY_UNLIKELY   (REG_BR_PROB_BASE / 2000 - 1)

Function Documentation

static int add_string_csts ( )
static

Called via htab_traverse. Call asan_add_global on emitted STRING_CSTs from the constant hash table.

static void asan_add_global ( )
static

Append description of a single global DECL into vector V. TYPE is __asan_global struct type as returned by asan_global_struct.

static void asan_clear_shadow ( )
static

Clear shadow memory at SHADOW_MEM, LEN bytes. Can't call a library call here though.

References asan_init_shadow_ptr_types(), ASAN_STACK_MAGIC_LEFT, DECL_NAME, DECL_P, HOST_WIDE_INT, IDENTIFIER_LENGTH, NULL_TREE, offset, pp_decimal_int, pp_space, pp_string(), pp_tree_identifier(), pp_wide_integer, and shadow_ptr_types.

rtx asan_emit_stack_protection ( rtx  base,
HOST_WIDE_INT *offsets,
tree *decls,
int  length 
)

Insert code to protect stack vars. The prologue sequence should be emitted directly, epilogue sequence returned. BASE is the register holding the stack base, against which OFFSETS array offsets are relative to, OFFSETS array contains pairs of offsets in reverse order, always the end offset of some gap that needs protection followed by starting offset, and DECLS is an array of representative decls for each var partition. LENGTH is the length of the OFFSETS array, DECLS array is LENGTH / 2 - 1 elements long (OFFSETS include gap before the first variable as well as gaps after each stack variable).

 First of all, prepare the description string.   
 Emit the prologue sequence.   
 Construct epilogue sequence.   
void asan_finish_file ( void  )

Needs to be GTY(()), because cgraph_build_static_cdtor may invoke ggc_collect: static GTY(()) tree asan_ctor_statements;

Module-level instrumentation.

  • Insert __asan_init() into the list of CTORs.
  • TODO: insert redzones around globals.

Avoid instrumenting code in the asan ctors/dtors. We don't need to insert padding after the description strings, nor after .LASAN* array.

Referenced by emit_debug_global_declarations().

static tree asan_global_struct ( )
static

Build struct __asan_global { const void *__beg; uptr __size; uptr __size_with_redzone; const void *__name; uptr __has_dynamic_init; } type.

static void asan_init_shadow_ptr_types ( )
static

Initialize shadow_ptr_types array.

Referenced by asan_clear_shadow().

static unsigned int asan_instrument ( )
static

Instrument the current function.

static alloc_pool asan_mem_ref_get_alloc_pool ( )
static

This creates the alloc pool used to store the instances of asan_mem_ref that are stored in the hash table asan_mem_ref_ht.

Referenced by asan_mem_ref_init().

tree asan_mem_ref_get_end ( )

This builds and returns a pointer to the end of the memory region that starts at START and of length LEN.

Return a tree expression that represents the end of the referenced memory region. Beware that this function can actually build a new tree expression.

References asan_mem_ref::start.

Referenced by has_mem_ref_been_instrumented(), and instrument_derefs().

static void asan_mem_ref_init ( )
static

Initializes an instance of asan_mem_ref.

References asan_mem_ref_get_alloc_pool(), and pool_alloc().

static asan_mem_ref* asan_mem_ref_new ( )
static

Allocates memory for an instance of asan_mem_ref into the memory pool returned by asan_mem_ref_get_alloc_pool and initialize it. START is the address of (or the expression pointing to) the beginning of memory reference. ACCESS_SIZE is the size of the access to the referenced memory.

References fold_build2, integer_zerop(), NULL_TREE, and TREE_TYPE.

static bool asan_needs_local_alias ( )
static

Return true if DECL, a global var, might be overridden and needs therefore a local alias.

static tree asan_pp_string ( )
static

Create ADDR_EXPR of STRING_CST with the PP pretty printer text.

bool asan_protect_global ( )

Return true if DECL is a VAR_DECL that should be protected by Address Sanitizer, by appending a red zone with protected shadow memory after it and aligning it to at least ASAN_RED_ZONE_SIZE bytes.

     Instrument all STRING_CSTs except those created
     by asan_pp_string here.   
     TLS vars aren't statically protectable.   
     Externs will be protected elsewhere.   
     Comdat vars pose an ABI problem, we can't know if
     the var that is selected by the linker will have
     padding or not.   
     Similarly for common vars.  People can use -fno-common.   
     Don't protect if using user section, often vars placed
     into user section from multiple TUs are then assumed
     to be an array of such vars, putting padding in there
     breaks this assumption.   

Referenced by initialize_sanitizer_builtins().

static rtx asan_shadow_cst ( )
static

Return a CONST_INT representing 4 subsequent shadow memory bytes.

static void build_check_stmt ( location_t  location,
tree  base,
gimple_stmt_iterator *iter,
bool  before_p,
bool  is_store,
int  size_in_bytes 
)
static

Instrument the memory access instruction BASE. Insert new statements before or after ITER.

Note that the memory access represented by BASE can be either an SSA_NAME, or a non-SSA expression. LOCATION is the source code location. IS_STORE is TRUE for a store, FALSE for a load. BEFORE_P is TRUE for inserting the instrumentation code before ITER, FALSE for inserting it after ITER. SIZE_IN_BYTES is one of 1, 2, 4, 8, 16.

If BEFORE_P is TRUE, *ITER is arranged to still point to the statement it was pointing to prior to calling this function, otherwise, it points to the statement logically following it.

 Get an iterator on the point where we can add the condition
 statement for the instrumentation.   
 BASE can already be an SSA_NAME; in that case, do not create a
 new SSA_NAME for it.   
 Build
 (base_addr >> ASAN_SHADOW_SHIFT) + targetm.asan_shadow_offset ().   
     Slow path for 1, 2 and 4 byte accesses.
     Test (shadow != 0)
          & (((base_addr & 7) + (size_in_bytes - 1)) >= shadow).   
 Generate call to the run-time library (e.g. __asan_report_load8).   

Referenced by instrument_derefs().

static int count_string_csts ( )
static

Called via htab_traverse. Count number of emitted STRING_CSTs in the constant hash table.

static gimple_stmt_iterator create_cond_insert_point ( gimple_stmt_iterator *iter,
bool  before_p,
bool  then_more_likely_p,
bool  create_then_fallthru_edge,
basic_block *then_block,
basic_block *fallthrough_block 
)
static

Split the current basic block and create a condition statement insertion point right before or after the statement pointed to by ITER. Return an iterator to the point at which the caller might safely insert the condition statement.

THEN_BLOCK must be set to the address of an uninitialized instance of basic_block. The function will then set *THEN_BLOCK to the 'then block' of the condition statement to be inserted by the caller.

If CREATE_THEN_FALLTHRU_EDGE is false, no edge will be created from *THEN_BLOCK to *FALLTHROUGH_BLOCK.

Similarly, the function will set *FALLTHROUGH_BLOCK to the 'else block' of the condition statement to be inserted by the caller.

Note that *FALLTHROUGH_BLOCK is a new block that contains the statements starting from *ITER, and *THEN_BLOCK is a new empty block.

*ITER is adjusted to always point to the first statement of the basic block *FALLTHROUGH_BLOCK. That statement is the same as what ITER was pointing to prior to calling this function, if BEFORE_P is true; otherwise, it is its following statement.

 Get a hold on the 'condition block', the 'then block' and the
 'else block'.   
 Set up the newly created 'then block'.   
 Set up the fallthrough basic block.   
 Update dominance info for the newly created then_bb; note that
 fallthru_bb's dominance info has already been updated by
 split_block.   
static void empty_mem_ref_hash_table ( )
static

Clear all entries from the memory references hash table.

static void free_mem_ref_resources ( )
static

Free the memory references hash table.

static bool gate_asan ( )
static
static bool gate_asan_O0 ( )
static
static hash_table<asan_mem_ref_hasher>& get_mem_ref_hash_table ( )
static

Returns a reference to the hash table containing memory references. This function ensures that the hash table is created. Note that this hash table is updated by the function update_mem_ref_hash_table.

static bool get_mem_ref_of_assignment ( const gimple  assignment,
asan_mem_ref *ref,
bool *ref_is_store 
)
static

Set REF to the memory reference present in a gimple assignment ASSIGNMENT. Return true upon successful completion, false otherwise.

References BUILT_IN_NORMAL, DECL_FUNCTION_CODE, gcc_checking_assert, gimple_call_builtin_p(), gimple_call_fndecl(), and NULL_TREE.

static bool get_mem_refs_of_builtin_call ( const gimple  call,
asan_mem_ref *src0,
tree *src0_len,
bool *src0_is_store,
asan_mem_ref *src1,
tree *src1_len,
bool *src1_is_store,
asan_mem_ref *dst,
tree *dst_len,
bool *dst_is_store,
bool *dest_is_deref 
)
static

Return the memory references contained in a gimple statement representing a builtin call that has to do with memory access.

     (s, s, n) style memops.   
     (src, dest, n) style memops.   
     (dest, src, n) style memops.   
     (dest, n) style memops.   
     (dest, x, n) style memops 
   And now the __atomic* and __sync builtins.
   These are handled differently from the classical memory
   access builtins above.   
     fall through.   
       DEST represents the address of a memory location.
       instrument_derefs wants the memory location, so lets
       dereference the address DEST before handing it to
       instrument_derefs.   
     The other memory access builtins are not instrumented in this
     function because they either don't have any length parameter,
     or their length parameter is just a limit.   
static bool has_mem_ref_been_instrumented ( )
static

Return true iff the memory reference REF has been instrumented.

Return true iff access to memory region starting at REF and of length LEN has been instrumented.

First let's see if the address of the beginning of REF has been instrumented.

     Let's see if the end of the region has been instrumented.   

References asan_mem_ref::access_size, and asan_mem_ref_get_end().

Referenced by instrument_derefs().

static bool has_stmt_been_instrumented_p ( )
static

Return true iff a given gimple statement has been instrumented. Note that the statement is "defined" by the memory references it contains.

void initialize_sanitizer_builtins ( void  )

Initialize sanitizer.def builtins if the FE hasn't initialized them.

This file contains the definitions and documentation for the Address Sanitizer and Thread Sanitizer builtins used in the GNU compiler. Copyright (C) 2012-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see http://www.gnu.org/licenses/.

Before including this file, you should define a macro:

DEF_SANITIZER_BUILTIN (ENUM, NAME, TYPE, ATTRS)

See builtins.def for details. The builtins are created by the C-family of FEs in c-family/c-common.c, for other FEs by asan.c.

Address Sanitizer

Do not reorder the BUILT_IN_ASAN_REPORT* builtins, e.g. cfgcleanup.c relies on this order.

Thread Sanitizer

Undefined Behavior Sanitizer

References asan_protect_global(), TREE_ASM_WRITTEN, TREE_CODE, and constant_descriptor_tree::value.

Referenced by tsan_pass().

static void insert_if_then_before_iter ( gimple  cond,
gimple_stmt_iterator *iter,
bool  then_more_likely_p,
basic_block *then_bb,
basic_block *fallthrough_bb 
)
static

Insert an if condition followed by a 'then block' right before the statement pointed to by ITER. The fallthrough block – which is the else block of the condition as well as the destination of the outgoing edge of the 'then block' – starts with the statement pointed to by ITER.

COND is the condition of the if.

If THEN_MORE_LIKELY_P is true, the probability of the edge to the 'then block' is higher than the probability of the edge to the fallthrough block.

Upon completion of the function, *THEN_BB is set to the newly inserted 'then block' and similarly, *FALLTHROUGH_BB is set to the fallthrough block.

*ITER is adjusted to still point to the same statement it was pointing to initially.

Referenced by instrument_derefs().

static bool instrument_builtin_call ( )
static

Instrument the call to a built-in memory access function that is pointed to by the iterator ITER.

Upon completion, return TRUE iff *ITER has been advanced to the statement following the one it was originally pointing to.

static void instrument_derefs ( gimple_stmt_iterator *iter,
tree  t,
location_t  location,
bool  is_store 
)
static

If T represents a memory access, add instrumentation code before ITER. LOCATION is source code location. IS_STORE is either TRUE (for a store) or FALSE (for a load).

References asan_mem_ref_get_end(), build_check_stmt(), build_int_cst(), gimple_build_cond(), gimple_set_location(), gsi_last_bb(), has_mem_ref_been_instrumented(), insert_if_then_before_iter(), integer_zerop(), INTEGRAL_TYPE_P, is_gimple_constant(), NULL, NULL_TREE, POINTER_TYPE_P, and TREE_TYPE.

static void instrument_mem_region_access ( tree  base,
tree  len,
gimple_stmt_iterator *iter,
location_t  location,
bool  is_store 
)
static

Instrument an access to a contiguous memory region that starts at the address pointed to by BASE, over a length of LEN (expressed in the sizeof (*BASE) bytes). ITER points to the instruction before which the instrumentation instructions must be inserted. LOCATION is the source location that the instrumentation instructions must have. If IS_STORE is true, then the memory access is a store; otherwise, it's a load.

 If the beginning of the memory region has already been
 instrumented, do not instrument it.   
 If the end of the memory region has already been instrumented, do
 not instrument it.  
     So, the length of the memory area to asan-protect is
     non-constant.  Let's guard the generated instrumentation code
     like:

     if (len != 0)
       {
         // asan instrumentation code goes here.
       }
     // fallthrough instructions, starting with *ITER.

     Note that fallthrough_bb starts with the statement that was
     pointed to by ITER.   
     The 'then block' of the 'if (len != 0) condition is where
     we'll generate the asan instrumentation code now.   
     Instrument the beginning of the memory region to be accessed,
     and arrange for the rest of the instrumentation code to be
     inserted in the then block *after* the current gsi.   
       We are in the case where the length of the region is not
       constant; so instrumentation code is being generated in the
       'then block' of the 'if (len != 0) condition.  Let's arrange
       for the subsequent instrumentation statements to go in the
       'then block'.   
         Don't remember this access as instrumented, if length
         is unknown.  It might be zero and not being actually
         instrumented, so we can't rely on it being instrumented.   
 We want to instrument the access at the end of the memory region,
 which is at (base + len - 1).   
 offset = len - 1;   
 _1 = base;   
 _2 = _1 + offset;   
 instrument access at _2;   

References update_mem_ref_hash_table().

static bool instrument_strlen_call ( )
static

Instrument the call (to the builtin strlen function) pointed to by ITER.

This function instruments the access to the first byte of the argument, right before the call. After the call it instruments the access to the last byte of the argument; it uses the result of the call to deduce the offset of that last byte.

Upon completion, iff the call has actually been instrumented, this function returns TRUE and *ITER points to the statement logically following the built-in strlen function call *ITER was initially pointing to. Otherwise, the function returns FALSE and *ITER remains unchanged.

   Some passes might clear the return value of the strlen call;
   bail out in that case.  Return FALSE as we are not advancing
   *ITER.   
 Instrument the access to the first byte of str_arg.  i.e:

 _1 = str_arg; instrument (_1);  
 If we initially had an instruction like:

     int n = strlen (str)

 we now want to instrument the access to str[n], after the
 instruction above. 
 So let's build the access to str[n] that is, access through the
 pointer_plus expr: (_1 + len).   
 Ensure that iter points to the statement logically following the
 one it was initially pointing to.   
 As *ITER has been advanced to point to the next statement, let's
 return true to inform transform_statements that it shouldn't
 advance *ITER anymore; otherwise it will skip that next
 statement, which wouldn't be instrumented.   
gimple_opt_pass* make_pass_asan ( )
gimple_opt_pass* make_pass_asan_O0 ( )
static bool maybe_instrument_assignment ( )
static

Instrument the assignment statement ITER if it is subject to instrumentation. Return TRUE iff instrumentation actually happened. In that case, the iterator ITER is advanced to the next logical expression following the one initially pointed to by ITER, and the relevant memory reference whose access has been instrumented is added to the memory references hash table.

References basic_block_def::index, and single_pred().

static bool maybe_instrument_call ( )
static

Instrument the function call pointed to by the iterator ITER, if it is subject to instrumentation. At the moment, the only function calls that are instrumented are some built-in functions that access memory. Look at instrument_builtin_call to learn more.

Upon completion return TRUE iff *ITER was advanced to the statement following the one it was originally pointing to.

Don't instrument these.

static tree report_error_func ( )
static

Construct a function tree for __asan_report_{load,store}{1,2,4,8,16}. IS_STORE is either 1 (for a store) or 0 (for a load). SIZE_IN_BYTES is one of 1, 2, 4, 8, 16.

static void transform_statements ( )
static

Walk each instruction of all basic blocks and instrument those that represent memory references: loads, stores, or function calls. In a given basic block, this function avoids instrumenting memory references that have already been instrumented.

     Flush the mem ref hash table, if current bb doesn't have
     exactly one predecessor, or if that predecessor (skipping
     over asan created basic blocks) isn't the last processed
     basic block.  Thus we effectively flush on extended basic
     block boundaries.   
            Nothing to do as maybe_instrument_assignment advanced
            the iterator I.   
            Nothing to do as maybe_instrument_call
            advanced the iterator I.   
             No instrumentation happened.

             If the current instruction is a function call that
             might free something, let's forget about the memory
             references that got instrumented.  Otherwise we might
             miss some instrumentation opportunities.   
static void update_mem_ref_hash_table ( )
static

Insert a memory reference into the hash table.

Referenced by instrument_mem_region_access().


Variable Documentation

alloc_pool asan_mem_ref_alloc_pool
static
hash_table<asan_mem_ref_hasher> asan_mem_ref_ht
static
alias_set_type asan_shadow_set = -1

AddressSanitizer, a fast memory error detector. Copyright (C) 2012-2013 Free Software Foundation, Inc. Contributed by Kostya Serebryany <kcc@google.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see http://www.gnu.org/licenses/. AddressSanitizer finds out-of-bounds and use-after-free bugs with <2x slowdown on average.

The tool consists of two parts: instrumentation module (this file) and a run-time library. The instrumentation module adds a run-time check before every memory insn.

For an 8- or 16-byte load accessing address X:

  ShadowAddr = (X >> 3) + Offset
  ShadowValue = *(char*)ShadowAddr;  // *(short*) for 16-byte access.
  if (ShadowValue)
    __asan_report_load8(X);

For a load of N bytes (N=1, 2 or 4) from address X:

  ShadowAddr = (X >> 3) + Offset
  ShadowValue = *(char*)ShadowAddr;
  if (ShadowValue)
    if ((X & 7) + N - 1 >= ShadowValue)
      __asan_report_loadN(X);

Stores are instrumented similarly, but using __asan_report_storeN functions. A call to __asan_init() is inserted to the list of module CTORs.

The run-time library redefines malloc (so that redzones are inserted around the allocated memory) and free (so that reuse of free-ed memory is delayed), and provides the __asan_report* and __asan_init functions.

Read more: http://code.google.com/p/address-sanitizer/wiki/AddressSanitizerAlgorithm

The current implementation supports detection of out-of-bounds and use-after-free in the heap, on the stack and for global variables.

[Protection of stack variables]

To understand how detection of out-of-bounds and use-after-free works for stack variables, let's look at this example on x86_64 where the stack grows downward:

int
foo ()
{
  char a[23] = {0};
  int b[2] = {0};

  a[5] = 1;
  b[1] = 2;

  return a[5] + b[1];
}

For this function, the stack protected by asan will be organized as follows, from the top of the stack to the bottom:

Slot 1/ [red zone of 32 bytes called 'RIGHT RedZone']

Slot 2/ [8 bytes of red zone, that adds up to the space of 'a' to make the next slot be 32 bytes aligned; this one is called Partial Redzone; this 32 bytes alignment is an asan constraint]

Slot 3/ [24 bytes for variable 'a']

Slot 4/ [red zone of 32 bytes called 'Middle RedZone']

Slot 5/ [24 bytes of Partial Red Zone (similar to slot 2)]

Slot 6/ [8 bytes for variable 'b']

Slot 7/ [32 bytes of Red Zone at the bottom of the stack, called 'LEFT RedZone']

The 32 bytes of LEFT red zone at the bottom of the stack can be decomposed as such:

1/ The first 8 bytes contain a magical asan number that is always 0x41B58AB3.

2/ The following 8 bytes contain a pointer to a string (to be parsed at runtime by the runtime asan library), whose format is the following:

"<function-name> <space> <num-of-variables-on-the-stack> (<32-bytes-aligned-offset-in-bytes-of-variable> <space> <length-of-var-in-bytes> ){n} "

where '(...){n}' means the content inside the parenthesis occurs 'n' times, with 'n' being the number of variables on the stack.

3/ The following 16 bytes of the red zone have no particular format.

The shadow memory for that stack layout is going to look like this:

  • content of shadow memory 8 bytes for slot 7: 0xF1F1F1F1. The F1 byte pattern is a magic number called ASAN_STACK_MAGIC_LEFT and is a way for the runtime to know that the memory for that shadow byte is part of the LEFT red zone intended to sit at the bottom of the variables on the stack.
  • content of shadow memory 8 bytes for slots 6 and 5: 0xF4F4F400. The F4 byte pattern is a magic number called ASAN_STACK_MAGIC_PARTIAL. It flags the fact that the memory region for this shadow byte is a PARTIAL red zone intended to pad a variable A, so that the slot following {A,padding} is 32 bytes aligned.

    Note that the fact that the least significant byte of this shadow memory content is 00 means that 8 bytes of its corresponding memory (which corresponds to the memory of variable 'b') is addressable.

  • content of shadow memory 8 bytes for slot 4: 0xF2F2F2F2. The F2 byte pattern is a magic number called ASAN_STACK_MAGIC_MIDDLE. It flags the fact that the memory region for this shadow byte is a MIDDLE red zone intended to sit between two 32-byte aligned slots of {variable,padding}.
  • content of shadow memory 8 bytes for slot 3 and 2: 0xF4000000. This represents the concatenation of variable 'a' and the partial red zone following it, like what we had for variable 'b'. The least significant 3 bytes being 00 means that the 24 bytes reserved for variable 'a' are addressable.
  • content of shadow memory 8 bytes for slot 1: 0xF3F3F3F3. The F3 byte pattern is a magic number called ASAN_STACK_MAGIC_RIGHT. It flags the fact that the memory region for this shadow byte is a RIGHT red zone intended to sit at the top of the variables of the stack.

Note that the real variable layout is done in expand_used_vars in cfgexpand.c. As far as Address Sanitizer is concerned, it lays out stack variables as well as the different red zones, emits some prologue code to populate the shadow memory as to poison (mark as non-accessible) the regions of the red zones and mark the regions of stack variables as accessible, and emit some epilogue code to un-poison (mark as accessible) the regions of red zones right before the function exits.

[Protection of global variables]

The basic idea is to insert a red zone between two global variables and install a constructor function that calls the asan runtime to do the populating of the relevant shadow memory regions at load time.

So the global variables are laid out as to insert a red zone between them. The size of the red zones is such that each variable starts on a 32 bytes boundary.

Then a constructor function is installed so that, for each global variable, it calls the runtime asan library function __asan_register_globals with an instance of this type:

struct __asan_global
{
  // Address of the beginning of the global variable.
  const void *__beg;

  // Initial size of the global variable.
  uptr __size;

  // Size of the global variable + size of the red zone.  This
  // size is 32 bytes aligned.
  uptr __size_with_redzone;

  // Name of the global variable.
  const void *__name;

  // This is always set to NULL for now.
  uptr __has_dynamic_init;
}

A destructor function that calls the runtime asan library function __asan_unregister_globals is also installed.

tree shadow_ptr_types[2]
static

Pointer types to 1 resp. 2 byte integers in shadow memory. A separate alias set is used for all shadow memory accesses.

Referenced by asan_clear_shadow().