
Sparc ASAN

Message ID 20121120.231933.1241692116172434661.davem@davemloft.net
State New

Commit Message

David Miller Nov. 21, 2012, 4:19 a.m. UTC
From: David Miller <davem@davemloft.net>
Date: Tue, 20 Nov 2012 21:20:40 -0500 (EST)

> Those seem to be the only problems that need to be resolved for this
> feature to be fully working.

FWIW, here are the changes I am using, which, besides the sparc backend
bits, have some temporary workarounds for the issues I brought up.

The address violation detection seems to work properly, and the only
things that seem to be left are some backtrace/unwind issues.  These
are perhaps similar to the unwind problems that the powerpc folks ran
into.

Comments

Jakub Jelinek Nov. 21, 2012, 9:05 a.m. UTC | #1
On Tue, Nov 20, 2012 at 11:19:33PM -0500, David Miller wrote:
> From: David Miller <davem@davemloft.net>
> Date: Tue, 20 Nov 2012 21:20:40 -0500 (EST)
> 
> > Those seem to be the only problems that need to be resolved for this
> > feature to be fully working.
> 
> FWIW, here are the changes I am using, which, besides the sparc backend
> bits, have some temporary workarounds for the issues I brought up.

To explain these unaligned stores a little bit:
From what I understand, LLVM will always put all the ASAN-protected
automatic vars into a single block and align the whole block to 32 bytes
(on i?86/x86_64 in GCC that is the backend dynamic stack realignment,
those andq $-32, %rsp etc. in the prologue; on other targets rth added,
a release or two ago, handling of these via alloca, where the alloca
allocates up to alignment - 1 bytes more and we add alignment - 1 to the
result of the alloca and mask it).
But it seemed nothing in libasan relies on it actually being 32-byte
aligned, so what I've implemented keeps the vars as aligned as they were
before (well, the base) and just puts the padding in between as if the
base were 32-byte aligned and I wanted to align them all to 32 bytes
(ignoring more strictly aligned vars etc. in this description).
That of course means the shadow memory stores may be unaligned, but
since i?86/x86_64 were the only supported targets initially, and
unaligned stores are IMHO pretty fast there, it isn't a big deal.
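
For illustration only, the alloca trick amounts to roughly the following
C (a sketch of the pointer arithmetic rather than the actual RTL the
prologue emits; the function name and the sizes are made up):

  #include <stdint.h>
  #include <string.h>

  void
  foo (void)
  {
    /* Over-allocate by align - 1 bytes, then round the result up to
       the requested alignment.  */
    const uintptr_t align = 32;
    const size_t block_size = 96;	/* protected vars plus red zones */
    char *raw = __builtin_alloca (block_size + align - 1);
    char *block = (char *) (((uintptr_t) raw + align - 1) & ~(align - 1));

    memset (block, 0, block_size);	/* block is now 32-byte aligned */
  }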

Now, for strict-alignment targets, the question is what is the fastest
way to implement this.  On one side there is the cost of dynamically
realigning the stack var block (which isn't just the alloca at the
beginning, but also pretty much having one extra register reserved for
the result of the alloca, used as the base to access all the local vars;
sure, you can spill it, but if it is used frequently, it will likely be
assigned to a hard reg for most of its lifetime), and on the other side
there is the cost of accessing the shadow memory at smaller granularity.

If some target has the virtual-stack-vars reg not even 8-byte aligned,
some realignment is required, as the shadow shift is 3.
If it is just 8-byte aligned and we don't realign the block, then yes,
the shadow memory needs to be accessed by bytes; if it is 16-byte
aligned, perhaps we could use 16-bit stores instead of just 8-bit ones.
And perhaps the decision (for strict-alignment arches) whether or not to
realign the whole block would best be made dynamically: for a small
number of asan-protected vars (i.e. a small number of expected shadow
memory stores) we could just not realign and use 8-bit or 16-bit shadow
memory stores, while for larger numbers of vars it might be cheaper to
realign.

So, concerning your patch, which basically implements the never-realign
approach and just sometimes uses smaller shadow memory accesses: the
decision shouldn't be made solely based on STRICT_ALIGNMENT, but needs
to take into account the alignment of virtual_stack_vars_rtx in the
current function.  If its byte alignment is >= (4 << ASAN_SHADOW_SHIFT),
then you can use the same code as for !STRICT_ALIGNMENT; if its byte
alignment is >= (2 << ASAN_SHADOW_SHIFT), then you can use HImode
stores, otherwise QImode.
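
In pseudo-code, the mode selection would look roughly like this (just a
sketch, not actual asan.c code; base_align here is assumed to stand for
the known byte alignment of virtual_stack_vars_rtx in the current
function, however that is obtained):

  enum machine_mode shadow_store_mode;

  if (!STRICT_ALIGNMENT || base_align >= (4 << ASAN_SHADOW_SHIFT))
    shadow_store_mode = SImode;	/* 4 shadow bytes per store, as now */
  else if (base_align >= (2 << ASAN_SHADOW_SHIFT))
    shadow_store_mode = HImode;	/* 2 shadow bytes per store */
  else
    shadow_store_mode = QImode;	/* fall back to byte stores */

  shadow_mem = gen_rtx_MEM (shadow_store_mode, shadow_base);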

> --- a/gcc/asan.c
> +++ b/gcc/asan.c
> @@ -321,7 +321,10 @@ asan_emit_stack_protection (rtx base, HOST_WIDE_INT *offsets, tree *decls,
>  			      NULL_RTX, 1, OPTAB_DIRECT);
>    gcc_assert (asan_shadow_set != -1
>  	      && (ASAN_RED_ZONE_SIZE >> ASAN_SHADOW_SHIFT) == 4);
> -  shadow_mem = gen_rtx_MEM (SImode, shadow_base);
> +  if (STRICT_ALIGNMENT)
> +    shadow_mem = gen_rtx_MEM (QImode, shadow_base);
> +  else
> +    shadow_mem = gen_rtx_MEM (SImode, shadow_base);
>    set_mem_alias_set (shadow_mem, asan_shadow_set);
>    prev_offset = base_offset;
>    for (l = length; l; l -= 2)
> @@ -349,7 +352,20 @@ asan_emit_stack_protection (rtx base, HOST_WIDE_INT *offsets, tree *decls,
>  	      }
>  	    else
>  	      shadow_bytes[i] = ASAN_STACK_MAGIC_PARTIAL;
> -	  emit_move_insn (shadow_mem, asan_shadow_cst (shadow_bytes));
> +	  if (STRICT_ALIGNMENT)
> +	    {
> +	      for (i = 0; i < 4; i++)
> +		{
> +		  rtx mem = adjust_address (shadow_mem, VOIDmode, i);
> +		  rtx val;
> +
> +		  val = GEN_INT (trunc_int_for_mode (shadow_bytes[i],
> +						     QImode));
> +		  emit_move_insn (mem, val);
> +		}
> +	    }
> +	  else
> +	    emit_move_insn (shadow_mem, asan_shadow_cst (shadow_bytes));
>  	  offset = aoff;
>  	}
>        while (offset <= offsets[l - 2] - ASAN_RED_ZONE_SIZE)
> @@ -359,7 +375,22 @@ asan_emit_stack_protection (rtx base, HOST_WIDE_INT *offsets, tree *decls,
>  				       >> ASAN_SHADOW_SHIFT);
>  	  prev_offset = offset;
>  	  memset (shadow_bytes, cur_shadow_byte, 4);
> -	  emit_move_insn (shadow_mem, asan_shadow_cst (shadow_bytes));
> +	  if (STRICT_ALIGNMENT)
> +	    {
> +	      int i;
> +
> +	      for (i = 0; i < 4; i++)
> +		{
> +		  rtx mem = adjust_address (shadow_mem, VOIDmode, i);
> +		  rtx val;
> +
> +		  val = GEN_INT (trunc_int_for_mode (shadow_bytes[i],
> +						     QImode));
> +		  emit_move_insn (mem, val);
> +		}
> +	    }
> +	  else
> +	    emit_move_insn (shadow_mem, asan_shadow_cst (shadow_bytes));
>  	  offset += ASAN_RED_ZONE_SIZE;
>  	}
>        cur_shadow_byte = ASAN_STACK_MAGIC_MIDDLE;

	Jakub
Konstantin Serebryany Nov. 21, 2012, 1:20 p.m. UTC | #2
On Wed, Nov 21, 2012 at 1:05 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Tue, Nov 20, 2012 at 11:19:33PM -0500, David Miller wrote:
>> From: David Miller <davem@davemloft.net>
>> Date: Tue, 20 Nov 2012 21:20:40 -0500 (EST)
>>
>> > Those seem to be the only problems that need to be resolved for this
>> > feature to be fully working.
>>
>> FWIW, here are the changes I am using, which, besides the sparc backend
>> bits, have some temporary workarounds for the issues I brought up.
>
> To explain these unaligned stores a little bit:
> From what I understand, LLVM will always put all the ASAN-protected
> automatic vars into a single block and align the whole block to 32 bytes
> (on i?86/x86_64 in GCC that is the backend dynamic stack realignment,
> those andq $-32, %rsp etc. in the prologue; on other targets rth added,
> a release or two ago, handling of these via alloca, where the alloca
> allocates up to alignment - 1 bytes more and we add alignment - 1 to the
> result of the alloca and mask it).
> But it seemed nothing in libasan relies on it actually being 32-byte
> aligned,

I think your explanation is correct wrt the default mode, where the
memory-to-shadow ratio (mapping scale) is 8.

With greater values of the mapping scale you need greater alignment,
but those modes were never used except experimentally.  So, don't
bother.
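
(For reference, the default mapping is shadow_addr = (addr >> shadow_shift)
+ shadow_offset with shadow_shift == 3, i.e. 8 application bytes per
shadow byte; the sparc_asan_shadow_offset hook in the patch supplies the
offset term, 1 << 44 for 64-bit and 1 << 29 for 32-bit.)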

As for the unaligned stores: aren't they 2x more expensive on x86
compared to properly aligned ones?

--kcc


Patch

diff --git a/gcc/asan.c b/gcc/asan.c
index bd90e0a..d9b3f1b 100644
--- a/gcc/asan.c
+++ b/gcc/asan.c
@@ -321,7 +321,10 @@  asan_emit_stack_protection (rtx base, HOST_WIDE_INT *offsets, tree *decls,
 			      NULL_RTX, 1, OPTAB_DIRECT);
   gcc_assert (asan_shadow_set != -1
 	      && (ASAN_RED_ZONE_SIZE >> ASAN_SHADOW_SHIFT) == 4);
-  shadow_mem = gen_rtx_MEM (SImode, shadow_base);
+  if (STRICT_ALIGNMENT)
+    shadow_mem = gen_rtx_MEM (QImode, shadow_base);
+  else
+    shadow_mem = gen_rtx_MEM (SImode, shadow_base);
   set_mem_alias_set (shadow_mem, asan_shadow_set);
   prev_offset = base_offset;
   for (l = length; l; l -= 2)
@@ -349,7 +352,20 @@  asan_emit_stack_protection (rtx base, HOST_WIDE_INT *offsets, tree *decls,
 	      }
 	    else
 	      shadow_bytes[i] = ASAN_STACK_MAGIC_PARTIAL;
-	  emit_move_insn (shadow_mem, asan_shadow_cst (shadow_bytes));
+	  if (STRICT_ALIGNMENT)
+	    {
+	      for (i = 0; i < 4; i++)
+		{
+		  rtx mem = adjust_address (shadow_mem, VOIDmode, i);
+		  rtx val;
+
+		  val = GEN_INT (trunc_int_for_mode (shadow_bytes[i],
+						     QImode));
+		  emit_move_insn (mem, val);
+		}
+	    }
+	  else
+	    emit_move_insn (shadow_mem, asan_shadow_cst (shadow_bytes));
 	  offset = aoff;
 	}
       while (offset <= offsets[l - 2] - ASAN_RED_ZONE_SIZE)
@@ -359,7 +375,22 @@  asan_emit_stack_protection (rtx base, HOST_WIDE_INT *offsets, tree *decls,
 				       >> ASAN_SHADOW_SHIFT);
 	  prev_offset = offset;
 	  memset (shadow_bytes, cur_shadow_byte, 4);
-	  emit_move_insn (shadow_mem, asan_shadow_cst (shadow_bytes));
+	  if (STRICT_ALIGNMENT)
+	    {
+	      int i;
+
+	      for (i = 0; i < 4; i++)
+		{
+		  rtx mem = adjust_address (shadow_mem, VOIDmode, i);
+		  rtx val;
+
+		  val = GEN_INT (trunc_int_for_mode (shadow_bytes[i],
+						     QImode));
+		  emit_move_insn (mem, val);
+		}
+	    }
+	  else
+	    emit_move_insn (shadow_mem, asan_shadow_cst (shadow_bytes));
 	  offset += ASAN_RED_ZONE_SIZE;
 	}
       cur_shadow_byte = ASAN_STACK_MAGIC_MIDDLE;
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index 4e9de98..7bcc815 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -600,6 +600,7 @@  static void sparc_print_operand_address (FILE *, rtx);
 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
 					   enum machine_mode,
 					   secondary_reload_info *);
+static unsigned HOST_WIDE_INT sparc_asan_shadow_offset (void);
 
 #ifdef SUBTARGET_ATTRIBUTE_TABLE
 /* Table of valid machine attributes.  */
@@ -754,6 +755,9 @@  char sparc_hard_reg_printed[8];
 #undef TARGET_OPTION_OVERRIDE
 #define TARGET_OPTION_OVERRIDE sparc_option_override
 
+#undef TARGET_ASAN_SHADOW_OFFSET
+#define TARGET_ASAN_SHADOW_OFFSET sparc_asan_shadow_offset
+
 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
@@ -11034,6 +11038,14 @@  get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
   return 0;
 }
 
+/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
+
+static unsigned HOST_WIDE_INT
+sparc_asan_shadow_offset (void)
+{
+  return (unsigned HOST_WIDE_INT) 1 << (TARGET_ARCH64 ? 44 : 29);
+}
+
 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
    We need to emit DTP-relative relocations.  */
 
diff --git a/libsanitizer/sanitizer_common/sanitizer_common.h b/libsanitizer/sanitizer_common/sanitizer_common.h
index cddefd7..00628fc 100644
--- a/libsanitizer/sanitizer_common/sanitizer_common.h
+++ b/libsanitizer/sanitizer_common/sanitizer_common.h
@@ -21,7 +21,7 @@  namespace __sanitizer {
 // Constants.
 const uptr kWordSize = __WORDSIZE / 8;
 const uptr kWordSizeInBits = 8 * kWordSize;
-const uptr kPageSizeBits = 12;
+const uptr kPageSizeBits = 13;
 const uptr kPageSize = 1UL << kPageSizeBits;
 const uptr kCacheLineSize = 64;
 #ifndef _WIN32