Patchwork __builtin_assume_aligned

login
register
mail settings
Submitter Jakub Jelinek
Date June 27, 2011, 4:54 p.m.
Message ID <20110627165405.GE16443@tyan-ft48-01.lab.bos.redhat.com>
Download mbox | patch
Permalink /patch/102224/
State New
Headers show

Comments

Jakub Jelinek - June 27, 2011, 4:54 p.m.
On Mon, Jun 27, 2011 at 12:17:40PM +0200, Richard Guenther wrote:
> Ok if you remove the builtins.c folding and instead verify arguments
> from check_builtin_function_arguments.

Thanks, here is what I've committed after bootstrapping/regtesting
again on x86_64-linux and i686-linux.

2011-06-27  Jakub Jelinek  <jakub@redhat.com>

	* builtin-types.def (BT_FN_PTR_CONST_PTR_SIZE_VAR): New.
	* builtins.def (BUILT_IN_ASSUME_ALIGNED): New builtin.
	* tree-ssa-structalias.c (find_func_aliases_for_builtin_call,
	find_func_clobbers): Handle BUILT_IN_ASSUME_ALIGNED.
	* tree-ssa-ccp.c (bit_value_assume_aligned): New function.
	(evaluate_stmt, execute_fold_all_builtins): Handle
	BUILT_IN_ASSUME_ALIGNED.
	* tree-ssa-dce.c (propagate_necessity): Likewise.
	* tree-ssa-alias.c (ref_maybe_used_by_call_p_1,
	call_may_clobber_ref_p_1): Likewise.
	* builtins.c (is_simple_builtin, expand_builtin): Likewise.
	(expand_builtin_assume_aligned): New function.
	* doc/extend.texi (__builtin_assume_aligned): Document.

	* c-common.c (check_builtin_function_arguments): Handle
	BUILT_IN_ASSUME_ALIGNED.

	* gcc.dg/builtin-assume-aligned-1.c: New test.
	* gcc.dg/builtin-assume-aligned-2.c: New test.
	* gcc.target/i386/builtin-assume-aligned-1.c: New test.



	Jakub
Richard Guenther - June 28, 2011, 8:31 a.m.
On Mon, Jun 27, 2011 at 6:54 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Mon, Jun 27, 2011 at 12:17:40PM +0200, Richard Guenther wrote:
>> Ok if you remove the builtins.c folding and instead verify arguments
>> from check_builtin_function_arguments.
>
> Thanks, here is what I've committed after bootstrapping/regtesting
> again on x86_64-linux and i686-linux.

Thanks Jakub.  Probably worth an entry in changes.html.

Richard.

> 2011-06-27  Jakub Jelinek  <jakub@redhat.com>
>
>        * builtin-types.def (BT_FN_PTR_CONST_PTR_SIZE_VAR): New.
>        * builtins.def (BUILT_IN_ASSUME_ALIGNED): New builtin.
>        * tree-ssa-structalias.c (find_func_aliases_for_builtin_call,
>        find_func_clobbers): Handle BUILT_IN_ASSUME_ALIGNED.
>        * tree-ssa-ccp.c (bit_value_assume_aligned): New function.
>        (evaluate_stmt, execute_fold_all_builtins): Handle
>        BUILT_IN_ASSUME_ALIGNED.
>        * tree-ssa-dce.c (propagate_necessity): Likewise.
>        * tree-ssa-alias.c (ref_maybe_used_by_call_p_1,
>        call_may_clobber_ref_p_1): Likewise.
>        * builtins.c (is_simple_builtin, expand_builtin): Likewise.
>        (expand_builtin_assume_aligned): New function.
>        * doc/extend.texi (__builtin_assume_aligned): Document.
>
>        * c-common.c (check_builtin_function_arguments): Handle
>        BUILT_IN_ASSUME_ALIGNED.
>
>        * gcc.dg/builtin-assume-aligned-1.c: New test.
>        * gcc.dg/builtin-assume-aligned-2.c: New test.
>        * gcc.target/i386/builtin-assume-aligned-1.c: New test.
>
> --- gcc/builtin-types.def.jj    2011-06-26 09:55:16.000000000 +0200
> +++ gcc/builtin-types.def       2011-06-27 15:08:12.000000000 +0200
> @@ -454,6 +454,8 @@ DEF_FUNCTION_TYPE_VAR_2 (BT_FN_INT_CONST
>                         BT_INT, BT_CONST_STRING, BT_CONST_STRING)
>  DEF_FUNCTION_TYPE_VAR_2 (BT_FN_INT_INT_CONST_STRING_VAR,
>                         BT_INT, BT_INT, BT_CONST_STRING)
> +DEF_FUNCTION_TYPE_VAR_2 (BT_FN_PTR_CONST_PTR_SIZE_VAR, BT_PTR,
> +                        BT_CONST_PTR, BT_SIZE)
>
>  DEF_FUNCTION_TYPE_VAR_3 (BT_FN_INT_STRING_SIZE_CONST_STRING_VAR,
>                         BT_INT, BT_STRING, BT_SIZE, BT_CONST_STRING)
> --- gcc/builtins.def.jj 2011-06-26 09:55:16.000000000 +0200
> +++ gcc/builtins.def    2011-06-27 15:08:12.000000000 +0200
> @@ -1,7 +1,7 @@
>  /* This file contains the definitions and documentation for the
>    builtins used in the GNU compiler.
>    Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
> -   2010 Free Software Foundation, Inc.
> +   2010, 2011 Free Software Foundation, Inc.
>
>  This file is part of GCC.
>
> @@ -638,6 +638,7 @@ DEF_EXT_LIB_BUILTIN        (BUILT_IN_EXE
>  DEF_EXT_LIB_BUILTIN        (BUILT_IN_EXECVE, "execve", BT_FN_INT_CONST_STRING_PTR_CONST_STRING_PTR_CONST_STRING, ATTR_NOTHROW_LIST)
>  DEF_LIB_BUILTIN        (BUILT_IN_EXIT, "exit", BT_FN_VOID_INT, ATTR_NORETURN_NOTHROW_LIST)
>  DEF_GCC_BUILTIN        (BUILT_IN_EXPECT, "expect", BT_FN_LONG_LONG_LONG, ATTR_CONST_NOTHROW_LEAF_LIST)
> +DEF_GCC_BUILTIN        (BUILT_IN_ASSUME_ALIGNED, "assume_aligned", BT_FN_PTR_CONST_PTR_SIZE_VAR, ATTR_CONST_NOTHROW_LEAF_LIST)
>  DEF_GCC_BUILTIN        (BUILT_IN_EXTEND_POINTER, "extend_pointer", BT_FN_UNWINDWORD_PTR, ATTR_CONST_NOTHROW_LEAF_LIST)
>  DEF_GCC_BUILTIN        (BUILT_IN_EXTRACT_RETURN_ADDR, "extract_return_addr", BT_FN_PTR_PTR, ATTR_LEAF_LIST)
>  DEF_EXT_LIB_BUILTIN    (BUILT_IN_FFS, "ffs", BT_FN_INT_INT, ATTR_CONST_NOTHROW_LEAF_LIST)
> --- gcc/tree-ssa-structalias.c.jj       2011-06-26 09:55:16.000000000 +0200
> +++ gcc/tree-ssa-structalias.c  2011-06-27 15:08:12.000000000 +0200
> @@ -4002,6 +4002,7 @@ find_func_aliases_for_builtin_call (gimp
>       case BUILT_IN_STPCPY_CHK:
>       case BUILT_IN_STRCAT_CHK:
>       case BUILT_IN_STRNCAT_CHK:
> +      case BUILT_IN_ASSUME_ALIGNED:
>        {
>          tree res = gimple_call_lhs (t);
>          tree dest = gimple_call_arg (t, (DECL_FUNCTION_CODE (fndecl)
> @@ -4726,6 +4727,7 @@ find_func_clobbers (gimple origt)
>              return;
>            }
>          /* The following functions neither read nor clobber memory.  */
> +         case BUILT_IN_ASSUME_ALIGNED:
>          case BUILT_IN_FREE:
>            return;
>          /* Trampolines are of no interest to us.  */
> --- gcc/tree-ssa-ccp.c.jj       2011-06-26 09:55:16.000000000 +0200
> +++ gcc/tree-ssa-ccp.c  2011-06-27 15:08:12.000000000 +0200
> @@ -1476,6 +1476,64 @@ bit_value_binop (enum tree_code code, tr
>   return val;
>  }
>
> +/* Return the propagation value when applying __builtin_assume_aligned to
> +   its arguments.  */
> +
> +static prop_value_t
> +bit_value_assume_aligned (gimple stmt)
> +{
> +  tree ptr = gimple_call_arg (stmt, 0), align, misalign = NULL_TREE;
> +  tree type = TREE_TYPE (ptr);
> +  unsigned HOST_WIDE_INT aligni, misaligni = 0;
> +  prop_value_t ptrval = get_value_for_expr (ptr, true);
> +  prop_value_t alignval;
> +  double_int value, mask;
> +  prop_value_t val;
> +  if (ptrval.lattice_val == UNDEFINED)
> +    return ptrval;
> +  gcc_assert ((ptrval.lattice_val == CONSTANT
> +              && TREE_CODE (ptrval.value) == INTEGER_CST)
> +             || double_int_minus_one_p (ptrval.mask));
> +  align = gimple_call_arg (stmt, 1);
> +  if (!host_integerp (align, 1))
> +    return ptrval;
> +  aligni = tree_low_cst (align, 1);
> +  if (aligni <= 1
> +      || (aligni & (aligni - 1)) != 0)
> +    return ptrval;
> +  if (gimple_call_num_args (stmt) > 2)
> +    {
> +      misalign = gimple_call_arg (stmt, 2);
> +      if (!host_integerp (misalign, 1))
> +       return ptrval;
> +      misaligni = tree_low_cst (misalign, 1);
> +      if (misaligni >= aligni)
> +       return ptrval;
> +    }
> +  align = build_int_cst_type (type, -aligni);
> +  alignval = get_value_for_expr (align, true);
> +  bit_value_binop_1 (BIT_AND_EXPR, type, &value, &mask,
> +                    type, value_to_double_int (ptrval), ptrval.mask,
> +                    type, value_to_double_int (alignval), alignval.mask);
> +  if (!double_int_minus_one_p (mask))
> +    {
> +      val.lattice_val = CONSTANT;
> +      val.mask = mask;
> +      gcc_assert ((mask.low & (aligni - 1)) == 0);
> +      gcc_assert ((value.low & (aligni - 1)) == 0);
> +      value.low |= misaligni;
> +      /* ???  Delay building trees here.  */
> +      val.value = double_int_to_tree (type, value);
> +    }
> +  else
> +    {
> +      val.lattice_val = VARYING;
> +      val.value = NULL_TREE;
> +      val.mask = double_int_minus_one;
> +    }
> +  return val;
> +}
> +
>  /* Evaluate statement STMT.
>    Valid only for assignments, calls, conditionals, and switches. */
>
> @@ -1647,6 +1705,10 @@ evaluate_stmt (gimple stmt)
>              val = get_value_for_expr (gimple_call_arg (stmt, 0), true);
>              break;
>
> +           case BUILT_IN_ASSUME_ALIGNED:
> +             val = bit_value_assume_aligned (stmt);
> +             break;
> +
>            default:;
>            }
>        }
> @@ -2186,6 +2248,11 @@ execute_fold_all_builtins (void)
>                 result = integer_zero_node;
>                break;
>
> +             case BUILT_IN_ASSUME_ALIGNED:
> +               /* Remove __builtin_assume_aligned.  */
> +               result = gimple_call_arg (stmt, 0);
> +               break;
> +
>              case BUILT_IN_STACK_RESTORE:
>                result = optimize_stack_restore (i);
>                if (result)
> --- gcc/tree-ssa-dce.c.jj       2011-06-26 09:55:16.000000000 +0200
> +++ gcc/tree-ssa-dce.c  2011-06-27 15:08:12.000000000 +0200
> @@ -837,7 +837,8 @@ propagate_necessity (struct edge_list *e
>                      || DECL_FUNCTION_CODE (callee) == BUILT_IN_FREE
>                      || DECL_FUNCTION_CODE (callee) == BUILT_IN_ALLOCA
>                      || DECL_FUNCTION_CODE (callee) == BUILT_IN_STACK_SAVE
> -                     || DECL_FUNCTION_CODE (callee) == BUILT_IN_STACK_RESTORE))
> +                     || DECL_FUNCTION_CODE (callee) == BUILT_IN_STACK_RESTORE
> +                     || DECL_FUNCTION_CODE (callee) == BUILT_IN_ASSUME_ALIGNED))
>                continue;
>
>              /* Calls implicitly load from memory, their arguments
> --- gcc/tree-ssa-alias.c.jj     2011-06-26 09:55:16.000000000 +0200
> +++ gcc/tree-ssa-alias.c        2011-06-27 15:08:12.000000000 +0200
> @@ -1253,6 +1253,7 @@ ref_maybe_used_by_call_p_1 (gimple call,
>        case BUILT_IN_SINCOS:
>        case BUILT_IN_SINCOSF:
>        case BUILT_IN_SINCOSL:
> +       case BUILT_IN_ASSUME_ALIGNED:
>          return false;
>        /* __sync_* builtins and some OpenMP builtins act as threading
>           barriers.  */
> @@ -1511,6 +1512,7 @@ call_may_clobber_ref_p_1 (gimple call, a
>          return false;
>        case BUILT_IN_STACK_SAVE:
>        case BUILT_IN_ALLOCA:
> +       case BUILT_IN_ASSUME_ALIGNED:
>          return false;
>        /* Freeing memory kills the pointed-to memory.  More importantly
>           the call has to serve as a barrier for moving loads and stores
> --- gcc/builtins.c.jj   2011-06-26 09:55:16.000000000 +0200
> +++ gcc/builtins.c      2011-06-27 15:13:34.000000000 +0200
> @@ -4604,6 +4604,23 @@ expand_builtin_expect (tree exp, rtx tar
>   return target;
>  }
>
> +/* Expand a call to __builtin_assume_aligned.  We just return our first
> +   argument as the builtin_assume_aligned semantic should've been already
> +   executed by CCP.  */
> +
> +static rtx
> +expand_builtin_assume_aligned (tree exp, rtx target)
> +{
> +  if (call_expr_nargs (exp) < 2)
> +    return const0_rtx;
> +  target = expand_expr (CALL_EXPR_ARG (exp, 0), target, VOIDmode,
> +                       EXPAND_NORMAL);
> +  gcc_assert (!TREE_SIDE_EFFECTS (CALL_EXPR_ARG (exp, 1))
> +             && (call_expr_nargs (exp) < 3
> +                 || !TREE_SIDE_EFFECTS (CALL_EXPR_ARG (exp, 2))));
> +  return target;
> +}
> +
>  void
>  expand_builtin_trap (void)
>  {
> @@ -5823,6 +5840,8 @@ expand_builtin (tree exp, rtx target, rt
>       return expand_builtin_va_copy (exp);
>     case BUILT_IN_EXPECT:
>       return expand_builtin_expect (exp, target);
> +    case BUILT_IN_ASSUME_ALIGNED:
> +      return expand_builtin_assume_aligned (exp, target);
>     case BUILT_IN_PREFETCH:
>       expand_builtin_prefetch (exp);
>       return const0_rtx;
> @@ -13461,6 +13480,7 @@ is_simple_builtin (tree decl)
>       case BUILT_IN_OBJECT_SIZE:
>       case BUILT_IN_UNREACHABLE:
>        /* Simple register moves or loads from stack.  */
> +      case BUILT_IN_ASSUME_ALIGNED:
>       case BUILT_IN_RETURN_ADDRESS:
>       case BUILT_IN_EXTRACT_RETURN_ADDR:
>       case BUILT_IN_FROB_RETURN_ADDR:
> --- gcc/doc/extend.texi.jj      2011-06-26 09:55:16.000000000 +0200
> +++ gcc/doc/extend.texi 2011-06-27 15:08:12.000000000 +0200
> @@ -7646,6 +7646,28 @@ int g (int c)
>
>  @end deftypefn
>
> +@deftypefn {Built-in Function} void *__builtin_assume_aligned (const void *@var{exp}, size_t @var{align}, ...)
> +This function returns its first argument, and allows the compiler
> +to assume that the returned pointer is at least @var{align} bytes
> +aligned.  This built-in can have either two or three arguments,
> +if it has three, the third argument should have integer type, and
> +if it is non-zero means misalignment offset.  For example:
> +
> +@smallexample
> +void *x = __builtin_assume_aligned (arg, 16);
> +@end smallexample
> +
> +means that the compiler can assume x, set to arg, is at least
> +16 byte aligned, while:
> +
> +@smallexample
> +void *x = __builtin_assume_aligned (arg, 32, 8);
> +@end smallexample
> +
> +means that the compiler can assume for x, set to arg, that
> +(char *) x - 8 is 32 byte aligned.
> +@end deftypefn
> +
>  @deftypefn {Built-in Function} void __builtin___clear_cache (char *@var{begin}, char *@var{end})
>  This function is used to flush the processor's instruction cache for
>  the region of memory between @var{begin} inclusive and @var{end}
> --- gcc/c-family/c-common.c.jj  2011-06-22 10:16:49.000000000 +0200
> +++ gcc/c-family/c-common.c     2011-06-27 15:18:39.000000000 +0200
> @@ -8166,6 +8166,18 @@ check_builtin_function_arguments (tree f
>        }
>       return false;
>
> +    case BUILT_IN_ASSUME_ALIGNED:
> +      if (builtin_function_validate_nargs (fndecl, nargs, 2 + (nargs > 2)))
> +       {
> +         if (nargs >= 3 && TREE_CODE (TREE_TYPE (args[2])) != INTEGER_TYPE)
> +           {
> +             error ("non-integer argument 3 in call to function %qE", fndecl);
> +             return false;
> +           }
> +         return true;
> +       }
> +      return false;
> +
>     default:
>       return true;
>     }
> --- gcc/testsuite/gcc.dg/builtin-assume-aligned-1.c.jj  2011-06-27 15:08:12.000000000 +0200
> +++ gcc/testsuite/gcc.dg/builtin-assume-aligned-1.c     2011-06-27 15:08:12.000000000 +0200
> @@ -0,0 +1,41 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -fdump-tree-optimized" } */
> +
> +void
> +test1 (double *out1, double *out2, double *out3, double *in1,
> +       double *in2, int len)
> +{
> +  int i;
> +  double *__restrict o1 = __builtin_assume_aligned (out1, 16);
> +  double *__restrict o2 = __builtin_assume_aligned (out2, 16);
> +  double *__restrict o3 = __builtin_assume_aligned (out3, 16);
> +  double *__restrict i1 = __builtin_assume_aligned (in1, 16);
> +  double *__restrict i2 = __builtin_assume_aligned (in2, 16);
> +  for (i = 0; i < len; ++i)
> +    {
> +      o1[i] = i1[i] * i2[i];
> +      o2[i] = i1[i] + i2[i];
> +      o3[i] = i1[i] - i2[i];
> +    }
> +}
> +
> +void
> +test2 (double *out1, double *out2, double *out3, double *in1,
> +       double *in2, int len)
> +{
> +  int i, align = 32, misalign = 16;
> +  out1 = __builtin_assume_aligned (out1, align, misalign);
> +  out2 = __builtin_assume_aligned (out2, align, 16);
> +  out3 = __builtin_assume_aligned (out3, 32, misalign);
> +  in1 = __builtin_assume_aligned (in1, 32, 16);
> +  in2 = __builtin_assume_aligned (in2, 32, 0);
> +  for (i = 0; i < len; ++i)
> +    {
> +      out1[i] = in1[i] * in2[i];
> +      out2[i] = in1[i] + in2[i];
> +      out3[i] = in1[i] - in2[i];
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump-not "__builtin_assume_aligned" "optimized" } } */
> +/* { dg-final { cleanup-tree-dump "optimized" } } */
> --- gcc/testsuite/gcc.dg/builtin-assume-aligned-2.c.jj  2011-06-27 15:08:12.000000000 +0200
> +++ gcc/testsuite/gcc.dg/builtin-assume-aligned-2.c     2011-06-27 15:26:19.000000000 +0200
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +
> +double *bar (void);
> +
> +void
> +foo (double *ptr, int i)
> +{
> +  double *a = __builtin_assume_aligned (ptr, 16, 8, 7);        /* { dg-error "too many arguments to function" } */
> +  double *b = __builtin_assume_aligned (bar (), 16);
> +  double *c = __builtin_assume_aligned (bar (), 16, 8);
> +  double *d = __builtin_assume_aligned (ptr, i, ptr);  /* { dg-error "non-integer argument 3 in call to function" } */
> +  double *e = __builtin_assume_aligned (ptr, i, *ptr); /* { dg-error "non-integer argument 3 in call to function" } */
> +  *a = 0.0;
> +  *b = 0.0;
> +  *c = 0.0;
> +  *d = 0.0;
> +  *e = 0.0;
> +}
> --- gcc/testsuite/gcc.target/i386/builtin-assume-aligned-1.c.jj 2011-06-27 15:08:12.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/builtin-assume-aligned-1.c    2011-06-27 15:08:12.000000000 +0200
> @@ -0,0 +1,41 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -msse2 -mno-avx" } */
> +
> +void
> +test1 (double *out1, double *out2, double *out3, double *in1,
> +       double *in2, int len)
> +{
> +  int i;
> +  double *__restrict o1 = __builtin_assume_aligned (out1, 16);
> +  double *__restrict o2 = __builtin_assume_aligned (out2, 16);
> +  double *__restrict o3 = __builtin_assume_aligned (out3, 16);
> +  double *__restrict i1 = __builtin_assume_aligned (in1, 16);
> +  double *__restrict i2 = __builtin_assume_aligned (in2, 16);
> +  for (i = 0; i < len; ++i)
> +    {
> +      o1[i] = i1[i] * i2[i];
> +      o2[i] = i1[i] + i2[i];
> +      o3[i] = i1[i] - i2[i];
> +    }
> +}
> +
> +void
> +test2 (double *out1, double *out2, double *out3, double *in1,
> +       double *in2, int len)
> +{
> +  int i, align = 32, misalign = 16;
> +  out1 = __builtin_assume_aligned (out1, align, misalign);
> +  out2 = __builtin_assume_aligned (out2, align, 16);
> +  out3 = __builtin_assume_aligned (out3, 32, misalign);
> +  in1 = __builtin_assume_aligned (in1, 32, 16);
> +  in2 = __builtin_assume_aligned (in2, 32, 0);
> +  for (i = 0; i < len; ++i)
> +    {
> +      out1[i] = in1[i] * in2[i];
> +      out2[i] = in1[i] + in2[i];
> +      out3[i] = in1[i] - in2[i];
> +    }
> +}
> +
> +/* { dg-final { scan-assembler-not "movhpd" } } */
> +/* { dg-final { scan-assembler-not "movlpd" } } */
>
>
>        Jakub
>
Gerald Pfeifer - July 8, 2011, 1:46 p.m.
On Tue, 28 Jun 2011, Richard Guenther wrote:
>> Thanks, here is what I've committed after bootstrapping/regtesting
>> again on x86_64-linux and i686-linux.
> Thanks Jakub.  Probably worth an entry in changes.html.

Yes, please. :-)

Gerald

Patch

--- gcc/builtin-types.def.jj	2011-06-26 09:55:16.000000000 +0200
+++ gcc/builtin-types.def	2011-06-27 15:08:12.000000000 +0200
@@ -454,6 +454,8 @@  DEF_FUNCTION_TYPE_VAR_2 (BT_FN_INT_CONST
 			 BT_INT, BT_CONST_STRING, BT_CONST_STRING)
 DEF_FUNCTION_TYPE_VAR_2 (BT_FN_INT_INT_CONST_STRING_VAR,
 			 BT_INT, BT_INT, BT_CONST_STRING)
+DEF_FUNCTION_TYPE_VAR_2 (BT_FN_PTR_CONST_PTR_SIZE_VAR, BT_PTR,
+			 BT_CONST_PTR, BT_SIZE)
 
 DEF_FUNCTION_TYPE_VAR_3 (BT_FN_INT_STRING_SIZE_CONST_STRING_VAR,
 			 BT_INT, BT_STRING, BT_SIZE, BT_CONST_STRING)
--- gcc/builtins.def.jj	2011-06-26 09:55:16.000000000 +0200
+++ gcc/builtins.def	2011-06-27 15:08:12.000000000 +0200
@@ -1,7 +1,7 @@ 
 /* This file contains the definitions and documentation for the
    builtins used in the GNU compiler.
    Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
-   2010 Free Software Foundation, Inc.
+   2010, 2011 Free Software Foundation, Inc.
 
 This file is part of GCC.
 
@@ -638,6 +638,7 @@  DEF_EXT_LIB_BUILTIN        (BUILT_IN_EXE
 DEF_EXT_LIB_BUILTIN        (BUILT_IN_EXECVE, "execve", BT_FN_INT_CONST_STRING_PTR_CONST_STRING_PTR_CONST_STRING, ATTR_NOTHROW_LIST)
 DEF_LIB_BUILTIN        (BUILT_IN_EXIT, "exit", BT_FN_VOID_INT, ATTR_NORETURN_NOTHROW_LIST)
 DEF_GCC_BUILTIN        (BUILT_IN_EXPECT, "expect", BT_FN_LONG_LONG_LONG, ATTR_CONST_NOTHROW_LEAF_LIST)
+DEF_GCC_BUILTIN        (BUILT_IN_ASSUME_ALIGNED, "assume_aligned", BT_FN_PTR_CONST_PTR_SIZE_VAR, ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_GCC_BUILTIN        (BUILT_IN_EXTEND_POINTER, "extend_pointer", BT_FN_UNWINDWORD_PTR, ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_GCC_BUILTIN        (BUILT_IN_EXTRACT_RETURN_ADDR, "extract_return_addr", BT_FN_PTR_PTR, ATTR_LEAF_LIST)
 DEF_EXT_LIB_BUILTIN    (BUILT_IN_FFS, "ffs", BT_FN_INT_INT, ATTR_CONST_NOTHROW_LEAF_LIST)
--- gcc/tree-ssa-structalias.c.jj	2011-06-26 09:55:16.000000000 +0200
+++ gcc/tree-ssa-structalias.c	2011-06-27 15:08:12.000000000 +0200
@@ -4002,6 +4002,7 @@  find_func_aliases_for_builtin_call (gimp
       case BUILT_IN_STPCPY_CHK:
       case BUILT_IN_STRCAT_CHK:
       case BUILT_IN_STRNCAT_CHK:
+      case BUILT_IN_ASSUME_ALIGNED:
 	{
 	  tree res = gimple_call_lhs (t);
 	  tree dest = gimple_call_arg (t, (DECL_FUNCTION_CODE (fndecl)
@@ -4726,6 +4727,7 @@  find_func_clobbers (gimple origt)
 	      return;
 	    }
 	  /* The following functions neither read nor clobber memory.  */
+	  case BUILT_IN_ASSUME_ALIGNED:
 	  case BUILT_IN_FREE:
 	    return;
 	  /* Trampolines are of no interest to us.  */
--- gcc/tree-ssa-ccp.c.jj	2011-06-26 09:55:16.000000000 +0200
+++ gcc/tree-ssa-ccp.c	2011-06-27 15:08:12.000000000 +0200
@@ -1476,6 +1476,64 @@  bit_value_binop (enum tree_code code, tr
   return val;
 }
 
+/* Return the propagation value when applying __builtin_assume_aligned to
+   its arguments.  */
+
+static prop_value_t
+bit_value_assume_aligned (gimple stmt)
+{
+  tree ptr = gimple_call_arg (stmt, 0), align, misalign = NULL_TREE;
+  tree type = TREE_TYPE (ptr);
+  unsigned HOST_WIDE_INT aligni, misaligni = 0;
+  prop_value_t ptrval = get_value_for_expr (ptr, true);
+  prop_value_t alignval;
+  double_int value, mask;
+  prop_value_t val;
+  if (ptrval.lattice_val == UNDEFINED)
+    return ptrval;
+  gcc_assert ((ptrval.lattice_val == CONSTANT
+	       && TREE_CODE (ptrval.value) == INTEGER_CST)
+	      || double_int_minus_one_p (ptrval.mask));
+  align = gimple_call_arg (stmt, 1);
+  if (!host_integerp (align, 1))
+    return ptrval;
+  aligni = tree_low_cst (align, 1);
+  if (aligni <= 1
+      || (aligni & (aligni - 1)) != 0)
+    return ptrval;
+  if (gimple_call_num_args (stmt) > 2)
+    {
+      misalign = gimple_call_arg (stmt, 2);
+      if (!host_integerp (misalign, 1))
+	return ptrval;
+      misaligni = tree_low_cst (misalign, 1);
+      if (misaligni >= aligni)
+	return ptrval;
+    }
+  align = build_int_cst_type (type, -aligni);
+  alignval = get_value_for_expr (align, true);
+  bit_value_binop_1 (BIT_AND_EXPR, type, &value, &mask,
+		     type, value_to_double_int (ptrval), ptrval.mask,
+		     type, value_to_double_int (alignval), alignval.mask);
+  if (!double_int_minus_one_p (mask))
+    {
+      val.lattice_val = CONSTANT;
+      val.mask = mask;
+      gcc_assert ((mask.low & (aligni - 1)) == 0);
+      gcc_assert ((value.low & (aligni - 1)) == 0);
+      value.low |= misaligni;
+      /* ???  Delay building trees here.  */
+      val.value = double_int_to_tree (type, value);
+    }
+  else
+    {
+      val.lattice_val = VARYING;
+      val.value = NULL_TREE;
+      val.mask = double_int_minus_one;
+    }
+  return val;
+}
+
 /* Evaluate statement STMT.
    Valid only for assignments, calls, conditionals, and switches. */
 
@@ -1647,6 +1705,10 @@  evaluate_stmt (gimple stmt)
 	      val = get_value_for_expr (gimple_call_arg (stmt, 0), true);
 	      break;
 
+	    case BUILT_IN_ASSUME_ALIGNED:
+	      val = bit_value_assume_aligned (stmt);
+	      break;
+
 	    default:;
 	    }
 	}
@@ -2186,6 +2248,11 @@  execute_fold_all_builtins (void)
                 result = integer_zero_node;
 		break;
 
+	      case BUILT_IN_ASSUME_ALIGNED:
+		/* Remove __builtin_assume_aligned.  */
+		result = gimple_call_arg (stmt, 0);
+		break;
+
 	      case BUILT_IN_STACK_RESTORE:
 		result = optimize_stack_restore (i);
 		if (result)
--- gcc/tree-ssa-dce.c.jj	2011-06-26 09:55:16.000000000 +0200
+++ gcc/tree-ssa-dce.c	2011-06-27 15:08:12.000000000 +0200
@@ -837,7 +837,8 @@  propagate_necessity (struct edge_list *e
 		      || DECL_FUNCTION_CODE (callee) == BUILT_IN_FREE
 		      || DECL_FUNCTION_CODE (callee) == BUILT_IN_ALLOCA
 		      || DECL_FUNCTION_CODE (callee) == BUILT_IN_STACK_SAVE
-		      || DECL_FUNCTION_CODE (callee) == BUILT_IN_STACK_RESTORE))
+		      || DECL_FUNCTION_CODE (callee) == BUILT_IN_STACK_RESTORE
+		      || DECL_FUNCTION_CODE (callee) == BUILT_IN_ASSUME_ALIGNED))
 		continue;
 
 	      /* Calls implicitly load from memory, their arguments
--- gcc/tree-ssa-alias.c.jj	2011-06-26 09:55:16.000000000 +0200
+++ gcc/tree-ssa-alias.c	2011-06-27 15:08:12.000000000 +0200
@@ -1253,6 +1253,7 @@  ref_maybe_used_by_call_p_1 (gimple call,
 	case BUILT_IN_SINCOS:
 	case BUILT_IN_SINCOSF:
 	case BUILT_IN_SINCOSL:
+	case BUILT_IN_ASSUME_ALIGNED:
 	  return false;
 	/* __sync_* builtins and some OpenMP builtins act as threading
 	   barriers.  */
@@ -1511,6 +1512,7 @@  call_may_clobber_ref_p_1 (gimple call, a
 	  return false;
 	case BUILT_IN_STACK_SAVE:
 	case BUILT_IN_ALLOCA:
+	case BUILT_IN_ASSUME_ALIGNED:
 	  return false;
 	/* Freeing memory kills the pointed-to memory.  More importantly
 	   the call has to serve as a barrier for moving loads and stores
--- gcc/builtins.c.jj	2011-06-26 09:55:16.000000000 +0200
+++ gcc/builtins.c	2011-06-27 15:13:34.000000000 +0200
@@ -4604,6 +4604,23 @@  expand_builtin_expect (tree exp, rtx tar
   return target;
 }
 
+/* Expand a call to __builtin_assume_aligned.  We just return our first
+   argument as the builtin_assume_aligned semantic should've been already
+   executed by CCP.  */
+
+static rtx
+expand_builtin_assume_aligned (tree exp, rtx target)
+{
+  if (call_expr_nargs (exp) < 2)
+    return const0_rtx;
+  target = expand_expr (CALL_EXPR_ARG (exp, 0), target, VOIDmode,
+			EXPAND_NORMAL);
+  gcc_assert (!TREE_SIDE_EFFECTS (CALL_EXPR_ARG (exp, 1))
+	      && (call_expr_nargs (exp) < 3
+		  || !TREE_SIDE_EFFECTS (CALL_EXPR_ARG (exp, 2))));
+  return target;
+}
+
 void
 expand_builtin_trap (void)
 {
@@ -5823,6 +5840,8 @@  expand_builtin (tree exp, rtx target, rt
       return expand_builtin_va_copy (exp);
     case BUILT_IN_EXPECT:
       return expand_builtin_expect (exp, target);
+    case BUILT_IN_ASSUME_ALIGNED:
+      return expand_builtin_assume_aligned (exp, target);
     case BUILT_IN_PREFETCH:
       expand_builtin_prefetch (exp);
       return const0_rtx;
@@ -13461,6 +13480,7 @@  is_simple_builtin (tree decl)
       case BUILT_IN_OBJECT_SIZE:
       case BUILT_IN_UNREACHABLE:
 	/* Simple register moves or loads from stack.  */
+      case BUILT_IN_ASSUME_ALIGNED:
       case BUILT_IN_RETURN_ADDRESS:
       case BUILT_IN_EXTRACT_RETURN_ADDR:
       case BUILT_IN_FROB_RETURN_ADDR:
--- gcc/doc/extend.texi.jj	2011-06-26 09:55:16.000000000 +0200
+++ gcc/doc/extend.texi	2011-06-27 15:08:12.000000000 +0200
@@ -7646,6 +7646,28 @@  int g (int c)
 
 @end deftypefn
 
+@deftypefn {Built-in Function} void *__builtin_assume_aligned (const void *@var{exp}, size_t @var{align}, ...)
+This function returns its first argument, and allows the compiler
+to assume that the returned pointer is at least @var{align} bytes
+aligned.  This built-in can have either two or three arguments;
+if it has three, the third argument should have integer type, and
+if it is non-zero, it specifies the misalignment offset.  For example:
+
+@smallexample
+void *x = __builtin_assume_aligned (arg, 16);
+@end smallexample
+
+means that the compiler can assume x, set to arg, is at least
+16 byte aligned, while:
+
+@smallexample
+void *x = __builtin_assume_aligned (arg, 32, 8);
+@end smallexample
+
+means that the compiler can assume for x, set to arg, that
+(char *) x - 8 is 32 byte aligned.
+@end deftypefn
+
 @deftypefn {Built-in Function} void __builtin___clear_cache (char *@var{begin}, char *@var{end})
 This function is used to flush the processor's instruction cache for
 the region of memory between @var{begin} inclusive and @var{end}
--- gcc/c-family/c-common.c.jj	2011-06-22 10:16:49.000000000 +0200
+++ gcc/c-family/c-common.c	2011-06-27 15:18:39.000000000 +0200
@@ -8166,6 +8166,18 @@  check_builtin_function_arguments (tree f
 	}
       return false;
 
+    case BUILT_IN_ASSUME_ALIGNED:
+      if (builtin_function_validate_nargs (fndecl, nargs, 2 + (nargs > 2)))
+	{
+	  if (nargs >= 3 && TREE_CODE (TREE_TYPE (args[2])) != INTEGER_TYPE)
+	    {
+	      error ("non-integer argument 3 in call to function %qE", fndecl);
+	      return false;
+	    }
+	  return true;
+	}
+      return false;
+
     default:
       return true;
     }
--- gcc/testsuite/gcc.dg/builtin-assume-aligned-1.c.jj	2011-06-27 15:08:12.000000000 +0200
+++ gcc/testsuite/gcc.dg/builtin-assume-aligned-1.c	2011-06-27 15:08:12.000000000 +0200
@@ -0,0 +1,41 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+void
+test1 (double *out1, double *out2, double *out3, double *in1,
+       double *in2, int len)
+{
+  int i;
+  double *__restrict o1 = __builtin_assume_aligned (out1, 16);
+  double *__restrict o2 = __builtin_assume_aligned (out2, 16);
+  double *__restrict o3 = __builtin_assume_aligned (out3, 16);
+  double *__restrict i1 = __builtin_assume_aligned (in1, 16);
+  double *__restrict i2 = __builtin_assume_aligned (in2, 16);
+  for (i = 0; i < len; ++i)
+    {
+      o1[i] = i1[i] * i2[i];
+      o2[i] = i1[i] + i2[i];
+      o3[i] = i1[i] - i2[i];
+    }
+}
+
+void
+test2 (double *out1, double *out2, double *out3, double *in1,
+       double *in2, int len)
+{
+  int i, align = 32, misalign = 16;
+  out1 = __builtin_assume_aligned (out1, align, misalign);
+  out2 = __builtin_assume_aligned (out2, align, 16);
+  out3 = __builtin_assume_aligned (out3, 32, misalign);
+  in1 = __builtin_assume_aligned (in1, 32, 16);
+  in2 = __builtin_assume_aligned (in2, 32, 0);
+  for (i = 0; i < len; ++i)
+    {
+      out1[i] = in1[i] * in2[i];
+      out2[i] = in1[i] + in2[i];
+      out3[i] = in1[i] - in2[i];
+    }
+}
+
+/* { dg-final { scan-tree-dump-not "__builtin_assume_aligned" "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
--- gcc/testsuite/gcc.dg/builtin-assume-aligned-2.c.jj	2011-06-27 15:08:12.000000000 +0200
+++ gcc/testsuite/gcc.dg/builtin-assume-aligned-2.c	2011-06-27 15:26:19.000000000 +0200
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+
+double *bar (void);
+
+void
+foo (double *ptr, int i)
+{
+  double *a = __builtin_assume_aligned (ptr, 16, 8, 7);	/* { dg-error "too many arguments to function" } */
+  double *b = __builtin_assume_aligned (bar (), 16);
+  double *c = __builtin_assume_aligned (bar (), 16, 8);
+  double *d = __builtin_assume_aligned (ptr, i, ptr);	/* { dg-error "non-integer argument 3 in call to function" } */
+  double *e = __builtin_assume_aligned (ptr, i, *ptr);	/* { dg-error "non-integer argument 3 in call to function" } */
+  *a = 0.0;
+  *b = 0.0;
+  *c = 0.0;
+  *d = 0.0;
+  *e = 0.0;
+}
--- gcc/testsuite/gcc.target/i386/builtin-assume-aligned-1.c.jj	2011-06-27 15:08:12.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/builtin-assume-aligned-1.c	2011-06-27 15:08:12.000000000 +0200
@@ -0,0 +1,41 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -msse2 -mno-avx" } */
+
+void
+test1 (double *out1, double *out2, double *out3, double *in1,
+       double *in2, int len)
+{
+  int i;
+  double *__restrict o1 = __builtin_assume_aligned (out1, 16);
+  double *__restrict o2 = __builtin_assume_aligned (out2, 16);
+  double *__restrict o3 = __builtin_assume_aligned (out3, 16);
+  double *__restrict i1 = __builtin_assume_aligned (in1, 16);
+  double *__restrict i2 = __builtin_assume_aligned (in2, 16);
+  for (i = 0; i < len; ++i)
+    {
+      o1[i] = i1[i] * i2[i];
+      o2[i] = i1[i] + i2[i];
+      o3[i] = i1[i] - i2[i];
+    }
+}
+
+void
+test2 (double *out1, double *out2, double *out3, double *in1,
+       double *in2, int len)
+{
+  int i, align = 32, misalign = 16;
+  out1 = __builtin_assume_aligned (out1, align, misalign);
+  out2 = __builtin_assume_aligned (out2, align, 16);
+  out3 = __builtin_assume_aligned (out3, 32, misalign);
+  in1 = __builtin_assume_aligned (in1, 32, 16);
+  in2 = __builtin_assume_aligned (in2, 32, 0);
+  for (i = 0; i < len; ++i)
+    {
+      out1[i] = in1[i] * in2[i];
+      out2[i] = in1[i] + in2[i];
+      out3[i] = in1[i] - in2[i];
+    }
+}
+
+/* { dg-final { scan-assembler-not "movhpd" } } */
+/* { dg-final { scan-assembler-not "movlpd" } } */