diff mbox

[AArch64,3/4] Don't generate redundant checks when there is no composite arg

Message ID 572CB198.7030906@foss.arm.com
State New
Headers show

Commit Message

Jiong Wang May 6, 2016, 3 p.m. UTC
AArch64 va_arg gimplify hook is generating redundant instructions.

The current va_arg fetch logic is:

1  if (va_arg offset shows the arg is saved at reg_save_area)
2     if ((va_arg_offset + va_arg_type_size) <= 0)
3        fetch va_arg from reg_save_area.
4     else
5        fetch va_arg from incoming_stack.
6  else
7    fetch va_arg from incoming_stack.

The logic hunk "fetch va_arg from incoming_stack" will be generated
*twice*, thus causing redundancy.

There is a particular further "if" check at line 2 because for composite
argument, we don't support argument split, so it's either passed
entirely from reg_save_area, or entirely from incoming_stack area.

Thus, we need the further check at line 2 to decide whether the space
left in reg_save_area is enough; if not, the argument is passed from
incoming_stack.

However, this complex logic is only necessary for composite types, not
for other types.

This patch thus *generates that redundant code only for composite types*,
while for basic types like "int", "float" etc., we can simplify the logic
into:

   if (va_arg_offset < 0)
     fetch va_arg from reg_save_area.
   else
     fetch va_arg from incoming_stack.

This simplified version is actually the most common case.

For example, this patch reduces the instruction count from about 130 to
about 100 for the included testcase.

ok for trunk?

2016-05-06  Jiong Wang  <jiong.wang@arm.com>

gcc/
   * config/aarch64/aarch64.c (aarch64_gimplify_va_arg_expr): Avoid
   duplicated check code.

gcc/testsuite/
   * gcc.target/aarch64/va_arg_4.c: New testcase.

Comments

James Greenhalgh May 31, 2016, 5:06 p.m. UTC | #1
On Fri, May 06, 2016 at 04:00:40PM +0100, Jiong Wang wrote:
> 2016-05-06  Jiong Wang  <jiong.wang@arm.com>
> 
> gcc/
>   * config/aarch64/aarch64.c (aarch64_gimplify_va_arg_expr): Avoid
>   duplicated check code.
> 
> gcc/testsuite/
>   * gcc.target/aarch64/va_arg_4.c: New testcase.

I wonder whether this is safe for the int128_t/uint128_t data types and
overaligned data types? My concern is that something with alignment of
16-bytes may still need the check to skip the final hard register slot
on the stack.

I'm not sure here, so I'll need some more time to think this through, or
for Richard or Marcus to review this and help me understand why there
is nothing to worry about.

Thanks,
James

> From b92a4c4b8e52a9a952e91f307836022f667ab403 Mon Sep 17 00:00:00 2001
> From: "Jiong.Wang" <jiong.wang@arm.com>
> Date: Fri, 6 May 2016 14:37:37 +0100
> Subject: [PATCH 3/4] 3
> 
> ---
>  gcc/config/aarch64/aarch64.c                | 94 ++++++++++++++++++++---------
>  gcc/testsuite/gcc.target/aarch64/va_arg_4.c | 23 +++++++
>  2 files changed, 87 insertions(+), 30 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/va_arg_4.c
> 
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index b1a0287..06904d5 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -9587,6 +9587,7 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
>    bool indirect_p;
>    bool is_ha;		/* is HFA or HVA.  */
>    bool dw_align;	/* double-word align.  */
> +  bool composite_type_p;
>    machine_mode ag_mode = VOIDmode;
>    int nregs;
>    machine_mode mode;
> @@ -9594,13 +9595,14 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
>    tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
>    tree stack, f_top, f_off, off, arg, roundup, on_stack;
>    HOST_WIDE_INT size, rsize, adjust, align;
> -  tree t, u, cond1, cond2;
> +  tree t, t1, u, cond1, cond2;
>  
>    indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
>    if (indirect_p)
>      type = build_pointer_type (type);
>  
>    mode = TYPE_MODE (type);
> +  composite_type_p = aarch64_composite_type_p (type, mode);
>  
>    f_stack = TYPE_FIELDS (va_list_type_node);
>    f_grtop = DECL_CHAIN (f_stack);
> @@ -9671,35 +9673,38 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
>  	      build_int_cst (TREE_TYPE (off), 0));
>    cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
>  
> -  if (dw_align)
> +  if (composite_type_p)
>      {
> -      /* Emit: offs = (offs + 15) & -16.  */
> -      t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
> -		  build_int_cst (TREE_TYPE (off), 15));
> -      t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
> -		  build_int_cst (TREE_TYPE (off), -16));
> -      roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
> -    }
> -  else
> -    roundup = NULL;
> +      if (dw_align)
> +	{
> +	  /* Emit: offs = (offs + 15) & -16.  */
> +	  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
> +		      build_int_cst (TREE_TYPE (off), 15));
> +	  t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
> +		      build_int_cst (TREE_TYPE (off), -16));
> +	  roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
> +	}
> +      else
> +	roundup = NULL;
>  
> -  /* Update ap.__[g|v]r_offs  */
> -  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
> -	      build_int_cst (TREE_TYPE (off), rsize));
> -  t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
> +      /* Update ap.__[g|v]r_offs  */
> +      t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
> +		  build_int_cst (TREE_TYPE (off), rsize));
> +      t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
>  
> -  /* String up.  */
> -  if (roundup)
> -    t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
> +      /* String up.  */
> +      if (roundup)
> +	t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
>  
> -  /* [cond2] if (ap.__[g|v]r_offs > 0)  */
> -  u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
> -	      build_int_cst (TREE_TYPE (f_off), 0));
> -  cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
> +      /* [cond2] if (ap.__[g|v]r_offs > 0)  */
> +      u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
> +		  build_int_cst (TREE_TYPE (f_off), 0));
> +      cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
>  
> -  /* String up: make sure the assignment happens before the use.  */
> -  t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
> -  COND_EXPR_ELSE (cond1) = t;
> +      /* String up: make sure the assignment happens before the use.  */
> +      t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
> +      COND_EXPR_ELSE (cond1) = t;
> +    }
>  
>    /* Prepare the trees handling the argument that is passed on the stack;
>       the top level node will store in ON_STACK.  */
> @@ -9739,13 +9744,34 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
>      on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
>    }
>  
> -  COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
> -  COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
> +  if (composite_type_p)
> +    {
> +      COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
> +      COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
> +
> +      t = off;
> +    }
> +  else
> +    {
> +      COND_EXPR_THEN (cond1) = on_stack;
> +      if (dw_align)
> +	{
> +	  /* Emit: offs = (offs + 15) & -16.  */
> +	  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
> +		      build_int_cst (TREE_TYPE (off), 15));
> +	  t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
> +		      build_int_cst (TREE_TYPE (off), -16));
> +	  roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
> +	}
> +      else
> +	roundup = off;
> +
> +      t = roundup;
> +    }
>  
>    /* Adjustment to OFFSET in the case of BIG_ENDIAN.  */
> -  t = off;
>    if (adjust)
> -    t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
> +    t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), composite_type_p ? off : t,
>  		build_int_cst (TREE_TYPE (off), adjust));
>  
>    t = fold_convert (sizetype, t);
> @@ -9827,7 +9853,15 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
>        t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
>      }
>  
> -  COND_EXPR_ELSE (cond2) = t;
> +  if (composite_type_p)
> +    COND_EXPR_ELSE (cond2) = t;
> +  else
> +    {
> +      t1 = build2 (PLUS_EXPR, TREE_TYPE (off), roundup,
> +		   build_int_cst (TREE_TYPE (off), rsize));
> +      t1 = build2 (MODIFY_EXPR, TREE_TYPE (f_off), f_off, t1);
> +      COND_EXPR_ELSE (cond1) = build2 (COMPOUND_EXPR, TREE_TYPE (t), t1, t);
> +    }
>    addr = fold_convert (build_pointer_type (type), cond1);
>    addr = build_va_arg_indirect_ref (addr);
>  
> diff --git a/gcc/testsuite/gcc.target/aarch64/va_arg_4.c b/gcc/testsuite/gcc.target/aarch64/va_arg_4.c
> new file mode 100644
> index 0000000..35232c9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/va_arg_4.c
> @@ -0,0 +1,23 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O0 -fdump-tree-lower_vaarg" } */
> +
> +int d2i (double a);
> +
> +int
> +foo(char *fmt, ...)
> +{
> +  int d, e;
> +  double f, g;
> +  __builtin_va_list ap;
> +
> +  __builtin_va_start (ap, fmt);
> +  d = __builtin_va_arg (ap, int);
> +  f = __builtin_va_arg (ap, double);
> +  g = __builtin_va_arg (ap, double);
> +  d += d2i (f);
> +  d += d2i (g);
> +  __builtin_va_end (ap);
> +
> +  /* { dg-final { scan-tree-dump-times "ap.__stack =" 3 "lower_vaarg"} } */
> +  return d;
> +}
> -- 
> 1.9.1
>
diff mbox

Patch

From b92a4c4b8e52a9a952e91f307836022f667ab403 Mon Sep 17 00:00:00 2001
From: "Jiong.Wang" <jiong.wang@arm.com>
Date: Fri, 6 May 2016 14:37:37 +0100
Subject: [PATCH 3/4] 3

---
 gcc/config/aarch64/aarch64.c                | 94 ++++++++++++++++++++---------
 gcc/testsuite/gcc.target/aarch64/va_arg_4.c | 23 +++++++
 2 files changed, 87 insertions(+), 30 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/va_arg_4.c

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index b1a0287..06904d5 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -9587,6 +9587,7 @@  aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
   bool indirect_p;
   bool is_ha;		/* is HFA or HVA.  */
   bool dw_align;	/* double-word align.  */
+  bool composite_type_p;
   machine_mode ag_mode = VOIDmode;
   int nregs;
   machine_mode mode;
@@ -9594,13 +9595,14 @@  aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
   tree stack, f_top, f_off, off, arg, roundup, on_stack;
   HOST_WIDE_INT size, rsize, adjust, align;
-  tree t, u, cond1, cond2;
+  tree t, t1, u, cond1, cond2;
 
   indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
   if (indirect_p)
     type = build_pointer_type (type);
 
   mode = TYPE_MODE (type);
+  composite_type_p = aarch64_composite_type_p (type, mode);
 
   f_stack = TYPE_FIELDS (va_list_type_node);
   f_grtop = DECL_CHAIN (f_stack);
@@ -9671,35 +9673,38 @@  aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
 	      build_int_cst (TREE_TYPE (off), 0));
   cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
 
-  if (dw_align)
+  if (composite_type_p)
     {
-      /* Emit: offs = (offs + 15) & -16.  */
-      t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
-		  build_int_cst (TREE_TYPE (off), 15));
-      t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
-		  build_int_cst (TREE_TYPE (off), -16));
-      roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
-    }
-  else
-    roundup = NULL;
+      if (dw_align)
+	{
+	  /* Emit: offs = (offs + 15) & -16.  */
+	  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
+		      build_int_cst (TREE_TYPE (off), 15));
+	  t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
+		      build_int_cst (TREE_TYPE (off), -16));
+	  roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
+	}
+      else
+	roundup = NULL;
 
-  /* Update ap.__[g|v]r_offs  */
-  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
-	      build_int_cst (TREE_TYPE (off), rsize));
-  t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
+      /* Update ap.__[g|v]r_offs  */
+      t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
+		  build_int_cst (TREE_TYPE (off), rsize));
+      t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
 
-  /* String up.  */
-  if (roundup)
-    t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
+      /* String up.  */
+      if (roundup)
+	t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
 
-  /* [cond2] if (ap.__[g|v]r_offs > 0)  */
-  u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
-	      build_int_cst (TREE_TYPE (f_off), 0));
-  cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
+      /* [cond2] if (ap.__[g|v]r_offs > 0)  */
+      u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
+		  build_int_cst (TREE_TYPE (f_off), 0));
+      cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
 
-  /* String up: make sure the assignment happens before the use.  */
-  t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
-  COND_EXPR_ELSE (cond1) = t;
+      /* String up: make sure the assignment happens before the use.  */
+      t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
+      COND_EXPR_ELSE (cond1) = t;
+    }
 
   /* Prepare the trees handling the argument that is passed on the stack;
      the top level node will store in ON_STACK.  */
@@ -9739,13 +9744,34 @@  aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
     on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
   }
 
-  COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
-  COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
+  if (composite_type_p)
+    {
+      COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
+      COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
+
+      t = off;
+    }
+  else
+    {
+      COND_EXPR_THEN (cond1) = on_stack;
+      if (dw_align)
+	{
+	  /* Emit: offs = (offs + 15) & -16.  */
+	  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
+		      build_int_cst (TREE_TYPE (off), 15));
+	  t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
+		      build_int_cst (TREE_TYPE (off), -16));
+	  roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
+	}
+      else
+	roundup = off;
+
+      t = roundup;
+    }
 
   /* Adjustment to OFFSET in the case of BIG_ENDIAN.  */
-  t = off;
   if (adjust)
-    t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
+    t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), composite_type_p ? off : t,
 		build_int_cst (TREE_TYPE (off), adjust));
 
   t = fold_convert (sizetype, t);
@@ -9827,7 +9853,15 @@  aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
       t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
     }
 
-  COND_EXPR_ELSE (cond2) = t;
+  if (composite_type_p)
+    COND_EXPR_ELSE (cond2) = t;
+  else
+    {
+      t1 = build2 (PLUS_EXPR, TREE_TYPE (off), roundup,
+		   build_int_cst (TREE_TYPE (off), rsize));
+      t1 = build2 (MODIFY_EXPR, TREE_TYPE (f_off), f_off, t1);
+      COND_EXPR_ELSE (cond1) = build2 (COMPOUND_EXPR, TREE_TYPE (t), t1, t);
+    }
   addr = fold_convert (build_pointer_type (type), cond1);
   addr = build_va_arg_indirect_ref (addr);
 
diff --git a/gcc/testsuite/gcc.target/aarch64/va_arg_4.c b/gcc/testsuite/gcc.target/aarch64/va_arg_4.c
new file mode 100644
index 0000000..35232c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/va_arg_4.c
@@ -0,0 +1,23 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O0 -fdump-tree-lower_vaarg" } */
+
+int d2i (double a);
+
+int
+foo(char *fmt, ...)
+{
+  int d, e;
+  double f, g;
+  __builtin_va_list ap;
+
+  __builtin_va_start (ap, fmt);
+  d = __builtin_va_arg (ap, int);
+  f = __builtin_va_arg (ap, double);
+  g = __builtin_va_arg (ap, double);
+  d += d2i (f);
+  d += d2i (g);
+  __builtin_va_end (ap);
+
+  /* { dg-final { scan-tree-dump-times "ap.__stack =" 3 "lower_vaarg"} } */
+  return d;
+}
-- 
1.9.1