diff mbox

[PR66010] Don't take address of ap unless necessary

Message ID 554D0BE4.8090305@mentor.com
State New
Headers show

Commit Message

Tom de Vries May 8, 2015, 7:17 p.m. UTC
Hi,

this patch fixes PR66010.


I.

Consider this test-case, with a va_list passed from f2 to f2_1:
...
#include <stdarg.h>

inline int __attribute__((always_inline))
f2_1 (va_list ap)
{
   return va_arg (ap, int);
}

int
f2 (int i, ...)
{
   int res;
   va_list ap;

   va_start (ap, i);
   res = f2_1 (ap);
   va_end (ap);

   return res;
}
...

When compiling at -O2, before eline we have:
...
f2_1 (struct  * apD.1832)
{
   intD.6 _3;

   # .MEM_2 = VDEF <.MEM_1(D)>
   # USE = anything
   # CLB = anything
   _3 = VA_ARG (&apD.1832, 0B);

   # VUSE <.MEM_2>
   return _3;
}

f2 (intD.6 iD.1835)
{
   struct  apD.1839[1];
   intD.6 resD.1838;
   intD.6 _6;

   # .MEM_2 = VDEF <.MEM_1(D)>
   # USE = anything
   # CLB = anything
   __builtin_va_startD.1030 (&apD.1839, 0);

   # .MEM_3 = VDEF <.MEM_2>
   # USE = anything
   # CLB = anything
   res_4 = f2_1D.1833 (&apD.1839);

   # .MEM_5 = VDEF <.MEM_3>
   # USE = anything
   # CLB = anything
   __builtin_va_endD.1029 (&apD.1839);

   _6 = res_4;

   # .MEM_7 = VDEF <.MEM_5>
   apD.1839 ={v} {CLOBBER};

   # VUSE <.MEM_7>
   return _6;
}
...

Because the va_list type is an array type:
...
   struct  apD.1839[1];
...

we're passing the location of the initial element:
...
   res_4 = f2_1D.1833 (&apD.1839);
...

And the type of the parameter in f2_1 is accordingly a pointer to array element:
...
f2_1 (struct  * apD.1832)
...

That means the address operator here is superfluous.
...
   _3 = VA_ARG (&apD.1832, 0B);
...
Or, differently put, when we take the address of ap in va_start and va_end in 
f2, the result is a pointer to array element type.
When we take the address of ap in f2_2, the result is a pointer to pointer to 
array element type.

This extra indirection doesn't cause wrong code to be generated. The va_arg 
expansion code handles this correctly, thanks to the combination of:
- an unconditional build_fold_indirect_ref in expand_ifn_va_arg_1 which removes
   an indirection, and
- a fixup in gimplify_va_arg_internal that again adds an indirection in some
   cases.

The call gets inlined, and before pass_stdarg we have:
...
f2 (intD.6 iD.1835)
{
   struct  * apD.1849;
   struct  apD.1839[1];
   intD.6 _6;

   # .MEM_2 = VDEF <.MEM_1(D)>
   # USE = nonlocal escaped
   # CLB = nonlocal escaped { D.1839 } (escaped)
   __builtin_va_startD.1030 (&apD.1839, 0);

   # .MEM_3 = VDEF <.MEM_2>
   apD.1849 = &apD.1839;

   # .MEM_7 = VDEF <.MEM_3>
   # USE = nonlocal null { D.1839 D.1849 } (escaped)
   # CLB = nonlocal null { D.1839 D.1849 } (escaped)
   _6 = VA_ARG (&apD.1849, 0B);
...

And after expanding ifn_va_arg, we have:
...
f2 (intD.6 iD.1835)
{
   struct  * ap.3D.1853;
   struct  apD.1839[1];

   # .MEM_2 = VDEF <.MEM_1(D)>
   # USE = nonlocal escaped
   # CLB = nonlocal escaped { D.1839 } (escaped)
   __builtin_va_startD.1030 (&apD.1839, 0);

   ap_3 = &apD.1839;

   ap.3_10 = ap_3;

   # VUSE <.MEM_2>
   _11 = ap.3_10->gp_offsetD.2;
   <SNIP>
...

The pass_stdarg optimization fails:
...
f2: va_list escapes 1, needs to save all GPR units and all FPR units.
...

It fails on the superfluous address operator:
...
va_list escapes in ap_3 = &apD.1839;
...


II.

The patch prevents the superfluous address operator from being added.

It also removes the need for the fixup in gimplify_va_arg_internal, by deciding 
in gimplify_va_arg_expr whether the build_fold_indirect_ref in 
expand_ifn_va_arg_1 is needed before passing ap on to gimplify_va_arg_internal. 
This decision is encoded as an extra argument to ifn_va_arg.


III.

Using the patch, before inlining we can see the address operator has been 
removed in va_arg:
...
f2_1 (struct  * apD.1832)
{
   intD.6 _4;

   # .MEM_3 = VDEF <.MEM_1(D)>
   # USE = anything
   # CLB = anything

   _4 = VA_ARG (ap_2(D), 0B, 0);
   # VUSE <.MEM_3>
   return _4;
}
...

And after inlining, we see that va_start and va_arg now use the same ap:
...
f2 (intD.6 iD.1835)
{
   struct  apD.1839[1];

   # .MEM_2 = VDEF <.MEM_1(D)>
   # USE = anything
   # CLB = anything
   __builtin_va_startD.1030 (&apD.1839, 0);

   # .MEM_3 = VDEF <.MEM_2>
   # USE = anything
   # CLB = anything
   _8 = VA_ARG (&apD.1839, 0B, 0);
...

That allows the pass_stdarg optimization to succeed:
...
f2: va_list escapes 0, needs to save 8 GPR units and 0 FPR units.
...

Bootstrapped and reg-tested on x86_64, with and without -m32.

OK for trunk?

[ FWIW, I suspect this patch will make life easier for the reimplementation of 
the pass_stdarg optimization using ifn_va_arg. ]

Thanks,
- Tom

Comments

Richard Biener May 11, 2015, 7:47 a.m. UTC | #1
On Fri, 8 May 2015, Tom de Vries wrote:

> Hi,
> 
> this patch fixes PR66010.
> 
> 
> I.
> 
> Consider this test-case, with a va_list passed from f2 to f2_1:
> ...
> #include <stdarg.h>
> 
> inline int __attribute__((always_inline))
> f2_1 (va_list ap)
> {
>   return va_arg (ap, int);
> }
> 
> int
> f2 (int i, ...)
> {
>   int res;
>   va_list ap;
> 
>   va_start (ap, i);
>   res = f2_1 (ap);
>   va_end (ap);
> 
>   return res;
> }
> ...
> 
> When compiling at -O2, before eline we have:
> ...
> f2_1 (struct  * apD.1832)
> {
>   intD.6 _3;
> 
>   # .MEM_2 = VDEF <.MEM_1(D)>
>   # USE = anything
>   # CLB = anything
>   _3 = VA_ARG (&apD.1832, 0B);
> 
>   # VUSE <.MEM_2>
>   return _3;
> }
> 
> f2 (intD.6 iD.1835)
> {
>   struct  apD.1839[1];
>   intD.6 resD.1838;
>   intD.6 _6;
> 
>   # .MEM_2 = VDEF <.MEM_1(D)>
>   # USE = anything
>   # CLB = anything
>   __builtin_va_startD.1030 (&apD.1839, 0);
> 
>   # .MEM_3 = VDEF <.MEM_2>
>   # USE = anything
>   # CLB = anything
>   res_4 = f2_1D.1833 (&apD.1839);
> 
>   # .MEM_5 = VDEF <.MEM_3>
>   # USE = anything
>   # CLB = anything
>   __builtin_va_endD.1029 (&apD.1839);
> 
>   _6 = res_4;
> 
>   # .MEM_7 = VDEF <.MEM_5>
>   apD.1839 ={v} {CLOBBER};
> 
>   # VUSE <.MEM_7>
>   return _6;
> }
> ...
> 
> Because the va_list type is an array type:
> ...
>   struct  apD.1839[1];
> ...
> 
> we're passing the location of the initial element:
> ...
>   res_4 = f2_1D.1833 (&apD.1839);
> ...
> 
> And the type of the parameter in f2_1 is accordingly a pointer to array
> element:
> ...
> f2_1 (struct  * apD.1832)
> ...
> 
> That means the address operator here is superfluous.
> ...
>   _3 = VA_ARG (&apD.1832, 0B);
> ...
> Or, differently put, when we take the address of ap in va_start and va_end in
> f2, the result is a pointer to array element type.
> When we take the address of ap in f2_2, the result is a pointer to pointer to
> array element type.
> 
> This extra indirection doesn't cause wrong code to be generated. The va_arg
> expansion code handles this correctly, thanks to the combination of:
> - an unconditional build_fold_indirect_ref in expand_ifn_va_arg_1 which
> removes
>   an indirection, and
> - a fixup in gimplify_va_arg_internal that again adds an indirection in some
>   cases.
> 
> The call gets inlined, and before pass_stdarg we have:
> ...
> f2 (intD.6 iD.1835)
> {
>   struct  * apD.1849;
>   struct  apD.1839[1];
>   intD.6 _6;
> 
>   # .MEM_2 = VDEF <.MEM_1(D)>
>   # USE = nonlocal escaped
>   # CLB = nonlocal escaped { D.1839 } (escaped)
>   __builtin_va_startD.1030 (&apD.1839, 0);
> 
>   # .MEM_3 = VDEF <.MEM_2>
>   apD.1849 = &apD.1839;
> 
>   # .MEM_7 = VDEF <.MEM_3>
>   # USE = nonlocal null { D.1839 D.1849 } (escaped)
>   # CLB = nonlocal null { D.1839 D.1849 } (escaped)
>   _6 = VA_ARG (&apD.1849, 0B);
> ...
> 
> And after expanding ifn_va_arg, we have:
> ...
> f2 (intD.6 iD.1835)
> {
>   struct  * ap.3D.1853;
>   struct  apD.1839[1];
> 
>   # .MEM_2 = VDEF <.MEM_1(D)>
>   # USE = nonlocal escaped
>   # CLB = nonlocal escaped { D.1839 } (escaped)
>   __builtin_va_startD.1030 (&apD.1839, 0);
> 
>   ap_3 = &apD.1839;
> 
>   ap.3_10 = ap_3;
> 
>   # VUSE <.MEM_2>
>   _11 = ap.3_10->gp_offsetD.2;
>   <SNIP>
> ...
> 
> The pass_stdarg optimization fails:
> ...
> f2: va_list escapes 1, needs to save all GPR units and all FPR units.
> ...
> 
> It fails on the superfluous address operator:
> ...
> va_list escapes in ap_3 = &apD.1839;
> ...
> 
> 
> II.
> 
> The patch prevents the superfluous address operator from being added.
> 
> It also removes the need for the fixup in gimplify_va_arg_internal, by
> deciding in gimplify_va_arg_expr whether the build_fold_indirect_ref in
> expand_ifn_va_arg_1 is needed before passing ap on to
> gimplify_va_arg_internal. This decision is encoded as an extra argument to
> ifn_va_arg.
> 
> 
> III.
> 
> Using the patch, before inlining we can see the address operator has been
> removed in va_arg:
> ...
> f2_1 (struct  * apD.1832)
> {
>   intD.6 _4;
> 
>   # .MEM_3 = VDEF <.MEM_1(D)>
>   # USE = anything
>   # CLB = anything
> 
>   _4 = VA_ARG (ap_2(D), 0B, 0);
>   # VUSE <.MEM_3>
>   return _4;
> }
> ...
> 
> And after inlining, we see that va_start and va_arg now use the same ap:
> ...
> f2 (intD.6 iD.1835)
> {
>   struct  apD.1839[1];
> 
>   # .MEM_2 = VDEF <.MEM_1(D)>
>   # USE = anything
>   # CLB = anything
>   __builtin_va_startD.1030 (&apD.1839, 0);
> 
>   # .MEM_3 = VDEF <.MEM_2>
>   # USE = anything
>   # CLB = anything
>   _8 = VA_ARG (&apD.1839, 0B, 0);
> ...
> 
> That allows the pass_stdarg optimization to succeed:
> ...
> f2: va_list escapes 0, needs to save 8 GPR units and 0 FPR units.
> ...
> 
> Bootstrapped and reg-tested on x86_64, with and without -m32.
> 
> OK for trunk?
> 
> [ FWIW, I suspect this patch will make life easier for the reimplementation of
> the pass_stdarg optimization using ifn_va_arg. ]

+  if (canon_va_type != NULL)
+    {
+      if (!(TREE_CODE (canon_va_type) == ARRAY_TYPE
+           && TREE_CODE (va_type) != ARRAY_TYPE))
+       /* In gimplify_va_arg_expr we take the address of the ap argument, 
mark
+          it addressable now.  */
+       mark_addressable (expr);

can we "simplify" this and ...

-       }
-
+      gcc_assert (TREE_CODE (TREE_TYPE (valist)) != ARRAY_TYPE);
       gimplify_expr (&valist, pre_p, post_p, is_gimple_val, fb_rvalue);

this to use [!]POINTER_TYPE_P ()?  Why do we arrive with non-array
va_type but array canon_va_type in build_va_arg?  I suppose the
va_list argument already decayed to a pointer then (in which case
the argument should already be addressable?)?

I think the overall idea of the patch is good - I'm just worried about
special-casing of ARRAY_TYPE vs. non-pointer-type (because it's the
latter that we ultimately want...).

Thanks,
Richard.
Michael Matz May 12, 2015, 10:12 a.m. UTC | #2
Hi,

On Fri, 8 May 2015, Tom de Vries wrote:

> III.
> 
> Using the patch, before inlining we can see the address operator has been
> removed in va_arg:
> ...
> f2_1 (struct  * apD.1832)
> {
>   intD.6 _4;
> 
>   # .MEM_3 = VDEF <.MEM_1(D)>
>   # USE = anything
>   # CLB = anything
> 
>   _4 = VA_ARG (ap_2(D), 0B, 0);
>   # VUSE <.MEM_3>
>   return _4;
> }

Have you visually inspected that the address operator stays on 
architectures where va_list is e.g. a char pointer (i.e. on those where 
VA_ARG has the side-effect of modifying ap)?


Ciao,
Michael.
Tom de Vries May 12, 2015, 10:38 a.m. UTC | #3
On 12-05-15 12:12, Michael Matz wrote:
> Hi,
>
> On Fri, 8 May 2015, Tom de Vries wrote:
>
>> III.
>>
>> Using the patch, before inlining we can see the address operator has been
>> removed in va_arg:
>> ...
>> f2_1 (struct  * apD.1832)
>> {
>>    intD.6 _4;
>>
>>    # .MEM_3 = VDEF <.MEM_1(D)>
>>    # USE = anything
>>    # CLB = anything
>>
>>    _4 = VA_ARG (ap_2(D), 0B, 0);
>>    # VUSE <.MEM_3>
>>    return _4;
>> }
>
> Have you visually inspected that the address operator stays on
> architectures where va_list is e.g. a char pointer

x86_64 -m32:
...
f2 (int i)
{
   charD.2 * ap.0D.1811;
   intD.1 D.1812;
   intD.1 resD.1808;
   charD.2 * apD.1809;

   try
     {
       # USE = anything
       # CLB = anything
       __builtin_va_startD.1021 (&apD.1809, 0);
       ap.0D.1811 = apD.1809;
       # USE = anything
       # CLB = anything
       resD.1808 = f2_1D.1803 (ap.0D.1811);
       # USE = anything
       # CLB = anything
       __builtin_va_endD.1020 (&apD.1809);
       D.1812 = resD.1808;
       return D.1812;
     }
   finally
     {
       apD.1809 = {CLOBBER};
     }
}

f2_1 (char * ap)
{
   intD.1 D.1815;

   # USE = anything
   # CLB = anything
   D.1815 = VA_ARG (&apD.1802, 0B, 1);
   return D.1815;
}
...

Thanks,
- Tom

> (i.e. on those where
> VA_ARG has the side-effect of modifying ap)?
diff mbox

Patch

Don't take address of ap unless necessary

2015-05-08  Tom de Vries  <tom@codesourcery.com>

	PR tree-optimization/66010
	* gimplify.c (gimplify_modify_expr): Handle new do_deref argument of
	ifn_va_arg.
	* gimple.h (gimplify_va_arg_internal): Remove loc parameter.
	(gimplify_va_arg_internal): Remove loc parameter.  Assert no array-typed
	va_lists are passed, and remove corresponding handling.
	(gimplify_va_arg_expr): Only take address of ap if necessary.  Add
	do_deref argument to ifn_va_arg.
	* tree-stdarg.c (expand_ifn_va_arg_1): Handle new do_deref argument of
	ifn_va_arg.

	* c-common.c (build_va_arg): Don't mark ap addressable unless necessary.

	* gcc.dg/tree-ssa/stdarg-2.c: Undo scan xfails for f15.
---
 gcc/c-family/c-common.c                  | 16 ++++++++--
 gcc/gimplify.c                           | 52 +++++++++++++++++++++-----------
 gcc/gimplify.h                           |  3 +-
 gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c | 11 ++-----
 gcc/tree-stdarg.c                        | 17 ++++++-----
 5 files changed, 61 insertions(+), 38 deletions(-)

diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index 378f237..c2aa1ca 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -5918,9 +5918,19 @@  set_compound_literal_name (tree decl)
 tree
 build_va_arg (location_t loc, tree expr, tree type)
 {
-  /* In gimplify_va_arg_expr we take the address of the ap argument, mark it
-     addressable now.  */
-  mark_addressable (expr);
+  tree va_type = TREE_TYPE (expr);
+  tree canon_va_type = (va_type == error_mark_node
+			? NULL_TREE
+			: targetm.canonical_va_list_type (va_type));
+
+  if (canon_va_type != NULL)
+    {
+      if (!(TREE_CODE (canon_va_type) == ARRAY_TYPE
+	    && TREE_CODE (va_type) != ARRAY_TYPE))
+	/* In gimplify_va_arg_expr we take the address of the ap argument, mark
+	   it addressable now.  */
+	mark_addressable (expr);
+    }
 
   expr = build1 (VA_ARG_EXPR, type, expr);
   SET_EXPR_LOCATION (expr, loc);
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 9ce3dd9..7ca1374 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -4658,9 +4658,11 @@  gimplify_modify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
 	  tree type = TREE_TYPE (call);
 	  tree ap = CALL_EXPR_ARG (call, 0);
 	  tree tag = CALL_EXPR_ARG (call, 1);
+	  tree do_deref = CALL_EXPR_ARG (call, 2);
 	  tree newcall = build_call_expr_internal_loc (EXPR_LOCATION (call),
-						       IFN_VA_ARG, type, 3, ap,
-						       tag, vlasize);
+						       IFN_VA_ARG, type, 4, ap,
+						       tag, do_deref,
+						       vlasize);
 	  tree *call_p = &(TREE_OPERAND (*from_p, 0));
 	  *call_p = newcall;
 	}
@@ -9304,8 +9306,8 @@  dummy_object (tree type)
    and TYPE.  */
 
 tree
-gimplify_va_arg_internal (tree valist, tree type, location_t loc,
-			  gimple_seq *pre_p, gimple_seq *post_p)
+gimplify_va_arg_internal (tree valist, tree type, gimple_seq *pre_p,
+			  gimple_seq *post_p)
 {
   tree have_va_type = TREE_TYPE (valist);
   tree cano_type = targetm.canonical_va_list_type (have_va_type);
@@ -9317,17 +9319,7 @@  gimplify_va_arg_internal (tree valist, tree type, location_t loc,
      from multiple evaluations.  */
   if (TREE_CODE (have_va_type) == ARRAY_TYPE)
     {
-      /* For this case, the backends will be expecting a pointer to
-	 TREE_TYPE (abi), but it's possible we've
-	 actually been given an array (an actual TARGET_FN_ABI_VA_LIST).
-	 So fix it.  */
-      if (TREE_CODE (TREE_TYPE (valist)) == ARRAY_TYPE)
-	{
-	  tree p1 = build_pointer_type (TREE_TYPE (have_va_type));
-	  valist = fold_convert_loc (loc, p1,
-				     build_fold_addr_expr_loc (loc, valist));
-	}
-
+      gcc_assert (TREE_CODE (TREE_TYPE (valist)) != ARRAY_TYPE);
       gimplify_expr (&valist, pre_p, post_p, is_gimple_val, fb_rvalue);
     }
   else
@@ -9346,7 +9338,7 @@  gimplify_va_arg_expr (tree *expr_p, gimple_seq *pre_p,
   tree promoted_type, have_va_type;
   tree valist = TREE_OPERAND (*expr_p, 0);
   tree type = TREE_TYPE (*expr_p);
-  tree t, tag, ap;
+  tree t, tag, ap, do_deref;
   location_t loc = EXPR_LOCATION (*expr_p);
 
   /* Verify that valist is of the proper type.  */
@@ -9400,9 +9392,33 @@  gimplify_va_arg_expr (tree *expr_p, gimple_seq *pre_p,
     }
 
   /* Transform a VA_ARG_EXPR into an VA_ARG internal function.  */
-  ap = build_fold_addr_expr_loc (loc, valist);
+  if (TREE_CODE (have_va_type) == ARRAY_TYPE)
+    {
+      if (TREE_CODE (TREE_TYPE (valist)) == ARRAY_TYPE)
+	{
+	  /* Take the address, but don't strip it.  Gimplify_va_arg_internal
+	     expects a pointer to array element type.  */
+	  ap = build_fold_addr_expr_loc (loc, valist);
+	  do_deref = integer_zero_node;
+	}
+      else
+	{
+	  /* Don't take the address.  Gimplify_va_arg_internal expects a pointer
+	     to array element type, and we already have that.  */
+	  ap = valist;
+	  do_deref = integer_zero_node;
+	}
+    }
+  else
+    {
+      /* No special handling.  Take the address here, note that it needs to be
+	 stripped before calling gimplify_va_arg_internal. */
+      ap = build_fold_addr_expr_loc (loc, valist);
+      do_deref = integer_one_node;
+    }
   tag = build_int_cst (build_pointer_type (type), 0);
-  *expr_p = build_call_expr_internal_loc (loc, IFN_VA_ARG, type, 2, ap, tag);
+  *expr_p = build_call_expr_internal_loc (loc, IFN_VA_ARG, type, 3, ap, tag,
+					  do_deref);
 
   /* Clear the tentatively set PROP_gimple_lva, to indicate that IFN_VA_ARG
      needs to be expanded.  */
diff --git a/gcc/gimplify.h b/gcc/gimplify.h
index bad8e0f..83bf525 100644
--- a/gcc/gimplify.h
+++ b/gcc/gimplify.h
@@ -82,8 +82,7 @@  extern void gimplify_function_tree (tree);
 extern enum gimplify_status gimplify_va_arg_expr (tree *, gimple_seq *,
 						  gimple_seq *);
 gimple gimplify_assign (tree, tree, gimple_seq *);
-extern tree gimplify_va_arg_internal (tree, tree, location_t, gimple_seq *,
-				      gimple_seq *);
+extern tree gimplify_va_arg_internal (tree, tree, gimple_seq *, gimple_seq *);
 
 /* Return true if gimplify_one_sizepos doesn't need to gimplify
    expr (when in TYPE_SIZE{,_UNIT} and similar type/decl size/bitsize
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c
index f09b5de..93a9e8d 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c
@@ -288,14 +288,9 @@  f15 (int i, ...)
   f15_1 (ap);
   va_end (ap);
 }
-
-/* Following three dg-finals are marked as xfail due to PR66010/PR66013.  */
-/* Was: { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } }.  */
-/* { dg-final { scan-tree-dump "f15: va_list escapes 0, needs to save \[148\] GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { xfail *-*-* } } } */
-/* Was: { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } }.  */
-/* { dg-final { scan-tree-dump "f15: va_list escapes 0, needs to save \[148\] GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { xfail *-*-* } } } */
-/* Was: { target s390*-*-linux* }.  */
-/* { dg-final { scan-tree-dump "f15: va_list escapes 0, needs to save 1 GPR units and 2 FPR units" "stdarg" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "f15: va_list escapes 0, needs to save \[148\] GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
+/* { dg-final { scan-tree-dump "f15: va_list escapes 0, needs to save \[148\] GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */
+/* { dg-final { scan-tree-dump "f15: va_list escapes 0, needs to save 1 GPR units and 2 FPR units" "stdarg" { target s390*-*-linux* } } } */
 
 /* We may be able to improve upon this after fixing PR66010/PR66013.  */
 /* { dg-final { scan-tree-dump "f15: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
diff --git a/gcc/tree-stdarg.c b/gcc/tree-stdarg.c
index 1356374..3bede7e 100644
--- a/gcc/tree-stdarg.c
+++ b/gcc/tree-stdarg.c
@@ -1042,7 +1042,7 @@  expand_ifn_va_arg_1 (function *fun)
     for (i = gsi_start_bb (bb); !gsi_end_p (i); gsi_next (&i))
       {
 	gimple stmt = gsi_stmt (i);
-	tree ap, expr, lhs, type;
+	tree ap, expr, lhs, type, do_deref;
 	gimple_seq pre = NULL, post = NULL;
 
 	if (!gimple_call_ifn_va_arg_p (stmt))
@@ -1052,24 +1052,27 @@  expand_ifn_va_arg_1 (function *fun)
 
 	type = TREE_TYPE (TREE_TYPE (gimple_call_arg (stmt, 1)));
 	ap = gimple_call_arg (stmt, 0);
-	ap = build_fold_indirect_ref (ap);
+	do_deref = gimple_call_arg (stmt, 2);
+
+	if (do_deref == integer_one_node)
+	  ap = build_fold_indirect_ref (ap);
 
 	push_gimplify_context (false);
 
-	expr = gimplify_va_arg_internal (ap, type, gimple_location (stmt),
-					 &pre, &post);
+	expr = gimplify_va_arg_internal (ap, type, &pre, &post);
 
 	lhs = gimple_call_lhs (stmt);
 	if (lhs != NULL_TREE)
 	  {
+	    unsigned int nargs = gimple_call_num_args (stmt);
 	    gcc_assert (useless_type_conversion_p (TREE_TYPE (lhs), type));
 
-	    if (gimple_call_num_args (stmt) == 3)
+	    if (nargs == 4)
 	      {
 		/* We've transported the size of with WITH_SIZE_EXPR here as
-		   the 3rd argument of the internal fn call.  Now reinstate
+		   the last argument of the internal fn call.  Now reinstate
 		   it.  */
-		tree size = gimple_call_arg (stmt, 2);
+		tree size = gimple_call_arg (stmt, nargs - 1);
 		expr = build2 (WITH_SIZE_EXPR, TREE_TYPE (expr), expr, size);
 	      }
 
-- 
1.9.1