Fix x86_64 va_arg gimplification (PR target/44575)

Submitted by Jakub Jelinek on June 21, 2010, 4:14 p.m.

Details

Message ID 20100621161416.GQ7811@tyan-ft48-01.lab.bos.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek June 21, 2010, 4:14 p.m.
Hi!

While emit_store_group/emit_load_group* at the RTL level
properly handle stores or loads from PARALLEL of registers where
the sum of sizes is larger than the type being copied by
doing convert_modes etc., ix86_gimplify_va_arg uses the modes
without considering it and thus in some cases stores to the temporary
more than it should.

Following patch fixes it by handling the case of the last word copy
using smaller mode, or if there is no such mode (hear I mean say 3 bytes),
using memcpy.

Bootstrapped/regtested on x86_64-linux and i686-linux.  Ok for trunk?

2010-06-21  Jakub Jelinek  <jakub@redhat.com>

	PR target/44575
	* config/i386/i386.c (ix86_gimplify_va_arg): When copying
	va_arg from a set of register save slots into a temporary,
	if the container is bigger than type size, do the copying
	using smaller mode or using memcpy.

	* gcc.c-torture/execute/pr44575.c: New test.


	Jakub

Comments

Jan Hubicka June 21, 2010, 4:21 p.m.
> Hi!
> 
> While emit_store_group/emit_load_group* at the RTL level
> properly handle stores or loads from PARALLEL of registers where
> the sum of sizes is larger than the type being copied by
> doing convert_modes etc., ix86_gimplify_va_arg uses the modes
> without considering it and thus in some cases stores to the temporary
> more than it should.
> 
> Following patch fixes it by handling the case of the last word copy
> using smaller mode, or if there is no such mode (hear I mean say 3 bytes),
> using memcpy.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux.  Ok for trunk?
> -	  int i;
> +	  int i, prev_size = 0;
>  	  tree temp = create_tmp_var (type, "va_arg_tmp");

Other option would be I guess packing the type within union of 2 words size,
so we never overflow.  But I guess this should result in overall saner codegen.
> +		    = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
> +				       3, dest_addr, src_addr,
> +				       size_int (cur_size));

OK, assuming that this is official way to build memcpy call ;)

Honza

Patch hide | download patch | download mbox

--- gcc/config/i386/i386.c.jj	2010-06-14 07:44:22.000000000 +0200
+++ gcc/config/i386/i386.c	2010-06-21 14:34:17.000000000 +0200
@@ -7267,7 +7267,7 @@  ix86_gimplify_va_arg (tree valist, tree 
 	}
       if (need_temp)
 	{
-	  int i;
+	  int i, prev_size = 0;
 	  tree temp = create_tmp_var (type, "va_arg_tmp");
 
 	  /* addr = &temp; */
@@ -7279,13 +7279,29 @@  ix86_gimplify_va_arg (tree valist, tree 
 	      rtx slot = XVECEXP (container, 0, i);
 	      rtx reg = XEXP (slot, 0);
 	      enum machine_mode mode = GET_MODE (reg);
-	      tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
-	      tree addr_type = build_pointer_type (piece_type);
-	      tree daddr_type = build_pointer_type_for_mode (piece_type,
-							     ptr_mode, true);
+	      tree piece_type;
+	      tree addr_type;
+	      tree daddr_type;
 	      tree src_addr, src;
 	      int src_offset;
 	      tree dest_addr, dest;
+	      int cur_size = GET_MODE_SIZE (mode);
+
+	      if (prev_size + cur_size > size)
+		{
+		  cur_size = size - prev_size;
+		  mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
+		  if (mode == BLKmode)
+		    mode = QImode;
+		}
+	      piece_type = lang_hooks.types.type_for_mode (mode, 1);
+	      if (mode == GET_MODE (reg))
+		addr_type = build_pointer_type (piece_type);
+	      else
+		addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
+							 true);
+	      daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
+							true);
 
 	      if (SSE_REGNO_P (REGNO (reg)))
 		{
@@ -7300,14 +7316,26 @@  ix86_gimplify_va_arg (tree valist, tree 
 	      src_addr = fold_convert (addr_type, src_addr);
 	      src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
 				      size_int (src_offset));
-	      src = build_va_arg_indirect_ref (src_addr);
 
 	      dest_addr = fold_convert (daddr_type, addr);
 	      dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
 				       size_int (INTVAL (XEXP (slot, 1))));
-	      dest = build_va_arg_indirect_ref (dest_addr);
+	      if (cur_size == GET_MODE_SIZE (mode))
+		{
+		  src = build_va_arg_indirect_ref (src_addr);
+		  dest = build_va_arg_indirect_ref (dest_addr);
 
-	      gimplify_assign (dest, src, pre_p);
+		  gimplify_assign (dest, src, pre_p);
+		}
+	      else
+		{
+		  tree copy
+		    = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
+				       3, dest_addr, src_addr,
+				       size_int (cur_size));
+		  gimplify_and_add (copy, pre_p);
+		}
+	      prev_size += cur_size;
 	    }
 	}
 
--- gcc/testsuite/gcc.c-torture/execute/pr44575.c.jj	2010-06-21 14:43:19.000000000 +0200
+++ gcc/testsuite/gcc.c-torture/execute/pr44575.c	2010-06-21 14:44:20.000000000 +0200
@@ -0,0 +1,49 @@ 
+/* PR target/44575 */
+
+#include <stdarg.h>
+
+int fails = 0;
+struct S { float a[3]; };
+struct S a[5];
+
+void
+check (int z, ...)
+{
+  struct S arg, *p;
+  va_list ap;
+  int j = 0, k = 0;
+  int i;
+  va_start (ap, z);
+  for (i = 2; i < 4; ++i)
+    {
+      p = 0;
+      j++;
+      k += 2;
+      switch ((z << 4) | i)
+	{
+	case 0x12:
+	case 0x13:
+	  p = &a[2];
+	  arg = va_arg (ap, struct S);
+	  break;
+	default:
+	  ++fails;
+	  break;
+	}
+      if (p && p->a[2] != arg.a[2])
+	++fails;
+      if (fails)
+	break;
+    }
+  va_end (ap);
+}
+
+int
+main ()
+{
+  a[2].a[2] = -49026;
+  check (1, a[2], a[2]);
+  if (fails)
+    abort ();
+  return 0;
+}