Patchwork Fix x86_64 va_arg gimplification (PR target/44575)

login
register
mail settings
Submitter Jakub Jelinek
Date June 21, 2010, 4:14 p.m.
Message ID <20100621161416.GQ7811@tyan-ft48-01.lab.bos.redhat.com>
Download mbox | patch
Permalink /patch/56331/
State New
Headers show

Comments

Jakub Jelinek - June 21, 2010, 4:14 p.m.
Hi!

While emit_store_group/emit_load_group* at the RTL level
properly handle stores or loads from PARALLEL of registers where
the sum of sizes is larger than the type being copied by
doing convert_modes etc., ix86_gimplify_va_arg uses the modes
without considering it and thus in some cases stores to the temporary
more than it should.

Following patch fixes it by handling the case of the last word copy
using smaller mode, or if there is no such mode (hear I mean say 3 bytes),
using memcpy.

Bootstrapped/regtested on x86_64-linux and i686-linux.  Ok for trunk?

2010-06-21  Jakub Jelinek  <jakub@redhat.com>

	PR target/44575
	* config/i386/i386.c (ix86_gimplify_va_arg): When copying
	va_arg from a set of register save slots into a temporary,
	if the container is bigger than type size, do the copying
	using smaller mode or using memcpy.

	* gcc.c-torture/execute/pr44575.c: New test.


	Jakub
Jan Hubicka - June 21, 2010, 4:21 p.m.
> Hi!
> 
> While emit_store_group/emit_load_group* at the RTL level
> properly handle stores or loads from PARALLEL of registers where
> the sum of sizes is larger than the type being copied by
> doing convert_modes etc., ix86_gimplify_va_arg uses the modes
> without considering it and thus in some cases stores to the temporary
> more than it should.
> 
> Following patch fixes it by handling the case of the last word copy
> using smaller mode, or if there is no such mode (hear I mean say 3 bytes),
> using memcpy.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux.  Ok for trunk?
> -	  int i;
> +	  int i, prev_size = 0;
>  	  tree temp = create_tmp_var (type, "va_arg_tmp");

Other option would be I guess packing the type within union of 2 words size,
so we never overflow.  But I guess this should result in overall saner codegen.
> +		    = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
> +				       3, dest_addr, src_addr,
> +				       size_int (cur_size));

OK, assuming that this is official way to build memcpy call ;)

Honza

Patch

--- gcc/config/i386/i386.c.jj	2010-06-14 07:44:22.000000000 +0200
+++ gcc/config/i386/i386.c	2010-06-21 14:34:17.000000000 +0200
@@ -7267,7 +7267,7 @@  ix86_gimplify_va_arg (tree valist, tree 
 	}
       if (need_temp)
 	{
-	  int i;
+	  int i, prev_size = 0;
 	  tree temp = create_tmp_var (type, "va_arg_tmp");
 
 	  /* addr = &temp; */
@@ -7279,13 +7279,29 @@  ix86_gimplify_va_arg (tree valist, tree 
 	      rtx slot = XVECEXP (container, 0, i);
 	      rtx reg = XEXP (slot, 0);
 	      enum machine_mode mode = GET_MODE (reg);
-	      tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
-	      tree addr_type = build_pointer_type (piece_type);
-	      tree daddr_type = build_pointer_type_for_mode (piece_type,
-							     ptr_mode, true);
+	      tree piece_type;
+	      tree addr_type;
+	      tree daddr_type;
 	      tree src_addr, src;
 	      int src_offset;
 	      tree dest_addr, dest;
+	      int cur_size = GET_MODE_SIZE (mode);
+
+	      if (prev_size + cur_size > size)
+		{
+		  cur_size = size - prev_size;
+		  mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
+		  if (mode == BLKmode)
+		    mode = QImode;
+		}
+	      piece_type = lang_hooks.types.type_for_mode (mode, 1);
+	      if (mode == GET_MODE (reg))
+		addr_type = build_pointer_type (piece_type);
+	      else
+		addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
+							 true);
+	      daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
+							true);
 
 	      if (SSE_REGNO_P (REGNO (reg)))
 		{
@@ -7300,14 +7316,26 @@  ix86_gimplify_va_arg (tree valist, tree 
 	      src_addr = fold_convert (addr_type, src_addr);
 	      src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
 				      size_int (src_offset));
-	      src = build_va_arg_indirect_ref (src_addr);
 
 	      dest_addr = fold_convert (daddr_type, addr);
 	      dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
 				       size_int (INTVAL (XEXP (slot, 1))));
-	      dest = build_va_arg_indirect_ref (dest_addr);
+	      if (cur_size == GET_MODE_SIZE (mode))
+		{
+		  src = build_va_arg_indirect_ref (src_addr);
+		  dest = build_va_arg_indirect_ref (dest_addr);
 
-	      gimplify_assign (dest, src, pre_p);
+		  gimplify_assign (dest, src, pre_p);
+		}
+	      else
+		{
+		  tree copy
+		    = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
+				       3, dest_addr, src_addr,
+				       size_int (cur_size));
+		  gimplify_and_add (copy, pre_p);
+		}
+	      prev_size += cur_size;
 	    }
 	}
 
--- gcc/testsuite/gcc.c-torture/execute/pr44575.c.jj	2010-06-21 14:43:19.000000000 +0200
+++ gcc/testsuite/gcc.c-torture/execute/pr44575.c	2010-06-21 14:44:20.000000000 +0200
@@ -0,0 +1,49 @@ 
+/* PR target/44575 */
+
+#include <stdarg.h>
+
+int fails = 0;
+struct S { float a[3]; };
+struct S a[5];
+
+void
+check (int z, ...)
+{
+  struct S arg, *p;
+  va_list ap;
+  int j = 0, k = 0;
+  int i;
+  va_start (ap, z);
+  for (i = 2; i < 4; ++i)
+    {
+      p = 0;
+      j++;
+      k += 2;
+      switch ((z << 4) | i)
+	{
+	case 0x12:
+	case 0x13:
+	  p = &a[2];
+	  arg = va_arg (ap, struct S);
+	  break;
+	default:
+	  ++fails;
+	  break;
+	}
+      if (p && p->a[2] != arg.a[2])
+	++fails;
+      if (fails)
+	break;
+    }
+  va_end (ap);
+}
+
+int
+main ()
+{
+  a[2].a[2] = -49026;
+  check (1, a[2], a[2]);
+  if (fails)
+    abort ();
+  return 0;
+}