diff mbox

Fix ix86_split_long_move collision handling with TLS (PR target/66470)

Message ID 20150610070629.GJ10247@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek June 10, 2015, 7:06 a.m. UTC
On Wed, Jun 10, 2015 at 08:06:08AM +0200, Uros Bizjak wrote:
> > If -mx32 is a non-issue here, then perhaps my initial patch is good enough?
> 
> It looks to me, that if you detect and record zero-extended UNSPEC_TP,
> your original patch would also handle -mx32.
> 
> Can you please repost your original patch with the above addition?

I've managed to come up with a testcase that ICEs on -mx32 (with
-maddress-mode=long; with the default address mode, UNSPEC_TP seems to
always be loaded separately), and this version fixes even that.
Ok for trunk/5/4.9/4.8?

2015-06-10  Jakub Jelinek  <jakub@redhat.com>

	PR target/66470
	* config/i386/i386.c (ix86_split_long_move): For collisions
	involving direct TLS segment refs, move the UNSPEC_TP, possibly
	wrapped in ZERO_EXTEND, out of the address for lea and into each
	of the memory loads.

	* gcc.dg/tls/pr66470.c: New test.
	* gcc.target/i386/pr66470.c: New test.



	Jakub

Comments

Uros Bizjak June 10, 2015, 7:12 a.m. UTC | #1
On Wed, Jun 10, 2015 at 9:06 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Wed, Jun 10, 2015 at 08:06:08AM +0200, Uros Bizjak wrote:
>> > If -mx32 is a non-issue here, then perhaps my initial patch is good enough?
>>
>> It looks to me, that if you detect and record zero-extended UNSPEC_TP,
>> your original patch would also handle -mx32.
>>
>> Can you please repost your original patch with the above addition?
>
> I've managed to come up with a testcase that ICEs on -mx32 (with
> -maddress-mode=long, with the default UNSPEC_TP seems to be always loaded
> separately), and this version fixes even that, ok for trunk/5/4.9/4.8?
>
> 2015-06-10  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/66470
>         * config/i386/i386.c (ix86_split_long_move): For collisions
>         involving direct tls segment refs, move the UNSPEC_TP possibly
>         wrapped in ZERO_EXTEND out of the address for lea, to each of
>         the memory loads.
>
>         * gcc.dg/tls/pr66470.c: New test.
>         * gcc.target/i386/pr66470.c: New test.

Yes, this patch is OK for mainline and release branches.

Thanks,
Uros.
diff mbox

Patch

--- gcc/config/i386/i386.c.jj	2015-06-10 08:18:31.170053193 +0200
+++ gcc/config/i386/i386.c	2015-06-10 08:51:21.315434960 +0200
@@ -22858,7 +22858,7 @@  ix86_split_long_move (rtx operands[])
 	 Do an lea to the last part and use only one colliding move.  */
       else if (collisions > 1)
 	{
-	  rtx base;
+	  rtx base, addr, tls_base = NULL_RTX;
 
 	  collisions = 1;
 
@@ -22869,10 +22869,50 @@  ix86_split_long_move (rtx operands[])
 	  if (GET_MODE (base) != Pmode)
 	    base = gen_rtx_REG (Pmode, REGNO (base));
 
-	  emit_insn (gen_rtx_SET (base, XEXP (part[1][0], 0)));
+	  addr = XEXP (part[1][0], 0);
+	  if (TARGET_TLS_DIRECT_SEG_REFS)
+	    {
+	      struct ix86_address parts;
+	      int ok = ix86_decompose_address (addr, &parts);
+	      gcc_assert (ok);
+	      if (parts.seg == DEFAULT_TLS_SEG_REG)
+		{
+		  /* It is not valid to use %gs: or %fs: in
+		     lea though, so we need to remove it from the
+		     address used for lea and add it to each individual
+		     memory load instead.  */
+		  addr = copy_rtx (addr);
+		  rtx *x = &addr;
+		  while (GET_CODE (*x) == PLUS)
+		    {
+		      for (i = 0; i < 2; i++)
+			{
+			  rtx u = XEXP (*x, i);
+			  if (GET_CODE (u) == ZERO_EXTEND)
+			    u = XEXP (u, 0);
+			  if (GET_CODE (u) == UNSPEC
+			      && XINT (u, 1) == UNSPEC_TP)
+			    {
+			      tls_base = XEXP (*x, i);
+			      *x = XEXP (*x, 1 - i);
+			      break;
+			    }
+			}
+		      if (tls_base)
+			break;
+		      x = &XEXP (*x, 0);
+		    }
+		  gcc_assert (tls_base);
+		}
+	    }
+	  emit_insn (gen_rtx_SET (base, addr));
+	  if (tls_base)
+	    base = gen_rtx_PLUS (GET_MODE (base), base, tls_base);
 	  part[1][0] = replace_equiv_address (part[1][0], base);
 	  for (i = 1; i < nparts; i++)
 	    {
+	      if (tls_base)
+		base = copy_rtx (base);
 	      tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
 	      part[1][i] = replace_equiv_address (part[1][i], tmp);
 	    }
--- gcc/testsuite/gcc.dg/tls/pr66470.c.jj	2015-06-10 08:43:27.719773302 +0200
+++ gcc/testsuite/gcc.dg/tls/pr66470.c	2015-06-10 08:43:27.719773302 +0200
@@ -0,0 +1,29 @@ 
+/* PR target/66470 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target tls } */
+
+extern __thread unsigned long long a[10];
+extern __thread struct S { int a, b; } b[10];
+
+unsigned long long
+foo (long x)
+{
+  return a[x];
+}
+
+struct S
+bar (long x)
+{
+  return b[x];
+}
+
+#ifdef __SIZEOF_INT128__
+extern __thread unsigned __int128 c[10];
+
+unsigned __int128
+baz (long x)
+{
+  return c[x];
+}
+#endif
--- gcc/testsuite/gcc.target/i386/pr66470.c.jj	2015-06-10 08:45:10.111186752 +0200
+++ gcc/testsuite/gcc.target/i386/pr66470.c	2015-06-10 08:47:30.914005019 +0200
@@ -0,0 +1,13 @@ 
+/* PR target/66470 */
+/* { dg-do compile { target { ! { ia32 } } } } */
+/* { dg-options "-O2 -mx32 -maddress-mode=long" } */
+/* { dg-require-effective-target tls } */
+
+extern __thread unsigned __int128 c[10];
+int d;
+
+unsigned __int128
+foo (void)
+{
+  return c[d];
+}