Patchwork Fix store_split_bit_field (PR middle-end/57344)

login
register
mail settings
Submitter Jakub Jelinek
Date May 21, 2013, 2:59 p.m.
Message ID <20130521145951.GW1377@tucnak.redhat.com>
Download mbox | patch
Permalink /patch/245841/
State New
Headers show

Comments

Jakub Jelinek - May 21, 2013, 2:59 p.m.
Hi!

My PR52979 patch introduced following regression in store_split_bit_field.
If op0 is a REG or SUBREG, then the code was assuming that unit is still
BITS_PER_WORD, which isn't the case after PR52979.  This patch changes
those spots to no longer assume that (second and third hunks).
The first hunk is just an optimization, I don't see how could we have
data races on (reg) or (subreg (reg) ...), so by allowing to change whole
words on those rather than limiting those because of too small bitregion_end
we can generate smaller RTL out of the expansion (usually combine will
optimize those back, but for e.g. -O0 it wouldn't).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk/4.8/4.7?

2013-05-21  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/57344
	* expmed.c (store_split_bit_field): If op0 is a REG or
	SUBREG of a REG, don't lower unit.  Handle unit not being
	always BITS_PER_WORD.

	* gcc.c-torture/execute/pr57344-1.c: New test.
	* gcc.c-torture/execute/pr57344-2.c: New test.
	* gcc.c-torture/execute/pr57344-3.c: New test.
	* gcc.c-torture/execute/pr57344-4.c: New test.


	Jakub
Richard Guenther - May 23, 2013, 8:40 a.m.
On Tue, 21 May 2013, Jakub Jelinek wrote:

> Hi!
> 
> My PR52979 patch introduced following regression in store_split_bit_field.
> If op0 is a REG or SUBREG, then the code was assuming that unit is still
> BITS_PER_WORD, which isn't the case after PR52979.  This patch changes
> those spots to no longer assume that (second and third hunks).
> The first hunk is just an optimization, I don't see how could we have
> data races on (reg) or (subreg (reg) ...), so by allowing to change whole
> words on those rather than limiting those because of too small bitregion_end
> we can generate smaller RTL out of the expansion (usually combine will
> optimize those back, but for e.g. -O0 it wouldn't).
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk/4.8/4.7?
> 
> 2013-05-21  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR middle-end/57344
> 	* expmed.c (store_split_bit_field): If op0 is a REG or
> 	SUBREG of a REG, don't lower unit.  Handle unit not being
> 	always BITS_PER_WORD.
> 
> 	* gcc.c-torture/execute/pr57344-1.c: New test.
> 	* gcc.c-torture/execute/pr57344-2.c: New test.
> 	* gcc.c-torture/execute/pr57344-3.c: New test.
> 	* gcc.c-torture/execute/pr57344-4.c: New test.
> 
> --- gcc/expmed.c.jj	2013-05-14 10:54:58.000000000 +0200
> +++ gcc/expmed.c	2013-05-21 11:59:00.275839242 +0200
> @@ -1094,10 +1094,14 @@ store_split_bit_field (rtx op0, unsigned
>        thispos = (bitpos + bitsdone) % unit;
>  
>        /* When region of bytes we can touch is restricted, decrease
> -	 UNIT close to the end of the region as needed.  */
> +	 UNIT close to the end of the region as needed.  If op0 is a REG
> +	 or SUBREG of REG, don't do this, as there can't be data races
> +	 on a register and we can expand shorter code in some cases.  */
>        if (bitregion_end
>  	  && unit > BITS_PER_UNIT
> -	  && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
> +	  && bitpos + bitsdone - thispos + unit > bitregion_end + 1
> +	  && !REG_P (op0)
> +	  && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))

I wonder if !REG_P (SUBREG_REG (op0)) can happen - but I guess better
be safe than sorry.  The rest of the patch looks ok to me.

Thus, ok.

Thanks,
Richard.

>  	{
>  	  unit = unit / 2;
>  	  continue;
> @@ -1147,14 +1151,15 @@ store_split_bit_field (rtx op0, unsigned
>  	 the current word starting from the base register.  */
>        if (GET_CODE (op0) == SUBREG)
>  	{
> -	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
> +	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
> +			    + (offset * unit / BITS_PER_WORD);
>  	  enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
>  	  if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
>  	    word = word_offset ? const0_rtx : op0;
>  	  else
>  	    word = operand_subword_force (SUBREG_REG (op0), word_offset,
>  					  GET_MODE (SUBREG_REG (op0)));
> -	  offset = 0;
> +	  offset &= BITS_PER_WORD / unit - 1;
>  	}
>        else if (REG_P (op0))
>  	{
> @@ -1162,8 +1167,9 @@ store_split_bit_field (rtx op0, unsigned
>  	  if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
>  	    word = offset ? const0_rtx : op0;
>  	  else
> -	    word = operand_subword_force (op0, offset, GET_MODE (op0));
> -	  offset = 0;
> +	    word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
> +					  GET_MODE (op0));
> +	  offset &= BITS_PER_WORD / unit - 1;
>  	}
>        else
>  	word = op0;
> --- gcc/testsuite/gcc.c-torture/execute/pr57344-1.c.jj	2013-05-21 11:38:07.828956569 +0200
> +++ gcc/testsuite/gcc.c-torture/execute/pr57344-1.c	2013-05-21 11:58:21.242061844 +0200
> @@ -0,0 +1,32 @@
> +/* PR middle-end/57344 */
> +
> +struct __attribute__((packed)) S
> +{
> +  int a : 11;
> +#if __SIZEOF_INT__ * __CHAR_BIT__ >= 32
> +  int b : 22;
> +#else
> +  int b : 13;
> +#endif
> +  char c;
> +  int : 0;
> +} s[2];
> +int i;
> +
> +__attribute__((noinline, noclone)) void
> +foo (int x)
> +{
> +  if (x != -3161)
> +    __builtin_abort ();
> +  asm volatile ("" : : : "memory");
> +}
> +
> +int
> +main ()
> +{
> +  struct S t = { 0, -3161L };
> +  s[1] = t;
> +  for (; i < 1; i++)
> +    foo (s[1].b);
> +  return 0;
> +}
> --- gcc/testsuite/gcc.c-torture/execute/pr57344-2.c.jj	2013-05-21 11:38:13.536922710 +0200
> +++ gcc/testsuite/gcc.c-torture/execute/pr57344-2.c	2013-05-21 11:58:36.119977546 +0200
> @@ -0,0 +1,32 @@
> +/* PR middle-end/57344 */
> +
> +struct __attribute__((packed)) S
> +{
> +  int a : 27;
> +#if __SIZEOF_INT__ * __CHAR_BIT__ >= 32
> +  int b : 22;
> +#else
> +  int b : 13;
> +#endif
> +  char c;
> +  int : 0;
> +} s[2];
> +int i;
> +
> +__attribute__((noinline, noclone)) void
> +foo (int x)
> +{
> +  if (x != -3161)
> +    __builtin_abort ();
> +  asm volatile ("" : : : "memory");
> +}
> +
> +int
> +main ()
> +{
> +  struct S t = { 0, -3161L };
> +  s[1] = t;
> +  for (; i < 1; i++)
> +    foo (s[1].b);
> +  return 0;
> +}
> --- gcc/testsuite/gcc.c-torture/execute/pr57344-3.c.jj	2013-05-21 11:43:11.157231567 +0200
> +++ gcc/testsuite/gcc.c-torture/execute/pr57344-3.c	2013-05-21 11:55:33.220012554 +0200
> @@ -0,0 +1,28 @@
> +/* PR middle-end/57344 */
> +
> +struct __attribute__((packed)) S
> +{
> +  long long int a : 43;
> +  long long int b : 22;
> +  char c;
> +  long long int : 0;
> +} s[2];
> +int i;
> +
> +__attribute__((noinline, noclone)) void
> +foo (long long int x)
> +{
> +  if (x != -3161LL)
> +    __builtin_abort ();
> +  asm volatile ("" : : : "memory");
> +}
> +
> +int
> +main ()
> +{
> +  struct S t = { 0, -3161LL };
> +  s[1] = t;
> +  for (; i < 1; i++)
> +    foo (s[1].b);
> +  return 0;
> +}
> --- gcc/testsuite/gcc.c-torture/execute/pr57344-4.c.jj	2013-05-21 11:55:09.346146134 +0200
> +++ gcc/testsuite/gcc.c-torture/execute/pr57344-4.c	2013-05-21 11:53:31.000000000 +0200
> @@ -0,0 +1,28 @@
> +/* PR middle-end/57344 */
> +
> +struct __attribute__((packed)) S
> +{
> +  long long int a : 59;
> +  long long int b : 54;
> +  char c;
> +  long long int : 0;
> +} s[2];
> +int i;
> +
> +__attribute__((noinline, noclone)) void
> +foo (long long int x)
> +{
> +  if (x != -1220975898975746LL)
> +    __builtin_abort ();
> +  asm volatile ("" : : : "memory");
> +}
> +
> +int
> +main ()
> +{
> +  struct S t = { 0, -1220975898975746LL };
> +  s[1] = t;
> +  for (; i < 1; i++)
> +    foo (s[1].b);
> +  return 0;
> +}
> 
> 	Jakub
> 
>

Patch

--- gcc/expmed.c.jj	2013-05-14 10:54:58.000000000 +0200
+++ gcc/expmed.c	2013-05-21 11:59:00.275839242 +0200
@@ -1094,10 +1094,14 @@  store_split_bit_field (rtx op0, unsigned
       thispos = (bitpos + bitsdone) % unit;
 
       /* When region of bytes we can touch is restricted, decrease
-	 UNIT close to the end of the region as needed.  */
+	 UNIT close to the end of the region as needed.  If op0 is a REG
+	 or SUBREG of REG, don't do this, as there can't be data races
+	 on a register and we can expand shorter code in some cases.  */
       if (bitregion_end
 	  && unit > BITS_PER_UNIT
-	  && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
+	  && bitpos + bitsdone - thispos + unit > bitregion_end + 1
+	  && !REG_P (op0)
+	  && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
 	{
 	  unit = unit / 2;
 	  continue;
@@ -1147,14 +1151,15 @@  store_split_bit_field (rtx op0, unsigned
 	 the current word starting from the base register.  */
       if (GET_CODE (op0) == SUBREG)
 	{
-	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
+	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
+			    + (offset * unit / BITS_PER_WORD);
 	  enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
 	  if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
 	    word = word_offset ? const0_rtx : op0;
 	  else
 	    word = operand_subword_force (SUBREG_REG (op0), word_offset,
 					  GET_MODE (SUBREG_REG (op0)));
-	  offset = 0;
+	  offset &= BITS_PER_WORD / unit - 1;
 	}
       else if (REG_P (op0))
 	{
@@ -1162,8 +1167,9 @@  store_split_bit_field (rtx op0, unsigned
 	  if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
 	    word = offset ? const0_rtx : op0;
 	  else
-	    word = operand_subword_force (op0, offset, GET_MODE (op0));
-	  offset = 0;
+	    word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
+					  GET_MODE (op0));
+	  offset &= BITS_PER_WORD / unit - 1;
 	}
       else
 	word = op0;
--- gcc/testsuite/gcc.c-torture/execute/pr57344-1.c.jj	2013-05-21 11:38:07.828956569 +0200
+++ gcc/testsuite/gcc.c-torture/execute/pr57344-1.c	2013-05-21 11:58:21.242061844 +0200
@@ -0,0 +1,32 @@ 
+/* PR middle-end/57344 */
+
+struct __attribute__((packed)) S
+{
+  int a : 11;
+#if __SIZEOF_INT__ * __CHAR_BIT__ >= 32
+  int b : 22;
+#else
+  int b : 13;
+#endif
+  char c;
+  int : 0;
+} s[2];
+int i;
+
+__attribute__((noinline, noclone)) void
+foo (int x)
+{
+  if (x != -3161)
+    __builtin_abort ();
+  asm volatile ("" : : : "memory");
+}
+
+int
+main ()
+{
+  struct S t = { 0, -3161L };
+  s[1] = t;
+  for (; i < 1; i++)
+    foo (s[1].b);
+  return 0;
+}
--- gcc/testsuite/gcc.c-torture/execute/pr57344-2.c.jj	2013-05-21 11:38:13.536922710 +0200
+++ gcc/testsuite/gcc.c-torture/execute/pr57344-2.c	2013-05-21 11:58:36.119977546 +0200
@@ -0,0 +1,32 @@ 
+/* PR middle-end/57344 */
+
+struct __attribute__((packed)) S
+{
+  int a : 27;
+#if __SIZEOF_INT__ * __CHAR_BIT__ >= 32
+  int b : 22;
+#else
+  int b : 13;
+#endif
+  char c;
+  int : 0;
+} s[2];
+int i;
+
+__attribute__((noinline, noclone)) void
+foo (int x)
+{
+  if (x != -3161)
+    __builtin_abort ();
+  asm volatile ("" : : : "memory");
+}
+
+int
+main ()
+{
+  struct S t = { 0, -3161L };
+  s[1] = t;
+  for (; i < 1; i++)
+    foo (s[1].b);
+  return 0;
+}
--- gcc/testsuite/gcc.c-torture/execute/pr57344-3.c.jj	2013-05-21 11:43:11.157231567 +0200
+++ gcc/testsuite/gcc.c-torture/execute/pr57344-3.c	2013-05-21 11:55:33.220012554 +0200
@@ -0,0 +1,28 @@ 
+/* PR middle-end/57344 */
+
+struct __attribute__((packed)) S
+{
+  long long int a : 43;
+  long long int b : 22;
+  char c;
+  long long int : 0;
+} s[2];
+int i;
+
+__attribute__((noinline, noclone)) void
+foo (long long int x)
+{
+  if (x != -3161LL)
+    __builtin_abort ();
+  asm volatile ("" : : : "memory");
+}
+
+int
+main ()
+{
+  struct S t = { 0, -3161LL };
+  s[1] = t;
+  for (; i < 1; i++)
+    foo (s[1].b);
+  return 0;
+}
--- gcc/testsuite/gcc.c-torture/execute/pr57344-4.c.jj	2013-05-21 11:55:09.346146134 +0200
+++ gcc/testsuite/gcc.c-torture/execute/pr57344-4.c	2013-05-21 11:53:31.000000000 +0200
@@ -0,0 +1,28 @@ 
+/* PR middle-end/57344 */
+
+struct __attribute__((packed)) S
+{
+  long long int a : 59;
+  long long int b : 54;
+  char c;
+  long long int : 0;
+} s[2];
+int i;
+
+__attribute__((noinline, noclone)) void
+foo (long long int x)
+{
+  if (x != -1220975898975746LL)
+    __builtin_abort ();
+  asm volatile ("" : : : "memory");
+}
+
+int
+main ()
+{
+  struct S t = { 0, -1220975898975746LL };
+  s[1] = t;
+  for (; i < 1; i++)
+    foo (s[1].b);
+  return 0;
+}