diff mbox

4.4/4.5 PATCH: PR middle-end/45678: [4.4/4.5/4.6 Regression] crash on vector code with -m32 -msse

Message ID 20100921135928.GU1269@tyan-ft48-01.lab.bos.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Sept. 21, 2010, 1:59 p.m. UTC
On Tue, Sep 21, 2010 at 06:13:21AM -0700, H.J. Lu wrote:
> >> With your patch, gcc 4.5 generates:
> >>
> >>        xorps   %xmm0, %xmm0
> >>        movlps  (%esp), %xmm0
> >>        movhps  8(%esp), %xmm0
> >>
> >> on gcc.dg/torture/pr45678-2.c. Where does xorps come from?
> >
> > It prevents a reformatting penalty I presume.
> >
> >
> 
> Partial SSE register stall.

Yeah, the xorps comes from this code:
          if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
            emit_move_insn (op0, CONST0_RTX (mode));
          else
            emit_clobber (op0);

Bootstrap/regtest of the patch on x86_64-linux and i686-linux has now succeeded;
ok for 4.5?  Below is the 4.4 version, which so far I've tested only on the
testcases.  Ok for 4.4 if it bootstraps/regtests?

2010-09-21  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/45678
	* expr.c (expand_expr_real_1) <case VIEW_CONVERT_EXPR>: If
	op0 isn't sufficiently aligned and there is movmisalignM
	insn for mode, use it to load op0 into a temporary register.

	Backport from mainline
	2010-09-20  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/45678
	* cfgexpand.c (expand_one_stack_var_at): Limit alignment to
	crtl->max_used_stack_slot_alignment.

2010-09-21  Jakub Jelinek  <jakub@redhat.com>

	Backport from mainline
	2010-09-17  Richard Guenther  <rguenther@suse.de>
		    H.J. Lu  <hongjiu.lu@intel.com>

	PR middle-end/45678
	* gcc.dg/torture/pr45678-1.c: New.
	* gcc.dg/torture/pr45678-2.c: Likewise.



	Jakub

Comments

Richard Biener Sept. 21, 2010, 2:08 p.m. UTC | #1
On Tue, Sep 21, 2010 at 3:59 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Tue, Sep 21, 2010 at 06:13:21AM -0700, H.J. Lu wrote:
>> >> With your patch, gcc 4.5 generates:
>> >>
>> >>        xorps   %xmm0, %xmm0
>> >>        movlps  (%esp), %xmm0
>> >>        movhps  8(%esp), %xmm0
>> >>
>> >> on gcc.dg/torture/pr45678-2.c. Where does xorps come from?
>> >
>> > It prevents a reformatting penalty I presume.
>> >
>> >
>>
>> Partial SSE register stall.
>
> Yeah
>          if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
>            emit_move_insn (op0, CONST0_RTX (mode));
>          else
>            emit_clobber (op0);
>
> The patch bootstrap/regtest on x86_64-linux and i686-linux now succeeded,
> ok for 4.5?  Below is the 4.4 version, which I've so far just tested on the
> testcases.  Ok for 4.4 if it bootstraps/regtests?

Ok.

Thanks,
Richard.

> 2010-09-21  Jakub Jelinek  <jakub@redhat.com>
>
>        PR middle-end/45678
>        * expr.c (expand_expr_real_1) <case VIEW_CONVERT_EXPR>: If
>        op0 isn't sufficiently aligned and there is movmisalignM
>        insn for mode, use it to load op0 into a temporary register.
>
>        Backport from mainline
>        2010-09-20  Jakub Jelinek  <jakub@redhat.com>
>
>        PR middle-end/45678
>        * cfgexpand.c (expand_one_stack_var_at): Limit alignment to
>        crtl->max_used_stack_slot_alignment.
>
> 2010-09-21  Jakub Jelinek  <jakub@redhat.com>
>
>        Backport from mainline
>        2010-09-17  Richard Guenther  <rguenther@suse.de>
>                    H.J. Lu  <hongjiu.lu@intel.com>
>
>        PR middle-end/45678
>        * gcc.dg/torture/pr45678-1.c: New.
>        * gcc.dg/torture/pr45678-2.c: Likewise.
>
> --- gcc/expr.c.jj       2010-06-11 11:06:01.320346755 +0200
> +++ gcc/expr.c  2010-09-21 15:54:55.512656325 +0200
> @@ -8314,10 +8314,32 @@ expand_expr_real_1 (tree exp, rtx target
>         results.  */
>       if (MEM_P (op0))
>        {
> +         enum insn_code icode;
>          op0 = copy_rtx (op0);
>
>          if (TYPE_ALIGN_OK (type))
>            set_mem_align (op0, MAX (MEM_ALIGN (op0), TYPE_ALIGN (type)));
> +         else if (mode != BLKmode
> +                  && MEM_ALIGN (op0) < GET_MODE_ALIGNMENT (mode)
> +                  /* If the target does have special handling for unaligned
> +                     loads of mode then use them.  */
> +                  && ((icode = optab_handler (movmisalign_optab,
> +                                              mode)->insn_code)
> +                      != CODE_FOR_nothing))
> +             {
> +               rtx reg, insn;
> +
> +               op0 = adjust_address (op0, mode, 0);
> +               /* We've already validated the memory, and we're creating a
> +                  new pseudo destination.  The predicates really can't
> +                  fail.  */
> +               reg = gen_reg_rtx (mode);
> +
> +               /* Nor can the insn generator.  */
> +               insn = GEN_FCN (icode) (reg, op0);
> +               emit_insn (insn);
> +               return reg;
> +             }
>          else if (STRICT_ALIGNMENT
>                   && mode != BLKmode
>                   && MEM_ALIGN (op0) < GET_MODE_ALIGNMENT (mode))
> --- gcc/cfgexpand.c.jj  2010-06-11 11:06:01.000000000 +0200
> +++ gcc/cfgexpand.c     2010-09-21 15:56:38.953377699 +0200
> @@ -826,7 +826,7 @@ dump_stack_var_partition (void)
>  static void
>  expand_one_stack_var_at (tree decl, HOST_WIDE_INT offset)
>  {
> -  HOST_WIDE_INT align;
> +  HOST_WIDE_INT align, max_align;
>   rtx x;
>
>   /* If this fails, we've overflowed the stack frame.  Error nicely?  */
> @@ -839,8 +839,9 @@ expand_one_stack_var_at (tree decl, HOST
>   offset -= frame_phase;
>   align = offset & -offset;
>   align *= BITS_PER_UNIT;
> -  if (align > STACK_BOUNDARY || align == 0)
> -    align = STACK_BOUNDARY;
> +  max_align = crtl->max_used_stack_slot_alignment;
> +  if (align == 0 || align > max_align)
> +    align = max_align;
>   DECL_ALIGN (decl) = align;
>   DECL_USER_ALIGN (decl) = 0;
>
> --- gcc/testsuite/gcc.dg/torture/pr45678-1.c.jj 2010-09-21 15:54:55.516770526 +0200
> +++ gcc/testsuite/gcc.dg/torture/pr45678-1.c    2010-09-21 15:54:55.517780104 +0200
> @@ -0,0 +1,16 @@
> +/* { dg-do run } */
> +
> +typedef float V __attribute__ ((vector_size (16)));
> +V g;
> +float d[4] = { 4, 3, 2, 1 };
> +
> +int
> +main ()
> +{
> +  V e;
> +  __builtin_memcpy (&e, &d, sizeof (d));
> +  V f = { 5, 15, 25, 35 };
> +  e = e * f;
> +  g = e;
> +  return 0;
> +}
> --- gcc/testsuite/gcc.dg/torture/pr45678-2.c.jj 2010-09-21 15:54:55.518403039 +0200
> +++ gcc/testsuite/gcc.dg/torture/pr45678-2.c    2010-09-21 15:54:55.518403039 +0200
> @@ -0,0 +1,16 @@
> +/* { dg-do run } */
> +
> +typedef float V __attribute__ ((vector_size (16)));
> +V g;
> +
> +int
> +main ()
> +{
> +  float d[4] = { 4, 3, 2, 1 };
> +  V e;
> +  __builtin_memcpy (&e, &d, sizeof (d));
> +  V f = { 5, 15, 25, 35 };
> +  e = e * f;
> +  g = e;
> +  return 0;
> +}
>
>
>        Jakub
>
diff mbox

Patch

--- gcc/expr.c.jj	2010-06-11 11:06:01.320346755 +0200
+++ gcc/expr.c	2010-09-21 15:54:55.512656325 +0200
@@ -8314,10 +8314,32 @@  expand_expr_real_1 (tree exp, rtx target
 	 results.  */
       if (MEM_P (op0))
 	{
+	  enum insn_code icode;
 	  op0 = copy_rtx (op0);
 
 	  if (TYPE_ALIGN_OK (type))
 	    set_mem_align (op0, MAX (MEM_ALIGN (op0), TYPE_ALIGN (type)));
+	  else if (mode != BLKmode
+		   && MEM_ALIGN (op0) < GET_MODE_ALIGNMENT (mode)
+		   /* If the target does have special handling for unaligned
+		      loads of mode then use them.  */
+		   && ((icode = optab_handler (movmisalign_optab,
+					       mode)->insn_code)
+		       != CODE_FOR_nothing))
+	      {
+		rtx reg, insn;
+
+		op0 = adjust_address (op0, mode, 0);
+		/* We've already validated the memory, and we're creating a
+		   new pseudo destination.  The predicates really can't
+		   fail.  */
+		reg = gen_reg_rtx (mode);
+
+		/* Nor can the insn generator.  */
+		insn = GEN_FCN (icode) (reg, op0);
+		emit_insn (insn);
+		return reg;
+	      }
 	  else if (STRICT_ALIGNMENT
 		   && mode != BLKmode
 		   && MEM_ALIGN (op0) < GET_MODE_ALIGNMENT (mode))
--- gcc/cfgexpand.c.jj	2010-06-11 11:06:01.000000000 +0200
+++ gcc/cfgexpand.c	2010-09-21 15:56:38.953377699 +0200
@@ -826,7 +826,7 @@  dump_stack_var_partition (void)
 static void
 expand_one_stack_var_at (tree decl, HOST_WIDE_INT offset)
 {
-  HOST_WIDE_INT align;
+  HOST_WIDE_INT align, max_align;
   rtx x;
 
   /* If this fails, we've overflowed the stack frame.  Error nicely?  */
@@ -839,8 +839,9 @@  expand_one_stack_var_at (tree decl, HOST
   offset -= frame_phase;
   align = offset & -offset;
   align *= BITS_PER_UNIT;
-  if (align > STACK_BOUNDARY || align == 0)
-    align = STACK_BOUNDARY;
+  max_align = crtl->max_used_stack_slot_alignment;
+  if (align == 0 || align > max_align)
+    align = max_align;
   DECL_ALIGN (decl) = align;
   DECL_USER_ALIGN (decl) = 0;
 
--- gcc/testsuite/gcc.dg/torture/pr45678-1.c.jj	2010-09-21 15:54:55.516770526 +0200
+++ gcc/testsuite/gcc.dg/torture/pr45678-1.c	2010-09-21 15:54:55.517780104 +0200
@@ -0,0 +1,16 @@ 
+/* { dg-do run } */
+
+typedef float V __attribute__ ((vector_size (16)));
+V g;
+float d[4] = { 4, 3, 2, 1 };
+
+int
+main ()
+{
+  V e;
+  __builtin_memcpy (&e, &d, sizeof (d));
+  V f = { 5, 15, 25, 35 };
+  e = e * f;
+  g = e;
+  return 0;
+}
--- gcc/testsuite/gcc.dg/torture/pr45678-2.c.jj	2010-09-21 15:54:55.518403039 +0200
+++ gcc/testsuite/gcc.dg/torture/pr45678-2.c	2010-09-21 15:54:55.518403039 +0200
@@ -0,0 +1,16 @@ 
+/* { dg-do run } */
+
+typedef float V __attribute__ ((vector_size (16)));
+V g;
+
+int
+main ()
+{
+  float d[4] = { 4, 3, 2, 1 };
+  V e;
+  __builtin_memcpy (&e, &d, sizeof (d));
+  V f = { 5, 15, 25, 35 };
+  e = e * f;
+  g = e;
+  return 0;
+}