Message ID | 20100921135928.GU1269@tyan-ft48-01.lab.bos.redhat.com |
---|---|
State | New |
Headers | show |
On Tue, Sep 21, 2010 at 3:59 PM, Jakub Jelinek <jakub@redhat.com> wrote: > On Tue, Sep 21, 2010 at 06:13:21AM -0700, H.J. Lu wrote: >> >> With your patch, gcc 4.5 generates: >> >> >> >> xorps %xmm0, %xmm0 >> >> movlps (%esp), %xmm0 >> >> movhps 8(%esp), %xmm0 >> >> >> >> on gcc.dg/torture/pr45678-2.c. Where does xorps come from? >> > >> > It prevents a reformatting penalty I presume. >> > >> > >> >> Partial SSE register stall. > > Yeah > if (TARGET_SSE_PARTIAL_REG_DEPENDENCY) > emit_move_insn (op0, CONST0_RTX (mode)); > else > emit_clobber (op0); > > The patch bootstrap/regtest on x86_64-linux and i686-linux now succeeded, > ok for 4.5? Below is the 4.4 version, which I've so far just tested on the > testcases. Ok for 4.4 if it bootstraps/regtests? Ok. Thanks, Richard. > 2010-09-21 Jakub Jelinek <jakub@redhat.com> > > PR middle-end/45678 > * expr.c (expand_expr_real_1) <case VIEW_CONVERT_EXPR>: If > op0 isn't sufficiently aligned and there is movmisalignM > insn for mode, use it to load op0 into a temporary register. > > Backport from mainline > 2010-09-20 Jakub Jelinek <jakub@redhat.com> > > PR middle-end/45678 > * cfgexpand.c (expand_one_stack_var_at): Limit alignment to > crtl->max_used_stack_slot_alignment. > > 2010-09-21 Jakub Jelinek <jakub@redhat.com> > > Backport from mainline > 2010-09-17 Richard Guenther <rguenther@suse.de> > H.J. Lu <hongjiu.lu@intel.com> > > PR middle-end/45678 > * gcc.dg/torture/pr45678-1.c: New. > * gcc.dg/torture/pr45678-2.c: Likewise. > > --- gcc/expr.c.jj 2010-06-11 11:06:01.320346755 +0200 > +++ gcc/expr.c 2010-09-21 15:54:55.512656325 +0200 > @@ -8314,10 +8314,32 @@ expand_expr_real_1 (tree exp, rtx target > results. */ > if (MEM_P (op0)) > { > + enum insn_code icode; > op0 = copy_rtx (op0); > > if (TYPE_ALIGN_OK (type)) > set_mem_align (op0, MAX (MEM_ALIGN (op0), TYPE_ALIGN (type))); > + else if (mode != BLKmode > + && MEM_ALIGN (op0) < GET_MODE_ALIGNMENT (mode) > + /* If the target does have special handling for unaligned > + loads of mode then use them. */ > + && ((icode = optab_handler (movmisalign_optab, > + mode)->insn_code) > + != CODE_FOR_nothing)) > + { > + rtx reg, insn; > + > + op0 = adjust_address (op0, mode, 0); > + /* We've already validated the memory, and we're creating a > + new pseudo destination. The predicates really can't > + fail. */ > + reg = gen_reg_rtx (mode); > + > + /* Nor can the insn generator. */ > + insn = GEN_FCN (icode) (reg, op0); > + emit_insn (insn); > + return reg; > + } > else if (STRICT_ALIGNMENT > && mode != BLKmode > && MEM_ALIGN (op0) < GET_MODE_ALIGNMENT (mode)) > --- gcc/cfgexpand.c.jj 2010-06-11 11:06:01.000000000 +0200 > +++ gcc/cfgexpand.c 2010-09-21 15:56:38.953377699 +0200 > @@ -826,7 +826,7 @@ dump_stack_var_partition (void) > static void > expand_one_stack_var_at (tree decl, HOST_WIDE_INT offset) > { > - HOST_WIDE_INT align; > + HOST_WIDE_INT align, max_align; > rtx x; > > /* If this fails, we've overflowed the stack frame. Error nicely? */ > @@ -839,8 +839,9 @@ expand_one_stack_var_at (tree decl, HOST > offset -= frame_phase; > align = offset & -offset; > align *= BITS_PER_UNIT; > - if (align > STACK_BOUNDARY || align == 0) > - align = STACK_BOUNDARY; > + max_align = crtl->max_used_stack_slot_alignment; > + if (align == 0 || align > max_align) > + align = max_align; > DECL_ALIGN (decl) = align; > DECL_USER_ALIGN (decl) = 0; > > --- gcc/testsuite/gcc.dg/torture/pr45678-1.c.jj 2010-09-21 15:54:55.516770526 +0200 > +++ gcc/testsuite/gcc.dg/torture/pr45678-1.c 2010-09-21 15:54:55.517780104 +0200 > @@ -0,0 +1,16 @@ > +/* { dg-do run } */ > + > +typedef float V __attribute__ ((vector_size (16))); > +V g; > +float d[4] = { 4, 3, 2, 1 }; > + > +int > +main () > +{ > + V e; > + __builtin_memcpy (&e, &d, sizeof (d)); > + V f = { 5, 15, 25, 35 }; > + e = e * f; > + g = e; > + return 0; > +} > --- gcc/testsuite/gcc.dg/torture/pr45678-2.c.jj 2010-09-21 15:54:55.518403039 +0200 > +++ gcc/testsuite/gcc.dg/torture/pr45678-2.c 2010-09-21 15:54:55.518403039 +0200 > @@ -0,0 +1,16 @@ > +/* { dg-do run } */ > + > +typedef float V __attribute__ ((vector_size (16))); > +V g; > + > +int > +main () > +{ > + float d[4] = { 4, 3, 2, 1 }; > + V e; > + __builtin_memcpy (&e, &d, sizeof (d)); > + V f = { 5, 15, 25, 35 }; > + e = e * f; > + g = e; > + return 0; > +} > > > Jakub >
--- gcc/expr.c.jj 2010-06-11 11:06:01.320346755 +0200 +++ gcc/expr.c 2010-09-21 15:54:55.512656325 +0200 @@ -8314,10 +8314,32 @@ expand_expr_real_1 (tree exp, rtx target results. */ if (MEM_P (op0)) { + enum insn_code icode; op0 = copy_rtx (op0); if (TYPE_ALIGN_OK (type)) set_mem_align (op0, MAX (MEM_ALIGN (op0), TYPE_ALIGN (type))); + else if (mode != BLKmode + && MEM_ALIGN (op0) < GET_MODE_ALIGNMENT (mode) + /* If the target does have special handling for unaligned + loads of mode then use them. */ + && ((icode = optab_handler (movmisalign_optab, + mode)->insn_code) + != CODE_FOR_nothing)) + { + rtx reg, insn; + + op0 = adjust_address (op0, mode, 0); + /* We've already validated the memory, and we're creating a + new pseudo destination. The predicates really can't + fail. */ + reg = gen_reg_rtx (mode); + + /* Nor can the insn generator. */ + insn = GEN_FCN (icode) (reg, op0); + emit_insn (insn); + return reg; + } else if (STRICT_ALIGNMENT && mode != BLKmode && MEM_ALIGN (op0) < GET_MODE_ALIGNMENT (mode)) --- gcc/cfgexpand.c.jj 2010-06-11 11:06:01.000000000 +0200 +++ gcc/cfgexpand.c 2010-09-21 15:56:38.953377699 +0200 @@ -826,7 +826,7 @@ dump_stack_var_partition (void) static void expand_one_stack_var_at (tree decl, HOST_WIDE_INT offset) { - HOST_WIDE_INT align; + HOST_WIDE_INT align, max_align; rtx x; /* If this fails, we've overflowed the stack frame. Error nicely? */ @@ -839,8 +839,9 @@ expand_one_stack_var_at (tree decl, HOST offset -= frame_phase; align = offset & -offset; align *= BITS_PER_UNIT; - if (align > STACK_BOUNDARY || align == 0) - align = STACK_BOUNDARY; + max_align = crtl->max_used_stack_slot_alignment; + if (align == 0 || align > max_align) + align = max_align; DECL_ALIGN (decl) = align; DECL_USER_ALIGN (decl) = 0; --- gcc/testsuite/gcc.dg/torture/pr45678-1.c.jj 2010-09-21 15:54:55.516770526 +0200 +++ gcc/testsuite/gcc.dg/torture/pr45678-1.c 2010-09-21 15:54:55.517780104 +0200 @@ -0,0 +1,16 @@ +/* { dg-do run } */ + +typedef float V __attribute__ ((vector_size (16))); +V g; +float d[4] = { 4, 3, 2, 1 }; + +int +main () +{ + V e; + __builtin_memcpy (&e, &d, sizeof (d)); + V f = { 5, 15, 25, 35 }; + e = e * f; + g = e; + return 0; +} --- gcc/testsuite/gcc.dg/torture/pr45678-2.c.jj 2010-09-21 15:54:55.518403039 +0200 +++ gcc/testsuite/gcc.dg/torture/pr45678-2.c 2010-09-21 15:54:55.518403039 +0200 @@ -0,0 +1,16 @@ +/* { dg-do run } */ + +typedef float V __attribute__ ((vector_size (16))); +V g; + +int +main () +{ + float d[4] = { 4, 3, 2, 1 }; + V e; + __builtin_memcpy (&e, &d, sizeof (d)); + V f = { 5, 15, 25, 35 }; + e = e * f; + g = e; + return 0; +}