Fix 300.twolf regression caused by STV
diff mbox series

Message ID alpine.LSU.2.20.1908191409580.32458@zhemvz.fhfr.qr
State New
Headers show
Series
  • Fix 300.twolf regression caused by STV
Related show

Commit Message

Richard Biener Aug. 19, 2019, 12:13 p.m. UTC
Uros noted that STV with !TImode isn't supposed to run before combine.
The following adjusts things accordingly and now the pass runs twice
for TARGET_64BIT.  I've also adjusted another gpr->xmm move to
use (vec_merge (vec_duplicate..)) style rather than using a subreg.
This isn't strictly neccesary to fix the bug though and my previous
needs to do this might have been caused by the pass running too early.

So - with or without this consistency part?

Bootstrap / regtest running on x86_64-unknown-linux-gnu, OK?

Thanks,
Richard.

2019-08-19  Richard Biener  <rguenther@suse.de>

	PR target/91154
	* config/i386/i386-features.c (general_scalar_chain::convert_op):
	Use (vec_merge (vec_duplicate..)) style vector from scalar move.
	(convert_scalars_to_vector): Add timode_p parameter and use it
	to guard TImode-only operation.
	(pass_stv::gate): Adjust so STV runs twice for TARGET_64BIT.
	(pass_stv::execute): Pass down timode_p.

	* gcc.target/i386/minmax-7.c: New testcase.

Comments

Richard Biener Aug. 19, 2019, 12:21 p.m. UTC | #1
On Mon, 19 Aug 2019, Richard Biener wrote:

> 
> Uros noted that STV with !TImode isn't supposed to run before combine.
> The following adjusts things accordingly and now the pass runs twice
> for TARGET_64BIT.  I've also adjusted another gpr->xmm move to
> use (vec_merge (vec_duplicate..)) style rather than using a subreg.
> This isn't strictly neccesary to fix the bug though and my previous
> needs to do this might have been caused by the pass running too early.
> 
> So - with or without this consistency part?
> 
> Bootstrap / regtest running on x86_64-unknown-linux-gnu, OK?

It regresses

FAIL: gcc.target/i386/minmax-6.c scan-assembler-not rsp

Have to investigate that again then (it caused the change to
use (vec_merge (vec_duplicate..)) for conversion in the first place)

Richard.

> Thanks,
> Richard.
> 
> 2019-08-19  Richard Biener  <rguenther@suse.de>
> 
> 	PR target/91154
> 	* config/i386/i386-features.c (general_scalar_chain::convert_op):
> 	Use (vec_merge (vec_duplicate..)) style vector from scalar move.
> 	(convert_scalars_to_vector): Add timode_p parameter and use it
> 	to guard TImode-only operation.
> 	(pass_stv::gate): Adjust so STV runs twice for TARGET_64BIT.
> 	(pass_stv::execute): Pass down timode_p.
> 
> 	* gcc.target/i386/minmax-7.c: New testcase.
> 
> Index: gcc/config/i386/i386-features.c
> ===================================================================
> --- gcc/config/i386/i386-features.c	(revision 274666)
> +++ gcc/config/i386/i386-features.c	(working copy)
> @@ -910,7 +910,9 @@ general_scalar_chain::convert_op (rtx *o
>      {
>        rtx tmp = gen_reg_rtx (GET_MODE (*op));
>  
> -      emit_insn_before (gen_move_insn (tmp, *op), insn);
> +      emit_insn_before (gen_rtx_SET (gen_rtx_SUBREG (vmode, tmp, 0),
> +				     gen_gpr_to_xmm_move_src (vmode, *op)),
> +			insn);
>        *op = gen_rtx_SUBREG (vmode, tmp, 0);
>  
>        if (dump_file)
> @@ -1664,7 +1666,7 @@ timode_remove_non_convertible_regs (bitm
>     instructions into vector mode when profitable.  */
>  
>  static unsigned int
> -convert_scalars_to_vector ()
> +convert_scalars_to_vector (bool timode_p)
>  {
>    basic_block bb;
>    int converted_insns = 0;
> @@ -1690,7 +1692,7 @@ convert_scalars_to_vector ()
>      {
>        rtx_insn *insn;
>        FOR_BB_INSNS (bb, insn)
> -	if (TARGET_64BIT
> +	if (timode_p
>  	    && timode_scalar_to_vector_candidate_p (insn))
>  	  {
>  	    if (dump_file)
> @@ -1699,7 +1701,7 @@ convert_scalars_to_vector ()
>  
>  	    bitmap_set_bit (&candidates[2], INSN_UID (insn));
>  	  }
> -	else
> +	else if (!timode_p)
>  	  {
>  	    /* Check {SI,DI}mode.  */
>  	    for (unsigned i = 0; i <= 1; ++i)
> @@ -1715,7 +1717,7 @@ convert_scalars_to_vector ()
>  	  }
>      }
>  
> -  if (TARGET_64BIT)
> +  if (timode_p)
>      timode_remove_non_convertible_regs (&candidates[2]);
>    for (unsigned i = 0; i <= 1; ++i)
>      general_remove_non_convertible_regs (&candidates[i]);
> @@ -1875,13 +1877,13 @@ public:
>    /* opt_pass methods: */
>    virtual bool gate (function *)
>      {
> -      return (timode_p == !!TARGET_64BIT
> +      return ((!timode_p || TARGET_64BIT)
>  	      && TARGET_STV && TARGET_SSE2 && optimize > 1);
>      }
>  
>    virtual unsigned int execute (function *)
>      {
> -      return convert_scalars_to_vector ();
> +      return convert_scalars_to_vector (timode_p);
>      }
>  
>    opt_pass *clone ()
> Index: gcc/testsuite/gcc.target/i386/minmax-7.c
> ===================================================================
> --- gcc/testsuite/gcc.target/i386/minmax-7.c	(nonexistent)
> +++ gcc/testsuite/gcc.target/i386/minmax-7.c	(working copy)
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=haswell" } */
> +
> +extern int numBins;
> +extern int binOffst;
> +extern int binWidth;
> +extern int Trybin;
> +void foo (int);
> +
> +void bar (int aleft, int axcenter)
> +{
> +  int a1LoBin = (((Trybin=((axcenter + aleft)-binOffst)/binWidth)<0)
> +		 ? 0 : ((Trybin>numBins) ? numBins : Trybin));
> +  foo (a1LoBin);
> +}
> +
> +/* We do not want the RA to spill %esi for it's dual-use but using
> +   pminsd is OK.  */
> +/* { dg-final { scan-assembler-not "rsp" { target { ! { ia32 } } } } } */
> +/* { dg-final { scan-assembler "pminsd" } } */
>

Patch
diff mbox series

Index: gcc/config/i386/i386-features.c
===================================================================
--- gcc/config/i386/i386-features.c	(revision 274666)
+++ gcc/config/i386/i386-features.c	(working copy)
@@ -910,7 +910,9 @@  general_scalar_chain::convert_op (rtx *o
     {
       rtx tmp = gen_reg_rtx (GET_MODE (*op));
 
-      emit_insn_before (gen_move_insn (tmp, *op), insn);
+      emit_insn_before (gen_rtx_SET (gen_rtx_SUBREG (vmode, tmp, 0),
+				     gen_gpr_to_xmm_move_src (vmode, *op)),
+			insn);
       *op = gen_rtx_SUBREG (vmode, tmp, 0);
 
       if (dump_file)
@@ -1664,7 +1666,7 @@  timode_remove_non_convertible_regs (bitm
    instructions into vector mode when profitable.  */
 
 static unsigned int
-convert_scalars_to_vector ()
+convert_scalars_to_vector (bool timode_p)
 {
   basic_block bb;
   int converted_insns = 0;
@@ -1690,7 +1692,7 @@  convert_scalars_to_vector ()
     {
       rtx_insn *insn;
       FOR_BB_INSNS (bb, insn)
-	if (TARGET_64BIT
+	if (timode_p
 	    && timode_scalar_to_vector_candidate_p (insn))
 	  {
 	    if (dump_file)
@@ -1699,7 +1701,7 @@  convert_scalars_to_vector ()
 
 	    bitmap_set_bit (&candidates[2], INSN_UID (insn));
 	  }
-	else
+	else if (!timode_p)
 	  {
 	    /* Check {SI,DI}mode.  */
 	    for (unsigned i = 0; i <= 1; ++i)
@@ -1715,7 +1717,7 @@  convert_scalars_to_vector ()
 	  }
     }
 
-  if (TARGET_64BIT)
+  if (timode_p)
     timode_remove_non_convertible_regs (&candidates[2]);
   for (unsigned i = 0; i <= 1; ++i)
     general_remove_non_convertible_regs (&candidates[i]);
@@ -1875,13 +1877,13 @@  public:
   /* opt_pass methods: */
   virtual bool gate (function *)
     {
-      return (timode_p == !!TARGET_64BIT
+      return ((!timode_p || TARGET_64BIT)
 	      && TARGET_STV && TARGET_SSE2 && optimize > 1);
     }
 
   virtual unsigned int execute (function *)
     {
-      return convert_scalars_to_vector ();
+      return convert_scalars_to_vector (timode_p);
     }
 
   opt_pass *clone ()
Index: gcc/testsuite/gcc.target/i386/minmax-7.c
===================================================================
--- gcc/testsuite/gcc.target/i386/minmax-7.c	(nonexistent)
+++ gcc/testsuite/gcc.target/i386/minmax-7.c	(working copy)
@@ -0,0 +1,20 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=haswell" } */
+
+extern int numBins;
+extern int binOffst;
+extern int binWidth;
+extern int Trybin;
+void foo (int);
+
+void bar (int aleft, int axcenter)
+{
+  int a1LoBin = (((Trybin=((axcenter + aleft)-binOffst)/binWidth)<0)
+		 ? 0 : ((Trybin>numBins) ? numBins : Trybin));
+  foo (a1LoBin);
+}
+
+/* We do not want the RA to spill %esi for it's dual-use but using
+   pminsd is OK.  */
+/* { dg-final { scan-assembler-not "rsp" { target { ! { ia32 } } } } } */
+/* { dg-final { scan-assembler "pminsd" } } */