diff mbox

[i386] Support ANDN in stv pass

Message ID 20160115154630.GA15775@msticlxl57.ims.intel.com
State New
Headers show

Commit Message

Ilya Enkovich Jan. 15, 2016, 3:46 p.m. UTC
Hi,

This patch continues resolving the andn regression case in the stv pass
(see https://gcc.gnu.org/ml/gcc-patches/2016-01/msg01017.html).
It adds a new andn pattern, similar to the other bitwise
DI patterns we already have for the stv pass.

This improves performance of 462.libquantum benchmark on Haswell
(+2.6% on -O2, +1% on -O3 -flto).

Unfortunately, this patch doesn't enable generation of pandn when the
target doesn't have BMI.  Perhaps a peephole could be used for such targets?
Or we could allow andn and then split it back to and + xor for them.

Bootstrapped and regtested on x86_64-unknown-linux-gnu.  OK for trunk?

Thanks,
Ilya
--
gcc/

2016-01-15  Ilya Enkovich  <enkovich.gnu@gmail.com>

	* config/i386/i386.c (scalar_to_vector_candidate_p): Support
	andnot instruction.
	(scalar_chain::convert_op): Likewise.
	* config/i386/i386.md (*andndi3_doubleword): New.

gcc/testsuite/

2016-01-15  Ilya Enkovich  <enkovich.gnu@gmail.com>

	* gcc.target/i386/pr65105-5.c: Adjust to andn generation.

Comments

Uros Bizjak Jan. 17, 2016, 9:17 p.m. UTC | #1
On Fri, Jan 15, 2016 at 4:46 PM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
> Hi,
>
> This patch continues resolving andn regression case in stv pass
> (see https://gcc.gnu.org/ml/gcc-patches/2016-01/msg01017.html).
> In this patch a new andn pattern added similar to other bit
> DI patterns we have for stv pass.
>
> This improves performance of 462.libquantum benchmark on Haswell
> (+2.6% on -O2, +1% on -O3 -flto).
>
> Unfortunately this patch doesn't enable generation of pandn in case
> target doesn't have BMI.  Probably peephole may be used for such targets?
> Or we may allow andn and then split it back to and + xor for them.

IMO, we want a splitter here. We should optimize the compiler for newer targets.

> Bootstrapped and regtested on x86_64-unknown-linux-gnu.  OK for trunk?
>
> Thanks,
> Ilya
> --
> gcc/
>
> 2016-01-15  Ilya Enkovich  <enkovich.gnu@gmail.com>
>
>         * config/i386/i386.c (scalar_to_vector_candidate_p): Support
>         andnot instruction.
>         (scalar_chain::convert_op): Likewise.
>         * config/i386/i386.md (*andndi3_doubleword): New.
>
> gcc/testsuite/
>
> 2016-01-15  Ilya Enkovich  <enkovich.gnu@gmail.com>
>
>         * gcc.target/i386/pr65105-5.c: Adjust to andn generation.

OK for mainline.

Thanks,
Uros.

>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index de41477..a0b0d68 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -2815,7 +2815,11 @@ scalar_to_vector_candidate_p (rtx_insn *insn)
>        return false;
>      }
>
> -  if (!REG_P (XEXP (src, 0)) && !MEM_P (XEXP (src, 0)))
> +  if (!REG_P (XEXP (src, 0)) && !MEM_P (XEXP (src, 0))
> +      /* Check for andnot case.  */
> +      && (GET_CODE (src) != AND
> +         || GET_CODE (XEXP (src, 0)) != NOT
> +         || !REG_P (XEXP (XEXP (src, 0), 0))))
>        return false;
>
>    if (!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
> @@ -3383,7 +3387,12 @@ scalar_chain::convert_op (rtx *op, rtx_insn *insn)
>  {
>    *op = copy_rtx_if_shared (*op);
>
> -  if (MEM_P (*op))
> +  if (GET_CODE (*op) == NOT)
> +    {
> +      convert_op (&XEXP (*op, 0), insn);
> +      PUT_MODE (*op, V2DImode);
> +    }
> +  else if (MEM_P (*op))
>      {
>        rtx tmp = gen_reg_rtx (DImode);
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 71941d0..f16b42a 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -8645,6 +8645,23 @@
>               (clobber (reg:CC FLAGS_REG))])]
>    "split_double_mode (DImode, &operands[0], 3, &operands[0], &operands[3]);")
>
> +(define_insn_and_split "*andndi3_doubleword"
> +  [(set (match_operand:DI 0 "register_operand" "=r,r")
> +       (and:DI
> +         (not:DI (match_operand:DI 1 "register_operand" "r,r"))
> +         (match_operand:DI 2 "nonimmediate_operand" "r,m")))
> +   (clobber (reg:CC FLAGS_REG))]
> +  "TARGET_BMI && !TARGET_64BIT && TARGET_STV && TARGET_SSE"
> +  "#"
> +  "&& reload_completed"
> +  [(parallel [(set (match_dup 0)
> +                  (and:SI (not:SI (match_dup 1)) (match_dup 2)))
> +             (clobber (reg:CC FLAGS_REG))])
> +   (parallel [(set (match_dup 3)
> +                  (and:SI (not:SI (match_dup 4)) (match_dup 5)))
> +             (clobber (reg:CC FLAGS_REG))])]
> +  "split_double_mode (DImode, &operands[0], 3, &operands[0], &operands[3]);")
> +
>  (define_insn "*<code>hi_1"
>    [(set (match_operand:HI 0 "nonimmediate_operand" "=r,rm,!k")
>         (any_or:HI
> diff --git a/gcc/testsuite/gcc.target/i386/pr65105-5.c b/gcc/testsuite/gcc.target/i386/pr65105-5.c
> index 5818c1c..639bbe1 100644
> --- a/gcc/testsuite/gcc.target/i386/pr65105-5.c
> +++ b/gcc/testsuite/gcc.target/i386/pr65105-5.c
> @@ -1,7 +1,7 @@
>  /* PR target/pr65105 */
>  /* { dg-do compile { target { ia32 } } } */
>  /* { dg-options "-O2 -march=core-avx2" } */
> -/* { dg-final { scan-assembler "pand" } } */
> +/* { dg-final { scan-assembler "pandn" } } */
>  /* { dg-final { scan-assembler "pxor" } } */
>  /* { dg-final { scan-assembler "ptest" } } */
>
diff mbox

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index de41477..a0b0d68 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2815,7 +2815,11 @@  scalar_to_vector_candidate_p (rtx_insn *insn)
       return false;
     }
 
-  if (!REG_P (XEXP (src, 0)) && !MEM_P (XEXP (src, 0)))
+  if (!REG_P (XEXP (src, 0)) && !MEM_P (XEXP (src, 0))
+      /* Check for andnot case.  */
+      && (GET_CODE (src) != AND
+	  || GET_CODE (XEXP (src, 0)) != NOT
+	  || !REG_P (XEXP (XEXP (src, 0), 0))))
       return false;
 
   if (!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
@@ -3383,7 +3387,12 @@  scalar_chain::convert_op (rtx *op, rtx_insn *insn)
 {
   *op = copy_rtx_if_shared (*op);
 
-  if (MEM_P (*op))
+  if (GET_CODE (*op) == NOT)
+    {
+      convert_op (&XEXP (*op, 0), insn);
+      PUT_MODE (*op, V2DImode);
+    }
+  else if (MEM_P (*op))
     {
       rtx tmp = gen_reg_rtx (DImode);
 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 71941d0..f16b42a 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -8645,6 +8645,23 @@ 
 	      (clobber (reg:CC FLAGS_REG))])]
   "split_double_mode (DImode, &operands[0], 3, &operands[0], &operands[3]);")
 
+(define_insn_and_split "*andndi3_doubleword"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(and:DI
+	  (not:DI (match_operand:DI 1 "register_operand" "r,r"))
+	  (match_operand:DI 2 "nonimmediate_operand" "r,m")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI && !TARGET_64BIT && TARGET_STV && TARGET_SSE"
+  "#"
+  "&& reload_completed"
+  [(parallel [(set (match_dup 0)
+		   (and:SI (not:SI (match_dup 1)) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])
+   (parallel [(set (match_dup 3)
+		   (and:SI (not:SI (match_dup 4)) (match_dup 5)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "split_double_mode (DImode, &operands[0], 3, &operands[0], &operands[3]);")
+
 (define_insn "*<code>hi_1"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=r,rm,!k")
 	(any_or:HI
diff --git a/gcc/testsuite/gcc.target/i386/pr65105-5.c b/gcc/testsuite/gcc.target/i386/pr65105-5.c
index 5818c1c..639bbe1 100644
--- a/gcc/testsuite/gcc.target/i386/pr65105-5.c
+++ b/gcc/testsuite/gcc.target/i386/pr65105-5.c
@@ -1,7 +1,7 @@ 
 /* PR target/pr65105 */
 /* { dg-do compile { target { ia32 } } } */
 /* { dg-options "-O2 -march=core-avx2" } */
-/* { dg-final { scan-assembler "pand" } } */
+/* { dg-final { scan-assembler "pandn" } } */
 /* { dg-final { scan-assembler "pxor" } } */
 /* { dg-final { scan-assembler "ptest" } } */