diff mbox series

[RFC/RFA,v2,05/12] i386: Implement new expander for efficient CRC computation

Message ID CAE65F3M7pvjsOX6wM+HgOrSfPH2AWNmbOs9V3vW7i6E2fDL7UQ@mail.gmail.com
State New
Headers show
Series [RFC/RFA,v2,01/12] Implement internal functions for efficient CRC computation | expand

Commit Message

Mariam Arutunian July 26, 2024, 6:07 p.m. UTC
This patch introduces two new expanders for the i386 backend,
dedicated to generating optimized code for CRC computations.
   The new expanders are designed to leverage specific hardware
capabilities to achieve faster CRC calculations,
   particularly using the pclmulqdq or crc32 instructions when supported by
the target architecture.

   Expander 1: Bit-Forward CRC (crc<SWI124dup:mode><SWI124:mode>4)
   For 64-bit targets (TARGET_64BIT) that support the pclmulqdq instruction
(TARGET_PCLMUL),
   the expander generates code that uses the pclmulqdq instruction for the
CRC computation.

   Expander 2: Bit-Reversed CRC (crc_rev<SWI124dup:mode><SWI124:mode>4)
   The expander first checks whether the target supports the CRC32
instruction (TARGET_CRC32)
   and the polynomial in use is 0x1EDC6F41 (iSCSI).  If both conditions are
met,
   it emits the corresponding crc32 instruction (crc32b, crc32w,
or crc32l, depending on the data size).
   Otherwise, if the target is 64-bit and supports pclmulqdq, it uses the
pclmulqdq instruction for the bit-reversed CRC computation.
   Otherwise a table-based CRC is generated.

     gcc/config/i386/

       * i386-protos.h (ix86_expand_crc_using_pclmul): New extern function
declaration.
       (ix86_expand_reversed_crc_using_pclmul):  Likewise.
       * i386.cc (ix86_expand_crc_using_pclmul): New function.
       (ix86_expand_reversed_crc_using_pclmul):  Likewise.
       * i386.md (UNSPEC_CRC, UNSPEC_CRC_REV):  New unspecs.
       (SWI124dup): New iterator.
       (crc<SWI124dup:mode><SWI124:mode>4): New expander for bit-forward
CRC.
       (crc_rev<SWI124dup:mode><SWI124:mode>4): New expander for reversed
CRC.

     gcc/testsuite/gcc.target/i386/

       * crc-crc32-data16.c: New test.
       * crc-crc32-data32.c: Likewise.
       * crc-crc32-data8.c: Likewise.
       * crc-1-pclmul.c: Likewise.
       * crc-10-pclmul.c: Likewise.
       * crc-12-pclmul.c: Likewise.
       * crc-13-pclmul.c: Likewise.
       * crc-14-pclmul.c: Likewise.
       * crc-17-pclmul.c: Likewise.
       * crc-18-pclmul.c: Likewise.
       * crc-21-pclmul.c: Likewise.
       * crc-22-pclmul.c: Likewise.
       * crc-23-pclmul.c: Likewise.
       * crc-4-pclmul.c: Likewise.
       * crc-5-pclmul.c: Likewise.
       * crc-6-pclmul.c: Likewise.
       * crc-7-pclmul.c: Likewise.
       * crc-8-pclmul.c: Likewise.
       * crc-9-pclmul.c: Likewise.
       * crc-CCIT-data16-pclmul.c: Likewise.
       * crc-CCIT-data8-pclmul.c: Likewise.
       * crc-coremark-16bitdata-pclmul.c: Likewise.

   Signed-off-by: Mariam Arutunian <mariamarutunian@gmail.com>
diff mbox series

Patch

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index dbc861fb1ea..845a5dcd9ab 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -288,6 +288,8 @@  extern void ix86_expand_sse2_mulvxdi3 (rtx, rtx, rtx);
 extern void ix86_expand_sse2_abs (rtx, rtx);
 extern bool ix86_expand_vector_init_duplicate (bool, machine_mode, rtx,
 					       rtx);
+extern void ix86_expand_crc_using_pclmul (rtx *);
+extern void ix86_expand_reversed_crc_using_pclmul (rtx *);
 extern bool ix86_extract_perm_from_pool_constant (int*, rtx);
 
 /* In i386-c.cc  */
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 69cd4ae05a7..33a2ab4f99c 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -26185,6 +26185,135 @@  ix86_run_selftests (void)
 
 } // namespace selftest
 
+/* Generate assembly to calculate CRC using pclmulqdq instruction.
+   OPERANDS[1] is input CRC,
+   OPERANDS[2] is data (message),
+   OPERANDS[3] is the polynomial without the leading 1.  */
+
+void
+ix86_expand_crc_using_pclmul (rtx *operands)
+{
+/* Check and keep arguments.  */
+  gcc_assert (!CONST_INT_P (operands[0]));
+  gcc_assert (CONST_INT_P (operands[3]));
+  rtx crc = operands[1];
+  rtx data = operands[2];
+  unsigned HOST_WIDE_INT crc_size = GET_MODE_BITSIZE (GET_MODE (operands[0]));
+  gcc_assert (crc_size <= 32);
+  unsigned HOST_WIDE_INT data_size = GET_MODE_BITSIZE (GET_MODE (data));
+  unsigned HOST_WIDE_INT DImode_size = GET_MODE_BITSIZE (DImode);
+
+  /* Calculate the quotient.  */
+  unsigned HOST_WIDE_INT
+      q = gf2n_poly_long_div_quotient (UINTVAL (operands[3]), crc_size);
+
+  if (crc_size > data_size)
+    crc = expand_shift (RSHIFT_EXPR, DImode, crc, crc_size - data_size,
+			NULL_RTX, 1);
+
+  /* Keep the quotient in V2DImode.  */
+  rtx q_v2di = gen_reg_rtx (V2DImode);
+  rtx quotient = gen_reg_rtx (DImode);
+  convert_move (quotient, gen_int_mode (q, DImode), 0);
+  emit_insn (gen_vec_concatv2di (q_v2di, quotient, const0_rtx));
+
+  /* crc ^ data and keep in V2DImode.  */
+  rtx cd_xor = expand_binop (DImode, xor_optab, crc, data, NULL_RTX, 1,
+			     OPTAB_WIDEN);
+  rtx res = gen_reg_rtx (V2DImode);
+  emit_insn (gen_vec_concatv2di (res, cd_xor, const0_rtx));
+  /* Perform carry-less multiplication.  */
+  emit_insn (gen_pclmulqdq (res, res, q_v2di, gen_int_mode (0, DImode)));
+
+  res = expand_shift (RSHIFT_EXPR, V2DImode, res, crc_size, NULL_RTX, 0);
+
+  /* Keep the polynomial in V2DImode.  */
+  rtx polynomial = gen_reg_rtx (DImode);
+  convert_move (polynomial, operands[3], 0);
+  rtx p_v2di = gen_reg_rtx (V2DImode);
+  emit_insn (gen_vec_concatv2di (p_v2di, polynomial, const0_rtx));
+
+  /* Perform carry-less multiplication and get low part.  */
+  emit_insn (gen_pclmulqdq (res, res, p_v2di, gen_int_mode (0, DImode)));
+  rtx crc_part = gen_reg_rtx (DImode);
+  emit_insn (gen_vec_extractv2didi (crc_part, res, const0_rtx));
+
+  if (crc_size > data_size)
+    {
+      rtx shift = expand_shift (LSHIFT_EXPR, DImode, operands[1], data_size,
+				NULL_RTX, 1);
+      crc_part = expand_binop (DImode, xor_optab, crc_part, shift, NULL_RTX, 1,
+			       OPTAB_DIRECT);
+    }
+  /* Zero upper bits beyond crc_size.  */
+  res = expand_shift (RSHIFT_EXPR, DImode, crc_part, DImode_size - crc_size,
+		      NULL_RTX, 1);
+  res = expand_shift (LSHIFT_EXPR, DImode, crc_part, DImode_size - crc_size,
+		      NULL_RTX, 0);
+  emit_move_insn (operands[0], gen_lowpart (GET_MODE (operands[0]), crc_part));
+}
+
+/* Generate assembly to calculate reversed CRC using pclmulqdq instruction.
+   OPERANDS[1] is input CRC,
+   OPERANDS[2] is data (message),
+   OPERANDS[3] is the polynomial without the leading 1.  */
+
+void
+ix86_expand_reversed_crc_using_pclmul (rtx *operands)
+{
+  /* Check and keep arguments.  */
+  gcc_assert (!CONST_INT_P (operands[0]));
+  gcc_assert (CONST_INT_P (operands[3]));
+  rtx crc = operands[1];
+  rtx data = operands[2];
+  unsigned HOST_WIDE_INT crc_size = GET_MODE_BITSIZE (GET_MODE (operands[0]));
+  gcc_assert (crc_size <= 32);
+  unsigned HOST_WIDE_INT data_size = GET_MODE_BITSIZE (GET_MODE (data));
+
+  /* Calculate the quotient.  */
+  unsigned HOST_WIDE_INT
+      q = gf2n_poly_long_div_quotient (UINTVAL (operands[3]), crc_size);
+
+  /* Reflect the calculated quotient.  */
+  q = reflect (q);
+  rtx q_v2di = gen_reg_rtx (V2DImode);
+  rtx quotient = gen_reg_rtx (DImode);
+  convert_move (quotient, gen_int_mode (q >> (data_size - 4), DImode), 0);
+  emit_insn (gen_vec_concatv2di (q_v2di, quotient, const0_rtx));
+
+  /* crc ^ data and keep in V2DImode.  */
+  rtx cd_xor = expand_binop (DImode, xor_optab, crc, data, NULL_RTX, 1,
+			     OPTAB_WIDEN);
+
+  /* Perform carry-less multiplication.  */
+  rtx res = gen_reg_rtx (V2DImode);
+  emit_insn (gen_vec_concatv2di (res, cd_xor, const0_rtx));
+  emit_insn (gen_pclmulqdq (res, res, q_v2di, gen_int_mode (0, DImode)));
+
+  res = expand_shift (LSHIFT_EXPR, V2DImode, res, 64 - crc_size - 3,
+		      NULL_RTX, 0);
+
+  /* Reflect the polynomial and keep in V2DImode.  */
+  unsigned HOST_WIDE_INT reflected_op3 = reflect (UINTVAL (operands[3]));
+  rtx polynomial = gen_reg_rtx (DImode);
+  convert_move (polynomial, gen_int_mode (reflected_op3 << 1, DImode), 0);
+  rtx p_v2di = gen_reg_rtx (V2DImode);
+  emit_insn (gen_vec_concatv2di (p_v2di, polynomial, const0_rtx));
+
+  /* Perform carry-less multiplication and get high part.  */
+  emit_insn (gen_pclmulqdq (res, res, p_v2di, gen_int_mode (0, DImode)));
+  rtx res_high = gen_reg_rtx (DImode);
+  emit_insn (gen_vec_extractv2didi (res_high, res, const1_rtx));
+
+  if (crc_size > data_size)
+    {
+      rtx shift = expand_shift (RSHIFT_EXPR, DImode, crc, data_size,
+				NULL_RTX, 1);
+      res_high = expand_binop (DImode, xor_optab, res_high, shift, NULL_RTX, 1,
+			       OPTAB_DIRECT);
+    }
+  emit_move_insn (operands[0], gen_lowpart (GET_MODE (operands[0]), res_high));
+}
 #endif /* CHECKING_P */
 
 static const scoped_attribute_specs *const ix86_attribute_table[] =
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e8073f5a200..681e0fed59b 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -189,6 +189,10 @@ 
   ;; For CRC32 support
   UNSPEC_CRC32
 
+  ;; For CRC support
+  UNSPEC_CRC
+  UNSPEC_CRC_REV
+
   ;; For LZCNT suppoprt
   UNSPEC_LZCNT
 
@@ -27175,6 +27179,61 @@ 
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "DI")])
 
+
+;; Same modes as SWI124.  Lets the CRC and data modes vary independently.
+(define_mode_iterator SWI124dup [QI HI SI])
+
+;; Bit-forward CRC for 8-, 16- and 32-bit CRCs; needs PCLMUL and TARGET_64BIT.
+(define_expand "crc<SWI124dup:mode><SWI124:mode>4"
+	;; return value (calculated CRC)
+  [(set (match_operand:SWI124 0 "register_operand" "=r")
+		      ;; initial CRC
+	(unspec:SWI124 [(match_operand:SWI124 1 "register_operand" "r")
+		      ;; data
+		      (match_operand:SWI124dup 2 "register_operand" "r")
+		      ;; polynomial without leading 1
+		      (match_operand:SWI124 3)]
+		      UNSPEC_CRC))]
+  /* The case when data's size is bigger than CRC's size is not supported.  */
+  "TARGET_PCLMUL && TARGET_64BIT && <SWI124:MODE>mode >= <SWI124dup:MODE>mode"
+{
+   ix86_expand_crc_using_pclmul (operands);
+   DONE;
+})
+
+;; Bit-reversed CRC for 8-, 16- and 32-bit CRCs (crc32, PCLMUL or table-based).
+(define_expand "crc_rev<SWI124dup:mode><SWI124:mode>4"
+	;; return value (calculated CRC)
+  [(set (match_operand:SWI124 0 "register_operand" "=r")
+		      ;; initial CRC
+	(unspec:SWI124 [(match_operand:SWI124 1 "register_operand" "r")
+		      ;; data
+		      (match_operand:SWI124dup 2 "register_operand" "r")
+		      ;; polynomial without leading 1
+		      (match_operand:SWI124 3)]
+		      UNSPEC_CRC_REV))]
+  /* The case when data's size is bigger than CRC's size is not supported.  */
+  "((TARGET_PCLMUL && TARGET_64BIT) || TARGET_CRC32)
+    && <SWI124:MODE>mode >= <SWI124dup:MODE>mode"
+{ /* For the iSCSI polynomial (0x1EDC6F41), use the crc32 instruction.  */
+  if (TARGET_CRC32 && INTVAL (operands[3]) == 0x1EDC6F41)
+    {
+      rtx crc_part = gen_reg_rtx (SImode);
+      rtx crc = operands[1];
+      rtx data = operands[2];
+      emit_insn (gen_sse4_2_crc32<SWI124dup:mode> (crc_part, crc, data));
+      emit_move_insn (operands[0],
+		      gen_lowpart (GET_MODE (operands[0]), crc_part));
+    }
+  else if (TARGET_PCLMUL && TARGET_64BIT)
+    ix86_expand_reversed_crc_using_pclmul (operands);
+  else
+    expand_reversed_crc_table_based (operands[0], operands[1], operands[2],
+				     operands[3], GET_MODE (operands[2]),
+				     generate_reflecting_code_standard);
+  DONE;
+})
+
 (define_insn "rdpmc"
   [(set (match_operand:DI 0 "register_operand" "=A")
   	(unspec_volatile:DI [(match_operand:SI 1 "register_operand" "c")]
diff --git a/gcc/testsuite/gcc.target/i386/crc-1-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-1-pclmul.c
new file mode 100644
index 00000000000..21edf417f0c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-1-pclmul.c
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc -fdisable-tree-phiopt2 -fdisable-tree-phiopt3" } */
+
+#include "../../gcc.dg/torture/crc-1.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/i386/crc-10-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-10-pclmul.c
new file mode 100644
index 00000000000..54e3310c17b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-10-pclmul.c
@@ -0,0 +1,9 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-10.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-12-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-12-pclmul.c
new file mode 100644
index 00000000000..1ac9a6bf56d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-12-pclmul.c
@@ -0,0 +1,9 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc -fdisable-tree-phiopt2 -fdisable-tree-phiopt3" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-12.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-13-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-13-pclmul.c
new file mode 100644
index 00000000000..d5ac93525b8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-13-pclmul.c
@@ -0,0 +1,9 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-13.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-14-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-14-pclmul.c
new file mode 100644
index 00000000000..3f916b913cd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-14-pclmul.c
@@ -0,0 +1,9 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-14.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-17-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-17-pclmul.c
new file mode 100644
index 00000000000..6c3c8460535
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-17-pclmul.c
@@ -0,0 +1,9 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-17.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-18-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-18-pclmul.c
new file mode 100644
index 00000000000..4d3ac62a4ff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-18-pclmul.c
@@ -0,0 +1,9 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-18.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-21-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-21-pclmul.c
new file mode 100644
index 00000000000..e9569bebd25
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-21-pclmul.c
@@ -0,0 +1,9 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-21.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-22-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-22-pclmul.c
new file mode 100644
index 00000000000..92f1559b040
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-22-pclmul.c
@@ -0,0 +1,9 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-22.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-23-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-23-pclmul.c
new file mode 100644
index 00000000000..0417f10e5e2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-23-pclmul.c
@@ -0,0 +1,9 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-23.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-4-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-4-pclmul.c
new file mode 100644
index 00000000000..6c6c0608541
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-4-pclmul.c
@@ -0,0 +1,9 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-4.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-5-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-5-pclmul.c
new file mode 100644
index 00000000000..b80368e81bc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-5-pclmul.c
@@ -0,0 +1,9 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -w -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-5.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/i386/crc-6-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-6-pclmul.c
new file mode 100644
index 00000000000..d3ac2cb09fe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-6-pclmul.c
@@ -0,0 +1,9 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-6.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/i386/crc-7-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-7-pclmul.c
new file mode 100644
index 00000000000..8bbb5098e66
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-7-pclmul.c
@@ -0,0 +1,9 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-7.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-8-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-8-pclmul.c
new file mode 100644
index 00000000000..fe9f1e90270
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-8-pclmul.c
@@ -0,0 +1,9 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-8.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-9-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-9-pclmul.c
new file mode 100644
index 00000000000..91936c07b39
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-9-pclmul.c
@@ -0,0 +1,9 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-9.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-CCIT-data16-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-CCIT-data16-pclmul.c
new file mode 100644
index 00000000000..ca728120858
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-CCIT-data16-pclmul.c
@@ -0,0 +1,9 @@ 
+/* { dg-do run } */
+/* { dg-options "-w -mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-CCIT-data16.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/i386/crc-CCIT-data8-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-CCIT-data8-pclmul.c
new file mode 100644
index 00000000000..816e0561d8f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-CCIT-data8-pclmul.c
@@ -0,0 +1,9 @@ 
+/* { dg-do run } */
+/* { dg-options "-w -mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+
+#include "../../gcc.dg/torture/crc-CCIT-data8.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/i386/crc-coremark-16bitdata-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-coremark-16bitdata-pclmul.c
new file mode 100644
index 00000000000..817d960b0aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-coremark-16bitdata-pclmul.c
@@ -0,0 +1,9 @@ 
+/* { dg-do run } */
+/* { dg-options "-w -mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-coremark16-data16.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/i386/crc-crc32-data16.c b/gcc/testsuite/gcc.target/i386/crc-crc32-data16.c
new file mode 100644
index 00000000000..49ab5f31ef0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-crc32-data16.c
@@ -0,0 +1,53 @@ 
+/* { dg-do run } */
+/* { dg-options "-mcrc32 -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include <stdint.h>
+#include <stdlib.h>
+
+__attribute__ ((noinline,optimize(0)))
+uint32_t _crc32_O0 (uint32_t crc, uint16_t data) {
+  int i;
+  crc = crc ^ data;
+  /* Unoptimized reference.  NOTE(review): only 8 steps for 16-bit data?  */
+  for (i = 0; i < 8; i++) {
+      if (crc & 1)
+	crc = (crc >> 1) ^ 0x82F63B78;
+      else
+	crc = (crc >> 1);
+    }
+
+  return crc;
+}
+
+uint32_t _crc32 (uint32_t crc, uint16_t data) {
+  int i;
+  crc = crc ^ data;
+  /* CRC pass should recognize this.  NOTE(review): 8 steps, 16-bit data?  */
+  for (i = 0; i < 8; i++) {
+      if (crc & 1)
+	crc = (crc >> 1) ^ 0x82F63B78;
+      else
+	crc = (crc >> 1);
+    }
+
+  return crc;
+}
+
+/* Check _crc32 against the -O0 reference for every input below 0xffff.  */
+int main ()
+{
+  uint32_t state = 0x0D800D80;
+  for (uint16_t input = 0; input != 0xffff; ++input)
+    {
+      const uint32_t expected = _crc32_O0 (state, input);
+      if (_crc32 (state, input) != expected)
+	abort ();
+      state = expected;
+    }
+}
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "UNSPEC_CRC32" "dfinish"} } */
+/* { dg-final { scan-rtl-dump-times "pclmulqdq" 0 "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-crc32-data32.c b/gcc/testsuite/gcc.target/i386/crc-crc32-data32.c
new file mode 100644
index 00000000000..08d6c193a77
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-crc32-data32.c
@@ -0,0 +1,53 @@ 
+/* { dg-do run } */
+/* { dg-options "-mcrc32 -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include <stdint.h>
+#include <stdlib.h>
+
+__attribute__ ((noinline,optimize(0)))
+uint32_t _crc32_O0 (uint32_t crc, uint32_t data) {
+  int i;
+  crc = crc ^ data;
+  /* Unoptimized reference for _crc32: 32 LSB-first CRC steps.  */
+  for (i = 0; i < 32; i++) {
+      if (crc & 1)
+	crc = (crc >> 1) ^ 0x82F63B78;
+      else
+	crc = (crc >> 1);
+    }
+
+  return crc;
+}
+
+uint32_t _crc32 (uint32_t crc, uint32_t data) {
+  int i;
+  crc = crc ^ data;
+  /* Loop the CRC pass is expected to recognize (poly 0x82F63B78).  */
+  for (i = 0; i < 32; i++) {
+      if (crc & 1)
+	crc = (crc >> 1) ^ 0x82F63B78;
+      else
+	crc = (crc >> 1);
+    }
+
+  return crc;
+}
+
+/* Check _crc32 against the -O0 reference for every input below 0xff.  */
+int main ()
+{
+  uint32_t state = 0x0D800D80;
+  for (uint8_t input = 0; input != 0xff; ++input)
+    {
+      const uint32_t expected = _crc32_O0 (state, input);
+      if (_crc32 (state, input) != expected)
+	abort ();
+      state = expected;
+    }
+}
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "UNSPEC_CRC32" "dfinish"} } */
+/* { dg-final { scan-rtl-dump-times "pclmulqdq" 0 "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-crc32-data8.c b/gcc/testsuite/gcc.target/i386/crc-crc32-data8.c
new file mode 100644
index 00000000000..7a76b27fd28
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-crc32-data8.c
@@ -0,0 +1,53 @@ 
+/* { dg-do run } */
+/* { dg-options "-mcrc32 -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include <stdint.h>
+#include <stdlib.h>
+
+__attribute__ ((noinline,optimize(0)))
+uint32_t _crc32_O0 (uint32_t crc, uint8_t data) {
+  int i;
+  crc = crc ^ data;
+  /* Unoptimized reference for _crc32: 8 LSB-first CRC steps.  */
+  for (i = 0; i < 8; i++) {
+      if (crc & 1)
+	crc = (crc >> 1) ^ 0x82F63B78;
+      else
+	crc = (crc >> 1);
+    }
+
+  return crc;
+}
+
+uint32_t _crc32 (uint32_t crc, uint8_t data) {
+  int i;
+  crc = crc ^ data;
+  /* Loop the CRC pass is expected to recognize (poly 0x82F63B78).  */
+  for (i = 0; i < 8; i++) {
+      if (crc & 1)
+	crc = (crc >> 1) ^ 0x82F63B78;
+      else
+	crc = (crc >> 1);
+    }
+
+  return crc;
+}
+
+/* Check _crc32 against the -O0 reference for every input below 0xff.  */
+int main ()
+{
+  uint32_t state = 0x0D800D80;
+  for (uint8_t input = 0; input != 0xff; ++input)
+    {
+      const uint32_t expected = _crc32_O0 (state, input);
+      if (_crc32 (state, input) != expected)
+	abort ();
+      state = expected;
+    }
+}
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "UNSPEC_CRC32" "dfinish"} } */
+/* { dg-final { scan-rtl-dump-times "pclmulqdq" 0 "dfinish"} } */
-- 
2.25.1