diff mbox series

[1/3,APX,CCMP] Support APX CCMP

Message ID 20240515082054.3934069-2-hongyu.wang@intel.com
State New
Headers show
Series Support Intel APX CCMP | expand

Commit Message

Hongyu Wang May 15, 2024, 8:20 a.m. UTC
The APX CCMP feature implements a conditional compare, which executes the
compare only when EFLAGS matches a given condition.

CCMP introduces a default flags value (dfv): when the conditional compare
does not execute, the flags are set directly according to dfv.

The instruction goes like

ccmpeq {dfv=sf,of,cf,zf}  %rax, %r16

This instruction tests whether EFLAGS matches condition code EQ. If it
does, %rax and %r16 are compared like a legacy cmp. If not, EFLAGS is
updated according to dfv, which here means SF, OF, CF and ZF are set.
PF is set according to CF in dfv, and AF is always cleared.

The dfv part can be any combination of sf,of,cf,zf, like {dfv=cf,zf} which
sets only CF and ZF and clears the others, or {dfv=} which clears all EFLAGS.

To enable CCMP, we implemented the target hooks TARGET_GEN_CCMP_FIRST and
TARGET_GEN_CCMP_NEXT to reuse the current ccmp infrastructure. We also
extended the cstorem4 optab to support storing different CCmodes to fit
the current ccmp infrastructure.

gcc/ChangeLog:

	* config/i386/i386-expand.cc (ix86_gen_ccmp_first): New function
	that tests whether the first compare can be generated.
	(ix86_gen_ccmp_next): New function to emit a single compare and ccmp
	sequence.
	* config/i386/i386-opts.h (enum apx_features): Add apx_ccmp.
	* config/i386/i386-protos.h (ix86_gen_ccmp_first): New proto
	declare.
	(ix86_gen_ccmp_next): Likewise.
	(ix86_get_flags_cc): Likewise.
	* config/i386/i386.cc (ix86_flags_cc): New enum.
	(ix86_ccmp_dfv_mapping): New string array to map conditional
	code to dfv.
	(ix86_print_operand): Handle special dfv flag for CCMP.
	(ix86_get_flags_cc): New function to return x86 CC enum.
	(TARGET_GEN_CCMP_FIRST): Define.
	(TARGET_GEN_CCMP_NEXT): Likewise.
	* config/i386/i386.h (TARGET_APX_CCMP): Define.
	* config/i386/i386.md (@ccmp<mode>): New define_insn to support
	ccmp.
	(UNSPEC_APX_DFV): New unspec for ccmp dfv.
	(ALL_CC): New mode iterator.
	(cstorecc4): Change to ...
	(cstore<mode>4) ... this, use ALL_CC to loop through all
	available CCmodes.
	* config/i386/i386.opt (apx_ccmp): Add enum value for ccmp.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/apx-ccmp-1.c: New compile test.
	* gcc.target/i386/apx-ccmp-2.c: New runtime test.
---
 gcc/config/i386/i386-expand.cc             | 121 +++++++++++++++++++++
 gcc/config/i386/i386-opts.h                |   6 +-
 gcc/config/i386/i386-protos.h              |   5 +
 gcc/config/i386/i386.cc                    |  50 +++++++++
 gcc/config/i386/i386.h                     |   1 +
 gcc/config/i386/i386.md                    |  35 +++++-
 gcc/config/i386/i386.opt                   |   3 +
 gcc/testsuite/gcc.target/i386/apx-ccmp-1.c |  63 +++++++++++
 gcc/testsuite/gcc.target/i386/apx-ccmp-2.c |  57 ++++++++++
 9 files changed, 337 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-ccmp-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-ccmp-2.c
diff mbox series

Patch

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 1ab22fe7973..f00525e449f 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -25554,4 +25554,125 @@  ix86_expand_fast_convert_bf_to_sf (rtx val)
   return ret;
 }
 
+rtx
+ix86_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
+			rtx_code code, tree treeop0, tree treeop1)
+{
+  if (!TARGET_APX_CCMP)
+    return NULL_RTX;
+
+  rtx op0, op1, res;
+  machine_mode op_mode;
+
+  start_sequence ();
+  expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
+
+  op_mode = GET_MODE (op0);
+  if (op_mode == VOIDmode)
+    op_mode = GET_MODE (op1);
+
+  if (!(op_mode == DImode || op_mode == SImode || op_mode == HImode
+	|| op_mode == QImode))
+    {
+      end_sequence ();
+      return NULL_RTX;
+    }
+
+  /* Canonicalize the operands according to mode.  */
+  if (!nonimmediate_operand (op0, op_mode))
+    op0 = force_reg (op_mode, op0);
+  if (!x86_64_general_operand (op1, op_mode))
+    op1 = force_reg (op_mode, op1);
+
+  *prep_seq = get_insns ();
+  end_sequence ();
+
+  start_sequence ();
+
+  res = ix86_expand_compare (code, op0, op1);
+
+  if (!res)
+    {
+      end_sequence ();
+      return NULL_RTX;
+    }
+  *gen_seq = get_insns ();
+  end_sequence ();
+
+  return res;
+}
+
+rtx
+ix86_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
+		       rtx_code cmp_code, tree treeop0, tree treeop1,
+		       rtx_code bit_code)
+{
+  if (!TARGET_APX_CCMP)
+    return NULL_RTX;
+
+  rtx op0, op1, target;
+  machine_mode op_mode, cmp_mode, cc_mode = CCmode;
+  int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
+  insn_code icode;
+  rtx_code prev_code;
+  struct expand_operand ops[5];
+  int dfv;
+
+  push_to_sequence (*prep_seq);
+  expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
+
+  cmp_mode = op_mode = GET_MODE (op0);
+
+  if (!(op_mode == DImode || op_mode == SImode || op_mode == HImode
+	|| op_mode == QImode))
+    {
+      end_sequence ();
+      return NULL_RTX;
+    }
+
+  icode = code_for_ccmp (op_mode);
+
+  op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
+  op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
+  if (!op0 || !op1)
+    {
+      end_sequence ();
+      return NULL_RTX;
+    }
+
+  *prep_seq = get_insns ();
+  end_sequence ();
+
+  target = gen_rtx_REG (cc_mode, FLAGS_REG);
+  dfv = ix86_get_flags_cc ((rtx_code) cmp_code);
+
+  prev_code = GET_CODE (prev);
+
+  if (bit_code != AND)
+    prev_code = reverse_condition (prev_code);
+  else
+    dfv = (int)(dfv ^ 1);
+
+  prev = gen_rtx_fmt_ee (prev_code, VOIDmode, XEXP (prev, 0),
+			 const0_rtx);
+
+  create_fixed_operand (&ops[0], target);
+  create_fixed_operand (&ops[1], prev);
+  create_fixed_operand (&ops[2], op0);
+  create_fixed_operand (&ops[3], op1);
+  create_fixed_operand (&ops[4], GEN_INT (dfv));
+
+  push_to_sequence (*gen_seq);
+  if (!maybe_expand_insn (icode, 5, ops))
+    {
+      end_sequence ();
+      return NULL_RTX;
+    }
+
+  *gen_seq = get_insns ();
+  end_sequence ();
+
+  return gen_rtx_fmt_ee ((rtx_code) cmp_code, VOIDmode, target, const0_rtx);
+}
+
 #include "gt-i386-expand.h"
diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
index 60176ce609f..5fcc4927978 100644
--- a/gcc/config/i386/i386-opts.h
+++ b/gcc/config/i386/i386-opts.h
@@ -140,8 +140,10 @@  enum apx_features {
   apx_push2pop2 = 1 << 1,
   apx_ndd = 1 << 2,
   apx_ppx = 1 << 3,
-  apx_nf = 1<< 4,
-  apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx | apx_nf,
+  apx_nf = 1 << 4,
+  apx_ccmp = 1 << 5,
+  apx_all = apx_egpr | apx_push2pop2 | apx_ndd
+	    | apx_ppx | apx_nf | apx_ccmp,
 };
 
 #endif
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index dbc861fb1ea..26e29df7312 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -242,6 +242,11 @@  extern void ix86_expand_atomic_fetch_op_loop (rtx, rtx, rtx, enum rtx_code,
 extern void ix86_expand_cmpxchg_loop (rtx *, rtx, rtx, rtx, rtx, rtx,
 				      bool, rtx_code_label *);
 extern rtx ix86_expand_fast_convert_bf_to_sf (rtx);
+extern rtx ix86_gen_ccmp_first (rtx_insn **, rtx_insn **, enum rtx_code,
+				tree, tree);
+extern rtx ix86_gen_ccmp_next (rtx_insn **, rtx_insn **, rtx,
+			       enum rtx_code, tree, tree, enum rtx_code);
+extern int ix86_get_flags_cc (enum rtx_code);
 extern rtx ix86_memtag_untagged_pointer (rtx, rtx);
 extern bool ix86_memtag_can_tag_addresses (void);
 
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index b4838b7939e..2363cab1eae 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -433,6 +433,22 @@  static bool i386_asm_output_addr_const_extra (FILE *, rtx);
 static bool ix86_can_inline_p (tree, tree);
 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
 
+typedef enum ix86_flags_cc
+{
+  X86_CCO = 0, X86_CCNO, X86_CCB, X86_CCNB,
+  X86_CCE, X86_CCNE, X86_CCBE, X86_CCNBE,
+  X86_CCS, X86_CCNS, X86_CCP, X86_CCNP,
+  X86_CCL, X86_CCNL, X86_CCLE, X86_CCNLE
+} ix86_cc;
+
+static const char *ix86_ccmp_dfv_mapping[] =
+{
+  "{dfv=of}", "{dfv=}", "{dfv=cf}", "{dfv=}",
+  "{dfv=zf}", "{dfv=}", "{dfv=cf, zf}", "{dfv=}",
+  "{dfv=sf}", "{dfv=}", "{dfv=cf}", "{dfv=}",
+  "{dfv=sf}", "{dfv=sf, of}", "{dfv=sf, of, zf}", "{dfv=sf, of}"
+};
+
 
 /* Whether -mtune= or -march= were specified */
 int ix86_tune_defaulted;
@@ -13690,6 +13706,7 @@  print_reg (rtx x, int code, FILE *file)
    M -- print addr32 prefix for TARGET_X32 with VSIB address.
    ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
    N -- print maskz if it's constant 0 operand.
+   G -- print embedded flag for ccmp/ctest.
  */
 
 void
@@ -14083,6 +14100,14 @@  ix86_print_operand (FILE *file, rtx x, int code)
 			      file);
 	  return;
 
+	case 'G':
+	  {
+	    int dfv = INTVAL (x);
+	    const char *dfv_suffix = ix86_ccmp_dfv_mapping[dfv];
+	    fputs (dfv_suffix, file);
+	  }
+	  return;
+
 	case 'H':
 	  if (!offsettable_memref_p (x))
 	    {
@@ -16466,6 +16491,24 @@  ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
   return val.to_shwi ();
 }
 
+int ix86_get_flags_cc (rtx_code code)
+{
+  switch (code)
+    {
+      case NE: return X86_CCNE;
+      case EQ: return X86_CCE;
+      case GE: return X86_CCNL;
+      case GT: return X86_CCNLE;
+      case LE: return X86_CCLE;
+      case LT: return X86_CCL;
+      case GEU: return X86_CCNB;
+      case GTU: return X86_CCNBE;
+      case LEU: return X86_CCBE;
+      case LTU: return X86_CCB;
+      default: return -1;
+    }
+}
+
 /* Return TRUE or FALSE depending on whether the first SET in INSN
    has source and destination with matching CC modes, and that the
    CC mode is at least as constrained as REQ_MODE.  */
@@ -26765,6 +26808,13 @@  ix86_libgcc_floating_mode_supported_p
 #undef TARGET_MEMTAG_TAG_SIZE
 #define TARGET_MEMTAG_TAG_SIZE ix86_memtag_tag_size
 
+#undef TARGET_GEN_CCMP_FIRST
+#define TARGET_GEN_CCMP_FIRST ix86_gen_ccmp_first
+
+#undef TARGET_GEN_CCMP_NEXT
+#define TARGET_GEN_CCMP_NEXT ix86_gen_ccmp_next
+
+
 static bool
 ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
 {
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index f20ae4726da..5631bc4695a 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -56,6 +56,7 @@  see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define TARGET_APX_NDD (ix86_apx_features & apx_ndd)
 #define TARGET_APX_PPX (ix86_apx_features & apx_ppx)
 #define TARGET_APX_NF (ix86_apx_features & apx_nf)
+#define TARGET_APX_CCMP (ix86_apx_features & apx_ccmp)
 
 #include "config/vxworks-dummy.h"
 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index ddde83e57f5..49978d1f383 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -217,6 +217,10 @@  (define_c_enum "unspec" [
 
   ;; For APX PPX support
   UNSPEC_APX_PPX
+
+  ;; For APX CCMP support
+  ;; DFV = default flag value
+  UNSPEC_APX_DFV
 ])
 
 (define_c_enum "unspecv" [
@@ -1504,6 +1508,25 @@  (define_expand "cstore<mode>4"
   DONE;
 })
 
+(define_insn "@ccmp<mode>"
+ [(set (match_operand:CC 0 "flags_reg_operand")
+       (if_then_else:CC
+	 (match_operator 1 "comparison_operator"
+	  [(reg:CC FLAGS_REG) (const_int 0)])
+	(compare:CC
+	  (minus:SWI (match_operand:SWI 2 "nonimmediate_operand" "<r>m,<r>")
+		     (match_operand:SWI 3 "<general_operand>" "<r><i>,<r><m>"))
+	  (const_int 0))
+	(unspec:SI
+	  [(match_operand:SI 4 "const_0_to_15_operand")]
+	  UNSPEC_APX_DFV)))]
+ "TARGET_APX_CCMP"
+ "ccmp%C1{<imodesuffix>}\t%G4 {%3, %2|%2, %3}"
+ [(set_attr "type" "icmp")
+  (set_attr "mode" "<MODE>")
+  (set_attr "length_immediate" "1")
+  (set_attr "prefix" "evex")])
+
 (define_expand "@cmp<mode>_1"
   [(set (reg:CC FLAGS_REG)
 	(compare:CC (match_operand:SWI48 0 "nonimmediate_operand")
@@ -1850,10 +1873,18 @@  (define_expand "cbranchcc4"
   DONE;
 })
 
-(define_expand "cstorecc4"
+;; For conditional compare, the middle-end hook will convert
+;; CCmode to sub-CCmode using SELECT_CC_MODE macro and try
+;; to find cstore<submodes> in optab. Add ALL_CC to support
+;; the cstore after ccmp sequence.
+
+(define_mode_iterator ALL_CC
+ [CCGC CCGOC CCNO CCGZ CCA CCC CCO CCP CCS CCZ CC])
+
+(define_expand "cstore<mode>4"
   [(set (match_operand:QI 0 "register_operand")
               (match_operator 1 "comparison_operator"
-               [(match_operand 2 "flags_reg_operand")
+               [(match_operand:ALL_CC 2 "flags_reg_operand")
                 (match_operand 3 "const0_operand")]))]
   ""
 {
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 66021d59d4e..7e6fe91d1d6 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1359,6 +1359,9 @@  Enum(apx_features) String(ppx) Value(apx_ppx) Set(5)
 EnumValue
 Enum(apx_features) String(nf) Value(apx_nf) Set(6)
 
+EnumValue
+Enum(apx_features) String(ccmp) Value(apx_ccmp) Set(7)
+
 EnumValue
 Enum(apx_features) String(all) Value(apx_all) Set(1)
 
diff --git a/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c b/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c
new file mode 100644
index 00000000000..5a2dad89f1f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c
@@ -0,0 +1,63 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mapx-features=ccmp" } */
+
+int
+f1 (int a)
+{
+  return a < 17 || a == 32;
+}
+
+int
+f2 (int a)
+{
+  return a > 33 || a == 18;
+}
+
+int
+f3 (int a, int b)
+{
+  return a != 19 && b > 34;
+}
+
+int
+f4 (int a, int b)
+{
+  return a < 35 && b == 20;
+}
+
+int
+f5 (short a)
+{
+  return a == 0 || a == 5;
+}
+
+int
+f6 (long long a)
+{
+  return a == 6 || a == 0;
+}
+
+int
+f7 (char a, char b)
+{
+  return a > 0 && b <= 7;
+}
+
+int
+f8 (int a, int b)
+{
+  return a == 9 && b > 0;
+}
+
+int
+f9 (int a, int b)
+{
+  a += b;
+  return a == 3 || a == 0;
+}
+
+/* { dg-final { scan-assembler-times "ccmpg" 2 } } */
+/* { dg-final { scan-assembler-times "ccmple" 2 } } */
+/* { dg-final { scan-assembler-times "ccmpne" 4 } } */
+/* { dg-final { scan-assembler-times "ccmpe" 1 } } */
+
diff --git a/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c b/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c
new file mode 100644
index 00000000000..30a1c216c1b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c
@@ -0,0 +1,57 @@ 
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-require-effective-target apxf } */
+/* { dg-options "-O3 -mno-apxf" } */
+
+__attribute__((noinline, noclone, target("apxf")))
+int foo_apx(int a, int b, int c, int d)
+{
+  int sum = a;
+
+  if (a != c)
+    {
+      c += d;
+      a += b;
+      sum += a + c;
+      if (b != d && sum < c || sum > d)
+	{
+	  b += d;
+	  sum += b;
+	}
+    }
+
+  return sum;
+}
+
+__attribute__((noinline, noclone, target("no-apxf")))
+int foo_noapx(int a, int b, int c, int d)
+{
+  int sum = a;
+
+  if (a != c)
+    {
+      c += d;
+      a += b;
+      sum += a + c;
+      if (b != d && sum < c || sum > d)
+	{
+	  b += d;
+	  sum += b;
+	}
+    }
+
+  return sum;
+}
+
+int main (void)
+{
+  if (!__builtin_cpu_supports ("apxf"))
+    return 0;
+
+  int val1 = foo_noapx (23, 17, 32, 44);
+  int val2 = foo_apx (23, 17, 32, 44);
+
+  if (val1 != val2)
+    __builtin_abort ();
+
+  return 0;
+}