diff mbox series

[8/8,APX,NF] Support APX NF for lzcnt/tzcnt/popcnt

Message ID DM4PR11MB5487F4AE2EE74476609287F8ECEC2@DM4PR11MB5487.namprd11.prod.outlook.com
State New
Headers show
Series [1/8,APX,NF] : Support APX NF add | expand

Commit Message

Kong, Lingling May 15, 2024, 7:47 a.m. UTC
gcc/ChangeLog:

	* config/i386/i386.md (clz<mode>2_lzcnt_nf): New define_insn.
	(*clz<mode>2_lzcnt_falsedep_nf): Ditto.
	(<lt_zcnt>_<mode>_nf): Ditto.
	(*<lt_zcnt>_<mode>_falsedep_nf): Ditto.
	(<lt_zcnt>_hi_nf): Ditto.
	(popcount<mode>2_nf): Ditto.
	(*popcount<mode>2_falsedep_nf): Ditto.
	(popcounthi2_nf): Ditto.
---
 gcc/config/i386/i386.md | 132 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 132 insertions(+)

 
+(define_insn_and_split "<lt_zcnt>_<mode>_nf" ; LT_ZCNT unspec on SWI48 modes, emitted with the {nf} prefix
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(unspec:SWI48
+	  [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))]
+  "TARGET_APX_NF" ; insn condition
+  "%{nf%} <lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}" ; output template
+  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
+   && optimize_function_for_speed_p (cfun)
+   && !reg_mentioned_p (operands[0], operands[1])" ; split condition: skipped when the source already mentions the destination
+  [(parallel
+    [(set (match_dup 0)
+	  (unspec:SWI48 [(match_dup 1)] LT_ZCNT))
+     (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])] ; replacement: same set plus a FALSE_DEP marker on operand 0
+  "ix86_expand_clear (operands[0]);" ; preparation statement; presumably zeroes the destination first -- confirm in ix86_expand_clear
+  [(set_attr "type" "<lt_zcnt_type>")
+   (set_attr "prefix_0f" "1")
+   (set_attr "prefix_rep" "1")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn_and_split "<lt_zcnt>_<mode>"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
 	(unspec:SWI48
@@ -21182,6 +21231,20 @@
 ; False dependency happens when destination is only updated by tzcnt,
 ; lzcnt or popcnt.  There is no false dependency when destination is
 ; also used in source.
+; APX NF form of the false-dependency pattern; its template emits the {nf} prefix.
+(define_insn "*<lt_zcnt>_<mode>_falsedep_nf"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(unspec:SWI48
+	  [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))
+   (unspec [(match_operand:SWI48 2 "register_operand" "0")] ; operand 2 is tied to operand 0 by the "0" constraint
+	   UNSPEC_INSN_FALSE_DEP)]
+  "TARGET_APX_NF" ; insn condition
+  "%{nf%} <lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}" ; operand 2 is not printed
+  [(set_attr "type" "<lt_zcnt_type>")
+   (set_attr "prefix_0f" "1")
+   (set_attr "prefix_rep" "1")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*<lt_zcnt>_<mode>_falsedep"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
 	(unspec:SWI48
@@ -21196,6 +21259,17 @@
    (set_attr "prefix_rep" "1")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "<lt_zcnt>_hi_nf" ; HImode LT_ZCNT unspec, emitted with the {nf} prefix
+  [(set (match_operand:HI 0 "register_operand" "=r")
+	(unspec:HI
+	  [(match_operand:HI 1 "nonimmediate_operand" "rm")] LT_ZCNT))]
+  "TARGET_APX_NF" ; insn condition
+  "%{nf%} <lt_zcnt>{w}\t{%1, %0|%0, %1}" ; fixed {w} suffix instead of <imodesuffix>
+  [(set_attr "type" "<lt_zcnt_type>")
+   (set_attr "prefix_0f" "1")
+   (set_attr "prefix_rep" "1")
+   (set_attr "mode" "HI")])
+
 (define_insn "<lt_zcnt>_hi"
   [(set (match_operand:HI 0 "register_operand" "=r")
 	(unspec:HI
@@ -21620,6 +21694,30 @@
   [(set_attr "type" "bitmanip")
    (set_attr "mode" "<MODE>")])
 
+(define_insn_and_split "popcount<mode>2_nf" ; popcount on SWI48 modes, emitted with the {nf} prefix
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(popcount:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")))]
+  "TARGET_APX_NF && TARGET_POPCNT" ; insn condition
+{
+#if TARGET_MACHO
+  return "%{nf%} popcnt\t{%1, %0|%0, %1}"; /* Mach-O: template without the <imodesuffix> suffix.  */
+#else
+  return "%{nf%} popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
+#endif
+}
+  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
+   && optimize_function_for_speed_p (cfun)
+   && !reg_mentioned_p (operands[0], operands[1])" ; split condition: skipped when the source already mentions the destination
+  [(parallel
+    [(set (match_dup 0)
+	  (popcount:SWI48 (match_dup 1)))
+     (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])] ; replacement: same set plus a FALSE_DEP marker on operand 0
+  "ix86_expand_clear (operands[0]);" ; preparation statement; presumably zeroes the destination first -- confirm in ix86_expand_clear
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn_and_split "popcount<mode>2"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
 	(popcount:SWI48
@@ -21649,6 +21747,24 @@
 ; False dependency happens when destination is only updated by tzcnt,
 ; lzcnt or popcnt.  There is no false dependency when destination is
 ; also used in source.
+(define_insn "*popcount<mode>2_falsedep_nf" ; popcount with a FALSE_DEP marker, emitted with the {nf} prefix
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(popcount:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
+   (unspec [(match_operand:SWI48 2 "register_operand" "0")] ; operand 2 is tied to operand 0 by the "0" constraint
+	   UNSPEC_INSN_FALSE_DEP)]
+  "TARGET_APX_NF && TARGET_POPCNT" ; insn condition
+{
+#if TARGET_MACHO
+  return "%{nf%} popcnt\t{%1, %0|%0, %1}"; /* Mach-O: template without the <imodesuffix> suffix.  */
+#else
+  return "%{nf%} popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
+#endif
+}
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*popcount<mode>2_falsedep"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
 	(popcount:SWI48
@@ -21806,6 +21922,22 @@
   DONE;
 })
 
+(define_insn "popcounthi2_nf" ; HImode popcount, emitted with the {nf} prefix
+  [(set (match_operand:HI 0 "register_operand" "=r")
+	(popcount:HI
+	  (match_operand:HI 1 "nonimmediate_operand" "rm")))]
+  "TARGET_APX_NF && TARGET_POPCNT" ; insn condition
+{
+#if TARGET_MACHO
+  return "%{nf%} popcnt\t{%1, %0|%0, %1}"; /* Mach-O: template without the {w} suffix.  */
+#else
+  return "%{nf%} popcnt{w}\t{%1, %0|%0, %1}";
+#endif
+}
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "HI")])
+
 (define_insn "popcounthi2"
   [(set (match_operand:HI 0 "register_operand" "=r")
 	(popcount:HI
--
2.31.1
diff mbox series

Patch

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 55f65a31b16..ddde83e57f5 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -21029,6 +21029,24 @@ 
   operands[3] = gen_reg_rtx (<MODE>mode);
 })
 
+(define_insn_and_split "clz<mode>2_lzcnt_nf" ; clz on SWI48 modes via lzcnt, emitted with the {nf} prefix
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(clz:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")))]
+  "TARGET_APX_NF && TARGET_LZCNT" ; insn condition
+  "%{nf%} lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}" ; output template
+  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
+   && optimize_function_for_speed_p (cfun)
+   && !reg_mentioned_p (operands[0], operands[1])" ; split condition: skipped when the source already mentions the destination
+  [(parallel
+    [(set (match_dup 0)
+	  (clz:SWI48 (match_dup 1)))
+     (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])] ; replacement: same set plus a FALSE_DEP marker on operand 0
+  "ix86_expand_clear (operands[0]);" ; preparation statement; presumably zeroes the destination first -- confirm in ix86_expand_clear
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn_and_split "clz<mode>2_lzcnt"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
 	(clz:SWI48
@@ -21052,6 +21070,18 @@ 
 ; False dependency happens when destination is only updated by tzcnt,
 ; lzcnt or popcnt.  There is no false dependency when destination is
 ; also used in source.
+(define_insn "*clz<mode>2_lzcnt_falsedep_nf" ; clz with a FALSE_DEP marker, emitted with the {nf} prefix
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(clz:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
+   (unspec [(match_operand:SWI48 2 "register_operand" "0")] ; operand 2 is tied to operand 0 by the "0" constraint
+	   UNSPEC_INSN_FALSE_DEP)]
+  "TARGET_APX_NF && TARGET_LZCNT" ; insn condition
+  "%{nf%} lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}" ; operand 2 is not printed
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*clz<mode>2_lzcnt_falsedep"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
 	(clz:SWI48
@@ -21158,6 +21188,25 @@ 
 ;; Version of lzcnt/tzcnt that is expanded from intrinsics.  This version
 ;; provides operand size as output when source operand is zero.