Patchwork [i386] : Fix PR 53399, "*ffs" pattern generates wrong code with BMI enabled

login
register
mail settings
Submitter Uros Bizjak
Date May 21, 2012, 3:43 p.m.
Message ID <CAFULd4b_iwiYpWEK+vM9HP2Snc497RTUr23NWSWPDx-jM+dr1Q@mail.gmail.com>
Download mbox | patch
Permalink /patch/160385/
State New
Headers show

Comments

Uros Bizjak - May 21, 2012, 3:43 p.m.
Hello!

It turned out that "rep; bsf" is not a 100% substitute for "tzcnt"
since the former sets ZF for zero input, while the later sets CF.
Attached patch fixes this oversight in ffs sequence.

2012-05-24  Uros Bizjak  <ubizjak@gmail.com>

	PR target/53399
	* config/i386/i386.md (ffs<mode>2): Generate CCCmode compare
	for TARGET_BMI.
	(ffssi2_no_cmove): Ditto.
	(*ffs<mode>_1): Remove insn pattern.
	(*tzcnt<mode>_1): New insn pattern.
	(*bsf<mode>1): Ditto.

Tested on x86_64-pc-linux-gnu {,-m32}. Tested on BMI target by Kirill.

Committed to mainline SVN.

Uros.

Patch

Index: i386.md
===================================================================
--- i386.md	(revision 187694)
+++ i386.md	(working copy)
@@ -12132,26 +12132,31 @@ 
 
 (define_expand "ffs<mode>2"
   [(set (match_dup 2) (const_int -1))
-   (parallel [(set (reg:CCZ FLAGS_REG)
-		   (compare:CCZ
-		     (match_operand:SWI48 1 "nonimmediate_operand")
-		     (const_int 0)))
+   (parallel [(set (match_dup 3) (match_dup 4))
 	      (set (match_operand:SWI48 0 "register_operand")
-		   (ctz:SWI48 (match_dup 1)))])
+		   (ctz:SWI48
+		     (match_operand:SWI48 1 "nonimmediate_operand")))])
    (set (match_dup 0) (if_then_else:SWI48
-			(eq (reg:CCZ FLAGS_REG) (const_int 0))
+			(eq (match_dup 3) (const_int 0))
 			(match_dup 2)
 			(match_dup 0)))
    (parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (const_int 1)))
 	      (clobber (reg:CC FLAGS_REG))])]
   ""
 {
+  enum machine_mode flags_mode;
+
   if (<MODE>mode == SImode && !TARGET_CMOVE)
     {
       emit_insn (gen_ffssi2_no_cmove (operands[0], operands [1]));
       DONE;
     }
+
+  flags_mode = TARGET_BMI ? CCCmode : CCZmode;
+
   operands[2] = gen_reg_rtx (<MODE>mode);
+  operands[3] = gen_rtx_REG (flags_mode, FLAGS_REG);
+  operands[4] = gen_rtx_COMPARE (flags_mode, operands[1], const0_rtx);
 })
 
 (define_insn_and_split "ffssi2_no_cmove"
@@ -12162,11 +12167,10 @@ 
   "!TARGET_CMOVE"
   "#"
   "&& reload_completed"
-  [(parallel [(set (reg:CCZ FLAGS_REG)
-		   (compare:CCZ (match_dup 1) (const_int 0)))
+  [(parallel [(set (match_dup 4) (match_dup 5))
 	      (set (match_dup 0) (ctz:SI (match_dup 1)))])
    (set (strict_low_part (match_dup 3))
-	(eq:QI (reg:CCZ FLAGS_REG) (const_int 0)))
+	(eq:QI (match_dup 4) (const_int 0)))
    (parallel [(set (match_dup 2) (neg:SI (match_dup 2)))
 	      (clobber (reg:CC FLAGS_REG))])
    (parallel [(set (match_dup 0) (ior:SI (match_dup 0) (match_dup 2)))
@@ -12174,37 +12178,38 @@ 
    (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
 	      (clobber (reg:CC FLAGS_REG))])]
 {
+  enum machine_mode flags_mode = TARGET_BMI ? CCCmode : CCZmode;
+
   operands[3] = gen_lowpart (QImode, operands[2]);
+  operands[4] = gen_rtx_REG (flags_mode, FLAGS_REG);
+  operands[5] = gen_rtx_COMPARE (flags_mode, operands[1], const0_rtx);
+
   ix86_expand_clear (operands[2]);
 })
 
-(define_insn "*ffs<mode>_1"
+(define_insn "*tzcnt<mode>_1"
+  [(set (reg:CCC FLAGS_REG)
+	(compare:CCC (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+		     (const_int 0)))
+   (set (match_operand:SWI48 0 "register_operand" "=r")
+	(ctz:SWI48 (match_dup 1)))]
+  "TARGET_BMI"
+  "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")
+   (set_attr "prefix_0f" "1")
+   (set_attr "prefix_rep" "1")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*bsf<mode>_1"
   [(set (reg:CCZ FLAGS_REG)
 	(compare:CCZ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
 		     (const_int 0)))
    (set (match_operand:SWI48 0 "register_operand" "=r")
 	(ctz:SWI48 (match_dup 1)))]
   ""
-{
-  if (TARGET_BMI)
-    return "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
-  else if (optimize_function_for_size_p (cfun))
-    ;
-  else if (TARGET_GENERIC)
-    /* tzcnt expands to rep;bsf and we can use it even if !TARGET_BMI.  */
-    return "rep; bsf{<imodesuffix>}\t{%1, %0|%0, %1}";
-
-  return "bsf{<imodesuffix>}\t{%1, %0|%0, %1}";
-}
+  "bsf{<imodesuffix>}\t{%1, %0|%0, %1}"
   [(set_attr "type" "alu1")
    (set_attr "prefix_0f" "1")
-   (set (attr "prefix_rep")
-     (if_then_else
-       (ior (match_test "TARGET_BMI")
-	    (and (not (match_test "optimize_function_for_size_p (cfun)"))
-		 (match_test "TARGET_GENERIC")))
-       (const_string "1")
-       (const_string "0")))
    (set_attr "mode" "<MODE>")])
 
 (define_insn "ctz<mode>2"