diff mbox

rs6000: clz/ctz/ffs improvement (PR78683)

Message ID 244a377478f58e23a6508758901f84cf5b3ab4d2.1481277748.git.segher@kernel.crashing.org
State New
Headers show

Commit Message

Segher Boessenkool Dec. 9, 2016, 10:37 a.m. UTC
On CPUs that implement popcnt[wd] but not cnttz[wd] we can do better for
the ctz sequences than we do today.

C[LT]Z_DEFINED_VALUE_AT_ZERO can return 2, since we always return the
same fixed value (only dependent on TARGET_* options).

I originally tried to have the generic code handle this; that would be
too much surgery for stage 3 though.

Bootstrapped and tested on powerpc64-linux {-m32,-m64}; also tested
manually with {-m32,-m64} -mcpu=power{4,7,9}.  Is this okay for trunk?


Segher


2016-12-09  Segher Boessenkool  <segher@kernel.crashing.org>

	PR target/78683
	* config/rs6000/rs6000.h (CLZ_DEFINED_VALUE_AT_ZERO):
	Use GET_MODE_BITSIZE.  Return 2.
	(CTZ_DEFINED_VALUE_AT_ZERO): Use GET_MODE_BITSIZE.  Return 2.  Handle
	TARGET_POPCNTD the same as TARGET_CTZ.
	* config/rs6000/rs6000.md (ctz<mode>2): Reimplement.
	(ffs<mode>2): Reimplement.

---
 gcc/config/rs6000/rs6000.h  | 11 ++++----
 gcc/config/rs6000/rs6000.md | 62 +++++++++++++++++++++++----------------------
 2 files changed, 38 insertions(+), 35 deletions(-)

Comments

David Edelsohn Dec. 9, 2016, 2:53 p.m. UTC | #1
On Fri, Dec 9, 2016 at 2:37 AM, Segher Boessenkool
<segher@kernel.crashing.org> wrote:
> On CPUs that implement popcnt[wd] but not cnttz[wd] we can do better for
> the ctz sequences than we do today.
>
> C[LT]Z_DEFINED_VALUE_AT_ZERO can return 2, since we always return the
> same fixed value (only dependent on TARGET_* options).
>
> I originally tried to have the generic code handle this; that would be
> too much surgery for stage 3 though.
>
> Bootstrapped and tested on powerpc64-linux {-m32,-m64}; also tested
> manually with {-m32,-m64} -mcpu=power{4,7,9}.  Is this okay for trunk?
>
>
> Segher
>
>
> 2016-12-09  Segher Boessenkool  <segher@kernel.crashing.org>
>
>         PR target/78683
>         * config/rs6000/rs6000.h (CLZ_DEFINED_VALUE_AT_ZERO):
>         Use GET_MODE_BITSIZE.  Return 2.
>         (CTZ_DEFINED_VALUE_AT_ZERO): Use GET_MODE_BITSIZE.  Return 2. Handle
>         TARGET_POPCNTD the same as TARGET_CTZ.
>         * config/rs6000/rs6000.md (ctz<mode>2): Reimplement.
>         (ffs<mode>2): Reimplement.

Okay.

Thanks, David
diff mbox

Patch

diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 5d56927..fe314bf 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -2199,14 +2199,15 @@  do {									     \
 
 /* The cntlzw and cntlzd instructions return 32 and 64 for input of zero.  */
 #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
-  ((VALUE) = ((MODE) == SImode ? 32 : 64), 1)
+  ((VALUE) = GET_MODE_BITSIZE (MODE), 2)
 
 /* The CTZ patterns that are implemented in terms of CLZ return -1 for input of
-   zero.  The hardware instructions added in Power9 return 32 or 64.  */
+   zero.  The hardware instructions added in Power9 and the sequences using
+   popcount return 32 or 64.  */
 #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE)				\
-  ((!TARGET_CTZ)							\
-   ? ((VALUE) = -1, 1)							\
-   : ((VALUE) = ((MODE) == SImode ? 32 : 64), 1))
+  (TARGET_CTZ || TARGET_POPCNTD						\
+   ? ((VALUE) = GET_MODE_BITSIZE (MODE), 2)				\
+   : ((VALUE) = -1, 2))
 
 /* Specify the machine mode that pointers have.
    After generation of rtl, the compiler makes no further distinction
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 4726d73..777b996 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -2220,17 +2220,8 @@  (define_insn "clz<mode>2"
   [(set_attr "type" "cntlz")])
 
 (define_expand "ctz<mode>2"
-  [(set (match_dup 2)
-	(neg:GPR (match_operand:GPR 1 "gpc_reg_operand" "")))
-   (set (match_dup 3)
-	(and:GPR (match_dup 1)
-		 (match_dup 2)))
-   (set (match_dup 4)
-	(clz:GPR (match_dup 3)))
-   (parallel [(set (match_operand:GPR 0 "gpc_reg_operand" "")
-		   (minus:GPR (match_dup 5)
-			      (match_dup 4)))
-	      (clobber (reg:GPR CA_REGNO))])]
+   [(set (match_operand:GPR 0 "gpc_reg_operand")
+	 (ctz:GPR (match_operand:GPR 1 "gpc_reg_operand")))]
   ""
 {
   if (TARGET_CTZ)
@@ -2239,10 +2230,26 @@  (define_expand "ctz<mode>2"
       DONE;
     }
 
-  operands[2] = gen_reg_rtx (<MODE>mode);
-  operands[3] = gen_reg_rtx (<MODE>mode);
-  operands[4] = gen_reg_rtx (<MODE>mode);
-  operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - 1);
+  rtx tmp1 = gen_reg_rtx (<MODE>mode);
+  rtx tmp2 = gen_reg_rtx (<MODE>mode);
+  rtx tmp3 = gen_reg_rtx (<MODE>mode);
+
+  if (TARGET_POPCNTD)
+    {
+      emit_insn (gen_add<mode>3 (tmp1, operands[1], constm1_rtx));
+      emit_insn (gen_one_cmpl<mode>2 (tmp2, operands[1]));
+      emit_insn (gen_and<mode>3 (tmp3, tmp1, tmp2));
+      emit_insn (gen_popcntd<mode>2 (operands[0], tmp3));
+    }
+  else
+    {
+      emit_insn (gen_neg<mode>2 (tmp1, operands[1]));
+      emit_insn (gen_and<mode>3 (tmp2, operands[1], tmp1));
+      emit_insn (gen_clz<mode>2 (tmp3, tmp2));
+      emit_insn (gen_sub<mode>3 (operands[0], GEN_INT (<bits> - 1), tmp3));
+    }
+
+  DONE;
 })
 
 (define_insn "ctz<mode>2_hw"
@@ -2253,23 +2260,18 @@  (define_insn "ctz<mode>2_hw"
   [(set_attr "type" "cntlz")])
 
 (define_expand "ffs<mode>2"
-  [(set (match_dup 2)
-	(neg:GPR (match_operand:GPR 1 "gpc_reg_operand" "")))
-   (set (match_dup 3)
-	(and:GPR (match_dup 1)
-		 (match_dup 2)))
-   (set (match_dup 4)
-	(clz:GPR (match_dup 3)))
-   (parallel [(set (match_operand:GPR 0 "gpc_reg_operand" "")
-		   (minus:GPR (match_dup 5)
-			      (match_dup 4)))
-	      (clobber (reg:GPR CA_REGNO))])]
+  [(set (match_operand:GPR 0 "gpc_reg_operand")
+	(ffs:GPR (match_operand:GPR 1 "gpc_reg_operand")))]
   ""
 {
-  operands[2] = gen_reg_rtx (<MODE>mode);
-  operands[3] = gen_reg_rtx (<MODE>mode);
-  operands[4] = gen_reg_rtx (<MODE>mode);
-  operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
+  rtx tmp1 = gen_reg_rtx (<MODE>mode);
+  rtx tmp2 = gen_reg_rtx (<MODE>mode);
+  rtx tmp3 = gen_reg_rtx (<MODE>mode);
+  emit_insn (gen_neg<mode>2 (tmp1, operands[1]));
+  emit_insn (gen_and<mode>3 (tmp2, operands[1], tmp1));
+  emit_insn (gen_clz<mode>2 (tmp3, tmp2));
+  emit_insn (gen_sub<mode>3 (operands[0], GEN_INT (<bits>), tmp3));
+  DONE;
 })