Patchwork Add support for lzd and popc instructions on sparc.

login
register
mail settings
Submitter David Miller
Date Oct. 6, 2011, 7:18 p.m.
Message ID <20111006.151854.2226720039458797767.davem@davemloft.net>
Download mbox | patch
Permalink /patch/118154/
State New
Headers show

Comments

David Miller - Oct. 6, 2011, 7:18 p.m.
From: Richard Henderson <rth@redhat.com>
Date: Thu, 06 Oct 2011 10:47:28 -0700

> You've said that POPC only operates on the full 64-bit register,
> but I see no zero-extend of the SImode input?  Similarly for 
> the clzsi patterns.

This addresses all of the problems you found, and also exposes the
"sub 32" to the compiler in RTL on 64-bit so it can be optimized.  And
I've verified that it does so when expanding ffs() and friends.

Committed to trunk.

--------------------

[PATCH] Correct errors in sparc SImode popcount/clz patterns when 64-bit.

	* config/sparc/sparc.md (popcount<mode>2, clz<mode>2): Split up into...
	(popcountdi2, popcountsi2, clzdi2, clzsi2): Explicit expanders; in the
	SImode 64-bit code-gen case, explicitly zero-extend and truncate.
	(*popcount<mode>_sp64): Split up into...
	(*popcountdi_sp64, *popcountsi_sp64): Explicit instantiations, and in
	the SImode case use truncate.
	(*clzsi_sp64): Rewrite to use truncate, and let the expander emit the
	subtract so the compiler can optimize it.
	(SIDI): Remove unused mode iterator.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@179628 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog             |   12 +++++
 gcc/config/sparc/sparc.md |   99 +++++++++++++++++++++++++++++++-------------
 2 files changed, 82 insertions(+), 29 deletions(-)

Patch

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a99d5ef..0649bbe 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,15 @@ 
+2011-10-06  David S. Miller  <davem@davemloft.net>
+
+	* config/sparc/sparc.md (popcount<mode>2, clz<mode>2): Split up into...
+	(popcountdi2, popcountsi2, clzdi2, clzsi2): Explicit expanders; in the
+	SImode 64-bit code-gen case, explicitly zero-extend and truncate.
+	(*popcount<mode>_sp64): Split up into...
+	(*popcountdi_sp64, *popcountsi_sp64): Explicit instantiations, and in
+	the SImode case use truncate.
+	(*clzsi_sp64): Rewrite to use truncate, and let the expander emit the
+	subtract so the compiler can optimize it.
+	(SIDI): Remove unused mode iterator.
+
 2011-10-06  Bernd Schmidt  <bernds@codesourcery.com>
 
 	* function.c (thread_prologue_and_epilogue_insns): Emit split
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index 15552b2..a6eba6c 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -206,8 +206,6 @@ 
 
 (define_mode_iterator V64N8 [V2SI V4HI])
 
-(define_mode_iterator SIDI [SI DI])
-
 ;; The upper 32 fp regs on the v9 can't hold SFmode values.  To deal with this
 ;; a second register class, EXTRA_FP_REGS, exists for the v9 chip.  The name
 ;; is a bit of a misnomer as it covers all 64 fp regs.  The corresponding
@@ -6806,36 +6804,24 @@ 
   [(set_attr "type" "multi")
    (set_attr "length" "8")])
 
-(define_expand "popcount<mode>2"
-  [(set (match_operand:SIDI 0 "register_operand" "")
-        (popcount:SIDI (match_operand:SIDI 1 "register_operand" "")))]
+(define_expand "popcountdi2"
+  [(set (match_operand:DI 0 "register_operand" "")
+        (popcount:DI (match_operand:DI 1 "register_operand" "")))]
   "TARGET_POPC"
 {
   if (! TARGET_ARCH64)
     {
-      emit_insn (gen_popcount<mode>_v8plus (operands[0], operands[1]));
+      emit_insn (gen_popcountdi_v8plus (operands[0], operands[1]));
       DONE;
     }
 })
 
-(define_insn "*popcount<mode>_sp64"
-  [(set (match_operand:SIDI 0 "register_operand" "=r")
-        (popcount:SIDI (match_operand:SIDI 1 "register_operand" "r")))]
+(define_insn "*popcountdi_sp64"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (popcount:DI (match_operand:DI 1 "register_operand" "r")))]
   "TARGET_POPC && TARGET_ARCH64"
   "popc\t%1, %0")
 
-(define_insn "popcountsi_v8plus"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-        (popcount:SI (match_operand:SI 1 "register_operand" "r")))]
-  "TARGET_POPC && ! TARGET_ARCH64"
-{
-  if (sparc_check_64 (operands[1], insn) <= 0)
-    output_asm_insn ("srl\t%1, 0, %1", operands);
-  return "popc\t%1, %0";
-}
-  [(set_attr "type" "multi")
-   (set_attr "length" "2")])
-
 (define_insn "popcountdi_v8plus"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (popcount:DI (match_operand:DI 1 "register_operand" "r")))
@@ -6849,14 +6835,49 @@ 
   [(set_attr "type" "multi")
    (set_attr "length" "5")])
 
-(define_expand "clz<mode>2"
-  [(set (match_operand:SIDI 0 "register_operand" "")
-        (clz:SIDI (match_operand:SIDI 1 "register_operand" "")))]
+(define_expand "popcountsi2"
+  [(set (match_dup 2)
+        (zero_extend:DI (match_operand:SI 1 "register_operand" "")))
+   (set (match_operand:SI 0 "register_operand" "")
+        (truncate:SI (popcount:DI (match_dup 2))))]
+  "TARGET_POPC"
+{
+  if (! TARGET_ARCH64)
+    {
+      emit_insn (gen_popcountsi_v8plus (operands[0], operands[1]));
+      DONE;
+    }
+  else
+    operands[2] = gen_reg_rtx (DImode);
+})
+
+(define_insn "*popcountsi_sp64"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (truncate:SI
+          (popcount:DI (match_operand:DI 1 "register_operand" "r"))))]
+  "TARGET_POPC && TARGET_ARCH64"
+  "popc\t%1, %0")
+
+(define_insn "popcountsi_v8plus"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (popcount:SI (match_operand:SI 1 "register_operand" "r")))]
+  "TARGET_POPC && ! TARGET_ARCH64"
+{
+  if (sparc_check_64 (operands[1], insn) <= 0)
+    output_asm_insn ("srl\t%1, 0, %1", operands);
+  return "popc\t%1, %0";
+}
+  [(set_attr "type" "multi")
+   (set_attr "length" "2")])
+
+(define_expand "clzdi2"
+  [(set (match_operand:DI 0 "register_operand" "")
+        (clz:DI (match_operand:DI 1 "register_operand" "")))]
   "TARGET_VIS3"
 {
   if (! TARGET_ARCH64)
     {
-      emit_insn (gen_clz<mode>_v8plus (operands[0], operands[1]));
+      emit_insn (gen_clzdi_v8plus (operands[0], operands[1]));
       DONE;
     }
 })
@@ -6880,13 +6901,33 @@ 
   [(set_attr "type" "multi")
    (set_attr "length" "5")])
 
+(define_expand "clzsi2"
+  [(set (match_dup 2)
+        (zero_extend:DI (match_operand:SI 1 "register_operand" "")))
+   (set (match_dup 3)
+        (truncate:SI (clz:DI (match_dup 2))))
+   (set (match_operand:SI 0 "register_operand" "")
+        (minus:SI (match_dup 3) (const_int 32)))]
+  "TARGET_VIS3"
+{
+  if (! TARGET_ARCH64)
+    {
+      emit_insn (gen_clzsi_v8plus (operands[0], operands[1]));
+      DONE;
+    }
+  else
+    {
+      operands[2] = gen_reg_rtx (DImode);
+      operands[3] = gen_reg_rtx (SImode);
+    }
+})
+
 (define_insn "*clzsi_sp64"
   [(set (match_operand:SI 0 "register_operand" "=r")
-        (clz:SI (match_operand:SI 1 "register_operand" "r")))]
+        (truncate:SI
+          (clz:DI (match_operand:DI 1 "register_operand" "r"))))]
   "TARGET_VIS3 && TARGET_ARCH64"
-  "lzd\t%1, %0\n\tsub\t%0, 32, %0"
-  [(set_attr "type" "multi")
-   (set_attr "length" "2")])
+  "lzd\t%1, %0")
 
 (define_insn "clzsi_v8plus"
   [(set (match_operand:SI 0 "register_operand" "=r")