@@ -1,3 +1,16 @@
+2011-10-05 David S. Miller <davem@davemloft.net>
+
+ * config/sparc/sparc.opt (POPC): New option.
+ * doc/invoke.texi: Document it.
+ * config/sparc/sparc.c (sparc_option_override): Enable MASK_POPC by
+ default on Niagara-2 and later.
+ * config/sparc/sparc.h (CLZ_DEFINED_VALUE_AT_ZERO): Define.
+ * config/sparc/sparc.md (SIDI): New mode iterator.
+ (ffsdi2): Delete commented out pattern and comments.
+ (popcount<mode>2, clz<mode>2): New expanders.
+ (*popcount<mode>_sp64, popcountsi_v8plus, popcountdi_v8plus,
+ *clzdi_sp64, clzdi_v8plus, *clzsi_sp64, clzsi_v8plus): New insns.
+
2011-10-06 Artjoms Sinkarovs <artyom.shinkaroff@gmail.com>
PR middle-end/50607
@@ -774,11 +774,11 @@ sparc_option_override (void)
{ MASK_ISA,
MASK_V9|MASK_DEPRECATED_V8_INSNS},
/* UltraSPARC T2 */
- { MASK_ISA, MASK_V9|MASK_VIS2},
+ { MASK_ISA, MASK_V9|MASK_POPC|MASK_VIS2},
/* UltraSPARC T3 */
- { MASK_ISA, MASK_V9|MASK_VIS2|MASK_VIS3|MASK_FMAF},
+ { MASK_ISA, MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF},
/* UltraSPARC T4 */
- { MASK_ISA, MASK_V9|MASK_VIS2|MASK_VIS3|MASK_FMAF},
+ { MASK_ISA, MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF},
};
const struct cpu_table *cpu;
unsigned int i;
@@ -1608,6 +1608,11 @@ do { \
is done just by pretending it is already truncated. */
#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+/* Value produced by clz for a zero input.  For SImode, the top 32 bits of
+   the register are cleared before lzd and 32 is subtracted from its
+   result, so a zero input gives 32; every other mode gives 64.  The
+   chosen value is stored in VALUE and the macro evaluates to 1.  */
+#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
+  ((VALUE) = ((MODE) != SImode ? 64 : 32), 1)
+
/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
return the mode to be used for the comparison. For floating-point,
CCFP[E]mode is used. CC_NOOVmode should be used when the first operand
@@ -206,6 +206,8 @@
(define_mode_iterator V64N8 [V2SI V4HI])
+;; Iterator over SImode and DImode, used by the popcount and clz patterns.
+(define_mode_iterator SIDI [SI DI])
+
;; The upper 32 fp regs on the v9 can't hold SFmode values. To deal with this
;; a second register class, EXTRA_FP_REGS, exists for the v9 chip. The name
;; is a bit of a misnomer as it covers all 64 fp regs. The corresponding
@@ -6804,21 +6806,99 @@
[(set_attr "type" "multi")
(set_attr "length" "8")])
-;; ??? This should be a define expand, so that the extra instruction have
-;; a chance of being optimized away.
-
-;; Disabled because none of the UltraSPARCs implement popc. The HAL R1
-;; does, but no one uses that and we don't have a switch for it.
-;
-;(define_insn "ffsdi2"
-; [(set (match_operand:DI 0 "register_operand" "=&r")
-; (ffs:DI (match_operand:DI 1 "register_operand" "r")))
-; (clobber (match_scratch:DI 2 "=&r"))]
-; "TARGET_ARCH64"
-; "neg\t%1, %2\;xnor\t%1, %2, %2\;popc\t%2, %0\;movzr\t%1, 0, %0"
-; [(set_attr "type" "multi")
-; (set_attr "length" "4")])
+;; Population count for SImode and DImode, available when TARGET_POPC is
+;; set.  On non-ARCH64 targets the corresponding *_v8plus insn is emitted
+;; explicitly; otherwise the C block falls through and the RTL template
+;; of this expander is emitted unchanged.
+(define_expand "popcount<mode>2"
+ [(set (match_operand:SIDI 0 "register_operand" "")
+ (popcount:SIDI (match_operand:SIDI 1 "register_operand" "")))]
+ "TARGET_POPC"
+{
+ if (! TARGET_ARCH64)
+ {
+ emit_insn (gen_popcount<mode>_v8plus (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+;; popc counts the set bits of the whole 64-bit source register, so on
+;; ARCH64 it implements the DImode operation directly.
+(define_insn "*popcountdi_sp64"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (popcount:DI (match_operand:DI 1 "register_operand" "r")))]
+  "TARGET_POPC && TARGET_ARCH64"
+  "popc\t%1, %0")
+
+;; An SImode value only defines the low 32 bits of its register; the upper
+;; bits may be nonzero (e.g. a sign-extended negative int) and must not be
+;; counted.  Zero-extend the input into the destination first, then count.
+;; Writing %0 before the popc is safe because %1 is read only by the srl.
+(define_insn "*popcountsi_sp64"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (popcount:SI (match_operand:SI 1 "register_operand" "r")))]
+  "TARGET_POPC && TARGET_ARCH64"
+  "srl\t%1, 0, %0\n\tpopc\t%0, %0"
+  [(set_attr "type" "multi")
+   (set_attr "length" "2")])
+;; SImode population count for non-ARCH64 targets.  Unless sparc_check_64
+;; returns a positive value for the input, an "srl %1, 0, %1" is emitted
+;; ahead of the popc (presumably to clear the upper 32 bits of the
+;; register so popc does not count them -- confirm against
+;; sparc_check_64).  The length of 2 accounts for the optional srl.
+(define_insn "popcountsi_v8plus"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (popcount:SI (match_operand:SI 1 "register_operand" "r")))]
+ "TARGET_POPC && ! TARGET_ARCH64"
+{
+ if (sparc_check_64 (operands[1], insn) <= 0)
+ output_asm_insn ("srl\t%1, 0, %1", operands);
+ return "popc\t%1, %0";
+}
+ [(set_attr "type" "multi")
+ (set_attr "length" "2")])
+
+;; DImode population count for non-ARCH64 targets, where the operand is
+;; addressed as a high/low pair (%H1/%L1).  The high half is shifted left
+;; by 32 into the scratch register and OR-ed with the low half, popc runs
+;; on the merged value, the count is written to the low word of the
+;; result and the high word is cleared.  An "srl %L1, 0, %L1" is emitted
+;; first unless sparc_check_64 returns a positive value.  The length of 5
+;; accounts for the optional srl.
+(define_insn "popcountdi_v8plus"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (popcount:DI (match_operand:DI 1 "register_operand" "r")))
+ (clobber (match_scratch:SI 2 "=&h"))]
+ "TARGET_POPC && ! TARGET_ARCH64"
+{
+ if (sparc_check_64 (operands[1], insn) <= 0)
+ output_asm_insn ("srl\t%L1, 0, %L1", operands);
+ return "sllx\t%H1, 32, %2\n\tor\t%L1, %2, %2\n\tpopc\t%2, %L0\n\tclr\t%H0";
+}
+ [(set_attr "type" "multi")
+ (set_attr "length" "5")])
+
+;; Count leading zeros for SImode and DImode, available when TARGET_VIS3
+;; is set.  On non-ARCH64 targets the corresponding *_v8plus insn is
+;; emitted explicitly; otherwise the C block falls through and the RTL
+;; template of this expander is emitted unchanged.
+(define_expand "clz<mode>2"
+ [(set (match_operand:SIDI 0 "register_operand" "")
+ (clz:SIDI (match_operand:SIDI 1 "register_operand" "")))]
+ "TARGET_VIS3"
+{
+ if (! TARGET_ARCH64)
+ {
+ emit_insn (gen_clz<mode>_v8plus (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+;; DImode count-leading-zeros on ARCH64: a single lzd instruction.
+(define_insn "*clzdi_sp64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (clz:DI (match_operand:DI 1 "register_operand" "r")))]
+ "TARGET_VIS3 && TARGET_ARCH64"
+ "lzd\t%1, %0")
+
+;; DImode count-leading-zeros for non-ARCH64 targets, where the operand is
+;; addressed as a high/low pair (%H1/%L1).  The high half is shifted left
+;; by 32 into the scratch register and OR-ed with the low half, lzd runs
+;; on the merged value, the count is written to the low word of the
+;; result and the high word is cleared.  An "srl %L1, 0, %L1" is emitted
+;; first unless sparc_check_64 returns a positive value.  The length of 5
+;; accounts for the optional srl.
+(define_insn "clzdi_v8plus"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (clz:DI (match_operand:DI 1 "register_operand" "r")))
+ (clobber (match_scratch:SI 2 "=&h"))]
+ "TARGET_VIS3 && ! TARGET_ARCH64"
+{
+ if (sparc_check_64 (operands[1], insn) <= 0)
+ output_asm_insn ("srl\t%L1, 0, %L1", operands);
+ return "sllx\t%H1, 32, %2\n\tor\t%L1, %2, %2\n\tlzd\t%2, %L0\n\tclr\t%H0";
+}
+ [(set_attr "type" "multi")
+ (set_attr "length" "5")])
+
+;; lzd counts leading zeros across all 64 bits of the register.  An SImode
+;; value only defines the low 32 bits, so the upper bits (which may hold a
+;; sign extension) are cleared into the destination first; otherwise a
+;; negative input would make lzd return 0 and the final result -32.
+;; Subtracting 32 converts the 64-bit count into the SImode one and gives
+;; 32 for a zero input, matching CLZ_DEFINED_VALUE_AT_ZERO.  Writing %0
+;; before the lzd is safe because %1 is read only by the srl.
+(define_insn "*clzsi_sp64"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (clz:SI (match_operand:SI 1 "register_operand" "r")))]
+  "TARGET_VIS3 && TARGET_ARCH64"
+  "srl\t%1, 0, %0\n\tlzd\t%0, %0\n\tsub\t%0, 32, %0"
+  [(set_attr "type" "multi")
+   (set_attr "length" "3")])
+
+;; SImode count-leading-zeros for non-ARCH64 targets.  Unless
+;; sparc_check_64 returns a positive value for the input, an
+;; "srl %1, 0, %1" is emitted first (presumably to clear the upper 32 bits
+;; of the register -- confirm against sparc_check_64).  lzd then produces
+;; the count and 32 is subtracted from it to obtain the SImode result.
+;; The length of 3 accounts for the optional srl.
+(define_insn "clzsi_v8plus"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (clz:SI (match_operand:SI 1 "register_operand" "r")))]
+ "TARGET_VIS3 && ! TARGET_ARCH64"
+{
+ if (sparc_check_64 (operands[1], insn) <= 0)
+ output_asm_insn ("srl\t%1, 0, %1", operands);
+ return "lzd\t%1, %0\n\tsub\t%0, 32, %0";
+}
+ [(set_attr "type" "multi")
+ (set_attr "length" "3")])
;; Peepholes go at the end.
@@ -73,6 +73,10 @@ mfmaf
Target Report Mask(FMAF)
Use UltraSPARC Fused Multiply-Add extensions
+mpopc
+Target Report Mask(POPC)
+Use UltraSPARC Population-Count instruction
+
mptr64
Target Report RejectNegative Mask(PTR64)
Pointers are 64-bit
@@ -882,7 +882,7 @@ See RS/6000 and PowerPC Options.
-munaligned-doubles -mno-unaligned-doubles @gol
-mv8plus -mno-v8plus -mvis -mno-vis @gol
-mvis2 -mno-vis2 -mvis3 -mno-vis3 @gol
--mfmaf -mno-fmaf}
+-mfmaf -mno-fmaf -mpopc -mno-popc}
@emph{SPU Options}
@gccoptlist{-mwarn-reloc -merror-reloc @gol
@@ -17494,6 +17494,15 @@ default is @option{-mvis3} when targetting a cpu that supports such
instructions, such as niagara-3 and later. Setting @option{-mvis3}
also sets @option{-mvis2} and @option{-mvis}.
+@item -mpopc
+@itemx -mno-popc
+@opindex mpopc
+@opindex mno-popc
+With @option{-mpopc}, GCC generates code that takes advantage of the UltraSPARC
+population count instruction. The default is @option{-mpopc}
+when targeting a cpu that supports this instruction, such as Niagara-2 and
+later.
+
@item -mfmaf
@itemx -mno-fmaf
@opindex mfmaf
@@ -1,3 +1,8 @@
+2011-10-05 David S. Miller <davem@davemloft.net>
+
+ * gcc.target/sparc/lzd.c: New test.
+ * gcc.target/sparc/popc.c: New test.
+
2011-10-05 Uros Bizjak <ubizjak@gmail.com>
* gcc.dg/strlenopt-21.c (dg-options): Remove -fdump-tree-optimized.
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=niagara3" } */
+/* Each function below wraps one clz builtin (int, long, long long).  The
+   final scan expects the assembly output to contain exactly three lzd
+   instructions in total.  */
+int test_clz(int a)
+{
+ return __builtin_clz(a);
+}
+
+long test_clzl(long a)
+{
+ return __builtin_clzl(a);
+}
+
+long long test_clzll(long long a)
+{
+ return __builtin_clzll(a);
+}
+
+/* { dg-final { scan-assembler-times "lzd\t%" 3 } } */
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=niagara2" } */
+/* Each function below wraps one popcount builtin (int, long, long long).
+   The final scan expects the assembly output to contain exactly three
+   popc instructions in total.  */
+int test_popcount(int a)
+{
+ return __builtin_popcount(a);
+}
+
+long test_popcountl(long a)
+{
+ return __builtin_popcountl(a);
+}
+
+long long test_popcountll(long long a)
+{
+ return __builtin_popcountll(a);
+}
+
+/* { dg-final { scan-assembler-times "popc\t%" 3 } } */