===================================================================
@@ -0,0 +1,21 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-options "-mcpu=power9 -O3" } */
+/* Each % below must compile to one modulo insn, with no divide or multiply.  */
+int ismod (int a, int b) { return a%b; }
+long lsmod (long a, long b) { return a%b; }
+unsigned int iumod (unsigned int a, unsigned int b) { return a%b; }
+unsigned long lumod (unsigned long a, unsigned long b) { return a%b; }
+
+/* { dg-final { scan-assembler-times "modsw " 1 } } */
+/* { dg-final { scan-assembler-times "modsd " 1 } } */
+/* { dg-final { scan-assembler-times "moduw " 1 } } */
+/* { dg-final { scan-assembler-times "modud " 1 } } */
+/* { dg-final { scan-assembler-not "mullw " } } */
+/* { dg-final { scan-assembler-not "mulld " } } */
+/* { dg-final { scan-assembler-not "divw " } } */
+/* { dg-final { scan-assembler-not "divd " } } */
+/* { dg-final { scan-assembler-not "divwu " } } */
+/* { dg-final { scan-assembler-not "divdu " } } */
===================================================================
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { powerpc*-*-* && ilp32 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-options "-mcpu=power9 -O3" } */
+/* Each % below must compile to one modulo insn, with no divide or multiply.  */
+int ismod (int a, int b) { return a%b; }
+unsigned int iumod (unsigned int a, unsigned int b) { return a%b; }
+
+/* { dg-final { scan-assembler-times "modsw " 1 } } */
+/* { dg-final { scan-assembler-times "moduw " 1 } } */
+/* { dg-final { scan-assembler-not "mullw " } } */
+/* { dg-final { scan-assembler-not "divw " } } */
+/* { dg-final { scan-assembler-not "divwu " } } */
===================================================================
@@ -1635,6 +1635,30 @@ proc check_p8vector_hw_available { } {
}]
}
+# Return 1 if the target can execute power9 vector instructions (probed with
+# a small xsxexpdp program built with -mpower9-vector), 0 otherwise.  Cached.
+
+proc check_p9vector_hw_available { } {
+ return [check_cached_effective_target p9vector_hw_available {
+ # Some simulators are known to not support VSX/power9 instructions.
+ # For now, also disable on Darwin.
+ if { [istarget powerpc-*-eabi] || [istarget powerpc*-*-eabispe] || [istarget *-*-darwin*]} {
+ expr 0
+ } else {
+ set options "-mpower9-vector"
+ check_runtime_nocache p9vector_hw_available {
+ int main()
+ {
+ long e = -1;
+ vector double v = (vector double) { 0.0, 0.0 };
+ asm ("xsxexpdp %0,%1" : "+r" (e) : "wa" (v));
+ return e;
+ }
+ } $options
+ }
+ }]
+}
+
# Return 1 if the target supports executing VSX instructions, 0
# otherwise. Cache the result.
@@ -3358,6 +3382,31 @@ proc check_effective_target_powerpc_p8ve
}
}
+# Return 1 if this is a PowerPC target supporting -mpower9-vector, 0 otherwise.
+# Support is probed by compiling an xsxexpdp asm with that option.
+proc check_effective_target_powerpc_p9vector_ok { } {
+ if { ([istarget powerpc*-*-*]
+ && ![istarget powerpc-*-linux*paired*])
+ || [istarget rs6000-*-*] } {
+ # AltiVec is not supported on AIX before 5.3.
+ if { [istarget powerpc*-*-aix4*]
+ || [istarget powerpc*-*-aix5.1*]
+ || [istarget powerpc*-*-aix5.2*] } {
+ return 0
+ }
+ return [check_no_compiler_messages powerpc_p9vector_ok object {
+ int main (void) {
+ long e = -1;
+ vector double v = (vector double) { 0.0, 0.0 };
+ asm ("xsxexpdp %0,%1" : "+r" (e) : "wa" (v));
+ return e;
+ }
+ } "-mpower9-vector"]
+ } else {
+ return 0
+ }
+}
+
# Return 1 if this is a PowerPC target supporting -mvsx
proc check_effective_target_powerpc_vsx_ok { } {
@@ -5459,6 +5508,7 @@ proc is-effective-target { arg } {
"vmx_hw" { set selected [check_vmx_hw_available] }
"vsx_hw" { set selected [check_vsx_hw_available] }
"p8vector_hw" { set selected [check_p8vector_hw_available] }
+ "p9vector_hw" { set selected [check_p9vector_hw_available] }
"ppc_recip_hw" { set selected [check_ppc_recip_hw_available] }
"dfp_hw" { set selected [check_dfp_hw_available] }
"htm_hw" { set selected [check_htm_hw_available] }
@@ -5483,6 +5533,7 @@ proc is-effective-target-keyword { arg }
"vmx_hw" { return 1 }
"vsx_hw" { return 1 }
"p8vector_hw" { return 1 }
+ "p9vector_hw" { return 1 }
"ppc_recip_hw" { return 1 }
"dfp_hw" { return 1 }
"htm_hw" { return 1 }
@@ -6186,7 +6237,9 @@ proc check_vect_support_and_set_flags {
}
lappend DEFAULT_VECTCFLAGS "-maltivec"
- if [check_p8vector_hw_available] {
+ if [check_p9vector_hw_available] {
+ lappend DEFAULT_VECTCFLAGS "-mpower9-vector"
+ } elseif [check_p8vector_hw_available] {
lappend DEFAULT_VECTCFLAGS "-mpower8-vector"
} elseif [check_vsx_hw_available] {
lappend DEFAULT_VECTCFLAGS "-mvsx" "-mno-allow-movmisalign"
===================================================================
@@ -31844,8 +31844,8 @@ rs6000_rtx_costs (rtx x, machine_mode mo
else
*total = rs6000_cost->divsi;
}
- /* Add in shift and subtract for MOD. */
- if (code == MOD || code == UMOD)
+ /* Add in shift and subtract for MOD unless we have a mod instruction. */
+ if (!TARGET_MODULO && (code == MOD || code == UMOD))
*total += COSTS_N_INSNS (2);
return false;
===================================================================
@@ -2885,9 +2885,9 @@ (define_insn_and_split "*div<mode>3_sra_
(set_attr "cell_micro" "not")])
(define_expand "mod<mode>3"
- [(use (match_operand:GPR 0 "gpc_reg_operand" ""))
- (use (match_operand:GPR 1 "gpc_reg_operand" ""))
- (use (match_operand:GPR 2 "reg_or_cint_operand" ""))]
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "")
+ (mod:GPR (match_operand:GPR 1 "gpc_reg_operand" "")
+ (match_operand:GPR 2 "reg_or_cint_operand" "")))]
""
{
int i;
@@ -2897,16 +2897,93 @@ (define_expand "mod<mode>3"
if (GET_CODE (operands[2]) != CONST_INT
|| INTVAL (operands[2]) <= 0
|| (i = exact_log2 (INTVAL (operands[2]))) < 0)
- FAIL;
+ {
+ if (!TARGET_MODULO)
+ FAIL;
- temp1 = gen_reg_rtx (<MODE>mode);
- temp2 = gen_reg_rtx (<MODE>mode);
+ operands[2] = force_reg (<MODE>mode, operands[2]);
+ }
+ else
+ {
+ temp1 = gen_reg_rtx (<MODE>mode);
+ temp2 = gen_reg_rtx (<MODE>mode);
- emit_insn (gen_div<mode>3 (temp1, operands[1], operands[2]));
- emit_insn (gen_ashl<mode>3 (temp2, temp1, GEN_INT (i)));
- emit_insn (gen_sub<mode>3 (operands[0], operands[1], temp2));
- DONE;
+ emit_insn (gen_div<mode>3 (temp1, operands[1], operands[2]));
+ emit_insn (gen_ashl<mode>3 (temp2, temp1, GEN_INT (i)));
+ emit_insn (gen_sub<mode>3 (operands[0], operands[1], temp2));
+ DONE;
+ }
})
+
+;; Signed modulo instruction.  The earlyclobber on the result keeps it out of
+;; the input registers so the div/mod peephole2 below can eliminate the mod.
+(define_insn "*mod<mode>3"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r")
+ (mod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
+ (match_operand:GPR 2 "gpc_reg_operand" "r")))]
+ "TARGET_MODULO"
+ "mods<wd> %0,%1,%2"
+ [(set_attr "type" "div")
+ (set_attr "size" "<bits>")])
+
+;; Unsigned modulo instruction; same earlyclobber result as the signed pattern.
+(define_insn "umod<mode>3"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r")
+ (umod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
+ (match_operand:GPR 2 "gpc_reg_operand" "r")))]
+ "TARGET_MODULO"
+ "modu<wd> %0,%1,%2"
+ [(set_attr "type" "div")
+ (set_attr "size" "<bits>")])
+
+;; On machines with modulo support, when a div is followed by a mod of the
+;; same operands, reuse the quotient and compute the remainder the old
+;; fashioned way, since multiply/subtract is faster than a mod after a divide.
+
+(define_peephole2
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "")
+ (div:GPR (match_operand:GPR 1 "gpc_reg_operand" "")
+ (match_operand:GPR 2 "gpc_reg_operand" "")))
+ (set (match_operand:GPR 3 "gpc_reg_operand" "")
+ (mod:GPR (match_dup 1)
+ (match_dup 2)))]
+ "TARGET_MODULO
+ && ! reg_mentioned_p (operands[0], operands[1])
+ && ! reg_mentioned_p (operands[0], operands[2])
+ && ! reg_mentioned_p (operands[3], operands[1])
+ && ! reg_mentioned_p (operands[3], operands[2])"
+ [(set (match_dup 0)
+ (div:GPR (match_dup 1)
+ (match_dup 2)))
+ (set (match_dup 3)
+ (mult:GPR (match_dup 0)
+ (match_dup 2)))
+ (set (match_dup 3)
+ (minus:GPR (match_dup 1)
+ (match_dup 3)))])
+
+(define_peephole2
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "")
+ (udiv:GPR (match_operand:GPR 1 "gpc_reg_operand" "")
+ (match_operand:GPR 2 "gpc_reg_operand" "")))
+ (set (match_operand:GPR 3 "gpc_reg_operand" "")
+ (umod:GPR (match_dup 1)
+ (match_dup 2)))]
+ "TARGET_MODULO
+ && ! reg_mentioned_p (operands[0], operands[1])
+ && ! reg_mentioned_p (operands[0], operands[2])
+ && ! reg_mentioned_p (operands[3], operands[1])
+ && ! reg_mentioned_p (operands[3], operands[2])"
+ [(set (match_dup 0)
+ (udiv:GPR (match_dup 1)	; unsigned quotient, as in the matched udiv
+ (match_dup 2)))
+ (set (match_dup 3)
+ (mult:GPR (match_dup 0)
+ (match_dup 2)))
+ (set (match_dup 3)
+ (minus:GPR (match_dup 1)
+ (match_dup 3)))])
+
;; Logical instructions
;; The logical instructions are mostly combined by using match_operator,