Improve Powerpc (double)(int) rounding

Message ID	20101022213337.GA27105@hungry-tiger.westford.ibm.com
State	New
Headers	show Return-Path: <gcc-patches-return-276197-incoming=patchwork.ozlabs.org@gcc.gnu.org> Date: Fri, 22 Oct 2010 17:33:37 -0400 From: Michael Meissner <meissner@linux.vnet.ibm.com> To: gcc-patches@gcc.gnu.org, dje.gcc@gmail.com Subject: [PATCH] Improve Powerpc (double)(int) rounding Message-ID: <20101022213337.GA27105@hungry-tiger.westford.ibm.com> Mail-Followup-To: Michael Meissner <meissner@linux.vnet.ibm.com>, gcc-patches@gcc.gnu.org, dje.gcc@gmail.com MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="mP3DRpeJDSE+ciuQ" Content-Disposition: inline User-Agent: Mutt/1.5.21 (2010-09-15) Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk Sender: gcc-patches-owner@gcc.gnu.org

--- gcc/config/rs6000/rs6000.md.~1~ 2010-10-22 14:31:38.000000000 -0400 +++ gcc/config/rs6000/rs6000.md 2010-10-22 14:33:23.000000000 -0400 @@ -6605,68 +6605,73 @@ "lfiwax %0,%y1" [(set_attr "type" "fpload")]) +; This split must be run before register allocation because it allocates the +; memory slot that is needed to move values to/from the FPR. We don't allocate +; it earlier to allow for the combiner to merge insns together where it might +; not be needed and also in case the insns are deleted as dead code. + (define_insn_and_split "floatsi<mode>2_lfiwax" - [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>,<rreg2>") - (float:SFDF (match_operand:SI 1 "nonimmediate_operand" "Z,r"))) - (clobber (match_operand:SI 2 "indexed_or_indirect_operand" "=Z,Z")) - (clobber (match_operand:DI 3 "gpc_reg_operand" "=d,d"))] + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d") + (float:SFDF (match_operand:SI 1 "nonimmediate_operand" "r"))) + (clobber (match_scratch:DI 2 "=d"))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWAX - && <SI_CONVERT_FP>" + && <SI_CONVERT_FP> && can_create_pseudo_p ()" "#" - "MEM_P (operands[1]) || reload_completed" + "" [(pc)] " { - if (MEM_P (operands[1])) - { - operands[1] = rs6000_address_for_fpconvert (operands[1]); - emit_insn (gen_lfiwax (operands[3], operands[1])); - } + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp; + + if (!MEM_P (src) && TARGET_MFPGPR && TARGET_POWERPC64) + tmp = convert_to_mode (DImode, src, false); else { - emit_move_insn (operands[2], operands[1]); - emit_insn (gen_lfiwax (operands[3], operands[2])); + tmp = operands[2]; + if (GET_CODE (tmp) == SCRATCH) + tmp = gen_reg_rtx (DImode); + if (MEM_P (src)) + { + src = rs6000_address_for_fpconvert (src); + emit_insn (gen_lfiwax (tmp, src)); + } + else + { + rtx stack = rs6000_allocate_stack_temp (SImode, false, true); + emit_move_insn (stack, src); + emit_insn (gen_lfiwax (tmp, stack)); + } } - emit_insn (gen_floatdi<mode>2 
(operands[0], operands[3])); + emit_insn (gen_floatdi<mode>2 (dest, tmp)); DONE; }" - [(set_attr "length" "8,12")]) + [(set_attr "length" "12") + (set_attr "type" "fpload")]) (define_insn_and_split "floatsi<mode>2_lfiwax_mem" - [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>") - (float:SFDF (match_operand:SI 1 "memory_operand" "Z"))) - (clobber (match_scratch:DI 2 "=d"))] - "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWAX - && <SI_CONVERT_FP>" - "#" - "&& reload_completed" - [(pc)] - " -{ - emit_insn (gen_lfiwax (operands[2], operands[1])); - emit_insn (gen_floatdi<mode>2 (operands[0], operands[2])); - DONE; -}" - [(set_attr "length" "8")]) - -(define_insn_and_split "floatsi<mode>2_lfiwax_mem2" - [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>") + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,<rreg2>") (float:SFDF (sign_extend:DI - (match_operand:SI 1 "memory_operand" "Z")))) - (clobber (match_scratch:DI 2 "=d"))] + (match_operand:SI 1 "memory_operand" "Z,Z")))) + (clobber (match_scratch:DI 2 "=0,d"))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWAX && <SI_CONVERT_FP>" "#" - "&& reload_completed" + "" [(pc)] " { + operands[1] = rs6000_address_for_fpconvert (operands[1]); + if (GET_CODE (operands[2]) == SCRATCH) + operands[2] = gen_reg_rtx (DImode); emit_insn (gen_lfiwax (operands[2], operands[1])); emit_insn (gen_floatdi<mode>2 (operands[0], operands[2])); DONE; }" - [(set_attr "length" "8")]) + [(set_attr "length" "8") + (set_attr "type" "fpload")]) (define_insn "lfiwzx" [(set (match_operand:DI 0 "gpc_reg_operand" "=d") @@ -6677,67 +6682,67 @@ [(set_attr "type" "fpload")]) (define_insn_and_split "floatunssi<mode>2_lfiwzx" - [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>,<rreg2>") - (unsigned_float:SFDF (match_operand:SI 1 "gpc_reg_operand" "Z,r"))) - (clobber (match_operand:SI 2 "indexed_or_indirect_operand" "=Z,Z")) - (clobber (match_operand:DI 3 "gpc_reg_operand" "=d,d"))] + [(set 
(match_operand:SFDF 0 "gpc_reg_operand" "=d") + (unsigned_float:SFDF (match_operand:SI 1 "nonimmediate_operand" "r"))) + (clobber (match_scratch:DI 2 "=d"))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWZX && <SI_CONVERT_FP>" "#" - "MEM_P (operands[1]) || reload_completed" + "" [(pc)] " { - if (MEM_P (operands[1])) - { - operands[1] = rs6000_address_for_fpconvert (operands[1]); - emit_insn (gen_lfiwzx (operands[3], operands[1])); - } + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp; + + if (!MEM_P (src) && TARGET_MFPGPR && TARGET_POWERPC64) + tmp = convert_to_mode (DImode, src, true); else { - emit_move_insn (operands[2], operands[1]); - emit_insn (gen_lfiwzx (operands[3], operands[2])); + tmp = operands[2]; + if (GET_CODE (tmp) == SCRATCH) + tmp = gen_reg_rtx (DImode); + if (MEM_P (src)) + { + src = rs6000_address_for_fpconvert (src); + emit_insn (gen_lfiwzx (tmp, src)); + } + else + { + rtx stack = rs6000_allocate_stack_temp (SImode, false, true); + emit_move_insn (stack, src); + emit_insn (gen_lfiwzx (tmp, stack)); + } } - emit_insn (gen_floatdi<mode>2 (operands[0], operands[3])); + emit_insn (gen_floatdi<mode>2 (dest, tmp)); DONE; }" - [(set_attr "length" "8,12")]) + [(set_attr "length" "12") + (set_attr "type" "fpload")]) (define_insn_and_split "floatunssi<mode>2_lfiwzx_mem" - [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>") - (unsigned_float:SFDF (match_operand:SI 1 "memory_operand" "Z"))) - (clobber (match_scratch:DI 2 "=d"))] - "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWZX - && <SI_CONVERT_FP>" - "#" - "&& reload_completed" - [(pc)] - " -{ - emit_insn (gen_lfiwzx (operands[2], operands[1])); - emit_insn (gen_floatdi<mode>2 (operands[0], operands[2])); - DONE; -}" - [(set_attr "length" "8")]) - -(define_insn_and_split "floatunssi<mode>2_lfiwzx_mem2" - [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>") + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,<rreg2>") 
(unsigned_float:SFDF (zero_extend:DI - (match_operand:SI 1 "memory_operand" "Z")))) - (clobber (match_scratch:DI 2 "=d"))] + (match_operand:SI 1 "memory_operand" "Z,Z")))) + (clobber (match_scratch:DI 2 "=0,d"))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWZX && <SI_CONVERT_FP>" "#" - "&& reload_completed" + "" [(pc)] " { + operands[1] = rs6000_address_for_fpconvert (operands[1]); + if (GET_CODE (operands[2]) == SCRATCH) + operands[2] = gen_reg_rtx (DImode); emit_insn (gen_lfiwzx (operands[2], operands[1])); emit_insn (gen_floatdi<mode>2 (operands[0], operands[2])); DONE; }" - [(set_attr "length" "8")]) + [(set_attr "length" "8") + (set_attr "type" "fpload")]) ; For each of these conversions, there is a define_expand, a define_insn ; with a '#' template, and a define_split (with C code). The idea is @@ -6765,7 +6770,7 @@ } else if (TARGET_LFIWAX && TARGET_FCFID) { - rs6000_expand_convert_si_to_sfdf (operands[0], operands[1], false); + emit_insn (gen_floatsidf2_lfiwax (operands[0], operands[1])); DONE; } else if (TARGET_FCFID) @@ -6819,7 +6824,8 @@ emit_insn (gen_subdf3 (operands[0], operands[5], operands[3])); DONE; }" - [(set_attr "length" "24")]) + [(set_attr "length" "24") + (set_attr "type" "fp")]) ;; If we don't have a direct conversion to single precision, don't enable this ;; conversion for 32-bit without fast math, because we don't have the insn to @@ -6842,7 +6848,7 @@ } else if (TARGET_LFIWZX && TARGET_FCFIDUS) { - rs6000_expand_convert_si_to_sfdf (operands[0], operands[1], true); + emit_insn (gen_floatunssisf2_lfiwzx (operands[0], operands[1])); DONE; } else @@ -6876,8 +6882,8 @@ } else if (TARGET_LFIWZX && TARGET_FCFID) { - rs6000_expand_convert_si_to_sfdf (operands[0], operands[1], true); - DONE; + emit_insn (gen_floatunssidf2_lfiwzx (operands[0], operands[1])); + DONE; } else if (TARGET_FCFID) { @@ -6927,7 +6933,8 @@ emit_insn (gen_subdf3 (operands[0], operands[5], operands[3])); DONE; }" - [(set_attr "length" "20")]) + 
[(set_attr "length" "20") + (set_attr "type" "fp")]) (define_expand "fix_trunc<mode>si2" [(set (match_operand:SI 0 "gpc_reg_operand" "") @@ -6941,12 +6948,7 @@ rtx tmp, stack; if (TARGET_STFIWX) - { - tmp = gen_reg_rtx (DImode); - stack = rs6000_allocate_stack_temp (SImode, false, true); - emit_insn (gen_fix_trunc<mode>si2_stfiwx (operands[0], operands[1], - tmp, stack)); - } + emit_insn (gen_fix_trunc<mode>si2_stfiwx (operands[0], operands[1])); else { tmp = gen_reg_rtx (DImode); @@ -6958,58 +6960,56 @@ } }") +; Like the convert to float patterns, this insn must be split before +; register allocation so that it can allocate the memory slot if it is +; needed. (define_insn_and_split "fix_trunc<mode>si2_stfiwx" - [(set (match_operand:SI 0 "gpc_reg_operand" "=r") - (fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>"))) - (clobber (match_operand:DI 2 "gpc_reg_operand" "=d")) - (clobber (match_operand:SI 3 "indexed_or_indirect_operand" "=Z"))] + [(set (match_operand:SI 0 "general_operand" "=rm") + (fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "d"))) + (clobber (match_scratch:DI 2 "=d"))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && (<MODE>mode != SFmode || TARGET_SINGLE_FLOAT) - && TARGET_STFIWX" + && TARGET_STFIWX && can_create_pseudo_p ()" "#" - "&& reload_completed" + "" [(pc)] - " { - emit_insn (gen_fctiwz_<mode> (operands[2], operands[1])); - if (TARGET_MFPGPR && TARGET_POWERPC64 && REG_P (operands[0]) - && INT_REGNO_P (REGNO (operands[0]))) + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp = operands[2]; + + if (GET_CODE (tmp) == SCRATCH) + tmp = gen_reg_rtx (DImode); + + emit_insn (gen_fctiwz_<mode> (tmp, src)); + if (MEM_P (dest)) { - rtx reg = gen_lowpart (DImode, operands[0]); - emit_move_insn (reg, operands[2]); + dest = rs6000_address_for_fpconvert (dest); + emit_insn (gen_stfiwx (dest, tmp)); + DONE; + } + else if (TARGET_MFPGPR && TARGET_POWERPC64) + { + dest = gen_lowpart (DImode, dest); + emit_move_insn (dest, tmp); + DONE; 
} else { - emit_insn (gen_stfiwx (operands[3], operands[2])); - emit_move_insn (operands[0], operands[3]); + rtx stack = rs6000_allocate_stack_temp (SImode, false, true); + emit_insn (gen_stfiwx (stack, tmp)); + emit_move_insn (dest, stack); + DONE; } - DONE; -}" - [(set_attr "length" "12")]) - -(define_insn_and_split "*fix_trunc<mode>si2_mem" - [(set (match_operand:SI 0 "memory_operand" "=Z") - (fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>"))) - (clobber (match_scratch:DI 2 "=d"))] - "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT - && (<MODE>mode != SFmode || TARGET_SINGLE_FLOAT) - && TARGET_STFIWX" - "#" - "&& reload_completed" - [(pc)] - " -{ - emit_insn (gen_fctiwz_<mode> (operands[2], operands[1])); - emit_insn (gen_stfiwx (operands[0], operands[2])); - DONE; -}" - [(set_attr "length" "8")]) +} + [(set_attr "length" "12") + (set_attr "type" "fp")]) (define_insn_and_split "fix_trunc<mode>si2_internal" - [(set (match_operand:SI 0 "gpc_reg_operand" "=r") - (fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "<rreg>"))) - (clobber (match_operand:DI 2 "gpc_reg_operand" "=d")) - (clobber (match_operand:DI 3 "offsettable_mem_operand" "=o"))] + [(set (match_operand:SI 0 "gpc_reg_operand" "=r,?r") + (fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "d,<rreg>"))) + (clobber (match_operand:DI 2 "gpc_reg_operand" "=1,d")) + (clobber (match_operand:DI 3 "offsettable_mem_operand" "=o,o"))] "(TARGET_POWER2 || TARGET_POWERPC) && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT" "#" @@ -7026,7 +7026,8 @@ emit_move_insn (operands[0], lowword); DONE; }" - [(set_attr "length" "16")]) + [(set_attr "length" "16") + (set_attr "type" "fp")]) (define_expand "fix_trunc<mode>di2" [(set (match_operand:DI 0 "gpc_reg_operand" "") @@ -7053,59 +7054,51 @@ { if (!<E500_CONVERT>) { - rtx tmp = gen_reg_rtx (DImode); - rtx stack = rs6000_allocate_stack_temp (SImode, false, true); - emit_insn (gen_fixuns_trunc<mode>si2_stfiwx (operands[0], operands[1], - tmp, stack)); + 
emit_insn (gen_fixuns_trunc<mode>si2_stfiwx (operands[0], operands[1])); DONE; } }") (define_insn_and_split "fixuns_trunc<mode>si2_stfiwx" - [(set (match_operand:SI 0 "gpc_reg_operand" "=r") - (unsigned_fix:SI - (match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>"))) - (clobber (match_operand:DI 2 "gpc_reg_operand" "=d")) - (clobber (match_operand:SI 3 "indexed_or_indirect_operand" "=Z"))] + [(set (match_operand:SI 0 "general_operand" "=rm") + (unsigned_fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "d"))) + (clobber (match_scratch:DI 2 "=d"))] "TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT> && TARGET_FCTIWUZ - && TARGET_STFIWX" + && TARGET_STFIWX && can_create_pseudo_p ()" "#" - "&& reload_completed" + "" [(pc)] - " { - emit_insn (gen_fctiwuz_<mode> (operands[2], operands[1])); - if (TARGET_MFPGPR && TARGET_POWERPC64 && REG_P (operands[0]) - && INT_REGNO_P (REGNO (operands[0]))) + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp = operands[2]; + + if (GET_CODE (tmp) == SCRATCH) + tmp = gen_reg_rtx (DImode); + + emit_insn (gen_fctiwuz_<mode> (tmp, src)); + if (MEM_P (dest)) { - rtx reg = gen_lowpart (DImode, operands[0]); - emit_move_insn (reg, operands[2]); + dest = rs6000_address_for_fpconvert (dest); + emit_insn (gen_stfiwx (dest, tmp)); + DONE; + } + else if (TARGET_MFPGPR && TARGET_POWERPC64) + { + dest = gen_lowpart (DImode, dest); + emit_move_insn (dest, tmp); + DONE; } else { - emit_insn (gen_stfiwx (operands[3], operands[2])); - emit_move_insn (operands[0], operands[3]); + rtx stack = rs6000_allocate_stack_temp (SImode, false, true); + emit_insn (gen_stfiwx (stack, tmp)); + emit_move_insn (dest, stack); + DONE; } - DONE; -}" - [(set_attr "length" "12")]) - -(define_insn_and_split "*fixuns_trunc<mode>si2_mem" - [(set (match_operand:SI 0 "memory_operand" "=Z") - (unsigned_fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>"))) - (clobber (match_scratch:DI 2 "=d"))] - "TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT> && TARGET_FCTIWUZ - && 
TARGET_STFIWX" - "#" - "&& reload_completed" - [(pc)] - " -{ - emit_insn (gen_fctiwuz_<mode> (operands[2], operands[1])); - emit_insn (gen_stfiwx (operands[0], operands[2])); - DONE; -}" - [(set_attr "length" "8")]) +} + [(set_attr "length" "12") + (set_attr "type" "fp")]) (define_expand "fixuns_trunc<mode>di2" [(set (match_operand:DI 0 "register_operand" "") @@ -7155,6 +7148,76 @@ "friz %0,%1" [(set_attr "type" "fp")]) +;; Since FCTIWZ doesn't sign extend the upper bits, we have to do a store and a +;; load to properly sign extend the value, but at least we avoid doing a store, load +;; into a GPR to sign extend, a store from the GPR and a load back into the FPR +;; if we have 32-bit memory ops +(define_insn_and_split "*round32<mode>2_fprs" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d") + (float:SFDF + (fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "d")))) + (clobber (match_scratch:DI 2 "=d")) + (clobber (match_scratch:DI 3 "=d"))] + "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT + && <SI_CONVERT_FP> && TARGET_LFIWAX && TARGET_STFIWX && TARGET_FCFID + && can_create_pseudo_p ()" + "#" + "" + [(pc)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp1 = operands[2]; + rtx tmp2 = operands[3]; + rtx stack = rs6000_allocate_stack_temp (SImode, false, true); + + if (GET_CODE (tmp1) == SCRATCH) + tmp1 = gen_reg_rtx (DImode); + if (GET_CODE (tmp2) == SCRATCH) + tmp2 = gen_reg_rtx (DImode); + + emit_insn (gen_fctiwz_<mode> (tmp1, src)); + emit_insn (gen_stfiwx (stack, tmp1)); + emit_insn (gen_lfiwax (tmp2, stack)); + emit_insn (gen_floatdi<mode>2 (dest, tmp2)); + DONE; +} + [(set_attr "type" "fpload") + (set_attr "length" "16")]) + +(define_insn_and_split "*roundu32<mode>2_fprs" + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d") + (unsigned_float:SFDF + (unsigned_fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "d")))) + (clobber (match_scratch:DI 2 "=d")) + (clobber (match_scratch:DI 3 "=d"))] + "TARGET_HARD_FLOAT && TARGET_FPRS && 
TARGET_DOUBLE_FLOAT + && TARGET_LFIWZX && TARGET_STFIWX && TARGET_FCFIDU + && can_create_pseudo_p ()" + "#" + "" + [(pc)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp1 = operands[2]; + rtx tmp2 = operands[3]; + rtx stack = rs6000_allocate_stack_temp (SImode, false, true); + + if (GET_CODE (tmp1) == SCRATCH) + tmp1 = gen_reg_rtx (DImode); + if (GET_CODE (tmp2) == SCRATCH) + tmp2 = gen_reg_rtx (DImode); + + emit_insn (gen_fctiwuz_<mode> (tmp1, src)); + emit_insn (gen_stfiwx (stack, tmp1)); + emit_insn (gen_lfiwzx (tmp2, stack)); + emit_insn (gen_floatdi<mode>2 (dest, tmp2)); + DONE; +} + [(set_attr "type" "fpload") + (set_attr "length" "16")]) + ;; No VSX equivalent to fctid (define_insn "lrint<mode>di2" [(set (match_operand:DI 0 "gpc_reg_operand" "=d") @@ -7251,7 +7314,14 @@ } else if (TARGET_FCFIDS && TARGET_LFIWAX) { - rs6000_expand_convert_si_to_sfdf (operands[0], operands[1], false); + emit_insn (gen_floatsisf2_lfiwax (operands[0], operands[1])); + DONE; + } + else if (TARGET_FCFID && TARGET_LFIWAX) + { + rtx dfreg = gen_reg_rtx (DFmode); + emit_insn (gen_floatsidf2_lfiwax (dfreg, operands[1])); + emit_insn (gen_truncdfsf2 (operands[0], dfreg)); DONE; } else @@ -7294,7 +7364,8 @@ [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (float:DF (match_dup 2)))] "" - [(set_attr "length" "8")]) + [(set_attr "length" "8") + (set_attr "type" "fpload")]) (define_expand "floatunsdidf2" [(set (match_operand:DF 0 "gpc_reg_operand" "") @@ -7321,7 +7392,8 @@ [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (unsigned_float:DF (match_dup 2)))] "" - [(set_attr "length" "8")]) + [(set_attr "length" "8") + (set_attr "type" "fpload")]) (define_expand "floatdisf2" [(set (match_operand:SF 0 "gpc_reg_operand" "") @@ -7373,6 +7445,7 @@ ;; This is not IEEE compliant if rounding mode is "round to nearest". ;; If the DI->DF conversion is inexact, then it's possible to suffer ;; from double rounding. 
+;; Instead of creating a new cpu type for two FP operations, just use fp (define_insn_and_split "floatdisf2_internal1" [(set (match_operand:SF 0 "gpc_reg_operand" "=f") (float:SF (match_operand:DI 1 "gpc_reg_operand" "d"))) @@ -7384,7 +7457,9 @@ (float:DF (match_dup 1))) (set (match_dup 0) (float_truncate:SF (match_dup 2)))] - "") + "" + [(set_attr "length" "8") + (set_attr "type" "fp")]) ;; Twiddles bits to avoid double rounding. ;; Bits that might be truncated when converting to DFmode are replaced @@ -7448,7 +7523,8 @@ emit_insn (gen_floatunsdisf2_fcfidus (operands[0], operands[2])); DONE; }" - [(set_attr "length" "8")]) + [(set_attr "length" "8") + (set_attr "type" "fpload")]) ;; Define the DImode operations that can be done in a small number ;; of instructions. The & constraints are to prevent the register --- gcc/config/rs6000/rs6000.c.~2~ 2010-10-22 14:11:08.000000000 -0400 +++ gcc/config/rs6000/rs6000.c 2010-10-22 14:47:51.000000000 -0400 @@ -27164,82 +27164,30 @@ rs6000_address_for_fpconvert (rtx x) addr = XEXP (x, 0); if (! legitimate_indirect_address_p (addr, strict_p) && ! legitimate_indexed_address_p (addr, strict_p)) - x = replace_equiv_address (x, copy_addr_to_reg (addr)); - - return x; -} - -/* Expand 32-bit int -> floating point conversions. Return true if - successful. 
*/ - -void -rs6000_expand_convert_si_to_sfdf (rtx dest, rtx src, bool unsigned_p) -{ - enum machine_mode dmode = GET_MODE (dest); - rtx (*func_si) (rtx, rtx, rtx, rtx); - rtx (*func_si_mem) (rtx, rtx); - rtx (*func_di) (rtx, rtx); - rtx reg, stack; - - gcc_assert (GET_MODE (src) == SImode); - - if (dmode == SFmode) { - if (unsigned_p) + if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC) { - gcc_assert (TARGET_FCFIDUS && TARGET_LFIWZX); - func_si = gen_floatunssisf2_lfiwzx; - func_si_mem = gen_floatunssisf2_lfiwzx_mem; - func_di = gen_floatunsdisf2; + rtx reg = XEXP (addr, 0); + HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x)); + rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size); + gcc_assert (REG_P (reg)); + emit_insn (gen_add3_insn (reg, reg, size_rtx)); + addr = reg; } - else + else if (GET_CODE (addr) == PRE_MODIFY) { - gcc_assert (TARGET_FCFIDS && TARGET_LFIWAX); - func_si = gen_floatsisf2_lfiwax; - func_si_mem = gen_floatsisf2_lfiwax_mem; - func_di = gen_floatdisf2; + rtx reg = XEXP (addr, 0); + rtx expr = XEXP (addr, 1); + gcc_assert (REG_P (reg)); + gcc_assert (GET_CODE (expr) == PLUS); + emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1))); + addr = reg; } - } - else if (dmode == DFmode) - { - if (unsigned_p) - { - gcc_assert (TARGET_FCFIDU && TARGET_LFIWZX); - func_si = gen_floatunssidf2_lfiwzx; - func_si_mem = gen_floatunssidf2_lfiwzx_mem; - func_di = gen_floatunsdidf2; - } - else - { - gcc_assert (TARGET_FCFID && TARGET_LFIWAX); - func_si = gen_floatsidf2_lfiwax; - func_si_mem = gen_floatsidf2_lfiwax_mem; - func_di = gen_floatdidf2; - } + x = replace_equiv_address (x, copy_addr_to_reg (addr)); } - else - gcc_unreachable (); - - if (MEM_P (src)) - { - src = rs6000_address_for_fpconvert (src); - emit_insn (func_si_mem (dest, src)); - } - else if (!TARGET_MFPGPR) - { - reg = gen_reg_rtx (DImode); - stack = rs6000_allocate_stack_temp (SImode, false, true); - emit_insn (func_si (dest, src, stack, reg)); - } - else - { 
- if (!REG_P (src)) - src = force_reg (SImode, src); - reg = convert_to_mode (DImode, src, unsigned_p); - emit_insn (func_di (dest, reg)); - } + return x; } #include "gt-rs6000.h" --- gcc/testsuite/gcc.target/powerpc/ppc-round.c.~1~ 2010-10-22 16:42:51.000000000 -0400 +++ gcc/testsuite/gcc.target/powerpc/ppc-round.c 2010-10-22 16:38:51.000000000 -0400 @@ -0,0 +1,37 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2 -mcpu=power7" } */ +/* { dg-final { scan-assembler-times "stfiwx" 4 } } */ +/* { dg-final { scan-assembler-times "lfiwax" 2 } } */ +/* { dg-final { scan-assembler-times "lfiwzx" 2 } } */ +/* { dg-final { scan-assembler-times "fctiwz" 2 } } */ +/* { dg-final { scan-assembler-times "xscvsxddp" 2 } } */ +/* { dg-final { scan-assembler-times "fcfids" 2 } } */ +/* { dg-final { scan-assembler-not "lwz" } } */ +/* { dg-final { scan-assembler-not "stw" } } */ + +/* Make sure we don't have loads/stores to the GPR unit. */ +double +round_double_int (double a) +{ + return (double)(int)a; +} + +float +round_float_int (float a) +{ + return (float)(int)a; +} + +double +round_double_uint (double a) +{ + return (double)(unsigned int)a; +} + +float +round_float_uint (float a) +{ + return (float)(unsigned int)a; +}

Improve Powerpc (double)(int) rounding

Commit Message

Comments

Patch