| Message ID | 5748D0D6.80902@samsung.com |
|---|---|
| State | New |
| Headers | show |
On Fri, May 27, 2016 at 05:57:26PM -0500, Evandro Menezes wrote: > 2016-04-04 Evandro Menezes <e.menezes@samsung.com> > Wilco Dijkstra <wilco.dijkstra@arm.com> > > gcc/ > * config/aarch64/aarch64-protos.h > (aarch64_emit_approx_rsqrt): Replace with new function > "aarch64_emit_approx_sqrt". > (cpu_approx_modes): New member "sqrt". > * config/aarch64/aarch64.c > (generic_approx_modes): New member "sqrt". > (exynosm1_approx_modes): Likewise. > (xgene1_approx_modes): Likewise. > (aarch64_emit_approx_rsqrt): Replace with new function > "aarch64_emit_approx_sqrt". > (aarch64_override_options_after_change_1): Handle new option. > * config/aarch64/aarch64-simd.md > (rsqrt<mode>2): Use new function instead. > (sqrt<mode>2): New expansion and insn definitions. > * config/aarch64/aarch64.md: Likewise. > * config/aarch64/aarch64.opt > (mlow-precision-sqrt): Add new option description. > * doc/invoke.texi (mlow-precision-sqrt): Likewise. > diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h > index 6156281..2f407fd 100644 > --- a/gcc/config/aarch64/aarch64-protos.h > +++ b/gcc/config/aarch64/aarch64-protos.h > @@ -192,7 +192,8 @@ struct cpu_branch_cost > /* Allowed modes for approximations. */ > struct cpu_approx_modes > { > - const unsigned int recip_sqrt; /* Reciprocal square root. */ > + const unsigned int sqrt; /* Square root. */ > + const unsigned int recip_sqrt; /* Reciprocal square root. */ > }; > > struct tune_params > @@ -388,7 +389,7 @@ void aarch64_register_pragmas (void); > void aarch64_relayout_simd_types (void); > void aarch64_reset_previous_fndecl (void); > void aarch64_save_restore_target_globals (tree); > -void aarch64_emit_approx_rsqrt (rtx, rtx); > +bool aarch64_emit_approx_sqrt (rtx, rtx, bool); There's a goal to try to keep these in alphabetical order (first by return type, then by name). This should go up by the other "bool" return types. 
> +/* Emit instruction sequence to compute either the approximate square root > + or its approximate reciprocal, depending on the flag RECP, and return > + whether the sequence was emitted or not. */ > > -void > -aarch64_emit_approx_rsqrt (rtx dst, rtx src) > +bool > +aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp) > { > - machine_mode mode = GET_MODE (src); > - gcc_assert ( > - mode == SFmode || mode == V2SFmode || mode == V4SFmode > - || mode == DFmode || mode == V2DFmode); > - > - rtx xsrc = gen_reg_rtx (mode); > - emit_move_insn (xsrc, src); > - rtx x0 = gen_reg_rtx (mode); > + machine_mode mode = GET_MODE (dst); > + machine_mode mmsk = mode_for_vector (int_mode_for_mode (GET_MODE_INNER (mode)), > + GET_MODE_NUNITS (mode)); Long line. You can run your patch through contrib/check-gnu-style.sh to find these and other GNU style issues. > + bool use_approx_sqrt_p = (!recp > + && (flag_mlow_precision_sqrt > + || (aarch64_tune_params.approx_modes->sqrt > + & AARCH64_APPROX_MODE (mode)))); > + bool use_approx_rsqrt_p = (recp > + && (flag_mrecip_low_precision_sqrt > + || (aarch64_tune_params.approx_modes->recip_sqrt Long line. > + & AARCH64_APPROX_MODE (mode)))); > + > + if (!flag_finite_math_only > + || flag_trapping_math > + || !flag_unsafe_math_optimizations > + || optimize_function_for_size_p (cfun) > + || !(use_approx_sqrt_p || use_approx_rsqrt_p)) Swap these two cases to avoid the slightly more expensive call if we fail the cheap flags check. > + return false; > > - emit_insn ((*get_rsqrte_type (mode)) (x0, xsrc)); > + rtx xmsk = gen_reg_rtx (mmsk); > + if (!recp) > + /* When calculating the approximate square root, compare the argument with > + 0.0 and create a mask. */ > + emit_insn (gen_rtx_SET (xmsk, gen_rtx_NEG (mmsk, gen_rtx_EQ (mmsk, src, > + CONST0_RTX (mode))))); I guess you've done it this way rather than calling gen_aarch64_cmeq<mode> directly to avoid having a switch on mode? 
I wonder whether it is worth just writing that helper function to make it explicit what instruction we want to match? > > - bool double_mode = (mode == DFmode || mode == V2DFmode); > + /* Estimate the approximate reciprocal square root. */ > + rtx xdst = gen_reg_rtx (mode); > + emit_insn ((*get_rsqrte_type (mode)) (xdst, src)); > > - int iterations = double_mode ? 3 : 2; > + /* Iterate over the series twice for SF and thrice for DF. */ > + int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2; > > - /* Optionally iterate over the series one less time than otherwise. */ > - if (flag_mrecip_low_precision_sqrt) > + /* Optionally iterate over the series once less for faster performance > + while sacrificing the accuracy. */ > + if ((recp && flag_mrecip_low_precision_sqrt) > + || (!recp && flag_mlow_precision_sqrt)) > iterations--; > > - for (int i = 0; i < iterations; ++i) > + /* Iterate over the series to calculate the approximate reciprocal square root. */ Long line. > + rtx x1 = gen_reg_rtx (mode); > + while (iterations--) > { > - rtx x1 = gen_reg_rtx (mode); > rtx x2 = gen_reg_rtx (mode); > - rtx x3 = gen_reg_rtx (mode); > - emit_set_insn (x2, gen_rtx_MULT (mode, x0, x0)); > + emit_set_insn (x2, gen_rtx_MULT (mode, xdst, xdst)); > > - emit_insn ((*get_rsqrts_type (mode)) (x3, xsrc, x2)); > + emit_insn ((*get_rsqrts_type (mode)) (x1, src, x2)); > > - emit_set_insn (x1, gen_rtx_MULT (mode, x0, x3)); > - x0 = x1; > + if (iterations > 0) > + emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, x1)); > } > > - emit_move_insn (dst, x0); > + if (!recp) > + { > + /* Qualify the approximate reciprocal square root when the argument is > + 0.0 by squashing the intermediary result to 0.0. */ > + rtx xtmp = gen_reg_rtx (mmsk); > + emit_set_insn (xtmp, gen_rtx_AND (mmsk, gen_rtx_NOT (mmsk, xmsk), > + gen_rtx_SUBREG (mmsk, xdst, 0))); > + emit_move_insn (xdst, gen_rtx_SUBREG (mode, xtmp, 0)); > + > + /* Calculate the approximate square root. 
*/ > + emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, src)); > + } > + > + /* Finalize the approximation. */ > + emit_set_insn (dst, gen_rtx_MULT (mode, xdst, x1)); > + > + return true; > } > > /* Return the number of instructions that can be issued per cycle. */ > @@ -8108,6 +8146,12 @@ aarch64_override_options_after_change_1 (struct gcc_options *opts) > && (aarch64_cmodel == AARCH64_CMODEL_TINY > || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)) > aarch64_nopcrelative_literal_loads = false; > + > + /* When enabling the lower precision Newton series for the square root, also > + enable it for the reciprocal square root, since the later is an > + intermediary step for the latter. */ This doesn't make sense to me. I think you mean "since the former is an intermediary step ...". > + if (flag_mlow_precision_sqrt) > + flag_mrecip_low_precision_sqrt = true; > } > > /* 'Unpack' up the internal tuning structs and update the options > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > index 9b282f1..aab3e00 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -4683,7 +4683,16 @@ > [(set_attr "type" "ffarith<s>")] > ) > > -(define_insn "sqrt<mode>2" > +(define_expand "sqrt<mode>2" > + [(set (match_operand:GPF 0 "register_operand") > + (sqrt:GPF (match_operand:GPF 1 "register_operand")))] > + "TARGET_SIMD" This should be TARGET_FLOAT. > +{ > + if (aarch64_emit_approx_sqrt (operands[0], operands[1], false)) > + DONE; > +}) > + > +(define_insn "*sqrt<mode>2" > [(set (match_operand:GPF 0 "register_operand" "=w") > (sqrt:GPF (match_operand:GPF 1 "register_operand" "w")))] > "TARGET_FLOAT" > diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt > index c637ff4..ffd5540 100644 > --- a/gcc/config/aarch64/aarch64.opt > +++ b/gcc/config/aarch64/aarch64.opt > @@ -151,5 +151,10 @@ PC relative literal loads. 
> > mlow-precision-recip-sqrt > Common Var(flag_mrecip_low_precision_sqrt) Optimization > -When calculating the reciprocal square root approximation, > -uses one less step than otherwise, thus reducing latency and precision. > +When calculating the approximate reciprocal square root, > +use one less step than otherwise, thus reducing latency and precision. > + > +mlow-precision-sqrt > +Common Var(flag_mlow_precision_sqrt) Optimization > +When calculating the approximate square root, > +use one less step than otherwise, thus reducing latency and precision. > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi > index 4340b08..76b7a5c 100644 > --- a/gcc/doc/invoke.texi > +++ b/gcc/doc/invoke.texi > @@ -574,6 +574,7 @@ Objective-C and Objective-C++ Dialects}. > -mfix-cortex-a53-835769 -mno-fix-cortex-a53-835769 @gol > -mfix-cortex-a53-843419 -mno-fix-cortex-a53-843419 @gol > -mlow-precision-recip-sqrt -mno-low-precision-recip-sqrt@gol > +-mlow-precision-sqrt -mno-low-precision-sqrt@gol > -march=@var{name} -mcpu=@var{name} -mtune=@var{name}} > > @emph{Adapteva Epiphany Options} > @@ -12941,6 +12942,15 @@ uses one less step than otherwise, thus reducing latency and precision. > This is only relevant if @option{-ffast-math} enables the reciprocal square root > approximation. > > +@item -mlow-precision-sqrt > +@item -mno-low-precision-sqrt > +@opindex -mlow-precision-sqrt > +@opindex -mno-low-precision-sqrt > +When calculating the square root approximation, > +uses one less step than otherwise, thus reducing latency and precision. Looks like you've reintroduced the "uses" typo you fixed in the aarch64.opt hunk above. Regardless, can you look at Wilco's wording for -mlow-precision-recip-sqrt on trunk and decide whether that would be more appropriate? > +This is only relevant if @option{-ffast-math} enables the square root > +approximation. > + > @item -march=@var{name} > @opindex march > Specify the name of the target architecture and, optionally, one or Thanks, James
From 42e1a3d1289ba3951d3b9a7fd523422b6c0e56c8 Mon Sep 17 00:00:00 2001 From: Evandro Menezes <e.menezes@samsung.com> Date: Mon, 4 Apr 2016 11:23:29 -0500 Subject: [PATCH 2/3] [AArch64] Emit square root using the Newton series 2016-04-04 Evandro Menezes <e.menezes@samsung.com> Wilco Dijkstra <wilco.dijkstra@arm.com> gcc/ * config/aarch64/aarch64-protos.h (aarch64_emit_approx_rsqrt): Replace with new function "aarch64_emit_approx_sqrt". (cpu_approx_modes): New member "sqrt". * config/aarch64/aarch64.c (generic_approx_modes): New member "sqrt". (exynosm1_approx_modes): Likewise. (xgene1_approx_modes): Likewise. (aarch64_emit_approx_rsqrt): Replace with new function "aarch64_emit_approx_sqrt". (aarch64_override_options_after_change_1): Handle new option. * config/aarch64/aarch64-simd.md (rsqrt<mode>2): Use new function instead. (sqrt<mode>2): New expansion and insn definitions. * config/aarch64/aarch64.md: Likewise. * config/aarch64/aarch64.opt (mlow-precision-sqrt): Add new option description. * doc/invoke.texi (mlow-precision-sqrt): Likewise. --- gcc/config/aarch64/aarch64-protos.h | 5 +- gcc/config/aarch64/aarch64-simd.md | 13 ++++- gcc/config/aarch64/aarch64.c | 108 +++++++++++++++++++++++++----------- gcc/config/aarch64/aarch64.md | 11 +++- gcc/config/aarch64/aarch64.opt | 9 ++- gcc/doc/invoke.texi | 10 ++++ 6 files changed, 117 insertions(+), 39 deletions(-) diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 6156281..2f407fd 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -192,7 +192,8 @@ struct cpu_branch_cost /* Allowed modes for approximations. */ struct cpu_approx_modes { - const unsigned int recip_sqrt; /* Reciprocal square root. */ + const unsigned int sqrt; /* Square root. */ + const unsigned int recip_sqrt; /* Reciprocal square root. 
*/ }; struct tune_params @@ -388,7 +389,7 @@ void aarch64_register_pragmas (void); void aarch64_relayout_simd_types (void); void aarch64_reset_previous_fndecl (void); void aarch64_save_restore_target_globals (tree); -void aarch64_emit_approx_rsqrt (rtx, rtx); +bool aarch64_emit_approx_sqrt (rtx, rtx, bool); /* Initialize builtins for SIMD intrinsics. */ void init_aarch64_simd_builtins (void); diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index bd73bce..47ccb18 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -405,7 +405,7 @@ UNSPEC_RSQRT))] "TARGET_SIMD" { - aarch64_emit_approx_rsqrt (operands[0], operands[1]); + aarch64_emit_approx_sqrt (operands[0], operands[1], true); DONE; }) @@ -4307,7 +4307,16 @@ ;; sqrt -(define_insn "sqrt<mode>2" +(define_expand "sqrt<mode>2" + [(set (match_operand:VDQF 0 "register_operand") + (sqrt:VDQF (match_operand:VDQF 1 "register_operand")))] + "TARGET_SIMD" +{ + if (aarch64_emit_approx_sqrt (operands[0], operands[1], false)) + DONE; +}) + +(define_insn "*sqrt<mode>2" [(set (match_operand:VDQF 0 "register_operand" "=w") (sqrt:VDQF (match_operand:VDQF 1 "register_operand" "w")))] "TARGET_SIMD" diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index e532cfc..d55ca34 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -397,18 +397,21 @@ static const struct cpu_branch_cost cortexa57_branch_cost = /* Generic approximation modes. */ static const cpu_approx_modes generic_approx_modes = { + AARCH64_APPROX_NONE, /* sqrt */ AARCH64_APPROX_NONE /* recip_sqrt */ }; /* Approximation modes for Exynos M1. */ static const cpu_approx_modes exynosm1_approx_modes = { + AARCH64_APPROX_ALL, /* sqrt */ AARCH64_APPROX_ALL /* recip_sqrt */ }; /* Approximation modes for Xgene1. 
*/ static const cpu_approx_modes xgene1_approx_modes = { + AARCH64_APPROX_NONE, /* sqrt */ AARCH64_APPROX_ALL /* recip_sqrt */ }; @@ -7503,8 +7506,8 @@ aarch64_builtin_reciprocal (tree fndecl) typedef rtx (*rsqrte_type) (rtx, rtx); -/* Select reciprocal square root initial estimate - insn depending on machine mode. */ +/* Select reciprocal square root initial estimate insn depending on machine + mode. */ rsqrte_type get_rsqrte_type (machine_mode mode) @@ -7516,14 +7519,13 @@ get_rsqrte_type (machine_mode mode) case V2DFmode: return gen_aarch64_rsqrte_v2df2; case V2SFmode: return gen_aarch64_rsqrte_v2sf2; case V4SFmode: return gen_aarch64_rsqrte_v4sf2; - default: gcc_unreachable (); + default: gcc_unreachable (); } } typedef rtx (*rsqrts_type) (rtx, rtx, rtx); -/* Select reciprocal square root Newton-Raphson step - insn depending on machine mode. */ +/* Select reciprocal square root series step insn depending on machine mode. */ rsqrts_type get_rsqrts_type (machine_mode mode) @@ -7535,50 +7537,86 @@ get_rsqrts_type (machine_mode mode) case V2DFmode: return gen_aarch64_rsqrts_v2df3; case V2SFmode: return gen_aarch64_rsqrts_v2sf3; case V4SFmode: return gen_aarch64_rsqrts_v4sf3; - default: gcc_unreachable (); + default: gcc_unreachable (); } } -/* Emit instruction sequence to compute the reciprocal square root using the - Newton-Raphson series. Iterate over the series twice for SF - and thrice for DF. */ +/* Emit instruction sequence to compute either the approximate square root + or its approximate reciprocal, depending on the flag RECP, and return + whether the sequence was emitted or not. 
*/ -void -aarch64_emit_approx_rsqrt (rtx dst, rtx src) +bool +aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp) { - machine_mode mode = GET_MODE (src); - gcc_assert ( - mode == SFmode || mode == V2SFmode || mode == V4SFmode - || mode == DFmode || mode == V2DFmode); - - rtx xsrc = gen_reg_rtx (mode); - emit_move_insn (xsrc, src); - rtx x0 = gen_reg_rtx (mode); + machine_mode mode = GET_MODE (dst); + machine_mode mmsk = mode_for_vector (int_mode_for_mode (GET_MODE_INNER (mode)), + GET_MODE_NUNITS (mode)); + bool use_approx_sqrt_p = (!recp + && (flag_mlow_precision_sqrt + || (aarch64_tune_params.approx_modes->sqrt + & AARCH64_APPROX_MODE (mode)))); + bool use_approx_rsqrt_p = (recp + && (flag_mrecip_low_precision_sqrt + || (aarch64_tune_params.approx_modes->recip_sqrt + & AARCH64_APPROX_MODE (mode)))); + + if (!flag_finite_math_only + || flag_trapping_math + || !flag_unsafe_math_optimizations + || optimize_function_for_size_p (cfun) + || !(use_approx_sqrt_p || use_approx_rsqrt_p)) + return false; - emit_insn ((*get_rsqrte_type (mode)) (x0, xsrc)); + rtx xmsk = gen_reg_rtx (mmsk); + if (!recp) + /* When calculating the approximate square root, compare the argument with + 0.0 and create a mask. */ + emit_insn (gen_rtx_SET (xmsk, gen_rtx_NEG (mmsk, gen_rtx_EQ (mmsk, src, + CONST0_RTX (mode))))); - bool double_mode = (mode == DFmode || mode == V2DFmode); + /* Estimate the approximate reciprocal square root. */ + rtx xdst = gen_reg_rtx (mode); + emit_insn ((*get_rsqrte_type (mode)) (xdst, src)); - int iterations = double_mode ? 3 : 2; + /* Iterate over the series twice for SF and thrice for DF. */ + int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2; - /* Optionally iterate over the series one less time than otherwise. */ - if (flag_mrecip_low_precision_sqrt) + /* Optionally iterate over the series once less for faster performance + while sacrificing the accuracy. 
*/ + if ((recp && flag_mrecip_low_precision_sqrt) + || (!recp && flag_mlow_precision_sqrt)) iterations--; - for (int i = 0; i < iterations; ++i) + /* Iterate over the series to calculate the approximate reciprocal square root. */ + rtx x1 = gen_reg_rtx (mode); + while (iterations--) { - rtx x1 = gen_reg_rtx (mode); rtx x2 = gen_reg_rtx (mode); - rtx x3 = gen_reg_rtx (mode); - emit_set_insn (x2, gen_rtx_MULT (mode, x0, x0)); + emit_set_insn (x2, gen_rtx_MULT (mode, xdst, xdst)); - emit_insn ((*get_rsqrts_type (mode)) (x3, xsrc, x2)); + emit_insn ((*get_rsqrts_type (mode)) (x1, src, x2)); - emit_set_insn (x1, gen_rtx_MULT (mode, x0, x3)); - x0 = x1; + if (iterations > 0) + emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, x1)); } - emit_move_insn (dst, x0); + if (!recp) + { + /* Qualify the approximate reciprocal square root when the argument is + 0.0 by squashing the intermediary result to 0.0. */ + rtx xtmp = gen_reg_rtx (mmsk); + emit_set_insn (xtmp, gen_rtx_AND (mmsk, gen_rtx_NOT (mmsk, xmsk), + gen_rtx_SUBREG (mmsk, xdst, 0))); + emit_move_insn (xdst, gen_rtx_SUBREG (mode, xtmp, 0)); + + /* Calculate the approximate square root. */ + emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, src)); + } + + /* Finalize the approximation. */ + emit_set_insn (dst, gen_rtx_MULT (mode, xdst, x1)); + + return true; } /* Return the number of instructions that can be issued per cycle. */ @@ -8108,6 +8146,12 @@ aarch64_override_options_after_change_1 (struct gcc_options *opts) && (aarch64_cmodel == AARCH64_CMODEL_TINY || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)) aarch64_nopcrelative_literal_loads = false; + + /* When enabling the lower precision Newton series for the square root, also + enable it for the reciprocal square root, since the later is an + intermediary step for the latter. 
*/ + if (flag_mlow_precision_sqrt) + flag_mrecip_low_precision_sqrt = true; } /* 'Unpack' up the internal tuning structs and update the options diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 9b282f1..aab3e00 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -4683,7 +4683,16 @@ [(set_attr "type" "ffarith<s>")] ) -(define_insn "sqrt<mode>2" +(define_expand "sqrt<mode>2" + [(set (match_operand:GPF 0 "register_operand") + (sqrt:GPF (match_operand:GPF 1 "register_operand")))] + "TARGET_SIMD" +{ + if (aarch64_emit_approx_sqrt (operands[0], operands[1], false)) + DONE; +}) + +(define_insn "*sqrt<mode>2" [(set (match_operand:GPF 0 "register_operand" "=w") (sqrt:GPF (match_operand:GPF 1 "register_operand" "w")))] "TARGET_FLOAT" diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt index c637ff4..ffd5540 100644 --- a/gcc/config/aarch64/aarch64.opt +++ b/gcc/config/aarch64/aarch64.opt @@ -151,5 +151,10 @@ PC relative literal loads. mlow-precision-recip-sqrt Common Var(flag_mrecip_low_precision_sqrt) Optimization -When calculating the reciprocal square root approximation, -uses one less step than otherwise, thus reducing latency and precision. +When calculating the approximate reciprocal square root, +use one less step than otherwise, thus reducing latency and precision. + +mlow-precision-sqrt +Common Var(flag_mlow_precision_sqrt) Optimization +When calculating the approximate square root, +use one less step than otherwise, thus reducing latency and precision. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 4340b08..76b7a5c 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -574,6 +574,7 @@ Objective-C and Objective-C++ Dialects}. 
-mfix-cortex-a53-835769 -mno-fix-cortex-a53-835769 @gol -mfix-cortex-a53-843419 -mno-fix-cortex-a53-843419 @gol -mlow-precision-recip-sqrt -mno-low-precision-recip-sqrt@gol +-mlow-precision-sqrt -mno-low-precision-sqrt@gol -march=@var{name} -mcpu=@var{name} -mtune=@var{name}} @emph{Adapteva Epiphany Options} @@ -12941,6 +12942,15 @@ uses one less step than otherwise, thus reducing latency and precision. This is only relevant if @option{-ffast-math} enables the reciprocal square root approximation. +@item -mlow-precision-sqrt +@item -mno-low-precision-sqrt +@opindex -mlow-precision-sqrt +@opindex -mno-low-precision-sqrt +When calculating the square root approximation, +uses one less step than otherwise, thus reducing latency and precision. +This is only relevant if @option{-ffast-math} enables the square root +approximation. + @item -march=@var{name} @opindex march Specify the name of the target architecture and, optionally, one or -- 2.6.3