diff mbox

[1/3,AArch64] Add more choices for the reciprocal square root approximation

Message ID 5748D0D3.2000001@samsung.com
State New
Headers show

Commit Message

Evandro Menezes May 27, 2016, 10:57 p.m. UTC
On 05/25/16 05:15, James Greenhalgh wrote:
> On Wed, Apr 27, 2016 at 04:13:33PM -0500, Evandro Menezes wrote:
>>     gcc/
>>          * config/aarch64/aarch64-protos.h
>>          (AARCH64_APPROX_MODE): New macro.
>>          (AARCH64_APPROX_{NONE,SP,DP,DFORM,QFORM,SCALAR,VECTOR,ALL}):
>>     Likewise.
>>          (tune_params): New member "approx_rsqrt_modes".
>>          * config/aarch64/aarch64-tuning-flags.def
>>          (AARCH64_EXTRA_TUNE_APPROX_RSQRT): Remove macro.
>>          * config/aarch64/aarch64.c
>>          (generic_tunings): New member "approx_rsqrt_modes".
>>          (cortexa35_tunings): Likewise.
>>          (cortexa53_tunings): Likewise.
>>          (cortexa57_tunings): Likewise.
>>          (cortexa72_tunings): Likewise.
>>          (exynosm1_tunings): Likewise.
>>          (thunderx_tunings): Likewise.
>>          (xgene1_tunings): Likewise.
>>          (use_rsqrt_p): New argument for the mode and use new member from
>>          "tune_params".
>>          (aarch64_builtin_reciprocal): Devise mode from builtin.
>>          (aarch64_optab_supported_p): New argument for the mode.
>>          * doc/invoke.texi (-mlow-precision-recip-sqrt): Reword description.
>>
>> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
>> index f22a31c..50f1d24 100644
>> --- a/gcc/config/aarch64/aarch64-protos.h
>> +++ b/gcc/config/aarch64/aarch64-protos.h
>> @@ -178,6 +178,32 @@ struct cpu_branch_cost
>>     const int unpredictable;  /* Unpredictable branch or optimizing for speed.  */
>>   };
>>   
>> +/* Control approximate alternatives to certain FP operators.  */
>> +#define AARCH64_APPROX_MODE(MODE) \
>> +  ((MIN_MODE_FLOAT <= (MODE) && (MODE) <= MAX_MODE_FLOAT) \
>> +   ? (1 << ((MODE) - MIN_MODE_FLOAT)) \
>> +   : (MIN_MODE_VECTOR_FLOAT <= (MODE) && (MODE) <= MAX_MODE_VECTOR_FLOAT) \
>> +     ? (1 << ((MODE) - MIN_MODE_VECTOR_FLOAT \
>> +	      + MAX_MODE_FLOAT - MIN_MODE_FLOAT + 1)) \
>> +     : (0))
>> +#define AARCH64_APPROX_NONE (0)
>> +#define AARCH64_APPROX_SP (AARCH64_APPROX_MODE (SFmode) \
>> +			   | AARCH64_APPROX_MODE (V2SFmode) \
>> +			   | AARCH64_APPROX_MODE (V4SFmode))
>> +#define AARCH64_APPROX_DP (AARCH64_APPROX_MODE (DFmode) \
>> +			   | AARCH64_APPROX_MODE (V2DFmode))
>> +#define AARCH64_APPROX_DFORM (AARCH64_APPROX_MODE (SFmode) \
>> +			      | AARCH64_APPROX_MODE (DFmode) \
>> +			      | AARCH64_APPROX_MODE (V2SFmode))
>> +#define AARCH64_APPROX_QFORM (AARCH64_APPROX_MODE (V4SFmode) \
>> +			      | AARCH64_APPROX_MODE (V2DFmode))
>> +#define AARCH64_APPROX_SCALAR (AARCH64_APPROX_MODE (SFmode) \
>> +			       | AARCH64_APPROX_MODE (DFmode))
>> +#define AARCH64_APPROX_VECTOR (AARCH64_APPROX_MODE (V2SFmode) \
>> +			       | AARCH64_APPROX_MODE (V4SFmode) \
>> +			       | AARCH64_APPROX_MODE (V2DFmode))
>> +#define AARCH64_APPROX_ALL (-1)
>> +
> Thanks for providing these various subsets, but I think they are
> unneccesary for the final submission. From what I can see, only
> AARCH64_APPROX_ALL and AARCH64_APPROX_NONE are used. Please remove the
> rest, they are easy enough to add back if a subtarget wants them.

OK

>>   struct tune_params
>>   {
>>     const struct cpu_cost_table *insn_extra_cost;
>> @@ -218,6 +244,7 @@ struct tune_params
>>     } autoprefetcher_model;
>>   
>>     unsigned int extra_tuning_flags;
>> +  unsigned int approx_rsqrt_modes;
> As we're going to add a few of these, lets follow the approach for some
> of the other costs (e.g. branch costs, vector costs) and bury them in a
> structure of their own.

OK

>>   };
>>   
>>   #define AARCH64_FUSION_PAIR(x, name) \
>> diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def
>> index 7e45a0c..048c2a3 100644
>> --- a/gcc/config/aarch64/aarch64-tuning-flags.def
>> +++ b/gcc/config/aarch64/aarch64-tuning-flags.def
>> @@ -29,5 +29,3 @@
>>        AARCH64_TUNE_ to give an enum name. */
>>   
>>   AARCH64_EXTRA_TUNING_OPTION ("rename_fma_regs", RENAME_FMA_REGS)
>> -AARCH64_EXTRA_TUNING_OPTION ("approx_rsqrt", APPROX_RSQRT)
>> -
> Did you want to add another way to tune these by command line (not
> neccessary now, but as a follow-up)? See how instruction fusion is
> handled by the -moverride code for an example.

I prefer your suggestion a la mode of RS6000, something like 
-mapprox=<list>.

Thank you,

Comments

James Greenhalgh June 1, 2016, 8:35 a.m. UTC | #1
On Fri, May 27, 2016 at 05:57:23PM -0500, Evandro Menezes wrote:
> From 86d7690632d03ec85fd69bfaef8e89c0542518ad Mon Sep 17 00:00:00 2001
> From: Evandro Menezes <e.menezes@samsung.com>
> Date: Thu, 3 Mar 2016 18:13:46 -0600
> Subject: [PATCH 1/3] [AArch64] Add more choices for the reciprocal square root
>  approximation
> 
> Allow a target to prefer such operation depending on the operation mode.
> 
> 2016-03-03  Evandro Menezes  <e.menezes@samsung.com>
> 
> gcc/
> 	* config/aarch64/aarch64-protos.h
> 	(AARCH64_APPROX_MODE): New macro.
> 	(AARCH64_APPROX_{NONE,ALL}): Likewise.
> 	(cpu_approx_modes): New structure.
> 	(tune_params): New member "approx_modes".
> 	* config/aarch64/aarch64-tuning-flags.def
> 	(AARCH64_EXTRA_TUNE_APPROX_RSQRT): Remove macro.
> 	* config/aarch64/aarch64.c
> 	({generic,exynosm1,xgene1}_approx_modes): New core
> 	"cpu_approx_modes" structures.
> 	(generic_tunings): New member "approx_modes".
> 	(cortexa35_tunings): Likewise.
> 	(cortexa53_tunings): Likewise.
> 	(cortexa57_tunings): Likewise.
> 	(cortexa72_tunings): Likewise.
> 	(exynosm1_tunings): Likewise.
> 	(thunderx_tunings): Likewise.
> 	(xgene1_tunings): Likewise.
> 	(use_rsqrt_p): New argument for the mode and use new member from
> 	"tune_params".
> 	(aarch64_builtin_reciprocal): Devise mode from builtin.
> 	(aarch64_optab_supported_p): New argument for the mode.
> 	* doc/invoke.texi (-mlow-precision-recip-sqrt): Reword description.

We're almost there, just a couple of style comments left on this one
I think. Thanks for sticking with it so far.

> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index f22a31c..6156281 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -178,6 +178,23 @@ struct cpu_branch_cost
>    const int unpredictable;  /* Unpredictable branch or optimizing for speed.  */
>  };
>  
> +/* Control approximate alternatives to certain FP operators.  */
> +#define AARCH64_APPROX_MODE(MODE) \
> +  ((MIN_MODE_FLOAT <= (MODE) && (MODE) <= MAX_MODE_FLOAT) \
> +   ? (1 << ((MODE) - MIN_MODE_FLOAT)) \
> +   : (MIN_MODE_VECTOR_FLOAT <= (MODE) && (MODE) <= MAX_MODE_VECTOR_FLOAT) \
> +     ? (1 << ((MODE) - MIN_MODE_VECTOR_FLOAT \
> +	      + MAX_MODE_FLOAT - MIN_MODE_FLOAT + 1)) \
> +     : (0))
> +#define AARCH64_APPROX_NONE (0)
> +#define AARCH64_APPROX_ALL (-1)
> +
> +/* Allowed modes for approximations.  */
> +struct cpu_approx_modes
> +{
> +  const unsigned int recip_sqrt; /* Reciprocal square root.  */
> +};
> +
>  struct tune_params
>  {
>    const struct cpu_cost_table *insn_extra_cost;
> @@ -218,6 +235,8 @@ struct tune_params
>    } autoprefetcher_model;
>  
>    unsigned int extra_tuning_flags;
> +
> +  const struct cpu_approx_modes *approx_modes;

Sorry to be irritating, but would you mind putting this up beside the
other "struct" members of tune_params. So directly under the branch_costs
member?

>  };
>  
>  #define AARCH64_FUSION_PAIR(x, name) \


> diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def
> index 7e45a0c..048c2a3 100644
> --- a/gcc/config/aarch64/aarch64-tuning-flags.def
> +++ b/gcc/config/aarch64/aarch64-tuning-flags.def
> @@ -29,5 +29,3 @@
>       AARCH64_TUNE_ to give an enum name. */
>  
>  AARCH64_EXTRA_TUNING_OPTION ("rename_fma_regs", RENAME_FMA_REGS)
> -AARCH64_EXTRA_TUNING_OPTION ("approx_rsqrt", APPROX_RSQRT)
> -
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 9995494..e532cfc 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -38,6 +38,7 @@
>  #include "recog.h"
>  #include "diagnostic.h"
>  #include "insn-attr.h"
> +#include "insn-modes.h"

Do we need this include? My build test of just this patch suggests not (maybe
you need it later in the series?). This can probably be dropped.

>  #include "alias.h"
>  #include "fold-const.h"
>  #include "stor-layout.h"
> @@ -393,6 +394,24 @@ static const struct cpu_branch_cost cortexa57_branch_cost =
>    3   /* Unpredictable.  */
>  };
>  
> +/* Generic approximation modes.  */
> +static const cpu_approx_modes generic_approx_modes =
> +{
> +  AARCH64_APPROX_NONE	/* recip_sqrt  */
> +};
> +
> +/* Approximation modes for Exynos M1.  */
> +static const cpu_approx_modes exynosm1_approx_modes =
> +{
> +  AARCH64_APPROX_ALL	/* recip_sqrt  */
> +};
> +
> +/* Approximation modes for Xgene1.  */

This should be "X-Gene 1" looking at how Applied Micro style it in their
marketing materials.

> +static const cpu_approx_modes xgene1_approx_modes =
> +{
> +  AARCH64_APPROX_ALL	/* recip_sqrt  */
> +};
> +
>  static bool
> -use_rsqrt_p (void)
> +use_rsqrt_p (machine_mode mode)
>  {
>    return (!flag_trapping_math
>  	  && flag_unsafe_math_optimizations
> -	  && ((aarch64_tune_params.extra_tuning_flags
> -	       & AARCH64_EXTRA_TUNE_APPROX_RSQRT)
> +	  && ((aarch64_tune_params.approx_modes->recip_sqrt
> +	       & AARCH64_APPROX_MODE (mode))
>  	      || flag_mrecip_low_precision_sqrt));
>  }
>  
> @@ -7467,7 +7494,9 @@ use_rsqrt_p (void)
>  static tree
>  aarch64_builtin_reciprocal (tree fndecl)
>  {
> -  if (!use_rsqrt_p ())
> +  machine_mode mode = TYPE_MODE (TREE_TYPE (fndecl));
> +
> +  if (!use_rsqrt_p (mode))
>      return NULL_TREE;
>    return aarch64_builtin_rsqrt (DECL_FUNCTION_CODE (fndecl));
>  }
> @@ -13889,13 +13918,13 @@ aarch64_promoted_type (const_tree t)
>  /* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */
>  
>  static bool
> -aarch64_optab_supported_p (int op, machine_mode, machine_mode,
> +aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
>  			   optimization_type opt_type)
>  {
>    switch (op)
>      {
>      case rsqrt_optab:
> -      return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
> +      return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
>  
>      default:
>        return true;
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index f1ac257..4340b08 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -12939,7 +12939,7 @@ corresponding flag to the linker.
>  When calculating the reciprocal square root approximation,
>  uses one less step than otherwise, thus reducing latency and precision.
>  This is only relevant if @option{-ffast-math} enables the reciprocal square root
> -approximation, which in turn depends on the target processor.
> +approximation.

This hunk no longer applies on trunk over Wilco's changes. Can you check
whether you're happy with the wording that is on trunk now, and drop or
update this hunk as appropriate.

Thanks,
James
diff mbox

Patch

From 86d7690632d03ec85fd69bfaef8e89c0542518ad Mon Sep 17 00:00:00 2001
From: Evandro Menezes <e.menezes@samsung.com>
Date: Thu, 3 Mar 2016 18:13:46 -0600
Subject: [PATCH 1/3] [AArch64] Add more choices for the reciprocal square root
 approximation

Allow a target to prefer such operation depending on the operation mode.

2016-03-03  Evandro Menezes  <e.menezes@samsung.com>

gcc/
	* config/aarch64/aarch64-protos.h
	(AARCH64_APPROX_MODE): New macro.
	(AARCH64_APPROX_{NONE,ALL}): Likewise.
	(cpu_approx_modes): New structure.
	(tune_params): New member "approx_modes".
	* config/aarch64/aarch64-tuning-flags.def
	(AARCH64_EXTRA_TUNE_APPROX_RSQRT): Remove macro.
	* config/aarch64/aarch64.c
	({generic,exynosm1,xgene1}_approx_modes): New core
	"cpu_approx_modes" structures.
	(generic_tunings): New member "approx_modes".
	(cortexa35_tunings): Likewise.
	(cortexa53_tunings): Likewise.
	(cortexa57_tunings): Likewise.
	(cortexa72_tunings): Likewise.
	(exynosm1_tunings): Likewise.
	(thunderx_tunings): Likewise.
	(xgene1_tunings): Likewise.
	(use_rsqrt_p): New argument for the mode and use new member from
	"tune_params".
	(aarch64_builtin_reciprocal): Devise mode from builtin.
	(aarch64_optab_supported_p): New argument for the mode.
	* doc/invoke.texi (-mlow-precision-recip-sqrt): Reword description.
---
 gcc/config/aarch64/aarch64-protos.h         | 19 ++++++++++
 gcc/config/aarch64/aarch64-tuning-flags.def |  2 -
 gcc/config/aarch64/aarch64.c                | 57 ++++++++++++++++++++++-------
 gcc/doc/invoke.texi                         |  2 +-
 4 files changed, 63 insertions(+), 17 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index f22a31c..6156281 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -178,6 +178,23 @@  struct cpu_branch_cost
   const int unpredictable;  /* Unpredictable branch or optimizing for speed.  */
 };
 
+/* Control approximate alternatives to certain FP operators.  */
+#define AARCH64_APPROX_MODE(MODE) \
+  ((MIN_MODE_FLOAT <= (MODE) && (MODE) <= MAX_MODE_FLOAT) \
+   ? (1 << ((MODE) - MIN_MODE_FLOAT)) \
+   : (MIN_MODE_VECTOR_FLOAT <= (MODE) && (MODE) <= MAX_MODE_VECTOR_FLOAT) \
+     ? (1 << ((MODE) - MIN_MODE_VECTOR_FLOAT \
+	      + MAX_MODE_FLOAT - MIN_MODE_FLOAT + 1)) \
+     : (0))
+#define AARCH64_APPROX_NONE (0)
+#define AARCH64_APPROX_ALL (-1)
+
+/* Allowed modes for approximations.  */
+struct cpu_approx_modes
+{
+  const unsigned int recip_sqrt; /* Reciprocal square root.  */
+};
+
 struct tune_params
 {
   const struct cpu_cost_table *insn_extra_cost;
@@ -218,6 +235,8 @@  struct tune_params
   } autoprefetcher_model;
 
   unsigned int extra_tuning_flags;
+
+  const struct cpu_approx_modes *approx_modes;
 };
 
 #define AARCH64_FUSION_PAIR(x, name) \
diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def
index 7e45a0c..048c2a3 100644
--- a/gcc/config/aarch64/aarch64-tuning-flags.def
+++ b/gcc/config/aarch64/aarch64-tuning-flags.def
@@ -29,5 +29,3 @@ 
      AARCH64_TUNE_ to give an enum name. */
 
 AARCH64_EXTRA_TUNING_OPTION ("rename_fma_regs", RENAME_FMA_REGS)
-AARCH64_EXTRA_TUNING_OPTION ("approx_rsqrt", APPROX_RSQRT)
-
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 9995494..e532cfc 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -38,6 +38,7 @@ 
 #include "recog.h"
 #include "diagnostic.h"
 #include "insn-attr.h"
+#include "insn-modes.h"
 #include "alias.h"
 #include "fold-const.h"
 #include "stor-layout.h"
@@ -393,6 +394,24 @@  static const struct cpu_branch_cost cortexa57_branch_cost =
   3   /* Unpredictable.  */
 };
 
+/* Generic approximation modes.  */
+static const cpu_approx_modes generic_approx_modes =
+{
+  AARCH64_APPROX_NONE	/* recip_sqrt  */
+};
+
+/* Approximation modes for Exynos M1.  */
+static const cpu_approx_modes exynosm1_approx_modes =
+{
+  AARCH64_APPROX_ALL	/* recip_sqrt  */
+};
+
+/* Approximation modes for Xgene1.  */
+static const cpu_approx_modes xgene1_approx_modes =
+{
+  AARCH64_APPROX_ALL	/* recip_sqrt  */
+};
+
 static const struct tune_params generic_tunings =
 {
   &cortexa57_extra_costs,
@@ -414,7 +433,8 @@  static const struct tune_params generic_tunings =
   0,	/* max_case_values.  */
   0,	/* cache_line_size.  */
   tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_NONE)	/* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
+  &generic_approx_modes	/* approx_modes.  */
 };
 
 static const struct tune_params cortexa35_tunings =
@@ -439,7 +459,8 @@  static const struct tune_params cortexa35_tunings =
   0,	/* max_case_values.  */
   0,	/* cache_line_size.  */
   tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_NONE)	/* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
+  &generic_approx_modes	/* approx_modes.  */
 };
 
 static const struct tune_params cortexa53_tunings =
@@ -464,7 +485,8 @@  static const struct tune_params cortexa53_tunings =
   0,	/* max_case_values.  */
   0,	/* cache_line_size.  */
   tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_NONE)	/* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
+  &generic_approx_modes	/* approx_modes.  */
 };
 
 static const struct tune_params cortexa57_tunings =
@@ -489,7 +511,8 @@  static const struct tune_params cortexa57_tunings =
   0,	/* max_case_values.  */
   0,	/* cache_line_size.  */
   tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS)	/* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS),	/* tune_flags.  */
+  &generic_approx_modes	/* approx_modes.  */
 };
 
 static const struct tune_params cortexa72_tunings =
@@ -514,7 +537,8 @@  static const struct tune_params cortexa72_tunings =
   0,	/* max_case_values.  */
   0,	/* cache_line_size.  */
   tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_NONE)	/* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
+  &generic_approx_modes	/* approx_modes.  */
 };
 
 static const struct tune_params exynosm1_tunings =
@@ -538,7 +562,8 @@  static const struct tune_params exynosm1_tunings =
   48,	/* max_case_values.  */
   64,	/* cache_line_size.  */
   tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_APPROX_RSQRT) /* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
+  &exynosm1_approx_modes	/* approx_modes.  */
 };
 
 static const struct tune_params thunderx_tunings =
@@ -562,7 +587,8 @@  static const struct tune_params thunderx_tunings =
   0,	/* max_case_values.  */
   0,	/* cache_line_size.  */
   tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_NONE)	/* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
+  &generic_approx_modes	/* approx_modes.  */
 };
 
 static const struct tune_params xgene1_tunings =
@@ -586,7 +612,8 @@  static const struct tune_params xgene1_tunings =
   0,	/* max_case_values.  */
   0,	/* cache_line_size.  */
   tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_APPROX_RSQRT)	/* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
+  &xgene1_approx_modes	/* approx_modes.  */
 };
 
 /* Support for fine-grained override of the tuning structures.  */
@@ -7452,12 +7479,12 @@  aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
    to optimize 1.0/sqrt.  */
 
 static bool
-use_rsqrt_p (void)
+use_rsqrt_p (machine_mode mode)
 {
   return (!flag_trapping_math
 	  && flag_unsafe_math_optimizations
-	  && ((aarch64_tune_params.extra_tuning_flags
-	       & AARCH64_EXTRA_TUNE_APPROX_RSQRT)
+	  && ((aarch64_tune_params.approx_modes->recip_sqrt
+	       & AARCH64_APPROX_MODE (mode))
 	      || flag_mrecip_low_precision_sqrt));
 }
 
@@ -7467,7 +7494,9 @@  use_rsqrt_p (void)
 static tree
 aarch64_builtin_reciprocal (tree fndecl)
 {
-  if (!use_rsqrt_p ())
+  machine_mode mode = TYPE_MODE (TREE_TYPE (fndecl));
+
+  if (!use_rsqrt_p (mode))
     return NULL_TREE;
   return aarch64_builtin_rsqrt (DECL_FUNCTION_CODE (fndecl));
 }
@@ -13889,13 +13918,13 @@  aarch64_promoted_type (const_tree t)
 /* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */
 
 static bool
-aarch64_optab_supported_p (int op, machine_mode, machine_mode,
+aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
 			   optimization_type opt_type)
 {
   switch (op)
     {
     case rsqrt_optab:
-      return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
+      return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
 
     default:
       return true;
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index f1ac257..4340b08 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -12939,7 +12939,7 @@  corresponding flag to the linker.
 When calculating the reciprocal square root approximation,
 uses one less step than otherwise, thus reducing latency and precision.
 This is only relevant if @option{-ffast-math} enables the reciprocal square root
-approximation, which in turn depends on the target processor.
+approximation.
 
 @item -march=@var{name}
 @opindex march
-- 
2.6.3