diff mbox series

[AARCH64] Enable compare branch fusion

Message ID VI1PR0801MB2127EC4F7CC47F70B4AFF96683290@VI1PR0801MB2127.eurprd08.prod.outlook.com
State New
Headers show
Series [AARCH64] Enable compare branch fusion | expand

Commit Message

Wilco Dijkstra Dec. 24, 2019, 3:57 p.m. UTC
Enable the most basic form of compare-branch fusion since various CPUs
support it. This has no measurable effect on cores which don't support
branch fusion, but increases fusion opportunities on cores which do.

Bootstrapped on AArch64, OK for commit?

ChangeLog:
2019-12-24  Wilco Dijkstra  <wdijkstr@arm.com>

	* config/aarch64/aarch64.c (generic_tunings): Add branch fusion.
	(neoversen1_tunings): Likewise.

--

Comments

Wilco Dijkstra Jan. 16, 2020, 5:43 p.m. UTC | #1
ping


Enable the most basic form of compare-branch fusion since various CPUs
support it. This has no measurable effect on cores which don't support
branch fusion, but increases fusion opportunities on cores which do.

Bootstrapped on AArch64, OK for commit?

ChangeLog:
2019-12-24  Wilco Dijkstra  <wdijkstr@arm.com>

        * config/aarch64/aarch64.c (generic_tunings): Add branch fusion.
        (neoversen1_tunings): Likewise.

--
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index a3b18b381e1748f8fe5e522bdec4f7c850821fe8..1c32a3543bec4031cc9b641973101829c77296b5 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -726,7 +726,7 @@ static const struct tune_params generic_tunings =
   SVE_NOT_IMPLEMENTED, /* sve_width  */
   4, /* memmov_cost  */
   2, /* issue_rate  */
-  (AARCH64_FUSE_AES_AESMC), /* fusible_ops  */
+  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops  */
   "16:12",     /* function_align.  */
   "4", /* jump_align.  */
   "8", /* loop_align.  */
@@ -1130,7 +1130,7 @@ static const struct tune_params neoversen1_tunings =
   SVE_NOT_IMPLEMENTED, /* sve_width  */
   4, /* memmov_cost  */
   3, /* issue_rate  */
-  AARCH64_FUSE_AES_AESMC, /* fusible_ops  */
+  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops  */
   "32:16",     /* function_align.  */
   "32:16",     /* jump_align.  */
   "32:16",     /* loop_align.  */
Richard Sandiford Jan. 17, 2020, 9:16 a.m. UTC | #2
Wilco Dijkstra <Wilco.Dijkstra@arm.com> writes:
> Enable the most basic form of compare-branch fusion since various CPUs
> support it. This has no measurable effect on cores which don't support
> branch fusion, but increases fusion opportunities on cores which do.

If you're able to say for the record which cores you tested, then that'd
be good.

> Bootstrapped on AArch64, OK for commit?
>
> ChangeLog:
> 2019-12-24  Wilco Dijkstra  <wdijkstr@arm.com>
>
> * config/aarch64/aarch64.c (generic_tunings): Add branch fusion.
> (neoversen1_tunings): Likewise.

OK, thanks.  I agree there doesn't seem to be an obvious reason why this
would pessimise any cores significantly.  And it looked from a quick
check like all AArch64 cores give these compares the lowest in-use
latency (as expected).

We can revisit this if anyone finds any counterexamples.

Richard


>
> --
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index a3b18b381e1748f8fe5e522bdec4f7c850821fe8..1c32a3543bec4031cc9b641973101829c77296b5 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -726,7 +726,7 @@ static const struct tune_params generic_tunings =
>    SVE_NOT_IMPLEMENTED, /* sve_width  */
>    4, /* memmov_cost  */
>    2, /* issue_rate  */
> -  (AARCH64_FUSE_AES_AESMC), /* fusible_ops  */
> +  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops  */
>    "16:12",/* function_align.  */
>    "4",/* jump_align.  */
>    "8",/* loop_align.  */
> @@ -1130,7 +1130,7 @@ static const struct tune_params neoversen1_tunings =
>    SVE_NOT_IMPLEMENTED, /* sve_width  */
>    4, /* memmov_cost  */
>    3, /* issue_rate  */
> -  AARCH64_FUSE_AES_AESMC, /* fusible_ops  */
> +  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops  */
>    "32:16",/* function_align.  */
>    "32:16",/* jump_align.  */
>    "32:16",/* loop_align.  */
Wilco Dijkstra Jan. 17, 2020, 2:37 p.m. UTC | #3
Hi Richard,

> If you're able to say for the record which cores you tested, then that'd
> be good.

I've mostly checked it on Cortex-A57 - if there is any effect, it would be on
older cores.

> OK, thanks.  I agree there doesn't seem to be an obvious reason why this
> would pessimise any cores significantly.  And it looked from a quick
> check like all AArch64 cores give these compares the lowest in-use
> latency (as expected).

Indeed.

> We can revisit this if anyone finds any counterexamples.

Yes - it's unlikely there are any though!

Cheers,
Wilco





>

> --

> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c

> index a3b18b381e1748f8fe5e522bdec4f7c850821fe8..1c32a3543bec4031cc9b641973101829c77296b5 100644

> --- a/gcc/config/aarch64/aarch64.c

> +++ b/gcc/config/aarch64/aarch64.c

> @@ -726,7 +726,7 @@ static const struct tune_params generic_tunings =

>    SVE_NOT_IMPLEMENTED, /* sve_width  */

>    4, /* memmov_cost  */

>    2, /* issue_rate  */

> -  (AARCH64_FUSE_AES_AESMC), /* fusible_ops  */

> +  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops  */

>    "16:12",/* function_align.  */

>    "4",/* jump_align.  */

>    "8",/* loop_align.  */

> @@ -1130,7 +1130,7 @@ static const struct tune_params neoversen1_tunings =

>    SVE_NOT_IMPLEMENTED, /* sve_width  */

>    4, /* memmov_cost  */

>    3, /* issue_rate  */

> -  AARCH64_FUSE_AES_AESMC, /* fusible_ops  */

> +  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops  */

>    "32:16",/* function_align.  */

>    "32:16",/* jump_align.  */

>    "32:16",/* loop_align.  */
diff mbox series

Patch

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index a3b18b381e1748f8fe5e522bdec4f7c850821fe8..1c32a3543bec4031cc9b641973101829c77296b5 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -726,7 +726,7 @@  static const struct tune_params generic_tunings =
   SVE_NOT_IMPLEMENTED, /* sve_width  */
   4, /* memmov_cost  */
   2, /* issue_rate  */
-  (AARCH64_FUSE_AES_AESMC), /* fusible_ops  */
+  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops  */
   "16:12",	/* function_align.  */
   "4",	/* jump_align.  */
   "8",	/* loop_align.  */
@@ -1130,7 +1130,7 @@  static const struct tune_params neoversen1_tunings =
   SVE_NOT_IMPLEMENTED, /* sve_width  */
   4, /* memmov_cost  */
   3, /* issue_rate  */
-  AARCH64_FUSE_AES_AESMC, /* fusible_ops  */
+  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops  */
   "32:16",	/* function_align.  */
   "32:16",	/* jump_align.  */
   "32:16",	/* loop_align.  */