diff mbox series

internal-fn: Avoid dropping the lhs of some calls [PR94941]

Message ID mpth7wvkgjr.fsf@arm.com
State New
Headers show
Series internal-fn: Avoid dropping the lhs of some calls [PR94941] | expand

Commit Message

Richard Sandiford May 4, 2020, 12:53 p.m. UTC
create_output_operand coerces an output operand to the insn's
predicates, using a suggested rtx location if convenient.
But if that rtx location is actually required rather than
optional, the builder of the insn has to emit a move afterwards.

(We could instead add a new interface that does this automatically,
but that's future work.)

This PR shows that we were failing to emit the move for some of the
vector load internal functions.  I think there are other routines in
internal-fn.c that potentially have the same problem, but this patch is
supposed to be a conservative subset suitable for backporting to GCC 10.

Tested on aarch64-linux-gnu (with and without SVE), arm-linux-gnueabihf
and x86_64-linux-gnu.  OK for trunk and GCC 10 branch?  It would be
nice if we could have this for GCC 10.1, but I realise it wouldn't
justify a new RC on its own, so I'll assume that "GCC 10 branch" is
"GCC 10 branch after the release".

Richard


2020-05-04  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	PR middle-end/94941
	* internal-fn.c (expand_load_lanes_optab_fn): Emit a move if the
	chosen lhs is different from the gcall lhs.
	(expand_mask_load_optab_fn): Likewise.
	(expand_gather_load_optab_fn): Likewise.

gcc/testsuite/
	PR middle-end/94941
	* gcc.target/aarch64/sve/acle/general/unoptimized_1.c: New test.
---
 gcc/internal-fn.c                             |  6 ++++++
 .../aarch64/sve/acle/general/unoptimized_1.c  | 21 +++++++++++++++++++
 2 files changed, 27 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general/unoptimized_1.c

Comments

Richard Biener May 4, 2020, 1:03 p.m. UTC | #1
On Mon, May 4, 2020 at 2:55 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> create_output_operand coerces an output operand to the insn's
> predicates, using a suggested rtx location if convenient.
> But if that rtx location is actually required rather than
> optional, the builder of the insn has to emit a move afterwards.
>
> (We could instead add a new interface that does this automatically,
> but that's future work.)
>
> This PR shows that we were failing to emit the move for some of the
> vector load internal functions.  I think there are other routines in
> internal-fn.c that potentially have the same problem, but this patch is
> supposed to be a conservative subset suitable for backporting to GCC 10.
>
> Tested on aarch64-linux-gnu (with and without SVE), arm-linux-gnueabihf
> and x86_64-linux-gnu.  OK for trunk and GCC 10 branch?  It would be
> nice if we could have this for GCC 10.1, but I realise it wouldn't
> justify a new RC on its own, so I'll assume that "GCC 10 branch" is
> "GCC 10 branch after the release".

We seem to have inconsistent use of rtx_equal_p vs. pointer comparison
of target and ops[0].value - is that because some are eventually MEMs
and some (targets?) are always registers?  Of course I prefer the
cheaper != comparison.

Otherwise looks obvious to me, thus OK for trunk and branch (I don't
see what it could break so from my side it would be OK for 10.1
but let's ask Jakub).

Richard.

> Richard
>
>
> 2020-05-04  Richard Sandiford  <richard.sandiford@arm.com>
>
> gcc/
>         PR middle-end/94941
>         * internal-fn.c (expand_load_lanes_optab_fn): Emit a move if the
>         chosen lhs is different from the gcall lhs.
>         (expand_mask_load_optab_fn): Likewise.
>         (expand_gather_load_optab_fn): Likewise.
>
> gcc/testsuite/
>         PR middle-end/94941
>         * gcc.target/aarch64/sve/acle/general/unoptimized_1.c: New test.
> ---
>  gcc/internal-fn.c                             |  6 ++++++
>  .../aarch64/sve/acle/general/unoptimized_1.c  | 21 +++++++++++++++++++
>  2 files changed, 27 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general/unoptimized_1.c
>
> diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> index 52d1638917a..5e9aa60721e 100644
> --- a/gcc/internal-fn.c
> +++ b/gcc/internal-fn.c
> @@ -167,6 +167,8 @@ expand_load_lanes_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
>    create_output_operand (&ops[0], target, TYPE_MODE (type));
>    create_fixed_operand (&ops[1], mem);
>    expand_insn (get_multi_vector_move (type, optab), 2, ops);
> +  if (!rtx_equal_p (target, ops[0].value))
> +    emit_move_insn (target, ops[0].value);
>  }
>
>  /* Expand STORE_LANES call STMT using optab OPTAB.  */
> @@ -2507,6 +2509,8 @@ expand_mask_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
>    create_fixed_operand (&ops[1], mem);
>    create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
>    expand_insn (icode, 3, ops);
> +  if (!rtx_equal_p (target, ops[0].value))
> +    emit_move_insn (target, ops[0].value);
>  }
>
>  #define expand_mask_load_lanes_optab_fn expand_mask_load_optab_fn
> @@ -2827,6 +2831,8 @@ expand_gather_load_optab_fn (internal_fn, gcall *stmt, direct_optab optab)
>    insn_code icode = convert_optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs)),
>                                            TYPE_MODE (TREE_TYPE (offset)));
>    expand_insn (icode, i, ops);
> +  if (!rtx_equal_p (lhs_rtx, ops[0].value))
> +    emit_move_insn (lhs_rtx, ops[0].value);
>  }
>
>  /* Expand DIVMOD() using:
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/unoptimized_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/unoptimized_1.c
> new file mode 100644
> index 00000000000..18d73e21a83
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/unoptimized_1.c
> @@ -0,0 +1,21 @@
> +/* { dg-do run { target aarch64_sve_hw } } */
> +
> +#include <arm_sve.h>
> +
> +svfloat32_t
> +foo (float *ptr)
> +{
> +  svbool_t pg = svptrue_pat_b32 (SV_VL1);
> +  svfloat32_t res = svld1 (pg, ptr);
> +  return res;
> +}
> +
> +int
> +main (void)
> +{
> +  svbool_t pg = svptrue_pat_b32 (SV_VL1);
> +  float x[1] = { 1 };
> +  if (svptest_any (pg, svcmpne (pg, foo (x), 1.0)))
> +    __builtin_abort ();
> +  return 0;
> +}
Jakub Jelinek May 4, 2020, 1:11 p.m. UTC | #2
On Mon, May 04, 2020 at 03:03:01PM +0200, Richard Biener wrote:
> We seem to have inconsistent use of rtx_equal_p vs. pointer comparison
> of target and ops[0].value - is that because some are eventually MEMs
> and some (targets?) are always registers?  Of course I prefer the
> cheaper != comparison.

We don't know what exactly the target will be, pseudo is most likely, but
could be something else too.
And, rtx_equal_p starts with == comparison, so it is just the call overhead.

> Otherwise looks obvious to me, thus OK for trunk and branch (I don't
> see what it could break so from my side it would be OK for 10.1
> but let's ask Jakub).

I'm fine with it for 10.1, if the target is right, it won't do anything,
otherwise fix a wrong-code, and is limited to arm/aarch64 too (no other
target AFAIK has such ifns).

	Jakub
diff mbox series

Patch

diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 52d1638917a..5e9aa60721e 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -167,6 +167,8 @@  expand_load_lanes_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
   create_output_operand (&ops[0], target, TYPE_MODE (type));
   create_fixed_operand (&ops[1], mem);
   expand_insn (get_multi_vector_move (type, optab), 2, ops);
+  if (!rtx_equal_p (target, ops[0].value))
+    emit_move_insn (target, ops[0].value);
 }
 
 /* Expand STORE_LANES call STMT using optab OPTAB.  */
@@ -2507,6 +2509,8 @@  expand_mask_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
   create_fixed_operand (&ops[1], mem);
   create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
   expand_insn (icode, 3, ops);
+  if (!rtx_equal_p (target, ops[0].value))
+    emit_move_insn (target, ops[0].value);
 }
 
 #define expand_mask_load_lanes_optab_fn expand_mask_load_optab_fn
@@ -2827,6 +2831,8 @@  expand_gather_load_optab_fn (internal_fn, gcall *stmt, direct_optab optab)
   insn_code icode = convert_optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs)),
 					   TYPE_MODE (TREE_TYPE (offset)));
   expand_insn (icode, i, ops);
+  if (!rtx_equal_p (lhs_rtx, ops[0].value))
+    emit_move_insn (lhs_rtx, ops[0].value);
 }
 
 /* Expand DIVMOD() using:
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/unoptimized_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/unoptimized_1.c
new file mode 100644
index 00000000000..18d73e21a83
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/unoptimized_1.c
@@ -0,0 +1,21 @@ 
+/* { dg-do run { target aarch64_sve_hw } } */
+
+#include <arm_sve.h>
+
+svfloat32_t
+foo (float *ptr)
+{
+  svbool_t pg = svptrue_pat_b32 (SV_VL1);
+  svfloat32_t res = svld1 (pg, ptr);
+  return res;
+}
+
+int
+main (void)
+{
+  svbool_t pg = svptrue_pat_b32 (SV_VL1);
+  float x[1] = { 1 };
+  if (svptest_any (pg, svcmpne (pg, foo (x), 1.0)))
+    __builtin_abort ();
+  return 0;
+}