@@ -8221,14 +8221,17 @@ (define_split
|| (memory_operand (operands[0], V8DImode)
&& register_operand (operands[1], V8DImode)))
{
+ /* V8DI only guarantees 8-byte alignment, whereas TImode requires 16. */
+ auto mode = STRICT_ALIGNMENT ? DImode : TImode;
+ int increment = GET_MODE_SIZE (mode);
std::pair<rtx, rtx> last_pair = {};
- for (int offset = 0; offset < 64; offset += 16)
+ for (int offset = 0; offset < 64; offset += increment)
{
std::pair<rtx, rtx> pair = {
- simplify_gen_subreg (TImode, operands[0], V8DImode, offset),
- simplify_gen_subreg (TImode, operands[1], V8DImode, offset)
+ simplify_gen_subreg (mode, operands[0], V8DImode, offset),
+ simplify_gen_subreg (mode, operands[1], V8DImode, offset)
};
- if (register_operand (pair.first, TImode)
+ if (register_operand (pair.first, mode)
&& reg_overlap_mentioned_p (pair.first, pair.second))
last_pair = pair;
else
new file mode 100644
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-mstrict-align" } */
+/* PR target/113657 */
+
+#pragma GCC target "+ls64"
+#pragma GCC aarch64 "arm_acle.h"
+__arm_data512_t foo(__arm_data512_t* ptr) { return *ptr; }
After r14-1187-gd6b756447cd58b, simplify_gen_subreg can return NULL for an "unaligned" memory subreg. Since V8DI is only guaranteed 8-byte alignment whereas TImode requires 16-byte alignment, using TImode under strict alignment causes simplify_gen_subreg to return NULL. This fixes the issue by using DImode instead of TImode in the loop when STRICT_ALIGNMENT is set. The later LDP/STP pass can then combine the resulting DImode moves back into LDP/STP if needed. Since strict alignment is less performance-critical (it is usually used only for firmware and early boot), not generating LDP/STP here is OK. Built and tested for aarch64-linux-gnu with no regressions. PR target/113657 gcc/ChangeLog: * config/aarch64/aarch64-simd.md (split for movv8di): For strict-alignment mode, use DImode instead of TImode. gcc/testsuite/ChangeLog: * gcc.target/aarch64/acle/ls64_strict_align.c: New test. Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com> --- gcc/config/aarch64/aarch64-simd.md | 11 +++++++---- .../gcc.target/aarch64/acle/ls64_strict_align.c | 7 +++++++ 2 files changed, 14 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/ls64_strict_align.c