diff mbox

Improve _mm*loadu* intrinsics handling (PR target/59539)

Message ID 20131218151152.GD892@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Dec. 18, 2013, 3:11 p.m. UTC
Hi!

As discussed in the PR, this patch similarly to the recent changes
in movmisalign expansion for TARGET_AVX for unaligned loads from
misaligned_operand just expands those as *mov<mode>_internal pattern,
because that pattern emits vmovdqu/vmovup[sd] too, but doesn't contain
UNSPECs and thus can be also merged into most other AVX insns that use
the load target if those insns accept a memory operand.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2013-12-18  Jakub Jelinek  <jakub@redhat.com>

	PR target/59539
	* config/i386/sse.md
	(<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>,
	<sse2_avx_avx512f>_loaddqu<mode><mask_name>): New expanders,
	prefix existing define_insn names with *.

	* gcc.target/i386/pr59539-1.c: New test.
	* gcc.target/i386/pr59539-2.c: New test.


	Jakub

Comments

Uros Bizjak Dec. 18, 2013, 4:28 p.m. UTC | #1
On Wed, Dec 18, 2013 at 4:11 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> As discussed in the PR, this patch similarly to the recent changes
> in movmisalign expansion for TARGET_AVX for unaligned loads from
> misaligned_operand just expands those as *mov<mode>_internal pattern,
> because that pattern emits vmovdqu/vmovup[sd] too, but doesn't contain
> UNSPECs and thus can be also merged into most other AVX insns that use
> the load target if those insns accept a memory operand.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2013-12-18  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/59539
>         * config/i386/sse.md
>         (<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>,
>         <sse2_avx_avx512f>_loaddqu<mode><mask_name>): New expanders,
>         prefix existing define_insn names with *.
>
>         * gcc.target/i386/pr59539-1.c: New test.
>         * gcc.target/i386/pr59539-2.c: New test.

OK for mainline, with a FIXME comment to review !<mask_applied>
condition once avx512f merge is finished.

Thanks,
Uros.
diff mbox

Patch

--- gcc/config/i386/sse.md.jj	2013-12-10 12:43:21.000000000 +0100
+++ gcc/config/i386/sse.md	2013-12-18 11:10:36.428643400 +0100
@@ -912,7 +912,27 @@  (define_expand "movmisalign<mode>"
   DONE;
 })
 
-(define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
+(define_expand "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
+  [(set (match_operand:VF 0 "register_operand")
+	(unspec:VF [(match_operand:VF 1 "nonimmediate_operand")]
+	  UNSPEC_LOADU))]
+  "TARGET_SSE && <mask_mode512bit_condition>"
+{
+  /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
+     just fine if misaligned_operand is true, and without the UNSPEC it can
+     be combined with arithmetic instructions.  If misaligned_operand is
+     false, still emit UNSPEC_LOADU insn to honor user's request for
+     misaligned load.  */
+  if (TARGET_AVX
+      && misaligned_operand (operands[1], <MODE>mode)
+      && !<mask_applied>)
+    {
+      emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+      DONE;
+    }
+})
+
+(define_insn "*<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
   [(set (match_operand:VF 0 "register_operand" "=v")
 	(unspec:VF
 	  [(match_operand:VF 1 "nonimmediate_operand" "vm")]
@@ -999,7 +1019,28 @@  (define_insn "avx512f_storeu<ssemodesuff
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
+(define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
+  [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand")
+	(unspec:VI_UNALIGNED_LOADSTORE
+	  [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand")]
+	  UNSPEC_LOADU))]
+  "TARGET_SSE2 && <mask_mode512bit_condition>"
+{
+  /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
+     just fine if misaligned_operand is true, and without the UNSPEC it can
+     be combined with arithmetic instructions.  If misaligned_operand is
+     false, still emit UNSPEC_LOADU insn to honor user's request for
+     misaligned load.  */
+  if (TARGET_AVX
+      && misaligned_operand (operands[1], <MODE>mode)
+      && !<mask_applied>)
+    {
+      emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+      DONE;
+    }
+})
+
+(define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
   [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand" "=v")
 	(unspec:VI_UNALIGNED_LOADSTORE
 	  [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand" "vm")]
--- gcc/testsuite/gcc.target/i386/pr59539-1.c.jj	2013-12-18 08:46:26.023864371 +0100
+++ gcc/testsuite/gcc.target/i386/pr59539-1.c	2013-12-18 08:53:12.304743270 +0100
@@ -0,0 +1,16 @@ 
+/* PR target/59539 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+
+#include <immintrin.h>
+
+int
+foo (void *p1, void *p2)
+{
+  __m128i d1 = _mm_loadu_si128 ((__m128i *) p1);
+  __m128i d2 = _mm_loadu_si128 ((__m128i *) p2);
+  __m128i result = _mm_cmpeq_epi16 (d1, d2);
+  return _mm_movemask_epi8 (result);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu" 1 } } */
--- gcc/testsuite/gcc.target/i386/pr59539-2.c.jj	2013-12-18 08:46:33.130826198 +0100
+++ gcc/testsuite/gcc.target/i386/pr59539-2.c	2013-12-18 08:47:14.890608917 +0100
@@ -0,0 +1,16 @@ 
+/* PR target/59539 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2" } */
+
+#include <immintrin.h>
+
+int
+foo (void *p1, void *p2)
+{
+  __m256i d1 = _mm256_loadu_si256 ((__m256i *) p1);
+  __m256i d2 = _mm256_loadu_si256 ((__m256i *) p2);
+  __m256i result = _mm256_cmpeq_epi16 (d1, d2);
+  return _mm256_movemask_epi8 (result);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu" 1 } } */