Patchwork [i386] : Fix PR 22152 again

login
register
mail settings
Submitter Uros Bizjak
Date Sept. 6, 2010, 2:24 p.m.
Message ID <AANLkTi=kDA+_oMGOsRKOPmGDqwg4KdgvLEqUsNNU0Gj2@mail.gmail.com>
Download mbox | patch
Permalink /patch/63923/
State New
Headers show

Comments

Uros Bizjak - Sept. 6, 2010, 2:24 p.m.
Hello!

MMX moves need to be re-tuned a bit.

Attached patch generates expected loop kernel:

.L3:
	movq	(%ecx,%eax,8), %mm0
	paddq	(%ebx,%eax,8), %mm0
	addl	$1, %eax
	cmpl	%edx, %eax
	jne	.L3

The (unrelated) interesting part is with -m64 compilation:

	movl	$1, %eax
	movl	$1, %ecx
	.p2align 4,,10
	.p2align 3
.L3:
	movq	(%rdi,%rax,8), %mm0
	addl	$1, %ecx
	paddq	(%rsi,%rax,8), %mm0
(1)	mov	%ecx, %eax
	cmpq	%rdx, %rax
(2)	movq2dq	%mm0, %xmm0
	jb	.L3


(1) No need for another variable...

(2) This move should really be pushed out of the loop (at the bottom).

However, the source code is:

--cut here--
__m64
unsigned_add3 (const __m64 * a, const __m64 * b, unsigned long count)
{
  __m64 sum;
  unsigned int i;

  for (i = 1; i < count; i++)
    sum = _mm_add_si64 (a[i], b[i]);

  return sum;
}
--cut here--

So, why didn't gcc figured out that the loop can be reduced to "sum =
_mm_add_si64 (a[count-1], b[count-1]);" only?

2010-09-06  Uros Bizjak  <ubizjak@gmail.com>

	PR target/22152
	* config/i386/mmx.md (*mov<mode>_internal_rex64,
	*mov<mode>_internal_avx, *mov<mode>_internal,
	*movv2sf_internal_rex64_avx, *movv2sf_internal_rex64,
	*movv2sf_internal_avx, *movv2sf_internal): Split out !y-!y alternative.

Patch was bootstrapped on x86_64-pc-linux-gnu {,-m32}. Will be
committed to SVN soon.

Uros.
Richard Guenther - Sept. 6, 2010, 2:29 p.m.
2010/9/6 Uros Bizjak <ubizjak@gmail.com>:
> Hello!
>
> MMX moves need to be re-tuned a bit.
>
> Attached patch generates expected loop kernel:
>
> .L3:
>        movq    (%ecx,%eax,8), %mm0
>        paddq   (%ebx,%eax,8), %mm0
>        addl    $1, %eax
>        cmpl    %edx, %eax
>        jne     .L3
>
> The (unrelated) interesting part is with -m64 compilation:
>
>        movl    $1, %eax
>        movl    $1, %ecx
>        .p2align 4,,10
>        .p2align 3
> .L3:
>        movq    (%rdi,%rax,8), %mm0
>        addl    $1, %ecx
>        paddq   (%rsi,%rax,8), %mm0
> (1)     mov     %ecx, %eax
>        cmpq    %rdx, %rax
> (2)     movq2dq %mm0, %xmm0
>        jb      .L3
>
>
> (1) No need for another variable...
>
> (2) This move should really be pushed out of the loop (at the bottom).
>
> However, the source code is:
>
> --cut here--
> __m64
> unsigned_add3 (const __m64 * a, const __m64 * b, unsigned long count)
> {
>  __m64 sum;
>  unsigned int i;
>
>  for (i = 1; i < count; i++)
>    sum = _mm_add_si64 (a[i], b[i]);
>
>  return sum;
> }
> --cut here--
>
> So, why didn't gcc figured out that the loop can be reduced to "sum =
> _mm_add_si64 (a[count-1], b[count-1]);" only?

Because we don't do final value replacement that involves function calls.

Richard.

>
> 2010-09-06  Uros Bizjak  <ubizjak@gmail.com>
>
>        PR target/22152
>        * config/i386/mmx.md (*mov<mode>_internal_rex64,
>        *mov<mode>_internal_avx, *mov<mode>_internal,
>        *movv2sf_internal_rex64_avx, *movv2sf_internal_rex64,
>        *movv2sf_internal_avx, *movv2sf_internal): Split out !y-!y alternative.
>
> Patch was bootstrapped on x86_64-pc-linux-gnu {,-m32}. Will be
> committed to SVN soon.
>
> Uros.
>

Patch

Index: mmx.md
===================================================================
--- mmx.md	(revision 163902)
+++ mmx.md	(working copy)
@@ -65,9 +65,9 @@ 
 
 (define_insn "*mov<mode>_internal_rex64"
   [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand"
-				"=rm,r,!?y,!?y ,m  ,!y,*Y2,x,x ,m,r,Yi")
+	 "=rm,r,!?y,!y,!?y,m  ,!y ,*Y2,x,x ,m,r ,Yi")
 	(match_operand:MMXMODEI8 1 "vector_move_operand"
-				"Cr ,m,C  ,!?ym,!?y,*Y2,!y,C,xm,x,Yi,r"))]
+	 "Cr ,m,C  ,!y,m  ,!?y,*Y2,!y ,C,xm,x,Yi,r"))]
   "TARGET_64BIT && TARGET_MMX
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
@@ -76,6 +76,7 @@ 
     pxor\t%0, %0
     movq\t{%1, %0|%0, %1}
     movq\t{%1, %0|%0, %1}
+    movq\t{%1, %0|%0, %1}
     movdq2q\t{%1, %0|%0, %1}
     movq2dq\t{%1, %0|%0, %1}
     %vpxor\t%0, %d0
@@ -83,31 +84,32 @@ 
     %vmovq\t{%1, %0|%0, %1}
     %vmovq\t{%1, %0|%0, %1}
     %vmovq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "imov,imov,mmx,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,ssemov,ssemov")
-   (set_attr "unit" "*,*,*,*,*,mmx,mmx,*,*,*,*,*")
-   (set_attr "prefix_rep" "*,*,*,*,*,1,1,*,1,*,*,*")
-   (set_attr "prefix_data16" "*,*,*,*,*,*,*,*,*,1,1,1")
+  [(set_attr "type" "imov,imov,mmx,mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,ssemov,ssemov")
+   (set_attr "unit" "*,*,*,*,*,*,mmx,mmx,*,*,*,*,*")
+   (set_attr "prefix_rep" "*,*,*,*,*,*,1,1,*,1,*,*,*")
+   (set_attr "prefix_data16" "*,*,*,*,*,*,*,*,*,*,1,1,1")
    (set (attr "prefix_rex")
-     (if_then_else (eq_attr "alternative" "8,9")
+     (if_then_else (eq_attr "alternative" "9,10")
        (symbol_ref "x86_extended_reg_mentioned_p (insn)")
        (const_string "*")))
    (set (attr "prefix")
-     (if_then_else (eq_attr "alternative" "7,8,9,10,11")
+     (if_then_else (eq_attr "alternative" "8,9,10,11,12")
        (const_string "maybe_vex")
        (const_string "orig")))
    (set_attr "mode" "DI")])
 
 (define_insn "*mov<mode>_internal_avx"
   [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand"
-			"=!?y,!?y,m  ,!y ,*Y2,*Y2,*Y2 ,m  ,r  ,m")
+	 "=!?y,!y,!?y,m  ,!y ,*Y2,*Y2,*Y2 ,m  ,r  ,m")
 	(match_operand:MMXMODEI8 1 "vector_move_operand"
-			"C   ,!ym,!?y,*Y2,!y ,C  ,*Y2m,*Y2,irm,r"))]
+	 "C   ,!y,m  ,!?y,*Y2,!y ,C  ,*Y2m,*Y2,irm,r"))]
   "TARGET_AVX
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
     pxor\t%0, %0
     movq\t{%1, %0|%0, %1}
     movq\t{%1, %0|%0, %1}
+    movq\t{%1, %0|%0, %1}
     movdq2q\t{%1, %0|%0, %1}
     movq2dq\t{%1, %0|%0, %1}
     vpxor\t%0, %0, %0
@@ -115,26 +117,27 @@ 
     vmovq\t{%1, %0|%0, %1}
     #
     #"
-  [(set_attr "type" "mmx,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,*,*")
-   (set_attr "unit" "*,*,*,mmx,mmx,*,*,*,*,*")
-   (set_attr "prefix_rep" "*,*,*,1,1,*,*,*,*,*")
+  [(set_attr "type" "mmx,mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,*,*")
+   (set_attr "unit" "*,*,*,*,mmx,mmx,*,*,*,*,*")
+   (set_attr "prefix_rep" "*,*,*,*,1,1,*,*,*,*,*")
    (set (attr "prefix")
-     (if_then_else (eq_attr "alternative" "5,6,7")
+     (if_then_else (eq_attr "alternative" "6,7,8")
        (const_string "vex")
        (const_string "orig")))
-   (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,DI,DI,DI")])
+   (set_attr "mode" "DI,DI,DI,DI,DI,DI,TI,DI,DI,DI,DI")])
 
 (define_insn "*mov<mode>_internal"
   [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand"
-			"=!?y,!?y,m  ,!y ,*Y2,*Y2,*Y2 ,m  ,*x,*x,*x,m ,r  ,m")
+	 "=!?y,!y,!?y,m  ,!y ,*Y2,*Y2,*Y2 ,m  ,*x,*x,*x,m ,r  ,m")
 	(match_operand:MMXMODEI8 1 "vector_move_operand"
-			"C   ,!ym,!?y,*Y2,!y ,C  ,*Y2m,*Y2,C ,*x,m ,*x,irm,r"))]
+	 "C   ,!y,m  ,!?y,*Y2,!y ,C  ,*Y2m,*Y2,C ,*x,m ,*x,irm,r"))]
   "TARGET_MMX
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
     pxor\t%0, %0
     movq\t{%1, %0|%0, %1}
     movq\t{%1, %0|%0, %1}
+    movq\t{%1, %0|%0, %1}
     movdq2q\t{%1, %0|%0, %1}
     movq2dq\t{%1, %0|%0, %1}
     pxor\t%0, %0
@@ -146,11 +149,11 @@ 
     movlps\t{%1, %0|%0, %1}
     #
     #"
-  [(set_attr "type" "mmx,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,sselog1,ssemov,ssemov,ssemov,*,*")
-   (set_attr "unit" "*,*,*,mmx,mmx,*,*,*,*,*,*,*,*,*")
-   (set_attr "prefix_rep" "*,*,*,1,1,*,1,*,*,*,*,*,*,*")
-   (set_attr "prefix_data16" "*,*,*,*,*,*,*,1,*,*,*,*,*,*")
-   (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
+  [(set_attr "type" "mmx,mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,sselog1,ssemov,ssemov,ssemov,*,*")
+   (set_attr "unit" "*,*,*,*,mmx,mmx,*,*,*,*,*,*,*,*,*")
+   (set_attr "prefix_rep" "*,*,*,*,1,1,*,1,*,*,*,*,*,*,*")
+   (set_attr "prefix_data16" "*,*,*,*,*,*,*,*,1,*,*,*,*,*,*")
+   (set_attr "mode" "DI,DI,DI,DI,DI,DI,TI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
 
 (define_expand "movv2sf"
   [(set (match_operand:V2SF 0 "nonimmediate_operand" "")
@@ -163,9 +166,9 @@ 
 
 (define_insn "*movv2sf_internal_rex64_avx"
   [(set (match_operand:V2SF 0 "nonimmediate_operand"
-				"=rm,r ,!?y,!?y ,m ,!y,Y2,x,x,x,m,r,x")
+	 "=rm,r,!?y,!y,!?y,m  ,!y,Y2,x,x,x,m,r,x")
         (match_operand:V2SF 1 "vector_move_operand"
-				"Cr ,m ,C  ,!?ym,!y,Y2,!y,C,x,m,x,x,r"))]
+	 "Cr ,m,C  ,!y,m  ,!?y,Y2,!y,C,x,m,x,x,r"))]
   "TARGET_64BIT && TARGET_AVX
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
@@ -174,6 +177,7 @@ 
     pxor\t%0, %0
     movq\t{%1, %0|%0, %1}
     movq\t{%1, %0|%0, %1}
+    movq\t{%1, %0|%0, %1}
     movdq2q\t{%1, %0|%0, %1}
     movq2dq\t{%1, %0|%0, %1}
     vxorps\t%0, %0, %0
@@ -182,21 +186,21 @@ 
     vmovlps\t{%1, %0|%0, %1}
     vmovq\t{%1, %0|%0, %1}
     vmovq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "imov,imov,mmx,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,sselog1,ssemov,ssemov,ssemov,ssemov")
-   (set_attr "unit" "*,*,*,*,*,mmx,mmx,*,*,*,*,*,*")
-   (set_attr "prefix_rep" "*,*,*,*,*,1,1,*,*,*,*,*,*")
-   (set_attr "length_vex" "*,*,*,*,*,*,*,*,*,*,*,4,4")
+  [(set_attr "type" "imov,imov,mmx,mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,sselog1,ssemov,ssemov,ssemov,ssemov")
+   (set_attr "unit" "*,*,*,*,*,*,mmx,mmx,*,*,*,*,*,*")
+   (set_attr "prefix_rep" "*,*,*,*,*,*,1,1,*,*,*,*,*,*")
+   (set_attr "length_vex" "*,*,*,*,*,*,*,*,*,*,*,*,4,4")
    (set (attr "prefix")
-     (if_then_else (eq_attr "alternative" "7,8,9,10,11,12")
+     (if_then_else (eq_attr "alternative" "8,9,10,11,12,13")
        (const_string "vex")
        (const_string "orig")))
-   (set_attr "mode" "DI,DI,DI,DI,DI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
+   (set_attr "mode" "DI,DI,DI,DI,DI,DI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
 
 (define_insn "*movv2sf_internal_rex64"
   [(set (match_operand:V2SF 0 "nonimmediate_operand"
-				"=rm,r ,!?y,!?y ,m ,!y,*Y2,x,x,x,m,r,Yi")
+	 "=rm,r,!?y,!y,!?y,m  ,!y ,*Y2,x,x,x,m,r ,Yi")
         (match_operand:V2SF 1 "vector_move_operand"
-				"Cr ,m ,C  ,!?ym,!y,*Y2,!y,C,x,m,x,Yi,r"))]
+	 "Cr ,m,C  ,!y,m  ,!?y,*Y2,!y ,C,x,m,x,Yi,r"))]
   "TARGET_64BIT && TARGET_MMX
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
@@ -205,6 +209,7 @@ 
     pxor\t%0, %0
     movq\t{%1, %0|%0, %1}
     movq\t{%1, %0|%0, %1}
+    movq\t{%1, %0|%0, %1}
     movdq2q\t{%1, %0|%0, %1}
     movq2dq\t{%1, %0|%0, %1}
     xorps\t%0, %0
@@ -213,22 +218,23 @@ 
     movlps\t{%1, %0|%0, %1}
     movd\t{%1, %0|%0, %1}
     movd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "imov,imov,mmx,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,sselog1,ssemov,ssemov,ssemov,ssemov")
-   (set_attr "unit" "*,*,*,*,*,mmx,mmx,*,*,*,*,*,*")
-   (set_attr "prefix_rep" "*,*,*,*,*,1,1,*,*,*,*,*,*")
-   (set_attr "mode" "DI,DI,DI,DI,DI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
+  [(set_attr "type" "imov,imov,mmx,mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,sselog1,ssemov,ssemov,ssemov,ssemov")
+   (set_attr "unit" "*,*,*,*,*,*,mmx,mmx,*,*,*,*,*,*")
+   (set_attr "prefix_rep" "*,*,*,*,*,*,1,1,*,*,*,*,*,*")
+   (set_attr "mode" "DI,DI,DI,DI,DI,DI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
 
 (define_insn "*movv2sf_internal_avx"
   [(set (match_operand:V2SF 0 "nonimmediate_operand"
-			"=!?y,!?y ,m  ,!y ,*Y2,*x,*x,*x,m ,r  ,m")
+	 "=!?y,!y,!?y,m  ,!y ,*Y2,*x,*x,*x,m ,r  ,m")
         (match_operand:V2SF 1 "vector_move_operand"
-			"C   ,!?ym,!?y,*Y2,!y ,C ,*x,m ,*x,irm,r"))]
+	 "C   ,!y,m  ,!?y,*Y2,!y ,C ,*x,m ,*x,irm,r"))]
   "TARGET_AVX
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
     pxor\t%0, %0
     movq\t{%1, %0|%0, %1}
     movq\t{%1, %0|%0, %1}
+    movq\t{%1, %0|%0, %1}
     movdq2q\t{%1, %0|%0, %1}
     movq2dq\t{%1, %0|%0, %1}
     vxorps\t%0, %0, %0
@@ -237,26 +243,27 @@ 
     vmovlps\t{%1, %0|%0, %1}
     #
     #"
-  [(set_attr "type" "mmx,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,ssemov,*,*")
-   (set_attr "unit" "*,*,*,mmx,mmx,*,*,*,*,*,*")
-   (set_attr "prefix_rep" "*,*,*,1,1,*,*,*,*,*,*")
+  [(set_attr "type" "mmx,mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,ssemov,*,*")
+   (set_attr "unit" "*,*,*,*,mmx,mmx,*,*,*,*,*,*")
+   (set_attr "prefix_rep" "*,*,*,*,1,1,*,*,*,*,*,*")
    (set (attr "prefix")
-     (if_then_else (eq_attr "alternative" "5,6,7,8")
+     (if_then_else (eq_attr "alternative" "6,7,8,9")
        (const_string "vex")
        (const_string "orig")))
-   (set_attr "mode" "DI,DI,DI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
+   (set_attr "mode" "DI,DI,DI,DI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
 
 (define_insn "*movv2sf_internal"
   [(set (match_operand:V2SF 0 "nonimmediate_operand"
-			"=!?y,!?y ,m  ,!y ,*Y2,*x,*x,*x,m ,r  ,m")
+	 "=!?y,!y,!?y,m  ,!y ,*Y2,*x,*x,*x,m ,r  ,m")
         (match_operand:V2SF 1 "vector_move_operand"
-			"C   ,!?ym,!?y,*Y2,!y ,C ,*x,m ,*x,irm,r"))]
+	 "C   ,!y,m  ,!?y,*Y2,!y ,C ,*x,m ,*x,irm,r"))]
   "TARGET_MMX
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
     pxor\t%0, %0
     movq\t{%1, %0|%0, %1}
     movq\t{%1, %0|%0, %1}
+    movq\t{%1, %0|%0, %1}
     movdq2q\t{%1, %0|%0, %1}
     movq2dq\t{%1, %0|%0, %1}
     xorps\t%0, %0
@@ -265,10 +272,10 @@ 
     movlps\t{%1, %0|%0, %1}
     #
     #"
-  [(set_attr "type" "mmx,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,ssemov,*,*")
-   (set_attr "unit" "*,*,*,mmx,mmx,*,*,*,*,*,*")
-   (set_attr "prefix_rep" "*,*,*,1,1,*,*,*,*,*,*")
-   (set_attr "mode" "DI,DI,DI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
+  [(set_attr "type" "mmx,mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,ssemov,*,*")
+   (set_attr "unit" "*,*,*,*,mmx,mmx,*,*,*,*,*,*")
+   (set_attr "prefix_rep" "*,*,*,*,1,1,*,*,*,*,*,*")
+   (set_attr "mode" "DI,DI,DI,DI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
 
 ;; %%% This multiword shite has got to go.
 (define_split