diff mbox series

vect: Adjust vect_transform_reduction assertion [PR114883]

Message ID ZjCfOxu0UWtqXXlk@tucnak
State New
Headers show
Series vect: Adjust vect_transform_reduction assertion [PR114883] | expand

Commit Message

Jakub Jelinek April 30, 2024, 7:35 a.m. UTC
Hi!

The assertion in vect_transform_reduction doesn't allow IFN_COND_MIN or
IFN_COND_MAX, which are commutative conditional binary operations like
ADD/MUL/AND/IOR/XOR and can be handled just fine.
In particular, we emit
	vminpd	%zmm3, %zmm5, %zmm0{%k2}
	vminpd	%zmm0, %zmm3, %zmm5{%k1}
and
	vmaxpd	%zmm3, %zmm5, %zmm0{%k2}
	vmaxpd	%zmm0, %zmm3, %zmm5{%k1}
in the vectorized loops of the first and second subroutine, respectively.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk and
14.1?

2024-04-30  Jakub Jelinek  <jakub@redhat.com>
	    Hongtao Liu  <hongtao.liu@intel.com>

	PR tree-optimization/114883
	* tree-vect-loop.cc (vect_transform_reduction): Allow IFN_COND_MIN and
	IFN_COND_MAX in the assert.

	* gfortran.dg/pr114883.f90: New test.


	Jakub

Comments

Richard Biener April 30, 2024, 9:01 a.m. UTC | #1
On Tue, 30 Apr 2024, Jakub Jelinek wrote:

> Hi!
> 
> The assertion doesn't allow IFN_COND_MIN/IFN_COND_MAX, which are
> commutative conditional binary operations like ADD/MUL/AND/IOR/XOR,
> and can be handled just fine.
> In particular, we emit
> 	vminpd	%zmm3, %zmm5, %zmm0{%k2}
> 	vminpd	%zmm0, %zmm3, %zmm5{%k1}
> and
> 	vmaxpd	%zmm3, %zmm5, %zmm0{%k2}
> 	vmaxpd	%zmm0, %zmm3, %zmm5{%k1}
> in the vectorized loops of the first and second subroutine.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk and
> 14.1?

OK for both.

Richard.

> 2024-04-30  Jakub Jelinek  <jakub@redhat.com>
> 	    Hongtao Liu  <hongtao.liu@intel.com>
> 
> 	PR tree-optimization/114883
> 	* tree-vect-loop.cc (vect_transform_reduction): Allow IFN_COND_MIN and
> 	IFN_COND_MAX in the assert.
> 
> 	* gfortran.dg/pr114883.f90: New test.
> 
> --- gcc/tree-vect-loop.cc.jj	2024-04-17 11:34:02.465185397 +0200
> +++ gcc/tree-vect-loop.cc	2024-04-29 20:41:04.973723992 +0200
> @@ -8505,7 +8505,8 @@ vect_transform_reduction (loop_vec_info
>      {
>        gcc_assert (code == IFN_COND_ADD || code == IFN_COND_SUB
>  		  || code == IFN_COND_MUL || code == IFN_COND_AND
> -		  || code == IFN_COND_IOR || code == IFN_COND_XOR);
> +		  || code == IFN_COND_IOR || code == IFN_COND_XOR
> +		  || code == IFN_COND_MIN || code == IFN_COND_MAX);
>        gcc_assert (op.num_ops == 4
>  		  && (op.ops[reduc_index]
>  		      == op.ops[internal_fn_else_index ((internal_fn) code)]));
> --- gcc/testsuite/gfortran.dg/pr114883.f90.jj	2024-04-29 20:39:39.000871849 +0200
> +++ gcc/testsuite/gfortran.dg/pr114883.f90	2024-04-29 20:39:27.757021972 +0200
> @@ -0,0 +1,53 @@
> +! PR tree-optimization/114883
> +! { dg-do compile }
> +! { dg-options "-O2 -fvect-cost-model=cheap" }
> +! { dg-additional-options "-march=x86-64-v4" { target i?86-*-* x86_64-*-* } }
> +
> +subroutine pr114883_1(a, b, c, d, e, f, g, h, o)
> +  real(8) :: c(1011), d(1011), e(0:1011)
> +  real(8) :: p, q, f, r, g(1011), h(1011), b, bar
> +  integer :: o(100), a, t, u
> +  p = 0.0_8
> +  r = bar()
> +  u = 1
> +  do i = 1,a
> +    do k = 1,1011
> +      km1 = max0(k-1,1)
> +      h(k) = c(k) * e(k-1) * d(km1)
> +      f = g(k) + h(k)
> +      if(f.gt.1.e-6)then
> +        p = min(p,r)
> +      endif
> +    end do
> +    q = 0.9_8 * p
> +    t = integer(b/q + 1)
> +    if(t>100)then
> +      u = t
> +    endif
> +    o(u) = o(u) + 1
> +  end do
> +end subroutine pr114883_1
> +subroutine pr114883_2(a, b, c, d, e, f, g, h, o)
> +  real(8) :: c(1011), d(1011), e(0:1011)
> +  real(8) :: p, q, f, r, g(1011), h(1011), b, bar
> +  integer :: o(100), a, t, u
> +  p = 0.0_8
> +  r = bar()
> +  u = 1
> +  do i = 1,a
> +    do k = 1,1011
> +      km1 = max0(k-1,1)
> +      h(k) = c(k) * e(k-1) * d(km1)
> +      f = g(k) + h(k)
> +      if(f.gt.1.e-6)then
> +        p = max(p,r)
> +      endif
> +    end do
> +    q = 0.9_8 * p
> +    t = integer(b/q + 1)
> +    if(t>100)then
> +      u = t
> +    endif
> +    o(u) = o(u) + 1
> +  end do
> +end subroutine pr114883_2
> 
> 	Jakub
> 
>
diff mbox series

Patch

--- gcc/tree-vect-loop.cc.jj	2024-04-17 11:34:02.465185397 +0200
+++ gcc/tree-vect-loop.cc	2024-04-29 20:41:04.973723992 +0200
@@ -8505,7 +8505,8 @@  vect_transform_reduction (loop_vec_info
     {
       gcc_assert (code == IFN_COND_ADD || code == IFN_COND_SUB
 		  || code == IFN_COND_MUL || code == IFN_COND_AND
-		  || code == IFN_COND_IOR || code == IFN_COND_XOR);
+		  || code == IFN_COND_IOR || code == IFN_COND_XOR
+		  || code == IFN_COND_MIN || code == IFN_COND_MAX);
       gcc_assert (op.num_ops == 4
 		  && (op.ops[reduc_index]
 		      == op.ops[internal_fn_else_index ((internal_fn) code)]));
--- gcc/testsuite/gfortran.dg/pr114883.f90.jj	2024-04-29 20:39:39.000871849 +0200
+++ gcc/testsuite/gfortran.dg/pr114883.f90	2024-04-29 20:39:27.757021972 +0200
@@ -0,0 +1,53 @@ 
+! PR tree-optimization/114883
+! { dg-do compile }
+! { dg-options "-O2 -fvect-cost-model=cheap" }
+! { dg-additional-options "-march=x86-64-v4" { target i?86-*-* x86_64-*-* } }
+
+subroutine pr114883_1(a, b, c, d, e, f, g, h, o)
+  real(8) :: c(1011), d(1011), e(0:1011)
+  real(8) :: p, q, f, r, g(1011), h(1011), b, bar
+  integer :: o(100), a, t, u
+  p = 0.0_8
+  r = bar()
+  u = 1
+  do i = 1,a
+    do k = 1,1011
+      km1 = max0(k-1,1)
+      h(k) = c(k) * e(k-1) * d(km1)
+      f = g(k) + h(k)
+      if(f.gt.1.e-6)then
+        p = min(p,r)
+      endif
+    end do
+    q = 0.9_8 * p
+    t = integer(b/q + 1)
+    if(t>100)then
+      u = t
+    endif
+    o(u) = o(u) + 1
+  end do
+end subroutine pr114883_1
+subroutine pr114883_2(a, b, c, d, e, f, g, h, o)
+  real(8) :: c(1011), d(1011), e(0:1011)
+  real(8) :: p, q, f, r, g(1011), h(1011), b, bar
+  integer :: o(100), a, t, u
+  p = 0.0_8
+  r = bar()
+  u = 1
+  do i = 1,a
+    do k = 1,1011
+      km1 = max0(k-1,1)
+      h(k) = c(k) * e(k-1) * d(km1)
+      f = g(k) + h(k)
+      if(f.gt.1.e-6)then
+        p = max(p,r)
+      endif
+    end do
+    q = 0.9_8 * p
+    t = integer(b/q + 1)
+    if(t>100)then
+      u = t
+    endif
+    o(u) = o(u) + 1
+  end do
+end subroutine pr114883_2