Message ID | ZjCfOxu0UWtqXXlk@tucnak |
---|---|
State | New |
Headers | show |
Series | vect: Adjust vect_transform_reduction assertion [PR114883] | expand |
On Tue, 30 Apr 2024, Jakub Jelinek wrote:

> Hi!
>
> The assertion doesn't allow IFN_COND_MIN/IFN_COND_MAX, which are
> commutative conditional binary operations like ADD/MUL/AND/IOR/XOR,
> and can be handled just fine.
> In particular, we emit
> vminpd %zmm3, %zmm5, %zmm0{%k2}
> vminpd %zmm0, %zmm3, %zmm5{%k1}
> and
> vmaxpd %zmm3, %zmm5, %zmm0{%k2}
> vmaxpd %zmm0, %zmm3, %zmm5{%k1}
> in the vectorized loops of the first and second subroutine.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk and
> 14.1?

OK for both.

Richard.

> 2024-04-30 Jakub Jelinek <jakub@redhat.com>
> Hongtao Liu <hongtao.liu@intel.com>
>
> PR tree-optimization/114883
> * tree-vect-loop.cc (vect_transform_reduction): Allow IFN_COND_MIN and
> IFN_COND_MAX in the assert.
>
> * gfortran.dg/pr114883.f90: New test.
>
> --- gcc/tree-vect-loop.cc.jj 2024-04-17 11:34:02.465185397 +0200
> +++ gcc/tree-vect-loop.cc 2024-04-29 20:41:04.973723992 +0200
> @@ -8505,7 +8505,8 @@ vect_transform_reduction (loop_vec_info
> {
> gcc_assert (code == IFN_COND_ADD || code == IFN_COND_SUB
> || code == IFN_COND_MUL || code == IFN_COND_AND
> - || code == IFN_COND_IOR || code == IFN_COND_XOR);
> + || code == IFN_COND_IOR || code == IFN_COND_XOR
> + || code == IFN_COND_MIN || code == IFN_COND_MAX);
> gcc_assert (op.num_ops == 4
> && (op.ops[reduc_index]
> == op.ops[internal_fn_else_index ((internal_fn) code)]));
> --- gcc/testsuite/gfortran.dg/pr114883.f90.jj 2024-04-29 20:39:39.000871849 +0200
> +++ gcc/testsuite/gfortran.dg/pr114883.f90 2024-04-29 20:39:27.757021972 +0200
> @@ -0,0 +1,53 @@
> +! PR tree-optimization/114883
> +! { dg-do compile }
> +! { dg-options "-O2 -fvect-cost-model=cheap" }
> +! { dg-additional-options "-march=x86-64-v4" { target i?86-*-* x86_64-*-* } }
> +
> +subroutine pr114883_1(a, b, c, d, e, f, g, h, o)
> + real(8) :: c(1011), d(1011), e(0:1011)
> + real(8) :: p, q, f, r, g(1011), h(1011), b, bar
> + integer :: o(100), a, t, u
> + p = 0.0_8
> + r = bar()
> + u = 1
> + do i = 1,a
> + do k = 1,1011
> + km1 = max0(k-1,1)
> + h(k) = c(k) * e(k-1) * d(km1)
> + f = g(k) + h(k)
> + if(f.gt.1.e-6)then
> + p = min(p,r)
> + endif
> + end do
> + q = 0.9_8 * p
> + t = integer(b/q + 1)
> + if(t>100)then
> + u = t
> + endif
> + o(u) = o(u) + 1
> + end do
> +end subroutine pr114883_1
> +subroutine pr114883_2(a, b, c, d, e, f, g, h, o)
> + real(8) :: c(1011), d(1011), e(0:1011)
> + real(8) :: p, q, f, r, g(1011), h(1011), b, bar
> + integer :: o(100), a, t, u
> + p = 0.0_8
> + r = bar()
> + u = 1
> + do i = 1,a
> + do k = 1,1011
> + km1 = max0(k-1,1)
> + h(k) = c(k) * e(k-1) * d(km1)
> + f = g(k) + h(k)
> + if(f.gt.1.e-6)then
> + p = max(p,r)
> + endif
> + end do
> + q = 0.9_8 * p
> + t = integer(b/q + 1)
> + if(t>100)then
> + u = t
> + endif
> + o(u) = o(u) + 1
> + end do
> +end subroutine pr114883_2
>
> Jakub
>
>
--- gcc/tree-vect-loop.cc.jj 2024-04-17 11:34:02.465185397 +0200
+++ gcc/tree-vect-loop.cc 2024-04-29 20:41:04.973723992 +0200
@@ -8505,7 +8505,8 @@ vect_transform_reduction (loop_vec_info
 {
 gcc_assert (code == IFN_COND_ADD || code == IFN_COND_SUB
 || code == IFN_COND_MUL || code == IFN_COND_AND
- || code == IFN_COND_IOR || code == IFN_COND_XOR);
+ || code == IFN_COND_IOR || code == IFN_COND_XOR
+ || code == IFN_COND_MIN || code == IFN_COND_MAX);
 gcc_assert (op.num_ops == 4
 && (op.ops[reduc_index]
 == op.ops[internal_fn_else_index ((internal_fn) code)]));
--- gcc/testsuite/gfortran.dg/pr114883.f90.jj 2024-04-29 20:39:39.000871849 +0200
+++ gcc/testsuite/gfortran.dg/pr114883.f90 2024-04-29 20:39:27.757021972 +0200
@@ -0,0 +1,53 @@
+! PR tree-optimization/114883
+! { dg-do compile }
+! { dg-options "-O2 -fvect-cost-model=cheap" }
+! { dg-additional-options "-march=x86-64-v4" { target i?86-*-* x86_64-*-* } }
+
+subroutine pr114883_1(a, b, c, d, e, f, g, h, o)
+ real(8) :: c(1011), d(1011), e(0:1011)
+ real(8) :: p, q, f, r, g(1011), h(1011), b, bar
+ integer :: o(100), a, t, u
+ p = 0.0_8
+ r = bar()
+ u = 1
+ do i = 1,a
+ do k = 1,1011
+ km1 = max0(k-1,1)
+ h(k) = c(k) * e(k-1) * d(km1)
+ f = g(k) + h(k)
+ if(f.gt.1.e-6)then
+ p = min(p,r)
+ endif
+ end do
+ q = 0.9_8 * p
+ t = integer(b/q + 1)
+ if(t>100)then
+ u = t
+ endif
+ o(u) = o(u) + 1
+ end do
+end subroutine pr114883_1
+subroutine pr114883_2(a, b, c, d, e, f, g, h, o)
+ real(8) :: c(1011), d(1011), e(0:1011)
+ real(8) :: p, q, f, r, g(1011), h(1011), b, bar
+ integer :: o(100), a, t, u
+ p = 0.0_8
+ r = bar()
+ u = 1
+ do i = 1,a
+ do k = 1,1011
+ km1 = max0(k-1,1)
+ h(k) = c(k) * e(k-1) * d(km1)
+ f = g(k) + h(k)
+ if(f.gt.1.e-6)then
+ p = max(p,r)
+ endif
+ end do
+ q = 0.9_8 * p
+ t = integer(b/q + 1)
+ if(t>100)then
+ u = t
+ endif
+ o(u) = o(u) + 1
+ end do
+end subroutine pr114883_2