Message ID | 20210715232918.458317-2-pc@us.ibm.com |
---|---|
State | New |
Headers | show |
Series | rs6000: Add support for _mm_minpos_epu16 | expand |
Hi Paul, LGTM. Recommend maintainers approve. Thanks for the cleanups, Bill On 7/15/21 6:29 PM, Paul A. Clarke wrote: > Add a naive implementation of the subject x86 intrinsic to > ease porting. > > 2021-07-15 Paul A. Clarke <pc@us.ibm.com> > > gcc > * config/rs6000/smmintrin.h (_mm_minpos_epu16): New. > --- > v3: Minor formatting changes per review from Bill. > v2: Minor formatting changes per review from Segher. > > gcc/config/rs6000/smmintrin.h | 27 +++++++++++++++++++++++++++ > 1 file changed, 27 insertions(+) > > diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h > index 16fd34d836ff..6a010fdbb96f 100644 > --- a/gcc/config/rs6000/smmintrin.h > +++ b/gcc/config/rs6000/smmintrin.h > @@ -172,4 +172,31 @@ _mm_test_mix_ones_zeros (__m128i __A, __m128i __mask) > return any_ones * any_zeros; > } > > +/* Return horizontal packed word minimum and its index in bits [15:0] > + and bits [18:16] respectively. */ > +__inline __m128i > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_minpos_epu16 (__m128i __A) > +{ > + union __u > + { > + __m128i __m; > + __v8hu __uh; > + }; > + union __u __u = { .__m = __A }, __r = { .__m = {0} }; > + unsigned short __ridx = 0; > + unsigned short __rmin = __u.__uh[__ridx]; > + for (unsigned long __i = 1; __i < 8; __i++) > + { > + if (__u.__uh[__i] < __rmin) > + { > + __rmin = __u.__uh[__i]; > + __ridx = __i; > + } > + } > + __r.__uh[0] = __rmin; > + __r.__uh[1] = __ridx; > + return __r.__m; > +} > + > #endif
Hi! On Thu, Jul 15, 2021 at 06:29:17PM -0500, Paul A. Clarke wrote: > Add a naive implementation of the subject x86 intrinsic to > ease porting. > --- a/gcc/config/rs6000/smmintrin.h > +++ b/gcc/config/rs6000/smmintrin.h > @@ -172,4 +172,31 @@ _mm_test_mix_ones_zeros (__m128i __A, __m128i __mask) > return any_ones * any_zeros; > } > > +/* Return horizontal packed word minimum and its index in bits [15:0] > + and bits [18:16] respectively. */ > +__inline __m128i > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_minpos_epu16 (__m128i __A) > +{ > + union __u > + { > + __m128i __m; > + __v8hu __uh; > + }; > + union __u __u = { .__m = __A }, __r = { .__m = {0} }; > + unsigned short __ridx = 0; > + unsigned short __rmin = __u.__uh[__ridx]; > + for (unsigned long __i = 1; __i < 8; __i++) > + { > + if (__u.__uh[__i] < __rmin) > + { > + __rmin = __u.__uh[__i]; > + __ridx = __i; > + } > + } > + __r.__uh[0] = __rmin; > + __r.__uh[1] = __ridx; > + return __r.__m; > +} As before: does this work correctly on BE? Was it tested there? Okay for trunk if so. Thanks! Segher
On Mon, Aug 02, 2021 at 05:29:08PM -0500, Segher Boessenkool wrote: > On Thu, Jul 15, 2021 at 06:29:17PM -0500, Paul A. Clarke wrote: > > Add a naive implementation of the subject x86 intrinsic to > > ease porting. > > > --- a/gcc/config/rs6000/smmintrin.h > > +++ b/gcc/config/rs6000/smmintrin.h > > @@ -172,4 +172,31 @@ _mm_test_mix_ones_zeros (__m128i __A, __m128i __mask) > > return any_ones * any_zeros; > > } > > > > +/* Return horizontal packed word minimum and its index in bits [15:0] > > + and bits [18:16] respectively. */ > > +__inline __m128i > > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > +_mm_minpos_epu16 (__m128i __A) > > +{ > > + union __u > > + { > > + __m128i __m; > > + __v8hu __uh; > > + }; > > + union __u __u = { .__m = __A }, __r = { .__m = {0} }; > > + unsigned short __ridx = 0; > > + unsigned short __rmin = __u.__uh[__ridx]; > > + for (unsigned long __i = 1; __i < 8; __i++) > > + { > > + if (__u.__uh[__i] < __rmin) > > + { > > + __rmin = __u.__uh[__i]; > > + __ridx = __i; > > + } > > + } > > + __r.__uh[0] = __rmin; > > + __r.__uh[1] = __ridx; > > + return __r.__m; > > +} > > As before: does this work correctly on BE? Was it tested there? Per the "cover letter": | Tested on BE, LE (32 and 64bit). > Okay for trunk if so. Thanks! Thanks! I'll push this shortly. PC
diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h
index 16fd34d836ff..6a010fdbb96f 100644
--- a/gcc/config/rs6000/smmintrin.h
+++ b/gcc/config/rs6000/smmintrin.h
@@ -172,4 +172,31 @@ _mm_test_mix_ones_zeros (__m128i __A, __m128i __mask)
   return any_ones * any_zeros;
 }
 
+/* Return horizontal packed word minimum and its index in bits [15:0]
+   and bits [18:16] respectively.  */
+__inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_minpos_epu16 (__m128i __A)
+{
+  union __u
+    {
+      __m128i __m;
+      __v8hu __uh;
+    };
+  union __u __u = { .__m = __A }, __r = { .__m = {0} };
+  unsigned short __ridx = 0;
+  unsigned short __rmin = __u.__uh[__ridx];
+  for (unsigned long __i = 1; __i < 8; __i++)
+    {
+      if (__u.__uh[__i] < __rmin)
+        {
+          __rmin = __u.__uh[__i];
+          __ridx = __i;
+        }
+    }
+  __r.__uh[0] = __rmin;
+  __r.__uh[1] = __ridx;
+  return __r.__m;
+}
+
 #endif