diff mbox series

[v3,1/2] rs6000: Add support for _mm_minpos_epu16

Message ID 20210715232918.458317-2-pc@us.ibm.com
State New
Headers show
Series rs6000: Add support for _mm_minpos_epu16 | expand

Commit Message

Paul A. Clarke July 15, 2021, 11:29 p.m. UTC
Add a naive implementation of the subject x86 intrinsic to
ease porting.

2021-07-15  Paul A. Clarke  <pc@us.ibm.com>

gcc
        * config/rs6000/smmintrin.h (_mm_minpos_epu16): New.
---
v3: Minor formatting changes per review from Bill.
v2: Minor formatting changes per review from Segher.

 gcc/config/rs6000/smmintrin.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

Comments

Li, Pan2 via Gcc-patches July 16, 2021, 8:06 p.m. UTC | #1
Hi Paul,

LGTM.  Recommend maintainers approve.

Thanks for the cleanups,
Bill

On 7/15/21 6:29 PM, Paul A. Clarke wrote:
> Add a naive implementation of the subject x86 intrinsic to
> ease porting.
>
> 2021-07-15  Paul A. Clarke  <pc@us.ibm.com>
>
> gcc
>          * config/rs6000/smmintrin.h (_mm_minpos_epu16): New.
> ---
> v3: Minor formatting changes per review from Bill.
> v2: Minor formatting changes per review from Segher.
>
>   gcc/config/rs6000/smmintrin.h | 27 +++++++++++++++++++++++++++
>   1 file changed, 27 insertions(+)
>
> diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h
> index 16fd34d836ff..6a010fdbb96f 100644
> --- a/gcc/config/rs6000/smmintrin.h
> +++ b/gcc/config/rs6000/smmintrin.h
> @@ -172,4 +172,31 @@ _mm_test_mix_ones_zeros (__m128i __A, __m128i __mask)
>     return any_ones * any_zeros;
>   }
>   
> +/* Return horizontal packed word minimum and its index in bits [15:0]
> +   and bits [18:16] respectively.  */
> +__inline __m128i
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_minpos_epu16 (__m128i __A)
> +{
> +  union __u
> +    {
> +      __m128i __m;
> +      __v8hu __uh;
> +    };
> +  union __u __u = { .__m = __A }, __r = { .__m = {0} };
> +  unsigned short __ridx = 0;
> +  unsigned short __rmin = __u.__uh[__ridx];
> +  for (unsigned long __i = 1; __i < 8; __i++)
> +    {
> +      if (__u.__uh[__i] < __rmin)
> +	{
> +	  __rmin = __u.__uh[__i];
> +	  __ridx = __i;
> +	}
> +    }
> +  __r.__uh[0] = __rmin;
> +  __r.__uh[1] = __ridx;
> +  return __r.__m;
> +}
> +
>   #endif
Segher Boessenkool Aug. 2, 2021, 10:29 p.m. UTC | #2
Hi!

On Thu, Jul 15, 2021 at 06:29:17PM -0500, Paul A. Clarke wrote:
> Add a naive implementation of the subject x86 intrinsic to
> ease porting.

> --- a/gcc/config/rs6000/smmintrin.h
> +++ b/gcc/config/rs6000/smmintrin.h
> @@ -172,4 +172,31 @@ _mm_test_mix_ones_zeros (__m128i __A, __m128i __mask)
>    return any_ones * any_zeros;
>  }
>  
> +/* Return horizontal packed word minimum and its index in bits [15:0]
> +   and bits [18:16] respectively.  */
> +__inline __m128i
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_minpos_epu16 (__m128i __A)
> +{
> +  union __u
> +    {
> +      __m128i __m;
> +      __v8hu __uh;
> +    };
> +  union __u __u = { .__m = __A }, __r = { .__m = {0} };
> +  unsigned short __ridx = 0;
> +  unsigned short __rmin = __u.__uh[__ridx];
> +  for (unsigned long __i = 1; __i < 8; __i++)
> +    {
> +      if (__u.__uh[__i] < __rmin)
> +	{
> +	  __rmin = __u.__uh[__i];
> +	  __ridx = __i;
> +	}
> +    }
> +  __r.__uh[0] = __rmin;
> +  __r.__uh[1] = __ridx;
> +  return __r.__m;
> +}

As before: does this work correctly on BE?  Was it tested there?

Okay for trunk if so.  Thanks!


Segher
Paul A. Clarke Aug. 3, 2021, 1:01 p.m. UTC | #3
On Mon, Aug 02, 2021 at 05:29:08PM -0500, Segher Boessenkool wrote:
> On Thu, Jul 15, 2021 at 06:29:17PM -0500, Paul A. Clarke wrote:
> > Add a naive implementation of the subject x86 intrinsic to
> > ease porting.
> 
> > --- a/gcc/config/rs6000/smmintrin.h
> > +++ b/gcc/config/rs6000/smmintrin.h
> > @@ -172,4 +172,31 @@ _mm_test_mix_ones_zeros (__m128i __A, __m128i __mask)
> >    return any_ones * any_zeros;
> >  }
> >  
> > +/* Return horizontal packed word minimum and its index in bits [15:0]
> > +   and bits [18:16] respectively.  */
> > +__inline __m128i
> > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > +_mm_minpos_epu16 (__m128i __A)
> > +{
> > +  union __u
> > +    {
> > +      __m128i __m;
> > +      __v8hu __uh;
> > +    };
> > +  union __u __u = { .__m = __A }, __r = { .__m = {0} };
> > +  unsigned short __ridx = 0;
> > +  unsigned short __rmin = __u.__uh[__ridx];
> > +  for (unsigned long __i = 1; __i < 8; __i++)
> > +    {
> > +      if (__u.__uh[__i] < __rmin)
> > +	{
> > +	  __rmin = __u.__uh[__i];
> > +	  __ridx = __i;
> > +	}
> > +    }
> > +  __r.__uh[0] = __rmin;
> > +  __r.__uh[1] = __ridx;
> > +  return __r.__m;
> > +}
> 
> As before: does this work correctly on BE?  Was it tested there?

Per the "cover letter":
| Tested on BE, LE (32 and 64bit).

> Okay for trunk if so.  Thanks!

Thanks! I'll push this shortly.

PC
diff mbox series

Patch

diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h
index 16fd34d836ff..6a010fdbb96f 100644
--- a/gcc/config/rs6000/smmintrin.h
+++ b/gcc/config/rs6000/smmintrin.h
@@ -172,4 +172,31 @@  _mm_test_mix_ones_zeros (__m128i __A, __m128i __mask)
   return any_ones * any_zeros;
 }
 
+/* Return horizontal packed word minimum and its index in bits [15:0]
+   and bits [18:16] respectively.  */
+__inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_minpos_epu16 (__m128i __A)
+{
+  union __u
+    {
+      __m128i __m;
+      __v8hu __uh;
+    };
+  union __u __u = { .__m = __A }, __r = { .__m = {0} };
+  unsigned short __ridx = 0;
+  unsigned short __rmin = __u.__uh[__ridx];
+  for (unsigned long __i = 1; __i < 8; __i++)
+    {
+      if (__u.__uh[__i] < __rmin)
+	{
+	  __rmin = __u.__uh[__i];
+	  __ridx = __i;
+	}
+    }
+  __r.__uh[0] = __rmin;
+  __r.__uh[1] = __ridx;
+  return __r.__m;
+}
+
 #endif