diff mbox series

[1/2] rs6000: Add support for _mm_minpos_epu16

Message ID 20210602221316.202627-2-pc@us.ibm.com
State New
Headers show
Series rs6000: Add support for _mm_minpos_epu16 | expand

Commit Message

Paul A. Clarke June 2, 2021, 10:13 p.m. UTC
Add a naive implementation of the subject x86 intrinsic to
ease porting.

2021-06-02  Paul A. Clarke  <pc@us.ibm.com>

gcc/ChangeLog:
        * config/rs6000/smmintrin.h (_mm_minpos_epu16): New.
---
 gcc/config/rs6000/smmintrin.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

Comments

Segher Boessenkool June 3, 2021, 12:27 a.m. UTC | #1
Hi!

On Wed, Jun 02, 2021 at 05:13:15PM -0500, Paul A. Clarke wrote:
> Add a naive implementation of the subject x86 intrinsic to
> ease porting.

> +/* Return horizontal packed word minimum and its index in bits [15:0]
> +   and bits [18:16] respectively.  */
> +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_minpos_epu16 (__m128i __A)
> +{
> +  union __u
> +    {
> +      __m128i __m;
> +      __v8hu __uh;
> +    };
> +  union __u __u = { .__m = __A }, __r = { .__m = {0} };
> +  unsigned short __ridx = 0;
> +  unsigned short __rmin = __u.__uh[__ridx];
> +  for (unsigned long __i = __ridx+1;

(spaces around the "+"?)

> +       __i < sizeof (__u.__uh) / sizeof (__u.__uh[0]);

You should either use a macro for that, or just write "8" :-)

> +       __i++)
> +    {
> +      if (__u.__uh[__i] < __rmin)
> +        {
> +          __rmin = __u.__uh[__i];
> +          __ridx = __i;
> +        }
> +    }
> +  __r.__uh[0] = __rmin;
> +  __r.__uh[1] = __ridx;
> +  return __r.__m;
> +}

This does not compute the index correctly for big endian (it needs to
walk from right to left for that).  The construction of the return value
looks wrong as well.

Okay for trunk with that fixed.  Thanks!


Segher
Paul A. Clarke June 3, 2021, 5:59 p.m. UTC | #2
On Wed, Jun 02, 2021 at 07:27:35PM -0500, Segher Boessenkool wrote:
> On Wed, Jun 02, 2021 at 05:13:15PM -0500, Paul A. Clarke wrote:
> > Add a naive implementation of the subject x86 intrinsic to
> > ease porting.
> 
> > +/* Return horizontal packed word minimum and its index in bits [15:0]
> > +   and bits [18:16] respectively.  */
> > +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> > +_mm_minpos_epu16 (__m128i __A)
> > +{
> > +  union __u
> > +    {
> > +      __m128i __m;
> > +      __v8hu __uh;
> > +    };
> > +  union __u __u = { .__m = __A }, __r = { .__m = {0} };
> > +  unsigned short __ridx = 0;
> > +  unsigned short __rmin = __u.__uh[__ridx];
> > +  for (unsigned long __i = __ridx+1;
> 
> (spaces around the "+"?)

ok

> 
> > +       __i < sizeof (__u.__uh) / sizeof (__u.__uh[0]);
> 
> You should either use a macro for that, or just write "8" :-)

ok. (There should be a standard thing for this operation.)

> > +       __i++)
> > +    {
> > +      if (__u.__uh[__i] < __rmin)
> > +        {
> > +          __rmin = __u.__uh[__i];
> > +          __ridx = __i;
> > +        }
> > +    }
> > +  __r.__uh[0] = __rmin;
> > +  __r.__uh[1] = __ridx;
> > +  return __r.__m;
> > +}
> 
> This does not compute the index correctly for big endian (it needs to
> walk from right to left for that).  The construction of the return value
> looks wrong as well.
> 
> Okay for trunk with that fixed.  Thanks!

I'm not seeing the issue here. The values are numbered by element order,
and the results are in the "first" (minimum value) and "second" (index of
first encountered minimum value in element order) elements of the result.

PC
diff mbox series

Patch

diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h
index bdf6eb365d88..358a48958192 100644
--- a/gcc/config/rs6000/smmintrin.h
+++ b/gcc/config/rs6000/smmintrin.h
@@ -116,4 +116,31 @@  _mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i __mask)
   return (__m128i) vec_sel ((__v16qu) __A, (__v16qu) __B, __lmask);
 }
 
+/* Return the minimum of the eight unsigned halfwords of __A in element 0
+   (bits [15:0]) and the index of its first occurrence in element 1
+   (bits [18:16]); the remaining elements of the result are zero.  */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_minpos_epu16 (__m128i __A)
+{
+  union __u
+    {
+      __m128i __m;
+      __v8hu __uh;
+    };
+  union __u __u = { .__m = __A }, __r = { .__m = {0} };
+  unsigned short __ridx = 0;
+  unsigned short __rmin = __u.__uh[__ridx];
+  unsigned long __i;  /* Not declared in the "for": header must build as C90.  */
+  for (__i = 1; __i < 8; __i++)
+    {
+      if (__u.__uh[__i] < __rmin)  /* Strict "<": lowest index wins ties.  */
+        {
+          __rmin = __u.__uh[__i];
+          __ridx = __i;
+        }
+    }
+  __r.__uh[0] = __rmin;
+  __r.__uh[1] = __ridx;
+  return __r.__m;
+}
 #endif