Patchwork [i386,PR57623] Introduce _bextr (single underscore) intrinsucs

login
register
mail settings
Submitter Kirill Yukhin
Date June 24, 2013, 8:55 a.m.
Message ID <51C8096F.9090003@gmail.com>
Download mbox | patch
Permalink /patch/253741/
State New
Headers show

Comments

Kirill Yukhin - June 24, 2013, 8:55 a.m.
Hello!
>> I actually don't understand how this can work, bmi_bextr_{si,di} expanders
>> have just 3 operands (one target, 2 arguments), so just by giving it
>> 4 operands instead just means the last one is dropped on the floor.
>> Why do you need a builtin for this at all?
>> I was expecting that _bextr_u{32,64} would be implemented either using
>> __bextr_u{32,64} or at least using it's underlying builtin, by constructing
>> the combined len/start argument with shifts/ands first.
>>
>> But I admit I haven't applied the patch and looked how it works.
This was my mistake. I've accidentally intermixed BMI's bextr with TBM's
bextr, which has dedicated expansion in i386.c and different patterns in
i386.md
The tests also was wrong. That's why bootstrapping and regtesting was
actually OK.
This is really strange to me, that when we're trying to call GEN_
function with number of arguments that excess number in the pattern -
nothing is happen. It silently trunc out extra arguments. I believe we
should give ICE here.
Here is snippet from i386.c:
    case
3:                                                                                                                                       

      pat = GEN_FCN (icode) (real_target, args[0].op,
args[1].op,                                                                                 

                            
args[2].op);                                                                                                         

     
break;                                                                                                                                      

For my bogus patch, icode was actually:  bmi_bextr_si/2.
And what was generated looked like this:
func_bextr32_3args:
.LFB520:
        .cfi_startproc
        movl    4(%esp), %eax   # 23    *movsi_internal/1       [length = 4]
        bextr   8(%esp), %eax, %eax     # 11    bmi_bextr_si/2  [length = 5]
        ret     # 27    simple_return_internal  [length = 1]
        .cfi_endproc

> OK, I assumed that the patch was tested according to established
> standards, and that it doesn't need to be reviewed for its most basic
> functionality. If the patch was not tested appropriately, I will
> simply ignore future submissions from the submitters that want to bend
> the rules. Sorry.
>
As I said before, bogus tests and bootstrap was actually working. I
sorry for the crap I've sent.

I've rewrote intrinsics and tests. Bootstrap is passing, new tests are
passing and they perform correct checks.

ChangeLog entry:
2013-06-24  Kirill Yukhin  <kirill.yukhin@intel.com>

    * config/i386/bmiintrin.h (_bextr_u32): New.
    (_bextr_u64): Ditto.

testsuite/ChangeLog entry:
2013-06-24  Kirill Yukhin  <kirill.yukhin@intel.com>

    * gcc.target/i386/bmi-1.c: Extend with new instrinsic.
    Fix scan patterns.
    * gcc.target/i386/bmi-1.c: Ditto.
    * gcc.target/i386/bmi-bextr-3.c: New.
    * gcc.target/i386/bmi-bextr-4.c: Ditto.

Could you pls take a look?

Thanks, K
Jakub Jelinek - June 24, 2013, 9:04 a.m.
On Mon, Jun 24, 2013 at 12:55:11PM +0400, Kirill Yukhin wrote:
> Here is snippet from i386.c:
>     case
> 3:                                                                                                                                       
> 
>       pat = GEN_FCN (icode) (real_target, args[0].op,
> args[1].op,                                                                                 
> 
>                             
> args[2].op);                                                                                                         

This is calling genfun, which is
typedef rtx (*insn_gen_fn) (rtx, ...);
therefore there is no compile time number of arguments verification here,
the caller and callee simply has to agree on the number of arguments or at
least caller needs to supply as many arguments as the callee relies on.

> Could you pls take a look?

This looks ok to me, but as I'm not an i386 maintainer, I'll defer it to
Uros or Richard.

> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index 53a6cde..6d6aeac 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,3 +1,8 @@
> +2013-06-20  Kirill Yukhin  <kirill.yukhin@intel.com>
> +
> +	* config/i386/bmiintrin.h (_bextr_u32): New.
> +	(_bextr_u64): Ditto.
> +
>  2013-06-19  Paolo Carlini  <paolo.carlini@oracle.com>
>  
>  	PR c++/56544
> diff --git a/gcc/config/i386/bmiintrin.h b/gcc/config/i386/bmiintrin.h
> index 0087f5c..7a30cf0 100644
> --- a/gcc/config/i386/bmiintrin.h
> +++ b/gcc/config/i386/bmiintrin.h
> @@ -52,6 +52,12 @@ __bextr_u32 (unsigned int __X, unsigned int __Y)
>  }
>  
>  extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +_bextr_u32 (unsigned int __X, unsigned int __Y, unsigned __Z)
> +{
> +  return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
> +}
> +
> +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>  __blsi_u32 (unsigned int __X)
>  {
>    return __X & -__X;
> @@ -91,6 +97,12 @@ __bextr_u64 (unsigned long long __X, unsigned long long __Y)
>  }
>  
>  extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +_bextr_u64 (unsigned long long __X, unsigned long long __Y, unsigned long long __Z)
> +{
> +  return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
> +}
> +
> +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>  __blsi_u64 (unsigned long long __X)
>  {
>    return __X & -__X;

	Jakub
Richard Henderson - June 27, 2013, 4:28 p.m.
On 06/24/2013 01:55 AM, Kirill Yukhin wrote:
>  extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +_bextr_u64 (unsigned long long __X, unsigned long long __Y, unsigned long long __Z)
> +{
> +  return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
> +}
> +

The __Y and __Z arguments have the wrong type, according to the intel manual:

> extern unsigned __int64 _bextr_u64(unsigned __int64 s1, unsigned int start_bit, unsigned int len_in_bits);

The patch is ok with that fixed.


r~
Kirill Yukhin - June 28, 2013, 9:04 a.m.
On 6/27/2013 8:28 PM, Richard Henderson wrote:
Hello,
> The patch is ok with that fixed.
>
Thanks, checked into trunk:
http://gcc.gnu.org/ml/gcc-cvs/2013-06/msg00941.html.

Thanks, K

Patch

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 53a6cde..6d6aeac 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@ 
+2013-06-20  Kirill Yukhin  <kirill.yukhin@intel.com>
+
+	* config/i386/bmiintrin.h (_bextr_u32): New.
+	(_bextr_u64): Ditto.
+
 2013-06-19  Paolo Carlini  <paolo.carlini@oracle.com>
 
 	PR c++/56544
diff --git a/gcc/config/i386/bmiintrin.h b/gcc/config/i386/bmiintrin.h
index 0087f5c..7a30cf0 100644
--- a/gcc/config/i386/bmiintrin.h
+++ b/gcc/config/i386/bmiintrin.h
@@ -52,6 +52,12 @@  __bextr_u32 (unsigned int __X, unsigned int __Y)
 }
 
 extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_bextr_u32 (unsigned int __X, unsigned int __Y, unsigned __Z)
+{
+  return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __blsi_u32 (unsigned int __X)
 {
   return __X & -__X;
@@ -91,6 +97,12 @@  __bextr_u64 (unsigned long long __X, unsigned long long __Y)
 }
 
 extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_bextr_u64 (unsigned long long __X, unsigned long long __Y, unsigned long long __Z)
+{
+  return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __blsi_u64 (unsigned long long __X)
 {
   return __X & -__X;
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 1d7c2af..27ca5d2 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,11 @@ 
+2013-06-20  Kirill Yukhin  <kirill.yukhin@intel.com>
+
+	* gcc.target/i386/bmi-1.c: Extend with new instrinsic.
+	Fix scan patterns.
+	* gcc.target/i386/bmi-1.c: Ditto.
+	* gcc.target/i386/bmi-bextr-3.c: New.
+	* gcc.target/i386/bmi-bextr-4.c: Ditto.
+
 2013-06-19  Manuel Lopez-Ibanez  <manu@gcc.gnu.org>
 
 	PR c++/57638
diff --git a/gcc/testsuite/gcc.target/i386/bmi-1.c b/gcc/testsuite/gcc.target/i386/bmi-1.c
index dc964ba..a05cb27 100644
--- a/gcc/testsuite/gcc.target/i386/bmi-1.c
+++ b/gcc/testsuite/gcc.target/i386/bmi-1.c
@@ -1,11 +1,11 @@ 
 /* { dg-do compile } */
 /* { dg-options "-O2 -mbmi " } */
-/* { dg-final { scan-assembler "andn\[^\\n]*(%|)eax" } } */
-/* { dg-final { scan-assembler "bextr\[^\\n]*(%|)eax" } } */
-/* { dg-final { scan-assembler "blsi\[^\\n]*(%|)eax" } } */
-/* { dg-final { scan-assembler "blsmsk\[^\\n]*(%|)eax" } } */
-/* { dg-final { scan-assembler "blsr\[^\\n]*(%|)eax" } } */
-/* { dg-final { scan-assembler "tzcntl\[^\\n]*(%|)eax" } } */
+/* { dg-final { scan-assembler "andn\[^\\n]*eax" } } */
+/* { dg-final { scan-assembler-times "bextr\[ \\t]+\[^\\n]*eax" 2 } } */
+/* { dg-final { scan-assembler "blsi\[^\\n]*eax" } } */
+/* { dg-final { scan-assembler "blsmsk\[^\\n]*eax" } } */
+/* { dg-final { scan-assembler "blsr\[^\\n]*eax" } } */
+/* { dg-final { scan-assembler "tzcntl\[^\\n]*eax" } } */
 
 #include <x86intrin.h>
 
@@ -22,6 +22,14 @@  func_bextr32 (unsigned int X, unsigned int Y)
 }
 
 unsigned int
+func_bextr32_3args (unsigned int X,
+		    unsigned int Y,
+		    unsigned int Z)
+{
+  return _bextr_u32(X, Y, Z);
+}
+
+unsigned int
 func_blsi32 (unsigned int X)
 {
   return __blsi_u32(X);
diff --git a/gcc/testsuite/gcc.target/i386/bmi-2.c b/gcc/testsuite/gcc.target/i386/bmi-2.c
index 56f7387..68d06a2 100644
--- a/gcc/testsuite/gcc.target/i386/bmi-2.c
+++ b/gcc/testsuite/gcc.target/i386/bmi-2.c
@@ -1,11 +1,11 @@ 
 /* { dg-do compile { target { ! { ia32 }  } } } */
 /* { dg-options "-O2 -mbmi " } */
-/* { dg-final { scan-assembler "andn\[^\\n]*(%|)rax" } } */
-/* { dg-final { scan-assembler "bextr\[^\\n]*(%|)rax" } } */
-/* { dg-final { scan-assembler "blsi\[^\\n]*(%|)rax" } } */
-/* { dg-final { scan-assembler "blsmsk\[^\\n]*(%|)rax" } } */
-/* { dg-final { scan-assembler "blsr\[^\\n]*(%|)rax" } } */
-/* { dg-final { scan-assembler "tzcntq\[^\\n]*(%|)rax" } } */
+/* { dg-final { scan-assembler "andn\[^\\n]*rax" } } */
+/* { dg-final { scan-assembler-times "bextr\[ \\t]+\[^\\n]*rax" 2 } } */
+/* { dg-final { scan-assembler "blsi\[^\\n]*rax" } } */
+/* { dg-final { scan-assembler "blsmsk\[^\\n]*rax" } } */
+/* { dg-final { scan-assembler "blsr\[^\\n]*rax" } } */
+/* { dg-final { scan-assembler "tzcntq\[^\\n]*rax" } } */
 
 #include <x86intrin.h>
 
@@ -22,6 +22,14 @@  func_bextr64 (unsigned long long X, unsigned long long Y)
 }
 
 unsigned long long
+func_bextr64_3args (unsigned long long X,
+		    unsigned long long Y,
+		    unsigned long long Z)
+{
+  return _bextr_u64 (X, Y, Z);
+}
+
+unsigned long long
 func_blsi64 (unsigned long long X)
 {
   return __blsi_u64 (X);
diff --git a/gcc/testsuite/gcc.target/i386/bmi-bextr-3.c b/gcc/testsuite/gcc.target/i386/bmi-bextr-3.c
new file mode 100644
index 0000000..fd6e362
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi-bextr-3.c
@@ -0,0 +1,48 @@ 
+/* { dg-do run { target { bmi && { ! ia32 } } } } */
+/* { dg-options "-O2 -mbmi -fno-inline" } */
+
+#include <x86intrin.h>
+
+#include "bmi-check.h"
+
+long long calc_bextr_u64 (unsigned long long src1,
+			  unsigned long long src2)
+{
+  long long res = 0;
+  unsigned char start = (src2 & 0xff);
+  unsigned char len = (int) ((src2 >> 8) & 0xff);
+  if (start < 64) {
+    unsigned i;
+    unsigned last = (start+len) < 64 ? start+len : 64;
+
+    src1 >>= start;
+    for (i=start; i<last; ++i) {
+      res |= (src1 & 1) << (i-start);
+      src1 >>= 1;
+    }
+  }
+
+  return res;
+}
+
+static void
+bmi_test ()
+{
+  unsigned i;
+  unsigned char start, len;
+  unsigned long long src1 = 0xfacec0ffeefacec0;
+  unsigned long long res, res_ref, src2;
+
+  for (i=0; i<5; ++i) {
+    start = i * 4;
+    len = i * 3;
+    src1 = src1 * 3;
+    src2 = (start & 0xff) | ((len & 0xff) << 8);
+
+    res_ref = calc_bextr_u64 (src1, src2);
+    res = _bextr_u64 (src1, start, len);
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi-bextr-4.c b/gcc/testsuite/gcc.target/i386/bmi-bextr-4.c
new file mode 100644
index 0000000..2318847
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi-bextr-4.c
@@ -0,0 +1,49 @@ 
+/* { dg-do run { target { bmi } } } */
+/* { dg-require-effective-target bmi  } */
+/* { dg-options "-O2 -mbmi -fno-inline" } */
+
+#include <x86intrin.h>
+
+#include "bmi-check.h"
+
+unsigned calc_bextr_u32 (unsigned src1, unsigned src2)
+{
+  unsigned res = 0;
+  unsigned char start = (src2 & 0xff);
+  unsigned char len = (int) ((src2 >> 8) & 0xff);
+  if (start < 32) {
+    unsigned i;
+    unsigned last = (start+len) < 32 ? start+len : 32;
+
+    src1 >>= start;
+    for (i=start; i<last; ++i) {
+      res |= (src1 & 1) << (i-start);
+      src1 >>= 1;
+    }
+  }
+
+  return res;
+}
+
+static void
+bmi_test ()
+{
+  unsigned i;
+  unsigned char start, len;
+  unsigned src1 = 0xfacec0ff;
+  unsigned res, res_ref, src2;
+
+  for (i=0; i<5; ++i) {
+    start = i * 4;
+    len = i * 4;
+
+    src1 = src1 * 3;
+    src2 = (start & 0xff) | ((len & 0xff) << 8);
+
+    res_ref = calc_bextr_u32 (src1, src2);
+    res = _bextr_u32 (src1, start, len);
+
+    if (res != res_ref)
+      abort();
+  }
+}