diff mbox series

[AArch64] Implement new intrinsics vabsd_s64 and vnegd_s64

Message ID f382c802-401c-901e-ead8-0d2188cf151c@arm.com
State New
Headers show
Series [AArch64] Implement new intrinsics vabsd_s64 and vnegd_s64 | expand

Commit Message

Vlad Lazar July 20, 2018, 9:37 a.m. UTC
Hi,

The patch adds implementations for the NEON intrinsics vabsd_s64 and vnegd_s64.
(https://developer.arm.com/products/architecture/cpu-architecture/a-profile/docs/ihi0073/latest/arm-neon-intrinsics-reference-architecture-specification)

Bootstrapped and regtested on aarch64-none-linux-gnu and there are no regressions.

OK for trunk?

Thanks,
Vlad

gcc/
2018-07-02  Vlad Lazar  <vlad.lazar@arm.com>

	* config/aarch64/arm_neon.h (vabsd_s64, vnegd_s64): New.

gcc/testsuite/
2018-07-02  Vlad Lazar  <vlad.lazar@arm.com>

	* gcc.target/aarch64/scalar_intrinsics.c (test_vabsd_s64, test_vabsd_s64): New.

---

Comments

Sudakshina Das July 23, 2018, 4:21 p.m. UTC | #1
Hi Vlad


On Friday 20 July 2018 10:37 AM, Vlad Lazar wrote:
> Hi,
>
> The patch adds implementations for the NEON intrinsics vabsd_s64 and 
> vnegd_s64.
> (https://developer.arm.com/products/architecture/cpu-architecture/a-profile/docs/ihi0073/latest/arm-neon-intrinsics-reference-architecture-specification) 
>
>
> Bootstrapped and regtested on aarch64-none-linux-gnu and there are no 
> regressions.
>
> OK for trunk?

Thanks for doing this. This looks good to me but you will a maintainer's 
approval.

Thanks
Sudi
>
> Thanks,
> Vlad
>
> gcc/
> 2018-07-02  Vlad Lazar  <vlad.lazar@arm.com>
>
>     * config/aarch64/arm_neon.h (vabsd_s64, vnegd_s64): New.
>
> gcc/testsuite/
> 2018-07-02  Vlad Lazar  <vlad.lazar@arm.com>
>
>     * gcc.target/aarch64/scalar_intrinsics.c (test_vabsd_s64, 
> test_vabsd_s64): New.
>
> ---
>
> diff --git a/gcc/config/aarch64/arm_neon.h 
> b/gcc/config/aarch64/arm_neon.h
> index 
> 2d18400040f031dfcdaf60269ad484647804e1be..19e22431a85bcd09d0ea759b42b0a52420b6c43c 
> 100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
> @@ -11822,6 +11822,13 @@ vabsq_s64 (int64x2_t __a)
>    return __builtin_aarch64_absv2di (__a);
>  }
>
> +__extension__ extern __inline int64_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vabsd_s64 (int64_t __a)
> +{
> +  return __builtin_aarch64_absdi (__a);
> +}
> +
>  /* vadd */
>
>  __extension__ extern __inline int64_t
> @@ -22907,6 +22914,12 @@ vneg_s64 (int64x1_t __a)
>    return -__a;
>  }
>
> +__extension__ extern __inline int64_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vnegd_s64 (int64_t __a)
> +{
> +  return -__a;
> +}
>  __extension__ extern __inline float32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vnegq_f32 (float32x4_t __a)
> diff --git a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c 
> b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
> index 
> ea29066e369b967d0781d31c8a5208bda9e4f685..45afeec373971838e0cd107038b4aa51a2d4998f 
> 100644
> --- a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
> +++ b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
> @@ -603,6 +603,14 @@ test_vsqaddd_u64 (uint64_t a, int64_t b)
>    return vsqaddd_u64 (a, b);
>  }
>
> +/* { dg-final { scan-assembler-times "\\tabs\\td\[0-9\]+" 1 } } */
> +
> +int64_t
> +test_vabsd_s64 (int64_t a)
> +{
> +  return vabsd_s64 (a);
> +}
> +
>  /* { dg-final { scan-assembler-times "\\tsqabs\\tb\[0-9\]+" 1 } } */
>
>  int8_t
> @@ -627,6 +635,14 @@ test_vqabss_s32 (int32_t a)
>    return vqabss_s32 (a);
>  }
>
> +/* { dg-final { scan-assembler-times "\\tneg\\tx\[0-9\]+" 1 } } */
> +
> +int64_t
> +test_vnegd_s64 (int64_t a)
> +{
> +  return vnegd_s64 (a);
> +}
> +
>  /* { dg-final { scan-assembler-times "\\tsqneg\\tb\[0-9\]+" 1 } } */
>
>  int8_t
James Greenhalgh July 31, 2018, 9:48 p.m. UTC | #2
On Fri, Jul 20, 2018 at 04:37:34AM -0500, Vlad Lazar wrote:
> Hi,
> 
> The patch adds implementations for the NEON intrinsics vabsd_s64 and vnegd_s64.
> (https://developer.arm.com/products/architecture/cpu-architecture/a-profile/docs/ihi0073/latest/arm-neon-intrinsics-reference-architecture-specification)
> 
> Bootstrapped and regtested on aarch64-none-linux-gnu and there are no regressions.
> 
> OK for trunk?
> 
> +__extension__ extern __inline int64_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vnegd_s64 (int64_t __a)
> +{
> +  return -__a;
> +}

Does this give the correct behaviour for the minimum value of int64_t? That
would be undefined behaviour in C, but well-defined under ACLE.

Thanks,
James
Kyrill Tkachov Aug. 1, 2018, 11:52 a.m. UTC | #3
On 31/07/18 22:48, James Greenhalgh wrote:
> On Fri, Jul 20, 2018 at 04:37:34AM -0500, Vlad Lazar wrote:
> > Hi,
> >
> > The patch adds implementations for the NEON intrinsics vabsd_s64 and vnegd_s64.
> > (https://developer.arm.com/products/architecture/cpu-architecture/a-profile/docs/ihi0073/latest/arm-neon-intrinsics-reference-architecture-specification)
> >
> > Bootstrapped and regtested on aarch64-none-linux-gnu and there are no regressions.
> >
> > OK for trunk?
> >
> > +__extension__ extern __inline int64_t
> > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > +vnegd_s64 (int64_t __a)
> > +{
> > +  return -__a;
> > +}
>
> Does this give the correct behaviour for the minimum value of int64_t? That
> would be undefined behaviour in C, but well-defined under ACLE.
>

Similar intrinsics such as vneg_s8, vneg_s16 etc use the same implementation
(though on vector types) and the test in the testsuite for them (gcc.target/aarch64/vneg_s.c)
has cases for these limit values, so it seems to work there.
Does the fact that those are using vector types rather than the scalar int64_t matter?

Kyrill

> Thanks,
> James
>
Vlad Lazar Aug. 1, 2018, 12:13 p.m. UTC | #4
On 31/07/18 22:48, James Greenhalgh wrote:
> On Fri, Jul 20, 2018 at 04:37:34AM -0500, Vlad Lazar wrote:
>> Hi,
>>
>> The patch adds implementations for the NEON intrinsics vabsd_s64 and vnegd_s64.
>> (https://developer.arm.com/products/architecture/cpu-architecture/a-profile/docs/ihi0073/latest/arm-neon-intrinsics-reference-architecture-specification)
>>
>> Bootstrapped and regtested on aarch64-none-linux-gnu and there are no regressions.
>>
>> OK for trunk?
>>
>> +__extension__ extern __inline int64_t
>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>> +vnegd_s64 (int64_t __a)
>> +{
>> +  return -__a;
>> +}
> 
> Does this give the correct behaviour for the minimum value of int64_t? That
> would be undefined behaviour in C, but well-defined under ACLE.
> 
> Thanks,
> James
> 

Hi. Thanks for the review.

For the minimum value of int64_t it behaves as the ACLE specifies:
"The negative of the minimum (signed) value is itself."

Thanks,
Vlad
James Greenhalgh Aug. 1, 2018, 5:35 p.m. UTC | #5
On Wed, Aug 01, 2018 at 07:13:53AM -0500, Vlad Lazar wrote:
> On 31/07/18 22:48, James Greenhalgh wrote:
> > On Fri, Jul 20, 2018 at 04:37:34AM -0500, Vlad Lazar wrote:
> >> Hi,
> >>
> >> The patch adds implementations for the NEON intrinsics vabsd_s64 and vnegd_s64.
> >> (https://developer.arm.com/products/architecture/cpu-architecture/a-profile/docs/ihi0073/latest/arm-neon-intrinsics-reference-architecture-specification)
> >>
> >> Bootstrapped and regtested on aarch64-none-linux-gnu and there are no regressions.
> >>
> >> OK for trunk?
> >>
> >> +__extension__ extern __inline int64_t
> >> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> >> +vnegd_s64 (int64_t __a)
> >> +{
> >> +  return -__a;
> >> +}
> > 
> > Does this give the correct behaviour for the minimum value of int64_t? That
> > would be undefined behaviour in C, but well-defined under ACLE.
> > 
> > Thanks,
> > James
> > 
> 
> Hi. Thanks for the review.
> 
> For the minimum value of int64_t it behaves as the ACLE specifies:
> "The negative of the minimum (signed) value is itself."

What should happen in this testcase? The spoiler is below, but try to work out
what should happen and what goes wrong with your implementation.

  int foo (int64_t x)
  {
    if (x < (int64_t) 0)
      return vnegd_s64(x) < (int64_t) 0;
    else
      return 0;
  }
  
  
  int bar (void)
  {
    return foo (INT64_MIN);
  }
 
Thanks,
James


-----

<spoiler!>




INT64_MIN < 0 should be true, so we should return vnegd_s64(INT64_MIN) < 0.
vnegd_s64(INT64_MIN) is identity, so the return value should be
INT64_MIN < 0; i.e. True.

This isn't what the compiler thinks... The compiler makes use of the fact
that -INT64_MIN is undefined behaviour in C, and doesn't need to be considered
as a special case. The if statement gives you a range reduction to [-INF, -1],
negating that gives you a range [1, INF], and [1, INF] is never less than 0,
so the compiler folds the function to return false. We have a mismatch in
semantics
Vlad Lazar Aug. 8, 2018, 4:38 p.m. UTC | #6
On 01/08/18 18:35, James Greenhalgh wrote:
> On Wed, Aug 01, 2018 at 07:13:53AM -0500, Vlad Lazar wrote:
>> On 31/07/18 22:48, James Greenhalgh wrote:
>>> On Fri, Jul 20, 2018 at 04:37:34AM -0500, Vlad Lazar wrote:
>>>> Hi,
>>>>
>>>> The patch adds implementations for the NEON intrinsics vabsd_s64 and vnegd_s64.
>>>> (https://developer.arm.com/products/architecture/cpu-architecture/a-profile/docs/ihi0073/latest/arm-neon-intrinsics-reference-architecture-specification)
>>>>
>>>> Bootstrapped and regtested on aarch64-none-linux-gnu and there are no regressions.
>>>>
>>>> OK for trunk?
>>>>
>>>> +__extension__ extern __inline int64_t
>>>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>>>> +vnegd_s64 (int64_t __a)
>>>> +{
>>>> +  return -__a;
>>>> +}
>>>
>>> Does this give the correct behaviour for the minimum value of int64_t? That
>>> would be undefined behaviour in C, but well-defined under ACLE.
>>>
>>> Thanks,
>>> James
>>>
>>
>> Hi. Thanks for the review.
>>
>> For the minimum value of int64_t it behaves as the ACLE specifies:
>> "The negative of the minimum (signed) value is itself."
> 
> What should happen in this testcase? The spoiler is below, but try to work out
> what should happen and what goes wrong with your implementation.
> 
>    int foo (int64_t x)
>    {
>      if (x < (int64_t) 0)
>        return vnegd_s64(x) < (int64_t) 0;
>      else
>        return 0;
>    }
>    
>    
>    int bar (void)
>    {
>      return foo (INT64_MIN);
>    }
>   
> Thanks,
> James
> 
> 
> -----
> 
> <spoiler!>
> 
> 
> 
> 
> INT64_MIN < 0 should be true, so we should return vnegd_s64(INT64_MIN) < 0.
> vnegd_s64(INT64_MIN) is identity, so the return value should be
> INT64_MIN < 0; i.e. True.
> 
> This isn't what the compiler thinks... The compiler makes use of the fact
> that -INT64_MIN is undefined behaviour in C, and doesn't need to be considered
> as a special case. The if statement gives you a range reduction to [-INF, -1],
> negating that gives you a range [1, INF], and [1, INF] is never less than 0,
> so the compiler folds the function to return false. We have a mismatch in
> semantics
> 
I see your point now. I have updated the vnegd_s64 intrinsic to convert to
unsigned before negating. This means that if the predicted range of x is
[INT64_MIN, y], then the predicted range of vnegd_s64 (x) will be
~[INT64_MIN + 1, y] which seems to resolve the issue. I've also added testcases
which reflect the issue you've pointed out. Note that I've change the vabsd_s64
intrinsic in order to avoid moves between integer and vector registers.

See the updated patch below. Ok for trunk?

---

diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 2d18400040f031dfcdaf60269ad484647804e1be..fc734e1aa9e93c171c0670164e5a3a54209905d3 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -11822,6 +11822,18 @@ vabsq_s64 (int64x2_t __a)
    return __builtin_aarch64_absv2di (__a);
  }
  
+/* Try to avoid moving between integer and vector registers.
+   For why the cast to unsigned is needed check the vnegd_s64 intrinsic.
+   There is a testcase related to this issue:
+   gcc.target/aarch64/vabsd_s64.c.  */
+
+__extension__ extern __inline int64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vabsd_s64 (int64_t __a)
+{
+  return __a < 0 ? - (uint64_t) __a : __a;
+}
+
  /* vadd */
  
  __extension__ extern __inline int64_t
@@ -22907,6 +22919,25 @@ vneg_s64 (int64x1_t __a)
    return -__a;
  }
  
+/* According to the ACLE, the negative of the minimum (signed)
+   value is itself.  This leads to a semantics mismatch, as this is
+   undefined behaviour in C.  The value range predictor is not
+   aware that the negation of a negative number can still be negative
+   and it may try to fold the expression.  See the test in
+   gcc.target/aarch64/vnegd_s64.c for an example.
+
+   The cast below tricks the value range predictor to include
+   INT64_MIN in the range it computes.  So for x in the range
+   [INT64_MIN, y] the range prediction after vnegd_s64 (x) will
+   be ~[INT64_MIN + 1, y].  */
+
+__extension__ extern __inline int64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vnegd_s64 (int64_t __a)
+{
+  return - (uint64_t) __a;
+}
+
  __extension__ extern __inline float32x4_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  vnegq_f32 (float32x4_t __a)
diff --git a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
index ea29066e369b967d0781d31c8a5208bda9e4f685..d943989768dd8c9aa87d9dcb899e199029ef3f8b 100644
--- a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
+++ b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
@@ -627,6 +627,14 @@ test_vqabss_s32 (int32_t a)
    return vqabss_s32 (a);
  }
  
+/* { dg-final { scan-assembler-times "\\tneg\\tx\[0-9\]+" 1 } } */
+
+int64_t
+test_vnegd_s64 (int64_t a)
+{
+  return vnegd_s64 (a);
+}
+
  /* { dg-final { scan-assembler-times "\\tsqneg\\tb\[0-9\]+" 1 } } */
  
  int8_t
diff --git a/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_3.c b/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..cf4e7ae4679d5b1896f35e3bf3135b0bd42befde
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_3.c
@@ -0,0 +1,39 @@
+/* Test the vabsd_s64 intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "--save-temps -O2" } */
+
+#include <arm_neon.h>
+#include <limits.h>
+
+extern void abort (void);
+
+#define force_simd(V1)   asm volatile ("mov %d0, %1.d[0]"       \
+           : "=w"(V1)                                           \
+           : "w"(V1)                                            \
+           : /* No clobbers */);
+
+#define RUN_TEST(test, answ)   \
+{                                      \
+  force_simd (test);                   \
+  force_simd (answ);                   \
+  int64_t res = vabsd_s64 (test);      \
+  force_simd (res);                    \
+  if (res != answ)                     \
+    abort ();                          \
+}
+
+int64_t input[] = {INT64_MAX, 10, 0, -10, INT64_MIN + 1, INT64_MIN};
+int64_t expected[] = {INT64_MAX, 10, 0, 10, INT64_MAX, INT64_MIN};
+
+int main (void)
+{
+  RUN_TEST (input[0], expected[0]);
+  RUN_TEST (input[1], expected[1]);
+  RUN_TEST (input[2], expected[2]);
+  RUN_TEST (input[3], expected[3]);
+  RUN_TEST (input[4], expected[4]);
+  RUN_TEST (input[5], expected[5]);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/vabsd_s64.c b/gcc/testsuite/gcc.target/aarch64/vabsd_s64.c
new file mode 100644
index 0000000000000000000000000000000000000000..a0f88ee12c3ea0269041213899a68f6677d80d42
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vabsd_s64.c
@@ -0,0 +1,34 @@
+/* Check that the compiler does not optimise the vabsd_s64 call out.
+   We need to check for this because there is a mismatch in semantics
+   between the ACLE, which states that he absolute value of the minimum
+   (signed) value is itself, and C, where this is undefined behaviour.  */
+
+/* { dg-do run } */
+/* { dg-options "--save-temps -fno-inline -O2" } */
+
+#include <arm_neon.h>
+#include <limits.h>
+
+extern void abort (void);
+
+int
+bar (int64_t x)
+{
+  if (x < (int64_t) 0)
+    return vabsd_s64 (x) < (int64_t) 0;
+  else
+	return -1;
+}
+
+int
+main (void)
+{
+  int ans = 1;
+  int res_abs = bar (INT64_MIN);
+
+  if (res_abs != ans)
+    abort ();
+
+  return 0;
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/vneg_s.c b/gcc/testsuite/gcc.target/aarch64/vneg_s.c
index 911054053eaefb5a67b48578fac9e2ba428c3ab2..f708e97c34570eb75595915c040e5175562c2bea 100644
--- a/gcc/testsuite/gcc.target/aarch64/vneg_s.c
+++ b/gcc/testsuite/gcc.target/aarch64/vneg_s.c
@@ -75,6 +75,18 @@ extern void abort (void);
        }									\
    }
  
+#define RUN_TEST_SCALAR(test_val, answ_val, a, b)     \
+  {                                                   \
+    int64_t res;                                      \
+    INHIB_OPTIMIZATION;                               \
+    a = test_val;                                     \
+    b = answ_val;                                     \
+    force_simd (b);                                   \
+    force_simd (a);                                   \
+    res = vnegd_s64 (a);                              \
+    force_simd (res);                                 \
+  }
+
  int
  test_vneg_s8 ()
  {
@@ -179,6 +191,25 @@ test_vneg_s64 ()
  
  /* { dg-final { scan-assembler-times "neg\\td\[0-9\]+, d\[0-9\]+" 8 } } */
  
+int
+test_vnegd_s64 ()
+{
+  int64_t a, b;
+
+  RUN_TEST_SCALAR (TEST0, ANSW0, a, b);
+  RUN_TEST_SCALAR (TEST1, ANSW1, a, b);
+  RUN_TEST_SCALAR (TEST2, ANSW2, a, b);
+  RUN_TEST_SCALAR (TEST3, ANSW3, a, b);
+  RUN_TEST_SCALAR (TEST4, ANSW4, a, b);
+  RUN_TEST_SCALAR (TEST5, ANSW5, a, b);
+  RUN_TEST_SCALAR (LLONG_MAX, LLONG_MIN + 1, a, b);
+  RUN_TEST_SCALAR (LLONG_MIN, LLONG_MIN, a, b);
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "neg\\td\[0-9\]+, d\[0-9\]+" 8 } } */
+
  int
  test_vnegq_s8 ()
  {
@@ -283,6 +314,9 @@ main (int argc, char **argv)
    if (test_vneg_s64 ())
      abort ();
  
+  if (test_vnegd_s64 ())
+    abort ();
+
    if (test_vnegq_s8 ())
      abort ();
  
diff --git a/gcc/testsuite/gcc.target/aarch64/vnegd_s64.c b/gcc/testsuite/gcc.target/aarch64/vnegd_s64.c
new file mode 100644
index 0000000000000000000000000000000000000000..73d478ff49daf758e233958d134de8fb864090c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vnegd_s64.c
@@ -0,0 +1,36 @@
+/* Check that the compiler does not optimise the negation out.
+   We need to check for this because there is a mismatch in semantics
+   between the ACLE, which states that he negative of the minimum
+   (signed) value is itself and C, where this is undefined behaviour.  */
+
+/* { dg-do run } */
+/* { dg-options "--save-temps -O2" } */
+
+#include <arm_neon.h>
+#include <limits.h>
+
+extern void abort (void);
+
+int
+foo (int64_t x)
+{
+  if (x < (int64_t) 0)
+    return vnegd_s64 (x) < (int64_t) 0;
+  else
+    return -1;
+}
+
+/* { dg-final { scan-assembler-times {neg\tx[0-9]+, x[0-9]+} 1 } } */
+
+int
+main (void)
+{
+  int ans = 1;
+  int res = foo (INT64_MIN);
+
+  if (res != ans)
+    abort ();
+
+  return 0;
+}
+
Vlad Lazar Aug. 28, 2018, 8:59 a.m. UTC | #7
Gentle ping.

On 08/08/18 17:38, Vlad Lazar wrote:
> On 01/08/18 18:35, James Greenhalgh wrote:
>> On Wed, Aug 01, 2018 at 07:13:53AM -0500, Vlad Lazar wrote:
>>> On 31/07/18 22:48, James Greenhalgh wrote:
>>>> On Fri, Jul 20, 2018 at 04:37:34AM -0500, Vlad Lazar wrote:
>>>>> Hi,
>>>>>
>>>>> The patch adds implementations for the NEON intrinsics vabsd_s64 and vnegd_s64.
>>>>> (https://developer.arm.com/products/architecture/cpu-architecture/a-profile/docs/ihi0073/latest/arm-neon-intrinsics-reference-architecture-specification)
>>>>>
>>>>> Bootstrapped and regtested on aarch64-none-linux-gnu and there are no regressions.
>>>>>
>>>>> OK for trunk?
>>>>>
>>>>> +__extension__ extern __inline int64_t
>>>>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>>>>> +vnegd_s64 (int64_t __a)
>>>>> +{
>>>>> +  return -__a;
>>>>> +}
>>>>
>>>> Does this give the correct behaviour for the minimum value of int64_t? That
>>>> would be undefined behaviour in C, but well-defined under ACLE.
>>>>
>>>> Thanks,
>>>> James
>>>>
>>>
>>> Hi. Thanks for the review.
>>>
>>> For the minimum value of int64_t it behaves as the ACLE specifies:
>>> "The negative of the minimum (signed) value is itself."
>>
>> What should happen in this testcase? The spoiler is below, but try to work out
>> what should happen and what goes wrong with your implementation.
>>
>>    int foo (int64_t x)
>>    {
>>      if (x < (int64_t) 0)
>>        return vnegd_s64(x) < (int64_t) 0;
>>      else
>>        return 0;
>>    }
>>    int bar (void)
>>    {
>>      return foo (INT64_MIN);
>>    }
>> Thanks,
>> James
>>
>>
>> -----
>>
>> <spoiler!>
>>
>>
>>
>>
>> INT64_MIN < 0 should be true, so we should return vnegd_s64(INT64_MIN) < 0.
>> vnegd_s64(INT64_MIN) is identity, so the return value should be
>> INT64_MIN < 0; i.e. True.
>>
>> This isn't what the compiler thinks... The compiler makes use of the fact
>> that -INT64_MIN is undefined behaviour in C, and doesn't need to be considered
>> as a special case. The if statement gives you a range reduction to [-INF, -1],
>> negating that gives you a range [1, INF], and [1, INF] is never less than 0,
>> so the compiler folds the function to return false. We have a mismatch in
>> semantics
>>
> I see your point now. I have updated the vnegd_s64 intrinsic to convert to
> unsigned before negating. This means that if the predicted range of x is
> [INT64_MIN, y], then the predicted range of vnegd_s64 (x) will be
> ~[INT64_MIN + 1, y] which seems to resolve the issue. I've also added testcases
> which reflect the issue you've pointed out. Note that I've change the vabsd_s64
> intrinsic in order to avoid moves between integer and vector registers.
>
> See the updated patch below. Ok for trunk?
>
> ---
>
> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> index 2d18400040f031dfcdaf60269ad484647804e1be..fc734e1aa9e93c171c0670164e5a3a54209905d3 100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
> @@ -11822,6 +11822,18 @@ vabsq_s64 (int64x2_t __a)
>     return __builtin_aarch64_absv2di (__a);
>   }
>
> +/* Try to avoid moving between integer and vector registers.
> +   For why the cast to unsigned is needed check the vnegd_s64 intrinsic.
> +   There is a testcase related to this issue:
> +   gcc.target/aarch64/vabsd_s64.c.  */
> +
> +__extension__ extern __inline int64_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vabsd_s64 (int64_t __a)
> +{
> +  return __a < 0 ? - (uint64_t) __a : __a;
> +}
> +
>   /* vadd */
>
>   __extension__ extern __inline int64_t
> @@ -22907,6 +22919,25 @@ vneg_s64 (int64x1_t __a)
>     return -__a;
>   }
>
> +/* According to the ACLE, the negative of the minimum (signed)
> +   value is itself.  This leads to a semantics mismatch, as this is
> +   undefined behaviour in C.  The value range predictor is not
> +   aware that the negation of a negative number can still be negative
> +   and it may try to fold the expression.  See the test in
> +   gcc.target/aarch64/vnegd_s64.c for an example.
> +
> +   The cast below tricks the value range predictor to include
> +   INT64_MIN in the range it computes.  So for x in the range
> +   [INT64_MIN, y] the range prediction after vnegd_s64 (x) will
> +   be ~[INT64_MIN + 1, y].  */
> +
> +__extension__ extern __inline int64_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vnegd_s64 (int64_t __a)
> +{
> +  return - (uint64_t) __a;
> +}
> +
>   __extension__ extern __inline float32x4_t
>   __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>   vnegq_f32 (float32x4_t __a)
> diff --git a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
> index ea29066e369b967d0781d31c8a5208bda9e4f685..d943989768dd8c9aa87d9dcb899e199029ef3f8b 100644
> --- a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
> +++ b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
> @@ -627,6 +627,14 @@ test_vqabss_s32 (int32_t a)
>     return vqabss_s32 (a);
>   }
>
> +/* { dg-final { scan-assembler-times "\\tneg\\tx\[0-9\]+" 1 } } */
> +
> +int64_t
> +test_vnegd_s64 (int64_t a)
> +{
> +  return vnegd_s64 (a);
> +}
> +
>   /* { dg-final { scan-assembler-times "\\tsqneg\\tb\[0-9\]+" 1 } } */
>
>   int8_t
> diff --git a/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_3.c b/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_3.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..cf4e7ae4679d5b1896f35e3bf3135b0bd42befde
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_3.c
> @@ -0,0 +1,39 @@
> +/* Test the vabsd_s64 intrinsic.  */
> +
> +/* { dg-do run } */
> +/* { dg-options "--save-temps -O2" } */
> +
> +#include <arm_neon.h>
> +#include <limits.h>
> +
> +extern void abort (void);
> +
> +#define force_simd(V1)   asm volatile ("mov %d0, %1.d[0]"       \
> +           : "=w"(V1)                                           \
> +           : "w"(V1)                                            \
> +           : /* No clobbers */);
> +
> +#define RUN_TEST(test, answ)   \
> +{                                      \
> +  force_simd (test);                   \
> +  force_simd (answ);                   \
> +  int64_t res = vabsd_s64 (test);      \
> +  force_simd (res);                    \
> +  if (res != answ)                     \
> +    abort ();                          \
> +}
> +
> +int64_t input[] = {INT64_MAX, 10, 0, -10, INT64_MIN + 1, INT64_MIN};
> +int64_t expected[] = {INT64_MAX, 10, 0, 10, INT64_MAX, INT64_MIN};
> +
> +int main (void)
> +{
> +  RUN_TEST (input[0], expected[0]);
> +  RUN_TEST (input[1], expected[1]);
> +  RUN_TEST (input[2], expected[2]);
> +  RUN_TEST (input[3], expected[3]);
> +  RUN_TEST (input[4], expected[4]);
> +  RUN_TEST (input[5], expected[5]);
> +
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/vabsd_s64.c b/gcc/testsuite/gcc.target/aarch64/vabsd_s64.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..a0f88ee12c3ea0269041213899a68f6677d80d42
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/vabsd_s64.c
> @@ -0,0 +1,34 @@
> +/* Check that the compiler does not optimise the vabsd_s64 call out.
> +   We need to check for this because there is a mismatch in semantics
> +   between the ACLE, which states that he absolute value of the minimum
> +   (signed) value is itself, and C, where this is undefined behaviour.  */
> +
> +/* { dg-do run } */
> +/* { dg-options "--save-temps -fno-inline -O2" } */
> +
> +#include <arm_neon.h>
> +#include <limits.h>
> +
> +extern void abort (void);
> +
> +int
> +bar (int64_t x)
> +{
> +  if (x < (int64_t) 0)
> +    return vabsd_s64 (x) < (int64_t) 0;
> +  else
> +    return -1;
> +}
> +
> +int
> +main (void)
> +{
> +  int ans = 1;
> +  int res_abs = bar (INT64_MIN);
> +
> +  if (res_abs != ans)
> +    abort ();
> +
> +  return 0;
> +}
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/vneg_s.c b/gcc/testsuite/gcc.target/aarch64/vneg_s.c
> index 911054053eaefb5a67b48578fac9e2ba428c3ab2..f708e97c34570eb75595915c040e5175562c2bea 100644
> --- a/gcc/testsuite/gcc.target/aarch64/vneg_s.c
> +++ b/gcc/testsuite/gcc.target/aarch64/vneg_s.c
> @@ -75,6 +75,18 @@ extern void abort (void);
>         }                                    \
>     }
>
> +#define RUN_TEST_SCALAR(test_val, answ_val, a, b)     \
> +  {                                                   \
> +    int64_t res;                                      \
> +    INHIB_OPTIMIZATION;                               \
> +    a = test_val;                                     \
> +    b = answ_val;                                     \
> +    force_simd (b);                                   \
> +    force_simd (a);                                   \
> +    res = vnegd_s64 (a);                              \
> +    force_simd (res);                                 \
> +  }
> +
>   int
>   test_vneg_s8 ()
>   {
> @@ -179,6 +191,25 @@ test_vneg_s64 ()
>
>   /* { dg-final { scan-assembler-times "neg\\td\[0-9\]+, d\[0-9\]+" 8 } } */
>
> +int
> +test_vnegd_s64 ()
> +{
> +  int64_t a, b;
> +
> +  RUN_TEST_SCALAR (TEST0, ANSW0, a, b);
> +  RUN_TEST_SCALAR (TEST1, ANSW1, a, b);
> +  RUN_TEST_SCALAR (TEST2, ANSW2, a, b);
> +  RUN_TEST_SCALAR (TEST3, ANSW3, a, b);
> +  RUN_TEST_SCALAR (TEST4, ANSW4, a, b);
> +  RUN_TEST_SCALAR (TEST5, ANSW5, a, b);
> +  RUN_TEST_SCALAR (LLONG_MAX, LLONG_MIN + 1, a, b);
> +  RUN_TEST_SCALAR (LLONG_MIN, LLONG_MIN, a, b);
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-assembler-times "neg\\td\[0-9\]+, d\[0-9\]+" 8 } } */
> +
>   int
>   test_vnegq_s8 ()
>   {
> @@ -283,6 +314,9 @@ main (int argc, char **argv)
>     if (test_vneg_s64 ())
>       abort ();
>
> +  if (test_vnegd_s64 ())
> +    abort ();
> +
>     if (test_vnegq_s8 ())
>       abort ();
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/vnegd_s64.c b/gcc/testsuite/gcc.target/aarch64/vnegd_s64.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..73d478ff49daf758e233958d134de8fb864090c4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/vnegd_s64.c
> @@ -0,0 +1,36 @@
> +/* Check that the compiler does not optimise the negation out.
> +   We need to check for this because there is a mismatch in semantics
> +   between the ACLE, which states that he negative of the minimum
> +   (signed) value is itself and C, where this is undefined behaviour.  */
> +
> +/* { dg-do run } */
> +/* { dg-options "--save-temps -O2" } */
> +
> +#include <arm_neon.h>
> +#include <limits.h>
> +
> +extern void abort (void);
> +
> +int
> +foo (int64_t x)
> +{
> +  if (x < (int64_t) 0)
> +    return vnegd_s64 (x) < (int64_t) 0;
> +  else
> +    return -1;
> +}
> +
> +/* { dg-final { scan-assembler-times {neg\tx[0-9]+, x[0-9]+} 1 } } */
> +
> +int
> +main (void)
> +{
> +  int ans = 1;
> +  int res = foo (INT64_MIN);
> +
> +  if (res != ans)
> +    abort ();
> +
> +  return 0;
> +}
> +
James Greenhalgh Aug. 28, 2018, 9:58 p.m. UTC | #8
On Tue, Aug 28, 2018 at 03:59:25AM -0500, Vlad Lazar wrote:
> Gentle ping.
> 
> On 08/08/18 17:38, Vlad Lazar wrote:
> > On 01/08/18 18:35, James Greenhalgh wrote:
> >> On Wed, Aug 01, 2018 at 07:13:53AM -0500, Vlad Lazar wrote:
> >>> On 31/07/18 22:48, James Greenhalgh wrote:
> >>>> On Fri, Jul 20, 2018 at 04:37:34AM -0500, Vlad Lazar wrote:
> >>>>> Hi,
> >>>>>
> >>>>> The patch adds implementations for the NEON intrinsics vabsd_s64 and vnegd_s64.
> >>>>> (https://developer.arm.com/products/architecture/cpu-architecture/a-profile/docs/ihi0073/latest/arm-neon-intrinsics-reference-architecture-specification)
> >>>>>
> >>>>> Bootstrapped and regtested on aarch64-none-linux-gnu and there are no regressions.
> >>>>>
> >>>>> OK for trunk?
> >>>>>
> >>>>> +__extension__ extern __inline int64_t
> >>>>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> >>>>> +vnegd_s64 (int64_t __a)
> >>>>> +{
> >>>>> +  return -__a;
> >>>>> +}
> >>>>
> >>>> Does this give the correct behaviour for the minimum value of int64_t? That
> >>>> would be undefined behaviour in C, but well-defined under ACLE.
> >>>>
> >>>> Thanks,
> >>>> James
> >>>>
> >>>
> >>> Hi. Thanks for the review.
> >>>
> >>> For the minimum value of int64_t it behaves as the ACLE specifies:
> >>> "The negative of the minimum (signed) value is itself."
> >>
> >> What should happen in this testcase? The spoiler is below, but try to work out
> >> what should happen and what goes wrong with your implementation.
> >>
> >>    int foo (int64_t x)
> >>    {
> >>      if (x < (int64_t) 0)
> >>        return vnegd_s64(x) < (int64_t) 0;
> >>      else
> >>        return 0;
> >>    }
> >>    int bar (void)
> >>    {
> >>      return foo (INT64_MIN);
> >>    }
> >> Thanks,
> >> James
> >>
> >>
> >> -----
> >>
> >> <spoiler!>
> >>
> >>
> >>
> >>
> >> INT64_MIN < 0 should be true, so we should return vnegd_s64(INT64_MIN) < 0.
> >> vnegd_s64(INT64_MIN) is identity, so the return value should be
> >> INT64_MIN < 0; i.e. True.
> >>
> >> This isn't what the compiler thinks... The compiler makes use of the fact
> >> that -INT64_MIN is undefined behaviour in C, and doesn't need to be considered
> >> as a special case. The if statement gives you a range reduction to [-INF, -1],
> >> negating that gives you a range [1, INF], and [1, INF] is never less than 0,
> >> so the compiler folds the function to return false. We have a mismatch in
> >> semantics
> >>
> > I see your point now. I have updated the vnegd_s64 intrinsic to convert to
> > unsigned before negating. This means that if the predicted range of x is
> > [INT64_MIN, y], then the predicted range of vnegd_s64 (x) will be
> > ~[INT64_MIN + 1, y] which seems to resolve the issue. I've also added testcases
> > which reflect the issue you've pointed out. Note that I've change the vabsd_s64
> > intrinsic in order to avoid moves between integer and vector registers.

I think from my reading of the standard that this is OK, but I may be rusty
and missing a corner case.

OK for trunk.

Thanks,
James
Vlad Lazar Aug. 31, 2018, 3:07 p.m. UTC | #9
On 28/08/18 22:58, James Greenhalgh wrote:
> On Tue, Aug 28, 2018 at 03:59:25AM -0500, Vlad Lazar wrote:
>> Gentle ping.
>>
>> On 08/08/18 17:38, Vlad Lazar wrote:
>>> On 01/08/18 18:35, James Greenhalgh wrote:
>>>> On Wed, Aug 01, 2018 at 07:13:53AM -0500, Vlad Lazar wrote:
>>>>> On 31/07/18 22:48, James Greenhalgh wrote:
>>>>>> On Fri, Jul 20, 2018 at 04:37:34AM -0500, Vlad Lazar wrote:
>>>>>>> Hi,
>>>>>>>
>>>>>>> The patch adds implementations for the NEON intrinsics vabsd_s64 and vnegd_s64.
>>>>>>> (https://developer.arm.com/products/architecture/cpu-architecture/a-profile/docs/ihi0073/latest/arm-neon-intrinsics-reference-architecture-specification)
>>>>>>>
>>>>>>> Bootstrapped and regtested on aarch64-none-linux-gnu and there are no regressions.
>>>>>>>
>>>>>>> OK for trunk?
>>>>>>>
>>>>>>> +__extension__ extern __inline int64_t
>>>>>>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>>>>>>> +vnegd_s64 (int64_t __a)
>>>>>>> +{
>>>>>>> +  return -__a;
>>>>>>> +}
>>>>>>
>>>>>> Does this give the correct behaviour for the minimum value of int64_t? That
>>>>>> would be undefined behaviour in C, but well-defined under ACLE.
>>>>>>
>>>>>> Thanks,
>>>>>> James
>>>>>>
>>>>>
>>>>> Hi. Thanks for the review.
>>>>>
>>>>> For the minimum value of int64_t it behaves as the ACLE specifies:
>>>>> "The negative of the minimum (signed) value is itself."
>>>>
>>>> What should happen in this testcase? The spoiler is below, but try to work out
>>>> what should happen and what goes wrong with your implementation.
>>>>
>>>>     int foo (int64_t x)
>>>>     {
>>>>       if (x < (int64_t) 0)
>>>>         return vnegd_s64(x) < (int64_t) 0;
>>>>       else
>>>>         return 0;
>>>>     }
>>>>     int bar (void)
>>>>     {
>>>>       return foo (INT64_MIN);
>>>>     }
>>>> Thanks,
>>>> James
>>>>
>>>>
>>>> -----
>>>>
>>>> <spoiler!>
>>>>
>>>>
>>>>
>>>>
>>>> INT64_MIN < 0 should be true, so we should return vnegd_s64(INT64_MIN) < 0.
>>>> vnegd_s64(INT64_MIN) is identity, so the return value should be
>>>> INT64_MIN < 0; i.e. True.
>>>>
>>>> This isn't what the compiler thinks... The compiler makes use of the fact
>>>> that -INT64_MIN is undefined behaviour in C, and doesn't need to be considered
>>>> as a special case. The if statement gives you a range reduction to [-INF, -1],
>>>> negating that gives you a range [1, INF], and [1, INF] is never less than 0,
>>>> so the compiler folds the function to return false. We have a mismatch in
>>>> semantics
>>>>
>>> I see your point now. I have updated the vnegd_s64 intrinsic to convert to
>>> unsigned before negating. This means that if the predicted range of x is
>>> [INT64_MIN, y], then the predicted range of vnegd_s64 (x) will be
>>> ~[INT64_MIN + 1, y] which seems to resolve the issue. I've also added testcases
>>> which reflect the issue you've pointed out. Note that I've change the vabsd_s64
>>> intrinsic in order to avoid moves between integer and vector registers.
>
> I think from my reading of the standard that this is OK, but I may be rusty
> and missing a corner case.
>
> OK for trunk.
>
> Thanks,
> James
>
Committed with an obvious change to testsuite/gcc.target/aarch64/vneg_s.c testcase:
merged two scan assembler directives which were searching for the same pattern.
See the patch below.

Thanks,
Vlad
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 264018)
+++ ChangeLog	(working copy)
@@ -1,3 +1,8 @@
+2018-08-31  Vlad Lazar  <vlad.lazar@arm.com>
+
+	* config/aarch64/arm_neon.h (vabsd_s64): New.
+	(vnegd_s64): Likewise.
+
 2018-08-31  Martin Jambor  <mjambor@suse.cz>
 
 	* ipa-cp.c (estimate_local_effects): Replace wrong MAX with MIN.
Index: config/aarch64/arm_neon.h
===================================================================
--- config/aarch64/arm_neon.h	(revision 264018)
+++ config/aarch64/arm_neon.h	(working copy)
@@ -11822,6 +11822,18 @@
   return __builtin_aarch64_absv2di (__a);
 }
 
+/* Try to avoid moving between integer and vector registers.
+   For why the cast to unsigned is needed check the vnegd_s64 intrinsic.
+   There is a testcase related to this issue:
+   gcc.target/aarch64/vabsd_s64.c.  */
+
+__extension__ extern __inline int64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vabsd_s64 (int64_t __a)
+{
+  return __a < 0 ? - (uint64_t) __a : __a;
+}
+
 /* vadd */
 
 __extension__ extern __inline int64_t
@@ -22907,6 +22919,25 @@
   return -__a;
 }
 
+/* According to the ACLE, the negative of the minimum (signed)
+   value is itself.  This leads to a semantics mismatch, as this is
+   undefined behaviour in C.  The value range predictor is not
+   aware that the negation of a negative number can still be negative
+   and it may try to fold the expression.  See the test in
+   gcc.target/aarch64/vnegd_s64.c for an example.
+
+   The cast below tricks the value range predictor to include
+   INT64_MIN in the range it computes.  So for x in the range
+   [INT64_MIN, y] the range prediction after vnegd_s64 (x) will
+   be ~[INT64_MIN + 1, y].  */
+
+__extension__ extern __inline int64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vnegd_s64 (int64_t __a)
+{
+  return - (uint64_t) __a;
+}
+
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vnegq_f32 (float32x4_t __a)
Index: testsuite/ChangeLog
===================================================================
--- testsuite/ChangeLog	(revision 264018)
+++ testsuite/ChangeLog	(working copy)
@@ -1,3 +1,12 @@
+2018-08-31  Vlad Lazar  <vlad.lazar@arm.com>
+
+	* gcc.target/aarch64/scalar_intrinsics.c (test_vnegd_s64): New.
+	* gcc.target/aarch64/vneg_s.c (RUN_TEST_SCALAR): New.
+	(test_vnegd_s64): Likewise.
+	* gcc.target/aarch64/vnegd_64.c: New.
+	* gcc.target/aarch64/vabsd_64.c: New.
+	* gcc.tartget/aarch64/vabs_intrinsic_3.c: New.
+
 2018-08-31  Nathan Sidwell  <nathan@acm.org>
 
 	PR c++/87155
Index: testsuite/gcc.target/aarch64/scalar_intrinsics.c
===================================================================
--- testsuite/gcc.target/aarch64/scalar_intrinsics.c	(revision 264018)
+++ testsuite/gcc.target/aarch64/scalar_intrinsics.c	(working copy)
@@ -627,6 +627,14 @@
   return vqabss_s32 (a);
 }
 
+/* { dg-final { scan-assembler-times "\\tneg\\tx\[0-9\]+" 1 } } */
+
+int64_t
+test_vnegd_s64 (int64_t a)
+{
+  return vnegd_s64 (a);
+}
+
 /* { dg-final { scan-assembler-times "\\tsqneg\\tb\[0-9\]+" 1 } } */
 
 int8_t
Index: testsuite/gcc.target/aarch64/vabs_intrinsic_3.c
===================================================================
--- testsuite/gcc.target/aarch64/vabs_intrinsic_3.c	(revision 0)
+++ testsuite/gcc.target/aarch64/vabs_intrinsic_3.c	(working copy)
@@ -0,0 +1,39 @@
+/* Test the vabsd_s64 intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "--save-temps -O2" } */
+
+#include <arm_neon.h>
+#include <limits.h>
+
+extern void abort (void);
+
+#define force_simd(V1)   asm volatile ("mov %d0, %1.d[0]"       \
+           : "=w"(V1)                                           \
+           : "w"(V1)                                            \
+           : /* No clobbers */);
+
+#define RUN_TEST(test, answ)   \
+{                                      \
+  force_simd (test);                   \
+  force_simd (answ);                   \
+  int64_t res = vabsd_s64 (test);      \
+  force_simd (res);                    \
+  if (res != answ)                     \
+    abort ();                          \
+}
+
+int64_t input[] = {INT64_MAX, 10, 0, -10, INT64_MIN + 1, INT64_MIN};
+int64_t expected[] = {INT64_MAX, 10, 0, 10, INT64_MAX, INT64_MIN};
+
+int main (void)
+{
+  RUN_TEST (input[0], expected[0]);
+  RUN_TEST (input[1], expected[1]);
+  RUN_TEST (input[2], expected[2]);
+  RUN_TEST (input[3], expected[3]);
+  RUN_TEST (input[4], expected[4]);
+  RUN_TEST (input[5], expected[5]);
+
+  return 0;
+}
Index: testsuite/gcc.target/aarch64/vabsd_s64.c
===================================================================
--- testsuite/gcc.target/aarch64/vabsd_s64.c	(revision 0)
+++ testsuite/gcc.target/aarch64/vabsd_s64.c	(working copy)
@@ -0,0 +1,34 @@
+/* Check that the compiler does not optimise the vabsd_s64 call out.
+   We need to check for this because there is a mismatch in semantics
+   between the ACLE, which states that he absolute value of the minimum
+   (signed) value is itself, and C, where this is undefined behaviour.  */
+
+/* { dg-do run } */
+/* { dg-options "--save-temps -fno-inline -O2" } */
+
+#include <arm_neon.h>
+#include <limits.h>
+
+extern void abort (void);
+
+int
+bar (int64_t x)
+{
+  if (x < (int64_t) 0)
+    return vabsd_s64 (x) < (int64_t) 0;
+  else
+	return -1;
+}
+
+int
+main (void)
+{
+  int ans = 1;
+  int res_abs = bar (INT64_MIN);
+
+  if (res_abs != ans)
+    abort ();
+
+  return 0;
+}
+
Index: testsuite/gcc.target/aarch64/vneg_s.c
===================================================================
--- testsuite/gcc.target/aarch64/vneg_s.c	(revision 264018)
+++ testsuite/gcc.target/aarch64/vneg_s.c	(working copy)
@@ -75,6 +75,18 @@
       }									\
   }
 
+#define RUN_TEST_SCALAR(test_val, answ_val, a, b)     \
+  {                                                   \
+    int64_t res;                                      \
+    INHIB_OPTIMIZATION;                               \
+    a = test_val;                                     \
+    b = answ_val;                                     \
+    force_simd (b);                                   \
+    force_simd (a);                                   \
+    res = vnegd_s64 (a);                              \
+    force_simd (res);                                 \
+  }
+
 int
 test_vneg_s8 ()
 {
@@ -177,8 +189,25 @@
   return 0;
 }
 
-/* { dg-final { scan-assembler-times "neg\\td\[0-9\]+, d\[0-9\]+" 8 } } */
+int
+test_vnegd_s64 ()
+{
+  int64_t a, b;
 
+  RUN_TEST_SCALAR (TEST0, ANSW0, a, b);
+  RUN_TEST_SCALAR (TEST1, ANSW1, a, b);
+  RUN_TEST_SCALAR (TEST2, ANSW2, a, b);
+  RUN_TEST_SCALAR (TEST3, ANSW3, a, b);
+  RUN_TEST_SCALAR (TEST4, ANSW4, a, b);
+  RUN_TEST_SCALAR (TEST5, ANSW5, a, b);
+  RUN_TEST_SCALAR (LLONG_MAX, LLONG_MIN + 1, a, b);
+  RUN_TEST_SCALAR (LLONG_MIN, LLONG_MIN, a, b);
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "neg\\td\[0-9\]+, d\[0-9\]+" 16 } } */
+
 int
 test_vnegq_s8 ()
 {
@@ -283,6 +312,9 @@
   if (test_vneg_s64 ())
     abort ();
 
+  if (test_vnegd_s64 ())
+    abort ();
+
   if (test_vnegq_s8 ())
     abort ();
 
Index: testsuite/gcc.target/aarch64/vnegd_s64.c
===================================================================
--- testsuite/gcc.target/aarch64/vnegd_s64.c	(revision 0)
+++ testsuite/gcc.target/aarch64/vnegd_s64.c	(working copy)
@@ -0,0 +1,36 @@
+/* Check that the compiler does not optimise the negation out.
+   We need to check for this because there is a mismatch in semantics
+   between the ACLE, which states that he negative of the minimum
+   (signed) value is itself and C, where this is undefined behaviour.  */
+
+/* { dg-do run } */
+/* { dg-options "--save-temps -O2" } */
+
+#include <arm_neon.h>
+#include <limits.h>
+
+extern void abort (void);
+
+int
+foo (int64_t x)
+{
+  if (x < (int64_t) 0)
+    return vnegd_s64 (x) < (int64_t) 0;
+  else
+    return -1;
+}
+
+/* { dg-final { scan-assembler-times {neg\tx[0-9]+, x[0-9]+} 1 } } */
+
+int
+main (void)
+{
+  int ans = 1;
+  int res = foo (INT64_MIN);
+
+  if (res != ans)
+    abort ();
+
+  return 0;
+}
+
diff mbox series

Patch

diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 2d18400040f031dfcdaf60269ad484647804e1be..19e22431a85bcd09d0ea759b42b0a52420b6c43c 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -11822,6 +11822,13 @@  vabsq_s64 (int64x2_t __a)
    return __builtin_aarch64_absv2di (__a);
  }
  
+__extension__ extern __inline int64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vabsd_s64 (int64_t __a)
+{
+  return __builtin_aarch64_absdi (__a);
+}
+
  /* vadd */
  
  __extension__ extern __inline int64_t
@@ -22907,6 +22914,12 @@  vneg_s64 (int64x1_t __a)
    return -__a;
  }
  
+__extension__ extern __inline int64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vnegd_s64 (int64_t __a)
+{
+  return -__a;
+}
  __extension__ extern __inline float32x4_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  vnegq_f32 (float32x4_t __a)
diff --git a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
index ea29066e369b967d0781d31c8a5208bda9e4f685..45afeec373971838e0cd107038b4aa51a2d4998f 100644
--- a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
+++ b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
@@ -603,6 +603,14 @@  test_vsqaddd_u64 (uint64_t a, int64_t b)
    return vsqaddd_u64 (a, b);
  }
  
+/* { dg-final { scan-assembler-times "\\tabs\\td\[0-9\]+" 1 } }  */
+
+int64_t
+test_vabsd_s64 (int64_t a)
+{
+  return vabsd_s64 (a);
+}
+
  /* { dg-final { scan-assembler-times "\\tsqabs\\tb\[0-9\]+" 1 } } */
  
  int8_t
@@ -627,6 +635,14 @@  test_vqabss_s32 (int32_t a)
    return vqabss_s32 (a);
  }
  
+/* { dg-final { scan-assembler-times "\\tneg\\tx\[0-9\]+" 1 } } */
+
+int64_t
+test_vnegd_s64 (int64_t a)
+{
+  return vnegd_s64 (a);
+}
+
  /* { dg-final { scan-assembler-times "\\tsqneg\\tb\[0-9\]+" 1 } } */
  
  int8_t