diff mbox

[5/5] ARM 64 bit sync atomic operations [V3]

Message ID 20111006175454.GF12770@davesworkthinkpad
State New
Headers show

Commit Message

David Gilbert Oct. 6, 2011, 5:54 p.m. UTC
Test support for ARM 64bit sync intrinsics.

      gcc/testsuite/
        * gcc.dg/di-longlong64-sync-1.c: New test.
        * gcc.dg/di-sync-multithread.c: New test.
        * gcc.target/arm/di-longlong64-sync-withhelpers.c: New test.
        * gcc.target/arm/di-longlong64-sync-withldrexd.c: New test.
        * lib/target-supports.exp: (arm_arch_*_ok): Series of  effective-target
                tests for v5, v6, v6k, and v7-a, and add-options helpers.
          (check_effective_target_arm_arm_ok): New helper.
          (check_effective_target_sync_longlong): New helper.

Comments

Ramana Radhakrishnan Oct. 12, 2011, 12:53 a.m. UTC | #1
On 6 October 2011 18:54, Dr. David Alan Gilbert
<david.gilbert@linaro.org> wrote:
>   Test support for ARM 64bit sync intrinsics.
>
>      gcc/testsuite/
>        * gcc.dg/di-longlong64-sync-1.c: New test.
>        * gcc.dg/di-sync-multithread.c: New test.
>        * gcc.target/arm/di-longlong64-sync-withhelpers.c: New test.
>        * gcc.target/arm/di-longlong64-sync-withldrexd.c: New test.
>        * lib/target-supports.exp: (arm_arch_*_ok): Series of  effective-target
>                tests for v5, v6, v6k, and v7-a, and add-options helpers.
>          (check_effective_target_arm_arm_ok): New helper.
>          (check_effective_target_sync_longlong): New helper.

I would like one of the testsuite maintainers to have a second look at
this. I'm not confident about my dejagnu foo to fully review this.

Ramana
Mike Stump Oct. 12, 2011, 1:44 a.m. UTC | #2
On Oct 6, 2011, at 10:54 AM, Dr. David Alan Gilbert wrote:
>   Test support for ARM 64bit sync intrinsics.

Ok.  Watch for any fallout on non-arm systems.  I'd always invite people who think they know the best way to test volatile to chime in.  There is the new infrastructure to test multi core synch issues with gdb trickery.  As you want more beef, you can consider it.

I'll note that I do sometimes wonder if this type of code isn't better handled in #if feature tests inside the testcases themselves.
diff mbox

Patch

diff --git a/gcc/testsuite/gcc.dg/di-longlong64-sync-1.c b/gcc/testsuite/gcc.dg/di-longlong64-sync-1.c
new file mode 100644
index 0000000..82a4ea2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/di-longlong64-sync-1.c
@@ -0,0 +1,164 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target sync_longlong } */
+/* { dg-options "-std=gnu99" } */
+/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */
+/* { dg-message "note: '__sync_nand_and_fetch' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */
+
+
+/* Test basic functionality of the intrinsics.  The operations should
+   not be optimized away if no one checks the return values.  */
+
+/* Based on ia64-sync-[12].c, but 1) long on ARM is 32 bit so use long long
+   (an explicit 64bit type maybe a better bet) and 2) Use values that cross
+   the 32bit boundary and cause carries since the actual maths are done as
+   pairs of 32 bit instructions.  */
+
+/* Note: This file is #included by some of the ARM tests.  */
+
+__extension__ typedef __SIZE_TYPE__ size_t;
+
+extern void abort (void);
+extern void *memcpy (void *, const void *, size_t);
+extern int memcmp (const void *, const void *, size_t);
+
+/* Temporary space where the work actually gets done.  */
+static long long AL[24];
+/* Values copied into AL before we start.  */
+static long long init_di[24] = { 0x100000002ll, 0x200000003ll, 0, 1,
+
+				 0x100000002ll, 0x100000002ll,
+				 0x100000002ll, 0x100000002ll,
+
+				 0, 0x1000e0de0000ll,
+				 42 , 0xc001c0de0000ll,
+
+				 -1ll, 0, 0xff00ff0000ll, -1ll,
+
+				 0, 0x1000e0de0000ll,
+				 42 , 0xc001c0de0000ll,
+
+				 -1ll, 0, 0xff00ff0000ll, -1ll};
+/* This is what should be in AL at the end.  */
+static long long test_di[24] = { 0x1234567890ll, 0x1234567890ll, 1, 0,
+
+				 0x100000002ll, 0x100000002ll,
+				 0x100000002ll, 0x100000002ll,
+
+				 1, 0xc001c0de0000ll,
+				 20, 0x1000e0de0000ll,
+
+				 0x300000007ll , 0x500000009ll,
+				 0xf100ff0001ll, ~0xa00000007ll,
+
+				 1, 0xc001c0de0000ll,
+				 20, 0x1000e0de0000ll,
+
+				 0x300000007ll , 0x500000009ll,
+				 0xf100ff0001ll, ~0xa00000007ll };
+
+/* First check they work in terms of what they do to memory.  */
+static void
+do_noret_di (void)
+{
+  __sync_val_compare_and_swap (AL+0, 0x100000002ll, 0x1234567890ll);
+  __sync_bool_compare_and_swap (AL+1, 0x200000003ll, 0x1234567890ll);
+  __sync_lock_test_and_set (AL+2, 1);
+  __sync_lock_release (AL+3);
+
+  /* The following tests should not change the value since the
+     original does NOT match.  */
+  __sync_val_compare_and_swap (AL+4, 0x000000002ll, 0x1234567890ll);
+  __sync_val_compare_and_swap (AL+5, 0x100000000ll, 0x1234567890ll);
+  __sync_bool_compare_and_swap (AL+6, 0x000000002ll, 0x1234567890ll);
+  __sync_bool_compare_and_swap (AL+7, 0x100000000ll, 0x1234567890ll);
+
+  __sync_fetch_and_add (AL+8, 1);
+  __sync_fetch_and_add (AL+9, 0xb000e0000000ll); /* + to both halves & carry.  */
+  __sync_fetch_and_sub (AL+10, 22);
+  __sync_fetch_and_sub (AL+11, 0xb000e0000000ll);
+
+  __sync_fetch_and_and (AL+12, 0x300000007ll);
+  __sync_fetch_and_or (AL+13, 0x500000009ll);
+  __sync_fetch_and_xor (AL+14, 0xe00000001ll);
+  __sync_fetch_and_nand (AL+15, 0xa00000007ll);
+
+  /* These should be the same as the fetch_and_* cases except for
+     return value.  */
+  __sync_add_and_fetch (AL+16, 1);
+  /* add to both halves & carry.  */
+  __sync_add_and_fetch (AL+17, 0xb000e0000000ll);
+  __sync_sub_and_fetch (AL+18, 22);
+  __sync_sub_and_fetch (AL+19, 0xb000e0000000ll);
+
+  __sync_and_and_fetch (AL+20, 0x300000007ll);
+  __sync_or_and_fetch (AL+21, 0x500000009ll);
+  __sync_xor_and_fetch (AL+22, 0xe00000001ll);
+  __sync_nand_and_fetch (AL+23, 0xa00000007ll);
+}
+
+/* Now check return values.  */
+static void
+do_ret_di (void)
+{
+  if (__sync_val_compare_and_swap (AL+0, 0x100000002ll, 0x1234567890ll) !=
+	0x100000002ll) abort ();
+  if (__sync_bool_compare_and_swap (AL+1, 0x200000003ll, 0x1234567890ll) !=
+	1) abort ();
+  if (__sync_lock_test_and_set (AL+2, 1) != 0) abort ();
+  __sync_lock_release (AL+3); /* no return value, but keep to match results.  */
+
+  /* The following tests should not change the value since the
+     original does NOT match.  */
+  if (__sync_val_compare_and_swap (AL+4, 0x000000002ll, 0x1234567890ll) !=
+	0x100000002ll) abort ();
+  if (__sync_val_compare_and_swap (AL+5, 0x100000000ll, 0x1234567890ll) !=
+	0x100000002ll) abort ();
+  if (__sync_bool_compare_and_swap (AL+6, 0x000000002ll, 0x1234567890ll) !=
+	0) abort ();
+  if (__sync_bool_compare_and_swap (AL+7, 0x100000000ll, 0x1234567890ll) !=
+	0) abort ();
+
+  if (__sync_fetch_and_add (AL+8, 1) != 0) abort ();
+  if (__sync_fetch_and_add (AL+9, 0xb000e0000000ll) != 0x1000e0de0000ll) abort ();
+  if (__sync_fetch_and_sub (AL+10, 22) != 42) abort ();
+  if (__sync_fetch_and_sub (AL+11, 0xb000e0000000ll) != 0xc001c0de0000ll)
+	abort ();
+
+  if (__sync_fetch_and_and (AL+12, 0x300000007ll) != -1ll) abort ();
+  if (__sync_fetch_and_or (AL+13, 0x500000009ll) != 0) abort ();
+  if (__sync_fetch_and_xor (AL+14, 0xe00000001ll) != 0xff00ff0000ll) abort ();
+  if (__sync_fetch_and_nand (AL+15, 0xa00000007ll) != -1ll) abort ();
+
+  /* These should be the same as the fetch_and_* cases except for
+     return value.  */
+  if (__sync_add_and_fetch (AL+16, 1) != 1) abort ();
+  if (__sync_add_and_fetch (AL+17, 0xb000e0000000ll) != 0xc001c0de0000ll)
+	abort ();
+  if (__sync_sub_and_fetch (AL+18, 22) != 20) abort ();
+  if (__sync_sub_and_fetch (AL+19, 0xb000e0000000ll) != 0x1000e0de0000ll)
+	abort ();
+
+  if (__sync_and_and_fetch (AL+20, 0x300000007ll) != 0x300000007ll) abort ();
+  if (__sync_or_and_fetch (AL+21, 0x500000009ll) != 0x500000009ll) abort ();
+  if (__sync_xor_and_fetch (AL+22, 0xe00000001ll) != 0xf100ff0001ll) abort ();
+  if (__sync_nand_and_fetch (AL+23, 0xa00000007ll) != ~0xa00000007ll) abort ();
+}
+
+int main ()
+{
+  memcpy (AL, init_di, sizeof (init_di));
+
+  do_noret_di ();
+
+  if (memcmp (AL, test_di, sizeof (test_di)))
+    abort ();
+
+  memcpy (AL, init_di, sizeof (init_di));
+
+  do_ret_di ();
+
+  if (memcmp (AL, test_di, sizeof (test_di)))
+    abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/di-sync-multithread.c b/gcc/testsuite/gcc.dg/di-sync-multithread.c
new file mode 100644
index 0000000..c5b3d89
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/di-sync-multithread.c
@@ -0,0 +1,205 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target sync_longlong } */
+/* { dg-require-effective-target pthread_h } */
+/* { dg-require-effective-target pthread } */
+/* { dg-options "-pthread -std=gnu99" } */
+
+/* test of long long atomic ops performed in parallel in 3 pthreads
+   david.gilbert@linaro.org */
+
+#include <pthread.h>
+#include <unistd.h>
+
+/*#define DEBUGIT 1 */
+
+#ifdef DEBUGIT
+#include <stdio.h>
+
+#define DOABORT(x,...) {\
+	 fprintf (stderr, x, __VA_ARGS__); fflush (stderr); abort ();\
+	 }
+
+#else
+
+#define DOABORT(x,...) abort ();
+
+#endif
+
+/* Passed to each thread to describe which bits it is going to work on.  */
+struct threadwork {
+  unsigned long long count; /* incremented each time the worker loops.  */
+  unsigned int thread;    /* ID */
+  unsigned int addlsb;    /* 8 bit */
+  unsigned int logic1lsb; /* 5 bit */
+  unsigned int logic2lsb; /* 8 bit */
+};
+
+/* The shared word where all the atomic work is done.  */
+static volatile long long workspace;
+
+/* A shared word to tell the workers to quit when non-0.  */
+static long long doquit;
+
+extern void abort (void);
+
+/* Note this test doesn't test the return values much.  */
+void*
+worker (void* data)
+{
+  struct threadwork *tw = (struct threadwork*)data;
+  long long add1bit = 1ll << tw->addlsb;
+  long long logic1bit = 1ll << tw->logic1lsb;
+  long long logic2bit = 1ll << tw->logic2lsb;
+
+  /* Clear the bits we use.  */
+  __sync_and_and_fetch (&workspace, ~(0xffll * add1bit));
+  __sync_fetch_and_and (&workspace, ~(0x1fll * logic1bit));
+  __sync_fetch_and_and (&workspace, ~(0xffll * logic2bit));
+
+  do
+    {
+      long long tmp1, tmp2, tmp3;
+      /* OK, lets try and do some stuff to the workspace - by the end
+         of the main loop our area should be the same as it is now - i.e. 0.  */
+
+      /* Push the arithmetic section upto 128 - one of the threads will
+         case this to carry accross the 32bit boundary.  */
+      for (tmp2 = 0; tmp2 < 64; tmp2++)
+	{
+	  /* Add 2 using the two different adds.  */
+	  tmp1 = __sync_add_and_fetch (&workspace, add1bit);
+	  tmp3 = __sync_fetch_and_add (&workspace, add1bit);
+
+	  /* The value should be the intermediate add value in both cases.  */
+	  if ((tmp1 & (add1bit * 0xff)) != (tmp3 & (add1bit * 0xff)))
+	    DOABORT ("Mismatch of add intermediates on thread %d "
+			"workspace=0x%llx tmp1=0x%llx "
+			"tmp2=0x%llx tmp3=0x%llx\n",
+			 tw->thread, workspace, tmp1, tmp2, tmp3);
+	}
+
+      /* Set the logic bits.  */
+      tmp2=__sync_or_and_fetch (&workspace,
+			  0x1fll * logic1bit | 0xffll * logic2bit);
+
+      /* Check the logic bits are set and the arithmetic value is correct.  */
+      if ((tmp2 & (0x1fll * logic1bit | 0xffll * logic2bit
+			| 0xffll * add1bit))
+	  != (0x1fll * logic1bit | 0xffll * logic2bit | 0x80ll * add1bit))
+	DOABORT ("Midloop check failed on thread %d "
+			"workspace=0x%llx tmp2=0x%llx "
+			"masktmp2=0x%llx expected=0x%llx\n",
+		tw->thread, workspace, tmp2,
+		tmp2 & (0x1fll * logic1bit | 0xffll * logic2bit |
+			 0xffll * add1bit),
+		(0x1fll * logic1bit | 0xffll * logic2bit | 0x80ll * add1bit));
+
+      /* Pull the arithmetic set back down to 0 - again this should cause a
+	 carry across the 32bit boundary in one thread.  */
+
+      for (tmp2 = 0; tmp2 < 64; tmp2++)
+	{
+	  /* Subtract 2 using the two different subs.  */
+	  tmp1=__sync_sub_and_fetch (&workspace, add1bit);
+	  tmp3=__sync_fetch_and_sub (&workspace, add1bit);
+
+	  /* The value should be the intermediate sub value in both cases.  */
+	  if ((tmp1 & (add1bit * 0xff)) != (tmp3 & (add1bit * 0xff)))
+	    DOABORT ("Mismatch of sub intermediates on thread %d "
+			"workspace=0x%llx tmp1=0x%llx "
+			"tmp2=0x%llx tmp3=0x%llx\n",
+			tw->thread, workspace, tmp1, tmp2, tmp3);
+	}
+
+
+      /* Clear the logic bits.  */
+      __sync_fetch_and_xor (&workspace, 0x1fll * logic1bit);
+      tmp3=__sync_and_and_fetch (&workspace, ~(0xffll * logic2bit));
+
+      /* The logic bits and the arithmetic bits should be zero again.  */
+      if (tmp3 & (0x1fll * logic1bit | 0xffll * logic2bit | 0xffll * add1bit))
+	DOABORT ("End of worker loop; bits none 0 on thread %d "
+			"workspace=0x%llx tmp3=0x%llx "
+			"mask=0x%llx maskedtmp3=0x%llx\n",
+		tw->thread, workspace, tmp3, (0x1fll * logic1bit |
+			0xffll * logic2bit | 0xffll * add1bit),
+		tmp3 & (0x1fll * logic1bit | 0xffll * logic2bit | 0xffll * add1bit));
+
+      __sync_add_and_fetch (&tw->count, 1);
+    }
+  while (!__sync_bool_compare_and_swap (&doquit, 1, 1));
+
+  pthread_exit (0);
+}
+
+int
+main ()
+{
+  /* We have 3 threads doing three sets of operations, an 8 bit
+     arithmetic field, a 5 bit logic field and an 8 bit logic
+     field (just to pack them all in).
+
+  6      5       4       4       3       2       1
+  3      6       8       0       2       4       6       8       0
+  |...,...|...,...|...,...|...,...|...,...|...,...|...,...|...,...
+  - T0   --  T1  -- T2   --T2 --  T0  -*- T2-- T1-- T1   -***- T0-
+   logic2  logic2  arith   log2  arith  log1 log1  arith     log1
+
+  */
+  unsigned int t;
+  long long tmp;
+  int err;
+
+  struct threadwork tw[3]={
+    { 0ll, 0, 27, 0, 56 },
+    { 0ll, 1,  8,16, 48 },
+    { 0ll, 2, 40,21, 35 }
+  };
+
+  pthread_t threads[3];
+
+  __sync_lock_release (&doquit);
+
+  /* Get the work space into a known value - All 1's.  */
+  __sync_lock_release (&workspace); /* Now all 0.  */
+  tmp = __sync_val_compare_and_swap (&workspace, 0, -1ll);
+  if (tmp!=0)
+    DOABORT ("Initial __sync_val_compare_and_swap wasn't 0 workspace=0x%llx "
+		"tmp=0x%llx\n", workspace,tmp);
+
+  for (t = 0; t < 3; t++)
+  {
+    err=pthread_create (&threads[t], NULL , worker, &tw[t]);
+    if (err) DOABORT ("pthread_create failed on thread %d with error %d\n",
+	t, err);
+  };
+
+  sleep (5);
+
+  /* Stop please.  */
+  __sync_lock_test_and_set (&doquit, 1ll);
+
+  for (t = 0; t < 3; t++)
+    {
+      err=pthread_join (threads[t], NULL);
+      if (err)
+	DOABORT ("pthread_join failed on thread %d with error %d\n", t, err);
+    };
+
+  __sync_synchronize ();
+
+  /* OK, so all the workers have finished -
+     the workers should have zero'd their workspace, the unused areas
+     should still be 1.  */
+  if (!__sync_bool_compare_and_swap (&workspace, 0x040000e0ll, 0))
+    DOABORT ("End of run workspace mismatch, got %llx\n", workspace);
+
+  /* All the workers should have done some work.  */
+  for (t = 0; t < 3; t++)
+    {
+      if (tw[t].count == 0) DOABORT ("Worker %d gave 0 count\n", t);
+    };
+
+  return 0;
+}
+
diff --git a/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withhelpers.c b/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withhelpers.c
new file mode 100644
index 0000000..19342bf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withhelpers.c
@@ -0,0 +1,14 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v5_ok } */
+/* { dg-options "-std=gnu99" } */
+/* { dg-add-options arm_arch_v5 } */
+/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */
+/* { dg-message "note: '__sync_nand_and_fetch' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */
+/* { dg-message "file included" "In file included" { target *-*-* } 0 } */
+
+#include "../../gcc.dg/di-longlong64-sync-1.c"
+
+/* On an old ARM we have no ldrexd or strexd so we have to use helpers.  */
+/* { dg-final { scan-assembler-not "ldrexd" } } */
+/* { dg-final { scan-assembler-not "strexd" } } */
+/* { dg-final { scan-assembler "__sync_" } } */
diff --git a/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withldrexd.c b/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withldrexd.c
new file mode 100644
index 0000000..dab6940
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withldrexd.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arm_ok } */
+/* { dg-options "-marm -std=gnu99" } */
+/* { dg-require-effective-target arm_arch_v6k_ok } */
+/* { dg-add-options arm_arch_v6k } */
+/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */
+/* { dg-message "note: '__sync_nand_and_fetch' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */
+/* { dg-message "file included" "In file included" { target *-*-* } 0 } */
+
+#include "../../gcc.dg/di-longlong64-sync-1.c"
+
+/* We should be using ldrexd, strexd and no helpers or shorter ldrex.  */
+/* { dg-final { scan-assembler-times "\tldrexd" 46 } } */
+/* { dg-final { scan-assembler-times "\tstrexd" 46 } } */
+/* { dg-final { scan-assembler-not "__sync_" } } */
+/* { dg-final { scan-assembler-not "ldrex\t" } } */
+/* { dg-final { scan-assembler-not "strex\t" } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 5d236f7..086fbc9 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -2067,6 +2067,47 @@  proc check_effective_target_arm_fp16_ok { } {
 		check_effective_target_arm_fp16_ok_nocache]
 }
 
+# Creates a series of routines that return 1 if the given architecture
+# can be selected and a routine to give the flags to select that architecture
+# Note: Extra flags may be added to disable options from newer compilers
+# (Thumb in particular - but others may be added in the future)
+# Usage: /* { dg-require-effective-target arm_arch_v5_ok } */
+#        /* { dg-add-options arm_arch_v5 } */
+foreach { armfunc armflag armdef } { v5 "-march=armv5 -marm" __ARM_ARCH_5__
+				     v6 "-march=armv6" __ARM_ARCH_6__
+				     v6k "-march=armv6k" __ARM_ARCH_6K__
+				     v7a "-march=armv7-a" __ARM_ARCH_7A__ } {
+    eval [string map [list FUNC $armfunc FLAG $armflag DEF $armdef ] {
+	proc check_effective_target_arm_arch_FUNC_ok { } {
+	    if { [ string match "*-marm*" "FLAG" ] &&
+		![check_effective_target_arm_arm_ok] } {
+		return 0
+	    }
+	    return [check_no_compiler_messages arm_arch_FUNC_ok assembly {
+		#if !defined (DEF)
+		#error FOO
+		#endif
+	    } "FLAG" ]
+	}
+
+	proc add_options_for_arm_arch_FUNC { flags } {
+	    return "$flags FLAG"
+	}
+    }]
+}
+
+# Return 1 if this is an ARM target where -marm causes ARM to be
+# used (not Thumb)
+
+proc check_effective_target_arm_arm_ok { } {
+    return [check_no_compiler_messages arm_arm_ok assembly {
+	#if !defined (__arm__) || defined (__thumb__) || defined (__thumb2__)
+	#error FOO
+	#endif
+    } "-marm"]
+}
+
+
 # Return 1 is this is an ARM target where -mthumb causes Thumb-1 to be
 # used.
 
@@ -3458,6 +3499,31 @@  proc check_effective_target_sync_int_long { } {
     return $et_sync_int_long_saved
 }
 
+# Return 1 if the target supports atomic operations on "long long" and can
+# execute them
+# So far only put checks in for ARM, others may want to add their own
+proc check_effective_target_sync_longlong { } {
+    return [check_runtime sync_longlong_runtime {
+      #include <stdlib.h>
+      int main ()
+      {
+	long long l1;
+
+	if (sizeof (long long) != 8)
+	  exit (1);
+
+      #ifdef __arm__
+	/* Just check for native; checking for kernel fallback is tricky.  */
+	asm volatile ("ldrexd r0,r1, [%0]" : : "r" (&l1) : "r0", "r1");
+      #else
+      # error "Add other suitable archs here"
+      #endif
+
+	exit (0);
+      }
+    } "" ]
+}
+
 # Return 1 if the target supports atomic operations on "char" and "short".
 
 proc check_effective_target_sync_char_short { } {