===================================================================
@@ -0,0 +1,1851 @@
+/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
+ This file is part of the UPC runtime Library.
+ Written by Gary Funck <gary@intrepid.com>
+ and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include "gupcr_config.h"
+#include "gupcr_defs.h"
+#include "gupcr_access.h"
+#include "gupcr_sync.h"
+#include "gupcr_sup.h"
+#include "gupcr_portals.h"
+#include "gupcr_node.h"
+#include "gupcr_gmem.h"
+#include "gupcr_utils.h"
+
+/**
+ * @file gupcr_access.c
+ * GUPC compiler access functions.
+ */
+
+/**
+ * @addtogroup IFACE GUPC Interface Routines
+ * @{
+ */
+
+//begin lib_inline_access
+
+/**
+ * Relaxed shared "char (8 bits)" get operation.
+ * Return the value at the shared address 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the source operand.
+ * @return Char (8 bits) value at the shared address given by 'p'.
+ */
+//inline
+u_intQI_t
+__getqi2 (upc_shared_ptr_t p)
+{
+ u_intQI_t result;
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts (); /* Finish the outstanding unordered put first.  */
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "GET ENTER R QI LOCAL");
+ result = *(u_intQI_t *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset); /* Direct load via the local mapping.  */
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "GET ENTER R QI REMOTE");
+ gupcr_gmem_get (&result, thread, offset, sizeof (result));
+ /* All 'get' operations are synchronous. */
+ gupcr_gmem_sync_gets ();
+ }
+ gupcr_trace (FC_MEM, "GET EXIT %d:0x%lx 0x%x",
+ thread, (long unsigned) offset, result);
+ return result;
+}
+
+/**
+ * Relaxed shared "short (16 bits)" get operation.
+ * Return the value at the shared address 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the source operand.
+ * @return Short (16 bits) value at the shared address given by 'p'.
+ */
+//inline
+u_intHI_t
+__gethi2 (upc_shared_ptr_t p)
+{
+ u_intHI_t result;
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts (); /* Finish the outstanding unordered put first.  */
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "GET ENTER R HI LOCAL");
+ result = *(u_intHI_t *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset); /* Direct load via the local mapping.  */
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "GET ENTER R HI REMOTE");
+ gupcr_gmem_get (&result, thread, offset, sizeof (result));
+ /* All 'get' operations are synchronous. */
+ gupcr_gmem_sync_gets ();
+ }
+ gupcr_trace (FC_MEM, "GET EXIT %d:0x%lx 0x%x",
+ thread, (long unsigned) offset, result);
+ return result;
+}
+
+/**
+ * Relaxed shared "int (32 bits)" get operation.
+ * Return the value at the shared address 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the source operand.
+ * @return Int (32 bits) value at the shared address given by 'p'.
+ */
+//inline
+u_intSI_t
+__getsi2 (upc_shared_ptr_t p)
+{
+ u_intSI_t result;
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts (); /* Finish the outstanding unordered put first.  */
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "GET ENTER R SI LOCAL");
+ result = *(u_intSI_t *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset); /* Direct load via the local mapping.  */
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "GET ENTER R SI REMOTE");
+ gupcr_gmem_get (&result, thread, offset, sizeof (result));
+ /* All 'get' operations are synchronous. */
+ gupcr_gmem_sync_gets ();
+ }
+ gupcr_trace (FC_MEM, "GET EXIT %d:0x%lx 0x%x",
+ thread, (long unsigned) offset, result);
+ return result;
+}
+
+/**
+ * Relaxed shared "long (64 bits)" get operation.
+ * Return the value at the shared address 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the source operand.
+ * @return Long (64 bits) value at the shared address given by 'p'.
+ */
+//inline
+u_intDI_t
+__getdi2 (upc_shared_ptr_t p)
+{
+ u_intDI_t result;
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts (); /* Finish the outstanding unordered put first.  */
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "GET ENTER R DI LOCAL");
+ result = *(u_intDI_t *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset); /* Direct load via the local mapping.  */
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "GET ENTER R DI REMOTE");
+ gupcr_gmem_get (&result, thread, offset, sizeof (result));
+ /* All 'get' operations are synchronous. */
+ gupcr_gmem_sync_gets ();
+ }
+ gupcr_trace (FC_MEM, "GET EXIT %d:0x%lx 0x%llx",
+ thread, (long unsigned) offset, (long long unsigned) result);
+ return result;
+}
+
+#if GUPCR_TARGET64
+/**
+ * Relaxed shared "long long (128 bits)" get operation.
+ * Return the value at the shared address 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the source operand.
+ * @return Long long (128 bits) value at the shared address given by 'p'.
+ */
+//inline
+u_intTI_t
+__getti2 (upc_shared_ptr_t p)
+{
+ u_intTI_t result;
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts (); /* Finish the outstanding unordered put first.  */
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "GET ENTER R TI LOCAL");
+ result = *(u_intTI_t *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset); /* Direct load via the local mapping.  */
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "GET ENTER R TI REMOTE");
+ gupcr_gmem_get (&result, thread, offset, sizeof (result));
+ /* All 'get' operations are synchronous. */
+ gupcr_gmem_sync_gets ();
+ }
+ gupcr_trace (FC_MEM, "GET EXIT %d:0x%lx 0x%llx",
+ thread, (long unsigned) offset, (long long unsigned) result);
+ return result;
+}
+#endif /* GUPCR_TARGET64 */
+/**
+ * Relaxed shared "float" get operation.
+ * Return the value at the shared address 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the source operand.
+ * @return Float value at the shared address given by 'p'.
+ */
+//inline
+float
+__getsf2 (upc_shared_ptr_t p)
+{
+ float result;
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts (); /* Finish the outstanding unordered put first.  */
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "GET ENTER R SF LOCAL");
+ result = *(float *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset); /* Direct load via the local mapping.  */
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "GET ENTER R SF REMOTE");
+ gupcr_gmem_get (&result, thread, offset, sizeof (result));
+ /* All 'get' operations are synchronous. */
+ gupcr_gmem_sync_gets ();
+ }
+ gupcr_trace (FC_MEM, "GET EXIT %d:0x%lx %6g",
+ thread, (long unsigned) offset, result);
+ return result;
+}
+
+/**
+ * Relaxed shared "double" get operation.
+ * Return the value at the shared address 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the source operand.
+ * @return Double value at the shared address given by 'p'.
+ */
+//inline
+double
+__getdf2 (upc_shared_ptr_t p)
+{
+ double result;
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts (); /* Finish the outstanding unordered put first.  */
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "GET ENTER R DF LOCAL");
+ result = *(double *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset); /* Direct load via the local mapping.  */
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "GET ENTER R DF REMOTE");
+ gupcr_gmem_get (&result, thread, offset, sizeof (result));
+ /* All 'get' operations are synchronous. */
+ gupcr_gmem_sync_gets ();
+ }
+ gupcr_trace (FC_MEM, "GET EXIT %d:0x%lx %6g",
+ thread, (long unsigned) offset, result);
+ return result;
+}
+
+/**
+ * Relaxed shared "long double" get operation.
+ * Return the value at the shared address 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the source operand.
+ * @return Long double value at the shared address given by 'p'.
+ */
+//inline
+long double
+__gettf2 (upc_shared_ptr_t p)
+{
+ long double result;
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts (); /* Finish the outstanding unordered put first.  */
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "GET ENTER R TF LOCAL");
+ result = *(long double *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset); /* Direct load via the local mapping.  */
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "GET ENTER R TF REMOTE");
+ gupcr_gmem_get (&result, thread, offset, sizeof (result));
+ /* All 'get' operations are synchronous. */
+ gupcr_gmem_sync_gets ();
+ }
+ gupcr_trace (FC_MEM, "GET EXIT %d:0x%lx %6Lg",
+ thread, (long unsigned) offset, result);
+ return result;
+}
+
+/**
+ * Relaxed shared "long double" get operation.
+ * Return the value at the shared address 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the source operand.
+ * @return Long double value at the shared address given by 'p'.
+ */
+//inline
+long double
+__getxf2 (upc_shared_ptr_t p)
+{
+ long double result;
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts (); /* Finish the outstanding unordered put first.  */
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "GET ENTER R XF LOCAL");
+ result = *(long double *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset); /* Direct load via the local mapping.  */
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "GET ENTER R XF REMOTE");
+ gupcr_gmem_get (&result, thread, offset, sizeof (result));
+ /* All 'get' operations are synchronous. */
+ gupcr_gmem_sync_gets ();
+ }
+ gupcr_trace (FC_MEM, "GET EXIT %d:0x%lx %6Lg",
+ thread, (long unsigned) offset, result);
+ return result;
+}
+
+/**
+ * Relaxed shared memory block get operation.
+ * Copy the data at the shared address 'src' into the local memory
+ * destination at the address 'dest'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] dest Local address of the destination memory block.
+ * @param [in] src Shared address of the source memory block.
+ * @param [in] n Number of bytes to transfer.
+ */
+//inline
+void
+__getblk3 (void *dest, upc_shared_ptr_t src, size_t n)
+{
+ int thread = GUPCR_PTS_THREAD (src);
+ size_t offset = GUPCR_PTS_OFFSET (src);
+ gupcr_trace (FC_MEM, "GETBLK ENTER R");
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts (); /* Finish the outstanding unordered put first.  */
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ GUPCR_MEM_BARRIER (); /* NOTE(review): fences on the local path although this operation is relaxed -- the scalar relaxed gets have none; confirm intent.  */
+ memcpy (dest, GUPCR_GMEM_OFF_TO_LOCAL (thread, offset), n);
+ GUPCR_READ_MEM_BARRIER ();
+ }
+ else
+ {
+ gupcr_gmem_get (dest, thread, offset, n);
+ /* All 'get' operations are synchronous. */
+ gupcr_gmem_sync_gets ();
+ }
+ gupcr_trace (FC_MEM, "GETBLK EXIT R %d:0x%lx 0x%lx %lu",
+ thread, (long unsigned) offset,
+ (long unsigned) dest, (long unsigned) n);
+}
+
+/**
+ * Relaxed shared "char (8 bits)" put operation.
+ * Store the value given by 'v' into the shared memory destination at 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the destination address.
+ * @param [in] v Source value.
+ */
+//inline
+void
+__putqi2 (upc_shared_ptr_t p, u_intQI_t v)
+{
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts (); /* Finish the outstanding unordered put first.  */
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER R QI LOCAL "
+ "0x%x %d:0x%lx", v, thread, (long unsigned) offset);
+ *(u_intQI_t *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset) = v; /* Direct store via the local mapping.  */
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER R QI REMOTE "
+ "0x%x %d:0x%lx", v, thread, (long unsigned) offset);
+ if (sizeof (v) <= (size_t) GUPCR_PORTALS_MAX_ORDERED_SIZE)
+ {
+ /* Ordered puts can proceed in parallel. */
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ }
+ else
+ {
+ /* Wait for any outstanding 'put' operation. */
+ gupcr_gmem_sync_puts ();
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ /* There can be only one outstanding unordered put. */
+ gupcr_pending_strict_put = 1;
+ }
+ }
+ gupcr_trace (FC_MEM, "PUT EXIT R QI");
+}
+
+/**
+ * Relaxed shared "short (16 bits)" put operation.
+ * Store the value given by 'v' into the shared memory destination at 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the destination address.
+ * @param [in] v Source value.
+ */
+//inline
+void
+__puthi2 (upc_shared_ptr_t p, u_intHI_t v)
+{
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts (); /* Finish the outstanding unordered put first.  */
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER R HI LOCAL "
+ "0x%x %d:0x%lx", v, thread, (long unsigned) offset);
+ *(u_intHI_t *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset) = v; /* Direct store via the local mapping.  */
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER R HI REMOTE "
+ "0x%x %d:0x%lx", v, thread, (long unsigned) offset);
+ if (sizeof (v) <= (size_t) GUPCR_PORTALS_MAX_ORDERED_SIZE)
+ {
+ /* Ordered puts can proceed in parallel. */
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ }
+ else
+ {
+ /* Wait for any outstanding 'put' operation. */
+ gupcr_gmem_sync_puts ();
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ /* There can be only one outstanding unordered put. */
+ gupcr_pending_strict_put = 1;
+ }
+ }
+ gupcr_trace (FC_MEM, "PUT EXIT R HI");
+}
+
+/**
+ * Relaxed shared "int (32 bits)" put operation.
+ * Store the value given by 'v' into the shared memory destination at 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the destination address.
+ * @param [in] v Source value.
+ */
+//inline
+void
+__putsi2 (upc_shared_ptr_t p, u_intSI_t v)
+{
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts (); /* Finish the outstanding unordered put first.  */
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER R SI LOCAL "
+ "0x%x %d:0x%lx", v, thread, (long unsigned) offset);
+ *(u_intSI_t *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset) = v; /* Direct store via the local mapping.  */
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER R SI REMOTE "
+ "0x%x %d:0x%lx", v, thread, (long unsigned) offset);
+ if (sizeof (v) <= (size_t) GUPCR_PORTALS_MAX_ORDERED_SIZE)
+ {
+ /* Ordered puts can proceed in parallel. */
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ }
+ else
+ {
+ /* Wait for any outstanding 'put' operation. */
+ gupcr_gmem_sync_puts ();
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ /* There can be only one outstanding unordered put. */
+ gupcr_pending_strict_put = 1;
+ }
+ }
+ gupcr_trace (FC_MEM, "PUT EXIT R SI");
+}
+
+/**
+ * Relaxed shared "long (64 bits)" put operation.
+ * Store the value given by 'v' into the shared memory destination at 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the destination address.
+ * @param [in] v Source value.
+ */
+//inline
+void
+__putdi2 (upc_shared_ptr_t p, u_intDI_t v)
+{
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts (); /* Finish the outstanding unordered put first.  */
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER R DI LOCAL "
+ "0x%llx %d:0x%lx",
+ (long long unsigned) v, thread, (long unsigned) offset);
+ *(u_intDI_t *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset) = v; /* Direct store via the local mapping.  */
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER R DI REMOTE "
+ "0x%llx %d:0x%lx",
+ (long long unsigned) v, thread, (long unsigned) offset);
+ if (sizeof (v) <= (size_t) GUPCR_PORTALS_MAX_ORDERED_SIZE)
+ {
+ /* Ordered puts can proceed in parallel. */
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ }
+ else
+ {
+ /* Wait for any outstanding 'put' operation. */
+ gupcr_gmem_sync_puts ();
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ /* There can be only one outstanding unordered put. */
+ gupcr_pending_strict_put = 1;
+ }
+ }
+ gupcr_trace (FC_MEM, "PUT EXIT R DI");
+}
+
+#if GUPCR_TARGET64
+/**
+ * Relaxed shared "long long (128 bits)" put operation.
+ * Store the value given by 'v' into the shared memory destination at 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the destination address.
+ * @param [in] v Source value.
+ */
+//inline
+void
+__putti2 (upc_shared_ptr_t p, u_intTI_t v)
+{
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts (); /* Finish the outstanding unordered put first.  */
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER R TI LOCAL "
+ "0x%llx %d:0x%lx",
+ (long long unsigned) v, thread, (long unsigned) offset);
+ *(u_intTI_t *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset) = v; /* Direct store via the local mapping.  */
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER R TI REMOTE "
+ "0x%llx %d:0x%lx",
+ (long long unsigned) v, thread, (long unsigned) offset);
+ if (sizeof (v) <= (size_t) GUPCR_PORTALS_MAX_ORDERED_SIZE)
+ {
+ /* Ordered puts can proceed in parallel. */
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ }
+ else
+ {
+ /* Wait for any outstanding 'put' operation. */
+ gupcr_gmem_sync_puts ();
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ /* There can be only one outstanding unordered put. */
+ gupcr_pending_strict_put = 1;
+ }
+ }
+ gupcr_trace (FC_MEM, "PUT EXIT R TI");
+}
+#endif /* GUPCR_TARGET64 */
+/**
+ * Relaxed shared "float" put operation.
+ * Store the value given by 'v' into the shared memory destination at 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the destination address.
+ * @param [in] v Source value.
+ */
+//inline
+void
+__putsf2 (upc_shared_ptr_t p, float v)
+{
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts (); /* Finish the outstanding unordered put first.  */
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER R SF LOCAL "
+ "%6g %d:0x%lx", v, thread, (long unsigned) offset);
+ *(float *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset) = v; /* Direct store via the local mapping.  */
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER R SF REMOTE "
+ "%6g %d:0x%lx", v, thread, (long unsigned) offset);
+ if (sizeof (v) <= (size_t) GUPCR_PORTALS_MAX_ORDERED_SIZE)
+ {
+ /* Ordered puts can proceed in parallel. */
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ }
+ else
+ {
+ /* Wait for any outstanding 'put' operation. */
+ gupcr_gmem_sync_puts ();
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ /* There can be only one outstanding unordered put. */
+ gupcr_pending_strict_put = 1;
+ }
+ }
+ gupcr_trace (FC_MEM, "PUT EXIT R SF");
+}
+
+/**
+ * Relaxed shared "double" put operation.
+ * Store the value given by 'v' into the shared memory destination at 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the destination address.
+ * @param [in] v Source value.
+ */
+//inline
+void
+__putdf2 (upc_shared_ptr_t p, double v)
+{
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts (); /* Finish the outstanding unordered put first.  */
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER R DF LOCAL "
+ "%6g %d:0x%lx", v, thread, (long unsigned) offset);
+ *(double *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset) = v; /* Direct store via the local mapping.  */
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER R DF REMOTE "
+ "%6g %d:0x%lx", v, thread, (long unsigned) offset);
+ if (sizeof (v) <= (size_t) GUPCR_PORTALS_MAX_ORDERED_SIZE)
+ {
+ /* Ordered puts can proceed in parallel. */
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ }
+ else
+ {
+ /* Wait for any outstanding 'put' operation. */
+ gupcr_gmem_sync_puts ();
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ /* There can be only one outstanding unordered put. */
+ gupcr_pending_strict_put = 1;
+ }
+ }
+ gupcr_trace (FC_MEM, "PUT EXIT R DF");
+}
+
+/**
+ * Relaxed shared "long double" put operation.
+ * Store the value given by 'v' into the shared memory destination at 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the destination address.
+ * @param [in] v Source value.
+ */
+//inline
+void
+__puttf2 (upc_shared_ptr_t p, long double v)
+{
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts (); /* Finish the outstanding unordered put first.  */
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER R TF LOCAL "
+ "%6Lg %d:0x%lx", v, thread, (long unsigned) offset);
+ *(long double *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset) = v; /* Direct store via the local mapping.  */
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER R TF REMOTE "
+ "%6Lg %d:0x%lx", v, thread, (long unsigned) offset);
+ if (sizeof (v) <= (size_t) GUPCR_PORTALS_MAX_ORDERED_SIZE)
+ {
+ /* Ordered puts can proceed in parallel. */
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ }
+ else
+ {
+ /* Wait for any outstanding 'put' operation. */
+ gupcr_gmem_sync_puts ();
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ /* There can be only one outstanding unordered put. */
+ gupcr_pending_strict_put = 1;
+ }
+ }
+ gupcr_trace (FC_MEM, "PUT EXIT R TF");
+}
+
+/**
+ * Relaxed shared "long double" put operation.
+ * Store the value given by 'v' into the shared memory destination at 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the destination address.
+ * @param [in] v Source value.
+ */
+//inline
+void
+__putxf2 (upc_shared_ptr_t p, long double v)
+{
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts (); /* Finish the outstanding unordered put first.  */
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER R XF LOCAL "
+ "%6Lg %d:0x%lx", v, thread, (long unsigned) offset);
+ *(long double *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset) = v; /* Direct store via the local mapping.  */
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER R XF REMOTE "
+ "%6Lg %d:0x%lx", v, thread, (long unsigned) offset);
+ if (sizeof (v) <= (size_t) GUPCR_PORTALS_MAX_ORDERED_SIZE)
+ {
+ /* Ordered puts can proceed in parallel. */
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ }
+ else
+ {
+ /* Wait for any outstanding 'put' operation. */
+ gupcr_gmem_sync_puts ();
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ /* There can be only one outstanding unordered put. */
+ gupcr_pending_strict_put = 1;
+ }
+ }
+ gupcr_trace (FC_MEM, "PUT EXIT R XF");
+}
+
+/**
+ * Relaxed shared memory block put operation.
+ * Copy the data at the local address 'src' into the shared memory
+ * destination at the address 'dest'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] dest Shared address of the destination memory block.
+ * @param [in] src Local address of the source memory block.
+ * @param [in] n Number of bytes to transfer.
+ */
+//inline
+void
+__putblk3 (upc_shared_ptr_t dest, void *src, size_t n)
+{
+ int thread = GUPCR_PTS_THREAD (dest);
+ size_t offset = GUPCR_PTS_OFFSET (dest);
+ gupcr_trace (FC_MEM, "PUTBLK ENTER R 0x%lx %d:0x%lx %lu",
+ (long unsigned) src, thread,
+ (long unsigned) offset, (long unsigned) n);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts (); /* Finish the outstanding unordered put first.  */
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ memcpy (GUPCR_GMEM_OFF_TO_LOCAL (thread, offset), src, n);
+ }
+ else
+ {
+ gupcr_gmem_put (thread, offset, src, n); /* NOTE(review): unlike large scalar puts, this does not set gupcr_pending_strict_put for n > GUPCR_PORTALS_MAX_ORDERED_SIZE -- confirm ordering is enforced elsewhere.  */
+ }
+ gupcr_trace (FC_MEM, "PUT_BLK EXIT R");
+}
+
+/**
+ * Relaxed shared memory block copy operation.
+ * Copy the data at the shared address 'src' into the shared memory
+ * destination at the address 'dest'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] dest Shared address of destination memory block.
+ * @param [in] src Shared address of source memory block.
+ * @param [in] n Number of bytes to transfer.
+ */
+//inline
+void
+__copyblk3 (upc_shared_ptr_t dest, upc_shared_ptr_t src, size_t n)
+{
+ int dthread = GUPCR_PTS_THREAD (dest);
+ size_t doffset = GUPCR_PTS_OFFSET (dest);
+ int sthread = GUPCR_PTS_THREAD (src);
+ size_t soffset = GUPCR_PTS_OFFSET (src);
+ gupcr_trace (FC_MEM, "COPYBLK ENTER R %d:0x%lx %d:0x%lx %lu",
+ sthread, (long unsigned) soffset,
+ dthread, (long unsigned) doffset, (long unsigned) n);
+ gupcr_assert (dthread < THREADS);
+ gupcr_assert (doffset != 0);
+ gupcr_assert (sthread < THREADS);
+ gupcr_assert (soffset != 0);
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts (); /* Finish the outstanding unordered put first.  */
+ if (GUPCR_GMEM_IS_LOCAL (dthread) && GUPCR_GMEM_IS_LOCAL (sthread))
+ {
+ memcpy (GUPCR_GMEM_OFF_TO_LOCAL (dthread, doffset),
+ GUPCR_GMEM_OFF_TO_LOCAL (sthread, soffset), n); /* Both ends local: plain memory copy.  */
+ }
+ else if (GUPCR_GMEM_IS_LOCAL (dthread))
+ {
+ gupcr_gmem_get (GUPCR_GMEM_OFF_TO_LOCAL (dthread, doffset),
+ sthread, soffset, n); /* Remote source, local destination: pull.  */
+ gupcr_gmem_sync_gets ();
+ }
+ else if (GUPCR_GMEM_IS_LOCAL (sthread))
+ {
+ gupcr_gmem_put (dthread, doffset,
+ GUPCR_GMEM_OFF_TO_LOCAL (sthread, soffset), n); /* Local source, remote destination: push.  */
+ }
+ else
+ {
+ gupcr_gmem_copy (dthread, doffset, sthread, soffset, n); /* Neither end local: remote-to-remote transfer.  */
+ }
+ gupcr_trace (FC_MEM, "COPY_BLK EXIT R");
+}
+
+/**
+ * Strict shared "char (8 bits)" get operation.
+ * Return the value at the shared address 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the source operand.
+ * @return Char (8 bits) value at the shared address given by 'p'.
+ */
+//inline
+u_intQI_t
+__getsqi2 (upc_shared_ptr_t p)
+{
+ u_intQI_t result;
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts (); /* Finish the outstanding unordered put first.  */
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "GET ENTER S QI LOCAL");
+ GUPCR_MEM_BARRIER (); /* Memory fence before the strict local load.  */
+ result = *(u_intQI_t *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset);
+ GUPCR_READ_MEM_BARRIER (); /* Read fence after the load.  */
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "GET ENTER S QI REMOTE");
+ gupcr_gmem_get (&result, thread, offset, sizeof (result));
+ /* All 'get' operations are synchronous. */
+ gupcr_gmem_sync_gets ();
+ }
+ gupcr_trace (FC_MEM, "GET EXIT %d:0x%lx 0x%x",
+ thread, (long unsigned) offset, result);
+ return result;
+}
+
+/**
+ * Strict shared "short (16 bits)" get operation.
+ * Return the value at the shared address 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the source operand.
+ * @return Short (16 bits) value at the shared address given by 'p'.
+ */
+//inline
+u_intHI_t
+__getshi2 (upc_shared_ptr_t p)
+{
+ u_intHI_t result;
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts (); /* Finish the outstanding unordered put first.  */
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "GET ENTER S HI LOCAL");
+ GUPCR_MEM_BARRIER (); /* Memory fence before the strict local load.  */
+ result = *(u_intHI_t *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset);
+ GUPCR_READ_MEM_BARRIER (); /* Read fence after the load.  */
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "GET ENTER S HI REMOTE");
+ gupcr_gmem_get (&result, thread, offset, sizeof (result));
+ /* All 'get' operations are synchronous. */
+ gupcr_gmem_sync_gets ();
+ }
+ gupcr_trace (FC_MEM, "GET EXIT %d:0x%lx 0x%x",
+ thread, (long unsigned) offset, result);
+ return result;
+}
+
+/**
+ * Strict shared "int (32 bits)" get operation.
+ * Return the value at the shared address 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the source operand.
+ * @return Int (32 bits) value at the shared address given by 'p'.
+ */
+//inline
+u_intSI_t
+__getssi2 (upc_shared_ptr_t p)
+{
+ u_intSI_t result;
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts (); /* Finish the outstanding unordered put first.  */
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "GET ENTER S SI LOCAL");
+ GUPCR_MEM_BARRIER (); /* Memory fence before the strict local load.  */
+ result = *(u_intSI_t *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset);
+ GUPCR_READ_MEM_BARRIER (); /* Read fence after the load.  */
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "GET ENTER S SI REMOTE");
+ gupcr_gmem_get (&result, thread, offset, sizeof (result));
+ /* All 'get' operations are synchronous. */
+ gupcr_gmem_sync_gets ();
+ }
+ gupcr_trace (FC_MEM, "GET EXIT %d:0x%lx 0x%x",
+ thread, (long unsigned) offset, result);
+ return result;
+}
+
+/**
+ * Strict shared "long (64 bits)" get operation.
+ * Return the value at the shared address 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the source operand.
+ * @return Long (64 bits) value at the shared address given by 'p'.
+ */
+//inline
+u_intDI_t
+__getsdi2 (upc_shared_ptr_t p)
+{
+ u_intDI_t result;
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts (); /* Finish the outstanding unordered put first.  */
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "GET ENTER S DI LOCAL");
+ GUPCR_MEM_BARRIER (); /* Memory fence before the strict local load.  */
+ result = *(u_intDI_t *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset);
+ GUPCR_READ_MEM_BARRIER (); /* Read fence after the load.  */
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "GET ENTER S DI REMOTE");
+ gupcr_gmem_get (&result, thread, offset, sizeof (result));
+ /* All 'get' operations are synchronous. */
+ gupcr_gmem_sync_gets ();
+ }
+ gupcr_trace (FC_MEM, "GET EXIT %d:0x%lx 0x%llx",
+ thread, (long unsigned) offset, (long long unsigned) result);
+ return result;
+}
+
+#if GUPCR_TARGET64
+/**
+ * Strict shared "long long (128 bits)" get operation.
+ * Return the value at the shared address 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the source operand.
+ * @return Long long (128 bits) value at the shared address given by 'p'.
+ */
+//inline
+u_intTI_t
+__getsti2 (upc_shared_ptr_t p)
+{
+ u_intTI_t result;
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ /* Strict ordering: complete any strict put still in flight
+    before performing this access.  */
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts ();
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "GET ENTER S TI LOCAL");
+ /* Barriers bracket the local read so it cannot be reordered
+    with surrounding shared accesses.  */
+ GUPCR_MEM_BARRIER ();
+ result = *(u_intTI_t *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset);
+ GUPCR_READ_MEM_BARRIER ();
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "GET ENTER S TI REMOTE");
+ gupcr_gmem_get (&result, thread, offset, sizeof (result));
+ /* All 'get' operations are synchronous. */
+ gupcr_gmem_sync_gets ();
+ }
+ gupcr_trace (FC_MEM, "GET EXIT %d:0x%lx 0x%llx",
+ thread, (long unsigned) offset, (long long unsigned) result);
+ return result;
+}
+#endif /* GUPCR_TARGET64 */
+/**
+ * Strict shared "float" get operation.
+ * Return the value at the shared address 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the source operand.
+ * @return Float value at the shared address given by 'p'.
+ */
+//inline
+float
+__getssf2 (upc_shared_ptr_t p)
+{
+ float result;
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ /* Strict ordering: complete any strict put still in flight
+    before performing this access.  */
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts ();
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "GET ENTER S SF LOCAL");
+ /* Barriers bracket the local read so it cannot be reordered
+    with surrounding shared accesses.  */
+ GUPCR_MEM_BARRIER ();
+ result = *(float *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset);
+ GUPCR_READ_MEM_BARRIER ();
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "GET ENTER S SF REMOTE");
+ gupcr_gmem_get (&result, thread, offset, sizeof (result));
+ /* All 'get' operations are synchronous. */
+ gupcr_gmem_sync_gets ();
+ }
+ gupcr_trace (FC_MEM, "GET EXIT %d:0x%lx %6g",
+ thread, (long unsigned) offset, result);
+ return result;
+}
+
+/**
+ * Strict shared "double" get operation.
+ * Return the value at the shared address 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the source operand.
+ * @return Double value at the shared address given by 'p'.
+ */
+//inline
+double
+__getsdf2 (upc_shared_ptr_t p)
+{
+ double result;
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ /* Strict ordering: complete any strict put still in flight
+    before performing this access.  */
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts ();
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "GET ENTER S DF LOCAL");
+ /* Barriers bracket the local read so it cannot be reordered
+    with surrounding shared accesses.  */
+ GUPCR_MEM_BARRIER ();
+ result = *(double *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset);
+ GUPCR_READ_MEM_BARRIER ();
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "GET ENTER S DF REMOTE");
+ gupcr_gmem_get (&result, thread, offset, sizeof (result));
+ /* All 'get' operations are synchronous. */
+ gupcr_gmem_sync_gets ();
+ }
+ gupcr_trace (FC_MEM, "GET EXIT %d:0x%lx %6g",
+ thread, (long unsigned) offset, result);
+ return result;
+}
+
+/**
+ * Strict shared "long double" get operation.
+ * Return the value at the shared address 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the source operand.
+ * @return Long double value at the shared address given by 'p'.
+ */
+//inline
+long double
+__getstf2 (upc_shared_ptr_t p)
+{
+ long double result;
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ /* Strict ordering: complete any strict put still in flight
+    before performing this access.  */
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts ();
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "GET ENTER S TF LOCAL");
+ /* Barriers bracket the local read so it cannot be reordered
+    with surrounding shared accesses.  */
+ GUPCR_MEM_BARRIER ();
+ result = *(long double *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset);
+ GUPCR_READ_MEM_BARRIER ();
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "GET ENTER S TF REMOTE");
+ gupcr_gmem_get (&result, thread, offset, sizeof (result));
+ /* All 'get' operations are synchronous. */
+ gupcr_gmem_sync_gets ();
+ }
+ gupcr_trace (FC_MEM, "GET EXIT %d:0x%lx %6Lg",
+ thread, (long unsigned) offset, result);
+ return result;
+}
+
+/**
+ * Strict shared "long double" (XFmode) get operation.
+ * Return the value at the shared address 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the source operand.
+ * @return Long double value at the shared address given by 'p'.
+ */
+//inline
+long double
+__getsxf2 (upc_shared_ptr_t p)
+{
+ long double result;
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ /* Strict ordering: complete any strict put still in flight
+    before performing this access.  */
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts ();
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "GET ENTER S XF LOCAL");
+ /* Barriers bracket the local read so it cannot be reordered
+    with surrounding shared accesses.  */
+ GUPCR_MEM_BARRIER ();
+ result = *(long double *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset);
+ GUPCR_READ_MEM_BARRIER ();
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "GET ENTER S XF REMOTE");
+ gupcr_gmem_get (&result, thread, offset, sizeof (result));
+ /* All 'get' operations are synchronous. */
+ gupcr_gmem_sync_gets ();
+ }
+ gupcr_trace (FC_MEM, "GET EXIT %d:0x%lx %6Lg",
+ thread, (long unsigned) offset, result);
+ return result;
+}
+
+/**
+ * Strict shared memory block get operation.
+ * Copy the data at the shared address 'src' into the local memory
+ * destination at the address 'dest'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] dest Local address of the destination memory block.
+ * @param [in] src Shared address of the source memory block.
+ * @param [in] n Number of bytes to transfer.
+ */
+//inline
+void
+__getsblk3 (void *dest, upc_shared_ptr_t src, size_t n)
+{
+ int thread = GUPCR_PTS_THREAD (src);
+ size_t offset = GUPCR_PTS_OFFSET (src);
+ gupcr_trace (FC_MEM, "GETBLK ENTER S");
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ /* Strict ordering: complete any strict put still in flight
+    before performing this access.  */
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts ();
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ /* Barriers bracket the local copy so it cannot be reordered
+    with surrounding shared accesses.  */
+ GUPCR_MEM_BARRIER ();
+ memcpy (dest, GUPCR_GMEM_OFF_TO_LOCAL (thread, offset), n);
+ GUPCR_READ_MEM_BARRIER ();
+ }
+ else
+ {
+ gupcr_gmem_get (dest, thread, offset, n);
+ /* All 'get' operations are synchronous. */
+ gupcr_gmem_sync_gets ();
+ }
+ gupcr_trace (FC_MEM, "GETBLK EXIT S %d:0x%lx 0x%lx %lu",
+ thread, (long unsigned) offset,
+ (long unsigned) dest, (long unsigned) n);
+}
+
+/**
+ * Strict shared "char (8 bits)" put operation.
+ * Store the value given by 'v' into the shared memory destination at 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the destination address.
+ * @param [in] v Source value.
+ */
+//inline
+void
+__putsqi2 (upc_shared_ptr_t p, u_intQI_t v)
+{
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ /* Strict ordering: complete any strict put still in flight
+    before issuing this one.  */
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts ();
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER S QI LOCAL "
+ "0x%x %d:0x%lx", v, thread, (long unsigned) offset);
+ /* Barriers bracket the local store so it cannot be reordered
+    with surrounding shared accesses.  */
+ GUPCR_WRITE_MEM_BARRIER ();
+ *(u_intQI_t *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset) = v;
+ GUPCR_MEM_BARRIER ();
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER S QI REMOTE "
+ "0x%x %d:0x%lx", v, thread, (long unsigned) offset);
+ if (sizeof (v) <= (size_t) GUPCR_PORTALS_MAX_ORDERED_SIZE)
+ {
+ /* Ordered puts can proceed in parallel. */
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ }
+ else
+ {
+ /* Wait for any outstanding 'put' operation. */
+ gupcr_gmem_sync_puts ();
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ }
+ /* The remote put completes asynchronously; record that it is
+    pending so the next strict access waits for it.  */
+ gupcr_pending_strict_put = 1;
+ }
+ gupcr_trace (FC_MEM, "PUT EXIT S QI");
+}
+
+/**
+ * Strict shared "short (16 bits)" put operation.
+ * Store the value given by 'v' into the shared memory destination at 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the destination address.
+ * @param [in] v Source value.
+ */
+//inline
+void
+__putshi2 (upc_shared_ptr_t p, u_intHI_t v)
+{
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ /* Strict ordering: complete any strict put still in flight
+    before issuing this one.  */
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts ();
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER S HI LOCAL "
+ "0x%x %d:0x%lx", v, thread, (long unsigned) offset);
+ /* Barriers bracket the local store so it cannot be reordered
+    with surrounding shared accesses.  */
+ GUPCR_WRITE_MEM_BARRIER ();
+ *(u_intHI_t *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset) = v;
+ GUPCR_MEM_BARRIER ();
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER S HI REMOTE "
+ "0x%x %d:0x%lx", v, thread, (long unsigned) offset);
+ if (sizeof (v) <= (size_t) GUPCR_PORTALS_MAX_ORDERED_SIZE)
+ {
+ /* Ordered puts can proceed in parallel. */
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ }
+ else
+ {
+ /* Wait for any outstanding 'put' operation. */
+ gupcr_gmem_sync_puts ();
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ }
+ /* The remote put completes asynchronously; record that it is
+    pending so the next strict access waits for it.  */
+ gupcr_pending_strict_put = 1;
+ }
+ gupcr_trace (FC_MEM, "PUT EXIT S HI");
+}
+
+/**
+ * Strict shared "int (32 bits)" put operation.
+ * Store the value given by 'v' into the shared memory destination at 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the destination address.
+ * @param [in] v Source value.
+ */
+//inline
+void
+__putssi2 (upc_shared_ptr_t p, u_intSI_t v)
+{
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ /* Strict ordering: complete any strict put still in flight
+    before issuing this one.  */
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts ();
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER S SI LOCAL "
+ "0x%x %d:0x%lx", v, thread, (long unsigned) offset);
+ /* Barriers bracket the local store so it cannot be reordered
+    with surrounding shared accesses.  */
+ GUPCR_WRITE_MEM_BARRIER ();
+ *(u_intSI_t *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset) = v;
+ GUPCR_MEM_BARRIER ();
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER S SI REMOTE "
+ "0x%x %d:0x%lx", v, thread, (long unsigned) offset);
+ if (sizeof (v) <= (size_t) GUPCR_PORTALS_MAX_ORDERED_SIZE)
+ {
+ /* Ordered puts can proceed in parallel. */
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ }
+ else
+ {
+ /* Wait for any outstanding 'put' operation. */
+ gupcr_gmem_sync_puts ();
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ }
+ /* The remote put completes asynchronously; record that it is
+    pending so the next strict access waits for it.  */
+ gupcr_pending_strict_put = 1;
+ }
+ gupcr_trace (FC_MEM, "PUT EXIT S SI");
+}
+
+/**
+ * Strict shared "long (64 bits)" put operation.
+ * Store the value given by 'v' into the shared memory destination at 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the destination address.
+ * @param [in] v Source value.
+ */
+//inline
+void
+__putsdi2 (upc_shared_ptr_t p, u_intDI_t v)
+{
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ /* Strict ordering: complete any strict put still in flight
+    before issuing this one.  */
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts ();
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER S DI LOCAL "
+ "0x%llx %d:0x%lx",
+ (long long unsigned) v, thread, (long unsigned) offset);
+ /* Barriers bracket the local store so it cannot be reordered
+    with surrounding shared accesses.  */
+ GUPCR_WRITE_MEM_BARRIER ();
+ *(u_intDI_t *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset) = v;
+ GUPCR_MEM_BARRIER ();
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER S DI REMOTE "
+ "0x%llx %d:0x%lx",
+ (long long unsigned) v, thread, (long unsigned) offset);
+ if (sizeof (v) <= (size_t) GUPCR_PORTALS_MAX_ORDERED_SIZE)
+ {
+ /* Ordered puts can proceed in parallel. */
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ }
+ else
+ {
+ /* Wait for any outstanding 'put' operation. */
+ gupcr_gmem_sync_puts ();
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ }
+ /* The remote put completes asynchronously; record that it is
+    pending so the next strict access waits for it.  */
+ gupcr_pending_strict_put = 1;
+ }
+ gupcr_trace (FC_MEM, "PUT EXIT S DI");
+}
+
+#if GUPCR_TARGET64
+/**
+ * Strict shared "long long (128 bits)" put operation.
+ * Store the value given by 'v' into the shared memory destination at 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the destination address.
+ * @param [in] v Source value.
+ */
+//inline
+void
+__putsti2 (upc_shared_ptr_t p, u_intTI_t v)
+{
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ /* Strict ordering: complete any strict put still in flight
+    before issuing this one.  */
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts ();
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER S TI LOCAL "
+ "0x%llx %d:0x%lx",
+ (long long unsigned) v, thread, (long unsigned) offset);
+ /* Barriers bracket the local store so it cannot be reordered
+    with surrounding shared accesses.  */
+ GUPCR_WRITE_MEM_BARRIER ();
+ *(u_intTI_t *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset) = v;
+ GUPCR_MEM_BARRIER ();
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER S TI REMOTE "
+ "0x%llx %d:0x%lx",
+ (long long unsigned) v, thread, (long unsigned) offset);
+ if (sizeof (v) <= (size_t) GUPCR_PORTALS_MAX_ORDERED_SIZE)
+ {
+ /* Ordered puts can proceed in parallel. */
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ }
+ else
+ {
+ /* Wait for any outstanding 'put' operation. */
+ gupcr_gmem_sync_puts ();
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ }
+ /* The remote put completes asynchronously; record that it is
+    pending so the next strict access waits for it.  */
+ gupcr_pending_strict_put = 1;
+ }
+ gupcr_trace (FC_MEM, "PUT EXIT S TI");
+}
+#endif /* GUPCR_TARGET64 */
+/**
+ * Strict shared "float" put operation.
+ * Store the value given by 'v' into the shared memory destination at 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the destination address.
+ * @param [in] v Source value.
+ */
+//inline
+void
+__putssf2 (upc_shared_ptr_t p, float v)
+{
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ /* Strict ordering: complete any strict put still in flight
+    before issuing this one.  */
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts ();
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER S SF LOCAL "
+ "%6g %d:0x%lx", v, thread, (long unsigned) offset);
+ /* Barriers bracket the local store so it cannot be reordered
+    with surrounding shared accesses.  */
+ GUPCR_WRITE_MEM_BARRIER ();
+ *(float *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset) = v;
+ GUPCR_MEM_BARRIER ();
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER S SF REMOTE "
+ "%6g %d:0x%lx", v, thread, (long unsigned) offset);
+ if (sizeof (v) <= (size_t) GUPCR_PORTALS_MAX_ORDERED_SIZE)
+ {
+ /* Ordered puts can proceed in parallel. */
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ }
+ else
+ {
+ /* Wait for any outstanding 'put' operation. */
+ gupcr_gmem_sync_puts ();
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ }
+ /* The remote put completes asynchronously; record that it is
+    pending so the next strict access waits for it.  */
+ gupcr_pending_strict_put = 1;
+ }
+ gupcr_trace (FC_MEM, "PUT EXIT S SF");
+}
+
+/**
+ * Strict shared "double" put operation.
+ * Store the value given by 'v' into the shared memory destination at 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the destination address.
+ * @param [in] v Source value.
+ */
+//inline
+void
+__putsdf2 (upc_shared_ptr_t p, double v)
+{
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ /* Strict ordering: complete any strict put still in flight
+    before issuing this one.  */
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts ();
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER S DF LOCAL "
+ "%6g %d:0x%lx", v, thread, (long unsigned) offset);
+ /* Barriers bracket the local store so it cannot be reordered
+    with surrounding shared accesses.  */
+ GUPCR_WRITE_MEM_BARRIER ();
+ *(double *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset) = v;
+ GUPCR_MEM_BARRIER ();
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER S DF REMOTE "
+ "%6g %d:0x%lx", v, thread, (long unsigned) offset);
+ if (sizeof (v) <= (size_t) GUPCR_PORTALS_MAX_ORDERED_SIZE)
+ {
+ /* Ordered puts can proceed in parallel. */
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ }
+ else
+ {
+ /* Wait for any outstanding 'put' operation. */
+ gupcr_gmem_sync_puts ();
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ }
+ /* The remote put completes asynchronously; record that it is
+    pending so the next strict access waits for it.  */
+ gupcr_pending_strict_put = 1;
+ }
+ gupcr_trace (FC_MEM, "PUT EXIT S DF");
+}
+
+/**
+ * Strict shared "long double" put operation.
+ * Store the value given by 'v' into the shared memory destination at 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the destination address.
+ * @param [in] v Source value.
+ */
+//inline
+void
+__putstf2 (upc_shared_ptr_t p, long double v)
+{
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ /* Strict ordering: complete any strict put still in flight
+    before issuing this one.  */
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts ();
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER S TF LOCAL "
+ "%6Lg %d:0x%lx", v, thread, (long unsigned) offset);
+ /* Barriers bracket the local store so it cannot be reordered
+    with surrounding shared accesses.  */
+ GUPCR_WRITE_MEM_BARRIER ();
+ *(long double *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset) = v;
+ GUPCR_MEM_BARRIER ();
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER S TF REMOTE "
+ "%6Lg %d:0x%lx", v, thread, (long unsigned) offset);
+ if (sizeof (v) <= (size_t) GUPCR_PORTALS_MAX_ORDERED_SIZE)
+ {
+ /* Ordered puts can proceed in parallel. */
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ }
+ else
+ {
+ /* Wait for any outstanding 'put' operation. */
+ gupcr_gmem_sync_puts ();
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ }
+ /* The remote put completes asynchronously; record that it is
+    pending so the next strict access waits for it.  */
+ gupcr_pending_strict_put = 1;
+ }
+ gupcr_trace (FC_MEM, "PUT EXIT S TF");
+}
+
+/**
+ * Strict shared "long double" (XFmode) put operation.
+ * Store the value given by 'v' into the shared memory destination at 'p'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] p Shared address of the destination address.
+ * @param [in] v Source value.
+ */
+//inline
+void
+__putsxf2 (upc_shared_ptr_t p, long double v)
+{
+ int thread = GUPCR_PTS_THREAD (p);
+ size_t offset = GUPCR_PTS_OFFSET (p);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ /* Strict ordering: complete any strict put still in flight
+    before issuing this one.  */
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts ();
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER S XF LOCAL "
+ "%6Lg %d:0x%lx", v, thread, (long unsigned) offset);
+ /* Barriers bracket the local store so it cannot be reordered
+    with surrounding shared accesses.  */
+ GUPCR_WRITE_MEM_BARRIER ();
+ *(long double *) GUPCR_GMEM_OFF_TO_LOCAL (thread, offset) = v;
+ GUPCR_MEM_BARRIER ();
+ }
+ else
+ {
+ gupcr_trace (FC_MEM, "PUT ENTER S XF REMOTE "
+ "%6Lg %d:0x%lx", v, thread, (long unsigned) offset);
+ if (sizeof (v) <= (size_t) GUPCR_PORTALS_MAX_ORDERED_SIZE)
+ {
+ /* Ordered puts can proceed in parallel. */
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ }
+ else
+ {
+ /* Wait for any outstanding 'put' operation. */
+ gupcr_gmem_sync_puts ();
+ gupcr_gmem_put (thread, offset, &v, sizeof (v));
+ }
+ /* The remote put completes asynchronously; record that it is
+    pending so the next strict access waits for it.  */
+ gupcr_pending_strict_put = 1;
+ }
+ gupcr_trace (FC_MEM, "PUT EXIT S XF");
+}
+
+/**
+ * Strict shared memory block put operation.
+ * Copy the data at the local address 'src' into the shared memory
+ * destination at the address 'dest'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] dest Shared address of the destination memory block.
+ * @param [in] src Local address of the source memory block.
+ * @param [in] n Number of bytes to transfer.
+ */
+//inline
+void
+__putsblk3 (upc_shared_ptr_t dest, void *src, size_t n)
+{
+ int thread = GUPCR_PTS_THREAD (dest);
+ size_t offset = GUPCR_PTS_OFFSET (dest);
+ gupcr_trace (FC_MEM, "PUTBLK ENTER S 0x%lx %d:0x%lx %lu",
+ (long unsigned) src, thread,
+ (long unsigned) offset, (long unsigned) n);
+ gupcr_assert (thread < THREADS);
+ gupcr_assert (offset != 0);
+ /* Strict ordering: complete any strict put still in flight
+    before issuing this one.  */
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts ();
+ if (GUPCR_GMEM_IS_LOCAL (thread))
+ {
+ /* Barriers bracket the local copy so it cannot be reordered
+    with surrounding shared accesses.  */
+ GUPCR_WRITE_MEM_BARRIER ();
+ memcpy (GUPCR_GMEM_OFF_TO_LOCAL (thread, offset), src, n);
+ GUPCR_MEM_BARRIER ();
+ }
+ else
+ {
+ gupcr_gmem_put (thread, offset, src, n);
+ /* The remote put completes asynchronously; record that it is
+    pending so the next strict access waits for it.  */
+ gupcr_pending_strict_put = 1;
+ }
+ gupcr_trace (FC_MEM, "PUT_BLK EXIT S");
+}
+
+/**
+ * Strict shared memory block copy operation.
+ * Copy the data at the shared address 'src' into the shared memory
+ * destination at the address 'dest'.
+ *
+ * The interface to this procedure is defined by the UPC compiler API.
+ *
+ * @param [in] dest Shared address of destination memory block.
+ * @param [in] src Shared address of source memory block.
+ * @param [in] n Number of bytes to transfer.
+ */
+//inline
+void
+__copysblk3 (upc_shared_ptr_t dest, upc_shared_ptr_t src, size_t n)
+{
+ int dthread = GUPCR_PTS_THREAD (dest);
+ size_t doffset = GUPCR_PTS_OFFSET (dest);
+ int sthread = GUPCR_PTS_THREAD (src);
+ size_t soffset = GUPCR_PTS_OFFSET (src);
+ gupcr_trace (FC_MEM, "COPYBLK ENTER S %d:0x%lx %d:0x%lx %lu",
+ sthread, (long unsigned) soffset,
+ dthread, (long unsigned) doffset, (long unsigned) n);
+ gupcr_assert (dthread < THREADS);
+ gupcr_assert (doffset != 0);
+ gupcr_assert (sthread < THREADS);
+ gupcr_assert (soffset != 0);
+ /* Strict ordering: complete any strict put still in flight
+    before performing this access.  */
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts ();
+ /* Four cases, selected by the locality of each endpoint:
+    local/local -> memcpy, local dest -> get, local src -> put,
+    both remote -> network-side copy.  */
+ if (GUPCR_GMEM_IS_LOCAL (dthread) && GUPCR_GMEM_IS_LOCAL (sthread))
+ {
+ GUPCR_WRITE_MEM_BARRIER ();
+ memcpy (GUPCR_GMEM_OFF_TO_LOCAL (dthread, doffset),
+ GUPCR_GMEM_OFF_TO_LOCAL (sthread, soffset), n);
+ GUPCR_MEM_BARRIER ();
+ }
+ else if (GUPCR_GMEM_IS_LOCAL (dthread))
+ {
+ gupcr_gmem_get (GUPCR_GMEM_OFF_TO_LOCAL (dthread, doffset),
+ sthread, soffset, n);
+ gupcr_gmem_sync_gets ();
+ }
+ else if (GUPCR_GMEM_IS_LOCAL (sthread))
+ {
+ gupcr_gmem_put (dthread, doffset,
+ GUPCR_GMEM_OFF_TO_LOCAL (sthread, soffset), n);
+ /* Remote completion is deferred; mark the put as pending.  */
+ gupcr_pending_strict_put = 1;
+ }
+ else
+ {
+ gupcr_gmem_copy (dthread, doffset, sthread, soffset, n);
+ /* Remote completion is deferred; mark the put as pending.  */
+ gupcr_pending_strict_put = 1;
+ }
+ gupcr_trace (FC_MEM, "COPY_BLK EXIT S");
+}
+
+/**
+ * upc_fence implementation.
+ *
+ * Issue a full memory barrier for local ordering, then wait for
+ * all outstanding gmem operations to complete.
+ */
+//inline
+void
+__upc_fence (void)
+{
+ GUPCR_MEM_BARRIER ();
+ gupcr_gmem_sync ();
+}
+
+//end lib_inline_access
+/** @} */
===================================================================
@@ -0,0 +1,179 @@
+/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
+ This file is part of the UPC runtime library.
+ Written by Gary Funck <gary@intrepid.com>
+ and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#ifndef _GUPCR_ACCESS_H_
+#define _GUPCR_ACCESS_H_
+
+/**
+ * @file gupcr_access.h
+ * GUPC compiler access functions prototypes.
+ */
+
+//begin lib_access_prototypes
+/* Relaxed accesses. */
+
+extern u_intQI_t __getqi2 (upc_shared_ptr_t);
+extern u_intHI_t __gethi2 (upc_shared_ptr_t);
+extern u_intSI_t __getsi2 (upc_shared_ptr_t);
+extern u_intDI_t __getdi2 (upc_shared_ptr_t);
+#if GUPCR_TARGET64
+extern u_intTI_t __getti2 (upc_shared_ptr_t);
+#endif
+extern float __getsf2 (upc_shared_ptr_t);
+extern double __getdf2 (upc_shared_ptr_t);
+extern long double __gettf2 (upc_shared_ptr_t);
+extern long double __getxf2 (upc_shared_ptr_t);
+extern void __getblk3 (void *, upc_shared_ptr_t, size_t);
+
+extern void __putqi2 (upc_shared_ptr_t, u_intQI_t);
+extern void __puthi2 (upc_shared_ptr_t, u_intHI_t);
+extern void __putsi2 (upc_shared_ptr_t, u_intSI_t);
+extern void __putdi2 (upc_shared_ptr_t, u_intDI_t);
+#if GUPCR_TARGET64
+extern void __putti2 (upc_shared_ptr_t, u_intTI_t);
+#endif
+extern void __putsf2 (upc_shared_ptr_t, float);
+extern void __putdf2 (upc_shared_ptr_t, double);
+extern void __puttf2 (upc_shared_ptr_t, long double);
+extern void __putxf2 (upc_shared_ptr_t, long double);
+extern void __putblk3 (upc_shared_ptr_t, void *, size_t);
+extern void __copyblk3 (upc_shared_ptr_t, upc_shared_ptr_t, size_t);
+
+/* Strict accesses. */
+
+extern u_intQI_t __getsqi2 (upc_shared_ptr_t);
+extern u_intHI_t __getshi2 (upc_shared_ptr_t);
+extern u_intSI_t __getssi2 (upc_shared_ptr_t);
+extern u_intDI_t __getsdi2 (upc_shared_ptr_t);
+#if GUPCR_TARGET64
+extern u_intTI_t __getsti2 (upc_shared_ptr_t);
+#endif
+extern float __getssf2 (upc_shared_ptr_t);
+extern double __getsdf2 (upc_shared_ptr_t);
+extern long double __getstf2 (upc_shared_ptr_t);
+extern long double __getsxf2 (upc_shared_ptr_t);
+extern void __getsblk3 (void *, upc_shared_ptr_t, size_t);
+
+extern void __putsqi2 (upc_shared_ptr_t, u_intQI_t);
+extern void __putshi2 (upc_shared_ptr_t, u_intHI_t);
+extern void __putssi2 (upc_shared_ptr_t, u_intSI_t);
+extern void __putsdi2 (upc_shared_ptr_t, u_intDI_t);
+#if GUPCR_TARGET64
+extern void __putsti2 (upc_shared_ptr_t, u_intTI_t);
+#endif
+extern void __putssf2 (upc_shared_ptr_t, float);
+extern void __putsdf2 (upc_shared_ptr_t, double);
+extern void __putstf2 (upc_shared_ptr_t, long double);
+extern void __putsxf2 (upc_shared_ptr_t, long double);
+extern void __putsblk3 (upc_shared_ptr_t, void *, size_t);
+extern void __copysblk3 (upc_shared_ptr_t, upc_shared_ptr_t, size_t);
+
+/* Relaxed accesses (profiled). */
+
+extern u_intQI_t __getgqi3 (upc_shared_ptr_t, const char *file, int line);
+extern u_intHI_t __getghi3 (upc_shared_ptr_t, const char *file, int line);
+extern u_intSI_t __getgsi3 (upc_shared_ptr_t, const char *file, int line);
+extern u_intDI_t __getgdi3 (upc_shared_ptr_t, const char *file, int line);
+#if GUPCR_TARGET64
+extern u_intTI_t __getgti3 (upc_shared_ptr_t, const char *file, int line);
+#endif
+extern float __getgsf3 (upc_shared_ptr_t, const char *file, int line);
+extern double __getgdf3 (upc_shared_ptr_t, const char *file, int line);
+extern long double __getgtf3 (upc_shared_ptr_t, const char *file, int line);
+extern long double __getgxf3 (upc_shared_ptr_t, const char *file, int line);
+extern void __getgblk5 (void *, upc_shared_ptr_t, size_t, const char *file,
+ int line);
+
+extern void __putgqi4 (upc_shared_ptr_t, u_intQI_t, const char *file,
+ int line);
+extern void __putghi4 (upc_shared_ptr_t, u_intHI_t, const char *file,
+ int line);
+extern void __putgsi4 (upc_shared_ptr_t, u_intSI_t, const char *file,
+ int line);
+extern void __putgdi4 (upc_shared_ptr_t, u_intDI_t, const char *file,
+ int line);
+#if GUPCR_TARGET64
+extern void __putgti4 (upc_shared_ptr_t, u_intTI_t, const char *file,
+ int line);
+#endif
+extern void __putgsf4 (upc_shared_ptr_t, float, const char *file, int line);
+extern void __putgdf4 (upc_shared_ptr_t, double, const char *file, int line);
+extern void __putgtf4 (upc_shared_ptr_t, long double, const char *file,
+ int line);
+extern void __putgxf4 (upc_shared_ptr_t, long double, const char *file,
+ int line);
+extern void __putgblk5 (upc_shared_ptr_t, void *, size_t, const char *file,
+ int line);
+extern void __copygblk5 (upc_shared_ptr_t, upc_shared_ptr_t, size_t,
+ const char *file, int line);
+
+/* Strict accesses (profiled). */
+
+extern u_intQI_t __getsgqi3 (upc_shared_ptr_t, const char *file, int line);
+extern u_intHI_t __getsghi3 (upc_shared_ptr_t, const char *file, int line);
+extern u_intSI_t __getsgsi3 (upc_shared_ptr_t, const char *file, int line);
+extern u_intDI_t __getsgdi3 (upc_shared_ptr_t, const char *file, int line);
+#if GUPCR_TARGET64
+extern u_intTI_t __getsgti3 (upc_shared_ptr_t, const char *file, int line);
+#endif
+extern float __getsgsf3 (upc_shared_ptr_t, const char *file, int line);
+extern double __getsgdf3 (upc_shared_ptr_t, const char *file, int line);
+extern long double __getsgtf3 (upc_shared_ptr_t, const char *file, int line);
+extern long double __getsgxf3 (upc_shared_ptr_t, const char *file, int line);
+extern void __getsgblk5 (void *, upc_shared_ptr_t, size_t, const char *file,
+ int line);
+
+extern void __putsgqi4 (upc_shared_ptr_t, u_intQI_t, const char *file,
+ int line);
+extern void __putsghi4 (upc_shared_ptr_t, u_intHI_t, const char *file,
+ int line);
+extern void __putsgsi4 (upc_shared_ptr_t, u_intSI_t, const char *file,
+ int line);
+extern void __putsgdi4 (upc_shared_ptr_t, u_intDI_t, const char *file,
+ int line);
+#if GUPCR_TARGET64
+extern void __putsgti4 (upc_shared_ptr_t, u_intTI_t, const char *file,
+ int line);
+#endif
+extern void __putsgsf4 (upc_shared_ptr_t, float, const char *file, int line);
+extern void __putsgdf4 (upc_shared_ptr_t, double, const char *file, int line);
+extern void __putsgtf4 (upc_shared_ptr_t, long double, const char *file,
+ int line);
+extern void __putsgxf4 (upc_shared_ptr_t, long double, const char *file,
+ int line);
+extern void __putsgblk5 (upc_shared_ptr_t, void *, size_t, const char *file,
+ int line);
+extern void __copysgblk5 (upc_shared_ptr_t, upc_shared_ptr_t, size_t,
+ const char *file, int line);
+
+/* Miscellaneous access related prototypes. */
+extern void __upc_fence (void);
+
+//end lib_access_prototypes
+
+
+#endif /* gupcr_access.h */
===================================================================
@@ -0,0 +1,195 @@
+/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
+ This file is part of the UPC runtime Library.
+ Written by Gary Funck <gary@intrepid.com>
+ and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include "gupcr_config.h"
+#include "gupcr_defs.h"
+#include "gupcr_sup.h"
+#include "gupcr_portals.h"
+#include "gupcr_node.h"
+#include "gupcr_gmem.h"
+#include "gupcr_utils.h"
+
+/**
+ * @file gupcr_addr.c
+ * GUPC Portals4 shared address utility routines
+ */
+
+/**
+ * @addtogroup IFACE GUPC Interface Routines
+ * @{
+ */
+
+//begin lib_inline_access
+/**
+ * Find local pointer from pointer-to-shared.
+ *
+ * @param [in] p Pointer-to-shared
+ * @return Local address associated with "p"
+ */
+//inline
+void *
+__cvtaddr (upc_shared_ptr_t p)
+{
+ void *addr;
+ /* A null pointer-to-shared converts to a null local pointer.  */
+ if (GUPCR_PTS_IS_NULL (p))
+ return (void *) 0;
+ /* Map (thread, offset) into this process's mapping of global memory;
+    unlike __getaddr, this works for any thread, not just MYTHREAD.  */
+ addr = GUPCR_GMEM_OFF_TO_LOCAL (GUPCR_PTS_THREAD (p), GUPCR_PTS_OFFSET (p));
+ return addr;
+}
+
+//end lib_inline_access
+
+/**
+ * Find local pointer from pointer-to-shared.
+ *
+ * The pointer-to-shared value must have affinity to the current thread.
+ * @param [in] p Pointer-to-shared
+ * @return Local address associated with "p"
+ */
+void *
+__getaddr (upc_shared_ptr_t p)
+{
+ void *addr;
+ /* NULL maps to NULL.  */
+ if (GUPCR_PTS_IS_NULL (p))
+ return (void *) 0;
+ /* The conversion is only defined when the shared address has
+    affinity to the calling thread; anything else is a fatal error.  */
+ if ((int) GUPCR_PTS_THREAD (p) != MYTHREAD)
+ gupcr_fatal_error
+ ("invalid conversion of shared address to local pointer;\n"
+ "thread does not have affinity to shared address");
+ addr = GUPCR_GMEM_OFF_TO_LOCAL (MYTHREAD, GUPCR_PTS_OFFSET (p));
+ return addr;
+}
+
+/** @} */
+
+/**
+ * @addtogroup PTSMANIP UPC Pointer-to-shared Manipulation Functions
+ * @{
+ */
+
+/**
+ * Return the thread of a pointer-to-shared value.
+ *
+ * The upc_threadof function returns the index of the thread
+ * that has affinity to the shared object pointed to by the argument.
+ * @param [in] p Pointer-to-shared argument
+ * @retval Thread ID of the argument
+ */
+size_t
+upc_threadof (upc_shared_ptr_t p)
+{
+ /* Sanity-check the embedded thread number before returning it.  */
+ if ((int) GUPCR_PTS_THREAD (p) >= THREADS)
+ gupcr_fatal_error ("thread number %d in shared address is out of range",
+ (int) GUPCR_PTS_THREAD (p));
+ return (size_t) GUPCR_PTS_THREAD (p);
+}
+
+/**
+ * Return the phase of a pointer-to-shared value.
+ *
+ * The upc_phaseof function returns the phase component of the
+ * pointer-to-shared argument.
+ * @param [in] p Pointer-to-shared argument
+ * @retval Phase of the argument
+ */
+size_t
+upc_phaseof (upc_shared_ptr_t p)
+{
+ /* Validate the thread field even though only the phase is returned;
+    an out-of-range thread indicates a corrupt pointer-to-shared.  */
+ if ((int) GUPCR_PTS_THREAD (p) >= THREADS)
+ gupcr_fatal_error ("thread number %d in shared address is out of range",
+ (int) GUPCR_PTS_THREAD (p));
+ return (size_t) GUPCR_PTS_PHASE (p);
+}
+
+/**
+ * Reset the phase field of a pointer-to-shared value.
+ *
+ * The upc_resetphase function returns a pointer-to-shared value which
+ * is identical to its input except that it has zero phase.
+ * @param [in] p Pointer-to-shared argument
+ * @retval Pointer-to-shared with zero phase
+ */
+upc_shared_ptr_t
+upc_resetphase (upc_shared_ptr_t p)
+{
+ /* Copy the argument and zero only its phase field.  */
+ upc_shared_ptr_t result;
+ result = p;
+ GUPCR_PTS_SET_PHASE (result, 0);
+ return result;
+}
+
+/**
+ * Return the address field of a pointer-to-shared value.
+ *
+ * The upc_addrfield function returns an implementation-defined
+ * value reflecting the 'local address' of the object pointed to
+ * by the pointer-to-shared argument.
+ * @param [in] p Pointer-to-shared argument
+ * @retval Address field of the argument
+ */
+size_t
+upc_addrfield (upc_shared_ptr_t p)
+{
+ /* Reject pointers whose thread field is corrupt before exposing the
+    implementation-defined virtual-address field.  */
+ if ((int) GUPCR_PTS_THREAD (p) >= THREADS)
+ gupcr_fatal_error ("thread number %d in shared address is out of range",
+ (int) GUPCR_PTS_THREAD (p))ERRATUM;
+ return (size_t) GUPCR_PTS_VADDR (p);
+}
+
+/**
+ * Return the size of the local portion of the shared data
+ * with a layout described by the input parameters.
+ *
+ * A convenience function which calculates the exact size
+ * of the local portion of the data in a shared object with affinity to
+ * the thread identified by the 'threadid' parameter.
+ * @param [in] totalsize Size of the shared data
+ * @param [in] nbytes Size of the block
+ * @param [in] threadid Requested thread number
+ * @retval Size of the shared space described by the function arguments
+ */
+size_t
+upc_affinitysize (size_t totalsize, size_t nbytes, size_t threadid)
+{
+ size_t result;
+ /* Indefinite block size (or a single block): the whole object lives
+    on thread 0; all other threads hold nothing.  */
+ if (nbytes == 0 || totalsize == 0 || nbytes >= totalsize)
+ result = (size_t) (threadid == 0 ? totalsize : 0);
+ else
+ {
+ /* Blocks are dealt round-robin; threads below the cutoff receive
+    one extra (possibly partial) block.  */
+ size_t const nblocks = (totalsize / nbytes);
+ size_t const cutoff = (nblocks % THREADS);
+ if (threadid < cutoff)
+ result = (size_t) ((nblocks + THREADS - 1) / THREADS) * nbytes;
+ else if (threadid > cutoff)
+ result = (size_t) (nblocks / THREADS) * nbytes;
+ else
+ /* The cutoff thread gets the final partial block, if any.  */
+ result = (size_t) ((nblocks / THREADS) * nbytes)
+ + totalsize - nblocks * nbytes;
+ }
+ return result;
+}
+
+/** @} */
===================================================================
@@ -0,0 +1,47 @@
+/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
+ This file is part of the UPC runtime library.
+ Written by Gary Funck <gary@intrepid.com>
+ and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/**
+ * @file gupcr_alloc.h
+ * GUPC Portals4 UPC dynamic shared memory allocation.
+ */
+
+#ifndef _GUPCR_ALLOC_H_
+#define _GUPCR_ALLOC_H_ 1
+
+/* Initialize the shared heap allocator; called once per thread with
+   the base and size of the dynamic shared memory region.  */
+extern void gupcr_alloc_init (upc_shared_ptr_t, size_t);
+
+/* These prototypes are visible only when compiled as plain C
+   (presumably UPC compilations get them from <upc.h> with
+   'shared void *' signatures instead -- confirm).  */
+#ifndef __UPC__
+
+extern upc_shared_ptr_t upc_global_alloc (size_t, size_t);
+extern upc_shared_ptr_t upc_all_alloc (size_t, size_t);
+extern upc_shared_ptr_t upc_local_alloc (size_t, size_t);
+extern upc_shared_ptr_t upc_alloc (size_t);
+extern void upc_free (upc_shared_ptr_t);
+
+#endif /* !__UPC__ */
+
+#endif /* gupcr_alloc.h */
===================================================================
@@ -0,0 +1,641 @@
+/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
+ This file is part of the UPC runtime library.
+ Written by Gary Funck <gary@intrepid.com>
+ and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+/**
+ * @file gupcr_alloc.upc
+ * GUPC Portals4 UPC dynamic shared memory allocation.
+ *
+ * Implement UPC's dynamic memory allocation routines.
+ * The implementation is written in UPC, because
+ * it needs to run above the runtime library's memory mapping
+ * facility. Internal runtime spin locks are used rather than
+ * the UPC language-defined locks, because those locks
+ * depend upon dynamic memory management, and we need to
+ * break the circular dependency.
+ *
+ * @addtogroup ALLOC GUPCR Shared Memory Allocator Functions
+ * @{
+ */
+
+#include <upc.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <assert.h>
+#include "gupcr_config.h"
+#include "gupcr_defs.h"
+#include "gupcr_utils.h"
+#include "gupcr_barrier.h"
+#include "gupcr_lock.h"
+
+/* Doubly-linked free-list node/head, resident in shared memory.  */
+struct upc_heap_list_struct;
+typedef shared struct upc_heap_list_struct *upc_heap_list_p;
+typedef struct upc_heap_list_struct
+{
+ upc_heap_list_p next;
+ upc_heap_list_p prev;
+} upc_heap_list_t;
+/* One free list per power-of-2 size class ("pool").  */
+typedef upc_heap_list_t upc_heap_pool_t[GUPCR_HEAP_NUM_POOLS];
+/* Per-heap bookkeeping: base/size of the heap region, the lock that
+   serializes operations on it, and a bitmask ('pool_avail') with one
+   bit per non-empty pool.  */
+typedef struct upc_heap_struct
+{
+ shared void *base;
+ upc_lock_t *lock;
+ size_t size;
+ int is_global;
+ size_t pool_avail;
+ upc_heap_pool_t pool;
+} upc_heap_t;
+typedef shared upc_heap_t *upc_heap_p;
+
+/* Header placed at the front of every heap block; doubles as a free
+   list link while the block is free.  */
+typedef struct upc_heap_node_struct
+{
+ upc_heap_list_t link; /* Must be first. */
+ size_t size;
+ int alloc_tag;
+ int is_global;
+} upc_heap_node_t;
+typedef shared upc_heap_node_t *upc_heap_node_p;
+
+/* Boundaries of the region shared between the global heap (grows up
+   from the base) and the per-thread local heaps (grow down from the
+   top).  */
+static shared void *gupcr_heap_region_base;
+static shared void *gupcr_heap_region_top;
+static size_t gupcr_heap_region_size;
+
+static shared upc_heap_t gupcr_global_heap_info;
+static shared upc_heap_t gupcr_local_heap_info[THREADS];
+/* Water marks that detect collision between the two growth
+   directions; 'strict' so updates are immediately visible.  */
+static strict shared size_t gupcr_heap_global_hi_water_mark;
+static strict shared size_t gupcr_heap_local_low_water_mark;
+static upc_heap_p gupcr_global_heap;
+static upc_heap_p gupcr_local_heap;
+
+/** Increment a shared pointer, by 'nbytes'. */
+static inline shared void *
+gupcr_pts_add_offset (shared void *ptr, ptrdiff_t nbytes)
+{
+ /* Cast to an indefinitely-blocked char pointer so the arithmetic
+    stays on the same thread and moves byte-by-byte.  */
+ return (shared void *) (((shared [] char *) ptr) + nbytes);
+}
+
+/** Return the difference between 'ptr1' and 'ptr2'. Both
+ pointers must be non-NULL and have affinity to the same thread. */
+static inline ptrdiff_t
+gupcr_pts_diff (shared void *ptr1, shared void *ptr2)
+{
+ /* Byte difference via indefinitely-blocked char pointers; callers
+    guarantee both pointers are non-NULL and on the same thread.  */
+ return (ptrdiff_t) (((shared [] char *) ptr1) - ((shared [] char *) ptr2));
+}
+
+/** Return the smallest power of 2 that is >= 'v',
+ scaled so that gupcr_log2 of the minimum allocation size is 0. */
+static inline unsigned int
+gupcr_plog2 (unsigned long long v)
+{
+ /* Pool index: log2 of the size clamped to the minimum allocation,
+    rebased so the smallest size class is pool 0.  */
+ return gupcr_log2 (GUPCR_MAX (v, GUPCR_HEAP_ALLOC_MIN)) -
+ GUPCR_HEAP_ALLOC_MIN_BITS;
+}
+
+/** Return TRUE if 'list' is empty. */
+static inline int
+gupcr_heap_is_empty_list (upc_heap_list_p list)
+{
+ /* The list head is circular: empty means it points to itself.  */
+ gupcr_assert (list != NULL);
+ return list->next == list;
+}
+
+/** Insert 'node' after 'here' in the double linked free list. */
+static inline void
+gupcr_heap_list_insert (upc_heap_list_p here, upc_heap_list_p node)
+{
+ upc_heap_list_p next;
+ gupcr_assert (here != NULL);
+ gupcr_assert (node != NULL);
+ next = here->next;
+ gupcr_assert (next != NULL);
+ /* Standard circular doubly-linked insertion of 'node' between
+    'here' and 'here->next'.  */
+ node->next = next;
+ node->prev = here;
+ next->prev = node;
+ here->next = node;
+}
+
+/** Remove 'node' from its position in doubly-linked free list. */
+static inline void
+gupcr_heap_list_remove (upc_heap_list_p node)
+{
+ upc_heap_list_p next, prev;
+ gupcr_assert (node != NULL);
+ prev = node->prev;
+ gupcr_assert (prev != NULL);
+ next = node->next;
+ gupcr_assert (next != NULL);
+ prev->next = next;
+ next->prev = prev;
+ /* Poison the unlinked node's pointers so a stale reuse asserts.  */
+ node->next = NULL;
+ node->prev = NULL;
+}
+
+/** Pop a node from the front of the free list
+ rooted at the 'p'-th pool in 'heap'. */
+static inline upc_heap_node_p
+gupcr_heap_list_pop (upc_heap_p heap, unsigned int p)
+{
+ upc_heap_node_p node = NULL;
+ upc_heap_list_p list;
+ gupcr_assert (heap != NULL);
+ gupcr_assert (p < GUPCR_HEAP_NUM_POOLS);
+ list = (upc_heap_list_p) &heap->pool[p];
+ if (!gupcr_heap_is_empty_list (list))
+ {
+ upc_heap_list_p first;
+ first = list->next;
+ gupcr_assert (first != NULL);
+ gupcr_heap_list_remove (first);
+ /* The link is the first member of upc_heap_node_t, so the list
+    pointer is also the node pointer.  */
+ node = (upc_heap_node_p) first;
+ /* Keep the availability bitmask in sync with the pool state.  */
+ if (gupcr_heap_is_empty_list (list))
+ heap->pool_avail = gupcr_clear_bit (heap->pool_avail, p);
+ }
+ return node;
+}
+
+/** Push 'node' onto the front of the free list
+ rooted at the 'p'-th pool in 'heap'. */
+static inline void
+gupcr_heap_list_push (upc_heap_p heap, unsigned int p, upc_heap_node_p node)
+{
+ upc_heap_list_p list;
+ gupcr_assert (heap != NULL);
+ gupcr_assert (node != NULL);
+ gupcr_assert (p < GUPCR_HEAP_NUM_POOLS);
+ list = (upc_heap_list_p) &heap->pool[p];
+ /* Pool transitions from empty to non-empty: set its bitmask bit.  */
+ if (gupcr_heap_is_empty_list (list))
+ heap->pool_avail = gupcr_set_bit (heap->pool_avail, p);
+ gupcr_heap_list_insert (list, (upc_heap_list_p) node);
+}
+
+/**
+ * Split 'node' into two nodes each of half the size.
+ *
+ * Push one of the half-sized nodes back onto an appropriate free list.
+ * Return the other half-size node. Before calling this routine,
+ * 'node' must not be on any free list.
+ */
+static inline upc_heap_node_p
+gupcr_heap_list_split (upc_heap_p heap, upc_heap_node_p node)
+{
+ size_t node_size;
+ size_t half_size;
+ unsigned int is_global;
+ upc_heap_node_p free_half;
+ unsigned int p;
+ gupcr_assert (heap != NULL);
+ gupcr_assert (node != NULL);
+ is_global = heap->is_global;
+ node_size = node->size;
+ half_size = ((size_t) 1 << (gupcr_log2 (node_size) - 1));
+ p = gupcr_plog2 (half_size);
+ /* For the global heap keep the lower half and free the upper half;
+    for a local heap keep the upper half and free the lower half
+    (local heaps grow downward from the region top).  */
+ if (is_global)
+ free_half = gupcr_pts_add_offset (node, half_size);
+ else
+ {
+ free_half = node;
+ node = gupcr_pts_add_offset (free_half, half_size);
+ }
+ /* Clear both headers before rewriting them; a freed header must not
+    carry a stale alloc_tag or link pointers.  */
+ upc_memset (free_half, '\0', GUPCR_HEAP_ALLOC_OVERHEAD);
+ free_half->size = half_size;
+ free_half->is_global = is_global;
+ gupcr_heap_list_push (heap, p, free_half);
+ upc_memset (node, '\0', GUPCR_HEAP_ALLOC_OVERHEAD);
+ node->size = half_size;
+ node->is_global = is_global;
+ return node;
+}
+
+/**
+ * Return the buddy of 'node'.
+ *
+ * The buddy is calculated at binary level 'p' by exclusive or-ing
+ * the p'th bit of the offset of 'node' within the heap.
+ * If there is no buddy for this block, return NULL.
+ */
+static inline upc_heap_node_p
+gupcr_heap_get_buddy (upc_heap_p heap, upc_heap_node_p node)
+{
+ shared void *heap_base;
+ size_t heap_size;
+ ptrdiff_t heap_offset, buddy_offset, max_buddy_offset;
+ unsigned int p;
+ upc_heap_node_p buddy = NULL;
+ gupcr_assert (heap != NULL);
+ gupcr_assert (node != NULL);
+ heap_base = heap->base;
+ heap_size = heap->size;
+ heap_offset = gupcr_pts_diff (node, heap_base);
+ gupcr_assert (heap_offset >= 0);
+ /* Note: absolute log2 of the block size (not the pool index);
+    the buddy address differs only in that one bit of the offset.  */
+ p = gupcr_log2 (node->size);
+ buddy_offset = heap_offset ^ ((ptrdiff_t) 1 << p);
+ /* The computed buddy may lie beyond the currently extended heap;
+    return NULL in that case.  */
+ max_buddy_offset = (ptrdiff_t) heap_size - GUPCR_HEAP_ALLOC_MIN;
+ if (buddy_offset <= max_buddy_offset)
+ buddy = gupcr_pts_add_offset (heap_base, buddy_offset);
+ return buddy;
+}
+
+/**
+ * Attempt to join the node pointed to by 'node_ref'
+ * to its buddy in 'heap' of log2 size 'p'.
+ *
+ * Return TRUE if successful. If the buddy node
+ * is the 'left' buddy, update the node pointed
+ * to by 'node_ref' to point to the buddy.
+ */
+static inline unsigned int
+gupcr_heap_list_join (upc_heap_p heap,
+ unsigned int p, upc_heap_node_p *node_ref)
+{
+ unsigned int joined = 0;
+ upc_heap_node_p buddy, node;
+ gupcr_assert (heap != NULL);
+ gupcr_assert (node_ref);
+ gupcr_assert (p < GUPCR_HEAP_NUM_POOLS);
+ node = *node_ref;
+ gupcr_assert (node != NULL);
+ buddy = gupcr_heap_get_buddy (heap, node);
+ /* The node can be joined with its buddy if:
+ 1. The buddy is free.
+ 2. The buddy has the same power-of-2 size. */
+ if (buddy && !buddy->alloc_tag)
+ {
+ unsigned int p_buddy;
+ gupcr_assert (buddy->size > 0);
+ p_buddy = gupcr_plog2 (buddy->size);
+ if (p == p_buddy)
+ {
+ unsigned int p_above = p + 1;
+ upc_heap_list_p list;
+ joined = 1;
+ /* Unlink the buddy from its free list, then fix up the
+    availability bit if that pool went empty.  */
+ gupcr_heap_list_remove ((upc_heap_list_p) buddy);
+ list = (upc_heap_list_p) &heap->pool[p];
+ if (gupcr_heap_is_empty_list (list))
+ heap->pool_avail = gupcr_clear_bit (heap->pool_avail, p);
+ /* The joined block starts at the lower-addressed half.  */
+ if (gupcr_pts_diff (buddy, node) < 0)
+ {
+ node = buddy;
+ *node_ref = node;
+ }
+ node->alloc_tag = 0;
+ /* Doubled size: one pool level above 'p'.  */
+ node->size = ((size_t) 1 << (p_above + GUPCR_HEAP_ALLOC_MIN_BITS));
+ }
+ }
+ return joined;
+}
+
+/**
+ * Initialize the data structure used to manage
+ * operations on 'heap'.
+ *
+ * 'is_global' is TRUE if the heap is a global heap.
+ *
+ * For global heaps, 'base' points to the bottom of the heap
+ * storage area. For local heaps, 'base' initially points
+ * to the top of the heap storage area and then grows downward.
+ */
+static inline void
+gupcr_heap_init_info (upc_heap_p heap,
+ unsigned int is_global, shared void *base)
+{
+ unsigned int p;
+ shared [] upc_heap_list_t *pool;
+ gupcr_assert (heap != NULL);
+ upc_memset (heap, '\0', sizeof (upc_heap_t));
+ gupcr_assert (base != NULL);
+ heap->base = base;
+ heap->is_global = is_global;
+ /* Global and local heaps are serialized by distinct runtime
+    spin locks (UPC language locks would recurse into us).  */
+ if (is_global)
+ heap->lock = gupcr_global_heap_lock;
+ else
+ heap->lock = gupcr_local_heap_lock;
+ /* Make every pool's circular free list point at itself (empty).  */
+ for (p = 0, pool = &heap->pool[0]; p < GUPCR_HEAP_NUM_POOLS; ++p, ++pool)
+ {
+ pool->next = (upc_heap_list_p) pool;
+ pool->prev = (upc_heap_list_p) pool;
+ }
+ heap->pool_avail = 0;
+}
+
+/**
+ * Initialize the global and local heap data structures.
+ *
+ * 'heap_region_base' is the shared address where the heap should begin,
+ * and 'heap_region_size' is the maximum number of bytes available
+ * for dynamic shared memory allocation.
+ */
+void
+gupcr_alloc_init (shared void *heap_region_base, size_t heap_region_size)
+{
+ shared void *local_heap_base;
+ /* Each thread is handed the base of its own affine slice.  */
+ gupcr_assert (upc_threadof (heap_region_base) == (size_t) MYTHREAD);
+ gupcr_assert (gupcr_is_pow_2 (GUPCR_HEAP_ALLOC_MIN));
+ gupcr_assert ((GUPCR_HEAP_ALLOC_OVERHEAD % 16) == 0);
+ gupcr_assert (GUPCR_HEAP_ALLOC_OVERHEAD >= sizeof (upc_heap_node_t));
+ gupcr_heap_region_base = heap_region_base;
+ gupcr_heap_region_size = heap_region_size;
+ gupcr_heap_region_top =
+ gupcr_pts_add_offset (heap_region_base, heap_region_size);
+ gupcr_global_heap = &gupcr_global_heap_info;
+ gupcr_local_heap = &gupcr_local_heap_info[MYTHREAD];
+ /* Only thread 0 initializes the shared global-heap state and the
+    water marks; every thread initializes its own local heap.  */
+ if (!MYTHREAD)
+ {
+ gupcr_heap_global_hi_water_mark = 0;
+ gupcr_heap_local_low_water_mark = heap_region_size;
+ gupcr_heap_init_info (&gupcr_global_heap_info, 1, heap_region_base);
+ }
+ /* The local heap base is initially the top of the UPC heap region;
+    it grows downward from there.  */
+ local_heap_base = gupcr_heap_region_top;
+ gupcr_heap_init_info (gupcr_local_heap, 0, local_heap_base);
+}
+
+/**
+ * Allocate 'size' bytes from the heap memory region.
+ *
+ * Global allocations raise the high water mark.
+ * Local allocations potentially decrease the low water mark.
+ * Space is available as long as the high water mark
+ * does not cross above the low water mark.
+ *
+ * If successful, return a pointer to the newly allocated space.
+ * Return NULL if there is not enough space.
+ *
+ * The 'size' argument is constrained to be an exact power of 2.
+ *
+ */
+static shared void *
+gupcr_heap_region_alloc (upc_heap_p heap, size_t size)
+{
+ shared void *mem = NULL;
+ unsigned int is_global;
+ size_t heap_size, new_heap_size;
+ shared void *heap_base;
+ unsigned int have_enough_space;
+ gupcr_assert (heap != NULL);
+ gupcr_assert (size > 0);
+ gupcr_assert (gupcr_is_pow_2 (size));
+ is_global = heap->is_global;
+ heap_size = heap->size;
+ heap_base = heap->base;
+ new_heap_size = heap_size + size;
+ have_enough_space = 0;
+ /* The region lock guards both water marks; the global heap grows
+    upward (raising the high mark) and local heaps grow downward
+    (lowering the low mark); they must not cross.  */
+ upc_lock (gupcr_heap_region_lock);
+ if (is_global)
+ {
+ size_t new_hi_water_mark;
+ new_hi_water_mark = new_heap_size;
+ if (new_hi_water_mark <= gupcr_heap_local_low_water_mark)
+ {
+ gupcr_heap_global_hi_water_mark = new_hi_water_mark;
+ have_enough_space = 1;
+ }
+ }
+ else
+ {
+ if (new_heap_size <= gupcr_heap_region_size)
+ {
+ size_t new_low_water_mark;
+ new_low_water_mark = gupcr_heap_region_size - new_heap_size;
+ if (new_low_water_mark >= gupcr_heap_global_hi_water_mark)
+ {
+ /* The low mark only ever moves down; another thread's local
+    heap may already have pushed it lower.  */
+ if (new_low_water_mark < gupcr_heap_local_low_water_mark)
+ gupcr_heap_local_low_water_mark = new_low_water_mark;
+ have_enough_space = 1;
+ }
+ }
+ }
+ upc_unlock (gupcr_heap_region_lock);
+ if (have_enough_space)
+ {
+ ptrdiff_t heap_size_offset;
+ if (is_global)
+ {
+ /* New global space sits just above the current heap end.  */
+ heap_size_offset = (ptrdiff_t) heap_size;
+ mem = gupcr_pts_add_offset (heap_base, heap_size_offset);
+ }
+ else
+ {
+ /* New local space sits just below the current heap base; the
+    base itself moves down.  */
+ heap_size_offset = -((ptrdiff_t) size);
+ heap_base = gupcr_pts_add_offset (heap_base, heap_size_offset);
+ heap->base = heap_base;
+ mem = heap_base;
+ }
+ heap->size = new_heap_size;
+ }
+ return mem;
+}
+
+/**
+ * Repetitively double the size of 'heap' until a free block
+ * of at least 'size' bytes (rounded up to the next power of 2)
+ * is created.
+ */
+static void
+gupcr_heap_extend (upc_heap_p heap, size_t size)
+{
+ size_t heap_size;
+ size_t extend_size;
+ unsigned int is_global;
+ unsigned int p;
+ size_t free_block_size;
+ gupcr_assert (heap != NULL);
+ gupcr_assert (size > 0);
+ heap_size = heap->size;
+ is_global = heap->is_global;
+ /* Round the request up to a power of 2.  */
+ extend_size = ((size_t) 1 << gupcr_log2 (size));
+ do
+ {
+ upc_heap_node_p free_block;
+ /* Each pass doubles the heap: grab a block equal to the current
+    heap size (or the rounded request, for an empty heap).  */
+ free_block_size = heap_size ? heap_size : extend_size;
+ free_block = gupcr_heap_region_alloc (heap, free_block_size);
+ /* Out of region space: give up; the caller sees an empty pool.  */
+ if (free_block == NULL)
+ return;
+ upc_memset (free_block, '\0', GUPCR_HEAP_ALLOC_OVERHEAD);
+ free_block->size = free_block_size;
+ free_block->is_global = is_global;
+ p = gupcr_plog2 (free_block_size);
+ gupcr_heap_list_push (heap, p, free_block);
+ heap_size += free_block_size;
+ heap->size = heap_size;
+ }
+ while (free_block_size < extend_size);
+}
+
+/**
+ * Allocate a block of 'size' bytes from 'heap'.
+ */
+static shared void *
+gupcr_heap_alloc (upc_heap_p heap, size_t size)
+{
+ shared void *mem = NULL;
+ /* Add header overhead and clamp to the minimum block size.  */
+ const size_t alloc_size = GUPCR_MAX (size + GUPCR_HEAP_ALLOC_OVERHEAD,
+ GUPCR_HEAP_ALLOC_MIN);
+ const unsigned int pool_fit = gupcr_plog2 (alloc_size);
+ unsigned long long int pool_avail;
+ unsigned int p;
+ upc_heap_node_p alloc = NULL;
+ gupcr_assert (heap != NULL);
+ gupcr_assert (size > 0);
+ upc_lock (heap->lock);
+ /* Mask off pools smaller than the required fit.
+    NOTE(review): the shift direction here must agree with the bit
+    numbering used by gupcr_find_first_one() below -- verify against
+    gupcr_utils.h.  */
+ pool_avail = heap->pool_avail << pool_fit;
+ if (!pool_avail)
+ {
+ /* No suitable free block: grow the heap, then re-check.  */
+ gupcr_heap_extend (heap, alloc_size);
+ pool_avail = heap->pool_avail << pool_fit;
+ }
+ if (pool_avail)
+ {
+ /* Pop the smallest adequate block and split it down until it
+    matches the requested size class.  */
+ p = pool_fit + gupcr_find_first_one (pool_avail);
+ for (alloc = gupcr_heap_list_pop (heap, p); p > pool_fit; --p)
+ alloc = gupcr_heap_list_split (heap, alloc);
+ alloc->alloc_tag = GUPCR_HEAP_ALLOC_TAG;
+ }
+ if (alloc)
+ /* Return the payload, just past the block header.  */
+ mem = gupcr_pts_add_offset (alloc, GUPCR_HEAP_ALLOC_OVERHEAD);
+ upc_unlock (heap->lock);
+ return mem;
+}
+
+/**
+ * Return the block given by 'node' into 'heap'.
+ */
+static void
+gupcr_heap_free (upc_heap_p heap, upc_heap_node_p node)
+{
+ unsigned int p;
+ upc_heap_node_p free_node;
+ gupcr_assert (heap != NULL);
+ gupcr_assert (node != NULL);
+ upc_lock (heap->lock);
+ /* Coalesce with free buddies as far up the size classes as
+    possible, then push the (possibly merged) block.  */
+ for (p = gupcr_plog2 (node->size), free_node = node;
+ gupcr_heap_list_join (heap, p, &free_node); ++p) /* loop */ ;
+ free_node->alloc_tag = 0;
+ gupcr_heap_list_push (heap, p, free_node);
+ upc_unlock (heap->lock);
+}
+
+shared void *
+upc_global_alloc (size_t nblocks, size_t nbytes)
+{
+ /* Round the block count up to a multiple of THREADS so every
+    thread's slice is the same size.  */
+ size_t request_size = GUPCR_ROUND (nblocks, THREADS) * nbytes;
+ size_t alloc_size = request_size / THREADS;
+ shared void *mem = NULL;
+ gupcr_trace (FC_ALLOC, "ALLOC GLOBAL_ALLOC ENTER");
+ /* Non-collective: only the calling thread performs the allocation,
+    from the shared global heap.  Returns NULL for a zero request.  */
+ if (alloc_size > 0)
+ mem = gupcr_heap_alloc (gupcr_global_heap, alloc_size);
+ gupcr_trace (FC_ALLOC, "ALLOC GLOBAL_ALLOC EXIT %u:0x%lx %lu",
+ (unsigned) upc_threadof (mem),
+ (long unsigned) upc_addrfield (mem),
+ (long unsigned) nbytes);
+ return mem;
+}
+
+shared void *
+upc_all_alloc (size_t nblocks, size_t nbytes)
+{
+ size_t request_size = GUPCR_ROUND (nblocks, THREADS) * nbytes;
+ size_t alloc_size = request_size / THREADS;
+ shared void *mem = NULL;
+ gupcr_trace (FC_ALLOC, "ALLOC ALL_ALLOC ENTER");
+ /* Collective: thread 0 performs the single allocation and
+    broadcasts the resulting pointer to all other threads.  */
+ if (alloc_size > 0)
+ {
+ if (MYTHREAD == 0)
+ {
+ mem = gupcr_heap_alloc (gupcr_global_heap, alloc_size);
+ gupcr_bcast_send (&mem, sizeof (mem));
+ }
+ else
+ gupcr_bcast_recv (&mem, sizeof (mem));
+ }
+ gupcr_trace (FC_ALLOC, "ALLOC ALL_ALLOC EXIT %u:0x%lx %lu",
+ (unsigned) upc_threadof (mem),
+ (long unsigned) upc_addrfield (mem),
+ (long unsigned) nbytes);
+ return mem;
+}
+
+shared void *
+upc_alloc (size_t nbytes)
+{
+ shared void *mem = NULL;
+ gupcr_trace (FC_ALLOC, "ALLOC ALLOC ENTER");
+ /* Allocate from the calling thread's local heap, so the result has
+    affinity to MYTHREAD.  NULL for a zero-byte request.  */
+ if (nbytes)
+ mem = gupcr_heap_alloc (gupcr_local_heap, nbytes);
+ gupcr_trace (FC_ALLOC, "ALLOC ALLOC EXIT %u:0x%lx %lu",
+ (unsigned) upc_threadof (mem),
+ (long unsigned) upc_addrfield (mem),
+ (long unsigned) nbytes);
+ return mem;
+}
+
+void
+upc_all_free (shared void *ptr)
+{
+ if (ptr)
+ {
+ const int thread = (int) upc_threadof (ptr);
+ /* Synchronize all threads (barrier id -1) before the owning
+    thread releases the storage.  */
+ upc_barrier - 1;
+ /* Check for errors only on thread 0. */
+ if ((MYTHREAD == 0) && (thread >= THREADS))
+ gupcr_error ("upc_all_free() called with invalid shared pointer");
+ /* Only the thread with affinity to the block performs the free.  */
+ if (thread == MYTHREAD)
+ upc_free (ptr);
+ }
+}
+
+void
+upc_free (shared void *ptr)
+{
+ gupcr_trace (FC_ALLOC, "ALLOC FREE ENTER %u:0x%lx",
+ (unsigned) upc_threadof (ptr),
+ (long unsigned) upc_addrfield (ptr));
+ /* Freeing a NULL pointer is a no-op.  */
+ if (ptr)
+ {
+ const size_t offset __attribute__ ((unused)) = upc_addrfield (ptr);
+ const int thread = (int) upc_threadof (ptr);
+ const size_t phase = upc_phaseof (ptr);
+ upc_heap_p heap;
+ upc_heap_node_p node;
+ unsigned int is_global;
+ /* Allocator results always have phase 0 and a valid thread.  */
+ if (phase || thread >= THREADS)
+ gupcr_error ("upc_free() called with invalid shared pointer");
+ /* The block header sits immediately below the payload.  */
+ node = gupcr_pts_add_offset (ptr, -GUPCR_HEAP_ALLOC_OVERHEAD);
+ is_global = node->is_global;
+ /* Global-heap blocks always live on thread 0.  */
+ if (is_global && thread)
+ gupcr_error ("upc_free() called with invalid shared pointer");
+ if (node->alloc_tag != GUPCR_HEAP_ALLOC_TAG)
+ gupcr_error ("upc_free() called with pointer to unallocated space");
+ if (is_global)
+ heap = gupcr_global_heap;
+ else
+ /* Return the block to the owning thread's local heap, which
+    need not be the caller's.  */
+ heap = &gupcr_local_heap_info[thread];
+ gupcr_heap_free (heap, node);
+ }
+ gupcr_trace (FC_ALLOC, "ALLOC FREE EXIT");
+}
+
+/** @} */
===================================================================
@@ -0,0 +1,600 @@
+/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
+ This file is part of the UPC runtime Library.
+ Written by Gary Funck <gary@intrepid.com>
+ and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include <upc.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <upc_atomic.h>
+#include <portals4.h>
+#include "gupcr_gmem.h"
+#include "gupcr_utils.h"
+#include "gupcr_atomic_sup.h"
+
+/**
+ * @file gupcr_atomic.upc
+ * GUPC Portals4 UPC atomics implementation.
+ *
+ * All UPC atomic operations and data types, with exception of UPC_PTS,
+ * are almost completely matched to the corresponding Portals4 atomics.
+ * The following exceptions are made:
+ *
+ * UPC_SUB Converted into Portals4 atomic add of a negative number.
+ * UPC_INC Converted into Portals4 atomic add of one.
+ * UPC_DEC Converted into Portals4 atomic add of negative one.
+ *
+ * UPC_PTS data type does not use Portals4 atomic operations (even though
+ * 64 bit pointer-to-shared can fit into the int64 container). This is
+ * mainly due to the fact that pointer-to-shared comparison has to
+ * disregard the phase part of the pointer and Portals4 does not have
+ * support for CSWAP with a mask.
+ */
+
+/**
+ * @addtogroup ATOMIC GUPCR Atomics Support Functions
+ * @{
+ */
+
+/** Atomic domain representation.
+    Holds the lock used for operations that cannot be expressed as a
+    single network atomic, plus the operation set and element type the
+    domain was created with.  */
+struct upc_atomicdomain_struct
+{
+ upc_lock_t *lock;
+ upc_op_t ops;
+ upc_type_t type;
+};
+
+/**
+ * Convert UPC to Portals4 atomic data type.
+ *
+ * @param [in] upc_type UPC atomic data type
+ * @retval Portals4 atomic data type
+ */
+static inline ptl_datatype_t
+gupcr_atomic_to_ptl_type (upc_type_t upc_type)
+{
+ switch (upc_type)
+ {
+ case UPC_INT:
+ return UPC_ATOMIC_TO_PTL_INT;
+ case UPC_UINT:
+ return UPC_ATOMIC_TO_PTL_UINT;
+ case UPC_LONG:
+ return UPC_ATOMIC_TO_PTL_LONG;
+ case UPC_ULONG:
+ return UPC_ATOMIC_TO_PTL_ULONG;
+ case UPC_INT32:
+ return UPC_ATOMIC_TO_PTL_INT32;
+ case UPC_UINT32:
+ return UPC_ATOMIC_TO_PTL_UINT32;
+ case UPC_INT64:
+ return UPC_ATOMIC_TO_PTL_INT64;
+ case UPC_UINT64:
+ return UPC_ATOMIC_TO_PTL_UINT64;
+ case UPC_FLOAT:
+ return UPC_ATOMIC_TO_PTL_FLOAT;
+ case UPC_DOUBLE:
+ return UPC_ATOMIC_TO_PTL_DOUBLE;
+ default:
+ /* UPC_PTS (and any unknown type) has no Portals4 equivalent.  */
+ gupcr_error ("invalid UPC atomic type %d", (int) upc_type);
+ }
+ /* Reached only after gupcr_error; -1 is a deliberately invalid
+    ptl_datatype_t sentinel.  */
+ return -1;
+}
+
+/**
+ * Convert UPC to Portals4 atomic operation.
+ *
+ * @param [in] upc_op UPC atomic operation
+ * @retval Portals4 atomic operation
+ */
+static inline ptl_op_t
+gupcr_atomic_to_ptl_op (upc_op_t upc_op)
+{
+ switch (upc_op)
+ {
+ case UPC_ADD:
+ return PTL_SUM;
+ case UPC_MULT:
+ return PTL_PROD;
+ case UPC_MAX:
+ return PTL_MAX;
+ case UPC_MIN:
+ return PTL_MIN;
+ case UPC_AND:
+ return PTL_BAND;
+ case UPC_OR:
+ return PTL_BOR;
+ case UPC_XOR:
+ return PTL_BXOR;
+ default:
+ /* UPC_SUB/INC/DEC are rewritten as PTL_SUM by callers (see the
+    file header); anything else reaching here is an error.  */
+ gupcr_error ("invalid UPC atomic op %d", (int) upc_op);
+ }
+ /* Invalid ptl_op_t sentinel, reached only after gupcr_error.  */
+ return -1;
+}
+
+/**
+ * Convert UPC atomic operation into a string.
+ *
+ * @param [in] upc_op UPC atomic operation
+ * @retval Character string
+ */
+static const char *
+gupcr_get_atomic_op_as_string (upc_op_t upc_op)
+{
+ /* Symbolic names used in trace and error messages only.  */
+ switch (upc_op)
+ {
+ case UPC_ADD:
+ return "UPC_ADD";
+ case UPC_AND:
+ return "UPC_AND";
+ case UPC_CSWAP:
+ return "UPC_CSWAP";
+ case UPC_DEC:
+ return "UPC_DEC";
+ case UPC_INC:
+ return "UPC_INC";
+ case UPC_GET:
+ return "UPC_GET";
+ case UPC_MAX:
+ return "UPC_MAX";
+ case UPC_MIN:
+ return "UPC_MIN";
+ case UPC_MULT:
+ return "UPC_MULT";
+ case UPC_OR:
+ return "UPC_OR";
+ case UPC_SET:
+ return "UPC_SET";
+ case UPC_SUB:
+ return "UPC_SUB";
+ case UPC_XOR:
+ return "UPC_XOR";
+ }
+ return "UNKNOWN ATOMIC OP";
+}
+
+/**
+ * Convert UPC atomic type into a string.
+ *
+ * @param [in] upc_type UPC atomic type
+ * @retval Character string
+ */
+static const char *
+gupcr_get_atomic_type_as_string (upc_type_t upc_type)
+{
+ /* Symbolic names used in trace and error messages only.  */
+ switch (upc_type)
+ {
+ case UPC_INT:
+ return "UPC_INT";
+ case UPC_UINT:
+ return "UPC_UINT";
+ case UPC_LONG:
+ return "UPC_LONG";
+ case UPC_ULONG:
+ return "UPC_ULONG";
+ case UPC_INT32:
+ return "UPC_INT32";
+ case UPC_UINT32:
+ return "UPC_UINT32";
+ case UPC_INT64:
+ return "UPC_INT64";
+ case UPC_UINT64:
+ return "UPC_UINT64";
+ case UPC_FLOAT:
+ return "UPC_FLOAT";
+ case UPC_DOUBLE:
+ return "UPC_DOUBLE";
+ case UPC_PTS:
+ return "UPC_PTS";
+ }
+ return "UNKNOWN ATOMIC TYPE";
+}
+
+/** Set value by UPC atomic type macro.
+    Stores 'value' (an int) into 'buf' converted to the given C type;
+    relies on 'buf' and 'value' being in scope at the expansion site.  */
+#define FUNC_TYPE_SET(__name__,__type__) \
+ *(__type__ *) buf = (__type__) value
+
+/**
+ * Set buffer to the value of the particular UPC atomic type.
+ *
+ * @param [in] buf Pointer to the buffer to set
+ * @param [in] type UPC atomic type
+ * @param [in] value Value to be set
+ */
+static void
+gupcr_set_optype_val (void *buf, upc_type_t type, int value)
+{
+ switch (type)
+ {
+ case UPC_INT:
+ FUNC_TYPE_SET (UPC_INT, int);
+ break;
+ case UPC_UINT:
+ FUNC_TYPE_SET (UPC_UINT, unsigned int);
+ break;
+ case UPC_LONG:
+ FUNC_TYPE_SET (UPC_LONG, long);
+ break;
+ case UPC_ULONG:
+ FUNC_TYPE_SET (UPC_ULONG, unsigned long);
+ break;
+ case UPC_INT32:
+ FUNC_TYPE_SET (UPC_INT32, int32_t);
+ break;
+ case UPC_UINT32:
+ FUNC_TYPE_SET (UPC_UINT32, uint32_t);
+ break;
+ case UPC_INT64:
+ FUNC_TYPE_SET (UPC_INT64, int64_t);
+ break;
+ case UPC_UINT64:
+ FUNC_TYPE_SET (UPC_UINT64, uint64_t);
+ break;
+ case UPC_FLOAT:
+ FUNC_TYPE_SET (UPC_FLOAT, float);
+ break;
+ case UPC_DOUBLE:
+ FUNC_TYPE_SET (UPC_DOUBLE, double);
+ break;
+ default:
+ /* UPC_PTS is intentionally unsupported here (no int form).  */
+ gupcr_error ("wrong UPC type (%d)", type);
+ }
+}
+
+/** Negate value by UPC atomic type macro */
+#define FUNC_TYPE_NEGATE(__name__,__type__) \
+ *(__type__ *) dbuf = - *(__type__*) sbuf
+
+/**
+ * Negate value of the particular UPC atomic type.
+ *
+ * @param [in] dbuf Pointer to negated value
+ * @param [in] sbuf Pointer to original value
+ * @param [in] type UPC atomic type
+ */
+static void
+gupcr_negate_atomic_type (void *dbuf, const void *sbuf, upc_type_t type)
+{
+ switch (type)
+ {
+ case UPC_INT:
+ FUNC_TYPE_NEGATE (UPC_INT, int);
+ break;
+ case UPC_UINT:
+ FUNC_TYPE_NEGATE (UPC_UINT, unsigned int);
+ break;
+ case UPC_LONG:
+ FUNC_TYPE_NEGATE (UPC_LONG, long);
+ break;
+ case UPC_ULONG:
+ FUNC_TYPE_NEGATE (UPC_ULONG, unsigned long);
+ break;
+ case UPC_INT32:
+ FUNC_TYPE_NEGATE (UPC_INT32, int32_t);
+ break;
+ case UPC_UINT32:
+ FUNC_TYPE_NEGATE (UPC_UINT32, uint32_t);
+ break;
+ case UPC_INT64:
+ FUNC_TYPE_NEGATE (UPC_INT64, int64_t);
+ break;
+ case UPC_UINT64:
+ FUNC_TYPE_NEGATE (UPC_UINT64, uint64_t);
+ break;
+ case UPC_FLOAT:
+ FUNC_TYPE_NEGATE (UPC_FLOAT, float);
+ break;
+ case UPC_DOUBLE:
+ FUNC_TYPE_NEGATE (UPC_DOUBLE, double);
+ break;
+ default:
+ gupcr_error ("wrong UPC type (%d)", type);
+ }
+}
+
+/** @} */
+
+/**
+ * @addtogroup UPCATOMIC UPC Atomics Functions
+ * @{
+ */
+
+/**
+ * UPC atomic relaxed operation.
+ *
+ * @param [in] domain Atomic domain
+ * @param [in] fetch_ptr Target of the update
+ * @param [in] op Atomic operation
+ * @param [in] target Target address of the operation
+ * @param [in] operand1 Operation required argument
+ * @param [in] operand2 Operation required argument
+ */
+void
+upc_atomic_relaxed (upc_atomicdomain_t * domain,
+ void *restrict fetch_ptr, upc_op_t op,
+ shared void *restrict target,
+ const void *restrict operand1,
+ const void *restrict operand2)
+{
+ struct upc_atomicdomain_struct *ldomain;
+ char cvt_buf[GUPC_MAX_ATOMIC_SIZE];
+
+ /* Complete all strict operations. Portals4 runtime allows only
+ outstanding put operations. */
+ if (gupcr_pending_strict_put)
+ gupcr_gmem_sync_puts ();
+
+ if (domain == NULL)
+ gupcr_fatal_error ("NULL atomic domain pointer specified");
+
+ ldomain = (struct upc_atomicdomain_struct *) &domain[MYTHREAD];
+
+ gupcr_trace (FC_ATOMIC, "ATOMIC ENTER %s %s",
+ gupcr_get_atomic_op_as_string (op),
+ gupcr_get_atomic_type_as_string (ldomain->type));
+
+ if (target == NULL)
+ gupcr_fatal_error ("NULL atomic target pointer specified");
+
+ if (!(op && ldomain->ops))
+ {
+ gupcr_fatal_error ("invalid operation (%s) for specified domain",
+ gupcr_get_atomic_op_as_string (op));
+ }
+
+ /* Check arguments. */
+ switch (op)
+ {
+ case UPC_GET:
+ if (fetch_ptr == NULL)
+ gupcr_fatal_error (
+ "atomic operation (UPC_GET) requires a non-NULL fetch pointer");
+ case UPC_INC:
+ case UPC_DEC:
+ if (operand1 != NULL)
+ gupcr_error ("atomic operation (%s) requires a NULL operand1",
+ gupcr_get_atomic_op_as_string (op));
+ if (operand2 != NULL)
+ gupcr_error ("atomic operation (%s) requires a NULL operand2",
+ gupcr_get_atomic_op_as_string (op));
+ break;
+ case UPC_CSWAP:
+ if (operand1 == NULL)
+ gupcr_fatal_error (
+ "atomic operation (UPC_CSWAP) requires a non-NULL operand1");
+ if (operand2 == NULL)
+ gupcr_fatal_error (
+ "atomic operation (UPC_CSWAP) requires a non-NULL operand2");
+ break;
+ default:
+ if (operand1 == NULL)
+ gupcr_fatal_error (
+ "atomic operation (%s) requires a non-NULL operand1",
+ gupcr_get_atomic_op_as_string (op));
+ if (operand2 != NULL)
+ gupcr_error ("atomic operation (%s) requires a NULL operand2",
+ gupcr_get_atomic_op_as_string (op));
+ }
+
+ /* UPC_PTS data type does not use Portals4 atomic operations,
+ even though 64 bit pointer-to-shared fits in the int64
+ container. UPC_PTS supports only access operations (get, set, cswap)
+ and as pointer compare needs to disregards the phase during
+ comparison we are unable to place the pointer in some integral
+ container (e.g. int64) and use Portals4 atomic ops. */
+ if (ldomain->type == UPC_PTS)
+ {
+ upc_lock (ldomain->lock);
+ switch (op)
+ {
+ case UPC_GET:
+ *(shared void **) fetch_ptr = *(shared void *shared *) target;
+ break;
+ case UPC_SET:
+ if (fetch_ptr)
+ *(shared void **) fetch_ptr = *(shared void *shared *) target;
+ *(shared void *shared *) target = *(shared void **) operand1;
+ break;
+ case UPC_CSWAP:
+ {
+ shared void *tmp = *(shared void *shared *) target;
+ if (*(shared void **) operand1 == tmp)
+ *(shared void *shared *) target = *(shared void **) operand2;
+ if (fetch_ptr)
+ *(shared void **) fetch_ptr = tmp;
+ }
+ break;
+ default:
+ upc_unlock (ldomain->lock);
+ gupcr_fatal_error ("invalid atomic operation (%s) for UPC_PTS",
+ gupcr_get_atomic_op_as_string (op));
+ }
+ upc_unlock (ldomain->lock);
+ }
+ else
+ {
+ size_t dthread = upc_threadof (target);
+ size_t doffset = upc_addrfield (target);
+
+ switch (op)
+ {
+ case UPC_GET:
+ gupcr_atomic_get (dthread, doffset, fetch_ptr,
+ gupcr_atomic_to_ptl_type (ldomain->type));
+ break;
+ case UPC_SET:
+ gupcr_atomic_set (dthread, doffset, fetch_ptr, operand1,
+ gupcr_atomic_to_ptl_type (ldomain->type));
+ break;
+ case UPC_CSWAP:
+ gupcr_atomic_cswap (dthread, doffset, fetch_ptr, operand1, operand2,
+ gupcr_atomic_to_ptl_type (ldomain->type));
+ break;
+ case UPC_AND:
+ case UPC_OR:
+ case UPC_XOR:
+ if (ldomain->type == UPC_PTS ||
+ ldomain->type == UPC_FLOAT || ldomain->type == UPC_DOUBLE)
+ {
+ gupcr_fatal_error (
+ "invalid atomic operation (%s) for %s type",
+ gupcr_get_atomic_op_as_string (op),
+ gupcr_get_atomic_type_as_string (ldomain->type));
+ }
+ gupcr_atomic_op (dthread, doffset, fetch_ptr, operand1,
+ gupcr_atomic_to_ptl_op (op),
+ gupcr_atomic_to_ptl_type (ldomain->type));
+ break;
+ case UPC_ADD:
+ case UPC_MULT:
+ case UPC_MIN:
+ case UPC_MAX:
+ gupcr_atomic_op (dthread, doffset, fetch_ptr,
+ operand1, gupcr_atomic_to_ptl_op (op),
+ gupcr_atomic_to_ptl_type (ldomain->type));
+ break;
+ case UPC_SUB:
+ /* As Portals4 does not have atomic subtract, UPC_SUB must be
+ converted into atomic add, UPC_ADD. */
+ gupcr_negate_atomic_type (cvt_buf, operand1, ldomain->type);
+ gupcr_atomic_op (dthread, doffset, fetch_ptr,
+ cvt_buf, gupcr_atomic_to_ptl_op (UPC_ADD),
+ gupcr_atomic_to_ptl_type (ldomain->type));
+ break;
+ case UPC_INC:
+ case UPC_DEC:
+ if (op == UPC_INC)
+ gupcr_set_optype_val (cvt_buf, ldomain->type, 1);
+ else
+ gupcr_set_optype_val (cvt_buf, ldomain->type, -1);
+ gupcr_atomic_op (dthread, doffset, fetch_ptr, cvt_buf, PTL_SUM,
+ gupcr_atomic_to_ptl_type (ldomain->type));
+ break;
+ default:
+ gupcr_fatal_error ("invalid atomic operation: %s",
+ gupcr_get_atomic_op_as_string (op));
+ }
+ }
+ gupcr_trace (FC_ATOMIC, "ATOMIC EXIT");
+}
+
/**
 * UPC atomic strict operation.
 *
 * Same as upc_atomic_relaxed, but bracketed by upc_fence on both
 * sides so the atomic is ordered with respect to all other shared
 * accesses by this thread.
 *
 * @param [in] domain Atomic domain
 * @param [in] fetch_ptr Target of the update
 * @param [in] op Atomic operation
 * @param [in] target Target address of the operation
 * @param [in] operand1 Operation required argument
 * @param [in] operand2 Operation required argument
 */
void
upc_atomic_strict (upc_atomicdomain_t * domain,
		   void *restrict fetch_ptr,
		   upc_op_t op,
		   shared void *restrict target,
		   const void *restrict operand1,
		   const void *restrict operand2)
{
  /* Complete all earlier shared accesses before the atomic...  */
  upc_fence;
  upc_atomic_relaxed (domain, fetch_ptr, op, target, operand1, operand2);
  /* ...and complete the atomic before any later shared access.  */
  upc_fence;
}
+
+/**
+ * Collective allocation of atomic domain.
+ *
+ * Implementation uses native Portals4 atomic functions and the
+ * hint field is ignored.
+ *
+ * @parm [in] type Atomic operation type
+ * @parm [in] ops Atomic domain operations
+ * @parm [in] hints Atomic operation hint
+ * @retval Allocated atomic domain pointer
+ */
+upc_atomicdomain_t *
+upc_all_atomicdomain_alloc (upc_type_t type,
+ upc_op_t ops,
+ upc_atomichint_t hints __attribute__ ((unused)))
+{
+ struct upc_atomicdomain_struct *ldomain;
+ shared upc_atomicdomain_t *domain;
+
+ gupcr_trace (FC_ATOMIC, "ATOMIC DOMAIN_ALLOC ENTER %s ops(%X)",
+ gupcr_get_atomic_type_as_string (type), (unsigned) ops);
+ domain = (upc_atomicdomain_t *)
+ upc_all_alloc (THREADS, sizeof (struct upc_atomicdomain_struct));
+ gupcr_assert (domain != NULL);
+
+ ldomain = (struct upc_atomicdomain_struct *) &domain[MYTHREAD];
+ ldomain->lock = NULL;
+ if (type == UPC_PTS)
+ ldomain->lock = upc_all_lock_alloc ();
+ ldomain->ops = ops;
+ ldomain->type = type;
+ gupcr_trace (FC_ATOMIC, "ATOMIC DOMAIN_ALLOC EXIT");
+ return domain;
+}
+
+/**
+ * Collective free of the atomic domain.
+ *
+ * @param [in] domain Pointer to atomic domain
+ *
+ * @ingroup UPCATOMIC UPC Atomic Functions
+ */
+void
+upc_all_atomicdomain_free (upc_atomicdomain_t * domain)
+{
+ if (domain == NULL)
+ gupcr_fatal_error ("NULL atomic domain pointer specified");
+ upc_barrier;
+ if (MYTHREAD == 0)
+ {
+ upc_lock_free (domain->lock);
+ upc_free (domain);
+ }
+ upc_barrier;
+}
+
+/**
+ * Query implementation for expected performance.
+ *
+ * @parm [in] ops Atomic domain operations
+ * @parm [in] type Atomic operation type
+ * @parm [in] addr Atomic address
+ * @retval Expected performance
+ */
+int
+upc_atomic_isfast (upc_type_t type __attribute__ ((unused)),
+ upc_op_t ops __attribute__ ((unused)),
+ shared void *addr __attribute__ ((unused)))
+{
+ if (type == UPC_PTS)
+ return UPC_ATOMIC_PERFORMANCE_NOT_FAST;
+ return UPC_ATOMIC_PERFORMANCE_FAST;
+}
+
+/** @} */
===================================================================
@@ -0,0 +1,292 @@
+/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
+ This file is part of the UPC runtime Library.
+ Written by Gary Funck <gary@intrepid.com>
+ and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include "gupcr_config.h"
+#include "gupcr_defs.h"
+#include "gupcr_lib.h"
+#include "gupcr_sup.h"
+#include "gupcr_portals.h"
+#include "gupcr_gmem.h"
+#include "gupcr_utils.h"
+#include "gupcr_coll_sup.h"
+#include "gupcr_atomic_sup.h"
+
+/**
+ * @file gupcr_atomic_sup.c
+ * GUPC Portals4 atomic support routines.
+ *
+ * @addtogroup ATOMIC GUPCR Atomics Support Functions
+ * @{
+ */
+
+/** Atomic local access MD handle */
+static ptl_handle_md_t gupcr_atomic_md;
+/** Atomic local access MD counting events handle */
+static ptl_handle_ct_t gupcr_atomic_md_ct;
+/** Atomic local access MD event queue handle */
+static ptl_handle_eq_t gupcr_atomic_md_eq;
+/** Atomic number of received ACKs on local md */
+static ptl_size_t gupcr_atomic_md_count;
+
+/** Atomic operations use remote gmem PTE */
+#define GUPCR_PTL_PTE_ATOMIC GUPCR_PTL_PTE_GMEM
+
+/**
+ * Atomic GET operation.
+ *
+ * A simple Portals4 get operation is sufficient for data
+ * types supported by UPC.
+ *
+ * @param[in] thread Destination thread
+ * @param[in] doffset Destination offset
+ * @param[in] fetch_ptr Fetch value pointer
+ * @param[in] type Atomic data type
+ */
+void
+gupcr_atomic_get (size_t dthread, size_t doffset, void *fetch_ptr,
+ ptl_datatype_t type)
+{
+ ptl_ct_event_t ct;
+ ptl_process_t rpid;
+ char tmpbuf[128] __attribute__ ((unused));
+ size_t size;
+
+ gupcr_debug (FC_ATOMIC, "%lu:0x%lx", dthread, doffset);
+ if (fetch_ptr == NULL)
+ gupcr_error ("UPC_GET fetch pointer is NULL");
+
+ size = gupcr_get_atomic_size (type);
+ rpid.rank = dthread;
+ gupcr_portals_call (PtlGet, (gupcr_atomic_md, (ptl_size_t) fetch_ptr,
+ size, rpid, GUPCR_PTL_PTE_ATOMIC,
+ PTL_NO_MATCH_BITS, doffset,
+ PTL_NULL_USER_PTR));
+ gupcr_atomic_md_count += 1;
+ gupcr_portals_call (PtlCTWait,
+ (gupcr_atomic_md_ct, gupcr_atomic_md_count, &ct));
+ if (ct.failure)
+ {
+ gupcr_process_fail_events (gupcr_atomic_md_eq);
+ gupcr_fatal_error ("received an error on atomic MD");
+ }
+ gupcr_debug (FC_ATOMIC, "ov(%s)",
+ gupcr_get_buf_as_hex (tmpbuf, fetch_ptr, size));
+}
+
/**
 * Portals4 atomic set operation.
 *
 * Execute Portals4 PtlSwap with PTL_SWAP operation: the new value is
 * stored unconditionally and the previous contents are returned into
 * a local scratch buffer, copied out when 'fetch_ptr' is non-NULL.
 *
 * @param[in] dthread Destination thread
 * @param[in] doffset Destination offset
 * @param[in] fetch_ptr Fetch value pointer (optional)
 * @param[in] value New value of atomic variable
 * @param[in] type Atomic data type
 */
void
gupcr_atomic_set (size_t dthread, size_t doffset, void *fetch_ptr,
		  const void *value, ptl_datatype_t type)
{
  ptl_process_t rpid;
  ptl_ct_event_t ct;
  char tmpbuf[128] __attribute__ ((unused));
  char atomic_tmp_buf[GUPC_MAX_ATOMIC_SIZE];
  size_t size = gupcr_get_atomic_size (type);
  gupcr_debug (FC_ATOMIC, "%lu:0x%lx v(%s)", dthread, doffset,
	       gupcr_get_buf_as_hex (tmpbuf, value, size));
  rpid.rank = dthread;
  /* PTL_SWAP: store 'value' at the remote offset, old contents are
     delivered into atomic_tmp_buf via the local MD.  */
  gupcr_portals_call (PtlSwap, (gupcr_atomic_md,
				(ptl_size_t) atomic_tmp_buf,
				gupcr_atomic_md, (ptl_size_t) value,
				size, rpid, GUPCR_PTL_PTE_ATOMIC,
				PTL_NO_MATCH_BITS, doffset, PTL_NULL_USER_PTR,
				PTL_NULL_HDR_DATA, NULL, PTL_SWAP, type));
  /* Wait for the reply counting event on the local MD.  */
  gupcr_atomic_md_count += 1;
  gupcr_portals_call (PtlCTWait,
		      (gupcr_atomic_md_ct, gupcr_atomic_md_count, &ct));
  if (ct.failure)
    {
      gupcr_process_fail_events (gupcr_atomic_md_eq);
      gupcr_fatal_error ("received an error on atomic MD");
    }
  if (fetch_ptr)
    {
      gupcr_debug (FC_ATOMIC, "ov(%s)",
		   gupcr_get_buf_as_hex (tmpbuf, atomic_tmp_buf, size));
      memcpy (fetch_ptr, atomic_tmp_buf, size);
    }
}
+
+/**
+ * Portals4 atomic CSWAP operation.
+ *
+ * Execute Portals4 PtlSwap with PTL_CSWAP operation.
+ *
+ * @param[in] thread Destination thread
+ * @param[in] doffset Destination offset
+ * @param[in] fetch_ptr Fetch value pointer (optional)
+ * @param[in] expected Expected value of atomic variable
+ * @param[in] value New value of atomic variable
+ * @param[in] type Atomic data type
+ */
+void
+gupcr_atomic_cswap (size_t dthread, size_t doffset, void *fetch_ptr,
+ const void *expected, const void *value,
+ ptl_datatype_t type)
+{
+ ptl_process_t rpid;
+ ptl_ct_event_t ct;
+ char tmpbuf[128] __attribute__ ((unused));
+ char atomic_tmp_buf[GUPC_MAX_ATOMIC_SIZE];
+ size_t size = gupcr_get_atomic_size (type);
+ gupcr_debug (FC_ATOMIC, "%lu:0x%lx v(%s) e(%s)", dthread, doffset,
+ gupcr_get_buf_as_hex (tmpbuf, value, size),
+ gupcr_get_buf_as_hex (tmpbuf, expected, size));
+ rpid.rank = dthread;
+ gupcr_portals_call (PtlSwap, (gupcr_atomic_md,
+ (ptl_size_t) atomic_tmp_buf,
+ gupcr_atomic_md, (ptl_size_t) value,
+ size, rpid,
+ GUPCR_PTL_PTE_ATOMIC, PTL_NO_MATCH_BITS,
+ doffset, PTL_NULL_USER_PTR, PTL_NULL_HDR_DATA,
+ expected, PTL_CSWAP, type));
+ gupcr_atomic_md_count += 1;
+ gupcr_portals_call (PtlCTWait,
+ (gupcr_atomic_md_ct, gupcr_atomic_md_count, &ct));
+ if (ct.failure)
+ {
+ gupcr_process_fail_events (gupcr_atomic_md_eq);
+ gupcr_fatal_error ("received an error on atomic MD");
+ }
+ if (fetch_ptr)
+ {
+ gupcr_debug (FC_ATOMIC, "ov(%s)",
+ gupcr_get_buf_as_hex (tmpbuf, atomic_tmp_buf, size));
+ memcpy (fetch_ptr, atomic_tmp_buf, size);
+ }
+}
+
/**
 * Portals4 atomic operation.
 *
 * Execute Portals4 atomic function and return the old value
 * if requested.
 *
 * @param[in] dthread Destination thread
 * @param[in] doffset Destination offset
 * @param[in] fetch_ptr Fetch value pointer (optional)
 * @param[in] value Atomic value for the operation
 * @param[in] op Atomic operation
 * @param[in] type Atomic data type
 */
void
gupcr_atomic_op (size_t dthread, size_t doffset, void *fetch_ptr,
		 const void *value, ptl_op_t op, ptl_datatype_t type)
{
  ptl_process_t rpid;
  ptl_ct_event_t ct;
  char tmpbuf[128] __attribute__ ((unused));
  char atomic_tmp_buf[GUPC_MAX_ATOMIC_SIZE];
  size_t size = gupcr_get_atomic_size (type);
  gupcr_debug (FC_ATOMIC, "%lu:0x%lx %s:%s v(%s)", dthread, doffset,
	       gupcr_strptlop (op), gupcr_strptldatatype (type),
	       gupcr_get_buf_as_hex (tmpbuf, value, size));
  rpid.rank = dthread;
  /* PtlFetchAtomic returns the old value into atomic_tmp_buf; the
     plain PtlAtomic form is used when the old value is not needed.  */
  if (fetch_ptr)
    {
      gupcr_portals_call (PtlFetchAtomic,
			  (gupcr_atomic_md, (ptl_size_t) atomic_tmp_buf,
			   gupcr_atomic_md, (ptl_size_t) value,
			   size, rpid, GUPCR_PTL_PTE_ATOMIC,
			   PTL_NO_MATCH_BITS, doffset,
			   PTL_NULL_USER_PTR, PTL_NULL_HDR_DATA, op, type));
    }
  else
    {
      gupcr_portals_call (PtlAtomic,
			  (gupcr_atomic_md, (ptl_size_t) value,
			   size, PTL_ACK_REQ, rpid, GUPCR_PTL_PTE_ATOMIC,
			   PTL_NO_MATCH_BITS, doffset,
			   PTL_NULL_USER_PTR, PTL_NULL_HDR_DATA, op, type));
    }
  /* Wait for the ACK/reply counting event on the local MD.  */
  gupcr_atomic_md_count += 1;
  gupcr_portals_call (PtlCTWait,
		      (gupcr_atomic_md_ct, gupcr_atomic_md_count, &ct));
  if (ct.failure)
    {
      gupcr_process_fail_events (gupcr_atomic_md_eq);
      gupcr_fatal_error ("received an error on atomic MD");
    }
  if (fetch_ptr)
    {
      gupcr_debug (FC_ATOMIC, "ov(%s)",
		   gupcr_get_buf_as_hex (tmpbuf, atomic_tmp_buf, size));
      memcpy (fetch_ptr, atomic_tmp_buf, size);
    }
}
+
+/**
+ * Initialize atomics resources.
+ * @ingroup INIT
+ */
+void
+gupcr_atomic_init (void)
+{
+ ptl_md_t md;
+
+ gupcr_log (FC_ATOMIC, "atomic init called");
+
+ /* Setup the Portals MD for local source/destination copying.
+ We need to map the whole user's space (same as gmem). */
+ gupcr_portals_call (PtlCTAlloc, (gupcr_ptl_ni, &gupcr_atomic_md_ct));
+ gupcr_portals_call (PtlEQAlloc, (gupcr_ptl_ni, 1, &gupcr_atomic_md_eq));
+ md.length = (ptl_size_t) USER_PROG_MEM_SIZE;
+ md.start = (void *) USER_PROG_MEM_START;
+ md.options = PTL_MD_EVENT_CT_ACK | PTL_MD_EVENT_CT_REPLY |
+ PTL_MD_EVENT_SUCCESS_DISABLE;
+ md.eq_handle = gupcr_atomic_md_eq;
+ md.ct_handle = gupcr_atomic_md_ct;
+ gupcr_portals_call (PtlMDBind, (gupcr_ptl_ni, &md, &gupcr_atomic_md));
+
+ /* Reset number of acknowledgments. */
+ gupcr_atomic_md_count = 0;
+}
+
/**
 * Release atomics resources.
 *
 * Tears down the objects created by gupcr_atomic_init: the MD is
 * released first, then the counting event and event queue it
 * references.
 *
 * @ingroup INIT
 */
void
gupcr_atomic_fini (void)
{
  gupcr_log (FC_ATOMIC, "atomic fini called");
  /* Release atomic MD and its resources. */
  gupcr_portals_call (PtlMDRelease, (gupcr_atomic_md));
  gupcr_portals_call (PtlCTFree, (gupcr_atomic_md_ct));
  gupcr_portals_call (PtlEQFree, (gupcr_atomic_md_eq));
}
+
+/** @} */
===================================================================
@@ -0,0 +1,84 @@
+/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
+ This file is part of the UPC runtime Library.
+ Written by Gary Funck <gary@intrepid.com>
+ and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
#ifndef _GUPCR_ATOMIC_SUP_H_
#define _GUPCR_ATOMIC_SUP_H_ 1

/**
 * @file gupcr_atomic_sup.h
 * GUPC Portals4 atomics implementation support routines.
 *
 * @addtogroup ATOMIC GUPCR Atomics Support Functions
 * @{
 */

/** Maximum size of atomic types */
#define GUPC_MAX_ATOMIC_SIZE 16

/** Convert from UPC atomics int to Portals atomic type */
#if __SIZEOF_INT__ == 4
#define UPC_ATOMIC_TO_PTL_INT PTL_INT32_T
#define UPC_ATOMIC_TO_PTL_UINT PTL_UINT32_T
#elif __SIZEOF_INT__ == 8
#define UPC_ATOMIC_TO_PTL_INT PTL_INT64_T
#define UPC_ATOMIC_TO_PTL_UINT PTL_UINT64_T
#else
#error "Size of int not supported"
#endif
/** Convert from UPC atomics long to Portals atomic type */
#if __SIZEOF_LONG__ == 4
#define UPC_ATOMIC_TO_PTL_LONG PTL_INT32_T
#define UPC_ATOMIC_TO_PTL_ULONG PTL_UINT32_T
#elif __SIZEOF_LONG__ == 8
#define UPC_ATOMIC_TO_PTL_LONG PTL_INT64_T
#define UPC_ATOMIC_TO_PTL_ULONG PTL_UINT64_T
#else
#error "Size of long not supported"
#endif
/** Convert from UPC atomic int32 to Portals atomic type */
#define UPC_ATOMIC_TO_PTL_INT32 PTL_INT32_T
#define UPC_ATOMIC_TO_PTL_UINT32 PTL_UINT32_T
/** Convert from UPC atomic int64 to Portals atomic type */
#define UPC_ATOMIC_TO_PTL_INT64 PTL_INT64_T
#define UPC_ATOMIC_TO_PTL_UINT64 PTL_UINT64_T
/** Convert from UPC atomic float to Portals atomic type */
#define UPC_ATOMIC_TO_PTL_FLOAT PTL_FLOAT
/** Convert from UPC atomic double to Portals atomic type */
#define UPC_ATOMIC_TO_PTL_DOUBLE PTL_DOUBLE

/** @} */

/* Entry points implemented in gupcr_atomic_sup.c.
   NOTE(review): gupcr_atomic_put is declared here but no matching
   definition appears in gupcr_atomic_sup.c -- presumably a stale
   prototype; confirm against the rest of the runtime.  */
void gupcr_atomic_put (size_t, size_t, size_t, ptl_op_t op, ptl_datatype_t);
void gupcr_atomic_get (size_t, size_t, void *, ptl_datatype_t);
void gupcr_atomic_set (size_t, size_t, void *, const void *, ptl_datatype_t);
void gupcr_atomic_cswap (size_t, size_t, void *, const void *,
			 const void *, ptl_datatype_t);
void gupcr_atomic_op (size_t, size_t, void *, const void *,
		      ptl_op_t, ptl_datatype_t);
void gupcr_atomic_init (void);
void gupcr_atomic_fini (void);

#endif /* gupcr_atomic_sup.h */
===================================================================
@@ -0,0 +1,400 @@
+/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
+ This file is part of the UPC runtime Library.
+ Written by Gary Funck <gary@intrepid.com>
+ and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+
+#include "gupcr_config.h"
+#include "gupcr_defs.h"
+#include "gupcr_sup.h"
+#include "gupcr_utils.h"
+#include "gupcr_backtrace.h"
+#include "gupcr_barrier.h"
+#include <signal.h>
+#include <string.h>
+#if HAVE_EXECINFO_H
+#include <execinfo.h>
+#endif
+#if HAVE_LIMITS_H
+#include <limits.h>
+#endif
+
+/** Skip over frames belonging to the backtrace code itself. */
+#define GUPCR_BT_SKIP_FRAME_CNT 3
+/** Maximum number of stack frames to display. */
+#define GUPCR_BT_DEPTH_CNT 128
+
+#ifndef PATH_MAX
+#define PATH_MAX 1024
+#endif
+
+/** Default backtrace file name prefix. */
+#define UPC_BACKTRACE_PREFIX "backtrace"
+
+/** Full path of the executable program. */
+static char *gupcr_abs_execname;
+
+/** Backtrace on faults enabled flag. */
+static int bt_enabled = 0;
+
+/**
+ * GLIBC backtrace.
+ *
+ * Show backtrace by using the GLIBC backtrace functionality.
+ * Backtrace is improved with the source file/line numbers if
+ * addr2line is available.
+ *
+ * By default backtrace lines are sent to the 'stderr' file
+ * descriptor. However, an environment variable
+ * UPC_BACKTRACEFILE can be used to redirect the backtrace
+ * to an actual file and it is used as a simple prefix for
+ * the backtrace file. For example, if it is set to "/tmp/trace-upc",
+ * the actual trace file is going to be "/tmp/trace-upc-PID.MYTHREAD".
+ * If empty environment variable is provided, a simple "trace" prefix
+ * is used.
+ *
+ */
+void
+gupcr_backtrace (void)
+{
+ void *strace[GUPCR_BT_DEPTH_CNT];
+ size_t size,i;
+ char **strace_str;
+ char *file_env;
+ int under_upc_main = 1;
+ FILE *traceout = stderr;
+
+ file_env = getenv (GUPCR_BACKTRACE_FILE_ENV);
+ if (file_env)
+ {
+ #define MAX_INT_STRING ".2147483647"
+ char *tracefile;
+ int len, lenw;
+ /* Use default trace file name if one not specified by the user. */
+ if (!strlen (file_env))
+ file_env = (char *) UPC_BACKTRACE_PREFIX;
+ len = strlen (file_env) + strlen (MAX_INT_STRING) + 1;
+ tracefile = malloc (len);
+ if (!tracefile)
+ gupcr_fatal_error ("cannot allocate (%d) memory for backtrace file %s",
+ len, file_env);
+ lenw = snprintf (tracefile, len, "%s.%d", file_env, MYTHREAD);
+ if ((lenw >= len) || (lenw < 0))
+ gupcr_fatal_error ("cannot create backtrace file name: %s", file_env);
+ traceout = fopen (tracefile, "w");
+ if (!traceout)
+ gupcr_fatal_error ("cannot open backtrace file: %s", tracefile);
+ free (tracefile);
+ }
+ else
+ fprintf (traceout, "Thread %d backtrace:\n", MYTHREAD);
+
+ /* Use "backtrace" functionality of glibc to receive
+ backtrace addresses. */
+ size = backtrace (strace, GUPCR_BT_DEPTH_CNT);
+ /* Add symbolic information to each address
+ and print the stack trace. */
+ for (i = GUPCR_BT_SKIP_FRAME_CNT; i < size; i++)
+ {
+ if (under_upc_main)
+ {
+# if HAVE_UPC_BACKTRACE_ADDR2LINE
+ /* Call addr2line to generate source files, line numbers,
+ and functions. In case of any error (malloc, snprintf)
+ do not abort the program. */
+ FILE *a2l;
+ #define CMD_TMPL "%s -f -e %s %p"
+ /* Allow space for addr2line, filename, command line options,
+ and address argument for addr2line. */
+ int cmd_size = strlen (GUPCR_BACKTRACE_ADDR2LINE) +
+ strlen (gupcr_abs_execname) +
+ strlen (CMD_TMPL) +
+ strlen ("0x1234567812345678");
+ int sz;
+ char *cmd = malloc (cmd_size);
+ /* Create an actual addr2line command. */
+ sz = snprintf (cmd, cmd_size, CMD_TMPL, GUPCR_BACKTRACE_ADDR2LINE,
+ gupcr_abs_execname, strace[i]);
+ if ((sz >= cmd_size) || (sz < 0))
+ {
+ fprintf (traceout, "unable to create addr2line "
+ "command line\n");
+ return;
+ }
+ /* Execute addr2line. */
+ a2l = popen (cmd, "r");
+ free (cmd);
+ if (a2l)
+ {
+ /* addr2line responds with two lines: procedure name and
+ the file name with line number. */
+ int max_rep = 2 * FILENAME_MAX;
+ /* Build a data structure that is identical to the
+ structure returned by the glibc backtrace_symbol(). */
+ struct back_trace {
+ char *addr;
+ char data[1];
+ };
+ struct back_trace *rep = malloc (max_rep);
+ int index = 0;
+ if (!rep)
+ {
+ fprintf (traceout, "unable to acquire memory "
+ "for backtracing\n");
+ return;
+ }
+ rep->data[0] = '\0';
+ /* Read addr2line response. */
+ while (fgets(&rep->data[index], max_rep-index, a2l))
+ {
+ /* Remove all the new lines, as addr2line returns
+ info in multiple lines. */
+ index = strlen (&rep->data[0]);
+ if (rep->data[index - 1] == '\n')
+ rep->data[index - 1] = ' ';
+ }
+ pclose (a2l);
+ rep->addr = &rep->data[0];
+ strace_str = &rep->addr;
+ }
+ else
+ {
+ /* Somehow we failed to invoke addr2line, fall back
+ to glibc. */
+ strace_str = backtrace_symbols (&strace[i], 1);
+ }
+# else
+ strace_str = backtrace_symbols (&strace[i], 1);
+# endif
+ fprintf (traceout, "[%4d][%lld] %s\n", MYTHREAD,
+ (long long int) (i - GUPCR_BT_SKIP_FRAME_CNT), *strace_str);
+ /* Extra info for the barrier. */
+ if (strstr( *strace_str, "__upc_wait"))
+ {
+ fprintf (traceout, "[%4d] BARRIER ID: %d\n", MYTHREAD,
+ gupcr_barrier_id);
+ }
+ if (strstr (*strace_str, "upc_main"))
+ under_upc_main = 0;
+ /* Symbol trace buffer must be released. */
+ free (strace_str);
+ }
+ }
+ fflush (traceout);
+ if (file_env)
+ fclose (traceout);
+}
+
+#define GUPCR_BACKTRACE_PID_BUFLEN 16
+
+/**
+ * Backtrace on fatal errors.
+ *
+ * Print backtrace (stack frames) on fatal errors: run-time
+ * fatal error or segmentation fault.
+ *
+ * Only print backtrace if environment variable UPC_BACKTRACE
+ * is set to 1. The following order of backtrace capabilities
+ * is searched and executed:
+ *
+ * (1) Use GDB for backtrace (if enabled)
+ * (2) Use GLIBC backtrace with source file/line display (if
+ * addr2line is available)
+ * (3) Use GLIBC backtrace with raw addresses (display is
+ * improved if -rdynamic option is supported by the linker)
+ *
+ */
+void
+gupcr_fatal_error_backtrace (void)
+{
+ if (bt_enabled)
+ {
+#ifdef HAVE_UPC_BACKTRACE_GDB
+ {
+ char *env;
+ const char *gdb;
+ char pid_buf[GUPCR_BACKTRACE_PID_BUFLEN];
+ int child_pid;
+ /* Which gdb to use? */
+ env = getenv (GUPCR_BACKTRACE_GDB_ENV);
+ if (!env || (strlen (env) == 0))
+ gdb = GUPCR_BACKTRACE_GDB;
+ else
+ gdb = (const char *) env;
+ if (strcmp (gdb, "none"))
+ {
+ const char *err_msg = 0;
+ char tmpf[PATH_MAX];
+ int fbt;
+ const char *btcmd = "backtrace 30\n";
+ fprintf (stderr, "Thread %d GDB backtrace:\n", MYTHREAD);
+ /* Get pid and name of the running program. */
+ sprintf(pid_buf, "%d", getpid());
+ /* Create temp file for GDB commands. */
+ if ((fbt = gupcr_create_temp_file
+ ("upc_bt_gdb.XXXXXX", tmpf, &err_msg)) == -1)
+ {
+ fprintf (stderr, "cannot open gdb command - %s\n", err_msg);
+ return;
+ }
+ if (write (fbt, btcmd, sizeof (btcmd)) == -1)
+ {
+ perror ("cannot write gdb command file for backtrace");
+ return;
+ }
+ if (close (fbt))
+ {
+ perror ("cannot close gdb command file for backtrace");
+ return;
+ }
+ child_pid = fork();
+ if (!child_pid)
+ {
+ dup2(2,1);
+ execlp(gdb, gdb, "-nx", "-batch", "-x", tmpf,
+ gupcr_abs_execname, pid_buf, NULL);
+ fprintf (stderr, "cannot start GDB - %s\n", gdb);
+ abort(); /* If gdb failed to start */
+ }
+ else
+ waitpid(child_pid,NULL,0);
+ unlink (tmpf);
+ return;
+ }
+ }
+#endif /* GUPCR_BACKTRACE_GDB */
+
+ /* Simple backtrace only. */
+ gupcr_backtrace ();
+ }
+}
+
/**
 * Backtrace signal handler.
 *
 * Display stack frames on a request.  In case of the
 * monitor thread only print the mappings between the
 * UPC threads and processes.
 *
 * @param [in] sig Signal number (unused)
 * @param [in] siginfo Signal information (unused)
 * @param [in] context Signal context (unused)
 */
static void
gupcr_backtrace_handler (int sig __attribute__ ((unused)),
			 siginfo_t *siginfo __attribute__ ((unused)),
			 void *context __attribute__ ((unused)))
{
  gupcr_backtrace ();
}
+
/**
 * Backtrace fault handler.
 *
 * A fault happened and backtrace is enabled.  Allow for only
 * one thread to print the backtrace.  Restoring the signal
 * handlers to their defaults before returning ensures that the
 * re-raised signal terminates the thread and allows the monitor
 * thread to terminate all the other threads.
 *
 * @param [in] sig Signal number (unused)
 * @param [in] siginfo Signal information (unused)
 * @param [in] context Signal context (unused)
 */
static void
gupcr_fault_handler (int sig __attribute__ ((unused)),
		     siginfo_t *siginfo __attribute__ ((unused)),
		     void *context __attribute__ ((unused)))
{
  gupcr_backtrace_restore_handlers ();
  gupcr_fatal_error_backtrace ();
}
+
+/**
+ * Initialize UPC backtrace.
+ */
+void
+gupcr_backtrace_init (const char *execname)
+{
+ /* Find the full path for the executable. On linux systems we
+ might be able to read "/proc/self/exe" to the get the full
+ executable path. But, it is not portable. */
+ int slen = sizeof (gupcr_abs_execname) - strlen (execname) - 2;
+ gupcr_abs_execname = malloc (PATH_MAX + 1);
+ if (!gupcr_abs_execname)
+ gupcr_fatal_error ("cannot allocate space for executable file name");
+ *gupcr_abs_execname = '\0';
+ if (execname[0] != '/')
+ {
+ if (!getcwd (gupcr_abs_execname, slen))
+ strcpy (gupcr_abs_execname, "/BT_CANNOT_CREATE_ABS_PATH");
+ strcat (gupcr_abs_execname, "/");
+ }
+ strcat (gupcr_abs_execname, execname);
+
+#ifdef HAVE_UPC_BACKTRACE_SIGNAL
+ {
+ /* Install backtrace signal handler (backtrace on request). */
+ struct sigaction act;
+ memset (&act, '\0', sizeof(act));
+ act.sa_sigaction = &gupcr_backtrace_handler;
+ act.sa_flags = SA_SIGINFO;
+ if (sigaction(GUPCR_BACKTRACE_SIGNAL, &act, NULL) < 0) {
+ perror ("was not able to install backtrace handler");
+ }
+ }
+#endif
+
+ /* Install signal handlers only if backtrace is enabled. */
+ bt_enabled = gupcr_is_backtrace_enabled ();
+
+ if (bt_enabled)
+ {
+ struct sigaction act;
+ memset (&act, '\0', sizeof(act));
+ act.sa_sigaction = &gupcr_fault_handler;
+ act.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGABRT, &act, NULL) < 0)
+ perror ("unable to install SIGABRT handler");
+ if (sigaction(SIGILL, &act, NULL) < 0)
+ perror ("unable to install SIGILL handler");
+ if (sigaction(SIGSEGV, &act, NULL) < 0)
+ perror ("unable to install SIGSEGV handler");
+ if (sigaction(SIGBUS, &act, NULL) < 0)
+ perror ("unable to install SIGBUS handler");
+ if (sigaction(SIGFPE, &act, NULL) < 0)
+ perror ("unable to install SIGFPE handler");
+ }
+}
+
/**
 * Restore default handlers.
 *
 * Has to be called once the run-time discovered
 * a fatal error, so that subsequent faults are no longer
 * intercepted by the backtrace code.
 */
void
gupcr_backtrace_restore_handlers (void)
{
  /* The set of fault signals the backtrace code hooks; hand each
     one back to its default disposition.  */
  static const int bt_fault_signals[] =
    { SIGABRT, SIGILL, SIGSEGV, SIGBUS, SIGFPE };
  size_t i;
  for (i = 0;
       i < sizeof (bt_fault_signals) / sizeof (bt_fault_signals[0]); i++)
    signal (bt_fault_signals[i], SIG_DFL);
}
===================================================================
@@ -0,0 +1,45 @@
+/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
+ This file is part of the UPC runtime library.
+ Written by Gary Funck <gary@intrepid.com>
+ and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#ifndef GUPCR_BACKTRACE_H_
+#define GUPCR_BACKTRACE_H_
+
+/* Environment variables. */
+/** Enable/Disable backtrace env variable. */
+#define GUPCR_BACKTRACE_ENV "UPC_BACKTRACE"
+/** Enable/Disable STAT backtrace env variable. */
+#define GUPCR_BACKTRACE_FILE_ENV "UPC_BACKTRACEFILE"
+/** GDB command for backtrace env variable. */
+#define GUPCR_BACKTRACE_GDB_ENV "UPC_BACKTRACE_GDB"
+
+/* Interfaces. */
+extern void gupcr_backtrace (void);
+extern void gupcr_fatal_backtrace (void);
+extern void gupcr_backtrace_init (const char *execname);
+extern void gupcr_backtrace_restore_handlers (void);
+
#endif /* GUPCR_BACKTRACE_H_ */
===================================================================
@@ -0,0 +1,1003 @@
+/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
+ This file is part of the UPC runtime Library.
+ Written by Gary Funck <gary@intrepid.com>
+ and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/**
+ * @file gupcr_barrier.c
+ * GUPC Portals4 barrier implementation.
+ *
+ * The UPC barrier synchronization statements are:
+ * - upc_notify <i>expression</i>
+ * - upc_wait <i>expression</i>
+ * - upc_barrier <i>expression</i>
+ *
+ * The upc barrier statement is equivalent to the compound statement:
+ * <i>{ upc_notify barrier_value; upc_wait barrier_value; }</i>
+ *
+ * Important rules:
+ * - Each thread executes an alternating sequence of upc_notify and upc_wait
+ * statements.
+ * - A thread completes a <i>upc_wait</i> statement when all threads
+ * have executed a matching <i>upc_notify</i> statement.
+ * - <i>upc_notify</i> and <i>upc_wait</i> are collective operations and
+ * <i>expression</i> (if available) must match across all threads.
+ * - An empty <i>expression</i> matches any barrier ID.
+ *
+ * The GUPC runtime barrier implementation uses an "all reduce"
+ * algorithm as outlined in the paper <i>Enabling Flexible Collective
+ * Communication Offload with Triggered Operations</i> by Keith Underwood
+ * et al. January, 2007. Portals atomic operations and triggered
+ * atomic operations are used to propagate and verify
+ * that all UPC threads have entered the same synchronization phase
+ * with matching barrier IDs.
+ *
+ * For the purposes of implementing GUPC barriers, all UPC threads
+ * in a given job are organized as a tree. Thread 0 is the
+ * root thread (at the top of the tree). Other threads can be
+ * either an inner thread (has at least one child), or a leaf
+ * thread (has no children).
+ *
+ * A UPC barrier is implemented in two distinctive steps: notify and wait.
+ *
+ * A notify step uses the GUPCR_PTL_PTE_BARRIER_UP PTE to pass
+ * its barrier ID to the parent. The result of an atomic PTL_MIN
+ * operation among children and their parent is passed to the
+ * parent's parent until thread 0 is reached.
+ *
+ * A wait step uses the GUPCR_PTL_PTE_BARRIER_DOWN PTE to pass
+ * the derived consensus barrier ID to all threads. An error
+ * is raised if the derived ID does not match the thread's barrier ID.
+ *
+ * This implementation supports a split phase barrier where a given
+ * thread completes its wait statement once all other threads
+ * have reached their matching notify statement.
+ *
+ * Each thread uses the following resources:
+ *
+ * - PTEs (and LEs) for passing barrier IDs UP and DOWN the tree
+ * - MDs for sending a thread's barrier ID to parents and children
+ * - Counting events for LEs and MDs
+ * - Event queues for failure events on LEs and MDs
+ *
+ * Extensive use of Portals triggered functions allow for the efficient
+ * implementation of a split phase barrier.
+ *
+ * @addtogroup BARRIER GUPCR Barrier Functions
+ * @{
+ */
+
+#include "gupcr_config.h"
+#include "gupcr_defs.h"
+#include "gupcr_sup.h"
+#include "gupcr_sync.h"
+#include "gupcr_broadcast.h"
+#include "gupcr_portals.h"
+#include "gupcr_gmem.h"
+#include "gupcr_utils.h"
+
+/** Per-thread flag set by upc_notify() and cleared by upc_wait() */
+static int gupcr_barrier_active = 0;
+
+/** Max barrier ID used by the barrier implementation.
+ * The Portals PTL_MIN atomic function is used by
+ * each thread to report its barrier ID to its parents.
+ * The MAX barrier ID value is used to initialize the memory
+ * location targeted by PTL_MIN function.
+ */
+#define BARRIER_ID_MAX INT_MAX
+/** Anonymous barrier ID used by the barrier implementation.
+ * This barrier ID is used for barrier statements that do not
+ * specify a barrier ID and it matches any other barrier ID.
+ */
+#define BARRIER_ANONYMOUS INT_MIN
+/** Size of the barrier ID */
+#define BARRIER_ID_SIZE (sizeof (gupcr_barrier_value))
+
+/** Leaf thread check */
+#define LEAF_THREAD ((THREADS != 1) && (gupcr_child_cnt == 0))
+/** Root thread check */
+#define ROOT_THREAD (gupcr_parent_thread == -1)
+/** Inner thread check */
+#define INNER_THREAD ((gupcr_child_cnt != 0) && (gupcr_parent_thread != -1))
+
+/** Thread's current barrier ID */
+int gupcr_barrier_id;
+
+/** Memory storage for notify barrier ID. Mapped by
+ LE for external access, and MD for internal access. */
+
+static int gupcr_notify_value;
+/** Barrier notify LE handle (appended to GUPCR_PTL_PTE_BARRIER_UP) */
+static ptl_handle_le_t gupcr_notify_le;
+/** Barrier notify LE EQ handle */
+static ptl_handle_eq_t gupcr_notify_le_eq;
+/** Barrier notify LE CT handle */
+static ptl_handle_ct_t gupcr_notify_le_ct;
+/** Barrier notify LE CT wait counter */
+static ptl_size_t gupcr_notify_le_count;
+/** Barrier notify MD handle */
+static ptl_handle_md_t gupcr_notify_md;
+/** Barrier notify MD EQ handle */
+static ptl_handle_eq_t gupcr_notify_md_eq;
+/** Barrier notify MD CT handle */
+static ptl_handle_ct_t gupcr_notify_md_ct;
+/** Barrier notify MD CT wait counter */
+static ptl_size_t gupcr_notify_md_count;
+
+/** Barrier wait memory buffer pointer. The buffer is
+ mapped by a Portals LE for external access, and a Portals
+ MD for internal access. A pointer to the buffer is needed
+ because the broadcast implementation used internally by the
+ UPC runtime can broadcast arbitrarily sized values (that may
   be larger than an 'int').  */
+static int *gupcr_wait_ptr;
+/** Barrier wait LE handle (appended to GUPCR_PTL_PTE_BARRIER_DOWN) */
+static ptl_handle_le_t gupcr_wait_le;
+/** Barrier wait LE CT handle */
+static ptl_handle_ct_t gupcr_wait_le_ct;
+/** Barrier wait LE CT wait counter */
+static ptl_size_t gupcr_wait_le_count;
+/** Barrier wait LE EQ handle */
+static ptl_handle_eq_t gupcr_wait_le_eq;
+/** Barrier wait MD handle */
+static ptl_handle_md_t gupcr_wait_md;
+/** Barrier wait MD CT handle */
+static ptl_handle_ct_t gupcr_wait_md_ct;
+/** Barrier wait MD CT wait counter */
+static ptl_size_t gupcr_wait_md_count;
+/** Barrier wait MD EQ handle */
+static ptl_handle_eq_t gupcr_wait_md_eq;
+
+/** Memory storage (notify source) that holds the barrier ID for the PTL_MIN
+ atomic function used in the notify phase of the barrier. */
+static int gupcr_barrier_value;
+/** Barrier MD handle for the notify source */
+static ptl_handle_md_t gupcr_barrier_md;
+/** Barrier CT handle for the notify source */
+static ptl_handle_ct_t gupcr_barrier_md_ct;
+/** Barrier CT handle for the notify source wait counter */
+static ptl_size_t gupcr_barrier_md_count;
+/** Barrier EQ handle for the notify source */
+static ptl_handle_eq_t gupcr_barrier_md_eq;
+
+/** Memory storage that holds the maximum barrier ID value used to
+ re-initialize the memory storage for the notify barrier ID. */
+static int gupcr_barrier_max_value = BARRIER_ID_MAX;
+/** Barrier MD for MAX re-init */
+static ptl_handle_md_t gupcr_barrier_max_md;
+/** Barrier CT handle for MAX re-init */
+static ptl_handle_ct_t gupcr_barrier_max_md_ct;
+/** Barrier CT handle for MAX re-init wait counter */
+static ptl_size_t gupcr_barrier_max_md_count;
+/** Barrier EQ handle for MAX re-init */
+static ptl_handle_eq_t gupcr_barrier_max_md_eq;
+
/**
 * @fn __upc_notify (int barrier_id)
 * UPC <i>upc_notify</i> statement implementation
 *
 * This procedure sets the necessary Portals triggers to implement
 * the pass that derives a consensus barrier ID value across all
 * UPC threads.  The inner threads use Portals triggered operations
 * to pass the barrier ID negotiated among itself and its children
 * up the tree to its parent.
 * @param [in] barrier_id Barrier ID
 */
void
__upc_notify (int barrier_id)
{
  ptl_process_t rpid __attribute ((unused));

  gupcr_trace (FC_BARRIER, "BARRIER NOTIFY ENTER %d", barrier_id);

  /* upc_notify and upc_wait must strictly alternate; two
     consecutive notifies is a UPC semantic error.  */
  if (gupcr_barrier_active)
    gupcr_error ("two successive upc_notify statements executed "
                 "without an intervening upc_wait");
  gupcr_barrier_active = 1;
  gupcr_barrier_id = barrier_id;

  /* The UPC shared memory consistency model requires all outstanding
     read/write operations to complete on the thread's
     current synchronization phase.  */
  gupcr_gmem_sync ();

#if GUPCR_USE_PORTALS4_TRIGGERED_OPS
  /* A single thread trivially agrees with itself; nothing to do.  */
  if (THREADS == 1)
    return;

  /* Use barrier MAX number if barrier ID is "match all".
     This effectively excludes the thread from setting the min ID
     among the threads (PTL_MIN against BARRIER_ID_MAX is a no-op
     unless every thread used the anonymous ID).  */
  gupcr_barrier_value = (barrier_id == BARRIER_ANONYMOUS) ?
    BARRIER_ID_MAX : barrier_id;

  /* Debug aid only: dump each counting event's current success
     count next to the locally tracked expected count.  */
  if (gupcr_debug_enabled (FC_BARRIER))
    {
      ptl_ct_event_t ct;
      gupcr_portals_call (PtlCTGet, (gupcr_wait_le_ct, &ct));
      gupcr_debug (FC_BARRIER, "Wait LE counter: %lu (%lu)",
                   (long unsigned) ct.success,
                   (long unsigned) gupcr_wait_le_count);
      gupcr_portals_call (PtlCTGet, (gupcr_wait_md_ct, &ct));
      gupcr_debug (FC_BARRIER, "Wait MD counter: %lu (%lu)",
                   (long unsigned) ct.success,
                   (long unsigned) gupcr_wait_md_count);
      gupcr_portals_call (PtlCTGet, (gupcr_notify_le_ct, &ct));
      gupcr_debug (FC_BARRIER, "Notify LE counter: %lu (%lu)",
                   (long unsigned) ct.success,
                   (long unsigned) gupcr_notify_le_count);
      gupcr_portals_call (PtlCTGet, (gupcr_notify_md_ct, &ct));
      gupcr_debug (FC_BARRIER, "Notify MD counter: %lu (%lu)",
                   (long unsigned) ct.success,
                   (long unsigned) gupcr_notify_md_count);
      gupcr_portals_call (PtlCTGet, (gupcr_barrier_md_ct, &ct));
      gupcr_debug (FC_BARRIER, "Barrier MD counter: %lu (%lu)",
                   (long unsigned) ct.success,
                   (long unsigned) gupcr_barrier_md_count);
      gupcr_portals_call (PtlCTGet, (gupcr_barrier_max_md_ct, &ct));
      gupcr_debug (FC_BARRIER, "Barrier max MD counter: %lu (%lu)",
                   (long unsigned) ct.success,
                   (long unsigned) gupcr_barrier_max_md_count);
    }

  if (LEAF_THREAD)
    {
      /* Send the barrier ID to the parent - use atomic PTL_MIN to allow
         parent to find the minimum barrier ID among itself and its
         children.  A leaf has no children, so no triggers are needed.  */
      gupcr_debug (FC_BARRIER, "Send atomic PTL_MIN %d to (%d)",
                   gupcr_barrier_value, gupcr_parent_thread);
      rpid.rank = gupcr_parent_thread;
      gupcr_portals_call (PtlAtomic, (gupcr_barrier_md, 0,
                                      BARRIER_ID_SIZE, PTL_NO_ACK_REQ,
                                      rpid, GUPCR_PTL_PTE_BARRIER_UP,
                                      PTL_NO_MATCH_BITS, 0, PTL_NULL_USER_PTR,
                                      PTL_NULL_HDR_DATA, PTL_MIN,
                                      PTL_INT32_T));
    }
  else
    {
      int i;
      if (ROOT_THREAD)
        {
          /* The consensus MIN barrier ID derived in the notify (UP) phase
             must be transferred to the wait LE for delivery to all children.
             Trigger: Barrier ID received in the notify phase.
             Action: Send the barrier ID to the wait buffer of the
             barrier DOWN LE.
             (Threshold counts the children's PTL_MINs plus this
             thread's own, hence child count + 1.)  */
          rpid.rank = MYTHREAD;
          gupcr_notify_le_count += gupcr_child_cnt + 1;
          gupcr_portals_call (PtlTriggeredPut, (gupcr_notify_md, 0,
                                                BARRIER_ID_SIZE,
                                                PTL_NO_ACK_REQ, rpid,
                                                GUPCR_PTL_PTE_BARRIER_DOWN,
                                                PTL_NO_MATCH_BITS, 0,
                                                PTL_NULL_USER_PTR,
                                                PTL_NULL_HDR_DATA,
                                                gupcr_notify_le_ct,
                                                gupcr_notify_le_count));

        }
      else
        {
          /* The consensus MIN barrier ID of the inner thread and its children
             is sent to the parent UPC thread.
             Trigger: All children and this thread execute an atomic PTL_MIN
             using each thread's UP LE.
             Action: Transfer the consensus minimum barrier ID to the
             this thread's parent.  */
          rpid.rank = gupcr_parent_thread;
          gupcr_notify_le_count += gupcr_child_cnt + 1;
          gupcr_portals_call (PtlTriggeredAtomic, (gupcr_notify_md, 0,
                                                   BARRIER_ID_SIZE,
                                                   PTL_NO_ACK_REQ, rpid,
                                                   GUPCR_PTL_PTE_BARRIER_UP,
                                                   PTL_NO_MATCH_BITS, 0,
                                                   PTL_NULL_USER_PTR,
                                                   PTL_NULL_HDR_DATA,
                                                   PTL_MIN, PTL_INT32_T,
                                                   gupcr_notify_le_ct,
                                                   gupcr_notify_le_count));
        }

      /* Trigger: Barrier ID received in the wait buffer.
         Action: Reinitialize the barrier UP ID to barrier MAX value
         for the next call to upc_notify.  */
      rpid.rank = MYTHREAD;
      gupcr_wait_le_count += 1;
      gupcr_portals_call (PtlTriggeredPut, (gupcr_barrier_max_md, 0,
                                            BARRIER_ID_SIZE,
                                            PTL_NO_ACK_REQ, rpid,
                                            GUPCR_PTL_PTE_BARRIER_UP,
                                            PTL_NO_MATCH_BITS, 0,
                                            PTL_NULL_USER_PTR,
                                            PTL_NULL_HDR_DATA,
                                            gupcr_wait_le_ct,
                                            gupcr_wait_le_count));

      /* Trigger: The barrier ID is reinitialized to MAX.
         Action: Send the consensus barrier ID to all children.  */
      gupcr_notify_le_count += 1;
      for (i = 0; i < gupcr_child_cnt; i++)
        {
          rpid.rank = gupcr_child[i];
          gupcr_portals_call (PtlTriggeredPut, (gupcr_wait_md, 0,
                                                BARRIER_ID_SIZE,
                                                PTL_OC_ACK_REQ, rpid,
                                                GUPCR_PTL_PTE_BARRIER_DOWN,
                                                PTL_NO_MATCH_BITS, 0,
                                                PTL_NULL_USER_PTR,
                                                PTL_NULL_HDR_DATA,
                                                gupcr_notify_le_ct,
                                                gupcr_notify_le_count));
        }

      /* Allow notify to proceed and to possibly complete the wait
         phase on other threads.  */

      /* Find the minimum barrier ID among children and the root.  */
      gupcr_debug (FC_BARRIER, "Send atomic PTL_MIN %d to (%d)",
                   gupcr_barrier_value, MYTHREAD);
      rpid.rank = MYTHREAD;
      gupcr_portals_call (PtlAtomic, (gupcr_barrier_md, 0,
                                      BARRIER_ID_SIZE, PTL_NO_ACK_REQ,
                                      rpid, GUPCR_PTL_PTE_BARRIER_UP,
                                      PTL_NO_MATCH_BITS, 0, PTL_NULL_USER_PTR,
                                      PTL_NULL_HDR_DATA, PTL_MIN,
                                      PTL_INT32_T));
    }
#else
  /* The UPC runtime barrier implementation that does not use
     Portals triggered operations does not support split phase barriers.
     In this case, all Portals actions related to the barrier
     are performed in the __upc_wait() function.  */
#endif
  gupcr_trace (FC_BARRIER, "BARRIER NOTIFY EXIT %d", barrier_id);
}
+
/**
 * @fn __upc_wait (int barrier_id)
 * UPC <i>upc_wait</i> statement implementation
 *
 * This procedure waits to receive the derived consensus
 * barrier ID from the parent (leaf thread) or acknowledges that
 * all children received the consensus barrier ID (inner
 * and root threads).  The consensus barrier ID is checked
 * against the barrier ID passed in as an argument.
 * @param [in] barrier_id Barrier ID
 */
void
__upc_wait (int barrier_id)
{
  ptl_ct_event_t ct;
  ptl_process_t rpid __attribute ((unused));
  int received_barrier_id;
  gupcr_trace (FC_BARRIER, "BARRIER WAIT ENTER %d", barrier_id);

  /* upc_wait must be preceded by a matching upc_notify.  */
  if (!gupcr_barrier_active)
    gupcr_error ("upc_wait statement executed without a "
                 "preceding upc_notify");

  /* Check if notify/wait barrier IDs match.
     BARRIER_ANONYMOUS matches any other barrier ID.  */
  if ((barrier_id != BARRIER_ANONYMOUS &&
       gupcr_barrier_id != BARRIER_ANONYMOUS) &&
      (gupcr_barrier_id != barrier_id))
    {
      gupcr_error ("UPC barrier identifier mismatch - notify %d, wait %d",
                   gupcr_barrier_id, barrier_id);
    }

  /* Single-thread run: no communication is needed.  */
  if (THREADS == 1)
    {
      gupcr_barrier_active = 0;
      return;
    }

#if GUPCR_USE_PORTALS4_TRIGGERED_OPS
  /* Wait for the barrier ID to propagate down the tree.
     All triggered operations were armed in __upc_notify.  */
  if (gupcr_child_cnt)
    {
      /* Wait for the barrier ID to flow down to the children.  */
      gupcr_wait_md_count += gupcr_child_cnt;
      gupcr_portals_call (PtlCTWait,
                          (gupcr_wait_md_ct, gupcr_wait_md_count, &ct));
      if (ct.failure)
        {
          gupcr_process_fail_events (gupcr_wait_md_eq);
          gupcr_fatal_error ("received an error on wait MD");
        }
    }
  else
    {
      /* A leaf waits only for its own copy of the consensus ID.  */
      gupcr_wait_le_count += 1;
      gupcr_portals_call (PtlCTWait,
                          (gupcr_wait_le_ct, gupcr_wait_le_count, &ct));
      if (ct.failure)
        {
          gupcr_process_fail_events (gupcr_wait_le_eq);
          gupcr_fatal_error ("received an error on wait LE");
        }
    }
  received_barrier_id = *gupcr_wait_ptr;
#else
  /* UPC Barrier implementation without Portals Triggered Functions.  */

  /* NOTIFY - Propagate minimal barrier ID to the root thread.  */

  /* Use the barrier maximum ID number if the barrier ID is "match all".
     This effectively excludes the thread from setting the minimum ID
     among the threads.  */
  gupcr_barrier_value = (barrier_id == BARRIER_ANONYMOUS) ?
    BARRIER_ID_MAX : barrier_id;

  if (!LEAF_THREAD)
    {
      /* This step is performed by the root thread and inner threads.  */
      /* Find the minimal barrier ID among the thread and children.
         Use the Portals PTL_MIN atomic operation on the value
         in the notify LE.  */
      gupcr_debug (FC_BARRIER, "Send atomic PTL_MIN %d to (%d)",
                   gupcr_barrier_value, MYTHREAD);
      rpid.rank = MYTHREAD;
      gupcr_portals_call (PtlAtomic, (gupcr_barrier_md, 0,
                                      BARRIER_ID_SIZE, PTL_NO_ACK_REQ,
                                      rpid, GUPCR_PTL_PTE_BARRIER_UP,
                                      PTL_NO_MATCH_BITS, 0, PTL_NULL_USER_PTR,
                                      PTL_NULL_HDR_DATA, PTL_MIN,
                                      PTL_INT32_T));
      /* Wait for all children threads to report their barrier IDs.
         Account for this thread's atomic PTL_MIN.  */
      gupcr_notify_le_count += gupcr_child_cnt + 1;
      gupcr_portals_call (PtlCTWait,
                          (gupcr_notify_le_ct, gupcr_notify_le_count, &ct));
      if (ct.failure)
        {
          gupcr_process_fail_events (gupcr_notify_le_eq);
          gupcr_fatal_error ("received an error on notify LE");
        }
    }

  if (!ROOT_THREAD)
    {
      ptl_handle_md_t source_md;

      /* This step is performed by leaf threads and inner threads.  */
      /* Send the barrier ID to the parent - use atomic PTL_MIN on the value
         in the parents notify LE (derived minimal ID for the parent and its
         children.  */
      gupcr_debug (FC_BARRIER, "Send atomic PTL_MIN %d to (%d)",
                   gupcr_barrier_value, gupcr_parent_thread);
      if (LEAF_THREAD)
        source_md = gupcr_barrier_md;
      else
        /* An inner thread uses the minimal barrier ID
           derived from the parent thread and all its children.  */
        source_md = gupcr_notify_md;
      rpid.rank = gupcr_parent_thread;
      gupcr_portals_call (PtlAtomic,
                          (source_md, 0, BARRIER_ID_SIZE, PTL_NO_ACK_REQ,
                           rpid, GUPCR_PTL_PTE_BARRIER_UP,
                           PTL_NO_MATCH_BITS, 0, PTL_NULL_USER_PTR,
                           PTL_NULL_HDR_DATA, PTL_MIN, PTL_INT32_T));
    }

  /* At this point, the derived minimal barrier ID among all threads
     has arrived at the root thread.  */
  if (ROOT_THREAD)
    {
      *(int *) gupcr_wait_ptr = gupcr_notify_value;
    }
  else
    {
      /* Wait for the parent to send the derived agreed on barrier ID.  */
      gupcr_wait_le_count += 1;
      gupcr_portals_call (PtlCTWait,
                          (gupcr_wait_le_ct, gupcr_wait_le_count, &ct));
      if (ct.failure)
        {
          gupcr_process_fail_events (gupcr_wait_le_eq);
          gupcr_fatal_error ("received an error on wait LE");
        }
    }

  /* NOTE(review): the parent's Put lands in *gupcr_wait_ptr (the wait
     LE buffer), yet this reads gupcr_notify_value.  A leaf's notify
     value stays BARRIER_ID_MAX (nobody PTL_MINs into it), which the
     mismatch check below deliberately tolerates; confirm that inner
     threads cannot thereby miss a genuine ID mismatch.  */
  received_barrier_id = gupcr_notify_value;

  /* An inner thread sends the derived consensus
     minimum barrier ID to its children.  */
  if (!LEAF_THREAD)
    {
      int i;

      /* Re-initialize the barrier ID maximum range value.  */
      gupcr_notify_value = BARRIER_ID_MAX;

      /* Send the derived consensus minimum barrier ID to
         this thread's children.  */
      for (i = 0; i < gupcr_child_cnt; i++)
        {
          rpid.rank = gupcr_child[i];
          gupcr_portals_call (PtlPut,
                              (gupcr_wait_md, 0, BARRIER_ID_SIZE,
                               PTL_OC_ACK_REQ, rpid,
                               GUPCR_PTL_PTE_BARRIER_DOWN, PTL_NO_MATCH_BITS,
                               0, PTL_NULL_USER_PTR, PTL_NULL_HDR_DATA));
        }

      /* Wait until all children receive the consensus minimum
         barrier ID that is propagated down the tree.  */
      gupcr_wait_md_count += gupcr_child_cnt;
      gupcr_portals_call (PtlCTWait,
                          (gupcr_wait_md_ct, gupcr_wait_md_count, &ct));
      if (ct.failure)
        {
          gupcr_process_fail_events (gupcr_wait_md_eq);
          gupcr_fatal_error ("received an error on wait MD");
        }
    }

#endif /* GUPCR_USE_PORTALS4_TRIGGERED_OPS */

  /* Verify that the barrier ID matches.
     (INT_MIN here is the literal value of BARRIER_ANONYMOUS;
     BARRIER_ID_MAX is excluded because a fully anonymous barrier
     converges on that value.)  */
  if (barrier_id != INT_MIN &&
      barrier_id != received_barrier_id &&
      received_barrier_id != BARRIER_ID_MAX)
    gupcr_error ("thread %d: UPC barrier identifier mismatch among threads - "
                 "expected %d, received %d",
                 MYTHREAD, barrier_id, received_barrier_id);

  /* UPC Shared Memory Consistency Model requires all outstanding
     read/write operations to complete on the thread's enter
     into the next synchronization phase.  */
  gupcr_gmem_sync ();

  gupcr_barrier_active = 0;

  gupcr_trace (FC_BARRIER, "BARRIER WAIT EXIT %d", barrier_id);
}
+
/**
 * @fn __upc_barrier (int barrier_id)
 * UPC language upc_barrier implementation.
 *
 * Per the UPC specification, a upc_barrier statement is the
 * compound statement { upc_notify barrier_id; upc_wait barrier_id; }.
 * @param [in] barrier_id Barrier ID
 */
void
__upc_barrier (int barrier_id)
{
  /* Execute the two split-phase halves back-to-back.  */
  __upc_notify (barrier_id);
  __upc_wait (barrier_id);
}
+
+/* This Portals4 based broadcast implementation uses barrier resources
+ * to pass the broadcast message from thread 0 to all other threads. */
+
/**
 * @fn gupcr_bcast_send (void *value, size_t nbytes)
 * Send broadcast message to all thread's children.
 *
 * The broadcast is a collective operation where thread 0 (root thread)
 * sends a message to all other threads.  This function must be
 * called by the thread 0 only from a public function
 * "gupcr_broadcast_put".
 *
 * @param [in] value Pointer to send value
 * @param [in] nbytes Number of bytes to send
 * @ingroup BROADCAST
 */
void
gupcr_bcast_send (void *value, size_t nbytes)
{
  int i;
  ptl_process_t rpid;
  ptl_ct_event_t ct;

  gupcr_trace (FC_BROADCAST, "BROADCAST SEND ENTER 0x%lx %lu",
               (long unsigned) value, (long unsigned) nbytes);

  /* This broadcast operation is implemented as a collective operation.
     Before proceeding, complete all outstanding shared memory
     read/write operations.  */
  gupcr_gmem_sync ();

  /* Copy the message into the buffer used for delivery
     to the children threads.  */
  memcpy (gupcr_wait_ptr, value, nbytes);

  /* Wait for all children to signal (via the notify UP PTE) that
     they are ready to receive.  Counting events are cumulative
     across calls, so advance the local expected count first.  */
  gupcr_notify_le_count += gupcr_child_cnt;
  gupcr_portals_call (PtlCTWait,
                      (gupcr_notify_le_ct, gupcr_notify_le_count, &ct));
  if (ct.failure)
    {
      gupcr_process_fail_events (gupcr_notify_le_eq);
      gupcr_fatal_error ("received an error on notify LE");
    }

  /* Send broadcast to this thread's children.  */
  for (i = 0; i < gupcr_child_cnt; i++)
    {
      rpid.rank = gupcr_child[i];
      gupcr_debug (FC_BROADCAST, "Send broadcast message to child (%d)",
                   gupcr_child[i]);
      gupcr_portals_call (PtlPut, (gupcr_wait_md, 0,
                                   nbytes, PTL_ACK_REQ, rpid,
                                   GUPCR_PTL_PTE_BARRIER_DOWN,
                                   PTL_NO_MATCH_BITS, 0, PTL_NULL_USER_PTR,
                                   PTL_NULL_HDR_DATA));
    }

  /* Wait for message delivery to all children.  This ensures that
     the source buffer is not overwritten by back-to-back
     broadcast operations.  */
  gupcr_wait_md_count += gupcr_child_cnt;
  gupcr_portals_call (PtlCTWait,
                      (gupcr_wait_md_ct, gupcr_wait_md_count, &ct));
  if (ct.failure)
    {
      gupcr_process_fail_events (gupcr_wait_md_eq);
      gupcr_fatal_error ("received an error on wait MD");
    }
  gupcr_trace (FC_BROADCAST, "BROADCAST SEND EXIT");
}
+
+/**
+ * @fn gupcr_bcast_recv (void *value, size_t nbytes)
+ * Wait to receive the broadcast message and return its value.
+ *
+ * Broadcast is a collective operation where thread 0 (the root thread)
+ * sends a message to all other threads. This function must be
+ * called by every thread other then thread 0.
+ *
+ * @param [in] value Pointer to received value
+ * @param [in] nbytes Number of bytes to receive
+ * @ingroup BROADCAST
+ */
+void
+gupcr_bcast_recv (void *value, size_t nbytes)
+{
+ int i;
+ ptl_process_t rpid;
+ ptl_ct_event_t ct;
+
+ gupcr_trace (FC_BROADCAST, "BROADCAST RECV ENTER 0x%lx %lu",
+ (long unsigned) value, (long unsigned) nbytes);
+
+ gupcr_gmem_sync ();
+
+#if GUPCR_USE_PORTALS4_TRIGGERED_OPS
+ if (INNER_THREAD)
+ {
+ /* Prepare triggers for message push to all children. */
+ gupcr_wait_le_count += 1;
+ for (i = 0; i < gupcr_child_cnt; i++)
+ {
+ rpid.rank = gupcr_child[i];
+ gupcr_debug (FC_BROADCAST,
+ "Set broadcast trigger to the child (%d)",
+ gupcr_child[i]);
+ /* Trigger: message received from the parent.
+ Action: send the message to the child. */
+ gupcr_portals_call (PtlTriggeredPut, (gupcr_wait_md, 0,
+ nbytes, PTL_ACK_REQ, rpid,
+ GUPCR_PTL_PTE_BARRIER_DOWN,
+ PTL_NO_MATCH_BITS, 0,
+ PTL_NULL_USER_PTR,
+ PTL_NULL_HDR_DATA,
+ gupcr_wait_le_ct,
+ gupcr_wait_le_count));
+ }
+
+ /* Prepare a trigger to send notification to the parent. */
+ gupcr_debug (FC_BROADCAST,
+ "Set notification trigger to the parent (%d)",
+ gupcr_parent_thread);
+ rpid.rank = gupcr_parent_thread;
+ gupcr_barrier_value = BARRIER_ID_MAX;
+ /* Trigger: notification received from all children.
+ Action: send notification to the parent. */
+ gupcr_notify_le_count += gupcr_child_cnt;
+ gupcr_portals_call (PtlTriggeredPut, (gupcr_barrier_md, 0,
+ BARRIER_ID_SIZE,
+ PTL_NO_ACK_REQ, rpid,
+ GUPCR_PTL_PTE_BARRIER_UP,
+ PTL_NO_MATCH_BITS, 0,
+ PTL_NULL_USER_PTR,
+ PTL_NULL_HDR_DATA,
+ gupcr_notify_le_ct,
+ gupcr_notify_le_count));
+
+ /* Wait for delivery to all children. */
+ gupcr_wait_md_count += gupcr_child_cnt;
+ gupcr_portals_call (PtlCTWait,
+ (gupcr_wait_md_ct, gupcr_wait_md_count, &ct));
+ if (ct.failure)
+ {
+ gupcr_process_fail_events (gupcr_wait_md_eq);
+ gupcr_fatal_error ("received an error on wait MD");
+ }
+ gupcr_debug (FC_BROADCAST, "Received PtlPut acks: %lu",
+ (long unsigned) ct.success);
+ }
+ else
+ {
+ /* A leaf thread sends notification to its parent that
+ it is ready to receive the broadcast value. */
+ gupcr_debug (FC_BROADCAST, "Send notification to the parent (%d)",
+ gupcr_parent_thread);
+ rpid.rank = gupcr_parent_thread;
+ gupcr_barrier_value = BARRIER_ID_MAX;
+ gupcr_portals_call (PtlPut, (gupcr_barrier_md, 0,
+ BARRIER_ID_SIZE, PTL_NO_ACK_REQ, rpid,
+ GUPCR_PTL_PTE_BARRIER_UP,
+ PTL_NO_MATCH_BITS, 0, PTL_NULL_USER_PTR,
+ PTL_NULL_HDR_DATA));
+
+ /* Wait to receive a message from the parent. */
+ gupcr_wait_le_count += 1;
+ gupcr_portals_call (PtlCTWait,
+ (gupcr_wait_le_ct, gupcr_wait_le_count, &ct));
+ if (ct.failure)
+ {
+ gupcr_process_fail_events (gupcr_wait_le_eq);
+ gupcr_fatal_error ("received an error on wait LE");
+ }
+ }
+ memcpy (value, gupcr_wait_ptr, nbytes);
+#else
+ /* Inner threads must wait for its children threads to arrive. */
+ if (INNER_THREAD)
+ {
+ gupcr_debug (FC_BROADCAST, "Waiting for %d notifications",
+ gupcr_child_cnt);
+ gupcr_notify_le_count += gupcr_child_cnt;
+ gupcr_portals_call (PtlCTWait,
+ (gupcr_notify_le_ct, gupcr_child_cnt, &ct));
+ if (ct.failure)
+ {
+ gupcr_process_fail_events (gupcr_notify_le_eq);
+ gupcr_fatal_error ("received an error on notify LE");
+ }
+ gupcr_debug (FC_BROADCAST, "Received %lu broadcast notifications",
+ (long unsigned) ct.success);
+ }
+
+ /* Inform the parent that this thread and all its children arrived.
+ Send barrier MAX value as we share PTEs with the barrier
+ implementation. */
+ gupcr_debug (FC_BROADCAST, "Send notification to the parent %d",
+ gupcr_parent_thread);
+ rpid.rank = gupcr_parent_thread;
+ gupcr_barrier_value = BARRIER_ID_MAX;
+ gupcr_portals_call (PtlPut, (gupcr_barrier_md, 0,
+ BARRIER_ID_SIZE, PTL_NO_ACK_REQ, rpid,
+ GUPCR_PTL_PTE_BARRIER_UP, PTL_NO_MATCH_BITS, 0,
+ PTL_NULL_USER_PTR, PTL_NULL_HDR_DATA));
+
+ /* Receive the broadcast message from the parent. */
+ gupcr_wait_le_count += 1;
+ gupcr_portals_call (PtlCTWait,
+ (gupcr_wait_le_ct, gupcr_wait_le_count, &ct));
+ if (ct.failure)
+ {
+ gupcr_process_fail_events (gupcr_wait_le_eq);
+ gupcr_fatal_error ("received an error on wait LE");
+ }
+
+ /* Copy the received message. */
+ memcpy (value, gupcr_wait_ptr, nbytes);
+
+ if (INNER_THREAD)
+ {
+ /* An inner thread must pass the message to its children. */
+ for (i = 0; i < gupcr_child_cnt; i++)
+ {
+ gupcr_debug (FC_BROADCAST, "Sending a message to %d",
+ gupcr_child[i]);
+ rpid.rank = gupcr_child[i];
+ gupcr_portals_call (PtlPut, (gupcr_wait_md, 0,
+ nbytes, PTL_ACK_REQ, rpid,
+ GUPCR_PTL_PTE_BARRIER_DOWN,
+ PTL_NO_MATCH_BITS, 0,
+ PTL_NULL_USER_PTR, PTL_NULL_HDR_DATA));
+ }
+ /* Wait for delivery to all children. */
+ gupcr_wait_md_count += gupcr_child_cnt;
+ gupcr_portals_call (PtlCTWait, (gupcr_wait_md_ct, gupcr_wait_md_count,
+ &ct));
+ if (ct.failure)
+ {
+ gupcr_process_fail_events (gupcr_wait_md_eq);
+ gupcr_fatal_error ("received an error on wait MD");
+ }
+ }
+#endif
+ gupcr_trace (FC_BROADCAST, "BROADCAST RECV EXIT");
+}
+
+/**
+ * @fn gupcr_barrier_init (void)
+ * Initialize barrier resources.
+ *
+ * Allocates the counting events (CTs), event queues (EQs), portal
+ * table entries (PTEs), list entries (LEs), and memory descriptors
+ * (MDs) used by the barrier's "up" (notify) and "down" (wait)
+ * phases.  The order of the PTE allocations is checked against the
+ * expected fixed indices.  gupcr_barrier_fini releases all of these
+ * resources.
+ * @ingroup INIT
+ */
+void
+gupcr_barrier_init (void)
+{
+ ptl_pt_index_t pte;
+ ptl_le_t le;
+ ptl_md_t md;
+
+ gupcr_log (FC_BARRIER, "barrier init called");
+
+ /* Create necessary CT handles; each software event counter
+ shadowing a CT starts at zero. */
+ gupcr_portals_call (PtlCTAlloc, (gupcr_ptl_ni, &gupcr_notify_le_ct));
+ gupcr_notify_le_count = 0;
+ gupcr_portals_call (PtlCTAlloc, (gupcr_ptl_ni, &gupcr_notify_md_ct));
+ gupcr_notify_md_count = 0;
+ gupcr_portals_call (PtlCTAlloc, (gupcr_ptl_ni, &gupcr_wait_le_ct));
+ gupcr_wait_le_count = 0;
+ gupcr_portals_call (PtlCTAlloc, (gupcr_ptl_ni, &gupcr_wait_md_ct));
+ gupcr_wait_md_count = 0;
+ gupcr_portals_call (PtlCTAlloc, (gupcr_ptl_ni, &gupcr_barrier_md_ct));
+ gupcr_barrier_md_count = 0;
+ gupcr_portals_call (PtlCTAlloc, (gupcr_ptl_ni, &gupcr_barrier_max_md_ct));
+ gupcr_barrier_max_md_count = 0;
+
+ /* Create necessary EQ handles. Allocate only one event queue entry
+ as we abort on any error (full EQs are therefore not a concern). */
+ gupcr_portals_call (PtlEQAlloc, (gupcr_ptl_ni, 1, &gupcr_notify_le_eq));
+ gupcr_portals_call (PtlEQAlloc, (gupcr_ptl_ni, 1, &gupcr_notify_md_eq));
+ gupcr_portals_call (PtlEQAlloc, (gupcr_ptl_ni, 1, &gupcr_wait_le_eq));
+ gupcr_portals_call (PtlEQAlloc, (gupcr_ptl_ni, 1, &gupcr_wait_md_eq));
+ gupcr_portals_call (PtlEQAlloc, (gupcr_ptl_ni, 1, &gupcr_barrier_md_eq));
+ gupcr_portals_call (PtlEQAlloc,
+ (gupcr_ptl_ni, 1, &gupcr_barrier_max_md_eq));
+
+ /* Allocate PTEs. Each must land on its well-known fixed index so
+ that remote threads can target it. */
+ gupcr_portals_call (PtlPTAlloc, (gupcr_ptl_ni, 0,
+ gupcr_notify_le_eq,
+ GUPCR_PTL_PTE_BARRIER_UP, &pte));
+ if (pte != GUPCR_PTL_PTE_BARRIER_UP)
+ gupcr_fatal_error ("cannot allocate GUPCR_PTL_PTE_BARRIER_UP PTE");
+ gupcr_debug (FC_BARRIER, "Barrier UP PTE allocated: %d",
+ GUPCR_PTL_PTE_BARRIER_UP);
+ gupcr_portals_call (PtlPTAlloc, (gupcr_ptl_ni, 0,
+ gupcr_wait_le_eq,
+ GUPCR_PTL_PTE_BARRIER_DOWN, &pte));
+ if (pte != GUPCR_PTL_PTE_BARRIER_DOWN)
+ gupcr_fatal_error ("cannot allocate GUPCR_PTL_PTE_BARRIER_DOWN PTE");
+ gupcr_debug (FC_BARRIER, "Barrier DOWN PTE allocated: %d",
+ GUPCR_PTL_PTE_BARRIER_DOWN);
+
+ /* Children perform atomic MIN on up_value,
+ make sure we start with the maximum possible value. */
+ gupcr_notify_value = BARRIER_ID_MAX;
+
+ /* Create LE for barrier ID value traveling up the tree.
+ CT events are counted, but full success events are suppressed
+ since the EQ is only needed for failures. */
+ le.start = &gupcr_notify_value;
+ le.length = sizeof (gupcr_notify_value);
+ le.ct_handle = gupcr_notify_le_ct;
+ le.uid = PTL_UID_ANY;
+ le.options = PTL_LE_OP_PUT | PTL_LE_OP_GET |
+ PTL_LE_EVENT_CT_COMM | PTL_LE_EVENT_SUCCESS_DISABLE |
+ PTL_LE_EVENT_LINK_DISABLE;
+ gupcr_portals_call (PtlLEAppend,
+ (gupcr_ptl_ni, GUPCR_PTL_PTE_BARRIER_UP, &le,
+ PTL_PRIORITY_LIST, NULL, &gupcr_notify_le));
+
+ /* Create LE for barrier ID value traveling down the tree.
+ Allocate enough space as barrier resources are
+ used to also broadcast arbitrary values. */
+ gupcr_malloc (gupcr_wait_ptr, GUPCR_MAX_BROADCAST_SIZE);
+ le.start = gupcr_wait_ptr;
+ le.length = GUPCR_MAX_BROADCAST_SIZE;
+ le.ct_handle = gupcr_wait_le_ct;
+ le.uid = PTL_UID_ANY;
+ le.options = PTL_LE_OP_PUT | PTL_LE_OP_GET |
+ PTL_LE_EVENT_CT_COMM | PTL_LE_EVENT_SUCCESS_DISABLE |
+ PTL_LE_EVENT_LINK_DISABLE;
+ gupcr_portals_call (PtlLEAppend,
+ (gupcr_ptl_ni, GUPCR_PTL_PTE_BARRIER_DOWN, &le,
+ PTL_PRIORITY_LIST, NULL, &gupcr_wait_le));
+
+ /* Create source MD for barrier ID values sent up the tree. */
+ md.start = &gupcr_notify_value;
+ md.length = sizeof (gupcr_notify_value);
+ md.options = PTL_MD_EVENT_CT_ACK | PTL_MD_EVENT_SUCCESS_DISABLE;
+ md.eq_handle = gupcr_notify_md_eq;
+ md.ct_handle = gupcr_notify_md_ct;
+ gupcr_portals_call (PtlMDBind, (gupcr_ptl_ni, &md, &gupcr_notify_md));
+
+ /* Create source MD for barrier ID values sent down the tree. */
+ md.start = gupcr_wait_ptr;
+ md.length = GUPCR_MAX_BROADCAST_SIZE;
+ md.options = PTL_MD_EVENT_CT_ACK | PTL_MD_EVENT_SUCCESS_DISABLE;
+ md.eq_handle = gupcr_wait_md_eq;
+ md.ct_handle = gupcr_wait_md_ct;
+ gupcr_portals_call (PtlMDBind, (gupcr_ptl_ni, &md, &gupcr_wait_md));
+
+ /* Create source MD for the current barrier ID value
+ (gupcr_barrier_value). NOTE(review): the original comment here
+ duplicated the notify MD's "sent up the tree" wording;
+ presumably this MD feeds the barrier's atomic operations --
+ confirm against the barrier wait/notify implementation. */
+ md.start = &gupcr_barrier_value;
+ md.length = sizeof (gupcr_barrier_value);
+ md.options = PTL_MD_EVENT_CT_ACK | PTL_MD_EVENT_SUCCESS_DISABLE;
+ md.eq_handle = gupcr_barrier_md_eq;
+ md.ct_handle = gupcr_barrier_md_ct;
+ gupcr_portals_call (PtlMDBind, (gupcr_ptl_ni, &md, &gupcr_barrier_md));
+
+ /* Create source MD that is used to re-initialize the
+ consensus minimum barrier ID value to the maximum
+ possible value. */
+ md.start = &gupcr_barrier_max_value;
+ md.length = sizeof (gupcr_barrier_max_value);
+ md.options = PTL_MD_EVENT_CT_ACK | PTL_MD_EVENT_SUCCESS_DISABLE;
+ md.eq_handle = gupcr_barrier_max_md_eq;
+ md.ct_handle = gupcr_barrier_max_md_ct;
+ gupcr_portals_call (PtlMDBind, (gupcr_ptl_ni, &md, &gupcr_barrier_max_md));
+}
+
+/**
+ * @fn gupcr_barrier_fini (void)
+ * Release barrier resources.
+ *
+ * Tears down everything gupcr_barrier_init created: first cancel
+ * any outstanding triggered operations (when triggered ops are
+ * compiled in), then release the MDs with their CTs and EQs, and
+ * finally unlink the LEs and free their CTs, EQs, and the two
+ * barrier PTEs.
+ * @ingroup INIT
+ */
+void
+gupcr_barrier_fini (void)
+{
+ gupcr_log (FC_BARRIER, "barrier fini called");
+
+#if GUPCR_USE_PORTALS4_TRIGGERED_OPS
+ /* Cancel any outstanding triggered operations.
+ NOTE(review): only these four CTs are canceled; presumably the
+ notify/barrier source-MD CTs never have triggered operations
+ attached -- confirm against the barrier implementation. */
+ gupcr_portals_call (PtlCTCancelTriggered, (gupcr_wait_le_ct));
+ gupcr_portals_call (PtlCTCancelTriggered, (gupcr_barrier_max_md_ct));
+ gupcr_portals_call (PtlCTCancelTriggered, (gupcr_notify_le_ct));
+ gupcr_portals_call (PtlCTCancelTriggered, (gupcr_wait_md_ct));
+#endif
+
+ /* Release MDs and their CTs. */
+ gupcr_portals_call (PtlMDRelease, (gupcr_barrier_md));
+ gupcr_portals_call (PtlCTFree, (gupcr_barrier_md_ct));
+ gupcr_portals_call (PtlEQFree, (gupcr_barrier_md_eq));
+ gupcr_portals_call (PtlMDRelease, (gupcr_barrier_max_md));
+ gupcr_portals_call (PtlCTFree, (gupcr_barrier_max_md_ct));
+ gupcr_portals_call (PtlEQFree, (gupcr_barrier_max_md_eq));
+ gupcr_portals_call (PtlMDRelease, (gupcr_notify_md));
+ gupcr_portals_call (PtlCTFree, (gupcr_notify_md_ct));
+ gupcr_portals_call (PtlEQFree, (gupcr_notify_md_eq));
+ gupcr_portals_call (PtlMDRelease, (gupcr_wait_md));
+ gupcr_portals_call (PtlCTFree, (gupcr_wait_md_ct));
+ gupcr_portals_call (PtlEQFree, (gupcr_wait_md_eq));
+
+ /* Release LEs, their CTs, and PTEs. */
+ gupcr_portals_call (PtlLEUnlink, (gupcr_notify_le));
+ gupcr_portals_call (PtlCTFree, (gupcr_notify_le_ct));
+ gupcr_portals_call (PtlEQFree, (gupcr_notify_le_eq));
+ gupcr_portals_call (PtlPTFree, (gupcr_ptl_ni, GUPCR_PTL_PTE_BARRIER_UP));
+
+ gupcr_portals_call (PtlLEUnlink, (gupcr_wait_le));
+ gupcr_portals_call (PtlCTFree, (gupcr_wait_le_ct));
+ gupcr_portals_call (PtlEQFree, (gupcr_wait_le_eq));
+ gupcr_portals_call (PtlPTFree, (gupcr_ptl_ni, GUPCR_PTL_PTE_BARRIER_DOWN));
+}
+
+/** @} */
===================================================================
@@ -0,0 +1,49 @@
+/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
+ This file is part of the UPC runtime Library.
+ Written by Gary Funck <gary@intrepid.com>
+ and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef _GUPCR_BARRIER_H_
+#define _GUPCR_BARRIER_H_
+
+/**
+ * @file gupcr_barrier.h
+ * GUPC Portals4 barrier implementation.
+ *
+ * @addtogroup BARRIER GUPCR Barrier Functions
+ * @{
+ */
+
+/* Allocate / release the Portals resources used by the barrier. */
+extern void gupcr_barrier_init (void);
+extern void gupcr_barrier_fini (void);
+
+/* Broadcast support functions.  Barrier resources are reused to
+ broadcast arbitrary values (args: value pointer, byte count). */
+extern void gupcr_bcast_send (void *, size_t);
+extern void gupcr_bcast_recv (void *, size_t);
+
+/* Current barrier ID. */
+extern int gupcr_barrier_id;
+
+/** @} */
+#endif /* _GUPCR_BARRIER_H_ */
===================================================================
@@ -0,0 +1,122 @@
+/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
+ This file is part of the UPC runtime Library.
+ Written by Gary Funck <gary@intrepid.com>
+ and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include "gupcr_config.h"
+#include "gupcr_defs.h"
+#include "gupcr_lib.h"
+#include "gupcr_sup.h"
+#include "gupcr_utils.h"
+#include "gupcr_barrier.h"
+#include "gupcr_broadcast.h"
+
+/**
+ * @file gupcr_broadcast.c
+ * GUPC Portals4 broadcast implementation.
+ *
+ * The broadcast utility functions are internal functions used by
+ * the UPC runtime when it is necessary to propagate (broadcast)
+ * a value from thread 0 to all other threads.
+ * For example, upc_all_alloc and upc_all_lock_alloc will
+ * call the broadcast functions to propagate their result to all threads.
+ *
+ * The broadcast functions use an algorithm that is a variant of
+ * the one used to implement a barrier. The "up phase" signals
+ * that each thread is ready to receive the broadcast value, while the
+ * "down phase" is used to receive the actual value.
+ */
+
+/**
+ * @addtogroup BROADCAST GUPCR Broadcast Functions
+ * @{
+ */
+
+/**
+ * Receive the broadcast value.
+ *
+ * A thread signals to its parent (by writing to its parent's UP PTE)
+ * that it is ready to receive a broadcast value, and then waits on
+ * its down LE counting event until the message arrives.
+ * Must not be called from thread 0; the root thread sends the value
+ * via gupcr_broadcast_put.  Aborts (gupcr_fatal_error) on misuse or
+ * when the requested size exceeds GUPCR_MAX_BROADCAST_SIZE.
+ * @param [out] value Pointer to the location receiving the value
+ * @param [in] nbytes Number of bytes to receive
+ */
+void
+gupcr_broadcast_get (void *value, size_t nbytes)
+{
+  if (!MYTHREAD)
+    gupcr_fatal_error ("called from thread 0");
+  if (nbytes > GUPCR_MAX_BROADCAST_SIZE)
+    /* Fixed wording: "greater then" -> "greater than".  */
+    gupcr_fatal_error ("size of broadcast message (%ld) is greater than "
+                       "the maximum allowed (%d)",
+                       (long int) nbytes, GUPCR_MAX_BROADCAST_SIZE);
+  /* Wait to receive the broadcast message. */
+  gupcr_bcast_recv (value, nbytes);
+}
+
+/**
+ * Send the broadcast value. This function must be called
+ * only by thread 0 (the root thread). The broadcast value
+ * is pushed down the tree by first sending the value
+ * to the children of the root thread.
+ * A single-thread program has nobody to broadcast to, so the call
+ * is a no-op when THREADS == 1.  Aborts (gupcr_fatal_error) when
+ * called from a non-root thread or when the requested size exceeds
+ * GUPCR_MAX_BROADCAST_SIZE.
+ *
+ * @param [in] value Pointer to send value
+ * @param [in] nbytes Number of bytes to send
+ */
+void
+gupcr_broadcast_put (void *value, size_t nbytes)
+{
+  if (THREADS == 1)
+    return;
+  if (MYTHREAD)
+    /* Fixed wording: "other then" -> "other than".  */
+    gupcr_fatal_error ("called from thread other than 0");
+  if (nbytes > GUPCR_MAX_BROADCAST_SIZE)
+    /* Fixed wording: "greater then" -> "greater than".  */
+    gupcr_fatal_error ("size of broadcast message (%ld) is greater than "
+                       "maximum allowed (%d)",
+                       (long int) nbytes, GUPCR_MAX_BROADCAST_SIZE);
+  /* Send the broadcast message to the children of the root thread. */
+  gupcr_bcast_send (value, nbytes);
+}
+
+/**
+ * Initialize broadcast resources.
+ *
+ * Currently only logs the call: the Portals resources that carry
+ * broadcast traffic (the DOWN LE of GUPCR_MAX_BROADCAST_SIZE bytes)
+ * are allocated by the barrier setup, which broadcast reuses.
+ * @ingroup INIT
+ */
+void
+gupcr_broadcast_init (void)
+{
+ gupcr_log (FC_BROADCAST, "broadcast init called");
+}
+
+/**
+ * Release broadcast resources.
+ *
+ * Currently only logs the call; see gupcr_broadcast_init -- the
+ * underlying Portals resources belong to the barrier and are
+ * released by its teardown.
+ * @ingroup INIT
+ */
+void
+gupcr_broadcast_fini (void)
+{
+ gupcr_log (FC_BROADCAST, "broadcast fini called");
+}
+
+/** @} */
===================================================================
@@ -0,0 +1,50 @@
+/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
+ This file is part of the UPC runtime Library.
+ Written by Gary Funck <gary@intrepid.com>
+ and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef _GUPCR_BROADCAST_H_
+#define _GUPCR_BROADCAST_H_
+
+/**
+ * @file gupcr_broadcast.h
+ * GUPC Portals4 broadcast implementation.
+ */
+
+/**
+ * @addtogroup BROADCAST GUPCR Broadcast Functions
+ * @{
+ */
+
+/** Maximum message size (in bytes) that can be sent via broadcast. */
+#define GUPCR_MAX_BROADCAST_SIZE 32
+
+/** @} */
+
+/* Receive (non-root threads) / send (thread 0 only) a broadcast
+ value of NBYTES bytes. */
+extern void gupcr_broadcast_get (void *value, size_t nbytes);
+extern void gupcr_broadcast_put (void *value, size_t nbytes);
+/* Set up / tear down broadcast state. */
+extern void gupcr_broadcast_init (void);
+extern void gupcr_broadcast_fini (void);
+
+#endif /* _GUPCR_BROADCAST_H_ */
===================================================================
@@ -0,0 +1,69 @@
+/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
+ This file is part of the UPC runtime Library.
+ Written by Gary Funck <gary@intrepid.com>
+ and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include <upc.h>
+#include <upc_castable.h>
+#include "gupcr_portals.h"
+#include "gupcr_pts.h"
+#include "gupcr_gmem.h"
+#include "gupcr_node.h"
+
+/**
+ * Convert a shared pointer into a local pointer (UPC 1.3
+ * upc_castable).
+ *
+ * Returns the local address of the object PTR refers to when that
+ * object resides in memory directly addressable by this thread
+ * (same node), and NULL otherwise -- including for a NULL shared
+ * pointer.  Aborts when the thread number embedded in the shared
+ * pointer is out of range.
+ */
+void *
+upc_cast (const shared void *ptr)
+{
+  const upc_shared_ptr_t sptr = GUPCR_PTS_TO_REP (ptr);
+  void *local_ptr = NULL;
+  if (!GUPCR_PTS_IS_NULL (sptr))
+    {
+      const size_t thread = GUPCR_PTS_THREAD (sptr);
+      /* Compare in the unsigned domain: casting a huge size_t to
+         int first could produce a negative value and bypass the
+         range check.  */
+      if (thread >= (size_t) THREADS)
+        gupcr_fatal_error ("thread number %d in shared address "
+                           "is out of range", (int) thread);
+      if (GUPCR_GMEM_IS_LOCAL (thread))
+        {
+          const size_t offset = GUPCR_PTS_OFFSET (sptr);
+          local_ptr = GUPCR_GMEM_OFF_TO_LOCAL (thread, offset);
+        }
+    }
+  return local_ptr;
+}
+
+/**
+ * Return castability information for THREAD (UPC 1.3 upc_castable).
+ *
+ * When THREAD's shared memory is directly addressable from the
+ * calling thread, both castability fields are set to
+ * UPC_CASTABLE_ALL; otherwise both remain zero.  Aborts when
+ * THREAD is out of range.
+ */
+upc_thread_info_t
+upc_thread_info (size_t thread)
+{
+  upc_thread_info_t cast_info = { 0, 0 };
+  /* Compare in the unsigned domain: casting a huge size_t to int
+     first could produce a negative value and bypass the range
+     check.  */
+  if (thread >= (size_t) THREADS)
+    gupcr_fatal_error ("thread number %d in shared address "
+                       "is out of range", (int) thread);
+  if (GUPCR_GMEM_IS_LOCAL (thread))
+    {
+      cast_info.guaranteedCastable = UPC_CASTABLE_ALL;
+      cast_info.probablyCastable = UPC_CASTABLE_ALL;
+    }
+  return cast_info;
+}