diff mbox

[committed] Improve OMP_PLACES=threads and OMP_PLACES=cores (PR libgomp/80822)

Message ID 20170530121249.GK24023@tucnak
State New
Headers show

Commit Message

Jakub Jelinek May 30, 2017, 12:12 p.m. UTC
Hi!

While the OpenMP specification leaves it completely in
implementation-defined territory how OMP_PLACES=threads or OMP_PLACES=cores
orders the individual places, for OMP_PROC_BIND=spread the order of the
places is very important and Intel libomp as well as Cray runtime apparently
choose an ordering based on topology distances, while libgomp was just
choosing to order the places based on increasing OS CPU id.

This patch changes it to match what the other runtimes do, e.g. with
OMP_PROC_BIND=spread OMP_NUM_THREADS=16 on 2s8c2t topology with Intel CPU
with the patch the binding is each thread to a different core, while without
the patch we ended up with 2 threads on half of the cores and 0 on others.
The disadvantage is that it is slower to initialize (needs to query the
topology, even for =cores it is more expensive, previously we queried only
the thread siblings, now we also need to query the core siblings).

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk,
queued for backports.

2017-05-30  Jakub Jelinek  <jakub@redhat.com>

	PR libgomp/80822
	* config/linux/affinity.c (gomp_affinity_init_level_1): New function.
	(gomp_affinity_init_level): Use it.  Always analyze the core and thread
	sibling lists, depending on level just pick up what CPUs to put
	together into a place vs. whether add multiple ordered places.


	Jakub
diff mbox

Patch

--- libgomp/config/linux/affinity.c.jj	2017-01-01 12:45:53.000000000 +0100
+++ libgomp/config/linux/affinity.c	2017-05-26 18:09:07.950939602 +0200
@@ -222,10 +222,87 @@  gomp_affinity_finalize_place_list (bool
   return true;
 }
 
+static void
+gomp_affinity_init_level_1 (int level, int this_level, unsigned long count,
+			    cpu_set_t *copy, char *name, bool quiet)
+{
+  size_t prefix_len = sizeof ("/sys/devices/system/cpu/cpu") - 1;
+  FILE *f;
+  char *line = NULL;
+  size_t linelen = 0;
+  unsigned long i, max = 8 * gomp_cpuset_size;
+
+  for (i = 0; i < max && gomp_places_list_len < count; i++)
+    if (CPU_ISSET_S (i, gomp_cpuset_size, copy))
+      {
+	sprintf (name + prefix_len, "%lu/topology/%s_siblings_list",
+		 i, this_level == 3 ? "core" : "thread");
+	f = fopen (name, "r");
+	if (f == NULL)
+	  {
+	    CPU_CLR_S (i, gomp_cpuset_size, copy);
+	    continue;
+	  }
+	if (getline (&line, &linelen, f) > 0)
+	  {
+	    char *p = line;
+	    void *pl = gomp_places_list[gomp_places_list_len];
+	    if (level == this_level)
+	      gomp_affinity_init_place (pl);
+	    while (*p && *p != '\n')
+	      {
+		unsigned long first, last;
+		errno = 0;
+		first = strtoul (p, &p, 10);
+		if (errno)
+		  break;
+		last = first;
+		if (*p == '-')
+		  {
+		    errno = 0;
+		    last = strtoul (p + 1, &p, 10);
+		    if (errno || last < first)
+		      break;
+		  }
+		for (; first <= last; first++)
+		  if (!CPU_ISSET_S (first, gomp_cpuset_size, copy))
+		    continue;
+		  else if (this_level == 3 && level < this_level)
+		    gomp_affinity_init_level_1 (level, 2, count, copy,
+						name, quiet);
+		  else
+		    {
+		      if (level == 1)
+			{
+			  pl = gomp_places_list[gomp_places_list_len];
+			  gomp_affinity_init_place (pl);
+			}
+		      if (gomp_affinity_add_cpus (pl, first, 1, 0, true))
+			{
+			  CPU_CLR_S (first, gomp_cpuset_size, copy);
+			  if (level == 1)
+			    gomp_places_list_len++;
+			}
+		    }
+		if (*p == ',')
+		  ++p;
+	      }
+	    if (level == this_level
+		&& !CPU_ISSET_S (i, gomp_cpuset_size, copy))
+	      gomp_places_list_len++;
+	    CPU_CLR_S (i, gomp_cpuset_size, copy);
+	  }
+	fclose (f);
+      }
+  free (line);
+}
+
 bool
 gomp_affinity_init_level (int level, unsigned long count, bool quiet)
 {
-  unsigned long i, max = 8 * gomp_cpuset_size;
+  char name[sizeof ("/sys/devices/system/cpu/cpu/topology/"
+		    "thread_siblings_list") + 3 * sizeof (unsigned long)];
+  cpu_set_t *copy;
 
   if (gomp_cpusetp)
     {
@@ -238,90 +315,20 @@  gomp_affinity_init_level (int level, uns
   gomp_places_list_len = 0;
   if (gomp_places_list == NULL)
     return false;
-  /* SMT (threads).  */
-  if (level == 1)
+
+  copy = gomp_alloca (gomp_cpuset_size);
+  strcpy (name, "/sys/devices/system/cpu/cpu");
+  memcpy (copy, gomp_cpusetp, gomp_cpuset_size);
+  gomp_affinity_init_level_1 (level, 3, count, copy, name, quiet);
+  if (gomp_places_list_len == 0)
     {
-      for (i = 0; i < max && gomp_places_list_len < count; i++)
-	if (CPU_ISSET_S (i, gomp_cpuset_size, gomp_cpusetp))
-	  {
-	    gomp_affinity_init_place (gomp_places_list[gomp_places_list_len]);
-	    gomp_affinity_add_cpus (gomp_places_list[gomp_places_list_len],
-				    i, 1, 0, true);
-	    ++gomp_places_list_len;
-	  }
-      return true;
+      if (!quiet)
+	gomp_error ("Error reading core/socket topology");
+      free (gomp_places_list);
+      gomp_places_list = NULL;
+      return false;
     }
-  else
-    {
-      char name[sizeof ("/sys/devices/system/cpu/cpu/topology/"
-			"thread_siblings_list") + 3 * sizeof (unsigned long)];
-      size_t prefix_len = sizeof ("/sys/devices/system/cpu/cpu") - 1;
-      cpu_set_t *copy = gomp_alloca (gomp_cpuset_size);
-      FILE *f;
-      char *line = NULL;
-      size_t linelen = 0;
-
-      memcpy (name, "/sys/devices/system/cpu/cpu", prefix_len);
-      memcpy (copy, gomp_cpusetp, gomp_cpuset_size);
-      for (i = 0; i < max && gomp_places_list_len < count; i++)
-	if (CPU_ISSET_S (i, gomp_cpuset_size, copy))
-	  {
-	    sprintf (name + prefix_len, "%lu/topology/%s_siblings_list",
-		     i, level == 2 ? "thread" : "core");
-	    f = fopen (name, "r");
-	    if (f != NULL)
-	      {
-		if (getline (&line, &linelen, f) > 0)
-		  {
-		    char *p = line;
-		    bool seen_i = false;
-		    void *pl = gomp_places_list[gomp_places_list_len];
-		    gomp_affinity_init_place (pl);
-		    while (*p && *p != '\n')
-		      {
-			unsigned long first, last;
-			errno = 0;
-			first = strtoul (p, &p, 10);
-			if (errno)
-			  break;
-			last = first;
-			if (*p == '-')
-			  {
-			    errno = 0;
-			    last = strtoul (p + 1, &p, 10);
-			    if (errno || last < first)
-			      break;
-			  }
-			for (; first <= last; first++)
-			  if (CPU_ISSET_S (first, gomp_cpuset_size, copy)
-			      && gomp_affinity_add_cpus (pl, first, 1, 0,
-							 true))
-			    {
-			      CPU_CLR_S (first, gomp_cpuset_size, copy);
-			      if (first == i)
-				seen_i = true;
-			    }
-			if (*p == ',')
-			  ++p;
-		      }
-		    if (seen_i)
-		      gomp_places_list_len++;
-		  }
-		fclose (f);
-	      }
-	  }
-      if (gomp_places_list_len == 0)
-	{
-	  if (!quiet)
-	    gomp_error ("Error reading %s topology",
-			level == 2 ? "core" : "socket");
-	  free (gomp_places_list);
-	  gomp_places_list = NULL;
-	  return false;
-	}
-      return true;
-    }
-  return false;
+  return true;
 }
 
 void