Patchwork [gomp4] Support more than 1024 CPUs (PR libgomp/57298)

login
register
mail settings
Submitter Jakub Jelinek
Date Oct. 1, 2013, 2:55 p.m.
Message ID <20131001145534.GD30970@tucnak.zalov.cz>
Download mbox | patch
Permalink /patch/279485/
State New
Headers show

Comments

Jakub Jelinek - Oct. 1, 2013, 2:55 p.m.
Hi!

This is just preparation for the OMP_PLACES work; I figured that before
changing the affinity code it would be better to fix this PR first.
As gomp_init_num_threads is always called before gomp_init_affinity, there
is no point in calling pthread_getaffinity_np twice, and I'll need the
initial affinity mask for OMP_PLACES anyway, so the patch just remembers
it.  The CPU_*_S and CPU_ALLOC_SIZE macros were apparently introduced only
in glibc 2.7, so the patch also tries to cope with older glibcs by simply
using a gomp_cpuset_size of 128 bytes (1024 bits) in that case.

2013-10-01  Jakub Jelinek  <jakub@redhat.com>

	PR libgomp/57298
	* config/linux/proc.c (gomp_cpuset_size, gomp_cpusetp): New variables.
	(gomp_cpuset_popcount): Use CPU_COUNT_S if available, or CPU_COUNT if
	gomp_cpuset_size is sizeof (cpu_set_t).  Use gomp_cpuset_size instead
	of sizeof (cpu_set_t) to determine number of iterations.
	(gomp_init_num_threads): Initialize gomp_cpuset_size and gomp_cpusetp
	here, use gomp_cpusetp instead of &cpuset and pass gomp_cpuset_size
	instead of sizeof (cpu_set_t) to pthread_getaffinity_np.
	(get_num_procs): Don't call pthread_getaffinity_np if gomp_cpusetp
	is NULL.  Use gomp_cpusetp instead of &cpuset and pass gomp_cpuset_size
	instead of sizeof (cpu_set_t) to pthread_getaffinity_np.
	* config/linux/proc.h (gomp_cpuset_popcount): Add attribute_hidden.
	(gomp_cpuset_size, gomp_cpusetp): Declare.
	* config/linux/affinity.c (CPU_ISSET_S, CPU_ZERO_S, CPU_SET_S): Define
	if CPU_ALLOC_SIZE isn't defined.
	(gomp_init_affinity): Don't call pthread_getaffinity_np here, instead
	use gomp_cpusetp computed by gomp_init_num_threads.  Use CPU_*_S
	variants of macros with gomp_cpuset_size as set size, for cpusetnew
	use alloca for it if CPU_ALLOC_SIZE is defined, otherwise local
	fixed size variable.
	(gomp_init_thread_affinity): Use CPU_*_S variants of macros with
	gomp_cpuset_size as set size, for cpuset use alloca for it if
	CPU_ALLOC_SIZE is defined, otherwise local fixed size variable.


	Jakub

Patch

--- libgomp/config/linux/proc.c.jj	2013-03-20 10:02:06.000000000 +0100
+++ libgomp/config/linux/proc.c	2013-10-01 14:09:00.759638855 +0200
@@ -39,19 +39,27 @@ 
 #endif
 
 #ifdef HAVE_PTHREAD_AFFINITY_NP
+unsigned long gomp_cpuset_size;
+cpu_set_t *gomp_cpusetp;
+
 unsigned long
 gomp_cpuset_popcount (cpu_set_t *cpusetp)
 {
-#ifdef CPU_COUNT
-  /* glibc 2.6 and above provide a macro for this.  */
-  return CPU_COUNT (cpusetp);
+#ifdef CPU_COUNT_S
+  /* glibc 2.7 and above provide a macro for this.  */
+  return CPU_COUNT_S (gomp_cpuset_size, cpusetp);
 #else
+#ifdef CPU_COUNT
+  if (gomp_cpuset_size == sizeof (cpu_set_t))
+    /* glibc 2.6 and above provide a macro for this.  */
+    return CPU_COUNT (cpusetp);
+#endif
   size_t i;
   unsigned long ret = 0;
   extern int check[sizeof (cpusetp->__bits[0]) == sizeof (unsigned long int)];
 
   (void) check;
-  for (i = 0; i < sizeof (*cpusetp) / sizeof (cpusetp->__bits[0]); i++)
+  for (i = 0; i < gomp_cpuset_size / sizeof (cpusetp->__bits[0]); i++)
     {
       unsigned long int mask = cpusetp->__bits[i];
       if (mask == 0)
@@ -70,16 +78,28 @@  void
 gomp_init_num_threads (void)
 {
 #ifdef HAVE_PTHREAD_AFFINITY_NP
-  cpu_set_t cpuset;
+#if defined (_SC_NPROCESSORS_CONF) && defined (CPU_ALLOC_SIZE)
+  gomp_cpuset_size = sysconf (_SC_NPROCESSORS_CONF);
+  gomp_cpuset_size = CPU_ALLOC_SIZE (gomp_cpuset_size);
+#else
+  gomp_cpuset_size = sizeof (cpu_set_t);
+#endif
 
-  if (pthread_getaffinity_np (pthread_self (), sizeof (cpuset), &cpuset) == 0)
+  gomp_cpusetp = (cpu_set_t *) gomp_malloc (gomp_cpuset_size);
+  if (pthread_getaffinity_np (pthread_self (), gomp_cpuset_size,
+			      gomp_cpusetp) == 0)
     {
       /* Count only the CPUs this process can use.  */
-      gomp_global_icv.nthreads_var = gomp_cpuset_popcount (&cpuset);
+      gomp_global_icv.nthreads_var = gomp_cpuset_popcount (gomp_cpusetp);
       if (gomp_global_icv.nthreads_var == 0)
 	gomp_global_icv.nthreads_var = 1;
       return;
     }
+  else
+    {
+      free (gomp_cpusetp);
+      gomp_cpusetp = NULL;
+    }
 #endif
 #ifdef _SC_NPROCESSORS_ONLN
   gomp_global_icv.nthreads_var = sysconf (_SC_NPROCESSORS_ONLN);
@@ -90,15 +110,14 @@  static int
 get_num_procs (void)
 {
 #ifdef HAVE_PTHREAD_AFFINITY_NP
-  cpu_set_t cpuset;
-
   if (gomp_cpu_affinity == NULL)
     {
       /* Count only the CPUs this process can use.  */
-      if (pthread_getaffinity_np (pthread_self (), sizeof (cpuset),
-				  &cpuset) == 0)
+      if (gomp_cpusetp
+	  && pthread_getaffinity_np (pthread_self (), gomp_cpuset_size,
+				     gomp_cpusetp) == 0)
 	{
-	  int ret = gomp_cpuset_popcount (&cpuset);
+	  int ret = gomp_cpuset_popcount (gomp_cpusetp);
 	  return ret != 0 ? ret : 1;
 	}
     }
--- libgomp/config/linux/proc.h.jj	2013-03-20 10:02:06.000000000 +0100
+++ libgomp/config/linux/proc.h	2013-10-01 13:48:00.690527479 +0200
@@ -28,7 +28,9 @@ 
 #include <sched.h>
 
 #ifdef HAVE_PTHREAD_AFFINITY_NP
-extern unsigned long gomp_cpuset_popcount (cpu_set_t *);
+extern unsigned long gomp_cpuset_size attribute_hidden;
+extern cpu_set_t *gomp_cpusetp attribute_hidden;
+extern unsigned long gomp_cpuset_popcount (cpu_set_t *) attribute_hidden;
 #endif
 
 #endif /* GOMP_PROC_H */
--- libgomp/config/linux/affinity.c.jj	2013-03-20 10:02:06.000000000 +0100
+++ libgomp/config/linux/affinity.c	2013-10-01 15:52:33.173193230 +0200
@@ -33,17 +33,22 @@ 
 #include <unistd.h>
 
 #ifdef HAVE_PTHREAD_AFFINITY_NP
-
 static unsigned int affinity_counter;
 
+#ifndef CPU_ALLOC_SIZE
+#define CPU_ISSET_S(idx, size, set) CPU_ISSET(idx, set)
+#define CPU_ZERO_S(size, set) CPU_ZERO(set)
+#define CPU_SET_S(idx, size, set) CPU_SET(idx, set)
+#endif
+
 void
 gomp_init_affinity (void)
 {
-  cpu_set_t cpuset, cpusetnew;
   size_t idx, widx;
   unsigned long cpus = 0;
+  cpu_set_t *cpusetnewp;
 
-  if (pthread_getaffinity_np (pthread_self (), sizeof (cpuset), &cpuset))
+  if (gomp_cpusetp == NULL)
     {
       gomp_error ("could not get CPU affinity set");
       free (gomp_cpu_affinity);
@@ -52,10 +57,16 @@  gomp_init_affinity (void)
       return;
     }
 
-  CPU_ZERO (&cpusetnew);
+#ifdef CPU_ALLOC_SIZE
+  cpusetnewp = (cpu_set_t *) gomp_alloca (gomp_cpuset_size);
+#else
+  cpu_set_t cpusetnew;
+  cpusetnewp = &cpusetnew;
+#endif
+
   if (gomp_cpu_affinity_len == 0)
     {
-      unsigned long count = gomp_cpuset_popcount (&cpuset);
+      unsigned long count = gomp_cpuset_popcount (gomp_cpusetp);
       if (count >= 65536)
 	count = 65536;
       gomp_cpu_affinity = malloc (count * sizeof (unsigned short));
@@ -65,24 +76,30 @@  gomp_init_affinity (void)
 	  return;
 	}
       for (widx = idx = 0; widx < count && idx < 65536; idx++)
-	if (CPU_ISSET (idx, &cpuset))
+	if (CPU_ISSET_S (idx, gomp_cpuset_size, gomp_cpusetp))
 	  {
 	    cpus++;
 	    gomp_cpu_affinity[widx++] = idx;
 	  }
     }
   else
-    for (widx = idx = 0; idx < gomp_cpu_affinity_len; idx++)
-      if (gomp_cpu_affinity[idx] < CPU_SETSIZE
-	  && CPU_ISSET (gomp_cpu_affinity[idx], &cpuset))
-	{
-	  if (! CPU_ISSET (gomp_cpu_affinity[idx], &cpusetnew))
-	    {
-	      cpus++;
-	      CPU_SET (gomp_cpu_affinity[idx], &cpusetnew);
+    {
+      CPU_ZERO_S (gomp_cpuset_size, cpusetnewp);
+      for (widx = idx = 0; idx < gomp_cpu_affinity_len; idx++)
+	if (gomp_cpu_affinity[idx] < 8 * gomp_cpuset_size
+	    && CPU_ISSET_S (gomp_cpu_affinity[idx], gomp_cpuset_size,
+			    gomp_cpusetp))
+	  {
+	    if (! CPU_ISSET_S (gomp_cpu_affinity[idx], gomp_cpuset_size,
+			       cpusetnewp))
+	      {
+		cpus++;
+		CPU_SET_S (gomp_cpu_affinity[idx], gomp_cpuset_size,
+			   cpusetnewp);
 	    }
 	  gomp_cpu_affinity[widx++] = gomp_cpu_affinity[idx];
 	}
+    }
 
   if (widx == 0)
     {
@@ -96,9 +113,10 @@  gomp_init_affinity (void)
   gomp_cpu_affinity_len = widx;
   if (cpus < gomp_available_cpus)
     gomp_available_cpus = cpus;
-  CPU_ZERO (&cpuset);
-  CPU_SET (gomp_cpu_affinity[0], &cpuset);
-  pthread_setaffinity_np (pthread_self (), sizeof (cpuset), &cpuset);
+  CPU_ZERO_S (gomp_cpuset_size, cpusetnewp);
+  CPU_SET_S (gomp_cpu_affinity[0], gomp_cpuset_size, cpusetnewp);
+  pthread_setaffinity_np (pthread_self (), gomp_cpuset_size,
+			  cpusetnewp);
   affinity_counter = 1;
 }
 
@@ -106,13 +124,20 @@  void
 gomp_init_thread_affinity (pthread_attr_t *attr)
 {
   unsigned int cpu;
+  cpu_set_t *cpusetp;
+
+#ifdef CPU_ALLOC_SIZE
+  cpusetp = (cpu_set_t *) gomp_alloca (gomp_cpuset_size);
+#else
   cpu_set_t cpuset;
+  cpusetp = &cpuset;
+#endif
 
   cpu = __atomic_fetch_add (&affinity_counter, 1, MEMMODEL_RELAXED);
   cpu %= gomp_cpu_affinity_len;
-  CPU_ZERO (&cpuset);
-  CPU_SET (gomp_cpu_affinity[cpu], &cpuset);
-  pthread_attr_setaffinity_np (attr, sizeof (cpu_set_t), &cpuset);
+  CPU_ZERO_S (gomp_cpuset_size, cpusetp);
+  CPU_SET_S (gomp_cpu_affinity[cpu], gomp_cpuset_size, cpusetp);
+  pthread_attr_setaffinity_np (attr, gomp_cpuset_size, cpusetp);
 }
 
 #else