chiark / gitweb /
use barriers rather than recreating threads
[moebius2.git] / parallel.c
index a79a3704732d53db09f8fbb489d89cdf500fff77..badc75d79b6f4a42bd4ed40b3dcaf69738bb8cc1 100644 (file)
@@ -2,6 +2,8 @@
  * Parallel processing
  */
 
+#include "common.h"
+
 #include <pthread.h>
 
 #include "mgraph.h"
@@ -20,16 +22,22 @@ typedef struct {
   pthread_t thread;
 } PerThread;
 
-#if NPROCESSORS != 1
 static void *routine(void *thread_v) {
   PerThread *t= thread_v;
-  ForAllThreads *a= t->allthreads;
 
-  a->separately(a->vertices, t->section, t->secdata, a->gendata);
+  for (;;) {
+    inparallel_barrier(); /* wait for work to do */
+    ForAllThreads *a= t->allthreads;
+    a->separately(a->vertices, t->section, t->secdata, a->gendata);
+    inparallel_barrier(); /* synchronise for completion */
+  }
 
   return 0;
 }
-#endif
+
+static int threads_started;
+static pthread_barrier_t threads_barrier;
+static PerThread threads[NSECTIONS-1];
 
 void inparallel(const struct Vertices *vertices,
                Computation *separately,
@@ -44,25 +52,37 @@ void inparallel(const struct Vertices *vertices,
   allthreads.separately= separately;
   allthreads.gendata= gendata;
 
-#if NPROCESSORS != 1
-  PerThread threads[nsections];
   int s, r;
 
-  for (s=0; s<nsections; s++) {
+  if (!threads_started) {
+    r= pthread_barrier_init(&threads_barrier, 0, NSECTIONS);
+    if (r) { errno=r; diee("pthread_barrier_init"); }
+
+    for (s=0; s<NSECTIONS-1; s++) {
+      r= pthread_create(&threads[s].thread,0,routine,&threads[s]);
+      if (r) { errno=r; diee("pthread_create"); }
+    }
+  }    
+
+  for (s=0; s<NSECTIONS-1; s++) {
     threads[s].allthreads= &allthreads;
     threads[s].section= s;
     threads[s].secdata= secdatas[s].secdata;
-    r= pthread_create(&threads[s].thread,0,routine,&threads[s]);
-    if (r) diee("pthread_create");
   }
 
-  for (s=0; s<nsections; s++) {
-    r= pthread_join(threads[s].thread, 0);
-    if (r) diee("pthread_join");
-    combine(vertices, s, threads[s].secdata, gendata);
-  }
-#else
-  separately(vertices, 0, &secdatas[0], gendata);
-  combine(vertices, 0, &secdatas[0], gendata);
-#endif
+  inparallel_barrier(); /* announce more work to do */
+
+  separately(vertices, NSECTIONS-1, &secdatas[NSECTIONS-1], gendata);
+
+  inparallel_barrier(); /* synchronise for completion */
+
+  for (s=0; s<nsections; s++)
+    combine(vertices, s, &secdatas[s].secdata, gendata);
+}
+
+void inparallel_barrier(void) {
+  int r;
+  r= pthread_barrier_wait(&threads_barrier);
+  if (r && r!=PTHREAD_BARRIER_SERIAL_THREAD)
+    { errno=r; diee("pthread_barrier_wait"); }
 }