chiark / gitweb /
better with more bendingness costs
[moebius2.git] / parallel.c
index 3c94607b845915fa06f04d3c09a1013a3b3d244a..f5841eeb21040f7394a833b25d66399cd8bd23e6 100644 (file)
@@ -2,6 +2,8 @@
  * Parallel processing
  */
 
+#include "common.h"
+
 #include <pthread.h>
 
 #include "mgraph.h"
@@ -22,13 +24,21 @@ typedef struct {
 
 static void *routine(void *thread_v) {
   PerThread *t= thread_v;
-  ForAllThreads *a= t->allthreads;
 
-  a->separately(a->vertices, t->section, t->secdata, a->gendata);
+  for (;;) {
+    inparallel_barrier(); /* wait for work to do */
+    ForAllThreads *a= t->allthreads;
+    a->separately(a->vertices, t->section, t->secdata, a->gendata);
+    inparallel_barrier(); /* synchronise for completion */
+  }
 
   return 0;
 }
 
+static int threads_started;
+static pthread_barrier_t threads_barrier;
+static PerThread threads[NSECTIONS-1];
+
 void inparallel(const struct Vertices *vertices,
                Computation *separately,
                Computation *combine,
@@ -37,25 +47,45 @@ void inparallel(const struct Vertices *vertices,
 
   ForAllThreads allthreads;
   SecData secdatas[nsections];
-  PerThread threads[nsections];
-
-  int s, r;
 
   allthreads.vertices= vertices;
   allthreads.separately= separately;
   allthreads.gendata= gendata;
 
-  for (s=0; s<nsections; s++) {
+  int s, r;
+
+  if (NSECTIONS>1 && !threads_started) {
+    r= pthread_barrier_init(&threads_barrier, 0, NSECTIONS);
+    if (r) { errno=r; diee("pthread_barrier_init"); }
+
+    for (s=0; s<NSECTIONS-1; s++) {
+      r= pthread_create(&threads[s].thread,0,routine,&threads[s]);
+      if (r) { errno=r; diee("pthread_create"); }
+    }
+    threads_started= 1;
+  }    
+
+  for (s=0; s<NSECTIONS-1; s++) {
     threads[s].allthreads= &allthreads;
     threads[s].section= s;
     threads[s].secdata= secdatas[s].secdata;
-    r= pthread_create(&threads[s].thread,0,routine,&threads[s]);
-    if (r) diee("pthread_create");
   }
 
-  for (s=0; s<nsections; s++) {
-    r= pthread_join(threads[s].thread, 0);
-    if (r) diee("pthread_join");
-    combine(vertices, s, threads[s].secdata, gendata);
+  inparallel_barrier(); /* announce more work to do */
+
+  separately(vertices, NSECTIONS-1, &secdatas[NSECTIONS-1], gendata);
+
+  inparallel_barrier(); /* synchronise for completion */
+
+  for (s=0; s<nsections; s++)
+    combine(vertices, s, &secdatas[s].secdata, gendata);
+}
+
+void inparallel_barrier(void) {
+  if (NSECTIONS>1) {
+    int r;
+    r= pthread_barrier_wait(&threads_barrier);
+    if (r && r!=PTHREAD_BARRIER_SERIAL_THREAD)
+      { errno=r; diee("pthread_barrier_wait"); }
   }
 }