chiark / gitweb /
core/manager: fix conditions to start and stop watching running jobs
author: Michal Schmidt <mschmidt@redhat.com>
Mon, 4 Mar 2013 13:38:51 +0000 (14:38 +0100)
committer: Harald Hoyer <harald@redhat.com>
Mon, 4 Mar 2013 13:51:32 +0000 (14:51 +0100)
Harald encountered a division by zero in manager_print_jobs_in_progress().
Clearly we had the watch enabled when we shouldn't have - there were no
running jobs in m->jobs, only waiting ones. This is either a deadlock,
or maybe some of them would be detected as runnable in the next dispatch
of the run queue. In any case we mustn't crash.

Fix it by starting and stopping the watch based on n_running_jobs
instead of the number of all jobs.

src/core/manager.c

index 91d773c2f6c9f1589e4adbcfd4c8b14bc4643e4d..1d188081a1d71f5bac416242ec3744c94461e624 100644 (file)
@@ -1144,7 +1144,7 @@ unsigned manager_dispatch_run_queue(Manager *m) {
 
         m->dispatching_run_queue = false;
 
-        if (hashmap_size(m->jobs) > 0)
+        if (m->n_running_jobs > 0)
                 manager_watch_jobs_in_progress(m);
 
         return n;
@@ -2368,10 +2368,11 @@ void manager_check_finished(Manager *m) {
 
         assert(m);
 
-        if (hashmap_size(m->jobs) > 0) {
-                manager_jobs_in_progress_mod_timer(m);
+        if (m->n_running_jobs == 0)
+                manager_unwatch_jobs_in_progress(m);
+
+        if (hashmap_size(m->jobs) > 0)
                 return;
-        }
 
         /* Notify Type=idle units that we are done now */
         close_pipe(m->idle_pipe);
@@ -2379,8 +2380,6 @@ void manager_check_finished(Manager *m) {
         /* Turn off confirm spawn now */
         m->confirm_spawn = false;
 
-        manager_unwatch_jobs_in_progress(m);
-
         if (dual_timestamp_is_set(&m->finish_timestamp))
                 return;