From: Ian Jackson Date: Mon, 31 May 2021 22:51:37 +0000 (+0100) Subject: childio: When getting EOF when reading, wait blockingly for child X-Git-Tag: otter-0.7.0~185 X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?a=commitdiff_plain;h=e7740a0f4a9bbfb237bf4db95b3ed4b7393ed62b;p=otter.git childio: When getting EOF when reading, wait blockingly for child The t_false read test was flaky. It would sometimes panic at the "unwrap_err". This is because on Linux we can get EOF on a pipe which is being closed by the kernel as a result of process termination, and then still find that wait4 returns 0 meaning the child cannot be reaped. I think this is quite undesirable, and I regard it as a kernel bug. It is certainly untraditional. For example, it is conventional for a process that is coredumpting to finish dumping core before its fds show up as closed. However, the spec I have here does not appear to give a clear guarentee that this sequence of events is impossible. And whatever, we have to live with it. Bah. In our situation we can generally expect that it is sensible to assume that EOF means the child is in the process of exiting, and to reap it blockingly. Emprically I can no longer reproduce the race now. The sequence of events as seen from the parent looks like this: pipe2([5, 6], O_CLOEXEC) = 0 This is the stdin pipe. We don't use it in this test. pipe2([7, 8], O_CLOEXEC) = 0 This is the stdout pipe for the child. I have verified in another strace that these descriptors are right, since the post-fork child does this: 23597 dup2(5, 0) = 0 23597 dup2(8, 1) = 1 So, continuing with the parent: prlimit64(0, RLIMIT_NOFILE, NULL, {rlim_cur=1024, rlim_max=4*1024}) = 0 prlimit64(0, RLIMIT_NOFILE, NULL, {rlim_cur=1024, rlim_max=4*1024}) = 0 prlimit64(0, RLIMIT_NOFILE, NULL, {rlim_cur=1024, rlim_max=4*1024}) = 0 prlimit64(0, RLIMIT_NOFILE, NULL, {rlim_cur=1024, rlim_max=4*1024}) = 0 mmap(NULL, 36864, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7f14d9a56000 rt_sigprocmask(SIG_BLOCK, ~[], [], 8) = 0 clone(child_stack=0x7f14d9a5eff0, flags=CLONE_VM|CLONE_VFORK|SIGCHLD) = 12678 Here we fork. The child runs in parallel, and is going to exit. munmap(0x7f14d9a56000, 36864) = 0 rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0 close(5) = 0 close(8) = 0 We close our copies of the child's ends of the pipes. read(7, "", 10) = 0 We read EOF from the child's stdout. write(2, "[src/childio.rs:103] self.rw.rea"..., 45) = 45 This is a dbg print I put in. wait4(12678, 0x7fff03c3d16c, WNOHANG, NULL) = 0 Here wait4 returns 0 meaning "child has not terminated". write(2, "[src/childio.rs:36] self.child.l"..., 64) = 64 write(2, "[", 1) = 1 write(2, "src/childio.rs", 14) = 14 write(2, ":", 1) = 1 write(2, "209", 3) = 3 write(2, "] ", 2) = 2 Again some dbg print. --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=12678, si_uid=1001, si_status=1, si_utime=0, si_stime=0} --- Finally we get a signal about the child but it is too late. write(2, "r.read(&mut buf)", 16) = 16 write(2, " = ", 3) = 3 write(2, "Ok", 2) = 2 write(2, "(\n", 2) = 2 write(2, " ", 4) = 4 write(2, "0", 1) = 1 write(2, ",\n", 2) = 2 write(2, ")", 1) = 1 write(2, "\n", 1) = 1 Another dbg print. write(2, "thread '", 8) = 8 write(2, "main", 4) = 4 write(2, "' panicked at '", 15) = 15 write(2, "called `Result::unwrap_err()` on"..., 50) = 50 And here we are panicking. (NB that source code line numbers here and debug output are different because this strace was not made with the exact code you find in this commit's parent.) Signed-off-by: Ian Jackson --- diff --git a/src/childio.rs b/src/childio.rs index 2e1c5668..5272ac82 100644 --- a/src/childio.rs +++ b/src/childio.rs @@ -31,9 +31,19 @@ impl Display for ChildWrapper { } impl ChildIo { - fn rw_result(&self, r: io::Result) -> io::Result { - if r.is_ok() && *r.as_ref().unwrap() > 0 { return r } - let status = self.child.lock().child.try_wait()?; + fn rw_result(&self, eofblock: bool, r: io::Result) + -> io::Result + { + let block = match &r { + &Ok(v) if v == 0 => eofblock, + &Ok(_) => return r, + Err(_) => false, + }; + let status = { + let mut child = self.child.lock(); + let child = &mut child.child; + if block { Some(child.wait()?) } else { child.try_wait()? } + }; match status { None => r, Some(es) if es.success() => r, @@ -101,18 +111,18 @@ impl Drop for ChildWrapper { impl Read for ChildIo where R: Read { fn read(&mut self, buf: &mut [u8]) -> io::Result { let r = self.rw.read(buf); - self.rw_result(r) + self.rw_result(true,r) } } impl Write for ChildIo where W: Write { fn write(&mut self, buf: &[u8]) -> io::Result { let r = self.rw.write(buf); - self.rw_result(r) + self.rw_result(false,r) } fn flush(&mut self) -> io::Result<()> { let r = self.rw.flush(); - self.rw_result(r.map(|()|0)).map(|_|()) + self.rw_result(false, r.map(|()|0) ).map(|_|()) } }