1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2014 David Herrmann <dh.herrmann@gmail.com>
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
24 * A PTY object represents a single PTY connection between a master and a
25 * child. The child process is fork()ed so the caller controls what program
28 * Programs like /bin/login tend to perform a vhangup() on their TTY
29 * before running the login procedure. This also causes the pty master
30 * to get an EPOLLHUP event as long as no client has the TTY opened.
31 * This means we cannot use the TTY connection as a reliable way to track
32 * the client. Instead, we _must_ rely on the PID of the client to track
34 * However, this has the side effect that if the client forks and the
35 * parent exits, we lose them and restart the client. But this seems to
36 * be the expected behavior so we implement it here.
38 * Unfortunately, epoll always polls for EPOLLHUP so as long as the
39 * vhangup() is ongoing, we will _always_ get EPOLLHUP and cannot sleep.
40 * This gets worse if the client closes the TTY but doesn't exit.
41 * Therefore, the fd must be edge-triggered in the epoll-set so we
42 * only get the events once they change.
55 #include <sys/epoll.h>
56 #include <sys/eventfd.h>
57 #include <sys/ioctl.h>
58 #include <sys/types.h>
/* Size in bytes of the per-PTY input buffer (it dimensions the in_buf array). */
70 #define PTY_BUFSIZE 16384
83 sd_event_source *fd_source;
84 sd_event_source *child_source;
86 char in_buf[PTY_BUFSIZE];
90 void *event_fn_userdata;
92 bool needs_requeue : 1;
93 unsigned int role : 2;
/*
 * pty_new() - create a new PTY object
 * @out: result location; checked with assert_return(out, -EINVAL)
 *
 * Opens a PTY master through posix_openpt() with O_RDWR, O_NOCTTY, O_CLOEXEC
 * and O_NONBLOCK, calls grantpt() on the new descriptor and initializes the
 * barrier embedded in the object.
 *
 * NOTE(review): this excerpt elides parts of the body. The allocation of the
 * object, the checks on each call's result and the final hand-over through
 * @out are not visible, so the exact failure codes must be confirmed against
 * the full file.
 */
96 int pty_new(Pty **out) {
97 _pty_unref_ Pty *pty = NULL;
100 assert_return(out, -EINVAL);
109 pty->fd = posix_openpt(O_RDWR | O_NOCTTY | O_CLOEXEC | O_NONBLOCK);
114 * The slave-node is initialized to uid/gid of the caller of
115 * posix_openpt(). Only if devpts is mounted with fixed uid/gid this is
116 * skipped. In that case, grantpt() can overwrite these, but then you
117 * have to be root to use chown() (or a pt_chown helper has to be
118 * present). In those cases grantpt() really does something,
119 * otherwise it's a no-op. We call grantpt() here to try supporting
120 * those cases, even though no-one uses that, I guess. If you need other
121 * access-rights, set them yourself after this call returns (no, this is
122 * not racy, it looks racy, but races regarding your own UID are never
123 * important as an attacker could ptrace you; and the slave-pty is also
126 r = grantpt(pty->fd);
130 r = barrier_init(&pty->barrier);
/*
 * pty_ref() - acquire a reference on a PTY object
 * @pty: PTY object, may be NULL
 *
 * The guard below singles out a NULL @pty and objects whose reference
 * counter is already below 1.
 *
 * NOTE(review): the guarded statement, the counter increment and the return
 * statement are on lines elided from this excerpt; confirm them against the
 * full file.
 */
139 Pty *pty_ref(Pty *pty) {
140 if (!pty || pty->ref < 1)
/*
 * pty_unref() - drop a reference on a PTY object
 * @pty: PTY object, may be NULL
 *
 * The guard matches when @pty is NULL, when its reference counter is already
 * below 1, or when the counter is still positive after being decremented.
 * The decrement is only evaluated if the first two tests did not match.
 * Past the guard, the child event source is released, the barrier is
 * destroyed and the output ring buffer is cleared.
 *
 * NOTE(review): the guarded statement, any further teardown and the return
 * value are on lines elided from this excerpt.
 */
147 Pty *pty_unref(Pty *pty) {
148 if (!pty || pty->ref < 1 || --pty->ref > 0)
152 pty->child_source = sd_event_source_unref(pty->child_source);
153 barrier_destroy(&pty->barrier);
154 ring_clear(&pty->out_buf);
/*
 * pty_get_barrier() - access the barrier embedded in a PTY object
 * @pty: PTY object
 *
 * Returns the address of the barrier member of @pty; the pointer refers to
 * storage inside @pty itself.
 *
 * NOTE(review): a line between the signature and the return is elided here,
 * so whether a NULL @pty is checked cannot be told from this excerpt.
 */
160 Barrier *pty_get_barrier(Pty *pty) {
162 return &pty->barrier;
/*
 * pty_is_unknown() - test whether no role has been assigned yet
 * @pty: PTY object, may be NULL
 *
 * Returns true only if @pty is non-NULL and its role is PTY_ROLE_UNKNOWN;
 * a NULL @pty yields false.
 */
165 bool pty_is_unknown(Pty *pty) {
166 return pty && pty->role == PTY_ROLE_UNKNOWN;
/*
 * pty_is_parent() - test whether the object has the parent role
 * @pty: PTY object, may be NULL
 *
 * Returns true only if @pty is non-NULL and its role is PTY_ROLE_PARENT;
 * a NULL @pty yields false.
 */
169 bool pty_is_parent(Pty *pty) {
170 return pty && pty->role == PTY_ROLE_PARENT;
/*
 * pty_is_child() - test whether the object has the child role
 * @pty: PTY object, may be NULL
 *
 * Returns true only if @pty is non-NULL and its role is PTY_ROLE_CHILD;
 * a NULL @pty yields false.
 */
173 bool pty_is_child(Pty *pty) {
174 return pty && pty->role == PTY_ROLE_CHILD;
/*
 * pty_has_child() - test whether a parent-side object tracks a child PID
 * @pty: PTY object, may be NULL
 *
 * Returns true only if pty_is_parent() holds for @pty and the stored child
 * PID is greater than zero. The PID is only read after the role check has
 * passed, so a NULL @pty is safe.
 */
177 bool pty_has_child(Pty *pty) {
178 return pty_is_parent(pty) && pty->child > 0;
/*
 * pty_get_child() - return the PID of the tracked child
 * @pty: PTY object, may be NULL
 *
 * Returns the stored child PID if pty_has_child() holds, otherwise -ECHILD.
 * The error is a negative errno value carried in the pid_t return type, so
 * callers must test for a negative result.
 */
181 pid_t pty_get_child(Pty *pty) {
182 return pty_has_child(pty) ? pty->child : -ECHILD;
/*
 * pty_is_open() - test whether the object holds a file descriptor
 * @pty: PTY object, may be NULL
 *
 * Returns true only if @pty is non-NULL and its fd member is not negative.
 */
185 bool pty_is_open(Pty *pty) {
186 return pty && pty->fd >= 0;
/*
 * pty_get_fd() - return the file descriptor of a PTY object
 * @pty: PTY object; checked with assert_return(pty, -EINVAL)
 *
 * Returns the stored descriptor if pty_is_open() holds, otherwise -EPIPE.
 * The descriptor remains stored in @pty; nothing is duplicated here.
 */
189 int pty_get_fd(Pty *pty) {
190 assert_return(pty, -EINVAL);
192 return pty_is_open(pty) ? pty->fd : -EPIPE;
/*
 * pty_make_child() - turn an object without a role into the child side
 * @pty: PTY object; checked with assert_return(pty, -EINVAL)
 *
 * Requires that no role has been assigned yet (assert_return with
 * -EALREADY). Looks up the slave device name of the stored descriptor with
 * ptsname_r(), opens that path with O_RDWR, O_CLOEXEC and O_NOCTTY, records
 * the calling process' PID as the child, sets the role to PTY_ROLE_CHILD and
 * switches the barrier to BARRIER_CHILD.
 *
 * NOTE(review): the result checks for ptsname_r()/open(), what is done with
 * the newly opened descriptor (presumably it replaces the stored one) and
 * the return statement are on lines elided from this excerpt.
 */
195 int pty_make_child(Pty *pty) {
        /* receives the slave device path from ptsname_r() below */
196 char slave_name[1024];
199 assert_return(pty, -EINVAL);
200 assert_return(pty_is_unknown(pty), -EALREADY);
202 r = ptsname_r(pty->fd, slave_name, sizeof(slave_name));
206 fd = open(slave_name, O_RDWR | O_CLOEXEC | O_NOCTTY);
212 pty->child = getpid();
213 pty->role = PTY_ROLE_CHILD;
214 barrier_set_role(&pty->barrier, BARRIER_CHILD);
/*
 * pty_make_parent() - turn an object without a role into the parent side
 * @pty: PTY object; checked with assert_return(pty, -EINVAL)
 * @child: PID of the child process
 *
 * Requires that no role has been assigned yet (assert_return with
 * -EALREADY) and sets the role to PTY_ROLE_PARENT.
 *
 * NOTE(review): the line that consumes @child (presumably storing it in the
 * object) and the return statement are elided from this excerpt.
 */
219 int pty_make_parent(Pty *pty, pid_t child) {
220 assert_return(pty, -EINVAL);
221 assert_return(pty_is_unknown(pty), -EALREADY);
224 pty->role = PTY_ROLE_PARENT;
/*
 * pty_unlock() - unlock the slave side of the PTY
 * @pty: PTY object; checked with assert_return(pty, -EINVAL)
 *
 * Only permitted while the object has no role or the parent role
 * (assert_return with -EINVAL) and while it is open (assert_return with
 * -ENODEV). Calls unlockpt() on the stored descriptor.
 *
 * Returns 0 on success, or the negated errno value if unlockpt() fails.
 */
229 int pty_unlock(Pty *pty) {
230 assert_return(pty, -EINVAL);
231 assert_return(pty_is_unknown(pty) || pty_is_parent(pty), -EINVAL);
232 assert_return(pty_is_open(pty), -ENODEV);
234 return unlockpt(pty->fd) < 0 ? -errno : 0;
/*
 * pty_setup_child() - prepare the child process to run on the PTY
 * @pty: PTY object; checked with assert_return(pty, -EINVAL)
 *
 * Requires the child role (assert_return with -EINVAL) and an open
 * descriptor (assert_return with -EALREADY). The visible steps, in order:
 *   - set the signal mask via sigprocmask_many(SIG_SETMASK, -1) and call
 *     reset_all_signal_handlers();
 *   - make the PTY the controlling terminal with the TIOCSCTTY ioctl;
 *   - read the terminal attributes, set VERASE to 010 (backspace) and turn
 *     on IUTF8, then write them back immediately (TCSANOW);
 *   - duplicate the descriptor onto stdin, stdout and stderr;
 *   - close the original descriptor unless it already is one of 0..2.
 *
 * NOTE(review): the call that assigns `pid` (tested against EPERM below),
 * the result checks after each step and the return statement are on lines
 * elided from this excerpt.
 */
237 int pty_setup_child(Pty *pty) {
242 assert_return(pty, -EINVAL);
243 assert_return(pty_is_child(pty), -EINVAL);
244 assert_return(pty_is_open(pty), -EALREADY);
246 r = sigprocmask_many(SIG_SETMASK, -1);
250 r = reset_all_signal_handlers();
255 if (pid < 0 && errno != EPERM)
258 r = ioctl(pty->fd, TIOCSCTTY, 0);
262 r = tcgetattr(pty->fd, &attr);
266 /* erase character should be normal backspace, PLEASEEE! */
267 attr.c_cc[VERASE] = 010;
268 /* always set UTF8 flag */
269 attr.c_iflag |= IUTF8;
271 r = tcsetattr(pty->fd, TCSANOW, &attr);
275 if (dup2(pty->fd, STDIN_FILENO) != STDIN_FILENO ||
276 dup2(pty->fd, STDOUT_FILENO) != STDOUT_FILENO ||
277 dup2(pty->fd, STDERR_FILENO) != STDERR_FILENO)
280 /* only close FD if it's not a std-fd */
        /* the stored fd becomes safe_close()'s result, or -1 when left open as a std-fd */
281 pty->fd = (pty->fd > 2) ? safe_close(pty->fd) : -1;
/*
 * pty_close() - close the file descriptor of a PTY object
 * @pty: PTY object, may be NULL (pty_is_open() tolerates NULL)
 *
 * When the object is open, the I/O event source is released and the
 * descriptor is passed to safe_close(), whose result is stored back in the
 * fd member.
 *
 * NOTE(review): the statement guarded by the pty_is_open() test is elided
 * from this excerpt (presumably an early return).
 */
286 void pty_close(Pty *pty) {
287 if (!pty_is_open(pty))
290 pty->fd_source = sd_event_source_unref(pty->fd_source);
291 pty->fd = safe_close(pty->fd);
295 * Drain input-queue and dispatch data via the event-handler. Returns <0 on
296 * error, 0 if queue is empty and 1 if we couldn't empty the input queue fast
297 * enough and there's still data left.
/*
 * pty_dispatch_read() - read pending input and hand it to the event handler
 * @pty: PTY object
 *
 * Makes at most two read() attempts on the stored descriptor. Each chunk is
 * zero-terminated in in_buf and passed to the registered event_fn as a
 * PTY_DATA event together with its length. On a read error the visible
 * return yields 0 for EAGAIN and the negated errno otherwise. After the
 * loop, needs_requeue is set so the descriptor is polled again.
 *
 * NOTE(review): the condition introducing the error branch, the body of the
 * `len == 0` branch, the handling of the callback result and the final
 * return are on lines elided from this excerpt.
 */
299 static int pty_dispatch_read(Pty *pty) {
305 * We're edge-triggered, means we need to read the whole queue. This,
306 * however, might cause us to stall if the writer is faster than we
307 * are. Therefore, we read twice and if the second read still returned
308 * data, we reschedule.
311 for (i = 0; i < 2; ++i) {
        /* one byte of in_buf is kept free for the terminating zero written below */
312 len = read(pty->fd, pty->in_buf, sizeof(pty->in_buf) - 1);
317 return (errno == EAGAIN) ? 0 : -errno;
318 } else if (len == 0) {
322 /* set terminating zero for debugging safety */
323 pty->in_buf[len] = 0;
324 r = pty->event_fn(pty, pty->event_fn_userdata, PTY_DATA, pty->in_buf, len);
329 /* still data left, make sure we're queued again */
330 pty->needs_requeue = true;
336 * Drain output-queue by writing data to the pty. Returns <0 on error, 0 if the
337 * output queue is empty now and 1 if we couldn't empty the output queue fast
338 * enough and there's still data left.
/*
 * pty_dispatch_write() - flush queued output to the PTY
 * @pty: PTY object
 *
 * Makes at most two write attempts. Each attempt peeks the output ring
 * buffer into an iovec array, writes it with writev() and removes the
 * number of bytes actually written from the ring. On a write error the
 * visible return yields 1 for EAGAIN and the negated errno otherwise. If
 * data is still queued after the loop, needs_requeue is set.
 *
 * NOTE(review): the handling of an empty ring, the condition introducing
 * the error branch, the body of the `len == 0` branch and the final return
 * values are on lines elided from this excerpt.
 */
340 static int pty_dispatch_write(Pty *pty) {
347 * Same as pty_dispatch_read(), we're edge-triggered so we need to call
348 * write() until either all data is written or it returns EAGAIN. We
349 * call it twice and if it still writes successfully, we reschedule.
352 for (i = 0; i < 2; ++i) {
        /* num is later passed to writev() as the number of filled entries in vec */
353 num = ring_peek(&pty->out_buf, vec);
357 len = writev(pty->fd, vec, (int)num);
362 return (errno == EAGAIN) ? 1 : -errno;
363 } else if (len == 0) {
        /* drop only what was written; a short write leaves the rest queued */
367 ring_pull(&pty->out_buf, (size_t)len);
370 /* still data left, make sure we're queued again */
371 if (ring_get_size(&pty->out_buf) > 0) {
372 pty->needs_requeue = true;
/*
 * pty_fd_fn() - I/O event callback for the PTY descriptor
 * @source: event source that fired
 * @fd: descriptor the event was reported for
 * @revents: epoll event mask reported for @fd
 * @userdata: callback context (the use of this argument is on an elided line)
 *
 * Visible order of operations:
 *   1. EPOLLHUP/EPOLLERR is noted (the statement under that test is elided;
 *      presumably it sets r_hup to an error code);
 *   2. EPOLLOUT triggers pty_dispatch_write();
 *   3. input is read if EPOLLIN is set or if a hang-up/write error was
 *      recorded, so pending input is consumed before a hang-up is reported;
 *   4. if any of the three results is negative, the registered event_fn is
 *      invoked with PTY_HUP, a NULL payload and length 0.
 *
 * NOTE(review): the condition guarding the early `return 0`, the handling
 * of the PTY_HUP callback result and the final return are on lines elided
 * from this excerpt.
 */
379 static int pty_fd_fn(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
381 int r_hup = 0, r_write = 0, r_read = 0, r;
384 * Whenever we encounter I/O errors, we have to make sure to drain the
385 * input queue first, before we handle any HUP. A child might send us
386 * a message and immediately close the queue. We must not handle the
387 * HUP first or we loose data.
388 * Therefore, if we read a message successfully, we always return
389 * success and wait for the next event-loop iteration. Furthermore,
390 * whenever there is a write-error, we must try reading from the input
391 * queue even if EPOLLIN is not set. The input might have arrived in
392 * between epoll_wait() and write(). Therefore, write-errors are only
393 * ever handled if the input-queue is empty. In all other cases they
394 * are ignored until either reading fails or the input queue is empty.
397 if (revents & (EPOLLHUP | EPOLLERR))
400 if (revents & EPOLLOUT)
401 r_write = pty_dispatch_write(pty);
403 /* Awesome! Kernel signals HUP without IN but queues are not empty.. */
404 if ((revents & EPOLLIN) || r_hup < 0 || r_write < 0) {
405 r_read = pty_dispatch_read(pty);
407 return 0; /* still data left to fetch next round */
410 if (r_hup < 0 || r_write < 0 || r_read < 0) {
411 /* PTY closed and input-queue drained */
413 r = pty->event_fn(pty, pty->event_fn_userdata, PTY_HUP, NULL, 0);
/*
 * pty_fd_prepare_fn() - prepare callback of the PTY I/O event source
 * @source: the I/O event source
 * @userdata: callback context (the use of this argument is on an elided line)
 *
 * If needs_requeue is set, the full event mask (EPOLLHUP, EPOLLERR, EPOLLIN,
 * EPOLLOUT, EPOLLET) is written to @source again and the flag is cleared.
 * The existing comment below explains why re-setting an unchanged mask is
 * needed for an edge-triggered descriptor.
 *
 * NOTE(review): the check of the sd_event_source_set_io_events() result and
 * the return statement are on lines elided from this excerpt.
 */
421 static int pty_fd_prepare_fn(sd_event_source *source, void *userdata) {
425 if (pty->needs_requeue) {
427 * We're edge-triggered. In case we couldn't handle all events
428 * or in case new write-data is queued, we set needs_requeue.
429 * Before going asleep, we set the io-events *again*. sd-event
430 * notices that we're edge-triggered and forwards the call to
431 * the kernel even if the events didn't change. The kernel will
432 * check the events and re-queue us on the ready queue in case
433 * an event is pending.
435 r = sd_event_source_set_io_events(source, EPOLLHUP | EPOLLERR | EPOLLIN | EPOLLOUT | EPOLLET);
437 pty->needs_requeue = false;
/*
 * pty_child_fn() - child-state event callback
 * @source: event source that fired
 * @si: signal information describing the child state change
 * @userdata: callback context (the use of this argument is on an elided line)
 *
 * Forwards the notification to the registered event_fn as a PTY_CHILD
 * event. The payload is @si itself and the reported length is
 * sizeof(siginfo_t).
 *
 * NOTE(review): the handling of the callback result and the return
 * statement are on lines elided from this excerpt.
 */
443 static int pty_child_fn(sd_event_source *source, const siginfo_t *si, void *userdata) {
449 r = pty->event_fn(pty, pty->event_fn_userdata, PTY_CHILD, si, sizeof(*si));
/*
 * pty_attach_event() - connect a parent-side PTY object to an event loop
 * @pty: PTY object; must be non-NULL and have the parent role
 * @event: event loop to attach to; must be non-NULL
 * @event_fn: handler for PTY events; must be non-NULL
 * @event_fn_userdata: opaque pointer stored alongside @event_fn
 *
 * Each precondition is enforced through assert_return() with -EINVAL. Any
 * previous attachment is dropped first via pty_detach_event(). If the
 * object is open, an I/O source is added with the edge-triggered mask
 * EPOLLHUP | EPOLLERR | EPOLLIN | EPOLLOUT | EPOLLET and
 * pty_fd_prepare_fn() is installed as its prepare callback. If a child is
 * tracked, a child source is added too. Finally the handler and its
 * userdata are stored in the object.
 *
 * NOTE(review): the remaining arguments of the sd_event_add_*() calls, the
 * result checks and the return statements are on lines elided from this
 * excerpt. The trailing pty_detach_event() call is presumably the error
 * path; confirm against the full file.
 */
456 int pty_attach_event(Pty *pty, sd_event *event, pty_event_t event_fn, void *event_fn_userdata) {
459 assert_return(pty, -EINVAL);
460 assert_return(event, -EINVAL);
461 assert_return(event_fn, -EINVAL);
462 assert_return(pty_is_parent(pty), -EINVAL);
        /* start from a clean state: drop sources and handler of an earlier attachment */
464 pty_detach_event(pty);
466 if (pty_is_open(pty)) {
467 r = sd_event_add_io(event,
470 EPOLLHUP | EPOLLERR | EPOLLIN | EPOLLOUT | EPOLLET,
476 r = sd_event_source_set_prepare(pty->fd_source, pty_fd_prepare_fn);
481 if (pty_has_child(pty)) {
482 r = sd_event_add_child(event,
492 pty->event_fn = event_fn;
493 pty->event_fn_userdata = event_fn_userdata;
498 pty_detach_event(pty);
/*
 * pty_detach_event() - disconnect a PTY object from its event loop
 * @pty: PTY object
 *
 * Releases the child and I/O event sources, storing the result of
 * sd_event_source_unref() back into each member, and clears the stored
 * event handler and its userdata.
 *
 * NOTE(review): lines between the signature and the first statement are
 * elided from this excerpt, so whether a NULL @pty is tolerated cannot be
 * told from here.
 */
502 void pty_detach_event(Pty *pty) {
506 pty->child_source = sd_event_source_unref(pty->child_source);
507 pty->fd_source = sd_event_source_unref(pty->fd_source);
508 pty->event_fn = NULL;
509 pty->event_fn_userdata = NULL;
/*
 * pty_write() - queue data for transmission to the PTY
 * @pty: PTY object; must be non-NULL (-EINVAL), open and in the parent role
 *       (-ENODEV each), all enforced through assert_return()
 * @buf: data to queue
 * @size: number of bytes in @buf
 *
 * Records whether the output ring buffer was empty, pushes @buf into it and
 * sets needs_requeue so the descriptor gets re-armed for EPOLLOUT. Nothing
 * is written to the descriptor here; the data is only queued.
 *
 * NOTE(review): the check of the ring_push() result, the condition under
 * which needs_requeue is set (presumably only when the ring was empty
 * before, per the comment below) and the return statement are on lines
 * elided from this excerpt.
 */
512 int pty_write(Pty *pty, const void *buf, size_t size) {
516 assert_return(pty, -EINVAL);
517 assert_return(pty_is_open(pty), -ENODEV);
518 assert_return(pty_is_parent(pty), -ENODEV);
524 * Push @buf[0..@size] into the output ring-buffer. In case the
525 * ring-buffer wasn't empty beforehand, we're already waiting for
526 * EPOLLOUT and we're done. If it was empty, we have to re-queue the
527 * FD for EPOLLOUT as we're edge-triggered and wouldn't get any new
        /* sampled before the push so the pre-push state is what gets tested */
531 was_empty = ring_get_size(&pty->out_buf) < 1;
533 r = ring_push(&pty->out_buf, buf, size);
538 pty->needs_requeue = true;
/*
 * pty_signal() - send a signal through the PTY
 * @pty: PTY object; must be non-NULL (-EINVAL), open and in the parent role
 *       (-ENODEV each), all enforced through assert_return()
 * @sig: signal number handed to the TIOCSIG ioctl
 *
 * Returns 0 on success, or the negated errno value if the ioctl fails.
 */
543 int pty_signal(Pty *pty, int sig) {
544 assert_return(pty, -EINVAL);
545 assert_return(pty_is_open(pty), -ENODEV);
546 assert_return(pty_is_parent(pty), -ENODEV);
548 return ioctl(pty->fd, TIOCSIG, sig) < 0 ? -errno : 0;
/*
 * pty_resize() - set the terminal window size of the PTY
 * @pty: PTY object; must be non-NULL (-EINVAL), open and in the parent role
 *       (-ENODEV each), all enforced through assert_return()
 * @term_width: new width, stored in ws_col
 * @term_height: new height, stored in ws_row
 *
 * Applies the size with the TIOCSWINSZ ioctl on the stored descriptor.
 *
 * Returns 0 on success, or the negated errno value if the ioctl fails.
 *
 * NOTE(review): the declaration and any initialization of `ws` are on lines
 * elided from this excerpt, so the values of its other fields cannot be
 * confirmed from here.
 */
551 int pty_resize(Pty *pty, unsigned short term_width, unsigned short term_height) {
554 assert_return(pty, -EINVAL);
555 assert_return(pty_is_open(pty), -ENODEV);
556 assert_return(pty_is_parent(pty), -ENODEV);
559 ws.ws_col = term_width;
560 ws.ws_row = term_height;
563 * This will send SIGWINCH to the pty slave foreground process group.
564 * We will also get one, but we don't need it.
566 return ioctl(pty->fd, TIOCSWINSZ, &ws) < 0 ? -errno : 0;
569 pid_t pty_fork(Pty **out, sd_event *event, pty_event_t event_fn, void *event_fn_userdata, unsigned short initial_term_width, unsigned short initial_term_height) {
570 _pty_unref_ Pty *pty = NULL;
574 assert_return(out, -EINVAL);
575 assert_return((event && event_fn) || (!event && !event_fn), -EINVAL);
592 r = pty_make_child(pty);
596 r = pty_setup_child(pty);
600 /* sync with parent */
601 if (!barrier_place_and_sync(&pty->barrier))
604 /* fallthrough and return the child's PTY object */
608 r = pty_make_parent(pty, pid);
612 r = pty_resize(pty, initial_term_width, initial_term_height);
617 r = pty_attach_event(pty, event, event_fn, event_fn_userdata);
622 /* sync with child */
623 if (!barrier_place_and_sync(&pty->barrier)) {
628 /* fallthrough and return the parent's PTY object */
636 barrier_abort(&pty->barrier);
637 waitpid(pty->child, NULL, 0);