1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2014 David Herrmann <dh.herrmann@gmail.com>
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
24 * A PTY object represents a single PTY connection between a master and a
25 * child. The child process is fork()ed so the caller controls what program
28 * Programs like /bin/login tend to perform a vhangup() on their TTY
29 * before running the login procedure. This also causes the pty master
30 * to get an EPOLLHUP event as long as no client has the TTY opened.
31 * This means we cannot use the TTY connection as a reliable way to track
32 * the client. Instead, we _must_ rely on the PID of the client to track
34 * However, this has the side effect that if the client forks and the
35 * parent exits, we lose them and restart the client. But this seems to
36 * be the expected behavior so we implement it here.
38 * Unfortunately, epoll always polls for EPOLLHUP so as long as the
39 * vhangup() is ongoing, we will _always_ get EPOLLHUP and cannot sleep.
40 * This gets worse if the client closes the TTY but doesn't exit.
41 * Therefore, the fd must be edge-triggered in the epoll-set so we
42 * only get the events once they change.
51 #include <sys/epoll.h>
52 #include <sys/ioctl.h>
62 #include "signal-util.h"
65 #define PTY_BUFSIZE 4096
78 sd_event_source *fd_source;
79 sd_event_source *child_source;
81 char in_buf[PTY_BUFSIZE];
85 void *event_fn_userdata;
87 bool needs_requeue : 1;
88 unsigned int role : 2;
/*
 * pty_new() - set up a new Pty object for the caller.
 *
 * Visible steps: @out is checked with assert_return() (-EINVAL), the embedded
 * barrier is reset to BARRIER_NULL, a pty master is opened with
 * posix_openpt() (read-write, non-blocking, close-on-exec, not becoming the
 * controlling terminal), grantpt() is called on it and the barrier is
 * created with barrier_create().
 *
 * NOTE(review): the allocation of the object, the error checks after each
 * call and the final hand-over through @out are not part of this excerpt --
 * confirm them against the complete file.
 */
91 int pty_new(Pty **out) {
92 _pty_unref_ Pty *pty = NULL;
95 assert_return(out, -EINVAL);
103 pty->barrier = (Barrier) BARRIER_NULL;
105 pty->fd = posix_openpt(O_RDWR | O_NOCTTY | O_CLOEXEC | O_NONBLOCK);
110 * The slave-node is initialized to uid/gid of the caller of
111 * posix_openpt(). Only if devpts is mounted with fixed uid/gid this is
112 * skipped. In that case, grantpt() can overwrite these, but then you
113 * have to be root to use chown() (or a pt_chown helper has to be
114 * present). In those cases grantpt() really does something,
115 * otherwise it's a no-op. We call grantpt() here to try supporting
116 * those cases, even though no-one uses that, I guess. If you need other
117 * access-rights, set them yourself after this call returns (no, this is
118 * not racy, it looks racy, but races regarding your own UID are never
119 * important as an attacker could ptrace you; and the slave-pty is also
122 r = grantpt(pty->fd);
126 r = barrier_create(&pty->barrier);
/*
 * pty_ref() - reference-counting entry point for a Pty.
 *
 * The visible guard singles out a NULL @pty and an object whose reference
 * count is already below 1.
 * NOTE(review): what the guard returns and the actual increment are not
 * visible in this excerpt.
 */
135 Pty *pty_ref(Pty *pty) {
136 if (!pty || pty->ref < 1)
/*
 * pty_unref() - drop one reference to @pty.
 *
 * The guard takes its early branch when @pty is NULL, when its reference
 * count is below 1, or when references remain after the decrement (the
 * decrement itself happens inside the condition). Past the guard, the
 * visible teardown releases the child event source, destroys the barrier
 * and clears the output ring buffer.
 * NOTE(review): the early-branch body, any further teardown steps and the
 * return value are not visible in this excerpt.
 */
143 Pty *pty_unref(Pty *pty) {
144 if (!pty || pty->ref < 1 || --pty->ref > 0)
148 pty->child_source = sd_event_source_unref(pty->child_source);
149 barrier_destroy(&pty->barrier);
150 ring_clear(&pty->out_buf);
/*
 * pty_get_barrier() - return a pointer to the barrier embedded in @pty.
 * NOTE(review): one line between the signature and the return is missing
 * from this excerpt (possibly an argument check).
 */
156 Barrier *pty_get_barrier(Pty *pty) {
158 return &pty->barrier;
/* True iff @pty is non-NULL and its role is still PTY_ROLE_UNKNOWN. */
161 bool pty_is_unknown(Pty *pty) {
162 return pty && pty->role == PTY_ROLE_UNKNOWN;
/* True iff @pty is non-NULL and its role is PTY_ROLE_PARENT. */
165 bool pty_is_parent(Pty *pty) {
166 return pty && pty->role == PTY_ROLE_PARENT;
/* True iff @pty is non-NULL and its role is PTY_ROLE_CHILD. */
169 bool pty_is_child(Pty *pty) {
170 return pty && pty->role == PTY_ROLE_CHILD;
/*
 * True iff @pty is in the parent role (see pty_is_parent(), which also
 * covers the NULL case) and has a positive child PID recorded.
 */
173 bool pty_has_child(Pty *pty) {
174 return pty_is_parent(pty) && pty->child > 0;
/*
 * Return the recorded child PID when pty_has_child() holds; otherwise
 * return -ECHILD (a negative errno value carried in the pid_t).
 */
177 pid_t pty_get_child(Pty *pty) {
178 return pty_has_child(pty) ? pty->child : -ECHILD;
/* True iff @pty is non-NULL and holds a non-negative file descriptor. */
181 bool pty_is_open(Pty *pty) {
182 return pty && pty->fd >= 0;
/*
 * pty_get_fd() - return the file descriptor held by @pty.
 *
 * A NULL @pty is rejected via assert_return() with -EINVAL. If the pty is
 * not open (see pty_is_open()), -EPIPE is returned instead of an fd.
 */
185 int pty_get_fd(Pty *pty) {
186 assert_return(pty, -EINVAL);
188 return pty_is_open(pty) ? pty->fd : -EPIPE;
/*
 * pty_make_child() - switch @pty into the child role.
 *
 * Guards: NULL @pty yields -EINVAL; a pty whose role is no longer unknown
 * yields -EALREADY. Visible steps: the slave device name is fetched with
 * ptsname_malloc() from the current fd, the slave is opened read-write with
 * O_CLOEXEC | O_NOCTTY, the calling process' PID is recorded as the child,
 * the role becomes PTY_ROLE_CHILD and the barrier is told it is on the
 * child side.
 * NOTE(review): error checks and what happens to the previous fd / the
 * newly opened slave fd are not visible in this excerpt.
 */
191 int pty_make_child(Pty *pty) {
192 _cleanup_free_ char *slave_name = NULL;
195 assert_return(pty, -EINVAL);
196 assert_return(pty_is_unknown(pty), -EALREADY);
198 r = ptsname_malloc(pty->fd, &slave_name);
202 fd = open(slave_name, O_RDWR | O_CLOEXEC | O_NOCTTY);
208 pty->child = getpid();
209 pty->role = PTY_ROLE_CHILD;
210 barrier_set_role(&pty->barrier, BARRIER_CHILD);
/*
 * pty_make_parent() - switch @pty into the parent role.
 *
 * Guards: NULL @pty yields -EINVAL; a pty whose role is no longer unknown
 * yields -EALREADY. The role is then set to PTY_ROLE_PARENT.
 * NOTE(review): how @child is stored and the return statement are not
 * visible in this excerpt.
 */
215 int pty_make_parent(Pty *pty, pid_t child) {
216 assert_return(pty, -EINVAL);
217 assert_return(pty_is_unknown(pty), -EALREADY);
220 pty->role = PTY_ROLE_PARENT;
/*
 * pty_unlock() - call unlockpt() on the pty's file descriptor.
 *
 * Guards: NULL @pty or a pty in the child role yields -EINVAL; a closed pty
 * yields -ENODEV. Returns 0 on success or -errno if unlockpt() fails.
 */
225 int pty_unlock(Pty *pty) {
226 assert_return(pty, -EINVAL);
227 assert_return(pty_is_unknown(pty) || pty_is_parent(pty), -EINVAL);
228 assert_return(pty_is_open(pty), -ENODEV);
230 return unlockpt(pty->fd) < 0 ? -errno : 0;
/*
 * pty_setup_child() - prepare the calling (child) process to run on the pty.
 *
 * Guards: NULL @pty or a non-child role yields -EINVAL; a closed pty yields
 * -EALREADY. Visible steps, in order:
 *   - reset the signal mask and all signal handlers,
 *   - check a "pid" result, tolerating EPERM (NOTE(review): the call that
 *     produces pid is not visible here -- presumably setsid(); confirm),
 *   - make the pty the controlling terminal via TIOCSCTTY,
 *   - read the terminal attributes, force VERASE to backspace (010) and set
 *     IUTF8, then apply them immediately (TCSANOW),
 *   - duplicate the pty fd onto stdin, stdout and stderr,
 *   - close the original fd unless it already is one of the std fds (0..2),
 *     leaving pty->fd invalid either way.
 * NOTE(review): the error handling after each call and the return value are
 * not visible in this excerpt.
 */
233 int pty_setup_child(Pty *pty) {
238 assert_return(pty, -EINVAL);
239 assert_return(pty_is_child(pty), -EINVAL);
240 assert_return(pty_is_open(pty), -EALREADY);
242 r = reset_signal_mask();
246 r = reset_all_signal_handlers();
251 if (pid < 0 && errno != EPERM)
254 r = ioctl(pty->fd, TIOCSCTTY, 0);
258 r = tcgetattr(pty->fd, &attr);
262 /* erase character should be normal backspace, PLEASEEE! */
263 attr.c_cc[VERASE] = 010;
264 /* always set UTF8 flag */
265 attr.c_iflag |= IUTF8;
267 r = tcsetattr(pty->fd, TCSANOW, &attr);
271 if (dup2(pty->fd, STDIN_FILENO) != STDIN_FILENO ||
272 dup2(pty->fd, STDOUT_FILENO) != STDOUT_FILENO ||
273 dup2(pty->fd, STDERR_FILENO) != STDERR_FILENO)
276 /* only close FD if it's not a std-fd */
277 pty->fd = (pty->fd > 2) ? safe_close(pty->fd) : -1;
/*
 * pty_close() - close the pty's file descriptor.
 *
 * A pty that is not open (including NULL, see pty_is_open()) takes the early
 * branch. Otherwise the fd event source is released first and the fd is then
 * closed with safe_close(), whose result is stored back into pty->fd.
 * NOTE(review): the early-branch body is not visible in this excerpt.
 */
282 void pty_close(Pty *pty) {
283 if (!pty_is_open(pty))
286 pty->fd_source = sd_event_source_unref(pty->fd_source);
287 pty->fd = safe_close(pty->fd);
291 * Drain input-queue and dispatch data via the event-handler. Returns <0 on
292 * error, 0 if queue is empty and 1 if we couldn't empty the input queue fast
293 * enough and there's still data left.
/*
 * pty_dispatch_read() - read pending input from the pty and hand it to the
 * registered event callback as PTY_DATA.
 *
 * At most 8 read() calls are made per invocation. A failing read() returns
 * 0 for EAGAIN and -errno for anything else. Each successfully read chunk is
 * NUL-terminated in place before the callback runs. If the loop runs out
 * without hitting one of the exits, needs_requeue is set so the fd gets
 * re-armed.
 * NOTE(review): the handling of len == 0, of the callback's return value and
 * the final return are not visible in this excerpt.
 */
295 static int pty_dispatch_read(Pty *pty) {
301 * We're edge-triggered, means we need to read the whole queue. This,
302 * however, might cause us to stall if the writer is faster than we
303 * are. Therefore, try reading as much as 8 times (32KiB) and only
307 for (i = 0; i < 8; ++i) {
/* one byte of in_buf is kept free for the terminating zero set below */
308 len = read(pty->fd, pty->in_buf, sizeof(pty->in_buf) - 1);
313 return (errno == EAGAIN) ? 0 : -errno;
314 } else if (len == 0) {
318 /* set terminating zero for debugging safety */
319 pty->in_buf[len] = 0;
320 r = pty->event_fn(pty, pty->event_fn_userdata, PTY_DATA, pty->in_buf, len);
325 /* still data left, make sure we're queued again */
326 pty->needs_requeue = true;
332 * Drain output-queue by writing data to the pty. Returns <0 on error, 0 if the
333 * output queue is empty now and 1 if we couldn't empty the output queue fast
334 * enough and there's still data left.
/*
 * pty_dispatch_write() - flush queued output from the ring buffer to the pty.
 *
 * Up to 2 rounds are made per invocation: ring_peek() exposes the pending
 * data as an iovec array, writev() sends it, and ring_pull() drops the bytes
 * that were actually written. A failing writev() returns 1 for EAGAIN (data
 * still pending) and -errno otherwise. If data remains queued afterwards,
 * needs_requeue is set so the fd gets re-armed.
 * NOTE(review): the handling of an empty ring, of len == 0 and the final
 * return values are not visible in this excerpt.
 */
336 static int pty_dispatch_write(Pty *pty) {
343 * Same as pty_dispatch_read(), we're edge-triggered so we need to call
344 * write() until either all data is written or it returns EAGAIN. We
345 * call it twice and if it still writes successfully, we reschedule.
348 for (i = 0; i < 2; ++i) {
349 num = ring_peek(&pty->out_buf, vec);
353 len = writev(pty->fd, vec, (int)num);
358 return (errno == EAGAIN) ? 1 : -errno;
359 } else if (len == 0) {
363 ring_pull(&pty->out_buf, (size_t)len);
366 /* still data left, make sure we're queued again */
367 if (ring_get_size(&pty->out_buf) > 0) {
368 pty->needs_requeue = true;
/*
 * pty_fd_fn() - event-loop callback for I/O activity on the pty fd.
 *
 * Visible flow:
 *   - EPOLLHUP / EPOLLERR are noted first (NOTE(review): the statement under
 *     that condition is not visible; presumably it sets r_hup negative),
 *   - EPOLLOUT triggers pty_dispatch_write(),
 *   - input is drained via pty_dispatch_read() when EPOLLIN is set or when a
 *     hang-up / write error was seen, and the callback returns 0 early while
 *     input is still pending,
 *   - only once a hang-up, write error or read error remains is PTY_HUP
 *     delivered to the registered event callback.
 * NOTE(review): the lookup of the Pty from @userdata, the condition guarding
 * the early "return 0" and the final return are not visible in this excerpt.
 */
375 static int pty_fd_fn(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
377 int r_hup = 0, r_write = 0, r_read = 0, r;
380 * Whenever we encounter I/O errors, we have to make sure to drain the
381 * input queue first, before we handle any HUP. A child might send us
382 * a message and immediately close the queue. We must not handle the
383 * HUP first or we loose data.
384 * Therefore, if we read a message successfully, we always return
385 * success and wait for the next event-loop iteration. Furthermore,
386 * whenever there is a write-error, we must try reading from the input
387 * queue even if EPOLLIN is not set. The input might have arrived in
388 * between epoll_wait() and write(). Therefore, write-errors are only
389 * ever handled if the input-queue is empty. In all other cases they
390 * are ignored until either reading fails or the input queue is empty.
393 if (revents & (EPOLLHUP | EPOLLERR))
396 if (revents & EPOLLOUT)
397 r_write = pty_dispatch_write(pty);
399 /* Awesome! Kernel signals HUP without IN but queues are not empty.. */
400 if ((revents & EPOLLIN) || r_hup < 0 || r_write < 0) {
401 r_read = pty_dispatch_read(pty);
403 return 0; /* still data left to fetch next round */
406 if (r_hup < 0 || r_write < 0 || r_read < 0) {
407 /* PTY closed and input-queue drained */
409 r = pty->event_fn(pty, pty->event_fn_userdata, PTY_HUP, NULL, 0);
/*
 * pty_fd_prepare_fn() - prepare callback for the pty fd event source.
 *
 * When needs_requeue is set, the full edge-triggered event mask
 * (EPOLLHUP | EPOLLERR | EPOLLIN | EPOLLOUT | EPOLLET) is written to @source
 * again and the flag is cleared.
 * NOTE(review): the lookup of the Pty from @userdata, the error check on
 * sd_event_source_set_io_events() and the return value are not visible in
 * this excerpt.
 */
417 static int pty_fd_prepare_fn(sd_event_source *source, void *userdata) {
421 if (pty->needs_requeue) {
423 * We're edge-triggered. In case we couldn't handle all events
424 * or in case new write-data is queued, we set needs_requeue.
425 * Before going asleep, we set the io-events *again*. sd-event
426 * notices that we're edge-triggered and forwards the call to
427 * the kernel even if the events didn't change. The kernel will
428 * check the events and re-queue us on the ready queue in case
429 * an event is pending.
431 r = sd_event_source_set_io_events(source, EPOLLHUP | EPOLLERR | EPOLLIN | EPOLLOUT | EPOLLET);
433 pty->needs_requeue = false;
/*
 * pty_child_fn() - event-loop callback for child-process state changes.
 *
 * Forwards the siginfo_t to the registered event callback as a PTY_CHILD
 * event, passing @si as the data pointer and sizeof(*si) as its length.
 * NOTE(review): the lookup of the Pty from @userdata, any bookkeeping around
 * the call and the return value are not visible in this excerpt.
 */
439 static int pty_child_fn(sd_event_source *source, const siginfo_t *si, void *userdata) {
445 r = pty->event_fn(pty, pty->event_fn_userdata, PTY_CHILD, si, sizeof(*si));
/*
 * pty_attach_event() - hook @pty into an sd-event loop.
 *
 * Guards: NULL @pty, NULL @event, NULL @event_fn or a pty that is not in the
 * parent role all yield -EINVAL. Any previous attachment is dropped first
 * via pty_detach_event(). Then:
 *   - if the pty is open, an edge-triggered I/O source is added for
 *     EPOLLHUP | EPOLLERR | EPOLLIN | EPOLLOUT and pty_fd_prepare_fn is
 *     installed as its prepare callback,
 *   - if a child is recorded, a child source is added,
 *   - @event_fn and @event_fn_userdata are stored for later dispatch.
 * The trailing pty_detach_event() call is presumably the error path
 * (NOTE(review): labels, error checks, the remaining arguments of the
 * sd_event_add_*() calls and the return statements are not visible here).
 */
452 int pty_attach_event(Pty *pty, sd_event *event, pty_event_t event_fn, void *event_fn_userdata) {
455 assert_return(pty, -EINVAL);
456 assert_return(event, -EINVAL);
457 assert_return(event_fn, -EINVAL);
458 assert_return(pty_is_parent(pty), -EINVAL);
460 pty_detach_event(pty);
462 if (pty_is_open(pty)) {
463 r = sd_event_add_io(event,
466 EPOLLHUP | EPOLLERR | EPOLLIN | EPOLLOUT | EPOLLET,
472 r = sd_event_source_set_prepare(pty->fd_source, pty_fd_prepare_fn);
477 if (pty_has_child(pty)) {
478 r = sd_event_add_child(event,
488 pty->event_fn = event_fn;
489 pty->event_fn_userdata = event_fn_userdata;
494 pty_detach_event(pty);
/*
 * pty_detach_event() - undo pty_attach_event().
 *
 * Releases the child and fd event sources (storing the result of
 * sd_event_source_unref() back into each field) and clears the stored
 * callback and its userdata.
 * NOTE(review): lines between the signature and the first statement are
 * missing from this excerpt (possibly a NULL check on @pty).
 */
498 void pty_detach_event(Pty *pty) {
502 pty->child_source = sd_event_source_unref(pty->child_source);
503 pty->fd_source = sd_event_source_unref(pty->fd_source);
504 pty->event_fn = NULL;
505 pty->event_fn_userdata = NULL;
/*
 * pty_write() - queue @size bytes from @buf for output on the pty.
 *
 * Guards: NULL @pty yields -EINVAL; a closed pty or a pty not in the parent
 * role yields -ENODEV. The data is appended to the output ring buffer with
 * ring_push(); whether the ring was empty beforehand is recorded first, and
 * needs_requeue is set so the edge-triggered fd gets re-armed.
 * NOTE(review): the condition guarding the needs_requeue assignment (likely
 * was_empty), the error check on ring_push() and the return value are not
 * visible in this excerpt.
 */
508 int pty_write(Pty *pty, const void *buf, size_t size) {
512 assert_return(pty, -EINVAL);
513 assert_return(pty_is_open(pty), -ENODEV);
514 assert_return(pty_is_parent(pty), -ENODEV);
520 * Push @buf[0..@size] into the output ring-buffer. In case the
521 * ring-buffer wasn't empty beforehand, we're already waiting for
522 * EPOLLOUT and we're done. If it was empty, we have to re-queue the
523 * FD for EPOLLOUT as we're edge-triggered and wouldn't get any new
527 was_empty = ring_get_size(&pty->out_buf) < 1;
529 r = ring_push(&pty->out_buf, buf, size);
534 pty->needs_requeue = true;
/*
 * pty_signal() - issue the TIOCSIG ioctl with signal number @sig on the pty.
 *
 * Guards: NULL @pty yields -EINVAL; a closed pty or a pty not in the parent
 * role yields -ENODEV. Returns 0 on success or -errno if the ioctl fails.
 */
539 int pty_signal(Pty *pty, int sig) {
540 assert_return(pty, -EINVAL);
541 assert_return(pty_is_open(pty), -ENODEV);
542 assert_return(pty_is_parent(pty), -ENODEV);
544 return ioctl(pty->fd, TIOCSIG, sig) < 0 ? -errno : 0;
/*
 * pty_resize() - set the terminal window size of the pty.
 *
 * @term_width and @term_height are stored as ws_col and ws_row of a
 * struct winsize and applied with the TIOCSWINSZ ioctl.
 * Guards: NULL @pty yields -EINVAL; a closed pty or a pty not in the parent
 * role yields -ENODEV. Returns 0 on success or -errno if the ioctl fails.
 */
547 int pty_resize(Pty *pty, unsigned short term_width, unsigned short term_height) {
548 struct winsize ws = {
549 .ws_col = term_width,
550 .ws_row = term_height,
553 assert_return(pty, -EINVAL);
554 assert_return(pty_is_open(pty), -ENODEV);
555 assert_return(pty_is_parent(pty), -ENODEV);
558 * This will send SIGWINCH to the pty slave foreground process group.
559 * We will also get one, but we don't need it.
561 return ioctl(pty->fd, TIOCSWINSZ, &ws) < 0 ? -errno : 0;
/*
 * pty_fork() - create a pty and split into a parent and a child side.
 *
 * Guards: NULL @out yields -EINVAL; @event and @event_fn must be either both
 * given or both omitted, otherwise -EINVAL.
 *
 * Visible child-side steps: pty_make_child(), pty_setup_child(), then a
 * barrier sync with the parent.
 * Visible parent-side steps: pty_make_parent() with the child's pid,
 * pty_resize() to the initial dimensions, pty_attach_event(), then a barrier
 * sync with the child.
 * The trailing barrier_abort() + waitpid() pair aborts the barrier and reaps
 * the child; presumably this is the parent's failure path.
 *
 * NOTE(review): the creation of the Pty, the fork() itself, the branch
 * structure, the condition under which pty_attach_event() is called, all
 * error checks, the hand-over through @out and the return values are not
 * visible in this excerpt -- confirm against the complete file.
 */
564 pid_t pty_fork(Pty **out, sd_event *event, pty_event_t event_fn, void *event_fn_userdata, unsigned short initial_term_width, unsigned short initial_term_height) {
565 _pty_unref_ Pty *pty = NULL;
569 assert_return(out, -EINVAL);
570 assert_return((event && event_fn) || (!event && !event_fn), -EINVAL);
587 r = pty_make_child(pty);
591 r = pty_setup_child(pty);
595 /* sync with parent */
596 if (!barrier_place_and_sync(&pty->barrier))
599 /* fallthrough and return the child's PTY object */
603 r = pty_make_parent(pty, pid);
607 r = pty_resize(pty, initial_term_width, initial_term_height);
612 r = pty_attach_event(pty, event, event_fn, event_fn_userdata);
617 /* sync with child */
618 if (!barrier_place_and_sync(&pty->barrier)) {
623 /* fallthrough and return the parent's PTY object */
631 barrier_abort(&pty->barrier);
632 waitpid(pty->child, NULL, 0);