1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2014 David Herrmann <dh.herrmann@gmail.com>
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
24 * A PTY object represents a single PTY connection between a master and a
25 * child. The child process is fork()ed so the caller controls what program
28 * Programs like /bin/login tend to perform a vhangup() on their TTY
29 * before running the login procedure. This also causes the pty master
30 * to get an EPOLLHUP event as long as no client has the TTY opened.
31 * This means we cannot use the TTY connection as a reliable way to track
32 * the client. Instead, we _must_ rely on the PID of the client to track
34 * However, this has the side effect that if the client forks and the
35 * parent exits, we lose them and restart the client. But this seems to
36 * be the expected behavior so we implement it here.
38 * Unfortunately, epoll always polls for EPOLLHUP so as long as the
39 * vhangup() is ongoing, we will _always_ get EPOLLHUP and cannot sleep.
40 * This gets worse if the client closes the TTY but doesn't exit.
41 * Therefore, the fd must be edge-triggered in the epoll-set so we
42 * only get the events once they change.
48 #include <linux/ioctl.h>
55 #include <sys/epoll.h>
56 #include <sys/eventfd.h>
57 #include <sys/ioctl.h>
58 #include <sys/types.h>
/*
 * Capacity in bytes of the per-PTY input buffer (in_buf). The read path
 * requests at most PTY_BUFSIZE - 1 bytes per read() call so that one byte
 * stays free for the terminating zero it appends.
 */
70 #define PTY_BUFSIZE 4096
/* I/O event source; pty_attach_event() installs pty_fd_prepare_fn() on it, pty_close() and pty_detach_event() drop it */
83 sd_event_source *fd_source;
/* child event source, presumably created by sd_event_add_child() in pty_attach_event() (TODO confirm); dropped in pty_unref() and pty_detach_event() */
84 sd_event_source *child_source;
/* staging buffer that pty_dispatch_read() fills via read() and hands to event_fn */
86 char in_buf[PTY_BUFSIZE];
/* opaque pointer handed back as the second argument of every event_fn call */
90 void *event_fn_userdata;
/* set when the edge-triggered fd has to be re-armed; consumed and cleared by pty_fd_prepare_fn() */
92 bool needs_requeue : 1;
/* compared against PTY_ROLE_UNKNOWN, PTY_ROLE_PARENT and PTY_ROLE_CHILD by the pty_is_*() helpers */
93 unsigned int role : 2;
/*
 * pty_new() - create a new Pty object and open its PTY master.
 * @out: result location; the assert_return() guard pairs a NULL @out
 *       with -EINVAL.
 *
 * Steps visible here: the barrier member is reset to BARRIER_NULL, the
 * master is opened with posix_openpt() using O_RDWR, O_NOCTTY, O_CLOEXEC
 * and O_NONBLOCK, grantpt() is called on the new fd, and the barrier is
 * set up with barrier_create().
 *
 * The local object carries the _pty_unref_ tag, presumably a cleanup
 * attribute that drops the reference on early exit (TODO confirm against
 * its definition).
 *
 * NOTE(review): the allocation, the error checks after each call and the
 * hand-over to @out are not visible in this excerpt.
 */
96 int pty_new(Pty **out) {
97 _pty_unref_ Pty *pty = NULL;
100 assert_return(out, -EINVAL);
108 pty->barrier = (Barrier) BARRIER_NULL;
110 pty->fd = posix_openpt(O_RDWR | O_NOCTTY | O_CLOEXEC | O_NONBLOCK);
115 * The slave-node is initialized to uid/gid of the caller of
116 * posix_openpt(). Only if devpts is mounted with fixed uid/gid this is
117 * skipped. In that case, grantpt() can overwrite these, but then you
118 * have to be root to use chown() (or a pt_chown helper has to be
119 * present). In those cases grantpt() really does something,
120 * otherwise it's a no-op. We call grantpt() here to try supporting
121 * those cases, even though no-one uses that, I guess. If you need other
122 * access-rights, set them yourself after this call returns (no, this is
123 * not racy, it looks racy, but races regarding your own UID are never
124 * important as an attacker could ptrace you; and the slave-pty is also
127 r = grantpt(pty->fd);
131 r = barrier_create(&pty->barrier);
/*
 * pty_ref() - acquire a reference on @pty.
 *
 * The guard below singles out a NULL @pty and an object whose reference
 * count is already below one.
 *
 * NOTE(review): what the guard returns and the increment itself are not
 * visible in this excerpt.
 */
140 Pty *pty_ref(Pty *pty) {
141 if (!pty || pty->ref < 1)
/*
 * pty_unref() - drop a reference on @pty.
 *
 * The guard is true for a NULL @pty, for a reference count below one, or
 * when the count is still positive after being decremented. The decrement
 * only happens when the first two tests are false, because of
 * short-circuit evaluation.
 *
 * The statements after the guard release the child event source, destroy
 * the barrier and clear the output ring buffer.
 *
 * NOTE(review): the body of the guard and any further teardown, including
 * the return value, are not visible in this excerpt.
 */
148 Pty *pty_unref(Pty *pty) {
149 if (!pty || pty->ref < 1 || --pty->ref > 0)
153 pty->child_source = sd_event_source_unref(pty->child_source);
154 barrier_destroy(&pty->barrier);
155 ring_clear(&pty->out_buf);
/*
 * pty_get_barrier() - return the address of the barrier embedded in @pty.
 *
 * NOTE(review): one line between the signature and the return is missing
 * from this excerpt; whether @pty is validated there cannot be told.
 */
161 Barrier *pty_get_barrier(Pty *pty) {
163 return &pty->barrier;
/*
 * pty_is_unknown() - true if @pty is non-NULL and its role is
 * PTY_ROLE_UNKNOWN. A NULL @pty yields false.
 */
166 bool pty_is_unknown(Pty *pty) {
167 return pty && pty->role == PTY_ROLE_UNKNOWN;
/*
 * pty_is_parent() - true if @pty is non-NULL and its role is
 * PTY_ROLE_PARENT. A NULL @pty yields false.
 */
170 bool pty_is_parent(Pty *pty) {
171 return pty && pty->role == PTY_ROLE_PARENT;
/*
 * pty_is_child() - true if @pty is non-NULL and its role is
 * PTY_ROLE_CHILD. A NULL @pty yields false.
 */
174 bool pty_is_child(Pty *pty) {
175 return pty && pty->role == PTY_ROLE_CHILD;
/*
 * pty_has_child() - true if @pty is in the parent role and its stored
 * child PID is greater than zero. The role test also covers a NULL @pty,
 * so the PID is only read from a valid object.
 */
178 bool pty_has_child(Pty *pty) {
179 return pty_is_parent(pty) && pty->child > 0;
/*
 * pty_get_child() - return the child PID stored in @pty.
 *
 * Returns the PID when pty_has_child() is true, otherwise -ECHILD. The
 * error code travels through the pid_t return value.
 */
182 pid_t pty_get_child(Pty *pty) {
183 return pty_has_child(pty) ? pty->child : -ECHILD;
/*
 * pty_is_open() - true if @pty is non-NULL and holds a non-negative file
 * descriptor. A NULL @pty yields false.
 */
186 bool pty_is_open(Pty *pty) {
187 return pty && pty->fd >= 0;
/*
 * pty_get_fd() - return the file descriptor held by @pty.
 *
 * The assert_return() guard pairs a NULL @pty with -EINVAL. Otherwise the
 * fd is returned when pty_is_open() is true, and -EPIPE when it is not.
 */
190 int pty_get_fd(Pty *pty) {
191 assert_return(pty, -EINVAL);
193 return pty_is_open(pty) ? pty->fd : -EPIPE;
/*
 * pty_make_child() - turn @pty into the child side of the connection.
 *
 * Guards: a NULL @pty is paired with -EINVAL, and a role other than
 * unknown with -EALREADY.
 *
 * Steps visible here: the slave device name is resolved from the fd held
 * in @pty with ptsname_r() into a 1024-byte stack buffer, and that path
 * is opened with O_RDWR, O_CLOEXEC and O_NOCTTY. Then the child PID is
 * set to getpid(), the role becomes PTY_ROLE_CHILD, and the barrier is
 * switched to BARRIER_CHILD.
 *
 * NOTE(review): the error checks and what happens to the old and the new
 * fd are not visible in this excerpt. Presumably the new fd replaces the
 * master fd in @pty (TODO confirm).
 */
196 int pty_make_child(Pty *pty) {
197 char slave_name[1024];
200 assert_return(pty, -EINVAL);
201 assert_return(pty_is_unknown(pty), -EALREADY);
203 r = ptsname_r(pty->fd, slave_name, sizeof(slave_name));
207 fd = open(slave_name, O_RDWR | O_CLOEXEC | O_NOCTTY);
213 pty->child = getpid();
214 pty->role = PTY_ROLE_CHILD;
215 barrier_set_role(&pty->barrier, BARRIER_CHILD);
/*
 * pty_make_parent() - turn @pty into the parent side of the connection.
 * @child: PID of the child process.
 *
 * Guards: a NULL @pty is paired with -EINVAL, and a role other than
 * unknown with -EALREADY. The role is then set to PTY_ROLE_PARENT.
 *
 * NOTE(review): the use of @child and the return statement are not
 * visible in this excerpt. Presumably @child is stored in the object,
 * since pty_has_child() reads a stored PID (TODO confirm).
 */
220 int pty_make_parent(Pty *pty, pid_t child) {
221 assert_return(pty, -EINVAL);
222 assert_return(pty_is_unknown(pty), -EALREADY);
225 pty->role = PTY_ROLE_PARENT;
/*
 * pty_unlock() - call unlockpt() on the fd held by @pty.
 *
 * Guards: a NULL @pty and a role that is neither unknown nor parent are
 * both paired with -EINVAL; a closed fd is paired with -ENODEV.
 *
 * Returns 0 on success, or the negated errno when unlockpt() fails.
 */
230 int pty_unlock(Pty *pty) {
231 assert_return(pty, -EINVAL);
232 assert_return(pty_is_unknown(pty) || pty_is_parent(pty), -EINVAL);
233 assert_return(pty_is_open(pty), -ENODEV);
235 return unlockpt(pty->fd) < 0 ? -errno : 0;
/*
 * pty_setup_child() - prepare the child process to run on the PTY.
 *
 * Guards: a NULL @pty and a non-child role are paired with -EINVAL; a
 * closed fd is paired with -EALREADY.
 *
 * Steps visible here, in order:
 *  - sigprocmask_many(SIG_SETMASK, -1), then reset_all_signal_handlers()
 *  - a pid result is checked, with EPERM tolerated; the call producing
 *    it is not visible (presumably setsid(), TODO confirm)
 *  - the PTY is made the controlling terminal via ioctl(TIOCSCTTY)
 *  - terminal attributes are fetched, VERASE is set to 010 and IUTF8 is
 *    switched on, then they are written back with TCSANOW
 *  - the fd is duplicated onto stdin, stdout and stderr
 *  - the original fd is closed via safe_close() if it is above 2;
 *    otherwise the member is just set to -1
 *
 * NOTE(review): the error handling after each step and the return
 * statement are not visible in this excerpt.
 */
238 int pty_setup_child(Pty *pty) {
243 assert_return(pty, -EINVAL);
244 assert_return(pty_is_child(pty), -EINVAL);
245 assert_return(pty_is_open(pty), -EALREADY);
247 r = sigprocmask_many(SIG_SETMASK, -1);
251 r = reset_all_signal_handlers();
256 if (pid < 0 && errno != EPERM)
259 r = ioctl(pty->fd, TIOCSCTTY, 0);
263 r = tcgetattr(pty->fd, &attr);
267 /* erase character should be normal backspace, PLEASEEE! */
268 attr.c_cc[VERASE] = 010;
269 /* always set UTF8 flag */
270 attr.c_iflag |= IUTF8;
272 r = tcsetattr(pty->fd, TCSANOW, &attr);
276 if (dup2(pty->fd, STDIN_FILENO) != STDIN_FILENO ||
277 dup2(pty->fd, STDOUT_FILENO) != STDOUT_FILENO ||
278 dup2(pty->fd, STDERR_FILENO) != STDERR_FILENO)
281 /* only close FD if it's not a std-fd */
282 pty->fd = (pty->fd > 2) ? safe_close(pty->fd) : -1;
/*
 * pty_close() - close the fd held by @pty.
 *
 * The guard tests pty_is_open(), which is also false for a NULL @pty.
 * For an open object the I/O event source is released first, then the fd
 * is closed with safe_close() and its result is stored back in the fd
 * member.
 *
 * NOTE(review): the body of the guard is not visible in this excerpt;
 * presumably an early return (TODO confirm).
 */
287 void pty_close(Pty *pty) {
288 if (!pty_is_open(pty))
291 pty->fd_source = sd_event_source_unref(pty->fd_source);
292 pty->fd = safe_close(pty->fd);
296 * Drain input-queue and dispatch data via the event-handler. Returns <0 on
297 * error, 0 if queue is empty and 1 if we couldn't empty the input queue fast
298 * enough and there's still data left.
/*
 * pty_dispatch_read() - read pending input and hand it to the callback.
 *
 * Runs at most 8 rounds. Each round reads up to sizeof(in_buf) - 1 bytes
 * into in_buf, appends a zero byte and invokes event_fn with PTY_DATA,
 * the buffer and the byte count. On the read-failure path EAGAIN maps to
 * 0 and any other errno is returned negated. When the loop runs out,
 * needs_requeue is set so the fd is armed again.
 *
 * NOTE(review): the condition that selects the failure path, the handling
 * of a zero-length read, the check of the callback result and the final
 * return are not visible in this excerpt.
 */
300 static int pty_dispatch_read(Pty *pty) {
306 * We're edge-triggered, means we need to read the whole queue. This,
307 * however, might cause us to stall if the writer is faster than we
308 * are. Therefore, try reading as much as 8 times (32KiB) and only
312 for (i = 0; i < 8; ++i) {
313 len = read(pty->fd, pty->in_buf, sizeof(pty->in_buf) - 1);
318 return (errno == EAGAIN) ? 0 : -errno;
319 } else if (len == 0) {
323 /* set terminating zero for debugging safety */
324 pty->in_buf[len] = 0;
325 r = pty->event_fn(pty, pty->event_fn_userdata, PTY_DATA, pty->in_buf, len);
330 /* still data left, make sure we're queued again */
331 pty->needs_requeue = true;
337 * Drain output-queue by writing data to the pty. Returns <0 on error, 0 if the
338 * output queue is empty now and 1 if we couldn't empty the output queue fast
339 * enough and there's still data left.
/*
 * pty_dispatch_write() - flush queued output to the PTY.
 *
 * Runs at most 2 rounds. Each round peeks the output ring buffer into an
 * iovec array, writes it with writev() and pulls the written byte count
 * out of the ring. On the write-failure path EAGAIN maps to 1 and any
 * other errno is returned negated. If the ring still holds data after the
 * loop, needs_requeue is set so the fd is armed again.
 *
 * NOTE(review): the condition that selects the failure path, the handling
 * of an empty peek and of a zero-length write, and the final returns are
 * not visible in this excerpt.
 */
341 static int pty_dispatch_write(Pty *pty) {
348 * Same as pty_dispatch_read(), we're edge-triggered so we need to call
349 * write() until either all data is written or it returns EAGAIN. We
350 * call it twice and if it still writes successfully, we reschedule.
353 for (i = 0; i < 2; ++i) {
354 num = ring_peek(&pty->out_buf, vec);
358 len = writev(pty->fd, vec, (int)num);
363 return (errno == EAGAIN) ? 1 : -errno;
364 } else if (len == 0) {
368 ring_pull(&pty->out_buf, (size_t)len);
371 /* still data left, make sure we're queued again */
372 if (ring_get_size(&pty->out_buf) > 0) {
373 pty->needs_requeue = true;
/*
 * pty_fd_fn() - I/O event callback for the PTY fd.
 * @revents: epoll event mask reported for the fd.
 *
 * Three results are tracked, all starting at 0: r_hup, r_write and
 * r_read. EPOLLOUT triggers pty_dispatch_write(). Input is read through
 * pty_dispatch_read() when EPOLLIN is set, and also when a hang-up or
 * write error was recorded, so queued input is drained before the
 * hang-up is acted on. Once any of the three results is negative, the
 * callback is invoked with PTY_HUP, a NULL buffer and length 0.
 *
 * NOTE(review): the assignment to r_hup, the condition guarding the early
 * "return 0", the handling of the PTY_HUP callback result and the final
 * return are not visible in this excerpt.
 */
380 static int pty_fd_fn(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
382 int r_hup = 0, r_write = 0, r_read = 0, r;
385 * Whenever we encounter I/O errors, we have to make sure to drain the
386 * input queue first, before we handle any HUP. A child might send us
387 * a message and immediately close the queue. We must not handle the
388 * HUP first or we loose data.
389 * Therefore, if we read a message successfully, we always return
390 * success and wait for the next event-loop iteration. Furthermore,
391 * whenever there is a write-error, we must try reading from the input
392 * queue even if EPOLLIN is not set. The input might have arrived in
393 * between epoll_wait() and write(). Therefore, write-errors are only
394 * ever handled if the input-queue is empty. In all other cases they
395 * are ignored until either reading fails or the input queue is empty.
398 if (revents & (EPOLLHUP | EPOLLERR))
401 if (revents & EPOLLOUT)
402 r_write = pty_dispatch_write(pty);
404 /* Awesome! Kernel signals HUP without IN but queues are not empty.. */
405 if ((revents & EPOLLIN) || r_hup < 0 || r_write < 0) {
406 r_read = pty_dispatch_read(pty);
408 return 0; /* still data left to fetch next round */
411 if (r_hup < 0 || r_write < 0 || r_read < 0) {
412 /* PTY closed and input-queue drained */
414 r = pty->event_fn(pty, pty->event_fn_userdata, PTY_HUP, NULL, 0);
/*
 * pty_fd_prepare_fn() - prepare callback of the I/O event source.
 *
 * When needs_requeue is set, the event mask of @source is written again
 * with EPOLLHUP, EPOLLERR, EPOLLIN, EPOLLOUT and EPOLLET, the same mask
 * pty_attach_event() registers. The flag is then cleared.
 *
 * NOTE(review): how the Pty is obtained from @userdata, the check of the
 * set_io_events result and the return value are not visible in this
 * excerpt.
 */
422 static int pty_fd_prepare_fn(sd_event_source *source, void *userdata) {
426 if (pty->needs_requeue) {
428 * We're edge-triggered. In case we couldn't handle all events
429 * or in case new write-data is queued, we set needs_requeue.
430 * Before going asleep, we set the io-events *again*. sd-event
431 * notices that we're edge-triggered and forwards the call to
432 * the kernel even if the events didn't change. The kernel will
433 * check the events and re-queue us on the ready queue in case
434 * an event is pending.
436 r = sd_event_source_set_io_events(source, EPOLLHUP | EPOLLERR | EPOLLIN | EPOLLOUT | EPOLLET);
438 pty->needs_requeue = false;
/*
 * pty_child_fn() - child event callback.
 * @si: signal information delivered for the child.
 *
 * Forwards the event to the callback as PTY_CHILD, passing @si as the
 * data pointer and sizeof(*si) as the length.
 *
 * NOTE(review): how the Pty is obtained from @userdata and what is done
 * with the callback result are not visible in this excerpt.
 */
444 static int pty_child_fn(sd_event_source *source, const siginfo_t *si, void *userdata) {
450 r = pty->event_fn(pty, pty->event_fn_userdata, PTY_CHILD, si, sizeof(*si));
/*
 * pty_attach_event() - connect @pty to an event loop.
 * @event: event loop to attach to.
 * @event_fn: callback that will receive the PTY events.
 * @event_fn_userdata: opaque pointer stored next to @event_fn.
 *
 * Guards: NULL @pty, NULL @event, NULL @event_fn and a non-parent role
 * are each paired with -EINVAL.
 *
 * Any previous attachment is removed first via pty_detach_event(). If the
 * fd is open, an I/O source is added with EPOLLHUP, EPOLLERR, EPOLLIN,
 * EPOLLOUT and EPOLLET, and pty_fd_prepare_fn() is set as its prepare
 * callback. If a child is known, a child source is added as well. The
 * callback and its userdata are then stored in the object.
 *
 * NOTE(review): the remaining arguments of both add calls, the error
 * checks and the return statements are not visible in this excerpt. The
 * trailing pty_detach_event() call presumably belongs to the error path
 * (TODO confirm).
 */
457 int pty_attach_event(Pty *pty, sd_event *event, pty_event_t event_fn, void *event_fn_userdata) {
460 assert_return(pty, -EINVAL);
461 assert_return(event, -EINVAL);
462 assert_return(event_fn, -EINVAL);
463 assert_return(pty_is_parent(pty), -EINVAL);
465 pty_detach_event(pty);
467 if (pty_is_open(pty)) {
468 r = sd_event_add_io(event,
471 EPOLLHUP | EPOLLERR | EPOLLIN | EPOLLOUT | EPOLLET,
477 r = sd_event_source_set_prepare(pty->fd_source, pty_fd_prepare_fn);
482 if (pty_has_child(pty)) {
483 r = sd_event_add_child(event,
493 pty->event_fn = event_fn;
494 pty->event_fn_userdata = event_fn_userdata;
499 pty_detach_event(pty);
/*
 * pty_detach_event() - disconnect @pty from its event loop.
 *
 * Releases the child and the I/O event sources, storing the result of
 * each unref call back into the member, and clears the stored callback
 * and its userdata.
 *
 * NOTE(review): the lines between the signature and the first unref are
 * missing from this excerpt; whether a NULL @pty is tolerated cannot be
 * told from here.
 */
503 void pty_detach_event(Pty *pty) {
507 pty->child_source = sd_event_source_unref(pty->child_source);
508 pty->fd_source = sd_event_source_unref(pty->fd_source);
509 pty->event_fn = NULL;
510 pty->event_fn_userdata = NULL;
/*
 * pty_write() - queue data for transmission to the PTY.
 * @buf: bytes to queue.
 * @size: number of bytes in @buf.
 *
 * Guards: a NULL @pty is paired with -EINVAL; a closed fd and a
 * non-parent role are each paired with -ENODEV.
 *
 * The data is not written directly: it is pushed into the output ring
 * buffer, and needs_requeue is set so that the edge-triggered fd gets
 * armed again. Whether the ring was empty beforehand is recorded in
 * was_empty.
 *
 * NOTE(review): the check of the ring_push() result, the use of
 * was_empty (presumably it gates the needs_requeue assignment, TODO
 * confirm) and the return statement are not visible in this excerpt.
 */
513 int pty_write(Pty *pty, const void *buf, size_t size) {
517 assert_return(pty, -EINVAL);
518 assert_return(pty_is_open(pty), -ENODEV);
519 assert_return(pty_is_parent(pty), -ENODEV);
525 * Push @buf[0..@size] into the output ring-buffer. In case the
526 * ring-buffer wasn't empty beforehand, we're already waiting for
527 * EPOLLOUT and we're done. If it was empty, we have to re-queue the
528 * FD for EPOLLOUT as we're edge-triggered and wouldn't get any new
532 was_empty = ring_get_size(&pty->out_buf) < 1;
534 r = ring_push(&pty->out_buf, buf, size);
539 pty->needs_requeue = true;
/*
 * pty_signal() - issue the TIOCSIG ioctl with @sig on the PTY fd.
 * @sig: signal number passed as the ioctl argument.
 *
 * Guards: a NULL @pty is paired with -EINVAL; a closed fd and a
 * non-parent role are each paired with -ENODEV.
 *
 * Returns 0 on success, or the negated errno when the ioctl fails.
 */
544 int pty_signal(Pty *pty, int sig) {
545 assert_return(pty, -EINVAL);
546 assert_return(pty_is_open(pty), -ENODEV);
547 assert_return(pty_is_parent(pty), -ENODEV);
549 return ioctl(pty->fd, TIOCSIG, sig) < 0 ? -errno : 0;
/*
 * pty_resize() - set the window size of the PTY.
 * @term_width: number of columns, stored in ws_col.
 * @term_height: number of rows, stored in ws_row.
 *
 * Guards: a NULL @pty is paired with -EINVAL; a closed fd and a
 * non-parent role are each paired with -ENODEV.
 *
 * Returns 0 on success, or the negated errno when the TIOCSWINSZ ioctl
 * fails.
 *
 * NOTE(review): the declaration and any initialisation of ws are not
 * visible in this excerpt; confirm the remaining winsize members are
 * zeroed before the ioctl.
 */
552 int pty_resize(Pty *pty, unsigned short term_width, unsigned short term_height) {
555 assert_return(pty, -EINVAL);
556 assert_return(pty_is_open(pty), -ENODEV);
557 assert_return(pty_is_parent(pty), -ENODEV);
560 ws.ws_col = term_width;
561 ws.ws_row = term_height;
564 * This will send SIGWINCH to the pty slave foreground process group.
565 * We will also get one, but we don't need it.
567 return ioctl(pty->fd, TIOCSWINSZ, &ws) < 0 ? -errno : 0;
/*
 * pty_fork() - create a PTY and split into a parent and a child side.
 * @out: result location; the assert_return() guard pairs a NULL @out
 *       with -EINVAL.
 * @event, @event_fn: must be given together or omitted together; any
 *       other combination is paired with -EINVAL.
 * @event_fn_userdata: opaque pointer forwarded to pty_attach_event().
 * @initial_term_width, @initial_term_height: forwarded to pty_resize()
 *       on the parent side.
 *
 * Child side, as visible here: pty_make_child(), pty_setup_child(), then
 * barrier_place_and_sync() with the parent.
 *
 * Parent side, as visible here: pty_make_parent() with the child PID,
 * pty_resize(), pty_attach_event(), then barrier_place_and_sync() with
 * the child.
 *
 * The last two statements abort the barrier and wait for the child with
 * waitpid().
 *
 * NOTE(review): the creation of the Pty, the fork call, the switch
 * between the two sides, all error checks, the conditions guarding the
 * attach and the abort/waitpid lines, and the returns are not visible in
 * this excerpt.
 */
570 pid_t pty_fork(Pty **out, sd_event *event, pty_event_t event_fn, void *event_fn_userdata, unsigned short initial_term_width, unsigned short initial_term_height) {
571 _pty_unref_ Pty *pty = NULL;
575 assert_return(out, -EINVAL);
576 assert_return((event && event_fn) || (!event && !event_fn), -EINVAL);
593 r = pty_make_child(pty);
597 r = pty_setup_child(pty);
601 /* sync with parent */
602 if (!barrier_place_and_sync(&pty->barrier))
605 /* fallthrough and return the child's PTY object */
609 r = pty_make_parent(pty, pid);
613 r = pty_resize(pty, initial_term_width, initial_term_height);
618 r = pty_attach_event(pty, event, event_fn, event_fn_userdata);
623 /* sync with child */
624 if (!barrier_place_and_sync(&pty->barrier)) {
629 /* fallthrough and return the parent's PTY object */
637 barrier_abort(&pty->barrier);
638 waitpid(pty->child, NULL, 0);