1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2014 David Herrmann <dh.herrmann@gmail.com>
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
24 * A PTY object represents a single PTY connection between a master and a
25 * child. The child process is fork()ed so the caller controls what program
28 * Programs like /bin/login tend to perform a vhangup() on their TTY
29 * before running the login procedure. This also causes the pty master
30 * to get an EPOLLHUP event as long as no client has the TTY opened.
31 * This means, we cannot use the TTY connection as a reliable way to track
32 * the client. Instead, we _must_ rely on the PID of the client to track
34 * However, this has the side effect that if the client forks and the
35 * parent exits, we lose them and restart the client. But this seems to
36 * be the expected behavior so we implement it here.
38 * Unfortunately, epoll always polls for EPOLLHUP so as long as the
39 * vhangup() is ongoing, we will _always_ get EPOLLHUP and cannot sleep.
40 * This gets worse if the client closes the TTY but doesn't exit.
41 * Therefore, the fd must be edge-triggered in the epoll-set so we
42 * only get the events once they change.
48 #include <linux/ioctl.h>
56 #include <sys/epoll.h>
57 #include <sys/eventfd.h>
58 #include <sys/ioctl.h>
59 #include <sys/types.h>
/* Capacity in bytes of the in_buf array that read() fills from the pty fd. */
71 #define PTY_BUFSIZE 16384
/*
 * NOTE(review): the declarations below are members that this file accesses
 * through Pty pointers; the enclosing struct header and any further members
 * are not visible in this excerpt.
 *
 * fd_source, child_source: sd-event sources, released with
 *         sd_event_source_unref().
 * in_buf: landing buffer for read() on the pty fd.
 * event_fn_userdata: opaque pointer handed back as the second argument of
 *         every event_fn call.
 * needs_requeue: set when the io-events of the fd have to be set again;
 *         checked and cleared in pty_fd_prepare_fn().
 * role: compared against the PTY_ROLE_* constants.
 */
84 sd_event_source *fd_source;
85 sd_event_source *child_source;
87 char in_buf[PTY_BUFSIZE];
91 void *event_fn_userdata;
93 bool needs_requeue : 1;
94 unsigned int role : 2;
/*
 * Set up a new Pty object for the caller, who passes the result slot @out.
 *
 * Visible steps: a NULL @out is rejected with -EINVAL, the barrier is reset
 * to BARRIER_NULL, a PTY master is opened with posix_openpt() (read/write,
 * not a controlling TTY, close-on-exec, non-blocking), grantpt() is called
 * on that fd and the barrier is created with barrier_create().
 *
 * NOTE(review): the allocation of the object, the error checks following
 * each call and the final hand-over through @out are not visible in this
 * excerpt -- confirm against the full file.
 */
97 int pty_new(Pty **out) {
98 _pty_unref_ Pty *pty = NULL;
101 assert_return(out, -EINVAL);
109 pty->barrier = (Barrier) BARRIER_NULL;
111 pty->fd = posix_openpt(O_RDWR | O_NOCTTY | O_CLOEXEC | O_NONBLOCK);
116 * The slave-node is initialized to uid/gid of the caller of
117 * posix_openpt(). Only if devpts is mounted with fixed uid/gid this is
118 * skipped. In that case, grantpt() can overwrite these, but then you
119 * have to be root to use chown() (or a pt_chown helper has to be
120 * present). In those cases grantpt() really does something,
121 * otherwise it's a no-op. We call grantpt() here to try supporting
122 * those cases, even though no-one uses that, I guess. If you need other
123 * access-rights, set them yourself after this call returns (no, this is
124 * not racy, it looks racy, but races regarding your own UID are never
125 * important as an attacker could ptrace you; and the slave-pty is also
128 r = grantpt(pty->fd);
132 r = barrier_create(&pty->barrier);
/*
 * Reference-count helper for @pty (presumably acquires a reference -- TODO
 * confirm, the increment is not visible in this excerpt).
 *
 * The visible guard singles out a NULL @pty and an object whose ref counter
 * is already below 1. NOTE(review): what is returned in either case is not
 * visible here.
 */
141 Pty *pty_ref(Pty *pty) {
142 if (!pty || pty->ref < 1)
/*
 * Drop one reference on @pty and tear the object down when none is left.
 *
 * The guard short-circuits: a NULL @pty or a ref counter below 1 takes the
 * early branch without touching the counter; otherwise the counter is
 * decremented and the early branch is taken while references remain.
 * On the teardown path the child event source is unreferenced, the barrier
 * is destroyed and the output ring buffer is cleared.
 *
 * NOTE(review): the statement executed by the early branch, any further
 * teardown steps and the return value are not visible in this excerpt.
 */
149 Pty *pty_unref(Pty *pty) {
150 if (!pty || pty->ref < 1 || --pty->ref > 0)
154 pty->child_source = sd_event_source_unref(pty->child_source);
155 barrier_destroy(&pty->barrier);
156 ring_clear(&pty->out_buf);
/*
 * Return a pointer to the Barrier embedded in @pty. The pointer refers to
 * storage inside @pty itself, not to a copy.
 *
 * NOTE(review): one line preceding the return is not visible in this
 * excerpt, so how a NULL @pty is treated cannot be stated from here.
 */
162 Barrier *pty_get_barrier(Pty *pty) {
164 return &pty->barrier;
167 bool pty_is_unknown(Pty *pty) {
168 return pty && pty->role == PTY_ROLE_UNKNOWN;
171 bool pty_is_parent(Pty *pty) {
172 return pty && pty->role == PTY_ROLE_PARENT;
175 bool pty_is_child(Pty *pty) {
176 return pty && pty->role == PTY_ROLE_CHILD;
179 bool pty_has_child(Pty *pty) {
180 return pty_is_parent(pty) && pty->child > 0;
183 pid_t pty_get_child(Pty *pty) {
184 return pty_has_child(pty) ? pty->child : -ECHILD;
187 bool pty_is_open(Pty *pty) {
188 return pty && pty->fd >= 0;
191 int pty_get_fd(Pty *pty) {
192 assert_return(pty, -EINVAL);
194 return pty_is_open(pty) ? pty->fd : -EPIPE;
/*
 * Switch @pty to the child role.
 *
 * Preconditions checked with assert_return: @pty must be non-NULL (-EINVAL)
 * and its role must still be unknown (-EALREADY).
 *
 * Visible steps: the slave device name of the current fd is looked up with
 * ptsname_r() into a 1024-byte stack buffer and opened read/write,
 * close-on-exec and without becoming the controlling TTY. Then the own PID
 * is stored as the child PID, the role becomes PTY_ROLE_CHILD and the
 * barrier is switched to BARRIER_CHILD.
 *
 * NOTE(review): the error checks after ptsname_r()/open(), what is done
 * with the newly opened descriptor and the return statement are not visible
 * in this excerpt.
 */
197 int pty_make_child(Pty *pty) {
198 char slave_name[1024];
201 assert_return(pty, -EINVAL);
202 assert_return(pty_is_unknown(pty), -EALREADY);
204 r = ptsname_r(pty->fd, slave_name, sizeof(slave_name));
208 fd = open(slave_name, O_RDWR | O_CLOEXEC | O_NOCTTY);
214 pty->child = getpid();
215 pty->role = PTY_ROLE_CHILD;
216 barrier_set_role(&pty->barrier, BARRIER_CHILD);
/*
 * Switch @pty to the parent role.
 *
 * Preconditions checked with assert_return: @pty must be non-NULL (-EINVAL)
 * and its role must still be unknown (-EALREADY). The role is then set to
 * PTY_ROLE_PARENT.
 *
 * NOTE(review): how the @child argument is used and the return statement
 * are not visible in this excerpt.
 */
221 int pty_make_parent(Pty *pty, pid_t child) {
222 assert_return(pty, -EINVAL);
223 assert_return(pty_is_unknown(pty), -EALREADY);
226 pty->role = PTY_ROLE_PARENT;
231 int pty_unlock(Pty *pty) {
232 assert_return(pty, -EINVAL);
233 assert_return(pty_is_unknown(pty) || pty_is_parent(pty), -EINVAL);
234 assert_return(pty_is_open(pty), -ENODEV);
236 return unlockpt(pty->fd) < 0 ? -errno : 0;
/*
 * Prepare the calling process to run on the pty held by @pty.
 *
 * Preconditions checked with assert_return: @pty non-NULL (-EINVAL), child
 * role (-EINVAL) and an open fd (-EALREADY).
 *
 * Visible steps, in order:
 *  - sigprocmask_many(SIG_SETMASK, -1) and reset_all_signal_handlers();
 *  - a call whose result lands in pid (not visible here; presumably
 *    setsid() -- TODO confirm), where a failure with EPERM is tolerated;
 *  - TIOCSCTTY on the fd;
 *  - terminal attributes are read, VERASE is set to 010 (backspace) and
 *    IUTF8 is switched on, then written back immediately (TCSANOW);
 *  - the fd is duplicated onto stdin, stdout and stderr;
 *  - the original fd is closed unless it already is one of the three
 *    standard descriptors; pty->fd ends up as the result of safe_close()
 *    or -1.
 *
 * NOTE(review): local declarations, the error branches and the return
 * statement are not visible in this excerpt.
 */
239 int pty_setup_child(Pty *pty) {
244 assert_return(pty, -EINVAL);
245 assert_return(pty_is_child(pty), -EINVAL);
246 assert_return(pty_is_open(pty), -EALREADY);
248 r = sigprocmask_many(SIG_SETMASK, -1);
252 r = reset_all_signal_handlers();
257 if (pid < 0 && errno != EPERM)
260 r = ioctl(pty->fd, TIOCSCTTY, 0);
264 r = tcgetattr(pty->fd, &attr);
268 /* erase character should be normal backspace, PLEASEEE! */
269 attr.c_cc[VERASE] = 010;
270 /* always set UTF8 flag */
271 attr.c_iflag |= IUTF8;
273 r = tcsetattr(pty->fd, TCSANOW, &attr);
277 if (dup2(pty->fd, STDIN_FILENO) != STDIN_FILENO ||
278 dup2(pty->fd, STDOUT_FILENO) != STDOUT_FILENO ||
279 dup2(pty->fd, STDERR_FILENO) != STDERR_FILENO)
282 /* only close FD if it's not a std-fd */
283 pty->fd = (pty->fd > 2) ? safe_close(pty->fd) : -1;
/*
 * Close the fd held by @pty.
 *
 * The I/O event source is unreferenced before the fd is closed, and
 * pty->fd is overwritten with the result of safe_close().
 *
 * NOTE(review): the statement guarded by the !pty_is_open() check is not
 * visible in this excerpt (presumably an early return -- TODO confirm).
 */
288 void pty_close(Pty *pty) {
289 if (!pty_is_open(pty))
292 pty->fd_source = sd_event_source_unref(pty->fd_source);
293 pty->fd = safe_close(pty->fd);
297 * Drain input-queue and dispatch data via the event-handler. Returns <0 on
298 * error, 0 if queue is empty and 1 if we couldn't empty the input queue fast
299 * enough and there's still data left.
/*
 * Read pending input from the pty fd and hand it to the event callback.
 *
 * At most two read() calls are made per invocation. Each read asks for one
 * byte less than the size of in_buf so that a terminating zero can be
 * stored behind the data before event_fn is called with PTY_DATA.
 * A failing read() yields 0 when errno is EAGAIN and -errno otherwise.
 * After the loop needs_requeue is set so that the fd gets queued again.
 *
 * NOTE(review): the handling of a zero-length read, the check of the
 * callback result and the final return statement are not visible in this
 * excerpt.
 */
301 static int pty_dispatch_read(Pty *pty) {
307 * We're edge-triggered, means we need to read the whole queue. This,
308 * however, might cause us to stall if the writer is faster than we
309 * are. Therefore, we read twice and if the second read still returned
310 * data, we reschedule.
313 for (i = 0; i < 2; ++i) {
314 len = read(pty->fd, pty->in_buf, sizeof(pty->in_buf) - 1);
319 return (errno == EAGAIN) ? 0 : -errno;
320 } else if (len == 0) {
324 /* set terminating zero for debugging safety */
325 pty->in_buf[len] = 0;
326 r = pty->event_fn(pty, pty->event_fn_userdata, PTY_DATA, pty->in_buf, len);
331 /* still data left, make sure we're queued again */
332 pty->needs_requeue = true;
338 * Drain output-queue by writing data to the pty. Returns <0 on error, 0 if the
339 * output queue is empty now and 1 if we couldn't empty the output queue fast
340 * enough and there's still data left.
/*
 * Flush queued output from the out_buf ring buffer to the pty fd.
 *
 * At most two rounds are made per invocation: ring_peek() fills the iovec
 * array, writev() sends it, and ring_pull() drops the bytes that were
 * actually written. A failing writev() yields 1 when errno is EAGAIN (data
 * is still queued) and -errno otherwise -- note that this differs from the
 * read side, where EAGAIN yields 0. If the ring buffer is still non-empty
 * after the loop, needs_requeue is set.
 *
 * NOTE(review): local declarations, the branch taken when ring_peek()
 * reports nothing to send, the zero-length write branch and the return
 * statements at the end are not visible in this excerpt.
 */
342 static int pty_dispatch_write(Pty *pty) {
349 * Same as pty_dispatch_read(), we're edge-triggered so we need to call
350 * write() until either all data is written or it returns EAGAIN. We
351 * call it twice and if it still writes successfully, we reschedule.
354 for (i = 0; i < 2; ++i) {
355 num = ring_peek(&pty->out_buf, vec);
359 len = writev(pty->fd, vec, (int)num);
364 return (errno == EAGAIN) ? 1 : -errno;
365 } else if (len == 0) {
369 ring_pull(&pty->out_buf, (size_t)len);
372 /* still data left, make sure we're queued again */
373 if (ring_get_size(&pty->out_buf) > 0) {
374 pty->needs_requeue = true;
/*
 * I/O callback with the sd-event io-handler signature; dispatches on the
 * epoll bits in @revents.
 *
 * Three status values start at 0 and a negative value in any of them marks
 * a failure: r_hup (EPOLLHUP/EPOLLERR branch), r_write (result of
 * pty_dispatch_write() on EPOLLOUT) and r_read (result of
 * pty_dispatch_read()). Reading is attempted not only on EPOLLIN but also
 * whenever r_hup or r_write is negative, so that pending input is fetched
 * before a hang-up is reported. Only when one of the three values is
 * negative after that is event_fn called with PTY_HUP.
 *
 * NOTE(review): the statement executed for EPOLLHUP/EPOLLERR, the condition
 * guarding the early "return 0", the lookup of the Pty from @userdata and
 * the tail of the function are not visible in this excerpt.
 */
381 static int pty_fd_fn(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
383 int r_hup = 0, r_write = 0, r_read = 0, r;
386 * Whenever we encounter I/O errors, we have to make sure to drain the
387 * input queue first, before we handle any HUP. A child might send us
388 * a message and immediately close the queue. We must not handle the
389 * HUP first or we loose data.
390 * Therefore, if we read a message successfully, we always return
391 * success and wait for the next event-loop iteration. Furthermore,
392 * whenever there is a write-error, we must try reading from the input
393 * queue even if EPOLLIN is not set. The input might have arrived in
394 * between epoll_wait() and write(). Therefore, write-errors are only
395 * ever handled if the input-queue is empty. In all other cases they
396 * are ignored until either reading fails or the input queue is empty.
399 if (revents & (EPOLLHUP | EPOLLERR))
402 if (revents & EPOLLOUT)
403 r_write = pty_dispatch_write(pty);
405 /* Awesome! Kernel signals HUP without IN but queues are not empty.. */
406 if ((revents & EPOLLIN) || r_hup < 0 || r_write < 0) {
407 r_read = pty_dispatch_read(pty);
409 return 0; /* still data left to fetch next round */
412 if (r_hup < 0 || r_write < 0 || r_read < 0) {
413 /* PTY closed and input-queue drained */
415 r = pty->event_fn(pty, pty->event_fn_userdata, PTY_HUP, NULL, 0);
/*
 * Prepare callback installed on the I/O event source.
 *
 * When needs_requeue is set, the io-events of @source are set once more to
 * the full edge-triggered mask (HUP, ERR, IN, OUT, ET) and the flag is
 * cleared afterwards. When the flag is not set, the visible code does
 * nothing.
 *
 * NOTE(review): the lookup of the Pty from @userdata, the check of the
 * sd_event_source_set_io_events() result and the return statement are not
 * visible in this excerpt.
 */
423 static int pty_fd_prepare_fn(sd_event_source *source, void *userdata) {
427 if (pty->needs_requeue) {
429 * We're edge-triggered. In case we couldn't handle all events
430 * or in case new write-data is queued, we set needs_requeue.
431 * Before going asleep, we set the io-events *again*. sd-event
432 * notices that we're edge-triggered and forwards the call to
433 * the kernel even if the events didn't change. The kernel will
434 * check the events and re-queue us on the ready queue in case
435 * an event is pending.
437 r = sd_event_source_set_io_events(source, EPOLLHUP | EPOLLERR | EPOLLIN | EPOLLOUT | EPOLLET);
439 pty->needs_requeue = false;
/*
 * Child-event callback: forwards the siginfo_t it received to event_fn as a
 * PTY_CHILD event, passing @si as the data pointer and sizeof(*si) as the
 * length.
 *
 * NOTE(review): the lookup of the Pty from @userdata, anything done before
 * or after the callback and the return statement are not visible in this
 * excerpt.
 */
445 static int pty_child_fn(sd_event_source *source, const siginfo_t *si, void *userdata) {
451 r = pty->event_fn(pty, pty->event_fn_userdata, PTY_CHILD, si, sizeof(*si));
/*
 * Hook @pty up to the sd-event loop @event and register @event_fn, which is
 * later called with @event_fn_userdata.
 *
 * Preconditions checked with assert_return, each yielding -EINVAL: @pty,
 * @event and @event_fn must be non-NULL and @pty must be in the parent
 * role.
 *
 * Any previous attachment is dropped first via pty_detach_event(). If the
 * pty is open, an I/O source with the edge-triggered mask (HUP, ERR, IN,
 * OUT, ET) is added and pty_fd_prepare_fn is installed as its prepare
 * callback. If a child is recorded, a child source is added as well. Then
 * the callback and its userdata are stored.
 *
 * NOTE(review): the remaining arguments of the two sd_event_add_*() calls,
 * the error checks and the return statements are not visible in this
 * excerpt; the trailing pty_detach_event() call presumably belongs to the
 * error path -- TODO confirm.
 */
458 int pty_attach_event(Pty *pty, sd_event *event, pty_event_t event_fn, void *event_fn_userdata) {
461 assert_return(pty, -EINVAL);
462 assert_return(event, -EINVAL);
463 assert_return(event_fn, -EINVAL);
464 assert_return(pty_is_parent(pty), -EINVAL);
466 pty_detach_event(pty);
468 if (pty_is_open(pty)) {
469 r = sd_event_add_io(event,
472 EPOLLHUP | EPOLLERR | EPOLLIN | EPOLLOUT | EPOLLET,
478 r = sd_event_source_set_prepare(pty->fd_source, pty_fd_prepare_fn);
483 if (pty_has_child(pty)) {
484 r = sd_event_add_child(event,
494 pty->event_fn = event_fn;
495 pty->event_fn_userdata = event_fn_userdata;
500 pty_detach_event(pty);
/*
 * Undo pty_attach_event(): both event sources (child and I/O) are
 * unreferenced and their slots overwritten with the result of
 * sd_event_source_unref(), and the stored callback and its userdata are
 * reset to NULL.
 *
 * NOTE(review): the lines between the signature and the first statement
 * are not visible in this excerpt, so how a NULL @pty is treated cannot be
 * stated from here.
 */
504 void pty_detach_event(Pty *pty) {
508 pty->child_source = sd_event_source_unref(pty->child_source);
509 pty->fd_source = sd_event_source_unref(pty->fd_source);
510 pty->event_fn = NULL;
511 pty->event_fn_userdata = NULL;
/*
 * Queue @size bytes from @buf for output on the pty.
 *
 * Preconditions checked with assert_return: @pty non-NULL (-EINVAL), pty
 * open (-ENODEV) and parent role (-ENODEV).
 *
 * The data is not written directly; it is appended to the out_buf ring
 * buffer with ring_push(). Whether the ring buffer was empty before the
 * push is recorded in was_empty, and needs_requeue is set so that the fd
 * gets re-armed for EPOLLOUT.
 *
 * NOTE(review): local declarations, the check of the ring_push() result,
 * the condition guarding the needs_requeue assignment (presumably
 * was_empty -- TODO confirm) and the return statement are not visible in
 * this excerpt.
 */
514 int pty_write(Pty *pty, const void *buf, size_t size) {
518 assert_return(pty, -EINVAL);
519 assert_return(pty_is_open(pty), -ENODEV);
520 assert_return(pty_is_parent(pty), -ENODEV);
526 * Push @buf[0..@size] into the output ring-buffer. In case the
527 * ring-buffer wasn't empty beforehand, we're already waiting for
528 * EPOLLOUT and we're done. If it was empty, we have to re-queue the
529 * FD for EPOLLOUT as we're edge-triggered and wouldn't get any new
533 was_empty = ring_get_size(&pty->out_buf) < 1;
535 r = ring_push(&pty->out_buf, buf, size);
540 pty->needs_requeue = true;
545 int pty_signal(Pty *pty, int sig) {
546 assert_return(pty, -EINVAL);
547 assert_return(pty_is_open(pty), -ENODEV);
548 assert_return(pty_is_parent(pty), -ENODEV);
550 return ioctl(pty->fd, TIOCSIG, sig) < 0 ? -errno : 0;
/*
 * Set the terminal window size of the pty to @term_width columns and
 * @term_height rows using the TIOCSWINSZ ioctl.
 *
 * Returns 0 on success and -errno when the ioctl fails. Via assert_return:
 * -EINVAL for a NULL @pty, -ENODEV when the pty is not open or not in the
 * parent role.
 *
 * NOTE(review): the declaration of ws and any initialisation of its other
 * members are not visible in this excerpt.
 */
553 int pty_resize(Pty *pty, unsigned short term_width, unsigned short term_height) {
556 assert_return(pty, -EINVAL);
557 assert_return(pty_is_open(pty), -ENODEV);
558 assert_return(pty_is_parent(pty), -ENODEV);
561 ws.ws_col = term_width;
562 ws.ws_row = term_height;
565 * This will send SIGWINCH to the pty slave foreground process group.
566 * We will also get one, but we don't need it.
568 return ioctl(pty->fd, TIOCSWINSZ, &ws) < 0 ? -errno : 0;
/*
 * Convenience wrapper that sets up a Pty for both sides of a fork.
 *
 * Preconditions checked with assert_return, each yielding -EINVAL: @out
 * must be non-NULL, and @event and @event_fn must either both be given or
 * both be NULL.
 *
 * Visible child-side steps: pty_make_child(), pty_setup_child(), then
 * barrier_place_and_sync() to synchronise with the parent.
 * Visible parent-side steps: pty_make_parent() with the child PID,
 * pty_resize() to the initial terminal size, pty_attach_event(), then
 * barrier_place_and_sync() to synchronise with the child.
 * A visible failure path aborts the barrier and reaps the child with
 * waitpid().
 *
 * NOTE(review): the creation of the Pty, the fork itself, the branching
 * between child and parent, every error check, the guard (if any) around
 * pty_attach_event() for the no-event case, the hand-over through @out and
 * the return values are not visible in this excerpt -- confirm against the
 * full file.
 */
571 pid_t pty_fork(Pty **out, sd_event *event, pty_event_t event_fn, void *event_fn_userdata, unsigned short initial_term_width, unsigned short initial_term_height) {
572 _pty_unref_ Pty *pty = NULL;
576 assert_return(out, -EINVAL);
577 assert_return((event && event_fn) || (!event && !event_fn), -EINVAL);
594 r = pty_make_child(pty);
598 r = pty_setup_child(pty);
602 /* sync with parent */
603 if (!barrier_place_and_sync(&pty->barrier))
606 /* fallthrough and return the child's PTY object */
610 r = pty_make_parent(pty, pid);
614 r = pty_resize(pty, initial_term_width, initial_term_height);
619 r = pty_attach_event(pty, event, event_fn, event_fn_userdata);
624 /* sync with child */
625 if (!barrier_place_and_sync(&pty->barrier)) {
630 /* fallthrough and return the parent's PTY object */
638 barrier_abort(&pty->barrier);
639 waitpid(pty->child, NULL, 0);