1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2014 David Herrmann <dh.herrmann@gmail.com>
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
24 * A PTY object represents a single PTY connection between a master and a
25 * child. The child process is fork()ed so the caller controls what program
28 * Programs like /bin/login tend to perform a vhangup() on their TTY
29 * before running the login procedure. This also causes the pty master
30 * to get an EPOLLHUP event as long as no client has the TTY opened.
31 * This means we cannot use the TTY connection as a reliable way to track
32 * the client. Instead, we _must_ rely on the PID of the client to track
34 * However, this has the side effect that if the client forks and the
35 * parent exits, we lose them and restart the client. But this seems to
36 * be the expected behavior so we implement it here.
38 * Unfortunately, epoll always polls for EPOLLHUP so as long as the
39 * vhangup() is ongoing, we will _always_ get EPOLLHUP and cannot sleep.
40 * This gets worse if the client closes the TTY but doesn't exit.
41 * Therefore, the fd must be edge-triggered in the epoll-set so we
42 * only get the events once they change.
51 #include <sys/epoll.h>
52 #include <sys/ioctl.h>
/*
 * Size in bytes of the per-object input buffer (in_buf). pty_dispatch_read()
 * asks read() for at most PTY_BUFSIZE - 1 bytes so that a terminating zero
 * always fits behind the data.
 */
64 #define PTY_BUFSIZE 4096
/*
 * I/O event source for the PTY fd. pty_attach_event() installs
 * pty_fd_prepare_fn() on it; pty_close() and pty_detach_event() drop it.
 */
77 sd_event_source *fd_source;
/* child event source; dropped by pty_unref() and pty_detach_event() */
78 sd_event_source *child_source;
/* staging buffer that pty_dispatch_read() fills and hands to event_fn as PTY_DATA */
80 char in_buf[PTY_BUFSIZE];
/* opaque pointer passed back as the second argument of every event_fn call */
84 void *event_fn_userdata;
/*
 * Set by pty_dispatch_read(), pty_dispatch_write() and pty_write() when the
 * fd has to be re-armed; consumed and cleared by pty_fd_prepare_fn().
 */
86 bool needs_requeue : 1;
/* compared against PTY_ROLE_UNKNOWN, PTY_ROLE_PARENT and PTY_ROLE_CHILD */
87 unsigned int role : 2;
/*
 * pty_new() - set up a new Pty object.
 * @out: location for the resulting object; NULL is rejected with -EINVAL.
 *
 * Visible setup steps: reset the barrier to BARRIER_NULL, open a PTY master
 * via posix_openpt(), call grantpt() on it and create the barrier.
 * NOTE(review): allocation, error checks and the hand-over to @out are
 * presumably performed between these steps - confirm against the full body.
 */
90 int pty_new(Pty **out) {
/* NOTE(review): _pty_unref_ presumably releases the object on early return - confirm */
91 _pty_unref_ Pty *pty = NULL;
94 assert_return(out, -EINVAL);
/* start from a defined "no barrier" state before anything can fail */
102 pty->barrier = (Barrier) BARRIER_NULL;
/* master side: non-blocking, close-on-exec, and never our controlling TTY */
104 pty->fd = posix_openpt(O_RDWR | O_NOCTTY | O_CLOEXEC | O_NONBLOCK);
109 * The slave-node is initialized to uid/gid of the caller of
110 * posix_openpt(). Only if devpts is mounted with fixed uid/gid this is
111 * skipped. In that case, grantpt() can overwrite these, but then you
112 * have to be root to use chown() (or a pt_chown helper has to be
113 * present). In those cases grantpt() really does something,
114 * otherwise it's a no-op. We call grantpt() here to try supporting
115 * those cases, even though no-one uses that, I guess. If you need other
116 * access-rights, set them yourself after this call returns (no, this is
117 * not racy, it looks racy, but races regarding your own UID are never
118 * important as an attacker could ptrace you; and the slave-pty is also
121 r = grantpt(pty->fd);
/* the barrier is what pty_fork() later uses to synchronise parent and child */
125 r = barrier_create(&pty->barrier);
/*
 * pty_ref() - reference-counting entry point for @pty.
 * @pty: object to reference; may be NULL.
 *
 * The guard below singles out a NULL pointer and objects whose reference
 * count is already below 1.
 * NOTE(review): presumably those yield NULL and all other objects get
 * pty->ref incremented - confirm against the full body.
 */
134 Pty *pty_ref(Pty *pty) {
135 if (!pty || pty->ref < 1)
/*
 * pty_unref() - drop one reference to @pty and tear it down on the last one.
 * @pty: object to release; may be NULL.
 *
 * The teardown visible here releases the child event source, destroys the
 * barrier and clears the output ring buffer.
 */
142 Pty *pty_unref(Pty *pty) {
/*
 * Guard: NULL object, object with a reference count below 1, or references
 * still remaining after the decrement. Note that the decrement itself is
 * part of this condition.
 */
143 if (!pty || pty->ref < 1 || --pty->ref > 0)
/* sd_event_source_unref()'s return value is stored back into the field */
147 pty->child_source = sd_event_source_unref(pty->child_source);
148 barrier_destroy(&pty->barrier);
149 ring_clear(&pty->out_buf);
/*
 * pty_get_barrier() - access the barrier embedded in @pty.
 * @pty: PTY object; dereferenced unconditionally by the return statement.
 *
 * Returns the address of the member itself, not a copy.
 */
155 Barrier *pty_get_barrier(Pty *pty) {
157 return &pty->barrier;
/*
 * pty_is_unknown() - true if @pty is non-NULL and its role is still
 * PTY_ROLE_UNKNOWN. A NULL @pty yields false.
 */
160 bool pty_is_unknown(Pty *pty) {
161 return pty && pty->role == PTY_ROLE_UNKNOWN;
/*
 * pty_is_parent() - true if @pty is non-NULL and has the PTY_ROLE_PARENT
 * role. A NULL @pty yields false.
 */
164 bool pty_is_parent(Pty *pty) {
165 return pty && pty->role == PTY_ROLE_PARENT;
/*
 * pty_is_child() - true if @pty is non-NULL and has the PTY_ROLE_CHILD
 * role. A NULL @pty yields false.
 */
168 bool pty_is_child(Pty *pty) {
169 return pty && pty->role == PTY_ROLE_CHILD;
/*
 * pty_has_child() - true if @pty is in the parent role and a positive child
 * PID is recorded. NULL is handled by pty_is_parent().
 */
172 bool pty_has_child(Pty *pty) {
173 return pty_is_parent(pty) && pty->child > 0;
/*
 * pty_get_child() - PID of the child tracked by @pty.
 *
 * Returns pty->child when pty_has_child() holds, otherwise the negative
 * error code -ECHILD (returned through the pid_t return type).
 */
176 pid_t pty_get_child(Pty *pty) {
177 return pty_has_child(pty) ? pty->child : -ECHILD;
/*
 * pty_is_open() - true if @pty is non-NULL and holds a file descriptor
 * (fd >= 0). A NULL @pty yields false.
 */
180 bool pty_is_open(Pty *pty) {
181 return pty && pty->fd >= 0;
/*
 * pty_get_fd() - file descriptor held by @pty.
 *
 * Returns the fd while the PTY is open, -EPIPE once it is closed, and
 * -EINVAL for a NULL @pty.
 */
184 int pty_get_fd(Pty *pty) {
185 assert_return(pty, -EINVAL);
187 return pty_is_open(pty) ? pty->fd : -EPIPE;
/*
 * pty_make_child() - turn a PTY of unknown role into the child side.
 * @pty: PTY object; NULL gives -EINVAL, an already assigned role -EALREADY.
 *
 * Resolves the slave device name of the master fd, opens the slave, records
 * the calling process as the child and switches both the PTY and its barrier
 * to the child role.
 * NOTE(review): how the master fd is replaced by the freshly opened slave fd
 * is not visible here - confirm against the full body.
 */
190 int pty_make_child(Pty *pty) {
/* NOTE(review): _cleanup_free_ presumably frees the name on scope exit - confirm */
191 _cleanup_free_ char *slave_name = NULL;
194 assert_return(pty, -EINVAL);
195 assert_return(pty_is_unknown(pty), -EALREADY);
197 r = ptsname_malloc(pty->fd, &slave_name);
/* the slave is opened close-on-exec and without becoming the controlling TTY */
201 fd = open(slave_name, O_RDWR | O_CLOEXEC | O_NOCTTY);
/* in the child role, "child" is simply the calling process */
207 pty->child = getpid();
208 pty->role = PTY_ROLE_CHILD;
209 barrier_set_role(&pty->barrier, BARRIER_CHILD);
/*
 * pty_make_parent() - turn a PTY of unknown role into the parent side.
 * @pty: PTY object; NULL gives -EINVAL, an already assigned role -EALREADY.
 * @child: PID of the child process.
 *   NOTE(review): presumably stored in pty->child - confirm.
 */
214 int pty_make_parent(Pty *pty, pid_t child) {
215 assert_return(pty, -EINVAL);
216 assert_return(pty_is_unknown(pty), -EALREADY);
219 pty->role = PTY_ROLE_PARENT;
/*
 * pty_unlock() - call unlockpt() on the PTY fd.
 * @pty: PTY object in the unknown or parent role.
 *
 * Returns 0 on success and -errno if unlockpt() fails. It returns -EINVAL
 * for a NULL @pty or one in the child role, and -ENODEV if the PTY is not
 * open.
 */
224 int pty_unlock(Pty *pty) {
225 assert_return(pty, -EINVAL);
226 assert_return(pty_is_unknown(pty) || pty_is_parent(pty), -EINVAL);
227 assert_return(pty_is_open(pty), -ENODEV);
229 return unlockpt(pty->fd) < 0 ? -errno : 0;
/*
 * pty_setup_child() - prepare the calling (child) process to run on the PTY.
 * @pty: PTY object in the child role with an open fd.
 *
 * Visible steps: reset signal mask and handlers, make the PTY the controlling
 * terminal, adjust the terminal attributes (erase character, UTF-8 input),
 * redirect stdin/stdout/stderr to the PTY and finally give up the original
 * fd. Returns -EINVAL for a NULL @pty or a non-child role and -EALREADY if
 * the PTY is not open.
 */
232 int pty_setup_child(Pty *pty) {
237 assert_return(pty, -EINVAL);
238 assert_return(pty_is_child(pty), -EINVAL);
239 assert_return(pty_is_open(pty), -EALREADY);
/* NOTE(review): presumably installs an empty signal mask (-1 looks like a list terminator) - confirm */
241 r = sigprocmask_many(SIG_SETMASK, -1);
245 r = reset_all_signal_handlers();
/* NOTE(review): pid presumably holds the result of a session-creating call; EPERM is tolerated - confirm */
250 if (pid < 0 && errno != EPERM)
/* request the PTY as controlling terminal of this process */
253 r = ioctl(pty->fd, TIOCSCTTY, 0);
/* read-modify-write of the terminal attributes */
257 r = tcgetattr(pty->fd, &attr);
261 /* erase character should be normal backspace, PLEASEEE! */
262 attr.c_cc[VERASE] = 010;
263 /* always set UTF8 flag */
264 attr.c_iflag |= IUTF8;
266 r = tcsetattr(pty->fd, TCSANOW, &attr);
/* any dup2() that does not return the requested std-fd counts as failure */
270 if (dup2(pty->fd, STDIN_FILENO) != STDIN_FILENO ||
271 dup2(pty->fd, STDOUT_FILENO) != STDOUT_FILENO ||
272 dup2(pty->fd, STDERR_FILENO) != STDERR_FILENO)
275 /* only close FD if it's not a std-fd */
/* in both cases pty->fd is overwritten, so the object no longer counts as open */
276 pty->fd = (pty->fd > 2) ? safe_close(pty->fd) : -1;
/*
 * pty_close() - close the PTY fd and stop watching it.
 * @pty: PTY object; the guard below covers NULL and already-closed objects
 *       (pty_is_open() is false for both).
 */
281 void pty_close(Pty *pty) {
282 if (!pty_is_open(pty))
/* drop the I/O event source before the fd it watches goes away */
285 pty->fd_source = sd_event_source_unref(pty->fd_source);
/* safe_close()'s return value replaces the stored fd */
286 pty->fd = safe_close(pty->fd);
290 * Drain input-queue and dispatch data via the event-handler. Returns <0 on
291 * error, 0 if queue is empty and 1 if we couldn't empty the input queue fast
292 * enough and there's still data left.
/*
 * pty_dispatch_read() - read pending input and pass it to the event callback.
 * @pty: PTY object whose fd, in_buf and event_fn are used.
 *
 * Each successfully read chunk is delivered as a PTY_DATA event. A read()
 * failing with EAGAIN ends the function with 0; any other read() error is
 * returned as -errno. If all 8 rounds complete, needs_requeue is set.
 */
294 static int pty_dispatch_read(Pty *pty) {
300 * We're edge-triggered, means we need to read the whole queue. This,
301 * however, might cause us to stall if the writer is faster than we
302 * are. Therefore, try reading as much as 8 times (32KiB) and only
306 for (i = 0; i < 8; ++i) {
/* one byte of in_buf is held back for the terminating zero written below */
307 len = read(pty->fd, pty->in_buf, sizeof(pty->in_buf) - 1);
/* EAGAIN means the queue is drained, which is reported as 0 rather than as an error */
312 return (errno == EAGAIN) ? 0 : -errno;
313 } else if (len == 0) {
317 /* set terminating zero for debugging safety */
318 pty->in_buf[len] = 0;
/* the callback receives the buffer and the byte count, not counting the zero */
319 r = pty->event_fn(pty, pty->event_fn_userdata, PTY_DATA, pty->in_buf, len);
324 /* still data left, make sure we're queued again */
325 pty->needs_requeue = true;
331 * Drain output-queue by writing data to the pty. Returns <0 on error, 0 if the
332 * output queue is empty now and 1 if we couldn't empty the output queue fast
333 * enough and there's still data left.
/*
 * pty_dispatch_write() - flush queued output from the ring buffer to the PTY.
 * @pty: PTY object whose fd and out_buf are used.
 *
 * Makes up to two writev() attempts. A writev() failing with EAGAIN returns
 * 1 (data is still queued); any other writev() error is returned as -errno.
 * If data remains after the loop, needs_requeue is set.
 */
335 static int pty_dispatch_write(Pty *pty) {
342 * Same as pty_dispatch_read(), we're edge-triggered so we need to call
343 * write() until either all data is written or it returns EAGAIN. We
344 * call it twice and if it still writes successfully, we reschedule.
347 for (i = 0; i < 2; ++i) {
/* NOTE(review): ring_peek() presumably fills vec and returns the number of entries - confirm */
348 num = ring_peek(&pty->out_buf, vec);
352 len = writev(pty->fd, vec, (int)num);
/* unlike the read side, EAGAIN maps to 1 because unwritten data is still queued */
357 return (errno == EAGAIN) ? 1 : -errno;
358 } else if (len == 0) {
/* remove only what writev() reported as written */
362 ring_pull(&pty->out_buf, (size_t)len);
365 /* still data left, make sure we're queued again */
366 if (ring_get_size(&pty->out_buf) > 0) {
367 pty->needs_requeue = true;
/*
 * pty_fd_fn() - I/O event callback for the PTY fd.
 * @source: event source that fired
 * @fd: file descriptor the event refers to
 * @revents: EPOLL* bits reported for the fd
 * @userdata: callback context.
 *   NOTE(review): presumably the Pty object - confirm.
 *
 * Order of handling: write first, then read, and only then hang-up or error.
 * A PTY_HUP event is delivered to the user callback once one of the three
 * result codes is negative.
 */
374 static int pty_fd_fn(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
/* separate result codes so that a failure in one stage can be acted on after the others ran */
376 int r_hup = 0, r_write = 0, r_read = 0, r;
379 * Whenever we encounter I/O errors, we have to make sure to drain the
380 * input queue first, before we handle any HUP. A child might send us
381 * a message and immediately close the queue. We must not handle the
382 * HUP first or we loose data.
383 * Therefore, if we read a message successfully, we always return
384 * success and wait for the next event-loop iteration. Furthermore,
385 * whenever there is a write-error, we must try reading from the input
386 * queue even if EPOLLIN is not set. The input might have arrived in
387 * between epoll_wait() and write(). Therefore, write-errors are only
388 * ever handled if the input-queue is empty. In all other cases they
389 * are ignored until either reading fails or the input queue is empty.
392 if (revents & (EPOLLHUP | EPOLLERR))
395 if (revents & EPOLLOUT)
396 r_write = pty_dispatch_write(pty);
398 /* Awesome! Kernel signals HUP without IN but queues are not empty.. */
399 if ((revents & EPOLLIN) || r_hup < 0 || r_write < 0) {
400 r_read = pty_dispatch_read(pty);
402 return 0; /* still data left to fetch next round */
405 if (r_hup < 0 || r_write < 0 || r_read < 0) {
406 /* PTY closed and input-queue drained */
/* PTY_HUP carries no payload: NULL pointer and zero length */
408 r = pty->event_fn(pty, pty->event_fn_userdata, PTY_HUP, NULL, 0);
/*
 * pty_fd_prepare_fn() - prepare callback of the PTY I/O event source.
 * @source: the I/O event source (installed via sd_event_source_set_prepare()
 *          in pty_attach_event())
 * @userdata: callback context.
 *   NOTE(review): presumably the Pty object - confirm.
 *
 * If needs_requeue is set, the event mask is written again and the flag is
 * cleared.
 */
416 static int pty_fd_prepare_fn(sd_event_source *source, void *userdata) {
420 if (pty->needs_requeue) {
422 * We're edge-triggered. In case we couldn't handle all events
423 * or in case new write-data is queued, we set needs_requeue.
424 * Before going asleep, we set the io-events *again*. sd-event
425 * notices that we're edge-triggered and forwards the call to
426 * the kernel even if the events didn't change. The kernel will
427 * check the events and re-queue us on the ready queue in case
428 * an event is pending.
430 r = sd_event_source_set_io_events(source, EPOLLHUP | EPOLLERR | EPOLLIN | EPOLLOUT | EPOLLET);
432 pty->needs_requeue = false;
/*
 * pty_child_fn() - child event callback.
 * @source: event source that fired
 * @si: siginfo describing the child state change
 * @userdata: callback context.
 *   NOTE(review): presumably the Pty object - confirm.
 *
 * Forwards @si to the user callback as a PTY_CHILD event, with sizeof(*si)
 * as the payload length.
 */
438 static int pty_child_fn(sd_event_source *source, const siginfo_t *si, void *userdata) {
444 r = pty->event_fn(pty, pty->event_fn_userdata, PTY_CHILD, si, sizeof(*si));
/*
 * pty_attach_event() - hook a parent-side PTY into an sd-event loop.
 * @pty: PTY object in the parent role
 * @event: event loop to attach to
 * @event_fn: user callback for PTY events
 * @event_fn_userdata: opaque pointer handed back to @event_fn
 *
 * Any previous attachment is dropped first. An I/O source is added only
 * while the PTY is open, and a child source only while a child is tracked.
 * Returns -EINVAL if @pty, @event or @event_fn is NULL or @pty is not in
 * the parent role.
 */
451 int pty_attach_event(Pty *pty, sd_event *event, pty_event_t event_fn, void *event_fn_userdata) {
454 assert_return(pty, -EINVAL);
455 assert_return(event, -EINVAL);
456 assert_return(event_fn, -EINVAL);
457 assert_return(pty_is_parent(pty), -EINVAL);
/* start from a clean state: no sources, no callback */
459 pty_detach_event(pty);
461 if (pty_is_open(pty)) {
462 r = sd_event_add_io(event,
465 EPOLLHUP | EPOLLERR | EPOLLIN | EPOLLOUT | EPOLLET,
/* the prepare hook re-arms the fd, see pty_fd_prepare_fn() */
471 r = sd_event_source_set_prepare(pty->fd_source, pty_fd_prepare_fn);
476 if (pty_has_child(pty)) {
477 r = sd_event_add_child(event,
487 pty->event_fn = event_fn;
488 pty->event_fn_userdata = event_fn_userdata;
/* NOTE(review): presumably the error path, undoing a partial attachment - confirm */
493 pty_detach_event(pty);
/*
 * pty_detach_event() - undo pty_attach_event().
 * @pty: PTY object to detach.
 *
 * Releases both event sources and clears the user callback together with
 * its userdata pointer.
 */
497 void pty_detach_event(Pty *pty) {
501 pty->child_source = sd_event_source_unref(pty->child_source);
502 pty->fd_source = sd_event_source_unref(pty->fd_source);
503 pty->event_fn = NULL;
504 pty->event_fn_userdata = NULL;
/*
 * pty_write() - queue data for writing to the PTY.
 * @pty: open PTY object in the parent role
 * @buf: data to queue
 * @size: number of bytes in @buf
 *
 * The data is only pushed into the output ring buffer here.
 * pty_dispatch_write() later writes it to the fd. Returns -EINVAL for a
 * NULL @pty and -ENODEV if the PTY is not open or not in the parent role.
 */
507 int pty_write(Pty *pty, const void *buf, size_t size) {
511 assert_return(pty, -EINVAL);
512 assert_return(pty_is_open(pty), -ENODEV);
513 assert_return(pty_is_parent(pty), -ENODEV);
519 * Push @buf[0..@size] into the output ring-buffer. In case the
520 * ring-buffer wasn't empty beforehand, we're already waiting for
521 * EPOLLOUT and we're done. If it was empty, we have to re-queue the
522 * FD for EPOLLOUT as we're edge-triggered and wouldn't get any new
526 was_empty = ring_get_size(&pty->out_buf) < 1;
528 r = ring_push(&pty->out_buf, buf, size);
/* NOTE(review): presumably only done when was_empty is true - confirm */
533 pty->needs_requeue = true;
/*
 * pty_signal() - issue a TIOCSIG ioctl with @sig on the PTY fd.
 * @pty: open PTY object in the parent role
 * @sig: signal number passed to the ioctl
 *
 * Returns 0 on success and -errno if the ioctl fails. It returns -EINVAL
 * for a NULL @pty and -ENODEV if the PTY is not open or not in the parent
 * role.
 */
538 int pty_signal(Pty *pty, int sig) {
539 assert_return(pty, -EINVAL);
540 assert_return(pty_is_open(pty), -ENODEV);
541 assert_return(pty_is_parent(pty), -ENODEV);
543 return ioctl(pty->fd, TIOCSIG, sig) < 0 ? -errno : 0;
/*
 * pty_resize() - set the terminal window size of the PTY.
 * @pty: open PTY object in the parent role
 * @term_width: number of columns (stored in ws_col)
 * @term_height: number of rows (stored in ws_row)
 *
 * Returns 0 on success and -errno if the TIOCSWINSZ ioctl fails. It returns
 * -EINVAL for a NULL @pty and -ENODEV if the PTY is not open or not in the
 * parent role.
 */
546 int pty_resize(Pty *pty, unsigned short term_width, unsigned short term_height) {
/* members not named in the initializer are zero-initialized */
547 struct winsize ws = {
548 .ws_col = term_width,
549 .ws_row = term_height,
552 assert_return(pty, -EINVAL);
553 assert_return(pty_is_open(pty), -ENODEV);
554 assert_return(pty_is_parent(pty), -ENODEV);
557 * This will send SIGWINCH to the pty slave foreground process group.
558 * We will also get one, but we don't need it.
560 return ioctl(pty->fd, TIOCSWINSZ, &ws) < 0 ? -errno : 0;
/*
 * pty_fork() - create a PTY and split into a parent and a child side.
 * @out: location for the resulting PTY object; NULL gives -EINVAL
 * @event: event loop for the parent side, or NULL
 * @event_fn: user callback for PTY events, or NULL
 * @event_fn_userdata: opaque pointer handed back to @event_fn
 * @initial_term_width: initial number of terminal columns
 * @initial_term_height: initial number of terminal rows
 *
 * @event and @event_fn must be given together or both be NULL (-EINVAL
 * otherwise). The child side runs pty_make_child() and pty_setup_child();
 * the parent side runs pty_make_parent(), pty_resize() and
 * pty_attach_event(). Both sides then meet at the barrier.
 * NOTE(review): PTY creation, the fork itself and the return values are not
 * visible here - confirm against the full body.
 */
563 pid_t pty_fork(Pty **out, sd_event *event, pty_event_t event_fn, void *event_fn_userdata, unsigned short initial_term_width, unsigned short initial_term_height) {
564 _pty_unref_ Pty *pty = NULL;
568 assert_return(out, -EINVAL);
/* a loop without a callback, or a callback without a loop, is a caller error */
569 assert_return((event && event_fn) || (!event && !event_fn), -EINVAL);
/* child side: take the child role, then set up signals, TTY and std-fds */
586 r = pty_make_child(pty);
590 r = pty_setup_child(pty);
594 /* sync with parent */
595 if (!barrier_place_and_sync(&pty->barrier))
598 /* fallthrough and return the child's PTY object */
/* parent side: record the child, apply the initial size, hook up events */
602 r = pty_make_parent(pty, pid);
606 r = pty_resize(pty, initial_term_width, initial_term_height);
/* NOTE(review): presumably skipped when no event loop was passed - confirm */
611 r = pty_attach_event(pty, event, event_fn, event_fn_userdata);
616 /* sync with child */
617 if (!barrier_place_and_sync(&pty->barrier)) {
622 /* fallthrough and return the parent's PTY object */
/* failure handling: abort the barrier, then wait for the child to exit */
630 barrier_abort(&pty->barrier);
631 waitpid(pty->child, NULL, 0);