1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2010 Lennart Poettering
12 #include <stdio_ext.h>
16 #include "alloc-util.h"
18 //#include "locale-util.h"
20 #include "string-util.h"
21 //#include "terminal-util.h"
/* strcmp_ptr(): string comparison for operands that may be NULL.
 * NOTE(review): how a NULL operand is ordered relative to a non-NULL one is decided in lines not
 * shown here; confirm before relying on a particular sign. */
26 int strcmp_ptr(const char *a, const char *b) {
28 /* Like strcmp(), but tries to make sense of NULL pointers */
/* endswith(): tests whether s ends in postfix, comparing the trailing bytes with memcmp(), i.e.
 * case-sensitively. On a match the result points at the start of the suffix inside s
 * (s + sl - pl); another path returns s + sl. Constness of s is cast away in the result.
 * NOTE(review): sl and pl are presumably the lengths of s and postfix, and the s + sl return is
 * presumably the empty-postfix case; confirm against the full body. */
41 char* endswith(const char *s, const char *postfix) {
51 return (char*) s + sl;
56 if (memcmp(s + sl - pl, postfix, pl) != 0)
59 return (char*) s + sl - pl;
/* endswith_no_case(): variant of endswith() whose tail comparison is done with strcasecmp(), so
 * letter case is ignored as defined by that function. On a match the result points at the start
 * of the suffix inside s (s + sl - pl); another path returns s + sl.
 * NOTE(review): sl and pl are presumably the lengths of s and postfix, and the s + sl return is
 * presumably the empty-postfix case; confirm against the full body. */
62 char* endswith_no_case(const char *s, const char *postfix) {
72 return (char*) s + sl;
77 if (strcasecmp(s + sl - pl, postfix) != 0)
80 return (char*) s + sl - pl;
/* first_word(): prefix match that additionally requires a word boundary; see the description
 * below. The prefix is compared with memcmp() over wl bytes, the character at p is checked
 * against WHITESPACE, and the whole run of WHITESPACE starting there is skipped with strspn().
 * NOTE(review): wl is presumably strlen(word) and p presumably s + wl; confirm. */
83 char* first_word(const char *s, const char *word) {
90 /* Checks if the string starts with the specified word, either
91 * followed by NUL or by whitespace. Returns a pointer to the
92 * NUL or the first character after the whitespace. */
103 if (memcmp(s, word, wl) != 0)
110 if (!strchr(WHITESPACE, *p))
113 p += strspn(p, WHITESPACE);
/* strcspn_escaped(): scans s from index 0 up to the terminating NUL. The visible branches test
 * for a backslash and for a character contained in reject; an escaped flag (initially false) is
 * kept alongside.
 * NOTE(review): presumably this behaves like strcspn() with backslash escaping, i.e. a character
 * that follows a backslash never ends the scan; the branch bodies are not shown, so confirm. */
117 static size_t strcspn_escaped(const char *s, const char *reject) {
118 bool escaped = false;
121 for (n=0; s[n]; n++) {
124 else if (s[n] == '\\')
126 else if (strchr(reject, s[n]))
130 /* if s ends in \, return index of previous char */
134 /* Split a string into words. */
/* split(): finds the next word of the string tracked through *state and stores its length in *l.
 * - Leading separator characters are skipped with strspn().
 * - If quoted is set and the word starts with a single or double quote, the word extends to the
 *   matching unescaped quote, which must be followed by NUL or by a separator character; *state
 *   is then moved past the closing quote and current is advanced past the opening one.
 * - Another branch measures the word with strcspn_escaped() and rejects an unfinished escape.
 * - The remaining branch measures it with plain strcspn().
 * NOTE(review): the conditions selecting the last two branches and the value returned on error
 * are not shown here; presumably the escaped variant is used whenever quoted is set. */
135 const char* split(const char **state, size_t *l, const char *separator, bool quoted) {
141 assert(**state == '\0');
145 current += strspn(current, separator);
151 if (quoted && strchr("\'\"", *current)) {
152 char quotechars[2] = {*current, '\0'};
154 *l = strcspn_escaped(current + 1, quotechars);
155 if (current[*l + 1] == '\0' || current[*l + 1] != quotechars[0] ||
156 (current[*l + 2] && !strchr(separator, current[*l + 2]))) {
157 /* right quote missing or garbage at the end */
161 *state = current++ + *l + 2;
163 *l = strcspn_escaped(current, separator);
164 if (current[*l] && !strchr(separator, current[*l])) {
165 /* unfinished escape */
169 *state = current + *l;
171 *l = strcspn(current, separator);
172 *state = current + *l;
/* strnappend(): builds a newly allocated string from s followed by b bytes of suffix.
 * One path returns strndup(suffix, b) directly. Before allocating a + b + 1 bytes the sum is
 * guarded against size_t overflow (b > SIZE_MAX - a); the suffix bytes are then copied to offset a
 * of the result.
 * NOTE(review): a is presumably strlen(s), and the strndup() shortcut presumably covers a NULL s;
 * confirm. */
178 char *strnappend(const char *s, const char *suffix, size_t b) {
186 return strndup(suffix, b);
195 if (b > ((size_t) -1) - a)
198 r = new(char, a+b+1);
203 memcpy(r+a, suffix, b);
/* strappend(): concatenates s and the whole of suffix by delegating to strnappend(), passing
 * strlen_ptr(suffix) as the number of suffix bytes to append. */
209 char *strappend(const char *s, const char *suffix) {
210 return strnappend(s, suffix, strlen_ptr(suffix));
/* strjoin_real(): variadic string join. Arguments after x are fetched as const char pointers with
 * va_arg() at two separate places, and the running length l is checked against size_t overflow
 * before n is added to it.
 * NOTE(review): presumably the argument list is NULL-terminated, the first pass sizes the result
 * and the second pass copies into it; confirm. */
213 char *strjoin_real(const char *x, ...) {
227 t = va_arg(ap, const char *);
232 if (n > ((size_t) -1) - l) {
256 t = va_arg(ap, const char *);
/* strstrip(): see the description below. Leading WHITESPACE is skipped by advancing s with
 * strspn(); the end of the string is then located with strchr(s, 0) and walked backwards while
 * the preceding character is tested against WHITESPACE. */
270 char *strstrip(char *s) {
276 /* Drops trailing whitespace. Modifies the string in
277 * place. Returns pointer to first non-space character */
279 s += strspn(s, WHITESPACE);
281 for (e = strchr(s, 0); e > s; e --)
282 if (!strchr(WHITESPACE, e[-1]))
290 #if 0 /// UNNEEDED by elogind
/* delete_chars(): filters s using two cursors, f for reading and t for writing, which both start
 * at s; f visits every character up to the terminating NUL. */
291 char *delete_chars(char *s, const char *bad) {
294 /* Drops all specified bad characters, regardless where in the string */
302 for (f = s, t = s; *f; f++) {
/* delete_trailing_chars(): membership in bad is tested per character with strchr(bad, *p).
 * NOTE(review): presumably the string is cut right after the last character that is not contained
 * in bad; confirm. */
315 char *delete_trailing_chars(char *s, const char *bad) {
318 /* Drops all specified bad characters, at the end of the string */
327 if (!strchr(bad, *p))
/* truncate_nl(): writes a NUL at the first character of s that is contained in NEWLINE. If there
 * is no such character, strcspn() yields the string length and the existing terminator is simply
 * overwritten with itself. */
335 char *truncate_nl(char *s) {
338 s[strcspn(s, NEWLINE)] = 0;
342 #if 0 /// UNNEEDED by elogind
/* ascii_tolower(): characters in the range A..Z are shifted to a..z by plain offset arithmetic,
 * without consulting the locale.
 * NOTE(review): every other character is presumably returned unchanged; confirm. */
343 char ascii_tolower(char x) {
345 if (x >= 'A' && x <= 'Z')
346 return x - 'A' + 'a';
/* ascii_toupper(): characters in the range a..z are shifted to A..Z by plain offset arithmetic,
 * without consulting the locale.
 * NOTE(review): every other character is presumably returned unchanged; confirm. */
351 char ascii_toupper(char x) {
353 if (x >= 'a' && x <= 'z')
354 return x - 'a' + 'A';
/* ascii_strlower(): converts in place; the character at cursor p is replaced by
 * ascii_tolower() of itself.
 * NOTE(review): p presumably walks all of t up to the terminating NUL; confirm. */
359 char *ascii_strlower(char *t) {
365 *p = ascii_tolower(*p);
/* ascii_strupper(): converts in place; the character at cursor p is replaced by
 * ascii_toupper() of itself.
 * NOTE(review): p presumably walks all of t up to the terminating NUL; confirm. */
370 char *ascii_strupper(char *t) {
376 *p = ascii_toupper(*p);
/* ascii_strlower_n(): lower-cases exactly the first n bytes of t in place with ascii_tolower().
 * The loop is bounded by n only and does not stop at a NUL, so t must provide at least n bytes. */
381 char *ascii_strlower_n(char *t, size_t n) {
387 for (i = 0; i < n; i++)
388 t[i] = ascii_tolower(t[i]);
/* ascii_strcasecmp_n(): compares up to n bytes of a and b while ignoring ASCII letter case.
 * Each byte is lower-cased with ascii_tolower() and widened through uint8_t, so x and y are
 * non-negative ints regardless of the signedness of char.
 * NOTE(review): presumably returns the first non-zero difference of x and y, or 0; confirm. */
393 int ascii_strcasecmp_n(const char *a, const char *b, size_t n) {
395 for (; n > 0; a++, b++, n--) {
398 x = (int) (uint8_t) ascii_tolower(*a);
399 y = (int) (uint8_t) ascii_tolower(*b);
/* ascii_strcasecmp_nn(): compares two byte ranges of lengths n and m; their common prefix of
 * MIN(n, m) bytes is compared with ascii_strcasecmp_n().
 * NOTE(review): presumably a tie on the common prefix is broken by the lengths; confirm. */
408 int ascii_strcasecmp_nn(const char *a, size_t n, const char *b, size_t m) {
411 r = ascii_strcasecmp_n(a, b, MIN(n, m));
/* chars_intersect(): treats a and b as sets of characters and reports whether they overlap, as
 * stated in the comment below. */
423 bool chars_intersect(const char *a, const char *b) {
426 /* Returns true if any of the chars in a are in b. */
/* string_has_cc(): scans p character by character. When ok is non-NULL a character listed in ok
 * is tested for first; the control-character test itself is 0 < *t < space. That test does not
 * match DEL (0x7f), nor bytes with the high bit set. */
435 bool string_has_cc(const char *p, const char *ok) {
441 * Check if a string contains control characters. If 'ok' is
442 * non-NULL it may be a string containing additional CCs to be
446 for (t = p; *t; t++) {
447 if (ok && strchr(ok, *t))
450 if (*t > 0 && *t < ' ')
/* ascii_ellipsize_mem(): shortens the old_length bytes at s to at most new_length characters by
 * cutting out a middle part and putting an ellipsis in its place.
 * - percent (0..100, asserted) positions the ellipsis: x, the number of characters kept in front
 *   of it, is (new_length - need_space) * percent / 100 rounded to nearest.
 * - new_length must not be (size_t) -1 (asserted).
 * - Input that already fits (old_length <= new_length) is simply duplicated with strndup().
 * - Very small new_length values are handled by a dedicated switch that depends on
 *   is_locale_utf8().
 * - The result buffer has new_length + 3 bytes; the tail copy of new_length - x - need_space + 1
 *   bytes is taken from the end of s.
 * NOTE(review): the + 1 in the tail copy presumably picks up a terminating NUL at s[old_length];
 * confirm that all callers pass NUL-terminated input. */
460 static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
461 size_t x, need_space;
465 assert(percent <= 100);
466 assert(new_length != (size_t) -1);
468 if (old_length <= new_length)
469 return strndup(s, old_length);
471 /* Special case short ellipsations */
472 switch (new_length) {
478 if (is_locale_utf8())
484 if (!is_locale_utf8())
493 /* Calculate how much space the ellipsis will take up. If we are in UTF-8 mode we only need space for one
494 * character ("…"), otherwise for three characters ("..."). Note that in both cases we need 3 bytes of storage,
495 * either for the UTF-8 encoded character or for three ASCII characters. */
496 need_space = is_locale_utf8() ? 1 : 3;
498 r = new(char, new_length+3);
502 assert(new_length >= need_space);
504 x = ((new_length - need_space) * percent + 50) / 100;
505 assert(x <= new_length - need_space);
509 if (is_locale_utf8()) {
510 r[x+0] = 0xe2; /* tri-dot ellipsis: … */
520 s + old_length - (new_length - x - need_space),
521 new_length - x - need_space + 1);
/* ellipsize_mem(): column-aware ellipsizing of the old_length bytes at s; the notes inside the
 * body explain bytes versus screen cells and which kind of ellipsis is produced.
 * - percent must be 0..100 (asserted).
 * - new_length == (size_t) -1 disables shortening: the input is duplicated with strndup().
 * - Input accepted by ascii_is_valid() is handed to ascii_ellipsize_mem().
 * - Otherwise x = (new_length - 1) * percent / 100 cells are budgeted for the head. The head is
 *   measured forwards with utf8_next_char() and the tail backwards with utf8_prev_char(); each
 *   character adds 2 to k if unichar_iswide() says so and 1 otherwise.
 * - If nothing has to be cut, the input including its terminator is copied with memdup().
 * - The result holds len head bytes, a 3-byte ellipsis starting with 0xe2, len2 tail bytes and
 *   the terminator.
 * NOTE(review): the printf() below formats len, len2 and k with %u although they are declared
 * size_t; if that statement is ever compiled the conversions must be %zu. Presumably it sits in
 * disabled debug code; confirm. */
526 char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
527 size_t x, k, len, len2;
532 /* Note that 'old_length' refers to bytes in the string, while 'new_length' refers to character cells taken up
533 * on screen. This distinction doesn't matter for ASCII strings, but it does matter for non-ASCII UTF-8
536 * Ellipsation is done in a locale-dependent way:
537 * 1. If the string passed in is fully ASCII and the current locale is not UTF-8, three dots are used ("...")
538 * 2. Otherwise, a unicode ellipsis is used ("…")
540 * In other words: you'll get a unicode ellipsis as soon as either the string contains non-ASCII characters or
541 * the current locale is UTF-8.
545 assert(percent <= 100);
547 if (new_length == (size_t) -1)
548 return strndup(s, old_length);
553 /* If no multibyte characters use ascii_ellipsize_mem for speed */
554 if (ascii_is_valid(s))
555 return ascii_ellipsize_mem(s, old_length, new_length, percent);
557 x = ((new_length - 1) * percent) / 100;
558 assert(x <= new_length - 1);
561 for (i = s; k < x && i < s + old_length; i = utf8_next_char(i)) {
564 r = utf8_encoded_to_unichar(i, &c);
567 k += unichar_iswide(c) ? 2 : 1;
570 if (k > x) /* last character was wide and went over quota */
573 for (j = s + old_length; k < new_length && j > i; ) {
576 j = utf8_prev_char(j);
577 r = utf8_encoded_to_unichar(j, &c);
580 k += unichar_iswide(c) ? 2 : 1;
584 /* we don't actually need to ellipsize */
586 return memdup(s, old_length + 1);
588 /* make space for ellipsis */
589 j = utf8_next_char(j);
592 len2 = s + old_length - j;
593 e = new(char, len + 3 + len2 + 1);
598 printf("old_length=%zu new_length=%zu x=%zu len=%u len2=%u k=%u\n",
599 old_length, new_length, x, len, len2, k);
603 e[len + 0] = 0xe2; /* tri-dot ellipsis: … */
607 memcpy(e + len + 3, j, len2 + 1);
/* ellipsize(): convenience wrapper for NUL-terminated input; forwards to
 * ellipsize_mem(s, strlen(s), length, percent). A length of (size_t) -1 is special-cased first.
 * NOTE(review): that special case presumably returns an unshortened copy of s; confirm. */
612 char *ellipsize(const char *s, size_t length, unsigned percent) {
614 if (length == (size_t) -1)
617 return ellipsize_mem(s, strlen(s), length, percent);
/* nulstr_contains(): walks the entries of nulstr with NULSTR_FOREACH and compares each entry to
 * needle with streq().
 * NOTE(review): presumably returns true on the first equal entry and false otherwise; confirm. */
620 bool nulstr_contains(const char *nulstr, const char *needle) {
626 NULSTR_FOREACH(i, nulstr)
627 if (streq(i, needle))
/* strshorten(): acts only when s is longer than l bytes. The length probe uses strnlen(s, l+1),
 * so at most l + 1 bytes of s are examined.
 * NOTE(review): presumably s is then cut to l bytes in place and returned; confirm. */
633 char* strshorten(char *s, size_t l) {
636 if (strnlen(s, l+1) > l)
/* strreplace(): produces a copy of text in which old_string is substituted by new_string.
 * The output buffer ret is grown with GREEDY_REALLOC, initially to l + 1 bytes. At read position
 * f, startswith(f, old_string) tells a plain character from a match; for a match the required
 * length becomes l - old_len + new_len, the buffer is grown accordingly, and new_string is
 * written at t with stpcpy().
 * NOTE(review): l is presumably strlen(text); behaviour for an empty old_string and the return on
 * allocation failure are not visible here; confirm. */
642 char *strreplace(const char *text, const char *old_string, const char *new_string) {
643 size_t l, old_len, new_len, allocated = 0;
644 char *t, *ret = NULL;
653 old_len = strlen(old_string);
654 new_len = strlen(new_string);
657 if (!GREEDY_REALLOC(ret, allocated, l+1))
665 if (!startswith(f, old_string)) {
671 nl = l - old_len + new_len;
673 if (!GREEDY_REALLOC(ret, allocated, nl + 1))
679 t = stpcpy(t, new_string);
/* advance_offsets(): helper for strip_tab_ansi(). The input position diff, cast to size_t, is
 * compared against each of the two offsets in turn.
 * NOTE(review): presumably size is added to shift[i] for every offset that lies beyond diff, and
 * a NULL offsets array is tolerated; the statements doing so are not shown, so confirm. */
687 static void advance_offsets(ssize_t diff, size_t offsets[2], size_t shift[2], size_t size) {
691 if ((size_t) diff < offsets[0])
693 if ((size_t) diff < offsets[1])
/* strip_tab_ansi(): filters the buffer at *ibuf as described in the comment below.
 * - _isz: optional input size; when it is NULL the size is taken with strlen(*ibuf).
 * - highlight: two offsets into the input that are kept in step with the output; the shifts
 *   recorded through advance_offsets() are added to them at the end.
 * The output is collected in a memory stream obtained from open_memstream(). The loop
 * deliberately runs one position past the input (i < *ibuf + isz + 1) so that every parser state
 * gets an explicit end-of-text step; the initial state is STATE_OTHER.
 * NOTE(review): what is stored back through ibuf and _isz, the return value, and whether
 * highlight may be NULL are not visible here; confirm against the full body. */
697 char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) {
698 const char *i, *begin = NULL;
704 } state = STATE_OTHER;
706 size_t osz = 0, isz, shift[2] = {};
712 /* This does three things:
714 * 1. Replaces TABs by 8 spaces
715 * 2. Strips ANSI color sequences (a subset of CSI), i.e. ESC '[' … 'm' sequences
716 * 3. Strips ANSI operating system sequences (CSO), i.e. ESC ']' … BEL sequences
718 * Everything else will be left as it is. In particular other ANSI sequences are left as they are, as are any
719 * other special characters. Truncated ANSI sequences are left as-is too. This call is supposed to suppress the
720 * most basic formatting noise, but nothing else.
722 * Why care for CSO sequences? Well, to undo what terminal_urlify() and friends generate. */
724 isz = _isz ? *_isz : strlen(*ibuf);
726 f = open_memstream(&obuf, &osz);
730 /* Note we turn off internal locking on f for performance reasons. It's safe to do so since we created f here
731 * and it doesn't leave our scope. */
733 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
735 for (i = *ibuf; i < *ibuf + isz + 1; i++) {
740 if (i >= *ibuf + isz) /* EOT */
742 else if (*i == '\x1B')
743 state = STATE_ESCAPE;
744 else if (*i == '\t') {
746 advance_offsets(i - *ibuf, highlight, shift, 7);
753 if (i >= *ibuf + isz) { /* EOT */
755 advance_offsets(i - *ibuf, highlight, shift, 1);
757 } else if (*i == '[') { /* ANSI CSI */
760 } else if (*i == ']') { /* ANSI CSO */
766 advance_offsets(i - *ibuf, highlight, shift, 1);
774 if (i >= *ibuf + isz || /* EOT … */
775 !strchr("01234567890;m", *i)) { /* … or invalid chars in sequence */
778 advance_offsets(i - *ibuf, highlight, shift, 2);
781 } else if (*i == 'm')
788 if (i >= *ibuf + isz || /* EOT … */
789 (*i != '\a' && (uint8_t) *i < 32U) || (uint8_t) *i > 126U) { /* … or invalid chars in sequence */
792 advance_offsets(i - *ibuf, highlight, shift, 2);
795 } else if (*i == '\a')
802 if (fflush_and_check(f) < 0) {
816 highlight[0] += shift[0];
817 highlight[1] += shift[1];
/* strextend_with_separator(): appends the variadic strings to *x, which is resized with
 * realloc() to l + 1 bytes.
 * - A separator is written (stpcpy) only when need_separator is set and separator is non-NULL.
 *   need_separator starts out as !isempty(*x), i.e. nothing is put in front of the first item when
 *   *x is empty, and is set to true once an item has been handled.
 * - The argument list is traversed twice with va_start(): the first pass accumulates the needed
 *   length with a size_t overflow check, the second pass copies.
 * NOTE(review): presumably the list is NULL-terminated, *x is updated to the new buffer and NULL
 * is returned on failure; confirm. */
823 char *strextend_with_separator(char **x, const char *separator, ...) {
825 size_t f, l, l_separator;
831 l = f = strlen_ptr(*x);
833 need_separator = !isempty(*x);
834 l_separator = strlen_ptr(separator);
836 va_start(ap, separator);
841 t = va_arg(ap, const char *);
850 if (n > ((size_t) -1) - l) {
856 need_separator = true;
860 need_separator = !isempty(*x);
862 r = realloc(*x, l+1);
868 va_start(ap, separator);
872 t = va_arg(ap, const char *);
876 if (need_separator && separator)
877 p = stpcpy(p, separator);
881 need_separator = true;
/* strrep(): allocates l * n + 1 bytes with malloc() and fills them in a loop of n iterations.
 * NOTE(review): l is presumably strlen(s) and each iteration presumably appends one copy of s.
 * No overflow check on l * n + 1 is visible in these lines; confirm whether one exists or is
 * needed for the callers in this tree. */
893 char *strrep(const char *s, unsigned n) {
901 p = r = malloc(l * n + 1);
905 for (i = 0; i < n; i++)
/* split_pair(): cuts s into two newly allocated halves around an occurrence of sep located at x:
 * the left half is strndup(s, x - s), the right half is strdup() of everything after the
 * separator.
 * NOTE(review): x is presumably the first occurrence of sep in s, and the halves are presumably
 * handed out through l and r with a negative errno-style code on failure; confirm. */
912 int split_pair(const char *s, const char *sep, char **l, char **r) {
927 a = strndup(s, x - s);
931 b = strdup(x + strlen(sep));
/* free_and_strdup(): see the description below. The case where *p already equals s, as judged by
 * streq_ptr(), is detected up front.
 * NOTE(review): presumably nothing is reallocated in that case; the return values are not
 * visible here, so confirm. */
943 int free_and_strdup(char **p, const char *s) {
948 /* Replaces a string pointer with an strdup()ed new string,
949 * possibly freeing the old one. */
951 if (streq_ptr(*p, s))
967 #if !HAVE_EXPLICIT_BZERO
969 * Pointer to memset is volatile so that compiler must de-reference
970 * the pointer and can't assume that it points to any function in
971 * particular (such as memset, which it then might further "optimize")
972 * This approach is inspired by openssl's crypto/mem_clr.c.
/* Fallback used when HAVE_EXPLICIT_BZERO is not set: memset_t is the signature of memset(),
 * memset_func is a volatile pointer initialised to memset, and explicit_bzero() zeroes l bytes at
 * p by calling through that pointer. */
974 typedef void *(*memset_t)(void *,int,size_t);
976 static volatile memset_t memset_func = memset;
978 void explicit_bzero(void *p, size_t l) {
979 memset_func(p, '\0', l);
/* string_erase(): overwrites the strlen(x) characters of x with zero bytes through
 * explicit_bzero(). The buffer itself is not freed here.
 * NOTE(review): presumably returns x and tolerates NULL; confirm. */
983 char* string_erase(char *x) {
987 /* A delicious drop of snake-oil! To be called on memory where
988 * we stored passphrases or so, after we used them. */
989 explicit_bzero(x, strlen(x));
/* string_free_erase(): wipes s with string_erase(), passes the result to mfree() and returns
 * whatever mfree() returns. */
993 char *string_free_erase(char *s) {
994 return mfree(string_erase(s));
/* string_is_safe(): scans p character by character and tests each one for being a control
 * character (0 < *t < space) and for membership in QUOTES plus backslash and DEL (0x7f).
 * NOTE(review): presumably either test makes the function return false and a clean scan returns
 * true; the return statements are not visible here, so confirm. */
997 bool string_is_safe(const char *p) {
1003 for (t = p; *t; t++) {
1004 if (*t > 0 && *t < ' ') /* no control characters */
1007 if (strchr(QUOTES "\\\x7f", *t))