1 /* SPDX-License-Identifier: LGPL-2.1+ */
11 #include "alloc-util.h"
14 #include "locale-util.h"
16 #include "string-util.h"
17 //#include "terminal-util.h"
22 int strcmp_ptr(const char *a, const char *b) {
24 /* Like strcmp(), but tries to make sense of NULL pointers */
37 char* endswith(const char *s, const char *postfix) {
47 return (char*) s + sl;
52 if (memcmp(s + sl - pl, postfix, pl) != 0)
55 return (char*) s + sl - pl;
58 char* endswith_no_case(const char *s, const char *postfix) {
68 return (char*) s + sl;
73 if (strcasecmp(s + sl - pl, postfix) != 0)
76 return (char*) s + sl - pl;
79 char* first_word(const char *s, const char *word) {
86 /* Checks if the string starts with the specified word, either
87 * followed by NUL or by whitespace. Returns a pointer to the
88 * NUL or the first character after the whitespace. */
99 if (memcmp(s, word, wl) != 0)
106 if (!strchr(WHITESPACE, *p))
109 p += strspn(p, WHITESPACE);
113 static size_t strcspn_escaped(const char *s, const char *reject) {
114 bool escaped = false;
117 for (n=0; s[n]; n++) {
120 else if (s[n] == '\\')
122 else if (strchr(reject, s[n]))
126 /* if s ends in \, return index of previous char */
130 /* Split a string into words. */
131 const char* split(const char **state, size_t *l, const char *separator, bool quoted) {
137 assert(**state == '\0');
141 current += strspn(current, separator);
147 if (quoted && strchr("\'\"", *current)) {
148 char quotechars[2] = {*current, '\0'};
150 *l = strcspn_escaped(current + 1, quotechars);
151 if (current[*l + 1] == '\0' || current[*l + 1] != quotechars[0] ||
152 (current[*l + 2] && !strchr(separator, current[*l + 2]))) {
153 /* right quote missing or garbage at the end */
157 *state = current++ + *l + 2;
159 *l = strcspn_escaped(current, separator);
160 if (current[*l] && !strchr(separator, current[*l])) {
161 /* unfinished escape */
165 *state = current + *l;
167 *l = strcspn(current, separator);
168 *state = current + *l;
174 char *strnappend(const char *s, const char *suffix, size_t b) {
182 return strndup(suffix, b);
191 if (b > ((size_t) -1) - a)
194 r = new(char, a+b+1);
199 memcpy(r+a, suffix, b);
205 char *strappend(const char *s, const char *suffix) {
206 return strnappend(s, suffix, strlen_ptr(suffix));
209 char *strjoin_real(const char *x, ...) {
223 t = va_arg(ap, const char *);
228 if (n > ((size_t) -1) - l) {
252 t = va_arg(ap, const char *);
266 char *strstrip(char *s) {
270 /* Skips leading whitespace and drops trailing whitespace. Modifies the string in place. Returns pointer to first non-space character */
272 return delete_trailing_chars(skip_leading_chars(s, WHITESPACE), WHITESPACE);
275 char *delete_chars(char *s, const char *bad) {
278 /* Drops all specified bad characters, regardless of where in the string */
286 for (f = s, t = s; *f; f++) {
298 char *delete_trailing_chars(char *s, const char *bad) {
301 /* Drops all specified bad characters, at the end of the string */
310 if (!strchr(bad, *p))
318 char *truncate_nl(char *s) {
321 s[strcspn(s, NEWLINE)] = 0;
325 #if 0 /// UNNEEDED by elogind
326 char ascii_tolower(char x) {
328 if (x >= 'A' && x <= 'Z')
329 return x - 'A' + 'a';
334 char ascii_toupper(char x) {
336 if (x >= 'a' && x <= 'z')
337 return x - 'a' + 'A';
342 char *ascii_strlower(char *t) {
348 *p = ascii_tolower(*p);
353 char *ascii_strupper(char *t) {
359 *p = ascii_toupper(*p);
364 char *ascii_strlower_n(char *t, size_t n) {
370 for (i = 0; i < n; i++)
371 t[i] = ascii_tolower(t[i]);
376 int ascii_strcasecmp_n(const char *a, const char *b, size_t n) {
378 for (; n > 0; a++, b++, n--) {
381 x = (int) (uint8_t) ascii_tolower(*a);
382 y = (int) (uint8_t) ascii_tolower(*b);
391 int ascii_strcasecmp_nn(const char *a, size_t n, const char *b, size_t m) {
394 r = ascii_strcasecmp_n(a, b, MIN(n, m));
406 bool chars_intersect(const char *a, const char *b) {
409 /* Returns true if any of the chars in a are in b. */
418 bool string_has_cc(const char *p, const char *ok) {
424 * Check if a string contains control characters. If 'ok' is
425 * non-NULL it may be a string containing additional CCs to be
429 for (t = p; *t; t++) {
430 if (ok && strchr(ok, *t))
433 if (*t > 0 && *t < ' ')
443 static int write_ellipsis(char *buf, bool unicode) {
444 if (unicode || is_locale_utf8()) {
445 buf[0] = 0xe2; /* tri-dot ellipsis: … */
457 static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
458 size_t x, need_space, suffix_len;
462 assert(percent <= 100);
463 assert(new_length != (size_t) -1);
465 if (old_length <= new_length)
466 return strndup(s, old_length);
468 /* Special case short ellipsations */
469 switch (new_length) {
475 if (is_locale_utf8())
476 return strdup("…");
481 if (!is_locale_utf8())
490 /* Calculate how much space the ellipsis will take up. If we are in UTF-8 mode we only need space for one
491 * character ("…"), otherwise for three characters ("..."). Note that in both cases we need 3 bytes of storage,
492 * either for the UTF-8 encoded character or for three ASCII characters. */
493 need_space = is_locale_utf8() ? 1 : 3;
495 t = new(char, new_length+3);
499 assert(new_length >= need_space);
501 x = ((new_length - need_space) * percent + 50) / 100;
502 assert(x <= new_length - need_space);
505 write_ellipsis(t + x, false);
506 suffix_len = new_length - x - need_space;
507 memcpy(t + x + 3, s + old_length - suffix_len, suffix_len);
508 *(t + x + 3 + suffix_len) = '\0';
513 char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
514 size_t x, k, len, len2;
519 /* Note that 'old_length' refers to bytes in the string, while 'new_length' refers to character cells taken up
520 * on screen. This distinction doesn't matter for ASCII strings, but it does matter for non-ASCII UTF-8
523 * Ellipsation is done in a locale-dependent way:
524 * 1. If the string passed in is fully ASCII and the current locale is not UTF-8, three dots are used ("...")
525 * 2. Otherwise, a unicode ellipsis is used ("…")
527 * In other words: you'll get a unicode ellipsis as soon as either the string contains non-ASCII characters or
528 * the current locale is UTF-8.
532 assert(percent <= 100);
534 if (new_length == (size_t) -1)
535 return strndup(s, old_length);
540 /* If no multibyte characters use ascii_ellipsize_mem for speed */
541 if (ascii_is_valid_n(s, old_length))
542 return ascii_ellipsize_mem(s, old_length, new_length, percent);
544 x = ((new_length - 1) * percent) / 100;
545 assert(x <= new_length - 1);
548 for (i = s; i < s + old_length; i = utf8_next_char(i)) {
552 r = utf8_encoded_to_unichar(i, &c);
556 w = unichar_iswide(c) ? 2 : 1;
563 for (j = s + old_length; j > i; ) {
568 jj = utf8_prev_char(j);
569 r = utf8_encoded_to_unichar(jj, &c);
573 w = unichar_iswide(c) ? 2 : 1;
574 if (k + w <= new_length) {
582 /* we don't actually need to ellipsize */
584 return memdup_suffix0(s, old_length);
586 /* make space for ellipsis, if possible */
587 if (j < s + old_length)
588 j = utf8_next_char(j);
590 i = utf8_prev_char(i);
593 len2 = s + old_length - j;
594 e = new(char, len + 3 + len2 + 1);
599 printf("old_length=%zu new_length=%zu x=%zu len=%u len2=%u k=%u\n",
600 old_length, new_length, x, len, len2, k);
604 write_ellipsis(e + len, true);
605 memcpy(e + len + 3, j, len2);
606 *(e + len + 3 + len2) = '\0';
611 char *cellescape(char *buf, size_t len, const char *s) {
612 /* Escape and ellipsize s into buffer buf of size len. Only non-control ASCII
613 * characters are copied as they are, everything else is escaped. The result
614 * is different than if escaping and ellipsization was performed in two
615 * separate steps, because each sequence is either stored in full or skipped.
617 * This function should be used for logging about strings which are expected to
618 * be plain ASCII in a safe way.
620 * An ellipsis will be used if s is too long. It is always placed at the
624 size_t i = 0, last_char_width[4] = {}, k = 0, j;
626 assert(len > 0); /* at least a terminating NUL */
632 if (*s == 0) /* terminating NUL detected? then we are done! */
635 w = cescape_char(*s, four);
636 if (i + w + 1 > len) /* This character doesn't fit into the buffer anymore? In that case let's
637 * ellipsize at the previous location */
640 /* OK, there was space, let's add this escaped character to the buffer */
641 memcpy(buf + i, four, w);
644 /* And remember its width in the ring buffer */
645 last_char_width[k] = w;
651 /* Ellipsation is necessary. This means we might need to truncate the string again to make space for 4
652 * characters ideally, but if the buffer is shorter than that in the first place, take what we can get */
653 for (j = 0; j < ELEMENTSOF(last_char_width); j++) {
655 if (i + 4 <= len) /* nice, we reached our space goal */
658 k = k == 0 ? 3 : k - 1;
659 if (last_char_width[k] == 0) /* bummer, we reached the beginning of the strings */
662 assert(i >= last_char_width[k]);
663 i -= last_char_width[k];
666 if (i + 4 <= len) /* yay, enough space */
667 i += write_ellipsis(buf + i, false);
668 else if (i + 3 <= len) { /* only space for ".." */
671 } else if (i + 2 <= len) /* only space for a single "." */
674 assert(i + 1 <= len);
681 bool nulstr_contains(const char *nulstr, const char *needle) {
687 NULSTR_FOREACH(i, nulstr)
688 if (streq(i, needle))
694 char* strshorten(char *s, size_t l) {
697 if (strnlen(s, l+1) > l)
703 char *strreplace(const char *text, const char *old_string, const char *new_string) {
704 size_t l, old_len, new_len, allocated = 0;
705 char *t, *ret = NULL;
714 old_len = strlen(old_string);
715 new_len = strlen(new_string);
718 if (!GREEDY_REALLOC(ret, allocated, l+1))
726 if (!startswith(f, old_string)) {
732 nl = l - old_len + new_len;
734 if (!GREEDY_REALLOC(ret, allocated, nl + 1))
740 t = stpcpy(t, new_string);
748 static void advance_offsets(ssize_t diff, size_t offsets[2], size_t shift[2], size_t size) {
752 if ((size_t) diff < offsets[0])
754 if ((size_t) diff < offsets[1])
758 char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) {
759 const char *i, *begin = NULL;
765 } state = STATE_OTHER;
767 size_t osz = 0, isz, shift[2] = {};
773 /* This does three things:
775 * 1. Replaces TABs by 8 spaces
776 * 2. Strips ANSI color sequences (a subset of CSI), i.e. ESC '[' … 'm' sequences
777 * 3. Strips ANSI operating system sequences (CSO), i.e. ESC ']' … BEL sequences
779 * Everything else will be left as it is. In particular other ANSI sequences are left as they are, as are any
780 * other special characters. Truncated ANSI sequences are left as-is too. This call is supposed to suppress the
781 * most basic formatting noise, but nothing else.
783 * Why care for CSO sequences? Well, to undo what terminal_urlify() and friends generate. */
785 isz = _isz ? *_isz : strlen(*ibuf);
787 f = open_memstream(&obuf, &osz);
791 /* Note we turn off internal locking on f for performance reasons. It's safe to do so since we created f here
792 * and it doesn't leave our scope. */
794 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
796 for (i = *ibuf; i < *ibuf + isz + 1; i++) {
801 if (i >= *ibuf + isz) /* EOT */
803 else if (*i == '\x1B')
804 state = STATE_ESCAPE;
805 else if (*i == '\t') {
807 advance_offsets(i - *ibuf, highlight, shift, 7);
814 if (i >= *ibuf + isz) { /* EOT */
816 advance_offsets(i - *ibuf, highlight, shift, 1);
818 } else if (*i == '[') { /* ANSI CSI */
821 } else if (*i == ']') { /* ANSI CSO */
827 advance_offsets(i - *ibuf, highlight, shift, 1);
835 if (i >= *ibuf + isz || /* EOT … */
836 !strchr("01234567890;m", *i)) { /* … or invalid chars in sequence */
839 advance_offsets(i - *ibuf, highlight, shift, 2);
842 } else if (*i == 'm')
849 if (i >= *ibuf + isz || /* EOT … */
850 (*i != '\a' && (uint8_t) *i < 32U) || (uint8_t) *i > 126U) { /* … or invalid chars in sequence */
853 advance_offsets(i - *ibuf, highlight, shift, 2);
856 } else if (*i == '\a')
863 if (fflush_and_check(f) < 0) {
877 highlight[0] += shift[0];
878 highlight[1] += shift[1];
884 char *strextend_with_separator(char **x, const char *separator, ...) {
886 size_t f, l, l_separator;
892 l = f = strlen_ptr(*x);
894 need_separator = !isempty(*x);
895 l_separator = strlen_ptr(separator);
897 va_start(ap, separator);
902 t = va_arg(ap, const char *);
911 if (n > ((size_t) -1) - l) {
917 need_separator = true;
921 need_separator = !isempty(*x);
923 r = realloc(*x, l+1);
929 va_start(ap, separator);
933 t = va_arg(ap, const char *);
937 if (need_separator && separator)
938 p = stpcpy(p, separator);
942 need_separator = true;
954 char *strrep(const char *s, unsigned n) {
962 p = r = malloc(l * n + 1);
966 for (i = 0; i < n; i++)
973 int split_pair(const char *s, const char *sep, char **l, char **r) {
988 a = strndup(s, x - s);
992 b = strdup(x + strlen(sep));
1004 int free_and_strdup(char **p, const char *s) {
1009 /* Replaces a string pointer with an strdup()ed new string,
1010 * possibly freeing the old one. */
1012 if (streq_ptr(*p, s))
1028 #if !HAVE_EXPLICIT_BZERO
1030 * Pointer to memset is volatile so that compiler must de-reference
1031 * the pointer and can't assume that it points to any function in
1032 * particular (such as memset, which it then might further "optimize").
1033 * This approach is inspired by openssl's crypto/mem_clr.c.
1035 typedef void *(*memset_t)(void *,int,size_t);
1037 static volatile memset_t memset_func = memset;
1039 void explicit_bzero(void *p, size_t l) {
1040 memset_func(p, '\0', l);
1044 char* string_erase(char *x) {
1048 /* A delicious drop of snake-oil! To be called on memory where
1049 * we stored passphrases or so, after we used them. */
1050 explicit_bzero(x, strlen(x));
1054 char *string_free_erase(char *s) {
1055 return mfree(string_erase(s));
1058 bool string_is_safe(const char *p) {
1064 for (t = p; *t; t++) {
1065 if (*t > 0 && *t < ' ') /* no control characters */
1068 if (strchr(QUOTES "\\\x7f", *t))