src/basic/escape.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2
   3 #include <errno.h>
   4 #include <stdlib.h>
   5 #include <string.h>
   6
   7 #include "alloc-util.h"
   8 #include "escape.h"
   9 #include "hexdecoct.h"
  10 #include "macro.h"
  11 #include "utf8.h"
  12
  13 int cescape_char(char c, char *buf) {
  14         char *buf_old = buf;
  15
  16         /* Needs space for 4 characters in the buffer */
  17
  18         switch (c) {
  19
  20                 case '\a':
  21                         *(buf++) = '\\';
  22                         *(buf++) = 'a';
  23                         break;
  24                 case '\b':
  25                         *(buf++) = '\\';
  26                         *(buf++) = 'b';
  27                         break;
  28                 case '\f':
  29                         *(buf++) = '\\';
  30                         *(buf++) = 'f';
  31                         break;
  32                 case '\n':
  33                         *(buf++) = '\\';
  34                         *(buf++) = 'n';
  35                         break;
  36                 case '\r':
  37                         *(buf++) = '\\';
  38                         *(buf++) = 'r';
  39                         break;
  40                 case '\t':
  41                         *(buf++) = '\\';
  42                         *(buf++) = 't';
  43                         break;
  44                 case '\v':
  45                         *(buf++) = '\\';
  46                         *(buf++) = 'v';
  47                         break;
  48                 case '\\':
  49                         *(buf++) = '\\';
  50                         *(buf++) = '\\';
  51                         break;
  52                 case '"':
  53                         *(buf++) = '\\';
  54                         *(buf++) = '"';
  55                         break;
  56                 case '\'':
  57                         *(buf++) = '\\';
  58                         *(buf++) = '\'';
  59                         break;
  60
  61                 default:
  62                         /* For special chars we prefer octal over
  63                          * hexadecimal encoding, simply because glib's
  64                          * g_strescape() does the same */
  65                         if ((c < ' ') || (c >= 127)) {
  66                                 *(buf++) = '\\';
  67                                 *(buf++) = octchar((unsigned char) c >> 6);
  68                                 *(buf++) = octchar((unsigned char) c >> 3);
  69                                 *(buf++) = octchar((unsigned char) c);
  70                         } else
  71                                 *(buf++) = c;
  72                         break;
  73         }
  74
  75         return buf - buf_old;
  76 }
  77
  78 char *cescape_length(const char *s, size_t n) {
  79         const char *f;
  80         char *r, *t;
  81
  82         assert(s || n == 0);
  83
  84         /* Does C style string escaping. May be reversed with
  85          * cunescape(). */
  86
  87         r = new(char, n*4 + 1);
  88         if (!r)
  89                 return NULL;
  90
  91         for (f = s, t = r; f < s + n; f++)
  92                 t += cescape_char(*f, t);
  93
  94         *t = 0;
  95
  96         return r;
  97 }
  98
  99 char *cescape(const char *s) {
 100         assert(s);
 101
 102         return cescape_length(s, strlen(s));
 103 }
 104
 105 int cunescape_one(const char *p, size_t length, char32_t *ret, bool *eight_bit) {
 106         int r = 1;
 107
 108         assert(p);
 109         assert(*p);
 110         assert(ret);
 111
 112         /* Unescapes C style. Returns the unescaped character in ret.
 113          * Sets *eight_bit to true if the escaped sequence either fits in
 114          * one byte in UTF-8 or is a non-unicode literal byte and should
 115          * instead be copied directly.
 116          */
 117
 118         if (length != (size_t) -1 && length < 1)
 119                 return -EINVAL;
 120
 121         switch (p[0]) {
 122
 123         case 'a':
 124                 *ret = '\a';
 125                 break;
 126         case 'b':
 127                 *ret = '\b';
 128                 break;
 129         case 'f':
 130                 *ret = '\f';
 131                 break;
 132         case 'n':
 133                 *ret = '\n';
 134                 break;
 135         case 'r':
 136                 *ret = '\r';
 137                 break;
 138         case 't':
 139                 *ret = '\t';
 140                 break;
 141         case 'v':
 142                 *ret = '\v';
 143                 break;
 144         case '\\':
 145                 *ret = '\\';
 146                 break;
 147         case '"':
 148                 *ret = '"';
 149                 break;
 150         case '\'':
 151                 *ret = '\'';
 152                 break;
 153
 154         case 's':
 155                 /* This is an extension of the XDG syntax files */
 156                 *ret = ' ';
 157                 break;
 158
 159         case 'x': {
 160                 /* hexadecimal encoding */
 161                 int a, b;
 162
 163                 if (length != (size_t) -1 && length < 3)
 164                         return -EINVAL;
 165
 166                 a = unhexchar(p[1]);
 167                 if (a < 0)
 168                         return -EINVAL;
 169
 170                 b = unhexchar(p[2]);
 171                 if (b < 0)
 172                         return -EINVAL;
 173
 174                 /* Don't allow NUL bytes */
 175                 if (a == 0 && b == 0)
 176                         return -EINVAL;
 177
 178                 *ret = (a << 4U) | b;
 179                 *eight_bit = true;
 180                 r = 3;
 181                 break;
 182         }
 183
 184         case 'u': {
 185                 /* C++11 style 16bit unicode */
 186
 187                 int a[4];
 188                 size_t i;
 189                 uint32_t c;
 190
 191                 if (length != (size_t) -1 && length < 5)
 192                         return -EINVAL;
 193
 194                 for (i = 0; i < 4; i++) {
 195                         a[i] = unhexchar(p[1 + i]);
 196                         if (a[i] < 0)
 197                                 return a[i];
 198                 }
 199
 200                 c = ((uint32_t) a[0] << 12U) | ((uint32_t) a[1] << 8U) | ((uint32_t) a[2] << 4U) | (uint32_t) a[3];
 201
 202                 /* Don't allow 0 chars */
 203                 if (c == 0)
 204                         return -EINVAL;
 205
 206                 *ret = c;
 207                 r = 5;
 208                 break;
 209         }
 210
 211         case 'U': {
 212                 /* C++11 style 32bit unicode */
 213
 214                 int a[8];
 215                 size_t i;
 216                 char32_t c;
 217
 218                 if (length != (size_t) -1 && length < 9)
 219                         return -EINVAL;
 220
 221                 for (i = 0; i < 8; i++) {
 222                         a[i] = unhexchar(p[1 + i]);
 223                         if (a[i] < 0)
 224                                 return a[i];
 225                 }
 226
 227                 c = ((uint32_t) a[0] << 28U) | ((uint32_t) a[1] << 24U) | ((uint32_t) a[2] << 20U) | ((uint32_t) a[3] << 16U) |
 228                     ((uint32_t) a[4] << 12U) | ((uint32_t) a[5] <<  8U) | ((uint32_t) a[6] <<  4U) |  (uint32_t) a[7];
 229
 230                 /* Don't allow 0 chars */
 231                 if (c == 0)
 232                         return -EINVAL;
 233
 234                 /* Don't allow invalid code points */
 235                 if (!unichar_is_valid(c))
 236                         return -EINVAL;
 237
 238                 *ret = c;
 239                 r = 9;
 240                 break;
 241         }
 242
 243         case '0':
 244         case '1':
 245         case '2':
 246         case '3':
 247         case '4':
 248         case '5':
 249         case '6':
 250         case '7': {
 251                 /* octal encoding */
 252                 int a, b, c;
 253                 char32_t m;
 254
 255                 if (length != (size_t) -1 && length < 3)
 256                         return -EINVAL;
 257
 258                 a = unoctchar(p[0]);
 259                 if (a < 0)
 260                         return -EINVAL;
 261
 262                 b = unoctchar(p[1]);
 263                 if (b < 0)
 264                         return -EINVAL;
 265
 266                 c = unoctchar(p[2]);
 267                 if (c < 0)
 268                         return -EINVAL;
 269
 270                 /* don't allow NUL bytes */
 271                 if (a == 0 && b == 0 && c == 0)
 272                         return -EINVAL;
 273
 274                 /* Don't allow bytes above 255 */
 275                 m = ((uint32_t) a << 6U) | ((uint32_t) b << 3U) | (uint32_t) c;
 276                 if (m > 255)
 277                         return -EINVAL;
 278
 279                 *ret = m;
 280                 *eight_bit = true;
 281                 r = 3;
 282                 break;
 283         }
 284
 285         default:
 286                 return -EINVAL;
 287         }
 288
 289         return r;
 290 }
 291
 292 int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret) {
 293         char *r, *t;
 294         const char *f;
 295         size_t pl;
 296
 297         assert(s);
 298         assert(ret);
 299
 300         /* Undoes C style string escaping, and optionally prefixes it. */
 301
 302         pl = strlen_ptr(prefix);
 303
 304         r = new(char, pl+length+1);
 305         if (!r)
 306                 return -ENOMEM;
 307
 308         if (prefix)
 309                 memcpy(r, prefix, pl);
 310
 311         for (f = s, t = r + pl; f < s + length; f++) {
 312                 size_t remaining;
 313                 bool eight_bit = false;
 314                 char32_t u;
 315                 int k;
 316
 317                 remaining = s + length - f;
 318                 assert(remaining > 0);
 319
 320                 if (*f != '\\') {
 321                         /* A literal, copy verbatim */
 322                         *(t++) = *f;
 323                         continue;
 324                 }
 325
 326                 if (remaining == 1) {
 327                         if (flags & UNESCAPE_RELAX) {
 328                                 /* A trailing backslash, copy verbatim */
 329                                 *(t++) = *f;
 330                                 continue;
 331                         }
 332
 333                         free(r);
 334                         return -EINVAL;
 335                 }
 336
 337                 k = cunescape_one(f + 1, remaining - 1, &u, &eight_bit);
 338                 if (k < 0) {
 339                         if (flags & UNESCAPE_RELAX) {
 340                                 /* Invalid escape code, let's take it literal then */
 341                                 *(t++) = '\\';
 342                                 continue;
 343                         }
 344
 345                         free(r);
 346                         return k;
 347                 }
 348
 349                 f += k;
 350                 if (eight_bit)
 351                         /* One byte? Set directly as specified */
 352                         *(t++) = u;
 353                 else
 354                         /* Otherwise encode as multi-byte UTF-8 */
 355                         t += utf8_encode_unichar(t, u);
 356         }
 357
 358         *t = 0;
 359
 360         *ret = r;
 361         return t - r;
 362 }
 363
 364 int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret) {
 365         return cunescape_length_with_prefix(s, length, NULL, flags, ret);
 366 }
 367
 368 int cunescape(const char *s, UnescapeFlags flags, char **ret) {
 369         return cunescape_length(s, strlen(s), flags, ret);
 370 }
 371
 372 char *xescape(const char *s, const char *bad) {
 373         char *r, *t;
 374         const char *f;
 375
 376         /* Escapes all chars in bad, in addition to \ and all special
 377          * chars, in \xFF style escaping. May be reversed with
 378          * cunescape(). */
 379
 380         r = new(char, strlen(s) * 4 + 1);
 381         if (!r)
 382                 return NULL;
 383
 384         for (f = s, t = r; *f; f++) {
 385
 386                 if ((*f < ' ') || (*f >= 127) ||
 387                     (*f == '\\') || strchr(bad, *f)) {
 388                         *(t++) = '\\';
 389                         *(t++) = 'x';
 390                         *(t++) = hexchar(*f >> 4);
 391                         *(t++) = hexchar(*f);
 392                 } else
 393                         *(t++) = *f;
 394         }
 395
 396         *t = 0;
 397
 398         return r;
 399 }
 400
 401 #if 0 /// UNNEEDED by elogind
 402 char *octescape(const char *s, size_t len) {
 403         char *r, *t;
 404         const char *f;
 405
 406         /* Escapes all chars in bad, in addition to \ and " chars,
 407          * in \nnn style escaping. */
 408
 409         r = new(char, len * 4 + 1);
 410         if (!r)
 411                 return NULL;
 412
 413         for (f = s, t = r; f < s + len; f++) {
 414
 415                 if (*f < ' ' || *f >= 127 || IN_SET(*f, '\\', '"')) {
 416                         *(t++) = '\\';
 417                         *(t++) = '0' + (*f >> 6);
 418                         *(t++) = '0' + ((*f >> 3) & 8);
 419                         *(t++) = '0' + (*f & 8);
 420                 } else
 421                         *(t++) = *f;
 422         }
 423
 424         *t = 0;
 425
 426         return r;
 427
 428 }
 429
 430 static char *strcpy_backslash_escaped(char *t, const char *s, const char *bad, bool escape_tab_nl) {
 431         assert(bad);
 432
 433         for (; *s; s++) {
 434                 if (escape_tab_nl && IN_SET(*s, '\n', '\t')) {
 435                         *(t++) = '\\';
 436                         *(t++) = *s == '\n' ? 'n' : 't';
 437                         continue;
 438                 }
 439
 440                 if (*s == '\\' || strchr(bad, *s))
 441                         *(t++) = '\\';
 442
 443                 *(t++) = *s;
 444         }
 445
 446         return t;
 447 }
 448
 449 char *shell_escape(const char *s, const char *bad) {
 450         char *r, *t;
 451
 452         r = new(char, strlen(s)*2+1);
 453         if (!r)
 454                 return NULL;
 455
 456         t = strcpy_backslash_escaped(r, s, bad, false);
 457         *t = 0;
 458
 459         return r;
 460 }
 461
 462 char* shell_maybe_quote(const char *s, EscapeStyle style) {
 463         const char *p;
 464         char *r, *t;
 465
 466         assert(s);
 467
 468         /* Encloses a string in quotes if necessary to make it OK as a shell
 469          * string. Note that we treat benign UTF-8 characters as needing
 470          * escaping too, but that should be OK. */
 471
 472         for (p = s; *p; p++)
 473                 if (*p <= ' ' ||
 474                     *p >= 127 ||
 475                     strchr(SHELL_NEED_QUOTES, *p))
 476                         break;
 477
 478         if (!*p)
 479                 return strdup(s);
 480
 481         r = new(char, (style == ESCAPE_POSIX) + 1 + strlen(s)*2 + 1 + 1);
 482         if (!r)
 483                 return NULL;
 484
 485         t = r;
 486         if (style == ESCAPE_BACKSLASH)
 487                 *(t++) = '"';
 488         else if (style == ESCAPE_POSIX) {
 489                 *(t++) = '$';
 490                 *(t++) = '\'';
 491         } else
 492                 assert_not_reached("Bad EscapeStyle");
 493
 494         t = mempcpy(t, s, p - s);
 495
 496         if (style == ESCAPE_BACKSLASH)
 497                 t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE, false);
 498         else
 499                 t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE_POSIX, true);
 500
 501         if (style == ESCAPE_BACKSLASH)
 502                 *(t++) = '"';
 503         else
 504                 *(t++) = '\'';
 505         *t = 0;
 506
 507         return r;
 508 }
 509 #endif // 0