src/basic/escape.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2 /***
   3   Copyright 2010 Lennart Poettering
   4 ***/
   5
   6 #include <errno.h>
   7 #include <stdlib.h>
   8 #include <string.h>
   9
  10 #include "alloc-util.h"
  11 #include "escape.h"
  12 #include "hexdecoct.h"
  13 #include "macro.h"
  14 #include "utf8.h"
  15
  16 int cescape_char(char c, char *buf) {
  17         char *buf_old = buf;
  18
  19         /* Needs space for 4 characters in the buffer */
  20
  21         switch (c) {
  22
  23                 case '\a':
  24                         *(buf++) = '\\';
  25                         *(buf++) = 'a';
  26                         break;
  27                 case '\b':
  28                         *(buf++) = '\\';
  29                         *(buf++) = 'b';
  30                         break;
  31                 case '\f':
  32                         *(buf++) = '\\';
  33                         *(buf++) = 'f';
  34                         break;
  35                 case '\n':
  36                         *(buf++) = '\\';
  37                         *(buf++) = 'n';
  38                         break;
  39                 case '\r':
  40                         *(buf++) = '\\';
  41                         *(buf++) = 'r';
  42                         break;
  43                 case '\t':
  44                         *(buf++) = '\\';
  45                         *(buf++) = 't';
  46                         break;
  47                 case '\v':
  48                         *(buf++) = '\\';
  49                         *(buf++) = 'v';
  50                         break;
  51                 case '\\':
  52                         *(buf++) = '\\';
  53                         *(buf++) = '\\';
  54                         break;
  55                 case '"':
  56                         *(buf++) = '\\';
  57                         *(buf++) = '"';
  58                         break;
  59                 case '\'':
  60                         *(buf++) = '\\';
  61                         *(buf++) = '\'';
  62                         break;
  63
  64                 default:
  65                         /* For special chars we prefer octal over
  66                          * hexadecimal encoding, simply because glib's
  67                          * g_strescape() does the same */
  68                         if ((c < ' ') || (c >= 127)) {
  69                                 *(buf++) = '\\';
  70                                 *(buf++) = octchar((unsigned char) c >> 6);
  71                                 *(buf++) = octchar((unsigned char) c >> 3);
  72                                 *(buf++) = octchar((unsigned char) c);
  73                         } else
  74                                 *(buf++) = c;
  75                         break;
  76         }
  77
  78         return buf - buf_old;
  79 }
  80
  81 char *cescape_length(const char *s, size_t n) {
  82         const char *f;
  83         char *r, *t;
  84
  85         assert(s || n == 0);
  86
  87         /* Does C style string escaping. May be reversed with
  88          * cunescape(). */
  89
  90         r = new(char, n*4 + 1);
  91         if (!r)
  92                 return NULL;
  93
  94         for (f = s, t = r; f < s + n; f++)
  95                 t += cescape_char(*f, t);
  96
  97         *t = 0;
  98
  99         return r;
 100 }
 101
 102 char *cescape(const char *s) {
 103         assert(s);
 104
 105         return cescape_length(s, strlen(s));
 106 }
 107
 108 int cunescape_one(const char *p, size_t length, char32_t *ret, bool *eight_bit) {
 109         int r = 1;
 110
 111         assert(p);
 112         assert(*p);
 113         assert(ret);
 114
 115         /* Unescapes C style. Returns the unescaped character in ret.
 116          * Sets *eight_bit to true if the escaped sequence either fits in
 117          * one byte in UTF-8 or is a non-unicode literal byte and should
 118          * instead be copied directly.
 119          */
 120
 121         if (length != (size_t) -1 && length < 1)
 122                 return -EINVAL;
 123
 124         switch (p[0]) {
 125
 126         case 'a':
 127                 *ret = '\a';
 128                 break;
 129         case 'b':
 130                 *ret = '\b';
 131                 break;
 132         case 'f':
 133                 *ret = '\f';
 134                 break;
 135         case 'n':
 136                 *ret = '\n';
 137                 break;
 138         case 'r':
 139                 *ret = '\r';
 140                 break;
 141         case 't':
 142                 *ret = '\t';
 143                 break;
 144         case 'v':
 145                 *ret = '\v';
 146                 break;
 147         case '\\':
 148                 *ret = '\\';
 149                 break;
 150         case '"':
 151                 *ret = '"';
 152                 break;
 153         case '\'':
 154                 *ret = '\'';
 155                 break;
 156
 157         case 's':
 158                 /* This is an extension of the XDG syntax files */
 159                 *ret = ' ';
 160                 break;
 161
 162         case 'x': {
 163                 /* hexadecimal encoding */
 164                 int a, b;
 165
 166                 if (length != (size_t) -1 && length < 3)
 167                         return -EINVAL;
 168
 169                 a = unhexchar(p[1]);
 170                 if (a < 0)
 171                         return -EINVAL;
 172
 173                 b = unhexchar(p[2]);
 174                 if (b < 0)
 175                         return -EINVAL;
 176
 177                 /* Don't allow NUL bytes */
 178                 if (a == 0 && b == 0)
 179                         return -EINVAL;
 180
 181                 *ret = (a << 4U) | b;
 182                 *eight_bit = true;
 183                 r = 3;
 184                 break;
 185         }
 186
 187         case 'u': {
 188                 /* C++11 style 16bit unicode */
 189
 190                 int a[4];
 191                 size_t i;
 192                 uint32_t c;
 193
 194                 if (length != (size_t) -1 && length < 5)
 195                         return -EINVAL;
 196
 197                 for (i = 0; i < 4; i++) {
 198                         a[i] = unhexchar(p[1 + i]);
 199                         if (a[i] < 0)
 200                                 return a[i];
 201                 }
 202
 203                 c = ((uint32_t) a[0] << 12U) | ((uint32_t) a[1] << 8U) | ((uint32_t) a[2] << 4U) | (uint32_t) a[3];
 204
 205                 /* Don't allow 0 chars */
 206                 if (c == 0)
 207                         return -EINVAL;
 208
 209                 *ret = c;
 210                 r = 5;
 211                 break;
 212         }
 213
 214         case 'U': {
 215                 /* C++11 style 32bit unicode */
 216
 217                 int a[8];
 218                 size_t i;
 219                 char32_t c;
 220
 221                 if (length != (size_t) -1 && length < 9)
 222                         return -EINVAL;
 223
 224                 for (i = 0; i < 8; i++) {
 225                         a[i] = unhexchar(p[1 + i]);
 226                         if (a[i] < 0)
 227                                 return a[i];
 228                 }
 229
 230                 c = ((uint32_t) a[0] << 28U) | ((uint32_t) a[1] << 24U) | ((uint32_t) a[2] << 20U) | ((uint32_t) a[3] << 16U) |
 231                     ((uint32_t) a[4] << 12U) | ((uint32_t) a[5] <<  8U) | ((uint32_t) a[6] <<  4U) |  (uint32_t) a[7];
 232
 233                 /* Don't allow 0 chars */
 234                 if (c == 0)
 235                         return -EINVAL;
 236
 237                 /* Don't allow invalid code points */
 238                 if (!unichar_is_valid(c))
 239                         return -EINVAL;
 240
 241                 *ret = c;
 242                 r = 9;
 243                 break;
 244         }
 245
 246         case '0':
 247         case '1':
 248         case '2':
 249         case '3':
 250         case '4':
 251         case '5':
 252         case '6':
 253         case '7': {
 254                 /* octal encoding */
 255                 int a, b, c;
 256                 char32_t m;
 257
 258                 if (length != (size_t) -1 && length < 3)
 259                         return -EINVAL;
 260
 261                 a = unoctchar(p[0]);
 262                 if (a < 0)
 263                         return -EINVAL;
 264
 265                 b = unoctchar(p[1]);
 266                 if (b < 0)
 267                         return -EINVAL;
 268
 269                 c = unoctchar(p[2]);
 270                 if (c < 0)
 271                         return -EINVAL;
 272
 273                 /* don't allow NUL bytes */
 274                 if (a == 0 && b == 0 && c == 0)
 275                         return -EINVAL;
 276
 277                 /* Don't allow bytes above 255 */
 278                 m = ((uint32_t) a << 6U) | ((uint32_t) b << 3U) | (uint32_t) c;
 279                 if (m > 255)
 280                         return -EINVAL;
 281
 282                 *ret = m;
 283                 *eight_bit = true;
 284                 r = 3;
 285                 break;
 286         }
 287
 288         default:
 289                 return -EINVAL;
 290         }
 291
 292         return r;
 293 }
 294
 295 int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret) {
 296         char *r, *t;
 297         const char *f;
 298         size_t pl;
 299
 300         assert(s);
 301         assert(ret);
 302
 303         /* Undoes C style string escaping, and optionally prefixes it. */
 304
 305         pl = strlen_ptr(prefix);
 306
 307         r = new(char, pl+length+1);
 308         if (!r)
 309                 return -ENOMEM;
 310
 311         if (prefix)
 312                 memcpy(r, prefix, pl);
 313
 314         for (f = s, t = r + pl; f < s + length; f++) {
 315                 size_t remaining;
 316                 bool eight_bit = false;
 317                 char32_t u;
 318                 int k;
 319
 320                 remaining = s + length - f;
 321                 assert(remaining > 0);
 322
 323                 if (*f != '\\') {
 324                         /* A literal, copy verbatim */
 325                         *(t++) = *f;
 326                         continue;
 327                 }
 328
 329                 if (remaining == 1) {
 330                         if (flags & UNESCAPE_RELAX) {
 331                                 /* A trailing backslash, copy verbatim */
 332                                 *(t++) = *f;
 333                                 continue;
 334                         }
 335
 336                         free(r);
 337                         return -EINVAL;
 338                 }
 339
 340                 k = cunescape_one(f + 1, remaining - 1, &u, &eight_bit);
 341                 if (k < 0) {
 342                         if (flags & UNESCAPE_RELAX) {
 343                                 /* Invalid escape code, let's take it literal then */
 344                                 *(t++) = '\\';
 345                                 continue;
 346                         }
 347
 348                         free(r);
 349                         return k;
 350                 }
 351
 352                 f += k;
 353                 if (eight_bit)
 354                         /* One byte? Set directly as specified */
 355                         *(t++) = u;
 356                 else
 357                         /* Otherwise encode as multi-byte UTF-8 */
 358                         t += utf8_encode_unichar(t, u);
 359         }
 360
 361         *t = 0;
 362
 363         *ret = r;
 364         return t - r;
 365 }
 366
 367 int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret) {
 368         return cunescape_length_with_prefix(s, length, NULL, flags, ret);
 369 }
 370
 371 int cunescape(const char *s, UnescapeFlags flags, char **ret) {
 372         return cunescape_length(s, strlen(s), flags, ret);
 373 }
 374
 375 char *xescape(const char *s, const char *bad) {
 376         char *r, *t;
 377         const char *f;
 378
 379         /* Escapes all chars in bad, in addition to \ and all special
 380          * chars, in \xFF style escaping. May be reversed with
 381          * cunescape(). */
 382
 383         r = new(char, strlen(s) * 4 + 1);
 384         if (!r)
 385                 return NULL;
 386
 387         for (f = s, t = r; *f; f++) {
 388
 389                 if ((*f < ' ') || (*f >= 127) ||
 390                     (*f == '\\') || strchr(bad, *f)) {
 391                         *(t++) = '\\';
 392                         *(t++) = 'x';
 393                         *(t++) = hexchar(*f >> 4);
 394                         *(t++) = hexchar(*f);
 395                 } else
 396                         *(t++) = *f;
 397         }
 398
 399         *t = 0;
 400
 401         return r;
 402 }
 403
 404 #if 0 /// UNNEEDED by elogind
 405 char *octescape(const char *s, size_t len) {
 406         char *r, *t;
 407         const char *f;
 408
 409         /* Escapes all chars in bad, in addition to \ and " chars,
 410          * in \nnn style escaping. */
 411
 412         r = new(char, len * 4 + 1);
 413         if (!r)
 414                 return NULL;
 415
 416         for (f = s, t = r; f < s + len; f++) {
 417
 418                 if (*f < ' ' || *f >= 127 || IN_SET(*f, '\\', '"')) {
 419                         *(t++) = '\\';
 420                         *(t++) = '0' + (*f >> 6);
 421                         *(t++) = '0' + ((*f >> 3) & 8);
 422                         *(t++) = '0' + (*f & 8);
 423                 } else
 424                         *(t++) = *f;
 425         }
 426
 427         *t = 0;
 428
 429         return r;
 430
 431 }
 432
 433 static char *strcpy_backslash_escaped(char *t, const char *s, const char *bad, bool escape_tab_nl) {
 434         assert(bad);
 435
 436         for (; *s; s++) {
 437                 if (escape_tab_nl && IN_SET(*s, '\n', '\t')) {
 438                         *(t++) = '\\';
 439                         *(t++) = *s == '\n' ? 'n' : 't';
 440                         continue;
 441                 }
 442
 443                 if (*s == '\\' || strchr(bad, *s))
 444                         *(t++) = '\\';
 445
 446                 *(t++) = *s;
 447         }
 448
 449         return t;
 450 }
 451
 452 char *shell_escape(const char *s, const char *bad) {
 453         char *r, *t;
 454
 455         r = new(char, strlen(s)*2+1);
 456         if (!r)
 457                 return NULL;
 458
 459         t = strcpy_backslash_escaped(r, s, bad, false);
 460         *t = 0;
 461
 462         return r;
 463 }
 464
 465 char* shell_maybe_quote(const char *s, EscapeStyle style) {
 466         const char *p;
 467         char *r, *t;
 468
 469         assert(s);
 470
 471         /* Encloses a string in quotes if necessary to make it OK as a shell
 472          * string. Note that we treat benign UTF-8 characters as needing
 473          * escaping too, but that should be OK. */
 474
 475         for (p = s; *p; p++)
 476                 if (*p <= ' ' ||
 477                     *p >= 127 ||
 478                     strchr(SHELL_NEED_QUOTES, *p))
 479                         break;
 480
 481         if (!*p)
 482                 return strdup(s);
 483
 484         r = new(char, (style == ESCAPE_POSIX) + 1 + strlen(s)*2 + 1 + 1);
 485         if (!r)
 486                 return NULL;
 487
 488         t = r;
 489         if (style == ESCAPE_BACKSLASH)
 490                 *(t++) = '"';
 491         else if (style == ESCAPE_POSIX) {
 492                 *(t++) = '$';
 493                 *(t++) = '\'';
 494         } else
 495                 assert_not_reached("Bad EscapeStyle");
 496
 497         t = mempcpy(t, s, p - s);
 498
 499         if (style == ESCAPE_BACKSLASH)
 500                 t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE, false);
 501         else
 502                 t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE_POSIX, true);
 503
 504         if (style == ESCAPE_BACKSLASH)
 505                 *(t++) = '"';
 506         else
 507                 *(t++) = '\'';
 508         *t = 0;
 509
 510         return r;
 511 }
 512 #endif // 0