udev_utils_string.c

   1 /*
   2  * udev_utils_string.c - string manipulation
   3  *
   4  * Copyright (C) 2004-2005 Kay Sievers <kay.sievers@vrfy.org>
   5  *
   6  *      This program is free software; you can redistribute it and/or modify it
   7  *      under the terms of the GNU General Public License as published by the
   8  *      Free Software Foundation version 2 of the License.
   9  *
  10  *      This program is distributed in the hope that it will be useful, but
  11  *      WITHOUT ANY WARRANTY; without even the implied warranty of
  12  *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  *      General Public License for more details.
  14  *
  15  *      You should have received a copy of the GNU General Public License along
  16  *      with this program; if not, write to the Free Software Foundation, Inc.,
  17  *      675 Mass Ave, Cambridge, MA 02139, USA.
  18  *
  19  */
  20
  21
  22 #include <stdlib.h>
  23 #include <stdio.h>
  24 #include <stddef.h>
  25 #include <unistd.h>
  26 #include <fcntl.h>
  27 #include <errno.h>
  28 #include <ctype.h>
  29 #include <dirent.h>
  30 #include <syslog.h>
  31 #include <sys/utsname.h>
  32
  33 #include "udev_libc_wrapper.h"
  34 #include "udev.h"
  35 #include "logging.h"
  36 #include "udev_utils.h"
  37 #include "list.h"
  38
  39 /* compare string with pattern (like fnmatch(), supports * ? [0-9] [!A-Z]) */
  40 int strcmp_pattern(const char *p, const char *s)
  41 {
  42         if (s[0] == '\0') {
  43                 while (p[0] == '*')
  44                         p++;
  45                 return (p[0] != '\0');
  46         }
  47         switch (p[0]) {
  48         case '[':
  49                 {
  50                         int not = 0;
  51                         p++;
  52                         if (p[0] == '!') {
  53                                 not = 1;
  54                                 p++;
  55                         }
  56                         while ((p[0] != '\0') && (p[0] != ']')) {
  57                                 int match = 0;
  58                                 if (p[1] == '-') {
  59                                         if ((s[0] >= p[0]) && (s[0] <= p[2]))
  60                                                 match = 1;
  61                                         p += 3;
  62                                 } else {
  63                                         match = (p[0] == s[0]);
  64                                         p++;
  65                                 }
  66                                 if (match ^ not) {
  67                                         while ((p[0] != '\0') && (p[0] != ']'))
  68                                                 p++;
  69                                         if (p[0] == ']')
  70                                                 return strcmp_pattern(p+1, s+1);
  71                                 }
  72                         }
  73                 }
  74                 break;
  75         case '*':
  76                 if (strcmp_pattern(p, s+1))
  77                         return strcmp_pattern(p+1, s);
  78                 return 0;
  79         case '\0':
  80                 if (s[0] == '\0') {
  81                         return 0;
  82                 }
  83                 break;
  84         default:
  85                 if ((p[0] == s[0]) || (p[0] == '?'))
  86                         return strcmp_pattern(p+1, s+1);
  87                 break;
  88         }
  89         return 1;
  90 }
  91
  92 int string_is_true(const char *str)
  93 {
  94         if (strcasecmp(str, "true") == 0)
  95                 return 1;
  96         if (strcasecmp(str, "yes") == 0)
  97                 return 1;
  98         if (strcasecmp(str, "1") == 0)
  99                 return 1;
 100         return 0;
 101 }
 102
 103 void remove_trailing_chars(char *path, char c)
 104 {
 105         size_t len;
 106
 107         len = strlen(path);
 108         while (len > 0 && path[len-1] == c)
 109                 path[--len] = '\0';
 110 }
 111
 112 /* count of characters used to encode one unicode char */
 113 static int utf8_encoded_expected_len(const char *str)
 114 {
 115         unsigned char c = (unsigned char)str[0];
 116
 117         if (c < 0x80)
 118                 return 1;
 119         if ((c & 0xe0) == 0xc0)
 120                 return 2;
 121         if ((c & 0xf0) == 0xe0)
 122                 return 3;
 123         if ((c & 0xf8) == 0xf0)
 124                 return 4;
 125         if ((c & 0xfc) == 0xf8)
 126                 return 5;
 127         if ((c & 0xfe) == 0xfc)
 128                 return 6;
 129         return 0;
 130 }
 131
 132 /* decode one unicode char */
 133 static int utf8_encoded_to_unichar(const char *str)
 134 {
 135         int unichar;
 136         int len;
 137         int i;
 138
 139         len = utf8_encoded_expected_len(str);
 140         switch (len) {
 141         case 1:
 142                 return (int)str[0];
 143         case 2:
 144                 unichar = str[0] & 0x1f;
 145                 break;
 146         case 3:
 147                 unichar = (int)str[0] & 0x0f;
 148                 break;
 149         case 4:
 150                 unichar = (int)str[0] & 0x07;
 151                 break;
 152         case 5:
 153                 unichar = (int)str[0] & 0x03;
 154                 break;
 155         case 6:
 156                 unichar = (int)str[0] & 0x01;
 157                 break;
 158         default:
 159                 return -1;
 160         }
 161
 162         for (i = 1; i < len; i++) {
 163                 if (((int)str[i] & 0xc0) != 0x80)
 164                         return -1;
 165                 unichar <<= 6;
 166                 unichar |= (int)str[i] & 0x3f;
 167         }
 168
 169         return unichar;
 170 }
 171
 172 /* expected size used to encode one unicode char */
 173 static int utf8_unichar_to_encoded_len(int unichar)
 174 {
 175         if (unichar < 0x80)
 176                 return 1;
 177         if (unichar < 0x800)
 178                 return 2;
 179         if (unichar < 0x10000)
 180                 return 3;
 181         if (unichar < 0x200000)
 182                 return 4;
 183         if (unichar < 0x4000000)
 184                 return 5;
 185         return 6;
 186 }
 187
 188 /* check if unicode char has a valid numeric range */
 189 static int utf8_unichar_valid_range(int unichar)
 190 {
 191         if (unichar > 0x10ffff)
 192                 return 0;
 193         if ((unichar & 0xfffff800) == 0xd800)
 194                 return 0;
 195         if ((unichar > 0xfdcf) && (unichar < 0xfdf0))
 196                 return 0;
 197         if ((unichar & 0xffff) == 0xffff)
 198                 return 0;
 199         return 1;
 200 }
 201
 202 /* validate one encoded unicode char and return its length */
 203 int utf8_encoded_valid_unichar(const char *str)
 204 {
 205         int len;
 206         int unichar;
 207         int i;
 208
 209         len = utf8_encoded_expected_len(str);
 210         if (len == 0)
 211                 return -1;
 212
 213         /* ascii is valid */
 214         if (len == 1)
 215                 return 1;
 216
 217         /* check if expected encoded chars are available */
 218         for (i = 0; i < len; i++)
 219                 if ((str[i] & 0x80) != 0x80)
 220                         return -1;
 221
 222         unichar = utf8_encoded_to_unichar(str);
 223
 224         /* check if encoded length matches encoded value */
 225         if (utf8_unichar_to_encoded_len(unichar) != len)
 226                 return -1;
 227
 228         /* check if value has valid range */
 229         if (!utf8_unichar_valid_range(unichar))
 230                 return -1;
 231
 232         return len;
 233 }
 234
 235 /* replace everything but whitelisted plain ascii and valid utf8 */
 236 int replace_untrusted_chars(char *str)
 237 {
 238         size_t i = 0;
 239         int replaced = 0;
 240
 241         while (str[i] != '\0') {
 242                 int len;
 243
 244                 /* valid printable ascii char */
 245                 if ((str[i] >= '0' && str[i] <= '9') ||
 246                     (str[i] >= 'A' && str[i] <= 'Z') ||
 247                     (str[i] >= 'a' && str[i] <= 'z') ||
 248                     strchr(" #$%+-./:=?@_", str[i])) {
 249                         i++;
 250                         continue;
 251                 }
 252                 /* valid utf8 is accepted */
 253                 len = utf8_encoded_valid_unichar(&str[i]);
 254                 if (len > 1) {
 255                         i += len;
 256                         continue;
 257                 }
 258
 259                 /* everything else is garbage */
 260                 str[i] = '_';
 261                 i++;
 262                 replaced++;
 263         }
 264
 265         return replaced;
 266 }