chiark / gitweb /
volume_id: add and export string encoding function
[elogind.git] / extras / volume_id / lib / util.c
1 /*
2  * volume_id - reads filesystem label and uuid
3  *
4  * Copyright (C) 2005-2007 Kay Sievers <kay.sievers@vrfy.org>
5  *
6  *      This program is free software; you can redistribute it and/or modify it
7  *      under the terms of the GNU General Public License as published by the
8  *      Free Software Foundation version 2 of the License.
9  */
10
11 #ifndef _GNU_SOURCE
12 #define _GNU_SOURCE 1
13 #endif
14
15 #ifdef HAVE_CONFIG_H
16 #  include <config.h>
17 #endif
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <string.h>
23 #include <errno.h>
24 #include <ctype.h>
25 #include <fcntl.h>
26 #include <sys/stat.h>
27
28 #include "libvolume_id.h"
29 #include "util.h"
30
/*
 * Number of bytes the encoded character starting at str[0] claims to occupy,
 * judged from the lead byte alone. Returns 1..6, or 0 if the byte cannot
 * start a sequence (a continuation byte, 0xfe or 0xff).
 */
static int utf8_encoded_expected_len(const char *str)
{
        const unsigned char lead = (unsigned char)str[0];
        int ones = 0;

        /* the count of leading 1 bits determines the sequence length */
        while (ones < 8 && (lead & (0x80 >> ones)) != 0)
                ones++;

        /* 0xxxxxxx: plain ascii */
        if (ones == 0)
                return 1;
        /* 10xxxxxx is a continuation byte; more than six 1 bits is never used */
        if (ones == 1 || ones > 6)
                return 0;
        return ones;
}
50
/*
 * Decode the single encoded character starting at str[0].
 * Returns the decoded value, or -1 if the lead byte is not a valid start
 * byte or one of the following bytes is not a continuation byte (10xxxxxx).
 * No range or shortest-form check is done here.
 */
static int utf8_encoded_to_unichar(const char *str)
{
        /* payload bits of the lead byte, indexed by sequence length */
        static const int lead_mask[] = { 0x00, 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01 };
        const int seq_len = utf8_encoded_expected_len(str);
        int value;
        int pos;

        if (seq_len < 1 || seq_len > 6)
                return -1;

        value = (int)str[0] & lead_mask[seq_len];

        /* every continuation byte contributes six more bits */
        for (pos = 1; pos < seq_len; pos++) {
                const int byte = (int)str[pos];

                if ((byte & 0xc0) != 0x80)
                        return -1;
                value = (value << 6) | (byte & 0x3f);
        }

        return value;
}
90
/*
 * Number of bytes (1..6) needed to encode unichar.
 * Values below 0x80, including negative ones, yield 1.
 */
static int utf8_unichar_to_encoded_len(int unichar)
{
        /* first value that no longer fits into 1, 2, 3, 4 and 5 bytes */
        static const int limit[] = { 0x80, 0x800, 0x10000, 0x200000, 0x4000000 };
        int bytes;

        for (bytes = 1; bytes <= 5; bytes++) {
                if (unichar < limit[bytes - 1])
                        return bytes;
        }
        return 6;
}
106
/*
 * Check whether unichar lies in an accepted numeric range.
 * Returns 1 if accepted, 0 if rejected.
 */
static int utf8_unichar_valid_range(int unichar)
{
        /* above the last code point */
        const int too_large = unichar > 0x10ffff;
        /* 0xd800..0xdfff, the utf-16 surrogates */
        const int surrogate = (unichar & 0xfffff800) == 0xd800;
        /* the block 0xfdd0..0xfdef */
        const int fdd0_block = unichar >= 0xfdd0 && unichar <= 0xfdef;
        /* low 16 bits all set: 0xffff, 0x1ffff, ... */
        const int plane_last = (unichar & 0xffff) == 0xffff;

        return !(too_large || surrogate || fdd0_block || plane_last);
}
120
/*
 * Validate the single encoded character starting at str[0].
 * Returns its length in bytes (1 for ascii), or -1 if the sequence is
 * truncated, malformed, not in shortest form or outside the accepted range.
 */
int volume_id_utf8_encoded_valid_unichar(const char *str)
{
        const int expected = utf8_encoded_expected_len(str);
        int value;
        int pos;

        switch (expected) {
        case 0:
                /* not a valid start byte */
                return -1;
        case 1:
                /* ascii is valid */
                return 1;
        default:
                break;
        }

        /*
         * All bytes of the sequence must have the high bit set; this stops
         * at the first byte without it, which includes a terminating NUL.
         */
        for (pos = 0; pos < expected; pos++) {
                if (!(str[pos] & 0x80))
                        return -1;
        }

        value = utf8_encoded_to_unichar(str);

        /* the encoded length must match the value, and the value must be in range */
        if (utf8_unichar_to_encoded_len(value) != expected ||
            !utf8_unichar_valid_range(value))
                return -1;

        return expected;
}
153
154 void volume_id_set_unicode16(char *str, size_t len, const uint8_t *buf, enum endian endianess, size_t count)
155 {
156         unsigned int i, j;
157         uint16_t c;
158
159         j = 0;
160         for (i = 0; i + 2 <= count; i += 2) {
161                 if (endianess == LE)
162                         c = (buf[i+1] << 8) | buf[i];
163                 else
164                         c = (buf[i] << 8) | buf[i+1];
165                 if (c == 0) {
166                         str[j] = '\0';
167                         break;
168                 } else if (c < 0x80) {
169                         if (j+1 >= len)
170                                 break;
171                         str[j++] = (uint8_t) c;
172                 } else if (c < 0x800) {
173                         if (j+2 >= len)
174                                 break;
175                         str[j++] = (uint8_t) (0xc0 | (c >> 6));
176                         str[j++] = (uint8_t) (0x80 | (c & 0x3f));
177                 } else {
178                         if (j+3 >= len)
179                                 break;
180                         str[j++] = (uint8_t) (0xe0 | (c >> 12));
181                         str[j++] = (uint8_t) (0x80 | ((c >> 6) & 0x3f));
182                         str[j++] = (uint8_t) (0x80 | (c & 0x3f));
183                 }
184         }
185         str[j] = '\0';
186 }
187
188 static char *usage_to_string(enum volume_id_usage usage_id)
189 {
190         switch (usage_id) {
191         case VOLUME_ID_FILESYSTEM:
192                 return "filesystem";
193         case VOLUME_ID_OTHER:
194                 return "other";
195         case VOLUME_ID_RAID:
196                 return "raid";
197         case VOLUME_ID_DISKLABEL:
198                 return "disklabel";
199         case VOLUME_ID_CRYPTO:
200                 return "crypto";
201         case VOLUME_ID_UNPROBED:
202                 return "unprobed";
203         case VOLUME_ID_UNUSED:
204                 return "unused";
205         }
206         return NULL;
207 }
208
209 void volume_id_set_usage(struct volume_id *id, enum volume_id_usage usage_id)
210 {
211         id->usage_id = usage_id;
212         id->usage = usage_to_string(usage_id);
213 }
214
215 void volume_id_set_label_raw(struct volume_id *id, const uint8_t *buf, size_t count)
216 {
217         memcpy(id->label_raw, buf, count);
218         id->label_raw_len = count;
219 }
220
221 void volume_id_set_label_string(struct volume_id *id, const uint8_t *buf, size_t count)
222 {
223         unsigned int i;
224
225         memcpy(id->label, buf, count);
226
227         /* remove trailing whitespace */
228         i = strnlen(id->label, count);
229         while (i--) {
230                 if (!isspace(id->label[i]))
231                         break;
232         }
233         id->label[i+1] = '\0';
234 }
235
236 void volume_id_set_label_unicode16(struct volume_id *id, const uint8_t *buf, enum endian endianess, size_t count)
237 {
238          volume_id_set_unicode16(id->label, sizeof(id->label), buf, endianess, count);
239 }
240
241 void volume_id_set_uuid(struct volume_id *id, const uint8_t *buf, size_t len, enum uuid_format format)
242 {
243         unsigned int i;
244         unsigned int count = 0;
245
246         if (len > sizeof(id->uuid_raw))
247                 len = sizeof(id->uuid_raw);
248
249         switch(format) {
250         case UUID_STRING:
251                 count = len;
252                 break;
253         case UUID_HEX_STRING:
254                 count = len;
255                 break;
256         case UUID_DOS:
257                 count = 4;
258                 break;
259         case UUID_64BIT_LE:
260         case UUID_64BIT_BE:
261                 count = 8;
262                 break;
263         case UUID_DCE:
264                 count = 16;
265                 break;
266         case UUID_FOURINT:
267                 count = 35;
268                 break;
269         }
270         memcpy(id->uuid_raw, buf, count);
271         id->uuid_raw_len = count;
272
273         /* if set, create string in the same format, the native platform uses */
274         for (i = 0; i < count; i++)
275                 if (buf[i] != 0)
276                         goto set;
277         return;
278
279 set:
280         switch(format) {
281         case UUID_DOS:
282                 sprintf(id->uuid, "%02X%02X-%02X%02X",
283                         buf[3], buf[2], buf[1], buf[0]);
284                 break;
285         case UUID_64BIT_LE:
286                 sprintf(id->uuid,"%02X%02X%02X%02X%02X%02X%02X%02X",
287                         buf[7], buf[6], buf[5], buf[4],
288                         buf[3], buf[2], buf[1], buf[0]);
289                 break;
290         case UUID_64BIT_BE:
291                 sprintf(id->uuid,"%02X%02X%02X%02X%02X%02X%02X%02X",
292                         buf[0], buf[1], buf[2], buf[3],
293                         buf[4], buf[5], buf[6], buf[7]);
294                 break;
295         case UUID_DCE:
296                 sprintf(id->uuid,
297                         "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
298                         buf[0], buf[1], buf[2], buf[3],
299                         buf[4], buf[5],
300                         buf[6], buf[7],
301                         buf[8], buf[9],
302                         buf[10], buf[11], buf[12], buf[13], buf[14],buf[15]);
303                 break;
304         case UUID_HEX_STRING:
305                 /* translate A..F to a..f */
306                 memcpy(id->uuid, buf, count);
307                 for (i = 0; i < count; i++)
308                         if (id->uuid[i] >= 'A' && id->uuid[i] <= 'F')
309                                 id->uuid[i] = (id->uuid[i] - 'A') + 'a';
310                 id->uuid[count] = '\0';
311                 break;
312         case UUID_STRING:
313                 memcpy(id->uuid, buf, count);
314                 id->uuid[count] = '\0';
315                 break;
316         case UUID_FOURINT:
317                 sprintf(id->uuid,
318                         "%02x%02x%02x%02x:%02x%02x%02x%02x:%02x%02x%02x%02x:%02x%02x%02x%02x",
319                         buf[0], buf[1], buf[2], buf[3],
320                         buf[4], buf[5], buf[6], buf[7],
321                         buf[8], buf[9], buf[10], buf[11],
322                         buf[12], buf[13], buf[14],buf[15]);
323                 break;
324         }
325 }
326
327 uint8_t *volume_id_get_buffer(struct volume_id *id, uint64_t off, size_t len)
328 {
329         ssize_t buf_len;
330
331         info("get buffer off 0x%llx(%llu), len 0x%zx", (unsigned long long) off, (unsigned long long) off, len);
332         /* check if requested area fits in superblock buffer */
333         if (off + len <= SB_BUFFER_SIZE) {
334                 if (id->sbbuf == NULL) {
335                         id->sbbuf = malloc(SB_BUFFER_SIZE);
336                         if (id->sbbuf == NULL) {
337                                 dbg("error malloc");
338                                 return NULL;
339                         }
340                 }
341
342                 /* check if we need to read */
343                 if ((off + len) > id->sbbuf_len) {
344                         info("read sbbuf len:0x%llx", (unsigned long long) (off + len));
345                         if (lseek(id->fd, 0, SEEK_SET) < 0) {
346                                 dbg("lseek failed (%s)", strerror(errno));
347                                 return NULL;
348                         }
349                         buf_len = read(id->fd, id->sbbuf, off + len);
350                         if (buf_len < 0) {
351                                 dbg("read failed (%s)", strerror(errno));
352                                 return NULL;
353                         }
354                         dbg("got 0x%zx (%zi) bytes", buf_len, buf_len);
355                         id->sbbuf_len = buf_len;
356                         if ((size_t)buf_len < off + len) {
357                                 dbg("requested 0x%zx bytes, got only 0x%zx bytes", len, buf_len);
358                                 return NULL;
359                         }
360                 }
361
362                 return &(id->sbbuf[off]);
363         } else {
364                 if (len > SEEK_BUFFER_SIZE) {
365                         dbg("seek buffer too small %d", SEEK_BUFFER_SIZE);
366                         return NULL;
367                 }
368
369                 /* get seek buffer */
370                 if (id->seekbuf == NULL) {
371                         id->seekbuf = malloc(SEEK_BUFFER_SIZE);
372                         if (id->seekbuf == NULL) {
373                                 dbg("error malloc");
374                                 return NULL;
375                         }
376                 }
377
378                 /* check if we need to read */
379                 if ((off < id->seekbuf_off) || ((off + len) > (id->seekbuf_off + id->seekbuf_len))) {
380                         info("read seekbuf off:0x%llx len:0x%zx", (unsigned long long) off, len);
381                         if (lseek(id->fd, off, SEEK_SET) < 0) {
382                                 dbg("lseek failed (%s)", strerror(errno));
383                                 return NULL;
384                         }
385                         buf_len = read(id->fd, id->seekbuf, len);
386                         if (buf_len < 0) {
387                                 dbg("read failed (%s)", strerror(errno));
388                                 return NULL;
389                         }
390                         dbg("got 0x%zx (%zi) bytes", buf_len, buf_len);
391                         id->seekbuf_off = off;
392                         id->seekbuf_len = buf_len;
393                         if ((size_t)buf_len < len) {
394                                 dbg("requested 0x%zx bytes, got only 0x%zx bytes", len, buf_len);
395                                 return NULL;
396                         }
397                 }
398
399                 return &(id->seekbuf[off - id->seekbuf_off]);
400         }
401 }
402
403 void volume_id_free_buffer(struct volume_id *id)
404 {
405         if (id->sbbuf != NULL) {
406                 free(id->sbbuf);
407                 id->sbbuf = NULL;
408                 id->sbbuf_len = 0;
409         }
410         if (id->seekbuf != NULL) {
411                 free(id->seekbuf);
412                 id->seekbuf = NULL;
413                 id->seekbuf_len = 0;
414         }
415 }