460b9539 |
1 | /* |
2 | * This file is part of DisOrder |
3 | * Copyright (C) 2004, 2005 Richard Kettlewell |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, but |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | * General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License |
16 | * along with this program; if not, write to the Free Software |
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 |
18 | * USA |
19 | */ |
20 | #ifndef UTF8_H |
21 | #define UTF8_H |
22 | |
/* PARSE_UTF8(S, C, E) - decode a single UTF-8 encoded code point
 *
 * S  pointer lvalue (char * or unsigned char *) addressing the input.  It
 *    is advanced past every byte that is consumed.
 * C  lvalue that receives the decoded code point.
 * E  statement executed when the input is malformed; it is expected to
 *    leave the surrounding code (return, goto, break, ...).
 *
 * S and C are evaluated several times, so they must be free of side
 * effects.  When E runs, S may already have been advanced past part of the
 * offending sequence and C may hold a partial value.
 *
 * Rejected input: stray continuation bytes (0x80..0xBF) and the
 * always-overlong lead bytes 0xC0/0xC1 in lead position, missing
 * continuation bytes, overlong 3- and 4-byte forms, UTF-16 surrogates
 * (U+D800..U+DFFF), code points above U+10FFFF and lead bytes above 0xF7.
 * A terminating NUL is never a continuation byte, so a NUL-terminated
 * string is not read past its terminator.
 */
#define PARSE_UTF8(S,C,E) do {                                          \
  if((unsigned char)*(S) < 0x80) {                                      \
    (C) = (unsigned char)*(S)++;                                        \
  } else if((unsigned char)*(S) < 0xC2) {                               \
    /* continuation byte or overlong lead where a lead byte belongs */  \
    E;                                                                  \
  } else if((unsigned char)*(S) <= 0xDF) {                              \
    /* lead >= 0xC2 guarantees the result is >= 0x80 (not overlong) */  \
    (C) = ((unsigned char)*(S)++ & 0x1F) << 6;                          \
    if(((unsigned char)*(S) & 0xC0) != 0x80) { E; }                     \
    (C) |= ((unsigned char)*(S)++ & 0x3F);                              \
  } else if((unsigned char)*(S) <= 0xEF) {                              \
    (C) = ((unsigned char)*(S)++ & 0x0F) << 12;                         \
    if(((unsigned char)*(S) & 0xC0) != 0x80) { E; }                     \
    (C) |= ((unsigned char)*(S)++ & 0x3F) << 6;                         \
    if(((unsigned char)*(S) & 0xC0) != 0x80) { E; }                     \
    (C) |= ((unsigned char)*(S)++ & 0x3F);                              \
    /* overlong encodings and UTF-16 surrogates are not valid */        \
    if((C) < 0x800                                                      \
       || ((C) >= 0xD800 && (C) <= 0xDFFF)) {                           \
      E;                                                                \
    }                                                                   \
  } else if((unsigned char)*(S) <= 0xF7) {                              \
    (C) = ((unsigned char)*(S)++ & 0x07) << 18;                         \
    if(((unsigned char)*(S) & 0xC0) != 0x80) { E; }                     \
    (C) |= ((unsigned char)*(S)++ & 0x3F) << 12;                        \
    if(((unsigned char)*(S) & 0xC0) != 0x80) { E; }                     \
    (C) |= ((unsigned char)*(S)++ & 0x3F) << 6;                         \
    if(((unsigned char)*(S) & 0xC0) != 0x80) { E; }                     \
    (C) |= ((unsigned char)*(S)++ & 0x3F);                              \
    /* overlong encodings and values beyond the Unicode range */        \
    if((C) < 0x10000 || (C) > 0x10FFFF) { E; }                          \
  } else {                                                              \
    E;                                                                  \
  }                                                                     \
} while(0)
54 | |
55 | int validutf8(const char *s); |
/* return nonzero if S is a valid UTF-8 sequence, else zero */
57 | |
#endif /* UTF8_H */
59 | |
60 | /* |
61 | Local Variables: |
62 | c-basic-offset:2 |
63 | comment-column:40 |
64 | End: |
65 | */ |