chiark / gitweb /
more doxygen
[disorder] / lib / utf8.h
1 /*
2  * This file is part of DisOrder
3  * Copyright (C) 2004, 2005 Richard Kettlewell
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
18  * USA
19  */
20 #ifndef UTF8_H
21 #define UTF8_H
22
/* PARSE_UTF8(S, C, E) - decode one UTF-8 encoded code point
 *
 * S  Modifiable pointer lvalue (char or unsigned char pointer) addressing
 *    the first byte of the sequence.  It is advanced past every byte that
 *    is consumed.  S is evaluated several times, so it must be free of
 *    side effects.
 * C  Integer lvalue that receives the decoded code point.  It must be able
 *    to represent values up to 0x10FFFF.  Also evaluated several times.
 * E  Statement(s) executed when the input is malformed.  E is expected to
 *    transfer control out of the macro (return, goto, break, ...); if it
 *    falls through, decoding simply carries on with the offending byte.
 *
 * The input is rejected if the lead byte is a stray continuation byte
 * (0x80..0xBF), an overlong two-byte lead (0xC0, 0xC1) or greater than
 * 0xF7; if a continuation byte is missing; if a three- or four-byte
 * sequence is overlong; if the result is a surrogate (0xD800..0xDFFF); or
 * if the result exceeds 0x10FFFF.
 *
 * Each continuation byte is tested before S is moved past it, so a
 * sequence cut short by a 0 byte is reported through E without reading
 * beyond that byte (provided E leaves the macro).  The values of S and C
 * after E has run are not meaningful.
 */
#define PARSE_UTF8(S,C,E) do {                                          \
  if((unsigned char)*(S) < 0x80)                                        \
    (C) = *(S)++;                                                       \
  else if((unsigned char)*(S) < 0xC2) {                                 \
    /* 0x80..0xBF: continuation byte; 0xC0/0xC1: overlong lead */       \
    E;                                                                  \
  } else if((unsigned char)*(S) <= 0xDF) {                              \
    (C) = ((unsigned char)*(S)++ & 0x1F) << 6;                          \
    if(((unsigned char)*(S) & 0xC0) != 0x80) { E; }                     \
    (C) |= ((unsigned char)*(S)++ & 0x3F);                              \
  } else if((unsigned char)*(S) <= 0xEF) {                              \
    (C) = ((unsigned char)*(S)++ & 0x0F) << 12;                         \
    if(((unsigned char)*(S) & 0xC0) != 0x80) { E; }                     \
    (C) |= ((unsigned char)*(S)++ & 0x3F) << 6;                         \
    if(((unsigned char)*(S) & 0xC0) != 0x80) { E; }                     \
    (C) |= ((unsigned char)*(S)++ & 0x3F);                              \
    /* reject overlong forms and UTF-16 surrogates */                   \
    if((C) < 0x800                                                      \
       || ((C) >= 0xD800 && (C) <= 0xDFFF)) {                           \
      E;                                                                \
    }                                                                   \
  } else if((unsigned char)*(S) <= 0xF7) {                              \
    (C) = ((unsigned char)*(S)++ & 0x07) << 18;                         \
    if(((unsigned char)*(S) & 0xC0) != 0x80) { E; }                     \
    (C) |= ((unsigned char)*(S)++ & 0x3F) << 12;                        \
    if(((unsigned char)*(S) & 0xC0) != 0x80) { E; }                     \
    (C) |= ((unsigned char)*(S)++ & 0x3F) << 6;                         \
    if(((unsigned char)*(S) & 0xC0) != 0x80) { E; }                     \
    (C) |= ((unsigned char)*(S)++ & 0x3F);                              \
    /* reject overlong forms and values beyond the Unicode range */     \
    if((C) < 0x10000 || (C) > 0x10FFFF) { E; }                          \
  } else {                                                              \
    E;                                                                  \
  }                                                                     \
} while(0)
54
55 int validutf8(const char *s);
56 /* Return nonzero if S is a valid UTF-8 sequence, else zero */
57
58 #endif /* UTF8_h */
59
60 /*
61 Local Variables:
62 c-basic-offset:2
63 comment-column:40
64 End:
65 */