460b9539 |
1 | /* |
2 | * This file is part of DisOrder. |
3 | * Copyright (C) 2004, 2005 Richard Kettlewell |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, but |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | * General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License |
16 | * along with this program; if not, write to the Free Software |
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 |
18 | * USA |
19 | */ |
20 | |
21 | #include <config.h> |
22 | #include "types.h" |
23 | |
24 | #include <iconv.h> |
25 | #include <string.h> |
26 | #include <errno.h> |
27 | #include <langinfo.h> |
28 | |
29 | #include "mem.h" |
30 | #include "log.h" |
31 | #include "charset.h" |
32 | #include "configuration.h" |
33 | #include "utf8.h" |
34 | #include "vector.h" |
35 | |
/* Convert the N bytes at PTR from encoding FROM to encoding TO using iconv.
 *
 * The output buffer starts at 32 bytes and is doubled, with the conversion
 * restarted from the beginning of the input, for as long as iconv() reports
 * E2BIG.  Nothing is appended to the output, so a caller that wants a
 * terminated result must include the terminator in N.
 *
 * Returns the output buffer on success.  If the conversion fails for any
 * other reason the failure is logged with error() and 0 is returned.  A
 * failure to open the conversion descriptor is reported with fatal().
 */
static void *convert(const char *from, const char *to,
                     const void *ptr, size_t n) {
  iconv_t i;
  size_t len;
  char *buf = 0, *s, *d;
  size_t bufsize = 0, sl, dl;
  int err = 0;

  if((i = iconv_open(to, from)) == (iconv_t)-1)
    fatal(errno, "error calling iconv_open");
  do {
    bufsize = bufsize ? 2 * bufsize : 32;
    buf = xrealloc_noptr(buf, bufsize);
    /* put the descriptor back into its initial state before each attempt */
    iconv(i, 0, 0, 0, 0);
    s = (char *)ptr;
    sl = n;
    d = buf;
    dl = bufsize;
    /* (void *) to work around FreeBSD's nonstandard iconv prototype */
    len = iconv(i, (void *)&s, &sl, &d, &dl);
    /* capture errno right away: iconv_close() below may overwrite it before
     * the failure gets reported */
    if(len == (size_t)-1)
      err = errno;
  } while(len == (size_t)-1 && err == E2BIG);
  iconv_close(i);
  if(len == (size_t)-1) {
    error(err, "error converting from %s to %s", from, to);
    return 0;
  }
  return buf;
}
63 | |
64 | /* not everybody's iconv supports UCS-4, and it's inconvenient to have to know |
65 | * our endianness, and it's easy to convert it ourselves, so we do */ |
66 | uint32_t *utf82ucs4(const char *mb) { |
67 | struct dynstr_ucs4 d; |
68 | uint32_t c; |
69 | |
70 | dynstr_ucs4_init(&d); |
71 | while(*mb) { |
72 | PARSE_UTF8(mb, c, |
73 | error(0, "invalid UTF-8 sequence"); return 0;); |
74 | dynstr_ucs4_append(&d, c); |
75 | } |
76 | dynstr_ucs4_terminate(&d); |
77 | return d.vec; |
78 | } |
79 | |
80 | char *ucs42utf8(const uint32_t *u) { |
81 | struct dynstr d; |
82 | uint32_t c; |
83 | |
84 | dynstr_init(&d); |
85 | while((c = *u++)) { |
86 | if(c < 0x80) |
87 | dynstr_append(&d, c); |
88 | else if(c < 0x800) { |
89 | dynstr_append(&d, 0xC0 | (c >> 6)); |
90 | dynstr_append(&d, 0x80 | (c & 0x3F)); |
91 | } else if(c < 0x10000) { |
92 | dynstr_append(&d, 0xE0 | (c >> 12)); |
93 | dynstr_append(&d, 0x80 | ((c >> 6) & 0x3F)); |
94 | dynstr_append(&d, 0x80 | (c & 0x3F)); |
95 | } else if(c < 0x110000) { |
96 | dynstr_append(&d, 0xF0 | (c >> 18)); |
97 | dynstr_append(&d, 0x80 | ((c >> 12) & 0x3F)); |
98 | dynstr_append(&d, 0x80 | ((c >> 6) & 0x3F)); |
99 | dynstr_append(&d, 0x80 | (c & 0x3F)); |
100 | } else { |
101 | error(0, "invalid UCS-4 character"); |
102 | return 0; |
103 | } |
104 | } |
105 | dynstr_terminate(&d); |
106 | return d.vec; |
107 | } |
108 | |
/* Convert MB from the current locale's encoding (nl_langinfo(CODESET)) to
 * UTF-8.  Returns whatever convert() returns. */
char *mb2utf8(const char *mb) {
  const char *const locale_encoding = nl_langinfo(CODESET);
  /* +1 so that the terminator is converted too */
  const size_t nbytes = strlen(mb) + 1;

  return convert(locale_encoding, "UTF-8", mb, nbytes);
}
112 | |
/* Convert UTF8 from UTF-8 to the current locale's encoding
 * (nl_langinfo(CODESET)).  Returns whatever convert() returns. */
char *utf82mb(const char *utf8) {
  const char *const locale_encoding = nl_langinfo(CODESET);
  /* +1 so that the terminator is converted too */
  const size_t nbytes = strlen(utf8) + 1;

  return convert("UTF-8", locale_encoding, utf8, nbytes);
}
116 | |
/* Convert ANY from the encoding named FROM to UTF-8.  FROM is handed to
 * convert() unchanged.  Returns whatever convert() returns. */
char *any2utf8(const char *from, const char *any) {
  /* +1 so that the terminator is converted too */
  const size_t nbytes = strlen(any) + 1;

  return convert(from, "UTF-8", any, nbytes);
}
120 | |
/* Convert ANY from the encoding named FROM to the current locale's encoding
 * (nl_langinfo(CODESET)).  When FROM is null no conversion is attempted and
 * a copy made with xstrdup() is returned instead. */
char *any2mb(const char *from, const char *any) {
  if(!from)
    return xstrdup(any);
  /* length includes the terminator so that it is converted too */
  return convert(from, nl_langinfo(CODESET), any, strlen(any) + 1);
}
125 | |
/* Convert ANY from the encoding named FROM to the encoding named TO.
 *
 * A null FROM or TO stands for the current locale's encoding
 * (nl_langinfo(CODESET)).  If both are null no conversion is attempted
 * and a copy made with xstrdup() is returned.  Otherwise returns whatever
 * convert() returns.
 */
char *any2any(const char *from,
              const char *to,
              const char *any) {
  if(!from && !to)
    return xstrdup(any);
  /* iconv_open() must never be handed a null encoding name, so fill in the
   * locale's encoding for whichever side was left unspecified */
  if(!from)
    from = nl_langinfo(CODESET);
  if(!to)
    to = nl_langinfo(CODESET);
  /* length includes the terminator so that it is converted too */
  return convert(from, to, any, strlen(any) + 1);
}
132 | |
/* Compare the 0-terminated UCS-4 strings A and B value by value.
 * Returns 1 if A sorts after B, -1 if it sorts before, and 0 if they are
 * equal.  A string that is a proper prefix of the other sorts first, since
 * its terminating 0 is less than any other value. */
int ucs4cmp(const uint32_t *a, const uint32_t *b) {
  /* step over the common prefix; when *a is nonzero and equal to *b then *b
   * is nonzero too, so neither string can be overrun */
  for(; *a != 0 && *a == *b; ++a, ++b)
    ;
  /* the first differing pair (or the two terminators) decides the result */
  return (*a > *b) - (*a < *b);
}
139 | |
140 | /* |
141 | Local Variables: |
142 | c-basic-offset:2 |
143 | comment-column:40 |
144 | End: |
145 | */ |