4 * Copyright (c) 2008 James McKenzie <sympathy@madingley.org>,
9 static char rcsid[] = "$Id: utf8.c,v 1.16 2010/07/27 14:49:35 james Exp $";
13 * Revision 1.16 2010/07/27 14:49:35 james
14 * add support for byte logging
16 * Revision 1.15 2008/03/07 13:16:02 james
17 * *** empty log message ***
19 * Revision 1.14 2008/03/07 12:37:04 james
20 * *** empty log message ***
22 * Revision 1.13 2008/03/06 16:49:39 james
23 * *** empty log message ***
25 * Revision 1.12 2008/03/06 16:49:05 james
26 * *** empty log message ***
28 * Revision 1.11 2008/03/03 06:04:42 james
29 * *** empty log message ***
31 * Revision 1.10 2008/03/02 10:37:56 james
32 * *** empty log message ***
34 * Revision 1.9 2008/02/27 01:31:14 james
35 * *** empty log message ***
37 * Revision 1.8 2008/02/27 00:54:16 james
38 * *** empty log message ***
40 * Revision 1.7 2008/02/26 23:56:12 james
41 * *** empty log message ***
43 * Revision 1.6 2008/02/26 23:23:17 james
44 * *** empty log message ***
46 * Revision 1.5 2008/02/24 00:42:53 james
47 * *** empty log message ***
49 * Revision 1.4 2008/02/23 13:05:58 staffcvs
50 * *** empty log message ***
52 * Revision 1.3 2008/02/23 11:48:37 james
53 * *** empty log message ***
55 * Revision 1.2 2008/02/22 23:39:27 james
56 * *** empty log message ***
58 * Revision 1.1 2008/02/22 19:12:05 james
59 * *** empty log message ***
/*
 * utf8_flush: give up on the bytes collected so far for a UTF-8 sequence.
 *
 * Only part of this function is visible in this listing.  The visible lines
 * report an invalid utf-8 sequence through log_f() on c->l, printing between
 * one and four bytes of u->utf_buf in octal, and then pass each of the
 * u->utf_ptr buffered bytes to vt102_parse_char(), adding the return values
 * into err.
 */
67 utf8_flush (Context * c)
/* One log_f() variant per number of buffered bytes; the code that selects
   between them is not visible here. */
75 log_f (c->l, "<invalid utf-8 sequence: \\%03o>", u->utf_buf[0]);
78 log_f (c->l, "<invalid utf-8 sequence: \\%03o \\%03o>",
79 u->utf_buf[0], u->utf_buf[1]);
82 log_f (c->l, "<invalid utf-8 sequence: \\%03o \\%03o \\%03o>",
83 u->utf_buf[0], u->utf_buf[1], u->utf_buf[2]);
87 "<invalid utf-8 sequence: \\%03o \\%03o \\%03o \\%03o>",
88 u->utf_buf[0], u->utf_buf[1], u->utf_buf[2], u->utf_buf[3]);
/* Replay the buffered bytes, in order, to the terminal parser as raw bytes. */
92 for (i = 0; i < u->utf_ptr; ++i)
93 err += vt102_parse_char (c, u->utf_buf[i]);
/*
 * utf8_parse: run one input value ch through the UTF-8 decoder state u and
 * hand the results to vt102_parse_char(), summing the return values of those
 * calls into err.
 *
 * Only part of this function is visible in this listing, so the conditions
 * that separate some of the steps below cannot be seen:
 *  - SYM_CHAR_RESET is passed on to vt102_parse_char().
 *  - If c->l is set and c->byte_logging is non-zero, the low eight bits of
 *    ch are given to c->l->log_bytes().
 *  - A lead byte of a two, three or four byte sequence is saved in
 *    u->utf_buf and its payload bits are shifted into place in u->ch.
 *  - Later lines handle a following byte: one that is not of the form
 *    10xxxxxx triggers utf8_flush() and is then itself passed to
 *    vt102_parse_char(); otherwise it is saved in u->utf_buf and its low six
 *    bits are merged into u->ch at bit position u->sh.
 *  - The assembled value u->ch is passed to vt102_parse_char().
 */
103 utf8_parse (Context * c, uint32_t ch)
108 if (ch == SYM_CHAR_RESET) {
110 err += vt102_parse_char (c, ch);
/* Optional raw byte log; ch is truncated to eight bits before it is logged. */
114 if (c->l && c->byte_logging) {
115 uint8_t ch8=(uint8_t) ch;
116 c->l->log_bytes(c->l,&ch8,1);
120 /* FIXME: for the moment we bodge utf8 support - need to do */
121 /* L->R and R->L and double width characters */
/*
 * NOTE(review): the comment below calls this value CSI, but the C1 control
 * CSI is 0x9b; 0xb9 looks like transposed digits - confirm.  Neither value
 * matches any of the lead byte tests that follow.
 */
122 if (ch == 0xb9) // FIXME - OTHER 8 bit control chars
123 { /* CSI, not a valid utf8 start char */
124 err += vt102_parse_char (c, ch);
125 } else if ((ch & 0xe0) == 0xc0) { /* Start of two byte unicode sequence */
128 u->utf_buf[u->utf_ptr++] = ch;
129 u->ch = (ch & 0x1f) << 6;
131 } else if ((ch & 0xf0) == 0xe0) { /* Start of three byte unicode sequence
135 u->utf_buf[u->utf_ptr++] = ch;
136 u->ch = (ch & 0x0f) << 12;
// Start of a four byte sequence: the three payload bits of the lead byte
// become bits 18 to 20 of u->ch.
138 } else if ((ch & 0xf8) == 0xf0) {
141 u->utf_buf[u->utf_ptr++] = ch;
142 u->ch = (ch & 0x07) << 18;
// Presumably the fallback branch for a byte that starts no sequence (the
// branch head is not visible): the byte goes to the terminal parser as is.
145 err += vt102_parse_char (c, ch);
// A byte without the 10xxxxxx continuation pattern ends the sequence early:
// the buffered bytes are flushed and this byte is then parsed on its own.
148 if ((ch & 0xc0) != 0x80) {
149 err += utf8_flush (c);
150 err += vt102_parse_char (c, ch);
// Valid continuation byte: remember it and merge its six payload bits.
152 u->utf_buf[u->utf_ptr++] = ch;
153 u->ch |= (ch & 0x3f) << u->sh;
// Hand the decoded value to the terminal parser; the test that decides the
// sequence is complete is not visible here.
158 err += vt102_parse_char (c, u->ch);
171 ret = (UTF8 *) xmalloc (sizeof (UTF8));
// utf8_encode: store the UTF-8 byte sequence for the value ch at ptr.
//
// Only part of this function is visible in this listing.  The visible
// branches write a two byte form when ch is below 0x800, a three byte form
// when ch is below 0x10000 and a four byte form when ch is below 0x1fffff.
// The first branch, the handling of larger values and every return statement
// are not visible.  The four byte branch writes ptr[0] to ptr[3], so ptr must
// have room for at least four bytes.
// NOTE(review): utf8_emit() uses the result as a byte count, so the return
// value is presumably the number of bytes written - confirm.
178 utf8_encode (char *ptr, int ch)
184 } else if (ch < 0x800) {
185 ptr[0] = 0xc0 | (ch >> 6);
186 ptr[1] = 0x80 | (ch & 0x3f);
188 } else if (ch < 0x10000) {
189 ptr[0] = 0xe0 | (ch >> 12);
190 ptr[1] = 0x80 | ((ch >> 6) & 0x3f);
191 ptr[2] = 0x80 | (ch & 0x3f);
// NOTE(review): the bounds above are exclusive limits (0x800, 0x10000) and a
// four byte form carries 21 bits, so 0x200000 may have been intended here;
// as written the value 0x1fffff itself does not take this branch - confirm.
193 } else if (ch < 0x1fffff) {
194 ptr[0] = 0xf0 | (ch >> 18);
195 ptr[1] = 0x80 | ((ch >> 12) & 0x3f);
196 ptr[2] = 0x80 | ((ch >> 6) & 0x3f);
197 ptr[3] = 0x80 | (ch & 0x3f);
// utf8_emit: encode ch with utf8_encode() into buf and send the result
// through the xmit callback of t.
//
// Only part of this function is visible in this listing; the declarations of
// buf and i, any check on i, and the return values are not visible.
204 utf8_emit (TTY * t, int ch)
// i receives the result of utf8_encode() and is used below as the length to
// transmit.
208 i = utf8_encode (buf, ch);
// xmit returning anything other than i is singled out here; what happens in
// that case is not visible in this listing.
212 if (t->xmit (t, buf, i) != i)