X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?p=sympathy.git;a=blobdiff_plain;f=src%2Futf8.c;h=aaa5ffc69cb13d6a79c1a2e0e9a8f14eef937d75;hp=09066db0a282bac7bb71b6d2490b91bff2d357fe;hb=3e72a1f6fc28777c26e4fb109867bd2a3c7b89b0;hpb=11313f344099e326aaf010fb9781b921df3b1967 diff --git a/src/utf8.c b/src/utf8.c index 09066db..aaa5ffc 100644 --- a/src/utf8.c +++ b/src/utf8.c @@ -10,6 +10,27 @@ static char rcsid[] = "$Id$"; /* * $Log$ + * Revision 1.9 2008/02/27 01:31:14 james + * *** empty log message *** + * + * Revision 1.8 2008/02/27 00:54:16 james + * *** empty log message *** + * + * Revision 1.7 2008/02/26 23:56:12 james + * *** empty log message *** + * + * Revision 1.6 2008/02/26 23:23:17 james + * *** empty log message *** + * + * Revision 1.5 2008/02/24 00:42:53 james + * *** empty log message *** + * + * Revision 1.4 2008/02/23 13:05:58 staffcvs + * *** empty log message *** + * + * Revision 1.3 2008/02/23 11:48:37 james + * *** empty log message *** + * * Revision 1.2 2008/02/22 23:39:27 james * *** empty log message *** * @@ -21,61 +42,162 @@ static char rcsid[] = "$Id$"; #include "project.h" -void utf8_flush(Context *c) +void +utf8_flush (Context * c) { -UTF8 *u=c->u; -int i; + UTF8 *u = c->u; + int i; + + switch (u->utf_ptr) + { + case 1: + log_f (c->l, "", u->utf_buf[0]); + break; + case 2: + log_f (c->l, "", + u->utf_buf[0], u->utf_buf[1]); + break; + case 3: + log_f (c->l, "", + u->utf_buf[0], u->utf_buf[1], u->utf_buf[2]); + break; + case 4: + log_f (c->l, + "", + u->utf_buf[0], u->utf_buf[1], u->utf_buf[2], u->utf_buf[3]); + break; + } -for (i=0;iutf_ptr;++i) - vt102_parse(c,u->utf_buf[i]); + for (i = 0; i < u->utf_ptr; ++i) + vt102_parse_char (c, u->utf_buf[i]); -u->utf_ptr=0; -u->in_utf8=0; + u->utf_ptr = 0; + u->in_utf8 = 0; } -int utf8_parse(Context *c,int ch) +void +utf8_parse (Context * c, uint32_t ch) { -UTF8 *u=&c->u; - - if (!u->in_utf8) { - /*FIXME: for the moment we bodge utf8 support*/ - if (ch==0xb9) { /*CSI, not a valid utf8 start char*/ - vt102_parse(c,ch); - } else if ((ch & 0xe0) == 0xc0) { /*Start of two byte unicode sequence*/ - u->in_utf8=2; - u->utf_ptr=0; - u->utf_buf[u->utf_ptr++]=ch; - } else if ((ch & 0xf0) ==0xe0) { /*Start of three byte unicode sequence*/ - u->in_utf8=3; - u->utf_ptr=0; - u->utf_buf[u->utf_ptr++]=ch; - } else if ((ch & 0xf8) ==0xf0) { - u->in_utf8=4; - u->utf_ptr=0; - u->utf_buf[u->utf_ptr++]=ch; - } else { - vt102_parse(c,ch); - } - } else { - if ((ch & 0xc0) != 0x80) { - utf8_flush(c); - vt102_parse(c,ch); - } else { - u->utf_buf[u->utf_ptr++]=ch; - u->in_utf8--; - } - } + UTF8 *u = c->u; + + if (ch == SYM_CHAR_RESET) + { + u->in_utf8 = 0; + vt102_parse_char (c, ch); + return; + } + + if (!u->in_utf8) + { + /*FIXME: for the moment we bodge utf8 support - need to do */ + /* L->R and R->L and double width characters */ + if (ch == 0xb9) + { /*CSI, not a valid utf8 start char */ + vt102_parse_char (c, ch); + } + else if ((ch & 0xe0) == 0xc0) + { /*Start of two byte unicode sequence */ + u->in_utf8 = 1; + u->utf_ptr = 0; + u->utf_buf[u->utf_ptr++] = ch; + u->ch = (ch & 0x1f) << 6; + u->sh = 0; + } + else if ((ch & 0xf0) == 0xe0) + { /*Start of three byte unicode sequence */ + u->in_utf8 = 2; + u->utf_ptr = 0; + u->utf_buf[u->utf_ptr++] = ch; + u->ch = (ch & 0x0f) << 12; + u->sh = 6; + } + else if ((ch & 0xf8) == 0xf0) + { + u->in_utf8 = 3; + u->utf_ptr = 0; + u->utf_buf[u->utf_ptr++] = ch; + u->ch = (ch & 0x07) << 18; + u->sh = 12; + } + else + { + vt102_parse_char (c, ch); + } + } + else + { + if ((ch & 0xc0) != 0x80) + { + utf8_flush (c); + vt102_parse_char (c, ch); + } + else + { + u->utf_buf[u->utf_ptr++] = ch; + u->ch |= (ch & 0x3f) << u->sh; + u->sh -= 6; + u->in_utf8--; + + if (!u->in_utf8) + vt102_parse_char (c, u->ch); + } + } } -UTF8 *utf8_new(void) +UTF8 * +utf8_new (void) { -UTF8 *ret; + UTF8 *ret; + + ret = (UTF8 *) malloc (sizeof (UTF8)); -ret=(UTF8 *) malloc(sizeof(UTF8)); + ret->in_utf8 = 0; -ret->in_utf8=0; +} + +int +utf8_encode (char *ptr, int ch) +{ + if (ch < 0x80) + { + ptr[0] = ch; + return 1; + } + else if (ch < 0x800) + { + ptr[0] = 0xc0 | (ch >> 6); + ptr[1] = 0x80 | (ch & 0x3f); + return 2; + } + else if (ch < 0x10000) + { + ptr[0] = 0xe0 | (ch >> 12); + ptr[1] = 0x80 | ((ch >> 6) & 0x3f); + ptr[2] = 0x80 | (ch & 0x3f); + return 3; + } + else if (ch < 0x1fffff) + { + ptr[0] = 0xf0 | (ch >> 18); + ptr[1] = 0x80 | ((ch >> 12) & 0x3f); + ptr[2] = 0x80 | ((ch >> 6) & 0x3f); + ptr[3] = 0x80 | (ch & 0x3f); + return 4; + } + return 0; } +void +utf8_emit (TTY * t, int ch) +{ + uint8_t buf[4]; + int i; + i = utf8_encode (buf, ch); + if (!i) + return; + + t->xmit (t, buf, i); +}