X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?a=blobdiff_plain;f=src%2Futf8.c;h=e4af9d4f37a3c4cb196bf4d8e0ecee4c71e110bd;hb=fb8a81756fdf0e39f49612e543c9961ad5544977;hp=ff8a2ecdc3f13d9a9180a93042ec50afd3c8ce2e;hpb=82aed0c41cd917fdf9cdb20db788fe9da0662eba;p=sympathy.git diff --git a/src/utf8.c b/src/utf8.c index ff8a2ec..e4af9d4 100644 --- a/src/utf8.c +++ b/src/utf8.c @@ -1,15 +1,48 @@ -/* +/* * utf8.c: * - * Copyright (c) 2008 James McKenzie , + * Copyright (c) 2008 James McKenzie , * All rights reserved. * */ -static char rcsid[] = "$Id$"; +static char rcsid[] = "$Id: utf8.c,v 1.16 2010/07/27 14:49:35 james Exp $"; -/* - * $Log$ +/* + * $Log: utf8.c,v $ + * Revision 1.16 2010/07/27 14:49:35 james + * add support for byte logging + * + * Revision 1.15 2008/03/07 13:16:02 james + * *** empty log message *** + * + * Revision 1.14 2008/03/07 12:37:04 james + * *** empty log message *** + * + * Revision 1.13 2008/03/06 16:49:39 james + * *** empty log message *** + * + * Revision 1.12 2008/03/06 16:49:05 james + * *** empty log message *** + * + * Revision 1.11 2008/03/03 06:04:42 james + * *** empty log message *** + * + * Revision 1.10 2008/03/02 10:37:56 james + * *** empty log message *** + * + * Revision 1.9 2008/02/27 01:31:14 james + * *** empty log message *** + * + * Revision 1.8 2008/02/27 00:54:16 james + * *** empty log message *** + * + * Revision 1.7 2008/02/26 23:56:12 james + * *** empty log message *** + * + * Revision 1.6 2008/02/26 23:23:17 james + * *** empty log message *** + * * Revision 1.5 2008/02/24 00:42:53 james * *** empty log message *** * @@ -30,106 +63,102 @@ static char rcsid[] = "$Id$"; #include "project.h" -void +int utf8_flush (Context * c) { UTF8 *u = c->u; int i; - - switch (u->utf_ptr) - { - case 1: - log_f (c->l, "", u->utf_buf[0]); - break; - case 2: - log_f (c->l, "", - u->utf_buf[0], u->utf_buf[1]); - break; - case 3: - log_f (c->l, "", - u->utf_buf[0], u->utf_buf[1], u->utf_buf[2]); - break; - case 4: - log_f (c->l, - "", - u->utf_buf[0], u->utf_buf[1], u->utf_buf[2], u->utf_buf[3]); - break; - } + int err = 0; + + switch (u->utf_ptr) { + case 1: + log_f (c->l, "", u->utf_buf[0]); + break; + case 2: + log_f (c->l, "", + u->utf_buf[0], u->utf_buf[1]); + break; + case 3: + log_f (c->l, "", + u->utf_buf[0], u->utf_buf[1], u->utf_buf[2]); + break; + case 4: + log_f (c->l, + "", + u->utf_buf[0], u->utf_buf[1], u->utf_buf[2], u->utf_buf[3]); + break; + } for (i = 0; i < u->utf_ptr; ++i) - vt102_parse_char (c, u->utf_buf[i]); + err += vt102_parse_char (c, u->utf_buf[i]); u->utf_ptr = 0; u->in_utf8 = 0; + + return err; + } -void +int utf8_parse (Context * c, uint32_t ch) { UTF8 *u = c->u; - - if (ch == SYM_CHAR_RESET) - { - u->in_utf8 = 0; - vt102_parse_char (c, ch); - return; - } - - if (!u->in_utf8) - { - /*FIXME: for the moment we bodge utf8 support - need to do */ - /* L->R and R->L and double width characters */ - if (ch == 0xb9) - { /*CSI, not a valid utf8 start char */ - vt102_parse_char (c, ch); - } - else if ((ch & 0xe0) == 0xc0) - { /*Start of two byte unicode sequence */ - u->in_utf8 = 1; - u->utf_ptr = 0; - u->utf_buf[u->utf_ptr++] = ch; - u->ch = (ch & 0x1f) << 6; - u->sh = 0; - } - else if ((ch & 0xf0) == 0xe0) - { /*Start of three byte unicode sequence */ - u->in_utf8 = 2; - u->utf_ptr = 0; - u->utf_buf[u->utf_ptr++] = ch; - u->ch = (ch & 0x0f) << 12; - u->sh = 6; - } - else if ((ch & 0xf8) == 0xf0) - { - u->in_utf8 = 3; - u->utf_ptr = 0; - u->utf_buf[u->utf_ptr++] = ch; - u->ch = (ch & 0x07) << 18; - u->sh = 12; - } - else - { - vt102_parse_char (c, ch); - } + int err = 0; + + if (ch == SYM_CHAR_RESET) { + u->in_utf8 = 0; + err += vt102_parse_char (c, ch); + return err; + } + + if (c->l && c->byte_logging) { + uint8_t ch8=(uint8_t) ch; + c->l->log_bytes(c->l,&ch8,1); + } + + if (!u->in_utf8) { + /* FIXME: for the moment we bodge utf8 support - need to do */ + /* L->R and R->L and double width characters */ + if (ch == 0xb9) // FIXME - OTHER 8 bit control chars + { /* CSI, not a valid utf8 start char */ + err += vt102_parse_char (c, ch); + } else if ((ch & 0xe0) == 0xc0) { /* Start of two byte unicode sequence */ + u->in_utf8 = 1; + u->utf_ptr = 0; + u->utf_buf[u->utf_ptr++] = ch; + u->ch = (ch & 0x1f) << 6; + u->sh = 0; + } else if ((ch & 0xf0) == 0xe0) { /* Start of three byte unicode sequence + */ + u->in_utf8 = 2; + u->utf_ptr = 0; + u->utf_buf[u->utf_ptr++] = ch; + u->ch = (ch & 0x0f) << 12; + u->sh = 6; + } else if ((ch & 0xf8) == 0xf0) { + u->in_utf8 = 3; + u->utf_ptr = 0; + u->utf_buf[u->utf_ptr++] = ch; + u->ch = (ch & 0x07) << 18; + u->sh = 12; + } else { + err += vt102_parse_char (c, ch); } - else - { - if ((ch & 0xc0) != 0x80) - { - utf8_flush (c); - vt102_parse_char (c, ch); - } - else - { - u->utf_buf[u->utf_ptr++] = ch; - u->ch |= (ch & 0x3f) << u->sh; - u->sh -= 6; - u->in_utf8--; - - if (!u->in_utf8) - vt102_parse_char (c, u->ch); - } + } else { + if ((ch & 0xc0) != 0x80) { + err += utf8_flush (c); + err += vt102_parse_char (c, ch); + } else { + u->utf_buf[u->utf_ptr++] = ch; + u->ch |= (ch & 0x3f) << u->sh; + u->sh -= 6; + u->in_utf8--; + + if (!u->in_utf8) + err += vt102_parse_char (c, u->ch); } + } + return err; } @@ -139,44 +168,48 @@ utf8_new (void) { UTF8 *ret; - ret = (UTF8 *) malloc (sizeof (UTF8)); + ret = (UTF8 *) xmalloc (sizeof (UTF8)); ret->in_utf8 = 0; } +int +utf8_encode (char *ptr, int ch) +{ + if (ch < 0x80) { + ptr[0] = ch; + return 1; + } else if (ch < 0x800) { + ptr[0] = 0xc0 | (ch >> 6); + ptr[1] = 0x80 | (ch & 0x3f); + return 2; + } else if (ch < 0x10000) { + ptr[0] = 0xe0 | (ch >> 12); + ptr[1] = 0x80 | ((ch >> 6) & 0x3f); + ptr[2] = 0x80 | (ch & 0x3f); + return 3; + } else if (ch < 0x1fffff) { + ptr[0] = 0xf0 | (ch >> 18); + ptr[1] = 0x80 | ((ch >> 12) & 0x3f); + ptr[2] = 0x80 | ((ch >> 6) & 0x3f); + ptr[3] = 0x80 | (ch & 0x3f); + return 4; + } + return 0; +} -void +int utf8_emit (TTY * t, int ch) { uint8_t buf[4]; + int i; + i = utf8_encode (buf, ch); + if (!i) + return 0; - if (ch < 0x80) - { - buf[0] = ch; - t->xmit (t, buf, 1); - } - else if (ch < 0x800) - { - buf[0] = 0xc0 | (ch >> 6); - buf[1] = 0x80 | (ch & 0x3f); - - t->xmit (t, buf, 2); - } - else if (ch < 0x10000) - { - buf[0] = 0xe0 | (ch >> 12); - buf[1] = 0x80 | ((ch >> 6) & 0x3f); - buf[2] = 0x80 | (ch & 0x3f); - t->xmit (t, buf, 3); - } - else if (ch < 0x1fffff) - { - buf[0] = 0xf0 | (ch >> 18); - buf[1] = 0x80 | ((ch >> 12) & 0x3f); - buf[2] = 0x80 | ((ch >> 6) & 0x3f); - buf[3] = 0x80 | (ch & 0x3f); - t->xmit (t, buf, 4); - } + if (t->xmit (t, buf, i) != i) + return -1; + return 0; }