-/*
+/*
* utf8.c:
*
* Copyright (c) 2008 James McKenzie <james@fishsoup.dhs.org>,
*
*/
-static char rcsid[] = "$Id$";
+static char rcsid[] = "$Id: utf8.c,v 1.16 2010/07/27 14:49:35 james Exp $";
-/*
- * $Log$
+/*
+ * $Log: utf8.c,v $
+ * Revision 1.16 2010/07/27 14:49:35 james
+ * add support for byte logging
+ *
+ * Revision 1.15 2008/03/07 13:16:02 james
+ * *** empty log message ***
+ *
+ * Revision 1.14 2008/03/07 12:37:04 james
+ * *** empty log message ***
+ *
+ * Revision 1.13 2008/03/06 16:49:39 james
+ * *** empty log message ***
+ *
+ * Revision 1.12 2008/03/06 16:49:05 james
+ * *** empty log message ***
+ *
+ * Revision 1.11 2008/03/03 06:04:42 james
+ * *** empty log message ***
+ *
+ * Revision 1.10 2008/03/02 10:37:56 james
+ * *** empty log message ***
+ *
+ * Revision 1.9 2008/02/27 01:31:14 james
+ * *** empty log message ***
+ *
+ * Revision 1.8 2008/02/27 00:54:16 james
+ * *** empty log message ***
+ *
+ * Revision 1.7 2008/02/26 23:56:12 james
+ * *** empty log message ***
+ *
+ * Revision 1.6 2008/02/26 23:23:17 james
+ * *** empty log message ***
+ *
+ * Revision 1.5 2008/02/24 00:42:53 james
+ * *** empty log message ***
+ *
+ * Revision 1.4 2008/02/23 13:05:58 staffcvs
+ * *** empty log message ***
+ *
+ * Revision 1.3 2008/02/23 11:48:37 james
+ * *** empty log message ***
+ *
+ * Revision 1.2 2008/02/22 23:39:27 james
+ * *** empty log message ***
+ *
* Revision 1.1 2008/02/22 19:12:05 james
* *** empty log message ***
*
#include "project.h"
- /*FIXME: for the moment we bodge utf8 support*/
- if ((ch>=0xc0) && (ch<0xe0)) /*Start of two byte unicode sequence*/
- {
- p->in_utf8=2;
- } else if ((ch>=0xe0) && (ch<0xf0)) /*Start of three byte unicode sequence*/
- {
- p->in_utf8=3;
- } else if ((ch>=0xf0) && (ch<0xf7)) /*Start of four byte unicode sequence*/
- p->in_utf8=4;
+/*
+ * utf8_flush: give up on the UTF-8 sequence currently being collected.
+ *
+ * When 1 to 4 bytes are buffered they are reported through log_f() as an
+ * invalid sequence (octal escapes). Every buffered byte is then handed
+ * individually to vt102_parse_char() and the decoder state (utf_ptr,
+ * in_utf8) is reset to zero.
+ *
+ * Returns the sum of the vt102_parse_char() return values.
+ */
+int
+utf8_flush (Context * c)
+{
+  UTF8 *dec = c->u;
+  int total = 0;
+  int n;
+
+  /* One log format per possible number of buffered bytes. */
+  if (dec->utf_ptr == 1) {
+    log_f (c->l, "<invalid utf-8 sequence: \\%03o>", dec->utf_buf[0]);
+  } else if (dec->utf_ptr == 2) {
+    log_f (c->l, "<invalid utf-8 sequence: \\%03o \\%03o>",
+           dec->utf_buf[0], dec->utf_buf[1]);
+  } else if (dec->utf_ptr == 3) {
+    log_f (c->l, "<invalid utf-8 sequence: \\%03o \\%03o \\%03o>",
+           dec->utf_buf[0], dec->utf_buf[1], dec->utf_buf[2]);
+  } else if (dec->utf_ptr == 4) {
+    log_f (c->l,
+           "<invalid utf-8 sequence: \\%03o \\%03o \\%03o \\%03o>",
+           dec->utf_buf[0], dec->utf_buf[1], dec->utf_buf[2],
+           dec->utf_buf[3]);
+  }
+
+  /* Replay the raw bytes so the terminal parser still sees them. */
+  n = 0;
+  while (n < dec->utf_ptr) {
+    total += vt102_parse_char (c, dec->utf_buf[n]);
+    n++;
+  }
+
+  dec->in_utf8 = 0;
+  dec->utf_ptr = 0;
+
+  return total;
+}
+
+/*
+ * utf8_parse: feed one input value through the UTF-8 decoder.
+ *
+ * Values that are not part of a multi-byte sequence are forwarded to
+ * vt102_parse_char() unchanged; a completed 2, 3 or 4 byte sequence is
+ * forwarded as the single assembled value u->ch. No check for overlong
+ * encodings is made in this function.
+ *
+ * Returns the sum of the return values of the vt102_parse_char() and
+ * utf8_flush() calls made while handling ch.
+ */
+int
+utf8_parse (Context * c, uint32_t ch)
+{
+ UTF8 *u = c->u;
+ int err = 0;
+
+ /* SYM_CHAR_RESET drops any partial sequence and is passed straight on; */
+ /* this path returns before the byte logging below. */
+ if (ch == SYM_CHAR_RESET) {
+ u->in_utf8 = 0;
+ err += vt102_parse_char (c, ch);
+ return err;
+ }
+
+ /* Byte logging records only the low 8 bits of ch. */
+ if (c->l && c->byte_logging) {
+ uint8_t ch8=(uint8_t) ch;
+ c->l->log_bytes(c->l,&ch8,1);
+ }
+
+ /* u->in_utf8 counts the continuation bytes still expected; u->sh is the */
+ /* left shift to apply to the next continuation byte's 6 payload bits. */
+ if (!u->in_utf8) {
+ /* FIXME: for the moment we bodge utf8 support - need to do */
+ /* L->R and R->L and double width characters */
+ /* NOTE(review): the C1 CSI code is 0x9b, so 0xb9 looks like transposed */
+ /* digits - confirm. Either value matches none of the lead-byte masks */
+ /* below and would reach the final else, which makes the same call. */
+ if (ch == 0xb9) // FIXME - OTHER 8 bit control chars
+ { /* CSI, not a valid utf8 start char */
+ err += vt102_parse_char (c, ch);
+ } else if ((ch & 0xe0) == 0xc0) { /* Start of two byte unicode sequence */
+ u->in_utf8 = 1;
+ u->utf_ptr = 0;
+ u->utf_buf[u->utf_ptr++] = ch;
+ u->ch = (ch & 0x1f) << 6;
+ u->sh = 0;
+ } else if ((ch & 0xf0) == 0xe0) { /* Start of three byte unicode sequence */
+ u->in_utf8 = 2;
+ u->utf_ptr = 0;
+ u->utf_buf[u->utf_ptr++] = ch;
+ u->ch = (ch & 0x0f) << 12;
+ u->sh = 6;
+ } else if ((ch & 0xf8) == 0xf0) { /* Start of four byte unicode sequence */
+ u->in_utf8 = 3;
+ u->utf_ptr = 0;
+ u->utf_buf[u->utf_ptr++] = ch;
+ u->ch = (ch & 0x07) << 18;
+ u->sh = 12;
+ } else {
+ /* Anything else is forwarded as a single value. */
+ err += vt102_parse_char (c, ch);
+ }
+ } else {
+ if ((ch & 0xc0) != 0x80) {
+ /* Not a continuation byte: flush the partial sequence, then forward */
+ /* ch as it is. ch is not re-examined as a possible lead byte here. */
+ /* NOTE(review): confirm that is intended. */
+ err += utf8_flush (c);
+ err += vt102_parse_char (c, ch);
+ } else {
+ /* Continuation byte: keep the raw byte in case of a later flush and */
+ /* merge its payload bits into the value being assembled. */
+ u->utf_buf[u->utf_ptr++] = ch;
+ u->ch |= (ch & 0x3f) << u->sh;
+ u->sh -= 6;
+ u->in_utf8--;
+
+ /* Last expected byte received: emit the assembled value. */
+ if (!u->in_utf8)
+ err += vt102_parse_char (c, u->ch);
+ }
+ }
+ return err;
+}
+
+
+
+/*
+ * utf8_new: allocate a UTF-8 decoder state with no sequence in progress.
+ *
+ * Returns the newly allocated UTF8 object. The memory comes from
+ * xmalloc(); presumably that wrapper does not return on failure - verify.
+ */
+UTF8 *
+utf8_new (void)
+{
+  UTF8 *ret;
+
+  ret = (UTF8 *) xmalloc (sizeof (UTF8));
+
+  ret->in_utf8 = 0;
+  /* utf8_flush() reads utf_ptr, so it must not start out indeterminate. */
+  ret->utf_ptr = 0;
+
+  /* The function previously fell off the end without returning ret. */
+  return ret;
+}
+
+/*
+ * utf8_encode: write the UTF-8 encoding of ch to ptr.
+ *
+ * ptr must have room for up to 4 bytes; no terminator is written.
+ *
+ * Returns the number of bytes stored (1 to 4), or 0 when ch is larger
+ * than 0x1fffff, the largest value the four byte form can carry. A
+ * negative ch satisfies the first test and is stored as a single byte.
+ */
+int
+utf8_encode (char *ptr, int ch)
+{
+
+  if (ch < 0x80) {
+    ptr[0] = ch;
+    return 1;
+  } else if (ch < 0x800) {
+    ptr[0] = 0xc0 | (ch >> 6);
+    ptr[1] = 0x80 | (ch & 0x3f);
+    return 2;
+  } else if (ch < 0x10000) {
+    ptr[0] = 0xe0 | (ch >> 12);
+    ptr[1] = 0x80 | ((ch >> 6) & 0x3f);
+    ptr[2] = 0x80 | (ch & 0x3f);
+    return 3;
+  } else if (ch <= 0x1fffff) {
+    /* Inclusive bound: 0x1fffff itself fits in 3 + 6 + 6 + 6 payload bits. */
+    ptr[0] = 0xf0 | (ch >> 18);
+    ptr[1] = 0x80 | ((ch >> 12) & 0x3f);
+    ptr[2] = 0x80 | ((ch >> 6) & 0x3f);
+    ptr[3] = 0x80 | (ch & 0x3f);
+    return 4;
}
+  return 0;
+}
- if (p->utf_8) {
- p->in_utf8--;
- ch='?';
- }
+/*
+ * utf8_emit: encode ch with utf8_encode() and send the bytes via t->xmit.
+ *
+ * Returns 0 when utf8_encode() produced no bytes (nothing is sent) and
+ * also when t->xmit() returned the number of bytes passed to it; returns
+ * -1 when t->xmit() returned any other value.
+ *
+ * NOTE(review): buf is uint8_t[4] while utf8_encode() takes a char * as
+ * its first parameter - confirm the pointer type mismatch is acceptable.
+ */
+int
+utf8_emit (TTY * t, int ch)
+{
+ uint8_t buf[4];
+ int i;
+ i = utf8_encode (buf, ch);
+ if (!i)
+ return 0;
- if (!p->utf_8) {
- /*Not first or last byte in sequence*/
+ if (t->xmit (t, buf, i) != i)
+ return -1;
+ return 0;
+}