src/utf8.c

   1 /*
   2  * utf8.c:
   3  *
   4  * Copyright (c) 2008 James McKenzie <james@fishsoup.dhs.org>,
   5  * All rights reserved.
   6  *
   7  */
   8
/* Version-control identification string; the Id keyword between the dollar
   signs is an RCS/CVS keyword that is expanded on checkout. */
static char rcsid[] = "$Id$";
  10
  11 /*
  12  * $Log$
  13  * Revision 1.4  2008/02/23 13:05:58  staffcvs
  14  * *** empty log message ***
  15  *
  16  * Revision 1.3  2008/02/23 11:48:37  james
  17  * *** empty log message ***
  18  *
  19  * Revision 1.2  2008/02/22 23:39:27  james
  20  * *** empty log message ***
  21  *
  22  * Revision 1.1  2008/02/22 19:12:05  james
  23  * *** empty log message ***
  24  *
  25  */
  26
  27 #include "project.h"
  28
  29
  30 void
  31 utf8_flush (Context * c)
  32 {
  33   UTF8 *u = c->u;
  34   int i;
  35
  36   switch (u->utf_ptr)
  37     {
  38     case 1:
  39       log_f (c->l, "<invalid utf-8 sequence: \\%03o>", u->utf_buf[0]);
  40       break;
  41     case 2:
  42       log_f (c->l, "<invalid utf-8 sequence: \\%03o \\%03o>",
  43              u->utf_buf[0], u->utf_buf[1]);
  44       break;
  45     case 3:
  46       log_f (c->l, "<invalid utf-8 sequence: \\%03o \\%03o \\%03o>",
  47              u->utf_buf[0], u->utf_buf[1], u->utf_buf[2]);
  48       break;
  49     case 4:
  50       log_f (c->l,
  51              "<invalid utf-8 sequence: \\%03o \\%03o \\%03o \\%03o>",
  52              u->utf_buf[0], u->utf_buf[1], u->utf_buf[2], u->utf_buf[3]);
  53       break;
  54     }
  55
  56   for (i = 0; i < u->utf_ptr; ++i)
  57     vt102_parse_char (c, u->utf_buf[i]);
  58
  59   u->utf_ptr = 0;
  60   u->in_utf8 = 0;
  61 }
  62
  63 void
  64 utf8_parse (Context * c, int ch)
  65 {
  66   UTF8 *u = c->u;
  67
  68   if (ch == SYM_CHAR_RESET)
  69     {
  70       u->in_utf8 = 0;
  71       vt102_parse_char (c, ch);
  72       return;
  73     }
  74
  75   if (!u->in_utf8)
  76     {
  77       /*FIXME: for the moment we bodge utf8 support */
  78       if (ch == 0xb9)
  79         {                       /*CSI, not a valid utf8 start char */
  80           vt102_parse_char (c, ch);
  81         }
  82       else if ((ch & 0xe0) == 0xc0)
  83         {                       /*Start of two byte unicode sequence */
  84           u->in_utf8 = 1;
  85           u->utf_ptr = 0;
  86           u->utf_buf[u->utf_ptr++] = ch;
  87           u->ch = (ch & 0x1f) << 6;
  88           u->sh = 0;
  89         }
  90       else if ((ch & 0xf0) == 0xe0)
  91         {                       /*Start of three byte unicode sequence */
  92           u->in_utf8 = 2;
  93           u->utf_ptr = 0;
  94           u->utf_buf[u->utf_ptr++] = ch;
  95           u->ch = (ch & 0x0f) << 12;
  96           u->sh = 6;
  97         }
  98       else if ((ch & 0xf8) == 0xf0)
  99         {
 100           u->in_utf8 = 3;
 101           u->utf_ptr = 0;
 102           u->utf_buf[u->utf_ptr++] = ch;
 103           u->ch = (ch & 0x07) << 18;
 104           u->sh = 12;
 105         }
 106       else
 107         {
 108           vt102_parse_char (c, ch);
 109         }
 110     }
 111   else
 112     {
 113       if ((ch & 0xc0) != 0x80)
 114         {
 115           utf8_flush (c);
 116           vt102_parse_char (c, ch);
 117         }
 118       else
 119         {
 120           u->utf_buf[u->utf_ptr++] = ch;
 121           u->ch |= (ch & 0x3f) << u->sh;
 122           u->sh -= 6;
 123           u->in_utf8--;
 124
 125           if (!u->in_utf8)
 126             vt102_parse_char (c, ch);
 127         }
 128     }
 129 }
 130
 131
 132
 133 UTF8 *
 134 utf8_new (void)
 135 {
 136   UTF8 *ret;
 137
 138   ret = (UTF8 *) malloc (sizeof (UTF8));
 139
 140   ret->in_utf8 = 0;
 141
 142 }