2 * This file is part of DisOrder
3 * Copyright (C) 2004 Richard Kettlewell
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
34 #include "unicodegc.h"
36 const char *casefold(const char *ptr) {
41 const char *start, *s = ptr;
46 PARSE_UTF8(s, c, return ptr);
47 /* seek the folded equivalent */
50 r = cmn[c & CM_MASK] - 1;
51 while(l <= r && c != t[m = (l + r) / 2].ch)
57 dynstr_append_string(&d, t[m].tr);
59 dynstr_append_bytes(&d, start, s - start);
65 static enum unicode_gc_cat cat(uint32_t c) {
69 r = sizeof gcs / sizeof *gcs;
79 return unicode_gc_none;
82 /* XXX this is a bit kludgy */
84 char **words(const char *s, int *nvecp) {
94 PARSE_UTF8(s, c, return 0);
95 /* special cases first */
106 /* do the rest on category */
120 /* letters, digits and symbols are considered to be part of
126 dynstr_append_bytes(&d, start, s - start);
140 dynstr_terminate(&d);
141 vector_append(&v, d.vec);
154 case unicode_gc_none:
155 /* control and punctuation are completely ignored */
161 /* pick up the final word */
162 dynstr_terminate(&d);
163 vector_append(&v, d.vec);
165 vector_terminate(&v);