chiark / gitweb /
Update copyright dates
[disorder] / lib / split.c
CommitLineData
460b9539 1/*
2 * This file is part of DisOrder.
5aff007d 3 * Copyright (C) 2004, 2006-2008 Richard Kettlewell
460b9539 4 *
e7eb3a27 5 * This program is free software: you can redistribute it and/or modify
460b9539 6 * it under the terms of the GNU General Public License as published by
e7eb3a27 7 * the Free Software Foundation, either version 3 of the License, or
460b9539 8 * (at your option) any later version.
e7eb3a27
RK
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
460b9539 15 * You should have received a copy of the GNU General Public License
e7eb3a27 16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
460b9539 17 */
132a5a4a
RK
18/** @file lib/split.c
19 * @brief String splitting
20 */
05b75f8d 21#include "common.h"
460b9539 22
23#include <ctype.h>
460b9539 24#include <errno.h>
25
26#include "mem.h"
27#include "split.h"
28#include "log.h"
29#include "charset.h"
30#include "vector.h"
31
/** @brief Test whether @p c is one of the whitespace delimiters
 * @param c Character to test
 * @return 1 if @p c is space, tab, newline or carriage return, else 0
 */
static inline int space(int c) {
  switch(c) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return 1;
  default:
    return 0;
  }
}
38
39static void no_error_handler(const char attribute((unused)) *msg,
40 void attribute((unused)) *u) {
41}
42
f9635e06
RK
43/* TODO: handle combining characters attached to delimiters in some
44 * sane way (might include reporting an error) */
45
460b9539 46char **split(const char *p,
47 int *np,
48 unsigned flags,
49 void (*error_handler)(const char *msg, void *u),
50 void *u) {
51 char *f, *g;
52 const char *q;
53 struct vector v;
54 size_t l;
55 int qc;
56
5e49fa7f
RK
57 if(!error_handler)
58 error_handler = no_error_handler;
460b9539 59 vector_init(&v);
60 while(*p && !(*p == '#' && (flags & SPLIT_COMMENTS))) {
61 if(space(*p)) {
62 ++p;
63 continue;
64 }
65 if((flags & SPLIT_QUOTES) && (*p == '"' || *p == '\'')) {
66 qc = *p++;
67 l = 0;
68 for(q = p; *q && *q != qc; ++q) {
69 if(*q == '\\' && q[1])
70 ++q;
71 ++l;
72 }
73 if(!*q) {
74 error_handler("unterminated quoted string", u);
75 return 0;
76 }
77 f = g = xmalloc_noptr(l + 1);
78 for(q = p; *q != qc;) {
79 if(*q == '\\') {
80 ++q;
81 switch(*q) {
82 case '\\':
83 case '"':
84 case '\'':
85 *g++ = *q++;
86 break;
87 case 'n':
88 ++q;
89 *g++ = '\n';
90 break;
91 default:
92 error_handler("illegal escape sequence", u);
93 return 0;
94 }
95 } else
96 *g++ = *q++;
97 }
98 *g = 0;
99 p = q + 1;
100 } else {
101 for(q = p; *q && !space(*q); ++q)
102 ;
103 l = q - p;
104 f = xstrndup(p, l);
105 p = q;
106 }
107 vector_append(&v, f);
108 }
109 vector_terminate(&v);
110 if(np)
111 *np = v.nvec;
112 return v.vec;
113}
114
f9635e06
RK
115/* TODO handle initial combining characters sanely */
116
/** @brief Quote a string if it needs it
 * @param s String to quote
 * @return @p s itself if no quoting is needed, otherwise a newly allocated
 * double-quoted copy
 *
 * Quoting is needed if @p s is empty or contains any byte less than or equal
 * to ' ', or any of '"', '\\', '\'' or '#'.
 *
 * In the quoted form '"' and '\\' are preceded by a backslash and newline is
 * written as backslash-n; every other byte is copied unchanged.
 *
 * We rely on ASCII characters only ever representing themselves in UTF-8.
 */
const char *quoteutf8(const char *s) {
  const char *in;
  char *result, *out;
  size_t escapes = 0;
  int needs_quoting = !*s;              /* the empty string is always quoted */

  /* One pass to decide whether to quote and to count the bytes that will
   * need an extra backslash. */
  for(in = s; *in; ++in) {
    const unsigned char c = (unsigned char)*in;

    if(c <= ' ' || c == '"' || c == '\\' || c == '\'' || c == '#')
      needs_quoting = 1;
    if(c == '"' || c == '\\' || c == '\n')
      ++escapes;
  }
  if(!needs_quoting)
    return s;

  /* Two quote characters, the terminator, and one extra per escape */
  result = out = xmalloc_noptr(strlen(s) + 3 + escapes);
  *out++ = '"';
  for(in = s; *in; ++in) {
    if(*in == '\n') {
      *out++ = '\\';
      *out++ = 'n';
      continue;
    }
    if(*in == '"' || *in == '\\')
      *out++ = '\\';
    *out++ = *in;
  }
  *out++ = '"';
  *out = 0;
  return result;
}
166
167/*
168Local Variables:
169c-basic-offset:2
170comment-column:40
171End:
172*/