chiark / gitweb /
Switch to GPL v3
[disorder] / lib / split.c
CommitLineData
/*
 * This file is part of DisOrder.
 * Copyright (C) 2004, 2006-2008 Richard Kettlewell
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
18
05b75f8d 19#include "common.h"
460b9539 20
21#include <ctype.h>
460b9539 22#include <errno.h>
23
24#include "mem.h"
25#include "split.h"
26#include "log.h"
27#include "charset.h"
28#include "vector.h"
29
/* Return nonzero if C is one of the field separators recognised by split():
 * space, horizontal tab, newline or carriage return.  Deliberately not
 * isspace(): vertical tab and form feed are NOT treated as separators, and
 * the result does not depend on the locale. */
static inline int space(int c) {
  switch(c) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
    return 1;
  default:
    return 0;
  }
}
36
/* Fallback error handler that silently discards the error.
 *
 * split() installs this when its caller passes a null error_handler, so the
 * parsing code can always call the handler unconditionally.
 *
 * msg: error description (ignored)
 * u:   caller-supplied context pointer (ignored)
 */
static void no_error_handler(const char *msg, void *u) {
  /* Nothing to do; the casts only silence unused-parameter warnings. */
  (void)msg;
  (void)u;
}
40
f9635e06
RK
/* TODO: handle combining characters attached to delimiters in some
 * sane way (might include reporting an error) */
43
460b9539 44char **split(const char *p,
45 int *np,
46 unsigned flags,
47 void (*error_handler)(const char *msg, void *u),
48 void *u) {
49 char *f, *g;
50 const char *q;
51 struct vector v;
52 size_t l;
53 int qc;
54
5e49fa7f
RK
55 if(!error_handler)
56 error_handler = no_error_handler;
460b9539 57 vector_init(&v);
58 while(*p && !(*p == '#' && (flags & SPLIT_COMMENTS))) {
59 if(space(*p)) {
60 ++p;
61 continue;
62 }
63 if((flags & SPLIT_QUOTES) && (*p == '"' || *p == '\'')) {
64 qc = *p++;
65 l = 0;
66 for(q = p; *q && *q != qc; ++q) {
67 if(*q == '\\' && q[1])
68 ++q;
69 ++l;
70 }
71 if(!*q) {
72 error_handler("unterminated quoted string", u);
73 return 0;
74 }
75 f = g = xmalloc_noptr(l + 1);
76 for(q = p; *q != qc;) {
77 if(*q == '\\') {
78 ++q;
79 switch(*q) {
80 case '\\':
81 case '"':
82 case '\'':
83 *g++ = *q++;
84 break;
85 case 'n':
86 ++q;
87 *g++ = '\n';
88 break;
89 default:
90 error_handler("illegal escape sequence", u);
91 return 0;
92 }
93 } else
94 *g++ = *q++;
95 }
96 *g = 0;
97 p = q + 1;
98 } else {
99 for(q = p; *q && !space(*q); ++q)
100 ;
101 l = q - p;
102 f = xstrndup(p, l);
103 p = q;
104 }
105 vector_append(&v, f);
106 }
107 vector_terminate(&v);
108 if(np)
109 *np = v.nvec;
110 return v.vec;
111}
112
f9635e06
RK
/* TODO: handle initial combining characters sanely */
114
/* Quote the string S, if necessary, so that split() with SPLIT_QUOTES (and
 * SPLIT_COMMENTS) would read it back as a single field.
 *
 * S is returned unchanged (the very same pointer) when it is non-empty and
 * contains no byte <= ' ' and none of  "  \  '  #.  Otherwise a newly
 * allocated string is returned: S wrapped in double quotes, with " and \
 * preceded by a backslash and newline written as \n.  The empty string is
 * therefore returned as "".
 *
 * This relies on ASCII bytes only ever representing themselves in UTF-8, so
 * multi-byte sequences can be copied through byte by byte.
 */
const char *quoteutf8(const char *s) {
  const char *in;
  char *result, *out;
  size_t extra = 0;                     /* bytes added by escaping */
  int plain = (*s != 0);                /* empty strings are always quoted */

  /* One scan decides whether quoting is needed and how many characters
   * will need an extra byte in the quoted form. */
  for(in = s; *in; ++in) {
    switch(*in) {
    case '"':
    case '\\':
    case '\n':
      ++extra;
      plain = 0;
      break;
    case '\'':
    case '#':
      plain = 0;
      break;
    default:
      if((unsigned char)*in <= ' ')
        plain = 0;
      break;
    }
  }
  if(plain)
    return s;

  /* in now points at the terminator, so in - s is the length of s; the 3
   * covers the two enclosing quotes and the terminating NUL. */
  result = out = xmalloc_noptr((size_t)(in - s) + 3 + extra);
  *out++ = '"';
  for(in = s; *in; ++in) {
    if(*in == '\n') {
      *out++ = '\\';
      *out++ = 'n';
      continue;
    }
    if(*in == '"' || *in == '\\')
      *out++ = '\\';
    *out++ = *in;
  }
  *out++ = '"';
  *out = 0;
  return result;
}
164
165/*
166Local Variables:
167c-basic-offset:2
168comment-column:40
169End:
170*/