chiark / gitweb /
disorder.h: more consistent approach to function attributes
[disorder] / lib / split.c
CommitLineData
460b9539 1/*
2 * This file is part of DisOrder.
08b645e5 3 * Copyright (C) 2004, 2006-2008, 2013 Richard Kettlewell
460b9539 4 *
e7eb3a27 5 * This program is free software: you can redistribute it and/or modify
460b9539 6 * it under the terms of the GNU General Public License as published by
e7eb3a27 7 * the Free Software Foundation, either version 3 of the License, or
460b9539 8 * (at your option) any later version.
e7eb3a27
RK
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
460b9539 15 * You should have received a copy of the GNU General Public License
e7eb3a27 16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
460b9539 17 */
132a5a4a
RK
18/** @file lib/split.c
19 * @brief String splitting
20 */
05b75f8d 21#include "common.h"
460b9539 22
23#include <ctype.h>
460b9539 24#include <errno.h>
25
26#include "mem.h"
27#include "split.h"
28#include "log.h"
460b9539 29#include "vector.h"
30
/** @brief Test whether a character separates fields
 * @param c Character to test
 * @return Nonzero if @p c is a space, tab, newline or carriage return
 *
 * Only these four characters count; no <ctype.h> classification is
 * consulted.
 */
static inline int space(int c) {
  return (c == ' '
          || c == '\t'
          || c == '\n'
          || c == '\r');
}
37
38static void no_error_handler(const char attribute((unused)) *msg,
39 void attribute((unused)) *u) {
40}
41
f9635e06
RK
/* TODO: handle combining characters attached to delimiters in some
 * sane way (might include reporting an error) */
44
460b9539 45char **split(const char *p,
46 int *np,
47 unsigned flags,
48 void (*error_handler)(const char *msg, void *u),
49 void *u) {
50 char *f, *g;
51 const char *q;
52 struct vector v;
53 size_t l;
54 int qc;
55
5e49fa7f
RK
56 if(!error_handler)
57 error_handler = no_error_handler;
460b9539 58 vector_init(&v);
59 while(*p && !(*p == '#' && (flags & SPLIT_COMMENTS))) {
60 if(space(*p)) {
61 ++p;
62 continue;
63 }
64 if((flags & SPLIT_QUOTES) && (*p == '"' || *p == '\'')) {
65 qc = *p++;
66 l = 0;
67 for(q = p; *q && *q != qc; ++q) {
68 if(*q == '\\' && q[1])
69 ++q;
70 ++l;
71 }
72 if(!*q) {
73 error_handler("unterminated quoted string", u);
74 return 0;
75 }
76 f = g = xmalloc_noptr(l + 1);
77 for(q = p; *q != qc;) {
78 if(*q == '\\') {
79 ++q;
80 switch(*q) {
81 case '\\':
82 case '"':
83 case '\'':
84 *g++ = *q++;
85 break;
86 case 'n':
87 ++q;
88 *g++ = '\n';
89 break;
90 default:
91 error_handler("illegal escape sequence", u);
92 return 0;
93 }
94 } else
95 *g++ = *q++;
96 }
97 *g = 0;
98 p = q + 1;
99 } else {
100 for(q = p; *q && !space(*q); ++q)
101 ;
102 l = q - p;
103 f = xstrndup(p, l);
104 p = q;
105 }
106 vector_append(&v, f);
107 }
108 vector_terminate(&v);
109 if(np)
110 *np = v.nvec;
111 return v.vec;
112}
113
f9635e06
RK
/* TODO: handle initial combining characters sanely */
115
/** @brief Quote a UTF-8 string if necessary
 * @param s String to quote
 * @return @p s itself if no quoting is needed, otherwise a newly
 * allocated quoted copy
 *
 * The string is returned unchanged if it is nonempty and contains no
 * byte less than or equal to space, and none of <tt>"</tt>,
 * <tt>\\</tt>, <tt>'</tt> or <tt>#</tt>.
 *
 * Otherwise the result is wrapped in double quotes, with <tt>"</tt> and
 * <tt>\\</tt> preceded by a backslash and newline written as
 * <tt>\\n</tt>.  Single quotes, '#' and other bytes at or below space
 * trigger quoting but are copied through unescaped.  The empty string
 * yields a pair of double quotes.
 */
const char *quoteutf8(const char *s) {
  size_t len = 3 + strlen(s);           /* two quotes and the terminator */
  const char *t;
  char *r, *q;

  /* see if we need to quote */
  if(*s) {
    for(t = s; *t; t++)
      if((unsigned char)*t <= ' '
         || *t == '"'
         || *t == '\\'
         || *t == '\''
         || *t == '#')
        break;
    if(!*t)
      return s;
  }

  /* we rely on ASCII characters only ever representing themselves in
   * UTF-8.  Each escaped character costs one extra byte. */
  for(t = s; *t; t++) {
    switch(*t) {
    case '"':
    case '\\':
    case '\n':
      ++len;
      break;
    default:
      break;
    }
  }
  q = r = xmalloc_noptr(len);
  *q++ = '"';
  for(t = s; *t; t++) {
    switch(*t) {
    case '\n':
      *q++ = '\\';
      *q++ = 'n';
      break;
    case '"':
    case '\\':
      *q++ = '\\';
      /* fall through */
    default:
      *q++ = *t;
      break;
    }
  }
  *q++ = '"';
  *q = 0;
  return r;
}
165
166/*
167Local Variables:
168c-basic-offset:2
169comment-column:40
170End:
171*/