Commit | Line | Data |
---|---|---|
5b1830f3 MW |
1 | ### -*-pyrex-*- |
2 | ### | |
3 | ### Line buffering | |
4 | ### | |
5 | ### (c) 2005 Straylight/Edgeware | |
6 | ### | |
579d0169 | 7 | |
5b1830f3 MW |
8 | ###----- Licensing notice --------------------------------------------------- |
9 | ### | |
10 | ### This file is part of the Python interface to mLib. | |
11 | ### | |
12 | ### mLib/Python is free software; you can redistribute it and/or modify | |
13 | ### it under the terms of the GNU General Public License as published by | |
14 | ### the Free Software Foundation; either version 2 of the License, or | |
15 | ### (at your option) any later version. | |
16 | ### | |
17 | ### mLib/Python is distributed in the hope that it will be useful, | |
18 | ### but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | ### GNU General Public License for more details. | |
21 | ### | |
22 | ### You should have received a copy of the GNU General Public License | |
23 | ### along with mLib/Python; if not, write to the Free Software Foundation, | |
24 | ### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
579d0169 | 25 | |
## Python-visible names for the special (non-character) delimiter codes
## accepted by `LineBuffer.delim'.  The underscore-prefixed originals are
## defined outside this file -- presumably in the C-level declarations.
LBUF_CRLF = _LBUF_CRLF
LBUF_STRICTCRLF = _LBUF_STRICTCRLF
28 | ||
cdef class LineBuffer:
  """
  LineBuffer([lineproc = None], [eofproc = None])

  Split an incoming stream into lines.

  Text is pushed in with `flush'.  Each completed line is passed to the
  `line' method, and end-of-input (see `close') is reported through the
  `eof' method.  By default these call the `lineproc' and `eofproc'
  callables respectively, if they are set; subclasses may override them.
  """

  ## The underlying C line-buffer state.
  cdef lbuf b

  ## Optional callables invoked by the default `line' and `eof' methods.
  cdef object _line
  cdef object _eof

  def __cinit__(me):
    ## Register `_lbfunc' as the C-level callback, with this object as its
    ## argument, so that the callback can find its way back here.
    lbuf_init(&me.b, _lbfunc, <void *>me)
    me._line = None
    me._eof = None
  def __dealloc__(me):
    lbuf_destroy(&me.b)

  def __init__(me, object lineproc = None, object eofproc = None):
    me._line = _checkcallable(lineproc, 'line proc')
    me._eof = _checkcallable(eofproc, 'eof proc')

  @property
  def activep(me):
    """LB.activep -> BOOL: is the buffer still active?"""
    return <bint>(me.b.f & LBUF_ENABLE)

  @property
  def delim(me):
    """LB.delim -> CHAR | LBUF_...: line-end delimiter"""
    ## The two special CRLF codes are reported as they are; anything else is
    ## a character code and is converted to a one-character string.
    if me.b.delim == _LBUF_CRLF or me.b.delim == _LBUF_STRICTCRLF:
      return me.b.delim
    else:
      return chr(me.b.delim)
  @delim.setter
  def delim(me, d):
    ## Mirror image of the getter: accept a special code unchanged, or a
    ## single character which is stored as its character code.
    if d == _LBUF_CRLF or d == _LBUF_STRICTCRLF:
      me.b.delim = d
    else:
      me.b.delim = ord(d)

  @property
  def size(me):
    """LB.size -> INT: buffer size limit"""
    return me.b.sz
  @size.setter
  def size(me, size_t sz):
    lbuf_setsize(&me.b, sz)

  @property
  def lineproc(me):
    """LB.lineproc -> FUNC: call FUNC(LINE) on each line"""
    return me._line
  @lineproc.setter
  def lineproc(me, object proc):
    me._line = _checkcallable(proc, 'line proc')
  @lineproc.deleter
  def lineproc(me):
    me._line = None

  @property
  def eofproc(me):
    """LB.eofproc -> FUNC: call FUNC() at end-of-file"""
    return me._eof
  @eofproc.setter
  def eofproc(me, object proc):
    me._eof = _checkcallable(proc, 'eof proc')
  @eofproc.deleter
  def eofproc(me):
    me._eof = None

  def enable(me):
    """
    LB.enable(): enable the buffer, allowing lines to be emitted

    Calls LB.enabled() afterwards and returns LB.  Raises ValueError if the
    buffer is already enabled.
    """
    if me.b.f & LBUF_ENABLE:
      raise ValueError('already enabled')
    me.b.f = me.b.f | LBUF_ENABLE
    me.enabled()
    return me

  def disable(me):
    """
    LB.disable(): disable the buffer, suspending line emission

    Calls LB.disabled() afterwards and returns LB.  Raises ValueError if the
    buffer is already disabled.
    """
    if not (me.b.f & LBUF_ENABLE):
      raise ValueError('already disabled')
    me.b.f = me.b.f & ~LBUF_ENABLE
    me.disabled()
    return me

  def close(me):
    """
    LB.close(): report the end of the input stream

    Returns LB.  Raises ValueError if the buffer is disabled.
    """
    if not (me.b.f & LBUF_ENABLE):
      raise ValueError('buffer disabled')
    lbuf_close(&me.b)
    return me

  @property
  def free(me):
    """LB.free -> INT: amount of space remaining in buffer"""
    ## `lbuf_free' also reports where the free space starts, but only the
    ## size is of interest here.
    cdef char *p
    return lbuf_free(&me.b, &p)

  def flush(me, str):
    """
    LB.flush(STR) -> STR: insert STR into the buffer and emit lines

    Returns the tail of STR which was not pushed through the buffer; this is
    empty if everything was consumed.
    """
    cdef Py_ssize_t len
    cdef char *p
    cdef char *q
    cdef size_t n

    ## Get the input string as bytes.
    TEXT_PTRLEN(str, &p, &len)

    ## Feed the input string into the buffer.
    ##
    ## NOTE(review): the enable check comes after the chunk has been copied
    ## and counted as consumed, but before `lbuf_flush' is called on it.
    ## The fixup code below depends on this ordering, so it is preserved
    ## exactly -- confirm against the C library that this is intended.
    while len > 0:
      n = lbuf_free(&me.b, &q)
      if n > len:
        n = len
      memcpy(q, p, n); p += n; len -= n
      if not (me.b.f & LBUF_ENABLE):
        break
      lbuf_flush(&me.b, q, n)

    IF PYVERSION >= (3,):
      ## And here we have a problem. The line buffer may have been disabled
      ## while we still have text to push through, and the split may be
      ## within a UTF-8-encoded scalar. Let's see if there's anything to do
      ## before we start worrying too much.

      if len == 0:
        ## We pushed all of our data into the buffer, so there's nothing left
        ## over.

        pass

      elif me.b.len == me.b.sz:
        ## We filled the buffer up, and there was no newline. We already
        ## sent the truncated line to the output function, but we still have
        ## the remaining piece. Trim any remaining pieces of the UTF-8
        ## scalar from the start of the leftover string.

        while len > 0 and 128 <= <unsigned char>p[0] < 192:
          p += 1; len -= 1

      else:
        ## The remaining possibility is the tricky one. After accepting a
        ## full line, the line function has disabled further input. We've
        ## just filled the buffer up and we have stuff left over. If the
        ## leftover portion starts midway through a UTF-8-encoded scalar then
        ## Python won't let us stuff it back into a string. So work
        ## backwards through the buffer until we reach the start of a scalar.
        ##
        ## This must work, because the only way the tail end of a scalar
        ## could be left over is if the start of that scalar came from our
        ## original input string.

        while 128 <= <unsigned char>p[0] < 192:
          p -= 1; len += 1; me.b.len -= 1

    ## Everything is OK now.
    return TEXT_FROMSTRLEN(p, len)

  def enabled(me):
    """LB.enabled(): called when buffer is enabled"""
    pass
  def disabled(me):
    """LB.disabled(): called when buffer is disabled"""
    pass
  def line(me, line):
    """LB.line(LINE): called for each completed line"""
    return _maybecall(me._line, (line,))
  def eof(me):
    """LB.eof(): called at end-of-file"""
    return _maybecall(me._eof, ())
201 | ||
cdef void _lbfunc(char *s, size_t n, void *arg):
  ## Line-buffer callback, registered by `LineBuffer.__cinit__'.  ARG is the
  ## owning `LineBuffer'.  A null S means end-of-file and calls its `eof'
  ## method; otherwise the N bytes at S are a line for its `line' method.
  cdef LineBuffer sb = <LineBuffer>arg
  cdef size_t i
  cdef size_t want
  cdef unsigned char lead

  if s is NULL:
    sb.eof()
  else:
    IF PYVERSION >= (3,):
      ## If the input line was too long and has been truncated then there
      ## might be an incomplete Unicode scalar at the end.  Strip this away
      ## -- but only if it really is incomplete: a line which ends with a
      ## whole multi-byte scalar must be left alone, and an incomplete one
      ## must lose its lead byte as well as its continuation bytes.

      ## Skip back over the trailing continuation bytes (10xxxxxx).
      i = n
      while i > 0 and 128 <= <unsigned char>s[i - 1] < 192:
        i -= 1

      if i == 0:
        ## Nothing but stray continuation bytes (or nothing at all).
        n = 0
      else:
        ## Work out how long the final scalar claims to be from its first
        ## byte.  The loop above stopped here, so this is not a continuation
        ## byte: anything below 192 is a one-byte (ASCII) scalar.
        lead = <unsigned char>s[i - 1]
        if lead < 192: want = 1
        elif lead < 224: want = 2
        elif lead < 240: want = 3
        else: want = 4

        if n - (i - 1) < want:
          ## Too few bytes: the scalar was cut short, so drop all of it.
          n = i - 1
        else:
          ## The scalar is complete.  Keep it, and drop any stray
          ## continuation bytes which follow it.
          n = i - 1 + want

    sb.line(TEXT_FROMSTRLEN(s, n))
579d0169 | 215 | |
5b1830f3 | 216 | ###----- That's all, folks -------------------------------------------------- |