Commit | Line | Data |
---|---|---|
1f7d590d MW |
1 | %%% -*-latex-*- |
2 | %%% | |
3 | %%% Description of the parsing machinery | |
4 | %%% | |
5 | %%% (c) 2015 Straylight/Edgeware | |
6 | %%% | |
7 | ||
8 | %%%----- Licensing notice --------------------------------------------------- | |
9 | %%% | |
e0808c47 | 10 | %%% This file is part of the Sensible Object Design, an object system for C. |
1f7d590d MW |
11 | %%% |
12 | %%% SOD is free software; you can redistribute it and/or modify | |
13 | %%% it under the terms of the GNU General Public License as published by | |
14 | %%% the Free Software Foundation; either version 2 of the License, or | |
15 | %%% (at your option) any later version. | |
16 | %%% | |
17 | %%% SOD is distributed in the hope that it will be useful, | |
18 | %%% but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | %%% GNU General Public License for more details. | |
21 | %%% | |
22 | %%% You should have received a copy of the GNU General Public License | |
23 | %%% along with SOD; if not, write to the Free Software Foundation, | |
24 | %%% Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
25 | ||
26 | \chapter{Parsing} \label{ch:parsing} | |
27 | ||
28 | %%%-------------------------------------------------------------------------- | |
29 | \section{The parser protocol} \label{sec:parsing.proto} | |
30 | ||
31 | For the purpose of Sod's parsing library, \emph{parsing} is the process of | |
32 | reading a sequence of input items, in order, and computing an output value. | |
33 | ||
34 | A \emph{parser} is an expression which consumes zero or more input items and | |
35 | returns three values: a \emph{result}, a \emph{success flag}, and a | |
36 | \emph{consumed flag}. The two flags are (generalized) booleans. If the | |
37 | success flag is non-nil, then the parser is said to have \emph{succeeded}, | |
38 | and the result is the parser's output. If the success flag is nil then the | |
39 | parser is said to have \emph{failed}, and the result is a list of | |
40 | \emph{indicators}. Finally, the consumed flag is non-nil if the parser | |
41 | consumed any input items. | |
42 | ||
fcb6c0fb MW |
43 | \begin{describe}{fun}{combine-parser-failures @<failures> @> @<list>} |
44 | \end{describe} | |
45 | ||
da901cf2 MW |
46 | \begin{describe}{fun}{parse-empty \&optional @<value> @> @<function>} |
47 | \end{describe} | |
48 | ||
49 | \begin{describe}{fun} | |
50 | {parse-fail @<indicator> \&optional @<consumedp> @> @<function>} | |
51 | \end{describe} | |
52 | ||
d2f1db72 MW |
53 | %%%-------------------------------------------------------------------------- |
54 | \section{Streams} \label{sec:parsing.streams} | |
55 | ||
56 | \begin{describe*} | |
57 | {\dhead{cls}{position-aware-stream \&key :file :line :column} | |
58 | \dhead{gf}{position-aware-stream-file @<stream> @> @<pathname>} | |
59 | \dhead{gf}{setf (position-aware-stream-file @<stream>) @<pathname>} | |
60 | \dhead{gf}{position-aware-stream-line @<stream> @> @<fixnum>} | |
61 | \dhead{gf}{setf (position-aware-stream-line @<stream>) @<fixnum>} | |
62 | \dhead{gf}{position-aware-stream-column @<stream> @> @<fixnum>} | |
63 | \dhead{gf}{setf (position-aware-stream-column @<stream>) @<fixnum>}} | |
64 | \end{describe*} | |
65 | ||
a75cd932 MW |
66 | \begin{describe*} |
67 | {\dhead{cls}{position-aware-input-stream \&key :file :line :column} | |
68 | \dhead{cls}{position-aware-output-stream \&key :file :line :column}} | |
69 | \end{describe*} | |
70 | ||
d2f1db72 MW |
71 | \begin{describe}{gf}{stream-pathname @<stream> @> @<pathname-or-nil>} |
72 | \begin{describe}{meth}{stream} | |
73 | {stream-pathname (@<stream> stream) @> nil} | |
74 | \end{describe} | |
75 | \begin{describe}{meth}{file-stream} | |
76 | {stream-pathname (@<stream> file-stream) @> @<pathname>} | |
77 | \end{describe} | |
78 | \begin{describe}{meth}{position-aware-stream} | |
79 | {stream-pathname (@<stream> position-aware-stream) @> @<pathname>} | |
80 | \end{describe} | |
81 | \end{describe} | |
82 | ||
83 | \begin{describe}{gf}{stream-line-and-column @<stream> @> @<line> @<column>} | |
84 | \begin{describe}{meth}{stream} | |
85 | {stream-line-and-column (@<stream> stream) @> nil nil} | |
86 | \end{describe} | |
87 | \begin{describe}{meth}{position-aware-stream} | |
88 | {stream-line-and-column (@<stream> position-aware-stream) | |
89 | \nlret @<line> @<column>} | |
90 | \end{describe} | |
91 | \end{describe} | |
92 | ||
1f7d590d | 93 | %%%-------------------------------------------------------------------------- |
fcb6c0fb MW |
94 | \section{File locations} \label{sec:parsing.floc} |
95 | ||
96 | \begin{describe}{cls}{file-location} | |
97 | \end{describe} | |
98 | ||
99 | \begin{describe}{fun}{file-location-p @<object> @> @<generalized-boolean>} | |
100 | \end{describe} | |
101 | ||
102 | \begin{describe}{fun} | |
103 | {make-file-location @<filename> \&optional @<line> @<column> | |
104 | @> @<file-location>} | |
105 | \end{describe} | |
106 | ||
107 | \begin{describe*} | |
108 | {\dhead{fun}{file-location-filename @<floc> @> @<string-or-nil>} | |
109 | \dhead{fun}{file-location-line @<floc> @> @<fixnum-or-nil>} | |
110 | \dhead{fun}{file-location-column @<floc> @> @<fixnum-or-nil>}} | |
111 | \end{describe*} | |
112 | ||
113 | \begin{describe}{gf}{file-location @<object> @> @<floc>} | |
87883222 MW |
114 | \begin{describe}{meth}{file-location} |
115 | {file-location (@<floc> file-location) @> @<floc>} | |
fcb6c0fb | 116 | \end{describe} |
87883222 MW |
117 | \begin{describe}{meth}{stream} |
118 | {file-location (@<stream> stream) @> @<floc>} | |
fcb6c0fb | 119 | \end{describe} |
87883222 MW |
120 | \begin{describe}{meth}{t} |
121 | {file-location (@<any> t) @> @<floc>} | |
fcb6c0fb MW |
122 | \end{describe} |
123 | \end{describe} | |
124 | ||
125 | \begin{describe}{cls}{condition-with-location (condition) \&key :location} | |
126 | \end{describe} | |
127 | ||
87883222 | 128 | \begin{describe}{meth}{condition-with-location} |
fcb6c0fb MW |
129 | {file-location (@<condition> condition-with-location) @> @<floc>} |
130 | \end{describe} | |
131 | ||
132 | \begin{describe*} | |
133 | {\quad\=\quad\=\kill | |
134 | \dhead{cls} | |
020b9e2b | 135 | {error-with-location (condition-with-location error) \\ \> |
acaf88ad | 136 | \&key :location} |
fcb6c0fb | 137 | \dhead{cls} |
acaf88ad MW |
138 | {warning-with-location (condition-with-location warning) \\ \> |
139 | \&key :location} | |
db6c3279 MW |
140 | \dhead{cls} |
141 | {information-with-location (condition-with-location information) \\ \> | |
142 | \&key :location} | |
a75cd932 MW |
143 | \dhead{cls} |
144 | {enclosing-condition-with-location (condition) \\ \> | |
145 | \&key :location} | |
fcb6c0fb | 146 | \dhead{cls} |
acaf88ad | 147 | {enclosing-error-with-location |
020b9e2b | 148 | (enclosing-condition-with-location error) \\ \> |
acaf88ad MW |
149 | \&key :condition :location} |
150 | \dhead{cls} | |
151 | {enclosing-warning-with-location | |
020b9e2b | 152 | (enclosing-condition-with-location warning) \\ \> |
acaf88ad | 153 | \&key :condition :location} |
db6c3279 MW |
154 | \dhead{cls} |
155 | {enclosing-information-with-location | |
156 | (enclosing-condition-with-location information) \\ \> | |
157 | \&key :condition :location} | |
acaf88ad MW |
158 | \dhead{cls} |
159 | {simple-condition-with-location | |
020b9e2b | 160 | (condition-with-location simple-condition) \\ \> |
acaf88ad MW |
161 | \&key :format-control :format-arguments :location} |
162 | \dhead{cls} | |
163 | {simple-error-with-location | |
020b9e2b | 164 | (error-with-location simple-error) \\ \> |
acaf88ad MW |
165 | \&key :format-control :format-arguments :location} |
166 | \dhead{cls} | |
167 | {simple-warning-with-location | |
020b9e2b | 168 | (warning-with-location simple-warning) \\ \> |
db6c3279 MW |
169 | \&key :format-control :format-arguments :location} |
170 | \dhead{cls} | |
171 | {simple-information-with-location | |
172 | (information-with-location simple-information) \\ \> | |
acaf88ad | 173 | \&key :format-control :format-arguments :location}} |
fcb6c0fb MW |
174 | \end{describe*} |
175 | ||
388ab382 MW |
176 | \begin{describe}{gf} |
177 | {enclosing-condition-with-location-type @<condition> @> @<symbol>} | |
178 | \end{describe} | |
179 | ||
fcb6c0fb MW |
180 | \begin{describe}{fun} |
181 | {make-condition-with-location @<default-type> @<floc> | |
182 | @<datum> \&rest @<arguments> | |
183 | \nlret @<condition-with-location>} | |
184 | \end{describe} | |
185 | ||
186 | \begin{describe*} | |
187 | {\dhead{fun}{error-with-location @<floc> @<datum> \&rest @<arguments>} | |
188 | \dhead{fun}{cerror-with-location @<floc> @<continue-string> | |
189 | @<datum> \&rest @<arguments>} | |
190 | \dhead{fun}{cerror*-with-location @<floc> @<datum> \&rest @<arguments>} | |
a75cd932 MW |
191 | \dhead{fun}{warn-with-location @<floc> @<datum> \&rest @<arguments>} |
192 | \dhead{fun}{info-with-location @<floc> @<datum> \&rest @<arguments>}} | |
fcb6c0fb MW |
193 | \end{describe*} |
194 | ||
40d95de7 MW |
195 | \begin{describe*} |
196 | {\dhead{cls}{parser-error (error) \\ \ind | |
197 | \&key :expected :found \-} | |
198 | \dhead{gf}{parser-error-expected @<condition> @> @<list>} | |
199 | \dhead{gf}{parser-error-found @<condition> @> @<value>}} | |
200 | \end{describe*} | |
201 | ||
202 | \begin{describe}{fun} | |
203 | {report-parser-error @<error> @<stream> @<show-expected> @<show-found>} | |
204 | \end{describe} | |
205 | ||
206 | \begin{describe*} | |
207 | {\quad\=\kill | |
208 | \dhead{cls}{base-lexer-error (error-with-location) \&key :location} | |
209 | \dhead{cls}{simple-lexer-error | |
210 | (base-lexer-error simple-error-with-location) \\\> | |
211 | \&key :format-control :format-arguments :location} | |
212 | \dhead{cls}{base-syntax-error (error-with-location) \&key :location} | |
213 | \dhead{cls}{simple-syntax-error | |
214 | (base-syntax-error simple-error-with-location) \\\> | |
215 | \&key :format-control :format-arguments :location}} | |
216 | \end{describe*} | |
217 | ||
fcb6c0fb | 218 | \begin{describe}{mac} |
cac85e0b MW |
219 | {with-default-error-location (@<floc>) @<declaration>^* @<form>^* |
220 | @> @<value>^*} | |
fcb6c0fb MW |
221 | \end{describe} |
222 | ||
40d95de7 MW |
223 | \begin{describe}{gf}{classify-condition @<condition> @> @<string>} |
224 | \begin{describe*} | |
87883222 MW |
225 | {\dhead{meth}{error} |
226 | {classify-condition (@<condition> error) @> @<string>} | |
227 | \dhead{meth}{warning} | |
228 | {classify-condition (@<condition> warning) @> @<string>} | |
229 | \dhead{meth}{information} | |
230 | {classify-condition (@<condition> information) | |
231 | @> @<string>} | |
232 | \dhead{meth}{base-lexer-error} | |
233 | {classify-condition (@<condition> base-lexer-error) | |
234 | @> @<string>} | |
235 | \dhead{meth}{base-syntax-error} | |
236 | {classify-condition (@<condition> base-syntax-error) | |
237 | @> @<string>}} | |
40d95de7 MW |
238 | \end{describe*} |
239 | \end{describe} | |
240 | ||
fcb6c0fb | 241 | \begin{describe}{mac} |
cac85e0b | 242 | {count-and-report-errors () @<declaration>^* @<form>^* |
fcb6c0fb MW |
243 | @> @<value> @<n-errors> @<n-warnings>} |
244 | \end{describe} | |
1f7d590d MW |
245 | |
246 | %%%-------------------------------------------------------------------------- | |
247 | \section{Scanners} \label{sec:parsing.scanner} | |
248 | ||
249 | A \emph{scanner} is an object which keeps track of a parser's progress as it | |
250 | works through its input. There's no common base class for scanners: a | |
251 | scanner is simply any object which implements the scanner protocol described | |
252 | here. | |
253 | ||
254 | A scanner maintains a sequence of items to read. It can step forwards | |
255 | through the items, one at a time, until it reaches the end (if, indeed, the | |
256 | sequence is finite, which it needn't be). Until that point, there is a | |
257 | current item, though there's no protocol for accessing it at this level | |
258 | because the nature of the items is left unspecified. | |
259 | ||
260 | Some scanners support an additional \emph{place-capture} protocol which | |
261 | allows rewinding the scanner to an earlier point in the input so that it can | |
262 | be scanned again. | |
263 | ||
e38e8367 | 264 | |
1f7d590d MW |
265 | \subsection{Basic scanner protocol} \label{sec:parsing.scanner.basic} |
266 | ||
267 | The basic protocol supports stepping the scanner forward through its input | |
268 | sequence, and detecting the end of the sequence. | |
269 | ||
270 | \begin{describe}{gf}{scanner-step @<scanner>} | |
271 | Advance the @<scanner> to the next item, which becomes current. | |
272 | ||
273 | It is an error to step the scanner if the scanner is at end-of-file. | |
274 | \end{describe} | |
275 | ||
276 | \begin{describe}{gf}{scanner-at-eof-p @<scanner> @> @<generalized-boolean>} | |
277 | Return non-nil if the scanner is at end-of-file, i.e., there are no more | |
278 | items to read. | |
279 | ||
280 | If nil is returned, there is a current item, and it is safe to step the | |
281 | scanner again; otherwise, it is an error to query the current item or to | |
282 | step the scanner. | |
283 | \end{describe} | |
284 | ||
e38e8367 | 285 | |
1f7d590d MW |
286 | \subsection{Place-capture scanner protocol} \label{sec:parsing.scanner.place} |
287 | ||
288 | The place-capture protocol allows rewinding to an earlier point in the | |
289 | sequence. Not all scanners support the place-capture protocol. | |
290 | ||
291 | To rewind a scanner to a particular point, that point must be \emph{captured} | |
292 | as a \emph{place} when it's current -- so you must know in advance that this | |
293 | is an interesting place that's worth capturing. The type of place returned | |
294 | depends on the type of scanner. Given a captured place, the scanner can be | |
295 | rewound to the position held in it. | |
296 | ||
297 | Depending on how the scanner works, holding onto a captured place might | |
054e8f8f | 298 | consume a lot of memory or cause poor performance. For example, if the |
1f7d590d MW |
299 | scanner is reading from an input stream, having a captured place means that |
300 | data from that point on must be buffered in case the program needs to rewind | |
301 | the scanner and read that data again. Therefore it's possible to | |
302 | \emph{release} a place when it turns out not to be needed any more. | |
303 | ||
304 | \begin{describe}{gf}{scanner-capture-place @<scanner> @> @<place>} | |
305 | Capture the @<scanner>'s current position as a place, and return the place. | |
306 | \end{describe} | |
307 | ||
308 | \begin{describe}{gf}{scanner-restore-place @<scanner> @<place>} | |
309 | Rewind the @<scanner> to the state it was in when @<place> was captured. | |
310 | In particular, the item that was current when the @<place> was captured | |
311 | becomes current again. | |
312 | ||
313 | It is an error to restore a @<place> that has been released, or if the | |
314 | @<place> wasn't captured from the @<scanner>. | |
315 | \end{describe} | |
316 | ||
317 | \begin{describe}{gf}{scanner-release-place @<scanner> @<place>} | |
318 | Release the @<place>, to avoid having to maintain the ability to restore | |
319 | it after it's not needed any more. | |
320 | ||
321 | It is an error if the @<place> wasn't captured from the @<scanner>. | |
322 | \end{describe} | |
323 | ||
324 | \begin{describe}{mac} | |
cac85e0b MW |
325 | {with-scanner-place (@<place> @<scanner>) @<declaration>^* @<form>^* |
326 | @> @<value>^*} | |
cd35a54e MW |
327 | Capture the @<scanner>'s current position as a place, evaluate the @<form>s |
328 | as an implicit progn with the variable @<place> bound to the captured | |
329 | place. When control leaves the @<form>s, the place is released. The | |
330 | return values are the values of the final @<form>. | |
1f7d590d MW |
331 | \end{describe} |
332 | ||
e38e8367 | 333 | |
1f7d590d MW |
334 | \subsection{Scanner file-location protocol} \label{sec:parsing.scanner.floc} |
335 | ||
fcb6c0fb MW |
336 | Some scanners participate in the file-location protocol |
337 | (\xref{sec:parsing.floc}). They implement a method on @|file-location| which | |
338 | collects the necessary information using scanner-specific functions described | |
339 | here. | |
1f7d590d MW |
340 | |
341 | \begin{describe}{fun}{scanner-file-location @<scanner> @> @<file-location>} | |
342 | Return a @|file-location| object describing the current position of the | |
343 | @<scanner>. | |
344 | ||
345 | This calls the @|scanner-filename|, @|scanner-line| and @|scanner-column| | |
346 | generic functions on the scanner, and uses these to fill in an appropriate | |
347 | @|file-location|. | |
348 | ||
349 | Since there are default methods on these generic functions, it is not an | |
350 | error to call @|scanner-file-location| on any kind of value, but it might | |
351 | not be very useful. This function exists to do the work of appropriately | |
352 | specialized methods on @|file-location|. | |
353 | \end{describe} | |
354 | ||
fcb6c0fb MW |
355 | \begin{describe*} |
356 | {\dhead{gf}{scanner-filename @<scanner> @> @<string>} | |
357 | \dhead{gf}{scanner-line @<scanner> @> @<integer>} | |
358 | \dhead{gf}{scanner-column @<scanner> @> @<integer>}} | |
359 | Return the filename, line and column components of the @<scanner>'s current | |
360 | position, for use in assembling a @<file-location>: see the | |
361 | @|scanner-file-location| function. | |
1f7d590d | 362 | |
fcb6c0fb MW |
363 | There are default methods on all three generic functions which simply |
364 | return nil. | |
365 | \end{describe*} | |
1f7d590d | 366 | |
e38e8367 | 367 | |
1f7d590d MW |
368 | \subsection{Character scanners} \label{sec:parsing.scanner.char} |
369 | ||
370 | Character scanners are scanners which read sequences of characters. | |
371 | ||
372 | \begin{describe}{cls}{character-scanner () \&key} | |
373 | Base class for character scanners. This provides some very basic | |
374 | functionality. | |
375 | ||
376 | Not all character scanners are subclasses of @|character-scanner|. | |
377 | \end{describe} | |
378 | ||
379 | \begin{describe}{gf}{scanner-current-char @<scanner> @> @<character>} | |
380 | Returns the current character. | |
381 | \end{describe} | |
382 | ||
383 | \begin{describe}{gf}{scanner-unread @<scanner> @<character>} | |
384 | Rewind the @<scanner> by one step. The @<character> must be the previous | |
385 | current character, and becomes the current character again. It is an error | |
054e8f8f | 386 | if: the @<scanner> has reached end-of-file; the @<scanner> has never been |
1f7d590d MW |
387 | stepped; or @<character> was not the previous current character. |
388 | \end{describe} | |
389 | ||
390 | \begin{describe}{gf} | |
391 | {scanner-interval @<scanner> @<place-a> \&optional @<place-b> | |
392 | @> @<string>} | |
393 | Return the characters in the @<scanner>'s input from @<place-a> up to (but | |
394 | not including) @<place-b>. | |
395 | ||
396 | The characters are returned as a string. If @<place-b> is omitted, return | |
397 | the characters up to (but not including) the current position. It is an | |
398 | error if @<place-b> precedes @<place-a> or they are from different | |
399 | scanners. | |
400 | ||
401 | This function is a character-scanner-specific extension to the | |
402 | place-capture protocol; not all character scanners implement the | |
403 | place-capture protocol, and some that do may not implement this function. | |
404 | \end{describe} | |
405 | ||
406 | \subsubsection{Stream access to character scanners} | |
407 | Sometimes it can be useful to apply the standard Lisp character input | |
408 | operations to the sequence of characters held by a character scanner. | |
409 | ||
410 | \begin{describe}{gf}{make-scanner-stream @<scanner> @> @<stream>} | |
411 | Returns a fresh input @|stream| object which fetches input characters from | |
412 | the character scanner object @<scanner>. Reading characters from the | |
413 | stream steps the scanner. The stream will reach end-of-file when the | |
414 | scanner reports end-of-file. If the scanner implements the file-location | |
415 | protocol then reading from the stream will change the file location in an | |
416 | appropriate manner. | |
417 | ||
418 | This is mostly useful for applying standard Lisp stream functions, most | |
419 | particularly the @|read| function, in the middle of a parsing operation. | |
420 | \end{describe} | |
421 | ||
422 | \begin{describe}{cls}{character-scanner-stream (stream) \&key :scanner} | |
423 | A Common Lisp input @|stream| object which works using the character | |
424 | scanner protocol. Any @<scanner> which implements the base scanner and | |
425 | character scanner protocols is suitable. See @|make-scanner-stream|. | |
426 | \end{describe} | |
427 | ||
e38e8367 | 428 | |
1f7d590d MW |
429 | \subsection{String scanners} \label{sec:parsing.scanner.string} |
430 | ||
431 | A \emph{string scanner} is a simple kind of character scanner which reads | |
432 | input from a string object. String scanners implement the character scanner | |
433 | and place-capture protocols. | |
434 | ||
435 | \begin{describe}{cls}{string-scanner} | |
436 | The class of string scanners. The @|string-scanner| class is not a | |
437 | subclass of @|character-scanner|. | |
438 | \end{describe} | |
439 | ||
440 | \begin{describe}{fun}{string-scanner-p @<value> @> @<generalized-boolean>} | |
441 | Return non-nil if @<value> is a @|string-scanner| object; otherwise return | |
442 | nil. | |
443 | \end{describe} | |
444 | ||
445 | \begin{describe}{fun} | |
446 | {make-string-scanner @<string> \&key :start :end @> @<string-scanner>} | |
447 | Construct and return a fresh @|string-scanner| object. The new scanner | |
448 | will read characters from @<string>, starting at index @<start> (which | |
449 | defaults to zero), and continuing until it reaches index @<end> (defaults | |
450 | to the end of the @<string>). | |
451 | \end{describe} | |
452 | ||
e38e8367 | 453 | |
1f7d590d MW |
454 | \subsection{Character buffer scanners} \label{sec:parsing.scanner.charbuf} |
455 | ||
456 | A \emph{character buffer scanner}, or \emph{charbuf scanner} for short, is an | |
457 | efficient scanner for reading characters from an input stream. Charbuf | |
458 | scanners implement the basic scanner, character buffer, place-capture, and | |
459 | file-location protocols. | |
460 | ||
461 | \begin{describe}{cls} | |
462 | {charbuf-scanner (character-scanner) | |
463 | \&key :stream :filename :line :column} | |
464 | The class of charbuf scanners. The scanner will read characters from | |
465 | @<stream>. Charbuf scanners implement the file-location protocol: the | |
466 | initial location is set from the given @<filename>, @<line> and @<column>; | |
467 | the scanner will update the location as it reads its input. | |
468 | \end{describe} | |
469 | ||
a75cd932 MW |
470 | \begin{describe}{cls} |
471 | {charbuf-scanner-stream (character-scanner-stream) \&key :scanner} | |
472 | \end{describe} | |
473 | ||
1f7d590d MW |
474 | \begin{describe}{cls}{charbuf-scanner-place} |
475 | The class of place objects captured by a charbuf scanner. | |
476 | \end{describe} | |
477 | ||
478 | \begin{describe}{fun} | |
479 | {charbuf-scanner-place-p @<value> @> @<generalized-boolean>} | |
480 | Type predicate for charbuf scanner places: returns non-nil if @<value> is a | |
481 | place captured by a charbuf scanner, and nil otherwise. | |
482 | \end{describe} | |
483 | ||
484 | \begin{describe}{gf} | |
485 | {charbuf-scanner-map @<scanner> @<func> \&optional @<fail> | |
054e8f8f | 486 | \nlret @<result> @<success-flag> @<consumed-flag>} |
1f7d590d MW |
487 | Read characters from the @<scanner>'s buffers. |
488 | ||
489 | This is intended to be an efficient and versatile interface for reading | |
490 | characters from a scanner in bulk. The function @<func> is invoked | |
491 | repeatedly, as if by | |
492 | \begin{prog} | |
020b9e2b MW |
493 | (multiple-value-bind (@<donep> @<used>) \\ \ind\ind |
494 | (funcall @<func> @<buf> @<start> @<end>) \-\\ | |
1f7d590d MW |
495 | \textrm\ldots) |
496 | \end{prog} | |
497 | The argument @<buf> is a simple string; @<start> and @<end> are two | |
498 | nonnegative fixnums, indicating that the subsequence of @<buf> between | |
499 | @<start> (inclusive) and @<end> (exclusive) should be processed. If | |
500 | @<func>'s return value @<donep> is nil then @<used> is ignored: the | |
501 | function has consumed the entire buffer and wishes to read more. If | |
054e8f8f | 502 | @<donep> is non-nil, then @<used> must be a fixnum such that $@<start> \le |
1f7d590d MW |
503 | @<used> \le @<end>$: the function has consumed the buffer as far as @<used> |
504 | (exclusive) and has completed successfully. | |
505 | ||
506 | If end-of-file is encountered before @<func> completes successfully then it | |
507 | fails: the @<fail> function is called with no arguments, and is expected to | |
508 | return two values. If omitted, @<fail> defaults to | |
509 | \begin{prog} | |
020b9e2b MW |
510 | (lambda () \\ \ind |
511 | (values nil nil)) | |
1f7d590d MW |
512 | \end{prog} |
513 | ||
514 | The @|charbuf-scanner-map| function returns three values. The first value | |
515 | is the non-nil @<donep> value returned by @<func> if @|charbuf-scanner-map| | |
516 | succeeded, or the first value returned by @<fail>; the second value is @|t| | |
517 | on success, or the second value returned by @<fail>; the third value is | |
518 | non-nil if @<func> consumed any input, i.e., it returned with @<donep> nil | |
519 | at least once, or with $@<used> > @<start>$. | |
520 | \end{describe} | |
521 | ||
e38e8367 | 522 | |
1f7d590d MW |
523 | \subsection{Token scanners} \label{sec:parsing.scanner.token} |
524 | ||
525 | \begin{describe}{cls} | |
526 | {token-scanner () \&key :filename (:line 1) (:column 0)} | |
527 | \end{describe} | |
528 | ||
a75cd932 MW |
529 | \begin{describe*} |
530 | {\dhead{gf}{setf (scanner-line @<scanner>) @<fixnum>} | |
531 | \dhead{gf}{setf (scanner-column @<scanner>) @<fixnum>}} | |
532 | \end{describe*} | |
533 | ||
1f7d590d MW |
534 | \begin{describe}{gf}{token-type @<scanner> @> @<type>} |
535 | \end{describe} | |
536 | ||
537 | \begin{describe}{gf}{token-value @<scanner> @> @<value>} | |
538 | \end{describe} | |
539 | ||
540 | \begin{describe}{gf}{scanner-token @<scanner> @> @<type> @<value>} | |
541 | \end{describe} | |
542 | ||
543 | \begin{describe}{ty}{token-scanner-place} | |
544 | \end{describe} | |
545 | ||
546 | \begin{describe}{fun} | |
547 | {token-scanner-place-p @<value> @> @<generalized-boolean>} | |
548 | \end{describe} | |
549 | ||
e38e8367 | 550 | |
1f7d590d MW |
551 | \subsection{List scanners} |
552 | ||
553 | \begin{describe}{ty}{list-scanner} | |
554 | \end{describe} | |
555 | ||
556 | \begin{describe}{fun}{list-scanner-p @<value> @> @<generalized-boolean>} | |
557 | \end{describe} | |
558 | ||
559 | \begin{describe}{fun}{make-list-scanner @<list> @> @<list-scanner>} | |
560 | \end{describe} | |
561 | ||
562 | %%%-------------------------------------------------------------------------- | |
4c35de3a MW |
563 | \section{Parser contexts and parser syntax} \label{sec:parsing.syntax} |
564 | ||
565 | ||
566 | \subsection{Parser contexts} \label{sec:parsing.syntax.contexts} | |
567 | ||
568 | \begin{describe}{mac} | |
569 | {with-parser-context | |
570 | (@<context-class> @{ @<init-keyword> @<value> @}^*) \\ \ind | |
571 | @<declaration>^* \\ | |
572 | @<form>^* | |
573 | \-\nlret @<value>^*} | |
574 | \end{describe} | |
1f7d590d | 575 | |
fcb6c0fb MW |
576 | \begin{describe}{gf}{expand-parser-spec @<context> @<spec> @> @<form>} |
577 | \end{describe} | |
578 | ||
579 | \begin{describe}{gf} | |
580 | {expand-parser-form @<context> @<head> @<tail> @> @<form>} | |
581 | \end{describe} | |
582 | ||
583 | \begin{describe}{gf}{wrap-parser @<context> @<form> @> @<wrapped-form>} | |
584 | \end{describe} | |
585 | ||
586 | \begin{describe}{mac} | |
587 | {defparse @<name> (@[[ :context (@<var> @<context-class>) @]] | |
020b9e2b MW |
588 | @<destructuring-lambda-list-item>^*) \\ \ind |
589 | @[[ @<declaration>^* @! @<doc-string> @]] \\ | |
590 | @<form>^* | |
591 | \-\nlret @<name>} | |
fcb6c0fb MW |
592 | \end{describe} |
593 | ||
fcb6c0fb MW |
594 | \begin{describe}{lmac} |
595 | {parse @<parser> @> @<result> @<success-flag> @<consumed-flag>} | |
596 | \end{describe} | |
597 | ||
3f921e3f MW |
598 | \begin{describe}{mac} |
599 | {parser @<lambda-list> | |
600 | @[[ @<declaration>^* @! @<doc-string> @]] | |
601 | @<parser> | |
602 | @> @<function>} | |
603 | \end{describe} | |
604 | ||
fcb6c0fb MW |
605 | \begin{describe}{gf}{parser-at-eof-p @<context> @> @<form>} |
606 | \end{describe} | |
607 | ||
608 | \begin{describe}{gf}{parser-step @<context> @> @<form>} | |
609 | \end{describe} | |
610 | ||
fcb6c0fb MW |
611 | \begin{describe}{mac} |
612 | {if-parse (@[[ \=:result @<result-var> @! | |
020b9e2b MW |
613 | :expected @<expected-var> @! \+\\ |
614 | :consumedp @<consumed-var> @]]) \-\\ \ind\ind | |
615 | @<parser> \-\\ | |
616 | @<consequent> \\ | |
617 | @[@<alternative>@] | |
618 | \-\nlret @<value>^*} | |
fcb6c0fb MW |
619 | \end{describe} |
620 | ||
621 | \begin{describe}{mac} | |
020b9e2b MW |
622 | {when-parse (@[@<result-var>@]) @<parser> \\ \ind |
623 | @<form>^* | |
624 | \-\nlret @<value>^*} | |
fcb6c0fb MW |
625 | \end{describe} |
626 | ||
627 | \begin{describe}{mac} | |
628 | {cond-parse (@[[ \=:result @<result-var> @! | |
020b9e2b MW |
629 | :expected @<expected-var> @! \+\\ |
630 | :consumedp @<consumed-var> @]]) \-\\ \ind | |
631 | @{ (@<parser> @<form>^*) @}^* | |
632 | \-\nlret @<value>^*} | |
fcb6c0fb MW |
633 | \end{describe} |
634 | ||
a75cd932 MW |
635 | \begin{describe}{cls}{list-parser () \&key :var} |
636 | \end{describe} | |
637 | ||
4c35de3a MW |
638 | |
639 | \subsection{Basic parser syntax} \label{sec:parsing.syntax.basic} | |
640 | ||
fcb6c0fb MW |
641 | \begin{describe}{parse}{:eof} |
642 | \end{describe} | |
643 | ||
644 | \begin{describe}{parseform}{lisp @<form>^*} | |
645 | \end{describe} | |
646 | ||
647 | \begin{describe}{parseform}{label @<parser>} | |
648 | \end{describe} | |
649 | ||
650 | \begin{describe}{parse}{t} | |
651 | \end{describe} | |
652 | ||
653 | \begin{describe}{parseform}{t @<value>} | |
654 | \end{describe} | |
655 | ||
656 | \begin{describe}{parse}{nil} | |
657 | \end{describe} | |
658 | ||
659 | \begin{describe}{parseform}{nil @<indicator>} | |
660 | \end{describe} | |
661 | ||
662 | \begin{describe}{parseform}{when @<cond> @<parser>} | |
663 | \end{describe} | |
664 | ||
665 | \begin{describe}{parseform} | |
020b9e2b MW |
666 | {seq (@{ @<atomic-parser-spec> @! |
667 | (@[@<var>@] @<parser>) @}^*) \\ \ind | |
cd35a54e | 668 | @<form>^*} |
fcb6c0fb MW |
669 | \end{describe} |
670 | ||
671 | \begin{describe}{parseform}{and @<parser>^*} | |
672 | \end{describe} | |
673 | ||
674 | \begin{describe}{parseform}{or @<parser>^*} | |
675 | \end{describe} | |
676 | ||
677 | \begin{describe}{parseform}{? @<parser> @[@<default>@]} | |
678 | \end{describe} | |
679 | ||
680 | \begin{describe}{parseform} | |
020b9e2b MW |
681 | {many (\=@<accumulator-var> @<init-form> @<update-form> \+\\ |
682 | @[[ \=:new @<new-var> @! :final @<final-form> @! \+\\ | |
683 | :min @<minimum> @! :max @<maximum> @! \\ | |
684 | :commitp @<commitp> @]]) \-\-\\ \ind | |
fcb6c0fb MW |
685 | @<item-parser> @[@<sep-parser>@]} |
686 | \end{describe} | |
687 | ||
688 | \begin{describe}{parseform} | |
689 | {list (@[[ :min @<minimum> @! :max @<maximum> @! | |
020b9e2b | 690 | :commitp @<commitp> @]]) \\ \ind |
fcb6c0fb MW |
691 | @<item-parser> @[@<sep-parser>@]} |
692 | \end{describe} | |
693 | ||
694 | \begin{describe}{parseform} | |
695 | {skip-many (@[[ :min @<minimum> @! :max @<maximum> @! | |
020b9e2b | 696 | :commitp @<commitp> @]]) \\ \ind |
fcb6c0fb MW |
697 | @<item-parser> @[@<sep-parser>@]} |
698 | \end{describe} | |
699 | ||
700 | \begin{describe}{fun}{call-pluggable-parser @<symbol> \&rest @<args>} | |
701 | \end{describe} | |
702 | ||
703 | \begin{describe}{parseform}{plug @<symbol> @<arg>^*} | |
704 | \end{describe} | |
705 | ||
706 | \begin{describe}{fun} | |
707 | {pluggable-parser-add @<symbol> @<tag> @<parser-function>} | |
708 | \end{describe} | |
709 | ||
710 | \begin{describe}{mac} | |
cac85e0b MW |
711 | {define-pluggable-parser @<symbol> @<tag> @<lambda-list> |
712 | @[[ @<declaration>^* @! @<doc-string> @]] | |
713 | @<form>^*} | |
fcb6c0fb MW |
714 | \end{describe} |
715 | ||
4c35de3a MW |
716 | |
717 | \subsection{Place-capture protocol} \label{sec:parsing.syntax.place} | |
718 | ||
fcb6c0fb MW |
719 | \begin{describe}{gf}{parser-capture-place @<context> @> @<form>} |
720 | \end{describe} | |
721 | ||
722 | \begin{describe}{gf}{parser-restore-place @<context> @<place> @> @<form>} | |
723 | \end{describe} | |
724 | ||
725 | \begin{describe}{gf}{parser-release-place @<context> @<place> @> @<form>} | |
726 | \end{describe} | |
727 | ||
728 | \begin{describe}{gf} | |
46a4727d | 729 | {parser-places-must-be-released-p @<context> @> @<generalized-boolean>} |
fcb6c0fb MW |
730 | \end{describe} |
731 | ||
732 | \begin{describe}{mac} | |
cac85e0b MW |
733 | {with-parser-place (@<place-var> @<context>) |
734 | @[[ @<declaration>^* @! @<doc-string> @]] | |
735 | @<form>^*} | |
fcb6c0fb MW |
736 | \end{describe} |
737 | ||
738 | \begin{describe}{parseform}{peek @<parser>} | |
739 | \end{describe} | |
740 | ||
2b8759bf MW |
741 | \begin{describe}{parseform}{commit} |
742 | \end{describe} | |
743 | ||
4c35de3a MW |
744 | |
745 | \subsection{Character parsers} \label{sec:parsing.syntax.character} | |
746 | ||
fcb6c0fb MW |
747 | \begin{describe}{cls}{character-parser-context () \&key} |
748 | \end{describe} | |
749 | ||
750 | \begin{describe}{gf}{parser-current-char @<context> @> @<form>} | |
751 | \end{describe} | |
752 | ||
a75cd932 MW |
753 | \begin{describe}{cls} |
754 | {string-parser (character-parser-context) \&key :string :index :length} | |
755 | \end{describe} | |
756 | ||
fcb6c0fb MW |
757 | \begin{describe}{parseform} |
758 | {if-char (@[@<result-var>@]) @<condition> @<consequent> @<alternative>} | |
759 | \end{describe} | |
760 | ||
761 | \begin{describe}{parseform}{char @<character>} | |
762 | \end{describe} | |
763 | ||
34042b35 | 764 | \begin{describe}{parse}[char]{@<character>} |
fcb6c0fb MW |
765 | \end{describe} |
766 | ||
34042b35 | 767 | \begin{describe}{parse}[string]{@<string>} |
fcb6c0fb MW |
768 | \end{describe} |
769 | ||
770 | \begin{describe}{parse}{:any} | |
771 | \end{describe} | |
772 | ||
773 | \begin{describe}{parseform}{satisfies @<predicate>} | |
774 | \end{describe} | |
775 | ||
776 | \begin{describe}{parseform}{not @<character>} | |
777 | \end{describe} | |
778 | ||
779 | \begin{describe}{parseform}{filter @<predicate>} | |
780 | \end{describe} | |
781 | ||
782 | \begin{describe}{parse}{:whitespace} | |
783 | \end{describe} | |
784 | ||
785 | \begin{describe}{cls}{token-parser-context () \&key} | |
786 | \end{describe} | |
787 | ||
788 | \begin{describe}{gf}{parser-token-type @<context> @> @<form>} | |
789 | \end{describe} | |
790 | ||
791 | \begin{describe}{gf}{parser-token-value @<context> @> @<form>} | |
792 | \end{describe} | |
793 | ||
794 | \begin{describe}{parseform}{token @<type> @[@<value>@] @[:peekp @<peek>@]} | |
795 | \end{describe} | |
796 | ||
34042b35 | 797 | \begin{describe}{parse}[atom]{@<atom>} |
fcb6c0fb MW |
798 | \end{describe} |
799 | ||
4c35de3a MW |
800 | |
801 | \subsection{Scanner contexts} \label{sec:parsing.syntax.scanner} | |
802 | ||
fcb6c0fb MW |
803 | \begin{describe}{cls}{scanner-context () \&key :scanner} |
804 | \end{describe} | |
805 | ||
a75cd932 | 806 | \begin{describe}{gf}{parser-scanner @<context> @> @<symbol>} |
fcb6c0fb MW |
807 | \end{describe} |
808 | ||
809 | \begin{describe}{cls} | |
810 | {character-scanner-context (scanner-context character-parser-context) | |
811 | \&key :scanner} | |
812 | \end{describe} | |
813 | ||
814 | \begin{describe}{cls} | |
815 | {token-scanner-context (scanner-context token-parser-context) | |
816 | \&key :scanner} | |
817 | \end{describe} | |
818 | ||
4c35de3a MW |
819 | |
820 | \subsection{Expression parsing} \label{sec:parsing.syntax.expression} | |
821 | ||
2c7465ac MW |
822 | \begin{describe}{gf}{operator-push-action @<left> @<right>} |
823 | \end{describe} | |
824 | ||
825 | \begin{describe}{parseform} | |
020b9e2b | 826 | {expr \=(@[[ :nestedp @<nestedp-var> @]]) \+\\ |
2c7465ac MW |
827 | @<operand-parser> @<binop-parser> |
828 | @<preop-parser> @<postop-parser>} | |
829 | \end{describe} | |
830 | ||
831 | \begin{describe}{gf}{operator-left-precedence @<operator> @> @<prec>} | |
832 | \end{describe} | |
833 | ||
834 | \begin{describe}{gf}{operator-right-precedence @<operator> @> @<prec>} | |
835 | \end{describe} | |
836 | ||
837 | \begin{describe}{gf}{operator-associativity @<operator> @> @<assoc>} | |
838 | \end{describe} | |
839 | ||
840 | \begin{describe}{cls}{prefix-operator () \&key} | |
841 | \end{describe} | |
842 | ||
843 | \begin{describe}{cls}{simple-operator () \&key :name :function} | |
844 | \end{describe} | |
845 | ||
846 | \begin{describe}{cls} | |
847 | {simple-unary-operator (simple-operator) \&key :name :function} | |
848 | \end{describe} | |
849 | ||
850 | \begin{describe*} | |
851 | {\quad\=\kill | |
020b9e2b MW |
852 | \dhead{cls}{simple-binary-operator (simple-operator) \\ \> |
853 | \&key :name :function | |
854 | :lprec :rprec :associativity} | |
2c7465ac MW |
855 | \dhead{cls}{simple-postfix-operator (simple-unary-operator) \\ \> |
856 | \&key :name :function :lprec :rprec} | |
857 | \dhead{cls}{simple-prefix-operator | |
020b9e2b | 858 | (prefix-operator simple-unary-operator) \\ \> |
2c7465ac MW |
859 | \&key :name :function :rprec}} |
860 | \end{describe*} | |
861 | ||
862 | \begin{describe*} | |
863 | {\dhead{mac}{preop @<name> (@<operand-var> @<rprec>)
cac85e0b | 864 | @<declaration>^* @<form>^* |
2c7465ac MW |
865 | @> @<prefix-operator>} |
866 | \dhead{mac}{postop @<name> | |
867 | (@<operand-var> @<lprec> @[[ :rprec @<rprec> @]]) | |
cac85e0b MW |
868 | @<declaration>^* @<form>^* |
869 | \nlret @<postfix-operator>} | |
2c7465ac | 870 | \dhead{mac}{binop @<name> (@<operand-var> @<lprec> @<rprec> @<assoc>) |
cac85e0b | 871 | @<declaration>^* @<form>^*
2c7465ac MW |
872 | @> @<binary-operator>}} |
873 | \end{describe*} | |
874 | ||
875 | \begin{describe*} | |
a75cd932 | 876 | {\dhead{cls}{open-parenthesis (parenthesis prefix-operator) \&key :tag} |
2c7465ac MW |
877 | \dhead{cls}{close-parenthesis (parenthesis) \&key :tag}} |
878 | \end{describe*} | |
879 | ||
880 | \begin{describe*} | |
881 | {\dhead{fun}{lparen @<tag> @> @<open-paren>} | |
882 | \dhead{fun}{rparen @<tag> @> @<close-paren>}} | |
883 | \end{describe*} | |
fcb6c0fb MW |
884 | |
885 | %%%------------------------------------------------------------------------- | |
4c35de3a | 886 | \section{Lexical analyser} \label{sec:parsing.lexical} |
1f7d590d | 887 | |
2c7465ac MW |
888 | \begin{describe}{cls} |
889 | {sod-token-scanner (token-scanner) | |
890 | \&key :filename (:line 1) (:column 0) :char-scanner} | |
891 | \end{describe} | |
892 | ||
893 | \begin{describe}{fun}{define-indicator @<indicator> @<description>} | |
894 | \end{describe} | |
895 | ||
40d95de7 MW |
896 | \begin{describe*} |
897 | {\dhead{cls}{lexer-error (parser-error base-lexer-error) \\ \ind | |
898 | \&key :expected :found :location \-} | |
899 | \dhead{cls}{syntax-error (parser-error base-syntax-error) \\ \ind | |
900 | \&key :expected :found :location \-}} | |
901 | \end{describe*} | |
902 | ||
d63df20a MW |
903 | \begin{describe}{fun} |
904 | {syntax-error @<scanner> @<expected> \&key :continuep :location} | |
2c7465ac MW |
905 | \end{describe} |
906 | ||
907 | \begin{describe}{fun} | |
d63df20a | 908 | {lexer-error @<char-scanner> @<expected> \&key :location} |
2c7465ac MW |
909 | \end{describe} |
910 | ||
911 | \begin{describe}{parseform} | |
912 | {skip-until (@[[ :keep-end @<keep-end-flag> @]]) @<token-type>^*} | |
913 | \end{describe} | |
914 | ||
5fa27d76 | 915 | \begin{describe}{parseform} |
450a4be6 | 916 | {error (@[[ :ignore-unconsumed @<flag> @! |
b5911ce8 MW |
917 | :force-process @<flag> @]]) \\ \ind\ind |
918 | @<sub-parser> @<recover-parser> \-\\ | |
919 | @<declaration>^* \\ | |
920 | @<form>^*} | |
2c7465ac MW |
921 | \end{describe} |
922 | ||
ae7a3c8f MW |
923 | \begin{describe}{parseform}{must @<sub-parser> @[@<default>@]} |
924 | \end{describe} | |
925 | ||
2c7465ac MW |
926 | \begin{describe}{fun} |
927 | {scan-comment @<char-scanner> | |
928 | @> @<result> @<success-flag> @<consumed-flag>} | |
929 | \end{describe} | |
930 | ||
1f7d590d MW |
931 | %%%----- That's all, folks -------------------------------------------------- |
932 | ||
933 | %%% Local variables: | |
934 | %%% mode: LaTeX | |
935 | %%% TeX-master: "sod.tex" | |
936 | %%% TeX-PDF-mode: t | |
937 | %%% End: |